You are here

protected function JSqueeze::extractStrings in Advanced CSS/JS Aggregation 7.2

Same name and namespace in other branches
  1. 8.4 advagg_js_minify/jsqueeze.inc \Patchwork\JSqueeze::extractStrings()
  2. 8.2 advagg_js_minify/jsqueeze.inc \Patchwork\JSqueeze::extractStrings()
  3. 8.3 advagg_js_minify/jsqueeze.inc \Patchwork\JSqueeze::extractStrings()
1 call to JSqueeze::extractStrings()
JSqueeze::squeeze in advagg_js_compress/jsqueeze.inc
Squeezes JavaScript source code.

File

advagg_js_compress/jsqueeze.inc, line 190

Class

JSqueeze

Namespace

Patchwork

Code

/**
 * Pulls string/regex literals and kept comments out of JavaScript source.
 *
 * The method runs in several passes:
 *  1. A character scanner copies code into $code, drops comments, and moves
 *     every string literal, regex literal and (optionally) "important"
 *     comment into $strings, leaving a placeholder key in the code. Keys
 *     start with //''"" followed by a counter and end with ' (strings),
 *     /' (regexes) or ] (important comments).
 *  2. Regex passes normalise whitespace around operators, rewrite
 *     new Array / new Object, add ; after }, and tag if/for/while/do.
 *  3. A second scanner removes or converts semi-colons into newlines.
 *  4. Final regex passes repair missing statement separators and wrap the
 *     string placeholders with the less frequently used quote character.
 *
 * The DEL byte ("\x7F") is used throughout as an in-band sentinel: during
 * the first scan it stands for a source newline, later for a space that must
 * survive whitespace removal. It is always written as the explicit "\x7F"
 * escape so the marker stays visible and cannot be lost by editors or tools
 * that strip control characters.
 *
 * Relies on $this->keepImportantComments and $this->restoreCc(), both
 * defined elsewhere in the class (restoreCc() presumably reverts the
 * conditional-comment protection applied at the top - confirm there).
 *
 * @param string $f
 *   JavaScript source code.
 *
 * @return array
 *   A two-element array: the processed code containing placeholder keys, and
 *   an array mapping each placeholder key to the extracted literal content.
 */
protected function extractStrings($f) {
  if ($cc_on = false !== strpos($f, '@cc_on')) {

    // Protect conditional comments from being removed
    $f = str_replace('#', '##', $f);
    $f = str_replace('/*@', '1#@', $f);
    $f = preg_replace("'//@([^\n]+)'", '2#@$1@#3', $f);
    $f = str_replace('@*/', '@#1', $f);
  }
  $len = strlen($f);
  $code = str_repeat(' ', $len);
  $j = 0;
  $strings = array();
  $K = 0;

  // Scanner state: false while in plain code, otherwise one of
  // '//' (line comment), '*' (block comment), '!' (kept comment),
  // "'" or '"' (string literal), "/'" (regex) or '/[' (regex char class).
  $instr = false;

  // Indexes 0/1 hold the two quote characters; the string keys count how
  // often each quote occurs inside extracted literals.
  $q = array(
    "'",
    '"',
    "'" => 0,
    '"' => 0,
  );

  // Extract strings, removes comments
  for ($i = 0; $i < $len; ++$i) {
    if ($instr) {
      if ('//' == $instr) {
        if ("\n" == $f[$i]) {
          // Blank the newline and step back so it is re-read as whitespace.
          $f[$i--] = ' ';
          $instr = false;
        }
      }
      elseif ($f[$i] == $instr || '/' == $f[$i] && "/'" == $instr) {
        if ('!' == $instr) {
          // NOTE(review): a literal `!` inside a kept comment lands in this
          // empty branch and is not copied to the output - confirm intent.
        }
        elseif ('*' == $instr) {
          if ('/' == $f[$i + 1]) {
            ++$i;
            $instr = false;
          }
        }
        else {
          if ("/'" == $instr) {
            // End of a regex literal: also collect trailing g/m/i flags.
            while (isset($f[$i + 1]) && false !== strpos('gmi', $f[$i + 1])) {
              $s[] = $f[$i++];
            }
            $s[] = $f[$i];
          }
          $instr = false;
        }
      }
      elseif ('*' == $instr) {
        // Inside a discarded block comment: skip the character.
      }
      elseif ('!' == $instr) {
        if ('*' == $f[$i] && '/' == $f[$i + 1]) {
          $s[] = "*/\r";
          ++$i;
          $instr = false;
        }
        elseif ("\n" == $f[$i]) {
          $s[] = "\r";
        }
        else {
          $s[] = $f[$i];
        }
      }
      elseif ('\\' == $f[$i]) {
        ++$i;

        // A backslash followed by a newline (line continuation) is dropped.
        if ("\n" != $f[$i]) {
          isset($q[$f[$i]]) && ++$q[$f[$i]];
          $s[] = '\\' . $f[$i];
        }
      }
      elseif ('[' == $f[$i] && "/'" == $instr) {
        $instr = '/[';
        $s[] = '[';
      }
      elseif (']' == $f[$i] && '/[' == $instr) {
        $instr = "/'";
        $s[] = ']';
      }
      elseif ("'" == $f[$i] || '"' == $f[$i]) {
        // The "other" quote inside a literal: count it and store it escaped.
        ++$q[$f[$i]];
        $s[] = '\\' . $f[$i];
      }
      else {
        $s[] = $f[$i];
      }
    }
    else {
      switch ($f[$i]) {
        case ';':

          // Remove triple semi-colon
          // (the 2nd and 3rd `;` become `//`, so the fall-through below
          // discards the rest of the line as a line comment)
          if ($i > 0 && ';' == $f[$i - 1] && $i + 1 < $len && ';' == $f[$i + 1]) {
            $f[$i] = $f[$i + 1] = '/';
          }
          else {
            $code[++$j] = ';';
            break;
          }
        case '/':
          if ('*' == $f[$i + 1]) {
            ++$i;
            $instr = '*';
            if ($this->keepImportantComments && '!' == $f[$i + 1]) {
              ++$i;

              // no break here
            }
            else {
              break;
            }
          }
          elseif ('/' == $f[$i + 1]) {
            ++$i;
            $instr = '//';
            break;
          }
          else {
            // Decide between division and the start of a regex literal by
            // looking at the last significant character emitted so far.
            $a = $j && (' ' == $code[$j] || "\x7F" == $code[$j]) ? $code[$j - 1] : $code[$j];
            if (false !== strpos('-!%&;<=>~:^+|,()*?[{} ', $a) || false !== strpos('oenfd', $a) && preg_match("'(?<![\$.a-zA-Z0-9_])(do|else|return|typeof|yield[ \x7F]?\\*?)[ \x7F]?\$'", substr($code, $j - 7, 8))) {
              if (')' === $a && $j > 1) {
                // After `)`: only a regex if the matching `(` belongs to
                // if/for/while; walk back to find that opening bracket.
                $a = 1;
                $k = $j - (' ' == $code[$j] || "\x7F" == $code[$j]) - 1;
                while ($k >= 0 && $a) {
                  if ('(' === $code[$k]) {
                    --$a;
                  }
                  elseif (')' === $code[$k]) {
                    ++$a;
                  }
                  --$k;
                }
                if (!preg_match("'(?<![\$.a-zA-Z0-9_])(if|for|while)[ \x7F]?\$'", substr($code, 0, $k + 1))) {
                  $code[++$j] = '/';
                  break;
                }
              }
              $key = "//''\"\"" . $K++ . ($instr = "/'");
              $a = $j;
              $code .= $key;
              while (isset($key[++$j - $a - 1])) {
                $code[$j] = $key[$j - $a - 1];
              }
              --$j;

              // Finalise the previously collected literal (array -> string)
              // before $s is re-bound to the new entry.
              isset($s) && ($s = implode('', $s)) && $cc_on && $this
                ->restoreCc($s);
              $strings[$key] = array(
                '/',
              );
              $s =& $strings[$key];
            }
            else {
              $code[++$j] = '/';
            }
            break;
          }
        case "'":
        case '"':
          // Also reached by fall-through for kept comments, in which case
          // $f[$i] is the `!` and the key ends with `]` instead of `'`.
          $instr = $f[$i];
          $key = "//''\"\"" . $K++ . ('!' == $instr ? ']' : "'");
          $a = $j;
          $code .= $key;
          while (isset($key[++$j - $a - 1])) {
            $code[$j] = $key[$j - $a - 1];
          }
          --$j;
          isset($s) && ($s = implode('', $s)) && $cc_on && $this
            ->restoreCc($s);
          $strings[$key] = array();
          $s =& $strings[$key];
          '!' == $instr && ($s[] = "\r/*!");
          break;
        case "\n":
          if ($j > 3) {
            if (' ' == $code[$j] || "\x7F" == $code[$j]) {
              --$j;
            }
            if (false === strpos('oefd', $code[$j]) || !preg_match("'(?<![\$.a-zA-Z0-9_])(?:do|else|typeof|void)[ \x7F]?\$'", substr($code, $j - 6, 8))) {
              // A newline becomes `;` after break/continue/return/yield,
              // otherwise the DEL sentinel that marks "newline was here".
              $code[++$j] = false !== strpos('kend', $code[$j - 1]) && preg_match("'(?<![\$.a-zA-Z0-9_])(?:break|continue|return|yield[ \x7F]?\\*?)[ \x7F]?\$'", substr($code, $j - 9, 10)) ? ';' : "\x7F";
              break;
            }
          }
        case "\t":
          $f[$i] = ' ';
        case ' ':
          // Collapse runs of whitespace into a single character.
          if (!$j || ' ' == $code[$j] || "\x7F" == $code[$j]) {
            break;
          }
        default:
          $code[++$j] = $f[$i];
      }
    }
  }
  isset($s) && ($s = implode('', $s)) && $cc_on && $this
    ->restoreCc($s);
  unset($s);
  $code = substr($code, 0, $j + 1);
  $cc_on && $this
    ->restoreCc($code, false);

  // Deal with newlines before/after postfix/prefix operators
  // (a string literal starts with `//` and ends with `'` at this stage;
  // a source newline is represented by the DEL sentinel "\x7F")
  // http://inimino.org/~inimino/blog/javascript_semicolons
  // Newlines before prefix are a new statement when a completed expression precedes because postfix is a "restricted production"
  // A closing bracket `)` from if/for/while does not complete an expression, so mark possible `;` as `#` to deal with later
  $code = preg_replace("#(?<=[a-zA-Z\$_\\d'\\]}])\x7F(--|\\+\\+)#", ';$1', $code);
  $code = preg_replace("#(?<=\\))\x7F(--|\\+\\+)#", '#$1', $code);

  // Newlines after postfix are a new statement if the following token can't be parsed otherwise
  // i.e. it's a keyword, identifier, string or number literal, prefix operator, opening brace
  // But a prefix operator can have a newline before its operand, so check a completed expression precedes to be sure it's a postfix
  // Again mark case after closing bracket with `#` to deal with later
  // Also ensure keywords that may be followed by an expression aren't mistaken for the end of a completed expression
  // (note that postfix cannot apply to an expression completed with `}`)
  $code = preg_replace("#(?<![\$.a-zA-Z0-9_])(do|else|return|throw|typeof|void|yield) ?+(--|\\+\\+)\x7F#", '$1$2 ', $code);
  $code = preg_replace("#(?<=[a-zA-Z\$_\\d'\\]]) ?+(--|\\+\\+)\x7F(?=//|--|\\+\\+|[a-zA-Z\$_\\d[({])#", '$1;', $code);
  $code = preg_replace("#(?<=\\)) ?+(--|\\+\\+)\x7F(?=//|--|\\+\\+|[a-zA-Z\$_\\d[({])#", '$1#', $code);

  // Protect wanted spaces and remove unwanted ones
  // (remaining newline sentinels become plain spaces; from here on "\x7F"
  // marks a space that must not be stripped)
  $code = strtr($code, "\x7F", ' ');
  $code = str_replace('- -', "-\x7F-", $code);
  $code = str_replace('+ +', "+\x7F+", $code);
  $code = preg_replace("'(\\d)\\s+\\.\\s*([a-zA-Z\$_[(])'", "\$1\x7F.\$2", $code);
  $code = preg_replace("# ([-!%&;<=>~:.^+|,()*?[\\]{}/']+)#", '$1', $code);
  $code = preg_replace("#([-!%&;<=>~:.^+|,()*?[\\]{}/]+) #", '$1', $code);
  $cc_on && ($code = preg_replace_callback("'//[^\\'].*?@#3'", function ($m) {
    return strtr($m[0], ' ', "\x7F");
  }, $code));

  // Replace new Array/Object by []/{}
  false !== strpos($code, 'new Array') && ($code = preg_replace("'new Array(?:\\(\\)|([;\\])},:]))'", '[]$1', $code));
  false !== strpos($code, 'new Object') && ($code = preg_replace("'new Object(?:\\(\\)|([;\\])},:]))'", '{}$1', $code));

  // Add missing semi-colons after curly braces
  // This adds more semi-colons than strictly needed,
  // but it seems that later gzipping is favorable to the repetition of "};"
  $code = preg_replace("'\\}(?![:,;.()\\[\\]}\\|&?]|(else|catch|finally|while)[^\$.a-zA-Z0-9_])'", '};', $code);

  // Tag possible empty instruction for easy detection
  $code = preg_replace("'(?<![\$.a-zA-Z0-9_])if\\('", '1#(', $code);
  $code = preg_replace("'(?<![\$.a-zA-Z0-9_])for\\('", '2#(', $code);
  $code = preg_replace("'(?<![\$.a-zA-Z0-9_])do while\\('", '4#(', $code);
  $code = preg_replace("'(?<![\$.a-zA-Z0-9_])while\\('", '3#(', $code);
  $code = preg_replace("'(?<![\$.a-zA-Z0-9_])do(?![\$a-zA-Z0-9_])'", '5#', $code);

  // State for the second scan: $s is the current parenthesis depth, $d the
  // current brace depth, $f the list of output fragments and $j its last
  // index. The pools are keyed by depth and remember open for(...) headers,
  // pending if/for/while conditions and pending `do` blocks.
  $forPool = array();
  $instrPool = array();
  $doPool = array();
  $s = 0;
  $d = 0;
  $f = array();
  $j = -1;

  // Remove as much semi-colon as possible
  $len = strlen($code);
  for ($i = 0; $i < $len; ++$i) {
    switch ($code[$i]) {
      case '(':
        if ($j >= 0 && "\n" == $f[$j]) {
          $f[$j] = ';';
        }
        ++$s;
        if ($i > 1 && '#' == $code[$i - 1]) {
          switch ($code[$i - 2]) {
            case '3':
              if (isset($doPool[$d])) {
                $instrPool[$s - 1] = 5;

                // `while` corresponds to `do`
                unset($doPool[$d]);
              }
              else {
                $instrPool[$s - 1] = 1;
              }
              break;
            case '2':
              $forPool[$s] = 1;

            // also set $instrPool
            case '1':
            case '4':
              $instrPool[$s - 1] = 1;
          }
        }
        $f[++$j] = '(';
        break;
      case ']':
      case ')':
        if ($i + 1 < $len && !isset($forPool[$s]) && !isset($instrPool[$s - 1]) && preg_match("'[a-zA-Z0-9_\$]'", $code[$i + 1])) {
          $f[$j] .= $code[$i];
          $f[++$j] = "\n";
        }
        else {
          $f[++$j] = $code[$i];
        }
        if (')' == $code[$i]) {
          unset($forPool[$s]);
          --$s;
          if (isset($instrPool[$s]) && 5 === $instrPool[$s]) {
            $f[$j - 1] .= ')';
            $f[$j] = ';';
          }
        }
        continue 2;
      case '{':
        ++$d;
        $f[++$j] = '{';
        break;
      case '}':
        --$d;
        if ("\n" == $f[$j]) {
          $f[$j] = '}';
        }
        else {
          $f[++$j] = '}';
        }
        break;
      case '+':
      case '-':
        $f[++$j] = $code[$i];
        if ($i + 1 < $len && ($code[$i] === $code[$i + 1] || '#' === $code[$i + 1])) {

          // delay unsetting $instrPool[$s]
          continue 2;
        }
        break;
      case '#':
        // A `#` either closes a numeric tag written by the tagging pass
        // above or is a "possible `;`" marker left after `)`, `++` or `--`.
        switch ($f[$j]) {
          case '1':
            $f[$j] = 'if';
            break 2;
          case '2':
            $f[$j] = 'for';
            break 2;
          case '3':
            $f[$j] = 'while';
            break 2;
          case '4':

            // special case `while` that doesn't correspond to the `do`
            $f[$j] = 'do while';
            $doPool[$d] = 1;
            break 2;
          case '5':
            $f[$j] = 'do';
            $doPool[$d] = 1;
          case ';':

            // added after `do..while` - no extra `;` needed
            break 2;
          case ')':
          case '+':
          case '-':
            if (isset($instrPool[$s])) {

              // prefix operator in conditional/loop statement - no `;`
              break 2;
            }
        }
      case ';':
        // Keep a real `;` inside for(...) headers and for empty loop/if
        // bodies; everywhere else a newline is enough as a separator.
        if (isset($forPool[$s]) || isset($instrPool[$s]) && 5 !== $instrPool[$s]) {
          $f[++$j] = ';';
        }
        elseif ($j >= 0 && "\n" != $f[$j] && ';' != $f[$j]) {
          $f[++$j] = "\n";
        }
        break;
      case '[':
        if ($j >= 0 && "\n" == $f[$j]) {
          $f[$j] = ';';
        }
      default:
        $f[++$j] = $code[$i];
    }
    unset($instrPool[$s]);
  }
  $f = implode('', $f);
  $cc_on && ($f = str_replace('@#3', "\r", $f));

  // Fix "else ;" empty instructions
  $f = preg_replace("'(?<![\$.a-zA-Z0-9_])else([\n}])'", '$1', $f);
  $r1 = array(
    // keywords with a direct object
    'case',
    'delete',
    'do',
    'else',
    'function',
    'in',
    'instanceof',
    'of',
    'break',
    'new',
    'return',
    'throw',
    'typeof',
    'var',
    'void',
    'yield',
    'let',
    'if',
    'const',
    'get',
    'set',
    'continue',
  );
  $r2 = array(
    // keywords with a subject
    'in',
    'instanceof',
    'of',
  );

  // Fix missing semi-colons
  $f = preg_replace("'(?<!(?<![a-zA-Z0-9_\$])" . implode(')(?<!(?<![a-zA-Z0-9_\\$])', $r1) . ') (?!(' . implode('|', $r2) . ")(?![a-zA-Z0-9_\$]))'", "\n", $f);
  $f = preg_replace("'(?<!(?<![a-zA-Z0-9_\$])do)(?<!(?<![a-zA-Z0-9_\$])else) if\\('", "\nif(", $f);
  $f = preg_replace("'(?<=--|\\+\\+)(?<![a-zA-Z0-9_\$])(" . implode('|', $r1) . ")(?![a-zA-Z0-9_\$])'", "\n\$1", $f);
  $f = preg_replace("'(?<![a-zA-Z0-9_\$])for\neach\\('", 'for each(', $f);
  $f = preg_replace("'(?<![a-zA-Z0-9_\$])\n(" . implode('|', $r2) . ")(?![a-zA-Z0-9_\$])'", '$1', $f);

  // Merge strings
  // ($q[0] becomes the quote that occurs less often inside the literals,
  // so fewer escapes are needed once it is used as the delimiter)
  if ($q["'"] > $q['"']) {
    $q = array(
      $q[1],
      $q[0],
    );
  }
  $f = preg_replace("#//''\"\"[0-9]+'#", $q[0] . '$0' . $q[0], $f);
  strpos($f, $q[0] . '+' . $q[0]) && ($f = str_replace($q[0] . '+' . $q[0], '', $f));
  foreach ($strings as $r1 => &$r2) {
    // Regex literals get both quotes unescaped; strings only the quote
    // that is not used as delimiter.
    $r2 = "/'" == substr($r1, -2) ? str_replace(array(
      "\\'",
      '\\"',
    ), array(
      "'",
      '"',
    ), $r2) : str_replace('\\' . $q[1], $q[1], $r2);
  }

  // Drop the by-reference loop variable so it cannot alias the last entry.
  unset($r2);

  // Restore wanted spaces
  $f = strtr($f, "\x7F", ' ');
  return array(
    $f,
    $strings,
  );
}