You are here

public function JSTokenizer::get in Javascript Aggregator 6

2 calls to JSTokenizer::get()
JSTokenizer::match in ./jsminplus.php
JSTokenizer::peek in ./jsminplus.php

File

./jsminplus.php, line 1827

Class

JSTokenizer

Code

/**
 * Produces the next token and returns its type.
 *
 * Tokens that are still pending in the lookahead count are replayed first from
 * the four-slot token ring ($this->tokens, indexed modulo 4). Otherwise leading
 * whitespace and comments are skipped, the next token is recognised from its
 * first character, stored in the ring and the cursor is advanced past it.
 *
 * @param $chunksize
 *   Size argument handed to $this->getInput(). The method switches it to NULL
 *   to retry "with a full chunk fetch" when a comment or string literal does
 *   not match within the current chunk. (The exact meaning of NULL is defined
 *   by getInput(), which is not part of this method.)
 *
 * @return
 *   The type of the token: one of the TOKEN_* / OP_* constants, or the
 *   operator, punctuation or keyword text itself.
 *
 * @throws
 *   The exception built by $this->newSyntaxError() for an unterminated string
 *   literal or a character that does not start any known token.
 */
public function get($chunksize = 1000) {
  // Replay pushed-back tokens; newline tokens are skipped unless the caller
  // asked for them via scanNewlines.
  while ($this->lookahead) {
    $this->lookahead--;
    $this->tokenIndex = ($this->tokenIndex + 1) & 3;
    $token = $this->tokens[$this->tokenIndex];
    if ($token->type != TOKEN_NEWLINE || $this->scanNewlines) {
      return $token->type;
    }
  }
  $conditional_comment = false;

  // strip whitespace and comments
  while (true) {
    $input = $this
      ->getInput($chunksize);

    // whitespace handling; gobble up \r as well (effectively we don't have support for MAC newlines!)
    // When newlines are significant they are left in the input so that they
    // can be emitted as TOKEN_NEWLINE further down.
    $re = $this->scanNewlines ? '/^[ \\r\\t]+/' : '/^\\s+/';
    if (preg_match($re, $input, $match)) {
      $spaces = $match[0];
      $spacelen = strlen($spaces);
      $this->cursor += $spacelen;
      if (!$this->scanNewlines) {
        $this->lineno += substr_count($spaces, "\n");
      }

      // complete chunk contained whitespace; fetch the next chunk and go on
      if ($spacelen == $chunksize) {
        continue;
      }

      // Re-read the input behind the whitespace; only a leading slash can
      // start a comment, anything else is the beginning of a token.
      $input = $this
        ->getInput($chunksize);
      if ($input == '' || $input[0] != '/') {
        break;
      }
    }

    // Comments
    if (!preg_match('/^\\/(?:\\*(@(?:cc_on|if|elif|else|end))?.*?\\*\\/|\\/[^\\n]*)/s', $input, $match)) {
      // Already retried with a full fetch (or called with an empty chunk
      // size): this is not a comment.
      if (!$chunksize) {
        break;
      }

      // retry with a full chunk fetch; this also prevents breakage of long regular expressions (which will never match a comment)
      $chunksize = null;
      continue;
    }

    // check if this is a conditional (JScript) comment
    if (!empty($match[1])) {
      // Only the opener ("/*@cc_on", "/*@if", ...) becomes the token; the
      // rest of the comment stays in the input.
      $match[0] = '/*' . $match[1];
      $conditional_comment = true;
      break;
    }
    else {
      // Ordinary comment: skip it and keep the line counter in sync.
      $this->cursor += strlen($match[0]);
      $this->lineno += substr_count($match[0], "\n");
    }
  }
  if ($input == '') {
    $tt = TOKEN_END;
    $match = array(
      '',
    );
  }
  elseif ($conditional_comment) {
    $tt = TOKEN_CONDCOMMENT_START;
  }
  else {
    switch ($input[0]) {
      case '0':

        // hexadecimal; the isset() guard covers a lone "0" at the very end of
        // the input, where offset 1 does not exist
        if (isset($input[1]) && ($input[1] == 'x' || $input[1] == 'X') && preg_match('/^0x[0-9a-f]+/i', $input, $match)) {
          $tt = TOKEN_NUMBER;
          break;
        }

      // FALL THROUGH
      case '1':
      case '2':
      case '3':
      case '4':
      case '5':
      case '6':
      case '7':
      case '8':
      case '9':

        // should always match
        preg_match('/^\\d+(?:\\.\\d*)?(?:[eE][-+]?\\d+)?/', $input, $match);
        $tt = TOKEN_NUMBER;
        break;
      case "'":
        if (preg_match('/^\'(?:[^\\\\\'\\r\\n]++|\\\\(?:.|\\r?\\n))*\'/', $input, $match)) {
          $tt = TOKEN_STRING;
        }
        else {
          // retry with a full chunk fetch
          if ($chunksize) {
            return $this
              ->get(null);
          }

          // the retry did not help either
          throw $this
            ->newSyntaxError('Unterminated string literal');
        }
        break;
      case '"':
        if (preg_match('/^"(?:[^\\\\"\\r\\n]++|\\\\(?:.|\\r?\\n))*"/', $input, $match)) {
          $tt = TOKEN_STRING;
        }
        else {
          // retry with a full chunk fetch
          if ($chunksize) {
            return $this
              ->get(null);
          }

          // the retry did not help either
          throw $this
            ->newSyntaxError('Unterminated string literal');
        }
        break;
      case '/':
        // A slash is only tried as a regular expression literal when an
        // operand is expected; otherwise it is handled as an operator below.
        if ($this->scanOperand && preg_match('/^\\/((?:\\\\.|\\[(?:\\\\.|[^\\]])*\\]|[^\\/])+)\\/([gimy]*)/', $input, $match)) {
          $tt = TOKEN_REGEXP;
          break;
        }

      // FALL THROUGH
      case '|':
      case '^':
      case '&':
      case '<':
      case '>':
      case '+':
      case '-':
      case '*':
      case '%':
      case '=':
      case '!':

        // should always match
        preg_match($this->opRegExp, $input, $match);
        $op = $match[0];
        $oplen = strlen($op);

        // Compound assignment ("+=", ">>=", ...): the isset() guard covers an
        // operator that is the very last thing in the input.
        if (in_array($op, $this->assignOps) && isset($input[$oplen]) && $input[$oplen] == '=') {
          $tt = OP_ASSIGN;
          $match[0] .= '=';
        }
        else {
          $tt = $op;
          if ($this->scanOperand) {
            // In operand position plus and minus are the unary operators.
            if ($op == OP_PLUS) {
              $tt = OP_UNARY_PLUS;
            }
            elseif ($op == OP_MINUS) {
              $tt = OP_UNARY_MINUS;
            }
          }
          $op = null;
        }
        break;
      case '.':
        // A dot followed by digits is a number such as ".5".
        if (preg_match('/^\\.\\d+(?:[eE][-+]?\\d+)?/', $input, $match)) {
          $tt = TOKEN_NUMBER;
          break;
        }

      // FALL THROUGH
      case ';':
      case ',':
      case '?':
      case ':':
      case '~':
      case '[':
      case ']':
      case '{':
      case '}':
      case '(':
      case ')':

        // these are all single
        $match = array(
          $input[0],
        );
        $tt = $input[0];
        break;
      case '@':

        // check end of conditional comment
        if (substr($input, 0, 3) == '@*/') {
          $match = array(
            '@*/',
          );
          $tt = TOKEN_CONDCOMMENT_END;
        }
        else {
          throw $this
            ->newSyntaxError('Illegal token');
        }
        break;
      case "\n":
        // A newline can only be left in the input when scanNewlines is set,
        // because the whitespace pattern above consumes it otherwise.
        if ($this->scanNewlines) {
          $match = array(
            "\n",
          );
          $tt = TOKEN_NEWLINE;
        }
        else {
          throw $this
            ->newSyntaxError('Illegal token');
        }
        break;
      default:

        // FIXME: add support for unicode and unicode escape sequence \uHHHH
        if (preg_match('/^[$\\w]+/', $input, $match)) {
          // Keywords use their own text as token type.
          $tt = in_array($match[0], $this->keywords) ? $match[0] : TOKEN_IDENTIFIER;
        }
        else {
          throw $this
            ->newSyntaxError('Illegal token');
        }
    }
  }

  // Store the token in the next slot of the four-entry ring, creating the
  // token object the first time a slot is used.
  $this->tokenIndex = ($this->tokenIndex + 1) & 3;
  if (!isset($this->tokens[$this->tokenIndex])) {
    $this->tokens[$this->tokenIndex] = new JSToken();
  }
  $token = $this->tokens[$this->tokenIndex];
  $token->type = $tt;
  if ($tt == OP_ASSIGN) {
    // The base operator of a compound assignment; null for a plain "=".
    $token->assignOp = $op;
  }
  $token->start = $this->cursor;
  $token->value = $match[0];
  $this->cursor += strlen($match[0]);
  $token->end = $this->cursor;
  $token->lineno = $this->lineno;
  return $tt;
}