You are here

Lexer.php in Translation template extractor 6.3

Same filename and directory in other branches
  1. 7.3 vendor/Twig/Lexer.php
  2. 7.2 vendor/Twig/Lexer.php

File

vendor/Twig/Lexer.php
View source
<?php

/*
 * This file is part of Twig.
 *
 * (c) 2009 Fabien Potencier
 * (c) 2009 Armin Ronacher
 *
 * For the full copyright and license information, please view the LICENSE
 * file that was distributed with this source code.
 */

/**
 * Lexes a template string.
 *
 * @author Fabien Potencier <fabien@symfony.com>
 */
class Twig_Lexer implements Twig_LexerInterface {
  protected $tokens;
  protected $code;
  protected $cursor;
  protected $lineno;
  protected $end;
  protected $state;
  protected $states;
  protected $brackets;
  protected $env;
  protected $filename;
  protected $options;
  protected $regexes;
  protected $position;
  protected $positions;
  protected $currentVarBlockLine;
  const STATE_DATA = 0;
  const STATE_BLOCK = 1;
  const STATE_VAR = 2;
  const STATE_STRING = 3;
  const STATE_INTERPOLATION = 4;
  const REGEX_NAME = '/[a-zA-Z_\\x7f-\\xff][a-zA-Z0-9_\\x7f-\\xff]*/A';
  const REGEX_NUMBER = '/[0-9]+(?:\\.[0-9]+)?/A';
  const REGEX_STRING = '/"([^#"\\\\]*(?:\\\\.[^#"\\\\]*)*)"|\'([^\'\\\\]*(?:\\\\.[^\'\\\\]*)*)\'/As';
  const REGEX_DQ_STRING_DELIM = '/"/A';
  const REGEX_DQ_STRING_PART = '/[^#"\\\\]*(?:(?:\\\\.|#(?!\\{))[^#"\\\\]*)*/As';
  const PUNCTUATION = '()[]{}?:.,|';
  public function __construct(Twig_Environment $env, array $options = array()) {
    $this->env = $env;
    $this->options = array_merge(array(
      'tag_comment' => array(
        '{#',
        '#}',
      ),
      'tag_block' => array(
        '{%',
        '%}',
      ),
      'tag_variable' => array(
        '{{',
        '}}',
      ),
      'whitespace_trim' => '-',
      'interpolation' => array(
        '#{',
        '}',
      ),
    ), $options);
    $this->regexes = array(
      'lex_var' => '/\\s*' . preg_quote($this->options['whitespace_trim'] . $this->options['tag_variable'][1], '/') . '\\s*|\\s*' . preg_quote($this->options['tag_variable'][1], '/') . '/A',
      'lex_block' => '/\\s*(?:' . preg_quote($this->options['whitespace_trim'] . $this->options['tag_block'][1], '/') . '\\s*|\\s*' . preg_quote($this->options['tag_block'][1], '/') . ')\\n?/A',
      'lex_raw_data' => '/(' . preg_quote($this->options['tag_block'][0] . $this->options['whitespace_trim'], '/') . '|' . preg_quote($this->options['tag_block'][0], '/') . ')\\s*(?:end%s)\\s*(?:' . preg_quote($this->options['whitespace_trim'] . $this->options['tag_block'][1], '/') . '\\s*|\\s*' . preg_quote($this->options['tag_block'][1], '/') . ')/s',
      'operator' => $this
        ->getOperatorRegex(),
      'lex_comment' => '/(?:' . preg_quote($this->options['whitespace_trim'], '/') . preg_quote($this->options['tag_comment'][1], '/') . '\\s*|' . preg_quote($this->options['tag_comment'][1], '/') . ')\\n?/s',
      'lex_block_raw' => '/\\s*(raw|verbatim)\\s*(?:' . preg_quote($this->options['whitespace_trim'] . $this->options['tag_block'][1], '/') . '\\s*|\\s*' . preg_quote($this->options['tag_block'][1], '/') . ')/As',
      'lex_block_line' => '/\\s*line\\s+(\\d+)\\s*' . preg_quote($this->options['tag_block'][1], '/') . '/As',
      'lex_tokens_start' => '/(' . preg_quote($this->options['tag_variable'][0], '/') . '|' . preg_quote($this->options['tag_block'][0], '/') . '|' . preg_quote($this->options['tag_comment'][0], '/') . ')(' . preg_quote($this->options['whitespace_trim'], '/') . ')?/s',
      'interpolation_start' => '/' . preg_quote($this->options['interpolation'][0], '/') . '\\s*/A',
      'interpolation_end' => '/\\s*' . preg_quote($this->options['interpolation'][1], '/') . '/A',
    );
  }

  /**
   * {@inheritdoc}
   */
  public function tokenize($code, $filename = null) {
    if (function_exists('mb_internal_encoding') && (int) ini_get('mbstring.func_overload') & 2) {
      $mbEncoding = mb_internal_encoding();
      mb_internal_encoding('ASCII');
    }
    else {
      $mbEncoding = null;
    }
    $this->code = str_replace(array(
      "\r\n",
      "\r",
    ), "\n", $code);
    $this->filename = $filename;
    $this->cursor = 0;
    $this->lineno = 1;
    $this->end = strlen($this->code);
    $this->tokens = array();
    $this->state = self::STATE_DATA;
    $this->states = array();
    $this->brackets = array();
    $this->position = -1;

    // find all token starts in one go
    preg_match_all($this->regexes['lex_tokens_start'], $this->code, $matches, PREG_OFFSET_CAPTURE);
    $this->positions = $matches;
    while ($this->cursor < $this->end) {

      // dispatch to the lexing functions depending
      // on the current state
      switch ($this->state) {
        case self::STATE_DATA:
          $this
            ->lexData();
          break;
        case self::STATE_BLOCK:
          $this
            ->lexBlock();
          break;
        case self::STATE_VAR:
          $this
            ->lexVar();
          break;
        case self::STATE_STRING:
          $this
            ->lexString();
          break;
        case self::STATE_INTERPOLATION:
          $this
            ->lexInterpolation();
          break;
      }
    }
    $this
      ->pushToken(Twig_Token::EOF_TYPE);
    if (!empty($this->brackets)) {
      list($expect, $lineno) = array_pop($this->brackets);
      throw new Twig_Error_Syntax(sprintf('Unclosed "%s"', $expect), $lineno, $this->filename);
    }
    if ($mbEncoding) {
      mb_internal_encoding($mbEncoding);
    }
    return new Twig_TokenStream($this->tokens, $this->filename);
  }
  protected function lexData() {

    // if no matches are left we return the rest of the template as simple text token
    if ($this->position == count($this->positions[0]) - 1) {
      $this
        ->pushToken(Twig_Token::TEXT_TYPE, substr($this->code, $this->cursor));
      $this->cursor = $this->end;
      return;
    }

    // Find the first token after the current cursor
    $position = $this->positions[0][++$this->position];
    while ($position[1] < $this->cursor) {
      if ($this->position == count($this->positions[0]) - 1) {
        return;
      }
      $position = $this->positions[0][++$this->position];
    }

    // push the template text first
    $text = $textContent = substr($this->code, $this->cursor, $position[1] - $this->cursor);
    if (isset($this->positions[2][$this->position][0])) {
      $text = rtrim($text);
    }
    $this
      ->pushToken(Twig_Token::TEXT_TYPE, $text);
    $this
      ->moveCursor($textContent . $position[0]);
    switch ($this->positions[1][$this->position][0]) {
      case $this->options['tag_comment'][0]:
        $this
          ->lexComment();
        break;
      case $this->options['tag_block'][0]:

        // raw data?
        if (preg_match($this->regexes['lex_block_raw'], $this->code, $match, null, $this->cursor)) {
          $this
            ->moveCursor($match[0]);
          $this
            ->lexRawData($match[1]);

          // {% line \d+ %}
        }
        elseif (preg_match($this->regexes['lex_block_line'], $this->code, $match, null, $this->cursor)) {
          $this
            ->moveCursor($match[0]);
          $this->lineno = (int) $match[1];
        }
        else {
          $this
            ->pushToken(Twig_Token::BLOCK_START_TYPE);
          $this
            ->pushState(self::STATE_BLOCK);
          $this->currentVarBlockLine = $this->lineno;
        }
        break;
      case $this->options['tag_variable'][0]:
        $this
          ->pushToken(Twig_Token::VAR_START_TYPE);
        $this
          ->pushState(self::STATE_VAR);
        $this->currentVarBlockLine = $this->lineno;
        break;
    }
  }
  protected function lexBlock() {
    if (empty($this->brackets) && preg_match($this->regexes['lex_block'], $this->code, $match, null, $this->cursor)) {
      $this
        ->pushToken(Twig_Token::BLOCK_END_TYPE);
      $this
        ->moveCursor($match[0]);
      $this
        ->popState();
    }
    else {
      $this
        ->lexExpression();
    }
  }
  protected function lexVar() {
    if (empty($this->brackets) && preg_match($this->regexes['lex_var'], $this->code, $match, null, $this->cursor)) {
      $this
        ->pushToken(Twig_Token::VAR_END_TYPE);
      $this
        ->moveCursor($match[0]);
      $this
        ->popState();
    }
    else {
      $this
        ->lexExpression();
    }
  }
  protected function lexExpression() {

    // whitespace
    if (preg_match('/\\s+/A', $this->code, $match, null, $this->cursor)) {
      $this
        ->moveCursor($match[0]);
      if ($this->cursor >= $this->end) {
        throw new Twig_Error_Syntax(sprintf('Unclosed "%s"', $this->state === self::STATE_BLOCK ? 'block' : 'variable'), $this->currentVarBlockLine, $this->filename);
      }
    }

    // operators
    if (preg_match($this->regexes['operator'], $this->code, $match, null, $this->cursor)) {
      $this
        ->pushToken(Twig_Token::OPERATOR_TYPE, preg_replace('/\\s+/', ' ', $match[0]));
      $this
        ->moveCursor($match[0]);
    }
    elseif (preg_match(self::REGEX_NAME, $this->code, $match, null, $this->cursor)) {
      $this
        ->pushToken(Twig_Token::NAME_TYPE, $match[0]);
      $this
        ->moveCursor($match[0]);
    }
    elseif (preg_match(self::REGEX_NUMBER, $this->code, $match, null, $this->cursor)) {
      $number = (double) $match[0];

      // floats
      if (ctype_digit($match[0]) && $number <= PHP_INT_MAX) {
        $number = (int) $match[0];

        // integers lower than the maximum
      }
      $this
        ->pushToken(Twig_Token::NUMBER_TYPE, $number);
      $this
        ->moveCursor($match[0]);
    }
    elseif (false !== strpos(self::PUNCTUATION, $this->code[$this->cursor])) {

      // opening bracket
      if (false !== strpos('([{', $this->code[$this->cursor])) {
        $this->brackets[] = array(
          $this->code[$this->cursor],
          $this->lineno,
        );
      }
      elseif (false !== strpos(')]}', $this->code[$this->cursor])) {
        if (empty($this->brackets)) {
          throw new Twig_Error_Syntax(sprintf('Unexpected "%s"', $this->code[$this->cursor]), $this->lineno, $this->filename);
        }
        list($expect, $lineno) = array_pop($this->brackets);
        if ($this->code[$this->cursor] != strtr($expect, '([{', ')]}')) {
          throw new Twig_Error_Syntax(sprintf('Unclosed "%s"', $expect), $lineno, $this->filename);
        }
      }
      $this
        ->pushToken(Twig_Token::PUNCTUATION_TYPE, $this->code[$this->cursor]);
      ++$this->cursor;
    }
    elseif (preg_match(self::REGEX_STRING, $this->code, $match, null, $this->cursor)) {
      $this
        ->pushToken(Twig_Token::STRING_TYPE, stripcslashes(substr($match[0], 1, -1)));
      $this
        ->moveCursor($match[0]);
    }
    elseif (preg_match(self::REGEX_DQ_STRING_DELIM, $this->code, $match, null, $this->cursor)) {
      $this->brackets[] = array(
        '"',
        $this->lineno,
      );
      $this
        ->pushState(self::STATE_STRING);
      $this
        ->moveCursor($match[0]);
    }
    else {
      throw new Twig_Error_Syntax(sprintf('Unexpected character "%s"', $this->code[$this->cursor]), $this->lineno, $this->filename);
    }
  }
  protected function lexRawData($tag) {
    if (!preg_match(str_replace('%s', $tag, $this->regexes['lex_raw_data']), $this->code, $match, PREG_OFFSET_CAPTURE, $this->cursor)) {
      throw new Twig_Error_Syntax(sprintf('Unexpected end of file: Unclosed "%s" block', $tag), $this->lineno, $this->filename);
    }
    $text = substr($this->code, $this->cursor, $match[0][1] - $this->cursor);
    $this
      ->moveCursor($text . $match[0][0]);
    if (false !== strpos($match[1][0], $this->options['whitespace_trim'])) {
      $text = rtrim($text);
    }
    $this
      ->pushToken(Twig_Token::TEXT_TYPE, $text);
  }
  protected function lexComment() {
    if (!preg_match($this->regexes['lex_comment'], $this->code, $match, PREG_OFFSET_CAPTURE, $this->cursor)) {
      throw new Twig_Error_Syntax('Unclosed comment', $this->lineno, $this->filename);
    }
    $this
      ->moveCursor(substr($this->code, $this->cursor, $match[0][1] - $this->cursor) . $match[0][0]);
  }
  protected function lexString() {
    if (preg_match($this->regexes['interpolation_start'], $this->code, $match, null, $this->cursor)) {
      $this->brackets[] = array(
        $this->options['interpolation'][0],
        $this->lineno,
      );
      $this
        ->pushToken(Twig_Token::INTERPOLATION_START_TYPE);
      $this
        ->moveCursor($match[0]);
      $this
        ->pushState(self::STATE_INTERPOLATION);
    }
    elseif (preg_match(self::REGEX_DQ_STRING_PART, $this->code, $match, null, $this->cursor) && strlen($match[0]) > 0) {
      $this
        ->pushToken(Twig_Token::STRING_TYPE, stripcslashes($match[0]));
      $this
        ->moveCursor($match[0]);
    }
    elseif (preg_match(self::REGEX_DQ_STRING_DELIM, $this->code, $match, null, $this->cursor)) {
      list($expect, $lineno) = array_pop($this->brackets);
      if ($this->code[$this->cursor] != '"') {
        throw new Twig_Error_Syntax(sprintf('Unclosed "%s"', $expect), $lineno, $this->filename);
      }
      $this
        ->popState();
      ++$this->cursor;
    }
  }
  protected function lexInterpolation() {
    $bracket = end($this->brackets);
    if ($this->options['interpolation'][0] === $bracket[0] && preg_match($this->regexes['interpolation_end'], $this->code, $match, null, $this->cursor)) {
      array_pop($this->brackets);
      $this
        ->pushToken(Twig_Token::INTERPOLATION_END_TYPE);
      $this
        ->moveCursor($match[0]);
      $this
        ->popState();
    }
    else {
      $this
        ->lexExpression();
    }
  }
  protected function pushToken($type, $value = '') {

    // do not push empty text tokens
    if (Twig_Token::TEXT_TYPE === $type && '' === $value) {
      return;
    }
    $this->tokens[] = new Twig_Token($type, $value, $this->lineno);
  }
  protected function moveCursor($text) {
    $this->cursor += strlen($text);
    $this->lineno += substr_count($text, "\n");
  }
  protected function getOperatorRegex() {
    $operators = array_merge(array(
      '=',
    ), array_keys($this->env
      ->getUnaryOperators()), array_keys($this->env
      ->getBinaryOperators()));
    $operators = array_combine($operators, array_map('strlen', $operators));
    arsort($operators);
    $regex = array();
    foreach ($operators as $operator => $length) {

      // an operator that ends with a character must be followed by
      // a whitespace or a parenthesis
      if (ctype_alpha($operator[$length - 1])) {
        $r = preg_quote($operator, '/') . '(?=[\\s()])';
      }
      else {
        $r = preg_quote($operator, '/');
      }

      // an operator with a space can be any amount of whitespaces
      $r = preg_replace('/\\s+/', '\\s+', $r);
      $regex[] = $r;
    }
    return '/' . implode('|', $regex) . '/A';
  }
  protected function pushState($state) {
    $this->states[] = $this->state;
    $this->state = $state;
  }
  protected function popState() {
    if (0 === count($this->states)) {
      throw new Exception('Cannot pop state without a previous state');
    }
    $this->state = array_pop($this->states);
  }

}

Classes

Namesort descending Description
Twig_Lexer Lexes a template string.