You are here

CssParser.php in QueryPath 6

Same filename and directory in other branches
  1. 7.3 QueryPath/CssParser.php
  2. 7.2 QueryPath/CssParser.php

File

QueryPath/CssParser.php
View source
<?php

require_once 'CssEventHandler.php';
/**
 * Callback contract used by CssParser.
 *
 * The parser walks a selector string and invokes one method on the handler
 * for every construct it recognises. The handler decides what each event
 * means; the parser only reports what it read.
 */
interface CssEventHandler {

  /**
   * Attribute comparison operations handed to attribute()/attributeNS().
   *
   * CssParser::attribute() maps the selector operators onto them as follows:
   * "=" isExactly, "~=" containsWithSpace, "|=" containsWithHyphen,
   * "*=" containsInString, "^=" beginsWith, "$=" endsWith.
   */
  const isExactly = 0;
  const containsWithSpace = 1;
  const containsWithHyphen = 2;
  const containsInString = 3;
  const beginsWith = 4;
  const endsWith = 5;

  /**
   * The wildcard character.
   */
  const anyElement = '*';

  /**
   * An ID selector ("#id") was read.
   *
   * @param string $id
   *   The name following the "#".
   */
  public function elementID($id);

  /**
   * An element (tag name) selector without a namespace was read.
   *
   * @param string $name
   *   The element name.
   */
  public function element($name);

  /**
   * A namespaced element selector ("ns|name" or "*|name") was read.
   *
   * @param string $name
   *   The element name.
   * @param string $namespace
   *   The namespace prefix; the parser passes "*" for "*|name".
   */
  public function elementNS($name, $namespace = NULL);

  /**
   * The universal selector ("*") was read.
   */
  public function anyElement();

  /**
   * A universal selector within a namespace ("ns|*" or "*|*") was read.
   *
   * @param string $ns
   *   The namespace prefix, or "*" for any namespace.
   */
  public function anyElementInNS($ns);

  /**
   * A class selector (".name") was read.
   *
   * @param string $name
   *   The class name following the dot.
   */
  public function elementClass($name);

  /**
   * An attribute selector without a namespace was read.
   *
   * @param string $name
   *   The attribute name.
   * @param string $value
   *   The value to compare against, or NULL when only presence is tested.
   * @param int $operation
   *   One of the comparison constants declared on this interface.
   */
  public function attribute($name, $value = NULL, $operation = self::isExactly);

  /**
   * A namespaced attribute selector was read.
   *
   * @param string $name
   *   The attribute name.
   * @param string $ns
   *   The namespace prefix, or "*".
   * @param string $value
   *   The value to compare against, or NULL when only presence is tested.
   * @param int $operation
   *   One of the comparison constants declared on this interface.
   */
  public function attributeNS($name, $ns, $value = NULL, $operation = self::isExactly);

  /**
   * A pseudo-class (":name" or ":name(value)") was read.
   *
   * @param string $name
   *   The pseudo-class name.
   * @param string $value
   *   The raw text found between the parentheses, or NULL if there were none.
   */
  public function pseudoClass($name, $value = NULL);

  /**
   * A pseudo-element ("::name") was read.
   *
   * @param string $name
   *   The pseudo-element name.
   */
  public function pseudoElement($name);

  /**
   * The ">" combinator was read.
   */
  public function directDescendant();

  /**
   * The "+" combinator was read.
   */
  public function adjacent();

  /**
   * The "," separator was read, starting another selector.
   */
  public function anotherSelector();

  /**
   * The "~" combinator was read.
   */
  public function sibling();

  /**
   * Whitespace between two simple selectors was read (descendant combinator).
   */
  public function anyDescendant();

}
/**
 * Token type constants emitted by CssScanner.
 *
 * Values 0-21 identify individual characters or character classes;
 * stringLegal (99) covers any other printable character the scanner accepts.
 * The scanner uses boolean FALSE (not a constant here) for end of input.
 */
final class CssToken {
  const char = 0;
  const star = 1;
  const rangle = 2;
  const dot = 3;
  const octo = 4;
  const rsquare = 5;
  const lsquare = 6;
  const colon = 7;
  const rparen = 8;
  const lparen = 9;
  const plus = 10;
  const tilde = 11;
  const eq = 12;
  const pipe = 13;
  const comma = 14;
  const white = 15;
  const quote = 16;
  const squote = 17;
  const bslash = 18;
  const carat = 19;
  const dollar = 20;
  const at = 21;
  const stringLegal = 99;

  /**
   * Translate a token value into a human-readable name for error messages.
   *
   * @param mixed $const_int
   *   A token constant, or FALSE for end of input.
   *
   * @return string
   *   The descriptive name, or "illegal character (...)" for unknown values.
   */
  public static function name($const_int) {
    $labels = array(
      self::char => 'character',
      self::star => 'star',
      self::rangle => 'right angle bracket',
      self::dot => 'dot',
      self::octo => 'octothorp',
      self::rsquare => 'right square bracket',
      self::lsquare => 'left square bracket',
      self::colon => 'colon',
      self::rparen => 'right parenthesis',
      self::lparen => 'left parenthesis',
      self::plus => 'plus',
      self::tilde => 'tilde',
      self::eq => 'equals',
      self::pipe => 'vertical bar',
      self::comma => 'comma',
      self::white => 'space',
      self::quote => 'quote',
      self::squote => 'single quote',
      self::bslash => 'backslash',
      self::carat => 'carat',
      self::dollar => 'dollar',
      self::at => 'at',
    );

    // The is_numeric() guard keeps FALSE (which would index slot 0) from
    // being reported as "character".
    if (isset($labels[$const_int]) && is_numeric($const_int)) {
      return $labels[$const_int];
    }
    if ($const_int == self::stringLegal) {
      return 'a legal non-alphanumeric character';
    }
    if ($const_int == FALSE) {
      return 'end of file';
    }

    return sprintf('illegal character (%s)', $const_int);
  }

}
/**
 * Event-driven parser for CSS selectors.
 *
 * The parser reads tokens from a CssScanner and, for each construct it
 * recognises, fires the corresponding method on a CssEventHandler. It builds
 * no data structure of its own.
 */
class CssParser {
  protected $scanner = NULL;
  protected $buffer = '';
  protected $handler = NULL;
  protected $strict = FALSE;
  protected $DEBUG = FALSE;

  /**
   * The selector string as handed to the constructor.
   *
   * Declared explicitly so the constructor does not create a dynamic
   * property (deprecated as of PHP 8.2). Public because a dynamic property
   * was publicly readable.
   */
  public $originalString = NULL;

  /**
   * Set up a parser for one selector string.
   *
   * @param string $string
   *   The CSS selector to parse.
   * @param CssEventHandler $handler
   *   Receives one callback per recognised selector construct.
   */
  public function __construct($string, CssEventHandler $handler) {
    $this->originalString = $string;
    $is = new CssInputStream($string);
    $this->scanner = new CssScanner($is);
    $this->handler = $handler;
  }

  /**
   * Parse the whole selector string, firing handler events along the way.
   *
   * @throws CssParseException
   *   If a pass over the input consumes nothing (the selector is malformed),
   *   or if one of the sub-parsers rejects the input.
   */
  public function parse() {
    $this->scanner->nextToken();
    while ($this->scanner->token !== FALSE) {
      $position = $this->scanner->position();
      if ($this->DEBUG) {
        print "PARSE " . $this->scanner->token . "\n";
      }
      $this->selector();
      $finalPosition = $this->scanner->position();
      // If nothing was consumed we would loop forever on the same token.
      if ($this->scanner->token !== FALSE && $finalPosition == $position) {
        throw new CssParseException('CSS selector is not well formed.');
      }
    }
  }

  /**
   * Handle one pass: leading whitespace, simple selectors, then a combinator.
   */
  private function selector() {
    if ($this->DEBUG) {
      print "SELECTOR{$this->scanner->position()}\n";
    }
    $this->consumeWhitespace();
    $this->simpleSelectors();
    $this->combinator();
  }

  /**
   * Skip over consecutive whitespace tokens.
   *
   * @return int
   *   The number of whitespace tokens skipped.
   */
  private function consumeWhitespace() {
    if ($this->DEBUG) {
      print "CONSUME WHITESPACE\n";
    }
    $white = 0;
    while ($this->scanner->token == CssToken::white) {
      $this->scanner->nextToken();
      ++$white;
    }
    return $white;
  }

  /**
   * Handle a combinator (">", "+", ",", "~" or plain whitespace).
   *
   * Whitespace counts as the descendant combinator only when no explicit
   * combinator follows it.
   *
   * @throws CssParseException
   *   If two explicit combinators appear in sequence.
   */
  private function combinator() {
    if ($this->DEBUG) {
      print "COMBINATOR\n";
    }
    $inCombinator = FALSE;
    $white = $this->consumeWhitespace();
    $t = $this->scanner->token;
    if ($t == CssToken::rangle) {
      $this->handler->directDescendant();
      $this->scanner->nextToken();
      $inCombinator = TRUE;
    }
    elseif ($t == CssToken::plus) {
      $this->handler->adjacent();
      $this->scanner->nextToken();
      $inCombinator = TRUE;
    }
    elseif ($t == CssToken::comma) {
      $this->handler->anotherSelector();
      $this->scanner->nextToken();
      $inCombinator = TRUE;
    }
    elseif ($t == CssToken::tilde) {
      $this->handler->sibling();
      $this->scanner->nextToken();
      $inCombinator = TRUE;
    }

    if ($inCombinator) {
      if ($this->DEBUG) {
        print "COMBINATOR: " . CssToken::name($t) . "\n";
      }
      $this->consumeWhitespace();
      if ($this->isCombinator($this->scanner->token)) {
        throw new CssParseException("Illegal combinator: Cannot have two combinators in sequence.");
      }
    }
    elseif ($white > 0) {
      if ($this->DEBUG) {
        print "COMBINATOR: any descendant\n";
      }
      $this->handler->anyDescendant();
    }
    else {
      if ($this->DEBUG) {
        print "COMBINATOR: no combinator found.\n";
      }
    }
  }

  /**
   * Check whether a token is one of the explicit combinator tokens.
   *
   * @param mixed $tok
   *   A CssToken constant, or FALSE at end of input.
   *
   * @return bool
   *   TRUE for plus, right angle, comma or tilde.
   */
  private function isCombinator($tok) {
    $combinators = array(
      CssToken::plus,
      CssToken::rangle,
      CssToken::comma,
      CssToken::tilde,
    );
    // Strict comparison: tokens are integers, end of input is boolean FALSE.
    return in_array($tok, $combinators, TRUE);
  }

  /**
   * Try each simple-selector sub-parser once, in a fixed order.
   *
   * Each sub-parser does nothing unless the current token starts its
   * construct; repeated constructs are picked up by later passes of parse().
   */
  private function simpleSelectors() {
    if ($this->DEBUG) {
      print "SIMPLE SELECTOR\n";
    }
    $this->allElements();
    $this->elementName();
    $this->elementClass();
    $this->elementID();
    $this->pseudoClass();
    $this->attribute();
  }

  /**
   * Handle an ID selector ("#name").
   *
   * @throws CssParseException
   *   If the "#" is not immediately followed by a name character.
   */
  private function elementID() {
    if ($this->DEBUG) {
      print "ELEMENT ID\n";
    }
    if ($this->scanner->token == CssToken::octo) {
      $this->scanner->nextToken();
      if ($this->scanner->token !== CssToken::char) {
        throw new CssParseException("Expected string after #");
      }
      $id = $this->scanner->getNameString();
      $this->handler->elementID($id);
    }
  }

  /**
   * Handle a class selector (".name").
   *
   * Whitespace between the dot and the name is skipped.
   */
  private function elementClass() {
    if ($this->DEBUG) {
      print "ELEMENT CLASS\n";
    }
    if ($this->scanner->token == CssToken::dot) {
      $this->scanner->nextToken();
      $this->consumeWhitespace();
      $cssClass = $this->scanner->getNameString();
      $this->handler->elementClass($cssClass);
    }
  }

  /**
   * Handle a pseudo-class (":name", ":name(value)") or pseudo-element ("::name").
   *
   * @param bool $restricted
   *   When TRUE, the "not" pseudo-class and all pseudo-elements are rejected.
   *
   * @throws CssParseException
   *   If a restricted construct is used, a pseudo-element has arguments, or a
   *   pseudo-element is followed by anything other than a comma or the end
   *   of input.
   */
  private function pseudoClass($restricted = FALSE) {
    if ($this->DEBUG) {
      print "PSEUDO-CLASS\n";
    }
    if ($this->scanner->token == CssToken::colon) {
      // A second colon marks a pseudo-element.
      $isPseudoElement = FALSE;
      if ($this->scanner->nextToken() === CssToken::colon) {
        $isPseudoElement = TRUE;
        $this->scanner->nextToken();
      }

      $name = $this->scanner->getNameString();
      if ($restricted && $name == 'not') {
        throw new CssParseException("The 'not' pseudo-class is illegal in this context.");
      }

      $value = NULL;
      if ($this->scanner->token == CssToken::lparen) {
        if ($isPseudoElement) {
          throw new CssParseException("Illegal left paren. Pseudo-Element cannot have arguments.");
        }
        $value = $this->pseudoClassValue();
      }

      if ($isPseudoElement) {
        if ($restricted) {
          throw new CssParseException("Pseudo-Elements are illegal in this context.");
        }
        $this->handler->pseudoElement($name);
        $this->consumeWhitespace();
        if ($this->scanner->token !== FALSE && $this->scanner->token !== CssToken::comma) {
          throw new CssParseException("A Pseudo-Element must be the last item in a selector.");
        }
      }
      else {
        $this->handler->pseudoClass($name, $value);
      }
    }
  }

  /**
   * Read the parenthesised argument of a pseudo-class.
   *
   * @return string|null
   *   The text between the parentheses, or NULL if the current token is not
   *   a left parenthesis.
   */
  private function pseudoClassValue() {
    if ($this->scanner->token == CssToken::lparen) {
      $buf = '';
      $buf .= $this->scanner->getQuotedString();
      return $buf;
    }
  }

  /**
   * Handle an element name, optionally namespaced ("name", "|name", "ns|name", "ns|*").
   *
   * @throws CssParseException
   *   If "ns|" is followed by something other than a name or "*".
   */
  private function elementName() {
    if ($this->DEBUG) {
      print "ELEMENT NAME\n";
    }
    if ($this->scanner->token === CssToken::pipe) {
      // "|name": reported as a plain element.
      $this->scanner->nextToken();
      $this->consumeWhitespace();
      $elementName = $this->scanner->getNameString();
      $this->handler->element($elementName);
    }
    elseif ($this->scanner->token === CssToken::char) {
      $elementName = $this->scanner->getNameString();
      if ($this->scanner->token == CssToken::pipe) {
        // What was read so far is the namespace prefix.
        $elementNS = $elementName;
        $this->scanner->nextToken();
        $this->consumeWhitespace();
        if ($this->scanner->token === CssToken::star) {
          $this->handler->anyElementInNS($elementNS);
          $this->scanner->nextToken();
        }
        elseif ($this->scanner->token !== CssToken::char) {
          $this->throwError(CssToken::char, $this->scanner->token);
        }
        else {
          $elementName = $this->scanner->getNameString();
          $this->handler->elementNS($elementName, $elementNS);
        }
      }
      else {
        $this->handler->element($elementName);
      }
    }
  }

  /**
   * Handle the universal selector and its namespaced forms ("*", "*|*", "*|name").
   */
  private function allElements() {
    if ($this->scanner->token === CssToken::star) {
      $this->scanner->nextToken();
      if ($this->scanner->token === CssToken::pipe) {
        $this->scanner->nextToken();
        if ($this->scanner->token === CssToken::star) {
          $this->scanner->nextToken();
          $this->handler->anyElementInNS('*');
        }
        else {
          $name = $this->scanner->getNameString();
          $this->handler->elementNS($name, '*');
        }
      }
      else {
        $this->handler->anyElement();
      }
    }
  }

  /**
   * Handle an attribute selector ("[name]", "[name op value]", "[ns|name ...]").
   *
   * A leading "@" before the attribute name is tolerated unless strict mode
   * is on.
   *
   * @throws CssParseException
   *   If "@" is used in strict mode, an operator character is not followed by
   *   "=", or the closing "]" is missing.
   */
  private function attribute() {
    if ($this->scanner->token == CssToken::lsquare) {
      $attrVal = $op = $ns = NULL;

      $this->scanner->nextToken();
      $this->consumeWhitespace();

      if ($this->scanner->token === CssToken::at) {
        if ($this->strict) {
          throw new CssParseException('The @ is illegal in attributes.');
        }
        else {
          $this->scanner->nextToken();
          $this->consumeWhitespace();
        }
      }

      // "*|attr": any namespace.
      if ($this->scanner->token === CssToken::star) {
        $ns = '*';
        $this->scanner->nextToken();
      }
      if ($this->scanner->token === CssToken::pipe) {
        $this->scanner->nextToken();
        $this->consumeWhitespace();
      }

      $attrName = $this->scanner->getNameString();
      $this->consumeWhitespace();

      // A pipe not followed by "=" separates a namespace prefix from the
      // attribute name; "|=" is the hyphen-match operator handled below.
      if ($this->scanner->token === CssToken::pipe && $this->scanner->peek() !== '=') {
        $ns = $attrName;
        $this->scanner->nextToken();
        $attrName = $this->scanner->getNameString();
        $this->consumeWhitespace();
      }

      switch ($this->scanner->token) {
        case CssToken::eq:
          // The current token is "=", so this consumes nothing.
          $this->consumeWhitespace();
          $op = CssEventHandler::isExactly;
          break;

        case CssToken::tilde:
          if ($this->scanner->nextToken() !== CssToken::eq) {
            $this->throwError(CssToken::eq, $this->scanner->token);
          }
          $op = CssEventHandler::containsWithSpace;
          break;

        case CssToken::pipe:
          if ($this->scanner->nextToken() !== CssToken::eq) {
            $this->throwError(CssToken::eq, $this->scanner->token);
          }
          $op = CssEventHandler::containsWithHyphen;
          break;

        case CssToken::star:
          if ($this->scanner->nextToken() !== CssToken::eq) {
            $this->throwError(CssToken::eq, $this->scanner->token);
          }
          $op = CssEventHandler::containsInString;
          break;

        case CssToken::dollar:
          if ($this->scanner->nextToken() !== CssToken::eq) {
            $this->throwError(CssToken::eq, $this->scanner->token);
          }
          $op = CssEventHandler::endsWith;
          break;

        case CssToken::carat:
          if ($this->scanner->nextToken() !== CssToken::eq) {
            $this->throwError(CssToken::eq, $this->scanner->token);
          }
          $op = CssEventHandler::beginsWith;
          break;
      }

      if (isset($op)) {
        // Step past the "=" and read the value, quoted or bare.
        $this->scanner->nextToken();
        $this->consumeWhitespace();
        if ($this->scanner->token === CssToken::quote || $this->scanner->token === CssToken::squote) {
          $attrVal = $this->scanner->getQuotedString();
        }
        else {
          $attrVal = $this->scanner->getNameString();
        }
        if ($this->DEBUG) {
          print "ATTR: {$attrVal} AND OP: {$op}\n";
        }
      }

      $this->consumeWhitespace();
      if ($this->scanner->token != CssToken::rsquare) {
        $this->throwError(CssToken::rsquare, $this->scanner->token);
      }

      if (isset($ns)) {
        $this->handler->attributeNS($attrName, $ns, $attrVal, $op);
      }
      elseif (isset($attrVal)) {
        $this->handler->attribute($attrName, $attrVal, $op);
      }
      else {
        $this->handler->attribute($attrName);
      }
      $this->scanner->nextToken();
    }
  }

  /**
   * Raise a parse error describing an unexpected token.
   *
   * @param int $expected
   *   The CssToken constant that was required.
   * @param mixed $got
   *   The token actually found (FALSE at end of input).
   *
   * @throws CssParseException
   *   Always.
   */
  private function throwError($expected, $got) {
    $message = sprintf('Expected %s, got %s', CssToken::name($expected), CssToken::name($got));
    throw new CssParseException($message);
  }

}
/**
 * Tokenizer that turns a CssInputStream into CssToken values.
 *
 * Every call to nextToken() consumes one character from the stream and
 * classifies it. The most recent classification is kept in $token and the
 * corresponding text in $value.
 */
final class CssScanner {
  /**
   * The input stream being scanned.
   */
  public $is = NULL;

  /**
   * Text of the current token (a single character; whitespace is always ' ').
   */
  public $value = NULL;

  /**
   * Current token: a CssToken constant, FALSE at end of input, NULL before
   * the first call to nextToken().
   */
  public $token = NULL;

  /**
   * Set once end of input has been reported; a further read then throws.
   */
  public $recurse = FALSE;

  /**
   * Number of nextToken() calls made so far.
   */
  public $it = 0;

  /**
   * @param CssInputStream $in
   *   The stream to read characters from.
   */
  public function __construct(CssInputStream $in) {
    $this->is = $in;
  }

  /**
   * Report the input stream's current position.
   *
   * @return int
   */
  public function position() {
    return $this->is->position;
  }

  /**
   * Look at the next unread character without consuming it.
   *
   * @return string|null
   */
  public function peek() {
    return $this->is->peek();
  }

  /**
   * Consume one character and classify it.
   *
   * @return int|false
   *   The CssToken constant for the character, or FALSE at end of input.
   *
   * @throws Exception
   *   If called again after end of input was already reported, or if the
   *   character is neither a known token nor in the byte ranges 32-126 or
   *   128-255.
   */
  public function nextToken() {
    $tok = -1;
    ++$this->it;

    if ($this->is->isEmpty()) {
      // Reading past the end twice means the caller is stuck in a loop.
      if ($this->recurse) {
        throw new Exception("Recursion error detected at iteration " . $this->it . '.');
      }
      $this->recurse = TRUE;
      $this->token = FALSE;
      return FALSE;
    }

    $ch = $this->is->consume();

    // Any whitespace character is normalised to a single space.
    if (ctype_space($ch)) {
      $this->value = ' ';
      $this->token = $tok = CssToken::white;
      return $tok;
    }

    // Name characters: letters, digits, hyphen and underscore.
    if (ctype_alnum($ch) || $ch == '-' || $ch == '_') {
      $this->value = $ch;
      $this->token = $tok = CssToken::char;
      return $tok;
    }

    $this->value = $ch;
    switch ($ch) {
      case '*':
        $tok = CssToken::star;
        break;

      case '>':
        $tok = CssToken::rangle;
        break;

      case '.':
        $tok = CssToken::dot;
        break;

      case '#':
        $tok = CssToken::octo;
        break;

      case '[':
        $tok = CssToken::lsquare;
        break;

      case ']':
        $tok = CssToken::rsquare;
        break;

      case ':':
        $tok = CssToken::colon;
        break;

      case '(':
        $tok = CssToken::lparen;
        break;

      case ')':
        $tok = CssToken::rparen;
        break;

      case '+':
        $tok = CssToken::plus;
        break;

      case '~':
        $tok = CssToken::tilde;
        break;

      case '=':
        $tok = CssToken::eq;
        break;

      case '|':
        $tok = CssToken::pipe;
        break;

      case ',':
        $tok = CssToken::comma;
        break;

      case '"':
        $tok = CssToken::quote;
        break;

      case "'":
        $tok = CssToken::squote;
        break;

      case '\\':
        $tok = CssToken::bslash;
        break;

      case '^':
        $tok = CssToken::carat;
        break;

      case '$':
        $tok = CssToken::dollar;
        break;

      case '@':
        $tok = CssToken::at;
        break;
    }

    if ($tok == -1) {
      // Not a dedicated token: accept other printable bytes, reject the rest.
      $ord = ord($ch);
      if ($ord >= 32 && $ord <= 126 || $ord >= 128 && $ord <= 255) {
        $tok = CssToken::stringLegal;
      }
      else {
        throw new Exception('Illegal character found in stream: ' . $ord);
      }
    }

    $this->token = $tok;
    return $tok;
  }

  /**
   * Collect consecutive name characters, starting with the current token.
   *
   * On return the current token is the first one that is not a name
   * character.
   *
   * @return string
   *   The collected name; empty if the current token is not a name character.
   */
  public function getNameString() {
    $buf = '';
    while ($this->token === CssToken::char) {
      $buf .= $this->value;
      $this->nextToken();
    }
    return $buf;
  }

  /**
   * Read a delimited string whose opening delimiter is the current token.
   *
   * Double quote and single quote close on the same character; a left
   * parenthesis closes on a right parenthesis. A backslash makes the next
   * character literal and is itself dropped. If the input ends before the
   * closing delimiter, whatever was read so far is returned.
   *
   * @return string|null
   *   The text between the delimiters, or NULL if the current token is not
   *   an opening delimiter.
   */
  public function getQuotedString() {
    if ($this->token == CssToken::quote || $this->token == CssToken::squote || $this->token == CssToken::lparen) {
      $end = $this->token == CssToken::lparen ? CssToken::rparen : $this->token;
      $buf = '';
      $escape = FALSE;

      $this->nextToken();
      while ($this->token !== FALSE) {
        if ($this->token == CssToken::bslash && !$escape) {
          $escape = TRUE;
        }
        elseif ($escape) {
          $buf .= $this->value;
          $escape = FALSE;
        }
        elseif ($this->token === $end) {
          // Step past the closing delimiter and stop.
          $this->nextToken();
          break;
        }
        else {
          $buf .= $this->value;
        }
        $this->nextToken();
      }
      return $buf;
    }
  }

}
/**
 * Character stream over a selector string.
 *
 * The string is split into single bytes; consume() removes them one at a
 * time from the front while $position counts how many have been consumed.
 */
class CssInputStream {
  /**
   * The unread characters, first unread character at index 0.
   */
  protected $stream = NULL;

  /**
   * Number of characters consumed so far.
   */
  public $position = 0;

  /**
   * @param string $string
   *   The text to stream.
   */
  public function __construct($string) {
    // str_split('') yields array('') before PHP 8.2, which would surface as
    // one bogus empty "character"; treat empty input as an empty stream.
    if ($string === '' || $string === NULL) {
      $this->stream = array();
    }
    else {
      $this->stream = str_split($string);
    }
  }

  /**
   * Return the next unread character without consuming it.
   *
   * @return string|null
   *   The character, or NULL if the stream is exhausted.
   */
  public function peek() {
    return isset($this->stream[0]) ? $this->stream[0] : NULL;
  }

  /**
   * Remove and return the next character.
   *
   * @return string|null
   *   The character, or NULL if the stream is exhausted.
   */
  public function consume() {
    $ret = array_shift($this->stream);
    // Compare against NULL rather than using empty(): the character "0" is
    // "empty" in PHP but must still advance the position.
    if ($ret !== NULL) {
      $this->position++;
    }
    return $ret;
  }

  /**
   * Check whether every character has been consumed.
   *
   * @return bool
   */
  public function isEmpty() {
    return count($this->stream) == 0;
  }

}
/**
 * Thrown when a CSS selector cannot be parsed.
 *
 * PHP class names are case-insensitive, so code in this file that refers to
 * "CssParseException" resolves to this class.
 */
class CSSParseException extends Exception {
}