You are here

class XBBCodeParser in Extensible BBCode 8.3

Same name and namespace in other branches
  1. 4.0.x src/Parser/XBBCodeParser.php \Drupal\xbbcode\Parser\XBBCodeParser

The standard XBBCode parser.

Hierarchy

Expanded class hierarchy of XBBCodeParser

5 files declare their use of XBBCodeParser
TagElement.php in src/Parser/Tree/TagElement.php
TagForm.php in src/Form/TagForm.php
TagFormBase.php in src/Form/TagFormBase.php
TagPluginCollection.php in src/TagPluginCollection.php
XBBCodeFilter.php in src/Plugin/Filter/XBBCodeFilter.php

File

src/Parser/XBBCodeParser.php, line 15

Namespace

Drupal\xbbcode\Parser
View source
class XBBCodeParser implements ParserInterface {

  /**
   * The plugins for rendering.
   *
   * @var \Drupal\xbbcode\Parser\Processor\TagProcessorInterface[]
   */
  protected $processors;

  /**
   * XBBCodeParser constructor.
   *
   * @param mixed $processors
   *   The plugins for rendering.
   */
  public function __construct($processors = NULL) {
    $this->processors = $processors;
  }

  /**
   * {@inheritdoc}
   */
  public function parse($text) : NodeElementInterface {
    $tokens = static::tokenize($text, $this->processors);
    $tokens = static::validateTokens($tokens);
    $tree = static::buildTree($text, $tokens);
    if ($this->processors) {
      static::decorateTree($tree, $this->processors);
    }
    return $tree;
  }

  /**
   * Find the opening and closing tags in a text.
   *
   * @param string $text
   *   The source text.
   * @param array|\ArrayAccess|null $allowed
   *   An array keyed by tag name, with non-empty values for allowed tags.
   *   Omit this argument to allow all tag names.
   *
   * @return array[]
   *   The tokens.
   */
  public static function tokenize($text, $allowed = NULL) : array {

    // Find all opening and closing tags in the text.
    $matches = [];
    preg_match_all("%\n      \\[\n        (?'closing'/?)\n        (?'name'[a-z0-9_-]+)\n        (?'argument'\n          (?:(?=\\k'closing')            # only take an argument in opening tags.\n            (?:\n              =(?:\\\\.|[^\\\\\\[\\]])*  # unquoted option must escape brackets.\n              |\n              =(?'quote1'['\"]|"|&\\#039;)\n               (?:\\\\.|(?!\\k'quote1')[^\\\\])*\n               \\k'quote1'\n              |\n              (?:\\s+[\\w-]+=\n                (?:\n                  (?'quote2'['\"]|"|&\\#039;)\n                  (?:\\\\.|(?!\\k'quote2')[^\\\\])*\n                  \\k'quote2'\n                  |\n                  (?!\\g'quote2')        # unquoted values cannot begin with quotes.\n                  (?:\\\\.|[^\\[\\]\\s\\\\])*\n                )\n              )*\n            )\n          )?\n        )\n      ]\n      %x", $text, $matches, PREG_SET_ORDER | PREG_OFFSET_CAPTURE);
    $tokens = [];
    foreach ($matches as $i => $match) {
      $name = $match['name'][0];
      if ($allowed && empty($allowed[$name])) {
        continue;
      }
      $start = $match[0][1];
      $tokens[] = [
        'name' => $name,
        'start' => $start,
        'end' => $start + strlen($match[0][0]),
        'argument' => $match['argument'][0],
        'closing' => !empty($match['closing'][0]),
      ];
    }
    return $tokens;
  }

  /**
   * Parse a string of attribute assignments.
   *
   * @param string $argument
   *   The string containing the attributes, including initial whitespace.
   *
   * @return string[]
   *   An associative array of all attributes.
   */
  public static function parseAttributes($argument) : array {
    $assignments = [];
    preg_match_all("/\n    (?<=\\s)                                # preceded by whitespace.\n    (?'key'[\\w-]+)=\n    (?:\n        (?'quote'['\"]|&quot;|&\\#039;)     # quotes may be encoded.\n        (?'value'\n          (?:\\\\.|(?!\\\\|\\k'quote')[^\\\\])*   # value can contain the delimiter.\n        )\n        \\k'quote'\n        |\n        (?'unquoted'\n          (?!\\g'quote')           # unquoted values cannot start with a quote.\n          (?:\\\\.|[^\\s\\\\])*\n        )\n    )\n    (?=\\s|\$)/x", $argument, $assignments, PREG_SET_ORDER);
    $attributes = [];
    foreach ($assignments as $assignment) {

      // Strip backslashes from the escape sequences in each case.
      $value = $assignment['value'] ?: $assignment['unquoted'];
      $attributes[$assignment['key']] = stripslashes($value);
    }
    return $attributes;
  }

  /**
   * Parse an option string.
   *
   * @param string $argument
   *   The argument string, including the initial =.
   *
   * @return string
   *   The parsed option value.
   */
  public static function parseOption($argument) : string {
    if (preg_match("/\n      ^=\n      (?'quote'['\"]|&quot;|&\\#039;)\n      (?'value'.*)\n      \\k'quote'\n      \$/x", $argument, $match)) {
      $value = $match['value'];
    }
    else {
      $value = substr($argument, 1);
    }
    return stripslashes($value);
  }

  /**
   * Validate the nesting, and remove tokens that are not nested.
   *
   * @param array[] $tokens
   *   The tokens.
   *
   * @return array[]
   *   A well-formed list of tokens.
   */
  public static function validateTokens(array $tokens) : array {

    // Initialize the counter for each tag name.
    $counter = [];
    foreach ($tokens as $token) {
      $counter[$token['name']] = 0;
    }
    $stack = [];
    foreach ($tokens as $i => $token) {
      if ($token['closing']) {
        if ($counter[$token['name']] > 0) {

          // Pop the stack until a matching token is reached.
          do {
            $last = array_pop($stack);
            $counter[$last['name']]--;
          } while ($last['name'] !== $token['name']);
          $tokens[$last['id']] += [
            'length' => $token['start'] - $last['end'],
            'verified' => TRUE,
          ];
          $tokens[$i]['verified'] = TRUE;
        }
      }
      else {

        // Stack this token together with its position.
        $stack[] = $token + [
          'id' => $i,
        ];
        $counter[$token['name']]++;
      }
    }

    // Filter the tokens.
    return array_filter($tokens, static function ($token) {
      return !empty($token['verified']);
    });
  }

  /**
   * Convert a well-formed list of tokens into a tree.
   *
   * @param string $text
   *   The source text.
   * @param array[] $tokens
   *   The tokens.
   *
   * @return \Drupal\xbbcode\Parser\Tree\NodeElementInterface
   *   The element representing the tree.
   */
  public static function buildTree($text, array $tokens) : NodeElementInterface {

    /** @var \Drupal\xbbcode\Parser\Tree\NodeElement[] $stack */
    $stack = [
      new RootElement(),
    ];

    // Tracks the current position in the text.
    $index = 0;
    foreach ($tokens as $token) {

      // Append any text before the token to the parent.
      $leading = substr($text, $index, $token['start'] - $index);
      if ($leading) {
        end($stack)
          ->append(new TextElement($leading));
      }

      // Advance to the end of the token.
      $index = $token['end'];
      if (!$token['closing']) {

        // Push the element on the stack.
        $stack[] = new TagElement($token['name'], $token['argument'], substr($text, $token['end'], $token['length']));
      }
      else {

        // Pop the closed element.
        $element = array_pop($stack);
        end($stack)
          ->append($element);
      }
    }
    $final = substr($text, $index);
    if ($final) {
      end($stack)
        ->append(new TextElement($final));
    }
    return array_pop($stack);
  }

  /**
   * Assign processors to the tag elements of a tree.
   *
   * @param \Drupal\xbbcode\Parser\Tree\NodeElementInterface $node
   *   The tree to decorate.
   * @param \Drupal\xbbcode\Parser\Processor\TagProcessorInterface[]|\ArrayAccess $processors
   *   The processors, keyed by name.
   */
  public static function decorateTree(NodeElementInterface $node, $processors) : void {
    foreach ($node
      ->getChildren() as $child) {
      if ($child instanceof TagElementInterface) {
        $child
          ->setParent($node);
        if ($processor = $processors[$child
          ->getName()]) {
          $child
            ->setProcessor($processor);
        }
        static::decorateTree($child, $processors);
      }
    }
  }

}

Members

Namesort descending Modifiers Type Description Overrides
XBBCodeParser::$processors protected property The plugins for rendering.
XBBCodeParser::buildTree public static function Convert a well-formed list of tokens into a tree.
XBBCodeParser::decorateTree public static function Assign processors to the tag elements of a tree.
XBBCodeParser::parse public function Parse a text and build an element tree. Overrides ParserInterface::parse
XBBCodeParser::parseAttributes public static function Parse a string of attribute assignments.
XBBCodeParser::parseOption public static function Parse an option string.
XBBCodeParser::tokenize public static function Find the opening and closing tags in a text.
XBBCodeParser::validateTokens public static function Validate the nesting, and remove tokens that are not nested.
XBBCodeParser::__construct public function XBBCodeParser constructor.