You are here

XBBCodeParser.php in Extensible BBCode 4.0.x

Same filename and directory in other branches
  1. 8.3 src/Parser/XBBCodeParser.php

File

src/Parser/XBBCodeParser.php
View source
<?php

namespace Drupal\xbbcode\Parser;

use Drupal\xbbcode\Parser\Tree\NodeElementInterface;
use Drupal\xbbcode\Parser\Tree\RootElement;
use Drupal\xbbcode\Parser\Tree\TagElement;
use Drupal\xbbcode\Parser\Tree\TagElementInterface;
use Drupal\xbbcode\Parser\Tree\TextElement;
use function strlen;

/**
 * The standard XBBCode parser.
 */
class XBBCodeParser implements ParserInterface {

  /**
   * The plugins for rendering.
   *
   * @var \Drupal\xbbcode\Parser\Processor\TagProcessorInterface[]
   */
  protected $processors;

  /**
   * XBBCodeParser constructor.
   *
   * @param mixed $processors
   *   The plugins for rendering, keyed by tag name. When empty, every tag
   *   name is tokenized and no processors are assigned to the tree.
   */
  public function __construct($processors = NULL) {
    $this->processors = $processors;
  }

  /**
   * {@inheritdoc}
   */
  public function parse(string $text) : NodeElementInterface {
    // Pipeline: find tags, discard unbalanced ones, nest the rest, then
    // attach a processor to each tag element.
    $tokens = static::tokenize($text, $this->processors);
    $tokens = static::validateTokens($tokens);
    $tree = static::buildTree($text, $tokens);
    if ($this->processors) {
      static::decorateTree($tree, $this->processors);
    }
    return $tree;
  }

  /**
   * Find the opening and closing tags in a text.
   *
   * @param string $text
   *   The source text.
   * @param array|\ArrayAccess|null $allowed
   *   An array keyed by tag name, with non-empty values for allowed tags.
   *   Omit this argument to allow all tag names.
   *
   * @return array[]
   *   The tokens in order of appearance. Each token is an array with the keys
   *   'name' (as written), 'cname' (lower-cased name), 'start' and 'end'
   *   (byte offsets of the tag in the text), 'argument' (raw argument string)
   *   and 'closing' (TRUE for a closing tag).
   */
  public static function tokenize(string $text, $allowed = NULL) : array {

    // Find all opening and closing tags in the text.
    $matches = [];
    preg_match_all("%\n      \\[\n        (?'closing'/?)\n        (?'name'[\\w-]+)\n        (?'argument'\n          (?:(?=\\k'closing')            # only take an argument in opening tags.\n            (?:\n              =(?:\\\\.|[^\\\\\\[\\]])*  # unquoted option must escape brackets.\n              |\n              =(?'quote1'['\"]|&quot;|&\\#039;)\n               (?:\\\\.|(?!\\k'quote1')[^\\\\])*\n               \\k'quote1'\n              |\n              (?:\\s+[\\w-]+=\n                (?:\n                  (?'quote2'['\"]|&quot;|&\\#039;)\n                  (?:\\\\.|(?!\\k'quote2')[^\\\\])*\n                  \\k'quote2'\n                  |\n                  (?!\\g'quote2')        # unquoted values cannot begin with quotes.\n                  (?:\\\\.|[^\\[\\]\\s\\\\])*\n                )\n              )*\n            )\n          )?\n        )\n      ]\n      %x", $text, $matches, PREG_SET_ORDER | PREG_OFFSET_CAPTURE);
    $tokens = [];
    foreach ($matches as $match) {
      $name = $match['name'][0];
      $canonical_name = mb_strtolower($name);

      // Skip tags that are not in the (non-empty) list of allowed names.
      if ($allowed && empty($allowed[$canonical_name])) {
        continue;
      }

      // PREG_OFFSET_CAPTURE yields byte offsets, so the end position is
      // computed with the byte-wise strlen() as well.
      $start = $match[0][1];
      $tokens[] = [
        'name' => $name,
        'cname' => $canonical_name,
        'start' => $start,
        'end' => $start + strlen($match[0][0]),
        'argument' => $match['argument'][0],
        'closing' => !empty($match['closing'][0]),
      ];
    }
    return $tokens;
  }

  /**
   * Parse a string of attribute assignments.
   *
   * @param string $argument
   *   The string containing the attributes, including initial whitespace.
   *
   * @return string[]
   *   An associative array of all attributes. If a key occurs more than once,
   *   the last assignment wins.
   */
  public static function parseAttributes(string $argument) : array {
    $assignments = [];
    preg_match_all("/\n    (?<=\\s)                                # preceded by whitespace.\n    (?'key'[\\w-]+)=\n    (?:\n        (?'quote'['\"]|&quot;|&\\#039;)     # quotes may be encoded.\n        (?'value'\n          (?:\\\\.|(?!\\\\|\\k'quote')[^\\\\])*   # value can contain the delimiter.\n        )\n        \\k'quote'\n        |\n        (?'unquoted'\n          (?!\\g'quote')           # unquoted values cannot start with a quote.\n          (?:\\\\.|[^\\s\\\\])*\n        )\n    )\n    (?=\\s|\$)/x", $argument, $assignments, PREG_SET_ORDER);
    $attributes = [];
    foreach ($assignments as $assignment) {

      // Decide by which branch matched, not by the truthiness of the value:
      // a quoted "0" or "" is a legitimate value. With PREG_SET_ORDER the
      // trailing 'unquoted' group is absent whenever the quoted branch
      // matched, so it must not be read in that case.
      $is_quoted = ($assignment['quote'] ?? '') !== '';
      $value = $is_quoted ? $assignment['value'] : ($assignment['unquoted'] ?? '');

      // Strip backslashes from the escape sequences in each case.
      $attributes[$assignment['key']] = stripslashes($value);
    }
    return $attributes;
  }

  /**
   * Parse an option string.
   *
   * @param string $argument
   *   The argument string, including the initial =.
   *
   * @return string
   *   The parsed option value, without surrounding quotes and with escape
   *   backslashes removed.
   */
  public static function parseOption(string $argument) : string {
    if (preg_match("/\n      ^=\n      (?'quote'['\"]|&quot;|&\\#039;)\n      (?'value'.*)\n      \\k'quote'\n      \$/x", $argument, $match)) {
      // Quoted option: take the part between the matching quotes.
      $value = $match['value'];
    }
    else {
      // Unquoted option: everything after the initial "=".
      $value = substr($argument, 1);
    }
    return stripslashes($value);
  }

  /**
   * Validate the nesting, and remove tokens that are not nested.
   *
   * @param array[] $tokens
   *   The tokens.
   *
   * @return array[]
   *   A well-formed list of tokens, with the original keys preserved. Each
   *   remaining opening token additionally carries 'length', the number of
   *   bytes between the end of the opening tag and the start of its closing
   *   tag.
   */
  public static function validateTokens(array $tokens) : array {

    // Initialize the counter for each tag name. It tracks how many tags of
    // that name are currently open on the stack.
    $counter = [];
    foreach ($tokens as $token) {
      $counter[$token['cname']] = 0;
    }
    $stack = [];
    foreach ($tokens as $i => $token) {
      if ($token['closing']) {

        // A closing tag without a matching open tag is left unverified.
        if ($counter[$token['cname']] > 0) {

          // Pop the stack until a matching token is reached. Anything popped
          // on the way was opened but never closed, and stays unverified.
          do {
            $last = array_pop($stack);
            $counter[$last['cname']]--;
          } while ($last['cname'] !== $token['cname']);
          $tokens[$last['id']] += [
            'length' => $token['start'] - $last['end'],
            'verified' => TRUE,
          ];
          $tokens[$i]['verified'] = TRUE;
        }
      }
      else {

        // Stack this token together with its position.
        $stack[] = $token + [
          'id' => $i,
        ];
        $counter[$token['cname']]++;
      }
    }

    // Filter the tokens.
    return array_filter($tokens, static function ($token) {
      return !empty($token['verified']);
    });
  }

  /**
   * Convert a well-formed list of tokens into a tree.
   *
   * @param string $text
   *   The source text.
   * @param array[] $tokens
   *   The tokens. They must be properly nested, and opening tokens must carry
   *   a 'length' key, as produced by static::validateTokens().
   *
   * @return \Drupal\xbbcode\Parser\Tree\NodeElementInterface
   *   The element representing the tree.
   */
  public static function buildTree(string $text, array $tokens) : NodeElementInterface {

    /** @var \Drupal\xbbcode\Parser\Tree\NodeElement[] $stack */
    $stack = [
      new RootElement(),
    ];

    // Tracks the current position in the text.
    $index = 0;
    foreach ($tokens as $token) {

      // Append any text before the token to the parent. Compare offsets
      // rather than testing the string itself, because a text of "0" is
      // falsy in PHP and would otherwise be dropped.
      if ($token['start'] > $index) {
        $leading = substr($text, $index, $token['start'] - $index);
        end($stack)->append(new TextElement($leading));
      }

      // Advance to the end of the token.
      $index = $token['end'];
      if (!$token['closing']) {

        // Push the element on the stack, along with its raw inner source.
        $source = substr($text, $token['end'], $token['length']);
        $stack[] = new TagElement($token['name'], $token['argument'], $source);
      }
      else {

        // Pop the closed element and attach it to its parent.
        /** @var \Drupal\xbbcode\Parser\Tree\TagElementInterface $element */
        $element = array_pop($stack);
        $element->setClosingName($token['name']);
        end($stack)->append($element);
      }
    }

    // Append the remaining text after the last token, including a bare "0".
    if ($index < strlen($text)) {
      end($stack)->append(new TextElement(substr($text, $index)));
    }
    return array_pop($stack);
  }

  /**
   * Assign processors to the tag elements of a tree.
   *
   * Tag elements without a (non-empty) entry in $processors keep whatever
   * processor they already had; their children are still visited.
   *
   * @param \Drupal\xbbcode\Parser\Tree\NodeElementInterface $node
   *   The tree to decorate.
   * @param \Drupal\xbbcode\Parser\Processor\TagProcessorInterface[]|\ArrayAccess $processors
   *   The processors, keyed by name.
   */
  public static function decorateTree(NodeElementInterface $node, $processors) : void {
    foreach ($node->getChildren() as $child) {
      if (!$child instanceof TagElementInterface) {
        continue;
      }
      $child->setParent($node);

      // Look up the processor without triggering an undefined-key warning
      // when no processor is registered for this tag name.
      $processor = $processors[$child->getName()] ?? NULL;
      if ($processor) {
        $child->setProcessor($processor);
      }
      static::decorateTree($child, $processors);
    }
  }

}

Classes

Namesort descending Description
XBBCodeParser The standard XBBCode parser.