You are here

class OutputRules in Zircon Profile 8

Same name and namespace in other branches
  1. 8.0 vendor/masterminds/html5/src/HTML5/Serializer/OutputRules.php \Masterminds\HTML5\Serializer\OutputRules

Generate the output html5 based on element rules.

Hierarchy

Expanded class hierarchy of OutputRules

3 files declare their use of OutputRules
HTML5.php in vendor/masterminds/html5/src/HTML5.php
OutputRulesTest.php in vendor/masterminds/html5/test/HTML5/Serializer/OutputRulesTest.php
TraverserTest.php in vendor/masterminds/html5/test/HTML5/Serializer/TraverserTest.php

File

vendor/masterminds/html5/src/HTML5/Serializer/OutputRules.php, line 16
The rules for generating output in the serializer.

Namespace

Masterminds\HTML5\Serializer
View source
class OutputRules implements \Masterminds\HTML5\Serializer\RulesInterface {

  /**
   * Defined in http://www.w3.org/TR/html51/infrastructure.html#html-namespace-0
   */
  const NAMESPACE_HTML = 'http://www.w3.org/1999/xhtml';
  const NAMESPACE_MATHML = 'http://www.w3.org/1998/Math/MathML';
  const NAMESPACE_SVG = 'http://www.w3.org/2000/svg';
  const NAMESPACE_XLINK = 'http://www.w3.org/1999/xlink';
  const NAMESPACE_XML = 'http://www.w3.org/XML/1998/namespace';
  const NAMESPACE_XMLNS = 'http://www.w3.org/2000/xmlns/';

  /**
   * Holds the namespace URIs that are implicit in HTML5 output; namespaceAttrs()
   * does not write a declaration for any of them.
   *
   * @var array
   */
  protected $implicitNamespaces = array(
    self::NAMESPACE_HTML,
    self::NAMESPACE_SVG,
    self::NAMESPACE_MATHML,
    self::NAMESPACE_XML,
    self::NAMESPACE_XMLNS,
  );
  const IM_IN_HTML = 1;
  const IM_IN_SVG = 2;
  const IM_IN_MATHML = 3;

  /**
   * Caches whether the ENT_HTML5 flag is available and usable on this runtime.
   * @var boolean
   */
  private $hasHTML5 = false;
  protected $traverser;
  protected $encode = false;
  protected $out;
  protected $outputMode;
  private $xpath;
  protected $nonBooleanAttributes = array(
    /*
    array(
        'nodeNamespace'=>'http://www.w3.org/1999/xhtml',
        'attrNamespace'=>'http://www.w3.org/1999/xhtml',

        'nodeName'=>'img', 'nodeName'=>array('img', 'a'),
        'attrName'=>'alt', 'attrName'=>array('title', 'alt'),
    ),
    */
    array(
      'nodeNamespace' => 'http://www.w3.org/1999/xhtml',
      'attrName' => array(
        'href',
        'hreflang',
        'http-equiv',
        'icon',
        'id',
        'keytype',
        'kind',
        'label',
        'lang',
        'language',
        'list',
        'maxlength',
        'media',
        'method',
        'name',
        'placeholder',
        'rel',
        'rows',
        'rowspan',
        'sandbox',
        'spellcheck',
        'scope',
        'seamless',
        'shape',
        'size',
        'sizes',
        'span',
        'src',
        'srcdoc',
        'srclang',
        'srcset',
        'start',
        'step',
        'style',
        'summary',
        'tabindex',
        'target',
        'title',
        'type',
        'value',
        'width',
        'border',
        'charset',
        'cite',
        'class',
        'code',
        'codebase',
        'color',
        'cols',
        'colspan',
        'content',
        'coords',
        'data',
        'datetime',
        'default',
        'dir',
        'dirname',
        'enctype',
        'for',
        'form',
        'formaction',
        'headers',
        'height',
        'accept',
        'accept-charset',
        'accesskey',
        'action',
        'align',
        'alt',
        'bgcolor',
      ),
    ),
    array(
      'nodeNamespace' => 'http://www.w3.org/1999/xhtml',
      'xpath' => 'starts-with(local-name(), \'data-\')',
    ),
  );
  const DOCTYPE = '<!DOCTYPE html>';
  /**
   * Set up the rules with an output stream and optional settings.
   *
   * @param resource $output
   *            The stream the serialized markup is written to with fwrite().
   * @param array $options
   *            Optional settings. When 'encode_entities' is set, its value
   *            becomes the flag that enc() consults.
   */
  public function __construct($output, $options = array()) {
    $this->out = $output;
    $this->outputMode = static::IM_IN_HTML;

    // ENT_HTML5 is ignored on HHVM, see
    // https://github.com/facebook/hhvm/issues/2727
    $this->hasHTML5 = !defined('HHVM_VERSION') && defined('ENT_HTML5');

    if (isset($options['encode_entities'])) {
      $this->encode = $options['encode_entities'];
    }
  }
  /**
   * Register an additional non-boolean attribute rule.
   *
   * The rule is appended to the list that nonBooleanAttribute() walks when it
   * decides whether an attribute with an empty value still has to be written
   * with an explicit value.
   *
   * @param array $rule
   *            The rule definition; see nonBooleanAttribute() for the keys
   *            that are evaluated.
   */
  public function addRule(array $rule) {
    $this->nonBooleanAttributes[] = $rule;
  }
  /**
   * Register the traverser used by the rules.
   *
   * The traverser is what element() and document() hand child nodes to.
   *
   * @param \Masterminds\HTML5\Serializer\Traverser $traverser
   *            The traverser to delegate to.
   *
   * @return $this so it can be used in chaining.
   */
  public function setTraverser(\Masterminds\HTML5\Serializer\Traverser $traverser) {
    $this->traverser = $traverser;
    return $this;
  }
  /**
   * Write a document: the doctype followed by the root element, if any.
   *
   * @param \DOMDocument $dom
   *            The document to write.
   */
  public function document($dom) {
    $this->doctype();

    // A document without a root element consists of the doctype only.
    if (!$dom->documentElement) {
      return;
    }

    $this->traverser->node($dom->documentElement);
    $this->nl();
  }
  /**
   * Write the doctype declaration followed by a newline.
   */
  protected function doctype() {
    $this->wr(static::DOCTYPE)->nl();
  }
  /**
   * Write an element: opening tag, content and, unless void, closing tag.
   *
   * Entering an svg or math element switches the output mode, which changes
   * how openTag(), attrs() and closeTag() write tags and attribute names; the
   * mode is switched back to HTML once the children have been written.
   *
   * @param \DOMElement $ele
   *            The element to write.
   */
  public function element($ele) {
    $name = $ele->tagName;

    // Per spec:
    // If the element has a declared namespace in the HTML, MathML or
    // SVG namespaces, we use the lname instead of the tagName.
    if ($this->traverser->isLocalElement($ele)) {
      $name = $ele->localName;
    }

    // If we are in SVG or MathML there is special handling.
    // Using if/elseif instead of switch because it's faster in PHP.
    if ($name == 'svg') {
      $this->outputMode = static::IM_IN_SVG;
      $name = Elements::normalizeSvgElement($name);
    }
    elseif ($name == 'math') {
      $this->outputMode = static::IM_IN_MATHML;
    }

    $this->openTag($ele);

    if (Elements::isA($name, Elements::TEXT_RAW)) {
      // Raw text content is written verbatim. Only character data nodes
      // carry a "data" property; reading it from an element child would
      // raise a notice and drop the child, so elements are serialized
      // through element() instead. Other node types have nothing to write.
      foreach ($ele->childNodes as $child) {
        if ($child instanceof \DOMCharacterData) {
          $this->wr($child->data);
        }
        elseif ($child instanceof \DOMElement) {
          $this->element($child);
        }
      }
    }
    else {
      // Handle children.
      if ($ele->hasChildNodes()) {
        $this->traverser->children($ele->childNodes);
      }

      // Close out the SVG or MathML special handling.
      if ($name == 'svg' || $name == 'math') {
        $this->outputMode = static::IM_IN_HTML;
      }
    }

    // If not unary, add a closing tag.
    if (!Elements::isA($name, Elements::VOID_TAG)) {
      $this->closeTag($ele);
    }
  }

  /**
   * Write a text node.
   *
   * Text whose parent is a raw text element is written as-is; any other text
   * goes through enc() first.
   *
   * @param \DOMText $ele
   *            The text node to write.
   */
  public function text($ele) {
    $parent = isset($ele->parentNode) ? $ele->parentNode : null;
    $insideRawText = $parent !== null
      && isset($parent->tagName)
      && Elements::isA($parent->localName, Elements::TEXT_RAW);

    // FIXME: The encoded branch probably needs some flags set.
    $this->wr($insideRawText ? $ele->data : $this->enc($ele->data));
  }
  /**
   * Write a CDATA node.
   *
   * @param \DOMNode $ele
   *            The CDATA node to write.
   */
  public function cdata($ele) {
    // The owning document serializes the node; this encodes the CDATA.
    $markup = $ele->ownerDocument->saveXML($ele);
    $this->wr($markup);
  }
  /**
   * Write a comment node.
   *
   * @param \DOMNode $ele
   *            The comment node to write.
   */
  public function comment($ele) {
    // Writing '<!--', the node data and '-->' by hand produces identical
    // output to letting the owning document serialize the node.
    $markup = $ele->ownerDocument->saveXML($ele);
    $this->wr($markup);
  }
  /**
   * Write a processing instruction.
   *
   * The instruction is written as its opening delimiter, the target, a
   * single space, the data and the closing delimiter.
   *
   * @param \DOMProcessingInstruction $ele
   *            The processing instruction to write.
   */
  public function processorInstruction($ele) {
    $this->wr('<?');
    $this->wr($ele->target);
    $this->wr(' ');
    $this->wr($ele->data);
    $this->wr('?>');
  }

  /**
   * Write the namespace attributes.
   *
   * Queries the namespace nodes selected by the XPath expression below and
   * writes each one as name="uri", skipping every URI listed in
   * $implicitNamespaces.
   *
   * @param \DOMNode $ele
   *            The element being written.
   */
  protected function namespaceAttrs($ele) {
    // The cached XPath object is bound to one document; replace it when the
    // element belongs to a different one.
    if (!$this->xpath || $this->xpath->document !== $ele->ownerDocument) {
      $this->xpath = new \DOMXPath($ele->ownerDocument);
    }

    $declarations = $this->xpath->query('namespace::*[not(.=../../namespace::*)]', $ele);
    foreach ($declarations as $nsNode) {
      if (in_array($nsNode->nodeValue, $this->implicitNamespaces, true)) {
        continue;
      }

      // The URI ends up inside a double-quoted attribute, so it is encoded
      // like any other attribute value; a raw '"' or '&' would otherwise
      // produce broken markup.
      $this->wr(' ')
        ->wr($nsNode->nodeName)
        ->wr('="')
        ->wr($this->enc($nsNode->nodeValue, true))
        ->wr('"');
    }
  }

  /**
   * Write the opening tag.
   *
   * Tags for HTML, MathML, and SVG are in the local name. Otherwise, use the
   * qualified name (8.3).
   *
   * In HTML output mode the tag always ends with '>'. In the other output
   * modes an element without child nodes is written self-closing (' />').
   *
   * @param \DOMNode $ele
   *            The element being written.
   */
  protected function openTag($ele) {
    $this->wr('<');
    $tagName = $this->traverser->isLocalElement($ele) ? $ele->localName : $ele->tagName;
    $this->wr($tagName);

    $this->attrs($ele);
    $this->namespaceAttrs($ele);

    $selfClosing = $this->outputMode != static::IM_IN_HTML && !$ele->hasChildNodes();
    $this->wr($selfClosing ? ' />' : '>');
  }
  /**
   * Write the attributes of an element.
   *
   * Every attribute name is written. The ="value" part is written when the
   * encoded value is non-empty, or when nonBooleanAttribute() reports that the
   * attribute must keep an explicit value; otherwise only the bare name is
   * written.
   *
   * @param \DOMNode $ele
   *            The element whose attributes are written.
   *
   * @return $this so it can be used in chaining.
   */
  protected function attrs($ele) {
    // FIXME: Needs support for xml, xmlns, xlink, and namespaced elements.
    if (!$ele->hasAttributes()) {
      return $this;
    }

    $map = $ele->attributes;
    $len = $map->length;
    for ($i = 0; $i < $len; ++$i) {
      $node = $map->item($i);
      $val = $this->enc($node->value, true);

      // XXX: The spec says that we need to ensure that anything in
      // the XML, XMLNS, or XLink NS's should use the canonical
      // prefix. It seems that DOM does this for us already, but there
      // may be exceptions.
      $name = $node->name;

      // Special handling for attributes in SVG and MathML.
      // Using if/elseif instead of switch because it's faster in PHP.
      if ($this->outputMode == static::IM_IN_SVG) {
        $name = Elements::normalizeSvgAttribute($name);
      }
      elseif ($this->outputMode == static::IM_IN_MATHML) {
        $name = Elements::normalizeMathMlAttribute($name);
      }

      $this->wr(' ')->wr($name);

      if ((isset($val) && $val !== '') || $this->nonBooleanAttribute($node)) {
        $this->wr('="')->wr($val)->wr('"');
      }
    }

    // Return $this here as well, so both exit paths agree with the early
    // return above.
    return $this;
  }
  /**
   * Decide whether an attribute must be written with an explicit value.
   *
   * The attribute is tested against each entry of $nonBooleanAttributes in
   * turn; the first rule whose constraints all hold makes this return true.
   *
   * Recognised rule keys:
   * - 'nodeNamespace': namespace URI the owner element must have.
   * - 'attrNamespace': namespace URI the attribute must have. The older
   *   spelling 'attNamespace' is accepted too.
   * - 'nodeName': local name, or array of local names, of the owner element.
   * - 'attrName': local name, or array of local names, of the attribute.
   * - 'xpath': expression evaluated with the attribute as context node; with
   *   'prefixes' (prefix => namespace URI) registered beforehand.
   *
   * @param \DOMAttr $attr
   *            The attribute to test.
   *
   * @return boolean True if any rule matches the attribute.
   */
  protected function nonBooleanAttribute(\DOMAttr $attr) {
    foreach ($this->nonBooleanAttributes as $rule) {
      if ($this->ruleAppliesTo($rule, $attr)) {
        return true;
      }
    }
    return false;
  }

  /**
   * Check every constraint of one non-boolean attribute rule.
   *
   * @param array $rule
   *            A single entry of $nonBooleanAttributes.
   * @param \DOMAttr $attr
   *            The attribute to test.
   *
   * @return boolean True if no constraint of the rule rejects the attribute.
   */
  private function ruleAppliesTo($rule, \DOMAttr $attr) {
    $ele = $attr->ownerElement;

    if (isset($rule['nodeNamespace']) && $rule['nodeNamespace'] !== $ele->namespaceURI) {
      return false;
    }

    // The example rule in $nonBooleanAttributes spells this key
    // 'attrNamespace', while only 'attNamespace' used to be read. Honour
    // both so rules written either way are enforced.
    foreach (array('attrNamespace', 'attNamespace') as $key) {
      if (isset($rule[$key]) && $rule[$key] !== $attr->namespaceURI) {
        return false;
      }
    }

    if (isset($rule['nodeName'])) {
      $nodeNames = is_array($rule['nodeName']) ? $rule['nodeName'] : array($rule['nodeName']);
      if (!in_array($ele->localName, $nodeNames, true)) {
        return false;
      }
    }

    if (isset($rule['attrName'])) {
      $attrNames = is_array($rule['attrName']) ? $rule['attrName'] : array($rule['attrName']);
      if (!in_array($attr->localName, $attrNames, true)) {
        return false;
      }
    }

    if (isset($rule['xpath'])) {
      $xp = $this->getXPath($attr);
      if (isset($rule['prefixes'])) {
        foreach ($rule['prefixes'] as $nsPrefix => $ns) {
          $xp->registerNamespace($nsPrefix, $ns);
        }
      }
      if (!$xp->evaluate($rule['xpath'], $attr)) {
        return false;
      }
    }

    return true;
  }
  /**
   * Get an XPath object bound to the document that owns the given node.
   *
   * The object is cached, but a DOMXPath is tied to a single document, so the
   * cache is replaced whenever the node belongs to a different document than
   * the cached object. namespaceAttrs() applies the same check.
   *
   * @param \DOMNode $node
   *            A node of the document to query.
   *
   * @return \DOMXPath The XPath object for the node's owner document.
   */
  private function getXPath(\DOMNode $node) {
    if (!$this->xpath || $this->xpath->document !== $node->ownerDocument) {
      $this->xpath = new \DOMXPath($node->ownerDocument);
    }
    return $this->xpath;
  }

  /**
   * Write the closing tag.
   *
   * Tags for HTML, MathML, and SVG are in the local name. Otherwise, use the
   * qualified name (8.3).
   *
   * Nothing is written for an element without child nodes while the output
   * mode is not HTML.
   *
   * @param \DOMNode $ele
   *            The element being written.
   */
  protected function closeTag($ele) {
    if ($this->outputMode != static::IM_IN_HTML && !$ele->hasChildNodes()) {
      return;
    }

    $this->wr('</');
    $tagName = $this->traverser->isLocalElement($ele) ? $ele->localName : $ele->tagName;
    $this->wr($tagName)->wr('>');
  }

  /**
   * Write to the output.
   *
   * @param string $text
   *            The string to put into the output.
   *
   * @return $this This rules object (not the traverser), so it can be used in chaining.
   */
  protected function wr($text) {
    fwrite($this->out, $text);
    return $this;
  }

  /**
   * Write a new line character (PHP_EOL).
   *
   * @return $this This rules object (not the traverser), so it can be used in chaining.
   */
  protected function nl() {
    fwrite($this->out, PHP_EOL);
    return $this;
  }

  /**
   * Encode text.
   *
   * When encode is set to false, the default value, the text passed in is
   * only escaped. For details on how text is escaped see the escape() method.
   *
   * When encode is set to true the text is converted to named character
   * references: through htmlentities() with ENT_HTML5 where that flag is
   * usable, otherwise through the HTML5Entities::$map lookup table.
   *
   * @see http://www.w3.org/TR/2013/CR-html5-20130806/syntax.html#named-character-references
   *
   * @todo Use the Entities class in php 5.3 to have html5 entities.
   *
   * @param string $text
   *            text to encode.
   * @param boolean $attribute
   *            True if we are encoding an attribute, false otherwise. Only
   *            passed on to escape(); the entity conversion ignores it.
   *
   * @return string The encoded text.
   */
  protected function enc($text, $attribute = false) {
    if ($this->encode) {
      if (!$this->hasHTML5) {
        // No native html5 entity support: fall back to the lookup table.
        return strtr($text, \Masterminds\HTML5\Serializer\HTML5Entities::$map);
      }

      // PHP 5.4+ can convert to the named character references natively.
      return htmlentities($text, ENT_HTML5 | ENT_SUBSTITUTE | ENT_QUOTES, 'UTF-8', false);
    }

    // Escape the text rather than convert to named character references.
    return $this->escape($text, $attribute);
  }

  /**
   * Escape text.
   *
   * According to the html5 spec section 8.3 Serializing HTML fragments, text
   * within tags that are not style, script, xmp, iframe, noembed, and noframes
   * need to be properly escaped.
   *
   * The & should be converted to &amp;, no-break space characters (U+00A0)
   * converted to &nbsp;, when in attribute mode the " should be converted to
   * &quot;, and when not in attribute mode the < and > should be converted to
   * &lt; and &gt;.
   *
   * @see http://www.w3.org/TR/2013/CR-html5-20130806/syntax.html#escapingString
   *
   * @param string $text
   *            text to escape.
   * @param boolean $attribute
   *            True if we are escaping an attribute, false otherwise
   *
   * @return string The escaped text.
   */
  protected function escape($text, $attribute = false) {
    // U+00A0 NO-BREAK SPACE, spelled as its UTF-8 byte sequence. A literal
    // character here is indistinguishable from an ordinary space in the
    // source and is easily replaced by one, which would turn every space
    // into &nbsp;. Assumes $text is UTF-8, the encoding enc() passes to
    // htmlentities().
    $nbsp = "\xc2\xa0";

    // Not using htmlspecialchars because, while it does escaping, it doesn't
    // match the escaping rules described above. For example, it doesn't
    // handle non-breaking spaces.
    if ($attribute) {
      $replace = array(
        '"' => '&quot;',
        '&' => '&amp;',
        $nbsp => '&nbsp;',
      );
    }
    else {
      $replace = array(
        '<' => '&lt;',
        '>' => '&gt;',
        '&' => '&amp;',
        $nbsp => '&nbsp;',
      );
    }
    return strtr($text, $replace);
  }

}

Members

Namesort descending Modifiers Type Description Overrides
OutputRules::$encode protected property
OutputRules::$hasHTML5 private property Used as cache to detect if is available ENT_HTML5
OutputRules::$implicitNamespaces protected property Holds the HTML5 element names that causes a namespace switch
OutputRules::$nonBooleanAttributes protected property
OutputRules::$out protected property
OutputRules::$outputMode protected property
OutputRules::$traverser protected property
OutputRules::$xpath private property
OutputRules::addRule public function
OutputRules::attrs protected function
OutputRules::cdata public function Write a CDATA node. Overrides RulesInterface::cdata
OutputRules::closeTag protected function Write the closing tag.
OutputRules::comment public function Write a comment node. Overrides RulesInterface::comment
OutputRules::doctype protected function
OutputRules::DOCTYPE constant
OutputRules::document public function Write a document element (\DOMDocument). Overrides RulesInterface::document
OutputRules::element public function Write an element. Overrides RulesInterface::element
OutputRules::enc protected function Encode text.
OutputRules::escape protected function Escape text.
OutputRules::getXPath private function
OutputRules::IM_IN_HTML constant
OutputRules::IM_IN_MATHML constant
OutputRules::IM_IN_SVG constant
OutputRules::namespaceAttrs protected function Write the namespace attributes
OutputRules::NAMESPACE_HTML constant Defined in http://www.w3.org/TR/html51/infrastructure.html#html-namespace-0
OutputRules::NAMESPACE_MATHML constant
OutputRules::NAMESPACE_SVG constant
OutputRules::NAMESPACE_XLINK constant
OutputRules::NAMESPACE_XML constant
OutputRules::NAMESPACE_XMLNS constant
OutputRules::nl protected function Write a new line character.
OutputRules::nonBooleanAttribute protected function
OutputRules::openTag protected function Write the opening tag.
OutputRules::processorInstruction public function Write a processor instruction. Overrides RulesInterface::processorInstruction
OutputRules::setTraverser public function Register the traverser used by the rules. Overrides RulesInterface::setTraverser
OutputRules::text public function Write a text node. Overrides RulesInterface::text
OutputRules::wr protected function Write to the output.
OutputRules::__construct public function The class constructor. Overrides RulesInterface::__construct