You are here

protected function Tokenizer::doctype in Zircon Profile 8

Same name and namespace in other branches
  1. 8.0 vendor/masterminds/html5/src/HTML5/Parser/Tokenizer.php \Masterminds\HTML5\Parser\Tokenizer::doctype()

Parse a DOCTYPE.

Parse a DOCTYPE declaration. This method has a strong bearing on whether or not quirks mode is enabled on the event handler.

@todo This method is a little long. Should probably refactor.

1 call to Tokenizer::doctype()
Tokenizer::markupDeclaration in vendor/masterminds/html5/src/HTML5/Parser/Tokenizer.php
Look for markup.

File

vendor/masterminds/html5/src/HTML5/Parser/Tokenizer.php, line 671

Class

Tokenizer
The HTML5 tokenizer.

Namespace

Masterminds\HTML5\Parser

Code

/**
 * Parse a DOCTYPE declaration and emit a doctype event.
 *
 * Reads the DOCTYPE keyword, the (lowercased) name and an optional
 * PUBLIC/SYSTEM identifier from the scanner, then reports the result through
 * $this->events->doctype(). The fourth argument passed to that handler is
 * TRUE on every malformed/truncated path (presumably the quirks-mode flag;
 * confirm against EventHandler).
 *
 * NOTE(review): the scanner appears to be positioned just after "<!" when
 * this is called (the bogus-comment fallback re-prefixes "<!") -- confirm
 * against markupDeclaration().
 *
 * @return mixed
 *   FALSE when the current character is not "D"/"d", or when no quoted
 *   identifier could be read after PUBLIC/SYSTEM. The return value of
 *   bogusComment() when the keyword is not DOCTYPE, the return value of
 *   eof() when input ends right after the keyword, and TRUE in every other
 *   case.
 */
protected function doctype() {
  // strcasecmp() yields 0 on a match, so a non-zero result means "not D".
  if (strcasecmp($this->scanner->current(), 'D')) {
    return false;
  }

  // Check that string is DOCTYPE.
  $chars = $this->scanner->charsWhile("DOCTYPEdoctype");
  if (strcasecmp($chars, 'DOCTYPE')) {
    $this->parseError('Expected DOCTYPE, got %s', $chars);
    return $this->bogusComment('<!' . $chars);
  }

  $this->scanner->whitespace();
  $tok = $this->scanner->current();

  // EOF: die.
  if ($tok === false) {
    $this->events->doctype('html5', EventHandler::DOCTYPE_NONE, '', true);
    return $this->eof();
  }

  $doctypeName = '';

  // NULL char: convert. The replacement character becomes the first
  // character of the name, so the rest of the name is appended below rather
  // than assigned over it.
  if ($tok === "\0") {
    $this->parseError("Unexpected null character in DOCTYPE.");
    $doctypeName .= UTF8Utils::FFFD;
    $this->scanner->next();
  }

  // The name ends at whitespace (including TAB) or at ">".
  $stop = " \t\n\f>";
  $rawName = (string) $this->scanner->charsUntil($stop);

  // Lowercase ASCII first, then replace \0 with FFFD. str_replace() is
  // required here: three-argument strtr() maps byte-for-byte and would
  // substitute only the first byte of the multi-byte replacement.
  $doctypeName .= str_replace("\0", UTF8Utils::FFFD, strtolower($rawName));
  $tok = $this->scanner->current();

  // If false, emit a parse error, DOCTYPE, and return.
  if ($tok === false) {
    $this->parseError('Unexpected EOF in DOCTYPE declaration.');
    $this->events->doctype($doctypeName, EventHandler::DOCTYPE_NONE, null, true);
    return true;
  }

  // Short DOCTYPE, like <!DOCTYPE html>
  if ($tok === '>') {
    // DOCTYPE without a name.
    if ($doctypeName === '') {
      $this->parseError("Expected a DOCTYPE name. Got nothing.");
      $this->events->doctype($doctypeName, 0, null, true);
      $this->scanner->next();
      return true;
    }
    $this->events->doctype($doctypeName);
    $this->scanner->next();
    return true;
  }

  $this->scanner->whitespace();
  $pub = strtoupper($this->scanner->getAsciiAlpha());
  // The keyword must be followed by at least one whitespace character.
  $white = strlen($this->scanner->whitespace());

  // Get ID, and flag it as pub or system.
  if (($pub === 'PUBLIC' || $pub === 'SYSTEM') && $white > 0) {
    // Get the sys ID.
    $type = $pub === 'PUBLIC' ? EventHandler::DOCTYPE_PUBLIC : EventHandler::DOCTYPE_SYSTEM;
    $id = $this->quotedString("\0>");
    if ($id === false) {
      // No quoted identifier: the keyword itself is passed in the id slot.
      $this->events->doctype($doctypeName, $type, $pub, false);
      return false;
    }

    // Premature EOF.
    if ($this->scanner->current() === false) {
      $this->parseError("Unexpected EOF in DOCTYPE");
      $this->events->doctype($doctypeName, $type, $id, true);
      return true;
    }

    // Well-formed complete DOCTYPE.
    $this->scanner->whitespace();
    if ($this->scanner->current() === '>') {
      $this->events->doctype($doctypeName, $type, $id, false);
      $this->scanner->next();
      return true;
    }

    // If we get here, we have <!DOCTYPE foo PUBLIC "bar" SOME_JUNK
    // Throw away the junk, parse error, quirks mode, return true.
    $this->scanner->charsUntil(">");
    $this->parseError("Malformed DOCTYPE.");
    $this->events->doctype($doctypeName, $type, $id, true);
    $this->scanner->next();
    return true;
  }

  // Else it's a bogus DOCTYPE.
  // Consume to > and trash.
  $this->scanner->charsUntil('>');
  $this->parseError("Expected PUBLIC or SYSTEM. Got %s.", $pub);
  $this->events->doctype($doctypeName, 0, null, true);
  $this->scanner->next();
  return true;
}