You are here

class EmailLexer in Zircon Profile 8

Same name and namespace in other branches
  1. 8.0 vendor/egulias/email-validator/src/Egulias/EmailValidator/EmailLexer.php \Egulias\EmailValidator\EmailLexer

Hierarchy

Expanded class hierarchy of EmailLexer

4 files declare their use of EmailLexer
DomainPart.php in vendor/egulias/email-validator/src/Egulias/EmailValidator/Parser/DomainPart.php
EmailLexerTest.php in vendor/egulias/email-validator/tests/egulias/Tests/EmailValidator/EmailLexerTest.php
LocalPart.php in vendor/egulias/email-validator/src/Egulias/EmailValidator/Parser/LocalPart.php
Parser.php in vendor/egulias/email-validator/src/Egulias/EmailValidator/Parser/Parser.php

File

vendor/egulias/email-validator/src/Egulias/EmailValidator/EmailLexer.php, line 7

Namespace

Egulias\EmailValidator
View source
/**
 * Lexer that splits an e-mail address string into typed tokens.
 *
 * The tokenizing itself is inherited from AbstractLexer; this class supplies
 * the regular-expression fragments to split on, maps each matched value to
 * one of the token type constants below, remembers the previously consumed
 * token, and records whether any token was classified as INVALID.
 */
class EmailLexer extends AbstractLexer {

  // Token type identifiers returned by getType().
  // The first group matches the ASCII code of the character it names.
  const C_DEL = 127;
  const C_NUL = 0;
  const S_AT = 64;
  const S_BACKSLASH = 92;
  const S_DOT = 46;
  const S_DQUOTE = 34;
  // NOTE(review): '(' is ASCII 40, whereas 49 is the digit '1'. The value is
  // left untouched because it is part of the public constant set and is
  // presumably only used as a unique identifier — confirm before changing.
  const S_OPENPARENTHESIS = 49;
  // From here on the values are arbitrary identifiers, not ASCII codes.
  const S_CLOSEPARENTHESIS = 261;
  const S_OPENBRACKET = 262;
  const S_CLOSEBRACKET = 263;
  const S_HYPHEN = 264;
  const S_COLON = 265;
  const S_DOUBLECOLON = 266;
  const S_SP = 267;
  const S_HTAB = 268;
  const S_CR = 269;
  const S_LF = 270;
  const S_IPV6TAG = 271;
  const S_LOWERTHAN = 272;
  const S_GREATERTHAN = 273;
  const S_COMMA = 274;
  const S_SEMICOLON = 275;
  const S_OPENQBRACKET = 276;
  const S_CLOSEQBRACKET = 277;
  const S_SLASH = 278;
  const S_EMPTY = null;
  // Fallback type for any token that is neither special nor invalid.
  const GENERIC = 300;
  const CRLF = 301;
  // Type assigned to tokens containing control characters.
  const INVALID = 302;
  // Not referenced inside this class; kept for external users.
  const ASCII_INVALID_FROM = 127;
  const ASCII_INVALID_TO = 199;

  /**
   * Map from a token's literal value to its token type.
   *
   * Mostly US-ASCII visible characters not valid for atext.
   *
   * @see http://tools.ietf.org/html/rfc5322#section-3.2.3
   *
   * @var array
   */
  protected $charValue = array(
    '(' => self::S_OPENPARENTHESIS,
    ')' => self::S_CLOSEPARENTHESIS,
    '<' => self::S_LOWERTHAN,
    '>' => self::S_GREATERTHAN,
    '[' => self::S_OPENBRACKET,
    ']' => self::S_CLOSEBRACKET,
    ':' => self::S_COLON,
    ';' => self::S_SEMICOLON,
    '@' => self::S_AT,
    '\\' => self::S_BACKSLASH,
    '/' => self::S_SLASH,
    ',' => self::S_COMMA,
    '.' => self::S_DOT,
    '"' => self::S_DQUOTE,
    '-' => self::S_HYPHEN,
    '::' => self::S_DOUBLECOLON,
    ' ' => self::S_SP,
    "\t" => self::S_HTAB,
    "\r" => self::S_CR,
    "\n" => self::S_LF,
    "\r\n" => self::CRLF,
    'IPv6' => self::S_IPV6TAG,
    '{' => self::S_OPENQBRACKET,
    '}' => self::S_CLOSEQBRACKET,
    // S_EMPTY is null, so the isset() check in isValid() never reports this
    // entry as present; an empty value therefore ends up as GENERIC.
    '' => self::S_EMPTY,
    // This key is the two-character string backslash + "0", not the NUL
    // byte. A real NUL byte is recognised separately by isNullType().
    '\\0' => self::C_NUL,
  );

  /**
   * TRUE once getType() has classified at least one token as INVALID.
   *
   * @var bool
   */
  protected $hasInvalidTokens = false;

  /**
   * The token that was current before the most recent moveNext() call.
   *
   * @var array|null
   */
  protected $previous;

  /**
   * Resets the lexer and clears the invalid-token flag.
   */
  public function reset() {
    $this->hasInvalidTokens = false;
    parent::reset();
  }

  /**
   * Tells whether an INVALID token has been seen since the last reset().
   *
   * @return bool
   */
  public function hasInvalidTokens() {
    return $this->hasInvalidTokens;
  }

  /**
   * Checks that a token of the given type exists further on in the input.
   *
   * The search is carried out on a clone, so this lexer itself is not
   * advanced.
   *
   * @param mixed $type
   *   Token type handed to skipUntil().
   *
   * @throws \UnexpectedValueException
   *   When the clone has no lookahead token left after skipping.
   *
   * @return bool
   *   Always TRUE; a miss is reported through the exception.
   */
  public function find($type) {
    $search = clone $this;
    $search->skipUntil($type);

    if (!$search->lookahead) {
      throw new \UnexpectedValueException($type . ' not found');
    }

    return true;
  }

  /**
   * Returns the token that was current before the last moveNext() call.
   *
   * @return array|null
   *   The stored token, or NULL if none has been stored yet.
   */
  public function getPrevious() {
    return $this->previous;
  }

  /**
   * Stores the current token as "previous", then advances the lexer.
   *
   * @return bool
   *   Whatever the parent implementation returns.
   */
  public function moveNext() {
    $this->previous = $this->token;

    return parent::moveNext();
  }

  /**
   * Lexical catchable patterns.
   *
   * @return string[]
   */
  protected function getCatchablePatterns() {
    return array(
      // ASCII letters/underscore runs with an optional trailing 4 or 6
      // (domain-literal tags).
      '[a-zA-Z_]+[46]?',
      // A single character outside the 7-bit ASCII range (UTF-8).
      '[^\\x00-\\x7F]',
      '[0-9]+',
      '\\r\\n',
      '::',
      '\\s+?',
      '.',
    );
  }

  /**
   * Lexical non-catchable patterns.
   *
   * @return string[]
   */
  protected function getNonCatchablePatterns() {
    return array(
      '[\\xA0-\\xff]+',
    );
  }

  /**
   * Retrieve token type.
   *
   * Checks are applied in order: NUL byte, known entry in $charValue,
   * control character (which also raises the invalid-token flag), and
   * finally the GENERIC fallback.
   *
   * @param string $value
   *   Token value. Taken by reference to match the parent signature; it is
   *   not modified here.
   *
   * @return int
   *   One of the token type constants of this class.
   */
  protected function getType(&$value) {
    if ($this->isNullType($value)) {
      return self::C_NUL;
    }

    if ($this->isValid($value)) {
      return $this->charValue[$value];
    }

    if ($this->isUTF8Invalid($value)) {
      $this->hasInvalidTokens = true;
      return self::INVALID;
    }

    return self::GENERIC;
  }

  /**
   * Tells whether the value has a non-null entry in $charValue.
   *
   * @param string $value
   *
   * @return bool
   */
  protected function isValid($value) {
    return isset($this->charValue[$value]);
  }

  /**
   * Tells whether the value is exactly one NUL byte.
   *
   * @param string $value
   *
   * @return bool
   */
  protected function isNullType($value) {
    return $value === "\0";
  }

  /**
   * Tells whether the value contains a Unicode control character.
   *
   * A failed match (preg_match() returning FALSE) counts as "not invalid",
   * exactly as a non-match does.
   *
   * @param string $value
   *
   * @return bool
   */
  protected function isUTF8Invalid($value) {
    return preg_match('/\\p{Cc}+/u', $value) === 1;
  }

  /**
   * Regex modifiers: case-insensitive ("i") and UTF-8 ("u").
   *
   * @return string
   */
  protected function getModifiers() {
    return 'iu';
  }

}

Members

Name (sort descending) Modifiers Type Description Overrides
AbstractLexer::$input private property Lexer original input string.
AbstractLexer::$lookahead public property The next token in the input.
AbstractLexer::$peek private property Current peek of current lexer position.
AbstractLexer::$position private property Current lexer position in input string.
AbstractLexer::$token public property The last matched/seen token.
AbstractLexer::$tokens private property Array of scanned tokens.
AbstractLexer::getInputUntilPosition public function Retrieve the original lexer's input until a given position.
AbstractLexer::getLiteral public function Gets the literal for a given token.
AbstractLexer::glimpse public function Peeks at the next token, returns it and immediately resets the peek.
AbstractLexer::isA public function Checks if given value is identical to the given token.
AbstractLexer::isNextToken public function Checks whether a given token matches the current lookahead.
AbstractLexer::isNextTokenAny public function Checks whether any of the given tokens matches the current lookahead.
AbstractLexer::peek public function Moves the lookahead token forward.
AbstractLexer::resetPeek public function Resets the peek pointer to 0.
AbstractLexer::resetPosition public function Resets the lexer position on the input to the given position.
AbstractLexer::scan protected function Scans the input string for tokens.
AbstractLexer::setInput public function Sets the input data to be tokenized.
AbstractLexer::skipUntil public function Tells the lexer to skip input tokens until it sees a token with the given value.
EmailLexer::$charValue protected property US-ASCII visible characters not valid for atext (@link http://tools.ietf.org/html/rfc5322#section-3.2.3)
EmailLexer::$hasInvalidTokens protected property
EmailLexer::$previous protected property
EmailLexer::ASCII_INVALID_FROM constant
EmailLexer::ASCII_INVALID_TO constant
EmailLexer::CRLF constant
EmailLexer::C_DEL constant
EmailLexer::C_NUL constant
EmailLexer::find public function
EmailLexer::GENERIC constant
EmailLexer::getCatchablePatterns protected function Lexical catchable patterns. Overrides AbstractLexer::getCatchablePatterns
EmailLexer::getModifiers protected function Regex modifiers Overrides AbstractLexer::getModifiers
EmailLexer::getNonCatchablePatterns protected function Lexical non-catchable patterns. Overrides AbstractLexer::getNonCatchablePatterns
EmailLexer::getPrevious public function getPrevious
EmailLexer::getType protected function Retrieve token type. Also processes the token value if necessary. Overrides AbstractLexer::getType
EmailLexer::hasInvalidTokens public function
EmailLexer::INVALID constant
EmailLexer::isNullType protected function
EmailLexer::isUTF8Invalid protected function
EmailLexer::isValid protected function
EmailLexer::moveNext public function moveNext Overrides AbstractLexer::moveNext
EmailLexer::reset public function Resets the lexer. Overrides AbstractLexer::reset
EmailLexer::S_AT constant
EmailLexer::S_BACKSLASH constant
EmailLexer::S_CLOSEBRACKET constant
EmailLexer::S_CLOSEPARENTHESIS constant
EmailLexer::S_CLOSEQBRACKET constant
EmailLexer::S_COLON constant
EmailLexer::S_COMMA constant
EmailLexer::S_CR constant
EmailLexer::S_DOT constant
EmailLexer::S_DOUBLECOLON constant
EmailLexer::S_DQUOTE constant
EmailLexer::S_EMPTY constant
EmailLexer::S_GREATERTHAN constant
EmailLexer::S_HTAB constant
EmailLexer::S_HYPHEN constant
EmailLexer::S_IPV6TAG constant
EmailLexer::S_LF constant
EmailLexer::S_LOWERTHAN constant
EmailLexer::S_OPENBRACKET constant
EmailLexer::S_OPENPARENTHESIS constant
EmailLexer::S_OPENQBRACKET constant
EmailLexer::S_SEMICOLON constant
EmailLexer::S_SLASH constant
EmailLexer::S_SP constant