You are here

class EasyRdf_Parser_Ntriples in Zircon Profile 8

Same name and namespace in other branches
  1. 8.0 vendor/easyrdf/easyrdf/lib/EasyRdf/Parser/Ntriples.php \EasyRdf_Parser_Ntriples

A pure-PHP class to parse N-Triples with no dependencies.

@package EasyRdf @copyright Copyright (c) 2009-2013 Nicholas J Humfrey @license http://www.opensource.org/licenses/bsd-license.php

Hierarchy

Expanded class hierarchy of EasyRdf_Parser_Ntriples

1 string reference to 'EasyRdf_Parser_Ntriples'
Format.php in vendor/easyrdf/easyrdf/lib/EasyRdf/Format.php

File

vendor/easyrdf/easyrdf/lib/EasyRdf/Parser/Ntriples.php, line 45

View source
class EasyRdf_Parser_Ntriples extends EasyRdf_Parser {

  /**
   * Decodes an encoded N-Triples string.
   *
   * Every recognised backslash escape is substituted with its decoded value
   * in a single left-to-right pass, so that text produced by one escape is
   * never re-interpreted as the start of another one (for example an escaped
   * backslash followed by "n" stays a backslash and an "n").
   *
   * Recognised escapes are \t \b \n \r \f \" \' \\ as well as \uXXXX and
   * \UXXXXXXXX (hexadecimal digits in either case). Any other backslash
   * sequence is left in the string untouched.
   *
   * @param  string $str An encoded N-Triples string.
   * @return string The unencoded string.
   **/
  protected function unescapeString($str) {
    // Fast path: nothing to decode when there is no backslash at all.
    if (strpos($str, '\\') === false) {
      return $str;
    }

    // One alternation covers the single-character escapes and both Unicode
    // escape forms; the callback decides how to decode each match.
    return preg_replace_callback(
      '/\\\\(?:[tbnrf"\'\\\\]|u[0-9A-Fa-f]{4}|U[0-9A-Fa-f]{8})/',
      array($this, 'unescapeSequence'),
      $str
    );
  }

  /**
   * Decodes a single escape sequence matched by unescapeString().
   *
   * @param  array $matches preg match array; element 0 is the whole escape
   *                        sequence including the leading backslash.
   * @return string The decoded character(s). Unicode escapes are returned
   *                UTF-8 encoded; code points of 0x200000 and above decode
   *                to an empty string.
   * @ignore
   */
  protected function unescapeSequence($matches) {
    $sequence = $matches[0];

    // The character right after the backslash selects the kind of escape.
    $selector = $sequence[1];

    if ($selector !== 'u' && $selector !== 'U') {
      $mappings = array(
        't' => chr(0x9),
        'b' => chr(0x8),
        'n' => chr(0xa),
        'r' => chr(0xd),
        'f' => chr(0xc),
        '"' => chr(0x22),
        '\'' => chr(0x27),
        '\\' => chr(0x5c),
      );
      return $mappings[$selector];
    }

    // Unicode escape: everything after "\u" / "\U" is the hex code point.
    $no = hexdec(substr($sequence, 2));
    if ($no < 128) {

      // Below 0x80: one byte.
      return chr($no);
    }
    elseif ($no < 2048) {

      // Below 0x800: two bytes.
      return chr(($no >> 6) + 192) . chr(($no & 63) + 128);
    }
    elseif ($no < 65536) {

      // Below 0x10000: three bytes.
      return chr(($no >> 12) + 224) . chr(($no >> 6 & 63) + 128) . chr(($no & 63) + 128);
    }
    elseif ($no < 2097152) {

      // Below 0x200000: four bytes.
      return chr(($no >> 18) + 240) . chr(($no >> 12 & 63) + 128) . chr(($no >> 6 & 63) + 128) . chr(($no & 63) + 128);
    }

    # FIXME: throw an exception instead?
    return '';
  }

  /**
   * Parses the subject term of a statement.
   *
   * @param  string  $sub     The raw subject text taken from the statement.
   * @param  integer $lineNum Line number, passed on to the exception.
   * @return string  The unescaped URI, or a graph bnode identifier.
   * @throws EasyRdf_Parser_Exception if $sub is neither <uri> nor _:label.
   * @ignore
   */
  protected function parseNtriplesSubject($sub, $lineNum) {
    if (preg_match('/<([^<>]+)>/', $sub, $matches)) {
      return $this->unescapeString($matches[1]);
    }
    elseif (preg_match('/_:([A-Za-z0-9]*)/', $sub, $matches)) {
      // Compare against '' explicitly: empty() would also be true for the
      // label "0" and give every "_:0" its own, unrelated bnode.
      if ($matches[1] === '') {
        return $this->graph->newBNodeId();
      }
      else {
        $nodeid = $this->unescapeString($matches[1]);
        return $this->remapBnode($nodeid);
      }
    }
    else {
      throw new EasyRdf_Parser_Exception("Failed to parse subject: {$sub}", $lineNum);
    }
  }

  /**
   * Parses the object term of a statement.
   *
   * The forms are tried in this order: typed literal, language-tagged
   * literal, plain literal, URI, blank node.
   *
   * @param  string  $obj     The raw object text taken from the statement.
   * @param  integer $lineNum Line number, passed on to the exception.
   * @return array   Associative array with 'type' ('literal', 'uri' or
   *                 'bnode') and 'value', plus 'datatype' or 'lang' for
   *                 typed / language-tagged literals.
   * @throws EasyRdf_Parser_Exception if $obj matches none of the forms.
   * @ignore
   */
  protected function parseNtriplesObject($obj, $lineNum) {
    // The literal body uses (.*) rather than (.+) so that an empty typed or
    // language-tagged literal keeps its datatype / language instead of
    // falling through to the plain literal branch.
    if (preg_match('/"(.*)"\\^\\^<([^<>]+)>/', $obj, $matches)) {
      return array(
        'type' => 'literal',
        'value' => $this->unescapeString($matches[1]),
        'datatype' => $this->unescapeString($matches[2]),
      );
    }
    elseif (preg_match('/"(.*)"@([\\w\\-]+)/', $obj, $matches)) {
      return array(
        'type' => 'literal',
        'value' => $this->unescapeString($matches[1]),
        'lang' => $this->unescapeString($matches[2]),
      );
    }
    elseif (preg_match('/"(.*)"/', $obj, $matches)) {
      return array(
        'type' => 'literal',
        'value' => $this->unescapeString($matches[1]),
      );
    }
    elseif (preg_match('/<([^<>]+)>/', $obj, $matches)) {
      // Unescape just like subject and predicate URIs, so the same URI is
      // spelled identically wherever it occurs in a statement.
      return array(
        'type' => 'uri',
        'value' => $this->unescapeString($matches[1]),
      );
    }
    elseif (preg_match('/_:([A-Za-z0-9]*)/', $obj, $matches)) {
      // Explicit '' check, see parseNtriplesSubject(): "0" is a real label.
      if ($matches[1] === '') {
        return array(
          'type' => 'bnode',
          'value' => $this->graph->newBNodeId(),
        );
      }
      else {
        $nodeid = $this->unescapeString($matches[1]);
        return array(
          'type' => 'bnode',
          'value' => $this->remapBnode($nodeid),
        );
      }
    }
    else {
      throw new EasyRdf_Parser_Exception("Failed to parse object: {$obj}", $lineNum);
    }
  }

  /**
   * Parse an N-Triples document into an EasyRdf_Graph
   *
   * The data is split into lines on LF or CRLF. Comment lines (first
   * non-blank character is '#') and blank lines are skipped; every other
   * line must be a complete "subject <predicate> object ." statement.
   *
   * @param object EasyRdf_Graph $graph   the graph to load the data into
   * @param string               $data    the RDF document data
   * @param string               $format  the format of the input data
   * @param string               $baseUri the base URI of the data being parsed
   * @return integer             The number of triples added to the graph
   * @throws EasyRdf_Exception        if $format is not 'ntriples'
   * @throws EasyRdf_Parser_Exception if a line cannot be parsed
   */
  public function parse($graph, $data, $format, $baseUri) {
    parent::checkParseParams($graph, $data, $format, $baseUri);

    // NOTE(review): loose comparison kept on purpose; $format may not be a
    // plain string at this point - confirm against checkParseParams().
    if ($format != 'ntriples') {
      throw new EasyRdf_Exception("EasyRdf_Parser_Ntriples does not support: {$format}");
    }

    $lines = preg_split('/\\x0D?\\x0A/', strval($data));
    foreach ($lines as $index => $line) {
      // Line numbers reported in exceptions are 1-based.
      $lineNum = $index + 1;
      if (preg_match('/^\\s*#/', $line)) {

        # Comment
        continue;
      }
      elseif (preg_match('/^\\s*(.+?)\\s+<([^<>]+?)>\\s+(.+?)\\s*\\.\\s*$/', $line, $matches)) {
        $subject = $this->parseNtriplesSubject($matches[1], $lineNum);
        $predicate = $this->unescapeString($matches[2]);
        $object = $this->parseNtriplesObject($matches[3], $lineNum);
        $this->addTriple($subject, $predicate, $object);
      }
      elseif (preg_match('/^\\s*$/', $line)) {

        # Blank line
        continue;
      }
      else {
        throw new EasyRdf_Parser_Exception("Failed to parse statement", $lineNum);
      }
    }
    return $this->tripleCount;
  }

}

Members

Namesort descending Modifiers Type Description Overrides
EasyRdf_Parser::$baseUri protected property The base URI for the document currently being parsed
EasyRdf_Parser::$bnodeMap private property Mapping from source to graph bnode identifiers
EasyRdf_Parser::$format protected property The format of the document currently being parsed
EasyRdf_Parser::$graph protected property The current graph to insert triples into
EasyRdf_Parser::$tripleCount protected property
EasyRdf_Parser::addTriple protected function Add a triple to the current graph, and keep count of the number of triples @ignore 1
EasyRdf_Parser::checkParseParams protected function Check, cleanup parameters and prepare for parsing @ignore
EasyRdf_Parser::remapBnode protected function Create a new, unique bnode identifier from a source identifier. If the source identifier has previously been seen, the same new bnode identifier is returned. @ignore
EasyRdf_Parser::resetBnodeMap protected function Delete the bnode mapping - to be called at the start of a new parse @ignore
EasyRdf_Parser_Ntriples::parse public function Parse an N-Triples document into an EasyRdf_Graph Overrides EasyRdf_Parser::parse 1
EasyRdf_Parser_Ntriples::parseNtriplesObject protected function @ignore
EasyRdf_Parser_Ntriples::parseNtriplesSubject protected function @ignore
EasyRdf_Parser_Ntriples::unescapeString protected function Decodes an encoded N-Triples string. Any \-escape sequences are substituted with their decoded value.