class EasyRdf_Parser_Ntriples in Zircon Profile 8
Same name and namespace in other branches
- 8.0 vendor/easyrdf/easyrdf/lib/EasyRdf/Parser/Ntriples.php \EasyRdf_Parser_Ntriples
A pure-php class to parse N-Triples with no dependencies.
@package EasyRdf @copyright Copyright (c) 2009-2013 Nicholas J Humfrey @license http://www.opensource.org/licenses/bsd-license.php
Hierarchy
- class \EasyRdf_Parser
- class \EasyRdf_Parser_Ntriples
Expanded class hierarchy of EasyRdf_Parser_Ntriples
1 string reference to 'EasyRdf_Parser_Ntriples'
- Format.php in vendor/
easyrdf/ easyrdf/ lib/ EasyRdf/ Format.php
File
- vendor/
easyrdf/ easyrdf/ lib/ EasyRdf/ Parser/ Ntriples.php, line 45
View source
class EasyRdf_Parser_Ntriples extends EasyRdf_Parser {

  /**
   * Decodes an encoded N-Triples string.
   *
   * Every recognised backslash escape is replaced by the character it stands
   * for: \t \b \n \r \f \" \' \\ as well as \uXXXX and \UXXXXXXXX (hex digits
   * in either case). The string is decoded in a single left-to-right pass, so
   * the output of one escape is never re-read as the start of another one
   * (e.g. the input \\n yields a backslash followed by "n", not a newline).
   * Unrecognised escape sequences are left untouched.
   *
   * @param string $str An encoded N-Triples string.
   * @return string The unencoded string.
   */
  protected function unescapeString($str) {
    // Fast path: nothing to decode.
    if (strpos($str, '\\') === false) {
      return $str;
    }
    return preg_replace_callback(
      '/\\\\(?:U[0-9A-Fa-f]{8}|u[0-9A-Fa-f]{4}|[tbnrf"\'\\\\])/',
      array($this, 'decodeEscapeSequence'),
      $str
    );
  }

  /**
   * Callback for unescapeString(): decodes one matched escape sequence.
   *
   * @ignore
   * @param array $matches preg match array; $matches[0] is the full escape
   *                       sequence including the leading backslash.
   * @return string The decoded character(s).
   */
  protected function decodeEscapeSequence($matches) {
    $sequence = $matches[0];
    // The character right after the backslash selects the kind of escape.
    $kind = $sequence[1];
    if ($kind === 'u' || $kind === 'U') {
      return $this->codePointToUtf8(hexdec(substr($sequence, 2)));
    }
    $simple = array(
      't' => chr(0x9),
      'b' => chr(0x8),
      'n' => chr(0xa),
      'r' => chr(0xd),
      'f' => chr(0xc),
      '"' => chr(0x22),
      '\'' => chr(0x27),
      '\\' => chr(0x5c),
    );
    return $simple[$kind];
  }

  /**
   * Encodes a numeric code point as a UTF-8 byte sequence.
   *
   * @ignore
   * @param integer $no The code point number.
   * @return string One to four bytes, or an empty string when the number is
   *                0x200000 or above.
   */
  protected function codePointToUtf8($no) {
    if ($no < 0x80) {
      return chr($no);
    }
    if ($no < 0x800) {
      return chr(($no >> 6) + 192) . chr(($no & 63) + 128);
    }
    if ($no < 0x10000) {
      return chr(($no >> 12) + 224)
        . chr((($no >> 6) & 63) + 128)
        . chr(($no & 63) + 128);
    }
    if ($no < 0x200000) {
      return chr(($no >> 18) + 240)
        . chr((($no >> 12) & 63) + 128)
        . chr((($no >> 6) & 63) + 128)
        . chr(($no & 63) + 128);
    }
    # FIXME: throw an exception instead?
    return '';
  }

  /**
   * Turns a blank node label from the document into a graph bnode identifier.
   *
   * @ignore
   * @param string $label The label found after "_:" (may be empty).
   * @return string A fresh bnode identifier for an empty label, otherwise the
   *                remapped identifier for that label.
   */
  protected function resolveBnode($label) {
    // Compare against '' explicitly: empty() would also treat the valid
    // label "0" as missing and hand out a new node on every occurrence.
    if ($label === '') {
      return $this->graph->newBNodeId();
    }
    $nodeid = $this->unescapeString($label);
    return $this->remapBnode($nodeid);
  }

  /**
   * Parses the subject term of a statement.
   *
   * @ignore
   * @param string  $sub     The raw subject text.
   * @param integer $lineNum Line number, used for error reporting.
   * @return string The unescaped IRI or a bnode identifier.
   * @throws EasyRdf_Parser_Exception if $sub is neither an IRI nor a bnode.
   */
  protected function parseNtriplesSubject($sub, $lineNum) {
    if (preg_match('/<([^<>]+)>/', $sub, $matches)) {
      return $this->unescapeString($matches[1]);
    }
    // Labels may contain "_", "." and "-" besides ASCII letters and digits;
    // stopping at those characters would merge e.g. _:b_1 and _:b_2.
    if (preg_match('/_:([A-Za-z0-9_.-]*)/', $sub, $matches)) {
      return $this->resolveBnode($matches[1]);
    }
    throw new EasyRdf_Parser_Exception("Failed to parse subject: {$sub}", $lineNum);
  }

  /**
   * Parses the object term of a statement.
   *
   * @ignore
   * @param string  $obj     The raw object text.
   * @param integer $lineNum Line number, used for error reporting.
   * @return array An array with 'type' and 'value' keys, plus 'datatype' or
   *               'lang' for typed / language-tagged literals.
   * @throws EasyRdf_Parser_Exception if $obj is not a literal, IRI or bnode.
   */
  protected function parseNtriplesObject($obj, $lineNum) {
    // Typed literal. Anchored so that a "^^<...>" occurring inside the text
    // of another literal is not mistaken for a datatype; (.*) so that the
    // empty string keeps its datatype.
    if (preg_match('/^\\s*"(.*)"\\^\\^<([^<>]+)>\\s*$/', $obj, $matches)) {
      return array(
        'type' => 'literal',
        'value' => $this->unescapeString($matches[1]),
        'datatype' => $this->unescapeString($matches[2]),
      );
    }
    // Language-tagged literal, anchored for the same reason as above.
    if (preg_match('/^\\s*"(.*)"@([\\w\\-]+)\\s*$/', $obj, $matches)) {
      return array(
        'type' => 'literal',
        'value' => $this->unescapeString($matches[1]),
        'lang' => $this->unescapeString($matches[2]),
      );
    }
    // Plain literal.
    if (preg_match('/"(.*)"/', $obj, $matches)) {
      return array(
        'type' => 'literal',
        'value' => $this->unescapeString($matches[1]),
      );
    }
    // IRI: unescaped the same way subject and predicate IRIs are.
    if (preg_match('/<([^<>]+)>/', $obj, $matches)) {
      return array(
        'type' => 'uri',
        'value' => $this->unescapeString($matches[1]),
      );
    }
    // Blank node: same label pattern as in parseNtriplesSubject().
    if (preg_match('/_:([A-Za-z0-9_.-]*)/', $obj, $matches)) {
      return array(
        'type' => 'bnode',
        'value' => $this->resolveBnode($matches[1]),
      );
    }
    throw new EasyRdf_Parser_Exception("Failed to parse object: {$obj}", $lineNum);
  }

  /**
   * Parse an N-Triples document into an EasyRdf_Graph
   *
   * The document is split into lines; comment lines (starting with "#") and
   * blank lines are skipped, every other line must be a complete statement.
   *
   * @param object EasyRdf_Graph $graph the graph to load the data into
   * @param string $data the RDF document data
   * @param string $format the format of the input data
   * @param string $baseUri the base URI of the data being parsed
   * @return integer The number of triples added to the graph
   * @throws EasyRdf_Exception if $format is not 'ntriples'.
   * @throws EasyRdf_Parser_Exception if a line cannot be parsed.
   */
  public function parse($graph, $data, $format, $baseUri) {
    parent::checkParseParams($graph, $data, $format, $baseUri);

    // NOTE(review): loose comparison kept on purpose - $format may arrive as
    // something other than a plain string; confirm before tightening to !==.
    if ($format != 'ntriples') {
      throw new EasyRdf_Exception("EasyRdf_Parser_Ntriples does not support: {$format}");
    }

    $lines = preg_split('/\\x0D?\\x0A/', strval($data));
    foreach ($lines as $index => $line) {
      $lineNum = $index + 1;

      // Comment
      if (preg_match('/^\\s*#/', $line)) {
        continue;
      }

      if (preg_match('/^\\s*(.+?)\\s+<([^<>]+?)>\\s+(.+?)\\s*\\.\\s*$/', $line, $matches)) {
        $subject = $this->parseNtriplesSubject($matches[1], $lineNum);
        $predicate = $this->unescapeString($matches[2]);
        $object = $this->parseNtriplesObject($matches[3], $lineNum);
        $this->addTriple($subject, $predicate, $object);
        continue;
      }

      // Anything that is neither a statement nor blank is an error.
      if (!preg_match('/^\\s*$/', $line)) {
        throw new EasyRdf_Parser_Exception("Failed to parse statement", $lineNum);
      }
    }

    return $this->tripleCount;
  }

}
Members
Name | Modifiers | Type | Description | Overrides |
---|---|---|---|---|
EasyRdf_Parser:: |
protected | property | The base URI for the document currently being parsed | |
EasyRdf_Parser:: |
private | property | Mapping from source to graph bnode identifiers | |
EasyRdf_Parser:: |
protected | property | The format of the document currently being parsed | |
EasyRdf_Parser:: |
protected | property | The current graph to insert triples into | |
EasyRdf_Parser:: |
protected | property | ||
EasyRdf_Parser:: |
protected | function | Add a triple to the current graph, and keep count of the number of triples @ignore | 1 |
EasyRdf_Parser:: |
protected | function | Check, cleanup parameters and prepare for parsing @ignore | |
EasyRdf_Parser:: |
protected | function | Create a new, unique bnode identifier from a source identifier. If the source identifier has previously been seen, the same new bnode identifier is returned. @ignore | |
EasyRdf_Parser:: |
protected | function | Delete the bnode mapping - to be called at the start of a new parse @ignore | |
EasyRdf_Parser_Ntriples:: |
public | function |
Parse an N-Triples document into an EasyRdf_Graph Overrides EasyRdf_Parser:: |
1 |
EasyRdf_Parser_Ntriples:: |
protected | function | @ignore | |
EasyRdf_Parser_Ntriples:: |
protected | function | @ignore | |
EasyRdf_Parser_Ntriples:: |
protected | function | Decodes an encoded N-Triples string. Any \-escape sequences are substituted with their decoded value. |