View source
<?php
class ARC_sparql_parser {
var $version = "0.2.5";
var $init_args = array();
var $bnode_prefix = "";
var $bnode_count = 0;
var $base = "";
var $q = "";
/**
 * Creates the parser and applies optional configuration.
 *
 * When $args is an array it is stored in $init_args and every key/value
 * pair is copied onto the instance as a property of the same name.
 * Any non-array argument is ignored.
 *
 * @param array|string $args Property name => value pairs (default "").
 */
function __construct($args = "") {
    if (!is_array($args)) {
        return;
    }
    $this->init_args = $args;
    foreach ($args as $name => $value) {
        $this->{$name} = $value;
    }
}
/**
 * Old-style (class-named) constructor; simply forwards to __construct().
 *
 * @param array|string $args Passed through unchanged.
 */
function ARC_sparql_parser($args = "") {
    $this->__construct($args);
}
/**
 * Returns the structured information collected by the last parse() call.
 *
 * The `infos` property is only created inside parse(); before that an
 * empty array is returned instead of reading an undefined property.
 *
 * @return array Parse result data (e.g. "vars", "result_vars", ...).
 */
function get_infos() {
    return isset($this->infos) ? $this->infos : array();
}
/**
 * Returns the parser's $q property, i.e. the working copy of the query
 * that parse() stores and the clause parsers progressively shorten.
 *
 * @return string
 */
function get_query() {
    $query = $this->q;
    return $query;
}
/**
 * Sets the prefix used when generating blank node identifiers.
 *
 * A falsy $prefix is replaced by a generated one of the form
 * "arc" + four hex characters (from md5 of microtime()) + "b".
 *
 * @param string $prefix Desired prefix, or "" to generate one.
 */
function set_bnode_prefix($prefix = "") {
    if (!$prefix) {
        $prefix = "arc" . substr(md5(microtime()), 0, 4) . "b";
    }
    $this->bnode_prefix = $prefix;
}
/**
 * Returns the warnings collected so far.
 *
 * The `warnings` property is created by parse() (or by the first
 * warning appended); until then an empty array is returned rather than
 * reading an undefined property.
 *
 * @return array List of warning messages.
 */
function get_warnings() {
    return isset($this->warnings) ? $this->warnings : array();
}
/**
 * Returns the errors collected so far.
 *
 * The `errors` property is created by parse() (or by the first error
 * appended); until then an empty array is returned rather than reading
 * an undefined property.
 *
 * @return array List of error messages.
 */
function get_errors() {
    return isset($this->errors) ? $this->errors : array();
}
/**
 * Returns the log entries collected so far.
 *
 * The `logs` property is created by parse() (or by the first log entry
 * appended); until then an empty array is returned rather than reading
 * an undefined property.
 *
 * @return array List of log messages.
 */
function get_logs() {
    return isset($this->logs) ? $this->logs : array();
}
/**
 * Returns all log entries as one HTML fragment: each entry is prefixed
 * with "- " and entries are separated by "<br />".
 *
 * If no log array exists yet, the result is just "- " (implode() is
 * never handed a non-array value).
 *
 * @return string
 */
function get_log() {
    $logs = (isset($this->logs) && is_array($this->logs)) ? $this->logs : array();
    return "- " . implode("<br />- ", $logs);
}
/**
 * Returns the default prefix table, building it via
 * set_default_prefixes() on first use.
 *
 * @return array Map of "prefix:" => namespace IRI.
 */
function get_default_prefixes() {
    if (isset($this->default_prefixes)) {
        return $this->default_prefixes;
    }
    $this->set_default_prefixes();
    return $this->default_prefixes;
}
/**
 * (Re)initialises the built-in prefix table used by expand_qname() when
 * a prefix was not declared in the query itself.
 *
 * Keys include the trailing colon ("rdf:", "foaf:", ...).
 */
function set_default_prefixes() {
    $prefixes = array();
    $prefixes["rdf:"] = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";
    $prefixes["rdfs:"] = "http://www.w3.org/2000/01/rdf-schema#";
    $prefixes["owl:"] = "http://www.w3.org/2002/07/owl#";
    $prefixes["xsd:"] = "http://www.w3.org/2001/XMLSchema#";
    $prefixes["dc:"] = "http://purl.org/dc/elements/1.1/";
    $prefixes["dct:"] = "http://purl.org/dc/terms/";
    $prefixes["dcterms:"] = "http://purl.org/dc/terms/";
    $prefixes["rss:"] = "http://purl.org/rss/1.0/";
    $prefixes["foaf:"] = "http://xmlns.com/foaf/0.1/";
    $prefixes["doap:"] = "http://usefulinc.com/ns/doap#";
    $this->default_prefixes = $prefixes;
}
/**
 * Increments the blank node counter and returns a fresh identifier of
 * the form "_:" + bnode prefix + counter.
 *
 * @return string
 */
function get_next_bnode_id() {
    $next = ++$this->bnode_count;
    return "_:" . $this->bnode_prefix . $next;
}
/**
 * Stores the base IRI (normalised through get_url_base()).
 *
 * A falsy $base is still stored, but an "empty base" warning is added.
 *
 * @param string $base Base IRI.
 */
function set_base($base = "") {
    if (!$base) {
        $this->warnings[] = "empty base";
    }
    $this->base = $this->get_url_base($base);
}
/**
 * Normalises a URL for use as a base: the fragment ("#..." and
 * everything after the first "#") is removed, and a URL that consists
 * only of "scheme//host" gets a trailing "/".
 *
 * @param string $url URL to normalise.
 * @return string
 */
function get_url_base($url = "") {
    // Everything before the first "#" (the whole string if there is none).
    list($base) = explode("#", $url, 2);
    // "//host" with nothing after it: add the root path slash.
    if (preg_match("/\\/\\/[^\\/]+\$/", $base)) {
        $base .= "/";
    }
    return $base;
}
/**
 * Resolves an IRI reference against the current base ($this->base).
 *
 * Handled forms, in order:
 *  - values containing ":" with no "/" before it are treated as already
 *    absolute and returned unchanged;
 *  - "//host/..." (network-path reference) inherits the scheme of the
 *    base, falling back to "http:" when the base has no "scheme://" part;
 *  - "#frag" is appended to the base;
 *  - "/path" replaces the path of the base (when the base has a host);
 *  - "" yields the base itself;
 *  - "../"-prefixed paths walk up one directory per "../";
 *  - anything else is appended to the directory of the base.
 *
 * @param string $path IRI reference to resolve.
 * @return string Resolved IRI.
 */
function calc_iri($path = "") {
    $colon_pos = strpos($path, ":");
    if ($colon_pos !== false) {
        $slash_pos = strpos($path, "/");
        if ($slash_pos === false || $slash_pos > $colon_pos) {
            return $path;
        }
    }
    $cur_base = $this->get_url_base($this->base);
    if (strpos($path, "//") === 0) {
        // Network-path reference: keep the scheme of the base rather than
        // always assuming http.
        $scheme = preg_match("/^([a-z][a-z0-9+.\\-]*:)\\/\\//i", $cur_base, $matches) ? $matches[1] : "http:";
        return $scheme . $path;
    }
    if (strpos($path, "#") === 0) {
        return $cur_base . $path;
    }
    if (strpos($path, "/") === 0) {
        // Absolute path: keep only "scheme://host" of the base.
        if (preg_match("/([^\\/]*[\\/]{1,2}[^\\/]+)\\//", $cur_base, $matches)) {
            return $matches[1] . $path;
        }
    }
    if ($path == "") {
        return $cur_base;
    }
    // Reduce the base to its directory (everything up to the last "/").
    $cur_base = substr($cur_base, 0, strrpos($cur_base, "/")) . "/";
    if (strpos($path, "../") !== 0) {
        return $cur_base . $path;
    }
    if (preg_match("/([^\\/]*[\\/]{1,2}[^\\/]+\\/)(.*)\\//", $cur_base, $matches)) {
        $server_part = $matches[1];
        $path_part = $matches[2];
    }
    else {
        $server_part = $cur_base;
        $path_part = "";
    }
    while (strpos($path, "../") === 0) {
        $path = substr($path, 3);
        // Drop the last directory segment of $path_part for each "../".
        $path_part = strlen($path_part) ? substr($path_part, 0, -1) : "";
        if (strpos($path_part, "/")) {
            $path_part = substr($path_part, 0, strrpos($path_part, "/")) . "/";
        }
        else {
            $path_part = "";
        }
    }
    return $server_part . $path_part . $path;
}
/**
 * Collects the distinct variable names ("?name" or "$name") that occur
 * in $val, in order of first appearance, and logs each one.
 *
 * Names are compared strictly, so numeric-looking names such as "1" and
 * "01" are kept as separate variables.
 *
 * @param string $val Text to scan.
 * @return array List of variable names without the leading "?" / "$".
 */
function extract_vars($val = "") {
    $vars = array();
    if (preg_match_all("/[\\?\$]{1}([0-9a-z_]+)/i", $val, $matches)) {
        foreach ($matches[1] as $cur_var) {
            if (!in_array($cur_var, $vars, true)) {
                $vars[] = $cur_var;
                $this->logs[] = "adding var " . $cur_var;
            }
        }
    }
    return $vars;
}
/**
 * Expands a prefixed name ("prefix:local") to a full IRI.
 *
 * The prefix (everything up to and including the last ":") is looked up
 * first in the prefixes declared by the query, then in the default
 * prefix table. A successfully expanded IRI is registered in $this->iris.
 *
 * For compatibility an unknown prefix still yields "" as the return
 * value, but the empty string is no longer registered as an IRI.
 * A value without ":" records an error and is returned unchanged.
 *
 * @param string $val Prefixed name.
 * @return string Expanded IRI, "" for an unknown prefix, or $val on error.
 */
function expand_qname($val = "") {
    if (preg_match("/(.*\\:)(.*)/", $val, $matches)) {
        $prefix = $matches[1];
        $name = $matches[2];
        $declared = (isset($this->prefixes) && is_array($this->prefixes)) ? $this->prefixes : array();
        // Lazily initialised, so this also works before parse() has run.
        $defaults = $this->get_default_prefixes();
        $iri = "";
        if (array_key_exists($prefix, $declared)) {
            $iri = $declared[$prefix] . $name;
        }
        elseif (is_array($defaults) && array_key_exists($prefix, $defaults)) {
            $iri = $defaults[$prefix] . $name;
        }
        if ($iri !== "") {
            if (!isset($this->iris) || !is_array($this->iris)) {
                $this->iris = array();
            }
            if (!in_array($iri, $this->iris, true)) {
                $this->iris[] = $iri;
            }
        }
        return $iri;
    }
    $this->errors[] = "could not expand '" . $val . "' in expand_qname()";
    return $val;
}
/**
 * Turns a token into a full IRI.
 *
 * Tokens containing ":" are handed to expand_qname(). Tokens containing
 * an IRI placeholder ("|_iri_N|") are resolved through calc_iri() and
 * registered in $this->iris. Anything else records an error and is
 * returned unchanged.
 *
 * @param string $val Prefixed name or IRI placeholder.
 * @return string
 */
function expand_to_iri($val = "") {
    if (strpos($val, ":") !== false) {
        return $this->expand_qname($val);
    }
    if (!preg_match("/\\|(_iri_[0-9]+)\\|/", $val, $matches)) {
        $this->errors[] = "could not expand '" . $val . "' in expand_to_iri()";
        return $val;
    }
    $placeholder = trim($matches[1]);
    $iri = $this->calc_iri($this->iri_placeholders[$placeholder]);
    if (!in_array($iri, $this->iris)) {
        $this->iris[] = $iri;
    }
    return $iri;
}
/**
 * Replaces every "<...>" IRI reference (no whitespace or ">" inside)
 * with a placeholder "|_iri_N|" and remembers the original IRI in
 * $this->iri_placeholders["_iri_N"], N being the match index.
 *
 * @param string $val Query text.
 * @return string Query text with IRI references substituted.
 */
function substitute_iri_refs($val = "") {
    if (!preg_match_all("/\\<([^>\\s]*)\\>/sU", $val, $matches)) {
        return $val;
    }
    foreach ($matches[1] as $index => $iri) {
        $key = "_iri_" . $index;
        $val = str_replace("<" . $iri . ">", "|" . $key . "|", $val);
        $this->iri_placeholders[$key] = $iri;
    }
    return $val;
}
/**
 * Replaces quoted literals with placeholders and strips "#" comments.
 *
 * The input is scanned left to right for the nearest delimiter. A "#"
 * starts a comment that is dropped up to the next line break. Any other
 * delimiter starts a literal whose content is stored in
 * $this->str_placeholders["_string_N"] (with the delimiter used) and
 * replaced in the output by the bare placeholder name "_string_N".
 * IRI placeholders found inside a literal are turned back into "<iri>".
 *
 * @param string $val Query text (after substitute_iri_refs()).
 * @return string Query text with literals substituted and comments removed.
 */
function substitute_strings($val = "") {
$result = "";
// Delimiters are checked in this order; on equal positions the earlier
// entry wins (strict "<" below), so '"""' takes precedence over '"'.
$delims = array(
"d1" => '"""',
"d2" => "'''",
"d3" => "'",
"d4" => '"',
"d5" => '`',
"d6" => "#",
);
// Line break sequences that terminate a "#" comment.
$brs = array(
"b1" => "\r\n",
"b2" => "\r",
"bn3" => "\n",
);
$cur_pos = 0;
$val_length = strlen($val);
$prefix = "_string_";
$subs_count = 0;
while ($cur_pos < $val_length) {
// Find the delimiter that occurs first at or after $cur_pos.
$next_delim_pos = $val_length;
$next_delim_name = "";
foreach ($delims as $cur_delim_name => $cur_delim_code) {
$cur_next_delim_pos = strpos($val, $cur_delim_code, $cur_pos);
if ($cur_next_delim_pos !== false && $cur_next_delim_pos < $next_delim_pos) {
$next_delim_pos = $cur_next_delim_pos;
$next_delim_name = $cur_delim_name;
}
}
if ($next_delim_name) {
if ($next_delim_name === "d6") {
// Comment: locate the nearest line break (or end of input).
$next_br_pos = $val_length;
$next_br_name = "";
foreach ($brs as $cur_br_name => $cur_br_code) {
$cur_next_br_pos = strpos($val, $cur_br_code, $next_delim_pos);
if ($cur_next_br_pos !== false && $cur_next_br_pos < $next_br_pos) {
$next_br_pos = $cur_next_br_pos;
$next_br_name = $cur_br_name;
}
}
// Keep the text before the comment; scanning resumes at the line break.
$result .= substr($val, $cur_pos, $next_delim_pos - $cur_pos);
$cur_pos = $next_br_pos;
$this->logs[] = "removed comment '" . substr($val, $next_delim_pos, $next_br_pos - $next_delim_pos) . "' in substitute_strings()";
}
else {
// Literal: look for the matching closing delimiter.
$next_delim_code = $delims[$next_delim_name];
$next_end_pos = strpos($val, $next_delim_code, $next_delim_pos + strlen($next_delim_code));
// Skip closing candidates that are preceded by a backslash.
// NOTE(review): when strpos() returned false, "$next_end_pos - 1" is -1
// and substr() inspects the last character of $val - confirm intended.
while (($cur_prev_char = substr($val, $next_end_pos - 1, 1)) && $cur_prev_char == "\\") {
$next_end_pos = strpos($val, $next_delim_code, $next_end_pos + 1);
// NOTE(review): a closing delimiter found on the very last character is
// also reported as unterminated here - looks unintended, verify.
if (!$next_end_pos || $next_end_pos == $val_length - 1) {
$this->errors[] = "unterminated literal in substitute_strings()";
$next_end_pos = $val_length;
break;
}
}
if ($next_end_pos) {
$result .= substr($val, $cur_pos, $next_delim_pos - $cur_pos);
// Content between the opening and the closing delimiter.
$str_val = substr($val, $next_delim_pos + strlen($next_delim_code), $next_end_pos - (strlen($next_delim_code) + $next_delim_pos));
// Undo IRI placeholder substitution inside the literal.
if (preg_match_all("/\\|(_iri_.+)\\|/U", $str_val, $matches)) {
$iri_subs = $matches[1];
foreach ($iri_subs as $cur_subs) {
if ($iri = $this->iri_placeholders[$cur_subs]) {
$str_val = str_replace("|" . $cur_subs . "|", "<" . $iri . ">", $str_val);
}
}
}
$this->str_placeholders[$prefix . $subs_count] = array(
"delim_code" => $next_delim_code,
"val" => $str_val,
);
$result .= $prefix . $subs_count;
$cur_pos = $next_end_pos + strlen($next_delim_code);
$subs_count++;
}
else {
// No closing delimiter: report it and copy the remainder verbatim.
$this->errors[] = "unterminated literal in substitute_strings()";
$result .= substr($val, $cur_pos);
$cur_pos = $val_length;
}
}
}
else {
// No further delimiter: copy the remainder and finish.
$result .= substr($val, $cur_pos);
$cur_pos = $val_length;
}
}
return $result;
}
/**
 * Returns the content between the bracket that starts $val and its
 * matching closing bracket, honouring nesting of the same bracket type.
 *
 * Supported pairs: "(" ")", "{" "}", "[" "]". Callers skip the bracketed
 * section with strlen(result) + 2.
 *
 * An error is recorded when $val does not start with a supported
 * opening bracket (false is returned) and when the input ends before
 * the opening bracket is balanced (the best-effort content is returned).
 *
 * @param string $val Text starting with an opening bracket.
 * @return string|false Bracket content, or false if $val has no leading bracket.
 */
function extract_bracket_data($val = "") {
    $chars = array(
        "(" => ")",
        "{" => "}",
        "[" => "]",
    );
    $start_char = substr($val, 0, 1);
    if ($start_char === false || !isset($chars[$start_char])) {
        $this->errors[] = "could not extract data in extract_bracket_data()";
        return false;
    }
    $end_char = $chars[$start_char];
    $level = 1;
    $val = substr($val, 1);
    $val_length = strlen($val);
    $cur_pos = 0;
    while ($level != 0 && $cur_pos < $val_length) {
        $next_end = strpos($val, $end_char, $cur_pos);
        if ($next_end === false) {
            // No closing bracket left: give up at the end of the input.
            $cur_pos = $val_length;
            break;
        }
        $next_start = strpos($val, $start_char, $cur_pos);
        if ($next_start !== false && $next_start < $next_end) {
            $cur_pos = $next_start + 1;
            $level++;
        }
        else {
            $cur_pos = $next_end + 1;
            $level--;
        }
    }
    if ($level != 0) {
        // Unbalanced input, including the case where closers simply run out.
        $this->errors[] = "could not extract data in extract_bracket_data()";
    }
    return substr($val, 0, $cur_pos - 1);
}
/**
 * Returns a copy of $ar without its last element, re-indexed from 0.
 *
 * Non-array input and arrays with fewer than two elements yield an
 * empty array.
 *
 * @param array|string $ar Stack to shorten.
 * @return array
 */
function pop($ar = "") {
    if (!is_array($ar) || count($ar) < 2) {
        return array();
    }
    return array_values(array_slice($ar, 0, -1));
}
/**
 * Parses a SPARQL query.
 *
 * Resets all per-query state (warnings, errors, logs, infos,
 * placeholders, IRIs, prefixes), substitutes IRI references and string
 * literals with placeholders and then runs parse_Query(). Results are
 * available through get_infos(), get_errors() etc.
 *
 * @param string $q Query text.
 * @return bool|null true when the query is empty (an "empty query" error
 *                   is recorded); no value otherwise.
 */
function parse($q = "") {
    // Reset per-query state.
    $this->warnings = array();
    $this->errors = array();
    $this->logs = array();
    $this->infos = array("vars" => array(), "result_vars" => array());
    $this->iri_placeholders = array();
    $this->str_placeholders = array();
    $this->iris = array();
    $this->set_bnode_prefix($this->bnode_prefix);
    $this->set_default_prefixes();
    $this->prefixes = array();

    if (!$q) {
        $this->errors[] = "empty query";
        return true;
    }

    // Keep the untouched input, then work on a substituted copy.
    $this->q_init = $q;
    $this->q = $q;
    $without_iris = $this->substitute_iri_refs($this->q);
    $this->q = $without_iris;
    $without_strings = $this->substitute_strings($this->q);
    $this->q = $without_strings;
    $this->parse_Query();
}
/**
 * Parses the prolog and dispatches on the query form.
 *
 * The keyword (SELECT, CONSTRUCT, DESCRIBE or ASK, case-insensitive) is
 * stored lower-cased in infos["query_type"], removed from the query and
 * the matching parse_<Form>Query() method is invoked. An error is
 * recorded when no keyword is found.
 */
function parse_Query() {
    $this->q = trim($this->q);
    $this->parse_Prolog();
    if (!preg_match("/^(SELECT|CONSTRUCT|DESCRIBE|ASK)/i", $this->q, $matches)) {
        $this->errors[] = "missing or invalid query type in '" . $this->q . "'";
        return;
    }
    $type = strtolower($matches[1]);
    $this->infos["query_type"] = $type;
    $this->q = trim(substr($this->q, strlen($matches[0])));
    $handler = "parse_" . ucfirst($type) . "Query";
    $this->{$handler}();
}
/**
 * Parses the query prolog: an optional BASE declaration followed by
 * any PREFIX declarations.
 */
function parse_Prolog() {
    $this->parse_BaseDecl();
    $this->parse_PrefixDecl();
}
/**
 * Consumes a leading "BASE |placeholder|" declaration, if present, and
 * applies the referenced IRI via set_base().
 */
function parse_BaseDecl() {
    if (!preg_match("/^BASE\\s*\\|(.*)\\|/isU", $this->q, $matches)) {
        return;
    }
    $iri = $this->iri_placeholders[$matches[1]];
    $this->q = trim(substr($this->q, strlen($matches[0])));
    $this->set_base($iri);
    $this->logs[] = "setting base to " . $iri;
}
/**
 * Consumes all leading "PREFIX name: |placeholder|" declarations and
 * stores each namespace (resolved through calc_iri()) in
 * $this->prefixes under its "name:" key.
 */
function parse_PrefixDecl() {
    $rest = $this->q;
    while (preg_match("/^PREFIX\\s*([^\\s]*\\:)\\s+\\|(.*)\\|/isU", $rest, $matches)) {
        $ns_prefix = trim($matches[1]);
        $iri_ref = $this->iri_placeholders[trim($matches[2])];
        $this->prefixes[$ns_prefix] = $this->calc_iri($iri_ref);
        $this->logs[] = "adding prefix '" . $ns_prefix . "' -> '" . $this->prefixes[$ns_prefix] . "'";
        $rest = trim(substr($rest, strlen($matches[0])));
    }
    $this->q = trim($rest);
}
/**
 * Parses the remainder of a SELECT query.
 *
 * Records the DISTINCT flag, all variables occurring in the query
 * (infos["vars"]) and the projected variables (infos["result_vars"];
 * all variables for "*"), then parses dataset, WHERE and solution
 * modifier clauses.
 */
function parse_SelectQuery() {
    $distinct = (bool) preg_match("/^DISTINCT/i", $this->q, $matches);
    $this->infos["distinct"] = $distinct;
    if ($distinct) {
        $this->q = trim(substr($this->q, strlen($matches[0])));
    }

    $vars = $this->extract_vars($this->q);
    if (preg_match("/^\\*(.*)\$/s", $this->q, $matches)) {
        // "*" selects every variable found in the query.
        $result_vars = $vars;
        $this->q = trim($matches[1]);
    }
    else {
        $result_vars = array();
        $rest = $this->q;
        while (preg_match("/^[\\?\$]{1}([0-9a-z_]+)/i", $rest, $matches)) {
            $result_vars[] = $matches[1];
            $this->logs[] = "adding result var " . $matches[1];
            $rest = trim(substr($rest, strlen($matches[0])));
        }
        $this->q = $rest;
    }

    $this->infos["vars"] = $vars;
    $this->infos["result_vars"] = $result_vars;
    $this->parse_DatasetClause();
    $this->parse_WhereClause();
    $this->parse_SolutionModifier();
}
/**
 * Parses the remainder of a CONSTRUCT query: records all variables,
 * then parses the template, dataset, WHERE and solution modifier
 * clauses in that order.
 */
function parse_ConstructQuery() {
    $all_vars = $this->extract_vars($this->q);
    $this->infos["vars"] = $all_vars;
    $this->parse_ConstructTemplate();
    $this->parse_DatasetClause();
    $this->parse_WhereClause();
    $this->parse_SolutionModifier();
}
/**
 * Parses the remainder of a DESCRIBE query.
 *
 * Either "*" (describe everything: all variables, and afterwards all
 * IRIs seen while parsing) or a list of variables / IRIs is accepted.
 * The "*" is consumed from the query so that the dataset, WHERE and
 * solution modifier clauses that follow it are actually parsed.
 */
function parse_DescribeQuery() {
    $vars = $this->extract_vars($this->q);
    $result_vars = array();
    $result_iris = array();
    $return_all = false;
    if (preg_match("/^\\*/", $this->q)) {
        $result_vars = $vars;
        $return_all = true;
        // Remove the "*"; otherwise the following clause parsers, which
        // all anchor at the start of the query, would never match.
        $this->q = trim(substr($this->q, 1));
    }
    else {
        $q = $this->q;
        while ($sub_result = $this->parse_VarOrIRIref($q)) {
            if ($sub_result["type"] == "var") {
                $result_vars[] = $sub_result["val"];
                $this->logs[] = "adding result var " . $sub_result["val"];
            }
            elseif ($sub_result["type"] == "iri") {
                $result_iris[] = $sub_result["val"];
                $this->logs[] = "adding result iri " . $sub_result["val"];
            }
            $q = $sub_result["unparsed_val"];
        }
        $this->q = $q;
    }
    $this->infos["vars"] = $vars;
    $this->infos["result_vars"] = $result_vars;
    $this->infos["result_iris"] = $result_iris;
    $this->parse_DatasetClause();
    $this->parse_WhereClause();
    $this->parse_SolutionModifier();
    if ($return_all) {
        // For "*", every IRI registered during parsing is a result IRI.
        $this->infos["result_iris"] = $this->iris;
    }
}
/**
 * Parses the remainder of an ASK query: records all variables, then
 * parses the dataset and WHERE clauses.
 */
function parse_AskQuery() {
    $all_vars = $this->extract_vars($this->q);
    $this->infos["vars"] = $all_vars;
    $this->parse_DatasetClause();
    $this->parse_WhereClause();
}
/**
 * Consumes all leading "FROM <iri>" / "FROM NAMED <iri>" clauses.
 *
 * Expanded IRIs are collected in infos["datasets"] and
 * infos["named_datasets"]. A clause is accepted when the IRI is
 * followed by whitespace or by the end of the query, so a FROM clause
 * that is the last thing in the query is recognised too.
 */
function parse_DatasetClause() {
    $q = $this->q;
    $this->infos["datasets"] = array();
    $this->infos["named_datasets"] = array();
    while (preg_match("/^FROM\\s*(NAMED)?\\s*([^\\s]+)(?:\\s|\$)/is", $q, $matches)) {
        $named = $matches[1] ? true : false;
        $iri = $this->expand_to_iri($matches[2]);
        if ($named) {
            $this->infos["named_datasets"][] = $iri;
            $this->logs[] = "adding named dataset: '" . $iri . "'";
        }
        else {
            $this->infos["datasets"][] = $iri;
            $this->logs[] = "adding default dataset: '" . $iri . "'";
        }
        $q = trim(substr($q, strlen($matches[0])));
    }
    $this->q = trim($q);
}
/**
 * Parses the WHERE clause (the WHERE keyword itself is optional).
 *
 * On success the parsed group entries are stored in infos["patterns"]
 * and the query is advanced past the group. A missing clause is an
 * error for every query type except "describe".
 */
function parse_WhereClause() {
    if (!preg_match("/^(WHERE)?\\s*(\\{.*)\$/is", $this->q, $matches)) {
        if ($this->infos["query_type"] != "describe") {
            $this->errors[] = "empty where clause (or missing brackets) in parse_WhereClause()";
        }
        return;
    }
    $group = $this->parse_GroupGraphPattern(trim($matches[2]));
    if (!$group) {
        $this->errors[] = "could not extract group graph pattern in parse_WhereClause()";
        return;
    }
    $this->q = $group["unparsed_val"];
    $this->infos["patterns"] = $group["entries"];
}
/**
 * Parses the solution modifiers in their fixed order:
 * ORDER BY, LIMIT, OFFSET.
 */
function parse_SolutionModifier() {
    $this->parse_OrderClause();
    $this->parse_LimitClause();
    $this->parse_OffsetClause();
}
/**
 * Consumes a leading "ORDER BY" and delegates the conditions that
 * follow to parse_OrderCondition().
 */
function parse_OrderClause() {
    if (!preg_match("/^ORDER\\s*BY\\s*(.*)/is", $this->q, $matches)) {
        return;
    }
    $this->q = trim($matches[1]);
    $this->parse_OrderCondition();
}
/**
 * Parses the list of ORDER BY conditions.
 *
 * Each loop iteration consumes one condition, which is either
 *  - an optionally ASC/DESC-prefixed bracketed expression,
 *  - a variable, or
 *  - a function call.
 * Parsing stops at the first position where none of these matches.
 * The collected conditions are stored in infos["order_conditions"].
 *
 * The enclosing loop already handles any number of conditions, so each
 * bracketed expression is tried exactly once per iteration (a failing
 * expression therefore records its errors once).
 */
function parse_OrderCondition() {
    $q = $this->q;
    $conds = array();
    do {
        $cond = false;
        if (preg_match("/^(ASC|DESC)?(\\s*)(\\(.*)\$/is", $q, $matches)) {
            if (($bracket_data = $this->extract_bracket_data($matches[3])) && ($sub_result = $this->parse_Expression(trim($bracket_data)))) {
                $cond = true;
                $conds[] = array(
                    "type" => "expression",
                    "direction" => strtolower($matches[1]),
                    "expression" => $sub_result,
                );
                // Skip direction keyword, whitespace, content and both brackets.
                $q = trim(substr($q, strlen($matches[1] . $matches[2] . $bracket_data) + 2));
            }
        }
        elseif ($sub_result = $this->parse_Var($q)) {
            $cond = true;
            $q = $sub_result["unparsed_val"];
            unset($sub_result["unparsed_val"]);
            $conds[] = $sub_result;
        }
        elseif ($sub_result = $this->parse_FunctionCall($q)) {
            $cond = true;
            $q = $sub_result["unparsed_val"];
            unset($sub_result["unparsed_val"]);
            $conds[] = $sub_result;
        }
    } while ($cond);
    $this->infos["order_conditions"] = $conds;
    $this->q = trim($q);
}
/**
 * Consumes a leading "LIMIT n" and stores n (as matched, i.e. a digit
 * string) in infos["limit"].
 */
function parse_LimitClause() {
    if (!preg_match("/^LIMIT\\s*([0-9]+)/is", $this->q, $matches)) {
        return;
    }
    $this->infos["limit"] = $matches[1];
    $this->q = trim(substr($this->q, strlen($matches[0])));
}
/**
 * Consumes a leading "OFFSET n" and stores n (as matched, i.e. a digit
 * string) in infos["offset"].
 */
function parse_OffsetClause() {
    if (!preg_match("/^OFFSET\\s*([0-9]+)/is", $this->q, $matches)) {
        return;
    }
    $this->infos["offset"] = $matches[1];
    $this->q = trim(substr($this->q, strlen($matches[0])));
}
/**
 * Parses a "{ ... }" group graph pattern at the start of $val.
 *
 * The content of the braces is parsed with parse_GraphPattern(); the
 * text after the closing brace (minus one optional leading ".") is
 * returned as "unparsed_val". Content inside the braces that
 * parse_GraphPattern() could not consume is reported as a warning
 * instead of being dropped silently.
 *
 * @param string $val Text starting with "{".
 * @return array|false array("type" => "group", "entries" => ...,
 *                     "unparsed_val" => ...) or false if $val does not
 *                     start with "{".
 */
function parse_GroupGraphPattern($val = "") {
    if (!preg_match("/^(\\{.*)\$/s", $val, $matches)) {
        return false;
    }
    $bracket_data = $this->extract_bracket_data($matches[1]);
    // +2 skips the opening and the closing brace.
    $unparsed_val = trim(substr($val, strlen($bracket_data) + 2));
    if (substr($unparsed_val, 0, 1) == ".") {
        $unparsed_val = trim(substr($unparsed_val, 1));
    }
    $pattern = $this->parse_GraphPattern(trim($bracket_data));
    $leftover = trim($pattern["unparsed_val"]);
    if ($leftover !== "") {
        $this->warnings[] = "could not parse '" . $leftover . "' in parse_GroupGraphPattern()";
    }
    return array(
        "type" => "group",
        "entries" => $pattern["entries"],
        "unparsed_val" => $unparsed_val,
    );
}
/**
 * Parses the content of a group: an optional block of triples, then
 * optionally one non-triples pattern followed (recursively) by another
 * graph pattern. A single "." separator is skipped after each part.
 *
 * @param string $val Group content (without the surrounding braces).
 * @return array array("entries" => list of parsed parts,
 *               "unparsed_val" => remaining text).
 */
function parse_GraphPattern($val = "") {
    $entries = array();

    if ($val) {
        $triples = $this->parse_Triples($val);
        if ($triples && count($triples["triples"])) {
            $entries[] = $triples;
            $val = $triples["unparsed_val"];
            if (substr($val, 0, 1) === ".") {
                $val = trim(substr($val, 1));
            }
        }
    }

    if ($val) {
        $other = $this->parse_GraphPatternNotTriples($val);
        if ($other && $other["type"]) {
            $val = $other["unparsed_val"];
            unset($other["unparsed_val"]);
            $entries[] = $other;
            if (substr($val, 0, 1) === ".") {
                $val = trim(substr($val, 1));
            }
            if ($val) {
                $nested = $this->parse_GraphPattern($val);
                if ($nested && count($nested["entries"])) {
                    // The nested entry list is appended as one element.
                    $entries[] = $nested["entries"];
                    $val = $nested["unparsed_val"];
                    if (substr($val, 0, 1) === ".") {
                        $val = trim(substr($val, 1));
                    }
                }
            }
        }
    }

    return array(
        "entries" => $entries,
        "unparsed_val" => trim($val),
    );
}
/**
 * Parses one non-triples pattern at the start of $val:
 * OPTIONAL { ... }, a group (possibly the first branch of a UNION),
 * GRAPH ... { ... }, or a FILTER constraint.
 *
 * The UNION keyword is matched case-insensitively both when detecting
 * and when consuming it, so "union" is handled like "UNION".
 *
 * @param string $val Text to parse.
 * @return array|false Parsed pattern including "unparsed_val", or false.
 */
function parse_GraphPatternNotTriples($val = "") {
    if (preg_match("/^(OPTIONAL)(\\s*)(\\{.*)\$/is", $val, $matches)) {
        $bracket_data = $this->extract_bracket_data($matches[3]);
        return array(
            "type" => "optional",
            "pattern" => $this->parse_GroupGraphPattern("{" . trim($bracket_data) . "}"),
            "unparsed_val" => trim(substr($val, strlen($matches[1] . $matches[2] . $bracket_data) + 2)),
        );
    }
    if (($sub_result = $this->parse_GroupGraphPattern($val)) && $sub_result["type"]) {
        $val = $sub_result["unparsed_val"];
        $result = $sub_result;
        if (preg_match("/^UNION/i", $val)) {
            unset($sub_result["unparsed_val"]);
            $result = array(
                "type" => "union",
                "entries" => array(
                    $sub_result,
                ),
            );
            // Collect every further "UNION { ... }" branch.
            while (preg_match("/^UNION\\s*(.*)\$/is", $val, $matches)) {
                $val = trim($matches[1]);
                if (($sub_result = $this->parse_GroupGraphPattern($val)) && $sub_result["type"]) {
                    $val = $sub_result["unparsed_val"];
                    unset($sub_result["unparsed_val"]);
                    $result["entries"][] = $sub_result;
                }
            }
            $result["unparsed_val"] = $val;
        }
        return $result;
    }
    if ($sub_result = $this->parse_GraphGraphPattern($val)) {
        return $sub_result;
    }
    if ($sub_result = $this->parse_Constraint($val)) {
        return $sub_result;
    }
    return false;
}
/**
 * Parses "GRAPH <name> { ... }" where the name is a variable, a blank
 * node or an IRI reference (tried in that order).
 *
 * @param string $val Text to parse.
 * @return array|false array("type" => "graph", "graph" => name term,
 *                     "pattern" => group, "unparsed_val" => ...) or false.
 */
function parse_GraphGraphPattern($val = "") {
    if (!preg_match("/^(GRAPH)(\\s*)(.*)\$/is", $val, $matches)) {
        return false;
    }
    $rest = trim($matches[3]);

    $graph = $this->parse_Var($rest);
    if (!$graph) {
        $graph = $this->parse_BlankNode($rest);
    }
    if (!$graph) {
        $graph = $this->parse_IRIref($rest);
    }
    if (!$graph) {
        return false;
    }

    $rest = $graph["unparsed_val"];
    unset($graph["unparsed_val"]);
    $group = $this->parse_GroupGraphPattern($rest);
    if (!$group) {
        return false;
    }
    $rest = $group["unparsed_val"];
    unset($group["unparsed_val"]);
    return array(
        "type" => "graph",
        "graph" => $graph,
        "pattern" => $group,
        "unparsed_val" => $rest,
    );
}
/**
 * Parses a FILTER constraint. After the keyword, a bracketed
 * expression, a built-in call and a function call are tried in turn.
 *
 * @param string $val Text to parse.
 * @return array|false array with "type" => "filter", a "sub_type", the
 *                     parsed payload and "unparsed_val", or false.
 */
function parse_Constraint($val = "") {
    if (!preg_match("/^(FILTER)(\\s*)(.*)\$/is", $val, $matches)) {
        return false;
    }
    $rest = trim($matches[3]);

    $bracketted = $this->parse_BrackettedExpression($rest);
    if ($bracketted) {
        return array(
            "type" => "filter",
            "sub_type" => "expression",
            "expression" => $bracketted["expression"],
            "unparsed_val" => $bracketted["unparsed_val"],
        );
    }

    $built_in = $this->parse_BuiltInCall($rest);
    if ($built_in) {
        return array(
            "type" => "filter",
            "sub_type" => "built_in_call",
            "call" => $built_in,
            "unparsed_val" => $built_in["unparsed_val"],
        );
    }

    $function = $this->parse_FunctionCall($rest);
    if ($function) {
        return array(
            "type" => "filter",
            "sub_type" => "function_call",
            "call" => $function,
            "unparsed_val" => $function["unparsed_val"],
        );
    }

    return false;
}
/**
 * Parses the "{ ... }" template of a CONSTRUCT query.
 *
 * The parsed triples are stored in infos["template_triples"]. Every
 * variable used in the template, and every blank node (with ":"
 * replaced by "_"), is added once to infos["result_vars"].
 * An error is recorded when the query does not start with "{".
 */
function parse_ConstructTemplate() {
    $q = $this->q;
    if (!preg_match("/^(\\{.*)\$/s", $q, $matches)) {
        $this->errors[] = "couldn't extract ConstructTriples in parse_ConstructTemplate";
        return;
    }
    $bracket_data = $this->extract_bracket_data($matches[1]);
    $this->q = trim(substr($q, strlen($bracket_data) + 2));

    $template = $this->parse_ConstructTriples(trim($bracket_data));
    if (!$template) {
        return;
    }
    $this->infos["template_triples"] = $template;

    foreach ($template["triples"] as $triple) {
        foreach (array("s", "p", "o") as $position) {
            $term = $triple[$position];
            $kind = $term["type"];
            $name = $term["val"];
            if ($kind == "bnode") {
                $name = str_replace(":", "_", $name);
            }
            elseif ($kind != "var") {
                continue;
            }
            if (!in_array($name, $this->infos["result_vars"])) {
                $this->infos["result_vars"][] = $name;
            }
        }
    }
}
/**
 * Parses the triples of a CONSTRUCT template: one parse_Triples1() run
 * followed, recursively, by further triples. A single "." separator is
 * skipped after each part.
 *
 * @param string $val Template content.
 * @return array array("type" => "triples", "triples" => list,
 *               "unparsed_val" => remaining text).
 */
function parse_ConstructTriples($val = "") {
    $triples = array();
    $head = $val ? $this->parse_Triples1($val) : false;
    if ($head && count($head["triples"])) {
        $triples = array_merge($triples, $head["triples"]);
        $val = $head["unparsed_val"];
        if (substr($val, 0, 1) === ".") {
            $val = trim(substr($val, 1));
        }
        $tail = $val ? $this->parse_ConstructTriples($val) : false;
        if ($tail && count($tail["triples"])) {
            $triples = array_merge($triples, $tail["triples"]);
            $val = $tail["unparsed_val"];
            if (substr($val, 0, 1) === ".") {
                $val = trim(substr($val, 1));
            }
        }
    }
    return array(
        "type" => "triples",
        "triples" => $triples,
        "unparsed_val" => trim($val),
    );
}
/**
 * Parses a block of triples: one parse_Triples1() run followed,
 * recursively, by further triples. A single "." separator is skipped
 * after each part.
 *
 * @param string $val Text to parse.
 * @return array array("type" => "triples", "triples" => list,
 *               "unparsed_val" => remaining text).
 */
function parse_Triples($val = "") {
    $triples = array();
    $head = $val ? $this->parse_Triples1($val) : false;
    if ($head && count($head["triples"])) {
        $triples = array_merge($triples, $head["triples"]);
        $val = $head["unparsed_val"];
        if (substr($val, 0, 1) === ".") {
            $val = trim(substr($val, 1));
        }
        $tail = $val ? $this->parse_Triples($val) : false;
        if ($tail && count($tail["triples"])) {
            $triples = array_merge($triples, $tail["triples"]);
            $val = $tail["unparsed_val"];
            if (substr($val, 0, 1) === ".") {
                $val = trim(substr($val, 1));
            }
        }
    }
    return array(
        "type" => "triples",
        "triples" => $triples,
        "unparsed_val" => trim($val),
    );
}
/**
 * Token-by-token parser for triples, including ";" / "," shorthand and
 * "[ ... ]" blank node property lists.
 *
 * $state tells what the next term is: 1 = subject, 2 = predicate,
 * 3 = object. Subjects and predicates are kept on stacks so that
 * shorthand forms and nested property lists can reuse them. The loop
 * runs as long as an iteration consumed a term or a punctuation token.
 *
 * @param string $val Text to parse.
 * @return array array("type" => "triples", "triples" => list of
 *               array("s", "p", "o"), "unparsed_val" => remaining text).
 */
function parse_Triples1($val = "") {
// NOTE(review): $nr is never read in this method.
$nr = rand();
$triples = array();
$state = 1;
$s_stack = array();
$p_stack = array();
$state_stack = array();
do {
$proceed = false;
$blank_node_prop_list_start_found = false;
$term = false;
// Try each term type in turn; the first one that matches wins.
if ($sub_result = $this
->parse_Var($val)) {
$term = $sub_result;
$val = $sub_result["unparsed_val"];
}
elseif ($sub_result = $this
->parse_IRIref($val)) {
$term = $sub_result;
$val = $sub_result["unparsed_val"];
}
elseif ($sub_result = $this
->parse_RDFLiteral($val)) {
$term = $sub_result;
$val = $sub_result["unparsed_val"];
}
// NOTE(review): "(\+\-)?" matches the two-character sequence "+-", so
// $matches[1] can never equal "-" below; "(\+|\-)?" was probably
// intended - confirm against parse_NumericLiteral() before changing.
elseif (preg_match("/^(\\+\\-)?(.*)\$/s", $val, $matches) && ($sub_result = $this
->parse_NumericLiteral(trim($matches[2])))) {
$sub_result["negated"] = $matches[1] == "-" ? true : false;
$term = $sub_result;
$val = $sub_result["unparsed_val"];
}
elseif ($sub_result = $this
->parse_BooleanLiteral($val)) {
$term = $sub_result;
$val = $sub_result["unparsed_val"];
}
elseif ($sub_result = $this
->parse_BlankNode($val)) {
$term = $sub_result;
$val = $sub_result["unparsed_val"];
}
// "()" - the empty collection, represented as rdf:nil.
elseif (preg_match("/^\\(\\s*\\)(.*)\$/s", $val, $matches)) {
$term = array(
"type" => "iri",
"val" => "http://www.w3.org/1999/02/22-rdf-syntax-ns#nil",
);
$val = trim($matches[1]);
}
// "a" followed by whitespace - shorthand for rdf:type.
elseif (preg_match("/^a\\s+(.*)\$/s", $val, $matches)) {
$term = array(
"type" => "iri",
"val" => "http://www.w3.org/1999/02/22-rdf-syntax-ns#type",
);
$val = trim($matches[1]);
}
// "=" - mapped to owl:sameAs.
elseif (preg_match("/^=\\s*(.*)\$/s", $val, $matches)) {
$term = array(
"type" => "iri",
"val" => "http://www.w3.org/2002/07/owl#sameAs",
);
$val = trim($matches[1]);
}
elseif ($sub_result = $this
->parse_Collection($val)) {
$term = $sub_result;
$val = $sub_result["unparsed_val"];
}
// "[" opens a blank node property list: create a fresh bnode term,
// register it as a variable (":" replaced by "_") and remember the
// current state so that "]" can restore it.
elseif (preg_match("/^\\[\\s*(.*)\$/s", $val, $matches)) {
$id = $this
->get_next_bnode_id();
if (!in_array(str_replace(":", "_", $id), $this->infos["vars"])) {
$this->infos["vars"][] = str_replace(":", "_", $id);
}
$term = array(
"type" => "bnode",
"val" => $id,
);
$val = trim($matches[1]);
$state_stack[] = $state;
$blank_node_prop_list_start_found = true;
}
// "]" closes a property list: drop its subject and predicate and
// return to the state saved at the matching "[".
elseif (preg_match("/^\\]\\s*(.*)\$/s", $val, $matches)) {
$val = trim($matches[1]);
$proceed = true;
$s_stack = $this
->pop($s_stack);
$p_stack = $this
->pop($p_stack);
$state = count($state_stack) ? $state_stack[count($state_stack) - 1] : 1;
$state_stack = $this
->pop($state_stack);
}
// ";" - same subject, new predicate.
elseif (preg_match("/^\\;\\s*(.*)\$/s", $val, $matches)) {
$val = trim($matches[1]);
$proceed = true;
$state = 2;
$p_stack = $this
->pop($p_stack);
}
// "," - same subject and predicate, new object.
elseif (preg_match("/^\\,\\s*(.*)\$/s", $val, $matches)) {
$val = trim($matches[1]);
$proceed = true;
$state = 3;
}
if ($term) {
unset($term["unparsed_val"]);
$proceed = true;
if ($state == 1) {
// Subject position.
$s_stack[] = $term;
$state = 2;
$p_stack = $this
->pop($p_stack);
}
elseif ($state == 2) {
// Predicate position.
$p_stack[] = $term;
$state = 3;
}
elseif ($state == 3) {
// Object position: emit a triple from the stack tops.
$triples[] = array(
"s" => $s_stack[count($s_stack) - 1],
"p" => $p_stack[count($p_stack) - 1],
"o" => $term,
);
$this->logs[] = "adding triple '" . $s_stack[count($s_stack) - 1]["val"] . "' - '" . $p_stack[count($p_stack) - 1]["val"] . "' - '" . $term["val"] . "'";
$state = 1;
if ($blank_node_prop_list_start_found) {
// The new bnode is the object here and the subject of what follows.
$state = 2;
$s_stack[] = $term;
}
elseif (count($state_stack)) {
// Still inside a property list: a predicate is expected next.
$state = 2;
}
elseif (substr($val, 0, 1) === ".") {
$val = trim(substr($val, 1));
}
}
}
} while ($proceed);
return array(
"type" => "triples",
"triples" => $triples,
"unparsed_val" => trim($val),
);
}
/**
 * Parses a "( ... )" collection at the start of $val. The content is
 * returned as an unparsed, trimmed string.
 *
 * @param string $val Text to parse.
 * @return array|false array("type" => "collection", "val" => content,
 *                     "unparsed_val" => text after ")") or false.
 */
function parse_Collection($val = "") {
    if (!preg_match("/^(\\(.*)\$/s", $val, $matches)) {
        return false;
    }
    $bracket_data = $this->extract_bracket_data($matches[1]);
    $rest = substr($val, strlen($bracket_data) + 2);
    return array(
        "type" => "collection",
        "val" => trim($bracket_data),
        "unparsed_val" => trim($rest),
    );
}
/**
 * Parses a variable or, failing that, an IRI reference.
 *
 * @param string $val Text to parse.
 * @return array|false Result of parse_Var() / parse_IRIref(), or false.
 */
function parse_VarOrIRIref($val = "") {
    $var = $this->parse_Var($val);
    if ($var) {
        return $var;
    }
    $iri = $this->parse_IRIref($val);
    return $iri ? $iri : false;
}
/**
 * Parses a variable ("?name" or "$name", name = letters, digits, "_")
 * at the start of $val.
 *
 * @param string $val Text to parse.
 * @return array|false array("type" => "var", "val" => name,
 *                     "unparsed_val" => trimmed remainder) or false.
 */
function parse_Var($val = "") {
    if (!preg_match("/^[\\?\$]{1}([0-9a-z_]+)/i", $val, $matches)) {
        return false;
    }
    $rest = substr($val, strlen($matches[0]));
    return array(
        "type" => "var",
        "val" => $matches[1],
        "unparsed_val" => trim($rest),
    );
}
/**
 * Entry point for expression parsing; an expression is a
 * conditional-or expression.
 *
 * @param string $val Text to parse.
 * @return array|false Result of parse_ConditionalOrExpression().
 */
function parse_Expression($val = "") {
    $expression = $this->parse_ConditionalOrExpression($val);
    return $expression;
}
/**
 * Parses "and-expr ( || and-expr )*".
 *
 * A single operand is returned as-is. Several operands are wrapped in
 * an "or" expression whose entries (from the second on) carry the
 * "operator" that preceded them.
 *
 * @param string $val Text to parse.
 * @return array|false
 */
function parse_ConditionalOrExpression($val = "") {
    $first = $this->parse_ConditionalAndExpression($val);
    if (!$first) {
        return false;
    }
    $val = $first["unparsed_val"];
    $entries = array($first);

    while (preg_match("/^(\\|\\|)(.*)\$/s", $val, $matches)) {
        $val = trim($matches[2]);
        $next = $this->parse_ConditionalAndExpression($val);
        if (!$next) {
            break;
        }
        $val = $next["unparsed_val"];
        unset($next["unparsed_val"]);
        $next["operator"] = $matches[1];
        $entries[] = $next;
    }

    if (count($entries) == 1) {
        return $first;
    }
    unset($entries[0]["unparsed_val"]);
    return array(
        "type" => "expression",
        "sub_type" => "or",
        "entries" => $entries,
        "unparsed_val" => $val,
    );
}
/**
 * Parses "value-logical ( && value-logical )*".
 *
 * A single operand is returned as-is. Several operands are wrapped in
 * an "and" expression whose entries (from the second on) carry the
 * "operator" that preceded them. As in the or / additive /
 * multiplicative parsers, the stale "unparsed_val" of the first entry
 * is removed from the combined result.
 *
 * @param string $val Text to parse.
 * @return array|false
 */
function parse_ConditionalAndExpression($val = "") {
    if ($sub_result = $this->parse_ValueLogical($val)) {
        $val = $sub_result["unparsed_val"];
        $entries = array(
            $sub_result,
        );
        do {
            $proceed = false;
            if (preg_match("/^(\\&\\&)(.*)\$/s", $val, $matches)) {
                $operator = $matches[1];
                $val = trim($matches[2]);
                if ($val && ($sub_sub_result = $this->parse_ValueLogical($val))) {
                    $proceed = true;
                    $val = $sub_sub_result["unparsed_val"];
                    unset($sub_sub_result["unparsed_val"]);
                    $sub_sub_result["operator"] = $operator;
                    $entries[] = $sub_sub_result;
                }
            }
        } while ($proceed);
        if (count($entries) == 1) {
            return $sub_result;
        }
        // The remainder is reported once, on the combined expression.
        unset($entries[0]["unparsed_val"]);
        return array(
            "type" => "expression",
            "sub_type" => "and",
            "entries" => $entries,
            "unparsed_val" => $val,
        );
    }
    return false;
}
/**
 * A value-logical is a relational expression; delegates to
 * parse_RelationalExpression().
 *
 * @param string $val Text to parse.
 * @return array|false
 */
function parse_ValueLogical($val = "") {
    $relational = $this->parse_RelationalExpression($val);
    return $relational;
}
/**
 * Parses "numeric-expr ( (= | != | < | > | <= | >=) numeric-expr )?".
 *
 * The two-character operators are listed before the one-character
 * ones in the pattern: regex alternation takes the first alternative
 * that matches, so "<" listed first would always win over "<=".
 *
 * Without an operator the left operand is returned as-is. When an
 * operator is present but the right operand cannot be parsed, an error
 * is recorded and the left operand is returned.
 *
 * @param string $val Text to parse.
 * @return array|false array("expressions" => array(left, right),
 *                     "unparsed_val" => ...), the left operand, or false.
 */
function parse_RelationalExpression($val = "") {
    if ($sub_result = $this->parse_NumericExpression($val)) {
        $val = $sub_result["unparsed_val"];
        if (preg_match("/^(\\!\\=|\\<\\=|\\>\\=|\\=|\\<|\\>)(.*)\$/s", $val, $matches)) {
            $operator = $matches[1];
            $val = trim($matches[2]);
            if ($sub_sub_result = $this->parse_NumericExpression($val)) {
                $val = $sub_sub_result["unparsed_val"];
                unset($sub_sub_result["unparsed_val"]);
                // The operator is attached to the right-hand operand.
                $sub_sub_result["operator"] = $operator;
                unset($sub_result["unparsed_val"]);
                return array(
                    "expressions" => array(
                        $sub_result,
                        $sub_sub_result,
                    ),
                    "unparsed_val" => $val,
                );
            }
            else {
                $this->errors[] = "expected NumericExpression in '" . $val . "' in parse_RelationalExpression()";
            }
        }
        return $sub_result;
    }
    return false;
}
/**
 * A numeric expression is an additive expression; delegates to
 * parse_AdditiveExpression().
 *
 * @param string $val Text to parse.
 * @return array|false
 */
function parse_NumericExpression($val = "") {
    $additive = $this->parse_AdditiveExpression($val);
    return $additive;
}
/**
 * Parses "mult-expr ( (+ | -) mult-expr )*".
 *
 * A single operand is returned as-is. Several operands are wrapped in
 * an "additive" expression whose entries (from the second on) carry the
 * "operator" that preceded them.
 *
 * @param string $val Text to parse.
 * @return array|false
 */
function parse_AdditiveExpression($val = "") {
    $first = $this->parse_MultiplicativeExpression($val);
    if (!$first) {
        return false;
    }
    $val = $first["unparsed_val"];
    $entries = array($first);

    while (preg_match("/^(\\+|\\-)/", $val, $matches)) {
        $val = trim(substr($val, 1));
        $next = $this->parse_MultiplicativeExpression($val);
        if (!$next) {
            break;
        }
        $val = $next["unparsed_val"];
        unset($next["unparsed_val"]);
        $next["operator"] = $matches[1];
        $entries[] = $next;
    }

    if (count($entries) == 1) {
        return $first;
    }
    unset($entries[0]["unparsed_val"]);
    return array(
        "type" => "expression",
        "sub_type" => "additive",
        "entries" => $entries,
        "unparsed_val" => $val,
    );
}
/**
 * Parses "unary-expr ( (* | /) unary-expr )*".
 *
 * A single operand is returned as-is. Several operands are wrapped in
 * a "multiplicative" expression whose entries (from the second on)
 * carry the "operator" that preceded them.
 *
 * @param string $val Text to parse.
 * @return array|false
 */
function parse_MultiplicativeExpression($val = "") {
    $first = $this->parse_UnaryExpression($val);
    if (!$first) {
        return false;
    }
    $val = $first["unparsed_val"];
    $entries = array($first);

    while (preg_match("/^(\\*|\\/)/", $val, $matches)) {
        $val = trim(substr($val, 1));
        $next = $this->parse_UnaryExpression($val);
        if (!$next) {
            break;
        }
        $val = $next["unparsed_val"];
        unset($next["unparsed_val"]);
        $next["operator"] = $matches[1];
        $entries[] = $next;
    }

    if (count($entries) == 1) {
        return $first;
    }
    unset($entries[0]["unparsed_val"]);
    return array(
        "type" => "expression",
        "sub_type" => "multiplicative",
        "entries" => $entries,
        "unparsed_val" => $val,
    );
}
/**
 * Parses an optional unary modifier ("!", "+" or "-") followed by a
 * primary expression. The modifier ("" when absent) is stored under
 * "modifier" in the primary expression's result.
 *
 * When the primary expression cannot be parsed, false is returned;
 * the failure is not turned into a one-element array by the
 * "modifier" assignment.
 * (presumably parse_PrimaryExpression() returns an array on success
 * and a non-array on failure, like the other parsers here - verify)
 *
 * @param string $val Text to parse.
 * @return array|false
 */
function parse_UnaryExpression($val = "") {
    if (!preg_match("/^(\\!|\\+|\\-)?(.+)\$/s", $val, $matches)) {
        return false;
    }
    $result = $this->parse_PrimaryExpression(trim($matches[2]));
    if (!is_array($result)) {
        return false;
    }
    $result["modifier"] = $matches[1];
    return $result;
}
/**
 * Parses a built-in call at the start of $val:
 *  - BOUND(?var)              -> "var"
 *  - STR / LANG / DATATYPE / isIRI / isURI / isBlank / isLiteral (expr)
 *                             -> "expression"
 *  - langMatches(expr, expr)  -> "expressions" (2 entries)
 *  - REGEX(expr, expr[, expr])-> "expressions" (3 entries)
 * Missing trailing arguments are represented by an empty array.
 *
 * BOUND tolerates whitespace around the variable, and the remainder of
 * an argument is only read from sub-results that actually provide one.
 *
 * @param string $val Text to parse.
 * @return array|false array with "type" => "built_in_call", the
 *                     lower-cased "call" name, its arguments and
 *                     "unparsed_val", or false.
 */
function parse_BuiltInCall($val = "") {
    if (preg_match("/^BOUND\\s*\\(\\s*[\\?\$]{1}([0-9a-z_]+)\\s*\\)/is", $val, $matches)) {
        return array(
            "type" => "built_in_call",
            "call" => "bound",
            "var" => $matches[1],
            "unparsed_val" => trim(substr($val, strlen($matches[0]))),
        );
    }
    if (preg_match("/^(STR|LANG|DATATYPE|isIRI|isURI|isBlank|isLiteral)(\\s*)(\\(.*)\$/is", $val, $matches)) {
        $bracket_data = $this->extract_bracket_data($matches[3]);
        return array(
            "type" => "built_in_call",
            "call" => strtolower($matches[1]),
            "expression" => $this->parse_Expression(trim($bracket_data)),
            "unparsed_val" => trim(substr($val, strlen($matches[1] . $matches[2] . $bracket_data) + 2)),
        );
    }
    if (preg_match("/^(langMatches)(\\s*)(\\(.*)\$/is", $val, $matches)) {
        $bracket_data = $this->extract_bracket_data($matches[3]);
        $expr_1 = $this->parse_Expression(trim($bracket_data));
        $rest_1 = (is_array($expr_1) && isset($expr_1["unparsed_val"])) ? trim($expr_1["unparsed_val"]) : "";
        $expr_2 = preg_match("/^,\\s*(.*)\$/s", $rest_1, $sub_matches) ? $this->parse_Expression(trim($sub_matches[1])) : array();
        return array(
            "type" => "built_in_call",
            "call" => strtolower($matches[1]),
            "expressions" => array(
                $expr_1,
                $expr_2,
            ),
            "unparsed_val" => trim(substr($val, strlen($matches[1] . $matches[2] . $bracket_data) + 2)),
        );
    }
    if (preg_match("/^(REGEX)(\\s*)(\\(.*)\$/is", $val, $matches)) {
        $bracket_data = $this->extract_bracket_data($matches[3]);
        $expr_1 = $this->parse_Expression(trim($bracket_data));
        $rest_1 = (is_array($expr_1) && isset($expr_1["unparsed_val"])) ? trim($expr_1["unparsed_val"]) : "";
        $expr_2 = preg_match("/^,\\s*(.*)\$/s", $rest_1, $sub_matches) ? $this->parse_Expression(trim($sub_matches[1])) : array();
        // $expr_2 is an empty array (or false) when the second argument is missing.
        $rest_2 = (is_array($expr_2) && isset($expr_2["unparsed_val"])) ? trim($expr_2["unparsed_val"]) : "";
        $expr_3 = preg_match("/^,\\s*(.*)\$/s", $rest_2, $sub_matches) ? $this->parse_Expression(trim($sub_matches[1])) : array();
        return array(
            "type" => "built_in_call",
            "call" => strtolower($matches[1]),
            "expressions" => array(
                $expr_1,
                $expr_2,
                $expr_3,
            ),
            "unparsed_val" => trim(substr($val, strlen($matches[1] . $matches[2] . $bracket_data) + 2)),
        );
    }
    return false;
}
/**
 * Parses a function call: an IRI reference immediately followed by an
 * argument list.
 *
 * @param string $val Remaining expression text.
 * @return array|false Array with "type" => "function_call", the "iri", the
 *                     parsed "arg_list" (without its own "unparsed_val") and
 *                     the "unparsed_val" left after the argument list; false
 *                     when either the IRI or the argument list is missing.
 */
function parse_FunctionCall($val = "") {
    $iri_result = $this->parse_IRIref($val);
    if (!$iri_result) {
        return false;
    }
    $arg_list = $this->parse_ArgList($iri_result["unparsed_val"]);
    if (!$arg_list) {
        return false;
    }
    /* the leftover text belongs to the call as a whole, not to the list */
    $remainder = $arg_list["unparsed_val"];
    unset($arg_list["unparsed_val"]);
    return array(
        "type" => "function_call",
        "iri" => $iri_result["val"],
        "arg_list" => $arg_list,
        "unparsed_val" => $remainder,
    );
}
/**
 * Parses an IRI reference that may be followed by an argument list.
 *
 * @param string $val Remaining expression text.
 * @return array|false A "function_call" array when an argument list follows
 *                     the IRI, the plain parse_IRIref() result when it does
 *                     not, or false when $val does not start with an IRI.
 */
function parse_IRIrefOrFunction($val = "") {
    $iri_result = $this->parse_IRIref($val);
    if (!$iri_result) {
        return false;
    }
    $arg_list = $this->parse_ArgList($iri_result["unparsed_val"]);
    if (!$arg_list) {
        /* no argument list: it is just an IRI */
        return $iri_result;
    }
    $remainder = $arg_list["unparsed_val"];
    unset($arg_list["unparsed_val"]);
    return array(
        "type" => "function_call",
        "iri" => $iri_result["val"],
        "arg_list" => $arg_list,
        "unparsed_val" => $remainder,
    );
}
/**
 * Parses a parenthesised argument list at the start of $val.
 *
 * "()" (optionally with whitespace inside) yields an empty list. Otherwise
 * the bracket content is split into expressions via parse_Expression();
 * a leading comma before each expression is skipped.
 *
 * @param string $val Remaining expression text.
 * @return array|false Array with "type" => "arg_list", the parsed "entries"
 *                     (each without its "unparsed_val") and the
 *                     "unparsed_val" following the closing parenthesis;
 *                     false when $val does not start with "(".
 */
function parse_ArgList($val = "") {
    if (preg_match("/^(\\(\\s*\\))(.*)\$/s", $val, $matches)) {
        return array(
            "type" => "arg_list",
            "entries" => array(),
            "unparsed_val" => trim($matches[2]),
        );
    }
    if (preg_match("/^(\\(.*)\$/s", $val, $matches)) {
        $bracket_data = $this->extract_bracket_data($matches[1]);
        /* "+ 2" skips the opening and closing parenthesis around $bracket_data */
        $unparsed_val = trim(substr($val, strlen($bracket_data) + 2));
        $val = $bracket_data;
        $entries = array();
        while (true) {
            $val = substr($val, 0, 1) == "," ? trim(substr($val, 1)) : trim($val);
            /* compare against "" explicitly: the argument "0" is falsy in PHP but is a valid expression */
            if ($val === "") {
                break;
            }
            $sub_result = $this->parse_Expression($val);
            if (!$sub_result) {
                break;
            }
            $remaining = isset($sub_result["unparsed_val"]) ? $sub_result["unparsed_val"] : "";
            /* a result that consumed nothing would repeat forever; stop instead */
            if (strlen((string) $remaining) >= strlen($val)) {
                break;
            }
            unset($sub_result["unparsed_val"]);
            $entries[] = $sub_result;
            $val = $remaining;
        }
        return array(
            "type" => "arg_list",
            "entries" => $entries,
            "unparsed_val" => $unparsed_val,
        );
    }
    return false;
}
/**
 * Parses a parenthesised expression at the start of $val.
 *
 * @param string $val Remaining expression text.
 * @return array|false Array with "type" => "expression", the parsed inner
 *                     "expression" and the "unparsed_val" following the
 *                     closing parenthesis; false when $val does not start
 *                     with "(".
 */
function parse_BrackettedExpression($val = "") {
    if (!preg_match("/^(\\(.*)\$/is", $val, $parts)) {
        return false;
    }
    $inner = $this->extract_bracket_data($parts[1]);
    $parsed_inner = $this->parse_Expression(trim($inner));
    /* "+ 2" skips the opening and closing parenthesis around $inner */
    $after_bracket = trim(substr($val, strlen($inner) + 2));
    return array(
        "type" => "expression",
        "expression" => $parsed_inner,
        "unparsed_val" => $after_bracket,
    );
}
/**
 * Parses a primary expression at the start of $val.
 *
 * A variable (?name or $name) is matched directly; otherwise the
 * alternatives are tried in a fixed order and the first successful result
 * is returned.
 *
 * @param string $val Remaining expression text.
 * @return array|false The parsed node (including "unparsed_val"), or false
 *                     when $val is empty or nothing matches.
 */
function parse_PrimaryExpression($val = "") {
    /* empty input only: "0" is falsy in PHP but is a valid numeric literal */
    if (!$val && !is_numeric($val)) {
        return false;
    }
    if (preg_match("/^[\\?\$]{1}([0-9a-z_]+)/i", $val, $matches)) {
        return array(
            "type" => "var",
            "val" => $matches[1],
            "unparsed_val" => trim(substr($val, strlen($matches[0]))),
        );
    }
    /* order matters: earlier parsers take precedence over later ones */
    $parsers = array(
        "parse_BuiltInCall",
        "parse_RDFLiteral",
        "parse_NumericLiteral",
        "parse_BooleanLiteral",
        "parse_BlankNode",
        "parse_BrackettedExpression",
        "parse_IRIrefOrFunction",
    );
    foreach ($parsers as $parser) {
        if ($sub_result = $this->{$parser}($val)) {
            return $sub_result;
        }
    }
    return false;
}
/**
 * Parses an unsigned numeric literal at the start of $val.
 *
 * Patterns are tried from most to least specific:
 *  - double:  mantissa with at least one digit, followed by an exponent
 *  - decimal: digits with a dot ("1.5", "1.", ".5")
 *  - integer: digits only
 *
 * @param string $val Remaining expression text.
 * @return array|false Array with "type" => "numeric", the matched "val",
 *                     the "sub_type" and the "unparsed_val" after the
 *                     literal; false when $val does not start with a number.
 */
function parse_NumericLiteral($val = "") {
    $patterns = array(
        /* the mantissa must contain a digit, otherwise names like "e1:foo" would be read as doubles */
        "double" => "/^(?:[0-9]+\\.?[0-9]*|\\.[0-9]+)[eE][+-]?[0-9]+/",
        "decimal" => "/^(?:[0-9]+\\.[0-9]*|\\.[0-9]+)/",
        "integer" => "/^[0-9]+/",
    );
    foreach ($patterns as $sub_type => $pattern) {
        if (preg_match($pattern, $val, $matches)) {
            return array(
                "type" => "numeric",
                "val" => $matches[0],
                "sub_type" => $sub_type,
                "unparsed_val" => trim(substr($val, strlen($matches[0]))),
            );
        }
    }
    return false;
}
/**
 * Parses an RDF literal at the start of $val.
 *
 * The literal text itself is expected as a "_string_N" placeholder whose
 * value and delimiter code are read from $this->str_placeholders. An
 * optional language tag ("@en", "@zh-Hant-TW") or datatype ("^^" followed
 * by an IRI reference) may follow.
 *
 * @param string $val Remaining expression text.
 * @return array|false Array with "type" => "literal", "val", "delim_code",
 *                     optionally "lang" or "dt", and "unparsed_val"; false
 *                     when $val does not start with a string placeholder.
 */
function parse_RDFLiteral($val = "") {
    if (!preg_match("/^(_string_[0-9]+)/", $val, $matches)) {
        return false;
    }
    $placeholder = $this->str_placeholders[$matches[1]];
    $result = array(
        "type" => "literal",
        "val" => $placeholder["val"],
        "delim_code" => $placeholder["delim_code"],
    );
    $unparsed_val = trim(substr($val, strlen($matches[0])));
    /* language tag: letters followed by any number of "-subtag" parts */
    if (preg_match("/^\\@([a-z]+(?:\\-[a-z0-9]+)*)/i", $unparsed_val, $lang_matches)) {
        $result["lang"] = $lang_matches[1];
        $unparsed_val = trim(substr($unparsed_val, strlen($lang_matches[0])));
    }
    elseif (preg_match("/^\\^\\^(.*)/s", $unparsed_val, $dt_matches)) {
        /* when no IRI follows "^^", the text is left untouched in unparsed_val */
        if ($sub_result = $this->parse_IRIref($dt_matches[1])) {
            $result["dt"] = $sub_result["val"];
            $unparsed_val = trim($sub_result["unparsed_val"]);
        }
    }
    $result["unparsed_val"] = $unparsed_val;
    return $result;
}
/**
 * Parses a boolean literal ("true" or "false", case-insensitive) at the
 * start of $val.
 *
 * The keyword must end at a token boundary: it is not accepted when it is
 * directly followed by a letter, digit, underscore or colon, so that longer
 * names such as "falsehood:x" are left for the other parsers.
 *
 * @param string $val Remaining expression text.
 * @return array|false Array with "type" => "boolean", the matched "val"
 *                     (original casing) and "unparsed_val"; false otherwise.
 */
function parse_BooleanLiteral($val = "") {
    if (preg_match("/^(true|false)(?![a-z0-9_:])/i", $val, $matches)) {
        return array(
            "type" => "boolean",
            "val" => $matches[1],
            /* the lookahead is zero-width, so $matches[0] is just the keyword */
            "unparsed_val" => trim(substr($val, strlen($matches[0]))),
        );
    }
    return false;
}
/**
 * Parses an IRI reference at the start of $val.
 *
 * Two forms are recognised:
 *  - a "|_iri_N|" placeholder, looked up in $this->iri_placeholders and
 *    passed through calc_iri(); each resulting IRI is recorded once in
 *    $this->iris
 *  - a "prefix:local" name, passed through expand_qname()
 *
 * @param string $val Remaining query text.
 * @return array|false Array with "type" => "iri", the resolved "val" and
 *                     the "unparsed_val" after the reference; false when
 *                     neither form matches.
 */
function parse_IRIref($val = "") {
    /* placeholder form */
    if (preg_match("/^\\|(_iri_[0-9]+)\\|(.*)\$/s", $val, $parts)) {
        $placeholder_key = trim($parts[1]);
        $resolved = $this->calc_iri($this->iri_placeholders[$placeholder_key]);
        $already_recorded = in_array($resolved, $this->iris);
        if (!$already_recorded) {
            $this->iris[] = $resolved;
        }
        return array(
            "type" => "iri",
            "val" => $resolved,
            "unparsed_val" => trim($parts[2]),
        );
    }
    /* prefixed name form */
    if (preg_match("/^([a-z0-9]*\\:[a-z0-9.\\-_]*)(.*)\$/si", $val, $parts)) {
        $expanded = $this->expand_qname($parts[1]);
        return array(
            "type" => "iri",
            "val" => $expanded,
            "unparsed_val" => trim($parts[2]),
        );
    }
    return false;
}
/**
 * Parses a blank node at the start of $val.
 *
 * Two forms are recognised:
 *  - a labelled node "_:label"
 *  - an anonymous node "[]" (whitespace allowed inside), which receives a
 *    fresh id from get_next_bnode_id()
 *
 * In both cases a variable name derived from the node id (":" replaced by
 * "_") is added to $this->infos["vars"] if it is not already listed.
 *
 * @param string $val Remaining query text.
 * @return array|false Array with "type" => "bnode", the node id as "val"
 *                     and the "unparsed_val"; false when neither form
 *                     matches.
 */
function parse_BlankNode($val = "") {
    /* labelled blank node */
    if (preg_match("/^_\\:([a-z0-9\\.\\-\\_]*)(.*)\$/si", $val, $parts)) {
        $var_name = "__" . $parts[1];
        if (!in_array($var_name, $this->infos["vars"])) {
            $this->infos["vars"][] = $var_name;
        }
        return array(
            "type" => "bnode",
            "val" => "_:" . $parts[1],
            "unparsed_val" => trim($parts[2]),
        );
    }
    /* anonymous blank node */
    if (preg_match("/^\\[\\s*\\](.*)\$/s", $val, $parts)) {
        $node_id = $this->get_next_bnode_id();
        $var_name = str_replace(":", "_", $node_id);
        if (!in_array($var_name, $this->infos["vars"])) {
            $this->infos["vars"][] = $var_name;
        }
        return array(
            "type" => "bnode",
            "val" => $node_id,
            "unparsed_val" => trim($parts[1]),
        );
    }
    return false;
}
}