class ARC_rdfxml_parser in Taxonomy import/export via XML 5.2
Same name and namespace in other branches
- 5 arc/ARC_rdfxml_parser.php \ARC_rdfxml_parser
- 6.2 arc/ARC_rdfxml_parser.php \ARC_rdfxml_parser
- 6 arc/ARC_rdfxml_parser.php \ARC_rdfxml_parser
Hierarchy
- class \ARC_rdfxml_parser
Expanded class hierarchy of ARC_rdfxml_parser
1 string reference to 'ARC_rdfxml_parser'
File
- arc/
ARC_rdfxml_parser.php, line 70
View source
/**
 * Event-driven RDF/XML parser built on PHP's XML (expat) extension.
 *
 * The parser keeps a small state machine ($state) and a stack of open
 * subject nodes ($subjs); every statement found is appended to $triples.
 *
 * All properties the class assigns are declared here so that no dynamic
 * properties are created at runtime (deprecated as of PHP 8.2).
 */
class ARC_rdfxml_parser {
  /* Release identifier; also used in the default User-Agent header. */
  var $version = "0.2.8";
  /* Collected triples, each an array with the keys "s", "p" and "o". */
  var $triples;
  /* Stack of currently open subject nodes (index 0 is the outermost). */
  var $subjs;
  /* Namespace URI => prefix map filled by handle_ns_decl(). */
  var $nsps;
  /* Number of entries on the $subjs stack. */
  var $s_count = 0;
  /* Number of entries in $triples. */
  var $t_count = 0;
  /* Counter used by create_bnode_id(). */
  var $bnode_id = 0;
  /* Document-level xml:lang (from rdf:RDF). */
  var $xml_lang = "";
  /* Document-level base URI (from init args or xml:base on rdf:RDF). */
  var $xml_base = "";
  /* Parser state: 1 = expecting subject, 2 = expecting property,
     3 = expecting property close, 4 = expecting sub node / literal,
     6 = reading an XML literal. */
  var $state = 1;
  /* Line limit used by parse_web_file(); 0 means unlimited. */
  var $max_lines = 0;
  /* When true, the raw input is accumulated in $data. */
  var $save_data = false;
  /* Constructor arguments (base, bnode_prefix, proxy_host, ...). */
  var $init_args;
  /* Expat-style names of RDF attributes that never become triples. */
  var $skip_terms;
  /* Raw HTTP response header lines collected by parse_web_file(). */
  var $result_headers;
  /* Input encoding handed to xml_parser_create_ns(). */
  var $encoding;
  /* Prefix used for generated blank node ids. */
  var $bnode_prefix;
  /* Raw input, only filled when $save_data is enabled. */
  var $data;
  /* Current XML parser handle created by create_parser(). */
  var $parser;
  /* Target encoding reported by the XML parser after a parse. */
  var $target_encoding;
  /* URL that was finally parsed after following redirects. */
  var $parsed_url;
  /* Read by parse_web_file() as a fallback URL; never assigned by this class. */
  var $full_base;
/**
 * Stores the configuration and prepares the list of RDF terms to skip.
 *
 * @param array $args
 *   Optional settings: base, bnode_prefix, proxy_host, proxy_port,
 *   user_agent, headers, save_data, max_lines, encoding.
 *   Scalar values (including the legacy "" default) are treated as
 *   "no settings".
 */
function __construct($args = "") {
  /* The parse methods later write $this->init_args["base"]; that only works
     reliably when init_args is an array, so normalise scalar input here. */
  if (!is_array($args) && !is_object($args)) {
    $args = array();
  }
  $this->init_args = $args;
  /* base, bnode_prefix, proxy_host, proxy_port, user_agent, headers, save_data, max_lines, encoding */
  /* Names use the "namespace-uri local-name" form produced by the
     namespace-aware parser (see the " " separator in create_parser()). */
  $this->skip_terms = array(
    "http://www.w3.org/1999/02/22-rdf-syntax-ns# RDF",
    "http://www.w3.org/1999/02/22-rdf-syntax-ns# Description",
    "http://www.w3.org/1999/02/22-rdf-syntax-ns# ID",
    "http://www.w3.org/1999/02/22-rdf-syntax-ns# about",
    "http://www.w3.org/1999/02/22-rdf-syntax-ns# parseType",
    "http://www.w3.org/1999/02/22-rdf-syntax-ns# resource",
    "http://www.w3.org/1999/02/22-rdf-syntax-ns# nodeID",
    "http://www.w3.org/1999/02/22-rdf-syntax-ns# datatype",
    "http://www.w3.org/1999/02/22-rdf-syntax-ns# type",
  );
}
/**
 * Legacy same-name entry point that simply forwards to __construct().
 *
 * @param mixed $args
 *   Passed through unchanged.
 */
function ARC_rdfxml_parser($args = "") {
  $this->__construct($args);
}
/**
 * Resets all per-parse state and applies the constructor arguments.
 *
 * Missing "base" / "bnode_prefix" arguments are now detected with empty()
 * instead of being read directly, which avoids undefined-key warnings.
 *
 * @param bool $create_parser
 *   When true, an XML parser is created right away via create_parser().
 */
function init($create_parser = true) {
  $this->triples = array();
  $this->subjs = array();
  /* namespace prefixes */
  $this->nsps = array();
  $this->bnode_id = 0;
  $this->s_count = 0;
  $this->t_count = 0;
  $this->xml_lang = "";
  $this->state = 1;
  $this->xml_base = "";
  $this->result_headers = array();
  $this->encoding = "UTF-8";
  /* base */
  if (!empty($this->init_args["base"])) {
    $this->set_base($this->init_args["base"]);
  }
  /* bnode_prefix: fall back to a random "arcXXXXb" prefix */
  if (!empty($this->init_args["bnode_prefix"])) {
    $this->bnode_prefix = $this->init_args["bnode_prefix"];
  }
  else {
    $this->bnode_prefix = "arc" . substr(md5(uniqid(rand())), 0, 4) . "b";
  }
  /* save_data */
  if (isset($this->init_args["save_data"])) {
    $this->save_data = $this->init_args["save_data"];
    $this->data = "";
  }
  /* max_lines */
  if (isset($this->init_args["max_lines"])) {
    $this->max_lines = $this->init_args["max_lines"];
  }
  /* encoding */
  if (!empty($this->init_args["encoding"])) {
    $this->encoding = $this->init_args["encoding"];
  }
  /* parser */
  if ($create_parser) {
    $this->create_parser();
  }
}
/**
 * Creates the namespace-aware XML parser and wires up the handlers.
 *
 * Handlers are registered as array($this, "method") callables rather than
 * as bare method names combined with xml_set_object(), which is deprecated
 * in recent PHP releases. The callbacks invoked are the same.
 */
function create_parser() {
  /* " " separates namespace URI and local name in reported tag names. */
  $parser = xml_parser_create_ns($this->encoding, " ");
  xml_parser_set_option($parser, XML_OPTION_SKIP_WHITE, 0);
  xml_parser_set_option($parser, XML_OPTION_CASE_FOLDING, 0);
  xml_set_element_handler($parser, array($this, "handle_open"), array($this, "handle_close"));
  xml_set_character_data_handler($parser, array($this, "handle_cdata"));
  xml_set_start_namespace_decl_handler($parser, array($this, "handle_ns_decl"));
  $this->parser = $parser;
}
/**
 * Returns the target encoding recorded by the last completed parse.
 *
 * @return mixed
 *   The stored value, or null when no parse has completed yet (previously
 *   this case read an undefined property).
 */
function get_target_encoding() {
  return isset($this->target_encoding) ? $this->target_encoding : null;
}
/**
 * Returns the raw input collected while parsing.
 *
 * @return mixed
 *   The stored data, or null when nothing was stored (for example when the
 *   "save_data" argument was never supplied). Previously this case read an
 *   undefined property.
 */
function get_data() {
  return isset($this->data) ? $this->data : null;
}
/**
 * Returns the URL recorded by parse_web_file() after a redirect.
 *
 * @return string
 *   The recorded URL, or "" when none was recorded.
 */
function get_parsed_url() {
  if (isset($this->parsed_url)) {
    return $this->parsed_url;
  }
  return "";
}
/* Called by the parse_* methods after a successful parse; intentionally empty here. */
function done() {
}
/**
 * Returns the number of triples collected so far.
 *
 * @return int
 */
function get_triple_count() {
  $count = $this->t_count;
  return $count;
}
/* */
/**
 * Sets the document-level base URI.
 *
 * The value is taken verbatim when no base is known yet or when it contains
 * a ":" (treated as an absolute URI); otherwise it is resolved against the
 * current base via calc_base().
 *
 * Fix: the original condition read strlen($this->xml_base === 0), which
 * measured the length of a boolean and therefore never detected an empty
 * current base.
 *
 * @param string $base
 *   Absolute or relative base URI.
 */
function set_base($base) {
  $has_no_base = strlen((string) $this->xml_base) === 0;
  if ($has_no_base || strpos($base, ":") !== false) {
    $this->xml_base = $base;
  }
  else {
    $this->xml_base = $this->calc_base($base);
  }
}
/**
 * Picks the base URI that applies to a subject node.
 *
 * @param mixed $s
 *   Subject array, or an empty value.
 *
 * @return string
 *   The first non-empty value of $s["p_xml_base"], $s["xml_base"], or the
 *   document-level base.
 */
function get_cur_xml_base($s = "") {
  if ($s) {
    /* property-level base wins over node-level base */
    foreach (array("p_xml_base", "xml_base") as $key) {
      if (!empty($s[$key])) {
        return $s[$key];
      }
    }
  }
  return $this->xml_base;
}
/**
 * Normalises a base URI for resolving relative references.
 *
 * Everything from the first "#" on is dropped, and a trailing "/" is added
 * when nothing but an authority follows the "//"
 * (e.g. http://www.example.com -> http://www.example.com/).
 *
 * @param string $base
 *
 * @return string
 */
function get_clean_base($base = "") {
  /* remove fragment */
  $pieces = explode("#", $base, 2);
  $base = $pieces[0];
  /* //+something without any further slash: no path yet, add one */
  $has_authority = preg_match("/\\/\\/(.*)/", $base, $hit);
  if ($has_authority && strpos($hit[1], "/") === false) {
    $base .= "/";
  }
  return $base;
}
/* */
/**
 * Resolves a relative reference against a base URI.
 *
 * @param string $path
 *   Reference to resolve: "/abs/path", "../up/path", "rel/path" or "".
 * @param string $base
 *   Base URI.
 *
 * @return string
 *   The combined URI. A leading-slash path is returned unchanged when no
 *   "scheme://host/" part can be found in $base.
 */
function calc_abs_path($path = "", $base = "") {
  /* leading slash: keep only the server part of the base */
  if (strpos($path, "/") === 0) {
    $found = preg_match("/([^\\/]*[\\/]{1,2}[^\\/]+)\\//", $base, $hit);
    return $found ? $hit[1] . $path : $path;
  }
  if ($path == "") {
    return $base;
  }
  /* rel path (../ or path): drop everything after the last slash of the base */
  $dir = substr($base, 0, strrpos($base, "/")) . "/";
  if (strpos($path, "../") !== 0) {
    return $dir . $path;
  }
  /* split the directory into server part and path part */
  $server = $dir;
  $tail = "";
  if (preg_match("/([^\\/]*[\\/]{1,2}[^\\/]+\\/)(.*)\\//", $dir, $hit)) {
    $server = $hit[1];
    $tail = $hit[2];
  }
  /* $path starts with "../" here, so at least one round is always needed */
  do {
    $path = substr($path, 3);
    $tail = strlen($tail) ? substr($tail, 0, -1) : "";
    if (strpos($tail, "/")) {
      /* cut back to the (new) last slash */
      $tail = substr($tail, 0, strrpos($tail, "/")) . "/";
    }
    else {
      $tail = "";
    }
  } while (strpos($path, "../") === 0);
  return $server . $tail . $path;
}
/**
 * Turns an xml:base value into an absolute base URI.
 *
 * @param string $path
 *   Value to resolve.
 *
 * @return string
 *   $path itself when it contains ":", "http:" + $path for a net path
 *   ("//host/..."), otherwise $path resolved against the base of the
 *   innermost open subject (or the document base).
 */
function calc_base($path = "") {
  /* is abs uri */
  if (strpos($path, ":") !== false) {
    return $path;
  }
  /* net path */
  if (strpos($path, "//") === 0) {
    return "http:" . $path;
  }
  /* relative base */
  $parent = false;
  if ($this->s_count) {
    $parent = $this->subjs[$this->s_count - 1];
  }
  $resolve_against = $this->get_clean_base($this->get_cur_xml_base($parent));
  return $this->calc_abs_path($path, $resolve_against);
}
/**
 * Resolves an rdf:ID / rdf:about / rdf:resource value to a full URI.
 *
 * @param mixed $s
 *   Subject array whose base applies, or an empty value.
 * @param string $path
 *   Attribute value.
 * @param string $term
 *   Attribute kind; "ID" makes the value a fragment of the base.
 *
 * @return string
 */
function calc_uri($s = "", $path = "", $term = "") {
  /* is abs uri */
  if (strpos($path, ":") !== false) {
    return $path;
  }
  $clean_base = $this->get_clean_base($this->get_cur_xml_base($s));
  if ($term == "ID") {
    return $clean_base . "#" . $path;
  }
  if (strpos($path, "#") === 0) {
    return $clean_base . $path;
  }
  /* net path */
  if (strpos($path, "//") === 0) {
    return "http:" . $path;
  }
  return $this->calc_abs_path($path, $clean_base);
}
/* */
/**
 * Appends one statement to the triple list and bumps the counter.
 *
 * @param array $s
 *   Subject node.
 * @param string $p
 *   Predicate URI.
 * @param array $o
 *   Object node or literal.
 */
function add_triple($s, $p, $o) {
  $triple = array(
    "s" => $s,
    "p" => $p,
    "o" => $o,
  );
  $this->triples[$this->t_count] = $triple;
  $this->t_count++;
}
/* */
/**
 * Adds the four reification triples for a statement.
 *
 * @param array $statement
 *   Node that represents the statement.
 * @param array $s
 *   Subject of the reified statement (received by reference).
 * @param array $p
 *   Predicate node.
 * @param array $o
 *   Object node or literal.
 */
function reify($statement, &$s, $p, $o) {
  $rdf = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";
  $this->add_triple($statement, $rdf . "type", array(
    "type" => "uri",
    "uri" => $rdf . "Statement",
  ));
  $roles = array(
    "subject" => $s,
    "predicate" => $p,
    "object" => $o,
  );
  foreach ($roles as $role => $node) {
    $this->add_triple($statement, $rdf . $role, $node);
  }
}
/* */
/**
 * Generates the next blank node identifier.
 *
 * @return string
 *   "_:" followed by the bnode prefix and the incremented counter.
 */
function create_bnode_id() {
  $next = ++$this->bnode_id;
  return "_:" . $this->bnode_prefix . $next;
}
/* */
/**
 * Puts a copy of the given subject on top of the subject stack.
 *
 * @param array $s
 *   Subject node (received by reference, stored by value).
 */
function push_s(&$s) {
  $slot = $this->s_count;
  $this->subjs[$slot] = $s;
  $this->s_count = $slot + 1;
}
/**
 * Removes the top entry of the subject stack.
 *
 * The stack is rebuilt from indices 0 .. s_count-1, so any other keys are
 * discarded as well.
 *
 * @return bool
 *   Always true.
 */
function pop_s() {
  $this->s_count--;
  $kept = array();
  $i = 0;
  while ($i < $this->s_count) {
    $kept[] = $this->subjs[$i];
    $i++;
  }
  $this->subjs = $kept;
  return true;
}
/**
 * Picks the language tag that applies to a subject node.
 *
 * @param mixed $s
 *   Subject array, or an empty value.
 *
 * @return string
 *   The first non-empty value of $s["p_xml_lang"], $s["xml_lang"], or the
 *   document-level language.
 */
function get_cur_lang($s = "") {
  if ($s) {
    /* property-level language wins over node-level language */
    foreach (array("p_xml_lang", "xml_lang") as $key) {
      if (!empty($s[$key])) {
        return $s[$key];
      }
    }
  }
  return $this->xml_lang;
}
/* */
/**
 * Start-element callback: forwards to the handler for the current state.
 *
 * @param mixed $parser
 *   XML parser handle (unused).
 * @param string $tag
 *   Element name in "namespace-uri local-name" form.
 * @param array $attrs
 *   Attribute name => value map.
 */
function handle_open($parser, $tag, $attrs) {
  $state = $this->state;
  if ($state == 2) {
    /* expecting p open */
    $this->handle_open_2($tag, $attrs);
  }
  elseif ($state == 4) {
    /* expecting sub_node */
    $this->handle_open_4($tag, $attrs);
  }
  elseif ($state == 1) {
    /* expecting s open */
    $this->handle_open_1($tag, $attrs);
  }
  elseif ($state == 6) {
    /* expecting xml data */
    $this->handle_open_6($tag, $attrs);
  }
  else {
    echo "unexpected handle_open call (at state " . $this->state . ") (" . $tag . ") \n";
  }
}
/**
 * Handles an element opened while a subject node is expected (state 1).
 *
 * rdf:RDF only updates the document language/base. Any other element
 * becomes a new subject: its base, language and identity are determined,
 * typing and attribute triples are added, and the subject is pushed on the
 * stack before switching to state 2 (expecting properties).
 *
 * Compared to the original, the enclosing subject is read by value instead
 * of through a reference (which silently created a null stack entry at
 * index -1 when the stack was empty), and optional keys are tested with
 * empty() to avoid undefined-key warnings.
 *
 * @param string $tag
 *   Element name in "namespace-uri local-name" form.
 * @param array $attrs
 *   Attribute name => value map.
 *
 * @return mixed
 *   true for rdf:RDF, nothing otherwise.
 */
function handle_open_1($tag, $attrs) {
  $xml = "http://www.w3.org/XML/1998/namespace";
  $rdf = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";
  /* rdf:RDF */
  if ($tag === $rdf . " RDF") {
    /* lang */
    if (!empty($attrs[$xml . " lang"])) {
      $this->xml_lang = $attrs[$xml . " lang"];
    }
    /* base */
    if (!empty($attrs[$xml . " base"])) {
      $this->set_base($attrs[$xml . " base"]);
    }
    return true;
  }
  /* enclosing subject, if any (read-only copy) */
  $top = $this->s_count - 1;
  $prev_s = ($top >= 0 && isset($this->subjs[$top])) ? $this->subjs[$top] : null;
  $cur_s = array();
  /* base */
  if (!empty($attrs[$xml . " base"])) {
    $cur_s["xml_base"] = $this->calc_base($attrs[$xml . " base"]);
  }
  elseif ($prev_s) {
    /* s is an o, too */
    if (!empty($prev_s["p_xml_base"])) {
      $cur_s["xml_base"] = $prev_s["p_xml_base"];
    }
    elseif (!empty($prev_s["xml_base"])) {
      $cur_s["xml_base"] = $prev_s["xml_base"];
    }
  }
  else {
    /* top level node */
    $cur_s["xml_base"] = $this->xml_base;
  }
  /* lang */
  if (!empty($attrs[$xml . " lang"])) {
    $cur_s["xml_lang"] = $attrs[$xml . " lang"];
  }
  elseif ($prev_s) {
    /* s is an o, too */
    if (!empty($prev_s["p_xml_lang"])) {
      $cur_s["xml_lang"] = $prev_s["p_xml_lang"];
    }
    elseif (!empty($prev_s["xml_lang"])) {
      $cur_s["xml_lang"] = $prev_s["xml_lang"];
    }
  }
  else {
    /* top level node */
    $cur_s["xml_lang"] = $this->xml_lang;
  }
  /* rdf:ID */
  if (!empty($attrs[$rdf . " ID"])) {
    $cur_s["type"] = "uri";
    $cur_s["uri"] = $this->calc_uri($cur_s, $attrs[$rdf . " ID"], "ID");
  }
  elseif (isset($attrs[$rdf . " about"])) {
    $cur_s["type"] = "uri";
    $cur_s["uri"] = $this->calc_uri($cur_s, $attrs[$rdf . " about"], "about");
  }
  else {
    $cur_s["type"] = "bnode";
    /* rdf:nodeID */
    if (!empty($attrs[$rdf . " nodeID"])) {
      $cur_s["bnode_id"] = "_:" . $attrs[$rdf . " nodeID"];
    }
    else {
      /* create bnode_id */
      $cur_s["bnode_id"] = $this->create_bnode_id();
    }
  }
  /* typed node */
  if ($tag != $rdf . " Description") {
    $this->add_triple($cur_s, $rdf . "type", array(
      "type" => "uri",
      "uri" => str_replace(" ", "", $tag),
    ));
  }
  /* (additional) typing attr */
  if (!empty($attrs[$rdf . " type"])) {
    $this->add_triple($cur_s, $rdf . "type", array(
      "type" => "uri",
      "uri" => $attrs[$rdf . " type"],
    ));
  }
  /* Seq|Bag|Alt */
  /* rdf:li elements can exist in any description element */
  $cur_s["li_count"] = 0;
  if ($tag === $rdf . " Seq" || $tag === $rdf . " Bag" || $tag === $rdf . " Alt") {
    $cur_s["sba"] = true;
  }
  /* any other attrs (qualified, but not from rdf skip_terms or xml namespace) */
  $cur_lang = $this->get_cur_lang($cur_s);
  foreach ($attrs as $k => $v) {
    if (strpos($k, $xml) !== false || strpos($k, " ") === false) {
      continue;
    }
    /* non-RDF attributes always count; RDF ones only when not a syntax term */
    if (strpos($k, $rdf) === false || !in_array($k, $this->skip_terms)) {
      $this->add_triple($cur_s, str_replace(" ", "", $k), array(
        "type" => "literal",
        "val" => $v,
        "lang" => $cur_lang,
      ));
    }
  }
  $this->push_s($cur_s);
  $this->state = 2;
}
/**
 * Handles an element opened while a property is expected (state 2).
 *
 * Records the property on the current subject and, depending on the
 * attributes, either emits the triple right away (rdf:resource, rdf:nodeID),
 * prepares for an XML literal / nested resource / collection
 * (rdf:parseType), or waits for a nested node or literal text.
 * Remaining qualified non-RDF, non-XML attributes become literal properties
 * of the object node.
 *
 * Fix: $rdf_resource is now initialised up front. The attribute loop reads
 * it even when no rdf:resource attribute was present, which previously
 * touched an undefined variable.
 *
 * @param string $tag
 *   Element name in "namespace-uri local-name" form.
 * @param array $attrs
 *   Attribute name => value map.
 */
function handle_open_2($tag, $attrs) {
  $xml = "http://www.w3.org/XML/1998/namespace";
  $rdf = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";
  $cur_p = $tag;
  $rdf_resource = null;
  $cur_s =& $this->subjs[$this->s_count - 1];
  /* forget per-property settings of the previous property */
  unset($cur_s["p_xml_base"]);
  unset($cur_s["p_xml_lang"]);
  unset($cur_s["p_rdf_ID"]);
  unset($cur_s["coll"]);
  /* base */
  if (!empty($attrs[$xml . " base"])) {
    $cur_s["p_xml_base"] = $this->calc_base($attrs[$xml . " base"]);
  }
  /* lang */
  if (!empty($attrs[$xml . " lang"])) {
    $cur_s["p_xml_lang"] = $attrs[$xml . " lang"];
  }
  /* adjust li: rdf:li becomes rdf:_1, rdf:_2, ... per subject */
  if ($cur_p === $rdf . " li") {
    $li_count = (isset($cur_s["li_count"]) ? $cur_s["li_count"] : 0) + 1;
    $cur_s["li_count"] = $li_count;
    $cur_p = $rdf . "_" . $li_count;
  }
  $cur_s["cur_p"] = str_replace(" ", "", $cur_p);
  /* rdf:ID => reification */
  $rdf_ID = isset($attrs[$rdf . " ID"]) ? $attrs[$rdf . " ID"] : null;
  if ($rdf_ID) {
    $cur_s["p_rdf_ID"] = $rdf_ID;
  }
  /* rdf:resource */
  if (isset($attrs[$rdf . " resource"])) {
    $rdf_resource = $this->calc_uri($cur_s, $attrs[$rdf . " resource"], "resource");
    $this->add_triple($cur_s, $cur_s["cur_p"], array(
      "type" => "uri",
      "uri" => $rdf_resource,
    ));
    /* typing */
    if (isset($attrs[$rdf . " type"])) {
      $this->add_triple(array(
        "type" => "uri",
        "uri" => $rdf_resource,
      ), $rdf . "type", array(
        "type" => "uri",
        "uri" => $attrs[$rdf . " type"],
      ));
    }
    /* reification */
    if ($rdf_ID) {
      /* reify, p is an empty element */
      $this->reify(array(
        "type" => "uri",
        "uri" => $this->calc_uri($cur_s, $rdf_ID, "ID"),
      ), $cur_s, array(
        "type" => "uri",
        "uri" => $cur_s["cur_p"],
      ), array(
        "type" => "uri",
        "uri" => $rdf_resource,
      ));
      unset($cur_s["p_rdf_ID"]);
    }
    $this->state = 3;
  }
  elseif (!empty($attrs[$rdf . " nodeID"])) {
    $rdf_nodeID = $attrs[$rdf . " nodeID"];
    $this->add_triple($cur_s, $cur_s["cur_p"], array(
      "type" => "bnode",
      "bnode_id" => "_:" . $rdf_nodeID,
    ));
    $this->state = 3;
    if ($rdf_ID) {
      /* reify */
      $this->reify(array(
        "type" => "uri",
        "uri" => $this->calc_uri($cur_s, $rdf_ID, "ID"),
      ), $cur_s, array(
        "type" => "uri",
        "uri" => $cur_s["cur_p"],
      ), array(
        "type" => "bnode",
        "bnode_id" => "_:" . $rdf_nodeID,
      ));
    }
  }
  elseif (!empty($attrs[$rdf . " parseType"])) {
    $rdf_parseType = $attrs[$rdf . " parseType"];
    if ($rdf_parseType === "Literal") {
      /* start collecting the element content as XML text */
      $cur_s["o_xml_level"] = 0;
      $cur_s["o_xml_data"] = "";
      $cur_s["p_xml_literal_level"] = 0;
      $cur_s["declared_namespaces"] = array();
      $this->state = 6;
    }
    elseif ($rdf_parseType === "Resource") {
      /* the property content describes a fresh blank node */
      $sub_s = array(
        "type" => "bnode",
        "bnode_id" => $this->create_bnode_id(),
      );
      $this->add_triple($cur_s, str_replace(" ", "", $cur_p), $sub_s);
      $this->push_s($sub_s);
      if (!empty($cur_s["p_rdf_ID"])) {
        /* reify, p is an empty element */
        $this->reify(array(
          "type" => "uri",
          "uri" => $this->calc_uri($cur_s, $cur_s["p_rdf_ID"], "ID"),
        ), $cur_s, array(
          "type" => "uri",
          "uri" => $cur_s["cur_p"],
        ), $sub_s);
        unset($cur_s["p_rdf_ID"]);
      }
      $this->state = 2;
    }
    elseif ($rdf_parseType === "Collection") {
      $cur_s["coll"] = true;
      $this->state = 4;
    }
  }
  else {
    /* o is sub_node or literal */
    /* typed literal */
    if (!empty($attrs[$rdf . " datatype"])) {
      $cur_s["o_rdf_datatype"] = $attrs[$rdf . " datatype"];
    }
    $this->state = 4;
  }
  /* any other attrs (qualified, but not from rdf or xml namespace, except rdf:type) */
  $tmp_node = null;
  $cur_lang = "";
  foreach ($attrs as $k => $v) {
    if (strpos($k, $rdf) !== false || strpos($k, $xml) !== false || strpos($k, " ") === false) {
      continue;
    }
    if (!$tmp_node) {
      /* first property attribute: decide which node the attributes describe */
      $cur_lang = $this->get_cur_lang($cur_s);
      if ($rdf_resource) {
        $tmp_node = array(
          "type" => "uri",
          "uri" => $rdf_resource,
        );
      }
      else {
        $tmp_node = array(
          "type" => "bnode",
          "bnode_id" => $this->create_bnode_id(),
        );
        $this->add_triple($cur_s, str_replace(" ", "", $cur_p), $tmp_node);
      }
    }
    if (!empty($cur_s["p_rdf_ID"])) {
      /* reify, but only once, p is an empty element */
      $this->reify(array(
        "type" => "uri",
        "uri" => $this->calc_uri($cur_s, $cur_s["p_rdf_ID"], "ID"),
      ), $cur_s, array(
        "type" => "uri",
        "uri" => $cur_s["cur_p"],
      ), $tmp_node);
      unset($cur_s["p_rdf_ID"]);
    }
    $this->add_triple($tmp_node, str_replace(" ", "", $k), array(
      "type" => "literal",
      "val" => $v,
      "lang" => $cur_lang,
    ));
    $this->state = 3;
  }
}
/**
 * Handles an element opened while a nested node is expected (state 4).
 *
 * The element becomes a new subject that is also the object of the
 * enclosing subject's current property. Inside a collection an extra list
 * blank node is created and linked with rdf:first / rdf:rest.
 *
 * Note on ordering: add_triple() and push_s() store copies of $cur_s, so
 * the keys assigned after those calls (in_list, li_count, sba) are not part
 * of the copies made earlier. Keep the statement order when editing.
 *
 * @param string $tag
 *   Element name in "namespace-uri local-name" form.
 * @param array $attrs
 *   Attribute name => value map.
 */
function handle_open_4($tag, $attrs) {
$cur_s = array();
/* enclosing subject (its cur_p is the property this node is the object of) */
$prev_s =& $this->subjs[$this->s_count - 1];
/* base */
if ($xml_base = @$attrs["http://www.w3.org/XML/1998/namespace base"]) {
$cur_s["xml_base"] = $this
->calc_base($xml_base);
}
elseif ($p_xml_base = @$prev_s["p_xml_base"]) {
$cur_s["xml_base"] = $p_xml_base;
}
elseif ($xml_base = @$prev_s["xml_base"]) {
$cur_s["xml_base"] = $xml_base;
}
else {
/* top level node */
$cur_s["xml_base"] = $this->xml_base;
}
/* lang */
if ($xml_lang = @$attrs["http://www.w3.org/XML/1998/namespace lang"]) {
$cur_s["xml_lang"] = $xml_lang;
}
elseif ($p_xml_lang = @$prev_s["p_xml_lang"]) {
$cur_s["xml_lang"] = $p_xml_lang;
}
elseif ($xml_lang = @$prev_s["xml_lang"]) {
$cur_s["xml_lang"] = $xml_lang;
}
else {
/* top level node */
$cur_s["xml_lang"] = $this->xml_lang;
}
/* rdf:ID */
if ($rdf_id = @$attrs["http://www.w3.org/1999/02/22-rdf-syntax-ns# ID"]) {
$cur_s["type"] = "uri";
//$cur_s["uri"]=$this->full_base."#".$rdf_id;
$cur_s["uri"] = $this
->calc_uri($cur_s, $rdf_id, "ID");
}
elseif (isset($attrs["http://www.w3.org/1999/02/22-rdf-syntax-ns# about"])) {
$cur_s["type"] = "uri";
$uri = $attrs["http://www.w3.org/1999/02/22-rdf-syntax-ns# about"];
$cur_s["uri"] = $this
->calc_uri($cur_s, $uri, "about");
}
else {
$cur_s["type"] = "bnode";
/* rdf:nodeID */
if ($rdf_nodeID = @$attrs["http://www.w3.org/1999/02/22-rdf-syntax-ns# nodeID"]) {
$cur_s["bnode_id"] = "_:" . $rdf_nodeID;
}
else {
/* create bnode_id */
$cur_s["bnode_id"] = $this
->create_bnode_id();
}
}
/* Collection */
if (@$prev_s["coll"] || @$prev_s["is_list"]) {
/* collection is not empty || cur_s is next entry in collection */
$list_bnode_id = $this
->create_bnode_id();
$list = array(
"type" => "bnode",
"bnode_id" => $list_bnode_id,
);
/* first entry hangs off the collection property, later ones off rdf:rest */
if ($prev_p = @$prev_s["cur_p"]) {
$this
->add_triple($prev_s, $prev_s["cur_p"], $list);
}
else {
$this
->add_triple($prev_s, "http://www.w3.org/1999/02/22-rdf-syntax-ns#rest", $list);
}
$list["is_list"] = true;
$this
->push_s($list);
/* cur_s is first */
$this
->add_triple($list, "http://www.w3.org/1999/02/22-rdf-syntax-ns#first", $cur_s);
$cur_s["in_list"] = true;
$this
->push_s($cur_s);
$this->state = 2;
}
else {
/* plain nested node: object of the enclosing subject's current property */
$this
->add_triple($prev_s, $prev_s["cur_p"], $cur_s);
$this
->push_s($cur_s);
$this->state = 2;
}
/* typed node */
if ($tag != "http://www.w3.org/1999/02/22-rdf-syntax-ns# Description") {
$this
->add_triple($cur_s, "http://www.w3.org/1999/02/22-rdf-syntax-ns#type", array(
"type" => "uri",
"uri" => str_replace(" ", "", $tag),
));
}
/* (additional) typing attr */
if ($rdf_type = @$attrs["http://www.w3.org/1999/02/22-rdf-syntax-ns# type"]) {
$this
->add_triple($cur_s, "http://www.w3.org/1999/02/22-rdf-syntax-ns#type", array(
"type" => "uri",
"uri" => $rdf_type,
));
}
/* Seq|Bag|Alt */
/* NOTE: set after push_s(), so the stacked copy does not carry li_count / sba */
$cur_s["li_count"] = 0;
/* rdf:li elements can exist in any description element */
if ($tag === "http://www.w3.org/1999/02/22-rdf-syntax-ns# Seq" || $tag === "http://www.w3.org/1999/02/22-rdf-syntax-ns# Bag" || $tag === "http://www.w3.org/1999/02/22-rdf-syntax-ns# Alt") {
$cur_s["sba"] = true;
}
/* any other attrs (qualified, but not from rdf skip_terms or xml namespace) */
$cur_lang = $this
->get_cur_lang($cur_s);
foreach ($attrs as $k => $v) {
if (strpos($k, "http://www.w3.org/XML/1998/namespace") === false && strpos($k, " ") !== false) {
if (strpos($k, "http://www.w3.org/1999/02/22-rdf-syntax-ns#") === false) {
$this
->add_triple($cur_s, str_replace(" ", "", $k), array(
"type" => "literal",
"val" => $v,
"lang" => $cur_lang,
));
}
elseif (!in_array($k, $this->skip_terms)) {
/* add, but may warn */
$this
->add_triple($cur_s, str_replace(" ", "", $k), array(
"type" => "literal",
"val" => $v,
"lang" => $cur_lang,
));
}
}
}
}
/**
 * Handles an element opened inside an XML literal (state 6).
 *
 * The start tag is re-serialised and appended to the literal text collected
 * on the current subject; namespace declarations are added the first time a
 * prefix/URI pair is used for an element within the literal.
 *
 * Fixes over the original:
 * - attribute values (and namespace URIs) are escaped again, because the
 *   XML parser hands them over decoded and writing them back verbatim
 *   produced malformed markup for values containing &, <, > or ";
 * - namespaces without a recorded prefix no longer trigger undefined-key
 *   lookups;
 * - attributes in the XML namespace keep their "xml:" prefix.
 *
 * @param string $tag
 *   Element name in "namespace-uri local-name" form.
 * @param array $attrs
 *   Attribute name => value map.
 */
function handle_open_6($tag, $attrs) {
  $xml_ns = "http://www.w3.org/XML/1998/namespace";
  /* byte-wise escaping, independent of the parser's target encoding */
  $raw = array("&", "<", ">", '"');
  $escaped = array("&amp;", "&lt;", "&gt;", "&quot;");
  $cur_s =& $this->subjs[$this->s_count - 1];
  $data = $cur_s["o_xml_data"];
  $xml_level = $cur_s["o_xml_level"];
  $decl_nss = $cur_s["declared_namespaces"];
  $tag_parts = explode(" ", $tag);
  if (count($tag_parts) == 1) {
    /* no qname */
    $data .= '<' . $tag;
  }
  else {
    $ns_uri = $tag_parts[0];
    $local_name = $tag_parts[1];
    $nsp = isset($this->nsps[$ns_uri]) ? (string) $this->nsps[$ns_uri] : "";
    $data .= strlen($nsp) ? '<' . $nsp . ":" . $local_name : '<' . $local_name;
    /* declare ns */
    if (empty($decl_nss[$nsp . "=" . $ns_uri])) {
      $ns_value = str_replace($raw, $escaped, $ns_uri);
      $data .= strlen($nsp) ? ' xmlns:' . $nsp . '="' . $ns_value . '"' : ' xmlns="' . $ns_value . '"';
      $decl_nss[$nsp . "=" . $ns_uri] = true;
      $cur_s["declared_namespaces"] = $decl_nss;
    }
  }
  foreach ($attrs as $k => $v) {
    $value = str_replace($raw, $escaped, $v);
    if (strpos($k, " ")) {
      /* qualified attr */
      $attr_parts = explode(" ", $k);
      $a_ns_uri = $attr_parts[0];
      $a_local_name = $attr_parts[1];
      if ($a_ns_uri === $xml_ns) {
        /* the xml prefix is predefined and never announced via handle_ns_decl() */
        $a_nsp = "xml";
      }
      else {
        $a_nsp = isset($this->nsps[$a_ns_uri]) ? (string) $this->nsps[$a_ns_uri] : "";
      }
      $data .= strlen($a_nsp) ? ' ' . $a_nsp . ':' . $a_local_name . '="' . $value . '"' : ' ' . $a_local_name . '="' . $value . '"';
    }
    else {
      /* unqualified attr */
      $data .= ' ' . $k . '="' . $value . '"';
    }
  }
  $data .= '>';
  $cur_s["o_xml_data"] = $data;
  $cur_s["o_xml_level"] = $xml_level + 1;
  if (str_replace(" ", "", $tag) == $cur_s["cur_p"]) {
    /* container prop in XML */
    $cur_s["p_xml_literal_level"] = $cur_s["p_xml_literal_level"] + 1;
  }
}
/* */
/**
 * End-element callback: finishes properties, nodes, collections and
 * XML literals depending on the current state.
 *
 * Fixes over the original:
 * - In state 2 the "item of a collection was closed" switch to state 4 is
 *   now only applied when a node was actually popped. Before, it also fired
 *   when a property of a collection item was closed, which made the parser
 *   treat the item's next property as a node and emit an empty literal
 *   when the item itself was closed.
 * - Optional keys (p_rdf_ID, o_cdata, o_rdf_datatype, cur_p, li_count) are
 *   tested before being read.
 * - Unwinding a collection stops at the bottom of the stack instead of
 *   looping forever when no matching property is found.
 *
 * @param mixed $parser
 *   XML parser handle (unused).
 * @param string $tag
 *   Element name in "namespace-uri local-name" form.
 */
function handle_close($parser, $tag) {
  $rdf = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";
  switch ($this->state) {
    case 3:
      /* p _close_ */
      $this->state = 2;
      break;

    case 2:
      /* no (more) props */
      $top = $this->s_count - 1;
      $cur_s = ($top >= 0 && isset($this->subjs[$top])) ? $this->subjs[$top] : null;
      if ($cur_s) {
        $cur_p = isset($cur_s["cur_p"]) ? $cur_s["cur_p"] : "";
        $closes_p = $cur_p === str_replace(" ", "", $tag);
        if (!$closes_p && $tag === $rdf . " li") {
          /* rdf:li was renamed to rdf:_n when it was opened */
          $li_count = isset($cur_s["li_count"]) ? $cur_s["li_count"] : "";
          $closes_p = $cur_p === $rdf . "_" . $li_count;
        }
        if (!$closes_p) {
          /* closing the node itself */
          $this->pop_s();
          /* s was o of upper triple | back at root, expecting siblings */
          $this->state = !empty($this->subjs[$this->s_count - 1]) ? 2 : 1;
          if (!empty($cur_s["in_list"])) {
            /* a collection item ended: next is another item or the end of the collection */
            $this->state = 4;
          }
        }
      }
      break;

    case 4:
      /* empty p or p_close after cdata reading or p_close after collection */
      $cur_s =& $this->subjs[$this->s_count - 1];
      if (!empty($cur_s["is_list"])) {
        $this->add_triple($cur_s, $rdf . "rest", array(
          "type" => "uri",
          "uri" => $rdf . "nil",
        ));
        /* back to list start */
        $coll_p = str_replace(" ", "", $tag);
        $next_s = $cur_s;
        while ((!isset($cur_s["cur_p"]) || $cur_s["cur_p"] != $coll_p) && $this->s_count > 1) {
          $next_s = $cur_s;
          $this->pop_s();
          $cur_s =& $this->subjs[$this->s_count - 1];
        }
        if (!empty($cur_s["p_rdf_ID"])) {
          /* reify */
          $this->reify(array(
            "type" => "uri",
            "uri" => $this->calc_uri($cur_s, $cur_s["p_rdf_ID"], "ID"),
          ), $cur_s, array(
            "type" => "uri",
            "uri" => $cur_s["cur_p"],
          ), $next_s);
        }
        $this->state = 2;
      }
      else {
        /* literal object (possibly empty) */
        $literal = array(
          "type" => "literal",
          "val" => isset($cur_s["o_cdata"]) ? $cur_s["o_cdata"] : null,
          "dt" => isset($cur_s["o_rdf_datatype"]) ? $cur_s["o_rdf_datatype"] : null,
          "lang" => $this->get_cur_lang($cur_s),
        );
        $this->add_triple($cur_s, $cur_s["cur_p"], $literal);
        if (!empty($cur_s["p_rdf_ID"])) {
          /* reify */
          $this->reify(array(
            "type" => "uri",
            "uri" => $this->calc_uri($cur_s, $cur_s["p_rdf_ID"], "ID"),
          ), $cur_s, array(
            "type" => "uri",
            "uri" => $cur_s["cur_p"],
          ), $literal);
        }
        unset($cur_s["o_cdata"]);
        unset($cur_s["o_rdf_datatype"]);
        $this->state = 2;
      }
      break;

    case 6:
      /* expecting xml data */
      $cur_s =& $this->subjs[$this->s_count - 1];
      $data = $cur_s["o_xml_data"];
      $xml_level = $cur_s["o_xml_level"];
      if ($xml_level === 0) {
        /* p close after xml reading */
        $this->add_triple($cur_s, $cur_s["cur_p"], array(
          "type" => "literal",
          "val" => trim($data),
          "dt" => $rdf . "XMLLiteral",
          "lang" => $this->get_cur_lang($cur_s),
        ));
        unset($cur_s["o_xml_data"]);
        $this->state = 2;
      }
      else {
        /* closing an element that belongs to the literal itself */
        $tag_parts = explode(" ", $tag);
        if (count($tag_parts) == 1) {
          /* no qname */
          $data .= '</' . $tag . '>';
        }
        else {
          $ns_uri = $tag_parts[0];
          $local_name = $tag_parts[1];
          $nsp = isset($this->nsps[$ns_uri]) ? (string) $this->nsps[$ns_uri] : "";
          $data .= strlen($nsp) ? '</' . $nsp . ":" . $local_name . '>' : '</' . $local_name . '>';
        }
        $cur_s["o_xml_data"] = $data;
        $cur_s["o_xml_level"] = $xml_level - 1;
        if (str_replace(" ", "", $tag) === $cur_s["cur_p"]) {
          /* container prop in XML */
          $cur_s["p_xml_literal_level"]--;
        }
      }
      break;
  }
}
/* */
/**
 * Character-data callback: collects text for literals.
 *
 * In state 6 the text is appended to the XML literal being rebuilt; in
 * state 4 it is appended to the plain literal value. Text seen in any other
 * state is ignored.
 *
 * Fix: text that goes into an XML literal is escaped again (&, <, >). The
 * XML parser delivers character data decoded, so appending it verbatim
 * turned e.g. "&lt;" into a bare "<" inside the serialised literal.
 *
 * @param mixed $parser
 *   XML parser handle (unused).
 * @param string $cdata
 *   Chunk of character data.
 */
function handle_cdata($parser, $cdata) {
  switch ($this->state) {
    case 6:
      $cur_s =& $this->subjs[$this->s_count - 1];
      $markup_safe = str_replace(array("&", "<", ">"), array("&amp;", "&lt;", "&gt;"), $cdata);
      if (isset($cur_s["o_xml_data"])) {
        $cur_s["o_xml_data"] .= $markup_safe;
      }
      elseif ($cdata == "\n" || $cdata == "\r\n" || trim($cdata)) {
        /* nothing collected yet: start with a line break or non-blank text only */
        $cur_s["o_xml_data"] = $markup_safe;
      }
      break;

    case 4:
      $cur_s =& $this->subjs[$this->s_count - 1];
      if (isset($cur_s["o_cdata"])) {
        $cur_s["o_cdata"] .= $cdata;
      }
      else {
        $cur_s["o_cdata"] = $cdata;
      }
      break;
  }
}
/* */
/**
 * Namespace-declaration callback: remembers the prefix used for a URI.
 *
 * A later declaration for the same URI replaces the stored prefix.
 *
 * @param mixed $parser
 *   XML parser handle (unused).
 * @param mixed $nsp
 *   Declared prefix.
 * @param string $ns_uri
 *   Namespace URI.
 */
function handle_ns_decl($parser, $nsp, $ns_uri) {
  $prefixes = $this->nsps;
  $prefixes[$ns_uri] = $nsp;
  $this->nsps = $prefixes;
}
/* */
/**
 * Returns all triples collected so far.
 *
 * @return array
 *   List of arrays with the keys "s", "p" and "o".
 */
function get_triples() {
  $collected = $this->triples;
  return $collected;
}
/**
 * Returns the HTTP response header lines stored by parse_web_file().
 *
 * @return array
 */
function get_result_headers() {
  $header_lines = $this->result_headers;
  return $header_lines;
}
/* */
/**
 * Fetches an RDF/XML document over HTTP/1.0 and parses it.
 *
 * Fixes over the original:
 * - the socket is closed on the early returns for 4xx/5xx/304 responses;
 * - $redirect is always defined, also when the status line is unparsable;
 * - a URL without a path ("http://example.com") requests "/" instead of
 *   producing a request line with an empty target;
 * - optional arguments (proxy_host, proxy_port, headers, user_agent), the
 *   URL parts and the full_base fallback are tested before being read, and
 *   a scalar init_args value is replaced by an array before it is written.
 *
 * @param string $url
 *   URL to fetch.
 * @param int $redir_count
 *   Number of redirects followed so far (used internally).
 *
 * @return mixed
 *   The list of triples on success; otherwise a string: the HTTP status
 *   ("404 Not Found"), a "Socket error: ..." / "XML error: ..." message, or
 *   the last Location header line when more than 4 redirects were needed.
 */
function parse_web_file($url = "", $redir_count = 0) {
  if (!isset($this->init_args) || (!is_array($this->init_args) && !is_object($this->init_args))) {
    $this->init_args = array();
  }
  if (empty($this->init_args["base"])) {
    $this->init_args["base"] = $url;
  }
  $this->init(false);
  if (!$url) {
    $url = isset($this->full_base) ? $this->full_base : "";
  }
  if (!$url) {
    return $this->triples;
  }
  if ($redir_count) {
    $this->parsed_url = $url;
  }
  /* http method */
  $http_method = isset($this->init_args["http_method"]) ? $this->init_args["http_method"] : "GET";
  $url_parts = parse_url($url);
  if (!is_array($url_parts)) {
    $url_parts = array();
  }
  if (!isset($url_parts["port"])) {
    $url_parts["port"] = 80;
  }
  $host = isset($url_parts["host"]) ? $url_parts["host"] : "";
  $proxy_host = !empty($this->init_args["proxy_host"]) ? $this->init_args["proxy_host"] : "";
  $proxy_port = !empty($this->init_args["proxy_port"]) ? $this->init_args["proxy_port"] : "";
  $use_proxy = $proxy_host && $proxy_port;
  $has_user = isset($url_parts["user"]) && strlen($url_parts["user"]);
  /* request line: full URL for proxies / credentials, path otherwise */
  if ($has_user || $use_proxy) {
    $http_code = $http_method . ' ' . $url . ' HTTP/1.0' . "\r\n";
  }
  else {
    $path = isset($url_parts["path"]) && strlen($url_parts["path"]) ? $url_parts["path"] : "/";
    $http_code = $http_method . ' ' . $path;
    $http_code .= isset($url_parts["query"]) && strlen($url_parts["query"]) ? "?" . $url_parts["query"] : "";
    $http_code .= isset($url_parts["fragment"]) && strlen($url_parts["fragment"]) ? "#" . $url_parts["fragment"] : "";
    $http_code .= ' HTTP/1.0' . "\r\n";
  }
  /* custom headers */
  if (!empty($this->init_args["headers"]) && is_array($this->init_args["headers"])) {
    $headers = $this->init_args["headers"];
    for ($i = 0, $i_max = count($headers); $i < $i_max; $i++) {
      $http_code .= $headers[$i] . "\r\n";
    }
  }
  /* defaults for headers the caller did not supply */
  if (strpos($http_code, "Host: ") === false) {
    $http_code .= 'Host: ' . $host . "\r\n";
  }
  if (strpos($http_code, "Accept: ") === false) {
    $http_code .= 'Accept: application/rdf+xml; q=0.9, */*; q=0.1' . "\r\n";
  }
  if (strpos($http_code, "User-Agent: ") === false) {
    $ua_string = !empty($this->init_args["user_agent"]) ? $this->init_args["user_agent"] : "ARC RDF/XML Parser v" . $this->version . " (http://www.appmosphere.com/en-arc_rdfxml_parser)";
    $http_code .= 'User-Agent: ' . $ua_string . "\r\n";
  }
  $http_code .= "\r\n";
  /* socket */
  if ($use_proxy) {
    $fp = @fsockopen($proxy_host, $proxy_port);
    $server_str = $proxy_host . ":" . $proxy_port;
  }
  else {
    $fp = @fsockopen($host, $url_parts["port"]);
    $server_str = $host . ":" . $url_parts["port"];
  }
  if (!$fp) {
    return "Socket error: could not connect to server '" . $server_str . "'";
  }
  fputs($fp, $http_code);
  /* http-headers */
  $cur_line = fgets($fp, 256);
  $redirect = false;
  /* 304/4xx/5xx handling */
  if (preg_match("/^HTTP[^\\s]+\\s+([0-9]{1})([0-9]{2})(.*)\$/i", trim($cur_line), $matches)) {
    $code_1 = $matches[1];
    $code_2 = $matches[2];
    $msg = trim($matches[3]);
    if (in_array($code_1, array("4", "5")) || $code_1 . $code_2 == "304") {
      /* nothing to parse: release the connection before reporting the status */
      fclose($fp);
      return $code_1 . $code_2 . " " . $msg;
    }
    $redirect = $code_1 == "3" ? true : false;
  }
  while (!feof($fp) && trim($cur_line)) {
    $this->result_headers[] = $cur_line;
    $lower_line = strtolower($cur_line);
    if ($this->encoding == "auto" && strpos($lower_line, "content-type") !== false) {
      if (strpos($lower_line, "utf-8")) {
        $this->encoding = "UTF-8";
      }
      elseif (strpos($lower_line, "iso-8859-1")) {
        $this->encoding = "ISO-8859-1";
      }
      elseif (strpos($lower_line, "us-ascii")) {
        $this->encoding = "US-ASCII";
      }
    }
    /* 3xx handling */
    if ($redirect && preg_match("/^Location:\\s*(http.*)\$/i", $cur_line, $matches)) {
      fclose($fp);
      unset($this->encoding);
      /* let the redirect target become the new base */
      unset($this->init_args["base"]);
      return $redir_count > 3 ? $cur_line : $this->parse_web_file(trim($matches[1]), $redir_count + 1);
    }
    $cur_line = fgets($fp, 256);
  }
  /* first lines of body to detect encoding */
  $pre_data = fread($fp, 512);
  if ($this->encoding == "auto" && preg_match("/\\<\\?xml .* encoding(.+).*\\?\\>/", $pre_data, $matches)) {
    $cur_match = strtolower($matches[1]);
    if (strpos($cur_match, "utf-8")) {
      $this->encoding = "UTF-8";
    }
    elseif (strpos($cur_match, "iso-8859-1")) {
      $this->encoding = "ISO-8859-1";
    }
    elseif (strpos($cur_match, "us-ascii")) {
      $this->encoding = "US-ASCII";
    }
  }
  if ($this->encoding == "auto") {
    $this->encoding = "UTF-8";
  }
  $this->create_parser();
  /* body */
  $max_lns = $this->max_lines;
  while (($data = $pre_data . fread($fp, 4096)) && ($max_lns === 0 || xml_get_current_line_number($this->parser) <= $max_lns)) {
    /* the look-ahead chunk is only prepended once */
    $pre_data = "";
    if ($this->save_data) {
      $this->data .= $data;
    }
    if (!xml_parse($this->parser, $data, feof($fp))) {
      $error_str = xml_error_string(xml_get_error_code($this->parser));
      $line = xml_get_current_line_number($this->parser);
      fclose($fp);
      xml_parser_free($this->parser);
      return "XML error: '" . $error_str . "' at line " . $line . "\n";
    }
  }
  $this->target_encoding = xml_parser_get_option($this->parser, XML_OPTION_TARGET_ENCODING);
  xml_parser_free($this->parser);
  fclose($fp);
  $this->done();
  return $this->triples;
}
/**
 * Parses an RDF/XML document from a local file (or any fopen()-able path).
 *
 * When no base URI was configured, the path itself is used as base.
 *
 * Fix: the "base" argument is tested with empty() instead of being read
 * directly, and a scalar init_args value (e.g. from the "" constructor
 * default) is replaced by an array before the base is written into it.
 *
 * @param string $path
 *   File to read.
 *
 * @return mixed
 *   The list of triples, or an "XML error: ..." string when the document is
 *   not well-formed. When the file cannot be opened, the current value of
 *   the triples property is returned unchanged.
 */
function parse_file($path) {
  $fp = fopen($path, "r");
  if ($fp) {
    if (!isset($this->init_args) || (!is_array($this->init_args) && !is_object($this->init_args))) {
      $this->init_args = array();
    }
    if (empty($this->init_args["base"])) {
      $this->init_args["base"] = $path;
    }
    $this->init(false);
    if ($this->encoding == "auto") {
      /* no HTTP headers to inspect here, so fall back to UTF-8 */
      $this->encoding = "UTF-8";
    }
    $this->create_parser();
    while ($data = fread($fp, 4096)) {
      if ($this->save_data) {
        $this->data .= $data;
      }
      if (!xml_parse($this->parser, $data, feof($fp))) {
        $error_str = xml_error_string(xml_get_error_code($this->parser));
        $line = xml_get_current_line_number($this->parser);
        fclose($fp);
        xml_parser_free($this->parser);
        return "XML error: '" . $error_str . "' at line " . $line . "\n";
      }
    }
    $this->target_encoding = xml_parser_get_option($this->parser, XML_OPTION_TARGET_ENCODING);
    xml_parser_free($this->parser);
    fclose($fp);
    $this->done();
  }
  return $this->triples;
}
/**
 * Parses an RDF/XML document that is already held in memory.
 *
 * @param string $data
 *   Complete document.
 *
 * @return mixed
 *   The list of triples, or an "XML error: ..." string when the document is
 *   not well-formed.
 */
function parse_data($data) {
  $this->init(false);
  if ($this->encoding == "auto") {
    $this->encoding = "UTF-8";
  }
  $this->create_parser();
  if ($this->save_data) {
    $this->data = $data;
  }
  /* the whole document is passed in one go, hence the "final" flag */
  $parsed_ok = xml_parse($this->parser, $data, true);
  if ($parsed_ok) {
    $this->target_encoding = xml_parser_get_option($this->parser, XML_OPTION_TARGET_ENCODING);
    xml_parser_free($this->parser);
    $this->done();
    return $this->triples;
  }
  $error_str = xml_error_string(xml_get_error_code($this->parser));
  $line = xml_get_current_line_number($this->parser);
  xml_parser_free($this->parser);
  return "XML error: '" . $error_str . "' at line " . $line . "\n";
}
}