function ARC_erdf_parser::get_triple_infos in Taxonomy import/export via XML 5.2
Same name and namespace in other branches
- 5 arc/ARC_erdf_parser.php \ARC_erdf_parser::get_triple_infos()
- 6.2 arc/ARC_erdf_parser.php \ARC_erdf_parser::get_triple_infos()
- 6 arc/ARC_erdf_parser.php \ARC_erdf_parser::get_triple_infos()
2 calls to ARC_erdf_parser::get_triple_infos()
File
- arc/
ARC_erdf_parser.php, line 496
Class
Code
/**
 * Extracts eRDF triples from the already-parsed document nodes.
 *
 * The method first collects namespace prefixes from
 * <link rel="schema.PREFIX" href="IRI"> nodes and then walks every node in
 * $this->nodes_by_parent, turning "prefix-name" / "prefix.name" tokens whose
 * prefix was declared into triples:
 *  - meta/@name + @content            => literal ("dt") triple
 *  - link|a/@rel + @href              => resource ("obj") triple, or an
 *                                        owl:sameAs alias recorded in same_ids
 *  - link|a/@rev + @href              => reversed resource triple
 *  - img/@class + @src                => resource triple, or rdf:type of the
 *                                        image when the token starts with "-"
 *  - other elements' @class           => rdf:type / resource triple when an
 *                                        id is present, otherwise a literal
 *                                        taken from @title or the node's cdata
 * Links and images additionally get an rdfs:label triple when a label text
 * (title attribute, or cdata for link|a) is available.
 *
 * For every attribute a "full_<attr>" entry in the node's attrs is preferred
 * over the plain one (presumably an absolutized value prepared by the parser
 * -- confirm against the code that fills "attrs").
 *
 * @param mixed $args
 *   Optional array. When $args["ignore_missing_profile"] is truthy, triples
 *   are extracted even if $this->is_erdf is not set.
 *
 * @return array
 *   On success: array("triples" => list of arrays with the keys s, p,
 *   p_qname, p_type ("obj" or "dt") and o, "prefixes" => prefix => IRI map,
 *   "same_ids" => subject => owl:sameAs target map).
 *   When the document is not flagged as eRDF and the check was not disabled:
 *   array("error" => "could not extract triples", "result" => array()).
 */
function get_triple_infos($args = "") {
  /* only proceed for eRDF documents unless the caller explicitly opted out */
  $ignore_missing_profile = is_array($args) && !empty($args["ignore_missing_profile"]);
  if (!$this->is_erdf && !$ignore_missing_profile) {
    return array(
      "error" => "could not extract triples",
      "result" => array(),
    );
  }

  $triples = array();
  $prefixes = array();
  $same_ids = array();
  $this->index_nodes_by_parent();

  /* loop-invariant patterns and IRIs */
  $qname_pattern = "/([0-9a-z_]+)[\\.\\-]([^\\s]+)/si";
  $class_pattern = "/(\\-?)([0-9a-z_]+)[\\.\\-]([^\\s]+)/si";
  $rdf_type_iri = "http://www.w3.org/1999/02/22-rdf-syntax-ns#type";
  $rdfs_label_iri = "http://www.w3.org/2000/01/rdf-schema#label";
  $attr_names = array(
    "rel",
    "name",
    "href",
    "src",
    "content",
    "class",
    "id",
    "rev",
    "title",
  );

  /* prefixes (children of parent id 1; presumably the head element) */
  if (isset($this->nodes_by_parent[1])) {
    foreach ($this->nodes_by_parent[1] as $cur_node) {
      $attrs = $cur_node["attrs"];
      $rel = isset($attrs["rel"]) ? trim($attrs["rel"]) : "";
      $href = isset($attrs["href"]) ? trim($attrs["href"]) : "";
      /* link (schema definitions); the first declaration of a prefix wins */
      if ($cur_node["tag"] == "link" && preg_match("/^schema\\.([0-9a-z_]+)\$/i", $rel, $m)) {
        if (!isset($prefixes[$m[1]])) {
          $prefixes[$m[1]] = $href;
        }
      }
    }
  }

  /* triples */
  foreach ($this->nodes_by_parent as $cur_p) {
    foreach ($cur_p as $cur_node) {
      $tag = $cur_node["tag"];
      $attrs = $cur_node["attrs"];
      /* subjects that were aliased via owl:sameAs earlier are replaced */
      $node_subj = $cur_node["subj"];
      $subj = isset($same_ids[$node_subj]) ? $same_ids[$node_subj] : $node_subj;

      /* trimmed attribute values, "full_*" variants take precedence */
      $vals = array();
      foreach ($attr_names as $cur_attr) {
        if (isset($attrs["full_" . $cur_attr])) {
          $vals[$cur_attr] = trim($attrs["full_" . $cur_attr]);
        }
        elseif (isset($attrs[$cur_attr])) {
          $vals[$cur_attr] = trim($attrs[$cur_attr]);
        }
        else {
          $vals[$cur_attr] = "";
        }
      }
      $href = $vals["href"];
      $src = $vals["src"];
      $class = $vals["class"];
      $id = $vals["id"];
      $title = $vals["title"];

      /* meta name content */
      if ($tag == "meta" && preg_match_all($qname_pattern, $vals["name"], $m)) {
        for ($i = 0, $i_max = count($m[1]); $i < $i_max; $i++) {
          $prefix = $m[1][$i];
          $local_name = $m[2][$i];
          if (isset($prefixes[$prefix])) {
            $triples[] = array(
              "s" => $subj,
              "p" => $prefixes[$prefix] . $local_name,
              "p_qname" => $prefix . ":" . $local_name,
              "p_type" => "dt",
              "o" => $vals["content"],
            );
          }
        }
      }

      /* link|a rel href, then link|a rev href */
      if (in_array($tag, array("link", "a"), TRUE)) {
        foreach (array("rel", "rev") as $direction) {
          if (!preg_match_all($qname_pattern, $vals[$direction], $m)) {
            continue;
          }
          for ($i = 0, $i_max = count($m[1]); $i < $i_max; $i++) {
            $prefix = $m[1][$i];
            $local_name = $m[2][$i];
            if (!isset($prefixes[$prefix])) {
              continue;
            }
            $p = $prefixes[$prefix] . $local_name;
            if ($direction === "rel") {
              if ($p == "http://www.w3.org/2002/07/owl#sameAs") {
                /* no triple: remember the alias for nodes processed later */
                $same_ids[$subj] = $href;
                continue;
              }
              $s = $subj;
              $o = $href;
            }
            else {
              /* rev: the link target is the subject */
              $s = $href;
              $o = $subj;
            }
            $triples[] = array(
              "s" => $s,
              "p" => $p,
              "p_qname" => $prefix . ":" . $local_name,
              "p_type" => "obj",
              "o" => $o,
            );
            /*
             * label for the link target: the title attribute if present,
             * otherwise the node's cdata. The title is compared against ""
             * so that a title of "0" is not mistaken for a missing one.
             */
            $label = ($title !== "") ? $title : $this->get_all_cdata($cur_node);
            if ($title !== "" || $label) {
              $triples[] = array(
                "s" => $href,
                "p" => $rdfs_label_iri,
                "p_qname" => "rdfs:label",
                "p_type" => "dt",
                "o" => $label,
              );
            }
          }
        }
      }

      /* img class src */
      if ($tag == "img" && preg_match_all($class_pattern, $class, $m)) {
        for ($i = 0, $i_max = count($m[2]); $i < $i_max; $i++) {
          $as_class = $m[1][$i] == "-";
          $prefix = $m[2][$i];
          $local_name = $m[3][$i];
          if (!isset($prefixes[$prefix])) {
            continue;
          }
          if ($as_class) {
            /* "-prefix-name": the image itself is typed */
            $triples[] = array(
              "s" => $src,
              "p" => $rdf_type_iri,
              "p_qname" => "rdf:type",
              "p_type" => "obj",
              "o" => $prefixes[$prefix] . $local_name,
            );
          }
          else {
            $triples[] = array(
              "s" => $subj,
              "p" => $prefixes[$prefix] . $local_name,
              "p_qname" => $prefix . ":" . $local_name,
              "p_type" => "obj",
              "o" => $src,
            );
          }
          if ($title !== "") {
            $triples[] = array(
              "s" => $src,
              "p" => $rdfs_label_iri,
              "p_qname" => "rdfs:label",
              "p_type" => "dt",
              "o" => $title,
            );
          }
        }
      }

      /* class (all elements not handled above, object/iframe are skipped) */
      $skip_tags = array(
        "img",
        "a",
        "link",
        "meta",
        "object",
        "iframe",
      );
      if (!in_array($tag, $skip_tags, TRUE) && preg_match_all($class_pattern, $class, $m)) {
        for ($i = 0, $i_max = count($m[2]); $i < $i_max; $i++) {
          $as_class = $m[1][$i] == "-";
          $prefix = $m[2][$i];
          $local_name = $m[3][$i];
          if (!isset($prefixes[$prefix])) {
            continue;
          }
          $p = $prefixes[$prefix] . $local_name;
          $p_qname = $prefix . ":" . $local_name;
          if ($id !== "") {
            if ($as_class) {
              /* "-prefix-name": the identified element is typed */
              $triples[] = array(
                "s" => $id,
                "p" => $rdf_type_iri,
                "p_qname" => "rdf:type",
                "p_type" => "obj",
                "o" => $p,
              );
            }
            else {
              $triples[] = array(
                "s" => $subj,
                "p" => $p,
                "p_qname" => $p_qname,
                "p_type" => "obj",
                "o" => $id,
              );
            }
          }
          elseif (isset($attrs["title"])) {
            /* an explicitly given title is used as literal, even if empty */
            $triples[] = array(
              "s" => $subj,
              "p" => $p,
              "p_qname" => $p_qname,
              "p_type" => "dt",
              "o" => $title,
            );
          }
          elseif ($label = $this->get_all_cdata($cur_node)) {
            $triples[] = array(
              "s" => $subj,
              "p" => $p,
              "p_qname" => $p_qname,
              "p_type" => "dt",
              "o" => $label,
            );
          }
        }
      }
    }
  }

  return array(
    "triples" => $triples,
    "prefixes" => $prefixes,
    "same_ids" => $same_ids,
  );
}