You are here

function ARC_erdf_parser::get_triple_infos in Taxonomy import/export via XML 5.2

Same name and namespace in other branches
  1. 5 arc/ARC_erdf_parser.php \ARC_erdf_parser::get_triple_infos()
  2. 6.2 arc/ARC_erdf_parser.php \ARC_erdf_parser::get_triple_infos()
  3. 6 arc/ARC_erdf_parser.php \ARC_erdf_parser::get_triple_infos()
2 calls to ARC_erdf_parser::get_triple_infos()
ARC_erdf_parser::get_rdfxml in arc/ARC_erdf_parser.php
ARC_erdf_parser::get_triples in arc/ARC_erdf_parser.php

File

arc/ARC_erdf_parser.php, line 496

Class

ARC_erdf_parser

Code

/**
 * Collects prefix declarations and triples from the indexed nodes.
 *
 * Processing happens in two passes over $this->nodes_by_parent (populated by
 * $this->index_nodes_by_parent()):
 *  - prefixes: "link" nodes filed under parent id 1 whose rel attribute looks
 *    like "schema.<prefix>" map <prefix> to their href; the first declaration
 *    of a prefix wins.
 *  - triples: for every node, "prefix.local" / "prefix-local" tokens found in
 *    the name, rel, rev or class attribute are turned into triples, but only
 *    for prefixes declared in the first pass.
 *
 * @param $args
 *   Optional array of options. A truthy "ignore_missing_profile" entry lets
 *   the extraction run even when $this->is_erdf is not set (presumably the
 *   flag records whether an eRDF profile was found - confirm against the code
 *   that sets it).
 *
 * @return
 *   On success an array with the keys "triples" (list of arrays with the keys
 *   s, p, p_qname, p_type, o), "prefixes" (prefix => IRI) and "same_ids"
 *   (subject => owl:sameAs target). If extraction is refused, an array with
 *   the keys "error" (message) and "result" (empty array).
 */
function get_triple_infos($args = "") {
  $ignore_missing_profile = is_array($args) && isset($args["ignore_missing_profile"]) && $args["ignore_missing_profile"];
  if (!$this->is_erdf && !$ignore_missing_profile) {
    return array(
      "error" => "could not extract triples",
      "result" => array(),
    );
  }

  $triples = array();
  $prefixes = array();
  $same_ids = array();
  $this->index_nodes_by_parent();

  /* IRIs and patterns shared by the sections below */
  $rdf_type = "http://www.w3.org/1999/02/22-rdf-syntax-ns#type";
  $rdfs_label = "http://www.w3.org/2000/01/rdf-schema#label";
  $owl_same_as = "http://www.w3.org/2002/07/owl#sameAs";
  $qname_pattern = "/([0-9a-z_]+)[\\.\\-]([^\\s]+)/si";
  $class_pattern = "/(\\-?)([0-9a-z_]+)[\\.\\-]([^\\s]+)/si";
  $attr_names = array(
    "rel",
    "name",
    "href",
    "src",
    "content",
    "class",
    "id",
    "rev",
    "title",
  );
  $link_tags = array(
    "link",
    "a",
  );
  $no_class_tags = array(
    "img",
    "a",
    "link",
    "meta",
    "object",
    "iframe",
  );

  /* pass 1: prefixes (nodes filed under parent id 1) */
  if (isset($this->nodes_by_parent[1])) {
    $prefix_nodes = $this->nodes_by_parent[1];
    foreach ($prefix_nodes as $node) {
      $node_tag = $node["tag"];
      $node_attrs = $node["attrs"];
      $rel_value = isset($node_attrs["rel"]) ? trim($node_attrs["rel"]) : "";
      $href_value = isset($node_attrs["href"]) ? trim($node_attrs["href"]) : "";

      /* link (schema definitions); an already known prefix is never overwritten */
      if ($node_tag == "link" && preg_match("/^schema\\.([0-9a-z_]+)\$/i", $rel_value, $hit)) {
        if (!isset($prefixes[$hit[1]])) {
          $prefixes[$hit[1]] = $href_value;
        }
      }
    }
  }

  /* pass 2: triples */
  foreach ($this->nodes_by_parent as $children) {
    foreach ($children as $node) {
      $tag = $node["tag"];
      $node_subj = $node["subj"];

      /* subjects previously aliased through owl:sameAs are replaced by their target */
      $subj = isset($same_ids[$node_subj]) ? $same_ids[$node_subj] : $node_subj;
      $attrs = $node["attrs"];

      /* trimmed attribute values; a "full_<name>" entry takes precedence over "<name>" */
      $val = array();
      foreach ($attr_names as $attr_name) {
        if (isset($attrs["full_" . $attr_name])) {
          $val[$attr_name] = trim($attrs["full_" . $attr_name]);
        }
        elseif (isset($attrs[$attr_name])) {
          $val[$attr_name] = trim($attrs[$attr_name]);
        }
        else {
          $val[$attr_name] = "";
        }
      }
      $is_link_tag = in_array($tag, $link_tags);

      /* meta name content: literal-valued triple about the current subject */
      if ($tag == "meta" && preg_match_all($qname_pattern, $val["name"], $found)) {
        foreach ($found[1] as $idx => $prefix) {
          if (!isset($prefixes[$prefix])) {
            continue;
          }
          $local_name = $found[2][$idx];
          $triples[] = array(
            "s" => $subj,
            "p" => $prefixes[$prefix] . $local_name,
            "p_qname" => $prefix . ":" . $local_name,
            "p_type" => "dt",
            "o" => $val["content"],
          );
        }
      }

      /* link|a rel href: subject -> href, plus a label for the href */
      if ($is_link_tag && preg_match_all($qname_pattern, $val["rel"], $found)) {
        foreach ($found[1] as $idx => $prefix) {
          if (!isset($prefixes[$prefix])) {
            continue;
          }
          $local_name = $found[2][$idx];
          $predicate = $prefixes[$prefix] . $local_name;

          /* owl:sameAs is only recorded as an alias, no triple is emitted */
          if ($predicate === $owl_same_as) {
            $same_ids[$subj] = $val["href"];
            continue;
          }
          $triples[] = array(
            "s" => $subj,
            "p" => $predicate,
            "p_qname" => $prefix . ":" . $local_name,
            "p_type" => "obj",
            "o" => $val["href"],
          );

          /* label: title attribute if non-empty, otherwise the node's cdata */
          $label = $val["title"] ? $val["title"] : $this->get_all_cdata($node);
          if ($label) {
            $triples[] = array(
              "s" => $val["href"],
              "p" => $rdfs_label,
              "p_qname" => "rdfs:label",
              "p_type" => "dt",
              "o" => $label,
            );
          }
        }
      }

      /* link|a rev href: href -> subject, plus a label for the href */
      if ($is_link_tag && preg_match_all($qname_pattern, $val["rev"], $found)) {
        foreach ($found[1] as $idx => $prefix) {
          if (!isset($prefixes[$prefix])) {
            continue;
          }
          $local_name = $found[2][$idx];
          $triples[] = array(
            "s" => $val["href"],
            "p" => $prefixes[$prefix] . $local_name,
            "p_qname" => $prefix . ":" . $local_name,
            "p_type" => "obj",
            "o" => $subj,
          );

          /* label: title attribute if non-empty, otherwise the node's cdata */
          $label = $val["title"] ? $val["title"] : $this->get_all_cdata($node);
          if ($label) {
            $triples[] = array(
              "s" => $val["href"],
              "p" => $rdfs_label,
              "p_qname" => "rdfs:label",
              "p_type" => "dt",
              "o" => $label,
            );
          }
        }
      }

      /* img class src: a leading "-" on the class token types the image itself */
      if ($tag == "img" && preg_match_all($class_pattern, $val["class"], $found)) {
        foreach ($found[2] as $idx => $prefix) {
          if (!isset($prefixes[$prefix])) {
            continue;
          }
          $local_name = $found[3][$idx];
          $term = $prefixes[$prefix] . $local_name;
          if ($found[1][$idx] == "-") {
            $triples[] = array(
              "s" => $val["src"],
              "p" => $rdf_type,
              "p_qname" => "rdf:type",
              "p_type" => "obj",
              "o" => $term,
            );
          }
          else {
            $triples[] = array(
              "s" => $subj,
              "p" => $term,
              "p_qname" => $prefix . ":" . $local_name,
              "p_type" => "obj",
              "o" => $val["src"],
            );
          }
          if ($val["title"]) {
            $triples[] = array(
              "s" => $val["src"],
              "p" => $rdfs_label,
              "p_qname" => "rdfs:label",
              "p_type" => "dt",
              "o" => $val["title"],
            );
          }
        }
      }

      /* class on any other element */
      if (!in_array($tag, $no_class_tags) && preg_match_all($class_pattern, $val["class"], $found)) {
        foreach ($found[2] as $idx => $prefix) {
          if (!isset($prefixes[$prefix])) {
            continue;
          }
          $local_name = $found[3][$idx];
          $term = $prefixes[$prefix] . $local_name;
          $qname = $prefix . ":" . $local_name;

          /* element with an id: the id is typed ("-" token) or becomes the object */
          if ($val["id"]) {
            if ($found[1][$idx] == "-") {
              $triples[] = array(
                "s" => $val["id"],
                "p" => $rdf_type,
                "p_qname" => "rdf:type",
                "p_type" => "obj",
                "o" => $term,
              );
            }
            else {
              $triples[] = array(
                "s" => $subj,
                "p" => $term,
                "p_qname" => $qname,
                "p_type" => "obj",
                "o" => $val["id"],
              );
            }
            continue;
          }

          /* no id: literal taken from a present title attribute (even if empty), else from cdata */
          $literal_triple = array(
            "s" => $subj,
            "p" => $term,
            "p_qname" => $qname,
            "p_type" => "dt",
          );
          if (isset($attrs["title"])) {
            $literal_triple["o"] = $val["title"];
            $triples[] = $literal_triple;
          }
          elseif ($text = $this->get_all_cdata($node)) {
            $literal_triple["o"] = $text;
            $triples[] = $literal_triple;
          }
        }
      }
    }
  }

  return array(
    "triples" => $triples,
    "prefixes" => $prefixes,
    "same_ids" => $same_ids,
  );
}