You are here

rdf_utils.inc in Taxonomy import/export via XML 7

Experimental library for working with RDF within taxonomy_xml

File

rdf_utils.inc
View source
<?php

/**
 * @file
 * Experimental library for working with RDF within taxonomy_xml
 */
// Namespace URIs used when reading and writing RDF. Each *_NS constant holds
// a namespace prefix URI; the constants built by concatenation below are
// fully qualified resource identifiers inside those namespaces.

// RDF core syntax namespace, and the fully qualified URI of rdf:type.
define('TAXONOMY_XML_RDF_NS', 'http://www.w3.org/1999/02/22-rdf-syntax-ns#');
define('TAXONOMY_XML_TYPE', TAXONOMY_XML_RDF_NS . 'type');
// Placeholder type label. NOTE(review): presumably used for resources that
// carry no rdf:type - its use is not visible in this file, confirm at callers.
define('TAXONOMY_XML_UNTYPED', 'UNTYPED');
// RDF Schema namespace.
define('TAXONOMY_XML_RDFS_NS', 'http://www.w3.org/2000/01/rdf-schema#');

// Content labels namespace, and the fully qualified URI of its 'Category'.
// See http://www.w3.org/2004/12/q/doc/rdf-labels.html
define('TAXONOMY_XML_CONTENTLABEL_NS', 'http://www.w3.org/2004/12/q/contentlabel#');
define('TAXONOMY_XML_CATEGORY', TAXONOMY_XML_CONTENTLABEL_NS . 'Category');

// OWL - Web Ontology Language - Formalized Meaning and Logic.
define('TAXONOMY_XML_OWL_NS', 'http://www.w3.org/2002/07/owl#');
// W3C-hosted 'wn20' vocabulary base URI and its 'schema/' sub-path.
// NOTE(review): presumably the WordNet 2.0 RDF representation - confirm.
define('TAXONOMY_XML_W3C_WN', 'http://www.w3.org/2006/03/wn/wn20/');
define('TAXONOMY_XML_W3C_WN_SCHEMA', TAXONOMY_XML_W3C_WN . 'schema/');

// Dublin Core - Metadata standards (elements 1.1 namespace).
define('TAXONOMY_XML_DC_NS', 'http://purl.org/dc/elements/1.1/');

// Simple Knowledge Organization System - Structural information management.
define('TAXONOMY_XML_SKOS_NS', 'http://www.w3.org/2004/02/skos/core#');

// Taxonomic Database Working Group -
// Biodiversity Information Standards (LSIDs etc).
// Note that this URI points at the 'Collection' vocabulary specifically.
define('TAXONOMY_XML_TDWG_NS', 'http://rs.tdwg.org/ontology/voc/Collection#');

// Freebase data namespace.
define('TAXONOMY_XML_FB_NS', 'http://rdf.freebase.com/ns/');

/**
 * Given a D7 entity with its 'rdf_mapping' available, create and fill in data
 * on an RDF/XML node in an XML document.
 *
 * Fully supports namespaces, even though internally the RDF mapping only uses
 * CURIE notation.
 *
 * The mapping is expected to look like:
 * @code
 * rdf_mapping = array(
 *   'rdftype' => array(0 => 'skos:Collection'),
 *   'name'    => array(
 *     'predicates' => array(0 => 'rdfs:label'),
 *   ),
 * )
 * @endcode
 *
 * For every mapped property that is set on the entity, one child element per
 * predicate and per value is appended to the entity element. Values may be a
 * single scalar or a list of scalars; each one is trimmed, and values that
 * are empty strings after trimming are skipped (a literal 0 is kept).
 * Non-scalar list items are ignored. Mappings of type 'rel' or 'rev' link to
 * other resources and are skipped for now.
 *
 * @param object $entity
 *   a Drupal entity. Should contain $entity->rdf_mapping instructions.
 * @param DOMDocument $XMLDoc
 *   handle on an XML Document to build the element within.
 * @param DOMElement|NULL $domcontainer
 *   element to add this structure to. While it's not
 *   required, allowing the DOM constructor here to add early will assist in
 *   reducing namespace clutter!
 *
 * @return DOMElement|FALSE
 *   an XML node containing the data, or FALSE if the entity has no usable
 *   rdf mapping or the node could not be created. The node is only attached
 *   to the DOM when $domcontainer is given; otherwise the caller must
 *   append it.
 */
function rdf_entity_to_xml($entity, DOMDocument $XMLDoc, $domcontainer = NULL) {
  if (empty($entity->rdf_mapping)) {
    trigger_error("No rdf mapping loaded on this entity " . print_r($entity, 1), E_USER_ERROR);
    return FALSE;
  }
  $mapping = $entity->rdf_mapping;

  // Without a declared rdftype there is no element name to create.
  if (empty($mapping['rdftype']) || !is_array($mapping['rdftype'])) {
    trigger_error("The rdf mapping on this entity does not declare an rdftype");
    return FALSE;
  }
  $rdftype = reset($mapping['rdftype']);

  // Is a soft CURIE, eg 'skos:Collection'
  // Need to know the real URI before adding it to a strict DOM.
  $ns_id = fully_qualified_from_curie($rdftype);
  $entity_node = $XMLDoc->createElementNS($ns_id['ns'], $rdftype);
  if (!$entity_node) {
    trigger_error("Failed to create an XML node '{$rdftype}' on the XML document");
    return FALSE;
  }
  if (!empty($domcontainer)) {
    // Attaching early lets the DOM reuse namespace declarations that are
    // already present on the container.
    $domcontainer->appendChild($entity_node);
  }

  foreach ($mapping as $property_name => $mapping_details) {
    // Entries without predicates (such as 'rdftype') describe no property.
    if (empty($mapping_details['predicates'])) {
      continue;
    }

    // The value of this predicate
    // will be the corresponding property on this object.
    if (!isset($entity->{$property_name})) {
      watchdog('taxonomy_xml', 'Needs work. The rdf mapping wanted to know the value of the property %property_name but the entity did not hve a corresponding value set. ' . __FUNCTION__, array(
        '%property_name' => $property_name,
      ), WATCHDOG_DEBUG);
      continue;
    }

    // Skip some elements for now.
    // Some mapping needs special treatment!
    // Elements with 'type' rel or rev actually link to resources.
    // I THINK the rdf_mapping 'callback' may help here,
    // but I've not seen it used correctly yet
    // eg
    //  'skos:member',
    //  'skos:broader',
    //  'skos:narrower',
    if (!empty($mapping_details['type']) && ($mapping_details['type'] == 'rel' || $mapping_details['type'] == 'rev')) {
      continue;
    }

    // Normalise the property to a flat list of non-empty strings. trim() is
    // only ever applied to scalars, so list-valued properties no longer
    // trigger a warning, and '0' is kept as a legitimate value.
    $raw_value = $entity->{$property_name};
    $candidates = is_array($raw_value) ? $raw_value : array($raw_value);
    $property_values = array();
    foreach ($candidates as $candidate) {
      if (!is_scalar($candidate)) {
        continue;
      }
      $candidate = trim((string) $candidate);
      if ($candidate !== '') {
        $property_values[] = $candidate;
      }
    }
    if (!$property_values) {
      continue;
    }

    foreach ($mapping_details['predicates'] as $predicate) {
      $ns_id = fully_qualified_from_curie($predicate);
      foreach ($property_values as $property_val) {
        $property_node = $entity_node->appendChild($XMLDoc->createElementNS($ns_id['ns'], $predicate));
        // A text node (rather than a raw value) lets the DOM do the escaping.
        $property_node->appendChild($XMLDoc->createTextNode($property_val));
      }
    }
  }
  return $entity_node;
}

/**
 * Set up an RDF document preamble.
 *
 * Creates a new UTF-8 XML document containing an xml-stylesheet processing
 * instruction, a comment crediting the generating tool, and an rdf:RDF root
 * element that declares the rdfs, owl and skos namespace prefixes.
 *
 * @return DOMElement
 *   The rdf:RDF root element that content should be added to. The document
 *   itself is reachable through the element's ownerDocument property.
 */
function taxonomy_xml_rdf_document() {
  $dom = new DOMDocument('1.0', 'UTF-8');
  $ns = rdf_get_namespaces();

  $stylesheet = $dom->createProcessingInstruction('xml-stylesheet', 'href="render-taxonomy-rdf.xsl" type="text/xsl"');
  $dom->appendChild($stylesheet);

  $credit = $dom->createComment(xmlentities("\n    This file was created by Drupal taxonomy_xml import/export tool.\n    http://drupal.org/project/taxonomy_xml\n    "));
  $dom->appendChild($credit);

  // Creating the root with createElementNS() declares xmlns:rdf on it.
  $domcontainer = $dom->createElementNS($ns['rdf'], 'rdf:RDF');
  $dom->appendChild($domcontainer);

  // Here's how to add namespace declarations right.
  // http://bytes.com/topic/net/answers/468470-how-declare-namespace-prefix-java#post1800600
  foreach (array('rdfs', 'owl', 'skos') as $prefix) {
    $domcontainer->setAttributeNS("http://www.w3.org/2000/xmlns/", "xmlns:" . $prefix, $ns[$prefix]);
  }
  return $domcontainer;
}

/**
 * Resolve shortnames back into namespaces.
 *
 * Splits a CURIE such as 'skos:Collection' at its FIRST colon only, so an
 * identifier part that itself contains colons is preserved intact, and looks
 * the prefix up in the namespaces returned by rdf_get_namespaces().
 *
 * @param string $curie
 *   A compact URI in 'prefix:id' notation.
 *
 * @return array
 *   An array with the keys:
 *   - 'p': the prefix (the whole input when it contains no colon).
 *   - 'id': the part after the first colon, or NULL when there is no colon.
 *   - 'ns': the namespace URI registered for the prefix, or NULL when the
 *     prefix is unknown.
 */
function fully_qualified_from_curie($curie) {
  $ns = rdf_get_namespaces();
  // Limit of 2: everything after the first colon belongs to the id.
  $parts = explode(':', (string) $curie, 2);
  $prefix = $parts[0];
  $id = isset($parts[1]) ? $parts[1] : NULL;
  return array(
    'p' => $prefix,
    'id' => $id,
    'ns' => isset($ns[$prefix]) ? $ns[$prefix] : NULL,
  );
}

/**
 * Locate the directory holding the arc2 library.
 *
 * The stored 'arc2_library_path' variable wins if it points at something
 * readable. Otherwise a short list of conventional locations is probed in
 * order, and the first readable one is remembered in that variable.
 *
 * @return string|NULL
 *   The library path, or NULL if none of the known locations is readable.
 */
function rdf_arc2_library_path() {
  $configured = variable_get('arc2_library_path', '');
  if (is_readable($configured)) {
    return $configured;
  }

  // Nothing usable was configured, so probe the usual suspects.
  $candidates = array();
  $candidates[] = 'sites/all/libraries/ARC2/arc';
  // RDFX may have wanted to install it here
  $candidates[] = 'sites/all/libraries/arc';
  $candidates[] = dirname(__FILE__) . '/arc';

  foreach ($candidates as $candidate) {
    if (!is_readable($candidate)) {
      continue;
    }
    // Remember the hit so later lookups take the fast path above.
    variable_set('arc2_library_path', $candidate);
    return $candidate;
  }
  return NULL;
}

/**
 * Include the arc2 library from wherever rdf_arc2_library_path() finds it.
 *
 * When the library cannot be located, an error is shown to the user and
 * logged to watchdog, both pointing at the module's INSTALL.txt.
 *
 * @return bool
 *   TRUE if ARC2.php was included, FALSE if no library path was found.
 */
function rdf_load_arc2() {
  $library_dir = rdf_arc2_library_path();

  if (!$library_dir) {
    // Report the problem both on screen and in the log.
    $install_file = drupal_get_path('module', 'taxonomy_xml') . '/INSTALL.txt';
    $message = 'ARC2 RDF Parser is unavailable. see !install';
    $strings = array(
      '!install' => l('INSTALL.txt', $install_file),
    );
    drupal_set_message(t($message, $strings), 'error');
    watchdog('taxonomy_xml', $message, $strings, WATCHDOG_ERROR);
    return FALSE;
  }

  require_once $library_dir . "/ARC2.php";
  return TRUE;
}

/**
 * Check ARC RDF library is available.
 *
 * (should look into starting to use chaostools to manage plugins?)
 *
 * @return array
 *   A requirements array in the format used by hook_requirements(). It is
 *   empty when ARC2.php is readable at the detected library path; otherwise
 *   it holds a single 'taxonomy_xml_rdf' warning entry explaining how to
 *   install the library. Anything but an empty array means there's a fail.
 */
function taxonomy_xml_rdf_requirements() {
  $requirements = array();
  $arc2_library_path = rdf_arc2_library_path();
  if (!is_readable($arc2_library_path . "/ARC2.php")) {
    $requirements['taxonomy_xml_rdf'] = array(
      // The status report lists each requirement under its title.
      'title' => t('ARC2 RDF library'),
      'value' => t('ARC2 RDF Parser is unavailable.'),
      'severity' => REQUIREMENT_WARNING,
      'description' => t('
        See <a href="!install">INSTALL.txt</a>
        for the extra features that the external
        <a href="!arc">ARC library</a> can add
        if you download it to %path.
      ', array(
        '!arc' => 'https://github.com/semsol/arc2',
        '!install' => url(drupal_get_path('module', 'taxonomy_xml') . '/INSTALL.txt'),
        '%path' => 'sites/all/libraries',
      )),
    );
  }
  return $requirements;
}

Functions

Name (sort descending) | Description
fully_qualified_from_curie Resolve shortnames back into namespaces.
rdf_arc2_library_path Return the path of the arc2 library.
rdf_entity_to_xml Given a D7 entity with its 'rdf_mapping' available, create and fill in data on an RDF/XML node in an XML document.
rdf_load_arc2 Loads the arc2 library from likely location.
taxonomy_xml_rdf_document Set up an RDF document preamble.
taxonomy_xml_rdf_requirements Check ARC RDF library is available.

Constants