rdf_format.inc in Taxonomy import/export via XML 6.2

Same filename and directory in other branches
Include routines for RDF parsing and taxonomy/term creation. @author dman http://coders.co.nz
2009-09 Code to support bnodes (internal references to other nodes within an RDF document) prompted by a patch contribution from Remzi Celebi
File

rdf_format.inc
View source
<?php

/**
 * @file
 *   Include routines for RDF parsing and taxonomy/term creation.
 * @author dman http://coders.co.nz
 *
 * 2009-09 Code to support bnodes (internal references to other nodes within an
 * RDF document) prompted by a patch contribution from Remzi Celebi
 *
 */
// Core RDF syntax namespace, plus the full URI of the rdf:type predicate
// derived from it.
define('TAXONOMY_XML_RDF_NS', 'http://www.w3.org/1999/02/22-rdf-syntax-ns#');
define('TAXONOMY_XML_TYPE', TAXONOMY_XML_RDF_NS . 'type');
// Bucket key under which resources with no discoverable type are collected
// (see taxonomy_xml_convert_triples_to_sorted_objects()).
define('TAXONOMY_XML_UNTYPED', 'UNTYPED');
// RDF Schema namespace (rdfs:Class and friends).
define('TAXONOMY_XML_RDFS_NS', 'http://www.w3.org/2000/01/rdf-schema#');

// Content labels, and the full URI of the 'Category' resource type in that
// namespace.
// See http://www.w3.org/2004/12/q/doc/rdf-labels.html
define('TAXONOMY_XML_CONTENTLABEL_NS', 'http://www.w3.org/2004/12/q/contentlabel#');
define('TAXONOMY_XML_CATEGORY', TAXONOMY_XML_CONTENTLABEL_NS . 'Category');

// OWL - Web Ontology Language - Formalized Meaning and Logic
define('TAXONOMY_XML_OWL_NS', 'http://www.w3.org/2002/07/owl#');
// W3C WordNet 2.0 base URI, and the schema namespace beneath it.
define('TAXONOMY_XML_W3C_WN', 'http://www.w3.org/2006/03/wn/wn20/');
define('TAXONOMY_XML_W3C_WN_SCHEMA', TAXONOMY_XML_W3C_WN . 'schema/');

// Dublin Core - Metadata standards
define('TAXONOMY_XML_DC_NS', 'http://purl.org/dc/elements/1.1/');

// Simple Knowledge Organization System - Structural information management
define('TAXONOMY_XML_SKOS_NS', 'http://www.w3.org/2004/02/skos/core#');

// IPTC gets SKOS/RDF URIs wrong. Allow it anyway: this alternative namespace
// is accepted alongside the real SKOS one when recognising resource types.
define('TAXONOMY_XML_SKOSREF_NS', 'http://www.w3.org/TR/skos-reference/skos.html#');

// Taxonomic Database Working Group - Biodiversity Information Standards (LSIDs etc)
define('TAXONOMY_XML_TDWG_NS', 'http://rs.tdwg.org/ontology/voc/Collection#');

// Freebase data, plus the WGS84 geo-position and Google Earth KML namespaces.
define('TAXONOMY_XML_FB_NS', 'http://rdf.freebase.com/ns/');
define('TAXONOMY_XML_GEO_NS', 'http://www.w3.org/2003/01/geo/wgs84_pos#');
define('TAXONOMY_XML_KML_NS', 'http://earth.google.com/kml/2.1/');

// Location of the ARC RDF parser library. Only the 'arc' directory inside
// this module's own folder is resolved here; a system-shared location is not
// looked up by this definition.
define('TAXONOMY_XML_ARC1_PATH', drupal_get_path('module', 'taxonomy_xml') . '/arc');

// Made up just for myself: a private namespace for this module's own
// identifiers.
define('TAXONOMY_XML_DRUPAL_NS', 'http://drupal.org/project/taxonomy_xml#');

/**
 * Describe the RDF/XML format to the taxonomy_xml format registry.
 *
 * Implementation of (internal) taxonomy_xml_HOOK_format_info()
 *
 * @return
 *   An array keyed by format id. Each entry names the format, the module
 *   providing it, and the callback that parses input in that format.
 */
function taxonomy_xml_rdf_format_info() {
  $rdf = array();
  $rdf['id'] = 'RDF';
  $rdf['name'] = 'RDF/XML';
  $rdf['module'] = 'taxonomy_xml';
  $rdf['parser_callback'] = 'taxonomy_xml_rdf_parse';

  return array($rdf['id'] => $rdf);
}

/**
 * Read in RDF taxonomies and vocabularies. Create vocabs and terms as needed.
 *
 * See formats.html readme for information about the RDF input supported.
 *
 * Targets include:
 *   ICRA      Content Rating  http://www.icra.org/vocabulary/
 *   WordNet   Lexicon http://wordnet.princeton.edu/
 *   SUMO      http://www.ontologyportal.org/
 *   Freebase
 *
 * ... and the ontologies found at http://www.schemaweb.info/ that implement
 * appropriate parts of the RDF Schema "rdfs" (eg Classes with subclassOf)
 *
 * This function takes care of the parsing of RDF syntax into attributes
 * (predicates). Actual term creation is done by taxonomy_xml_rdf_make_term(),
 * which in turn calls taxonomy_xml_canonicize_predicates().
 *
 * @param $data
 *   The string containing XML/RDF.
 * @param $vid
 *   int Vocab ID. If 0, the vocabulary described in the input is used (or a
 *   placeholder is created) and $vid is modified by reference to point to it.
 * @param $url
 *   Optional source URL this RDF came from, if needed to resolve GUIDs etc.
 *   Cannot work for uploads. A '#fragment' on the URL restricts the import to
 *   statements about that one resource.
 *
 * @return
 *   A list of resulting terms, keyed by their identifier. FALSE on failure.
 */
function taxonomy_xml_rdf_parse(&$data, &$vid, $url = NULL) {

  // See if it's really a different file we need to parse: separate the
  // document URL from any '#anchor' naming a single resource inside it.
  // ($anchor is always defined from here on, possibly NULL.)
  $url_parts = explode('#', (string) $url);
  $resource_url = $url_parts[0];
  $anchor = isset($url_parts[1]) ? $url_parts[1] : NULL;

  $triples = taxonomy_xml_rdf_parse_data_into_triples($data, $resource_url);
  if (empty($triples)) {
    drupal_set_message(t("No data extracted from input %url.", array(
      '%url' => $resource_url,
    )));
    return FALSE;
  }

  // If a specific ID was defined in the file, this means we just need to load
  // that one. This will help break things up for batches, and also allow us to
  // grab only sub-trees from big files.
  if (!empty($anchor)) {
    watchdog('taxonomy_xml', "\n      We were only asked about #%anchor in this document.\n      Reducing the data down to statements about that.", array(
      '%anchor' => $anchor,
    ), WATCHDOG_DEBUG);
    $triples = taxonomy_xml_rdf_get_statements_about($url, $triples);
    if (empty($triples)) {
      watchdog('taxonomy_xml', "Found no information about  %anchor in the document !resource_url", array(
        '%anchor' => $anchor,
        '!resource_url' => l($resource_url, $resource_url),
      ), WATCHDOG_WARNING);
    }
  }

  // The RDF input may come in several flavours,
  // Resources of the following 'types' may be cast into taxonomy terms for our purposes.
  // That is, an rdf:Class is a Drupal:term
  //
  // These are the things to look for.
  // Add to this list as needed
  $term_types = array(
    TAXONOMY_XML_RDF_NS . 'Property',
    TAXONOMY_XML_DC_NS . 'subject',
    TAXONOMY_XML_RDFS_NS . 'Class',
    TAXONOMY_XML_OWL_NS . 'Class',
    TAXONOMY_XML_W3C_WN_SCHEMA . 'Word',
    TAXONOMY_XML_W3C_WN_SCHEMA . 'NounWordSense',
    TAXONOMY_XML_W3C_WN_SCHEMA . 'NounSynset',
    TAXONOMY_XML_CONTENTLABEL_NS . 'Category',
    TAXONOMY_XML_SKOS_NS . 'Concept',
    TAXONOMY_XML_SKOSREF_NS . 'Concept',
    'urn:lsid:ubio.org:classificationbank',
    'http://prismstandard.org/namespaces/2.0/pcv/Descriptor',
    // A freebase core 'topic'
    // freebase 'topic' is a superclass of useful things like 'music.genre'
    TAXONOMY_XML_FB_NS . 'common.topic',
    // @see http://www.alexandria.ucsb.edu/gazetteer/FeatureTypes/FTT_metadata.htm
    'http://www.esri.com/metadata/catalog/adl/#PT',
  );

  // A Drupal 'vocabulary' is represented by an owl:Ontology
  // or other similar shaped constructs
  $vocabulary_types = array(
    TAXONOMY_XML_OWL_NS . 'Ontology',
    TAXONOMY_XML_RDF_NS . 'Description',
    'http://www.w3.org/2001/12/Glossary',
    TAXONOMY_XML_TDWG_NS . 'Collection',
    TAXONOMY_XML_SKOS_NS . 'ConceptScheme',
    TAXONOMY_XML_SKOSREF_NS . 'ConceptScheme',
    // Resources that are of type fb:type_profile are often collections of
    // 'topics', thus they are analogous to our 'vocabulary'.
    TAXONOMY_XML_FB_NS . 'freebase.type_profile',
    TAXONOMY_XML_FB_NS . 'base.ontologies.ontology_class',
  );

  // Group the statements about things together.
  // This will flatten the structure a little, and discards namespaces
  $resources_by_type = taxonomy_xml_convert_triples_to_sorted_objects($triples);

  // The resources are all initialized as data objects.
  // Resource types we expect to be dealing with are just vocabs and terms.
  if (!$anchor) {

    // Message is just noise if using anchors.
    watchdog('taxonomy_xml', "\n      Found %count different <strong>kinds</strong> of resources\n      in the named input : %types\n      ", array(
      '%count' => count($resources_by_type),
      '%types' => join(', ', array_keys($resources_by_type)),
    ), WATCHDOG_INFO);
  }
  if (count($resources_by_type) == 0) {
    watchdog('taxonomy_xml', "\n      It sure doesn't look like this is any useful sort of RDF source.\n      Probably need to do content-negotiation or something. Aborting.", array(
      '%url' => '',
    ), WATCHDOG_WARNING);
    // Documented failure value (previously an implicit NULL).
    return FALSE;
  }

  // $vocab_uri ends up holding the URI of the last vocabulary definition seen;
  // it is used further down to stop a vocab also being imported as a term.
  $vocab_uri = NULL;
  $vocabularies = array();
  if ($vid == 0) {

    // We've been asked to use the vocab described in the source file.
    // If the vid has already been set, we ignore vocab definitions found in the file
    // Scan the sorted objects for vocabulary definitions
    // Hopefully there's only one vocab per file, but loop anyway
    foreach ($vocabulary_types as $vocabulary_type) {
      if (isset($resources_by_type[$vocabulary_type]) && is_array($resources_by_type[$vocabulary_type])) {
        foreach ($resources_by_type[$vocabulary_type] as $vocab_uri => &$vocabulary_handle) {
          $vocabularies[$vocab_uri] =& $vocabulary_handle;
        }
        // Drop the loop variable's binding so it cannot alias the last entry.
        unset($vocabulary_handle);
      }
    }
    drupal_set_message(t("Found %count resources to be used as vocabulary definitions", array(
      '%count' => count($vocabularies),
    )));
    if (!$vocabularies) {

      // Create a placeholder.
      $vocabularies[] = (object) array(
        'name' => 'Imported Vocabulary',
      );
    }
    $vid = taxonomy_xml_absorb_vocabulary_definitions($vocabularies);

    // $vocabularies now contains a keyed array of target vocabularies the terms may be put into
    // $vid is the default one (most common is one vocab per input file) to be used unless otherwise defined per-term.
    if (empty($vid)) {
      drupal_set_message(t("No vocabulary to add terms to, aborting."), 'error');
      return FALSE;
    }
  }
  else {

    // Else using a form-selected vocab.
    $vocabularies[$vid] = taxonomy_vocabulary_load($vid);

    // Note that a pre-made vocab already in the system will not have predicates
    // any more. Don't count on them
  }
  foreach ($vocabularies as $vocabulary) {
    module_invoke_all('taxonomy_xml_vocabulary_presave', $vocabulary);
  }

  //
  // VOCAB set up, start on TERMS...
  //

  // Gather the resources that will become terms.
  // Slightly long way (not using array_merge), as I need to merge indexed and by reference
  $terms = array();
  foreach ($term_types as $term_type) {
    if (isset($resources_by_type[$term_type]) && is_array($resources_by_type[$term_type])) {
      foreach ($resources_by_type[$term_type] as $guid => &$term_handle) {

        // Grab name/label early for debugging and indexing
        if (isset($term_handle->predicates['label'])) {
          $labels = $term_handle->predicates['label'];
          $term_handle->name = reset($labels);
        }
        $terms[$guid] =& $term_handle;
      }
      unset($term_handle);
    }
  }

  // A FB import MAY also tell us a vocabulary is a top-level term
  // FB allows it to be both. We don't, it breaks things
  if (isset($terms[$vocab_uri])) {
    watchdog('taxonomy_xml', 'Vocab %vocab_uri was allegedly both a vocab and a term. Drupal can not handle that. Simplifing', array(
      '%vocab_uri' => $vocab_uri,
    ), WATCHDOG_NOTICE);
    unset($terms[$vocab_uri]);
  }

  // Some of the RDF documents I've been fed DO NOT DEFINE A TYPE for their primary subject.
  // Neither
  // http://www.ubio.org/authority/metadata.php nor
  // http://biocol.org/ nor
  // http://lsid.tdwg.org/
  // return RDF that says WHAT the data is. Those that use LSIDs have a type encoded in the Identifier itself :-/
  // I end up with a collection of data but no idea what it's really talking about.
  // But IF an entity is rdf:about="THIS URL" then we will take a leap and assume that is our target lump of data.
  // ... this worked for biocol input
  $untyped_resources = isset($resources_by_type[TAXONOMY_XML_UNTYPED]) ? (array) $resources_by_type[TAXONOMY_XML_UNTYPED] : array();
  foreach ($untyped_resources as $identifier => $untyped_lump) {
    if ($identifier == $url) {

      // Looks like this was the specific thing we were looking for
      $terms[$identifier] = $untyped_lump;
    }
  }

  // FREEBASE only
  // Special case for freebase.
  // If we are reading a top-level topic type page
  // eg http://www.freebase.com/tools/explore/music/genre
  // type = fb:type_profile
  // then it may contain a list of 'instances' which represent our desired
  // member terms.
  $fb_vocab_type = TAXONOMY_XML_FB_NS . 'freebase.type_profile';
  $fb_vocabularies = isset($resources_by_type[$fb_vocab_type]) ? (array) $resources_by_type[$fb_vocab_type] : array();
  foreach ($fb_vocabularies as $vocab_uri => $vocabulary) {
    $instances = isset($vocabulary->predicates['type.type.instance']) ? $vocabulary->predicates['type.type.instance'] : array();
    if (!empty($instances)) {

      // I've got a list of URIs that represent terms, but not even a name for them
      // The system will still hopefully be able to work it out from just that.
      watchdog('taxonomy_xml', "\n        FREEBASE: Each <em>instance</em> listed in a freebase <em>type profile</em>\n        will be imported as a term.", array(), WATCHDOG_INFO);
      foreach ($instances as $term_guid) {
        $terms[$term_guid] = $placeholder_term = (object) array(
          'guid' => $term_guid,
          'vid' => $vid,
        );

        // Queue a full lookup of this item
        taxonomy_xml_add_term_to_batch_queue($placeholder_term);
        watchdog('taxonomy_xml', "Queuing a full retrieval of term !term_uri it for later retrieval and import", array(
          '!term_uri' => l($term_guid, $term_guid),
        ), WATCHDOG_INFO);
      }
    }

    // Extra diagnostic - freebase-specific.
    // Predicate values are stored as arrays, so take the first value and
    // compare numbers with numbers. (Comparing the array itself against an
    // integer is always TRUE in PHP and made this warning fire every time.)
    if (isset($vocabulary->predicates['freebase.type_profile.instance_count'])) {
      $claimed_count = $vocabulary->predicates['freebase.type_profile.instance_count'];
      if (is_array($claimed_count)) {
        $claimed_count = reset($claimed_count);
      }
      $claimed_count = (int) $claimed_count;
      $actual_count = is_array($instances) ? count($instances) : 0;
      if ($claimed_count > $actual_count) {
        watchdog('taxonomy_xml', "\n          FREEBASE: The topic set definition claims there are %instance_count\n          topic instances in the set, but I can see only %actual_count.\n          Some data may be missing from this doc that I am unable to retrieve.\n          ", array(
          '%instance_count' => $claimed_count,
          '%actual_count' => $actual_count,
        ), WATCHDOG_WARNING);
      }
    }

    // Resources that are being processed as vocabs are NOT also terms.
    // But the freebase schema labels topic sets as 'topics' themselves.
    // Unset this so as not to make a vocab definition a member of itself.
    unset($resources_by_type[TAXONOMY_XML_FB_NS . 'common.topic'][$vocab_uri]);
  }
  if (!$anchor) {

    // Shh. (Noise when only one anchored resource was asked for.)
    drupal_set_message(t("Found %count resources to be imported as terms into vocabulary %vid", array(
      '%count' => count($terms),
      '%vid' => $vid,
    )));
  }

  //
  // START MAKING TERMS
  //
  foreach ($terms as $identifier => &$term) {
    $term->identifier = $identifier;
    if (!isset($term->vid)) {

      // This is just a default fallback. Imported terms should really have already chosen their vid.
      $term->vid = $vid;
    }
    taxonomy_xml_rdf_make_term($term);
  }
  unset($term);

  // Now the terms are all happily created, create their relationships
  // Couldn't do so until they had all been given tids.
  taxonomy_xml_set_term_relations($terms);

  // Note this will not yet affect terms that have been queued for later processing.
  // Such terms will need to attach themselves to the parent terms themselves.
  foreach ($vocabularies as $vocabulary) {
    module_invoke_all('taxonomy_xml_vocabulary_postsave', $vocabulary);
  }
  return $terms;
}

/**
 * Run the ARC RDF/XML parser over the given data and return its triples.
 *
 * A one-slot cache is kept: if the (non-empty) $url matches the URL of the
 * previous successful parse, the previous result is returned without parsing
 * again. Only the most recent result is remembered, so repeated requests
 * against one big all-in-one file are cheap, while spidering many different
 * files does not accumulate old results.
 *
 * @param $data
 *   The RDF/XML source text.
 * @param $url
 *   The URL the data came from. Used as the cache key and in log messages.
 *
 * @return
 *   The array of triples produced by the parser, or NULL if the parser did not
 *   return an array (an error message is shown in that case).
 */
function taxonomy_xml_rdf_parse_data_into_triples($data, $url) {
  static $cached_triples, $cached_url;

  // Same document as last time? Hand back the remembered result.
  $is_repeat_request = !empty($url) && $url == $cached_url;
  if ($is_repeat_request) {
    return $cached_triples;
  }

  watchdog('taxonomy_xml', "Parsing RDF from !url", array(
    '!url' => l($url, $url),
  ), WATCHDOG_INFO);

  // Use ARC parser
  require_once TAXONOMY_XML_ARC1_PATH . "/ARC_rdfxml_parser.php";
  $parser = new ARC_rdfxml_parser(array(
    "bnode_prefix" => "genid",
    "base" => "",
  ));
  $parsed = $parser->parse_data($data);

  // Anything other than an array is treated as the parser's error report.
  if (!is_array($parsed)) {
    drupal_set_message(t("Problem parsing input %message", array(
      '%message' => $parsed,
    )), 'error');
    return;
  }

  watchdog('taxonomy_xml', "\n    %count data triples (atomic statements) found in the source RDF doc", array(
    '%count' => count($parsed),
  ), WATCHDOG_INFO);

  // Remember this result for the next call with the same URL.
  $cached_url = $url;
  $cached_triples = $parsed;
  return $parsed;
}

/**
 * Filter a big list of triples down to only the ones about one subject.
 *
 * The subject identifier of each statement is read from 'uri' (for URI
 * subjects), 'bnode_id' (for bnodes, trimmed) or 'val' (anything else,
 * trimmed) and compared to $guid as an exact string. A loose comparison is
 * deliberately avoided: PHP would treat numeric-looking identifiers such as
 * '1e3' and '1000' as equal.
 *
 * @param $guid
 *   The identifier (URI or bnode id) of the subject of interest.
 * @param $triples
 *   Array of statements, each with an 's' (subject) entry describing its
 *   'type' and value.
 *
 * @return
 *   The matching statements, still keyed by their original index in $triples.
 */
function taxonomy_xml_rdf_get_statements_about($guid, $triples) {
  $filtered_statements = array();
  $wanted = (string) $guid;
  foreach ($triples as $triplenum => $statement) {
    $subject = (isset($statement['s']) && is_array($statement['s'])) ? $statement['s'] : array();
    $subject_type = isset($subject['type']) ? $subject['type'] : NULL;
    switch ($subject_type) {
      case 'uri':
        $subject_uri = isset($subject['uri']) ? $subject['uri'] : '';
        break;

      case 'bnode':
        $subject_uri = isset($subject['bnode_id']) ? trim($subject['bnode_id']) : '';
        break;

      default:
        $subject_uri = isset($subject['val']) ? trim($subject['val']) : '';
    }
    if ((string) $subject_uri === $wanted) {
      $filtered_statements[$triplenum] = $statement;
    }

    // else ignore
  }
  return $filtered_statements;
}

/**
 * Create the placeholder and fill in the values for this term - NOT its
 * relationships yet.
 *
 * The term's predicates are converted to taxonomy attributes, merged with any
 * matching term that already exists (found by GUID, else by name), saved, and
 * then refreshed from the saved copy. Presave/postsave hooks of other modules
 * are invoked around the save.
 *
 * @param $term
 *   Term object, modified by reference. Must already have ->identifier and
 *   ->vid set.
 *
 * @return
 *   The updated term object, or FALSE if no usable name could be found for the
 *   term (in which case nothing is saved).
 */
function taxonomy_xml_rdf_make_term(&$term) {
  $identifier = $term->identifier;

  // When running in batch, children will have a hard time finding their
  // parents if they only know them by source-localized ID (probably a URI)
  // and the destination-taxonomy (here) HASN'T REMEMBERED THAT INFO.
  // Because taxonomy.module just doesn't.
  // We require some other module (taxonomy_enhancer is good) to save that
  // metadata for us so the child can find its target later.
  // This is our 'identifier' - the REMOTE identifier not the local one.
  if (!isset($term->guid)) {
    $term->guid = $identifier;
  }

  // Build term from data
  // Convert all input predicates into attributes on the object
  // the taxonomy.module will understand
  taxonomy_xml_canonicize_predicates($term);

  // Ensure name is valid
  if (empty($term->name)) {

    // If the parent is trying to link to a child thats not yet made,
    // we probably don't know a proper name or label.
    // Fallback to a name, identifier derived (roughly) from the URI identifier - not always meaningful, but all we have in some contexts.
    $term->name = taxonomy_xml_label_from_uri($identifier);
    watchdog('taxonomy_xml', "\n      We were unable to find a specific label for the term\n      referred to as %identifier.\n      Guessing that %name will be good enough.", array(
      '%identifier' => $identifier,
      '%name' => $term->name,
    ), WATCHDOG_NOTICE);

    // Still, this causes problems if queuing data about terms that are not yet loaded
    // - such as those that are ONLY referenced by URI with no human name (Freenet)
    // Our munged names are temporary until the full data is retrieved.
    if (empty($term->name)) {

      // Still not set?
      // This should be impossible - all subjects must have a URI
      // But who knows what weirdness the input gave us
      drupal_set_message(t("\n        A term called %identifier didn't produce any readable name to use. ", array(
        '%identifier' => $identifier,
      )), 'error');
      // Give up on this term. (This used to be a 'continue' left over from
      // when this code lived inside a loop; outside a loop that is a fatal
      // error.)
      return FALSE;
    }
  }
  $force_new = variable_get('taxonomy_xml_duplicate', FALSE);

  // See if a definition matching this term's GUID (or failing that, name)
  // already exists in the DB. Build on that.
  $existing_term = taxonomy_xml_get_term_by_guid($term->guid, $term->vid);
  if (!$existing_term) {
    $existing_term = _taxonomy_xml_get_term_placeholder($term->name, $term->vid, $force_new);
  }

  // Merge the old term objects properties into this one. Really just want its tid, but there may be more info I should not lose.
  // New input takes precedence over older data. Old data just fills in the gaps.
  // Skipped when nothing was found: casting FALSE to an array would otherwise
  // add a meaningless property named '0' to the term.
  if ($existing_term) {
    foreach ((array) $existing_term as $key => $value) {
      if (!isset($term->{$key})) {
        $term->{$key} = $value;
      }
    }
  }

  // The term object is now as tidy as it can be as a self-contained entity.
  if (variable_get('taxonomy_xml_reuseids', FALSE)) {

    // TODO this has not been tested since migration from D5!
    // MAINTAIN IDS
    // Because this is likely to be used with a site-cloning set-up,
    // it would help if we tried to match IDs
    // OTOH, doing so could be very messy for other situations.
    // So,
    //  iff there is no pre-existing term with this id,
    //  create this one as a clone with the old ID.
    // This requires a little DB sneakiness.
    if (isset($term->internal_id) && !taxonomy_term_load($term->internal_id)) {
      $term->tid = $term->internal_id;
      drupal_set_message(t("Doing sneaky import of %term_name re-using the internal id = %term_id", array(
        '%term_name' => $term->name,
        '%term_id' => $term->internal_id,
      )));
      // Description and weight are optional on imported terms.
      $description = isset($term->description) ? $term->description : '';
      $weight = isset($term->weight) ? $term->weight : 0;
      db_query("INSERT INTO {term_data} (tid, name, description, vid, weight) VALUES (%d, '%s', '%s', %d, %d)", $term->tid, $term->name, $description, $term->vid, $weight);

      // sequences is gone in D6. Will inserting beyond the auto-increment self-correct?
      $current_id = db_last_insert_id('term_data', 'tid');
      if ($current_id < $term->tid) {

        // This is probably now MYSQL specific.
        db_query("ALTER TABLE {term_data} AUTO_INCREMENT = %d;", $term->tid);
      }
    }
  }

  // Here's where last-minute data storage done by other modules gets set up.
  // module_invoke_all doesn't do pass-by-reference, so do our own loop.
  foreach (module_implements('taxonomy_xml_term_presave') as $module) {
    $function = $module . '_' . 'taxonomy_xml_term_presave';
    $function($term);
  }

  // Assist taxonomy_enhancer: copy any predicate whose name matches the title
  // of an enhancer field into that field's values.
  // (should be delegated to a helper hook)
  if (module_exists('taxonomy_enhancer')) {
    $fields = taxonomy_enhancer_get_fields_by_vocabulary($term->vid);
    foreach ($fields as $te_field) {
      if (isset($term->predicates[$te_field->title])) {
        foreach ($term->predicates[$te_field->title] as $delta => $value) {
          $term->fields[$te_field->fid][$delta] = array(
            'value' => $value,
            'format' => 0,
          );
        }
      }
    }
  }

  // taxonomy_save_term() wants an array, passed by reference.
  $save_term = (array) $term;
  $status = taxonomy_save_term($save_term);

  // Re-retrieve the new term definition,
  // just in case anything extra happened to it during processing
  $new_term = taxonomy_xml_get_term_by_name_from_vocab($term->name, $term->vid);
  if (!$new_term) {
    drupal_set_message(t("\n      It seems like we failed to create and retrieve a term called %term_name", array(
      '%term_name' => $term->name,
    )), 'error');
  }
  else {

    // Merge retrieved values back over our main definition so the handles are up-to-date
    foreach ((array) $new_term as $key => $value) {
      $term->{$key} = $value;
    }
  }
  if ($status == SAVED_NEW) {

    // Just remember this is fresh - for useful feedback messages.
    $term->taxonomy_xml_new_term = TRUE;
  }

  // It's possible that not all the referenced items were available in the current document/loop
  // Add referred items to the import queue for later processing
  taxonomy_xml_add_all_children_to_queue($term);

  // A flag to avoid double-processing
  $term->taxonomy_xml_presaved = TRUE;

  // Allow other hooks to do last-minute processing
  // http://drupal.org/node/791376
  foreach (module_implements('taxonomy_xml_term_postsave') as $module) {
    $function = $module . '_' . 'taxonomy_xml_term_postsave';
    $function($term);
  }
  return $term;
}

/**
 * Compile triple statements into information objects again.
 *
 * Returns a nested array, Indexed on their URI/id, and grouped by type
 * (references so we can change them).
 *
 * Not all RDF data objects declare exactly what they are, some just announce
 * that they exist.
 * Some guesswork is done if their identifier is an LSID - we can deduce
 * what type of object it refers to. An explicit RDF:type will take priority
 * over this assumption.
 *
 * @param $triples
 *   Array of statements, each with 's' (subject), 'p' (predicate URI) and 'o'
 *   (object) entries. Passed by reference and EMPTIED as it is processed, to
 *   keep memory use down on big vocabularies.
 *
 * @return
 *   Array keyed by type URI, each entry an array of resource objects keyed by
 *   their identifier. Resources whose type could not be worked out are listed
 *   under TAXONOMY_XML_UNTYPED.
 */
function taxonomy_xml_convert_triples_to_sorted_objects(&$triples) {

  // Triples are boringly granular bits of information.
  // Merge them.
  $resources = array();
  $resources_by_type = array();
  foreach ($triples as $triplenum => $statement) {

    // Inspect the subject type and get value of the specified field
    // if type is 'uri' get the value of 'uri' or type is 'bnode' then get 'bnode_id'
    // by Remzi Celebi
    switch ($statement['s']['type']) {
      case 'uri':
        $subject_uri = $statement['s']['uri'];
        break;

      case 'bnode':
        $subject_uri = trim($statement['s']['bnode_id']);
        break;

      default:
        $subject_uri = trim($statement['s']['val']);
    }
    if (!isset($resources[$subject_uri])) {

      // Create placeholder if this is the first occurrence of this subject
      $resources[$subject_uri] = (object) array();
    }
    $subject =& $resources[$subject_uri];

    // Namespaces are boring, Simplify the predicates
    // TODO - revisit if namespaces are needed
    $predicate = taxonomy_xml_rdf_shortname($statement['p']);

    // All predicates are stored in arrays, setup placeholder
    if (!isset($subject->predicates[$predicate])) {
      $subject->predicates[$predicate] = array();
    }

    // Set the object of this subject, into its predicate array.
    // The object may be a literal, an identifier, or a bnode
    // In the case of an identifier or bnode, that is a reference to
    // something found elsewhere.
    // Find and apply the $object_val. Start from NULL each time so that a
    // statement with an unrecognised object type can not inherit the value
    // left over from the previous statement.
    $object_val = NULL;
    switch ($statement['o']['type']) {
      case 'uri':
        $object_uri = $object_val = $statement['o']['uri'];

        // Also make a placeholder for the object, for convenience.
        // It's not much fun referring to something that doesn't exist.
        if (!isset($resources[$object_uri])) {
          $resources[$object_uri] = (object) array();
        }

        // Only add uniques, Keeps clutter down
        if (!in_array($object_val, $subject->predicates[$predicate], TRUE)) {
          $subject->predicates[$predicate][] = $object_val;
        }
        break;

      case 'literal':
        $object_val = isset($statement['o']['val']) ? trim($statement['o']['val']) : '';

        // If there appear to be alternative versions of the same thing,
        // save both, keyed by language if appropriate.
        $lang = !empty($statement['o']['lang']) ? $statement['o']['lang'] : NULL;
        if ($lang) {

          /**
           * Need to do extra strangeness to support multiple values (synonyms)
           * x multiple languages!
           *
           * <skos:altLabel xml:lang="en">Emergency relief</skos:altLabel>
           * <skos:altLabel xml:lang="en">Emergency assistance in disasters</skos:altLabel>
           * <skos:altLabel xml:lang="en">Disaster assistance</skos:altLabel>
           * <skos:altLabel xml:lang="fr">Panic</skos:altLabel>
           *
           * becomes
           * $term->predicates['altLabel'] = array(
           *   'en' => 'Emergency relief',
           *   'en:1' => 'Emergency assistance in disasters',
           *   'en:2' => 'Disaster assistance',
           *   'fr' => 'Panic',
           * )
           *
           * The actual key is not expected to be re-used at the moment, it's
           * just informational. I haven't thought this through.
           *
           */
          $key = $lang;
          if (isset($subject->predicates[$predicate][$lang])) {
            $key = $lang . ":" . count($subject->predicates[$predicate]);
          }
          $subject->predicates[$predicate][$key] = $object_val;
        }
        else {
          $subject->predicates[$predicate][] = $object_val;
        }
        break;

      case 'bnode':
        if (isset($statement['o']['bnode_id'])) {
          $object_val = $statement['o']['bnode_id'];
          $subject->predicates[$predicate][$object_val] = $object_val;

          // Make sure the bnode exists as a real object before pointing at it.
          // Taking a reference to a missing array entry would silently create
          // a NULL entry, which the clean-up loop below would then try to
          // treat as an object.
          if (!isset($resources[$object_val])) {
            $resources[$object_val] = (object) array();
          }

          // 2010-05-27 dman
          // Generally we discard bnodes after sucking the usefulness out of them
          // BUT if they are really useful structured data, hang on to them for
          // possible later, deeper processing.
          $subject->bnodes[$predicate][$object_val] =& $resources[$object_val];
        }
        break;
    }
    if ($predicate == 'type' && isset($object_val)) {

      // Very important info!
      $subject->type = $object_val;

      // Sort it! (by reference)
      $resources_by_type[$subject->type][$subject_uri] =& $subject;

      // It's legal for a resource to have more than one 'type' (see Freenet)
      // This is fine, a pointer to the item is placed in both bags.
    }
    if ($predicate == TAXONOMY_XML_NAME && isset($object_val)) {
      $subject->name = $object_val;
    }

    // This is very memory-intensive for big vocabs. Try to clean up:(
    unset($triples[$triplenum]);
  }
  // Release the working reference before $subject is reused below.
  unset($subject);

  // A special work-around for irregular data.
  // Scan the full array for any lost (untyped) data,
  // Make some guesses if we can, and collect the rest into a catch-all 'untyped' list.
  $unknown_resources = array();
  foreach ($resources as $guid => &$subject) {

    // While we are looping,
    // Make a guess at its original, internal ID
    // grabbing the last numeric bit from the id in the document
    // eg from '#vocab/1' or '#vocabulary:1' or #term33
    // Be very generic and forgiving in the format we look for
    // Not really used much yet.
    $parts = preg_split('|[^\\d]|', $guid);
    $last_num = array_pop($parts);
    if (is_numeric($last_num)) {
      $subject->internal_id = $last_num;
    }

    // Anyway, check the type. If not known,
    // This could confuse us later, make a note for analysis.
    if (!isset($subject->type)) {
      $url_parts = @parse_url($guid);
      if (isset($url_parts['host'])) {

        // looks (roughly) like a valid URI - No need to complain about legal external references.
        // It's only unresolvable ones that could be a problem.
        continue;
      }

      // If the identifier of this resource is an 'LSID'
      // then the type is sort of embedded in the string as the 'namespace'.
      // See if we can extract it.
      if ($lsid = taxonomy_xml_parse_lsid($guid)) {
        $resources_by_type[$lsid['type']][$guid] =& $subject;
        continue;
      }

      // Nope, it's a total UFO, make a note for debugging
      if (drupal_substr($guid, 0, 2) != '_:') {

        // Ignore 'Here' nodes produced by ARC, eg '_:genid1', '_:genid2'
        $unknown_resources[$guid] =& $subject;
      }
    }
  }
  unset($subject);

  if ($unknown_resources) {

    // Just FYI, make a note about the quality of data found.
    // Do not complain about URLs - this is quite normal.
    watchdog('taxonomy_xml', "\n      Found %count Unsorted (untyped) resources.\n      They are entities that are the subject of a statement,\n      but I don't know what <em>type</em> of thing they are.\n      Not sure what I'll do with these.\n      They are things that have had statements made about them ..\n      that I don't recognise.\n      Probably just extra data found in the input and ignored.\n      ID was: %unknown", array(
      '%count' => count($unknown_resources),
      '%unknown' => join(', ', array_keys($unknown_resources)),
    ), WATCHDOG_DEBUG);
    $resources_by_type[TAXONOMY_XML_UNTYPED] = $unknown_resources;
  }
  return $resources_by_type;
}

/**
 * Pick a single string out of a set of language-tagged alternatives.
 *
 * Util func to help read complex RDF statements.
 *
 * @param $values
 *   Either a plain string, or an array of strings (possibly keyed by language
 *   code).
 *
 * @return
 *   The trimmed string. For an array with several entries the (non-empty) 'en'
 *   entry wins; otherwise, and for single-entry arrays, the last entry is used.
 */
function taxonomy_xml_get_literal_string($values) {
  // Plain values need no selection, only tidying.
  if (!is_array($values)) {
    return trim($values);
  }

  // Only when there is a real choice to make do we look for English.
  // TODO add language selector
  $english = FALSE;
  if (count($values) != 1 && isset($values['en'])) {
    $english = $values['en'];
  }

  // No usable English variant: fine, whatever - take the last one.
  $chosen = $english ? $english : array_pop($values);
  return trim($chosen);
}

/**
 * Return the shorthand label of a potentially long RDF URI
 *
 * EG, for http://www.w3.org/1999/02/22-rdf-syntax-ns#Property
 * return 'Property'
 * ... for sanity
 *
 * Also flatten LSIDs - which are used like URIs but just are NOT as useful.
 *
 * The short name is, in order of preference: the URI fragment, the query
 * string, or the last segment of the path.
 *
 * @param $uri
 *   A URI (or LSID) string.
 *
 * @return
 *   The short name, or an empty string if none can be derived.
 */
function taxonomy_xml_rdf_shortname($uri) {

  // For LSID simplification, flatten assorted RDF-LSID-Predicates (from any
  // authority) into their simple name.
  if (($lsid = taxonomy_xml_parse_lsid($uri)) && $lsid['namespace'] == 'predicates') {
    return $lsid['identifier'];
  }

  // If I recognised namespaces, I could use short ones (e.g. via
  // rdf_uri_to_qname() when available). That would be fine.
  // But I don't want to start conflicting with rdf.modules ones.
  // By trimming namespaces and making guesses, we can't put them back in.
  // Needs revision.
  $parts = parse_url($uri);
  if (!is_array($parts)) {
    // parse_url() returns FALSE on seriously malformed input.
    return '';
  }
  if (!empty($parts['fragment'])) {
    return $parts['fragment'];
  }
  if (!empty($parts['query'])) {
    return $parts['query'];
  }

  // The proper method for guessing simple names is probably documented
  // elsewhere ... this does the trick for now.
  // A URI such as 'http://example.com' has no path component at all.
  return isset($parts['path']) ? basename($parts['path']) : '';
}

/**
 * Return an XML/RDF document representing this vocab
 *
 * I'd like to use ARC libraries, but it doesn't appear to include an RDF
 * serializer output method, only an input parser...
 *
 * Uses PHP DOM to create DOM document and nodes.
 *
 * We use namespaces carefully here, although it may create wordy output if the
 * DOM is not optimizing the declarations for us. Still, best to be explicit, it
 * would seem.
 *
 * The URI used to refer to other resources is based on the source document
 * location, eg
 * http://this.server/taxonomy_xml/{vid}/rdf#{tid}
 *
 * Preamble should look something like:
 *
 * <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
 *   xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#"
 *   xmlns:owl="http://www.w3.org/2002/07/owl#"
 *
 * @param $vid
 *   ID of the vocabulary to export.
 * @param $parent
 *   Passed through to taxonomy_get_tree().
 * @param $depth
 *   Passed through to taxonomy_get_tree().
 * @param $max_depth
 *   Passed through to taxonomy_get_tree().
 *
 * @return
 *   The serialized XML document, as a string.
 */
function taxonomy_xml_rdf_create($vid, $parent = 0, $depth = -1, $max_depth = NULL) {
  $vocabulary = taxonomy_vocabulary_load($vid);
  $domcontainer = taxonomy_xml_rdf_document();
  $dom = $domcontainer->ownerDocument;

  // Define the vocab
  taxonomy_xml_add_vocab_as_rdf($domcontainer, $vocabulary);

  // Now start adding terms.
  // They are listed as siblings, not children of the ontology.
  // The tree is fetched once only; an earlier duplicate lookup whose result
  // was immediately overwritten has been dropped.
  $tree = taxonomy_get_tree($vid, $parent, $depth, $max_depth);
  taxonomy_xml_add_terms_as_rdf($domcontainer, $tree, $vocabulary);
  $result = $dom->saveXML();

  // Minor layout tweak for readability: put a line break after every tag
  // that contains a slash, and between directly adjacent tags.
  $result = preg_replace('|(<[^<]*/[^>]*>)|', "\$1\n", $result);
  $result = preg_replace('|><|', ">\n<", $result);
  return $result;
}

/**
 * Set up an RDF document preamble.
 *
 * Builds a new DOM document holding an explanatory comment, an xml-stylesheet
 * processing instruction and an empty rdf:RDF root element with the rdfs, owl,
 * dc and skos namespaces registered on it. Other modules may adjust the root
 * through hook_taxonomy_xml_rdf_document_setup().
 *
 * @return
 *   The rdf:RDF root element that content should land in. The document itself
 *   is reachable through its ownerDocument property.
 */
function taxonomy_xml_rdf_document() {
  $document = new DOMDocument('1.0', 'UTF-8');

  // Human readable banner at the top of the file.
  $banner = "\n    This file was created by Drupal taxonomy_xml import/export tool.\n    http://drupal.org/project/taxonomy_xml\n\n    The RDF schema in this file is intended to match the RDF predicate\n    mapping rules defined in Drupal 7 (unless overridden)\n    Almost entirely SKOS, with a little RDF thrown in.\n    ";
  $document->appendChild($document->createComment(xmlentities($banner)));

  // Stylesheet hint for browsers rendering the raw XML.
  $stylesheet = $document->createProcessingInstruction('xml-stylesheet', 'href="render-taxonomy-rdf.xsl" type="text/xsl"');
  $document->appendChild($stylesheet);

  // The root element that everything else is added to.
  $root = $document->createElementNS(TAXONOMY_XML_RDF_NS, 'rdf:RDF');
  $document->appendChild($root);

  // Declare the commonly used namespaces once, on the root.
  $default_namespaces = array(
    'rdfs' => TAXONOMY_XML_RDFS_NS,
    'owl' => TAXONOMY_XML_OWL_NS,
    'dc' => TAXONOMY_XML_DC_NS,
    'skos' => TAXONOMY_XML_SKOS_NS,
  );
  foreach ($default_namespaces as $prefix => $namespace_uri) {
    taxonomy_xml_rdf_add_namespace($root, $namespace_uri, $prefix);
  }

  // Additional module support hooks may need to add namespaces.
  $hook = 'taxonomy_xml_rdf_document_setup';
  foreach (module_implements($hook) as $module) {
    $implementation = $module . '_' . $hook;
    $implementation($root);
  }

  return $root;
}

/**
 * Declare a namespace prefix on a DOM element.
 *
 * Declaring the prefix once on the document root makes the xmlns declaration
 * appear at the top of the serialized output, so descendants can share it.
 *
 * @param $element
 *   The DOMElement to carry the declaration (normally the rdf:RDF root).
 * @param $uri
 *   The namespace URI.
 * @param $prefix
 *   The prefix to bind to that URI, without a trailing colon.
 */
function taxonomy_xml_rdf_add_namespace($element, $uri, $prefix) {
  // Namespace declarations are themselves attributes in the reserved XMLNS
  // namespace, so the declaration can be set directly. This replaces the
  // previous trick of adding and then removing a dummy namespaced attribute.
  $element->setAttributeNS('http://www.w3.org/2000/xmlns/', 'xmlns:' . $prefix, $uri);
}

/**
 * Create a vocabulary definition (just the def, not its terms) and insert it
 * into the given document element.
 *
 * Side effects: sets $vocabulary->uri, and sets xml:base on the container.
 *
 * @param $domcontainer
 *   The DOM element that receives the vocabulary node, modified in place.
 * @param $vocabulary
 *   A vocab object.
 */
function taxonomy_xml_add_vocab_as_rdf(&$domcontainer, $vocabulary) {
  $document = $domcontainer->ownerDocument;

  // The node is attached to the document as part of its creation - required
  // to prevent it adding dummy namespaces.
  $vocabulary_node = taxonomy_xml_entity_to_rdf($vocabulary, 'taxonomy_vocabulary', $domcontainer);

  $uri = taxonomy_xml_taxonomy_vocabulary_uri($vocabulary);
  $vocabulary->uri = $uri;
  $vocabulary_node->setAttributeNS(TAXONOMY_XML_RDF_NS, 'rdf:ID', $uri['id']);

  // Apparently rdf:ID and rdf:about on the same element is illegal. Hm.
  // Use xml:base instead. http://www.ibm.com/developerworks/xml/library/x-tiprdfai.html
  $base_url = url($uri['path'], array('absolute' => TRUE));
  $domcontainer->setAttribute('xml:base', $base_url);

  // Stamp the export with the time of the current request.
  $version_text = xmlentities(format_date($_SERVER['REQUEST_TIME'], 'long'));
  $version_node = $document->createElementNS(TAXONOMY_XML_OWL_NS, 'owl:versionInfo', $version_text);
  $vocabulary_node->appendChild($version_node);
}

/**
 * Given a list of terms, append definitions of them to the passed DOM container
 *
 * Each term is rendered through taxonomy_xml_entity_to_rdf() using the
 * 'taxonomy_term' mapping, given an rdf:ID, and (for terms without a parent)
 * linked to its vocabulary with skos:topConceptOf. Modules may add to each
 * term node through hook_taxonomy_xml_rdf_export_term().
 *
 * Side effects: sets ->uri and ->child on every term object, and resets the
 * PHP execution time limit after each term.
 *
 * @param $domcontainer
 *   The DOM element that term nodes are appended to.
 * @param $termlist
 *   A FLAT array of all terms, internally cross-referenced to each other
 *   defining the tree structure. A single term object is also accepted.
 * @param $vocabulary
 *   Optional vocabulary object. If it carries a ->uri array (as set by
 *   taxonomy_xml_add_vocab_as_rdf()), that id is used for skos:topConceptOf
 *   links; otherwise the id is derived from each term's own vid.
 */
function taxonomy_xml_add_terms_as_rdf(&$domcontainer, $termlist, $vocabulary = NULL) {
  if (!$termlist) {
    return;
  }
  $dom = $domcontainer->ownerDocument;

  // Allow submission of a single term
  if (!is_array($termlist)) {
    $termlist = array(
      $termlist,
    );
  }

  // Id supplied by the caller, if any. Previously this was read
  // unconditionally, which failed whenever no vocabulary (or one without a
  // ->uri) was passed in.
  $supplied_scheme_id = NULL;
  if (is_object($vocabulary) && isset($vocabulary->uri['id'])) {
    $supplied_scheme_id = $vocabulary->uri['id'];
  }

  // Vocabulary ids resolved from term vids, so each vocabulary is only
  // looked up once per call.
  $scheme_ids = array();

  foreach ($termlist as $term) {
    module_invoke_all('taxonomy_term_load', $term);
    $term->uri = taxonomy_xml_taxonomy_term_uri($term);

    // List child terms, this will help if breaking the XML into lumps
    $term->child = taxonomy_get_children($term->tid, $term->vid);
    $termnode = taxonomy_xml_entity_to_rdf($term, 'taxonomy_term', $domcontainer);

    // That has already added it to the document - required to prevent it
    // adding dummy namespaces.
    $termnode->setAttributeNS(TAXONOMY_XML_RDF_NS, 'rdf:ID', $term->uri['id']);

    // Add this because it helps visualizations
    if (empty($term->parent)) {
      if (isset($supplied_scheme_id)) {
        $scheme_id = $supplied_scheme_id;
      }
      else {
        if (!isset($scheme_ids[$term->vid])) {
          $vocabulary_uri = taxonomy_xml_taxonomy_vocabulary_uri($term->vid);
          $scheme_ids[$term->vid] = $vocabulary_uri['id'];
        }
        $scheme_id = $scheme_ids[$term->vid];
      }
      $rel_node = $dom->createElementNS(TAXONOMY_XML_SKOS_NS, 'skos:topConceptOf');
      $rel_node->setAttributeNS(TAXONOMY_XML_RDF_NS, 'rdf:resource', '#' . $scheme_id);
      $termnode->appendChild($rel_node);
    }
    if ($guid = taxonomy_xml_get_term_guid($term)) {
      $termnode->setAttributeNS(TAXONOMY_XML_RDF_NS, 'rdf:about', $guid);
    }

    // Additional module support
    // eg taxonomy_image, geotaxonomy, path
    $hook = 'taxonomy_xml_rdf_export_term';

    // Can't use module_invoke as we need pass-by-ref
    foreach (module_implements($hook) as $module) {
      $function = $module . '_' . $hook;
      $function($termnode, $term);
    }

    // Workaround for large vocabs: restart the execution timer (10 more
    // seconds) after every term so a long export is not cut off.
    set_time_limit(10);
  }

  // Done all terms in list
}

/**
 * Check ARC RDF library is available
 *
 * (should look into starting to use chaostools to manage plugins?)
 *
 * @return
 *   An array of requirement entries. It is empty when the ARC1 parser file is
 *   readable; anything else means there's a fail.
 */
function taxonomy_xml_rdf_requirements() {
  // All good if the parser file can be read from the expected location.
  if (is_readable(TAXONOMY_XML_ARC1_PATH . "/ARC_rdfxml_parser.php")) {
    return array();
  }

  $problem = array();
  $problem['value'] = t('ARC1 RDF Parser is unavailable.');
  // Numeric value of REQUIREMENT_WARNING.
  $problem['severity'] = 1;
  $problem['description'] = t('
        See <a href="!install">INSTALL.txt</a>
        for the extra features that the external
        <a href="!arc">ARC library</a> can add
        if you download it to %path.
      ', array(
    '!arc' => 'http://arc.semsol.org/',
    '!install' => url(drupal_get_path('module', 'taxonomy_xml') . '/INSTALL.txt'),
    '%path' => TAXONOMY_XML_ARC1_PATH,
  ));

  return array(
    'taxonomy_xml_rdf' => $problem,
  );
}

/**
 * Output a term (and the terms beneath it) as RDF. Header and all.
 *
 * Note that this prints the serialized document and then ends the request; it
 * does not return the RDF to the caller.
 *
 * @param $term
 *   A term object, or a numeric term ID.
 * @param $depth
 *   Passed through to taxonomy_get_tree().
 * @param $max_depth
 *   Passed through to taxonomy_get_tree().
 */
function taxonomy_xml_rdf_export_term($term, $depth = -1, $max_depth = NULL) {
  if (is_numeric($term)) {
    $term = taxonomy_get_term($term);
  }

  // An unknown term id leaves us with nothing to export.
  if (!is_object($term)) {
    drupal_not_found();
    return;
  }

  // Load in all extra data
  module_invoke_all('taxonomy_term_load', $term);

  // The term renderer links top-level terms to their vocabulary, so it must
  // be handed the vocabulary with its ->uri filled in. Previously no
  // vocabulary was passed at all.
  $vocabulary = taxonomy_vocabulary_load($term->vid);
  if ($vocabulary) {
    $vocabulary->uri = taxonomy_xml_taxonomy_vocabulary_uri($vocabulary);
  }

  $domcontainer = taxonomy_xml_rdf_document();
  $dom = $domcontainer->ownerDocument;
  taxonomy_xml_add_terms_as_rdf($domcontainer, $term, $vocabulary);

  // Now start adding the terms below this one.
  // They are listed as siblings, not children of the ontology
  $tree = module_invoke('taxonomy', 'get_tree', $term->vid, $term->tid, $depth, $max_depth);
  taxonomy_xml_add_terms_as_rdf($domcontainer, $tree, $vocabulary);
  $result = $dom->saveXML();

  // Minor layout tweak for readability
  $result = preg_replace('|(<[^<]*/[^>]*>)|', "\$1\n", $result);
  $result = preg_replace('|><|', ">\n<", $result);

  print $result;
  exit;
}

###############################

# RDF & XML Utilities

# Funcs below here are scavenged from other projects, included here to reduce dependencies

# Full functions exist in D7, or rdf.module rdf_mapping.module etc

# Also, if I used Arc2, most of this would be half automatic.

/**
 * Given a Drupal object, some mapping rules and a DOM container, create the
 * XML representation of the thing.
 *
 * This should have been in the RDF project from day 1, but instead I'll invent
 * it today, here.
 *
 * The mapping (from taxonomy_xml_get_mapping()) is read as follows:
 * - 'rdftype': a list of CURIEs; the last one names the element to create.
 * - any other key: the name of a property on $object. Its entry lists
 *   'predicates' (CURIEs), and optionally a 'callback' function name and a
 *   'type' of 'rel'.
 *
 * @see rdf_mapping project
 *
 * @param $object
 *   The Drupal object (term, vocabulary ...) to describe.
 * @param $object_type
 *   The mapping type to use, eg 'taxonomy_term'.
 * @param $domcontainer
 *   The DOM element the new node is appended to.
 *
 * @return
 *   A DOMNode, already attached to $domcontainer.
 */
function taxonomy_xml_entity_to_rdf($object, $object_type, $domcontainer) {
  $document = $domcontainer->ownerDocument;

  // Get the mapping rules for rdf schema, D7 style
  $mapping = taxonomy_xml_get_mapping($object_type);

  // What is the rdf type we use to describe this type of thing
  // (eg 'skos:ConceptScheme')
  $type_curie = array_pop($mapping['rdftype']);
  $type_parts = taxonomy_xml_parse_curie($type_curie);

  // Describe the thing itself. Attach it to the document straight away so it
  // can inherit the xmlns declarations and not re-invent them.
  $entity_node = $document->createElementNS($type_parts['uri'], $type_parts['id']);
  $domcontainer->appendChild($entity_node);

  // Map everything that has a matching attribute to an RDF element of the
  // appropriate name.
  foreach ($mapping as $drupal_attribute => $rule) {
    // TODO - using isset made a load of empty things, but will empty() bork
    // on zero?
    if (empty($object->{$drupal_attribute})) {
      continue;
    }

    foreach ($rule['predicates'] as $predicate_curie) {
      $predicate = taxonomy_xml_parse_curie($predicate_curie);

      // Some fields can be multiple values. Treat everything as a list,
      // that's easier than switching.
      $values = $object->{$drupal_attribute};
      if (!is_array($values)) {
        $values = array(
          $values,
        );
      }

      foreach ($values as $value) {
        // May need to transform the data a little. Often to unpack internal
        // IDs into portable ones. The callback may return a structured URI
        // if a rel type was asked for, but normally it's just cooked data.
        if (isset($rule['callback']) && function_exists($rule['callback'])) {
          $transform = $rule['callback'];
          $value = $transform($value, '#');
        }

        $is_relation = isset($rule['type']) && $rule['type'] == 'rel';
        if (!$is_relation) {
          // Normal content: an element with a text value.
          $literal_node = $document->createElementNS($predicate['uri'], $predicate['id'], xmlentities(trim($value)));
          $entity_node->appendChild($literal_node);
          continue;
        }

        // 'rel' data becomes RDF URI links.
        $link_node = $document->createElementNS($predicate['uri'], $predicate['id']);
        if (!is_array($value)) {
          // NOTE(review): the value is passed through xmlentities() before
          // being set as an attribute - confirm this does not double-encode.
          $link_node->setAttributeNS(TAXONOMY_XML_RDF_NS, 'rdf:resource', xmlentities(trim($value)));
        }
        elseif (isset($value['id'])) {
          // Reference to something inside this same document.
          $link_node->setAttributeNS(TAXONOMY_XML_RDF_NS, 'rdf:resource', '#' . $value['id']);
        }
        elseif (isset($value['path'])) {
          $link_node->setAttributeNS(TAXONOMY_XML_RDF_NS, 'rdf:resource', url($value['path']));
        }
        // A 'title' was once emitted too. It was handy for humans, but
        // redundant in the graph.
        $entity_node->appendChild($link_node);
      }
    }
  }

  return $entity_node;
}

/**
 * Translate the rdf entity mapping array into something indexed.
 *
 * Collects the results of hook_rdf_mapping(), keeps only the mappings declared
 * for the default (empty) bundle and indexes them by entity type. Results are
 * remembered for the rest of the request.
 *
 * @param $type
 *   The entity type, eg 'taxonomy_term'.
 *
 * @return
 *   The mapping array for the given entity type, or an empty array if no
 *   module declares one.
 */
function taxonomy_xml_get_mapping($type) {
  static $mappings;
  if (!isset($mappings[$type])) {
    $RDF_DEFAULT_BUNDLE = '';
    $raw_mappings = module_invoke_all('rdf_mapping');
    foreach ($raw_mappings as $mapping) {
      // Skip malformed declarations that cannot be indexed.
      if (!is_array($mapping) || !isset($mapping['type'])) {
        continue;
      }
      // A declaration without a bundle counts as the default bundle.
      $bundle = isset($mapping['bundle']) ? $mapping['bundle'] : $RDF_DEFAULT_BUNDLE;
      if ($bundle == $RDF_DEFAULT_BUNDLE) {
        $mappings[$mapping['type']] = $mapping;
      }
    }

    // Remember the miss too, so the hooks are not invoked again on every
    // request for an unknown type.
    if (!isset($mappings[$type])) {
      $mappings[$type] = array(
        'mapping' => array(),
      );
    }
  }
  return isset($mappings[$type]['mapping']) ? $mappings[$type]['mapping'] : array();
}

/**
 * Utility function to try and figure out what a given CURIE means
 *
 * Returns an array containing (most likely) an id shortname of the CURIE.
 * This will be either the fragment or the last part of the path found.
 *
 * Depending on available information, maybe also the prefix and namespace:
 * for a CURIE whose prefix is a known namespace, 'uri' holds the namespace
 * URI. For a full URL the parse_url() components are included as well.
 * A 'css_class' derived from the id is always present.
 *
 * Give it either a URI or CURIE - it'll guess.
 * TODO actual namespaces.
 *
 * @param $curie
 *   A CURIE (eg 'skos:Concept', optionally wrapped in square brackets) or a
 *   URL.
 * @param $part
 *   Name a part of the CURIE/URI, eg 'prefix', 'id', 'host' and that
 *   is the bit that will be returned.
 *
 * @return
 *   The array of parts, or if $part was given, just that part (NULL when it
 *   is not available).
 *
 * @see parse_url()
 * @ingroup sideport
 */
function taxonomy_xml_parse_curie($curie, $part = NULL) {
  $rdf_namespaces = taxonomy_xml_get_namespaces();

  // Start from a known state so unrecognised input yields a (nearly) empty
  // result rather than an error.
  $curie_parts = array();

  if (taxonomy_xml_is_valid_curie($curie)) {
    // The validity check tolerates the square brackets of a 'safe CURIE';
    // they are not part of the prefix or the id.
    list($prefix, $id) = explode(':', trim((string) $curie, '[]'), 2);
    $curie_parts['prefix'] = $prefix;
    $curie_parts['id'] = $id;

    // TODO namespace expansion - when we need it
  }
  elseif (valid_url($curie)) {
    $url_parts = parse_url($curie);
    if (is_array($url_parts)) {
      $curie_parts = $url_parts;
    }

    // In that case, the id is whatever follows the last # or /.
    // TODO also derive the namespace part.
    if (!empty($curie_parts['fragment'])) {
      $curie_parts['id'] = $curie_parts['fragment'];
    }
    else {
      $curie_parts['id'] = basename(isset($curie_parts['path']) ? $curie_parts['path'] : '');
    }
  }

  $id = isset($curie_parts['id']) ? $curie_parts['id'] : '';
  $curie_parts += array(
    'css_class' => preg_replace('/[^a-z0-9]+/i', '-', basename($id)),
  );

  // Expand a known prefix into its namespace URI.
  if (empty($curie_parts['uri']) && isset($curie_parts['prefix']) && !empty($rdf_namespaces[$curie_parts['prefix']])) {
    $curie_parts['uri'] = $rdf_namespaces[$curie_parts['prefix']];
  }

  if ($part) {
    return isset($curie_parts[$part]) ? $curie_parts[$part] : NULL;
  }
  return $curie_parts;
}

/**
 * Returns an array of RDF namespaces defined in modules that implement
 * hook_rdf_namespaces().
 *
 * The list is built once and then reused for the rest of the request (as
 * long as it is not empty).
 *
 * Backport from D7 rdf_get_namespaces()
 * @ingroup sideport
 *
 * @return
 *   An array of namespace URIs keyed by prefix.
 */
function taxonomy_xml_get_namespaces() {
  static $namespaces;

  if (empty($namespaces)) {
    $namespaces = module_invoke_all('rdf_namespaces');

    // module_invoke_all() uses array_merge_recursive() which might return
    // nested arrays if several modules redefine the same prefix multiple
    // times. Flatten those so the list only contains strings as URIs.
    foreach ($namespaces as $prefix => $declared) {
      if (!is_array($declared)) {
        continue;
      }
      if (count(array_unique($declared)) == 1) {
        // Every module agrees on the URI for this prefix; collapse the
        // duplicates into a single namespace.
        $namespaces[$prefix] = $declared[0];
      }
      else {
        // Conflicting URIs for this prefix. Drop it entirely in order to
        // avoid asserting any inaccurate RDF statements.
        unset($namespaces[$prefix]);
      }
    }
  }

  return $namespaces;
}

/**
 * Util function. adapted from D6 rdf.module
 *
 * Tests whether a value looks like a CURIE: a prefix of word characters,
 * dashes or dots, a colon, then an optional reference made of the same
 * characters, the whole thing optionally wrapped in square brackets.
 *
 * @param $curie
 *   The value to test; it is cast to a string first.
 *
 * @return
 *   The result of preg_match(): 1 on a match, 0 otherwise.
 */
function taxonomy_xml_is_valid_curie($curie) {
  // Looks a bit like an URL but has no slashes? Probably a CURIE.
  $curie_pattern = '/^\\[?[\\w\\-\\.]+:[\\w\\-\\.]*\\]?$/';
  $candidate = (string) $curie;
  return preg_match($curie_pattern, $candidate);
}

/**
 * Entity uri callback.
 *
 * @param $term
 *   A term object, or a numeric term ID that is loaded first.
 *
 * @return
 *   An array with the term's internal 'path', its 'title' (the term name) and
 *   a document-local 'id' of the form 'term-{tid}'.
 */
function taxonomy_xml_taxonomy_term_uri($term) {
  // Accept a bare term ID as well as a loaded term.
  $term = is_numeric($term) ? taxonomy_get_term($term) : $term;

  $uri = array();
  $uri['path'] = 'taxonomy/term/' . $term->tid;
  $uri['title'] = $term->name;
  $uri['id'] = 'term-' . $term->tid;
  return $uri;
}

/**
 * Entity uri callback.
 *
 * Side effect: sets $vocabulary->machine_name on the vocabulary object.
 *
 * @param $vocabulary
 *   A vocabulary object, or a numeric vocabulary ID that is loaded first.
 *
 * @return
 *   An array with the vocabulary's internal 'path', its 'title' (the
 *   vocabulary name) and an 'id' holding the machine name.
 */
function taxonomy_xml_taxonomy_vocabulary_uri($vocabulary) {
  // Accept a bare vocabulary ID as well as a loaded vocabulary.
  $vocabulary = is_numeric($vocabulary) ? taxonomy_vocabulary_load($vocabulary) : $vocabulary;

  // Default machine name: the lowercased name with every run of characters
  // other than a-z and 0-9 squashed to an underscore.
  $slug = preg_replace('/[^a-z0-9]+/', '_', strtolower($vocabulary->name));
  $machine_name = 'vocabulary-' . $slug;

  // If it is a features vocabulary, its canonical ID is overloaded in the
  // 'module' field. Makes enough sense. Use that, minus the 'features_'
  // prefix.
  if (strpos($vocabulary->module, 'features_') === 0) {
    $machine_name = substr($vocabulary->module, 9);
  }
  $vocabulary->machine_name = $machine_name;

  $uri = array();
  $uri['path'] = 'taxonomy/vocabulary/' . $vocabulary->vid;
  $uri['title'] = $vocabulary->name;
  $uri['id'] = $vocabulary->machine_name;
  return $uri;
}

/**
 * Implements hook_rdf_namespaces().
 *
 * Backport from D7 rdf_rdf_namespaces(), only a different useful set (from ARC)
 * The D7 one mapped dc: to dc/terms - which was quite wrong
 *
 * @return
 *   An array of namespace URIs keyed by their conventional prefix.
 */
function taxonomy_xml_rdf_namespaces() {
  $namespaces = array();

  $namespaces['og'] = 'http://ogp.me/ns#';
  $namespaces['an'] = 'http://www.w3.org/2000/10/annotation-ns#';
  $namespaces['content'] = 'http://purl.org/rss/1.0/modules/content/';
  $namespaces['dc'] = 'http://purl.org/dc/elements/1.1/';
  $namespaces['dct'] = 'http://purl.org/dc/terms/';
  $namespaces['foaf'] = 'http://xmlns.com/foaf/0.1/';
  $namespaces['geo'] = 'http://www.w3.org/2003/01/geo/wgs84_pos#';
  $namespaces['ical'] = 'http://www.w3.org/2002/12/cal/icaltzd#';
  $namespaces['owl'] = 'http://www.w3.org/2002/07/owl#';
  $namespaces['posh'] = 'http://poshrdf.org/ns/posh/';
  $namespaces['rdf'] = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#';
  $namespaces['rdfs'] = 'http://www.w3.org/2000/01/rdf-schema#';
  $namespaces['rev'] = 'http://www.purl.org/stuff/rev#';
  $namespaces['rss'] = 'http://purl.org/rss/1.0/';
  $namespaces['sioc'] = 'http://rdfs.org/sioc/ns#';
  $namespaces['sioct'] = 'http://rdfs.org/sioc/types#';

  // The 2008 SKOS scheme (http://www.w3.org/2008/05/skos#) never got stable
  // or recommended. D7 etc settled on the 2004 core namespace.
  $namespaces['skos'] = 'http://www.w3.org/2004/02/skos/core#';

  $namespaces['uri'] = 'http://www.w3.org/2006/uri#';
  $namespaces['vcard'] = 'http://www.w3.org/2006/vcard/ns#';
  $namespaces['xfn'] = 'http://gmpg.org/xfn/11#';
  $namespaces['xml'] = 'http://www.w3.org/XML/1998/namespace';
  $namespaces['xsd'] = 'http://www.w3.org/2001/XMLSchema#';

  return $namespaces;
}
Functions

Name	Description
taxonomy_xml_add_terms_as_rdf	Given a list of terms, append definitions of them to the passed DOM container
taxonomy_xml_add_vocab_as_rdf	Create a vocabulary definition (just the def, not its terms) and insert it into the given document element.
taxonomy_xml_convert_triples_to_sorted_objects	Compile triple statements into information objects again.
taxonomy_xml_entity_to_rdf	Given a Drupal object, some mapping rules and a DOMDocument, create the XML representation of the thing
taxonomy_xml_get_literal_string	Choose a string from an array of language-tagged possibilities
taxonomy_xml_get_mapping	Translate the rdf entity mapping array into something indexed.
taxonomy_xml_get_namespaces	Returns an array of RDF namespaces defined in modules that implement hook_rdf_namespaces().
taxonomy_xml_is_valid_curie	Util function. adapted from D6 rdf.module
taxonomy_xml_parse_curie	Utility function to try and figure out what a given CURIE means
taxonomy_xml_rdf_add_namespace	Adding namespaces is fiddly.
taxonomy_xml_rdf_create	Return an XML/RDF document representing this vocab
taxonomy_xml_rdf_document	Set up an RDF document preamble. Returns a document, also sets the passed handle to the RDF node that content should land in
taxonomy_xml_rdf_export_term	Return a term as RDF. Header and all
taxonomy_xml_rdf_format_info	Return information about this format
taxonomy_xml_rdf_get_statements_about	Filter a big list of triples down to only the ones about one subject;
taxonomy_xml_rdf_make_term	Create the placeholder and fill in the values for this term - NOT its relationships yet.
taxonomy_xml_rdf_namespaces	Implements hook_rdf_namespaces().
taxonomy_xml_rdf_parse	Read in RDF taxonomies and vocabularies. Create vocabs and terms as needed.
taxonomy_xml_rdf_parse_data_into_triples	Invoke the ARC parser on the given data.
taxonomy_xml_rdf_requirements	Check ARC RDF library is available
taxonomy_xml_rdf_shortname	Return the shorthand label of a potentially long RDF URI
taxonomy_xml_taxonomy_term_uri	Entity uri callback.
taxonomy_xml_taxonomy_vocabulary_uri	Entity uri callback.
Constants

Name	Description
TAXONOMY_XML_ARC1_PATH
TAXONOMY_XML_CATEGORY
TAXONOMY_XML_CONTENTLABEL_NS
TAXONOMY_XML_DC_NS
TAXONOMY_XML_DRUPAL_NS
TAXONOMY_XML_FB_NS
TAXONOMY_XML_GEO_NS
TAXONOMY_XML_KML_NS
TAXONOMY_XML_OWL_NS
TAXONOMY_XML_RDFS_NS
TAXONOMY_XML_RDF_NS	@file Include routines for RDF parsing and taxonomy/term creation. @author dman http://coders.co.nz
TAXONOMY_XML_SKOSREF_NS
TAXONOMY_XML_SKOS_NS
TAXONOMY_XML_TDWG_NS
TAXONOMY_XML_TYPE
TAXONOMY_XML_UNTYPED
TAXONOMY_XML_W3C_WN
TAXONOMY_XML_W3C_WN_SCHEMA
You are here

rdf_format.inc in Taxonomy import/export via XML 6.2

File

Functions

Constants

API Navigation