function taxonomy_xml_rdf_parse in Taxonomy import/export via XML 5
Same name and namespace in other branches
- 5.2 rdf_format.inc \taxonomy_xml_rdf_parse()
- 6.2 rdf_format.inc \taxonomy_xml_rdf_parse()
- 6 rdf_format.inc \taxonomy_xml_rdf_parse()
- 7 formats/rdf_format.inc \taxonomy_xml_rdf_parse()
Read in RDF taxonomies and vocabularies. Create vocabs and terms as needed.
See formats.html readme for information about the RDF input supported.
Targets include: ICRA Content Rating (http://www.icra.org/vocabulary/), WordNet Lexicon (http://wordnet.princeton.edu/), SUMO (http://www.ontologyportal.org/)
... and the ontologies found at http://www.schemaweb.info/ that implement appropriate parts of the RDF Schema "rdfs" (eg Classes with subclassOf)
File
- ./
rdf_format.inc, line 42 - Include routines for RDF parsing and taxonomy/term creation.
Code
/**
 * Read in an RDF document and create vocabularies and terms from it.
 *
 * The input is parsed into triples with the ARC RDF/XML parser, grouped into
 * resources by type, and then every resource whose type is listed in
 * $term_types is saved as a taxonomy term. Relationships between terms are
 * linked afterwards, once every term has been saved.
 *
 * @param $data
 *   The RDF/XML source, handed straight to the ARC parser. Taken by
 *   reference; it is not modified here.
 * @param $vid
 *   Target vocabulary id. 0 means "use the vocabulary described in the
 *   source file"; a placeholder vocabulary is created if the file defines
 *   none. NOTE(review): the value is subsequently overwritten by the return
 *   of taxonomy_xml_absorb_vocabulary_definitions() in every case, so a
 *   non-zero $vid supplied by the caller may not be honoured - confirm
 *   against that helper.
 *
 * @return
 *   FALSE if the parser could not be prepared or the input failed to parse,
 *   otherwise the number of terms that were processed.
 */
function taxonomy_xml_rdf_parse(&$data, $vid) {
  // NOTE(review): this only verifies that the module directory exists, not
  // that the ARC library itself is installed. Left as-is because the include
  // below is resolved through the include path, which cannot be checked
  // reliably from here.
  if (!file_exists(drupal_get_path('module', 'taxonomy_xml'))) {
    drupal_set_message(t('This method requires the ARC library to be available. Please check the taxonomy_xml INSTALL.txt'));
    return false;
  }

  // Use ARC parser
  include_once "arc/ARC_rdfxml_parser.php";
  $parser_args = array(
    "bnode_prefix" => "genid",
    "base" => "",
  );
  $parser = new ARC_rdfxml_parser($parser_args);
  $triples = $parser->parse_data($data);
  if (!is_array($triples)) {
    // Anything other than an array is treated as the parser's error message.
    drupal_set_message(t("Problem parsing input %message", array(
      '%message' => $triples,
    )), 'error');
    return false;
  }
  drupal_set_message(t("%count data triples (atomic statements) found in the source RDF doc", array(
    '%count' => count($triples),
  )));

  // The RDF input may come in several flavours,
  // Resources of the following 'types' may be cast into taxonomy terms for our purposes.
  // That is, an rdf:Class is a Drupal:term
  //
  // Add to this list as needed
  //
  $term_types = array(
    TAXONOMY_XML_RDF_NS . 'Property',
    'http://purl.org/dc/elements/1.1/subject',
    TAXONOMY_XML_RDFS_NS . 'Class',
    TAXONOMY_XML_W3C_WN_SCHEMA . 'Word',
    TAXONOMY_XML_W3C_WN_SCHEMA . 'NounWordSense',
    TAXONOMY_XML_W3C_WN_SCHEMA . 'NounSynset',
    'http://www.w3.org/2004/12/q/contentlabel#Category',
    TAXONOMY_XML_SKOS_NS . 'Concept',
  );

  // A Drupal 'vocabulary' is represented by an owl:Ontology
  // or other similar shaped constructs
  $vocabulary_types = array(
    TAXONOMY_XML_OWL_NS . 'Ontology',
    TAXONOMY_XML_RDF_NS . 'Description',
    'http://www.w3.org/2001/12/Glossary',
  );

  $resources_by_type = taxonomy_xml_convert_triples_to_sorted_objects($triples);

  // The resources are all initialized as data objects.
  // Resource types we expect to be dealing with are just vocabs and terms.
  drupal_set_message(t("Found %count different <strong>kinds</strong> of resources in the input : %types", array(
    '%count' => count($resources_by_type),
    '%types' => join(', ', array_keys($resources_by_type)),
  )));

  // Scan the sorted objects for vocabulary definitions
  // Hopefully there's only one vocab per file, but loop anyway
  $vocabularies = array();
  foreach ($vocabulary_types as $vocabulary_type) {
    if (isset($resources_by_type[$vocabulary_type]) && is_array($resources_by_type[$vocabulary_type])) {
      foreach ($resources_by_type[$vocabulary_type] as $uri => &$vocabulary_handle) {
        $vocabularies[$uri] =& $vocabulary_handle;
      }
      // Break the reference so later code cannot write through it by accident.
      unset($vocabulary_handle);
    }
  }
  drupal_set_message(t("Found %count resources to be used as vocabulary definitions", array(
    '%count' => count($vocabularies),
  )));

  if ($vid == 0) {
    // We've been asked to use the vocab described in the source file.
    if (!$vocabularies) {
      // Create a placeholder.
      $vocabularies[] = array(
        'name' => 'Imported Vocabulary',
      );
    }
  }
  $vid = taxonomy_xml_absorb_vocabulary_definitions($vocabularies);
  // $vocabularies now contains a keyed array of target vocabularies the terms may be put into
  // $vid is the default one (most common is one vocab per input file) to be used unless otherwise defined per-term.

  // Gather the resources that will become terms.
  // Slightly long way (not using array_merge), as I need to merge indexed and by reference
  $terms = array();
  foreach ($term_types as $term_type) {
    if (isset($resources_by_type[$term_type]) && is_array($resources_by_type[$term_type])) {
      foreach ($resources_by_type[$term_type] as $uri => &$term_handle) {
        // Grab name/label early for debugging and indexing
        $predicates = $term_handle->predicates;
        if (isset($predicates['label'])) {
          $term_handle->name = $predicates['label'][0];
        }
        $terms[$uri] =& $term_handle;
      }
      // Break the reference so later code cannot write through it by accident.
      unset($term_handle);
    }
  }
  drupal_set_message(t("Found %count resources to be imported as terms into vocabulary %vid", array(
    '%count' => count($terms),
    '%vid' => $vid,
  )));

  // $predicate_synonyms is a translation array to match rdf-speak with Drupal concepts
  $predicate_synonyms = taxonomy_xml_relationship_synonyms();

  //
  // START MAKING TERMS
  //
  foreach ($terms as $uri => &$term) {
    drupal_set_message(t("Reviewing term %uri '%name' and analyzing its properties", array(
      '%uri' => $uri,
      // The name is only known this early if the resource carried a 'label'.
      '%name' => isset($term->name) ? $term->name : '',
    )));

    if (!isset($term->vid)) {
      // This is just a default fallback. Imported terms should really have already chosen their vid.
      $term->vid = $vid;
    }

    // Build term from data
    // Convert all input predicates into attributes on the object the taxonomy.module will understand
    foreach ($term->predicates as $predicate => $values) {
      // First translate misc terminology synonyms to the canonical predicate I use everywhere
      // This allows us to interpret several XML dialects at once
      if (isset($predicate_synonyms[$predicate]) && ($canonic = $predicate_synonyms[$predicate])) {
        $predicate = $canonic;
      }

      switch ($predicate) {
        case 'type':
          // These are already done. Ignore
        case 'subPropertyOf':
          // Useless, ignore also
          break;

        case TAXONOMY_XML_NAME:
          $term->name = taxonomy_xml_get_literal_string($values);
          break;

        case TAXONOMY_XML_DESCRIPTION:
          $term->description = taxonomy_xml_get_literal_string($values);
          break;

        case TAXONOMY_XML_PARENT:
        case TAXONOMY_XML_RELATED:
        case TAXONOMY_XML_CHILD:
          // A term relationship.
          // Re-key each referred URI under the canonical predicate so it can
          // be linked in later, once every term has been saved.
          foreach ($values as $target_uri) {
            $term->predicates[$predicate][$target_uri] = $target_uri;
          }
          break;

        case TAXONOMY_XML_HAS_SYNONYM:
          $term->synonyms_array = isset($term->synonyms_array) ? array_merge($term->synonyms_array, $values) : $values;
          $term->synonyms = join("\n", array_unique($term->synonyms_array));
          break;

        case TAXONOMY_XML_IN_VOCABULARY:
          // This term need to be in the vocabulary referred to by this URI
          // check our known vocabs to see if they are recognised
          // Do we know a vocab with an ID matching this 'isdefinedby' value?
          foreach ($values as $value) {
            // probably just one...
            // Only adopt a vocabulary we actually know and that carries a
            // vid; unknown URIs leave the term's current vid untouched.
            if (!empty($vocabularies[$value]) && isset($vocabularies[$value]->vid)) {
              // I know this vocab!
              $term->vid = $vocabularies[$value]->vid;
            }
          }
          break;

        case 'unused':
          // Explicitly ignore these
          break;

        default:
          // Use real t() placeholders so the values are escaped and the
          // string stays translatable.
          drupal_set_message(t("Dunno what to do with '%predicate' value of %values found in %uri", array(
            '%predicate' => $predicate,
            '%values' => print_r($values, 1),
            '%uri' => $uri,
          )));
      }
    }

    // Look for existing term matching this one and blend the properties
    // Ensure name is valid
    if (empty($term->name)) {
      // Fallback to a name, identifier derived (roughly) from the URI - not always meaningful, but all we have in some contexts.
      $term->name = basename($uri);
      drupal_set_message(t("Problem, we were unable to find a specific label for the term referred to as %uri. Guessing that %name will be good enough.", array(
        '%uri' => $uri,
        '%name' => $term->name,
      )));
    }
    if (empty($term->name)) {
      // Should never get here
      drupal_set_message(t("Problem, this term %uri does not have a readable label.", array(
        '%uri' => $uri,
      )));
      // Skip this term entirely rather than saving one without a name.
      continue;
    }

    // See if a definition already exists in the DB. Build on that.
    // Look in the vocabulary this term has chosen, matching the re-retrieval
    // after the save below.
    $existing_term = _taxonomy_xml_get_term_placeholder($term->name, $term->vid);
    // Merge the old term objects properties into this one. Really just want its tid, but there may be more info I should not lose.
    // New input takes precedence over older data
    foreach ((array) $existing_term as $key => $value) {
      if (!isset($term->{$key})) {
        $term->{$key} = $value;
      }
    }
    // The term object is now as tidy as it can be as a self-contained entity.

    // MAINTAIN IDS
    // Because this is likely to be used with a site-cloning set-up, it would help if we tried to match IDs
    // OTOH, doing so could be very messy for other situations.
    // So,
    // iff there is no pre-existing term with this id,
    // create this one as a clone with the old ID.
    // This requires a little DB sneakiness.
    if (!empty($term->internal_id) && !taxonomy_get_term($term->internal_id)) {
      $term->tid = $term->internal_id;
      drupal_set_message(t("Doing sneaky import of %term_name re-using the internal id = %term_id", array(
        '%term_name' => $term->name,
        '%term_id' => $term->internal_id,
      )));
      // Description and weight are optional in the input; default them
      // explicitly instead of reading unset properties.
      $description = isset($term->description) ? $term->description : '';
      $weight = isset($term->weight) ? $term->weight : 0;
      db_query("INSERT INTO {term_data} (tid, name, description, vid, weight) VALUES (%d, '%s', '%s', %d, %d)", $term->tid, $term->name, $description, $term->vid, $weight);
      // Fudge the sequences table to patch the hack we just did, avoid over-writing later
      $current_id = db_result(db_query("SELECT id FROM {sequences} WHERE name = '%s'", '{term_data}_tid'));
      if ($current_id < $term->tid) {
        db_query("REPLACE INTO {sequences} VALUES ('%s', %d)", '{term_data}_tid', $term->tid);
      }
    }

    $save_term = (array) $term;
    taxonomy_save_term($save_term);

    // Re-retrieve the new term definition, just in case anything extra happened to it during processing
    $new_term = taxonomy_xml_get_term_by_name_from_vocab($term->name, $term->vid);
    if (!$new_term) {
      drupal_set_message(t("It seems like we failed to create and retrieve a term called %term_name", array(
        '%term_name' => $term->name,
      )), 'error');
    }
    else {
      // Merge retrieved values back over our main definition so the handles are up-to-date
      foreach ((array) $new_term as $key => $value) {
        $term->{$key} = $value;
      }
    }
  }
  // end term-construction loop.
  // $term is still a reference to the last element of $terms; drop it so the
  // by-value loop below does not overwrite that element.
  unset($term);

  // Now the terms are all happily created, create their relationships
  // Couldn't do so until they had all been given tids.
  taxonomy_xml_set_term_relations($terms);

  $term_list = array();
  foreach ($terms as $term) {
    $term_list[] = l($term->name, 'admin/content/taxonomy/edit/term/' . $term->tid);
  }
  drupal_set_message(t('Updated %count term(s)', array(
    '%count' => count($terms),
  )) . ' <i>' . implode(', ', $term_list) . '.</i> ');

  // Load the default vocabulary so its name can be reported; fall back to the
  // bare id if it cannot be loaded.
  $vocabulary = taxonomy_get_vocabulary($vid);
  drupal_set_message(t("\n Finished importing vocabulary %vocab_name. \n You may now need to <a href='!settings_link'>Review the vocabulary settings</a> \n or <a href='!list_link'>List the terms</a>", array(
    '%vocab_name' => $vocabulary ? $vocabulary->name : $vid,
    '!settings_link' => url('admin/content/taxonomy/edit/vocabulary/' . $vid),
    '!list_link' => url('admin/content/taxonomy/' . $vid),
  )));

  return count($term_list);
}