View source
<?php
// Canonical predicate labels. taxonomy_xml_canonicize_predicates() switches
// on these values after mapping source predicates through the synonym table.
define('TAXONOMY_XML_RELATED', 'Related Terms');
define('TAXONOMY_XML_PARENT', 'Broader Terms');
define('TAXONOMY_XML_CHILD', 'Narrower Terms');
define('TAXONOMY_XML_HAS_SYNONYM', 'Used for');
define('TAXONOMY_XML_SYNONYM_OF', 'Use');
define('TAXONOMY_XML_DESCRIPTION', 'Definition');
define('TAXONOMY_XML_IN_VOCABULARY', 'Part of');
define('TAXONOMY_XML_NAME', 'name');
// Predicates mapped to 'unused' are skipped by the canonicizer.
define('TAXONOMY_XML_UNUSED', 'unused');
define('TAXONOMY_XML_NULL', '');
// Predicates mapped to 'other_rdf' are kept as raw statements on $term->rdf.
define('TAXONOMY_XML_OTHER_PREDICATE', 'other_rdf');
// NOTE(review): presumably the number of items handled per batch pass; it is
// not referenced in the part of the file reviewed here - confirm.
define('TAXONOMY_XML_MAX_BATCH_SIZE', 50);
// Special keys of the "Target vocabulary" select on the import form.
define('TAXONOMY_XML_DETERMINED_BY_SOURCE_FILE', 0);
define('TAXONOMY_XML_CREATE_NEW', -1);
// Base path under which the import/export menu items are registered.
define('TAXONOMY_XML_ADMIN', 'admin/content/taxonomy');
// Space-separated extension list handed to file_validate_extensions().
define('TAXONOMY_XML_ALLOWED_UPLOAD_EXTENSIONS', 'txt csv xml rdf');
/**
 * Implementation of hook_help().
 *
 * Supplies help text for the module listing, the import and export pages,
 * the module help page and the "services" page. The longer texts are read
 * from HTML files in the module's help directory.
 *
 * @param $path
 *   The path help is being requested for.
 * @param $arg
 *   Path arguments; not used here.
 *
 * @return
 *   The help markup for a recognised path, otherwise nothing.
 */
function taxonomy_xml_help($path, $arg) {
  $doc_path = drupal_get_path('module', 'taxonomy_xml') . '/help';

  if ($path == 'admin/modules#description') {
    return t('Makes it possible to import and export taxonomy terms via XML.');
  }

  if ($path == TAXONOMY_XML_ADMIN . '/import') {
    $intro = t("\n You can upload or import a vocabulary and/or taxonomy terms\n from a properly-formatted input document or web service.\n ");
    return $intro . theme("more_help_link", url('admin/help/taxonomy_xml'));
  }

  if ($path == TAXONOMY_XML_ADMIN . '/export') {
    $formats_link = l(t("taxonomy formats"), "admin/help/taxonomy_xml");
    return t("\n You can export XML documents for each vocabulary and its terms in\n this website's taxonomies.\n Choose the vocabulary from the list below.\n See more about !taxonomy_formats in the module docs.\n ", array(
      '!taxonomy_formats' => $formats_link,
    ));
  }

  if ($path == 'admin/help#taxonomy_xml') {
    // The help page is an HTML template whose link targets are filled in here.
    $template = file_get_contents($doc_path . '/help.html');
    $links = array();
    $links['!downloads'] = url(TAXONOMY_XML_ADMIN . "/export");
    $links['!upload'] = url(TAXONOMY_XML_ADMIN . "/import");
    $links['!formats'] = url($doc_path . '/formats.html');
    $links['!services'] = url(TAXONOMY_XML_ADMIN . "/import/services");
    $links['!rdf'] = url($doc_path . '/rdf.html');
    return t($template, $links);
  }

  if ($path == TAXONOMY_XML_ADMIN . '/import/services') {
    return file_get_contents($doc_path . '/services.html');
  }
}
/**
 * Implementation of hook_menu().
 *
 * Registers the export and import tabs, the cache flush callback, the
 * download callback and the services information page. When the rdf module
 * is enabled, per-term and per-vocabulary RDF callbacks are added too.
 *
 * Titles are given untranslated: the menu system translates them itself, so
 * wrapping them in t() here would translate them twice.
 *
 * @return
 *   An array of menu items keyed by path; empty when the taxonomy module is
 *   not available.
 */
function taxonomy_xml_menu() {
  if (!module_exists('taxonomy')) {
    return array();
  }
  $items = array();
  $items[TAXONOMY_XML_ADMIN . '/export'] = array(
    'title' => 'Export',
    'access arguments' => array(
      'export taxonomy',
    ),
    'page callback' => 'taxonomy_xml_export',
    'type' => MENU_LOCAL_TASK,
  );
  $items[TAXONOMY_XML_ADMIN . '/import'] = array(
    'title' => 'Import',
    'access arguments' => array(
      'administer taxonomy',
    ),
    'page callback' => 'taxonomy_xml_import',
    'type' => MENU_LOCAL_TASK,
  );
  // Resolves to admin/content/taxonomy_xml/flush.
  $items[TAXONOMY_XML_ADMIN . '_xml/flush'] = array(
    'title' => 'Delete cache file',
    'access arguments' => array(
      'administer taxonomy',
    ),
    'page callback' => 'taxonomy_xml_flush_cache_file',
    'type' => MENU_CALLBACK,
  );
  // Download callback; trailing path components (vocabulary id, format) are
  // passed to the page callback as arguments.
  $items['taxonomy_xml'] = array(
    'title' => 'Taxonomy XML',
    'access arguments' => array(
      'access content',
    ),
    'page callback' => 'taxonomy_xml_file',
    'type' => MENU_CALLBACK,
  );
  $items[TAXONOMY_XML_ADMIN . '/import/services'] = array(
    'title' => 'About taxonomy_import services',
    'access arguments' => array(
      'administer taxonomy',
    ),
    'page callback' => 'taxonomy_xml_about_services',
    'type' => MENU_LOCAL_TASK,
  );
  if (module_exists('rdf')) {
    $items['taxonomy/term/%/rdf'] = array(
      'title' => 'RDF',
      'type' => MENU_CALLBACK,
      'access arguments' => array(
        'access content',
      ),
      'page callback' => 'taxonomy_xml_rdf_export_term',
      'page arguments' => array(
        2,
      ),
      'file' => 'taxonomy_xml_rdf.inc',
    );
    $items['taxonomy/vocabulary/%/rdf'] = array(
      'title' => 'RDF',
      'type' => MENU_CALLBACK,
      'access arguments' => array(
        'access content',
      ),
      'page callback' => 'taxonomy_xml_rdf_export_vocabulary',
      'page arguments' => array(
        2,
      ),
      'file' => 'taxonomy_xml_rdf.inc',
    );
  }
  return $items;
}
/**
 * Implementation of hook_perm().
 *
 * @return
 *   The list of permission names this module defines.
 */
function taxonomy_xml_perm() {
  $permissions = array();
  $permissions[] = 'export taxonomy';
  return $permissions;
}
/**
 * Page callback: lists every vocabulary with download links per format.
 *
 * Each list item shows the vocabulary name, its term count, and links to the
 * XML, RDF and TCS downloads served by the 'taxonomy_xml' menu callback.
 *
 * @return
 *   Rendered item list, or a short notice when there are no vocabularies.
 */
function taxonomy_xml_export() {
  $vocabularies = taxonomy_get_vocabularies();
  if (empty($vocabularies)) {
    return t('There are no vocabularies present');
  }

  $vocablist = array();
  foreach ($vocabularies as $vocabulary) {
    $vocabcount = db_result(db_query("SELECT count(*) FROM {term_data} WHERE vid=%n", $vocabulary->vid));
    $item = $vocabulary->name;
    $item .= t(' (%vocabcount terms) ', array(
      '%vocabcount' => $vocabcount,
    ));
    $item .= ' ' . l('XML', "taxonomy_xml/{$vocabulary->vid}", array(
      'attributes' => array(
        'title' => "This format is Drupal-only. It closely matches the internal data structure, but is not portable outside of Drupal without work.",
      ),
    ));
    $item .= ' ' . l("RDF", "taxonomy_xml/{$vocabulary->vid}/rdf", array(
      'attributes' => array(
        'title' => "RDF is recommended for portability with external databases, although it is verbose and sometimes unreadable to humans.",
      ),
    ));
    $item .= ' ' . l("TCS", "taxonomy_xml/{$vocabulary->vid}/tcs", array(
      'attributes' => array(
        'title' => "The Taxon Concept Schema is used in Life Sciences to notate biological families of living things.",
      ),
    ));
    $vocablist[$vocabulary->vid] = $item;
  }

  // Go through theme() rather than calling theme_item_list() directly so
  // that theme-level overrides of the item list are honoured.
  return theme('item_list', $vocablist);
}
/**
 * Page callback: sends a whole vocabulary as a downloadable file.
 *
 * Loads the "<format>_format.inc" library, calls its
 * taxonomy_xml_<format>_create() function and streams the result as an
 * attachment.
 *
 * @param $vid
 *   Vocabulary ID to export.
 * @param $format
 *   Short format name (for example 'xml', 'rdf', 'tcs'). It arrives from the
 *   URL, so it is validated before being used in a file name or a function
 *   name.
 */
function taxonomy_xml_file($vid, $format = 'xml') {
  // $format is part of the request path: only accept plain identifiers so it
  // cannot point the include outside the module or name arbitrary code.
  if (!preg_match('/^[A-Za-z0-9_]+$/', $format)) {
    drupal_not_found();
    return;
  }

  $vocabulary = taxonomy_vocabulary_load($vid);
  if (empty($vocabulary)) {
    drupal_not_found();
    return;
  }
  $vname = drupal_strtolower(str_replace(' ', '_', trim($vocabulary->name)));
  unset($vocabulary);

  module_load_include('inc', 'taxonomy_xml', $format . '_format');
  taxonomy_xml_include_module_hooks();
  $create_funcname = "taxonomy_xml_{$format}_create";
  if (!function_exists($create_funcname)) {
    drupal_not_found();
    return;
  }
  $file = $create_funcname($vid);

  // Headers can no longer be changed once output has started, so check
  // before attempting to send any and stop if it is too late.
  if (headers_sent()) {
    echo 'Some data has already been output to browser, can\'t send file';
    return;
  }

  // strpos() returns 0 for a match at the start of the string (as with
  // "Opera/9.80 ..."), so the result must be compared against FALSE.
  $agent = !empty($_SERVER['HTTP_USER_AGENT']) ? $_SERVER['HTTP_USER_AGENT'] : '';
  if (strpos($agent, 'MSIE 5.5') !== FALSE || strpos($agent, 'Opera') !== FALSE) {
    header('Content-Type: application/dummy');
  }
  else {
    header('Content-Type: text/xml; charset=UTF-8');
  }
  // Content-Length is a byte count; a character count would truncate
  // documents containing multi-byte UTF-8 characters.
  header('Content-Length: ' . strlen($file));
  header("Content-Disposition: attachment; filename=taxonomy_{$vname}.{$format}.xml");
  echo $file;
}
/**
 * Sends a single term as a downloadable file.
 *
 * Loads the "<format>_format.inc" library, calls its
 * taxonomy_xml_<format>_create_term() function with $tid and streams the
 * result as an attachment.
 *
 * @param $tid
 *   Term ID. A value that is not numeric is treated as an already loaded
 *   term and is passed to the format library unchanged.
 * @param $format
 *   Short format name; defaults to 'rdf'.
 */
function taxonomy_xml_export_term($tid, $format = 'rdf') {
  // The term is only needed here to build a readable download file name.
  if (is_numeric($tid)) {
    // Use whichever core term loader this Drupal version provides.
    $loader = function_exists('taxonomy_term_load') ? 'taxonomy_term_load' : 'taxonomy_get_term';
    $term = $loader($tid);
  }
  else {
    $term = $tid;
  }
  $vname = 'term';
  if (is_object($term) && isset($term->name)) {
    $vname = drupal_strtolower(str_replace(' ', '_', trim($term->name)));
  }

  module_load_include('inc', 'taxonomy_xml', $format . '_format');
  taxonomy_xml_include_module_hooks();
  $create_funcname = "taxonomy_xml_{$format}_create_term";
  if (!function_exists($create_funcname)) {
    drupal_set_message(t('Unavailable format. @funcname was not found in formatting library @format_format .', array(
      '@funcname' => $create_funcname,
      '@format' => $format,
    )), 'error');
    return;
  }
  $file = $create_funcname($tid);

  // Headers can no longer be changed once output has started.
  if (headers_sent()) {
    echo 'Some data has already been output to browser, can\'t send file';
    return;
  }

  // strpos() returns 0 for a match at the start of the string, so compare
  // against FALSE instead of relying on truthiness.
  $agent = !empty($_SERVER['HTTP_USER_AGENT']) ? $_SERVER['HTTP_USER_AGENT'] : '';
  if (strpos($agent, 'MSIE 5.5') !== FALSE || strpos($agent, 'Opera') !== FALSE) {
    header('Content-Type: application/dummy');
  }
  else {
    header('Content-Type: text/xml; charset=UTF-8');
  }
  // Content-Length is a byte count, not a character count.
  header('Content-Length: ' . strlen($file));
  header("Content-Disposition: attachment; filename=taxonomy_{$vname}.{$format}.xml");
  echo $file;
}
/**
 * Page callback for the import tab.
 *
 * If pathauto is enabled and has a taxonomy path pattern configured, the
 * pattern is cleared (with a warning) before the import form is shown.
 *
 * @return
 *   The rendered import form.
 */
function taxonomy_xml_import() {
  $active_pattern = '';
  if (module_exists('pathauto')) {
    $active_pattern = variable_get("pathauto_taxonomy_pattern", '');
  }

  if (!empty($active_pattern)) {
    $settings_url = url('admin/build/path/patterns');
    $warning = t('
Pathauto has been profiled to slow down the import process by over 50%.
If you are doing large updates, it really needs to be turned off during that process.
This module will now <em>unset</em> the "Default path pattern" so that it has a hope of doing its job.
See <a href="!pathauto_settings">Pathauto settings</a> to review your settings.', array(
      '!pathauto_settings' => $settings_url,
    ));
    drupal_set_message($warning, 'warning');
    variable_set("pathauto_taxonomy_pattern", '');
  }

  return drupal_get_form('taxonomy_xml_import_form');
}
/**
 * Form builder for the vocabulary import form.
 *
 * Offers a target vocabulary, a data source (upload, URL, local file path or
 * web service), the source format, a recursion toggle and a collapsed group
 * of advanced options. Most defaults are read from the variables that
 * taxonomy_xml_import_form_submit() stores.
 *
 * @param $form_state
 *   The current form state; not read here.
 *
 * @return
 *   The form definition array.
 */
function taxonomy_xml_import_form($form_state) {
drupal_add_js(drupal_get_path('module', 'taxonomy_xml') . '/taxonomy_xml.js');
drupal_add_css(drupal_get_path('module', 'taxonomy_xml') . '/taxonomy_xml.css');
$formats = taxonomy_xml_formats();
// Target vocabulary options: two special entries around the existing
// vocabularies.
$vocs[TAXONOMY_XML_DETERMINED_BY_SOURCE_FILE] = t('[Determined by source file]');
foreach (taxonomy_get_vocabularies() as $vid => $voc) {
$vocs[$vid] = $voc->name;
}
$vocs[TAXONOMY_XML_CREATE_NEW] = t('[Create new]');
$form['vid'] = array(
'#type' => 'select',
'#title' => t('Target vocabulary'),
'#default_value' => variable_get('taxonomy_xml_vid', TAXONOMY_XML_CREATE_NEW),
'#options' => $vocs,
'#description' => t('
The vocabulary into which terms should be loaded.
If you choose a pre-existing vocabulary,
existing vocabulary settings (tags, node types etc) will NOT be modified.
If it is to be created new, they <em>may</em> be retained.
Internal vocabulary ID "vid" cannot be imported.
'),
);
// Wrapper for every data-source related element.
$form['data_source'] = array(
'#type' => 'fieldset',
'#id' => 'data_source',
'#attributes' => array(
'id' => 'data_source',
),
);
// The option keys here are the values taxonomy_xml_fetch_and_import()
// switches on.
$form['data_source']['protocol'] = array(
'#type' => 'select',
'#title' => t('Data Source'),
'#options' => array(
'none' => t('CHOOSE'),
'upload-file' => t('Upload File'),
'url' => t('Web URL'),
'filepath' => t('Local File'),
'service' => t('Web Service'),
),
'#attributes' => array(
'id' => 'protocol',
),
'#default_value' => variable_get('taxonomy_xml_protocol', 'none'),
);
$form['data_source']['upload_file'] = array(
'#type' => 'file',
'#title' => t('File to import'),
);
// The extension hint is shown to everyone except user 1.
global $user;
if ($user->uid != 1) {
$form['data_source']['upload_file']['#description'] = t('Only [%allowed_extensions] files are allowed.', array(
'%allowed_extensions' => TAXONOMY_XML_ALLOWED_UPLOAD_EXTENSIONS,
));
}
$form['data_source']['url'] = array(
'#type' => 'textfield',
'#size' => 128,
'#title' => t('URL to import from'),
'#description' => t('
Enter the URL of a file or web service containing a vocabulary definition.
<br/>eg <em>http://www.w3.org/TR/2003/PR-owl-guide-20031215/food</em>
<br/>The URL will be fetched requesting a content-type rdf/xml if available,
this means sometimes you can enter URLs that look like web pages
but we will actually retrieve the raw data.
(On well-behaved sources)
'),
'#default_value' => variable_get('taxonomy_xml_url', ''),
);
$form['data_source']['filepath'] = array(
'#type' => 'textfield',
'#size' => 128,
'#title' => t('Local filepath'),
'#description' => t('
Enter the path of a file containing an appropriately formatted vocabulary.
The path can either be relative to the files directory or to Drupal root.
<br>Eg: <pre>!example</pre>
', array(
'!example' => drupal_get_path('module', 'taxonomy_xml') . '/samples/sample-Dewey_System-100_terms.csv',
)),
'#default_value' => variable_get('taxonomy_xml_filepath', ''),
);
// Web service selection: a service picker, an identifier and an info link.
$available_services = taxonomy_xml_lookup_services('lookup', 'options');
$form['data_source']['service'] = array(
'#type' => 'fieldset',
'#attributes' => array(
'id' => 'edit-service-wrapper',
),
'service_id' => array(
'#type' => 'select',
'#title' => t('Taxonomy Server'),
'#description' => t('
Choose one of the available taxonomy server services.
These preset services are defined in the taxonomy_xml module
and may be extended by other contributed modules.
'),
'#default_value' => variable_get('taxonomy_xml_service_id', ''),
'#options' => $available_services,
),
'identifier' => array(
'#type' => 'textfield',
'#title' => t('Unique Identifier for this service'),
'#description' => t('Optional ID, GIUD, LSID, URI or other UID as (if) required by this service.'),
'#default_value' => variable_get('taxonomy_xml_identifier', ''),
),
'information' => array(
'#type' => 'markup',
'#value' => l('More info about remote taxonomy services', 'admin/content/taxonomy/import/services'),
),
);
// NOTE(review): taxonomy_xml_formats() keys its options by the format name
// with the "_format" suffix removed, so the fallback 'xml_format' does not
// look like a valid option key - confirm whether 'xml' was intended.
$form['data_source']['format'] = array(
'#type' => 'select',
'#title' => t('Format of file'),
'#default_value' => variable_get('taxonomy_xml_format', 'xml_format'),
'#options' => $formats,
);
$form['recurse_down'] = array(
'#type' => 'checkbox',
'#title' => t('Recurse down the taxonomy tree'),
'#description' => t('
Some taxonomy sources return references to further external URL
sources (child terms).
Tick this if those references are to be followed.
<br/>The recursion may get intensive, although the tasks will be "batched".
<br/>Note: You will <b>need</b> taxonomy_guid, taxonomy_enhancer, rdf,
or something similar to be recording the external GUIDs
or relationships (heirarchy) cannot be maintained over batches.
'),
'#default_value' => variable_get('taxonomy_xml_recurse_down', TRUE),
);
// Without taxonomy_guid the recursion checkbox is forced off and disabled.
if (!module_exists('taxonomy_guid')) {
$form['recurse_down']['#default_value'] = FALSE;
$form['recurse_down']['#disabled'] = TRUE;
$form['recurse_down']['#description'] = '<p><strong>' . t('Enable taxonomy_guid before trying recursion.') . '</strong></p>' . $form['recurse_down']['#description'];
}
$form['advanced'] = array(
'#type' => 'fieldset',
'#title' => t('Advanced'),
'#collapsible' => TRUE,
'#collapsed' => TRUE,
);
$form['advanced']['duplicate'] = array(
'#type' => 'checkbox',
'#title' => t('Allow duplicate terms'),
'#description' => t('If you want to keep the same term in different positions in the vocabulary hierarchy, check this'),
'#default_value' => variable_get('taxonomy_xml_duplicate', FALSE),
);
$form['advanced']['reuseids'] = array(
'#type' => 'checkbox',
'#title' => t('Re-use IDs'),
'#description' => t('If the source data includes numeric IDs, try to use them as Drupal term IDs. This may have mixed results on sites that are not directly synched.'),
'#default_value' => variable_get('taxonomy_xml_reuseids', FALSE),
);
$form['advanced']['taxonomy_xml_watchdog_level'] = array(
'#type' => 'select',
'#title' => t('Debug Level'),
'#description' => t('To assist development, taxonomy_xml has the ability to display parsing messages as they are logged.'),
'#options' => taxonomy_xml_watchdog_levels(),
'#default_value' => variable_get('taxonomy_xml_watchdog_level', WATCHDOG_NOTICE),
);
$form['advanced']['flush_cache_description'] = array(
'#type' => 'markup',
'#prefix' => '<p>',
'#suffix' => '</p>',
'#value' => t('When retrieving remote data, a local cache is kept of successful file downloads. These are not expected to change, but may get in the way during testing. Flush the cache to delete them <em>all</em>.'),
);
// NOTE(review): the menu callback registered in taxonomy_xml_menu() is named
// taxonomy_xml_flush_cache_file; confirm that the differently named submit
// handler below exists and is the intended one.
$form['advanced']['flush_cache'] = array(
'#type' => 'submit',
'#value' => t('Flush Cache'),
'#submit' => array(
'taxonomy_xml_flush_file_cache',
),
);
$form['submit'] = array(
'#type' => 'submit',
'#value' => t('Import'),
);
// Needed so the browser sends the uploaded file with the form.
$form['#attributes'] = array(
'enctype' => 'multipart/form-data',
);
return $form;
}
/**
 * Submit handler for the import form.
 *
 * Clears the stored service id, remembers the submitted settings as
 * variables (they become the form defaults next time) and then starts the
 * import with the submitted values.
 *
 * @param $form
 *   The form definition.
 * @param $form_state
 *   The form state holding the submitted values.
 */
function taxonomy_xml_import_form_submit($form, &$form_state) {
  variable_del('taxonomy_xml_service_id', NULL);

  // Submitted keys that are stored under "taxonomy_xml_<key>".
  $remembered_keys = array(
    'format',
    'vid',
    'duplicate',
    'reuseids',
    'protocol',
    'identifier',
    'url',
    'filepath',
    'recurse_down',
  );
  foreach ($remembered_keys as $key) {
    variable_set('taxonomy_xml_' . $key, $form_state['values'][$key]);
  }
  // This element already carries the prefix in its own name.
  variable_set('taxonomy_xml_watchdog_level', $form_state['values']['taxonomy_xml_watchdog_level']);

  taxonomy_xml_fetch_and_import($form_state['values']);
}
/**
 * Fetches the import data from the chosen source and starts the import.
 *
 * Dispatches on $form_values['protocol']: an uploaded file, a URL, a local
 * file path or a configured web service. Afterwards the term batch queue is
 * handed to batch_set().
 *
 * @param $form_values
 *   Submitted values of the import form.
 */
function taxonomy_xml_fetch_and_import($form_values) {
  switch ($form_values['protocol']) {
    case 'upload-file':
      $validators = array(
        'file_validate_extensions' => array(
          TAXONOMY_XML_ALLOWED_UPLOAD_EXTENSIONS,
        ),
      );
      $file = file_save_upload('upload_file', $validators);
      if (!$file) {
        drupal_set_message(t('Vocabulary import failed: file was not uploaded.'), 'error');
        break;
      }
      // Read the whole upload in one call. Unlike fread() with the length
      // taken from fstat(), this also copes with a zero-byte file.
      $text = file_get_contents($file->filepath);
      if ($text === FALSE) {
        drupal_set_message(t('Vocabulary import failed: file %filename cannot be read.', array(
          '%filename' => $file->filename,
        )), 'error');
        break;
      }
      drupal_set_message(t('Loaded file %filename. Now processing it.', array(
        '%filename' => $file->filename,
      )));
      $form_values['file'] = $file;
      taxonomy_xml_invoke_import($text, $form_values);
      break;

    case 'url':
      taxonomy_xml_invoke_import_on_url($form_values['url'], $form_values);
      break;

    case 'filepath':
      taxonomy_xml_invoke_import_on_filepath($form_values['filepath'], $form_values);
      break;

    case 'service':
      variable_set('taxonomy_xml_service_id', $form_values['service_id']);
      $services = taxonomy_xml_lookup_services(NULL, 'full');
      if (empty($services[$form_values['service_id']])) {
        // Do not pass an undefined service definition on to the request.
        drupal_set_message(t('Vocabulary import failed: unknown taxonomy service %service_id.', array(
          '%service_id' => $form_values['service_id'],
        )), 'error');
        break;
      }
      taxonomy_xml_invoke_service_request($services[$form_values['service_id']], $form_values);
      break;
  }
  batch_set(taxonomy_xml_add_term_to_batch_queue());
}
/**
 * Parses the given text with the selected format library and reports on it.
 *
 * When the target vocabulary is "create new", a placeholder vocabulary named
 * after the uploaded file (or the base name of $url) is obtained first and
 * remembered as the target. The format library's
 * taxonomy_xml_<format>_parse() function does the actual work.
 *
 * @param $text
 *   Raw source data.
 * @param $form_values
 *   Import form values; 'vid' and 'format' are read, 'file' optionally.
 * @param $url
 *   Optional location the data came from; passed on to the parser.
 *
 * @return
 *   TRUE if the parser reported modified terms, FALSE otherwise.
 */
function taxonomy_xml_invoke_import($text, $form_values, $url = NULL) {
  $vid = $form_values['vid'];
  if ($vid == TAXONOMY_XML_CREATE_NEW) {
    if (!empty($form_values['file'])) {
      $newname = basename($form_values['file']->filename);
    }
    else {
      $newname = basename($url);
    }
    $vocabulary = _taxonomy_xml_get_vocabulary_placeholder($newname);
    $vid = $vocabulary->vid;
    variable_set('taxonomy_xml_vid', $vid);
  }

  $format = $form_values['format'];
  module_load_include('inc', 'taxonomy_xml', $format . '_format');
  taxonomy_xml_include_module_hooks();

  $parser = "taxonomy_xml_{$format}_parse";
  if (!function_exists($parser)) {
    drupal_set_message("Unavailable format. {$parser} was not found in formatting library {$format}_format .", 'error');
    return FALSE;
  }

  $modified_terms = $parser($text, $vid, $url);

  $vocabulary = taxonomy_vocabulary_load($vid);
  if (empty($vocabulary)) {
    drupal_set_message("Failed to create or update vocabulary. Invalid ID", 'error');
    return FALSE;
  }

  if (empty($modified_terms)) {
    drupal_set_message(t("Failed to import any new terms. This may be due to syntax or formattings errors in the import file.", array()), 'error');
    return FALSE;
  }

  // Only an array result is summarised; any other non-empty result still
  // counts as success.
  if (is_array($modified_terms)) {
    $edit_links = array();
    foreach ($modified_terms as $changed_term) {
      $edit_links[] = l($changed_term->name, 'admin/content/taxonomy/edit/term/' . $changed_term->tid);
    }
    $summary = t('Updated %count term(s)', array(
      '%count' => count($modified_terms),
    ));
    drupal_set_message($summary . ' <i>' . implode(', ', $edit_links) . '.</i> ');
    drupal_set_message(t("\n Imported vocabulary %vocab_name.\n You may now need to <a href='!settings_link'>Review the vocabulary settings</a>\n or <a href='!list_link'>List the terms</a>", array(
      '%vocab_name' => $vocabulary->name,
      '!settings_link' => url('admin/content/taxonomy/edit/vocabulary/' . $vid),
      '!list_link' => url('admin/content/taxonomy/' . $vid),
    )));
  }
  return TRUE;
}
/**
 * Retrieves the data behind a URL and imports it.
 *
 * @param $url
 *   Location of the source document.
 * @param $form_values
 *   Import form values, passed on to taxonomy_xml_invoke_import().
 *
 * @return
 *   The result of taxonomy_xml_invoke_import(), or FALSE when no URL was
 *   given or nothing could be retrieved.
 */
function taxonomy_xml_invoke_import_on_url($url, $form_values) {
  if (!$url) {
    drupal_set_message(t('<strong>Taxonomy XML</strong>: No URL. A valid, readable URL required.'), 'error');
    return FALSE;
  }

  $text = taxonomy_xml_cached_get_contents($url);
  if (empty($text)) {
    // "!url" is inserted verbatim into both the href and the link text, so
    // the submitted value has to be sanitized before substitution.
    drupal_set_message(t('<strong>Taxonomy XML</strong>: Failed to retrieve content from <a href="!url">!url</a>. Check this URL and access to it. This will not work on sites requiring authentication', array(
      '!url' => check_url($url),
    )), 'error');
    return FALSE;
  }

  drupal_set_message(t('Retrieved Submitted URL %url. Now starting an import process.', array(
    '%url' => $url,
  )));
  return taxonomy_xml_invoke_import($text, $form_values, $url);
}
/**
 * Reads a local file and imports its contents.
 *
 * @param $filepath
 *   Path handed to file_get_contents().
 * @param $form_values
 *   Import form values; 'format' is shown in the status message and the
 *   whole array is passed on to taxonomy_xml_invoke_import().
 *
 * @return
 *   The result of taxonomy_xml_invoke_import(), or FALSE when no path was
 *   given or the file yielded no content.
 */
function taxonomy_xml_invoke_import_on_filepath($filepath, $form_values) {
  if (!$filepath) {
    drupal_set_message(t('<strong>Taxonomy XML</strong>: No filepath. A valid, readable file path is required.'), 'error');
    return FALSE;
  }

  $contents = file_get_contents($filepath);
  if (empty($contents)) {
    $problem = t('<strong>Taxonomy XML</strong>: Failed to retrieve content from %filepath. Check this file exists and is readable', array(
      '%filepath' => $filepath,
    ));
    drupal_set_message($problem, 'error');
    return FALSE;
  }

  $notice = t('Retrieved file %filepath. Now starting a %format import process.', array(
    '%filepath' => $filepath,
    '%format' => $form_values['format'],
  ));
  drupal_set_message($notice);
  // The path doubles as the "source location" argument of the import.
  return taxonomy_xml_invoke_import($contents, $form_values, $filepath);
}
/**
 * Lists the import/export formats available in this installation.
 *
 * Every "<name>_format.inc" file in the module directory is included. A
 * format may declare taxonomy_xml_<name>_requirements(); if that returns
 * anything non-empty the format is left out and its description is shown as
 * a warning.
 *
 * @return
 *   Array of upper-cased format labels keyed by short format name.
 */
function taxonomy_xml_formats() {
  $module_dir = drupal_get_path('module', 'taxonomy_xml');
  // Escape the dot and anchor the end of the mask so that leftovers such as
  // "xml_format.inc.orig" or "xml_format.inc~" are not included as code.
  $incs = file_scan_directory($module_dir, '.*_format\.inc$');
  $formats = array();
  foreach ($incs as $file) {
    include_once $file->filename;
    $format_name = preg_replace('/_format$/', '', $file->name);
    $funcname = "taxonomy_xml_{$format_name}_requirements";
    $error = function_exists($funcname) ? $funcname() : NULL;
    if (empty($error)) {
      $formats[$format_name] = drupal_strtoupper($format_name);
    }
    else {
      drupal_set_message($error['taxonomy_xml_' . $format_name]['description'], 'warning');
    }
  }
  return $formats;
}
/**
 * Loads the integration include for each supported module that is enabled.
 *
 * For every enabled module out of path, geotaxonomy and menu, the file
 * includes/taxonomy_xml.<module>.inc of this module is loaded.
 *
 * @param $reset
 *   Not used by this function.
 */
function taxonomy_xml_include_module_hooks($reset = FALSE) {
  foreach (array('path', 'geotaxonomy', 'menu') as $supported) {
    if (!module_exists($supported)) {
      continue;
    }
    module_load_include('inc', 'taxonomy_xml', 'includes/taxonomy_xml.' . $supported);
  }
}
/**
 * Returns the vocabulary with the given name, creating it if necessary.
 *
 * @param $name
 *   Vocabulary name to look up or create.
 * @param $edit
 *   Optional extra vocabulary values used on creation. The name, 'relations'
 *   and 'hierarchy' values set here take precedence over entries in $edit.
 *
 * @return
 *   The existing or newly created vocabulary, as loaded by
 *   taxonomy_vocabulary_load().
 */
function _taxonomy_xml_get_vocabulary_placeholder($name, $edit = array()) {
  if ($vocabulary = taxonomy_xml_get_vocabulary_by_name($name)) {
    return $vocabulary;
  }
  $vocabulary = array(
    'name' => $name,
    'relations' => TRUE,
    'hierarchy' => 2,
  ) + $edit;
  taxonomy_save_vocabulary($vocabulary);

  // taxonomy_save_vocabulary() takes its argument by reference and fills in
  // the new id; prefer that over searching for the row again.
  if (!empty($vocabulary['vid'])) {
    $vid = $vocabulary['vid'];
  }
  else {
    // Fallback: exact, case-insensitive name match. The stored name must not
    // be used as a LIKE pattern, because "%" or "_" inside a vocabulary name
    // would then match unrelated names.
    $vid = db_result(db_query("SELECT vid FROM {vocabulary} WHERE LOWER(name) = LOWER('%s')", $vocabulary['name']));
  }

  $vocabulary = taxonomy_vocabulary_load($vid);
  drupal_set_message(t('Created vocabulary %vid %vocabname to put these terms into. You probably want to <a href="!vocablink">go edit it now</a>.', array(
    '%vocabname' => $vocabulary->name,
    '%vid' => $vid,
    '!vocablink' => url('admin/content/taxonomy/edit/vocabulary/' . $vid),
  )));
  return $vocabulary;
}
/**
 * Matches vocabulary definitions found in the input to Drupal vocabularies.
 *
 * For each definition: predicates are merged into attributes, a name is
 * derived from the key if missing, and a target vocabulary is looked for,
 * first by the vid given in the input (only accepted if the name matches as
 * well), then by name. If none is found a new vocabulary is created.
 *
 * @param $vocabularies
 *   Array of vocabulary definition objects keyed by identifier. Entries are
 *   updated in place.
 *
 * @return
 *   The vid of the last definition processed, NULL if there is none, or
 *   FALSE if a vocabulary could not be created.
 */
function taxonomy_xml_absorb_vocabulary_definitions(&$vocabularies) {
  if (is_array($vocabularies)) {
    if (count($vocabularies) > 1) {
      drupal_set_message(t("When importing, I found more than one vocabulary definition in the same resource. This could be confusing. <pre>!object</pre>", array(
        '!object' => print_r($vocabularies, 1),
      )), 'warning');
    }
    foreach ($vocabularies as $vocabid => &$vocab) {
      if (!empty($vocab->predicates)) {
        taxonomy_xml_merge_predicates_into_attributes($vocab);
      }
      if (empty($vocab->name)) {
        drupal_set_message("We require a NAME to create a vocabulary. Vocabulary definition appeared to have no name. Using a label derived from the URI instead.", 'warning');
        $vocab->name = taxonomy_xml_label_from_uri($vocabid);
      }

      // First try the vocabulary ID given in the input, if any.
      $target_vocab = NULL;
      if (isset($vocab->vid)) {
        $vocab->internal_id = $vocab->vid;
        // Values taken from the input go in as placeholders: the string
        // handed to t() has to be a constant, and the values get escaped.
        drupal_set_message(t("Found a vocabulary definition in the input, called @vocabid. vid=@vid", array(
          '@vocabid' => $vocabid,
          '@vid' => $vocab->internal_id,
        )));
        $target_vocab = taxonomy_vocabulary_load($vocab->internal_id);
      }

      if (!empty($target_vocab) && $target_vocab->name == $vocab->name) {
        // Same ID and same name: treat it as the same vocabulary.
        $vocab->vid = $vocab->internal_id;
        drupal_set_message(t("Found matching target vocabulary '%vocab_name' vid=%vocab_vid", array(
          '%vocab_name' => $vocab->name,
          '%vocab_vid' => $vocab->vid,
        )));
      }
      else {
        if ($target_vocab) {
          drupal_set_message(t("The vocab ID given in the input file (%vocab_vid) conflicts with an existing vocabulary. We need a different ID... ", array(
            '%vocab_vid' => $vocab->vid,
          )));
        }
        // The ID from the input is not usable; fall back to a name match.
        unset($vocab->vid);
        if ($target_vocab = taxonomy_xml_get_vocabulary_by_name($vocab->name)) {
          $vocab->vid = $target_vocab->vid;
          drupal_set_message(t("Found a target vocabulary already in the database, matching by name '%name' vid=%vid . This will be used, but not updated.", array(
            '%name' => $vocab->name,
            '%vid' => $vocab->vid,
          )));
        }
      }

      if (empty($vocab->vid)) {
        // Nothing matched: create the vocabulary.
        $vocab = _taxonomy_xml_get_vocabulary_placeholder($vocab->name);
        $vocab_array = (array) $vocab;
        $status = taxonomy_save_vocabulary($vocab_array);
        $strings = array(
          '%name' => $vocab->name,
          '%description' => $vocab->description,
        );
        $vocab = taxonomy_vocabulary_load($vocab_array['vid']);
        if (!empty($vocab->vid)) {
          drupal_set_message(t("Made a new Drupal vocabulary definition from data found in the input. Vocab is called: '%name': %description ", $strings));
        }
        else {
          drupal_set_message(t("Failed to create a new vocabulary called: '%name' : %description \n This is fatal, aborting.", $strings), 'error');
          return FALSE;
        }
      }
    }
  }
  else {
    drupal_set_message("The document provided no recognisible vocabulary definitions");
  }
  // $vocab still refers to the last definition handled by the loop, if any.
  return isset($vocab->vid) ? $vocab->vid : NULL;
}
/**
 * Translates a term's raw predicates into term properties.
 *
 * Each predicate name is mapped through taxonomy_xml_relationship_synonyms()
 * to a canonical name, and its values are then copied to the matching
 * property: description, name, synonyms, relationship lists stored under the
 * canonical predicate name, or raw RDF statements. Predicates that are not
 * recognised are only logged.
 *
 * @param $term
 *   Term object, updated in place. $term->predicates is created if missing.
 */
function taxonomy_xml_canonicize_predicates(&$term) {
  if (empty($term->predicates)) {
    $term->predicates = array();
  }
  $predicate_synonyms = taxonomy_xml_relationship_synonyms();
  foreach ($term->predicates as $predicate => $values) {
    $original_predicate = $predicate;
    if (isset($predicate_synonyms[$predicate]) && ($cannonic = $predicate_synonyms[$predicate])) {
      $predicate = $cannonic;
    }
    // Label used in log messages; a term may have neither a name nor a guid.
    $subject = isset($term->name) ? $term->name : (isset($term->guid) ? $term->guid : '');

    switch ($predicate) {
      case TAXONOMY_XML_DESCRIPTION:
        $term->description = taxonomy_xml_get_literal_string($values);
        break;

      case TAXONOMY_XML_NAME:
        $val = taxonomy_xml_get_literal_string($values);
        if (isset($term->name) && $val != $term->name) {
          // A second, different name is appended rather than replacing.
          $term->name .= ' (' . $val . ')';
        }
        else {
          $term->name = $val;
        }
        break;

      case TAXONOMY_XML_PARENT:
      case TAXONOMY_XML_RELATED:
      case TAXONOMY_XML_CHILD:
        // File the targets under the canonical predicate name.
        foreach ($values as $i => $target_uri) {
          $term->predicates[$predicate][$i] = $target_uri;
        }
        break;

      case TAXONOMY_XML_HAS_SYNONYM:
        $term->synonyms_array = isset($term->synonyms_array) ? array_merge($term->synonyms_array, $values) : $values;
        $term->synonyms = implode("\n", array_unique($term->synonyms_array));
        break;

      case TAXONOMY_XML_IN_VOCABULARY:
        break;

      case 'type':
      case TAXONOMY_XML_UNUSED:
        break;

      case TAXONOMY_XML_OTHER_PREDICATE:
        // Keep the statement under its original predicate name.
        $last_value = '';
        foreach ($values as $value) {
          $term->rdf[] = array(
            'subject' => NULL,
            'predicate' => $original_predicate,
            'object' => $value,
          );
          $last_value = $value;
        }
        // Only the last value is logged; $last_value stays defined even when
        // the predicate carried no values at all.
        watchdog('taxonomy_xml', "\n Found a useful predicate '<b>%predicate</b> = %value'.\n Making a note of it for pure-RDF storage.\n ", array(
          '%predicate' => "{$predicate} ({$original_predicate})",
          '%subject' => $subject,
          '%value' => $last_value,
        ), WATCHDOG_INFO);
        break;

      default:
        watchdog('taxonomy_xml', "\n Dunno what to do with '<b>%predicate</b>'.\n Subject '%subject' has value(s) = <pre>!values</pre>\n A later content type may absorb this info,\n but it's not a core term property.", array(
          '%predicate' => $predicate,
          '%subject' => $subject,
          '!values' => print_r($values, 1),
        ), WATCHDOG_DEBUG);
    }
  }
  if (!empty($term->guid)) {
    taxonomy_xml_set_term_guid($term, $term->guid);
  }
}
/**
 * Queues the child terms referenced by a term for later retrieval.
 *
 * Does nothing unless recursion is enabled ('taxonomy_xml_recurse_down') and
 * the term lists children under TAXONOMY_XML_CHILD. A child referenced by an
 * http URL is reused if a term with that guid already exists, otherwise a
 * placeholder is queued. Any other reference is resolved through the active
 * lookup service, if one is configured.
 *
 * @param $term
 *   The term whose children should be queued; tid, vid, name and predicates
 *   are read.
 */
function taxonomy_xml_add_all_children_to_queue($term) {
  if (!variable_get('taxonomy_xml_recurse_down', TRUE) || empty($term->predicates[TAXONOMY_XML_CHILD])) {
    return;
  }

  foreach ((array) $term->predicates[TAXONOMY_XML_CHILD] as $child_ref) {
    // Start every reference with a clean slate, so that the host found for
    // an earlier child is not applied to this one.
    $url_parts = array();
    $scheme = "unknown";
    if (valid_url($child_ref)) {
      $url_parts = @parse_url($child_ref);
      $scheme = isset($url_parts['scheme']) ? $url_parts['scheme'] : 'no scheme';
    }
    // References whose host is "_" are skipped.
    if (isset($url_parts['host']) && $url_parts['host'] == '_') {
      continue;
    }

    if ($scheme == 'http') {
      if ($found_term = taxonomy_xml_get_term_by_guid($child_ref, $term->vid)) {
        // l() takes the link text first and the path second.
        $term_path = 'taxonomy/term/' . $found_term->tid;
        watchdog('taxonomy_xml', "While processing %term_name, found an existing local version\n of its child. # !ref\n This means it will not be re-queued.\n ", array(
          '%term_name' => $term->name,
          '!ref' => l($term_path, $term_path),
        ), WATCHDOG_DEBUG);
        $terms =& taxonomy_xml_current_terms();
        $terms[$child_ref] = $found_term;
      }
      else {
        $placeholder_term = (object) array(
          'guid' => $child_ref,
          'parent' => array(
            $term->tid => $term->tid,
          ),
          'vid' => $term->vid,
        );
        taxonomy_xml_add_term_to_batch_queue($placeholder_term);
        watchdog('taxonomy_xml', "\n While processing %term_name,\n Found a reference to child term !child_ref.\n Queuing it for later retrieval and import", array(
          '%term_name' => $term->name,
          '!child_ref' => l($child_ref, $child_ref),
        ), WATCHDOG_NOTICE);
      }
    }
    else {
      if ($service_id = variable_get('taxonomy_xml_service_id', '')) {
        // Build the lookup URI from the service's pattern.
        $services = taxonomy_xml_lookup_services(NULL, 'full');
        $service = $services[$service_id];
        $lookup_uri = taxonomy_xml_sub_placeholders_into_pattern($service['pattern'], array(
          $service['identifier'] => $child_ref,
        ));
        $placeholder_term = (object) array(
          'guid' => $lookup_uri,
          'parent' => array(
            $term->tid => $term->tid,
          ),
        );
        taxonomy_xml_add_term_to_batch_queue($placeholder_term);
      }
      else {
        drupal_set_message(t('Cannot yet resolve non-URI references, and no resolver service is active. %child_ref', array(
          '%child_ref' => $child_ref,
        )));
      }
    }
  }
}
/**
 * Copies an object's predicates onto the object as plain attributes.
 *
 * Predicate names are mapped through taxonomy_xml_relationship_synonyms();
 * the last value of each predicate becomes the attribute value. If the
 * object then has no description but does have the canonical "Definition"
 * attribute, that is used as the description.
 *
 * @param $object
 *   The object to update in place.
 *
 * @return
 *   The updated object, or nothing if $object was empty.
 */
function taxonomy_xml_merge_predicates_into_attributes(&$object) {
  if (empty($object)) {
    return;
  }
  $synonym_map = taxonomy_xml_relationship_synonyms();

  if (empty($object->predicates)) {
    watchdog('taxonomy_xml', "When importing an object, I found some data with no predicates at all. This is odd, but probably no big deal. <pre>!object</pre>", array(
      '!object' => print_r($object, 1),
    ), WATCHDOG_NOTICE);
    $object->predicates = array();
  }

  foreach ($object->predicates as $raw_name => $candidates) {
    if (isset($synonym_map[$raw_name])) {
      $attribute = $synonym_map[$raw_name];
    }
    else {
      $attribute = $raw_name;
    }
    // Only the last value listed for the predicate is kept.
    $object->{$attribute} = array_pop($candidates);
  }

  $definition_key = TAXONOMY_XML_DESCRIPTION;
  if (empty($object->description) && isset($object->{$definition_key})) {
    $object->description = $object->{$definition_key};
  }
  return $object;
}
/**
 * Returns an existing term by name, or a blank term object to fill in.
 *
 * @param $name
 *   Term name. Required unless $new is set.
 * @param $vid
 *   Vocabulary to search in and to assign to a blank term.
 * @param $new
 *   If TRUE, no lookup is done and a blank term is always returned.
 *
 * @return
 *   A term object, or NULL if a lookup was requested without a name.
 */
function _taxonomy_xml_get_term_placeholder($name, $vid = 0, $new = FALSE) {
  $term = NULL;

  if (!$new) {
    if (!$name) {
      drupal_set_message(t("Asked to make a term with no name ... that can't be right. I refuse!"), 'error');
      return NULL;
    }
    $term = taxonomy_xml_get_term_by_name_from_vocab($name, $vid);
  }

  if (!empty($term)) {
    return $term;
  }

  // No existing term was found (or none was wanted): start from a blank one.
  $blank = array(
    'name' => $name,
    'vid' => $vid,
    'description' => '',
    'weight' => 0,
    'predicates' => array(),
    'synonyms_array' => array(),
  );
  return (object) $blank;
}
/**
 * Converts the relationship predicates stored on each term into real links.
 *
 * For every term in $terms that has not been flagged taxonomy_xml_linked yet:
 *  - each parent/child/related target is resolved against $terms, falling back
 *    to taxonomy_xml_get_term_by_guid() (hits are added to $terms);
 *  - targets that point back at the term's own guid are removed;
 *  - parents are written to $term->parent, children get this term added to
 *    their own ->parent (and are saved), related terms go to $term->relations;
 *  - synonyms_array is flattened into ->synonyms;
 *  - the term is saved with taxonomy_save_term() if anything above touched it.
 *
 * Links that would make a term its own ancestor are logged and skipped.
 *
 * @param $terms
 *   Array of term objects keyed by guid, modified in place.
 */
function taxonomy_xml_set_term_relations(&$terms) {
  global $_taxonomy_xml_current_doc;

  $relationship_predicates = array(
    TAXONOMY_XML_PARENT,
    TAXONOMY_XML_CHILD,
    TAXONOMY_XML_RELATED,
  );

  foreach ($terms as $guid => &$term) {
    // Terms handled in an earlier pass are left alone.
    if (isset($term->taxonomy_xml_linked)) {
      continue;
    }

    $strings = array(
      '%tid' => $term->tid,
      '%guid' => $guid,
      '!name' => l($term->name, 'admin/content/taxonomy/edit/term/' . $term->tid),
    );

    // Ancestors of THIS term, keyed by tid. Kept in its own variable so the
    // parent pass below cannot overwrite it before the child pass reads it.
    $ancestors = taxonomy_xml_get_term_ancestors($term);

    // Pass 1: resolve every relationship target and drop self-references.
    if (isset($term->predicates) && is_array($term->predicates)) {
      foreach ($term->predicates as $predicate => $targets) {
        $strings['%predicate'] = $predicate;
        // Strict comparison: a loose in_array() would treat an integer key 0
        // as equal to every predicate name.
        if (!in_array($predicate, $relationship_predicates, TRUE)) {
          continue;
        }
        $found_term_names = array();
        foreach ($targets as $target_ix => $target) {
          watchdog('taxonomy_xml', "Term %termname references %target as a %predicate", array(
            '%termname' => $term->name,
            '%target' => $target,
            '%predicate' => $predicate,
          ), WATCHDOG_DEBUG);

          if (isset($terms[$target])) {
            $found_term_names[] = $terms[$target]->name;
          }
          elseif ($found_term = taxonomy_xml_get_term_by_guid($target, $term->vid)) {
            // Known locally but not loaded in this run: pull it into the set.
            $terms[$target] = $found_term;
            $found_term_names[] = l($found_term->name, "taxonomy/term/{$found_term->tid}") . ' ' . l('#', $target);
          }
          else {
            $found_term_names[] = $target;
          }

          if (isset($term->guid) && $term->guid == $target) {
            watchdog('taxonomy_xml', "Not supporting setting !name as related to itself as a %predicate. Avoiding a potential infinite loop.", $strings, WATCHDOG_WARNING);
            unset($term->predicates[$predicate][$target_ix]);
          }
        }
        watchdog('taxonomy_xml', '%predicate relations of %term_name are : %targets', array(
          '%term_name' => $term->name,
          '%predicate' => $predicate,
          '%targets' => implode(', ', $found_term_names),
        ), WATCHDOG_INFO);
      }
    }

    // Pass 2: parents.
    if (isset($term->predicates[TAXONOMY_XML_PARENT]) && is_array($term->predicates[TAXONOMY_XML_PARENT])) {
      foreach (array_unique($term->predicates[TAXONOMY_XML_PARENT]) as $othertermname) {
        if (!$othertermname || !isset($terms[$othertermname])) {
          continue;
        }
        $parent = $terms[$othertermname];
        if (!$parent || !isset($parent->tid)) {
          continue;
        }
        $parent_ancestors = taxonomy_xml_get_term_ancestors($parent);
        if (in_array($term->tid, array_keys($parent_ancestors))) {
          watchdog('taxonomy_xml', "Not setting !name as a descendant of itself. Avoiding a potential infinite loop.", $strings, WATCHDOG_WARNING);
          continue;
        }
        drupal_set_message(t("!name # %tid is a child of !parent # %ptid (<a href='!source' style='font-size:x-small'>source</a>)", array(
          '!name' => l($term->name, 'admin/content/taxonomy/edit/term/' . $term->tid),
          '%tid' => $term->tid,
          '!parent' => l($parent->name, 'admin/content/taxonomy/edit/term/' . $parent->tid),
          '%ptid' => $parent->tid,
          '!source' => $_taxonomy_xml_current_doc,
        )));
        $term->parent[$parent->tid] = $parent->tid;
      }
      $term->taxonomy_xml_relinked = TRUE;
    }

    // Pass 3: children. The link is stored on the child, so the child is
    // updated (and saved) rather than this term.
    if (isset($term->predicates[TAXONOMY_XML_CHILD]) && is_array($term->predicates[TAXONOMY_XML_CHILD])) {
      $strings['%predicate'] = TAXONOMY_XML_CHILD;
      foreach (array_unique($term->predicates[TAXONOMY_XML_CHILD]) as $key => $othertermname) {
        $strings['!child_guid'] = $othertermname;
        watchdog('taxonomy_xml', "!name # %tid Has a child identified as !child_guid", $strings, WATCHDOG_DEBUG);

        if (empty($othertermname) || !isset($terms[$othertermname])) {
          watchdog('taxonomy_xml', "We haven't loaded child term !child_guid in this run, so not touching it. Could be if recursion is off.", $strings, WATCHDOG_DEBUG);
          continue;
        }

        $child_term =& $terms[$othertermname];
        if (empty($child_term->tid)) {
          continue;
        }

        // A child that is already one of this term's ancestors would close a
        // loop in the hierarchy: drop the reference and do not link it.
        if (isset($ancestors[$child_term->tid])) {
          watchdog('taxonomy_xml', "Not supporting setting !name as related to an ancestor as a %predicate. Avoiding a potential infinite loop.", $strings, WATCHDOG_WARNING);
          unset($term->predicates[TAXONOMY_XML_CHILD][$key]);
          continue;
        }

        $strings['!child'] = l($child_term->name, 'admin/content/taxonomy/edit/term/' . $child_term->tid);
        if (empty($child_term->parent)) {
          $child_term->parent = array();
        }
        if (!in_array($term->tid, $child_term->parent)) {
          $child_term->parent[$term->tid] = $term->tid;
          drupal_set_message(t("!name # %tid Has a child called !child", $strings));
          // A child already flagged for relinking is saved in its own turn.
          if (empty($child_term->taxonomy_xml_relinked)) {
            $save_term = (array) $child_term;
            taxonomy_save_term($save_term);
          }
        }
        else {
          watchdog('taxonomy_xml', "!name already knows it has a child called !child", $strings, WATCHDOG_DEBUG);
        }
      }
      // Break the reference so later assignments cannot write into $terms.
      unset($child_term);
    }

    // Pass 4: related terms.
    if (isset($term->predicates[TAXONOMY_XML_RELATED]) && is_array($term->predicates[TAXONOMY_XML_RELATED])) {
      foreach (array_unique($term->predicates[TAXONOMY_XML_RELATED]) as $othertermname) {
        if (!$othertermname) {
          continue;
        }
        if (isset($terms[$othertermname])) {
          $related = $terms[$othertermname];
          $term->relations[$related->tid] = $related->tid;
        }
        else {
          drupal_set_message(t("\n Couldn't find the term called '%termname'\n to link to '%name' as being related to this.\n This relationship will be discarded. ", array(
            '%name' => $term->name,
            '%termname' => $othertermname,
          )));
        }
      }
      $term->taxonomy_xml_relinked = TRUE;
    }

    if (!empty($term->synonyms_array)) {
      $term->synonyms = implode("\n", array_unique($term->synonyms_array));
      $term->taxonomy_xml_relinked = TRUE;
    }

    $term->taxonomy_xml_linked = TRUE;
    if (!empty($term->taxonomy_xml_relinked)) {
      $save_term = (array) $term;
      taxonomy_save_term($save_term);
      unset($term->taxonomy_xml_relinked);
    }
  }
  // Break the by-reference loop variable so it cannot alias the last element.
  unset($term);
}
/**
 * Lists every ancestor of a term, keyed by term id.
 *
 * taxonomy_get_parents_all() returns the term itself as the first element,
 * followed by its parents, grandparents and so on. The term itself is not an
 * ancestor, so that FIRST element is removed. (Removing the last element
 * instead would keep the term and lose its top-most ancestor.)
 *
 * @param $term
 *   Term object; only ->tid is read.
 *
 * @return
 *   Array of term objects keyed by tid, without $term itself.
 */
function taxonomy_xml_get_term_ancestors($term) {
  $lineage = taxonomy_get_parents_all($term->tid);
  // Drop the leading entry, which is the term itself.
  array_shift($lineage);

  $ancestors = array();
  foreach ($lineage as $ancestor) {
    $ancestors[$ancestor->tid] = $ancestor;
  }
  return $ancestors;
}
/**
 * Records a GUID on a term in every form the enabled modules understand.
 *
 * The value always goes into $term->guid. With taxonomy_enhancer enabled it is
 * also written to the field_guid structures; with rdf enabled an owl:sameAs
 * statement is appended to $term->rdf.
 *
 * @param $term
 *   Term object, modified in place.
 * @param $guid
 *   Identifier to store.
 */
function taxonomy_xml_set_term_guid(&$term, $guid) {
  $term->guid = $guid;

  $has_enhancer = module_exists('taxonomy_enhancer');
  if ($has_enhancer) {
    $term->field_guid[0]['#value'] = $guid;
    $term->fields['field_guid'][0]['value'] = $guid;
  }

  $has_rdf = module_exists('rdf');
  if ($has_rdf) {
    $same_as = array();
    $same_as['predicate'] = 'owl:sameAs';
    $same_as['object'] = $guid;
    $term->rdf[] = $same_as;
  }
}
/**
 * Finds the GUID of a term, caching it on the term object.
 *
 * Sources are tried in order: an existing non-empty $term->guid, the
 * field_guid structure, and finally (rdf module only) the first owl:sameAs
 * statement returned by rdf_query() for the term's URL.
 *
 * @param $term
 *   Term object; ->guid is filled in when a value is found.
 *
 * @return
 *   The GUID, or nothing when none of the sources has one.
 */
function taxonomy_xml_get_term_guid(&$term) {
  if (!empty($term->guid)) {
    return $term->guid;
  }

  if (isset($term->field_guid)) {
    $term->guid = $term->field_guid[0]['#value'];
    return $term->guid;
  }

  if (!module_exists('rdf')) {
    return;
  }

  $subject = taxonomy_xml_rdf_taxonomy_term_path($term);
  $statements = rdf_query($subject, 'owl:sameAs', NULL, array())
    ->to_array();
  foreach ($statements as $statement) {
    // Only the first statement is used; index 2 holds its value.
    $term->guid = $statement[2];
    return $term->guid;
  }
}
/**
 * Deprecated alias of taxonomy_xml_get_term_guid().
 *
 * Logs a deprecation notice on every call, then delegates.
 *
 * @param $term
 *   Term object.
 *
 * @return
 *   Whatever taxonomy_xml_get_term_guid() returns for $term.
 */
function taxonomy_xml_get_term_uri($term) {
  $notice = __FUNCTION__ . ' deprecated. Use taxonomy_xml_get_term_guid() instead';
  watchdog('taxonomy_xml', $notice, array(), WATCHDOG_NOTICE);

  $guid = taxonomy_xml_get_term_guid($term);
  return $guid;
}
/**
 * Looks up a local term by its GUID.
 *
 * Lookup back-ends are consulted in this order:
 *  - taxonomy_guid: the first match is returned immediately;
 *  - taxonomy_enhancer: the last result of taxonomy_enhancer_get_term() is
 *    remembered as the candidate;
 *  - rdf: every local term URL that is owl:sameAs the GUID is inspected and a
 *    term belonging to $vid replaces the candidate.
 *
 * @param $guid
 *   Identifier to search for. An empty value yields NULL.
 * @param $vid
 *   Vocabulary id. It is passed to taxonomy_guid_get_term() and is required
 *   for the rdf lookup to accept a match.
 *
 * @return
 *   A term object, or NULL when nothing matched.
 */
function taxonomy_xml_get_term_by_guid($guid, $vid = NULL) {
  if (!$guid) {
    return NULL;
  }
  $term = NULL;

  if (module_exists('taxonomy_guid')) {
    $terms = taxonomy_guid_get_term($guid, $vid);
    if (count($terms) > 1) {
      watchdog('taxonomy_xml', "This is confusing, apparently there are more than one local match\n with the GUID '%guid' .\n <pre>!lookups</pre>", array(
        '%guid' => $guid,
        '!lookups' => print_r($terms, 1),
      ), WATCHDOG_WARNING);
    }
    if (!empty($terms)) {
      return reset($terms);
    }
  }

  // Test for the module and for the function that is actually called; a
  // function named just 'taxonomy_enhancer' is never what is needed here.
  if (module_exists('taxonomy_enhancer') && function_exists('taxonomy_enhancer_get_term')) {
    $searchterm = (object) array(
      'field_guid' => $guid,
    );
    $results = taxonomy_enhancer_get_term($searchterm);
    if (!empty($results)) {
      $term = array_pop($results);
    }
  }

  if (module_exists('rdf')) {
    $lookups = (array) rdf_normalize(rdf_query(NULL, 'owl:sameAs', $guid));
    $local_term_paths = array_keys($lookups);
    if (count($local_term_paths) > 1) {
      watchdog('taxonomy_xml', "This is confusing, apparently there are more than one local match\n that are sameAs '%guid' .\n <pre>!lookups</pre>\n Possibly the same concept in a different vocabulary.\n I'm only going to deal with one of them (the one in the current vocab - if any).", array(
        '%guid' => $guid,
        '!lookups' => print_r($local_term_paths, 1),
      ), WATCHDOG_DEBUG);
    }
    $term_base_url = url('taxonomy/term/', array(
      'absolute' => TRUE,
    ));
    foreach ($lookups as $subject => $predicate_array) {
      foreach ($predicate_array as $predicate => $value_array) {
        foreach ($value_array as $i => $found_value) {
          // The tid is cut from a fixed offset, so the subject has to START
          // with the term base URL, not merely contain it.
          if (strpos($subject, $term_base_url) !== 0) {
            continue;
          }
          $tid = intval(drupal_substr($subject, drupal_strlen($term_base_url)));
          if ($found_term = taxonomy_get_term($tid)) {
            watchdog('taxonomy_xml', "Found <a href=\"!term_link\">an existing term %term_name</a>\nin vocab %vid when looking for %guid", array(
              '%guid' => $guid,
              '%term_name' => $found_term->name,
              '%vid' => $found_term->vid,
              '!term_link' => url('taxonomy/term/' . $found_term->tid),
            ), WATCHDOG_DEBUG);
            // NOTE(review): without a $vid no rdf match is ever accepted;
            // confirm whether callers rely on a vocabulary-less lookup.
            if ($vid && $found_term->vid == $vid) {
              $term = $found_term;
            }
          }
        }
      }
    }
  }

  return $term;
}
/**
 * Derives a short label from a URI.
 *
 * @param $uri
 *   The URI to inspect.
 *
 * @return
 *   The URI fragment when it is non-empty, otherwise basename($uri).
 */
function taxonomy_xml_label_from_uri($uri) {
  $parsed = @parse_url($uri);
  $has_fragment = !empty($parsed['fragment']);
  return $has_fragment ? $parsed['fragment'] : basename($uri);
}
/**
 * Fetches a remote document, keeping a copy in memory and on disk.
 *
 * Only http, https and ftp URLs are retrieved. The URL fragment is ignored for
 * caching. The document from the previous call is kept in static variables;
 * documents are also stored under <files>/url_cache/<md5 of url> and a
 * non-empty stored copy is returned without touching the network.
 *
 * The requested URL (fragment included) is also recorded in the global
 * $_taxonomy_xml_current_doc.
 *
 * @param $url
 *   Address of the document.
 * @param $flush
 *   When truthy, the in-memory copy of the previous call is not reused. The
 *   on-disk copy is still consulted.
 *
 * @return
 *   The document contents, NULL for an unsupported URL, or the empty/FALSE
 *   result of the failed download.
 */
function taxonomy_xml_cached_get_contents($url, $flush = FALSE) {
  global $_taxonomy_xml_current_doc;
  $_taxonomy_xml_current_doc = $url;

  // parse_url() yields FALSE for malformed input and omits 'scheme' for
  // relative references; both count as "no scheme".
  $url_parts = @parse_url($url);
  $scheme = (is_array($url_parts) && isset($url_parts['scheme'])) ? $url_parts['scheme'] : '';
  if (!in_array($scheme, array('http', 'https', 'ftp'), TRUE)) {
    watchdog('taxonomy_xml', "Not retrieving remote file. !url is not an HTTP URL", array(
      '!url' => l($url, $url),
    ), WATCHDOG_WARNING);
    return NULL;
  }

  // The fragment addresses a part of the document, not a different document.
  $url_bits = explode('#', $url, 2);
  $url = $url_bits[0];

  static $old_url, $old_data;
  if ($url == $old_url && !$flush) {
    return $old_data;
  }
  $old_url = $url;

  $cachedir = file_directory_path() . '/url_cache';
  $save_as = $cachedir . '/' . md5($url);
  if (file_exists($save_as)) {
    $content = file_get_contents($save_as);
    $old_data = $content;
    if ($content) {
      $flush_link = l("flush", TAXONOMY_XML_ADMIN . '_xml/flush/' . md5($url));
      watchdog('taxonomy_xml', "Using locally cached copy !local_copy of !url !flush", array(
        '!local_copy' => l(md5($url), $save_as),
        '!url' => l($url, $url),
        '!flush' => $flush_link,
      ), WATCHDOG_DEBUG);
      return $content;
    }
  }

  file_check_directory($cachedir, FILE_CREATE_DIRECTORY);
  $opts = array(
    'http' => array(
      'method' => "GET",
      'header' => "Accept: application/rdf+xml,*/* \r\n",
      'user_agent' => "taxonomy_xml.module data import running from a Drupal CMS. [" . variable_get('site_name', '') . "]",
    ),
  );
  $context = stream_context_create($opts);
  $content = file_get_contents($url, FALSE, $context);
  if (!empty($content)) {
    file_put_contents($save_as, $content);
  }
  else {
    watchdog('taxonomy_xml', 'Failed to retrieve valid content from URL <a href="!url">!url</a>', array(
      '!url' => $url,
    ), WATCHDOG_ERROR);
  }
  $old_data = $content;
  return $content;
}
/**
 * Deletes one cached document from <files>/url_cache.
 *
 * Cache files are named after the md5 of their URL, so only a 32 character
 * lowercase hex string is accepted. Anything else is refused, which keeps a
 * crafted value such as "../settings.php" from reaching unlink().
 *
 * @param $hash
 *   md5 hash identifying the cached document.
 *
 * @return
 *   A short, untranslated status message.
 */
function taxonomy_xml_flush_cache_file($hash) {
  if (!is_string($hash) || !preg_match('/^[0-9a-f]{32}$/D', $hash)) {
    // The rejected value is deliberately not echoed back.
    return "Not a valid cache identifier";
  }

  $cache_file = file_directory_path() . '/url_cache/' . $hash;
  if (!is_file($cache_file)) {
    return "No cached copy found for {$hash}";
  }
  if (!unlink($cache_file)) {
    return "Could not delete {$hash}";
  }
  return "Deleted {$hash}";
}
/**
 * Deletes every file that file_scan_directory() finds under <files>/url_cache.
 *
 * @return
 *   A short, untranslated status message naming the directory.
 */
function taxonomy_xml_flush_file_cache() {
  $cachedir = file_directory_path() . '/url_cache';

  $cached_files = file_scan_directory($cachedir, '.*');
  foreach ($cached_files as $cached_file) {
    unlink($cached_file->filename);
  }

  return "Deleted all files within {$cachedir}";
}
/**
 * Finds a vocabulary by its name.
 *
 * @param $name
 *   Vocabulary name, compared loosely (==) against each vocabulary's name.
 *
 * @return
 *   The first matching vocabulary object, or NULL when there is none.
 */
function taxonomy_xml_get_vocabulary_by_name($name) {
  $match = NULL;
  foreach (taxonomy_get_vocabularies() as $vocabulary) {
    if ($vocabulary->name == $name) {
      $match = $vocabulary;
      break;
    }
  }
  return $match;
}
/**
 * Loads the term with a given name from one vocabulary, with its relations.
 *
 * Candidates from taxonomy_get_term_by_name() are checked from the last to
 * the first; the first one whose vid equals $vid is used. The term is then
 * given ->parent and ->relations (tid => tid maps, only when non-empty) and
 * ->synonyms_array, and hook 'taxonomy_term_load' is invoked on it.
 *
 * @param $name
 *   Term name.
 * @param $vid
 *   Vocabulary id the term has to belong to.
 *
 * @return
 *   The term object, or NULL when no candidate is in that vocabulary.
 */
function taxonomy_xml_get_term_by_name_from_vocab($name, $vid) {
  $term = NULL;
  foreach (array_reverse(taxonomy_get_term_by_name($name)) as $candidate) {
    if (!$candidate) {
      // An empty entry ends the search.
      break;
    }
    if ($candidate->vid == $vid) {
      $term = $candidate;
      break;
    }
  }
  if (!$term) {
    return NULL;
  }

  $parent_tids = array_keys(taxonomy_get_parents($term->tid));
  if ($parent_tids) {
    $term->parent = array_combine($parent_tids, $parent_tids);
  }

  $related_tids = array_keys(taxonomy_get_related($term->tid));
  if ($related_tids) {
    $term->relations = array_combine($related_tids, $related_tids);
  }

  $term->synonyms_array = taxonomy_get_synonyms($term->tid);
  module_invoke_all('taxonomy_term_load', $term);
  return $term;
}
/**
 * Builds the absolute URL of a term page.
 *
 * @param $term
 *   A numeric term id, a term object with ->tid, or an array with 'tid'.
 *
 * @return
 *   Absolute URL of 'taxonomy/term/' followed by the term id.
 */
function taxonomy_xml_rdf_taxonomy_term_path($term) {
  $base = url('taxonomy/term/', array(
    'absolute' => TRUE,
  ));

  if (is_numeric($term)) {
    $tid = $term;
  }
  elseif (is_object($term)) {
    $tid = $term->tid;
  }
  else {
    $tid = $term['tid'];
  }

  return $base . $tid;
}
/**
 * Returns the result of taxonomy_get_tree() rooted at a term, one level deep.
 *
 * @param $tid
 *   Term id used as the tree parent.
 * @param $vid
 *   Vocabulary id. When empty it is read from the term itself.
 *
 * @return
 *   Whatever taxonomy_get_tree($vid, $tid, -1, 1) returns.
 */
function taxonomy_xml_get_term($tid, $vid) {
  if (!$vid) {
    $vid = taxonomy_get_term($tid)->vid;
  }
  $depth_limit = 1;
  return taxonomy_get_tree($vid, $tid, -1, $depth_limit);
}
/**
 * Implementation of the 'taxonomy_term_load' hook for this module.
 *
 * Adds to the term: ->parent and ->relations (tid => tid maps, only when
 * non-empty), ->synonyms_array, and ->guid when
 * taxonomy_xml_get_term_guid() finds one.
 *
 * @param $term
 *   Term object to enrich.
 */
function taxonomy_xml_taxonomy_term_load($term) {
  $parent_tids = array_keys(taxonomy_get_parents($term->tid));
  if (!empty($parent_tids)) {
    $term->parent = array_combine($parent_tids, $parent_tids);
  }

  $related_tids = array_keys(taxonomy_get_related($term->tid));
  if (!empty($related_tids)) {
    $term->relations = array_combine($related_tids, $related_tids);
  }

  $term->synonyms_array = taxonomy_get_synonyms($term->tid);

  $guid = taxonomy_xml_get_term_guid($term);
  if ($guid) {
    $term->guid = $guid;
  }
}
/**
 * Splits an LSID ("urn:lsid:authority:namespace:identifier[:version]").
 *
 * @param $id
 *   The identifier string.
 *
 * @return
 *   NULL unless $id has at least five colon separated parts and starts with
 *   "urn:lsid". Otherwise an array with the keys urn, schema, authority,
 *   namespace, identifier, version (NULL when absent) and type, where type is
 *   the first four parts joined with ':'.
 */
function taxonomy_xml_parse_lsid($id) {
  // explode() instead of split(): split() was removed in PHP 7 and the
  // separator is a plain character anyway.
  $bits = explode(':', (string) $id);
  if (count($bits) < 5) {
    return NULL;
  }
  if ($bits[0] !== 'urn' || $bits[1] !== 'lsid') {
    return NULL;
  }

  return array(
    'urn' => $bits[0],
    'schema' => $bits[1],
    'authority' => $bits[2],
    'namespace' => $bits[3],
    'identifier' => $bits[4],
    'version' => isset($bits[5]) ? $bits[5] : NULL,
    'type' => implode(':', array_slice($bits, 0, 4)),
  );
}
/**
 * Queues a term for later import, or hands the queue over as a batch.
 *
 * With a term: an operation calling taxonomy_xml_import_from_url() for it is
 * stored in $_SESSION['taxonomy_xml_batch_queue'] under the term's guid. Once
 * the queue holds TAXONOMY_XML_MAX_BATCH_SIZE entries it is flushed straight
 * into batch_set(). Nothing is returned.
 *
 * Without a term: the queue is removed from the session and returned as a
 * batch definition whose last operation is taxonomy_xml_batch_requeue_more().
 *
 * @param $term
 *   Placeholder term with ->guid, or NULL to collect the queue.
 *
 * @return
 *   A batch definition array, or NULL when a term was queued or the queue was
 *   empty.
 */
function taxonomy_xml_add_term_to_batch_queue($term = NULL) {
  if (!$term) {
    // Collect mode.
    if (empty($_SESSION['taxonomy_xml_batch_queue'])) {
      return NULL;
    }
    $queued_operations = $_SESSION['taxonomy_xml_batch_queue'];
    unset($_SESSION['taxonomy_xml_batch_queue']);

    $batch_settings = array(
      'finished' => 'taxonomy_xml_batch_import_finished',
      'title' => t('Processing all queued import requests.'),
      'init_message' => t('Starting Batch Taxonomy Import.'),
      'progress_message' => t('Processed @current out of @total. (May require further recursion)', array()),
      'error_message' => t('Batch Taxonomy Import has encountered an error.'),
      'operations' => $queued_operations,
    );
    drupal_set_message(t("Retrieving the next batch queue. %operations_count operations in this batch . ", array(
      '%operations_count' => count($queued_operations),
    )));
    // Always finish by checking whether this round queued more work.
    $batch_settings['operations']['final'] = array(
      'taxonomy_xml_batch_requeue_more',
      array(),
    );
    return $batch_settings;
  }

  // Queue mode. Keying by guid means a term is only queued once.
  $operation = array(
    'taxonomy_xml_import_from_url',
    array(
      $term,
    ),
  );
  $_SESSION['taxonomy_xml_batch_queue'][$term->guid] = $operation;
  watchdog('taxonomy_xml', "Batch Queued %term for import later...", array(
    '%term' => $term->guid,
  ), WATCHDOG_DEBUG);

  $queue_length = count($_SESSION['taxonomy_xml_batch_queue']);
  if ($queue_length >= TAXONOMY_XML_MAX_BATCH_SIZE) {
    batch_set(taxonomy_xml_add_term_to_batch_queue());
  }
}
/**
 * Batch operation: downloads one term's source document and imports it.
 *
 * The document at $term_placeholder->guid is fetched with
 * taxonomy_xml_cached_get_contents() and handed to the parser function of the
 * configured format ("taxonomy_xml_<format>_parse"). On success the batch
 * context receives a progress message and, if the parser returned the
 * requested term, an entry in $context['results'] (tid => name).
 *
 * @param $term_placeholder
 *   Object with ->guid (the URL to fetch) and optionally ->vid.
 * @param $context
 *   Batch context array, updated in place.
 *
 * @return
 *   The parser's result, or FALSE when nothing could be downloaded or the
 *   parser function for the format is missing.
 */
function taxonomy_xml_import_from_url($term_placeholder, &$context) {
  $source_url = $term_placeholder->guid;

  $text = taxonomy_xml_cached_get_contents($source_url);
  if (empty($text)) {
    drupal_set_message(__FUNCTION__ . ' ' . t('Retrieved no content from URL %url. Returning failure.', array(
      '%url' => $source_url,
    )), 'error');
    return FALSE;
  }

  $format = variable_get('taxonomy_xml_format', 'rdf');
  $vid = isset($term_placeholder->vid) ? $term_placeholder->vid : variable_get('taxonomy_xml_vid', 0);
  module_load_include('inc', 'taxonomy_xml', $format . '_format');
  taxonomy_xml_include_module_hooks();

  $funcname = "taxonomy_xml_{$format}_parse";
  if (!function_exists($funcname)) {
    watchdog('taxonomy_xml', 'Error loading expected parse function %funcname . This is pretty bad and wholly unexpeceted. The library %format_format must be broken?', array(
      '%funcname' => $funcname,
      '%format' => $format,
    ), WATCHDOG_ERROR);
    // Nothing was parsed, so there is no result to report or return.
    return FALSE;
  }

  $terms = $funcname($text, $vid, $source_url);

  $context['message'] = "Imported from " . $source_url;
  if (!empty($terms[$source_url])) {
    $this_term = $terms[$source_url];
    $context['results'][$this_term->tid] = $this_term->name;
    $context['message'] .= "<br/>Result: " . $this_term->name;
  }
  return $terms;
}
/**
 * Batch operation: starts another batch if the last round queued more work.
 *
 * @param $context
 *   Batch context array; its 'message' is set when a new batch is started.
 */
function taxonomy_xml_batch_requeue_more(&$context) {
  $new_jobs = taxonomy_xml_add_term_to_batch_queue();
  if (!$new_jobs) {
    return;
  }

  $remaining = count($new_jobs['operations']);
  $context['message'] = t("Finished one round of imports, but the process found still more to do. Restarting to process a further %remaining_count items.", array(
    '%remaining_count' => $remaining,
  ));
  batch_set($new_jobs);
  watchdog('taxonomy_xml', $context['message']);
}
/**
 * Batch 'finished' callback: reports the outcome of an import round.
 *
 * On success the message lists a link for every entry of $results; on failure
 * it names the first remaining operation and the full state is logged as an
 * error. Either way the message is logged and shown to the user.
 *
 * @param $success
 *   Whether the batch completed without errors.
 * @param $results
 *   Map of term id => term name collected by the operations.
 * @param $operations
 *   Operations that were left unprocessed.
 */
function taxonomy_xml_batch_import_finished($success, $results, $operations) {
  if (!$success) {
    $error_operation = reset($operations);
    $message = 'An error occurred while processing ' . $error_operation[0] . ' with arguments :' . print_r($error_operation[1], TRUE);
    $state_dump = print_r(array(
      $success,
      $results,
      $operations,
    ), 1);
    watchdog('taxonomy_xml', "Batch error " . $state_dump, array(), WATCHDOG_ERROR);
  }
  else {
    $links = array();
    foreach ($results as $tid => $term_name) {
      $links[] = " " . l($term_name, 'taxonomy/term/' . $tid);
    }
    $message = t("Completed a batch round. %count items processed.", array(
      '%count' => count($results),
    )) . implode('', $links);
  }

  watchdog('taxonomy_xml', $message);
  drupal_set_message($message);
}
/**
 * Lists the taxonomy services announced through hook 'taxonomy_servers'.
 *
 * The hook results are collected once per request and kept in a static.
 *
 * @param $type
 *   Optional 'servicetype' to filter on.
 * @param $mode
 *   'options' returns id => "provider - name" labels; any other value returns
 *   the full service definitions.
 *
 * @return
 *   Array keyed by service id.
 */
function taxonomy_xml_lookup_services($type = NULL, $mode = 'full') {
  module_load_include('inc', 'taxonomy_xml', 'lookup_services');

  static $services;
  if (empty($services)) {
    $services = module_invoke_all('taxonomy_servers');
  }

  $requested = array();
  foreach ($services as $id => $service) {
    if (!$type || $service['servicetype'] == $type) {
      $requested[$id] = $service;
    }
  }

  if ($mode != 'options') {
    return $requested;
  }

  $options = array();
  foreach ($requested as $id => $service) {
    $options[$id] = $service['provider'] . " - " . $service['name'];
  }
  return $options;
}
/**
 * Fills "!name" placeholders in a pattern.
 *
 * @param $pattern
 *   String containing placeholders written as '!' followed by a key.
 * @param $values
 *   Map of key => replacement value.
 *
 * @return
 *   The pattern with every known placeholder replaced (via strtr()).
 */
function taxonomy_xml_sub_placeholders_into_pattern($pattern, $values) {
  $replacements = array();
  foreach (array_keys($values) as $key) {
    $replacements["!{$key}"] = $values[$key];
  }
  return strtr($pattern, $replacements);
}
/**
 * Page callback: describes every known taxonomy service in a definition list.
 *
 * Notices for fields a service does not define are suppressed, so incomplete
 * definitions simply leave gaps in the output.
 *
 * @return
 *   HTML: one <dt>/<dd> pair per service inside a <dl>.
 */
function taxonomy_xml_about_services() {
  drupal_set_title(t('About Taxonomy Import Services'));

  $entries = array();
  foreach (taxonomy_xml_lookup_services() as $service) {
    $entries[] = @sprintf('<dt>%s - %s (%s %s)</dt>', $service['provider'], $service['name'], $service['protocol'], $service['servicetype']);
    $entries[] = @sprintf('<dd>%s <br/> <em>%s</em> <br/> %s</dd>', $service['description'], $service['format'], l($service['about'], $service['about']));
  }

  return '<dl>' . implode('', $entries) . '</dl>';
}
/**
 * Starts an import from a taxonomy service.
 *
 * - 'URI' services: the service's format is stored in the
 *   'taxonomy_xml_format' variable, the request URL is built from the service
 *   pattern, and a batch is set that fetches the URL, imports it and finally
 *   re-queues any follow-up work.
 * - 'FILE' services: the service's file path is imported directly.
 * - Anything else: a warning message is shown.
 *
 * @param $service
 *   Service definition array ('protocol', plus 'format', 'pattern', 'name' or
 *   'filepath' depending on the protocol).
 * @param $values
 *   Values for the pattern placeholders; also passed on to the import.
 */
function taxonomy_xml_invoke_service_request($service, $values) {
  $protocol = $service['protocol'];

  if ($protocol == 'URI') {
    $values['format'] = $service['format'];
    variable_set('taxonomy_xml_format', $values['format']);
    $req = taxonomy_xml_sub_placeholders_into_pattern($service['pattern'], $values);

    $fetch_step = array(
      'taxonomy_xml_cached_get_contents',
      array(
        $req,
      ),
    );
    $import_step = array(
      'taxonomy_xml_invoke_import_on_url',
      array(
        $req,
        $values,
      ),
    );
    $requeue_step = array(
      'taxonomy_xml_batch_requeue_more',
      array(),
    );

    batch_set(array(
      'title' => t('Invoking a request on taxonomy server %name.', array(
        '%name' => $service['name'],
      )),
      'operations' => array(
        $fetch_step,
        $import_step,
        'final' => $requeue_step,
      ),
      'finished' => 'taxonomy_xml_batch_import_finished',
    ));
  }
  elseif ($protocol == 'FILE') {
    taxonomy_xml_invoke_import_on_filepath($service['filepath'], $values);
  }
  else {
    drupal_set_message(t("Taxonomy server protocol %protocol is not yet supported", array(
      '%protocol' => $service['protocol'],
    )), 'warning');
  }
}
/**
 * Implementation of hook_watchdog(): echoes this module's log entries on screen.
 *
 * Entries of type 'taxonomy_xml' whose severity number does not exceed the
 * 'taxonomy_xml_watchdog_level' variable (default WATCHDOG_NOTICE) are shown
 * with drupal_set_message(), prefixed with the severity label.
 *
 * @param $log_message
 *   Log entry array; 'type', 'severity', 'message' and 'variables' are read.
 */
function taxonomy_xml_watchdog($log_message) {
  static $watchdog_level;

  if ($log_message['type'] != 'taxonomy_xml') {
    return;
  }

  if (empty($watchdog_level)) {
    $watchdog_level = variable_get('taxonomy_xml_watchdog_level', WATCHDOG_NOTICE);
  }
  $severity = $log_message['severity'];
  if ($severity > $watchdog_level) {
    // Too detailed for the configured threshold.
    return;
  }

  $levels = taxonomy_xml_watchdog_levels();
  $text = t($log_message['message'], $log_message['variables']);
  drupal_set_message($levels[$severity] . ": " . $text);
}
/**
 * Implementation of hook_taxonomy(): forgets an import when its vocabulary goes.
 *
 * When a vocabulary is deleted, the entry named after its 'module' value
 * (with 'features_' stripped) is removed from the 'taxonomy_xml_imports'
 * variable.
 *
 * @param $op
 *   Operation being performed; only 'delete' is handled.
 * @param $type
 *   Kind of item; only 'vocabulary' is handled.
 * @param $item
 *   Array describing the item; 'module' is read.
 */
function taxonomy_xml_taxonomy($op, $type, $item) {
  if ($op != 'delete' || $type != 'vocabulary') {
    return;
  }

  $imports = variable_get('taxonomy_xml_imports', array());
  $feature_name = str_replace('features_', '', $item['module']);
  unset($imports[$feature_name]);
  variable_set('taxonomy_xml_imports', $imports);
}
/**
 * Maps the watchdog severities this module reports to short labels.
 *
 * @return
 *   Array of severity constant => label, from debug to error.
 */
function taxonomy_xml_watchdog_levels() {
  $labels = array();
  $labels[WATCHDOG_DEBUG] = 'debug';
  $labels[WATCHDOG_INFO] = 'info';
  $labels[WATCHDOG_NOTICE] = 'notice';
  $labels[WATCHDOG_WARNING] = 'warning';
  $labels[WATCHDOG_ERROR] = 'error';
  return $labels;
}
/**
 * Replaces '&' and every byte above 0x7F with a numeric character reference.
 *
 * The replacement works byte by byte and uses each byte's ordinal value.
 * NOTE(review): for UTF-8 input this produces one reference per byte rather
 * than one per character - confirm which encoding callers pass in.
 *
 * Implemented with preg_replace_callback(): the /e modifier formerly used
 * here no longer exists in PHP 7.
 *
 * @param $str
 *   Text to encode.
 *
 * @return
 *   The encoded text.
 */
function xmlentities($str) {
  return preg_replace_callback('/[^\\x00-\\x25\\x27-\\x7F]/', '_taxonomy_xml_xmlentities_replace', $str);
}

/**
 * preg_replace_callback() helper for xmlentities(): encodes one matched byte.
 */
function _taxonomy_xml_xmlentities_replace($match) {
  return '&#' . ord($match[0]) . ';';
}
/**
 * Gives access to the per-request list of terms being worked on.
 *
 * @return
 *   Reference to a static array (initially empty). Assign the result with =&
 *   to modify the shared list.
 */
function &taxonomy_xml_current_terms() {
  static $terms = array();
  return $terms;
}
/**
 * Maps predicate names found in source documents to canonical predicates.
 *
 * Source vocabularies name the same relationship in many ways ("BT",
 * "broader", "subClassOf", ...). Each known spelling is mapped to one of the
 * TAXONOMY_XML_* constants. The aliases are listed per canonical predicate,
 * so every alias can only ever have one meaning.
 *
 * @return
 *   Array of alias => canonical predicate.
 */
function taxonomy_xml_relationship_synonyms() {
  static $synonyms;
  if (isset($synonyms)) {
    return $synonyms;
  }

  $aliases_by_predicate = array(
    TAXONOMY_XML_RELATED => array(
      'Related Terms',
      'Related',
      'related',
      'RT',
      'seeAlso',
      'See Also',
      'memberMeronymOf',
      'similarTo',
      'Related Term',
    ),
    // "This term is narrower than the target".
    TAXONOMY_XML_PARENT => array(
      'Broader Terms',
      'Broader',
      'broader',
      'Broad Term',
      'BT',
      'subClassOf',
      'SubClassOf',
      'ChildOf',
      'hypernym',
      'hyponymOf',
      'parent',
      'is child taxon of',
      'biology.organism_classification.higher_classification',
      'music.genre.parent_genre',
      'media_common.media_genre.parent_genre',
      'broaderTransitive',
      'location.location.containedby',
    ),
    // "This term is broader than the target".
    TAXONOMY_XML_CHILD => array(
      'Narrower Terms',
      'Narrower',
      'Narrow Term',
      'narrower',
      'NT',
      'superClassOf',
      'ParentOf',
      'hasChild',
      'hasCAVConcept',
      'hyponym',
      'hypernymOf',
      'is parent taxon of',
      'biology.organism_classification.lower_classifications',
      'music.genre.subgenre',
      'location.location.contains',
    ),
    TAXONOMY_XML_DESCRIPTION => array(
      'Description',
      'description',
      'definition',
      'Definition',
      'comment',
      'gloss',
      'Scope Note',
      'scopeNote',
      'note',
      'SN',
    ),
    TAXONOMY_XML_HAS_SYNONYM => array(
      'Used for',
      'UF',
      'AKA',
      'synonym',
      'altLabel',
      'equivalentClass',
      'has synonym',
      'has vernacular',
      'common.topic.alias',
      'biology.organism_classification.scientific_name',
    ),
    TAXONOMY_XML_SYNONYM_OF => array(
      'See',
      'USE',
      'Use',
      'Preferred Term',
      'PT',
    ),
    TAXONOMY_XML_IN_VOCABULARY => array(
      'Part of',
      'belongs-to-facet',
      'isDefinedBy',
      'inScheme',
    ),
    TAXONOMY_XML_NAME => array(
      'name',
      'title',
      'lexicalForm',
      'label',
      'scientific name',
      'Scientific Name',
      'prefLabel',
      'type.object.name',
    ),
    TAXONOMY_XML_UNUSED => array(
      'subPropertyOf',
      'hasDescriptor',
      'subjectIndicator',
      'type.object.key',
      'license',
      'attributionName',
      'attributionURL',
      'example',
      'created',
      'modified',
      'source',
      'editorialNote',
      'closeMatch',
      'music.genre.albums',
      'music.genre.artists',
      'common.topic.webpage',
      'common.topic.article',
      'location.location.geolocation',
      'type.type.expected_by',
      'common.topic.image',
      'status',
    ),
    TAXONOMY_XML_OTHER_PREDICATE => array(
      'sameAs',
      'notation',
      'identifier',
      'type.type.instance',
    ),
  );

  $synonyms = array();
  foreach ($aliases_by_predicate as $canonical => $aliases) {
    foreach ($aliases as $alias) {
      $synonyms[$alias] = $canonical;
    }
  }
  return $synonyms;
}
/**
 * Implementation of hook_features_api(): declares the taxonomy import component.
 *
 * @return
 *   Array with a single 'taxonomy_xml_source' component definition.
 */
function taxonomy_xml_features_api() {
  $module_path = drupal_get_path('module', 'taxonomy_xml');

  $component = array();
  $component['name'] = t('Taxonomy Import (taxonomy_xml)');
  $component['default_hook'] = 'taxonomy_xml_source_default_items';
  $component['feature_source'] = TRUE;
  $component['default_file'] = FEATURES_DEFAULTS_CUSTOM;
  $component['default_filename'] = 'features.taxonomy';
  $component['file'] = $module_path . '/taxonomy_xml.features.inc';
  $component['module'] = 'taxonomy_xml';

  return array(
    'taxonomy_xml_source' => $component,
  );
}
/**
 * Implementation of hook_rdf_mapping(): SKOS mappings for terms and vocabularies.
 *
 * Terms are mapped as skos:Concept, vocabularies as skos:ConceptScheme, both
 * for the default (empty) bundle.
 *
 * @return
 *   List of two mapping definitions: taxonomy_term and taxonomy_vocabulary.
 */
function taxonomy_xml_rdf_mapping() {
  $default_bundle = '';

  // Settings shared by every property that points at another term.
  $term_reference = array(
    'type' => 'rel',
    'callback' => 'taxonomy_xml_taxonomy_term_uri',
  );

  $term_mapping = array(
    'rdftype' => array('skos:Concept'),
    'name' => array(
      'predicates' => array('rdfs:label', 'skos:prefLabel'),
    ),
    'description' => array(
      'predicates' => array('skos:definition'),
    ),
    'vid' => array(
      'predicates' => array('skos:inScheme'),
      'type' => 'rel',
      'callback' => 'taxonomy_xml_taxonomy_vocabulary_uri',
    ),
    'parent' => array('predicates' => array('skos:broader')) + $term_reference,
    'child' => array('predicates' => array('skos:narrower')) + $term_reference,
    'synonyms_array' => array(
      'predicates' => array('skos:altLabel'),
    ),
    'related' => array('predicates' => array('skos:related')) + $term_reference,
  );

  $vocabulary_mapping = array(
    'rdftype' => array('skos:ConceptScheme'),
    'name' => array(
      'predicates' => array('dc:title'),
    ),
    'description' => array(
      'predicates' => array('rdfs:comment'),
    ),
  );

  $mappings = array();
  $mappings[] = array(
    'type' => 'taxonomy_term',
    'bundle' => $default_bundle,
    'mapping' => $term_mapping,
  );
  $mappings[] = array(
    'type' => 'taxonomy_vocabulary',
    'bundle' => $default_bundle,
    'mapping' => $vocabulary_mapping,
  );
  return $mappings;
}