xml_format.inc in Taxonomy import/export via XML 7
Include routines for the original XML parsing and taxonomy/term creation.
@TODO this format is largely deprecated as it's not at all portable, possibly not even between Drupal versions. As such, some features may be missing or untested.
File
formats/xml_format.inc (View source)
<?php
/* double-commented to avoid conflict with svn
*/
/**
* @file
* Include routines for the original XML parsing and taxonomy/term creation.
*
* @TODO this format is largely deprecated as it's not at all portable, possibly
* not even between Drupal versions.
* As such, some features may be missing or untested.
*/
/**
 * Sub-hook: describes the Drupal-native XML syntax.
 *
 * @see taxonomy_xml_HOOK_format_info()
 *
 * @return
 *   An array with a single 'description' entry holding the human-readable
 *   summary of this format.
 */
function taxonomy_xml_xml_format_info() {
  $info = array();
  $info['description'] = "This format is Drupal-only. It closely matches the internal data structure, but is not portable outside of Drupal without work.";
  return $info;
}
/**
 * Return an XML representation of a taxonomy.
 *
 * The document consists of a single <vocabulary> element holding one child
 * element per scalar vocabulary property, followed by one <term> element per
 * term in the tree. Each term's 'parents' property is written as repeated
 * <parent> elements; every other scalar property becomes an element named
 * after the property. Non-scalar properties are not exported.
 *
 * @param $vid
 *   Which vocabulary to generate the tree for.
 *
 * @param $parent
 *   The term ID under which to generate the tree. If 0, generate the tree
 *   for the entire vocabulary.
 *
 * @param $depth
 *   Internal use only.
 *
 * @param $max_depth
 *   The number of levels of the tree to return. Leave NULL to return all
 *   levels.
 *
 * @return
 *   The text of an XML document. If the tree is empty only the XML prolog
 *   and DOCTYPE are returned.
 */
function taxonomy_xml_xml_create($vid, $parent = 0, $depth = -1, $max_depth = NULL) {
  $output = "<?xml version=\"1.0\" standalone=\"no\"?>\n";
  $output .= "<!DOCTYPE taxonomy SYSTEM \"taxonomy.dtd\">\n";

  // NOTE(review): $depth is passed as the fourth argument, which in Drupal 7's
  // taxonomy_get_tree() appears to be $load_entities rather than a depth.
  // The call is kept as-is to preserve existing output - TODO confirm intent.
  $tree = taxonomy_get_tree($vid, $parent, $max_depth, $depth);
  if (!$tree) {
    return $output;
  }

  $vocabulary = taxonomy_vocabulary_load($vid);
  $output .= "<vocabulary>\n";

  // Dump all scalar vocabulary properties as named XML tags.
  if ($vocabulary) {
    foreach ($vocabulary as $key => $value) {
      if (is_scalar($value) || is_null($value)) {
        $output .= "<{$key}>" . check_plain((string) $value) . "</{$key}>";
      }
    }
  }

  foreach ($tree as $term) {
    $output .= "<term>";
    foreach ($term as $key => $value) {
      if ($key == 'parents') {
        // A separate loop variable is used so the $parent argument is not
        // overwritten.
        foreach ((array) $value as $parent_tid) {
          $output .= "<parent>" . check_plain((string) $parent_tid) . "</parent>";
        }
      }
      elseif (is_scalar($value) || is_null($value)) {
        $output .= "<{$key}>" . check_plain((string) $value) . "</{$key}>";
      }
      // Arrays and objects (e.g. attached field data) cannot be represented
      // as a single text node and are skipped, as is already done for the
      // vocabulary properties above.
    }
    $output .= "</term>";
  }

  $output .= "</vocabulary>\n";
  return $output;
}
/**
 * XML parser call-back: an element has been opened.
 *
 * Remembers the current tag name in the global $_tx_tag. Opening a
 * 'vocabulary' or 'term' element additionally records it in $_tx_element,
 * and a 'term' element advances the global term counter $_tx_term.
 *
 * @param $parser
 *   The XML parser invoking the call-back (unused).
 * @param $name
 *   Name of the element being opened.
 * @param $attributes
 *   Attributes of the element (unused).
 */
function taxonomy_xml_element_start($parser, $name, $attributes) {
  global $_tx_term, $_tx_element, $_tx_tag;

  if ($name == 'vocabulary') {
    $_tx_element = $name;
  }
  elseif ($name == 'term') {
    $_tx_element = $name;
    $_tx_term = $_tx_term + 1;
  }

  $_tx_tag = $name;
}
/**
 * XML parser call-back: an element has been closed.
 *
 * Closing a 'vocabulary' or 'term' element clears the global $_tx_element;
 * closing any other element leaves it untouched.
 *
 * @param $parser
 *   The XML parser invoking the call-back (unused).
 * @param $name
 *   Name of the element being closed.
 */
function taxonomy_xml_element_end($parser, $name) {
  global $_tx_element;

  if ($name == 'vocabulary' || $name == 'term') {
    $_tx_element = '';
  }
}
/**
 * XML parser call-back: character data has been read.
 *
 * The trimmed text is filed under the current tag name ($_tx_tag):
 * - inside a <term> element, 'parent' values are collected as a list in
 *   $_tx_terms[$_tx_term]['parent'] (empty values and "0" are ignored), and
 *   any other tag is accumulated as a string in $_tx_terms[$_tx_term];
 * - anywhere else the text is accumulated as a string in $_tx_vocabulary.
 *
 * NOTE(review): every chunk is trimmed on its own before being appended, so
 * if the parser delivers one text node in several chunks, whitespace at the
 * chunk boundaries is lost.
 *
 * @param $parser
 *   The XML parser invoking the call-back (unused).
 * @param $data
 *   The character data chunk.
 */
function taxonomy_xml_element_data($parser, $data) {
  global $_tx_vocabulary, $_tx_element, $_tx_terms, $_tx_term, $_tx_tag;

  $text = trim($data);

  if ($_tx_element == 'term') {
    if ($_tx_tag == 'parent') {
      // Store the trimmed id so that it matches the (trimmed) tid values it
      // is later compared against.
      if ($text) {
        $_tx_terms[$_tx_term][$_tx_tag][] = $text;
      }
      return;
    }
    // Initialise explicitly rather than suppressing the undefined-index
    // notice with '@'.
    if (!isset($_tx_terms[$_tx_term][$_tx_tag])) {
      $_tx_terms[$_tx_term][$_tx_tag] = '';
    }
    $_tx_terms[$_tx_term][$_tx_tag] .= $text;
    return;
  }

  if (!isset($_tx_vocabulary[$_tx_tag])) {
    $_tx_vocabulary[$_tx_tag] = '';
  }
  $_tx_vocabulary[$_tx_tag] .= $text;
}
/**
 * Initiate the parser on the custom XML schema.
 *
 * This uses the XML callback parser with tag callbacks. The callbacks collect
 * the vocabulary definition and the term definitions into globals; this
 * function then resolves the target vocabulary and saves the terms, shallowest
 * first, so that parents exist before their children.
 *
 * A parse error is logged and reported, but whatever was collected up to that
 * point is still imported.
 *
 * @param $data
 *   XML string.
 *
 * @param $vid
 *   Either an existing vid that the data is to be merged into (only the data,
 *   not the vocab definition) or TAXONOMY_XML_DETERMINED_BY_SOURCE_FILE -
 *   which means the vid is to be determined from the source file. If so it
 *   is modified by reference.
 *
 * @return
 *   An array of new/modified term objects keyed by term ID, or FALSE if the
 *   parser could not be created or the target vocabulary could not be loaded.
 *
 * @see taxonomy_xml_element_start()
 * @see taxonomy_xml_element_end()
 * @see taxonomy_xml_element_data()
 *
 * @todo ONLY EXPERIMENTALLY UPGRADED to D7 - no testing!
 */
function taxonomy_xml_xml_parse(&$data, &$vid = 0) {
  global $_tx_terms, $_tx_vocabulary;

  // Reset the state shared with the parser callbacks before we use it.
  unset($GLOBALS['_tx_element'], $GLOBALS['_tx_term'], $GLOBALS['_tx_tag']);
  $_tx_terms = array();
  $_tx_vocabulary = array();

  ////////////////////////
  // Parse the data:
  //
  $xml_parser = drupal_xml_parser_create($data);
  if (!$xml_parser) {
    // Without a parser none of the calls below can work.
    drupal_set_message(t('Failed to initialize the XML parser. Aborting import.'), 'error');
    return FALSE;
  }
  xml_parser_set_option($xml_parser, XML_OPTION_CASE_FOLDING, FALSE);
  xml_set_element_handler($xml_parser, 'taxonomy_xml_element_start', 'taxonomy_xml_element_end');
  xml_set_character_data_handler($xml_parser, 'taxonomy_xml_element_data');
  if (!xml_parse($xml_parser, $data, 1)) {
    $error_args = array(
      '%error' => xml_error_string(xml_get_error_code($xml_parser)),
      '%line' => xml_get_current_line_number($xml_parser),
    );
    watchdog('taxonomy_xml', 'Failed to parse XML file: %error at line %line.', $error_args, WATCHDOG_ERROR);
    drupal_set_message(t('Failed to parse file: %error at line %line.', $error_args), 'error');
  }
  xml_parser_free($xml_parser);

  ////////////////////////
  // Define the vocabulary
  //
  // If an existing vocabulary has been chosen or has the same name as the
  // vocabulary being added, terms should be added to the existing vocabulary.
  // Otherwise a new vocabulary should be created.
  if ($vid == TAXONOMY_XML_DETERMINED_BY_SOURCE_FILE) {
    // Asked to define vocabulary details from file data.
    $vid = taxonomy_xml_setup_vocabulary_from_data($_tx_vocabulary);
  }

  // If I can't load from the vid, something previous has gone wrong
  // and everything else will fail (and add orphaned terms), so abort.
  $vocabulary = (array) taxonomy_vocabulary_load($vid);
  if (empty($vocabulary['vid'])) {
    drupal_set_message(t("Failed to initialize vocabulary to put terms into. Vocabulary:%vid is an invalid ID. Aborting import.", array(
      '%vid' => $vid,
    )), 'error');
    return FALSE;
  }

  ////////////////////////
  // Start loading terms
  //
  // These collections are just for auditing:
  // - $modified_terms: term objects, keyed by tid.
  // - $new_terms: names, keyed by tid.
  // - $skipped_terms: names, keyed by tid.
  $modified_terms = array();
  $new_terms = array();
  $skipped_terms = array();

  // $new_tid is a list mapping from old IDs (defined in the file)
  // to new IDs (because the database needs new numbers when creating things
  // - to avoid conflicts and overwrites).
  $new_tid = array();

  // Find the deepest level once. It stays at -1 when the file holds no terms,
  // in which case the import loop below is skipped entirely. A term without a
  // 'depth' value is treated as a root term (depth 0).
  $max_depth = -1;
  foreach ($_tx_terms as $term) {
    $max_depth = max($max_depth, isset($term['depth']) ? (int) $term['depth'] : 0);
  }

  // Import terms in order of depth, no matter what order in the file.
  // This is because we require parents to exist before children.
  for ($i = 0; $i <= $max_depth; $i++) {
    // Loop breadth-first.
    foreach ($_tx_terms as $term_data) {
      $depth = isset($term_data['depth']) ? (int) $term_data['depth'] : 0;
      if ($depth != $i) {
        continue;
      }

      $term_data['vid'] = $vocabulary['vid'];
      $term_data['old_tid'] = isset($term_data['tid']) ? $term_data['tid'] : NULL;
      unset($term_data['tid']);

      // Calculate parent link-ups.
      if (isset($term_data['parent'])) {
        foreach ((array) $term_data['parent'] as $key => $value) {
          if (!$value) {
            continue;
          }
          if (isset($new_tid[$value])) {
            $term_data['parent'][$key] = $new_tid[$value];
          }
          else {
            // Values come from the imported file, so they go through t()
            // placeholders instead of being interpolated into the markup.
            // NOTE(review): the unresolved old ID is left in place as the
            // parent, as before - confirm that is intended.
            drupal_set_message(t("Input claims that %name has a parent %parent but that term doesn't exist (yet). tricky.", array(
              '%name' => isset($term_data['name']) ? $term_data['name'] : '',
              '%parent' => $value,
            )));
          }
        }
      }

      $term_exists = FALSE;
      if (!variable_get('taxonomy_xml_duplicate', 0)) {
        // Retrieve pre-existing terms - by name - if possible.
        $existing_terms = taxonomy_get_term_by_name($term_data['name']);
        if (count($existing_terms) > 0) {
          foreach ($existing_terms as $existing_term) {
            if ($existing_term->vid == $term_data['vid']) {
              $term_exists = TRUE;
              // Map the term tid from the imported XML file to the tid in
              // the term_data database table.
              if (isset($term_data['old_tid'])) {
                $new_tid[$term_data['old_tid']] = $existing_term->tid;
              }
              $modified_terms[$existing_term->tid] = $existing_term;
              $skipped_terms[$existing_term->tid] = $existing_term->name;
            }
          }
        }
      }

      // If the term doesn't already exist in this vocabulary, add it.
      if (!$term_exists) {
        $term = (object) $term_data;
        taxonomy_term_save($term);
        // Map the term tid from the imported XML file to the tid in the
        // term_data database table.
        if (isset($term_data['old_tid'])) {
          $new_tid[$term_data['old_tid']] = $term->tid;
        }
        $new_terms[$term->tid] = $term->name;
        $modified_terms[$term->tid] = taxonomy_term_load($term->tid);
      }
    }
  }

  ////////////////////////
  // Summarize results
  $output = t('Vocabulary %name: ', array(
    '%name' => $vocabulary['name'],
  )) . '<br/>';
  if ($new_terms) {
    $output .= t('<b>Added</b> term(s) %terms. ', array(
      '%terms' => implode(', ', $new_terms),
    ));
  }
  else {
    $output .= t('No terms added. ');
  }
  if (!empty($skipped_terms)) {
    $output .= "<br/>" . t('<b>Ignored</b> duplicate term(s) %terms. ', array(
      '%terms' => implode(', ', $skipped_terms),
    ));
  }
  drupal_set_message($output);

  return $modified_terms;
}
/**
 * Initialize a vocabulary from the given settings (an array of attributes).
 *
 * Depending on the state of pre-existing taxonomies, this will either reuse
 * an existing vocabulary untouched, or obtain a placeholder vocabulary and
 * save the imported settings onto it:
 * - If the data declares a vid that already exists, that vocabulary is used
 *   and its settings are NOT modified.
 * - Otherwise (no vid declared, or the declared vid is unknown) a placeholder
 *   vocabulary is obtained by name, the imported attributes (all except the
 *   vid) are merged onto it, and the result is saved.
 *
 * @param $edit
 *   Array of vocabulary settings parsed from the data.
 *
 * @return
 *   The appropriate vocabulary id.
 */
function taxonomy_xml_setup_vocabulary_from_data($edit) {
  // Massage/parse vocabulary node types from a csv value (old syntax) into
  // the array we need.
  if (!empty($edit['nodes']) && is_string($edit['nodes'])) {
    $node_types = explode(',', $edit['nodes']);
    $edit['nodes'] = array_combine($node_types, $node_types);
  }

  // Potential conflict with vids?
  // See if the data does in fact define a vid.
  if (isset($edit['vid'])) {
    // Casting a failed load to an array does not give an empty array, so the
    // 'vid' key is what gets checked.
    $existing_vocabulary = (array) taxonomy_vocabulary_load($edit['vid']);
    if (!empty($existing_vocabulary['vid'])) {
      // TODO valid vocab found, but is it REALLY the same?
      // TODO sanity check to avoid inadvertent overwrites.
      // Link to the Drupal 7 vocabulary edit page; fall back to the taxonomy
      // overview if the machine name is unavailable.
      $settings_path = 'admin/structure/taxonomy';
      if (!empty($existing_vocabulary['machine_name'])) {
        $settings_path .= '/' . $existing_vocabulary['machine_name'] . '/edit';
      }
      drupal_set_message(t("\n Declared vocab ID:%data_vid DOES already exist.\n Terms will be imported there, but <a href='!settings_url'>existing vocabulary settings</a>\n will not be modified.", array(
        '%data_vid' => $edit['vid'],
        '!settings_url' => url($settings_path),
      )));
      // No change, and therefore nothing to save.
      return $existing_vocabulary['vid'];
    }

    // Drupal database API will not allow me to override the VID when
    // creating a new vocab. Fair enough I guess. Make a brand new one.
    drupal_set_message(t("\n Declared vocab ID:%data_vid does NOT yet exist.\n Need to make a new one, (cannot retain the internal IDs).", array(
      '%data_vid' => $edit['vid'],
    )));
  }

  // Either the input did not define a vid, or the declared one is unknown.
  // Should I try to match on *name* to find an earlier version?
  $name = isset($edit['name']) ? $edit['name'] : NULL;
  $placeholder_vocabulary = (array) _taxonomy_xml_get_vocabulary_placeholder($name);
  $vid = $placeholder_vocabulary['vid'];

  // Use other attributes defined in the source file (all except the vid).
  unset($edit['vid']);
  $placeholder_vocabulary = array_merge($placeholder_vocabulary, $edit);
  drupal_set_message(t("Applying imported settings onto vocabulary:%vid.", array(
    '%vid' => $vid,
  )));

  // Persist the merged attributes. The save function works on an object, so
  // convert the merged array back.
  $vocabulary = (object) $placeholder_vocabulary;
  taxonomy_vocabulary_save($vocabulary);

  return $vid;
}
Functions
Name | Description |
---|---|
taxonomy_xml_element_data | Call-back function used by the XML parser. |
taxonomy_xml_element_end | Call-back function used by the XML parser. |
taxonomy_xml_element_start | Call-back function used by the XML parser. |
taxonomy_xml_setup_vocabulary_from_data | The setting is to initialize a vocabulary from the given settings (an array of attributes). |
taxonomy_xml_xml_create | Return an XML representation of a taxonomy. |
taxonomy_xml_xml_format_info | sub-hook |
taxonomy_xml_xml_parse | Initiate the parser on the custom XML schema. |