csv_format.inc in Taxonomy import/export via XML 7
Include routines for CSV parsing and taxonomy/term creation.
File
formats/csv_format.inc (View source)
<?php
/* double-commented to avoid conflict with svn
*/
/**
* @file
* Include routines for CSV parsing and taxonomy/term creation.
*/
/**
 * Scan the input CSV file and create a taxonomy structure out of it.
 *
 * See the sample files for the expected format of the CSV.
 *
 * Each row is one 'statement' of the form subject, predicate, object. A row
 * holding a single value is treated as a plain list entry (a term that is
 * only given a name). The scan combines the many discrete statements into one
 * interleaved description of many dependent terms, in three passes:
 * - The first collects and enumerates the terms being used.
 * - The second retrieves or creates the terms.
 * - The third links the dependencies together.
 *
 * The wording used in the source CSV may vary depending on your sources; add
 * extra terminology to the provided taxonomy_xml_relationship_synonyms()
 * function to adapt other words.
 *
 * @param string $data
 *   The raw CSV text, rows separated by "\n". It is taken by reference but
 *   only read here.
 * @param int $vid
 *   ID of the vocabulary to import into.
 *
 * @return array|null
 *   The term objects, keyed by the label each one was referred to by in the
 *   CSV. NULL if no usable vocabulary was given.
 */
function taxonomy_xml_csv_parse(&$data, $vid) {
  // Unset the global variables before we use them.
  unset($GLOBALS['element'], $GLOBALS['term'], $GLOBALS['tag']);

  $terms = array();
  $new_terms = array();
  $skipped_terms = array();

  // A missing vid and a vid that fails to load are equally unusable: phase 2
  // reads $vocabulary->vid for every term.
  $vocabulary = $vid ? taxonomy_vocabulary_load($vid) : NULL;
  if (!$vocabulary) {
    drupal_set_message(t('No vocab to import into. Either make one or choose one.'));
    return;
  }

  // Relationship predicates and their opposites, looked up in both directions.
  $inverses = array(
    TAXONOMY_XML_PARENT => TAXONOMY_XML_CHILD,
    TAXONOMY_XML_RELATED => TAXONOMY_XML_RELATED,
  );
  $inverses = array_merge($inverses, array_flip($inverses));

  $rows = explode("\n", $data);
  drupal_set_message(t('%rowcount rows of data', array(
    '%rowcount' => count($rows),
  )));

  // PHASE 1
  //
  // Enumerate all terms and their properties.
  // This goes through all the input and sets up an array of placeholders for
  // the terms, before actually creating any.
  $predicate_synonyms = taxonomy_xml_relationship_synonyms();
  foreach ($rows as $row) {
    $triple = csv_string_to_array($row);
    // Comment lines yield no value list at all; never count() a non-array.
    if (!is_array($triple)) {
      continue;
    }
    if (count($triple) == 1) {
      // Assume it's just a simple list : "I am".
      $triple = array(
        $triple[0],
        TAXONOMY_XML_NAME,
        $triple[0],
      );
    }
    elseif (count($triple) < 3) {
      // This line contains no triple.
      continue;
    }

    $subject = trim($triple[0], '"');
    $predicate = $original_predicate = trim($triple[1], '"');
    $object = trim($triple[2], "\n\r\"");
    if (!$subject) {
      continue;
    }

    // Translate terminology synonyms to the real predicate, because the
    // source data can be inconsistent.
    if (isset($predicate_synonyms[$predicate])) {
      $predicate = $predicate_synonyms[$predicate];
    }

    // Fetch the term we already hold for this subject, or look it up / make
    // a placeholder. The placeholders are used as objects throughout, so the
    // array entry and the local variable share one instance and plain
    // assignment is enough.
    $term = isset($terms[$subject]) ? $terms[$subject] : NULL;
    if (!$term) {
      $term = _taxonomy_xml_get_term_placeholder($subject, $vid);
    }
    $terms[$subject] = $term;

    // Set its property as an array value. Allow duplicates, we will filter
    // later.
    if (!isset($term->predicates[$predicate]) || !is_array($term->predicates[$predicate])) {
      $term->predicates[$predicate] = array();
    }
    $term->predicates[$predicate][] = $object;

    if (isset($inverses[$predicate])) {
      // Also set up reciprocal links with the opposite term. We use
      // reciprocals because we allow either broader or narrower terms, but
      // don't require both.
      $inverse = $inverses[$predicate];

      // Ensure the other word exists: fetch it or make a placeholder.
      $other_term = isset($terms[$object]) ? $terms[$object] : NULL;
      if (!$other_term) {
        $other_term = _taxonomy_xml_get_term_placeholder($object, $vid);
        $terms[$object] = $other_term;
      }

      // Set the inverse property on it, referring back to the current subject.
      if (!isset($other_term->predicates[$inverse]) || !is_array($other_term->predicates[$inverse])) {
        $other_term->predicates[$inverse] = array();
      }
      $other_term->predicates[$inverse][] = $subject;
    }
    else {
      // This predicate has no inverse: it's not a relationship, it's flat
      // data.
      switch ($predicate) {
        case TAXONOMY_XML_NAME:
          $term->name = $object;
          break;

        case TAXONOMY_XML_DESCRIPTION:
          // Multiple descriptions roll up into one big string. The property
          // may not exist yet on the first description.
          $term->description = !empty($term->description) ? $term->description . "\n" . $object : $object;
          break;

        case TAXONOMY_XML_HAS_SYNONYM:
          // This strong term also uses the weak one as a synonym. Synonyms
          // are just extra text labels.
          $term->synonyms_array[] = $object;
          break;

        case TAXONOMY_XML_SYNONYM_OF:
          // This weak term is just another word for the referred-to one.
          // It's not really a full term. Do nothing now, tag the strong term
          // later. It may not exist yet.
          break;

        default:
          // The row comes straight from the uploaded file, so it is passed
          // through '@' placeholders to be escaped before display.
          drupal_set_message(t("Not quite sure what '@original_predicate' ('@predicate') in '@row' means. You may add this term to the translation array in the module code to make it become useful.", array(
            '@original_predicate' => $original_predicate,
            '@predicate' => $predicate,
            '@row' => $row,
          )));
      }
    }
  }

  drupal_set_message(t("Processing statements about %count terms", array(
    '%count' => count($terms),
  )));

  // PHASE 2
  //
  // With all the input ordered, go through and actually add terms to Drupal
  // (if needed). Ensure a definition exists for each one: make it if needed,
  // and retrieve the id.
  foreach ($terms as $name => $term) {
    // Check the shape first; everything below dereferences $term.
    if (!is_object($term)) {
      drupal_set_message(check_plain("Having difficulty analyzing term info '{$name}':" . print_r($term, 1)), 'error');
      // Bad data got this far. Ignore.
      continue;
    }

    drupal_set_message(t("Processing term %name (%termname) %tid", array(
      '%name' => $name,
      '%termname' => $term->name,
      '%tid' => isset($term->tid) ? $term->tid : 'new',
    )));

    $term->vid = $vocabulary->vid;

    // If the first pass was indexed on identifier, not name, we would not
    // have retrieved it. Try again.
    if ($loaded_term = taxonomy_xml_get_term_by_name_from_vocab($term->name, $vid)) {
      // Found one by name this time: merge our data into it and keep a handle
      // on it.
      foreach ($term as $att => $val) {
        $loaded_term->{$att} = $val;
      }
      $term = $loaded_term;
      $terms[$name] = $term;
    }

    if (!empty($term->tid)) {
      // Term already existed. Just make a note.
      $skipped_terms[] = $term->name;
      continue;
    }

    $predicates = isset($term->predicates) ? (array) $term->predicates : array();
    if (count($predicates) == 1 && isset($predicates[TAXONOMY_XML_SYNONYM_OF])) {
      // If a term was only listed to be a synonym, don't really make it.
      drupal_set_message(t("The term %name is just a synonym for %strong_term - not a true term.", array(
        '%name' => $term->name,
        '%strong_term' => print_r($predicates[TAXONOMY_XML_SYNONYM_OF], 1),
      )));
      // Ensure the stronger term knows. The synonym-of predicate creates no
      // placeholder in phase 1, so the strong term may be absent from the
      // input entirely.
      foreach ((array) $predicates[TAXONOMY_XML_SYNONYM_OF] as $strong_term) {
        if (isset($terms[$strong_term]) && is_object($terms[$strong_term])) {
          $terms[$strong_term]->synonyms_array[] = $term->name;
        }
        else {
          drupal_set_message(t('Cannot attach synonym %name to %strong_term: that term is not described in the input.', array(
            '%name' => $term->name,
            '%strong_term' => $strong_term,
          )), 'warning');
        }
      }
      // And now it's attached to its stronger term, we can forget it.
      unset($terms[$name]);
    }
    else {
      // Make new term!
      $synonyms = isset($term->synonyms_array) ? (array) $term->synonyms_array : array();
      $term->synonyms = join("\n", array_unique($synonyms));
      taxonomy_term_save($term);
      // Even though $term was created and possibly modified by reference, it
      // SHOULD still retain all the raw data we had it hold.
      // @todo unit test this
      $new_terms[] = $term->name;
    }
  }

  drupal_set_message(t('Created all %count needed terms, now linking them together.', array(
    '%count' => count($terms),
  )));

  // PHASE 3
  //
  // Third time through, set the related terms and structure, and save again.
  taxonomy_xml_set_term_relations($terms);

  if ($new_terms) {
    // Term names originate from the uploaded file; escape them before they
    // are embedded in the message markup.
    drupal_set_message(t('Added term(s)') . ' <i>' . implode(', ', array_map('check_plain', $new_terms)) . '.</i> ');
  }
  else {
    drupal_set_message(t('No new terms added.'));
  }
  if ($skipped_terms) {
    drupal_set_message(t('Did not need to re-create %skipped_count duplicate/existing term(s)', array(
      '%skipped_count' => count($skipped_terms),
    )));
  }

  return $terms;
}
/**
 * Split one line of CSV into an array of values.
 *
 * Values may or may not be wrapped in double quotes; a comma inside a quoted
 * value does not split it. Each value is whitespace-trimmed and has one pair
 * of surrounding double quotes removed.
 *
 * @param string $str
 *   A single line of CSV text.
 *
 * @return array
 *   The values found on the line. Empty for comment lines (those starting
 *   with '#' or ';') and when the line could not be parsed, so the result is
 *   always safe to count() or iterate.
 */
function csv_string_to_array($str) {
  $str = (string) $str;

  // Comment lines carry no values. Both markers are single-byte characters,
  // so checking the first byte is enough.
  if ($str !== '' && ($str[0] == '#' || $str[0] == ';')) {
    return array();
  }

  // Split on commas that are followed by an even number of double quotes up
  // to the end of the line, i.e. commas that are not inside a quoted value.
  $expr = "/,(?=(?:[^\"]*\"[^\"]*\")*(?![^\"]*\"))/";
  $results = preg_split($expr, $str);
  if ($results === FALSE) {
    // The regex engine gave up on this line; report no values rather than
    // passing FALSE on to array functions.
    return array();
  }

  $results = array_map('trim', $results);
  // Strip one pair of enclosing double quotes from each value.
  $results = preg_replace("/^\"(.*)\"\$/", "\$1", $results);

  return is_array($results) ? $results : array();
}
Functions
Name | Description |
---|---|
csv_string_to_array | Given a CSV string that may or may not contain quoted values, Split it into an array of values. |
taxonomy_xml_csv_parse | Scan the input CSV file and create a taxonomy structure out of it. |