You are here

csvancestry_format.inc in Taxonomy import/export via XML 6.2

File

csvancestry_format.inc
View source
<?php

/**
 * @file
 *   Include routines for CSV parsing and taxonomy/term creation.
 *
 * This is an alternate CSV format, taking each term on a new line with its
 * ancestors in previous columns.
 *
 * drinks
 * drinks, beer
 * drinks, wine
 * drinks, wine, red wine
 * drinks, wine, white wine
 * drinks, milk
 *
 * ...etc. It's very limited (and wordy), but also about as obvious as possible.
 *
 * This format was used by google base for its merchant product taxonomy.
 *
 * Much of the code here is just reporting. The meat could be done in less than
 * a page.
 *
 * @author Dan Morrison (dman) http://coders.co.nz/
 */
module_load_include('inc', 'taxonomy_xml', 'csv_format');

/**
 * Describe the 'csvancestry' format to the taxonomy_xml module.
 *
 * Implementation of (internal) taxonomy_xml_HOOK_format_info().
 *
 * @return
 *   An array keyed by format id. The single entry carries the format's 'id',
 *   its human-readable 'name', the providing 'module' and the name of the
 *   'parser_callback' function that handles input in this format.
 */
function taxonomy_xml_csvancestry_format_info() {
  $format_id = 'csvancestry';

  // Build the single format definition field by field.
  $definition = array();
  $definition['id'] = $format_id;
  $definition['name'] = 'CSV, ancestry on one line';
  $definition['module'] = 'taxonomy_xml';
  $definition['parser_callback'] = 'taxonomy_xml_csvancestry_parse';

  return array($format_id => $definition);
}

/**
 * Scan the input CSV data and queue the creation of a taxonomy structure.
 *
 * The data is split into lines and every line is queued as one batch
 * operation handled by cvsancestry_import_row(). No term is created directly
 * in this function; the batch does the work.
 *
 * See the sample files for the expected format of the CSV.
 *
 * @param $data
 *   The raw CSV text, one term per line with its ancestors in the preceding
 *   columns. Taken by reference, but not modified here.
 * @param $vid
 *   The id of the vocabulary to import into.
 *
 * @return
 *   A short status string once the rows have been queued, or nothing (NULL)
 *   if no vocabulary id was given.
 */
function taxonomy_xml_csvancestry_parse(&$data, $vid) {
  // Reset the shared term cache used by cvsancestry_import_row() before we
  // start. The $GLOBALS key is the bare variable name, without a '$'.
  unset($GLOBALS['_taxonomy_xml_terms']);

  if (!$vid) {
    drupal_set_message(t('No vocab to import into. Either make one or choose one.'));
    return;
  }
  $vocabulary = taxonomy_vocabulary_load($vid);
  module_invoke_all('taxonomy_xml_vocabulary_presave', $vocabulary);
  $rows = explode("\n", $data);

  // Unlike all other formats, each line and term is complete.
  // It's required that parents are created before children.
  // We don't need to mess around with parsing, just create the term.

  // Prepare a batch config
  $batch_settings = array(
    'title' => t('Processing all queued import requests.'),
    'file' => drupal_get_path('module', 'taxonomy_xml') . '/csvancestry_format.inc',
    'operations' => array(),
    'finished' => 'cvsancestry_import_finished',
  );
  foreach ($rows as $row) {
    $row_data = csv_string_to_array($row);

    // Queue the import of this line. Blank lines are queued too; the row
    // callback is responsible for ignoring them.
    $batch_settings['operations'][] = array(
      'cvsancestry_import_row',
      array(
        $vid,
        $row_data,
      ),
    );
  }
  batch_set($batch_settings);

  // NOTE(review): this hook is invoked as soon as the batch is registered,
  // which looks like it is before any queued row has been processed - confirm
  // that implementations do not expect the terms to exist yet.
  module_invoke_all('taxonomy_xml_vocabulary_postsave', $vocabulary);
  drupal_set_message(t('Queued %rowcount rows of data', array(
    '%rowcount' => count($rows),
  )));
  return "OK, processing is being done in batch...";
}

/**
 * Summarize the results.
 *
 * Batch 'finished' callback registered by taxonomy_xml_csvancestry_parse().
 *
 * @param $success
 *   TRUE if the batch completed without error.
 * @param $results
 *   The results collected by cvsancestry_import_row(): term names listed
 *   under 'new_terms' and 'old_terms'. Either key may be missing.
 * @param $operations
 *   Unused here.
 */
function cvsancestry_import_finished($success, $results, $operations) {
  // Make sure both counters exist even if no row ever recorded a result.
  $results = array_merge(array(
    'new_terms' => array(),
    'old_terms' => array(),
  ), (array) $results);
  $counts = array(
    '%new' => count($results['new_terms']),
    '%old' => count($results['old_terms']),
  );
  if ($success) {
    drupal_set_message(t('%new new and %old old terms processed', $counts));
  }
  else {
    // Previously nothing was reported on failure (the message variable was
    // never assigned); tell the user how far the import got.
    drupal_set_message(t('The import did not finish successfully. %new new and %old old terms were processed before it stopped.', $counts), 'error');
  }
}
/**
 * Batch operation: import a single CSV row as a term.
 *
 * The last non-empty cell of the row is the term itself; the cell before it,
 * if any, names its direct parent. Earlier ancestors are ignored here, as
 * they are expected to have been handled by their own rows.
 *
 * @param $vid
 *   The id of the vocabulary to import into.
 * @param $row_data
 *   The cells of one CSV line, ancestors first and the term last.
 * @param $context
 *   The batch context. Names of processed terms are appended to
 *   $context['results']['new_terms'] or $context['results']['old_terms'].
 */
function cvsancestry_import_row($vid, $row_data, &$context) {

  // $_taxonomy_xml_terms is semi-persistent.
  // We expect to be called atomically, but if the term has been cached recently,
  // keep a note of it in the meantime.
  global $_taxonomy_xml_terms;

  // Strip trailing empty cells; the last non-empty one is the term name.
  $term_name = NULL;
  while (!empty($row_data) && empty($term_name)) {
    $term_name = array_pop($row_data);
  }
  if (empty($term_name)) {

    // blank line;
    return;
  }
  $term = isset($_taxonomy_xml_terms[$term_name]) ? $_taxonomy_xml_terms[$term_name] : NULL;
  if (!$term) {

    // Start by looking for it
    $term = _taxonomy_xml_get_term_placeholder($term_name, $vid);
    if (empty($term->tid)) {
      $context['results']['new_terms'][] = $term_name;
    }
    else {
      $context['results']['old_terms'][] = $term_name;
    }
    $_taxonomy_xml_terms[$term_name] = $term;
  }

  // Attach parent, if it exists
  $parent_name = array_pop($row_data);
  if (!empty($parent_name)) {
    csvancestry_set_parent($term, $parent_name);
  }

  // And save it.
  $term_data = (array) $term;
  taxonomy_save_term($term_data);

  // Re-retrieve it, so we know the tid. Without the tid on the cached term, a
  // later row reusing the cache entry would save the term as new once more.
  if (empty($term->tid)) {
    $retrieved_term = taxonomy_xml_get_term_by_name_from_vocab($term_name, $vid);
    if ($retrieved_term && !empty($retrieved_term->tid)) {
      $term->tid = $retrieved_term->tid;
    }
  }
  $_taxonomy_xml_terms[$term_name] = $term;
}

/**
 * Set the parent of this term.
 *
 * $term must be a valid term. The parent term may be invented on the fly: if
 * no term with that name has a tid yet, it is saved first and then looked up
 * again to learn its tid.
 *
 * @param $term
 *   The term object to modify. Its 'parent' list gains the parent's tid; the
 *   term itself is not saved here.
 * @param $parent_name
 *   The name of the parent term, looked up in the same vocabulary as $term.
 */
function csvancestry_set_parent($term, $parent_name) {
  if ($parent_name == $term->name) {
    drupal_set_message(t("Not setting %name as a child of itself", array(
      '%name' => $term->name,
    )));
    // Nothing more to do. (This was a 'continue', which is a fatal error
    // outside of a loop.)
    return;
  }
  $parent_term = _taxonomy_xml_get_term_placeholder($parent_name, $term->vid);
  if (empty($parent_term->tid)) {
    $parent_data = (array) $parent_term;
    taxonomy_save_term($parent_data);

    // Retrieve the term object to get hold of the tid if needed
    $parent_term = taxonomy_xml_get_term_by_name_from_vocab($parent_name, $term->vid);
  }
  if ($parent_term && isset($parent_term->tid)) {
    drupal_set_message(t("!name # %tid is a child of !parent # %ptid ", array(
      // '!' placeholders are inserted as-is, so escape the imported name.
      '!name' => check_plain($term->name),
      // A term that has not been saved yet has no tid.
      '%tid' => isset($term->tid) ? $term->tid : '',
      '!parent' => l($parent_term->name, 'admin/content/taxonomy/edit/term/' . $parent_term->tid),
      '%ptid' => $parent_term->tid,
    )));
    $term->parent[$parent_term->tid] = $parent_term->tid;
  }
}

Functions

Namesort descending Description
csvancestry_set_parent Set the parent of this term. $term must be a valid term. parent term may be invented on the fly.
cvsancestry_import_finished Summarize the results.
cvsancestry_import_row
taxonomy_xml_csvancestry_format_info Return information about this format
taxonomy_xml_csvancestry_parse Scan the input CSV file and create a taxonomy structure out of it.