You are here

taxonomy_csv.import.parser.api.inc in Taxonomy CSV import/export 7.4

Check a line of imported terms (duplicates, format...).

File

import/taxonomy_csv.import.parser.api.inc
View source
<?php

/**
 * @file
 * Check a line of imported terms (duplicates, format...).
 */

/**
 * Helper function to validate an imported line.
 *
 * The checks applied depend on $options['import_format']. Each problem found
 * is reported by appending a numeric code to $messages. Judging by the inline
 * comments of this function, 3xx codes are errors, 4xx warnings, 5xx notices
 * and 6xx infos. Some checks stop the validation of the line immediately,
 * others (as the "too long" warning 454) are raised after the checked items
 * have been built, so callers must always inspect $messages and not only the
 * returned array.
 *
 * @param $line
 *   Array of items from a cleaned line.
 * @param $options
 *   Array of available options. See taxonomy_csv_line_import_process.
 *   This function reads 'import_format' and 'existing_items'.
 * @param $previous_items
 *   (Optional) Cleaned and checked previous imported line names and tids array.
 *   Needed with some contents as one term array structure or Taxonomy manager.
 *   Specificities:
 *   - taxonomy_manager:
 *       tid array is indexed with old tids.
 *   - def_links:
 *       'name' and 'tid' arrays contain sub-arrays 'vocabulary' and 'term' with
 *       imported identifier as key and name or tid/vid as value in order to
 *       allow import of duplicate names and use of numeric identifiers.
 * @param &$messages
 *   (Optional) By reference array of messages codes to be returned.
 *
 * @return
 *   Array of checked items of imported line, re-indexed from 0. It is empty
 *   when a check fails before the checked items are built.
 */
function _taxonomy_csv_line_import_check($line, $options, $previous_items = array(), &$messages = array()) {
  $checked_items = array();

  // Simplify used options.
  $existing_items =& $options['existing_items'];

  // No input check because line and previous line are already checked.
  // @todo A php callback function may be used to simplify checking.
  switch ($options['import_format']) {
    case TAXONOMY_CSV_FORMAT_ALONE_TERMS:
      // Checks empty first column.
      // Example: ", Item 1, Item 2"
      if (empty($line[0])) {
        // Warning no first column.
        $messages[] = 480;
        break;
      }
      if (count($line) > 1) {
        // Notice too many items.
        $messages[] = 501;
      }
      $checked_items = array(
        $line[0],
      );
      if (drupal_strlen($checked_items[0]) > 255) {
        // Warning too long.
        $messages[] = 454;
      }
      break;

    case TAXONOMY_CSV_FORMAT_DEFINITION_LINKS:
      if (empty($line[0])) {
        // Warning no name.
        $messages[] = 464;
        break;
      }

      // Complete $line for easier check.
      $line = array_pad($line, 9, '');

      // Columns 4 to 8 must be numeric when they are filled.
      // Example: "Term, 1, voc, Description, 0, No,..."
      foreach (array(4, 5, 6, 7, 8) as $key) {
        if (!empty($line[$key]) && !is_numeric($line[$key])) {
          // Warning not a number.
          $messages[] = 451;
          break 2;
        }
      }

      // Columns 5 to 8 are the counts of synonyms, parents, children and
      // related terms. They can be empty strings (see padding above), so they
      // are cast to integers before any arithmetic or slicing: an empty string
      // is not accepted as an operand or as array_slice() length by recent php
      // versions. $line itself is kept untouched here.
      $synonyms_count = (int) $line[5];
      $parents_count = (int) $line[6];
      $children_count = (int) $line[7];
      $related_count = (int) $line[8];

      // Check count of items.
      $links_count = $synonyms_count + $parents_count + $children_count + $related_count;
      if (count($line) < 9 + $links_count) {
        // Warning some empty items.
        $messages[] = 430;
        break;
      }

      // Check vocabularies of related terms. If empty, use of main term vid.
      if (count($line) < 9 + $links_count + $related_count) {
        // Info some empty vocabulary related items.
        $messages[] = 639;
        $line = array_pad($line, 9 + $links_count + $related_count, '');
      }
      elseif (count($line) > 9 + $links_count + $related_count) {
        // Warning too many items.
        $messages[] = 434;
        break;
      }

      // Example: "Term, 1, voc, Description, 0, 1, 2, 3, 4, item, , item 2..."
      $imported_items = array_map('strval', array_slice($line, 9, $links_count));
      foreach ($imported_items as $value) {
        if (empty($value)) {
          // Warning some empty items.
          $messages[] = 430;
          break 2;
        }
      }

      // Check and remove duplicate synonyms.
      $synonyms = array_slice($line, 9, $synonyms_count);
      $unique_synonyms = array_unique($synonyms);
      if (count($unique_synonyms) < count($synonyms)) {
        // Notice duplicate items (removed).
        $messages[] = 535;
        $line[5] = $synonyms_count = count($unique_synonyms);
        $line = array_merge(
          array_slice($line, 0, 9),
          $unique_synonyms,
          array_slice($line, 9 + count($synonyms))
        );
      }

      // Check and remove duplicate parents.
      $parents = array_slice($line, 9 + $synonyms_count, $parents_count);
      $unique_parents = array_unique($parents);
      if (count($unique_parents) < count($parents)) {
        // Notice duplicate items (removed).
        $messages[] = 536;
        $line[6] = $parents_count = count($unique_parents);
        $line = array_merge(
          array_slice($line, 0, 9 + $synonyms_count),
          $unique_parents,
          array_slice($line, 9 + $synonyms_count + count($parents))
        );
      }

      // Check and remove duplicate children.
      $children = array_slice($line, 9 + $synonyms_count + $parents_count, $children_count);
      $unique_children = array_unique($children);
      if (count($unique_children) < count($children)) {
        // Notice duplicate children (removed).
        $messages[] = 537;
        $line[7] = $children_count = count($unique_children);
        $line = array_merge(
          array_slice($line, 0, 9 + $synonyms_count + $parents_count),
          $unique_children,
          array_slice($line, 9 + $synonyms_count + $parents_count + count($children))
        );
      }

      // Check and remove duplicate related.
      // NOTE(review): the trailing vocabulary columns of related terms are kept
      // as is when duplicates are removed; confirm that the import process
      // copes with more vocabulary columns than related terms.
      $related_offset = 9 + $synonyms_count + $parents_count + $children_count;
      $related = array_slice($line, $related_offset, $related_count);
      $unique_related = array_unique($related);
      if (count($unique_related) < count($related)) {
        // Notice duplicate related (removed).
        $messages[] = 538;
        $line[8] = $related_count = count($unique_related);
        // The whole offset belongs inside array_slice(): adding the counts to
        // the returned array is a fatal "unsupported operand types" error.
        $line = array_merge(
          array_slice($line, 0, $related_offset),
          $unique_related,
          array_slice($line, $related_offset + count($related))
        );
      }

      // Example: "Term, 1, voc, Description, 0, 1, 2, 3, 4, item 1, item 1..."
      $imported_items = array_slice($line, 9, $synonyms_count + $parents_count + $children_count + $related_count);
      $imported_unique_items = array_unique($imported_items);
      if (count($imported_unique_items) < count($imported_items)) {
        // Notice duplicate items (allowed).
        $messages[] = 532;
      }

      // Example: "Name, 1, voc, Description, 0, 1, 2, 3, 4, Name, item 1..."
      if (in_array($line[0], $imported_unique_items)) {
        // Warning name and some items are same.
        $messages[] = 433;
        break;
      }

      $checked_items = $line;
      foreach (array_merge(array_slice($checked_items, 0, 3), array_slice($checked_items, 9)) as $name) {
        if (drupal_strlen($name) > 255) {
          // Warning too long.
          $messages[] = 454;
          break;
        }
      }
      break;

    case TAXONOMY_CSV_FORMAT_FLAT:
      if (count($line) == 0) {
        // Warning no item.
        $messages[] = 491;
        break;
      }
      $checked_items = array_unique(array_filter($line));
      if (count($checked_items) < count($line)) {
        // Notice duplicates, which are removed.
        $messages[] = 531;
      }
      foreach ($checked_items as $name) {
        if (drupal_strlen($name) > 255) {
          // Warning too long.
          $messages[] = 454;
          break;
        }
      }
      break;

    case TAXONOMY_CSV_FORMAT_TREE_STRUCTURE:
    case TAXONOMY_CSV_FORMAT_POLYHIERARCHY:
      // Check last empty column before first item with previous imported items.
      $first_non_empty = 0;
      $line_count = count($line);
      while ($first_non_empty < $line_count && empty($line[$first_non_empty])) {
        $first_non_empty++;
      }

      // Example: Previous line("Term 1,Item 2") ; Current line(",,,Item4")
      if ($first_non_empty && !isset($previous_items['name'][$first_non_empty - 1])) {
        // Warning impossible to get parent.
        $messages[] = 410;
        break;
      }

      // Example: Previous line("Term 1,Item 2") ; Current line(",,,Item4")
      // "0" value are lost, but that is not important for a taxonomy.
      $imported_items = array_filter(array_slice($line, $first_non_empty));
      if (count($imported_items) == 0) {
        // Warning no item.
        $messages[] = 491;
        break;
      }
      if (count($imported_items) < $line_count - $first_non_empty) {
        // Notice empty items.
        $messages[] = 510;
      }
      if (count(array_unique($imported_items)) < count($imported_items)) {
        // Info duplicates (not removed).
        $messages[] = 632;
      }

      // Leading empty columns are kept as placeholders of the ancestors.
      if ($first_non_empty == 0) {
        $checked_items = $imported_items;
      }
      else {
        $checked_items = array_merge(array_fill(0, $first_non_empty, ''), $imported_items);
      }
      foreach ($checked_items as $name) {
        if (drupal_strlen($name) > 255) {
          // Warning too long.
          $messages[] = 454;
          break;
        }
      }
      break;

    case TAXONOMY_CSV_FORMAT_PARENTS:
    case TAXONOMY_CSV_FORMAT_CHILDREN:
    case TAXONOMY_CSV_FORMAT_RELATIONS:
    case TAXONOMY_CSV_FORMAT_SYNONYMS:
      if (empty($line[0])) {
        // Warning no first column.
        $messages[] = 480;
        break;
      }

      // Example: "Term,Item 1,,Item 2"
      $imported_items = array_map('strval', array_slice($line, 1));
      foreach ($imported_items as $key => $value) {
        if ($value == '') {
          unset($imported_items[$key]);
          // Notice some empty items.
          $messages[] = 530;
        }
      }

      // Example: "Term,Item 1,Item 2,Item 1"
      $imported_unique_items = array_unique($imported_items);
      if (count($imported_unique_items) < count($imported_items)) {
        // Notice duplicates, which are removed.
        $messages[] = 531;
      }

      // Example: "Term,Item 1,Term,Item 2"
      $checked_items = array_unique(array_merge(array(
        $line[0],
      ), $imported_unique_items));
      if (count($checked_items) <= count($imported_unique_items)) {
        // Notice name and some items are same, so removed.
        $messages[] = 533;
      }

      // Example: "Term"
      if (count($checked_items) == 1 && $existing_items == TAXONOMY_CSV_EXISTING_UPDATE_REPLACE) {
        // Notice remove items.
        $messages[] = 580;
      }
      foreach ($checked_items as $name) {
        if (drupal_strlen($name) > 255) {
          // Warning too long.
          $messages[] = 454;
          break;
        }
      }
      break;

    case TAXONOMY_CSV_FORMAT_DEFINITIONS:
      if (empty($line[0])) {
        // Warning no first column.
        $messages[] = 480;
      }
      elseif (count($line) == 1) {
        // Notice empty items.
        $messages[] = 510;
        if ($existing_items == TAXONOMY_CSV_EXISTING_UPDATE_REPLACE) {
          // Notice remove items.
          $messages[] = 580;
        }
        $checked_items = array(
          $line[0],
          0,
        );
      }
      elseif (!is_numeric($line[1]) && !is_int($line[1]) && $line[1]) {
        // Warning weight is not a number.
        $messages[] = 450;
      }
      elseif (count($line) <= 3) {
        $checked_items = $line;
      }
      else {
        // Example: "Term,Item 1,,Item 2"
        // "0" value is lost, but this is not important for a taxonomy.
        $imported_items = array_filter(array_slice($line, 3));
        if (count($imported_items) < count($line) - 3) {
          // Notice some empty items.
          $messages[] = 530;
        }

        // Example: "Term,Item 1,Item 2,Item 1"
        $imported_unique_items = array_unique($imported_items);
        if (count($imported_unique_items) < count($imported_items)) {
          // Notice duplicates, which are removed.
          $messages[] = 531;
        }

        // Example: "Term,Item 1,Term,Item 2"
        $temp_checked_items = array_unique(array_merge(array(
          $line[0],
        ), $imported_unique_items));
        if (count($temp_checked_items) <= count($imported_unique_items)) {
          // Notice name and some items are same, so removed.
          $messages[] = 533;
        }
        $checked_items = array_merge(array_slice($line, 0, 3), array_slice($temp_checked_items, 1));
      }

      // Only the name and the linked items are checked for length.
      foreach (array_merge(array_slice($checked_items, 0, 1), array_slice($checked_items, 3)) as $name) {
        if (drupal_strlen($name) > 255) {
          // Warning too long.
          $messages[] = 454;
          break;
        }
      }
      break;

    case TAXONOMY_CSV_FORMAT_DESCRIPTIONS:
      if (empty($line[0])) {
        // Warning no first column.
        $messages[] = 480;
        break;
      }
      if (count($line) == 1 && $existing_items == TAXONOMY_CSV_EXISTING_UPDATE_REPLACE) {
        // Notice remove items.
        $messages[] = 580;
        $checked_items = array(
          $line[0],
          '',
        );
      }
      else {
        if (count($line) > 2) {
          // Notice too many items.
          $messages[] = 541;
        }
        // A line with a name only has no second column: avoid the undefined
        // offset and keep the NULL value that was returned in that case.
        $checked_items = array(
          $line[0],
          isset($line[1]) ? $line[1] : NULL,
        );
      }
      if (drupal_strlen($checked_items[0]) > 255) {
        // Warning too long.
        $messages[] = 454;
      }
      break;

    case TAXONOMY_CSV_FORMAT_WEIGHTS:
      if (empty($line[0])) {
        // Warning no first column.
        $messages[] = 480;
        break;
      }
      if (count($line) > 1 && !is_numeric($line[1]) && !is_int($line[1]) && $line[1]) {
        // Warning weight is not a number.
        $messages[] = 450;
        break;
      }
      if (count($line) == 1 && $existing_items == TAXONOMY_CSV_EXISTING_UPDATE_REPLACE) {
        // Notice remove items.
        $messages[] = 580;
        $checked_items = array(
          $line[0],
          0,
        );
      }
      else {
        if (count($line) > 2) {
          // Notice too many items.
          $messages[] = 541;
        }
        // A line with a name only has no second column: weight is then 0.
        $checked_items = array(
          $line[0],
          isset($line[1]) ? intval($line[1]) : 0,
        );
      }
      if (drupal_strlen($checked_items[0]) > 255) {
        // Warning too long.
        $messages[] = 454;
      }
      break;

    case TAXONOMY_CSV_FORMAT_TAXONOMY_MANAGER:
      $checked_items = $line;
      if (empty($line[0])) {
        // Notice no first column.
        $messages[] = 550;
      }
      if (count($line) < 4) {
        // Warning some items lacks.
        // NOTE(review): 450 is used for "weight is not a number" in other
        // formats of this function; confirm it is the intended code here.
        $messages[] = 450;
        break;
      }
      if (empty($line[1]) || empty($line[2]) || $line[1] == 0 || $line[2] == '') {
        // Warning no item in second or third column.
        $messages[] = 481;
        break;
      }
      if (count($line) == 4) {
        // Notice root term.
        $messages[] = 551;
        $checked_items[] = 0;
      }

      // All columns except the third and the fourth ones should be numbers.
      foreach ($checked_items as $key => $value) {
        if ($key != 2 && $key != 3 && !is_numeric($value)) {
          // Warning not a number.
          $messages[] = 451;
          break 2;
        }
      }

      // Parents are all the items from the fifth column.
      $list_parents = array_unique(array_slice($checked_items, 4));
      sort($list_parents);
      if (count($list_parents) != count(array_slice($checked_items, 4))) {
        // Notice duplicates, which are removed.
        $messages[] = 531;
      }
      foreach ($list_parents as $value) {
        if ($value == $checked_items[1]) {
          // Warning a term can't be a parent of itself.
          $messages[] = 452;
          break 2;
        }
      }
      $checked_items = array_merge(array_slice($checked_items, 0, 4), $list_parents);

      // Parents are sorted, so a root parent (0) comes first. Items of a csv
      // line are numeric strings, so the comparison is a loose one: a strict
      // comparison with integer 0 never matches when there are other parents.
      if ($list_parents[0] == 0 && count($list_parents) > 1) {
        // Warning: root term has a parent.
        $messages[] = 453;
        break;
      }

      // $previous_items keys contain all previous lines source tid.
      if (isset($previous_items['tid'][$checked_items[1]])) {
        // Notice: term has been already imported.
        $messages[] = 552;
      }
      foreach ($list_parents as $value) {
        if ($value != 0 && !isset($previous_items['tid'][$value])) {
          // Error: parent tid hasn't been already imported.
          $messages[] = 352;
          break;
        }
      }
      foreach (array_merge(array_slice($checked_items, 0, 3), array_slice($checked_items, 4)) as $name) {
        if (drupal_strlen($name) > 255) {
          // Warning too long.
          $messages[] = 454;
          break;
        }
      }
      break;

    default:
      // Import format is none of the formats managed above.
      $messages[] = 306;
  }

  // Some checks keep original keys (array_unique, array_filter): re-index.
  return array_values($checked_items);
}

Functions

Name (sort descending) | Description
_taxonomy_csv_line_import_check Helper function to validate an imported line.