You are here

class MigrateSourceCSV in Migrate 7.2

Same name and namespace in other branches
  1. 6.2 plugins/sources/csv.inc \MigrateSourceCSV

Implementation of MigrateSource, to handle imports from CSV files.

If the CSV file contains non-ASCII characters, make sure it includes a UTF BOM (Byte Order Mark) so they are interpreted correctly.

Hierarchy

Expanded class hierarchy of MigrateSourceCSV

File

plugins/sources/csv.inc, line 221
Define a MigrateSource for importing from comma separated values files.

View source
class MigrateSourceCSV extends MigrateSource {

  /**
   * List of available source fields.
   *
   * @var array
   */
  protected $fields = array();

  /**
   * Parameters for the fgetcsv() call.
   *
   * @var array
   */
  protected $fgetcsv = array();

  /**
   * File handle for the CSV file being iterated.
   *
   * NULL whenever no file is currently open.
   *
   * @var resource
   */
  protected $csvHandle = NULL;

  /**
   * The number of rows in the CSV file before the data starts.
   *
   * @var int
   */
  protected $headerRows = 0;

  /**
   * The current row/line number in the CSV file.
   *
   * @var int
   */
  protected $rowNumber;

  /**
   * The path to the source file.
   *
   * @var string
   */
  protected $file;

  /**
   * Column definitions: column index => array(field name, description).
   *
   * Declared explicitly so it is no longer created as a dynamic property;
   * kept public because that is the visibility a dynamic property had.
   *
   * @var array
   */
  public $csvcolumns = array();

  /**
   * The options array passed to the constructor.
   *
   * Declared explicitly so it is no longer created as a dynamic property;
   * kept public because that is the visibility a dynamic property had.
   *
   * @var array
   */
  public $options = array();

  /**
   * Simple initialization.
   *
   * @param string $path
   *  The path to the source file
   * @param array $csvcolumns
   *  Keys are integers. values are array(field name, description).
   * @param array $options
   *  Options applied to this source. Recognized here: 'header_rows',
   *  'embedded_newlines', and the fgetcsv() parameters 'length', 'delimiter',
   *  'enclosure' and 'escape'.
   * @param array $fields
   *  Optional - keys are field names, values are descriptions. Use to override
   *  the default descriptions, or to add additional source fields which the
   *  migration will add via other means (e.g., prepareRow()).
   */
  public function __construct($path, array $csvcolumns = array(), array $options = array(), array $fields = array()) {
    parent::__construct($options);
    $this->file = $path;
    $this->headerRows = !empty($options['header_rows']) ? $options['header_rows'] : 0;
    $this->options = $options;
    $this->fields = $fields;

    // fgetcsv specific options
    $defaults = array(
      'length' => NULL,
      'delimiter' => ',',
      'enclosure' => '"',
      'escape' => '\\',
    );
    foreach ($defaults as $key => $default) {
      $this->fgetcsv[$key] = isset($options[$key]) ? $options[$key] : $default;
    }

    // One can either pass in an explicit list of column names to use, or if we
    // have a header row we can use the names from that.
    if (!$this->headerRows || !empty($csvcolumns)) {
      $this->csvcolumns = $csvcolumns;
      return;
    }

    $this->csvcolumns = array();
    $this->csvHandle = fopen($this->file, 'r');
    if (!$this->validResource()) {
      // Do not keep the FALSE returned by a failed fopen() around.
      $this->csvHandle = NULL;
      return;
    }

    // Skip all but the last header
    for ($i = 0; $i < $this->headerRows - 1; $i++) {
      $this->getNextLine();
    }
    $row = $this->getNextLine();

    // The file may hold fewer lines than the declared number of header rows,
    // in which case there is no header record to take names from.
    if (is_array($row)) {
      foreach ($row as $header) {
        $header = trim($header);
        $this->csvcolumns[] = array($header, $header);
      }
    }
    fclose($this->csvHandle);
    $this->csvHandle = NULL;
  }

  /**
   * Return a string representing the source query.
   *
   * @return string
   *  The path of the CSV file.
   */
  public function __toString() {
    return $this->file;
  }

  /**
   * Returns a list of fields available to be mapped from the source query.
   *
   * @return array
   *  Keys: machine names of the fields (to be passed to addFieldMapping)
   *  Values: Human-friendly descriptions of the fields.
   */
  public function fields() {
    $fields = array();
    foreach ($this->csvcolumns as $values) {
      $fields[$values[0]] = $values[1];
    }

    // Any caller-specified fields with the same names as extracted fields will
    // override them; any others will be added
    if ($this->fields) {
      $fields = $this->fields + $fields;
    }
    return $fields;
  }

  /**
   * Return a count of all available source records.
   *
   * @return int
   *  The number of data records (header rows excluded); 0 if the file cannot
   *  be read.
   */
  public function computeCount() {

    // If the data may have embedded newlines, the file line count won't reflect
    // the number of CSV records (one record will span multiple lines). We need
    // to scan with fgetcsv to get the true count.
    if (!empty($this->options['embedded_newlines'])) {
      $count = 0;

      // getNextLine() reads from $this->csvHandle, so borrow that property for
      // the scan and put back whatever handle an iteration may have open.
      $iteration_handle = $this->csvHandle;
      $this->csvHandle = fopen($this->file, 'r');
      if ($this->validResource()) {

        // Skip every header record. They are read as CSV records (not raw
        // lines) because a header may itself contain embedded newlines.
        for ($i = 0; $i < $this->headerRows; $i++) {
          $this->getNextLine();
        }
        while ($this->getNextLine()) {
          $count++;
        }
        fclose($this->csvHandle);
      }
      $this->csvHandle = is_resource($iteration_handle) ? $iteration_handle : NULL;
      return $count;
    }

    // TODO. If this takes too much time/memory, use exec('wc -l')
    $lines = file($this->file);
    if ($lines === FALSE) {
      Migration::displayMessage(t('Could not open CSV file !url', array(
        '!url' => $this->file,
      )));
      return 0;
    }

    // Never report a negative count when the file is shorter than the headers.
    return max(0, count($lines) - $this->headerRows);
  }

  /**
   * Implementation of MigrateSource::performRewind().
   *
   * Reopens the file and positions it on the first data record.
   *
   * @return void
   */
  public function performRewind() {

    // Close any previously-opened handle. is_resource() also covers the FALSE
    // left behind by a failed fopen().
    if (is_resource($this->csvHandle)) {
      fclose($this->csvHandle);
    }
    $this->csvHandle = NULL;

    // Load up the first row, skipping the header(s) if necessary
    $this->csvHandle = fopen($this->file, 'r');
    if (!$this->validResource()) {
      $this->csvHandle = NULL;
      return;
    }
    for ($i = 0; $i < $this->headerRows; $i++) {
      $this->getNextLine();
    }
    $this->rowNumber = 1;
  }

  /**
   * Implementation of MigrateSource::getNextRow().
   * Return the next line of the source CSV file as an object.
   *
   * Each property is named after a field in $this->csvcolumns; columns missing
   * from the record are NULL. 'csvrownum' holds the 1-based data row number.
   *
   * @return null|object
   *  NULL when the file is exhausted or no file is open.
   */
  public function getNextRow() {

    // Nothing to read after a failed rewind or once the end has been reached.
    if (!is_resource($this->csvHandle)) {
      return NULL;
    }

    $line = $this->getNextLine();
    if (!$line) {
      fclose($this->csvHandle);
      $this->csvHandle = NULL;
      return NULL;
    }

    // Only use the columns specified in $this->csvcolumns, keyed by their
    // field names. Building a fresh array (rather than renaming keys in place)
    // keeps a value intact even if a field name equals a column index.
    $row = array();
    foreach ($this->csvcolumns as $index => $column) {
      $row[$column[0]] = isset($line[$index]) ? $line[$index] : NULL;
    }
    $row['csvrownum'] = $this->rowNumber++;
    return (object) $row;
  }

  /**
   * Read the next CSV record from the open file handle.
   *
   * @return array|false
   *  Whatever fgetcsv() returns for the configured length, delimiter,
   *  enclosure and escape settings.
   */
  protected function getNextLine() {

    // escape parameter was added in PHP 5.3.
    if (version_compare(phpversion(), '5.3', '<')) {
      $row = fgetcsv($this->csvHandle, $this->fgetcsv['length'], $this->fgetcsv['delimiter'], $this->fgetcsv['enclosure']);
    }
    else {
      $row = fgetcsv($this->csvHandle, $this->fgetcsv['length'], $this->fgetcsv['delimiter'], $this->fgetcsv['enclosure'], $this->fgetcsv['escape']);
    }
    return $row;
  }

  /**
   * Check if resource loaded correctly.
   *
   * Displays a message through Migration::displayMessage() when it did not.
   *
   * @return bool
   *  TRUE if $this->csvHandle holds an opened file.
   */
  public function validResource() {
    if (!$this->csvHandle) {
      Migration::displayMessage(t('Could not open CSV file !url', array(
        '!url' => $this->file,
      )));
    }
    return (bool) $this->csvHandle;
  }

}

Members

Namesort descending Modifiers Type Description Overrides
MigrateSource::$activeMap protected property The MigrateMap class for the current migration.
MigrateSource::$activeMigration protected property The Migration class currently invoking us, during rewind() and next().
MigrateSource::$cacheCounts protected property Whether this instance should cache the source count.
MigrateSource::$cacheKey protected property Key to use for caching counts.
MigrateSource::$currentKey protected property The primary key of the current row
MigrateSource::$currentRow protected property The current row from the query
MigrateSource::$highwaterField protected property Information on the highwater mark for the current migration, if any.
MigrateSource::$idList protected property List of source IDs to process.
MigrateSource::$mapRowAdded protected property By default, next() will directly read the map row and add it to the data row. A source plugin implementation may do this itself (in particular, the SQL source can incorporate the map table into the query) - if so, it should set this TRUE so we…
MigrateSource::$multikeySeparator protected property Used in the case of multiple key sources that need to use idlist.
MigrateSource::$numIgnored protected property Number of rows intentionally ignored (prepareRow() returned FALSE)
MigrateSource::$numProcessed protected property Number of rows we've at least looked at. 1
MigrateSource::$originalHighwater protected property The highwater mark at the beginning of the import operation.
MigrateSource::$skipCount protected property Whether this instance should not attempt to count the source.
MigrateSource::$trackChanges protected property If TRUE, we will maintain hashed source rows to determine whether incoming data has changed.
MigrateSource::count public function Return a count of available source records, from the cache if appropriate. Returns -1 if the source is not countable.
MigrateSource::current public function Implementation of Iterator::current() - called when entering a loop iteration, returning the current row
MigrateSource::dataChanged protected function Determine whether this row has changed, and therefore whether it should be processed.
MigrateSource::getCurrentKey public function
MigrateSource::getIgnored public function
MigrateSource::getProcessed public function
MigrateSource::hash protected function Generate a hash of the source row. 3
MigrateSource::key public function Implementation of Iterator::key - called when entering a loop iteration, returning the key of the current row. It must be a scalar - we will serialize to fulfill the requirement, but using getCurrentKey() is preferable.
MigrateSource::next public function Implementation of Iterator::next() - subclasses of MigrateSource should implement getNextRow() to retrieve the next valid source record to process.
MigrateSource::prepareRow protected function Give the calling migration a shot at manipulating, and possibly rejecting, the source row.
MigrateSource::resetStats public function Reset numIgnored back to 0.
MigrateSource::rewind public function Implementation of Iterator::rewind() - subclasses of MigrateSource should implement performRewind() to do any class-specific setup for iterating source records.
MigrateSource::valid public function Implementation of Iterator::valid() - called at the top of the loop, returning TRUE to process the loop and FALSE to terminate it
MigrateSourceCSV::$csvHandle protected property File handle for the CSV file being iterated.
MigrateSourceCSV::$fgetcsv protected property Parameters for the fgetcsv() call.
MigrateSourceCSV::$fields protected property List of available source fields.
MigrateSourceCSV::$file protected property The path to the source file.
MigrateSourceCSV::$headerRows protected property The number of rows in the CSV file before the data starts.
MigrateSourceCSV::$rowNumber protected property The current row/line number in the CSV file.
MigrateSourceCSV::computeCount public function Return a count of all available source records.
MigrateSourceCSV::fields public function Returns a list of fields available to be mapped from the source query. Overrides MigrateSource::fields
MigrateSourceCSV::getNextLine protected function
MigrateSourceCSV::getNextRow public function Implementation of MigrateSource::getNextRow(). Return the next line of the source CSV file as an object.
MigrateSourceCSV::performRewind public function Implementation of MigrateSource::performRewind().
MigrateSourceCSV::validResource public function Check if resource loaded correctly.
MigrateSourceCSV::__construct public function Simple initialization. Overrides MigrateSource::__construct
MigrateSourceCSV::__toString public function Return a string representing the source query.