class MigrateSourceCSV in Migrate 7.2
Same name and namespace in other branches
- 6.2 plugins/sources/csv.inc \MigrateSourceCSV
Implementation of MigrateSource, to handle imports from CSV files.
If the CSV file contains non-ASCII characters, make sure it includes a UTF BOM (Byte Order Marker) so they are interpreted correctly.
Hierarchy
- class \MigrateSource implements \Iterator
- class \MigrateSourceCSV
Expanded class hierarchy of MigrateSourceCSV
File
- plugins/
sources/ csv.inc, line 221 - Define a MigrateSource for importing from comma separated values files.
View source
class MigrateSourceCSV extends MigrateSource {
  /**
   * List of available source fields.
   *
   * Caller-supplied map of field name => description; merged over the
   * fields derived from the CSV columns in fields().
   *
   * @var array
   */
  protected $fields = array();

  /**
   * Parameters for the fgetcsv() call.
   *
   * Keys: 'length', 'delimiter', 'enclosure', 'escape'.
   *
   * @var array
   */
  protected $fgetcsv = array();

  /**
   * File handle for the CSV file being iterated.
   *
   * NULL whenever no file is open.
   *
   * @var resource
   */
  protected $csvHandle = NULL;

  /**
   * The number of rows in the CSV file before the data starts.
   *
   * @var int
   */
  protected $headerRows = 0;

  /**
   * The current row/line number in the CSV file.
   *
   * @var int
   */
  protected $rowNumber;

  /**
   * The path to the source file.
   *
   * @var string
   */
  protected $file;

  /**
   * Column definitions used to map CSV columns to field names.
   *
   * Keys are integer column positions; values are
   * array(field name, description).
   *
   * Declared public because the property used to be created dynamically (and
   * was therefore publicly accessible); declaring it avoids relying on
   * dynamic properties while keeping the same visibility.
   *
   * @var array
   */
  public $csvcolumns = array();

  /**
   * Simple initialization.
   *
   * @param string $path
   *   The path to the source file
   * @param array $csvcolumns
   *   Keys are integers. values are array(field name, description).
   * @param array $options
   *   Options applied to this source. Recognized here: 'header_rows',
   *   'embedded_newlines', and the fgetcsv() parameters 'length', 'delimiter',
   *   'enclosure' and 'escape'.
   * @param array $fields
   *   Optional - keys are field names, values are descriptions. Use to override
   *   the default descriptions, or to add additional source fields which the
   *   migration will add via other means (e.g., prepareRow()).
   */
  public function __construct($path, array $csvcolumns = array(), array $options = array(), array $fields = array()) {
    parent::__construct($options);
    $this->file = $path;
    $this->headerRows = !empty($options['header_rows']) ? $options['header_rows'] : 0;
    $this->options = $options;
    $this->fields = $fields;

    // fgetcsv specific options
    $defaults = array(
      'length' => NULL,
      'delimiter' => ',',
      'enclosure' => '"',
      'escape' => '\\',
    );
    foreach ($defaults as $key => $default) {
      $this->fgetcsv[$key] = isset($options[$key]) ? $options[$key] : $default;
    }

    // One can either pass in an explicit list of column names to use, or if we
    // have a header row we can use the names from that
    if ($this->headerRows && empty($csvcolumns)) {
      $this->csvcolumns = array();
      $this->csvHandle = fopen($this->file, 'r');
      if (!$this->validResource()) {
        // Do not leave a FALSE "handle" behind for later calls to trip over.
        $this->csvHandle = NULL;
        return;
      }
      // Skip all but the last header
      for ($i = 0; $i < $this->headerRows - 1; $i++) {
        $this->getNextLine();
      }
      $row = $this->getNextLine();
      // The file may contain fewer lines than header_rows, in which case
      // there is no header line to read names from.
      if (is_array($row)) {
        foreach ($row as $header) {
          // A blank header line yields a NULL cell; cast before trimming.
          $header = trim((string) $header);
          $this->csvcolumns[] = array(
            $header,
            $header,
          );
        }
      }
      fclose($this->csvHandle);
      $this->csvHandle = NULL;
    }
    else {
      $this->csvcolumns = $csvcolumns;
    }
  }

  /**
   * Return a string representing the source query.
   *
   * @return string
   *   The path of the CSV file.
   */
  public function __toString() {
    return $this->file;
  }

  /**
   * Returns a list of fields available to be mapped from the source query.
   *
   * @return array
   *   Keys: machine names of the fields (to be passed to addFieldMapping)
   *   Values: Human-friendly descriptions of the fields.
   */
  public function fields() {
    $fields = array();
    foreach ($this->csvcolumns as $values) {
      $fields[$values[0]] = $values[1];
    }
    // Any caller-specified fields with the same names as extracted fields will
    // override them; any others will be added
    if ($this->fields) {
      $fields = $this->fields + $fields;
    }
    return $fields;
  }

  /**
   * Return a count of all available source records.
   *
   * @return int
   *   Number of data records (header rows excluded); 0 when the file cannot
   *   be read. Never negative.
   */
  public function computeCount() {
    $count = 0;
    // If the data may have embedded newlines, the file line count won't reflect
    // the number of CSV records (one record will span multiple lines). We need
    // to scan with fgetcsv to get the true count.
    if (!empty($this->options['embedded_newlines'])) {
      // getNextLine() reads from $this->csvHandle, so borrow the property for
      // the scan and put back whatever handle an in-progress iteration holds.
      $iterationHandle = $this->csvHandle;
      $this->csvHandle = fopen($this->file, 'r');
      if ($this->validResource()) {
        // Skip the header rows with the CSV parser (as performRewind() does),
        // so a header containing embedded newlines is skipped as one record.
        for ($i = 0; $i < $this->headerRows; $i++) {
          $this->getNextLine();
        }
        while ($this->getNextLine()) {
          $count++;
        }
        fclose($this->csvHandle);
      }
      $this->csvHandle = $iterationHandle;
    }
    else {
      // TODO. If this takes too much time/memory, use exec('wc -l')
      $lines = file($this->file);
      if ($lines !== FALSE) {
        // Clamp at zero: a file shorter than its declared header must not
        // produce a negative count.
        $count = max(0, count($lines) - $this->headerRows);
      }
    }
    return $count;
  }

  /**
   * Implementation of MigrateSource::performRewind().
   *
   * Reopens the file, skips the header rows and resets the row counter.
   *
   * @return void
   */
  public function performRewind() {
    // Close any previously-opened handle
    if (is_resource($this->csvHandle)) {
      fclose($this->csvHandle);
    }
    $this->csvHandle = NULL;

    // Load up the first row, skipping the header(s) if necessary
    $this->csvHandle = fopen($this->file, 'r');
    if (!$this->validResource()) {
      // Normalize the failed open so getNextRow() sees "no handle".
      $this->csvHandle = NULL;
      return;
    }
    for ($i = 0; $i < $this->headerRows; $i++) {
      $this->getNextLine();
    }
    $this->rowNumber = 1;
  }

  /**
   * Implementation of MigrateSource::getNextRow().
   * Return the next line of the source CSV file as an object.
   *
   * The returned object has one property per entry in $this->csvcolumns
   * (NULL when the line has no such column) plus 'csvrownum'.
   *
   * @return null|object
   *   The next row, or NULL when the file is exhausted or not open.
   */
  public function getNextRow() {
    // Nothing to read if the file could not be opened or was already closed
    // at end of file.
    if (!is_resource($this->csvHandle)) {
      return NULL;
    }

    $line = $this->getNextLine();
    if ($line) {
      // Build the row from scratch, using only the columns specified in
      // $this->csvcolumns. Writing into a fresh array (rather than renaming
      // keys in place) keeps a numeric field name from clobbering a column
      // position that has not been processed yet.
      $row = array();
      foreach ($this->csvcolumns as $int => $values) {
        $key = $values[0];
        $row[$key] = isset($line[$int]) ? $line[$int] : NULL;
      }
      $row['csvrownum'] = $this->rowNumber++;
      return (object) $row;
    }

    // End of file: release the handle.
    fclose($this->csvHandle);
    $this->csvHandle = NULL;
    return NULL;
  }

  /**
   * Read the next CSV record from the open file handle.
   *
   * @return array|false
   *   Whatever fgetcsv() returns for the configured parameters: an array of
   *   cell values, or FALSE when there is nothing more to read.
   */
  protected function getNextLine() {
    // escape parameter was added in PHP 5.3.
    if (version_compare(phpversion(), '5.3', '<')) {
      $row = fgetcsv($this->csvHandle, $this->fgetcsv['length'], $this->fgetcsv['delimiter'], $this->fgetcsv['enclosure']);
    }
    else {
      $row = fgetcsv($this->csvHandle, $this->fgetcsv['length'], $this->fgetcsv['delimiter'], $this->fgetcsv['enclosure'], $this->fgetcsv['escape']);
    }
    return $row;
  }

  /**
   * Check if resource loaded correctly.
   *
   * Displays a migration message when the handle is not usable.
   *
   * @return bool
   *   TRUE if $this->csvHandle is set to a truthy value, FALSE otherwise.
   */
  public function validResource() {
    if (!$this->csvHandle) {
      Migration::displayMessage(t('Could not open CSV file !url', array(
        '!url' => $this->file,
      )));
    }
    return (bool) $this->csvHandle;
  }

}
Members
Name |
Modifiers | Type | Description | Overrides |
---|---|---|---|---|
MigrateSource:: |
protected | property | The MigrateMap class for the current migration. | |
MigrateSource:: |
protected | property | The Migration class currently invoking us, during rewind() and next(). | |
MigrateSource:: |
protected | property | Whether this instance should cache the source count. | |
MigrateSource:: |
protected | property | Key to use for caching counts. | |
MigrateSource:: |
protected | property | The primary key of the current row | |
MigrateSource:: |
protected | property | The current row from the query | |
MigrateSource:: |
protected | property | Information on the highwater mark for the current migration, if any. | |
MigrateSource:: |
protected | property | List of source IDs to process. | |
MigrateSource:: |
protected | property | By default, next() will directly read the map row and add it to the data row. A source plugin implementation may do this itself (in particular, the SQL source can incorporate the map table into the query) - if so, it should set this TRUE so we… | |
MigrateSource:: |
protected | property | Used in the case of multiple key sources that need to use idlist. | |
MigrateSource:: |
protected | property | Number of rows intentionally ignored (prepareRow() returned FALSE) | |
MigrateSource:: |
protected | property | Number of rows we've at least looked at. | 1 |
MigrateSource:: |
protected | property | The highwater mark at the beginning of the import operation. | |
MigrateSource:: |
protected | property | Whether this instance should not attempt to count the source. | |
MigrateSource:: |
protected | property | If TRUE, we will maintain hashed source rows to determine whether incoming data has changed. | |
MigrateSource:: |
public | function | Return a count of available source records, from the cache if appropriate. Returns -1 if the source is not countable. | |
MigrateSource:: |
public | function | Implementation of Iterator::current() - called when entering a loop iteration, returning the current row | |
MigrateSource:: |
protected | function | Determine whether this row has changed, and therefore whether it should be processed. | |
MigrateSource:: |
public | function | ||
MigrateSource:: |
public | function | ||
MigrateSource:: |
public | function | ||
MigrateSource:: |
protected | function | Generate a hash of the source row. | 3 |
MigrateSource:: |
public | function | Implementation of Iterator::key - called when entering a loop iteration, returning the key of the current row. It must be a scalar - we will serialize to fulfill the requirement, but using getCurrentKey() is preferable. | |
MigrateSource:: |
public | function | Implementation of Iterator::next() - subclasses of MigrateSource should implement getNextRow() to retrieve the next valid source record to process. | |
MigrateSource:: |
protected | function | Give the calling migration a shot at manipulating, and possibly rejecting, the source row. | |
MigrateSource:: |
public | function | Reset numIgnored back to 0. | |
MigrateSource:: |
public | function | Implementation of Iterator::rewind() - subclasses of MigrateSource should implement performRewind() to do any class-specific setup for iterating source records. | |
MigrateSource:: |
public | function | Implementation of Iterator::valid() - called at the top of the loop, returning TRUE to process the loop and FALSE to terminate it | |
MigrateSourceCSV:: |
protected | property | File handle for the CSV file being iterated. | |
MigrateSourceCSV:: |
protected | property | Parameters for the fgetcsv() call. | |
MigrateSourceCSV:: |
protected | property | List of available source fields. | |
MigrateSourceCSV:: |
protected | property | The path to the source file. | |
MigrateSourceCSV:: |
protected | property | The number of rows in the CSV file before the data starts. | |
MigrateSourceCSV:: |
protected | property | The current row/line number in the CSV file. | |
MigrateSourceCSV:: |
public | function | Return a count of all available source records. | |
MigrateSourceCSV:: |
public | function |
Returns a list of fields available to be mapped from the source query. Overrides MigrateSource:: |
|
MigrateSourceCSV:: |
protected | function | ||
MigrateSourceCSV:: |
public | function | Implementation of MigrateSource::getNextRow(). Return the next line of the source CSV file as an object. | |
MigrateSourceCSV:: |
public | function | Implementation of MigrateSource::performRewind(). | |
MigrateSourceCSV:: |
public | function | Check if resource loaded correctly. | |
MigrateSourceCSV:: |
public | function |
Simple initialization. Overrides MigrateSource:: |
|
MigrateSourceCSV:: |
public | function | Return a string representing the source query. |