You are here

public function ParserCSV::parse in Feeds 7

Same name and namespace in other branches
  1. 6 libraries/ParserCSV.inc \ParserCSV::parse()
  2. 7.2 libraries/ParserCSV.inc \ParserCSV::parse()

Parse CSV files into a two dimensional array.

Parameters

Iterator $lineIterator: An Iterator object that yields line strings, e.g. ParserCSVIterator.

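$start: The byte number from where to start parsing the file. Note: this is not an argument of parse(); the code below reads the start offset from $this->startByte.

$lines: The number of lines to parse, 0 for all lines. Note: this is not an argument of parse(); the code below reads the limit from $this->lineLimit.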

Return value

Two dimensional array that contains the data in the CSV file.

File

libraries/ParserCSV.inc, line 185

Class

ParserCSV
Functionality to parse CSV files into a two dimensional array.

Code

/**
 * Parses CSV lines into a two dimensional array.
 *
 * Reads lines from $lineIterator, starting at $this->startByte, and splits
 * each one into fields on $this->delimiter. Double quotes enclose fields that
 * may contain the delimiter or line breaks; a doubled quote ("") inside a
 * quoted section stands for one literal quote.
 *
 * Besides returning the rows, this method updates object state:
 * - $this->timeoutReached is reset to FALSE and set to TRUE when parsing
 *   stops because $this->timeout seconds have elapsed.
 * - $this->lastLinePos is reset to 0 and set to $lineIterator->currentPos()
 *   when parsing stops early (timeout or $this->lineLimit reached).
 *
 * @param Iterator $lineIterator
 *   An Iterator object that yields line strings. It must also accept a start
 *   offset in rewind() and provide currentPos(), since both are called here.
 *
 * @return array
 *   One entry per parsed line. Each entry is the list of field strings, or,
 *   when $this->columnNames is not empty, an array keyed by column name in
 *   which missing fields are filled with '' and surplus fields are dropped.
 */
public function parse(Iterator $lineIterator) {
  $skipLine = $this->skipFirstLine;
  $rows = array();
  $this->timeoutReached = FALSE;
  $this->lastLinePos = 0;

  // microtime(TRUE) yields a float. The string form returned by a bare
  // microtime() ("msec sec") cannot be added to or compared with a number.
  $maxTime = empty($this->timeout) ? FALSE : microtime(TRUE) + $this->timeout;
  $delimiterLength = strlen($this->delimiter);
  $linesParsed = 0;
  for ($lineIterator->rewind($this->startByte); $lineIterator->valid(); $lineIterator->next()) {

    // Make really sure we've got lines without trailing newlines.
    $line = trim($lineIterator->current(), "\r\n");

    // Skip empty lines. Compare against '' explicitly: empty() would also
    // discard a line consisting of the single character "0".
    if ($line === '') {
      continue;
    }

    // If the first line contains column names, skip it.
    if ($skipLine) {
      $skipLine = FALSE;
      continue;
    }

    // The actual parser. explode() is unfortunately not suitable because the
    // delimiter might be located inside a quoted field, and that would break
    // the field and/or require additional effort to re-join the fields.
    $quoted = FALSE;
    $currentIndex = 0;
    $currentField = '';
    $fields = array();
    // Every path through the loop body either advances $currentIndex (never
    // past the end of the line), loads a new line, or breaks.
    while ($currentIndex <= strlen($line)) {
      if ($quoted) {
        $nextQuoteIndex = strpos($line, '"', $currentIndex);
        if ($nextQuoteIndex === FALSE) {

          // There's a line break before the quote is closed, so fetch the
          // next line and start from there.
          $currentField .= substr($line, $currentIndex);
          $lineIterator->next();
          if (!$lineIterator->valid()) {

            // Unclosed quote at the end of the input: keep what has been
            // collected so far as the last field.
            $fields[] = $currentField;
            break;
          }

          // The line break belongs to the field; continue on the next line.
          $currentField .= "\n";
          $line = trim($lineIterator->current(), "\r\n");
          $currentIndex = 0;
          continue;
        }

        // There's another quote in this line; find out whether it's escaped.
        $currentField .= substr($line, $currentIndex, $nextQuoteIndex - $currentIndex);
        if (isset($line[$nextQuoteIndex + 1]) && $line[$nextQuoteIndex + 1] === '"') {

          // Escaped quote, add a single one to the field and proceed quoted.
          $currentField .= '"';
          $currentIndex = $nextQuoteIndex + 2;
        }
        else {

          // End of the quoted section, close the quote and let the
          // $quoted == FALSE block finalize the field.
          $quoted = FALSE;
          $currentIndex = $nextQuoteIndex + 1;
        }
      }
      else {

        // $quoted == FALSE
        // First, let's find out where the next character of interest is.
        $nextQuoteIndex = strpos($line, '"', $currentIndex);
        // An empty delimiter can never be found; treating it as "not found"
        // avoids calling strpos() with an empty needle and guarantees that
        // the cursor always advances.
        $nextDelimiterIndex = $delimiterLength > 0 ? strpos($line, $this->delimiter, $currentIndex) : FALSE;
        if ($nextQuoteIndex === FALSE) {
          $nextIndex = $nextDelimiterIndex;
        }
        elseif ($nextDelimiterIndex === FALSE) {
          $nextIndex = $nextQuoteIndex;
        }
        else {
          $nextIndex = min($nextQuoteIndex, $nextDelimiterIndex);
        }

        if ($nextIndex === FALSE) {

          // This line is done, add the rest of it as last field.
          $currentField .= substr($line, $currentIndex);
          $fields[] = $currentField;
          break;
        }
        elseif ($nextIndex === $nextDelimiterIndex) {

          // A delimiter comes first: the field ends right before it and the
          // next field starts right after the complete delimiter, so that no
          // part of a multi-character delimiter leaks into the field.
          $currentField .= substr($line, $currentIndex, $nextIndex - $currentIndex);
          $fields[] = $currentField;
          $currentField = '';
          $currentIndex = $nextIndex + $delimiterLength;
        }
        else {

          // $line[$nextIndex] == '"'
          // Continue this field in the $quoted == TRUE block.
          $quoted = TRUE;
          $currentField .= substr($line, $currentIndex, $nextIndex - $currentIndex);
          $currentIndex = $nextIndex + 1;
        }
      }
    }

    // End of CSV parser. We've now got all the fields of the line as strings
    // in the $fields array.
    if (empty($this->columnNames)) {
      $row = $fields;
    }
    else {
      $row = array();
      foreach ($this->columnNames as $columnName) {
        // array_shift() returns NULL once $fields is exhausted.
        $field = array_shift($fields);
        $row[$columnName] = isset($field) ? $field : '';
      }
    }
    $rows[] = $row;

    // Quit parsing if timeout has been reached or requested lines have been
    // reached.
    if (!empty($maxTime) && microtime(TRUE) > $maxTime) {
      $this->timeoutReached = TRUE;
      $this->lastLinePos = $lineIterator->currentPos();
      break;
    }
    $linesParsed++;
    if ($this->lineLimit && $linesParsed >= $this->lineLimit) {
      $this->lastLinePos = $lineIterator->currentPos();
      break;
    }
  }
  return $rows;
}