You are here

class CsvParser in Feeds 8.3

Same name in this branch
  1. 8.3 src/Component/CsvParser.php \Drupal\feeds\Component\CsvParser
  2. 8.3 src/Feeds/Parser/CsvParser.php \Drupal\feeds\Feeds\Parser\CsvParser

Parses an RFC 4180 style CSV file.

Hierarchy

  • class \Drupal\feeds\Component\CsvParser implements \Drupal\feeds\Component\Iterator

Expanded class hierarchy of CsvParser

See also

http://tools.ietf.org/html/rfc4180

2 files declare their use of CsvParser
CsvParser.php in src/Feeds/Parser/CsvParser.php
CsvParserTest.php in tests/src/Unit/Component/CsvParserTest.php

File

src/Component/CsvParser.php, line 10

Namespace

Drupal\feeds\Component
View source
class CsvParser implements \Iterator {

  /**
   * The column delimiter.
   *
   * @var string
   */
  protected $delimiter = ',';

  /**
   * Whether or not the first line contains a header.
   *
   * @var bool
   */
  protected $hasHeader = FALSE;

  /**
   * The position in the file to start from.
   *
   * @var int
   */
  protected $startByte = 0;

  /**
   * The file handle to the CSV file.
   *
   * @var resource
   */
  protected $handle;

  /**
   * The current line.
   *
   * @var array|bool
   */
  protected $currentLine;

  /**
   * The number of lines read.
   *
   * @var int
   */
  protected $linesRead = 0;

  /**
   * The byte position in the file.
   *
   * @var int
   */
  protected $filePosition;

  /**
   * Constructs a CsvParser object.
   *
   * @param resource $handle
   *   An open file handle.
   */
  public function __construct($handle) {
    if (!is_resource($handle)) {
      throw new \InvalidArgumentException('$handle must be a resource.');
    }
    $this->handle = $handle;
  }

  /**
   * Creates a CsvParser object from a file path.
   *
   * @param string $filepath
   *   The file path.
   *
   * @return \Drupal\feeds\Component\CsvParser
   *   A new CsvParser object.
   */
  public static function createFromFilePath($filepath) {
    if (!is_file($filepath) || !is_readable($filepath)) {
      throw new \InvalidArgumentException('$filepath must exist and be readable.');
    }
    $previous = ini_set('auto_detect_line_endings', '1');
    $handle = fopen($filepath, 'rb');
    ini_set('auto_detect_line_endings', $previous);
    return new static($handle);
  }

  /**
   * Creates a CsvParser object from a string.
   *
   * @param string $string
   *   The in-memory contents of a CSV file.
   *
   * @return \Drupal\feeds\Component\CsvParser
   *   A new CsvParser object.
   */
  public static function createFromString($string) {
    $previous = ini_set('auto_detect_line_endings', '1');
    $handle = fopen('php://temp', 'w+b');
    ini_set('auto_detect_line_endings', $previous);
    fwrite($handle, $string);
    fseek($handle, 0);
    return new static($handle);
  }

  /**
   * Destructs a CsvParser object.
   */
  public function __destruct() {
    if (is_resource($this->handle)) {
      fclose($this->handle);
    }
  }

  /**
   * Sets the column delimiter string.
   *
   * @param string $delimiter
   *   By default, the comma (',') is used as delimiter.
   *
   * @return $this
   */
  public function setDelimiter($delimiter) {
    $this->delimiter = $delimiter;
    return $this;
  }

  /**
   * Sets whether or not the CSV file contains a header.
   *
   * @param bool $has_header
   *   (optional) Whether or the CSV file has a header. Defaults to true.
   *
   * @return $this
   */
  public function setHasHeader($has_header = TRUE) {
    $this->hasHeader = (bool) $has_header;
    return $this;
  }

  /**
   * Returns the header row.
   *
   * @return array
   *   A list of the header names.
   */
  public function getHeader() {
    $prev = ftell($this->handle);
    rewind($this->handle);
    $header = $this
      ->parseLine($this
      ->readLine());
    fseek($this->handle, $prev);
    return $header;
  }

  /**
   * Gets the byte number where the parser left off.
   *
   * This position can be used to set the start byte for the next iteration.
   *
   * @return int
   *   The byte position of where parsing ended.
   */
  public function lastLinePos() {
    return $this->filePosition;
  }

  /**
   * Sets the byte where file should be started at.
   *
   * Useful when parsing a file in batches.
   *
   * @param int $start
   *   The byte position to start parsing.
   *
   * @return $this
   */
  public function setStartByte($start) {
    $this->startByte = (int) $start;
    return $this;
  }

  /**
   * Implements \Iterator::current().
   */
  public function current() {
    return $this->currentLine;
  }

  /**
   * Implements \Iterator::key().
   */
  public function key() {
    return $this->linesRead - 1;
  }

  /**
   * Implements \Iterator::next().
   */
  public function next() {

    // Record the file position before reading the next line since we
    // preemptively read lines to avoid returning empty rows.
    $this->filePosition = ftell($this->handle);
    do {
      $line = $this
        ->readLine();

      // End of file.
      if ($line === FALSE) {
        $this->currentLine = FALSE;
        return;
      }

      // Skip empty lines that aren't wrapped in an enclosure.
    } while (!strlen(rtrim($line, "\r\n")));
    $this->currentLine = $this
      ->parseLine($line);
    $this->linesRead++;
  }

  /**
   * Implements \Iterator::rewind().
   */
  public function rewind() {
    rewind($this->handle);
    if ($this->hasHeader && !$this->startByte) {
      $this
        ->parseLine($this
        ->readLine());
    }
    elseif ($this->startByte) {
      fseek($this->handle, $this->startByte);
    }
    $this->linesRead = 0;

    // Preemptively advance to the next line.
    $this
      ->next();
  }

  /**
   * Implements \Iterator::valid().
   */
  public function valid() {
    return (bool) $this->currentLine;
  }

  /**
   * Returns a new line from the CSV file.
   *
   * @return string|bool
   *   Returns the next line in the file, or false if the end has been reached.
   *
   * @todo Add encoding conversion.
   */
  protected function readLine() {
    return fgets($this->handle);
  }

  /**
   * Parses a single CSV line.
   *
   * @param string $line
   *   A line from a CSV file.
   * @param bool $in_quotes
   *   Do not use. For recursion only.
   * @param string $field
   *   Do not use. For recursion only.
   * @param array $fields
   *   Do not use. For recursion only.
   *
   * @return array
   *   The list of cells in the CSV row.
   */
  protected function parseLine($line, $in_quotes = FALSE, $field = '', array $fields = []) {
    $line_length = strlen($line);

    // Traverse the line byte-by-byte.
    for ($index = 0; $index < $line_length; ++$index) {
      $byte = $line[$index];
      $next_byte = isset($line[$index + 1]) ? $line[$index + 1] : '';

      // Beginning a quoted field.
      if ($byte === '"' && $field === '' && !$in_quotes) {
        $in_quotes = TRUE;
      }
      elseif ($byte === '"' && $next_byte !== '"' && $in_quotes) {
        $in_quotes = FALSE;
      }
      elseif ($byte === '"' && $next_byte === '"' && $in_quotes) {
        $field .= '"';

        // Skip the next quote.
        ++$index;
      }
      elseif (!$in_quotes && $byte === $this->delimiter) {
        $fields[] = $field;
        $field = '';
      }
      elseif (!$in_quotes && $next_byte === '') {
        $fields[] = $this
          ->trimNewline($byte, $field);
        $field = '';
      }
      else {
        $field .= $byte;
      }
    }

    // If we're still in quotes after the line is read, continue reading on the
    // next line. Check that we're not at the end of a malformed file.
    if ($in_quotes && ($line = $this
      ->readLine())) {
      $fields = $this
        ->parseLine($line, $in_quotes, $field, $fields);
    }

    // If we're not in quoted after the line is read but the field contains
    // data, we are processing a line that did not end.
    if (!$in_quotes && $field) {
      $fields[] = $field;
    }
    return $fields;
  }

  /**
   * Removes the trailing line ending.
   *
   * This does not call trim() since we only want to remove the last line
   * ending, not all line endings.
   *
   * @param string $last_character
   *   The last character.
   * @param string $field
   *   The current field.
   *
   * @return string
   *   The field with the line ending removed.
   */
  protected function trimNewline($last_character, $field) {

    // Windows line ending.
    if ($last_character === "\n" && substr($field, -1) === "\r") {
      return substr($field, 0, -1);
    }

    // Unix or Mac line ending.
    if ($last_character === "\n" || $last_character === "\r") {
      return $field;
    }

    // Line ended without a trailing newline.
    return $field . $last_character;
  }

}

Members

Namesort descending Modifiers Type Description Overrides
CsvParser::$currentLine protected property The current line.
CsvParser::$delimiter protected property The column delimiter.
CsvParser::$filePosition protected property The byte position in the file.
CsvParser::$handle protected property The file handle to the CSV file.
CsvParser::$hasHeader protected property Whether or not the first line contains a header.
CsvParser::$linesRead protected property The number of lines read.
CsvParser::$startByte protected property The position in the file to start from.
CsvParser::createFromFilePath public static function Creates a CsvParser object from a file path.
CsvParser::createFromString public static function Creates a CsvParser object from a string.
CsvParser::current public function Implements \Iterator::current().
CsvParser::getHeader public function Returns the header row.
CsvParser::key public function Implements \Iterator::key().
CsvParser::lastLinePos public function Gets the byte number where the parser left off.
CsvParser::next public function Implements \Iterator::next().
CsvParser::parseLine protected function Parses a single CSV line.
CsvParser::readLine protected function Returns a new line from the CSV file.
CsvParser::rewind public function Implements \Iterator::rewind().
CsvParser::setDelimiter public function Sets the column delimiter string.
CsvParser::setHasHeader public function Sets whether or not the CSV file contains a header.
CsvParser::setStartByte public function Sets the byte where file should be started at.
CsvParser::trimNewline protected function Removes the trailing line ending.
CsvParser::valid public function Implements \Iterator::valid().
CsvParser::__construct public function Constructs a CsvParser object.
CsvParser::__destruct public function Destructs a CsvParser object.