You are here

function node_import_read_from_file in Node import 6

Returns one record from the file starting at file offset using the supplied file options.

Parameters

$filepath: String. Path to file.

$file_offset: Integer. Starting point of record.

$file_options: Array with 'record separator', 'field separator', 'text delimiter' and 'escape character'. If not set, the options default to the CSV options ("\n", ',', '"', '"').

Return value

Array ($file_offset, $record). The $file_offset is the start offset of the next record. The $record is an array of fields (strings).

On error or when the end of the file has been reached we return FALSE.

Related topics

6 calls to node_import_read_from_file()
NodeImportAPITestCase::readFromFile in tests/node_import.test
Read all data from specified file with given file options.
node_import_add_form_submit_reload in ./node_import.admin.inc
node_import_autodetect in ./node_import.inc
Return an autodetected file format and file options for given file.
node_import_debug_report in ./node_import.admin.inc
Save a file with a debug report.
node_import_do_task in ./node_import.inc
Import a number of rows from the specified task. Should only be called from within hook_cron() or from a JS callback as this function may take a long time.

... See full list

File

./node_import.inc, line 1791
Public API of the Node import module.

Code

/**
 * Returns one record from the file starting at file offset using the
 * supplied file options.
 *
 * The file is opened, positioned at $file_offset, parsed up to the end of
 * the current record (or the end of the file) and closed again before this
 * function returns. Rows whose fields are all empty after trimming are
 * skipped by reading the next record instead.
 *
 * @param $filepath
 *   String. Path to file.
 * @param $file_offset
 *   Integer. Starting point of record.
 * @param $file_options
 *   Array with 'record separator', 'field separator', 'text delimiter' and
 *   'escape character'. The array is passed through
 *   _node_import_sanitize_file_options() before it is used.
 *
 * @return
 *   Array ($file_offset, $record). The $file_offset is the start offset of
 *   the next record. The $record is an array of fields (trimmed strings).
 *   FALSE is returned when the file cannot be opened, positioned or read, or
 *   when the end of the file has been reached and no fields were found.
 */
function node_import_read_from_file($filepath, $file_offset, $file_options) {

  // Open file and set to file offset.
  if (($fp = fopen($filepath, 'r')) === FALSE) {
    return FALSE;
  }
  if (fseek($fp, $file_offset)) {
    // fseek() reports failure with a non-zero value. Release the handle
    // before bailing out so it does not leak.
    fclose($fp);
    return FALSE;
  }

  // File options.
  _node_import_sanitize_file_options($file_options);
  $rs = $file_options['record separator'];
  $fs = $file_options['field separator'];
  $td = $file_options['text delimiter'];
  $ec = $file_options['escape character'];

  // The current record is stored in the $fields array. The $new_offset
  // contains the file position of the end of the returned record. Note
  // that if $new_offset == $file_offset we have reached the end of the
  // file.
  $fields = array();
  $new_offset = $file_offset;

  // Position in $buffer from which the next search for separators and
  // delimiters starts. Everything before it has already been inspected.
  $start = 0;

  // We read $length bytes at a time in the $buffer. fgets() returns FALSE
  // when nothing could be read; the cast keeps $buffer a string.
  $length = variable_get('node_import:fgets:length', 1024);
  $buffer = (string) fgets($fp, $length);

  // A field can be enclosed in text delimiters or not. If this variable is
  // TRUE, we need to parse until we find the next unescaped text delimiter.
  // If FALSE, the field value was not enclosed.
  $enclosed = FALSE;

  // Read until the EOF or until end of record.
  while (!feof($fp) || $start < strlen($buffer)) {
    if (!$enclosed) {

      // Find the next record separator, field separator and text delimiter.
      // A "\n" record separator also accepts a bare "\r" as end of record.
      if ($rs === "\n") {
        $pos_rs = strpos($buffer, "\n", $start);
        $pos_rs = $pos_rs !== FALSE ? $pos_rs : strpos($buffer, "\r", $start);
      }
      else {
        $pos_rs = strpos($buffer, $rs, $start);
      }
      $pos_fs = strpos($buffer, $fs, $start);
      $pos_td = strlen($td) > 0 ? strpos($buffer, $td, $start) : FALSE;

      // Check for begin of text delimited field. The delimiter is cut out
      // of the buffer, but still counted in the file offset.
      if ($pos_td !== FALSE && ($pos_rs === FALSE || $pos_td <= $pos_rs) && ($pos_fs === FALSE || $pos_td <= $pos_fs)) {
        $enclosed = TRUE;
        $buffer = substr($buffer, 0, $pos_td) . substr($buffer, $pos_td + strlen($td));
        $new_offset += strlen($td);
        $start = $pos_td;
        continue;
      }

      // Check for end of record. $pos_rs is a valid (>= 0) position here, so
      // everything in front of it is the last field of the record. Anything
      // after the separator is dropped from the buffer; it is read again on
      // the next call, which starts at $new_offset.
      if ($pos_rs !== FALSE && ($pos_fs === FALSE || $pos_rs <= $pos_fs)) {
        $fields[] = substr($buffer, 0, $pos_rs);
        $buffer = '';
        $new_offset += $pos_rs + strlen($rs);
        $start = 0;
        break;
      }

      // Check for end of field.
      if ($pos_fs !== FALSE) {
        $fields[] = substr($buffer, 0, $pos_fs);
        $buffer = substr($buffer, $pos_fs + strlen($fs));
        $new_offset += $pos_fs + strlen($fs);
        $start = 0;
        continue;
      }
    }
    else {

      // Find the next text delimiter and escaped text delimiter.
      $pos_td = strpos($buffer, $td, $start);
      $pos_ec = strlen($ec) > 0 ? strpos($buffer, $ec . $td, $start) : FALSE;

      // Check for end of text delimited field: a text delimiter that comes
      // before the next escaped text delimiter.
      if ($pos_td !== FALSE && ($pos_ec === FALSE || $pos_td <= $pos_ec - strlen($td))) {
        $enclosed = FALSE;
        $buffer = substr($buffer, 0, $pos_td) . substr($buffer, $pos_td + strlen($td));
        $new_offset += strlen($td);
        $start = $pos_td;
        continue;
      }

      // Check for escaped text delimiter. Only the escape character is cut
      // out; the search then resumes after the (now literal) delimiter.
      if ($pos_ec !== FALSE) {
        $buffer = substr($buffer, 0, $pos_ec) . substr($buffer, $pos_ec + strlen($ec));
        $new_offset += strlen($ec);
        $start = $pos_ec + strlen($td);
        continue;
      }
    }

    // Nothing found... read more data.
    $start = strlen($buffer);
    $chunk = fgets($fp, $length);
    if ($chunk === FALSE && !feof($fp)) {
      // fgets() failed although the end of the file was not reached. Without
      // this check the loop would spin forever on a read error.
      fclose($fp);
      return FALSE;
    }
    $buffer .= $chunk;
  }

  // Nothing is read past this point, so remember the EOF state and release
  // the handle right away. This also keeps the handle from staying open
  // while empty rows are skipped below.
  $eof = feof($fp);
  fclose($fp);

  // Check if we need to add the last field.
  if ($eof && strlen($buffer) > 0) {
    $fields[] = $buffer;
    $new_offset += strlen($buffer);
  }
  unset($buffer);

  // Remove extra white space.
  $fields = array_map('trim', $fields);

  // Check whether the whole row is empty.
  $empty_row = TRUE;
  foreach ($fields as $field) {
    if (strlen($field) > 0) {
      $empty_row = FALSE;
      break;
    }
  }

  // Skip an empty row by returning the next record instead.
  if ($empty_row && !$eof && !empty($fields)) {
    return node_import_read_from_file($filepath, $new_offset, $file_options);
  }

  // At EOF without any field there is nothing left to return.
  return (!$eof || !empty($fields)) ? array(
    $new_offset,
    $fields,
  ) : FALSE;
}