public function FeedsExcelParser::parse in Feeds XLS 7
Parse content fetched by fetcher.
FIXME - Start and the pointer are not working properly due to the headers being set and the pointer being set to "2". This needs fixing so that it works nicely with batch.
File
- ./
FeedsExcelParser.inc, line 47
Class
- FeedsExcelParser
- Parses a given file as an Excel file. This is heavily inspired by Feeds' very own CSV parser.
Code
public function parse(FeedsSource $source, FeedsFetcherResult $fetcher_result) {
// Increase the memory limit
ini_set('memory_limit', '1500M');
ini_set('max_execution_time', 240);
// Get config and current state
$source_config = $source
->getConfigFor($this);
$this->state = $source
->state(FEEDS_PARSE);
// Set the total rows to process based on the configuration.
$this->state->total = $source_config['max_rows'];
// Get the filepath of the submitted file.
$import_file_path = drupal_realpath($fetcher_result
->getFilePath());
// Load the PHPExcel shenanigans
// Note, we're caching this in a file as PDO/MySQL complain when it is
// included in the $this->state object (dodgy characters).
if (!isset($this->state->reader_object_cache_path) || !file_exists($this->state->reader_object_cache_path)) {
// We use drupal_realpath here as it appears that the ZipArchive class is
// not able to handle our use of public://... Stream URIs. This will need
// to be fixed before Drupal 8.
$file_type = PHPExcel_IOFactory::identify($import_file_path);
$this->reader = PHPExcel_IOFactory::createReader($file_type);
// No need to check for the presence of the chunkReadFilter function, if
// it doesn't exist, we're screwed anyway.
if ($source_config['use_chunk_reader']) {
if (method_exists($this->reader, 'setReadFilter')) {
$this->chunk_filter = new chunkReadFilter();
}
$this->reader
->setReadFilter($this->chunk_filter);
if (method_exists($this->reader, 'setReadDataOnly')) {
$this->reader
->setReadDataOnly(true);
}
}
$tempnam = drupal_tempnam('temporary://', 'feeds_xls_data_');
@chmod($tempnam, 0660);
file_put_contents($tempnam, serialize($this));
$this->state->reader_object_cache_path = $tempnam;
// Set pointer to 0
$this->state->pointer = 0;
}
else {
$previous_this = unserialize(file_get_contents($this->state->reader_object_cache_path));
$this->reader = $previous_this->reader;
if ($source_config['use_chunk_reader']) {
$this->chunk_filter = $previous_this->chunk_filter;
}
}
// P A R S E !
$rows = array();
if ($source_config['use_chunk_reader']) {
$this->chunk_filter
->setRows($this->state->pointer, isset($source_config['chunk_size']) ? $source_config['chunk_size'] : variable_get('feeds_process_limit', FEEDS_PROCESS_LIMIT));
}
// Excel object for doing the do.
$excel_obj = $this->reader
->load($import_file_path);
$excel_obj
->setActiveSheetIndex();
$row_in_file = 0;
$num_rows_pulled = 0;
foreach ($excel_obj
->getActiveSheet()
->getRowIterator() as $row) {
$row_in_file++;
if ($row_in_file > $this->state->pointer || $source_config['use_chunk_reader']) {
$num_rows_pulled++;
$cellIterator = $row
->getCellIterator();
$cellIterator
->setIterateOnlyExistingCells(false);
// Loop all cells, even if it is not set
$column = 0;
$row_values = array();
foreach ($cellIterator as $cell) {
if (!is_null($cell)) {
if (!isset($this->state->column_names)) {
$row_values[] = trim($cell
->getCalculatedValue());
}
else {
if (strlen(trim('' . $cell
->getCalculatedValue())) || count($this->state->column_names) && strtoupper($this->state->column_names[$column]) == 'GUID') {
$row_values[count($this->state->column_names) ? $this->state->column_names[$column] : $column] = '' . $cell
->getCalculatedValue();
}
}
}
$column++;
}
// A special case. We need to ensure that the GUID column has a value
// defined, even if that value is the empty string.
if (count($row_values) && $column < count($this->state->column_names) && !isset($row_values['GUID']) && array_search('GUID', $this->state->column_names)) {
$row_values['GUID'] = '';
}
if (!isset($this->state->column_names)) {
// Loop through the headers we have been provided with, and convert them
// to the exact case/capitalisation as the mappings that the importer
// was set up with.
// Note, I am not sure if Feeds allows two mappings with the same name
// that differ only by case, but if it does, it's very silly and will
// cause issues here.
foreach ($row_values as $key => $value) {
foreach ($source->importer->processor->config['mappings'] as $new_key => $mapping) {
if (strtolower($value) == strtolower($mapping['source'])) {
$row_values[$key] = $mapping['source'];
break;
}
}
}
$this->state->column_names = $row_values;
}
else {
if ($this
->isRowNotEmpty($row_values)) {
$rows[] = $row_values;
}
}
if (!$source_config['use_chunk_reader'] && $num_rows_pulled >= (isset($source_config['chunk_size']) ? $source_config['chunk_size'] : variable_get('feeds_process_limit', FEEDS_PROCESS_LIMIT))) {
break;
}
}
}
// Set the total here, most likely we've finished!
if (!count($rows)) {
$this->state->total = $this->state->pointer - 1;
@drupal_unlink($this->state->reader_object_cache_path);
}
// Try to regain some memory!
$excel_obj
->disconnectWorksheets();
unset($excel_obj);
if (function_exists('gc_collect_cycles')) {
gc_collect_cycles();
}
// Report progress.
$this->state->pointer += count($rows);
$this->state
->progress($this->state->total, $this->state->pointer);
// FIXME - does this need $source->feed_nid ??
return new FeedsParserResult($rows, $source->feed_nid);
}