public static function FeedImport::processXMLChunked in Feed Import 7.2
Imports and processes a huge XML file in chunks.
Parameters
array $feed: Feed info array
Return value
NULL Always NULL: the generated entities are saved in batches while the file is processed and are not returned.
File
- ./
feed_import.inc.php, line 1049 - Feed import class for parsing and processing content.
Class
- FeedImport
- @file Feed import class for parsing and processing content.
Code
/**
 * Imports and processes a huge XML source in chunks.
 *
 * The source is read with fread() in pieces and scanned as plain text for
 * "<tag ...> ... </tag>" spans, where "tag" is the last path segment of the
 * root xpath. Every span found is prefixed with the configured XML head,
 * parsed with SimpleXML, turned into an entity and saved in batches, so the
 * whole document never has to be held in memory at once.
 *
 * @param array $feed
 *   Feed info array. Keys read by this method:
 *   - url: Location passed to fopen().
 *   - xpath['#root']: Xpath of one item; its last segment is the tag name
 *     searched for in the raw text.
 *   - xpath['#settings']['xml_properties']: String prepended to each
 *     extracted item before it is parsed.
 *   - xpath['#settings']['chunk_size']: Number of bytes requested per fread().
 *   - xpath['#settings']['items_count']: Number of created entities after
 *     which the batch is saved and released.
 *   - xpath['#settings']['substr_function']: (optional) Name of the function
 *     used instead of substr().
 *
 * @return null
 *   Always NULL: entities are saved while processing, none are returned.
 */
public static function processXMLChunked(array $feed) {
  // This will hold all generated entities that are not saved yet.
  $entities = array();
  $settings = $feed['xpath']['#settings'];
  // XML head.
  $xml_head = $settings['xml_properties'];
  // Bytes read with fread.
  $chunk_length = $settings['chunk_size'];
  // Items count.
  $items_count = $settings['items_count'];
  // Substring function. Offsets passed to it come from strpos(), so it is
  // expected to work with the same units as strpos() does.
  $substr = empty($settings['substr_function']) ? 'substr' : $settings['substr_function'];
  $current = 0;

  // Open xml url.
  // fopen() reports failure by returning FALSE. The try/catch is kept only
  // for the case where an error handler installed elsewhere converts the
  // warning into an exception.
  try {
    $fp = fopen($feed['url'], 'rb');
  }
  catch (Exception $e) {
    return NULL;
  }
  if ($fp === FALSE) {
    // Without this guard feof() would never report the end of the stream.
    return NULL;
  }

  // Preparing tags.
  $parts = explode('/', $feed['xpath']['#root']);
  $name = trim(end($parts));
  $tag = array(
    'open' => '<' . $name,
    'close' => '</' . $name . '>',
  );
  // Lengths are measured with strlen() because they are added to strpos()
  // offsets and used for $content[...] indexing; a character count would be
  // too short for a tag name containing multibyte characters.
  $open_length = strlen($tag['open']);
  $close_length = strlen($tag['close']);
  // Characters allowed right after the tag name in a real opening tag:
  // '>' or any XML whitespace. Anything else means a longer tag name.
  $name_end = " >\t\r\n";

  // This holds xml content not consumed yet.
  $content = '';
  // Read all content in chunks.
  while (!feof($fp)) {
    $chunk = fread($fp, $chunk_length);
    if ($chunk === FALSE) {
      // Read error: stop here, otherwise the loop could spin forever on a
      // stream that never reaches EOF.
      break;
    }
    $content .= $chunk;
    // If there isn't content read again.
    if (!$content) {
      continue;
    }
    while (TRUE) {
      $openpos = strpos($content, $tag['open']);
      if ($openpos === FALSE) {
        break;
      }
      // Position of the first character after the tag name.
      $openposclose = $openpos + $open_length;
      if (!isset($content[$openposclose])) {
        // The tag is cut by the chunk boundary, more data is needed.
        break;
      }
      if (strpos($name_end, $content[$openposclose]) === FALSE) {
        // Only a prefix of another tag name matched, skip past it.
        $content = $substr($content, $openposclose);
        continue;
      }
      $closepos = strpos($content, $tag['close'], $openposclose);
      if ($closepos === FALSE) {
        // The item is not complete yet, more data is needed.
        break;
      }
      // We have data! Move to the end of the closing tag.
      $closepos += $close_length;
      // Create xml string.
      $item = $xml_head . $substr($content, $openpos, $closepos - $openpos);
      // New content. The last '>' of the closing tag stays in the buffer; it
      // cannot start a new opening tag so it is harmless.
      $content = $substr($content, $closepos - 1);
      // Create xml object.
      // simplexml_load_string() returns FALSE for malformed XML. The
      // try/catch is kept for an exception-throwing error handler, if any.
      try {
        $item = simplexml_load_string($item, self::$simpleXMLElement, LIBXML_NOCDATA);
      }
      catch (Exception $e) {
        continue;
      }
      if ($item === FALSE) {
        // Skip the broken item instead of calling xpath() on a boolean.
        continue;
      }
      // Parse item.
      $matches = $item->xpath($feed['xpath']['#root']);
      if (empty($matches)) {
        // xpath() failed or matched nothing.
        continue;
      }
      $item = reset($matches);
      if (empty($item)) {
        continue;
      }
      // Create entity and put it in entities array.
      $entities[] = self::createEntity($feed, $item);
      $current++;
      // Check if we have to save imported entities.
      if ($current == $items_count) {
        // Save entities.
        self::saveEntities($feed, $entities);
        // Delete imported items so far to save memory.
        $entities = array();
        // Reset counter.
        $current = 0;
      }
      // No need anymore.
      unset($item);
    }
  }

  // Close file.
  if (is_resource($fp)) {
    fclose($fp);
  }
  if (!empty($entities)) {
    // Save left entities.
    self::saveEntities($feed, $entities);
  }
  // Return NULL because we saved all entities.
  return NULL;
}