You are here

public static function FeedImport::processXMLChunked in Feed Import 7.2

Imports and processes a huge XML file in chunks.

Parameters

array $feed: Feed info array

Return value

NULL The function always returns NULL: entities are saved in batches as they are created instead of being returned.

File

./feed_import.inc.php, line 1049
Feed import class for parsing and processing content.

Class

FeedImport
@file Feed import class for parsing and processing content.

Code

/**
 * Imports and processes a huge XML file in chunks.
 *
 * The feed URL is read 'chunk_size' bytes at a time. Each time the buffer
 * holds a complete element named after the last segment of the '#root' xpath,
 * that element is cut out, prefixed with the configured XML head, parsed and
 * turned into an entity. Entities are saved in batches of 'items_count'.
 *
 * @param array $feed
 *   Feed info array. This function reads $feed['url'], $feed['xpath']['#root']
 *   and, from $feed['xpath']['#settings'], the keys 'xml_properties',
 *   'chunk_size', 'items_count' and (optionally) 'substr_function'.
 *
 * @return NULL
 *   Always NULL: entities are saved as they are created, not returned. NULL is
 *   also returned right away when the feed URL cannot be opened.
 */
public static function processXMLChunked(array $feed) {

  // This will hold generated entities until the next save.
  $entities = array();

  // XML head, prepended to every extracted item before it is parsed.
  $xml_head = $feed['xpath']['#settings']['xml_properties'];

  // Bytes read with each fread() call.
  $chunk_length = $feed['xpath']['#settings']['chunk_size'];

  // Number of entities to collect before saving them.
  $items_count = $feed['xpath']['#settings']['items_count'];

  // Substring function.
  if (empty($feed['xpath']['#settings']['substr_function'])) {
    $substr = 'substr';
  }
  else {
    $substr = $feed['xpath']['#settings']['substr_function'];
  }
  $current = 0;

  // Open xml url.
  // The catch covers an error handler that converts warnings to exceptions;
  // the resource check covers plain PHP, where fopen() just returns FALSE.
  $fp = FALSE;
  try {
    $fp = fopen($feed['url'], 'rb');
  } catch (Exception $e) {
    return NULL;
  }
  if (!is_resource($fp)) {
    return NULL;
  }

  // Preparing tags.
  // Lengths are measured in bytes with strlen() because they are added to
  // strpos() offsets and used as byte indexes into the buffer.
  $tag = explode('/', $feed['xpath']['#root']);
  $tag = trim(end($tag));
  $tag = array(
    'open' => '<' . $tag,
    'close' => '</' . $tag . '>',
    'length' => strlen($tag),
  );
  $tag['closelength'] = strlen($tag['close']);

  // This holds xml content that was read but not yet consumed.
  $content = '';

  // Read all content in chunks.
  while (!feof($fp)) {
    $chunk = fread($fp, $chunk_length);

    // A failed read without EOF would otherwise loop forever.
    if ($chunk === FALSE) {
      break;
    }
    $content .= $chunk;

    // If there isn't content read again.
    if (!$content) {
      continue;
    }
    while (TRUE) {
      $openpos = strpos($content, $tag['open']);

      // Index of the character that follows '<' + tag name.
      $openposclose = $openpos + $tag['length'] + 1;

      // Check for open tag. Stop and read more data when it is missing or
      // when the character after the tag name has not been read yet.
      if ($openpos === FALSE || !isset($content[$openposclose])) {
        break;
      }
      elseif (strpos(" \t\r\n>", $content[$openposclose]) === FALSE) {

        // Only a longer tag name that starts with ours (the name must be
        // followed by XML whitespace or '>'): skip past it and search again.
        $content = $substr($content, $openposclose);
        continue;
      }
      $closepos = strpos($content, $tag['close'], $openposclose);
      if ($closepos === FALSE) {

        // Item is not complete yet, more data is needed.
        break;
      }

      // We have data!
      // Move to the position right after the close tag.
      $closepos += $tag['closelength'];

      // Create xml string.
      $item = $xml_head . $substr($content, $openpos, $closepos - $openpos);

      // New content.
      // The offset is one before the end of the close tag, so its final '>'
      // stays in the buffer; it can not match the open tag.
      $content = $substr($content, $closepos - 1);

      // Create xml object.
      // simplexml_load_string() returns FALSE for malformed XML, so the
      // result is checked in addition to catching converted warnings.
      try {
        $item = simplexml_load_string($item, self::$simpleXMLElement, LIBXML_NOCDATA);
      } catch (Exception $e) {
        continue;
      }
      if ($item === FALSE) {
        continue;
      }

      // Parse item.
      // xpath() may return FALSE or an empty array; skip the item then.
      $nodes = $item->xpath($feed['xpath']['#root']);
      if (empty($nodes)) {
        continue;
      }
      $item = reset($nodes);
      if (empty($item)) {
        continue;
      }

      // Create entity.
      $item = self::createEntity($feed, $item);

      // Put in entities array.
      $entities[] = $item;
      $current++;

      // Check if we have to save imported entities.
      if ($current == $items_count) {

        // Save entities.
        self::saveEntities($feed, $entities);

        // Delete imported items so far to save memory.
        $entities = array();

        // Reset counter.
        $current = 0;
      }

      // No need anymore.
      unset($item, $nodes);
    }
  }

  // Close file.
  // If fp is not a resource then catch warning.
  // Minimum chances for this to happen.
  try {
    fclose($fp);
  } catch (Exception $e) {

    // Nothing to handle here. Used for reporting error.
  }
  if (!empty($entities)) {

    // Save left entities.
    self::saveEntities($feed, $entities);
  }

  // Delete feed info.
  unset($feed);

  // Return NULL because we saved all entities.
  return NULL;
}