You are here

public static function FeedImport::processHTMLPage in Feed Import 7.2

Imports and processes an HTML page.

Parameters

array $feed: Feed info array

Return value

array An array of objects, or NULL if the page could not be parsed or no items were found

File

./feed_import.inc.php, line 1197
Feed import class for parsing and processing content.

Class

FeedImport
@file Feed import class for parsing and processing content.

Code

/**
 * Imports and processes an HTML page.
 *
 * Loads the page at $feed['url'] into a DOM document, converts it to a
 * SimpleXML tree, selects the items matching $feed['xpath']['#root'] and
 * turns each of them into an entity via self::createEntity().
 *
 * @param array $feed
 *   Feed info array. Reads 'url', 'xpath' => '#root' and, optionally,
 *   'xpath' => '#settings' => 'report_html_errors'.
 *
 * @return array|null
 *   The values returned by self::createEntity(), one per matched item, or
 *   NULL if the page could not be loaded/converted or no items were matched.
 */
public static function processHTMLPage(array $feed) {
  // Without a url there is nothing to load.
  if (empty($feed['url'])) {
    return NULL;
  }

  // Create DOM Document.
  $dom = new DOMDocument();
  $dom->strictErrorChecking = FALSE;
  $dom->preserveWhiteSpace = FALSE;
  $dom->recover = TRUE;

  // A missing setting means "do not report HTML errors".
  $report_errors = !empty($feed['xpath']['#settings']['report_html_errors']);

  // Load HTML file from url.
  try {
    if ($report_errors) {
      $dom->loadHTMLFile($feed['url']);
    }
    else {
      // Parser warnings for malformed markup are silenced on purpose.
      @$dom->loadHTMLFile($feed['url']);
    }
  }
  catch (Exception $e) {
    // This try-catch is just to parse the HTML file. Nothing to handle; a
    // failed load is detected below through the missing root element.
  }

  // Nothing was parsed, so there is nothing to normalize or convert.
  if (!$dom->documentElement) {
    return NULL;
  }

  // Normalize document.
  $dom->normalizeDocument();

  // Try to convert to xml.
  try {
    $root = simplexml_import_dom($dom, self::$simpleXMLElement);
  }
  catch (Exception $e) {
    return NULL;
  }

  // If there is no SimpleXMLElement object.
  if (!$root instanceof self::$simpleXMLElement) {
    return NULL;
  }

  // Get items from root.
  $items = $root->xpath($feed['xpath']['#root']);

  // xpath() yields a non-array on failure and an empty array when nothing
  // matches; both mean there are no items to process.
  if (empty($items) || !is_array($items)) {
    return NULL;
  }

  // Replace every matched item with its entity. Assigning by key avoids
  // leaving a dangling by-reference loop variable bound to the last element.
  foreach ($items as $key => $item) {
    $items[$key] = self::createEntity($feed, $item);
  }

  // Return created entities.
  return $items;
}