You are here

function feeds_imagegrabber_scrape_images in Feeds Image Grabber 7

Same name and namespace in other branches
  1. 6 feeds_imagegrabber.module \feeds_imagegrabber_scrape_images()

Scrape images from HTML/XML content.

1 call to feeds_imagegrabber_scrape_images()
feeds_imagegrabber_feeds_set_target in ./feeds_imagegrabber.module
Callback for mapping. Here is where the actual mapping happens.

File

./feeds_imagegrabber.module, line 506
Grabs images for items imported using the feeds module.

Code

/**
 * Scrape images from HTML/XML content.
 *
 * The content is parsed as XML first and, if that fails, as HTML. The XPath
 * expression in $options['expression'] is then evaluated and the 'src'
 * attribute of every matched element is collected as an image URL.
 *
 * @param string $content
 *   The HTML or XML markup to scan.
 * @param string $base_url
 *   Base URL handed to url_to_absolute() (when that function is available) to
 *   resolve relative image URLs.
 * @param array $settings
 *   Grabber settings. Only 'feeling_lucky' is read here: when it is truthy the
 *   scan stops as soon as $options['cardinality'] images have been collected.
 *   A missing key is treated as FALSE.
 * @param array $options
 *   (optional) Overrides for the defaults below:
 *   - expression: XPath expression selecting the image elements ("//img").
 *   - getsize: Whether each URL is checked with
 *     feeds_imagegrabber_validate_download_size() (TRUE).
 *   - max_imagesize: Size limit passed to that check (512000; presumably
 *     bytes - confirm against the validator).
 *   - timeout: Overall time budget for the size checks (10). The elapsed
 *     timer value is divided by 1000 before being subtracted from it.
 *   - max_redirects: Defaulted to 3 but not used inside this function.
 *   - cardinality: Number of images at which a "feeling lucky" scan stops (1).
 * @param array $error_log
 *   (optional) Receives 'code' and 'error' entries when the content cannot be
 *   parsed (code -5) or the time budget runs out (HTTP_REQUEST_TIMEOUT).
 *
 * @return array|false
 *   An array keyed by image URL. The value is whatever
 *   feeds_imagegrabber_validate_download_size() returned for that URL, or 0
 *   when 'getsize' is disabled. FALSE if the content could not be parsed.
 */
function feeds_imagegrabber_scrape_images($content, $base_url, $settings, array $options = array(), &$error_log = array()) {

  // Merge the default options.
  $options += array(
    'expression' => "//img",
    'getsize' => TRUE,
    'max_imagesize' => 512000,
    'timeout' => 10,
    'max_redirects' => 3,
    'cardinality' => 1,
  );

  // Empty input can never be parsed. On PHP 8 DOMDocument::loadXML() throws a
  // ValueError for it (which "@" does not silence), so report the usual parse
  // failure up front instead.
  if (!is_string($content) || $content === '') {
    $error_log['code'] = -5;
    $error_log['error'] = "unable to parse the xml//html content";
    return FALSE;
  }

  $doc = new DOMDocument();
  if (@$doc->loadXML($content) === FALSE && @$doc->loadHTML($content) === FALSE) {
    $error_log['code'] = -5;
    $error_log['error'] = "unable to parse the xml//html content";
    return FALSE;
  }

  $xpath = new DOMXPath($doc);
  $nodes = @$xpath->evaluate($options['expression']);

  // evaluate() yields FALSE for an invalid expression and a scalar for typed
  // expressions such as "count(//img)"; neither can contain image elements.
  if (!($nodes instanceof DOMNodeList)) {
    return array();
  }

  if ($options['getsize']) {
    timer_start(__FUNCTION__);
  }

  // Tolerate a settings array without the 'feeling_lucky' key.
  $feeling_lucky = !empty($settings['feeling_lucky']);

  $images = array();
  $imagesize = 0;
  foreach ($nodes as $node) {
    // Only elements carry a 'src' attribute; attribute, text or comment nodes
    // matched by a custom expression have no getAttribute() method.
    if (!($node instanceof DOMElement)) {
      continue;
    }
    $url = $node->getAttribute('src');
    if (empty($url)) {
      continue;
    }
    if (function_exists('encode_url')) {
      $url = encode_url($url);
    }
    if (function_exists('url_to_absolute')) {
      $url = url_to_absolute($base_url, $url);
    }
    // Skip URLs the helpers above could not encode or resolve.
    if (!$url) {
      continue;
    }

    $accepted = TRUE;
    $timed_out = FALSE;
    if ($options['getsize']) {
      // Hand the validator only what is left of the overall time budget.
      $imagesize = feeds_imagegrabber_validate_download_size($url, $options['max_imagesize'], $options['timeout'] - timer_read(__FUNCTION__) / 1000);
      // -1 marks an image that failed the size check.
      $accepted = ($imagesize != -1);
      $timed_out = ($options['timeout'] - timer_read(__FUNCTION__) / 1000 <= 0);
    }

    if ($accepted) {
      $images[$url] = $imagesize;
      // In "feeling lucky" mode, stop once enough images were found. This
      // takes precedence over reporting a timeout, as the image is usable.
      if ($feeling_lucky && count($images) == $options['cardinality']) {
        break;
      }
    }

    if ($timed_out) {
      $error_log['code'] = HTTP_REQUEST_TIMEOUT;
      $error_log['error'] = "timeout occured while scraping the content";
      break;
    }
  }
  return $images;
}