function feeds_imagegrabber_scrape_images in Feeds Image Grabber 6
Same name and namespace in other branches
- 7 feeds_imagegrabber.module \feeds_imagegrabber_scrape_images()
1 call to feeds_imagegrabber_scrape_images()
- feeds_imagegrabber_feeds_set_target in ./
feeds_imagegrabber.module - Implementation of hook_feeds_set_target().
File
- ./
feeds_imagegrabber.module, line 492 - Grabs an image for each feed item from its respective web page and stores it in an image field. Requires the Feeds module.
Code
/**
 * Scrapes image URLs out of an XML/HTML document.
 *
 * The content is parsed as XML first and as HTML second; the XPath expression
 * in $options['expression'] is evaluated against it and the 'src' attribute of
 * every matched element is converted to an absolute URL.
 *
 * @param $content
 *   The XML or HTML markup to scan.
 * @param $base_url
 *   Base URL passed to url_to_absolute() to resolve relative 'src' values.
 * @param $options
 *   Optional associative array; missing keys are filled with these defaults:
 *   - 'expression': XPath expression selecting the elements ("//img").
 *   - 'getsize': when TRUE, every URL is passed through
 *     feeds_imagegrabber_validate_download_size() and kept only if that
 *     call does not return -1 (TRUE).
 *   - 'max_imagesize': size limit handed to the validation helper (512000).
 *   - 'timeout': overall time budget, in seconds, for the size checks (10).
 *   - 'max_redirects': merged into the options but not read by this
 *     function (3).
 *   - 'feeling_lucky': when truthy, stop after the first accepted image (0).
 * @param $error_log
 *   Passed by reference. Receives 'code' and 'error' keys when the content
 *   cannot be parsed (code -5) or when the time budget is exhausted
 *   (code FIG_HTTP_REQUEST_TIMEOUT).
 *
 * @return
 *   FALSE when the content cannot be parsed; otherwise an array keyed by
 *   absolute image URL. The values are the results of the size validation
 *   when 'getsize' is enabled, and 0 otherwise. The array is empty when the
 *   expression matches nothing or does not yield a node list.
 */
function feeds_imagegrabber_scrape_images($content, $base_url, array $options = array(), &$error_log = array()) {
  // Merge the default options.
  $options += array(
    'expression' => "//img",
    'getsize' => TRUE,
    'max_imagesize' => 512000,
    'timeout' => 10,
    'max_redirects' => 3,
    'feeling_lucky' => 0,
  );

  // Empty input can never be parsed. Checking it up front keeps the historic
  // "-5" error path on PHP 8, where DOMDocument::loadXML('') throws a
  // ValueError that the @ operator cannot suppress.
  $doc = new DOMDocument();
  if ($content === NULL || $content === '' || (@$doc->loadXML($content) === FALSE && @$doc->loadHTML($content) === FALSE)) {
    $error_log['code'] = -5;
    $error_log['error'] = "unable to parse the xml//html content";
    return FALSE;
  }

  $images = array();

  // evaluate() returns FALSE for an invalid expression and a scalar for
  // expressions such as count(//img); only a node list can be iterated.
  $xpath = new DOMXPath($doc);
  $hrefs = @$xpath->evaluate($options['expression']);
  if (!($hrefs instanceof DOMNodeList)) {
    return $images;
  }

  if ($options['getsize']) {
    timer_start(__FUNCTION__);
  }

  $imagesize = 0;
  foreach ($hrefs as $node) {
    // A custom expression may select attribute or text nodes, which have no
    // getAttribute() method.
    if (!($node instanceof DOMElement)) {
      continue;
    }

    $url = $node->getAttribute('src');
    if (empty($url)) {
      continue;
    }
    if (function_exists('encode_url')) {
      $url = encode_url($url);
    }
    $url = url_to_absolute($base_url, $url);
    if (!$url) {
      continue;
    }

    if ($options['getsize']) {
      // Each size check only gets whatever is left of the overall budget.
      $remaining = $options['timeout'] - timer_read(__FUNCTION__) / 1000;
      $imagesize = feeds_imagegrabber_validate_download_size($url, $options['max_imagesize'], $remaining);
      if ($imagesize != -1) {
        $images[$url] = $imagesize;
        if ($options['feeling_lucky']) {
          break;
        }
      }
      if ($options['timeout'] - timer_read(__FUNCTION__) / 1000 <= 0) {
        $error_log['code'] = FIG_HTTP_REQUEST_TIMEOUT;
        $error_log['error'] = "timeout occured while scraping the content";
        break;
      }
    }
    else {
      $images[$url] = $imagesize;
      if ($options['feeling_lucky']) {
        break;
      }
    }
  }

  return $images;
}