View source
<?php
// Code stored in a result's ->code (feeds_imagegrabber_http_request()) or in
// $error_log['code'] (feeds_imagegrabber_scrape_images()) when the time budget
// for the operation has been used up.
define('FIG_HTTP_REQUEST_TIMEOUT', 1);
/**
 * Implements hook_menu().
 *
 * Declares a single administration page that renders the
 * feeds_imagegrabber_admin form through drupal_get_form().
 *
 * @return
 *   Array of menu items keyed by path.
 */
function feeds_imagegrabber_menu() {
  $admin_page = array(
    'title' => 'Feeds Image Grabber',
    'page callback' => 'drupal_get_form',
    'page arguments' => array('feeds_imagegrabber_admin'),
    'access arguments' => array('administer site configuration'),
    'description' => 'Configure default options for Feeds Image Grabber',
  );

  return array('admin/settings/feeds_imagegrabber' => $admin_page);
}
/**
 * Implements hook_form_alter().
 *
 * On node forms whose content type has a Feeds importer attached, adds a
 * collapsed "Feeds Image Grabber" fieldset and registers the module's
 * validation handler.
 *
 * Initial values come from, in order of preference: the submitted form values,
 * the settings stored for the node, and finally the site-wide defaults.
 */
function feeds_imagegrabber_form_alter(&$form, $form_state, $form_id) {
  // Only node forms are of interest.
  if ($form['#id'] != 'node-form') {
    return;
  }
  // Skip content types that no importer is attached to.
  if (!feeds_get_importer_id($form['type']['#value'])) {
    return;
  }

  if (isset($form_state['values']['feeds_imagegrabber'])) {
    $settings = $form_state['values']['feeds_imagegrabber'];
  }
  else {
    $settings = FALSE;
    if (isset($form['#node']->nid)) {
      $settings = feeds_imagegrabber_get_settings($form['#node']->nid);
    }
    // New node, or nothing stored for it yet.
    if ($settings === FALSE) {
      $settings = feeds_imagegrabber_get_default_settings();
    }
  }

  $form['feeds_imagegrabber'] = array(
    '#type' => 'fieldset',
    '#title' => t('Feeds Image Grabber'),
    '#tree' => TRUE,
    '#collapsible' => TRUE,
    '#collapsed' => TRUE,
  );
  feeds_imagegrabber_form($form, $settings);
  $form['#validate'][] = 'feeds_imagegrabber_form_validate';
}
/**
 * Validation handler for the Feeds Image Grabber settings elements.
 *
 * Nothing is checked unless the "enabled" value equals 1. When a tag selector
 * type is chosen (1 = id, 2 = class) the selector text must be present and
 * match the pattern for that type; when no type is chosen the selector text is
 * reset to an empty string. The execution time must be a whole number between
 * 10 and 75.
 */
function feeds_imagegrabber_form_validate($form, &$form_state) {
  $submitted = $form_state['values']['feeds_imagegrabber'];
  if ($submitted['enabled'] != 1) {
    return;
  }

  $selector_type = $submitted['id_class'];
  $selector = $submitted['id_class_desc'];

  if (!$selector_type) {
    // Searching the whole page: discard any leftover selector text.
    form_set_value($form['feeds_imagegrabber']['id_class_desc'], '', $form_state);
  }
  else {
    if (empty($selector)) {
      form_set_error('feeds_imagegrabber][id_class_desc', "Specify the id/class of the desired tag.");
    }
    if ($selector_type == 1) {
      if (!preg_match('/^[a-zA-Z]+[_a-zA-Z0-9-]*$/', $selector)) {
        form_set_error('feeds_imagegrabber][id_class_desc', "Only alphabets, digits, hyphens and underscores are allowed in HTML id");
      }
    }
    elseif ($selector_type == 2) {
      if (!preg_match('/^[a-zA-Z]+[_a-zA-Z0-9- ]*$/', $selector)) {
        form_set_error('feeds_imagegrabber][id_class_desc', "Only alphabets, digits, hyphens, spaces and underscores are allowed in HTML class");
      }
    }
  }

  $percent = $form_state['values']['feeds_imagegrabber']['exec_time'];
  $rejected = !is_numeric($percent);
  if (!$rejected) {
    $rejected = $percent < 10 || $percent > 75 || $percent != round($percent);
  }
  if ($rejected) {
    form_set_error('feeds_imagegrabber][exec_time', t('Select the correct option for FIG execution time.'));
  }
}
/**
 * Implements hook_nodeapi().
 *
 * Keeps the {feeds_imagegrabber} table in step with nodes:
 * - insert/update: for content types with an importer attached, stores the
 *   submitted settings when the grabber is enabled, otherwise flags any stored
 *   row as disabled.
 * - delete: removes the stored row.
 *
 * @param $node
 *   The node being acted upon.
 * @param $op
 *   The operation being performed.
 * @param $form
 *   Unused.
 */
function feeds_imagegrabber_nodeapi(&$node, $op, $form) {
  switch ($op) {
    case 'insert':
    case 'update':
      if ($importer_id = feeds_get_importer_id($node->type)) {
        // Nodes saved outside the node form may not carry the settings at all;
        // treat that the same as "not enabled" without raising notices.
        if (!empty($node->feeds_imagegrabber['enabled'])) {
          $settings = array(
            'feed_nid' => $node->nid,
            'enabled' => $node->feeds_imagegrabber['enabled'],
            'id_class' => $node->feeds_imagegrabber['id_class'],
            'id_class_desc' => $node->feeds_imagegrabber['id_class_desc'],
            'exec_time' => $node->feeds_imagegrabber['exec_time'],
            'feeling_lucky' => $node->feeds_imagegrabber['feeling_lucky'],
          );
          // Decide between UPDATE and INSERT by looking the row up. The number
          // of affected rows of an UPDATE is not a reliable existence test: it
          // is 0 when the row exists but no column value changes, which would
          // lead to a second, conflicting INSERT.
          $exists = db_result(db_query("SELECT COUNT(*) FROM {feeds_imagegrabber} WHERE feed_nid = %d", $node->nid));
          if ($exists) {
            drupal_write_record('feeds_imagegrabber', $settings, array(
              'feed_nid',
            ));
          }
          else {
            drupal_write_record('feeds_imagegrabber', $settings);
          }
        }
        else {
          db_query("UPDATE {feeds_imagegrabber} SET enabled = %d WHERE feed_nid = %d", 0, $node->nid);
        }
      }
      break;

    case 'delete':
      // Errors are deliberately suppressed here, as in the original code.
      @db_query("DELETE FROM {feeds_imagegrabber} where feed_nid = %d", $node->nid);
      break;
  }
}
/**
 * Implements hook_feeds_node_processor_targets_alter().
 *
 * Adds one "<field name>:fig" mapping target for every field of the content
 * type that uses the imagefield widget.
 *
 * @param $targets
 *   Array of mapping targets, altered in place. It has to be received by
 *   reference, otherwise the additions are made to a local copy and lost.
 * @param $content_type
 *   Machine name of the content type whose fields are inspected.
 */
function feeds_imagegrabber_feeds_node_processor_targets_alter(&$targets, $content_type) {
  $info = content_types($content_type);
  if (empty($info['fields']) || !is_array($info['fields'])) {
    return;
  }

  foreach ($info['fields'] as $field_name => $field) {
    if (!isset($field['widget']['type']) || $field['widget']['type'] != 'imagefield_widget') {
      continue;
    }
    // Prefer the widget label; fall back to the machine name.
    $name = isset($field['widget']['label']) ? $field['widget']['label'] : $field_name;
    $targets[$field_name . ':fig'] = array(
      'name' => $name . ' (FIG)',
      'callback' => 'feeds_imagegrabber_feeds_set_target',
      'description' => t('The Item URL for the CCK @name imagefield of the node.', array(
        '@name' => $name,
      )),
    );
  }
}
/**
 * Mapping callback: grabs an image from a feed item's web page.
 *
 * Loads the page at $page_url, collects candidate images from it, downloads
 * candidates from largest to smallest reported size and appends the first one
 * that saves successfully to the node's image field.
 *
 * @param $node
 *   The node being built; $node->feeds_node_item->feed_nid identifies the feed
 *   whose grabber settings apply.
 * @param $target
 *   Target name in the form "<field name>:fig".
 * @param $page_url
 *   URL of the web page belonging to the feed item.
 *
 * @return
 *   FALSE when the URL conversion library is missing, nothing otherwise.
 */
function feeds_imagegrabber_feeds_set_target($node, $target, $page_url) {
  $feed_nid = $node->feeds_node_item->feed_nid;
  $settings = feeds_imagegrabber_get_settings($feed_nid);
  if (!$settings || !$settings['enabled']) {
    return;
  }
  if (!feeds_imagegrabber_include_library('url_to_absolute.php', 'feeds_imagegrabber')) {
    drupal_set_message(t('Feeds Image Grabber: The URL conversion script is missing. Go to <a href="!admin-reports-status">Status Report page</a>', array(
      '!admin-reports-status' => url('admin/reports/status'),
    )), 'error');
    return FALSE;
  }

  // explode() instead of split(): the latter is deprecated since PHP 5.3 and
  // no longer exists in PHP 7, and no regular expression is needed here.
  list($field_name) = explode(':', $target);
  $field = content_fields($field_name, $node->type);

  // The effective size limit is the smaller of the PHP upload limit and the
  // per-file limit configured on the field widget.
  $max_filesize = parse_size(file_upload_max_size());
  if (!empty($field['widget']['max_filesize_per_file']) && parse_size($field['widget']['max_filesize_per_file']) < $max_filesize) {
    $max_filesize = parse_size($field['widget']['max_filesize_per_file']);
  }

  // Time budget in seconds: a percentage of max_execution_time, or 10 seconds
  // when PHP has no execution time limit.
  $max_exec_time = ini_get('max_execution_time');
  $timeout = $max_exec_time == 0 ? 10 : $settings['exec_time'] * $max_exec_time / 100;
  $page_time = timer_read('page') / 1000;

  if (function_exists('encode_url')) {
    $page_url = encode_url($page_url);
  }
  if (!valid_url($page_url)) {
    return;
  }

  $xml = feeds_imagegrabber_webpage_scraper($page_url, $settings['id_class'], $settings['id_class_desc'], $timeout);
  if ($xml == FALSE) {
    return;
  }

  // Deduct the time spent fetching the page from the remaining budget.
  $timeout = $timeout - timer_read('page') / 1000 + $page_time;
  $options = array(
    'max_imagesize' => $max_filesize,
    'timeout' => $timeout,
    'feeling_lucky' => $settings['feeling_lucky'],
  );
  $images = feeds_imagegrabber_scrape_images($xml, $page_url, $options);
  if ($images == FALSE || count($images) <= 0) {
    return;
  }

  // Largest reported size first.
  asort($images);
  $images = array_reverse($images, TRUE);

  $items = isset($node->{$field_name}) ? $node->{$field_name} : array();
  // The author account does not change between candidates; load it once.
  $account = user_load($node->uid);
  foreach ($images as $url => $size) {
    $enclosure = new FeedsEnclosure($url, 'application/octet-stream');
    if (($file = $enclosure->getFile()) && ($file = feeds_imagegrabber_is_image($file))) {
      $target_dir = filefield_widget_file_path($field, $account);
      $file_validators = filefield_widget_upload_validators($field);
      $image_validators = imagefield_widget_upload_validators($field);
      $validators = array_merge($file_validators, $image_validators);
      if (array_key_exists('filefield_validate_is_image', $validators)) {
        unset($validators['filefield_validate_is_image']);
      }
      $info = field_file_save_file($file, $validators, $target_dir, $account);
      if ($info) {
        $info['list'] = array();
        $info['data'] = array(
          'description' => '',
        );
        if ($field['list_field']) {
          $info['list'] = $field['list_default'];
        }
        $items[] = $info;
        // One image per feed item is enough.
        break;
      }
    }
  }
  $node->{$field_name} = $items;
}
/**
 * Reads the grabber settings stored for a feed node.
 *
 * @param $feed_nid
 *   Node id of the feed.
 *
 * @return
 *   Whatever db_fetch_array() yields for the matching {feeds_imagegrabber}
 *   row: an array with the keys enabled, id_class, id_class_desc,
 *   feeling_lucky and exec_time, or FALSE when there is no row.
 */
function feeds_imagegrabber_get_settings($feed_nid) {
  $query = "SELECT enabled, id_class, id_class_desc, feeling_lucky, exec_time FROM {feeds_imagegrabber} WHERE feed_nid = %d";
  return db_fetch_array(db_query($query, $feed_nid));
}
/**
 * Returns the site-wide default grabber settings.
 *
 * @return
 *   The value of the 'feeds_imagegrabber' variable, or the built-in defaults
 *   (disabled, whole page, 10% execution time, largest image) when the
 *   variable is not set.
 */
function feeds_imagegrabber_get_default_settings() {
  return variable_get('feeds_imagegrabber', array(
    'enabled' => 0,
    'id_class' => 0,
    'id_class_desc' => '',
    'exec_time' => 10,
    'feeling_lucky' => 0,
  ));
}
/**
 * Form builder for the administration page holding the default settings.
 *
 * Wraps the shared settings elements in a non-collapsible fieldset, attaches
 * the shared validation handler and hands the form to system_settings_form().
 */
function feeds_imagegrabber_admin(&$form_state) {
  $defaults = feeds_imagegrabber_get_default_settings();

  $form = array(
    'feeds_imagegrabber' => array(
      '#type' => 'fieldset',
      '#title' => t('Default Settings'),
      '#tree' => TRUE,
      '#collapsible' => FALSE,
    ),
  );
  feeds_imagegrabber_form($form, $defaults);
  $form['#validate'][] = 'feeds_imagegrabber_form_validate';

  return system_settings_form($form);
}
/**
 * Adds the grabber settings elements to $form['feeds_imagegrabber'].
 *
 * @param $form
 *   Form array, altered in place.
 * @param $default_settings
 *   Array supplying the default value of each element; read keys are enabled,
 *   id_class, id_class_desc, feeling_lucky and exec_time.
 */
function feeds_imagegrabber_form(&$form, $default_settings) {
  $elements = array();

  $elements['enabled'] = array(
    '#type' => 'checkbox',
    '#title' => t('Enable Feeds Image Grabber'),
    '#description' => t('Check if you want to download images of the feed items for this feed.'),
    '#default_value' => $default_settings['enabled'],
  );

  // Option keys 0/1/2 are the values the validation handler and the scraper
  // compare against.
  $elements['id_class'] = array(
    '#type' => 'radios',
    '#title' => t('Search for an image between the tag which is identified by'),
    '#options' => array(
      t('None, search the whole web-page for the image.'),
      t('an Id'),
      t('a Class'),
    ),
    '#default_value' => $default_settings['id_class'],
  );

  $elements['id_class_desc'] = array(
    '#type' => 'textfield',
    '#title' => t('<i>Id</i> or <i>Class</i> of the HTML tag (Leave empty if you selected <i>None</i> above.)'),
    '#default_value' => $default_settings['id_class_desc'],
    '#description' => t('Separate multiple classes with spaces (as present in the HTML)'),
    '#maxlength' => 100,
  );

  $elements['feeling_lucky'] = array(
    '#type' => 'radios',
    '#title' => t('Feeling lucky, huh?'),
    '#options' => array(
      t('No, select the largest image between the tag.'),
      t('Yes, select the first image between the tag. (Recommended)'),
    ),
    '#default_value' => $default_settings['feeling_lucky'],
  );

  $elements['exec_time'] = array(
    '#type' => 'select',
    '#title' => t('Execution time[%]'),
    '#options' => drupal_map_assoc(array(10, 20, 30, 50, 75)),
    '#default_value' => $default_settings['exec_time'],
    '#description' => t('Select the percentage of maximum PHP execution time to take while grabbing image for a feed item.'),
  );

  // Attach in declaration order, replacing any element of the same name.
  foreach ($elements as $key => $element) {
    $form['feeds_imagegrabber'][$key] = $element;
  }
}
/**
 * Asks the server for a file's size with a HEAD request.
 *
 * @param $file_url
 *   URL of the file.
 * @param $max_size
 *   Largest acceptable size.
 * @param $timeout
 *   Timeout handed to the HTTP request.
 * @param $max_redirects
 *   Number of redirects the HTTP request may follow.
 *
 * @return
 *   The Content-Length header value when the response code is 200 and the
 *   value does not exceed $max_size; -1 in every other case.
 */
function feeds_imagegrabber_validate_download_size($file_url, $max_size, $timeout = 10, $max_redirects = 3) {
  $response = feeds_imagegrabber_http_request($file_url, array(
    'headers' => array(),
    'method' => 'HEAD',
    'data' => NULL,
    'max_redirects' => $max_redirects,
    'timeout' => $timeout,
  ));

  if ($response->code != 200 || !isset($response->headers) || !is_array($response->headers)) {
    return -1;
  }

  // Header names are matched case-insensitively.
  $lowercased = array_change_key_case($response->headers);
  if (!isset($lowercased['content-length'])) {
    return -1;
  }

  $length = $lowercased['content-length'];
  return $length <= $max_size ? $length : -1;
}
/**
 * Downloads a web page and returns the markup of the selected element.
 *
 * @param $page_url
 *   URL of the page. Replaced by the redirect target when the request was
 *   redirected with a 301, 302 or 307.
 * @param $itype
 *   How the element is selected: 0 = the body element, 1 = by id,
 *   2 = by class.
 * @param $ivalue
 *   The id or class(es) to look for when $itype is 1 or 2. It is inserted into
 *   an XPath expression as-is, so callers must pass validated input (the
 *   settings form restricts it to letters, digits, hyphens, underscores and,
 *   for classes, spaces).
 * @param $timeout
 *   Timeout handed to the HTTP request.
 * @param $max_redirects
 *   Number of redirects the HTTP request may follow.
 * @param $error_log
 *   Receives 'code' and 'error' entries describing a failure.
 *
 * @return
 *   The element serialized as XML, or FALSE on failure.
 */
function feeds_imagegrabber_webpage_scraper(&$page_url, $itype, $ivalue = '', $timeout = 15, $max_redirects = 3, &$error_log = array()) {
  $options = array(
    'headers' => array(),
    'method' => 'GET',
    'data' => NULL,
    'max_redirects' => $max_redirects,
    'timeout' => $timeout,
  );
  $result = feeds_imagegrabber_http_request($page_url, $options);

  // redirect_code is only present on results that went through a redirect, so
  // it must not be read unconditionally.
  if (isset($result->redirect_code, $result->redirect_url) && in_array($result->redirect_code, array(
    301,
    302,
    307,
  ))) {
    $page_url = $result->redirect_url;
  }

  if ($result->code != 200) {
    $error_log['code'] = $result->code;
    $error_log['error'] = "unable to retrieve content from web page";
    return FALSE;
  }
  if (empty($result->data) || drupal_strlen($result->data) <= 0) {
    $error_log['code'] = -1;
    $error_log['error'] = "no data available on url";
    return FALSE;
  }

  $doc = new DOMDocument();
  // Parser warnings about sloppy markup are suppressed on purpose.
  if (@$doc->loadHTML($result->data) === FALSE) {
    $error_log['code'] = -2;
    $error_log['error'] = "unable to parse the html content";
    return FALSE;
  }

  if ($itype == 0) {
    $items = @$doc->getElementsByTagName("body");
    if ($items != NULL && $items->length > 0) {
      $dist = $items->item(0);
    }
    else {
      $dist = NULL;
    }
  }
  elseif ($itype == 1) {
    $dist = @$doc->getElementById($ivalue);
  }
  elseif ($itype == 2) {
    $xpath = new DOMXPath($doc);
    // Collapse whitespace runs so the class list can be compared against the
    // normalized class attribute.
    $ivalue = preg_replace('/\\s\\s+/', ' ', trim($ivalue));
    $items = $xpath->query("//*[@class and contains(concat(' ',normalize-space(@class),' '), ' {$ivalue} ')]");
    if ($items != NULL && $items->length > 0) {
      $dist = $items->item(0);
    }
    else {
      $dist = NULL;
    }
  }
  else {
    $dist = NULL;
  }

  if ($dist == NULL) {
    $error_log['code'] = -3;
    $error_log['error'] = "tag not found";
    return FALSE;
  }

  $content = '';
  if (($content = @$dist->ownerDocument->saveXML($dist)) === FALSE) {
    $error_log['code'] = -4;
    $error_log['error'] = "error converting content to XML";
    return FALSE;
  }
  return $content;
}
/**
 * Collects the image URLs found in a piece of markup.
 *
 * @param $content
 *   XML or HTML markup to search.
 * @param $base_url
 *   URL against which relative image URLs are resolved.
 * @param $options
 *   Optional settings:
 *   - expression: XPath expression selecting the image elements ("//img").
 *   - getsize: whether to ask the server for each image's size (TRUE).
 *   - max_imagesize: largest acceptable size when getsize is on (512000).
 *   - timeout: time budget in seconds for all size lookups together (10).
 *   - max_redirects: accepted for completeness; not read in this function.
 *   - feeling_lucky: stop at the first acceptable image (0).
 * @param $error_log
 *   Receives 'code' and 'error' entries describing a failure or timeout.
 *
 * @return
 *   Array of sizes keyed by absolute image URL (size is 0 when getsize is
 *   off), or FALSE when the markup cannot be parsed.
 */
function feeds_imagegrabber_scrape_images($content, $base_url, array $options = array(), &$error_log = array()) {
  $options += array(
    'expression' => "//img",
    'getsize' => TRUE,
    'max_imagesize' => 512000,
    'timeout' => 10,
    'max_redirects' => 3,
    'feeling_lucky' => 0,
  );

  $doc = new DOMDocument();
  // Try strict XML first and fall back to the forgiving HTML parser.
  if (@$doc->loadXML($content) === FALSE && @$doc->loadHTML($content) === FALSE) {
    $error_log['code'] = -5;
    $error_log['error'] = "unable to parse the xml//html content";
    return FALSE;
  }

  $xpath = new DOMXPath($doc);
  $hrefs = @$xpath->evaluate($options['expression']);
  // A malformed expression, or one that yields a scalar, gives nothing to
  // iterate over; report it and return "no images".
  if (!($hrefs instanceof DOMNodeList)) {
    $error_log['code'] = -6;
    $error_log['error'] = "the xpath expression did not select any nodes";
    return array();
  }

  if ($options['getsize']) {
    timer_start(__FUNCTION__);
  }

  $images = array();
  $imagesize = 0;
  for ($i = 0; $i < $hrefs->length; $i++) {
    $url = $hrefs->item($i)->getAttribute('src');
    if (empty($url)) {
      continue;
    }
    if (function_exists('encode_url')) {
      $url = encode_url($url);
    }
    $url = url_to_absolute($base_url, $url);
    if ($url == FALSE) {
      continue;
    }

    if ($options['getsize']) {
      $remaining = $options['timeout'] - timer_read(__FUNCTION__) / 1000;
      if (($imagesize = feeds_imagegrabber_validate_download_size($url, $options['max_imagesize'], $remaining)) != -1) {
        $images[$url] = $imagesize;
        // The flag lives in $options; this function has no $settings.
        if ($options['feeling_lucky']) {
          break;
        }
      }
      if ($options['timeout'] - timer_read(__FUNCTION__) / 1000 <= 0) {
        $error_log['code'] = FIG_HTTP_REQUEST_TIMEOUT;
        $error_log['error'] = "timeout occured while scraping the content";
        break;
      }
    }
    else {
      $images[$url] = $imagesize;
      if ($options['feeling_lucky']) {
        break;
      }
    }
  }
  return $images;
}
/**
 * Confirms that a file is an image and gives it an image file extension.
 *
 * A file whose name already ends in jpeg, jpg, png or gif is returned
 * untouched. Otherwise a new name is derived, either from an allowed extension
 * found inside the text after the last dot or from the detected image type,
 * and the file is renamed.
 *
 * @param $filepath
 *   Path of the file to check.
 *
 * @return
 *   The (possibly new) path of the file, or FALSE when the path is empty, the
 *   file is not a recognized image, or renaming fails.
 */
function feeds_imagegrabber_is_image($filepath) {
  $extensions = 'jpeg jpg png gif';

  if (!$filepath) {
    return FALSE;
  }
  $info = feeds_imagegrabber_get_image_info($filepath);
  if (!$info || empty($info['extension'])) {
    return FALSE;
  }
  // Nothing to do when the name already carries an allowed extension.
  if (count(feeds_imagegrabber_validate_extensions($filepath, $extensions)) == 0) {
    return $filepath;
  }

  $current_name = basename($filepath);
  $dot = strrpos($current_name, '.');
  if ($dot) {
    $stem = substr($current_name, 0, $dot);
    $suffix = substr($current_name, $dot);
    // Look for an allowed extension anywhere in the suffix.
    $pattern = '/\\.(' . preg_replace('/ +/', '|', preg_quote($extensions)) . ')/i';
    $new_ext = preg_match($pattern, $suffix, $found) ? $found[1] : $info['extension'];
    $new_name = $stem . '.' . $new_ext;
  }
  else {
    // No dot, or only a leading one: append the detected extension.
    $new_name = $current_name . '.' . $info['extension'];
  }

  if ($new_name == $current_name) {
    return $filepath;
  }
  $destination = dirname($filepath) . '/' . $new_name;
  return rename($filepath, $destination) ? $destination : FALSE;
}
/**
 * Checks that a file name ends in one of the allowed extensions.
 *
 * @param $filename
 *   File name or path to check.
 * @param $extensions
 *   Space-separated list of allowed extensions; matched case-insensitively.
 *
 * @return
 *   An empty array when the name is acceptable, otherwise an array holding one
 *   translated error message.
 */
function feeds_imagegrabber_validate_extensions($filename, $extensions) {
  $alternatives = preg_replace('/ +/', '|', preg_quote($extensions));
  $pattern = '/\\.(' . $alternatives . ')$/i';

  if (preg_match($pattern, $filename)) {
    return array();
  }

  return array(
    t('Only files with the following extensions are allowed: %files-allowed.', array(
      '%files-allowed' => $extensions,
    )),
  );
}
/**
 * Reads basic image properties from a file.
 *
 * @param $file
 *   Path of the file.
 *
 * @return
 *   FALSE when $file is not a regular file or getimagesize() cannot read it.
 *   Otherwise an array with the keys width, height, extension ('gif', 'jpg',
 *   'png', or '' for any other image type) and mime_type.
 */
function feeds_imagegrabber_get_image_info($file) {
  if (!is_file($file)) {
    return FALSE;
  }

  $size = @getimagesize($file);
  if (!is_array($size)) {
    return FALSE;
  }

  // Index 2 of the getimagesize() result is the image type number.
  $known_types = array(
    '1' => 'gif',
    '2' => 'jpg',
    '3' => 'png',
  );
  $type = $size[2];

  return array(
    'width' => $size[0],
    'height' => $size[1],
    'extension' => isset($known_types[$type]) ? $known_types[$type] : '',
    'mime_type' => $size['mime'],
  );
}
/**
 * Performs an HTTP/1.0 request with an overall time limit.
 *
 * @param $url
 *   Absolute http or https URL.
 * @param $options
 *   Optional settings:
 *   - headers: array of request headers, keyed by name.
 *   - method: request method ('GET').
 *   - data: request body (NULL).
 *   - max_redirects: number of redirects that may still be followed (3).
 *   - timeout: time budget in seconds for the whole request, redirects
 *     included (30).
 *
 * @return
 *   An object. ->code is always set: the HTTP status code, the negated socket
 *   error number, -1001/-1002/-1003 for an unparsable URL, a missing scheme or
 *   an unsupported scheme, or FIG_HTTP_REQUEST_TIMEOUT when the time budget ran
 *   out. Depending on how far the request got, the object also carries
 *   ->error, ->request, ->data, ->protocol, ->status_message, ->headers,
 *   ->redirect_code and ->redirect_url.
 */
function feeds_imagegrabber_http_request($url, array $options = array()) {
  global $db_prefix;
  $result = new stdClass();
  $uri = @parse_url($url);
  if ($uri == FALSE) {
    $result->error = 'unable to parse URL';
    $result->code = -1001;
    return $result;
  }
  if (!isset($uri['scheme'])) {
    $result->error = 'missing schema';
    $result->code = -1002;
    return $result;
  }
  timer_start(__FUNCTION__);
  $options += array(
    'headers' => array(),
    'method' => 'GET',
    'data' => NULL,
    'max_redirects' => 3,
    'timeout' => 30,
  );
  switch ($uri['scheme']) {
    case 'http':
      $port = isset($uri['port']) ? $uri['port'] : 80;
      $host = $uri['host'] . ($port != 80 ? ':' . $port : '');
      $fp = @fsockopen($uri['host'], $port, $errno, $errstr, $options['timeout']);
      break;

    case 'https':
      $port = isset($uri['port']) ? $uri['port'] : 443;
      $host = $uri['host'] . ($port != 443 ? ':' . $port : '');
      $fp = @fsockopen('ssl://' . $uri['host'], $port, $errno, $errstr, $options['timeout']);
      break;

    default:
      $result->error = 'invalid schema ' . $uri['scheme'];
      $result->code = -1003;
      return $result;
  }
  if (!$fp) {
    $result->code = -$errno;
    $result->error = trim($errstr);
    variable_set('drupal_http_request_fails', TRUE);
    return $result;
  }

  // Build the request line and headers.
  $path = isset($uri['path']) ? $uri['path'] : '/';
  if (isset($uri['query'])) {
    $path .= '?' . $uri['query'];
  }
  $options['headers'] += array(
    'User-Agent' => 'Drupal (+http://drupal.org/)',
  );
  $options['headers']['Host'] = $host;
  $content_length = strlen($options['data']);
  if ($content_length > 0 || $options['method'] == 'POST' || $options['method'] == 'PUT') {
    $options['headers']['Content-Length'] = $content_length;
  }
  if (isset($uri['user'])) {
    $options['headers']['Authorization'] = 'Basic ' . base64_encode($uri['user'] . (!empty($uri['pass']) ? ":" . $uri['pass'] : ''));
  }
  // Requests made from within a simpletest run identify themselves with a
  // test user agent.
  if (is_string($db_prefix) && preg_match("/simpletest\\d+/", $db_prefix, $matches)) {
    $options['headers']['User-Agent'] = drupal_generate_test_ua($matches[0]);
  }
  $request = $options['method'] . ' ' . $path . " HTTP/1.0\r\n";
  foreach ($options['headers'] as $name => $value) {
    $request .= $name . ': ' . trim($value) . "\r\n";
  }
  $request .= "\r\n" . $options['data'];
  $result->request = $request;
  fwrite($fp, $request);

  // Read the response, shrinking the stream timeout to whatever is left of
  // the budget before every read.
  $response = '';
  while (!feof($fp)) {
    $timeout = $options['timeout'] - timer_read(__FUNCTION__) / 1000;
    if ($timeout <= 0) {
      // Release the socket before bailing out; returning with it still open
      // leaks the connection for the rest of the PHP request.
      fclose($fp);
      $result->code = FIG_HTTP_REQUEST_TIMEOUT;
      $result->error = 'request timed out';
      return $result;
    }
    stream_set_timeout($fp, floor($timeout), floor(1000000 * fmod($timeout, 1)));
    $response .= fread($fp, 1024);
  }
  fclose($fp);

  // Split headers from body, then the status line from the headers.
  list($response, $result->data) = explode("\r\n\r\n", $response, 2);
  $response = preg_split("/\r\n|\n|\r/", $response);
  list($protocol, $code, $status_message) = explode(' ', trim(array_shift($response)), 3);
  $result->protocol = $protocol;
  $result->status_message = $status_message;
  $result->headers = array();
  while ($line = trim(array_shift($response))) {
    list($header, $value) = explode(':', $line, 2);
    if (isset($result->headers[$header]) && $header == 'Set-Cookie') {
      // Repeated Set-Cookie headers are joined with commas.
      $result->headers[$header] .= ',' . trim($value);
    }
    else {
      $result->headers[$header] = trim($value);
    }
  }
  $responses = array(
    100 => 'Continue',
    101 => 'Switching Protocols',
    200 => 'OK',
    201 => 'Created',
    202 => 'Accepted',
    203 => 'Non-Authoritative Information',
    204 => 'No Content',
    205 => 'Reset Content',
    206 => 'Partial Content',
    300 => 'Multiple Choices',
    301 => 'Moved Permanently',
    302 => 'Found',
    303 => 'See Other',
    304 => 'Not Modified',
    305 => 'Use Proxy',
    307 => 'Temporary Redirect',
    400 => 'Bad Request',
    401 => 'Unauthorized',
    402 => 'Payment Required',
    403 => 'Forbidden',
    404 => 'Not Found',
    405 => 'Method Not Allowed',
    406 => 'Not Acceptable',
    407 => 'Proxy Authentication Required',
    408 => 'Request Time-out',
    409 => 'Conflict',
    410 => 'Gone',
    411 => 'Length Required',
    412 => 'Precondition Failed',
    413 => 'Request Entity Too Large',
    414 => 'Request-URI Too Large',
    415 => 'Unsupported Media Type',
    416 => 'Requested range not satisfiable',
    417 => 'Expectation Failed',
    500 => 'Internal Server Error',
    501 => 'Not Implemented',
    502 => 'Bad Gateway',
    503 => 'Service Unavailable',
    504 => 'Gateway Time-out',
    505 => 'HTTP Version not supported',
  );
  // Unknown status codes are treated as the base code of their class.
  if (!isset($responses[$code])) {
    $code = floor($code / 100) * 100;
  }
  $result->code = $code;
  switch ($code) {
    case 200:
    case 304:
      break;

    case 301:
    case 302:
    case 307:
      $location = $result->headers['Location'];
      // The redirected request only gets what is left of the budget.
      $options['timeout'] -= timer_read(__FUNCTION__) / 1000;
      if ($options['timeout'] <= 0) {
        $result->code = FIG_HTTP_REQUEST_TIMEOUT;
        $result->error = 'request timed out';
      }
      elseif ($options['max_redirects']) {
        $options['max_redirects']--;
        $result = feeds_imagegrabber_http_request($location, $options);
        $result->redirect_code = $code;
      }
      $result->redirect_url = $location;
      break;

    default:
      $result->error = $status_message;
  }
  return $result;
}
/**
 * Locates and includes a library file.
 *
 * The path reported by the Libraries module is tried first (when that module
 * is enabled); after that a fixed list of directories is searched in order.
 *
 * @param $file
 *   File name of the library file.
 * @param $library
 *   Library name handed to libraries_get_path().
 *
 * @return
 *   TRUE when the file was found and included, FALSE otherwise.
 */
function feeds_imagegrabber_include_library($file, $library) {
  if (module_exists('libraries')) {
    if (file_exists(libraries_get_path($library) . "/{$file}")) {
      require_once libraries_get_path($library) . "/{$file}";
      return TRUE;
    }
  }

  $module_dir = drupal_get_path('module', 'feeds_imagegrabber');
  $search_dirs = array(
    $module_dir,
    $module_dir . "/libraries",
    'sites/all/libraries',
    'sites/all/libraries/feeds_imagegrabber',
    'sites/all/libraries/absoluteurl',
    'sites/all/libraries/AbsoluteUrl',
  );
  foreach ($search_dirs as $dir) {
    $candidate = $dir . "/{$file}";
    if (!file_exists($candidate)) {
      continue;
    }
    require_once $candidate;
    return TRUE;
  }

  return FALSE;
}