<?php

/**
 * @file
 * Feeds Tamper plugin: make relative URLs found in markup absolute.
 */
// Plugin definition. 'form' and 'callback' hold the names of the functions
// defined in this file; 'name' and 'category' are the labels for the plugin.
$plugin = array();
$plugin['form'] = 'feeds_tamper_absolute_url_form';
$plugin['callback'] = 'feeds_tamper_absolute_url_callback';
$plugin['name'] = 'Make URLs absolute';
$plugin['category'] = 'HTML';
/**
 * Builds the settings form for the "Make URLs absolute" plugin.
 *
 * The form has no configurable options; it only carries one informational
 * element showing an escaped before/after example.
 *
 * @param mixed $importer
 *   Unused.
 * @param mixed $element_key
 *   Unused.
 * @param mixed $settings
 *   Unused.
 *
 * @return array
 *   Form array with a single 'info' element.
 */
function feeds_tamper_absolute_url_form($importer, $element_key, $settings) {
  // Escape the sample markup so it is displayed literally instead of being
  // rendered as a link.
  $example = htmlentities('<a href="/stuff/things"> to <a href="http://example.com/stuff/things">');
  $description = t('Make URLs in markup absolute. (i.e. !links).', array('!links' => $example));

  return array(
    'info' => array(
      '#value' => $description,
    ),
  );
}
/**
 * Rewrites relative URLs inside an HTML field so that they become absolute.
 *
 * The field is parsed as HTML, the URL-carrying attribute of a fixed set of
 * tags is collected, and each relative value is resolved against
 * $source->batch->link. The original string is then rewritten with strtr().
 *
 * Note: strtr() replaces every occurrence of a collected attribute value in
 * the field, not only the occurrence inside the attribute it was read from.
 *
 * @param object $source
 *   Object whose batch->link property holds the base URL.
 * @param mixed $item_key
 *   Unused.
 * @param mixed $element_key
 *   Unused.
 * @param mixed $field
 *   The markup to process; cast to string and modified in place.
 * @param mixed $settings
 *   Unused.
 */
function feeds_tamper_absolute_url_callback($source, $item_key, $element_key, &$field, $settings) {
  static $dom;

  $field = (string) $field;
  if (!$field) {
    return;
  }

  $b = parse_url($source->batch->link);

  if (!isset($dom)) {
    $dom = new DOMDocument();
  }

  // libxml_disable_entity_loader() is deprecated as of PHP 8.0 and raises
  // E_DEPRECATED when called, so only toggle it on older versions.
  $toggle_entity_loader = PHP_VERSION_ID < 80000 && function_exists('libxml_disable_entity_loader');

  // Collect parse errors internally instead of emitting warnings for sloppy
  // markup, and restore the previous libxml state afterwards.
  $errors = libxml_use_internal_errors(TRUE);
  if ($toggle_entity_loader) {
    $entity_loader = libxml_disable_entity_loader(TRUE);
  }
  $dom->loadHTML($field);
  libxml_clear_errors();
  libxml_use_internal_errors($errors);
  if ($toggle_entity_loader) {
    libxml_disable_entity_loader($entity_loader);
  }

  // Map of tag name => attribute that carries the URL.
  $tags = array(
    'a' => 'href',
    'img' => 'src',
    'iframe' => 'src',
    'script' => 'src',
    'object' => 'codebase',
    'link' => 'href',
    'applet' => 'code',
    'base' => 'href',
  );

  // Map of original attribute value => absolute URL, filled by the helper.
  $urls = array();
  foreach ($tags as $tag => $attr) {
    foreach ($dom->getElementsByTagName($tag) as $t) {
      $value = trim($t->getAttribute($attr));
      _feeds_tamper_absolute_url($value, $urls, $b);
    }
  }

  $field = strtr($field, $urls);
}
/**
 * Resolves one URL against a base URL and records the result.
 *
 * Values that are empty, unparsable, or already carry a scheme or host are
 * skipped. Otherwise the scheme, host, port, user and pass of the base are
 * applied and the absolute form is stored in $urls keyed by the original
 * value.
 *
 * @param string $r_url
 *   The URL as found in the markup.
 * @param array $urls
 *   Map of original value => absolute URL; added to by reference.
 * @param array|false $b
 *   The base URL in parse_url() format. If it is not an array or has no
 *   host, nothing is recorded because no absolute URL can be built.
 */
function _feeds_tamper_absolute_url($r_url, &$urls, $b) {
  if (!strlen($r_url)) {
    return;
  }

  // A failed or host-less base cannot produce an absolute URL; bail out
  // instead of dereferencing missing keys.
  if (!is_array($b) || empty($b['host'])) {
    return;
  }

  $r = parse_url($r_url);
  if ($r === FALSE) {
    return;
  }

  // Already absolute (or protocol-relative): leave it alone.
  if (!empty($r['scheme']) || !empty($r['host'])) {
    return;
  }

  // The authority part always comes from the base.
  unset($r['port'], $r['user'], $r['pass']);
  if (isset($b['scheme'])) {
    $r['scheme'] = $b['scheme'];
  }
  $r['host'] = $b['host'];
  foreach (array('port', 'user', 'pass') as $key) {
    if (isset($b[$key])) {
      $r[$key] = $b[$key];
    }
  }

  // Compare against '' rather than using empty(), so that a path of "0" is
  // treated as a real path.
  $has_path = isset($r['path']) && $r['path'] !== '';

  if (!$has_path) {
    // Query- or fragment-only reference: inherit the base path, and the base
    // query when the reference has none of its own.
    if (!empty($b['path'])) {
      $r['path'] = $b['path'];
    }
    if (!isset($r['query']) && isset($b['query'])) {
      $r['query'] = $b['query'];
    }
  }
  elseif (strpos($r['path'], '/') !== 0) {
    // Path-relative reference: append to the base path. Trailing slashes are
    // trimmed from the base so "/blog/" + "img.png" does not become
    // "/blog//img.png".
    $base_path = isset($b['path']) ? rtrim($b['path'], '/') : '';
    $r['path'] = $base_path . '/' . $r['path'];
  }

  $urls[$r_url] = _feeds_tamper_join_url($r);
}
/**
 * Assembles a URL string from components in parse_url() format.
 *
 * @param array $parts
 *   URL components keyed by scheme, host, port, user, pass, path, query and
 *   fragment. Every key is optional; user, pass and port are only used when
 *   a host is present.
 *
 * @return string
 *   The assembled URL.
 */
function _feeds_tamper_join_url($parts) {
  $url = '';

  if (!empty($parts['scheme'])) {
    $url .= $parts['scheme'] . ':';
  }

  if (isset($parts['host'])) {
    $url .= '//';

    if (isset($parts['user'])) {
      $url .= $parts['user'];
      if (isset($parts['pass'])) {
        $url .= ':' . $parts['pass'];
      }
      $url .= '@';
    }

    // A bare IPv6 literal has to be wrapped in brackets so its colons are
    // not confused with the port separator. The pattern uses a single pair
    // of delimiters; the previous double-delimited form could never match.
    if (preg_match('/^[\da-f]*:[\da-f.:]+$/ui', $parts['host'])) {
      $url .= '[' . $parts['host'] . ']';
    }
    else {
      $url .= $parts['host'];
    }

    if (isset($parts['port'])) {
      $url .= ':' . $parts['port'];
    }

    // Separate the authority from a path that lacks a leading slash.
    if (!empty($parts['path']) && $parts['path'][0] != '/') {
      $url .= '/';
    }
  }

  if (!empty($parts['path'])) {
    $url .= $parts['path'];
  }
  if (isset($parts['query'])) {
    $url .= '?' . $parts['query'];
  }
  if (isset($parts['fragment'])) {
    $url .= '#' . $parts['fragment'];
  }

  return $url;
}