function _url_extract in Spam 5.3
2 calls to _url_extract()
- url_spamapi in filters/
url/ url.module - URL filter plug in for the spam module. Copyright(c) 2007-2008 Jeremy Andrews <jeremy@tag1consulting.com>. All rights reserved.
- url_spam_filter in filters/
url/ url.module - Search for known spam urls in content.
File
- filters/
url/ url.module, line 48
Code
/**
 * Extract the root domains of every URL found in a piece of content.
 *
 * The values of the configured fields are concatenated, scanned for
 * http://, https://, ftp:// and mailto: URLs, and each URL is reduced to its
 * last two dot-separated host labels (e.g. "www.sample.com/x" becomes
 * "sample.com"). Every extracted domain is passed to _url_count().
 *
 * Results are cached per "{$type}-{$id}" in a static variable for the
 * duration of the request, so _url_count() is only invoked the first time a
 * given piece of content is processed.
 *
 * @param $content
 *   The content to scan, as an array or an object (objects are cast to
 *   arrays) keyed by field name.
 * @param $type
 *   The content type; passed to spam_invoke_module() and used in the cache
 *   key.
 * @param $fields
 *   An array with a 'main' list of field names and an optional 'other' list
 *   of field names whose values should be scanned.
 * @param $extra
 *   Extra data passed through to spam_invoke_module().
 *
 * @return
 *   An array of lower-cased, HTML-escaped root domains, one entry per URL
 *   found (duplicates are kept). An empty array if no URL was found.
 */
function _url_extract($content, $type, $fields, $extra = array()) {
  static $urls = array();

  // NOTE(review): presumably returns a unique id for this content; it is
  // only used here to build the cache key.
  $id = spam_invoke_module($type, 'content_id', $content, $extra);
  if (is_object($content)) {
    $content = (array) $content;
  }

  $key = "{$type}-{$id}";
  if (!isset($urls[$key])) {
    // Concatenate all configured fields, skipping groups or fields that are
    // not present rather than raising undefined-index notices.
    $string = '';
    foreach (array('main', 'other') as $group) {
      if (empty($fields[$group]) || !is_array($fields[$group])) {
        continue;
      }
      foreach ($fields[$group] as $field) {
        if (isset($content[$field]) && is_scalar($content[$field])) {
          $string .= $content[$field] . ' ';
        }
      }
    }

    // TODO: Improve this matching. We don't actually extract mailto: urls.
    $URI = "(http://|https://|ftp://|mailto:)";
    // Find all urls in content.
    preg_match_all("!(<p>|[ \n\r\t\\(]*)({$URI}([a-zA-Z0-9@:%_~#?&=.,/;-]*[a-zA-Z0-9@:%_~#&=/;-]))([.,?]?)(?=(</p>|[ \n\r\t\\)]*))!i", $string, $matches);

    // Always start from an array so that content without URLs is cached and
    // returned as an empty array instead of an undefined variable.
    $found = array();
    foreach ($matches[2] as $url) {
      // Strip the leading scheme. The match above is case-insensitive, so
      // the strip must be too, otherwise "HTTP://..." keeps its scheme.
      $url = preg_replace("!^{$URI}!i", '', $url);
      // get full domain (ie www.sample.com)
      if (!preg_match("/^()?([^\\/\"\\']+)/i", $url, $domain)) {
        continue;
      }
      // get root domain (ie sample.com)
      if (!preg_match("/[^\\.\\/]+\\.[^\\.\\/]+\$/", $domain[2], $root)) {
        // No dot in the host (e.g. "localhost"): nothing useful to count.
        continue;
      }
      $url = htmlspecialchars(strtolower($root[0]));
      _url_count($url);
      $found[] = $url;
    }
    $urls[$key] = $found;
  }

  return $urls[$key];
}