You are here

function url_spam_filter in Spam 5.3

Search for known spam URLs in content, and flag content whose total or repeated URL count exceeds the configured limits.

1 call to url_spam_filter()
url_spamapi in filters/url/url.module
URL filter plug in for the spam module. Copyright(c) 2007-2008 Jeremy Andrews <jeremy@tag1consulting.com>. All rights reserved.

File

filters/url/url.module, line 90

Code

/**
 * Search for known spam urls in content.
 *
 * The urls extracted from the content are checked in this order:
 *  - the total number of urls against the 'url_limit_total' variable,
 *  - the count of the most repeated url against 'url_limit_repeat',
 *  - each url's probability stored in {bayesian_tokens} (class 'url')
 *    against the 'spam_threshold' variable.
 * A limit below 0 disables the corresponding limit check. The first check
 * that trips ends the scan.
 *
 * @param $content
 *   The content being filtered; handed to _url_extract() and to the
 *   'content_id' hook of the module implementing $type.
 * @param $type
 *   The content type; used to resolve the content id and for logging.
 * @param $fields
 *   Field information handed to _url_extract().
 * @param $extra
 *   Optional extra data handed to _url_extract() and the 'content_id' hook.
 * @param $filter_test
 *   Not used by this function.
 *
 * @return
 *   An array whose 'total' key is 99 when any check tripped and 0 otherwise.
 *   When urls were examined, the 'url' key lists what was found: either a
 *   ('limit', 'total') entry for an exceeded limit, or ('url', 'probability')
 *   entries for each url looked up. 'probability' is NULL for a url that has
 *   no row in {bayesian_tokens}.
 */
function url_spam_filter($content, $type, $fields, $extra = array(), $filter_test = FALSE) {
  $action = array();
  $id = spam_invoke_module($type, 'content_id', $content, $extra);
  $spam = FALSE;
  $urls = _url_extract($content, $type, $fields, $extra);
  if (is_array($urls) && !empty($urls)) {
    $count = _url_count();

    $limit = variable_get('url_limit_total', 10);
    if ($limit > -1 && $count['total'] > $limit) {
      spam_log(SPAM_VERBOSE, 'url_spam_filter', t('total urls(@total) > url_limit_total(@limit)', array(
        '@total' => $count['total'],
        '@limit' => $limit,
      )), $type, $id);
      $action['url'][] = array(
        'limit' => 'total',
        'total' => $count['total'],
      );
      $action['total'] = 99;
      return $action;
    }

    $limit = variable_get('url_limit_repeat', 5);
    if ($limit > -1) {
      // Drop the aggregate entry so only per-url counts remain, then take the
      // highest one. This does not rely on 'total' sorting last.
      // NOTE(review): assumes every other entry of _url_count() is a numeric
      // per-url repeat count -- confirm against _url_count().
      unset($count['total']);
      $max = empty($count) ? 0 : max($count);
      if ($max > $limit) {
        spam_log(SPAM_VERBOSE, 'url_spam_filter', t('repeated urls(@total) > url_limit_repeat(@limit)', array(
          '@total' => $max,
          '@limit' => $limit,
        )), $type, $id);
        $action['url'][] = array(
          'limit' => 'repeat',
          'total' => $max,
        );
        $action['total'] = 99;
        // Return right away, like the total-limit check above. Falling
        // through would let the final assignment reset 'total' to 0 whenever
        // no individual url is above the spam threshold.
        return $action;
      }
    }

    // The threshold does not change while scanning, so read it once.
    $threshold = variable_get('spam_threshold', SPAM_DEFAULT_THRESHOLD);
    foreach ($urls as $url) {
      $p = db_fetch_object(db_query("SELECT probability FROM {bayesian_tokens} WHERE class = 'url' AND token = '%s'", $url));
      // db_fetch_object() yields FALSE when the url has never been seen;
      // treat that as "no probability known" instead of dereferencing FALSE.
      $probability = $p ? $p->probability : NULL;
      $action['url'][] = array(
        'url' => $url,
        'probability' => $probability,
      );
      if ($probability >= $threshold) {
        spam_log(SPAM_VERBOSE, 'url_spam_filter', t('found spam url(@url) probability(@probability)', array(
          '@url' => $url,
          '@probability' => $probability,
        )), $type, $id);
        $spam = TRUE;
        // One spam url is enough; skip the remaining lookups.
        break;
      }
      spam_log(SPAM_DEBUG, 'url_spam_filter', t('not spam url(@url) probability(@probability)', array(
        '@url' => $url,
        '@probability' => $probability,
      )), $type, $id);
    }
  }
  $action['total'] = $spam ? 99 : 0;
  return $action;
}