You are here

spam.module in Spam 5

Same filename and directory in other branches
  1. 5.3 spam.module
  2. 6 spam.module

File

spam.module
View source
<?php

/**
 * Spam Module
 * Copyright (c) 2006-2007
 *   Jeremy Andrews <jeremy@kerneltrap.org>.  All rights reserved.
 */

/**
 * Run a piece of content through the spam filter chain and act on the result.
 *
 * The filters run in a fixed order: comment sanity check, duplicate check,
 * custom filters, spam URLs, URL limits, external modules and finally the
 * bayesian filter.  Each one is skipped as soon as the accumulated score
 * already reaches the 'spam_threshold' setting.  The resulting probability is
 * stored in {spam_tracker}, logged, and handed to $callback.
 *
 * @param $source
 *   Which module this text came from (ie 'comment', 'node', 'trackback'...)
 * @param $id
 *   Numeric identifier for content (ie node id, comment id, etc)
 * @param $header
 *   Header portion of text to be filtered.
 * @param $body
 *   Body portion of text to be filtered.
 * @param $callback
 *   Optional function to call once the probability is known.  It receives
 *   ($source, $id, $header, $body, $probability, $old, $action).  When omitted,
 *   spam_default_actions() is called with the same arguments.
 *
 * @return
 *   Whatever $callback (or spam_default_actions()) returns.
 *   NOTE(review): the original documentation described this as TRUE (spam) or
 *   FALSE (not spam); confirm against the callbacks.  The function may not
 *   return at all if spam_ip_filter() decides to block the request.
 */
function spam_content_filter($source, $id, $header, $body, $callback = NULL) {

  // globals used in logging
  global $ID, $SOURCE;
  $ID = $id;
  $SOURCE = $source;

  // Give every accumulator a defined starting value.  Any of the filters
  // below may be skipped, yet these are still compared, summed and (in the
  // case of $action) handed to the callback afterwards.
  $probability = 0;
  $weight = 0;
  $action = array();
  $tokens = array();
  $threshold = variable_get('spam_threshold', 80);

  // md5 hash used to catch same comment being posted over and over
  $hash = md5($header . $body);

  // sanity check comments
  if ($source == 'comment' && spam_validate_comment($id) == FALSE) {
    $probability = 99;
  }

  // first filter, see if this is duplicated content
  if ($probability < $threshold && spam_duplicate_filter($hash, $source, $id, $header, $body)) {
    $probability = 99;
  }

  // second filter, see if this content matches custom filters
  if ($probability < $threshold) {
    $weight = spam_custom_filter($header, $body, $action);
  }

  // third filter, see if content contains spam urls
  if ($probability + $weight < $threshold) {
    $weight += spam_url_filter($header . ' ' . $body);
  }

  // fourth filter, see if content contains too many urls
  if ($probability + $weight < $threshold) {

    // split content into tokens
    $tokens = spam_tokenize($header, 'header*');
    $tokens = array_merge($tokens, spam_tokenize($body));
    $weight += spam_url_limit(spam_urls_count());
  }

  // pass content through external filters, if any
  if ($probability + $weight < $threshold) {

    // external spam filters
    $hook = spam_invoke_hook('filter', $header . ' ' . $body, $tokens);
    if (!empty($hook['weight'])) {
      $weight += $hook['weight'];

      // external modules should do their own logging, but just in case...
      spam_log(SPAM_VERBOSE, t('spam_content_filter: external module added weight of @weight', array(
        '@weight' => $hook['weight'],
      )), $source, $id);
    }
  }

  // finally, if necessary pass content through bayesian filter
  if ($probability + $weight < $threshold) {
    $probability = spam_bayesian_filter($tokens);
  }

  // be sure probability is in valid range (1-99)
  $probability = spam_get_probability($probability, $weight);

  // $old is FALSE when this content has never been tracked before, so check
  // the result itself before looking at its id.
  $old = db_fetch_object(db_query("SELECT * FROM {spam_tracker} WHERE source = '%s' AND id = %d", $source, $id));
  if ($old && $old->id) {

    // content has been updated
    db_query("UPDATE {spam_tracker} SET probability = %d, hostname = '%s', hash = '%s', timestamp = %d WHERE source = '%s' AND id = %d", $probability, $_SERVER['REMOTE_ADDR'], $hash, time(), $source, $id);
  }
  else {

    // this is the first time we've filtered this content
    db_query("INSERT INTO {spam_tracker} (id, source, probability, hostname, hash, timestamp) VALUES(%d, '%s', %d, '%s', '%s', %d)", $id, $source, $probability, $_SERVER['REMOTE_ADDR'], $hash, time());
  }
  spam_log(SPAM_LOG, t('spam_content_filter: @prob% probability of being spam for @source "%header"', array(
    '@prob' => $probability,
    '@source' => $source,
    '%header' => $header,
  )), $source, $id);

  // pass probability to callback which will deal with it as is appropriate
  if ($callback != NULL) {
    $return = $callback($source, $id, $header, $body, $probability, $old, $action);
  }
  else {
    $return = spam_default_actions($source, $id, $header, $body, $probability, $old, $action);
  }
  if ($probability >= $threshold) {

    /* This ip should have already been blocked when the content was previewed,
     * but if the website is not configured to require previews this is our
     * only chance to try and delay the spammer.  Delaying them here is much
     * less effective, as the spam was already posted and many automatic tools
     * probably don't wait around at this point.
     */
    spam_ip_filter($source, $id, TRUE);
  }
  return $return;
}

/**
 * Refuse the current request when it comes from a blacklisted IP address.
 *
 * An address counts as blacklisted once {spam_tracker} holds at least
 * 'spam_blacklist_ip' entries for it at or above the spam threshold.  A
 * blacklisted visitor is (optionally) delayed, shown an explanatory message
 * and given an access-denied page; the request then ends, so this function
 * does not return in that case.
 *
 * @param $source   Source (comment, node, ...)
 * @param $id       Unique identifier (cid, nid, ...)
 * @param $filtered Whether or not the data has already been run through the
 *                  spam filter.  If it has, the limit is raised by one
 *                  because the current content is already being counted.
 */
function spam_ip_filter($source, $id, $filtered = FALSE) {
  $limit = variable_get('spam_blacklist_ip', 2);

  // Anything that is not greater than -1 switches blacklisting off.
  if (!($limit > -1)) {
    return;
  }
  if ($filtered) {
    $limit++;
  }

  $hostname = $_SERVER['REMOTE_ADDR'];
  $ip = db_fetch_object(db_query("SELECT count(sid) AS count FROM {spam_tracker} WHERE probability >= %d AND hostname = '%s'", variable_get('spam_threshold', 80), $hostname));
  $is_blacklisted = $ip->count >= $limit;
  if (!$is_blacklisted) {
    return;
  }

  spam_log(SPAM_VERBOSE, t('spam_ip_filter: blocked @hostname from posting content due to @num earlier spam postings.', array(
    '@hostname' => $hostname,
    '@num' => (string) $ip->count,
  )), $source, $id);

  // A blacklisted address that keeps posting can hammer the site hard enough
  // to amount to a DoS, so stall it for a while (25 seconds by default)
  // before turning it away.  The stall ties up a database connection on our
  // side too, so it is skipped when the drupal throttle reports that the
  // site is already under load.
  if (!module_invoke('throttle', 'status')) {
    sleep(variable_get('spam_ip_filter_sleep', 25));
  }
  drupal_set_message(t('Your IP address (@ip) was recently used to post spam to this website.  For this reason, you are currently not allowed to post new content.  If you believe this is in error, please contact the site administrator.', array(
    '@ip' => $hostname,
  )));
  drupal_access_denied();
  exit;
}

// Log levels, from least to most detailed.
define('SPAM_LOG', 1);
define('SPAM_VERBOSE', 3);
define('SPAM_DEBUG', 5);

/**
 * Record a message in the {spam_log} table when the configured log level
 * ('spam_log_level') is at least as high as the level of the message.
 *
 * @param $level    The level of the current log message.
 * @param $message  The text of the message to be logged.
 * @param $source   Optional text specifying source (type) of content.
 *                  Falls back to the global $SOURCE when empty.
 * @param $id       Optional id of content.  Falls back to the global $ID
 *                  when empty.
 */
function spam_log($level, $message, $source = NULL, $id = NULL) {

  // $ID and $SOURCE are set in spam_content_filter() (and, according to the
  // original note here, in spam_page()).
  global $ID, $SOURCE, $user;
  $log_id = ($id == NULL) ? $ID : $id;
  $log_source = ($source == NULL) ? $SOURCE : $source;

  $should_log = variable_get('spam_log_level', SPAM_LOG) >= $level;
  if ($should_log) {
    db_query("INSERT INTO {spam_log} (hostname, source, id, uid, entry, timestamp) VALUES('%s', '%s', %d, %d, '%s', %d)", $_SERVER['REMOTE_ADDR'], $log_source, $log_id, $user->uid, $message, time());
  }
}
// Named weights, symmetric around zero: positive values lean towards spam and
// negative values away from it, going by the constant names.
// NOTE(review): the code that consumes these is not in this part of the
// file -- confirm how they are combined with the spam threshold.
define('WEIGHT_ALWAYS_SPAM', 200);
define('WEIGHT_USUALLY_SPAM', 50);
define('WEIGHT_MAYBE_SPAM', 20);
define('WEIGHT_MAYBE_NOTSPAM', -20);
define('WEIGHT_RARELY_SPAM', -50);
define('WEIGHT_NEVER_SPAM', -200);

// Drupal core hooks

/**
 * Drupal _help hook.  Provides help and informational text about the spam module.
 *
 * @param $path
 *   Current display path.
 *
 * @return
 *   Help text appropriate for $path, or an empty string when this module has
 *   nothing to say about that path.
 */
function spam_help($path) {

  // Start with an empty string so that unknown paths return a defined value
  // and the appends below never touch an undefined variable.
  $output = '';
  switch ($path) {
    case 'admin/help#spam':
      $output .= '<p>' . t("The spam module is a powerful collection of tools designed to help website administrators to automatically deal with spam. Spam is any content that is posted to a website that is unrelated to the subject at hand, usually in the form of advertising and links back to the spammer's own website. This module can automatically detect spam, instantly unpublish it, and send notification to the site administrator.") . '</p>';
      break;
    case 'admin/settings/spam':
      $output .= '<p>' . t("These are the global settings controlling the Spam module.") . '</p>';
      break;
    case 'admin/content/comment/list/spam':
    case 'admin/spam/comments':
      $output .= '<p>' . t('The following comments have been marked as spam.') . '</p>';
      break;
    case 'admin/content/comment/list/reported':
      $output .= '<p>' . t('The following comments have been reported as spam.') . '</p>';
      break;
    case 'admin/content/node/list/spam':
    case 'admin/spam/node':
      $output .= '<p>' . t('The following content has been marked as spam.') . '</p>';
      break;
    case 'admin/content/node/list/reported':
      $output .= '<p>' . t('The following content has been reported as spam.') . '</p>';
      break;
    case 'admin/logs/spam':
      $output .= t('If enabled, the spam module will log the various filter actions that lead up to site content being marked as spam or not spam.  These logs can be helpful in understanding why certain content was or was not marked as spam.');
      break;
    case 'admin/logs/spam/blockedips':
      $output .= t('The following IPs have been marked as blocked.');
      break;
    case 'admin/settings/spam/filter':
      $output .= t('This page allows you to select which type of postings will be checked for spam.');
      break;
    case 'admin/settings/spam/limits':
      $output .= t('This page allows you to set limits for the spam module.');
      break;
    case 'admin/settings/spam/actions':
      $output .= t('This page allows you to select which actions should be taken when spam is detected.');
      break;
    case 'admin/settings/spam/advanced':
      $output .= t('This page allows you to specify additional options for the spam module.');
      break;
    case 'admin/settings/spam/custom':
      $output .= t('Custom filters allow you to define words, phrases and/or regular expressions to be tested against new content on your site.  If your custom filter matches, you can cause this to increase or decrease the probability that the given content is spam.  For example, if a comment about "viagra" is completely out of place on your site, you can create a custom filter such that any comment with the word "viagra" in it will always be marked as spam.');
      break;
    case 'admin/settings/spam/url':
      $output .= t('The spam filter\'s Bayesian logic automatically learns spammer web domains.  Any new comment or other content containing one of the web domain names listed below will be automatically marked as spam.  For example, if "spam.com" is listed below, a new comment containing the text "http://spam.com/great/deals" will be marked as spam.');

      // The second paragraph used to open a <p> without ever closing it.
      $output .= '<p>' . t('The advantage that url filters offer is that they are automatically learned by the bayesian filter.  However, it is possible to instead block spammer domains by defining an appropriate custom filter.') . '</p>';
      break;
  }
  return $output;
}

/**
 * Provides the landing page of the module's configuration interface.
 *
 * Lists every settings tab together with the description supplied through
 * the 'tab_description' spam hook.
 *
 * @return  HTML for settings page.
 */
function spam_admin_settings() {
  $output = '<h3>' . t('Spam Settings') . '</h3>';
  $output .= '<p>' . t('The settings for this module are separated by function with each group of settings represented in the tabs above.') . '</p>';

  $tabhdr = array(
    t('Tab'),
    t('Description'),
  );

  // The hook result is only sorted and listed when it really is an array, so
  // a missing or odd implementation yields an empty table rather than
  // warnings from ksort() and foreach.
  $tabs = spam_invoke_hook('tab_description');
  $rows = array();
  if (is_array($tabs)) {
    ksort($tabs, SORT_REGULAR);
    foreach ($tabs as $key => $value) {
      $rows[] = array(
        $key,
        $value,
      );
    }
  }
  $output .= theme('table', $tabhdr, $rows);
  $output .= '<p>&nbsp;</p>';
  $output .= '<p><strong>' . t('Note') . '</strong>: ' . t('As with all multi-page settings forms, you must use the "Save configuration" button on each page.') . '</p>';
  return $output;
}
/**
 * Form builder for the "Filters" settings tab.
 *
 * Offers one checkbox for comments, one per node type, and one for spam URL
 * filtering.  Other modules can add elements through the 'filter_settings'
 * spam hook.
 *
 * @return  The form array wrapped by system_settings_form().
 */
function spam_admin_settings_filter() {
  $form = array();

  // general settings
  $form['filter'] = array(
    '#type' => 'fieldset',
    '#title' => t('Filter'),
  );
  $form['filter']['spam_filter_comments'] = array(
    '#type' => 'checkbox',
    '#title' => t('Filter comments'),
    '#return_value' => 1,
    '#default_value' => variable_get('spam_filter_comments', 1),
    '#description' => t('Enable this option to filter new comments as they are posted, determining whether or not they are spam.'),
  );
  $form['filter']['content_types'] = array(
    '#type' => 'fieldset',
    '#title' => t('Filter content types'),
    '#collapsible' => TRUE,
    '#collapsed' => FALSE,
    '#description' => t('Examine these content types as they are posted, determining whether or not they are spam. This could be useful if you allow anonymous users to post content or public registrations.'),
  );
  $node_types = node_get_types();
  foreach ($node_types as $type => $properties) {
    $form['filter']['content_types']["spam_filter_{$type}"] = array(
      '#type' => 'checkbox',

      // The type name goes in as a placeholder: that keeps the string
      // translatable and lets t() escape the name.
      '#title' => t('Filter @name content', array(
        '@name' => $properties->name,
      )),
      '#return_value' => 1,
      '#default_value' => variable_get("spam_filter_{$type}", 0),
    );
  }
  $form['filter']['spam_filter_urls'] = array(
    '#type' => 'checkbox',
    '#title' => t('Filter spam URLs'),
    '#return_value' => 1,
    '#default_value' => variable_get('spam_filter_urls', 1),
    '#description' => t('Enabling this option will tell the spam filter to treat URLs embedded within comments and other contents as a special case.  When URLs that were found within known spam are found in new content, the new content is automatically considered to be spam.  When this option is enabled, a single spam URL found within an otherwise spam-free posting will cause the filter to mark the new content as spam.'),
  );

  // provide hook for external modules to define custom filter types
  $hook = spam_invoke_hook('filter_settings');
  if (!empty($hook['group'])) {
    $form['filter'] = array_merge($form['filter'], $hook['group']);
  }
  return system_settings_form($form);
}
/**
 * Form builder for the "Limits" settings tab.
 *
 * Every setting is a select box built from the same list of limits.  The
 * "none" (0) choice is only offered for the total URL limit; it is removed
 * from the list before the remaining selects are built.
 *
 * @return  The form array wrapped by system_settings_form().
 */
function spam_admin_settings_limits() {
  $form = array();

  // limits
  $form['limits'] = array(
    '#type' => 'fieldset',
    '#title' => t('Limits'),
  );
  $limits = array(
    -1 => t('unlimited'),
    0 => t('none'),
    1 => 1,
    2 => 2,
    3 => 3,
    4 => 4,
    5 => 5,
    6 => 6,
    7 => 7,
    8 => 8,
    9 => 9,
    10 => 10,
    15 => 15,
    20 => 20,
    50 => 50,
    100 => 100,
  );
  $form['limits']['spam_urls_total'] = array(
    '#type' => 'select',
    '#title' => t('Maximum allowed URLs'),
    '#default_value' => variable_get('spam_urls_total', 10),
    '#options' => $limits,
    '#description' => t('Specificy the maximum number of URLs that are allowed in a single posting before it is considered to be spam.  For example, if you select 5 from the pop down menu, and then a posting to your site has 6 weblinks, the posting will be marked as spam.  Some form of filtering must be enabled above for this to have any affect.'),
  );

  // "none" is not offered for any of the settings below.
  unset($limits[0]);
  $form['limits']['spam_urls_repeat'] = array(
    '#type' => 'select',
    '#title' => t('Maximum repeat URLs'),
    '#default_value' => variable_get('spam_urls_repeat', 5),
    '#options' => $limits,
    '#description' => t('Specificy the maximum number of times that the same URL  is allowed to appear in a single posting before it is considered to be spam.  For example, if you select 5 from the pop down menu, and then a posting has 6 weblinks to the same exact location, the posting will be marked as spam.  Some form of filtering must be enabled above for this to have any affect.'),
  );
  $form['limits']['spam_duplicate_content'] = array(
    '#type' => 'select',
    '#title' => t('Maximum duplicate content'),
    '#default_value' => variable_get('spam_duplicate_content', 2),
    '#options' => $limits,
    '#description' => t('Specify the maximum number of times the same identical posting will be allowed to your site before all of the duplicate postings are considered spam.  Some times a user may accidently hit "submit" multiple times, causing an otherwise acceptible posting to be duplicated.  However, other times a spammer may repeatedly post the same spam content to your site.'),
  );
  $form['limits']['spam_blacklist_ip'] = array(
    '#type' => 'select',
    '#title' => t('Detected spam postings before blacklisting IP'),
    '#default_value' => variable_get('spam_blacklist_ip', 2),
    '#options' => $limits,
    '#description' => t('Select the number of times a single IP address needs to be detected posting probable spam before the IP is blacklisted.  If enabled, future attempts to post any content will cause a 25 second delay, after which the user will be redirected to the site\'s front page where they will receive notification that their IP address is currently blacklisted.  It is advisable that you configure your site to require all content be previewed if you wish to use this functionality, as that will allow the filter to blacklist a user <em>before</em> the content is posted, minimizing database overhead.  If you configure spam to automatically expire, the IP will only be blacklisted until the previous spam from that IP expires.  A blacklisted IP is only prevented from posting new content, they are still allowed to view existing content.'),
  );
  return system_settings_form($form);
}
/**
 * Form builder for the "Actions" settings tab.
 *
 * Covers unpublishing, user and administrator notification, and the spam
 * expiry period.  Other modules can add elements through the
 * 'action_settings' spam hook.
 *
 * @return  The form array wrapped by system_settings_form().
 */
function spam_admin_settings_actions() {
  $form = array();

  // actions
  $form['actions'] = array(
    '#type' => 'fieldset',
    '#title' => t('Actions'),
  );
  $form['actions']['spam_unpublish'] = array(
    '#type' => 'checkbox',
    '#title' => t('Unpublish spam'),
    '#return_value' => 1,
    '#default_value' => variable_get('spam_unpublish', 1),
    '#description' => t('When checked, any new content that is detected as spam will be automatically unpublished.  This will prevent the content from being displayed, allowing a site administrator a chance to first review it.'),
  );
  $form['actions']['spam_notify_user'] = array(
    '#type' => 'checkbox',
    '#title' => t('Notify user'),
    '#return_value' => 1,
    '#default_value' => variable_get('spam_notify_user', 1),
    '#description' => t('If both the above "Unpublish spam" box and this box are checked, users will be notified with a message when the content they post is blocked by the spam filter.  This is intended to minimize confusion when the spam filter mistakes a valid posting as spam and the posting doesn\'t immediately apear to the user.'),
  );
  $form['actions']['spam_notify_admin'] = array(
    '#type' => 'checkbox',
    '#title' => t('Email notification'),
    '#return_value' => 1,
    '#default_value' => variable_get('spam_notify_admin', 1),
    '#description' => t('Enabling this option will cause an email to be sent to the site administrator whenever the filters detect spam content.'),
  );

  // Expiry periods in seconds, labelled by format_interval(); 0 is shown as
  // "never".
  $period = drupal_map_assoc(array(
    0,
    3600,
    10800,
    21600,
    32400,
    43200,
    86400,
    172800,
    259200,
    604800,
    1209600,
    2419200,
    4838400,
    9676800,
    31536000,
  ), 'format_interval');
  $period[0] = t('never');
  $form['actions']['spam_expire_time'] = array(
    '#type' => 'select',
    '#title' => t('Expire spam after'),
    '#default_value' => variable_get('spam_expire_time', 1209600),
    '#options' => $period,
    '#description' => t('Content that is marked as spam for more than the selected amount of time will be automatically and permanently deleted.  Requires crontab.'),
  );

  // provide hook for external modules to define actions
  $updated = spam_invoke_hook('action_settings');
  if (!empty($updated['group'])) {
    $form['actions'] = array_merge($form['actions'], $updated['group']);
  }
  return system_settings_form($form);
}
/**
 * Form builder for the "Advanced" settings tab.
 *
 * Covers the log level, log retention, probability display and the feedback
 * requirement when reporting spam.  Other modules can add elements through
 * the 'advanced_settings' spam hook.
 *
 * @return  The form array wrapped by system_settings_form().
 */
function spam_admin_settings_advanced() {
  $form = array();

  // advanced settings
  $form['advanced'] = array(
    '#type' => 'fieldset',
    '#title' => t('Advanced'),
  );
  $form['advanced']['spam_log_level'] = array(
    '#type' => 'radios',
    '#title' => t('Log'),
    '#default_value' => variable_get('spam_log_level', SPAM_LOG),
    '#options' => array(
      0 => t('nothing'),
      SPAM_LOG => t('major events'),
      SPAM_VERBOSE => t('major and minor events'),
      SPAM_DEBUG => t('everything'),
    ),
    '#description' => t('By default, the spam module will only log major events.  If you are trying to figure out why the spam filter is marking content as spam or not spam, you may want to try logging everything.  If you\'re concerned about performance, you may want to disable logging completely.'),
  );

  // Retention periods in seconds, labelled by format_interval(); 0 is shown
  // as "never".
  $period = drupal_map_assoc(array(
    0,
    3600,
    10800,
    21600,
    32400,
    43200,
    86400,
    172800,
    259200,
    604800,
    1209600,
    2419200,
    4838400,
    9676800,
    31536000,
  ), 'format_interval');
  $period[0] = t('never');
  $form['advanced']['spam_flush_log_timer'] = array(
    '#type' => 'select',
    '#title' => t('Discard spam logs older than'),
    '#default_value' => variable_get('spam_flush_log_timer', 259200),
    '#options' => $period,
    '#description' => t('Older spam log entries will be automatically discarded.  Requires crontab.'),
  );
  $form['advanced']['spam_display_probability'] = array(
    '#type' => 'checkbox',
    '#title' => t('Display probability'),
    '#return_value' => 1,
    '#default_value' => variable_get('spam_display_probability', 0),
    '#description' => t('If enabled, the probability that a given piece of content is spam will be displayed by the content.  This is useful while you are tuning your spam filter, and will provide a link to the relevant logs showing how that content was determined to be or not to be spam.'),
  );
  $form['advanced']['spam_report_feedback'] = array(
    '#type' => 'checkbox',
    '#title' => t('Require feedback when reporting spam'),
    '#return_value' => 1,
    '#default_value' => variable_get('spam_report_feedback', TRUE),
    '#description' => t('If checked, users that are able to report content as spam will be required to leave feedback when reporting spam.  If unchecked, users will not be prompted to leave feedback.'),
  );

  /* If you feel you need to display more than 50 spam comments or spam nodes
   * at a time, uncomment the code below.  For most people this will be
   * unnecessary, and potentially confusing, so by default this functionality
   * is disabled.
   */

  //$quantity = array(5 => 5, 10 => 10, 25 => 25, 50 => 50, 100 => 100, 250 => 250, 500 => 500, 1000 => 1000, 5000 => 5000, 1000000 => t('all'));
  //$form['advanced']['spam_display_quantity'] = array(
  //  '#type' => 'select',
  //  '#title' => t('Quantity to display in lists'),
  //  '#default_value' => variable_get('spam_display_quantity', 50),
  //  '#options' => $quantity,
  //  '#description' => t('Select the number of spam comments or other types of content to display per page in the administrative interfaces.'),
  //);

  // Provide hook for external modules to define custom advanced settings.
  // The group is an array of form elements and is merged in as such.
  $hook = spam_invoke_hook('advanced_settings');
  if (!empty($hook['group'])) {
    $form['advanced'] = array_merge($form['advanced'], $hook['group']);
  }
  return system_settings_form($form);
}

/**
 * Drupal _cron hook.
 *
 * Performs three maintenance tasks:
 *  - at most once every 24 hours, mails a summary of content that users have
 *    reported as spam;
 *  - deletes tracked spam older than 'spam_expire_time';
 *  - discards {spam_log} and {spam_reported} rows older than
 *    'spam_flush_log_timer'.
 */
function spam_cron() {

  // send email notifications every 24 hours
  $email_timer = variable_get('spam_email_timer', 0);
  if ($email_timer < time() - 86400) {
    variable_set('spam_email_timer', time());
    $result = db_query('SELECT source, COUNT(source) AS count FROM {spam_reported} GROUP BY source');
    if (db_num_rows($result)) {
      $admin = user_load(array(
        'uid' => 1,
      ));
      $message = t("Hello @adminname,\n\n  Users have reported finding spam on your website.  The following content has been reported:\n", array(
        '@adminname' => $admin->name,
      ));
      $urls = array();
      while ($reported = db_fetch_object($result)) {
        $message .= t("    - @num @source\n", array(
          '@num' => $reported->count,
          '@source' => format_plural($reported->count, $reported->source, $reported->source . 's'),
        ));

        // Ask url() for an absolute link.  Prefixing $base_url to a relative
        // url() result repeats the base path when the site lives in a
        // subdirectory.
        $urls[] = url("admin/content/{$reported->source}/list/reported", NULL, NULL, TRUE);
      }
      $message .= t("\n  Please review this reported spam by visiting the following @url:\n", array(
        '@url' => format_plural(count($urls), 'url', 'urls'),
      ));
      foreach ($urls as $url) {
        $message .= "    {$url}\n";
      }
      spam_mail(t('[@sitename] Spam reported', array(
        '@sitename' => variable_get('site_name', 'drupal'),
      )), $message);
    }
  }

  // expire spam content that is older than we're configured to keep.
  if ($expire = variable_get('spam_expire_time', 1209600)) {
    $result = db_query('SELECT source, id FROM {spam_tracker} WHERE timestamp < %d AND probability >= %d', time() - $expire, variable_get('spam_threshold', 80));
    while ($content = db_fetch_object($result)) {

      /* external content types (other than 'node' or 'comment') _must_ provide
       * a spam_delete_<type>() function.  A missing handler is logged and
       * skipped so that one bad content type cannot abort the whole cron run.
       */
      $function = "spam_delete_{$content->source}";
      if (function_exists($function)) {
        spam_log(SPAM_LOG, t('spam_cron: deleting @source', array(
          '@source' => $content->source,
        )), $content->source, $content->id);
        $function($content->id);
      }
      else {
        spam_log(SPAM_LOG, t('spam_cron: unable to delete @source, @function() is not defined', array(
          '@source' => $content->source,
          '@function' => $function,
        )), $content->source, $content->id);
      }
    }
  }

  // clean expired spam logs
  if ($flush = variable_get('spam_flush_log_timer', 259200)) {
    db_query('DELETE FROM {spam_log} WHERE timestamp < %d', time() - $flush);
    db_query('DELETE FROM {spam_reported} WHERE timestamp < %d', time() - $flush);
  }
}

/**
 * Drupal _comment hook.  Passes new comments to the spam filter.
 *
 * @param $comment The comment being acted on, as an array or an object (it
 *                 is cast to an object before use).
 * @param $action  Specifies the action affecting the current comment.  Only
 *                 'view', 'insert', 'update' and 'delete' are handled.
 */
function spam_comment($comment, $action) {
  global $user;
  $comment = (object) $comment;

  /* Spam URLs can be embedded in the name, email adress or web address of
   * anonymous comments, so we append this to the comment text.  Note that
   * this can have the negative affect of causing an artificially high number
   * of URLs to appear in a comment.
   *
   * This is kept in a plain string; it used to be assigned as a property of
   * a variable that was never created.
   */
  $filter_text = "{$comment->comment} {$comment->name} {$comment->mail} {$comment->homepage}";
  switch ($action) {
    case 'view':
      if ($comment->submit == t('Post comment')) {

        // Previewing a new comment.
        if (user_access('bypass filter')) {
          spam_log(SPAM_DEBUG, t('spam_comment: skipping comment "%subject" for user "@name"', array(
            '%subject' => $comment->subject,
            '@name' => $user->name,
          )), 'comment', $comment->cid);
        }
        else {

          // Check if this IP has been blacklisted, if so we don't return...
          spam_ip_filter('comment', $comment->cid);
        }
      }
      break;
    case 'insert':
    case 'update':
      if (user_access('bypass filter')) {
        spam_log(SPAM_DEBUG, t('spam_comment: skipping comment "%subject" for user "@name"', array(
          '%subject' => $comment->subject,
          '@name' => $user->name,
        )), 'comment', $comment->cid);
        return;
      }
      if (variable_get('spam_filter_comments', 1)) {
        spam_log(SPAM_LOG, t('spam_comment: @action action for comment "%subject"', array(
          '@action' => $action,
          '%subject' => $comment->subject,
        )), 'comment', $comment->cid);
        spam_content_filter('comment', $comment->cid, $comment->subject, $filter_text);
      }
      break;
    case 'delete':

      // Forget everything the module knows about a comment once it is gone.
      spam_log(SPAM_LOG, t('spam_comment: deleting comment "%subject"', array(
        '%subject' => $comment->subject,
      )), 'comment', $comment->cid);
      db_query("DELETE FROM {spam_tracker} WHERE id = %d AND source = '%s'", $comment->cid, 'comment');
      db_query("DELETE FROM {spam_reported} WHERE id = %d AND source = '%s'", $comment->cid, 'comment');
      break;
  }
}

/**
 * Drupal _nodeapi hook.  Passes new node content through the spam filter.
 *
 * @param $node   The node being acted on (passed by reference).
 * @param $op     Specifies the current operation.  Only 'view', 'insert',
 *                'update' and 'delete' are handled.
 * @param $arg    Optional argument.  For 'view', the blacklist check only
 *                runs when this is exactly the default value 0.
 * @return        None.
 */
function spam_nodeapi(&$node, $op, $arg = 0) {
  global $user;
  switch ($op) {
    case 'view':
      if ($arg === 0) {
        if (user_access('bypass filter')) {
          spam_log(SPAM_DEBUG, t('spam_nodeapi: skipping node "%title" for user "@name"', array(
            '%title' => $node->title,
            '@name' => $user->name,
          )), 'node', $node->nid);
          return;
        }

        // check if this is a blacklisted spammer IP
        spam_ip_filter('node', $node->nid);
      }
      break;
    case 'insert':
    case 'update':
      if (user_access('bypass filter')) {

        // The placeholder key has to match the "@name" used in the string,
        // otherwise the user name is never substituted.
        spam_log(SPAM_DEBUG, t('spam_nodeapi: skipping node "%title" for user "@name"', array(
          '%title' => $node->title,
          '@name' => $user->name,
        )), 'node', $node->nid);
        return;
      }

      // Filtering is switched on per node type.
      if (variable_get("spam_filter_{$node->type}", 0)) {
        spam_log(SPAM_LOG, t('spam_nodeapi: %action action for node "%title"', array(
          '%action' => $op,
          '%title' => $node->title,
        )), 'node', $node->nid);
        spam_content_filter('node', $node->nid, $node->title, $node->body);
      }
      break;
    case 'delete':

      // Forget everything the module knows about a node once it is gone.
      spam_log(SPAM_LOG, t('spam_nodeapi: deleting node "%title"', array(
        '%title' => $node->title,
      )), 'node', $node->nid);
      db_query("DELETE FROM {spam_tracker} WHERE id = %d AND source = '%s'", $node->nid, 'node');
      db_query("DELETE FROM {spam_reported} WHERE id = %d AND source = '%s'", $node->nid, 'node');
      break;
  }
}

/**
 * Drupal _link hook.  Adds themable spam related links to content if enabled.
 *
 * Comments and nodes are both delegated to the 'spam_link' theme function,
 * which lets a theme decide how the links look.  Nothing is returned for any
 * other link type.
 *
 * @param $type   Type of content (comment, node, ...)
 * @param $node   The content the links are for; handed to the theme function
 *                unchanged.
 * @param $main   Not used by this implementation.
 *
 * @return        The result of theme('spam_link', ...) for comments and
 *                nodes, otherwise nothing.
 */
function spam_link($type, $node = 0, $main = 0) {
  $themed_types = array(
    'comment',
    'node',
  );
  foreach ($themed_types as $themed_type) {
    if ($type == $themed_type) {
      return theme('spam_link', $node, $themed_type);
    }
  }
}

/**
 * Drupal _perm hook.  Declares the permissions this module defines.
 *
 * @return array of permission names, in the order they are declared here.
 */
function spam_perm() {
  $permissions = array();
  $permissions[] = 'report spam';
  $permissions[] = 'access spam';
  $permissions[] = 'administer spam';
  $permissions[] = 'bypass filter';
  return $permissions;
}

/**
 * Implementation of hook_menu().
 *
 * @param $may_cache
 *   TRUE when the cacheable menu items are requested, FALSE when the items
 *   that depend on the current request are requested.
 *
 * @return
 *   Array of menu items.
 */
function spam_menu($may_cache) {
  $items = array();
  if ($may_cache) {

    // spam logs
    $items[] = array(
      'path' => 'admin/logs/spam',
      'title' => t('Spam logs'),
      'access' => user_access('administer spam'),
      'callback' => 'spam_logs_overview',
      'description' => t('Detect and manage spam posts.'),
    );
    $items[] = array(
      'path' => 'admin/logs/spam/logs',
      'title' => t('Logs'),
      'access' => user_access('access spam'),
      'callback' => 'spam_logs_overview',
      'type' => MENU_DEFAULT_LOCAL_TASK,
      'weight' => -10,
    );
    $items[] = array(
      'path' => 'admin/logs/spam/blockedips',
      'title' => t('Blocked IPs'),
      'access' => user_access('administer spam'),
      'callback' => 'spam_blocked_ips_overview',
      'type' => MENU_LOCAL_TASK,
    );
    $items[] = array(
      'path' => 'admin/content/spam/logs/entry',
      'access' => user_access('access spam'),
      'callback' => 'spam_logs_entry',

      // Drupal core defines no MENU_LOCAL_CALLBACK constant; MENU_CALLBACK is
      // the type for a path that is routed but never shown in the menu tree.
      'type' => MENU_CALLBACK,
    );
    $items[] = array(
      'path' => 'admin/content/comment/list/spam',
      'title' => t('Spam'),
      'access' => user_access('administer spam'),
      'callback' => 'drupal_get_form',
      'callback arguments' => array(
        'spam_comment_overview',
      ),
      'type' => MENU_LOCAL_TASK,
    );

    // Main module administration page
    $items[] = array(
      'path' => 'admin/settings/spam',
      'title' => t('Spam'),
      'access' => user_access('administer spam'),
      'callback' => 'spam_admin_settings',
      'description' => t('Control settings for the spam module.'),
    );
    $items[] = array(
      'path' => 'admin/settings/spam/filter',
      'title' => t('Filters'),
      'access' => user_access('administer spam'),
      'callback' => 'drupal_get_form',
      'callback arguments' => array(
        'spam_admin_settings_filter',
      ),
      'description' => t('Select content type filters for the spam module.'),
      'weight' => -5,
      'type' => MENU_LOCAL_TASK,
    );
    $items[] = array(
      'path' => 'admin/settings/spam/limits',
      'title' => t('Limits'),
      'access' => user_access('administer spam'),
      'callback' => 'drupal_get_form',
      'callback arguments' => array(
        'spam_admin_settings_limits',
      ),
      'description' => t('Set filtering limits for the spam module.'),
      'weight' => -5,
      'type' => MENU_LOCAL_TASK,
    );
    $items[] = array(
      'path' => 'admin/settings/spam/actions',
      'title' => t('Actions'),
      'access' => user_access('administer spam'),
      'callback' => 'drupal_get_form',
      'callback arguments' => array(
        'spam_admin_settings_actions',
      ),
      'description' => t('Select actions for the spam module.'),
      'weight' => -4,
      'type' => MENU_LOCAL_TASK,
    );
    $items[] = array(
      'path' => 'admin/settings/spam/advanced',
      'title' => t('Advanced'),
      'access' => user_access('administer spam'),
      'callback' => 'drupal_get_form',
      'callback arguments' => array(
        'spam_admin_settings_advanced',
      ),
      'description' => t('Set advanced option for the spam module.'),
      'weight' => -4,
      'type' => MENU_LOCAL_TASK,
    );
    $items[] = array(
      'path' => 'admin/settings/spam/custom',
      'title' => t('Custom filters'),
      'access' => user_access('administer spam'),
      'callback' => 'spam_custom_filter_overview',
      'type' => MENU_LOCAL_TASK,
      'weight' => -3,
    );
    $items[] = array(
      'path' => 'admin/settings/spam/url',
      'title' => t('URL filters'),
      'access' => user_access('administer spam'),
      'callback' => 'spam_url_filter_overview',
      'type' => MENU_LOCAL_TASK,
      'weight' => -3,
    );

    // node admin submenu
    $items[] = array(
      'path' => 'admin/content/node/list/spam',
      'title' => t('Spam'),
      'access' => user_access('administer spam'),
      'callback' => 'drupal_get_form',
      'callback arguments' => array(
        'spam_node_overview',
      ),
      'type' => MENU_LOCAL_TASK,
    );

    // spam page
    $items[] = array(
      'path' => 'spam',
      'title' => t('Spam'),
      'access' => user_access('administer spam'),
      'callback' => 'spam_page',
      'type' => MENU_CALLBACK,
    );

    // report spam page
    $items[] = array(
      'path' => 'spam/report',
      'title' => t('Spam'),
      'access' => user_access('report spam'),
      'callback' => 'drupal_get_form',
      'callback arguments' => array(
        'spam_user_report',
      ),
      'type' => MENU_CALLBACK,
    );

    // reported comment spam admin submenu
    $items[] = array(
      'path' => 'admin/content/comment/list/reported',
      'title' => t('Reported spam'),
      'access' => user_access('administer spam'),
      'callback' => 'drupal_get_form',
      'callback arguments' => array(
        'spam_reported_comments_overview',
      ),
      'type' => MENU_LOCAL_TASK,
      'weight' => 5,
    );

    // reported node spam admin submenu
    $items[] = array(
      'path' => 'admin/content/node/list/reported',
      'title' => t('Reported spam'),
      'access' => user_access('administer spam'),
      'callback' => 'drupal_get_form',
      'callback arguments' => array(
        'spam_reported_nodes_overview',
      ),
      'type' => MENU_LOCAL_TASK,
      'weight' => 5,
    );
  }
  else {

    /* admin/content/spam/reported/<source>/<id>/details
     * 0     1       2    3        4        5    6
     * The path embeds the source and id of the current request, so this item
     * can only be built outside of the cached menu.
     */
    if (arg(6) == 'details') {
      $source = arg(4);
      $id = arg(5);

      // reported spam details page
      $items[] = array(
        'path' => "admin/content/spam/reported/{$source}/{$id}/details",
        'title' => t('Reported spam details'),
        'access' => user_access('administer spam'),
        'callback' => 'spam_reported_details',
        'type' => MENU_CALLBACK,
      );
    }
  }
  return $items;
}

/**
 * Menu callback for spam/<source>/<id>/<op>.
 *
 * Manually marks a piece of content as spam or not spam: the content's tokens
 * are (re)trained, its spam_tracker row is updated or created, the default
 * actions are run, and the browser is redirected back to the content.
 *
 * The content is loaded directly for the 'comment' and 'node' sources.  Any
 * other source is delegated to external modules through the 'page' spam hook,
 * which may return the keys 'old', 'goto', 'goto_fragment', 'header' and
 * 'body'.
 */
function spam_page() {
  global $ID, $SOURCE;

  /* URL looks something like  spam/comment/12345/notspam
   *                           0    1       2     3
   * 0 has to be spam for us to be here.
   */
  $source = arg(1);
  $id = arg(2);
  $op = arg(3);
  if (!$op) {

    // invalid URL, let Drupal core generate a 404
    return drupal_not_found();
  }

  // TRUE (spam) or FALSE (not spam)
  // NOTE(review): every operation other than 'spam' is treated as "not spam",
  // not only 'notspam' -- confirm whether unknown operations should 404.
  $is_spam = $op == 'spam';
  $probability = $is_spam ? 99 : 1;

  // globals used in logging
  $ID = $id;
  $SOURCE = $source;

  // Defaults, so nothing below reads an undefined variable when a custom
  // source's hook does not provide a value.
  $old = NULL;
  $goto = NULL;
  $goto_fragment = NULL;
  $header = '';
  $body = '';

  switch ($source) {
    case 'comment':
      $comment = spam_load_comment($id);
      $old = spam_load('comment', $id);
      $goto = "node/{$comment->nid}";
      $goto_fragment = "comment-{$comment->cid}";
      $header = $comment->subject;
      $body = "{$comment->comment} {$comment->name} {$comment->mail} {$comment->homepage}";
      watchdog('spam', t('Spam: marked comment "%subject" as @spam', array(
        '%subject' => $comment->subject,
        '@spam' => $is_spam ? 'spam' : 'not spam',
      )));
      break;
    case 'node':
      $node = node_load(array(
        'nid' => $id,
      ));
      $old = spam_load('node', $id);
      $goto = "node/{$node->nid}";
      $goto_fragment = "";
      $header = $node->title;
      $body = $node->body;
      watchdog('spam', t('Spam: marked node "%title" as @spam', array(
        '%title' => $node->title,
        '@spam' => $is_spam ? 'spam' : 'not spam',
      )));
      break;
    default:

      /* Allow external modules the ability to handle custom content types.
       * Adds _spam_page($id) hook.
       */
      $hook = spam_invoke_hook('page', $id);
      if (!empty($hook['old'])) {
        $old = $hook['old'];
      }
      if (!empty($hook['goto'])) {
        $goto = $hook['goto'];
      }
      if (!empty($hook['goto_fragment'])) {
        $goto_fragment = $hook['goto_fragment'];
      }
      if (!empty($hook['header'])) {
        $header = $hook['header'];
      }
      if (!empty($hook['body'])) {
        $body = $hook['body'];
      }
      break;
  }

  $hash = md5($header . $body);
  $tokens = spam_tokenize($header, 'header*');
  $tokens = array_merge($tokens, spam_tokenize($body));
  if (!empty($old->id)) {

    // we've filtered this content before, now we're updating it
    spam_tokens_unsave($tokens, $is_spam);
    spam_tokens_save($tokens, $is_spam);
    db_query("UPDATE {spam_tracker} SET probability = %d, hash = '%s', timestamp = %d WHERE source = '%s' AND id = %d", $probability, $hash, time(), $source, $id);
  }
  else {

    // this is the first time we've filtered this content
    spam_tokens_save($tokens, $is_spam);
    db_query("INSERT INTO {spam_tracker} (source,id,probability,hostname,hash,timestamp) VALUES('%s',%d,%d,'%s','%s',%d)", $source, $id, $probability, $_SERVER['REMOTE_ADDR'], $hash, time());
  }
  spam_default_actions($source, $id, $header, $body, $probability, $old, FALSE);
  spam_log(SPAM_LOG, t('spam_page: @source manually marked as @spam', array(
    '@source' => $source,
    '@spam' => $is_spam ? 'spam' : 'not spam',
  )), $source, $id);
  drupal_goto($goto, NULL, $goto_fragment);
}

// internal functions

/**
 * Validation handler for the spam_user_report form.
 *
 * Rejects the submission when the logged in user has already reported the
 * same content.  Anonymous users are never rejected.
 *
 * @param $form_id
 *   Id of the form being validated.
 * @param $form_values
 *   Submitted form values (not inspected here).
 */
function spam_user_report_validate($form_id, $form_values) {
  global $user;

  // if admin lets anonymous users report spam, duplicates are allowed, so
  // there is nothing to look up for them
  if (!$user->uid) {
    return;
  }

  /* spam/report/<type>/<id>
   * 0    1      2      3
   */
  $source = arg(2);
  $id = arg(3);
  $previous = db_fetch_object(db_query("SELECT rid FROM {spam_reported} WHERE source = '%s' AND id = %d AND uid = %d", $source, $id, $user->uid));

  // db_fetch_object() yields no object when the user has not reported yet
  if ($previous && $previous->rid) {
    form_set_error('', t('You have already reported this content as spam.  The site administrator has been notified and will review the content shortly.'));
  }
}

/**
 * Submit handler for the spam_user_report form.
 *
 * Stores the report in {spam_reported}, thanks the user, writes a spam log
 * entry holding the beginning of the feedback, and redirects back to the
 * reported content.
 *
 * @param $form_id
 *   Id of the submitted form.
 * @param $form_values
 *   Submitted form values; 'spam_feedback' holds the user's explanation.
 */
function spam_user_report_submit($form_id, $form_values) {

  /* spam/report/<type>/<id>
   * 0    1      2      3
   */
  global $user;
  $source = arg(2);
  $id = arg(3);
  db_query("INSERT INTO {spam_reported} (source, id, uid, hostname, feedback, timestamp) VALUES('%s', %d, %d, '%s', '%s', %d)", $source, $id, $user->uid, $_SERVER['REMOTE_ADDR'], $form_values['spam_feedback'], time());
  drupal_set_message(t('Thank you for your assistance.  The site administrator has been notified and will review the content shortly.'));

  // Only the first 128 characters go into the log.  The length is measured and
  // cut in characters rather than bytes so that a multibyte UTF-8 character is
  // never split in half.
  $feedback = $form_values['spam_feedback'];
  if (drupal_strlen($feedback) >= 128) {
    $feedback = drupal_substr($feedback, 0, 128) . '...';
  }
  spam_log(SPAM_LOG, t('spam_report: reported as possible spam.  User feedback begins, "@feedback"', array(
    '@feedback' => $feedback,
  )), $source, $id);

  // work out where to send the user back to
  switch ($source) {
    case 'node':
      $url = "node/{$id}";
      break;
    case 'comment':
      $comment = db_fetch_object(db_query('SELECT subject,nid FROM {comments} WHERE cid = %d', $id));
      $url = "node/{$comment->nid}";
      break;
    default:

      // external modules supply the return path for their own content types
      $hook = spam_invoke_hook('report', $source, $id);
      $url = isset($hook['return_path']) ? $hook['return_path'] : NULL;
      break;
  }
  drupal_goto($url);
}

/**
 * Form builder for the "report spam" page (spam/report/<type>/<id>).
 *
 * Shows which content is being reported and asks the user for a short
 * explanation of why it is spam.
 *
 * @return
 *   Form array.
 */
function spam_user_report() {

  /* spam/report/<type>/<id>
   * 0    1      2      3
   */
  $source = arg(2);
  $id = arg(3);

  // build the sentence that tells the user what is being reported
  if ($source == 'node') {
    $node = node_load(array(
      'nid' => $id,
    ));
    $confirm_message = t('You are reporting the @type content "!title" as spam.', array(
      '@type' => node_get_types('name', $node),
      '!title' => l($node->title, "node/{$id}"),
    ));
  }
  elseif ($source == 'comment') {
    $comment = db_fetch_object(db_query('SELECT subject,nid FROM {comments} WHERE cid = %d', $id));
    $confirm_message = t('You are reporting the comment "!subject" as spam.', array(
      '!subject' => l($comment->subject, "node/{$comment->nid}", array(), NULL, "comment-{$id}"),
    ));
  }
  else {

    // other content types are described by the module that owns them
    $hook = spam_invoke_hook('report', $source, $id);
    $confirm_message = $hook['confirm_message'];
  }

  menu_set_location(array(
    array(
      'path' => '/',
      'title' => t('Report Spam'),
    ),
  ));

  $form['feedback'] = array(
    '#type' => 'fieldset',
    '#title' => t('Spam Feedback'),
    'message' => array(
      '#type' => 'markup',
      '#value' => $confirm_message,
      '#weight' => -1,
    ),
    'spam_feedback' => array(
      '#type' => 'textarea',
      '#title' => t('Please help us by entering a brief explanation as to why this content is spam'),
      '#rows' => 10,
      '#default_value' => '',
      '#weight' => 0,
      '#required' => variable_get('spam_report_feedback', TRUE),
      '#description' => t('It is not always obvious why content should be marked as spam, so please assist our efforts to keep these pages spam free by explaining how you know this content is spam.  Your assistance is greatly appreciated.  If you got to this page on accident, please press the "back" button on your web browser to return to the original page.'),
    ),
    'submit' => array(
      '#type' => 'submit',
      '#value' => t('Submit Report'),
      '#weight' => 5,
    ),
  );
  return $form;
}

/**
 * Called by other spam module functions to invoke optional _spam hooks in
 * external modules.
 *
 * Every enabled module that implements <module>_spam() is called with the hook
 * name and the optional arguments.  Array results are merged into the return
 * value with array_merge(); any other non-NULL result is appended to it.
 *
 * @param $name  Name of hook.
 * @param $arg1  Optional argument passed to hook.
 * @param $arg2  Optional argument passed to hook.
 * @param $arg3  Optional argument passed to hook.
 * @return       Array of the combined results of all hook implementations.
 */
function spam_invoke_hook($name, $arg1 = NULL, $arg2 = NULL, $arg3 = NULL) {
  $hook = array();
  foreach (module_list() as $module) {
    $function = $module . '_spam';
    if (!function_exists($function)) {
      continue;
    }
    $result = $function($name, $arg1, $arg2, $arg3);

    // It is the hook's result that decides how to aggregate, not the
    // accumulator: merging a NULL or scalar result would destroy $hook.
    if (is_array($result)) {
      $hook = array_merge($hook, $result);
    }
    elseif (isset($result)) {
      $hook[] = $result;
    }
  }
  return $hook;
}

/**
 * Implementation of hook_spam().
 *
 * This is a sample of the hook code.  It answers the 'tab_description' hook
 * with the descriptions of the settings tabs, keyed by tab name, and returns
 * an empty array for every other hook name.
 *
 * @param $name  Name of the hook being invoked.
 * @param $arg1  Unused.
 * @param $arg2  Unused.
 * @param $arg3  Unused.
 * @return       Array of tab descriptions, or an empty array.
 */
function spam_spam($name, $arg1, $arg2, $arg3) {

  // only the tab descriptions are provided by this module
  if ($name != 'tab_description') {
    return array();
  }
  return array(
    'Filters' => t('Select content type filters for the spam module.'),
    'Limits' => t('Set filtering limits for the spam module.'),
    'Actions' => t('Select actions for the spam module.'),
    'Advanced' => t('Set advanced option for the spam module.'),
    'Custom Filters' => t('Define specific rules for identifying spam.'),
    'URL Filters' => t('List specific URLs that will be identified as spam.'),
  );
}

/**
 * Divide a string into tokens.
 *
 * Three kinds of tokens are produced:
 *  - lowercased word tokens from the sanitized text, each prefixed with $tag;
 *  - every URL found in the original text, unchanged;
 *  - one "URL*<root domain>" token per URL whose root domain (ie sample.com)
 *    can be determined.
 * Every URL found is also registered with spam_urls_count(), and external
 * modules may add tokens through the 'tokenize' spam hook.
 *
 * @param $string  A text string to tokenize.
 * @param $tag     An optional tag to prepend to each word token.
 * @return         An array of tokens that were obtained from the string.
 */
function spam_tokenize($string, $tag = NULL) {
  $tokens = array();

  // Used both as a complete pattern (PCRE accepts the surrounding parentheses
  // as delimiters) and as a sub-pattern of the URL expressions below.
  $URI = "(http://|https://|ftp://|mailto:)";

  // strip out unwanted html/url noise
  $sanitized = preg_replace("'(www\\.)|(</a>)|(href=)|(target=)|(src=)'i", '', $string);
  $sanitized = preg_replace($URI, '', $sanitized);

  // Strip out values that should not be considered part of tokens, so
  // things like '{viagra}' and 'vi.agra' are counted as hits towards
  // 'viagra'
  $sanitized = preg_replace("/[()\\{\\}\\[\\]#.,]/", '', $sanitized);

  // Force all non URL* tokens to lowercase, again to aggregate
  // tokens.  This both lowers the total token number of rows in the
  // spam_tokens table and increases the strength of individual tokens
  // by linking them to capitalized versions.
  $sanitized = strtolower($sanitized);

  // divide sanitized string into tokens
  $delimiters = " \t\n\r-_<>'\"`/|*%^&+=~:;?";
  $tok = strtok($sanitized, $delimiters);
  while ($tok !== FALSE) {

    // tagged tokens are cut off after 254 bytes
    $tokens[] = htmlspecialchars(substr("{$tag}{$tok}", 0, 254));
    $tok = strtok($delimiters);
  }

  // second pass, grab urls from unsanitized string
  preg_match_all("!(<p>|[ \n\r\t\\(]*)({$URI}([a-zA-Z0-9@:%_~#?&=.,/;-]*[a-zA-Z0-9@:%_~#&=/;-]))([.,?]?)(?=(</p>|[ \n\r\t\\)]*))!i", $string, $urls);
  foreach ($urls[2] as $url) {
    $tokens[] = $url;
    spam_urls_count($url);
    $url = preg_replace("'{$URI}'", '', $url);

    // get full domain (ie www.sample.com)
    if (!preg_match("/^()?([^\\/\"\\']+)/i", $url, $domain)) {
      continue;
    }

    // Get root domain (ie sample.com).  Hosts without one (ie localhost) get
    // no URL* token: a bare "URL*" token carries no information and, once
    // learned as spam, would match any text in spam_url_filter().
    if (!preg_match("/[^\\.\\/]+\\.[^\\.\\/]+\$/", $domain[2], $root)) {
      continue;
    }
    $tokens[] = htmlspecialchars("URL*{$root[0]}");
  }

  // allow external module ability to extract additional tokens
  $hook = spam_invoke_hook('tokenize', $string);
  if (!empty($hook['tokens']) && is_array($hook['tokens'])) {
    $tokens = array_merge($tokens, $hook['tokens']);
  }
  return $tokens;
}

/**
 * Search the spam_tracker table to see if this new content is a duplicate
 * of earlier content.  If it is a duplicate, see if the content has been
 * duplicated more than the configured number of allowable times.  When the
 * limit is reached, the default spam actions are run for every tracked copy
 * and all copies are given a probability of 99.
 *
 * The limit comes from the 'spam_duplicate_content' variable; a value below
 * zero disables the check.
 *
 * @param $hash   The md5 hash of the content header and body.
 * @param $source Type of content (comment, node, etc).
 * @param $id     Content id (cid, nid, etc).
 * @param $header Content header, used when marking content as spam.
 * @param $body   Content body, used when marking content as spam.
 *
 * @return       FALSE = not spam   TRUE = duplicated too many times, spam
 */
function spam_duplicate_filter($hash, $source, $id, $header, $body) {
  $limit = variable_get('spam_duplicate_content', 2);
  if ($limit <= -1) {
    return FALSE;
  }

  // count the copies of this content other than the content itself
  $duplicate = db_fetch_object(db_query("SELECT COUNT(sid) AS count FROM {spam_tracker} WHERE hash = '%s' AND (source != '%s' OR id != %d)", $hash, $source, $id));
  if ($duplicate->count < $limit) {
    return FALSE;
  }
  spam_log(SPAM_DEBUG, t('spam_duplicate_filter: hash "@hash" found @count times.', array(
    '@hash' => $hash,
    '@count' => $duplicate->count,
  )), $source, $id);

  // run the default actions on every tracked copy
  $total = 0;
  $result = db_query("SELECT * FROM {spam_tracker} WHERE hash = '%s'", $hash);
  while ($copy = db_fetch_object($result)) {

    // the log entry describes the copy being marked, not the new content
    spam_log(SPAM_DEBUG, t('spam_duplicate_filter: marking duplicate @source "%header" as spam.', array(
      '@source' => $copy->source,
      '%header' => $header,
    )), $copy->source, $copy->id);
    spam_default_actions($copy->source, $copy->id, $header, $body, 99, $copy, FALSE, FALSE);
    $total++;
  }

  // for efficiency, we update all duplicate comments with one query
  db_query("UPDATE {spam_tracker} SET probability = 99, timestamp = %d WHERE hash = '%s'", time(), $hash);
  spam_log(SPAM_LOG, t('spam_duplicate_filter: @duplicates marked as spam', array(
    '@duplicates' => format_plural($total, '1 duplicate', '@count duplicates'),
  )), $source, $id);
  return TRUE;
}

/**
 * Simple Bayesian logic to determine the probability that the passed in array
 * of tokens is spam.
 *
 * The result is the average probability of the most "interesting" tokens,
 * that is the tokens whose probability is furthest away from 50%.  How many
 * tokens are examined is set by the 'spam_interesting_tokens' variable.
 *
 * @param $tokens  An array of tokens.
 *
 * @return         The probability that the array of tokens passed to this
 *                 function are spam, rounded to one decimal.  0 is returned
 *                 when there are no tokens to examine.
 */
function spam_bayesian_filter($tokens = array()) {

  /* Lookup each token in the database to assign a probability that the token
   * is spam.  If the token doesn't exist in the database (or has no
   * probability yet), assign a default probability.  Finally, calculate how
   * far this probability is away from a median of 50%.
   *
   * Both arrays are keyed by token, so repeated tokens only count once.
   */
  $default = variable_get('spam_default_probability', 40);
  $probabilities = array();
  $drift = array();
  foreach ($tokens as $token) {

    // TODO:  Optimize, this is a lot of database queries!
    $row = db_fetch_object(db_query("SELECT probability FROM {spam_tokens} WHERE token = '%s'", $token));
    $probability = ($row && $row->probability) ? $row->probability : $default;
    $probabilities[$token] = $probability;
    $drift[$token] = abs($probability - 50);
  }

  /* Sort token array so those tokens with the largest "drift" come first.
   * Drift is this distance from a median of 50%.
   */
  asort($drift);
  $ranked = array_reverse(array_keys($drift));

  /* Take the n most "interesting" tokens from the top of the token array.
   * The larger a token's drift, the more interesting it is.
   */
  $max = max(0, (int) variable_get('spam_interesting_tokens', 15));
  $interesting = array_slice($ranked, 0, $max);
  $total = 0;
  foreach ($interesting as $token) {
    $total = $total + $probabilities[$token];
    spam_log(SPAM_DEBUG, t("bayesian filter: token '@token' spam probability @percent%", array(
      '@token' => $token,
      '@percent' => $probabilities[$token],
    )));
  }

  // without any token to examine there is nothing to average
  $examined = count($interesting);
  $probability = $examined ? round($total / $examined, 1) : 0;
  spam_log(SPAM_LOG, t("bayesian filter: examined @num tokens, spam probability of @percent%", array(
    '@num' => $examined,
    '@percent' => $probability,
  )));
  return $probability;
}

/* Constants used by the custom filters.  The names are quoted: a bare word
 * passed to define() is itself an undefined constant.
 */

// filter styles
define('SPAM_CUSTOM_PLAIN', 0);
define('SPAM_CUSTOM_REGEX', 1);
define('SPAM_CUSTOM_URL', 2);

// filter effects
define('SPAM_CUSTOM_ALWAYS_SPAM', 0);
define('SPAM_CUSTOM_USUALLY_SPAM', 1);
define('SPAM_CUSTOM_USUALLY_NOTSPAM', 2);
define('SPAM_CUSTOM_NEVER_SPAM', 3);
define('SPAM_CUSTOM_DISABLED', 4);
define('SPAM_CUSTOM_MAYBE_SPAM', 5);
define('SPAM_CUSTOM_MAYBE_NOTSPAM', 6);

// filter action flags (bitmask)
define('SPAM_CUSTOM_ACTION_DELETE', 0x1);
define('SPAM_CUSTOM_ACTION_NOMAIL', 0x2);
define('SPAM_CUSTOM_ACTION_HEADER', 0x4);
define('SPAM_CUSTOM_ACTION_BODY', 0x8);

/**
 * Run the enabled custom filters against a piece of content.
 *
 * Each filter is a text string, URL or pattern that is searched for in the
 * header, the body, or both, depending on the filter's action flags.  Every
 * match adds the weight that belongs to the filter's effect.  Matching a
 * filter that carries the delete or nomail flag increments the corresponding
 * counter in $action, telling the calling function how to treat the content.
 *
 * @param $header  Header portion of the content.
 * @param $body    Body portion of the content.
 * @param $action  Array, passed by reference, in which the counters
 *                 SPAM_CUSTOM_ACTION_DELETE and SPAM_CUSTOM_ACTION_NOMAIL are
 *                 incremented for matching filters.
 *
 * @return         Weight applied if text matches one or more custom filters.
 */
function spam_custom_filter($header, $body, &$action) {
  $weight = 0;
  $matches = 0;
  $result = db_query('SELECT scid, filter, style, effect, action FROM {spam_custom} WHERE effect != %d', SPAM_CUSTOM_DISABLED);
  while ($filter = db_fetch_object($result)) {

    // determine which portion of the content we are scanning
    if ($filter->action & SPAM_CUSTOM_ACTION_HEADER) {
      $text = $header;
    }
    elseif ($filter->action & SPAM_CUSTOM_ACTION_BODY) {
      $text = $body;
    }
    else {
      $text = $header . ' ' . $body;
    }

    // Scan the content.  The counter starts at zero for every filter so that a
    // filter with an unknown style can not inherit the previous filter's
    // matches; a failing pattern counts as no match.
    // NOTE(review): plain and URL filters are interpolated into the pattern
    // unquoted, so '/' and regex metacharacters in them are not taken
    // literally -- confirm whether filters are escaped when they are saved.
    $match = 0;
    switch ($filter->style) {
      case SPAM_CUSTOM_PLAIN:
      case SPAM_CUSTOM_URL:
        $match = (int) preg_match_all("/{$filter->filter}/", $text, $temporary);
        break;
      case SPAM_CUSTOM_REGEX:
        $match = (int) preg_match_all($filter->filter, $text, $temporary);
        break;
    }
    $matches += $match;

    // if matching, perform actions and apply weights
    if ($match) {
      db_query('UPDATE {spam_custom} SET matches = matches + %d, last = %d WHERE scid = %d', $match, time(), $filter->scid);
      if ($filter->action & SPAM_CUSTOM_ACTION_DELETE) {
        $action[SPAM_CUSTOM_ACTION_DELETE] = isset($action[SPAM_CUSTOM_ACTION_DELETE]) ? $action[SPAM_CUSTOM_ACTION_DELETE] + 1 : 1;
      }
      if ($filter->action & SPAM_CUSTOM_ACTION_NOMAIL) {
        $action[SPAM_CUSTOM_ACTION_NOMAIL] = isset($action[SPAM_CUSTOM_ACTION_NOMAIL]) ? $action[SPAM_CUSTOM_ACTION_NOMAIL] + 1 : 1;
      }

      // add or subtract appropriate weight for each match
      switch ($filter->effect) {
        case SPAM_CUSTOM_ALWAYS_SPAM:
          $weight += $match * WEIGHT_ALWAYS_SPAM;
          spam_log(SPAM_DEBUG, t("custom filters: matched 'always spam' filter @num times, added @weight weight, matched filter '%filter'", array(
            '@num' => $match,
            '@weight' => $match * WEIGHT_ALWAYS_SPAM,
            '%filter' => $filter->filter,
          )));
          break;
        case SPAM_CUSTOM_USUALLY_SPAM:
          $weight += $match * WEIGHT_USUALLY_SPAM;
          spam_log(SPAM_DEBUG, t("custom filters: matched 'usually spam' filter @num times, added @weight weight, matched filter '%filter'", array(
            '@num' => $match,
            '@weight' => $match * WEIGHT_USUALLY_SPAM,
            '%filter' => $filter->filter,
          )));
          break;
        case SPAM_CUSTOM_MAYBE_SPAM:
          $weight += $match * WEIGHT_MAYBE_SPAM;
          spam_log(SPAM_DEBUG, t("custom filters: matched 'maybe spam' filter @num times, added @weight weight, matched filter '%filter'", array(
            '@num' => $match,
            '@weight' => $match * WEIGHT_MAYBE_SPAM,
            '%filter' => $filter->filter,
          )));
          break;
        case SPAM_CUSTOM_MAYBE_NOTSPAM:
          $weight += $match * WEIGHT_MAYBE_NOTSPAM;
          spam_log(SPAM_DEBUG, t("custom filters: matched 'maybe not spam' filter @num times, added @weight weight, matched filter '%filter'", array(
            '@num' => $match,
            '@weight' => $match * WEIGHT_MAYBE_NOTSPAM,
            '%filter' => $filter->filter,
          )));
          break;
        case SPAM_CUSTOM_USUALLY_NOTSPAM:
          $weight += $match * WEIGHT_RARELY_SPAM;
          spam_log(SPAM_DEBUG, t("custom filters: matched 'rarely spam' filter @num times, added @weight weight, matched filter '%filter'", array(
            '@num' => $match,
            '@weight' => $match * WEIGHT_RARELY_SPAM,
            '%filter' => $filter->filter,
          )));
          break;
        case SPAM_CUSTOM_NEVER_SPAM:
          $weight += $match * WEIGHT_NEVER_SPAM;
          spam_log(SPAM_DEBUG, t("custom filters: matched 'never spam' filter @num times, added @weight weight, matched filter '%filter'", array(
            '@num' => $match,
            '@weight' => $match * WEIGHT_NEVER_SPAM,
            '%filter' => $filter->filter,
          )));
          break;
      }
    }
  }
  if ($matches != 0) {
    spam_log(SPAM_LOG, t("custom filters: matched @num times, adding weight of @weight", array(
      '@num' => $matches,
      '@weight' => $weight,
    )));
  }
  return $weight;
}
/**
 * Search the passed in text for URLs that are known to be spam.
 *
 * The known URLs are the "URL*" tokens in {spam_tokens} whose probability has
 * reached the 'spam_threshold' variable.  Nothing is searched when the
 * 'spam_filter_urls' variable is off.
 *
 * @param $text  String of text to search.
 *
 * @return       Weight applied for the matches found, 0 if there are none.
 */
function spam_url_filter($text) {
  $weight = 0;
  $matches = 0;
  if (variable_get('spam_filter_urls', 1)) {
    $result = db_query("SELECT token FROM {spam_tokens} WHERE probability >= %d AND token LIKE 'URL*%%'", variable_get('spam_threshold', 80));
    while ($row = db_fetch_object($result)) {
      $url = preg_replace('/^URL\\*/', '', $row->token);

      // an empty pattern would match at every position of any text
      if ($url === '') {
        continue;
      }

      // the token is a literal piece of text, not a regular expression
      $match = (int) preg_match_all('!' . preg_quote($url, '!') . '!', $text, $temporary);
      $matches += $match;
      if ($match) {
        $weight += $match * WEIGHT_ALWAYS_SPAM;
        spam_log(SPAM_DEBUG, t("url filter: added @weight weight, matched URL '%url'", array(
          '@weight' => $match * WEIGHT_ALWAYS_SPAM,
          '%url' => $url,
        )));
      }
    }
  }
  if ($matches != 0) {
    spam_log(SPAM_LOG, t("url filter: matched @num times, adding weight of @weight", array(
      '@num' => $matches,
      '@weight' => $weight,
    )));
  }
  return $weight;
}

/**
 * Keep track of the total number of URLs found in the current content.
 *
 * The counters live in a static array, so they accumulate over every call
 * made during the current request.
 *
 * @param $url  A URL to be added to the static array.  When omitted, nothing
 *              is counted and the current counters are returned.
 * @return      Array of URLs showing how many times each URL is present, and
 *              the total number of URLs counted in the 'total' element.
 */
function spam_urls_count($url = NULL) {

  // build up an array of all URLs seen in current content
  static $urls = array();
  if ($url != NULL) {
    $key = "{$url}";

    // start both counters at zero the first time they are needed
    if (!isset($urls[$key])) {
      $urls[$key] = 0;
    }
    if (!isset($urls['total'])) {
      $urls['total'] = 0;
    }
    $urls[$key]++;
    $urls['total']++;
  }
  return $urls;
}

/**
 * Check if content has too many total urls, or if the same url is repeated
 * too many times.
 *
 * The limits come from the 'spam_urls_total' and 'spam_urls_repeat'
 * variables; a value below zero disables the respective check.
 *
 * @param $urls An array indicating how many times each url is found in the
 *              content, as generated by spam_urls_count().
 * @return      Weight added to spam probability, 0 if no limit is exceeded.
 */
function spam_url_limit($urls = array()) {
  $weight = 0;

  // Separate the grand total from the per-url counters up front, so that
  // neither check depends on where 'total' ends up after sorting and an empty
  // array (content without urls) is handled as well.
  $total = isset($urls['total']) ? $urls['total'] : 0;
  unset($urls['total']);

  $limit = variable_get('spam_urls_total', 10);
  if ($limit > -1 && $total > $limit) {
    $weight += WEIGHT_ALWAYS_SPAM;
    spam_log(SPAM_LOG, t("url limit: total of @num urls, adding weight of @weight", array(
      '@num' => $total,
      '@weight' => WEIGHT_ALWAYS_SPAM,
    )));
  }

  $limit = variable_get('spam_urls_repeat', 5);
  if ($limit > -1 && $urls) {

    // the url that is repeated most often
    $max = max($urls);
    if ($max > $limit) {
      $weight += WEIGHT_ALWAYS_SPAM;
      spam_log(SPAM_LOG, t("url limit: repeated one url @num times, adding weight of @weight", array(
        '@num' => $max,
        '@weight' => WEIGHT_ALWAYS_SPAM,
      )));
    }
  }
  return $weight;
}

/**
 * Record that each of the passed in tokens was seen in spam or in non-spam
 * content, and update the spam probability of the token accordingly.
 *
 * @param $tokens   An array of tokens.
 * @param $is_spam  TRUE if the tokens come from spam, FALSE if they come from
 *                  content that is not spam.
 */
function spam_tokens_save($tokens, $is_spam) {
  foreach ($tokens as $token) {
    $old = db_fetch_object(db_query("SELECT spam,notspam FROM {spam_tokens} WHERE token = '%s'", $token));
    if ($old) {

      // updating an existing token: the share of spam sightings, including
      // this one, expressed as a percentage
      $total = $old->spam + $old->notspam + 1;
      $probability = ($old->spam + ($is_spam ? 1 : 0)) / $total * 100;
      $probability = spam_get_probability($probability);
      if ($is_spam) {
        db_query("UPDATE {spam_tokens} SET spam = spam + 1, probability = %d, last = %d WHERE token = '%s'", $probability, time(), $token);
      }
      else {
        db_query("UPDATE {spam_tokens} SET notspam = notspam + 1, probability = %d, last = %d WHERE token = '%s'", $probability, time(), $token);
      }
    }
    else {

      // adding a new token
      $probability = $is_spam ? 99 : 1;

      // NOTE(review): errors of this insert are deliberately suppressed,
      // presumably to tolerate a token that was inserted in the meantime --
      // confirm.
      @db_query("INSERT INTO {spam_tokens} (token, spam, notspam, probability, last) VALUES('%s', %d, %d, %d, %d)", $token, $is_spam ? 1 : 0, $is_spam ? 0 : 1, $probability, time());
    }
  }
}
/**
 * Take back an earlier sighting of each of the passed in tokens, as the
 * content they come from is being re-classified.
 *
 * The counter opposite to the new classification is decremented (never below
 * zero) and the probability of the token is recalculated.  Tokens that are
 * not in the database are skipped.
 *
 * @param $tokens   An array of tokens.
 * @param $is_spam  The NEW classification of the content: TRUE decrements the
 *                  "notspam" counters, FALSE decrements the "spam" counters.
 */
function spam_tokens_unsave($tokens, $is_spam) {
  foreach ($tokens as $token) {
    $old = db_fetch_object(db_query("SELECT spam,notspam FROM {spam_tokens} WHERE token = '%s'", $token));
    if (!$old) {
      continue;
    }
    $spam = $old->spam;
    $notspam = $old->notspam;
    if ($is_spam) {

      // be sure the notspam counter doesn't go negative
      if ($old->notspam > 0) {
        $notspam = $old->notspam - 1;
      }
      else {
        $notspam = 0;

        // TODO: find out why this happens with trackbacks.
        spam_log(SPAM_DEBUG, t('spam_tokens_unsave: invalid attempt to set "notspam" value for token "@token" to @value.', array(
          '@token' => $token,
          '@value' => $old->notspam - 1,
        )));
      }
    }
    else {

      // be sure the spam counter doesn't go negative
      if ($old->spam > 0) {
        $spam = $old->spam - 1;
      }
      else {
        $spam = 0;

        // TODO: find out why this happens with trackbacks.
        spam_log(SPAM_DEBUG, t('spam_tokens_unsave: invalid attempt to set "spam" value for token "@token" to @value.', array(
          '@token' => $token,
          '@value' => $old->spam - 1,
        )));
      }
    }

    // updating an existing token
    $total = $spam + $notspam;
    if ($total == 0) {
      $probability = 0;
    }
    else {
      $probability = $spam / $total * 100;
      $probability = spam_get_probability($probability);
    }
    db_query("UPDATE {spam_tokens} SET spam = %d, notspam = %d, probability = %d, last = %d WHERE token = '%s'", $spam, $notspam, $probability, time(), $token);
  }
}

/**
 * Display spam status (if enabled), and provide administrators with links to
 * mark content as spam or not spam.
 *
 * No links are returned when spam filtering is not enabled for the type of
 * content.  Which links are returned depends on the 'access spam', 'report
 * spam' and 'administer spam' permissions of the current user.
 *
 * @param $content  comment or node object
 * @param $type     comment or node
 *
 * @return          Array of links, keyed by 'spam-spam', 'spam-notspam',
 *                  'spam-probability' and 'spam-report'.
 */
function theme_spam_link($content, $type = 'comment') {
  $output = array();
  if ($type == 'comment' && variable_get('spam_filter_comments', 1)) {
    $id = $content->cid;
  }
  elseif ($type == 'node' && variable_get("spam_filter_{$content->type}", 0)) {
    $id = $content->nid;
  }
  else {
    return $output;
  }

  // content that was never filtered has no tracker row, hence no probability
  $p = db_fetch_object(db_query("SELECT probability FROM {spam_tracker} WHERE id = %d AND source = '%s'", $id, $type));
  $probability = $p ? $p->probability : NULL;

  $spam = array(
    'href' => "spam/{$type}/{$id}/spam",
    'title' => t('mark as spam'),
  );
  $notspam = array(
    'href' => "spam/{$type}/{$id}/notspam",
    'title' => t('mark as not spam'),
  );
  $access = user_access('access spam');
  $report = user_access('report spam');
  $admin = user_access('administer spam');
  $display = variable_get('spam_display_probability', 0);

  // when logging is enabled, the probability links to the log of the content
  // NOTE(review): no menu item for this exact path is visible in spam_menu()
  // -- confirm that the link target exists.
  if (variable_get('spam_log_level', SPAM_LOG)) {
    $display_text = " (" . l($probability, "admin/content/spam/logs/{$type}/{$id}") . ")";
  }
  else {
    $display_text = " ({$probability})";
  }

  if (!$probability && $admin) {

    // unclassified content: offer both choices
    $output['spam-spam'] = $spam;
    $output['spam-notspam'] = $notspam;
  }
  elseif ($probability < variable_get('spam_threshold', 80)) {
    if ($access) {
      $output['spam-probability'] = array(
        'title' => t('not spam') . ($display ? $display_text : ''),
        'html' => TRUE,
      );
    }
    if ($admin) {
      $output['spam-spam'] = $spam;
    }
  }
  else {
    if ($access) {
      $output['spam-probability'] = array(
        'title' => t('spam') . ($display ? $display_text : ''),
        'html' => TRUE,
      );
    }
    if ($admin) {
      $output['spam-notspam'] = $notspam;
    }
  }
  if ($report) {
    $output['spam-report'] = array(
      'href' => "spam/report/{$type}/{$id}",
      "title" => t('report spam'),
    );
  }
  return $output;
}

/**
 * Tell whether a probability reaches the configured spam threshold.
 *
 * The threshold is read from the 'spam_threshold' variable (default 80).
 *
 * @param $probability  1-99% chance of being spam.
 *
 * @return  0 = not spam,  1 = spam
 */
function _is_spam($probability) {
  $threshold = variable_get('spam_threshold', 80);
  return $probability >= $threshold ? 1 : 0;
}

/**
 * Displays an overview of the latest spam log entries.
 *
 * @param $source
 *   The type of spam log entry to view.
 * @param $id
 *   The specific set of log entries to view.
 *
 * @return
 *   Themed, sortable table of log entries (50 per page) followed by a pager.
 */
function spam_logs_overview($source = NULL, $id = NULL) {
  drupal_set_title(t('Spam module logs'));
  $header = array(
    array(
      'data' => t('type'),
      'field' => 'source',
    ),
    array(
      'data' => t('id'),
      'field' => 'id',
    ),
    array(
      'data' => t('date'),
      'field' => 'timestamp',
    ),
    array(
      'data' => t('message'),
      'field' => 'entry',
    ),
    array(
      'data' => t('user'),
      'field' => 'uid',
    ),
    array(
      'data' => t('more'),
      'field' => 'sid',
      'sort' => 'desc',
    ),
  );

  // Narrow the listing to one source, or to one piece of content, when the
  // corresponding arguments are given.
  $sql = "SELECT * FROM {spam_log}";
  $arguments = array();
  if ($id) {
    $sql .= " WHERE source = '%s' AND id = %d";
    $arguments = array(
      $source,
      $id,
    );
  }
  elseif ($source) {
    $sql .= " WHERE source = '%s'";
    $arguments = array(
      $source,
    );
  }

  // Start with an empty list so the "no messages" check below never reads an
  // undefined variable.
  $rows = array();
  $result = pager_query($sql . tablesort_sql($header), 50, 0, NULL, $arguments);
  while ($log = db_fetch_object($result)) {
    $rows[] = array(
      'data' => array(
        t($log->source),
        $log->id,
        format_date($log->timestamp, 'small'),
        truncate_utf8($log->entry, 64),
        theme('username', user_load(array(
          'uid' => $log->uid,
        ))),
        l(t('details'), "admin/content/spam/logs/entry/{$log->sid}"),
      ),
    );
  }
  if (!$rows) {
    $rows[] = array(
      array(
        'data' => t('No log messages available.'),
        'colspan' => 6,
      ),
    );
  }
  return theme('table', $header, $rows) . theme('pager', NULL, 50, 0);
}

/**
 * Displays complete information about a single log entry ID.
 *
 * @param $id
 *   The log entry to get details of.
 *
 * @return
 *   Themed table describing the entry, or NULL when no id was given or no
 *   matching log entry exists.
 */
function spam_logs_entry($id = NULL) {
  if (!$id) {
    return NULL;
  }
  $entry = db_fetch_object(db_query('SELECT s.*, u.name, u.uid FROM {spam_log} s INNER JOIN {users} u ON s.uid = u.uid WHERE s.sid = %d', $id));
  if (!$entry) {
    // Unknown log id: there is nothing to show.
    return NULL;
  }

  // NOTE(review): the two links below use "admin/logs/spam/logs/..." while
  // other links in this file point at "admin/content/spam/logs/..." --
  // confirm against the menu definitions which path is registered.
  $rows = array();
  $rows[] = array(
    array(
      'data' => t('Type'),
      'header' => TRUE,
    ),
    array(
      'data' => l(t($entry->source), "admin/logs/spam/logs/{$entry->source}"),
    ),
  );
  $rows[] = array(
    array(
      'data' => t('!type ID', array(
        '!type' => ucfirst($entry->source),
      )),
      'header' => TRUE,
    ),
    array(
      // The id is a number, so it is not passed through t().
      'data' => l($entry->id, "admin/logs/spam/logs/{$entry->source}/{$entry->id}"),
    ),
  );
  $rows[] = array(
    array(
      'data' => t('Date'),
      'header' => TRUE,
    ),
    array(
      'data' => format_date($entry->timestamp, 'large'),
    ),
  );
  $rows[] = array(
    array(
      'data' => t('User'),
      'header' => TRUE,
    ),
    array(
      // $entry carries the joined user's name and uid.
      'data' => theme('username', $entry),
    ),
  );
  $rows[] = array(
    array(
      'data' => t('Message'),
      'header' => TRUE,
    ),
    array(
      'data' => $entry->entry,
    ),
  );
  $rows[] = array(
    array(
      'data' => t('Hostname'),
      'header' => TRUE,
    ),
    array(
      'data' => $entry->hostname,
    ),
  );
  return theme('table', NULL, $rows);
}

/**
 * Manage custom spam filters.
 *
 * With a numeric $id and a known $action the matching form is returned for
 * that filter; otherwise a sortable table of all custom filters is shown,
 * followed by the form for adding a new filter.
 *
 * @param $id
 *   The ID of a custom filter to do an action upon.
 * @param $action
 *   One of "scan", "delete", or "edit".
 */
function spam_custom_filter_overview($id = NULL, $action = NULL) {

  // hand off action processing to other functions, but only when the filter
  // actually exists; otherwise fall through to the overview.
  if (isset($action) && isset($id) && is_numeric($id)) {
    $filter = spam_custom_filter_load($id);
    if ($filter) {
      switch ($action) {
        case "scan":
          return drupal_get_form('spam_custom_filter_scan', $filter);
        case "edit":
          return drupal_get_form('spam_custom_filter_edit', $filter);
        case "delete":
          return drupal_get_form('spam_custom_filter_delete', $filter);
      }
    }
  }
  drupal_set_title(t('Custom spam filters'));
  $spam_effects = array(
    SPAM_CUSTOM_DISABLED => t('disabled'),
    SPAM_CUSTOM_ALWAYS_SPAM => t('always spam'),
    SPAM_CUSTOM_USUALLY_SPAM => t('usually spam'),
    SPAM_CUSTOM_MAYBE_SPAM => t('maybe spam'),
    SPAM_CUSTOM_MAYBE_NOTSPAM => t('maybe not spam'),
    SPAM_CUSTOM_USUALLY_NOTSPAM => t('usually not spam'),
    SPAM_CUSTOM_NEVER_SPAM => t('never spam'),
  );
  $headers = array(
    array(
      'data' => t('filter'),
      'field' => 'filter',
      'sort' => 'asc',
    ),
    array(
      'data' => t('style'),
      'field' => 'style',
    ),
    array(
      'data' => t('scan'),
      'field' => 'action',
    ),
    array(
      'data' => t('effect'),
      'field' => 'effect',
    ),
    array(
      'data' => t('action'),
      'field' => 'action',
    ),
    array(
      'data' => t('matches'),
      'field' => 'matches',
    ),
    array(
      'data' => t('last match'),
      'field' => 'last',
    ),
    array(
      'data' => t('operations'),
      'colspan' => 3,
    ),
  );
  $rows = array();
  $result = pager_query('SELECT * FROM {spam_custom}' . tablesort_sql($headers), 25);
  while ($custom = db_fetch_object($result)) {

    // The action column is a bit field: which part of the content is
    // scanned, plus the optional DELETE / NOMAIL flags.
    if ($custom->action & SPAM_CUSTOM_ACTION_HEADER) {
      $scan = t('header only');
    }
    elseif ($custom->action & SPAM_CUSTOM_ACTION_BODY) {
      $scan = t('body only');
    }
    else {
      $scan = t('everything');
    }

    // Collected in a separate variable so the $action argument is not
    // overwritten.
    $flags = array();
    if ($custom->action & SPAM_CUSTOM_ACTION_DELETE) {
      $flags[] = t('DELETE');
    }
    if ($custom->action & SPAM_CUSTOM_ACTION_NOMAIL) {
      $flags[] = t('NOMAIL');
    }
    $flag_text = $flags ? implode(', ', $flags) : t('none');

    $rows[] = array(
      'data' => array(
        htmlspecialchars($custom->filter),
        $custom->style == 0 ? t('text') : t('regex'),
        $scan,
        $spam_effects["{$custom->effect}"],
        $flag_text,
        $custom->matches == 0 ? t('none') : $custom->matches,
        $custom->last ? format_date($custom->last, 'small') : t('none'),
        l(t('scan'), "admin/settings/spam/custom/{$custom->scid}/scan") . "&nbsp;" . l(t('edit'), "admin/settings/spam/custom/{$custom->scid}/edit") . "&nbsp;" . l(t('delete'), "admin/settings/spam/custom/{$custom->scid}/delete"),
      ),
    );
  }
  $output = theme('table', $headers, $rows);
  $output .= theme('pager', NULL, 25, 0);
  $output .= drupal_get_form('spam_custom_filter_edit');
  return $output;
}

/**
 * Display form for adding/editing custom filters.
 *
 * @param $filter
 *   A loaded filter for editing, or NULL to build the empty "add" form.
 *
 * @return
 *   Form API array.
 */
function spam_custom_filter_edit($filter = NULL) {
  // Without a loaded filter this is the "add" form; never dereference
  // $filter in that case.
  $editing = is_object($filter);

  // Reduce the stored action bit field to the value of the "scan" radios.
  $action = 0;
  if ($editing) {
    if ($filter->action & SPAM_CUSTOM_ACTION_HEADER) {
      $action = SPAM_CUSTOM_ACTION_HEADER;
    }
    elseif ($filter->action & SPAM_CUSTOM_ACTION_BODY) {
      $action = SPAM_CUSTOM_ACTION_BODY;
    }
  }
  $form['scid'] = array(
    '#type' => 'hidden',
    '#value' => $editing ? $filter->scid : '',
  );
  $form['options'] = array(
    '#collapsible' => TRUE,
    '#title' => t('Custom filter'),
    '#type' => 'fieldset',
  );
  $form['options']['filter'] = array(
    '#default_value' => $editing ? $filter->filter : '',
    '#description' => t('Enter a custom filter string. You can enter a word, a phrase, or a complete regular expression. All new content that is being scanned for spam will also be tested against your custom filters.'),
    '#type' => 'textfield',
    '#title' => t('Filter expression'),
  );
  $form['options']['style'] = array(
    '#description' => t('Select the format of this filter. Note that if you select "regular expression" you need to format the expression as a <a href="http://www.php.net/manual/en/ref.pcre.php">Perl-compatible regular expression</a>. As a simple example, to do a case-insensitve match on the word "viagra", you would enter "<code>/viagra/i</code>" without the quotes.'),
    '#default_value' => $editing ? $filter->style : '',
    '#options' => array(
      SPAM_CUSTOM_PLAIN => t('plain text'),
      SPAM_CUSTOM_REGEX => t('regular expression'),
    ),
    '#required' => TRUE,
    '#type' => 'radios',
    '#title' => t('Filter type'),
  );
  $form['options']['scan'] = array(
    '#default_value' => $action,
    '#description' => t('By default, the custom filter will scan both the header and the body of new content, but you can configure it to only scan the header or only the body of new content. For example, if "header only" is selected, the filter will only attempt to match against the subject of a new comment, and the title of a new node.'),
    '#options' => array(
      t('everything'),
      SPAM_CUSTOM_ACTION_HEADER => t('header only'),
      SPAM_CUSTOM_ACTION_BODY => t('body only'),
    ),
    '#required' => TRUE,
    '#type' => 'radios',
    '#title' => t('Scan'),
  );
  $form['options']['effect'] = array(
    '#default_value' => $editing ? $filter->effect : '',
    '#description' => t('Define the effect when your custom filter matches on new content. If your filter defines "always spam", this increases the chances the new content will be marked spam by 200%. If your filter defines "usually spam", this increases the chances the new content will be marked spam by 50%. If you filter defines "maybe spam", this increases the chances the new content will be marked spam by 20%. If your filter defines "maybe not spam", this decreases the chances the new content will be marked spam by 20%. If your filter defines "usually not spam", this decreases the chances the new content will be marked spam by 50%. And if your filter defines "never spam", this decreases the chances the new content will be marked spam by 200%. Note that it is possible to match both an "always spam" and a "never spam" filter with the same content, and that then the filters will cancel each other out. Additionally, four "usually not spam" matches will cancel out one "always spam" match.'),
    '#options' => array(
      SPAM_CUSTOM_DISABLED => t('disabled'),
      SPAM_CUSTOM_ALWAYS_SPAM => t('always spam (+200%)'),
      SPAM_CUSTOM_USUALLY_SPAM => t('usually spam (+50%)'),
      SPAM_CUSTOM_MAYBE_SPAM => t('maybe spam (+20%)'),
      SPAM_CUSTOM_MAYBE_NOTSPAM => t('maybe not spam (-20%)'),
      SPAM_CUSTOM_USUALLY_NOTSPAM => t('usually not spam (-50%)'),
      SPAM_CUSTOM_NEVER_SPAM => t('never spam (-200%)'),
    ),
    '#required' => TRUE,
    '#title' => t('Match effect'),
    '#type' => 'radios',
  );
  $form['options']['nomail'] = array(
    '#description' => t('Checking this box will cause any new content that matches this filter and ultimately is determined to be spam to not generate a notification email.'),
    '#default_value' => $editing ? $filter->action & SPAM_CUSTOM_ACTION_NOMAIL : '',
    '#title' => t('Disable email notification'),
    '#type' => 'checkbox',
  );
  $form['options']['autodelete'] = array(
    '#default_value' => $editing ? $filter->action & SPAM_CUSTOM_ACTION_DELETE : '',
    '#description' => t('Checking this box will cause any new content that matches this filter and ultimately is determined to be spam to be automatically (and silently) deleted. It is not recommended that you enable this option unless you are fully confident that it will never match non-spam content.'),
    '#title' => t('Automatically delete spam'),
    '#type' => 'checkbox',
  );
  $form['options']['submit'] = array(
    '#type' => 'submit',
    '#value' => $editing ? t('Update filter') : t('Add filter'),
  );
  return $form;
}

/**
 * Form API hook; validate the custom filter.
 *
 * Rejects regular expressions that PCRE cannot compile and, when adding a
 * new filter, rejects a filter string that already exists in {spam_custom}.
 */
function spam_custom_filter_edit_validate($form_id, $form_values) {
  if (empty($form_values['filter'])) {
    return;
  }

  // preg_match() returns FALSE for a pattern it cannot compile.  The PHP
  // warning it raises at the same time is suppressed because the problem is
  // reported to the user as a form error instead.
  if ($form_values['style'] == 1 && @preg_match($form_values['filter'], 'test') === FALSE) {
    form_set_error('filter', t('Your regular expression @regex does not validate.', array(
      '@regex' => $form_values['filter'],
    )));
  }

  // The hidden "scid" element is always submitted (as an empty string for a
  // new filter), so test for emptiness rather than existence.
  if (empty($form_values['scid'])) {

    // there's no reason to allow duplicate filters
    $duplicate = db_fetch_object(db_query("SELECT scid FROM {spam_custom} WHERE filter = '%s'", $form_values['filter']));
    if ($duplicate && $duplicate->scid) {
      form_set_error('filter', t('Custom filter %filter already exists.', array(
        '%filter' => $form_values['filter'],
      )));
    }
  }
}

/**
 * Form API hook; submit the custom filter.
 *
 * Combines the scan choice and the two checkboxes into the stored action
 * value, then updates the existing filter (when "scid" is set) or inserts a
 * new one.
 *
 * @return
 *   Path of the custom filter overview page.
 */
function spam_custom_filter_edit_submit($form_id, $form_values) {
  $scan = $form_values['scan'];

  // Header-only takes precedence over body-only; neither means "everything".
  if ($scan & SPAM_CUSTOM_ACTION_HEADER) {
    $action = 0 + SPAM_CUSTOM_ACTION_HEADER;
  }
  elseif ($scan & SPAM_CUSTOM_ACTION_BODY) {
    $action = 0 + SPAM_CUSTOM_ACTION_BODY;
  }
  else {
    $action = 0;
  }
  $action += $form_values['autodelete'] ? SPAM_CUSTOM_ACTION_DELETE : 0;
  $action += $form_values['nomail'] ? SPAM_CUSTOM_ACTION_NOMAIL : 0;

  $placeholders = array(
    '%filter' => $form_values['filter'],
  );
  if (!$form_values['scid']) {
    db_query("INSERT INTO {spam_custom} (filter, style, effect, action) VALUES('%s', %d, %d, %d)", $form_values['filter'], $form_values['style'], $form_values['effect'], $action);
    drupal_set_message(t('Custom filter %filter added.', $placeholders));
  }
  else {
    db_query("UPDATE {spam_custom} SET filter = '%s', style = %d, effect = %d, action = %d WHERE scid = %d", $form_values['filter'], $form_values['style'], $form_values['effect'], $action, $form_values['scid']);
    drupal_set_message(t('Custom filter %filter updated.', $placeholders));
  }
  return 'admin/settings/spam/custom';
}

/**
 * Build the confirmation form shown before a custom filter is deleted.
 *
 * @param $filter
 *   A loaded filter for deleting.
 *
 * @return
 *   Confirmation form carrying the filter id in a hidden "scid" element.
 */
function spam_custom_filter_delete($filter = NULL) {
  $form = array();
  $form['scid'] = array(
    '#type' => 'hidden',
    '#value' => $filter->scid,
  );
  $question = t('Are you sure you want to delete the %filter filter?', array(
    '%filter' => $filter->filter,
  ));
  return confirm_form($form, $question, 'admin/settings/spam/custom', t('This action can not be undone.'), t('Delete'), t('Cancel'));
}

/**
 * Form API hook; delete the confirmed custom filter from {spam_custom}.
 *
 * The filter is loaded before deletion so its text can be shown in the
 * status message.
 *
 * @return
 *   Path of the custom filter overview page.
 */
function spam_custom_filter_delete_submit($form_id, $form_values) {
  $scid = $form_values['scid'];
  $filter = spam_custom_filter_load($scid);
  db_query('DELETE FROM {spam_custom} WHERE scid = %d', $scid);
  $message = t('Filter %filter deleted.', array(
    '%filter' => $filter->filter,
  ));
  drupal_set_message($message);
  return 'admin/settings/spam/custom';
}

/**
 * Fetch one custom filter row from {spam_custom}.
 *
 * @param $scid
 *   The spam custom filter id.
 *
 * @return
 *   Whatever db_fetch_object() yields for the matching row.
 */
function spam_custom_filter_load($scid) {
  $result = db_query('SELECT * FROM {spam_custom} WHERE scid = %d', $scid);
  return db_fetch_object($result);
}

/**
 * Logic to apply custom filter to existing site content.
 * (At this time, only comment scanning is supported).
 *
 * Builds a form listing every comment that matches the filter, with
 * checkboxes and a bulk operation selector.
 *
 * @param $filter
 *   The filter object we are scanning with.
 *
 * @return
 *   Form API array.
 */
function spam_custom_filter_scan($filter) {
  $form['scid'] = array(
    '#type' => 'hidden',
    '#value' => $filter->scid,
  );
  $form['scan_help'] = array(
    '#type' => 'markup',
    '#value' => t('Scan results for filter %filter', array(
      '%filter' => $filter->filter,
    )),
    '#weight' => 1,
  );
  $form['options'] = array(
    '#type' => 'fieldset',
    '#title' => t('Update options'),
    '#prefix' => '<div class="container-inline">',
    '#suffix' => '</div>',
    '#weight' => 2,
  );
  $form['options']['operation'] = array(
    '#options' => array(
      'SPAM' => t('Mark the selected comments as spam'),
      'NOT_SPAM' => t('Mark the selected comments as not spam'),
      'UNPUBLISH' => t('Unpublish the selected comments'),
      'PUBLISH' => t('Publish the selected comments'),
      'DELETE' => t('Delete the selected comments (no confirmation)'),
    ),
    '#type' => 'select',
  );
  $form['options']['submit'] = array(
    '#type' => 'submit',
    '#value' => t('Update'),
  );
  $form['header'] = array(
    '#type' => 'value',
    '#value' => array(
      theme('table_select_header_cell'),
      array(
        'data' => t('rating'),
        'field' => 'probability',
        'sort' => 'asc',
      ),
      array(
        'data' => t('status'),
        'field' => 'status',
      ),
      array(
        'data' => t('subject'),
        'field' => 'subject',
      ),
      array(
        'data' => t('comment'),
        'field' => 'comment',
      ),
      array(
        'data' => t('operations'),
        'colspan' => 2,
      ),
    ),
  );

  // Which comment columns are searched depends on the filter's scan setting.
  if ($filter->action & SPAM_CUSTOM_ACTION_HEADER) {
    $columns = array('c.subject');
  }
  elseif ($filter->action & SPAM_CUSTOM_ACTION_BODY) {
    $columns = array('c.comment', 'c.name', 'c.mail', 'c.homepage');
  }
  else {
    $columns = array('c.subject', 'c.comment', 'c.name', 'c.mail', 'c.homepage');
  }

  if ($filter->style == 1) {

    // Try and convert perl regex to MySQL regex by stripping off regular
    // expression boundaries.  For example "/spam/i" becomes "spam".  If the
    // boundaries cannot be found the filter text is used unchanged.
    $boundary = substr($filter->filter, 0, 1);
    $contents = "/\\{$boundary}(.*)\\{$boundary}/";
    $needle = preg_match($contents, $filter->filter, $regex) ? $regex[1] : $filter->filter;
    $comparison = " REGEXP '%s'";
  }
  else {
    // NOTE(review): "%" and "_" inside a plain-text filter still act as LIKE
    // wildcards here -- confirm whether that is acceptable.
    $needle = $filter->filter;
    $comparison = " LIKE '%%%s%%'";
  }

  // The filter text is handed to the database layer as a query argument (one
  // per searched column) instead of being concatenated into the SQL, so
  // quotes in a filter can neither break nor alter the query.
  $conditions = array();
  $arguments = array();
  foreach ($columns as $column) {
    $conditions[] = $column . $comparison;
    $arguments[] = $needle;
  }
  $where = '(' . implode(' OR ', $conditions) . ')';

  // {spam_tracker} stores ratings for several kinds of content, so the join
  // is restricted to the rows that describe comments.
  $sql = "SELECT c.cid, c.nid, c.status, c.comment, c.subject, s.probability FROM {comments} c LEFT JOIN {spam_tracker} s ON c.cid = s.id AND s.source = 'comment' WHERE {$where}";
  $sql .= tablesort_sql($form['header']['#value']);
  $limit = variable_get('spam_display_quantity', 50);
  $result = pager_query($sql, $limit, 0, NULL, $arguments);

  // build a table listing the appropriate comments
  $threshold = variable_get('spam_threshold', 80);
  $comments = array();
  while ($comment = db_fetch_object($result)) {
    $cid = $comment->cid;
    $is_spam = $comment->probability >= $threshold;
    $comments[$cid] = '';
    $form['rating'][$cid] = array(
      '#value' => $is_spam ? t('spam') : t('not spam'),
    );
    $form['status'][$cid] = array(
      '#value' => $comment->status == 0 ? t('published') : t('not published'),
    );

    // Long texts are shortened with truncate_utf8() so a multi-byte
    // character is never cut in half before being escaped.
    $form['subject'][$cid] = array(
      '#value' => strlen($comment->subject) > 128 ? htmlspecialchars(truncate_utf8($comment->subject, 128)) . t('...') : htmlspecialchars($comment->subject),
    );
    $form['comment'][$cid] = array(
      '#value' => strlen($comment->comment) > 256 ? htmlspecialchars(truncate_utf8($comment->comment, 256)) . t('...') : htmlspecialchars($comment->comment),
    );

    // The parentheses matter: "." binds tighter than "?:", so without them
    // the whole concatenation becomes the ternary condition.
    $form['operations'][$cid] = array(
      '#value' => l(t('view'), "node/{$comment->nid}#comment-{$cid}") . '&nbsp;' . ($is_spam ? l(t('mark as not spam'), "spam/comment/{$cid}/notspam") : l(t('mark as spam'), "spam/comment/{$cid}/spam")),
    );
  }
  $form['comments'] = array(
    '#type' => 'checkboxes',
    '#options' => $comments,
  );
  $form['pager'] = array(
    // Use the same page size as the query above.
    '#value' => theme('pager', NULL, $limit, 0),
  );
  return $form;
}

/**
 * Form API hook; validate the custom filter scanner.
 *
 * Sets a form error when no comment checkbox was selected.
 */
function spam_custom_filter_scan_validate($form_id, $form_values) {
  // Unchecked checkboxes are submitted as 0; drop them to see what is left.
  $selected = array_diff($form_values['comments'], array(
    0,
  ));
  if (count($selected) == 0) {
    form_set_error(NULL, t('Please select one or more comments to perform the update on.'));

    // Double quotes so the filter id is actually interpolated into the path.
    // NOTE(review): confirm whether the Form API uses a validate handler's
    // return value at all.
    return "admin/settings/spam/custom/{$form_values['scid']}/scan";
  }
}

/**
 * Form API hook; submit the custom filter scanner.
 *
 * Resolves the chosen operation through spam_comment_operations() and, when
 * that yields a callback, invokes it once for every selected comment id.
 *
 * @return
 *   Path of the scan page for the same filter.
 */
function spam_custom_filter_scan_submit($form_id, $form_values) {
  $callback = spam_comment_operations($form_values['operation']);
  $destination = "admin/settings/spam/custom/{$form_values['scid']}/scan";
  if (!$callback) {
    return $destination;
  }
  foreach ($form_values['comments'] as $cid => $checked) {
    if (!$checked) {
      // Comment was not selected.
      continue;
    }
    $callback($cid);
  }
  drupal_set_message(t('Comments have been updated.'));
  return $destination;
}

/**
 * Form theme code for spam_custom_filter_scan().
 *
 * Renders the help text and update options, then a table with one row per
 * matched comment (or a "no comments" row), the pager and finally whatever
 * is left of the form.
 */
function theme_spam_custom_filter_scan($form) {
  $html = drupal_render($form['scan_help']);
  $html .= drupal_render($form['options']);

  if (!isset($form['subject']) || !is_array($form['subject'])) {
    $rows[] = array(
      array(
        'data' => t('No comments matched.'),
        'colspan' => '6',
      ),
    );
  }
  else {
    // Form elements that make up one table row, in column order.
    $columns = array('comments', 'rating', 'status', 'subject', 'comment', 'operations');
    foreach (element_children($form['subject']) as $cid) {
      $cells = array();
      foreach ($columns as $column) {
        $cells[] = drupal_render($form[$column][$cid]);
      }
      $rows[] = $cells;
    }
  }

  $html .= theme('table', $form['header']['#value'], $rows);
  if ($form['pager']['#value']) {
    $html .= drupal_render($form['pager']);
  }
  return $html . drupal_render($form);
}

/**
 * Manage URL spam filters.
 *
 * With a numeric $id and a known $action the matching form is returned for
 * that filter; otherwise a sortable table of URL tokens at or above the spam
 * threshold is shown, followed by the form for adding a new URL filter.
 *
 * @param $id
 *   The ID of a URL filter to do an action upon.
 * @param $action
 *   One of "delete", or "edit".
 */
function spam_url_filter_overview($id = NULL, $action = NULL) {

  // hand off action processing to other functions, but only when the filter
  // actually exists; otherwise fall through to the overview.
  if (isset($action) && isset($id) && is_numeric($id)) {
    $filter = spam_url_filter_load($id);
    if ($filter) {
      switch ($action) {
        case "edit":
          return drupal_get_form('spam_url_filter_edit', $filter);
        case "delete":
          return drupal_get_form('spam_url_filter_delete', $filter);
      }
    }
  }
  if (variable_get('spam_filter_urls', 1) == 0) {
    // The placeholder has to be substituted by t(); "!url" inserts the link
    // markup without escaping it.
    drupal_set_message(t('The URL filtering functionality provided by this module is currently disabled. You can configure URL filters below, but they will not function until you check "Filter spammer URLs" at !url.', array(
      '!url' => l(t('Administer >> Site configuration >> Spam'), 'admin/settings/spam'),
    )));
  }
  $headers = array(
    array(
      'data' => t('domain'),
      'field' => 'token',
      'sort' => 'asc',
    ),
    array(
      'data' => t('spam matches'),
      'field' => 'spam',
    ),
    array(
      'data' => t('not spam matches'),
      'field' => 'notspam',
    ),
    array(
      'data' => t('spam probability'),
      'field' => 'probability',
    ),
    array(
      'data' => t('last match'),
      'field' => 'last',
    ),
    array(
      'data' => t('operations'),
      'colspan' => 2,
    ),
  );
  $rows = array();

  // URL filters are stored as tokens prefixed with "URL*".  The threshold is
  // passed as a query argument rather than concatenated into the SQL.
  $sql = "SELECT * FROM {spam_tokens} WHERE probability >= %d AND token LIKE 'URL*%%'";
  $sql .= tablesort_sql($headers);
  $result = pager_query($sql, 25, 0, NULL, array(
    variable_get('spam_threshold', 80),
  ));
  while ($url = db_fetch_object($result)) {
    $rows[] = array(
      'data' => array(
        htmlspecialchars(preg_replace('/^URL\\*/', '', $url->token)),
        $url->spam,
        $url->notspam,
        $url->probability . '%',
        $url->last ? format_date($url->last, 'small') : t('none'),
        l(t('edit'), "admin/settings/spam/url/{$url->tid}/edit") . '&nbsp;' . l(t('delete'), "admin/settings/spam/url/{$url->tid}/delete"),
      ),
    );
  }
  $output = theme('table', $headers, $rows);
  $output .= theme('pager', NULL, 25, 0);
  $output .= drupal_get_form('spam_url_filter_edit');
  return $output;
}

/**
 * Build the form used to add a URL filter or edit an existing one.
 *
 * @param $filter
 *   A loaded filter for editing.
 *
 * @return
 *   Form API array with a hidden "tid", the domain text field and a submit
 *   button.
 */
function spam_url_filter_edit($filter = NULL) {
  $editing = isset($filter);

  // The stored token carries a "URL*" marker that is not shown to the user.
  $domain = $editing ? str_replace('URL*', '', $filter->token) : '';

  $form = array();
  $form['tid'] = array(
    '#type' => 'hidden',
    '#value' => $editing ? $filter->tid : '',
  );
  $form['options'] = array(
    '#collapsible' => TRUE,
    '#title' => t('Add new URL filter'),
    '#type' => 'fieldset',
  );
  $form['options']['url'] = array(
    '#default_value' => $domain,
    '#description' => t('Enter a domain name that if found in new site content will cause the content to be marked as spam. For example if you enter "spam.com" as a domain name, a comment containing the URL "http://spam.com/stuff/for/sale" will be automatically marked as spam.'),
    '#required' => TRUE,
    '#title' => t('Domain'),
    '#type' => 'textfield',
  );
  $form['options']['submit'] = array(
    '#type' => 'submit',
    '#value' => $editing ? t('Update filter') : t('Add filter'),
  );
  return $form;
}

/**
 * Form API hook; validate the URL filter.
 *
 * When adding a new filter, rejects a domain whose "URL*" token already
 * exists in {spam_tokens}.
 */
function spam_url_filter_edit_validate($form_id, $form_values) {
  // The hidden "tid" element is always submitted (as an empty string for a
  // new filter), so test for emptiness rather than existence.
  if (empty($form_values['tid'])) {

    // there's no reason to allow duplicate filters
    $duplicate = db_fetch_object(db_query("SELECT token FROM {spam_tokens} WHERE token = 'URL*%s'", $form_values['url']));
    if ($duplicate && $duplicate->token) {
      // The text field is named "url"; flag that element.
      form_set_error('url', t('URL filter %filter already exists.', array(
        '%filter' => $form_values['url'],
      )));
    }
  }
}

/**
 * Form API hook; submit the URL filter.
 *
 * Renames the existing token when "tid" is set, otherwise inserts a new
 * "URL*" token with a probability of 99.
 *
 * @return
 *   Path of the URL filter overview page.
 */
function spam_url_filter_edit_submit($form_id, $form_values) {
  $domain = $form_values['url'];
  $placeholders = array(
    '%filter' => $domain,
  );
  if (!$form_values['tid']) {
    db_query("INSERT INTO {spam_tokens} (token, probability) VALUES('URL*%s', %d)", $domain, 99);
    drupal_set_message(t('URL filter %filter added.', $placeholders));
  }
  else {
    db_query("UPDATE {spam_tokens} SET token = 'URL*%s' WHERE tid = %d", $domain, $form_values['tid']);
    drupal_set_message(t('URL filter %filter updated.', $placeholders));
  }
  return 'admin/settings/spam/url';
}

/**
 * Build the confirmation form shown before a URL filter is deleted.
 *
 * @param $filter
 *   A loaded URL filter (token row).
 *
 * @return
 *   Confirmation form carrying the token id in a hidden "tid" element.
 */
function spam_url_filter_delete($filter = NULL) {
  $form = array();
  $form['tid'] = array(
    '#type' => 'hidden',
    '#value' => $filter->tid,
  );

  // Show the domain without its "URL*" storage marker.
  $question = t('Are you sure you want to delete the %filter filter?', array(
    '%filter' => str_replace('URL*', '', $filter->token),
  ));
  return confirm_form($form, $question, 'admin/settings/spam/url', t('This action can not be undone.'), t('Delete'), t('Cancel'));
}

/**
 * Form API hook; delete the confirmed URL filter from {spam_tokens}.
 *
 * @return
 *   Path of the URL filter overview page.
 */
function spam_url_filter_delete_submit($form_id, $form_values) {
  $filter = spam_url_filter_load($form_values['tid']);
  db_query('DELETE FROM {spam_tokens} WHERE tid = %d', $filter->tid);

  // Report the domain without its "URL*" storage marker.
  $domain = str_replace('URL*', '', $filter->token);
  drupal_set_message(t('URL filter %filter deleted.', array(
    '%filter' => $domain,
  )));
  return 'admin/settings/spam/url';
}

/**
 * Fetch one row from {spam_tokens} by its token id.
 *
 * @param $id
 *   The token tid which contains the URL filter.
 *
 * @return
 *   Whatever db_fetch_object() yields for the matching row.
 */
function spam_url_filter_load($id) {
  $result = db_query('SELECT * FROM {spam_tokens} WHERE tid = %d', $id);
  return db_fetch_object($result);
}

/**
 * The default callback, performs all actions on spam/not spam content.
 *
 * @param $type        Type of content (ie 'comment', 'node'...)
 * @param $id          The content id.
 * @param $header      The header field of the content.
 * @param $body        The main text of the content.
 * @param $probability 1-99% chance of being spam.
 * @param $old         If updated content, information about pre-update
 *                     (its id and probability properties are read).
 * @param $action      Array keyed by SPAM_CUSTOM_ACTION_* constants; a true
 *                     value under SPAM_CUSTOM_ACTION_DELETE auto-deletes
 *                     spam, one under SPAM_CUSTOM_ACTION_NOMAIL suppresses
 *                     the admin mail.
 * @param $quiet       If true, neither the user message nor the admin mail
 *                     is generated.
 */
function spam_default_actions($type, $id, $header, $body, $probability, $old, $action, $quiet = NULL) {
  static $counter;
  $is_spam = _is_spam($probability);

  // Read the optional inputs defensively: a flag missing from $action, or an
  // empty $old, simply counts as "not set".
  $auto_delete = !empty($action[SPAM_CUSTOM_ACTION_DELETE]);
  $no_mail = !empty($action[SPAM_CUSTOM_ACTION_NOMAIL]);
  $is_update = $old && !empty($old->id);

  // auto-delete
  if ($auto_delete && $is_spam) {
    $function = "spam_delete_{$type}";
    $function($id);
    return;
  }

  // check if we've already filtered the content
  if ($is_update && $is_spam == _is_spam($old->probability)) {

    // nothing has changed, we don't need to do anything
    return;
  }

  // publish/unpublish the content
  if (variable_get('spam_unpublish', 1)) {
    if ($is_spam) {
      $function = "spam_unpublish_{$type}";
      $function($id);

      // notify user content was blocked
      if (variable_get('spam_notify_user', 1) && !$quiet) {
        if (!$counter++) {

          // only display this message once, even if we block multiple messages
          drupal_set_message(t('The @type you posted has been flagged as potential spam.  It will not be visible until the site administrator has a chance to review it.', array(
            '@type' => $type,
          )));
        }
      }
    }
    elseif ($is_update) {
      // Previously rated content that is no longer spam gets published.
      $function = "spam_publish_{$type}";
      $function($id);
    }
  }

  // generate a mail message
  if (!($quiet || $no_mail)) {
    if ($is_spam && variable_get('spam_notify_admin', 1)) {
      $subject = t('[@sitename] Detected spam @type', array(
        '@sitename' => variable_get('site_name', 'drupal'),
        '@type' => $type,
      ));
      if ($is_update) {

        // update
        spam_mail($subject . ' on update', spam_mail_body($type, $header, $body, $id));
      }
      else {

        // add
        spam_mail($subject, spam_mail_body($type, $header, $body, $id));
      }
    }
  }
}

/**
 * Send a notification mail to the account with uid 1.
 *
 * The message is wrapped at 72 characters and handed to drupal_mail() with
 * the 'site_mail' variable (falling back to PHP's sendmail_from setting) as
 * the sender.
 *
 * @param $subject
 *   Mail subject.
 * @param $message
 *   Mail body text.
 */
function spam_mail($subject, $message) {
  $recipient = user_load(array(
    'uid' => 1,
  ));
  $wrapped = wordwrap($message, 72);
  $sender = variable_get('site_mail', ini_get('sendmail_from'));
  drupal_mail('spam_mail', $recipient->mail, $subject, $wrapped, $sender);
}

/**
 * Generate an appropriate notification mail message.
 *
 * @param $type
 *   Content type. 'comment' and 'node' produce a message with edit and
 *   "view all spam" links; any other value produces a generic message.
 * @param $header
 *   Title/subject of the spam content.
 * @param $body
 *   Body text of the spam content.
 * @param $id
 *   Identifier of the content, used to build the edit link.
 *
 * @return
 *   The translated message text.
 */
function spam_mail_body($type, $header, $body, $id) {
  // The notification is mailed to user 1 (see spam_mail()), so greet that
  // account by name.
  $admin = user_load(array('uid' => 1));
  $args = array(
    '@adminname' => $admin ? $admin->name : '',
    '@type' => $type,
    '@sitename' => variable_get('site_name', 'drupal'),
    '@title' => $header,
    '@body' => $body,
  );

  // Comments and nodes share one message and differ only in their edit path.
  switch ($type) {
    case 'comment':
      $edit_path = "comment/edit/{$id}";
      break;
    case 'node':
      $edit_path = "node/{$id}/edit";
      break;
    default:
      return t("Hello @adminname.\n\n  @type content recently posted to your '@sitename' website has been automatically marked as spam.  The text of the spam content is as follows:\n\n<spam>\nSubject: @title\n\n@body\n", $args);
  }

  $args['@types'] = $type . 's';
  // Ask url() for absolute URLs: prefixing $base_url to a relative url()
  // result repeats the base path when Drupal runs in a subdirectory.
  $args['@editurl'] = url($edit_path, NULL, NULL, TRUE);
  $args['@allurl'] = url("admin/content/{$type}/list/spam", NULL, NULL, TRUE);
  return t("Hello @adminname.\n\n  A @type recently posted to your '@sitename' website has been automatically marked as spam.  The text of the spam @type is as follows:\n\n<spam>\nSubject: @title\n\n@body\n\n  Edit the @type here: @editurl\n  View all spam @types here: @allurl", $args);
}

/**
 * Loads all fields from the matching spam entry in the spam_tracker table.
 *
 * @param $source Module type such as 'comment', 'node', ...
 * @param $id     Unique id for content (cid, nid, ...)
 *
 * @return
 *   The result of db_fetch_object() for the matching row.
 */
function spam_load($source, $id) {
  $result = db_query("SELECT * FROM {spam_tracker} WHERE source = '%s' AND id = %d", $source, $id);
  return db_fetch_object($result);
}
/**
 * Add a weight to a probability and keep the result within 1..99.
 *
 * @param $probability
 *   Base probability.
 * @param $weight
 *   Amount added to the probability (may be negative).
 *
 * @return
 *   The sum, raised to 1 if it is lower and lowered to 99 if it is higher.
 */
function spam_get_probability($probability, $weight = 0) {
  $total = $probability + $weight;
  if ($total > 99) {
    return 99;
  }
  if ($total < 1) {
    return 1;
  }
  return $total;
}

/**
 * Validate handler for the spam_reported_comments_overview form.
 *
 * Sets a form error and redirects back to the spam comment list when no
 * comment was selected.
 *
 * @param $form_id
 *   Id of the submitted form (unused).
 * @param $form_values
 *   Submitted values; 'comments' holds the checkbox values.
 */
function spam_reported_comments_overview_validate($form_id, $form_values) {
  // Drop entries whose value is 0 (presumably the unticked checkboxes).
  $selected = array_diff($form_values['comments'], array(0));
  if (!count($selected)) {
    form_set_error('', t('Please select one or more comments to perform the update on.'));
    drupal_goto('admin/content/comment/list/spam');
  }
}

/**
 * Submit handler for the spam_reported_comments_overview form.
 *
 * Runs the selected operation on every checked comment.
 *
 * @param $form_id
 *   Id of the submitted form (unused).
 * @param $form_values
 *   Submitted values; 'operation' names the action and 'comments' maps
 *   comment ids to their checkbox value.
 */
function spam_reported_comments_overview_submit($form_id, $form_values) {
  $callback = spam_reported_comment_operations($form_values['operation']);
  if (!$callback) {
    return;
  }
  foreach ($form_values['comments'] as $cid => $checked) {
    if (!$checked) {
      continue;
    }
    $callback('comment', $cid);
  }
  drupal_set_message(t('Comments have been updated.'));
}

/**
 * Map a requested action on a reported comment to the function handling it.
 *
 * @param $action
 *   One of 'IGNORE', 'SPAM' or 'DELETE'.
 *
 * @return
 *   Name of the function to call; 'spam_donothing_comment' for any other
 *   action.
 */
function spam_reported_comment_operations($action = 'NOTHING') {
  if ($action == 'IGNORE') {
    return 'spam_reported_ignore';
  }
  if ($action == 'SPAM') {
    return 'spam_reported_spam';
  }
  if ($action == 'DELETE') {
    return 'spam_reported_delete';
  }

  // do nothing if we don't know what to do
  return 'spam_donothing_comment';
}

/**
 * Display the form that allows the user to manage comments reported as spam.
 *
 * Builds an "Update options" fieldset, a sortable table header, one set of
 * per-comment cells keyed by comment id, the row checkboxes and a pager.
 *
 * @return
 *   The form array.
 */
function spam_reported_comments_overview() {
  $limit = variable_get('spam_display_quantity', 50);

  // build an 'Update options' form
  $form['options'] = array(
    '#type' => 'fieldset',
    '#title' => t('Update options'),
    '#prefix' => '<div class="container-inline">',
    '#suffix' => '</div>',
  );
  $form['options']['operation'] = array(
    '#type' => 'select',
    '#options' => array(
      'IGNORE' => t('Ignore the selected comments'),
      'SPAM' => t('Mark the selected comments as spam'),
      'DELETE' => t('Delete the selected comments (no confirmation)'),
    ),
  );
  $form['options']['submit'] = array(
    '#type' => 'submit',
    '#value' => t('Update'),
  );

  // Sort fields must use the aliases of the query below (c = comments,
  // r = spam_reported); anything else breaks the ORDER BY clause.
  $form['header'] = array(
    '#type' => 'value',
    '#value' => array(
      // Same "select all" header cell as spam_comment_overview() uses.
      theme('table_select_header_cell'),
      array(
        'data' => t('times reported'),
        'field' => 'count',
        'sort' => 'desc',
      ),
      array(
        'data' => t('last reported'),
        'field' => 'r.timestamp',
      ),
      array(
        // The query only provides the reporter's uid, so sort on that.
        'data' => t('reported by'),
        'field' => 'r.uid',
      ),
      array(
        'data' => t('subject'),
        'field' => 'subject',
      ),
      array(
        'data' => t('author'),
        'field' => 'c.name',
      ),
      array(
        'data' => t('hostname'),
        'field' => 'c.hostname',
      ),
      array(
        'data' => t('status'),
        'field' => 'c.status',
      ),
      array(
        'data' => t('operations'),
        'colspan' => 3,
      ),
    ),
  );
  $sql = "SELECT c.*, count(r.rid) as count, r.source, r.rid, r.id, r.uid as ruid, r.hostname, r.feedback, r.timestamp FROM {comments} c, {spam_reported} r WHERE r.source = 'comment' AND r.id = c.cid GROUP BY r.id";
  $sql .= tablesort_sql($form['header']['#value']);

  // pager_query() cannot derive a correct total from a GROUP BY query, so
  // count the distinct reported comments explicitly.
  $count_sql = "SELECT COUNT(DISTINCT r.id) FROM {comments} c, {spam_reported} r WHERE r.source = 'comment' AND r.id = c.cid";
  $result = pager_query($sql, $limit, 0, $count_sql);

  // build a table listing the appropriate comments
  $comments = array();
  while ($comment = db_fetch_object($result)) {
    $cid = $comment->cid;
    $comments[$cid] = '';
    $form['times_reported'][$cid] = array(
      '#value' => $comment->count,
    );
    // NOTE(review): r.timestamp and r.hostname are selected after c.*, so
    // these properties presumably hold the report's values - confirm.
    $form['last_reported'][$cid] = array(
      '#value' => format_date($comment->timestamp, 'small'),
    );
    $form['reported_by'][$cid] = array(
      '#value' => theme('username', user_load(array(
        'uid' => $comment->ruid,
      ))),
    );
    // l() escapes attribute values itself, so pass the raw text, truncated on
    // a character boundary.
    $form['subject'][$cid] = array(
      '#value' => l($comment->subject, "node/{$comment->nid}", array(
        'title' => truncate_utf8($comment->comment, 128),
      ), NULL, "comment-{$cid}") . ' ' . (node_last_viewed($comment->nid) < $comment->timestamp ? theme('mark') : ''),
    );
    $form['author'][$cid] = array(
      '#value' => theme('username', user_load(array(
        'uid' => $comment->uid,
      ))),
    );
    $form['hostname'][$cid] = array(
      '#value' => $comment->hostname,
    );
    $form['status'][$cid] = array(
      '#value' => $comment->status == 0 ? t('published') : t('not published'),
    );
    $form['operations'][$cid] = array(
      '#value' => l(t('View log'), "admin/logs/spam/logs/comment/{$cid}") . '&nbsp;' . l(t('Details'), "admin/content/spam/reported/comment/{$cid}/details", array(
        'title' => truncate_utf8($comment->feedback, 128),
      )) . '&nbsp;' . l(t('Edit'), "comment/edit/{$cid}"),
    );
  }
  $form['comments'] = array(
    '#type' => 'checkboxes',
    '#options' => $comments,
  );
  $form['pager'] = array(
    '#value' => theme('pager', NULL, $limit, 0),
  );
  return $form;
}

/**
 * Theme the spam_reported_comments_overview form as a table.
 *
 * @param $form
 *   The form array to render.
 *
 * @return
 *   HTML: the options fieldset, the table, the pager (if any) and whatever
 *   is left of the form.
 */
function theme_spam_reported_comments_overview($form) {
  // Form keys rendered into each table row, in column order.
  $columns = array(
    'comments',
    'times_reported',
    'last_reported',
    'reported_by',
    'subject',
    'author',
    'hostname',
    'status',
    'operations',
  );
  $rows = array();
  $output = drupal_render($form['options']);

  $has_rows = isset($form['subject']) && is_array($form['subject']);
  if (!$has_rows) {
    $rows[] = array(
      array(
        'data' => t('No comments reported.'),
        'colspan' => '99',
      ),
    );
  }
  else {
    foreach (element_children($form['subject']) as $key) {
      $cells = array();
      foreach ($columns as $column) {
        $cells[] = drupal_render($form[$column][$key]);
      }
      $rows[] = $cells;
    }
  }

  $output .= theme('table', $form['header']['#value'], $rows);
  if ($form['pager']['#value']) {
    $output .= drupal_render($form['pager']);
  }
  return $output . drupal_render($form);
}

/**
 * Validate handler for the spam_reported_nodes_overview form.
 *
 * Sets a form error and redirects back to the reported node list when no
 * node was selected.
 *
 * @param $form_id
 *   Id of the submitted form (unused).
 * @param $form_values
 *   Submitted values; 'nodes' holds the checkbox values.
 */
function spam_reported_nodes_overview_validate($form_id, $form_values) {
  // Drop entries whose value is 0 (presumably the unticked checkboxes).
  $selected = array_diff($form_values['nodes'], array(0));
  if (!count($selected)) {
    form_set_error('', t('Please select one or more nodes to perform the update on.'));
    drupal_goto('admin/content/node/list/reported');
  }
}

/**
 * Submit handler for the spam_reported_nodes_overview form.
 *
 * Runs the selected operation on every checked node.
 *
 * @param $form_id
 *   Id of the submitted form (unused).
 * @param $form_values
 *   Submitted values; 'operation' names the action and 'nodes' maps node ids
 *   to their checkbox value.
 */
function spam_reported_nodes_overview_submit($form_id, $form_values) {
  $callback = spam_reported_nodes_operations($form_values['operation']);
  if (!$callback) {
    return;
  }
  foreach ($form_values['nodes'] as $nid => $checked) {
    if (!$checked) {
      continue;
    }
    $callback('node', $nid);
  }
  drupal_set_message(t('Nodes have been updated.'));
}

/**
 * Map a requested action on a reported node to the function handling it.
 *
 * @param $action
 *   One of 'IGNORE', 'SPAM' or 'DELETE'.
 *
 * @return
 *   Name of the function to call; 'spam_donothing_node' for any other action.
 */
function spam_reported_nodes_operations($action = 'NOTHING') {
  if ($action == 'IGNORE') {
    return 'spam_reported_ignore';
  }
  if ($action == 'SPAM') {
    return 'spam_reported_spam';
  }
  if ($action == 'DELETE') {
    return 'spam_reported_delete';
  }

  // do nothing if we don't know what to do
  return 'spam_donothing_node';
}
/**
 * Display the form that allows the user to manage nodes reported as spam.
 *
 * Builds an "Update options" fieldset, a sortable table header, one set of
 * per-node cells keyed by node id, the row checkboxes and a pager.
 *
 * @return
 *   The form array.
 */
function spam_reported_nodes_overview() {
  $limit = variable_get('spam_display_quantity', 50);

  // build an 'Update options' form
  $form['options'] = array(
    '#type' => 'fieldset',
    '#title' => t('Update options'),
    '#prefix' => '<div class="container-inline">',
    '#suffix' => '</div>',
  );
  $form['options']['operation'] = array(
    '#type' => 'select',
    '#options' => array(
      'IGNORE' => t('Ignore the selected nodes'),
      'SPAM' => t('Mark the selected nodes as spam'),
      'DELETE' => t('Delete the selected nodes (no confirmation)'),
    ),
  );
  $form['options']['submit'] = array(
    '#type' => 'submit',
    '#value' => t('Update'),
  );

  // Sort fields must use the aliases of the query below (n = node,
  // r = spam_reported); anything else breaks the ORDER BY clause.
  $form['header'] = array(
    '#type' => 'value',
    '#value' => array(
      // Same "select all" header cell as spam_comment_overview() uses.
      theme('table_select_header_cell'),
      array(
        'data' => t('times reported'),
        'field' => 'count',
        'sort' => 'desc',
      ),
      array(
        'data' => t('last reported'),
        'field' => 'r.timestamp',
      ),
      array(
        // The query only provides the reporter's uid, so sort on that.
        'data' => t('reported by'),
        'field' => 'r.uid',
      ),
      array(
        'data' => t('title'),
        'field' => 'title',
      ),
      array(
        'data' => t('type'),
        'field' => 'type',
      ),
      array(
        // The users table is not part of the query; sort on the author's uid.
        'data' => t('author'),
        'field' => 'n.uid',
      ),
      array(
        'data' => t('hostname'),
        'field' => 'hostname',
      ),
      array(
        'data' => t('status'),
        'field' => 'status',
      ),
      array(
        'data' => t('timestamp'),
        'field' => 'n.changed',
        'sort' => 'desc',
      ),
      array(
        'data' => t('operations'),
        'colspan' => 3,
      ),
    ),
  );
  $sql = "SELECT n.*, count(r.rid) as count, r.source, r.rid, r.id, r.uid as ruid, r.hostname, r.feedback, r.timestamp FROM {node} n, {spam_reported} r WHERE r.source = 'node' AND r.id = n.nid GROUP BY r.id";
  $sql .= tablesort_sql($form['header']['#value']);

  // pager_query() cannot derive a correct total from a GROUP BY query, so
  // count the distinct reported nodes explicitly.
  $count_sql = "SELECT COUNT(DISTINCT r.id) FROM {node} n, {spam_reported} r WHERE r.source = 'node' AND r.id = n.nid";
  $result = pager_query($sql, $limit, 0, $count_sql);

  // build a table listing the appropriate nodes
  $nodes = array();
  while ($node = db_fetch_object($result)) {
    $nid = $node->nid;
    $nodes[$nid] = '';
    $form['times_reported'][$nid] = array(
      '#value' => $node->count,
    );
    $form['last_reported'][$nid] = array(
      '#value' => format_date($node->timestamp, 'small'),
    );
    $form['reported_by'][$nid] = array(
      '#value' => theme('username', user_load(array(
        'uid' => $node->ruid,
      ))),
    );
    // Go through theme() so that theme overrides of the marker are honoured.
    $form['title'][$nid] = array(
      '#value' => l($node->title, "node/{$nid}") . ' ' . (node_last_viewed($nid) < $node->changed ? theme('mark') : ''),
    );
    $form['type'][$nid] = array(
      '#value' => node_get_types('name', $node),
    );
    $form['author'][$nid] = array(
      '#value' => theme('username', user_load(array(
        'uid' => $node->uid,
      ))),
    );
    $form['hostname'][$nid] = array(
      '#value' => $node->hostname,
    );
    $form['status'][$nid] = array(
      '#value' => $node->status ? t('published') : t('not published'),
    );
    $form['timestamp'][$nid] = array(
      '#value' => format_date($node->changed, 'small'),
    );
    // l() escapes attribute values itself, so pass the raw feedback text,
    // truncated on a character boundary.
    $form['operations'][$nid] = array(
      '#value' => l(t('View log'), "admin/logs/spam/logs/node/{$nid}") . '&nbsp;' . l(t('Details'), "admin/content/spam/reported/node/{$nid}/details", array(
        'title' => truncate_utf8($node->feedback, 128),
      )) . '&nbsp;' . l(t('Edit'), "node/{$nid}/edit"),
    );
  }
  $form['nodes'] = array(
    '#type' => 'checkboxes',
    '#options' => $nodes,
  );
  $form['pager'] = array(
    '#value' => theme('pager', NULL, $limit, 0),
  );
  return $form;
}

/**
 * Theme the spam_reported_nodes_overview form as a table.
 *
 * @param $form
 *   The form array to render.
 *
 * @return
 *   HTML: the options fieldset, the table, the pager (if any) and whatever
 *   is left of the form.
 */
function theme_spam_reported_nodes_overview($form) {
  // Form keys rendered into each table row, in column order.
  $columns = array(
    'nodes',
    'times_reported',
    'last_reported',
    'reported_by',
    'title',
    'type',
    'author',
    'hostname',
    'status',
    'timestamp',
    'operations',
  );
  $rows = array();
  $output = drupal_render($form['options']);

  $has_rows = isset($form['title']) && is_array($form['title']);
  if (!$has_rows) {
    $rows[] = array(
      array(
        'data' => t('No nodes reported.'),
        'colspan' => '99',
      ),
    );
  }
  else {
    foreach (element_children($form['title']) as $key) {
      $cells = array();
      foreach ($columns as $column) {
        $cells[] = drupal_render($form[$column][$key]);
      }
      $rows[] = $cells;
    }
  }

  $output .= theme('table', $form['header']['#value'], $rows);
  if ($form['pager']['#value']) {
    $output .= drupal_render($form['pager']);
  }
  return $output . drupal_render($form);
}
/**
 * Show every spam report filed against one piece of content.
 *
 * The content is identified by the path arguments (source and id).
 *
 * @return
 *   An HTML table with one group of rows per report, newest first, or an
 *   empty string when no id is given or no report exists.
 */
function spam_reported_details() {

  /*  admin/content/spam/reported/source/id/details
   *  0     1       2    3        4      5  6
   */
  $source = arg(4);
  $id = arg(5);
  if (!$id) {
    return '';
  }

  $rows = '';
  $result = db_query("SELECT * FROM {spam_reported} WHERE source = '%s' AND id = %d ORDER BY timestamp DESC", $source, $id);
  while ($entry = db_fetch_object($result)) {
    if ($rows !== '') {
      // Separator row between two reports.
      $rows .= ' <tr bgcolor="#888"><td colspan="2"></td></tr>';
    }
    $rows .= ' <tr><th>' . t('Type') . '</th><td>' . l(t($entry->source), "admin/logs/spam/logs/{$entry->source}") . '</td></tr>';
    $rows .= ' <tr><th>' . t('@type ID', array(
      '@type' => ucfirst($entry->source),
    )) . '</th><td>' . l(t($entry->id), "admin/logs/spam/logs/{$entry->source}/{$entry->id}") . '</td></tr>';
    $rows .= ' <tr><th>' . t('Date') . '</th><td>' . format_date($entry->timestamp, 'large') . '</td></tr>';
    $reported_user = user_load(array(
      'uid' => $entry->uid,
    ));
    $rows .= ' <tr><th>' . t('User') . '</th><td>' . theme('username', $reported_user) . '</td></tr>';
    // Feedback and hostname are stored values that did not originate from
    // this page; escape them before putting them into HTML.
    $rows .= ' <tr><th>' . t('Feedback') . '</th><td>' . check_plain($entry->feedback) . '</td></tr>';
    $rows .= ' <tr><th>' . t('Hostname') . '</th><td>' . check_plain($entry->hostname) . '</td></tr>';
  }

  // Only emit the table wrapper when there is something inside it.
  if ($rows === '') {
    return '';
  }
  return '<table border="1" cellpadding="2" cellspacing="2">' . $rows . '</table>';
}

/**
 * Map a requested action on a comment to the function handling it.
 *
 * @param $action
 *   One of 'SPAM', 'NOT_SPAM', 'UNPUBLISH', 'PUBLISH' or 'DELETE'.
 *
 * @return
 *   Name of the function to call; 'spam_donothing_comment' for any other
 *   action.
 */
function spam_comment_operations($action = 'NOTHING') {
  if ($action == 'SPAM') {
    return 'spam_spam_comment';
  }
  if ($action == 'NOT_SPAM') {
    return 'spam_notspam_comment';
  }
  if ($action == 'UNPUBLISH') {
    return 'spam_unpublish_comment';
  }
  if ($action == 'PUBLISH') {
    return 'spam_publish_comment';
  }
  if ($action == 'DELETE') {
    return 'spam_delete_comment';
  }

  // do nothing if we don't know what to do
  return 'spam_donothing_comment';
}

/**
 * Display the form that allows the user to edit comment spam.
 *
 * Lists every comment whose tracked probability is at or above the
 * 'spam_threshold' setting, with bulk update options, row checkboxes and a
 * pager.
 *
 * @return
 *   The form array.
 */
function spam_comment_overview() {
  $limit = variable_get('spam_display_quantity', 50);

  $form['options'] = array(
    '#prefix' => '<div class="container-inline">',
    '#suffix' => '</div>',
    '#title' => t('Update options'),
    '#type' => 'fieldset',
  );
  $form['options']['operation'] = array(
    '#options' => array(
      'NOT_SPAM' => t('Mark the selected comments as not spam'),
      'UNPUBLISH' => t('Unpublish the selected comments'),
      'PUBLISH' => t('Publish the selected comments'),
      'DELETE' => t('Delete the selected comments (no confirmation)'),
    ),
    '#type' => 'select',
  );
  $form['options']['submit'] = array(
    '#type' => 'submit',
    '#value' => t('Update'),
  );

  // Sort fields are qualified with the aliases of the query below; the users
  // table also has a status column, so unqualified names would be ambiguous.
  $form['header'] = array(
    '#type' => 'value',
    '#value' => array(
      theme('table_select_header_cell'),
      array(
        'data' => t('subject'),
        'field' => 'c.subject',
      ),
      array(
        'data' => t('author'),
        'field' => 'u.name',
      ),
      array(
        'data' => t('hostname'),
        'field' => 'c.hostname',
      ),
      array(
        'data' => t('status'),
        'field' => 'c.status',
      ),
      array(
        'data' => t('timestamp'),
        'field' => 'c.timestamp',
        'sort' => 'desc',
      ),
      array(
        'data' => t('operations'),
      ),
    ),
  );

  // Join the users table so that the author's registered name is available
  // for display and for sorting on u.name.
  $sql = "SELECT c.*, u.name AS registered_name, s.id, s.probability FROM {spam_tracker} s, {comments} c, {users} u WHERE s.source = 'comment' AND s.id = c.cid AND u.uid = c.uid AND s.probability >= %d";
  $sql .= tablesort_sql($form['header']['#value']);
  $result = pager_query($sql, $limit, 0, NULL, variable_get('spam_threshold', 80));

  // build a table listing the appropriate comments
  $comments = array();
  while ($comment = db_fetch_object($result)) {
    $cid = $comment->cid;
    $comments[$cid] = '';
    // Registered authors are shown under their account name, anonymous ones
    // under the name stored with the comment.
    $comment->name = $comment->uid ? $comment->registered_name : $comment->name;
    $form['subject'][$cid] = array(
      '#value' => l($comment->subject, 'node/' . $comment->nid, array(
        'title' => truncate_utf8($comment->comment, 128),
      ), NULL, 'comment-' . $cid),
    );
    $form['username'][$cid] = array(
      '#value' => theme('username', $comment),
    );
    $form['hostname'][$cid] = array(
      '#value' => $comment->hostname,
    );
    $form['status'][$cid] = array(
      '#value' => $comment->status == 0 ? t('published') : t('not published'),
    );
    $form['timestamp'][$cid] = array(
      '#value' => format_date($comment->timestamp, 'small'),
    );
    $form['operations'][$cid] = array(
      '#value' => l(t('edit'), "comment/edit/{$cid}", array(), 'destination=admin/content/comment/') . '&nbsp;' . l(t('details'), "admin/logs/spam/logs/comment/{$cid}"),
    );
  }
  $form['comments'] = array(
    '#type' => 'checkboxes',
    '#options' => $comments,
  );
  // Use the same page size as the query above.
  $form['pager'] = array(
    '#value' => theme('pager', NULL, $limit, 0),
  );
  return $form;
}

/**
 * Display a list of all IPs that have been blocked by the spam module.
 *
 * Lists each hostname in spam_tracker that has at least 'spam_blacklist_ip'
 * entries with a probability above 'spam_threshold'.
 *
 * @return
 *   HTML: a sortable table followed by a pager.
 */
function spam_blocked_ips_overview() {
  drupal_set_title(t('Spam Module Blocked IPs'));
  $header = array(
    array(
      'data' => t('IP Address'),
      'field' => 'hostname',
    ),
    array(
      'data' => t('Last Seen'),
      'field' => 'timestamp',
      'sort' => 'desc',
    ),
    array(
      'data' => t('Counter'),
      'field' => 'count',
    ),
  );

  // This SQL is *nasty*, so if you think you can do better, please be my guest!
  // This unfortunately has to be SQL because the pager module can't be told
  // how many rows we've got (so we can't do our own processing in PHP and
  // still get paging to work properly).
  // NOTE(review): this compares with "probability > threshold" while the
  // spam overview queries in this file use ">=" - confirm which is intended.
  $sql = "SELECT * FROM (SELECT DISTINCT x.hostname, x.timestamp, COUNT(x.hostname) AS count FROM (SELECT timestamp,hostname FROM {spam_tracker} WHERE probability>%d ORDER BY timestamp DESC) AS x GROUP BY x.hostname) AS y WHERE y.count>=%d";
  $arguments = array(
    variable_get('spam_threshold', 80),
    variable_get('spam_blacklist_ip', 2),
  );
  $count_sql = preg_replace('/^SELECT \\* /', 'SELECT count(hostname) ', $sql);
  $result = pager_query($sql . tablesort_sql($header), 50, 0, $count_sql, $arguments);

  $rows = array();
  while ($log = db_fetch_object($result)) {
    $rows[] = array(
      'data' => array(
        $log->hostname,
        format_date($log->timestamp, 'small'),
        $log->count,
      ),
    );
  }
  if (!$rows) {
    // Span the placeholder across exactly the columns the table has.
    $rows[] = array(
      array(
        'data' => t('No log messages available.'),
        'colspan' => count($header),
      ),
    );
  }
  return theme('table', $header, $rows) . theme('pager', NULL, 50, 0);
}

/**
 * Form API hook; validate spam_comment_overview() form.
 *
 * @param $form_id
 *   Id of the submitted form (unused).
 * @param $form_values
 *   Submitted values; 'comments' holds the checkbox values.
 *
 * @return
 *   The spam comment list path when nothing was selected, nothing otherwise.
 */
function spam_comment_overview_validate($form_id, $form_values) {
  // Drop entries whose value is 0 (presumably the unticked checkboxes).
  $selected = array_diff($form_values['comments'], array(0));
  if (count($selected) != 0) {
    return;
  }
  form_set_error(NULL, t('Please select one or more comments to perform the update on.'));
  return 'admin/content/comment/list/spam';
}

/**
 * Form API hook; submit spam_comment_overview() form.
 *
 * Runs the selected operation on every checked comment.
 *
 * @param $form_id
 *   Id of the submitted form (unused).
 * @param $form_values
 *   Submitted values; 'operation' names the action and 'comments' maps
 *   comment ids to their checkbox value.
 *
 * @return
 *   The spam comment list path.
 */
function spam_comment_overview_submit($form_id, $form_values) {
  $destination = 'admin/content/comment/list/spam';
  $callback = spam_comment_operations($form_values['operation']);
  if (!$callback) {
    return $destination;
  }
  foreach ($form_values['comments'] as $cid => $checked) {
    if (!$checked) {
      continue;
    }
    $callback($cid);
  }
  drupal_set_message(t('Comments have been updated.'));
  return $destination;
}

/**
 * Theme the spam_comment_overview() form as a table.
 *
 * @param $form
 *   The form array to render.
 *
 * @return
 *   HTML: the options fieldset, the table, the pager (if any) and whatever
 *   is left of the form.
 */
function theme_spam_comment_overview($form) {
  // Form keys rendered into each table row, in column order.
  $columns = array(
    'comments',
    'subject',
    'username',
    'hostname',
    'status',
    'timestamp',
    'operations',
  );
  $rows = array();
  $output = drupal_render($form['options']);

  $has_rows = isset($form['subject']) && is_array($form['subject']);
  if (!$has_rows) {
    $rows[] = array(
      array(
        'data' => t('No comments available.'),
        'colspan' => '7',
      ),
    );
  }
  else {
    foreach (element_children($form['subject']) as $key) {
      $cells = array();
      foreach ($columns as $column) {
        $cells[] = drupal_render($form[$column][$key]);
      }
      $rows[] = $cells;
    }
  }

  $output .= theme('table', $form['header']['#value'], $rows);
  if ($form['pager']['#value']) {
    $output .= drupal_render($form['pager']);
  }
  return $output . drupal_render($form);
}

/**
 * Validate handler for the spam_node_overview form.
 *
 * Sets a form error and redirects back to the spam node list when no node
 * was selected.
 *
 * @param $form_id
 *   Id of the submitted form (unused).
 * @param $form_values
 *   Submitted values; 'nodes' holds the checkbox values.
 */
function spam_node_overview_validate($form_id, $form_values) {
  // Drop entries whose value is 0 (presumably the unticked checkboxes).
  $selected = array_diff($form_values['nodes'], array(0));
  if (!count($selected)) {
    form_set_error('', t('Please select one or more nodes to perform the update on.'));
    drupal_goto('admin/content/node/list/spam');
  }
}

/**
 * Submit handler for the spam_node_overview form.
 *
 * Runs the selected operation on every checked node.
 *
 * @param $form_id
 *   Id of the submitted form (unused).
 * @param $form_values
 *   Submitted values; 'operation' names the action and 'nodes' maps node ids
 *   to their checkbox value.
 */
function spam_node_overview_submit($form_id, $form_values) {
  $callback = spam_node_operations_list($form_values['operation']);
  if (!$callback) {
    return;
  }
  foreach ($form_values['nodes'] as $nid => $checked) {
    if (!$checked) {
      continue;
    }
    $callback($nid);
  }
  drupal_set_message(t('Nodes have been updated.'));
}

/**
 * Map a requested action on a node to the function handling it.
 *
 * @param $action
 *   One of 'NOT_SPAM', 'UNPUBLISH', 'PUBLISH' or 'DELETE'.
 *
 * @return
 *   Name of the function to call; 'spam_donothing_node' for any other action.
 */
function spam_node_operations_list($action = 'NOTHING') {
  if ($action == 'NOT_SPAM') {
    return 'spam_notspam_node';
  }
  if ($action == 'UNPUBLISH') {
    return 'spam_unpublish_node';
  }
  if ($action == 'PUBLISH') {
    return 'spam_publish_node';
  }
  if ($action == 'DELETE') {
    return 'spam_delete_node';
  }

  // do nothing if we don't know what to do
  return 'spam_donothing_node';
}

/**
 * Theme the spam_node_overview form as a table.
 *
 * @param $form
 *   The form array to render.
 *
 * @return
 *   HTML: the options fieldset, the table, the pager (if any) and whatever
 *   is left of the form.
 */
function theme_spam_node_overview($form) {
  // Form keys rendered into each table row, in column order.
  $columns = array(
    'nodes',
    'title',
    'type',
    'author',
    'hostname',
    'status',
    'timestamp',
    'operations',
  );
  $rows = array();
  $output = drupal_render($form['options']);

  $has_rows = isset($form['title']) && is_array($form['title']);
  if (!$has_rows) {
    $rows[] = array(
      array(
        'data' => t('No nodes available.'),
        'colspan' => '99',
      ),
    );
  }
  else {
    foreach (element_children($form['title']) as $key) {
      $cells = array();
      foreach ($columns as $column) {
        $cells[] = drupal_render($form[$column][$key]);
      }
      $rows[] = $cells;
    }
  }

  $output .= theme('table', $form['header']['#value'], $rows);
  if ($form['pager']['#value']) {
    $output .= drupal_render($form['pager']);
  }
  return $output . drupal_render($form);
}

/**
 * Display the form that allows the user to edit node spam.
 *
 * Lists every node whose tracked probability is at or above the
 * 'spam_threshold' setting, with bulk update options, row checkboxes and a
 * pager.
 *
 * @return
 *   The form array.
 */
function spam_node_overview() {
  $limit = variable_get('spam_display_quantity', 50);

  // build an 'Update options' form
  $form['options'] = array(
    '#type' => 'fieldset',
    '#title' => t('Update options'),
    '#prefix' => '<div class="container-inline">',
    '#suffix' => '</div>',
  );
  $form['options']['operation'] = array(
    '#type' => 'select',
    '#options' => array(
      'NOT_SPAM' => t('Mark the selected posts as not spam'),
      'UNPUBLISH' => t('Unpublish the selected posts'),
      'PUBLISH' => t('Publish the selected posts'),
      'DELETE' => t('Delete the selected posts (no confirmation)'),
    ),
  );
  $form['options']['submit'] = array(
    '#type' => 'submit',
    '#value' => t('Update'),
  );

  $form['header'] = array(
    '#type' => 'value',
    '#value' => array(
      // Same "select all" header cell as spam_comment_overview() uses.
      theme('table_select_header_cell'),
      array(
        'data' => t('title'),
        'field' => 'n.title',
      ),
      array(
        'data' => t('type'),
        'field' => 'n.type',
      ),
      array(
        'data' => t('author'),
        'field' => 'u.name',
      ),
      array(
        'data' => t('hostname'),
        'field' => 's.hostname',
      ),
      array(
        'data' => t('status'),
        'field' => 'n.status',
      ),
      array(
        'data' => t('timestamp'),
        'field' => 'n.changed',
        'sort' => 'desc',
      ),
      array(
        'data' => t('operations'),
      ),
    ),
  );

  // The threshold is passed as a query argument instead of being
  // concatenated into the SQL string.
  $sql = "SELECT n.*, u.name, u.uid, s.id, s.probability, s.hostname FROM {spam_tracker} s, {node} n, {users} u WHERE s.source = 'node' AND n.uid = u.uid AND s.id = n.nid AND s.probability >= %d";
  $sql .= tablesort_sql($form['header']['#value']);
  $result = pager_query($sql, $limit, 0, NULL, variable_get('spam_threshold', 80));

  // build a table listing the appropriate nodes
  $nodes = array();
  while ($node = db_fetch_object($result)) {
    $nid = $node->nid;
    $nodes[$nid] = '';
    // Go through theme() so that theme overrides of the marker are honoured.
    $form['title'][$nid] = array(
      '#value' => l($node->title, "node/{$nid}") . ' ' . (node_last_viewed($nid) < $node->changed ? theme('mark') : ''),
    );
    $form['type'][$nid] = array(
      '#value' => node_get_types('name', $node),
    );
    $form['author'][$nid] = array(
      '#value' => theme('username', $node),
    );
    $form['hostname'][$nid] = array(
      '#value' => $node->hostname,
    );
    $form['status'][$nid] = array(
      '#value' => $node->status ? t('published') : t('not published'),
    );
    $form['timestamp'][$nid] = array(
      '#value' => format_date($node->changed, 'small'),
    );
    $form['operations'][$nid] = array(
      '#value' => l(t('View log'), "admin/logs/spam/logs/node/{$nid}") . '&nbsp;' . l(t('Edit'), "node/{$nid}/edit") . '&nbsp;' . l(t('Delete'), "admin/content/node/delete/{$nid}"),
    );
  }
  $form['nodes'] = array(
    '#type' => 'checkboxes',
    '#options' => $nodes,
  );
  // Use the same page size as the query above.
  $form['pager'] = array(
    '#value' => theme('pager', NULL, $limit, 0),
  );
  return $form;
}

/** comment support functions **/
/**
 * Load the cid, nid, subject, comment and uid fields of a comment.
 *
 * @param $cid
 *   Comment id.
 *
 * @return
 *   The comment object, or NULL if no row was found or its cid is 0.
 */
function spam_load_comment($cid) {
  $result = db_query('SELECT cid,nid,subject,comment,uid FROM {comments} WHERE cid = %d', $cid);
  $comment = db_fetch_object($result);
  return ($comment && $comment->cid != 0) ? $comment : NULL;
}
/**
 * Sanity check a comment: it must be attached to an existing, published node.
 *
 * Every outcome is written to the spam log.
 *
 * @param $cid
 *   Comment id.
 *
 * @return
 *   TRUE if the comment's node exists and has status 1, FALSE otherwise.
 */
function spam_validate_comment($cid) {
  $comment = db_fetch_object(db_query('SELECT nid FROM {comments} WHERE cid = %d', $cid));

  // new comments must be attached to nodes
  if ($comment->nid == 0) {
    spam_log(SPAM_LOG, t('spam_validate_comment: comment not attached to a node.'), 'comment', $cid);
    return FALSE;
  }

  // the node the comment points at must exist
  $node = db_fetch_object(db_query('SELECT nid, status FROM {node} WHERE nid = %d', $comment->nid));
  if (!$node || !$node->nid) {
    spam_log(SPAM_LOG, t('spam_validate_comment: new comment attached to a non-existing node.'), 'comment', $cid);
    return FALSE;
  }

  // new comments must be attached to published nodes
  if ($node->status != 1) {
    spam_log(SPAM_LOG, t('spam_validate_comment: new comment attached to an unpublished node.'), 'comment', $cid);
    return FALSE;
  }

  spam_log(SPAM_DEBUG, t('spam_validate_comment: new comment attached to existing and published node.'), 'comment', $cid);
  return TRUE;
}
/**
 * Ignore the spam reports for a piece of content.
 *
 * Only removes the matching rows from the spam_reported table.
 *
 * @param $source
 *   Module type such as 'comment' or 'node'.
 * @param $id
 *   Id of the reported content.
 */
function spam_reported_ignore($source, $id) {
  $sql = "DELETE FROM {spam_reported} WHERE source = '%s' AND id = %d";
  db_query($sql, $source, $id);
}
/**
 * Mark reported content as spam.
 *
 * Removes the reports from the spam_reported table, then calls
 * spam_spam_<source>() with the content id.
 *
 * @param $source
 *   Module type such as 'comment' or 'node'.
 * @param $id
 *   Id of the reported content.
 */
function spam_reported_spam($source, $id) {
  $sql = "DELETE FROM {spam_reported} WHERE source = '%s' AND id = %d";
  db_query($sql, $source, $id);

  $mark_callback = 'spam_spam_' . $source;
  $mark_callback($id);
}
/**
 * Handle a report by deleting the reported content.
 *
 * The report is removed from the spam_reported table, then the work is
 * dispatched to the source specific handler spam_delete_<source>(), for
 * example spam_delete_comment() or spam_delete_node().
 *
 * @param $source
 *   Source of the reported content (ie 'comment', 'node').
 * @param $id
 *   Numeric identifier of the reported content.
 */
function spam_reported_delete($source, $id) {

  // deleting, first remove from spam_reported table
  db_query("DELETE FROM {spam_reported} WHERE source = '%s' AND id = %d", $source, $id);

  // a source without a handler is logged instead of triggering a fatal
  // "call to undefined function" error
  $function = "spam_delete_{$source}";
  if (function_exists($function)) {
    $function($id);
  }
  else {
    spam_log(SPAM_LOG, t('spam_reported_delete: no handler for source "%source".', array(
      '%source' => $source,
    )), $source, $id);
  }
}

/**
 * Publish a comment.
 *
 * Sets the comment status to 0 (comments use 0 for published, the opposite
 * of nodes), calls _comment_update_node_statistics() for the comment's node,
 * clears the cache and records the change in the watchdog and the spam log.
 * Nothing happens if the comment cannot be loaded.
 *
 * @param $cid
 *   Id of the comment to publish.
 */
function spam_publish_comment($cid) {
  $comment = spam_load_comment($cid);
  if (!$comment) {
    return;
  }

  db_query('UPDATE {comments} SET status = 0 WHERE cid = %d', $comment->cid);
  _comment_update_node_statistics($comment->nid);
  cache_clear_all();

  $args = array('%subject' => $comment->subject);
  watchdog('spam', t('Spam: published comment "%subject".', $args));
  spam_log(SPAM_LOG, t('spam_publish_comment: published comment "%subject".', $args), 'comment', $comment->cid);
}
/**
 * Unpublish a comment.
 *
 * Sets the comment status to 1 (comments use 1 for unpublished, the opposite
 * of nodes), calls _comment_update_node_statistics() for the comment's node,
 * clears the cache and records the change in the watchdog and the spam log.
 * Nothing happens if the comment cannot be loaded.
 *
 * @param $cid
 *   Id of the comment to unpublish.
 */
function spam_unpublish_comment($cid) {
  $comment = spam_load_comment($cid);
  if (!$comment) {
    return;
  }

  db_query('UPDATE {comments} SET status = 1 WHERE cid = %d', $comment->cid);
  _comment_update_node_statistics($comment->nid);
  cache_clear_all();

  $args = array('%subject' => $comment->subject);
  watchdog('spam', t('Spam: unpublished comment "%subject".', $args));
  spam_log(SPAM_LOG, t('spam_unpublish_comment: unpublished comment "%subject".', $args), 'comment', $comment->cid);
}

/**
 * Delete a comment together with all of its replies.
 *
 * The deletion is carried out while the global $user is temporarily replaced
 * by the account with uid 1; the original user is restored afterwards.
 * Nothing happens for a cid of 0 or a comment that cannot be loaded.
 *
 * @param $cid
 *   Id of the comment to delete.
 */
function spam_delete_comment($cid) {
  global $user;

  if ($cid == 0) {
    return;
  }
  $comment = spam_load_comment($cid);
  if (!$comment) {
    return;
  }

  // act as uid 1 for the duration of the delete, then switch back
  $original_user = $user;
  $user = user_load(array('uid' => 1));
  _comment_delete_thread($comment);
  _comment_update_node_statistics($comment->nid);
  $user = $original_user;

  cache_clear_all();

  $args = array('%subject' => $comment->subject);
  watchdog('spam', t('Spam: deleted comment "%subject" and all replies.', $args));
  spam_log(SPAM_LOG, t('spam_delete_comment: deleted comment "%subject" and all replies.', $args), 'comment', $comment->cid);
}

/**
 * Mark the comment as spam. This may cause the comment to become unpublished
 * depending on settings.
 *
 * The comment is tokenized and the tokens are passed to spam_tokens_unsave()
 * and spam_tokens_save(), its spam_tracker row is set to a probability of 99
 * (a row is inserted when the update touched none), and the default actions
 * are run. Nothing happens if the comment cannot be loaded.
 *
 * @param $cid
 *   Id of the comment to mark as spam.
 */
function spam_spam_comment($cid) {
  $comment = spam_load_comment($cid);
  if (!$comment) {
    return;
  }

  // NOTE(review): name, mail and homepage only contribute tokens if
  // spam_load_comment() selects those columns -- confirm.
  $header_tokens = spam_tokenize($comment->subject, 'header*');
  $body_tokens = spam_tokenize("{$comment->comment}  {$comment->name} {$comment->mail} {$comment->homepage}");
  $tokens = array_merge($header_tokens, $body_tokens);
  spam_tokens_unsave($tokens, 1);
  spam_tokens_save($tokens, 1);

  db_query("UPDATE {spam_tracker} SET probability = %d, timestamp = %d WHERE source = 'comment' AND id = %d", 99, time(), $cid);
  $updated = db_affected_rows();
  if (!$updated) {
    // NOTE(review): the hostname stored here is the address of the current
    // request (whoever marks the comment), not necessarily the comment
    // author's -- confirm this is what consumers of spam_tracker expect.
    db_query("INSERT INTO {spam_tracker} (id, source, probability, hostname, hash, timestamp) VALUES(%d, '%s', %d, '%s', '%s', %d)", $cid, 'comment', 99, $_SERVER['REMOTE_ADDR'], md5($comment->subject . $comment->comment), time());
  }

  spam_default_actions('comment', $cid, $comment->subject, $comment->comment, 99, NULL, FALSE, 1);
  $message = t('spam_page: comment manually marked as spam');
  spam_log(SPAM_LOG, $message, 'comment', $comment->cid);
}

/**
 * Mark the comment as not spam. This may cause the comment to become
 * published depending on settings.
 *
 * The comment is tokenized and the tokens are passed to spam_tokens_unsave()
 * and spam_tokens_save(), its existing spam_tracker row is set to a
 * probability of 1, and the default actions are run. Nothing happens if the
 * comment cannot be loaded.
 *
 * @param $cid
 *   Id of the comment to mark as not spam.
 */
function spam_notspam_comment($cid) {
  $comment = spam_load_comment($cid);
  if (!$comment) {
    return;
  }

  // NOTE(review): name, mail and homepage only contribute tokens if
  // spam_load_comment() selects those columns -- confirm.
  $header_tokens = spam_tokenize($comment->subject, 'header*');
  $body_tokens = spam_tokenize("{$comment->comment} {$comment->name} {$comment->mail} {$comment->homepage}");
  $tokens = array_merge($header_tokens, $body_tokens);
  spam_tokens_unsave($tokens, 0);
  spam_tokens_save($tokens, 0);

  // only an existing tracker row is updated; no row is inserted here
  db_query("UPDATE {spam_tracker} SET probability = %d, timestamp = %d WHERE source = 'comment' AND id = %d", 1, time(), $cid);

  spam_default_actions('comment', $cid, $comment->subject, $comment->comment, 1, NULL, FALSE);
  $message = t('spam_page: comment manually marked as not spam');
  spam_log(SPAM_LOG, $message, 'comment', $comment->cid);
}

/**
 * Fallback comment operation: take no action on the comment and only record
 * in the spam log that the requested operation could not be interpreted.
 *
 * @param $cid
 *   Id of the comment the invalid operation was requested for.
 */
function spam_donothing_comment($cid) {
  $message = t('spam_page: invalid comment operation selection made');
  spam_log(SPAM_LOG, $message, 'comment', $cid);
}

/** end of comment support functions **/

/** node support functions **/

/**
 * Force a node to be published.
 *
 * Writes status 1 straight to the node table, clears the cache and records
 * the change in the watchdog and the spam log. Nothing happens if the node
 * cannot be loaded.
 *
 * @param $nid
 *   Id of the node to publish.
 */
function spam_publish_node($nid) {
  $node = node_load(array('nid' => $nid));
  if (!$node) {
    return;
  }

  db_query('UPDATE {node} SET status = 1 WHERE nid = %d', $node->nid);
  cache_clear_all();

  $watchdog_args = array(
    '@type' => node_get_types('name', $node),
    '%subject' => $node->title,
  );
  watchdog('spam', t('Spam: published @type "%subject".', $watchdog_args));

  $log_args = array('%title' => $node->title);
  spam_log(SPAM_LOG, t('spam_publish_node: published node "%title".', $log_args), 'node', $node->nid);
}

/**
 * Force a node to be unpublished.
 *
 * Writes status 0 straight to the node table, clears the cache and records
 * the change in the watchdog and the spam log. Nothing happens if the node
 * cannot be loaded.
 *
 * @param $nid
 *   Id of the node to unpublish.
 */
function spam_unpublish_node($nid) {
  $node = node_load(array('nid' => $nid));
  if (!$node) {
    return;
  }

  db_query('UPDATE {node} SET status = 0 WHERE nid = %d', $node->nid);
  cache_clear_all();

  $watchdog_args = array(
    '@type' => node_get_types('name', $node),
    '%subject' => $node->title,
  );
  watchdog('spam', t('Spam: unpublished @type "%subject".', $watchdog_args));

  $log_args = array('%title' => $node->title);
  spam_log(SPAM_LOG, t('spam_unpublish_node: unpublished node "%title".', $log_args), 'node', $node->nid);
}

/**
 * Delete a node.
 *
 * node_delete() is called while the global $user is temporarily replaced by
 * the account with uid 1; the original user is restored afterwards. Nothing
 * happens for a nid of 0 or a node that cannot be loaded.
 *
 * @param $nid
 *   Id of the node to delete.
 */
function spam_delete_node($nid) {
  global $user;

  if ($nid == 0) {
    return;
  }
  $node = node_load($nid);
  if (!$node || $node->nid == 0) {
    return;
  }

  // NOTE(review): this flag is not read again in this function and
  // node_delete() is given the nid, not this object -- possibly a leftover.
  $node->confirm = 1;

  // act as uid 1 for the duration of the delete, then switch back
  $original_user = $user;
  $user = user_load(array('uid' => 1));
  node_delete($nid);
  $user = $original_user;

  $watchdog_args = array(
    '@type' => node_get_types('name', $node),
    '%subject' => $node->title,
  );
  watchdog('spam', t('Spam: deleted @type "%subject".', $watchdog_args));

  $log_args = array('%title' => $node->title);
  spam_log(SPAM_LOG, t('spam_delete_node: deleted node "%title".', $log_args), 'node', $node->nid);
}

/**
 * Force a node to be marked as spam. May unpublish depending on settings.
 *
 * The node title and body are tokenized and the tokens are passed to
 * spam_tokens_unsave() and spam_tokens_save(), the node's spam_tracker row
 * is set to a probability of 99 (a row is inserted when the update touched
 * none), and the default actions are run. Nothing happens if the node cannot
 * be loaded.
 *
 * @param $nid
 *   Id of the node to mark as spam.
 */
function spam_spam_node($nid) {
  if ($node = node_load(array(
    'nid' => $nid,
  ))) {
    $tokens = spam_tokenize($node->title, 'header*');
    $tokens = array_merge($tokens, spam_tokenize($node->body));

    // unsave before save: the same order used by spam_spam_comment(),
    // spam_notspam_comment() and spam_notspam_node()
    spam_tokens_unsave($tokens, 1);
    spam_tokens_save($tokens, 1);

    db_query("UPDATE {spam_tracker} SET probability = %d, timestamp = %d WHERE source = 'node' AND id = %d", 99, time(), $nid);
    if (!db_affected_rows()) {
      // NOTE(review): the hostname stored here is the address of the current
      // request (whoever marks the node), not necessarily the node author's
      // -- confirm this is what consumers of spam_tracker expect.
      db_query("INSERT INTO {spam_tracker} (id, source, probability, hostname, hash, timestamp) VALUES(%d, '%s', %d, '%s', '%s', %d)", $nid, 'node', 99, $_SERVER['REMOTE_ADDR'], md5($node->title . $node->body), time());
    }
    spam_default_actions('node', $nid, $node->title, $node->body, 99, NULL, FALSE, 1);
    spam_log(SPAM_LOG, t('spam_page: node manually marked as spam'), 'node', $node->nid);
  }
}

/**
 * Force a node to be marked as not spam. May not publish depending on
 * settings.
 *
 * The node title and body are tokenized and the tokens are passed to
 * spam_tokens_unsave() and spam_tokens_save(), the node's existing
 * spam_tracker row is set to a probability of 1, and the default actions are
 * run. Nothing happens if the node cannot be loaded.
 *
 * @param $nid
 *   Id of the node to mark as not spam.
 */
function spam_notspam_node($nid) {
  $node = node_load(array('nid' => $nid));
  if (!$node) {
    return;
  }

  $header_tokens = spam_tokenize($node->title, 'header*');
  $body_tokens = spam_tokenize($node->body);
  $tokens = array_merge($header_tokens, $body_tokens);
  spam_tokens_unsave($tokens, 0);
  spam_tokens_save($tokens, 0);

  // only an existing tracker row is updated; no row is inserted here
  db_query("UPDATE {spam_tracker} SET probability = %d, timestamp = %d WHERE source = 'node' AND id = %d", 1, time(), $nid);

  spam_default_actions('node', $nid, $node->title, $node->body, 1, NULL, FALSE);
  $message = t('spam_page: node manually marked as not spam');
  spam_log(SPAM_LOG, $message, 'node', $node->nid);
}

/**
 * Fallback node operation: take no action on the node and only record in the
 * spam log that the requested operation could not be interpreted.
 *
 * @param $nid
 *   Id of the node the invalid operation was requested for.
 */
function spam_donothing_node($nid) {
  $message = t('spam_page: invalid node operation selection made');
  spam_log(SPAM_LOG, $message, 'node', $nid);
}

Functions

Namesort descending Description
spam_admin_settings Provides configuration interface for module.
spam_admin_settings_actions
spam_admin_settings_advanced
spam_admin_settings_filter
spam_admin_settings_limits
spam_bayesian_filter Simple Bayesian logic to determine the probability that the passed in array of tokens is spam.
spam_blocked_ips_overview Display a list of all IPs that have been blocked by the spam module.
spam_comment Drupal _comment hook. Passes new comments to the spam filter.
spam_comment_operations Return the function to call dependent on the $action requested.
spam_comment_overview Display the form that allows the user to edit comment spam.
spam_comment_overview_submit Form API hook; submit spam_comment_overview() form.
spam_comment_overview_validate Form API hook; validate spam_comment_overview() form.
spam_content_filter Determine whether or not provided text is spam.
spam_cron Drupal _cron hook. Provides ability to automatically expire spam content.
spam_custom_filter
spam_custom_filter_delete Display custom filter delete confirmation box.
spam_custom_filter_delete_submit Removes custom filter from database.
spam_custom_filter_edit Display form for adding/editing custom filters.
spam_custom_filter_edit_submit Form API hook; submit the custom filter.
spam_custom_filter_edit_validate Form API hook; validate the custom filter.
spam_custom_filter_load Loads a custom filter.
spam_custom_filter_overview Manage custom spam filters.
spam_custom_filter_scan Logic to apply custom filter to existing site content. (At this time, only comment scanning is supported).
spam_custom_filter_scan_submit Form API hook; submit the custom filter scanner.
spam_custom_filter_scan_validate Form API hook; validate the custom filter scanner.
spam_default_actions The default callback, performs all actions on spam/not spam content.
spam_delete_comment Delete the comment and all replies
spam_delete_node Delete the node
spam_donothing_comment Take no action if for some reason the command cannot be interpreted
spam_donothing_node Take no action if for some reason the command cannot be interpreted
spam_duplicate_filter Search the spam_tracker table to see if this new content is a duplicate of earlier content. If it is a duplicate, see if the content has been duplicated more than the configured number of allowable times.
spam_get_probability
spam_help Drupal _help hook. Provides help and informational text about the spam module.
spam_invoke_hook Called by other spam module functions to invoke optional _spam hooks in external modules.
spam_ip_filter If blacklist is enabled, check to see if this is a known spammer IP. If it is, make them wait a while then redirect them to the main page with an indication that they're currently blacklisted.
spam_link Drupal _link hook. Adds themable spam related links to content if enabled.
spam_load Loads all fields from the matching spam entry in the spam_tracker table.
spam_load_comment Load a comment by its cid (first of the comment support functions).
spam_log
spam_logs_entry Displays complete information about a single log entry ID.
spam_logs_overview Displays an overview of the latest spam log entries.
spam_mail Wrapper to user_mail()
spam_mail_body Generate an appropriate notification mail message
spam_menu Implementation of hook_menu().
spam_nodeapi Drupal _nodeapi hook. Passes new node content through the spam filter.
spam_node_operations_list Return the function to call dependent on the $action requested.
spam_node_overview Display the form that allows the user to edit comment spam
spam_node_overview_submit Submit code for the spam_node_overview form/function
spam_node_overview_validate Validation code for the spam_node_overview form/function
spam_notspam_comment Mark the comment as not spam. This may cause the comment to become published depending on settings
spam_notspam_node Force a node to be marked as not spam. May not publish depending on settings
spam_page Drupal _page hook. Provides various spam actions based on the URL that is currently being accessed.
spam_perm Drupal _perm hook. Establishes permissions used in this module.
spam_publish_comment Publish a comment.
spam_publish_node Force a node to be published
spam_reported_comments_overview Display the form that allows the user to manage comment spam
spam_reported_comments_overview_submit Submit code for the spam_reported_comments_overview form/function
spam_reported_comments_overview_validate Validation code for the spam_reported_comments_overview form/function
spam_reported_comment_operations Return the function to call dependent on the $action requested.
spam_reported_delete
spam_reported_details
spam_reported_ignore
spam_reported_nodes_operations Return the function to call dependent on the $action requested.
spam_reported_nodes_overview
spam_reported_nodes_overview_submit Submit code for the spam_reported_nodes_overview form/function
spam_reported_nodes_overview_validate Validation code for the spam_reported_nodes_overview form/function
spam_reported_spam
spam_spam
spam_spam_comment Mark the comment as spam. This may cause the comment to become unpublished depending on settings
spam_spam_node Force a node to be marked as spam. May unpublish depending on settings
spam_tokenize Divide a string into tokens.
spam_tokens_save
spam_tokens_unsave
spam_unpublish_comment
spam_unpublish_node Force a node to be unpublished
spam_urls_count Keep track of the total number of URLs found in the current content.
spam_url_filter
spam_url_filter_delete Delete URL filter delete confirmation box.
spam_url_filter_delete_submit Removes custom URL from database.
spam_url_filter_edit Display form for adding/editing URL filters.
spam_url_filter_edit_submit Form API hook; submit the URL filter.
spam_url_filter_edit_validate Form API hook; validate the URL filter.
spam_url_filter_load Load the spam url filter from the tokens table.
spam_url_filter_overview Manage URL spam filters.
spam_url_limit Check if content has too many total urls, or if the same base url is repeated too many times.
spam_user_report Display the form that allows the user to edit comment spam
spam_user_report_submit Submit code for the spam_user_report form/function
spam_user_report_validate Validation code for the spam_user_report form/function
spam_validate_comment
theme_spam_comment_overview Form theme code for spam_comment_overview().
theme_spam_custom_filter_scan Form theme code for spam_custom_filter_scan().
theme_spam_link Display spam status (if enabled), and provide administrators with links to mark content as spam or not spam.
theme_spam_node_overview Form theme code for spam_node_overview
theme_spam_reported_comments_overview Form theme code for spam_reported_comments_overview code
theme_spam_reported_nodes_overview Form theme code for spam_reported_nodes_overview code
_is_spam Compare the passed in probability to the configured spam threshold.

Constants