You are here

duplicate.module in Spam 5.3

File

filters/duplicate/duplicate.module
View source
<?php

/**
 * Duplicate filter module
 * Copyright(c) 2007-2008
 *  Jeremy Andrews <jeremy@tag1consulting.com>.  All rights reserved.
 *
 * Detects spam by looking for duplication of content, or posting IP.
 */
// Values of the 'duplicate_blacklist_action' variable.  They match the option
// indexes of the "IP blacklist action" radios on the settings form.
define('DUPLICATE_BLACKLIST_SILENT', 0);
define('DUPLICATE_BLACKLIST_NOTIFY', 1);
define('DUPLICATE_BLACKLIST_BLOCK', 2);
// Fallback for the 'duplicate_threshold' variable when it has not been saved.
define('DUPLICATE_DEFAULT_THRESHOLD', 2);
// Fallback for the 'duplicate_blacklist' variable when it has not been saved.
define('DUPLICATE_DEFAULT_BLACKLIST', 3);
// Values written to the "spam" column of {spam_duplicate}.
define('DUPLICATE_NOT_SPAM', 0);
define('DUPLICATE_SPAM', 1);

/**
 * Drupal _menu() hook.
 *
 * Registers the filter's settings tab, the two "denied" landing pages that
 * blocked visitors are redirected to, and the blocked-IP log tab.
 *
 * @param $may_cache
 *   When FALSE nothing is registered and an empty array is returned.
 *
 * @return
 *   A list of menu item definitions.
 */
function duplicate_menu($may_cache) {
  // Every item defined here is cacheable, so there is nothing to add on the
  // non-cached pass.
  if (!$may_cache) {
    return array();
  }

  $settings_tab = array(
    'path' => 'admin/settings/spam/filters/duplicate',
    'title' => t('Duplicate'),
    'callback' => 'drupal_get_form',
    'callback arguments' => array('duplicate_admin_settings'),
    'description' => t('Configure the spam duplicate filter.'),
    'type' => MENU_LOCAL_TASK,
  );

  // The denied pages are open to everybody ('access' => TRUE).
  $denied_ip_page = array(
    'path' => 'duplicate/denied/ip',
    'callback' => 'duplicate_denied_ip',
    'type' => MENU_CALLBACK,
    'access' => TRUE,
  );
  $denied_post_page = array(
    'path' => 'duplicate/denied/post',
    'callback' => 'duplicate_denied_post',
    'type' => MENU_CALLBACK,
    'access' => TRUE,
  );

  $blocked_ip_log = array(
    'path' => 'admin/logs/spam/blocked_ip',
    'title' => t('Blocked IPs'),
    'access' => user_access('administer spam'),
    'callback' => 'spam_logs_blocked_ip',
    'type' => MENU_LOCAL_TASK,
  );

  return array($settings_tab, $denied_ip_page, $denied_post_page, $blocked_ip_log);
}

/**
 * Return one component of the current Drupal path.
 *
 * The arg() function may not be available early in the bootstrap process,
 * so we reimplement it here.
 *
 * @param $index
 *   Zero-based position of the wanted component after splitting $_GET['q']
 *   on '/'.
 *
 * @return
 *   The path component as a string, or NULL when $index is omitted or the
 *   path has no component at that position.
 */
function _duplicate_arg($index = NULL) {
  static $arguments, $q;

  // $_GET['q'] may be absent (e.g. a request for the front page).
  $path = isset($_GET['q']) ? $_GET['q'] : '';

  // Re-split only when the path changed since the previous call.
  if (empty($arguments) || $q !== $path) {
    $arguments = explode('/', $path);
    $q = $path;
  }

  if ($index !== NULL && isset($arguments[$index])) {
    return $arguments[$index];
  }
  return NULL;
}

/**
 * If IP blacklisting and IP blocking are both enabled, perform a database
 * query on each page load to see if the current visitor has been blacklisted.
 *
 * A blacklisted visitor is redirected to 'duplicate/denied/ip' unless the
 * current user has the 'bypass filters' permission, in which case the hit is
 * only logged.
 */
function duplicate_init() {
  global $user;

  // Allow notification to blacklisted IP, if enabled.  Without this exemption
  // the redirect below would fire again on the denied page itself.
  if (_duplicate_arg(0) == 'duplicate' && _duplicate_arg(1) == 'denied' && _duplicate_arg(2) == 'ip') {
    return;
  }

  // Only perform database queries if functionality is enabled.
  $blacklist_limit = variable_get('duplicate_blacklist', DUPLICATE_DEFAULT_BLACKLIST);
  if (variable_get('duplicate_blacklist_action', DUPLICATE_BLACKLIST_NOTIFY) != DUPLICATE_BLACKLIST_BLOCK || $blacklist_limit <= -1) {
    return;
  }

  // Blacklisting and IP blocking enabled.  db_query() returns a result
  // handle, so the count has to be read out of it with db_result().
  $duplicate_ip = (int) db_result(db_query("SELECT COUNT(iid) FROM {spam_duplicate} WHERE hostname = '%s' AND spam = %d", $_SERVER['REMOTE_ADDR'], DUPLICATE_SPAM));
  if ($duplicate_ip < $blacklist_limit) {
    return;
  }

  if (user_access('bypass filters')) {
    // No content type or id is known at this point, so NULL is passed for
    // both trailing spam_log() arguments.
    spam_log(SPAM_DEBUG, 'duplicate_init', t('Found !count spam for IP !ip, ignoring because user !user (uid !uid) is configured to bypass filters', array(
      '!count' => $duplicate_ip,
      '!ip' => $_SERVER['REMOTE_ADDR'],
      '!user' => isset($user->name) ? $user->name : '',
      '!uid' => isset($user->uid) ? $user->uid : 0,
    )), NULL, NULL);
    return;
  }

  drupal_goto("duplicate/denied/ip");
}

/**
 * Spam API Hook
 *
 * Dispatches on $op to filter content, keep the {spam_duplicate} table in
 * sync with content changes, and describe this filter to the spam module.
 *
 * @param $op
 *   The operation: 'filter', 'filter_module', 'insert', 'update', 'delete',
 *   'filter_info', 'filter_install', 'mark_as_spam' or 'mark_as_not_spam'.
 *   Any other value falls through and returns nothing.
 * @param $type
 *   Content type identifier; passed through to the spam module helpers.
 * @param $content
 *   The content being processed (array or object).
 * @param $fields
 *   Field description; $fields['main'] lists the fields that are hashed.
 * @param $extra
 *   Additional data.  The keys read here are 'sid', 'host', 'id' and
 *   'content'.
 *
 * @return
 *   Depends on $op: the filter result array for 'filter', the module name for
 *   'filter_module', descriptive arrays for 'filter_info' and
 *   'filter_install', a redirect path for 'mark_as_spam' when one has been
 *   registered, and nothing otherwise.
 */
function duplicate_spamapi($op, $type = NULL, $content = array(), $fields = array(), $extra = NULL) {
  switch ($op) {
    case 'filter':
      // Skip silently when the spam module reports this filter as disabled.
      if (!module_invoke('spam', 'filter_enabled', 'duplicate', $type, $content, $fields, $extra)) {
        return;
      }
      return duplicate_spam_filter($content, $type, $fields, $extra);
    case 'filter_module':
      return 'duplicate';
    case 'insert':
      if (!module_invoke('spam', 'filter_enabled', 'duplicate', $type, $content, $fields, $extra)) {
        return;
      }
      // Record the hash and posting host of the new content.
      if (is_array($extra) && $extra['sid'] && $extra['host'] && !empty($content) && !empty($fields)) {
        $hash = _duplicate_content_hash($content, $fields);
        db_query("INSERT INTO {spam_duplicate} (sid, content_hash, hostname, timestamp) VALUES(%d, '%s', '%s', %d)", $extra['sid'], $hash, $extra['host'], time());
        // If the filter registered a redirect earlier in this request, follow
        // it now that the row has been stored.
        $action = _duplicate_action();
        if (is_array($action) && !empty($action)) {
          if (isset($action['redirect'])) {
            drupal_goto($action['redirect']);
          }
        }
      }
      break;
    case 'update':
      if (!module_invoke('spam', 'filter_enabled', 'duplicate', $type, $content, $fields, $extra)) {
        return;
      }
      if (is_array($extra) && $extra['sid'] && $extra['host'] && !empty($content) && !empty($fields)) {
        $hash = _duplicate_content_hash($content, $fields);
        db_query("UPDATE {spam_duplicate} SET content_hash = '%s', hostname = '%s', timestamp = %d WHERE sid = %d", $hash, $extra['host'], time(), $extra['sid']);
        // No row was affected by the UPDATE, so insert one instead.
        if (!db_affected_rows()) {
          db_query("INSERT INTO {spam_duplicate} (sid, content_hash, hostname, timestamp) VALUES(%d, '%s', '%s', %d)", $extra['sid'], $hash, $extra['host'], time());
        }
        // Follow a redirect registered by the filter, as for 'insert'.
        $action = _duplicate_action();
        if (is_array($action) && !empty($action)) {
          if (isset($action['redirect'])) {
            drupal_goto($action['redirect']);
          }
        }
      }
      break;
    case 'delete':
      // Drop the tracking row that belongs to the deleted content.
      if (is_array($extra) && $extra['sid'] && !empty($content) && !empty($fields)) {
        db_query("DELETE FROM {spam_duplicate} WHERE sid = %d", $extra['sid']);
      }
      break;
    case 'filter_info':
      return array(
        'name' => t('Duplicate filter'),
        'module' => t('duplicate'),
        'description' => t('A duplication spam filter.'),
        'help' => t('The duplicate filter detects spam by detecting content duplication.'),
      );
      // Unreachable: the return above always leaves the function.
      break;
    case 'filter_install':
      // Initial status and weight reported for this filter.
      return array(
        'status' => SPAM_FILTER_ENABLED,
        'weight' => -8,
      );
    case 'mark_as_spam':
      if (!module_invoke('spam', 'filter_enabled', 'duplicate', $type, $content, $fields, $extra)) {
        return;
      }
      db_query('UPDATE {spam_duplicate} SET spam = %d WHERE sid = %d', DUPLICATE_SPAM, $extra['sid']);
      // No row was affected: load the content and add a row for it.  The
      // inserted row does not set the spam column.
      if (!db_affected_rows() && $extra['id'] && $extra['sid']) {
        $content = spam_invoke_module($type, 'load', $extra['id']);
        $fields = spam_invoke_module($type, 'filter_fields', $content);
        $hash = _duplicate_content_hash($content, $fields);
        $hostname = spam_invoke_module($type, 'hostname', $extra['id']);
        db_query("INSERT INTO {spam_duplicate} (sid, content_hash, hostname, timestamp) VALUES(%d, '%s', '%s', %d)", $extra['sid'], $hash, $hostname, time());
      }
      // Unlike 'insert' and 'update', the redirect path is returned to the
      // caller rather than followed here.
      $action = _duplicate_action();
      if (is_array($action) && isset($action['redirect'])) {
        return $action['redirect'];
      }
      break;
    case 'mark_as_not_spam':
      if (!module_invoke('spam', 'filter_enabled', 'duplicate', $type, $content, $fields, $extra)) {
        return;
      }
      db_query('UPDATE {spam_duplicate} SET spam = %d WHERE sid = %d', DUPLICATE_NOT_SPAM, $extra['sid']);
      if (!db_affected_rows() && $extra['id'] && $extra['sid']) {

        // Updating content that we've not filtered before.  Retrieve all the
        // data we need to add it to the spam_duplicate table.  Here the
        // content is taken from $extra['content'] rather than loaded.
        $fields = spam_invoke_module($type, 'filter_fields', $extra['content']);
        $hash = _duplicate_content_hash($extra['content'], $fields);
        $hostname = spam_invoke_module($type, 'hostname', $extra['id']);
        db_query("INSERT INTO {spam_duplicate} (sid, content_hash, hostname, timestamp) VALUES(%d, '%s', '%s', %d)", $extra['sid'], $hash, $hostname, time());
      }
      break;
  }
}

/**
 * Form builder for the duplicate filter settings page.
 *
 * Offers two fieldsets: "Content" (duplication threshold and the message
 * shown for duplicate posts) and "IP" (blacklist threshold, blacklist action
 * and the message shown to blacklisted visitors).
 *
 * @return
 *   The form array wrapped by system_settings_form().
 */
function duplicate_admin_settings() {
  $form = array();
  $site_placeholder = array('@site' => variable_get('site_name', 'Drupal'));

  // Content fieldset: thresholds 2..15 plus an "unlimited" entry keyed -1.
  $post_limits = drupal_map_assoc(range(2, 15));
  $post_limits[-1] = t('unlimited');
  $default_post_message = t('<p>You have attempted to post the same identical content multiple times, causing your posts to be flagged as potential spam.  If this has happened in error, please report this error along with your IP address (%IP) to a @site site administrator.  We apologize for any inconvenience.</p>', $site_placeholder);

  $form['content'] = array(
    '#type' => 'fieldset',
    '#title' => t('Content'),
    '#collapsible' => TRUE,
    'duplicate_threshold' => array(
      '#type' => 'select',
      '#title' => t('Duplication threshold'),
      '#default_value' => variable_get('duplicate_threshold', DUPLICATE_DEFAULT_THRESHOLD),
      '#options' => $post_limits,
      '#description' => t('Specify how many times the same identical content can be posted before it will be considered spam.  When tuning this filter, note that users may accidentally submit the same content multiple times causing an otherwise acceptible posting to be duplicated.'),
    ),
    'duplicate_post_message' => array(
      '#type' => 'textarea',
      '#title' => t('Duplicate post message'),
      '#default_value' => variable_get('duplicate_post_message', $default_post_message),
      '#description' => t('Message to show visitors when their content has been blocked because it was posted multiple times.  The text "%IP" will be replaced by the visitors actual IP address.'),
    ),
  );

  // IP fieldset: thresholds 1..15 plus an "unlimited" entry keyed -1.
  $ip_limits = drupal_map_assoc(range(1, 15));
  $ip_limits[-1] = t('unlimited');
  $default_blacklist_message = t('<p>You are currently not allowed to post content to @site, as previous content posted by your IP address (%IP) has been flagged as potential spam.</p><p>If you have not posted spam to @site, please report this error along with your IP address to a site administrator.  We apologize for any inconvenience.</p>', $site_placeholder);

  // The option indexes line up with the DUPLICATE_BLACKLIST_* constants.
  $blacklist_actions = array(
    t('Silently prevent visitor from posting'),
    t('Notify blacklisted visitor when posting, prevent from posting'),
    t('Notify blacklisted visitor, prevent from visiting site'),
  );

  $form['ip'] = array(
    '#type' => 'fieldset',
    '#title' => t('IP'),
    '#collapsible' => TRUE,
    'duplicate_blacklist' => array(
      '#type' => 'select',
      '#title' => t('IP blacklist threshold'),
      '#default_value' => variable_get('duplicate_blacklist', DUPLICATE_DEFAULT_BLACKLIST),
      '#options' => $ip_limits,
      '#description' => t('Specify how many times a given IP address is allowed to post possible spam content before the IP address is blacklisted and prevented from posting any additional content.'),
    ),
    'duplicate_blacklist_action' => array(
      '#type' => 'radios',
      '#title' => t('IP blacklist action'),
      '#options' => $blacklist_actions,
      '#default_value' => variable_get('duplicate_blacklist_action', DUPLICATE_BLACKLIST_NOTIFY),
      '#description' => t('Select an action from the above options.  If notification is enabled, the user will be redirected to a custom page displaying the "Blacklisted IP message" defined below.  If you only prevent users from posting, they will be able to view all site content.  If you prevent a user from visiting your site, they will only ever see the "Blacklisted IP message".'),
    ),
    'duplicate_blacklist_message' => array(
      '#type' => 'textarea',
      '#title' => t('Blacklisted IP message'),
      '#default_value' => variable_get('duplicate_blacklist_message', $default_blacklist_message),
      '#description' => t('Message to show visitors when their IP has been blacklisted.  The text "%IP" will be replaced by the visitors actual IP address.'),
    ),
  );

  return system_settings_form($form);
}

/**
 * Save the configuration.
 *
 * Either deletes all of this filter's variables ("Reset to defaults") or
 * stores the submitted value of each one.
 *
 * @param $form_id
 *   The id of the submitted form (unused).
 * @param $form_values
 *   The submitted values, including 'op' and one entry per setting.
 */
function duplicate_admin_settings_submit($form_id, $form_values) {
  // Single list of the variables this form manages, so the reset and save
  // branches cannot drift apart.
  $settings = array(
    'duplicate_threshold',
    'duplicate_post_message',
    'duplicate_blacklist',
    'duplicate_blacklist_action',
    'duplicate_blacklist_message',
  );

  if (isset($form_values['op']) && $form_values['op'] == t('Reset to defaults')) {
    foreach ($settings as $name) {
      variable_del($name);
    }
    // User-facing messages go through t() so they can be translated.
    drupal_set_message(t('Configuration reset to defaults.'));
    return;
  }

  foreach ($settings as $name) {
    variable_set($name, $form_values[$name]);
  }
  drupal_set_message(t('Configuration saved.'));
}

/**
 * Get an md5 hash of all main content fields concatenated together.
 *
 * @param $content
 *   The content as an array or an object; objects are cast to arrays.
 * @param $fields
 *   Field description; the names listed in $fields['main'] are concatenated
 *   in the order given.
 *
 * @return
 *   The md5 hash of the concatenated field values.  Fields that are listed
 *   but absent from $content contribute nothing, and a missing or non-array
 *   $fields['main'] yields the hash of the empty string.
 */
function _duplicate_content_hash($content, $fields) {
  if (is_object($content)) {
    $content = (array) $content;
  }

  $hash = '';
  // Guard against a missing 'main' list or missing fields instead of raising
  // undefined-index and invalid-foreach warnings.
  if (is_array($content) && isset($fields['main']) && is_array($fields['main'])) {
    foreach ($fields['main'] as $field) {
      if (isset($content[$field])) {
        $hash .= $content[$field];
      }
    }
  }
  return md5($hash);
}

/**
 * Determine whether or not the content is spam.
 *
 * Two checks are made, in this order:
 *  - the content hash already occurs often enough to reach the
 *    'duplicate_threshold' setting;
 *  - the posting IP already has at least 'duplicate_blacklist' rows marked
 *    as spam.
 * A value of -1 ("unlimited" on the settings form) disables either check.
 *
 * @param $content
 *   The content being filtered (array or object).
 * @param $type
 *   Content type identifier; passed through to the spam module helpers.
 * @param $fields
 *   Field description; $fields['main'] lists the fields that are hashed.
 * @param $extra
 *   Additional data passed on when resolving the content id.
 * @param $filter_test
 *   When TRUE only the verdict is computed: no content is marked as spam and
 *   no counters are updated.
 *
 * @return
 *   An empty array when neither check matches.  Otherwise an array with a
 *   'hash' or 'ip' entry (score and description), 'total' => 99 and a
 *   'redirect' path.
 */
function duplicate_spam_filter($content, $type, $fields, $extra = array(), $filter_test = FALSE) {
  $action = array();
  $hash = _duplicate_content_hash($content, $fields);
  $id = spam_invoke_module($type, 'content_id', $content, $extra);

  // Count other tracked posts with the same hash; "+ 1" counts this post.
  $duplicate_hash = db_result(db_query("SELECT COUNT(d.iid) FROM {spam_duplicate} d LEFT JOIN {spam_tracker} t ON d.sid = t.sid WHERE content_hash = '%s' AND content_id != %d", $hash, $id)) + 1;
  $threshold = variable_get('duplicate_threshold', DUPLICATE_DEFAULT_THRESHOLD);

  // -1 means "unlimited"; without this guard every post would match.
  if ($threshold > -1 && $duplicate_hash >= $threshold) {
    if (!$filter_test) {
      // Collect all matching sids first, so the nested queries triggered
      // below do not interfere with walking this result set.  Rows are read
      // with db_fetch_object() because db_result() does not advance through
      // a result set in every Drupal version.
      $sids = array();
      $result = db_query("SELECT sid FROM {spam_duplicate} WHERE content_hash = '%s'", $hash);
      while ($row = db_fetch_object($result)) {
        $sids[] = $row->sid;
      }

      foreach ($sids as $sid) {
        $unpublish = db_fetch_object(db_query('SELECT content_type, content_id, score FROM {spam_tracker} WHERE sid = %d', $sid));
        // A duplicate row may have no matching tracker row.
        if ($unpublish) {
          spam_mark_as_spam($unpublish->content_type, $unpublish->content_id, array(
            'score' => 99,
          ));
        }
      }

      // Update counter tracking that we've blocked a duplicate posting of this
      // content.  (It will actually increment the counter on
      // "duplicate_threshold" rows.)
      db_query("UPDATE {spam_duplicate} SET duplicate_hash = duplicate_hash + 1 WHERE content_hash = '%s'", $hash);
    }
    $action['hash'] = array(
      'score' => 99,
      'description' => t('Content is identical to %count other existing posts.', array(
        '%count' => $threshold,
      )),
    );
    $action['total'] = 99;
    $action['redirect'] = 'duplicate/denied/post';
    // Remember the verdict so the later 'insert'/'update' operation in the
    // same request can redirect.
    _duplicate_action($action);
    return $action;
  }

  $blacklist_limit = variable_get('duplicate_blacklist', DUPLICATE_DEFAULT_BLACKLIST);
  $duplicate_ip = db_result(db_query("SELECT COUNT(iid) FROM {spam_duplicate} WHERE hostname = '%s' AND spam = %d", $_SERVER['REMOTE_ADDR'], DUPLICATE_SPAM));
  if ($blacklist_limit > -1 && $duplicate_ip >= $blacklist_limit) {
    $action['ip'] = array(
      'score' => 99,
      'description' => t('Content was posted by the same IP address used to post %count other spam posts.', array(
        '%count' => $blacklist_limit,
      )),
    );
    $action['total'] = 99;
    $action['redirect'] = 'duplicate/denied/ip';
  }
  return $action;
}
/**
 * Per-request register for the action chosen by the duplicate filter.
 *
 * @param $register
 *   A non-empty action array to remember.  An empty value leaves the stored
 *   action untouched.
 *
 * @return
 *   The most recently registered action, or an empty array if none was
 *   registered.
 */
function _duplicate_action($register = array()) {
  static $remembered = array();

  // Pure read: nothing new to store.
  if (empty($register)) {
    return $remembered;
  }

  $remembered = $register;
  return $remembered;
}

/**
 * Menu callback for 'duplicate/denied/ip'.
 *
 * Shows the configured "Blacklisted IP message" with "@site" and "%IP"
 * replaced by the site name and the visitor's address.
 */
function duplicate_denied_ip() {
  $fallback = t('<p>You are currently not allowed to post content to @site, as previous content posted by your IP address (%IP) has been flagged as potential spam.</p><p>If you have not posted spam to @site, please report this error along with your IP address to a site administrator.  We apologize for any inconvenience.</p>');
  $template = variable_get('duplicate_blacklist_message', $fallback);

  // Placeholders are substituted here rather than by t(), so they also work
  // in a message saved by the administrator.
  $replacements = array(
    '@site' => variable_get('site_name', 'Drupal'),
    '%IP' => $_SERVER['REMOTE_ADDR'],
  );
  $message = strtr($template, $replacements);

  spam_denied_page($message, t('Your IP address has been blocked by our spam filter.'));
}

/**
 * Menu callback for 'duplicate/denied/post'.
 *
 * Shows the configured "Duplicate post message" with "@site" and "%IP"
 * replaced by the site name and the visitor's address.
 */
function duplicate_denied_post() {
  $fallback = t('<p>You have attempted to post the same identical content multiple times, causing your posts to be flagged as potential spam.  If this has happened in error, please report this error along with your IP address (%IP) to a @site site administrator.  We apologize for any inconvenience.</p>');
  $template = variable_get('duplicate_post_message', $fallback);

  // Placeholders are substituted here rather than by t(), so they also work
  // in a message saved by the administrator.
  $replacements = array(
    '@site' => variable_get('site_name', 'Drupal'),
    '%IP' => $_SERVER['REMOTE_ADDR'],
  );
  $message = strtr($template, $replacements);

  spam_denied_page($message, t('You have attempted to post the same content multiple times.'));
}
/**
 * Menu callback for 'admin/logs/spam/blocked_ip'.
 *
 * Lists, 50 per page, the hostnames in {spam_tracker} that have at least a
 * configured number of entries scoring above the spam threshold.
 *
 * @return
 *   The themed, sortable table followed by the pager.
 */
function spam_logs_blocked_ip() {
  drupal_set_title(t('Spam Module Blocked IPs'));
  $header = array(
    array(
      'data' => t('IP Address'),
      'field' => 'hostname',
    ),
    array(
      'data' => t('Last Seen'),
      'field' => 'timestamp',
      'sort' => 'desc',
    ),
    array(
      'data' => t('Counter'),
      'field' => 'count',
    ),
  );

  // This SQL is *nasty*, so if you think you can do better, please be my guest!
  // This unfortunately has to be SQL because the pager module can't be told
  // how many rows we've got (so we can't do our own processing in PHP and
  // still get paging to work properly).
  $sql = "SELECT * FROM (SELECT DISTINCT x.hostname, x.timestamp, COUNT(x.hostname) AS count FROM (SELECT timestamp, hostname FROM {spam_tracker} WHERE score > %d ORDER BY timestamp DESC) AS x GROUP BY x.hostname) AS y WHERE y.count >= %d";
  // NOTE(review): the minimum count is read from 'spam_blacklist_ip', while
  // the settings form in this module stores its threshold as
  // 'duplicate_blacklist' -- confirm which variable is intended.
  $arguments = array(
    variable_get('spam_threshold', SPAM_DEFAULT_THRESHOLD),
    variable_get('spam_blacklist_ip', DUPLICATE_DEFAULT_BLACKLIST),
  );
  $count_sql = preg_replace('/^SELECT \\* /', 'SELECT count(hostname) ', $sql);
  $result = pager_query($sql . tablesort_sql($header), 50, 0, $count_sql, $arguments);

  // Start from an empty list so the "nothing found" check below never reads
  // an undefined variable.
  $rows = array();
  while ($log = db_fetch_object($result)) {
    $rows[] = array(
      'data' => array(
        $log->hostname,
        format_date($log->timestamp, 'small'),
        $log->count,
      ),
    );
  }
  if (empty($rows)) {
    // Span the placeholder across exactly the columns the header defines.
    $rows[] = array(
      array(
        'data' => t('No log messages available.'),
        'colspan' => count($header),
      ),
    );
  }
  return theme('table', $header, $rows) . theme('pager', NULL, 50, 0);
}

Functions

Namesort descending Description
duplicate_admin_settings
duplicate_admin_settings_submit Save the configuration.
duplicate_denied_ip
duplicate_denied_post
duplicate_init If IP blacklisting and IP blocking are both enabled, perform a database query on each page load to see if the current visitor has been blacklisted.
duplicate_menu Drupal _menu() hook.
duplicate_spamapi Spam API Hook
duplicate_spam_filter Determine whether or not the content is spam.
spam_logs_blocked_ip
_duplicate_action
_duplicate_arg The arg() function may not be available early in the bootstrap process, so we reimplement it here.
_duplicate_content_hash Get an md5 hash of all content fields concatenated together.

Constants