You are here

spamspan.module in SpamSpan filter 7

This module implements the spamspan technique (http://www.spamspan.com) for hiding email addresses from spambots.

If JavaScript is disabled on the client side, addresses appear as example [at] example [dot] com.

@author Lawrence Akka @copyright 2006-2010, Lawrence Akka @license http://www.gnu.org/licenses/gpl.txt GPL v 2.0

File

spamspan.module
View source
<?php

/**
 * @file
 * This module implements the spamspan technique (http://www.spamspan.com) for
 * hiding email addresses from spambots.
 *
 * If JavaScript is disabled on the client side, addresses appear as
 * example [at] example [dot] com.
 *
 * @author Lawrence Akka
 * @copyright 2006-2010, Lawrence Akka
 * @license http://www.gnu.org/licenses/gpl.txt  GPL v 2.0
 *
 */

/**
 * Regex fragment that splits something which looks like an email address
 * into a name part and a domain part.
 *
 * The pattern is not perfect (who is?): it is intended to intercept things
 * which look like email addresses, not to determine whether an address is
 * valid. It will not intercept addresses with quoted local parts.
 *
 * Usage notes, as seen in this file:
 * - The fragment carries no delimiters or modifiers. Callers wrap it in "!"
 *   delimiters and add the "i" and "x" modifiers themselves.
 * - It contains layout whitespace and "#" comments, so it only works in
 *   extended ("x") mode.
 * - The final "[A-Z]{2,63}" relies on the caller's "i" modifier to match
 *   lower-case top-level domains.
 * - Capture group 1 is the name (local) part; capture group 2 is the whole
 *   domain. When the fragment is embedded in a larger pattern these group
 *   numbers shift by the number of groups opened before it.
 *
 * NOTE(review): the character class for the name part ends in "\w\+^@".
 * Because "^" is not the first character of the class it is a literal, so
 * the class appears to accept literal "^" and "@" characters in the name
 * part - confirm whether that is intended.
 *
 * @constant string SPAMSPAN_EMAIL
 */
define('SPAMSPAN_EMAIL', "\n      ([-\\.\\~\\'\\!\\#\$\\%\\&\\+\\/\\*\\=\\?\\^\\_\\`\\{\\|\\}\\w\\+^@]+) # Group 1 - Match the name part - dash, dot or\n                           #special characters.\n     @                     # @\n     ((?:        # Group 2\n       [-\\w]+\\.            # one or more letters or dashes followed by a dot.\n       )+                  # The whole thing one or more times\n       [A-Z]{2,63}         # with between 2 and 63 letters at the end (NB\n                           # new TLDs)\n     )\n");

/**
 * Returns the shared spamspan_admin helper object.
 *
 * The object is created on the first call and kept in a function-level
 * static, so every later call in the same request gets the same instance.
 *
 * @return spamspan_admin
 *   The per-request helper instance.
 */
function spamspan_admin() {
  static $instance = NULL;
  if ($instance === NULL) {
    $instance = new spamspan_admin();
  }
  return $instance;
}

/**
 * Form callback for the SpamSpan admin page.
 *
 * Per the original note, this is referenced from the menu definition as
 * 'page arguments' => array('spamspan_admin_page'). It writes a watchdog
 * entry on every call and then delegates to the admin helper object.
 *
 * @param $form
 *   The form array passed in by the caller.
 * @param $form_state
 *   The form state, passed by reference.
 * @return
 *   Whatever spamspan_admin()->page() returns.
 */
function spamspan_admin_page($form, &$form_state) {
  watchdog('spamspan', 'admin_page()');
  $admin = spamspan_admin();
  return $admin->page($form, $form_state);
}

/**
 * Submit handler counterpart of spamspan_admin_page().
 *
 * Writes a watchdog entry on every call and then delegates to the admin
 * helper object.
 *
 * @param $form
 *   The form array passed in by the caller.
 * @param $form_state
 *   The form state, passed by reference.
 * @return
 *   Whatever spamspan_admin()->page_submit() returns.
 */
function spamspan_admin_page_submit($form, &$form_state) {
  watchdog('spamspan', 'admin_page_submit()');
  $admin = spamspan_admin();
  return $admin->page_submit($form, $form_state);
}

/**
 * Implements hook_filter_info().
 *
 * Declares the single 'spamspan' text filter together with its callbacks and
 * default settings. Per the original note, this runs on every page, so it
 * only builds a static array and does no other work.
 *
 * @return
 *   An array of filter definitions keyed by filter name.
 */
function spamspan_filter_info() {
  // Defaults applied when a text format has not overridden a setting.
  $default_settings = array(
    'spamspan_at' => ' [at] ',
    'spamspan_use_graphic' => 0,
    'spamspan_dot_enable' => 0,
    'spamspan_dot' => ' [dot] ',
    'spamspan_use_form' => 0,
    'spamspan_form_pattern' => '<a href="%url?goto=%email">%displaytext</a>',
    'spamspan_form_default_url' => 'contact',
    'spamspan_form_default_displaytext' => 'contact form',
  );

  $definition = array(
    'title' => t('SpamSpan email address encoding filter'),
    'description' => t('Attempt to hide email addresses from spam-bots.'),
    'process callback' => '_spamspan_filter_process',
    'settings callback' => '_spamspan_filter_settings',
    'tips callback' => '_spamspan_filter_tips',
    'default settings' => $default_settings,
  );

  return array('spamspan' => $definition);
}

/**
 * Process callback for the SpamSpan filter.
 *
 * Scans the text and replaces, in this order:
 *  1. <a href="mailto:..."> links,
 *  2. bare email addresses followed by a bracketed options part,
 *  3. all remaining bare email addresses.
 *
 * The replacement markup itself is produced by spamspan_admin()->output().
 * According to the original note the aim is code roughly like this:
 *   <span class="spamspan">
 *   <span class="u">user</span>
 *   [at]
 *   <span class="d">example [dot] com</span>
 *   <span class="t">tag contents</span></span>
 *
 * Inline "data:...base64..." fragments are swapped for a placeholder before
 * the email patterns run and are restored verbatim afterwards.
 *
 * @param $text
 *   The text to filter.
 * @param $filter
 *   The filter object; it is handed to spamspan_admin()->filter_set().
 * @return
 *   The text with email addresses replaced.
 */
function _spamspan_filter_process($text, $filter) {

  // The preg_replace_callback() callbacks below only receive the match
  // array, so the relevant settings are stored on the shared spamspan_admin
  // object before any matching starts.
  spamspan_admin()
    ->filter_set($filter);

  // Top and tail the email regexp so that it is case insensitive and ignores
  // whitespace.
  $emailpattern = '!' . SPAMSPAN_EMAIL . '!ix';
  // Same, followed by a "[...]" options part captured as group 3.
  $emailpattern_with_options = '!' . SPAMSPAN_EMAIL . '\\[(.*?)\\]!ix';

  // Next set up a regex for mailto: URLs.
  // - see http://www.faqs.org/rfcs/rfc2368.html
  // Capture groups of the assembled pattern:
  //   1 - an attribute before href
  //   2 - the quote character around the href value
  //   3 - the whole mailto: URL, including any query string
  //   4 - the name part of the address
  //   5 - the domain part of the address
  //   6 - an attribute after href
  //   7 - the tag contents
  $mailtopattern = "!<a\\s+                                # opening <a and spaces\n      ((?:\\w+\\s*=\\s*)(?:\\w+|\"[^\"]*\"|'[^']*'))*?        # any attributes\n      \\s*                                                 # whitespace\n      href\\s*=\\s*(['\"])(mailto:" . SPAMSPAN_EMAIL . "(?:\\?[A-Za-z0-9_= %\\.\\-\\~\\_\\&;\\!\\*\\(\\)\\'#&]*)?)" . "\\2                                                # the relevant quote\n                                                          # character\n      ((?:\\s+\\w+\\s*=\\s*)(?:\\w+|\"[^\"]*\"|'[^']*'))*?     # any more attributes\n      >                                                   # end of the first tag\n      (.*?)                                               # tag contents.  NB this\n                                                          # will not work properly\n                                                          # if there is a nested\n                                                          # <a>, but this is not\n                                                          # valid xhtml anyway.\n      </a>                                                # closing tag\n      !ix";

  // HTML image tags need to be handled separately, as they may contain base64
  // encoded images slowing down the email regex function.
  // Therefore, remove all image contents and add them back later.
  // See https://drupal.org/node/1243042 for details.
  $placeholder = '__spamspan_img_placeholder__';
  $images = array(
    array(),
  );
  $inline_image_pattern = '/data\\:(?:.+?)base64(?:.+?)["|\']/';
  preg_match_all($inline_image_pattern, $text, $images);
  $text = preg_replace($inline_image_pattern, $placeholder, $text);

  // Now we can convert all mailto URLs
  $text = preg_replace_callback($mailtopattern, '_spamspan_callback_mailto', $text);

  // all bare email addresses with optional formatting information
  $text = preg_replace_callback($emailpattern_with_options, '_spamspan_email_addresses_with_options', $text);

  // and finally, all bare email addresses
  $text = preg_replace_callback($emailpattern, '_spamspan_bare_email_addresses', $text);

  // Revert back to the original image contents, one placeholder per captured
  // fragment, in order of appearance. The fragment is spliced in literally:
  // using it as a preg_replace() replacement string would interpret any "$n"
  // or "\n" sequence inside it as a backreference and corrupt the text.
  $placeholder_length = strlen($placeholder);
  foreach ($images[0] as $image) {
    $position = strpos($text, $placeholder);
    if ($position === FALSE) {
      // No placeholder left to fill; nothing more to restore.
      break;
    }
    $text = substr_replace($text, $image, $position, $placeholder_length);
  }
  return $text;
}
/**
 * preg_replace_callback() callback for bare addresses with an options part.
 *
 * The options part (match group 3) is split on "|": the first piece is a
 * custom form URL and the second a custom display text. Pieces that are empty
 * (before or after trimming) are ignored.
 *
 * @param $matches
 *   Match array: [1] name part, [2] domain part, [3] options text.
 * @return
 *   Whatever spamspan_admin()->output() returns for this address.
 */
function _spamspan_email_addresses_with_options($matches) {
  $vars = array();

  if (!empty($matches[3])) {
    $pieces = explode('|', $matches[3]);
    // Position of each option inside the "|"-separated list.
    $option_keys = array(
      0 => 'custom_form_url',
      1 => 'custom_displaytext',
    );
    foreach ($option_keys as $index => $key) {
      if (empty($pieces[$index])) {
        continue;
      }
      $value = trim($pieces[$index]);
      if (!empty($value)) {
        $vars[$key] = $value;
      }
    }
  }

  $admin = spamspan_admin();
  return $admin->output($matches[1], $matches[2], '', array(), $vars);
}
/**
 * preg_replace_callback() callback for plain email addresses.
 *
 * @param $matches
 *   Match array: [1] name part, [2] domain part.
 * @return
 *   Whatever spamspan_admin()->output() returns for this address.
 */
function _spamspan_bare_email_addresses($matches) {
  $admin = spamspan_admin();
  return $admin->output($matches[1], $matches[2]);
}

/**
 * Settings callback for the SpamSpan filter.
 *
 * Forwards all arguments unchanged to spamspan_admin()->filter_settings().
 *
 * @return
 *   Whatever spamspan_admin()->filter_settings() returns.
 */
function _spamspan_filter_settings($form, $form_state, $filter, $format, $defaults, $filters) {
  $admin = spamspan_admin();
  return $admin->filter_settings($form, $form_state, $filter, $format, $defaults, $filters);
}

/**
 * Tips callback for the SpamSpan filter.
 *
 * Returns the same translated help text regardless of the arguments; none of
 * the parameters are read.
 *
 * @return
 *   The translated tip text.
 */
function _spamspan_filter_tips($filter, $format, $long = FALSE) {
  // A single literal is passed to t() so the whole tip is one source string.
  return t('Each email address will be obfuscated in a human readable fashion or, if JavaScript is enabled, replaced with a spam resistent clickable link. Email addresses will get the default web form unless specified. If replacement text (a persons name) is required a webform is also required. Separate each part with the "|" pipe symbol. Replace spaces in names with "_".');
}

/**
 * Implements hook_help().
 *
 * Only the module's own help path is handled; for any other path nothing is
 * returned.
 */
function spamspan_help($path, $arg) {
  // Loose comparison on purpose: it mirrors what a switch statement does.
  if ($path == 'admin/help#spamspan') {
    $admin = spamspan_admin();
    return $admin->help($path, $arg);
  }
}

/**
 * preg_replace_callback() callback for mailto: links.
 *
 * Replaces an <a href="mailto:..."> element which has been found with the
 * markup returned by spamspan_admin()->output().
 *
 * @param $matches
 *   Match array for the mailto pattern:
 *   [1] attribute before href, [3] the whole mailto: URL, [4] name part,
 *   [5] domain part, [6] attribute after href, [7] tag contents.
 * @return
 *   The markup with which to replace the link.
 */
function _spamspan_callback_mailto($matches) {

  // Take the mailto: URL in $matches[3] and split the query string into its
  // component parts, putting them in $headers as [0]=>"header=contents" etc.
  // We cannot use parse_str() because the query string might contain dots.
  // A single quote can be encoded as &#039; which breaks parse_url(), so it
  // is turned back into a plain single quote, which is perfectly valid.
  $matches[3] = str_replace("&#039;", '\'', $matches[3]);

  // parse_url() yields NULL when the URL has no query string (and FALSE for
  // a seriously malformed URL). Cast to string so the string functions below
  // never receive NULL, which is deprecated as of PHP 8.1.
  $query = (string) parse_url($matches[3], PHP_URL_QUERY);
  $query = str_replace('&amp;', '&', $query);
  $headers = preg_split('/[&;]/', $query);

  // With no query string $headers[0] is '' so $headers must be reset.
  if ($headers[0] == '') {
    $headers = array();
  }

  // Take all <a> attributes except the href and put them into custom $vars.
  $vars = $attributes = array();

  // Attribute captured before the href.
  if (!empty($matches[1])) {
    $matches[1] = trim($matches[1]);
    $attributes[] = $matches[1];
  }

  // Attribute captured after the href.
  if (!empty($matches[6])) {
    $matches[6] = trim($matches[6]);
    $attributes[] = $matches[6];
  }
  if (count($attributes)) {
    $vars['extra_attributes'] = implode(' ', $attributes);
  }
  return spamspan_admin()
    ->output($matches[4], $matches[5], $matches[7], $headers, $vars);
}

/**
 * Implements hook_menu().
 *
 * The menu items are defined by the admin helper object.
 */
function spamspan_menu() {
  $admin = spamspan_admin();
  return $admin->menu();
}

/**
 * Utility wrapper around the main processing callback.
 *
 * This function may be called by other modules and themes.
 *
 * @param $text
 *   Text, maybe containing email addresses.
 * @param $settings
 *   An associative array of settings to be applied. Any setting not given
 *   here falls back to the filter's declared default.
 * @return
 *   The input text with emails replaced by spans.
 */
function spamspan($text = '', $settings = array()) {
  // Look up the defaults declared for the 'spamspan' filter.
  $filters = filter_get_filters();

  // Build a stand-in filter object carrying the merged settings; with "+"
  // the caller's values win over the defaults.
  $filter = new stdClass();
  $filter->settings = $settings + $filters['spamspan']['default settings'];

  return _spamspan_filter_process($text, $filter);
}

/**
 * Implements hook_theme().
 *
 * Registers the 'spamspan_at_sign' theme hook. None of the arguments are
 * read.
 *
 * @return
 *   An array of theme hook definitions keyed by hook name.
 */
function spamspan_theme($existing, $type, $theme, $path) {
  $hooks = array();
  // The spamspan filter settings are made available in case anyone needs
  // them; they default to an empty array.
  $hooks['spamspan_at_sign']['variables']['settings'] = array();
  return $hooks;
}
/**
 * Default theme implementation for the 'spamspan_at_sign' hook.
 *
 * Builds an <img> tag pointing at image.gif inside this module's directory.
 * The $variables argument is not read.
 *
 * @param $variables
 *   Theme variables (unused).
 * @return
 *   The HTML for the image.
 */
function theme_spamspan_at_sign($variables) {
  $src = base_path() . drupal_get_path('module', 'spamspan') . '/image.gif';
  return '<img class="spam-span-image" alt="at" width="10" src="' . $src . '" />';
}

Functions

Namesort descending Description
spamspan A simple utility function wrapping the main processing callback. This function may be called by other modules and themes.
spamspan_admin Loads code used only in admin.
spamspan_admin_page menu(): 'page arguments' => array('spamspan_admin_page'),
spamspan_admin_page_submit .
spamspan_filter_info Implements hook_filter_info(). This function is called on every page so keep it fast and simple.
spamspan_help Implements hook_help().
spamspan_menu Implements hook_menu().
spamspan_theme Implements hook_theme().
theme_spamspan_at_sign
_spamspan_bare_email_addresses
_spamspan_callback_mailto The callback functions for preg_replace_callback
_spamspan_email_addresses_with_options
_spamspan_filter_process Spamspan filter process callback
_spamspan_filter_settings Settings callback for spamspan filter
_spamspan_filter_tips Filter tips callback

Constants

Namesort descending Description
SPAMSPAN_EMAIL Set up a regex constant to split an email address into name and domain parts. The following pattern is not perfect (who is?), but is intended to intercept things which look like email addresses. It is not intended to determine if an address is valid.…