spamspan.module in SpamSpan filter 7
Same filename and directory in other branches
This module implements the spamspan technique (http://www.spamspan.com) for hiding email addresses from spambots.
If javascript is disabled on the client-side, addresses appear as example [at] example [dot] com.
@author Lawrence Akka @copyright 2006-2010, Lawrence Akka @license http://www.gnu.org/licenses/gpl.txt GPL v 2.0
File
spamspan.module (View source)
<?php
/**
* @file
* This module implements the spamspan technique (http://www.spamspan.com) for hiding email addresses from spambots.
*
* If javascript is disabled on the client-side, addresses appear as
* example [at] example [dot] com.
*
* @author Lawrence Akka
* @copyright 2006-2010, Lawrence Akka
* @license http://www.gnu.org/licenses/gpl.txt GPL v 2.0
*
*/
/**
 * Regex fragment that splits an email address into name and domain parts.
 *
 * The following pattern is not perfect (who is?), but is intended to
 * intercept things which look like email addresses. It is not intended to
 * determine if an address is valid. It will not intercept addresses with
 * quoted local parts.
 *
 * Capture group 1 is the name (local) part and capture group 2 is the domain.
 * The fragment carries no delimiters or modifiers of its own, so whoever
 * embeds it must supply them:
 * - the "x" modifier, because the fragment contains layout whitespace and
 *   "#" comments;
 * - the "i" modifier, because the top-level domain is written as [A-Z] only.
 *
 * NOTE(review): the name-part character class ends in "^@". A "^" that is not
 * the first character of a class is a literal, so this adds "^" and "@" to
 * the accepted characters rather than excluding "@" -- confirm this is what
 * was intended.
 *
 * @constant string SPAMSPAN_EMAIL
 */
define('SPAMSPAN_EMAIL', "\n ([-\\.\\~\\'\\!\\#\$\\%\\&\\+\\/\\*\\=\\?\\^\\_\\`\\{\\|\\}\\w\\+^@]+) # Group 1 - Match the name part - dash, dot or\n #special characters.\n @ # @\n ((?: # Group 2\n [-\\w]+\\. # one or more letters or dashes followed by a dot.\n )+ # The whole thing one or more times\n [A-Z]{2,63} # with between 2 and 63 letters at the end (NB\n # new TLDs)\n )\n");
/**
 * Returns the shared spamspan_admin helper object.
 *
 * The object is created on the first call and the same instance is handed
 * back on every later call within the request. The spamspan_admin class is
 * not defined in this file.
 *
 * @return
 *   The per-request spamspan_admin instance.
 */
function spamspan_admin() {
  static $instance = NULL;
  if ($instance === NULL) {
    $instance = new spamspan_admin();
  }
  return $instance;
}
/**
 * Form callback for the admin page.
 *
 * Referenced from menu() as 'page arguments' => array('spamspan_admin_page').
 * Records a watchdog entry and hands the form over to the shared
 * spamspan_admin object.
 *
 * @param $form
 *   The form array.
 * @param $form_state
 *   The form state, passed by reference.
 * @return
 *   Whatever spamspan_admin::page() returns.
 */
function spamspan_admin_page($form, &$form_state) {
  watchdog('spamspan', 'admin_page()');
  $admin = spamspan_admin();
  return $admin->page($form, $form_state);
}
/**
 * Submit counterpart of spamspan_admin_page().
 *
 * Records a watchdog entry and hands the submitted form over to the shared
 * spamspan_admin object.
 *
 * @param $form
 *   The form array.
 * @param $form_state
 *   The form state, passed by reference.
 * @return
 *   Whatever spamspan_admin::page_submit() returns.
 */
function spamspan_admin_page_submit($form, &$form_state) {
  watchdog('spamspan', 'admin_page_submit()');
  $admin = spamspan_admin();
  return $admin->page_submit($form, $form_state);
}
/**
 * Implements hook_filter_info().
 *
 * This function is called on every page so keep it fast and simple.
 *
 * @return
 *   An array describing the single 'spamspan' filter: its title, description,
 *   callbacks and default settings.
 */
function spamspan_filter_info() {
  // Settings applied when a text format has not overridden them.
  $default_settings = array(
    'spamspan_at' => ' [at] ',
    'spamspan_use_graphic' => 0,
    'spamspan_dot_enable' => 0,
    'spamspan_dot' => ' [dot] ',
    'spamspan_use_form' => 0,
    'spamspan_form_pattern' => '<a href="%url?goto=%email">%displaytext</a>',
    'spamspan_form_default_url' => 'contact',
    'spamspan_form_default_displaytext' => 'contact form',
  );

  return array(
    'spamspan' => array(
      'title' => t('SpamSpan email address encoding filter'),
      'description' => t('Attempt to hide email addresses from spam-bots.'),
      'process callback' => '_spamspan_filter_process',
      'settings callback' => '_spamspan_filter_settings',
      'tips callback' => '_spamspan_filter_tips',
      'default settings' => $default_settings,
    ),
  );
}
/**
 * Spamspan filter process callback.
 *
 * Scans text and replaces email addresses with span tags. Three passes are
 * made, in this order: mailto: links, bare addresses followed by a
 * "[options]" suffix, and finally plain bare addresses.
 *
 * We are aiming to replace emails with code roughly like this:
 *   <span class="spamspan">
 *     <span class="u">user</span>
 *     [at]
 *     <span class="d">example [dot] com</span>
 *     <span class="t">tag contents</span>
 *   </span>
 *
 * @param $text
 *   The text to process.
 * @param $filter
 *   The filter object; it is handed to spamspan_admin()->filter_set() so the
 *   replacement callbacks can reach the settings.
 * @return
 *   The text with email addresses replaced.
 */
function _spamspan_filter_process($text, $filter) {
  // The preg_replace_callback functions below cannot take any additional
  // arguments, so we pass the relevant settings in spamspan_admin.
  spamspan_admin()->filter_set($filter);

  // Top and tail the email regexp so that it is case insensitive and ignores
  // whitespace.
  $emailpattern = '!' . SPAMSPAN_EMAIL . '!ix';
  $emailpattern_with_options = '!' . SPAMSPAN_EMAIL . '\\[(.*?)\\]!ix';

  // Next set up a regex for mailto: URLs.
  // - see http://www.faqs.org/rfcs/rfc2368.html
  // Capture groups, as consumed by _spamspan_callback_mailto():
  //   1 - attributes before href
  //   2 - the quote character around the href value
  //   3 - the whole mailto: URL
  //   4 - the name part of the address
  //   5 - the domain part of the address
  //   6 - attributes after href
  //   7 - the tag contents
  $mailtopattern = "!<a\\s+ # opening <a and spaces\n"
    . " ((?:\\w+\\s*=\\s*)(?:\\w+|\"[^\"]*\"|'[^']*'))*? # any attributes\n"
    . " \\s* # whitespace\n"
    . " href\\s*=\\s*(['\"])(mailto:"
    . SPAMSPAN_EMAIL
    . "(?:\\?[A-Za-z0-9_= %\\.\\-\\~\\_\\&;\\!\\*\\(\\)\\'#&]*)?)"
    . "\\2 # the relevant quote\n"
    . " # character\n"
    . " ((?:\\s+\\w+\\s*=\\s*)(?:\\w+|\"[^\"]*\"|'[^']*'))*? # any more attributes\n"
    . " > # end of the first tag\n"
    . " (.*?) # tag contents. NB this\n"
    . " # will not work properly\n"
    . " # if there is a nested\n"
    . " # <a>, but this is not\n"
    . " # valid xhtml anyway.\n"
    . " </a> # closing tag\n"
    . " !ix";

  // HTML image tags need to be handled separately, as they may contain base64
  // encoded images slowing down the email regex function.
  // Therefore, remove all image contents and add them back later.
  // See https://drupal.org/node/1243042 for details.
  $placeholder = '__spamspan_img_placeholder__';
  $inline_image_pattern = '/data\\:(?:.+?)base64(?:.+?)["|\']/';
  $images = array(array());
  preg_match_all($inline_image_pattern, $text, $images);
  $text = preg_replace($inline_image_pattern, $placeholder, $text);

  // Now we can convert all mailto URLs,
  $text = preg_replace_callback($mailtopattern, '_spamspan_callback_mailto', $text);
  // all bare email addresses with optional formatting information,
  $text = preg_replace_callback($emailpattern_with_options, '_spamspan_email_addresses_with_options', $text);
  // and finally, all bare email addresses.
  $text = preg_replace_callback($emailpattern, '_spamspan_bare_email_addresses', $text);

  // Revert back to the original image contents, one placeholder per stashed
  // image, in document order. This is done with plain string functions on
  // purpose: used as a preg_replace() replacement, any "$1" or "\1" style
  // sequence inside the stashed data would be read as a backreference and
  // corrupt the restored text.
  $stashed_images = isset($images[0]) ? $images[0] : array();
  $placeholder_length = strlen($placeholder);
  $search_from = 0;
  foreach ($stashed_images as $image) {
    $position = strpos($text, $placeholder, $search_from);
    if ($position === FALSE) {
      // No placeholder left to fill.
      break;
    }
    $text = substr_replace($text, $image, $position, $placeholder_length);
    // Continue after the restored data so it is never rescanned.
    $search_from = $position + strlen($image);
  }

  return $text;
}
/**
 * preg_replace_callback() callback for "address[options]" matches.
 *
 * The options (capture group 3) are split on "|": the first part becomes
 * 'custom_form_url' and the second 'custom_displaytext'. Parts that are empty
 * after trimming are ignored.
 *
 * @param $matches
 *   Match array: [1] name part, [2] domain part, [3] text between the
 *   square brackets.
 * @return
 *   The replacement produced by spamspan_admin()->output().
 */
function _spamspan_email_addresses_with_options($matches) {
  // Positional option => name of the variable it feeds.
  $option_names = array('custom_form_url', 'custom_displaytext');

  $vars = array();
  $raw_options = isset($matches[3]) ? $matches[3] : '';
  foreach (explode('|', $raw_options) as $index => $option) {
    if (!isset($option_names[$index])) {
      // Anything beyond the known options is ignored.
      break;
    }
    $value = trim($option);
    if (!empty($value)) {
      $vars[$option_names[$index]] = $value;
    }
  }

  $admin = spamspan_admin();
  return $admin->output($matches[1], $matches[2], '', array(), $vars);
}
/**
 * preg_replace_callback() callback for plain email address matches.
 *
 * @param $matches
 *   Match array: [1] name part, [2] domain part.
 * @return
 *   The replacement produced by spamspan_admin()->output().
 */
function _spamspan_bare_email_addresses($matches) {
  $admin = spamspan_admin();
  return $admin->output($matches[1], $matches[2]);
}
/**
 * Settings callback for the spamspan filter.
 *
 * Passes every argument through, unchanged and in the same order, to
 * spamspan_admin()->filter_settings().
 *
 * @return
 *   Whatever spamspan_admin::filter_settings() returns.
 */
function _spamspan_filter_settings($form, $form_state, $filter, $format, $defaults, $filters) {
  $admin = spamspan_admin();
  return $admin->filter_settings($form, $form_state, $filter, $format, $defaults, $filters);
}
/**
 * Filter tips callback.
 *
 * The same text is returned whatever the arguments are.
 *
 * @param $filter
 *   Unused.
 * @param $format
 *   Unused.
 * @param $long
 *   Unused.
 * @return
 *   The translated tip text.
 */
function _spamspan_filter_tips($filter, $format, $long = FALSE) {
  // The wording is kept exactly as is: it is the translation source string.
  $tips = t(
    'Each email address will be obfuscated in a human readable fashion or, if JavaScript is enabled, replaced with a spam resistent clickable link.'
    . ' Email addresses will get the default web form unless specified. If replacement text (a persons name) is required a webform is also required. Separate each part with the "|" pipe symbol. Replace spaces in names with "_".'
  );
  return $tips;
}
/**
 * Implements hook_help().
 *
 * Only the 'admin/help#spamspan' path is answered; the text comes from
 * spamspan_admin()->help(). Nothing is returned for any other path.
 */
function spamspan_help($path, $arg) {
  // Loose comparison, matching the semantics of a switch statement.
  if ($path == 'admin/help#spamspan') {
    $admin = spamspan_admin();
    return $admin->help($path, $arg);
  }
}
/**
 * preg_replace_callback() callback for mailto: links.
 *
 * Replaces a mailto: link which has been found with the appropriate <span>
 * tags.
 *
 * @param $matches
 *   Match array for the mailto pattern:
 *   - [1] attributes before href
 *   - [3] the whole mailto: URL
 *   - [4] name part of the address
 *   - [5] domain part of the address
 *   - [6] attributes after href
 *   - [7] tag contents
 *   Groups 1 and 6 are repeated capture groups, so each holds at most the
 *   last attribute matched on its side of href.
 * @return
 *   The span with which to replace the link, as produced by
 *   spamspan_admin()->output().
 */
function _spamspan_callback_mailto($matches) {
  // Take the mailto: URL in $matches[3] and split the query string into its
  // component parts, putting them in $headers as [0]=>"header=contents" etc.
  // We cannot use parse_str because the query string might contain dots.
  //
  // The URL comes from HTML, so a single quote can arrive encoded as &#039;.
  // The "#" of that entity would be taken by parse_url() as the start of the
  // fragment and cut the query short, so turn it back into a plain single
  // quote, which is perfectly valid.
  $mailto_url = str_replace('&#039;', "'", $matches[3]);

  // parse_url() gives NULL when there is no query string (and FALSE for a
  // seriously malformed URL); normalise both to an empty string.
  $query = (string) parse_url($mailto_url, PHP_URL_QUERY);

  // Header separators are HTML-encoded too: decode &amp; before splitting,
  // otherwise "a=1&amp;b=2" would yield a bogus "amp" header.
  $query = str_replace('&amp;', '&', $query);
  $headers = preg_split('/[&;]/', $query);
  // If there is no query, $headers[0] will be '' so $headers must be reset.
  if (!is_array($headers) || $headers[0] === '') {
    $headers = array();
  }

  // Take all <a> attributes except the href and put them into custom $vars:
  // group 1 holds what came before href, group 6 what came after.
  $attributes = array();
  foreach (array(1, 6) as $group) {
    if (!empty($matches[$group])) {
      $attributes[] = trim($matches[$group]);
    }
  }
  $vars = array();
  if (count($attributes)) {
    $vars['extra_attributes'] = implode(' ', $attributes);
  }

  return spamspan_admin()->output($matches[4], $matches[5], $matches[7], $headers, $vars);
}
/**
 * Implements hook_menu().
 *
 * The menu items are supplied by spamspan_admin()->menu().
 */
function spamspan_menu() {
  $admin = spamspan_admin();
  return $admin->menu();
}
/**
 * A simple utility function wrapping the main processing callback.
 *
 * This function may be called by other modules and themes.
 *
 * @param $text
 *   Text, maybe containing email addresses.
 * @param $settings
 *   An associative array of settings to be applied. Any setting left out
 *   falls back to the filter's 'default settings'.
 * @return
 *   The input text with emails replaced by spans.
 */
function spamspan($text = '', $settings = array()) {
  $filters = filter_get_filters();

  // Build a stand-in filter object carrying the merged settings; with "+",
  // values given by the caller win over the defaults.
  $filter = new stdClass();
  $filter->settings = $settings + $filters['spamspan']['default settings'];

  return _spamspan_filter_process($text, $filter);
}
/**
 * Implements hook_theme().
 *
 * Registers the single 'spamspan_at_sign' theme hook. None of the arguments
 * are used.
 *
 * @return
 *   The theme registry entries of this module.
 */
function spamspan_theme($existing, $type, $theme, $path) {
  $at_sign_variables = array(
    // the spamspan filter settings if anyone needs them
    'settings' => array(),
  );

  $hooks = array();
  $hooks['spamspan_at_sign'] = array('variables' => $at_sign_variables);
  return $hooks;
}
/**
 * Default theme implementation for the 'spamspan_at_sign' hook.
 *
 * Builds an <img> tag pointing at image.gif inside this module's directory.
 *
 * @param $variables
 *   Theme variables; not used here.
 * @return
 *   The HTML of the image tag.
 */
function theme_spamspan_at_sign($variables) {
  $image_url = base_path() . drupal_get_path('module', 'spamspan') . '/image.gif" />';
  return '<img class="spam-span-image" alt="at" width="10" src="' . $image_url;
}
Functions
Name | Description |
---|---|
spamspan | A simple utility function wrapping the main processing callback. This function may be called by other modules and themes. |
spamspan_admin | Loads code used only in admin. |
spamspan_admin_page | menu(): 'page arguments' => array('spamspan_admin_page'), |
spamspan_admin_page_submit | . |
spamspan_filter_info | Implements hook_filter_info(). This function is called on every page so keep it fast and simple. |
spamspan_help | Implements hook_help(). |
spamspan_menu | Implements hook_menu(). |
spamspan_theme | Implements hook_theme(). |
theme_spamspan_at_sign | |
_spamspan_bare_email_addresses | |
_spamspan_callback_mailto | The callback functions for preg_replace_callback |
_spamspan_email_addresses_with_options | |
_spamspan_filter_process | Spamspan filter process callback |
_spamspan_filter_settings | Settings callback for spamspan filter |
_spamspan_filter_tips | Filter tips callback |
Constants
Name | Description |
---|---|
SPAMSPAN_EMAIL | Set up a regex constant to split an email address into name and domain parts. The following pattern is not perfect (who is?), but is intended to intercept things which look like email addresses. It is not intended to determine if an address is valid.… |