function spam_content_filter in Spam 5
Same name and namespace in other branches
- 5.3 spam.module \spam_content_filter()
- 6 spam.module \spam_content_filter()
Determine whether or not provided text is spam.
Parameters
$source: Which module this text came from (e.g. 'comment', 'node', 'trackback'...)
$id: Numeric identifier for the content (e.g. node id, comment id, etc.)
$header: Header portion of text to be filtered.
$body: Body portion of text to be filtered.
$callback: Function to call once text is determined to be spam or not
Return value
TRUE (spam) or FALSE (not spam)
2 calls to spam_content_filter()
- spam_comment in ./
spam.module - Drupal _comment hook. Passes new comments to the spam filter.
- spam_nodeapi in ./
spam.module - Drupal _nodeapi hook. Passes new node content through the spam filter.
File
- ./
spam.module, line 26
Code
/**
 * Determine whether or not provided text is spam.
 *
 * The text is run through a chain of filters (comment sanity check, duplicate
 * detection, custom filters, spam URL filter, URL count limit, external
 * module hooks and finally the bayesian filter). Each stage is skipped once
 * the accumulated score has already reached the 'spam_threshold' setting.
 * The resulting probability is stored in {spam_tracker}, logged, and handed
 * to a callback that decides what to do with the content.
 *
 * @param $source
 *   Which module this text came from (e.g. 'comment', 'node', 'trackback').
 * @param $id
 *   Numeric identifier for the content (e.g. node id, comment id).
 * @param $header
 *   Header portion of text to be filtered.
 * @param $body
 *   Body portion of text to be filtered.
 * @param $callback
 *   Optional function to call once the probability has been determined. It
 *   receives ($source, $id, $header, $body, $probability, $old, $action).
 *   When NULL, spam_default_actions() is called with the same arguments.
 *
 * @return
 *   Whatever the callback (or spam_default_actions()) returns; documented as
 *   TRUE (spam) or FALSE (not spam).
 */
function spam_content_filter($source, $id, $header, $body, $callback = NULL) {
  // globals used in logging
  global $ID, $SOURCE;
  $ID = $id;
  $SOURCE = $source;

  // Start from a known state: any of the filter stages below may be skipped,
  // yet their outputs are read unconditionally further down.
  $probability = 0;
  $weight = 0;
  $action = array();
  $tokens = array();

  // The threshold does not change while filtering, so look it up once.
  $threshold = variable_get('spam_threshold', 80);

  // md5 hash used to catch same comment being posted over and over
  $hash = md5($header . $body);

  // sanity check comments
  if ($source == 'comment' && spam_validate_comment($id) == FALSE) {
    $probability = 99;
  }

  // first filter, see if this is duplicated content
  if ($probability < $threshold) {
    if (spam_duplicate_filter($hash, $source, $id, $header, $body)) {
      $probability = 99;
    }
  }

  // second filter, see if this content matches custom filters
  if ($probability < $threshold) {
    // $action is handed to the custom filter and later forwarded to the
    // callback -- presumably filled in by reference; confirm against
    // spam_custom_filter().
    $weight = spam_custom_filter($header, $body, $action);
  }

  // third filter, see if content contains spam urls
  if ($probability + $weight < $threshold) {
    $weight += spam_url_filter($header . ' ' . $body);
  }

  // fourth filter, see if content contains too many urls
  if ($probability + $weight < $threshold) {
    // split content into tokens
    $tokens = spam_tokenize($header, 'header*');
    $tokens = array_merge($tokens, spam_tokenize($body));
    $weight += spam_url_limit(spam_urls_count());
  }

  // pass content through external filters, if any
  if ($probability + $weight < $threshold) {
    // external spam filters
    $hook = spam_invoke_hook('filter', $header . ' ' . $body, $tokens);
    if ($hook['weight']) {
      $weight += $hook['weight'];
      // external modules should do their own logging, but just in case...
      spam_log(SPAM_VERBOSE, t('spam_content_filter: external module added weight of @weight', array(
        '@weight' => $hook['weight'],
      )), $source, $id);
    }
  }

  // finally, if necessary pass content through bayesian filter
  if ($probability + $weight < $threshold) {
    $probability = spam_bayesian_filter($tokens);
  }

  // be sure probability is in valid range (1-99)
  $probability = spam_get_probability($probability, $weight);

  $old = db_fetch_object(db_query("SELECT * FROM {spam_tracker} WHERE source = '%s' AND id = %d", $source, $id));
  // Guard against there being no tracker row yet, in which case $old is not
  // an object and must not be dereferenced.
  if ($old && $old->id) {
    // content has been updated
    db_query("UPDATE {spam_tracker} SET probability = %d, hostname = '%s', hash = '%s', timestamp = %d WHERE source = '%s' AND id = '%d'", $probability, $_SERVER['REMOTE_ADDR'], $hash, time(), $source, $id);
  }
  else {
    // this is the first time we've filtered this content
    db_query("INSERT INTO {spam_tracker} (id, source, probability, hostname, hash, timestamp) VALUES(%d, '%s', %d, '%s', '%s', %d)", $id, $source, $probability, $_SERVER['REMOTE_ADDR'], $hash, time());
  }

  spam_log(SPAM_LOG, t('spam_content_filter: @prob% probability of being spam for @source "%header"', array(
    '@prob' => $probability,
    '@source' => $source,
    '%header' => $header,
  )), $source, $id);

  // pass probability to callback which will deal with it as is appropriate
  if ($callback != NULL) {
    $return = $callback($source, $id, $header, $body, $probability, $old, $action);
  }
  else {
    $return = spam_default_actions($source, $id, $header, $body, $probability, $old, $action);
  }

  if ($probability >= $threshold) {
    /* This ip should have already been blocked when the content was previewed,
     * but if the website is not configured to require previews this is our
     * only chance to try and delay the spammer. Delaying them here is much
     * less effective, as the spam was already posted and many automatic tools
     * probably don't wait around at this point.
     */
    spam_ip_filter($source, $id, TRUE);
  }

  return $return;
}