function bayesian_spam_filter in Spam 5.3
Determine whether or not the content is spam.
1 call to bayesian_spam_filter()
- bayesian_spamapi in filters/bayesian/bayesian.module - Spam API Hook
File
- filters/bayesian/bayesian.module, line 55
Code
/**
 * Determine whether or not the content is spam.
 *
 * The content is split into tokens by the configured tokenizer callback
 * ('bayesian_tokenizer' variable, 'bayesian_tokenize' by default). Each
 * token's stored probability for the 'spam' class is looked up in
 * {bayesian_tokens}; tokens without a stored (non-zero) probability get the
 * 'bayesian_default_probability' value. The probabilities of the most
 * "interesting" tokens (those furthest from 50) are then averaged.
 *
 * @param $content
 *   The content to examine; passed through to the tokenizer and to
 *   spam_invoke_module().
 * @param $type
 *   The content type; passed through to the tokenizer, spam_invoke_module()
 *   and spam_log().
 * @param $fields
 *   Passed through to the tokenizer.
 * @param $extra
 *   Optional extra data; passed through to the tokenizer and to
 *   spam_invoke_module().
 * @param $filter_test
 *   Not used by this function.
 *
 * @return
 *   An array with a 'total' key holding the averaged probability (rounded to
 *   one decimal), or the default probability when there is nothing to
 *   average. When tokens were examined, a 'bayesian' key lists each
 *   interesting token as array('token' => ..., 'probability' => ...).
 */
function bayesian_spam_filter($content, $type, $fields, $extra = array(), $filter_test = FALSE) {
  $class = 'spam';
  $action = array();
  $id = spam_invoke_module($type, 'content_id', $content, $extra);
  $tokenizer = variable_get('bayesian_tokenizer', 'bayesian_tokenize');
  $tokens = $tokenizer($content, $type, $fields, $extra);
  // Read the fallback once instead of once per token.
  $default = variable_get('bayesian_default_probability', 40);

  // No tokens (not an array, or an empty one): return the default score.
  // An empty array previously fell through and caused a division by zero.
  if (!is_array($tokens) || empty($tokens)) {
    $action['total'] = $default;
    return $action;
  }

  // Map "token,probability" => drift, where drift is the distance of the
  // probability from a median of 50%.
  $drift = array();
  foreach ($tokens as $token) {
    $row = db_fetch_object(db_query("SELECT probability FROM {bayesian_tokens} WHERE class = '%s' AND token = '%s'", $class, $token));
    // db_fetch_object() gives FALSE for unknown tokens; never dereference it.
    $probability = ($row && $row->probability) ? $row->probability : $default;
    $drift["{$token},{$probability}"] = abs($probability - 50);
  }

  /* Sort token array in ascending order of drift, so the tokens with the
   * largest drift end up last and can be popped off the end first.
   */
  asort($drift);

  /* Take the n most "interesting" tokens from the end of the sorted array.
   * The larger a token's drift, the more interesting it is.
   */
  $keys = array_keys($drift);
  $max = variable_get('bayesian_interesting_tokens', 15);
  $total = 0;
  for ($i = 0; $i < $max; $i++) {
    $pair = array_pop($keys);
    if (!$pair) {
      // We've looked at all the tokens.
      break;
    }
    // Split on the LAST comma: the token itself may contain commas, the
    // probability never does.
    $split = strrpos($pair, ',');
    $token = substr($pair, 0, $split);
    $probability = substr($pair, $split + 1);
    $total = $total + $probability;
    $action['bayesian'][$i] = array(
      'token' => $token,
      'probability' => $probability,
    );
    spam_log(SPAM_DEBUG, 'bayesian_spam_filter', t('interesting token [@count] (@token) probability(@probability)', array(
      '@token' => $token,
      '@probability' => $probability,
      '@count' => $i + 1,
    )), $type, $id);
  }

  // Guard against division by zero when no token was examined (for example
  // when 'bayesian_interesting_tokens' is configured as 0).
  $probability = ($i > 0) ? round($total / $i, 1) : $default;
  spam_log(SPAM_VERBOSE, 'bayesian_spam_filter', t('total(@total) count(@count) probability(@probability)', array(
    '@probability' => $probability,
    '@total' => $total,
    '@count' => $i,
  )), $type, $id);
  $action['total'] = $probability;
  return $action;
}