function fuzzysearch_index in Fuzzy Search 6
Index the node data in the fuzzy index table.
Parameters
nid: The node id of the node being indexed.
Return value
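Documented as returning TRUE on success and FALSE on failure. Note that the code shown below contains no explicit return value: every exit path returns NULL.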
1 call to fuzzysearch_index()
- fuzzysearch_cron in ./fuzzysearch.module - Implementation of hook_cron().
File
- ./fuzzysearch.module, line 252 - Module file for fuzzysearch module.
Code
/**
 * Index the node data in the fuzzy index table.
 *
 * Deletes the node's existing rows from {fuzzysearch_index}, rebuilds the
 * indexable text (title, rendered body and any strings returned by
 * hook_nodeapi('update index')), scores every word and passes each distinct
 * word to fuzzysearch_index_insert(). On every exit path the node is removed
 * from {fuzzysearch_index_queue}.
 *
 * @param $nid
 *   The node id of the node being indexed.
 *
 * @return
 *   Nothing is returned explicitly (NULL), both when the node is indexed and
 *   when indexing is skipped because the node cannot be loaded or a
 *   hook_fuzzysearch_index() implementation returned an empty value.
 */
function fuzzysearch_index($nid) {
  // First step is removing the past index for this node.
  db_query("DELETE FROM {fuzzysearch_index} WHERE nid = %d", $nid);

  // No node, nothing to do except take it off the queue.
  $node = node_load($nid);
  if (!$node) {
    db_query("DELETE FROM {fuzzysearch_index_queue} WHERE nid = %d", $nid);
    return;
  }

  // Let modules alter a node before indexing or prevent it from being indexed.
  // See readme.txt.
  foreach (module_implements('fuzzysearch_index') as $name) {
    $function = $name . '_fuzzysearch_index';
    $node = $function($node);
    if (!$node) {
      // A module vetoed indexing: take the node off the queue and stop.
      db_query("DELETE FROM {fuzzysearch_index_queue} WHERE nid = %d", $nid);
      return;
    }
  }

  // Index the node title. This is the first write to $text, so assign rather
  // than append (appending to an unset variable raises a notice).
  $text = '<h1> ' . $node->title . ' </h1>';

  // Build and index the node body.
  $node->build_mode = NODE_BUILD_SEARCH_INDEX;
  $node = node_build_content($node, FALSE, FALSE);
  $node->body = drupal_render($node->content);
  $text .= $node->body;

  // Implementation of nodeapi's 'update index' op: append every string that a
  // module returns. Non-string results (including NULL) are ignored.
  foreach (module_implements('nodeapi') as $module) {
    $function = $module . '_nodeapi';
    $extra_text = $function($node, 'update index', NULL, NULL);
    if (is_string($extra_text)) {
      $text .= ' ' . $extra_text;
    }
  }

  // Multipliers for scores of words inside certain HTML tags.
  $tags = fuzzysearch_get_index_tags();

  // Strip off all ignored tags to speed up processing.
  $text = strip_tags($text, '<' . implode('><', array_keys($tags)) . '>');

  // Hook_fuzzysearch_filter lets modules filter text. This should be used for
  // more complex filtering. Stop words should not use this. Create a stopword
  // file instead. See fuzzysearch/stopwords/README.txt.
  foreach (module_implements('fuzzysearch_filter') as $name) {
    $function = $name . '_fuzzysearch_filter';
    $text = $function('index', $text);
  }

  // Allow other modules to modify the score of the node based on their own
  // calculations. The sum of all these scores is passed along with each word
  // score, so all scoring is done at index time rather than at query time.
  $hook_scores = module_invoke_all('fuzzysearch_score', 'index', $node);

  // Build the final score multiplier for the node from the values returned by
  // the hook implementations, weighted by the per-id site settings.
  $node_score = 0;
  foreach ($hook_scores as $score) {
    $multiplier = variable_get('fuzzysearch_scoring_' . $score['id'], 5);
    $node_score += $score['score'] * $multiplier;
  }

  // Begin indexing content.
  // Remove stopwords.
  $text = fuzzysearch_stopwords($text);

  // Map of word => accumulated score, in order of first appearance. Keying by
  // the word gives exact string identity (a loose array_search() would merge
  // numeric-looking words such as "1e3" and "1000") and constant-time lookup.
  $word_scores = array();

  // Find all words not located within tags (score = 1).
  $untagged = preg_replace('/<([A-Z][A-Z0-9]*)[^>]*>(.*?)<\\/\\1>/i', '', $text);
  foreach (preg_split('/\\s/', $untagged, -1, PREG_SPLIT_NO_EMPTY) as $word) {
    $word_scores[$word] = isset($word_scores[$word]) ? $word_scores[$word] + 1 : 1;
  }

  // Find all words located within tags (score > 1).
  preg_match_all('/<([A-Z][A-Z0-9]*)([^>]*)>(.*?)<\\/\\1>/i', $text, $tagged);

  // Walk through each chunk of content found between a pair of tags.
  foreach ($tagged[3] as $match => $tag_content) {
    $tag = $tagged[1][$match];

    // The pattern is case-insensitive, so the captured tag name may differ in
    // case from the configured keys. Try the exact name, then lower case.
    // NOTE(review): falling back to 1 (the untagged score) for a tag with no
    // configured multiplier is an assumption -- confirm against
    // fuzzysearch_get_index_tags().
    if (isset($tags[$tag])) {
      $tag_score = $tags[$tag];
    }
    elseif (isset($tags[strtolower($tag)])) {
      $tag_score = $tags[strtolower($tag)];
    }
    else {
      $tag_score = 1;
    }

    foreach (preg_split('/\\s/', $tag_content, -1, PREG_SPLIT_NO_EMPTY) as $word) {
      $word_scores[$word] = isset($word_scores[$word]) ? $word_scores[$word] + $tag_score : $tag_score;
    }
  }

  $word_id = 0;
  foreach ($word_scores as $word => $word_score) {
    // Each word gets a word_id, which comes from the last value in the serial
    // id column. While no id is known yet, insert and delete a placeholder
    // row first so the lookup that follows does not raise a PostgreSQL error.
    if (!$word_id) {
      db_query("INSERT INTO {fuzzysearch_index} (nid, word_id, ngram, completeness, score) VALUES (0, 0, 'xxx', 0, 0)");
      db_query("DELETE FROM {fuzzysearch_index} WHERE (nid = 0 AND word_id = 0 AND ngram = 'xxx' AND completeness = 0 AND score = 0)");
    }
    $word_id = db_last_insert_id('fuzzysearch_index', 'id');

    // PHP turns integer-like string keys into integers; cast back so the
    // insert helper always receives the word as a string.
    fuzzysearch_index_insert((string) $word, $word_id, $nid, $word_score, $node_score);
  }

  // Indexing is done: remove the node from the index queue.
  db_query("DELETE FROM {fuzzysearch_index_queue} WHERE nid = %d", $nid);
}