You are here

function fuzzysearch_index in Fuzzy Search 6

Index the node data in the fuzzy index table.

Parameters

nid: The node id of the node being indexed.

Return value

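Documented as returning TRUE on success and FALSE on failure. Note, however, that the code shown below never returns an explicit value on any path, so callers actually receive NULL.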

1 call to fuzzysearch_index()
fuzzysearch_cron in ./fuzzysearch.module
Implementation of hook_cron().

File

./fuzzysearch.module, line 252
Module file for fuzzysearch module.

Code

/**
 * Index the node data in the fuzzy index table.
 *
 * Steps performed, in order:
 * - delete the node's existing rows from {fuzzysearch_index};
 * - load the node and let hook_fuzzysearch_index() implementations alter it
 *   or veto indexing by returning a false value;
 * - build the text to index from the title, the rendered body and any
 *   strings returned by hook_nodeapi('update index');
 * - strip tags that carry no score, run hook_fuzzysearch_filter() and
 *   fuzzysearch_stopwords() over the text;
 * - accumulate a score per distinct word (1 per occurrence outside tags,
 *   the tag's multiplier per occurrence inside a scored tag);
 * - pass every word to fuzzysearch_index_insert();
 * - remove the node from {fuzzysearch_index_queue}.
 *
 * @param $nid
 *   The node id of the node being indexed.
 *
 * @return
 *   Nothing is returned explicitly on any path (callers receive NULL), both
 *   when indexing completes and when it is skipped because the node could not
 *   be loaded or a module vetoed it.
 */
function fuzzysearch_index($nid) {

  // First step is removing the past index entries for this node.
  db_query("DELETE FROM {fuzzysearch_index} WHERE nid = %d", $nid);

  // No node, nothing to do: just drop it from the queue.
  if (!($node = node_load($nid))) {
    db_query("DELETE FROM {fuzzysearch_index_queue} WHERE nid = %d", $nid);
    return;
  }

  // Let modules alter a node before indexing or prevent it from being indexed.
  // See readme.txt.
  foreach (module_implements('fuzzysearch_index') as $name) {
    $function = $name . '_fuzzysearch_index';
    $node = $function($node);
    if (!$node) {

      // A module vetoed indexing: remove the node from the index queue so it
      // is not picked up again, and stop.
      db_query("DELETE FROM {fuzzysearch_index_queue} WHERE nid = %d", $nid);
      return;
    }
  }

  // Index node title. This is the first piece of text, so assign rather than
  // append (appending to an undefined variable raises a notice).
  $text = '<h1> ' . $node->title . ' </h1>';

  // Build and index the node body.
  $node->build_mode = NODE_BUILD_SEARCH_INDEX;
  $node = node_build_content($node, FALSE, FALSE);
  $node->body = drupal_render($node->content);
  $text .= $node->body;

  // Implementation of nodeapi's update_index op: append every string a module
  // returns; non-string results are ignored.
  foreach (module_implements('nodeapi') as $module) {
    $function = $module . '_nodeapi';
    $new_text = $function($node, 'update index', NULL, NULL);
    if (isset($new_text) && is_string($new_text)) {
      $text .= ' ' . $new_text;
    }
  }

  // Multipliers for scores of words inside certain HTML tags, keyed by tag
  // name.
  $tags = fuzzysearch_get_index_tags();

  // Strip off all ignored tags to speed up processing.
  $text = strip_tags($text, '<' . implode('><', array_keys($tags)) . '>');

  // Hook_fuzzysearch_filter lets modules filter text. This should be used for
  // more complex filtering. Stop words should not use this. Create a stopword
  // file instead. See fuzzysearch/stopwords/README.txt.
  foreach (module_implements('fuzzysearch_filter') as $name) {
    $function = $name . '_fuzzysearch_filter';
    $text = $function('index', $text);
  }

  // Allow other modules to modify the score of the node based on their own
  // calculations. The sum of all the scores added to each node is passed
  // along with each word's score at insert time, so scoring is done at the
  // time of indexing rather than at query time.
  $hook_scores = module_invoke_all('fuzzysearch_score', 'index', $node);

  // Build the final score multiplier for the node from the values returned by
  // other modules, each weighted by its configured multiplier (default 5).
  $node_score = 0;
  foreach ($hook_scores as $score) {
    $multiplier = variable_get('fuzzysearch_scoring_' . $score['id'], 5);
    $node_score += $score['score'] * $multiplier;
  }

  // Begin indexing content.
  // Remove stopwords.
  $text = fuzzysearch_stopwords($text);

  // Accumulated score per distinct word, in order of first appearance. Using
  // the word as the array key gives exact (byte-wise) matching and constant
  // time lookups; the previous non-strict array_search() compared
  // numeric-looking words numerically and rescanned the whole list per word.
  $index_scores = array();

  // Find all words not located within tags (score = 1 per occurrence).
  // NOTE(review): '.' does not match newlines here, so tagged content that
  // spans several lines is not removed by this pattern - confirm whether that
  // is intended.
  $content = preg_replace('/<([A-Z][A-Z0-9]*)[^>]*>(.*?)<\\/\\1>/i', '', $text);
  foreach (preg_split('/\\s/', $content, -1, PREG_SPLIT_NO_EMPTY) as $word) {
    $index_scores[$word] = (isset($index_scores[$word]) ? $index_scores[$word] : 0) + 1;
  }

  // Find all words located within tags (score > 1).
  preg_match_all('/<([A-Z][A-Z0-9]*)([^>]*)>(.*?)<\\/\\1>/i', $text, $tagged);

  // Filter through each set of content in between tags.
  foreach ($tagged[3] as $match => $tagged_content) {
    $tag = $tagged[1][$match];

    // The pattern is case-insensitive, so the captured tag name may differ in
    // case from the key used in $tags. Try the exact name first, then its
    // lower-case form; an unknown tag contributes 0, which is what adding the
    // previously undefined value amounted to.
    if (isset($tags[$tag])) {
      $tag_score = $tags[$tag];
    }
    elseif (isset($tags[strtolower($tag)])) {
      $tag_score = $tags[strtolower($tag)];
    }
    else {
      $tag_score = 0;
    }

    foreach (preg_split('/\\s/', $tagged_content, -1, PREG_SPLIT_NO_EMPTY) as $word) {
      $index_scores[$word] = (isset($index_scores[$word]) ? $index_scores[$word] : 0) + $tag_score;
    }
  }

  $word_id = NULL;
  foreach ($index_scores as $word => $word_score) {

    // Each word gets a word_id, which comes from the last value in the id
    // column, which is serial. While no id is known yet, insert and delete a
    // placeholder row first so that a last insert id exists; the original
    // code notes this is needed to avoid a PostgreSQL error.
    if (!$word_id) {
      db_query("INSERT INTO {fuzzysearch_index} (nid, word_id, ngram, completeness, score) VALUES (0, 0, 'xxx', 0, 0)");
      db_query("DELETE FROM {fuzzysearch_index} WHERE (nid = 0 AND word_id = 0 AND ngram = 'xxx' AND completeness = 0 AND score = 0)");
    }
    $word_id = db_last_insert_id('fuzzysearch_index', 'id');

    // PHP turns integer-like string keys into integers; cast back so the
    // word is always handed over as a string.
    fuzzysearch_index_insert((string) $word, $word_id, $nid, $word_score, $node_score);
  }

  // Indexing is done: remove the node from the index queue.
  db_query("DELETE FROM {fuzzysearch_index_queue} WHERE nid = %d", $nid);
}