You are here

static function ApacheSolrUpdate::update_index in Apache Solr Search 5

1 call to ApacheSolrUpdate::update_index()
apachesolr_update_index in ./apachesolr_search.module
Implementation of hook_update_index().

File

./apachesolr.module, line 302
Integration with the Apache Solr search application.

Class

ApacheSolrUpdate
The point of this class is to manage the update index needs of multiple search modules. Each one needs to track its own list of nodes that need updating.

Code

/**
 * Index the pending nodes of one namespace into Solr.
 *
 * Obtains the Solr service object, builds one Apache_Solr_Document per node
 * returned by self::getNodesToIndex(), and sends the documents to Solr in
 * chunks, followed by a commit and an optimize. Errors are logged through
 * watchdog() instead of being propagated to the caller.
 *
 * @param $namespace
 *   Identifier of the calling search module. It is passed to
 *   self::getNodesToIndex() to select the nodes to process and to
 *   self::success() to record indexing progress.
 */
static function update_index($namespace) {
  $solr = FALSE;
  try {

    // Get the $solr object
    $solr =& apachesolr_get_solr(variable_get('apachesolr_host', 'localhost'), variable_get('apachesolr_port', 8983), variable_get('apachesolr_path', '/solr'));

    // If there is no $solr object, or the server does not answer a ping,
    // there is no server available, so don't continue. The is_object() check
    // prevents a fatal "call to a member function on a non-object" error.
    if (!is_object($solr) || !$solr->ping()) {
      throw new Exception(t('No Solr instance available during indexing'));
    }
  }
  catch (Exception $e) {
    watchdog('Apache Solr', $e->getMessage(), WATCHDOG_ERROR);
    return;
  }

  // Get CCK fields list
  $cck_fields = apachesolr_cck_fields();
  $result = self::getNodesToIndex($namespace);
  $documents = array();
  while ($row = db_fetch_object($result)) {

    // Variables to track the last item changed.
    $solr_last_change = $row->last_change;
    $solr_last_id = $row->nid;

    // Set reset = TRUE to avoid static caching of all nodes that get indexed.
    $node = node_load($row->nid, NULL, TRUE);
    if ($node->nid) {

      // Build the node body.
      $node = node_build_content($node, FALSE, FALSE);
      $node->body = drupal_render($node->content);
      $text = check_plain($node->title) . ' ' . $node->body;

      // Fetch extra data normally not visible
      $extra = node_invoke_nodeapi($node, 'update index');
      foreach ($extra as $t) {
        $text .= $t;
      }

      // Update solr index.
      try {
        $document = new Apache_Solr_Document();
        $site = url(NULL, NULL, NULL, TRUE);
        $hash = md5($site);
        $document->site = $site;
        $document->hash = $hash;
        $document->url = url('node/' . $node->nid, NULL, NULL, TRUE);
        $document->nid = $node->nid;
        $document->uid = $node->uid;
        $document->title = $node->title;
        $document->body = $node->body;
        $document->type = $node->type;
        $document->changed = $node->changed;
        $document->comment_count = $node->comment_count;
        $document->name = $node->name;
        $document->language = $node->language;

        // Path aliases can have important information about the content.
        // Add them to the index as well.
        if (function_exists('drupal_get_path_alias')) {

          // Add any path alias to the index, looking first for language specific
          // aliases but using language neutral aliases otherwise.
          $language = empty($node->language) ? '' : $node->language;
          $path = 'node/' . $node->nid;
          $output = drupal_get_path_alias($path, $language);
          if ($output && $output != $path) {
            $document->path = $output;
            // Separate the alias from the preceding text so it is not glued
            // onto the last word of the indexed text.
            $text .= ' ' . $output;
          }
        }
        foreach ($cck_fields as $key => $cck_info) {
          if (isset($node->{$key})) {

            // Got a CCK field. See if it is to be indexed: use the field's
            // callback when one is defined, otherwise the raw field value.
            $function = $cck_info['callback'];
            if ($cck_info['callback'] && function_exists($function)) {
              $field = call_user_func_array($function, array(
                $node,
                $key,
              ));
            }
            else {
              $field = $node->{$key};
            }
            $index_key = apachesolr_index_key($cck_info);
            foreach ($field as $value) {

              // Don't index NULLs or empty strings
              if (isset($value['view']) && strlen($value['view'])) {
                if ($cck_info['multiple']) {
                  $document->setMultiValue($index_key, $value['view']);
                }
                else {
                  $document->{$index_key} = $value['view'];
                }
              }
            }
          }
        }

        // This is the string value of the title. Used for sorting.
        $document->stitle = $node->title;
        if (is_array($node->taxonomy)) {
          foreach ($node->taxonomy as $term) {

            // Double indexing of tids lets us do efficient searches (on tid)
            // and do accurate per-vocabulary faceting.
            // By including the ancestors to a term in the index we make
            // sure that searches for general categories match specific
            // categories, e.g. Fruit -> apple, a search for fruit will find
            // content categorized with apple.
            $ancestors = taxonomy_get_parents_all($term->tid);
            foreach ($ancestors as $ancestor) {
              $document->setMultiValue('tid', $ancestor->tid);
              $document->setMultiValue('imfield_vid' . $ancestor->vid, $ancestor->tid);
              $document->setMultiValue('vid', $ancestor->vid);
              $document->setMultiValue('taxonomy_name', $ancestor->name);
              $text .= ' ' . $ancestor->name;
            }
          }
        }
        $document->text = $text;

        // Let modules add to the document
        foreach (module_implements('apachesolr_update_index') as $module) {
          $function = $module . '_apachesolr_update_index';
          $function($document, $node);
        }
        $documents[] = $document;
      }
      catch (Exception $e) {
        // A failure while building one document is logged and that node is
        // skipped; the remaining rows are still processed.
        watchdog('Apache Solr', $e->getMessage(), WATCHDOG_ERROR);
      }
    }

    // Record progress under the caller's namespace (the same one used to
    // select the rows) so each search module tracks its own position.
    // NOTE(review): progress is recorded before the documents are actually
    // sent to Solr below; a failed addDocuments() call does not roll it back.
    self::success($namespace, $solr_last_change, $solr_last_id);
  }

  // $solr is guaranteed to be an object here because of the guard above.
  if (count($documents) > 0) {
    try {
      watchdog('Apache Solr', t('Adding @count documents.', array(
        '@count' => count($documents),
      )));

      // Chunk the adds by 20s
      $docs_chunk = array_chunk($documents, 20);
      foreach ($docs_chunk as $docs) {
        $solr->addDocuments($docs);
      }
      $solr->commit();
      $solr->optimize(FALSE, FALSE);
    }
    catch (Exception $e) {
      watchdog('Apache Solr', $e->getMessage(), WATCHDOG_ERROR);
    }
  }
}