You are here

class ApacheSolrUpdate in Apache Solr Search 5

The point of this class is to manage the update index needs of multiple search modules. Each one needs to track its own list of nodes that need updating.

Hierarchy

Expanded class hierarchy of ApacheSolrUpdate

File

./apachesolr.module, line 256
Integration with the Apache Solr search application.

View source
/**
 * Tracks, per namespace, how far node indexing has progressed and pushes
 * changed nodes to Solr.
 *
 * Each search module passes its own namespace string so that its
 * "last change" / "last id" pointers are stored in separate Drupal variables
 * ($namespace . '_last_change' and $namespace . '_last_id').
 */
class ApacheSolrUpdate {

  /**
   * Progress recorded during this request, keyed by namespace.
   *
   * Each entry is an array with the keys 'last_change' and 'last_id', as
   * written by ApacheSolrUpdate::success(). Nothing in this class persists it;
   * presumably the 'apachesolr_shutdown' callback registered in
   * getNodesToIndex() does -- TODO confirm against that function.
   */
  public static $_namespaces = array();

  /**
   * Forgets the stored indexing position for a namespace.
   *
   * @param $namespace
   *   Prefix of the Drupal variables holding the position.
   */
  public static function reset($namespace) {
    variable_del($namespace . '_last_change');
    variable_del($namespace . '_last_id');
  }

  /**
   * Returns the stored "last change" value for a namespace.
   *
   * @param $namespace
   *   Prefix of the Drupal variables holding the position.
   *
   * @return
   *   The value of the $namespace . '_last_change' variable, or 0 if unset.
   */
  public static function get_change($namespace) {
    return variable_get($namespace . '_last_change', 0);
  }

  /**
   * Returns the stored "last id" value for a namespace.
   *
   * @param $namespace
   *   Prefix of the Drupal variables holding the position.
   *
   * @return
   *   The value of the $namespace . '_last_id' variable, or 0 if unset.
   */
  public static function get_last($namespace) {
    return variable_get($namespace . '_last_id', 0);
  }

  /**
   * Function to generically handle the fetching of nodes that need indexing on a cron run.
   *
   * Registers 'apachesolr_shutdown' as a PHP shutdown function, reads the
   * stored position for the namespace and selects published nodes (status = 1)
   * beyond that position. A node's change time is the greater of n.changed and
   * its last comment timestamp. Rows are ordered by that change time and then
   * by nid, and limited to the 'search_cron_limit' variable (default 100).
   *
   * Callers should call ApacheSolrUpdate::success() after each node has been
   * handled so the position can advance.
   *
   * @param $namespace
   *   A string unique to the calling module.
   *
   * @return
   *   The db_query_range() result, with the columns last_change and nid.
   */
  public static function getNodesToIndex($namespace) {
    register_shutdown_function('apachesolr_shutdown');
    $cron_change = self::get_change($namespace);
    $cron_last = self::get_last($namespace);
    $cron_limit = variable_get('search_cron_limit', 100);
    $result = db_query_range('SELECT GREATEST(IF(c.last_comment_timestamp IS NULL, 0, c.last_comment_timestamp), n.changed) as last_change, n.nid ' . 'FROM {node} n LEFT JOIN {node_comment_statistics} c ON n.nid = c.nid ' . 'WHERE n.status = 1 ' . 'AND ((GREATEST(IF(c.last_comment_timestamp IS NULL , 0, c.last_comment_timestamp ), n.changed) = %d AND n.nid > %d) OR n.changed > %d OR c.last_comment_timestamp > %d) ' . 'ORDER BY last_change ASC, n.nid ASC', $cron_change, $cron_last, $cron_change, $cron_change, 0, $cron_limit);
    return $result;
  }

  /**
   * Records the position of the most recently handled node for a namespace.
   *
   * The values are only kept in the static $_namespaces property.
   *
   * @param $namespace
   *   A string unique to the calling module.
   * @param $last_change
   *   The last_change value of the handled row.
   * @param $last_id
   *   The nid of the handled row.
   */
  public static function success($namespace, $last_change, $last_id) {
    self::$_namespaces[$namespace] = array(
      'last_change' => $last_change,
      'last_id' => $last_id,
    );
  }

  /**
   * Builds Solr documents for the next batch of changed nodes and sends them.
   *
   * Returns early (after logging to watchdog) when no Solr server answers the
   * ping. Otherwise every row from getNodesToIndex() is turned into an
   * Apache_Solr_Document, the position is recorded through success() under
   * the caller's namespace, and the documents are added in chunks of 20,
   * followed by a commit and an optimize. Exceptions are logged to watchdog
   * and not rethrown.
   *
   * @param $namespace
   *   A string unique to the calling module.
   */
  public static function update_index($namespace) {
    $solr = FALSE;
    try {

      // Get the $solr object. Objects are handles, so no reference assignment
      // is needed.
      $solr = apachesolr_get_solr(variable_get('apachesolr_host', 'localhost'), variable_get('apachesolr_port', 8983), variable_get('apachesolr_path', '/solr'));

      // If there is no $solr object, or the server does not answer, there is
      // no server available, so don't continue. The is_object() check keeps a
      // missing object from causing an uncatchable fatal error.
      if (!is_object($solr) || !$solr->ping()) {
        throw new Exception(t('No Solr instance available during indexing'));
      }
    }
    catch (Exception $e) {
      watchdog('Apache Solr', $e->getMessage(), WATCHDOG_ERROR);
      return;
    }

    // Get CCK fields list
    $cck_fields = apachesolr_cck_fields();
    $result = self::getNodesToIndex($namespace);
    $documents = array();
    while ($row = db_fetch_object($result)) {

      // Variables to track the last item changed.
      $solr_last_change = $row->last_change;
      $solr_last_id = $row->nid;

      // Set reset = TRUE to avoid static caching of all nodes that get indexed.
      $node = node_load($row->nid, NULL, TRUE);

      // empty() also covers a failed load, where $node is not an object.
      if (!empty($node->nid)) {

        // Build the node body.
        $node = node_build_content($node, FALSE, FALSE);
        $node->body = drupal_render($node->content);
        $text = check_plain($node->title) . ' ' . $node->body;

        // Fetch extra data normally not visible
        $extra = node_invoke_nodeapi($node, 'update index');
        foreach ($extra as $t) {
          $text .= $t;
        }

        // Update solr index.
        try {
          $document = new Apache_Solr_Document();
          $site = url(NULL, NULL, NULL, TRUE);

          // The hash is derived from the site URL; it is an identifier, not a
          // security measure.
          $hash = md5($site);
          $document->site = $site;
          $document->hash = $hash;
          $document->url = url('node/' . $node->nid, NULL, NULL, TRUE);
          $document->nid = $node->nid;
          $document->uid = $node->uid;
          $document->title = $node->title;
          $document->body = $node->body;
          $document->type = $node->type;
          $document->changed = $node->changed;
          $document->comment_count = $node->comment_count;
          $document->name = $node->name;
          $document->language = $node->language;

          // Path aliases can have important information about the content.
          // Add them to the index as well.
          if (function_exists('drupal_get_path_alias')) {

            // Add any path alias to the index, looking first for language specific
            // aliases but using language neutral aliases otherwise.
            $language = empty($node->language) ? '' : $node->language;
            $path = 'node/' . $node->nid;
            $output = drupal_get_path_alias($path, $language);
            if ($output && $output != $path) {
              $document->path = $output;
              $text .= $output;
            }
          }
          foreach ($cck_fields as $key => $cck_info) {
            if (isset($node->{$key})) {

              // Got a CCK field. See if it is to be indexed.
              $function = $cck_info['callback'];
              if ($cck_info['callback'] && function_exists($function)) {
                $field = call_user_func_array($function, array(
                  $node,
                  $key,
                ));
              }
              else {
                $field = $node->{$key};
              }
              $index_key = apachesolr_index_key($cck_info);

              // A callback may return nothing; only iterate real lists.
              if (is_array($field)) {
                foreach ($field as $value) {

                  // Don't index NULLs or empty strings
                  if (isset($value['view']) && strlen($value['view'])) {
                    if ($cck_info['multiple']) {
                      $document->setMultiValue($index_key, $value['view']);
                    }
                    else {
                      $document->{$index_key} = $value['view'];
                    }
                  }
                }
              }
            }
          }

          // This is the string value of the title. Used for sorting.
          $document->stitle = $node->title;
          if (is_array($node->taxonomy)) {
            foreach ($node->taxonomy as $term) {

              // Double indexing of tids lets us do efficient searches (on tid)
              // and do accurate per-vocabulary faceting.
              // By including the ancestors to a term in the index we make
              // sure that searches for general categories match specific
              // categories, e.g. Fruit -> apple, a search for fruit will find
              // content categorized with apple.
              $ancestors = taxonomy_get_parents_all($term->tid);
              foreach ($ancestors as $ancestor) {
                $document->setMultiValue('tid', $ancestor->tid);
                $document->setMultiValue('imfield_vid' . $ancestor->vid, $ancestor->tid);
                $document->setMultiValue('vid', $ancestor->vid);
                $document->setMultiValue('taxonomy_name', $ancestor->name);
                $text .= ' ' . $ancestor->name;
              }
            }
          }
          $document->text = $text;

          // Let modules add to the document
          foreach (module_implements('apachesolr_update_index') as $module) {
            $function = $module . '_apachesolr_update_index';
            $function($document, $node);
          }
          $documents[] = $document;
        }
        catch (Exception $e) {
          watchdog('Apache Solr', $e->getMessage(), WATCHDOG_ERROR);
        }
      }

      // Record progress under the caller's namespace (not a fixed one), so
      // every module advances its own position.
      self::success($namespace, $solr_last_change, $solr_last_id);
    }
    if (is_object($solr) && count($documents) > 0) {
      try {
        watchdog('Apache Solr', t('Adding @count documents.', array(
          '@count' => count($documents),
        )));

        // Chunk the adds by 20s
        $docs_chunk = array_chunk($documents, 20);
        foreach ($docs_chunk as $docs) {
          $solr->addDocuments($docs);
        }
        $solr->commit();
        $solr->optimize(FALSE, FALSE);
      }
      catch (Exception $e) {
        watchdog('Apache Solr', $e->getMessage(), WATCHDOG_ERROR);
      }
    }
  }

}

Members

Namesort descending Modifiers Type Description Overrides
ApacheSolrUpdate::$_namespaces public static property
ApacheSolrUpdate::getNodesToIndex static function Function to generically handle the fetching of nodes that need indexing on a cron run. It takes a namespace which needs to be unique to the calling module and manages all of the global variables and the shutdown function so that every…
ApacheSolrUpdate::get_change static function
ApacheSolrUpdate::get_last static function
ApacheSolrUpdate::reset static function
ApacheSolrUpdate::success static function
ApacheSolrUpdate::update_index static function