class ApacheSolrUpdate in Apache Solr Search 5
This class manages the index-update needs of multiple search modules. Each module needs to track its own list of nodes that need updating.
Hierarchy
- class \ApacheSolrUpdate
Expanded class hierarchy of ApacheSolrUpdate
File
- ./
apachesolr.module, line 256 - Integration with the Apache Solr search application.
View source
/**
 * Manages the index-update bookkeeping of multiple search modules.
 *
 * Every calling module passes its own $namespace. The namespace is used as a
 * prefix for two persistent variables, "<namespace>_last_change" and
 * "<namespace>_last_id", which together form the cursor into the list of
 * nodes that still need to be indexed.
 */
class ApacheSolrUpdate {

  /**
   * Most recent cursor position recorded by success(), keyed by namespace.
   *
   * Each entry is array('last_change' => ..., 'last_id' => ...).
   * NOTE(review): presumably read and persisted by apachesolr_shutdown(),
   * which is registered in getNodesToIndex() but defined outside this class.
   */
  public static $_namespaces = array();

  /**
   * Deletes the stored cursor of a namespace.
   *
   * @param $namespace
   *   Variable-name prefix identifying the calling module.
   */
  static function reset($namespace) {
    variable_del($namespace . '_last_change');
    variable_del($namespace . '_last_id');
  }

  /**
   * Returns the stored "last change" value of a namespace.
   *
   * @param $namespace
   *   Variable-name prefix identifying the calling module.
   *
   * @return
   *   The value of "<namespace>_last_change", or 0 when it is not set.
   */
  static function get_change($namespace) {
    return variable_get($namespace . '_last_change', 0);
  }

  /**
   * Returns the stored "last id" value of a namespace.
   *
   * @param $namespace
   *   Variable-name prefix identifying the calling module.
   *
   * @return
   *   The value of "<namespace>_last_id", or 0 when it is not set.
   */
  static function get_last($namespace) {
    return variable_get($namespace . '_last_id', 0);
  }

  /**
   * Fetches the next batch of published nodes that need indexing.
   *
   * Takes a namespace which needs to be unique to the calling module, so that
   * every search implementation can keep its own cursor without duplicating
   * the query. Also registers 'apachesolr_shutdown' as a shutdown function.
   *
   * The batch size is the 'search_cron_limit' variable (default 100). Rows are
   * ordered by last_change and then nid, where last_change is the greater of
   * the node's changed time and its last comment timestamp.
   *
   * Callers should call ApacheSolrUpdate::success() after each node has been
   * handled so that the cursor can advance.
   *
   * @param $namespace
   *   Variable-name prefix identifying the calling module.
   *
   * @return
   *   The db_query_range() result; each row carries last_change and nid.
   */
  static function getNodesToIndex($namespace) {
    register_shutdown_function('apachesolr_shutdown');
    $cron_change = self::get_change($namespace);
    $cron_last = self::get_last($namespace);
    $cron_limit = variable_get('search_cron_limit', 100);

    $sql = 'SELECT GREATEST(IF(c.last_comment_timestamp IS NULL, 0, c.last_comment_timestamp), n.changed) as last_change, n.nid '
      . 'FROM {node} n LEFT JOIN {node_comment_statistics} c ON n.nid = c.nid '
      . 'WHERE n.status = 1 '
      . 'AND ((GREATEST(IF(c.last_comment_timestamp IS NULL , 0, c.last_comment_timestamp ), n.changed) = %d AND n.nid > %d) OR n.changed > %d OR c.last_comment_timestamp > %d) '
      . 'ORDER BY last_change ASC, n.nid ASC';

    return db_query_range($sql, $cron_change, $cron_last, $cron_change, $cron_change, 0, $cron_limit);
  }

  /**
   * Records the cursor position reached by a namespace.
   *
   * Only the static $_namespaces registry is updated here; nothing is written
   * to persistent storage by this method.
   *
   * @param $namespace
   *   Variable-name prefix identifying the calling module.
   * @param $last_change
   *   The last_change value of the most recently handled row.
   * @param $last_id
   *   The nid of the most recently handled row.
   */
  static function success($namespace, $last_change, $last_id) {
    self::$_namespaces[$namespace] = array(
      'last_change' => $last_change,
      'last_id' => $last_id,
    );
  }

  /**
   * Builds Solr documents for the next batch of nodes and sends them to Solr.
   *
   * Returns early, after logging to watchdog, when the Solr server cannot be
   * obtained or does not answer a ping. Exceptions raised while building a
   * document or while sending the batch are logged and not re-thrown.
   *
   * Note that the cursor is advanced for every fetched row inside the loop,
   * i.e. before the collected documents are sent to Solr.
   *
   * @param $namespace
   *   Variable-name prefix identifying the calling module. It selects the
   *   cursor that is read and is the key under which progress is recorded.
   */
  static function update_index($namespace) {
    $solr = FALSE;
    try {
      // Get the $solr object.
      $solr =& apachesolr_get_solr(variable_get('apachesolr_host', 'localhost'), variable_get('apachesolr_port', 8983), variable_get('apachesolr_path', '/solr'));
      // If the server does not answer, there is no point in continuing.
      if (!$solr->ping()) {
        throw new Exception(t('No Solr instance available during indexing'));
      }
    }
    catch (Exception $e) {
      watchdog('Apache Solr', $e->getMessage(), WATCHDOG_ERROR);
      return;
    }

    // Get CCK fields list.
    $cck_fields = apachesolr_cck_fields();
    $result = self::getNodesToIndex($namespace);
    $documents = array();

    while ($row = db_fetch_object($result)) {
      // Set reset = TRUE to avoid static caching of all nodes that get indexed.
      $node = node_load($row->nid, NULL, TRUE);

      // node_load() may not return a node; !empty() also covers that case
      // without dereferencing a non-object.
      if (!empty($node->nid)) {
        // Build the node body.
        $node = node_build_content($node, FALSE, FALSE);
        $node->body = drupal_render($node->content);
        $text = check_plain($node->title) . ' ' . $node->body;

        // Fetch extra data normally not visible.
        $extra = node_invoke_nodeapi($node, 'update index');
        foreach ($extra as $t) {
          $text .= $t;
        }

        // Update solr index.
        try {
          $document = new Apache_Solr_Document();
          $site = url(NULL, NULL, NULL, TRUE);
          $hash = md5($site);
          $document->site = $site;
          $document->hash = $hash;
          $document->url = url('node/' . $node->nid, NULL, NULL, TRUE);
          $document->nid = $node->nid;
          $document->uid = $node->uid;
          $document->title = $node->title;
          $document->body = $node->body;
          $document->type = $node->type;
          $document->changed = $node->changed;
          $document->comment_count = $node->comment_count;
          $document->name = $node->name;
          $document->language = $node->language;

          // Path aliases can have important information about the content.
          // Add them to the index as well.
          if (function_exists('drupal_get_path_alias')) {
            // Add any path alias to the index, looking first for language
            // specific aliases but using language neutral aliases otherwise.
            $language = empty($node->language) ? '' : $node->language;
            $path = 'node/' . $node->nid;
            $output = drupal_get_path_alias($path, $language);
            if ($output && $output != $path) {
              $document->path = $output;
              $text .= $output;
            }
          }

          foreach ($cck_fields as $key => $cck_info) {
            if (!isset($node->{$key})) {
              continue;
            }
            // Got a CCK field. Use its callback, when one exists, to obtain
            // the values to index; otherwise take the raw field from the node.
            $function = $cck_info['callback'];
            if ($cck_info['callback'] && function_exists($function)) {
              $field = call_user_func_array($function, array($node, $key));
            }
            else {
              $field = $node->{$key};
            }
            $index_key = apachesolr_index_key($cck_info);
            // A callback is free to return nothing; only iterate real lists.
            if (!is_array($field)) {
              continue;
            }
            foreach ($field as $value) {
              // Don't index NULLs or empty strings.
              if (isset($value['view']) && strlen($value['view'])) {
                if ($cck_info['multiple']) {
                  $document->setMultiValue($index_key, $value['view']);
                }
                else {
                  $document->{$index_key} = $value['view'];
                }
              }
            }
          }

          // This is the string value of the title. Used for sorting.
          $document->stitle = $node->title;

          if (is_array($node->taxonomy)) {
            foreach ($node->taxonomy as $term) {
              // Double indexing of tids lets us do efficient searches (on tid)
              // and do accurate per-vocabulary faceting.
              // By including the ancestors to a term in the index we make
              // sure that searches for general categories match specific
              // categories, e.g. Fruit -> apple, a search for fruit will find
              // content categorized with apple.
              $ancestors = taxonomy_get_parents_all($term->tid);
              foreach ($ancestors as $ancestor) {
                $document->setMultiValue('tid', $ancestor->tid);
                $document->setMultiValue('imfield_vid' . $ancestor->vid, $ancestor->tid);
                $document->setMultiValue('vid', $ancestor->vid);
                $document->setMultiValue('taxonomy_name', $ancestor->name);
                $text .= ' ' . $ancestor->name;
              }
            }
          }

          $document->text = $text;

          // Let modules add to the document.
          foreach (module_implements('apachesolr_update_index') as $module) {
            $function = $module . '_apachesolr_update_index';
            $function($document, $node);
          }

          $documents[] = $document;
        }
        catch (Exception $e) {
          watchdog('Apache Solr', $e->getMessage(), WATCHDOG_ERROR);
        }
      }

      // Record progress under the caller's namespace, i.e. the same one whose
      // cursor getNodesToIndex() read, so that each module advances its own
      // position.
      self::success($namespace, $row->last_change, $row->nid);
    }

    if (is_object($solr) && count($documents) > 0) {
      try {
        watchdog('Apache Solr', t('Adding @count documents.', array(
          '@count' => count($documents),
        )));
        // Chunk the adds by 20s.
        $docs_chunk = array_chunk($documents, 20);
        foreach ($docs_chunk as $docs) {
          $solr->addDocuments($docs);
        }
        $solr->commit();
        $solr->optimize(FALSE, FALSE);
      }
      catch (Exception $e) {
        watchdog('Apache Solr', $e->getMessage(), WATCHDOG_ERROR);
      }
    }
  }

}
Members
Name | Modifiers | Type | Description | Overrides |
---|---|---|---|---|
ApacheSolrUpdate::$_namespaces | public static | property | | |
ApacheSolrUpdate::getNodesToIndex | static | function | Function to generically handle the fetching of nodes that need indexing on a cron run. It takes a namespace which needs to be unique to the calling module and manages all of the global variables and the shutdown function so that every… | |
ApacheSolrUpdate::get_change | static | function | | |
ApacheSolrUpdate::get_last | static | function | | |
ApacheSolrUpdate::reset | static | function | | |
ApacheSolrUpdate::success | static | function | | |
ApacheSolrUpdate::update_index | static | function | | |