View source
<?php
// Namespace passed to ApacheSolrUpdate::reset(), ::getNodesToIndex() and
// ::success() by this module.
define('SOLR_ATTACHMENT_NS', 'apachesolr_attachment');
// Type label used for every watchdog() entry written by this module.
define('SOLR_ATTACHMENT_WD', 'Solr Attachments');
/**
 * Implementation of hook_menu().
 *
 * Registers the administrative settings page on which the attachment helper
 * commands are configured.
 *
 * @param $may_cache
 *   When TRUE the settings page item is returned; otherwise nothing is
 *   registered.
 *
 * @return
 *   An array of menu item definitions (empty when $may_cache is FALSE).
 */
function apachesolr_attachments_menu($may_cache) {
  $items = array();
  if ($may_cache) {
    $items[] = array(
      'path' => 'admin/settings/apachesolr/attachments',
      'title' => t('Apache Solr Attachments Settings'),
      'description' => t('Administer Apache Solr Attachments'),
      'callback' => 'drupal_get_form',
      // Must be an array: the menu system spreads it into the callback's
      // argument list, so a bare string is not a valid value here.
      'callback arguments' => array('apachesolr_attachments_settings'),
      'access' => user_access('administer site configuration'),
    );
  }
  return $items;
}
/**
 * Form builder for the attachment helper settings page.
 *
 * Produces an instruction paragraph followed by one textfield per supported
 * attachment type. Each textfield is named after, and pre-filled from, the
 * variable that stores the helper command for that type.
 *
 * @return
 *   The form array after it has been passed through system_settings_form().
 */
function apachesolr_attachments_settings() {
  $form = array();

  $form['instructions'] = array(
    '#type' => 'markup',
    '#value' => t('For each type of attachment, enter the path to the helper application installed on your server. "%file%" is a placeholder for the path of the attachment file and is required. If you don\'t want to search a type of attachment, leave the path setting blank (i.e., remove the content from the appropriate field below).'),
  );

  // Variable name => translated labels, in the order the fields are shown.
  $helpers = array(
    'apachesolr_attachment_pdf_path' => array(
      'title' => t('PDF Helper'),
      'description' => t("The full path to the helper for application/pdf files, plus any other parameters needed by the helper."),
    ),
    'apachesolr_attachment_txt_path' => array(
      'title' => t('Text Helper'),
      'description' => t("The full path to the helper for text/plain files, plus any other parameters needed by the helper."),
    ),
    'apachesolr_attachment_doc_path' => array(
      'title' => t('Word Doc Helper'),
      'description' => t("The full path to the helper for application/msword files, plus any other parameters needed by the helper."),
    ),
  );

  foreach ($helpers as $variable => $labels) {
    $form[$variable] = array(
      '#type' => 'textfield',
      '#title' => $labels['title'],
      '#size' => 50,
      '#maxlength' => 100,
      '#description' => $labels['description'],
      '#default_value' => variable_get($variable, ''),
    );
  }

  return system_settings_form($form);
}
/**
 * Implementation of hook_search().
 *
 * @param $op
 *   The operation being requested. Handled values:
 *   - 'name': returns an empty string.
 *   - 'reset': resets this module's ApacheSolrUpdate namespace.
 *   - 'status': returns nothing.
 *   - 'search': delegates to apachesolr_search_search().
 * @param $keys
 *   The search keywords; only forwarded for the 'search' operation.
 *
 * @return
 *   Depends on $op as listed above; NULL for any other operation.
 */
function apachesolr_attachments_search($op = 'search', $keys = NULL) {
  if ($op == 'name') {
    return '';
  }
  if ($op == 'reset') {
    ApacheSolrUpdate::reset(SOLR_ATTACHMENT_NS);
    return;
  }
  if ($op == 'status') {
    return;
  }
  if ($op == 'search') {
    return apachesolr_search_search($op, $keys);
  }
}
/**
 * Implementation of hook_update_index().
 *
 * Walks the rows returned by ApacheSolrUpdate::getNodesToIndex() for this
 * module's namespace. For every node that can be loaded, the attachment
 * documents already in the index are removed, text is extracted from each
 * indexable file, and one Apache_Solr_Document per non-empty file is built.
 * Other modules may alter each document by implementing
 * hook_apachesolr_attachments_update_index($document, $node, $file).
 * All collected documents are handed to _as_index_documents() at the end.
 */
function apachesolr_attachments_update_index() {
  // Always an array, so _as_index_documents() never receives an undefined
  // variable when no attachment produced any text.
  $documents = array();

  // The site URL and its hash are the same for every document.
  $site = url(NULL, NULL, NULL, TRUE);
  $hash = md5($site);

  $result = ApacheSolrUpdate::getNodesToIndex(SOLR_ATTACHMENT_NS);
  while ($row = db_fetch_object($result)) {
    $node = node_load($row->nid);
    if (empty($node->nid)) {
      // The node could not be loaded; leave the row unacknowledged.
      continue;
    }

    // Drop whatever was indexed for this node before re-adding its files.
    _asa_remove_attachments_from_index($node->nid);

    $files = _asa_get_indexable_files($node);
    if (!empty($files)) {
      // One try block spans all files: an exception skips the remaining files
      // of this node, but the node is still reported as processed below.
      try {
        foreach ($files as $file) {
          $file = (object) $file;
          $text = trim(_asa_get_attachment_text($file));
          if (empty($text)) {
            continue;
          }

          $document = new Apache_Solr_Document();
          $document->site = $site;
          $document->hash = $hash;
          $document->url = file_create_url($file->filepath);
          // NOTE(review): the file id doubles as the Solr document id; confirm
          // it cannot collide with ids of other content in the same index.
          $document->id = $file->fid;
          $document->nid = $node->nid;
          $document->title = $file->filename;
          $document->changed = $node->changed;
          $document->uid = $node->uid;
          $document->body = $text;
          // Not every file record carries a description.
          $description = isset($file->description) ? $file->description : '';
          $document->text = "{$description} {$file->filename} {$text}";
          $document->type = $node->type;
          // Marks the document as an attachment; used when removing
          // attachments and when post-processing search results.
          $document->bsfield_isfile = TRUE;
          _as_configure_taxonomy($document, $node);

          foreach (module_implements('apachesolr_attachments_update_index') as $module) {
            $function = $module . '_apachesolr_attachments_update_index';
            $function($document, $node, $file);
          }
          $documents[] = $document;
        }
      }
      catch (Exception $e) {
        watchdog(SOLR_ATTACHMENT_WD, $e->getMessage(), WATCHDOG_ERROR);
      }
    }

    ApacheSolrUpdate::success(SOLR_ATTACHMENT_NS, $row->last_change, $row->nid);
  }

  _as_index_documents($documents);
}
/**
 * Implementation of hook_nodeapi().
 *
 * Removes a node's attachment documents from the index when the node is
 * deleted. Every other operation is ignored.
 *
 * @param $node
 *   The node being acted on; only its nid is read.
 * @param $op
 *   The node operation.
 */
function apachesolr_attachments_nodeapi($node, $op) {
  if ($op == 'delete') {
    _asa_remove_attachments_from_index($node->nid);
  }
}
/**
 * Prefixes the snippet of every attachment result with a link to its node.
 *
 * NOTE(review): the name suggests this implements
 * hook_apachesolr_process_results(); confirm the invoking code passes a
 * variable, since the results are modified in place.
 *
 * @param $results
 *   The search results, taken by reference so that the rewritten snippets
 *   reach the caller. Each item is expected to hold a 'node' object and a
 *   'snippet' string. Non-array input is left untouched.
 */
function apachesolr_attachments_apachesolr_process_results(&$results) {
  if (!is_array($results)) {
    return;
  }
  // Write back by key instead of iterating by reference, so no dangling
  // reference to the last element survives the loop.
  foreach ($results as $key => $item) {
    if (isset($item['node']->bsfield_isfile) && $item['node']->bsfield_isfile === TRUE) {
      $nid = $item['node']->nid;
      $node_title = db_result(db_query("SELECT title FROM {node} WHERE nid = %d", $nid));
      $results[$key]['snippet'] = l($node_title, "node/{$nid}") . ': ' . $item['snippet'];
    }
  }
}
/**
 * Collects every file attached to a node.
 *
 * Files come from $node->files and from each node property named after a
 * field returned by _asa_get_cck_file_fields(). Empty sources are skipped.
 *
 * @param $node
 *   The node object to inspect.
 *
 * @return
 *   A single array with all files found, merged with array_merge() (numeric
 *   keys are therefore renumbered).
 */
function _asa_get_indexable_files($node) {
  // Gather the non-empty sources first, in the order they are merged.
  $sources = array();
  if (!empty($node->files)) {
    $sources[] = $node->files;
  }
  foreach (_asa_get_cck_file_fields() as $field_name) {
    if (!empty($node->{$field_name})) {
      $sources[] = $node->{$field_name};
    }
  }

  $collected = array();
  foreach ($sources as $source) {
    $collected = array_merge($collected, $source);
  }
  return $collected;
}
/**
 * Lists the names of all fields whose type is 'file'.
 *
 * @return
 *   An array of field names taken from content_fields(), or an empty array
 *   when the filefield module is not enabled.
 */
function _asa_get_cck_file_fields() {
  if (!module_exists('filefield')) {
    return array();
  }

  $names = array();
  foreach (content_fields() as $name => $definition) {
    if ($definition['type'] == 'file') {
      $names[] = $name;
    }
  }
  return $names;
}
/**
 * Extracts the text of an attachment by running its configured helper.
 *
 * @param $file
 *   A file object; its filemime selects the helper command and its filepath
 *   replaces the "%file%" placeholder in that command.
 *
 * @return
 *   The helper's output with invalid UTF-8 sequences and form feed characters
 *   removed, or an empty string when no helper is configured for the MIME
 *   type or the helper produced no usable output.
 */
function _asa_get_attachment_text($file) {
  $helper_command = _asa_get_file_helper_command($file->filemime);
  if ($helper_command == '') {
    return '';
  }

  // Split the template around the placeholder. A matching pair of quotes
  // written directly around %file% is swallowed too, because the path is
  // quoted by escapeshellarg() below.
  // NOTE(review): a placeholder embedded inside a longer quoted string is not
  // supported by this splitting.
  $parts = preg_split('/(["\']?)%file%\1/', $helper_command);

  // The configured template keeps the same escapeshellcmd() treatment as
  // before. The path is inserted as one quoted shell argument, so spaces stay
  // inside a single argument, and it is joined with implode() rather than
  // used as a regex replacement, so "$1" or "\1" in a name stays literal.
  $parts = array_map('escapeshellcmd', $parts);
  $command = implode(escapeshellarg($file->filepath), $parts);

  $text = shell_exec($command);
  if (!is_string($text) || $text === '') {
    return '';
  }

  // Drop byte sequences that are not valid UTF-8.
  $cleaned_text = iconv("utf-8", "utf-8//IGNORE", $text);
  if ($cleaned_text === FALSE) {
    return '';
  }

  // Strip form feed characters from the extracted text.
  return str_replace("\x0C", '', $cleaned_text);
}
/**
 * Deletes all attachment documents of a node from the Solr index.
 *
 * Issues a delete-by-query for documents matching the node id that are
 * flagged as files, then commits. Any exception is logged to watchdog and
 * not re-thrown.
 *
 * @param $nid
 *   The id of the node whose attachment documents should be removed.
 */
function _asa_remove_attachments_from_index($nid) {
  $query = "nid:{$nid} AND bsfield_isfile:true";
  try {
    $connection = _get_solr_instance();
    $connection->deleteByQuery($query);
    $connection->commit();
  }
  catch (Exception $error) {
    watchdog(SOLR_ATTACHMENT_WD, $error->getMessage(), WATCHDOG_ERROR);
  }
}
/**
 * Looks up the stored path of a file.
 *
 * Despite the function name, the value returned is the filepath column of
 * the {files} table, not a URL.
 *
 * @param $fid
 *   The numeric file id.
 *
 * @return
 *   The file's path, or NULL when $fid is empty, not numeric, or does not
 *   match any row.
 */
function _asa_get_file_url($fid) {
  if (empty($fid) || !is_numeric($fid)) {
    return NULL;
  }

  // Only the path is needed, so only that column is fetched.
  $result = db_query('SELECT filepath FROM {files} WHERE fid = %d', $fid);
  $file = db_fetch_array($result);

  // db_fetch_array() yields no array when the id is unknown; do not index
  // into that value.
  if (!is_array($file) || !isset($file['filepath'])) {
    return NULL;
  }
  return $file['filepath'];
}
/**
 * Returns the helper command configured for a MIME type.
 *
 * @param $type
 *   The MIME type of the attachment.
 *
 * @return
 *   The stored command for application/pdf, text/plain or application/msword
 *   (an empty string if none is stored), or an empty string for any other
 *   type.
 */
function _asa_get_file_helper_command($type) {
  // MIME type => name of the variable holding its helper command.
  $variables = array(
    'application/pdf' => 'apachesolr_attachment_pdf_path',
    'text/plain' => 'apachesolr_attachment_txt_path',
    'application/msword' => 'apachesolr_attachment_doc_path',
  );

  foreach ($variables as $mime => $variable) {
    if ($type == $mime) {
      return variable_get($variable, '');
    }
  }
  return '';
}
/**
 * Non-throwing wrapper around _get_solr_instance().
 *
 * @return
 *   Whatever _get_solr_instance() returns, or FALSE when it throws. The
 *   exception message is logged to watchdog in that case.
 */
function _asa_get_solr_instance() {
  $instance = FALSE;
  try {
    $instance = _get_solr_instance();
  }
  catch (Exception $error) {
    watchdog(SOLR_ATTACHMENT_WD, $error->getMessage(), WATCHDOG_ERROR);
  }
  return $instance;
}
/**
 * Returns the Solr service object for the configured server.
 *
 * Host, port and path are read from the apachesolr_host, apachesolr_port and
 * apachesolr_path variables (defaults: localhost, 8983, /solr).
 *
 * @return
 *   The object returned by apachesolr_get_solr(), once it has answered a
 *   ping.
 *
 * @throws Exception
 *   When the ping does not succeed.
 */
function _get_solr_instance() {
  $solr_host = variable_get('apachesolr_host', 'localhost');
  $solr_port = variable_get('apachesolr_port', 8983);
  $solr_path = variable_get('apachesolr_path', '/solr');

  $service =& apachesolr_get_solr($solr_host, $solr_port, $solr_path);
  if ($service->ping()) {
    return $service;
  }
  throw new Exception(t('No Solr instance available'));
}
/**
 * Copies a node's taxonomy terms onto a Solr document.
 *
 * For every term, four multi-valued fields receive a value: 'tid',
 * 'imfield_vid<vid>', 'vid' and 'taxonomy_name'. Nothing happens when
 * $node->taxonomy is not an array.
 *
 * @param $document
 *   The document to populate; it must provide setMultiValue().
 * @param $node
 *   The node whose taxonomy property is read.
 */
function _as_configure_taxonomy($document, $node) {
  if (!is_array($node->taxonomy)) {
    return;
  }

  foreach ($node->taxonomy as $term) {
    $tid = $term->tid;
    $vid = $term->vid;
    $document->setMultiValue('tid', $tid);
    $document->setMultiValue('imfield_vid' . $vid, $tid);
    $document->setMultiValue('vid', $vid);
    $document->setMultiValue('taxonomy_name', $term->name);
  }
}
/**
 * Sends a list of documents to Solr.
 *
 * Documents are added in batches of 50, followed by a commit and an optimize
 * call. Exceptions raised while doing so are logged to watchdog and not
 * re-thrown.
 *
 * @param $documents
 *   An array of documents to add. An empty or non-array value makes this a
 *   no-op.
 */
function _as_index_documents($documents) {
  // Nothing to send: return before contacting Solr at all. This also keeps
  // count() away from NULL or other non-array input.
  if (empty($documents) || !is_array($documents)) {
    return;
  }

  $solr = _asa_get_solr_instance();
  if (!is_object($solr)) {
    // No usable Solr connection was returned.
    return;
  }

  watchdog(SOLR_ATTACHMENT_WD, t("Adding @count documents to Solr", array(
    '@count' => count($documents),
  )));

  try {
    foreach (array_chunk($documents, 50) as $docs) {
      $solr->addDocuments($docs);
    }
    $solr->commit();
    $solr->optimize(FALSE, FALSE);
  }
  catch (Exception $e) {
    watchdog(SOLR_ATTACHMENT_WD, $e->getMessage(), WATCHDOG_ERROR);
  }
}