View source
<?php
/**
 * Relative path 'extract/tika'.
 *
 * NOTE(review): this constant is not referenced anywhere in this file;
 * presumably it is the Solr request handler path used for Tika extraction.
 * Confirm at the usage sites.
 */
define('EXTRACTING_SERVLET', 'extract/tika');
/**
 * Implements hook_menu().
 *
 * Declares the attachments settings pages below the Apache Solr search
 * configuration path. Every item that declares a page callback is served from
 * apachesolr_attachments.admin.inc and requires 'administer search'.
 *
 * @return array
 *   Menu item definitions keyed by path.
 */
function apachesolr_attachments_menu() {
  $root = 'admin/config/search/apachesolr/attachments';
  $include = 'apachesolr_attachments.admin.inc';
  $permission = array('administer search');
  // Both confirmation pages share one form builder; index 6 is the last path
  // component ('delete' or 'clear-cache').
  $confirm_form = array('apachesolr_attachments_confirm', 6);

  return array(
    $root => array(
      'title' => 'Attachments',
      'description' => 'Administer Apache Solr Attachments.',
      'page callback' => 'apachesolr_attachments_admin_page',
      'access arguments' => $permission,
      'file' => $include,
      'type' => MENU_LOCAL_TASK,
    ),
    $root . '/test' => array(
      'title' => 'Test tika extraction',
      'page callback' => 'apachesolr_attachments_test_tika_extraction',
      'access arguments' => $permission,
      'file' => $include,
      'type' => MENU_CALLBACK,
    ),
    $root . '/confirm/delete' => array(
      'title' => 'Delete and reindex all files',
      'page callback' => 'drupal_get_form',
      'page arguments' => $confirm_form,
      'access arguments' => $permission,
      'file' => $include,
      'type' => MENU_CALLBACK,
    ),
    $root . '/confirm/clear-cache' => array(
      'title' => 'Delete the local cache of file text',
      'page callback' => 'drupal_get_form',
      'page arguments' => $confirm_form,
      'access arguments' => $permission,
      'file' => $include,
      'type' => MENU_CALLBACK,
    ),
    // Default tab: no callback of its own.
    $root . '/general' => array(
      'title' => 'General',
      'type' => MENU_DEFAULT_LOCAL_TASK,
    ),
    $root . '/entity_bundle' => array(
      'title' => 'Bundle',
      'description' => 'Administer Apache Solr Attachments per bundle settings.',
      'page callback' => 'drupal_get_form',
      'page arguments' => array('apachesolr_attachments_entity_bundle_settings'),
      'access arguments' => $permission,
      'file' => $include,
      'type' => MENU_LOCAL_TASK,
      'weight' => 1,
    ),
  );
}
/**
 * Implements hook_apachesolr_entity_info_alter().
 *
 * Marks the 'file' entity type as indexable, registers this module's
 * callbacks and index table for it, and appends an extra document callback
 * to the 'node' entity type.
 *
 * @param array $entity_info
 *   Entity info keyed by entity type; altered in place.
 */
function apachesolr_attachments_apachesolr_entity_info_alter(&$entity_info) {
  $file_info = &$entity_info['file'];
  $file_info['indexable'] = TRUE;
  // Status and document callbacks are lists: append instead of replacing.
  $file_info['status callback'][] = 'apachesolr_attachments_status_callback';
  $file_info['document callback'][] = 'apachesolr_attachments_solr_document';
  $file_info['reindex callback'] = 'apachesolr_attachments_solr_reindex';
  $file_info['index_table'] = 'apachesolr_index_entities_file';
  $file_info['result callback'] = 'apachesolr_attachments_file_result';
  unset($file_info);

  $entity_info['node']['document callback'][] = 'apachesolr_attachments_node_solr_document';
}
/**
 * Builds Solr documents for a file, one per parent indexed separately.
 *
 * For every row of the file index table that links this file to a parent
 * entity whose bundle is configured as 'seperate', the parent's document
 * callbacks are run on a clone of the base document and the result is then
 * overwritten with the file's own id, url, label, content and dates.
 *
 * @param ApacheSolrDocument $document
 *   Base document; cloned for every parent.
 * @param object $file
 *   File object; fid, uri, filename, filemime, filesize and timestamp are read.
 * @param string $entity_type
 *   Entity type passed to apachesolr_document_id() for the document id.
 * @param string $env_id
 *   Environment id, forwarded to the parent's document callbacks.
 *
 * @return array
 *   The generated documents; empty when no text could be extracted.
 */
function apachesolr_attachments_solr_document(ApacheSolrDocument $document, $file, $entity_type, $env_id) {
  module_load_include('inc', 'apachesolr_attachments', 'apachesolr_attachments.index');
  $documents = array();
  $table = apachesolr_get_indexer_table('file');
  $text = apachesolr_attachments_get_attachment_text($file);
  if (empty($text)) {
    return $documents;
  }
  $parents = db_select($table, 'aie')
    ->fields('aie')
    ->condition('entity_type', 'file')
    ->condition('entity_id', $file->fid)
    ->execute();
  foreach ($parents as $parent) {
    $parent_entities = entity_load($parent->parent_entity_type, array(
      $parent->parent_entity_id,
    ), NULL, TRUE);
    $parent_entity = reset($parent_entities);
    if (empty($parent_entity)) {
      continue;
    }
    list($parent_entity_id, $parent_entity_vid, $parent_entity_bundle) = entity_extract_ids($parent->parent_entity_type, $parent_entity);
    $parent_entity_type = $parent->parent_entity_type;
    if (variable_get('apachesolr_attachments_entity_bundle_indexing_' . $parent_entity_bundle, 'seperate') != 'seperate') {
      continue;
    }

    // Let the parent's document callbacks populate a clone of the base
    // document.
    $filedocument = clone $document;
    $callbacks = apachesolr_entity_get_callback($parent_entity_type, 'document callback');
    $build_documents = array();
    if (is_array($callbacks)) {
      foreach ($callbacks as $callback) {
        if (is_callable($callback)) {
          $built = $callback($filedocument, $parent_entity, $parent_entity_type, $env_id);
          // Ignore callbacks that return nothing usable instead of passing a
          // non-array to array_merge().
          if (is_array($built)) {
            $build_documents = array_merge($build_documents, $built);
          }
        }
      }
    }

    // Use the first document the callbacks produced. Without one there is
    // nothing to decorate (reset() returns FALSE), so skip this parent.
    $filedocument = reset($build_documents);
    if (!is_object($filedocument)) {
      continue;
    }

    if ($parent_entity_type == 'node' && function_exists('apachesolr_access_apachesolr_index_document_build_node')) {
      apachesolr_access_apachesolr_index_document_build_node($filedocument, $parent_entity, $env_id);
    }

    // Replace the parent's identity with the file's.
    $filedocument->id = apachesolr_document_id($file->fid . '-' . $parent_entity_id, $entity_type);
    $filedocument->url = file_create_url($file->uri);
    // The wrapper lookup returns FALSE for an unregistered scheme.
    $wrapper = file_stream_wrapper_get_instance_by_uri($file->uri);
    $path = $wrapper ? $wrapper->getExternalUrl() : NULL;
    if (!empty($path)) {
      $filedocument->path = $path;
    }
    $filedocument->label = apachesolr_clean_text($file->filename);
    $filedocument->content = apachesolr_clean_text($file->filename) . ' ' . $text;
    $filedocument->ds_created = apachesolr_date_iso($file->timestamp);
    $filedocument->ds_changed = $filedocument->ds_created;
    $filedocument->created = apachesolr_date_iso($file->timestamp);
    $filedocument->changed = $filedocument->created;

    // Store a minimal copy of the parent (type, id, bundle, label) as JSON.
    $parent_entity_info = entity_get_info($parent_entity_type);
    $small_parent_entity = new stdClass();
    $small_parent_entity->entity_type = $parent_entity_type;
    $small_parent_entity->{$parent_entity_info['entity keys']['id']} = $parent_entity_id;
    $small_parent_entity->{$parent_entity_info['entity keys']['bundle']} = $parent_entity_bundle;
    if (isset($parent_entity_info['entity keys']['label'])) {
      $small_parent_entity->{$parent_entity_info['entity keys']['label']} = $parent_entity->{$parent_entity_info['entity keys']['label']};
    }
    $filedocument->zm_parent_entity = drupal_json_encode($small_parent_entity);
    $filedocument->sm_parent_entity_bundle = $parent_entity_type . "-" . $parent_entity_bundle;
    $filedocument->sm_parent_entity_type = $parent_entity_type;
    $filedocument->ss_filemime = $file->filemime;
    $filedocument->ss_filesize = $file->filesize;
    $documents[] = $filedocument;
  }
  return $documents;
}
/**
 * Document callback that folds attachment text into the node's own document.
 *
 * Only acts when the node's bundle is configured with indexing mode 'parent'.
 * In that case the name and extracted text of every published, existing file
 * with an allowed mime type is appended to $document->content.
 *
 * @param ApacheSolrDocument $document
 *   The node document; its content is extended in place.
 * @param object $parent_entity
 *   The node being indexed.
 * @param mixed $env_id
 *   Not used by this function.
 *
 * @return array
 *   Always empty: this callback contributes no documents of its own.
 */
function apachesolr_attachments_node_solr_document(ApacheSolrDocument &$document, $parent_entity, $env_id) {
  module_load_include('inc', 'apachesolr_attachments', 'apachesolr_attachments.index');
  list($parent_entity_id, $parent_entity_vid, $parent_entity_bundle) = entity_extract_ids('node', $parent_entity);
  $mode = variable_get('apachesolr_attachments_entity_bundle_indexing_' . $parent_entity_bundle, 'seperate');
  if ($mode != 'parent') {
    return array();
  }

  // Collect the names of all file fields attached to a bundle of this name.
  $file_field_names = array();
  $all_fields = field_info_field_by_ids();
  if (is_array($all_fields)) {
    foreach ($all_fields as $field_id => $definition) {
      if ($definition['type'] != 'file') {
        continue;
      }
      foreach ($definition['bundles'] as $entity_type => $bundles) {
        if (in_array($parent_entity_bundle, $bundles)) {
          $file_field_names[$definition['field_name']] = $definition['field_name'];
        }
      }
    }
  }

  foreach ($file_field_names as $field_name) {
    if (!isset($parent_entity->{$field_name})) {
      continue;
    }
    foreach ($parent_entity->{$field_name} as $language => $items) {
      foreach ($items as $item) {
        $file = (object) $item;
        // Bitwise AND on purpose: both checks run whatever the status is.
        $indexable = (int) ($file->status == 1);
        $indexable &= apachesolr_attachments_is_file($file);
        $indexable &= apachesolr_attachments_allowed_mime($file->filemime);
        if (!$indexable) {
          continue;
        }
        $text = apachesolr_attachments_get_attachment_text($file);
        if ($text) {
          $document->content .= apachesolr_clean_text($file->filename) . ' ' . $text;
        }
      }
    }
  }
  return array();
}
/**
 * Reindex callback: records a file usage for every file/parent pair found.
 *
 * Walks the stub entities returned by _apachesolr_attachments_get_all_files()
 * and registers each file against its parent entity. The work runs inside a
 * database transaction that is rolled back when an exception is thrown.
 *
 * @return bool
 *   TRUE on success (including when there is nothing to do), FALSE when an
 *   exception was caught.
 */
function apachesolr_attachments_solr_reindex() {
  module_load_include('inc', 'apachesolr_attachments', 'apachesolr_attachments.index');
  $transaction = db_transaction();
  try {
    $files = _apachesolr_attachments_get_all_files();
    if (empty($files)) {
      return TRUE;
    }
    foreach ($files as $parent_entity_type => $parent_entities) {
      foreach ($parent_entities as $parent_entity_info) {
        $file = new stdClass();
        // The file id arrives as an extra column whose alias contains '_fid'.
        foreach ($parent_entity_info->extraFields as $key => $value) {
          if (strpos($key, '_fid') !== FALSE) {
            $file->fid = $value;
          }
        }
        // Without a file id there is no usage to record.
        if (empty($file->fid)) {
          continue;
        }
        list($parent_entity_id) = entity_extract_ids($parent_entity_type, $parent_entity_info);
        apachesolr_attachments_add_file_usage($file, $parent_entity_type, $parent_entity_id);
      }
    }
  }
  catch (Exception $e) {
    $transaction->rollback();
    drupal_set_message($e->getMessage(), 'error');
    watchdog_exception('Apache Solr Attachments', $e);
    return FALSE;
  }
  return TRUE;
}
/**
 * Finds every entity that references a file through a file field.
 *
 * Runs one query per file field and entity type, restricted to entity types
 * flagged as indexable for Apache Solr, and asks for the field's fid column
 * as an extra field.
 *
 * @return array
 *   Stub entities keyed by entity type, then by the key the query class
 *   builds ("<entity id>_<fid>"). Each stub carries the raw row in
 *   ->extraFields.
 */
function _apachesolr_attachments_get_all_files() {
  $results = array();
  $fields = field_info_field_by_ids();
  foreach ($fields as $field_id => $field_info) {
    if ($field_info['type'] != 'file') {
      continue;
    }
    foreach ($field_info['bundles'] as $entity_type => $bundles) {
      $entity_info = entity_get_info($entity_type);
      if (empty($entity_info['apachesolr']['indexable'])) {
        continue;
      }
      $query = new ApachesolrAttachmentsEntityFieldQuery();
      $results_query = $query
        ->entityCondition('entity_type', $entity_type)
        ->fieldCondition($field_info['field_name'])
        ->addExtraField($field_info['field_name'], 'fid', 'fid')
        ->execute();
      // Merge per entity type with the union operator. array_merge_recursive()
      // would merge two stubs that share a key (the same file in two fields
      // of one entity) into an array, which callers cannot use as an entity.
      foreach ($results_query as $type => $entities) {
        if (!isset($results[$type])) {
          $results[$type] = array();
        }
        $results[$type] += $entities;
      }
    }
  }
  return $results;
}
/**
 * Status callback: decides whether a file entity may be indexed.
 *
 * @param int $entity_id
 *   Id of the entity to check.
 * @param string $entity_type
 *   Entity type used to load the entity.
 *
 * @return bool
 *   TRUE when the entity loads, has an allowed mime type, exists on disk and
 *   has status 1; FALSE otherwise.
 */
function apachesolr_attachments_status_callback($entity_id, $entity_type) {
  module_load_include('inc', 'apachesolr_attachments', 'apachesolr_attachments.index');
  $entities = entity_load($entity_type, array(
    $entity_id,
  ), NULL, TRUE);
  $entity = $entities ? reset($entities) : FALSE;
  // A file that no longer loads cannot be indexed; bail out before reading
  // its properties.
  if (empty($entity)) {
    return FALSE;
  }
  if (apachesolr_attachments_allowed_mime($entity->filemime) == FALSE) {
    return FALSE;
  }
  if (apachesolr_attachments_is_file($entity) == FALSE) {
    return FALSE;
  }
  if ($entity->status != 1) {
    return FALSE;
  }
  return TRUE;
}
/**
 * Decides whether a file must be left out of the index.
 *
 * A file is excluded when it has no id or no parent, when its parent is
 * excluded, or when it is larger than the configured
 * 'apachesolr_attachments_filesize_limit' (a limit of 0 or less disables the
 * size check).
 *
 * @param int $entity_id
 *   File id.
 * @param object $row
 *   Index table row; parent_entity_id and parent_entity_type are read.
 * @param string $env_id
 *   Environment id, forwarded to the parent check.
 *
 * @return bool
 *   TRUE to exclude the file, FALSE to index it.
 */
function apachesolr_attachments_apachesolr_file_exclude($entity_id, $row, $env_id) {
  module_load_include('inc', 'apachesolr_attachments', 'apachesolr_attachments.index');
  if (!($entity_id && $row->parent_entity_id)) {
    return TRUE;
  }

  if (apachesolr_attachments_is_parent_excluded($entity_id, 'file', $row->parent_entity_id, $row->parent_entity_type, $env_id)) {
    return TRUE;
  }

  $max_bytes = variable_get('apachesolr_attachments_filesize_limit', '41943040');
  $loaded = entity_load('file', array($entity_id), NULL, TRUE);
  $file = reset($loaded);
  $too_large = isset($file->filesize) && $max_bytes > 0 && $file->filesize > $max_bytes;
  if (!$too_large) {
    return FALSE;
  }

  $placeholders = array(
    '@filesize' => $file->filesize,
    '@filename' => $file->filename,
    '@sizelimit' => $max_bytes,
  );
  watchdog('Apache Solr Attachments', 'Excluding file @filename with size @filesize bytes, which exceeds apachesolr_attachments_filesize_limit of @sizelimit bytes.', $placeholders);
  return TRUE;
}
/**
 * Checks that an entity's URI resolves to a regular file on disk.
 *
 * @param object $entity
 *   Object whose ->uri is inspected.
 *
 * @return bool
 *   TRUE when the URI resolves to an existing regular file. FALSE when the
 *   URI is empty, cannot be resolved to a real path, or the resolved path is
 *   not a file (only this last case is logged).
 */
function apachesolr_attachments_is_file($entity) {
  if (empty($entity->uri)) {
    return FALSE;
  }
  $real_path = drupal_realpath($entity->uri);
  if (!$real_path) {
    return FALSE;
  }
  if (is_file($real_path)) {
    return TRUE;
  }
  $placeholders = array('%filepath' => $entity->uri);
  watchdog('Apache Solr Attachments', '%filepath is not a valid file path', $placeholders, WATCHDOG_WARNING);
  return FALSE;
}
/**
 * Checks whether a file's parent entity is excluded from the index.
 *
 * The parent counts as excluded when it cannot be found, when its bundle is
 * not among the bundles indexed for the environment, when no status callback
 * is registered for its entity type, or when any status callback rejects it.
 *
 * @param int $entity_id
 *   Id of the child entity (not used by the checks).
 * @param string $entity_type
 *   Type of the child entity (not used by the checks).
 * @param int $parent_entity_id
 *   Id of the parent entity.
 * @param string $parent_entity_type
 *   Entity type of the parent.
 * @param string $env_id
 *   Environment id used to look up the indexed bundles.
 *
 * @return bool
 *   TRUE when the parent is excluded, FALSE when it is indexable.
 */
function apachesolr_attachments_is_parent_excluded($entity_id, $entity_type, $parent_entity_id, $parent_entity_type, $env_id) {
  $query = new EntityFieldQuery();
  $result = $query
    ->entityCondition('entity_type', $parent_entity_type)
    ->entityCondition('entity_id', $parent_entity_id)
    ->execute();
  if (empty($result[$parent_entity_type])) {
    return TRUE;
  }
  $stub_entity = reset($result[$parent_entity_type]);
  // Resolve the bundle through the entity type's bundle key. Reading ->type
  // directly only works for entity types whose bundle key is 'type'.
  list(, , $parent_entity_bundle) = entity_extract_ids($parent_entity_type, $stub_entity);

  $bundles = apachesolr_get_index_bundles($env_id, $parent_entity_type);
  if (empty($bundles) || !in_array($parent_entity_bundle, $bundles)) {
    return TRUE;
  }

  $status_callbacks = apachesolr_entity_get_callback($parent_entity_type, 'status callback');
  if (empty($status_callbacks)) {
    return TRUE;
  }
  $status = TRUE;
  foreach ($status_callbacks as $status_callback) {
    if (is_callable($status_callback)) {
      // Short-circuits: once one callback rejects, the rest are not called.
      $status = $status && $status_callback($parent_entity_id, $parent_entity_type);
    }
  }
  return !$status;
}
/**
 * Deletes the file documents attached to a parent entity from Solr.
 *
 * After the first failure in a request, later calls return FALSE straight
 * away without contacting Solr again.
 *
 * NOTE(review): the delete query filters on 'sm_parent_entity', a field that
 * no code in this file writes to a document. Confirm it exists in the index.
 *
 * @param string $parent_entity_type
 *   Entity type of the parent.
 * @param object $parent_entity
 *   The parent entity.
 *
 * @return bool
 *   TRUE when the delete query was sent, FALSE on failure.
 */
function apachesolr_attachments_remove_attachments_from_index($parent_entity_type, $parent_entity) {
  static $failed = FALSE;
  if (!$failed) {
    try {
      $ids = entity_extract_ids($parent_entity_type, $parent_entity);
      $parent_entity_id = $ids[0];
      $parent_entity_bundle = $ids[2];
      $solr = apachesolr_get_solr();
      $delete_query = "sm_parent_entity:{$parent_entity_type}-{$parent_entity_bundle}-{$parent_entity_id} AND entity_type:file AND hash:" . apachesolr_site_hash();
      $solr->deleteByQuery($delete_query);
      return TRUE;
    }
    catch (Exception $e) {
      $message = nl2br(check_plain($e->getMessage()));
      watchdog('Apache Solr Attachments', $message, NULL, WATCHDOG_ERROR);
      $failed = TRUE;
    }
  }
  return FALSE;
}
/**
 * Implements hook_apachesolr_query_alter().
 *
 * For the 'apachesolr' query, requests the attachment-specific fields in the
 * result. For the 'apachesolr_mlt' query, adds an 'entity_type' filter on
 * 'file' with the third addFilter() argument set to TRUE.
 *
 * @param DrupalSolrQueryInterface $query
 *   The query being altered.
 */
function apachesolr_attachments_apachesolr_query_alter(DrupalSolrQueryInterface $query) {
  switch ($query->getName()) {
    case 'apachesolr':
      $extra_fields = array(
        'zm_parent_entity',
        'ss_filemime',
        'ss_file_entity_title',
        'ss_file_entity_url',
      );
      $query->addParam('fl', $extra_fields);
      break;

    case 'apachesolr_mlt':
      $query->addFilter('entity_type', 'file', TRUE);
      break;
  }
}
/**
 * Implements hook_entity_update().
 *
 * Cleans the attachments index table on every entity update; for files it
 * additionally processes the entities that reference the file.
 *
 * @param object $entity
 *   The updated entity.
 * @param string $type
 *   Its entity type.
 */
function apachesolr_attachments_entity_update($entity, $type) {
  module_load_include('inc', 'apachesolr_attachments', 'apachesolr_attachments.index');
  apachesolr_attachments_clean_index_table();
  if ($type != 'file') {
    return;
  }
  _apachesolr_attachments_update_parent_entity($entity, $type);
}
/**
 * Implements hook_entity_insert().
 *
 * Inserts are handled exactly like updates. The update handler already
 * processes the parents of file entities, so no second pass is made here.
 *
 * @param object $entity
 *   The inserted entity.
 * @param string $type
 *   Its entity type.
 */
function apachesolr_attachments_entity_insert($entity, $type) {
  apachesolr_attachments_entity_update($entity, $type);
}
/**
 * Implements hook_entity_delete().
 *
 * Cleans the attachments index table whenever any entity is deleted; the
 * deleted entity itself is not inspected.
 *
 * @param object $entity
 *   The deleted entity (unused).
 * @param string $type
 *   Its entity type (unused).
 */
function apachesolr_attachments_entity_delete($entity, $type) {
  // The cleanup routine is called right after loading the index include.
  module_load_include('inc', 'apachesolr_attachments', 'apachesolr_attachments.index');

  apachesolr_attachments_clean_index_table();
}
/**
 * Passes the parents of a file to apachesolr_entity_update().
 *
 * Only parents whose bundle is configured with indexing mode 'parent' are
 * passed on, because in that mode the file text is part of the parent's own
 * document.
 *
 * @param object $entity
 *   The file entity.
 * @param string $type
 *   Entity type of $entity (unused; callers pass 'file').
 */
function _apachesolr_attachments_update_parent_entity($entity, $type) {
  // References are keyed by field name, then entity type, then entity id.
  $references = file_get_file_references($entity, NULL, FIELD_LOAD_CURRENT);
  if (empty($references)) {
    return;
  }
  // Tracks processed parents so an entity that references the file through
  // several fields is only handled once.
  $processed = array();
  // Walk every referencing field, not just the first one.
  foreach ($references as $field_name => $parents_by_type) {
    foreach ($parents_by_type as $parent_entity_type => $parents) {
      foreach ($parents as $parent_entity_id => $parent_info) {
        $seen_key = $parent_entity_type . ':' . $parent_entity_id;
        if (isset($processed[$seen_key])) {
          continue;
        }
        $processed[$seen_key] = TRUE;

        $parent_entities = entity_load($parent_entity_type, array(
          $parent_entity_id,
        ), NULL, TRUE);
        $parent_entity = reset($parent_entities);
        if (empty($parent_entity)) {
          continue;
        }
        list(, , $parent_entity_bundle) = entity_extract_ids($parent_entity_type, $parent_entity);
        if (variable_get('apachesolr_attachments_entity_bundle_indexing_' . $parent_entity_bundle, 'seperate') == 'parent') {
          apachesolr_entity_update($parent_entity, $parent_entity_type);
        }
      }
    }
  }
}
/**
 * Implements hook_field_attach_insert().
 *
 * Newly inserted field data is handled by the update implementation.
 *
 * @param string $parent_entity_type
 *   Entity type of the entity whose fields were saved.
 * @param object $parent_entity
 *   The entity whose fields were saved.
 */
function apachesolr_attachments_field_attach_insert($parent_entity_type, $parent_entity) {
  // Pure delegation: nothing insert-specific happens here.
  apachesolr_attachments_field_attach_update($parent_entity_type, $parent_entity);
}
/**
 * Implements hook_field_attach_update().
 *
 * Records a file usage for every loadable file referenced by a file field of
 * the saved entity. Entities without a ->type property are ignored.
 *
 * @param string $parent_entity_type
 *   Entity type of the saved entity.
 * @param object $parent_entity
 *   The saved entity.
 */
function apachesolr_attachments_field_attach_update($parent_entity_type, $parent_entity) {
  if (!isset($parent_entity->type)) {
    return;
  }
  $instances = field_info_instances($parent_entity_type, $parent_entity->type);
  foreach ($instances as $instance) {
    $field = field_info_field($instance['field_name']);
    if ($field['type'] != 'file') {
      continue;
    }
    module_load_include('inc', 'apachesolr_attachments', 'apachesolr_attachments.index');
    $values = field_get_items($parent_entity_type, $parent_entity, $field['field_name']);
    if (!$values) {
      continue;
    }
    foreach ($values as $value) {
      $file = file_load($value['fid']);
      if (empty($file)) {
        // The referenced file no longer exists.
        continue;
      }
      list($parent_entity_id) = entity_extract_ids($parent_entity_type, $parent_entity);
      apachesolr_attachments_add_file_usage($file, $parent_entity_type, $parent_entity_id);
    }
  }
}
/**
 * Implements hook_field_attach_delete().
 *
 * For every loadable file referenced by a file field of the deleted entity,
 * calls apachesolr_attachments_add_file_usage(). Entities without a ->type
 * property are ignored.
 *
 * NOTE(review): this delete hook calls the *add* usage helper, exactly like
 * the update hook. Confirm whether that is intended or whether a removal
 * helper should be called instead.
 *
 * @param string $parent_entity_type
 *   Entity type of the deleted entity.
 * @param object $parent_entity
 *   The deleted entity.
 */
function apachesolr_attachments_field_attach_delete($parent_entity_type, $parent_entity) {
  if (!isset($parent_entity->type)) {
    return;
  }
  $instances = field_info_instances($parent_entity_type, $parent_entity->type);
  foreach ($instances as $instance) {
    $field = field_info_field($instance['field_name']);
    if ($field['type'] != 'file') {
      continue;
    }
    module_load_include('inc', 'apachesolr_attachments', 'apachesolr_attachments.index');
    $values = field_get_items($parent_entity_type, $parent_entity, $field['field_name']);
    if (empty($values)) {
      continue;
    }
    foreach ($values as $value) {
      $file = file_load($value['fid']);
      if (empty($file)) {
        continue;
      }
      list($parent_entity_id) = entity_extract_ids($parent_entity_type, $parent_entity);
      apachesolr_attachments_add_file_usage($file, $parent_entity_type, $parent_entity_id);
    }
  }
}
/**
 * Result callback: fills in default values for a file search result.
 *
 * Copies is_uid to uid on the document, then adds type, user, date, node,
 * file and uid to $result without overwriting keys that are already set.
 *
 * @param object $doc
 *   The Solr result document; gains a ->uid property.
 * @param array $result
 *   The search result being built; extended in place.
 * @param array $extra
 *   Not used by this callback.
 */
function apachesolr_attachments_file_result($doc, &$result, &$extra) {
  $doc->uid = $doc->is_uid;

  $defaults = array();
  $defaults['type'] = t('File attachment');
  $defaults['user'] = theme('username', array('account' => $doc));
  if (isset($doc->created)) {
    $defaults['date'] = $doc->created;
  }
  else {
    $defaults['date'] = 0;
  }
  $defaults['node'] = $doc;
  $defaults['file'] = $doc;
  $defaults['uid'] = $doc->is_uid;

  // Array union keeps any value the caller already put in $result.
  $result += $defaults;
}
/**
 * Implements hook_theme().
 *
 * @return array
 *   The single theme hook 'apachesolr_search_snippets__file', which takes a
 *   'doc' (default NULL) and a list of 'snippets' (default empty).
 */
function apachesolr_attachments_theme() {
  $hooks = array();
  $hooks['apachesolr_search_snippets__file']['variables'] = array(
    'doc' => NULL,
    'snippets' => array(),
  );
  return $hooks;
}
/**
 * Preprocess function for the file-specific search snippets theme hook.
 *
 * Delegates to apachesolr_search_preprocess_apachesolr_search_snippets().
 *
 * @param array $vars
 *   Theme variables, passed on by reference.
 */
function apachesolr_attachments_preprocess_apachesolr_search_snippets__file(&$vars) {
  // No file-specific preprocessing: the generic snippets preprocessor does
  // all the work.
  apachesolr_search_preprocess_apachesolr_search_snippets($vars);
}
/**
 * Returns the snippet HTML for a file search result.
 *
 * Joins the flattened snippets and appends the file's mime type (with an icon
 * when the file module is enabled) and links to the parent entities stored on
 * the document in zm_parent_entity.
 *
 * @param array $vars
 *   Theme variables; 'doc' and 'flattened_snippets' are read.
 *
 * @return string
 *   The rendered snippet markup.
 */
function theme_apachesolr_search_snippets__file($vars) {
  $doc = $vars['doc'];
  $snippets = $vars['flattened_snippets'];
  $parent_entity_links = array();
  // A document without stored parents simply gets no "attached to" links.
  $encoded_parents = isset($doc->zm_parent_entity) ? (array) $doc->zm_parent_entity : array();
  foreach ($encoded_parents as $parent_entity_encoded) {
    $decoded = drupal_json_decode($parent_entity_encoded);
    // Skip values that do not decode to a parent description.
    if (!is_array($decoded) || empty($decoded['entity_type'])) {
      continue;
    }
    $parent_decoded = (object) $decoded;
    list($id) = entity_extract_ids($parent_decoded->entity_type, $parent_decoded);
    $load = entity_load($parent_decoded->entity_type, array(
      $id,
    ));
    $parent_entity = $load ? reset($load) : NULL;
    // The index can be stale: the parent may have been deleted since.
    if (empty($parent_entity)) {
      continue;
    }
    $parent_entity_uri = entity_uri($parent_decoded->entity_type, $parent_entity);
    // Entities without a URI cannot be linked to.
    if (empty($parent_entity_uri['path'])) {
      continue;
    }
    $parent_entity_uri['options']['absolute'] = TRUE;
    $parent_label = entity_label($parent_decoded->entity_type, $parent_entity);
    $parent_entity_links[] = l($parent_label, $parent_entity_uri['path'], $parent_entity_uri['options']);
  }
  if (module_exists('file')) {
    $file_type = t('!icon @filemime', array(
      '@filemime' => $doc->ss_filemime,
      '!icon' => theme('file_icon', array(
        'file' => (object) array(
          'filemime' => $doc->ss_filemime,
        ),
      )),
    ));
  }
  else {
    $file_type = t('@filemime', array(
      '@filemime' => $doc->ss_filemime,
    ));
  }
  return implode(' ... ', $snippets) . '<span>' . $file_type . ' <em>attached to:</em>' . implode(', ', $parent_entity_links) . '</span>';
}
/**
 * Returns the default list of excluded file extensions.
 *
 * @return array
 *   Lower-case extensions without a leading dot.
 */
function apachesolr_attachments_default_excluded() {
  return array(
    'aif', 'art', 'avi', 'bmp', 'gif',
    'ico', 'jpg', 'mov', 'mp3', 'mp4',
    'mpg', 'oga', 'ogv', 'png', 'psd',
    'ra', 'ram', 'rgb', 'tif', 'wmv',
  );
}
/**
 * Implements hook_facetapi_searcher_info_alter().
 *
 * Appends 'file' to the 'types' list of every searcher.
 *
 * @param array $searcher_info
 *   Searcher definitions keyed by searcher name; altered in place.
 */
function apachesolr_attachments_facetapi_searcher_info_alter(array &$searcher_info) {
  foreach (array_keys($searcher_info) as $searcher) {
    $searcher_info[$searcher]['types'][] = 'file';
  }
}
/**
 * Implements hook_facetapi_facet_info_alter().
 *
 * Makes sure the bundle facet's 'entities' map option includes 'file': it is
 * appended to an existing non-empty list, otherwise the list is set to
 * 'node' and 'file'.
 *
 * @param array $facet_info
 *   Facet definitions; altered in place.
 * @param array $searcher_info
 *   Not used.
 */
function apachesolr_attachments_facetapi_facet_info_alter(&$facet_info, $searcher_info) {
  if (empty($facet_info['bundle']['map options']['entities'])) {
    $facet_info['bundle']['map options']['entities'] = array('node', 'file');
    return;
  }
  $facet_info['bundle']['map options']['entities'][] = 'file';
}
/**
 * EntityFieldQuery variant that can return extra columns with each result.
 *
 * Columns registered with addExtraField() are added to the final select
 * query. Every result is a stub entity carrying the raw result row in
 * ->extraFields, and results are keyed by "<id>_<value of the last extra
 * column>" instead of by id alone.
 */
class ApachesolrAttachmentsEntityFieldQuery extends EntityFieldQuery {

  /**
   * Extra columns to select; one entry per addExtraField() call.
   *
   * @var array
   */
  private $addedFields = array();

  /**
   * Finishes the query: applies tags, metadata and range, then executes it.
   *
   * @param SelectQuery $select_query
   *   The select query built so far.
   * @param string $id_key
   *   Name of the result column that holds the entity id.
   *
   * @return array|int
   *   The row count for count queries; otherwise stub entities keyed by
   *   entity type, then by "<id>_<extra column value>".
   */
  public function finishQuery($select_query, $id_key = 'entity_id') {
    foreach ($this->tags as $tag) {
      $select_query->addTag($tag);
    }
    foreach ($this->metaData as $key => $object) {
      $select_query->addMetaData($key, $object);
    }
    $select_query->addMetaData('entity_field_query', $this);
    if ($this->range) {
      $select_query->range($this->range['start'], $this->range['length']);
    }
    if ($this->count) {
      return $select_query->countQuery()->execute()->fetchField();
    }

    $return = array();
    // Alias of the last extra column as it really appears in the result rows.
    $key_alias = NULL;
    if (!empty($this->addedFields)) {
      // Extra columns are read from the table that provides entity_id.
      $fields = $select_query->getFields();
      $table = $fields['entity_id']['table'];
      foreach ($this->addedFields as $addedField) {
        $column = $addedField['column'];
        // Without an explicit alias the column name doubles as the alias.
        $column_alias = isset($addedField['column_alias']) ? $addedField['column_alias'] : $column;
        if (!empty($addedField['field_name'])) {
          // Field storage columns are prefixed with the field name.
          $column = $addedField['field_name'] . '_' . $column;
          $column_alias = $addedField['field_name'] . '_' . $column_alias;
        }
        // addField() returns the alias actually used, which can differ from
        // the requested one when that alias is already taken.
        $key_alias = $select_query->addField($table, $column, $column_alias);
      }
    }

    foreach ($select_query->execute() as $partial_entity) {
      $bundle = isset($partial_entity->bundle) ? $partial_entity->bundle : NULL;
      $entity = entity_create_stub_entity($partial_entity->entity_type, array(
        $partial_entity->entity_id,
        $partial_entity->revision_id,
        $bundle,
      ));
      $entity->extraFields = $partial_entity;
      // The key keeps its "<id>_<value>" shape even without an extra column.
      $suffix = ($key_alias !== NULL && isset($partial_entity->{$key_alias})) ? $partial_entity->{$key_alias} : '';
      $return[$partial_entity->entity_type][$partial_entity->{$id_key} . '_' . $suffix] = $entity;
      $this->ordered_results[] = $partial_entity;
    }
    return $return;
  }

  /**
   * Registers an extra column to return with every result row.
   *
   * @param string $field_name
   *   Field name used as a prefix for the column and alias; pass an empty
   *   value to select a column as-is.
   * @param string $column
   *   Column name (without the field name prefix).
   * @param string|null $column_alias
   *   Alias for the column (without the field name prefix).
   *
   * @return ApachesolrAttachmentsEntityFieldQuery
   *   The query object, for chaining.
   */
  public function addExtraField($field_name, $column, $column_alias = NULL) {
    $this->addedFields[] = array(
      'field_name' => $field_name,
      'column' => $column,
      'column_alias' => $column_alias,
    );
    return $this;
  }

}