View source  
  <?php
// Relative path constant 'extract/tika'.
// NOTE(review): presumably the Solr request handler used for text extraction;
// it is not referenced anywhere in this file, so confirm against the include
// files before changing the value.
define('EXTRACTING_SERVLET', 'extract/tika');
/**
 * Implements hook_menu().
 *
 * Registers the attachment administration pages below
 * admin/config/search/apachesolr/attachments. Every callback page requires
 * the 'administer search' permission and lives in
 * apachesolr_attachments.admin.inc.
 *
 * @return array
 *   Menu item definitions keyed by path.
 */
function apachesolr_attachments_menu() {
  $root = 'admin/config/search/apachesolr/attachments';

  // Keys shared by every item that has a page callback.
  $admin = array(
    'access arguments' => array(
      'administer search',
    ),
    'file' => 'apachesolr_attachments.admin.inc',
  );

  // Both confirmation forms use the same form builder; path argument 6 (the
  // last path component) is forwarded to it.
  $confirm = array(
    'page callback' => 'drupal_get_form',
    'page arguments' => array(
      'apachesolr_attachments_confirm',
      6,
    ),
  ) + $admin + array(
    'type' => MENU_CALLBACK,
  );

  return array(
    $root => array(
      'title' => 'Attachments',
      'description' => 'Administer Apache Solr Attachments.',
      'page callback' => 'apachesolr_attachments_admin_page',
    ) + $admin + array(
      'type' => MENU_LOCAL_TASK,
    ),
    $root . '/test' => array(
      'title' => 'Test tika extraction',
      'page callback' => 'apachesolr_attachments_test_tika_extraction',
    ) + $admin + array(
      'type' => MENU_CALLBACK,
    ),
    $root . '/confirm/delete' => array(
      'title' => 'Delete and reindex all files',
    ) + $confirm,
    $root . '/confirm/clear-cache' => array(
      'title' => 'Delete the local cache of file text',
    ) + $confirm,
    $root . '/general' => array(
      'title' => 'General',
      'type' => MENU_DEFAULT_LOCAL_TASK,
    ),
    $root . '/entity_bundle' => array(
      'title' => 'Bundle',
      'description' => 'Administer Apache Solr Attachments per bundle settings.',
      'page callback' => 'drupal_get_form',
      'page arguments' => array(
        'apachesolr_attachments_entity_bundle_settings',
      ),
    ) + $admin + array(
      'type' => MENU_LOCAL_TASK,
      'weight' => 1,
    ),
  );
}
/**
 * Implements hook_apachesolr_entity_info_alter().
 *
 * Marks the 'file' entity type as indexable and registers this module's
 * status, document, reindex and result callbacks plus the file index table.
 * An extra document callback is also appended for nodes.
 *
 * @param array $entity_info
 *   Entity info keyed by entity type; altered in place.
 */
function apachesolr_attachments_apachesolr_entity_info_alter(&$entity_info) {
  $file_info = &$entity_info['file'];
  $file_info['indexable'] = TRUE;
  // Status and document callbacks are lists: append instead of replacing.
  $file_info['status callback'][] = 'apachesolr_attachments_status_callback';
  $file_info['document callback'][] = 'apachesolr_attachments_solr_document';
  $file_info['reindex callback'] = 'apachesolr_attachments_solr_reindex';
  $file_info['index_table'] = 'apachesolr_index_entities_file';
  $file_info['result callback'] = 'apachesolr_attachments_file_result';
  unset($file_info);

  $node_info = &$entity_info['node'];
  $node_info['document callback'][] = 'apachesolr_attachments_node_solr_document';
  unset($node_info);
}
/**
 * Document callback for file entities.
 *
 * Builds one Solr document per parent entity recorded for the file in the
 * file indexer table, but only for parents whose bundle is configured to index
 * attachments as separate documents (the default). The document is first
 * populated by the parent entity type's own document callbacks and then
 * overridden with the file specific values.
 *
 * @param ApacheSolrDocument $document
 *   Prototype document; it is cloned for every parent.
 * @param object $file
 *   File object. The fid, uri, filename, timestamp, filemime and filesize
 *   properties are read.
 * @param string $entity_type
 *   Entity type used when generating the document id.
 * @param string $env_id
 *   Environment id, forwarded to the parent's document callbacks.
 *
 * @return array
 *   List of documents; empty when no text could be obtained for the file or
 *   no parent qualifies.
 */
function apachesolr_attachments_solr_document(ApacheSolrDocument $document, $file, $entity_type, $env_id) {
  module_load_include('inc', 'apachesolr_attachments', 'apachesolr_attachments.index');
  $documents = array();
  $table = apachesolr_get_indexer_table('file');

  $text = apachesolr_attachments_get_attachment_text($file);
  if (empty($text)) {
    return $documents;
  }

  // Values that only depend on the file are computed once, not per parent.
  $clean_filename = apachesolr_clean_text($file->filename);
  $file_date = apachesolr_date_iso($file->timestamp);
  $file_url = file_create_url($file->uri);
  // The lookup returns FALSE when no stream wrapper handles the URI scheme, so
  // guard before calling a method on the result.
  $wrapper = file_stream_wrapper_get_instance_by_uri($file->uri);
  $path = $wrapper ? $wrapper->getExternalUrl() : NULL;

  $parents = db_select($table, 'aie')
    ->fields('aie')
    ->condition('entity_type', 'file')
    ->condition('entity_id', $file->fid)
    ->execute();
  foreach ($parents as $parent) {
    $parent_entity_type = $parent->parent_entity_type;
    $parent_entities = entity_load($parent_entity_type, array(
      $parent->parent_entity_id,
    ), NULL, TRUE);
    $parent_entity = $parent_entities ? reset($parent_entities) : NULL;

    // The parent may have been deleted since the row was written.
    if (empty($parent_entity)) {
      continue;
    }

    list($parent_entity_id, $parent_entity_vid, $parent_entity_bundle) = entity_extract_ids($parent_entity_type, $parent_entity);

    if (variable_get('apachesolr_attachments_entity_bundle_indexing_' . $parent_entity_bundle, 'seperate') != 'seperate') {
      continue;
    }

    $filedocument = clone $document;

    // Let the parent entity type populate the document first.
    $callbacks = apachesolr_entity_get_callback($parent_entity_type, 'document callback');
    $build_documents = array();
    if (is_array($callbacks)) {
      foreach ($callbacks as $callback) {
        if (is_callable($callback)) {
          $build_documents = array_merge($build_documents, $callback($filedocument, $parent_entity, $parent_entity_type, $env_id));
        }
      }
    }

    // Take the first built document. When no callback returned one, keep
    // working on the clone instead of assigning properties on FALSE.
    $built_document = reset($build_documents);
    if (is_object($built_document)) {
      $filedocument = $built_document;
    }

    if ($parent_entity_type == 'node' && function_exists('apachesolr_access_apachesolr_index_document_build_node')) {
      apachesolr_access_apachesolr_index_document_build_node($filedocument, $parent_entity, $env_id);
    }

    // Override the parent's values with the file specific ones.
    $filedocument->id = apachesolr_document_id($file->fid . '-' . $parent_entity_id, $entity_type);
    $filedocument->url = $file_url;
    if (!empty($path)) {
      $filedocument->path = $path;
    }

    $filedocument->label = $clean_filename;
    $filedocument->content = $clean_filename . ' ' . $text;
    $filedocument->ds_created = $file_date;
    $filedocument->ds_changed = $file_date;
    $filedocument->created = $file_date;
    $filedocument->changed = $file_date;

    // Store a minimal copy of the parent (type, id, bundle and label if the
    // entity type declares one) as JSON on the document.
    $parent_entity_info = entity_get_info($parent_entity_type);
    $entity_keys = $parent_entity_info['entity keys'];
    $small_parent_entity = new stdClass();
    $small_parent_entity->entity_type = $parent_entity_type;
    $small_parent_entity->{$entity_keys['id']} = $parent_entity_id;
    $small_parent_entity->{$entity_keys['bundle']} = $parent_entity_bundle;
    if (isset($entity_keys['label'])) {
      $small_parent_entity->{$entity_keys['label']} = $parent_entity->{$entity_keys['label']};
    }

    $filedocument->zm_parent_entity = drupal_json_encode($small_parent_entity);
    $filedocument->sm_parent_entity_bundle = $parent_entity_type . "-" . $parent_entity_bundle;
    $filedocument->sm_parent_entity_type = $parent_entity_type;

    $filedocument->ss_filemime = $file->filemime;
    $filedocument->ss_filesize = $file->filesize;
    $documents[] = $filedocument;
  }
  return $documents;
}
/**
 * Document callback for nodes: folds attachment text into the node document.
 *
 * Only acts when the node's bundle is configured to index attachments on the
 * parent ('parent'). For every published, existing, allowed file found in the
 * node's file fields, the cleaned filename and the extracted text are appended
 * to the document's content.
 *
 * NOTE(review): document callbacks are invoked elsewhere in this file with
 * four arguments (document, entity, entity type, environment id), so the third
 * parameter here most likely receives the entity type. It is unused, so the
 * signature is left untouched.
 *
 * @param ApacheSolrDocument $document
 *   The node document; its content is extended in place.
 * @param object $parent_entity
 *   The node being indexed.
 * @param mixed $env_id
 *   Unused.
 *
 * @return array
 *   Always an empty array: no additional documents are created.
 */
function apachesolr_attachments_node_solr_document(ApacheSolrDocument &$document, $parent_entity, $env_id) {
  module_load_include('inc', 'apachesolr_attachments', 'apachesolr_attachments.index');
  list($parent_entity_id, $parent_entity_vid, $parent_entity_bundle) = entity_extract_ids('node', $parent_entity);

  if (variable_get('apachesolr_attachments_entity_bundle_indexing_' . $parent_entity_bundle, 'seperate') != 'parent') {
    return array();
  }

  // Collect the file fields attached to this node bundle. Only the 'node'
  // bundles are inspected, so a same-named bundle of another entity type
  // cannot pull in unrelated fields.
  $file_field_names = array();
  $fields = field_info_field_by_ids();
  if (is_array($fields)) {
    foreach ($fields as $field_info) {
      if ($field_info['type'] != 'file' || empty($field_info['bundles']['node'])) {
        continue;
      }
      if (in_array($parent_entity_bundle, $field_info['bundles']['node'])) {
        $file_field_names[$field_info['field_name']] = $field_info['field_name'];
      }
    }
  }

  foreach ($file_field_names as $file_field) {
    if (empty($parent_entity->{$file_field}) || !is_array($parent_entity->{$file_field})) {
      continue;
    }
    // Field values are keyed by language, then by delta.
    foreach ($parent_entity->{$file_field} as $files) {
      foreach ($files as $file) {
        $file = (object) $file;

        // Logical checks instead of bitwise '&' on callback return values.
        $indexable = isset($file->status) && $file->status == 1
          && apachesolr_attachments_is_file($file)
          && isset($file->filemime)
          && apachesolr_attachments_allowed_mime($file->filemime);
        if (!$indexable) {
          continue;
        }

        $text = apachesolr_attachments_get_attachment_text($file);
        if ($text) {
          // Leading space keeps the filename from fusing with the last word of
          // the existing content (or of the previous attachment).
          $document->content .= ' ' . apachesolr_clean_text($file->filename) . ' ' . $text;
        }
      }
    }
  }
  return array();
}
/**
 * Reindex callback for file entities.
 *
 * Looks up every file referenced from a file field of an indexable entity and
 * records the file/parent pair through apachesolr_attachments_add_file_usage().
 * The work is wrapped in a database transaction that is rolled back when an
 * exception is thrown.
 *
 * @return bool
 *   TRUE on success (including when there is nothing to do), FALSE when an
 *   exception occurred; the exception is shown as a message and logged.
 */
function apachesolr_attachments_solr_reindex() {
  module_load_include('inc', 'apachesolr_attachments', 'apachesolr_attachments.index');
  $transaction = db_transaction();
  try {
    $files = _apachesolr_attachments_get_all_files();
    if (empty($files)) {
      return TRUE;
    }

    foreach ($files as $parent_entity_type => $parent_entities) {
      foreach ($parent_entities as $parent_entity_info) {
        if (!is_object($parent_entity_info) || empty($parent_entity_info->extraFields)) {
          continue;
        }

        // The file id arrives as an extra column whose alias contains '_fid'.
        $file = new stdClass();
        foreach ($parent_entity_info->extraFields as $key => $value) {
          if (strpos($key, '_fid') !== FALSE) {
            $file->fid = $value;
          }
        }
        // Never register usage for a file stub that has no id.
        if (empty($file->fid)) {
          continue;
        }

        list($parent_entity_id) = entity_extract_ids($parent_entity_type, $parent_entity_info);
        apachesolr_attachments_add_file_usage($file, $parent_entity_type, $parent_entity_id);
      }
    }
  }
  catch (Exception $e) {
    $transaction->rollback();
    drupal_set_message($e->getMessage(), 'error');
    watchdog_exception('Apache Solr Attachments', $e);
    return FALSE;
  }
  return TRUE;
}
/**
 * Finds every file referenced by a file field of an indexable entity type.
 *
 * Runs one ApachesolrAttachmentsEntityFieldQuery per file field and entity
 * type, selecting the field's fid column as an extra field.
 *
 * @return array
 *   Stub entities keyed by entity type and then by "<entity id>_<fid>". Each
 *   stub carries the raw result row in its extraFields property.
 */
function _apachesolr_attachments_get_all_files() {
  $results = array();
  $fields = field_info_field_by_ids();
  foreach ($fields as $field_info) {
    if ($field_info['type'] != 'file') {
      continue;
    }
    foreach ($field_info['bundles'] as $entity_type => $bundles) {
      $entity_info = entity_get_info($entity_type);
      // Skip entity types that are not indexed by Apache Solr.
      if (empty($entity_info['apachesolr']['indexable'])) {
        continue;
      }
      $query = new ApachesolrAttachmentsEntityFieldQuery();
      $results_query = $query
        ->entityCondition('entity_type', $entity_type)
        ->fieldCondition($field_info['field_name'])
        ->addExtraField($field_info['field_name'], 'fid', 'fid')
        ->execute();

      // Merge per entity type with array union. array_merge_recursive() would
      // turn two stubs sharing the same "<id>_<fid>" key (the same file used
      // in two fields of one entity) into a nested array instead of keeping a
      // stub object, which callers cannot process.
      foreach ($results_query as $type => $stubs) {
        $results[$type] = isset($results[$type]) ? $results[$type] + $stubs : $stubs;
      }
    }
  }
  return $results;
}
/**
 * Status callback for file entities.
 *
 * @param int $entity_id
 *   Id of the entity to check.
 * @param string $entity_type
 *   Entity type to load the entity with.
 *
 * @return bool
 *   TRUE when the entity loads, has an allowed mime type, points at an
 *   existing file and has status 1; FALSE otherwise.
 */
function apachesolr_attachments_status_callback($entity_id, $entity_type) {
  module_load_include('inc', 'apachesolr_attachments', 'apachesolr_attachments.index');

  // Bypass the static entity cache so the current state is checked.
  $entities = entity_load($entity_type, array(
    $entity_id,
  ), NULL, TRUE);
  $entity = $entities ? reset($entities) : NULL;

  // An entity that no longer exists cannot be indexed.
  if (empty($entity)) {
    return FALSE;
  }

  if (!isset($entity->filemime) || !apachesolr_attachments_allowed_mime($entity->filemime)) {
    return FALSE;
  }

  if (!apachesolr_attachments_is_file($entity)) {
    return FALSE;
  }

  return isset($entity->status) && $entity->status == 1;
}
/**
 * Decides whether a file should be excluded from indexing.
 *
 * A file is excluded when the row has no file or parent id, when its parent
 * is excluded, or when its size exceeds the configured
 * apachesolr_attachments_filesize_limit (a limit of 0 or less disables the
 * size check).
 *
 * @param int $entity_id
 *   File id.
 * @param object $row
 *   Row providing parent_entity_id and parent_entity_type.
 * @param string $env_id
 *   Environment id, forwarded to the parent check.
 *
 * @return bool
 *   TRUE to exclude the file, FALSE to keep it.
 */
function apachesolr_attachments_apachesolr_file_exclude($entity_id, $row, $env_id) {
  module_load_include('inc', 'apachesolr_attachments', 'apachesolr_attachments.index');

  // Nothing to index without both a file and a parent.
  if (!$entity_id || !$row->parent_entity_id) {
    return TRUE;
  }

  if (apachesolr_attachments_is_parent_excluded($entity_id, 'file', $row->parent_entity_id, $row->parent_entity_type, $env_id)) {
    return TRUE;
  }

  $filesize_limit = variable_get('apachesolr_attachments_filesize_limit', '41943040');

  $loaded = entity_load('file', array(
    $entity_id,
  ), NULL, TRUE);
  $entity = reset($loaded);

  $too_large = isset($entity->filesize) && $filesize_limit > 0 && $entity->filesize > $filesize_limit;
  if (!$too_large) {
    return FALSE;
  }

  watchdog('Apache Solr Attachments', 'Excluding file @filename with size @filesize bytes, which exceeds apachesolr_attachments_filesize_limit of @sizelimit bytes.', array(
    '@filesize' => $entity->filesize,
    '@filename' => $entity->filename,
    '@sizelimit' => $filesize_limit,
  ));
  return TRUE;
}
/**
 * Checks that an entity's URI resolves to an existing regular file.
 *
 * @param object $entity
 *   Object whose uri property is inspected.
 *
 * @return bool
 *   TRUE when the URI resolves to a real path that is a file. FALSE when the
 *   URI is empty, cannot be resolved, or does not point at a file; the last
 *   case is also logged as a warning.
 */
function apachesolr_attachments_is_file($entity) {
  if (empty($entity->uri)) {
    return FALSE;
  }

  $real_path = drupal_realpath($entity->uri);
  if (!$real_path) {
    return FALSE;
  }

  if (is_file($real_path)) {
    return TRUE;
  }

  watchdog('Apache Solr Attachments', '%filepath is not a valid file path', array(
    '%filepath' => $entity->uri,
  ), WATCHDOG_WARNING);
  return FALSE;
}
/**
 * Determines whether a file's parent entity keeps the file out of the index.
 *
 * The parent excludes the file when it does not exist, when its bundle is not
 * among the bundles indexed for the environment, when any of the parent
 * type's status callbacks reports FALSE, or when the parent type has no
 * status callbacks at all.
 *
 * @param int $entity_id
 *   Id of the file (unused).
 * @param string $entity_type
 *   Entity type of the file (unused).
 * @param int $parent_entity_id
 *   Id of the parent entity.
 * @param string $parent_entity_type
 *   Entity type of the parent.
 * @param string $env_id
 *   Environment id used to look up the indexed bundles.
 *
 * @return bool
 *   TRUE when the file must be excluded, FALSE otherwise.
 */
function apachesolr_attachments_is_parent_excluded($entity_id, $entity_type, $parent_entity_id, $parent_entity_type, $env_id) {
  $query = new EntityFieldQuery();
  $result = $query
    ->entityCondition('entity_type', $parent_entity_type)
    ->entityCondition('entity_id', $parent_entity_id)
    ->execute();

  // Parent no longer exists.
  if (empty($result[$parent_entity_type])) {
    return TRUE;
  }
  $values = array_values($result[$parent_entity_type]);
  $stub_entity = reset($values);

  // Resolve the bundle through the entity API rather than reading ->type,
  // which only exists on nodes.
  list(, , $parent_entity_bundle) = entity_extract_ids($parent_entity_type, $stub_entity);

  $bundles = apachesolr_get_index_bundles($env_id, $parent_entity_type);
  if (empty($bundles) || !in_array($parent_entity_bundle, $bundles)) {
    return TRUE;
  }

  $status_callbacks = apachesolr_entity_get_callback($parent_entity_type, 'status callback');
  if (empty($status_callbacks)) {
    return TRUE;
  }

  // Every callable callback must approve the parent; once one has returned
  // FALSE the remaining ones are not invoked.
  $status = TRUE;
  foreach ($status_callbacks as $status_callback) {
    if (is_callable($status_callback)) {
      $status = $status && $status_callback($parent_entity_id, $parent_entity_type);
    }
  }
  return !$status;
}
/**
 * Deletes the indexed file documents belonging to a parent entity.
 *
 * After the first failure in a request every later call returns FALSE
 * immediately, without contacting Solr again.
 *
 * NOTE(review): the delete query filters on an sm_parent_entity field, while
 * the document builder visible in this file only writes
 * sm_parent_entity_bundle, sm_parent_entity_type and zm_parent_entity.
 * Confirm that the query can actually match indexed documents.
 *
 * @param string $parent_entity_type
 *   Entity type of the parent.
 * @param object $parent_entity
 *   The parent entity.
 *
 * @return bool
 *   TRUE when the delete query was sent, FALSE on failure.
 */
function apachesolr_attachments_remove_attachments_from_index($parent_entity_type, $parent_entity) {
  static $failed = FALSE;
  if (!$failed) {
    try {
      list($id, $vid, $bundle) = entity_extract_ids($parent_entity_type, $parent_entity);
      $solr = apachesolr_get_solr();
      $delete_query = "sm_parent_entity:{$parent_entity_type}-{$bundle}-{$id} AND entity_type:file AND hash:" . apachesolr_site_hash();
      $solr->deleteByQuery($delete_query);
      return TRUE;
    }
    catch (Exception $e) {
      $message = nl2br(check_plain($e->getMessage()));
      watchdog('Apache Solr Attachments', $message, NULL, WATCHDOG_ERROR);
      // Remember the failure for the rest of the request.
      $failed = TRUE;
    }
  }
  return FALSE;
}
/**
 * Implements hook_apachesolr_query_alter().
 *
 * For queries named 'apachesolr', requests the attachment fields in the
 * result. For queries named 'apachesolr_mlt', adds an entity_type filter on
 * 'file' with the third addFilter() argument set to TRUE.
 *
 * @param DrupalSolrQueryInterface $query
 *   The query being altered.
 */
function apachesolr_attachments_apachesolr_query_alter(DrupalSolrQueryInterface $query) {
  switch ($query->getName()) {
    case 'apachesolr':
      $attachment_fields = array(
        'zm_parent_entity',
        'ss_filemime',
        'ss_file_entity_title',
        'ss_file_entity_url',
      );
      $query->addParam('fl', $attachment_fields);
      break;

    case 'apachesolr_mlt':
      $query->addFilter('entity_type', 'file', TRUE);
      break;
  }
}
/**
 * Implements hook_entity_update().
 *
 * Cleans the attachment index table on every entity update and, for file
 * entities, also updates the entities that reference the file.
 *
 * @param object $entity
 *   The updated entity.
 * @param string $type
 *   The entity type.
 */
function apachesolr_attachments_entity_update($entity, $type) {
  module_load_include('inc', 'apachesolr_attachments', 'apachesolr_attachments.index');
  apachesolr_attachments_clean_index_table();

  if ($type != 'file') {
    return;
  }
  _apachesolr_attachments_update_parent_entity($entity, $type);
}
/**
 * Implements hook_entity_insert().
 *
 * An insert is handled exactly like an update. The update hook already
 * refreshes the parents of file entities, so that step is not repeated here;
 * repeating it processed every parent twice.
 *
 * @param object $entity
 *   The inserted entity.
 * @param string $type
 *   The entity type.
 */
function apachesolr_attachments_entity_insert($entity, $type) {
  apachesolr_attachments_entity_update($entity, $type);
}
/**
 * Implements hook_entity_delete().
 *
 * Cleans the attachment index table whenever any entity is deleted; the
 * arguments themselves are not inspected.
 *
 * @param object $entity
 *   The deleted entity (unused).
 * @param string $type
 *   The entity type (unused).
 */
function apachesolr_attachments_entity_delete($entity, $type) {
  // The cleanup helper is called right after loading the index include.
  module_load_include('inc', 'apachesolr_attachments', 'apachesolr_attachments.index');

  apachesolr_attachments_clean_index_table();
}
/**
 * Marks the entities referencing a file for reindexing.
 *
 * Only parents whose bundle is configured to index attachments on the parent
 * ('parent') are passed to apachesolr_entity_update().
 *
 * @param object $entity
 *   The file entity whose parents should be updated.
 * @param string $type
 *   The entity type of $entity (unused).
 */
function _apachesolr_attachments_update_parent_entity($entity, $type) {
  // References are keyed by field name, then entity type, then entity id.
  $references = file_get_file_references($entity, NULL, FIELD_LOAD_CURRENT);
  if (empty($references)) {
    return;
  }

  // Walk the references of every file field, not only the first one, and
  // process each parent once even if it uses the file in several fields.
  $processed = array();
  foreach ($references as $field_references) {
    foreach ($field_references as $parent_entity_type => $parents) {
      foreach ($parents as $parent_entity_id => $parent_info) {
        $parent_key = $parent_entity_type . ':' . $parent_entity_id;
        if (isset($processed[$parent_key])) {
          continue;
        }
        $processed[$parent_key] = TRUE;

        // Load the full parent, bypassing the static cache.
        $parent_entities = entity_load($parent_entity_type, array(
          $parent_entity_id,
        ), NULL, TRUE);
        $parent_entity = $parent_entities ? reset($parent_entities) : NULL;
        if (empty($parent_entity)) {
          continue;
        }

        list(, , $parent_entity_bundle) = entity_extract_ids($parent_entity_type, $parent_entity);
        if (variable_get('apachesolr_attachments_entity_bundle_indexing_' . $parent_entity_bundle, 'seperate') == 'parent') {
          apachesolr_entity_update($parent_entity, $parent_entity_type);
        }
      }
    }
  }
}
/**
 * Implements hook_field_attach_insert().
 *
 * Newly inserted field data is handled exactly like updated field data.
 *
 * @param string $parent_entity_type
 *   Entity type of the entity the fields are attached to.
 * @param object $parent_entity
 *   The entity the fields are attached to.
 */
function apachesolr_attachments_field_attach_insert($parent_entity_type, $parent_entity) {
  // Delegate to the update implementation.
  apachesolr_attachments_field_attach_update($parent_entity_type, $parent_entity);
}
/**
 * Implements hook_field_attach_update().
 *
 * Records a file usage for every loadable file referenced from a file field
 * of the entity. Entities without a type property are ignored.
 *
 * @param string $parent_entity_type
 *   Entity type of the entity the fields are attached to.
 * @param object $parent_entity
 *   The entity the fields are attached to.
 */
function apachesolr_attachments_field_attach_update($parent_entity_type, $parent_entity) {
  if (!isset($parent_entity->type)) {
    return;
  }

  $instances = field_info_instances($parent_entity_type, $parent_entity->type);
  foreach ($instances as $instance) {
    $field_info = field_info_field($instance['field_name']);
    if ($field_info['type'] != 'file') {
      continue;
    }

    module_load_include('inc', 'apachesolr_attachments', 'apachesolr_attachments.index');
    $items = field_get_items($parent_entity_type, $parent_entity, $field_info['field_name']);
    if (!$items) {
      continue;
    }

    foreach ($items as $file_info) {
      $file = file_load($file_info['fid']);
      // Skip references to files that cannot be loaded.
      if (!empty($file)) {
        list($parent_entity_id) = entity_extract_ids($parent_entity_type, $parent_entity);
        apachesolr_attachments_add_file_usage($file, $parent_entity_type, $parent_entity_id);
      }
    }
  }
}
/**
 * Implements hook_field_attach_delete().
 *
 * For every loadable file referenced from a file field of the entity, calls
 * apachesolr_attachments_add_file_usage(). Entities without a type property
 * are ignored.
 *
 * NOTE(review): this registers a usage while the parent's field data is being
 * deleted, exactly as the update hook does. That may be a deliberate way to
 * flag the file for re-evaluation, or a removal helper may have been
 * intended; the helper lives in the index include, which is not visible here.
 * Confirm before changing.
 *
 * @param string $parent_entity_type
 *   Entity type of the entity whose field data is deleted.
 * @param object $parent_entity
 *   The entity whose field data is deleted.
 */
function apachesolr_attachments_field_attach_delete($parent_entity_type, $parent_entity) {
  if (!isset($parent_entity->type)) {
    return;
  }

  $instances = field_info_instances($parent_entity_type, $parent_entity->type);
  foreach ($instances as $instance) {
    $field_info = field_info_field($instance['field_name']);
    if ($field_info['type'] != 'file') {
      continue;
    }

    module_load_include('inc', 'apachesolr_attachments', 'apachesolr_attachments.index');
    $items = field_get_items($parent_entity_type, $parent_entity, $field_info['field_name']);
    if (empty($items)) {
      continue;
    }

    foreach ($items as $file_info) {
      $file = file_load($file_info['fid']);
      // Skip references to files that cannot be loaded.
      if (!empty($file)) {
        list($parent_entity_id) = entity_extract_ids($parent_entity_type, $parent_entity);
        apachesolr_attachments_add_file_usage($file, $parent_entity_type, $parent_entity_id);
      }
    }
  }
}
/**
 * Result callback for file documents.
 *
 * Copies the document's is_uid value to uid and fills in the result keys
 * type, user, date, node, file and uid where they are not already set.
 *
 * @param object $doc
 *   The Solr result document; its uid property is set.
 * @param array $result
 *   The search result being built; existing keys are kept.
 * @param array $extra
 *   Extra result information (unused).
 */
function apachesolr_attachments_file_result($doc, &$result, &$extra) {
  $doc->uid = $doc->is_uid;

  $defaults = array();
  $defaults['type'] = t('File attachment');
  $defaults['user'] = theme('username', array(
    'account' => $doc,
  ));
  if (isset($doc->created)) {
    $defaults['date'] = $doc->created;
  }
  else {
    $defaults['date'] = 0;
  }
  $defaults['node'] = $doc;
  $defaults['file'] = $doc;
  $defaults['uid'] = $doc->is_uid;

  // Array union: values already present in $result win.
  $result += $defaults;
}
/**
 * Implements hook_theme().
 *
 * Declares the snippet theme hook for file results, taking a 'doc' and a list
 * of 'snippets' as variables.
 *
 * @return array
 *   Theme hook definitions keyed by hook name.
 */
function apachesolr_attachments_theme() {
  $variables = array(
    'doc' => NULL,
    'snippets' => array(),
  );

  $hooks = array();
  $hooks['apachesolr_search_snippets__file'] = array(
    'variables' => $variables,
  );
  return $hooks;
}
/**
 * Preprocess function for the file snippet theme hook.
 *
 * Runs the generic search snippet preprocessing on the variables.
 *
 * @param array $vars
 *   Theme variables; passed by reference to the generic preprocess function.
 */
function apachesolr_attachments_preprocess_apachesolr_search_snippets__file(&$vars) {
  // Reuse the generic snippet preprocessing for the file specific hook.
  apachesolr_search_preprocess_apachesolr_search_snippets($vars);
}
/**
 * Returns HTML for the snippets of a file search result.
 *
 * The output is the snippets joined by ' ... ', followed by the file's mime
 * type (with an icon when the File module is enabled) and links to the
 * entities the file is attached to.
 *
 * @param array $vars
 *   Theme variables: 'doc' (the result document) and 'flattened_snippets'
 *   (list of snippet strings).
 *
 * @return string
 *   The rendered HTML.
 */
function theme_apachesolr_search_snippets__file($vars) {
  $doc = $vars['doc'];
  $snippets = $vars['flattened_snippets'];
  $parent_entity_links = array();

  // Documents indexed without parent information simply get no links.
  $encoded_parents = isset($doc->zm_parent_entity) ? (array) $doc->zm_parent_entity : array();
  foreach ($encoded_parents as $parent_entity_encoded) {
    $parent_decoded = (object) drupal_json_decode($parent_entity_encoded);
    if (empty($parent_decoded->entity_type)) {
      continue;
    }
    $parent_type = $parent_decoded->entity_type;

    list($id) = entity_extract_ids($parent_type, $parent_decoded);
    $load = entity_load($parent_type, array(
      $id,
    ));
    $parent_entity = $load ? reset($load) : NULL;
    // The parent may have been deleted after the file was indexed.
    if (empty($parent_entity)) {
      continue;
    }

    $parent_label = entity_label($parent_type, $parent_entity);
    $parent_entity_uri = entity_uri($parent_type, $parent_entity);
    // Entity types without a URI cannot be linked; show the plain label.
    if (empty($parent_entity_uri['path'])) {
      $parent_entity_links[] = check_plain($parent_label);
      continue;
    }
    $parent_entity_uri['options']['absolute'] = TRUE;
    $parent_entity_links[] = l($parent_label, $parent_entity_uri['path'], $parent_entity_uri['options']);
  }

  if (module_exists('file')) {
    $file_type = t('!icon @filemime', array(
      '@filemime' => $doc->ss_filemime,
      '!icon' => theme('file_icon', array(
        'file' => (object) array(
          'filemime' => $doc->ss_filemime,
        ),
      )),
    ));
  }
  else {
    $file_type = t('@filemime', array(
      '@filemime' => $doc->ss_filemime,
    ));
  }
  return implode(' ... ', $snippets) . '<span>' . $file_type . ' <em>attached to:</em>' . implode(', ', $parent_entity_links) . '</span>';
}
/**
 * Provides the default list of excluded file extensions.
 *
 * @return array
 *   Lower-case extensions without the leading dot.
 */
function apachesolr_attachments_default_excluded() {
  return array(
    'aif', 'art', 'avi', 'bmp', 'gif',
    'ico', 'jpg', 'mov', 'mp3', 'mp4',
    'mpg', 'oga', 'ogv', 'png', 'psd',
    'ra', 'ram', 'rgb', 'tif', 'wmv',
  );
}
/**
 * Implements hook_facetapi_searcher_info_alter().
 *
 * Appends 'file' to the 'types' list of every searcher.
 *
 * @param array $searcher_info
 *   Searcher definitions keyed by searcher; altered in place.
 */
function apachesolr_attachments_facetapi_searcher_info_alter(array &$searcher_info) {
  foreach (array_keys($searcher_info) as $searcher) {
    $searcher_info[$searcher]['types'][] = 'file';
  }
}
/**
 * Implements hook_facetapi_facet_info_alter().
 *
 * Adds 'file' to the entities mapped by the bundle facet. When the facet has
 * no entity list yet, the list is initialised to 'node' and 'file'.
 *
 * @param array $facet_info
 *   Facet definitions; altered in place.
 * @param array $searcher_info
 *   Searcher definition (unused).
 */
function apachesolr_attachments_facetapi_facet_info_alter(&$facet_info, $searcher_info) {
  if (empty($facet_info['bundle']['map options']['entities'])) {
    $facet_info['bundle']['map options']['entities'] = array(
      'node',
      'file',
    );
    return;
  }

  $facet_info['bundle']['map options']['entities'][] = 'file';
}
/**
 * EntityFieldQuery variant that can select extra columns.
 *
 * Columns registered through addExtraField() are added to the select query.
 * Every returned stub entity carries its raw result row in an extraFields
 * property.
 */
class ApachesolrAttachmentsEntityFieldQuery extends EntityFieldQuery {

  /**
   * Extra columns to select; each entry has the keys field_name, column and
   * column_alias.
   *
   * @var array
   */
  private $addedFields = array();

  /**
   * Finishes the query: applies tags, metadata and range, then executes it.
   *
   * @param object $select_query
   *   The select query to finish and execute.
   * @param string $id_key
   *   Name of the result column holding the entity id.
   *
   * @return mixed
   *   For count queries, the count. Otherwise stub entities keyed by entity
   *   type and then by "<id>_<value of the last extra field>", or by the
   *   plain id when no extra field was added.
   */
  public function finishQuery($select_query, $id_key = 'entity_id') {
    foreach ($this->tags as $tag) {
      $select_query->addTag($tag);
    }
    foreach ($this->metaData as $key => $object) {
      $select_query->addMetaData($key, $object);
    }
    $select_query->addMetaData('entity_field_query', $this);
    if ($this->range) {
      $select_query->range($this->range['start'], $this->range['length']);
    }
    if ($this->count) {
      return $select_query
        ->countQuery()
        ->execute()
        ->fetchField();
    }

    // Alias of the last extra field; its value becomes part of the result
    // key. Stays NULL when no extra field was requested.
    $key_alias = NULL;
    if (!empty($this->addedFields)) {
      // Extra columns are read from the table that provides entity_id.
      $fields = $select_query->getFields();
      $base_table = $fields['entity_id']['table'];
      foreach ($this->addedFields as $addedField) {
        $column = $addedField['column'];
        // Without an explicit alias the column name doubles as the alias.
        $column_alias = isset($addedField['column_alias']) ? $addedField['column_alias'] : $column;
        if (!empty($addedField['field_name'])) {
          $column = $addedField['field_name'] . '_' . $column;
          $column_alias = $addedField['field_name'] . '_' . $column_alias;
        }
        // addField() returns the alias that was really assigned, which is the
        // property name to read from the result rows.
        $key_alias = $select_query->addField($base_table, $column, $column_alias);
      }
    }

    $return = array();
    foreach ($select_query->execute() as $partial_entity) {
      $bundle = isset($partial_entity->bundle) ? $partial_entity->bundle : NULL;
      $entity = entity_create_stub_entity($partial_entity->entity_type, array(
        $partial_entity->entity_id,
        $partial_entity->revision_id,
        $bundle,
      ));

      // Expose the raw row, including the extra columns, to callers.
      $entity->extraFields = $partial_entity;

      $result_key = $partial_entity->{$id_key};
      if ($key_alias !== NULL) {
        $extra_value = isset($partial_entity->{$key_alias}) ? $partial_entity->{$key_alias} : '';
        $result_key .= '_' . $extra_value;
      }
      $return[$partial_entity->entity_type][$result_key] = $entity;
      $this->ordered_results[] = $partial_entity;
    }
    return $return;
  }

  /**
   * Registers an extra column to select.
   *
   * @param string $field_name
   *   Field name used as prefix for the column and its alias; when empty the
   *   column and alias are used as given.
   * @param string $column
   *   Column name (without the field name prefix).
   * @param string|null $column_alias
   *   Alias (without the field name prefix).
   *
   * @return ApachesolrAttachmentsEntityFieldQuery
   *   The query object, for chaining.
   */
  public function addExtraField($field_name, $column, $column_alias = NULL) {
    $this->addedFields[] = array(
      'field_name' => $field_name,
      'column' => $column,
      'column_alias' => $column_alias,
    );
    return $this;
  }
}