You are here

function apachesolr_attachments_solr_document in Apache Solr Attachments 7

Same name and namespace in other branches
  1. 6.3 apachesolr_attachments.module \apachesolr_attachments_solr_document()

Builds the file-specific information for a Solr document.

Parameters

ApacheSolrDocument $document: The Solr document we are building up.

stdClass $file: The file entity we are indexing (the function signature names this parameter $file).

string $entity_type: The type of entity we're dealing with.

1 string reference to 'apachesolr_attachments_solr_document'
apachesolr_attachments_apachesolr_entity_info_alter in ./apachesolr_attachments.module
@file Indexer for the userhook_apachesolr_entity_info_alter entities for the Apachesolr module.

File

./apachesolr_attachments.module, line 89
Provides a file attachment search implementation for use with the Apache Solr module

Code

/**
 * Builds the file-specific information for a Solr document.
 *
 * One document is produced for every parent entity recorded for the file in
 * the file indexer table, provided the parent's bundle is configured to index
 * attachments separately (the 'seperate' setting, which is also the default).
 *
 * @param ApacheSolrDocument $document
 *   The Solr document we are building up. A clone of it is used as the
 *   starting point for each parent entity.
 * @param stdClass $file
 *   The file we are indexing. The fid, uri, filename, timestamp, filemime and
 *   filesize properties are read.
 * @param string $entity_type
 *   The type of entity we're dealing with; used to build the document id.
 * @param $env_id
 *   Passed through unchanged to the parent document callbacks and to the node
 *   access document builder. Presumably the Solr environment id.
 *
 * @return array
 *   The documents built for this file. Empty when no text could be extracted
 *   or when no parent entity qualifies.
 */
function apachesolr_attachments_solr_document(ApacheSolrDocument $document, $file, $entity_type, $env_id) {
  module_load_include('inc', 'apachesolr_attachments', 'apachesolr_attachments.index');
  $documents = array();
  $table = apachesolr_get_indexer_table('file');

  // Text is saved in the index table. Will be used by the node indexing if
  // available.
  $text = apachesolr_attachments_get_attachment_text($file);

  // Without extracted text there is nothing to index for this file.
  if (empty($text)) {
    return $documents;
  }

  // Everything below depends on the file only, so compute it once instead of
  // once (or twice) per parent entity.
  $file_label = apachesolr_clean_text($file->filename);
  $file_date = apachesolr_date_iso($file->timestamp);
  $file_url = file_create_url($file->uri);

  // The stream wrapper lookup can fail (e.g. unknown scheme); in that case
  // we simply have no external path instead of calling a method on FALSE.
  $wrapper = file_stream_wrapper_get_instance_by_uri($file->uri);
  $file_path = $wrapper ? $wrapper->getExternalUrl() : NULL;

  // Get the list of parents that we should index from the indexing table.
  $parents = db_select($table, 'aie')
    ->fields('aie')
    ->condition('entity_type', 'file')
    ->condition('entity_id', $file->fid)
    ->execute();

  foreach ($parents as $parent) {
    $parent_entity_type = $parent->parent_entity_type;

    // Load the parent entity and reset the cache.
    $parent_entities = entity_load($parent_entity_type, array($parent->parent_entity_id), array(), TRUE);
    $parent_entity = (is_array($parent_entities) && $parent_entities) ? reset($parent_entities) : FALSE;

    // Skip invalid entities.
    if (empty($parent_entity)) {
      continue;
    }

    // Retrieve the parent entity id and bundle (the revision id is unused).
    list($parent_entity_id, , $parent_entity_bundle) = entity_extract_ids($parent_entity_type, $parent_entity);

    // Only build a separate document when the parent bundle is configured for
    // separate indexing; any other setting means this function adds nothing
    // for that parent.
    if (variable_get('apachesolr_attachments_entity_bundle_indexing_' . $parent_entity_bundle, 'seperate') != 'seperate') {
      continue;
    }

    // Start from a clone of the bare minimum document.
    $filedocument = clone $document;

    // Let the parent entity type's document callbacks add to the document.
    $callbacks = apachesolr_entity_get_callback($parent_entity_type, 'document callback');
    $build_documents = array();
    if (is_array($callbacks)) {
      foreach ($callbacks as $callback) {
        if (!is_callable($callback)) {
          continue;
        }
        $built = $callback($filedocument, $parent_entity, $parent_entity_type, $env_id);
        // A callback that returns nothing must not break array_merge().
        if (is_array($built)) {
          $build_documents = array_merge($build_documents, $built);
        }
      }
    }

    // Take the top document from the stack. If no callback produced one,
    // keep working on the clone rather than on reset()'s FALSE.
    if (!empty($build_documents)) {
      $filedocument = reset($build_documents);
    }

    // Add node access grants from the parent node to the file so that files
    // are appropriately shown or filtered out of search results based on
    // whether the user can access the node they are attached to.
    if ($parent_entity_type === 'node' && function_exists('apachesolr_access_apachesolr_index_document_build_node')) {
      apachesolr_access_apachesolr_index_document_build_node($filedocument, $parent_entity, $env_id);
    }

    // Build our separate document and overwrite basic information.
    $filedocument->id = apachesolr_document_id($file->fid . '-' . $parent_entity_id, $entity_type);
    $filedocument->url = $file_url;

    // A path is not a requirement of an entity.
    if (!empty($file_path)) {
      $filedocument->path = $file_path;
    }

    // Add extra info to our document.
    $filedocument->label = $file_label;
    $filedocument->content = $file_label . ' ' . $text;
    $filedocument->ds_created = $file_date;
    $filedocument->ds_changed = $file_date;
    $filedocument->created = $file_date;
    $filedocument->changed = $file_date;

    // Add parent information fields. See http://drupal.org/node/1515822 for
    // an explanation.
    $parent_entity_info = entity_get_info($parent_entity_type);
    $entity_keys = $parent_entity_info['entity keys'];
    $small_parent_entity = new stdClass();
    $small_parent_entity->entity_type = $parent_entity_type;
    $small_parent_entity->{$entity_keys['id']} = $parent_entity_id;

    // Entity types without bundles may have an empty bundle key; writing to
    // an empty property name is an error on older PHP versions, so skip it.
    if (!empty($entity_keys['bundle'])) {
      $small_parent_entity->{$entity_keys['bundle']} = $parent_entity_bundle;
    }

    // Not all entities have the label entity key set, so check it first to
    // avoid errors.
    if (!empty($entity_keys['label'])) {
      $label_key = $entity_keys['label'];
      $small_parent_entity->{$label_key} = isset($parent_entity->{$label_key}) ? $parent_entity->{$label_key} : NULL;
    }

    // Add all to one field because if it is spread out over multiple fields
    // there is no way of knowing which multifield value belongs to which
    // entity. The complete entity is not stored in the index because that
    // would dramatically increase the index size and processing time.
    $filedocument->zm_parent_entity = drupal_json_encode($small_parent_entity);
    $filedocument->sm_parent_entity_bundle = $parent_entity_type . "-" . $parent_entity_bundle;
    $filedocument->sm_parent_entity_type = $parent_entity_type;

    // Add Apachesolr Attachments specific fields.
    $filedocument->ss_filemime = $file->filemime;
    $filedocument->ss_filesize = $file->filesize;
    $documents[] = $filedocument;
  }

  return $documents;
}