You are here

function apachesolr_attachments_solr_document in Apache Solr Attachments 6.3

Same name and namespace in other branches
  1. 7 apachesolr_attachments.module \apachesolr_attachments_solr_document()

Builds the file-specific information for a Solr document.

Parameters

ApacheSolrDocument $document: The Solr document we are building up.

stdClass $file: The file entity we are indexing.

string $entity_type: The type of entity we're dealing with.

$env_id: The Solr environment ID, passed through to the parent entity's document callbacks.

1 string reference to 'apachesolr_attachments_solr_document'
apachesolr_attachments_apachesolr_entity_info_alter in ./apachesolr_attachments.module
@file Indexer for the userhook_apachesolr_entity_info_alter entities for the Apachesolr module.

File

./apachesolr_attachments.module, line 72
Provides a file attachment search implementation for use with the Apache Solr module

Code

/**
 * Builds the file-specific Solr documents for an attachment.
 *
 * One document is produced for every parent entity recorded for the file in
 * the file indexer table. Each document starts from the document built by the
 * parent entity type's own 'document callback' functions and then has its
 * basic fields (id, url, path, label, content, dates) overwritten with the
 * file's information, plus a compact description of the parent entity.
 *
 * @param ApacheSolrDocument $document
 *   The bare Solr document to clone for each parent entity.
 * @param object $file
 *   The file being indexed. The properties fid, uri, filename, timestamp,
 *   filemime and filesize are read.
 * @param string $entity_type
 *   The entity type used when generating the Solr document id.
 * @param string $env_id
 *   The environment id handed through to the parent document callbacks.
 *
 * @return array
 *   The list of documents built for the file; empty when no text could be
 *   extracted or when no usable parent entity was found.
 */
function apachesolr_attachments_solr_document(ApacheSolrDocument $document, $file, $entity_type, $env_id) {
  module_load_include('inc', 'apachesolr_attachments', 'apachesolr_attachments.index');
  $documents = array();
  $table = apachesolr_get_indexer_table('file');

  // Text is saved in the index table. Will be used by the node indexing if
  // available.
  $text = apachesolr_attachments_get_attachment_text($file);

  // Without extracted text there is nothing worth indexing.
  if (empty($text)) {
    return $documents;
  }

  // These values are computed from $file alone, so work them out once instead
  // of once per parent entity.
  $clean_filename = apachesolr_clean_text($file->filename);
  $file_date = apachesolr_date_iso($file->timestamp);
  $file_url = file_create_url($file->uri);

  // The stream wrapper lookup returns FALSE for an unknown or invalid scheme;
  // only ask for the external URL when a wrapper is actually available.
  $wrapper = file_stream_wrapper_get_instance_by_uri($file->uri);
  $path = $wrapper ? $wrapper->getExternalUrl() : NULL;

  // Get the list of parents that we should index from the indexing table.
  $parents = db_select($table, 'aie')
    ->fields('aie')
    ->condition('entity_type', 'file')
    ->condition('entity_id', $file->fid)
    ->execute();
  foreach ($parents as $parent) {
    $parent_entity_type = $parent->parent_entity_type;

    // Load the parent entity and reset the cache.
    $parent_entities = entity_load($parent_entity_type, array(
      $parent->parent_entity_id,
    ), NULL, TRUE);
    $parent_entity = reset($parent_entities);

    // Skip invalid entities.
    if (empty($parent_entity)) {
      continue;
    }

    // Retrieve the parent entity id and bundle.
    list($parent_entity_id, $parent_entity_vid, $parent_entity_bundle) = entity_extract_ids($parent_entity_type, $parent_entity);

    // Get the callback array to add stuff to the document.
    $callbacks = apachesolr_entity_get_callback($parent_entity_type, 'document callback');

    // Skip invalid entity types.
    if (empty($callbacks)) {
      continue;
    }

    // Each parent gets its own clone of the bare minimum document.
    $filedocument = clone $document;
    $build_documents = array();
    foreach ($callbacks as $callback) {
      // Call a type-specific callback to add stuff to the document.
      $build_documents = array_merge($build_documents, $callback($filedocument, $parent_entity, $parent_entity_type, $env_id));
    }

    // Take the top document from the stack. The callbacks may legitimately
    // return nothing, in which case reset() yields FALSE and there is no
    // document to decorate for this parent.
    $filedocument = reset($build_documents);
    if (!is_object($filedocument)) {
      continue;
    }

    // Build our separate document and overwrite basic information.
    $filedocument->id = apachesolr_document_id($file->fid . '-' . $parent_entity_type . '-' . $parent_entity_id, $entity_type);
    $filedocument->url = $file_url;

    // A path is not a requirement of an entity.
    if (!empty($path)) {
      $filedocument->path = $path;
    }

    // Add extra info to our document.
    $filedocument->label = $clean_filename;
    $filedocument->content = $clean_filename . ' ' . $text;
    $filedocument->ds_created = $file_date;
    $filedocument->ds_changed = $file_date;
    $filedocument->created = $file_date;
    $filedocument->changed = $file_date;

    // Add Parent information fields. See http://drupal.org/node/1515822 for
    // explanation.
    $parent_entity_info = entity_get_info($parent_entity_type);
    $entity_keys = isset($parent_entity_info['entity keys']) ? $parent_entity_info['entity keys'] : array();
    $small_parent_entity = new stdClass();
    $small_parent_entity->entity_type = $parent_entity_type;

    // Entity keys are optional: entity types without bundles or labels have
    // an empty or missing key, which must not be used as a property name.
    if (!empty($entity_keys['id'])) {
      $small_parent_entity->{$entity_keys['id']} = $parent_entity_id;
    }
    if (!empty($entity_keys['bundle'])) {
      $small_parent_entity->{$entity_keys['bundle']} = $parent_entity_bundle;
    }
    if (!empty($entity_keys['label'])) {
      $label_key = $entity_keys['label'];
      $small_parent_entity->{$label_key} = isset($parent_entity->{$label_key}) ? $parent_entity->{$label_key} : NULL;
    }

    // Add all to one field because if it is spread out over
    // multiple fields there is no way of knowing which multifield value
    // belongs to which entity.
    // It does not load the complete entity in to the index because that
    // would dramatically increase the index size and processing time.
    $filedocument->zm_parent_entity = drupal_json_encode($small_parent_entity);
    $filedocument->sm_parent_entity_bundle = $parent_entity_type . "-" . $parent_entity_bundle;
    $filedocument->sm_parent_entity_type = $parent_entity_type;

    // Add Apachesolr Attachments specific fields.
    $filedocument->ss_filemime = $file->filemime;
    $filedocument->ss_filesize = $file->filesize;
    $documents[] = $filedocument;
  }
  return $documents;
}