You are here

apachesolr_file.module in Apache Solr File 7

File

apachesolr_file.module
View source
<?php

/**
 * Implements hook_apachesolr_entity_info_alter().
 *
 * Marks the 'file' entity type as indexable and registers the callbacks and
 * the index table this module uses for it.
 *
 * @param array &$entity_info
 *   Entity info keyed by entity type; the 'file' entry is modified in place.
 */
function apachesolr_file_apachesolr_entity_info_alter(&$entity_info) {
  $file_info = &$entity_info['file'];

  $file_info['indexable'] = TRUE;

  // These two keys hold lists of callbacks, so append rather than overwrite.
  $appended_callbacks = array(
    'status callback' => 'apachesolr_file_status_callback',
    'document callback' => 'apachesolr_file_index_solr_document',
  );
  foreach ($appended_callbacks as $key => $callback) {
    $file_info[$key][] = $callback;
  }

  // These keys each hold a single value and are replaced outright.
  $single_values = array(
    'reindex callback' => 'apachesolr_file_solr_reindex',
    'index_table' => 'apachesolr_index_entities_file',
    'result callback' => 'apachesolr_file_result',
  );
  foreach ($single_values as $key => $value) {
    $file_info[$key] = $value;
  }

  unset($file_info);
}

/**
 * Implements hook_facetapi_facet_info().
 *
 * Exposes the facets that apachesolr_entity_field_facets() reports for the
 * 'file' entity type.
 *
 * @param mixed $searcher_info
 *   The searcher definition supplied by the caller; not used here.
 *
 * @return array
 *   The facet definitions for files.
 */
function apachesolr_file_facetapi_facet_info($searcher_info) {
  $file_facets = apachesolr_entity_field_facets('file');

  // Merging into an empty array keeps string keys and renumbers numeric ones.
  return array_merge(array(), $file_facets);
}

/**
 * Implements hook_facetapi_searcher_info_alter().
 *
 * Currently a no-op: the body is empty, so the searcher definitions are left
 * exactly as they were passed in.
 *
 * @param array &$searcher_info
 *   The return values of hook_facetapi_searcher_info() implementations.
 *
 * @see hook_facetapi_searcher_info()
 */
function apachesolr_file_facetapi_searcher_info_alter(array &$searcher_info) {

  // Intentionally empty; no alterations are made to $searcher_info.
}
/**
 * Status callback for file entities.
 *
 * Registered under 'status callback' for the 'file' entity type.
 *
 * @param int $entity_id
 *   The file ID (fid) to check.
 * @param string $entity_type
 *   The entity type; not used here.
 *
 * @return mixed
 *   The file's 'status' property when the file can be loaded, or FALSE when
 *   the file does not exist (or has no status), so that it is not indexed.
 */
function apachesolr_file_status_callback($entity_id, $entity_type) {
  $file = file_load($entity_id);

  // file_load() returns FALSE for an unknown fid; reading ->status on that
  // would raise an error instead of reporting "not indexable".
  if (!is_object($file) || !isset($file->status)) {
    return FALSE;
  }

  return $file->status;
}
/**
 * Document callback: fills a Solr document from a file entity.
 *
 * Copies the owner, name, type, URL and (when available) path alias of the
 * file onto the document, then asks Solr to extract the file's text via
 * apachesolr_file_extract() and stores it as the document content.
 *
 * @param ApacheSolrDocument $document
 *   The document to populate.
 * @param object $file
 *   The file entity; uid, filename, timestamp, type, uri and fid are read,
 *   plus language when set.
 * @param string $entity_type
 *   The entity type; not used here.
 *
 * @return array
 *   A one-element array containing the populated document.
 */
function apachesolr_file_index_solr_document(ApacheSolrDocument $document, $file, $entity_type) {
  $document->is_uid = $file->uid;
  $document->label = apachesolr_clean_text($file->filename);

  // The single file timestamp feeds all three date fields, so convert once.
  $iso_date = apachesolr_date_iso($file->timestamp);
  $document->timestamp = $iso_date;
  $document->ds_created = $iso_date;
  $document->ds_changed = $iso_date;

  $document->bundle = $file->type;
  $document->bundle_name = $file->type;
  $document->ts_uri = file_create_url($file->uri);

  if (function_exists('drupal_get_path_alias')) {
    // Use the file's own language for the alias lookup (NULL when unset).
    $language = empty($file->language) ? NULL : $file->language;
    $path = 'file/' . $file->fid;
    $output = drupal_get_path_alias($path, $language);
    // Only record a real alias, not the system path echoed back.
    if ($output && $output != $path) {
      $document->path_alias = $output;
    }
  }

  $env_id = apachesolr_default_environment();
  $data = apachesolr_file_extract($env_id, $file);

  // Extraction returns FALSE (or a response without 'extracted') on failure;
  // index the file with empty content in that case rather than erroring.
  $text = (is_array($data) && isset($data['extracted'])) ? $data['extracted'] : '';
  $document->content = apachesolr_clean_text($text);

  return array($document);
}
/**
 * Reindex callback: re-queues all indexable files for indexing.
 *
 * Deletes every 'file' row from the indexer table and, when the default
 * environment has indexable file bundles, re-inserts one row per record in
 * {file_managed} whose type is one of those bundles, stamped with
 * REQUEST_TIME as the 'changed' value.
 *
 * @return bool
 *   TRUE on success; FALSE when an exception was caught, in which case the
 *   transaction is rolled back and the error is shown and logged.
 */
function apachesolr_file_solr_reindex() {
  $indexer_table = apachesolr_get_indexer_table('file');
  $transaction = db_transaction();
  $env_id = apachesolr_default_environment();
  try {
    db_delete($indexer_table)
      ->condition('entity_type', 'file')
      ->execute();

    // Look the bundles up once and reuse them for both the check and the
    // query condition.
    $bundles = apachesolr_get_index_bundles($env_id, 'file');
    if ($bundles) {
      $select = db_select('file_managed', 'f');
      $select->addExpression("'file'", 'entity_type');
      $select->addField('f', 'fid', 'entity_id');
      $select->addField('f', 'type', 'bundle');
      $select->addField('f', 'status', 'status');
      $select->addExpression(REQUEST_TIME, 'changed');
      $select->condition('f.type', $bundles, 'IN');

      db_insert($indexer_table)
        ->fields(array(
          'entity_id',
          'bundle',
          'status',
          'entity_type',
          'changed',
        ))
        ->from($select)
        ->execute();
    }
  }
  catch (Exception $e) {
    $transaction->rollback();
    drupal_set_message($e->getMessage(), 'error');
    watchdog_exception('Apache Solr', $e);
    return FALSE;
  }
  return TRUE;
}
/**
 * Result callback: adds file-specific defaults to a search result.
 *
 * Values already present in $result win; only missing keys are filled in.
 *
 * @param object $doc
 *   The result document; bundle and is_uid are read, plus created when set.
 * @param array &$result
 *   The search result being built; modified in place.
 * @param mixed &$extra
 *   Extra result data; not touched here.
 */
function apachesolr_file_result($doc, &$result, &$extra) {
  $defaults = array();
  $defaults['type'] = file_entity_type_get_name($doc->bundle);

  // NOTE(review): the document itself is handed over as the 'account';
  // presumably the username theme expects uid/name on it -- confirm.
  $defaults['user'] = theme('username', array(
    'account' => $doc,
  ));

  if (isset($doc->created)) {
    $defaults['date'] = $doc->created;
  }
  else {
    $defaults['date'] = 0;
  }
  $defaults['uid'] = $doc->is_uid;

  // Array union: keeps existing entries and appends the missing defaults.
  $result = $result + $defaults;
}
/**
 * Sends a file to Solr's extract handler and returns the parsed response.
 *
 * The file is POSTed as multipart/form-data to "<env url>/update/extract/"
 * with extractOnly=true, so Solr returns the extracted text without indexing
 * it. The response is requested in the "phps" (PHP-serialized) format.
 *
 * @param string $env_id
 *   The Apache Solr environment ID whose URL is used.
 * @param object $file
 *   The file entity; its uri is read (and fid, for log messages).
 *
 * @return mixed
 *   The unserialized response (the extracted text is under the 'extracted'
 *   key, named after the filename used in the upload), or FALSE when the
 *   environment has no URL, the file cannot be read, the request fails, or
 *   the response cannot be unserialized.
 */
function apachesolr_file_extract($env_id, $file) {
  $env = apachesolr_environment_load($env_id);
  if (empty($env['url'])) {
    return FALSE;
  }
  $url = $env['url'] . '/update/extract/?extractOnly=true&wt=phps&extractFormat=text';

  // Prefer the local path; fall back to the stream URI when the wrapper has
  // no local path (drupal_realpath() returns FALSE in that case).
  $filepath = drupal_realpath($file->uri);
  $source = $filepath ? $filepath : $file->uri;
  $contents = is_readable($source) ? file_get_contents($source) : FALSE;
  if ($contents === FALSE) {
    watchdog('Apache Solr', 'Could not read file @fid (@uri) for text extraction.', array(
      '@fid' => isset($file->fid) ? $file->fid : '',
      '@uri' => $file->uri,
    ), WATCHDOG_WARNING);
    return FALSE;
  }

  // Construct a multi-part form-data POST body in $data. Each delimiter line
  // is "--" followed by the boundary declared in the Content-Type header.
  $boundary = '--' . md5(uniqid(REQUEST_TIME));
  $data = "--{$boundary}\r\n";

  // The 'filename' used here becomes the property name in the response.
  $data .= 'Content-Disposition: form-data; name="file"; filename="extracted"';
  $data .= "\r\nContent-Type: application/octet-stream\r\n\r\n";
  $data .= $contents;
  $data .= "\r\n--{$boundary}--\r\n";

  $options = array(
    'method' => 'POST',
    'headers' => array(
      'Content-Type' => 'multipart/form-data; boundary=' . $boundary,
    ),
    'data' => $data,
  );
  $request = drupal_http_request($url, $options);

  // A failed request carries an error and may have no body at all.
  if (!empty($request->error) || !isset($request->data)) {
    watchdog('Apache Solr', 'Text extraction request failed for file @fid: @error', array(
      '@fid' => isset($file->fid) ? $file->fid : '',
      '@error' => isset($request->error) ? $request->error : '',
    ), WATCHDOG_WARNING);
    return FALSE;
  }

  // NOTE(review): unserialize() on an HTTP response is only acceptable while
  // the Solr server is trusted; consider a JSON response format instead.
  // Malformed data makes unserialize() emit a notice and return FALSE.
  $response = @unserialize($request->data);
  return $response;
}