You are here

public function FilesExtractor::extractOrGetFromCache in Search API attachments 9.0.x

Same name and namespace in other branches
  1. 8 src/Plugin/search_api/processor/FilesExtractor.php \Drupal\search_api_attachments\Plugin\search_api\processor\FilesExtractor::extractOrGetFromCache()

Extracts non-text file data, or gets it from the cache if available; newly extracted data is cached.

Parameters

\Drupal\Core\Entity\EntityInterface $entity: The entity the file is attached to.

\Drupal\file\Entity\File $file: A file object.

\Drupal\search_api_attachments\TextExtractorPluginInterface $extractor_plugin: The plugin used to extract file content.

Return value

string $extracted_data

1 call to FilesExtractor::extractOrGetFromCache()
FilesExtractor::addFieldValues in src/Plugin/search_api/processor/FilesExtractor.php

File

src/Plugin/search_api/processor/FilesExtractor.php, line 276

Class

FilesExtractor
Provides file fields processor.

Namespace

Drupal\search_api_attachments\Plugin\search_api\processor

Code

/**
 * Extracts non-text file data, or returns it from the cache when available.
 *
 * When the 'read_text_files_directly' configuration flag is set, files whose
 * MIME type starts with 'text/' are read straight from their URI. Otherwise
 * the 'search_api_attachments' key-value collection is consulted first; on a
 * miss the extractor plugin is run, its output is byte-limited and stored in
 * that collection, and the 'search_api_attachments_content_extracted' hook is
 * invoked through the module handler. Files already recorded in the fallback
 * queue for this entity, and files whose extraction throws an exception, are
 * passed to queueItem() instead.
 *
 * @param \Drupal\Core\Entity\EntityInterface $entity
 *   The entity the file is attached to.
 * @param \Drupal\file\Entity\File $file
 *   A file object.
 * @param \Drupal\search_api_attachments\TextExtractorPluginInterface $extractor_plugin
 *   The plugin used to extract file content.
 *
 * @return string
 *   The extracted data, or an empty string when nothing could be extracted.
 */
public function extractOrGetFromCache(EntityInterface $entity, File $file, TextExtractorPluginInterface $extractor_plugin) {
  // Directly process plaintext files.
  if (!empty($this->configuration['read_text_files_directly'])) {
    // The cast keeps the prefix check safe should no MIME type be set;
    // passing NULL to a string function is deprecated as of PHP 8.1.
    if (strncmp((string) $file->getMimeType(), 'text/', 5) === 0) {
      // NOTE(review): content read this way is returned without going through
      // limitBytes(), unlike the extractor path below - confirm intended.
      $contents = file_get_contents($file->getFileUri());
      // file_get_contents() returns FALSE on failure; keep the string return
      // contract instead of leaking a boolean to the caller.
      return $contents === FALSE ? '' : $contents;
    }
  }

  $collection = 'search_api_attachments';
  $key = $collection . ':' . $file->id();
  $store = $this->keyValue->get($collection);
  $extracted_data = '';

  if ($cache = $store->get($key)) {
    // Cached content is re-limited on the way out.
    $extracted_data = $this->limitBytes($cache);
  }
  else {
    try {
      // Only extract if this file has not previously failed and was queued.
      $fallback_collection = $this->keyValue->get(FilesExtractor::FALLBACK_QUEUE_KV);
      $queued_files = $fallback_collection->get($file->id());
      if (empty($queued_files[$entity->getEntityTypeId()][$entity->id()])) {
        $extracted_data = $this->limitBytes($extractor_plugin->extract($file));
        $store->set($key, $extracted_data);
        $this->moduleHandler->invokeAll('search_api_attachments_content_extracted', [
          $file,
          $entity,
        ]);
      }
      else {
        $this->queueItem($entity, $file);
      }
    }
    catch (\Exception $e) {
      // Log the failure with full context, then hand the file to queueItem().
      $error = Error::decodeException($e);
      $message_params = [
        '@file_id' => $file->id(),
        '@entity_id' => $entity->id(),
        '@entity_type' => $entity->getEntityTypeId(),
        '@type' => $error['%type'],
        '@message' => $error['@message'],
        '@function' => $error['%function'],
        '@line' => $error['%line'],
        '@file' => $error['%file'],
      ];
      $this->logger->log(LogLevel::ERROR, 'Error extracting text from file @file_id for @entity_type @entity_id. @type: @message in @function (line @line of @file).', $message_params);
      $this->queueItem($entity, $file);
    }
  }

  return $extracted_data;
}