You are here

class FileSearch in Search File Attachments 8

Executes a keyword search for files against the {file_managed} database table.

Plugin annotation


@SearchPlugin(
  id = "file_search",
  title = @Translation("File")
)

Hierarchy

Expanded class hierarchy of FileSearch

File

src/Plugin/Search/FileSearch.php, line 28

Namespace

Drupal\search_file_attachments\Plugin\Search
View source
class FileSearch extends SearchPluginBase implements AccessibleInterface, SearchIndexingInterface {

  /**
   * The database connection used for the search and index queries.
   *
   * @var \Drupal\Core\Database\Connection
   */
  protected $database;

  /**
   * The entity manager, used to obtain the 'file' entity storage.
   *
   * @var \Drupal\Core\Entity\EntityManagerInterface
   */
  protected $entityManager;

  /**
   * The config object for 'search.settings'.
   *
   * Read for the 'and_or_limit', 'index.minimum_word_size' and
   * 'index.cron_limit' values.
   *
   * @var \Drupal\Core\Config\Config
   */
  protected $searchSettings;

  /**
   * The config object for 'search_file_attachments.settings'.
   *
   * Read for the 'files.include', 'tika.path', 'tika.jar', 'java_path' and
   * 'debug' values.
   *
   * @var \Drupal\Core\Config\Config
   */
  protected $moduleSettings;

  /**
   * The language manager, injected from the 'language_manager' service.
   *
   * NOTE(review): no method visible in this class reads this property;
   * confirm whether it is still needed.
   *
   * @var \Drupal\Core\Language\LanguageManagerInterface
   */
  protected $languageManager;

  /**
   * The account passed to the constructor.
   *
   * create() supplies the 'current_user' service; the constructor argument is
   * optional, so this may be NULL.
   *
   * @var \Drupal\Core\Session\AccountInterface
   */
  protected $account;

  /**
   * An array of additional rankings.
   *
   * NOTE(review): no method visible in this class reads or writes this
   * property; it looks like a leftover from the node search plugin. Confirm
   * before relying on it.
   *
   * @var array
   */
  protected $rankings;

  /**
   * The file mimetypes that should be included in the index.
   *
   * Populated by setIncludedMimetypes() from the 'files.include' setting.
   *
   * @var array
   */
  protected $includedMimetypes;

  /**
   * {@inheritdoc}
   */
  public static function create(ContainerInterface $container, array $configuration, $plugin_id, $plugin_definition) {
    // Resolve every dependency up front so the constructor call stays flat.
    $database = $container->get('database');
    $entity_manager = $container->get('entity.manager');
    $config_factory = $container->get('config.factory');
    $language_manager = $container->get('language_manager');
    $current_user = $container->get('current_user');

    return new static(
      $configuration,
      $plugin_id,
      $plugin_definition,
      $database,
      $entity_manager,
      $config_factory->get('search.settings'),
      $config_factory->get('search_file_attachments.settings'),
      $language_manager,
      $current_user
    );
  }

  /**
   * Constructs a FileSearch plugin instance.
   *
   * @param array $configuration
   *   A configuration array containing information about the plugin instance.
   * @param string $plugin_id
   *   The plugin_id for the plugin instance.
   * @param mixed $plugin_definition
   *   The plugin implementation definition.
   * @param \Drupal\Core\Database\Connection $database
   *   The database connection used for search and index queries.
   * @param \Drupal\Core\Entity\EntityManagerInterface $entity_manager
   *   The entity manager, used to obtain the file storage.
   * @param \Drupal\Core\Config\Config $search_settings
   *   The config object for 'search.settings'.
   * @param \Drupal\Core\Config\Config $module_settings
   *   The config object for 'search_file_attachments.settings'.
   * @param \Drupal\Core\Language\LanguageManagerInterface $language_manager
   *   The language manager.
   * @param \Drupal\Core\Session\AccountInterface $account
   *   (optional) The account to store on the plugin. Defaults to NULL.
   */
  public function __construct(array $configuration, $plugin_id, $plugin_definition, Connection $database, EntityManagerInterface $entity_manager, Config $search_settings, Config $module_settings, LanguageManagerInterface $language_manager, AccountInterface $account = NULL) {
    // Keep references to the injected collaborators.
    $this->account = $account;
    $this->languageManager = $language_manager;
    $this->moduleSettings = $module_settings;
    $this->searchSettings = $search_settings;
    $this->entityManager = $entity_manager;
    $this->database = $database;

    // Reads $this->moduleSettings, so it must run after the assignments above.
    $this->setIncludedMimetypes();

    parent::__construct($configuration, $plugin_id, $plugin_definition);
  }

  /**
   * {@inheritdoc}
   */
  public function access($operation = 'view', AccountInterface $account = NULL, $return_as_object = FALSE) {
    // $account is optional, but the permission check below needs a real
    // account object. Fall back to the account injected at construction and,
    // if that is missing as well, to the current user.
    if ($account === NULL) {
      $account = $this->account ?: \Drupal::currentUser();
    }

    // Searching files requires the 'search files' permission.
    $result = AccessResult::allowedIfHasPermission($account, 'search files');

    // Callers choose between the full access result object and a boolean.
    return $return_as_object ? $result : $result->isAllowed();
  }

  /**
   * {@inheritdoc}
   */
  public function execute() {
    // Bail out early when the search parameters are not usable.
    if (!$this->isSearchExecutable()) {
      return [];
    }

    $found = $this->findResults();

    // Only hand a result set to prepareResults(); otherwise report no results.
    return $found ? $this->prepareResults($found) : [];
  }

  /**
   * Queries to find search results, and sets status messages.
   *
   * This method can assume that $this->isSearchExecutable() has already been
   * checked and returned TRUE.
   *
   * The query joins the search index with {file_managed} and {search_dataset}
   * and selects the index language code and the dataset text for each match.
   * Warnings about the keyword expression are reported through the messenger
   * service rather than the deprecated drupal_set_message().
   *
   * @return \Drupal\Core\Database\StatementInterface|null
   *   Results from search query execute() method, or NULL if the search
   *   failed.
   */
  protected function findResults() {
    $query = $this->database
      ->select('search_index', 'i', ['target' => 'replica'])
      ->extend('Drupal\\search\\SearchQuery')
      ->extend('Drupal\\Core\\Database\\Query\\PagerSelectExtender');
    $query->join('file_managed', 'f', 'f.fid = i.sid');
    $query->join('search_dataset', 'sd', 'sd.sid = i.sid AND sd.type = i.type');

    // Index rows written by this plugin use the plugin ID as their type.
    $query->searchExpression($this->keywords, $this->getPluginId());

    // Run the query, ten results per page.
    $find = $query
      ->fields('i', ['langcode'])
      ->fields('sd', ['data'])
      ->groupBy('i.langcode')
      ->groupBy('sd.data')
      ->limit(10)
      ->execute();

    // Check query status and set messages if needed. The status is a bit mask,
    // so more than one warning can apply to the same search.
    $status = $query->getStatus();
    $messenger = $this->messenger();
    if ($status & SearchQuery::EXPRESSIONS_IGNORED) {
      $messenger->addWarning($this->t('Your search used too many AND/OR expressions. Only the first @count terms were included in this search.', [
        '@count' => $this->searchSettings->get('and_or_limit'),
      ]));
    }
    if ($status & SearchQuery::LOWER_CASE_OR) {
      $messenger->addWarning($this->t('Search for either of the two terms with uppercase <strong>OR</strong>. For example, <strong>cats OR dogs</strong>.'));
    }
    if ($status & SearchQuery::NO_POSITIVE_KEYWORDS) {
      $messenger->addWarning($this->formatPlural($this->searchSettings->get('index.minimum_word_size'), 'You must include at least one positive keyword with 1 character or more.', 'You must include at least one positive keyword with @count characters or more.'));
    }

    return $find;
  }

  /**
   * Prepares search results for rendering.
   *
   * Each row is expected to expose the file ID as 'sid', the language code as
   * 'langcode' and the indexed text as 'data'. Rows whose file can no longer
   * be loaded are skipped.
   *
   * @param \Drupal\Core\Database\StatementInterface $found
   *   Results found from a successful search query execute() method.
   *
   * @return array
   *   Array of search result items (empty array if no results). Each item has
   *   the keys 'link', 'title', 'snippet' and 'langcode'.
   */
  protected function prepareResults(StatementInterface $found) {
    $results = [];
    $file_storage = $this->entityManager->getStorage('file');
    $keys = $this->keywords;

    foreach ($found as $item) {
      $file = $file_storage->load($item->sid);
      if (!$file) {
        // The index row refers to a file that no longer loads (for example it
        // was deleted before the index was updated). Skip it instead of
        // calling a method on NULL.
        continue;
      }
      $file = $file->getTranslation($item->langcode);

      $results[] = [
        'link' => file_create_url($file->getFileUri()),
        'title' => Html::escape($file->getFilename()),
        'snippet' => search_excerpt($keys, $item->data, $item->langcode),
        'langcode' => $file->language()->getId(),
      ];
    }

    return $results;
  }

  /**
   * {@inheritdoc}
   */
  public function indexStatus() {
    // Total: every file whose status column is 1.
    $total = $this->database
      ->query('SELECT COUNT(*) FROM {file_managed} WHERE status = 1')
      ->fetchField();

    // Remaining: files with status 1 that either have no dataset row for this
    // plugin or have a row flagged for reindexing. The OR is parenthesised so
    // that the status filter applies to both cases (AND binds tighter than
    // OR), and files are counted DISTINCT because the join can yield several
    // dataset rows for one file.
    $remaining = $this->database
      ->query("SELECT COUNT(DISTINCT f.fid) FROM {file_managed} f LEFT JOIN {search_dataset} sd ON sd.sid = f.fid AND sd.type = :type WHERE f.status = 1 AND (sd.sid IS NULL OR sd.reindex <> 0)", [
        ':type' => $this->getPluginId(),
      ])
      ->fetchField();

    return [
      'remaining' => $remaining,
      'total' => $total,
    ];
  }

  /**
   * {@inheritdoc}
   */
  public function updateIndex() {
    // indexFile() returns without writing an index row for files whose MIME
    // type is not included. Selecting such files here would pick them again on
    // every run, ahead of everything else, and could use up the whole limit.
    // Restrict the query to indexable MIME types instead.
    $mimetypes = array_values(array_unique((array) $this->includedMimetypes));
    if (!$mimetypes) {
      // Nothing is configured for indexing.
      return;
    }

    // Interpret the cron limit setting as the maximum number of files to index
    // per cron run.
    $limit = (int) $this->searchSettings->get('index.cron_limit');

    // Files without a dataset row come first, then rows flagged for
    // reindexing ordered by their reindex value, then by file ID.
    $result = $this->database->queryRange("SELECT f.fid, MAX(sd.reindex) FROM {file_managed} f LEFT JOIN {search_dataset} sd ON sd.sid = f.fid AND sd.type = :type WHERE f.filemime IN (:mimetypes[]) AND (sd.sid IS NULL OR sd.reindex <> 0) GROUP BY f.fid ORDER BY MAX(sd.reindex) is null DESC, MAX(sd.reindex) ASC, f.fid ASC", 0, $limit, [
      ':type' => $this->getPluginId(),
      ':mimetypes[]' => $mimetypes,
    ], [
      'target' => 'replica',
    ]);

    $fids = $result->fetchCol();
    if (!$fids) {
      return;
    }

    $file_storage = $this->entityManager->getStorage('file');
    foreach ($file_storage->loadMultiple($fids) as $file) {
      $this->indexFile($file);
    }
  }

  /**
   * {@inheritdoc}
   */
  public function markForReindex() {
    // Index entries written by this plugin use the plugin ID as their search
    // index "type", so that is the type to flag.
    $type = $this->getPluginId();
    search_mark_for_reindex($type);
  }

  /**
   * {@inheritdoc}
   */
  public function indexClear() {
    // Index entries written by this plugin use the plugin ID as their search
    // index "type", so that is the type to clear.
    $type = $this->getPluginId();
    search_index_clear($type);
  }

  /**
   * Indexes a single file.
   *
   * Files whose MIME type is not in the included list are ignored. For every
   * translation language of the file, one index entry is written that contains
   * the file name and the escaped extracted content.
   *
   * @param \Drupal\Core\Entity\EntityInterface $file
   *   The file to index.
   */
  protected function indexFile(EntityInterface $file) {
    if (!in_array($file->getMimeType(), $this->includedMimetypes)) {
      return;
    }

    $languages = $file->getTranslationLanguages();
    if (!$languages) {
      // No language to index under, so skip the extraction as well.
      return;
    }

    // The extracted content is the same for every language, so extract and
    // escape it once instead of once per loop iteration.
    $extracted_content = Html::escape((string) $this->getFileContent($file));

    // Index entries use a search index "type" equal to the plugin ID.
    $type = $this->getPluginId();

    foreach ($languages as $language) {
      $langcode = $language->getId();
      $translation_options = ['langcode' => $langcode];

      // Labels are translated into the language being indexed.
      $content = $this->t('Filename', [], $translation_options) . ': ' . $file->getFilename() . ' - ' . $this->t('Content', [], $translation_options) . ': ' . $extracted_content;

      search_index($type, $file->id(), $langcode, $content);
    }
  }

  /**
   * Extract the content of the given file.
   *
   * Chooses the extractor by MIME type: plain text and diffs are read
   * directly, JPEG/TIFF images are read for IPTC metadata, everything else is
   * passed to Apache Tika.
   *
   * @param \Drupal\Core\Entity\EntityInterface $file
   *   The file that should be indexed.
   *
   * @return string
   *   A string with the extracted content from the file.
   */
  protected function getFileContent(EntityInterface $file) {
    $uri = $file->getFileUri();

    // Prefer the local filesystem path: reading through the public URL needs
    // an HTTP round trip and a correct base URL. Fall back to the URL when the
    // stream wrapper has no readable local path.
    $local_path = \Drupal::service('file_system')->realpath($uri);
    $file_path = ($local_path && is_readable($local_path)) ? $local_path : file_create_url($uri);

    $mimetype = $file->getMimeType();

    if ($mimetype === 'text/plain' || $mimetype === 'text/x-diff') {
      return $this->extractContentSimple($file, $file_path);
    }

    $image_mimetypes = [
      'image/jpeg',
      'image/jpg',
      'image/tiff',
    ];
    if (in_array($mimetype, $image_mimetypes, TRUE)) {
      return $this->extractContentExif($file, $file_path);
    }

    return $this->extractContentTika($file, $file_path);
  }

  /**
   * Extract simple text.
   *
   * Reads the file, drops byte sequences that are not valid UTF-8, and returns
   * the text with HTML special characters escaped.
   *
   * @param \Drupal\Core\Entity\EntityInterface $file
   *   The file object.
   * @param string $file_path
   *   The path to the file.
   *
   * @return string
   *   The extracted text.
   */
  protected function extractContentSimple(EntityInterface $file, $file_path) {
    // Converting UTF-8 to UTF-8 with //IGNORE discards invalid sequences.
    $utf8 = iconv("UTF-8", "UTF-8//IGNORE", file_get_contents($file_path));

    // Decode entities first so text that is already encoded is not escaped a
    // second time by htmlspecialchars().
    $decoded = html_entity_decode($utf8, ENT_NOQUOTES, 'UTF-8');

    return trim(htmlspecialchars($decoded, ENT_NOQUOTES, 'UTF-8'));
  }

  /**
   * Extract IPTC metadata from image.
   *
   * Only the IPTC fields returned by getExifTagmarker() are used. Each field
   * is rendered as "<strong>Label:</strong> value, value".
   *
   * @param \Drupal\Core\Entity\EntityInterface $file
   *   The file object.
   * @param string $file_path
   *   The path to the file.
   *
   * @return string
   *   The extracted text, or an empty string if the image has no usable IPTC
   *   data.
   */
  protected function extractContentExif(EntityInterface $file, $file_path) {
    // getimagesize() is called only for its second, by-reference argument,
    // which receives the image's APP markers.
    getimagesize($file_path, $info);
    if (!isset($info['APP13'])) {
      return '';
    }

    $iptc_raw = iptcparse($info['APP13']);
    if (empty($iptc_raw)) {
      return '';
    }

    $tagmarker = $this->getExifTagmarker();
    if (empty($tagmarker) || !is_array($tagmarker)) {
      // Without a tag map no field can be selected.
      return '';
    }

    $iptc = [];
    foreach ($iptc_raw as $key => $value) {
      // Add only values from the defined iptc fields.
      if (!array_key_exists($key, $tagmarker)) {
        continue;
      }

      $iptc_field_value = [];
      foreach ($value as $innervalue) {
        $innervalue = trim($innervalue);
        // Compare against '' so that a literal "0" value is kept.
        if ($innervalue !== '') {
          $iptc_field_value[] = $innervalue;
        }
      }

      if ($iptc_field_value) {
        // Labels may be translation objects; array keys must be strings.
        $iptc[(string) $tagmarker[$key]] = implode(', ', $iptc_field_value);
      }
    }

    $content = '';
    foreach ($iptc as $label => $text) {
      $content .= " <strong>{$label}:</strong> {$text}";
    }

    return trim($content);
  }

  /**
   * Extract file content with Apache Tika.
   *
   * Builds a "java -jar <tika jar> -t <file>" command from the module settings
   * and runs it through the shell. With the 'debug' setting enabled, stderr is
   * captured as well and the command and its output are logged.
   *
   * @param \Drupal\Core\Entity\EntityInterface $file
   *   The file object.
   * @param string $file_path
   *   The path to the file.
   *
   * @return string
   *   The extracted text. Empty if the command produced no output or, in debug
   *   mode, if the output contains a Java exception.
   *
   * @throws \Exception
   *   If the configured Tika jar cannot be found.
   */
  protected function extractContentTika(EntityInterface $file, $file_path) {
    $tika_path = realpath($this->moduleSettings->get('tika.path'));
    $tika = realpath($tika_path . '/' . $this->moduleSettings->get('tika.jar'));
    if (!$tika || !is_file($tika)) {
      // Fully qualified: an unqualified "Exception" would be looked up in the
      // plugin namespace, where no such class exists.
      throw new \Exception((string) $this->t('Invalid path or filename for tika application jar.'));
    }

    // escapeshellarg() strips UTF-8 multibyte characters unless the locale is
    // UTF-8, so temporarily switch LC_CTYPE to keep the file path valid.
    $backup_locale = setlocale(LC_CTYPE, '0');
    setlocale(LC_CTYPE, 'en_US.UTF-8');

    $java_service = \Drupal::service('search_file_attachments.java');
    $configured_java = $this->moduleSettings->get('java_path');
    if ($configured_java) {
      $java_service->setJavaPath($configured_java);
    }
    $java_path = $java_service->getJavaPath();

    // Use the MIME type accessor here like the rest of the class does; the
    // raw 'filemime' property is not a plain string.
    $param = '';
    if ($file->getMimeType() != 'audio/mpeg') {
      $param = ' -Dfile.encoding=UTF8 -cp ' . escapeshellarg($tika_path);
    }

    if (DIRECTORY_SEPARATOR == '\\') {
      // On Windows, quote the paths manually so that paths containing spaces
      // keep working. No LANG prefix here: it is POSIX shell syntax.
      $cmd = $java_path . $param . ' -jar "' . str_replace('"', '\\"', $tika) . '" -t "' . str_replace('"', '\\"', $file_path) . '"';
    }
    else {
      $cmd = $java_path . $param . ' -jar ' . escapeshellarg($tika) . ' -t ' . escapeshellarg($file_path);

      // Support utf-8 commands. The variable is exported so that it reaches
      // the java process and not only the invoking shell.
      // http://www.php.net/manual/pt_BR/function.shell-exec.php#85095
      $cmd = "export LANG=en_US.utf-8; {$cmd}";
    }

    // Restore the locale.
    setlocale(LC_CTYPE, $backup_locale);

    if ($this->moduleSettings->get('debug')) {
      // In debug mode stderr is merged into the output so it can be logged.
      $result = (string) shell_exec($cmd . ' 2>&1');
      \Drupal::logger('search_file_attachments')
        ->notice('<p><strong>Tika Command:</strong> <code>%command</code></p><br /> <p><strong>Result:</strong> %result</p>', [
          '%command' => $cmd,
          '%result' => $result,
        ]);

      // Empty the result, if it contains an error message, so that the error
      // is not in the index.
      if (strpos($result, 'Exception in thread') !== FALSE) {
        $result = '';
      }
      return $result;
    }

    // shell_exec() yields no string when there is no output; normalise that
    // to an empty string.
    return (string) shell_exec($cmd);
  }

  /**
   * Returns the mimetypes that are included in the index.
   *
   * @return array
   *   The mimetypes stored by setIncludedMimetypes().
   */
  protected function getIncludedMimetypes() {
    $mimetypes = $this->includedMimetypes;
    return $mimetypes;
  }

  /**
   * Set the included mimetypes.
   *
   * Maps the included file types (file extensions) from the 'files.include'
   * setting to the corresponding mimetypes and stores the result.
   */
  protected function setIncludedMimetypes() {
    $extensions = $this->moduleSettings->get('files.include');
    $this->includedMimetypes = \Drupal::service('search_file_attachments.mimetype')
      ->extensionsToMimetypes($extensions);
  }

  /**
   * Defines the IPTC fields to be used for the search index.
   *
   * Other modules can change the list through the
   * 'search_file_attachments_exif_tagmarker' alter hook.
   *
   * @return array
   *   An array of translated field labels, keyed by IPTC record key (for
   *   example '2#025' for Keywords).
   */
  protected function getExifTagmarker() {
    $tagmarker = [
      '2#005' => $this->t('Object Name'),
      '2#015' => $this->t('Category'),
      '2#020' => $this->t('Supplementals'),
      '2#025' => $this->t('Keywords'),
      '2#040' => $this->t('Special Instructions'),
      '2#080' => $this->t('By Line'),
      '2#085' => $this->t('By Line Title'),
      '2#090' => $this->t('City'),
      '2#092' => $this->t('Sublocation'),
      '2#095' => $this->t('Province State'),
      '2#100' => $this->t('Country Code'),
      '2#101' => $this->t('Country Name'),
      '2#105' => $this->t('Headline'),
      '2#110' => $this->t('Credits'),
      '2#115' => $this->t('Source'),
      '2#116' => $this->t('Copyright'),
      '2#118' => $this->t('Contact'),
      '2#120' => $this->t('Caption'),
      '2#122' => $this->t('Caption Writer'),
    ];

    // Allow other modules to alter defined IPTC fields. alter() changes
    // $tagmarker by reference, so the array itself is what gets returned, not
    // the result of the call.
    \Drupal::moduleHandler()
      ->alter('search_file_attachments_exif_tagmarker', $tagmarker);

    return $tagmarker;
  }

}

Members

Namesort descending Modifiers Type Description Overrides
CacheableDependencyTrait::$cacheContexts protected property Cache contexts.
CacheableDependencyTrait::$cacheMaxAge protected property Cache max-age.
CacheableDependencyTrait::$cacheTags protected property Cache tags.
CacheableDependencyTrait::getCacheContexts public function 3
CacheableDependencyTrait::getCacheMaxAge public function 3
CacheableDependencyTrait::getCacheTags public function 3
CacheableDependencyTrait::setCacheability protected function Sets cacheability; useful for value object constructors.
DependencySerializationTrait::$_entityStorages protected property An array of entity type IDs keyed by the property name of their storages.
DependencySerializationTrait::$_serviceIds protected property An array of service IDs keyed by property name used for serialization.
DependencySerializationTrait::__sleep public function 1
DependencySerializationTrait::__wakeup public function 2
FileSearch::$account protected property The Drupal account to use for checking for access to advanced search.
FileSearch::$database protected property A database connection object.
FileSearch::$entityManager protected property An entity manager object.
FileSearch::$includedMimetypes protected property An array of file mimetypes that should be included in the index.
FileSearch::$languageManager protected property The language manager.
FileSearch::$moduleSettings protected property A config object for 'search_file_attachments.settings'.
FileSearch::$rankings protected property An array of additional rankings from hook_ranking().
FileSearch::$searchSettings protected property A config object for 'search.settings'.
FileSearch::access public function Checks data value access. Overrides AccessibleInterface::access
FileSearch::create public static function Creates an instance of the plugin. Overrides SearchPluginBase::create
FileSearch::execute public function Executes the search. Overrides SearchInterface::execute
FileSearch::extractContentExif protected function Extract IPTC metadata from image.
FileSearch::extractContentSimple protected function Extract simple text.
FileSearch::extractContentTika protected function Extract file content with Apache Tika.
FileSearch::findResults protected function Queries to find search results, and sets status messages.
FileSearch::getExifTagmarker protected function Defines the IPTC fields to be used for the search index.
FileSearch::getFileContent protected function Extract the content of the given file.
FileSearch::getIncludedMimetypes protected function Return the array of included mimetypes.
FileSearch::indexClear public function Clears the search index for this plugin. Overrides SearchIndexingInterface::indexClear
FileSearch::indexFile protected function Indexes a single file.
FileSearch::indexStatus public function Reports the status of indexing. Overrides SearchIndexingInterface::indexStatus
FileSearch::markForReindex public function Marks the search index for reindexing for this plugin. Overrides SearchIndexingInterface::markForReindex
FileSearch::prepareResults protected function Prepares search results for rendering.
FileSearch::setIncludedMimetypes protected function Set the included mimetypes.
FileSearch::updateIndex public function Updates the search index for this plugin. Overrides SearchIndexingInterface::updateIndex
FileSearch::__construct public function Constructs a \Drupal\search_file_attachments\Plugin\Search\FileSearch object. Overrides PluginBase::__construct
MessengerTrait::$messenger protected property The messenger. 29
MessengerTrait::messenger public function Gets the messenger. 29
MessengerTrait::setMessenger public function Sets the messenger.
PluginBase::$configuration protected property Configuration information passed into the plugin. 1
PluginBase::$pluginDefinition protected property The plugin implementation definition. 1
PluginBase::$pluginId protected property The plugin_id.
PluginBase::DERIVATIVE_SEPARATOR constant A string which is used to separate base plugin IDs from the derivative ID.
PluginBase::getBaseId public function Gets the base_plugin_id of the plugin instance. Overrides DerivativeInspectionInterface::getBaseId
PluginBase::getDerivativeId public function Gets the derivative_id of the plugin instance. Overrides DerivativeInspectionInterface::getDerivativeId
PluginBase::getPluginDefinition public function Gets the definition of the plugin implementation. Overrides PluginInspectionInterface::getPluginDefinition 3
PluginBase::getPluginId public function Gets the plugin_id of the plugin instance. Overrides PluginInspectionInterface::getPluginId
PluginBase::isConfigurable public function Determines if the plugin is configurable.
RefinableCacheableDependencyTrait::addCacheableDependency public function 1
RefinableCacheableDependencyTrait::addCacheContexts public function
RefinableCacheableDependencyTrait::addCacheTags public function
RefinableCacheableDependencyTrait::mergeCacheMaxAge public function
SearchPluginBase::$keywords protected property The keywords to use in a search.
SearchPluginBase::$searchAttributes protected property Array of attributes - usually from the request object.
SearchPluginBase::$searchParameters protected property Array of parameters from the query string from the request.
SearchPluginBase::buildResults public function Executes the search and builds render arrays for the result items. Overrides SearchInterface::buildResults 1
SearchPluginBase::buildSearchUrlQuery public function Builds the URL GET query parameters array for search. Overrides SearchInterface::buildSearchUrlQuery 1
SearchPluginBase::getAttributes public function Returns the currently set attributes (from the request). Overrides SearchInterface::getAttributes
SearchPluginBase::getHelp public function Returns the searching help. Overrides SearchInterface::getHelp 1
SearchPluginBase::getKeywords public function Returns the currently set keywords of the plugin instance. Overrides SearchInterface::getKeywords
SearchPluginBase::getParameters public function Returns the current parameters set using setSearch(). Overrides SearchInterface::getParameters
SearchPluginBase::getType public function Returns the search index type this plugin uses. Overrides SearchInterface::getType 2
SearchPluginBase::isSearchExecutable public function Verifies if the values set via setSearch() are valid and sufficient. Overrides SearchInterface::isSearchExecutable 2
SearchPluginBase::searchFormAlter public function Alters the search form when being built for a given plugin. Overrides SearchInterface::searchFormAlter 1
SearchPluginBase::setSearch public function Sets the keywords, parameters, and attributes to be used by execute(). Overrides SearchInterface::setSearch 1
SearchPluginBase::suggestedTitle public function Provides a suggested title for a page of search results. Overrides SearchInterface::suggestedTitle
StringTranslationTrait::$stringTranslation protected property The string translation service. 1
StringTranslationTrait::formatPlural protected function Formats a string containing a count of items.
StringTranslationTrait::getNumberOfPlurals protected function Returns the number of plurals supported by a given language.
StringTranslationTrait::getStringTranslation protected function Gets the string translation service.
StringTranslationTrait::setStringTranslation public function Sets the string translation service to use. 2
StringTranslationTrait::t protected function Translates a string to the current language or to a given language.