<?php
namespace Drupal\search_file_attachments\Plugin\Search;
use Drupal\Component\Utility\Html;
use Drupal\Core\Access\AccessResult;
use Drupal\Core\Config\Config;
use Drupal\Core\Database\Connection;
use Drupal\Core\Database\StatementInterface;
use Drupal\Core\Entity\EntityInterface;
use Drupal\Core\Entity\EntityManagerInterface;
use Drupal\Core\Language\LanguageManagerInterface;
use Drupal\Core\Session\AccountInterface;
use Drupal\Core\Access\AccessibleInterface;
use Drupal\search\Plugin\SearchPluginBase;
use Drupal\search\Plugin\SearchIndexingInterface;
use Drupal\Search\SearchQuery;
use Symfony\Component\DependencyInjection\ContainerInterface;
class FileSearch extends SearchPluginBase implements AccessibleInterface, SearchIndexingInterface {
/**
 * Database connection used for the search index and file table queries.
 *
 * @var \Drupal\Core\Database\Connection
 */
protected $database;
/**
 * Entity manager used to obtain the 'file' entity storage.
 *
 * @var \Drupal\Core\Entity\EntityManagerInterface
 */
protected $entityManager;
/**
 * The 'search.settings' configuration object.
 *
 * @var \Drupal\Core\Config\Config
 */
protected $searchSettings;
/**
 * The 'search_file_attachments.settings' configuration object.
 *
 * @var \Drupal\Core\Config\Config
 */
protected $moduleSettings;
/**
 * Language manager handed to the constructor.
 *
 * @var \Drupal\Core\Language\LanguageManagerInterface
 */
protected $languageManager;
/**
 * Account handed to the constructor; may be NULL.
 *
 * @var \Drupal\Core\Session\AccountInterface|null
 */
protected $account;
/**
 * Not assigned or read anywhere in the visible part of this class.
 *
 * NOTE(review): presumably reserved for ranking support - confirm or remove.
 */
protected $rankings;
/**
 * Mime types eligible for indexing, filled in by setIncludedMimetypes().
 *
 * Used as the haystack of in_array() in indexFile().
 */
protected $includedMimetypes;
/**
 * Builds the plugin, resolving every dependency from the service container.
 *
 * @param \Symfony\Component\DependencyInjection\ContainerInterface $container
 *   Container the services are looked up in.
 * @param array $configuration
 *   Plugin configuration, forwarded to the constructor unchanged.
 * @param string $plugin_id
 *   Plugin ID, forwarded to the constructor unchanged.
 * @param mixed $plugin_definition
 *   Plugin definition, forwarded to the constructor unchanged.
 *
 * @return static
 *   The new plugin instance.
 */
public static function create(ContainerInterface $container, array $configuration, $plugin_id, $plugin_definition) {
  // Services are resolved in the same order as the constructor arguments.
  $connection = $container->get('database');
  $entity_manager = $container->get('entity.manager');
  $search_config = $container->get('config.factory')->get('search.settings');
  $module_config = $container->get('config.factory')->get('search_file_attachments.settings');
  $language_manager = $container->get('language_manager');
  $current_user = $container->get('current_user');

  return new static(
    $configuration,
    $plugin_id,
    $plugin_definition,
    $connection,
    $entity_manager,
    $search_config,
    $module_config,
    $language_manager,
    $current_user
  );
}
/**
 * Stores the injected dependencies and computes the indexable mime types.
 *
 * @param array $configuration
 *   Plugin configuration, handed to the parent constructor.
 * @param string $plugin_id
 *   Plugin ID, handed to the parent constructor.
 * @param mixed $plugin_definition
 *   Plugin definition, handed to the parent constructor.
 * @param \Drupal\Core\Database\Connection $database
 *   Database connection.
 * @param \Drupal\Core\Entity\EntityManagerInterface $entity_manager
 *   Entity manager.
 * @param \Drupal\Core\Config\Config $search_settings
 *   Search settings configuration object.
 * @param \Drupal\Core\Config\Config $module_settings
 *   Module settings configuration object.
 * @param \Drupal\Core\Language\LanguageManagerInterface $language_manager
 *   Language manager.
 * @param \Drupal\Core\Session\AccountInterface $account
 *   (optional) Account stored on the plugin; defaults to NULL.
 */
public function __construct(array $configuration, $plugin_id, $plugin_definition, Connection $database, EntityManagerInterface $entity_manager, Config $search_settings, Config $module_settings, LanguageManagerInterface $language_manager, AccountInterface $account = NULL) {
  // Configuration objects.
  $this->moduleSettings = $module_settings;
  $this->searchSettings = $search_settings;

  // Services and context.
  $this->account = $account;
  $this->languageManager = $language_manager;
  $this->entityManager = $entity_manager;
  $this->database = $database;

  // Needs $this->moduleSettings, so it must run after the assignments above.
  $this->setIncludedMimetypes();

  parent::__construct($configuration, $plugin_id, $plugin_definition);
}
/**
 * Checks whether an account may use the file search.
 *
 * @param string $operation
 *   The operation being checked. Not evaluated; the decision depends on the
 *   'search files' permission only.
 * @param \Drupal\Core\Session\AccountInterface $account
 *   (optional) Account to check. When NULL, the account injected into the
 *   plugin is used, and failing that the current user.
 * @param bool $return_as_object
 *   (optional) TRUE to get the access result object, FALSE for a boolean.
 *
 * @return bool|\Drupal\Core\Access\AccessResultInterface
 *   The access result, or its boolean value when $return_as_object is FALSE.
 */
public function access($operation = 'view', AccountInterface $account = NULL, $return_as_object = FALSE) {
  // NULL is the documented default for $account, but the permission check
  // below needs a real account object, so resolve a fallback first.
  if ($account === NULL) {
    $account = $this->account;
  }
  if ($account === NULL) {
    $account = \Drupal::currentUser();
  }

  $result = AccessResult::allowedIfHasPermission($account, 'search files');
  return $return_as_object ? $result : $result->isAllowed();
}
/**
 * Runs the search and returns the prepared result list.
 *
 * @return array
 *   The output of prepareResults(), or an empty array when the search is not
 *   executable or findResults() returned nothing truthy.
 */
public function execute() {
  if (!$this->isSearchExecutable()) {
    return array();
  }

  $found = $this->findResults();
  if (!$found) {
    return array();
  }

  return $this->prepareResults($found);
}
/**
 * Queries the search index for files matching the current keywords.
 *
 * Builds a paged SearchQuery over {search_index} joined with {file_managed}
 * and {search_dataset}, executes it, and reports query status flags to the
 * user as warning messages.
 *
 * @return mixed
 *   Whatever the extended query's execute() returns.
 */
protected function findResults() {
  $search = $this->database
    ->select('search_index', 'i', array('target' => 'replica'))
    ->extend('Drupal\\search\\SearchQuery')
    ->extend('Drupal\\Core\\Database\\Query\\PagerSelectExtender');

  $search->join('file_managed', 'f', 'f.fid = i.sid');
  $search->join('search_dataset', 'sd', 'sd.sid = i.sid AND sd.type = i.type');
  $search->searchExpression($this->keywords, $this->getPluginId());

  $search->fields('i', array('langcode'));
  $search->fields('sd', array('data'));
  $search->groupBy('i.langcode');
  $search->groupBy('sd.data');
  $search->limit(10);
  $find = $search->execute();

  // Translate the status bit flags into user-facing warnings, in flag order.
  $flags = $search->getStatus();
  $warnings = array();
  if ($flags & SearchQuery::EXPRESSIONS_IGNORED) {
    $and_or_limit = $this->searchSettings->get('and_or_limit');
    $warnings[] = $this->t('Your search used too many AND/OR expressions. Only the first @count terms were included in this search.', array(
      '@count' => $and_or_limit,
    ));
  }
  if ($flags & SearchQuery::LOWER_CASE_OR) {
    $warnings[] = $this->t('Search for either of the two terms with uppercase <strong>OR</strong>. For example, <strong>cats OR dogs</strong>.');
  }
  if ($flags & SearchQuery::NO_POSITIVE_KEYWORDS) {
    $minimum_word_size = $this->searchSettings->get('index.minimum_word_size');
    $warnings[] = $this->formatPlural($minimum_word_size, 'You must include at least one positive keyword with 1 character or more.', 'You must include at least one positive keyword with @count characters or more.');
  }
  foreach ($warnings as $warning) {
    drupal_set_message($warning, 'warning');
  }

  return $find;
}
/**
 * Turns the rows found by findResults() into search result arrays.
 *
 * @param \Drupal\Core\Database\StatementInterface $found
 *   Result rows; each row is read for its sid, langcode and data properties.
 *
 * @return array
 *   One array per row with the keys 'link', 'title', 'snippet' and
 *   'langcode'. Rows whose file can no longer be loaded are left out.
 */
protected function prepareResults(StatementInterface $found) {
  $results = array();
  $file_storage = $this->entityManager->getStorage('file');
  $keys = $this->keywords;

  foreach ($found as $item) {
    $file = $file_storage->load($item->sid);
    // The index can still reference a file that has been deleted since it
    // was indexed; skip such stale rows instead of calling a method on NULL.
    if (!$file) {
      continue;
    }
    $file = $file->getTranslation($item->langcode);

    $results[] = array(
      'link' => file_create_url($file->getFileUri()),
      'title' => Html::escape($file->getFilename()),
      'snippet' => search_excerpt($keys, $item->data, $item->langcode),
      'langcode' => $file->language()->getId(),
    );
  }

  return $results;
}
/**
 * Reports how many files exist and how many still have to be indexed.
 *
 * @return array
 *   An array with the keys:
 *   - remaining: number of files with status = 1 that have no
 *     {search_dataset} row for this plugin or are flagged for reindexing.
 *   - total: number of files with status = 1.
 */
public function indexStatus() {
  $total = $this->database
    ->query('SELECT COUNT(*) FROM {file_managed} WHERE status = 1')
    ->fetchField();

  // The OR group must be parenthesised: AND binds tighter than OR, so without
  // the parentheses every row flagged for reindexing was counted regardless
  // of the file status. DISTINCT keeps a file with several dataset rows (one
  // per language) from being counted more than once.
  $remaining = $this->database
    ->query("SELECT COUNT(DISTINCT f.fid) FROM {file_managed} f LEFT JOIN {search_dataset} sd ON sd.sid = f.fid AND sd.type = :type WHERE f.status = 1 AND (sd.sid IS NULL OR sd.reindex <> 0)", array(
      ':type' => $this->getPluginId(),
    ))
    ->fetchField();

  return array(
    'remaining' => $remaining,
    'total' => $total,
  );
}
/**
 * Indexes the next batch of files that are missing from the search index.
 *
 * Selects up to 'index.cron_limit' files that have no {search_dataset} row
 * for this plugin or are flagged for reindexing (never-indexed files first,
 * then the lowest reindex value), and passes each one to indexFile().
 */
public function updateIndex() {
  $limit = (int) $this->searchSettings->get('index.cron_limit');

  // Restricted to status = 1 so the batch covers the same set of files that
  // indexStatus() reports on. MAX(...) is written without a space before the
  // parenthesis so it parses as a function call under any SQL mode.
  // NOTE(review): files whose mime type indexFile() rejects never get a
  // dataset row, so they appear to be selected again on every run - confirm
  // whether they can crowd indexable files out of the batch.
  $result = $this->database
    ->queryRange("SELECT f.fid, MAX(sd.reindex) FROM {file_managed} f LEFT JOIN {search_dataset} sd ON sd.sid = f.fid AND sd.type = :type WHERE f.status = 1 AND (sd.sid IS NULL OR sd.reindex <> 0) GROUP BY f.fid ORDER BY MAX(sd.reindex) is null DESC, MAX(sd.reindex) ASC, f.fid ASC", 0, $limit, array(
      ':type' => $this->getPluginId(),
    ), array(
      'target' => 'replica',
    ));

  $fids = $result->fetchCol();
  if (!$fids) {
    return;
  }

  $file_storage = $this->entityManager->getStorage('file');
  foreach ($file_storage->loadMultiple($fids) as $file) {
    $this->indexFile($file);
  }
}
/**
 * Flags every item indexed under this plugin's ID for reindexing.
 */
public function markForReindex() {
  $type = $this->getPluginId();
  search_mark_for_reindex($type);
}
/**
 * Clears the search index entries stored under this plugin's ID.
 */
public function indexClear() {
  $type = $this->getPluginId();
  search_index_clear($type);
}
/**
 * Adds one file to the search index, once per translation language.
 *
 * Files whose mime type is not in the included mime types are ignored.
 *
 * @param \Drupal\Core\Entity\EntityInterface $file
 *   The file entity to index.
 */
protected function indexFile(EntityInterface $file) {
  if (!in_array($file->getMimeType(), $this->includedMimetypes)) {
    return;
  }

  // The extracted text does not depend on the language, so extract it once
  // rather than once per translation. Html::escape() is the imported helper;
  // the previously used SafeMarkup class is not imported in this file. The
  // cast covers extractors that return NULL or FALSE.
  $extracted_content = Html::escape((string) $this->getFileContent($file));

  foreach ($file->getTranslationLanguages() as $language) {
    $langcode = $language->getId();
    $translation_options = array(
      'langcode' => $langcode,
    );

    $content = $this->t('Filename', array(), $translation_options) . ': ' . $file->getFilename()
      . ' - ' . $this->t('Content', array(), $translation_options) . ': ';
    $content .= $extracted_content;

    search_index($this->getPluginId(), $file->id(), $langcode, $content);
  }
}
/**
 * Extracts the text of a file with the extractor matching its mime type.
 *
 * Plain text and diff files are read directly, JPEG/TIFF images go through
 * the IPTC extractor, and everything else is handed to Tika.
 *
 * @param \Drupal\Core\Entity\EntityInterface $file
 *   The file entity.
 *
 * @return mixed
 *   Whatever the selected extractor returns.
 */
protected function getFileContent(EntityInterface $file) {
  // NOTE(review): file_create_url() is what every extractor receives as the
  // "path" - confirm that a URL rather than a local path is intended.
  $location = file_create_url($file->getFileUri());
  $mimetype = $file->getMimeType();

  $text_mimetypes = array(
    'text/plain',
    'text/x-diff',
  );
  if (in_array($mimetype, $text_mimetypes)) {
    return $this->extractContentSimple($file, $location);
  }

  $image_mimetypes = array(
    'image/jpeg',
    'image/jpg',
    'image/tiff',
  );
  if (in_array($mimetype, $image_mimetypes)) {
    return $this->extractContentExif($file, $location);
  }

  return $this->extractContentTika($file, $location);
}
/**
 * Reads a text file and returns its content cleaned up for indexing.
 *
 * @param \Drupal\Core\Entity\EntityInterface $file
 *   The file entity (not used by this extractor).
 * @param string $file_path
 *   Location the content is read from.
 *
 * @return string
 *   The trimmed content with invalid UTF-8 sequences dropped and HTML
 *   entities normalised.
 */
protected function extractContentSimple(EntityInterface $file, $file_path) {
  $raw = file_get_contents($file_path);
  // Converting UTF-8 to UTF-8 with //IGNORE drops invalid byte sequences.
  $utf8 = iconv("UTF-8", "UTF-8//IGNORE", $raw);
  // Decode first so already-encoded entities are not encoded twice.
  $decoded = html_entity_decode($utf8, ENT_NOQUOTES, 'UTF-8');
  $encoded = htmlspecialchars($decoded, ENT_NOQUOTES, 'UTF-8');
  return trim($encoded);
}
/**
 * Extracts the IPTC metadata embedded in an image.
 *
 * Only the tags listed by getExifTagmarker() are used. Each one is rendered
 * as "<strong>Label:</strong> value, value".
 *
 * @param \Drupal\Core\Entity\EntityInterface $file
 *   The file entity (not used by this extractor).
 * @param string $file_path
 *   Location of the image.
 *
 * @return string
 *   The concatenated metadata, or an empty string when the image carries no
 *   usable IPTC data.
 */
protected function extractContentExif(EntityInterface $file, $file_path) {
  $content = '';

  // Only the extended info collected in $info is needed; the size array that
  // getimagesize() returns is not used.
  $info = array();
  getimagesize($file_path, $info);
  if (!isset($info['APP13'])) {
    return $content;
  }

  $iptc_raw = iptcparse($info['APP13']);
  if (empty($iptc_raw)) {
    return $content;
  }

  $tagmarker = $this->getExifTagmarker();
  if (!is_array($tagmarker)) {
    // Without a tag map there is nothing to match the IPTC keys against.
    return $content;
  }

  $iptc = array();
  foreach ($iptc_raw as $key => $values) {
    if (!array_key_exists($key, $tagmarker)) {
      continue;
    }
    $field_values = array();
    foreach ($values as $value) {
      $value = trim($value);
      // Compare against '' rather than using empty() so that a literal "0"
      // is kept.
      if ($value !== '') {
        $field_values[] = $value;
      }
    }
    if ($field_values) {
      // Labels may be translatable markup objects, which cannot be used as
      // array keys; cast them to plain strings first.
      $iptc[(string) $tagmarker[$key]] = implode(', ', $field_values);
    }
  }

  foreach ($iptc as $label => $value) {
    $content .= " <strong>{$label}:</strong> {$value}";
  }

  return trim($content);
}
/**
 * Extracts the text of a file by running the Tika jar through Java.
 *
 * Reads the 'tika.path', 'tika.jar', 'java_path' and 'debug' module settings.
 *
 * @param \Drupal\Core\Entity\EntityInterface $file
 *   The file entity; only its mime type is read.
 * @param string $file_path
 *   Location of the file, passed to Tika as the document to parse.
 *
 * @return string|null|false
 *   The output of the Tika command as returned by shell_exec(). In debug mode
 *   FALSE is returned when the output contains 'Exception in thread'.
 *
 * @throws \Exception
 *   When the configured Tika jar cannot be found.
 */
protected function extractContentTika(EntityInterface $file, $file_path) {
  $tika_path = realpath($this->moduleSettings->get('tika.path'));
  $tika = realpath($tika_path . '/' . $this->moduleSettings->get('tika.jar'));
  if (!$tika || !is_file($tika)) {
    // Fully qualified: an unqualified "Exception" would be resolved inside
    // this plugin's namespace, where no such class exists.
    throw new \Exception((string) $this->t('Invalid path or filename for tika application jar.'));
  }

  // escapeshellarg() is locale dependent, so switch LC_CTYPE to UTF-8 while
  // the command line is assembled and restore the old value afterwards.
  $backup_locale = setlocale(LC_CTYPE, '0');
  setlocale(LC_CTYPE, 'en_US.UTF-8');

  $java_service = \Drupal::service('search_file_attachments.java');
  $configured_java_path = $this->moduleSettings->get('java_path');
  if ($configured_java_path) {
    $java_service->setJavaPath($configured_java_path);
  }
  $java_path = $java_service->getJavaPath();

  // Read the mime type through the accessor used everywhere else in this
  // class rather than through the Drupal 7 style "filemime" property.
  $param = '';
  if ($file->getMimeType() != 'audio/mpeg') {
    $param = ' -Dfile.encoding=UTF8 -cp ' . escapeshellarg($tika_path);
  }

  if (DIRECTORY_SEPARATOR == '\\') {
    // Windows: quote manually and do not add the POSIX-only LANG prefix,
    // which cmd.exe cannot interpret.
    $cmd = $java_path . $param . ' -jar "' . str_replace('"', '\\"', $tika) . '" -t "' . str_replace('"', '\\"', $file_path) . '"';
  }
  else {
    $cmd = $java_path . $param . ' -jar ' . escapeshellarg($tika) . ' -t ' . escapeshellarg($file_path);
    $cmd = "LANG=en_US.utf-8; {$cmd}";
  }

  setlocale(LC_CTYPE, $backup_locale);

  if ($this->moduleSettings->get('debug')) {
    // In debug mode stderr is captured too and the whole exchange is logged.
    $result = shell_exec($cmd . ' 2>&1');
    \Drupal::logger('search_file_attachments')
      ->notice('<p><strong>Tika Command:</strong> <code>%command</code></p><br /> <p><strong>Result:</strong> %result</p>', array(
        '%command' => $cmd,
        '%result' => $result,
      ));
    if (strpos($result, 'Exception in thread') !== FALSE) {
      $result = FALSE;
    }
    return $result;
  }

  return shell_exec($cmd);
}
/**
 * Returns the mime types computed by setIncludedMimetypes().
 *
 * @return mixed
 *   The current value of the includedMimetypes property.
 */
protected function getIncludedMimetypes() {
  $mimetypes = $this->includedMimetypes;
  return $mimetypes;
}
/**
 * Computes the indexable mime types from the 'files.include' setting.
 *
 * The configured value is converted by the module's mimetype service and the
 * result is stored in the includedMimetypes property.
 */
protected function setIncludedMimetypes() {
  $extensions = $this->moduleSettings->get('files.include');
  $converter = \Drupal::service('search_file_attachments.mimetype');
  $this->includedMimetypes = $converter->extensionsToMimetypes($extensions);
}
/**
 * Returns the IPTC tags to extract from images, keyed by IPTC record key.
 *
 * Other modules can change the list by implementing the
 * 'search_file_attachments_exif_tagmarker' alter hook.
 *
 * @return array
 *   Translated labels keyed by IPTC key such as '2#025'.
 */
protected function getExifTagmarker() {
  $tagmarker = array(
    '2#005' => $this->t('Object Name'),
    '2#015' => $this->t('Category'),
    '2#020' => $this->t('Supplementals'),
    '2#025' => $this->t('Keywords'),
    '2#040' => $this->t('Special Instructions'),
    '2#080' => $this->t('By Line'),
    '2#085' => $this->t('By Line Title'),
    '2#090' => $this->t('City'),
    '2#092' => $this->t('Sublocation'),
    '2#095' => $this->t('Province State'),
    '2#100' => $this->t('Country Code'),
    '2#101' => $this->t('Country Name'),
    '2#105' => $this->t('Headline'),
    '2#110' => $this->t('Credits'),
    '2#115' => $this->t('Source'),
    '2#116' => $this->t('Copyright'),
    '2#118' => $this->t('Contact'),
    '2#120' => $this->t('Caption'),
    '2#122' => $this->t('Caption Writer'),
  );

  // alter() changes $tagmarker by reference and returns nothing, so its
  // return value must not be returned; return the altered array instead.
  \Drupal::moduleHandler()->alter('search_file_attachments_exif_tagmarker', $tagmarker);

  return $tagmarker;
}
}