<?php
namespace Drupal\search_api_attachments\Plugin\search_api_attachments;
use Drupal\Core\Form\FormStateInterface;
use Drupal\search_api_attachments\TextExtractorPluginBase;
use Drupal\file\Entity\File;
/**
 * Text extractor that runs the Apache Tika application jar through Java.
 *
 * Configuration keys read and written by this class:
 * - java_path: command used to start Java.
 * - tika_path: path to the Tika application jar.
 */
class TikaExtractor extends TextExtractorPluginBase {

  /**
   * Extracts the text of a file by running Tika on it.
   *
   * @param \Drupal\file\Entity\File $file
   *   The file to extract text from.
   *
   * @return string
   *   The standard output of the Tika command.
   *
   * @throws \Exception
   *   Thrown when the Tika jar cannot be resolved or when the command could
   *   not be run or produced no output.
   */
  public function extract(File $file) {
    $filepath = $this->getRealpath($file->getFileUri());
    $java = $this->configuration['java_path'];

    // realpath() returns FALSE for a missing jar; fail fast instead of
    // building a command around an empty argument.
    $tika = realpath($this->configuration['tika_path']);
    if ($tika === FALSE) {
      throw new \Exception('Tika Extractor is not available.');
    }

    // escapeshellarg() is locale dependent and can drop multibyte characters
    // under the default C locale, so LC_CTYPE is switched to UTF-8 while the
    // command is assembled and always restored afterwards.
    $backup_locale = setlocale(LC_CTYPE, '0');
    setlocale(LC_CTYPE, 'en_US.UTF-8');
    try {
      $param = '';
      if ($file->getMimeType() != 'audio/mpeg') {
        $param = ' -Dfile.encoding=UTF8 -cp ' . escapeshellarg($tika);
      }
      // Force running the Tika jar headless.
      $param = ' -Djava.awt.headless=true ' . $param;

      // NOTE(review): java_path is passed to the shell unescaped, so it must
      // only ever be set by trusted administrators.
      $cmd = $java . $param . ' -jar ' . escapeshellarg($tika) . ' -t ' . escapeshellarg($filepath);
    }
    finally {
      setlocale(LC_CTYPE, $backup_locale);
    }

    // strpos() returns 0 for a match at the start of the string, so compare
    // against FALSE explicitly.
    if (strpos((string) ini_get('extension_dir'), 'MAMP/') !== FALSE) {
      $cmd = 'export DYLD_LIBRARY_PATH=""; ' . $cmd;
    }

    // Run the command with a UTF-8 LANG. A separate shell_exec('LANG=...')
    // call only affects its own short-lived shell, so the variable is set in
    // this process' environment (inherited by the child) and then restored.
    $backup_lang = getenv('LANG');
    putenv('LANG=en_US.utf-8');
    try {
      $output = shell_exec($cmd);
    }
    finally {
      putenv($backup_lang === FALSE ? 'LANG' : 'LANG=' . $backup_lang);
    }

    // shell_exec() yields NULL on error or empty output and FALSE when the
    // pipe cannot be established.
    if ($output === NULL || $output === FALSE) {
      throw new \Exception('Tika Extractor is not available.');
    }
    return $output;
  }

  /**
   * Adds the Java and Tika path fields to the configuration form.
   *
   * @param array $form
   *   The form array to extend.
   * @param \Drupal\Core\Form\FormStateInterface $form_state
   *   The current form state (not used here).
   *
   * @return array
   *   The form array including the java_path and tika_path elements.
   */
  public function buildConfigurationForm(array $form, FormStateInterface $form_state) {
    $form['java_path'] = [
      '#type' => 'textfield',
      '#title' => $this->t('Path to java executable'),
      '#description' => $this->t('Enter the path to java executable. Example: "java".'),
      '#default_value' => $this->configuration['java_path'],
      '#required' => TRUE,
    ];
    $form['tika_path'] = [
      '#type' => 'textfield',
      '#title' => $this->t('Path to Tika .jar file'),
      '#description' => $this->t('Enter the full path to tika executable jar file. Example: "/var/apache-tika/tika-app-1.8.jar".'),
      '#default_value' => $this->configuration['tika_path'],
      '#required' => TRUE,
    ];
    return $form;
  }

  /**
   * Validates the submitted Java and Tika paths by actually running them.
   *
   * Sets a form error on java_path when the Java command does not behave as
   * expected, and on tika_path when the jar is missing or cannot be run. On
   * success a status message is shown.
   *
   * @param array $form
   *   The complete form; errors are attached to its text_extractor_config
   *   elements.
   * @param \Drupal\Core\Form\FormStateInterface $form_state
   *   The form state holding the submitted text_extractor_config values.
   */
  public function validateConfigurationForm(array &$form, FormStateInterface $form_state) {
    $values = $form_state->getValue(['text_extractor_config']);
    $java_path = $values['java_path'];
    $tika_path = $values['tika_path'];

    // The bare command is accepted only when it exits with status 1
    // (presumably what java returns when started without arguments).
    // NOTE(review): java_path is executed unescaped; this form must stay
    // restricted to trusted administrators.
    exec($java_path, $java_output, $java_status);
    if ($java_status !== 1) {
      $form_state->setError(
        $form['text_extractor_config']['java_path'],
        $this->t('Invalid path or filename %path for java executable.', [
          '%path' => $java_path,
        ])
      );
      return;
    }

    if (!file_exists($tika_path)) {
      $form_state->setError(
        $form['text_extractor_config']['tika_path'],
        $this->t('Invalid path or filename %path for tika application jar.', [
          '%path' => $tika_path,
        ])
      );
      return;
    }

    // Run the jar with -V; a non-zero exit status is reported as a failure.
    $cmd = $java_path . ' -jar ' . escapeshellarg($tika_path) . ' -V';
    exec($cmd, $tika_output, $tika_status);
    if ($tika_status !== 0) {
      $form_state->setError(
        $form['text_extractor_config']['tika_path'],
        $this->t('Tika could not be reached and executed.')
      );
      return;
    }

    $this->getMessenger()->addStatus($this->t('Tika can be reached and be executed'));
  }

  /**
   * Stores the submitted Java and Tika paths in the plugin configuration.
   *
   * @param array $form
   *   The complete form.
   * @param \Drupal\Core\Form\FormStateInterface $form_state
   *   The form state holding the submitted text_extractor_config values.
   */
  public function submitConfigurationForm(array &$form, FormStateInterface $form_state) {
    foreach (['java_path', 'tika_path'] as $key) {
      $this->configuration[$key] = $form_state->getValue(['text_extractor_config', $key]);
    }
    parent::submitConfigurationForm($form, $form_state);
  }

}