class TikaServerExtractor in Search API attachments 9.0.x
Same name and namespace in other branches
- 8 src/Plugin/search_api_attachments/TikaServerExtractor.php \Drupal\search_api_attachments\Plugin\search_api_attachments\TikaServerExtractor
Provides a Tika server text extractor.
Plugin annotation
@SearchApiAttachmentsTextExtractor(
id = "tika_server_extractor",
label = @Translation("Tika JAX-RS Server Extractor"),
description = @Translation("Adds Tika JAX-RS server extractor support."),
)
Hierarchy
- class \Drupal\Component\Plugin\PluginBase implements DerivativeInspectionInterface, PluginInspectionInterface
- class \Drupal\Core\Plugin\PluginBase uses DependencySerializationTrait, MessengerTrait, StringTranslationTrait
- class \Drupal\search_api_attachments\TextExtractorPluginBase implements ContainerFactoryPluginInterface, TextExtractorPluginInterface
- class \Drupal\search_api_attachments\Plugin\search_api_attachments\TikaServerExtractor
- class \Drupal\search_api_attachments\TextExtractorPluginBase implements ContainerFactoryPluginInterface, TextExtractorPluginInterface
- class \Drupal\Core\Plugin\PluginBase uses DependencySerializationTrait, MessengerTrait, StringTranslationTrait
Expanded class hierarchy of TikaServerExtractor
File
- src/
Plugin/ search_api_attachments/ TikaServerExtractor.php, line 25
Namespace
Drupal\search_api_attachments\Plugin\search_api_attachments
View source
class TikaServerExtractor extends TextExtractorPluginBase {

  /**
   * The HTTP client used to send files to the Tika server.
   *
   * @var \GuzzleHttp\ClientInterface
   */
  protected $httpClient;

  /**
   * Constructs a TikaServerExtractor object.
   *
   * All arguments except $http_client are forwarded unchanged to the parent
   * constructor.
   *
   * @param array $configuration
   *   The plugin configuration.
   * @param string $plugin_id
   *   The plugin ID.
   * @param array $plugin_definition
   *   The plugin definition.
   * @param \Drupal\Core\Config\ConfigFactoryInterface $config_factory
   *   The config factory service.
   * @param \Drupal\Core\StreamWrapper\StreamWrapperManagerInterface $stream_wrapper_manager
   *   The stream wrapper manager service.
   * @param \Symfony\Component\Mime\MimeTypeGuesserInterface $mime_type_guesser
   *   The MIME type guesser service.
   * @param \Drupal\Core\Messenger\MessengerInterface $messenger
   *   The messenger service.
   * @param \Drupal\Core\File\FileSystemInterface $file_system
   *   The file system service.
   * @param \GuzzleHttp\ClientInterface $http_client
   *   The HTTP client used to reach the Tika server.
   */
  public function __construct(array $configuration, $plugin_id, array $plugin_definition, ConfigFactoryInterface $config_factory, StreamWrapperManagerInterface $stream_wrapper_manager, MimeTypeGuesserInterface $mime_type_guesser, MessengerInterface $messenger, FileSystemInterface $file_system, ClientInterface $http_client) {
    parent::__construct($configuration, $plugin_id, $plugin_definition, $config_factory, $stream_wrapper_manager, $mime_type_guesser, $messenger, $file_system);
    $this->httpClient = $http_client;
  }

  /**
   * {@inheritdoc}
   */
  public static function create(ContainerInterface $container, array $configuration, $plugin_id, $plugin_definition) {
    return new static(
      $configuration,
      $plugin_id,
      $plugin_definition,
      $container->get('config.factory'),
      $container->get('stream_wrapper_manager'),
      $container->get('file.mime_type.guesser'),
      $container->get('messenger'),
      $container->get('file_system'),
      $container->get('http_client')
    );
  }

  /**
   * Extract file with a Tika JAX-RS Server.
   *
   * The file is streamed to the "/tika" endpoint of the configured server with
   * a PUT request asking for a plain text response.
   *
   * @param \Drupal\file\Entity\File $file
   *   A file object.
   *
   * @return string
   *   The text extracted from the file.
   *
   * @throws \RuntimeException
   *   If the file cannot be opened for reading.
   * @throws \Exception
   *   If the server answers with a status code other than 200.
   * @throws \GuzzleHttp\Exception\GuzzleException
   */
  public function extract(File $file) {
    $uri = $file->getFileUri();
    $handle = fopen($uri, 'r');
    if ($handle === FALSE) {
      // Fail with a clear message instead of handing FALSE to the HTTP client
      // as the request body.
      throw new \RuntimeException(sprintf('Unable to open file "%s" for text extraction.', $uri));
    }

    try {
      $response = $this->httpClient->request('PUT', $this->getServerUri() . '/tika', [
        'timeout' => $this->configuration['timeout'],
        'body' => $handle,
        'headers' => [
          'Accept' => 'text/plain',
        ],
      ]);

      if ($response->getStatusCode() !== 200) {
        throw new \Exception('Tika JAX-RS Server is not available.');
      }

      return (string) $response->getBody();
    }
    finally {
      // Release the file handle deterministically, whether the request
      // succeeded or threw. The HTTP client may already have closed it.
      if (is_resource($handle)) {
        fclose($handle);
      }
    }
  }

  /**
   * Returns the Tika server URI from the current config.
   *
   * @return string
   *   The full Tika server URI, built as "scheme://host:port".
   */
  protected function getServerUri() {
    return $this->configuration['scheme'] . '://' . $this->configuration['host'] . ':' . $this->configuration['port'];
  }

  /**
   * {@inheritdoc}
   */
  public function buildConfigurationForm(array $form, FormStateInterface $form_state) {
    $form['scheme'] = [
      '#type' => 'select',
      '#title' => $this->t('HTTP protocol'),
      '#description' => $this->t('The HTTP protocol to use for sending queries.'),
      '#default_value' => $this->configuration['scheme'] ?? 'http',
      '#options' => [
        'http' => 'http',
        'https' => 'https',
      ],
    ];
    $form['host'] = [
      '#type' => 'textfield',
      '#title' => $this->t('Tika server host'),
      '#description' => $this->t('The host name or IP of your Tika server, e.g. <code>localhost</code> or <code>www.example.com</code>.'),
      '#default_value' => $this->configuration['host'] ?? 'localhost',
      '#required' => TRUE,
    ];
    $form['port'] = [
      '#type' => 'textfield',
      '#title' => $this->t('Tika server port'),
      '#description' => $this->t('The default port is 9998.'),
      '#default_value' => $this->configuration['port'] ?? '9998',
      '#required' => TRUE,
    ];
    $form['timeout'] = [
      '#type' => 'number',
      '#min' => 1,
      '#max' => 180,
      '#title' => $this->t('Query timeout'),
      '#description' => $this->t('The timeout in seconds for queries sent to the Tika server.'),
      '#default_value' => $this->configuration['timeout'] ?? 5,
      '#required' => TRUE,
    ];
    return $form;
  }

  /**
   * {@inheritdoc}
   */
  public function validateConfigurationForm(array &$form, FormStateInterface $form_state) {
    $values = $form_state->getValues();
    if (!isset($values['text_extractor_config']['port'])) {
      return;
    }

    $port = $values['text_extractor_config']['port'];
    // Accept only plain digit strings. is_numeric() would also let through
    // values such as "80.5", "1e3" or " 80", which are not valid ports and
    // would end up verbatim in the server URI.
    $is_integer = (is_int($port) || is_string($port)) && ctype_digit((string) $port);
    if (!$is_integer || $port > 65535) {
      $form_state->setError($form['text_extractor_config']['port'], $this->t('The port has to be an integer between 0 and 65535.'));
    }
  }

  /**
   * {@inheritdoc}
   */
  public function submitConfigurationForm(array &$form, FormStateInterface $form_state) {
    // Copy each submitted setting from the "text_extractor_config" subtree into
    // the plugin configuration before delegating to the parent.
    foreach (['scheme', 'host', 'port', 'timeout'] as $key) {
      $this->configuration[$key] = $form_state->getValue([
        'text_extractor_config',
        $key,
      ]);
    }
    parent::submitConfigurationForm($form, $form_state);
  }

}
Members
Name | Modifiers | Type | Description | Overrides |
---|---|---|---|---|
DependencySerializationTrait:: |
protected | property | ||
DependencySerializationTrait:: |
protected | property | ||
DependencySerializationTrait:: |
public | function | 2 | |
DependencySerializationTrait:: |
public | function | 2 | |
MessengerTrait:: |
public | function | Gets the messenger. | 27 |
MessengerTrait:: |
public | function | Sets the messenger. | |
PluginBase:: |
protected | property | Configuration information passed into the plugin. | 1 |
PluginBase:: |
protected | property | The plugin implementation definition. | 1 |
PluginBase:: |
protected | property | The plugin_id. | |
PluginBase:: |
constant | A string which is used to separate base plugin IDs from the derivative ID. | ||
PluginBase:: |
public | function |
Gets the base_plugin_id of the plugin instance. Overrides DerivativeInspectionInterface:: |
|
PluginBase:: |
public | function |
Gets the derivative_id of the plugin instance. Overrides DerivativeInspectionInterface:: |
|
PluginBase:: |
public | function |
Gets the definition of the plugin implementation. Overrides PluginInspectionInterface:: |
2 |
PluginBase:: |
public | function |
Gets the plugin_id of the plugin instance. Overrides PluginInspectionInterface:: |
|
PluginBase:: |
public | function | Determines if the plugin is configurable. | |
StringTranslationTrait:: |
protected | property | The string translation service. | 4 |
StringTranslationTrait:: |
protected | function | Formats a string containing a count of items. | |
StringTranslationTrait:: |
protected | function | Returns the number of plurals supported by a given language. | |
StringTranslationTrait:: |
protected | function | Gets the string translation service. | |
StringTranslationTrait:: |
public | function | Sets the string translation service to use. | 2 |
StringTranslationTrait:: |
protected | function | Translates a string to the current language or to a given language. | |
TextExtractorPluginBase:: |
protected | property | Config factory service. | |
TextExtractorPluginBase:: |
protected | property |
The messenger. Overrides MessengerTrait:: |
|
TextExtractorPluginBase:: |
protected | property | Mime type guesser service. | |
TextExtractorPluginBase:: |
protected | property | Stream wrapper manager service. | |
TextExtractorPluginBase:: |
public | function | ||
TextExtractorPluginBase:: |
constant | Name of the config being edited. | ||
TextExtractorPluginBase:: |
public | function |
Gets default configuration for this plugin. Overrides ConfigurableInterface:: |
|
TextExtractorPluginBase:: |
public | function |
Gets this plugin's configuration. Overrides ConfigurableInterface:: |
|
TextExtractorPluginBase:: |
public | function | ||
TextExtractorPluginBase:: |
public | function | Helper method to get the PDF MIME types. | |
TextExtractorPluginBase:: |
public | function | Helper method to get the real path from an uri. | |
TextExtractorPluginBase:: |
public | function |
Sets the configuration for this plugin instance. Overrides ConfigurableInterface:: |
|
TikaServerExtractor:: |
protected | property | The HTTP client. | |
TikaServerExtractor:: |
public | function |
Form constructor. Overrides PluginFormInterface:: |
|
TikaServerExtractor:: |
public static | function |
Creates an instance of the plugin. Overrides TextExtractorPluginBase:: |
|
TikaServerExtractor:: |
public | function |
Extract file with a Tika JAX-RS Server. Overrides TextExtractorPluginBase:: |
|
TikaServerExtractor:: |
protected | function | Returns the Tika server URI from the current config. | |
TikaServerExtractor:: |
public | function |
Form submission handler. Overrides TextExtractorPluginBase:: |
|
TikaServerExtractor:: |
public | function |
Form validation handler. Overrides TextExtractorPluginBase:: |
|
TikaServerExtractor:: |
public | function |
Constructs a \Drupal\Component\Plugin\PluginBase object. Overrides TextExtractorPluginBase:: |