You are here

class TikaServerExtractor in Search API attachments 8

Same name and namespace in other branches
  1. 9.0.x src/Plugin/search_api_attachments/TikaServerExtractor.php \Drupal\search_api_attachments\Plugin\search_api_attachments\TikaServerExtractor

Provides a text extractor that uses a Tika JAX-RS server.

Plugin annotation


@SearchApiAttachmentsTextExtractor(
  id = "tika_server_extractor",
  label = @Translation("Tika JAX-RS Server Extractor"),
  description = @Translation("Adds Tika JAX-RS server extractor support."),
)

Hierarchy

Expanded class hierarchy of TikaServerExtractor

File

src/Plugin/search_api_attachments/TikaServerExtractor.php, line 25

Namespace

Drupal\search_api_attachments\Plugin\search_api_attachments
View source
class TikaServerExtractor extends TextExtractorPluginBase {

  /**
   * The HTTP client used to talk to the Tika server.
   *
   * @var \GuzzleHttp\ClientInterface
   */
  protected $httpClient;

  /**
   * {@inheritdoc}
   */
  public function __construct(array $configuration, $plugin_id, array $plugin_definition, ConfigFactoryInterface $config_factory, StreamWrapperManagerInterface $stream_wrapper_manager, MimeTypeGuesserInterface $mime_type_guesser, MessengerInterface $messenger, FileSystemInterface $file_system, ClientInterface $http_client) {
    parent::__construct($configuration, $plugin_id, $plugin_definition, $config_factory, $stream_wrapper_manager, $mime_type_guesser, $messenger, $file_system);
    $this->httpClient = $http_client;
  }

  /**
   * {@inheritdoc}
   */
  public static function create(ContainerInterface $container, array $configuration, $plugin_id, $plugin_definition) {
    return new static(
      $configuration,
      $plugin_id,
      $plugin_definition,
      $container->get('config.factory'),
      $container->get('stream_wrapper_manager'),
      $container->get('file.mime_type.guesser'),
      $container->get('messenger'),
      $container->get('file_system'),
      $container->get('http_client')
    );
  }

  /**
   * Extract file with a Tika JAX-RS Server.
   *
   * The file is streamed to the "/tika" endpoint of the configured server with
   * a PUT request that asks for a plain-text response.
   *
   * @param \Drupal\file\Entity\File $file
   *   A file object.
   *
   * @return string
   *   The text extracted from the file.
   *
   * @throws \GuzzleHttp\Exception\GuzzleException
   *   Propagated from the HTTP client when the request itself fails.
   * @throws \RuntimeException
   *   If the file cannot be opened for reading.
   * @throws \Exception
   *   If the server answers with a status code other than 200.
   */
  public function extract(File $file) {
    $uri = $file->getFileUri();
    $handle = fopen($uri, 'r');
    // Fail early with a meaningful message instead of handing FALSE to the
    // HTTP client as the request body.
    if ($handle === FALSE) {
      throw new \RuntimeException(sprintf('Unable to open the file "%s" for reading.', $uri));
    }

    $options = [
      'timeout' => $this->configuration['timeout'],
      'body' => $handle,
      'headers' => [
        'Accept' => 'text/plain',
      ],
    ];

    try {
      $response = $this->httpClient->request('PUT', $this->getServerUri() . '/tika', $options);
      if ($response->getStatusCode() !== 200) {
        throw new \Exception('Tika JAX-RS Server is not available.');
      }
      return (string) $response->getBody();
    }
    finally {
      // Release the handle on every path; the guard covers the case where the
      // handle has already been closed elsewhere.
      if (is_resource($handle)) {
        fclose($handle);
      }
    }
  }

  /**
   * Returns the Tika server URI from the current config.
   *
   * @return string
   *   The URI built as "<scheme>://<host>:<port>" from the plugin
   *   configuration, without a trailing slash.
   */
  protected function getServerUri() {
    return $this->configuration['scheme'] . '://' . $this->configuration['host'] . ':' . $this->configuration['port'];
  }

  /**
   * {@inheritdoc}
   */
  public function buildConfigurationForm(array $form, FormStateInterface $form_state) {
    $form['scheme'] = [
      '#type' => 'select',
      '#title' => $this->t('HTTP protocol'),
      '#description' => $this->t('The HTTP protocol to use for sending queries.'),
      '#default_value' => isset($this->configuration['scheme']) ? $this->configuration['scheme'] : 'http',
      '#options' => [
        'http' => 'http',
        'https' => 'https',
      ],
    ];
    $form['host'] = [
      '#type' => 'textfield',
      '#title' => $this->t('Tika server host'),
      '#description' => $this->t('The host name or IP of your Tika server, e.g. <code>localhost</code> or <code>www.example.com</code>.'),
      '#default_value' => isset($this->configuration['host']) ? $this->configuration['host'] : 'localhost',
      '#required' => TRUE,
    ];
    $form['port'] = [
      '#type' => 'textfield',
      '#title' => $this->t('Tika server port'),
      '#description' => $this->t('The default port is 9998.'),
      '#default_value' => isset($this->configuration['port']) ? $this->configuration['port'] : '9998',
      '#required' => TRUE,
    ];
    $form['timeout'] = [
      '#type' => 'number',
      '#min' => 1,
      '#max' => 180,
      '#title' => $this->t('Query timeout'),
      '#description' => $this->t('The timeout in seconds for queries sent to the Tika server.'),
      '#default_value' => isset($this->configuration['timeout']) ? $this->configuration['timeout'] : 5,
      '#required' => TRUE,
    ];
    return $form;
  }

  /**
   * {@inheritdoc}
   */
  public function validateConfigurationForm(array &$form, FormStateInterface $form_state) {
    $values = $form_state->getValues();
    if (!isset($values['text_extractor_config']['port'])) {
      return;
    }

    $port = $values['text_extractor_config']['port'];
    // Accept plain decimal digits only. is_numeric() would also let through
    // values such as "80.5", "1e3" or " 80", which are not integers and would
    // end up verbatim in the server URI.
    $is_integer = is_scalar($port) && preg_match('/^\d+\z/', (string) $port) === 1;
    if (!$is_integer || (int) $port > 65535) {
      $form_state->setError($form['text_extractor_config']['port'], $this->t('The port has to be an integer between 0 and 65535.'));
    }
  }

  /**
   * {@inheritdoc}
   */
  public function submitConfigurationForm(array &$form, FormStateInterface $form_state) {
    // Copy each submitted setting from the "text_extractor_config" subtree of
    // the form values into the plugin configuration.
    foreach (['scheme', 'host', 'port', 'timeout'] as $key) {
      $this->configuration[$key] = $form_state->getValue(['text_extractor_config', $key]);
    }
    parent::submitConfigurationForm($form, $form_state);
  }

}

Members

Namesort descending Modifiers Type Description Overrides
DependencySerializationTrait::$_entityStorages protected property An array of entity type IDs keyed by the property name of their storages.
DependencySerializationTrait::$_serviceIds protected property An array of service IDs keyed by property name used for serialization.
DependencySerializationTrait::__sleep public function 1
DependencySerializationTrait::__wakeup public function 2
MessengerTrait::messenger public function Gets the messenger. 29
MessengerTrait::setMessenger public function Sets the messenger.
PluginBase::$configuration protected property Configuration information passed into the plugin. 1
PluginBase::$pluginDefinition protected property The plugin implementation definition. 1
PluginBase::$pluginId protected property The plugin_id.
PluginBase::DERIVATIVE_SEPARATOR constant A string which is used to separate base plugin IDs from the derivative ID.
PluginBase::getBaseId public function Gets the base_plugin_id of the plugin instance. Overrides DerivativeInspectionInterface::getBaseId
PluginBase::getDerivativeId public function Gets the derivative_id of the plugin instance. Overrides DerivativeInspectionInterface::getDerivativeId
PluginBase::getPluginDefinition public function Gets the definition of the plugin implementation. Overrides PluginInspectionInterface::getPluginDefinition 3
PluginBase::getPluginId public function Gets the plugin_id of the plugin instance. Overrides PluginInspectionInterface::getPluginId
PluginBase::isConfigurable public function Determines if the plugin is configurable.
StringTranslationTrait::$stringTranslation protected property The string translation service. 1
StringTranslationTrait::formatPlural protected function Formats a string containing a count of items.
StringTranslationTrait::getNumberOfPlurals protected function Returns the number of plurals supported by a given language.
StringTranslationTrait::getStringTranslation protected function Gets the string translation service.
StringTranslationTrait::setStringTranslation public function Sets the string translation service to use. 2
StringTranslationTrait::t protected function Translates a string to the current language or to a given language.
TextExtractorPluginBase::$configFactory protected property Config factory service.
TextExtractorPluginBase::$messenger protected property The messenger. Overrides MessengerTrait::$messenger
TextExtractorPluginBase::$mimeTypeGuesser protected property Mime type guesser service.
TextExtractorPluginBase::$streamWrapperManager protected property Stream wrapper manager service.
TextExtractorPluginBase::calculateDependencies public function
TextExtractorPluginBase::CONFIGNAME constant Name of the config being edited.
TextExtractorPluginBase::defaultConfiguration public function Gets default configuration for this plugin. Overrides ConfigurableInterface::defaultConfiguration
TextExtractorPluginBase::getConfiguration public function Gets this plugin's configuration. Overrides ConfigurableInterface::getConfiguration
TextExtractorPluginBase::getmessenger public function
TextExtractorPluginBase::getPdfMimeTypes public function Helper method to get the PDF MIME types.
TextExtractorPluginBase::getRealpath public function Helper method to get the real path from an uri.
TextExtractorPluginBase::setConfiguration public function Sets the configuration for this plugin instance. Overrides ConfigurableInterface::setConfiguration
TikaServerExtractor::$httpClient protected property The HTTP client.
TikaServerExtractor::buildConfigurationForm public function Form constructor. Overrides PluginFormInterface::buildConfigurationForm
TikaServerExtractor::create public static function Creates an instance of the plugin. Overrides TextExtractorPluginBase::create
TikaServerExtractor::extract public function Extract file with a Tika JAX-RS Server. Overrides TextExtractorPluginBase::extract
TikaServerExtractor::getServerUri protected function Returns the Tika server URI from the current config.
TikaServerExtractor::submitConfigurationForm public function Form submission handler. Overrides TextExtractorPluginBase::submitConfigurationForm
TikaServerExtractor::validateConfigurationForm public function Form validation handler. Overrides TextExtractorPluginBase::validateConfigurationForm
TikaServerExtractor::__construct public function Constructs a \Drupal\Component\Plugin\PluginBase object. Overrides TextExtractorPluginBase::__construct