You are here

class ExtractedText in Search API attachments 8

Same name and namespace in other branches
  1. 9.0.x src/Plugin/Field/FieldFormatter/ExtractedText.php \Drupal\search_api_attachments\Plugin\Field\FieldFormatter\ExtractedText

File formatter displaying text extracted from an attachment document.

Plugin annotation


@FieldFormatter(
  id = "file_extracted_text",
  label = @Translation("Text extracted from attachment"),
  field_types = {
    "file"
  }
)

Hierarchy

Expanded class hierarchy of ExtractedText

File

src/Plugin/Field/FieldFormatter/ExtractedText.php, line 31

Namespace

Drupal\search_api_attachments\Plugin\Field\FieldFormatter
View source
class ExtractedText extends FileFormatterBase implements ContainerFactoryPluginInterface {

  /**
   * Files extractor config.
   *
   * This is the config object named by FilesExtractor::CONFIGNAME; the
   * constructor reads the 'extraction_method' key and the matching
   * '<plugin id>_configuration' key from it.
   *
   * @var \Drupal\Core\Config\Config
   */
  protected $config;

  /**
   * Module handler service.
   *
   * Used to invoke hook_search_api_attachments_indexable().
   *
   * @var \Drupal\Core\Extension\ModuleHandlerInterface
   */
  protected $moduleHandler;

  /**
   * Search API Processor Plugin Manager.
   *
   * @var \Drupal\search_api\Processor\ProcessorPluginManager
   */
  protected $processorPluginManager;

  /**
   * Search API Attachments Text Extractor Plugin Manager.
   *
   * @var \Drupal\search_api_attachments\TextExtractorPluginManager
   */
  protected $textExtractorPluginManager;

  /**
   * FilesExtractor processor plugin.
   *
   * Instance of the 'file_attachments' Search API processor.
   *
   * @var \Drupal\search_api_attachments\Plugin\search_api\processor\FilesExtractor
   */
  protected $extractor;

  /**
   * Extraction plugin.
   *
   * Stays NULL when no extraction method has been configured; in that case
   * no text is extracted and the formatter renders nothing.
   *
   * @var \Drupal\search_api_attachments\TextExtractorPluginInterface|null
   */
  protected $extractionMethod;

  /**
   * The extract file validator service.
   *
   * @var \Drupal\search_api_attachments\ExtractFileValidator
   */
  protected $extractFileValidator;

  /**
   * ExtractedText constructor.
   *
   * @param string $pluginId
   *   The plugin id.
   * @param mixed $pluginDefinition
   *   The plugin definition.
   * @param \Drupal\Core\Field\FieldDefinitionInterface $fieldDefinition
   *   The field definitions.
   * @param array $settings
   *   The settings.
   * @param string $label
   *   The label.
   * @param string $viewMode
   *   The view mode.
   * @param array $thirdPartySettings
   *   The third party settings.
   * @param \Drupal\Core\Extension\ModuleHandlerInterface $moduleHandler
   *   The module handler.
   * @param \Drupal\search_api\Processor\ProcessorPluginManager $processorPluginManager
   *   The processor plugin manager.
   * @param \Drupal\search_api_attachments\TextExtractorPluginManager $textExtractorPluginManager
   *   The text extractor plugin manager.
   * @param \Drupal\Core\Config\Config $config
   *   The configuration.
   * @param \Drupal\search_api_attachments\ExtractFileValidator $extractFileValidator
   *   The extract file validator.
   */
  public function __construct($pluginId, $pluginDefinition, FieldDefinitionInterface $fieldDefinition, array $settings, $label, $viewMode, array $thirdPartySettings, ModuleHandlerInterface $moduleHandler, ProcessorPluginManager $processorPluginManager, TextExtractorPluginManager $textExtractorPluginManager, Config $config, ExtractFileValidator $extractFileValidator) {
    parent::__construct($pluginId, $pluginDefinition, $fieldDefinition, $settings, $label, $viewMode, $thirdPartySettings);
    $this->moduleHandler = $moduleHandler;
    $this->processorPluginManager = $processorPluginManager;
    $this->textExtractorPluginManager = $textExtractorPluginManager;
    $this->config = $config;
    $this->extractFileValidator = $extractFileValidator;

    // Only instantiate the extraction plugin when a method is actually
    // configured. Asking the plugin manager for an empty plugin ID would make
    // construction of this formatter fail.
    $extractorPluginId = $this->config->get('extraction_method');
    if (is_string($extractorPluginId) && $extractorPluginId !== '') {
      $configuration = $this->config->get($extractorPluginId . '_configuration');
      // The configuration key may be absent; createInstance() expects an
      // array, so fall back to an empty one.
      if (!is_array($configuration)) {
        $configuration = [];
      }
      $this->extractionMethod = $this->textExtractorPluginManager->createInstance($extractorPluginId, $configuration);
    }

    $this->extractor = $this->processorPluginManager->createInstance('file_attachments');
  }

  /**
   * {@inheritdoc}
   */
  public static function create(ContainerInterface $container, array $configuration, $plugin_id, $pluginDefinition) {
    return new static(
      $plugin_id,
      $pluginDefinition,
      $configuration['field_definition'],
      $configuration['settings'],
      $configuration['label'],
      $configuration['view_mode'],
      $configuration['third_party_settings'],
      $container->get('module_handler'),
      $container->get('plugin.manager.search_api.processor'),
      $container->get('plugin.manager.search_api_attachments.text_extractor'),
      $container->get('config.factory')->get(FilesExtractor::CONFIGNAME),
      $container->get('search_api_attachments.extract_file_validator')
    );
  }

  /**
   * {@inheritdoc}
   */
  public function viewElements(FieldItemListInterface $items, $langcode) {
    $elements = [];
    // The entity that owns the field being rendered.
    $host_entity = $items->getParent()->getValue();

    foreach ($this->getEntitiesToView($items, $langcode) as $delta => $file) {
      $contents = $this->extractFileContents($host_entity, $file);
      // Files that are not indexable, or that yield no text, produce no
      // render element for their delta.
      if (!$contents) {
        continue;
      }
      $elements[$delta] = [
        '#markup' => $contents,
        '#cache' => [
          'tags' => $file->getCacheTags(),
        ],
      ];
    }

    return $elements;
  }

  /**
   * Extracts content of given file.
   *
   * @param \Drupal\Core\Entity\EntityInterface $entity
   *   The entity the file is attached to.
   * @param \Drupal\file\Entity\File $file
   *   A file object.
   *
   * @return string|null
   *   Content of the file, or NULL if no extraction method is configured or
   *   the file is not allowed to be indexed.
   */
  protected function extractFileContents(EntityInterface $entity, File $file) {
    // Without an extraction plugin there is nothing that could extract text.
    if ($this->extractionMethod === NULL) {
      return NULL;
    }
    if (!$this->isFileIndexable($file)) {
      return NULL;
    }
    return $this->extractor->extractOrGetFromCache($entity, $file, $this->extractionMethod);
  }

  /**
   * Check if the file is allowed to be indexed.
   *
   * The checks run in order and stop at the first failure: the file exists,
   * its MIME type is not excluded, it is permanent, it is within the
   * configured size limit, private files are allowed (if applicable), and no
   * hook_search_api_attachments_indexable() implementation returned FALSE.
   *
   * @param object $file
   *   A file object.
   *
   * @return bool
   *   TRUE if every check passes, FALSE otherwise.
   */
  protected function isFileIndexable($file) {
    // This method is a copy of
    // Drupal\search_api_attachments\Plugin\search_api\processor\FilesExtractor::isFileIndexable()
    // and differs mostly in the signature. Unfortunately it can't be used here
    // as it requires second argument of type
    // \Drupal\search_api\Item\ItemInterface.
    // The file has to exist on disk.
    if (!file_exists($file->getFileUri())) {
      return FALSE;
    }

    // The file's MIME type must not be one of the excluded ones.
    $excluded_extensions_array = explode(' ', $this->getSetting('excluded_extensions'));
    $all_excluded_mimes = $this->extractFileValidator->getExcludedMimes($excluded_extensions_array);
    if (in_array($file->getMimeType(), $all_excluded_mimes, TRUE)) {
      return FALSE;
    }

    // Only permanent files are considered.
    if (!$file->isPermanent()) {
      return FALSE;
    }

    // The file must not exceed the configured file size.
    $max_filesize = $this->getSetting('max_filesize');
    if (!$this->extractFileValidator->isFileSizeAllowed($file, $max_filesize)) {
      return FALSE;
    }

    // Whether a private file can be indexed or not.
    $excluded_private = $this->getSetting('excluded_private');
    if (!$this->extractFileValidator->isPrivateFileAllowed($file, $excluded_private)) {
      return FALSE;
    }

    // Finally let other modules veto: a strict FALSE from any implementation
    // of hook_search_api_attachments_indexable() blocks the file.
    $result = $this->moduleHandler->invokeAll('search_api_attachments_indexable', [
      $file,
    ]);
    return !in_array(FALSE, $result, TRUE);
  }

  /**
   * {@inheritdoc}
   */
  public static function defaultSettings() {
    return [
      'excluded_extensions' => ExtractFileValidator::DEFAULT_EXCLUDED_EXTENSIONS,
      'max_filesize' => '0',
      'excluded_private' => TRUE,
    ] + parent::defaultSettings();
  }

  /**
   * {@inheritdoc}
   */
  public function settingsForm(array $form, FormStateInterface $form_state) {
    $form['excluded_extensions'] = [
      '#type' => 'textfield',
      '#title' => $this->t('Excluded file extensions'),
      '#default_value' => $this->getSetting('excluded_extensions'),
      '#size' => 80,
      '#maxlength' => 255,
      '#description' => $this->t('File extensions that are excluded from indexing. Separate extensions with a space and do not include the leading dot.<br />Example: "aif art avi bmp gif ico mov oga ogv png psd ra ram rgb flv"<br />Extensions are internally mapped to a MIME type, so it is not necessary to put variations that map to the same type (e.g. tif is sufficient for tif and tiff)'),
    ];
    $form['max_filesize'] = [
      '#type' => 'textfield',
      '#title' => $this->t('Maximum upload size'),
      '#default_value' => $this->getSetting('max_filesize'),
      '#description' => $this->t('Enter a value like "10 KB", "10 MB" or "10 GB" in order to restrict the max file size of files that should be indexed.<br /> Enter "0" for no limit restriction.'),
      '#size' => 10,
    ];
    $form['excluded_private'] = [
      '#type' => 'checkbox',
      '#title' => $this->t('Exclude private files'),
      '#default_value' => $this->getSetting('excluded_private'),
      '#description' => $this->t('Check this box if you want to exclude private files from being indexed.'),
    ];
    return $form;
  }

  /**
   * {@inheritdoc}
   */
  public function settingsSummary() {
    $isexcluded = $this->getSetting('excluded_private') ? 'true' : 'false';

    return [
      $this->t('Excluded file extensions: @extensions', [
        '@extensions' => $this->getSetting('excluded_extensions'),
      ]),
      $this->t('Maximum upload size: @maxsize', [
        '@maxsize' => $this->getSetting('max_filesize'),
      ]),
      $this->t('Exclude private files: @isexcluded', [
        '@isexcluded' => $isexcluded,
      ]),
    ];
  }

}

Members

Namesort descending Modifiers Type Description Overrides
DependencySerializationTrait::$_entityStorages protected property An array of entity type IDs keyed by the property name of their storages.
DependencySerializationTrait::$_serviceIds protected property An array of service IDs keyed by property name used for serialization.
DependencySerializationTrait::__sleep public function 1
DependencySerializationTrait::__wakeup public function 2
EntityReferenceFormatterBase::getEntitiesToView protected function Returns the referenced entities for display. 1
EntityReferenceFormatterBase::prepareView public function Loads the entities referenced in that field across all the entities being viewed. Overrides FormatterBase::prepareView
EntityReferenceFormatterBase::view public function Overrides FormatterBase::view
ExtractedText::$config protected property Files extractor config.
ExtractedText::$extractFileValidator protected property The extract file validator service.
ExtractedText::$extractionMethod protected property Extraction plugin.
ExtractedText::$extractor protected property FilesExtractor processor plugin.
ExtractedText::$moduleHandler protected property Module handler service.
ExtractedText::$processorPluginManager protected property Search API Processor Plugin Manager.
ExtractedText::$textExtractorPluginManager protected property Search API Attachments Text Extractor Plugin Manager.
ExtractedText::create public static function Creates an instance of the plugin. Overrides FormatterBase::create
ExtractedText::defaultSettings public static function Defines the default settings for this plugin. Overrides PluginSettingsBase::defaultSettings
ExtractedText::extractFileContents protected function Extracts content of given file.
ExtractedText::isFileIndexable protected function Check if the file is allowed to be indexed.
ExtractedText::settingsForm public function Returns a form to configure settings for the formatter. Overrides FormatterBase::settingsForm
ExtractedText::settingsSummary public function Returns a short summary for the current formatter settings. Overrides FormatterBase::settingsSummary
ExtractedText::viewElements public function Builds a renderable array for a field value. Overrides FormatterInterface::viewElements
ExtractedText::__construct public function ExtractedText constructor. Overrides FormatterBase::__construct
FileFormatterBase::checkAccess protected function Checks access to the given entity. Overrides EntityReferenceFormatterBase::checkAccess
FileFormatterBase::needsEntityLoad protected function Returns whether the entity referenced by an item needs to be loaded. Overrides EntityReferenceFormatterBase::needsEntityLoad 1
FormatterBase::$fieldDefinition protected property The field definition.
FormatterBase::$label protected property The label display setting.
FormatterBase::$settings protected property The formatter settings. Overrides PluginSettingsBase::$settings
FormatterBase::$viewMode protected property The view mode.
FormatterBase::getFieldSetting protected function Returns the value of a field setting.
FormatterBase::getFieldSettings protected function Returns the array of field settings.
FormatterBase::isApplicable public static function Returns if the formatter can be used for the provided field. Overrides FormatterInterface::isApplicable 14
MessengerTrait::$messenger protected property The messenger. 29
MessengerTrait::messenger public function Gets the messenger. 29
MessengerTrait::setMessenger public function Sets the messenger.
PluginBase::$configuration protected property Configuration information passed into the plugin. 1
PluginBase::$pluginDefinition protected property The plugin implementation definition. 1
PluginBase::$pluginId protected property The plugin_id.
PluginBase::DERIVATIVE_SEPARATOR constant A string which is used to separate base plugin IDs from the derivative ID.
PluginBase::getBaseId public function Gets the base_plugin_id of the plugin instance. Overrides DerivativeInspectionInterface::getBaseId
PluginBase::getDerivativeId public function Gets the derivative_id of the plugin instance. Overrides DerivativeInspectionInterface::getDerivativeId
PluginBase::getPluginDefinition public function Gets the definition of the plugin implementation. Overrides PluginInspectionInterface::getPluginDefinition 3
PluginBase::getPluginId public function Gets the plugin_id of the plugin instance. Overrides PluginInspectionInterface::getPluginId
PluginBase::isConfigurable public function Determines if the plugin is configurable.
PluginSettingsBase::$defaultSettingsMerged protected property Whether default settings have been merged into the current $settings.
PluginSettingsBase::$thirdPartySettings protected property The plugin settings injected by third party modules.
PluginSettingsBase::calculateDependencies public function Calculates dependencies for the configured plugin. Overrides DependentPluginInterface::calculateDependencies 6
PluginSettingsBase::getSetting public function Returns the value of a setting, or its default value if absent. Overrides PluginSettingsInterface::getSetting
PluginSettingsBase::getSettings public function Returns the array of settings, including defaults for missing settings. Overrides PluginSettingsInterface::getSettings
PluginSettingsBase::getThirdPartyProviders public function Gets the list of third parties that store information. Overrides ThirdPartySettingsInterface::getThirdPartyProviders
PluginSettingsBase::getThirdPartySetting public function Gets the value of a third-party setting. Overrides ThirdPartySettingsInterface::getThirdPartySetting
PluginSettingsBase::getThirdPartySettings public function Gets all third-party settings of a given module. Overrides ThirdPartySettingsInterface::getThirdPartySettings
PluginSettingsBase::mergeDefaults protected function Merges default settings values into $settings.
PluginSettingsBase::onDependencyRemoval public function Informs the plugin that some configuration it depends on will be deleted. Overrides PluginSettingsInterface::onDependencyRemoval 3
PluginSettingsBase::setSetting public function Sets the value of a setting for the plugin. Overrides PluginSettingsInterface::setSetting
PluginSettingsBase::setSettings public function Sets the settings for the plugin. Overrides PluginSettingsInterface::setSettings
PluginSettingsBase::setThirdPartySetting public function Sets the value of a third-party setting. Overrides ThirdPartySettingsInterface::setThirdPartySetting
PluginSettingsBase::unsetThirdPartySetting public function Unsets a third-party setting. Overrides ThirdPartySettingsInterface::unsetThirdPartySetting
StringTranslationTrait::$stringTranslation protected property The string translation service. 1
StringTranslationTrait::formatPlural protected function Formats a string containing a count of items.
StringTranslationTrait::getNumberOfPlurals protected function Returns the number of plurals supported by a given language.
StringTranslationTrait::getStringTranslation protected function Gets the string translation service.
StringTranslationTrait::setStringTranslation public function Sets the string translation service to use. 2
StringTranslationTrait::t protected function Translates a string to the current language or to a given language.