You are here

Stopwords.php in Search API 8

File

src/Plugin/search_api/processor/Stopwords.php
View source
<?php

namespace Drupal\search_api\Plugin\search_api\processor;

use Drupal\Core\Form\FormStateInterface;
use Drupal\search_api\Processor\FieldsProcessorPluginBase;
use Drupal\search_api\Query\QueryInterface;

/**
 * Allows you to define stopwords which will be ignored in searches.
 *
 * @SearchApiProcessor(
 *   id = "stopwords",
 *   label = @Translation("Stopwords"),
 *   description = @Translation("Allows you to define stopwords which will be ignored in searches. <strong>Caution:</strong> Only use after both 'Ignore case' and 'Tokenizer' have run."),
 *   stages = {
 *     "pre_index_save" = 0,
 *     "preprocess_index" = -5,
 *     "preprocess_query" = -2,
 *   }
 * )
 */
class Stopwords extends FieldsProcessorPluginBase {

  /**
   * Holds all words ignored for the last query.
   *
   * Keyed by the ignored word itself, so each word is recorded only once.
   *
   * @var string[]
   */
  protected $ignored = [];

  /**
   * An array whose keys and values are the stopwords set for this processor.
   *
   * NULL until built by getStopWords(); reset to NULL whenever a new
   * configuration is set.
   *
   * @var string[]|null
   */
  protected $stopwords;

  /**
   * {@inheritdoc}
   */
  public function defaultConfiguration() {
    $configuration = parent::defaultConfiguration();

    // Only adds the default list if the parent did not already define a
    // "stopwords" key ("+=" never overwrites existing keys).
    $configuration += [
      'stopwords' => [
        'a',
        'an',
        'and',
        'are',
        'as',
        'at',
        'be',
        'but',
        'by',
        'for',
        'if',
        'in',
        'into',
        'is',
        'it',
        'no',
        'not',
        'of',
        'on',
        'or',
        's',
        'such',
        't',
        'that',
        'the',
        'their',
        'then',
        'there',
        'these',
        'they',
        'this',
        'to',
        'was',
        'will',
        'with',
      ],
    ];

    return $configuration;
  }

  /**
   * {@inheritdoc}
   */
  public function setConfiguration(array $configuration) {
    parent::setConfiguration($configuration);

    // Invalidate the cached lookup so getStopWords() rebuilds it from the new
    // configuration. Assigning NULL (instead of unset()) keeps the declared
    // property defined while still making isset() return FALSE.
    $this->stopwords = NULL;
  }

  /**
   * {@inheritdoc}
   */
  public function buildConfigurationForm(array $form, FormStateInterface $form_state) {
    $form = parent::buildConfigurationForm($form, $form_state);

    // The textarea shows one stopword per line. A non-array value is passed
    // through unchanged as the default value.
    $stopwords = $this->getConfiguration()['stopwords'];
    if (is_array($stopwords)) {
      $default_value = implode("\n", $stopwords);
    }
    else {
      $default_value = $stopwords;
    }

    $description = $this->t('Enter a list of stopwords, each on a separate line, that will be removed from content before it is indexed and from search terms before searching. <a href=":url">More info about stopwords.</a>.', [
      ':url' => 'https://en.wikipedia.org/wiki/Stop_words',
    ]);

    $form['stopwords'] = [
      '#type' => 'textarea',
      '#title' => $this->t('Stopwords'),
      '#description' => $description,
      '#default_value' => $default_value,
    ];

    return $form;
  }

  /**
   * {@inheritdoc}
   */
  public function submitConfigurationForm(array &$form, FormStateInterface $form_state) {
    // Convert our text input to a clean, sequentially indexed array before
    // handing over to the parent implementation.
    $stopwords = $this->parseStopwords($form_state->getValue('stopwords', ''));
    $form_state->setValue('stopwords', $stopwords);

    parent::submitConfigurationForm($form, $form_state);
  }

  /**
   * {@inheritdoc}
   */
  public function preprocessSearchQuery(QueryInterface $query) {
    // Start with a clean slate; process() records every stopword it removes
    // while the parent implementation runs.
    $this->ignored = [];
    parent::preprocessSearchQuery($query);

    // Report the removed words on the query's result set.
    $results = $query->getResults();
    foreach ($this->ignored as $ignored_search_key) {
      $results->addIgnoredSearchKey($ignored_search_key);
    }
  }

  /**
   * {@inheritdoc}
   */
  protected function testType($type) {
    return $this->getDataTypeHelper()->isTextType($type);
  }

  /**
   * {@inheritdoc}
   */
  protected function process(&$value) {
    $stopwords = $this->getStopWords();

    // Nothing to do without stopwords. Non-scalar values (arrays, objects,
    // NULL) cannot be a stopword and must not be passed to trim(), which
    // would raise a TypeError or deprecation notice on current PHP versions.
    if (empty($stopwords) || !is_scalar($value)) {
      return;
    }

    $value = trim((string) $value);
    if (isset($stopwords[$value])) {
      // Remember the removed word and blank out the value.
      $this->ignored[$value] = $value;
      $value = '';
    }
  }

  /**
   * Gets the stopwords for this processor.
   *
   * The result is built once from the "stopwords" configuration key and then
   * cached on the object until setConfiguration() is called again.
   *
   * @return string[]
   *   An array whose keys and values are the stopwords set for this processor.
   */
  protected function getStopWords() {
    if (!isset($this->stopwords)) {
      $configured = isset($this->configuration['stopwords']) ? $this->configuration['stopwords'] : [];
      $words = $this->parseStopwords($configured);

      // Key the list by the words themselves to allow isset() lookups.
      $this->stopwords = $words ? array_combine($words, $words) : [];
    }
    return $this->stopwords;
  }

  /**
   * Normalizes raw stopword input into a clean list.
   *
   * @param string|string[]|null $input
   *   Either a newline-separated string (as entered in the configuration
   *   form) or an array of stopwords.
   *
   * @return string[]
   *   The trimmed, non-empty, unique stopwords, indexed sequentially from 0.
   */
  protected function parseStopwords($input) {
    if (is_array($input)) {
      // Discard anything that cannot be treated as a word.
      $words = array_filter($input, 'is_scalar');
    }
    elseif (is_scalar($input)) {
      $words = explode("\n", (string) $input);
    }
    else {
      return [];
    }

    // trim() also strips the "\r" left over from "\r\n" line endings.
    $words = array_map('trim', $words);
    // Use strlen() as the callback so that a literal "0" is kept.
    $words = array_filter($words, 'strlen');

    // array_filter() and array_unique() preserve keys; reindex so that the
    // result is a proper list without gaps.
    return array_values(array_unique($words));
  }

}

Classes

Name (sort descending) | Description
Stopwords | Allows you to define stopwords which will be ignored in searches.