You are here

snowball_stemmer.module in Snowball Stemmer 8

Same filename and directory in other branches
  1. 2.x snowball_stemmer.module

Core hooks for Snowball Stemmer.

File

snowball_stemmer.module
View source
<?php

/**
 * @file
 * Core hooks for Snowball Stemmer.
 */
use Drupal\Core\Routing\RouteMatchInterface;

/**
 * Implements hook_help().
 *
 * Provides the "About" text shown on this module's help page. Every other
 * route gets no help text from this implementation.
 *
 * @param string $route_name
 *   Name of the route help is being requested for.
 * @param \Drupal\Core\Routing\RouteMatchInterface $route_match
 *   The current route match. Not used by this implementation.
 *
 * @return string|null
 *   HTML markup for the 'help.page.snowball_stemmer' route, NULL otherwise.
 */
function snowball_stemmer_help($route_name, RouteMatchInterface $route_match) {
  switch ($route_name) {

    // Main module help for the snowball_stemmer module.
    case 'help.page.snowball_stemmer':
      $output = '';
      $output .= '<h3>' . t('About') . '</h3>';
      // Every href attribute must be closed with a quote; an unterminated
      // attribute swallows the link text and the rest of the paragraph.
      $output .= '<p>' . t('<a href="https://en.wikipedia.org/wiki/Stemming">Stemmer</a> service built with <a href="https://github.com/wamania/php-stemmer">PHP Stemmer</a>, supporting: English, French, German, Italian, Spanish, Portuguese, Romanian, Dutch, Swedish, Norwegian, Danish. Includes core and <a href="https://drupal.org/project/search_api">Search API</a> processor integration.') . '</p>';
      return $output;

    default:
      // No help text for any other route.
      return NULL;
  }
}

/**
 * Implements hook_search_preprocess().
 *
 * Replaces every word in the text with its stem, as produced by the
 * 'snowball_stemmer.stemmer' service, while keeping the characters between
 * words exactly as they were.
 *
 * @param string $text
 *   The text to preprocess.
 * @param string|null $langcode
 *   (optional) Language code of the text. When not set, the ID of the current
 *   language reported by the language manager is used.
 *
 * @return string
 *   The text with its words stemmed. The text is returned unchanged when the
 *   stemmer rejects the language or when the text cannot be split into words
 *   (for example because it is not valid UTF-8).
 */
function snowball_stemmer_search_preprocess($text, $langcode = NULL) {

  // If the language is not set, get it from the language manager.
  if (!isset($langcode)) {
    $langcode = \Drupal::languageManager()->getCurrentLanguage()->getId();
  }

  $stemmer = \Drupal::service('snowball_stemmer.stemmer');
  if (!$stemmer->setLanguage($langcode)) {
    // The stemmer did not accept this language; leave the text alone.
    return $text;
  }

  // Core's tokenization occurs after this hook. Its HTML processing and
  // removal has already happened. We need to separate out words to stem, but
  // then return them into context for tokenizing, so the separators are
  // captured and kept in the split result.
  $pieces = preg_split('/([^\\p{L}\\p{N}]+)+/u', $text, -1, PREG_SPLIT_DELIM_CAPTURE);
  if ($pieces === FALSE) {
    // preg_split() fails (e.g. on malformed UTF-8 with the /u modifier).
    // Returning the input untouched avoids working on a non-array value.
    return $text;
  }

  // With PREG_SPLIT_DELIM_CAPTURE and a single capture group the result
  // strictly alternates word, separator, word, ... and always starts with a
  // (possibly empty) word, so words sit at the even indexes.
  foreach ($pieces as $index => $piece) {
    if ($index % 2 === 0 && $piece !== '') {
      $pieces[$index] = $stemmer->stem($piece);
    }
  }

  return implode('', $pieces);
}