You are here

porterstemmer.module in Porter-Stemmer 8

File

porterstemmer.module
View source
<?php

/**
 * @file
 * Contains porterstemmer.module.
 */
use Drupal\Core\Routing\RouteMatchInterface;
use Drupal\porterstemmer\Porter2;

/**
 * Regular expression fragment matching a word boundary for Porter Stemmer.
 *
 * A boundary is a run of one or more characters that are neither an ASCII
 * letter (a-z, A-Z) nor an apostrophe. The fragment carries no delimiters, so
 * callers wrap it themselves (for example '/' . PORTERSTEMMER_BOUNDARY . '/').
 */
const PORTERSTEMMER_BOUNDARY = "[^a-zA-Z']+";

/**
 * Implements hook_help().
 *
 * Returns the "About" markup for the route 'help.page.porterstemmer' and
 * nothing (NULL) for every other route.
 */
function porterstemmer_help($route_name, RouteMatchInterface $route_match) {
  // Only the main help page of the porterstemmer module is handled.
  if ($route_name != 'help.page.porterstemmer') {
    return NULL;
  }

  $heading = '<h3>' . t('About') . '</h3>';
  $summary = '<p>' . t('Improves American English language searching by simplifying related words to their root (conjugations, plurals, ...).') . '</p>';

  return $heading . $summary;
}

/**
 * Implements hook_search_preprocess().
 *
 * Stems the words in $text, using the Porter 2 (English) stemming algorithm.
 *
 * @param string $text
 *   The text to preprocess.
 * @param string|null $langcode
 *   (optional) Language code of the text. When not set, the ID of the current
 *   language reported by the language manager is used.
 *
 * @return string
 *   For language 'en': the lower-cased text in which every word has been
 *   replaced by its stem, with all delimiters left in place. For any other
 *   language: $text, unchanged.
 */
function porterstemmer_search_preprocess($text, $langcode = NULL) {
  // If the language is not set, get it from the language manager.
  if (!isset($langcode)) {
    $langcode = \Drupal::languageManager()
      ->getCurrentLanguage()
      ->getId();
  }

  // Only English text is stemmed. The comparison is strict so that non-string
  // values (0, TRUE, ...) can never be mistaken for 'en'.
  if ($langcode !== 'en') {
    return $text;
  }

  // Convert text to lower case, and replace special apostrophes with regular
  // apostrophes.
  $text = strtolower(str_replace('’', "'", $text));

  // Split into words, capturing the delimiters so they can be put back later.
  // PORTERSTEMMER_BOUNDARY already ends in its own "+" quantifier, so none is
  // appended here.
  $words = preg_split('/(' . PORTERSTEMMER_BOUNDARY . ')/', $text, -1, PREG_SPLIT_DELIM_CAPTURE);

  // preg_split() returns FALSE when PCRE fails; in that case (or if there is
  // nothing to process) hand back the normalized text as-is.
  if (empty($words)) {
    return $text;
  }
  $has_pecl_stem = _porterstemmer_pecl_loaded();

  // With PREG_SPLIT_DELIM_CAPTURE the pieces strictly alternate: even offsets
  // hold words (possibly empty when the text starts or ends with a
  // delimiter), odd offsets hold the captured delimiters.
  foreach ($words as $k => $word) {
    if ($k % 2 !== 0 || $word === '') {
      continue;
    }
    $words[$k] = $has_pecl_stem ? stem_english($word) : Porter2::stem($word);
  }

  // Put it all back together (note that delimiters are in $words).
  return implode('', $words);
}

/**
 * Reports whether the PECL stem extension is available.
 *
 * The check is performed once per request; later calls reuse the stored
 * answer.
 *
 * @return bool
 *   TRUE if the 'stem' extension is loaded and its stem_english() function
 *   exists, FALSE otherwise.
 */
function _porterstemmer_pecl_loaded() {
  // NULL means "not checked yet"; afterwards this holds the boolean result.
  static $available = NULL;

  if ($available === NULL) {
    $available = extension_loaded('stem') && function_exists('stem_english');
  }

  return $available;
}

Functions

Name (sort descending) | Description
porterstemmer_help Implements hook_help().
porterstemmer_search_preprocess Implements hook_search_preprocess().
_porterstemmer_pecl_loaded Checks to see if the PECL stem extension has been loaded.

Constants

Name (sort descending) | Description
PORTERSTEMMER_BOUNDARY Regular expression defining a word boundary for Porter Stemmer.