You are here

function RussianStemmer::stem_word in Russian stemming 6

Same name and namespace in other branches
  1. 7 RussianStemmer.php \RussianStemmer::stem_word()

File

./rustemmer.module, line 98
Russian stemming algorithm provided by Dr Martin Porter

Class

RussianStemmer
Implements Russian Stemming algorithm.

Code

/**
 * Reduces a Russian word to its stem.
 *
 * The word is lower-cased and every 'ё' is replaced with 'е' before any
 * other processing; that normalised form is also the key used for the
 * optional stem cache.
 *
 * @param string $word
 *   The word to stem.
 *
 * @return string
 *   The stem. When $this->RVRE does not match the normalised word, or the
 *   remainder it captures is empty, the normalised word is returned as is.
 */
function stem_word($word) {
  $word = drupal_strtolower($word);
  $word = str_replace('ё', 'е', $word);

  // Serve repeated lookups from the cache when caching is switched on.
  if ($this->Stem_Caching && isset($this->Stem_Cache[$word])) {
    return $this->Stem_Cache[$word];
  }

  // Fall back to the normalised word when there is nothing to strip.
  $stem = $word;

  // RVRE splits the word into an untouched prefix (group 1) and the remainder
  // the suffix rules operate on (group 2). The remainder is compared against
  // '' explicitly: "0" is a non-empty string and must not be mistaken for an
  // empty remainder by a loose truthiness check.
  if (preg_match($this->RVRE, $word, $p) && $p[2] !== '') {
    $start = $p[1];
    $RV = $p[2];

    // NOTE(review): s() is assumed to rewrite its first argument in place and
    // to return whether a replacement happened; m() is assumed to only test
    // the pattern. Both helpers are defined outside this function.

    // Step 1: strip a perfective gerund ending; failing that, strip a
    // reflexive ending and then exactly one of: adjective (optionally
    // followed by a participle ending), verb, or noun ending.
    if (!$this->s($RV, $this->PERFECTIVEGROUND, '')) {
      $this->s($RV, $this->REFLEXIVE, '');
      if ($this->s($RV, $this->ADJECTIVE, '')) {
        $this->s($RV, $this->PARTICIPLE, '');
      }
      elseif (!$this->s($RV, $this->VERB, '')) {
        $this->s($RV, $this->NOUN, '');
      }
    }

    // Step 2: drop a trailing 'и'.
    $this->s($RV, '/и$/u', '');

    // Step 3: drop a derivational 'ост'/'ость' ending, but only when the
    // DERIVATIONAL pattern accepts the remainder.
    if ($this->m($RV, $this->DERIVATIONAL)) {
      $this->s($RV, '/ость?$/u', '');
    }

    // Step 4: drop a trailing soft sign; otherwise drop a superlative ending
    // and collapse a trailing double 'н'. The superlative pattern is anchored
    // to the end of the remainder so that an 'ейш' sequence in the middle of
    // it is left intact.
    if (!$this->s($RV, '/ь$/u', '')) {
      $this->s($RV, '/ейше?$/u', '');
      $this->s($RV, '/нн$/u', 'н');
    }

    $stem = $start . $RV;
  }

  if ($this->Stem_Caching) {
    $this->Stem_Cache[$word] = $stem;
  }
  return $stem;
}