function RussianStemmer::stem_word in Russian stemming 7
Same name and namespace in other branches
- 6 rustemmer.module \RussianStemmer::stem_word()
File
- ./
RussianStemmer.php, line 39 - Definition of RussianStemmer.
Class
- RussianStemmer
- Implements Russian Stemming algorithm.
Code
/**
 * Returns the stem of a single Russian word.
 *
 * The word is lower-cased and "ё" is folded into "е" before stemming. When
 * $this->Stem_Caching is enabled, results are stored in and served from
 * $this->Stem_Cache, keyed by the normalised word.
 *
 * If the word does not match $this->RVRE, or the captured tail is empty, the
 * normalised word is returned unchanged.
 *
 * @param string $word
 *   The word to stem.
 *
 * @return string
 *   The stemmed word.
 */
function stem_word($word) {
  // Normalise so that case and the "ё"/"е" spelling variants share one stem.
  $word = drupal_strtolower($word);
  $word = str_replace('ё', 'е', $word);

  // Check against cache of stemmed words.
  if ($this->Stem_Caching && isset($this->Stem_Cache[$word])) {
    return $this->Stem_Cache[$word];
  }

  // Default result: the normalised word itself, used whenever we bail out.
  $stem = $word;

  // Single-pass block: "break" skips the remaining steps and keeps $stem as
  // the normalised word.
  do {
    // Capture group 1 is the untouched prefix, group 2 the tail that the
    // suffix rules operate on.
    if (!preg_match($this->RVRE, $word, $p)) {
      break;
    }
    $start = $p[1];
    $RV = $p[2];
    if (!$RV) {
      break;
    }

    // NOTE(review): s() is presumably a regex replace applied to $RV in place
    // that reports whether anything changed, and m() a plain regex match --
    // confirm against their definitions in this class.

    // Step 1: try a perfective gerund ending; failing that, drop a reflexive
    // ending and then an adjective (plus participle), verb or noun ending.
    if (!$this->s($RV, $this->PERFECTIVEGROUND, '')) {
      $this->s($RV, $this->REFLEXIVE, '');
      if ($this->s($RV, $this->ADJECTIVE, '')) {
        $this->s($RV, $this->PARTICIPLE, '');
      }
      else {
        if (!$this->s($RV, $this->VERB, '')) {
          $this->s($RV, $this->NOUN, '');
        }
      }
    }

    // Step 2: drop a trailing "и".
    $this->s($RV, '/и$/u', '');

    // Step 3: drop a derivational "ост"/"ость" ending when the DERIVATIONAL
    // pattern matches.
    if ($this->m($RV, $this->DERIVATIONAL)) {
      $this->s($RV, '/ость?$/u', '');
    }

    // Step 4: drop a trailing soft sign; otherwise drop a superlative ending
    // and undouble a trailing "нн".
    if (!$this->s($RV, '/ь$/u', '')) {
      // The superlative pattern is anchored to the end of the tail like every
      // other suffix rule here; unanchored, it would hit the first "ейш"
      // anywhere inside $RV rather than only a word-final ending.
      $this->s($RV, '/ейше?$/u', '');
      $this->s($RV, '/нн$/u', 'н');
    }

    $stem = $start . $RV;
  } while (FALSE);

  if ($this->Stem_Caching) {
    $this->Stem_Cache[$word] = $stem;
  }
  return $stem;
}