You are here

function porterstemmer_sbp_excerpt_match in Porter-Stemmer 7

Same name and namespace in other branches
  1. 6.2 porterstemmer.module \porterstemmer_sbp_excerpt_match()

Implements hook_sbp_excerpt_match().

Allows Porter Stemmer to display better search excerpts with the Search by page module.

1 call to porterstemmer_sbp_excerpt_match()
PorterStemmerInternalsUnitTest::testExcerpts in ./porterstemmer.test
Tests the excerpt function porterstemmer_sbp_excerpt_match().

File

./porterstemmer.module, line 78
Porter 2 Stemming for Drupal.

Code

/**
 * Implements hook_sbp_excerpt_match().
 *
 * Looks in $text, starting at byte offset $offset, for a word whose stem
 * matches the stem of $key, so that the whole word can be highlighted in a
 * search excerpt.
 *
 * @param string $key
 *   The keyword to look for. It is stemmed before matching.
 * @param string $text
 *   The text to search in.
 * @param int $offset
 *   Byte offset in $text at which to start searching.
 * @param string $boundary
 *   Regular expression fragment matching a word boundary. It is inserted
 *   into the patterns as-is (not escaped).
 *
 * @return array|false
 *   FALSE if no matching word is found. Otherwise an array with keys:
 *   - where: Byte offset in $text where the matched word starts.
 *   - keyword: The full word that was found in $text.
 */
function porterstemmer_sbp_excerpt_match($key, $text, $offset, $boundary) {

  // We do not check for the PECL stem here as further parsing logic is needed.
  module_load_include('inc', 'porterstemmer', 'includes/standard-stemmer');

  // Stem the keyword down to its root form. This is done exactly once: the
  // search loop below must not re-stem the already reduced key.
  $key = porterstemmer_stem($key);

  // In many cases, the root word is a substring of the full word, but not
  // all. The cases where it is not, the root ends in e, i, or y, and if this
  // last letter is removed, the root is a substring of the full word.
  // So remove these letters at the end of the root.
  $didit = FALSE;
  porterstemmer_suffix($key, 'i', '', $didit, NULL, 2) or porterstemmer_suffix($key, 'e', '', $didit, NULL, 2) or porterstemmer_suffix($key, 'y', '', $didit, NULL, 2);

  // The key is literal text, so escape it before building the pattern.
  // $boundary is a regular expression fragment and is used unescaped.
  $key_pattern = '/' . $boundary . '(' . preg_quote($key, '/') . ')/iu';
  $boundary_pattern = '/' . $boundary . '/iu';

  // Both the found stem and $key may contain upper case.
  $key_lower = drupal_strtolower($key);

  while (TRUE) {
    // Look for the modified key at the start of a word.
    $match = array();
    if (!preg_match($key_pattern, $text, $match, PREG_OFFSET_CAPTURE, $offset)) {

      // Didn't match our modified key.
      return FALSE;
    }

    // We have a potential match. Find the end of the word we actually
    // matched, so it can be highlighted (making sure it's a real match for
    // our key).
    $pos = $match[1][1];

    // Note: Do not use drupal_strlen/drupal_substr here! Need the real PHP
    // string lengths/pos. The length of the text that actually matched is
    // used, since a case-insensitive match may differ in byte length from
    // the key itself.
    $newmatch = array();
    if (preg_match($boundary_pattern, $text, $newmatch, PREG_OFFSET_CAPTURE, $pos + strlen($match[1][0]))) {
      $keyfound = substr($text, $pos, $newmatch[0][1] - $pos);
    }
    else {

      // Assume we're going to the end of the string.
      $keyfound = substr($text, $pos);
    }

    // Reduce the found word the same way the key was reduced.
    $foundstem = porterstemmer_stem($keyfound);
    porterstemmer_suffix($foundstem, 'i', '', $didit, NULL, 2) or porterstemmer_suffix($foundstem, 'e', '', $didit, NULL, 2) or porterstemmer_suffix($foundstem, 'y', '', $didit, NULL, 2);

    if (drupal_strtolower($foundstem) === $key_lower) {
      return array(
        'where' => $pos,
        'keyword' => $keyfound,
      );
    }

    // It was a false match, so search again later in the string. Bail out if
    // the found word is empty, since the search could not move forward.
    $advance = strlen($keyfound);
    if ($advance === 0) {
      return FALSE;
    }
    $offset = $pos + $advance;
  }
}