You are here

protected function SearchApiHighlight::createExcerpt in Search API 7

Returns snippets from a piece of text, with certain keywords highlighted.

Largely copied from search_excerpt().

Parameters

string $text: The text to extract fragments from.

array $keys: Search keywords entered by the user.

Return value

string|null A string containing HTML for the excerpt, or NULL if none could be created.

1 call to SearchApiHighlight::createExcerpt()
SearchApiHighlight::postprocessSearchResults in includes/processor_highlight.inc
Does nothing.

File

includes/processor_highlight.inc, line 322
Contains the SearchApiHighlight class.

Class

SearchApiHighlight
Processor for highlighting search results.

Code

/**
 * Returns snippets from a piece of text, with certain keywords highlighted.
 *
 * Largely copied from search_excerpt().
 *
 * The text is stripped of HTML, then scanned for the keywords. Around each
 * match a fragment delimited by spaces is collected until the configured
 * excerpt length is reached; overlapping fragments are merged and joined with
 * a translatable ellipsis.
 *
 * @param string $text
 *   The text to extract fragments from.
 * @param array $keys
 *   Search keywords entered by the user.
 *
 * @return string|null
 *   A string containing HTML for the excerpt, or NULL if none could be
 *   created.
 */
protected function createExcerpt($text, array $keys) {

  // Prepare text by stripping HTML tags and decoding HTML entities. Spaces are
  // inserted around the angle brackets first, so that words separated only by
  // a tag do not get glued together once the tag is removed.
  $text = strip_tags(str_replace(array(
    '<',
    '>',
  ), array(
    ' <',
    '> ',
  ), $text));
  $text = decode_entities($text);
  $text = preg_replace('/\\s+/', ' ', $text);
  $text = trim($text, ' ');
  $text_length = strlen($text);

  // Try to reach the requested excerpt length with about two fragments (each
  // with a keyword and some context).
  // $ranges maps a fragment's start offset to its end offset (both are byte
  // offsets into $text).
  $ranges = array();
  $length = 0;
  $look_start = array();
  $remaining_keys = $keys;

  // Get the set excerpt length from the configuration. If the length is too
  // small, only use one fragment.
  $excerpt_length = $this->options['excerpt_length'];
  // round() returns a float; the value is used as a string offset below, so
  // convert it to an integer once.
  $context_length = (int) round($excerpt_length / 4) - 3;
  if ($context_length < 32) {
    $context_length = (int) round($excerpt_length / 2) - 1;
  }
  while ($length < $excerpt_length && !empty($remaining_keys)) {
    $found_keys = array();
    foreach ($remaining_keys as $key) {
      if ($length >= $excerpt_length) {
        break;
      }

      // An empty keyword carries no information and would match at every
      // position, so it is never searched for.
      if ((string) $key === '') {
        continue;
      }

      // Remember where we last found $key, in case we are coming through a
      // second time.
      if (!isset($look_start[$key])) {
        $look_start[$key] = 0;
      }

      // See if we can find $key after where we found it the last time. Since
      // we are requiring a match on a word boundary, make sure the searched
      // string starts and ends with a space.
      if (empty($this->options['highlight_partial'])) {
        $found_position = FALSE;
        $matches = array();
        $regex = '/' . static::$boundary . preg_quote($key, '/') . static::$boundary . '/iu';
        if (preg_match($regex, ' ' . $text . ' ', $matches, PREG_OFFSET_CAPTURE, $look_start[$key])) {
          // The reported offset is relative to the padded string. Continue
          // the next search from there (still in padded coordinates), but
          // translate the position back into $text coordinates for the range
          // computations below by removing the leading padding space.
          $look_start[$key] = $matches[0][1] + 1;
          $found_position = max(0, $matches[0][1] - 1);
        }
      }
      else {
        $found_position = stripos($text, $key, $look_start[$key]);
        if ($found_position !== FALSE) {
          $look_start[$key] = $found_position + 1;
        }
      }
      if ($found_position === FALSE) {
        continue;
      }

      // Keep track of which keys we found this time, in case we need to
      // pass through again to find more text.
      $found_keys[] = $key;

      // Locate a space before and after this match, leaving some context on
      // each end.
      if ($found_position > $context_length) {
        $before = strpos($text, ' ', $found_position - $context_length);
        if ($before !== FALSE) {
          // Start the fragment on the character following the space.
          ++$before;
        }
      }
      else {
        // The match is close enough to the start to include everything
        // before it.
        $before = 0;
      }
      if ($before === FALSE || $before > $found_position) {
        continue;
      }

      if ($text_length > $found_position + $context_length) {
        // Last space inside the context window that lies at or after the
        // match position.
        $after = strrpos(substr($text, 0, $found_position + $context_length), ' ', $found_position);
      }
      else {
        // The context window reaches the end of the text.
        $after = $text_length;
      }
      if ($after === FALSE || $after <= $found_position || $before >= $after) {
        continue;
      }

      // Save this range. If a range with the same start already exists, keep
      // the longer of the two and only count the newly covered part, so
      // that $length is not inflated and an existing fragment never shrinks.
      if (!isset($ranges[$before])) {
        $ranges[$before] = $after;
        $length += $after - $before;
      }
      elseif ($after > $ranges[$before]) {
        $length += $after - $ranges[$before];
        $ranges[$before] = $after;
      }
    }

    // Next time through this loop, only look for keys we found this time,
    // if any.
    $remaining_keys = $found_keys;
  }
  if (!$ranges) {

    // We didn't find any keyword matches, return NULL.
    return NULL;
  }

  // Sort the text ranges by starting position.
  ksort($ranges);

  // Collapse overlapping text ranges into one. The sorting makes it O(n).
  $newranges = array();
  $from1 = $to1 = NULL;
  foreach ($ranges as $from2 => $to2) {
    if ($from1 === NULL) {

      // This is the first time through this loop: initialize.
      $from1 = $from2;
      $to1 = $to2;
      continue;
    }
    if ($from2 <= $to1) {

      // The ranges overlap: combine them.
      $to1 = max($to1, $to2);
    }
    else {

      // The ranges do not overlap: save the working range and start a new
      // one.
      $newranges[$from1] = $to1;
      $from1 = $from2;
      $to1 = $to2;
    }
  }

  // Save the remaining working range.
  $newranges[$from1] = $to1;

  // Fetch text within the combined ranges we found.
  $out = array();
  foreach ($newranges as $from => $to) {
    $out[] = substr($text, $from, $to - $from);
  }
  if (!$out) {
    return NULL;
  }

  // Let translators have the ... separator text as one chunk. The translated
  // string is split into the leading, separating and trailing ellipsis; the
  // leading one is omitted when the excerpt starts at the beginning of the
  // text.
  $dots = explode('!excerpt', t('... !excerpt ... !excerpt ...'));
  $text = (isset($newranges[0]) ? '' : $dots[0]) . implode($dots[1], $out) . $dots[2];
  $text = check_plain($text);

  // Since we stripped the tags at the beginning, highlighting doesn't need to
  // handle HTML anymore.
  return $this
    ->highlightField($text, $keys, FALSE);
}