protected function SearchApiHighlight::createExcerpt in Search API 7
Returns snippets from a piece of text, with certain keywords highlighted.
Largely copied from search_excerpt().
Parameters
string $text: The text to extract fragments from.
array $keys: Search keywords entered by the user.
Return value
string|null A string containing HTML for the excerpt, or NULL if none could be created.
1 call to SearchApiHighlight::createExcerpt()
- SearchApiHighlight::postprocessSearchResults in includes/processor_highlight.inc - Does nothing.
File
- includes/processor_highlight.inc, line 322 - Contains the SearchApiHighlight class.
Class
- SearchApiHighlight
- Processor for highlighting search results.
Code
/**
 * Returns snippets from a piece of text, with certain keywords highlighted.
 *
 * Largely copied from search_excerpt().
 *
 * All positions and lengths in this method are byte offsets (strlen(),
 * strpos(), substr() and PREG_OFFSET_CAPTURE all count bytes). Fragments are
 * only ever cut at space characters or at the very start/end of the text.
 *
 * @param string $text
 *   The text to extract fragments from.
 * @param array $keys
 *   Search keywords entered by the user.
 *
 * @return string|null
 *   A string containing HTML for the excerpt, or NULL if none could be
 *   created.
 */
protected function createExcerpt($text, array $keys) {
  // Prepare text by stripping HTML tags and decoding HTML entities. Spaces are
  // inserted around angle brackets before the tags are stripped.
  $text = strip_tags(str_replace(array('<', '>'), array(' <', '> '), $text));
  $text = decode_entities($text);
  // Collapse every run of whitespace into a single space.
  $text = preg_replace('/\\s+/', ' ', $text);
  $text = trim($text, ' ');
  $text_length = strlen($text);

  // Try to reach the requested excerpt length with about two fragments (each
  // with a keyword and some context).
  $ranges = array();
  $length = 0;
  $look_start = array();
  $remaining_keys = $keys;

  // Get the set excerpt length from the configuration. If the length is too
  // small, only use one fragment.
  $excerpt_length = $this->options['excerpt_length'];
  // round() returns a float; cast so that all byte offsets derived from the
  // context length are integers.
  $context_length = (int) round($excerpt_length / 4) - 3;
  if ($context_length < 32) {
    $context_length = (int) round($excerpt_length / 2) - 1;
  }

  // Loop-invariant values: the matching mode and the space-padded subject
  // used for whole-word matching.
  $partial = !empty($this->options['highlight_partial']);
  $padded_text = ' ' . $text . ' ';

  while ($length < $excerpt_length && !empty($remaining_keys)) {
    $found_keys = array();
    foreach ($remaining_keys as $key) {
      if ($length >= $excerpt_length) {
        break;
      }

      // Remember where we last found $key, in case we are coming through a
      // second time.
      if (!isset($look_start[$key])) {
        $look_start[$key] = 0;
      }

      // See if we can find $key after where we found it the last time. Since
      // we are requiring a match on a word boundary, the whole-word search
      // runs on a copy of $text that starts and ends with a space.
      $matches = array();
      if (!$partial) {
        $subject = $padded_text;
        $found_position = FALSE;
        $regex = '/' . static::$boundary . preg_quote($key, '/') . static::$boundary . '/iu';
        if (preg_match($regex, $subject, $matches, PREG_OFFSET_CAPTURE, $look_start[$key])) {
          // NOTE(review): this offset is relative to the padded copy, so it
          // may be one byte ahead of the same spot in $text, depending on how
          // static::$boundary matches - confirm.
          $found_position = $matches[0][1];
        }
      }
      else {
        $subject = $text;
        $found_position = stripos($subject, $key, $look_start[$key]);
      }

      if ($found_position === FALSE) {
        continue;
      }

      // Resume the next search at the start of the following character. A
      // plain "+ 1" would land inside a multi-byte UTF-8 character when the
      // match starts with one, and preg_match() with the "u" modifier then
      // fails (PREG_BAD_UTF8_OFFSET_ERROR), so that later occurrences of the
      // keyword would never be found. UTF-8 continuation bytes have the bit
      // pattern 10xxxxxx, which is what the mask below tests for.
      $next_start = $found_position + 1;
      $subject_length = strlen($subject);
      while ($next_start < $subject_length && (ord($subject[$next_start]) & 0xC0) === 0x80) {
        ++$next_start;
      }
      $look_start[$key] = $next_start;

      // Keep track of which keys we found this time, in case we need to
      // pass through again to find more text.
      $found_keys[] = $key;

      // Locate a space before and after this match, leaving some context on
      // each end.
      if ($found_position > $context_length) {
        $before = strpos($text, ' ', $found_position - $context_length);
        if ($before !== FALSE) {
          // Start the fragment after the space, not on it.
          ++$before;
        }
      }
      else {
        // The match is close to the beginning: start at the very beginning.
        $before = 0;
      }

      if ($before === FALSE || $before > $found_position) {
        // No usable starting point in front of the match.
        continue;
      }

      if ($text_length > $found_position + $context_length) {
        // Last space inside the context window that follows the match.
        $after = strrpos(substr($text, 0, $found_position + $context_length), ' ', $found_position);
      }
      else {
        // The context window reaches the end of the text.
        $after = $text_length;
      }

      if ($after !== FALSE && $after > $found_position && $before < $after) {
        // Save this range.
        $ranges[$before] = $after;
        $length += $after - $before;
      }
    }

    // Next time through this loop, only look for keys we found this time,
    // if any.
    $remaining_keys = $found_keys;
  }

  if (!$ranges) {
    // We didn't find any keyword matches, return NULL.
    return NULL;
  }

  // Sort the text ranges by starting position.
  ksort($ranges);

  // Collapse overlapping text ranges into one. The sorting makes it O(n).
  $newranges = array();
  $from1 = $to1 = NULL;
  foreach ($ranges as $from2 => $to2) {
    if ($from1 === NULL) {
      // This is the first time through this loop: initialize.
      $from1 = $from2;
      $to1 = $to2;
      continue;
    }
    if ($from2 <= $to1) {
      // The ranges overlap: combine them.
      $to1 = max($to1, $to2);
    }
    else {
      // The ranges do not overlap: save the working range and start a new
      // one.
      $newranges[$from1] = $to1;
      $from1 = $from2;
      $to1 = $to2;
    }
  }
  // Save the remaining working range.
  $newranges[$from1] = $to1;

  // Fetch text within the combined ranges we found.
  $out = array();
  foreach ($newranges as $from => $to) {
    $out[] = substr($text, $from, $to - $from);
  }
  if (!$out) {
    return NULL;
  }

  // Let translators have the ... separator text as one chunk. The leading
  // separator is left out when the first fragment starts at offset 0.
  $dots = explode('!excerpt', t('... !excerpt ... !excerpt ...'));
  $text = (isset($newranges[0]) ? '' : $dots[0]) . implode($dots[1], $out) . $dots[2];
  $text = check_plain($text);

  // Since we stripped the tags at the beginning, highlighting doesn't need to
  // handle HTML anymore.
  return $this->highlightField($text, $keys, FALSE);
}