function search_excerpt in Drupal 9
Same name and namespace in other branches
- 8 core/modules/search/search.module \search_excerpt()
- 4 modules/search.module \search_excerpt()
- 5 modules/search/search.module \search_excerpt()
- 6 modules/search/search.module \search_excerpt()
- 7 modules/search/search.module \search_excerpt()
Returns snippets from a piece of text, with search keywords highlighted.
Used for formatting search results. All HTML tags will be stripped from $text.
Parameters
string $keys: A string containing a search query.
string $text: The text to extract fragments from.
string|null $langcode: Language code for the language of $text, if known.
Return value
array A render array containing HTML for the excerpt.
Related topics
3 calls to search_excerpt()
- HelpSearch::prepareResults in core/modules/help_topics/src/Plugin/Search/HelpSearch.php - Prepares search results for display.
- NodeSearch::prepareResults in core/modules/node/src/Plugin/Search/NodeSearch.php - Prepares search results for rendering.
- SearchExcerptTest::doSearchExcerpt in core/modules/search/tests/src/Kernel/SearchExcerptTest.php - Calls search_excerpt() and renders output.
File
- core/modules/search/search.module, line 301 - Enables site-wide keyword searching.
Code
/**
 * Returns snippets from a piece of text, with search keywords highlighted.
 *
 * Used for formatting search results. All HTML tags will be stripped from
 * $text.
 *
 * @param string $keys
 *   A string containing a search query. Quoted phrases and bare words are
 *   both treated as keywords; a token that is exactly "OR" is skipped.
 * @param string $text
 *   The text to extract fragments from.
 * @param string|null $langcode
 *   Language code for the language of $text, if known.
 *
 * @return array
 *   A render array containing HTML for the excerpt. When at least one keyword
 *   is found this is a '#markup' element in which only <strong> is allowed;
 *   otherwise it is a '#plain_text' element holding the truncated start of
 *   the text.
 */
function search_excerpt($keys, $text, $langcode = NULL) {
  // We highlight around non-indexable or CJK characters.
  $boundary_character = '[' . Unicode::PREG_CLASS_WORD_BOUNDARY . PREG_CLASS_CJK . ']';
  $preceded_by_boundary = '(?<=' . $boundary_character . ')';
  $followed_by_boundary = '(?=' . $boundary_character . ')';

  // Extract positive keywords and phrases. Group 2 captures the contents of
  // quoted phrases, group 3 captures bare words. Only a token that is exactly
  // "OR" (followed by a space, a quote, or the end of the query) is skipped,
  // so that words which merely begin with "OR", such as "ORANGE", are still
  // treated as keywords.
  preg_match_all('/ ("([^"]+)"|(?!OR(?:[ "]|$))([^" ]+))/', ' ' . $keys, $matches);
  $keys = array_merge($matches[2], $matches[3]);

  // Prepare text by stripping HTML tags and decoding HTML entities. Spaces
  // are inserted around tags first so that words separated only by markup do
  // not run together once the tags are removed.
  $text = strip_tags(str_replace(['<', '>'], [' <', '> '], $text));
  $text = Html::decodeEntities($text);
  $text_length = strlen($text);

  // Make a list of unique keywords that are actually found in the text,
  // which could be items in $keys or replacements that are equivalent through
  // \Drupal\search\SearchTextProcessorInterface::analyze().
  $temp_keys = [];
  foreach ($keys as $key) {
    $key = _search_find_match_with_simplify($key, $text, $boundary_character, $langcode);
    if (isset($key)) {
      // Quote slashes so they can be used in regular expressions.
      $temp_keys[] = preg_quote($key, '/');
    }
  }
  // Several keywords could have simplified down to the same thing, so pick
  // out the unique ones.
  $keys = array_unique($temp_keys);

  // Extract fragments of about 60 characters around keywords, bounded by word
  // boundary characters. Try to reach 256 characters, using second occurrences
  // if necessary.
  $ranges = [];
  $length = 0;
  $look_start = [];
  $remaining_keys = $keys;

  while ($length < 256 && !empty($remaining_keys)) {
    $found_keys = [];
    foreach ($remaining_keys as $key) {
      if ($length >= 256) {
        break;
      }

      // Remember where we last found $key, in case we are coming through a
      // second time.
      if (!isset($look_start[$key])) {
        $look_start[$key] = 0;
      }

      // See if we can find $key after where we found it the last time. Since
      // we are requiring a match on a word boundary, make sure $text starts
      // and ends with a space.
      $matches = [];
      if (preg_match('/' . $preceded_by_boundary . $key . $followed_by_boundary . '/iu', ' ' . $text . ' ', $matches, PREG_OFFSET_CAPTURE, $look_start[$key])) {
        // Offset of the match within the space-padded text.
        $found_position = $matches[0][1];
        // Resume just past this match on the next pass.
        $look_start[$key] = $found_position + 1;
        // Keep track of which keys we found this time, in case we need to
        // pass through again to find more text.
        $found_keys[] = $key;

        // Locate a space before and after this match, leaving about 60
        // characters of context on each end.
        $before = strpos(' ' . $text, ' ', max(0, $found_position - 61));
        if ($before !== FALSE && $before <= $found_position) {
          if ($text_length > $found_position + 60) {
            // Last space at or after the match within the trailing context
            // window.
            $after = strrpos(substr($text, 0, $found_position + 60), ' ', $found_position);
          }
          else {
            // The text ends inside the context window: take all of it.
            $after = $text_length;
          }
          if ($after !== FALSE && $after > $found_position) {
            // Account for the spaces we added.
            $before = max($before - 1, 0);
            if ($before < $after) {
              // Save this range.
              $ranges[$before] = $after;
              $length += $after - $before;
            }
          }
        }
      }
    }
    // Next time through this loop, only look for keys we found this time,
    // if any.
    $remaining_keys = $found_keys;
  }

  if (empty($ranges)) {
    // We didn't find any keyword matches, so just return the first part of the
    // text. We also need to re-encode any HTML special characters that we
    // entity-decoded above.
    return [
      '#plain_text' => Unicode::truncate($text, 256, TRUE, TRUE),
    ];
  }

  // Sort the text ranges by starting position.
  ksort($ranges);

  // Collapse overlapping text ranges into one. The sorting makes it O(n).
  $new_ranges = [];
  $max_end = 0;
  $working_from = NULL;
  $working_to = NULL;
  foreach ($ranges as $this_from => $this_to) {
    $max_end = max($max_end, $this_to);
    if (!isset($working_from)) {
      // This is the first time through this loop: initialize.
      $working_from = $this_from;
      $working_to = $this_to;
      continue;
    }
    if ($this_from <= $working_to) {
      // The ranges overlap: combine them.
      $working_to = max($working_to, $this_to);
    }
    else {
      // The ranges do not overlap: save the working range and start a new one.
      $new_ranges[$working_from] = $working_to;
      $working_from = $this_from;
      $working_to = $this_to;
    }
  }
  // Save the remaining working range.
  $new_ranges[$working_from] = $working_to;

  // Fetch text within the combined ranges we found.
  $out = [];
  foreach ($new_ranges as $from => $to) {
    $out[] = substr($text, $from, $to - $from);
  }

  // Combine the text chunks with "…" separators. The "…" needs to be
  // translated. Let translators have the … separator text as one chunk.
  $ellipses = explode('@excerpt', t('… @excerpt … @excerpt …'));
  // Only add a leading separator when the excerpt does not start at the
  // beginning of the text, and a trailing one when it stops short of the end.
  $text = (isset($new_ranges[0]) ? '' : $ellipses[0]) . implode($ellipses[1], $out) . ($max_end < strlen($text) - 1 ? $ellipses[2] : '');
  $text = Html::escape($text);

  // Highlight keywords. Must be done at once to prevent conflicts ('strong'
  // and '<strong>'). The text is padded with spaces so that keywords at either
  // end still sit next to a boundary character.
  $padded = ' ' . $text . ' ';
  $highlighted = preg_replace('/' . $preceded_by_boundary . '(?:' . implode('|', $keys) . ')' . $followed_by_boundary . '/iu', '<strong>\\0</strong>', $padded);
  // preg_replace() returns NULL on a PCRE error; in that case fall back to the
  // escaped excerpt without highlighting rather than discarding it.
  $text = trim($highlighted ?? $padded);

  return [
    '#markup' => $text,
    '#allowed_tags' => [
      'strong',
    ],
  ];
}