function search_excerpt in Drupal 4
Same name and namespace in other branches
- 8 core/modules/search/search.module \search_excerpt()
- 5 modules/search/search.module \search_excerpt()
- 6 modules/search/search.module \search_excerpt()
- 7 modules/search/search.module \search_excerpt()
- 9 core/modules/search/search.module \search_excerpt()
- 10 core/modules/search/search.module \search_excerpt()
Returns snippets from a piece of text, with certain keywords highlighted. Used for formatting search results.
Parameters
$keys: A string containing a search query.
$text: The text to extract fragments from.
Return value
A string containing HTML for the excerpt.
Related topics
1 call to search_excerpt()
- node_search in modules/node.module - Implementation of hook_search().
File
- modules/search.module, line 1097 - Enables site-wide keyword searching.
Code
/**
 * Returns snippets from a piece of text, with certain keywords highlighted.
 *
 * Used for formatting search results.
 *
 * @param $keys
 *   A string containing a search query.
 * @param $text
 *   The text to extract fragments from.
 *
 * @return
 *   A string containing HTML for the excerpt.
 */
function search_excerpt($keys, $text) {
  // We highlight around non-indexable or CJK characters.
  $boundary = '(?:(?<=[' . PREG_CLASS_SEARCH_EXCLUDE . PREG_CLASS_CJK . '])|(?=[' . PREG_CLASS_SEARCH_EXCLUDE . PREG_CLASS_CJK . ']))';

  // Extract positive keywords and phrases. Group 2 captures quoted phrases,
  // group 3 captures bare words; for every match the group that did not
  // participate is filled with an empty string.
  preg_match_all('/ ("([^"]+)"|(?!OR)([^" ]+))/', ' ' . $keys, $matches);
  $keys = array_merge($matches[2], $matches[3]);

  // Prepare text: pad tags with spaces so adjacent words do not run together
  // once the tags are stripped, and pad both ends so a space can always be
  // found around a keyword.
  $text = ' ' . strip_tags(str_replace(array('<', '>'), array(' <', '> '), $text)) . ' ';

  // NOTE(review): _search_excerpt_replace() is defined elsewhere; presumably
  // it escapes each key for use inside a regular expression - confirm.
  array_walk($keys, '_search_excerpt_replace');

  // Drop empty keys up front. Removing them lazily inside the loop below is
  // not enough: the loop may stop early once enough text has been collected,
  // and a leftover empty key would turn the highlight pattern into
  // '(foo|bar|)', wrapping every boundary in an empty <strong></strong>.
  foreach ($keys as $k => $key) {
    if (strlen($key) == 0) {
      unset($keys[$k]);
    }
  }
  $workkeys = $keys;

  // Extract a fragment per keyword for at most 4 keywords.
  // First we collect ranges of text around each keyword, starting/ending
  // at spaces.
  // If the sum of all fragments is too short, we look for second occurrences.
  $ranges = array();
  $included = array();
  $length = 0;
  while ($length < 256 && count($workkeys)) {
    foreach ($workkeys as $k => $key) {
      if ($length >= 256) {
        break;
      }
      // Remember occurrence of key so we can skip over it if more occurrences
      // are desired.
      if (!isset($included[$key])) {
        $included[$key] = 0;
      }
      // Locate a keyword (position $p), then locate a space in front (position
      // $q) and behind it (position $s, relative to $p).
      if (!preg_match('/' . $boundary . $key . $boundary . '/iu', $text, $match, PREG_OFFSET_CAPTURE, $included[$key])) {
        // No (further) occurrence of this keyword.
        unset($workkeys[$k]);
        continue;
      }
      $p = $match[0][1];
      if (($q = strpos($text, ' ', max(0, $p - 60))) === false) {
        unset($workkeys[$k]);
        continue;
      }
      $end = substr($text, $p, 80);
      if (($s = strrpos($end, ' ')) === false) {
        unset($workkeys[$k]);
        continue;
      }
      // Two fragments may start at the same space; keep the one reaching
      // furthest so a later, shorter fragment cannot shrink an earlier one.
      $to = $p + $s;
      if (!isset($ranges[$q]) || $ranges[$q] < $to) {
        $ranges[$q] = $to;
      }
      $length += $to - $q;
      // Resume the next search for this key just past the current match.
      $included[$key] = $p + 1;
    }
  }

  // If we didn't find anything, return the beginning.
  if (count($ranges) == 0) {
    return truncate_utf8($text, 256) . ' ...';
  }

  // Sort the text ranges by starting position.
  ksort($ranges);

  // Now we collapse overlapping text ranges into one. The sorting makes it O(n).
  $newranges = array();
  foreach ($ranges as $from2 => $to2) {
    if (!isset($from1)) {
      $from1 = $from2;
      $to1 = $to2;
      continue;
    }
    if ($from2 <= $to1) {
      $to1 = max($to1, $to2);
    }
    else {
      $newranges[$from1] = $to1;
      $from1 = $from2;
      $to1 = $to2;
    }
  }
  $newranges[$from1] = $to1;

  // Fetch text
  $out = array();
  foreach ($newranges as $from => $to) {
    $out[] = substr($text, $from, $to - $from);
  }
  // A leading ellipsis is only added when the excerpt does not start at the
  // very beginning of the text.
  $text = (isset($newranges[0]) ? '' : '... ') . implode(' ... ', $out) . ' ...';

  // Highlight keywords. Must be done at once to prevent conflicts ('strong' and '<strong>').
  $text = preg_replace('/' . $boundary . '(' . implode('|', $keys) . ')' . $boundary . '/iu', '<strong>\\0</strong>', $text);
  return $text;
}