View source
<?php
namespace Drupal\search_api\Plugin\search_api\processor;
use Drupal\Component\Utility\Html;
use Drupal\Component\Utility\Unicode;
use Drupal\Core\Form\FormStateInterface;
use Drupal\Core\Plugin\PluginFormInterface;
use Drupal\Core\Render\Element;
use Drupal\search_api\LoggerTrait;
use Drupal\search_api\Plugin\PluginFormTrait;
use Drupal\search_api\Processor\ProcessorPluginBase;
use Drupal\search_api\Query\QueryInterface;
use Drupal\search_api\Query\ResultSetInterface;
use Drupal\search_api\Utility\DataTypeHelperInterface;
class Highlight extends ProcessorPluginBase implements PluginFormInterface {
use LoggerTrait;
use PluginFormTrait;
/**
 * PCRE fragment matching a position next to a word-boundary character.
 *
 * Assembled once in the constructor from Unicode::PREG_CLASS_WORD_BOUNDARY
 * plus a list of CJK code point ranges, as a lookbehind/lookahead
 * alternation. It has no delimiters or modifiers, so it is embedded into the
 * larger patterns built by createExcerpt() and highlightField().
 *
 * @var string
 */
protected static $boundary;
/**
 * Complete PCRE pattern used to split a keyword string into single words.
 *
 * Assembled once in the constructor; used by getKeywords() with
 * preg_split().
 *
 * @var string
 */
protected static $split;
/**
 * The data type helper, if one was injected via setDataTypeHelper().
 *
 * When unset, getDataTypeHelper() falls back to the
 * "search_api.data_type_helper" service.
 *
 * @var \Drupal\search_api\Utility\DataTypeHelperInterface|null
 */
protected $dataTypeHelper;
/**
 * Constructs the processor and prepares the shared regular expressions.
 *
 * @param array $configuration
 *   The plugin configuration, handed on to the parent constructor.
 * @param string $plugin_id
 *   The plugin ID, handed on to the parent constructor.
 * @param array $plugin_definition
 *   The plugin definition, handed on to the parent constructor.
 */
public function __construct(array $configuration, $plugin_id, array $plugin_definition) {
  parent::__construct($configuration, $plugin_id, $plugin_definition);

  // The patterns live in static properties, so building them once is enough.
  if (isset(static::$boundary)) {
    return;
  }

  // Code point ranges that are treated like boundary characters in addition
  // to Unicode::PREG_CLASS_WORD_BOUNDARY.
  $cjk = '\\x{1100}-\\x{11FF}\\x{3040}-\\x{309F}\\x{30A1}-\\x{318E}'
    . '\\x{31A0}-\\x{31B7}\\x{31F0}-\\x{31FF}\\x{3400}-\\x{4DBF}\\x{4E00}-\\x{9FCF}'
    . '\\x{A000}-\\x{A48F}\\x{A4D0}-\\x{A4FD}\\x{A960}-\\x{A97F}\\x{AC00}-\\x{D7FF}'
    . '\\x{F900}-\\x{FAFF}\\x{FF21}-\\x{FF3A}\\x{FF41}-\\x{FF5A}\\x{FF66}-\\x{FFDC}'
    . '\\x{20000}-\\x{2FFFD}\\x{30000}-\\x{3FFFD}';

  // A "boundary" is any position directly after or directly before one of
  // the boundary characters.
  $boundary_class = '[' . Unicode::PREG_CLASS_WORD_BOUNDARY . $cjk . ']';
  static::$boundary = '(?:(?<=' . $boundary_class . ')|(?=' . $boundary_class . '))';
  static::$split = '/[' . Unicode::PREG_CLASS_WORD_BOUNDARY . ']+/iu';
}
/**
 * Retrieves the data type helper.
 *
 * @return \Drupal\search_api\Utility\DataTypeHelperInterface
 *   The injected helper if one was set, otherwise the
 *   "search_api.data_type_helper" service from the container.
 */
public function getDataTypeHelper() {
  if ($this->dataTypeHelper) {
    return $this->dataTypeHelper;
  }
  return \Drupal::service('search_api.data_type_helper');
}
/**
 * Sets the data type helper.
 *
 * @param \Drupal\search_api\Utility\DataTypeHelperInterface $data_type_helper
 *   The data type helper this processor should use from now on.
 *
 * @return $this
 *   The processor itself, to allow call chaining.
 */
public function setDataTypeHelper(DataTypeHelperInterface $data_type_helper) {
  $this->dataTypeHelper = $data_type_helper;

  return $this;
}
/**
 * Keeps the "exclude_fields" setting in sync with renamed index fields.
 *
 * Runs the parent implementation first, then replaces every excluded field
 * ID that the index reports as renamed with its new ID.
 */
public function preIndexSave() {
  parent::preIndexSave();

  if (!empty($this->configuration['exclude_fields'])) {
    // Flip so the excluded field IDs become keys and can be intersected with
    // the "old ID => new ID" rename map.
    $excluded = array_flip($this->configuration['exclude_fields']);
    $renamed = array_intersect_key($this->index->getFieldRenames(), $excluded);

    if ($renamed) {
      // Untouched IDs stay as they are; renamed ones are swapped for the new
      // IDs.
      $untouched = array_keys(array_diff_key($excluded, $renamed));
      $this->configuration['exclude_fields'] = array_merge($untouched, array_values($renamed));
    }
  }
}
/**
 * Provides the default settings of the highlight processor.
 *
 * @return array
 *   An associative array with the keys "prefix", "suffix", "excerpt",
 *   "excerpt_length", "highlight", "highlight_partial" and "exclude_fields".
 */
public function defaultConfiguration() {
  $defaults = [];

  // Markup wrapped around each highlighted keyword.
  $defaults['prefix'] = '<strong>';
  $defaults['suffix'] = '</strong>';
  // Excerpt creation and its requested length in characters.
  $defaults['excerpt'] = TRUE;
  $defaults['excerpt_length'] = 256;
  // Field highlighting mode: "always", "server" or "never".
  $defaults['highlight'] = 'always';
  $defaults['highlight_partial'] = FALSE;
  // Fulltext fields left out of the excerpt.
  $defaults['exclude_fields'] = [];

  return $defaults;
}
/**
 * Builds the settings form of the highlight processor.
 *
 * @param array $form
 *   The form array to add the settings elements to. Its "#parents" key, if
 *   present, is used to derive the HTML name of the "excerpt" checkbox.
 * @param \Drupal\Core\Form\FormStateInterface $form_state
 *   The current form state. Not read by this method.
 *
 * @return array
 *   The form array with the processor's settings elements added.
 */
public function buildConfigurationForm(array $form, FormStateInterface $form_state) {
  // Determine the HTML "name" prefix of this form's inputs, so the #states
  // selectors below can point at the "excerpt" checkbox.
  $parent_name = 'processors[highlight][settings]';
  if (!empty($form['#parents'])) {
    $trail = $form['#parents'];
    $parent_name = array_shift($trail);
    if ($trail) {
      $parent_name .= '[' . implode('][', $trail) . ']';
    }
  }

  // Shared by all elements that only make sense when excerpts are enabled.
  $visible_with_excerpt = [
    'visible' => [
      ":input[name=\"{$parent_name}[excerpt]\"]" => [
        'checked' => TRUE,
      ],
    ],
  ];

  $form['highlight'] = [
    '#type' => 'select',
    '#title' => $this->t('Highlight returned field data'),
    '#description' => $this->t('Select whether returned fields should be highlighted.'),
    '#options' => [
      'always' => $this->t('Always'),
      'server' => $this->t('If the server returns fields'),
      'never' => $this->t('Never'),
    ],
    '#default_value' => $this->configuration['highlight'],
  ];

  $form['highlight_partial'] = [
    '#type' => 'checkbox',
    '#title' => $this->t('Highlight partial matches'),
    '#description' => $this->t('When enabled, matches in parts of words will be highlighted as well.'),
    '#default_value' => $this->configuration['highlight_partial'],
  ];

  $form['excerpt'] = [
    '#type' => 'checkbox',
    '#title' => $this->t('Create excerpt'),
    '#description' => $this->t('When enabled, an excerpt will be created for searches with keywords, containing all occurrences of keywords in a fulltext field.'),
    '#default_value' => $this->configuration['excerpt'],
  ];

  $form['excerpt_length'] = [
    '#type' => 'number',
    '#title' => $this->t('Excerpt length'),
    '#description' => $this->t('The requested length of the excerpt, in characters'),
    '#default_value' => $this->configuration['excerpt_length'],
    '#min' => 50,
    '#states' => $visible_with_excerpt,
  ];

  // Offer every fulltext field of the index as "Label (field_id)".
  $index_fields = $this->index->getFields();
  $field_options = [];
  foreach ($this->index->getFulltextFields() as $field_id) {
    $label = $index_fields[$field_id]->getLabel();
    $field_options[$field_id] = $label . ' (' . $field_id . ')';
  }

  $form['exclude_fields'] = [
    '#type' => 'checkboxes',
    '#title' => $this->t('Exclude fields from excerpt'),
    '#description' => $this->t('Exclude certain fulltext fields from being included in the excerpt.'),
    '#options' => $field_options,
    '#default_value' => $this->configuration['exclude_fields'],
    '#attributes' => [
      'class' => [
        'search-api-checkboxes-list',
      ],
    ],
    '#states' => $visible_with_excerpt,
  ];

  $form['prefix'] = [
    '#type' => 'textfield',
    '#title' => $this->t('Highlighting prefix'),
    '#description' => $this->t('Text/HTML that will be prepended to all occurrences of search keywords in highlighted text'),
    '#default_value' => $this->configuration['prefix'],
  ];

  $form['suffix'] = [
    '#type' => 'textfield',
    '#title' => $this->t('Highlighting suffix'),
    '#description' => $this->t('Text/HTML that will be appended to all occurrences of search keywords in highlighted text'),
    '#default_value' => $this->configuration['suffix'],
  ];

  return $form;
}
/**
 * Stores the submitted settings as the processor's configuration.
 *
 * @param array $form
 *   The settings form. Not read by this method.
 * @param \Drupal\Core\Form\FormStateInterface $form_state
 *   The form state holding the submitted values.
 */
public function submitConfigurationForm(array &$form, FormStateInterface $form_state) {
  // The value is bound by reference, so normalising it here also changes
  // what getValues() returns below. The checkboxes value is reduced to the
  // plain list of keys whose value is truthy (i.e. the ticked boxes).
  $exclude_fields = &$form_state->getValue('exclude_fields');
  $exclude_fields = array_keys(array_filter($exclude_fields));

  $submitted = $form_state->getValues();
  $this->setConfiguration($submitted);
}
/**
 * Adds excerpts and highlighted field values to the search results.
 *
 * Does nothing when there are no results, when the query's processing level
 * is not QueryInterface::PROCESSING_FULL, or when no keywords can be
 * extracted from the query.
 *
 * @param \Drupal\search_api\Query\ResultSetInterface $results
 *   The search results to post-process.
 */
public function postprocessSearchResults(ResultSetInterface $results) {
  $query = $results->getQuery();

  if (!$results->getResultCount()) {
    return;
  }
  if ($query->getProcessingLevel() != QueryInterface::PROCESSING_FULL) {
    return;
  }
  $keys = $this->getKeywords($query);
  if (!$keys) {
    return;
  }

  // Fulltext fields that may contribute to the excerpt: all of them, minus
  // the ones excluded in the configuration.
  $excerpt_fields = $this->index->getFulltextFields();
  if (!empty($this->configuration['exclude_fields'])) {
    $excerpt_fields = array_diff($excerpt_fields, $this->configuration['exclude_fields']);
  }

  $result_items = $results->getResultItems();

  if ($this->configuration['excerpt']) {
    $this->addExcerpts($result_items, $excerpt_fields, $keys);
  }

  if ($this->configuration['highlight'] != 'never') {
    // Attach the highlighted values to each item as extra data.
    foreach ($this->highlightFields($result_items, $keys) as $item_id => $item_fields) {
      $result_items[$item_id]->setExtraData('highlighted_fields', $item_fields);
    }
  }
}
/**
 * Sets an excerpt on every result item that has fulltext values.
 *
 * @param array $results
 *   The result items, keyed by item ID. Each item must offer
 *   getExtraData() and setExcerpt().
 * @param array $fulltext_fields
 *   IDs of the fulltext fields whose values may be used for the excerpt.
 * @param array $keys
 *   The search keywords.
 */
protected function addExcerpts(array $results, array $fulltext_fields, array $keys) {
  $values_by_item = $this->getFulltextFields($results, $fulltext_fields);

  foreach ($values_by_item as $item_id => $field_values) {
    if ($field_values) {
      $result = $results[$item_id];

      // Flatten the per-field value lists into one list of text snippets.
      // array_values() drops the field IDs so they cannot be mistaken for
      // named arguments.
      $snippets = array_merge(...array_values($field_values));

      // Keywords already stored on the item as "highlighted_keys" extra data
      // are highlighted in addition to the search keywords.
      $extra_keys = $result->getExtraData('highlighted_keys');
      $item_keys = $extra_keys ? array_unique(array_merge($keys, $extra_keys)) : $keys;

      // Join the snippets with the "middle" ellipsis before excerpting.
      $separator = $this->getEllipses()[1];
      $excerpt = $this->createExcerpt(implode($separator, $snippets), $item_keys);
      $result->setExcerpt($excerpt);
    }
  }
}
/**
 * Highlights the keywords in the fulltext field values of the results.
 *
 * Fields that already have a non-empty entry in an item's
 * "highlighted_fields" extra data are left alone.
 *
 * @param array $results
 *   The result items, keyed by item ID.
 * @param array $keys
 *   The search keywords.
 *
 * @return array
 *   Highlighted field values, keyed by item ID and then by field ID. A field
 *   is only added if highlighting actually changed one of its values.
 */
protected function highlightFields(array $results, array $keys) {
  // Start from whatever highlighting is already present on the items.
  $highlighted = [];
  foreach ($results as $item_id => $result) {
    $highlighted[$item_id] = $result->getExtraData('highlighted_fields', []);
  }

  // Only in "always" mode is the load flag passed on as TRUE.
  $load = $this->configuration['highlight'] == 'always';

  foreach ($this->getFulltextFields($results, NULL, $load) as $item_id => $fields) {
    foreach ($fields as $field_id => $values) {
      if (!empty($highlighted[$item_id][$field_id])) {
        continue;
      }

      $changed = FALSE;
      foreach ($values as $delta => $original) {
        $marked = $this->highlightField($original, $keys);
        if ($marked !== $original) {
          $changed = TRUE;
        }
        $values[$delta] = $marked;
      }

      if ($changed) {
        $highlighted[$item_id][$field_id] = $values;
      }
    }
  }

  return $highlighted;
}
/**
 * Retrieves the text field values of the given result items.
 *
 * @param array $result_items
 *   The result items to extract the values from.
 * @param array|null $fulltext_fields
 *   (optional) IDs of the fields to restrict the extraction to, or NULL to
 *   consider every text-type field of the index. The nullable type is
 *   declared explicitly because implicit nullability through a NULL default
 *   is deprecated as of PHP 8.4.
 * @param bool $load
 *   (optional) Passed through unchanged as the third argument of the fields
 *   helper's extractItemValues().
 *
 * @return array
 *   Whatever the fields helper's extractItemValues() returns for the
 *   selected fields; callers in this class iterate it as item ID => field
 *   ID => list of values.
 */
protected function getFulltextFields(array $result_items, ?array $fulltext_fields = NULL, $load = TRUE) {
  // The helper does not change between iterations, so look it up once.
  $data_type_helper = $this->getDataTypeHelper();

  // Group the wanted fields as datasource ID => property path => field ID.
  $fields_by_datasource = [];
  foreach ($this->index->getFields() as $field_id => $field) {
    if (isset($fulltext_fields) && !in_array($field_id, $fulltext_fields)) {
      continue;
    }
    if ($data_type_helper->isTextType($field->getType())) {
      $fields_by_datasource[$field->getDatasourceId()][$field->getPropertyPath()] = $field_id;
    }
  }

  return $this->getFieldsHelper()
    ->extractItemValues($result_items, $fields_by_datasource, $load);
}
/**
 * Extracts the keywords to highlight from a search query.
 *
 * The keyword "0" is kept: checks here compare against the empty string
 * instead of relying on truthiness, which would silently drop it (and which
 * would be inconsistent with flattenKeysArray(), where "0" is retained).
 *
 * @param \Drupal\search_api\Query\QueryInterface $query
 *   The query whose original keys should be examined.
 *
 * @return string[]
 *   The keywords, each one used as both key and value. Empty if the query
 *   has no usable keys.
 */
protected function getKeywords(QueryInterface $query) {
  $keys = $query->getOriginalKeys();

  // Bail out on NULL, '' and [] – but not on the string "0", which is falsy
  // in PHP yet a perfectly valid search term.
  if (!$keys && $keys !== '0') {
    return [];
  }

  if (is_array($keys)) {
    return $this->flattenKeysArray($keys);
  }

  // Split on word-boundary characters, discarding empty pieces right away.
  $words = preg_split(static::$split, $keys, -1, PREG_SPLIT_NO_EMPTY);
  if (!$words) {
    // Either a PCRE error (FALSE) or nothing but boundary characters.
    return [];
  }

  $keywords = [];
  foreach ($words as $word) {
    // Strip surrounding quotes, e.g. from phrase searches.
    $keyword = trim($word, "'\"");
    if ($keyword !== '') {
      $keywords[$keyword] = $keyword;
    }
  }

  return $keywords;
}
/**
 * Collects all positive keywords from a (possibly nested) keys array.
 *
 * @param array $keys
 *   The keys array. Entries for which Element::child() returns FALSE are
 *   treated as metadata and skipped; a truthy "#negation" entry discards the
 *   whole (sub-)array.
 *
 * @return array
 *   The keywords found, each one used as both key and value.
 */
protected function flattenKeysArray(array $keys) {
  // Negated keywords must not be highlighted.
  if (!empty($keys['#negation'])) {
    return [];
  }

  $keywords = [];
  foreach ($keys as $name => $value) {
    if (Element::child($name)) {
      if (is_array($value)) {
        // Nested group: merge in its keywords, keeping existing entries.
        $keywords += $this->flattenKeysArray($value);
      }
      else {
        $keywords[$value] = $value;
      }
    }
  }

  return $keywords;
}
/**
 * Creates a highlighted excerpt of a text around the given keywords.
 *
 * The text is reduced to plain text, then keyword occurrences are collected
 * together with some surrounding context until the configured
 * "excerpt_length" is reached or no keyword is found anymore. Overlapping
 * context ranges are merged, the resulting pieces are HTML-escaped, joined
 * with the ellipses from getEllipses() and finally passed through
 * highlightField().
 *
 * @param string $text
 *   The text (possibly containing HTML) to create the excerpt from.
 * @param array $keys
 *   The keywords to look for and highlight.
 *
 * @return string|null
 *   The highlighted excerpt, or NULL if no keyword occurrence produced a
 *   usable range.
 */
protected function createExcerpt($text, array $keys) {
// Remove <style> and <script> elements including their content.
$text = preg_replace('#<(style|script).*?>.*?</\\1>#is', ' ', $text);
// Put spaces around the angle brackets before stripping the tags, so the
// text of neighbouring elements does not get glued together.
$text = strip_tags(str_replace([
'<',
'>',
], [
' <',
'> ',
], $text));
$text = Html::decodeEntities($text);
// Collapse every run of whitespace into a single space.
$text = preg_replace('/\\s+/', ' ', $text);
$text = trim($text, ' ');
$text_length = mb_strlen($text);
// Collected context ranges as "start => end" (character positions).
$ranges = [];
// Total number of characters covered by the collected ranges so far.
$length = 0;
// Per keyword: the position from which to continue searching.
$look_start = [];
$remaining_keys = $keys;
$excerpt_length = $this->configuration['excerpt_length'];
// Amount of context to take around a match: a quarter of the excerpt length
// minus 3, or – if that would be less than 32 – half of it minus 1.
$context_length = round($excerpt_length / 4) - 3;
if ($context_length < 32) {
$context_length = round($excerpt_length / 2) - 1;
}
// Each pass looks for the next occurrence of every keyword that was still
// found in the previous pass.
while ($length < $excerpt_length && !empty($remaining_keys)) {
$found_keys = [];
foreach ($remaining_keys as $key) {
if ($length >= $excerpt_length) {
break;
}
if (!isset($look_start[$key])) {
$look_start[$key] = 0;
}
$matches = [];
if (!$this->configuration['highlight_partial']) {
// Whole-word mode: match the keyword between two boundaries. The text is
// padded with spaces so matches at its very start and end have a boundary.
$found_position = FALSE;
$regex = '/' . static::$boundary . preg_quote($key, '/') . static::$boundary . '/iu';
$offset = $look_start[$key];
if ($offset > 0) {
// preg_match() expects a byte offset, so convert the character offset.
$offset = strlen(mb_substr(' ' . $text, 0, $offset));
}
if (preg_match($regex, ' ' . $text . ' ', $matches, PREG_OFFSET_CAPTURE, $offset)) {
$found_position = $matches[0][1];
// Convert the byte offset reported by preg_match() back to characters.
// NOTE(review): this position is measured in the space-prefixed string,
// while the partial-match branch below measures in $text itself – confirm
// whether the resulting one-character difference is intended.
$found_position = mb_strlen(substr(" {$text}", 0, $found_position));
}
}
else {
// Partial mode: a plain case-insensitive substring search is enough.
$found_position = mb_stripos($text, $key, $look_start[$key], 'UTF-8');
}
if ($found_position !== FALSE) {
// Continue after this occurrence next time, and keep the key in play.
$look_start[$key] = $found_position + 1;
$found_keys[] = $key;
// Determine where the context before the match starts: at the first space
// within the context window (so no word is cut), or at the window's start
// if there is no suitable space.
if ($found_position > $context_length) {
$before = mb_strpos($text, ' ', $found_position - $context_length);
if ($before !== FALSE) {
++$before;
}
if ($before === FALSE || $before > $found_position) {
$before = $found_position - $context_length;
}
}
else {
$before = 0;
}
if ($before !== FALSE && $before <= $found_position) {
// Determine where the context after the match ends: at the last space
// within the context window, or at the end of the text if it is closer.
if ($text_length > $found_position + $context_length) {
$after = mb_strrpos(mb_substr($text, 0, $found_position + $context_length), ' ', $found_position);
}
else {
$after = $text_length;
}
if ($after !== FALSE && $after > $found_position) {
if ($before < $after) {
// Record the range; a later range with the same start replaces it.
$ranges[$before] = $after;
$length += $after - $before;
}
}
}
}
}
// Only keys found in this pass are searched again in the next one.
$remaining_keys = $found_keys;
}
if (!$ranges) {
return NULL;
}
// Sort the ranges by start position and merge those that overlap or touch.
ksort($ranges);
$new_ranges = [];
$working_from = $working_to = NULL;
foreach ($ranges as $this_from => $this_to) {
if ($working_from === NULL) {
$working_from = $this_from;
$working_to = $this_to;
continue;
}
if ($this_from <= $working_to) {
$working_to = max($working_to, $this_to);
}
else {
$new_ranges[$working_from] = $working_to;
$working_from = $this_from;
$working_to = $this_to;
}
}
// Flush the range that was still being worked on.
$new_ranges[$working_from] = $working_to;
// Cut the merged ranges out of the plain text and escape them for output.
$out = [];
foreach ($new_ranges as $from => $to) {
$out[] = Html::escape(mb_substr($text, $from, $to - $from));
}
if (!$out) {
return NULL;
}
// Wrap and join the pieces with the leading, middle and trailing ellipsis.
$ellipses = $this
->getEllipses();
$excerpt = $ellipses[0] . implode($ellipses[1], $out) . $ellipses[2];
// The pieces contain no tags anymore, so highlight in plain-text mode.
return $this
->highlightField($excerpt, $keys, FALSE);
}
/**
 * Wraps all keyword occurrences in a text with the configured prefix/suffix.
 *
 * @param string $text
 *   The text in which to highlight the keywords.
 * @param array $keys
 *   The keywords to highlight. If empty, the text is returned unchanged.
 * @param bool $html
 *   (optional) TRUE if $text may contain HTML tags, in which case only the
 *   text between tags is highlighted and the tags themselves are left
 *   untouched. FALSE to treat the whole string as plain text.
 *
 * @return string
 *   The text with highlighted keywords. If a PCRE error occurs, a warning is
 *   logged and the text is returned as passed in.
 */
protected function highlightField($text, array $keys, $html = TRUE) {
  // Without keywords the pattern below would match empty strings and scatter
  // empty prefix/suffix pairs through the text.
  if (!$keys) {
    return $text;
  }

  if ($html) {
    // Split the text into alternating "plain text" and "run of tags" parts.
    // Attribute values in double OR single quotes may contain ">", so both
    // quoted forms are consumed as a whole ("[^']*" was previously missing
    // its quantifier and only accepted one-character single-quoted values).
    $texts = preg_split('#((?:</?[[:alpha:]](?:[^>"\']*|"[^"]*"|\'[^\']*\')*>)+)#i', $text, -1, PREG_SPLIT_DELIM_CAPTURE);
    if ($texts === FALSE) {
      $args = [
        '%error_num' => preg_last_error(),
      ];
      $this->getLogger()
        ->warning('A PCRE error (#%error_num) occurred during results highlighting.', $args);
      return $text;
    }
    // With PREG_SPLIT_DELIM_CAPTURE the even indexes hold the plain text and
    // the odd indexes the captured tags, so only every second part is
    // highlighted.
    $textsCount = count($texts);
    for ($i = 0; $i < $textsCount; $i += 2) {
      $texts[$i] = $this->highlightField($texts[$i], $keys, FALSE);
    }
    return implode('', $texts);
  }

  // Plain text: build one alternation of all (escaped) keywords.
  $quoted_keys = array_map(static function ($key) {
    return preg_quote($key, '/');
  }, $keys);
  $boundary = !$this->configuration['highlight_partial'] ? static::$boundary : '';
  $regex = '/' . $boundary . '(?:' . implode('|', $quoted_keys) . ')' . $boundary . '/iu';
  $replace = $this->configuration['prefix'] . '\\0' . $this->configuration['suffix'];

  // Pad with spaces so keywords at the very start or end have a boundary.
  $highlighted = preg_replace($regex, $replace, ' ' . $text . ' ');
  if ($highlighted === NULL) {
    // preg_replace() signals errors (e.g. invalid UTF-8 with the "u"
    // modifier) by returning NULL; keep the original text instead of
    // replacing it with an empty string.
    $args = [
      '%error_num' => preg_last_error(),
    ];
    $this->getLogger()
      ->warning('A PCRE error (#%error_num) occurred during results highlighting.', $args);
    return $text;
  }

  return trim($highlighted);
}
/**
 * Retrieves the translated ellipses used to frame and join excerpt parts.
 *
 * @return string[]
 *   The pieces of the translated template, split at each "@excerpt"
 *   placeholder. For the untranslated template these are the leading
 *   ellipsis (index 0), the separator between two parts (index 1) and the
 *   trailing ellipsis (index 2).
 */
protected function getEllipses() {
  return explode('@excerpt', $this->t('… @excerpt … @excerpt …'));
}
}