class SearchApiHtmlFilter in Search API 7
Hierarchy
- class \SearchApiAbstractProcessor implements SearchApiProcessorInterface
- class \SearchApiHtmlFilter
Expanded class hierarchy of SearchApiHtmlFilter
1 string reference to 'SearchApiHtmlFilter'
- search_api_search_api_processor_info in ./search_api.module - Implements hook_search_api_processor_info().
File
- includes/processor_html_filter.inc, line 13 - Contains SearchApiHtmlFilter.
View source
/**
 * Processor that strips HTML from field values and applies per-tag boosts.
 *
 * Text found inside one of the configured HTML elements is returned as a
 * separate token whose score is multiplied by the boost of that element.
 */
class SearchApiHtmlFilter extends SearchApiAbstractProcessor {

  /**
   * The configured tag boosts, keyed by tag name.
   *
   * Parsed from the "tags" option with drupal_parse_info_format().
   *
   * @var array
   */
  protected $tags;

  /**
   * Constructs the processor, adding default options and parsing tag boosts.
   *
   * @param SearchApiIndex $index
   *   The index for which processing is done.
   * @param array $options
   *   The processor options. Missing keys ("title", "alt", "tags") are filled
   *   with defaults.
   */
  public function __construct(SearchApiIndex $index, array $options = array()) {
    parent::__construct($index, $options);
    $this->options += array(
      'title' => FALSE,
      'alt' => TRUE,
      'tags' => "h1 = 5\n" . "h2 = 3\n" . "h3 = 2\n" . "strong = 2\n" . "b = 2\n" . "em = 1.5\n" . 'u = 1.5',
    );
    $this->tags = drupal_parse_info_format($this->options['tags']);
    // Specifying empty tags doesn't make sense.
    unset($this->tags['br'], $this->tags['hr']);
  }

  /**
   * Builds the configuration form for this processor.
   *
   * @return array
   *   The parent's form, extended by the "title", "alt" and "tags" elements.
   */
  public function configurationForm() {
    $form = parent::configurationForm();
    $form += array(
      'title' => array(
        '#type' => 'checkbox',
        '#title' => t('Index title attribute'),
        '#description' => t('If set, the contents of title attributes will be indexed.'),
        '#default_value' => $this->options['title'],
      ),
      'alt' => array(
        '#type' => 'checkbox',
        '#title' => t('Index alt attribute'),
        '#description' => t('If set, the alternative text of images will be indexed.'),
        '#default_value' => $this->options['alt'],
      ),
      'tags' => array(
        '#type' => 'textarea',
        '#title' => t('Tag boosts'),
        '#description' => t('Specify special boost values for certain HTML elements, in <a href="@link">INI file format</a>. ' . 'The boost values of nested elements are multiplied, elements not mentioned will have the default boost value of 1. ' . 'Assign a boost of 0 to ignore the text content of that HTML element.', array(
          '@link' => url('http://api.drupal.org/api/function/drupal_parse_info_format/7'),
        )),
        '#default_value' => $this->options['tags'],
      ),
    );
    return $form;
  }

  /**
   * Validates the submitted tag boosts.
   *
   * Every boost has to be a single, non-negative numeric value. All problems
   * found are reported together as one error on the "tags" element.
   *
   * @param array $form
   *   The form returned by configurationForm().
   * @param array $values
   *   The submitted values for this form.
   * @param array $form_state
   *   The complete form state.
   */
  public function configurationFormValidate(array $form, array &$values, array &$form_state) {
    parent::configurationFormValidate($form, $values, $form_state);
    if (empty($values['tags'])) {
      return;
    }
    $tags = drupal_parse_info_format($values['tags']);
    $errors = array();
    // The messages are output as HTML, so the angle brackets around the tag
    // name have to be written as entities. Otherwise, "<h1>" would be rendered
    // as an actual element instead of being displayed.
    foreach ($tags as $key => $value) {
      if (is_array($value)) {
        $errors[] = t("Boost value for tag &lt;@tag&gt; can't be an array.", array(
          '@tag' => $key,
        ));
      }
      elseif (!is_numeric($value)) {
        $errors[] = t("Boost value for tag &lt;@tag&gt; must be numeric.", array(
          '@tag' => $key,
        ));
      }
      elseif ($value < 0) {
        $errors[] = t('Boost value for tag &lt;@tag&gt; must be non-negative.', array(
          '@tag' => $key,
        ));
      }
    }
    if ($errors) {
      form_error($form['tags'], implode("<br />\n", $errors));
    }
  }

  /**
   * Strips HTML from a single text value.
   *
   * @param mixed $value
   *   The value to process, passed by reference. If tag boosts are configured,
   *   it is replaced by the token array returned by parseText(). Otherwise, it
   *   is replaced by the plain text returned by decodeHtml().
   */
  protected function processFieldValue(&$value) {
    // Let removed tags still delimit words.
    $text = str_replace(array(
      '<',
      '>',
    ), array(
      ' <',
      '> ',
    ), $value);
    if ($this->options['title']) {
      // Move the contents of "title" attributes behind their tag.
      $text = preg_replace('/(<[-a-z_]+[^>]+)\\btitle\\s*=\\s*("([^"]+)"|\'([^\']+)\')([^>]*>)/i', '$1 $5 $3$4 ', $text);
    }
    if ($this->options['alt']) {
      // Turn the alternative text of images into the content of an <img>
      // element, so it is treated like any other element text.
      $text = preg_replace('/<img\\b[^>]+\\balt\\s*=\\s*("([^"]+)"|\'([^\']+)\')[^>]*>/i', ' <img>$2$3</img> ', $text);
    }
    if ($this->tags) {
      // Only keep the tags for which a boost was specified.
      $text = strip_tags($text, '<' . implode('><', array_keys($this->tags)) . '>');
      $value = $this
        ->parseText($text);
    }
    else {
      $value = $this
        ->decodeHtml(strip_tags($text));
    }
  }

  /**
   * Splits HTML text into tokens, scored by the boosts of enclosing tags.
   *
   * @param string $text
   *   The text to parse, passed by reference. The parsed part is removed from
   *   it, so a recursive call leaves only the text following the closing tag
   *   of $active_tag.
   * @param string|null $active_tag
   *   (optional) The name of the tag whose closing tag ends this call.
   * @param float $boost
   *   (optional) The boost of the enclosing elements. Text is skipped while
   *   this is 0.
   *
   * @return array
   *   An array of tokens, each an array with the keys "value" and "score".
   */
  protected function parseText(&$text, $active_tag = NULL, $boost = 1) {
    $ret = array();
    while (($pos = strpos($text, '<')) !== FALSE) {
      if ($boost && $pos > 0) {
        $token = $this
          ->decodeHtml(substr($text, 0, $pos));
        // The spaces inserted around tags produce a lot of whitespace-only
        // segments, which decode to an empty string. Don't return those.
        if ($token !== '') {
          $ret[] = array(
            'value' => $token,
            'score' => $boost,
          );
        }
      }
      $text = substr($text, $pos + 1);
      if (!preg_match('#^(/?)([:_a-zA-Z][-:_a-zA-Z0-9.]*)#', $text, $m)) {
        // Not a tag, just a stray "<".
        continue;
      }
      // NOTE(review): If there is no ">", strpos() returns FALSE and only a
      // single character is skipped. Presumably this can't happen after the
      // strip_tags() call in processFieldValue() - confirm for other callers.
      $text = substr($text, strpos($text, '>') + 1);
      if ($m[1]) {
        // Closing tag.
        if ($active_tag && $m[2] == $active_tag) {
          return $ret;
        }
      }
      else {
        // Opening tag => recursive call.
        $inner_boost = $boost * (isset($this->tags[$m[2]]) ? $this->tags[$m[2]] : 1);
        $ret = array_merge($ret, $this
          ->parseText($text, $m[2], $inner_boost));
      }
    }
    // Compare as string, so a remaining text of "0" isn't dropped.
    if ((string) $text !== '') {
      // As in the loop, ignore text inside of elements with a boost of 0.
      if ($boost) {
        $token = $this
          ->decodeHtml($text);
        if ($token !== '') {
          $ret[] = array(
            'value' => $token,
            'score' => $boost,
          );
        }
      }
      $text = '';
    }
    return $ret;
  }

  /**
   * Decodes HTML entities in a token and normalizes whitespace.
   *
   * All whitespace in the token will be converted to single spaces, with no
   * leading or trailing whitespace.
   *
   * @param string $token
   *   The token to process.
   *
   * @return string
   *   The processed token.
   */
  protected function decodeHtml($token) {
    $token = html_entity_decode($token, ENT_QUOTES, 'UTF-8');
    // Remove any multiple/leading/trailing spaces we might have introduced.
    $token = trim(preg_replace('/[\\pZ\\pC]+/u', ' ', $token));
    return $token;
  }

}
Members
Name | Modifiers | Type | Description | Overrides |
---|---|---|---|---|
SearchApiAbstractProcessor:: |
protected | property | ||
SearchApiAbstractProcessor:: |
protected | property | ||
SearchApiAbstractProcessor:: |
public | function |
Submit callback for the form returned by configurationForm(). Overrides SearchApiProcessorInterface:: |
|
SearchApiAbstractProcessor:: |
protected | function | Internal helper function for imploding tokens into a single string. | |
SearchApiAbstractProcessor:: |
protected | function | Internal helper function for normalizing tokens. | |
SearchApiAbstractProcessor:: |
public | function |
Does nothing. Overrides SearchApiProcessorInterface:: |
2 |
SearchApiAbstractProcessor:: |
public | function |
Calls processField() for all appropriate fields. Overrides SearchApiProcessorInterface:: |
|
SearchApiAbstractProcessor:: |
public | function |
Calls processKeys() for the keys and processFilters() for the filters. Overrides SearchApiProcessorInterface:: |
1 |
SearchApiAbstractProcessor:: |
protected | function | Function that is ultimately called for all text by the standard implementation, and does nothing by default. | 5 |
SearchApiAbstractProcessor:: |
protected | function | Method for preprocessing field data. | |
SearchApiAbstractProcessor:: |
protected | function | Method for preprocessing query filters. | |
SearchApiAbstractProcessor:: |
protected | function | Called for processing a single filter value. The default implementation just calls process(). | |
SearchApiAbstractProcessor:: |
protected | function | Called for processing a single search keyword. The default implementation just calls process(). | |
SearchApiAbstractProcessor:: |
protected | function | Method for preprocessing search keys. | |
SearchApiAbstractProcessor:: |
public | function |
Check whether this processor is applicable for a certain index. Overrides SearchApiProcessorInterface:: |
|
SearchApiAbstractProcessor:: |
protected | function | Determines whether to process data from the given field. | |
SearchApiAbstractProcessor:: |
protected | function | Determines whether fields of the given type should normally be processed. | |
SearchApiHtmlFilter::$tags | protected | property | | |
SearchApiHtmlFilter::configurationForm | public | function | Display a form for configuring this processor. Since forcing users to specify options for disabled processors makes no sense, none of the form elements should have the '#required' attribute set. Overrides SearchApiAbstractProcessor::configurationForm | |
SearchApiHtmlFilter::configurationFormValidate | public | function | Validation callback for the form returned by configurationForm(). Overrides SearchApiAbstractProcessor::configurationFormValidate | |
SearchApiHtmlFilter::decodeHtml | protected | function | Decodes HTML entities in a token and normalizes whitespace. | |
SearchApiHtmlFilter::parseText | protected | function | | |
SearchApiHtmlFilter::processFieldValue | protected | function | Called for processing a single text element in a field. The default implementation just calls process(). Overrides SearchApiAbstractProcessor::processFieldValue | |
SearchApiHtmlFilter::__construct | public | function | Constructor, saving its arguments into properties. Overrides SearchApiAbstractProcessor::__construct |