class SearchApiTokenizer in Search API 7
Processor for tokenizing fulltext data by replacing (configurable) non-letters with spaces.
Hierarchy
- class \SearchApiAbstractProcessor implements SearchApiProcessorInterface
- class \SearchApiTokenizer
Expanded class hierarchy of SearchApiTokenizer
1 string reference to 'SearchApiTokenizer'
- search_api_search_api_processor_info in ./search_api.module - Implements hook_search_api_processor_info().
File
- includes/processor_tokenizer.inc, line 12 - Contains SearchApiTokenizer.
View source
/**
 * Processor for tokenizing fulltext data.
 *
 * Characters matching the configurable "ignorable" character class are
 * removed; runs of characters matching the configurable "spaces" character
 * class are treated as word delimiters.
 */
class SearchApiTokenizer extends SearchApiAbstractProcessor {

  /**
   * The "spaces" character class, with "/" escaped for use inside "/…/".
   *
   * Lazily initialized by prepare(); unset until the first processing call.
   *
   * @var string
   */
  protected $spaces;

  /**
   * The "ignorable" character class, with "/" escaped for use inside "/…/".
   *
   * Lazily initialized by prepare() together with $spaces.
   *
   * @var string
   */
  protected $ignorable;

  /**
   * Builds the configuration form for this processor.
   *
   * Restricts the parent form's field options to fulltext fields and adds
   * the "spaces" and "ignorable" text fields.
   *
   * @return array
   *   The form array.
   */
  public function configurationForm() {
    $form = parent::configurationForm();

    // Only make fulltext fields available as options.
    $fields = $this->index->getFields();
    $field_options = array();
    foreach ($fields as $name => $field) {
      if (empty($field['real_type']) && search_api_is_text_type($field['type'])) {
        $field_options[$name] = $field['name'];
      }
    }
    $form['fields']['#options'] = $field_options;

    $form += array(
      'spaces' => array(
        '#type' => 'textfield',
        '#title' => t('Whitespace characters'),
        '#description' => t('Specify the characters that should be regarded as whitespace and therefore used as word-delimiters. Specify the characters as a <a href="@link">PCRE character class</a>. Note: For non-English content, the default setting might not be suitable.', array(
          '@link' => url('http://www.php.net/manual/en/regexp.reference.character-classes.php'),
        )),
        '#default_value' => "[^[:alnum:]]",
      ),
      'ignorable' => array(
        '#type' => 'textfield',
        '#title' => t('Ignorable characters'),
        '#description' => t('Specify characters which should be removed from fulltext fields and search strings (e.g., "-"). The same format as above is used.'),
        '#default_value' => "[']",
      ),
    );

    // Override each default only if that specific option was stored, so that
    // options containing other keys (but not these) neither trigger
    // undefined-index notices nor blank out the defaults.
    foreach (array('spaces', 'ignorable') as $key) {
      if (isset($this->options[$key])) {
        $form[$key]['#default_value'] = $this->options[$key];
      }
    }

    return $form;
  }

  /**
   * Validates that both configured character classes compile as PCRE.
   *
   * @param array $form
   *   The form returned by configurationForm().
   * @param array $values
   *   The submitted form values.
   * @param array $form_state
   *   The complete form state.
   */
  public function configurationFormValidate(array $form, array &$values, array &$form_state) {
    parent::configurationFormValidate($form, $values, $form_state);

    foreach (array('spaces', 'ignorable') as $key) {
      $pattern = $this->buildPattern(str_replace('/', '\\/', $values[$key]));
      // preg_match() emits a warning and returns FALSE for a pattern that
      // does not compile; the warning is suppressed because the failure is
      // reported to the user as a form error instead.
      if (@preg_match($pattern, '') === FALSE) {
        $el = $form[$key];
        form_error($el, $el['#title'] . ': ' . t('The entered text is no valid regular expression.'));
      }
    }
  }

  /**
   * Tokenizes a single text value of a field.
   *
   * Removes ignorable characters, then splits at runs of "spaces"
   * characters. If the split yields more than one part, $value is replaced
   * by an array of array('value' => $token) items; otherwise it stays a
   * string.
   *
   * @param mixed $value
   *   The value to process, passed by reference.
   */
  protected function processFieldValue(&$value) {
    $this->prepare();

    if ($this->ignorable) {
      $stripped = preg_replace($this->buildPattern($this->ignorable), '', $value);
      // preg_replace() returns NULL on error (e.g., malformed UTF-8 with the
      // "u" modifier); keep the original value rather than discarding it.
      if ($stripped !== NULL) {
        $value = $stripped;
      }
    }

    if ($this->spaces) {
      $parts = preg_split($this->buildPattern($this->spaces), $value);
      // preg_split() returns FALSE on error; count(FALSE) is an error on
      // PHP 8, so make sure we really got an array.
      if (is_array($parts) && count($parts) > 1) {
        $value = array();
        foreach ($parts as $token) {
          $value[] = array(
            'value' => $token,
          );
        }
      }
    }
  }

  /**
   * Normalizes a plain string without splitting it into tokens.
   *
   * Removes ignorable characters and replaces each run of "spaces"
   * characters with a single space. Non-string values are left untouched.
   *
   * @param mixed $value
   *   The value to process, passed by reference.
   */
  protected function process(&$value) {
    // We don't touch integers, NULL values or the like.
    if (!is_string($value)) {
      return;
    }
    $this->prepare();

    // The same grouped pattern as in validation and processFieldValue() is
    // used here, so that a setting is interpreted identically everywhere.
    if ($this->ignorable) {
      $stripped = preg_replace($this->buildPattern($this->ignorable), '', $value);
      if ($stripped !== NULL) {
        $value = $stripped;
      }
    }
    if ($this->spaces) {
      $spaced = preg_replace($this->buildPattern($this->spaces), ' ', $value);
      if ($spaced !== NULL) {
        $value = $spaced;
      }
    }
  }

  /**
   * Lazily initializes $spaces and $ignorable from the processor options.
   *
   * Forward slashes are escaped because "/" is used as the pattern
   * delimiter. A missing option results in an empty string, which disables
   * the corresponding processing step.
   */
  protected function prepare() {
    if (isset($this->spaces)) {
      return;
    }
    $spaces = isset($this->options['spaces']) ? $this->options['spaces'] : '';
    $ignorable = isset($this->options['ignorable']) ? $this->options['ignorable'] : '';
    $this->spaces = str_replace('/', '\\/', $spaces);
    $this->ignorable = str_replace('/', '\\/', $ignorable);
  }

  /**
   * Wraps an (already delimiter-escaped) expression into a complete pattern.
   *
   * @param string $expression
   *   The character class or expression, with "/" already escaped.
   *
   * @return string
   *   A UTF-8 aware pattern matching one or more repetitions of $expression.
   */
  protected function buildPattern($expression) {
    return '/(' . $expression . ')+/u';
  }

}
Members
Name | Modifiers | Type | Description | Overrides |
---|---|---|---|---|
SearchApiAbstractProcessor:: |
protected | property | ||
SearchApiAbstractProcessor:: |
protected | property | ||
SearchApiAbstractProcessor:: |
public | function |
Submit callback for the form returned by configurationForm(). Overrides SearchApiProcessorInterface:: |
|
SearchApiAbstractProcessor:: |
protected | function | Internal helper function for imploding tokens into a single string. | |
SearchApiAbstractProcessor:: |
protected | function | Internal helper function for normalizing tokens. | |
SearchApiAbstractProcessor:: |
public | function |
Does nothing. Overrides SearchApiProcessorInterface:: |
2 |
SearchApiAbstractProcessor:: |
public | function |
Calls processField() for all appropriate fields. Overrides SearchApiProcessorInterface:: |
|
SearchApiAbstractProcessor:: |
public | function |
Calls processKeys() for the keys and processFilters() for the filters. Overrides SearchApiProcessorInterface:: |
1 |
SearchApiAbstractProcessor:: |
protected | function | Method for preprocessing field data. | |
SearchApiAbstractProcessor:: |
protected | function | Method for preprocessing query filters. | |
SearchApiAbstractProcessor:: |
protected | function | Called for processing a single filter value. The default implementation just calls process(). | |
SearchApiAbstractProcessor:: |
protected | function | Called for processing a single search keyword. The default implementation just calls process(). | |
SearchApiAbstractProcessor:: |
protected | function | Method for preprocessing search keys. | |
SearchApiAbstractProcessor:: |
public | function |
Check whether this processor is applicable for a certain index. Overrides SearchApiProcessorInterface:: |
|
SearchApiAbstractProcessor:: |
protected | function | Determines whether to process data from the given field. | |
SearchApiAbstractProcessor:: |
protected | function | Determines whether fields of the given type should normally be processed. | |
SearchApiAbstractProcessor:: |
public | function |
Constructor, saving its arguments into properties. Overrides SearchApiProcessorInterface:: |
2 |
SearchApiTokenizer::$ignorable | protected | property | | |
SearchApiTokenizer::$spaces | protected | property | | |
SearchApiTokenizer::configurationForm | public | function | Display a form for configuring this processor. Since forcing users to specify options for disabled processors makes no sense, none of the form elements should have the '#required' attribute set. Overrides SearchApiAbstractProcessor::configurationForm | |
SearchApiTokenizer::configurationFormValidate | public | function | Validation callback for the form returned by configurationForm(). Overrides SearchApiAbstractProcessor::configurationFormValidate | |
SearchApiTokenizer::prepare | protected | function | | |
SearchApiTokenizer::process | protected | function | Function that is ultimately called for all text by the standard implementation, and does nothing by default. Overrides SearchApiAbstractProcessor::process | |
SearchApiTokenizer::processFieldValue | protected | function | Called for processing a single text element in a field. The default implementation just calls process(). Overrides SearchApiAbstractProcessor::processFieldValue | |