public function TokenizerTest::textDataProvider in Search API 8
Provides test data for testProcessFieldValue().
Return value
array Arrays of parameters for testProcessFieldValue(), each containing (in this order):
- The field value passed to the processor's processFieldValue() method.
- The expected preprocessed value.
- (optional) Configuration to override the processor's defaults.
File
- tests/src/Unit/Processor/TokenizerTest.php, line 64
Class
- TokenizerTest
- Tests the "Tokenizer" processor.
Namespace
Drupal\Tests\search_api\Unit\Processor
Code
/**
 * Supplies the datasets used to exercise the "Tokenizer" processor.
 *
 * @return array
 *   A list of argument sets for testProcessFieldValue(). Every set holds, in
 *   this order:
 *   - the raw field value handed to the processor's processFieldValue();
 *   - the list of text tokens expected after processing;
 *   - (optional) configuration overriding the processor's defaults.
 */
public function textDataProvider() {
  $word = Utility::createTextToken('word');
  // Separator configuration shared by several datasets below.
  $upper_range = ['spaces' => 'R-Z'];

  $datasets = [];

  // Simple cases: a single word, and two words divided by a blank.
  $datasets[] = ['word', [$word]];
  $datasets[] = ['word word', [$word, $word]];

  // With the defaults, special characters are expected to split words, too.
  $datasets[] = ['words!word', [Utility::createTextToken('words'), $word]];
  $datasets[] = ['words$word', [Utility::createTextToken('words'), $word]];

  // Overriding the separator characters must work and be case-sensitive
  // enough to keep the lowercase "x" inside the second token.
  $datasets[] = [
    'wordXwordxword',
    [$word, Utility::createTextToken('wordxword')],
    ['spaces' => 'X'],
  ];
  // A character class escape as separator; "!" is then no longer a boundary.
  $datasets[] = [
    'word3word!word',
    [$word, Utility::createTextToken('word!word')],
    ['spaces' => '\\d'],
  ];
  // Character ranges as separators.
  $datasets[] = ['wordXwordRword', [$word, $word, $word], $upper_range];
  $datasets[] = [
    'wordXwordRword',
    [$word, $word, $word],
    ['spaces' => 'R-TW-Z'],
  ];
  // A plain blank is expected to keep splitting with a custom range set.
  $datasets[] = ['wordXword word', [$word, $word, $word], $upper_range];

  // Minimum word size: the short "wo" is only expected to survive once
  // "minimum_word_size" is lowered to 2.
  $datasets[] = ['wordSwo', [$word], $upper_range];
  $datasets[] = [
    'wordSwo',
    [$word, Utility::createTextToken('wo')],
    $upper_range + ['minimum_word_size' => 2],
  ];
  $datasets[] = ['word w', [$word], ['minimum_word_size' => 2]];
  $datasets[] = [
    'word w',
    [$word, Utility::createTextToken('w')],
    ['minimum_word_size' => 1],
  ];
  // No word reaches the required length, so no tokens are expected at all.
  $datasets[] = ['word wordword', [], ['minimum_word_size' => 10]];

  // With the defaults, the hyphen is expected to vanish without splitting
  // the word.
  $datasets[] = ['foo-bar', [Utility::createTextToken('foobar')]];

  // Changing the ignored characters: the hyphen is not in the custom set
  // and is expected to split the words.
  $datasets[] = ['word-word', [$word, $word], ['ignored' => '._']];
  $datasets[] = [
    'foobar',
    [Utility::createTextToken('foobr')],
    ['ignored' => 'a'],
  ];

  // A run of several ignored characters still counts as a word boundary.
  $datasets[] = [
    'foobar',
    [Utility::createTextToken('bar')],
    ['ignored' => 'o'],
  ];

  return $datasets;
}