You are here

public function TokenizerTest::textDataProvider in Search API 8

Provides test data for testProcessFieldValue().

Return value

array Arrays of parameters for testProcessFieldValue(), each containing (in this order):

  • The field value passed to the processor's processFieldValue() method.
  • The expected preprocessed value.
  • (optional) Configuration to override the processor's defaults.

File

tests/src/Unit/Processor/TokenizerTest.php, line 64

Class

TokenizerTest
Tests the "Tokenizer" processor.

Namespace

Drupal\Tests\search_api\Unit\Processor

Code

/**
 * Supplies the data sets used to check tokenizing of field values.
 *
 * @return array[]
 *   A list of data sets, each containing (in this order):
 *   - The field value handed to the processor.
 *   - The list of text tokens expected as the result.
 *   - (optional) Configuration overriding the processor's defaults.
 */
public function textDataProvider() {
  // The token for "word" is needed in nearly every data set, so it is created
  // once and the same instance is reused throughout.
  $word_token = Utility::createTextToken('word');
  $three_words = [$word_token, $word_token, $word_token];
  $upper_range = ['spaces' => 'R-Z'];

  $cases = [];

  // Plain input with the default configuration.
  $cases[] = ['word', [$word_token]];
  $cases[] = ['word word', [$word_token, $word_token]];

  // With the defaults, special characters are expected to split words, too.
  $cases[] = ['words!word', [Utility::createTextToken('words'), $word_token]];
  $cases[] = ['words$word', [Utility::createTextToken('words'), $word_token]];

  // Overriding the "spaces" setting. Note that the expected result keeps the
  // lowercase "x" inside the second token: only the uppercase "X" splits.
  $cases[] = [
    'wordXwordxword',
    [$word_token, Utility::createTextToken('wordxword')],
    ['spaces' => 'X'],
  ];
  // Once "spaces" is overridden, "!" is expected to stay part of the token.
  $cases[] = [
    'word3word!word',
    [$word_token, Utility::createTextToken('word!word')],
    ['spaces' => '\\d'],
  ];
  // Character ranges (single and combined) in the "spaces" setting.
  $cases[] = ['wordXwordRword', $three_words, $upper_range];
  $cases[] = ['wordXwordRword', $three_words, ['spaces' => 'R-TW-Z']];
  // A regular space is expected to keep splitting with overridden "spaces".
  $cases[] = ['wordXword word', $three_words, $upper_range];

  // Minimum word size: without an override, the two-character "wo" is
  // expected to be dropped.
  $cases[] = ['wordSwo', [$word_token], $upper_range];
  $cases[] = [
    'wordSwo',
    [$word_token, Utility::createTextToken('wo')],
    ['spaces' => 'R-Z', 'minimum_word_size' => 2],
  ];
  $cases[] = ['word w', [$word_token], ['minimum_word_size' => 2]];
  $cases[] = [
    'word w',
    [$word_token, Utility::createTextToken('w')],
    ['minimum_word_size' => 1],
  ];
  $cases[] = ['word wordword', [], ['minimum_word_size' => 10]];

  // With the defaults, the hyphen is expected to be removed without splitting
  // the word.
  $cases[] = ['foo-bar', [Utility::createTextToken('foobar')]];

  // Changing the ignored characters: the hyphen now splits the word.
  $cases[] = ['word-word', [$word_token, $word_token], ['ignored' => '._']];
  // A single ignored character is expected to be dropped from the token.
  $cases[] = ['foobar', [Utility::createTextToken('foobr')], ['ignored' => 'a']];

  // Several consecutive ignored characters are expected to act as a word
  // boundary; only "bar" remains in the expected result.
  $cases[] = ['foobar', [Utility::createTextToken('bar')], ['ignored' => 'o']];

  return $cases;
}