You are here

public function PhpTransliterationTest::providerTestPhpTransliteration in Drupal 10

Same name and namespace in other branches
  1. 8 core/tests/Drupal/Tests/Component/Transliteration/PhpTransliterationTest.php \Drupal\Tests\Component\Transliteration\PhpTransliterationTest::providerTestPhpTransliteration()
  2. 9 core/tests/Drupal/Tests/Component/Transliteration/PhpTransliterationTest.php \Drupal\Tests\Component\Transliteration\PhpTransliterationTest::providerTestPhpTransliteration()

Provides data for self::testPhpTransliteration().

Return value

array: An array of arrays, each containing the parameters for self::testPhpTransliteration().

File

core/tests/Drupal/Tests/Component/Transliteration/PhpTransliterationTest.php, line 105

Class

PhpTransliterationTest
Tests Transliteration component functionality.

Namespace

Drupal\Tests\Component\Transliteration

Code

/**
 * Provides data for self::testPhpTransliteration().
 *
 * Declared static because the provider never touches instance state and
 * PHPUnit 10 deprecates non-static data provider methods.
 *
 * @return array
 *   An array of arrays keyed by a human-readable data set name. Each inner
 *   array holds the parameters for self::testPhpTransliteration(): language
 *   code, input, expected output, and optionally the unknown character
 *   replacement and the maximum length.
 */
public static function providerTestPhpTransliteration() {
  // Random 10-character string used by the "ASCII" data sets, where the
  // expected output is identical to the input.
  $random = (new Random())->string(10);

  // Make some strings with multi-byte characters for testing.
  // NOTE: the $three_byte, $four_byte and $five_byte names (and the matching
  // data set labels) do not match the UTF-8 byte lengths: the characters
  // below occupy two, three and four bytes respectively. The names are kept
  // so that the data set labels stay stable.
  // cSpell:disable-next-line
  $two_byte = 'Ä Ö Ü Å Ø äöüåøhello';

  // U+0446, a Cyrillic character that looks something like a "u". See
  // http://www.unicode.org/charts/PDF/U0400.pdf
  // Note that this character is overridden by the 'kg' language.
  $three_byte = "\u{446}";

  // U+1411, a Canadian Aboriginal character like a triangle. See
  // http://www.unicode.org/charts/PDF/U1400.pdf
  $four_byte = "\u{1411}";

  // U+10330 and U+10338, two Gothic alphabet letters. See
  // http://wikipedia.org/wiki/Gothic_alphabet
  // They are not in our tables, but should at least give us '?' (unknown).
  $five_byte = "\u{10330}\u{10338}";

  // A lead byte (0xF8) followed by four continuation bytes (0x80). The data
  // sets below expect every one of the five bytes to be replaced by the
  // unknown character.
  $illegal_sequence = chr(0xf8) . str_repeat(chr(0x80), 4);

  // cSpell:disable
  return [
    // Each test case is language code, input, output, unknown character, max
    // length.
    'Test ASCII in English' => [
      'en',
      $random,
      $random,
    ],
    'Test ASCII in some other language with no overrides' => [
      'fr',
      $random,
      $random,
    ],
    'Test 3-byte characters from data table in a language without overrides' => [
      'fr',
      $three_byte,
      'c',
    ],
    'Test 4-byte characters from data table in a language without overrides' => [
      'fr',
      $four_byte,
      'wii',
    ],
    'Test 5-byte characters not existing in the data table' => [
      'en',
      $five_byte,
      '??',
    ],
    'Test a language with no overrides' => [
      'en',
      $two_byte,
      'A O U A O aouaohello',
    ],
    'Test language overrides in German' => [
      'de',
      $two_byte,
      'Ae Oe Ue A O aeoeueaohello',
    ],
    'Test ASCII in German language with overrides' => [
      'de',
      $random,
      $random,
    ],
    'Test language overrides in Danish' => [
      'da',
      $two_byte,
      'A O U Aa Oe aouaaoehello',
    ],
    'Test ASCII in Danish language with overrides' => [
      'da',
      $random,
      $random,
    ],
    'Test language overrides in Kyrgyz' => [
      'kg',
      $three_byte,
      'ts',
    ],
    'Test language overrides in Turkish' => [
      'tr',
      'Abayı serdiler bize. Söyleyeceğim yüzlerine. Sanırım hepimiz aynı şeyi düşünüyoruz.',
      'Abayi serdiler bize. Soyleyecegim yuzlerine. Sanirim hepimiz ayni seyi dusunuyoruz.',
    ],
    'Test language overrides in Ukrainian' => [
      'uk',
      'На подушечці форми любої є й ґудзик щоб пірʼя геть жовте сховати.',
      'Na podushechtsi formy lyuboyi ye y gudzyk shchob pirya het zhovte skhovaty.',
    ],
    // With a limit of 17 the output stops after the complete 'oe'.
    'Max length' => [
      'de',
      $two_byte,
      'Ae Oe Ue A O aeoe',
      '?',
      17,
    ],
    // With a limit of 18 only half of the next two-letter replacement ('ue')
    // would fit, so the expected output is the same as for a limit of 17.
    'Do not split up the transliteration of a single character' => [
      'de',
      $two_byte,
      'Ae Oe Ue A O aeoe',
      '?',
      18,
    ],
    'Illegal/unknown unicode' => [
      'en',
      $illegal_sequence,
      '?????',
    ],
    'Illegal/unknown unicode with non default replacement' => [
      'en',
      $illegal_sequence,
      '-----',
      '-',
    ],
    'Contains Illegal/unknown unicode' => [
      'en',
      'Hel' . chr(0x80) . 'o World',
      'Hel?o World',
    ],
    'Illegal/unknown unicode at end' => [
      'en',
      'Hell' . chr(0x80) . ' World',
      'Hell? World',
    ],
    'Non default replacement' => [
      'en',
      chr(0x80) . 'ello World',
      '_ello World',
      '_',
    ],
    'Keep the original question marks' => [
      'en',
      chr(0xf8) . '?' . chr(0x80),
      '???',
    ],
    'Keep the original question marks when non default replacement' => [
      'en',
      chr(0x80) . 'ello ? World?',
      '_ello ? World?',
      '_',
    ],
    'Keep the original question marks in some other language' => [
      'pl',
      'aąeę' . chr(0x80) . 'oółżźz ?',
      'aaee?oolzzz ?',
    ],
    // The replacement character itself is expected to be transliterated.
    'Non-US-ASCII replacement in English' => [
      'en',
      chr(0x80) . 'ello World?',
      'Oello World?',
      'Ö',
    ],
    'Non-US-ASCII replacement in some other language' => [
      'pl',
      chr(0x80) . 'óóść',
      'ooosc',
      'ó',
    ],
    'Ensure question marks are replaced when max length used' => [
      'en',
      chr(0x80) . 'ello ? World?',
      '_ello ?',
      '_',
      7,
    ],
    // An empty replacement drops the illegal bytes entirely.
    'Empty replacement' => [
      'en',
      chr(0x80) . 'ello World' . chr(0xf8),
      'ello World',
      '',
    ],
    'Not affecting spacing from the beginning and end of a string' => [
      'en',
      ' Hello Abventor! ',
      ' Hello Abventor! ',
    ],
    'Not affecting spacing from the beginning and end of a string when max length used' => [
      'pl',
      ' Drupal Kraków Community',
      ' Drupal Krakow ',
      '?',
      15,
    ],
    'Keep many spaces between words' => [
      'en',
      'Too    many    spaces between words !',
      'Too    many    spaces between words !',
    ],
  ];

  // cSpell:enable
}