You are here

public static function AutoUsernameSettingsForm::autoUsernameCleanstring in Automatic User Names 8

Clean up a string segment to be used in a username.

Performs the following possible alterations:

  • Remove all HTML tags.
  • Process the string through the transliteration module.
  • Replace or remove punctuation with the separator character.
  • Remove back-slashes.
  • Replace characters other than ASCII letters, digits, and forward slashes with the separator (when enabled).
  • Remove common words.
  • Replace whitespace with the separator character.
  • Trim duplicate, leading, and trailing separators.
  • Convert to lower-case.
  • Shorten to a desired length and logical position based on word boundaries.

Parameters

string $string: A string to clean.

Return value

mixed|string The cleaned string.

1 call to AutoUsernameSettingsForm::autoUsernameCleanstring()
AutoUsernameSettingsForm::autoUsernameCleanTokenValues in src/Form/AutoUsernameSettingsForm.php
Clean token values.

File

src/Form/AutoUsernameSettingsForm.php, line 512

Class

AutoUsernameSettingsForm
Class AutoUsernameSettingsForm.

Namespace

Drupal\auto_username\Form

Code

/**
 * Cleans up a string segment so it can be used in a username.
 *
 * Steps, in order (each optional step is controlled by a state setting):
 *  - Decode HTML entities and strip HTML tags.
 *  - Transliterate to US-ASCII ('aun_transliterate').
 *  - Replace or remove punctuation ('aun_punctuation_*').
 *  - Reduce to ASCII letters, digits and slashes ('aun_reduce_ascii').
 *  - Remove ignored words ('aun_ignore_words'), unless that would leave
 *    nothing but whitespace.
 *  - Replace whitespace with the separator ('aun_replace_whitespace').
 *  - Collapse duplicate separators and trim leading/trailing ones.
 *  - Convert to lower case ('aun_case').
 *  - Truncate on a word boundary to the configured maximum length.
 *
 * @param string $string
 *   A string to clean.
 *
 * @return string
 *   The cleaned string, or an empty string when $string is '' or NULL.
 */
public static function autoUsernameCleanstring($string) {

  // Use the advanced drupal_static() pattern, since this is called very often.
  static $drupal_static_fast;
  if (!isset($drupal_static_fast)) {
    $drupal_static_fast['cache'] =& drupal_static(__FUNCTION__);
  }
  $cache =& $drupal_static_fast['cache'];

  // Build the settings cache once so that later calls go straight to the
  // string processing below.
  if (!isset($cache)) {
    $state = \Drupal::state();
    $cache = [
      'separator' => $state->get('aun_separator', '-'),
      // Transliteration is provided by a core service in Drupal 8, so the
      // setting alone decides whether it runs; checking for the Drupal 7
      // contrib "transliteration" module would keep this permanently off.
      'transliterate' => (bool) $state->get('aun_transliterate', FALSE),
      'punctuation' => [],
      'reduce_ascii' => (bool) $state->get('aun_reduce_ascii', FALSE),
      'ignore_words_regex' => FALSE,
      'replace_whitespace' => (bool) $state->get('aun_replace_whitespace', FALSE),
      'lowercase' => (bool) $state->get('aun_case', AUN_CASE_LOWER),
      'maxlength' => min($state->get('aun_max_component_length', 60), self::autoUsernameGetSchemaNameMaxlength()),
    ];

    // Generate and cache the punctuation replacements for strtr().
    $punctuation = self::autoUsernamePunctuationChars();
    foreach ($punctuation as $name => $details) {
      $action = $state->get('aun_punctuation_' . $name, AUN_PUNCTUATION_REMOVE);
      switch ($action) {
        case AUN_PUNCTUATION_REMOVE:
          $cache['punctuation'][$details['value']] = '';
          break;

        case AUN_PUNCTUATION_REPLACE:
          $cache['punctuation'][$details['value']] = $cache['separator'];
          break;

        case AUN_PUNCTUATION_DO_NOTHING:
          // Leave the character untouched: no strtr() entry is added.
          break;
      }
    }

    // Generate and cache the ignored words regular expression. The
    // comma/whitespace separated list is trimmed and then joined into a
    // "\bword\b|\bword\b" alternation.
    $ignore_words = $state->get('aun_ignore_words', '');
    $ignore_words_regex = preg_replace([
      '/^[,\\s]+|[,\\s]+$/',
      '/[,\\s]+/',
    ], [
      '',
      '\\b|\\b',
    ], $ignore_words);
    if ($ignore_words_regex) {
      $cache['ignore_words_regex'] = '\\b' . $ignore_words_regex . '\\b';
      if (function_exists('mb_eregi_replace')) {
        // The multibyte variant takes the pattern without delimiters.
        $cache['ignore_words_callback'] = 'mb_eregi_replace';
      }
      else {
        $cache['ignore_words_callback'] = 'preg_replace';
        $cache['ignore_words_regex'] = '/' . $cache['ignore_words_regex'] . '/i';
      }
    }
  }

  // Empty strings do not need any processing.
  if ($string === '' || $string === NULL) {
    return '';
  }

  // Remove all HTML tags from the string.
  $output = strip_tags(Html::decodeEntities($string));

  // Optionally transliterate. When the string is reduced to ASCII afterwards
  // anyway, drop unknown characters instead of inserting question marks.
  if ($cache['transliterate']) {
    $unknown_character = $cache['reduce_ascii'] ? '' : '?';
    $output = \Drupal::transliteration()->transliterate($output, 'en', $unknown_character);
  }

  // Replace or drop punctuation based on user settings.
  $output = strtr($output, $cache['punctuation']);

  // Reduce strings to letters and numbers (forward slashes are kept).
  if ($cache['reduce_ascii']) {
    $output = preg_replace('/[^a-zA-Z0-9\\/]+/', $cache['separator'], $output);
  }

  // Get rid of words that are on the ignore list, but only if something
  // other than whitespace remains afterwards.
  if ($cache['ignore_words_regex']) {
    $words_removed = $cache['ignore_words_callback']($cache['ignore_words_regex'], '', $output);
    if (Unicode::strlen(trim($words_removed)) > 0) {
      $output = $words_removed;
    }
  }

  // Replace whitespace with the separator.
  if ($cache['replace_whitespace']) {
    $output = preg_replace('/\\s+/', $cache['separator'], $output);
  }

  // Trim duplicates and remove trailing and leading separators.
  $output = self::autoUsernameCleanSeparators($output, $cache['separator']);

  // Optionally convert to lower case.
  if ($cache['lowercase']) {
    $output = Unicode::strtolower($output);
  }

  // Shorten to a logical place based on word boundaries.
  $output = Unicode::truncate($output, $cache['maxlength'], TRUE);
  return $output;
}