public function AliasCleaner::cleanString in Pathauto 8
Clean up a string segment to be used in a URL alias.
Performs the following possible alterations:
- Remove all HTML tags.
- Process the string through the transliteration module.
- Replace or remove punctuation with the separator character.
- Remove back-slashes.
- Replace non-ascii and non-numeric characters with the separator.
- Remove common words.
- Replace whitespace with the separator character.
- Trim duplicate, leading, and trailing separators.
- Convert to lower-case.
- Shorten to a desired length and logical position based on word boundaries.
This function should *not* be called on URL alias or path strings because it is assumed that they are already clean.
Parameters
string $string: A string to clean.
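array $options: (optional) A keyed array of settings and flags to control the Pathauto clean string replacement process. Supported options are:
- language: A language object; when set, its ID (from getId()) is used as the language code and takes precedence over langcode.
- langcode: A language code to be used when translating strings.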
Return value
string The cleaned string.
Overrides AliasCleanerInterface::cleanString
1 call to AliasCleaner::cleanString()
- AliasCleaner::cleanTokenValues in src/AliasCleaner.php - Clean tokens so they are URL friendly.
File
- src/AliasCleaner.php, line 161
Class
- AliasCleaner
- Provides an alias cleaner.
Namespace
Drupal\pathauto
Code
/**
 * Cleans up a string segment so it can be used in a URL alias.
 *
 * The string is entity-decoded and rendered to plain text, punctuation is
 * replaced or dropped according to the 'pathauto.settings' configuration,
 * and the result is optionally transliterated, reduced to ASCII letters,
 * digits and slashes, stripped of ignored words, lower-cased and finally
 * truncated on a word boundary. Settings-derived values and every computed
 * result are memoized in $this->cleanStringCache.
 *
 * @param string $string
 *   A string to clean.
 * @param array $options
 *   (optional) A keyed array of settings and flags. Supported options are:
 *   - language: An object exposing getId(); its ID is used as the language
 *     code. Takes precedence over 'langcode'.
 *   - langcode: A language code, used only when 'language' is empty.
 *
 * @return string
 *   The cleaned string, or an empty string when $string is '' or NULL.
 */
public function cleanString($string, array $options = []) {
  if (empty($this->cleanStringCache)) {
    // Generate and cache variables used in this method.
    $config = $this->configFactory->get('pathauto.settings');
    $this->cleanStringCache = [
      'separator' => $config->get('separator'),
      'strings' => [],
      'transliterate' => $config->get('transliterate'),
      'punctuation' => [],
      'reduce_ascii' => (bool) $config->get('reduce_ascii'),
      'ignore_words_regex' => FALSE,
      'lowercase' => (bool) $config->get('case'),
      'maxlength' => min($config->get('max_component_length'), $this->aliasStorageHelper->getAliasSchemaMaxLength()),
    ];

    // Generate and cache the punctuation replacements for strtr().
    $punctuation = $this->getPunctuationCharacters();
    foreach ($punctuation as $name => $details) {
      $action = $config->get('punctuation.' . $name);
      // Deliberately a switch (loose comparison) rather than a strict
      // comparison: the stored config value may not have the same type as
      // the constants.
      switch ($action) {
        case PathautoGeneratorInterface::PUNCTUATION_REMOVE:
          $this->cleanStringCache['punctuation'][$details['value']] = '';
          break;

        case PathautoGeneratorInterface::PUNCTUATION_REPLACE:
          $this->cleanStringCache['punctuation'][$details['value']] = $this->cleanStringCache['separator'];
          break;

        case PathautoGeneratorInterface::PUNCTUATION_DO_NOTHING:
          // Literally do nothing.
          break;
      }
    }

    // Generate and cache the ignored words regular expression. The
    // configured list is comma and/or whitespace separated: leading and
    // trailing delimiters are stripped, inner ones become word-boundary
    // alternations.
    $ignore_words = $config->get('ignore_words');
    $ignore_words_regex = preg_replace([
      '/^[,\\s]+|[,\\s]+$/',
      '/[,\\s]+/',
    ], [
      '',
      '\\b|\\b',
    ], $ignore_words);
    if ($ignore_words_regex) {
      $this->cleanStringCache['ignore_words_regex'] = '\\b' . $ignore_words_regex . '\\b';
      if (function_exists('mb_eregi_replace')) {
        // Prefer the multibyte-aware, case-insensitive replacement.
        mb_regex_encoding('UTF-8');
        $this->cleanStringCache['ignore_words_callback'] = 'mb_eregi_replace';
      }
      else {
        // preg_replace() needs delimiters and an explicit case-insensitive
        // flag.
        $this->cleanStringCache['ignore_words_callback'] = 'preg_replace';
        $this->cleanStringCache['ignore_words_regex'] = '/' . $this->cleanStringCache['ignore_words_regex'] . '/i';
      }
    }
  }

  // Empty strings do not need any processing.
  if ($string === '' || $string === NULL) {
    return '';
  }

  $langcode = NULL;
  if (!empty($options['language'])) {
    $langcode = $options['language']->getId();
  }
  elseif (!empty($options['langcode'])) {
    $langcode = $options['langcode'];
  }

  // Keys of the result cache. PHP coerces a NULL array key to the empty
  // string; do that explicitly so a NULL offset is never used. $langcode
  // itself stays untouched because it is still passed to the
  // transliteration service below.
  $cache_langcode = $langcode === NULL ? '' : $langcode;
  $cache_string = (string) $string;

  // Check if the string has already been processed, and if so return the
  // cached result.
  if (isset($this->cleanStringCache['strings'][$cache_langcode][$cache_string])) {
    return $this->cleanStringCache['strings'][$cache_langcode][$cache_string];
  }

  // Remove all HTML tags from the string.
  $output = Html::decodeEntities($string);
  $output = PlainTextOutput::renderFromHtml($output);

  // Replace or drop punctuation based on user settings.
  $output = strtr($output, $this->cleanStringCache['punctuation']);

  // Optionally transliterate.
  if ($this->cleanStringCache['transliterate']) {
    // If the reduce strings to letters and numbers is enabled, don't bother
    // replacing unknown characters with a question mark. Use an empty string
    // instead.
    $output = $this->transliteration->transliterate($output, $langcode, $this->cleanStringCache['reduce_ascii'] ? '' : '?');

    // Replace or drop punctuation again as the transliteration process can
    // convert special characters to punctuation.
    $output = strtr($output, $this->cleanStringCache['punctuation']);
  }

  // Reduce strings to letters and numbers. Forward slashes are kept as well.
  if ($this->cleanStringCache['reduce_ascii']) {
    $output = preg_replace('/[^a-zA-Z0-9\\/]+/', $this->cleanStringCache['separator'], $output);
  }

  // Get rid of words that are on the ignore list.
  if ($this->cleanStringCache['ignore_words_regex']) {
    $words_removed = $this->cleanStringCache['ignore_words_callback']($this->cleanStringCache['ignore_words_regex'], '', $output);
    // The callback returns a non-string (FALSE/NULL) on failure, e.g. when a
    // configured ignore word is not a valid regex fragment. Only accept a
    // real string, and only if removing the words left something behind;
    // otherwise keep the unfiltered output.
    if (is_string($words_removed) && mb_strlen(trim($words_removed)) > 0) {
      $output = $words_removed;
    }
  }

  // Always replace whitespace with the separator.
  $output = preg_replace('/\\s+/', $this->cleanStringCache['separator'], $output);

  // Trim duplicates and remove trailing and leading separators. The inner
  // call passes the cached separator explicitly; the outer call relies on
  // whatever default getCleanSeparators() applies when none is given.
  $output = $this->getCleanSeparators($this->getCleanSeparators($output, $this->cleanStringCache['separator']));

  // Optionally convert to lower case.
  if ($this->cleanStringCache['lowercase']) {
    $output = mb_strtolower($output);
  }

  // Shorten to a logical place based on word boundaries.
  $output = Unicode::truncate($output, $this->cleanStringCache['maxlength'], TRUE);

  // Cache this result in the static array.
  $this->cleanStringCache['strings'][$cache_langcode][$cache_string] = $output;

  return $output;
}