function pathauto_cleanstring in Pathauto 7
Same name and namespace in other branches
- 5.2 pathauto.inc \pathauto_cleanstring()
- 5 pathauto.module \pathauto_cleanstring()
- 6.2 pathauto.inc \pathauto_cleanstring()
- 6 pathauto.inc \pathauto_cleanstring()
Clean up a string segment to be used in a URL alias.
Performs the following possible alterations:
- Remove all HTML tags.
- Process the string through the transliteration module.
- Replace or remove punctuation with the separator character.
- Remove back-slashes.
- Replace characters other than ASCII letters, digits, and forward slashes with the separator (when the "reduce to ASCII" setting is enabled).
- Remove common words.
- Replace whitespace with the separator character.
- Trim duplicate, leading, and trailing separators.
- Convert to lower-case.
- Shorten to a desired length and logical position based on word boundaries.
This function should *not* be called on URL alias or path strings because it is assumed that they are already clean.
Parameters
string $string: A string to clean.
array $options: (optional) A keyed array of settings and flags to control the Pathauto clean string replacement process. Supported options are:
- langcode: A language code to be used when transliterating the string.
Return value
string The cleaned string.
3 calls to pathauto_cleanstring()
- PathautoUnitTestCase::testCleanString in ./
pathauto.test - Test pathauto_cleanstring().
- pathauto_clean_token_values in ./
pathauto.inc - Clean tokens so they are URL friendly.
- pathauto_tokens in ./
pathauto.tokens.inc - Implements hook_tokens().
File
- ./
pathauto.inc, line 121 - Miscellaneous functions for Pathauto.
Code
/**
 * Clean up a string segment to be used in a URL alias.
 *
 * The string is decoded and stripped of HTML tags, optionally transliterated,
 * has punctuation replaced or removed according to the per-character
 * settings, is optionally reduced to ASCII letters, digits and slashes, has
 * ignored words removed (unless that would leave nothing), has whitespace
 * replaced by the separator, has duplicate/leading/trailing separators
 * trimmed, is optionally lower-cased, and is finally shortened on a word
 * boundary to the configured maximum component length.
 *
 * Settings and results are kept in a drupal_static() cache, so repeated calls
 * with the same string and language return the stored result.
 *
 * @param string $string
 *   A string to clean.
 * @param array $options
 *   (optional) A keyed array of settings and flags to control the Pathauto
 *   clean string replacement process. Supported options are:
 *   - language: An object whose 'language' property holds the language code.
 *     When non-empty it takes precedence over 'langcode'.
 *   - langcode: A language code, passed to the transliteration step.
 *
 * @return string
 *   The cleaned string. An empty string is returned for '' and NULL input.
 */
function pathauto_cleanstring($string, array $options = array()) {
  // Use the advanced drupal_static() pattern, since this is called very often.
  static $drupal_static_fast;
  if (!isset($drupal_static_fast)) {
    $drupal_static_fast['cache'] =& drupal_static(__FUNCTION__);
  }
  $cache =& $drupal_static_fast['cache'];

  // Generate and cache variables used in this function so that on the second
  // call to pathauto_cleanstring() we focus on processing.
  if (!isset($cache)) {
    $cache = array(
      'separator' => variable_get('pathauto_separator', '-'),
      'strings' => array(),
      'transliterate' => variable_get('pathauto_transliterate', FALSE) && module_exists('transliteration'),
      'punctuation' => array(),
      'reduce_ascii' => (bool) variable_get('pathauto_reduce_ascii', FALSE),
      'ignore_words_regex' => FALSE,
      'lowercase' => (bool) variable_get('pathauto_case', PATHAUTO_CASE_LOWER),
      'maxlength' => min(variable_get('pathauto_max_component_length', 100), _pathauto_get_schema_alias_maxlength()),
    );

    // Generate and cache the punctuation replacements for strtr().
    $punctuation = pathauto_punctuation_chars();
    foreach ($punctuation as $name => $details) {
      $action = variable_get('pathauto_punctuation_' . $name, PATHAUTO_PUNCTUATION_REMOVE);
      switch ($action) {
        case PATHAUTO_PUNCTUATION_REMOVE:
          $cache['punctuation'][$details['value']] = '';
          break;

        case PATHAUTO_PUNCTUATION_REPLACE:
          $cache['punctuation'][$details['value']] = $cache['separator'];
          break;

        case PATHAUTO_PUNCTUATION_DO_NOTHING:
          // Literally do nothing.
          break;
      }
    }

    // Generate and cache the ignored words regular expression: strip leading
    // and trailing commas/whitespace from the list, then turn every remaining
    // comma/whitespace run into a word-bounded alternation.
    $ignore_words = variable_get('pathauto_ignore_words', PATHAUTO_IGNORE_WORDS);
    $ignore_words_regex = preg_replace(array(
      '/^[,\\s]+|[,\\s]+$/',
      '/[,\\s]+/',
    ), array(
      '',
      '\\b|\\b',
    ), $ignore_words);
    if ($ignore_words_regex) {
      $cache['ignore_words_regex'] = '\\b' . $ignore_words_regex . '\\b';
      if (function_exists('mb_eregi_replace')) {
        mb_regex_encoding('UTF-8');
        $cache['ignore_words_callback'] = 'mb_eregi_replace';
      }
      else {
        $cache['ignore_words_callback'] = 'preg_replace';
        // The 'u' modifier makes the fallback treat pattern and subject as
        // UTF-8, consistent with the UTF-8 encoding used by the
        // mb_eregi_replace() branch above.
        $cache['ignore_words_regex'] = '/' . $cache['ignore_words_regex'] . '/iu';
      }
    }
  }

  // Empty strings do not need any processing.
  if ($string === '' || $string === NULL) {
    return '';
  }

  $langcode = NULL;
  if (!empty($options['language']->language)) {
    $langcode = $options['language']->language;
  }
  elseif (!empty($options['langcode'])) {
    $langcode = $options['langcode'];
  }

  // Normalize the cache keys to strings. PHP coerces array offsets (FALSE
  // becomes 0, floats are truncated to integers, NULL becomes ''), so using
  // the raw values could make different inputs share one cache entry.
  $langcode_key = isset($langcode) ? (string) $langcode : '';
  $string_key = (string) $string;

  // Check if the string has already been processed, and if so return the
  // cached result.
  if (isset($cache['strings'][$langcode_key][$string_key])) {
    return $cache['strings'][$langcode_key][$string_key];
  }

  // Remove all HTML tags from the string.
  $output = strip_tags(decode_entities($string));

  // Optionally transliterate (by running through the Transliteration module)
  if ($cache['transliterate']) {
    // If the reduce strings to letters and numbers is enabled, don't bother
    // replacing unknown characters with a question mark. Use an empty string
    // instead.
    $output = transliteration_get($output, $cache['reduce_ascii'] ? '' : '?', $langcode);
  }

  // Replace or drop punctuation based on user settings
  $output = strtr($output, $cache['punctuation']);

  // Reduce strings to letters and numbers (forward slashes are kept).
  if ($cache['reduce_ascii']) {
    $output = preg_replace('/[^a-zA-Z0-9\\/]+/', $cache['separator'], $output);
  }

  // Get rid of words that are on the ignore list, unless the replacement
  // failed or removing them would leave nothing behind.
  if ($cache['ignore_words_regex']) {
    $words_removed = $cache['ignore_words_callback']($cache['ignore_words_regex'], '', $output);
    if (is_string($words_removed) && drupal_strlen(trim($words_removed)) > 0) {
      $output = $words_removed;
    }
  }

  // Always replace whitespace with the separator.
  $output = preg_replace('/\\s+/', $cache['separator'], $output);

  // Trim duplicates and remove trailing and leading separators.
  $output = _pathauto_clean_separators($output, $cache['separator']);

  // Optionally convert to lower case.
  if ($cache['lowercase']) {
    $output = drupal_strtolower($output);
  }

  // Shorten to a logical place based on word boundaries.
  $output = truncate_utf8($output, $cache['maxlength'], TRUE);

  // Cache this result in the static array.
  $cache['strings'][$langcode_key][$string_key] = $output;

  return $output;
}