function pathauto_cleanstring in Pathauto 5
Same name and namespace in other branches
- 5.2 pathauto.inc \pathauto_cleanstring()
- 6.2 pathauto.inc \pathauto_cleanstring()
- 6 pathauto.inc \pathauto_cleanstring()
- 7 pathauto.inc \pathauto_cleanstring()
11 calls to pathauto_cleanstring()
- blog_pathauto_bulkupdate in ./pathauto_user.inc
- event_pathauto_node in contrib/pathauto_node_event.inc
- forum_pathauto_bulkupdate in ./pathauto_taxonomy.inc - Generate aliases for all forums and forum containers without aliases
- node_get_placeholders in ./pathauto_node.inc
- pathauto_menu_get_placeholders in ./pathauto_menu.inc - Generate the menu placeholders.
File
- ./pathauto.module, line 220
Code
/**
 * Clean up a string so it can be used as a single component of a URL alias.
 *
 * The string is processed in this order:
 *   1. Apostrophes are dropped, or replaced with the separator, depending on
 *      the 'pathauto_quotes' setting.
 *   2. Characters are transliterated using the table in i18n-ascii.txt in the
 *      pathauto module directory, if that file exists.
 *   3. Words on the ignore list are removed (case-insensitive, whole words).
 *   4. Every run of characters other than a-z, A-Z and 0-9 becomes a single
 *      separator.
 *   5. Leading and trailing separators are trimmed.
 *   6. The result is truncated to 'pathauto_max_component_length' characters,
 *      capped at 128.
 *
 * @param $string
 *   The string to clean.
 *
 * @return
 *   The cleaned string.
 */
function pathauto_cleanstring($string) {
  // Default words to ignore; used when 'pathauto_ignore_words' is not set.
  $ignore_words = array(
    "a",
    "an",
    "as",
    "at",
    "before",
    "but",
    "by",
    "for",
    "from",
    "is",
    "in",
    "into",
    "like",
    "of",
    "off",
    "on",
    "onto",
    "per",
    "since",
    "than",
    "the",
    "this",
    "that",
    "to",
    "up",
    "via",
    "with",
  );

  // Load the transliteration table once per request.
  static $i18n_loaded = false;
  static $translations = array();
  if (!$i18n_loaded) {
    $path = drupal_get_path('module', 'pathauto');
    if (is_file($path . '/i18n-ascii.txt')) {
      $parsed = parse_ini_file($path . '/i18n-ascii.txt');
      // parse_ini_file() returns FALSE on failure; keep the empty table then.
      if (is_array($parsed)) {
        $translations = $parsed;
      }
    }
    $i18n_loaded = true;
  }

  // Replace or drop apostrophes based on user settings.
  $separator = (string) variable_get('pathauto_separator', '-');
  $quotes = variable_get('pathauto_quotes', 0);
  $output = str_replace("'", $quotes ? $separator : '', $string);

  // Convert accented characters and ligatures to their ASCII counterparts.
  if ($translations) {
    $output = strtr($output, $translations);
  }

  // Get rid of words that are on the ignore list. The setting may be stored
  // as a comma-separated string; the built-in default is an array. Both are
  // normalised to a list of regex-safe alternatives.
  $ignore_setting = variable_get('pathauto_ignore_words', $ignore_words);
  if (!is_array($ignore_setting)) {
    $ignore_setting = explode(',', $ignore_setting);
  }
  $ignore_alternatives = array();
  foreach ($ignore_setting as $word) {
    $word = trim($word);
    if ($word !== '') {
      $ignore_alternatives[] = preg_quote($word, '/');
    }
  }
  if ($ignore_alternatives) {
    $ignore_re = '\b(?:' . implode('|', $ignore_alternatives) . ')\b';
    $output = preg_replace('/' . $ignore_re . '/i', '', $output);
  }

  // Preserve alphanumerics, everything else becomes a separator.
  $output = preg_replace('/[^a-zA-Z0-9]+/', $separator, $output);

  // Trim any leading or trailing separators. preg_quote() escapes the whole
  // separator, and the group makes "+" repeat the complete separator rather
  // than only its last character.
  if ($separator !== '') {
    $seppattern = preg_quote($separator, '/');
    $output = preg_replace('/^(?:' . $seppattern . ')+|(?:' . $seppattern . ')+$/', '', $output);
  }

  // Enforce the maximum component length.
  $maxlength = min(variable_get('pathauto_max_component_length', 100), 128);
  $output = drupal_substr($output, 0, $maxlength);

  return $output;
}