You are here

function pathauto_cleanstring in Pathauto 5

Same name and namespace in other branches
  1. 5.2 pathauto.inc \pathauto_cleanstring()
  2. 6.2 pathauto.inc \pathauto_cleanstring()
  3. 6 pathauto.inc \pathauto_cleanstring()
  4. 7 pathauto.inc \pathauto_cleanstring()
11 calls to pathauto_cleanstring()
blog_pathauto_bulkupdate in ./pathauto_user.inc
event_pathauto_node in contrib/pathauto_node_event.inc
forum_pathauto_bulkupdate in ./pathauto_taxonomy.inc
Generate aliases for all forums and forum containers without aliases
node_get_placeholders in ./pathauto_node.inc
pathauto_menu_get_placeholders in ./pathauto_menu.inc
Generate the menu placeholders.

... See full list

File

./pathauto.module, line 220

Code

/**
 * Clean up a string so it can be used as one component of a URL alias.
 *
 * Processing steps, in order:
 *   1. Apostrophes are dropped, or replaced by the separator when the
 *      'pathauto_quotes' variable is truthy.
 *   2. Characters are transliterated using the pairs found in the module's
 *      i18n-ascii.txt file, if that file exists and parses.
 *   3. Words on the ignore list ('pathauto_ignore_words', either a
 *      comma-separated string or an array) are removed, case-insensitively.
 *   4. Every run of characters other than a-z, A-Z and 0-9 becomes a single
 *      separator ('pathauto_separator', default '-').
 *   5. Leading and trailing separators are trimmed.
 *   6. The result is truncated to 'pathauto_max_component_length' characters
 *      (default 100, never more than 128).
 *
 * @param $string
 *   The raw string to clean.
 *
 * @return
 *   The cleaned string.
 */
function pathauto_cleanstring($string) {

  // Default words to ignore
  $ignore_words = array(
    "a",
    "an",
    "as",
    "at",
    "before",
    "but",
    "by",
    "for",
    "from",
    "is",
    "in",
    "into",
    "like",
    "of",
    "off",
    "on",
    "onto",
    "per",
    "since",
    "than",
    "the",
    "this",
    "that",
    "to",
    "up",
    "via",
    "with",
  );

  // Load the transliteration table only once per request.
  static $i18n_loaded = false;
  static $translations = array();
  if (!$i18n_loaded) {
    $path = drupal_get_path('module', 'pathauto');
    if (is_file($path . '/i18n-ascii.txt')) {
      // parse_ini_file() returns FALSE on failure; keep the empty table then.
      $parsed = parse_ini_file($path . '/i18n-ascii.txt');
      if (is_array($parsed)) {
        $translations = $parsed;
      }
    }
    $i18n_loaded = true;
  }

  // Replace or drop apostrophes based on user settings
  $separator = variable_get('pathauto_separator', '-');
  $quotes = variable_get('pathauto_quotes', 0);
  $output = str_replace("'", $quotes ? $separator : '', $string);

  // Convert accented characters to their ASCII counterparts. This must run
  // after $output has been initialised above.
  if ($translations) {
    $output = strtr($output, $translations);
  }

  // Get rid of words that are on the ignore list. The setting may be a
  // comma-separated string (as stored by a settings form) or the default
  // array above, so normalise to an array before building the pattern.
  $ignore = variable_get('pathauto_ignore_words', $ignore_words);
  if (!is_array($ignore)) {
    $ignore = explode(',', $ignore);
  }
  $alternatives = array();
  foreach ($ignore as $word) {
    $word = trim($word);
    if ($word !== '') {
      // Quote each word so regex metacharacters are matched literally.
      $alternatives[] = preg_quote($word, '/');
    }
  }
  if ($alternatives) {
    $output = preg_replace('/\b(?:' . implode('|', $alternatives) . ')\b/i', '', $output);
  }

  // Preserve alphanumerics, everything else becomes a separator
  $output = preg_replace('/[^a-zA-Z0-9]+/', $separator, $output);

  // Trim any leading or trailing separators. The separator is quoted and
  // grouped so that multi-character separators are matched as a unit.
  if ((string) $separator !== '') {
    $seppattern = '(?:' . preg_quote($separator, '/') . ')';
    $output = preg_replace('/^' . $seppattern . '+|' . $seppattern . '+$/', '', $output);
  }

  // Enforce the maximum component length
  $maxlength = min(variable_get('pathauto_max_component_length', 100), 128);
  $output = drupal_substr($output, 0, $maxlength);
  return $output;
}