You are here

function pathauto_cleanstring in Pathauto 5.2

Same name and namespace in other branches
  1. 5 pathauto.module \pathauto_cleanstring()
  2. 6.2 pathauto.inc \pathauto_cleanstring()
  3. 6 pathauto.inc \pathauto_cleanstring()
  4. 7 pathauto.inc \pathauto_cleanstring()

Clean up a string value provided by a module.

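Punctuation is removed or replaced according to the Pathauto settings, ignored words are dropped, and whitespace is replaced with the separator. The resulting string is reduced to only ASCII alphanumerics and separators when the "reduce to ASCII" setting is enabled (slashes are kept if $clean_slash is FALSE).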

Parameters

$string: A string to clean.

$clean_slash: Whether to clean slashes from the given string.

Return value

The cleaned string.

2 calls to pathauto_cleanstring()
pathauto_clean_token_values in ./pathauto.inc
Clean tokens so they are URL friendly.
pathauto_token_values in ./pathauto.module
Implementation of hook_token_values() for Pathauto specific tokens.

File

./pathauto.inc, line 129
Miscellaneous functions for Pathauto.

Code

/**
 * Clean up a string value provided by a module.
 *
 * The following steps are applied in order: per-character punctuation
 * settings, optional slash removal, optional transliteration, optional
 * reduction to ASCII letters and digits, removal of ignored words,
 * whitespace-to-separator replacement, trimming/collapsing of separators and
 * finally the maximum component length.
 *
 * @param $string
 *   A string to clean.
 * @param $clean_slash
 *   Whether to clean slashes from the given string.
 *
 * @return
 *   The cleaned string.
 */
function pathauto_cleanstring($string, $clean_slash = TRUE) {
  // Transliteration table; cached in static variables so that the
  // i18n-ascii.txt file is parsed at most once.
  static $i18n_loaded = FALSE;
  static $translations = array();

  // Default words to ignore
  $ignore_words = array(
    'a',
    'an',
    'as',
    'at',
    'before',
    'but',
    'by',
    'for',
    'from',
    'is',
    'in',
    'into',
    'like',
    'of',
    'off',
    'on',
    'onto',
    'per',
    'since',
    'than',
    'the',
    'this',
    'that',
    'to',
    'up',
    'via',
    'with',
  );

  // Replace or drop punctuation based on user settings
  $separator = variable_get('pathauto_separator', '-');
  $output = $string;
  $punctuation = pathauto_punctuation_chars();
  foreach ($punctuation as $name => $details) {
    $action = variable_get('pathauto_punctuation_' . $name, 0);

    // 2 is the action for "do nothing" with the punctuation
    if ($action != 2) {

      // Any other non-zero action replaces the character with the separator;
      // action 0 removes the character altogether.
      $output = str_replace($details['value'], $action ? $separator : '', $output);
    }
  }

  // If something is already urlsafe then don't remove slashes
  if ($clean_slash) {
    $output = str_replace('/', '', $output);
  }

  // Optionally remove accents and transliterate
  if (variable_get('pathauto_transliterate', FALSE)) {
    if (!$i18n_loaded) {
      $path = drupal_get_path('module', 'pathauto');
      if (is_file($path . '/i18n-ascii.txt')) {
        // parse_ini_file() returns FALSE when the file cannot be parsed; only
        // keep the result when it is a usable replacement table.
        $parsed = parse_ini_file($path . '/i18n-ascii.txt');
        if (is_array($parsed)) {
          $translations = $parsed;
        }
      }
      $i18n_loaded = TRUE;
    }
    $output = strtr($output, $translations);
  }

  // Reduce to the subset of ASCII96 letters and numbers (slashes are kept
  // here; they were already removed above when $clean_slash is set).
  if (variable_get('pathauto_reduce_ascii', FALSE)) {
    $pattern = '/[^a-zA-Z0-9\\/]+/';
    $output = preg_replace($pattern, $separator, $output);
  }

  // Get rid of words that are on the ignore list. A stored setting is treated
  // as a comma-separated string, while the built-in default above is an
  // array, so both forms are normalized to a list of words first.
  $ignore_setting = variable_get('pathauto_ignore_words', $ignore_words);
  if (!is_array($ignore_setting)) {
    $ignore_setting = explode(',', (string) $ignore_setting);
  }
  $ignore_parts = array();
  foreach ($ignore_setting as $word) {
    $word = trim((string) $word);
    if ($word !== '') {
      // Quote each word so that it is matched literally as a whole word.
      $ignore_parts[] = '\\b' . preg_quote($word, '/') . '\\b';
    }
  }
  if (!empty($ignore_parts)) {
    $ignore_re = implode('|', $ignore_parts);
    if (function_exists('mb_eregi_replace')) {
      $output = mb_eregi_replace($ignore_re, '', $output);
    }
    else {
      $output = preg_replace("/{$ignore_re}/i", '', $output);
    }
  }

  // Always replace whitespace with the separator.
  $output = preg_replace('/\\s+/', $separator, $output);

  // Trimming and collapsing only make sense for a non-empty separator; an
  // empty one would produce an invalid pattern.
  if (isset($separator) && (string) $separator !== '') {
    // In preparation for pattern matching, escape any characters of the
    // separator that are special in a regular expression (including the
    // pattern delimiter).
    $seppattern = preg_quote($separator, '/');

    // Trim any leading or trailing separators.
    $output = preg_replace("/^{$seppattern}+|{$seppattern}+\$/", '', $output);

    // Replace multiple separators with a single one
    $output = preg_replace("/{$seppattern}+/", "{$separator}", $output);
  }

  // Enforce the maximum component length (the setting is capped at 128).
  $maxlength = min(variable_get('pathauto_max_component_length', 100), 128);
  $output = drupal_substr($output, 0, $maxlength);
  return $output;
}