You are here

transliteration.module in Transliteration 7.3

Converts non-latin text to US-ASCII and sanitizes file names.

@author Stefan M. Kudwien (http://drupal.org/user/48898)

File

transliteration.module
View source
<?php

/**
 * @file
 * Converts non-latin text to US-ASCII and sanitizes file names.
 *
 * @author Stefan M. Kudwien (http://drupal.org/user/48898)
 */

/**
 * Implements hook_menu().
 *
 * Registers two local tasks under the file system configuration path: the
 * default "Settings" tab and a "Transliteration" tab that renders the
 * transliteration_retroactive form from transliteration.admin.inc.
 */
function transliteration_menu() {
  $base_path = 'admin/config/media/file-system';

  // Default tab, so that the "Transliteration" tab has a sibling to sit next
  // to.
  $settings_tab = array(
    'title' => 'Settings',
    'file path' => drupal_get_path('module', 'system'),
    'weight' => -10,
    'type' => MENU_DEFAULT_LOCAL_TASK,
  );

  // Tab for converting file names that already exist.
  $retroactive_tab = array(
    'title' => 'Transliteration',
    'description' => 'Convert existing file names to US-ASCII.',
    'page callback' => 'drupal_get_form',
    'page arguments' => array('transliteration_retroactive'),
    'access arguments' => array('administer site configuration'),
    'file' => 'transliteration.admin.inc',
    'weight' => 10,
    'type' => MENU_LOCAL_TASK,
  );

  return array(
    $base_path . '/settings' => $settings_tab,
    $base_path . '/transliteration' => $retroactive_tab,
  );
}

/**
 * Implements hook_form_FORM_ID_alter().
 *
 * Adds transliteration settings to the file system configuration form: the
 * upload, display name and lowercase checkboxes, plus the underscore
 * replacement option and its character choice. Each element is named after the
 * variable it reads its default value from.
 */
function transliteration_form_system_file_system_settings_alter(&$form, &$form_state) {
  // Options that only matter while upload transliteration is enabled are
  // hidden as long as that checkbox is unchecked.
  $hidden_without_uploads = array(
    'invisible' => array(
      'input[name="transliteration_file_uploads"]' => array(
        'checked' => FALSE,
      ),
    ),
  );

  $form['transliteration'] = array(
    '#type' => 'item',
    '#title' => t('Transliteration'),
    '#value' => '',
  );
  $form['transliteration']['transliteration_file_uploads'] = array(
    '#type' => 'checkbox',
    '#title' => t('Transliterate file names during upload.'),
    '#description' => t('Enable to convert file names to US-ASCII character set for cross-platform compatibility.'),
    '#default_value' => variable_get('transliteration_file_uploads', TRUE),
  );
  $form['transliteration']['transliteration_file_uploads_display_name'] = array(
    '#type' => 'checkbox',
    '#title' => t('Transliterate the displayed file name.'),
    '#description' => t('Enable to also convert the file name that is displayed within the site (for example, in link text).'),
    '#default_value' => variable_get('transliteration_file_uploads_display_name', TRUE),
    '#states' => $hidden_without_uploads,
  );
  $form['transliteration']['transliteration_file_lowercase'] = array(
    '#type' => 'checkbox',
    '#title' => t('Lowercase transliterated file names.'),
    '#default_value' => variable_get('transliteration_file_lowercase', TRUE),
    '#description' => t('This is a recommended setting to prevent issues with case-insensitive file systems. It has no effect if transliteration has been disabled.'),
    '#states' => $hidden_without_uploads,
  );
  $form['transliteration']['transliteration_file_underscore_replacement_option'] = array(
    '#type' => 'checkbox',
    '#title' => t('Enable Underscore replacement in the file names.'),
    '#default_value' => variable_get('transliteration_file_underscore_replacement_option', TRUE),
    // Fixed typo in the user-facing text ("enaled" -> "enable").
    '#description' => t('This is used to enable replacing underscore in all file names.'),
  );
  $form['transliteration']['transliteration_file_underscore_replacement'] = array(
    '#type' => 'radios',
    '#title' => t('Replace underscore with'),
    '#default_value' => variable_get('transliteration_file_underscore_replacement', '_'),
    '#description' => t('Select what character to use when replacing Underscore in file names. It has no effect if transliteration has been disabled.'),
    '#options' => array(
      '_' => t('Default (_)'),
      '-' => t('Hyphen (-)'),
    ),
    // The character choice is only shown while underscore replacement is
    // switched on.
    '#states' => array(
      'invisible' => array(
        'input[name="transliteration_file_underscore_replacement_option"]' => array(
          'checked' => FALSE,
        ),
      ),
    ),
  );

  // NOTE(review): presumably meant to keep the form buttons below the new
  // elements; confirm that the altered form really has a 'buttons' element.
  $form['buttons']['#weight'] = 1;
}

/**
 * Implements hook_form_FORM_ID_alter().
 *
 * Adds a "Transliteration" fieldset to the search settings form, holding a
 * single checkbox whose default comes from the transliteration_search
 * variable.
 */
function transliteration_form_search_admin_settings_alter(&$form, &$form_state) {
  $form['transliteration'] = array(
    '#type' => 'fieldset',
    '#title' => t('Transliteration'),
    // Child element, named after the variable it edits.
    'transliteration_search' => array(
      '#type' => 'checkbox',
      '#title' => t('Transliterate search index and searched strings.'),
      '#description' => t('Enable to allow searching and indexing using US-ASCII character set, i.e. to treat accented and unaccented letters the same.'),
      '#default_value' => variable_get('transliteration_search', TRUE),
    ),
  );
}

/**
 * Transliterates and sanitizes a file name.
 *
 * Processing steps, in order: other modules may alter the name, the name is
 * transliterated, spaces become underscores, underscores are optionally
 * swapped for the configured replacement character, every character outside
 * 0-9, A-Z, a-z, "_", "." and "-" is dropped, runs of "_", "." or "-" are
 * collapsed to a single character, and the result is optionally lowercased.
 * An array of names is processed recursively, preserving its keys.
 *
 * @param string|array $filename
 *   A file name, or an array of file names.
 * @param string $source_langcode
 *   Optional ISO 639 language code that denotes the language of the input and
 *   is used to apply language-specific variations. If the source language is
 *   not known at the time of transliteration, it is recommended to set this
 *   argument to the site default language to produce consistent results.
 *   Otherwise the current display language will be used.
 *
 * @return mixed
 *   Sanitized file name, or array of sanitized file names.
 *
 * @see language_default()
 */
function transliteration_clean_filename($filename, $source_langcode = NULL) {
  // Several names at once: clean each entry and keep the original keys.
  if (is_array($filename)) {
    $cleaned = array();
    foreach ($filename as $index => $single_name) {
      $cleaned[$index] = transliteration_clean_filename($single_name, $source_langcode);
    }
    return $cleaned;
  }

  // Give other modules a chance to adjust the name before it is processed.
  drupal_alter('transliteration_clean_filename_prepare', $filename, $source_langcode);
  $clean = transliteration_get($filename, '', $source_langcode);

  // Spaces become underscores first ...
  $clean = str_replace(' ', '_', $clean);

  // ... and underscores are then swapped for the configured character, if
  // that option is enabled.
  if (variable_get('transliteration_file_underscore_replacement_option', TRUE)) {
    $underscore_substitute = variable_get('transliteration_file_underscore_replacement', '_');
    $clean = str_replace('_', $underscore_substitute, $clean);
  }

  // Drop everything that is not a digit, an ASCII letter, "_", "." or "-".
  $clean = preg_replace('![^0-9A-Za-z_.-]!', '', $clean);

  // Collapse runs of the same separator character into a single one.
  $clean = preg_replace('/(_)_+|(\\.)\\.+|(-)-+/', '\\1\\2\\3', $clean);

  // Lowercasing prevents issues on case-insensitive file systems.
  return variable_get('transliteration_file_lowercase', TRUE) ? drupal_strtolower($clean) : $clean;
}

/**
 * Transliterates text.
 *
 * Takes an input string in any language and character set, and tries to
 * represent it in US-ASCII characters by conveying, in Roman letters, the
 * pronunciation expressed by the text in some other writing system.
 *
 * This is a thin wrapper: it makes sure the module's include file has been
 * loaded and then delegates to _transliteration_process().
 *
 * @param string $text
 *   UTF-8 encoded text input.
 * @param string $unknown
 *   Replacement string for characters that do not have a suitable ASCII
 *   equivalent.
 * @param string $source_langcode
 *   Optional ISO 639 language code that denotes the language of the input and
 *   is used to apply language-specific variations. If the source language is
 *   not known at the time of transliteration, it is recommended to set this
 *   argument to the site default language to produce consistent results.
 *   Otherwise the current display language will be used.
 *
 * @return string
 *   Transliterated text.
 *
 * @see language_default()
 */
function transliteration_get($text, $unknown = '?', $source_langcode = NULL) {
  // Load the include file on demand, the first time the worker is needed.
  $worker_available = function_exists('_transliteration_process');
  if (!$worker_available) {
    module_load_include('inc', 'transliteration');
  }

  $transliterated = _transliteration_process($text, $unknown, $source_langcode);
  return $transliterated;
}

/**
 * Implements hook_init().
 *
 * Sanitizes file names during upload: every entry of $_FILES['files']['name']
 * is replaced by its cleaned version, and the unaltered name is kept in
 * $_FILES['files']['orig_name'] under the same key.
 */
function transliteration_init() {
  // Nothing to do without uploads or when upload transliteration is disabled.
  if (empty($_FILES['files']) || !variable_get('transliteration_file_uploads', TRUE)) {
    return;
  }

  // Figure out language, which is available in $_POST['language'] for node
  // forms. The value is request data and may be an array (language[]=x), so
  // only use it as an array offset when it is a string.
  $langcode = NULL;
  if (!empty($_POST['language']) && is_string($_POST['language'])) {
    $languages = language_list();
    if (isset($languages[$_POST['language']])) {
      $langcode = $_POST['language'];
    }
  }

  if (isset($_FILES['files']['name']) && is_array($_FILES['files']['name'])) {
    foreach ($_FILES['files']['name'] as $field => $filename) {
      // Keep a copy of the unaltered file name.
      $_FILES['files']['orig_name'][$field] = $filename;
      $_FILES['files']['name'][$field] = transliteration_clean_filename($filename, $langcode);
    }
  }
}

/**
 * Implements hook_file_presave().
 *
 * If an uploaded file had its name altered in transliteration_init() and the
 * human-readable display name is not being transliterated, the original
 * version is restored as the human-readable name before saving. (The
 * transliterated version will still be used in the file URI, which is the
 * only place where it matters.)
 *
 * @param object $file
 *   The file object about to be saved; its filename property may be replaced.
 */
function transliteration_file_presave($file) {
  // Only the per-field array of upload names can be searched; a missing,
  // empty or scalar value means there is nothing to restore.
  if (empty($_FILES['files']['name']) || !is_array($_FILES['files']['name'])) {
    return;
  }

  // Restoring is only wanted when names are transliterated on upload but the
  // displayed name is supposed to stay untouched.
  if (!variable_get('transliteration_file_uploads', TRUE) || variable_get('transliteration_file_uploads_display_name', TRUE)) {
    return;
  }

  // Strict comparison, so that numeric-looking names such as "1e1" and "10"
  // are not treated as the same file.
  $key = array_search($file->filename, $_FILES['files']['name'], TRUE);
  if ($key !== FALSE && isset($_FILES['files']['orig_name'][$key])) {
    $file->filename = $_FILES['files']['orig_name'][$key];
  }
}

/**
 * Implements hook_search_preprocess().
 *
 * When the transliteration_search variable is enabled, transliterates the
 * given text using the site default language and dropping characters without
 * an equivalent. Otherwise the text is returned unchanged.
 */
function transliteration_search_preprocess($text) {
  // Feature switched off: hand the text back untouched.
  if (!variable_get('transliteration_search', TRUE)) {
    return $text;
  }

  $default_langcode = language_default('language');
  return transliteration_get($text, '', $default_langcode);
}

/**
 * Implements hook_filter_info().
 *
 * Declares the "transliteration" text filter together with its process,
 * settings and tips callbacks. The placeholder for characters without a known
 * transliteration defaults to "?".
 */
function transliteration_filter_info() {
  $filters = array();

  $filters['transliteration'] = array(
    'title' => t('Convert all characters to US-ASCII'),
    'process callback' => '_transliteration_filter_process',
    'settings callback' => '_transliteration_filter_settings',
    'default settings' => array(
      'no_known_transliteration' => '?',
    ),
    'tips callback' => '_transliteration_filter_tips',
  );

  return $filters;
}

/**
 * Process callback for the transliteration filter.
 *
 * Transliterates the filtered text in the given language, using the filter's
 * no_known_transliteration setting as the placeholder for characters that
 * have no equivalent.
 */
function _transliteration_filter_process($text, $filter, $format, $langcode, $cache, $cache_id) {
  $placeholder = $filter->settings['no_known_transliteration'];

  return transliteration_get($text, $placeholder, $langcode);
}

/**
 * Settings callback for the transliteration filter.
 *
 * Fills in any missing filter settings from the defaults and returns a single
 * text field for the placeholder used for characters without a known US-ASCII
 * equivalent.
 */
function _transliteration_filter_settings($form, &$form_state, $filter, $format, $defaults, $filters) {
  // Settings that are not set yet fall back to the defaults.
  $filter->settings += $defaults;

  $placeholder_field = array(
    '#type' => 'textfield',
    '#title' => t('Placeholder for characters with no known US-ASCII equivalent'),
    '#size' => 2,
    // The maximum length is 5 in order to accommodate unicode multibyte
    // input.
    '#maxlength' => 5,
    '#default_value' => $filter->settings['no_known_transliteration'],
  );

  return array('no_known_transliteration' => $placeholder_field);
}

/**
 * Implements hook_help().
 *
 * Returns the module's help text, an "About" and a "Usage" section, for the
 * admin/help#transliteration path. Nothing is returned for any other path.
 */
function transliteration_help($path, $arg) {
  // Only the module's own help page is handled here.
  if ($path != 'admin/help#transliteration') {
    return;
  }

  $output = '<h3>' . t('About') . '</h3>';
  $output .= '<p>' . implode('</p><p>', array(
    t('Provides a central transliteration service to other Drupal modules, and sanitizes file names while uploading.'),
    t('Provides one-way string transliteration (romanization) and cleans file names during upload by replacing unwanted characters..'),
    t('Generally spoken, it takes Unicode text and tries to represent it in US-ASCII characters (universally displayable, unaccented characters) by attempting to transliterate the pronunciation expressed by the text in some other writing system to Roman letters..'),
    t('According to Unidecode, from which most of the transliteration data has been derived, "Russian and Greek seem to work passably. But it works quite bad on Japanese and Thai.".'),
  )) . '</p>';

  $output .= '<h3>' . t('Usage') . '</h3>';
  $output .= '<p>' . implode('</p><p>', array(
    t("If you are installing to an existing Drupal site, you might want to fix existing file names after installation, which will update all file names containing non-ASCII characters. However, if you have manually entered links to those files in any contents, these links will break since the original files are renamed. Therefore it is a good idea to test the conversion first on a copy of your web site. You'll find the retroactive conversion at Configuration and modules >> Media >> File system >> Transliteration."),
    t("This project doesn't require special permissions."),
    t('This project can be configured from the File system configuration page (Configuration and modules » Media » File system » Settings).'),
    t('Transliterate file names during upload: If you need more control over the resulting file names you might want to disable this feature here and install the FileField Paths  (http://drupal.org/project/filefield_paths) instead.'),
    t('Lowercase transliterated file names: It is recommended to enable this option to prevent issues with case-insensitive file systems.'),
  )) . '</p>';

  return $output;
}

/**
 * Filter tips callback for the transliteration filter.
 *
 * Returns the same short, translated tip regardless of the filter, the format
 * or whether a long tip was requested.
 */
function _transliteration_filter_tips($filter, $format, $long) {
  $tip = t('Non-latin text (e.g., å, ö, 漢) will be converted to US-ASCII equivalents (a, o, ?).');

  return $tip;
}

Functions

Name (sort descending) | Description
transliteration_clean_filename Transliterates and sanitizes a file name.
transliteration_file_presave Implements hook_file_presave().
transliteration_filter_info Implements hook_filter_info().
transliteration_form_search_admin_settings_alter Implements hook_form_FORM_ID_alter().
transliteration_form_system_file_system_settings_alter Implements hook_form_FORM_ID_alter().
transliteration_get Transliterates text.
transliteration_help Implements hook_help().
transliteration_init Implements hook_init().
transliteration_menu Implements hook_menu().
transliteration_search_preprocess Implements hook_search_preprocess().
_transliteration_filter_process Process callback for the transliteration filter.
_transliteration_filter_settings Settings callback for the transliteration filter.
_transliteration_filter_tips Filter tips callback for the transliteration filter.