transliteration.module in Transliteration 7.3
Same filename and directory in other branches
Converts non-latin text to US-ASCII and sanitizes file names.
@author Stefan M. Kudwien (http://drupal.org/user/48898)
File
transliteration.module (View source)
<?php
/**
* @file
* Converts non-latin text to US-ASCII and sanitizes file names.
*
* @author Stefan M. Kudwien (http://drupal.org/user/48898)
*/
/**
 * Implements hook_menu().
 *
 * Declares two local tasks below the file system configuration path: a
 * default "Settings" tab and a "Transliteration" tab that renders the
 * transliteration_retroactive form from transliteration.admin.inc.
 *
 * @return array
 *   Menu item definitions keyed by path.
 */
function transliteration_menu() {
  // Default local task; its file path points at the system module directory.
  $settings_tab = array(
    'title' => 'Settings',
    'file path' => drupal_get_path('module', 'system'),
    'weight' => -10,
    'type' => MENU_DEFAULT_LOCAL_TASK,
  );

  // Secondary tab offering conversion of already existing file names.
  $transliteration_tab = array(
    'title' => 'Transliteration',
    'description' => 'Convert existing file names to US-ASCII.',
    'page callback' => 'drupal_get_form',
    'page arguments' => array(
      'transliteration_retroactive',
    ),
    'access arguments' => array(
      'administer site configuration',
    ),
    'file' => 'transliteration.admin.inc',
    'weight' => 10,
    'type' => MENU_LOCAL_TASK,
  );

  return array(
    'admin/config/media/file-system/settings' => $settings_tab,
    'admin/config/media/file-system/transliteration' => $transliteration_tab,
  );
}
/**
 * Implements hook_form_FORM_ID_alter().
 *
 * Adds transliteration settings to the file system configuration form: a
 * master "transliterate during upload" checkbox, two options that are hidden
 * while that checkbox is unchecked (display name and lowercase), and an
 * underscore replacement option with its replacement character.
 *
 * @param array $form
 *   The form being altered; modified in place.
 * @param array $form_state
 *   The current form state. Not used by this implementation.
 */
function transliteration_form_system_file_system_settings_alter(&$form, &$form_state) {
  // Shared #states rule: hide the element while the upload transliteration
  // checkbox is unchecked.
  $hidden_without_uploads = array(
    'invisible' => array(
      'input[name="transliteration_file_uploads"]' => array(
        'checked' => FALSE,
      ),
    ),
  );

  $form['transliteration'] = array(
    '#type' => 'item',
    '#title' => t('Transliteration'),
    '#value' => '',
  );
  $form['transliteration']['transliteration_file_uploads'] = array(
    '#type' => 'checkbox',
    '#title' => t('Transliterate file names during upload.'),
    '#description' => t('Enable to convert file names to US-ASCII character set for cross-platform compatibility.'),
    '#default_value' => variable_get('transliteration_file_uploads', TRUE),
  );
  $form['transliteration']['transliteration_file_uploads_display_name'] = array(
    '#type' => 'checkbox',
    '#title' => t('Transliterate the displayed file name.'),
    '#description' => t('Enable to also convert the file name that is displayed within the site (for example, in link text).'),
    '#default_value' => variable_get('transliteration_file_uploads_display_name', TRUE),
    '#states' => $hidden_without_uploads,
  );
  $form['transliteration']['transliteration_file_lowercase'] = array(
    '#type' => 'checkbox',
    '#title' => t('Lowercase transliterated file names.'),
    '#default_value' => variable_get('transliteration_file_lowercase', TRUE),
    '#description' => t('This is a recommended setting to prevent issues with case-insensitive file systems. It has no effect if transliteration has been disabled.'),
    '#states' => $hidden_without_uploads,
  );
  $form['transliteration']['transliteration_file_underscore_replacement_option'] = array(
    '#type' => 'checkbox',
    '#title' => t('Enable Underscore replacement in the file names.'),
    '#default_value' => variable_get('transliteration_file_underscore_replacement_option', TRUE),
    // Fixed the "enaled" typo in the user-facing description.
    '#description' => t('This is used to enable replacing underscores in all file names.'),
  );
  $form['transliteration']['transliteration_file_underscore_replacement'] = array(
    '#type' => 'radios',
    '#title' => t('Replace underscore with'),
    '#default_value' => variable_get('transliteration_file_underscore_replacement', '_'),
    '#description' => t('Select what character to use when replacing Underscore in file names. It has no effect if transliteration has been disabled.'),
    '#options' => array(
      '_' => t('Default (_)'),
      '-' => t('Hyphen (-)'),
    ),
    // Only relevant while the underscore replacement option is enabled.
    '#states' => array(
      'invisible' => array(
        'input[name="transliteration_file_underscore_replacement_option"]' => array(
          'checked' => FALSE,
        ),
      ),
    ),
  );
  // NOTE(review): Drupal 7 settings forms normally keep their submit button
  // under $form['actions']; confirm that 'buttons' is still the intended key.
  $form['buttons']['#weight'] = 1;
}
/**
 * Implements hook_form_FORM_ID_alter().
 *
 * Adds a "Transliteration" fieldset to the search settings form containing a
 * single checkbox bound to the transliteration_search variable.
 *
 * @param array $form
 *   The form being altered; modified in place.
 * @param array $form_state
 *   The current form state. Not used by this implementation.
 */
function transliteration_form_search_admin_settings_alter(&$form, &$form_state) {
  // The fieldset and its only child are assembled in one literal; the key
  // order matches assigning the fieldset first and the checkbox afterwards.
  $form['transliteration'] = array(
    '#type' => 'fieldset',
    '#title' => t('Transliteration'),
    'transliteration_search' => array(
      '#type' => 'checkbox',
      '#title' => t('Transliterate search index and searched strings.'),
      '#description' => t('Enable to allow searching and indexing using US-ASCII character set, i.e. to treat accented and unaccented letters the same.'),
      '#default_value' => variable_get('transliteration_search', TRUE),
    ),
  );
}
/**
 * Transliterates and sanitizes a file name.
 *
 * The name is first offered to other modules for alteration, then
 * transliterated with an empty replacement for unknown characters. Space
 * characters become underscores, underscores are optionally swapped for the
 * configured replacement character, every character outside 0-9, A-Z, a-z,
 * "_", "." and "-" is dropped, and runs of repeated underscores, dots, or
 * hyphens are collapsed to a single character. The result is lowercased when
 * the transliteration_file_lowercase variable is enabled.
 *
 * If an array is passed, each element is processed recursively and the keys
 * are preserved.
 *
 * @param string|array $filename
 *   A file name, or an array of file names.
 * @param string $source_langcode
 *   Optional ISO 639 language code that denotes the language of the input and
 *   is used to apply language-specific variations. If the source language is
 *   not known at the time of transliteration, it is recommended to set this
 *   argument to the site default language to produce consistent results.
 *   Otherwise the current display language will be used.
 *
 * @return mixed
 *   Sanitized file name, or array of sanitized file names.
 *
 * @see language_default()
 */
function transliteration_clean_filename($filename, $source_langcode = NULL) {
  // Arrays are cleaned element by element, keeping the original keys.
  if (is_array($filename)) {
    $cleaned = array();
    foreach ($filename as $index => $name) {
      $cleaned[$index] = transliteration_clean_filename($name, $source_langcode);
    }
    return $cleaned;
  }

  // Allow other modules to alter the filename prior to processing.
  drupal_alter('transliteration_clean_filename_prepare', $filename, $source_langcode);

  $name = transliteration_get($filename, '', $source_langcode);

  // Spaces always turn into underscores first.
  $name = str_replace(' ', '_', $name);

  // Then underscores may be exchanged for the configured character.
  $replace_underscores = variable_get('transliteration_file_underscore_replacement_option', TRUE);
  if ($replace_underscores) {
    $substitute = variable_get('transliteration_file_underscore_replacement', '_');
    $name = str_replace('_', $substitute, $name);
  }

  // Strip everything that is not alphanumeric, underscore, dot, or hyphen.
  $name = preg_replace('![^0-9A-Za-z_.-]!', '', $name);

  // Collapse repeated underscores, dots, or hyphens into one.
  $name = preg_replace('/(_)_+|(\\.)\\.+|(-)-+/', '\\1\\2\\3', $name);

  // Lowercasing avoids clashes on case-insensitive file systems.
  return variable_get('transliteration_file_lowercase', TRUE) ? drupal_strtolower($name) : $name;
}
/**
 * Transliterates text.
 *
 * Takes an input string in any language and character set, and tries to
 * represent it in US-ASCII characters by conveying, in Roman letters, the
 * pronunciation expressed by the text in some other writing system.
 *
 * The actual work is delegated to _transliteration_process(); the module's
 * include file is requested only when that function is not yet defined.
 *
 * @param string $text
 *   UTF-8 encoded text input.
 * @param string $unknown
 *   Replacement string for characters that do not have a suitable ASCII
 *   equivalent.
 * @param string $source_langcode
 *   Optional ISO 639 language code that denotes the language of the input and
 *   is used to apply language-specific variations. If the source language is
 *   not known at the time of transliteration, it is recommended to set this
 *   argument to the site default language to produce consistent results.
 *   Otherwise the current display language will be used.
 *
 * @return string
 *   Transliterated text.
 *
 * @see language_default()
 */
function transliteration_get($text, $unknown = '?', $source_langcode = NULL) {
  // Load the processing code lazily, on first use.
  $processor_available = function_exists('_transliteration_process');
  if ($processor_available === FALSE) {
    module_load_include('inc', 'transliteration');
  }

  $transliterated = _transliteration_process($text, $unknown, $source_langcode);
  return $transliterated;
}
/**
 * Implements hook_init().
 *
 * Sanitizes file names during upload. When files were submitted under the
 * "files" key and the transliteration_file_uploads variable is enabled, each
 * entry of $_FILES['files']['name'] is replaced by its cleaned version and
 * the unaltered name is kept in $_FILES['files']['orig_name'] under the same
 * key.
 */
function transliteration_init() {
  if (empty($_FILES['files']) || !variable_get('transliteration_file_uploads', TRUE)) {
    return;
  }

  // Figure out language, which is available in $_POST['language'] for node
  // forms. The value is request data, so only a string is accepted: using an
  // array as an array offset inside isset() raises a TypeError on PHP 8.
  $langcode = NULL;
  if (!empty($_POST['language']) && is_string($_POST['language'])) {
    $languages = language_list();
    if (isset($languages[$_POST['language']])) {
      $langcode = $_POST['language'];
    }
  }

  if (isset($_FILES['files']['name']) && is_array($_FILES['files']['name'])) {
    foreach ($_FILES['files']['name'] as $field => $filename) {
      // Keep a copy of the unaltered file name.
      $_FILES['files']['orig_name'][$field] = $filename;
      $_FILES['files']['name'][$field] = transliteration_clean_filename($filename, $langcode);
    }
  }
}
/**
 * Implements hook_file_presave().
 *
 * If an uploaded file had its name altered in transliteration_init() and the
 * human-readable display name is not being transliterated, the original
 * version is restored as the human-readable name before saving. (The
 * transliterated version will still be used in the file URI, which is the
 * only place where it matters.)
 *
 * @param object $file
 *   The file object about to be saved; its filename property may be replaced.
 */
function transliteration_file_presave($file) {
  // Without an array of uploaded names there is nothing to look up. The
  // is_array() check also keeps array_search() from failing on a scalar.
  if (empty($_FILES['files']['name']) || !is_array($_FILES['files']['name'])) {
    return;
  }
  // Only act when uploads are transliterated but display names are not.
  if (!variable_get('transliteration_file_uploads', TRUE) || variable_get('transliteration_file_uploads_display_name', TRUE)) {
    return;
  }

  // Strict search: a loose comparison treats numeric-looking names such as
  // "1e1" and "10" as equal and could restore another upload's name.
  $key = array_search($file->filename, $_FILES['files']['name'], TRUE);
  if ($key !== FALSE && isset($_FILES['files']['orig_name'][$key])) {
    $file->filename = $_FILES['files']['orig_name'][$key];
  }
}
/**
 * Implements hook_search_preprocess().
 *
 * Transliterates text added to the search index and user submitted search
 * keywords, using the site default language and an empty replacement for
 * unknown characters. Text is passed through unchanged when the
 * transliteration_search variable is disabled.
 *
 * @param string $text
 *   The text to preprocess.
 *
 * @return string
 *   The transliterated text, or the input if the feature is disabled.
 */
function transliteration_search_preprocess($text) {
  // Feature switched off: hand the text back untouched.
  if (!variable_get('transliteration_search', TRUE)) {
    return $text;
  }

  $default_langcode = language_default('language');
  return transliteration_get($text, '', $default_langcode);
}
/**
 * Implements hook_filter_info().
 *
 * Declares a single "transliteration" text filter together with its process,
 * settings, and tips callbacks. The default placeholder for characters
 * without a known transliteration is "?".
 *
 * @return array
 *   Filter definitions keyed by filter name.
 */
function transliteration_filter_info() {
  $filters = array();

  $filters['transliteration'] = array(
    'title' => t('Convert all characters to US-ASCII'),
    'process callback' => '_transliteration_filter_process',
    'settings callback' => '_transliteration_filter_settings',
    'default settings' => array(
      'no_known_transliteration' => '?',
    ),
    'tips callback' => '_transliteration_filter_tips',
  );

  return $filters;
}
/**
 * Process callback for the transliteration filter.
 *
 * Transliterates the text in the given language, substituting the filter's
 * configured placeholder for characters without a known transliteration.
 * The $format, $cache, and $cache_id arguments are not used.
 *
 * @return string
 *   The transliterated text.
 */
function _transliteration_filter_process($text, $filter, $format, $langcode, $cache, $cache_id) {
  $placeholder = $filter->settings['no_known_transliteration'];
  $processed = transliteration_get($text, $placeholder, $langcode);
  return $processed;
}
/**
 * Settings callback for the transliteration filter.
 *
 * Fills missing filter settings from $defaults (on the passed filter object)
 * and returns a single text field for the unknown-character placeholder.
 * The $form, $form_state, $format, and $filters arguments are not used.
 *
 * @return array
 *   Form elements for the filter settings.
 */
function _transliteration_filter_settings($form, &$form_state, $filter, $format, $defaults, $filters) {
  // Existing settings win; defaults only fill the gaps.
  $filter->settings = $filter->settings + $defaults;

  $placeholder_field = array(
    '#type' => 'textfield',
    '#title' => t('Placeholder for characters with no known US-ASCII equivalent'),
    '#size' => 2,
    // The maximum length is 5 in order to accommodate unicode multibyte
    // input.
    '#maxlength' => 5,
    '#default_value' => $filter->settings['no_known_transliteration'],
  );

  return array(
    'no_known_transliteration' => $placeholder_field,
  );
}
/**
 * Implements hook_help().
 *
 * Returns the module's help page for the admin/help#transliteration path.
 * Nothing is returned for any other path.
 *
 * @param string $path
 *   The path help is requested for.
 * @param array $arg
 *   Path arguments. Not used by this implementation.
 *
 * @return string|null
 *   Help text as HTML, or NULL when the path is not handled here.
 */
function transliteration_help($path, $arg) {
  switch ($path) {
    case 'admin/help#transliteration':
      $output = '<h3>' . t('About') . '</h3>';
      $output .= '<p>' . t('Provides a central transliteration service to other Drupal modules, and sanitizes file names while uploading.') . '</p>';
      // The next three sentences used to end in a doubled period.
      $output .= '<p>' . t('Provides one-way string transliteration (romanization) and cleans file names during upload by replacing unwanted characters.') . '</p>';
      $output .= '<p>' . t('Generally speaking, it takes Unicode text and tries to represent it in US-ASCII characters (universally displayable, unaccented characters) by attempting to transliterate the pronunciation expressed by the text in some other writing system to Roman letters.') . '</p>';
      $output .= '<p>' . t('According to Unidecode, from which most of the transliteration data has been derived, "Russian and Greek seem to work passably. But it works quite bad on Japanese and Thai."') . '</p>';
      $output .= '<h3>' . t('Usage') . '</h3>';
      // Breadcrumb separator aligned with the "»" used further below.
      $output .= '<p>' . t("If you are installing to an existing Drupal site, you might want to fix existing file names after installation, which will update all file names containing non-ASCII characters. However, if you have manually entered links to those files in any contents, these links will break since the original files are renamed. Therefore it is a good idea to test the conversion first on a copy of your web site. You'll find the retroactive conversion at Configuration and modules » Media » File system » Transliteration.") . '</p>';
      $output .= '<p>' . t("This project doesn't require special permissions.") . '</p>';
      $output .= '<p>' . t('This project can be configured from the File system configuration page (Configuration and modules » Media » File system » Settings).') . '</p>';
      $output .= '<p>' . t('Transliterate file names during upload: If you need more control over the resulting file names you might want to disable this feature here and install the FileField Paths (http://drupal.org/project/filefield_paths) instead.') . '</p>';
      $output .= '<p>' . t('Lowercase transliterated file names: It is recommended to enable this option to prevent issues with case-insensitive file systems.') . '</p>';
      return $output;
  }
}
/**
 * Filter tips callback for the transliteration filter.
 *
 * Returns the same short, translated tip regardless of the $filter, $format,
 * and $long arguments.
 *
 * @return string
 *   The translated filter tip.
 */
function _transliteration_filter_tips($filter, $format, $long) {
  $tip = t('Non-latin text (e.g., å, ö, 漢) will be converted to US-ASCII equivalents (a, o, ?).');
  return $tip;
}
Functions
Name | Description |
---|---|
transliteration_clean_filename | Transliterates and sanitizes a file name. |
transliteration_file_presave | Implements hook_file_presave(). |
transliteration_filter_info | Implements hook_filter_info(). |
transliteration_form_search_admin_settings_alter | Implements hook_form_FORM_ID_alter(). |
transliteration_form_system_file_system_settings_alter | Implements hook_form_FORM_ID_alter(). |
transliteration_get | Transliterates text. |
transliteration_help | Implements hook_help(). |
transliteration_init | Implements hook_init(). |
transliteration_menu | Implements hook_menu(). |
transliteration_search_preprocess | Implements hook_search_preprocess(). |
_transliteration_filter_process | Process callback for the transliteration filter. |
_transliteration_filter_settings | Settings callback for the transliteration filter. |
_transliteration_filter_tips | Filter tips callback for the transliteration filter. |