strip_utf8mb4.module in Strip 4-byte UTF8 7
Allows users to strip 4-byte UTF-8 characters: overly long 2-byte sequences and characters at or above U+10000 are stripped, and overly long 3-byte sequences and UTF-16 surrogates are rejected.
File
strip_utf8mb4.moduleView source
<?php
/**
* @file
* Allows users to strip 4-byte UTF-8 characters: overly long 2-byte sequences and characters at or above U+10000 are stripped, and overly long 3-byte sequences and UTF-16 surrogates are rejected.
*/
/**
 * Implements hook_menu().
 *
 * Registers the module's configuration page at
 * admin/config/content/strip_utf8mb4. The page is rendered through
 * drupal_get_form() with the strip_utf8mb4_configuration_form builder, which
 * is expected to be provided by strip_utf8mb4.admin.inc.
 *
 * @return array
 *   Menu router items keyed by path.
 */
function strip_utf8mb4_menu() {
  $items = array();

  $items['admin/config/content/strip_utf8mb4'] = array(
    'type' => MENU_NORMAL_ITEM,
    // Title and description are deliberately NOT passed through t(): the menu
    // system translates both when the item is displayed, so translating here
    // would translate twice and store the router description in whichever
    // language happens to be active during the menu rebuild.
    'title' => 'Strip 4-byte UTF8',
    'description' => 'Configure text fields to reject overly long 2 byte sequences, as well as characters above U+10000, reject overly long 3 byte sequences and UTF-16.',
    'page callback' => 'drupal_get_form',
    'page arguments' => array(
      'strip_utf8mb4_configuration_form',
    ),
    'access arguments' => array(
      'administer site configuration',
    ),
    'file' => 'strip_utf8mb4.admin.inc',
  );

  return $items;
}
/**
 * Implements hook_webform_submission_presave().
 *
 * Filters the submitted values of every webform component whose type is
 * enabled in the module configuration (see _webform_strip_utf8mb4_for()),
 * replacing unsupported byte sequences with the configured replacement string.
 *
 * @param object $node
 *   The webform node; its component definitions are read from
 *   $node->webform['components'].
 * @param object $submission
 *   The submission about to be saved. Values under
 *   $submission->data[$cid]['value'] are modified in place.
 */
function strip_utf8mb4_webform_submission_presave($node, &$submission) {
  // The replacement string does not change during the request, so read it
  // once instead of once per value.
  $replace_text = variable_get('strip_utf8mb4_replace_string', '--');

  // The submission has no data about the type of the values, so we go through
  // the components saved in the node.
  foreach ($node->webform['components'] as $cid => $component) {
    if (!isset($component['type']) || !_webform_strip_utf8mb4_for($component['type'])) {
      continue;
    }

    // Only touch components that actually carry a value. Taking a reference
    // to a missing entry would create it as NULL, leaving an empty 'value'
    // entry in the submission for every component that was not filled in.
    if (!isset($submission->data[$cid]['value'])) {
      continue;
    }

    $component_value = $submission->data[$cid]['value'];

    if (is_array($component_value)) {
      // Write back by key rather than iterating by reference, so no dangling
      // reference survives the loop.
      foreach ($component_value as $delta => $value) {
        $component_value[$delta] = _strip_utf8mb4_for_text_fields($value, $replace_text);
      }
      $submission->data[$cid]['value'] = $component_value;
    }
    elseif (is_string($component_value) && $component_value != '') {
      $submission->data[$cid]['value'] = _strip_utf8mb4_for_text_fields($component_value, $replace_text);
    }
  }
}
/**
 * Implements hook_field_attach_presave().
 *
 * For every field instance of the entity's bundle whose widget type is enabled
 * in the module configuration (see _strip_utf8mb4_for()), the field value is
 * read through an entity metadata wrapper, filtered with
 * _strip_utf8mb4_for_text_fields() and written back. The entity's title
 * property is filtered as well when the 'core_title' option is enabled.
 *
 * @param string $entity_type
 *   The type of the entity being saved.
 * @param object $entity
 *   The entity being saved; it is modified in place.
 */
function strip_utf8mb4_field_attach_presave($entity_type, $entity) {
  // Read the replacement string once; it is the same for every value.
  $replace_text = variable_get('strip_utf8mb4_replace_string', '--');

  // Only the bundle is needed from the extracted ids.
  list(, , $bundle) = entity_extract_ids($entity_type, $entity);

  // The wrapper is created lazily, and only once, the first time a field
  // actually needs filtering.
  $entity_wrapper = NULL;

  foreach (field_info_instances($entity_type, $bundle) as $instance) {
    if (!isset($instance['widget']['type']) || !_strip_utf8mb4_for($instance['widget']['type'])) {
      continue;
    }

    if ($entity_wrapper === NULL) {
      $entity_wrapper = entity_metadata_wrapper($entity_type, $entity);
    }

    $field_name = $instance['field_name'];
    $text_field_data = $entity_wrapper->{$field_name}->value();

    if (is_array($text_field_data) && count($text_field_data) > 0) {
      if (isset($text_field_data['value'])) {
        // A single item exposing its text under the 'value' key.
        $text_field_data['value'] = _strip_utf8mb4_for_text_fields($text_field_data['value'], $replace_text);
      }
      else {
        // Otherwise treat the data as a list of items and filter each one.
        foreach ($text_field_data as $item_key => $item) {
          $text_field_data[$item_key] = _strip_utf8mb4_for_text_fields($item, $replace_text);
        }
      }

      // Filter the summary as well when the item carries one.
      if (isset($text_field_data['summary'])) {
        $text_field_data['summary'] = _strip_utf8mb4_for_text_fields($text_field_data['summary'], $replace_text);
      }

      // Save the filtered field data in the entity object.
      $entity_wrapper->{$field_name}->set($text_field_data);
    }
    elseif (is_string($text_field_data) && $text_field_data != '') {
      // Plain string value: filter it and assign it back through the wrapper.
      $entity_wrapper->{$field_name} = _strip_utf8mb4_for_text_fields($text_field_data, $replace_text);
    }
  }

  // Filter the entity's title property when the 'core_title' option is on.
  if (isset($entity->title) && _strip_utf8mb4_for('core_title')) {
    $entity->title = _strip_utf8mb4_for_text_fields($entity->title, $replace_text);
  }
}
/**
 * Tells whether filtering is enabled for a given text field widget type.
 *
 * The enabled widget types are read from the
 * 'strip_utf8mb4_for_text_field_widget_types' variable; when that variable is
 * not set, the three core text widgets and the 'core_title' pseudo type are
 * used.
 *
 * @param string $field_widget_type
 *   The widget type to look up, or 'core_title' for the title property.
 *
 * @return bool
 *   TRUE if the given type is in the enabled list, FALSE otherwise.
 */
function _strip_utf8mb4_for($field_widget_type) {
  // Fallback list used while nothing has been saved in the configuration.
  $default_widget_types = array(
    'text_textfield',
    'text_textarea',
    'text_textarea_with_summary',
    'core_title',
  );

  $enabled_widget_types = variable_get('strip_utf8mb4_for_text_field_widget_types', $default_widget_types);

  // Strict comparison: only exact string matches count.
  return in_array($field_widget_type, $enabled_widget_types, TRUE);
}
/**
 * Tells whether filtering is enabled for a given webform component type.
 *
 * The enabled component types are read from the
 * 'webform_strip_utf8mb4_for_component_types' variable; when that variable is
 * not set, 'textfield' and 'textarea' are used.
 *
 * @param string $component_type
 *   The webform component type to look up.
 *
 * @return bool
 *   TRUE if the given type is in the enabled list, FALSE otherwise.
 */
function _webform_strip_utf8mb4_for($component_type) {
  // Fallback list used while nothing has been saved in the configuration.
  $default_component_types = array(
    'textfield',
    'textarea',
  );

  $enabled_component_types = variable_get('webform_strip_utf8mb4_for_component_types', $default_component_types);

  // Strict comparison: only exact string matches count.
  return in_array($component_type, $enabled_component_types, TRUE);
}
/**
 * Returns the text with unsupported byte sequences replaced.
 *
 * Two byte-wise passes are applied. The first replaces a set of control
 * characters, stray continuation bytes, the lead bytes 0xC0/0xC1 and
 * 0xF0-0xFF together with their continuation bytes (which covers every
 * 4-byte sequence), and 2- and 3-byte sequences with the wrong number of
 * continuation bytes. The second replaces overly long 3-byte sequences and
 * UTF-16 surrogates. When anything was replaced, a warning message is shown
 * to the user.
 *
 * @param string $text_data
 *   The text to filter.
 * @param string $replace_text
 *   The literal text to insert in place of each removed sequence.
 *
 * @return string
 *   The filtered text.
 */
function _strip_utf8mb4_for_text_fields($text_data, $replace_text = '') {
  // The replacement is configurable text and must be inserted literally.
  // Escape "\" and "$" so preg_replace() does not read sequences such as
  // "$0" or "\1" as back-references, which would put the stripped bytes
  // straight back into the result.
  $literal_replacement = strtr((string) $replace_text, array(
    '\\' => '\\\\',
    '$' => '\\$',
  ));

  // Strip overly long 2 byte sequences, as well as characters
  // above U+10000 and replace with $replace_text.
  $first_pass_pattern = '/[\\x00-\\x08\\x10\\x0B\\x0C\\x0E-\\x19\\x7F]'
    . '|[\\x00-\\x7F][\\x80-\\xBF]+'
    . '|([\\xC0\\xC1]|[\\xF0-\\xFF])[\\x80-\\xBF]*'
    . '|[\\xC2-\\xDF]((?![\\x80-\\xBF])|[\\x80-\\xBF]{2,})'
    . '|[\\xE0-\\xEF](([\\x80-\\xBF](?![\\x80-\\xBF]))|(?![\\x80-\\xBF]{2})|[\\x80-\\xBF]{3,})/S';

  // Strip overly long 3 byte sequences and UTF-16 surrogates and replace with
  // $replace_text.
  $second_pass_pattern = '/\\xE0[\\x80-\\x9F][\\x80-\\xBF]'
    . '|\\xED[\\xA0-\\xBF][\\x80-\\xBF]/S';

  $processed_text_data = $text_data;
  $replacements_done = 0;

  foreach (array($first_pass_pattern, $second_pass_pattern) as $pattern) {
    $pass_count = 0;
    $pass_result = preg_replace($pattern, $literal_replacement, $processed_text_data, -1, $pass_count);

    // preg_replace() returns NULL when PCRE fails (for example when a limit
    // is hit on very large input). Keep the text from before this pass in
    // that case instead of silently replacing the whole value with nothing.
    if ($pass_result === NULL) {
      continue;
    }

    $processed_text_data = $pass_result;
    $replacements_done += (int) $pass_count;
  }

  if ($replacements_done > 0) {
    // "@replacement" is escaped by t(); the message is rendered as HTML, so
    // the configured replacement string must not be inserted raw.
    $message = t('Unsupported characters in your text were replaced with "@replacement"', array(
      '@replacement' => $replace_text,
    ));
    drupal_set_message($message, 'warning', FALSE);
  }

  return $processed_text_data;
}
Functions
Name |
Description |
---|---|
strip_utf8mb4_field_attach_presave | Implements hook_field_attach_presave(). |
strip_utf8mb4_menu | Implements hook_menu(). |
strip_utf8mb4_webform_submission_presave | Implements hook_webform_submission_presave(). |
_strip_utf8mb4_for | Return TRUE if the text field widget type is enabled from the configurations. |
_strip_utf8mb4_for_text_fields | Returns the processed text in which the non-UTF-8 characters have been replaced. |
_webform_strip_utf8mb4_for | Return TRUE if the component type is enabled from the configurations. |