RTLMaker.php in PersianTools 8
Contains \Drupal\persiantools\RTLMaker.
Namespace
Drupal\persiantools
File
src/RTLMaker.php
View source
<?php
/**
* @file
* Contains \Drupal\persiantools\RTLMaker.
*/
namespace Drupal\persiantools;
use Drupal\Component\Utility\Unicode;
/**
 * Direction and digit fixer for mixed Persian/English text.
 *
 * Scans a string character by character, optionally converts ASCII digits to
 * Persian digits and inserts Unicode directional marks so that brackets,
 * sentence-ending punctuation and paths render in the intended order.
 */
class RTLMaker {

  // Statement direction.
  const RTL = 0;
  const LTR = 1;

  // Persian digits, indexed by the value of the ASCII digit they replace.
  public static $FA_DIGITS = array(
    '۰',
    '۱',
    '۲',
    '۳',
    '۴',
    '۵',
    '۶',
    '۷',
    '۸',
    '۹',
  );

  // Character types.
  // Unknown.
  const UN = 0;
  const FA = 1;
  const EN = 2;
  const OPENING = 3;
  const CLOSING = 4;
  const DIGIT = 5;
  // End of statement.
  const EOS = 6;
  const SLASH = 7;
  // White space.
  const WS = 8;

  // Directional marks inserted into the text, written as UTF-8 byte escapes
  // so the otherwise invisible characters can be seen in the source.
  // U+200E LEFT-TO-RIGHT MARK.
  const LRM = "\xE2\x80\x8E";
  // U+200F RIGHT-TO-LEFT MARK.
  const RLM = "\xE2\x80\x8F";
  // U+202A LEFT-TO-RIGHT EMBEDDING.
  const LRE = "\xE2\x80\xAA";
  // U+202C POP DIRECTIONAL FORMATTING.
  const PDF = "\xE2\x80\xAC";

  public static $OPENING_SYMS = array(
    '(',
    '{',
    '[',
    '"',
    '\'',
  );

  public static $STATMENT_END = array(
    '.',
    '!',
    ';',
    '?',
    ':',
  );

  // Persian letters that fall outside the contiguous range checked in
  // get_char_type().
  private static $FA_EXTRA_LETTERS = array(
    'ی',
    'پ',
    'چ',
    'ژ',
    'ک',
    'گ',
  );

  // Path-detection state shared by successive fix_mixed_path() calls while one
  // string is being scanned. convert_sm() resets it before every scan.
  private static $maybe_path = TRUE;
  private static $is_path = FALSE;
  private static $path_pos = -1;

  /**
   * Main function for multiple features and fixes of persiantools module.
   *
   * @param string $str
   *   The text to process.
   * @param string $digit_method
   *   'full' converts every ASCII digit to its Persian form; 'smart' converts
   *   a digit only while the current direction is RTL and the text seen so far
   *   is not purely English; any other value leaves digits untouched.
   * @param bool $rtl_ltr_fix
   *   Whether to insert directional marks around paths, enclosing characters
   *   and sentence-ending punctuation.
   *
   * @return string
   *   The processed text.
   */
  public static function convert_sm($str, $digit_method, $rtl_ltr_fix) {
    if ($str === NULL || $str === '') {
      return $str;
    }

    // Never let path state from a previous string influence this one.
    self::reset_path_state();

    // Offsets are derived from the markers themselves so the scan position
    // always stays in sync with what was actually inserted.
    $lrm_len = self::str_len(self::LRM);
    $rlm_len = self::str_len(self::RLM);
    $lre_len = self::str_len(self::LRE);
    $pdf_len = self::str_len(self::PDF);

    $is_all_en = TRUE;
    $any_en = FALSE;
    // No closing character is expected until an opening one is seen.
    $closing_ch = NULL;
    $paren_state = 0;
    $dir = self::RTL;
    $opening_pos = 0;
    $closed_pos = 0;
    $pre_open = $post_open = $pre_close = $post_close = self::RTL;

    $len = self::str_len($str);
    for ($i = 0; $i < $len; $i++) {
      $ch = self::char_at($str, $i);

      // Skip HTML entities (e.g. &nbsp;), whose letters would otherwise be
      // mistaken for English text. Only a ';' within the next ten characters
      // counts as the end of an entity.
      if ($ch === '&') {
        for ($j = $i; $j < $len; $j++) {
          if (self::char_at($str, $j) === ';') {
            $i = $j;
            continue 2;
          }
          elseif ($j > $i + 9) {
            break;
          }
        }
      }

      $type = self::get_char_type($ch);
      if ($type == self::FA) {
        $is_all_en = FALSE;
      }
      elseif ($ch === $closing_ch) {
        $type = self::CLOSING;
      }
      elseif ($type == self::UN) {
        // Last char should go through anyway, to wrap things up.
        if ($i != $len - 1) {
          continue;
        }
      }

      if ($rtl_ltr_fix) {
        list($str, $changed) = self::fix_mixed_path($str, $ch, $type, $i, $len);
        if ($changed) {
          // fix_mixed_path() only ever inserts a single LRM.
          $i += $lrm_len;
          $len += $lrm_len;
        }
      }

      switch ($type) {
        case self::DIGIT:
          if ($digit_method == 'full' || ($digit_method == 'smart' && $dir == self::RTL && !($is_all_en && $any_en))) {
            $new_digit = self::$FA_DIGITS[(int) $ch];
            $str = self::head($str, $i) . $new_digit . self::tail($str, $i + 1);
            $growth = self::str_len($new_digit) - 1;
            $len += $growth;
            $i += $growth;
          }
          break;

        case self::EN:
          $dir = self::LTR;
          $any_en = TRUE;
          break;

        case self::FA:
          $dir = self::RTL;
          break;

        case self::OPENING:
          $opening_pos = $i;
          $paren_state = 1;
          break;
      }

      if (!$rtl_ltr_fix) {
        continue;
      }

      // Fix misplaced enclosing chars, like parentheses, brackets, quotes...
      // States: 1 = opening char just seen, 2 = waiting for first letter
      // inside, 3 = waiting for the closing char, 4 = waiting for first letter
      // after the closing char, 5 = all four directions known.
      switch ($paren_state) {
        case 1:
          $pre_open = $dir;
          $closing_ch = self::get_closing_char($ch);
          $paren_state = 2;
          break;

        case 2:
          if ($type == self::CLOSING) {
            // Fix misplaced empty enclosing chars, like function calls, array
            // access, ...
            if ($dir == self::LTR) {
              $str = self::insert_str($str, self::LRM, $i + 1);
              $len += $lrm_len;
              $i += $lrm_len;
            }
            $paren_state = 0;
          }
          elseif ($type == self::EN || $type == self::FA) {
            $post_open = $dir;
            $paren_state = 3;
          }
          break;

        case 3:
          if ($type == self::CLOSING) {
            $pre_close = $dir;
            $paren_state = 4;
            $closed_pos = $i;
          }
          break;

        case 4:
          if ($type == self::EN || $type == self::FA) {
            $post_close = $dir;
            $paren_state = 5;
          }
          break;
      }

      // Nothing follows the closing char: treat what comes after as RTL.
      if ($paren_state == 4 && $i == $len - 1) {
        $post_close = self::RTL;
        $paren_state = 5;
      }

      if ($paren_state == 5) {
        // Each side keeps its direction only when the text on both sides of
        // the enclosing char agrees; otherwise it falls back to RTL.
        $open_dir = ($pre_open == $post_open) ? $pre_open : self::RTL;
        $close_dir = ($pre_close == $post_close) ? $pre_close : self::RTL;

        if ($open_dir != $close_dir) {
          if ($pre_open == self::RTL) {
            $str = self::insert_str($str, self::RLM, $closed_pos);
            $len += $rlm_len;
            $i += $rlm_len;
          }
          elseif ($pre_open == self::LTR) {
            // Wrap the enclosed span in an LRE ... PDF pair. The closing char
            // has moved right by the LRE just inserted, and the PDF goes
            // immediately after it.
            $str = self::insert_str($str, self::LRE, $opening_pos);
            $str = self::insert_str($str, self::PDF, $closed_pos + $lre_len + 1);
            $len += $lre_len + $pdf_len;
            $i += $lre_len + $pdf_len;
          }
        }
        // The pair has been evaluated; wait for the next opening char.
        $paren_state = 0;
      }

      // Fix misplaced dot in English sentences inside RTL direction.
      if ($is_all_en && $dir == self::LTR && $type == self::EOS) {
        if ($i < $len - 1) {
          $next_type = self::get_char_type(self::char_at($str, $i + 1));
          // Punctuation glued to a following letter (e.g. "example.com") is
          // not the end of a statement.
          if ($next_type == self::EN) {
            continue;
          }
        }
        $str = self::insert_str($str, self::LRM, $i + 1);
        $i += $lrm_len;
        $len += $lrm_len;
      }
    }

    return $str;
  }

  /**
   * Fix mixed-up paths in rtl blocks.
   *
   * Logic: Gets triggered once a starting '.' or '/' is detected after a
   * whitespace. The correcting symbol is inserted once an english char is seen
   * inside the path.
   *
   * This method is stateful across consecutive calls for the same string; the
   * state is reset whenever it is called with $i == 0 and at the start of
   * every convert_sm() run.
   *
   * @param string $str
   *   The text being scanned.
   * @param string $ch
   *   The character at position $i.
   * @param int $type
   *   The character type of $ch (one of the type constants of this class).
   * @param int $i
   *   Current character position in $str.
   * @param int $len
   *   Length of $str in characters.
   *
   * @return array
   *   A two-element array: the (possibly modified) string and a boolean that
   *   is TRUE when exactly one LRM mark has been inserted.
   */
  public static function fix_mixed_path($str, $ch, $type, $i, $len) {
    $changed = FALSE;

    if ($i == 0) {
      self::reset_path_state();
    }

    if ($type == self::WS) {
      self::$maybe_path = TRUE;
    }
    elseif (self::$is_path) {
      // First character after the leading slash decides whether this really
      // is an English path.
      if ($type == self::EN) {
        $str = self::insert_str($str, self::LRM, self::$path_pos);
        $changed = TRUE;
      }
      self::$path_pos = -1;
      self::$is_path = FALSE;
    }
    elseif (self::$maybe_path) {
      if ($type == self::SLASH) {
        self::$is_path = TRUE;
        if (self::$path_pos < 0) {
          self::$path_pos = $i;
        }
      }
      elseif ($ch == '.') {
        // Leading dots ("./", "../") belong to the path; remember where the
        // first one is.
        if (self::$path_pos < 0) {
          self::$path_pos = $i;
        }
      }
      else {
        self::reset_path_state();
        self::$maybe_path = FALSE;
      }
    }

    // Detect trailing slashes in paths.
    if ($type == self::SLASH && $i > 0) {
      $prev_type = self::get_char_type(self::char_at($str, $i - 1));
      $is_last_char = ($i == $len - 1);
      if ($prev_type == self::EN && ($is_last_char || self::char_at($str, $i + 1) == ' ')) {
        $str = self::insert_str($str, self::LRM, $i + 1);
        $changed = TRUE;
      }
    }

    return array(
      $str,
      $changed,
    );
  }

  /**
   * Detects and returns a character's type.
   *
   * @param string $ch
   *   A single character.
   *
   * @return int
   *   One of the character type constants of this class; self::UN when the
   *   character matches none of the known classes. CLOSING is never returned
   *   here because it depends on the scan context.
   */
  public static function get_char_type($ch) {
    if (($ch >= 'آ' && $ch <= 'ي') || in_array($ch, self::$FA_EXTRA_LETTERS, TRUE)) {
      return self::FA;
    }
    if (($ch >= 'a' && $ch <= 'z') || ($ch >= 'A' && $ch <= 'Z')) {
      return self::EN;
    }
    if (in_array($ch, self::$OPENING_SYMS, TRUE)) {
      return self::OPENING;
    }
    if ($ch >= '0' && $ch <= '9') {
      return self::DIGIT;
    }
    if (in_array($ch, self::$STATMENT_END, TRUE)) {
      return self::EOS;
    }
    if ($ch === '/') {
      return self::SLASH;
    }
    // Double quotes are required here: '\n' in single quotes is a backslash
    // followed by "n", which a single character can never equal.
    if ($ch === ' ' || $ch === "\n") {
      return self::WS;
    }
    // Type not detected.
    return self::UN;
  }

  /**
   * Returns the matching closing char for an opening char.
   *
   * @param string $char
   *   An opening character.
   *
   * @return string|null
   *   The matching closing character (quotes close themselves), or NULL when
   *   $char is not a known opening character.
   */
  public static function get_closing_char($char) {
    switch ($char) {
      case '(':
        return ')';

      case '{':
        return '}';

      case '[':
        return ']';

      case '\'':
      case '"':
        return $char;
    }
    return NULL;
  }

  /**
   * A simple function to insert a unicode char in a str.
   *
   * @param string $str
   *   The string to insert into.
   * @param string $char
   *   The text to insert.
   * @param int $pos
   *   Character (not byte) position at which $char is inserted.
   *
   * @return string
   *   The resulting string.
   */
  public static function insert_str($str, $char, $pos) {
    return self::head($str, $pos) . $char . self::tail($str, $pos);
  }

  /**
   * Resets the path-detection state to "a path may start here".
   */
  private static function reset_path_state() {
    self::$maybe_path = TRUE;
    self::$is_path = FALSE;
    self::$path_pos = -1;
  }

  /**
   * Returns the number of characters in a UTF-8 string.
   */
  private static function str_len($str) {
    return mb_strlen($str, 'UTF-8');
  }

  /**
   * Returns the character at a character position, or '' past the end.
   */
  private static function char_at($str, $pos) {
    return (string) mb_substr($str, $pos, 1, 'UTF-8');
  }

  /**
   * Returns the first $count characters of a UTF-8 string.
   */
  private static function head($str, $count) {
    return (string) mb_substr($str, 0, $count, 'UTF-8');
  }

  /**
   * Returns everything from character position $pos to the end.
   */
  private static function tail($str, $pos) {
    return (string) mb_substr($str, $pos, NULL, 'UTF-8');
  }

}