function ARC_ntriples_serializer::str2unicode_nfc in Taxonomy import/export via XML 6
Same name and namespace in other branches
- 5.2 arc/ARC_ntriples_serializer.php \ARC_ntriples_serializer::str2unicode_nfc()
- 5 arc/ARC_ntriples_serializer.php \ARC_ntriples_serializer::str2unicode_nfc()
- 6.2 arc/ARC_ntriples_serializer.php \ARC_ntriples_serializer::str2unicode_nfc()
1 call to ARC_ntriples_serializer::str2unicode_nfc()
File
- arc/
ARC_ntriples_serializer.php, line 38
Class
Code
/**
 * Escapes a string for use inside an N-Triples literal.
 *
 * Printable ASCII is copied through unchanged; tab, line feed, carriage
 * return, double quote and backslash get their short backslash escapes;
 * every other code point is written as \uXXXX (up to U+FFFF) or
 * \UXXXXXXXX (U+10000 to U+10FFFF).
 * See http://www.w3.org/TR/rdf-testcases/#ntrip_strings
 *
 * Input that is well-formed UTF-8 is decoded code point by code point.
 * Any other input is treated as single-byte text in which each byte value
 * is the code point (ISO-8859-1).
 *
 * @param string $str
 *   The raw literal value.
 *
 * @return string
 *   The escaped, pure-ASCII representation.
 */
function str2unicode_nfc($str) {
  $str = (string) $str;
  $result = '';
  /* try to detect encoding: the "u" modifier makes PCRE reject any subject that is not well-formed UTF-8 */
  $is_utf8 = (preg_match('//u', $str) === 1);
  for ($i = 0, $i_max = strlen($str); $i < $i_max; $i++) {
    /* char (first byte of the current character) */
    $char = $str[$i];
    /* unicode dec nr */
    $nr = ord($char);
    if ($is_utf8 && $nr >= 192) {
      /* multi-byte sequence: the lead byte tells how many 10###### bytes follow */
      if ($nr < 224) {
        /* 110##### 10###### */
        $extra = 1;
        $nr -= 192;
      }
      elseif ($nr < 240) {
        /* 1110#### 10###### 10###### */
        $extra = 2;
        $nr -= 224;
      }
      else {
        /* 11110### 10###### 10###### 10###### */
        $extra = 3;
        $nr -= 240;
      }
      /* the validity check above guarantees the continuation bytes exist */
      for ($j = 0; $j < $extra; $j++) {
        $i++;
        $nr = $nr * 64 + (ord($str[$i]) - 128);
      }
    }
    /* result (see http://www.w3.org/TR/rdf-testcases/#ntrip_strings) */
    if ($nr == 9) {
      /* #x9 */
      $result .= '\\t';
    }
    elseif ($nr == 10) {
      /* #xA */
      $result .= '\\n';
    }
    elseif ($nr == 13) {
      /* #xD */
      $result .= '\\r';
    }
    elseif ($nr == 34) {
      /* #x22 */
      $result .= '\\"';
    }
    elseif ($nr == 92) {
      /* #x5C: the literal below is two backslash characters */
      $result .= '\\\\';
    }
    elseif ($nr >= 32 && $nr < 127) {
      /* #x20-#x7E, minus the two escaped above */
      $result .= $char;
    }
    elseif ($nr < 65536) {
      /* #x0-#x8, #xB-#xC, #xE-#x1F, #x7F-#xFFFF */
      $result .= '\\u' . sprintf('%04X', $nr);
    }
    elseif ($nr < 1114112) {
      /* #x10000-#x10FFFF */
      $result .= '\\U' . sprintf('%08X', $nr);
    }
    /* other chars are not defined => ignore */
  }
  return $result;
}