ARC_ntriples_serializer.php in Taxonomy import/export via XML 5.2
File
arc/ARC_ntriples_serializer.php
View source
<?php
/**
 * Serializes an array of ARC-style triples into N-Triples text.
 *
 * Each triple is an array with the keys "s", "p" and "o":
 *  - "s" and "o" are arrays with a "type" of "uri" (value in "uri"),
 *    "bnode" (value in "bnode_id") or, for objects only, "literal" (value in
 *    "val", optional "dt" datatype URI or "lang" language tag).
 *  - "p" is the predicate URI as a plain string.
 */
#[\AllowDynamicProperties]
class ARC_ntriples_serializer {

  /**
   * String written between subject, predicate and object.
   */
  public $spacer = " ";

  /**
   * String written after the terminating " ." of every statement.
   */
  public $linebreak = "\r\n";

  /**
   * Creates a serializer.
   *
   * @param mixed $args
   *   Optional array of property overrides, e.g. array("linebreak" => "\n").
   *   Every key/value pair is copied onto the object; any non-array value is
   *   ignored.
   */
  public function __construct($args = "") {
    $this->spacer = " ";
    $this->linebreak = "\r\n";
    if (is_array($args)) {
      foreach ($args as $k => $v) {
        $this->{$k} = $v;
      }
    }
  }

  /**
   * Legacy PHP 4-style constructor name, kept so explicit calls keep working.
   *
   * PHP 8 no longer treats a method named after the class as a constructor,
   * so the real initialisation lives in __construct().
   *
   * @param mixed $args
   *   See __construct().
   */
  public function ARC_ntriples_serializer($args = "") {
    $this->__construct($args);
  }

  /**
   * Escapes a string for use inside an N-Triples string or URI reference.
   *
   * Printable ASCII is kept as is, except that the double quote and the
   * backslash are backslash-escaped. TAB, LF and CR become \t, \n and \r.
   * Every other code point is written as \uXXXX, or \UXXXXXXXX above U+FFFF.
   *
   * Input is read as UTF-8. A byte that does not start a well-formed UTF-8
   * sequence is treated as a single ISO-8859-1 character, so Latin-1 encoded
   * input is still escaped sensibly.
   *
   * Despite the method name, no Unicode normalization (NFC) is performed.
   *
   * @param string $str
   *   The raw string.
   *
   * @return string
   *   The escaped, pure-ASCII string (without surrounding quotes or brackets).
   */
  public function str2unicode_nfc($str) {
    $str = (string) $str;
    $result = "";
    $len = strlen($str);
    $pos = 0;
    while ($pos < $len) {
      list($nr, $size) = $this->read_code_point($str, $pos, $len);
      $pos += $size;
      if ($nr === 9) {
        $result .= '\\t';
      }
      elseif ($nr === 10) {
        $result .= '\\n';
      }
      elseif ($nr === 13) {
        // Carriage return must be \r; it was previously emitted as \t.
        $result .= '\\r';
      }
      elseif ($nr === 34) {
        $result .= '\\"';
      }
      elseif ($nr === 92) {
        // Four backslashes in a single-quoted literal yield the two
        // characters "\\" that N-Triples requires for one backslash.
        $result .= '\\\\';
      }
      elseif ($nr >= 32 && $nr < 127) {
        $result .= chr($nr);
      }
      elseif ($nr < 65536) {
        $result .= "\\u" . sprintf("%04X", $nr);
      }
      else {
        $result .= "\\U" . sprintf("%08X", $nr);
      }
    }
    return $result;
  }

  /**
   * Reads one code point from a byte string.
   *
   * @param string $str
   *   The byte string.
   * @param int $pos
   *   Offset of the first byte to read; must be smaller than $len.
   * @param int $len
   *   Length of $str in bytes.
   *
   * @return array
   *   array(code point, number of bytes consumed). For a byte that does not
   *   begin a well-formed UTF-8 sequence (bad lead byte, truncated sequence,
   *   overlong form, surrogate, or value above U+10FFFF) the byte value itself
   *   is returned with a length of 1.
   */
  protected function read_code_point($str, $pos, $len) {
    $lead = ord($str[$pos]);
    if ($lead < 0x80) {
      return array($lead, 1);
    }
    if ($lead >= 0xC2 && $lead <= 0xDF) {
      $extra = 1;
      $nr = $lead & 0x1F;
      $min = 0x80;
    }
    elseif ($lead >= 0xE0 && $lead <= 0xEF) {
      $extra = 2;
      $nr = $lead & 0x0F;
      $min = 0x800;
    }
    elseif ($lead >= 0xF0 && $lead <= 0xF4) {
      $extra = 3;
      $nr = $lead & 0x07;
      $min = 0x10000;
    }
    else {
      // Stray continuation byte or a lead byte that UTF-8 never uses.
      return array($lead, 1);
    }
    if ($pos + $extra >= $len) {
      // Sequence is cut off by the end of the string.
      return array($lead, 1);
    }
    for ($k = 1; $k <= $extra; $k++) {
      $cont = ord($str[$pos + $k]);
      if (($cont & 0xC0) !== 0x80) {
        return array($lead, 1);
      }
      $nr = ($nr << 6) | ($cont & 0x3F);
    }
    if ($nr < $min || $nr > 0x10FFFF || ($nr >= 0xD800 && $nr <= 0xDFFF)) {
      return array($lead, 1);
    }
    return array($nr, $extra + 1);
  }

  /**
   * Renders a single subject or object term.
   *
   * @param mixed $term
   *   Term array with a "type" key; anything else renders as an empty string.
   * @param bool $allow_literal
   *   TRUE for object position, where "literal" terms are accepted.
   *
   * @return string
   *   The N-Triples form of the term, or "" for an unknown type.
   */
  protected function term2ntriples($term, $allow_literal) {
    if (!is_array($term) || !isset($term["type"])) {
      return "";
    }
    $type = $term["type"];
    if ($type === "uri") {
      $uri = isset($term["uri"]) ? $term["uri"] : "";
      return '<' . $this->str2unicode_nfc($uri) . '>';
    }
    if ($type === "bnode") {
      // Written verbatim; presumably the id already carries the "_:" prefix
      // (not visible from this class - confirm against the parser).
      return isset($term["bnode_id"]) ? $term["bnode_id"] : "";
    }
    if ($allow_literal && $type === "literal") {
      $val = isset($term["val"]) ? $term["val"] : "";
      $out = '"' . $this->str2unicode_nfc($val) . '"';
      // A datatype takes precedence over a language tag.
      if (!empty($term["dt"])) {
        $out .= "^^<" . $this->str2unicode_nfc($term["dt"]) . ">";
      }
      elseif (!empty($term["lang"])) {
        $out .= "@" . $term["lang"];
      }
      return $out;
    }
    return "";
  }

  /**
   * Serializes a list of triples to an N-Triples document.
   *
   * @param mixed $triples
   *   Array of triple arrays (see the class comment). Array keys are ignored,
   *   so sparse or associative arrays work too.
   *
   * @return string
   *   One statement per triple, each terminated by " ." and the configured
   *   line break. An empty string if $triples is not an array.
   */
  public function get_ntriples($triples) {
    $result = "";
    if (!is_array($triples)) {
      return $result;
    }
    $spacer = $this->spacer;
    $linebreak = $this->linebreak;
    foreach ($triples as $cur_t) {
      $s = isset($cur_t["s"]) ? $cur_t["s"] : array();
      $p = isset($cur_t["p"]) ? $cur_t["p"] : "";
      $o = isset($cur_t["o"]) ? $cur_t["o"] : array();
      $result .= $this->term2ntriples($s, FALSE);
      $result .= $spacer;
      // Predicates get the same escaping as subject and object URIs.
      $result .= '<' . $this->str2unicode_nfc($p) . '>';
      $result .= $spacer;
      $result .= $this->term2ntriples($o, TRUE);
      $result .= " ." . $linebreak;
    }
    return $result;
  }

}