function ARC_erdf_parser::parse_web_file in Taxonomy import/export via XML 6
Same name and namespace in other branches
- 5.2 arc/ARC_erdf_parser.php \ARC_erdf_parser::parse_web_file()
- 5 arc/ARC_erdf_parser.php \ARC_erdf_parser::parse_web_file()
- 6.2 arc/ARC_erdf_parser.php \ARC_erdf_parser::parse_web_file()
File
- arc/
ARC_erdf_parser.php, line 147
Class
Code
/**
 * Fetches a document over a raw HTTP/1.0 socket and streams its body into
 * the XML parser.
 *
 * The request is sent either directly to the URL's host or, when both
 * init_args["proxy_host"] and init_args["proxy_port"] are set, through that
 * proxy. Custom request headers may be supplied via init_args["headers"]
 * and the method via init_args["http_method"] (default "GET").
 *
 * @param string $url
 *   URL to fetch. When empty, $this->full_base is used instead.
 * @param int $redir_count
 *   Number of redirects already followed (used internally on recursion).
 *
 * @return mixed
 *   - array("error" => ..., "result" => "") on a socket or XML error;
 *   - the string "<status> <message>" for 304, 4xx and 5xx responses;
 *   - the raw "Location:" header line once more than 3 redirects have
 *     already been followed;
 *   - otherwise whatever $this->done() returns.
 */
function parse_web_file($url = "", $redir_count = 0) {
  if (!isset($this->init_args["base"])) {
    $this->init_args["base"] = $url;
  }
  $this->init(false);
  if (!$url) {
    $url = $this->full_base;
  }
  if (!$url) {
    return $this->done();
  }
  if ($redir_count) {
    $this->parsed_url = $url;
  }

  /* http method */
  $http_method = isset($this->init_args["http_method"]) ? $this->init_args["http_method"] : "GET";
  $url_parts = parse_url($url);
  if (!isset($url_parts["port"])) {
    $url_parts["port"] = 80;
  }
  $use_proxy = isset($this->init_args["proxy_host"]) && $this->init_args["proxy_host"]
    && isset($this->init_args["proxy_port"]) && $this->init_args["proxy_port"];
  $has_user = isset($url_parts["user"]) && strlen($url_parts["user"]);

  /* request line: absolute URL for proxies / user-info URLs, path otherwise */
  if ($has_user || $use_proxy) {
    $http_code = $http_method . ' ' . $url . ' HTTP/1.0' . "\r\n";
  }
  else {
    $http_code = $http_method . ' ';
    $http_code .= isset($url_parts["path"]) ? $url_parts["path"] : '/';
    $http_code .= isset($url_parts["query"]) && strlen($url_parts["query"]) ? "?" . $url_parts["query"] : "";
    $http_code .= isset($url_parts["fragment"]) && strlen($url_parts["fragment"]) ? "#" . $url_parts["fragment"] : "";
    $http_code .= ' HTTP/1.0' . "\r\n";
  }

  /* custom headers */
  if (isset($this->init_args["headers"]) && ($headers = $this->init_args["headers"])) {
    for ($i = 0, $i_max = count($headers); $i < $i_max; $i++) {
      $http_code .= $headers[$i] . "\r\n";
    }
  }
  /* default headers, added only when the caller did not supply them */
  if (strpos($http_code, "Host: ") === false) {
    $http_code .= 'Host: ' . $url_parts["host"] . "\r\n";
  }
  if (strpos($http_code, "Accept: ") === false) {
    $http_code .= 'Accept: text/html; q=0.9, */*; q=0.1' . "\r\n";
  }
  if (strpos($http_code, "User-Agent: ") === false) {
    $http_code .= 'User-Agent: ARC eRDF Parser v' . $this->version . ' (http://arc.web-semantics.org/)' . "\r\n";
  }
  $http_code .= "\r\n";

  /* socket */
  if ($use_proxy) {
    $fp = @fsockopen($this->init_args["proxy_host"], $this->init_args["proxy_port"]);
    $server_str = $this->init_args["proxy_host"] . ":" . $this->init_args["proxy_port"];
  }
  else {
    $fp = @fsockopen($url_parts["host"], $url_parts["port"]);
    $server_str = $url_parts["host"] . ":" . $url_parts["port"];
  }
  if (!$fp) {
    return array(
      "error" => "Socket error: could not connect to server '" . $server_str . "'",
      "result" => "",
    );
  }

  /* charset markers looked for in headers / XML declaration, in priority order */
  $known_encodings = array(
    "utf-8" => "UTF-8",
    "iso-8859-1" => "ISO-8859-1",
    "us-ascii" => "US-ASCII",
  );
  /* large enough that long header lines (e.g. Location) are not split */
  $max_header_line = 8192;

  $redirect = false;
  fputs($fp, $http_code);

  /* http-headers */
  $cur_line = fgets($fp, $max_header_line);
  /* 304/4xx/5xx handling */
  if (preg_match("/^HTTP[^\\s]+\\s+([0-9]{1})([0-9]{2})(.*)\$/i", trim($cur_line), $matches)) {
    $code_1 = $matches[1];
    $status = $code_1 . $matches[2];
    $msg = trim($matches[3]);
    if ($code_1 === "4" || $code_1 === "5" || $status === "304") {
      /* release the connection before bailing out */
      fclose($fp);
      return $status . " " . $msg;
    }
    $redirect = $code_1 === "3";
  }
  while (!feof($fp) && trim($cur_line)) {
    $this->result_headers[] = $cur_line;
    $lower_line = strtolower($cur_line);
    if ($this->encoding == "auto" && strpos($lower_line, "content-type") !== false) {
      foreach ($known_encodings as $needle => $label) {
        if (strpos($lower_line, $needle) !== false) {
          $this->encoding = $label;
          break;
        }
      }
    }
    /* 3xx handling */
    if ($redirect && preg_match("/^Location:\\s*(http.*)\$/i", $cur_line, $matches)) {
      fclose($fp);
      unset($this->encoding);
      unset($this->init_args["base"]);
      if ($redir_count > 3) {
        /* give up and hand back the raw Location header line */
        return $cur_line;
      }
      return $this->parse_web_file(trim($matches[1]), $redir_count + 1);
    }
    $cur_line = fgets($fp, $max_header_line);
  }

  /* first lines of body to detect encoding */
  $pre_data = fread($fp, 512);
  if ($this->encoding == "auto" && preg_match("/\\<\\?xml .* encoding(.+).*\\?\\>/", $pre_data, $matches)) {
    $declared = strtolower($matches[1]);
    foreach ($known_encodings as $needle => $label) {
      if (strpos($declared, $needle) !== false) {
        $this->encoding = $label;
        break;
      }
    }
  }
  if ($this->encoding == "auto") {
    $this->encoding = "UTF-8";
  }
  $this->create_parser();

  /* body: the sniffed prefix is prepended to the first chunk only */
  while (($data = $pre_data . fread($fp, 4096)) !== "") {
    $pre_data = "";
    if ($this->save_data) {
      $this->data .= $data;
    }
    if (!xml_parse($this->parser, $data, feof($fp))) {
      $error_str = xml_error_string(xml_get_error_code($this->parser));
      $line = xml_get_current_line_number($this->parser);
      fclose($fp);
      xml_parser_free($this->parser);
      return array(
        "error" => "XML error: '" . $error_str . "' at line " . $line . "\n",
        "result" => "",
      );
    }
  }
  $this->target_encoding = xml_parser_get_option($this->parser, XML_OPTION_TARGET_ENCODING);
  xml_parser_free($this->parser);
  fclose($fp);

  return $this->done();
}