You are here

function ARC_erdf_parser::parse_web_file in Taxonomy import/export via XML 5

Same name and namespace in other branches
  1. 5.2 arc/ARC_erdf_parser.php \ARC_erdf_parser::parse_web_file()
  2. 6.2 arc/ARC_erdf_parser.php \ARC_erdf_parser::parse_web_file()
  3. 6 arc/ARC_erdf_parser.php \ARC_erdf_parser::parse_web_file()

File

arc/ARC_erdf_parser.php, line 147

Class

ARC_erdf_parser

Code

/**
 * Fetches a document over a raw HTTP/1.0 socket and streams it into the XML parser.
 *
 * The request is built by hand (request line, optional custom headers from
 * $this->init_args["headers"], default Host/Accept/User-Agent headers) and
 * sent either directly to the target host or through the proxy configured in
 * $this->init_args["proxy_host"] / ["proxy_port"]. Redirects (3xx with an
 * absolute http Location header) are followed recursively.
 *
 * @param string $url
 *   URL to retrieve. When empty, $this->full_base is used instead.
 * @param int $redir_count
 *   Number of redirects followed so far; callers normally leave this at 0.
 *
 * @return mixed
 *   - array("error" => ..., "result" => "") when the socket cannot be opened
 *     or the XML parser reports an error;
 *   - a string "<status code> <reason>" for 4xx, 5xx and 304 responses;
 *   - the raw Location header line when more than 3 redirects were followed;
 *   - otherwise whatever $this->done() returns.
 */
function parse_web_file($url = "", $redir_count = 0) {
  if (!isset($this->init_args["base"])) {
    $this->init_args["base"] = $url;
  }
  $this->init(false);
  if (!$url) {
    $url = $this->full_base;
  }
  if ($url) {
    if ($redir_count) {
      /* remember the final location when we got here via a redirect */
      $this->parsed_url = $url;
    }

    /* http method */
    $http_method = isset($this->init_args["http_method"]) ? $this->init_args["http_method"] : "GET";
    $url_parts = parse_url($url);
    if (!isset($url_parts["port"])) {
      $url_parts["port"] = 80;
    }
    $use_proxy = isset($this->init_args["proxy_host"]) && $this->init_args["proxy_host"] && isset($this->init_args["proxy_port"]) && $this->init_args["proxy_port"];

    /* request line: absolute URL for proxies / URLs with credentials, path-only otherwise */
    if (isset($url_parts["user"]) && strlen($url_parts["user"]) || $use_proxy) {
      $http_code = $http_method . ' ' . $url . ' HTTP/1.0' . "\r\n";
    }
    else {
      $http_code = $http_method . ' ';
      $http_code .= isset($url_parts["path"]) ? $url_parts["path"] : '/';
      $http_code .= isset($url_parts["query"]) && strlen($url_parts["query"]) ? "?" . $url_parts["query"] : "";
      $http_code .= isset($url_parts["fragment"]) && strlen($url_parts["fragment"]) ? "#" . $url_parts["fragment"] : "";
      $http_code .= ' HTTP/1.0' . "\r\n";
    }

    /* custom headers */
    if (isset($this->init_args["headers"]) && ($headers = $this->init_args["headers"])) {
      for ($i = 0, $i_max = count($headers); $i < $i_max; $i++) {
        $http_code .= $headers[$i] . "\r\n";
      }
    }

    /* default headers, only added when the caller did not supply them */
    if (strpos($http_code, "Host: ") === false) {
      $http_code .= 'Host: ' . $url_parts["host"] . "\r\n";
    }
    if (strpos($http_code, "Accept: ") === false) {
      $http_code .= 'Accept: text/html; q=0.9, */*; q=0.1' . "\r\n";
    }
    if (strpos($http_code, "User-Agent: ") === false) {
      $http_code .= 'User-Agent: ARC eRDF Parser v' . $this->version . ' (http://arc.web-semantics.org/)' . "\r\n";
    }
    $http_code .= "\r\n";

    /* socket */
    if ($use_proxy) {
      $fp = @fsockopen($this->init_args["proxy_host"], $this->init_args["proxy_port"]);
      $server_str = $this->init_args["proxy_host"] . ":" . $this->init_args["proxy_port"];
    }
    else {
      $fp = @fsockopen($url_parts["host"], $url_parts["port"]);
      $server_str = $url_parts["host"] . ":" . $url_parts["port"];
    }
    if (!$fp) {
      return array(
        "error" => "Socket error: could not connect to server '" . $server_str . "'",
        "result" => "",
      );
    }

    $redirect = false;
    fputs($fp, $http_code);

    /* http-headers */
    $cur_line = fgets($fp, 256);

    /* 304/4xx/5xx handling */
    if (preg_match("/^HTTP[^\\s]+\\s+([0-9]{1})([0-9]{2})(.*)\$/i", trim($cur_line), $matches)) {
      $code_1 = $matches[1];
      $code_2 = $matches[2];
      $msg = trim($matches[3]);
      if (in_array($code_1, array("4", "5")) || $code_1 . $code_2 == "304") {
        /* release the connection before bailing out; nothing more is read from it */
        fclose($fp);
        return $code_1 . $code_2 . " " . $msg;
      }
      $redirect = $code_1 == "3" ? true : false;
    }

    /* remaining header lines, up to the blank line that ends the header section */
    while (!feof($fp) && trim($cur_line)) {
      $this->result_headers[] = $cur_line;
      if ($this->encoding == "auto" && strpos(strtolower($cur_line), "content-type") !== false) {
        if (strpos(strtolower($cur_line), "utf-8")) {
          $this->encoding = "UTF-8";
        }
        elseif (strpos(strtolower($cur_line), "iso-8859-1")) {
          $this->encoding = "ISO-8859-1";
        }
        elseif (strpos(strtolower($cur_line), "us-ascii")) {
          $this->encoding = "US-ASCII";
        }
      }

      /* 3xx handling */
      if ($redirect && preg_match("/^Location:\\s*(http.*)\$/i", $cur_line, $matches)) {
        fclose($fp);
        /* drop per-request state before the recursive call sets it up again */
        unset($this->encoding);
        unset($this->init_args["base"]);
        return $redir_count > 3 ? $cur_line : $this->parse_web_file(trim($matches[1]), $redir_count + 1);
      }
      $cur_line = fgets($fp, 256);
    }

    /* first lines of body to detect encoding */
    $pre_data = fread($fp, 512);
    if ($this->encoding == "auto" && preg_match("/\\<\\?xml .* encoding(.+).*\\?\\>/", $pre_data, $matches)) {
      $cur_match = $matches[1];
      if (strpos(strtolower($cur_match), "utf-8")) {
        $this->encoding = "UTF-8";
      }
      elseif (strpos(strtolower($cur_match), "iso-8859-1")) {
        $this->encoding = "ISO-8859-1";
      }
      elseif (strpos(strtolower($cur_match), "us-ascii")) {
        $this->encoding = "US-ASCII";
      }
    }
    if ($this->encoding == "auto") {
      /* nothing declared an encoding: fall back to UTF-8 */
      $this->encoding = "UTF-8";
    }
    $this->create_parser();

    /* body: the first chunk is the already-read prefix plus the next read */
    while ($data = $pre_data . fread($fp, 4096)) {
      $pre_data = "";
      if ($this->save_data) {
        $this->data .= $data;
      }
      if (!($success = xml_parse($this->parser, $data, feof($fp)))) {
        $error_str = xml_error_string(xml_get_error_code($this->parser));
        $line = xml_get_current_line_number($this->parser);
        fclose($fp);
        xml_parser_free($this->parser);
        return array(
          "error" => "XML error: '" . $error_str . "' at line " . $line . "\n",
          "result" => "",
        );
      }
    }
    $this->target_encoding = xml_parser_get_option($this->parser, XML_OPTION_TARGET_ENCODING);
    xml_parser_free($this->parser);
    fclose($fp);
  }
  return $this->done();
}