You are here

function ARC_rdfxml_parser::parse_web_file in Taxonomy import/export via XML 5.2

Same name and namespace in other branches
  1. 5 arc/ARC_rdfxml_parser.php \ARC_rdfxml_parser::parse_web_file()
  2. 6.2 arc/ARC_rdfxml_parser.php \ARC_rdfxml_parser::parse_web_file()
  3. 6 arc/ARC_rdfxml_parser.php \ARC_rdfxml_parser::parse_web_file()

File

arc/ARC_rdfxml_parser.php, line 837

Class

ARC_rdfxml_parser

Code

/**
 * Fetches a document over a hand-rolled HTTP/1.0 socket request and streams
 * its body through the XML parser.
 *
 * Reads the following optional keys from $this->init_args: "base",
 * "http_method", "proxy_host", "proxy_port", "headers" (list of raw header
 * lines) and "user_agent". Response header lines are appended to
 * $this->result_headers; the body is appended to $this->data when
 * $this->save_data is set.
 *
 * @param string $url
 *   URL to fetch. When empty, $this->full_base is used instead
 *   (presumably populated by init() - confirm against that method).
 * @param int $redir_count
 *   Number of redirects already followed; used internally by the recursive
 *   call made on 3xx responses.
 *
 * @return mixed
 *   $this->triples on success (or when no URL could be determined).
 *   On failure a string is returned instead:
 *   - "Socket error: ..." when the connection could not be opened,
 *   - "<status> <message>" for 4xx, 5xx and 304 responses,
 *   - the raw "Location:" header line once $redir_count exceeds 3,
 *   - "XML error: ..." when xml_parse() fails.
 */
function parse_web_file($url = "", $redir_count = 0) {
  if (!isset($this->init_args["base"]) || !$this->init_args["base"]) {
    $this->init_args["base"] = $url;
  }
  $this
    ->init(false);
  if (!$url) {
    $url = $this->full_base;
  }
  if (!$url) {
    return $this->triples;
  }
  if ($redir_count) {
    /* remember the address we were redirected to */
    $this->parsed_url = $url;
  }

  /* optional settings: guard every lookup so unset keys do not raise notices */
  $http_method = isset($this->init_args["http_method"]) ? $this->init_args["http_method"] : "GET";
  $use_proxy = !empty($this->init_args["proxy_host"]) && !empty($this->init_args["proxy_port"]);
  $headers = isset($this->init_args["headers"]) ? $this->init_args["headers"] : array();

  /* url components; parse_url() returns false for seriously malformed input */
  $url_parts = parse_url($url);
  if (!is_array($url_parts)) {
    $url_parts = array();
  }
  if (!isset($url_parts["port"])) {
    $url_parts["port"] = 80;
  }
  $host = isset($url_parts["host"]) ? $url_parts["host"] : "";

  /* request line: absolute URL for proxies / user-info URLs, path otherwise */
  if ((isset($url_parts["user"]) && strlen($url_parts["user"])) || $use_proxy) {
    $http_code = $http_method . ' ' . $url . ' HTTP/1.0' . "\r\n";
  }
  else {
    /* a URL without a path (e.g. "http://example.com") must request "/" */
    $path = isset($url_parts["path"]) && strlen($url_parts["path"]) ? $url_parts["path"] : "/";
    $http_code = $http_method . ' ' . $path;
    $http_code .= isset($url_parts["query"]) && strlen($url_parts["query"]) ? "?" . $url_parts["query"] : "";
    $http_code .= isset($url_parts["fragment"]) && strlen($url_parts["fragment"]) ? "#" . $url_parts["fragment"] : "";
    $http_code .= ' HTTP/1.0' . "\r\n";
  }

  /* custom headers */
  if ($headers) {
    for ($i = 0, $i_max = count($headers); $i < $i_max; $i++) {
      $http_code .= $headers[$i] . "\r\n";
    }
  }

  /* default headers, added only when the caller did not supply them */
  if (strpos($http_code, "Host: ") === false) {
    $http_code .= 'Host: ' . $host . "\r\n";
  }
  if (strpos($http_code, "Accept: ") === false) {
    $http_code .= 'Accept: application/rdf+xml; q=0.9, */*; q=0.1' . "\r\n";
  }
  if (strpos($http_code, "User-Agent: ") === false) {
    $ua_string = !empty($this->init_args["user_agent"]) ? $this->init_args["user_agent"] : "ARC RDF/XML Parser v" . $this->version . " (http://www.appmosphere.com/en-arc_rdfxml_parser)";
    $http_code .= 'User-Agent: ' . $ua_string . "\r\n";
  }
  $http_code .= "\r\n";

  /* socket */
  if ($use_proxy) {
    $fp = @fsockopen($this->init_args["proxy_host"], $this->init_args["proxy_port"]);
    $server_str = $this->init_args["proxy_host"] . ":" . $this->init_args["proxy_port"];
  }
  else {
    $fp = @fsockopen($host, $url_parts["port"]);
    $server_str = $host . ":" . $url_parts["port"];
  }
  if (!$fp) {
    return "Socket error: could not connect to server '" . $server_str . "'";
  }
  fputs($fp, $http_code);

  /* http-headers */
  $cur_line = fgets($fp, 256);

  /* 304/4xx/5xx handling */
  /* default to "no redirect" so a malformed status line cannot leave this undefined */
  $redirect = false;
  if (preg_match("/^HTTP[^\\s]+\\s+([0-9]{1})([0-9]{2})(.*)\$/i", trim($cur_line), $matches)) {
    $code_1 = $matches[1];
    $status = $matches[1] . $matches[2];
    $msg = trim($matches[3]);
    if ($code_1 === "4" || $code_1 === "5" || $status === "304") {
      /* release the socket before bailing out */
      fclose($fp);
      return $status . " " . $msg;
    }
    $redirect = $code_1 === "3";
  }
  while (!feof($fp) && trim($cur_line)) {
    $this->result_headers[] = $cur_line;
    if ($this->encoding == "auto" && strpos(strtolower($cur_line), "content-type") !== false) {
      if (strpos(strtolower($cur_line), "utf-8")) {
        $this->encoding = "UTF-8";
      }
      elseif (strpos(strtolower($cur_line), "iso-8859-1")) {
        $this->encoding = "ISO-8859-1";
      }
      elseif (strpos(strtolower($cur_line), "us-ascii")) {
        $this->encoding = "US-ASCII";
      }
    }

    /* 3xx handling */
    if ($redirect && preg_match("/^Location:\\s*(http.*)\$/i", $cur_line, $matches)) {
      fclose($fp);
      /* reset per-request state so the recursive call starts from scratch */
      unset($this->encoding);
      unset($this->init_args["base"]);
      return $redir_count > 3 ? $cur_line : $this
        ->parse_web_file(trim($matches[1]), $redir_count + 1);
    }
    $cur_line = fgets($fp, 256);
  }

  /* first lines of body to detect encoding */
  $pre_data = fread($fp, 512);
  if ($this->encoding == "auto" && preg_match("/\\<\\?xml .* encoding(.+).*\\?\\>/", $pre_data, $matches)) {
    $cur_match = $matches[1];
    if (strpos(strtolower($cur_match), "utf-8")) {
      $this->encoding = "UTF-8";
    }
    elseif (strpos(strtolower($cur_match), "iso-8859-1")) {
      $this->encoding = "ISO-8859-1";
    }
    elseif (strpos(strtolower($cur_match), "us-ascii")) {
      $this->encoding = "US-ASCII";
    }
  }
  if ($this->encoding == "auto") {
    /* nothing declared an encoding: fall back to UTF-8 */
    $this->encoding = "UTF-8";
  }
  $this
    ->create_parser();

  /* body */
  /* the sniffed prefix is prepended to the first chunk only; a max_lines of 0 means "no limit" */
  $max_lns = $this->max_lines;
  while (($data = $pre_data . fread($fp, 4096)) && ($max_lns === 0 || xml_get_current_line_number($this->parser) <= $max_lns)) {
    $pre_data = "";
    if ($this->save_data) {
      $this->data .= $data;
    }
    if (!xml_parse($this->parser, $data, feof($fp))) {
      $error_str = xml_error_string(xml_get_error_code($this->parser));
      $line = xml_get_current_line_number($this->parser);
      fclose($fp);
      xml_parser_free($this->parser);
      return "XML error: '" . $error_str . "' at line " . $line . "\n";
    }
  }
  $this->target_encoding = xml_parser_get_option($this->parser, XML_OPTION_TARGET_ENCODING);
  xml_parser_free($this->parser);
  fclose($fp);
  $this
    ->done();
  return $this->triples;
}