You are here

function feeds_http_request in Feeds 7.2

Get the content from the given URL.

Parameters

string $url: A valid URL (not only web URLs).

array $options: (optional) An array that can have one or more of the following elements:

  • username: (string) If the URL uses authentication, supply the username.
  • password: (string) If the URL uses authentication, supply the password.
  • accept_invalid_cert: (bool) Whether to accept invalid certificates. Defaults to FALSE.
  • timeout: (integer) Timeout in seconds to wait for an HTTP GET request to finish. Defaults to the value of the 'http_request_timeout' variable, or 30 seconds if that variable is not set.
  • cache_http_result: (bool) Whether to cache the HTTP result. Defaults to TRUE.

Return value

object An object that describes the data downloaded from $url.

8 calls to feeds_http_request()
FeedsEnclosure::getContent in plugins/FeedsParser.inc
Downloads the content from the file URL.
FeedsFileHTTPTestCase::testHttpRequestUsingFileCache in tests/feeds_fetcher_http.test
Tests if the result of an HTTP request can be cached on the file system.
FeedsFileHTTPTestCase::testNoRefetchOnSameRequest in tests/feeds_fetcher_http.test
Tests that the source isn't fetched twice during the same request.
FeedsFileHTTPTestCase::testRefetchWhenCachedFileIsRemoved in tests/feeds_fetcher_http.test
Tests if the source is refetched when the cached file is manually removed.
FeedsHTTPFetcherResult::getRaw in plugins/FeedsHTTPFetcher.inc
Overrides FeedsFetcherResult::getRaw().

... See full list

2 string references to 'feeds_http_request'
FeedsFileHTTPTestCase::testHttpRequestUsingFileCache in tests/feeds_fetcher_http.test
Tests if the result of an HTTP request can be cached on the file system.
FeedsFileHTTPTestCase::testRefetchWhenCachedFileIsRemoved in tests/feeds_fetcher_http.test
Tests if the source is refetched when the cached file is manually removed.

File

libraries/http_request.inc, line 143
Download via HTTP.

Code

/**
 * Get the content from the given URL.
 *
 * @param string $url
 *   A valid URL (not only web URLs). The 'feed://' and 'webcal://' schemes are
 *   converted to 'http://' before the request is made.
 * @param array $options
 *   (optional) An array that can have one or more of the following elements:
 *   - username: (string) If the URL uses authentication, supply the username.
 *     When empty, credentials embedded in $url are used if present.
 *   - password: (string) If the URL uses authentication, supply the password.
 *   - accept_invalid_cert: (bool) Whether to accept invalid certificates.
 *     Defaults to FALSE.
 *   - timeout: (integer) Timeout in seconds to wait for the request to finish.
 *     Defaults to the 'http_request_timeout' variable, or 30 if that is unset.
 *   - cache_http_result: (bool) Whether to cache the HTTP result. Defaults to
 *     TRUE.
 *
 * @return object
 *   An object that describes the data downloaded from $url. It carries at
 *   least 'headers' and 'code'; 'data' on a completed download, 'error' when
 *   the cURL path rejects the URL, and 'from_cache' = TRUE when a cached
 *   result is returned after a 304 response.
 *
 * @throws HRCurlException
 *   When cURL reports an error while executing the request.
 */
function feeds_http_request($url, array $options = array()) {
  $options += array(
    'username' => NULL,
    'password' => NULL,
    'accept_invalid_cert' => FALSE,
    'cache_http_result' => TRUE,
  );

  // Make sure a request timeout is set.
  if (empty($options['timeout'])) {
    $options['timeout'] = variable_get('http_request_timeout', 30);
  }

  // Support the 'feed' and 'webcal' schemes by converting them into 'http'.
  // The converted URL is the one that is requested and the one results are
  // stored under, so it is also used for every cache lookup below. Looking up
  // with the unconverted URL would never find what was stored.
  $request_url = strtr($url, array(
    'feed://' => 'http://',
    'webcal://' => 'http://',
  ));

  // Per-request static cache: avoid downloading the same content twice within
  // one page request (for example when both fetching and parsing ask for it).
  $cached_urls =& drupal_static(__FUNCTION__, array());
  if (!empty($cached_urls[$request_url])) {
    $cache = http_request_get_cache($request_url);
    // The cache entry may have disappeared since it was marked; only use it
    // when the lookup actually produced data.
    if ($cache && !empty($cache->data)) {
      return $cache->data;
    }
  }

  // Handle password protected feeds: fall back to credentials embedded in the
  // URL when none were passed explicitly.
  if (!$options['username'] && valid_url($url, TRUE)) {
    $url_parts = parse_url($url);
    if (!empty($url_parts['user'])) {
      // A URL may carry a user without a password.
      $options['password'] = isset($url_parts['pass']) ? urldecode($url_parts['pass']) : '';
      $options['username'] = urldecode($url_parts['user']);
    }
  }
  $curl = http_request_use_curl();

  // Only download and parse data if really needs refresh.
  // Based on "ETag"/"If-None-Match" and "Last-Modified"/"If-Modified-Since".
  $headers = array();
  if ($options['cache_http_result'] && ($cache = http_request_get_cache($request_url))) {
    $last_result = $cache->data;
    $last_headers = (isset($last_result->headers) && is_array($last_result->headers))
      ? array_change_key_case($last_result->headers)
      : array();

    // Map of cached response header => conditional request header.
    $conditional_headers = array(
      'etag' => 'If-None-Match',
      'last-modified' => 'If-Modified-Since',
    );
    foreach ($conditional_headers as $response_name => $request_name) {
      if (empty($last_headers[$response_name])) {
        continue;
      }
      // cURL expects a list of "Name: value" lines, drupal_http_request() an
      // associative array.
      if ($curl) {
        $headers[] = $request_name . ': ' . $last_headers[$response_name];
      }
      else {
        $headers[$request_name] = $last_headers[$response_name];
      }
    }
  }

  // Without cURL, authentication has to be sent as an explicit header. This
  // must not depend on a cache entry being present, otherwise the very first
  // (uncached) request would go out without credentials.
  if (!$curl && !empty($options['username'])) {
    $headers['Authorization'] = 'Basic ' . base64_encode($options['username'] . ':' . $options['password']);
  }

  if ($curl) {
    $headers[] = 'User-Agent: Drupal (+http://drupal.org/)';
    $result = new stdClass();
    $result->headers = array();

    // Parse the URL and make sure we can handle the schema.
    // cURL can only support either http:// or https://.
    // CURLOPT_PROTOCOLS is only supported with cURL 7.19.4.
    $uri = parse_url($request_url);
    if ($uri === FALSE) {
      $result->error = 'unable to parse URL';
      $result->code = FEEDS_ERROR_PARSE_ERROR;
    }
    elseif (!isset($uri['scheme'])) {
      $result->error = 'missing schema';
      $result->code = FEEDS_ERROR_NO_SCHEME;
    }
    else {
      switch ($uri['scheme']) {
        case 'http':
        case 'https':
          // Valid scheme.
          break;

        default:
          $result->error = 'invalid schema ' . $uri['scheme'];
          $result->code = FEEDS_ERROR_INVALID_SCHEME;
          break;
      }
    }

    // If the scheme was valid, continue to request the feed using cURL.
    if (empty($result->error)) {
      $download = curl_init($request_url);
      curl_setopt($download, CURLOPT_FOLLOWLOCATION, TRUE);
      if (!empty($options['username'])) {
        curl_setopt($download, CURLOPT_USERPWD, $options['username'] . ':' . $options['password']);
        curl_setopt($download, CURLOPT_HTTPAUTH, CURLAUTH_ANY);
      }
      curl_setopt($download, CURLOPT_HTTPHEADER, $headers);
      // Headers are returned in-band and split off from the body below.
      curl_setopt($download, CURLOPT_HEADER, TRUE);
      curl_setopt($download, CURLOPT_RETURNTRANSFER, TRUE);
      // Empty string: accept every encoding cURL supports.
      curl_setopt($download, CURLOPT_ENCODING, '');
      curl_setopt($download, CURLOPT_TIMEOUT, $options['timeout']);

      $proxy_server = variable_get('proxy_server');
      $use_proxy = $proxy_server && _drupal_http_use_proxy($uri['host']);
      if ($use_proxy) {
        curl_setopt($download, CURLOPT_PROXY, $proxy_server);
        curl_setopt($download, CURLOPT_PROXYPORT, variable_get('proxy_port', 8080));

        // Proxy user/password.
        if ($proxy_username = variable_get('proxy_username')) {
          $username_password = $proxy_username . ':' . variable_get('proxy_password', '');
          curl_setopt($download, CURLOPT_PROXYUSERPWD, $username_password);
          curl_setopt($download, CURLOPT_PROXYAUTH, variable_get('proxy_auth_method', CURLAUTH_BASIC));
        }
      }
      if ($options['accept_invalid_cert']) {
        curl_setopt($download, CURLOPT_SSL_VERIFYPEER, 0);
        curl_setopt($download, CURLOPT_SSL_VERIFYHOST, 0);
      }

      $result->data = curl_exec($download);
      if (curl_error($download)) {
        // Collect the error details and release the handle before throwing,
        // so the handle does not outlive the failed request.
        $curl_errno = curl_errno($download);
        $curl_error = curl_error($download);
        curl_close($download);
        throw new HRCurlException(t('cURL error (@code) @error for @url', array(
          '@code' => $curl_errno,
          '@error' => $curl_error,
          '@url' => $request_url,
        )), $curl_errno);
      }

      // When using a proxy, remove extra data from the header which is not
      // considered by CURLINFO_HEADER_SIZE (possibly cURL bug).
      // This data is only added to the HTTP header when working with a proxy.
      // Example string added: <HTTP/1.0 200 Connection established\r\n\r\n>
      // This was fixed in libcurl version 7.30.0 (0x71e00) (April 12, 2013),
      // so this workaround only removes the proxy-added headers if we are using
      // an older version of libcurl.
      $curl_ver = curl_version();
      if ($use_proxy && $curl_ver['version_number'] < 0x71e00) {
        $http_header_break = "\r\n\r\n";
        $response = explode($http_header_break, $result->data);
        if (count($response) > 2) {
          $result->data = substr($result->data, strlen($response[0] . $http_header_break), strlen($result->data));
        }
      }

      // Split the raw response into the header section and the body.
      $header_size = curl_getinfo($download, CURLINFO_HEADER_SIZE);
      $raw_header = substr($result->data, 0, $header_size - 1);
      $result->data = substr($result->data, $header_size);

      // With redirects there are several header sections; only the last one
      // belongs to the final response.
      $header_sections = preg_split("/(\r\n){2}/", $raw_header);
      $header_lines = preg_split("/\r\n|\n|\r/", end($header_sections));

      // Skip HTTP response status.
      array_shift($header_lines);
      foreach ($header_lines as $line) {
        $line = trim($line);
        if ($line === '') {
          // A blank line ends the header section.
          break;
        }
        if (strpos($line, ':') === FALSE) {
          // Not a "Name: value" line; nothing to record.
          continue;
        }
        list($name, $value) = explode(':', $line, 2);

        // Normalize the headers.
        $name = strtolower($name);
        if (isset($result->headers[$name]) && $name == 'set-cookie') {
          // RFC 2109: the Set-Cookie response header comprises the token Set-
          // Cookie:, followed by a comma-separated list of one or more cookies.
          $result->headers[$name] .= ',' . trim($value);
        }
        else {
          $result->headers[$name] = trim($value);
        }
      }
      $result->code = curl_getinfo($download, CURLINFO_HTTP_CODE);
      curl_close($download);
    }
  }
  else {
    $result = drupal_http_request($request_url, array(
      'headers' => $headers,
      'timeout' => $options['timeout'],
    ));
    $result->headers = isset($result->headers) ? $result->headers : array();
  }
  $result->code = isset($result->code) ? $result->code : 200;

  // In case of 304 Not Modified try to return cached data.
  if ($result->code == 304) {
    if (isset($last_result->data)) {
      $last_result->from_cache = TRUE;
      return $last_result;
    }
    else {
      // It's a tragedy, this file must exist and contain good data.
      // In this case, clear cache and repeat.
      http_request_clear_cache($request_url);
      return feeds_http_request($request_url, $options);
    }
  }

  // Set caches if asked.
  if ($options['cache_http_result']) {
    http_request_set_cache($request_url, $result);

    // In the static cache, mark this URL as being cached.
    $cached_urls[$request_url] = TRUE;
  }
  return $result;
}