You are here

function cf_http_adjust_urls in Common Functionality 7.2

Same name and namespace in other branches
  1. 7 modules/cf_http/cf_http.module \cf_http_adjust_urls()

Fix relative urls pulled from the remote server.

These urls are turned into absolute urls.

@see: cf_http_get_webpage()

Parameters

string|cf_dom $text_or_dom: The html document text whose urls are to be altered. If cf_dom is enabled, then this can instead be the cf_dom object.

string $server: The hostname or ip address of the server to use when generating absolute urls. This must not contain the 'http://' prefixes nor the suffixes such as '/' or ':80'.

string $relative_path: all relative paths will have this prepended to the absolute url.

string $scheme: (optional) The 'http' at the front of most urls. A common alternative is 'https'.

string $suffix: (optional) The suffix to prepend to the url. In most cases this should be '/', but if the links are being cached on a different server and a different sub-path, then this must be used.

int $port: (optional) The port number of the web-server. In almost all cases this should be 80. If $scheme is set to 'https', then normally this should instead be 443.

Return value

array An array containing the adjustment status and the resulting text. The array keys:

  • adjusted: A boolean with TRUE representing that the text's urls were

successfully adjusted, FALSE otherwise.

  • text: The complete html text with all links adjusted to absolute paths. This is an empty string if $text_or_dom is a cf_dom object.

Related topics

File

modules/cf_http/cf_http.module, line 408
Common Functionality - HTTP module.

Code

/**
 * Fix relative urls pulled from the remote server.
 *
 * The 'src' and 'href' attributes found in the document are passed through
 * cf_adjust_url() and, when that returns a string, replaced by the result.
 *
 * @param string|cf_dom $text_or_dom
 *   The html document text whose urls are to be altered. If cf_dom is
 *   enabled, then this can instead be a loaded cf_dom object, in which case
 *   the object's head and body elements are altered in place.
 * @param string $server
 *   The hostname or ip address of the server to use when generating absolute
 *   urls. Passed unchanged to cf_adjust_url().
 * @param string $relative_path
 *   The path used for relative urls. Passed unchanged to cf_adjust_url().
 * @param string $scheme
 *   (optional) The url scheme, such as 'http' or 'https'.
 * @param string $suffix
 *   (optional) The url suffix. Passed unchanged to cf_adjust_url().
 * @param int $port
 *   (optional) The port number of the web-server.
 *
 * @return array
 *   An array with the following keys:
 *   - adjusted: TRUE when the document was processed, FALSE when
 *     $text_or_dom is neither a loaded cf_dom object nor a string.
 *   - text: The html text with the urls adjusted. This is an empty string
 *     when $text_or_dom is a loaded cf_dom object, and the unmodified
 *     $text_or_dom when the input was invalid.
 *
 * @see cf_http_get_webpage()
 */
function cf_http_adjust_urls($text_or_dom, $server, $relative_path, $scheme = 'http', $suffix = '/', $port = 80) {
  $results = array(
    'adjusted' => FALSE,
    'text' => $text_or_dom,
  );

  // The attributes whose values are treated as urls, in processing order.
  $attribute_names = array('src', 'href');

  if (class_exists('cf_dom') && $text_or_dom instanceof cf_dom && $text_or_dom->is_loaded()) {
    $results['text'] = '';

    // The head is processed first and then the body; both are handled the
    // same way, so the getter name is the only thing that differs.
    foreach (array('get_head', 'get_body') as $getter) {
      $container = $text_or_dom->{$getter}();

      if (is_null($container)) {
        continue;
      }

      foreach ($container->getElementsByTagName('*') as $tag) {
        foreach ($attribute_names as $attribute_name) {
          if (!$tag->hasAttribute($attribute_name)) {
            continue;
          }

          $generated_url = cf_adjust_url($tag->getAttribute($attribute_name), $server, $relative_path, $scheme, $suffix, $port);

          // Leave the attribute untouched when no url could be generated.
          if (is_string($generated_url)) {
            $tag->setAttribute($attribute_name, $generated_url);
          }
        }
      }
    }

    $results['adjusted'] = TRUE;
    return $results;
  }

  if (!is_string($text_or_dom)) {
    if (class_exists('cf_error')) {
      cf_error::invalid_string('text_or_dom');
    }

    return $results;
  }

  foreach ($attribute_names as $attribute_name) {
    // Match against the current text (not the original) so that a tag holding
    // both a 'src' and an 'href' is still found after the first pass changed
    // it. Group 1 is the tag up to the '=', group 2 is the opening quote and
    // group 3 is everything that follows up to the end of the tag.
    $matches = array();
    $found = preg_match_all('/(<[^>]*' . $attribute_name . '\\s*=\\s*)(["\'])([^>]*)>/i', $results['text'], $matches);

    if (!$found) {
      continue;
    }

    // Identical tag prefixes only need to be replaced once.
    $handled = array();

    foreach (array_keys($matches[0]) as $key) {
      $quote = $matches[2][$key];
      $parts = explode($quote, $matches[3][$key], 2);

      // Without a closing quote there is no well-formed value to replace.
      if (count($parts) < 2) {
        continue;
      }

      $search = $matches[1][$key] . $quote . $parts[0] . $quote;

      if (isset($handled[$search])) {
        continue;
      }

      $handled[$search] = TRUE;

      $generated_url = cf_adjust_url($parts[0], $server, $relative_path, $scheme, $suffix, $port);

      // As with the DOM handling above, keep the original url when no
      // replacement could be generated instead of blanking the attribute.
      if (!is_string($generated_url)) {
        continue;
      }

      // A literal replacement is used because the url may contain characters
      // that are special in a regular expression or in a regex replacement
      // string (such as '(', '+', '|' or '$1').
      $results['text'] = str_replace($search, $matches[1][$key] . $quote . $generated_url . $quote, $results['text']);
    }
  }

  $results['adjusted'] = TRUE;
  return $results;
}