cf_http.module in Common Functionality 7
Same filename and directory in other branches
File
modules/cf_http/cf_http.module (View source)
<?php
/**
 * Implements hook_init().
 *
 * Queues the module's cf_http_html_headers.css stylesheet, guarding with a
 * static flag so that drupal_add_css() is called at most once per request.
 */
function cf_http_init() {
  static $cf_http_html_headers = FALSE;

  // Nothing to do when the stylesheet was already queued.
  if ($cf_http_html_headers) {
    return;
  }

  $stylesheet = drupal_get_path('module', 'cf_http') . '/includes/cf_http_html_headers.css';
  drupal_add_css($stylesheet);

  $cf_http_html_headers = TRUE;
}
/**
 * Reads an http page at the given path and returns an unprocessed response.
 *
 * Why:
 * Custom php scripts need a straight-forward and easy way to pull data from
 * another website.
 * This is useful as an alternative to iframe and has advantages and
 * disadvantages to iframes.
 * An advantage is that this allows showing remote content even if the remote
 * url is down (via caching).
 * A disadvantage is that remote images and links need to be processed,
 * updated, and possibly even manually cached.
 *
 * @param string $server
 *   Hostname or ip address of the server.
 *   Should not contain http:// or similar prefixes. The value is handed to
 *   fsockopen() unchanged; any leading 'word://' prefix is only removed from
 *   the value sent in the Host request header.
 * @param string $path
 *   The file/path on the server to request.
 * @param int $port
 *   (optional) port number of the page to read (defaults to 80).
 * @param array $function_history
 *   (optional) An array of function names, ie:
 *   array('0' => 'my_function_name').
 *
 * @return array
 *   An array containing the connection status and return http response.
 *   The array keys:
 *   - connected: A boolean with TRUE representing that the connection to the
 *     server was established and the response was read to the end without a
 *     stream timeout, FALSE otherwise.
 *   - response: The http response as returned by the target server.
 *     This http response must be processed. It may hold a partial response
 *     when the read timed out.
 *   - is_local: The value returned by stream_is_local() for the opened
 *     socket, FALSE when no socket was opened.
 */
function cf_http_get_response($server, $path, $port = 80, array $function_history = array()) {
  cf_error_append_history($function_history, __FUNCTION__);

  $results = array(
    'connected' => FALSE,
    'response' => '',
    'is_local' => FALSE,
  );

  if (cf_is_empty_or_non_string($function_history, 'server', $server, WATCHDOG_ERROR)) {
    return $results;
  }

  if (cf_is_empty_or_non_string($function_history, 'path', $path, WATCHDOG_ERROR)) {
    return $results;
  }

  if (!is_numeric($port)) {
    cf_error_not_numeric($function_history, 'port');
    return $results;
  }

  // Derive the Host header value before opening the socket, so that a failure
  // here cannot leave an open socket behind.
  $server_address = preg_replace('@^\\w+://@i', '', $server);
  if (!is_string($server_address)) {
    return $results;
  }

  // Connection timeout is 8 seconds.
  $fp = fsockopen($server, (int) $port, $errno, $errstr, 8);
  if (!$fp) {
    return $results;
  }

  // NOTE(review): the request advertises 'Accept-Encoding: deflate' but
  // nothing visible in this file inflates the body - confirm that callers
  // cope with a deflate encoded response.
  $request = 'GET ' . $path . ' HTTP/1.1' . "\r\n" . 'Host: ' . $server_address . "\r\n" . 'Accept-Encoding: deflate' . "\r\n" . 'Connection: Close' . "\r\n\r\n";
  fwrite($fp, $request);

  // Read timeout is 4 seconds.
  stream_set_timeout($fp, 4);
  $results['is_local'] = stream_is_local($fp);

  while (!feof($fp)) {
    $results['response'] .= fgets($fp, 8192);

    // A timed out read is reported as not connected, keeping whatever was
    // received so far in 'response'.
    $info = stream_get_meta_data($fp);
    if ($info['timed_out']) {
      fclose($fp);
      return $results;
    }
  }

  fclose($fp);
  $results['connected'] = TRUE;

  return $results;
}
/**
 * Validate http responses by checking header.
 *
 * Only the first header entry (the status line) is inspected. It is trimmed
 * and compared case-insensitively against the '100 OK' and '200 OK' status
 * lines of HTTP/1.0 and HTTP/1.1.
 *
 * Originally From: http://php.net/manual/en/function.fsockopen.php#85572
 *
 * Why:
 * Custom php scripts need a straight-forward and easy way to pull data from
 * another website.
 * This is useful as an alternative to iframe and has advantages and
 * disadvantages to iframes.
 * An advantage is that this allows showing remote content even if the remote
 * url is down (via caching).
 * A disadvantage is that remote images and links need to be processed,
 * updated, and possibly even manually cached.
 *
 * @param array $headers
 *   An array of http headers to validate.
 *   The status line must be stored under the key 0.
 * @param array $function_history
 *   (optional) An array of function names, ie:
 *   array('0' => 'my_function_name').
 *   Not used by this function.
 *
 * @return bool
 *   A boolean with TRUE representing that the headers are valid, FALSE
 *   otherwise. FALSE is also returned when there is no string stored under
 *   the key 0.
 */
function cf_http_validate_response(array $headers, array $function_history = array()) {
  // A non-empty array does not guarantee that key 0 exists or holds a string,
  // so check the entry itself instead of the array.
  if (!isset($headers[0]) || !is_string($headers[0])) {
    return FALSE;
  }

  switch (trim(strtolower($headers[0]))) {
    case 'http/1.0 100 ok':
    case 'http/1.0 200 ok':
    case 'http/1.1 100 ok':
    case 'http/1.1 200 ok':
      return TRUE;
  }

  return FALSE;
}
/**
 * Search through an array of http headers for common 400 and 500 http codes.
 *
 * A header is considered an error when it starts with a 4xx or 5xx code,
 * optionally preceded by the http version as found in a status line such as
 * 'HTTP/1.1 404 Not Found'. The first matching header wins.
 *
 * Why:
 * Custom php scripts need a straight-forward and easy way to pull data from
 * another website.
 * This is useful as an alternative to iframe and has advantages and
 * disadvantages to iframes.
 * An advantage is that this allows showing remote content even if the remote
 * url is down (via caching).
 * A disadvantage is that remote images and links need to be processed,
 * updated, and possibly even manually cached.
 *
 * @param array $headers
 *   An array of http headers.
 *   Passed by reference because the 'value' entry of the return array is a
 *   reference to the matching element of this array.
 * @param array $function_history
 *   (optional) An array of function names, ie:
 *   array('0' => 'my_function_name').
 *   Not used by this function.
 *
 * @return array
 *   An array with the following structure:
 *   - error_code: the error code of the error found as a numeric string
 *     (such as '404'), the integer 0 otherwise.
 *   - key: array key of the header with the error, an empty string when no
 *     error was found.
 *   - value: reference to the header containing the error code, an empty
 *     string when no error was found.
 */
function cf_http_headers_errors(array &$headers, array $function_history = array()) {
  foreach ($headers as $key => &$value) {
    if (!is_string($value)) {
      continue;
    }

    // The optional 'HTTP/x.y ' prefix is what makes real status lines match;
    // a bare leading code is still accepted as before.
    $matches = array();
    if (preg_match('/^(?:HTTP\\/\\d+(?:\\.\\d+)?\\s+)?([45]\\d\\d)(?:\\s|$)/i', $value, $matches) > 0) {
      return array(
        'error_code' => $matches[1],
        'key' => $key,
        'value' => &$value,
      );
    }
  }

  // Break the reference left behind by the by-reference foreach.
  unset($value);

  return array(
    'error_code' => 0,
    'key' => '',
    'value' => '',
  );
}
/**
 * Unchunk http content.
 *
 * Decodes a body that uses the chunked transfer coding: each chunk is a
 * hexadecimal size line terminated by CRLF, followed by that many bytes of
 * data. Decoding stops at a zero sized chunk or when the input is exhausted.
 * Chunk extensions (anything after a ';' on the size line) and anything that
 * follows the zero sized chunk are ignored.
 *
 * Originally From: http://php.net/manual/en/function.fsockopen.php#85572
 *
 * Why:
 * Custom php scripts need a straight-forward and easy way to pull data from
 * another website.
 * This is useful as an alternative to iframe and has advantages and
 * disadvantages to iframes.
 * An advantage is that this allows showing remote content even if the remote
 * url is down (via caching).
 * A disadvantage is that remote images and links need to be processed,
 * updated, and possibly even manually cached.
 *
 * @param string $document
 *   A string representing a chunked html document.
 * @param array $function_history
 *   (optional) An array of function names, ie:
 *   array('0' => 'my_function_name').
 *   Not used by this function.
 *
 * @return array
 *   An array containing the unchunk status and unchunked string.
 *   The array keys:
 *   - unchunked: A boolean with TRUE representing that the document string was
 *     successfully unchunked, FALSE otherwise.
 *   - document: The complete (unchunked) html document. When unchunked is
 *     FALSE this holds only the chunks decoded before the failure.
 */
function cf_http_unchunk_response(string $document, array $function_history = array()) {
  $results = array(
    'unchunked' => FALSE,
    'document' => '',
  );

  if (empty($document)) {
    return $results;
  }

  $eol = "\r\n";
  $eol_length = strlen($eol);
  $remaining = $document;

  while (TRUE) {
    // Drop the CRLF that terminates the data of the previous chunk.
    $remaining = ltrim($remaining);
    if ($remaining === '') {
      break;
    }

    // The size line normally ends with CRLF; a trimmed document may end with
    // a bare terminating '0', in which case the rest is the size line.
    $position = strpos($remaining, $eol);
    $size_line = ($position === FALSE) ? $remaining : substr($remaining, 0, $position);

    // Ignore chunk extensions and require a pure hexadecimal size.
    $size_parts = explode(';', $size_line, 2);
    $size_hex = trim($size_parts[0]);
    if (!ctype_xdigit($size_hex)) {
      return $results;
    }

    // hexdec() returns a float when the value does not fit into an integer.
    $length = hexdec($size_hex);
    if (!is_int($length)) {
      return $results;
    }

    // A zero sized chunk marks the end of the body.
    if ($length === 0) {
      break;
    }

    // A non-zero size without any data following it is malformed.
    if ($position === FALSE) {
      return $results;
    }

    $chunk = substr($remaining, $position + $eol_length, $length);
    if (!is_string($chunk) || strlen($chunk) < $length) {
      // The chunk is shorter than announced: the document is truncated.
      return $results;
    }

    $results['document'] .= $chunk;
    $remaining = (string) substr($remaining, $position + $eol_length + $length);
  }

  $results['unchunked'] = TRUE;

  return $results;
}
/**
 * Accepts and processes provided http content.
 *
 * This process splits the response into headers and content, checks for a
 * valid http response, unchunks the content if needed and returns the http
 * content without headers.
 *
 * Originally From: http://php.net/manual/en/function.fsockopen.php#85572
 *
 * Why:
 * Custom php scripts need a straight-forward and easy way to pull data from
 * another website.
 * This is useful as an alternative to iframe and has advantages and
 * disadvantages to iframes.
 * An advantage is that this allows showing remote content even if the
 * remote url is down (via caching).
 * A disadvantage is that remote images and links need to be processed,
 * updated, and possibly even manually cached.
 *
 * @param string $http_response
 *   An http response string.
 * @param array $function_history
 *   (optional) An array of function names, ie:
 *   array('0' => 'my_function_name').
 *
 * @return array
 *   An array containing the parse status and the parsed http response.
 *   The array keys:
 *   - parsed: A boolean with TRUE representing that the http response string
 *     was successfully parsed, FALSE otherwise.
 *   - header: Always an empty string; kept for backwards compatibility.
 *   - headers: An array with one entry per header line of the http response,
 *     an empty array when nothing was parsed. The header block is split on
 *     "\n" only, so entries may end with a carriage return.
 *   - document: The complete html document from the http response.
 *   - http_error: The array returned by cf_http_headers_errors() when the
 *     headers did not validate; otherwise an array with error_code 0 and
 *     empty key and value.
 */
function cf_http_parse_response($http_response, array $function_history = array()) {
  cf_error_append_history($function_history, __FUNCTION__);

  $results = array(
    'parsed' => FALSE,
    'header' => '',
    'headers' => array(),
    'document' => '',
    'http_error' => array(
      'error_code' => 0,
      'key' => '',
      'value' => '',
    ),
  );

  if (cf_is_empty_or_non_string($function_history, 'http_response', $http_response, WATCHDOG_ERROR)) {
    return $results;
  }

  // Split into headers and content at the first blank line only, so that
  // blank lines inside of the content do not cut the document apart.
  // Interim 1xx responses (such as '100 Continue') that are followed by
  // another status line are skipped.
  $remainder = trim($http_response);
  do {
    $hunks = explode("\r\n\r\n", $remainder, 2);
    if (count($hunks) < 2) {
      return $results;
    }

    $header = $hunks[0];
    $remainder = $hunks[1];

    $is_interim = preg_match('@^HTTP/\\d+(?:\\.\\d+)?\\s+1\\d\\d(?:\\s|$)@i', $header) > 0
      && preg_match('@^HTTP/\\d@i', $remainder) > 0;
  } while ($is_interim);

  $results['headers'] = explode("\n", $header);
  $results['document'] = $remainder;
  unset($hunks);
  unset($header);
  unset($remainder);

  if (!cf_http_validate_response($results['headers'], $function_history)) {
    $results['http_error'] = cf_http_headers_errors($results['headers'], $function_history);
  }

  // Detect the chunked transfer coding case-insensitively and without being
  // thrown off by the carriage return left at the end of each header line.
  // The misspelled 'Transfer-Coding' name is still accepted.
  $is_chunked = FALSE;
  foreach ($results['headers'] as $header_line) {
    if (is_string($header_line) && preg_match('/^Transfer-(?:En)?coding\\s*:.*\\bchunked\\b/i', trim($header_line)) > 0) {
      $is_chunked = TRUE;
      break;
    }
  }

  $was_unchunked = FALSE;
  if ($is_chunked) {
    $result = cf_http_unchunk_response($results['document'], $function_history);

    if ($result['unchunked']) {
      $results['document'] = $result['document'];
      $was_unchunked = TRUE;
    }
  }

  $results['document'] = trim($results['document']);

  if ($is_chunked && !$was_unchunked) {
    // Fallback for chunked content that could not be unchunked: strip a
    // leading chunk size line and the trailing zero sized chunk.
    // This is no longer applied to other documents because it would eat a
    // legitimate first line that consists only of alphanumeric characters.
    $results['document'] = preg_replace("/^[[:alnum:]]+\r\n/i", '', $results['document']);
    $results['document'] = preg_replace("/\r\n0\$/i", '', $results['document']);
  }

  if (!is_string($results['document'])) {
    $results['document'] = '';
    return $results;
  }

  $results['parsed'] = TRUE;

  return $results;
}
/**
 * Breaks apart an html formatted document string.
 *
 * The string is broken into an array containing two parts: 'head' and 'body'.
 * All other elements before, between, or after the html <head> and <body> tags
 * are lost/ignored.
 *
 * Why:
 * Custom php scripts need a straight-forward and easy way to pull data from
 * another website.
 * This is useful as an alternative to iframe and has advantages and
 * disadvantages to iframes.
 * An advantage is that this allows showing remote content even if the remote
 * url is down (via caching).
 * A disadvantage is that remote images and links need to be processed,
 * updated, and possibly even manually cached.
 *
 * @param string $document
 *   An html document string.
 * @param array $function_history
 *   (optional) An array of function names, ie:
 *   array('0' => 'my_function_name').
 *
 * @return array
 *   An array containing the split status and the document parts.
 *   The array keys:
 *   - split: A boolean with TRUE representing that the document string was
 *     successfully split, FALSE otherwise.
 *   - head: The markup found between the opening and closing head tags.
 *   - body: The markup found between the opening and closing body tags.
 *     Left empty when either the head or the body could not be found.
 */
function cf_http_split_response($document, array $function_history = array()) {
  cf_error_append_history($function_history, __FUNCTION__);

  $results = array(
    'split' => FALSE,
    'head' => '',
    'body' => '',
  );

  if (cf_is_empty_or_non_string($function_history, 'document', $document, WATCHDOG_ERROR)) {
    return $results;
  }

  // The head is extracted first; a missing section aborts the split and
  // leaves the sections extracted so far in place.
  $section_patterns = array(
    'head' => '/<head[^>]*>(.*)<\\/head>/sim',
    'body' => '/<body[^>]*>(.*)<\\/body>/sim',
  );

  foreach ($section_patterns as $section => $pattern) {
    $found = array();
    $count = preg_match($pattern, $document, $found);

    if (!($count > 0) || !isset($found[1])) {
      return $results;
    }

    $results[$section] = $found[1];
  }

  $results['split'] = TRUE;

  return $results;
}
/**
 * Fix relative urls pulled from the remote server.
 *
 * The urls found in src and href attributes that have neither a host nor a
 * scheme are turned into absolute urls. Urls that already have a host, and
 * urls that only have a scheme (such as mailto:, javascript: or data: urls),
 * are left untouched.
 *
 * Why:
 * Custom php scripts need a straight-forward and easy way to pull data from
 * another website.
 * This is useful as an alternative to iframe and has advantages and
 * disadvantages to iframes.
 * An advantage is that this allows showing remote content even if the remote
 * url is down (via caching).
 * A disadvantage is that remote images and links need to be processed,
 * updated, and possibly even manually cached.
 *
 * @param string $text
 *   The html document text whose urls are to be altered.
 * @param string $server
 *   The hostname or ip address of the server to use when generating absolute
 *   urls.
 *   This must not contain the 'http://' prefixes nor the suffixes such as '/'
 *   or ':80'.
 * @param string $relative_path
 *   All urls whose path does not start with '/' will have this (followed by
 *   a '/') inserted between the server and the path.
 * @param string $scheme
 *   (optional) The 'http' at the front of most urls.
 *   A common alternative is 'https'.
 * @param string $suffix
 *   (optional) Currently not used by this function.
 * @param int $port
 *   (optional) The port number of the web-server.
 *   In almost all cases this should be 80.
 *   If $scheme is set to 'https', then normally this should instead be 443.
 *   The port is only added to the generated urls when it is not the default
 *   port of the scheme.
 * @param array $function_history
 *   (optional) An array of function names, ie:
 *   array('0' => 'my_function_name').
 *
 * @return array
 *   An array containing the adjust status and the adjusted text.
 *   The array keys:
 *   - adjusted: A boolean with TRUE representing that the text's urls were
 *     successfully adjusted, FALSE otherwise.
 *   - text: The complete html text with all links adjusted to absolute paths.
 */
function cf_http_adjust_urls($text, $server, $relative_path, $scheme = 'http', $suffix = '/', $port = 80, array $function_history = array()) {
  cf_error_append_history($function_history, __FUNCTION__);

  $results = array(
    'adjusted' => FALSE,
    'text' => $text,
  );

  if (!is_string($text)) {
    cf_error_not_string($function_history, 'text');
    return $results;
  }

  $is_default_port = ($scheme == 'http' && $port == 80) || ($scheme == 'https' && $port == 443);

  foreach (array('src', 'href') as $attribute) {
    $matches = array();
    $result = preg_match_all('/(<[^>]*' . $attribute . '\\s*=\\s*)(["|\'])([^>]*)>/i', $text, $matches);

    if ($result === FALSE) {
      return $results;
    }

    foreach (array_keys($matches[0]) as $key) {
      $prefix = $matches[1][$key];
      $quote = $matches[2][$key];

      // Everything up to the closing quote is the url.
      $parts = explode($quote, $matches[3][$key], 2);
      $url = $parts[0];

      // Malformed urls and urls that already name a host are left alone.
      $parsed_url = parse_url($url);
      if (!is_array($parsed_url) || isset($parsed_url['host'])) {
        continue;
      }

      // A scheme without a host (mailto:, javascript:, data:, ...) is not a
      // relative path on the remote server.
      if (isset($parsed_url['scheme'])) {
        continue;
      }

      if (!$is_default_port) {
        $parsed_url['port'] = $port;
      }

      $generated_url = $scheme . '://' . $server;

      if (!empty($parsed_url['port'])) {
        $generated_url .= ':' . $parsed_url['port'];
      }

      if (isset($parsed_url['path']) && $parsed_url['path'] !== '') {
        if (substr($parsed_url['path'], 0, 1) !== '/') {
          $generated_url .= $relative_path . '/';
        }

        $generated_url .= $parsed_url['path'];
      }
      else {
        $generated_url .= $relative_path . '/';
      }

      if (isset($parsed_url['query']) && $parsed_url['query'] !== '') {
        $generated_url .= '?' . $parsed_url['query'];
      }

      if (isset($parsed_url['fragment']) && $parsed_url['fragment'] !== '') {
        $generated_url .= '#' . $parsed_url['fragment'];
      }

      // Replace literally: the url may contain characters that are special
      // inside of a regular expression or a regular expression replacement.
      $search = $prefix . $quote . $url . $quote;
      $replacement = $prefix . $quote . $generated_url . $quote;
      $results['text'] = str_replace($search, $replacement, $results['text']);
    }
  }

  $results['adjusted'] = TRUE;

  return $results;
}
/**
 * Reads and processes a website page at the given path.
 *
 * The page is fetched with cf_http_get_response() and the raw response is
 * then handed to cf_http_parse_response().
 *
 * Why:
 * Custom php scripts need a straight-forward and easy way to pull data from
 * another website.
 * This is useful as an alternative to iframe and has advantages and
 * disadvantages to iframes.
 * An advantage is that this allows showing remote content even if the remote
 * url is down (via caching).
 * A disadvantage is that remote images and links need to be processed,
 * updated, and possibly even manually cached.
 *
 * @param string $server
 *   Hostname or ip address of the server.
 *   Should not contain http:// or similar prefixes.
 * @param string $path
 *   The file/path on the server to request.
 * @param int $port
 *   (optional) Port number of the page to read (defaults to 80).
 * @param array $function_history
 *   (optional) An array of function names, ie:
 *   array('0' => 'my_function_name').
 *
 * @return array
 *   An array containing the read status and the parsed http response.
 *   The array keys:
 *   - read: A boolean with TRUE representing that the read was successful and
 *     FALSE otherwise.
 *   - headers: The http headers from the http response, an empty string when
 *     the read failed.
 *   - document: The complete html document from the http response.
 *   - http_error: The http error information as provided by
 *     cf_http_parse_response().
 */
function cf_http_get_webpage($server, $path, $port = 80, array $function_history = array()) {
  cf_error_append_history($function_history, __FUNCTION__);

  $results = array(
    'read' => FALSE,
    'headers' => '',
    'document' => '',
    'http_error' => array(
      'error_code' => 0,
      'key' => '',
      'value' => '',
    ),
  );

  // Without a completed connection there is nothing to parse.
  $response = cf_http_get_response($server, $path, $port, $function_history);
  if (!$response['connected']) {
    return $results;
  }

  $parsed = cf_http_parse_response($response['response'], $function_history);
  if (!$parsed['parsed']) {
    return $results;
  }

  foreach (array('headers', 'document', 'http_error') as $name) {
    $results[$name] = $parsed[$name];
  }

  $results['read'] = TRUE;

  return $results;
}
/**
 * Reduces the level of the html header tags (h1 to h6) found in a text.
 *
 * Every header tag is renamed to the header tag that is $depth levels lower,
 * for example with a depth of 1 an h2 becomes an h3. Header tags that would
 * end up lower than h6 become div tags. The renamed tags get the class
 * 'cf_http-was_hN' (N being the original level) added, keeping all of the
 * attributes of the original tag.
 *
 * Why:
 * Custom php scripts need a straight-forward and easy way to pull data from
 * another website.
 * This is useful as an alternative to iframe and has advantages and
 * disadvantages to iframes.
 * An advantage is that this allows showing remote content even if the remote
 * url is down (via caching).
 * A disadvantage is that remote images and links need to be processed,
 * updated, and possibly even manually cached.
 *
 * @param string $text
 *   The html text whose header tags are to be reduced.
 * @param int $depth
 *   (optional) The amount of shrinkage to perform. Any number from 0 to 6.
 * @param bool $preserve
 *   (optional) A boolean representing whether or not to preserve the header
 *   structure when the depth of a given header is reduced to a number greater
 *   than 6.
 *   If preserve is false, such headers become div tags without the
 *   'cf_http-was_hN' class.
 * @param array $function_history
 *   (optional) An array of function names, ie:
 *   array('0' => 'my_function_name').
 *
 * @return array
 *   An array containing the reduce status and the reduced text.
 *   The array keys:
 *   - reduced: A boolean with TRUE representing that the text was successful
 *     reduced and FALSE otherwise.
 *   - text: The html text with all html headers reduced by $depth, an empty
 *     string on failure.
 */
function cf_http_reduce_html_headers($text, $depth = 1, $preserve = TRUE, array $function_history = array()) {
  cf_error_append_history($function_history, __FUNCTION__);

  $results = array(
    'reduced' => FALSE,
    'text' => '',
  );

  if (!is_string($text)) {
    cf_error_not_string($function_history, 'text');
    return $results;
  }

  if (!is_numeric($depth) || $depth < 0 || $depth > 6) {
    return $results;
  }

  $depth = (int) $depth;

  // Collect every opening and closing header tag together with its position.
  // All tags are rewritten in a single pass over the original text, so that
  // an already reduced tag is never processed a second time and each tag
  // keeps its own attributes.
  $tags = array();
  $found = preg_match_all('/<(\\/?)h([1-6])(?=[\\s>])([^>]*)>/i', $text, $tags, PREG_SET_ORDER | PREG_OFFSET_CAPTURE);

  if ($found === FALSE) {
    return $results;
  }

  $output = '';
  $cursor = 0;

  foreach ($tags as $tag_match) {
    $original_tag = $tag_match[0][0];
    $offset = $tag_match[0][1];
    $is_closing = ($tag_match[1][0] === '/');
    $number = (int) $tag_match[2][0];
    $attributes = $tag_match[3][0];

    $reduced = $number + $depth;
    $tag = ($reduced > 6) ? 'div' : 'h' . $reduced;

    // Copy the text between the previous tag and this one unchanged.
    $output .= substr($text, $cursor, $offset - $cursor);
    $cursor = $offset + strlen($original_tag);

    if ($is_closing) {
      $output .= '</' . $tag . '>';
    }
    elseif ($tag == 'div' && !$preserve) {
      // The header structure is not preserved: no marker class is added.
      $output .= '<' . $tag . $attributes . '>';
    }
    else {
      $output .= '<' . $tag . _cf_http_reduce_html_headers_add_class($attributes, 'cf_http-was_h' . $number) . '>';
    }
  }

  $output .= substr($text, $cursor);

  $results['text'] = $output;
  $results['reduced'] = TRUE;

  return $results;
}

/**
 * Adds a class name to the attribute string of an html tag.
 *
 * Helper for cf_http_reduce_html_headers().
 *
 * @param string $attributes
 *   Everything between the tag name and the closing '>' of an html tag.
 * @param string $class
 *   The class name to add.
 *
 * @return string
 *   The attribute string with $class appended to an existing (single or
 *   double quoted) class attribute, or with a new class attribute added when
 *   there is none.
 */
function _cf_http_reduce_html_headers_add_class($attributes, $class) {
  $matches = array();

  // The lookbehind keeps attributes such as 'data-class' from matching.
  if (preg_match('/(?<![\\w-])class\\s*=\\s*(["\'])(.*?)\\1/is', $attributes, $matches, PREG_OFFSET_CAPTURE) > 0) {
    $existing = $matches[2][0];
    $existing_offset = $matches[2][1];
    $merged = ($existing === '') ? $class : $existing . ' ' . $class;

    return substr_replace($attributes, $merged, $existing_offset, strlen($existing));
  }

  return rtrim($attributes) . ' class="' . $class . '"';
}
Functions
Name | Description |
---|---|
cf_http_adjust_urls | Fix relative urls pulled from the remote server. |
cf_http_get_response | Reads an http page at the given path and returns an unprocessed response. |
cf_http_get_webpage | Reads and processes a website page at the given path. |
cf_http_headers_errors | Search through an array of http errors for common 400 and 500 http codes. |
cf_http_init | Implements hook_init(). |
cf_http_parse_response | Accepts and processes provided http content. |
cf_http_reduce_html_headers | Reduces the level of the html header tags (h1 to h6) found in a text. |
cf_http_split_response | Breaks apart an html formatted document string. |
cf_http_unchunk_response | Unchunk http content. |
cf_http_validate_response | Validate http responses by checking header. |