function _linkchecker_status_handling in Link checker 6.2
Same name and namespace in other branches
- 5.2 linkchecker.module \_linkchecker_status_handling()
- 7 linkchecker.module \_linkchecker_status_handling()
Status code handling.
Parameters
object $response: An object containing the HTTP request headers, response code, headers, data and redirect status.
object $link: An object containing the url, lid, method and fail_count.
1 call to _linkchecker_status_handling()
- _linkchecker_check_links in ./
linkchecker.module - Run link checks.
1 string reference to '_linkchecker_status_handling'
- _linkchecker_check_links in ./
linkchecker.module - Run link checks.
File
- ./
linkchecker.module, line 507 - This module periodically checks links in given node types, blocks, CCK fields, etc.
Code
/**
 * Status code handling.
 *
 * Records the outcome of a link check in {linkchecker_links} and, depending
 * on the response code and the module settings, auto-repairs permanently
 * moved links (301) in nodes, comments and boxes, or unpublishes nodes that
 * contain broken links (404).
 *
 * @param $response
 *   An object containing the HTTP request headers, response code, headers,
 *   data and redirect status. Passed by reference; it is normalized in place
 *   and replaced by an empty object before the function returns.
 * @param $link
 *   An object containing the url, lid, method and fail_count.
 */
function _linkchecker_status_handling(&$response, $link) {
  $ignore_response_codes = preg_split('/(\\r\\n?|\\n)/', variable_get('linkchecker_ignore_response_codes', "200\n206\n302\n304\n401\n403"));

  // - Prevent E_ALL warnings in DB updates for non-existing $response->error.
  // - @todo drupal_http_request() may not provide an UTF8 encoded error message
  //   what results in a database UPDATE failure. For more information, see
  //   http://drupal.org/node/371495.
  //   Workaround: ISO-8859-1 as source encoding may be wrong, but WFM.
  if (!isset($response->error)) {
    $response->error = '';
  }
  if (!isset($response->status_message)) {
    $response->status_message = '';
  }
  $response->error = trim(drupal_convert_to_utf8($response->error, 'ISO-8859-1'));
  $response->status_message = trim(drupal_convert_to_utf8($response->status_message, 'ISO-8859-1'));

  // Destination anchors in HTML documents may be specified either by the A
  // element (naming it with the name attribute), or by any other element
  // (naming with the id attribute).
  // See http://www.w3.org/TR/html401/struct/links.html
  //
  // Notes:
  // - 'top' is a reserved fragment that must not exist in a page. The
  //   fragment is matched below against bare name/id attribute values, so it
  //   is compared here without the leading '#'; the '#top' spelling is kept
  //   for backward compatibility.
  //   NOTE(review): $response->uri is populated by the caller, presumably
  //   from parse_url() - confirm.
  $reserved_fragments = array('top', '#top');
  $markup_content_types = array('text/html', 'application/xhtml+xml', 'application/xml');
  if ($response->code == 200
    && !empty($response->data)
    && !empty($response->headers['content-type'])
    && !empty($response->uri['fragment'])
    && !in_array($response->uri['fragment'], $reserved_fragments)
    && in_array($response->headers['content-type'], $markup_content_types)
    && !preg_match('/(\\s[^>]*(name|id)(\\s+)?=(\\s+)?["\'])(' . preg_quote($response->uri['fragment'], '/') . ')(["\'][^>]*>)/i', $response->data)) {
    // Override status code 200 with status code 404 so it can be handled with
    // default status code 404 logic and custom error text.
    $response->code = 404;
    $response->status_message = $response->error = 'URL fragment identifier not found in content';
  }

  switch ($response->code) {
    case -4:
      // HTTPRL: httprl_send_request timed out.
      // Skip these and try them again next cron run.
      break;

    case -2:
      // HTTPRL: maximum allowed redirects exhausted.
      // Intentional fall-through: handled like a permanent redirect.
    case 301:
      db_query("UPDATE {linkchecker_links} SET code = %d, error = '%s', fail_count = fail_count+1, last_checked = %d WHERE lid = %d", $response->redirect_code, $response->status_message, time(), $link->lid);

      // A HTTP status code of 301 tells us an existing link has changed to
      // a new link. The remote site owner was so kind to provide us the new
      // link and if we trust this change we are able to replace the old link
      // with the new one without any hand work.
      $auto_repair_301 = variable_get('linkchecker_action_status_code_301', 0);
      if ($auto_repair_301 && $auto_repair_301 <= $link->fail_count + 1 && valid_url($response->redirect_url, TRUE)) {
        // Switch anonymous user to an admin.
        linkchecker_impersonate_user(user_load(array(
          'name' => variable_get('linkchecker_impersonate_user', ''),
        )));

        // NODES: Autorepair all nodes having this outdated link.
        $res = db_query("SELECT * FROM {linkchecker_nodes} WHERE lid = %d", $link->lid);
        while ($row = db_fetch_object($res)) {
          $node = node_load(array(
            'nid' => $row->nid,
          ));

          // Has the node object loaded successfully?
          if (is_object($node)) {
            // Keep an untouched copy to detect whether anything was replaced.
            $node_original = drupal_clone($node);

            // Create array of node fields to scan (for e.g. $node->title, $node->links_weblink_url).
            $text_items = array();
            $text_items[] = 'title';
            $text_items[] = 'body';
            $text_items[] = 'teaser';

            // Update 'weblink' nodes from 'links' module package.
            if (module_exists('links_weblink') && $node->type == 'weblink' && isset($node->links_weblink_url)) {
              $text_items[] = 'links_weblink_url';
            }

            // Update 'weblinks' nodes from 'weblinks' module.
            if (module_exists('weblinks') && $node->type == 'weblinks' && isset($node->url)) {
              $text_items[] = 'url';
            }

            // Now replace the outdated link with the permanently moved one in all node fields.
            foreach ($text_items as $text_item) {
              _linkchecker_link_replace($node->{$text_item}, $link->url, $response->redirect_url);
            }

            // Search for CCK-fields of types 'link' and 'text'.
            if (module_exists('content')) {
              $fields = content_fields(NULL, $node->type);
              foreach ($fields as $field) {
                // Always brace the property name explicitly:
                // $node->{$field}['field_name'] would look up a property
                // named after the string-cast array instead of the field.
                $field_name = $field['field_name'];
                if (isset($node->{$field_name})) {
                  if (module_exists('link') && $field['type'] == 'link') {
                    foreach ($node->{$field_name} as $delta => $item) {
                      _linkchecker_link_replace($node->{$field_name}[$delta]['url'], $link->url, $response->redirect_url);
                    }
                  }
                  elseif (module_exists('text') && $field['type'] == 'text') {
                    foreach ($node->{$field_name} as $delta => $item) {
                      _linkchecker_link_replace($node->{$field_name}[$delta]['value'], $link->url, $response->redirect_url);
                    }
                  }
                }
              }
            }

            if ($node_original != $node) {
              // Always use the default revision setting. See node_object_prepare().
              $node_options = variable_get('node_options_' . $node->type, array(
                'status',
                'promote',
              ));
              $node->revision = in_array('revision', $node_options);

              // Generate a log message for the node_revisions table, visible on the node's revisions tab.
              $node->log = t('Changed permanently moved link in %node from %src to %dst.', array(
                '%node' => url('node/' . $row->nid),
                '%src' => $link->url,
                '%dst' => $response->redirect_url,
              ));

              // Save changed node and update the node link list.
              node_save($node);
              watchdog('linkchecker', 'Changed permanently moved link in %node from %src to %dst.', array(
                '%node' => url('node/' . $row->nid),
                '%src' => $link->url,
                '%dst' => $response->redirect_url,
              ), WATCHDOG_INFO);
            }
            else {
              watchdog('linkchecker', 'Link update in node failed. Permanently moved link %src not found in node %node. Manual fix required.', array(
                '%node' => url('node/' . $row->nid),
                '%src' => $link->url,
              ), WATCHDOG_WARNING);
            }
          }
          else {
            watchdog('linkchecker', 'Loading node %node for update failed. Manual fix required.', array(
              '%node' => $row->nid,
            ), WATCHDOG_ERROR);
          }
        }

        // COMMENTS: Autorepair all comments having this outdated link.
        $res = db_query("SELECT * FROM {linkchecker_comments} WHERE lid = %d", $link->lid);
        while ($row = db_fetch_object($res)) {
          $comment = _linkchecker_comment_load($row->cid);

          // Has the custom comment array loaded successfully?
          if (!empty($comment)) {
            $comment_original = $comment;

            // Create array of comment fields to scan (for e.g. $comment->subject, $comment->comment).
            $text_items = array();
            $text_items[] = 'subject';
            $text_items[] = 'comment';

            // Now replace the outdated link with the permanently moved one in all comment fields.
            foreach ($text_items as $text_item) {
              _linkchecker_link_replace($comment[$text_item], $link->url, $response->redirect_url);
            }

            // Save changed comment and update the comment link list.
            $comment_diff = array_diff($comment, $comment_original);
            if (!empty($comment_diff)) {
              comment_save($comment);
              watchdog('linkchecker', 'Changed permanently moved link in comment %comment from %src to %dst.', array(
                '%comment' => $comment['cid'],
                '%src' => $link->url,
                '%dst' => $response->redirect_url,
              ), WATCHDOG_INFO);
            }
            else {
              watchdog('linkchecker', 'Link update in comment failed. Permanently moved link %src not found in comment %comment. Manual fix required.', array(
                '%comment' => $comment['cid'],
                '%src' => $link->url,
              ), WATCHDOG_WARNING);
            }
          }
          else {
            // $comment is empty in this branch, so take the id from the
            // linkchecker row that referenced it.
            watchdog('linkchecker', 'Loading comment %comment for update failed. Manual fix required.', array(
              '%comment' => $row->cid,
            ), WATCHDOG_ERROR);
          }
        }

        // BOXES: Autorepair all boxes having this outdated link.
        $res = db_query("SELECT * FROM {linkchecker_boxes} WHERE lid = %d", $link->lid);
        while ($row = db_fetch_object($res)) {
          $box = block_box_get($row->bid);

          // Has the custom block array loaded successfully?
          if (!empty($box)) {
            $box_original = $box;

            // Create array of box fields to scan.
            $text_items = array();
            $text_items[] = 'info';
            $text_items[] = 'body';

            // Now replace the outdated link with the permanently moved one in all
            // box fields.
            foreach ($text_items as $text_item) {
              _linkchecker_link_replace($box[$text_item], $link->url, $response->redirect_url);
            }

            $box_diff = array_diff($box, $box_original);
            if (!empty($box_diff)) {
              // Save changed box and update the box link list.
              block_box_save($box, $row->bid);
              // There is no hook that fires on block_box_save(), therefore do link
              // extraction programmatically.
              _linkchecker_add_box_links($box, $row->bid);
              watchdog('linkchecker', 'Changed permanently moved link in box %bid from %src to %dst.', array(
                '%bid' => $row->bid,
                '%src' => $link->url,
                '%dst' => $response->redirect_url,
              ), WATCHDOG_INFO);
            }
            else {
              watchdog('linkchecker', 'Link update in block failed. Permanently moved link %src not found in block %bid. Manual fix required.', array(
                '%bid' => $row->bid,
                '%src' => $link->url,
              ), WATCHDOG_WARNING);
            }
          }
          else {
            watchdog('linkchecker', 'Loading block %bid for update failed. Manual fix required.', array(
              '%bid' => $row->bid,
            ), WATCHDOG_ERROR);
          }
        }

        // Revert user back to anonymous.
        linkchecker_revert_user();
      }
      else {
        watchdog('linkchecker', 'Link %link has changed and needs to be updated.', array(
          '%link' => $link->url,
        ), WATCHDOG_NOTICE, l(t('Broken links'), 'admin/reports/linkchecker'));
      }
      break;

    case 404:
      db_query("UPDATE {linkchecker_links} SET code = %d, error = '%s', fail_count = fail_count+1, last_checked = %d WHERE lid = %d", $response->code, $response->error, time(), $link->lid);
      watchdog('linkchecker', 'Broken link %link has been found.', array(
        '%link' => $link->url,
      ), WATCHDOG_NOTICE, l(t('Broken links'), 'admin/reports/linkchecker'));

      // If unpublishing limit is reached, unpublish all nodes having this link.
      $linkchecker_action_status_code_404 = variable_get('linkchecker_action_status_code_404', 0);
      if ($linkchecker_action_status_code_404 && $linkchecker_action_status_code_404 <= $link->fail_count + 1) {
        // Switch anonymous user to an admin.
        linkchecker_impersonate_user(user_load(array(
          'name' => variable_get('linkchecker_impersonate_user', ''),
        )));
        _linkchecker_unpublish_nodes($link->lid);
        linkchecker_revert_user();
      }
      break;

    case 405:
      // - 405: Special error handling if method is not allowed. Switch link
      //   checking to GET method and try again.
      db_query("UPDATE {linkchecker_links} SET method = '%s', code = %d, error = '%s', fail_count = fail_count+1, last_checked = %d WHERE lid = %d", 'GET', $response->code, $response->error, time(), $link->lid);
      watchdog('linkchecker', 'Method HEAD is not allowed for link %link. Method has been changed to GET.', array(
        '%link' => $link->url,
      ), WATCHDOG_INFO, l(t('Broken links'), 'admin/reports/linkchecker'));
      break;

    case 500:
      // - 500: Like WGET, try with GET on "500 Internal server error".
      // - If GET also fails with status code 500, then the link is broken.
      if ($link->method == 'GET') {
        db_query("UPDATE {linkchecker_links} SET code = %d, error = '%s', fail_count = fail_count+1, last_checked = %d WHERE lid = %d", $response->code, $response->error, time(), $link->lid);
        watchdog('linkchecker', 'Broken link %link has been found.', array(
          '%link' => $link->url,
        ), WATCHDOG_NOTICE, l(t('Broken links'), 'admin/reports/linkchecker'));
      }
      else {
        db_query("UPDATE {linkchecker_links} SET method = '%s', code = %d, error = '%s', fail_count = fail_count+1, last_checked = %d WHERE lid = %d", 'GET', $response->code, $response->error, time(), $link->lid);
        watchdog('linkchecker', 'Internal server error for link %link. Method has been changed to GET.', array(
          '%link' => $link->url,
        ), WATCHDOG_INFO, l(t('Broken links'), 'admin/reports/linkchecker'));
      }
      break;

    default:
      // The configured codes are strings, so the comparison is deliberately
      // non-strict against the numeric response code.
      if (in_array($response->code, $ignore_response_codes)) {
        // Don't treat ignored response codes as errors: reset the fail count.
        db_query("UPDATE {linkchecker_links} SET code = %d, error = '%s', fail_count = %d, last_checked = %d WHERE lid = %d", $response->code, $response->error, 0, time(), $link->lid);
      }
      else {
        db_query("UPDATE {linkchecker_links} SET code = %d, error = '%s', fail_count = fail_count+1, last_checked = %d WHERE lid = %d", $response->code, $response->error, time(), $link->lid);
      }
  }

  // Free Memory.
  $response = new stdClass();
}