function _linkchecker_status_handling in Link checker 7
Same name and namespace in other branches
- 5.2 linkchecker.module \_linkchecker_status_handling()
- 6.2 linkchecker.module \_linkchecker_status_handling()
Status code handling.
Parameters
object $response: An object containing the HTTP request headers, response code, headers, data and redirect status.
object $link: An object containing the url, lid, method and fail_count.
1 call to _linkchecker_status_handling()
- _linkchecker_check_links in ./
linkchecker.module - Run link checks.
1 string reference to '_linkchecker_status_handling'
- _linkchecker_check_links in ./
linkchecker.module - Run link checks.
File
- ./
linkchecker.module, line 601 - This module periodically checks links in given node types, blocks etc.
Code
/**
 * Status code handling.
 *
 * Records the outcome of a link check in the {linkchecker_link} table and
 * runs the follow-up action that belongs to the response code:
 * - -4: nothing is stored, the link is left for a later run.
 * - -2 / 301: the redirect is stored and, if auto repair is enabled and the
 *   threshold is reached, the old URL is replaced in nodes, comments and
 *   custom blocks.
 * - 404: the failure is stored and, if the configured threshold is reached,
 *   the nodes containing the link are unpublished.
 * - 405 / 500: the failure is stored and the check method is switched to GET
 *   (a 500 on a link that already uses GET is stored as a broken link).
 * - everything else: the code is stored; the fail counter is only increased
 *   for codes that are not in the list of ignored response codes.
 *
 * Note on the update queries: several of them pass a literal
 * 'fail_count' => 0 and also a 'fail_count + 1' expression for the same
 * column. In Drupal 7's UpdateQuery an expression takes priority over a
 * literal field value, so the counter is incremented in those queries.
 *
 * @param object $response
 *   An object containing the HTTP request headers, response code, headers,
 *   data and redirect status. Passed by reference: missing 'error' and
 *   'status_message' properties are initialized, the code may be overridden
 *   with 404 for a missing fragment, and the variable is replaced with an
 *   empty stdClass before returning to free memory.
 * @param object $link
 *   An object containing the url, lid, method and fail_count.
 */
function _linkchecker_status_handling(&$response, $link) {
  $ignore_response_codes = preg_split('/(\\r\\n?|\\n)/', variable_get('linkchecker_ignore_response_codes', "200\n206\n302\n304\n401\n403"));

  // - Prevent E_ALL warnings in DB updates for non-existing $response->error.
  // - @todo drupal_http_request() may not provide an UTF8 encoded error message
  //   what results in a database UPDATE failure. For more information, see
  //   https://drupal.org/node/371495.
  //   Workaround: ISO-8859-1 as source encoding may be wrong, but WFM.
  if (!isset($response->error)) {
    $response->error = '';
  }
  if (!isset($response->status_message)) {
    $response->status_message = '';
  }
  $response->error = trim(drupal_convert_to_utf8($response->error, 'ISO-8859-1'));
  $response->status_message = trim(drupal_convert_to_utf8($response->status_message, 'ISO-8859-1'));

  // Destination anchors in HTML documents may be specified either by:
  // - the A element (naming it with the name attribute)
  // - or by any other element (naming with the id attribute)
  // - and must not contain a key/value pair as these types of hash fragments
  //   are typically used by AJAX applications to prevent additional HTTP
  //   requests, e.g. https://www.example.com/ajax.html#key1=value1&key2=value2
  // - and must not contain '/' or ',' as these are not normal anchors.
  // - and 'top' is a reserved fragment that does not need to exist in a page.
  // See https://www.w3.org/TR/html401/struct/links.html
  //
  // The fragment is matched against name/id attribute values below, i.e. it
  // is handled without a leading '#'. The reserved fragment is therefore
  // compared as 'top'; '#top' is kept for backward compatibility.
  if ($response->code == 200
    && !empty($response->data)
    && !empty($response->headers['content-type'])
    && !empty($response->uri['fragment'])
    && preg_match('/=|\\/|,/', $response->uri['fragment']) == FALSE
    && !in_array($response->uri['fragment'], array(
      'top',
      '#top',
    ))
    && in_array($response->headers['content-type'], array(
      'text/html',
      'application/xhtml+xml',
      'application/xml',
    ))
    && !preg_match('/(\\s[^>]*(name|id)(\\s+)?=(\\s+)?["\'])(' . preg_quote(urldecode($response->uri['fragment']), '/') . ')(["\'][^>]*>)/i', $response->data)
  ) {
    // Override status code 200 with status code 404 so it can be handled with
    // default status code 404 logic and custom error text.
    $response->code = 404;
    $response->status_message = $response->error = 'URL fragment identifier not found in content';
  }

  switch ($response->code) {
    case -4:
      // HTTPRL: httprl_send_request timed out.
      // Skip these and try them again next cron run.
      break;

    case -2:
      // HTTPRL: maximum allowed redirects exhausted.
    case 301:
      // Remote site sent status code 301 and the link needs an update.
      db_update('linkchecker_link')
        ->condition('lid', $link->lid)
        ->fields(array(
          'code' => $response->redirect_code,
          'error' => $response->status_message,
          'fail_count' => 0,
          'last_checked' => time(),
        ))
        ->expression('fail_count', 'fail_count + 1')
        ->execute();

      // A HTTP status code of 301 tells us an existing link has changed to
      // a new link. The remote site owner was so kind to provide us the new
      // link and if we trust this change we are able to replace the old link
      // with the new one without any hand work.
      $auto_repair_301 = variable_get('linkchecker_action_status_code_301', 0);
      if ($auto_repair_301 && $auto_repair_301 <= $link->fail_count + 1 && valid_url($response->redirect_url, TRUE)) {
        // Switch anonymous user to an admin.
        linkchecker_impersonate_user(user_load_by_name(variable_get('linkchecker_impersonate_user', '')));

        // NODES: Autorepair all nodes having this outdated link.
        $result = db_query('SELECT nid FROM {linkchecker_node} WHERE lid = :lid', array(
          ':lid' => $link->lid,
        ));
        foreach ($result as $row) {
          // Explicitly don't use node_load_multiple() or the module may run
          // into issues like https://drupal.org/node/1210606. With this logic
          // nodes can be updated until an out of memory occurs and further
          // updates will be made on the remaining nodes only.
          $node = node_load($row->nid);

          // Has the node object loaded successfully?
          if (is_object($node)) {
            $node_original = clone $node;
            $node = _linkchecker_replace_fields('node', $node->type, $node, $link->url, $response->redirect_url);

            if ($node_original != $node) {
              // Always use the default revision setting. For more information,
              // see node_object_prepare().
              $node_options = variable_get('node_options_' . $node->type, array(
                'status',
                'promote',
              ));
              $node->revision = in_array('revision', $node_options);

              // Generate a log message for the node_revisions table, visible on
              // the node's revisions tab.
              $node->log = t('Changed permanently moved link in %node from %src to %dst.', array(
                '%node' => url('node/' . $node->nid),
                '%src' => $link->url,
                '%dst' => $response->redirect_url,
              ));

              // Save changed node and update the node link list.
              node_save($node);
              linkchecker_watchdog_log('linkchecker', 'Changed permanently moved link in %node from %src to %dst.', array(
                '%node' => url('node/' . $node->nid),
                '%src' => $link->url,
                '%dst' => $response->redirect_url,
              ), WATCHDOG_INFO);
            }
            else {
              linkchecker_watchdog_log('linkchecker', 'Link update in node failed. Permanently moved link %src not found in node %node. Manual fix required.', array(
                '%node' => url('node/' . $row->nid),
                '%src' => $link->url,
              ), WATCHDOG_WARNING);
            }
          }
          else {
            linkchecker_watchdog_log('linkchecker', 'Loading node %node for update failed. Manual fix required.', array(
              '%node' => $row->nid,
            ), WATCHDOG_ERROR);
          }
        }

        // COMMENTS: Autorepair all comments having this outdated link.
        $result = db_query('SELECT cid FROM {linkchecker_comment} WHERE lid = :lid', array(
          ':lid' => $link->lid,
        ));
        foreach ($result as $row) {
          // Explicitly don't use comment_load_multiple() or the module may run
          // into issues like https://drupal.org/node/1210606. With this logic
          // comments can be updated until an out of memory occurs and further
          // updates will be made on the remaining comments only.
          $comment = comment_load($row->cid);

          // Has the comment object loaded successfully?
          if (is_object($comment)) {
            $comment_original = clone $comment;

            // Replace links in subject.
            _linkchecker_link_replace($comment->subject, $link->url, $response->redirect_url);

            // Replace links in fields.
            $comment = _linkchecker_replace_fields('comment', $comment->node_type, $comment, $link->url, $response->redirect_url);

            // Save changed comment and update the comment link list.
            if ($comment_original != $comment) {
              comment_save($comment);
              linkchecker_watchdog_log('linkchecker', 'Changed permanently moved link in comment %comment from %src to %dst.', array(
                '%comment' => $comment->cid,
                '%src' => $link->url,
                '%dst' => $response->redirect_url,
              ), WATCHDOG_INFO);
            }
            else {
              linkchecker_watchdog_log('linkchecker', 'Link update in comment failed. Permanently moved link %src not found in comment %comment. Manual fix required.', array(
                '%comment' => $comment->cid,
                '%src' => $link->url,
              ), WATCHDOG_WARNING);
            }
          }
          else {
            // $comment is not an object in this branch, so the id has to be
            // taken from the query row.
            linkchecker_watchdog_log('linkchecker', 'Loading comment %comment for update failed. Manual fix required.', array(
              '%comment' => $row->cid,
            ), WATCHDOG_ERROR);
          }
        }

        // CUSTOM BLOCKS: Autorepair all custom blocks having this outdated
        // link.
        $result = db_query('SELECT bid FROM {linkchecker_block_custom} WHERE lid = :lid', array(
          ':lid' => $link->lid,
        ));
        foreach ($result as $row) {
          $block_custom = linkchecker_block_custom_block_get($row->bid);

          // Has the custom block object loaded successfully?
          if (is_object($block_custom)) {
            $block_custom_original = clone $block_custom;

            // Now replace the outdated link with the permanently moved one in
            // all custom block fields.
            _linkchecker_link_replace($block_custom->info, $link->url, $response->redirect_url);
            _linkchecker_link_replace($block_custom->body['value'], $link->url, $response->redirect_url);

            if ($block_custom_original != $block_custom) {
              // Save changed block and update the block link list.
              block_custom_block_save((array) $block_custom, $block_custom->delta);
              // There is no hook that fires on block_custom_block_save(),
              // therefore do link extraction programmatically.
              _linkchecker_add_block_custom_links($block_custom, $block_custom->delta);
              linkchecker_watchdog_log('linkchecker', 'Changed permanently moved link in custom block %bid from %src to %dst.', array(
                '%bid' => $block_custom->delta,
                '%src' => $link->url,
                '%dst' => $response->redirect_url,
              ), WATCHDOG_INFO);
            }
            else {
              linkchecker_watchdog_log('linkchecker', 'Link update in block failed. Permanently moved link %src not found in block %bid. Manual fix required.', array(
                '%bid' => $block_custom->delta,
                '%src' => $link->url,
              ), WATCHDOG_WARNING);
            }
          }
          else {
            // $block_custom is not an object in this branch, so the id has to
            // be taken from the query row.
            linkchecker_watchdog_log('linkchecker', 'Loading block %bid for update failed. Manual fix required.', array(
              '%bid' => $row->bid,
            ), WATCHDOG_ERROR);
          }
        }

        // Revert user back to anonymous.
        linkchecker_revert_user();
      }
      else {
        linkchecker_watchdog_log('linkchecker', 'Link %link has changed and needs to be updated.', array(
          '%link' => $link->url,
        ), WATCHDOG_NOTICE, l(t('Broken links'), 'admin/reports/linkchecker'));
      }
      break;

    case 404:
      db_update('linkchecker_link')
        ->condition('lid', $link->lid)
        ->fields(array(
          'code' => $response->code,
          'error' => $response->error,
          'fail_count' => 0,
          'last_checked' => time(),
        ))
        ->expression('fail_count', 'fail_count + 1')
        ->execute();
      linkchecker_watchdog_log('linkchecker', 'Broken link %link has been found.', array(
        '%link' => $link->url,
      ), WATCHDOG_NOTICE, l(t('Broken links'), 'admin/reports/linkchecker'));

      // If unpublishing limit is reached, unpublish all nodes having this link.
      $linkchecker_action_status_code_404 = variable_get('linkchecker_action_status_code_404', 0);
      if ($linkchecker_action_status_code_404 && $linkchecker_action_status_code_404 <= $link->fail_count + 1) {
        // Switch anonymous user to an admin.
        linkchecker_impersonate_user(user_load_by_name(variable_get('linkchecker_impersonate_user', '')));
        _linkchecker_unpublish_nodes($link->lid);
        linkchecker_revert_user();
      }
      break;

    case 405:
      // - 405: Special error handling if method is not allowed. Switch link
      //   checking to GET method and try again.
      db_update('linkchecker_link')
        ->condition('lid', $link->lid)
        ->fields(array(
          'method' => 'GET',
          'code' => $response->code,
          'error' => $response->error,
          'fail_count' => 0,
          'last_checked' => time(),
        ))
        ->expression('fail_count', 'fail_count + 1')
        ->execute();
      linkchecker_watchdog_log('linkchecker', 'Method HEAD is not allowed for link %link. Method has been changed to GET.', array(
        '%link' => $link->url,
      ), WATCHDOG_INFO, l(t('Broken links'), 'admin/reports/linkchecker'));
      break;

    case 500:
      // - 500: Like WGET, try with GET on "500 Internal server error".
      // - If GET also fails with status code 500, then the link is broken.
      if ($link->method == 'GET') {
        db_update('linkchecker_link')
          ->condition('lid', $link->lid)
          ->fields(array(
            'code' => $response->code,
            'error' => $response->error,
            'fail_count' => 0,
            'last_checked' => time(),
          ))
          ->expression('fail_count', 'fail_count + 1')
          ->execute();
        linkchecker_watchdog_log('linkchecker', 'Broken link %link has been found.', array(
          '%link' => $link->url,
        ), WATCHDOG_NOTICE, l(t('Broken links'), 'admin/reports/linkchecker'));
      }
      else {
        db_update('linkchecker_link')
          ->condition('lid', $link->lid)
          ->fields(array(
            'method' => 'GET',
            'code' => $response->code,
            'error' => $response->error,
            'fail_count' => 0,
            'last_checked' => time(),
          ))
          ->expression('fail_count', 'fail_count + 1')
          ->execute();
        linkchecker_watchdog_log('linkchecker', 'Internal server error for link %link. Method has been changed to GET.', array(
          '%link' => $link->url,
        ), WATCHDOG_INFO, l(t('Broken links'), 'admin/reports/linkchecker'));
      }
      break;

    default:
      // Store the result for every other status code. No watchdog entry is
      // written for these codes.
      $update = db_update('linkchecker_link')
        ->condition('lid', $link->lid)
        ->fields(array(
          'code' => $response->code,
          'error' => $response->error,
          'fail_count' => 0,
          'last_checked' => time(),
        ));
      // Don't treat ignored response codes as errors: they reset the fail
      // counter to 0, every other code increases it.
      if (!in_array($response->code, $ignore_response_codes)) {
        $update->expression('fail_count', 'fail_count + 1');
      }
      $update->execute();
  }

  // Free Memory.
  $response = new stdClass();
}