You are here

function _linkchecker_status_handling in Link checker 7

Same name and namespace in other branches
  1. 5.2 linkchecker.module \_linkchecker_status_handling()
  2. 6.2 linkchecker.module \_linkchecker_status_handling()

Status code handling.

Parameters

object $response: An object containing the HTTP request headers, response code, headers, data and redirect status.

object $link: An object containing the url, lid and fail_count.

1 call to _linkchecker_status_handling()
_linkchecker_check_links in ./linkchecker.module
Run link checks.
1 string reference to '_linkchecker_status_handling'
_linkchecker_check_links in ./linkchecker.module
Run link checks.

File

./linkchecker.module, line 601
This module periodically checks links in given node types, blocks, etc.

Code

/**
 * Status code handling.
 *
 * Stores the result of a link check in the {linkchecker_link} table and runs
 * the follow-up action for the response code:
 * - -4: nothing is stored, the link is left for a later run.
 * - -2 and 301: the failure is stored and, if auto repair is enabled and the
 *   threshold is reached, the old URL is replaced with the redirect URL in
 *   all nodes, comments and custom blocks referencing the link.
 * - 404: the failure is stored and, if the unpublish threshold is reached,
 *   the nodes referencing the link are unpublished.
 * - 405 and 500: the failure is stored and the check method is switched to
 *   GET (for 500 only when the link has not already been checked with GET).
 * - Everything else: stored as success if the code is in the configured list
 *   of ignored response codes, otherwise stored as a failure.
 *
 * @param object $response
 *   An object containing the HTTP request headers, response code, headers,
 *   data and redirect status. Passed by reference: error and status_message
 *   are always set and converted to UTF-8, code may be overridden with 404
 *   when the URL fragment is not found in the content, and the variable is
 *   replaced with an empty object before returning to free memory.
 * @param object $link
 *   An object containing the url, lid and fail_count. The method property is
 *   read as well when handling status code 500.
 */
function _linkchecker_status_handling(&$response, $link) {
  $ignore_response_codes = preg_split('/(\\r\\n?|\\n)/', variable_get('linkchecker_ignore_response_codes', "200\n206\n302\n304\n401\n403"));

  // - Prevent E_ALL warnings in DB updates for non-existing $response->error.
  // - @todo drupal_http_request() may not provide an UTF8 encoded error message
  //   what results in a database UPDATE failure. For more information, see
  //   https://drupal.org/node/371495.
  //   Workaround: ISO-8859-1 as source encoding may be wrong, but WFM.
  if (!isset($response->error)) {
    $response->error = '';
  }
  if (!isset($response->status_message)) {
    $response->status_message = '';
  }
  $response->error = trim(drupal_convert_to_utf8($response->error, 'ISO-8859-1'));
  $response->status_message = trim(drupal_convert_to_utf8($response->status_message, 'ISO-8859-1'));

  // Destination anchors in HTML documents may be specified either by:
  // - the A element (naming it with the name attribute)
  // - or by any other element (naming with the id attribute)
  // - and must not contain a key/value pair as these type of hash fragments are
  //   typically used by AJAX applications to prevent additionally HTTP requests
  //   e.g. https://www.example.com/ajax.html#key1=value1&key2=value2
  // - and must not contain '/' or ',' as this are not normal anchors.
  // - and 'top' is a reserved fragment that does not need to exist in a page.
  // See https://www.w3.org/TR/html401/struct/links.html
  //
  // The fragment is matched against name/id attribute values below, so it is
  // expected without a leading '#'. The reserved fragment is therefore compared
  // as 'top' (case-insensitive); '#top' is kept for backward compatibility.
  // NOTE(review): the content type is compared verbatim, so a header carrying
  // parameters (e.g. '; charset=utf-8') skips this check - confirm whether the
  // caller normalizes the header.
  if ($response->code == 200
    && !empty($response->data)
    && !empty($response->headers['content-type'])
    && !empty($response->uri['fragment'])
    && preg_match('/=|\\/|,/', $response->uri['fragment']) == FALSE
    && !in_array(strtolower($response->uri['fragment']), array(
      'top',
      '#top',
    ))
    && in_array($response->headers['content-type'], array(
      'text/html',
      'application/xhtml+xml',
      'application/xml',
    ))
    && !preg_match('/(\\s[^>]*(name|id)(\\s+)?=(\\s+)?["\'])(' . preg_quote(urldecode($response->uri['fragment']), '/') . ')(["\'][^>]*>)/i', $response->data)) {

    // Override status code 200 with status code 404 so it can be handled with
    // default status code 404 logic and custom error text.
    $response->code = 404;
    $response->status_message = $response->error = 'URL fragment identifier not found in content';
  }

  // In all failure updates below the expression on 'fail_count' is what gets
  // written; UpdateQuery drops the same-named literal from fields() when an
  // expression is set for that column.
  switch ($response->code) {
    case -4:
      // HTTPRL: httprl_send_request timed out.
      // Skip these and try them again next cron run.
      break;

    case -2:
      // HTTPRL: maximum allowed redirects exhausted.
      // Intentionally falls through to the 301 handling.
    case 301:
      // Remote site send status code 301 and link needs an update.
      db_update('linkchecker_link')
        ->condition('lid', $link->lid)
        ->fields(array(
          'code' => $response->redirect_code,
          'error' => $response->status_message,
          'fail_count' => 0,
          'last_checked' => time(),
        ))
        ->expression('fail_count', 'fail_count + 1')
        ->execute();

      // A HTTP status code of 301 tells us an existing link have changed to
      // a new link. The remote site owner was so kind to provide us the new
      // link and if we trust this change we are able to replace the old link
      // with the new one without any hand work.
      $auto_repair_301 = variable_get('linkchecker_action_status_code_301', 0);
      if ($auto_repair_301 && $auto_repair_301 <= $link->fail_count + 1 && valid_url($response->redirect_url, TRUE)) {

        // Switch anonymous user to an admin.
        linkchecker_impersonate_user(user_load_by_name(variable_get('linkchecker_impersonate_user', '')));

        // NODES: Autorepair all nodes having this outdated link.
        $result = db_query('SELECT nid FROM {linkchecker_node} WHERE lid = :lid', array(
          ':lid' => $link->lid,
        ));
        foreach ($result as $row) {

          // Explicitly don't use node_load_multiple() or the module may run
          // into issues like https://drupal.org/node/1210606. With this logic
          // nodes can be updated until an out of memory occurs and further
          // updates will be made on the remaining nodes only.
          $node = node_load($row->nid);

          // Has the node object loaded successfully?
          if (is_object($node)) {
            // Keep a copy so a no-op replacement can be detected afterwards.
            $node_original = clone $node;
            $node = _linkchecker_replace_fields('node', $node->type, $node, $link->url, $response->redirect_url);
            if ($node_original != $node) {

              // Always use the default revision setting. For more information,
              // see node_object_prepare().
              $node_options = variable_get('node_options_' . $node->type, array(
                'status',
                'promote',
              ));
              $node->revision = in_array('revision', $node_options);

              // Generate a log message for the node_revisions table, visible on
              // the node's revisions tab.
              $node->log = t('Changed permanently moved link in %node from %src to %dst.', array(
                '%node' => url('node/' . $node->nid),
                '%src' => $link->url,
                '%dst' => $response->redirect_url,
              ));

              // Save changed node and update the node link list.
              node_save($node);
              linkchecker_watchdog_log('linkchecker', 'Changed permanently moved link in %node from %src to %dst.', array(
                '%node' => url('node/' . $node->nid),
                '%src' => $link->url,
                '%dst' => $response->redirect_url,
              ), WATCHDOG_INFO);
            }
            else {
              linkchecker_watchdog_log('linkchecker', 'Link update in node failed. Permanently moved link %src not found in node %node. Manual fix required.', array(
                '%node' => url('node/' . $row->nid),
                '%src' => $link->url,
              ), WATCHDOG_WARNING);
            }
          }
          else {
            linkchecker_watchdog_log('linkchecker', 'Loading node %node for update failed. Manual fix required.', array(
              '%node' => $row->nid,
            ), WATCHDOG_ERROR);
          }
        }

        // COMMENTS: Autorepair all comments having this outdated link.
        $result = db_query('SELECT cid FROM {linkchecker_comment} WHERE lid = :lid', array(
          ':lid' => $link->lid,
        ));
        foreach ($result as $row) {

          // Explicitly don't use comment_load_multiple() or the module may run
          // into issues like https://drupal.org/node/1210606. With this logic
          // comment can be updated until an out of memory occurs and further
          // updates will be made on the remaining comments only.
          $comment = comment_load($row->cid);

          // Has the comment object loaded successfully?
          if (is_object($comment)) {
            // Keep a copy so a no-op replacement can be detected afterwards.
            $comment_original = clone $comment;

            // Replace links in subject.
            _linkchecker_link_replace($comment->subject, $link->url, $response->redirect_url);

            // Replace links in fields.
            $comment = _linkchecker_replace_fields('comment', $comment->node_type, $comment, $link->url, $response->redirect_url);

            // Save changed comment and update the comment link list.
            if ($comment_original != $comment) {
              comment_save($comment);
              linkchecker_watchdog_log('linkchecker', 'Changed permanently moved link in comment %comment from %src to %dst.', array(
                '%comment' => $comment->cid,
                '%src' => $link->url,
                '%dst' => $response->redirect_url,
              ), WATCHDOG_INFO);
            }
            else {
              linkchecker_watchdog_log('linkchecker', 'Link update in comment failed. Permanently moved link %src not found in comment %comment. Manual fix required.', array(
                '%comment' => $comment->cid,
                '%src' => $link->url,
              ), WATCHDOG_WARNING);
            }
          }
          else {
            // $comment is not an object here, so report the id from the
            // query row instead of dereferencing the failed load result.
            linkchecker_watchdog_log('linkchecker', 'Loading comment %comment for update failed. Manual fix required.', array(
              '%comment' => $row->cid,
            ), WATCHDOG_ERROR);
          }
        }

        // CUSTOM BLOCKS: Autorepair all custom blocks having this outdated
        // link.
        $result = db_query('SELECT bid FROM {linkchecker_block_custom} WHERE lid = :lid', array(
          ':lid' => $link->lid,
        ));
        foreach ($result as $row) {
          $block_custom = linkchecker_block_custom_block_get($row->bid);

          // Has the custom block object loaded successfully?
          if (is_object($block_custom)) {
            // Keep a copy so a no-op replacement can be detected afterwards.
            $block_custom_original = clone $block_custom;

            // Now replace the outdated link with the permanently moved one in
            // all custom block fields.
            _linkchecker_link_replace($block_custom->info, $link->url, $response->redirect_url);
            _linkchecker_link_replace($block_custom->body['value'], $link->url, $response->redirect_url);
            if ($block_custom_original != $block_custom) {

              // Save changed block and update the block link list.
              block_custom_block_save((array) $block_custom, $block_custom->delta);

              // There is no hook that fires on block_custom_block_save(),
              // therefore do link extraction programmatically.
              _linkchecker_add_block_custom_links($block_custom, $block_custom->delta);
              linkchecker_watchdog_log('linkchecker', 'Changed permanently moved link in custom block %bid from %src to %dst.', array(
                '%bid' => $block_custom->delta,
                '%src' => $link->url,
                '%dst' => $response->redirect_url,
              ), WATCHDOG_INFO);
            }
            else {
              linkchecker_watchdog_log('linkchecker', 'Link update in block failed. Permanently moved link %src not found in block %bid. Manual fix required.', array(
                '%bid' => $block_custom->delta,
                '%src' => $link->url,
              ), WATCHDOG_WARNING);
            }
          }
          else {
            // $block_custom is not an object here, so report the id from the
            // query row instead of dereferencing the failed load result.
            linkchecker_watchdog_log('linkchecker', 'Loading block %bid for update failed. Manual fix required.', array(
              '%bid' => $row->bid,
            ), WATCHDOG_ERROR);
          }
        }

        // Revert user back to anonymous.
        linkchecker_revert_user();
      }
      else {
        linkchecker_watchdog_log('linkchecker', 'Link %link has changed and needs to be updated.', array(
          '%link' => $link->url,
        ), WATCHDOG_NOTICE, l(t('Broken links'), 'admin/reports/linkchecker'));
      }
      break;

    case 404:
      db_update('linkchecker_link')
        ->condition('lid', $link->lid)
        ->fields(array(
          'code' => $response->code,
          'error' => $response->error,
          'fail_count' => 0,
          'last_checked' => time(),
        ))
        ->expression('fail_count', 'fail_count + 1')
        ->execute();
      linkchecker_watchdog_log('linkchecker', 'Broken link %link has been found.', array(
        '%link' => $link->url,
      ), WATCHDOG_NOTICE, l(t('Broken links'), 'admin/reports/linkchecker'));

      // If unpublishing limit is reached, unpublish all nodes having this link.
      $linkchecker_action_status_code_404 = variable_get('linkchecker_action_status_code_404', 0);
      if ($linkchecker_action_status_code_404 && $linkchecker_action_status_code_404 <= $link->fail_count + 1) {

        // Switch anonymous user to an admin.
        linkchecker_impersonate_user(user_load_by_name(variable_get('linkchecker_impersonate_user', '')));
        _linkchecker_unpublish_nodes($link->lid);
        linkchecker_revert_user();
      }
      break;

    case 405:
      // - 405: Special error handling if method is not allowed. Switch link
      //   checking to GET method and try again.
      db_update('linkchecker_link')
        ->condition('lid', $link->lid)
        ->fields(array(
          'method' => 'GET',
          'code' => $response->code,
          'error' => $response->error,
          'fail_count' => 0,
          'last_checked' => time(),
        ))
        ->expression('fail_count', 'fail_count + 1')
        ->execute();
      linkchecker_watchdog_log('linkchecker', 'Method HEAD is not allowed for link %link. Method has been changed to GET.', array(
        '%link' => $link->url,
      ), WATCHDOG_INFO, l(t('Broken links'), 'admin/reports/linkchecker'));
      break;

    case 500:
      // - 500: Like WGET, try with GET on "500 Internal server error".
      // - If GET also fails with status code 500, than the link is broken.
      if ($link->method == 'GET') {
        db_update('linkchecker_link')
          ->condition('lid', $link->lid)
          ->fields(array(
            'code' => $response->code,
            'error' => $response->error,
            'fail_count' => 0,
            'last_checked' => time(),
          ))
          ->expression('fail_count', 'fail_count + 1')
          ->execute();
        linkchecker_watchdog_log('linkchecker', 'Broken link %link has been found.', array(
          '%link' => $link->url,
        ), WATCHDOG_NOTICE, l(t('Broken links'), 'admin/reports/linkchecker'));
      }
      else {
        db_update('linkchecker_link')
          ->condition('lid', $link->lid)
          ->fields(array(
            'method' => 'GET',
            'code' => $response->code,
            'error' => $response->error,
            'fail_count' => 0,
            'last_checked' => time(),
          ))
          ->expression('fail_count', 'fail_count + 1')
          ->execute();
        linkchecker_watchdog_log('linkchecker', 'Internal server error for link %link. Method has been changed to GET.', array(
          '%link' => $link->url,
        ), WATCHDOG_INFO, l(t('Broken links'), 'admin/reports/linkchecker'));
      }
      break;

    default:
      // Don't treat ignored response codes as errors.
      if (in_array($response->code, $ignore_response_codes)) {
        // Success: store the code and reset the failure counter.
        db_update('linkchecker_link')
          ->condition('lid', $link->lid)
          ->fields(array(
            'code' => $response->code,
            'error' => $response->error,
            'fail_count' => 0,
            'last_checked' => time(),
          ))
          ->execute();
      }
      else {
        // Any other code counts as a failure; nothing is logged for it.
        db_update('linkchecker_link')
          ->condition('lid', $link->lid)
          ->fields(array(
            'code' => $response->code,
            'error' => $response->error,
            'fail_count' => 0,
            'last_checked' => time(),
          ))
          ->expression('fail_count', 'fail_count + 1')
          ->execute();
      }
  }

  // Free Memory.
  $response = new stdClass();
}