You are here

function _linkchecker_status_handling in Link checker 6.2

Same name and namespace in other branches
  1. 5.2 linkchecker.module \_linkchecker_status_handling()
  2. 7 linkchecker.module \_linkchecker_status_handling()

Status code handling.

Parameters

object $response: An object containing the HTTP request headers, response code, headers, data and redirect status.

object $link: An object containing the url, lid, method and fail_count of the link being checked.

1 call to _linkchecker_status_handling()
_linkchecker_check_links in ./linkchecker.module
Run link checks.
1 string reference to '_linkchecker_status_handling'
_linkchecker_check_links in ./linkchecker.module
Run link checks.

File

./linkchecker.module, line 507
This module periodically checks links in given node types, blocks, CCK fields, etc.

Code

/**
 * Status code handling.
 *
 * Records the outcome of a link check in {linkchecker_links} and, depending on
 * the response code and the module settings, repairs permanently moved links
 * in nodes, comments and boxes (301) or unpublishes nodes (404).
 *
 * @param $response
 *   An object containing the HTTP request headers, response code, headers,
 *   data and redirect status. Passed by reference; it is replaced with an
 *   empty object before returning to free memory.
 * @param $link
 *   An object containing the url, lid, method and fail_count of the link.
 */
function _linkchecker_status_handling(&$response, $link) {
  $ignore_response_codes = preg_split('/(\\r\\n?|\\n)/', variable_get('linkchecker_ignore_response_codes', "200\n206\n302\n304\n401\n403"));

  // - Prevent E_ALL warnings in DB updates for non-existing $response->error.
  // - @todo drupal_http_request() may not provide an UTF8 encoded error message
  //   what results in a database UPDATE failure. For more information, see
  //   http://drupal.org/node/371495.
  //   Workaround: ISO-8859-1 as source encoding may be wrong, but WFM.
  if (!isset($response->error)) {
    $response->error = '';
  }
  if (!isset($response->status_message)) {
    $response->status_message = '';
  }
  $response->error = trim(drupal_convert_to_utf8($response->error, 'ISO-8859-1'));
  $response->status_message = trim(drupal_convert_to_utf8($response->status_message, 'ISO-8859-1'));

  // Destination anchors in HTML documents may be specified either by the A
  // element (naming it with the name attribute), or by any other element
  // (naming with the id attribute).
  // See http://www.w3.org/TR/html401/struct/links.html
  //
  // Notes:
  // - 'top' is a reserved fragment that must not exist in a page. The fragment
  //   is matched against name/id attribute values below, so it is expected to
  //   arrive without the leading '#'; the '#top' spelling is still accepted.
  $reserved_fragments = array(
    'top',
    '#top',
  );
  $anchor_content_types = array(
    'text/html',
    'application/xhtml+xml',
    'application/xml',
  );
  if ($response->code == 200
    && !empty($response->data)
    && !empty($response->headers['content-type'])
    && !empty($response->uri['fragment'])
    && !in_array($response->uri['fragment'], $reserved_fragments)
    && in_array($response->headers['content-type'], $anchor_content_types)
    && !preg_match('/(\\s[^>]*(name|id)(\\s+)?=(\\s+)?["\'])(' . preg_quote($response->uri['fragment'], '/') . ')(["\'][^>]*>)/i', $response->data)) {

    // Override status code 200 with status code 404 so it can be handled with
    // default status code 404 logic and custom error text.
    $response->code = 404;
    $response->status_message = $response->error = 'URL fragment identifier not found in content';
  }

  switch ($response->code) {
    case -4:

      // HTTPRL: httprl_send_request timed out.
      // Skip these and try them again next cron run.
      break;

    case -2:

    // HTTPRL: maximum allowed redirects exhausted.
    // Deliberate fall-through: handled the same way as a 301.
    case 301:
      db_query("UPDATE {linkchecker_links} SET code = %d, error = '%s', fail_count = fail_count+1, last_checked = %d WHERE lid = %d", $response->redirect_code, $response->status_message, time(), $link->lid);

      // A HTTP status code of 301 tells us an existing link has changed to
      // a new link. The remote site owner was so kind to provide us the new
      // link and if we trust this change we are able to replace the old link
      // with the new one without any hand work.
      $auto_repair_301 = variable_get('linkchecker_action_status_code_301', 0);
      if ($auto_repair_301 && $auto_repair_301 <= $link->fail_count + 1 && valid_url($response->redirect_url, TRUE)) {

        // Switch anonymous user to an admin.
        linkchecker_impersonate_user(user_load(array(
          'name' => variable_get('linkchecker_impersonate_user', ''),
        )));

        // NODES: Autorepair all nodes having this outdated link.
        $res = db_query("SELECT * FROM {linkchecker_nodes} WHERE lid = %d", $link->lid);
        while ($row = db_fetch_object($res)) {
          $node = node_load(array(
            'nid' => $row->nid,
          ));

          // Has the node object loaded successfully?
          if (is_object($node)) {
            // Keep an untouched copy so a change can be detected afterwards.
            $node_original = drupal_clone($node);

            // Create array of node fields to scan (for e.g. $node->title, $node->links_weblink_url).
            $text_items = array();
            $text_items[] = 'title';
            $text_items[] = 'body';
            $text_items[] = 'teaser';

            // Update 'weblink' nodes from 'links' module package.
            if (module_exists('links_weblink') && $node->type == 'weblink' && isset($node->links_weblink_url)) {
              $text_items[] = 'links_weblink_url';
            }

            // Update 'weblinks' nodes from 'weblinks' module.
            if (module_exists('weblinks') && $node->type == 'weblinks' && isset($node->url)) {
              $text_items[] = 'url';
            }

            // Now replace the outdated link with the permanently moved one in all node fields.
            foreach ($text_items as $text_item) {
              _linkchecker_link_replace($node->{$text_item}, $link->url, $response->redirect_url);
            }

            // Search for CCK-fields of types 'link' and 'text'.
            if (module_exists('content')) {
              $fields = content_fields(NULL, $node->type);
              foreach ($fields as $field) {
                // The node property is named after the field, e.g.
                // $node->field_foo, and holds one item per delta.
                $field_name = $field['field_name'];
                if (isset($node->{$field_name})) {
                  if (module_exists('link') && $field['type'] == 'link') {
                    foreach ($node->{$field_name} as $delta => $item) {
                      _linkchecker_link_replace($node->{$field_name}[$delta]['url'], $link->url, $response->redirect_url);
                    }
                  }
                  elseif (module_exists('text') && $field['type'] == 'text') {
                    foreach ($node->{$field_name} as $delta => $item) {
                      _linkchecker_link_replace($node->{$field_name}[$delta]['value'], $link->url, $response->redirect_url);
                    }
                  }
                }
              }
            }

            if ($node_original != $node) {

              // Always use the default revision setting. See node_object_prepare().
              $node_options = variable_get('node_options_' . $node->type, array(
                'status',
                'promote',
              ));
              $node->revision = in_array('revision', $node_options);

              // Generate a log message for the node_revisions table, visible on the node's revisions tab.
              $node->log = t('Changed permanently moved link in %node from %src to %dst.', array(
                '%node' => url('node/' . $row->nid),
                '%src' => $link->url,
                '%dst' => $response->redirect_url,
              ));

              // Save changed node and update the node link list.
              node_save($node);
              watchdog('linkchecker', 'Changed permanently moved link in %node from %src to %dst.', array(
                '%node' => url('node/' . $row->nid),
                '%src' => $link->url,
                '%dst' => $response->redirect_url,
              ), WATCHDOG_INFO);
            }
            else {
              watchdog('linkchecker', 'Link update in node failed. Permanently moved link %src not found in node %node. Manual fix required.', array(
                '%node' => url('node/' . $row->nid),
                '%src' => $link->url,
              ), WATCHDOG_WARNING);
            }
          }
          else {
            watchdog('linkchecker', 'Loading node %node for update failed. Manual fix required.', array(
              '%node' => $row->nid,
            ), WATCHDOG_ERROR);
          }
        }

        // COMMENTS: Autorepair all comments having this outdated link.
        $res = db_query("SELECT * FROM {linkchecker_comments} WHERE lid = %d", $link->lid);
        while ($row = db_fetch_object($res)) {
          $comment = _linkchecker_comment_load($row->cid);

          // Has the custom comment array loaded successfully?
          if (!empty($comment)) {
            $comment_original = $comment;

            // Create array of comment fields to scan (for e.g. $comment->subject, $comment->comment).
            $text_items = array();
            $text_items[] = 'subject';
            $text_items[] = 'comment';

            // Now replace the outdated link with the permanently moved one in all comment fields.
            foreach ($text_items as $text_item) {
              _linkchecker_link_replace($comment[$text_item], $link->url, $response->redirect_url);
            }

            // Save changed comment and update the comment link list.
            $comment_diff = array_diff($comment, $comment_original);
            if (!empty($comment_diff)) {
              comment_save($comment);
              watchdog('linkchecker', 'Changed permanently moved link in comment %comment from %src to %dst.', array(
                '%comment' => $comment['cid'],
                '%src' => $link->url,
                '%dst' => $response->redirect_url,
              ), WATCHDOG_INFO);
            }
            else {
              watchdog('linkchecker', 'Link update in comment failed. Permanently moved link %src not found in comment %comment. Manual fix required.', array(
                '%comment' => $comment['cid'],
                '%src' => $link->url,
              ), WATCHDOG_WARNING);
            }
          }
          else {
            // $comment is empty in this branch, so report the id that was
            // requested from the link table instead.
            watchdog('linkchecker', 'Loading comment %comment for update failed. Manual fix required.', array(
              '%comment' => $row->cid,
            ), WATCHDOG_ERROR);
          }
        }

        // BOXES: Autorepair all boxes having this outdated link.
        $res = db_query("SELECT * FROM {linkchecker_boxes} WHERE lid = %d", $link->lid);
        while ($row = db_fetch_object($res)) {
          $box = block_box_get($row->bid);

          // Has the custom block array loaded successfully?
          if (!empty($box)) {
            $box_original = $box;

            // Create array of box fields to scan.
            $text_items = array();
            $text_items[] = 'info';
            $text_items[] = 'body';

            // Now replace the outdated link with the permanently moved one in all
            // box fields.
            foreach ($text_items as $text_item) {
              _linkchecker_link_replace($box[$text_item], $link->url, $response->redirect_url);
            }
            $box_diff = array_diff($box, $box_original);
            if (!empty($box_diff)) {

              // Save changed box and update the box link list.
              block_box_save($box, $row->bid);

              // There is no hook that fires on block_box_save(), therefore do link
              // extraction programmatically.
              _linkchecker_add_box_links($box, $row->bid);
              watchdog('linkchecker', 'Changed permanently moved link in box %bid from %src to %dst.', array(
                '%bid' => $row->bid,
                '%src' => $link->url,
                '%dst' => $response->redirect_url,
              ), WATCHDOG_INFO);
            }
            else {
              watchdog('linkchecker', 'Link update in block failed. Permanently moved link %src not found in block %bid. Manual fix required.', array(
                '%bid' => $row->bid,
                '%src' => $link->url,
              ), WATCHDOG_WARNING);
            }
          }
          else {
            watchdog('linkchecker', 'Loading block %bid for update failed. Manual fix required.', array(
              '%bid' => $row->bid,
            ), WATCHDOG_ERROR);
          }
        }

        // Revert user back to anonymous.
        linkchecker_revert_user();
      }
      else {
        watchdog('linkchecker', 'Link %link has changed and needs to be updated.', array(
          '%link' => $link->url,
        ), WATCHDOG_NOTICE, l(t('Broken links'), 'admin/reports/linkchecker'));
      }
      break;

    case 404:
      db_query("UPDATE {linkchecker_links} SET code = %d, error = '%s', fail_count = fail_count+1, last_checked = %d WHERE lid = %d", $response->code, $response->error, time(), $link->lid);
      watchdog('linkchecker', 'Broken link %link has been found.', array(
        '%link' => $link->url,
      ), WATCHDOG_NOTICE, l(t('Broken links'), 'admin/reports/linkchecker'));

      // If unpublishing limit is reached, unpublish all nodes having this link.
      $linkchecker_action_status_code_404 = variable_get('linkchecker_action_status_code_404', 0);
      if ($linkchecker_action_status_code_404 && $linkchecker_action_status_code_404 <= $link->fail_count + 1) {

        // Switch anonymous user to an admin.
        linkchecker_impersonate_user(user_load(array(
          'name' => variable_get('linkchecker_impersonate_user', ''),
        )));
        _linkchecker_unpublish_nodes($link->lid);
        linkchecker_revert_user();
      }
      break;

    case 405:

      // - 405: Special error handling if method is not allowed. Switch link
      //        checking to GET method and try again.
      db_query("UPDATE {linkchecker_links} SET method = '%s', code = %d, error = '%s', fail_count = fail_count+1, last_checked = %d WHERE lid = %d", 'GET', $response->code, $response->error, time(), $link->lid);
      watchdog('linkchecker', 'Method HEAD is not allowed for link %link. Method has been changed to GET.', array(
        '%link' => $link->url,
      ), WATCHDOG_INFO, l(t('Broken links'), 'admin/reports/linkchecker'));
      break;

    case 500:

      // - 500: Like WGET, try with GET on "500 Internal server error".
      // - If GET also fails with status code 500, then the link is broken.
      if ($link->method == 'GET') {
        db_query("UPDATE {linkchecker_links} SET code = %d, error = '%s', fail_count = fail_count+1, last_checked = %d WHERE lid = %d", $response->code, $response->error, time(), $link->lid);
        watchdog('linkchecker', 'Broken link %link has been found.', array(
          '%link' => $link->url,
        ), WATCHDOG_NOTICE, l(t('Broken links'), 'admin/reports/linkchecker'));
      }
      else {
        db_query("UPDATE {linkchecker_links} SET method = '%s', code = %d, error = '%s', fail_count = fail_count+1, last_checked = %d WHERE lid = %d", 'GET', $response->code, $response->error, time(), $link->lid);
        watchdog('linkchecker', 'Internal server error for link %link. Method has been changed to GET.', array(
          '%link' => $link->url,
        ), WATCHDOG_INFO, l(t('Broken links'), 'admin/reports/linkchecker'));
      }
      break;

    default:

      // Don't treat ignored response codes as errors.
      if (in_array($response->code, $ignore_response_codes)) {
        // Ignored codes reset the failure counter.
        db_query("UPDATE {linkchecker_links} SET code = %d, error = '%s', fail_count = %d, last_checked = %d WHERE lid = %d", $response->code, $response->error, 0, time(), $link->lid);

        //watchdog('linkchecker', 'Unhandled link error %link has been found.', array('%link' => $link->url), WATCHDOG_ERROR, l(t('Broken links'), 'admin/reports/linkchecker'));
      }
      else {
        db_query("UPDATE {linkchecker_links} SET code = %d, error = '%s', fail_count = fail_count+1, last_checked = %d WHERE lid = %d", $response->code, $response->error, time(), $link->lid);

        //watchdog('linkchecker', 'Unhandled link error %link has been found.', array('%link' => $link->url), WATCHDOG_ERROR, l(t('Broken links'), 'admin/reports/linkchecker'));
      }
  }

  // Free Memory.
  $response = new stdClass();
}