You are here

protected function LinkCheckerService::statusHandling in Link checker 8

Status code handling.

Parameters

\Psr\Http\Message\ResponseInterface $response: An object containing the HTTP request headers, response code, headers, data and redirect status.

\Drupal\linkchecker\LinkCheckerLinkInterface $link: The link.

1 call to LinkCheckerService::statusHandling()
LinkCheckerService::check in src/LinkCheckerService.php
Check the link.

File

src/LinkCheckerService.php, line 194

Class

LinkCheckerService
Class LinkCheckerService.

Namespace

Drupal\linkchecker

Code

/**
 * Status code handling.
 *
 * Records the outcome of a link check on the link entity (status code, error
 * message, fail count, last check time), logs it, propagates the result via
 * updateSameLinks() and hands the link to every status handler plugin whose
 * definition lists the resulting status code.
 *
 * @param \Psr\Http\Message\ResponseInterface $response
 *   The response received for the checked link. A 'Fragment' header, when
 *   present, is read as the URL fragment that should exist in the body.
 * @param \Drupal\linkchecker\LinkCheckerLinkInterface $link
 *   The link entity to update.
 */
protected function statusHandling(ResponseInterface $response, LinkCheckerLinkInterface $link) {
  // One response code per line; tolerate a missing setting and stray
  // whitespace around the individual codes.
  $ignoreResponseCodes = preg_split('/(\\r\\n?|\\n)/', (string) $this->linkcheckerSetting
    ->get('error.ignore_response_codes')) ?: [];
  $ignoreResponseCodes = array_map('trim', $ignoreResponseCodes);

  $error = $response->getReasonPhrase() ?? '';

  // Destination anchors in HTML documents may be specified either by:
  // - the A element (naming it with the name attribute)
  // - or by any other element (naming it with the id attribute)
  // - and must not contain a key/value pair, as this type of hash fragment
  //   is typically used by AJAX applications to prevent additional HTTP
  //   requests, e.g. https://www.example.com/ajax.html#key1=value1&key2=value2
  // - and must not contain '/' or ',' as these are not normal anchors.
  // - and '#top' is a reserved fragment that must not exist in a page.
  // See https://www.w3.org/TR/html401/struct/links.html
  $statusCode = $response->getStatusCode();
  if ($statusCode === 200 && $response->hasHeader('Fragment') && !empty($response->getHeader('Content-Type'))) {
    // getHeader() returns an array, so the header line is used for every
    // string comparison below.
    $fragment = $response->getHeaderLine('Fragment');

    // Content-Type may carry parameters ("text/html; charset=utf-8"); only
    // the media type itself decides whether the body can contain anchors.
    $mediaType = strtolower(trim(explode(';', $response->getHeaderLine('Content-Type'))[0]));
    $isMarkup = in_array($mediaType, [
      'text/html',
      'application/xhtml+xml',
      'application/xml',
    ], TRUE);

    // NOTE(review): it is not visible here whether the caller stores the
    // fragment with or without the leading '#', so both spellings of the
    // reserved "top" fragment are accepted.
    $isPlainAnchor = $fragment !== ''
      && !preg_match('/=|\\/|,/', $fragment)
      && !in_array(strtolower($fragment), ['top', '#top'], TRUE);

    if ($isMarkup && $isPlainAnchor) {
      // getBody() returns a stream object (never "empty"); cast it once so
      // the emptiness test and the pattern match work on the real content.
      $body = (string) $response->getBody();
      $pattern = '/(\\s[^>]*(name|id)(\\s+)?=(\\s+)?["\'])(' . preg_quote(urldecode($fragment), '/') . ')(["\'][^>]*>)/i';

      // Compare against 0 explicitly: a PCRE failure (FALSE) is not proof
      // that the anchor is missing.
      if ($body !== '' && preg_match($pattern, $body) === 0) {
        // Override status code 200 with status code 404 so it can be handled
        // with default status code 404 logic and custom error text.
        $statusCode = 404;
        $error = 'URL fragment identifier not found in content';
      }
    }
  }

  // The switch only decides what differs between the cases; the shared
  // link update and logging happen once below.
  $newRequestMethod = NULL;
  $resetFailCount = FALSE;
  $logMessage = NULL;
  $logLevel = RfcLogLevel::NOTICE;

  switch ($statusCode) {
    case 301:
      $logMessage = 'Link %link has changed and needs to be updated.';
      break;

    case 404:
      $logMessage = 'Broken link %link has been found.';
      break;

    case 405:
      // - 405: Special error handling if method is not allowed. Switch link
      //   checking to GET method and try again.
      $newRequestMethod = 'GET';
      $logMessage = 'Method HEAD is not allowed for link %link. Method has been changed to GET.';
      break;

    case 500:
      // - 500: Like WGET, try with GET on "500 Internal server error".
      // - If GET also fails with status code 500, then the link is broken.
      if ($link->getRequestMethod() == 'GET') {
        $logMessage = 'Broken link %link has been found.';
      }
      else {
        $newRequestMethod = 'GET';
        $logMessage = 'Internal server error for link %link. Method has been changed to GET.';
      }
      break;

    default:
      // Don't treat ignored response codes as errors.
      if (in_array((string) $statusCode, $ignoreResponseCodes, TRUE)) {
        $resetFailCount = TRUE;
      }
      else {
        $logMessage = 'Unhandled link error %link has been found.';
        $logLevel = RfcLogLevel::ERROR;
      }
  }

  if ($newRequestMethod !== NULL) {
    $link->setRequestMethod($newRequestMethod);
  }
  $link->setStatusCode($statusCode);
  $link->setErrorMessage($error);
  $link->setFailCount($resetFailCount ? 0 : $link->getFailCount() + 1);
  $link->setLastCheckTime($this->time->getCurrentTime());
  $link->save();

  // Ignored response codes are recorded silently; everything else is logged.
  if ($logMessage !== NULL) {
    linkchecker_watchdog_log('linkchecker', $logMessage, [
      '%link' => $link->getUrl(),
    ], $logLevel, $this->getReportLink());
  }

  $this->updateSameLinks($link);

  // Let every status handler plugin registered for this status code queue
  // its follow-up work.
  foreach ($this->statusHandlerManager->getDefinitions() as $definition) {
    if (in_array($statusCode, $definition['status_codes'])) {
      /** @var \Drupal\linkchecker\Plugin\LinkStatusHandlerInterface $handler */
      $handler = $this->statusHandlerManager
        ->createInstance($definition['id']);
      $handler->queueItems($link, $response);
    }
  }
}