protected function LinkCheckerService::statusHandling in Link checker 8
Status code handling.
Parameters
\Psr\Http\Message\ResponseInterface $response: An object containing the HTTP request headers, response code, headers, data and redirect status.
\Drupal\linkchecker\LinkCheckerLinkInterface $link: The link.
1 call to LinkCheckerService::statusHandling()
- LinkCheckerService::check in src/LinkCheckerService.php - Check the link.
File
- src/LinkCheckerService.php, line 194
Class
- LinkCheckerService
- Class LinkCheckerService.
Namespace
Drupal\linkchecker
Code
/**
 * Status code handling.
 *
 * Stores the outcome of a link check on the link entity (status code, error
 * message, fail count, last check time), logs a message for failures, updates
 * identical links and finally hands the link to every status handler plugin
 * that declares the resulting status code.
 *
 * A 200 response whose URL fragment cannot be found in the returned document
 * is downgraded to a 404 with a custom error message.
 *
 * @param \Psr\Http\Message\ResponseInterface $response
 *   An object containing the HTTP request headers, response code, headers,
 *   data and redirect status. The checked URL fragment, if any, is read from
 *   the 'Fragment' header of this response.
 * @param \Drupal\linkchecker\LinkCheckerLinkInterface $link
 *   The link.
 */
protected function statusHandling(ResponseInterface $response, LinkCheckerLinkInterface $link) {
  // The setting holds one response code per line. Cast to string so a missing
  // setting does not pass NULL to preg_split(), and trim each line so stray
  // whitespace does not prevent a match.
  $ignoreResponseCodes = array_map('trim', preg_split('/(\\r\\n?|\\n)/', (string) $this->linkcheckerSetting->get('error.ignore_response_codes')));

  // Normalise a missing reason phrase to an empty string.
  $error = (string) $response->getReasonPhrase();

  $statusCode = $response->getStatusCode();
  if ($statusCode == 200 && $this->isFragmentMissingFromBody($response)) {
    // Override status code 200 with status code 404 so it can be handled with
    // default status code 404 logic and custom error text.
    $statusCode = 404;
    $error = 'URL fragment identifier not found in content';
  }

  switch ($statusCode) {
    case 301:
      $this->recordCheckResult($link, $statusCode, $error);
      linkchecker_watchdog_log('linkchecker', 'Link %link has changed and needs to be updated.', [
        '%link' => $link->getUrl(),
      ], RfcLogLevel::NOTICE, $this->getReportLink());
      break;

    case 404:
      $this->recordCheckResult($link, $statusCode, $error);
      linkchecker_watchdog_log('linkchecker', 'Broken link %link has been found.', [
        '%link' => $link->getUrl(),
      ], RfcLogLevel::NOTICE, $this->getReportLink());
      break;

    case 405:
      // - 405: Special error handling if method is not allowed. Switch link
      //   checking to GET method and try again.
      $link->setRequestMethod('GET');
      $this->recordCheckResult($link, $statusCode, $error);
      linkchecker_watchdog_log('linkchecker', 'Method HEAD is not allowed for link %link. Method has been changed to GET.', [
        '%link' => $link->getUrl(),
      ], RfcLogLevel::NOTICE, $this->getReportLink());
      break;

    case 500:
      // - 500: Like WGET, try with GET on "500 Internal server error".
      // - If GET also fails with status code 500, then the link is broken.
      if ($link->getRequestMethod() == 'GET') {
        $this->recordCheckResult($link, $statusCode, $error);
        linkchecker_watchdog_log('linkchecker', 'Broken link %link has been found.', [
          '%link' => $link->getUrl(),
        ], RfcLogLevel::NOTICE, $this->getReportLink());
      }
      else {
        $link->setRequestMethod('GET');
        $this->recordCheckResult($link, $statusCode, $error);
        linkchecker_watchdog_log('linkchecker', 'Internal server error for link %link. Method has been changed to GET.', [
          '%link' => $link->getUrl(),
        ], RfcLogLevel::NOTICE, $this->getReportLink());
      }
      break;

    default:
      // Don't treat ignored response codes as errors.
      if (in_array((string) $statusCode, $ignoreResponseCodes, TRUE)) {
        // Ignored codes reset the fail count instead of incrementing it.
        $this->recordCheckResult($link, $statusCode, $error, FALSE);
      }
      else {
        $this->recordCheckResult($link, $statusCode, $error);
        linkchecker_watchdog_log('linkchecker', 'Unhandled link error %link has been found.', [
          '%link' => $link->getUrl(),
        ], RfcLogLevel::ERROR, $this->getReportLink());
      }
  }

  $this->updateSameLinks($link);

  // Let every status handler plugin registered for this status code queue
  // its follow-up work for the link.
  foreach ($this->statusHandlerManager->getDefinitions() as $definition) {
    if (in_array($statusCode, $definition['status_codes'])) {
      /** @var \Drupal\linkchecker\Plugin\LinkStatusHandlerInterface $handler */
      $handler = $this->statusHandlerManager->createInstance($definition['id']);
      $handler->queueItems($link, $response);
    }
  }
}

/**
 * Persists the result of a link check on the link entity.
 *
 * @param \Drupal\linkchecker\LinkCheckerLinkInterface $link
 *   The link to update and save.
 * @param int $statusCode
 *   The status code to store.
 * @param string $error
 *   The error message to store.
 * @param bool $failed
 *   TRUE to increment the fail count by one, FALSE to reset it to 0.
 */
private function recordCheckResult(LinkCheckerLinkInterface $link, $statusCode, $error, $failed = TRUE) {
  $link->setStatusCode($statusCode);
  $link->setErrorMessage($error);
  $link->setFailCount($failed ? $link->getFailCount() + 1 : 0);
  $link->setLastCheckTime($this->time->getCurrentTime());
  $link->save();
}

/**
 * Checks whether the requested URL fragment is absent from the response body.
 *
 * Destination anchors in HTML documents may be specified either by:
 * - the A element (naming it with the name attribute)
 * - or by any other element (naming with the id attribute)
 * - and must not contain a key/value pair as these type of hash fragments
 *   are typically used by AJAX applications to prevent additional HTTP
 *   requests e.g. https://www.example.com/ajax.html#key1=value1&key2=value2
 * - and must not contain '/' or ',' as these are not normal anchors.
 * - and '#top' is a reserved fragment that must not exist in a page.
 * See https://www.w3.org/TR/html401/struct/links.html
 *
 * @param \Psr\Http\Message\ResponseInterface $response
 *   The response; the fragment is read from its 'Fragment' header.
 *
 * @return bool
 *   TRUE only if the response is an HTML/XML document with a non-empty body,
 *   carries a checkable fragment, and no name/id attribute in the body
 *   matches that fragment. FALSE in every other case.
 */
private function isFragmentMissingFromBody(ResponseInterface $response) {
  if (!$response->hasHeader('Fragment')) {
    return FALSE;
  }

  // getHeader() returns an array, so the fragment must be read with
  // getHeaderLine() to be comparable with strings.
  $fragment = $response->getHeaderLine('Fragment');

  // Nothing to look up, or an AJAX-style / path-like fragment.
  if ($fragment === '' || preg_match('/=|\\/|,/', $fragment)) {
    return FALSE;
  }

  // The reserved "top" fragment never needs a matching element. The pattern
  // below matches the fragment against bare name/id values, so it is expected
  // without a leading '#'; the '#top' spelling is kept for compatibility.
  if (in_array($fragment, ['top', '#top'], TRUE)) {
    return FALSE;
  }

  // Compare only the media type: strip parameters such as "; charset=utf-8"
  // and ignore case. A missing Content-Type yields '' and is rejected here.
  $contentTypeParts = explode(';', $response->getHeaderLine('Content-Type'));
  $mediaType = strtolower(trim($contentTypeParts[0]));
  if (!in_array($mediaType, ['text/html', 'application/xhtml+xml', 'application/xml'], TRUE)) {
    return FALSE;
  }

  // getBody() returns a stream object which is never "empty"; the content
  // itself has to be inspected.
  $body = (string) $response->getBody();
  if ($body === '') {
    return FALSE;
  }

  $pattern = '/(\\s[^>]*(name|id)(\\s+)?=(\\s+)?["\'])(' . preg_quote(urldecode($fragment), '/') . ')(["\'][^>]*>)/i';

  // Only a definite "no match" (0) counts as missing; a PCRE failure (FALSE)
  // must not flag a working link as broken.
  return preg_match($pattern, $body) === 0;
}