<?php
namespace Drupal\linkchecker;
use Drupal\Component\Datetime\TimeInterface;
use Drupal\Core\Config\ConfigFactory;
use Drupal\Core\Entity\EntityTypeManagerInterface;
use Drupal\Core\Link;
use Drupal\Core\Logger\RfcLogLevel;
use Drupal\Core\Queue\QueueFactory;
use Drupal\Core\StringTranslation\StringTranslationTrait;
use Drupal\Core\Url;
use Drupal\linkchecker\Plugin\LinkStatusHandlerManager;
use GuzzleHttp\Client;
use Psr\Http\Message\ResponseInterface;
use GuzzleHttp\Exception\RequestException;
/**
 * Queues link entities for checking and records the outcome of each check.
 *
 * Links are requested asynchronously through the injected HTTP client. The
 * response (or the request failure) is written back to the link entity and
 * then copied to every other link entity that shares the same URL hash.
 */
class LinkCheckerService {

  use StringTranslationTrait;

  /**
   * The entity type manager, used to query and load link entities.
   *
   * @var \Drupal\Core\Entity\EntityTypeManagerInterface
   */
  protected $entityTypeManager;

  /**
   * The 'linkchecker.settings' configuration object.
   *
   * @var \Drupal\Core\Config\ImmutableConfig
   */
  protected $linkcheckerSetting;

  /**
   * The HTTP client used to request the links.
   *
   * @var \GuzzleHttp\Client
   */
  protected $httpClient;

  /**
   * Lazily built link to the broken links report; see getReportLink().
   *
   * @var \Drupal\Core\Link
   */
  protected $reportLink;

  /**
   * The time service.
   *
   * @var \Drupal\Component\Datetime\TimeInterface
   */
  protected $time;

  /**
   * The 'linkchecker_check' queue.
   *
   * @var \Drupal\Core\Queue\QueueInterface
   */
  protected $queue;

  /**
   * The link status handler plugin manager.
   *
   * @var \Drupal\linkchecker\Plugin\LinkStatusHandlerManager
   */
  protected $statusHandlerManager;

  /**
   * Constructs the link checker service.
   *
   * @param \Drupal\Core\Entity\EntityTypeManagerInterface $entityTypeManager
   *   The entity type manager.
   * @param \Drupal\Core\Config\ConfigFactory $config
   *   The config factory; only 'linkchecker.settings' is read from it.
   * @param \GuzzleHttp\Client $httpClient
   *   The HTTP client.
   * @param \Drupal\Component\Datetime\TimeInterface $time
   *   The time service.
   * @param \Drupal\Core\Queue\QueueFactory $queueFactory
   *   The queue factory; only the 'linkchecker_check' queue is taken from it.
   * @param \Drupal\linkchecker\Plugin\LinkStatusHandlerManager $statusHandlerManager
   *   The link status handler plugin manager.
   */
  public function __construct(EntityTypeManagerInterface $entityTypeManager, ConfigFactory $config, Client $httpClient, TimeInterface $time, QueueFactory $queueFactory, LinkStatusHandlerManager $statusHandlerManager) {
    $this->entityTypeManager = $entityTypeManager;
    $this->linkcheckerSetting = $config->get('linkchecker.settings');
    $this->httpClient = $httpClient;
    $this->time = $time;
    $this->queue = $queueFactory->get('linkchecker_check');
    $this->statusHandlerManager = $statusHandlerManager;
  }

  /**
   * Fills the check queue with the links that are due for a check.
   *
   * One link ID (the lowest) is picked per URL hash, and the IDs are queued in
   * chunks of 'check.connections_max' items. Nothing is queued while the queue
   * still holds items.
   *
   * @param bool $rebuild
   *   TRUE to delete the existing queue before queueing.
   *
   * @return int
   *   The number of items in the queue.
   */
  public function queueLinks($rebuild = FALSE) {
    if ($rebuild) {
      $this->queue->deleteQueue();
    }

    // Items from an earlier run are still pending: do not queue duplicates.
    $pending = $this->queue->numberOfItems();
    if (!empty($pending)) {
      return $pending;
    }

    $checkInterval = (int) $this->linkcheckerSetting->get('check.interval');
    $rows = $this->entityTypeManager
      ->getStorage('linkcheckerlink')
      ->getAggregateQuery()
      // Background maintenance has to see every link, whoever the current
      // user is. An explicit call is also mandatory since Drupal 10.
      ->accessCheck(FALSE)
      ->groupBy('urlhash')
      ->aggregate('lid', 'MIN')
      ->condition('last_check', $this->time->getRequestTime() - $checkInterval, '<=')
      ->execute();

    $this->queue->createQueue();

    if (!empty($rows)) {
      // array_chunk() rejects a size below 1, so guard against a missing or
      // zero 'check.connections_max' setting.
      $chunkSize = max(1, (int) $this->linkcheckerSetting->get('check.connections_max'));
      foreach (array_chunk(array_column($rows, 'lid_min'), $chunkSize) as $ids) {
        $this->queue->createItem($ids);
      }
    }

    return $this->queue->numberOfItems();
  }

  /**
   * Starts an asynchronous check of a single link.
   *
   * When the URL carries a fragment and the link uses HEAD or GET, the request
   * method is switched to GET so the body is available for the fragment
   * lookup. Other GET requests only ask for the first bytes of the body.
   *
   * @param \Drupal\linkchecker\LinkCheckerLinkInterface $link
   *   The link to check.
   *
   * @return \GuzzleHttp\Promise\PromiseInterface
   *   Promise of the request; the link is updated once it settles.
   */
  public function check(LinkCheckerLinkInterface $link) {
    $headers = [
      'User-Agent' => $this->linkcheckerSetting->get('check.useragent'),
    ];

    // parse_url() returns FALSE for seriously malformed URLs.
    $uri = parse_url($link->getUrl());
    $fragment = (is_array($uri) && !empty($uri['fragment'])) ? $uri['fragment'] : '';

    if ($fragment !== '' && in_array($link->getRequestMethod(), ['HEAD', 'GET'], TRUE)) {
      $link->setRequestMethod('GET');
      $headers['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8';
    }
    elseif ($link->getRequestMethod() == 'GET') {
      $headers['Range'] = 'bytes=0-1024';
    }

    $options = [
      'headers' => $headers,
      'max_redirects' => 0,
      'http_errors' => FALSE,
      'allow_redirects' => FALSE,
      'synchronous' => FALSE,
    ];

    return $this->httpClient
      ->requestAsync($link->getRequestMethod(), $link->getUrl(), $options)
      ->then(
        function (ResponseInterface $response) use ($link, $fragment) {
          if ($fragment !== '') {
            // Carry the fragment along with the response so that
            // statusHandling() can look it up in the body.
            $response = $response->withHeader('Fragment', $fragment);
          }
          $this->statusHandling($response, $link);
        },
        // Deliberately untyped: depending on the Guzzle version, connection
        // failures are not RequestException instances, and a typed parameter
        // would turn them into a TypeError instead of a recorded failure.
        function ($reason) use ($link) {
          $this->rejectionHandling($reason, $link);
        }
      );
  }

  /**
   * Records the result of a completed request on the link.
   *
   * Saves status code, reason phrase, fail count and check time on the link,
   * logs the result, copies it to links with the same URL hash and finally
   * hands the link to every status handler plugin registered for the code.
   *
   * @param \Psr\Http\Message\ResponseInterface $response
   *   The response; a 'Fragment' header is present when check() found a
   *   fragment in the URL.
   * @param \Drupal\linkchecker\LinkCheckerLinkInterface $link
   *   The checked link.
   */
  protected function statusHandling(ResponseInterface $response, LinkCheckerLinkInterface $link) {
    $statusCode = $response->getStatusCode();
    $error = $response->getReasonPhrase();
    if (!isset($error)) {
      $error = '';
    }

    // A page that loads fine but lacks the requested anchor counts as broken.
    if ($statusCode == 200 && $this->isFragmentMissing($response)) {
      $statusCode = 404;
      $error = 'URL fragment identifier not found in content';
    }

    $failCount = $link->getFailCount() + 1;
    $level = RfcLogLevel::NOTICE;
    $message = NULL;

    switch ($statusCode) {
      case 301:
        $message = 'Link %link has changed and needs to be updated.';
        break;

      case 404:
        $message = 'Broken link %link has been found.';
        break;

      case 405:
        $link->setRequestMethod('GET');
        $message = 'Method HEAD is not allowed for link %link. Method has been changed to GET.';
        break;

      case 500:
        if ($link->getRequestMethod() == 'GET') {
          $message = 'Broken link %link has been found.';
        }
        else {
          // Retry with GET on the next run before calling the link broken.
          $link->setRequestMethod('GET');
          $message = 'Internal server error for link %link. Method has been changed to GET.';
        }
        break;

      default:
        if (in_array((int) $statusCode, $this->getIgnoredResponseCodes(), TRUE)) {
          // Ignored codes are not failures: reset the counter, log nothing.
          $failCount = 0;
        }
        else {
          $level = RfcLogLevel::ERROR;
          $message = 'Unhandled link error %link has been found.';
        }
    }

    $this->recordResult($link, $statusCode, $error, $failCount);
    if ($message !== NULL) {
      $this->logLinkEvent($message, $link, $level);
    }

    $this->updateSameLinks($link);

    foreach ($this->statusHandlerManager->getDefinitions() as $definition) {
      if (in_array($statusCode, $definition['status_codes'])) {
        $this->statusHandlerManager
          ->createInstance($definition['id'])
          ->queueItems($link, $response);
      }
    }
  }

  /**
   * Tells whether the requested URL fragment is absent from the response body.
   *
   * The lookup only happens for HTML/XML responses with a non-empty body and a
   * fragment that looks like an anchor name.
   *
   * @param \Psr\Http\Message\ResponseInterface $response
   *   The response, with the fragment stored in its 'Fragment' header.
   *
   * @return bool
   *   TRUE when the lookup ran and found no matching name/id attribute.
   */
  protected function isFragmentMissing(ResponseInterface $response) {
    if (!$response->hasHeader('Fragment')) {
      return FALSE;
    }

    $fragment = $response->getHeaderLine('Fragment');
    // "top" is reserved by HTML for the top of the document and needs no
    // matching element. parse_url() delivers the fragment without the "#".
    if ($fragment === '' || strcasecmp($fragment, 'top') === 0) {
      return FALSE;
    }
    // Fragments containing "=", "/" or "," are not treated as anchor names.
    if (preg_match('/=|\\/|,/', $fragment)) {
      return FALSE;
    }

    // Compare the bare media type: the header usually carries parameters,
    // e.g. "text/html; charset=utf-8".
    $mediaType = strtolower(trim(explode(';', $response->getHeaderLine('Content-Type'))[0]));
    if (!in_array($mediaType, ['text/html', 'application/xhtml+xml', 'application/xml'], TRUE)) {
      return FALSE;
    }

    $stream = $response->getBody();
    $body = (string) $stream;
    // Leave the stream at its start for later consumers of the response.
    if ($stream->isSeekable()) {
      $stream->rewind();
    }
    if ($body === '') {
      return FALSE;
    }

    $pattern = '/(\\s[^>]*(name|id)(\\s+)?=(\\s+)?["\'])(' . preg_quote(urldecode($fragment), '/') . ')(["\'][^>]*>)/i';
    return !preg_match($pattern, $body);
  }

  /**
   * Returns the response codes configured as "not a failure".
   *
   * @return int[]
   *   The numeric lines of the 'error.ignore_response_codes' setting.
   */
  protected function getIgnoredResponseCodes() {
    $lines = preg_split('/(\\r\\n?|\\n)/', (string) $this->linkcheckerSetting->get('error.ignore_response_codes'));
    $codes = array_filter(array_map('trim', $lines ?: []), 'is_numeric');
    return array_values(array_map('intval', $codes));
  }

  /**
   * Writes a check result to the link and saves it.
   *
   * @param \Drupal\linkchecker\LinkCheckerLinkInterface $link
   *   The link to update.
   * @param int|string $statusCode
   *   The status code to store.
   * @param string $error
   *   The error message to store.
   * @param int $failCount
   *   The fail count to store.
   */
  protected function recordResult(LinkCheckerLinkInterface $link, $statusCode, $error, $failCount) {
    $link->setStatusCode($statusCode);
    $link->setErrorMessage($error);
    $link->setFailCount($failCount);
    $link->setLastCheckTime($this->time->getCurrentTime());
    $link->save();
  }

  /**
   * Logs a message about a link, pointing to the broken links report.
   *
   * @param string $message
   *   The log message; '%link' is replaced by the link URL.
   * @param \Drupal\linkchecker\LinkCheckerLinkInterface $link
   *   The link the message is about.
   * @param int $level
   *   One of the RfcLogLevel constants.
   * @param array $context
   *   Additional placeholder values for the message.
   */
  protected function logLinkEvent($message, LinkCheckerLinkInterface $link, $level, array $context = []) {
    $context = ['%link' => $link->getUrl()] + $context;
    linkchecker_watchdog_log('linkchecker', $message, $context, $level, $this->getReportLink());
  }

  /**
   * Dispatches a rejected request to the matching failure handling.
   *
   * @param mixed $reason
   *   The rejection reason of the request promise.
   * @param \Drupal\linkchecker\LinkCheckerLinkInterface $link
   *   The checked link.
   */
  protected function rejectionHandling($reason, LinkCheckerLinkInterface $link) {
    if ($reason instanceof RequestException) {
      $this->exceptionHandling($reason, $link);
      return;
    }

    if ($reason instanceof \Throwable) {
      $message = $reason->getMessage();
    }
    else {
      $message = is_scalar($reason) ? (string) $reason : 'Request failed without an exception.';
    }
    $this->recordRequestFailure($link, $message);
  }

  /**
   * Records a request exception on the link.
   *
   * @param \GuzzleHttp\Exception\RequestException $e
   *   The exception the request failed with.
   * @param \Drupal\linkchecker\LinkCheckerLinkInterface $link
   *   The checked link.
   */
  protected function exceptionHandling(RequestException $e, LinkCheckerLinkInterface $link) {
    $this->recordRequestFailure($link, $e->getMessage());
  }

  /**
   * Stores a failed request on the link as status 502 and logs it.
   *
   * The failure is also copied to the links sharing the same URL hash.
   *
   * @param \Drupal\linkchecker\LinkCheckerLinkInterface $link
   *   The checked link.
   * @param string $message
   *   The failure description, stored as the link's error message.
   */
  protected function recordRequestFailure(LinkCheckerLinkInterface $link, $message) {
    $this->recordResult($link, '502', $message, $link->getFailCount() + 1);
    $this->logLinkEvent('Unhandled link error %link has been found: %message.', $link, RfcLogLevel::ERROR, [
      '%message' => $message,
    ]);
    $this->updateSameLinks($link);
  }

  /**
   * Returns the link to the broken links report, building it on first use.
   *
   * @return \Drupal\Core\Link
   *   Link titled 'Broken links' to /admin/reports/linkchecker.
   */
  protected function getReportLink() {
    if (!isset($this->reportLink)) {
      $this->reportLink = Link::fromTextAndUrl($this->t('Broken links'), Url::fromUserInput('/admin/reports/linkchecker'));
    }
    return $this->reportLink;
  }

  /**
   * Copies the check result of a link to all links with the same URL hash.
   *
   * @param \Drupal\linkchecker\LinkCheckerLinkInterface $link
   *   The link whose request method, status code, error message, fail count
   *   and last check time are copied.
   */
  protected function updateSameLinks(LinkCheckerLinkInterface $link) {
    $storage = $this->entityTypeManager->getStorage($link->getEntityTypeId());
    $ids = $storage->getQuery()
      // All duplicates must be updated, whoever the current user is.
      ->accessCheck(FALSE)
      ->condition('urlhash', $link->getHash())
      ->condition('lid', $link->id(), '!=')
      ->execute();
    if (empty($ids)) {
      return;
    }

    // loadMultiple() leaves out entities deleted since the query ran, where
    // load() would hand back NULL.
    foreach ($storage->loadMultiple($ids) as $sameLink) {
      $sameLink->setRequestMethod($link->getRequestMethod());
      $sameLink->setStatusCode($link->getStatusCode());
      $sameLink->setErrorMessage($link->getErrorMessage());
      $sameLink->setFailCount($link->getFailCount());
      $sameLink->setLastCheckTime($link->getLastCheckTime());
      $sameLink->save();
    }
  }

}