function _linkchecker_check_links in Link checker 6.2
Same name and namespace in other branches
- 7 linkchecker.module _linkchecker_check_links()
Run link checks.
1 call to _linkchecker_check_links()
- linkchecker_cron in ./
linkchecker.module - Implementation of hook_cron().
1 string reference to '_linkchecker_check_links'
- linkchecker_cron in ./
linkchecker.module - Implementation of hook_cron().
File
- ./
linkchecker.module, line 375 - This module periodically checks links in given node types, blocks, cck fields, etc.
Code
/**
 * Run link checks.
 *
 * Acquires a lock named after this function, loads a batch of links from
 * {linkchecker_links} whose last check is older than the configured interval,
 * and checks each of them either with drupal_http_request() or, when the
 * HTTPRL module is enabled and selected, by queueing the requests with
 * httprl_request() and sending them in one go. The lock is released and the
 * memory usage is logged before returning.
 *
 * @return
 *   FALSE if the lock could not be acquired (a check run is already in
 *   progress), TRUE otherwise.
 */
function _linkchecker_check_links() {
  // Get max_execution_time from configuration, override 0 with 240 seconds.
  $max_execution_time = ini_get('max_execution_time') == 0 ? 240 : ini_get('max_execution_time');

  // Make sure we have enough time to validate all of the links.
  linkchecker_set_time_limit($max_execution_time);

  // Make sure this is the only process trying to run this function.
  if (!lock_acquire(__FUNCTION__, $max_execution_time)) {
    watchdog('linkchecker', 'Attempted to re-run link checks while they are already running.', array(), WATCHDOG_WARNING);
    return FALSE;
  }

  $has_httprl = module_exists('httprl') && variable_get('linkchecker_check_library', 'core') == 'httprl';

  // Do not confuse admins with a setting of maximum checkable links per cron
  // run and guess that 2 links can be checked per second with 1 thread, what is
  // nevertheless uncommon. The max_execution_time can be used to calculate
  // a useful value that is higher, but not totally out of scope and limits the
  // query result set to a reasonable size.
  $linkchecker_check_connections_max = variable_get('linkchecker_check_connections_max', 8);
  $check_links_max_per_cron_run = $has_httprl ? $linkchecker_check_connections_max * $max_execution_time : $max_execution_time;
  $linkchecker_check_links_interval = variable_get('linkchecker_check_links_interval', 2419200);
  $linkchecker_check_useragent = variable_get('linkchecker_check_useragent', 'Drupal (+http://drupal.org/)');

  // Connection limit can be overridden via settings.php. Two connections is the
  // limit defined in RFC http://www.ietf.org/rfc/rfc2616.txt. Modern browsers
  // are typically using 6-8 connections and no more. Never use more and keep
  // in mind that you can overload other people servers.
  $linkchecker_check_domain_connections = variable_get('linkchecker_check_domain_connections', 2);

  // Get URLs for checking.
  $links = db_query_range("SELECT * FROM {linkchecker_links} WHERE last_checked < %d AND status = %d ORDER BY last_checked, lid ASC", time() - $linkchecker_check_links_interval, 1, 0, $check_links_max_per_cron_run);

  // Count the requests queued for HTTPRL ourselves. A COUNT() query passed
  // through db_query_range() still reports the total number of matching rows
  // (the aggregate yields a single row, so the range does not cap it), which
  // means it cannot tell us when the last link of this batch has been queued.
  $queued_requests = 0;

  while ($link = db_fetch_object($links)) {
    $headers = array();
    $headers['User-Agent'] = $linkchecker_check_useragent;

    $uri = @parse_url($link->url);

    // URL contains a fragment.
    if (in_array($link->method, array('HEAD', 'GET')) && !empty($uri['fragment'])) {
      // We need the full content and not only the HEAD.
      $link->method = 'GET';
      // Request text content only (like Firefox/Chrome).
      $headers['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8';
    }
    elseif ($link->method == 'GET') {
      // Range: Only request the first 1024 bytes from remote server. This is
      // required to prevent timeouts on URLs that are large downloads.
      $headers['Range'] = 'bytes=0-1024';
    }

    // Add in the headers.
    $options = array(
      'headers' => $headers,
      'method' => $link->method,
      'max_redirects' => 0,
    );

    if ($has_httprl) {
      // Define the callback and add the $link object to it.
      // Notes:
      // - 'global_timeout' does not require a timer_read('page'), as this job
      //   runs in a new process, independent of cron.
      // - 'global_timeout' is set 30 seconds below the execution time limit.
      $options += array(
        'global_connections' => $linkchecker_check_connections_max,
        'global_timeout' => $max_execution_time - 30,
        'domain_connections' => $linkchecker_check_domain_connections,
        'callback' => array(
          array(
            'function' => '_linkchecker_status_handling',
          ),
          $link,
        ),
      );

      // Queue up the request; the whole queue is sent after the loop.
      httprl_request($link->url, $options);
      $queued_requests++;
    }
    else {
      // Drupal core
      $response = drupal_http_request($link->url, $options['headers'], $options['method'], NULL, $options['max_redirects']);

      // Add 'redirect_code' property to core response object for consistency
      // with HTTPRL object.
      if ($response->code == 301 && !isset($response->redirect_code)) {
        $response->redirect_code = $response->code;
      }
      // Add 'uri' property to core response object for 'fragment' check and
      // consistency with HTTPRL object.
      $response->uri = $uri;

      _linkchecker_status_handling($response, $link);

      // Stop once we have used over half of the maximum execution time.
      if (timer_read('page') / 1000 > $max_execution_time / 2) {
        break;
      }
    }
  }

  // After all links of this batch are queued, run the url checks. Doing this
  // after the loop guarantees the queue is sent even when more links are due
  // than fit into one cron run.
  if ($has_httprl && $queued_requests > 0) {
    httprl_send_request();
  }

  // Release the lock.
  lock_release(__FUNCTION__);
  watchdog('linkchecker', 'Link checks completed.', array(), WATCHDOG_INFO);

  // Peak memory usage is only available in PHP >= 5.2.
  if (version_compare(phpversion(), '5.2.0', '>=')) {
    watchdog('linkchecker', 'Memory usage: @memory_get_usage, Peak memory usage: @memory_get_peak_usage.', array(
      '@memory_get_peak_usage' => format_size(memory_get_peak_usage()),
      '@memory_get_usage' => format_size(memory_get_usage()),
    ), WATCHDOG_DEBUG);
  }
  else {
    watchdog('linkchecker', 'Memory usage: @memory_get_usage.', array(
      '@memory_get_usage' => format_size(memory_get_usage()),
    ), WATCHDOG_DEBUG);
  }

  return TRUE;
}