You are here

boost_crawler.module in Boost 7

Minimal crawler to regenerate the cache as pages are expired.

File

boost_crawler/boost_crawler.module
View source
<?php

/**
 * @file
 * Minimal crawler to regenerate the cache as pages are expired.
 */

/**
 * Implements hook_menu().
 *
 * Declares the crawler settings page as a local task at
 * admin/config/system/boost/crawler.
 *
 * @return array
 *   Menu item definitions keyed by path.
 */
function boost_crawler_menu() {
  // The page is a form rendered through drupal_get_form(); the 'file' key
  // names the include that is expected to provide the form builder.
  $crawler_settings = array(
    'title' => 'Crawler',
    'description' => 'Configuration for the Boost crawler.',
    'page callback' => 'drupal_get_form',
    'page arguments' => array('boost_crawler_admin_settings'),
    'access arguments' => array('administer site configuration'),
    'type' => MENU_LOCAL_TASK,
    'file' => 'boost_crawler.admin.inc',
  );

  return array('admin/config/system/boost/crawler' => $crawler_settings);
}

/**
 * Implements hook_cron_queue_info().
 *
 * Declares the 'boost_crawler' cron queue and names boost_crawler_run() as
 * the worker that handles each queued item.
 *
 * @return array
 *   Queue definitions keyed by queue name.
 */
function boost_crawler_cron_queue_info() {
  // Maximum run time to claim per cron run, in seconds. Read from the
  // 'boost_crawl_queue_seconds' variable, defaulting to 30.
  $seconds_per_cron_run = variable_get('boost_crawl_queue_seconds', 30);

  return array(
    'boost_crawler' => array(
      'worker callback' => 'boost_crawler_run',
      'time' => $seconds_per_cron_run,
    ),
  );
}

/**
 * Implements hook_expire_cache() (from the 'expire' module).
 *
 * When the 'boost_crawl_on_cron' variable is enabled, every expired URL that
 * belongs to this site is added to the 'boost_crawler' queue so that cron can
 * request it again later.
 *
 * @param array $urls
 *   The URLs that were expired. Empty entries and entries that do not start
 *   with the site's base root are ignored.
 */
function boost_crawler_expire_cache($urls) {
  global $base_root;

  // Crawling on cron is opt-in; do nothing unless it has been enabled.
  if (!variable_get('boost_crawl_on_cron', FALSE)) {
    return;
  }

  // Put URLs in a queue for processing by cron
  // http://drupal.org/node/1074080#comment-4590150
  // The queue does not depend on the URL, so it is looked up once.
  $queue = DrupalQueue::get('boost_crawler');

  foreach ($urls as $url) {
    // Check if the URL to be flushed matches our base URL: the URL is the
    // haystack and must *start* with $base_root. The comparison has to be
    // strict because strpos() returns FALSE when there is no match, and
    // FALSE == 0 would wrongly accept every non-matching URL.
    if (!empty($url) && strpos($url, $base_root) === 0) {
      $queue->createItem($url);
    }
  }
}

/**
 * Worker callback for the boost_crawler cron queue.
 *
 * Requests one queued URL through HTTPRL with a 'Pragma: no-cache' header and
 * records the fetch in the Boost log at debug level.
 *
 * @param string $url
 *   The queue item: the URL to request.
 */
function boost_crawler_run($url) {
  // Deliberately not using asynchronous requests: that keeps the load on the
  // server down and lets the queue's 'time' limit stay meaningful.
  $headers = array('Pragma' => 'no-cache');
  httprl_request($url, array('headers' => $headers));
  httprl_send_request();

  $placeholders = array('!url' => $url);
  boost_log('Crawler fetched !url', $placeholders, WATCHDOG_DEBUG);
}

Functions

Name (sort descending) | Description
boost_crawler_cron_queue_info Implements hook_cron_queue_info().
boost_crawler_expire_cache Implements hook_expire_cache() (from the 'expire' module).
boost_crawler_menu Implements hook_menu().
boost_crawler_run Worker Callback for the boost_crawler cron queue.