You are here

function linkchecker_cron in Link checker 5

Same name and namespace in other branches
  1. 8 linkchecker.module \linkchecker_cron()
  2. 5.2 linkchecker.module \linkchecker_cron()
  3. 6.2 linkchecker.module \linkchecker_cron()
  4. 7 linkchecker.module \linkchecker_cron()

Implementation of hook_cron(). Rebuild the table if necessary.

1 call to linkchecker_cron()
linkchecker_debug_run in ./linkchecker.module

File

./linkchecker.module, line 182
This module periodically checks HTML links referenced by Drupal nodes. Developed and maintained by Marek Tichy, marek@ecn.cz

Code

/**
 * Implementation of hook_cron().
 *
 * Performs one link checker pass:
 *  - bails out if lc_CheckRuntime() reports that the run may not proceed,
 *  - optionally truncates and re-seeds the linkchecker tables (full rebuild),
 *  - removes aged tasks and reports whose task no longer exists,
 *  - queues every node changed since the previous run,
 *  - processes queued tasks until one of them cannot be completed,
 *  - stamps the bookkeeping row (taskid 0) and logs a summary to watchdog.
 *
 * The row with taskid 0 in `linkchecker_tasks` is not a real task: its
 * `update` column is read as the time of the previous run and its `nodeid`
 * column is passed to lc_int_to_age() to decide whether a rebuild is due.
 *
 * NOTE(review): the linkchecker_* tables are referenced without Drupal's
 * {table} prefix markers; whether that matches how the module creates them
 * cannot be seen from here, so those names are left untouched.
 */
function linkchecker_cron() {
  $res = db_query("SELECT * FROM `linkchecker_tasks` WHERE `taskid` = 0;");
  $foo = db_fetch_array($res);
  $debug_report = "Linkchecker run";

  // $foo is not an array when the bookkeeping row does not exist yet.
  $lastrun = !empty($foo["update"]) ? strtotime($foo["update"]) : 0;
  d_("Lastrun: {$lastrun}");

  // Check if linkchecking process has not exceeded its maximum run time.
  $maxtime = variable_get('linkchecker_maxtime', 30);
  if (!lc_CheckRuntime($maxtime)) {
    d_("Finishing early");
    return;
  }

  // Check if the table needs rebuilding.
  $rebuildnow = false;
  $rebuild = variable_get('linkchecker_rebuild', 1);
  if ($rebuild) {
    // Re-read the bookkeeping row; lc_CheckRuntime() ran since the first read.
    $res = db_query("SELECT * FROM `linkchecker_tasks` WHERE taskid=0;");
    $foo = db_fetch_array($res);
    if (empty($foo)) {
      // The 0 record is missing, add it.
      $sql = "INSERT INTO `linkchecker_tasks` VALUES (0," . lc_now_to_int() . ",0,NOW());";
      db_query($sql);
      d_("Cannot find time record, adding it: {$sql}.");
    }
    else {
      $age = lc_int_to_age($foo["nodeid"]);
      d_("The entire site check has been initiated less than {$age} seconds ago.");
      d_("Maxage is set to {$rebuild}.");
      if ($age > $rebuild) {
        $rebuildnow = true;
        d_("Should rebuild now");
      }
    }
  }

  if ($rebuildnow) {
    d_("Completely rebuilding the table");
    watchdog("linkchecker", t("Rebuilding the entire linkchecker database from scratch"));
    db_query("TRUNCATE TABLE linkchecker_tasks;");

    // Would be nice to find a slightly less destructive way.
    db_query("TRUNCATE TABLE linkchecker_results;");
    db_query("INSERT INTO `linkchecker_tasks` VALUES (0," . lc_now_to_int() . ",0,NOW());");

    // Everything was wiped, so every node has to be queued again below.
    $lastrun = 0;
    if ($rebuild == 1) {
      // A setting of exactly 1 means "rebuild once": switch it off afterwards.
      variable_set('linkchecker_rebuild', 0);
    }
  }

  // Tasks table maintenance
  // - garbage collect tasks not updated within 'linkchecker_remove_after' days.
  // NOTE(review): nothing in this query excludes the bookkeeping row
  // (taskid 0); confirm lc_RemoveTask() cannot drop it after a long cron gap.
  $maxage = time() - 24 * 60 * 60 * variable_get('linkchecker_remove_after', 30);
  $gc_sql = "SELECT * FROM `linkchecker_tasks` WHERE `update` < FROM_UNIXTIME({$maxage})";
  d_($gc_sql);
  $res = db_query($gc_sql);
  while ($task = db_fetch_array($res)) {
    d_("Remove aged task: {$task['taskid']}");
    lc_RemoveTask($task);
  }

  // Find and delete orphaned reports. COUNT(*) is used so the existence test
  // does not depend on the truthiness of a column value (a taskid of 0 would
  // otherwise look like "no row").
  $res = db_query("SELECT DISTINCT taskid FROM `linkchecker_results`;");
  while ($task = db_fetch_array($res)) {
    $res2 = db_query("SELECT COUNT(*) FROM `linkchecker_tasks` WHERE `taskid` = %d", $task["taskid"]);
    if (!db_result($res2)) {
      d_("Remove orphaned report for task : " . $task["taskid"]);
      lc_RemoveTask($task);
    }
  }

  // - add new tasks for every node changed since the previous run.
  // `node` is a core table, so it must go through Drupal's table prefixing.
  $res = db_query("SELECT * FROM `{node}` WHERE `changed` > %d", $lastrun);
  $i = 0;
  while ($foo = db_fetch_array($res)) {
    $task = array(
      "nodeid" => $foo["nid"],
    );
    d_("Pushing node: {$foo['nid']}");
    lc_PushTask($task);
    $i++;
  }
  $debug_report .= ", loaded {$i} new or updated nodes";

  // - load tasks one by one and process them, least-retried first.
  // NOTE(review): the bookkeeping row stores the value of lc_now_to_int() in
  // `nodeid` (see the INSERTs above); confirm `nodeid` > 0 really excludes it.
  $res = db_query("SELECT * FROM `linkchecker_tasks` WHERE `nodeid` > 0 AND `status` < 999 ORDER BY status ASC;");
  $finish = false;
  $i = 0;
  $j = 0;
  while ($task = db_fetch_array($res)) {
    $i++;
    d_("Processing task {$task['taskid']}");
    $report = array();
    if (lc_DoLinkChecks($task, $maxtime, $report)) {
      if (empty($report)) {
        // Nothing to report for this node: the task is done.
        lc_RemoveTask($task);
      }
      else {
        lc_AddReport($task, $report);
        $j++;
      }
      continue;
    }

    // The check could not be completed: count the attempt, or give up once
    // 'linkchecker_give_up' attempts have been used, then stop this run.
    $newstatus = $task["status"] + 1;
    $max_attempts = variable_get('linkchecker_give_up', 5);
    if ($newstatus > $max_attempts) {
      lc_RemoveTask($task);
      watchdog("linkchecker", "Linkchecker was unable to check node {$task['nodeid']} for {$max_attempts} times, giving up on it for good.");
    }
    else {
      db_query("UPDATE `linkchecker_tasks` SET `status` = %d, `update` = NOW() WHERE taskid = %d", $newstatus, $task["taskid"]);
      watchdog("linkchecker", "Linkchecker was unable to check node " . $task["nodeid"] . " within the given maximum runtime.");
    }
    d_("Finishing early");
    $finish = true;
    break;
  }

  // Stamp the bookkeeping row; its `update` value becomes the next $lastrun.
  db_query("UPDATE `linkchecker_tasks` SET `status` = 0, `update` = NOW() WHERE taskid=0;");

  if (!$finish) {
    d_("Finishing properly");
  }
  $debug_report .= ", processed {$i}";
  $debug_report .= ", problems found in {$j} nodes";
  if ($finish) {
    $debug_report .= ", finishing early (not enough time to process all)";
  }
  watchdog("linkchecker", $debug_report);
}