You are here

function boost_crawler_add_alias_to_table in Boost 6

Get URLs from url alias table

1 call to boost_crawler_add_alias_to_table()
boost_crawler_seed_tables in ./boost.module
Logic to get boost_crawler table ready.

File

./boost.module, line 6188
Provides static file caching for Drupal text output: pages, feeds, etc.

Code

/**
 * Copy one batch of URL aliases from {url_alias} into the boost_crawler table.
 *
 * Each call reads up to BOOST_CRAWL_DB_IMPORT_SIZE rows, starting at the offset
 * stored in the 'boost_crawler_loaded_count_alias' variable, builds an absolute
 * URL and its md5 hash for every row, and hands them to
 * boost_db_multi_insert() in chunks sized from the database's packet limits.
 * The stored offset is then advanced by the batch size.
 *
 * @return
 *   TRUE when there is nothing (more) to import: either the
 *   'boost_crawl_url_alias' setting is off or every matching alias has already
 *   been loaded. FALSE when a batch was just processed, so the caller should
 *   call again.
 */
function boost_crawler_add_alias_to_table() {

  // Insert batch of html URL's into boost_crawler table
  global $base_url, $language, $db_type;
  if (!variable_get('boost_crawl_url_alias', FALSE)) {
    return TRUE;
  }
  $count = BOOST_CRAWL_DB_IMPORT_SIZE;

  // Work out the largest amount of data to send in a single insert.
  if (stristr($db_type, 'pgsql')) {

    // Set Max Packet size to 16MB if using postgreSQL.
    $max_packet = 16777216;
  }
  else {

    // Get maximum packet size for mysql; default to 1/2 MB when the query
    // fails or returns no row.
    $max_packet = 524288;
    $result = @db_query("SHOW VARIABLES WHERE Variable_name = 'max_allowed_packet'");
    if ($result) {
      $row = db_fetch_array($result);
      if ($row && isset($row['Value'])) {
        $max_packet = (int) $row['Value'];
      }
    }

    // Get bulk insert buffer size; default to 1/2 MB when the query fails or
    // returns no row. The value must be read from the fetched row (the
    // previous code indexed the still-undefined $insert_buffer_size, which
    // always evaluated to 0).
    $insert_buffer_size = 524288;
    $result = @db_query("SHOW VARIABLES WHERE Variable_name = 'bulk_insert_buffer_size'");
    if ($result) {
      $row = db_fetch_array($result);
      if ($row && isset($row['Value'])) {
        $insert_buffer_size = (int) $row['Value'];
      }
    }

    // Use the smaller of the two limits.
    $max_packet = $max_packet > $insert_buffer_size ? $insert_buffer_size : $max_packet;

    // Make sure its over 128K
    $max_packet = $max_packet > 131072 ? $max_packet : 131072;
  }

  // Budget 512 bytes of packet space per row when sizing a chunk.
  $max_chunk = $max_packet / 512;
  $chunks = 0;
  $loop_counter = 0;

  // Don't crawl user pages if anonymous can't access them
  $hit_users = db_result(db_query("SELECT * FROM {permission} AS p INNER JOIN {role} AS r USING (rid) WHERE (r.name = 'anonymous user' OR r.rid = 1) AND p.perm LIKE '%%access user profiles%%'"));
  if (!$hit_users) {
    $extra = "AND ua.src NOT LIKE 'user/%%'";
  }
  else {
    $extra = '';
  }

  // Only count aliases that point at published nodes or at non-node paths
  // (the LEFT JOIN yields a NULL status for those).
  $total = db_result(db_query("SELECT COUNT(*) FROM {url_alias} AS ua LEFT JOIN {node} AS n ON n.nid = CAST(substring(ua.src, 6) AS UNSIGNED) WHERE (n.status = 1 OR n.status IS NULL) {$extra}"));
  $loaded = variable_get('boost_crawler_loaded_count_alias', 0);
  if ($total > $loaded) {
    $list = db_query_range("SELECT ua.dst, ua.language FROM {url_alias} AS ua LEFT JOIN {node} AS n ON n.nid = CAST(substring(ua.src, 6) AS UNSIGNED) WHERE (n.status = 1 OR n.status IS NULL) {$extra}", $loaded, $count);
    $data = array();
    while ($row = db_fetch_array($list)) {

      // Leave the URL unprefixed when the alias has no language, or when it is
      // in a different language and the current language has no path prefix;
      // otherwise prepend the alias's language code.
      if (empty($row['language']) || ($language->language != $row['language'] && empty($language->prefix))) {
        $url = $base_url . '/' . $row['dst'];
      }
      else {
        $url = $base_url . '/' . $row['language'] . '/' . $row['dst'];
      }
      $md5 = md5($url);

      // Values are appended as a flat url, hash, url, hash, ... list per chunk.
      $data[$chunks][] = $url;
      $data[$chunks][] = $md5;
      $loop_counter++;
      if ($loop_counter > $max_chunk) {
        $chunks++;
        $loop_counter = 0;
      }
    }
    foreach ($data as $values) {
      boost_db_multi_insert('boost_crawler', array(
        'url' => "'%s'",
        'hash' => "'%s'",
      ), $values, FALSE);
    }

    // Advance the offset by the full batch size so the next call resumes
    // after the rows just read.
    variable_set('boost_crawler_loaded_count_alias', $loaded + $count);
    return FALSE;
  }
  else {
    return TRUE;
  }
}