function boost_crawler_add_alias_to_table in Boost 6
Get URLs from url alias table
1 call to boost_crawler_add_alias_to_table()
- boost_crawler_seed_tables in ./
boost.module - Logic to get boost_crawler table ready.
File
- ./
boost.module, line 6188 - Provides static file caching for Drupal text output. Pages, Feeds, etc.
Code
/**
 * Copy one batch of URL aliases into the boost_crawler table.
 *
 * Each call reads up to BOOST_CRAWL_DB_IMPORT_SIZE rows from {url_alias},
 * starting at the offset stored in the 'boost_crawler_loaded_count_alias'
 * variable, builds an absolute URL plus its md5 hash for every row, hands
 * them to boost_db_multi_insert() in chunks, and then advances the stored
 * offset by the batch size.
 *
 * Aliases that point at unpublished nodes are skipped, and aliases for
 * user/* paths are skipped when the anonymous role lacks the
 * 'access user profiles' permission.
 *
 * @return
 *   TRUE when there is nothing (more) to import: either the
 *   'boost_crawl_url_alias' setting is off or the stored offset has reached
 *   the number of matching aliases. FALSE when a batch was processed in this
 *   call, so the caller should call again.
 */
function boost_crawler_add_alias_to_table() {
  global $base_url, $language, $db_type;

  if (!variable_get('boost_crawl_url_alias', FALSE)) {
    return TRUE;
  }
  $count = BOOST_CRAWL_DB_IMPORT_SIZE;

  // Work out how many bytes a single multi-row insert may use.
  if (stristr($db_type, 'pgsql')) {
    // Set max packet size to 16MB if using PostgreSQL.
    $max_packet = 16777216;
  }
  else {
    // Fallback of 1/2 MB for any server variable that cannot be read.
    $default_size = 524288;

    // Get maximum packet size for MySQL. Errors are suppressed on purpose:
    // the lookup is best effort and the default is used when it fails.
    $max_packet = $default_size;
    $result = @db_query("SHOW VARIABLES WHERE Variable_name = 'max_allowed_packet'");
    if ($result) {
      $row = db_fetch_array($result);
      if (is_array($row) && isset($row['Value'])) {
        $max_packet = (int) $row['Value'];
      }
    }

    // Get bulk insert buffer size. The value has to be read from the fetched
    // row; reading it from the not-yet-assigned $insert_buffer_size always
    // produced 0 and silently pinned the packet size to the 128K floor.
    $insert_buffer_size = $default_size;
    $result = @db_query("SHOW VARIABLES WHERE Variable_name = 'bulk_insert_buffer_size'");
    if ($result) {
      $row = db_fetch_array($result);
      if (is_array($row) && isset($row['Value'])) {
        $insert_buffer_size = (int) $row['Value'];
      }
    }

    // Use the smaller of the two limits, but never less than 128K.
    $max_packet = min($max_packet, $insert_buffer_size);
    $max_packet = max($max_packet, 131072);
  }

  // Number of rows per insert chunk; budgets 512 bytes of packet per row.
  $max_chunk = $max_packet / 512;
  $chunks = 0;
  $loop_counter = 0;

  // Don't crawl user pages if anonymous can't access them.
  $hit_users = db_result(db_query("SELECT * FROM {permission} AS p INNER JOIN {role} AS r USING (rid) WHERE (r.name = 'anonymous user' OR r.rid = 1) AND p.perm LIKE '%%access user profiles%%'"));
  $extra = $hit_users ? '' : "AND ua.src NOT LIKE 'user/%%'";

  $total = db_result(db_query("SELECT COUNT(*) FROM {url_alias} AS ua LEFT JOIN {node} AS n ON n.nid = CAST(substring(ua.src, 6) AS UNSIGNED) WHERE (n.status = 1 OR n.status IS NULL) {$extra}"));
  $loaded = variable_get('boost_crawler_loaded_count_alias', 0);
  if ($total <= $loaded) {
    // Every matching alias has already been handed to the crawler table.
    return TRUE;
  }

  // NOTE(review): this paged query has no ORDER BY, so consecutive batches
  // rely on the database returning rows in a stable order - confirm.
  $list = db_query_range("SELECT ua.dst, ua.language FROM {url_alias} AS ua LEFT JOIN {node} AS n ON n.nid = CAST(substring(ua.src, 6) AS UNSIGNED) WHERE (n.status = 1 OR n.status IS NULL) {$extra}", $loaded, $count);

  // $data[chunk] is a flat list: url, hash, url, hash, ...
  $data = array();
  while ($row = db_fetch_array($list)) {
    // The alias language is used as a path prefix unless the alias is
    // language neutral, or it differs from the current language while the
    // current language has no prefix of its own.
    if (empty($row['language']) || ($language->language != $row['language'] && empty($language->prefix))) {
      $url = $base_url . '/' . $row['dst'];
    }
    else {
      $url = $base_url . '/' . $row['language'] . '/' . $row['dst'];
    }
    $data[$chunks][] = $url;
    $data[$chunks][] = md5($url);

    // Start a new chunk once the current one has passed the row budget.
    $loop_counter++;
    if ($loop_counter > $max_chunk) {
      $chunks++;
      $loop_counter = 0;
    }
  }

  // One boost_db_multi_insert() call per chunk; presumably a single
  // multi-row INSERT each - verify against the helper.
  foreach ($data as $values) {
    boost_db_multi_insert('boost_crawler', array(
      'url' => "'%s'",
      'hash' => "'%s'",
    ), $values, FALSE);
  }

  // Advance by the full batch size; the final batch may overshoot $total,
  // which only makes the next call return TRUE.
  variable_set('boost_crawler_loaded_count_alias', $loaded + $count);
  return FALSE;
}