xmlsitemap.generate.inc in XML sitemap 7.2
Same filename and directory in other branches
Sitemap generation and rebuilding functions for the xmlsitemap module.
File
xmlsitemap.generate.inc View source
<?php
/**
* @file
* Sitemap generation and rebuilding functions for the xmlsitemap module.
*
* @ingroup xmlsitemap
*/
/**
 * Resolves an internal Drupal path to its URL alias using prefetched data.
 *
 * Works like drupal_get_path_alias(), but loads every alias of a language
 * with a single query and keeps the result in a static cache, so sitemap
 * generation does not issue one query per link.
 *
 * The language-neutral aliases are loaded once and kept. Only one specific
 * language is cached at a time: asking for a different language discards the
 * previously cached one before loading the new set.
 *
 * @param string $path
 *   An internal Drupal path.
 * @param string $language
 *   A language code to use when looking up the paths.
 *
 * @return string
 *   The alias if one exists (language-specific first, then language-neutral),
 *   otherwise the original path; in both cases after
 *   hook_url_outbound_alter() implementations have run.
 */
function xmlsitemap_get_path_alias($path, $language) {
  static $aliases;
  static $last_language;

  // Rows are ordered by pid so that, for a duplicated source, the most
  // recently created alias is the one left in the keyed result.
  $alias_sql = "SELECT source, alias FROM {url_alias} WHERE language = :language ORDER BY pid";

  if (!isset($aliases)) {
    $neutral = db_query($alias_sql, array(':language' => LANGUAGE_NONE));
    $aliases[LANGUAGE_NONE] = $neutral->fetchAllKeyed();
  }

  $is_specific_language = ($language != LANGUAGE_NONE);
  if ($is_specific_language && $last_language != $language) {
    // Drop the previously cached language to keep memory usage bounded.
    unset($aliases[$last_language]);
    $localized = db_query($alias_sql, array(':language' => $language));
    $aliases[$language] = $localized->fetchAllKeyed();
    $last_language = $language;
  }

  // Prefer a language-specific alias, then fall back to a neutral one.
  $alias = NULL;
  if ($is_specific_language && isset($aliases[$language][$path])) {
    $alias = $aliases[$language][$path];
  }
  elseif (isset($aliases[LANGUAGE_NONE][$path])) {
    $alias = $aliases[LANGUAGE_NONE][$path];
  }
  $alias_found = isset($alias);

  // hook_url_outbound_alter() expects the url() option defaults to be present.
  $options = array(
    'fragment' => '',
    'query' => array(),
    'absolute' => FALSE,
    'alias' => $alias_found,
    'prefix' => '',
    'external' => FALSE,
  );

  // The path is passed through hook_url_outbound_alter() even when no alias
  // row exists, so clean URLs produced by modules such as subpathauto (which
  // never land in {url_alias}) still work.
  $normalized_path = $alias_found ? $alias : $path;
  $original_path = $normalized_path;
  drupal_alter('url_outbound', $normalized_path, $options, $original_path);

  return $normalized_path;
}
/**
 * Prepares the environment before the sitemap files are regenerated.
 *
 * Tries to raise the PHP memory limit and, when developer mode is enabled,
 * logs the peak memory usage at the start of generation.
 */
function _xmlsitemap_regenerate_before() {
  // Attempt to increase the memory limit.
  _xmlsitemap_set_memory_limit();

  if (!variable_get('xmlsitemap_developer_mode', 0)) {
    return;
  }

  $peak = format_size(memory_get_peak_usage(TRUE));
  watchdog('xmlsitemap', 'Starting XML sitemap generation. Memory usage: @memory-peak.', array('@memory-peak' => $peak), WATCHDOG_DEBUG);
}
/**
 * Reports how much the peak memory usage has grown since a baseline.
 *
 * The baseline is taken on the first call and is kept in a static variable.
 *
 * @param bool $start
 *   TRUE to reset the baseline to the current peak usage.
 *
 * @return int
 *   The difference in bytes between the current peak memory usage and the
 *   baseline; 0 whenever the baseline was (re)set by this call.
 */
function _xmlsitemap_get_memory_usage($start = FALSE) {
  static $memory_start;

  $peak = memory_get_peak_usage(TRUE);
  if ($start || !isset($memory_start)) {
    $memory_start = $peak;
  }

  return $peak - $memory_start;
}
/**
 * Estimates the PHP memory limit needed for sitemap generation.
 *
 * This is only a guess: it starts from Drupal's minimum memory limit, adds
 * 500 bytes per link in a chunk and, when alias prefetching is enabled,
 * 250 bytes per row in {url_alias}. It does not take into account the
 * currently loaded modules. The result is cached with drupal_static().
 *
 * @return int
 *   The estimated memory limit in bytes.
 */
function _xmlsitemap_get_optimal_memory_limit() {
  $optimal_limit = &drupal_static(__FUNCTION__);
  if (isset($optimal_limit)) {
    return $optimal_limit;
  }

  // Set the base memory amount from the provided core constant.
  $optimal_limit = parse_size(DRUPAL_MINIMUM_PHP_MEMORY_LIMIT);

  // Add memory based on the chunk size.
  $optimal_limit += xmlsitemap_get_chunk_size() * 500;

  // Add memory for storing the url aliases.
  if (variable_get('xmlsitemap_prefetch_aliases', 1)) {
    $alias_count = db_query("SELECT COUNT(pid) FROM {url_alias}")->fetchField();
    $optimal_limit += $alias_count * 250;
  }

  return $optimal_limit;
}
/**
 * Raises the PHP memory limit for sitemap generation when it is too low.
 *
 * Nothing is changed when the current limit cannot be read, is unlimited
 * (-1), or is already at least as large as the requested limit.
 *
 * @param int|null $new_limit
 *   An optional PHP memory limit in bytes. If not provided, the value of
 *   _xmlsitemap_get_optimal_memory_limit() will be used.
 *
 * @return string|false|null
 *   The result of ini_set() (the previous limit, or FALSE on failure) when a
 *   change was attempted; NULL when no change was necessary.
 */
function _xmlsitemap_set_memory_limit($new_limit = NULL) {
  $current_limit = @ini_get('memory_limit');
  if (!$current_limit || $current_limit == -1) {
    // Unknown or unlimited: there is nothing to raise.
    return NULL;
  }

  // Fall back to the estimated optimum only when the caller gave no limit.
  // (The check was previously inverted, which discarded explicit limits and
  // left the default NULL in place so the limit was never raised.)
  if (is_null($new_limit)) {
    $new_limit = _xmlsitemap_get_optimal_memory_limit();
  }

  if (parse_size($current_limit) < $new_limit) {
    return @ini_set('memory_limit', $new_limit);
  }

  return NULL;
}
/**
 * Writes a single page (chunk) of a sitemap.
 *
 * Any exception raised while writing is logged through watchdog and then
 * re-thrown to the caller.
 *
 * @param object $sitemap
 *   The XML sitemap object to generate a page for.
 * @param int $page
 *   The number of the sitemap page to generate.
 *
 * @return mixed
 *   The value of the writer's getSitemapElementCount() after the page was
 *   written.
 */
function xmlsitemap_generate_page(stdClass $sitemap, $page) {
  try {
    $writer = new XMLSitemapWriter($sitemap, $page);
    $writer->startDocument();
    $writer->generateXML();
    $writer->endDocument();
  }
  catch (Exception $exception) {
    watchdog_exception('xmlsitemap', $exception);
    throw $exception;
  }

  $element_count = $writer->getSitemapElementCount();
  return $element_count;
}
/**
* Write the <url> elements for one chunk of a sitemap.
*
* Selects one chunk-sized range of visible links (access = 1, status = 1)
* from the {xmlsitemap} table, builds the URL and the optional lastmod,
* changefreq and priority values for each of them, lets other modules alter
* the element through hook_xmlsitemap_element_alter(), and hands every
* non-empty element to the writer.
*
* @param object $sitemap
* The XML sitemap object; its uri['options'] array is used as the base of
* the url() options for every link.
* @param XMLSitemapWriter $writer
* The writer that receives each element via writeSitemapElement().
* @param int $chunk
* The number of the chunk to generate. Values of 1 or lower select the
* first chunk.
*
* @return int
* The number of links counted for this chunk. Links skipped as redirected
* or as consecutive duplicates are not counted; a link is counted before
* the alter hook runs, so it is counted even if the hook empties its
* element.
*/
function xmlsitemap_generate_chunk(stdClass $sitemap, XMLSitemapWriter $writer, $chunk) {
global $base_url;
// Keyed by element name so the checks below can use !empty().
$output_elements = drupal_map_assoc(variable_get('xmlsitemap_output_elements', array(
'lastmod',
'changefreq',
'priority',
)));
$lastmod_format = variable_get('xmlsitemap_lastmod_format', XMLSITEMAP_LASTMOD_MEDIUM);
// Options already set on the sitemap win; these are only defaults.
$url_options = $sitemap->uri['options'];
$url_options += array(
'absolute' => TRUE,
'base_url' => variable_get('xmlsitemap_base_url', $base_url),
'language' => language_default(),
'alias' => variable_get('xmlsitemap_prefetch_aliases', TRUE),
);
$last_url = '';
$link_count = 0;
$query = db_select('xmlsitemap', 'x');
$query
->fields('x', array(
'id',
'type',
'subtype',
'loc',
'lastmod',
'changefreq',
'changecount',
'priority',
'language',
'access',
'status',
));
$query
->condition('x.access', 1);
$query
->condition('x.status', 1);
// The ordering puts links that produce the same URL next to each other,
// which the duplicate check against $last_url below relies on.
$query
->orderBy('x.language', 'DESC');
$query
->orderBy('x.loc');
// The tag and metadata let other modules alter this query.
$query
->addTag('xmlsitemap_generate');
$query
->addMetaData('sitemap', $sitemap);
// Chunk numbers are 1-based; chunk numbers below 1 are clamped to offset 0.
$offset = max($chunk - 1, 0) * xmlsitemap_get_chunk_size();
$limit = xmlsitemap_get_chunk_size();
$query
->range($offset, $limit);
$links = $query
->execute();
while ($link = $links
->fetchAssoc()) {
// Replace the language code with a language object; language-neutral links
// use the language from the URL options.
$link['language'] = $link['language'] != LANGUAGE_NONE ? xmlsitemap_language_load($link['language']) : $url_options['language'];
$parsed_url = drupal_parse_url($link['loc']);
// Skip nodes which are 301 redirected.
// NOTE(review): redirect_fetch_rids_by_path() presumably comes from the
// Redirect module; this block assumes it is available whenever the
// 'xmlsitemap_redirect' variable is set - confirm.
if (variable_get('xmlsitemap_redirect')) {
$relative_redirect = redirect_fetch_rids_by_path($link['loc'], $link['language']->language, TRUE);
$alias_redirect = redirect_fetch_rids_by_path(ltrim(url($link['loc']), '/'), $link['language']->language, TRUE);
// If node contains a 301 redirect we skip it.
if (!empty($relative_redirect) || !empty($alias_redirect)) {
continue;
}
}
// Remove query or fragment.
$link['loc'] = $parsed_url['path'];
if ($url_options['alias']) {
// Aliases are resolved here from the prefetched alias data; the 'alias'
// URL option is passed on to url() unchanged below.
$link['loc'] = xmlsitemap_get_path_alias($link['loc'], $link['language']->language);
}
$link_options = array(
'language' => $link['language'],
'xmlsitemap_link' => $link,
'xmlsitemap_sitemap' => $sitemap,
'query' => $parsed_url['query'],
'fragment' => $parsed_url['fragment'],
);
// @todo Add a separate hook_xmlsitemap_link_url_alter() here?
// Per-link options take precedence over the sitemap-wide URL options.
$link_url = url($link['loc'], $link_options + $url_options);
// Skip this link if it was a duplicate of the last one.
// @todo Figure out a way to do this before generation so we can report
// back to the user about this.
if ($link_url == $last_url) {
continue;
}
else {
$last_url = $link_url;
// Keep track of the total number of links written.
$link_count++;
}
$element = array();
$element['loc'] = urldecode($link_url);
if ($link['lastmod']) {
if (!empty($output_elements['lastmod'])) {
$element['lastmod'] = gmdate($lastmod_format, $link['lastmod']);
}
// If the link has a lastmod value, update the changefreq so that links
// with a short changefreq but updated two years ago show decay.
// We use abs() here just in case items were created on this same cron run
// because lastmod would be greater than REQUEST_TIME.
$link['changefreq'] = (abs(REQUEST_TIME - $link['lastmod']) + $link['changefreq']) / 2;
}
if (!empty($output_elements['changefreq']) && $link['changefreq']) {
$element['changefreq'] = xmlsitemap_get_changefreq($link['changefreq']);
}
if (!empty($output_elements['priority']) && isset($link['priority']) && $link['priority'] != 0.5) {
// Don't output the priority value for links that have 0.5 priority. This
// is the default 'assumed' value if priority is not included as per the
// sitemaps.org specification.
$element['priority'] = number_format($link['priority'], 1);
}
// @todo Should this be moved to XMLSitemapWriter::writeSitemapElement()?
drupal_alter('xmlsitemap_element', $element, $link, $sitemap);
// An alter hook can empty the element to keep the link out of the file.
if (!empty($element)) {
$writer
->writeSitemapElement('url', $element);
}
}
return $link_count;
}
/**
 * Writes the index file of a sitemap.
 *
 * Any exception raised while writing is logged through watchdog and then
 * re-thrown to the caller.
 *
 * @param object $sitemap
 *   The XML sitemap object to generate the index for.
 *
 * @return mixed
 *   The value of the writer's getSitemapElementCount() after the index was
 *   written.
 */
function xmlsitemap_generate_index(stdClass $sitemap) {
  try {
    $writer = new XMLSitemapIndexWriter($sitemap);
    $writer->startDocument();
    $writer->generateXML();
    $writer->endDocument();
  }
  catch (Exception $exception) {
    watchdog_exception('xmlsitemap', $exception);
    throw $exception;
  }

  $element_count = $writer->getSitemapElementCount();
  return $element_count;
}
/**
 * Batch information callback for regenerating the sitemap files.
 *
 * The batch sets the 'xmlsitemap_regenerate_needed' flag first, generates
 * the pages and the index of every sitemap, and only clears the flag as the
 * last operation, so an interrupted run leaves the flag set.
 *
 * @param array $smids
 *   An optional array of XML sitemap IDs. If not provided, it will load all
 *   existing XML sitemaps.
 *
 * @return array
 *   A batch definition array with 'operations', 'finished', 'title' and
 *   'file' keys.
 */
function xmlsitemap_regenerate_batch(array $smids = array()) {
  if (empty($smids)) {
    $smids = db_query("SELECT smid FROM {xmlsitemap_sitemap}")->fetchCol();
  }

  $operations = array();

  // Set the regenerate flag in case something fails during file generation.
  $operations[] = array(
    'xmlsitemap_batch_variable_set',
    array(array('xmlsitemap_regenerate_needed' => TRUE)),
  );

  // @todo Get rid of this batch operation.
  $operations[] = array('_xmlsitemap_regenerate_before', array());

  // Generate all the sitemap pages for each context, followed by its index.
  foreach ($smids as $smid) {
    $operations[] = array('xmlsitemap_regenerate_batch_generate', array($smid));
    $operations[] = array('xmlsitemap_regenerate_batch_generate_index', array($smid));
  }

  // Clear the regeneration flag.
  $operations[] = array(
    'xmlsitemap_batch_variable_set',
    array(array('xmlsitemap_regenerate_needed' => FALSE)),
  );

  return array(
    'operations' => $operations,
    'finished' => 'xmlsitemap_regenerate_batch_finished',
    'title' => t('Regenerating Sitemap'),
    'file' => drupal_get_path('module', 'xmlsitemap') . '/xmlsitemap.generate.inc',
  );
}
/**
 * Batch callback; generate all pages of a sitemap.
 *
 * Each invocation writes one page. Pages are generated until one comes back
 * without links; that trailing empty page is removed (unless it is the first
 * page) and the sitemap's chunk and link totals are saved.
 *
 * @param int $smid
 *   The ID of the XML sitemap to generate.
 * @param array $context
 *   The batch context. The sitemap being built and the expected number of
 *   chunks are kept in $context['sandbox'] between invocations.
 */
function xmlsitemap_regenerate_batch_generate($smid, array &$context) {
  $sandbox = &$context['sandbox'];

  if (!isset($sandbox['sitemap'])) {
    $sandbox['sitemap'] = xmlsitemap_sitemap_load($smid);
    $sandbox['sitemap']->chunks = 1;
    $sandbox['sitemap']->links = 0;
    // The real number of chunks is unknown until an empty page is reached.
    $sandbox['max'] = XMLSITEMAP_MAX_SITEMAP_LINKS;

    // Clear the cache directory for this sitemap before generating any files.
    xmlsitemap_check_directory($sandbox['sitemap']);
    xmlsitemap_clear_directory($sandbox['sitemap']);
  }

  $sitemap = &$sandbox['sitemap'];
  $links = xmlsitemap_generate_page($sitemap, $sitemap->chunks);

  // The message refers to the page that was just generated, so it has to be
  // built before the chunk counter changes below.
  $page_query = array('query' => array('page' => $sitemap->chunks));
  $page_url = url('sitemap.xml', $sitemap->uri['options'] + $page_query);
  $context['message'] = t('Now generating %sitemap-url.', array('%sitemap-url' => $page_url));

  if (!$links) {
    // Cleanup the 'extra' empty file.
    $file = xmlsitemap_sitemap_get_file($sitemap, $sitemap->chunks);
    if (file_exists($file) && $sitemap->chunks > 1) {
      file_unmanaged_delete($file);
    }
    $sitemap->chunks--;

    // Save the updated chunks and links values.
    $sandbox['max'] = $sitemap->chunks;
    $sitemap->updated = REQUEST_TIME;
    xmlsitemap_sitemap_get_max_filesize($sitemap);
    xmlsitemap_sitemap_save($sitemap);
  }
  else {
    $sitemap->links += $links;
    $sitemap->chunks++;
  }

  // Report partial progress until the chunk counter reaches the maximum.
  if ($sitemap->chunks != $sandbox['max']) {
    $context['finished'] = $sitemap->chunks / $sandbox['max'];
  }
}
/**
 * Batch callback; generate the index page of a sitemap.
 *
 * An index is only written for sitemaps that have more than one chunk.
 *
 * @param int $smid
 *   The ID of the XML sitemap to generate the index for.
 * @param array $context
 *   The batch context; receives the progress message.
 */
function xmlsitemap_regenerate_batch_generate_index($smid, array &$context) {
  $sitemap = xmlsitemap_sitemap_load($smid);
  if (!($sitemap->chunks > 1)) {
    return;
  }

  xmlsitemap_generate_index($sitemap);

  $index_url = url('sitemap.xml', $sitemap->uri['options']);
  $context['message'] = t('Now generating sitemap index %sitemap-url.', array('%sitemap-url' => $index_url));
}
/**
 * Batch callback; sitemap regeneration finished.
 *
 * The run only counts as successful when the batch succeeded and the
 * 'xmlsitemap_regenerate_needed' flag has been cleared again. On success the
 * generation time is stored, a watchdog notice is written and
 * hook_xmlsitemap_regenerate_finished() is invoked; otherwise an error
 * message is shown.
 */
function xmlsitemap_regenerate_batch_finished($success, $results, $operations, $elapsed) {
  $regenerated = $success && !variable_get('xmlsitemap_regenerate_needed', FALSE);

  if (!$regenerated) {
    drupal_set_message(t('The sitemaps were not successfully regenerated.'), 'error');
    return;
  }

  variable_set('xmlsitemap_generated_last', REQUEST_TIME);

  // Show a watchdog message that the sitemap was regenerated.
  $log_arguments = array(
    '@elapsed' => $elapsed,
    '@memory-peak' => format_size(memory_get_peak_usage(TRUE)),
  );
  watchdog('xmlsitemap', 'Finished XML sitemap generation in @elapsed. Memory usage: @memory-peak.', $log_arguments, WATCHDOG_NOTICE);

  module_invoke_all('xmlsitemap_regenerate_finished');
}
/**
 * Batch information callback for rebuilding the sitemap data.
 *
 * The batch sets the 'xmlsitemap_rebuild_needed' flag, purges the existing
 * links, runs each entity type's rebuild callback, clears the flag and then
 * appends the operations of the file regeneration batch.
 *
 * @param array $entities
 *   The link (entity) types to rebuild.
 * @param bool $save_custom
 *   Passed on to the purge operation to control whether links with
 *   overridden values are kept.
 *
 * @return array
 *   A batch definition array with 'operations', 'finished', 'title' and
 *   'file' keys.
 */
function xmlsitemap_rebuild_batch(array $entities, $save_custom = FALSE) {
  $operations = array();

  // Set the rebuild flag in case something fails during the rebuild.
  $operations[] = array(
    'xmlsitemap_batch_variable_set',
    array(array('xmlsitemap_rebuild_needed' => TRUE)),
  );

  // Purge any links first.
  $operations[] = array(
    'xmlsitemap_rebuild_batch_clear',
    array($entities, (bool) $save_custom),
  );

  // Fetch all the sitemap links and save them to the {xmlsitemap} table.
  foreach ($entities as $entity) {
    $info = xmlsitemap_get_link_info($entity);
    $operations[] = array($info['xmlsitemap']['rebuild callback'], array($entity));
  }

  // Clear the rebuild flag.
  $operations[] = array(
    'xmlsitemap_batch_variable_set',
    array(array('xmlsitemap_rebuild_needed' => FALSE)),
  );

  $batch = array(
    'operations' => $operations,
    'finished' => 'xmlsitemap_rebuild_batch_finished',
    'title' => t('Rebuilding Sitemap'),
    'file' => drupal_get_path('module', 'xmlsitemap') . '/xmlsitemap.generate.inc',
  );

  // Add the regeneration batch.
  $regenerate_batch = xmlsitemap_regenerate_batch();
  $batch['operations'] = array_merge($batch['operations'], $regenerate_batch['operations']);

  return $batch;
}
/**
 * Batch callback; set an array of variables and their values.
 *
 * @param array $variables
 *   An array of values to store, keyed by variable name.
 */
function xmlsitemap_batch_variable_set(array $variables) {
  foreach (array_keys($variables) as $name) {
    variable_set($name, $variables[$name]);
  }
}
/**
 * Batch callback; clear sitemap links for entities.
 *
 * @param array $entities
 *   The link (entity) types whose links should be removed. Nothing is
 *   removed when the array is empty.
 * @param bool $save_custom
 *   Passed on to xmlsitemap_rebuild_clear().
 * @param array $context
 *   The batch context; receives the progress message.
 */
function xmlsitemap_rebuild_batch_clear(array $entities, $save_custom, &$context) {
  if (count($entities) > 0) {
    xmlsitemap_rebuild_clear($entities, $save_custom);
  }

  $context['message'] = t('Purging links.');
}
/**
 * Batch callback; fetch and add the sitemap links for a specific entity.
 *
 * Each invocation processes up to 20 entities whose ID is greater than the
 * last one handled, passing their IDs to the entity type's process callback.
 *
 * @param string $entity
 *   The entity type to process.
 * @param array $context
 *   The batch context. The link info, the number of processed entities, the
 *   last processed ID and the total count are kept in $context['sandbox']
 *   between invocations.
 */
function xmlsitemap_rebuild_batch_fetch($entity, &$context) {
  if (!isset($context['sandbox']['info'])) {
    $context['sandbox']['info'] = xmlsitemap_get_link_info($entity);
    $context['sandbox']['progress'] = 0;
    $context['sandbox']['last_id'] = 0;
  }
  $info = $context['sandbox']['info'];

  $query = new EntityFieldQuery();
  $query->entityCondition('entity_type', $entity);
  // Page by ID rather than by offset so that every pass continues after the
  // last entity handled.
  $query->entityCondition('entity_id', $context['sandbox']['last_id'], '>');
  $query->addTag('xmlsitemap_link_bundle_access');
  $query->addTag('xmlsitemap_rebuild');
  $query->addMetaData('entity', $entity);
  $query->addMetaData('entity_info', $info);

  if ($types = xmlsitemap_get_link_type_enabled_bundles($entity)) {
    $query->entityCondition('bundle', $types, 'IN');
  }
  else {
    // If no enabled bundle types, skip everything else.
    return;
  }

  if (!isset($context['sandbox']['max'])) {
    $count_query = clone $query;
    $count_query->count();
    $context['sandbox']['max'] = $count_query->execute();
    if (!$context['sandbox']['max']) {
      // If there are no items to process, skip everything else.
      return;
    }
  }

  // PostgreSQL cannot have the ORDER BY in the count query, so the ordering
  // is only added after the count query has been cloned.
  $query->entityOrderBy('entity_id');

  $limit = 20;
  $query->range(0, $limit);

  $result = $query->execute();

  if (empty($result[$entity])) {
    // The initial count promised more entities than can still be found (for
    // example because some were deleted while the batch was running).
    // Without this guard the progress would never reach the maximum and the
    // operation would be repeated forever.
    $context['finished'] = 1;
    return;
  }

  $ids = array_keys($result[$entity]);

  $info['xmlsitemap']['process callback']($ids);

  $context['sandbox']['last_id'] = end($ids);
  $context['sandbox']['progress'] += count($ids);
  $context['message'] = t('Now processing %entity @last_id (@progress of @count).', array(
    '%entity' => $entity,
    '@last_id' => $context['sandbox']['last_id'],
    '@progress' => $context['sandbox']['progress'],
    '@count' => $context['sandbox']['max'],
  ));

  if ($context['sandbox']['progress'] >= $context['sandbox']['max']) {
    $context['finished'] = 1;
  }
  else {
    $context['finished'] = $context['sandbox']['progress'] / $context['sandbox']['max'];
  }
}
/**
 * Batch callback; sitemap rebuild finished.
 *
 * The rebuild only counts as successful when the batch succeeded and the
 * 'xmlsitemap_rebuild_needed' flag has been cleared again.
 */
function xmlsitemap_rebuild_batch_finished($success, $results, $operations, $elapsed) {
  $rebuilt = $success && !variable_get('xmlsitemap_rebuild_needed', FALSE);

  if (!$rebuilt) {
    drupal_set_message(t('The sitemap links were not successfully rebuilt.'), 'error');
    return;
  }

  drupal_set_message(t('The sitemap links were rebuilt.'));
}
/**
 * Lists the link types whose sitemap links can be rebuilt.
 *
 * A link type qualifies when it declares a rebuild callback and, if it has a
 * bundle entity key, at least one of its bundles is enabled.
 *
 * @return array
 *   An array of link (entity) type names.
 */
function xmlsitemap_get_rebuildable_link_types() {
  $rebuild_types = array();

  foreach (xmlsitemap_get_link_info() as $entity => $info) {
    // If the entity is missing a rebuild callback, skip.
    $has_rebuild_callback = !empty($info['xmlsitemap']['rebuild callback']);
    if (!$has_rebuild_callback) {
      continue;
    }

    // If the entity has bundles, but no enabled bundles, skip since
    // rebuilding wouldn't get any links.
    $uses_bundles = !empty($info['entity keys']['bundle']);
    if ($uses_bundles && !xmlsitemap_get_link_type_enabled_bundles($entity)) {
      continue;
    }

    $rebuild_types[] = $entity;
  }

  return $rebuild_types;
}
/**
 * Deletes the sitemap links of the given link types.
 *
 * hook_xmlsitemap_rebuild_clear() is invoked before anything is deleted.
 *
 * @param array $types
 *   An array of link types.
 * @param bool $save_custom
 *   TRUE to keep links whose status or priority has been overridden, so the
 *   overridden values are not lost.
 *
 * @return int
 *   The number of deleted links.
 */
function xmlsitemap_rebuild_clear(array $types, $save_custom) {
  // Let other modules respond to the rebuild clearing.
  module_invoke_all('xmlsitemap_rebuild_clear', $types, $save_custom);

  $delete = db_delete('xmlsitemap')->condition('type', $types);

  // If we want to save the custom data, only delete links that are still
  // using the default inclusion and priority.
  if ($save_custom) {
    $delete
      ->condition('status_override', 0)
      ->condition('priority_override', 0);
  }

  return $delete->execute();
}
Functions
Name | Description |
---|---|
xmlsitemap_batch_variable_set | Batch callback; set an array of variables and their values. |
xmlsitemap_generate_chunk | Generate chunk. |
xmlsitemap_generate_index | Generate the index sitemap. |
xmlsitemap_generate_page | Generate one page (chunk) of the sitemap. |
xmlsitemap_get_path_alias | Given an internal Drupal path, return the alias for the path. |
xmlsitemap_get_rebuildable_link_types | Get Rebuildable link types. |
xmlsitemap_rebuild_batch | Batch information callback for rebuilding the sitemap data. |
xmlsitemap_rebuild_batch_clear | Batch callback; clear sitemap links for entities. |
xmlsitemap_rebuild_batch_fetch | Batch callback; fetch and add the sitemap links for a specific entity. |
xmlsitemap_rebuild_batch_finished | Batch callback; sitemap rebuild finished. |
xmlsitemap_rebuild_clear | Clear all sitemap links for given entity types. |
xmlsitemap_regenerate_batch | Batch information callback for regenerating the sitemap files. |
xmlsitemap_regenerate_batch_finished | Batch callback; sitemap regeneration finished. |
xmlsitemap_regenerate_batch_generate | Batch callback; generate all pages of a sitemap. |
xmlsitemap_regenerate_batch_generate_index | Batch callback; generate the index page of a sitemap. |
_xmlsitemap_get_memory_usage | Get Memory Usage. |
_xmlsitemap_get_optimal_memory_limit | Calculate the optimal PHP memory limit for sitemap generation. |
_xmlsitemap_regenerate_before | Perform operations before rebuilding the sitemap. |
_xmlsitemap_set_memory_limit | Calculate the optimal memory level for sitemap generation. |