apachesolr_stats.module in Apache Solr Statistics 6.3
Same filename and directory in other branches
Keeps and reports statistics about Apache Solr usage and performance.
File
apachesolr_stats.module (View source)
<?php
/**
* @file
* Keeps and reports statistics about Apache Solr usage and performance.
*/
/**
 * Implementation of hook_menu().
 *
 * Registers the settings form (as a local task under the Apache Solr
 * settings), the report overview and the per-search-page report.
 *
 * @return array
 *   Menu items keyed by path.
 */
function apachesolr_stats_menu() {
  $items = array();

  // Settings form.
  $items['admin/settings/apachesolr/stats'] = array(
    'title' => 'Statistics',
    'description' => 'Apache Solr Statistics settings to measure usage and performance.',
    'page callback' => 'drupal_get_form',
    'page arguments' => array('apachesolr_stats_admin'),
    'access arguments' => array('administer search'),
    'type' => MENU_LOCAL_TASK,
  );

  // Report overview: lists the search pages to pick from.
  $items['admin/reports/apachesolr_stats'] = array(
    'title' => 'Apache Solr Search Page Statistics',
    'description' => 'Report of Apache Solr Page usage and performance.',
    'page callback' => 'apachesolr_stats_report',
    'page arguments' => array(),
    'access arguments' => array('access site reports'),
    'type' => MENU_NORMAL_ITEM,
  );

  // Report for one search page; path part 3 is the loaded search page.
  $items['admin/reports/apachesolr_stats/%apachesolr_search_page'] = array(
    'title' => 'Apache Solr Search Page Statistics',
    'page callback' => 'apachesolr_stats_report',
    'page arguments' => array(3),
    'access arguments' => array('access site reports'),
    'type' => MENU_NORMAL_ITEM,
  );

  return $items;
}
/**
 * Form builder for the Apache Solr Statistics settings page.
 *
 * Lets an administrator pick which search pages are logged, how long log
 * entries are kept, and which IP addresses and roles are never logged.
 *
 * @return array
 *   The form array, passed through system_settings_form().
 *
 * @see apachesolr_stats_menu()
 */
function apachesolr_stats_admin() {
  $form = array();

  $tip_links = array(
    '@report-url' => url('admin/reports/apachesolr_stats'),
    '@blocks-url' => url('admin/build/block'),
  );
  $form['tip'] = array(
    '#type' => 'markup',
    '#value' => t('You can also visit the <a href="@report-url">reports page</a> and <a href="@blocks-url">block administration page</a> for the enabled search pages.', $tip_links),
  );

  // One checkbox per search page, labelled "Label (path)".
  $page_options = array();
  foreach (apachesolr_search_load_all_search_pages() as $page) {
    $page_options[$page['page_id']] = $page['label'] . ' (' . $page['search_path'] . ')';
  }
  $form['apachesolr_stats_enabled'] = array(
    '#type' => 'checkboxes',
    '#title' => t('Enable search logging for these search pages'),
    '#default_value' => variable_get('apachesolr_stats_enabled', array()),
    '#options' => $page_options,
    '#description' => t('Log information about all queries launched via the Apache Solr Search Integration module. Disabling a page here will also disable the associated block.'),
  );

  // Retention periods, expressed in hours and converted to seconds.
  $retention_seconds = array();
  foreach (array(1, 3, 6, 9, 12, 24, 48, 72, 168, 336, 672, 1344, 2688) as $hours) {
    $retention_seconds[] = $hours * 3600;
  }
  $form['apachesolr_stats_flush_log_timer'] = array(
    '#type' => 'select',
    '#title' => t('Discard query logs older than'),
    '#default_value' => variable_get('apachesolr_stats_flush_log_timer', 259200),
    '#options' => drupal_map_assoc($retention_seconds, 'format_interval'),
    '#description' => t('Older query log entries will be automatically discarded. (Requires a correctly configured <a href="@cron">cron maintenance task</a>.)', array(
      '@cron' => url('admin/reports/status'),
    )),
  );

  // Blacklist settings.
  $blacklist = array(
    '#type' => 'fieldset',
    '#title' => t('Log blacklist'),
    '#description' => t('Note: Changing this does not alter existing logged queries.'),
  );
  $blacklist['apachesolr_stats_ignore_ip_list'] = array(
    '#type' => 'textarea',
    '#title' => t('IP addresses that will not be logged'),
    '#default_value' => variable_get('apachesolr_stats_ignore_ip_list', ''),
    '#description' => t('Enter IP addresses (e.g.: 192.168.1.2), one per line. You can match entire subnets using a partial IP address ending with a period (e.g.: 192.168.)'),
  );
  $blacklist['apachesolr_stats_ignore_role_list'] = array(
    '#type' => 'checkboxes',
    '#title' => t('User roles that will not be logged'),
    '#options' => user_roles(),
    '#default_value' => variable_get('apachesolr_stats_ignore_role_list', array()),
    '#description' => t('Check all roles which should not be logged.'),
  );
  $form['access'] = $blacklist;

  return system_settings_form($form);
}
/**
 * Implementation of hook_apachesolr_query_alter().
 *
 * Adds the debugQuery parameter to the Solr call, which makes Solr return
 * processing time information, when logging is enabled for at least one
 * search page.
 *
 * @param object $query
 *   The query being altered; only its replaceParam() method is used.
 */
function apachesolr_stats_apachesolr_query_alter($query) {
  // The setting holds one entry per search page and disabled pages keep a
  // falsy value, so the array itself is non-empty even when nothing is
  // enabled. Drop the falsy entries before testing.
  $enabled_pages = array_filter((array) variable_get('apachesolr_stats_enabled', array()));
  if ($enabled_pages) {
    // Add the debug query argument.
    // See: http://wiki.apache.org/solr/CommonQueryParameters#head-f45a9396425956a4db8d6478ed6029adfb7b0858
    $query->replaceParam('debugQuery', 'true');
  }
}
/**
 * Implementation of hook_exit().
 *
 * This is the spot where actual logging takes place: when the current request
 * ran a search on an enabled search page, and neither the visitor's IP address
 * nor one of their roles is blacklisted, one row is written to the
 * {apachesolr_stats} table.
 */
function apachesolr_stats_exit() {
  $enabled_pages = variable_get('apachesolr_stats_enabled', array());
  if (!$enabled_pages) {
    return;
  }
  // Apparently there can be cases where some modules aren't loaded.
  if (!function_exists('apachesolr_has_searched')) {
    return;
  }

  // Log only for the search page that owns the current path.
  $current_path = isset($_GET['q']) ? $_GET['q'] : '';
  $search_page = apachesolr_stats_get_search_page_by_path($current_path);
  if (!$search_page) {
    return;
  }

  // Continue only if logging is enabled for this search page.
  $page_id = $search_page['page_id'];
  if (empty($enabled_pages[$page_id])) {
    return;
  }

  // Continue only if the current page request has issued a search.
  $env_id = $search_page['env_id'];
  if (!apachesolr_has_searched($env_id)) {
    return;
  }

  // Ignore certain IPs.
  $ignore_list = variable_get('apachesolr_stats_ignore_ip_list', '');
  if ($ignore_list && _apachesolr_stats_ip_is_ignored(ip_address(), $ignore_list)) {
    return;
  }

  // Ignore certain roles. Entries with a falsy value are unchecked roles.
  global $user;
  $ignored_roles = array_filter((array) variable_get('apachesolr_stats_ignore_role_list', array()));
  if (array_intersect(array_keys($user->roles), array_values($ignored_roles))) {
    return;
  }

  $query = apachesolr_current_query($env_id);
  if (!$query) {
    return;
  }
  $searcher = $query->getSearcher();
  $response = apachesolr_static_response_cache($searcher);
  // Without a response there is nothing meaningful to log.
  if (!is_object($response) || !isset($response->response)) {
    return;
  }
  $filters = apachesolr_stats_get_active_facets($searcher);
  $solrsort = $query->getSolrSort();
  $keywords = $query->getParam('q');
  $params = $query->getParams();

  // Flag the row when spellchecking is enabled for this search page and Solr
  // returned a suggestion.
  $num_suggestions = 0;
  if (!empty($search_page['settings']['apachesolr_search_spellcheck']) && !empty($response->spellcheck->suggestions)) {
    $num_suggestions = 1;
  }

  // Timing data is only present when the debugQuery parameter was honoured;
  // fall back to 0 instead of reading undefined properties.
  $num_found = isset($response->response->numFound) ? $response->response->numFound : 0;
  $total_time = isset($response->debug->timing->time) ? $response->debug->timing->time : 0;
  $prepare_time = isset($response->debug->timing->prepare->time) ? $response->debug->timing->prepare->time : 0;
  $process_time = isset($response->debug->timing->process->time) ? $response->debug->timing->process->time : 0;

  db_query("INSERT INTO {apachesolr_stats}
    (timestamp, uid, sid, numfound, showed_suggestions, total_time, prepare_time, process_time, page, keywords, filters, sort, params, env_id, page_id)
    VALUES
    (%d, %d, '%s', %d, %d, %d, %d, %d, '%s', '%s', '%s', '%s', '%s', '%s', '%s')",
    time(),
    $user->uid,
    session_id(),
    $num_found,
    $num_suggestions,
    $total_time,
    $prepare_time,
    $process_time,
    isset($_GET['page']) ? $_GET['page'] : '',
    $keywords,
    json_encode($filters),
    json_encode($solrsort),
    json_encode($params),
    $env_id,
    $page_id
  );
}

/**
 * Tells whether an IP address matches an entry of the logging blacklist.
 *
 * An entry ending with a period (e.g. "192.168.") matches every address that
 * starts with it; any other entry must equal the address exactly, so that
 * "192.168.1.2" does not also match "192.168.1.20".
 *
 * @param string $ip_address
 *   The address to test.
 * @param string $ignore_list
 *   Whitespace-separated blacklist entries.
 *
 * @return bool
 *   TRUE when the address matches at least one entry.
 */
function _apachesolr_stats_ip_is_ignored($ip_address, $ignore_list) {
  $entries = preg_split('/\s+/', (string) $ignore_list, -1, PREG_SPLIT_NO_EMPTY);
  foreach ($entries as $entry) {
    if (substr($entry, -1) === '.') {
      if (strpos($ip_address, $entry) === 0) {
        return TRUE;
      }
    }
    elseif ($ip_address === $entry) {
      return TRUE;
    }
  }
  return FALSE;
}
/**
 * Return a search page by the base path, e.g. 'search/site'
 *
 * A search page matches when the given path equals its search path or lies
 * below it ('search/site/keys' matches 'search/site', 'search/sitemap' does
 * not). When several search pages match, the one with the longest search path
 * is returned.
 *
 * @param string $path
 *   The path to look up.
 *
 * @return bool|array
 *   Returns FALSE if no page found, or the matching search page as found in
 *   apachesolr_search_load_all_search_pages().
 */
function apachesolr_stats_get_search_page_by_path($path) {
  $path = (string) $path;
  $best_match = FALSE;
  $best_length = -1;
  foreach (apachesolr_search_load_all_search_pages() as $search_page) {
    $search_path = isset($search_page['search_path']) ? trim($search_page['search_path'], '/') : '';
    // A page without a search path can never own a request path.
    if ($search_path === '') {
      continue;
    }
    // Match on path-segment boundaries only.
    $is_match = ($path === $search_path) || (strpos($path, $search_path . '/') === 0);
    if ($is_match && strlen($search_path) > $best_length) {
      $best_match = $search_page;
      $best_length = strlen($search_path);
    }
  }
  return $best_match;
}
/**
 * Collects the active facet items of a searcher from Facet API.
 *
 * @param $searcher
 *   The machine name of the searcher.
 *
 * @return array
 *   An array mapping facet name to the active item's value. Empty when
 *   Facet API is not available or no adapter could be loaded.
 */
function apachesolr_stats_get_active_facets($searcher) {
  $active_facets = array();

  // Facet API is optional.
  if (!function_exists('facetapi_adapter_load')) {
    return $active_facets;
  }
  $adapter = facetapi_adapter_load($searcher);
  if (!$adapter) {
    return $active_facets;
  }

  foreach ($adapter->getAllActiveItems() as $item) {
    foreach ($item['facets'] as $facet_name) {
      $active_facets[$facet_name] = $item['value'];
    }
  }
  return $active_facets;
}
/**
 * Page callback for admin/reports/apachesolr_stats.
 *
 * Without a search page, lists the search pages to choose from; with one,
 * renders the statistics report for that page.
 *
 * @param array $search_page
 *   The search page to report on, or NULL to show the listing.
 * @param string $picked_granularity
 *   Key of the granularity to use, as found in
 *   apachesolr_stats_get_granularities(). Unknown keys fall back to "minute".
 *
 * @return string
 *   The page output as HTML.
 *
 * @see apachesolr_stats_menu()
 */
function apachesolr_stats_report($search_page = NULL, $picked_granularity = "minute") {
  $enabled_pages = variable_get('apachesolr_stats_enabled', array());

  // If not given a search_page argument, show a listing.
  if (empty($search_page)) {
    $items = array();
    foreach (apachesolr_search_load_all_search_pages() as $page) {
      $items[] = l($page['label'], 'admin/reports/apachesolr_stats/' . $page['page_id']);
    }
    $output = t('Pick a search page to view the report:');
    $output .= theme('item_list', $items);
    return $output;
  }

  drupal_set_title(t("Apache Solr statistics report: @label", array(
    '@label' => $search_page['label'],
  )));

  // Warn when nothing is being logged for this page. The message has to go
  // through t() for the !link placeholder to be replaced; the second argument
  // of drupal_set_message() is the message type.
  if (empty($enabled_pages[$search_page['page_id']])) {
    drupal_set_message(t('Logging is disabled for this page in the !link. Enable it to log Apache Solr queries.', array(
      '!link' => l(t('configuration page'), 'admin/settings/apachesolr/stats'),
    )), 'warning');
  }

  // Decide what granularity to use; unknown values are reset to "minute".
  $granularities = apachesolr_stats_get_granularities();
  if (!isset($granularities[$picked_granularity])) {
    $picked_granularity = "minute";
  }

  // Process latest log entries.
  $report_elements = apachesolr_stats_generate_report_elements($search_page['page_id'], $granularities[$picked_granularity]);

  // Granularity picker: offer only the time spans that do not exceed the log
  // retention period ("all" is always offered).
  $timer_max = variable_get('apachesolr_stats_flush_log_timer', 259200);
  $output = "<div class='granularity'>" . t('Choose the report time span:');
  foreach ($granularities as $name => $option) {
    if ($name != "all" && $option['time_before'] > $timer_max) {
      continue;
    }
    $output .= " ";
    if ($name != $picked_granularity) {
      $output .= l($option["last_msg"], "admin/reports/apachesolr_stats/" . $search_page['page_id'] . '/' . $name);
    }
    else {
      $output .= "<strong>" . $option["last_msg"] . "</strong>";
    }
  }
  $output .= "</div><hr>";

  if (!$report_elements) {
    drupal_set_message(t('There is not enough stored data to build a report for the current time span.'));
    return $output;
  }

  // Report description.
  $output .= t('This is an overview of Apache Solr usage and performance.');
  $output .= ' ' . t('You can also visit the <a href="@settings-url">settings page</a>.', array(
    '@settings-url' => url('admin/settings/apachesolr/stats'),
  ));

  // Render report elements as a two-column table: name, value.
  $rows = array();
  foreach ($report_elements as $data) {
    $rows[] = array(
      "data" => array(
        array(
          'data' => $data['name'],
          'header' => true,
          'style' => 'width:33%',
        ),
        array(
          'data' => $data['value'],
        ),
      ),
    );
  }
  $output .= theme('table', array(), $rows);
  return $output;
}
/**
 * Generate an IMG tag with the URL to generate a chart using Google Charts API.
 *
 * Data points are plotted oldest first (left) to newest (right), matching the
 * time labels on the x axis; timeslots between the last logged one and the
 * current time are plotted as zero.
 *
 * @param array $granularity
 *   The granularity to use; its 'timespan' (seconds per timeslot) and
 *   'format' (strftime() format for the time labels) entries are read.
 * @param array $data
 *   The array of data to chart, keyed by timeslot.
 * @param integer $start_timeslot
 *   The index of the first (earliest) data element to chart.
 * @param integer $last_timeslot
 *   The index of the last (latest) data element to chart.
 * @param integer $total_queries
 *   Integer with the total number of queries included in this chart. Not used
 *   by the function; kept for compatibility with existing callers.
 * @param bool|float $average
 *   FALSE to show no average, or the average value to show as a label on the
 *   right-hand side of the chart.
 *
 * @return string
 *   The IMG tag.
 */
function apachesolr_stats_chart($granularity, $data, $start_timeslot, $last_timeslot, $total_queries, $average = FALSE) {
  // Sample: http://chart.apis.google.com/chart?cht=lc&chs=350x100&chdlp=b&chma=10,10,10,10&chd=s:[encoded chart data]
  $chart_prefix = 'http://chart.apis.google.com/chart?cht=lc&chs=350x100';
  $chart_prefix .= '&chdlp=b&chma=30,100,20,20&chd=s:';

  // Collect the data points in chronological order.
  $chd = array();
  for ($t = $start_timeslot; $t <= $last_timeslot; $t++) {
    $chd[] = isset($data[$t]) ? intval($data[$t]) : 0;
  }
  // Add missing data for time between last timeslot and current time.
  $current_timeslot = intval(time() / $granularity['timespan']);
  for ($t = $last_timeslot + 1; $t <= $current_timeslot; $t++) {
    $chd[] = 0;
  }

  $chd_min = $chd ? min($chd) : 0;
  $chd_max = $chd ? max(0, max($chd)) : 0;
  // Fix min if min and max are the same.
  if ($chd_min == $chd_max) {
    $chd_min = 0;
  }

  // Generate basic image URL.
  $image_url = $chart_prefix . apachesolr_stats_encodedata($chd, $chd_min, $chd_max);

  // Add labels.
  if ($chd_max > 0) {
    // Axis 0: y-axis labels, from the smallest to the greatest value.
    $chxt = "y";
    $chxl = "0:|" . intval($chd_min) . "|" . intval($chd_max);
    $axis_count = 1;

    // Show average value in a label on right-hand side, positioned on the
    // same min..max scale as the y axis.
    if ($average !== FALSE) {
      $position = ($average - $chd_min) / ($chd_max - $chd_min) * 100;
      $position = max(0, min(100, intval($position)));
      $image_url .= "&chxp={$axis_count}," . $position;
      $chxl .= sprintf("|%d:|%s=%.2f", $axis_count, t('average'), $average);
      $chxt .= ",r";
      $axis_count++;
    }

    // Add time/date labels on the x axis: earliest, middle, latest.
    $earliest_timestamp = $start_timeslot * $granularity['timespan'];
    $last_timestamp = $current_timeslot * $granularity['timespan'];
    $mid_timestamp = ($last_timestamp + $earliest_timestamp) / 2;
    $time_msg_1 = drupal_urlencode(strftime($granularity['format'], $earliest_timestamp));
    $time_msg_2 = drupal_urlencode(strftime($granularity['format'], $mid_timestamp));
    $time_msg_3 = drupal_urlencode(strftime($granularity['format'], $last_timestamp));
    // The label index must be the position of the x axis in the axis list.
    $chxt .= ",x";
    $chxl .= "|{$axis_count}:|{$time_msg_1}|{$time_msg_2}|{$time_msg_3}";

    $image_url .= "&chxl={$chxl}&chxt={$chxt}";
  }

  // Return the image tag.
  return "<img src='{$image_url}' />";
}
/**
 * Encode data using Chart's simple encoding.
 * See http://code.google.com/apis/chart/formats.html#simple
 *
 * Each value is mapped linearly from the range $chd_min..$chd_max onto one of
 * the 62 characters of the encoding; values outside the range are clamped to
 * its ends. When the range is empty every value is encoded as the lowest
 * character.
 *
 * @param array $chd
 *   an array of numeric values to encode.
 * @param integer $chd_min
 *   the smallest value to encode.
 * @param integer $chd_max
 *   the greatest value to encode.
 *
 * @return string
 *   a string representing the Google Charts API simple encoding of the data;
 *   empty when $chd is not an array.
 */
function apachesolr_stats_encodedata($chd, $chd_min, $chd_max) {
  $encoder_string = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789';
  $top_index = strlen($encoder_string) - 1;
  $range = $chd_max - $chd_min;
  $encoded_values = '';
  if (is_array($chd)) {
    foreach ($chd as $value) {
      $index = 0;
      // Guard against a division by zero when all values are equal.
      if ($range > 0) {
        $index = (int) floor(($value - $chd_min) / $range * $top_index);
        $index = max(0, min($top_index, $index));
      }
      $encoded_values .= $encoder_string[$index];
    }
  }
  // Google does not like single-valued line charts; fix that.
  if (strlen($encoded_values) == 1) {
    $encoded_values = "99";
  }
  return $encoded_values;
}
/**
 * Implementation of hook_cron().
 *
 * Deletes the log entries that are older than the configured retention
 * period (the apachesolr_stats_flush_log_timer variable, 259200 seconds by
 * default).
 */
function apachesolr_stats_cron() {
  $max_age = variable_get('apachesolr_stats_flush_log_timer', 259200);
  $cutoff = time() - $max_age;
  db_query('DELETE FROM {apachesolr_stats} WHERE timestamp < %d', $cutoff);
}
/**
 * Returns a themed table for facet usage.
 *
 * @param array $facets
 *   An array of calculated data to report, keyed by facet name; each entry
 *   has an 'info' and a 'usage' element. The 'any' entry holds the total
 *   number of queries, used as the base of the percentages.
 *
 * @return string
 *   HTML for a themed table containing the report data.
 */
function apachesolr_stats_facet_usage_table($facets) {
  // The 'Xfield' keys (instead of 'field') keep the columns from being
  // rendered as sortable.
  $header = array(
    array(
      'data' => t('Facet ID'),
      'Xfield' => 'id',
      'sort' => 'asc',
    ),
    array(
      'data' => t('Facet info'),
      'Xfield' => 'info',
      'sort' => '',
    ),
    array(
      'data' => t('Queries containing this facet'),
      'Xfield' => 'queries',
      'sort' => '',
    ),
    array(
      'data' => t('% of queries containing this facet'),
      'Xfield' => 'queries',
      'sort' => '',
    ),
  );

  $total_queries = isset($facets['any']['usage']) ? $facets['any']['usage'] : 0;
  $rows = array();
  foreach ($facets as $fieldname => $facet) {
    // Without any query there is nothing to take a share of.
    $share = $total_queries > 0 ? $facet['usage'] / $total_queries * 100 : 0;
    $rows[$fieldname] = array(
      $fieldname,
      $facet['info'],
      $facet['usage'],
      sprintf("%2.1f%%", $share),
    );
  }

  return theme('table', $header, $rows, array(
    'style' => 'font-size:80%',
  ));
}
/**
 * Returns the <img> tag for a Google chart with the facet usage.
 *
 * @param array $facets
 *   An array of calculated data to report, keyed by facet name; each entry
 *   has an 'info' and a 'usage' element. The 'any' entry holds the total
 *   number of queries, used as the base of the percentages.
 *
 * @return string
 *   HTML for an IMG tag to a Google chart.
 */
function apachesolr_stats_facet_usage_graph($facets) {
  $leyends = array();
  $data = array();
  $label_cutoff = 40;
  $total_queries = isset($facets['any']['usage']) ? $facets['any']['usage'] : 0;
  foreach ($facets as $fieldname => $facet) {
    $leyend = preg_replace("/^.*ilter by /", "", $facet['info']);
    // Truncate by characters, not bytes, so a multi-byte character is never
    // cut in half before being URL-encoded.
    if (drupal_strlen($leyend) > $label_cutoff) {
      $leyend = drupal_substr($leyend, 0, $label_cutoff) . "...";
    }
    $leyends[] = drupal_urlencode($leyend);
    // Without any query there is nothing to take a share of.
    $data[] = $total_queries > 0 ? $facet['usage'] / $total_queries * 100 : 0;
  }
  $chd = 's:' . apachesolr_stats_encodedata($data, 0, 100);
  // The labels are sent in the reverse order of the data series.
  // NOTE(review): presumably because the chart lists y-axis labels bottom-up
  // while drawing the bars top-down; confirm against the Charts documentation.
  $chl = implode('|', array_reverse($leyends));
  $height = 30 + sizeof($leyends) * 28;
  // Percentage labels
  $chm = "N*f0*%,000000,0,-1,11";
  $chart = "<img src='http://chart.apis.google.com/chart?chxt=y&cht=bhs&chma=20,20,20,20&chd={$chd}&chs=350x{$height}&chds=0,100&chxl=0:|{$chl}&chm={$chm}' />";
  return $chart;
}
/**
 * Builds the list of facets to report on, with usage counters set to zero.
 *
 * When Facet API is enabled, the list holds the enabled facets of every
 * Apache Solr environment. Two "virtual" facets are always added: 'kw'
 * (keyword search) and 'any' (all queries).
 *
 * @return array
 *   An array keyed by facet name; each entry has the elements 'facet_field',
 *   'info' and 'usage'.
 */
function apachesolr_stats_get_facets() {
  $report_facets = array();

  if (module_exists('facetapi')) {
    foreach (array_keys(apachesolr_load_all_environments()) as $env_id) {
      $enabled_facets = facetapi_get_enabled_facets('apachesolr@' . $env_id);
      foreach ($enabled_facets as $name => $definition) {
        $report_facets[$name] = array(
          'facet_field' => $definition['field'],
          'info' => $definition['label'],
          'usage' => 0,
        );
      }
    }
  }

  // Add some "virtual" facets to report on.
  $virtual_facets = array(
    'kw' => 'Keyword search',
    'any' => '[All queries including any filter and/or keywords]',
  );
  foreach ($virtual_facets as $name => $info) {
    $report_facets[$name] = array(
      'facet_field' => $name,
      'info' => $info,
      'usage' => 0,
    );
  }

  return $report_facets;
}
/**
 * Returns the preset report granularities.
 *
 * @return array
 *   An array keyed by granularity ('minute', 'hour', 'day', 'all'); each entry
 *   has the elements:
 *   - 'name': translated name of one data point.
 *   - 'timespan': seconds covered by one data point.
 *   - 'time_before': seconds of log history covered by the report.
 *   - 'last_msg': translated label of the report time span.
 *   - 'format': strftime() format used for time labels.
 */
function apachesolr_stats_get_granularities() {
  $minute = 60;
  $hour = 60 * $minute;
  $day = 24 * $hour;

  $granularities = array();
  // One point per minute, over the last day.
  $granularities['minute'] = array(
    'name' => t('minute'),
    'timespan' => $minute,
    'time_before' => $day,
    'last_msg' => t('last day'),
    'format' => '%H:%M',
  );
  // One point per hour, over the last week.
  $granularities['hour'] = array(
    'name' => t('hour'),
    'timespan' => $hour,
    'time_before' => 7 * $day,
    'last_msg' => t('last week'),
    'format' => '%m/%d %H:%M',
  );
  // One point per day, over the last 32 days.
  $granularities['day'] = array(
    'name' => t('day'),
    'timespan' => $day,
    'time_before' => 32 * $day,
    'last_msg' => t('last month'),
    'format' => '%m/%d',
  );
  // One point per day, over the last 16 weeks.
  $granularities['all'] = array(
    'name' => t('day'),
    'timespan' => $day,
    'time_before' => 16 * 7 * $day,
    'last_msg' => t('all time (depends on settings)'),
    'format' => '%m/%d',
  );
  return $granularities;
}
/**
 * Generates report elements for one search page and the given granularity.
 *
 * @param string $page_id
 *   ID of the search page to report on.
 * @param array $granularity
 *   One entry of apachesolr_stats_get_granularities(); its 'timespan',
 *   'time_before' and 'name' elements are read here.
 *
 * @return array
 *   An array with the report elements, empty when there is no logged query in
 *   the time span. Each element is an array with the indexes:
 *   'name' => human-readable name of the element, e.g. "Total queries"
 *   'value' => html with the result. Can be an image, a number, etc.
 */
function apachesolr_stats_generate_report_elements($page_id, $granularity) {
  // Initialize
  $facets = apachesolr_stats_get_facets();
  $suggestions = 0;
  $users = array();
  $sessions = array();
  $user_slot = array();
  $session_slot = array();
  $timeslot = 0;
  $last_timeslot = 0;
  $first_timestamp = 0;
  $total_queries = 0;
  $time = array(
    'total' => 0,
    'max' => -1,
    'min' => 9999.999,
  );
  $report_elements = array();
  $keywords = array();
  $keywords_noresults = array();
  $count_per_granularity = array();
  $data_per_granularity = array(
    'users_per_slot' => array(),
    'sessions_per_slot' => array(),
    'queries' => array(),
    'total_time' => array(),
  );
  $sort_usage = array();

  // Scan the log, newest entry first, and build the statistics arrays.
  $result = db_query("SELECT * FROM {apachesolr_stats} WHERE timestamp > %d AND page_id = '%s' ORDER BY timestamp DESC", time() - $granularity['time_before'], $page_id);
  while ($record = db_fetch_object($result)) {
    $timeslot = intval($record->timestamp / $granularity['timespan']);
    // The first row read is the newest one.
    if ($last_timeslot == 0) {
      $last_timeslot = $timeslot;
    }
    _apachesolr_stats_tally($users, $record->uid);
    _apachesolr_stats_tally($sessions, $record->sid);

    // Tally suggestions
    if ($record->showed_suggestions) {
      $suggestions++;
    }

    // Track total, max and min response times
    $time['total'] += $record->total_time;
    $time['max'] = $time['max'] < $record->total_time ? $record->total_time : $time['max'];
    $time['min'] = $time['min'] > $record->total_time ? $record->total_time : $time['min'];

    // Facet, keyword and sort usage; only when on first results page (meaning
    // it's a fresh search).
    if ($record->page == "") {
      $facet_processed_flag = array();
      $filters = json_decode($record->filters);
      // json_decode() yields NULL for missing or invalid data.
      if (is_object($filters) || is_array($filters)) {
        foreach ($filters as $facet_name => $facet_value) {
          if (isset($facets[$facet_name]) && !isset($facet_processed_flag[$facet_name])) {
            // Add 1 to usage of facet, once per query.
            $facets[$facet_name]['usage']++;
            $facet_processed_flag[$facet_name] = TRUE;
          }
        }
      }
      if (trim($record->keywords) != "" && empty($facet_processed_flag['kw'])) {
        $facets['kw']['usage']++;
        // Keep track of individual keywords used
        $keys_filtered = drupal_strtolower(trim($record->keywords));
        _apachesolr_stats_tally($keywords, $keys_filtered);
        // Count keywords with zero results; but only when no filters issued.
        if ($record->numfound == 0 && !$filters) {
          _apachesolr_stats_tally($keywords_noresults, $keys_filtered);
        }
      }
      // Count each unique query
      $facets["any"]['usage']++;
      $total_queries++;

      // Sort usage
      $sort = (array) json_decode($record->sort);
      if ($sort) {
        _apachesolr_stats_tally($sort_usage, isset($sort['#name']) ? $sort['#name'] : '');
      }
    }

    // Group some stats into timeslots (minutes, hours) to show trends; users
    // and sessions are counted once per timeslot.
    if (empty($user_slot[$record->uid][$timeslot])) {
      _apachesolr_stats_tally($data_per_granularity['users_per_slot'], $timeslot);
      $user_slot[$record->uid][$timeslot] = TRUE;
    }
    if (empty($session_slot[$record->sid][$timeslot])) {
      _apachesolr_stats_tally($data_per_granularity['sessions_per_slot'], $timeslot);
      $session_slot[$record->sid][$timeslot] = TRUE;
    }
    _apachesolr_stats_tally($data_per_granularity['queries'], $timeslot);
    _apachesolr_stats_tally($count_per_granularity, $timeslot);
    _apachesolr_stats_tally($data_per_granularity['total_time'], $timeslot, $record->total_time);
    $first_timestamp = $record->timestamp;
  }
  if (sizeof($sessions) == 0 || sizeof($users) == 0 || $total_queries == 0) {
    return array();
  }

  // The last row read is the oldest one.
  $start_timeslot = $timeslot;
  $slot_count = $last_timeslot - $start_timeslot + 1;
  $average_time = $time['total'] / $total_queries;

  $report_elements['span'] = array(
    'name' => t('Report span'),
    'value' => t('Last @interval (@startdate to @enddate)', array(
      '@interval' => format_interval(3600 + time() - $first_timestamp),
      '@startdate' => format_date($first_timestamp),
      '@enddate' => format_date(time()),
    )) . '<br />' . t('Data points in charts are one point per @granularity.', array(
      '@granularity' => $granularity['name'],
    )),
  );

  // Chart for queries per timeslot
  $chart = apachesolr_stats_chart($granularity, $data_per_granularity['queries'], $start_timeslot, $last_timeslot, $total_queries, $total_queries / $slot_count);
  $report_elements['total_queries_per'] = array(
    'name' => t('Requests'),
    'value' => t('Total: @total', array(
      '@total' => $total_queries,
    )) . '<br />' . $chart,
  );

  // Chart for sessions per timeslot
  $chart = apachesolr_stats_chart($granularity, $data_per_granularity['sessions_per_slot'], $start_timeslot, $last_timeslot, sizeof($sessions), sizeof($sessions) / $slot_count);
  $report_elements['total_sessions_per'] = array(
    'name' => t('Unique sessions'),
    'value' => t('Total: @total', array(
      '@total' => sizeof($sessions),
    )) . '<br />' . $chart,
  );
  $report_elements['avg_queries_session'] = array(
    'name' => t('Average requests per session'),
    'value' => sprintf("%.1f", $total_queries / sizeof($sessions)),
  );

  // Chart for average time per timeslot
  $data = array();
  foreach ($data_per_granularity['total_time'] as $slot => $slot_time) {
    $data[$slot] = $slot_time / $count_per_granularity[$slot];
  }
  $chart = apachesolr_stats_chart($granularity, $data, $start_timeslot, $last_timeslot, $total_queries, $average_time);
  $report_elements['query_avg_time'] = array(
    'name' => t('Average time per request (miliseconds)'),
    'value' => sprintf("%s: %.2f ms / %s: %.2f ms / %s: %.2f ms", t('Minimum'), $time['min'], t('Average'), $average_time, t('Maximum'), $time['max']) . '<br />' . $chart,
  );

  // Most-used keywords
  $report_elements['keywords'] = array(
    'name' => t('Top search phrases'),
    'value' => apachesolr_stats_report_frequent_keywords($page_id, $keywords, $keywords_noresults),
  );
  // Most-used keywords with no results
  $report_elements['keywords_noresults'] = array(
    'name' => t('Top search phrases with no results'),
    'value' => apachesolr_stats_report_frequent_keywords($page_id, $keywords_noresults, $keywords_noresults, "error"),
  );
  // Total spellchecker suggestions
  $report_elements['spellchecker'] = array(
    'name' => t('Total spellchecker suggestions'),
    'value' => $suggestions,
  );

  // Chart for sort usage. The text encoding only covers values from 0 to 100,
  // so the counts are sent as percentages of the total.
  $chart = '';
  $sort_total = array_sum($sort_usage);
  if ($sort_total > 0) {
    $leyends = array();
    $shares = array();
    foreach ($sort_usage as $key => $value) {
      $leyends[] = drupal_urlencode($key);
      $shares[] = number_format($value / $sort_total * 100, 1, '.', '');
    }
    $chl = implode('|', $leyends);
    $chd = implode(',', $shares);
    $chart = "<img src='http://chart.apis.google.com/chart?cht=p3&chd=t:{$chd}&chs=350x100&chl={$chl}' />";
  }
  $report_elements['sort_usage'] = array(
    'name' => t('Sort usage'),
    'value' => $chart,
  );

  // Chart for field usage
  $report_elements['field_usage'] = array(
    'name' => t('Facet usage'),
    'value' => apachesolr_stats_facet_usage_graph($facets) . apachesolr_stats_facet_usage_table($facets),
  );
  return $report_elements;
}

/**
 * Adds an amount to a counter, creating the counter when it does not exist.
 *
 * @param array $counters
 *   The array of counters, modified in place.
 * @param mixed $key
 *   Key of the counter to increase.
 * @param int|float $amount
 *   Amount to add.
 */
function _apachesolr_stats_tally(array &$counters, $key, $amount = 1) {
  if (!isset($counters[$key])) {
    $counters[$key] = 0;
  }
  $counters[$key] += $amount;
}
/**
 * Receives an array of keyword => count and reports the top-used terms.
 *
 * Each term is rendered as a link that launches the search, sized by how
 * often it was used relative to the other reported terms.
 *
 * @param string $page_id
 *   ID of the search page.
 * @param array $keywords
 *   array of keyword => count pairs.
 * @param array $keywords_noresults
 *   array keyed by the keywords that returned 0 results; links of these
 *   keywords get the CSS class "error".
 * @param string $class
 *   CSS class for the links of all other keywords.
 * @param int $number
 *   Number of terms to show.
 *
 * @return string
 *   The links, most frequent first, separated by commas; an empty string when
 *   there are no keywords.
 */
function apachesolr_stats_report_frequent_keywords($page_id, $keywords, $keywords_noresults, $class = '', $number = 25) {
  if (empty($keywords)) {
    return '';
  }
  $search_page = apachesolr_search_page_load($page_id);
  $search_path = isset($search_page['search_path']) ? $search_page['search_path'] : '';

  // Most frequent first; keep the $number first ones. The keys have to be
  // preserved, otherwise numeric keywords (e.g. "2012") would be renumbered.
  arsort($keywords);
  $slice = array_slice($keywords, 0, $number, TRUE);

  // Calculate font size for display.
  // Note: we need to ensure the range is slightly too large to make sure even
  // the largest element is rounded down.
  $steps = 6;
  $min = min($slice);
  $max = max($slice);
  $range = max(0.01, $max - $min) * 1.0001;

  $items = array();
  foreach ($slice as $word => $count) {
    // Numeric keywords come back as integer keys.
    $word = (string) $word;
    $weight = floor($steps * ($count - $min) / $range);
    $attributes = array(
      'style' => 'font-size:' . sprintf("%d%%", 80 + $weight * 12),
    );
    // Decide the class for each keyword on its own, so that "error" does not
    // stick to the keywords that follow one without results.
    $item_class = isset($keywords_noresults[$word]) ? "error" : $class;
    if ($item_class != '') {
      $attributes['class'] = $item_class;
    }
    $items[] = l($word, $search_path . '/' . $word, array(
      'attributes' => $attributes,
      'absolute' => TRUE,
    )) . " (" . $count . ")";
  }
  return implode(", ", $items);
}
/**
 * Return a listing of keywords for the Popular Searches block.
 *
 * The listing is built from the queries with results logged over the last
 * week (at most the 5000 newest ones) and cached for 10 minutes.
 *
 * @param string $page_id
 *   ID of the search page.
 * @param int $limit
 *   Maximum number of keywords to return.
 *
 * @return array
 *   An array of links, each one launching a search for a keyword; most
 *   frequent keyword first.
 */
function apachesolr_stats_block_frequent_keywords($page_id, $limit = 10) {
  $cid = "apachesolr_stats_block_frequent_keywords_{$page_id}";
  $cached = cache_get($cid, 'cache_block');
  // An empty listing is a valid cached result too.
  if ($cached && is_array($cached->data) && $cached->expire > time()) {
    return $cached->data;
  }

  $search_page = apachesolr_search_page_load($page_id);
  $search_path = isset($search_page['search_path']) ? $search_page['search_path'] : '';

  // Return keywords only for last week's logged queries.
  $keywords = array();
  $timestamp = time() - 3600 * 24 * 7;
  $result = db_query_range("SELECT keywords FROM {apachesolr_stats} WHERE
    numfound > 0 AND timestamp > %d AND page_id = '%s'
    ORDER BY timestamp DESC", $timestamp, $page_id, 0, 5000);
  while ($record = db_fetch_object($result)) {
    $keys_filtered = drupal_strtolower(trim($record->keywords));
    if ($keys_filtered == "") {
      continue;
    }
    // Keep track of individual keywords used
    if (!isset($keywords[$keys_filtered])) {
      $keywords[$keys_filtered] = 0;
    }
    $keywords[$keys_filtered]++;
  }

  // Sort by most frequent first and get the first $limit items. The keys have
  // to be preserved, otherwise numeric keywords would be renumbered.
  arsort($keywords);
  $keywords = array_slice($keywords, 0, $limit, TRUE);

  $links = array();
  foreach ($keywords as $key => $frequency) {
    // Numeric keywords come back as integer keys.
    $key = (string) $key;
    $links[] = l($key, $search_path . '/' . $key);
  }

  // Cache information for 10 minutes.
  cache_set($cid, $links, 'cache_block', time() + 600);
  return $links;
}
/**
 * Implementation of hook_block().
 *
 * Provides one "popular searches" block per search page that has logging
 * enabled and still exists; the block delta is the search page ID.
 *
 * @param string $op
 *   The operation; 'list' and 'view' are handled.
 * @param string $delta
 *   The block delta, i.e. the ID of a search page.
 *
 * @return array|null
 *   For 'list', the block definitions keyed by delta. For 'view', the block
 *   array; it is empty when the page is not enabled or unknown, and nothing
 *   is returned when there are no keywords to show.
 */
function apachesolr_stats_block($op = 'list', $delta = 0) {
  $search_pages = apachesolr_search_load_all_search_pages();
  $enabled_search_pages = variable_get('apachesolr_stats_enabled', array());

  switch ($op) {
    case 'list':
      $blocks = array();
      foreach ($enabled_search_pages as $page_id) {
        // Entries with a falsy value are disabled pages; enabled pages may
        // have been deleted since the setting was saved.
        if ($page_id && isset($search_pages[$page_id])) {
          $blocks[$page_id] = array(
            'info' => t('Apache Solr Statistics: popular searches for @page_id page', array(
              '@page_id' => $search_pages[$page_id]['label'],
            )),
            // Start out disabled
            'status' => 0,
            'region' => 'right',
            // Should be cached equally across paths and roles.
            'cache' => BLOCK_CACHE_GLOBAL,
          );
        }
      }
      return $blocks;

    case 'view':
      if (empty($enabled_search_pages[$delta]) || !isset($search_pages[$delta])) {
        return array(
          'subject' => '',
          'content' => '',
        );
      }
      $links = apachesolr_stats_block_frequent_keywords($delta);
      if ($links) {
        // Return a block array.
        return array(
          'subject' => t('Popular searches for @page_name', array(
            '@page_name' => $search_pages[$delta]['label'],
          )),
          'content' => theme('apachesolr_stats_block', $links),
        );
      }
      break;
  }
}
/**
 * Implementation of hook_theme().
 *
 * Registers the theme hook that renders the content of the popular searches
 * block.
 *
 * @return array
 *   The theme hook definitions.
 *
 * @see theme_apachesolr_stats_block()
 */
function apachesolr_stats_theme() {
  return array(
    'apachesolr_stats_block' => array(
      // 'arguments' maps each argument name to its default value.
      'arguments' => array(
        'links' => array(),
      ),
    ),
  );
}
/**
 * Themes the content of the popular searches block as an item list.
 *
 * @param array $links
 *   An array, each element is a link to launch a search for that word.
 *
 * @return string
 *   HTML, the themed block content.
 */
function theme_apachesolr_stats_block($links) {
  $list_html = theme('item_list', $links);
  return $list_html;
}
Functions
Name |
Description |
---|---|
apachesolr_stats_admin | Build the settings form. |
apachesolr_stats_apachesolr_query_alter | Implementation of hook_apachesolr_query_alter(). |
apachesolr_stats_block | Implementation of hook_block(). |
apachesolr_stats_block_frequent_keywords | Return a listing of keywords for the Popular Searches block. |
apachesolr_stats_chart | Generate an IMG tag with the URL to generate a chart using Google Charts API. |
apachesolr_stats_cron | Implementation of hook_cron(). |
apachesolr_stats_encodedata | Encode data using Chart's simple encoding. See http://code.google.com/apis/chart/formats.html#simple |
apachesolr_stats_exit | Implementation of hook_exit(). |
apachesolr_stats_facet_usage_graph | Returns the <img> tag for a Google chart with the facet usage |
apachesolr_stats_facet_usage_table | Returns a themed table for facet usage |
apachesolr_stats_generate_report_elements | Generates report elements for the given granularity. |
apachesolr_stats_get_active_facets | Gets the searcher's active facets from Facet API. |
apachesolr_stats_get_facets | Returns the facet array to report on. |
apachesolr_stats_get_granularities | Returns an array of preset granularities. |
apachesolr_stats_get_search_page_by_path | Return a search page by the base path, e.g. 'search/site' |
apachesolr_stats_menu | Implementation of hook_menu(). |
apachesolr_stats_report | Callback for admin/reports/apachesolr/stats. |
apachesolr_stats_report_frequent_keywords | Receives an array of keyword => count and reports the top-used terms. |
apachesolr_stats_theme | Implementation of hook_theme(). |
theme_apachesolr_stats_block | Theme content for apachesolr_stats_block(). |