apachesolr_stats.module in Apache Solr Statistics 6
Same filename and directory in other branches
Keeps and reports statistics about Apache Solr usage and performance.
File
apachesolr_stats.moduleView source
<?php
/**
* @file
* Keeps and reports statistics about Apache Solr usage and performance.
*/
/**
 * Implementation of hook_menu().
 *
 * Declares four router items: the settings form, the statistics report, the
 * Google Gadget XML description, and the callback the gadget's javascript
 * uses to fetch one report element.
 */
function apachesolr_stats_menu() {
  $items = array();

  // Module settings form.
  $items['admin/settings/apachesolr/stats'] = array(
    'title' => 'Statistics',
    'description' => 'Apache Solr Statistics settings to measure usage and performance.',
    'page callback' => 'drupal_get_form',
    'page arguments' => array('apachesolr_stats_admin'),
    'access arguments' => array('administer search'),
    'type' => MENU_LOCAL_TASK,
  );

  // Usage and performance report.
  $items['admin/reports/apachesolr/stats'] = array(
    'title' => 'Statistics',
    'description' => 'Report of Apache Solr usage and performance.',
    'page callback' => 'apachesolr_stats_report',
    'page arguments' => array(),
    'access arguments' => array('administer search'),
    'type' => MENU_LOCAL_TASK,
  );

  // XML description of the Google Gadget.
  $items['apachesolr_stats/gadget'] = array(
    'title' => 'Apache Solr Google Gadget',
    'page callback' => 'apachesolr_stats_report_gadget',
    'description' => 'Provides content for Google Gadget.',
    'page arguments' => array(),
    'access arguments' => array('access content'),
    'type' => MENU_CALLBACK,
  );

  // Single report element; path parts 2, 3 and 4 are handed to the callback.
  $items['apachesolr_stats/element/%/%/%'] = array(
    'title' => 'Apache Solr Google Gadget Elements',
    'page callback' => 'apachesolr_stats_report_gadget_element',
    'description' => 'Provides content for Google Gadget.',
    'page arguments' => array(2, 3, 4),
    'access arguments' => array('access content'),
    'type' => MENU_CALLBACK,
  );

  return $items;
}
/**
 * Build the settings form.
 *
 * Offers the logging on/off switch, the log retention period, the IP and role
 * blacklists and the Google Gadget secret key. When a key is already stored,
 * links to the gadget source and to the "Add to Google" page are shown too.
 *
 * @return array
 *   The form array, passed through system_settings_form().
 */
function apachesolr_stats_admin() {
  $form = array();
  $form['tip'] = array(
    '#type' => 'markup',
    '#value' => t('You can also visit the <a href="@report-url">report page</a>.', array(
      '@report-url' => url('admin/reports/apachesolr/stats'),
    )),
  );
  $form['apachesolr_stats_enabled'] = array(
    '#type' => 'radios',
    '#title' => t('Enable query log'),
    '#default_value' => variable_get('apachesolr_stats_enabled', 0),
    '#options' => array(
      '1' => t('Enabled'),
      '0' => t('Disabled'),
    ),
    '#description' => t('Log information about all queries launched via the Apache Solr Search Integration module.'),
  );

  // Retention periods, in seconds, labelled with format_interval().
  $periods = drupal_map_assoc(array(
    3600,
    10800,
    21600,
    32400,
    43200,
    86400,
    172800,
    259200,
    604800,
    1209600,
    2419200,
    4838400,
    9676800,
  ), 'format_interval');
  $form['apachesolr_stats_flush_log_timer'] = array(
    '#type' => 'select',
    '#title' => t('Discard query logs older than'),
    '#default_value' => variable_get('apachesolr_stats_flush_log_timer', 259200),
    '#options' => $periods,
    '#description' => t('Older query log entries will be automatically discarded. (Requires a correctly configured <a href="@cron">cron maintenance task</a>.)', array(
      '@cron' => url('admin/reports/status'),
    )),
  );

  // Blacklist settings.
  $form['access'] = array(
    '#type' => 'fieldset',
    '#title' => t('Log blacklist'),
    '#description' => t('Note: Changing this does not alter existing logged queries.'),
  );
  $form['access']['apachesolr_stats_ignore_ip_list'] = array(
    '#type' => 'textarea',
    '#title' => t('IP addresses that will not be logged'),
    '#default_value' => variable_get('apachesolr_stats_ignore_ip_list', ''),
    '#description' => t('Enter IP addresses (e.g.: 192.168.1.2), one per line. You can match entire subnets using a partial IP address ending with a period (e.g.: 192.168.)'),
  );
  $form['access']['apachesolr_stats_ignore_role_list'] = array(
    '#type' => 'checkboxes',
    '#title' => t('User roles that will not be logged'),
    '#options' => user_roles(),
    '#default_value' => variable_get('apachesolr_stats_ignore_role_list', array()),
    '#description' => t('Check all roles which should not be logged.'),
  );

  // Google gadget settings.
  $form['gadget'] = array(
    '#type' => 'fieldset',
    '#title' => t('Google Gadget settings'),
    '#description' => t('You can embed statistics displays via Google Gadget by configuring the setting below.'),
  );
  $key = variable_get("apachesolr_stats_gadget_key", "");
  $form['gadget']['apachesolr_stats_gadget_key'] = array(
    '#type' => 'textfield',
    '#title' => t('Google Gadget secret key'),
    '#description' => t("Enter a string that will be embedded in the Gadget URL. Leave empty to disable. WARNING: changing this setting will deactivate all installed gadgets; users can re-enable them by entering the new key in their gadget's preferences."),
    '#default_value' => $key,
  );
  if ($key) {
    $gadget_url = url("apachesolr_stats/gadget/{$key}", array(
      'absolute' => TRUE,
    ));
    $gadget_embed_url = 'http://fusion.google.com/add?source=atgs&moduleurl=' . urlencode($gadget_url);
    // The URL contains a raw "&", so it is escaped before being placed in the
    // href attribute; l() below does its own escaping.
    $gadget_embed_html = '<a href="' . check_plain($gadget_embed_url) . '"><img src="http://buttons.googlesyndication.com/fusion/add.gif" border="0" alt="' . t('Add to Google') . '" /></a>';
    $form['gadget']['embed_link'] = array(
      '#type' => 'markup',
      '#value' => t('The gadget is currently available at these URLs:')
        . '<ul>'
        . '<li>' . l(t('Source'), $gadget_url) . '</li>'
        . '<li>' . l(t('Embed'), $gadget_embed_url) . ' ' . $gadget_embed_html . '</li>'
        . '</ul>',
    );
  }
  return system_settings_form($form);
}
/**
 * Implementation of hook_apachesolr_modify_query().
 *
 * When query logging is enabled, asks Solr for debug output on searches
 * issued by 'apachesolr_search' so that timing data comes back with the
 * response.
 */
function apachesolr_stats_apachesolr_modify_query(&$query, &$params, $caller) {
  $logging_enabled = variable_get('apachesolr_stats_enabled', 0);
  if (!$logging_enabled) {
    return;
  }
  if ($caller != 'apachesolr_search') {
    return;
  }
  // Add the debug query argument.
  // See: http://wiki.apache.org/solr/CommonQueryParameters#head-f45a9396425956a4db8d6478ed6029adfb7b0858
  $params['debugQuery'] = 'true';
}
/**
 * Implementation of hook_exit().
 *
 * This is the spot where actual logging takes place: when a Solr search ran
 * during this request, one row describing it is written to the
 * {apachesolr_stats} table. Nothing is logged when logging is disabled, when
 * the request IP or one of the user's roles is blacklisted, or when no search
 * was performed.
 */
function apachesolr_stats_exit() {
  if (!variable_get('apachesolr_stats_enabled', 0)) {
    return;
  }
  // Apparently there can be cases where some modules aren't loaded.
  if (!function_exists('apachesolr_has_searched')) {
    return;
  }

  // Ignore certain IPs. An entry ending in "." (or ":" for IPv6) is a prefix
  // that matches a whole subnet; any other entry must match the address
  // exactly, so that "192.168.1.2" does not also silence "192.168.1.20".
  $ignore_list = trim(variable_get('apachesolr_stats_ignore_ip_list', ''));
  if ($ignore_list !== '') {
    $request_ip_address = ip_address();
    foreach (preg_split('/\s+/', $ignore_list) as $ip) {
      if ($ip === '') {
        continue;
      }
      $last_char = substr($ip, -1);
      if ($last_char === '.' || $last_char === ':') {
        $ignored = (strpos($request_ip_address, $ip) === 0);
      }
      else {
        $ignored = ($request_ip_address === $ip);
      }
      if ($ignored) {
        return;
      }
    }
  }

  // Ignore certain roles. The stored checkbox values are role ids for checked
  // roles and 0 for unchecked ones; 0 never matches a real role id.
  global $user;
  $roles_ignore_list = (array) variable_get('apachesolr_stats_ignore_role_list', array());
  $ignored_roles = array_intersect(array_keys($user->roles), array_values($roles_ignore_list));
  if (count($ignored_roles) > 0) {
    return;
  }

  if (!apachesolr_has_searched()) {
    return;
  }
  $response = apachesolr_static_response_cache();
  $query = apachesolr_current_query();
  if (!is_object($response) || !is_object($query)) {
    return;
  }
  $url_queryvalues = $query->get_url_queryvalues();

  // Stored as a 0/1 flag: 1 when the spellchecker returned anything at all.
  $showed_suggestions = 0;
  if (isset($response->spellcheck) && isset($response->spellcheck->suggestions) && $response->spellcheck->suggestions != NULL) {
    $suggestions = $response->spellcheck->suggestions;
    $suggestion_list = is_object($suggestions) ? get_object_vars($suggestions) : (array) $suggestions;
    $showed_suggestions = empty($suggestion_list) ? 0 : 1;
  }

  // Timing data is only present when debugQuery was requested for this
  // search; fall back to 0 instead of reading undefined properties.
  $total_time = isset($response->debug->timing->time) ? $response->debug->timing->time : 0;
  $prepare_time = isset($response->debug->timing->prepare->time) ? $response->debug->timing->prepare->time : 0;
  $process_time = isset($response->debug->timing->process->time) ? $response->debug->timing->process->time : 0;
  $num_found = isset($response->response->numFound) ? $response->response->numFound : 0;
  $solr_params = isset($response->responseHeader->params) ? $response->responseHeader->params : NULL;

  db_query("INSERT INTO {apachesolr_stats}\n (timestamp, uid, sid, numfound, showed_suggestions, total_time, prepare_time, process_time, page, keywords, filters, sort, params)\n VALUES\n (%d, %d, '%s', %d, %d, %d, %d, %d, '%s', '%s','%s','%s','%s')",
    time(),
    $user->uid,
    session_id(),
    $num_found,
    $showed_suggestions,
    $total_time,
    $prepare_time,
    $process_time,
    isset($_GET['page']) ? $_GET['page'] : '',
    $query->get_query_basic(),
    isset($url_queryvalues['filters']) ? $url_queryvalues['filters'] : '',
    isset($url_queryvalues['solrsort']) ? $url_queryvalues['solrsort'] : '',
    serialize($solr_params)
  );
}
/**
 * Callback function that outputs an XML description for a Google Gadget
 * and terminates PHP execution.
 *
 * The gadget is only served when a secret key is configured and the same key
 * is present in the request path (apachesolr_stats/gadget/KEY); Drupal hands
 * the trailing path components to this callback as arguments. Any other
 * request gets "Invalid request", because the XML embeds the key as the
 * gadget's default preference.
 *
 * @param string $requested_key
 *   Secret key taken from the request path.
 *
 * @see apachesolr_stats_menu()
 */
function apachesolr_stats_report_gadget($requested_key = '') {
  // Re-join all trailing path components in case the key contains a slash.
  $path_parts = func_get_args();
  $requested_key = implode('/', $path_parts);

  $settings_key = (string) variable_get("apachesolr_stats_gadget_key", "");
  if ($settings_key === '' || $settings_key !== $requested_key) {
    echo t("Invalid request");
    exit;
  }

  $granularities = apachesolr_stats_get_granularities();
  $report_elements = apachesolr_stats_generate_report_elements($granularities["hour"]);

  // Generate options. Values end up inside XML attributes, so escape them.
  $element_options = "";
  foreach ($report_elements as $id => $element) {
    $display_value = check_plain($element['name']);
    $element_options .= " <EnumValue value=\"{$id}\" display_value=\"{$display_value}\" />\n";
  }
  $granularity_options = "";
  foreach ($granularities as $id => $granularity) {
    $display_value = check_plain($granularity['last_msg']);
    $granularity_options .= " <EnumValue value=\"{$id}\" display_value=\"{$display_value}\"/>\n";
  }

  // t() already escapes the @sitename placeholder.
  $title = t('Search statistics for @sitename', array(
    '@sitename' => variable_get('site_name', ''),
  ));
  $access_key = check_plain(t('Access key'));
  $var_to_show = check_plain(t('Variable to show'));
  $timespan_to_report = check_plain(t('Time span to report'));
  $loading = t('Loading...');
  $default_key = check_plain($settings_key);

  // Base URL of the element callback. Building it with url() keeps it valid
  // with clean URLs disabled, in which case the cache-busting random number
  // has to be appended with "&" rather than "?".
  $element_base_url = url('apachesolr_stats/element', array(
    'absolute' => TRUE,
  ));
  $cache_buster_sep = (strpos($element_base_url, '?') === FALSE) ? '?' : '&';

  // Send correct header for XML
  header("Content-Type: text/xml; charset=utf-8");

  // Output the gadget XML description
  echo <<<HEREDOC
<?xml version="1.0" encoding="UTF-8" ?>
<Module>
<ModulePrefs
title="{$title}"
author="Alejandro Garza"
author_email="alejandro.garza@itesm.mx"
description="Google widget for Drupal Apache Solr Statistics module. More info: http://drupal.org/project/apachesolr_stats"
screenshot="http://chart.apis.google.com/chart?cht=lc&amp;chs=300x200&amp;chdlp=b&amp;chma=30,100,20,20&amp;chd=s:NlVabjGXUaM&amp;chxp=1,22"
thumbnail="http://chart.apis.google.com/chart?cht=lc&amp;chs=200x100&amp;chdlp=b&amp;chma=30,100,20,20&amp;chd=s:NlVabjGXUaM&amp;chxp=1,22"
height="180" >
<Require feature="dynamic-height"/>
</ModulePrefs>
<UserPref name="key" display_name="{$access_key}" required="true" default_value="{$default_key}" datatype="string" />
<UserPref name="element" display_name="{$var_to_show}" default_value="total_queries_per" datatype="enum" >
{$element_options}
</UserPref>
<UserPref name="granularity" display_name="{$timespan_to_report}" default_value="day" datatype="enum" >
{$granularity_options}
</UserPref>
<Content type="html"><![CDATA[
<div id="content_div">{$loading}</div>
<script type="text/javascript">
function getHtml() {
var params = {};
var prefs = new gadgets.Prefs();
var element = prefs.getString("element");
var granularity = prefs.getString("granularity");
var key = prefs.getString("key");
var rand = Math.floor(Math.random()*9999);
params[gadgets.io.RequestParameters.CONTENT_TYPE] = gadgets.io.ContentType.TEXT;
var url = "{$element_base_url}/" + encodeURIComponent(granularity) + "/" + encodeURIComponent(element) + "/" + encodeURIComponent(key) + "{$cache_buster_sep}" + rand;
gadgets.io.makeRequest(url, response, params);
};
function response(obj) {
//obj.text contains the text of the page that was requested
document.getElementById('content_div').innerHTML = obj.text;
setTimeout("gadgets.window.adjustHeight()", 1000);
};
gadgets.util.registerOnLoadHandler(getHtml);
</script>
]]>
</Content>
</Module>
HEREDOC;
  exit;
}
/**
 * Callback function used by Gadget javascript to fetch a particular element.
 *
 * Prints the HTML for one report element and terminates PHP execution. All
 * three arguments come straight from the request path, so they are escaped
 * before being echoed back in error messages.
 *
 * @param string $requested_granularity
 *   Granularity of report to use.
 * @param string $requested_element
 *   ID of report element to return.
 * @param string $requested_key
 *   Secret key given by gadget.
 * @see apachesolr_stats_menu()
 */
function apachesolr_stats_report_gadget_element($requested_granularity, $requested_element, $requested_key) {
  $settings_key = (string) variable_get("apachesolr_stats_gadget_key", "");
  // An empty key means the gadget feature is disabled.
  if ($settings_key === '') {
    echo "Invalid request: no local key set";
    exit;
  }
  // Strict comparison: loose "!=" would treat numeric-looking strings such as
  // "1e3" and "1000" as equal.
  if ($settings_key !== (string) $requested_key) {
    echo "Invalid request: invalid key " . check_plain($requested_key);
    exit;
  }

  $granularities = apachesolr_stats_get_granularities();
  if (empty($granularities[$requested_granularity])) {
    echo "Invalid request: bad granularity " . check_plain($requested_granularity);
    exit;
  }

  $report_elements = apachesolr_stats_generate_report_elements($granularities[$requested_granularity]);
  if (isset($report_elements[$requested_element])) {
    $report_element = $report_elements[$requested_element];
    echo "<b>" . $report_element['name'] . "</b><br />\n";
    echo "<div style='font-size:80%'>" . $report_elements['span']['value'] . "</div>\n";
    // Undo entity-encoding of ampersands in the element's markup.
    $value = str_replace('&amp;', '&', $report_element['value']);
    echo "<div>{$value}</div>\n";
    exit;
  }

  echo "Invalid request: bad element " . check_plain($requested_element);
  exit;
}
/**
 * Callback for admin/reports/apachesolr/stats.
 *
 * @param string $picked_granularity
 *   Key of the granularity to use for the report, as returned by
 *   apachesolr_stats_get_granularities(). Unknown keys fall back to "minute".
 * @return string
 *   The page output as HTML.
 * @see apachesolr_stats_menu()
 */
function apachesolr_stats_report($picked_granularity = "minute") {
  drupal_set_title(t("Apache Solr statistics report"));
  if (!variable_get('apachesolr_stats_enabled', 0)) {
    return t('Logging is disabled in the !link. Enable it to log Apache Solr queries.', array(
      '!link' => l(t('module configuration page'), 'admin/settings/apachesolr/stats'),
    ));
  }

  $granularities = apachesolr_stats_get_granularities();
  // Check if the given argument exists; if not, reset to "minute".
  if (!isset($granularities[$picked_granularity])) {
    $picked_granularity = "minute";
  }
  $granularity = $granularities[$picked_granularity];

  // Process latest log entries
  $report_elements = apachesolr_stats_generate_report_elements($granularity);

  // Granularity picker: offer only the time spans that fit inside the log
  // retention period; "all" is always offered.
  $timer_max = variable_get('apachesolr_stats_flush_log_timer', 259200);
  $output = "<div class='granularity'>" . t('Choose the report time span:');
  foreach ($granularities as $name => $option) {
    if ($name != "all" && $option['time_before'] > $timer_max) {
      continue;
    }
    $output .= " ";
    if ($name != $picked_granularity) {
      $output .= l($option["last_msg"], "admin/reports/apachesolr/stats/{$name}");
    }
    else {
      $output .= "<strong>" . $option["last_msg"] . "</strong>";
    }
  }
  $output .= "</div><hr>";

  if (!$report_elements) {
    drupal_set_message(t('There is not enough stored data to build a report.'));
    return $output;
  }

  // Report description
  $output .= t('This is an overview of Apache Solr usage and performance.');
  $output .= ' ' . t('You can also visit the !settingslink.', array(
    '!settingslink' => l(t('settings page'), 'admin/settings/apachesolr/stats'),
  ));

  // Add Google Gadgets embedding link
  $key = variable_get("apachesolr_stats_gadget_key", "");
  if ($key) {
    $gadget_url = url("apachesolr_stats/gadget/{$key}", array(
      'absolute' => TRUE,
    ));
    $gadget_embed_html = '<a href="http://fusion.google.com/add?source=atgs&amp;moduleurl=' . urlencode($gadget_url) . '"><img src="http://buttons.googlesyndication.com/fusion/add.gif" border="0" alt="' . t('Add to Google') . '"></a>';
    $output .= "<div style='text-align:right'>" . l(t('Embed as Google Gadget'), "http://www.google.com/ig/adde?moduleurl=" . urlencode($gadget_url)) . " {$gadget_embed_html}</div>";
  }

  // Render report elements as a two-column table: name, value.
  $rows = array();
  foreach ($report_elements as $id => $data) {
    $rows[] = array(
      "data" => array(
        array(
          'data' => $data['name'],
          'header' => true,
          'style' => 'width:33%',
        ),
        array(
          'data' => $data['value'],
        ),
      ),
    );
  }
  $output .= theme('table', array(), $rows);
  return $output;
}
/**
 * Generate an IMG tag with the URL to generate a chart using Google Charts API.
 *
 * The series is drawn oldest-first (left) to newest (right) and is padded
 * with zeros up to the current timeslot, matching the earliest / middle /
 * latest time labels on the x axis.
 *
 * @param array $granularity
 *   The granularity to use; its 'timespan' and 'format' entries are read.
 * @param array $data
 *   The array of data to chart, keyed by timeslot.
 * @param integer $start_timeslot
 *   The index of the first data element to chart.
 * @param integer $last_timeslot
 *   The index of the last data element to chart.
 * @param integer $total_queries
 *   Integer with the total number of queries included in this chart.
 *   Not used by the current implementation.
 * @param mixed $average
 *   A number to show as an "average" label on the right-hand axis, or FALSE
 *   to show no average.
 * @return string
 *   HTML for the chart's IMG tag.
 */
function apachesolr_stats_chart($granularity, $data, $start_timeslot, $last_timeslot, $total_queries, $average = FALSE) {
  // Sample: http://chart.apis.google.com/chart?cht=lc&chs=350x100&chdlp=b&chma=10,10,10,10&chd=s:[encoded chart data]
  $chart_prefix = 'http://chart.apis.google.com/chart?cht=lc&chs=350x100';
  $chart_prefix .= '&chdlp=b&chma=30,100,20,20&chd=s:';

  // Collect one value per timeslot in chronological order.
  $chd = array();
  $chd_min = 9999999;
  $chd_max = 0;
  for ($t = $start_timeslot; $t <= $last_timeslot; $t++) {
    $num = isset($data[$t]) ? intval($data[$t]) : 0;
    $chd_min = min($chd_min, $num);
    $chd_max = max($chd_max, $num);
    $chd[] = $num;
  }

  // Add missing data for time between last timeslot and current time; these
  // slots are the most recent ones, so they go at the end of the series.
  $current_timeslot = intval(time() / $granularity['timespan']);
  for ($t = $last_timeslot + 1; $t <= $current_timeslot; $t++) {
    $chd_min = min($chd_min, 0);
    $chd[] = 0;
  }

  // Fix min if min and max are the same.
  if ($chd_min == $chd_max) {
    $chd_min = 0;
  }

  // Generate basic image URL
  $image_url = $chart_prefix . apachesolr_stats_encodedata($chd, $chd_min, $chd_max);

  // Add labels
  if ($chd_max > 0) {
    // Axis 0: y-axis labels.
    $chxl = "0:|" . intval($chd_min) . "|" . intval($chd_max);
    if ($average !== FALSE) {
      // Axis 1: average value in a label on the right-hand side, placed
      // relative to the labelled min..max range and kept inside 0..100.
      $position = intval(($average - $chd_min) / ($chd_max - $chd_min) * 100);
      $position = max(0, min(100, $position));
      $image_url .= "&chxp=1," . $position;
      $chxl .= sprintf("|1:|%s=%.2f", t('average'), $average);
      $chxt = "y,r";
      $time_axis = 2;
    }
    else {
      $chxt = "y";
      $time_axis = 1;
    }

    // Add time/date labels on the next free axis index.
    $earliest_timestamp = $start_timeslot * $granularity['timespan'];
    $last_timestamp = $current_timeslot * $granularity['timespan'];
    $mid_timestamp = ($last_timestamp + $earliest_timestamp) / 2;
    $time_msg_1 = drupal_urlencode(strftime($granularity['format'], $earliest_timestamp));
    $time_msg_2 = drupal_urlencode(strftime($granularity['format'], $mid_timestamp));
    $time_msg_3 = drupal_urlencode(strftime($granularity['format'], $last_timestamp));
    $chxt = "{$chxt},x";
    $chxl .= "|{$time_axis}:|{$time_msg_1}|{$time_msg_2}|{$time_msg_3}";
    $image_url .= "&chxl={$chxl}&chxt={$chxt}";
  }

  // Return the image tag
  return "<img src='{$image_url}' />";
}
/**
 * Encode data using Chart's simple encoding.
 * See http://code.google.com/apis/chart/formats.html#simple
 *
 * Each value is mapped linearly from the range [$chd_min, $chd_max] onto the
 * 62 characters of the simple-encoding alphabet, so $chd_min encodes as "A"
 * and $chd_max as "9". Values outside the range are clamped to the nearest
 * end. When the range is empty every value encodes as "A".
 *
 * @param array $chd
 *   an array of numeric values to encode.
 * @param integer $chd_min
 *   the smallest value to encode.
 * @param integer $chd_max
 *   the greatest value to encode.
 * @return string
 *   a string representing the Google Charts API simple encoding of the data;
 *   empty when $chd is not an array.
 */
function apachesolr_stats_encodedata($chd, $chd_min, $chd_max) {
  $encoder_string = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789';
  $top_index = strlen($encoder_string) - 1;
  $range = $chd_max - $chd_min;
  $encoded_values = '';
  if (is_array($chd)) {
    foreach ($chd as $value) {
      $index = 0;
      // Guard against division by zero when all values are equal.
      if ($range > 0) {
        $index = (int) floor(($value - $chd_min) / $range * $top_index);
        $index = max(0, min($top_index, $index));
      }
      $encoded_values .= $encoder_string[$index];
    }
  }
  // Google does not like single-valued line charts; fix that.
  if (strlen($encoded_values) == 1) {
    $encoded_values = "99";
  }
  return $encoded_values;
}
/**
 * Implementation of hook_cron().
 *
 * Deletes log rows older than the configured retention period
 * ('apachesolr_stats_flush_log_timer', 259200 seconds by default).
 */
function apachesolr_stats_cron() {
  $retention = variable_get('apachesolr_stats_flush_log_timer', 259200);
  $cutoff = time() - $retention;
  db_query('DELETE FROM {apachesolr_stats} WHERE timestamp < %d', $cutoff);
}
/**
 * Determine the facet field from a word: im_vid_XX, kw or XXX (anything before ":", like tid, uid, etc)
 *
 * @param array $facets
 *   Known facets keyed by facet field id.
 * @param string $word
 *   One whitespace-separated token of a logged filter string, e.g. "tid:12".
 * @return mixed
 *   The facet field id, or FALSE when the word is a plain keyword, names a
 *   field that is not a known facet, or refers to a taxonomy term that no
 *   longer exists.
 */
function apachesolr_stats_determine_field_from_query($facets, $word) {
  // No ":" found, so it's a keyword.
  if (strpos($word, ':') === FALSE) {
    return FALSE;
  }
  list($fieldname, $value) = explode(':', $word, 2);
  if ($fieldname === 'tid') {
    // Replace tid with the correct facet field name (im_vid_XXX) where
    // XXX = vid. The term may have been deleted since the query was logged.
    $term = taxonomy_get_term(intval($value));
    if (!$term || empty($term->vid)) {
      return FALSE;
    }
    return 'im_vid_' . $term->vid;
  }
  // Check if $fieldname is really a facet, if not count as keyword.
  if (empty($facets[$fieldname])) {
    return FALSE;
  }
  return $fieldname;
}
/**
 * Returns a themed table for facet usage
 *
 * @param array $facets
 *   An array of calculated data to report, keyed by facet field id. Each
 *   entry provides 'info' and 'usage'; the 'any' entry holds the total number
 *   of queries that percentages are computed against.
 * @return string
 *   HTML for a themed table containing the report data.
 */
function apachesolr_stats_facet_usage_table($facets) {
  // Report usage in table
  $header = array(
    array(
      'data' => t('Facet ID'),
      'Xfield' => 'id',
      'sort' => 'asc',
    ),
    array(
      'data' => t('Facet info'),
      'Xfield' => 'info',
      'sort' => '',
    ),
    array(
      'data' => t('Queries containing this facet'),
      'Xfield' => 'queries',
      'sort' => '',
    ),
    array(
      'data' => t('% of queries containing this facet'),
      'Xfield' => 'queries',
      'sort' => '',
    ),
  );

  $total_queries = isset($facets['any']['usage']) ? $facets['any']['usage'] : 0;
  $rows = array();
  foreach ($facets as $fieldname => $facet) {
    $usage = isset($facet['usage']) ? $facet['usage'] : 0;
    // Avoid dividing by zero when no query has been counted.
    $percentage = $total_queries > 0 ? $usage / $total_queries * 100 : 0;
    $rows[$fieldname] = array(
      $fieldname,
      isset($facet['info']) ? $facet['info'] : '',
      $usage,
      sprintf("%2.1f%%", $percentage),
    );
  }

  $output = theme('table', $header, $rows, array(
    'style' => 'font-size:80%',
  ));
  return $output;
}
/**
 * Returns the <img> tag for a Google chart with the facet usage
 *
 * @param array $facets
 *   An array of calculated data to report, keyed by facet field id. Each
 *   entry provides 'info' and 'usage'; the 'any' entry holds the total number
 *   of queries that percentages are computed against.
 * @return string
 *   HTML for an IMG tag to a Google chart.
 */
function apachesolr_stats_facet_usage_graph($facets) {
  // Chart for field usage
  $legends = array();
  $data = array();
  $label_cutoff = 40;
  $total_queries = isset($facets['any']['usage']) ? $facets['any']['usage'] : 0;
  foreach ($facets as $fieldname => $facet) {
    $info = isset($facet['info']) ? $facet['info'] : '';
    $usage = isset($facet['usage']) ? $facet['usage'] : 0;
    $legend = preg_replace("/^.*ilter by /", "", $info);
    // Use the multibyte-aware helpers so a label is never cut in the middle
    // of a UTF-8 character.
    if (drupal_strlen($legend) > $label_cutoff) {
      $legend = drupal_substr($legend, 0, $label_cutoff) . "...";
    }
    $legends[] = drupal_urlencode($legend);
    // Avoid dividing by zero when no query has been counted.
    $data[] = $total_queries > 0 ? $usage / $total_queries * 100 : 0;
  }
  $chd = 's:' . apachesolr_stats_encodedata($data, 0, 100);
  // The labels are listed in the opposite order to the data series.
  // NOTE(review): presumably because the chart lists y-axis labels bottom-up
  // while drawing bars top-down; confirm against the Chart API documentation.
  $chl = implode('|', array_reverse($legends));
  $height = 30 + sizeof($legends) * 28;
  // Percentage labels
  $chm = "N*f0*%,000000,0,-1,11";
  $chart = "<img src='http://chart.apis.google.com/chart?chxt=y&cht=bhs&chma=20,20,20,20&chd={$chd}&chs=350x{$height}&chds=0,100&chxl=0:|{$chl}&chm={$chm}' />";
  return $chart;
}
/**
 * Returns the facet array to report on.
 *
 * The result holds every enabled facet, keyed by facet id, each with a
 * 'usage' counter set to 0, plus two virtual entries: 'kw' for keyword
 * searches and 'any' for all queries.
 *
 * @return array
 *   Facet definitions keyed by facet id.
 */
function apachesolr_stats_get_facets() {
  $declared = module_invoke_all('apachesolr_facets');
  $report_facets = array();

  // Keep only those enabled according to apachesolr_get_enabled_facets()
  $enabled_by_module = apachesolr_get_enabled_facets();
  foreach ($enabled_by_module as $module => $enabled) {
    foreach ($enabled as $key => $facet_id) {
      $entry = $declared[$key];
      $entry['usage'] = 0;
      $report_facets[$facet_id] = $entry;
    }
  }

  // Add some "virtual" facets to report on.
  $report_facets['kw'] = array(
    'facet_field' => 'kw',
    'info' => 'Keyword search',
    'usage' => 0,
  );
  $report_facets['any'] = array(
    'facet_field' => 'any',
    'info' => '[All queries including any filter and/or keywords]',
    'usage' => 0,
  );
  // A 'none' virtual facet ("[Clickthrus with no previous queries]") is
  // intentionally not reported.
  return $report_facets;
}
/**
 * Returns an array of preset granularities.
 *
 * Each preset provides: 'name' (label for one data point), 'timespan'
 * (seconds per data point), 'time_before' (seconds of history to report),
 * 'last_msg' (label of the reported span) and 'format' (strftime() format
 * for chart time labels).
 *
 * @return array
 *   an array of preset granularities for reports, keyed by 'minute', 'hour',
 *   'day' and 'all'.
 */
function apachesolr_stats_get_granularities() {
  $one_minute = 60;
  $one_hour = 60 * 60;
  $one_day = 60 * 60 * 24;

  $presets = array();
  // One point per minute, one day of history.
  $presets['minute'] = array(
    'name' => t('minute'),
    'timespan' => $one_minute,
    'time_before' => $one_day,
    'last_msg' => t('last day'),
    'format' => '%H:%M',
  );
  // One point per hour, one week of history.
  $presets['hour'] = array(
    'name' => t('hour'),
    'timespan' => $one_hour,
    'time_before' => $one_day * 7,
    'last_msg' => t('last week'),
    'format' => '%m/%d %H:%M',
  );
  // One point per day, 32 days of history.
  $presets['day'] = array(
    'name' => t('day'),
    'timespan' => $one_day,
    'time_before' => $one_day * 2 * 16,
    'last_msg' => t('last month'),
    'format' => '%m/%d',
  );
  // One point per day, 16 weeks of history.
  $presets['all'] = array(
    'name' => t('day'),
    'timespan' => $one_day,
    'time_before' => $one_day * 7 * 16,
    'last_msg' => t('all time (depends on settings)'),
    'format' => '%m/%d',
  );
  return $presets;
}
/**
 * Adds $amount to $counters[$key], creating the entry when it is missing.
 *
 * @param array $counters
 *   Array of counters, modified in place.
 * @param mixed $key
 *   Key of the counter to increase.
 * @param mixed $amount
 *   Amount to add; defaults to 1.
 */
function _apachesolr_stats_increment(&$counters, $key, $amount = 1) {
  if (!isset($counters[$key])) {
    $counters[$key] = 0;
  }
  $counters[$key] += $amount;
}

/**
 * Generates report elements for the given granularity.
 *
 * Reads the log rows newer than the granularity's 'time_before' and
 * aggregates them. Two totals are kept apart: "requests" counts every logged
 * row, including result pages beyond the first, and is what the request and
 * timing figures use; "queries" counts only rows for the first results page
 * and is what keyword, sort and facet usage are based on.
 *
 * @param array $granularity
 *   One entry of apachesolr_stats_get_granularities().
 * @return array
 *   An array keyed by element id with the report elements; each element is an
 *   array with the indexes:
 *   'name' => human-readable name of the element, e.g. "Total queries"
 *   'value' => html with the result. Can be an image, a number, etc.
 *   Empty when there is not enough logged data.
 */
function apachesolr_stats_generate_report_elements($granularity) {
  // Initialize
  $facets = apachesolr_stats_get_facets();
  $suggestions = 0;
  $users = array();
  $sessions = array();
  $timeslot = 0;
  $last_timeslot = 0;
  $first_timestamp = 0;
  $total_queries = 0;
  $total_requests = 0;
  $time = array(
    'total' => 0,
    'max' => -1,
    'min' => 9999.999,
  );
  $report_elements = array();
  $keywords = array();
  $keywords_noresults = array();
  $sort_usage = array();
  $session_slot = array();
  $per_slot = array(
    'sessions' => array(),
    'requests' => array(),
    'total_time' => array(),
  );

  // Scan the log newest-first and build statistics arrays
  $result = db_query("SELECT * FROM {apachesolr_stats} WHERE timestamp > %d ORDER BY timestamp DESC", time() - $granularity['time_before']);
  while ($record = db_fetch_object($result)) {
    $timeslot = intval($record->timestamp / $granularity['timespan']);
    // The first row is the newest one.
    if ($last_timeslot == 0) {
      $last_timeslot = $timeslot;
    }
    _apachesolr_stats_increment($users, $record->uid);
    _apachesolr_stats_increment($sessions, $record->sid);

    // Tally suggestions
    if ($record->showed_suggestions) {
      $suggestions++;
    }

    // Response times, over every request.
    $total_requests++;
    $time['total'] += $record->total_time;
    $time['max'] = $time['max'] < $record->total_time ? $record->total_time : $time['max'];
    $time['min'] = $time['min'] > $record->total_time ? $record->total_time : $time['min'];

    // Field, keyword and sort usage; only when on first results page
    // (meaning it's a fresh search).
    if ($record->page == "") {
      $facet_processed_flag = array();
      preg_match_all('/[^ ]+/', (string) $record->filters, $matches);
      foreach ($matches[0] as $word) {
        $fieldname = apachesolr_stats_determine_field_from_query($facets, $word);
        // Count each facet field only once per query.
        if ($fieldname != false && !isset($facet_processed_flag[$fieldname])) {
          if (!isset($facets[$fieldname])) {
            // A filter on a field that is not among the reported facets.
            $facets[$fieldname] = array(
              'facet_field' => $fieldname,
              'info' => '',
              'usage' => 0,
            );
          }
          $facets[$fieldname]['usage']++;
          $facet_processed_flag[$fieldname] = true;
        }
      }

      $keys_filtered = drupal_strtolower(trim($record->keywords));
      if ($keys_filtered != "") {
        $facets['kw']['usage']++;
        // Keep track of individual keywords used
        _apachesolr_stats_increment($keywords, $keys_filtered);
        // Count keywords with zero results; but only when no filters issued.
        if ($record->numfound == 0 && !$record->filters) {
          _apachesolr_stats_increment($keywords_noresults, $keys_filtered);
        }
      }

      // Count each unique query
      $facets["any"]['usage']++;
      $total_queries++;
      _apachesolr_stats_increment($sort_usage, $record->sort ? $record->sort : "relevance");
    }

    // Group some stats into timeslots (minutes, hours) to show trends
    if (empty($session_slot[$record->sid][$timeslot])) {
      _apachesolr_stats_increment($per_slot['sessions'], $timeslot);
      $session_slot[$record->sid][$timeslot] = TRUE;
    }
    _apachesolr_stats_increment($per_slot['requests'], $timeslot);
    _apachesolr_stats_increment($per_slot['total_time'], $timeslot, $record->total_time);
    $first_timestamp = $record->timestamp;
  }

  if (sizeof($sessions) == 0 || sizeof($users) == 0 || $total_queries == 0) {
    return array();
  }

  // The last row read is the oldest one.
  $start_timeslot = $timeslot;
  $slot_count = $last_timeslot - $start_timeslot + 1;
  $session_count = sizeof($sessions);
  $average_time = $time['total'] / $total_requests;

  $report_elements['span'] = array(
    'name' => t('Report span'),
    'value' => t('Last @interval (@startdate to @enddate)', array(
      '@interval' => format_interval(3600 + time() - $first_timestamp),
      '@startdate' => format_date($first_timestamp),
      '@enddate' => format_date(time()),
    )) . '<br />' . t('Data points in charts are one point per @granularity.', array(
      '@granularity' => $granularity['name'],
    )),
  );

  // Chart for requests per timeslot
  $chart = apachesolr_stats_chart($granularity, $per_slot['requests'], $start_timeslot, $last_timeslot, $total_requests, $total_requests / $slot_count);
  $report_elements['total_queries_per'] = array(
    'name' => t('Requests'),
    'value' => t('Total: @total', array(
      '@total' => $total_requests,
    )) . '<br />' . $chart,
  );

  // Chart for sessions per timeslot
  $chart = apachesolr_stats_chart($granularity, $per_slot['sessions'], $start_timeslot, $last_timeslot, $session_count, $session_count / $slot_count);
  $report_elements['total_sessions_per'] = array(
    'name' => t('Unique sessions'),
    'value' => t('Total: @total', array(
      '@total' => $session_count,
    )) . '<br />' . $chart,
  );
  $report_elements['avg_queries_session'] = array(
    'name' => t('Average requests per session'),
    'value' => sprintf("%.1f", $total_requests / $session_count),
  );

  // Chart for average time per timeslot
  $average_time_per_slot = array();
  foreach ($per_slot['total_time'] as $slot => $slot_time) {
    $average_time_per_slot[$slot] = $slot_time / $per_slot['requests'][$slot];
  }
  $chart = apachesolr_stats_chart($granularity, $average_time_per_slot, $start_timeslot, $last_timeslot, $total_requests, $average_time);
  $report_elements['query_avg_time'] = array(
    'name' => t('Average time per request (miliseconds)'),
    'value' => sprintf("%s: %.2f ms / %s: %.2f ms / %s: %.2f ms", t('Minimum'), $time['min'], t('Average'), $average_time, t('Maximum'), $time['max']) . '<br />' . $chart,
  );

  // Most-used keywords
  $report_elements['keywords'] = array(
    'name' => t('Top search phrases'),
    'value' => apachesolr_stats_report_frequent_keywords($keywords, $keywords_noresults),
  );

  // Most-used keywords with no results
  $report_elements['keywords_noresults'] = array(
    'name' => t('Top search phrases with no results'),
    'value' => apachesolr_stats_report_frequent_keywords($keywords_noresults, $keywords_noresults, "error"),
  );

  // Total spellchecker suggestions
  $report_elements['spellchecker'] = array(
    'name' => t('Total spellchecker suggestions'),
    'value' => $suggestions,
  );

  // Chart for sort usage
  $legends = array();
  foreach ($sort_usage as $key => $value) {
    $legends[] = drupal_urlencode($key);
  }
  $chl = implode('|', $legends);
  $chd = implode(',', $sort_usage);
  $chart = "<img src='http://chart.apis.google.com/chart?cht=p3&chd=t:{$chd}&chs=350x100&chl={$chl}' />";
  $report_elements['sort_usage'] = array(
    'name' => t('Sort usage'),
    'value' => $chart,
  );

  // Chart for field usage
  $report_elements['field_usage'] = array(
    'name' => t('Facet usage'),
    'value' => apachesolr_stats_facet_usage_graph($facets) . apachesolr_stats_facet_usage_table($facets),
  );
  return $report_elements;
}
/**
 * Receives an array of keyword => count and reports the top-used terms.
 *
 * Every reported keyword becomes a link to its search page, followed by its
 * count. The link's font size grows with the count, in 6 steps from 80%.
 *
 * @param array $keywords
 *   array of keyword => count pairs
 * @param array $keywords_noresults
 *   array keyed by the keywords that returned no results; links for these
 *   keywords get the CSS class "error".
 * @param string $class
 *   CSS class for the links of all other keywords.
 * @param integer $number
 *   Maximum number of keywords to report.
 * @return string
 *   Comma-separated HTML links, or an empty string when there is nothing to
 *   report.
 */
function apachesolr_stats_report_frequent_keywords($keywords, $keywords_noresults, $class = "", $number = 25) {
  if (empty($keywords)) {
    return '';
  }
  // Most frequent first; keep the top $number. Keys are preserved because
  // numeric keywords (e.g. "2010") are integer array keys that array_slice()
  // would otherwise renumber from 0.
  arsort($keywords);
  $slice = array_slice($keywords, 0, $number, TRUE);
  if (empty($slice)) {
    return '';
  }

  // Calculate font size for display
  $min = min($slice);
  $max = max($slice);
  $steps = 6;
  // Note: we need to ensure the range is slightly too large to make sure even
  // the largest element is rounded down.
  $range = max(0.01, $max - $min) * 1.0001;

  $items = array();
  foreach ($slice as $word => $count) {
    $word = (string) $word;
    $weight = floor($steps * ($count - $min) / $range);
    $font_size_pct = 80 + $weight * 12;
    // Decide the class per keyword so that one keyword without results does
    // not mark every following keyword as an error.
    $item_class = isset($keywords_noresults[$word]) ? "error" : $class;
    $items[] = l($word, "search/apachesolr_search/{$word}", array(
      'attributes' => array(
        'class' => $item_class,
        'style' => 'font-size:' . sprintf("%d%%", $font_size_pct),
      ),
      'absolute' => TRUE,
    )) . " (" . $count . ")";
  }
  return implode(", ", $items);
}
/**
 * Return a listing of keywords for the Popular Searches block.
 *
 * Counts the keywords of the most recent logged queries (up to 5000 rows from
 * the last seven days that found at least one result), case-insensitively,
 * and builds a link for each of the most frequent ones. The result is stored
 * in the 'cache_block' cache for 10 minutes.
 *
 * @param $limit
 *   Maximum number of keywords to return.
 *
 * @return
 *   An array of links built with l(), most frequent keyword first.
 */
function apachesolr_stats_block_frequent_keywords($limit = 10) {
  // NOTE: the cache id does not vary with $limit, so a list cached for one
  // limit is returned for any other limit until the entry expires.
  $cid = "apachesolr_stats_block_frequent_keywords";
  $cached = cache_get($cid, 'cache_block');
  if ($cached && $cached->data && $cached->expire > time()) {
    return $cached->data;
  }
  $keywords = array();
  // Only consider queries logged during the last week.
  $timestamp = time() - 3600 * 24 * 7;
  $result = db_query_range("SELECT keywords FROM {apachesolr_stats} WHERE\n numfound > 0 AND timestamp > %d\n ORDER BY timestamp DESC", $timestamp, 0, 5000);
  while ($record = db_fetch_object($result)) {
    $trimmed = trim($record->keywords);
    if ($trimmed != "") {
      // Keep track of individual keywords used.
      $keys_filtered = drupal_strtolower($trimmed);
      if (isset($keywords[$keys_filtered])) {
        $keywords[$keys_filtered]++;
      }
      else {
        $keywords[$keys_filtered] = 1;
      }
    }
  }
  // Sort by most frequent first.
  arsort($keywords);
  // Get first $limit items. Keys are preserved so that purely numeric
  // keywords (e.g. "2010") are not renumbered to 0, 1, 2...
  $keywords = array_slice($keywords, 0, $limit, TRUE);
  $links = array();
  foreach ($keywords as $key => $frequency) {
    $links[] = l($key, 'search/apachesolr_search/' . $key);
  }
  // Cache information for 10 minutes.
  $expire = time() + 600;
  cache_set($cid, $links, 'cache_block', $expire);
  return $links;
}
/**
 * Implementation of hook_block().
 *
 * Declares a single "popular searches" block (delta 0) and renders it from
 * the links returned by apachesolr_stats_block_frequent_keywords().
 *
 * @param $op
 *   The operation being performed; 'list' and 'view' are handled.
 * @param $delta
 *   The block delta; not used because only one block is defined.
 *
 * @return
 *   For 'list', the block definitions. For 'view', a block array with
 *   'subject' and 'content' when there are links to show. Nothing otherwise.
 */
function apachesolr_stats_block($op = 'list', $delta = 0) {
  if ($op == 'list') {
    // Default settings for the popular searches block.
    $popular_searches = array(
      'info' => t('Apache Solr Statistics: popular searches'),
      // The block starts out disabled.
      'status' => 0,
      'region' => 'right',
      // One cached copy serves every path and role.
      'cache' => BLOCK_CACHE_GLOBAL,
    );
    return array(0 => $popular_searches);
  }

  if ($op == 'view') {
    $links = apachesolr_stats_block_frequent_keywords();
    if (!$links) {
      // Nothing to show: return no block at all.
      return;
    }
    return array(
      'subject' => t('Popular searches'),
      'content' => theme('apachesolr_stats_block', $links),
    );
  }
}
/**
 * Implementation of hook_theme().
 *
 * Registers the 'apachesolr_stats_block' theme hook, which takes a single
 * 'links' argument.
 *
 * @return
 *   An array of theme hook definitions keyed by hook name.
 */
function apachesolr_stats_theme() {
  return array(
    'apachesolr_stats_block' => array(
      // hook_theme() expects 'arguments' keyed by argument name with the
      // default value as the array value, not a plain list of names.
      'arguments' => array(
        'links' => NULL,
      ),
    ),
  );
}
/**
 * Theme content for apachesolr_stats_block().
 *
 * Hands the links over to the 'item_list' theme hook.
 *
 * @param array $links
 *   An array, each element is a link to launch a search for that word.
 * @return
 *   The themed block content, as returned by theme('item_list').
 */
function theme_apachesolr_stats_block($links) {
  $item_list = theme('item_list', $links);
  return $item_list;
}
Functions
Name | Description |
---|---|
apachesolr_stats_admin | Build the settings form. |
apachesolr_stats_apachesolr_modify_query | Implementation of hook_apachesolr_modify_query(). |
apachesolr_stats_block | Implementation of hook_block(). |
apachesolr_stats_block_frequent_keywords | Return a listing of keywords for the Popular Searches block. |
apachesolr_stats_chart | Generate an IMG tag with the URL to generate a chart using Google Charts API. |
apachesolr_stats_cron | Implementation of hook_cron(). |
apachesolr_stats_determine_field_from_query | Determine the facet field from a word: im_vid_XX, kw or XXX (anything before ":", like tid, uid, etc) |
apachesolr_stats_encodedata | Encode data using Chart's simple encoding. See http://code.google.com/apis/chart/formats.html#simple |
apachesolr_stats_exit | Implementation of hook_exit(). |
apachesolr_stats_facet_usage_graph | Returns the <img> tag for a Google chart with the facet usage |
apachesolr_stats_facet_usage_table | Returns a themed table for facet usage |
apachesolr_stats_generate_report_elements | Generates report elements for the given granularity. |
apachesolr_stats_get_facets | Returns the facet array to report on. |
apachesolr_stats_get_granularities | Returns an array of preset granularities. |
apachesolr_stats_menu | Implementation of hook_menu(). |
apachesolr_stats_report | Callback for admin/reports/apachesolr/stats. |
apachesolr_stats_report_frequent_keywords | Receives an array of keyword => count and reports the top-used terms. |
apachesolr_stats_report_gadget | Callback function that outputs an XML description for a Google Gadget and terminates PHP execution. |
apachesolr_stats_report_gadget_element | Callback function used by Gadget javascript to fetch a particular element. |
apachesolr_stats_theme | Implementation of hook_theme(). |
theme_apachesolr_stats_block | Theme content for apachesolr_stats_block(). |