You are here

apachesolr_stats.module in Apache Solr Statistics 6.3

Same filename and directory in other branches
  1. 6 apachesolr_stats.module
  2. 7 apachesolr_stats.module

Keeps and reports statistics about Apache Solr usage and performance.

File

apachesolr_stats.module
View source
<?php

/**
 * @file
 *   Keeps and reports statistics about Apache Solr usage and performance.
 */

/**
 * Implementation of hook_menu().
 *
 * Declares three router items: the settings form, the report landing page
 * and the report for a single search page.
 */
function apachesolr_stats_menu() {
  $items = array();

  // Settings form.
  $items['admin/settings/apachesolr/stats'] = array(
    'title' => 'Statistics',
    'description' => 'Apache Solr Statistics settings to measure usage and performance.',
    'page callback' => 'drupal_get_form',
    'page arguments' => array('apachesolr_stats_admin'),
    'access arguments' => array('administer search'),
    'type' => MENU_LOCAL_TASK,
  );

  // Report landing page; called without a search page argument.
  $items['admin/reports/apachesolr_stats'] = array(
    'title' => 'Apache Solr Search Page Statistics',
    'description' => 'Report of Apache Solr Page usage and performance.',
    'page callback' => 'apachesolr_stats_report',
    'page arguments' => array(),
    'access arguments' => array('access site reports'),
    'type' => MENU_NORMAL_ITEM,
  );

  // Report for one search page; path component 3 is handed to the callback.
  $items['admin/reports/apachesolr_stats/%apachesolr_search_page'] = array(
    'title' => 'Apache Solr Search Page Statistics',
    'page callback' => 'apachesolr_stats_report',
    'page arguments' => array(3),
    'access arguments' => array('access site reports'),
    'type' => MENU_NORMAL_ITEM,
  );

  return $items;
}

/**
 * Form builder for the statistics settings page.
 *
 * Offers: which search pages are logged, how long log entries are kept, and
 * which IP addresses / user roles are excluded from logging.
 *
 * @return array
 *   The form array, wrapped by system_settings_form().
 */
function apachesolr_stats_admin() {
  // One checkbox per search page, labelled "Label (search path)".
  $page_options = array();
  foreach (apachesolr_search_load_all_search_pages() as $page) {
    $page_options[$page['page_id']] = $page['label'] . ' (' . $page['search_path'] . ')';
  }

  // Selectable retention periods in seconds, from one hour up to 16 weeks.
  $retention_seconds = array(
    3600,
    10800,
    21600,
    32400,
    43200,
    86400,
    172800,
    259200,
    604800,
    1209600,
    2419200,
    4838400,
    9676800,
  );
  $retention_options = drupal_map_assoc($retention_seconds, 'format_interval');

  $tip_links = array(
    '@report-url' => url('admin/reports/apachesolr_stats'),
    '@blocks-url' => url('admin/build/block'),
  );
  $cron_link = array(
    '@cron' => url('admin/reports/status'),
  );

  $form = array();

  $form['tip'] = array(
    '#type' => 'markup',
    '#value' => t('You can also visit the <a href="@report-url">reports page</a> and <a href="@blocks-url">block administration page</a> for the enabled search pages.', $tip_links),
  );

  $form['apachesolr_stats_enabled'] = array(
    '#type' => 'checkboxes',
    '#title' => t('Enable search logging for these search pages'),
    '#default_value' => variable_get('apachesolr_stats_enabled', array()),
    '#options' => $page_options,
    '#description' => t('Log information about all queries launched via the Apache Solr Search Integration module. Disabling a page here will also disable the associated block.'),
  );

  $form['apachesolr_stats_flush_log_timer'] = array(
    '#type' => 'select',
    '#title' => t('Discard query logs older than'),
    '#default_value' => variable_get('apachesolr_stats_flush_log_timer', 259200),
    '#options' => $retention_options,
    '#description' => t('Older query log entries will be automatically discarded. (Requires a correctly configured <a href="@cron">cron maintenance task</a>.)', $cron_link),
  );

  // Blacklist settings, grouped in their own fieldset.
  $blacklist = array(
    '#type' => 'fieldset',
    '#title' => t('Log blacklist'),
    '#description' => t('Note: Changing this does not alter existing logged queries.'),
  );
  $blacklist['apachesolr_stats_ignore_ip_list'] = array(
    '#type' => 'textarea',
    '#title' => t('IP addresses that will not be logged'),
    '#default_value' => variable_get('apachesolr_stats_ignore_ip_list', ''),
    '#description' => t('Enter IP addresses (e.g.: 192.168.1.2), one per line. You can match entire subnets using a partial IP address ending with a period (e.g.: 192.168.)'),
  );
  $blacklist['apachesolr_stats_ignore_role_list'] = array(
    '#type' => 'checkboxes',
    '#title' => t('User roles that will not be logged'),
    '#options' => user_roles(),
    '#default_value' => variable_get('apachesolr_stats_ignore_role_list', array()),
    '#description' => t('Check all roles which should not be logged.'),
  );
  $form['access'] = $blacklist;

  return system_settings_form($form);
}

/**
 * Implementation of hook_apachesolr_query_alter().
 *
 * Adds the debugQuery parameter to the Solr call so that the response carries
 * the timing information that apachesolr_stats_exit() logs.
 *
 * @param object $query
 *   The Solr query being built; modified through replaceParam().
 */
function apachesolr_stats_apachesolr_query_alter($query) {
  // The setting comes from a checkboxes element: it is a map of page_id to
  // either the page_id (enabled) or a falsy value (disabled). A map holding
  // only disabled pages is still a non-empty array, so drop the falsy entries
  // before deciding whether any page is logged at all.
  $enabled_pages = array_filter((array) variable_get('apachesolr_stats_enabled', array()));
  if (!$enabled_pages) {
    return;
  }

  // Add the debug query argument.
  // See: http://wiki.apache.org/solr/CommonQueryParameters#head-f45a9396425956a4db8d6478ed6029adfb7b0858
  $query->replaceParam('debugQuery', 'true');
}

/**
 * Implementation of hook_exit().
 *
 * This is the spot where actual logging takes place. A row is written to
 * {apachesolr_stats} when the current path belongs to an enabled search page,
 * a search was issued for that page's environment during this request, and
 * neither the client IP nor one of the user's roles is blacklisted.
 */
function apachesolr_stats_exit() {
  global $user;

  $enabled_pages = variable_get('apachesolr_stats_enabled', array());
  if (!$enabled_pages) {
    return;
  }

  // Apparently there can be cases where some modules aren't loaded.
  if (!function_exists('apachesolr_has_searched')) {
    return;
  }

  // Log only for the search page the current path belongs to.
  if (!isset($_GET['q'])) {
    return;
  }
  $search_page = apachesolr_stats_get_search_page_by_path($_GET['q']);
  if (!$search_page) {
    return;
  }

  // Continue only if logging is enabled for this search page.
  $page_id = $search_page['page_id'];
  if (empty($enabled_pages[$page_id])) {
    return;
  }

  // Continue only if the current page request has issued a search.
  $env_id = $search_page['env_id'];
  if (!apachesolr_has_searched($env_id)) {
    return;
  }

  // Ignore certain IPs.
  $ignore_list = variable_get('apachesolr_stats_ignore_ip_list', '');
  if (_apachesolr_stats_ip_is_ignored(ip_address(), $ignore_list)) {
    return;
  }

  // Ignore certain roles.
  $roles_ignore_list = variable_get('apachesolr_stats_ignore_role_list', array());
  $ignored_roles = array_intersect(array_keys($user->roles), array_values((array) $roles_ignore_list));
  if (count($ignored_roles) > 0) {
    return;
  }

  // Without a query object there is nothing to report on; calling methods on
  // a missing query would be a fatal error.
  $query = apachesolr_current_query($env_id);
  if (!$query) {
    return;
  }
  $searcher = $query->getSearcher();
  $response = apachesolr_static_response_cache($searcher);
  $filters = apachesolr_stats_get_active_facets($searcher);
  $solrsort = $query->getSolrSort();
  $keywords = $query->getParam('q');
  $params = $query->getParams();

  // Record whether Solr returned a spellchecker suggestion, but only when
  // spellchecking is enabled for this search page.
  $num_suggestions = 0;
  if (!empty($search_page['settings']['apachesolr_search_spellcheck']) && !empty($response->spellcheck->suggestions)) {
    $num_suggestions = 1;
  }

  // Any of these may be absent (failed request, no debug section); log zeros
  // in that case instead of dereferencing missing objects.
  $num_found = isset($response->response->numFound) ? $response->response->numFound : 0;
  $total_time = isset($response->debug->timing->time) ? $response->debug->timing->time : 0;
  $prepare_time = isset($response->debug->timing->prepare->time) ? $response->debug->timing->prepare->time : 0;
  $process_time = isset($response->debug->timing->process->time) ? $response->debug->timing->process->time : 0;
  $pager_page = isset($_GET['page']) ? $_GET['page'] : '';

  db_query("INSERT INTO {apachesolr_stats}\n    (timestamp, uid, sid, numfound, showed_suggestions, total_time, prepare_time, process_time, page, keywords, filters, sort, params, env_id, page_id)\n    VALUES\n    (%d, %d, '%s', %d, %d, %d, %d, %d, '%s', '%s','%s','%s','%s', '%s', '%s')", time(), $user->uid, session_id(), $num_found, $num_suggestions, $total_time, $prepare_time, $process_time, $pager_page, $keywords, json_encode($filters), json_encode($solrsort), json_encode($params), $env_id, $page_id);
}

/**
 * Tells whether an IP address matches the "do not log" list.
 *
 * Entries ending with a period are subnet prefixes (e.g. "192.168."); every
 * other entry must equal the address exactly, so that "192.168.1.2" does not
 * also match "192.168.1.20" or "192.168.1.200".
 *
 * @param string $ip_address
 *   The address of the current request.
 * @param string $ignore_list
 *   Whitespace-separated list of addresses and subnet prefixes.
 * @return bool
 *   TRUE when the address must not be logged.
 */
function _apachesolr_stats_ip_is_ignored($ip_address, $ignore_list) {
  if (!$ignore_list) {
    return FALSE;
  }
  foreach (preg_split('/[\\s]+/', $ignore_list) as $entry) {
    if ($entry == "") {
      continue;
    }
    if (substr($entry, -1) === '.') {
      if (strpos($ip_address, $entry) === 0) {
        return TRUE;
      }
    }
    elseif ($ip_address === $entry) {
      return TRUE;
    }
  }
  return FALSE;
}

/**
 * Return a search page by the base path, e.g. 'search/site'
 *
 * A page matches when $path is exactly its search path or continues it with
 * a further path component ('search/site/keywords'). Matching stops at path
 * separators, so 'search/sitemap' does not match the page at 'search/site'.
 * The first matching page wins.
 *
 * @param string $path
 *   Internal path of the current request.
 * @return bool|array
 *   Returns FALSE if no page found, or the search page as found in
 *   apachesolr_search_load_all_search_pages().
 */
function apachesolr_stats_get_search_page_by_path($path) {
  $path = (string) $path;
  foreach (apachesolr_search_load_all_search_pages() as $search_page) {
    $base = isset($search_page['search_path']) ? (string) $search_page['search_path'] : '';

    // A page without a path can never own the current request.
    if ($base === '') {
      continue;
    }
    if ($path === $base || strpos($path, $base . '/') === 0) {
      return $search_page;
    }
  }
  return FALSE;
}

/**
 * Collects the active facet items of a searcher from Facet API.
 *
 * @param $searcher
 *   The machine name of the searcher.
 *
 * @return array
 *   Facet name => active value. When the same facet name occurs in several
 *   active items, the value of the last one is kept. Empty when Facet API is
 *   unavailable or no adapter can be loaded.
 */
function apachesolr_stats_get_active_facets($searcher) {
  $active_by_facet = array();

  // Facet API is optional.
  if (!function_exists('facetapi_adapter_load')) {
    return $active_by_facet;
  }

  $adapter = facetapi_adapter_load($searcher);
  if (!$adapter) {
    return $active_by_facet;
  }

  foreach ($adapter->getAllActiveItems() as $item) {
    foreach ($item['facets'] as $name) {
      $active_by_facet[$name] = $item['value'];
    }
  }
  return $active_by_facet;
}

/**
 * Page callback for admin/reports/apachesolr_stats.
 *
 * Without a search page, lists all search pages with links to their reports.
 * With a search page, renders the report for the picked time span.
 *
 * @param array $search_page
 *   The search page to report on, or NULL to show the listing.
 * @param string $picked_granularity
 *   Key of one of the presets from apachesolr_stats_get_granularities();
 *   unknown keys fall back to "minute".
 * @return string
 *   The page output as HTML.
 * @see apachesolr_stats_menu()
 */
function apachesolr_stats_report($search_page = NULL, $picked_granularity = "minute") {
  $enabled_pages = variable_get('apachesolr_stats_enabled', array());

  // If not given a search_page argument, show a listing.
  if (empty($search_page)) {
    $items = array();
    foreach (apachesolr_search_load_all_search_pages() as $listed_page) {
      $items[] = l($listed_page['label'], 'admin/reports/apachesolr_stats/' . $listed_page['page_id']);
    }
    $output = t('Pick a search page to view the report:');
    $output .= theme('item_list', $items);
    return $output;
  }

  $page_id = $search_page['page_id'];
  drupal_set_title(t("Apache Solr statistics report: @label", array(
    '@label' => $search_page['label'],
  )));

  // Warn when nothing is being logged for this page. The text has to go
  // through t() for the !link placeholder to be substituted; the second
  // argument of drupal_set_message() is the message type, not placeholders.
  if (empty($enabled_pages[$page_id])) {
    drupal_set_message(t('Logging is disabled for this page in the !link. Enable it to log Apache Solr queries.', array(
      '!link' => l(t('configuration page'), 'admin/settings/apachesolr/stats'),
    )), 'warning');
  }

  // Decide what granularity to use; fall back to "minute" when the given
  // argument is not a known preset.
  $granularities = apachesolr_stats_get_granularities();
  if (!isset($granularities[$picked_granularity])) {
    $picked_granularity = "minute";
  }

  // Process latest log entries
  $report_elements = apachesolr_stats_generate_report_elements($page_id, $granularities[$picked_granularity]);

  // Granularity picker: offer only the time spans that fit inside the log
  // retention period ("all" is always offered).
  $timer_max = variable_get('apachesolr_stats_flush_log_timer', 259200);
  $output = "<div class='granularity'>" . t('Choose the report time span:');
  foreach ($granularities as $name => $preset) {
    if ($name != "all" && $preset['time_before'] > $timer_max) {
      continue;
    }
    $output .= " &nbsp; ";
    if ($name != $picked_granularity) {
      $output .= l($preset["last_msg"], "admin/reports/apachesolr_stats/" . $page_id . '/' . $name);
    }
    else {
      $output .= "<strong>" . $preset["last_msg"] . "</strong>";
    }
  }
  $output .= "</div><hr>";

  if (!$report_elements) {
    drupal_set_message(t('There is not enough stored data to build a report for the current time span.'));
    return $output;
  }

  // Report description
  $output .= t('This is an overview of Apache Solr usage and performance.');
  $output .= ' ' . t('You can also visit the <a href="@settings-url">settings page</a>.', array(
    '@settings-url' => url('admin/settings/apachesolr/stats'),
  ));

  // Render report elements as a two-column table: name (header cell), value.
  $rows = array();
  foreach ($report_elements as $data) {
    $rows[] = array(
      "data" => array(
        array(
          'data' => $data['name'],
          'header' => true,
          'style' => 'width:33%',
        ),
        array(
          'data' => $data['value'],
        ),
      ),
    );
  }
  $output .= theme('table', array(), $rows);
  return $output;
}

/**
 * Generate an IMG tag with the URL to generate a chart using Google Charts API.
 *
 * The series is plotted oldest timeslot first, followed by zeros for the
 * timeslots between the last logged one and the current time, so that it
 * lines up with the x-axis labels (earliest, middle, current time).
 *
 * @param array $granularity
 *    The granularity preset to use; 'timespan' and 'format' are read.
 * @param array $data
 *    The data to chart, keyed by timeslot index. Missing slots count as 0.
 * @param integer $start_timeslot
 *    The index of the first (oldest) data element to chart.
 * @param integer $last_timeslot
 *    The index of the last (newest) data element to chart.
 * @param integer $total_queries
 *    Integer with the total number of queries included in this chart.
 *    Currently not used by the function.
 * @param bool|float $average
 *    FALSE to omit the average, or the average value to show in a label on
 *    the right-hand side of the chart.
 * @return string
 *    HTML for the IMG tag.
 */
function apachesolr_stats_chart($granularity, $data, $start_timeslot, $last_timeslot, $total_queries, $average = FALSE) {

  // Sample: http://chart.apis.google.com/chart?cht=lc&chs=350x100&chdlp=b&chma=10,10,10,10&chd=s:[encoded chart data]
  $chart_prefix = 'http://chart.apis.google.com/chart?cht=lc&chs=350x100';
  $chart_prefix .= '&chdlp=b&chma=30,100,20,20&chd=s:';

  // One value per timeslot, in chronological order.
  $chd = array();
  $chd_min = 9999999;
  $chd_max = 0;
  for ($t = $start_timeslot; $t <= $last_timeslot; $t++) {
    $num = isset($data[$t]) ? intval($data[$t]) : 0;
    $chd_min = $chd_min > $num ? $num : $chd_min;
    $chd_max = $chd_max < $num ? $num : $chd_max;
    $chd[] = $num;
  }

  // Add missing data for time between last timeslot and current time. These
  // slots are the most recent ones, so they go at the end of the series.
  $current_timeslot = intval(time() / $granularity['timespan']);
  for ($t = $last_timeslot + 1; $t <= $current_timeslot; $t++) {
    $chd_min = $chd_min > 0 ? 0 : $chd_min;
    $chd[] = 0;
  }

  // Fix min if min and max are the same.
  if ($chd_min == $chd_max) {
    $chd_min = 0;
  }

  // Generate basic image URL
  $image_url = $chart_prefix . apachesolr_stats_encodedata($chd, $chd_min, $chd_max);

  // Add labels
  if ($chd_max > 0) {

    // Axis 0: y-axis labelled with the minimum and maximum value.
    $axes = array('y');
    $chxl = "0:|" . intval($chd_min) . "|" . intval($chd_max);

    // Show average value in a label on right-hand side, positioned on the
    // same min..max scale as the y-axis. $chd_max > $chd_min holds here.
    if ($average !== FALSE) {
      $position = intval(($average - $chd_min) / ($chd_max - $chd_min) * 100);
      $position = max(0, min(100, $position));
      $image_url .= "&chxp=1," . $position;
      $chxl .= sprintf("|1:|%s=%.2f", t('average'), $average);
      $axes[] = 'r';
    }

    // Add time/date labels on the x-axis. Its index is the number of axes
    // declared before it: 2 with the average axis, 1 without.
    $earliest_timestamp = $start_timeslot * $granularity['timespan'];
    $last_timestamp = $current_timeslot * $granularity['timespan'];
    $mid_timestamp = ($last_timestamp + $earliest_timestamp) / 2;
    $time_msg_1 = drupal_urlencode(strftime($granularity['format'], $earliest_timestamp));
    $time_msg_2 = drupal_urlencode(strftime($granularity['format'], $mid_timestamp));
    $time_msg_3 = drupal_urlencode(strftime($granularity['format'], $last_timestamp));
    $x_axis_index = count($axes);
    $axes[] = 'x';
    $chxl .= "|{$x_axis_index}:|{$time_msg_1}|{$time_msg_2}|{$time_msg_3}";

    $chxt = implode(',', $axes);
    $image_url .= "&chxl={$chxl}&chxt={$chxt}";
  }

  // Return the image tag
  return "<img src='{$image_url}' />";
}

/**
 * Encode data using Chart's simple encoding.
 * See http://code.google.com/apis/chart/formats.html#simple
 *
 * Each value is scaled linearly so that $chd_min maps to the first symbol and
 * $chd_max to the last one; values outside that range are clamped. When the
 * range is empty ($chd_max <= $chd_min) every value maps to the first symbol.
 *
 * @param array $chd
 *   an array of numeric values to encode, in plotting order. Anything that is
 *   not an array encodes to an empty string.
 * @param integer $chd_min
 *   an integer with the smallest value to encode.
 * @param integer $chd_max
 *   an integer with the greatest value to encode.
 * @return string
 *   a string representing the Google Charts API simple encoding of the data.
 */
function apachesolr_stats_encodedata($chd, $chd_min, $chd_max) {
  $encoder_string = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789';
  $last_symbol = strlen($encoder_string) - 1;
  $range = $chd_max - $chd_min;
  $encoded_values = '';
  if (is_array($chd)) {
    foreach ($chd as $value) {
      // Scale over (max - min), not max alone, so the maximum reaches the
      // top of the chart; an empty range must not divide by zero.
      $index = $range > 0 ? (int) floor(($value - $chd_min) / $range * $last_symbol) : 0;

      // Keep the offset inside the symbol table: a negative offset would
      // count from the end of the string, a too large one yields no symbol.
      $index = max(0, min($last_symbol, $index));
      $encoded_values .= $encoder_string[$index];
    }
  }

  // Google does not like single-valued line charts; fix that.
  if (strlen($encoded_values) == 1) {
    $encoded_values = "99";
  }
  return $encoded_values;
}

/**
 * Implementation of hook_cron().
 *
 * Deletes log rows older than the configured retention period
 * (apachesolr_stats_flush_log_timer, 259200 seconds by default).
 */
function apachesolr_stats_cron() {
  $retention = variable_get('apachesolr_stats_flush_log_timer', 259200);
  $oldest_kept = time() - $retention;
  db_query('DELETE FROM {apachesolr_stats} WHERE timestamp < %d', $oldest_kept);
}

/**
 * Returns a themed table for facet usage
 *
 * @param array $facets
 *   An array of calculated data to report, keyed by facet name; each entry
 *   has 'info' and 'usage'. The 'any' entry holds the total number of queries
 *   and is the base for the percentage column.
 * @return string
 *   HTML for a themed table containing the report data.
 */
function apachesolr_stats_facet_usage_table($facets) {

  // Report usage in table
  $header = array(
    array(
      'data' => t('Facet ID'),
      'Xfield' => 'id',
      'sort' => 'asc',
    ),
    array(
      'data' => t('Facet info'),
      'Xfield' => 'info',
      'sort' => '',
    ),
    array(
      'data' => t('Queries containing this facet'),
      'Xfield' => 'queries',
      'sort' => '',
    ),
    array(
      'data' => t('% of queries containing this facet'),
      'Xfield' => 'queries',
      'sort' => '',
    ),
  );

  // Without any counted query every share is reported as 0% instead of
  // dividing by zero.
  $total = isset($facets['any']['usage']) ? $facets['any']['usage'] : 0;

  // Start from an empty list so an empty $facets still themes a valid table.
  $rows = array();
  foreach ($facets as $fieldname => $facet) {
    $share = $total > 0 ? $facet['usage'] / $total * 100 : 0;
    $rows[$fieldname] = array(
      $fieldname,
      $facet['info'],
      $facet['usage'],
      sprintf("%2.1f%%", $share),
    );
  }
  return theme('table', $header, $rows, array(
    'style' => 'font-size:80%',
  ));
}

/**
 * Returns the <img> tag for a Google chart with the facet usage
 *
 * Draws one horizontal bar per facet showing the percentage of queries that
 * used it, relative to the usage of the 'any' entry.
 *
 * @param array $facets
 *   An array of calculated data to report, keyed by facet name; each entry
 *   has 'info' and 'usage'.
 * @return string
 *   HTML for an IMG tag to a Google chart.
 */
function apachesolr_stats_facet_usage_graph($facets) {

  // Chart for field usage
  $legends = array();
  $data = array();
  $label_cutoff = 40;
  $total_queries = isset($facets['any']['usage']) ? $facets['any']['usage'] : 0;
  foreach ($facets as $facet) {
    $legend = preg_replace("/^.*ilter by /", "", $facet['info']);

    // Truncate by characters, not bytes, so a multi-byte UTF-8 character is
    // never cut in half before being URL-encoded.
    if (drupal_strlen($legend) > $label_cutoff) {
      $legend = drupal_substr($legend, 0, $label_cutoff) . "...";
    }
    $legends[] = drupal_urlencode($legend);

    // No counted queries: report 0% instead of dividing by zero.
    $data[] = $total_queries > 0 ? $facet['usage'] / $total_queries * 100 : 0;
  }
  $chd = 's:' . apachesolr_stats_encodedata($data, 0, 100);

  // The labels are listed in the opposite order of the data.
  // NOTE(review): presumably because the Chart API lists y-axis labels from
  // bottom to top while the bars are drawn from top to bottom - confirm.
  // apachesolr_stats_encodedata() itself keeps the order of its input.
  $chl = implode('|', array_reverse($legends));
  $height = 30 + sizeof($legends) * 28;

  // Percentage labels
  $chm = "N*f0*%,000000,0,-1,11";
  return "<img src='http://chart.apis.google.com/chart?chxt=y&cht=bhs&chma=20,20,20,20&chd={$chd}&chs=350x{$height}&chds=0,100&chxl=0:|{$chl}&chm={$chm}' />";
}

/**
 * Returns the facet array to report on.
 *
 * @return array
 *   Facet name => array with 'facet_field', 'info' (label) and a 'usage'
 *   counter starting at 0. Contains the facets enabled in Facet API for each
 *   Apache Solr environment (when the facetapi module exists) plus the two
 *   virtual entries 'kw' and 'any'.
 */
function apachesolr_stats_get_facets() {
  $report_facets = array();

  if (module_exists('facetapi')) {
    foreach (array_keys(apachesolr_load_all_environments()) as $env_id) {
      $enabled = facetapi_get_enabled_facets('apachesolr@' . $env_id);
      foreach ($enabled as $name => $definition) {
        $report_facets[$name] = array(
          'facet_field' => $definition['field'],
          'info' => $definition['label'],
          'usage' => 0,
        );
      }
    }
  }

  // Add some "virtual" facets to report on: keyword usage and the total.
  $virtual_facets = array(
    'kw' => 'Keyword search',
    'any' => '[All queries including any filter and/or keywords]',
  );
  foreach ($virtual_facets as $id => $label) {
    $report_facets[$id] = array(
      'facet_field' => $id,
      'info' => $label,
      'usage' => 0,
    );
  }

  return $report_facets;
}

/**
 * Returns the preset granularities offered by the report.
 *
 * @return array
 *   Presets keyed by 'minute', 'hour', 'day' and 'all'. Each preset has:
 *   'name' (translated unit of one data point), 'timespan' (seconds per data
 *   point), 'time_before' (seconds of history covered), 'last_msg'
 *   (translated label of the time span) and 'format' (strftime() format for
 *   chart labels).
 */
function apachesolr_stats_get_granularities() {
  $minute = 60;
  $hour = 60 * $minute;
  $day = 24 * $hour;
  $week = 7 * $day;

  $presets = array();

  // One point per minute over the last day.
  $presets['minute'] = array(
    'name' => t('minute'),
    'timespan' => $minute,
    'time_before' => $day,
    'last_msg' => t('last day'),
    'format' => '%H:%M',
  );

  // One point per hour over the last week.
  $presets['hour'] = array(
    'name' => t('hour'),
    'timespan' => $hour,
    'time_before' => $week,
    'last_msg' => t('last week'),
    'format' => '%m/%d %H:%M',
  );

  // One point per day over the last 32 days.
  $presets['day'] = array(
    'name' => t('day'),
    'timespan' => $day,
    'time_before' => 32 * $day,
    'last_msg' => t('last month'),
    'format' => '%m/%d',
  );

  // One point per day over the last 16 weeks.
  $presets['all'] = array(
    'name' => t('day'),
    'timespan' => $day,
    'time_before' => 16 * $week,
    'last_msg' => t('all time (depends on settings)'),
    'format' => '%m/%d',
  );

  return $presets;
}

/**
 * Generates report elements for the given search page and granularity.
 *
 * Reads the log rows of the page that are newer than the granularity's
 * 'time_before', newest first, and aggregates them into totals and charts.
 *
 * @param string $page_id
 *  ID of the search page whose log entries are reported.
 * @param array $granularity
 *  One preset as returned by apachesolr_stats_get_granularities(); the keys
 *  'timespan', 'time_before' and 'name' are read here.
 * @return array
 *  An array with the report elements, or an empty array when no first-page
 *  query was logged in the time span; each element is an array with the
 *  indexes:
 *    'name'  => human-readable name of the element, e.g. "Total queries"
 *    'value' => html with the result. Can be an image, a number, etc.
 */
function apachesolr_stats_generate_report_elements($page_id, $granularity) {

  // Initialize
  $facets = apachesolr_stats_get_facets();
  $suggestions = 0;
  $users = array();
  $sessions = array();
  $user_slot = array();
  $session_slot = array();
  // Every logged request, including further result pages.
  $total_requests = 0;
  // Fresh searches only (requests for the first results page).
  $total_queries = 0;
  $timeslot = 0;
  $last_timeslot = 0;
  $first_timestamp = 0;
  $time = array(
    'total' => 0,
    'max' => -1,
    'min' => 9999.999,
  );
  $report_elements = array();
  $keywords = array();
  $keywords_noresults = array();
  $count_per_granularity = array();
  $data_per_granularity = array();
  $sort_usage = array();

  // Scan the logfile and build statistics arrays
  $result = db_query("SELECT * FROM {apachesolr_stats} WHERE timestamp > %d AND page_id = '%s' ORDER BY timestamp DESC", time() - $granularity['time_before'], $page_id);
  while ($record = db_fetch_object($result)) {
    $timeslot = intval($record->timestamp / $granularity['timespan']);

    // Rows arrive newest first, so the first row defines the last timeslot.
    if ($last_timeslot == 0) {
      $last_timeslot = $timeslot;
    }
    @$users[$record->uid]++;
    @$sessions[$record->sid]++;

    // Tally suggestions
    if ($record->showed_suggestions) {
      $suggestions++;
    }
    $total_requests++;
    $time['total'] += $record->total_time;

    // Track max and min response times
    $time['max'] = $time['max'] < $record->total_time ? $record->total_time : $time['max'];
    $time['min'] = $time['min'] > $record->total_time ? $record->total_time : $time['min'];

    // Field and sort usage; only when on first results page (meaning it's a
    // fresh search)
    if ($record->page == "") {
      $facet_processed_flag = array();

      // Decode to an array; a row with missing or invalid JSON counts as
      // having no filters instead of breaking the loop below.
      $filters = json_decode($record->filters, TRUE);
      if (!is_array($filters)) {
        $filters = array();
      }
      foreach ($filters as $facet_name => $facet_value) {
        if (isset($facets[$facet_name]) && !isset($facet_processed_flag[$facet_name])) {

          // Add 1 to usage of facet.
          $facets[$facet_name]['usage']++;

          // Mark so we don't count it again for this query.
          $facet_processed_flag[$facet_name] = TRUE;
        }
      }
      if (trim($record->keywords) != "") {
        if (!isset($facet_processed_flag['kw']) || $facet_processed_flag['kw'] != true) {
          $facets['kw']['usage']++;
          $facet_processed_flag['kw'] = true;

          // Keep track of individual keywords used
          $keys_filtered = drupal_strtolower(trim($record->keywords));
          @$keywords[$keys_filtered]++;

          // Count keywords with zero results; but only when no filters issued.
          if ($record->numfound == 0 && !$filters) {
            @$keywords_noresults[$keys_filtered]++;
          }
        }
      }

      // Count each unique query
      $facets["any"]['usage']++;
      $total_queries++;

      // Sort usage
      $sort = (array) json_decode($record->sort);
      if ($sort) {
        @$sort_usage[$sort['#name']]++;
      }
    }

    // Group some stats into timeslots (minutes, hours) to show trends
    if (empty($user_slot[$record->uid][$timeslot])) {
      @$data_per_granularity['users_per_slot'][$timeslot]++;
      $user_slot[$record->uid][$timeslot] = TRUE;
    }
    if (empty($session_slot[$record->sid][$timeslot])) {
      @$data_per_granularity['sessions_per_slot'][$timeslot]++;
      $session_slot[$record->sid][$timeslot] = TRUE;
    }
    @$data_per_granularity['queries'][$timeslot]++;
    @$count_per_granularity[$timeslot]++;
    @($data_per_granularity['total_time'][$timeslot] += $record->total_time);
    $first_timestamp = $record->timestamp;
  }
  if (sizeof($sessions) == 0 || sizeof($users) == 0 || $total_queries == 0) {
    return array();
  }

  // The last row read is the oldest one: it defines the first timeslot.
  $start_timeslot = $timeslot;
  $slot_count = $last_timeslot - $start_timeslot + 1;
  $report_elements['span'] = array(
    'name' => t('Report span'),
    'value' => t('Last @interval (@startdate to @enddate)', array(
      '@interval' => format_interval(3600 + time() - $first_timestamp),
      '@startdate' => format_date($first_timestamp),
      '@enddate' => format_date(time()),
    )) . '<br />' . t('Data points in charts are one point per @granularity.', array(
      '@granularity' => $granularity['name'],
    )),
  );

  // Chart for queries per timeslot
  $chart = apachesolr_stats_chart($granularity, $data_per_granularity['queries'], $start_timeslot, $last_timeslot, $total_queries, $total_queries / $slot_count);
  $report_elements['total_queries_per'] = array(
    'name' => t('Requests'),
    'value' => t('Total: @total', array(
      '@total' => $total_queries,
    )) . '<br />' . $chart,
  );

  // Chart for sessions per timeslot
  $chart = apachesolr_stats_chart($granularity, $data_per_granularity['sessions_per_slot'], $start_timeslot, $last_timeslot, sizeof($sessions), sizeof($sessions) / $slot_count);
  $report_elements['total_sessions_per'] = array(
    'name' => t('Unique sessions'),
    'value' => t('Total: @total', array(
      '@total' => sizeof($sessions),
    )) . '<br />' . $chart,
  );
  $report_elements['avg_queries_session'] = array(
    'name' => t('Average requests per session'),
    'value' => sprintf("%.1f", $total_queries / sizeof($sessions)),
  );

  // Chart for average time per timeslot
  $data = array();
  foreach ($data_per_granularity['total_time'] as $slot => $value) {
    $data[$slot] = $value / $count_per_granularity[$slot];
  }

  // The time total is summed over every logged request, so the average has
  // to be taken over every request too, not over first-page queries only.
  $average_time = $time['total'] / $total_requests;
  $chart = apachesolr_stats_chart($granularity, $data, $start_timeslot, $last_timeslot, $total_queries, $average_time);
  $report_elements['query_avg_time'] = array(
    'name' => t('Average time per request (miliseconds)'),
    'value' => sprintf("%s: %.2f ms / %s: %.2f ms / %s: %.2f ms", t('Minimum'), $time['min'], t('Average'), $average_time, t('Maximum'), $time['max']) . '<br />' . $chart,
  );

  // Most-used keywords
  $report_elements['keywords'] = array(
    'name' => t('Top search phrases'),
    'value' => apachesolr_stats_report_frequent_keywords($page_id, $keywords, $keywords_noresults),
  );

  // Most-used keywords with no results
  $report_elements['keywords_noresults'] = array(
    'name' => t('Top search phrases with no results'),
    'value' => apachesolr_stats_report_frequent_keywords($page_id, $keywords_noresults, $keywords_noresults, "error"),
  );

  // Total spellchecker suggestions
  $report_elements['spellchecker'] = array(
    'name' => t('Total spellchecker suggestions'),
    'value' => $suggestions,
  );

  // Chart for sort usage
  $legends = array();
  foreach ($sort_usage as $key => $value) {
    $legends[] = drupal_urlencode($key);
  }
  $chl = implode('|', $legends);
  $chd = implode(',', $sort_usage);
  $chart = "<img src='http://chart.apis.google.com/chart?cht=p3&chd=t:{$chd}&chs=350x100&chl={$chl}' />";
  $report_elements['sort_usage'] = array(
    'name' => t('Sort usage'),
    'value' => $chart,
  );

  // Chart for field usage
  $report_elements['field_usage'] = array(
    'name' => t('Facet usage'),
    'value' => apachesolr_stats_facet_usage_graph($facets) . apachesolr_stats_facet_usage_table($facets),
  );
  return $report_elements;
}

/**
 * Receives an array of keyword => count and reports the top-used terms.
 *
 * Each term is rendered as a link to the search page, sized by how often it
 * was used relative to the other reported terms, followed by its count.
 *
 * @param string $page_id
 *   ID of the search page.
 * @param array $keywords
 *   array of keyword => count pairs.
 * @param array $keywords_noresults
 *   array containing keywords that returned 0 results; these terms always
 *   get the CSS class "error".
 * @param string $class
 *   CSS class for each link to a term that did return results.
 * @param int $number
 *   Number of terms to show.
 * @return string
 *   Comma-separated HTML links, or an empty string without keywords.
 */
function apachesolr_stats_report_frequent_keywords($page_id, $keywords, $keywords_noresults, $class = '', $number = 25) {
  if (empty($keywords)) {
    return '';
  }
  $search_page = apachesolr_search_page_load($page_id);
  $search_path = $search_page['search_path'];

  // Most frequent first; keep the first $number terms. Keys are preserved
  // explicitly: PHP stores numeric keywords such as "2012" as integer keys,
  // which array_slice() would otherwise renumber to 0, 1, 2...
  arsort($keywords);
  $slice = array_slice($keywords, 0, $number, TRUE);

  // Find the count range of the reported terms for the font size weights.
  $min = 1000000000.0;
  $max = -1000000000.0;
  $steps = 6;
  foreach ($slice as $count) {
    $min = min($min, $count);
    $max = max($max, $count);
  }

  // Note: we need to ensure the range is slightly too large to make sure even
  // the largest element is rounded down.
  $range = max(0.01, $max - $min) * 1.0001;

  $items = array();
  foreach ($slice as $word => $count) {
    $label = (string) $word;
    $weight = floor($steps * ($count - $min) / $range);
    $font_size_pct = 80 + $weight * 12;

    // Decide the class per term; a term without results must not turn every
    // following term into an "error" link as well.
    $item_class = isset($keywords_noresults[$word]) ? "error" : $class;
    $items[] = l($label, $search_path . '/' . $label, array(
      'attributes' => array(
        'class' => $item_class,
        'style' => 'font-size:' . sprintf("%d%%", $font_size_pct),
      ),
      'absolute' => TRUE,
    )) . " (" . $count . ")";
  }
  return implode(", ", $items);
}

/**
 * Return a listing of keywords for the Popular Searches block.
 *
 * Counts the keywords of the most recent logged queries (at most 5000, from
 * the last 7 days, with at least one result) and links the most frequent
 * ones to the search page. The result is cached for 10 minutes.
 *
 * @param string $page_id
 *   ID of the search page.
 * @param int $limit
 *   Maximum number of keywords to return.
 * @return array
 *   HTML links, most frequent keyword first.
 */
function apachesolr_stats_block_frequent_keywords($page_id, $limit = 10) {
  $cid = "apachesolr_stats_block_frequent_keywords_{$page_id}";
  $cached = cache_get($cid, 'cache_block');

  // Serve any unexpired cached list, including an empty one; otherwise a
  // site without logged keywords would run the query on every request.
  if ($cached && isset($cached->data) && is_array($cached->data) && $cached->expire > time()) {
    return $cached->data;
  }

  $search_page = apachesolr_search_page_load($page_id);
  $search_path = $search_page['search_path'];
  $keywords = array();

  // Return keywords only last week's logged queries.
  $timestamp = time() - 3600 * 24 * 7;
  $result = db_query_range("SELECT keywords FROM {apachesolr_stats} WHERE\n      numfound > 0 AND timestamp > %d AND page_id = '%s'\n      ORDER BY timestamp DESC", $timestamp, $page_id, 0, 5000);
  while ($record = db_fetch_object($result)) {
    if (trim($record->keywords) != "") {

      // Keep track of individual keywords used
      $keys_filtered = drupal_strtolower(trim($record->keywords));
      if (isset($keywords[$keys_filtered])) {
        $keywords[$keys_filtered]++;
      }
      else {
        $keywords[$keys_filtered] = 1;
      }
    }
  }

  // Sort by most frequent first.
  arsort($keywords);

  // Get first $limit items. Keys are preserved explicitly: PHP stores
  // numeric keywords such as "2012" as integer keys, which array_slice()
  // would otherwise renumber to 0, 1, 2...
  $keywords = array_slice($keywords, 0, $limit, TRUE);
  $links = array();
  foreach ($keywords as $key => $frequency) {
    $label = (string) $key;
    $links[] = l($label, $search_path . '/' . $label);
  }

  // Cache information for 10 minutes.
  $expire = time() + 600;
  cache_set($cid, $links, 'cache_block', $expire);
  return $links;
}

/**
 * Implementation of hook_block().
 *
 * Exposes one "popular searches" block per search page enabled in the
 * 'apachesolr_stats_enabled' variable; the block delta is the search page ID.
 *
 * @param string $op
 *   Operation requested by the block system; 'list' and 'view' are handled.
 * @param mixed $delta
 *   Block delta (search page ID), used by the 'view' operation.
 * @return array|null
 *   Block definitions for 'list', a block array for 'view', or nothing when
 *   there is nothing to show or the operation is not handled.
 */
function apachesolr_stats_block($op = 'list', $delta = 0) {
  switch ($op) {
    case 'list':
      $search_pages = apachesolr_search_load_all_search_pages();
      $enabled_search_pages = variable_get('apachesolr_stats_enabled', array());
      $blocks = array();

      // Default block settings.
      foreach ($enabled_search_pages as $page_id) {
        if (!$page_id) {
          continue;
        }
        // Fall back to the page ID when the search page is no longer defined,
        // instead of raising a notice and showing an empty label.
        $label = isset($search_pages[$page_id]['label']) ? $search_pages[$page_id]['label'] : $page_id;
        $blocks[$page_id] = array(
          'info' => t('Apache Solr Statistics: popular searches for @page_id page', array(
            '@page_id' => $label,
          )),
          // Start out disabled
          'status' => 0,
          'region' => 'right',
          // Should be cached equally across paths and roles.
          'cache' => BLOCK_CACHE_GLOBAL,
        );
      }
      return $blocks;

    case 'view':
      $enabled_search_pages = variable_get('apachesolr_stats_enabled', array());

      // empty() also covers deltas that are missing from the variable, which
      // a direct index access would report as an undefined index.
      if (empty($enabled_search_pages[$delta])) {
        return array(
          'subject' => '',
          'content' => '',
        );
      }
      $links = apachesolr_stats_block_frequent_keywords($delta);
      if ($links) {
        $search_pages = apachesolr_search_load_all_search_pages();
        $label = isset($search_pages[$delta]['label']) ? $search_pages[$delta]['label'] : $delta;

        // Return a block array.
        return array(
          'subject' => t('Popular searches for @page_name', array(
            '@page_name' => $label,
          )),
          'content' => theme('apachesolr_stats_block', $links),
        );
      }
      // No keywords logged: return nothing.
      break;
  }
}

/**
 * Implementation of hook_theme().
 *
 * Registers the 'apachesolr_stats_block' theme hook, which takes a single
 * 'links' argument.
 *
 * @return array
 *   Theme hook definitions keyed by hook name.
 */
function apachesolr_stats_theme() {
  return array(
    'apachesolr_stats_block' => array(
      // 'arguments' maps variable name => default value. A positional list
      // such as array('links') would register a variable named "0", which
      // breaks template overrides of this hook.
      'arguments' => array(
        'links' => NULL,
      ),
    ),
  );
}

/**
 * Theme content for apachesolr_stats_block().
 *
 * @param array $links
 *   An array, each element is a link to launch a search for that word.
 * @return string
 *   HTML, the themed block content (the links rendered via 'item_list').
 */
function theme_apachesolr_stats_block($links) {
  $item_list = theme('item_list', $links);
  return $item_list;
}

Functions

Name | Description
apachesolr_stats_admin Build the settings form.
apachesolr_stats_apachesolr_query_alter Implementation of hook_apachesolr_query_alter().
apachesolr_stats_block Implementation of hook_block().
apachesolr_stats_block_frequent_keywords Return a listing of keywords for the Popular Searches block.
apachesolr_stats_chart Generate an IMG tag with the URL to generate a chart using Google Charts API.
apachesolr_stats_cron Implementation of hook_cron().
apachesolr_stats_encodedata Encode data using Chart's simple encoding. See http://code.google.com/apis/chart/formats.html#simple
apachesolr_stats_exit Implementation of hook_exit().
apachesolr_stats_facet_usage_graph Returns the <img> tag for a Google chart with the facet usage
apachesolr_stats_facet_usage_table Returns a themed table for facet usage
apachesolr_stats_generate_report_elements Generates report elements for the given granularity.
apachesolr_stats_get_active_facets Gets the searcher's active facets from Facet API.
apachesolr_stats_get_facets Returns the facet array to report on.
apachesolr_stats_get_granularities Returns an array of preset granularities.
apachesolr_stats_get_search_page_by_path Return a search page by the base path, e.g. 'search/site'
apachesolr_stats_menu Implementation of hook_menu().
apachesolr_stats_report Callback for admin/reports/apachesolr_stats.
apachesolr_stats_report_frequent_keywords Receives an array of keyword => count and reports the top-used terms.
apachesolr_stats_theme Implementation of hook_theme().
theme_apachesolr_stats_block Theme content for apachesolr_stats_block().