You are here

acquia_search.module in Acquia Search 6.3

Integration between Acquia Drupal and Acquia's hosted solr search service.

File

acquia_search.module
View source
<?php

/**
 * @file
 *   Integration between Acquia Drupal and Acquia's hosted solr search service.
 */
// Identifier (env_id) of the Solr environment record that this module creates,
// protects from deletion and removes again when it is disabled.
define('ACQUIA_SEARCH_ENVIRONMENT_ID', 'acquia_search_server_1');
// Define REQUEST_TIME only when nothing else has defined it yet; the value is
// taken from $_SERVER['REQUEST_TIME'] and cast to an integer.
if (!defined('REQUEST_TIME')) {
  define('REQUEST_TIME', (int) $_SERVER['REQUEST_TIME']);
}

/**
 * Builds the predefined Acquia Search environment definition.
 *
 * @param $conf
 *   Optional array. When both 'acquia_subscription_id' and
 *   'acquia_subscription_key' are non-empty, the subscription is requested
 *   with those credentials. Otherwise the stored identifier and the stored
 *   subscription data are used.
 *
 * @return
 *   An array with the keys 'url', 'service_class' and 'conf'.
 */
function acquia_search_get_environment($conf = array()) {
  $has_explicit_credentials = !empty($conf['acquia_subscription_id']) && !empty($conf['acquia_subscription_key']);
  if ($has_explicit_credentials) {
    $identifier = $conf['acquia_subscription_id'];
    $params = array();
    $subscription = acquia_agent_get_subscription($params, $identifier, $conf['acquia_subscription_key']);
  }
  else {
    $identifier = acquia_agent_settings('acquia_identifier');
    $subscription = acquia_agent_settings('acquia_subscription_data');
  }

  // A colony named in the heartbeat data wins over the configured base URL.
  if (empty($subscription['heartbeat_data']['search_service_colony'])) {
    $base_url = variable_get('acquia_search_base_url', 'http://search.acquia.com');
  }
  else {
    $base_url = 'http://' . $subscription['heartbeat_data']['search_service_colony'];
  }

  // The index path defaults to /solr/<identifier> unless overridden.
  // @todo - server URL may depend on region info.
  $index_path = variable_get('acquia_search_path', '/solr/' . $identifier);

  return array(
    'url' => $base_url . $index_path,
    'service_class' => 'AcquiaSearchService',
    'conf' => array(
      'service_class_info' => array(
        'file' => 'Acquia_Search_Service',
        'module' => 'acquia_search',
        'class' => 'AcquiaSearchService',
      ),
    ),
  );
}

/**
 * Implementation of hook_enable().
 *
 * Caches the module version first and then triggers a subscription check, so
 * the version is already stored when the heartbeat is sent.
 */
function acquia_search_enable() {

  // Send a heartbeat so the Acquia Network knows the module is enabled.
  // This causes an invocation of hook_acquia_subscription_status() which is
  // implemented in this module to set up the environment.
  _acquia_search_set_version();
  acquia_agent_check_subscription();
}

/**
 * Implementation of hook_disable().
 *
 * Deletes the predefined Acquia Search environment, detaches every other
 * environment that is still connected to Acquia Search, and removes the
 * stored derived key salt. Also called from
 * acquia_search_acquia_subscription_status() when the subscription is not
 * found or has expired.
 */
function acquia_search_disable() {
  // Drop the environment record this module created.
  apachesolr_environment_delete(ACQUIA_SEARCH_ENVIRONMENT_ID);

  // Walk a freshly loaded list so the deletion above is reflected.
  foreach (apachesolr_load_all_environments(TRUE) as $environment) {
    if (!acquia_search_environment_connected($environment)) {
      // Not an Acquia Search environment; leave it alone.
      continue;
    }

    // If this environment is the site default, fall back to the stock default.
    if (apachesolr_default_environment() == $environment['env_id']) {
      variable_del('apachesolr_default_environment');
    }

    // Point the environment back at a local Solr instead of the Acquia URL.
    $environment['url'] = 'http://localhost:8983/solr';

    // The service class is blanked rather than unset, because an unset key
    // would not overwrite the previously saved value.
    $environment['service_class'] = '';
    if (is_array($environment['conf'])) {
      // NOTE(review): acquia_search_enable_acquia_solr_environment() writes
      // $environment['conf']['service_class_info'], while this clears
      // $environment['conf']['service_class'] - confirm which key is meant.
      $environment['conf']['service_class'] = '';
    }
    apachesolr_environment_save($environment);
  }

  variable_del('acquia_search_derived_key_salt');
}

/**
 * Implementation of hook_help().
 *
 * On the Apache Solr settings page, tells the administrator that search is
 * served by Acquia Search when the displayed environment is connected to it
 * and the subscription is active.
 *
 * @param $path
 *   The router path help is requested for.
 * @param $arg
 *   The path arguments; index 4, when non-empty, is used as the environment
 *   id, otherwise the default environment is used.
 *
 * @return
 *   The translated help text, or nothing when no help applies.
 */
function acquia_search_help($path, $arg) {
  switch ($path) {
    case 'admin/settings/apachesolr':
      // Do not assume the fifth path argument exists.
      $env_id = !empty($arg[4]) ? $arg[4] : apachesolr_default_environment();
      $environment = apachesolr_environment_load($env_id);

      // An environment that failed to load cannot be connected; skip it
      // instead of passing a non-array on to the connection check.
      if (!empty($environment) && acquia_search_environment_connected($environment) && acquia_agent_subscription_is_active()) {
        $as_link = l(t('Acquia Search'), 'http://www.acquia.com/products-services/acquia-search');
        return t("Search is being provided by the !as network service.", array(
          '!as' => $as_link,
        ));
      }
      break;
  }
}

/**
 * Creates or refreshes the predefined Acquia Search environment record.
 *
 * When the record does not exist yet, it is created, made the default
 * environment, and the default search page is pointed at it. In every case
 * the URL, service class and service class info are overwritten with the
 * values from acquia_search_get_environment() and the record is saved.
 */
function acquia_search_enable_acquia_solr_environment() {
  $environment = apachesolr_environment_load(ACQUIA_SEARCH_ENVIRONMENT_ID, TRUE);
  if (!$environment) {
    // Start from a real array; writing array keys into the FALSE returned by
    // a failed load relies on auto-vivification, deprecated since PHP 8.1.
    $environment = array(
      'conf' => array(),
    );

    // Only set the default if we just created the environment.
    // This will almost always happen, unless the module was disabled via SQL.
    variable_set('apachesolr_default_environment', ACQUIA_SEARCH_ENVIRONMENT_ID);

    // Also make sure that the default search page has Acquia Search as its
    // default environment.
    $default_search_page_id = apachesolr_search_default_search_page();
    $default_search_page = apachesolr_search_page_load($default_search_page_id);
    if (!empty($default_search_page) && $default_search_page['env_id'] != ACQUIA_SEARCH_ENVIRONMENT_ID) {
      $default_search_page['env_id'] = ACQUIA_SEARCH_ENVIRONMENT_ID;
      apachesolr_search_page_save($default_search_page);
    }
  }
  elseif (!isset($environment['conf']) || !is_array($environment['conf'])) {
    // A stored record without a usable 'conf' array would break the nested
    // assignment below.
    $environment['conf'] = array();
  }
  $acquia_environment = acquia_search_get_environment();

  // Manual merge because we have 1 level deeper (conf) and we would override
  // the whole conf array if we did an automatic merge.
  $environment['url'] = $acquia_environment['url'];
  $environment['service_class'] = $acquia_environment['service_class'];
  $environment['conf']['service_class_info'] = $acquia_environment['conf']['service_class_info'];

  $environment['env_id'] = ACQUIA_SEARCH_ENVIRONMENT_ID;
  $environment['name'] = t('Acquia Search');
  apachesolr_environment_save($environment);
}

/**
 * Implementation of hook_menu_alter().
 *
 * Replaces the access check of the environment delete page with
 * acquia_search_environment_delete_access().
 */
function acquia_search_menu_alter(&$menu) {
  $path = 'admin/settings/apachesolr/settings/%apachesolr_environment/delete';
  if (!isset($menu[$path])) {
    // The delete page is not registered; nothing to override.
    return;
  }

  // Path component 4 is the %apachesolr_environment placeholder, so that
  // argument is handed to the access callback.
  $menu[$path]['access callback'] = 'acquia_search_environment_delete_access';
  $menu[$path]['access arguments'] = array(4);
}

/**
 * Stores the module version in the 'acquia_search_version' variable.
 *
 * The version is read from acquia_search.info; when the info file has no
 * 'version' entry, the core compatibility string '6.x-3.x' is used instead.
 * The variable is only written when the value actually changed.
 */
function _acquia_search_set_version() {
  $fallback = '6.x-3.x';

  // Value cached by a previous call, so it can be sent at no extra cost.
  $stored = variable_get('acquia_search_version', $fallback);

  $info_file = drupal_get_path('module', 'acquia_search') . '/acquia_search.info';
  $info = drupal_parse_info_file($info_file);

  if (isset($info['version'])) {
    $current = (string) $info['version'];
  }
  else {
    $current = $fallback;
  }

  if ($stored != $current) {
    variable_set('acquia_search_version', $current);
  }
}

/**
 * Tests whether the environment is connected to Acquia Search.
 *
 * @param $environment
 *   An environment array; 'service_class' and 'env_id' are read.
 *
 * @return
 *   TRUE when the service class is 'AcquiaSearchService' or when the
 *   environment has a non-empty 'acquia_search_key' variable, FALSE otherwise.
 */
function acquia_search_environment_connected($environment) {
  if ($environment['service_class'] != 'AcquiaSearchService') {
    // Not using our service class; a manually set search key still counts.
    $search_key = apachesolr_environment_variable_get($environment['env_id'], 'acquia_search_key');
    return !empty($search_key);
  }
  return TRUE;
}

/**
 * Access callback for the environment delete page.
 *
 * @param $environment
 *   The environment array the delete page was requested for.
 *
 * @return
 *   FALSE for the predefined Acquia Search environment when it carries no
 *   'export_type' key; otherwise the result of the 'administer search'
 *   permission check.
 */
function acquia_search_environment_delete_access($environment) {
  $is_predefined = $environment['env_id'] == ACQUIA_SEARCH_ENVIRONMENT_ID;
  if ($is_predefined && !isset($environment['export_type'])) {
    return FALSE;
  }

  // Every other environment keeps the original permission check.
  return user_access('administer search');
}

/**
 * Implementation of hook_form_[form_id]_alter().
 *
 * With an active subscription, removes the delete link of the predefined
 * Acquia Search environment from the host settings table and adds the
 * "Always allow advanced syntax" setting to the advanced section.
 */
function acquia_search_form_apachesolr_settings_alter(&$form, $form_state) {

  // Don't alter the form if there is no subscription.
  if (!acquia_agent_subscription_is_active()) {
    return;
  }

  // Don't show delete operation for the Default AS environment. This means
  // that cloned acquia search environments can be deleted.
  // The table markup is only rewritten when it is actually present.
  if (isset($form['apachesolr_host_settings']['table']['#value'])) {
    $delete_link = '<a href="/admin/settings/apachesolr/settings/' . ACQUIA_SEARCH_ENVIRONMENT_ID . '/delete">Delete</a>';
    $form['apachesolr_host_settings']['table']['#value'] = str_replace($delete_link, '', $form['apachesolr_host_settings']['table']['#value']);
  }

  $form['advanced']['acquia_search_edismax_default'] = array(
    '#type' => 'radios',
    '#title' => t('Always allow advanced syntax for Acquia Search'),
    '#default_value' => variable_get('acquia_search_edismax_default', 0),
    '#options' => array(
      0 => t('Disabled'),
      1 => t('Enabled'),
    ),
    '#description' => t('If enabled, all Acquia Search keyword searches may use advanced <a href="@url">Lucene syntax</a> such as wildcard searches, fuzzy searches, proximity searches, boolean operators and more via the Extended Dismax parser. If not enabled, this syntax will only be used when needed to enable wildcard searches.', array(
      '@url' => 'http://lucene.apache.org/java/2_9_3/queryparsersyntax.html',
    )),
    '#weight' => 10,
  );
}

/**
 * Implementation of hook_form_[form_id]_alter().
 *
 * Locks the URL and environment id of any environment connected to Acquia
 * Search, additionally locks the name and hides the delete and cancel actions
 * for the predefined environment, and registers a validation handler on the
 * save action.
 */
function acquia_search_form_apachesolr_environment_edit_form_alter(&$form, $form_state) {

  // Gets environment from form, gets connection status to Acquia Search.
  $env_id = isset($form['env_id']['#default_value']) ? $form['env_id']['#default_value'] : '';
  $environment = $env_id ? apachesolr_environment_load($env_id) : FALSE;
  if ($environment && acquia_search_environment_connected($environment)) {
    // Pin the submitted values to the stored ones and show them read-only.
    $form['url']['#value'] = $form['url']['#default_value'];
    $form['url']['#attributes'] = array(
      'readonly' => 'readonly',
    );
    $form['env_id']['#value'] = $form['env_id']['#default_value'];
    // #disabled is a boolean Form API property, not an attribute array.
    $form['env_id']['#disabled'] = TRUE;
  }

  // Don't let the user rename or delete the initial environment.
  if ($env_id == ACQUIA_SEARCH_ENVIRONMENT_ID) {
    $form['name']['#value'] = $form['name']['#default_value'];
    $form['name']['#disabled'] = TRUE;
    $form['actions']['delete']['#access'] = FALSE;
    $form['actions']['cancel']['#access'] = FALSE;
  }
  $form['actions']['save']['#validate'][] = 'acquia_search_environment_edit_form_validate';
}
/**
 * Validation handler for the environment edit form's save action.
 *
 * For the predefined Acquia Search environment, the submitted values are
 * overwritten with the values from acquia_search_get_environment(), so the
 * environment parameters cannot be changed through the form.
 */
function acquia_search_environment_edit_form_validate($form, &$form_state) {
  if ($form_state['values']['env_id'] != ACQUIA_SEARCH_ENVIRONMENT_ID) {
    // Other environments are saved as submitted.
    return;
  }

  $predefined = acquia_search_get_environment();
  $form_state['values'] = array_merge($form_state['values'], $predefined);
}

/**
 * Implementation of hook_acquia_subscription_status().
 *
 * @param $active
 *   Whether the subscription is active.
 * @param $subscription
 *   The subscription data when active; when inactive, an integer status code
 *   is compared against SUBSCRIPTION_NOT_FOUND and SUBSCRIPTION_EXPIRED.
 */
function acquia_search_acquia_subscription_status($active, $subscription = FALSE) {
  if ($active) {
    acquia_search_enable_acquia_solr_environment();

    // The salt delivered with the heartbeat can change periodically, so the
    // stored copy is updated whenever it differs.
    $stored_salt = variable_get('acquia_search_derived_key_salt', '');
    if (isset($subscription['derived_key_salt']) && $stored_salt != $subscription['derived_key_salt']) {
      variable_set('acquia_search_derived_key_salt', $subscription['derived_key_salt']);
    }
  }
  elseif (is_int($subscription) && in_array($subscription, array(SUBSCRIPTION_NOT_FOUND, SUBSCRIPTION_EXPIRED))) {
    // A missing or expired subscription tears the search setup down.
    acquia_search_disable();
  }

  // D6 has no update phase in hook_requirements, so the version is refreshed
  // here, on every subscription status notification.
  // @see http://drupal.org/node/1784770
  // @see acquia_search_requirements().
  _acquia_search_set_version();
}

/**
 * Modify a solr base url and construct a hmac authenticator cookie.
 *
 * The URL is rebuilt from its host, path and query. The scheme becomes https
 * (and any port is dropped) when the 'ssl' stream transport is available and
 * ACQUIA_DEVELOPMENT_NOSSL is not defined; otherwise http is used and a port
 * other than 80 is kept.
 *
 * @param $url
 *  The solr url being requested - passed by reference and may be altered.
 * @param $string
 *  A string - the data to be authenticated, or empty to just use the path
 *  and query from the url to build the authenticator.
 * @param $derived_key
 *  Optional string to supply the derived key.
 * @param $env_id
 *  Optional environment id, passed on to acquia_search_authenticator().
 *
 * @return
 *  An array containing the string to be added as the content of the
 *  Cookie header to the request and the nonce.
 */
function acquia_search_auth_cookie(&$url, $string = '', $derived_key = NULL, $env_id = NULL) {
  $parts = parse_url($url);

  // Plain http is the baseline; https is used whenever it is available.
  $scheme = 'http://';
  $port = '';
  if (in_array('ssl', stream_get_transports(), TRUE) && !defined('ACQUIA_DEVELOPMENT_NOSSL')) {
    $scheme = 'https://';
  }
  elseif (isset($parts['port']) && $parts['port'] != 80) {
    $port = ':' . $parts['port'];
  }

  $path = '/';
  if (isset($parts['path'])) {
    $path = $parts['path'];
  }
  $query = '';
  if (isset($parts['query'])) {
    $query = '?' . $parts['query'];
  }
  $url = $scheme . $parts['host'] . $port . $path . $query;

  // 24 random bytes encode to a 32 character nonce.
  $nonce = base64_encode(acquia_search_random_bytes(24));

  // Sign the supplied data, or the request path and query when none is given.
  $signed_data = $string ? $string : $path . $query;
  $auth_header = acquia_search_authenticator($signed_data, $nonce, $derived_key, $env_id);

  return array(
    $auth_header,
    $nonce,
  );
}

/**
 * Returns the subscription's salt used to generate the derived key.
 *
 * The salt lives in the 'acquia_search_derived_key_salt' variable so this
 * module can keep connecting to Acquia Search while the subscription data is
 * unavailable, most commonly after a failed heartbeat connection to
 * rpc.acquia.com.
 *
 * Acquia Connector versions <= 7.x-2.7 read the salt directly from the
 * subscription data. For seamless upgrades, an empty variable is filled from
 * the subscription data on first use.
 *
 * @return string
 *   The derived key salt, or an empty string when none is known.
 *
 * @see http://drupal.org/node/1784114
 */
function acquia_search_derived_key_salt() {
  $salt = variable_get('acquia_search_derived_key_salt', '');
  if ($salt) {
    return $salt;
  }

  // Nothing stored yet: fall back to the subscription data, if it has a salt.
  $subscription = acquia_agent_settings('acquia_subscription_data');
  if (!isset($subscription['derived_key_salt'])) {
    return $salt;
  }

  $salt = $subscription['derived_key_salt'];
  variable_set('acquia_search_derived_key_salt', $salt);
  return $salt;
}

/**
 * Get the derived key for the solr hmac using the information shared with acquia.com.
 *
 * Results are kept in a static cache per environment id for the rest of the
 * request.
 *
 * @param $env_id
 *   Optional environment id. An environment with its own 'acquia_search_key'
 *   variable uses that value as the derived key.
 *
 * @return
 *   The derived key, or an empty string when the salt, the Acquia key or the
 *   Acquia identifier is missing.
 */
function _acquia_search_derived_key($env_id = NULL) {
  static $cache = array();

  // Calls without an environment share cache slot 0.
  $slot = empty($env_id) ? 0 : $env_id;
  if (isset($cache[$slot])) {
    return $cache[$slot];
  }

  $identifier = acquia_agent_settings('acquia_identifier');
  $key = acquia_agent_settings('acquia_key');

  // An explicit environment may carry a manually set search key.
  $manual_key = NULL;
  if ($slot) {
    $manual_key = apachesolr_environment_variable_get($slot, 'acquia_search_key');
  }

  // The salt is the same for all subscriptions. The main subscription has to
  // be usable even when an environment supplies its own search key.
  $salt = acquia_search_derived_key_salt();

  if (empty($salt) || empty($key) || empty($identifier)) {
    // Expired or invalid subscription - don't continue.
    $cache[$slot] = '';
  }
  elseif ($manual_key) {
    $cache[$slot] = $manual_key;
  }
  else {
    $cache[$slot] = _acquia_search_create_derived_key($salt, $identifier, $key);
  }
  return $cache[$slot];
}

/**
 * Derive a key for the solr hmac using a salt, id and key.
 *
 * @param $salt
 *   The derived key salt.
 * @param $id
 *   The identifier placed in front of the service name.
 * @param $key
 *   The secret used as the HMAC key.
 *
 * @return
 *   The hexadecimal HMAC-SHA1 digest.
 */
function _acquia_search_create_derived_key($salt, $id, $key) {
  // 'solr' names the service, giving each service its own derived key.
  $seed = $id . 'solr' . $salt;

  // Repeat the seed on the right until the message is at least 80 bytes long.
  $message = str_pad($seed, 80, $seed, STR_PAD_RIGHT);

  return hash_hmac('sha1', $message, $key);
}

/**
 * Creates an authenticator based on a data string and HMAC-SHA1.
 *
 * @param $string
 *   The data to authenticate.
 * @param $nonce
 *   The nonce to include in the authenticator.
 * @param $derived_key
 *   Optional derived key; looked up for $env_id when empty.
 * @param $env_id
 *   Optional environment id used for the derived key lookup.
 *
 * @return
 *   The authenticator string holding the request time, the nonce and the
 *   HMAC, or an empty string when no derived key is available.
 */
function acquia_search_authenticator($string, $nonce, $derived_key = NULL, $env_id = NULL) {
  if (empty($derived_key)) {
    $derived_key = _acquia_search_derived_key($env_id);
  }
  if (empty($derived_key)) {
    // Expired or invalid subscription - don't continue.
    return '';
  }

  $time = REQUEST_TIME;
  $hmac = hash_hmac('sha1', $time . $nonce . $string, $derived_key);
  $fields = array(
    'acquia_solr_time=' . $time,
    'acquia_solr_nonce=' . $nonce,
    'acquia_solr_hmac=' . $hmac,
  );
  return implode('; ', $fields) . ';';
}

/**
 * Validate the authenticity of returned data using a nonce and HMAC-SHA1.
 *
 * @param $hmac
 *   The HMAC digest received with the response.
 * @param $nonce
 *   The nonce that was sent with the request.
 * @param $string
 *   The response data to authenticate.
 * @param $derived_key
 *   Optional derived key; looked up for $env_id when empty.
 * @param $env_id
 *   Optional environment id used for the derived key lookup.
 *
 * @return
 *  TRUE or FALSE.
 */
function acquia_search_valid_response($hmac, $nonce, $string, $derived_key = NULL, $env_id = NULL) {
  if (empty($derived_key)) {
    $derived_key = _acquia_search_derived_key($env_id);
  }

  // Without a key nothing can be authenticated, and a digest that is not a
  // non-empty string can never be genuine.
  if (empty($derived_key) || !is_string($hmac) || $hmac === '') {
    return FALSE;
  }

  $expected = hash_hmac('sha1', $nonce . $string, $derived_key);

  // Loose comparison (==) treats numeric-looking hex strings such as
  // '0e12...' as numbers, so two different digests could compare equal.
  // Prefer the timing-safe comparison where the PHP version offers it.
  if (function_exists('hash_equals')) {
    return hash_equals($expected, $hmac);
  }
  return $expected === $hmac;
}

/**
 * Extracts the hmac_digest value from a set of response headers.
 *
 * @param $headers
 *   An array of header values keyed by header name; any other value yields
 *   an empty result.
 *
 * @return string
 *   The trimmed hmac_digest found in the first matching 'Pragma' header
 *   (name compared case-insensitively), or an empty string.
 */
function acquia_search_extract_hmac($headers) {
  if (!is_array($headers)) {
    return '';
  }

  foreach ($headers as $name => $value) {
    if (strtolower($name) != 'pragma') {
      continue;
    }
    $match = array();
    if (preg_match("/hmac_digest=([^;]+);/i", $value, $match)) {
      return trim($match[1]);
    }
  }
  return '';
}

/**
 * Implementation of hook_apachesolr_query_alter().
 *
 * Possibly sets the query parser ('defType') param to edismax, and
 * lower-cases keywords that contain a wildcard.
 */
function acquia_search_apachesolr_query_alter($query) {
  $environment = apachesolr_environment_load($query->solr('getId'));

  // @todo - does it make sense to check $caller too?
  if (!acquia_search_environment_connected($environment)) {
    return;
  }
  if ($query->getParam('qt') || $query->getParam('defType')) {
    // This is a 'mlt' query or something else custom.
    return;
  }

  $keys = $query->getParam('q');
  if (!$keys) {
    return;
  }

  // edismax is used when the keywords contain a wildcard (* or ?), or when
  // it has been enabled for all keyword searches.
  $wildcard = preg_match('/\\S+[*?]/', $keys);
  if (!$wildcard && !variable_get('acquia_search_edismax_default', 0)) {
    return;
  }
  $query->addParam('defType', 'edismax');

  if ($wildcard) {
    // Lower-case every keyword that contains a wildcard.
    $lowered = preg_replace_callback('/(\\S+[*?]\\S*)/', '_acquia_search_lower', $keys);
    $query->replaceParam('q', $lowered);
  }
}

/**
 * Convert to lower-case any keywords containing a wildcard.
 *
 * Used as the preg_replace_callback() callback in
 * acquia_search_apachesolr_query_alter().
 *
 * @param $matches
 *   The match array; index 1 holds the captured keyword.
 *
 * @return
 *   The captured keyword, lower-cased with drupal_strtolower().
 */
function _acquia_search_lower($matches) {
  return drupal_strtolower($matches[1]);
}

/**
 * Returns a string of highly randomized bytes (over the full 8-bit range).
 *
 * This function is better than simply calling mt_rand() or any other built-in
 * PHP function because it can return a long string of bytes (compared to < 4
 * bytes normally from mt_rand()) and uses the best available pseudo-random
 * source.
 *
 * Random bytes are collected in a static buffer that persists between calls
 * within the same request; each call consumes $count bytes from its front.
 *
 * Copied from the Drupal 7 source
 * @see http://api.drupal.org/api/drupal/includes!bootstrap.inc/function/drupal_random_bytes/7
 *
 * @param $count
 *   The number of characters (bytes) to return in the string.
 *
 * @return
 *   The first $count bytes taken from the buffer.
 */
function acquia_search_random_bytes($count) {

  // $random_state does not use drupal_static as it stores random bytes.
  static $random_state, $bytes, $php_compatible;

  // Initialize on the first call. The contents of $_SERVER includes a mix of
  // user-specific and system information that varies a little with each page.
  if (!isset($random_state)) {
    $random_state = print_r($_SERVER, TRUE);
    if (function_exists('getmypid')) {

      // Further initialize with the somewhat random PHP process ID.
      $random_state .= getmypid();
    }
    $bytes = '';
  }

  // Refill the buffer only when it holds fewer bytes than requested.
  if (strlen($bytes) < $count) {

    // PHP versions prior 5.3.4 experienced openssl_random_pseudo_bytes()
    // locking on Windows and rendered it unusable.
    if (!isset($php_compatible)) {
      $php_compatible = version_compare(PHP_VERSION, '5.3.4', '>=');
    }

    // /dev/urandom is available on many *nix systems and is considered the
    // best commonly available pseudo-random source.
    if ($fh = @fopen('/dev/urandom', 'rb')) {

      // PHP only performs buffered reads, so in reality it will always read
      // at least 4096 bytes. Thus, it costs nothing extra to read and store
      // that much so as to speed any additional invocations.
      $bytes .= fread($fh, max(4096, $count));
      fclose($fh);
    }
    elseif ($php_compatible && function_exists('openssl_random_pseudo_bytes')) {
      // Only the bytes still missing from the buffer are requested here.
      $bytes .= openssl_random_pseudo_bytes($count - strlen($bytes));
    }

    // If /dev/urandom is not available or returns no bytes, this loop will
    // generate a good set of pseudo-random bytes on any system.
    // Note that it may be important that our $random_state is passed
    // through hash() prior to being rolled into $output, that the two hash()
    // invocations are different, and that the extra input into the first one -
    // the microtime() - is prepended rather than appended. This is to avoid
    // directly leaking $random_state via the $output stream, which could
    // allow for trivial prediction of further "random" numbers.
    while (strlen($bytes) < $count) {
      $random_state = hash('sha256', microtime() . mt_rand() . $random_state);
      $bytes .= hash('sha256', mt_rand() . $random_state, TRUE);
    }
  }

  // Hand out the front of the buffer and keep the remainder for later calls.
  $output = substr($bytes, 0, $count);
  $bytes = substr($bytes, $count);
  return $output;
}

Functions

Namesort descending Description
acquia_search_acquia_subscription_status Implementation of hook_acquia_subscription_status().
acquia_search_apachesolr_query_alter Implementation of hook_apachesolr_query_alter().
acquia_search_authenticator Creates an authenticator based on a data string and HMAC-SHA1.
acquia_search_auth_cookie Modify a solr base url and construct a hmac authenticator cookie.
acquia_search_derived_key_salt Returns the subscription's salt used to generate the derived key.
acquia_search_disable Implementation of hook_disable().
acquia_search_enable Implementation of hook_enable().
acquia_search_enable_acquia_solr_environment Create a new record pointing to the Acquia apachesolr search server and set it as the default
acquia_search_environment_connected Tests whether the environment is connected to Acquia Search.
acquia_search_environment_delete_access Delete environment page access.
acquia_search_environment_edit_form_validate Validation handler that resets the predefined Acquia Search environment's submitted values.
acquia_search_extract_hmac Look in the headers and get the hmac_digest out
acquia_search_form_apachesolr_environment_edit_form_alter Implementation of hook_form_[form_id]_alter().
acquia_search_form_apachesolr_settings_alter Implementation of hook_form_[form_id]_alter().
acquia_search_get_environment Predefined Acquia Search network environment
acquia_search_help Implementation of hook_help().
acquia_search_menu_alter Implementation of hook_menu_alter().
acquia_search_random_bytes Returns a string of highly randomized bytes (over the full 8-bit range).
acquia_search_valid_response Validate the authenticity of returned data using a nonce and HMAC-SHA1.
_acquia_search_create_derived_key Derive a key for the solr hmac using a salt, id and key.
_acquia_search_derived_key Get the derived key for the solr hmac using the information shared with acquia.com.
_acquia_search_lower Convert to lower-case any keywords containing a wildcard.
_acquia_search_set_version Helper function to cache the Acquia Search version.

Constants

Namesort descending Description
ACQUIA_SEARCH_ENVIRONMENT_ID Environment ID of the predefined Acquia Search environment ('acquia_search_server_1').