You are here

google_appliance.module in Google Search Appliance 5

GSA integration

File

google_appliance.module
View source
<?php

/**
 * @file
 * Integrates Drupal search with a Google Search Appliance (GSA).
 */

/**
 * Implementation of hook_menu().
 *
 * On the cacheable pass this registers the 'google_appliance' callback and the
 * settings page. On the non-cacheable pass it registers nothing and instead
 * triggers the node meta tag output.
 *
 * @param $may_cache
 *   Whether the menu system is collecting cacheable items.
 * @return
 *   Array of menu item definitions (empty on the non-cacheable pass).
 */
function google_appliance_menu($may_cache = false) {
  if (!$may_cache) {
    // Non-cacheable pass: no menu items, just emit the node meta tags.
    _google_appliance_add_meta_tags();
    return array();
  }

  // Bare callback path that hands off to the search function.
  $search_callback = array(
    'path' => 'google_appliance',
    'callback' => 'google_appliance_search',
    'type' => MENU_CALLBACK,
    'access' => true,
  );

  // Administration form, restricted to users who may administer search.
  $settings_page = array(
    'path' => 'admin/settings/google_appliance',
    'title' => t('Google Appliance Settings'),
    'description' => t('Configuration for the Google Appliance search'),
    'callback' => 'drupal_get_form',
    'callback arguments' => array(
      'google_appliance_admin_settings',
    ),
    'access' => user_access('administer search'),
    'type' => MENU_NORMAL_ITEM,
  );

  return array($search_callback, $settings_page);
}

/**
 * Implementation of hook_block().
 *
 * Provides the "Recommended Links" block, which lists the key matches of the
 * search response stored by google_appliance_static_response_cache().
 *
 * @param $op
 *   Operation: 'list' returns the block definitions, 'view' renders a block.
 *   Other operations are ignored.
 * @param $delta
 *   Block identifier; only 'recommended_links' is provided.
 * @param $edit
 *   Unused.
 * @return
 *   For 'list', the block definitions. For 'view', an array with a 'content'
 *   key, or nothing / false when there is nothing to display.
 */
function google_appliance_block($op = 'list', $delta = 0, $edit = array()) {
  switch ($op) {
    case 'list':
      $blocks = array();
      $blocks['recommended_links']['info'] = t("Recommended Links");
      $blocks['recommended_links']['title'] = t('Recommended Links');
      $blocks['recommended_links']['pages'] = '*search/google_appliance*';
      $blocks['recommended_links']['visibility'] = 1;
      return $blocks;

    case 'view':
      // Loose comparison, exactly as a switch on $delta would match it.
      if ($delta != 'recommended_links') {
        return;
      }

      // Without a stored search response there is nothing to recommend.
      $result = google_appliance_static_response_cache();
      if (!$result) {
        return;
      }

      $matches = $result->getKeyMatches();
      if (!$matches) {
        return;
      }

      $links = array();
      foreach ($matches as $link => $title) {
        $links[] = l($title, $link);
      }
      if (!count($links)) {
        return false;
      }

      return array('content' => theme('item_list', $links));
  }
}
/**
 * Form builder for the Google Appliance settings page.
 *
 * @return
 *   Form array holding an "Initial Configuration" fieldset, an "Error
 *   Messages" fieldset and a submit button. Every field defaults to the
 *   variable of the same name.
 */
function google_appliance_admin_settings() {
  $form = array();

  // Connection settings and tuning options; the first four are required.
  $form["config_init"] = array(
    "#title" => t("Initial Configuration"),
    "#type" => "fieldset",
    "google_appliance_name" => array(
      "#type" => "textfield",
      "#size" => 30,
      "#title" => t("Search Name"),
      "#description" => t('The name of this search, to appear as sub-navigation on the search page.'),
      "#default_value" => variable_get('google_appliance_name', 'Google Appliance'),
      "#required" => true,
    ),
    "google_appliance_host_name" => array(
      "#type" => "textfield",
      "#size" => 50,
      "#title" => t("Host Name"),
      "#description" => t('Your Google Search Appliance host name or IP address (with http:// or https://), which were assigned when the appliance was set up.<br />You do <b>not</b> need to include "/search" at the end, or a trailing slash, but you should include a port number if needed.<br/> Example: http://mygooglebox.com'),
      "#default_value" => variable_get('google_appliance_host_name', ''),
      "#required" => true,
    ),
    "google_appliance_collection" => array(
      "#type" => "textfield",
      "#size" => 20,
      "#title" => t("Collection"),
      "#description" => t('The name of the collection of indexed content to search.'),
      "#default_value" => variable_get('google_appliance_collection', ''),
      "#required" => true,
    ),
    "google_appliance_client" => array(
      "#type" => "textfield",
      "#size" => 20,
      "#title" => t("Client"),
      "#description" => t('The name of a valid front-end, defined when you set up the appliance.'),
      "#default_value" => variable_get('google_appliance_client', ''),
      "#required" => true,
    ),
    "google_appliance_cache_timeout" => array(
      "#type" => "textfield",
      "#size" => 20,
      "#title" => t("Cache Timeout"),
      "#description" => t('If you wish to use caching of results (to reduce load on mini, enter a timeout here'),
      "#default_value" => variable_get('google_appliance_cache_timeout', ''),
    ),
    "google_debug" => array(
      "#type" => "textfield",
      "#size" => 20,
      "#title" => t("Debug Level"),
      "#description" => t('1 = watchdog, 2 = dpr(needs devel module), 3 = more dpr\'s'),
      "#default_value" => variable_get('google_debug', ''),
    ),
    "google_appliance_limit_per_page" => array(
      "#type" => "textfield",
      "#size" => 5,
      "#title" => t("Number of results per page"),
      "#description" => t('If you enter 0, it will return the max allowed by the appliance (100)'),
      "#default_value" => variable_get('google_appliance_limit_per_page', 10),
    ),
  );

  // User-facing error messages. Each entry is keyed by the variable it edits
  // and lists: field title, field description, built-in default message.
  $form["config_messages"] = array(
    "#title" => t("Error Messages"),
    "#type" => "fieldset",
    "#collapsible" => true,
  );
  $error_messages = array(
    "google_appliance_errorcode_1" => array(
      t("No results found"),
      t('If there are no results for the search criteria.'),
      'No results were found that matched your criteria. Please try broadening your search.',
    ),
    "google_appliance_errorcode_2" => array(
      t("More than 1,000 results"),
      t('If there are more than 1,000 results for the search criteria.'),
      'Sorry, but our search does not return more than 1,000 records, please refine your criteria.',
    ),
    "google_appliance_errorcode_neg_99" => array(
      t("Cannot perform search"),
      t('If the search cannot perform due to a query error.'),
      'We apologize, but your search cannot be completed at this time, please try again later.',
    ),
    "google_appliance_errorcode_neg_100" => array(
      t("Cannot connect to Google Appliance"),
      t('If the search cannot connect to the Google Appliance server.'),
      'We apologize, but the connection to our search engine appears to be down at the moment, please try again later.',
    ),
  );
  foreach ($error_messages as $variable => $parts) {
    list($title, $description, $fallback) = $parts;
    $form["config_messages"][$variable] = array(
      "#title" => $title,
      "#type" => "textfield",
      "#size" => 100,
      "#maxlength" => 255,
      "#required" => true,
      "#description" => $description,
      "#default_value" => variable_get($variable, $fallback),
    );
  }

  // optional metadata configuration (currently disabled)

  /*
  $form["config_metadata"] = array(
    "#title" => t("Metadata Configuration"),
    "#type" => "fieldset",
    "#collapsible" => true,
  );
  */

  $form["submit"] = array(
    "#type" => "submit",
    "#value" => t("Save Settings"),
  );

  return $form;
}

/**
 * Validation handler for the settings form.
 *
 * Flags each of the four mandatory settings that was submitted empty. The
 * same fields are also marked #required in the form definition.
 *
 * @param $form_id
 *   Identifier of the submitted form (unused).
 * @param $form
 *   Submitted values, keyed by field name.
 */
function google_appliance_admin_settings_validate($form_id, $form) {
  $mandatory = array(
    'google_appliance_host_name',
    'google_appliance_collection',
    'google_appliance_client',
    'google_appliance_name',
  );

  foreach ($mandatory as $field) {
    if (!empty($form[$field])) {
      continue;
    }

    // Messages stay literal t() calls and are only translated when needed.
    switch ($field) {
      case 'google_appliance_host_name':
        $message = t('Please enter your host name or IP address.');
        break;

      case 'google_appliance_collection':
        $message = t('Please enter the name of the collection you want to search.');
        break;

      case 'google_appliance_client':
        $message = t('Please enter name of the client frontend you are searching.');
        break;

      case 'google_appliance_name':
        $message = t('Please enter the name of this search, to appear as sub-navigation on the search page.');
        break;
    }
    form_set_error($field, $message);
  }
}

/**
 * Submit handler for the settings form: stores every field as a variable.
 *
 * @param $form_id
 *   Identifier of the submitted form (unused).
 * @param $form
 *   Submitted values, keyed by field name.
 */
function google_appliance_admin_settings_submit($form_id, $form) {
  // Plain-text settings are escaped with check_plain() before being stored.
  $escaped_settings = array(
    'google_appliance_host_name',
    'google_appliance_collection',
    'google_appliance_client',
    'google_appliance_name',
    'google_appliance_cache_timeout',
    'google_debug',
    'google_appliance_limit_per_page',
  );
  foreach ($escaped_settings as $setting) {
    variable_set($setting, check_plain($form[$setting]));
  }

  // Error messages may contain HTML, so they are stored exactly as entered.
  $raw_settings = array(
    'google_appliance_errorcode_1',
    'google_appliance_errorcode_2',
    'google_appliance_errorcode_neg_99',
    'google_appliance_errorcode_neg_100',
  );
  foreach ($raw_settings as $setting) {
    variable_set($setting, $form[$setting]);
  }

  drupal_set_message(t('Your settings have been saved.'));
}

/**
 * Adds <meta> tags for the current node so the Google crawler can index its
 * metadata.
 *
 * Only acts on paths whose first two components are "node" and a numeric id.
 * The tags themselves are produced by the 'add_google_appliance_meta_tags'
 * theme function.
 */
function _google_appliance_add_meta_tags() {
  if (arg(0) != 'node' || !is_numeric(arg(1))) {
    return;
  }

  $node = node_load(arg(1));

  // A numeric id does not guarantee the node exists; without this guard the
  // theme function would dereference a non-object.
  if (is_object($node)) {
    theme('add_google_appliance_meta_tags', $node);
  }
}

/**
 * Implementation of hook_search()
 *
 * @param string $op
 *  Operation - name, reset, search, status
 * @param string $keys
 *  Keyword string sent to the search
 * @return
 *  For 'name', the configured search name. For 'search', an array of search
 *  results (each is an assoc. array) that can be fed to a theme function;
 *  false when no host is configured or the query throws; or an HTML string
 *  carrying the user message when building the query fails with a positive
 *  error code. Nothing for 'reset' and 'status'.
 */
function google_appliance_search($op = 'search', $keys = null) {
  switch ($op) {
    case 'name':
      return t(variable_get('google_appliance_name', "Google Appliance"));

    case 'search':
      global $pager_page_array, $pager_total, $pager_total_items;
      $page = isset($_GET['page']) ? $_GET['page'] : '';

      // Convert comma-separated $page to an array, used by other functions.
      $pager_page_array = explode(',', $page);

      // $element indicates which of the pagers active this pager is working from.  $limit indicates how many per page.
      $element = 0;
      $limit = variable_get('google_appliance_limit_per_page', 10);

      // The settings form documents 0 as "the max allowed by the appliance
      // (100)". The pager maths below divides by the page size, so it needs
      // the effective, non-zero value.
      $pager_limit = (int) $limit > 0 ? (int) $limit : 100;

      $dir = drupal_get_path('module', 'google_appliance');
      include_once $dir . '/DrupalGoogleMini.php';

      // Debug level 1 enables debugging; 2 and above also pass 'dpr'.
      $google_debug = variable_get('google_debug', 0);
      if ($google_debug >= 2) {
        $gm = new DrupalGoogleMini(true, 'dpr');
      }
      elseif ($google_debug == 1) {
        $gm = new DrupalGoogleMini(true);
      }
      else {
        $gm = new DrupalGoogleMini(false);
      }

      /**
       * If you have many searches for the same content
       * You can use this setting to keep the GSA from getting hit too often
       *
       */
      if (variable_get('google_appliance_cache_timeout', 0)) {
        cache_clear_all(null, 'cache_google');
        $gm->cache = true;
      }

      // initialize search object
      try {
        $gm->setOutputEncoding('utf8');
        $gm->setInputEncoding('utf8');
        $gm->setMetaDataRequested('*');

        // get configuration from settings page
        $_tmp_host = variable_get('google_appliance_host_name', false);
        if (!$_tmp_host) {
          // Link to the settings path registered in google_appliance_menu().
          drupal_set_message(t('No host name has been configured for the search appliance. Please enter it on the <a href="@admin-url">Google Appliance settings page</a>', array(
            "@admin-url" => url("admin/settings/google_appliance"),
          )), 'error');
          return false;
        }
        $gm->baseUrl = $_tmp_host . "/search";
        $gm->collection = variable_get('google_appliance_collection', '');
        $gm->setQueryPart('client', variable_get('google_appliance_client', ''));
        $gm->setPageAndResultsPerPage($page, $limit);

        // set search parameters
        $gm->setKeywords($keys);
        if (module_exists('i18n')) {
          if ($lang = i18n_get_lang()) {
            $gm->setLanguageFilter(array(
              $lang,
            ));
          }
        }
      } catch (GoogleMiniCriteriaException $e) {
        $code = $e->getCode();

        // Prefer the message configured on the settings page for this code.
        if ($message = variable_get('google_appliance_errorcode_' . $code, '')) {
          $user_message = $message;
        }
        else {
          $user_message = GoogleMiniException::getUserMessage($code);
        }
        $error_message = $e->getMessage();
        if ($code > 0) {
          return "<h2>" . $user_message . "</h2>";
        }
        else {
          watchdog('google_appliance', $error_message);
          drupal_set_message($error_message, 'error');
        }
      }

      // perform the search
      $results = array();
      $resultIterator = null;
      try {
        $resultIterator = $gm->query();
        google_appliance_static_response_cache($resultIterator);

        // Google never returns more than 1000 results, so we ignore any values larger than that.
        $pager_total_items[$element] = $resultIterator->totalResults < 1000 ? $resultIterator->totalResults : 1000;
        $pager_total[$element] = ceil($pager_total_items[$element] / $pager_limit);
        $pager_page_array[$element] = max(0, min((int) $pager_page_array[$element], (int) $pager_total[$element] - 1));
        foreach ($resultIterator as $key => $result) {
          $results[] = theme('google_appliance_search_result_array', $result);
        }
      } catch (Exception $e) {
        // query() itself may have thrown, in which case there is no response
        // object to keep.
        if ($e->getCode() > 0 && $resultIterator) {
          google_appliance_static_response_cache($resultIterator);
        }
        drupal_set_message($e->getMessage());
        return false;
      }
      return $results;

    case 'reset':
    case 'status':

      // do nothing
      break;
  }
}
/**
 * Adds <meta> tags describing a node to the HTML head.
 *
 * Emits one "category-<vocabulary name>" tag per taxonomy term (plus the
 * node's NAT terms when the nat module is enabled), followed by date,
 * created, status, content-language (when set), type and author tags.
 *
 * @param $node
 *   Node object to describe. It is not modified.
 */
function theme_add_google_appliance_meta_tags($node) {

  // List of array(tag name, tag content) pairs.
  $tags = array();

  /**
   * Adding taxonomy tags
   */
  $vocabs = taxonomy_get_vocabularies();
  $terms = (!empty($node->taxonomy) && is_array($node->taxonomy)) ? $node->taxonomy : array();
  if (module_exists('nat') && !empty($node->nat) && is_array($node->nat)) {
    $terms = array_merge($node->nat, $terms);
  }
  foreach ($terms as $term) {
    $vocab_name = isset($vocabs[$term->vid]) ? $vocabs[$term->vid]->name : '';
    $tags[] = array(
      'category-' . strtolower($vocab_name),
      $term->name,
    );
  }

  /**
   * Adding sort date IMPORTANT: for sorting, mini must be configured to use this tag
   *
   * 24-hour "H" keeps the timestamps unambiguous; 12-hour "h" without a
   * meridiem would make 01:00 and 13:00 identical.
   */
  $tags[] = array(
    'date',
    date('Y-m-d H:i:s', $node->changed),
  );
  $tags[] = array(
    'created',
    date('Y-m-d H:i:s', $node->created),
  );

  /**
   * Normally this doesn't matter,
   * but if you want to allow the gsa to
   * access unpublished pages and later filter
   * on this you will need it.
   */
  $tags[] = array(
    'status',
    $node->status,
  );

  /**
   * i18n configuration
   */
  if (!empty($node->language)) {
    $tags[] = array(
      'content-language',
      $node->language,
    );
  }

  /**
   * node type
   */
  $tags[] = array(
    'type',
    $node->type,
  );

  /**
   * Author: the node's own author, falling back to 'anonymous' when the
   * account cannot be loaded or has no name.
   */
  $uid = empty($node->uid) ? 0 : $node->uid;
  $account = user_load(array(
    'uid' => $uid,
  ));
  $author = ($account && !empty($account->name)) ? $account->name : 'anonymous';
  $tags[] = array(
    'author',
    $author,
  );

  // add meta tags
  foreach ($tags as $tag) {
    list($name, $content) = $tag;
    $content = strip_tags($content);
    drupal_set_html_head('<meta name="' . check_plain($name) . '" content="' . check_plain($content) . '" />');
  }
}
/**
 * Converts one appliance result object into a search result array.
 *
 * @param $result
 *   Result object exposing the U, T and S properties and getMetaData().
 * @return
 *   Array with the keys link, user, type, title, date and snippet.
 */
function theme_google_appliance_search_result_array($result) {
  $item = array();

  $item['link'] = $result->U;
  $item['user'] = $result->getMetaData('user');
  $item['type'] = $result->getMetaData('type');

  // The title is reduced to plain text.
  $item['title'] = strip_tags($result->T);

  // The 'date' metadata string is parsed into a Unix timestamp.
  $date_string = $result->getMetaData('date');
  $item['date'] = strtotime($date_string);

  $snippet = (string) $result->S;
  $item['snippet'] = decode_entities($snippet);

  return $item;
}
/**
 * Implementation of hook_form_alter().
 *
 * Adds a "Google Appliance Settings" fieldset to the block configuration
 * form, with a Yes/No choice on hiding the block from the GSA crawler, and
 * registers google_appliance_block_save() as an extra submit handler.
 *
 * @param $form_id
 *   Identifier of the form being built.
 * @param $form
 *   Form array, altered in place.
 * @return
 *   The altered form for the block configuration form, nothing otherwise.
 */
function google_appliance_form_alter($form_id, &$form) {
  if ($form_id != 'block_admin_configure') {
    return;
  }

  // Settings are stored per block under the key "<module>-<delta>".
  $settings_key = $form['module']['#value'] . '-' . $form['delta']['#value'];
  $stored = google_appliance_blocksettings_get();

  $form['block_settings']['google_appliance'] = array(
    '#type' => 'fieldset',
    '#title' => t("Google Appliance Settings"),
    '#description' => t(''),
    '#collapsed' => true,
    '#collapsible' => true,
    '#tree' => true,
    '#weight' => -1,
    'hide' => array(
      '#type' => 'radios',
      '#title' => t("Do you want to hide this block from the GSA crawler?"),
      '#description' => t('Select No if you want this block content to be crawled with the page content.'),
      '#options' => array(
        1 => t('Yes'),
        0 => t('No'),
      ),
      // Blocks without a stored choice default to "Yes" (hidden).
      '#default_value' => isset($stored[$settings_key]) ? $stored[$settings_key] : 1,
      '#collapsed' => true,
      '#collapsible' => true,
      '#tree' => true,
    ),
  );

  $form['#submit']['google_appliance_block_save'] = array();

  return $form;
}
/**
 * Submit handler that stores the "hide from GSA crawler" choice of a block.
 *
 * @param $form_id
 *   Identifier of the submitted form (unused).
 * @param $form_values
 *   Submitted values; reads 'module', 'delta' and ['google_appliance']['hide'].
 */
function google_appliance_block_save($form_id, $form_values) {
  $settings = google_appliance_blocksettings_get();
  $settings_key = $form_values['module'] . '-' . $form_values['delta'];

  if (isset($form_values['google_appliance']['hide'])) {
    $settings[$settings_key] = $form_values['google_appliance']['hide'];
  }
  else {
    // No choice submitted: drop any previously stored value for this block.
    unset($settings[$settings_key]);
  }

  google_appliance_blocksettings_set($settings);
}
/**
 * Stores the per-block crawler settings.
 *
 * @param $new_val
 *   Complete settings value to store; it replaces whatever was saved before.
 */
function google_appliance_blocksettings_set($new_val) {
  $variable = 'google_appliance_block_settings';
  variable_set($variable, $new_val);
}
/**
 * Returns the per-block crawler settings.
 *
 * Reads the variable on every call instead of keeping a private static copy,
 * so a value written with google_appliance_blocksettings_set() earlier in the
 * same request is returned rather than a stale one.
 *
 * @return
 *   The stored settings, or an empty array when nothing has been stored.
 */
function google_appliance_blocksettings_get() {
  return variable_get('google_appliance_block_settings', array());
}
/**
 * Tells whether a block is flagged as hidden from the GSA crawler.
 *
 * @param $block
 *   Block object with 'module' and 'delta' properties.
 * @return
 *   true when the block has no stored setting or a non-empty one; nothing
 *   (NULL) when the stored setting is empty.
 */
function google_appliance_block_nogoogle($block) {
  $settings = google_appliance_blocksettings_get();
  $settings_key = $block->module . '-' . $block->delta;

  // Only an explicitly stored empty value opts the block out of hiding.
  if (isset($settings[$settings_key]) && !$settings[$settings_key]) {
    return;
  }
  return true;
}

/**
 * Keeps the Google Appliance response object for the rest of the page
 * request, so that later code (such as the key match block) can reuse it.
 *
 * Adapted from the apachesolr module.
 *
 * @param $response
 *   Response object to remember. When empty, the stored value is left as is.
 * @return
 *   Reference to the stored copy of the response, or to an unset value when
 *   nothing has been stored yet.
 */
function &google_appliance_static_response_cache($response = NULL) {
  static $stored;

  if (empty($response)) {
    return $stored;
  }

  // A clone is stored rather than the object that was passed in.
  $stored = drupal_clone($response);
  return $stored;
}
/**
 * Lists the test files shipped in this module's "tests" directory.
 *
 * @return
 *   Keys of the file_scan_directory() result for files ending in ".test".
 */
function google_appliance_simpletest() {
  $module_path = drupal_get_path('module', 'google_appliance');
  $found = file_scan_directory($module_path . '/tests', '\\.test$');
  return array_keys($found);
}

Functions

Name (sort descending) | Description
google_appliance_admin_settings
google_appliance_admin_settings_submit Submits the admin settings form and saves all the variables.
google_appliance_admin_settings_validate Validation function, though it's actually getting overridden by the #required fields...
google_appliance_block Implementation of hook_block().
google_appliance_blocksettings_get
google_appliance_blocksettings_set
google_appliance_block_nogoogle
google_appliance_block_save
google_appliance_form_alter
google_appliance_menu Implementation of hook_menu()
google_appliance_search Implementation of hook_search()
google_appliance_simpletest
google_appliance_static_response_cache It is important to hold on to the Google Appliance response object for the duration of the page request so that we can use it for things like the keymatch block
theme_add_google_appliance_meta_tags
theme_google_appliance_search_result_array
_google_appliance_add_meta_tags Invokes the google_appliance_appconfig hook to add <meta> tags to nodes for indexing metadata by the google crawler.