google_appliance.module in Google Search Appliance 7
Same filename and directory in other branches
Google Appliance module enables searching via a dedicated Google Search Appliance hardware device. See README.txt and the help page at admin/help/google_appliance.
File
google_appliance.module — View source
<?php
/**
* @file
* Google Appliance module enables searching via a dedicated Google Search Appliance
* hardware device. See README.txt and the help page at admin/help/google_appliance.
*/
// helpers
require_once drupal_get_path('module', 'google_appliance') . '/google_appliance.helpers.inc';
// default theme implementations
require_once drupal_get_path('module', 'google_appliance') . '/theme/google_appliance.theme.inc';
/**
 * Implements hook_help().
 *
 * Returns the module overview for the help page and a short introduction for
 * the module settings page. Any other path yields no help text (NULL).
 *
 * @param $path
 *   The router path help is being requested for.
 * @param $arg
 *   The path arguments (unused).
 *
 * @return
 *   An HTML string for the two handled paths, otherwise nothing.
 */
function google_appliance_help($path, $arg) {
  switch ($path) {
    case 'admin/help#google_appliance':
      $output = '';
      $output .= '<h2>' . t('About') . '</h2>' . PHP_EOL;
      // Every "@placeholder" used in a t() string needs a matching entry in
      // the replacement map, otherwise the literal token ends up in the href.
      $output .= '<p>' . t('The Google Appliance module provides an interface to obtain search results from a dedicated Google Search Appliance (GSA) hardware device. Please visit the <a href="@project-page">Google Appliance Project Page</a> to learn more about features and development notes.', array(
        '@project-page' => 'https://www.drupal.org/project/google_appliance',
      )) . '</p>' . PHP_EOL;
      $output .= '<h2>' . t('Setup and Usage') . '</h2>' . PHP_EOL;
      $output .= '<p>' . t('First, you must set up a collection and frontend on your GSA device that can be accessed publicly. Then configure the connection information on the <a href="@settings-page">Google Appliance Settings page</a> To produce results that include Drupal content, the GSA crawl must have crawled your drupal installation and indexed the desired content. No cron-job configuration is required (ala core Search) since index management is handled by the GSA device.', array(
        '@settings-page' => url('admin/config/search/google_appliance/settings'),
      )) . '</p>' . PHP_EOL;
      $output .= '<h3>' . t('Teaming up with core Search') . '</h3>';
      $output .= '<p>' . t('The Google Appliance module can act as a replacement for core <i>Search</i>, or it may operate in tandem with it. Say you want to provide search results for the public to include both drupal and non-drupal content, but you want administrative searches to only provide results for local site content. In this case you use the Google Appliance searches for public search, and core Search for admin searches.') . '</p>' . PHP_EOL;
      $output .= '<p>' . t('If you will use core search along with this module, core search results will show up normally at <code>search/{search terms}</code>, while Google Appliance searches will show up by default at <code>gsearch/{search terms}</code>.') . '</p>' . PHP_EOL;
      $output .= '<h2>' . t('Blocks') . '</h2>';
      $output .= '<p>' . t('The module provides two blocks:') . '</p>' . PHP_EOL;
      $output .= '<ol><li>' . t('the block search form') . '</li>' . PHP_EOL;
      $output .= '<li>' . t('the related searches block') . '</li></ol>' . PHP_EOL;
      $output .= '<p>' . t('Both can be enabled and configured on the <a href="@blocks-admin">Blocks administration page</a>. The block search form provides a search form that can be placed into any block. The related searches block displays a list of links to related results. The related search block only makes sense to appear on results pages, so it should be configured accordingly (i.e. specify <code>gsearch*</code> as the pages for the block to appear on).', array(
        '@blocks-admin' => url('admin/structure/block'),
      )) . '</p>';
      return $output;

    // Must match the path registered in google_appliance_menu(); the settings
    // form lives below admin/config/search.
    case 'admin/config/search/google_appliance/settings':
      $output = '<p>' . t('The Google Appliance settings specify the interface to your GSA device. The search results obtained via that interface are controlled by the Google Search Appliance configuration itself. Visit the <a href="@gsa-help">Google Appliance help section</a> for more information on setting up the integration.', array(
        '@gsa-help' => url('admin/help/google_appliance'),
      )) . '</p>';
      return $output;
  }
}
/**
 * Implements hook_permission().
 *
 * Declares one permission for administering the module settings and one for
 * using the appliance-backed search.
 */
function google_appliance_permission() {
  return array(
    'administer_google_appliance' => array(
      'title' => t('Administer Google Appliance settings'),
    ),
    'access_google_appliance_content' => array(
      'title' => t('Use Google Appliance searches'),
    ),
  );
}
/**
 * Implements hook_menu().
 *
 * Registers the search results page at the configured path and the module
 * settings form.
 */
function google_appliance_menu() {
  $settings = _google_appliance_get_settings();
  $search_path = $settings['drupal_path'];

  // The two path segments directly after the configured search path carry
  // (1) the search query and (2) the results sort parameter.
  $depth = substr_count($search_path, '/');
  $search_page = array(
    'title' => $settings['search_title'],
    'page callback' => 'google_appliance_search_view',
    'page arguments' => array($depth + 1, $depth + 2),
    'access arguments' => array('access_google_appliance_content'),
    'type' => MENU_SUGGESTED_ITEM,
  );

  // Administrative settings form.
  $admin_page = array(
    'title' => 'Google Appliance',
    'description' => 'Configure options for search via Google Search Appliance.',
    'page callback' => 'drupal_get_form',
    'page arguments' => array('google_appliance_admin_settings'),
    'file' => 'google_appliance.admin.inc',
    'access arguments' => array('administer_google_appliance'),
    'weight' => -30,
  );

  return array(
    $search_path => $search_page,
    'admin/config/search/google_appliance/settings' => $admin_page,
  );
}
/**
 * Implements hook_library().
 *
 * Exposes the bundled click-tracking script as a library that depends on
 * core's jQuery.
 */
function google_appliance_library() {
  $script = drupal_get_path('module', 'google_appliance') . '/js/jquery.gsa-clicks.min.js';

  return array(
    'jquery.gsa-clicks' => array(
      'title' => 'Google Search Appliance Click Tracking',
      'website' => 'https://github.com/asmallwebfirm/gsa-clicks',
      'version' => '1.0.0',
      'js' => array(
        $script => array(),
      ),
      'dependencies' => array(
        array('system', 'jquery'),
      ),
    ),
  );
}
/**
 * Implements hook_module_implements_alter().
 *
 * Moves this module's hook_menu() implementation to the end of the list so it
 * runs after the search module's implementation.
 */
function google_appliance_module_implements_alter(&$implementations, $hook) {
  if ($hook != 'menu') {
    return;
  }
  // Pull our entry out and re-add it; the array union appends the (now
  // missing) key at the end of the list.
  $own_entry = $implementations['google_appliance'];
  unset($implementations['google_appliance']);
  $implementations += array('google_appliance' => $own_entry);
}
/**
 * Implements hook_block_info().
 *
 * Declares one block per configured Onebox module, the block search form and
 * the related searches block. Blocks that only make sense next to search
 * results default to being visible on the configured results path only; that
 * default is just a suggestion and can be changed on the block settings form.
 */
function google_appliance_block_info() {
  $blocks = array();
  $settings = _google_appliance_get_settings();

  // Default page visibility pattern for result-dependent blocks. Derived from
  // the configured search path so the default stays correct when the path is
  // not the stock "gsearch".
  $results_pages = $settings['drupal_path'] . '*';

  // Onebox blocks.
  foreach ($settings['onebox_modules'] as $module) {
    $blocks[$module] = array(
      'info' => t('Google Appliance Onebox: @module', array(
        '@module' => $module,
      )),
      'cache' => DRUPAL_NO_CACHE,
      'visibility' => 1,
      'pages' => $results_pages,
    );
  }

  // Block search form.
  $blocks['ga_block_search_form'] = array(
    'info' => t('Google Appliance search form'),
    'cache' => DRUPAL_NO_CACHE,
  );

  // Related searches block.
  $blocks['ga_related_searches'] = array(
    'info' => t('Google Appliance related searches'),
    'cache' => DRUPAL_NO_CACHE,
    'visibility' => 1,
    'pages' => $results_pages,
  );

  return $blocks;
}
/**
 * Implements hook_block_view().
 *
 * Renders the search form block, the related searches block and any Onebox
 * block for which the parsed search response holds data. Users without the
 * search permission get an empty block.
 */
function google_appliance_block_view($delta = '') {
  $block = array();
  if (!user_access('access_google_appliance_content')) {
    return $block;
  }

  if ($delta == 'ga_block_search_form') {
    $block['content'] = drupal_get_form('google_appliance_block_form');
  }
  elseif ($delta == 'ga_related_searches') {
    $block['subject'] = t('Related Searches');
    // Themed list of related search links.
    $block['content'] = google_appliance_get_clusters();
  }

  // Onebox blocks are keyed by module name in the statically stored results
  // of the response parser.
  $parsed = drupal_static('google_appliance_parse_device_response_xml');
  if (!isset($parsed['onebox'][$delta])) {
    return $block;
  }

  $onebox = $parsed['onebox'][$delta];
  $subject = '';
  if (!empty($onebox['url_text'])) {
    $subject = $onebox['url_text'];
  }
  if (!empty($onebox['url_link'])) {
    $subject = l($subject, $onebox['url_link']);
  }
  $block['subject'] = $subject;
  $block['content'] = array(
    '#theme' => 'google_appliance_onebox_module',
    '#onebox' => $onebox,
  );

  return $block;
}
/**
 * Implements hook_preprocess_html().
 *
 * When enabled in the module settings, adds JSON-LD describing a Google
 * sitelinks search box to the HTML head of the front page.
 */
function google_appliance_preprocess_html(&$variables) {
  $settings = _google_appliance_get_settings();
  if (!$settings['sitelinks_search_box'] || !drupal_is_front_page()) {
    return;
  }

  // Google recommended practices suggest that the "url" key in the JSON-LD
  // and the homepage canonical URL must match, so prefer the canonical
  // metatag value when one has been added to the head.
  $head_elements = drupal_add_html_head();
  if (isset($head_elements['metatag_canonical']['#value'])) {
    $site_url = $head_elements['metatag_canonical']['#value'];
  }
  else {
    $site_url = $GLOBALS['base_url'] . base_path();
  }

  // Assemble the JSON-LD structure.
  $search_target = url($settings['drupal_path'], array('absolute' => TRUE)) . '/{search_term_string}';
  $json_ld = array(
    '@context' => 'http://schema.org',
    '@type' => 'WebSite',
    'url' => $site_url,
    'potentialAction' => array(
      '@type' => 'SearchAction',
      'target' => $search_target,
      'query-input' => 'required name=search_term_string',
    ),
  );

  // The raw structure is kept under "_json" next to the markup; the element
  // is registered under the key "google_appliance_slsb" so it can be changed
  // via hook_html_head_alter().
  $head_element = array(
    '#type' => 'markup',
    '#markup' => '<script type="application/ld+json">' . drupal_json_encode($json_ld) . '</script>',
    '_json' => $json_ld,
  );
  drupal_add_html_head($head_element, 'google_appliance_slsb');
}
/**
 * Form builder: the compact search form shown in the search block.
 *
 * Consists of a keyword text field with an invisible label and a submit
 * button.
 *
 * @ingroup forms
 * @see google_appliance_block_form_submit()
 * @see google_appliance-block-form.tpl.php
 */
function google_appliance_block_form($form, &$form_state) {
  $keys_field = array(
    '#type' => 'textfield',
    '#title' => t('Enter the terms you wish to search for.'),
    '#title_display' => 'invisible',
    '#size' => 15,
    '#default_value' => '',
  );
  $submit_button = array(
    '#type' => 'submit',
    '#value' => t('Search'),
  );

  $form['search_keys'] = $keys_field;
  $form['actions'] = array(
    '#type' => 'actions',
    'submit' => $submit_button,
  );

  return $form;
}
/**
 * Submit handler for the block search form.
 *
 * Only sets the form redirect to the results page for the submitted keywords;
 * the search itself is executed by the page callback.
 */
function google_appliance_block_form_submit($form, &$form_state) {
  $settings = _google_appliance_get_settings();

  // The results page is always the destination, so drop any dynamic one.
  unset($_GET['destination']);

  $encoded_keys = urlencode($form_state['values']['search_keys']);
  $results_path = $settings['drupal_path'] . '/' . $encoded_keys;
  $form_state['redirect'] = url($results_path, array('absolute' => TRUE));
}
/**
 * Implements hook_form_FORM_ID_alter().
 *
 * Adds a "Google Appliance" fieldset to the block configuration form so
 * editors can choose how the crawler treats the block's contents
 * (googleon/googleoff tags).
 */
function google_appliance_form_block_admin_configure_alter(&$form, &$form_state, $form_id) {
  $settings = _google_appliance_get_settings();
  $module = $form['module']['#value'];
  $delta = $form['delta']['#value'];

  // Use the stored choice for this block, falling back to "none".
  $stored = isset($settings['block_visibility_settings'][$module][$delta]);
  $current_choice = $stored ? $settings['block_visibility_settings'][$module][$delta] : 'none';

  // Help text with a link to the relevant appliance documentation.
  $read_more_link = 'https://developers.google.com/search-appliance/documentation/64/admin_crawl/Preparing';
  $description = '<p>' . t('Configure how the Google Search Appliance crawler handles the contents of this block.') . '</p>';
  $description .= '<p>' . t('For more information, see the relevant') . ' ';
  $description .= l(t('Google Search Appliance documentation'), $read_more_link, array(
    'fragment' => 'pagepart',
    'attributes' => array(
      'target' => '_blank',
    ),
  ));
  $description .= '.</p>';

  $crawler_choices = array(
    'none' => t('No restrictions'),
    'index' => t('Exclude block contents from index'),
    'anchor' => t('Disassociate anchor text from target pages within this block'),
    'snippet' => t('Exclude block contents from search snippets'),
    'all' => t('All of the above'),
  );

  // The fieldset joins the existing "visibility" group of the form.
  $form['visibility']['google_appliance'] = array(
    '#type' => 'fieldset',
    '#title' => t('Google Appliance'),
    '#collapsible' => 1,
    '#collapsed' => 1,
    '#group' => 'visibility',
    '#weight' => 30,
    'crawler' => array(
      '#type' => 'radios',
      '#title' => t('Visible to Google Appliance crawler'),
      '#description' => $description,
      '#options' => $crawler_choices,
      '#default_value' => $current_choice,
    ),
  );

  // Persist the selection when the block form is saved.
  $form['#submit'][] = 'google_appliance_block_visibility_submit';
}
/**
 * Submit handler to save block-specific crawler visibility settings.
 *
 * Stores the selected crawler restriction for the block identified by the
 * submitted module/delta pair. Selecting "none" removes the block's entry,
 * and also removes the module's entry once no block of that module has a
 * restriction left, so the stored variable does not accumulate empty arrays.
 */
function google_appliance_block_visibility_submit($form, &$form_state) {
  $settings = _google_appliance_get_settings();
  $visibility = $settings['block_visibility_settings'];
  $module = $form_state['values']['module'];
  $delta = $form_state['values']['delta'];
  $choice = $form_state['values']['crawler'];

  if ($choice == 'none') {
    if (isset($visibility[$module][$delta])) {
      unset($visibility[$module][$delta]);
    }
    // Drop the per-module container once its last block setting is gone.
    if (isset($visibility[$module]) && empty($visibility[$module])) {
      unset($visibility[$module]);
    }
  }
  else {
    $visibility[$module][$delta] = $choice;
  }

  // Save the settings.
  variable_set('google_appliance_block_visibility_settings', $visibility);
}
/**
 * Implements hook_block_view_alter().
 *
 * Wraps the content of blocks that have a stored crawler visibility setting
 * in the matching google on/off tags.
 *
 * @see google_appliance_form_block_admin_configure_alter()
 * @see _google_appliance_get_googleonoff()
 * @see https://developers.google.com/search-appliance/documentation/64/admin_crawl/Preparing#pagepart
 */
function google_appliance_block_view_alter(&$data, $block) {
  $settings = _google_appliance_get_settings();
  $module = $block->module;
  $delta = $block->delta;

  // Blocks without a stored setting are left untouched.
  if (!isset($settings['block_visibility_settings'][$module][$delta])) {
    return;
  }

  $tags = _google_appliance_get_googleonoff();
  $mode = $settings['block_visibility_settings'][$module][$delta];
  $open = $tags[$mode]['prefix'];
  $close = $tags[$mode]['suffix'];

  // Nothing to wrap when the block produced no content.
  if (!isset($data['content'])) {
    return;
  }

  // Plain HTML string: wrap it directly.
  if (!is_array($data['content'])) {
    $data['content'] = $open . $data['content'] . $close;
    return;
  }

  // Render array that carries its own markup: wrap that markup.
  if (isset($data['content']['#markup'])) {
    $data['content']['#markup'] = $open . $data['content']['#markup'] . $close;
    return;
  }

  // Any other render array: extend (or create) its #prefix and #suffix so the
  // tags end up outermost.
  $data['content']['#prefix'] = isset($data['content']['#prefix'])
    ? $open . $data['content']['#prefix']
    : $open;
  $data['content']['#suffix'] = isset($data['content']['#suffix'])
    ? $data['content']['#suffix'] . $close
    : $close;
}
/**
 * Form builder: the full search form shown on results pages.
 *
 * @param $query
 *   Keywords to pre-fill the text field with; empty before the first search.
 * @param $prompt
 *   Prompt text used for the field title; a default is used when empty.
 *
 * @ingroup forms
 * @see google_appliance_search_form_submit()
 * @see google_appliance-search-form.tpl.php
 * @see google_appliance-results.tpl.php
 */
function google_appliance_search_form($form, &$form_state, $query = '', $prompt = '') {
  $settings = _google_appliance_get_settings();
  if ($prompt == '') {
    $prompt = t('Enter the terms you wish to search for.');
  }

  // Container holding the basic search elements.
  $basic = array(
    '#type' => 'container',
  );
  $basic['search_keys'] = array(
    '#type' => 'textfield',
    '#default_value' => $query,
    '#attributes' => array(
      'title' => $prompt,
    ),
    '#title' => check_plain($prompt),
    '#title_display' => 'invisible',
  );
  // The visible prompt is only shown before a search has been made.
  if ($query == '') {
    $basic['prompt'] = array(
      '#type' => 'item',
      '#markup' => '<p><b>' . $prompt . '</b></p>',
    );
  }
  $basic['submit'] = array(
    '#type' => 'submit',
    '#value' => t('Search'),
  );
  $form['basic'] = $basic;

  // Base path of the results page without keys; the submit handler appends
  // the encoded keywords and performs the redirect.
  $form_state['action'] = $settings['drupal_path'] . '/';

  // Core search's CSS class plus this module's own class (kept general in
  // case core search is enabled as well).
  foreach (array('search-form', 'search-google-appliance-search-form') as $css_class) {
    $form['#attributes']['class'][] = $css_class;
  }

  return $form;
}
/**
 * Submit handler for the full search form.
 *
 * Redirects to the results path stored in $form_state['action'] with the
 * encoded keywords appended.
 */
function google_appliance_search_form_submit($form, &$form_state) {
  $encoded_keys = urlencode($form_state['values']['search_keys']);
  $results_path = $form_state['action'] . $encoded_keys;
  $form_state['redirect'] = url($results_path, array('absolute' => TRUE));
}
/**
 * Top level search execution (menu callback).
 *
 * Performs a query of the GSA device for a SINGLE PAGE of search results.
 *
 * @param $query
 *   The (urlencoded) search query taken from the URL.
 * @param $sort
 *   Optional sort parameter from the URL: "date" sorts by date, anything
 *   else leaves the sort parameter empty.
 *
 * @return
 *   Themed results (error messages or results listing), or the search form
 *   when there is no query or the request is a form POST.
 */
function google_appliance_search_view($query = '', $sort = NULL) {
  $query = urldecode($query);

  // Get the results page form.
  $form = drupal_get_form('google_appliance_search_form', $query);

  // When POSTing back to the search form on a search results page, the
  // original URL is accessed (which would re-run that search) and then the
  // redirect for the new search takes place, executing the second (correct)
  // search. Avoid hitting the device twice per search by only querying when
  // this is not a form POST.
  if (isset($_POST['form_id']) || $query == '') {
    return $form;
  }

  $settings = _google_appliance_get_settings();
  // Request description handed to cURL and to the theme layer.
  $search_query_data = array();
  // Processed response.
  $response_data = array();

  // Sort parameter from the URL.
  $sort_param = ($sort === 'date') ? 'date:D:S:d1' : '';

  // Determine how deep into the results to start viewing (paging). The page
  // number is untrusted request input: coerce it to a non-negative integer
  // instead of doing arithmetic on the raw string.
  $page = 0;
  if (isset($_GET['page']) && is_scalar($_GET['page'])) {
    $page = max(0, (int) $_GET['page']);
  }
  $results_view_start = $page * (int) $settings['results_per_page'];

  // Build cURL request.
  $search_query_data['gsa_host'] = check_plain($settings['hostname'] . '/search');
  $search_query_data['gsa_query_params'] = array(
    'site' => check_plain($settings['collection']),
    'oe' => 'utf8',
    'ie' => 'utf8',
    'getfields' => '*',
    'client' => check_plain($settings['frontend']),
    'start' => $results_view_start,
    'num' => check_plain($settings['results_per_page']),
    'filter' => check_plain($settings['autofilter']),
    'q' => $query,
    'output' => 'xml_no_dtd',
    'sort' => $sort_param,
    'access' => 'p',
  );

  // Alter request according to language filter settings.
  if (module_exists('locale') && $settings['language_filter_toggle']) {
    $search_query_data['gsa_query_params']['lr'] = _google_appliance_get_lr($settings['language_filter_options']);
  }

  // Allow implementation of hook_google_appliance_query_alter() by other
  // modules.
  drupal_alter('google_appliance_query', $search_query_data);

  // Build debug info in case we need to display it.
  if ($settings['query_inspection'] == '1') {
    $search_query_data['debug_info'][] = t('GSA host: @host', array(
      '@host' => $search_query_data['gsa_host'],
    ));
    $search_query_data['debug_info'][] = t('Query Parameters: <pre>@qp</pre>', array(
      '@qp' => print_r($search_query_data['gsa_query_params'], TRUE),
    ));
  }

  $curl_options = array();

  // Use drupal proxy if any.
  $drupal_proxy_server = variable_get('proxy_server', '');
  $drupal_proxy_port = variable_get('proxy_port', '');
  $drupal_proxy_username = variable_get('proxy_username', '');
  $drupal_proxy_password = variable_get('proxy_password', '');
  // NULL as default value.
  $drupal_proxy_user_agent = variable_get('proxy_user_agent', NULL);

  if ($drupal_proxy_server != '') {
    // Parse the configured GSA hostname to obtain the host without the scheme
    // (http/https) and ask _drupal_http_use_proxy() whether it should go
    // through the proxy. The proxy is the rule and bypassing it the
    // exception: when no host can be determined we cannot confirm the
    // exception, so the proxy is used.
    $gsa_hostname_components = @parse_url($settings['hostname']);
    $host_unknown = !is_array($gsa_hostname_components) || empty($gsa_hostname_components['host']);
    if ($host_unknown || _drupal_http_use_proxy($gsa_hostname_components['host'])) {
      $curl_options[CURLOPT_PROXY] = $drupal_proxy_server;
      // Add port, if provided.
      if ($drupal_proxy_port != '') {
        $curl_options[CURLOPT_PROXY] .= ':' . $drupal_proxy_port;
      }
      // Some proxies reject requests with any User-Agent headers, while
      // others require a specific one.
      if ($drupal_proxy_user_agent !== NULL) {
        $curl_options[CURLOPT_USERAGENT] = $drupal_proxy_user_agent;
      }
    }
    // Set authentication, if needed, checking if proxy_username is not empty.
    if ($drupal_proxy_username != '') {
      // The option CURLOPT_PROXYUSERPWD has format [username]:[password].
      $curl_options[CURLOPT_PROXYUSERPWD] = $drupal_proxy_username . ':' . $drupal_proxy_password;
    }
  }

  // Allow implementation of hook_google_appliance_curl_alter() by other
  // modules.
  drupal_alter('google_appliance_curl', $curl_options);

  // Query the GSA for search results.
  $gsa_response = _curl_get($search_query_data['gsa_host'], $search_query_data['gsa_query_params'], $curl_options, $settings['timeout']);

  if ($gsa_response['is_error'] == TRUE) {
    // cURL returned an error (can't connect). Displaying useful error
    // messages depends upon the use of the array key 'curl_error'; the actual
    // error code/response is displayed elsewhere.
    // @see google_appliance.theme.inc
    $response_data['error']['curl_error'] = $gsa_response['response'];
  }
  else {
    // cURL gave us something back -> attempt to parse.
    $response_data = google_appliance_parse_device_response_xml($gsa_response['response']);
  }

  // Render results.
  $search_query_data['gsa_query_params']['urlencoded_q'] = urlencode($query);
  $template_data = array(
    'search_query_data' => $search_query_data,
    'response_data' => $response_data,
  );
  return theme('google_appliance_results', $template_data);
}
/**
 * Parse the response from the Google Search Appliance device into a php array.
 *
 * The parsed structure is kept in drupal_static() under this function's name,
 * so other code in the same request can read it.
 *
 * @param $gsa_xml
 *   Response text obtained from the device query.
 * @param $use_cached
 *   Whether or not to reuse the statically stored result of an earlier call.
 *   Passing FALSE discards the stored result and parses $gsa_xml afresh.
 *
 * @return
 *   Array structure to iterate when displaying results. On failure it holds
 *   an 'error' key with either 'lib_xml_parse_error' or 'gsa_no_results'.
 */
function google_appliance_parse_device_response_xml($gsa_xml, $use_cached = TRUE) {
  $results =& drupal_static(__FUNCTION__);
  if (isset($results) && $use_cached !== FALSE) {
    return $results;
  }

  // Start from a clean slate: a forced re-parse must not append to entries or
  // keep error flags left over from the previously stored response.
  $results = array();

  // Collect XML parse errors internally instead of emitting warnings, and put
  // the libxml error handling back the way it was once parsing is done.
  $previous_libxml_setting = libxml_use_internal_errors(TRUE);
  $payload = simplexml_load_string($gsa_xml);
  $parse_errors = array();
  if ($payload === FALSE) {
    foreach (libxml_get_errors() as $error) {
      $parse_errors[] = $error->message;
    }
  }
  libxml_clear_errors();
  libxml_use_internal_errors($previous_libxml_setting);

  // Only a FALSE return is a parse failure; a valid but empty root element
  // also evaluates to FALSE in boolean context and must not be mistaken for
  // one.
  if ($payload === FALSE) {
    // Displaying useful error messages depends upon the use of the array key
    // 'lib_xml_parse_error'; the actual error is displayed elsewhere.
    // @see google_appliance.theme.inc
    $results['error'] = array(
      'lib_xml_parse_error' => join(', ', $parse_errors),
    );
    return $results;
  }

  // Store metrics for stat reporting. The total is somewhat unreliable.
  $results['total_results'] = (int) $payload->RES->M;
  $results['last_result'] = (string) $payload->RES['EN'];

  // Check if there is a result at all ($payload->RES); secure search doesn't
  // provide a value for $payload->RES->M, so fall back to the index of the
  // last returned result.
  if ($results['total_results'] == 0 && isset($payload->RES)) {
    $results['total_results'] = (int) $payload->RES['EN'];
    $param_start = $payload->xpath('/GSP/PARAM[@name="start"]');
    $param_num = $payload->xpath('/GSP/PARAM[@name="num"]');
    // A completely filled page gets one extra counted result. This is only
    // evaluated when the response echoes both request parameters.
    if (!empty($param_start) && !empty($param_num)) {
      $request_max_total = (int) $param_start[0]['value'] + (int) $param_num[0]['value'];
      if ($results['total_results'] == $request_max_total) {
        $results['total_results']++;
      }
    }
  }

  // Spelling suggestions.
  if (isset($payload->Spelling->Suggestion)) {
    $spelling_suggestion = (string) $payload->Spelling->Suggestion;
    $results['spelling_suggestion'] = filter_xss($spelling_suggestion, array(
      'b',
      'i',
    ));
  }

  // Onebox results.
  // @see https://developers.google.com/search-appliance/documentation/614/oneboxguide#providerresultsschema
  // @see https://developers.google.com/search-appliance/documentation/614/oneboxguide#mergingobs
  foreach ($payload->xpath('//ENTOBRESULTS/OBRES') as $mod) {
    $result_code = empty($mod->resultCode) ? '' : (string) $mod->resultCode;
    if (empty($result_code) || $result_code == 'success') {
      $module_name = (string) $mod['module_name'];
      $onebox = array();
      $onebox['module_name'] = $module_name;
      $onebox['provider'] = (string) $mod->provider;
      $onebox['url_text'] = (string) $mod->title->urlText;
      $onebox['url_link'] = (string) $mod->title->urlLink;
      $onebox['image'] = (string) $mod->IMAGE_SOURCE;
      $onebox['description'] = (string) $mod->description;
      foreach ($mod->xpath('./MODULE_RESULT') as $res) {
        $result = array();
        $result['abs_url'] = (string) $res->U;
        $result['title'] = (string) $res->Title;
        foreach ($res->xpath('./Field') as $field) {
          $field_name = (string) $field['name'];
          $result['fields'][$field_name] = (string) $field;
        }
        $onebox['results'][] = $result;
      }
      $results['onebox'][$module_name] = $onebox;
    }
  }

  if ($results['total_results'] == 0) {
    // Search returned zero results. The results alter hook is not invoked in
    // this case.
    $results['error'] = array(
      'gsa_no_results' => TRUE,
    );
    return $results;
  }

  // Keymatches.
  foreach ($payload->xpath('//GM') as $km) {
    $keymatch = array();
    $keymatch['description'] = (string) $km->GD;
    $keymatch['url'] = (string) $km->GL;
    $results['keymatch'][] = $keymatch;
  }

  // If there are any synonyms returned by the appliance, put them in the
  // results as a new array.
  // @see http://code.google.com/apis/searchappliance/documentation/50/xml_reference.html#tag_onesynonym
  foreach ($payload->xpath('//OneSynonym') as $syn_element) {
    $synonym = array();
    $synonym['description'] = (string) $syn_element;
    $synonym['url'] = (string) $syn_element['q'];
    $results['synonyms'][] = $synonym;
  }

  // Result entries.
  foreach ($payload->xpath('//R') as $res) {
    $result = array();

    // Handy variants of the url for the result; 'enc_url' is the urlencoded
    // URL of the result and 'short_url' is cut off after 80 characters.
    $result['abs_url'] = (string) $res->U;
    $result['enc_url'] = (string) $res->UE;
    $result['short_url'] = substr($result['abs_url'], 0, 80) . (strlen($result['abs_url']) > 80 ? '...' : '');

    // Result info.
    $result['title'] = (string) $res->T;
    $result['snippet'] = (string) $res->S;
    $result['crawl_date'] = (string) $res->FS['VALUE'];
    $result['level'] = isset($res['L']) ? (int) $res['L'] : 1;

    // Result meta data: here we just collect the data from the device and
    // leave implementing display of meta data to the themer (use-case
    // specific).
    // @see google-appliance-result.tpl.php
    $meta = array();
    foreach ($res->xpath('./MT') as $mt) {
      $meta[(string) $mt['N']] = (string) $mt['V'];
    }
    $result['meta'] = $meta;

    // Record the mime type to allow themes to decorate with mime icons.
    // @see google-appliance-result.tpl.php
    $result['mime']['type'] = (string) $res['MIME'];

    $results['entry'][] = $result;
  }

  // Allow implementation of hook_google_appliance_results_alter() by other
  // modules.
  drupal_alter('google_appliance_results', $results, $payload);

  return $results;
}
/**
 * Get related searches via the Google Search Appliance clustering service.
 *
 * Reads the current search query from the URL, asks the device's /cluster
 * endpoint for related searches and renders them as a list of links to the
 * corresponding results pages.
 *
 * @return
 *   Themed list of links, or NULL when there is no query, the request failed
 *   or the response holds no clusters.
 */
function google_appliance_get_clusters() {
  // Grab module settings.
  $settings = _google_appliance_get_settings();

  // Get the search query: it is the path segment right after the search path.
  $query_pos = substr_count($settings['drupal_path'], '/') + 1;
  $search_query = urldecode((string) arg($query_pos));
  $cluster_content = NULL;

  // Without a query there is nothing to find related searches for (the
  // results page does not query the device in that case either).
  if ($search_query === '') {
    return $cluster_content;
  }

  // Perform POST to acquire the clusters block. The query is sent as typed,
  // the same way the results page sends it; HTML-escaping it here would make
  // the device see entities such as "&amp;" instead of the original
  // characters.
  $clusterQueryURL = check_plain($settings['hostname'] . '/cluster');
  $clusterQueryParams = array(
    'q' => $search_query,
    'btnG' => 'Google+Search',
    'access' => 'p',
    'entqr' => '0',
    'ud' => '1',
    'sort' => 'date:D:L:d1',
    'output' => 'xml_no_dtd',
    'oe' => 'utf8',
    'ie' => 'utf8',
    'site' => check_plain($settings['collection']),
    'client' => check_plain($settings['frontend']),
  );

  // Alter request according to language filter settings.
  if (module_exists('locale') && $settings['language_filter_toggle']) {
    $clusterQueryParams['lr'] = _google_appliance_get_lr($settings['language_filter_options']);
  }

  // cURL request for the clusters produces JSON result.
  $gsa_clusters_json = _curl_post($clusterQueryURL, $clusterQueryParams);
  if ($gsa_clusters_json['is_error']) {
    return $cluster_content;
  }

  $clusters = json_decode($gsa_clusters_json['response'], TRUE);
  if (!isset($clusters['clusters'][0])) {
    return $cluster_content;
  }

  // Build the link list. Keywords are urlencoded before being placed in the
  // path, matching the search forms, because the results page urldecodes the
  // path segment it receives.
  $cluster_list_items = array();
  $related = isset($clusters['clusters'][0]['clusters']) && is_array($clusters['clusters'][0]['clusters'])
    ? $clusters['clusters'][0]['clusters']
    : array();
  foreach ($related as $cluster) {
    if (!isset($cluster['label'])) {
      continue;
    }
    $cluster_list_items[] = l($cluster['label'], $settings['drupal_path'] . '/' . urlencode($cluster['label']));
  }

  // Create theme-friendly list of links render array.
  $cluster_list = array(
    '#theme' => 'item_list',
    '#items' => $cluster_list_items,
    '#title' => NULL,
    '#type' => 'ul',
    '#attributes' => array(),
  );

  // Allow implementation of hook_google_appliance_cluster_list_alter() by
  // other modules; the decoded device response is passed along as context.
  drupal_alter('google_appliance_cluster_list', $cluster_list, $clusters);

  $cluster_content = drupal_render($cluster_list);
  return $cluster_content;
}