You are here

search404.module in Search 404 6

File

search404.module
View source
<?php

/**
 * @file
 * The search404 module file; performs a search whenever a 404 (page not
 * found) occurs.
 */

/**
 * Implementation of hook_menu().
 *
 * Registers two paths:
 * - search404: served by search404_page() and open to every visitor
 *   ('access callback' is TRUE).
 * - admin/settings/search404: the search404_settings form, restricted to
 *   users with the 'administer site configuration' permission.
 *
 * Titles and descriptions are deliberately left untranslated: the menu system
 * translates them itself, so wrapping them in t() here would translate twice.
 *
 * @return
 *   An array of menu item definitions keyed by path.
 */
function search404_menu() {
  $items = array();
  $items['search404'] = array(
    'title' => 'Page not found',
    'access callback' => TRUE,
    'page callback' => 'search404_page',
    'type' => MENU_CALLBACK,
  );
  $items['admin/settings/search404'] = array(
    'title' => 'Search 404 settings',
    'description' => 'Administer search 404.',
    'page callback' => 'drupal_get_form',
    'page arguments' => array(
      'search404_settings',
    ),
    'access callback' => 'user_access',
    'access arguments' => array(
      'administer site configuration',
    ),
    'type' => MENU_NORMAL_ITEM,
  );
  return $items;
}

/**
 * Replacement for search_get_keys().
 *
 * Derives search keywords from the requested path: $_REQUEST['destination']
 * when it is non-empty, otherwise $_REQUEST['q']. The path is then cleaned up
 * using the module's settings (language prefix, PCRE filter, ignored
 * extensions, ignored words) and split into words.
 *
 * Every keyword is passed through check_plain() before the keywords are
 * joined, so the result is HTML-escaped. It is still derived from
 * user-supplied input; callers using it in any other context must escape it
 * for that context.
 *
 * @return
 *   FALSE when the path ends in one of the "abort" extensions or cannot be
 *   split into words; otherwise the keywords joined by ' ' (or ' OR ' when
 *   'search404_use_or' is set). The string may be empty.
 */
function search404_get_keys() {
  global $language;

  // Only accept non-empty string values; an array-valued parameter
  // (e.g. ?destination[]=x) would break the preg_* calls below.
  $keys = '';
  foreach (array('destination', 'q') as $source) {
    if (!empty($_REQUEST[$source]) && is_string($_REQUEST[$source])) {
      $keys = $_REQUEST[$source];
      break;
    }
  }

  // Abort query on certain extensions, e.g: gif jpg jpeg png
  $abort = _search404_extension_pattern(variable_get('search404_ignore_query', 'gif jpg jpeg bmp png'));
  if ($abort !== '' && preg_match("/\\.({$abort})\$/i", $keys)) {
    return FALSE;
  }

  // Remove the language prefix prepended to the search string
  // (http://drupal.org/node/560426).
  // NOTE(review): LANGUAGE_NEGOTIATION_PATH_DEFAULT is a constant rather than
  // the site's configured negotiation mode, so this condition does not really
  // test whether path prefixes are in use - confirm the intent.
  if (LANGUAGE_NEGOTIATION_PATH_DEFAULT && !empty($language->language)) {
    $keys = preg_replace('/^' . preg_quote($language->language, '/') . '\\//i', '', $keys);
  }

  // Apply the administrator-defined PCRE filter. If the expression is invalid
  // preg_replace() returns NULL; keep the unfiltered keys in that case.
  $regex_filter = variable_get('search404_regex', '');
  if (!empty($regex_filter)) {
    $filtered = preg_replace("/" . $regex_filter . "/i", '', $keys);
    if (is_string($filtered)) {
      $keys = $filtered;
    }
  }

  // Ignore certain extensions from query.
  $ignored = _search404_extension_pattern(variable_get('search404_ignore_extensions', 'htm html php'));
  if ($ignored !== '') {
    $keys = preg_replace("/\\.({$ignored})\$/i", '', $keys);
  }

  // Split into words. PREG_SPLIT_NO_EMPTY drops the empty tokens produced by
  // leading/trailing separators, which would otherwise leave dangling " OR "
  // operators in the joined string.
  $words = preg_split('/[' . PREG_CLASS_SEARCH_EXCLUDE . ']+/u', $keys, -1, PREG_SPLIT_NO_EMPTY);
  if ($words === FALSE) {
    // Happens e.g. for malformed UTF-8 in the path; nothing usable to search.
    return FALSE;
  }

  // Ignore certain words (use case insensitive search).
  $words = array_udiff($words, explode(' ', variable_get('search404_ignore', t('and or the'))), 'strcasecmp');

  // Sanitize the keys.
  $words = array_map('check_plain', $words);

  $modifier = variable_get('search404_use_or', FALSE) ? ' OR ' : ' ';
  return trim(implode($modifier, $words));
}

/**
 * Turns a space separated extension list into a regex alternation.
 *
 * Empty entries (repeated or trailing spaces) are dropped, because an empty
 * alternative would make the surrounding "\.(...)$" pattern match any path
 * ending in a dot. Each extension is escaped so it is matched literally.
 *
 * @param $list
 *   Space separated extensions without leading dot, e.g. "gif jpg png".
 *
 * @return
 *   The extensions joined with "|", or an empty string if the list is empty.
 */
function _search404_extension_pattern($list) {
  $parts = array();
  $extensions = preg_split('/\s+/', (string) $list, -1, PREG_SPLIT_NO_EMPTY);
  if (is_array($extensions)) {
    foreach ($extensions as $extension) {
      $parts[] = preg_quote($extension, '/');
    }
  }
  return implode('|', $parts);
}

/**
 * Detect search from search engine.
 *
 * Inspects $_SERVER['HTTP_REFERER']; when its host contains the name of one
 * of the known search engines and its query string carries that engine's
 * search parameter, the search phrase is returned.
 *
 * The referer is sent by the client and therefore untrusted: the returned
 * value is only trimmed, not escaped.
 *
 * @return
 *   The trimmed search phrase, or FALSE if no referer is set, it cannot be
 *   parsed, or it does not look like a search from a supported engine.
 */
function search404_search_engine_query() {
  // Host name fragment => name of the query parameter holding the keywords.
  $engines = array(
    'altavista' => 'q',
    'aol' => 'query',
    'google' => 'q',
    'bing' => 'q',
    'lycos' => 'query',
    'yahoo' => 'p',
  );

  // Try to extract searchengine querystring only if HTTP_REFERER was set (#1111918)
  if (empty($_SERVER['HTTP_REFERER']) || !is_string($_SERVER['HTTP_REFERER'])) {
    return FALSE;
  }

  $parsed_url = parse_url($_SERVER['HTTP_REFERER']);
  if ($parsed_url === FALSE || empty($parsed_url['host']) || empty($parsed_url['query'])) {
    return FALSE;
  }

  // Host names are case-insensitive; normalise before comparing.
  $remote_host = strtolower($parsed_url['host']);
  parse_str($parsed_url['query'], $query);

  foreach ($engines as $host => $key) {
    // The is_string() check rejects array parameters such as "?q[]=x", which
    // trim() cannot handle.
    if (strpos($remote_host, $host) !== FALSE && isset($query[$key]) && is_string($query[$key])) {
      return trim($query[$key]);
    }
  }
  return FALSE;
}

/**
 * Main search function; page callback for the search404 path.
 *
 * Started with: http://drupal.org/node/12668
 * Updated to be more similar to search_view().
 *
 * When the search module is enabled and the user may search, keywords are
 * taken from the search engine referer (if that option is on) or from the
 * requested path. Unless the site is throttled or automatic searching is
 * disabled, the keywords are then either handed to one of the optional search
 * back ends via a redirect, or searched directly, in which case the user is
 * redirected to a result (depending on the "jump" settings) or shown the
 * themed result list. A search form is always appended.
 *
 * @return
 *   The page content. When 'search404_block_show' is enabled the function
 *   prints the themed page itself and exits instead of returning.
 */
function search404_page() {
  $output = '';
  drupal_set_title(variable_get('search404_page_title', t('Page not found')));
  if (module_exists('search') && (user_access('search content') || user_access('search by page'))) {
    $keys = '';
    $results = '';

    // If apachesolr_search or luceneapi or xapian is installed use them instead default node search.
    $type_search = module_exists('apachesolr_search') ? 'apachesolr_search' : (module_exists('luceneapi_node') ? 'luceneapi_node' : (module_exists('xapian') ? 'xapian' : 'node'));
    if (variable_get('search404_use_search_engine', FALSE)) {
      $keys = search404_search_engine_query();
    }
    if (!$keys) {
      $keys = search404_get_keys();
    }

    // Get throttle status
    $throttle = module_invoke('throttle', 'status');

    // If search keys are present and site is not throttled and automatic searching is not disabled.
    if ($keys && !$throttle && !variable_get('search404_skip_auto_search', FALSE)) {
      if (module_exists('search_by_page') && variable_get('search404_do_search_by_page', FALSE)) {

        // TODO - Perhaps move the message to the search404 goto?
        _search404_set_message('search', $keys);
        search404_goto('search_pages/' . $keys);
      }
      elseif (module_exists('fuzzysearch') && user_access('fuzzysearch content') && variable_get('search404_do_fuzzysearch', FALSE)) {
        _search404_set_message('search', $keys);
        search404_goto(variable_get('fuzzysearch_path_name', 'fuzzysearch/results') . '/' . $keys);
      }
      elseif (module_exists('google') && user_access('search Google CSE') && variable_get('search404_do_google_cse', FALSE)) {
        _search404_set_message('google', $keys);
        search404_goto('search/google/' . $keys);
      }
      elseif (module_exists('google_cse_adv') && user_access('search content') && variable_get('search404_do_google_cse_adv', FALSE)) {
        _search404_set_message('google', $keys);
        search404_goto('search/google_cse_adv/' . $keys);
      }
      elseif (variable_get('search404_do_custom_search', FALSE)) {
        _search404_set_message('search', $keys);
        $custom_search_path = variable_get('search404_custom_search_path', 'search/@keys');
        $custom_search_path = str_replace('@keys', $keys, $custom_search_path);
        search404_goto($custom_search_path);
      }
      else {

        // Called for apache solr, lucene, xapian and core search.
        $results = module_invoke($type_search, 'search', 'search', $keys);
        $result_count = is_array($results) ? count($results) : 0;

        // Jump to the first result when there is exactly one result and
        // "jump" is enabled, or when there is at least one result and
        // "first" is enabled.
        $jump = ($result_count == 1 && variable_get('search404_jump', FALSE))
          || ($result_count >= 1 && variable_get('search404_first', FALSE));

        if ($jump) {
          _search404_set_message('jump', $keys);
          search404_goto('node/' . $results[0]['node']->nid);
        }
        else {
          _search404_set_message('search', $keys);
          if ($result_count > 0) {
            drupal_add_css(drupal_get_path('module', 'search') . '/search.css', 'module', 'all', FALSE);

            // Hack to get search results themed by manipulating $_GET['q']!
            $oldgetq = $_GET['q'];
            // The destination is normally present here, but guard against
            // the page being requested without one.
            $olddestination = isset($_REQUEST['destination']) ? $_REQUEST['destination'] : NULL;
            unset($_REQUEST['destination']);
            $_GET['q'] = "search/{$type_search}/{$keys}";
            $results = theme('search_results', $results, 'node');
            $_GET['q'] = $oldgetq;
            if (isset($olddestination)) {
              $_REQUEST['destination'] = $olddestination;
            }

            // End of Hack
          }
          else {

            // If no results are found and Lucene Did You Mean is present, then redirect to Lucene DYM
            if (module_exists('luceneapi_dym') && ($suggestions = luceneapi_dym_suggestions_get($keys))) {
              $lucene_dym_path = preg_replace('/^.*href="\\/search\\/luceneapi_node\\/([^"]*)".*$/i', '$1', $suggestions);
              search404_goto($lucene_dym_path);
            }

            // If we are searching with apache solr, use the no results text
            // from the apachesolr_search module (#1127412).
            if (module_exists('apachesolr_search')) {

              // Some versions of apachesolr_search has this as a function
              // instead of a theme function (#1279466).
              if (function_exists('apachesolr_search_noresults')) {
                $results = apachesolr_search_noresults();
              }
              else {
                $results = theme('apachesolr_search_noresults');
              }
            }
            else {
              $results = search_help('search#noresults', drupal_help_arg());
            }
          }
          $results = theme('box', t('Search results'), $results);
        }
      }
    }
    $search_suggestions = '';

    // If Lucene DYM is present then add Lucene "Did You Mean" suggestions
    if (module_exists('luceneapi_dym') && ($suggestions = luceneapi_dym_suggestions_get($keys))) {
      $search_suggestions = theme('box', t('Did you mean'), $suggestions);
    }

    // Construct the search form.
    if (module_exists('fuzzysearch') && variable_get('search404_do_fuzzysearch', FALSE)) {
      $search_form = drupal_get_form('fuzzysearch_form', $keys);
    }
    else {
      $search_form = drupal_get_form('search_form', NULL, $keys, $type_search);
    }
    $output = variable_get('search404_page_text', '') . $search_form . $search_suggestions . $results;
  }

  // This was done to display sidebars left and right of page,
  // if the option is set from Search404 settings Page.
  // However this will cause the program flow to not return
  // to the drupal_not_found() and back to index.php.
  if (variable_get('search404_block_show', FALSE)) {
    print theme('page', $output);
    drupal_page_footer();
    unset($output);
    exit(0);
  }
  else {
    return $output;
  }
}

/**
 * Queues the "page not found" notice, unless the site has disabled it.
 *
 * @param $variant
 *   Which wording to use: 'google' for the Google searches, 'jump' when the
 *   visitor is redirected to a search result, anything else for the generic
 *   "a search was performed" notice.
 * @param $keys
 *   The search keywords to show in the message.
 */
function _search404_set_message($variant, $keys) {
  if (variable_get('search404_disable_error_message', FALSE)) {
    return;
  }

  // The %placeholder form of t() escapes its value itself, so the keys must
  // not be run through check_plain() first or entities get double-encoded.
  $args = array('%keys' => $keys);
  switch ($variant) {
    case 'google':
      drupal_set_message(t('The page you requested does not exist. For your convenience, a google search was performed using the query %keys.', $args), 'error');
      break;

    case 'jump':
      drupal_set_message(t('The page you requested does not exist. A search for %keys resulted in this page.', $args), 'status');
      break;

    default:
      drupal_set_message(t('The page you requested does not exist. For your convenience, a search was performed using the query %keys.', $args), 'error');
      break;
  }
}

/**
 * Redirect helper wrapping drupal_goto() for search404.
 *
 * @param $path
 *   The path to redirect to.
 */
function search404_goto($path = '') {
  // drupal_not_found() has already filled in $_REQUEST['destination'];
  // replace it so the redirect goes to $path.
  $_REQUEST['destination'] = $path;

  // Issue a 301 when the 'search404_redirect_301' setting is on, otherwise
  // the default 302.
  $http_status = variable_get('search404_redirect_301', FALSE) ? 301 : 302;

  drupal_goto($path, NULL, NULL, $http_status);
}

/**
 * Form builder for the Search404 administration settings.
 *
 * Every element is named after the variable it edits, and the form is handed
 * to system_settings_form() before being returned. The hidden 'site_404'
 * element carries the value 'search404'.
 *
 * @return
 *   The form array returned by system_settings_form().
 */
function search404_settings() {
  $form = array();

  // Attributes for options whose companion module is not enabled.
  $unavailable = array('disabled' => 'disabled');

  // Redirect behaviour for searches handled by this module itself.
  $form['search404_jump'] = array(
    '#type' => 'checkbox',
    '#default_value' => variable_get('search404_jump', FALSE),
    '#title' => t('Jump directly to the search result when there is only one result'),
    '#description' => t('Works only with Core, Apache Solr, Lucene and Xapian searches. An HTTP status of 301 or 302 will be returned for this redirect.'),
  );
  $form['search404_first'] = array(
    '#type' => 'checkbox',
    '#default_value' => variable_get('search404_first', FALSE),
    '#title' => t('Jump directly to the first search result even when there are multiple results'),
    '#description' => t('Works only with Core, Apache Solr, Lucene and Xapian searches. An HTTP status of 301 or 302 will be returned for this redirect.'),
  );

  // 301 instead of the standard 302 issued by drupal_goto().
  // TODO: jump to search pages should still return 404 and not 301 or 302 for
  // searches other than Core, Apache Solr, Lucene and Xapian. Can this even
  // be done? Meta refresh?
  $form['search404_redirect_301'] = array(
    '#type' => 'checkbox',
    '#default_value' => variable_get('search404_redirect_301', FALSE),
    '#title' => t('Use a 301 Redirect instead of 302 Redirect'),
    '#description' => t('This applies when the option to jump to first result is enabled and also for search404 results pages other than for Core, Apache Solr, Lucene and Xapian.'),
  );

  // Blocks (sidebars) on the 404 search page.
  $form['search404_block_show'] = array(
    '#type' => 'checkbox',
    '#default_value' => variable_get('search404_block_show', FALSE),
    '#title' => t('Show left and right sidebars in page when a 404 search occurs'),
  );

  // Alternative search back ends; each checkbox is disabled while the module
  // it depends on is not enabled.
  $form['search404_do_google_cse'] = array(
    '#type' => 'checkbox',
    '#default_value' => variable_get('search404_do_google_cse', FALSE),
    '#title' => t('Do a "Google CSE" search instead of a Drupal Search when a 404 occurs'),
    '#description' => t('Requires Google CSE and Google CSE Search Modules to be Enabled.'),
    '#attributes' => module_exists('google') ? array() : $unavailable,
  );
  $form['search404_do_google_cse_adv'] = array(
    '#type' => 'checkbox',
    '#default_value' => variable_get('search404_do_google_cse_adv', FALSE),
    '#title' => t('Do a "Google CSE Advanced" search instead of a Drupal Search when a 404 occurs'),
    '#description' => t('Requires Google CSE Advanced Search Module to be Enabled.'),
    '#attributes' => module_exists('google_cse_adv') ? array() : $unavailable,
  );
  $form['search404_do_search_by_page'] = array(
    '#type' => 'checkbox',
    '#default_value' => variable_get('search404_do_search_by_page', FALSE),
    '#title' => t('Do a "Search By Page" search instead of a Drupal Search when a 404 occurs'),
    '#description' => t('Requires "Search by page" module to be Enabled.'),
    '#attributes' => module_exists('search_by_page') ? array() : $unavailable,
  );
  $form['search404_do_fuzzysearch'] = array(
    '#type' => 'checkbox',
    '#default_value' => variable_get('search404_do_fuzzysearch', FALSE),
    '#title' => t('Do a "Fuzzy Search" search instead of a Drupal Search when a 404 occurs'),
    '#description' => t('Requires "Fuzzy Search" module to be enabled. If using both Core search and Fuzzy Search, do not use "search" as your fuzzysearch results path.'),
    '#attributes' => module_exists('fuzzysearch') ? array() : $unavailable,
  );

  // Custom search path (for Search API integration, #1076190).
  $form['search404_do_custom_search'] = array(
    '#type' => 'checkbox',
    '#default_value' => variable_get('search404_do_custom_search', FALSE),
    '#title' => t('Do a "Search" with custom path instead of a Drupal Search when a 404 occurs'),
    '#description' => t('Redirect the user to a Custom search path to be entered below. Can be used to open a view with path parameter.'),
  );
  $form['search404_custom_search_path'] = array(
    '#type' => 'textfield',
    '#default_value' => variable_get('search404_custom_search_path', 'search/@keys'),
    '#title' => t('Custom search path'),
    '#description' => t('The custom search path: example: myownsearch/@keys or myownsearch?txt_s=@keys. The token "@keys" will be replaced with the search keys from the URL.'),
  );

  // Lets large sites skip the automatic search.
  $form['search404_skip_auto_search'] = array(
    '#type' => 'checkbox',
    '#default_value' => variable_get('search404_skip_auto_search', FALSE),
    '#title' => t('Disable auto search'),
    '#description' => t('Disable automatically searching for the keywords when a page is not found and instead show the populated search form with the keywords. Useful for large sites to reduce server loads.'),
  );

  // Suppresses the Drupal message shown together with the search results.
  $form['search404_disable_error_message'] = array(
    '#type' => 'checkbox',
    '#default_value' => variable_get('search404_disable_error_message', FALSE),
    '#title' => t('Disable error message'),
    '#description' => t('Disable the Drupal error message when search results are shown on a 404 page.'),
  );

  // Advanced settings, collected in a collapsed fieldset.
  $advanced = array(
    '#type' => 'fieldset',
    '#title' => t('Advanced settings'),
    '#collapsible' => TRUE,
    '#collapsed' => TRUE,
  );
  $advanced['search404_use_or'] = array(
    '#type' => 'checkbox',
    '#default_value' => variable_get('search404_use_or', FALSE),
    '#title' => t('Use OR between keywords when searching.'),
  );
  $advanced['search404_use_search_engine'] = array(
    '#type' => 'checkbox',
    '#default_value' => variable_get('search404_use_search_engine', FALSE),
    '#title' => t('Use auto-detection of keywords from search engine referer'),
    '#description' => t('This feature will conduct a search based on the query string got from a search engine if the URL of the search result points to a 404 page in the current website. Currently supported search engines: Google, Yahoo, Altavista, Lycos, Bing and AOL'),
  );
  $advanced['search404_ignore'] = array(
    '#type' => 'textarea',
    '#default_value' => variable_get('search404_ignore', t('and or the')),
    '#title' => t('Words to ignore'),
    '#description' => t('These words will be ignored from the search query. Separate words with a space, e.g.: "and or the".'),
  );
  $advanced['search404_ignore_extensions'] = array(
    '#type' => 'textfield',
    '#default_value' => variable_get('search404_ignore_extensions', 'htm html php'),
    '#title' => t('Extensions to ignore'),
    '#description' => t('These extensions will be ignored from the search query, e.g.: http://www.example.com/invalid/page.php will only search for "invalid page". Separate extensions with a space, e.g.: "htm html php". Do not include leading dot.'),
  );
  $advanced['search404_ignore_query'] = array(
    '#type' => 'textfield',
    '#default_value' => variable_get('search404_ignore_query', 'gif jpg jpeg bmp png'),
    '#title' => t('Extensions to abort search'),
    '#description' => t('A search will not be performed for a query ending in these extensions. Separate extensions with a space, e.g.: "gif jpg jpeg bmp png". Do not include leading dot.'),
  );
  $advanced['search404_regex'] = array(
    '#type' => 'textfield',
    '#default_value' => variable_get('search404_regex', ''),
    '#title' => t('PCRE filter'),
    '#description' => t('This regular expression will be applied to filter all queries. The parts of the path that match the expression will be EXCLUDED from the search. You do NOT have to enclose the regex in forward slashes when defining the PCRE. e.g.: use "[foo]bar" instead of "/[foo]bar/". On how to use a PCRE Regex please refer <a href="http://php.net/pcre">PCRE pages in the PHP Manual</a>.'),
  );

  // Custom title for the 404 search results page.
  $advanced['search404_page_title'] = array(
    '#type' => 'textfield',
    '#default_value' => variable_get('search404_page_title', t('Page not found')),
    '#title' => t('Custom Page Title'),
    '#description' => t('You can enter a value that will displayed at the title of the Webpage e.g. "Page not found"'),
  );

  // Custom text for the 404 search results page.
  $advanced['search404_page_text'] = array(
    '#type' => 'textarea',
    '#default_value' => variable_get('search404_page_text', ''),
    '#title' => t('Custom Page Text'),
    '#description' => t('You can enter a custom text message that can be displayed at the top of the search results, HTML formatting can be used'),
  );
  $form['advanced'] = $advanced;

  // Helps reset the site_404 variable to search404 in case the user changes
  // it manually.
  $form['site_404'] = array(
    '#type' => 'hidden',
    '#value' => 'search404',
  );

  // Tell the user about the site_404 issue.
  $form['search404_variable_message'] = array(
    '#type' => 'markup',
    '#value' => t('Saving this form will revert the 404 handling on the site to this module.'),
    '#prefix' => '<p>',
    '#suffix' => '</p>',
  );

  return system_settings_form($form);
}

Functions

Namesort descending Description
search404_get_keys Replacement for search_get_keys. This function can potentially return dangerous data (possible SQL injection/XSS). The return value must be sanitized before use.
search404_goto Search404 drupal_goto helper function.
search404_menu Implementation of hook_menu().
search404_page Main search function. Started with: http://drupal.org/node/12668 Updated to be more similar to search_view
search404_search_engine_query Detect search from search engine.
search404_settings Search404 administration settings.