You are here

search_by_page.module in Search by Page 7

Same filename and directory in other branches
  1. 8 search_by_page.module
  2. 6 search_by_page.module

Main module file for Drupal module Search by Page.

Adds page-oriented searching to the core Drupal search module.

Copyright 2009 Jennifer Hodgdon, Poplar ProductivityWare LLC.

Licensed under the GNU Public License.

File

search_by_page.module
View source
<?php

/**
 * @file
 * Main module file for Drupal module Search by Page.
 *
 * Adds page-oriented searching to the core Drupal search module.
 *
 * Copyright 2009 Jennifer Hodgdon, Poplar ProductivityWare LLC.
 *
 * Licensed under the GNU Public License.
 * @ingroup search_by_page
 */

/**
 * @defgroup search_by_page Search by Page Module.
 *
 * Adds page-oriented searching to core Drupal search module.
 */

/**
 * Marks a page so that it is picked up for reindexing at the next cron run.
 *
 * The page is looked up in every applicable environment; each match has its
 * last index time updated to 0.
 *
 * @param $module
 *   Module that submitted this path for indexing.
 * @param $id
 *   ID given in hook_sbp_paths() for this path.
 * @param $environment
 *   Environment ID where this path should be reindexed. If omitted, all
 *   environments are checked.
 */
function search_by_page_force_reindex($module, $id, $environment = NULL) {
  // Either the single requested environment, or every known environment.
  $environments = isset($environment) ? array($environment) : search_by_page_list_environments();

  foreach ($environments as $envid) {
    $record = _search_by_page_lookup(0, '', $id, $module, $envid);
    if (!$record) {
      continue;
    }
    _search_by_page_update_last_time($record->pid, 0);
  }
}

/**
 * Removes a page from Search by Page.
 *
 * The page will immediately be unavailable in search results, and
 * will not be reindexed (unless hook_sbp_paths() submits it for
 * indexing again).
 *
 * @param $module
 *   Module that submitted this path for indexing.
 * @param $id
 *   ID given in hook_sbp_paths() for this path.
 * @param $environment
 *   Environment ID where this path should be removed. If omitted, all
 *   environments are checked.
 */
function search_by_page_force_remove($module, $id, $environment = NULL) {
  // Either the single requested environment, or every known environment.
  $environments = isset($environment) ? array($environment) : search_by_page_list_environments();

  foreach ($environments as $envid) {
    $record = _search_by_page_lookup(0, '', $id, $module, $envid);
    if (!$record) {
      continue;
    }
    _search_by_page_remove_path($record->pid);
  }
}

/**
 * Rebuilds the paths table for a particular module.
 *
 * Calls that module's hook_sbp_paths() implementation, which should
 * return an array of Drupal paths to be indexed. Removes any obsolete
 * paths, adds new ones, and updates existing ones' information, so
 * that the correct paths will be indexed.
 *
 * A module without a hook_sbp_paths() function, or one that returns something
 * other than an array, is treated as wanting no paths, so all of its existing
 * paths in this environment are removed. Individual items that are not arrays
 * or that have no 'languages' list are ignored.
 *
 * @param $module
 *   Module to rebuild.
 * @param $environment
 *   Environment ID to rebuild.
 * @param $reset_items
 *   TRUE if the last_index_time for items whose max_time has been exceeded
 *   should be reset. FALSE to leave it alone.
 */
function search_by_page_rebuild_paths($module, $environment, $reset_items) {

  // Find out what paths this module wants now. Only call the hook if it
  // really exists, so that a missing implementation is not a fatal error.
  $function = $module . '_sbp_paths';
  $paths = function_exists($function) ? call_user_func($function, $environment) : NULL;
  if (!is_array($paths)) {
    $paths = array();
  }

  // Make this into an array that matches db structure, with a
  // unique ID (language plus path) as key.
  $pathsnow = array();
  $defaults = array(
    'min_time' => 1,
    'max_time' => 0,
  );
  foreach ($paths as $path => $item) {
    // Skip malformed entries: one row is created per language, so an item
    // without a list of languages cannot produce any rows.
    if (!is_array($item) || empty($item['languages']) || !is_array($item['languages'])) {
      continue;
    }
    $item += $defaults;
    foreach ($item['languages'] as $lang) {
      $pathsnow[$lang . "_._" . $path] = array(
        'page_path' => $path,
        'language' => $lang,
        'modid' => $item['id'],
        'role' => $item['role'],
        'min_time' => $item['min_time'],
        'max_time' => $item['max_time'],
      );
    }
  }
  $newpaths = array_keys($pathsnow);

  // Now see what's in the database already, i.e. what the module wanted the
  // last time we did this.
  $dbr = db_query("SELECT p.page_path, p.language, p.pid FROM {sbp_path} p WHERE p.from_module= :modname AND p.environment = :envid", array(
    ':modname' => $module,
    ':envid' => $environment,
  ))
    ->fetchAll();
  $pathsindb = array();
  foreach ($dbr as $item) {
    $pathsindb[$item->language . "_._" . $item->page_path] = $item->pid;
  }
  $oldpaths = array_keys($pathsindb);

  // Resolve differences by deleting items they no longer want,
  // adding items they want now but didn't before, and modifying any
  // items whose information might potentially have changed. Do it this way
  // to preserve the information we've recorded on old items.
  $new = array_diff($newpaths, $oldpaths);
  $del = array_diff($oldpaths, $newpaths);
  $chg = array_diff($newpaths, $new);

  foreach ($del as $key) {
    _search_by_page_remove_path($pathsindb[$key]);
  }

  foreach ($new as $key) {
    $item = $pathsnow[$key];
    db_insert('sbp_path')
      ->fields(array(
        'page_path' => $item['page_path'],
        'from_module' => $module,
        'modid' => $item['modid'],
        'language' => $item['language'],
        'role' => $item['role'],
        'environment' => $environment,
        'min_time' => $item['min_time'],
        'max_time' => $item['max_time'],
      ))
      ->execute();
  }

  foreach ($chg as $key) {
    $item = $pathsnow[$key];
    db_update('sbp_path')
      ->fields(array(
        'modid' => $item['modid'],
        'role' => $item['role'],
        'min_time' => $item['min_time'],
        'max_time' => $item['max_time'],
      ))
      ->condition('pid', $pathsindb[$key])
      ->execute();
  }

  // Finally, reset the last index time for any item whose max time to reindex
  // has passed. Note that this is not restricted to $module or $environment.
  if ($reset_items) {
    db_update('sbp_path')
      ->fields(array(
        'last_index_time' => 0,
      ))
      ->condition('max_time', 0, '>')
      ->where(':currtime > (last_index_time + max_time)', array(
        ':currtime' => time(),
      ))
      ->execute();
  }
}

/**
 * Splits a path into the main path and any query parts.
 *
 * The path is split at the first '?'; if there is none, it is split at the
 * first '&'. Aliases are resolved both for the path as given and, when a
 * query was attached, for the main part on its own.
 *
 * @param $path
 *    Path to split.
 * @return
 *    Array, first element is main path (after resolving aliases), second is
 *    query string. The second element is absent if the path has no query.
 */
function search_by_page_path_parts($path) {
  // Resolve the path exactly as given first, in case the whole string
  // (including any '?' or '&') is itself an alias.
  $path = drupal_get_normal_path($path);

  // Split on the first '?', falling back to the first '&'.
  $parts = explode('?', $path, 2);
  if (count($parts) < 2) {
    $parts = explode('&', $path, 2);
  }

  // If a query was attached, the lookup above was done on the combined
  // string, so the main part still needs to be resolved on its own.
  if (count($parts) > 1) {
    $parts[0] = drupal_get_normal_path($parts[0]);
  }
  return $parts;
}

/**
 * Returns a search excerpt, with matched keywords highlighted.
 *
 * This is a drop-in replacement for the core search_excerpt() function.
 * The difference is that it allows stemming modules (or other modules
 * that preprocess search text and terms) to highlight words other than
 * exact keyword matches in the text, by implementing hook_sbp_excerpt_match().
 *
 * @param $keys
 *   A string containing a search query.
 * @param $text
 *   The text to extract fragments from.
 * @return
 *   A string containing HTML for the excerpt. If no keyword is found in the
 *   text, this is the beginning of the text, truncated.
 */
function search_by_page_excerpt($keys, $text) {

  // We highlight around non-indexable or CJK characters.
  $boundary = '(?:(?<=[' . PREG_CLASS_UNICODE_WORD_BOUNDARY . PREG_CLASS_CJK . '])|(?=[' . PREG_CLASS_UNICODE_WORD_BOUNDARY . PREG_CLASS_CJK . ']))';

  // Extract positive keywords and phrases. Quoted phrases are kept whole;
  // everything else is split on spaces so each word is its own keyword.
  preg_match_all('/ ("([^"]+)"|(?!OR)([^" ]+))/', ' ' . $keys, $matches);
  $keys = array_merge($matches[2], $matches[3]);

  // Prepare text: pad tags with spaces so words on either side of a tag do
  // not run together, then strip the tags.
  $text = ' ' . strip_tags(str_replace(array(
    '<',
    '>',
  ), array(
    ' <',
    '> ',
  ), $text)) . ' ';
  array_walk($keys, '_search_excerpt_replace');
  $workkeys = $keys;

  // Extract fragments around keywords.
  // First we collect ranges of text around each keyword, starting/ending
  // at spaces, trying to get to 256 characters.
  // If the sum of all fragments is too short, we look for second occurrences.
  $ranges = array();
  $included = array();
  $foundkeys = array();
  $length = 0;
  while ($length < 256 && count($workkeys)) {
    foreach ($workkeys as $k => $key) {
      if (!strlen($key)) {
        unset($workkeys[$k]);
        unset($keys[$k]);
        continue;
      }
      if ($length >= 256) {
        break;
      }

      // Remember occurrence of key so we can skip over it if more occurrences
      // are desired.
      if (!isset($included[$key])) {
        $included[$key] = 0;
      }

      // Locate a keyword (position $p, always >0 because $text starts with
      // a space). Try a bare keyword and let stemming modules try to find a
      // derived form. Make sure to keep the leftmost match found.
      $p = 0;
      if (preg_match('/' . $boundary . $key . $boundary . '/iu', $text, $match, PREG_OFFSET_CAPTURE, $included[$key])) {
        $p = $match[0][1];
      }
      foreach (module_implements('sbp_excerpt_match') as $module) {
        $info = module_invoke($module, 'sbp_excerpt_match', $key, $text, $included[$key], $boundary);

        // A hook that found nothing may return nothing at all.
        if (!is_array($info) || empty($info['where'])) {
          continue;
        }
        if (!$p || $info['where'] < $p) {
          $p = $info['where'];
        }
        if (!empty($info['keyword'])) {
          $foundkeys[] = $info['keyword'];
        }
      }

      // Now locate a space in front (position $q) and behind it (position $s),
      // leaving about 60 characters extra before and after for context.
      // Note that a space was added to the front and end of $text above.
      if ($p) {
        if (($q = strpos($text, ' ', max(0, $p - 60))) !== FALSE) {
          $end = substr($text, $p, 80);

          // CODER-IGNORE-THIS
          if (($s = strrpos($end, ' ')) !== FALSE) {
            $ranges[$q] = $p + $s;
            $length += $p + $s - $q;
            $included[$key] = $p + 1;
          }
          else {
            unset($workkeys[$k]);
          }
        }
        else {
          unset($workkeys[$k]);
        }
      }
      else {
        unset($workkeys[$k]);
      }
    }
  }

  // If we didn't find anything, return the beginning.
  if (count($ranges) == 0) {
    return truncate_utf8($text, 256) . ' ...';
  }

  // Sort the text ranges by starting position.
  ksort($ranges);

  // Now we collapse overlapping text ranges into one. The sorting makes it O(n).
  $newranges = array();
  foreach ($ranges as $from2 => $to2) {
    if (!isset($from1)) {
      $from1 = $from2;
      $to1 = $to2;
      continue;
    }
    if ($from2 <= $to1) {
      $to1 = max($to1, $to2);
    }
    else {
      $newranges[$from1] = $to1;
      $from1 = $from2;
      $to1 = $to2;
    }
  }
  $newranges[$from1] = $to1;

  // Fetch text
  $out = array();
  foreach ($newranges as $from => $to) {
    $out[] = substr($text, $from, $to - $from);

    // CODER-IGNORE-THIS
  }
  $text = (isset($newranges[0]) ? '' : '... ') . implode(' ... ', $out) . ' ...';

  // Highlight keywords. Must be done at once to prevent conflicts ('strong' and '<strong>').
  // Both arrays are numerically indexed, so they must be appended rather than
  // combined with the + operator, which would drop entries whose index is
  // already in use. Keywords supplied by hooks are used in the pattern as-is.
  $keys = array_unique(array_merge($keys, $foundkeys));
  $text = preg_replace('/' . $boundary . '(' . implode('|', $keys) . ')' . $boundary . '/iu', '<strong>\\0</strong>', $text);
  return $text;
}

/**
 * Strips out contents of HTML tags that are excluded in the given environment.
 *
 * Modules should always call this function in their hook_sbp_details()
 * implementation. Note that it differs from the PHP strip_tags in that it
 * strips the text contained within the tags, as well as the tags.
 *
 * Tag names are matched exactly: excluding "a" strips <a ...>...</a> but
 * leaves <abbr> alone. If a replacement fails (for example because the text
 * is not valid UTF-8), the text is left as it was for that tag.
 *
 * @param $text
 *   Text to be processed.
 * @param $envid
 *   Environment ID to use to find list of tags to exclude.
 *
 * @return
 *   $text with the specified tags (and their contents) from
 *   search_by_page_setting_get('exclude_tags', $envid, '') stripped out.
 */
function search_by_page_strip_tags($text, $envid) {
  $tags = search_by_page_setting_get('exclude_tags', $envid, '');

  // Make sure $tags is clean: lower case, and anything that is not a letter,
  // digit or underscore acts as a separator.
  $tags = preg_replace('/[^a-z0-9_ ]/', ' ', strtolower($tags));
  $tags = array_unique(array_filter(explode(' ', $tags)));

  // Strip tags and their contents, noting that tags could have attributes.
  foreach ($tags as $tag) {
    // After the tag name there must be either the end of the tag or a
    // whitespace/slash, so that a name is not matched as a prefix of a longer
    // one. The U modifier makes all quantifiers non-greedy.
    $pattern = '|<' . $tag . '(?:[\s/][^>]*)?>.*</' . $tag . '\s*>|isUu';
    $stripped = preg_replace($pattern, '', $text);

    // preg_replace() returns NULL on error; do not lose the text then.
    if ($stripped !== NULL) {
      $text = $stripped;
    }
  }
  return $text;
}

/**
 * Reads one environment-specific setting of Search by Page or a sub-module.
 *
 * Works like the Drupal variable_get() function, but settings are stored per
 * environment inside the single 'search_by_page_settings' variable.
 *
 * @param $name
 *   Name of the setting.
 * @param $environment
 *   ID of the environment.
 * @param $default
 *   Default value to return if setting has not been defined.
 *
 * @return
 *   Setting value, or $default if not defined.
 *
 * @see search_by_page_setting_set()
 */
function search_by_page_setting_get($name, $environment, $default) {
  $settings = variable_get('search_by_page_settings', array());
  return isset($settings[$environment][$name]) ? $settings[$environment][$name] : $default;
}

/**
 * Stores one environment-specific setting of Search by Page or a sub-module.
 *
 * Works like the Drupal variable_set() function, but settings are stored per
 * environment inside the single 'search_by_page_settings' variable.
 *
 * @param $name
 *   Name of the setting.
 * @param $environment
 *   ID of the environment.
 * @param $value
 *   Value to set.
 *
 * @see search_by_page_setting_get()
 */
function search_by_page_setting_set($name, $environment, $value) {
  $settings = variable_get('search_by_page_settings', array());

  // Start from the environment's existing settings, if there are any.
  $env_settings = isset($settings[$environment]) ? $settings[$environment] : array();
  $env_settings[$name] = $value;
  $settings[$environment] = $env_settings;

  variable_set('search_by_page_settings', $settings);
}

/**
 * Returns a suitable field prefix for a path.
 *
 * This is the absolute URL of the site front page, with ?q= appended if clean
 * URLs are not being used. It can be used in a Form API form as component
 * #field_prefix, if you are asking the user to input a URL path.
 *
 * @return
 *   The prefix string.
 */
function search_by_page_path_field_prefix() {
  $base = url(NULL, array(
    'absolute' => TRUE,
  ));
  if (variable_get('clean_url', 0)) {
    return $base;
  }
  return $base . '?q=';
}

/**
 * Returns the content portion of the rendered page at the given path.
 *
 * Note: As a side effect, theme, pager, and page title variables may be
 * altered. The calling function needs to make sure they are restored. Do not
 * call this during rendering of search results! Use
 * search_by_page_stored_page_content() instead.
 *
 * $_GET and the stored page content are saved on entry and put back before
 * returning, and the 'system_main_content_added' flag is set to FALSE.
 *
 * @param $path
 *    Path to render.
 *
 * @return
 *   An integer error code if there is a problem. A string containing the
 *   page content if all is well.
 */
function search_by_page_page_content($path) {
  $save_get = $_GET;
  $save_content = drupal_set_page_content();

  // Separate path into main part and query.
  $parts = search_by_page_path_parts($path);
  $path = drupal_get_normal_path($parts[0]);
  $_GET['q'] = $path;

  // Add query to $_GET as it would be on a page request. Existing $_GET
  // entries (including 'q') take precedence over the parsed query.
  if (isset($parts[1])) {
    $getstuff = array();
    parse_str($parts[1], $getstuff);
    $_GET += $getstuff;
  }

  // Set up the theme and get page information.
  drupal_static_reset('menu_get_item');
  _search_by_page_setup_theme();
  $page = menu_execute_active_handler($path, FALSE);
  if (is_int($page)) {

    // Error code return.
    $_GET = $save_get;
    drupal_set_page_content($save_content);

    // The flag has to be changed through the reference: passing FALSE as the
    // default to drupal_static() has no effect once the static exists.
    $main_content_added =& drupal_static('system_main_content_added', FALSE);
    $main_content_added = FALSE;
    return $page;
  }

  // If we get here, we'll follow what drupal_render_page does to let
  // other modules alter the page.
  if (is_string($page) || is_array($page) && (!isset($page['#type']) || $page['#type'] != 'page')) {
    drupal_set_page_content($page);
    $page = element_info('page');
  }
  foreach (module_implements('page_build') as $module) {
    // For the Block module, only the content region is built.
    $function = $module == 'block' ? '_search_by_page_build_content_blocks' : $module . '_page_build';
    $function($page);
  }
  drupal_alter('page', $page);
  if (!isset($page['content'])) {
    $page['content']['system_main'] = drupal_set_page_content();
  }
  $content = drupal_render($page['content']);

  // Put the request state back the way it was.
  $_GET = $save_get;
  drupal_set_page_content($save_content);

  // See the note on the error path: assign through the reference so the flag
  // really ends up FALSE.
  $main_content_added =& drupal_static('system_main_content_added', FALSE);
  $main_content_added = FALSE;
  drupal_static_reset('menu_tree_set_path');
  drupal_static_reset('menu_tree_page_data');
  drupal_static_reset('menu_get_item');
  return $content;
}

/**
 * Fetches the content that was stored when a page was last indexed.
 *
 * Reads the page_data column of the {sbp_path} row matching the given module,
 * module-specific ID and environment.
 *
 * @param string $module
 *   Name of the module for the path.
 * @param int $modid
 *   ID of this path within this module.
 * @param int $environment
 *   ID of the environment.
 *
 * @return string
 *   The stored content from last indexing of the page.
 */
function search_by_page_stored_page_content($module, $modid, $environment) {
  $query = db_select('sbp_path', 'p');
  $query->fields('p', array('page_data'));
  $query->condition('from_module', $module);
  $query->condition('modid', $modid);
  $query->condition('environment', $environment);

  $result = $query->execute();
  return $result->fetchField();
}

/**
 * Builds the blocks of the 'content' region only.
 *
 * Used in place of block_page_build() while rendering a page for indexing, so
 * that blocks for regions other than 'content' are not built.
 *
 * @param $page
 *   Page render array; its 'content' entry is replaced if the region has
 *   blocks.
 */
function _search_by_page_build_content_blocks(&$page) {
  $content_blocks = block_get_blocks_by_region('content');
  if ($content_blocks) {
    $page['content'] = $content_blocks;
  }

  // Clear the cached block list.
  drupal_static_reset('block_list');
}

/**
 * Initializes the theme that should be used to render the current path.
 *
 * This basically follows drupal_theme_initialize(), but assumes all the
 * bootstrap stuff has been done, and skips AJAX/JS stuff. The globals $theme
 * and $theme_key are overwritten.
 */
function _search_by_page_setup_theme() {
  global $user, $theme, $theme_key;

  $themes = list_themes();

  // Start with the user's theme if it is allowed, else the site default.
  if (!empty($user->theme) && drupal_theme_access($user->theme)) {
    $theme = $user->theme;
  }
  else {
    $theme = variable_get('theme_default', 'bartik');
  }

  // A custom theme for the current menu item wins over both.
  drupal_static_reset('menu_get_custom_theme');
  $custom_theme = menu_get_custom_theme(TRUE);
  if (!empty($custom_theme)) {
    $theme = $custom_theme;
  }
  $theme_key = $theme;

  // Walk up the chain of base themes. Each ancestor is put at the front of
  // the list, so the list ends up ordered from the most distant base theme
  // to the direct parent.
  $base_themes = array();
  $current = $theme;
  while ($current && isset($themes[$current]->base_theme)) {
    $current = $themes[$current]->base_theme;
    array_unshift($base_themes, $themes[$current]);
  }

  _drupal_theme_initialize($themes[$theme], $base_themes);
  drupal_static_reset('drupal_alter');
}

/**
 * Implements hook_search_info().
 *
 * The tab title comes from the 'search_by_page_tabname' variable and is
 * translated through i18n_string when that module is available.
 */
function search_by_page_search_info() {
  $title = variable_get('search_by_page_tabname', t('Pages'));
  if (module_exists('i18n_string')) {
    $title = i18n_string_translate('search_by_page:search_page:tab_name', $title);
  }

  $info = array();
  $info['title'] = $title;
  // Note: Path here is not like a Drupal path, it's just the name of module.
  $info['path'] = 'search_by_page';
  return $info;
}

/**
 * Implements hook_form_FORM_ID_alter().
 *
 * Changes the cron limit element of the Search module settings form so that
 * 0 items per cron run can be selected for the other core search modules, and
 * replaces its description.
 */
function search_by_page_form_search_admin_settings_alter(&$form, $form_state) {
  $limits = array(0, 10, 20, 50, 100, 200, 500);
  $description = t('The maximum number of items indexed in each pass of a <a href="@cron">cron maintenance task</a> by search modules that do not set their own defaults (such as the core Node module that indexes content items). If necessary, reduce the number of items to prevent timeouts and memory errors while indexing.', array(
    '@cron' => url('admin/reports/status'),
  ));

  $form['indexing_throttle']['search_cron_limit']['#options'] = drupal_map_assoc($limits);
  $form['indexing_throttle']['search_cron_limit']['#description'] = $description;
}

/**
 * Implements hook_search_admin().
 *
 * Returns a fieldset containing a link to the Search by Page settings form
 * and the status details.
 */
function search_by_page_search_admin() {
  $fieldset = array(
    '#type' => 'fieldset',
    '#title' => t('Search by Page settings'),
  );

  $link = l(t('Configure Search by Page settings'), 'admin/config/search/search_by_page');
  $fieldset['info'] = array(
    '#type' => 'markup',
    '#markup' => '<p>' . $link . '</p>',
  );
  $fieldset['status'] = _search_by_page_status_details();

  return array(
    'search_by_page' => $fieldset,
  );
}

/**
 * Implements hook_search_reset().
 *
 * Sets last_index_time to 0 on every row of {sbp_path}, which marks all items
 * in the Search by Page database as not indexed.
 */
function search_by_page_search_reset() {
  $update = db_update('sbp_path');
  $update->fields(array('last_index_time' => 0));
  $update->execute();
}

/**
 * Implements hook_search_status().
 *
 * Reports the total number of rows in {sbp_path} and how many of them have a
 * last_index_time of 0 (still to be indexed).
 */
function search_by_page_search_status() {
  $status = array();
  $total = db_query('SELECT COUNT(*) FROM {sbp_path}')->fetchField();
  $status['remaining'] = db_query('SELECT COUNT(*) FROM {sbp_path} p WHERE p.last_index_time = 0')->fetchField();
  $status['total'] = $total;
  return $status;
}

/**
 * Implements hook_search_execute().
 *
 * Searches the pages of one environment in the current language. The
 * environment is taken from an "environment" option embedded in the keys, or
 * from the 'search_by_page_default_environment' variable if there is none.
 *
 * @param $keys
 *   The search keywords as entered by the user.
 * @param $conditions
 *   Not used by this implementation.
 *
 * @return
 *   Array of search results, each with at least 'link' and 'title', merged
 *   with whatever the owning module's hook_sbp_details() returned.
 */
function search_by_page_search_execute($keys = NULL, $conditions = NULL) {
  global $language;
  $lang = $language->language;

  // Extract environment and clear from keys.
  $environment = search_expression_extract($keys, 'environment');
  $keys = search_expression_insert($keys, 'environment');
  if (!$environment) {
    $environment = variable_get('search_by_page_default_environment', 1);
  }

  // Set up query for Search module.
  $query = db_select('search_index', 'i', array(
    'target' => 'slave',
  ))
    ->extend('SearchQuery')
    ->extend('PagerDefault');
  $query
    ->searchExpression($keys, 'search_by_page')
    ->join('sbp_path', 'sp', 'i.sid = sp.pid');
  $query
    ->condition('sp.environment', $environment)
    ->condition('sp.language', $lang);

  // Set number of results per page.
  $num = intval(search_by_page_setting_get('results_per_page', $environment, 10));
  if ($num > 0) {
    $query
      ->limit($num);
  }

  // Get sub-modules' search modifications. The OR group starts with an
  // always-false clause, so that nothing matches if no module contributes.
  $or = db_or();
  $or
    ->where('0=1');
  foreach (module_implements('sbp_query_modify') as $module) {
    $cond = module_invoke($module, 'sbp_query_modify', $environment, $query);

    // Ignore modules that did not hand back a condition object.
    if (!is_object($cond)) {
      continue;
    }
    $cond
      ->condition('sp.from_module', $module);
    $or
      ->condition($cond);
  }
  $query
    ->condition($or);

  // Perform the search.
  if (!$query
    ->executeFirstPass()) {
    return array();
  }
  $stuff = $query
    ->execute();

  // Create array of formatted results for Search
  $results = array();
  foreach ($stuff as $item) {
    $info = _search_by_page_lookup($item->sid);

    // Skip index entries that no longer have a path record.
    if (!$info) {
      continue;
    }

    // Figure out the URL to this page.
    $parts = search_by_page_path_parts($info->page_path);
    $args = array(
      'absolute' => TRUE,
    );
    if (isset($parts[1])) {
      $args['query'] = array();
      parse_str($parts[1], $args['query']);
    }

    // Special case: if this is a file, we need to use the File API to get
    // the URL. Otherwise, use the normal Drupal URL function.
    if (file_uri_scheme($parts[0])) {

      // This is a file with scheme://filename.
      $link = file_create_url($parts[0]);
    }
    else {
      $link = url($parts[0], $args);
    }
    $res = array(
      'link' => $link,
    );

    // Merge URL with information provided by module from hook_sbp_details()
    $res2 = module_invoke($info->from_module, 'sbp_details', $info->modid, $environment, $keys);
    if (is_array($res2)) {
      $res = array_merge($res, $res2);
    }

    // Make sure we have a title; fall back to the link itself.
    if (empty($res['title'])) {
      $res['title'] = $link;
    }
    $results[] = $res;
  }
  return $results;
}

/**
 * Implements hook_update_index().
 *
 * Indexes the site's pages, or at least some of them (up to cron
 * limit on searches), each cron run.
 *
 * While indexing, the global user and language are switched for each page,
 * session saving is turned off and output is buffered. All of this is put
 * back before returning, including when an exception interrupts indexing; the
 * exception is then thrown again.
 *
 * @see _search_by_page_rebuild_all_paths()
 */
function search_by_page_update_index() {
  global $user;
  global $language;
  $save_user = $user;
  $save_language = $language;
  $save_sessions = drupal_save_session();

  // Rebuild the list of paths to index, resetting ones whose time has come.
  _search_by_page_rebuild_all_paths(TRUE);
  $users = _search_by_page_indexing_users();

  // Figure out which pages to index this run - the ones that haven't been
  // indexed ever or re-indexed recently, up to limits of cron.
  $core_limit = (int) variable_get('search_cron_limit', 100);
  $limit = (int) variable_get('sbp_cron_limit', $core_limit);
  $result = db_query_range('SELECT * FROM {sbp_path} p WHERE (p.last_index_time = 0) OR (p.min_time > 0 AND :currtime > (p.last_index_time + p.min_time)) ORDER BY p.last_index_time', 0, $limit, array(
    ':currtime' => time(),
  ))
    ->fetchAll();

  // Index each page, but don't save sessions for the indexing users.
  drupal_save_session(FALSE);

  // Make sure output from rendering pages does not screw up the cron run.
  ob_start();
  $reasons = array(
    MENU_ACCESS_DENIED => t('access denied'),
    MENU_NOT_FOUND => t('not found'),
    MENU_SITE_OFFLINE => t('site offline'),
  );
  $allroles = user_roles();
  $languages = language_list();

  // Any exception is held until the global state has been restored below.
  $exception = NULL;
  try {
    foreach ($result as $item) {

      // Set up language and user. The role may have been deleted, or may
      // have no indexing user; both cases leave $user empty.
      $role = $item->role;
      $rolename = isset($allroles[$role]) ? $allroles[$role] : NULL;
      $user = (isset($rolename) && isset($users[$rolename])) ? $users[$rolename] : NULL;

      // No matter what, we want to update the time so it's marked as indexed.
      // That way, if there is an error it will at least not hold up other pages
      // in the next cron run.
      _search_by_page_update_last_time($item->pid, time());
      if (!$user) {
        watchdog('search_by_page', 'Role %rid (%rname) could not be used to index PID (%pid), path (%path)', array(
          '%rid' => $role,
          '%rname' => $rolename,
          '%pid' => $item->pid,
          '%path' => $item->page_path,
        ), WATCHDOG_NOTICE);

        // Remove content from the search index.
        _search_by_page_remove_searchinfo($item->pid);
        continue;
      }

      // Never leave the global language empty: if the item's language is not
      // in the language list, keep using the language cron started with.
      $language = isset($languages[$item->language]) ? $languages[$item->language] : $save_language;

      // Get page title from module and optional content
      $info = module_invoke($item->from_module, 'sbp_details', $item->modid, $item->environment);
      if (!$info) {

        // Module indicated not to index this page after all
        watchdog('search_by_page', 'Content was skipped - PID (%pid), path (%path)', array(
          '%pid' => $item->pid,
          '%path' => $item->page_path,
        ), WATCHDOG_NOTICE);

        // Remove content from the search index.
        _search_by_page_remove_searchinfo($item->pid);
        continue;
      }

      // Check for module override of content.
      $content = '';
      if (!isset($info['content'])) {

        // No module override - render page to get the content.
        $content = search_by_page_page_content($item->page_path);
      }
      else {
        $content = $info['content'];
      }
      if (!isset($content)) {
        $content = 0;
      }

      // $content will be either a string if found/allowed, or an error code.
      // Note that an empty string is acceptable for content -- it could just
      // be a page with nothing on it except the title.
      if (!is_int($content)) {
        if (!empty($info['title'])) {
          $content = '<h1>' . $info['title'] . '</h1> ' . $content;
        }
        $content = search_by_page_strip_tags($content, $item->environment);

        // Add to search index (already marked as updated).
        search_index($item->pid, 'search_by_page', $content);

        // Add to SBP table.
        db_update('sbp_path')
          ->condition('pid', $item->pid)
          ->fields(array(
            'page_data' => $content,
          ))
          ->execute();
      }
      else {
        $con = $content;
        if (isset($reasons[$con])) {
          $con = $reasons[$con];
        }

        // If the site was off-line, abort the cron run
        if ($content == MENU_SITE_OFFLINE) {

          // In this one case, we want to reset this item so it gets indexed
          // next time for sure. But there is no point in continuing the cron
          // run if the site is off-line.
          _search_by_page_update_last_time($item->pid, 0);
          watchdog('search_by_page', 'Site is off-line, cannot index content', array(), WATCHDOG_NOTICE);
          break;
        }

        // Remove content from the search index.
        _search_by_page_remove_searchinfo($item->pid);
        watchdog('search_by_page', 'Content not rendered (%con) - PID (%pid), path (%path), language (%lang)', array(
          '%pid' => $item->pid,
          '%path' => $item->page_path,
          '%lang' => $item->language,
          '%con' => $con,
        ), WATCHDOG_ERROR);
      }
    }
  }
  catch (Exception $e) {
    $exception = $e;
  }

  // Switch user back and rebuild permissions cache. This must happen even if
  // indexing failed, so that the rest of the request does not keep running as
  // an indexing user.
  $user = $save_user;
  drupal_static_reset('user_access');
  $language = $save_language;
  drupal_save_session($save_sessions);
  _search_by_page_setup_theme();
  ob_end_clean();

  // Now that everything is restored, let the failure propagate.
  if ($exception) {
    throw $exception;
  }
}

/**
 * Implements hook_menu().
 *
 * Registers the administration pages under admin/config/search/search_by_page
 * and one search page per environment, at the path configured for that
 * environment.
 */
function search_by_page_menu() {
  $admin_path = 'admin/config/search/search_by_page';
  $admin_access = array(
    'administer search by page',
  );

  $items = array();

  // Overview of environments.
  $items[$admin_path] = array(
    'title' => 'Search by Page settings',
    'description' => 'Configure settings for Search by Page',
    'page callback' => 'drupal_get_form',
    'page arguments' => array(
      'search_by_page_admin_overview',
    ),
    'access arguments' => $admin_access,
    'type' => MENU_NORMAL_ITEM,
  );

  // Callback that resets unindexed pages.
  $items[$admin_path . '/resetblank'] = array(
    'title' => 'Search by Page reset blank',
    'description' => 'Reset unindexed pages for Search by Page',
    'page callback' => 'search_by_page_reset_blank',
    'access arguments' => $admin_access,
    'type' => MENU_CALLBACK,
  );

  // Settings form for an existing environment, with its default tab.
  $items[$admin_path . '/edit/%'] = array(
    'title' => 'Search by Page environment settings',
    'description' => 'Configure settings for Search by Page environment',
    'page callback' => 'drupal_get_form',
    'page arguments' => array(
      'search_by_page_admin_settings',
      5,
    ),
    'access arguments' => $admin_access,
    'type' => MENU_CALLBACK,
  );
  $items[$admin_path . '/edit/%/general'] = array(
    'title' => 'General settings',
    'type' => MENU_DEFAULT_LOCAL_TASK,
    'weight' => -10,
  );

  // The same settings form without an ID adds a new environment.
  $items[$admin_path . '/add'] = array(
    'title' => 'Add environment',
    'type' => MENU_CALLBACK,
    'page callback' => 'drupal_get_form',
    'page arguments' => array(
      'search_by_page_admin_settings',
    ),
    'access arguments' => $admin_access,
  );

  // Delete confirmation form.
  $items[$admin_path . '/delete'] = array(
    'title' => 'Delete path',
    'type' => MENU_CALLBACK,
    'page callback' => 'drupal_get_form',
    'page arguments' => array(
      'search_by_page_delete_confirm',
    ),
    'access arguments' => $admin_access,
  );

  // One search page per environment.
  foreach (search_by_page_list_environments() as $envid) {
    $search_path = search_by_page_setting_get('page_path', $envid, 'search_pages');
    $page_title = search_by_page_setting_get('page_title', $envid, t('Search'));
    if (module_exists('i18n_string')) {
      $page_title = i18n_string_translate('search_by_page:search_form:page_title' . $envid, $page_title);
    }
    $env_name = search_by_page_setting_get('environment_name', $envid, t('new'));

    $items[$search_path] = array(
      'title' => $page_title,
      'page callback' => '_search_by_page_view',
      // Note that 'env' . $envid is used here to avoid argument substitution.
      'page arguments' => array(
        'env' . $envid,
      ),
      'access arguments' => array(
        'search page environment ' . $env_name,
      ),
      'type' => MENU_SUGGESTED_ITEM,
    );
  }

  return $items;
}

/**
 * Implements hook_permission().
 *
 * Defines the administration permission plus one search permission per
 * environment. The permission's machine name uses the stored environment
 * name; its title uses the translated name when i18n_string is available.
 */
function search_by_page_permission() {
  $permissions = array(
    'administer search by page' => array(
      'title' => t('Administer Search by Page'),
    ),
  );

  foreach (search_by_page_list_environments() as $envid) {
    $machine_name = search_by_page_setting_get('environment_name', $envid, t('new'));
    $label = module_exists('i18n_string')
      ? i18n_string_translate('search_by_page:environment:name' . $envid, $machine_name)
      : $machine_name;

    $permissions['search page environment ' . $machine_name] = array(
      'title' => t('Search environment @name in Search by Page', array(
        '@name' => $label,
      )),
    );
  }

  return $permissions;
}

/**
 * Implements hook_theme().
 *
 * Registers the search form theme hook, which is rendered with the
 * search-by-page-form template.
 */
function search_by_page_theme() {
  $hooks = array();
  $hooks['search_by_page_form'] = array(
    'render element' => 'form',
    'template' => 'search-by-page-form',
  );
  return $hooks;
}

/**
 * Processes variables for search-by-page-form.tpl.php.
 *
 * The $variables array contains the following arguments:
 * - $form
 *
 * Adds 'environment', 'is_block', 'search' (rendered elements keyed by form
 * key, plus a combined 'hidden' entry) and 'search_form' (everything in
 * 'search' concatenated) to $variables.
 *
 * @see search-by-page-form.tpl.php
 */
function template_preprocess_search_by_page_form(&$variables) {
  $variables['search'] = array();
  $hidden_markup = array();
  $variables['environment'] = $variables['form']['environment']['#value'];
  $variables['is_block'] = $variables['form']['is_block']['#value'];

  // Render each typed child on its own so themers can place them separately.
  foreach (element_children($variables['form']) as $key) {
    if (!isset($variables['form'][$key]['#type'])) {
      continue;
    }
    $type = $variables['form'][$key]['#type'];
    $rendered = drupal_render($variables['form'][$key]);

    if ($type == 'hidden' || $type == 'token') {
      // Hidden elements are collected and printed together.
      $hidden_markup[] = $rendered;
    }
    elseif ($rendered) {
      // Skip elements that render to nothing (e.g. 'value' elements).
      $variables['search'][$key] = $rendered;
    }
  }

  // Themers have no use for individual hidden elements; join them.
  $variables['search']['hidden'] = implode('', $hidden_markup);

  // Convenience variable holding the whole rendered form.
  $variables['search_form'] = implode('', $variables['search']);
}

/**
 * Implements hook_block_info().
 *
 * Exposes one uncached search block per environment, using the environment
 * ID as the block delta.
 */
function search_by_page_block_info() {
  $blocks = array();

  foreach (search_by_page_list_environments() as $envid) {
    $label = search_by_page_setting_get('environment_name', $envid, t('new'));
    if (module_exists('i18n_string')) {
      $label = i18n_string_translate('search_by_page:environment:name' . $envid, $label);
    }

    $info = t('Search by page @environment', array(
      '@environment' => $label,
    ));
    $blocks[$envid] = array(
      'info' => $info,
      'cache' => DRUPAL_NO_CACHE,
    );
  }

  return $blocks;
}

/**
 * Implements hook_block_view().
 *
 * The block delta is the environment ID; the block shows that environment's
 * search form, built in block mode.
 */
function search_by_page_block_view($delta = 0) {
  $subject = search_by_page_setting_get('block_title', $delta, t('Search'));
  if (module_exists('i18n_string')) {
    $subject = i18n_string_translate('search_by_page:search_form:block_title' . $delta, $subject);
  }

  $block = array();
  $block['subject'] = $subject;
  $block['content'] = drupal_get_form('search_by_page_form', $delta, NULL, TRUE);
  return $block;
}

/**
 * Implements hook_i18n_string_info().
 *
 * Registers the 'search_by_page' text group and names the callback that
 * refreshes its source strings.
 */
function search_by_page_i18n_string_info() {
  return array(
    'search_by_page' => array(
      'title' => t('Search by Page'),
      'description' => t('Environment setting text for Search by Page, such as labels and block titles'),
      'format' => FALSE,
      'list' => FALSE,
      'refresh callback' => '_search_by_page_refresh_translations',
    ),
  );
}

/**
 * Returns a ready-to-render search form and/or results page.
 *
 * The search keywords are taken from everything after the first '/' in the
 * current path. When keywords are present, the search is logged and the
 * results are added to the build below the form.
 *
 * @param $envstring
 *   The environment ID, with 'env' prepended, to avoid argument substitution
 *   in hook_menu().
 *
 * @return
 *   Render array with a 'search_form' element and, when keywords were
 *   supplied, a 'search_results' element.
 */
function _search_by_page_view($envstring) {
  // Drop the three-character 'env' prefix to recover the numeric ID.
  $environment = intval(drupal_substr($envstring, 3));

  // Same approach as core search_view(): keywords follow the first slash.
  $segments = explode('/', $_GET['q'], 2);
  $keys = (count($segments) > 1) ? trim($segments[1]) : '';

  $build = array(
    'search_form' => drupal_get_form('search_by_page_form', $environment, $keys, FALSE),
  );

  $path = search_by_page_setting_get('page_path', $environment, 'search_pages');
  if (!$keys) {
    // Nothing to search for: show only the form.
    return $build;
  }

  // Log the search with a link back to the results.
  watchdog('search_by_page', '%keys', array(
    '%keys' => $keys,
  ), WATCHDOG_NOTICE, l(t('results'), $path . '/' . $keys));

  // Tag the expression with the environment so the search hook can see it.
  $expression = search_expression_insert($keys, 'environment', $environment);
  $build['search_results'] = search_data($expression, 'search_by_page');

  return $build;
}

/**
 * Returns a Forms API array for the search by pages form.
 *
 * @param $form
 *   Form array passed in by drupal_get_form().
 * @param $form_state
 *   Form state array passed in by drupal_get_form().
 * @param $environment
 *   ID of the environment whose path and labels are used for this form.
 * @param $keys
 *   Keywords to pre-fill in the text field.
 * @param $is_block
 *   Stored on the form as a value so the template can tell block display
 *   from page display.
 *
 * @return
 *   The form array.
 *
 * @ingroup forms
 * @see search_by_page_form_submit()
 */
function search_by_page_form($form, &$form_state, $environment, $keys = '', $is_block = TRUE) {
  $form['#action'] = url(search_by_page_setting_get('page_path', $environment, 'search_pages'));

  // Drupal 7 expects 'class' to be an array of class names. A plain string
  // breaks any code that appends a class with $form['#attributes']['class'][].
  $form['#attributes'] = array(
    'class' => array(
      'search-by-page-form',
    ),
  );
  $form['#method'] = 'post';

  // Labels are stored per environment and translated if i18n_string exists.
  $fieldlabel = search_by_page_setting_get('field_label', $environment, t('Search for'));
  $buttonlabel = search_by_page_setting_get('button_label', $environment, t('Search'));
  if (module_exists('i18n_string')) {
    $fieldlabel = i18n_string_translate('search_by_page:search_form:field_label' . $environment, $fieldlabel);
    $buttonlabel = i18n_string_translate('search_by_page:search_form:button_label' . $environment, $buttonlabel);
  }

  $form['keys'] = array(
    '#type' => 'textfield',
    '#title' => $fieldlabel,
    '#default_value' => $keys,
    '#size' => 30,
    '#maxlength' => 255,
  );

  // Carried as values (not rendered) for the submit handler and template.
  $form['environment'] = array(
    '#type' => 'value',
    '#value' => $environment,
  );
  $form['is_block'] = array(
    '#type' => 'value',
    '#value' => $is_block,
  );

  $form['submit'] = array(
    '#type' => 'submit',
    '#value' => $buttonlabel,
  );
  return $form;
}

/**
 * Submit function for search_by_page_form().
 *
 * Redirects to the environment's search path with the keywords appended.
 * An empty keyword string sets a form error but still falls through to the
 * redirect.
 */
function search_by_page_form_submit($form, &$form_state) {
  $values = $form_state['values'];

  if ($values['keys'] == '') {
    form_set_error('keys', t('Please enter some keywords.'));
    // Fall through to the redirect below.
  }

  $base_path = search_by_page_setting_get('page_path', $values['environment'], 'search_pages');
  $form_state['redirect'] = $base_path . '/' . $values['keys'];
}

/**
 * Returns the admin overview page for module configuration.
 *
 * This page lets you choose and configure search environments. It shows
 * links to related admin pages, a table of the existing environments with
 * edit/delete links, and the module-wide settings (cron limit, default
 * environment, search tab name), saved through system_settings_form().
 *
 * @ingroup forms
 */
function search_by_page_admin_overview($form, &$form_state) {
  $form['general_sbp'] = array(
    '#type' => 'fieldset',
    '#collapsible' => TRUE,
    '#collapsed' => TRUE,
    '#weight' => -20,
    '#title' => t('Additional actions'),
  );
  $form['general_sbp']['info'] = array(
    '#type' => 'markup',
    '#weight' => 20,
    '#markup' => '<p>' . l(t('Configure general search settings and see indexing status'), 'admin/config/search/settings') . '</p>',
  );
  $form['general_sbp']['cron'] = array(
    '#type' => 'markup',
    '#weight' => 21,
    '#markup' => '<p>' . l(t('Visit the Status Report page to check cron status and run cron'), 'admin/reports/status') . '</p>',
  );
  $form['general_sbp']['reset_blank'] = array(
    '#type' => 'markup',
    '#weight' => 22,
    '#markup' => '<p>' . l(t('Click to reset pages that are blank in the search index, so they will reindex at next cron run.'), 'admin/config/search/search_by_page/resetblank') . '</p>',
  );

  // Make a table of existing environments.
  $output = '<h3>' . t('Search environments') . '</h3>';
  $output .= '<p>' . l(t('Add new search environment'), 'admin/config/search/search_by_page/add') . '</p>';
  $headers = array(
    t('Environment'),
    t('URL path'),
    array(
      'data' => t('Operations'),
      'colspan' => 2,
    ),
  );
  $rows = array();

  // Environment ID => name, reused below for the default-environment select.
  $options = array();
  $envs = search_by_page_list_environments();
  foreach ($envs as $envid) {
    $path = search_by_page_setting_get('page_path', $envid, 'search_pages');
    $envname = search_by_page_setting_get('environment_name', $envid, t('new'));
    if (module_exists('i18n_string')) {
      $envname = i18n_string_translate('search_by_page:environment:name' . $envid, $envname);
    }
    $options[$envid] = $envname;
    $rows[] = array(
      $envname,
      l($path, $path),
      // Link texts go through t() like every other string on this page.
      l(t('edit'), 'admin/config/search/search_by_page/edit/' . $envid),
      l(t('delete'), 'admin/config/search/search_by_page/delete/' . $envid),
    );
  }
  if (!count($rows)) {
    $rows[] = array(
      t('No environments defined'),
      '',
      '',
      '',
    );
  }
  $output .= theme('table', array(
    'header' => $headers,
    'rows' => $rows,
  ));
  $form['environment_table'] = array(
    '#type' => 'markup',
    '#weight' => -5,
    '#markup' => $output,
  );

  // Form section to choose default environment, name for it in Search, and
  // cron limit.
  $items = drupal_map_assoc(array(
    1,
    10,
    20,
    50,
    100,
    200,
    500,
  ));

  // Fall back to the core search cron limit when ours has not been set.
  $core_limit = (int) variable_get('search_cron_limit', 100);
  $limit = (int) variable_get('sbp_cron_limit', $core_limit);
  $form['sbp_cron_limit'] = array(
    '#type' => 'select',
    '#weight' => -3,
    '#default_value' => $limit,
    '#options' => $items,
    '#title' => t('Number of items to index per cron run'),
    '#description' => t('The maximum number of items indexed in each pass of a <a href="@cron">cron maintenance task</a> by Search by Page.', array(
      '@cron' => url('admin/reports/status'),
    )),
  );
  $form['search_by_page_default_environment'] = array(
    '#type' => 'select',
    '#weight' => -2,
    '#default_value' => variable_get('search_by_page_default_environment', 1),
    '#options' => $options,
    '#title' => t('Default environment'),
    '#description' => t('The default environment is used for the Search by Page tab when using the core Search page.'),
  );
  $form['search_by_page_tabname'] = array(
    '#type' => 'textfield',
    '#weight' => -1,
    '#default_value' => variable_get('search_by_page_tabname', t('Pages')),
    '#title' => t('Search tab name'),
    '#description' => t('If using Search by Page with the core Search module, the name of the tab where Search by Page results are shown.'),
  );
  $form = system_settings_form($form);
  return $form;
}

/**
 * Returns the admin settings page for a single search environment.
 *
 * Combines the general per-environment settings defined here with whatever
 * the hook_sbp_settings() implementations return for this environment.
 *
 * @param $environment
 *   ID of environment for this settings page, or zero to add a new one.
 *
 * @ingroup forms
 */
function search_by_page_admin_settings($form, &$form_state, $environment = 0) {
  $environment = intval($environment);
  if (!$environment) {
    // Adding a new environment: take one more than the highest existing ID.
    $highest = 0;
    foreach (search_by_page_list_environments() as $existing) {
      if ($existing > $highest) {
        $highest = $existing;
      }
    }
    $environment = $highest + 1;
  }

  $form['environment'] = array(
    '#type' => 'value',
    '#value' => $environment,
  );

  // Build the general fieldset locally and attach it to the form afterwards.
  $general = array(
    '#type' => 'fieldset',
    '#collapsible' => TRUE,
    '#weight' => -99,
    '#title' => t('General settings for this environment'),
  );
  $general['environment_name'] = array(
    '#type' => 'textfield',
    '#weight' => 0,
    '#title' => t('Environment name (shown to administrators only)'),
    '#default_value' => search_by_page_setting_get('environment_name', $environment, t('new')),
  );
  $general['set_as_default'] = array(
    '#type' => 'checkbox',
    '#weight' => 1,
    '#title' => t('Set this environment as default'),
    '#description' => t('The default environment is used for the Search by Page tab when using the core Search page.'),
  );
  if (variable_get('search_by_page_default_environment', 0) == $environment) {
    $general['set_as_default']['#default_value'] = 1;
  }

  // The four text labels share one shape: setting key => weight/title/default.
  $label_fields = array(
    'page_title' => array(
      'weight' => 3,
      'title' => t('Title for search page'),
      'default' => t('Search'),
    ),
    'block_title' => array(
      'weight' => 4,
      'title' => t('Title for search block'),
      'default' => t('Search'),
    ),
    'field_label' => array(
      'weight' => 5,
      'title' => t('Label for keywords field in search form'),
      'default' => t('Search for'),
    ),
    'button_label' => array(
      'weight' => 6,
      'title' => t('Text on button in search form'),
      'default' => t('Search'),
    ),
  );
  foreach ($label_fields as $setting => $spec) {
    $general[$setting] = array(
      '#type' => 'textfield',
      '#weight' => $spec['weight'],
      '#title' => $spec['title'],
      '#default_value' => search_by_page_setting_get($setting, $environment, $spec['default']),
    );
  }

  if (module_exists('locale')) {
    $general['labelhelp'] = array(
      '#type' => 'markup',
      '#weight' => 8,
      '#markup' => '<p>' . t("Enter the text settings above in your site's default language. If you have a multi-lingual site with the Internationalization project's String Translation module installed and enabled, you can use Drupal's translation interface to translate them.") . '</p>',
    );
  }
  $general['page_path'] = array(
    '#type' => 'textfield',
    '#weight' => 9,
    '#title' => t('URL path for search page'),
    '#description' => t('Path cannot include a /'),
    '#field_prefix' => search_by_page_path_field_prefix(),
    '#default_value' => search_by_page_setting_get('page_path', $environment, 'search_pages'),
  );
  $general['results_per_page'] = array(
    '#type' => 'textfield',
    '#weight' => 10,
    '#title' => t('Number of search results to show per page'),
    '#default_value' => search_by_page_setting_get('results_per_page', $environment, 10),
  );
  $general['permhelp'] = array(
    '#type' => 'markup',
    '#weight' => 12,
    '#markup' => '<p>' . t('After creating a new environment or changing the environment name, you will need to set permissions to define who can search using this environment.') . '</p>',
  );
  $general['exclude_tags'] = array(
    '#type' => 'textfield',
    '#weight' => 15,
    '#title' => t('HTML tags to exclude'),
    '#description' => t('Enter the HTML tags whose <em>contents</em> should be completely removed from search indexing and search results. Separate by spaces, and just enter the tag names. Only supports letters, numbers, and underscores in tag names. Excluding h1 will exclude the page title. Example entry: script object'),
    '#default_value' => search_by_page_setting_get('exclude_tags', $environment, ''),
  );
  $form['general_env'] = $general;

  // Start with an empty handler list, then merge in the sub-module settings.
  // Keys returned by the hook replace same-named keys set above.
  $form['#submit'] = array();
  $sub_module_settings = module_invoke_all('sbp_settings', $environment);
  $form = array_merge($form, $sub_module_settings);

  $form['buttons']['submit'] = array(
    '#type' => 'submit',
    '#value' => t('Save configuration'),
  );
  $form['#theme'] = 'system_settings_form';

  // Submit handlers, in order: save the settings, rebuild the paths table,
  // then refresh the translation source strings.
  $form['#submit'][] = 'search_by_page_admin_settings_submit';
  $form['#submit'][] = '_search_by_page_rebuild_all_paths';
  $form['#submit'][] = '_search_by_page_refresh_translations';

  return $form;
}

/**
 * Submit handler for search_by_page_admin_settings().
 *
 * Stores every submitted value, except Form API bookkeeping keys, as a
 * setting of the environment being edited. Optionally marks the environment
 * as the default, then flags the menu for rebuild and clears caches.
 *
 * @param $form
 *   The submitted form array.
 * @param $form_state
 *   Form state; 'values' is read and normalized, 'redirect' is set.
 */
function search_by_page_admin_settings_submit($form, &$form_state) {

  // This basically follows system_settings_form_submit().
  $form_state['redirect'] = 'admin/config/search/search_by_page';
  $environment = intval($form_state['values']['environment']);

  // Keys that are form internals rather than environment settings. 'op' is
  // the submit button's element name; without it the button label would be
  // saved as a setting.
  $skip = array(
    'op',
    'submit',
    'form_id',
    'form_token',
    'form_build_id',
    'environment',
    'set_as_default',
  );

  // Special case: results per page, make sure it's an integer. Could do
  // this in validation, but...
  $num = intval($form_state['values']['results_per_page']);
  if ($num < 1) {
    $num = 10;
  }
  $form_state['values']['results_per_page'] = $num;

  foreach ($form_state['values'] as $key => $value) {
    // Strict comparison so a key can never loosely match a skip entry.
    if (!in_array($key, $skip, TRUE)) {
      search_by_page_setting_set($key, $environment, $value);
    }
  }
  if ($form_state['values']['set_as_default']) {
    variable_set('search_by_page_default_environment', $environment);
  }

  // Make sure menu gets rebuilt, so our new/changed path works.
  // Also need to clear block cache.
  variable_set('menu_rebuild_needed', TRUE);
  cache_clear_all();
  drupal_set_message(t('The configuration options have been saved.'));
}

/**
 * Returns a form confirming deletion of an environment.
 *
 * @param $envid
 *   ID of the environment to delete; carried on the form as a value for the
 *   submit handler.
 *
 * @ingroup forms
 * @see search_by_page_delete_confirm_submit()
 */
function search_by_page_delete_confirm($form, $form_state, $envid) {
  $label = search_by_page_setting_get('environment_name', $envid, t('new'));
  if (module_exists('i18n_string')) {
    $label = i18n_string_translate('search_by_page:environment:name' . $envid, $label);
  }

  // The incoming form array is replaced by this one.
  $form = array();
  $form['envid'] = array(
    '#type' => 'value',
    '#value' => $envid,
  );

  $question = t('Are you sure you want to delete environment %env?', array(
    '%env' => $label,
  ));
  return confirm_form($form, $question, 'admin/config/search/search_by_page');
}

/**
 * Submit callback for search_by_page_delete_confirm().
 *
 * Always redirects back to the overview page. When the deletion was
 * confirmed, invokes hook_sbp_delete_environment() so that each implementing
 * module can remove its data for the environment.
 */
function search_by_page_delete_confirm_submit($form, &$form_state) {
  $form_state['redirect'] = 'admin/config/search/search_by_page';

  if ($form_state['values']['confirm']) {
    // The deletion itself is done by the hook implementations.
    module_invoke_all('sbp_delete_environment', $form_state['values']['envid']);
  }
}

/**
 * Implements hook_sbp_delete_environment().
 *
 * Removes the environment from the stored settings, deletes the search index
 * rows belonging to its paths, and finally deletes the paths themselves.
 */
function search_by_page_sbp_delete_environment($environment) {
  $envid = intval($environment);

  // Drop the environment's entry from the settings variable.
  $settings = variable_get('search_by_page_settings', array());
  unset($settings[$envid]);
  variable_set('search_by_page_settings', $settings);

  // Subquery selecting the path IDs that belong to this environment.
  $pids = db_select('sbp_path');
  $pids->addField('sbp_path', 'pid');
  $pids->condition('environment', $envid);

  // Purge the search tables first; the subquery still needs the path rows.
  $search_tables = array(
    'search_dataset',
    'search_index',
    'search_node_links',
  );
  foreach ($search_tables as $table) {
    db_delete($table)
      ->condition('type', 'search_by_page')
      ->condition('sid', $pids, 'IN')
      ->execute();
  }

  // Now the path rows themselves can go.
  db_delete('sbp_path')
    ->condition('environment', $envid)
    ->execute();
}

/**
 * Resets pages that have no words in search index, so they'll be reindexed.
 *
 * Page callback for the 'admin/config/search/search_by_page/resetblank' path.
 * Finds every path with no matching search_index row, resets its last index
 * time, reports how many were reset, and redirects to the overview page.
 */
function search_by_page_reset_blank() {
  // Subquery: the index rows that belong to this module.
  $indexed = db_select('search_index', 'si')
    ->fields('si')
    ->condition('type', 'search_by_page');

  // Paths whose left join finds no index row are the blank ones.
  $blank = db_select('sbp_path', 'sp');
  $blank->addField('sp', 'pid');
  $blank->leftJoin($indexed, 'sisp', 'sisp.sid = sp.pid');
  $blank->isNull('sisp.word');
  $items = $blank->execute()->fetchAll();

  foreach ($items as $item) {
    _search_by_page_update_last_time($item->pid);
  }

  drupal_set_message(t('Blank pages have been reset to index at next cron run (%num)', array(
    '%num' => count($items),
  )));
  drupal_goto('admin/config/search/search_by_page');
}

/**
 * Internal function: rebuilds the paths table for all modules.
 *
 * Runs search_by_page_rebuild_paths() for every module implementing
 * hook_sbp_paths(), once per defined environment.
 *
 * NOTE(review): this function is also registered as a submit handler in
 * search_by_page_admin_settings(). Form API presumably calls it there with
 * the form array as first argument, so $reset_items would be a non-empty
 * array rather than a boolean - confirm that this is intended.
 *
 * @param $reset_items
 *   TRUE if the last_index_time for items whose max_time has been exceeded
 *   should be reset. FALSE to leave it alone.
 *
 * @see search_by_page_rebuild_paths()
 */
function _search_by_page_rebuild_all_paths($reset_items = FALSE) {
  $environments = search_by_page_list_environments();
  $modules = module_implements('sbp_paths');

  foreach ($modules as $module) {
    foreach ($environments as $envid) {
      search_by_page_rebuild_paths($module, $envid, $reset_items);
    }
  }
}

/**
 * Internal function: Returns a list of internal users to use for indexing.
 *
 * Reads the paths table, and finds all roles modules said to use for indexing.
 * For each role, re-uses the user recorded in the sbp_index_users table, or
 * creates a new blocked user with that role and records it there.
 *
 * @param int $role
 *    (optional) If this is passed in, instead of reading the paths table, only
 *    this role is assumed to be needed. Pass in the role ID.
 *
 * @return
 *   Array indexed by role name. Each element is a loaded user object having
 *   that role, with the status bit temporarily (in-memory) set to 1. The
 *   anonymous role, when needed, maps to the loaded uid 0 account. Roles
 *   that do not exist, or for which no user could be set up, are logged and
 *   left out.
 */
function _search_by_page_indexing_users($role = NULL) {

  // Figure out which roles are currently in our paths table, or passed in.
  $allroles = user_roles();
  if ($role) {
    $roles_needed = array(
      $role,
    );
  }
  else {
    $roles_needed = db_query('SELECT role FROM {sbp_path} GROUP BY role')
      ->fetchCol();
  }

  // Figure out which users we already have (role ID => user ID).
  $accounts_have = db_query('SELECT rid, uid FROM {sbp_index_users}')
    ->fetchAllKeyed();

  // Create or load needed users.
  $accounts = array();
  foreach ($roles_needed as $rid) {
    if (!isset($allroles[$rid])) {
      watchdog('search_by_page', 'Role %rid requested for search indexing, does not seem to exist', array(
        '%rid' => $rid,
      ), WATCHDOG_ERROR);
      continue;
    }
    $rolename = $allroles[$rid];
    $account = FALSE;

    // Attempt to load the user from the UID we stored a previous time.
    if (isset($accounts_have[$rid])) {
      $account = user_load($accounts_have[$rid]);
    }
    if (!$account) {

      // That didn't work. Delete previous entry in index_users table,
      // attempt to create a new user, and save this user ID.
      db_delete('sbp_index_users')
        ->condition('rid', $rid)
        ->execute();
      if ($rid == DRUPAL_ANONYMOUS_RID) {
        $account = user_load(0);
      }
      else {

        // Create a blocked user with random password and email, and a random
        // suffix on the user name to prevent blocking problems. See issue
        // http://drupal.org/node/716342.
        $new = array(
          'pass' => user_password(),
          'name' => 'sbp indexing ' . $rolename . ' ' . user_password(),
          'mail' => user_password() . "@" . user_password() . ".com",
          'roles' => array(
            $rid => $rolename,
          ),
          'status' => 0,
        );
        $saved = user_save((object) array(), $new);

        // user_save() returns FALSE on failure. Only reload when an account
        // was really created, instead of reading ->uid from a non-object.
        $account = ($saved && !empty($saved->uid)) ? user_load($saved->uid, TRUE) : FALSE;
        if ($account && $account->uid) {
          watchdog('search_by_page', 'Created indexing user %uid (%uname) for role %rid (%rname)', array(
            '%uid' => $account->uid,
            '%uname' => $account->name,
            '%rid' => $rid,
            '%rname' => $rolename,
          ), WATCHDOG_NOTICE);
        }
        else {
          $account = FALSE;
        }
      }
      if ($account) {
        // Remember which user serves this role for the next run.
        db_insert('sbp_index_users')
          ->fields(array(
          'rid' => $rid,
          'uid' => $account->uid,
        ))
          ->execute();
      }
    }

    // Add to return value, setting status bit temporarily to 1 (in-memory).
    if ($account) {
      $account->status = 1;
      $accounts[$rolename] = $account;
    }
    else {
      watchdog('search_by_page', 'Unable to set up an indexing user for role %rid (%rname)', array(
        '%rid' => $rid,
        '%rname' => $rolename,
      ), WATCHDOG_ERROR);
    }
  }
  return $accounts;
}

/**
 * Internal function: Finds and returns a single path record.
 *
 * Looks up by $pid when it is non-zero; otherwise by $path when it is
 * non-empty; otherwise by $mid. The last two lookups are also restricted to
 * the given module and environment.
 *
 * @param $pid
 *    pid field value to search for (other args ignored if non-zero)
 * @param $path
 *    path field value to search for (must also supply $module, $environment)
 * @param $mid
 *    mid field value to search for (must also supply $module, $environment)
 * @param $module
 *    module field value to search for.
 * @param $environment
 *    environment field value to search for.
 *
 * @return
 *    Object with fields from the sbp_path table, if query succeeds.
 */
function _search_by_page_lookup($pid = 0, $path = '', $mid = 0, $module = '', $environment = 0) {
  $query = db_select('sbp_path', 'p')
    ->fields('p');

  if ($pid) {
    // A path ID identifies the record on its own.
    $query->condition('pid', $pid);
    return $query->execute()->fetchObject();
  }

  // Otherwise match on page path or module-supplied ID...
  if ($path) {
    $query->condition('page_path', $path);
  }
  else {
    $query->condition('modid', $mid);
  }

  // ...within the given environment and submitting module.
  $query->condition('environment', $environment);
  $query->condition('from_module', $module);

  return $query->execute()->fetchObject();
}

/**
 * Internal function: updates the last index time.
 *
 * @param $pid
 *   pid to update
 * @param $time
 *   time to update to; defaults to 0.
 */
function _search_by_page_update_last_time($pid, $time = 0) {
  $update = db_update('sbp_path');
  $update->fields(array(
    'last_index_time' => $time,
  ));
  $update->condition('pid', $pid);
  $update->execute();
}

/**
 * Internal function: removes a path item from the paths and search database.
 *
 * Deletes the sbp_path row first, then its search index information.
 *
 * @param $pid
 *   ID of the path to remove.
 */
function _search_by_page_remove_path($pid) {
  $delete = db_delete('sbp_path');
  $delete->condition('pid', $pid);
  $delete->execute();

  _search_by_page_remove_searchinfo($pid);
}

/**
 * Internal function: removes a path item from the search database.
 *
 * Deletes this module's rows for the path from the search_dataset,
 * search_index and search_node_links tables, in that order.
 *
 * @param $pid
 *   ID of the path to remove.
 */
function _search_by_page_remove_searchinfo($pid) {
  $search_tables = array(
    'search_dataset',
    'search_index',
    'search_node_links',
  );
  foreach ($search_tables as $table) {
    db_delete($table)
      ->condition('type', 'search_by_page')
      ->condition('sid', $pid)
      ->execute();
  }
}

/**
 * Internal function: causes the base form of translations to refresh.
 *
 * Does nothing unless the i18n_string module is enabled. Otherwise pushes
 * the current search tab name, and each environment's labels, titles and
 * name, to i18n_string_update() as source strings.
 */
function _search_by_page_refresh_translations() {
  if (!module_exists('i18n_string')) {
    return;
  }

  // Module-wide string: name of the tab on the core search page.
  $tab_name = variable_get('search_by_page_tabname', t('Pages'));
  i18n_string_update('search_by_page:search_page:tab_name', $tab_name);

  foreach (search_by_page_list_environments() as $environment) {
    // String name prefix => current source text. The environment ID is
    // appended to the prefix to form the full string name.
    $sources = array(
      'search_by_page:search_form:field_label' => search_by_page_setting_get('field_label', $environment, t('Search for')),
      'search_by_page:search_form:button_label' => search_by_page_setting_get('button_label', $environment, t('Search')),
      'search_by_page:search_form:block_title' => search_by_page_setting_get('block_title', $environment, t('Search')),
      'search_by_page:search_form:page_title' => search_by_page_setting_get('page_title', $environment, t('Search')),
      'search_by_page:environment:name' => search_by_page_setting_get('environment_name', $environment, t('new')),
    );
    foreach ($sources as $prefix => $text) {
      i18n_string_update($prefix . $environment, $text);
    }
  }
}

/**
 * Returns a list of the currently-defined environment IDs.
 *
 * The IDs are the keys of the 'search_by_page_settings' variable.
 *
 * @return
 *   Array of environment IDs.
 */
function search_by_page_list_environments() {
  return array_keys(variable_get('search_by_page_settings', array()));
}

/**
 * Generates a detailed status listing for the Search status page.
 *
 * Rebuilds the paths table, then counts paths whose last index time is 0,
 * grouped by environment and submitting module. The result is either a
 * "fully indexed" message or a table with one row per environment, plus a
 * row for core content search when core reports remaining items.
 *
 * @return
 *   Renderable array of search status.
 */
function _search_by_page_status_details() {
  _search_by_page_rebuild_all_paths();

  // Number of un-indexed items by environment and module.
  $records = db_query('SELECT COUNT(*) as nonindexed, p.environment, p.from_module FROM {sbp_path} p WHERE p.last_index_time = 0 GROUP BY p.environment, p.from_module')
    ->fetchAll();

  // Pivot into $counts[environment][module], noting which modules appear.
  $counts = array();
  $seen_modules = array();
  foreach ($records as $record) {
    if (!$record->nonindexed) {
      continue;
    }
    $counts[$record->environment][$record->from_module] = $record->nonindexed;
    $seen_modules[$record->from_module] = 1;
  }

  if (empty($counts)) {
    return array(
      '#type' => 'markup',
      '#markup' => '<p>' . t('Search by Page is fully indexed.') . '</p>',
    );
  }

  // Columns: 'Core' first, then the modules in sorted order.
  $columns = array_keys($seen_modules);
  sort($columns);
  array_unshift($columns, t('Core'));

  // One row per environment: its name, then one cell per column.
  $rows = array();
  foreach ($counts as $envid => $per_module) {
    $cells = array(
      search_by_page_setting_get('environment_name', $envid, t('new')),
    );
    foreach ($columns as $column) {
      $cells[] = isset($per_module[$column]) ? $per_module[$column] : '';
    }
    $rows[] = $cells;
  }

  // Extra row for core node search, padded to the same width as the others
  // (one name cell plus one cell per column).
  $core = node_search_status();
  if (!empty($core['remaining'])) {
    $core_cells = array(
      t('Core content search'),
      $core['remaining'],
    );
    $rows[] = array_pad($core_cells, count($columns) + 1, '');
  }

  // The column list, with a leading label, doubles as the table header.
  array_unshift($columns, t('Environment'));
  $table = theme('table', array(
    'header' => $columns,
    'rows' => $rows,
    'caption' => t('Count of un-indexed items by environment and module'),
  ));

  return array(
    '#type' => 'markup',
    '#markup' => $table,
  );
}

Related topics

Functions

Namesort descending Description
search_by_page_admin_overview Returns the admin overview page for module configuration.
search_by_page_admin_settings Returns the admin settings page for a single search environment.
search_by_page_admin_settings_submit Submit handler for search_by_page_admin_settings().
search_by_page_block_info Implements hook_block_info().
search_by_page_block_view Implements hook_block_view().
search_by_page_delete_confirm Returns a form confirming deletion of an environment.
search_by_page_delete_confirm_submit Submit callback for search_by_page_delete_confirm().
search_by_page_excerpt Returns a search excerpt, with matched keywords highlighted.
search_by_page_force_reindex Forces a page to be reindexed at the next cron run.
search_by_page_force_remove Removes a page from Search by Page.
search_by_page_form Returns a Forms API array for the search by pages form.
search_by_page_form_search_admin_settings_alter Implements hook_form_FORM_ID_alter().
search_by_page_form_submit Submit function for search_by_page_form().
search_by_page_i18n_string_info Implements hook_i18n_string_info().
search_by_page_list_environments Returns a list of the currently-defined environment IDs.
search_by_page_menu Implements hook_menu().
search_by_page_page_content Returns the content portion of the rendered page at the given path.
search_by_page_path_field_prefix Returns a suitable field prefix for a path.
search_by_page_path_parts Splits a path into the main path and any query parts.
search_by_page_permission Implements hook_permission().
search_by_page_rebuild_paths Rebuilds the paths table for a particular module.
search_by_page_reset_blank Resets pages that have no words in search index, so they'll be reindexed.
search_by_page_sbp_delete_environment Implements hook_sbp_delete_environment().
search_by_page_search_admin Implements hook_search_admin().
search_by_page_search_execute Implements hook_search_execute().
search_by_page_search_info Implements hook_search_info().
search_by_page_search_reset Implements hook_search_reset().
search_by_page_search_status Implements hook_search_status().
search_by_page_setting_get Returns a setting for Search by Page or a sub-module.
search_by_page_setting_set Sets a setting for Search by Page or a sub-module.
search_by_page_stored_page_content Returns the stored content from the last indexing of a page.
search_by_page_strip_tags Strips out contents of HTML tags that are excluded in the given environment.
search_by_page_theme Implements hook_theme().
search_by_page_update_index Implements hook_update_index().
template_preprocess_search_by_page_form Processes variables for search-by-page-form.tpl.php.
_search_by_page_build_content_blocks Builds just the content region blocks.
_search_by_page_indexing_users Internal function: Returns a list of internal users to use for indexing.
_search_by_page_lookup Internal function: Finds and returns a single path record.
_search_by_page_rebuild_all_paths Internal function: rebuilds the paths table for all modules.
_search_by_page_refresh_translations Internal function: causes the base form of translations to refresh.
_search_by_page_remove_path Internal function: removes a path item from the paths and search database.
_search_by_page_remove_searchinfo Internal function: removes a path item from the search database.
_search_by_page_setup_theme Sets up the right theme to use for theming this page.
_search_by_page_status_details Generates a detailed status listing for the Search status page.
_search_by_page_update_last_time Internal function: updates the last index time.
_search_by_page_view Returns a ready-to-render search form and/or results page.