You are here

search_by_page.module in Search by Page 6

Same filename and directory in other branches
  1. 8 search_by_page.module
  2. 7 search_by_page.module

Main module file for Drupal module Search by Page.

Adds page-oriented searching to the core Drupal search module.

Copyright 2009 Jennifer Hodgdon, Poplar ProductivityWare LLC.

Licensed under the GNU Public License.

File

search_by_page.module
View source
<?php

/**
 * @file
 * Main module file for Drupal module Search by Page.
 *
 * Adds page-oriented searching to the core Drupal search module.
 *
 * Copyright 2009 Jennifer Hodgdon, Poplar ProductivityWare LLC.
 *
 * Licensed under the GNU Public License.
 * @ingroup search_by_page
 */

/**
 * @defgroup search_by_page Search by Page Module.
 *
 * Adds page-oriented searching to core Drupal search module.
 */

/**
 * Marks a page so that it is reindexed at the next cron run.
 *
 * The page is looked up in every applicable environment; wherever it is
 * found, its last index time is set to zero.
 *
 * @param $module
 *   Module that submitted this path for indexing.
 * @param $id
 *   ID given in hook_sbp_paths() for this path.
 * @param $environment
 *   Environment ID where this path should be reindexed. If omitted, all
 *   environments are checked.
 */
function search_by_page_force_reindex($module, $id, $environment = NULL) {
  // Either just the requested environment, or every known environment.
  $targets = isset($environment) ? array($environment) : search_by_page_list_environments();

  foreach ($targets as $target) {
    $found = _search_by_page_lookup(0, '', $id, $module, $target);
    if (!$found) {
      continue;
    }
    _search_by_page_update_last_time($found->pid, 0);
  }
}

/**
 * Removes a page from Search by Page.
 *
 * The page will immediately be unavailable in search results, and
 * will not be reindexed (unless hook_sbp_paths() submits it for
 * indexing again).
 *
 * @param $module
 *   Module that submitted this path for indexing.
 * @param $id
 *   ID given in hook_sbp_paths() for this path.
 * @param $environment
 *   Environment ID where this path should be removed. If omitted, all
 *   environments are checked.
 */
function search_by_page_force_remove($module, $id, $environment = NULL) {
  // Either just the requested environment, or every known environment.
  $targets = isset($environment) ? array($environment) : search_by_page_list_environments();

  foreach ($targets as $target) {
    $found = _search_by_page_lookup(0, '', $id, $module, $target);
    if (!$found) {
      continue;
    }
    _search_by_page_remove_path($found->pid);
  }
}

/**
 * Rebuilds the paths table for a particular module.
 *
 * Calls that module's hook_sbp_paths() implementation, which should
 * return an array of Drupal paths to be indexed. Removes any obsolete
 * paths, adds new ones, and updates existing ones' information, so
 * that the correct paths will be indexed.
 *
 * A module that does not implement hook_sbp_paths(), or whose implementation
 * does not return an array, is treated as wanting no paths at all. Entries
 * that are not arrays, or that have no 'languages' array, are skipped.
 *
 * @param $module
 *   Module to rebuild.
 * @param $environment
 *   Environment ID to rebuild.
 * @param $reset_items
 *   TRUE if the last_index_time for items whose max_time has been exceeded
 *   should be reset. FALSE to leave it alone. Note that the reset query is
 *   not restricted to $module or $environment.
 */
function search_by_page_rebuild_paths($module, $environment, $reset_items) {

  // Find out what paths this module wants now. Guard the dynamic call so a
  // missing hook implementation cannot trigger an error.
  $function = $module . '_sbp_paths';
  $paths = function_exists($function) ? call_user_func($function, $environment) : array();
  if (!is_array($paths)) {
    $paths = array();
  }

  // Make this into an array that matches db structure, with a
  // unique ID (language plus path) as key.
  $pathsnow = array();
  $defaults = array(
    'min_time' => 1,
    'max_time' => 0,
  );
  foreach ($paths as $path => $item) {
    // Skip malformed entries instead of failing on the array union or the
    // language loop below.
    if (!is_array($item) || !isset($item['languages']) || !is_array($item['languages'])) {
      continue;
    }
    $item += $defaults;
    foreach ($item['languages'] as $lang) {
      $pathsnow[$lang . "_._" . $path] = array(
        'page_path' => $path,
        'language' => $lang,
        'modid' => $item['id'],
        'role' => $item['role'],
        'min_time' => $item['min_time'],
        'max_time' => $item['max_time'],
      );
    }
  }
  $newpaths = array_keys($pathsnow);

  // Now see what's in the database already, keyed the same way, so the two
  // lists can be compared.
  $dbr = db_query("SELECT p.page_path, p.language, p.pid FROM {sbp_path} p WHERE p.from_module='%s' AND p.environment = %d", $module, $environment);
  $pathsindb = array();
  while ($item = db_fetch_object($dbr)) {
    $pathsindb[$item->language . "_._" . $item->page_path] = $item->pid;
  }
  $oldpaths = array_keys($pathsindb);

  // Resolve differences by deleting items they no longer want,
  // adding items they want now but didn't before, and modifying any
  // items whose information might potentially have changed. Do it this way
  // to preserve the information we've recorded on old items.
  $new = array_diff($newpaths, $oldpaths);
  $del = array_diff($oldpaths, $newpaths);
  $chg = array_diff($newpaths, $new);

  foreach ($del as $key) {
    _search_by_page_remove_path($pathsindb[$key]);
  }
  foreach ($new as $key) {
    $item = $pathsnow[$key];
    db_query("INSERT INTO {sbp_path} (page_path, from_module, modid, language, role, environment, min_time, max_time) VALUES ('%s', '%s', %d, '%s', %d, %d, %d, %d)", $item['page_path'], $module, $item['modid'], $item['language'], $item['role'], $environment, $item['min_time'], $item['max_time']);
  }
  foreach ($chg as $key) {
    $item = $pathsnow[$key];
    db_query("UPDATE {sbp_path} SET modid=%d, role=%d, min_time=%d, max_time= %d WHERE pid=%d", $item['modid'], $item['role'], $item['min_time'], $item['max_time'], $pathsindb[$key]);
  }

  // Finally, reset the last index time for any item whose max time to reindex
  // has passed.
  if ($reset_items) {
    db_query("UPDATE {sbp_path} SET last_index_time=0 WHERE (max_time > 0 AND %d > (last_index_time + max_time))", time());
  }
}

/**
 * Splits a path into the main path and any query parts.
 *
 * The path is first passed through drupal_get_normal_path(). It is then
 * split at the first '?' if there is one, and otherwise at the first '&'.
 *
 * @param $path
 *    Path to split.
 * @return
 *    Array whose first element is the main path. A second element, holding
 *    the query string, is present only if a separator was found.
 */
function search_by_page_path_parts($path) {
  $normal = drupal_get_normal_path($path);

  // A '?' separator takes precedence; '&' is only tried when there is no '?'.
  $pieces = explode('?', $normal, 2);
  if (count($pieces) < 2) {
    $pieces = explode('&', $normal, 2);
  }
  return $pieces;
}

/**
 * Returns unique where and join clauses, similar to _db_rewrite_sql().
 *
 * Use this function as a direct replacement for _db_rewrite_sql().
 * It works the same way, except that all table name aliases are prefixed
 * with the passed-in prefix string. For instance, if the prefix string
 * is 'abc_', then the node table will be aliased as 'abc_n', and the
 * node access table as 'abc_na'. This will allow Search by Page to
 * collect multiple query pieces together without conflict.
 *
 * Aliases that already start with the prefix are left alone.
 *
 * @param $prefix
 *   Prefix to use for all aliases.
 * @return
 *   Array with join statement and where statement.
 */
function search_by_page_unique_rewrite($prefix) {
  $stuff = _db_rewrite_sql('', $prefix . 'n');

  // Prefix any non-prefixed tables in there
  $tablechars = '[a-zA-Z0-9_]';

  // Regexp looks for table aliases in Drupal terms like {node_access} na
  // Note that this only occurs in the join statement!
  $regex = "|\\{{$tablechars}+\\}\\s+({$tablechars}+)\\s+|";
  $prefix_length = strlen($prefix);
  $matches = array();
  $pos = 0;
  while (preg_match($regex, $stuff[0], $matches, PREG_OFFSET_CAPTURE, $pos)) {
    $oldtable = $matches[1][0];

    // Continue the next search from this alias, so the loop always advances.
    $pos = $matches[1][1];
    if (strncmp($oldtable, $prefix, $prefix_length) === 0) {

      // This alias starts with the prefix, so it has already been prefixed.
      continue;
    }

    // Replace all occurrences of this table alias in join and where.
    // When replacing, the alias could have a paren, space, or = sign in
    // front, or be at the beginning of the string. It is followed by a
    // space or a period. $oldtable only contains [a-zA-Z0-9_], so it is safe
    // to embed in the pattern unquoted.
    $pat = '/(^|[=\\(\\s])' . $oldtable . '([\\s\\.])/';

    // Use the ${n} reference form so that a prefix starting with a digit
    // cannot be read as part of the group number.
    $repl = '${1}' . $prefix . $oldtable . '${2}';
    $stuff[0] = preg_replace($pat, $repl, $stuff[0]);
    $stuff[1] = preg_replace($pat, $repl, $stuff[1]);
  }
  return $stuff;
}

/**
 * Returns a search excerpt, with matched keywords highlighted.
 *
 * This is a drop-in replacement for the core search_excerpt() function.
 * The difference is that it allows stemming modules (or other modules
 * that preprocess search text and terms) to highlight words other than
 * exact keyword matches in the text, by implementing hook_sbp_excerpt_match().
 *
 * Each hook_sbp_excerpt_match() implementation is called with the keyword,
 * the text, the offset to start searching from, and the boundary regular
 * expression. Its return value is read as an array with a 'where' element
 * (position of the match) and a 'keyword' element (text to highlight).
 *
 * @param $keys
 *   A string containing a search query.
 * @param $text
 *   The text to extract fragments from.
 * @return
 *   A string containing HTML for the excerpt.
 */
function search_by_page_excerpt($keys, $text) {

  // We highlight around non-indexable or CJK characters.
  $boundary = '(?:(?<=[' . PREG_CLASS_SEARCH_EXCLUDE . PREG_CLASS_CJK . '])|(?=[' . PREG_CLASS_SEARCH_EXCLUDE . PREG_CLASS_CJK . ']))';

  // Extract positive keywords and phrases. Unquoted keywords stop at a space,
  // so that each word (and each quoted phrase after it) is a separate key.
  preg_match_all('/ ("([^"]+)"|(?!OR)([^" ]+))/', ' ' . $keys, $matches);
  $keys = array_merge($matches[2], $matches[3]);

  // Prepare text: pad tags with spaces so words do not run together once the
  // tags are stripped, and add a space at both ends.
  $text = ' ' . strip_tags(str_replace(array(
    '<',
    '>',
  ), array(
    ' <',
    '> ',
  ), $text)) . ' ';
  array_walk($keys, '_search_excerpt_replace');
  $workkeys = $keys;

  // Extract fragments around keywords.
  // First we collect ranges of text around each keyword, starting/ending
  // at spaces, trying to get to 256 characters.
  // If the sum of all fragments is too short, we look for second occurrences.
  $ranges = array();
  $included = array();
  $foundkeys = array();
  $length = 0;
  while ($length < 256 && count($workkeys)) {
    foreach ($workkeys as $k => $key) {
      if (!strlen($key)) {
        unset($workkeys[$k]);
        unset($keys[$k]);
        continue;
      }
      if ($length >= 256) {
        break;
      }

      // Remember occurrence of key so we can skip over it if more occurrences
      // are desired.
      if (!isset($included[$key])) {
        $included[$key] = 0;
      }

      // Locate a keyword (position $p, always >0 because $text starts with
      // a space). Try a bare keyword and let stemming modules try to find a
      // derived form. Make sure to keep the leftmost match found.
      $p = 0;
      if (preg_match('/' . $boundary . $key . $boundary . '/iu', $text, $match, PREG_OFFSET_CAPTURE, $included[$key])) {
        $p = $match[0][1];
      }
      foreach (module_implements('sbp_excerpt_match') as $module) {
        $info = module_invoke($module, 'sbp_excerpt_match', $key, $text, $included[$key], $boundary);

        // Ignore implementations that returned nothing usable.
        if (is_array($info) && !empty($info['where'])) {
          if (!$p || $info['where'] < $p) {

            // We found a match before any we had before.
            $p = $info['where'];
          }
          if (!empty($info['keyword'])) {
            $foundkeys[] = $info['keyword'];
          }
        }
      }

      // Now locate a space in front (position $q) and behind it (position $s),
      // leaving about 60 characters extra before and after for context.
      // Note that a space was added to the front and end of $text above.
      if ($p) {
        if (($q = strpos($text, ' ', max(0, $p - 60))) !== FALSE) {
          $end = substr($text, $p, 80);

          // CODER-IGNORE-THIS
          if (($s = strrpos($end, ' ')) !== FALSE) {
            $ranges[$q] = $p + $s;
            $length += $p + $s - $q;
            $included[$key] = $p + 1;
          }
          else {
            unset($workkeys[$k]);
          }
        }
        else {
          unset($workkeys[$k]);
        }
      }
      else {
        unset($workkeys[$k]);
      }
    }
  }

  // If we didn't find anything, return the beginning.
  if (count($ranges) == 0) {
    return truncate_utf8($text, 256) . ' ...';
  }

  // Sort the text ranges by starting position.
  ksort($ranges);

  // Now we collapse overlapping text ranges into one. The sorting makes it
  // O(n).
  $newranges = array();
  foreach ($ranges as $from2 => $to2) {
    if (!isset($from1)) {
      $from1 = $from2;
      $to1 = $to2;
      continue;
    }
    if ($from2 <= $to1) {
      $to1 = max($to1, $to2);
    }
    else {
      $newranges[$from1] = $to1;
      $from1 = $from2;
      $to1 = $to2;
    }
  }
  $newranges[$from1] = $to1;

  // Fetch text.
  $out = array();
  foreach ($newranges as $from => $to) {
    $out[] = substr($text, $from, $to - $from);

    // CODER-IGNORE-THIS
  }
  $text = (isset($newranges[0]) ? '' : '... ') . implode(' ... ', $out) . ' ...';

  // Highlight keywords. Must be done all at one time to prevent conflicts
  // ('strong' and '<strong>'). Both lists are numerically indexed, so they
  // must be appended with array_merge(); the + operator would drop every
  // found keyword whose index is already used in $keys.
  $keys = array_unique(array_merge($keys, $foundkeys));
  $text = preg_replace('/' . $boundary . '(' . implode('|', $keys) . ')' . $boundary . '/iu', '<strong>\\0</strong>', $text);
  return $text;
}

/**
 * Returns a setting for Search by Page or a sub-module.
 *
 * Works like the Drupal variable_get() function, but is environment-aware:
 * all settings live in the single 'search_by_page_settings' variable, keyed
 * first by environment ID and then by setting name.
 *
 * @param $name
 *   Name of the setting.
 * @param $environment
 *   ID of the environment.
 * @param $default
 *   Default value to return if setting has not been defined.
 *
 * @return
 *   Setting value, or $default if not defined.
 *
 * @see search_by_page_setting_set()
 */
function search_by_page_setting_get($name, $environment, $default) {
  $settings = variable_get('search_by_page_settings', array());
  return isset($settings[$environment][$name]) ? $settings[$environment][$name] : $default;
}

/**
 * Sets a setting for Search by Page or a sub-module.
 *
 * Works like the Drupal variable_set() function, but is environment-aware:
 * the value is stored inside the 'search_by_page_settings' variable, keyed
 * first by environment ID and then by setting name.
 *
 * @param $name
 *   Name of the setting.
 * @param $environment
 *   ID of the environment.
 * @param $value
 *   Value to set.
 *
 * @see search_by_page_setting_get()
 */
function search_by_page_setting_set($name, $environment, $value) {
  $settings = variable_get('search_by_page_settings', array());

  // Start from this environment's existing settings, if it has any.
  $env_settings = isset($settings[$environment]) ? $settings[$environment] : array();
  $env_settings[$name] = $value;
  $settings[$environment] = $env_settings;

  variable_set('search_by_page_settings', $settings);
}

/**
 * Returns a suitable field prefix for a path.
 *
 * This is the absolute base site URL, with ?q= appended if the 'clean_url'
 * variable is not set. It can be used in a Form API form as component
 * #field_prefix, if you are asking the user to input a URL path.
 */
function search_by_page_path_field_prefix() {
  $base = url(NULL, array('absolute' => TRUE));
  $query_marker = variable_get('clean_url', 0) ? '' : '?q=';
  return $base . $query_marker;
}

/**
 * Implementation of hook_form_FORM_ID_alter().
 *
 * Replaces the options and description of the cron limit element on the
 * Search module settings form, so that 0 items per cron run can be selected
 * for the other core search modules.
 */
function search_by_page_form_search_admin_settings_alter(&$form, $form_state) {
  $limit_element =& $form['indexing_throttle']['search_cron_limit'];

  // Same list as before, with 0 available as a choice.
  $limit_element['#options'] = drupal_map_assoc(array(0, 10, 20, 50, 100, 200, 500));
  $limit_element['#description'] = t('The maximum number of items indexed in each pass of a <a href="@cron">cron maintenance task</a> by search modules that do not set their own defaults (such as the core Node module that indexes content items). If necessary, reduce the number of items to prevent timeouts and memory errors while indexing.', array('@cron' => url('admin/reports/status')));
}

/**
 * Implementation of hook_search().
 *
 * Handles these operations:
 * - name: returns the tab name (translated if i18nstrings is enabled).
 * - reset: marks every path as not yet indexed.
 * - admin: returns a fieldset linking to the Search by Page settings.
 * - status: returns the total and remaining (unindexed) path counts.
 * - search: runs the search for the given keys.
 */
function search_by_page_search($op = 'search', $keys = NULL) {
  switch ($op) {
    case 'name':
      $tabname = variable_get('search_by_page_tabname', t('Pages'));
      return module_exists('i18nstrings') ? i18nstrings('search_by_page:search_page:tab_name', $tabname) : $tabname;

    case 'reset':
      // A last index time of zero marks an item as not yet indexed.
      db_query('UPDATE {sbp_path} SET last_index_time=0');
      return;

    case 'admin':
      // A fieldset with a link to the settings form, plus status details.
      return array(
        'search_by_page' => array(
          '#type' => 'fieldset',
          '#title' => t('Search by Page settings'),
          'info' => array(
            '#value' => '<p>' . l(t('Configure Search by Page settings'), 'admin/settings/search_by_page') . '</p>',
          ),
          'status' => _search_by_page_status_details(),
        ),
      );

    case 'status':
      // Tell Search module how many items exist, and how many await indexing.
      $count_all = db_result(db_query('SELECT COUNT(*) FROM {sbp_path}'));
      $count_pending = db_result(db_query('SELECT COUNT(*) FROM {sbp_path} p WHERE p.last_index_time = 0'));
      return array(
        'remaining' => $count_pending,
        'total' => $count_all,
      );

    case 'search':
      return _search_by_page_do_search($keys);
  }
}

/**
 * Implementation of hook_update_index().
 *
 * Indexes the site's pages, or at least some of them (up to cron
 * limit on searches), each cron run.
 *
 * While indexing, the global $user and $language, and $_GET, are temporarily
 * replaced so each page is rendered as the indexing user for its role and in
 * its own language. They are restored before the function returns. Page
 * output is captured in an output buffer and discarded.
 *
 * @see _search_by_page_rebuild_all_paths()
 */
function search_by_page_update_index() {
  global $user;
  global $language;

  // Save everything this function changes, so it can be restored at the end.
  // $_GET is a superglobal, so it needs no global declaration.
  $save_user = $user;
  $save_language = $language;
  $save_get = $_GET;
  $save_sessions = session_save_session();

  // Rebuild the list of paths to index, resetting ones whose time has come.
  _search_by_page_rebuild_all_paths(TRUE);
  $users = _search_by_page_indexing_users();

  // Figure out which pages to index this run - the ones that haven't been
  // indexed ever or re-indexed recently, up to limits of cron.
  $core_limit = (int) variable_get('search_cron_limit', 100);
  $limit = (int) variable_get('sbp_cron_limit', $core_limit);
  $result = db_query_range('SELECT * FROM {sbp_path} p WHERE (p.last_index_time = 0) OR (p.min_time > 0 AND %d > (p.last_index_time + p.min_time)) ORDER BY p.last_index_time', time(), 0, $limit);

  // Index each page, but don't save sessions for the indexing users.
  session_save_session(FALSE);

  // Make sure output from rendering pages does not screw up the cron run.
  ob_start();
  $reasons = array(
    MENU_ACCESS_DENIED => t('access denied'),
    MENU_NOT_FOUND => t('not found'),
    MENU_SITE_OFFLINE => t('site offline'),
  );
  $allroles = user_roles();
  $languages = language_list();
  while ($item = db_fetch_object($result)) {

    // Look up the indexing user for this item's role, without assuming the
    // role or its user still exists.
    $role = $item->role;
    $rolename = isset($allroles[$role]) ? $allroles[$role] : NULL;
    $index_user = (isset($rolename) && isset($users[$rolename])) ? $users[$rolename] : NULL;

    // Path reported in log messages; replaced by the normalized path if the
    // page gets rendered below.
    $path = $item->page_path;

    // No matter what, we want to update the time so it's marked as indexed.
    // That way, if there is an error it will at least not hold up other pages
    // in the next cron run.
    _search_by_page_update_last_time($item->pid, time());
    if (!$index_user) {

      // Log as the original user rather than leaving the global $user empty.
      $user = $save_user;
      watchdog('search_by_page', 'Role %rid (%rname) could not be used to index PID (%pid), path (%path)', array(
        '%rid' => $role,
        '%rname' => $rolename,
        '%pid' => $item->pid,
        '%path' => $item->page_path,
      ), WATCHDOG_NOTICE);

      // Remove content from the search index.
      _search_by_page_remove_searchinfo($item->pid);
      continue;
    }
    $user = $index_user;

    // Switch language; fall back to the original language if this item's
    // language is not in the language list.
    $language = isset($languages[$item->language]) ? $languages[$item->language] : $save_language;

    // Get page title from module and optional content
    $info = module_invoke($item->from_module, 'sbp_details', $item->modid, $item->environment);
    if (!$info) {

      // Module indicated not to index this page after all
      watchdog('search_by_page', 'Content was skipped - PID (%pid), path (%path)', array(
        '%pid' => $item->pid,
        '%path' => $item->page_path,
      ), WATCHDOG_NOTICE);

      // Remove content from the search index.
      _search_by_page_remove_searchinfo($item->pid);
      continue;
    }

    // Check for module override of content.
    $content = isset($info['content']) ? $info['content'] : NULL;
    if (!isset($content)) {

      // Module didn't return content. Render the page to get it.
      // Separate path into main part and query.
      $parts = search_by_page_path_parts($item->page_path);
      $path = drupal_get_normal_path($parts[0]);
      $_GET['q'] = $path;

      // Add query to $_GET as it would be on a page request.
      if (isset($parts[1])) {
        $getstuff = array();
        parse_str($parts[1], $getstuff);
        $_GET += $getstuff;
      }
      $content = menu_execute_active_handler($path);
      $_GET = $save_get;
    }
    if (!isset($content)) {
      $content = 0;
    }

    // $content will be either a string or an error code, if rendered.
    // Note that an empty string is acceptable for content -- it could just
    // be a page with nothing on it except the title.
    if (!is_int($content)) {
      if (!empty($info['title'])) {
        $content = '<h1>' . $info['title'] . '</h1> ' . $content;
      }
      $content = search_by_page_strip_tags($content, $item->environment);

      // Add to search index (already marked as updated).
      search_index($item->pid, 'search_by_page', $content);
    }
    else {
      // Use a readable reason in the log when the error code is a known one.
      $con = $content;
      if (isset($reasons[$con])) {
        $con = $reasons[$con];
      }
      if ($content == MENU_SITE_OFFLINE) {

        // In this one case, we want to reset this item so it gets indexed
        // next time for sure. But there is no point in continuing the cron
        // run if the site is off-line.
        _search_by_page_update_last_time($item->pid, 0);
        watchdog('search_by_page', 'Site is off-line, cannot index content', array(), WATCHDOG_NOTICE);
        break;
      }

      // Remove content from the search index.
      _search_by_page_remove_searchinfo($item->pid);
      watchdog('search_by_page', 'Content not rendered (%con) - PID (%pid), path (%path), realpath (%real), language (%lang)', array(
        '%pid' => $item->pid,
        '%path' => $item->page_path,
        '%real' => $path,
        '%lang' => $item->language,
        '%con' => $con,
      ), WATCHDOG_ERROR);
    }
  }

  // Switch user back, rebuild permissions cache, flush output buffer.
  $user = $save_user;
  user_access('', $user, TRUE);
  $language = $save_language;
  session_save_session($save_sessions);
  ob_end_clean();
}

/**
 * Implementation of hook_menu().
 *
 * Registers the administrative pages (overview, reset, environment edit,
 * add, and delete), plus one search page per environment at the path
 * configured for that environment.
 */
function search_by_page_menu() {
  // All administrative pages share the same access arguments.
  $admin_access = array('administer search by page');

  $items = array(
    'admin/settings/search_by_page' => array(
      'title' => 'Search by Page settings',
      'description' => 'Configure settings for Search by Page',
      'page callback' => 'drupal_get_form',
      'page arguments' => array('search_by_page_admin_overview'),
      'access arguments' => $admin_access,
      'type' => MENU_NORMAL_ITEM,
    ),
    'admin/settings/search_by_page/resetblank' => array(
      'title' => 'Search by Page reset blank',
      'description' => 'Reset unindexed pages for Search by Page',
      'page callback' => 'search_by_page_reset_blank',
      'access arguments' => $admin_access,
      'type' => MENU_CALLBACK,
    ),
    'admin/settings/search_by_page/edit/%' => array(
      'title' => 'Search by Page environment settings',
      'description' => 'Configure settings for Search by Page environment',
      'page callback' => 'drupal_get_form',
      'page arguments' => array('search_by_page_admin_settings', 4),
      'access arguments' => $admin_access,
      'type' => MENU_CALLBACK,
    ),
    'admin/settings/search_by_page/edit/%/general' => array(
      'title' => 'General settings',
      'type' => MENU_DEFAULT_LOCAL_TASK,
      'weight' => -10,
    ),
    'admin/settings/search_by_page/add' => array(
      'title' => 'Add environment',
      'type' => MENU_CALLBACK,
      'page callback' => 'drupal_get_form',
      'page arguments' => array('search_by_page_admin_settings'),
      'access arguments' => $admin_access,
    ),
    'admin/settings/search_by_page/delete' => array(
      'title' => 'Delete path',
      'type' => MENU_CALLBACK,
      'page callback' => 'drupal_get_form',
      'page arguments' => array('search_by_page_delete_confirm'),
      'access arguments' => $admin_access,
    ),
  );

  // One search page per environment.
  foreach (search_by_page_list_environments() as $envid) {
    $page_path = search_by_page_setting_get('page_path', $envid, 'search_pages');
    $page_title = search_by_page_setting_get('page_title', $envid, t('Search'));
    if (module_exists('i18nstrings')) {
      $page_title = i18nstrings('search_by_page:search_form:page_title' . $envid, $page_title);
    }
    $items[$page_path] = array(
      'title' => $page_title,
      'page callback' => '_search_by_page_view',
      // Note that 'env' . $envid is used here to avoid argument substitution.
      'page arguments' => array('env' . $envid),
      'access arguments' => array(
        'search page environment ' . search_by_page_setting_get('environment_name', $envid, t('new')),
      ),
      'type' => MENU_SUGGESTED_ITEM,
    );
  }
  return $items;
}

/**
 * Implementation of hook_perm().
 *
 * Returns the administrative permission, plus one search permission per
 * environment, named after the environment.
 */
function search_by_page_perm() {
  $permissions = array('administer search by page');
  foreach (search_by_page_list_environments() as $envid) {
    $envname = search_by_page_setting_get('environment_name', $envid, t('new'));
    $permissions[] = 'search page environment ' . $envname;
  }
  return $permissions;
}

/**
 * Implementation of hook_block().
 *
 * Provides one search form block per environment; the block delta is the
 * environment ID.
 */
function search_by_page_block($op = 'list', $delta = 0, $edit = array()) {
  $translate = module_exists('i18nstrings');

  switch ($op) {
    case 'list':
      // Describe one block per environment.
      $blocks = array();
      foreach (search_by_page_list_environments() as $envid) {
        $label = search_by_page_setting_get('environment_name', $envid, t('new'));
        if ($translate) {
          $label = i18nstrings('search_by_page:environment:name' . $envid, $label);
        }
        $blocks[$envid] = array(
          'info' => t('Search by page @environment', array('@environment' => $label)),
        );
      }
      return $blocks;

    case 'view':
      $subject = search_by_page_setting_get('block_title', $delta, t('Search'));
      if ($translate) {
        $subject = i18nstrings('search_by_page:search_form:block_title' . $delta, $subject);
      }
      return array(
        'subject' => $subject,
        'content' => drupal_get_form('search_by_page_form', $delta, NULL, TRUE),
      );
  }
}

/**
 * Implementation of hook_locale().
 *
 * Declares the 'search_by_page' text group, and for the 'info' operation
 * names its refresh callback and sets its format flag to FALSE.
 */
function search_by_page_locale($op = 'groups', $group = NULL) {
  switch ($op) {
    case 'groups':
      return array('search_by_page' => t('Search by Page'));

    case 'info':
      return array(
        'search_by_page' => array(
          'refresh callback' => '_search_by_page_refresh_translations',
          'format' => FALSE,
        ),
      );
  }
}

/**
 * Returns rendered search form and/or results page.
 *
 * The search keywords are whatever follows the environment's configured
 * page path in the requested path ($_GET['q']). If keywords are present,
 * the search is logged and run, and the themed results are appended below
 * the form.
 *
 * @param $envstring
 *   The environment ID, with 'env' prepended, to avoid argument substitution
 *   in hook_menu().
 *
 * @return
 *   Rendered search form, followed by search results if there were keywords.
 */
function _search_by_page_view($envstring) {
  $environment = intval(drupal_substr($envstring, 3));
  $path = search_by_page_setting_get('page_path', $environment, 'search_pages');

  // This pretty much follows search_view(), except that the page path is
  // configurable and may have several components, so the keywords are taken
  // from after the configured path rather than after the first slash.
  $request = isset($_GET['q']) ? $_GET['q'] : '';
  $keys = '';
  if (strpos($request, $path . '/') === 0) {
    $keys = trim(substr($request, strlen($path) + 1));
  }
  elseif ($request != $path) {

    // The request did not come in under the configured path; fall back to
    // treating everything after the first slash as the keywords.
    $pieces = explode('/', $request, 2);
    if (count($pieces) > 1) {
      $keys = trim($pieces[1]);
    }
  }

  // Display a search form
  $output = drupal_get_form('search_by_page_form', $environment, $keys, FALSE);

  // Get search results, if any, and display them
  if ($keys) {
    watchdog('search_by_page', '%keys', array(
      '%keys' => $keys,
    ), WATCHDOG_NOTICE, l(t('results'), $path . '/' . $keys));

    // Keep the keywords as entered for display; the environment is added to
    // the query that is actually searched.
    $entered_keys = $keys;
    $keys = search_query_insert($keys, 'environment', $environment);
    $results = search_data($keys, 'search_by_page');
    if ($results) {
      $title = theme('search_by_page_results_title', $entered_keys);
      $results = theme('box', $title, $results);
    }
    else {
      $results = theme('search_by_page_no_results', $entered_keys);
    }
    $output .= $results;
  }
  return $output;
}

/**
 * Returns a Forms API array for the search by pages form.
 *
 * The form posts to the environment's configured search page path. Its
 * field and button labels come from the environment's settings, and are
 * translated if i18nstrings is enabled.
 *
 * @ingroup forms
 * @see search_by_page_form_submit()
 */
function search_by_page_form(&$form_state, $environment, $keys = '', $is_block = TRUE) {
  $action = url(search_by_page_setting_get('page_path', $environment, 'search_pages'));

  $fieldlabel = search_by_page_setting_get('field_label', $environment, t('Search for'));
  $buttonlabel = search_by_page_setting_get('button_label', $environment, t('Search'));
  if (module_exists('i18nstrings')) {
    $fieldlabel = i18nstrings('search_by_page:search_form:field_label' . $environment, $fieldlabel);
    $buttonlabel = i18nstrings('search_by_page:search_form:button_label' . $environment, $buttonlabel);
  }

  return array(
    '#action' => $action,
    '#attributes' => array('class' => 'search-by-page-form'),
    '#method' => 'post',
    'keys' => array(
      '#type' => 'textfield',
      '#title' => $fieldlabel,
      '#default_value' => $keys,
      '#size' => 30,
      '#maxlength' => 255,
    ),
    // Carried along as values for the submit handler.
    'environment' => array(
      '#type' => 'value',
      '#value' => $environment,
    ),
    'is_block' => array(
      '#type' => 'value',
      '#value' => $is_block,
    ),
    'submit' => array(
      '#type' => 'submit',
      '#value' => $buttonlabel,
    ),
  );
}

/**
 * Submit function for search_by_page_form().
 *
 * Redirects to the environment's search page path with the keywords
 * appended. If no keywords were entered, an error is set on the field, and
 * the redirect is still set.
 */
function search_by_page_form_submit($form, &$form_state) {
  $values = $form_state['values'];
  $keys = $values['keys'];
  if ($keys == '') {
    form_set_error('keys', t('Please enter some keywords.'));
    // Fall through: the redirect below is set regardless.
  }

  $search_path = search_by_page_setting_get('page_path', $values['environment'], 'search_pages');
  $form_state['redirect'] = $search_path . '/' . $keys;
}

/**
 * Returns the admin overview page for module configuration.
 *
 * This page lets you choose and configure search environments. The form has
 * three parts:
 * - A collapsed fieldset with links to related administration pages.
 * - A table of the existing environments with edit and delete links.
 * - Module-wide settings: cron indexing limit, default environment, and the
 *   name of the tab shown on the core Search page.
 *
 * @param $form_state
 *   Forms API form state (not used by this builder).
 *
 * @return
 *   Forms API array wrapped by system_settings_form(), so the three settings
 *   elements are saved as variables named after their form keys.
 *
 * @ingroup forms
 */
function search_by_page_admin_overview(&$form_state) {
  $form = array();
  $form['general_sbp'] = array(
    '#type' => 'fieldset',
    '#collapsible' => TRUE,
    '#collapsed' => TRUE,
    '#weight' => -20,
    '#title' => t('Additional actions'),
  );
  $form['general_sbp']['info'] = array(
    '#type' => 'markup',
    '#weight' => 20,
    '#value' => '<p>' . l(t('Configure general search settings and see indexing status'), 'admin/settings/search') . '</p>',
  );
  $form['general_sbp']['cron'] = array(
    '#type' => 'markup',
    '#weight' => 21,
    '#value' => '<p>' . l(t('Visit the Status Report page to check cron status and run cron'), 'admin/reports/status') . '</p>',
  );
  $form['general_sbp']['reset_blank'] = array(
    '#type' => 'markup',
    '#weight' => 22,
    '#value' => '<p>' . l(t('Click to reset pages that are blank in the search index, so they will reindex at next cron run.'), 'admin/settings/search_by_page/resetblank') . '</p>',
  );

  // Make a table of existing environments. $options collects the same
  // environments for the "Default environment" select further down.
  $options = array();
  $output = '<h3>' . t('Search environments') . '</h3>';
  $output .= '<p>' . l(t('Add new search environment'), 'admin/settings/search_by_page/add') . '</p>';
  $headers = array(
    t('Environment'),
    t('URL path'),
    array(
      'data' => t('Operations'),
      'colspan' => 2,
    ),
  );
  $rows = array();
  $envs = search_by_page_list_environments();
  foreach ($envs as $envid) {
    $path = search_by_page_setting_get('page_path', $envid, 'search_pages');
    $envname = search_by_page_setting_get('environment_name', $envid, t('new'));
    if (module_exists('i18nstrings')) {
      $envname = i18nstrings('search_by_page:environment:name' . $envid, $envname);
    }
    $options[$envid] = $envname;
    $rows[] = array(
      $envname,
      l($path, $path),
      // Operation labels are interface text, so they must be translatable.
      l(t('edit'), 'admin/settings/search_by_page/edit/' . $envid),
      l(t('delete'), 'admin/settings/search_by_page/delete/' . $envid),
    );
  }
  if (!count($rows)) {
    // Placeholder row; four cells to match the header's column span.
    $rows[] = array(
      t('No environments defined'),
      '',
      '',
      '',
    );
  }
  $output .= theme('table', $headers, $rows);
  $form['environment_table'] = array(
    '#type' => 'markup',
    '#weight' => -5,
    '#value' => $output,
  );

  // Form section to choose default environment, name for it in Search, and
  // cron limit.
  $items = drupal_map_assoc(array(
    10,
    20,
    50,
    100,
    200,
    500,
  ));

  // The Search by Page limit defaults to the core search cron limit.
  $core_limit = (int) variable_get('search_cron_limit', 100);
  $limit = (int) variable_get('sbp_cron_limit', $core_limit);
  $form['sbp_cron_limit'] = array(
    '#type' => 'select',
    '#weight' => -3,
    '#default_value' => $limit,
    '#options' => $items,
    '#title' => t('Number of items to index per cron run'),
    '#description' => t('The maximum number of items indexed in each pass of a <a href="@cron">cron maintenance task</a> by Search by Page.', array(
      '@cron' => url('admin/reports/status'),
    )),
  );
  $form['search_by_page_default_environment'] = array(
    '#type' => 'select',
    '#weight' => -2,
    '#default_value' => variable_get('search_by_page_default_environment', 1),
    '#options' => $options,
    '#title' => t('Default environment'),
    '#description' => t('The default environment is used for the Search by Page tab when using the core Search page.'),
  );
  $form['search_by_page_tabname'] = array(
    '#type' => 'textfield',
    '#weight' => -1,
    '#default_value' => variable_get('search_by_page_tabname', t('Pages')),
    '#title' => t('Search tab name'),
    '#description' => t('If using Search by Page with the core Search module, the name of the tab where Search by Page results are shown.'),
  );
  return system_settings_form($form);
}

/**
 * Form builder for the settings page of a single search environment.
 *
 * Combines the general per-environment settings defined here with whatever
 * form elements the sub-modules return from hook_sbp_settings().
 *
 * @param $form_state
 *   Forms API form state (not used by this builder).
 * @param $environment
 *   ID of the environment to configure, or zero to add a new one. A new
 *   environment gets an ID one higher than the highest existing ID.
 *
 * @return
 *   Forms API array. On submission the settings are saved, then all paths are
 *   rebuilt and translations are refreshed.
 *
 * @ingroup forms
 */
function search_by_page_admin_settings(&$form_state, $environment = 0) {
  // Resolve the environment ID; zero means a new environment is being added.
  $environment = intval($environment);
  if (!$environment) {
    $highest = 0;
    foreach (search_by_page_list_environments() as $existing) {
      $highest = ($existing > $highest) ? $existing : $highest;
    }
    $environment = $highest + 1;
  }

  // Fieldset holding the general settings of this environment.
  $general = array(
    '#type' => 'fieldset',
    '#collapsible' => TRUE,
    '#weight' => -99,
    '#title' => t('General settings for this environment'),
  );
  $general['environment_name'] = array(
    '#type' => 'textfield',
    '#weight' => 0,
    '#title' => t('Environment name (shown to administrators only)'),
    '#default_value' => search_by_page_setting_get('environment_name', $environment, t('new')),
  );
  $general['set_as_default'] = array(
    '#type' => 'checkbox',
    '#weight' => 1,
    '#title' => t('Set this environment as default'),
    '#description' => t('The default environment is used for the Search by Page tab when using the core Search page.'),
  );
  $already_default = (variable_get('search_by_page_default_environment', 0) == $environment);
  if ($already_default) {
    $general['set_as_default']['#default_value'] = 1;
  }

  // The four label settings share one shape: weight, field title, fallback.
  $label_settings = array(
    'page_title' => array(3, t('Title for search page'), t('Search')),
    'block_title' => array(4, t('Title for search block'), t('Search')),
    'field_label' => array(5, t('Label for keywords field in search form'), t('Search for')),
    'button_label' => array(6, t('Text on button in search form'), t('Search')),
  );
  foreach ($label_settings as $setting => $spec) {
    list($weight, $title, $fallback) = $spec;
    $general[$setting] = array(
      '#type' => 'textfield',
      '#weight' => $weight,
      '#title' => $title,
      '#default_value' => search_by_page_setting_get($setting, $environment, $fallback),
    );
  }

  // The translation hint is only shown when the Locale module is enabled.
  if (module_exists('locale')) {
    $general['labelhelp'] = array(
      '#type' => 'markup',
      '#weight' => 10,
      '#value' => '<p>' . t("Enter the text settings above in your site's default language. If you have a multi-lingual site with the Internationalization project's String Translation module installed and enabled, you can use Drupal's translation interface to translate them.") . '</p>',
    );
  }
  $general['page_path'] = array(
    '#type' => 'textfield',
    '#weight' => 11,
    '#title' => t('URL path for search page'),
    '#description' => t('Path cannot include a /'),
    '#field_prefix' => search_by_page_path_field_prefix(),
    '#default_value' => search_by_page_setting_get('page_path', $environment, 'search_pages'),
  );
  $general['permhelp'] = array(
    '#type' => 'markup',
    '#weight' => 12,
    '#value' => '<p>' . t('After creating a new environment or changing the environment name, you will need to set permissions to define who can search using this environment.') . '</p>',
  );
  $general['exclude_tags'] = array(
    '#type' => 'textfield',
    '#weight' => 15,
    '#title' => t('HTML tags to exclude'),
    '#description' => t('Enter the HTML tags whose <em>contents</em> should be completely removed from search indexing and search results. Separate by spaces, and just enter the tag names. Only supports letters, numbers, and underscores in tag names. Excluding h1 will exclude the page title. Example entry: script object'),
    '#default_value' => search_by_page_setting_get('exclude_tags', $environment, ''),
  );

  // Assemble the form, then merge in the sub-modules' settings elements.
  $form = array();
  $form['environment'] = array(
    '#type' => 'value',
    '#value' => $environment,
  );
  $form['general_env'] = $general;
  $form['#submit'] = array();
  $form = array_merge($form, module_invoke_all('sbp_settings', $environment));

  $form['buttons']['submit'] = array(
    '#type' => 'submit',
    '#value' => t('Save configuration'),
  );
  $form['#theme'] = 'system_settings_form';

  // Submit handlers, in order: save the settings, rebuild the paths table,
  // refresh the translation source strings.
  $form['#submit'][] = 'search_by_page_admin_settings_submit';
  $form['#submit'][] = '_search_by_page_rebuild_all_paths';
  $form['#submit'][] = '_search_by_page_refresh_translations';

  return $form;
}

/**
 * Submit handler for search_by_page_admin_settings().
 *
 * Saves every submitted value as a setting of the environment being edited,
 * except for Forms API bookkeeping values. Optionally marks the environment
 * as the default, then flags the menu for rebuilding and clears caches.
 *
 * @param $form
 *   The submitted form array (not used).
 * @param $form_state
 *   Forms API form state; 'values' is read and 'redirect' is written.
 */
function search_by_page_admin_settings_submit($form, &$form_state) {

  // This basically follows system_settings_form_submit().
  $form_state['redirect'] = 'admin/settings/search_by_page';
  $environment = intval($form_state['values']['environment']);

  // Values that are not settings. 'op' holds the clicked button's label and
  // must be skipped as well, otherwise it is stored as a bogus setting for
  // every environment.
  $skip = array(
    'op',
    'submit',
    'form_id',
    'form_token',
    'form_build_id',
    'environment',
    'set_as_default',
  );
  foreach ($form_state['values'] as $key => $value) {
    if (!in_array($key, $skip)) {
      search_by_page_setting_set($key, $environment, $value);
    }
  }

  // 'set_as_default' is stored as a site-wide variable, not as a setting.
  if ($form_state['values']['set_as_default']) {
    variable_set('search_by_page_default_environment', $environment);
  }

  // Make sure menu gets rebuilt, so our new/changed path works.
  // Also need to clear block cache.
  variable_set('menu_rebuild_needed', TRUE);
  cache_clear_all();
  drupal_set_message(t('The configuration options have been saved.'));
}

/**
 * Form builder: asks for confirmation before deleting an environment.
 *
 * @param $form_state
 *   Forms API form state (not used by this builder).
 * @param $envid
 *   ID of the environment to delete; passed to the submit handler as the
 *   'envid' value.
 *
 * @return
 *   Confirmation form built by confirm_form(); cancelling leads back to the
 *   Search by Page overview page.
 *
 * @ingroup forms
 * @see search_by_page_delete_confirm_submit()
 */
function search_by_page_delete_confirm($form_state, $envid) {
  $form = array();
  $form['envid'] = array(
    '#type' => 'value',
    '#value' => $envid,
  );

  // Show the (possibly translated) environment name in the question.
  $label = search_by_page_setting_get('environment_name', $envid, t('new'));
  if (module_exists('i18nstrings')) {
    $label = i18nstrings('search_by_page:environment:name' . $envid, $label);
  }
  $question = t('Are you sure you want to delete environment %env?', array(
    '%env' => $label,
  ));

  return confirm_form($form, $question, 'admin/settings/search_by_page');
}

/**
 * Form submission handler for search_by_page_delete_confirm().
 *
 * When the deletion was confirmed, invokes hook_sbp_delete_environment() so
 * that every module can remove its data for the environment. Always
 * redirects back to the Search by Page overview page.
 *
 * @param $form
 *   The submitted form array (not used).
 * @param $form_state
 *   Forms API form state; 'confirm' and 'envid' are read from its values and
 *   'redirect' is written.
 */
function search_by_page_delete_confirm_submit($form, &$form_state) {
  $form_state['redirect'] = 'admin/settings/search_by_page';

  if ($form_state['values']['confirm']) {
    // The actual removal happens in the hook implementations.
    module_invoke_all('sbp_delete_environment', $form_state['values']['envid']);
  }
}

/**
 * Implementation of hook_theme().
 *
 * Registers the search form template and the two theme functions used on the
 * results page.
 *
 * @return
 *   Array of theme hook definitions, keyed by theme hook name.
 */
function search_by_page_theme() {
  // Both results-page theme functions take the search keywords only.
  $keys_only = array(
    'keys' => '',
  );

  $hooks = array();
  $hooks['search_by_page_form'] = array(
    'arguments' => array(
      'form' => NULL,
    ),
    'template' => 'search-by-page-form',
  );
  $hooks['search_by_page_results_title'] = array(
    'arguments' => $keys_only,
  );
  $hooks['search_by_page_no_results'] = array(
    'arguments' => $keys_only,
  );

  return $hooks;
}

/**
 * Processes variables for search-by-page-form.tpl.php.
 *
 * The $variables array contains the following arguments:
 * - $form
 *
 * Variables added for the template:
 * - $environment: value of the form's 'environment' element.
 * - $is_block: value of the form's 'is_block' element.
 * - $search: rendered markup of each form element that produced output,
 *   keyed by element name, plus 'hidden' with all hidden/token elements.
 * - $search_form: all entries of $search concatenated.
 *
 * @see search-by-page-form.tpl.php
 */
function template_preprocess_search_by_page_form(&$variables) {
  $variables['search'] = array();
  $variables['environment'] = $variables['form']['environment']['#value'];
  $variables['is_block'] = $variables['form']['is_block']['#value'];

  // Render every child once. Visible elements are kept under their own key
  // so themers can print them independently; hidden and token elements are
  // lumped together since themers have no use for them individually.
  $visible = array();
  $hidden_markup = '';
  foreach (element_children($variables['form']) as $child) {
    $type = $variables['form'][$child]['#type'];
    $markup = drupal_render($variables['form'][$child]);
    if ($type == 'hidden' || $type == 'token') {
      $hidden_markup .= $markup;
    }
    elseif ($markup) {
      // Elements that render to nothing (e.g. 'value' types) are left out.
      $visible[$child] = $markup;
    }
  }
  $visible['hidden'] = $hidden_markup;
  $variables['search'] = $visible;

  // Collect all form elements to make it easier to print the whole form.
  $variables['search_form'] = implode('', $visible);
}

/**
 * Themes the title of the results section of the search results page.
 *
 * @param $keys
 *   The keywords being searched for. This default implementation does not
 *   use them; they are available to theme overrides.
 *
 * @return
 *   The translated title text.
 */
function theme_search_by_page_results_title($keys = '') {
  $title = t('Search results');
  return $title;
}

/**
 * Themes the message shown when a search has no results.
 *
 * @param $keys
 *   The keywords being searched for. This default implementation does not
 *   use them; they are available to theme overrides.
 *
 * @return
 *   A themed box containing the core Search module's "no results" help text.
 */
function theme_search_by_page_no_results($keys) {
  $heading = t('Your search yielded no results');
  $help_text = search_help('search#noresults', drupal_help_arg());
  return theme('box', $heading, $help_text);
}

/**
 * Implementation of hook_sbp_delete_environment().
 *
 * Removes the environment from the stored settings, from the core search
 * tables, and finally from the Search by Page paths table.
 *
 * @param $environment
 *   ID of the environment being deleted.
 */
function search_by_page_sbp_delete_environment($environment) {
  $envid = intval($environment);

  // Drop the environment's entry from the settings variable.
  $settings = variable_get('search_by_page_settings', array());
  unset($settings[$envid]);
  variable_set('search_by_page_settings', $settings);

  // Clear the core search tables first: the rows to delete are found through
  // {sbp_path}, so that table has to be emptied last.
  $pids_in_environment = 'SELECT pid FROM {sbp_path} WHERE environment=%d';
  $search_tables = array(
    'search_dataset',
    'search_index',
    'search_node_links',
  );
  foreach ($search_tables as $table) {
    db_query('DELETE FROM {' . $table . "} WHERE type='search_by_page' AND sid IN (" . $pids_in_environment . ')', $envid);
  }

  db_query('DELETE FROM {sbp_path} WHERE environment=%d', $envid);
}

/**
 * Resets pages that have no words in search index, so they'll be reindexed.
 *
 * Page callback for the 'admin/settings/search_by_page/resetblank' path.
 * Sets the last index time of every such page back to its default of zero,
 * reports how many pages were reset, and returns to the overview page.
 */
function search_by_page_reset_blank() {
  // Paths whose pid has no matching 'search_by_page' row in {search_index}.
  $sql = "SELECT sp.pid FROM {sbp_path} sp LEFT JOIN (SELECT * FROM {search_index} si WHERE si.type = 'search_by_page') sisp ON sisp.sid = sp.pid WHERE sisp.word IS NULL";
  $blank_pages = db_query($sql);

  $reset = 0;
  while ($page = db_fetch_object($blank_pages)) {
    _search_by_page_update_last_time($page->pid);
    ++$reset;
  }

  $message = t('Blank pages have been reset to index at next cron run (%num)', array(
    '%num' => $reset,
  ));
  drupal_set_message($message);
  drupal_goto('admin/settings/search_by_page');
}

/**
 * Internal function: performs a search, for hook_search($op = 'search').
 *
 * Reads the 'environment' option out of the keywords (falling back to the
 * default environment), lets every hook_sbp_query_modify() implementation
 * contribute join and where clauses, runs the query through do_search(),
 * and turns each hit into a result array with a link and the details
 * supplied by the module that owns the path.
 *
 * Hits whose path record can no longer be found in {sbp_path} are skipped.
 *
 * @param $keys
 *    Keywords we are searching for
 * @return
 *    Array of search results (see hook_search() for documentation).
 */
function _search_by_page_do_search($keys = NULL) {
  global $pager_page_array, $pager_total, $pager_total_items;
  global $language;
  $lang = $language->language;

  // Extract environment and clear from keys.
  $environment = search_query_extract($keys, 'environment');
  $keys = search_query_insert($keys, 'environment');
  if (!$environment) {
    $environment = variable_get('search_by_page_default_environment', 1);
  }

  // Set up query for Search module.
  $join = 'INNER JOIN {sbp_path} sp ON i.sid = sp.pid';

  // Each module's condition is OR-ed onto a false starting condition.
  // Postgres note: integers are not booleans!
  $where = '0=1';
  $args = array();
  foreach (module_implements('sbp_query_modify') as $module) {
    $stuff = module_invoke($module, 'sbp_query_modify', $environment);
    $join = $join . ' ' . $stuff['join'];
    $where = $where . ' OR (sp.from_module = \'%s\' AND ' . $stuff['where'] . ')';
    $args[] = $module;
    $args = array_merge($args, $stuff['arguments']);
  }

  // Restrict to this environment and, when known, the current language.
  $mainwhere = 'sp.environment=%d';
  $args[] = $environment;
  if ($lang) {
    $mainwhere .= " AND sp.language='%s'";
    $args[] = $lang;
  }
  $where = '((' . $where . ') AND ' . $mainwhere . ')';

  // Let Search perform the actual search
  $found = do_search($keys, 'search_by_page', $join, $where, $args);

  // Save global pager information, page title, etc. because rendering
  // can screw this up.
  $title = drupal_get_title();
  $tmp_parray = $pager_page_array[0];
  $tmp_ptotal = $pager_total[0];
  $tmp_itotal = $pager_total_items[0];
  $tmp_breadcrumb = drupal_get_breadcrumb();
  $tmp_trail = menu_get_active_trail();
  $tmp_menu = menu_get_item();

  // Use PHP's output buffering to prevent any output during rendering.
  // Some modules print things directly when rendering the page, which is
  // not good practice, but so it goes.
  ob_start();

  // Create array of formatted results for Search
  $results = array();
  foreach ($found as $item) {
    $info = _search_by_page_lookup($item->sid);
    if (!$info) {
      // Stale index entry with no path record: there is nothing to link to
      // and no module to ask for details, so leave it out of the results.
      continue;
    }

    // Figure out the URL to this page. These are options for url(), kept
    // separate from the query arguments in $args above.
    $parts = search_by_page_path_parts($info->page_path);
    $url_options = array(
      'absolute' => TRUE,
    );
    if (isset($parts[1])) {
      $url_options['query'] = $parts[1];
    }

    // Special case: if this is a file, we need to use the File API to get
    // the URL. Otherwise, use the normal Drupal URL function.
    if (strpos($parts[0], file_directory_path()) !== FALSE) {

      // This is a file
      $link = file_create_url($parts[0]);
    }
    else {
      $link = url($parts[0], $url_options);
    }
    $res = array(
      'link' => $link,
    );

    // Merge URL with information provided by module from hook_sbp_details()
    $res2 = module_invoke($info->from_module, 'sbp_details', $info->modid, $environment, $keys);
    if (is_array($res2)) {
      $res = array_merge($res, $res2);
    }

    // Make sure we have a title; the module may not have supplied one.
    if (empty($res['title'])) {
      $res['title'] = $link;
    }
    $results[] = $res;
  }

  // Reset page title and pager stuff, and clear the output buffer.
  drupal_set_title($title);
  $pager_page_array[0] = $tmp_parray;
  $pager_total[0] = $tmp_ptotal;
  $pager_total_items[0] = $tmp_itotal;
  menu_set_item(NULL, $tmp_menu);
  drupal_set_breadcrumb($tmp_breadcrumb);
  menu_set_active_trail($tmp_trail);
  ob_end_clean();
  return $results;
}

/**
 * Internal function: rebuilds the paths table for all modules.
 *
 * Calls search_by_page_rebuild_paths() once for every combination of a
 * module implementing hook_sbp_paths() and a defined environment.
 *
 * NOTE(review): search_by_page_admin_settings() also registers this function
 * as a form submit handler. Submit handlers are presumably called with the
 * form array as first argument, so in that case $reset_items would receive
 * the form rather than a boolean - confirm that this is intended.
 *
 * @param $reset_items
 *   TRUE if the last_index_time for items whose max_time has been exceeded
 *   should be reset. FALSE to leave it alone. Passed on unchanged to
 *   search_by_page_rebuild_paths().
 *
 * @see search_by_page_rebuild_paths()
 */
function _search_by_page_rebuild_all_paths($reset_items = FALSE) {
  $environments = search_by_page_list_environments();
  $providers = module_implements('sbp_paths');

  foreach ($providers as $provider) {
    foreach ($environments as $environment) {
      search_by_page_rebuild_paths($provider, $environment, $reset_items);
    }
  }
}

/**
 * Internal function: Returns a list of internal users to use for indexing.
 *
 * Reads the distinct role IDs stored in the {sbp_path} table and makes sure
 * there is one indexing account for each of them, recorded in the
 * {sbp_index_users} table. For every needed role the recorded account is
 * loaded; if there is none or it cannot be loaded, the recorded row is
 * deleted and a replacement is set up: user 0 for the anonymous role, or a
 * newly created blocked user holding the role otherwise.
 *
 * Accounts recorded for roles that are no longer needed are left untouched
 * by this function.
 *
 * @return
 *   Array indexed by role name. Each element is a loaded user object having
 *   that role, with the status bit temporarily (in-memory) set to 1. Roles
 *   for which no account could be loaded or created are left out, and an
 *   error is logged to watchdog for each of them.
 */
function _search_by_page_indexing_users() {

  // Figure out which roles are currently in our paths table.
  // NOTE(review): a role ID in {sbp_path} that is missing from user_roles()
  // is read here without an isset() check - confirm this cannot happen.
  $allroles = user_roles();
  $result = db_query('SELECT role FROM {sbp_path} GROUP BY role');
  $roles_needed = array();
  while ($item = db_fetch_object($result)) {
    $roles_needed[$item->role] = $allroles[$item->role];
  }

  // Figure out which users we already have. The result maps role name to
  // the user ID recorded for it; rows for unknown role IDs are ignored.
  $result = db_query('SELECT rid, uid FROM {sbp_index_users}');
  $accounts_have = array();
  while ($item = db_fetch_object($result)) {
    $rid = $item->rid;
    if (isset($allroles[$rid])) {
      $rolename = $allroles[$rid];
      $accounts_have[$rolename] = $item->uid;
    }
  }

  // Create or load needed users.
  $accounts = array();
  foreach ($roles_needed as $rid => $rolename) {
    $account = FALSE;

    // Attempt to load the user from the UID we stored a previous time.
    if (isset($accounts_have[$rolename])) {
      $account = user_load($accounts_have[$rolename]);
    }
    if (!$account) {

      // That didn't work. Delete previous entry in index_users table,
      // attempt to create a new user, and save this user ID.
      db_query('DELETE FROM {sbp_index_users} WHERE rid = %d', $rid);
      if ($rid == DRUPAL_ANONYMOUS_RID) {

        // The anonymous role is served by user 0; no account is created.
        $account = user_load(0);
      }
      else {

        // Create a blocked user with random password and email, and a random
        // suffix on the user name to prevent blocking problems. See issue
        // http://drupal.org/node/716342.
        $new = array(
          'pass' => user_password(),
          'name' => 'sbp indexing ' . $rolename . ' ' . user_password(),
          'mail' => user_password() . "@" . user_password() . ".com",
          'roles' => array(
            $rid => $rolename,
          ),
          'status' => 0,
        );
        $tmp = user_save((object) array(), $new);

        // Reload the saved account; only log success if it has a user ID.
        $account = user_load($tmp->uid);
        if ($account && $account->uid) {
          watchdog('search_by_page', 'Created indexing user %uid (%uname) for role %rid (%rname)', array(
            '%uid' => $account->uid,
            '%uname' => $account->name,
            '%rid' => $rid,
            '%rname' => $rolename,
          ), WATCHDOG_NOTICE);
        }
        else {
          $account = FALSE;
        }
      }

      // Remember which account serves this role for the next call.
      if ($account) {
        db_query('INSERT INTO {sbp_index_users} (rid, uid) VALUES (%d, %d)', $rid, $account->uid);
      }
    }

    // Add to return value, setting status bit temporarily to 1 (in-memory).
    if ($account) {
      $account->status = 1;
      $accounts[$rolename] = $account;
    }
    else {
      watchdog('search_by_page', 'Unable to set up an indexing user for role %rid (%rname)', array(
        '%rid' => $rid,
        '%rname' => $rolename,
      ), WATCHDOG_ERROR);
    }
  }
  return $accounts;
}

/**
 * Internal function: Finds and returns a single path record.
 *
 * The record is looked up by the first usable criterion, in this order:
 * $pid alone; $path within $module and $environment; $mid within $module
 * and $environment.
 *
 * @param $pid
 *    pid field value to search for (other args ignored if non-zero)
 * @param $path
 *    path field value to search for (must also supply $module, $environment)
 * @param $mid
 *    mid field value to search for (must also supply $module, $environment)
 * @param $module
 *    module field value to search for.
 * @param $environment
 *    environment field value to search for.
 *
 * @return
 *    Object with fields from the sbp_path table, if query succeeds. Callers
 *    in this file treat a false-like return value as "not found".
 */
function _search_by_page_lookup($pid = 0, $path = '', $mid = 0, $module = '', $environment = 0) {
  $select = 'SELECT * FROM {sbp_path} p WHERE ';

  // Shared restriction for the two lookups that are not by primary key.
  $scope = " AND p.from_module='%s' AND p.environment=%d";

  if ($pid) {
    $found = db_query($select . 'p.pid=%d', $pid);
  }
  elseif ($path) {
    $found = db_query($select . "p.page_path='%s'" . $scope, $path, $module, $environment);
  }
  else {
    $found = db_query($select . 'p.modid=%d' . $scope, $mid, $module, $environment);
  }

  return db_fetch_object($found);
}

/**
 * Internal function: updates the last index time.
 *
 * @param $pid
 *   ID of the path record in {sbp_path} to update.
 * @param $time
 *   Value to store as last_index_time. The default of zero is what the
 *   callers in this file use to make a page due for reindexing.
 */
function _search_by_page_update_last_time($pid, $time = 0) {
  $sql = 'UPDATE {sbp_path} SET last_index_time=%d WHERE pid=%d';
  db_query($sql, $time, $pid);
}

/**
 * Internal function: removes a path item from the paths and search database.
 *
 * Deletes the record from {sbp_path} first, then the matching entries in the
 * core search tables.
 *
 * @param $pid
 *   ID of the path to remove.
 */
function _search_by_page_remove_path($pid) {
  $sql = 'DELETE FROM {sbp_path} WHERE pid=%d';
  db_query($sql, $pid);

  _search_by_page_remove_searchinfo($pid);
}

/**
 * Internal function: removes a path item from the search database.
 *
 * Deletes the 'search_by_page' rows for the path from the core search
 * tables; the {sbp_path} record itself is left alone.
 *
 * @param $pid
 *   ID of the path to remove.
 */
function _search_by_page_remove_searchinfo($pid) {
  $search_tables = array(
    'search_dataset',
    'search_index',
    'search_node_links',
  );
  foreach ($search_tables as $table) {
    db_query('DELETE FROM {' . $table . "} WHERE type='search_by_page' AND sid=%d", $pid);
  }
}

/**
 * Internal function: causes the base form of translations to refresh.
 *
 * Registers the current text of the search tab name and of every
 * environment's labels, titles, and name as translation source strings with
 * the String Translation (i18nstrings) module. Does nothing when that module
 * is not enabled.
 */
function _search_by_page_refresh_translations() {
  if (!module_exists('i18nstrings')) {
    return;
  }

  // Site-wide string: the name of the tab on the core Search page.
  $tabname = variable_get('search_by_page_tabname', t('Pages'));
  i18nstrings_update('search_by_page:search_page:tab_name', $tabname);

  // Per-environment strings. The environment ID is appended directly to the
  // string name, matching the names used when the strings are looked up.
  $envs = search_by_page_list_environments();
  foreach ($envs as $environment) {
    $fieldlabel = search_by_page_setting_get('field_label', $environment, t('Search for'));
    $buttonlabel = search_by_page_setting_get('button_label', $environment, t('Search'));
    $blocktitle = search_by_page_setting_get('block_title', $environment, t('Search'));
    $pagetitle = search_by_page_setting_get('page_title', $environment, t('Search'));
    $envname = search_by_page_setting_get('environment_name', $environment, t('new'));
    i18nstrings_update('search_by_page:search_form:field_label' . $environment, $fieldlabel);
    i18nstrings_update('search_by_page:search_form:button_label' . $environment, $buttonlabel);
    i18nstrings_update('search_by_page:search_form:block_title' . $environment, $blocktitle);
    i18nstrings_update('search_by_page:search_form:page_title' . $environment, $pagetitle);
    i18nstrings_update('search_by_page:environment:name' . $environment, $envname);
  }
}

/**
 * Returns a list of the currently-defined environment IDs.
 *
 * The IDs are the keys of the 'search_by_page_settings' variable.
 *
 * @return
 *   Array of environment IDs; empty when the variable has not been set.
 */
function search_by_page_list_environments() {
  return array_keys(variable_get('search_by_page_settings', array()));
}

/**
 * Strips out contents of HTML tags that are excluded in the given environment.
 *
 * Modules should always call this function in their hook_sbp_details()
 * implementation. Note that it differs from the PHP strip_tags in that it
 * strips the text contained within the tags, as well as the tags.
 *
 * Only whole tag names are matched: excluding "s" removes <s>...</s> but
 * leaves <span> and <script> alone. If the text is not valid UTF-8, matching
 * falls back to byte mode instead of discarding the text.
 *
 * @param $text
 *   Text to be processed.
 * @param $envid
 *   Environment ID to use to find list of tags to exclude.
 *
 * @return
 *   $text with the specified tags (and their contents) from
 *   search_by_page_setting_get('exclude_tags', $envid, '') stripped out.
 */
function search_by_page_strip_tags($text, $envid) {
  $tags = search_by_page_setting_get('exclude_tags', $envid, '');

  // Make sure $tags is clean: lower-case letters, digits and underscores
  // only, split on spaces, without empty or repeated entries.
  $tags = preg_replace('/[^a-z0-9_ ]/', ' ', strtolower($tags));
  $tags = array_unique(array_filter(explode(' ', $tags)));

  // Strip tags and their contents, noting that tags could have attributes.
  foreach ($tags as $tag) {
    // The lookahead requires the tag name to end right there, so that a
    // short name does not match longer tag names starting with it. The
    // closing tag may contain whitespace before its ">".
    $pattern = '|<' . $tag . '(?=[\s/>])[^>]*>.*</' . $tag . '\s*>|isU';

    // With the "u" modifier preg_replace() returns NULL for text that is not
    // valid UTF-8. Tag names are plain ASCII, so byte mode is a safe
    // fallback; if that fails too, keep the text rather than lose it.
    $stripped = preg_replace($pattern . 'u', '', $text);
    if ($stripped === NULL) {
      $stripped = preg_replace($pattern, '', $text);
    }
    if ($stripped !== NULL) {
      $text = $stripped;
    }
  }
  return $text;
}

/**
 * Generates a detailed status listing for the Search status page.
 *
 * Rebuilds the paths table, then counts the paths that have never been
 * indexed (last_index_time of zero) per environment and module, and shows
 * the counts as a table with one row per environment and one column per
 * module. A row for the core content search is appended when it reports
 * remaining items.
 *
 * @return
 *   Array with a '#value' key holding the rendered table, or a short "fully
 *   indexed" message when nothing is waiting to be indexed.
 */
function _search_by_page_status_details() {
  _search_by_page_rebuild_all_paths();

  // Query to find number of un-indexed items by environment and module.
  $result = db_query('SELECT COUNT(*) as nonindexed, p.environment, p.from_module FROM {sbp_path} p WHERE p.last_index_time = 0 GROUP BY p.environment, p.from_module');

  // Collect the counts per environment and note which modules occur.
  $counts = array();
  $seen_modules = array();
  while ($record = db_fetch_object($result)) {
    if (!$record->nonindexed) {
      continue;
    }
    $counts[$record->environment][$record->from_module] = $record->nonindexed;
    $seen_modules[$record->from_module] = 1;
  }

  if (!count($counts)) {
    return array(
      '#value' => '<p>' . t('Search by Page is fully indexed.') . '</p>',
    );
  }

  // Column list: "Core" first, then the modules in alphabetical order.
  $columns = array_keys($seen_modules);
  sort($columns);
  array_unshift($columns, t('Core'));

  // One row per environment: its name followed by one cell per column.
  $rows = array();
  foreach ($counts as $envid => $per_module) {
    $cells = array(
      search_by_page_setting_get('environment_name', $envid, t('new')),
    );
    foreach ($columns as $column) {
      $cells[] = isset($per_module[$column]) ? $per_module[$column] : '';
    }
    $rows[] = $cells;
  }

  // Row for the core content search, padded to the width of the other rows.
  $core = node_search('status');
  if (!empty($core['remaining'])) {
    $core_cells = array(
      t('Core content search'),
      $core['remaining'],
    );
    $rows[] = array_pad($core_cells, count($columns) + 1, '');
  }

  // The column list, with a leading "Environment" cell, is the table header.
  array_unshift($columns, t('Environment'));
  $caption = t('Count of un-indexed items by environment and module');
  return array(
    '#value' => theme('table', $columns, $rows, array(), $caption),
  );
}

Related topics

Functions

Name (sort descending) Description
search_by_page_admin_overview Returns the admin overview page for module configuration.
search_by_page_admin_settings Returns the admin settings page for a single search environment.
search_by_page_admin_settings_submit Submit handler for search_by_page_admin_settings().
search_by_page_block Implementation of hook_block().
search_by_page_delete_confirm Returns a form confirming deletion of an environment.
search_by_page_delete_confirm_submit Submit callback for search_by_page_delete_confirm().
search_by_page_excerpt Returns a search excerpt, with matched keywords highlighted.
search_by_page_force_reindex Forces a page to be reindexed at the next cron run.
search_by_page_force_remove Removes a page from Search by Page.
search_by_page_form Returns a Forms API array for the search by pages form.
search_by_page_form_search_admin_settings_alter Implementation of hook_form_FORM_ID_alter().
search_by_page_form_submit Submit function for search_by_page_form().
search_by_page_list_environments Returns a list of the currently-defined environment IDs.
search_by_page_locale Implementation of hook_locale().
search_by_page_menu Implementation of hook_menu().
search_by_page_path_field_prefix Returns a suitable field prefix for a path.
search_by_page_path_parts Splits a path into the main path and any query parts.
search_by_page_perm Implementation of hook_perm().
search_by_page_rebuild_paths Rebuilds the paths table for a particular module.
search_by_page_reset_blank Resets pages that have no words in search index, so they'll be reindexed.
search_by_page_sbp_delete_environment Implementation of hook_sbp_delete_environment().
search_by_page_search Implementation of hook_search().
search_by_page_setting_get Returns a setting for Search by Page or a sub-module.
search_by_page_setting_set Sets a setting for Search by Page or a sub-module.
search_by_page_strip_tags Strips out contents of HTML tags that are excluded in the given environment.
search_by_page_theme Implementation of hook_theme().
search_by_page_unique_rewrite Returns unique where and join clauses, similar to _db_rewrite_sql().
search_by_page_update_index Implementation of hook_update_index().
template_preprocess_search_by_page_form Processes variables for search-by-page-form.tpl.php.
theme_search_by_page_no_results Themes a page saying there are no search results.
theme_search_by_page_results_title Returns the title of the results section of the search results page.
_search_by_page_do_search Internal function: performs a search, for hook_search($op = 'search').
_search_by_page_indexing_users Internal function: Returns a list of internal users to use for indexing.
_search_by_page_lookup Internal function: Finds and returns a single path record.
_search_by_page_rebuild_all_paths Internal function: rebuilds the paths table for all modules.
_search_by_page_refresh_translations Internal function: causes the base form of translations to refresh.
_search_by_page_remove_path Internal function: removes a path item from the paths and search database.
_search_by_page_remove_searchinfo Internal function: removes a path item from the search database.
_search_by_page_status_details Generates a detailed status listing for the Search status page.
_search_by_page_update_last_time Internal function: updates the last index time.
_search_by_page_view Returns rendered search form and/or results page.