You are here

function search_by_page_update_index in Search by Page 6

Same name and namespace in other branches
  1. 8 search_by_page.module \search_by_page_update_index()
  2. 7 search_by_page.module \search_by_page_update_index()

Implementation of hook_update_index().

Indexes the site's pages on each cron run, or at least as many of them as the search cron limit allows.

See also

_search_by_page_rebuild_all_paths()

File

./search_by_page.module, line 519
Main module file for Drupal module Search by Page.

Code

/**
 * Implementation of hook_update_index().
 *
 * Indexes a batch of Search by Page paths on each cron run. The batch size is
 * the 'sbp_cron_limit' variable, falling back to core's 'search_cron_limit'.
 *
 * For each selected path the function switches the global user and language
 * to the ones recorded for the path, obtains the page content (either from
 * the owning module's hook_sbp_details() result or by rendering the path),
 * and adds it to the search index. Paths that cannot be indexed are removed
 * from the index and the reason is logged. The global user, language and
 * session-saving state are restored before returning.
 *
 * @see _search_by_page_rebuild_all_paths()
 */
function search_by_page_update_index() {
  global $user;
  global $language;

  // $_GET is a superglobal, so it needs no "global" declaration.
  $save_user = $user;
  $save_language = $language;
  $save_get = $_GET;
  $save_sessions = session_save_session();

  // Rebuild the list of paths to index, resetting ones whose time has come.
  _search_by_page_rebuild_all_paths(TRUE);
  $users = _search_by_page_indexing_users();

  // Figure out which pages to index this run - the ones that haven't been
  // indexed ever or re-indexed recently, up to limits of cron.
  $core_limit = (int) variable_get('search_cron_limit', 100);
  $limit = (int) variable_get('sbp_cron_limit', $core_limit);
  $result = db_query_range('SELECT * FROM {sbp_path} p WHERE (p.last_index_time = 0) OR (p.min_time > 0 AND %d > (p.last_index_time + p.min_time)) ORDER BY p.last_index_time', time(), 0, $limit);

  // Index each page, but don't save sessions for the indexing users.
  session_save_session(FALSE);

  // Make sure output from rendering pages does not screw up the cron run.
  ob_start();
  $reasons = array(
    MENU_ACCESS_DENIED => t('access denied'),
    MENU_NOT_FOUND => t('not found'),
    MENU_SITE_OFFLINE => t('site offline'),
  );
  $allroles = user_roles();
  $languages = language_list();
  while ($item = db_fetch_object($result)) {

    // Reset per-item state. $path is only filled in when the page is actually
    // rendered, but it is reported in the failure log below, so it must not
    // carry over from a previous item or be left undefined.
    $path = '';

    // Look up the indexing user for this item's role. The role may have been
    // deleted, or no indexing user may exist for it, so guard both lookups.
    $role = $item->role;
    $rolename = isset($allroles[$role]) ? $allroles[$role] : NULL;
    $index_user = (isset($rolename) && isset($users[$rolename])) ? $users[$rolename] : NULL;

    // No matter what, we want to update the time so it's marked as indexed.
    // That way, if there is an error it will at least not hold up other pages
    // in the next cron run.
    _search_by_page_update_last_time($item->pid, time());
    if (!$index_user) {
      // Do not overwrite the global $user with an empty value; just log and
      // move on to the next item.
      watchdog('search_by_page', 'Role %rid (%rname) could not be used to index PID (%pid), path (%path)', array(
        '%rid' => $role,
        '%rname' => $rolename,
        '%pid' => $item->pid,
        '%path' => $item->page_path,
      ), WATCHDOG_NOTICE);

      // Remove content from the search index.
      _search_by_page_remove_searchinfo($item->pid);
      continue;
    }

    // Set up user and language for this item.
    $user = $index_user;
    $language = $languages[$item->language];

    // Get page title from module and optional content
    $info = module_invoke($item->from_module, 'sbp_details', $item->modid, $item->environment);
    if (!$info) {

      // Module indicated not to index this page after all
      watchdog('search_by_page', 'Content was skipped - PID (%pid), path (%path)', array(
        '%pid' => $item->pid,
        '%path' => $item->page_path,
      ), WATCHDOG_NOTICE);

      // Remove content from the search index.
      _search_by_page_remove_searchinfo($item->pid);
      continue;
    }

    // Check for module override of content.
    $content = isset($info['content']) ? $info['content'] : NULL;
    if (!isset($content)) {

      // Module didn't return content. Render the page to get it.
      // Separate path into main part and query.
      $parts = search_by_page_path_parts($item->page_path);
      $path = drupal_get_normal_path($parts[0]);
      $_GET['q'] = $path;

      // Add query to $_GET as it would be on a page request. The array union
      // keeps keys that are already present (including 'q' set above).
      if (isset($parts[1])) {
        $getstuff = array();
        parse_str($parts[1], $getstuff);
        $_GET += $getstuff;
      }
      $content = menu_execute_active_handler($path);
      $_GET = $save_get;
    }
    if (!isset($content)) {
      $content = 0;
    }

    // $content will be either a string or an error code, if rendered.
    // Note that an empty string is acceptable for content -- it could just
    // be a page with nothing on it except the title.
    if (!is_int($content)) {
      // The title is optional; avoid a notice when the module omitted it.
      if (!empty($info['title'])) {
        $content = '<h1>' . $info['title'] . '</h1> ' . $content;
      }
      $content = search_by_page_strip_tags($content, $item->environment);

      // Add to search index (already marked as updated).
      search_index($item->pid, 'search_by_page', $content);
    }
    else {
      // Translate known menu status codes into a readable reason for the log;
      // unknown codes are logged as the raw integer.
      $con = $content;
      if (isset($reasons[$con])) {
        $con = $reasons[$con];
      }
      if ($content == MENU_SITE_OFFLINE) {

        // In this one case, we want to reset this item so it gets indexed
        // next time for sure. But there is no point in continuing the cron
        // run if the site is off-line.
        _search_by_page_update_last_time($item->pid, 0);
        watchdog('search_by_page', 'Site is off-line, cannot index content', array(), WATCHDOG_NOTICE);
        break;
      }

      // Remove content from the search index.
      _search_by_page_remove_searchinfo($item->pid);
      watchdog('search_by_page', 'Content not rendered (%con) - PID (%pid), path (%path), realpath (%real), language (%lang)', array(
        '%pid' => $item->pid,
        '%path' => $item->page_path,
        '%real' => $path,
        '%lang' => $item->language,
        '%con' => $con,
      ), WATCHDOG_ERROR);
    }
  }

  // Switch user back, rebuild permissions cache, flush output buffer.
  $user = $save_user;
  user_access('', $user, TRUE);
  $language = $save_language;
  session_save_session($save_sessions);
  ob_end_clean();
}