You are here

function search_by_page_update_index in Search by Page 8

Same name and namespace in other branches
  1. 6 search_by_page.module \search_by_page_update_index()
  2. 7 search_by_page.module \search_by_page_update_index()

Implements hook_update_index().

Indexes the site's pages, or at least some of them (up to the cron limit on searches), on each cron run.

See also

_search_by_page_rebuild_all_paths()

File

./search_by_page.module, line 551
Main module file for Drupal module Search by Page.

Code

/**
 * Implements hook_update_index().
 *
 * Selects up to the configured cron limit of rows from {search_by_page_path}
 * that have never been indexed, or whose re-index interval has elapsed, and
 * indexes each one while impersonating the indexing user for the row's role
 * and switching to the row's language.
 *
 * For every selected row the last-index time is updated first, so a failing
 * page cannot block other pages on the next cron run. Rows that cannot be
 * indexed (no usable user, module says to skip, page not rendered) are
 * removed from the search index and logged.
 *
 * The global user, global language, session-saving flag and output buffer
 * are always restored when the function exits, even if indexing a page
 * throws.
 *
 * @see _search_by_page_rebuild_all_paths()
 */
function search_by_page_update_index() {
  global $user;
  global $language;
  $save_user = $user;
  $save_language = $language;
  $save_sessions = drupal_save_session();

  // Rebuild the list of paths to index, resetting ones whose time has come.
  _search_by_page_rebuild_all_paths(TRUE);
  $users = _search_by_page_indexing_users();

  // Figure out which pages to index this run - the ones that haven't been
  // indexed ever or re-indexed recently, up to limits of cron. The module's
  // own limit defaults to the core search cron limit.
  $core_limit = (int) \Drupal::state()
    ->get('search_cron_limit', 100);
  $limit = (int) \Drupal::state()
    ->get('search_by_page_cron_limit', $core_limit);
  $result = \Drupal::database()
    ->queryRange('SELECT * FROM {search_by_page_path} p WHERE (p.last_index_time = 0) OR (p.min_time > 0 AND :currtime > (p.last_index_time + p.min_time)) ORDER BY p.last_index_time', 0, $limit, [
    ':currtime' => time(),
  ])
    ->fetchAll();

  // Index each page, but don't save sessions for the indexing users.
  drupal_save_session(FALSE);

  // Make sure output from rendering pages does not screw up the cron run.
  ob_start();

  try {
    // Human-readable labels for the error codes page rendering may return.
    $reasons = [
      MENU_ACCESS_DENIED => t('access denied'),
      MENU_NOT_FOUND => t('not found'),
      MENU_SITE_OFFLINE => t('site offline'),
    ];
    $allroles = user_roles();

    // Use the language manager service: the LanguageManager class cannot be
    // constructed without its default-language dependency.
    $languages = \Drupal::languageManager()
      ->getLanguages();
    foreach ($result as $item) {

      // Set up language and user. A role that no longer exists, or that has
      // no indexing user, yields an empty $user and is handled below.
      $role = $item->role;
      $rolename = isset($allroles[$role]) ? $allroles[$role] : NULL;
      $user = ($rolename !== NULL && isset($users[$rolename])) ? $users[$rolename] : NULL;

      // No matter what, we want to update the time so it's marked as indexed.
      // That way, if there is an error it will at least not hold up other
      // pages in the next cron run.
      _search_by_page_update_last_time($item->pid, time());
      if (!$user) {
        \Drupal::logger('search_by_page')
          ->notice('Role %rid (%rname) could not be used to index PID (%pid), path (%path)', [
          '%rid' => $role,
          '%rname' => $rolename,
          '%pid' => $item->pid,
          '%path' => $item->page_path,
        ]);

        // Remove content from the search index.
        _search_by_page_remove_searchinfo($item->pid);
        continue;
      }

      // Fall back to the language that was active on entry when the stored
      // language code is not a known language, rather than setting the
      // global language to NULL.
      $language = isset($languages[$item->language]) ? $languages[$item->language] : $save_language;

      // Get page title from module and optional content.
      $info = module_invoke($item->from_module, 'search_by_page_details', $item->modid, $item->environment);
      if (!$info) {

        // Module indicated not to index this page after all.
        \Drupal::logger('search_by_page')
          ->notice('Content was skipped - PID (%pid), path (%path)', [
          '%pid' => $item->pid,
          '%path' => $item->page_path,
        ]);

        // Remove content from the search index.
        _search_by_page_remove_searchinfo($item->pid);
        continue;
      }

      // Check for module override of content.
      $content = '';
      if (!isset($info['content'])) {

        // No module override - render page to get the content.
        $content = search_by_page_page_content($item->page_path);
      }
      else {
        $content = $info['content'];
      }
      if (!isset($content)) {
        // Treat "no content returned" as an (unnamed) integer error code.
        $content = 0;
      }

      // $content will be either a string if found/allowed, or an error code.
      // Note that an empty string is acceptable for content -- it could just
      // be a page with nothing on it except the title.
      if (!is_int($content)) {
        // The title is optional in the module-provided details.
        if (!empty($info['title'])) {
          $content = '<h1>' . $info['title'] . '</h1> ' . $content;
        }
        $content = search_by_page_strip_tags($content, $item->environment);

        // Add to search index (already marked as updated).
        search_index($item->pid, 'search_by_page', $content);

        // Add to SBP table.
        \Drupal::database()
          ->update('search_by_page_path')
          ->condition('pid', $item->pid)
          ->fields([
          'page_data' => $content,
        ])
          ->execute();
      }
      else {
        // Translate the error code to a readable reason when one is known.
        $con = $content;
        if (isset($reasons[$con])) {
          $con = $reasons[$con];
        }

        // If the site was off-line, abort the cron run.
        if ($content == MENU_SITE_OFFLINE) {

          // In this one case, we want to reset this item so it gets indexed
          // next time for sure. But there is no point in continuing the cron
          // run if the site is off-line.
          _search_by_page_update_last_time($item->pid, 0);
          \Drupal::logger('search_by_page')
            ->notice('Site is off-line, cannot index content');
          break;
        }

        // Remove content from the search index.
        _search_by_page_remove_searchinfo($item->pid);
        \Drupal::logger('search_by_page')
          ->error('Content not rendered (%con) - PID (%pid), path (%path), language (%lang)', [
          '%pid' => $item->pid,
          '%path' => $item->page_path,
          '%lang' => $item->language,
          '%con' => $con,
        ]);
      }
    }
  }
  finally {
    // Switch user back and rebuild permissions cache. This runs on every
    // exit path so a failure while indexing one page cannot leave cron
    // running as an indexing user, in the wrong language, with session
    // saving disabled, or with a dangling output buffer.
    $user = $save_user;
    drupal_static_reset('user_access');
    $language = $save_language;
    drupal_save_session($save_sessions);
    _search_by_page_setup_theme();
    ob_end_clean();
  }
}