You are here

protected function Database::indexItem in Search API 8

Indexes a single item on the specified index.

Used as a helper method in indexItems().

Parameters

\Drupal\search_api\IndexInterface $index: The index for which the item is being indexed.

\Drupal\search_api\Item\ItemInterface $item: The item to index.

Throws

\Exception: Any database (or other) exception encountered while indexing is passed on out of this method.

1 call to Database::indexItem()
Database::indexItems in modules/search_api_db/src/Plugin/search_api/backend/Database.php
Indexes the specified items.

File

modules/search_api_db/src/Plugin/search_api/backend/Database.php, line 1230

Class

Database
Indexes and searches items using the database.

Namespace

Drupal\search_api_db\Plugin\search_api\backend

Code

/**
 * Indexes a single item on the specified index.
 *
 * Used as a helper method in indexItems().
 *
 * Inside a single database transaction, the item's existing rows are deleted
 * from the denormalized index table and from each field's table, and are then
 * re-inserted from the item's current field values. Text fields are split
 * into unique, scored tokens; all other fields are stored as de-duplicated
 * values.
 *
 * @param \Drupal\search_api\IndexInterface $index
 *   The index for which the item is being indexed.
 * @param \Drupal\search_api\Item\ItemInterface $item
 *   The item to index.
 *
 * @throws \Exception
 *   Any encountered database (or other) exceptions are passed on, out of this
 *   method, after the transaction has been rolled back.
 */
protected function indexItem(IndexInterface $index, ItemInterface $item) {
  $fields = $this->getFieldInfo($index);
  $fields_updated = FALSE;
  $db_info = $this->getIndexDbInfo($index);
  $denormalized_table = $db_info['index_table'];
  $item_id = $item->getId();

  // IDs of the fields detected as multi-valued while indexing this item. Kept
  // separately so the flags survive a reload of $db_info (see below).
  $multi_valued_fields = [];
  // Table used for fulltext tokens; set when the first text field is seen.
  $text_table = NULL;

  $transaction = $this->database->startTransaction('search_api_db_indexing');
  try {
    // Remove the item from the denormalized table.
    $this->database->delete($denormalized_table)
      ->condition('item_id', $item_id)
      ->execute();

    $denormalized_values = [];
    $text_inserts = [];
    $item_fields = $item->getFields();
    $item_fields += $this->getSpecialFields($index, $item);

    foreach ($item_fields as $field_id => $field) {
      // Sometimes index changes are not triggering the update hooks
      // correctly. Therefore, to avoid DB errors, we re-check the tables
      // here (at most once per item) before indexing.
      if (empty($fields[$field_id]['table']) && !$fields_updated) {
        $this->fieldsUpdated($index);
        $fields_updated = TRUE;

        // The local $db_info copy is not passed to fieldsUpdated(), so it
        // cannot reflect any tables created there. Re-read the DB info so
        // that the new field tables are actually used below and so that the
        // final save does not overwrite them with stale data.
        // NOTE(review): assumes fieldsUpdated() persists its changes so that
        // getIndexDbInfo() returns them - confirm against that method.
        $db_info = $this->getIndexDbInfo($index);
        foreach ($multi_valued_fields as $multi_valued_id) {
          if (isset($db_info['field_tables'][$multi_valued_id])) {
            $db_info['field_tables'][$multi_valued_id]['multi-valued'] = TRUE;
          }
        }
        $fields = $db_info['field_tables'];
      }
      if (empty($fields[$field_id]['table'])) {
        // Log an error for the unknown field. Since a superfluous field is
        // not too serious, we just index the rest of the item normally.
        $this->getLogger()
          ->warning("Unknown field @field: please check (and re-save) the index's fields settings.", [
            '@field' => $field_id,
          ]);
        continue;
      }

      $field_info = $fields[$field_id];
      $table = $field_info['table'];
      $column = $field_info['column'];

      // Remove the item's previous values for this field.
      $this->database->delete($table)
        ->condition('item_id', $item_id)
        ->execute();

      $type = $field->getType();
      $values = [];
      foreach ($field->getValues() as $field_value) {
        $converted_value = $this->convert($field_value, $type, $field->getOriginalType(), $index);

        // Don't add NULL values to the array of values. Also, adding an empty
        // array is, of course, a waste of time.
        if (($converted_value ?? []) !== []) {
          $values = array_merge($values, is_array($converted_value) ? $converted_value : [
            $converted_value,
          ]);
        }
      }

      if (!$values) {
        // SQLite sometimes has problems letting columns not present in an
        // INSERT statement default to NULL, so we set NULL values for the
        // denormalized table explicitly.
        $denormalized_values[$column] = NULL;
        continue;
      }

      // If the field contains more than one value, we remember that the field
      // can be multi-valued.
      if (count($values) > 1) {
        $db_info['field_tables'][$field_id]['multi-valued'] = TRUE;
        $multi_valued_fields[] = $field_id;
      }

      if ($this->getDataTypeHelper()->isTextType($type)) {
        // Remember the text table the first time we encounter it.
        if ($text_table === NULL) {
          $text_table = $table;
        }
        $unique_tokens = [];
        $denormalized_value = '';

        /** @var \Drupal\search_api\Plugin\search_api\data_type\value\TextTokenInterface $token */
        foreach ($values as $token) {
          $word = $token->getText();
          $score = $token->getBoost() * $item->getBoost();

          // In rare cases, tokens with leading or trailing whitespace can
          // slip through. Since this can lead to errors when such tokens are
          // part of a primary key (as in this case), we trim such whitespace
          // here.
          $word = trim($word);

          // Store the first 30 characters of the string as the denormalized
          // value.
          if (mb_strlen($denormalized_value) < 30) {
            $denormalized_value .= $word . ' ';
          }

          // Skip words that are too short, except for numbers.
          // NOTE(review): for numeric tokens made up only of "0" and "-"
          // characters (e.g. "0" or "-0") the ltrim() below yields an empty
          // word, which is still indexed - confirm that this is intended.
          if (is_numeric($word)) {
            $word = ltrim($word, '-0');
          }
          elseif (mb_strlen($word) < $this->configuration['min_chars']) {
            continue;
          }

          // Taken from core search to reflect less importance of words later
          // in the text.
          // Focus is a decaying value in terms of the amount of unique words
          // up to this point. From 100 words and more, it decays, to (for
          // example) 0.5 at 500 words and 0.3 at 1000 words.
          $score *= min(1, 0.01 + 3.5 / (2 + count($unique_tokens) * 0.015));

          // Only insert each canonical base form of a word once. Repeated
          // occurrences add to the score of the first occurrence.
          $word_base_form = $this->dbmsCompatibility->preprocessIndexValue($word);
          if (!isset($unique_tokens[$word_base_form])) {
            $unique_tokens[$word_base_form] = [
              'value' => $word,
              'score' => $score,
            ];
          }
          else {
            $unique_tokens[$word_base_form]['score'] += $score;
          }
        }

        $denormalized_values[$column] = mb_substr(trim($denormalized_value), 0, 30);

        if ($unique_tokens) {
          $field_name = static::getTextFieldName($field_id);
          $boost = $field_info['boost'];
          foreach ($unique_tokens as $token) {
            $score = round($token['score'] * $boost * self::SCORE_MULTIPLIER);

            // Take care that the score doesn't exceed the maximum value for
            // the database column (2^32-1). Clamp before casting, since
            // casting an out-of-range float to an integer is undefined.
            $score = (int) min($score, 4294967295);
            $text_inserts[] = [
              'item_id' => $item_id,
              'field_name' => $field_name,
              'word' => $token['value'],
              'score' => $score,
            ];
          }
        }
      }
      else {
        // The denormalized table stores only the first value of the field.
        $denormalized_values[$column] = reset($values);

        // Make sure no duplicate values are inserted (which would lead to a
        // database exception).
        // Use the canonical base form of the value for the comparison to
        // avoid not catching different values that are duplicates under the
        // database table's collation.
        $case_insensitive_unique_values = [];
        foreach ($values as $value) {
          $value_base_form = $this->dbmsCompatibility->preprocessIndexValue("{$value}", 'field');

          // We still insert the value in its original case.
          $case_insensitive_unique_values[$value_base_form] = $value;
        }
        $values = array_values($case_insensitive_unique_values);

        $insert = $this->database->insert($table)
          ->fields([
            'item_id',
            'value',
          ]);
        foreach ($values as $value) {
          $insert->values([
            'item_id' => $item_id,
            'value' => $value,
          ]);
        }
        $insert->execute();
      }
    }

    // Re-insert the item's row into the denormalized table.
    $this->database->insert($denormalized_table)
      ->fields(array_merge($denormalized_values, [
        'item_id' => $item_id,
      ]))
      ->execute();

    // Insert the tokens of all text fields with a single query.
    if ($text_inserts && $text_table !== NULL) {
      $query = $this->database->insert($text_table)
        ->fields([
          'item_id',
          'field_name',
          'word',
          'score',
        ]);
      foreach ($text_inserts as $row) {
        $query->values($row);
      }
      $query->execute();
    }

    // In case any new fields were detected as multi-valued, we re-save the
    // index's DB info.
    $this->getKeyValueStore()->set($index->id(), $db_info);
  }
  catch (\Throwable $e) {
    // Roll back on PHP errors as well as exceptions, so that a half-indexed
    // item is never committed. The original throwable is passed on unchanged.
    $transaction->rollBack();
    throw $e;
  }
}