function search_index in Zircon Profile 8
Same name and namespace in other branches
- 8.0 core/modules/search/search.module \search_index()
Updates the full-text search index for a particular item.
Parameters
string $type: The plugin ID or other machine-readable type of this item, which should be less than 64 bytes.
int $sid: An ID number identifying this particular item (e.g., node ID).
string $langcode: Language code for the language of the text being indexed.
string $text: The content of this item. Must be a piece of HTML or plain text.
Related topics
3 calls to search_index()
- NodeSearch::indexNode in core/modules/node/src/Plugin/Search/NodeSearch.php - Indexes a single node.
- SearchMatchTest::_setup in core/modules/search/src/Tests/SearchMatchTest.php - Set up a small index of items to test against.
- SearchMultilingualEntityTest::testMultilingualSearch in core/modules/search/src/Tests/SearchMultilingualEntityTest.php - Tests the indexing throttle and search results with multilingual nodes.
14 string references to 'search_index'
- CommentDefaultFormatter::viewElements in core/modules/comment/src/Plugin/Field/FieldFormatter/CommentDefaultFormatter.php - Builds a renderable array for a field value.
- CommentLinkBuilder::buildCommentedEntityLinks in core/modules/comment/src/CommentLinkBuilder.php - Builds links for the given entity.
- CommentLinkBuilderTest::getLinkCombinations in core/modules/comment/tests/src/Unit/CommentLinkBuilderTest.php - Data provider for ::testCommentLinkBuilder.
- d6_view_modes.yml in core/modules/node/migration_templates/d6_view_modes.yml - core/modules/node/migration_templates/d6_view_modes.yml
- drupal7.php in core/modules/migrate_drupal/tests/fixtures/drupal7.php - A database agnostic dump for testing purposes.
File
- core/modules/search/search.module, line 430 - Enables site-wide keyword searching.
Code
/**
 * Updates the full-text search index for a particular item.
 *
 * The text is split into HTML tags and plain-text runs. Every word of a
 * plain-text run receives a score that depends on the weights of the tags
 * currently open around it and on a "focus" factor that decays as more unique
 * words are seen. The previous index entries for the item are cleared, the
 * cleaned-up text is written to the 'search_dataset' table, and each scored
 * word is written to the 'search_index' table.
 *
 * @param string $type
 *   The plugin ID or other machine-readable type of this item, which should
 *   be less than 64 bytes.
 * @param int $sid
 *   An ID number identifying this particular item (e.g., node ID).
 * @param string $langcode
 *   Language code for the language of the text being indexed.
 * @param string $text
 *   The content of this item. Must be a piece of HTML or plain text.
 */
function search_index($type, $sid, $langcode, $text) {
  $config = \Drupal::config('search.settings');
  $minimum_word_size = $config->get('index.minimum_word_size');

  // Multipliers for scores of words inside certain HTML tags. The weights are
  // stored in config so that modules can overwrite the default weights.
  // Note: 'a' must be included for link ranking to work.
  $tags = $config->get('index.tag_weights');

  // Strip off all ignored tags to speed up processing, but insert space before
  // and after them to keep word boundaries.
  $text = str_replace(array('<', '>'), array(' <', '> '), $text);
  $text = strip_tags($text, '<' . implode('><', array_keys($tags)) . '>');

  // Split HTML tags from plain text.
  $split = preg_split('/\\s*<([^>]+?)>\\s*/', $text, -1, PREG_SPLIT_DELIM_CAPTURE);
  // Note: PHP ensures the array consists of alternating delimiters and literals
  // and begins and ends with a literal (inserting $null as required).

  // Odd/even counter. Tag or no tag.
  $tag = FALSE;
  // Starting score per word.
  $score = 1;
  // Accumulator for cleaned up data.
  $accum = ' ';
  // Stack with open tags; the most recently opened tag is at index 0.
  $tagstack = array();
  // Counter for consecutive words.
  $tagwords = 0;
  // Focus state.
  $focus = 1;
  // Accumulator for words for index.
  $scored_words = array();

  foreach ($split as $value) {
    if ($tag) {
      // Increase or decrease score per word based on tag. The tag name is
      // everything up to the first whitespace character: attributes may be
      // separated from the name by a tab or line break, not only by a space.
      // A capture that starts with whitespace yields an empty name, which is
      // handled below as an unweighted tag.
      $tagname = substr($value, 0, strcspn($value, " \t\r\n\f\v"));
      $tagname = Unicode::strtolower($tagname);

      // Closing or opening tag?
      if (strpos($tagname, '/') === 0) {
        $tagname = (string) substr($tagname, 1);
        // If we encounter unexpected tags, reset score to avoid incorrect
        // boosting.
        if (!count($tagstack) || $tagstack[0] !== $tagname) {
          $tagstack = array();
          $score = 1;
        }
        else {
          // Remove from tag stack and decrement score. A tag without a
          // configured weight contributes nothing to the score.
          $closed = array_shift($tagstack);
          $weight = isset($tags[$closed]) ? $tags[$closed] : 0;
          $score = max(1, $score - $weight);
        }
      }
      else {
        if (isset($tagstack[0]) && $tagstack[0] === $tagname) {
          // None of the tags we look for make sense when nested identically.
          // If they are, it's probably broken HTML.
          $tagstack = array();
          $score = 1;
        }
        else {
          // Add to open tag stack and increment score. A tag without a
          // configured weight contributes nothing to the score.
          array_unshift($tagstack, $tagname);
          $score += isset($tags[$tagname]) ? $tags[$tagname] : 0;
        }
      }
      // A tag change occurred, reset counter.
      $tagwords = 0;
    }
    else {
      // Note: use of PREG_SPLIT_DELIM_CAPTURE above will introduce empty
      // values.
      if ($value != '') {
        $words = search_index_split($value, $langcode);
        foreach ($words as $word) {
          // Add word to accumulator.
          $accum .= $word . ' ';
          // Check word length; numbers are always indexed.
          if (is_numeric($word) || Unicode::strlen($word) >= $minimum_word_size) {
            if (!isset($scored_words[$word])) {
              $scored_words[$word] = 0;
            }
            $scored_words[$word] += $score * $focus;
            // Focus is a decaying value in terms of the amount of unique words
            // up to this point. It stays at 1 up to roughly 100 words and then
            // decays, to about 0.38 at 500 words and 0.22 at 1000 words.
            $focus = min(1, 0.01 + 3.5 / (2 + count($scored_words) * 0.015));
          }
          $tagwords++;
          // Too many words inside a single tag probably mean a tag was
          // accidentally left open.
          if (count($tagstack) && $tagwords >= 15) {
            $tagstack = array();
            $score = 1;
          }
        }
      }
    }
    $tag = !$tag;
  }

  // Remove the item $sid from the search index, and invalidate the relevant
  // cache tags.
  search_index_clear($type, $sid, $langcode);

  // Insert cleaned up data into dataset.
  db_insert('search_dataset')
    ->fields(array(
      'sid' => $sid,
      'langcode' => $langcode,
      'type' => $type,
      'data' => $accum,
      'reindex' => 0,
    ))
    ->execute();

  // Insert results into search index.
  foreach ($scored_words as $word => $word_score) {
    // If a word already exists in the database, its score gets increased
    // appropriately. If not, we create a new record with the appropriate
    // starting score.
    db_merge('search_index')
      ->keys(array(
        'word' => $word,
        'sid' => $sid,
        'langcode' => $langcode,
        'type' => $type,
      ))
      ->fields(array(
        'score' => $word_score,
      ))
      ->expression('score', 'score + :score', array(
        ':score' => $word_score,
      ))
      ->execute();
    search_dirty($word);
  }
}