function apachesolr_add_tags_to_document in Apache Solr Search 6
Same name and namespace in other branches
- 5.2 apachesolr.index.inc \apachesolr_add_tags_to_document()
- 6.2 apachesolr.index.inc \apachesolr_add_tags_to_document()
Extract HTML tag contents from $text and add to boost fields.
$text must be stripped of control characters beforehand.
1 call to apachesolr_add_tags_to_document()
- apachesolr_node_to_document in ./apachesolr.index.inc - Given a node ID, return a document representing that node.
File
- ./apachesolr.index.inc, line 219 - Functions used when indexing content to Apache Solr.
Code
/**
 * Extracts the contents of selected HTML tags and adds them to boost fields.
 *
 * The tag-to-field map is read from the 'apachesolr_tags_to_index' variable
 * (keys are tag names, values are the document property that collects the
 * text found inside that tag). Every other tag is stripped from $text before
 * matching. The cleaned contents of each match are appended, separated by a
 * single space, to the mapped property of $document.
 *
 * Links whose text looks like a URL are skipped, since those are typically
 * generated automatically by the URL filter and carry no extra meaning.
 *
 * @param $document
 *   The document object to add the boost fields to. It is modified in place.
 * @param $text
 *   The HTML to scan. It must be stripped of control characters beforehand.
 */
function apachesolr_add_tags_to_document($document, $text) {
  $tags_to_index = variable_get('apachesolr_tags_to_index', array(
    'h1' => 'tags_h1',
    'h2' => 'tags_h2_h3',
    'h3' => 'tags_h2_h3',
    'h4' => 'tags_h4_h5_h6',
    'h5' => 'tags_h4_h5_h6',
    'h6' => 'tags_h4_h5_h6',
    'u' => 'tags_inline',
    'b' => 'tags_inline',
    'i' => 'tags_inline',
    'strong' => 'tags_inline',
    'em' => 'tags_inline',
    'a' => 'tags_a',
  ));

  // Without any tag to look for the pattern below would degenerate into
  // '<()...' and match unrelated markup, so there is nothing to do.
  if (empty($tags_to_index) || !is_array($tags_to_index)) {
    return;
  }

  // Tags are matched case-insensitively and looked up in lower case below,
  // so normalise the configured keys the same way.
  $tags_to_index = array_change_key_case($tags_to_index, CASE_LOWER);
  $tag_names = array_keys($tags_to_index);

  // Strip off all ignored tags.
  $text = strip_tags($text, '<' . implode('><', $tag_names) . '>');

  // Escape the configured names so they cannot alter the pattern.
  $quoted_names = array();
  foreach ($tag_names as $tag_name) {
    $quoted_names[] = preg_quote($tag_name, '@');
  }

  // Modifiers: U = ungreedy (stop at the nearest closing tag), i = case
  // insensitive, s = let '.' match newlines so that tag contents spanning
  // several lines are captured too. The word boundary keeps a short name
  // (e.g. 'a') from matching the start of a longer allowed tag.
  $pattern = '@<(' . implode('|', $quoted_names) . ')\\b[^>]*>(.*)</\\1>@Uis';
  if (!preg_match_all($pattern, $text, $matches)) {
    return;
  }

  foreach ($matches[1] as $key => $tag) {
    $tag = strtolower($tag);
    $contents = $matches[2][$key];

    // We don't want to index links auto-generated by the url filter.
    if ($tag === 'a' && preg_match('@(?:http://|https://|ftp://|mailto:|smb://|afp://|file://|gopher://|news://|ssl://|sslv2://|sslv3://|tls://|tcp://|udp://|www\\.)[a-zA-Z0-9]+@', $contents)) {
      continue;
    }

    $field = $tags_to_index[$tag];
    if (!isset($document->{$field})) {
      $document->{$field} = '';
    }
    $document->{$field} .= ' ' . apachesolr_clean_text($contents);
  }
}