apachesolr_multilingual_confgen.generator.inc in Apache Solr Multilingual 6.3
Same filename and directory in other branches
Schema generator for multilingual search
@author Markus Kalkbrenner (mkalkbrenner) | bio.logis GmbH
File
apachesolr_multilingual_confgen/apachesolr_multilingual_confgen.generator.inc (View source)
<?php
/**
* @file
* Schema generator for multilingual search
*
* @see apachesolr_multilingual.module
* @see apachesolr.module
*
* @author Markus Kalkbrenner (mkalkbrenner) | bio.logis GmbH
* @see http://drupal.org/user/124705
*/
/**
 * Adds language specific types, fields and copy fields to a Solr schema.
 *
 * For every language returned by apachesolr_multilingual_language_list() the
 * schema is cloned, the analyzer filters of the cloned field types are
 * configured with the values read through
 * apachesolr_multilingual_confgen_variable_get() for that language, and the
 * renamed types, fields, dynamic fields and copy fields are inserted into
 * $qp_schema next to their language neutral originals.
 *
 * If a stemmer is configured that is not offered for $solr_version, an error
 * message is set, drupal_not_found() is called and the script exits.
 *
 * The function has no return statement; its result is the modified $qp_schema.
 *
 * @param $qp_schema
 *   Query object wrapping the schema document (presumably a QueryPath
 *   instance - confirm against the caller). It is modified in place.
 * @param $solr_version
 *   Targeted Solr version; passed on to the stemmer lookup helpers.
 */
function apachesolr_multilingual_confgen_modify_schema($qp_schema, $solr_version) {
foreach (apachesolr_multilingual_language_list() as $language_id => $language) {
// All per-language changes are made on a clone; only the finished elements
// are moved into $qp_schema via insertAfter().
$qp_schema_language_specific = apachesolr_confgen_clone_qp($qp_schema);
// Add a language specific copy of the 'path' field (i18n_path_<language id>)
// right after the original 'path' field.
$qp_schema_language_specific
->find(':root')
->xpath("fields/field[@name='path']")
->attr('name', 'i18n_path_' . $language_id)
->insertAfter($qp_schema
->find(':root')
->xpath("fields/field[@name='path']"));
// Configure the analyzers of the 'text' and 'text_und' field types.
foreach (array(
'text',
'text_und',
) as $type) {
// The accent mapping file is only made language specific for 'text'.
if ('text_und' != $type) {
$qp_schema_language_specific
->find(':root')
->xpath("types/fieldType[@name='{$type}']//charFilter[@class='solr.MappingCharFilterFactory']")
->attr('mapping', 'mapping-ISOLatin1Accent_' . $language_id . '.txt');
}
// Stop words: language specific file and case handling.
$qp_schema_language_specific
->find(':root')
->xpath("types/fieldType[@name='{$type}']//filter[@class='solr.StopFilterFactory']")
->attr('ignoreCase', apachesolr_multilingual_confgen_variable_get('apachesolr_multilingual_ignoreCase_stopwords', $language_id))
->attr('words', 'stopwords_' . $language_id . '.txt');
// Word delimiter settings of the index analyzer ('*_index' settings).
$qp_schema_language_specific
->find(':root')
->xpath("types/fieldType[@name='{$type}']/analyzer[@type='index']/filter[@class='solr.WordDelimiterFilterFactory']")
->attr('protected', 'protwords_' . $language_id . '.txt')
->attr('splitOnCaseChange', apachesolr_multilingual_confgen_variable_get('apachesolr_multilingual_splitOnCaseChange_index', $language_id))
->attr('splitOnNumerics', apachesolr_multilingual_confgen_variable_get('apachesolr_multilingual_splitOnNumerics_index', $language_id))
->attr('stemEnglishPossessive', apachesolr_multilingual_confgen_variable_get('apachesolr_multilingual_stemEnglishPossessive_index', $language_id))
->attr('generateWordParts', apachesolr_multilingual_confgen_variable_get('apachesolr_multilingual_generateWordParts_index', $language_id))
->attr('generateNumberParts', apachesolr_multilingual_confgen_variable_get('apachesolr_multilingual_generateNumberParts_index', $language_id))
->attr('catenateWords', apachesolr_multilingual_confgen_variable_get('apachesolr_multilingual_catenateWords_index', $language_id))
->attr('catenateNumbers', apachesolr_multilingual_confgen_variable_get('apachesolr_multilingual_catenateNumbers_index', $language_id))
->attr('catenateAll', apachesolr_multilingual_confgen_variable_get('apachesolr_multilingual_catenateAll_index', $language_id))
->attr('preserveOriginal', apachesolr_multilingual_confgen_variable_get('apachesolr_multilingual_preserveOriginal_index', $language_id));
// Token length limits of the index analyzer.
$qp_schema_language_specific
->find(':root')
->xpath("types/fieldType[@name='{$type}']/analyzer[@type='index']/filter[@class='solr.LengthFilterFactory']")
->attr('min', apachesolr_multilingual_confgen_variable_get('apachesolr_multilingual_min_index', $language_id))
->attr('max', apachesolr_multilingual_confgen_variable_get('apachesolr_multilingual_max_index', $language_id));
// Insert a compound word filter after every lower case filter. The lower
// case filter is used as the anchor here, so its optional removal has to
// happen later (see the end of this loop).
$qp_schema_language_specific
->find(':root')
->xpath("types/fieldType[@name='{$type}']//filter[@class='solr.LowerCaseFilterFactory']")
->after('<filter class="solr.DictionaryCompoundWordTokenFilterFactory" dictionary="compoundwords_' . $language_id . '.txt" />');
// Configure the compound word filter(s) that were just inserted.
$qp_schema_language_specific
->find(':root')
->xpath("types/fieldType[@name='{$type}']//filter[@class='solr.DictionaryCompoundWordTokenFilterFactory']")
->attr('minWordSize', apachesolr_multilingual_confgen_variable_get('apachesolr_multilingual_minWordSize', $language_id))
->attr('minSubwordSize', apachesolr_multilingual_confgen_variable_get('apachesolr_multilingual_minSubwordSize', $language_id))
->attr('maxSubwordSize', apachesolr_multilingual_confgen_variable_get('apachesolr_multilingual_maxSubwordSize', $language_id))
->attr('onlyLongestMatch', apachesolr_multilingual_confgen_variable_get('apachesolr_multilingual_onlyLongestMatch', $language_id));
// Stemming is only touched for 'text'; 'text_und' keeps its filters.
if ('text_und' != $type) {
$stemmer_language = apachesolr_multilingual_confgen_variable_get('apachesolr_multilingual_stemmer_language', $language_id);
// Check for solr version, only supported languages should be rendered
if (!empty($stemmer_language) && $stemmer_language != '-none-' && in_array($stemmer_language, array_values(apachesolr_multilingual_confgen_get_stemmer(NULL, $solr_version)))) {
// Render every filter definition as a self-closing <filter/> element whose
// attributes are the key/value pairs of the definition. Values are written
// without XML escaping.
$filter_str = '';
foreach (apachesolr_multilingual_get_stemming_filters($stemmer_language, $solr_version) as $filter) {
$filter_str .= '<filter ';
foreach ($filter as $key => $value) {
$filter_str .= $key . '="' . $value . '" ';
}
$filter_str .= '/>';
}
// The default Snowball filter is replaced by the rendered filter chain.
$qp_schema_language_specific
->find(':root')
->xpath("types/fieldType[@name='{$type}']//filter[@class='solr.SnowballPorterFilterFactory']")
->replaceWith($filter_str);
}
elseif (!empty($stemmer_language) && $stemmer_language != '-none-') {
// A stemmer is configured but not available for the targeted solr version:
// report the error and abort the request.
drupal_set_message(t('The selected stemmer %stemmer is not available in the targeted solr version.', array(
'%stemmer' => $stemmer_language,
)), 'error');
// TODO "Not Found" is a quick solution. We should provide a better error page here.
drupal_not_found();
exit;
}
else {
// no stemming
$qp_schema_language_specific
->find(':root')
->xpath("types/fieldType[@name='{$type}']//filter[@class='solr.SnowballPorterFilterFactory']")
->detach();
}
}
// Synonyms: language specific file and case handling.
$qp_schema_language_specific
->find(':root')
->xpath("types/fieldType[@name='{$type}']//filter[@class='solr.SynonymFilterFactory']")
->attr('synonyms', 'synonyms_' . $language_id . '.txt')
->attr('ignoreCase', apachesolr_multilingual_confgen_variable_get('apachesolr_multilingual_ignoreCase_synonyms', $language_id));
// Word delimiter settings of the query analyzer ('*_query' settings).
$qp_schema_language_specific
->find(':root')
->xpath("types/fieldType[@name='{$type}']/analyzer[@type='query']/filter[@class='solr.WordDelimiterFilterFactory']")
->attr('protected', 'protwords_' . $language_id . '.txt')
->attr('splitOnCaseChange', apachesolr_multilingual_confgen_variable_get('apachesolr_multilingual_splitOnCaseChange_query', $language_id))
->attr('splitOnNumerics', apachesolr_multilingual_confgen_variable_get('apachesolr_multilingual_splitOnNumerics_query', $language_id))
->attr('stemEnglishPossessive', apachesolr_multilingual_confgen_variable_get('apachesolr_multilingual_stemEnglishPossessive_query', $language_id))
->attr('generateWordParts', apachesolr_multilingual_confgen_variable_get('apachesolr_multilingual_generateWordParts_query', $language_id))
->attr('generateNumberParts', apachesolr_multilingual_confgen_variable_get('apachesolr_multilingual_generateNumberParts_query', $language_id))
->attr('catenateWords', apachesolr_multilingual_confgen_variable_get('apachesolr_multilingual_catenateWords_query', $language_id))
->attr('catenateNumbers', apachesolr_multilingual_confgen_variable_get('apachesolr_multilingual_catenateNumbers_query', $language_id))
->attr('catenateAll', apachesolr_multilingual_confgen_variable_get('apachesolr_multilingual_catenateAll_query', $language_id))
->attr('preserveOriginal', apachesolr_multilingual_confgen_variable_get('apachesolr_multilingual_preserveOriginal_query', $language_id));
// Token length limits of the query analyzer.
$qp_schema_language_specific
->find(':root')
->xpath("types/fieldType[@name='{$type}']/analyzer[@type='query']/filter[@class='solr.LengthFilterFactory']")
->attr('min', apachesolr_multilingual_confgen_variable_get('apachesolr_multilingual_min_query', $language_id))
->attr('max', apachesolr_multilingual_confgen_variable_get('apachesolr_multilingual_max_query', $language_id));
// Remove the lower case filter when lower casing is switched off. This runs
// after the compound word filter has been inserted next to it.
if (!apachesolr_multilingual_confgen_variable_get('apachesolr_multilingual_lowerCase', $language_id)) {
$qp_schema_language_specific
->find(':root')
->xpath("types/fieldType[@name='{$type}']//filter[@class='solr.LowerCaseFilterFactory']")
->detach();
}
}
// type textSpell
// Stop words of the spell check type ('*_spell' settings).
$qp_schema_language_specific
->find(':root')
->xpath("types/fieldType[@name='textSpell']//filter[@class='solr.StopFilterFactory']")
->attr('ignoreCase', apachesolr_multilingual_confgen_variable_get('apachesolr_multilingual_ignoreCase_stopwords_spell', $language_id))
->attr('words', 'stopwords_' . $language_id . '.txt');
// Insert a word delimiter filter after the stop filter, then configure it.
$qp_schema_language_specific
->find(':root')
->xpath("types/fieldType[@name='textSpell']//filter[@class='solr.StopFilterFactory']")
->after('<filter class="solr.WordDelimiterFilterFactory" protected="protwords_' . $language_id . '.txt" />');
$qp_schema_language_specific
->find(':root')
->xpath("types/fieldType[@name='textSpell']//filter[@class='solr.WordDelimiterFilterFactory']")
->attr('splitOnCaseChange', apachesolr_multilingual_confgen_variable_get('apachesolr_multilingual_splitOnCaseChange_spell', $language_id))
->attr('splitOnNumerics', apachesolr_multilingual_confgen_variable_get('apachesolr_multilingual_splitOnNumerics_spell', $language_id))
->attr('stemEnglishPossessive', apachesolr_multilingual_confgen_variable_get('apachesolr_multilingual_stemEnglishPossessive_spell', $language_id))
->attr('generateWordParts', apachesolr_multilingual_confgen_variable_get('apachesolr_multilingual_generateWordParts_spell', $language_id))
->attr('generateNumberParts', apachesolr_multilingual_confgen_variable_get('apachesolr_multilingual_generateNumberParts_spell', $language_id))
->attr('catenateWords', apachesolr_multilingual_confgen_variable_get('apachesolr_multilingual_catenateWords_spell', $language_id))
->attr('catenateNumbers', apachesolr_multilingual_confgen_variable_get('apachesolr_multilingual_catenateNumbers_spell', $language_id))
->attr('catenateAll', apachesolr_multilingual_confgen_variable_get('apachesolr_multilingual_catenateAll_spell', $language_id))
->attr('preserveOriginal', apachesolr_multilingual_confgen_variable_get('apachesolr_multilingual_preserveOriginal_spell', $language_id));
// Token length limits of the spell check type.
$qp_schema_language_specific
->find(':root')
->xpath("types/fieldType[@name='textSpell']//filter[@class='solr.LengthFilterFactory']")
->attr('min', apachesolr_multilingual_confgen_variable_get('apachesolr_multilingual_lengthMin_spell', $language_id))
->attr('max', apachesolr_multilingual_confgen_variable_get('apachesolr_multilingual_lengthMax_spell', $language_id));
// Insert and configure the compound word filter, again anchored on the
// lower case filter.
$qp_schema_language_specific
->find(':root')
->xpath("types/fieldType[@name='textSpell']//filter[@class='solr.LowerCaseFilterFactory']")
->after('<filter class="solr.DictionaryCompoundWordTokenFilterFactory" dictionary="compoundwords_' . $language_id . '.txt" />');
$qp_schema_language_specific
->find(':root')
->xpath("types/fieldType[@name='textSpell']//filter[@class='solr.DictionaryCompoundWordTokenFilterFactory']")
->attr('minWordSize', apachesolr_multilingual_confgen_variable_get('apachesolr_multilingual_minWordSize_spell', $language_id))
->attr('minSubwordSize', apachesolr_multilingual_confgen_variable_get('apachesolr_multilingual_minSubwordSize_spell', $language_id))
->attr('maxSubwordSize', apachesolr_multilingual_confgen_variable_get('apachesolr_multilingual_maxSubwordSize_spell', $language_id))
->attr('onlyLongestMatch', apachesolr_multilingual_confgen_variable_get('apachesolr_multilingual_onlyLongestMatch_spell', $language_id));
// Optionally drop lower casing from the spell check type.
if (!apachesolr_multilingual_confgen_variable_get('apachesolr_multilingual_lowerCase_spell', $language_id)) {
$qp_schema_language_specific
->find(':root')
->xpath("types/fieldType[@name='textSpell']//filter[@class='solr.LowerCaseFilterFactory']")
->detach();
}
// Optionally drop lower casing from 'text_ws' and 'edge_n2_kw_text'; each
// type has its own 'apachesolr_multilingual_lowerCase_<type>' setting.
foreach (array(
'text_ws',
'edge_n2_kw_text',
) as $type) {
if (!apachesolr_multilingual_confgen_variable_get('apachesolr_multilingual_lowerCase_' . $type, $language_id)) {
$qp_schema_language_specific
->find(':root')
->xpath("types/fieldType[@name='{$type}']//filter[@class='solr.LowerCaseFilterFactory']")
->detach();
}
}
// add language specific types and fields to schema
// If a find() returns zero matches, then a subsequent find()
// will also return zero matches, even if that find has a selector like :root.
// The reason for this is that the QueryPathCssEventHandler does not set the
// root of the document tree if it cannot find any elements from which to
// determine what the root is.
// The workaround is to use top() to select the root element again.
$types = array_unique(array_values(apachesolr_multilingual_get_dynamic_text_field_prefixes_and_types()));
$types[] = 'textSpell';
// i18n_spell_de
$types[] = 'sortString';
// i18n_sort_label_de
// Collects the original name of every field and dynamic field that gets a
// language specific twin, mapped to 'field' or 'dynamicField'.
$names = array();
foreach ($types as $type) {
// Rename the cloned type to <type>_<language id> and insert it after the
// original type.
$qp_schema_language_specific
->find(':root types fieldType[name="' . $type . '"]')
->attr('name', $type . '_' . $language_id)
->insertAfter($qp_schema
->find(':root')
->xpath("types/fieldType[@name='{$type}']"));
// Fields of this type become i18n_<name>_<language id> and use the
// language specific type.
$fields = $qp_schema_language_specific
->branch()
->find(':root fields field[type="' . $type . '"]');
$qp_schema_language_specific
->top();
// workaround, see above
foreach ($fields as $field) {
$name = $field
->attr('name');
$names[$name] = 'field';
$field
->attr('type', $type . '_' . $language_id)
->attr('name', 'i18n_' . $name . '_' . $language_id)
->insertAfter($qp_schema
->find(':root')
->xpath("fields/field[@name='{$name}']"));
}
// Dynamic fields of this type: the language id is put in front of the
// wildcard, e.g. 'tm_*' becomes 'i18n_tm_<language id>_*'.
$dynamic_fields = $qp_schema_language_specific
->branch()
->find(':root fields dynamicField[type="' . $type . '"]');
$qp_schema_language_specific
->top();
// workaround, see above
foreach ($dynamic_fields as $dynamic_field) {
$name = $dynamic_field
->attr('name');
$names[$name] = 'dynamicField';
$dynamic_field
->attr('type', $type . '_' . $language_id)
->attr('name', preg_replace('/(.*)\\*/', 'i18n_$1' . $language_id . '_*', $name))
->insertAfter($qp_schema
->find(':root')
->xpath("fields/dynamicField[@name='{$name}']"));
}
}
// Create language specific copy fields for every collected source name.
foreach ($names as $src_name => $src_type) {
$copy_fields = array();
if ('dynamicField' == $src_type) {
// For dynamic fields every copy field whose source starts with the name
// (asterisks trimmed from both ends) is considered.
$copy_fields = $qp_schema_language_specific
->branch()
->find(':root fields copyField[source^="' . trim($src_name, '*') . '"]');
}
else {
$copy_fields = $qp_schema_language_specific
->branch()
->find(':root fields copyField[source="' . $src_name . '"]');
}
$qp_schema_language_specific
->top();
// workaround, see above
foreach ($copy_fields as $copy_field) {
$dst_original = $dst = $copy_field
->attr('dest');
$src_original = $copy_field
->attr('source');
// If the destination starts with the prefix of a collected dynamic field,
// treat that dynamic field as the original destination. The first match
// in $names wins.
foreach ($names as $field_name => $field_type) {
if ('dynamicField' == $field_type && strpos($dst, trim($field_name, '*')) === 0) {
$dst_original = $field_name;
break;
}
}
// The type of the destination field needs to be verified in the original schema,
// because the type in $qp_schema_language_specific has been renamed already.
// NOTE(review): $names only holds fields of the multilingual types, so
// $names[$dst_original] looks like it can be undefined for other
// destinations (undefined index notice, then the 'field' branch) - confirm.
$dst_fields = array();
if ('dynamicField' == $names[$dst_original]) {
$dst_fields = $qp_schema
->branch()
->find(':root fields dynamicField[name^="' . trim($dst_original, '*') . '"]');
}
else {
$dst_fields = $qp_schema
->branch()
->find(':root fields field[name="' . $dst_original . '"]');
}
$qp_schema
->top();
// workaround, see above
$do_copy = FALSE;
foreach ($dst_fields as $dst_field) {
if (in_array($dst_field
->attr('type'), $types)) {
// Only add additional copy fields if the destination is of
// a multilingual text type.
$do_copy = TRUE;
break;
}
}
if ($do_copy) {
// Note: trim($x, '_*') strips any leading and trailing '_' and '*'
// characters, so 'tm_*' is reduced to 'tm'.
$src_suffix = '';
if (strpos($src_name, '_*') !== FALSE) {
// If $src_name is 'tm_*' and $src_original is 'tm_vid_*',
// $src_suffix becomes '_vid_*'.
// If $src_original is 'tm_*' as well, $src_suffix becomes '_*'.
$src_suffix = str_replace(trim($src_name, '_*'), '', trim($src_original, '_*')) . '_*';
}
$dst_suffix = '';
if ('dynamicField' == $names[$dst_original]) {
// If $dst_original is 'tm_*' and $dst is 'tm_vid_*',
// $dst_suffix becomes '_vid_*'.
// If $dst is 'tm_*' as well, $dst_suffix becomes '_*'.
$dst_suffix = str_replace(trim($dst_original, '_*'), '', trim($dst, '_*'));
if (strpos($dst, '_*') !== FALSE) {
$dst_suffix .= '_*';
}
}
// Rewrite source and destination to their i18n_..._<language id> names and
// insert the copy field after the original one in $qp_schema.
$copy_field
->attr('source', 'i18n_' . trim($src_name, '_*') . '_' . $language_id . $src_suffix)
->attr('dest', 'i18n_' . trim($dst_original, '_*') . '_' . $language_id . $dst_suffix)
->insertAfter($qp_schema
->find(':root')
->xpath("fields/copyField[@source='{$src_original}' and @dest='{$dst}']"));
}
}
}
}
}
/**
 * Adds a language specific spellcheck component for every language.
 *
 * For each language returned by apachesolr_multilingual_language_list() the
 * configuration is cloned and the clone's "spellcheck" search component is
 * renamed to "spellcheck_<language id>". Its queryAnalyzerFieldType and
 * spellcheckIndexDir values get the suffix "_<language id>", and its field
 * entries are set to "i18n_spell_<language id>". The component is then
 * inserted before the original "spellcheck" component of $qp_solrconfig, and
 * a <str> entry naming it is placed before the last child of the default
 * request handler's "last-components" list.
 *
 * @param $qp_solrconfig
 *   Query object wrapping the solrconfig document (presumably a QueryPath
 *   instance - confirm against the caller). It is modified in place.
 * @param $solr_version
 *   Not used by this function.
 */
function apachesolr_multilingual_confgen_modify_solrconfig($qp_solrconfig, $solr_version) {
  $languages = apachesolr_multilingual_language_list();

  foreach ($languages as $language_id => $language) {
    $language_suffix = '_' . $language_id;

    // Work on a clone so the original component stays untouched until the
    // finished copy is inserted.
    $localized_config = apachesolr_confgen_clone_qp($qp_solrconfig);
    $matched_component = $localized_config->find(':root searchComponent[name="spellcheck"]');
    $component = $matched_component->attr('name', 'spellcheck_' . $language_id);

    // Point the query analyzer at the language specific field type.
    $analyzer_type = $component->branch()->find('str[name="queryAnalyzerFieldType"]');
    $base_analyzer_type = $analyzer_type->text();
    $analyzer_type->text($base_analyzer_type . $language_suffix);

    // Every spell checker reads from the language specific spell field.
    $spell_fields = $component->branch()->find('str[name="field"]');
    foreach ($spell_fields as $spell_field) {
      $spell_field->text('i18n_spell_' . $language_id);
    }

    // Give every spell checker its own index directory.
    $index_dirs = $component->branch()->find('str[name="spellcheckIndexDir"]');
    foreach ($index_dirs as $index_dir) {
      $base_dir = $index_dir->text();
      $index_dir->text($base_dir . $language_suffix);
    }

    // Put the finished component in front of the language neutral one.
    $original_component = $qp_solrconfig->find(':root searchComponent[name="spellcheck"]');
    $component->insertBefore($original_component);

    // Register the component with the default request handler.
    $last_component_entry = $qp_solrconfig->find(':root requestHandler[default="true"] arr[name="last-components"] str:last-child');
    $last_component_entry->before('<str>spellcheck_' . $language_id . '</str>');
  }
}
/**
 * Builds the filter chain used for language specific stemming.
 *
 * Every element of the returned list is an associative array holding the
 * attributes of one analyzer filter; the 'class' key is always present.
 * Languages without a dedicated chain, including unknown ones, yield an empty
 * list.
 *
 * For Dutch, French, Italian and Turkish the additional filter in front of
 * the Snowball stemmer is left out when $solr_version starts with '1.'.
 *
 * @see https://wiki.apache.org/solr/LanguageAnalysis
 * @see https://wiki.apache.org/solr/MultitermQueryAnalysis
 *
 * TODO Distinguish between solr versions. Not all filters are available in each version.
 *
 * TODO Do we need to disable LowerCaseFilterFactory if the filters here contain a language specific lower case filter factory?
 *
 * @param $language
 *   Name of the stemmer language, e.g. 'German (Light)'.
 * @param $solr_version
 *   Targeted Solr version string.
 *
 * @return array
 *   List of filter definitions in the order they have to be applied.
 */
function apachesolr_multilingual_get_stemming_filters($language, $solr_version) {
  switch ($language) {
    // Dedicated stemming factories.
    case 'Arabic':
      return array(
        array('class' => 'solr.ArabicNormalizationFilterFactory'),
        array('class' => 'solr.ArabicStemFilterFactory'),
      );

    case 'Bulgarian':
      return array(
        array('class' => 'solr.BulgarianStemFilterFactory'),
      );

    case 'Czech':
      return array(
        array('class' => 'solr.CzechStemFilterFactory'),
      );

    case 'English (New)':
      return array(
        array('class' => 'solr.EnglishPossessiveFilterFactory'),
        array(
          'class' => 'solr.KeywordMarkerFilterFactory',
          'protected' => 'protwords_en.txt',
        ),
        array('class' => 'solr.PorterStemFilterFactory'),
      );

    case 'English (Minimal)':
      return array(
        array('class' => 'solr.EnglishPossessiveFilterFactory'),
        array(
          'class' => 'solr.KeywordMarkerFilterFactory',
          'protected' => 'protwords_en.txt',
        ),
        array('class' => 'solr.EnglishMinimalStemFilterFactory'),
      );

    case 'Finnish (Light)':
      return array(
        array('class' => 'solr.FinnishLightStemFilterFactory'),
      );

    case 'French (Light)':
      return array(
        array(
          'class' => 'solr.ElisionFilterFactory',
          'ignoreCase' => 'true',
          'articles' => 'contractions_fr.txt',
        ),
        array('class' => 'solr.FrenchLightStemFilterFactory'),
      );

    case 'French (Minimal)':
      return array(
        array(
          'class' => 'solr.ElisionFilterFactory',
          'ignoreCase' => 'true',
          'articles' => 'contractions_fr.txt',
        ),
        array('class' => 'solr.FrenchMinimalStemFilterFactory'),
      );

    case 'Galician':
      return array(
        array('class' => 'solr.GalicianStemFilterFactory'),
      );

    case 'Galician (Minimal)':
      return array(
        array('class' => 'solr.GalicianMinimalStemFilterFactory'),
      );

    case 'German (Light)':
      return array(
        array('class' => 'solr.GermanNormalizationFilterFactory'),
        array('class' => 'solr.GermanLightStemFilterFactory'),
      );

    case 'German (Minimal)':
      return array(
        array('class' => 'solr.GermanNormalizationFilterFactory'),
        array('class' => 'solr.GermanMinimalStemFilterFactory'),
      );

    case 'Greek':
      return array(
        array('class' => 'solr.GreekLowerCaseFilterFactory'),
        array('class' => 'solr.GreekStemFilterFactory'),
      );

    case 'Hindi':
      return array(
        array('class' => 'solr.IndicNormalizationFilterFactory'),
        array('class' => 'solr.HindiNormalizationFilterFactory'),
        array('class' => 'solr.HindiStemFilterFactory'),
      );

    case 'Hungarian (Light)':
      return array(
        array('class' => 'solr.HungarianLightStemFilterFactory'),
      );

    case 'Indonesian':
      return array(
        array(
          'class' => 'solr.IndonesianStemFilterFactory',
          'stemDerivational' => 'true',
        ),
      );

    case 'Indonesian (Light)':
      return array(
        array(
          'class' => 'solr.IndonesianStemFilterFactory',
          'stemDerivational' => 'false',
        ),
      );

    case 'Italian (Light)':
      return array(
        array(
          'class' => 'solr.ElisionFilterFactory',
          'ignoreCase' => 'true',
          'articles' => 'contractions_it.txt',
        ),
        array('class' => 'solr.ItalianLightStemFilterFactory'),
      );

    case 'Latvian':
      return array(
        array('class' => 'solr.LatvianStemFilterFactory'),
      );

    case 'Norwegian (Light)':
      return array(
        array('class' => 'solr.NorwegianLightStemFilterFactory'),
      );

    case 'Norwegian (Minimal)':
      return array(
        array('class' => 'solr.NorwegianMinimalStemFilterFactory'),
      );

    case 'Polish':
      return array(
        array('class' => 'solr.StempelPolishStemFilterFactory'),
      );

    case 'Portuguese (Light)':
      return array(
        array('class' => 'solr.PortugueseLightStemFilterFactory'),
      );

    case 'Portuguese (Minimal)':
      return array(
        array('class' => 'solr.PortugueseMinimalStemFilterFactory'),
      );

    case 'Portuguese (Aggressive)':
      return array(
        array('class' => 'solr.PortugueseStemFilterFactory'),
      );

    case 'Russian (Light)':
      return array(
        array('class' => 'solr.RussianLightStemFilterFactory'),
      );

    case 'Spanish (Light)':
      return array(
        array('class' => 'solr.SpanishLightStemFilterFactory'),
      );

    case 'Swedish (Light)':
      return array(
        array('class' => 'solr.SwedishLightStemFilterFactory'),
      );

    // Snowball stemmer preceded by additional filters.
    case 'Catalan':
      return array(
        array(
          'class' => 'solr.ElisionFilterFactory',
          'ignoreCase' => 'true',
          'articles' => 'contractions_ca.txt',
        ),
        array(
          'class' => 'solr.SnowballPorterFilterFactory',
          'language' => $language,
          'protected' => 'protwords_ca.txt',
        ),
      );

    case 'Dutch':
      $chain = array();
      $is_solr_1 = (0 === strpos($solr_version, '1.'));
      if (!$is_solr_1) {
        // The stemmer override dictionary is skipped for Solr 1.x.
        $chain[] = array(
          'class' => 'solr.StemmerOverrideFilterFactory',
          'dictionary' => 'stemdict_nl.txt',
          'ignoreCase' => 'false',
        );
      }
      $chain[] = array(
        'class' => 'solr.SnowballPorterFilterFactory',
        'language' => $language,
        'protected' => 'protwords_nl.txt',
      );
      return $chain;

    case 'French':
      $chain = array();
      $is_solr_1 = (0 === strpos($solr_version, '1.'));
      if (!$is_solr_1) {
        // The elision filter is skipped for Solr 1.x.
        $chain[] = array(
          'class' => 'solr.ElisionFilterFactory',
          'ignoreCase' => 'true',
          'articles' => 'contractions_fr.txt',
        );
      }
      $chain[] = array(
        'class' => 'solr.SnowballPorterFilterFactory',
        'language' => $language,
        'protected' => 'protwords_fr.txt',
      );
      return $chain;

    case 'Italian':
      $chain = array();
      $is_solr_1 = (0 === strpos($solr_version, '1.'));
      if (!$is_solr_1) {
        // The elision filter is skipped for Solr 1.x.
        $chain[] = array(
          'class' => 'solr.ElisionFilterFactory',
          'ignoreCase' => 'true',
          'articles' => 'contractions_it.txt',
        );
      }
      $chain[] = array(
        'class' => 'solr.SnowballPorterFilterFactory',
        'language' => $language,
        'protected' => 'protwords_it.txt',
      );
      return $chain;

    case 'Irish':
      return array(
        array(
          'class' => 'solr.ElisionFilterFactory',
          'ignoreCase' => 'true',
          'articles' => 'contractions_ga.txt',
        ),
        array(
          'class' => 'solr.StopFilterFactory',
          'ignoreCase' => 'true',
          'words' => 'hyphenations_ga.txt',
          'enablePositionIncrements' => 'false',
        ),
        array('class' => 'solr.IrishLowerCaseFilterFactory'),
        array(
          'class' => 'solr.SnowballPorterFilterFactory',
          'language' => $language,
          'protected' => 'protwords_ga.txt',
        ),
      );

    case 'Turkish':
      $chain = array();
      $is_solr_1 = (0 === strpos($solr_version, '1.'));
      if (!$is_solr_1) {
        // The Turkish lower case filter is skipped for Solr 1.x.
        $chain[] = array(
          'class' => 'solr.TurkishLowerCaseFilterFactory',
        );
      }
      $chain[] = array(
        'class' => 'solr.SnowballPorterFilterFactory',
        'language' => $language,
        'protected' => 'protwords_tr.txt',
      );
      return $chain;

    // Snowball stemmer only.
    case 'Basque':
    case 'Danish':
    case 'English':
    case 'Finnish':
    case 'German':
    case 'Hungarian':
    case 'Norwegian':
    case 'Portuguese':
    case 'Romanian':
    case 'Russian':
    case 'Spanish':
    case 'Swedish':
      // Flip the stemmer list so the stemmer name can be used to look up the
      // key that names the protected words file.
      $stemmer_keys = array_flip(apachesolr_multilingual_confgen_get_stemmer());
      return array(
        array(
          'class' => 'solr.SnowballPorterFilterFactory',
          'language' => $language,
          'protected' => 'protwords_' . $stemmer_keys[$language] . '.txt',
        ),
      );

    case 'German2':
      return array(
        array('class' => 'solr.GermanNormalizationFilterFactory'),
        array(
          'class' => 'solr.SnowballPorterFilterFactory',
          'language' => 'German2',
          'protected' => 'protwords_de.txt',
        ),
      );

    // Languages without stemming support share the default: no filters.
    case 'Albanian':
    case 'Armenian':
    case 'Azerbaijani':
    case 'Belarusian':
    case 'Bosnian':
    case 'Croatian':
    case 'Estonian':
    case 'Georgian':
    case 'Icelandic':
    case 'Kazakh':
    case 'Kirghiz':
    case 'Lithuanian':
    case 'Macedonian':
    case 'Moldovan':
    case 'Mongolian':
    case 'Montenegrin':
    case 'Pashto':
    case 'Romani':
    case 'Ruthenian':
    case 'Serbian':
    case 'Slovak':
    case 'Slovenian':
    case 'Tajik':
    case 'Turkmen':
    case 'Ukrainian':
    case 'Uzbek':
    default:
      return array();
  }
}
Functions
Name | Description |
---|---|
apachesolr_multilingual_confgen_modify_schema | @file Schema generator for multilingual search |
apachesolr_multilingual_confgen_modify_solrconfig | |
apachesolr_multilingual_get_stemming_filters | Returns best practice stemming filter chains for language specific stemming. |