You are here

function apachesolr_multilingual_confgen_get_stemmer in Apache Solr Multilingual 6.3

Same name and namespace in other branches
  1. 7 apachesolr_multilingual_confgen/apachesolr_multilingual_confgen.module \apachesolr_multilingual_confgen_get_stemmer()

Helper function that returns the name of a stemmer if available

Parameters

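string $language_id: the language id, or NULL to get the list of all available stemmers

string $solr_version: the Solr version (e.g. '1.4.x'); any value other than '1.4.x' also enables the newer stemmers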

Return value

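string|array: the name of the stemmer, an empty string if none is available, or the array of all available stemmers keyed by language id when no language id is given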

4 calls to apachesolr_multilingual_confgen_get_stemmer()
apachesolr_multilingual_confgen_admin_form in apachesolr_multilingual_confgen/apachesolr_multilingual_confgen.admin.inc
Implements hook_form().
apachesolr_multilingual_confgen_modify_schema in apachesolr_multilingual_confgen/apachesolr_multilingual_confgen.generator.inc
@file Schema generator for multilingual search
apachesolr_multilingual_confgen_variable_info_alter in apachesolr_multilingual_confgen/apachesolr_multilingual_confgen.module
apachesolr_multilingual_get_stemming_filters in apachesolr_multilingual_confgen/apachesolr_multilingual_confgen.generator.inc
Returns best practice stemming filter chains for language specific stemming.

File

apachesolr_multilingual_confgen/apachesolr_multilingual_confgen.module, line 95
Multilingual search using Apache Solr.

Code

/**
 * Helper function that returns the name of a stemmer if available.
 *
 * @param string $language_id
 *   The language id (e.g. 'en', 'pt-br'). If NULL, the complete list of
 *   available stemmers is returned instead of a single name.
 * @param string $solr_version
 *   The Solr version. For '1.4.x' only the basic stemmers are offered; for
 *   any other value (including NULL) the newer stemmers are added as well.
 *
 * @return string|array
 *   - The array of available stemmers keyed by language id, if $language_id
 *     is NULL.
 *   - The name of the stemmer for $language_id, if one is available. For ids
 *     of the form 'xx-...' without an exact match, the stemmer of the two
 *     letter prefix 'xx' is used as a fallback.
 *   - An empty string if no stemmer is available or $language_id is not a
 *     string.
 */
function apachesolr_multilingual_confgen_get_stemmer($language_id = NULL, $solr_version = NULL) {
  // Stemmers offered regardless of the Solr version.
  $available_stemmers = array(
    'da' => 'Danish',
    'nl' => 'Dutch',
    'en' => 'English',
    'fi' => 'Finnish',
    'fr' => 'French',
    'de' => 'German',
    'it' => 'Italian',
    'nn' => 'Norwegian',
    'nb' => 'Norwegian',
    'pt-br' => 'Portuguese',
    'pt-pt' => 'Portuguese',
    'ro' => 'Romanian',
    'ru' => 'Russian',
    'es' => 'Spanish',
    'sv' => 'Swedish',
    'tr' => 'Turkish',
  );

  switch ($solr_version) {
    case '1.4.x':
      // Only the basic stemmers above are offered for this version.
      break;

    case '3.5.x':
    case '4.5.x':
    default:
      // Add newer stemmers. The array union operator keeps existing keys, so
      // every entry here needs a key that is not used above.
      $available_stemmers += array(
        'ar' => 'Arabic',
        'eu' => 'Basque',
        'bg' => 'Bulgarian',
        'ca' => 'Catalan',
        'cs' => 'Czech',
        'en-min' => 'English (Minimal)',
        'en-new' => 'English (New)',
        'fr-lt' => 'French (Light)',
        'fr-min' => 'French (Minimal)',
        'de2' => 'German2',
        'de-lt' => 'German (Light)',
        'de-min' => 'German (Minimal)',
        'ga' => 'Irish',
        'gl' => 'Galician',
        'el' => 'Greek',
        'hi' => 'Hindi',
        'hu' => 'Hungarian',
        'hu-lt' => 'Hungarian (Light)',
        'id' => 'Indonesian',
        'id-lt' => 'Indonesian (Light)',
        'it-lt' => 'Italian (Light)',
        'lv' => 'Latvian',
        'nn-lt' => 'Norwegian (Light)',
        'nn-min' => 'Norwegian (Minimal)',
        'pl' => 'Polish',
        'pt-pt-lt' => 'Portuguese (Light)',
        'pt-pt-min' => 'Portuguese (Minimal)',
        'pt-pt-ag' => 'Portuguese (Aggressive)',
        'ru-lt' => 'Russian (Light)',
        'es-lt' => 'Spanish (Light)',
        // Was keyed 'sv', which collided with 'Swedish' above and was
        // therefore silently dropped by the union operator.
        'sv-lt' => 'Swedish (Light)',
      );
      break;
  }

  if (is_null($language_id)) {
    return $available_stemmers;
  }

  if (!is_string($language_id)) {
    // Some language detection functions return FALSE in some cases.
    // Instead of forcing everyone to not call this function in such a case
    // we handle it gracefully here.
    return '';
  }

  if (array_key_exists($language_id, $available_stemmers)) {
    return $available_stemmers[$language_id];
  }

  if (strpos($language_id, '-') === 2) {
    // Try to find a fallback stemmer using the two letter language prefix,
    // e.g. 'en-gb' falls back to 'en'.
    $language_id_fallback = substr($language_id, 0, 2);
    if (array_key_exists($language_id_fallback, $available_stemmers)) {
      return $available_stemmers[$language_id_fallback];
    }
  }

  return '';
}