You are here

function search_api_solr_update_8321 in Search API Solr 8.3

Same name and namespace in other branches
  1. 4.x search_api_solr.install \search_api_solr_update_8321()

Fix Chinese Field Types.

File

./search_api_solr.install, line 1529
Install, update and uninstall functions for the Search API Solr module.

Code

/**
 * Fix Chinese Field Types.
 *
 * For every stored Solr field type config whose language code is "zh-hans" or
 * "zh-hant", analyzers that still use the SmartChineseAnalyzer class as their
 * tokenizer are switched to a tokenizer factory with an explicit filter chain:
 * - zh-hans: solr.HMMChineseTokenizerFactory.
 * - zh-hant: solr.ICUTokenizerFactory followed by a Han bigram filter.
 *
 * Both the "field_type" and the "spellcheck_field_type" definitions are
 * migrated. Simplified Chinese configs additionally get an
 * "unstemmed_field_type" definition when none is set.
 *
 * A config is only saved when one of those structural changes happened; the
 * new label is written as part of that save and never triggers one by itself.
 */
function search_api_solr_update_8321() {
  $width_filter = ['class' => 'solr.CJKWidthFilterFactory'];
  $lower_case_filter = ['class' => 'solr.LowerCaseFilterFactory'];
  $stop_filter = [
    'class' => 'solr.StopFilterFactory',
    'words' => 'org/apache/lucene/analysis/cn/smart/stopwords.txt',
  ];
  $han_bigram_filter = [
    'class' => 'solr.CJKBigramFilterFactory',
    'han' => TRUE,
    'hiragana' => FALSE,
    'katakana' => FALSE,
    'hangul' => FALSE,
    'outputUnigrams' => FALSE,
  ];
  $traditional_filters = [
    $han_bigram_filter,
    $width_filter,
    $lower_case_filter,
  ];

  // One migration recipe per supported language code.
  $migrations = [
    'zh-hans' => [
      'label' => 'Simplified Chinese Text Field',
      'tokenizer' => 'solr.HMMChineseTokenizerFactory',
      'filters' => [
        'field_type' => [
          $width_filter,
          $stop_filter,
          [
            'class' => 'solr.PorterStemFilterFactory',
          ],
          $lower_case_filter,
        ],
        'spellcheck_field_type' => [
          $width_filter,
          $lower_case_filter,
        ],
      ],
      'unstemmed_field_type' => [
        'name' => 'text_unstemmed_zh_hans',
        'class' => 'solr.TextField',
        'positionIncrementGap' => 100,
        'analyzers' => [
          [
            'type' => 'index',
            'tokenizer' => [
              'class' => 'solr.HMMChineseTokenizerFactory',
            ],
            'filters' => [
              $width_filter,
              $stop_filter,
              $lower_case_filter,
            ],
          ],
        ],
      ],
    ],
    'zh-hant' => [
      'label' => 'Traditional Chinese Text Field',
      'tokenizer' => 'solr.ICUTokenizerFactory',
      'filters' => [
        'field_type' => $traditional_filters,
        'spellcheck_field_type' => $traditional_filters,
      ],
    ],
  ];

  foreach (search_api_solr_update_helper_get_field_type_configs() as $field_type_name => $field_type_config) {
    if (!is_array($field_type_config) || empty($field_type_config['field_type']) || !is_array($field_type_config['field_type'])) {
      continue;
    }

    $language = isset($field_type_config['field_type_language_code']) ? $field_type_config['field_type_language_code'] : NULL;
    if (!is_string($language) || !isset($migrations[$language])) {
      continue;
    }
    $migration = $migrations[$language];
    $save = FALSE;

    foreach ($migration['filters'] as $definition_key => $filters) {
      if (empty($field_type_config[$definition_key]) || !is_array($field_type_config[$definition_key])) {
        continue;
      }
      $updated = _search_api_solr_update_8321_replace_tokenizer($field_type_config[$definition_key], $migration['tokenizer'], $filters);
      // The helper returns an identical array when nothing matched.
      if ($updated !== $field_type_config[$definition_key]) {
        $field_type_config[$definition_key] = $updated;
        $save = TRUE;
      }
    }

    if (isset($migration['unstemmed_field_type']) && !isset($field_type_config['unstemmed_field_type'])) {
      $field_type_config['unstemmed_field_type'] = $migration['unstemmed_field_type'];
      $save = TRUE;
    }

    if ($save) {
      $field_type_config['label'] = $migration['label'];
      search_api_solr_update_helper_save_field_type_config($field_type_name, $field_type_config);
    }
  }
}

/**
 * Replaces the SmartChineseAnalyzer tokenizer within a field type definition.
 *
 * Analyzers are looked up one level below the definition (for example each
 * entry of its "analyzers" list). An array entry that carries a "tokenizer"
 * key itself is treated as an analyzer as well, so both nesting depths are
 * handled. Entries are written back by key; no references are involved.
 *
 * @param array $definition
 *   The field type definition to inspect.
 * @param string $tokenizer_class
 *   The tokenizer class to set on every matching analyzer.
 * @param array $filters
 *   The filter chain that replaces the filters of every matching analyzer.
 *
 * @return array
 *   The definition with all matching analyzers rewritten. It is identical to
 *   the input when no analyzer matched.
 */
function _search_api_solr_update_8321_replace_tokenizer(array $definition, $tokenizer_class, array $filters) {
  foreach ($definition as $key => $component) {
    if (!is_array($component)) {
      continue;
    }

    if (_search_api_solr_update_8321_uses_smart_chinese($component)) {
      $definition[$key]['tokenizer']['class'] = $tokenizer_class;
      $definition[$key]['filters'] = $filters;
      continue;
    }

    foreach ($component as $index => $analyzer) {
      if (_search_api_solr_update_8321_uses_smart_chinese($analyzer)) {
        $definition[$key][$index]['tokenizer']['class'] = $tokenizer_class;
        $definition[$key][$index]['filters'] = $filters;
      }
    }
  }

  return $definition;
}

/**
 * Checks whether a value is an analyzer tokenized by SmartChineseAnalyzer.
 *
 * @param mixed $analyzer
 *   The candidate analyzer definition; non-arrays never match.
 *
 * @return bool
 *   TRUE if $analyzer['tokenizer']['class'] is the SmartChineseAnalyzer class.
 */
function _search_api_solr_update_8321_uses_smart_chinese($analyzer) {
  return is_array($analyzer)
    && isset($analyzer['tokenizer'])
    && is_array($analyzer['tokenizer'])
    && isset($analyzer['tokenizer']['class'])
    && 'org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer' === $analyzer['tokenizer']['class'];
}