You are here

protected function FuzzySearchService::convert in Fuzzy Search 7

Helper for converting data to be indexed.

1 call to FuzzySearchService::convert()
FuzzySearchService::indexItem in includes/service.inc
Helper method to index a given item.

File

includes/service.inc, line 515

Class

FuzzySearchService
Search service class using the database for storing index information.

Code

/**
 * Helper for converting data to be indexed.
 *
 * List types are unwrapped and each contained value is converted
 * recursively; the converted values are flattened into a single array.
 *
 * @param mixed $value
 *   The raw value to convert. For list types this is expected to be an array
 *   of values; anything else yields an empty array.
 * @param string $type
 *   The type the value should be converted to ('text', 'tokens', 'string',
 *   'uri', 'integer', 'duration', 'decimal', 'boolean', 'date', or a list
 *   type wrapping one of these).
 * @param string $original_type
 *   The type the value originally had. Only consulted to detect dates that
 *   are being indexed as 'string' or 'uri'.
 * @param SearchApiIndex $index
 *   The index the value is converted for. Only its processor options are
 *   read, to choose the log message for badly tokenized data.
 *
 * @return mixed
 *   For 'text' and 'tokens': an array of arrays with the keys 'value' and
 *   'score', where no 'value' is longer than 50 characters. For list types:
 *   a flat array of converted values. Otherwise a scalar.
 *
 * @throws SearchApiException
 *   If a 'string' or 'uri' value is longer than 255 characters, or if $type
 *   is unknown.
 */
protected function convert($value, $type, $original_type, SearchApiIndex $index) {
  if (search_api_is_list_type($type)) {
    // Strip the surrounding "list<" and ">" to get the inner type.
    $type = substr($type, 5, -1);
    $original_type = search_api_extract_inner_type($original_type);
    $ret = array();
    if (is_array($value)) {
      foreach ($value as $v) {
        $v = $this
          ->convert($v, $type, $original_type, $index);
        // Inner conversions may return either a scalar or an array (e.g. a
        // list of tokens); flatten both into the result.
        $ret = array_merge($ret, is_array($v) ? $v : array(
          $v,
        ));
      }
    }
    return $ret;
  }
  switch ($type) {
    case 'text':
      $ret = array();
      $words = preg_split('/[^\\p{L}\\p{N}]+/u', $value, -1, PREG_SPLIT_NO_EMPTY);
      // preg_split() returns FALSE on failure (e.g. malformed UTF-8 with the
      // "u" modifier); treat that as "no tokens" instead of iterating FALSE.
      if ($words === FALSE) {
        $words = array();
      }
      foreach ($words as $v) {
        // NOTE(review): empty strings are already removed by
        // PREG_SPLIT_NO_EMPTY, so this check only drops the token "0".
        // Kept as is to preserve existing index contents - confirm whether
        // that is intended.
        if ($v) {
          $ret[] = array(
            'value' => $v,
            'score' => 1.0,
          );
        }
      }
      $value = $ret;

    // FALL-THROUGH!
    case 'tokens':
      // Restart the scan after every replacement, since array_splice()
      // changes the array that is being iterated.
      while (TRUE) {
        foreach ($value as $i => $v) {

          // Check for over-long tokens.
          $score = $v['score'];
          $v = $v['value'];
          if (drupal_strlen($v) > 50) {
            $words = preg_split('/[^\\p{L}\\p{N}]+/u', $v, -1, PREG_SPLIT_NO_EMPTY);
            // Guard against preg_split() failure, see above. The over-long
            // token is then simply removed.
            if ($words === FALSE) {
              $words = array();
            }
            if (count($words) > 1 && max(array_map('drupal_strlen', $words)) <= 50) {

              // Overlong token is due to bad tokenizing.
              // Check for "Tokenizer" preprocessor on index.
              if (empty($index->options['processors']['search_api_tokenizer']['status'])) {
                watchdog('search_api_db', 'An overlong word (more than 50 characters) was encountered while indexing, due to bad tokenizing. It is recommended to enable the "Tokenizer" preprocessor for indexes using database servers. Otherwise, the service class has to use its own, fixed tokenizing.', array(), WATCHDOG_WARNING);
              }
              else {
                watchdog('search_api_db', 'An overlong word (more than 50 characters) was encountered while indexing, due to bad tokenizing. Please check your settings for the "Tokenizer" preprocessor to ensure that data is tokenized correctly.', array(), WATCHDOG_WARNING);
              }
            }
            $tokens = array();
            foreach ($words as $word) {
              if (drupal_strlen($word) > 50) {
                watchdog('search_api_db', 'An overlong word (more than 50 characters) was encountered while indexing: %word.<br />Database search servers currently cannot index such words correctly – the word was therefore trimmed to the allowed length.', array(
                  '%word' => $word,
                ), WATCHDOG_WARNING);
                $word = drupal_substr($word, 0, 50);
              }
              // Every replacement token inherits the original token's score.
              $tokens[] = array(
                'value' => $word,
                'score' => $score,
              );
            }
            // Replace the over-long token with its parts and rescan.
            array_splice($value, $i, 1, $tokens);
            continue 2;
          }
        }
        break;
      }
      return $value;
    case 'string':
    case 'uri':

      // For non-dates, PHP can handle this well enough.
      if ($original_type == 'date') {
        // date() uses plain format characters, not strftime()-style "%"
        // specifiers: 'c' yields an ISO 8601 date, whereas '%c' would prepend
        // a literal "%" to it.
        return date('c', $value);
      }
      if (drupal_strlen($value) > 255) {
        throw new SearchApiException(t("A string value longer than 255 characters was encountered. Such values currently aren't supported by the database backend."));
      }
      return $value;
    case 'integer':
    case 'duration':
    case 'decimal':
      // Numeric cast that keeps integers as integers and decimals as floats.
      return 0 + $value;
    case 'boolean':
      return $value ? 1 : 0;
    case 'date':
      // Numeric values and empty values are used as timestamps directly;
      // anything else is parsed as a date string.
      if (is_numeric($value) || !$value) {
        return 0 + $value;
      }
      return strtotime($value);
    default:
      throw new SearchApiException(t('Unknown field type !type. Database search module might be out of sync with Search API.', array(
        '!type' => $type,
      )));
  }
}