You are here

function autotag_search_text in Taxonomy Autotagger 7.3

Same name and namespace in other branches
  1. 8.3 autotag.module \autotag_search_text()

Compare functions

1 call to autotag_search_text()
autotag_tag_suggestions in ./autotag.module
Callback as defined above in hook_tag_suggestion_info

File

./autotag.module, line 50
Implements the API on tag suggestion/autotagging modules as outlined at http://groups.drupal.org/node/100179

Code

/**
 * Finds the terms of a vocabulary whose names occur in a piece of text.
 *
 * The text is lower-cased and split into unique words. The words are loaded
 * into a temporary table which is joined against {taxonomy_term_data} to find
 * single-word term names. Term names containing a space are then matched by
 * looking for the lower-cased name as a substring of the lower-cased text.
 *
 * @param string $text
 *   The text to search for term names.
 * @param int $vid
 *   The ID of the vocabulary whose terms should be matched.
 * @param bool $tag_only_leaves
 *   If TRUE, terms whose tid appears as a parent in {taxonomy_term_hierarchy}
 *   are excluded from the results.
 *
 * @return array
 *   The values returned by taxonomy_term_load() for every matching term. An
 *   empty array is returned if the text contains no words, or if the words
 *   could not be inserted into the temporary table.
 */
function autotag_search_text($text, $vid, $tag_only_leaves) {
  $tags_to_return = array();

  // The text is split only once, so the lower-cased text itself is reused
  // further down for the multiple word matches.
  //
  // A Unicode-aware split such as '/[\p{P}\s\{\}\[\]\(\)]/' (see
  // http://stackoverflow.com/questions/790596/split-a-text-into-single-words)
  // was tried previously, but was reported to only work in PHP 5.2, hence the
  // explicit list of separator characters below.
  // NOTE(review): the pattern has no "u" modifier, so if this file is UTF-8
  // encoded the pound sign is treated as two separate bytes - confirm whether
  // this mangles multibyte words.
  $text = strtolower($text);
  $words = array_unique(preg_split('/[\\ `!"£$%^&*()_\\-+={\\[}\\]:;@\'~#<,>.?\\/|\\\\]/', $text, -1, PREG_SPLIT_NO_EMPTY));
  if (!count($words)) {
    return $tags_to_return;
  }

  // Create a temporary table holding the words, so that it can be compared
  // against the taxonomy_term_data table. The table is created from a SELECT,
  // so it is seeded with one of the words (which is removed from $words).
  $temporary_table_name = db_query_temporary('SELECT ? AS words', array(
    array_pop($words),
  ));

  // Update the column to be a varchar(255) and add an index to ensure that
  // searching the data is quick. Either of these can fail, but it does not
  // actually matter, so failures are deliberately ignored.
  try {
    db_change_field($temporary_table_name, 'words', 'words', array(
      'type' => 'varchar',
      'length' => 255,
    ));
  }
  catch (Exception $e) {
    // Best effort only: carry on with the column type as created.
  }
  try {
    db_add_primary_key($temporary_table_name, array(
      'words',
    ));
  }
  catch (Exception $e) {
    // Best effort only: the queries below still work without the index.
  }

  // Insert the rest of the words, skipping any that are too long.
  $insert = db_insert($temporary_table_name);
  $insert->fields(array(
    'words',
  ));
  foreach ($words as $word) {
    // The length check must be applied to the word itself. The previous
    // strlen($word < 256) measured the length of a boolean instead, which
    // let over-long words through and dropped some short ones.
    if (strlen($word) < 256) {
      $insert->values(array(
        'words' => $word,
      ));
    }
  }
  try {
    $insert->execute();
  }
  catch (Exception $e) {
    // FIXME - This currently does not work with 4-byte characters.  I need to
    // change this code so that it does not use a temporary table.
    return array();
  }

  // Subquery listing every tid that is the parent of another term. It is
  // used by both queries below to restrict the results to leaf terms.
  $notin = NULL;
  if ($tag_only_leaves) {
    $notin = db_select('taxonomy_term_hierarchy', 'tth')
      ->fields('tth', array(
        'parent',
      ));
  }

  // Select the direct single word matches.
  $select = db_select('taxonomy_term_data', 't');
  $select->condition('vid', $vid);
  $select->join($temporary_table_name, 'a', 'a.words LIKE t.name');
  $select->addField('t', 'tid');
  $select->addField('t', 'name');
  if ($tag_only_leaves) {
    $select->condition('t.tid', $notin, 'NOT IN');
  }
  $results = $select->execute();
  foreach ($results as $term) {
    $tags_to_return[] = taxonomy_term_load($term->tid);
  }

  // Next we need to get the multiple word terms. These cannot be matched
  // against the single words, so every term name containing a space is
  // fetched and looked for in the lower-cased text.
  $select = db_select('taxonomy_term_data', 't');
  $select->condition('vid', $vid);
  $select->condition('name', '% %', 'LIKE');
  $select->addField('t', 'tid');
  $select->addField('t', 'name');
  if ($tag_only_leaves) {
    $select->condition('t.tid', $notin, 'NOT IN');
  }
  $results = $select->execute();
  foreach ($results as $term) {
    if (strpos($text, strtolower($term->name)) !== FALSE) {
      $tags_to_return[] = taxonomy_term_load($term->tid);
    }
  }
  return $tags_to_return;
}