You are here

search_files.module in Search Files 6.2

Same filename and directory in other branches
  1. 5 search_files.module
  2. 7.2 search_files.module

Used to index files in attachments and directories

File

search_files.module
View source
<?php

/**
 * @file
 * Used to index files in attachments and directories
 */

/**
 * Implementation of hook_menu().
 *
 * Declares the module overview page and the helper application pages
 * (list, add, autodetect, edit and delete). Every item requires the
 * 'administer search_files configuration' permission.
 *
 * @return $items = array of menu items
 */
function search_files_menu() {
  $base = 'admin/settings/search_files';
  $helpers_base = $base . '/helpers';

  // Shared by every item so the permission name is spelled only once.
  $admin_only = array(
    'administer search_files configuration',
  );

  $items = array();

  // Module landing page.
  $items[$base] = array(
    'title' => 'Search Files',
    'description' => 'Manage searching for files in attachments and directories',
    'page callback' => 'search_files_overview',
    'access arguments' => $admin_only,
    'type' => MENU_NORMAL_ITEM,
  );

  // Helper listing and its default "List" tab.
  $items[$helpers_base] = array(
    'title' => 'Helpers',
    'description' => 'List Search Files Helper Applications',
    'page callback' => 'search_files_helper_list',
    'access arguments' => $admin_only,
    'type' => MENU_NORMAL_ITEM,
    'weight' => 0,
  );
  $items[$helpers_base . '/list'] = array(
    'title' => 'List',
    'description' => 'List Search Files Helper Applications',
    'page callback' => 'search_files_helper_list',
    'access arguments' => $admin_only,
    'type' => MENU_DEFAULT_LOCAL_TASK,
    'weight' => 0,
  );

  // Form-based tabs next to the list.
  $items[$helpers_base . '/add'] = array(
    'title' => 'Add',
    'description' => 'Add Search Files Helper Application',
    'page callback' => 'drupal_get_form',
    'page arguments' => array(
      'search_files_helper_add_form',
    ),
    'access arguments' => $admin_only,
    'type' => MENU_LOCAL_TASK,
    'weight' => 1,
  );
  $items[$helpers_base . '/autodetect'] = array(
    'title' => 'Autodetect',
    'description' => 'Autodetect Search Files Helper Application',
    'page callback' => 'drupal_get_form',
    'page arguments' => array(
      'search_files_helper_autodetect',
    ),
    'access arguments' => $admin_only,
    'type' => MENU_LOCAL_TASK,
    'weight' => 2,
  );

  // Callbacks without a tab; their handlers read the helper id from the
  // sixth path component via arg(5).
  $items[$helpers_base . '/edit'] = array(
    'title' => 'Edit',
    'description' => 'Edit Search Files Helper Application',
    'page callback' => 'drupal_get_form',
    'page arguments' => array(
      'search_files_helper_edit',
    ),
    'access arguments' => $admin_only,
    'type' => MENU_CALLBACK,
  );
  $items[$helpers_base . '/delete'] = array(
    'title' => 'Delete',
    'description' => 'Delete Search Files Helper Application',
    'page callback' => 'search_files_helper_confirm_delete',
    'access arguments' => $admin_only,
    'type' => MENU_CALLBACK,
  );

  return $items;
}

/**
 * Form builder for the helper autodetection page.
 *
 * The form holds nothing but a submit button; the detection itself is done
 * by search_files_helper_autodetect_submit().
 *
 * @return (array) $form
 */
function search_files_helper_autodetect() {
  $autodetect_button = array(
    '#type' => 'submit',
    '#value' => 'Autodetect',
  );
  return array(
    'submit' => $autodetect_button,
  );
}
/**
 * Implementation of hook_help().
 *
 * @param (string) $path path of the page help is requested for
 * @param (array) $arg not used by this implementation
 * @return (string) HTML listing the preconditions of the autodetect page,
 *   or an empty string for every other path
 */
function search_files_help($path, $arg) {
  // Only the autodetect page carries help text.
  if ($path != 'admin/settings/search_files/helpers/autodetect') {
    return '';
  }

  $preconditions = array(
    '  <li>if PHP safemode is disabled (required by this module anyway)</li>',
    '  <li>if Server running Unix (Linux, BSD, Solaris, ...), not Windows (required "which" tool missing)</li>',
    '  <li>if search PATH inside Apache/PHP environment is set to include the directories containing the helpers</li>',
  );

  return 'this will only work' . '<ul>' . implode('', $preconditions) . '</ul>';
}

/**
 * Submit handler of the autodetect form: detect and register helper apps.
 *
 * @param (array) $form the submitted form (unused)
 * @param (array) $form_state form state; 'redirect' is set to the helper list
 */
function search_files_helper_autodetect_submit($form, &$form_state) {

  // detect the helper apps available on this server and store them
  search_files_install_auto_helper_app_configuration();

  // Hand the redirect to the Form API instead of calling drupal_goto(),
  // which would end the request before form processing has finished.
  $form_state['redirect'] = 'admin/settings/search_files/helpers/list';
}

/**
 * Automatically detect helper apps and configure them.
 *
 * With PHP safe_mode enabled nothing can be probed, so two generic helpers
 * (PDF and plain text) resolved through /usr/bin/env are registered instead.
 * Otherwise each known extraction tool is looked up with "which" and
 * registered with the absolute path that was found.
 *
 * An extension that already has a helper is left untouched, so running the
 * detection repeatedly does not create duplicate rows.
 */
function search_files_install_auto_helper_app_configuration() {

  // safe_mode will inhibit shell_exec()
  if (search_files_issafemode()) {

    // load sample helper apps into database
    _search_files_autodetect_register("PDF", "pdf", "/usr/bin/env pdftotext %file% -");
    _search_files_autodetect_register("Text", "txt", "/usr/bin/env cat %file%");
    return;
  }

  // One entry per tool: 'app' is the label used in the status message,
  // 'binary' the executable to look up, 'interpreter' an optional program
  // that must also exist and is prepended to the command, 'arguments' the
  // command-line tail appended after the detected path.
  $candidates = array(
    array('app' => 'pdftotext', 'binary' => 'pdftotext', 'name' => "PDF", 'extension' => "pdf", 'arguments' => " %file% -"),
    array('app' => 'cat', 'binary' => 'cat', 'name' => "Text files", 'extension' => "txt", 'arguments' => " %file%"),
    array('app' => 'catdoc', 'binary' => 'catdoc', 'name' => "Word Documents", 'extension' => "doc", 'arguments' => " %file%"),
    array('app' => 'docx2txt', 'binary' => 'docx2txt.pl', 'interpreter' => 'perl', 'name' => "Word 2007 files", 'extension' => "docx", 'arguments' => " %file% -"),
    array('app' => 'xls2csv', 'binary' => 'xls2csv', 'name' => "Excel files", 'extension' => "xls", 'arguments' => " %file%"),
    array('app' => 'catppt', 'binary' => 'catppt', 'name' => "Power Point Presentations", 'extension' => "ppt", 'arguments' => " %file%"),
    array('app' => 'unrtf', 'binary' => 'unrtf', 'name' => "Rich Text Format files", 'extension' => "rtf", 'arguments' => " %file%"),
  );

  foreach ($candidates as $candidate) {
    $command = _search_files_which($candidate['binary']);
    if (!$command) {
      continue;
    }

    // Script-based helpers are only usable when their interpreter exists too.
    if (!empty($candidate['interpreter'])) {
      $interpreter = _search_files_which($candidate['interpreter']);
      if (!$interpreter) {
        continue;
      }
      $command = $interpreter . ' ' . $command;
    }

    if (_search_files_autodetect_register($candidate['name'], $candidate['extension'], $command . $candidate['arguments'])) {
      drupal_set_message(t('Helper app @app has been detected and configured', array(
        '@app' => $candidate['app'],
      )));
    }
  }
}

/**
 * Locate a program on the search PATH of the web server process.
 *
 * @param (string) $program name of the executable to look up
 * @return (string) output of "which" for the program, or an empty string when
 *   nothing was found
 */
function _search_files_which($program) {

  // shell_exec() returns NULL when the command prints nothing.
  $location = trim((string) shell_exec('which ' . escapeshellarg($program)));

  // Some "which" implementations report a miss as "no <program> in ..." on
  // standard output; treat that as not found.
  return preg_replace("/^no .*\$/", "", $location);
}

/**
 * Register an autodetected helper unless its extension already has one.
 *
 * @param (string) $name human-readable helper name
 * @param (string) $extension file extension without the period
 * @param (string) $helper_path command line including the %file% token
 * @return (bool) TRUE when a row was inserted, FALSE when the extension was
 *   already configured or the insert failed
 */
function _search_files_autodetect_register($name, $extension, $helper_path) {
  $existing = db_result(db_query("SELECT COUNT(*) FROM {search_files_helpers} WHERE extension = '%s'", $extension));
  if ($existing) {
    return FALSE;
  }
  return (bool) search_files_helper_db_add($name, $extension, $helper_path);
}
/**
 * Page callback for the module overview page.
 *
 * @return (string) static HTML describing what the module searches
 */
function search_files_overview() {
  // The lines are joined with newlines; the leading empty entry and the
  // trailing two-space entry reproduce the original surrounding whitespace.
  $lines = array(
    '',
    '    Search Files in',
    '    <ul>',
    '      <li>Attachments</li>',
    '      <li>Directories</li>',
    '    </ul>',
    '    and extract their content for index and use with Drupal search.',
    '  ',
  );
  return implode("\n", $lines);
}

/**
 * Get the configured helper programs, keyed by file extension.
 *
 * @return $helpers = array($helper->extension => $helper->helper_path);
 */
function search_files_get_helpers() {

  // The list is kept in a static variable so the table is read only once per
  // request. Per the original author's notes, the query log of the devel
  // module showed this query running many times, and caching the result
  // reduced the number of queries by 25%.
  static $helpers = NULL;

  if ($helpers !== NULL) {
    return $helpers;
  }

  $helpers = array();
  $rows = db_query("SELECT * FROM {search_files_helpers}");
  while ($row = db_fetch_object($rows)) {
    $helpers[$row->extension] = $row->helper_path;
  }
  return $helpers;
}

/**
 * Check whether we run in PHP safe_mode.
 *
 * @return (int) result of preg_match(): 1 when the safe_mode ini value
 *   contains "1" or "on" (case-insensitive), 0 otherwise
 */
function search_files_issafemode() {
  $safe_mode_setting = @ini_get("safe_mode");
  return preg_match('/(1|on)/i', $safe_mode_setting);
}

/**
 * Page callback for deleting a helper app.
 *
 * Reads the helper id from the sixth path component. When it is a number
 * greater than zero the deletion confirmation form is fetched and returned;
 * otherwise a "page not found" response is issued instead of the blank page
 * an empty return value would produce.
 *
 * @return (string) rendered confirmation form, or nothing after a 404 page
 *   has been sent
 */
function search_files_helper_confirm_delete() {
  $helper_id = arg(5);
  if (is_numeric($helper_id) && $helper_id > 0) {
    return drupal_get_form('search_files_helper_confirm_delete_form', $helper_id);
  }

  // Missing or malformed id: there is nothing to confirm.
  drupal_not_found();
}

/**
 * Form builder for the helper app deletion confirmation form.
 *
 * The helper id and name are carried in hidden elements so the submit
 * handler can read them from the form values.
 *
 * @param (array) $form_state form state passed in by the Form API (unused)
 * @param (int) $helper_id id of the row in {search_files_helpers}
 * @return (array) $form as produced by confirm_form()
 */
function search_files_helper_confirm_delete_form(&$form_state, $helper_id) {
  $name_query = db_query("SELECT name FROM {search_files_helpers} WHERE id = %d", $helper_id);
  $helper_name = db_result($name_query);

  $form = array(
    'search_files_helper_id' => array(
      '#type' => 'hidden',
      '#value' => $helper_id,
    ),
    'search_files_helper_name' => array(
      '#type' => 'hidden',
      '#value' => $helper_name,
    ),
  );

  $question = t('Are you sure you want to delete the %name helper? The text extracted by this helper will remain in the search index until the directory is reindexed.', array(
    '%name' => $helper_name,
  ));
  $cancel_path = 'admin/settings/search_files/helpers/list';
  $warning = t('This action cannot be undone.');

  return confirm_form($form, $question, $cancel_path, $warning, t('Delete'), t('Cancel'));
}

/**
 * Submit handler of the deletion confirmation form.
 *
 * Removes the row of the helper app from the search_files_helpers table and
 * sends the user back to the helper list.
 *
 * @param (array) $form the submitted form (unused)
 * @param (array) $form_state form state; reads
 *   $form_state['values']['search_files_helper_id'] and sets 'redirect'
 */
function search_files_helper_confirm_delete_form_submit($form, &$form_state) {
  db_query("DELETE FROM {search_files_helpers} WHERE id = %d", $form_state['values']['search_files_helper_id']);

  // Hand the redirect to the Form API instead of calling drupal_goto(),
  // which would end the request before form processing has finished.
  $form_state['redirect'] = 'admin/settings/search_files/helpers/list';
}

/**
 * Display a themed table of the helper apps currently set up in the system.
 *
 * Each row shows the helper name, its extension, and Edit/Delete links.
 * Below the table a note states whether PHP safe_mode allows helper
 * applications to be run.
 *
 * @return (string) html table followed by the safe_mode note
 */
function search_files_helper_list() {
  $header = array(
    t('Helper name'),
    t('Extension'),
    array(
      'data' => t('Operations'),
      // one column each for the Edit and Delete links
      'colspan' => '2',
    ),
  );

  // Start from an empty list so no blank row is rendered at the top.
  $rows = array();
  $result = db_query("SELECT * FROM {search_files_helpers} ORDER BY extension");
  while ($helper = db_fetch_object($result)) {
    $rows[] = array(
      // theme_table() prints cell data as-is, so escape the stored text.
      check_plain($helper->name),
      check_plain($helper->extension),
      l(t('Edit'), 'admin/settings/search_files/helpers/edit/' . $helper->id),
      l(t('Delete'), 'admin/settings/search_files/helpers/delete/' . $helper->id),
    );
  }
  $output = theme('table', $header, $rows);

  // safe_mode will inhibit shell_exec()
  if (search_files_issafemode()) {
    $output .= t('<b>WARNING!</b> This server has safe_mode enabled, which inhibits use of helper applications');
  }
  else {
    $output .= t('Good. This server has safe_mode disabled, which allows use of helper applications');
  }
  $output .= "<br/>\n";
  return $output;
}

/**
 * Implementation of hook_perm().
 *
 * @return (array) names of the permissions this module defines
 */
function search_files_perm() {
  $permissions = array();
  $permissions[] = 'administer search_files configuration';
  $permissions[] = 'view search_files results';
  return $permissions;
}

/**
 * Form builder for adding a helper app to the system.
 *
 * The form collects a display name, the file extension the helper handles
 * and the command line used to extract text. It is also used as the base of
 * the edit form built by search_files_helper_edit().
 *
 * @return (array) $form
 */
function search_files_helper_add_form() {
  $form = array();
  $form['search_files_name'] = array(
    '#type' => 'textfield',
    '#title' => t('Helper name'),
    '#size' => 50,
    '#maxlength' => 50,
    '#default_value' => 'Foo extractor',
    '#required' => TRUE,
    '#description' => t('A name for this helper configuration.'),
  );
  $form['search_files_extension'] = array(
    '#type' => 'textfield',
    '#title' => t('Extension'),
    '#size' => 10,
    '#maxlength' => 10,
    '#default_value' => 'foo',
    '#required' => TRUE,
    '#description' => t('Enter the extension for the files that you want the helper application to process. Do not include the period.'),
  );
  $form['search_files_helper_path'] = array(
    '#type' => 'textfield',
    '#title' => t('Helper path'),
    '#size' => 100,
    '#maxlength' => 255,
    '#default_value' => '/opt/bin/foo-extract %file%',
    // Drupal 6 runs per-element validators from #element_validate (a plain
    // list of function names). The Drupal 5 style '#validate' key is ignored
    // on elements, which left the path validator unused.
    '#element_validate' => array(
      'search_files_helpers_validate_add_edit',
    ),
    '#required' => TRUE,
    '#description' => t('Enter the path to the helper application installed on your server. "%file%" is a placeholder for the path of the attachment file and is required. Include any command-line parameters as well (for example, pdftotext requires a - after the file to be processed).'),
  );

  // The add submit handler tells the two buttons apart by their generated
  // ids ('edit-submit-done' for this one), so the array keys must stay.
  $form['submit_done'] = array(
    '#type' => 'submit',
    '#value' => 'Save and Done',
  );
  $form['submit'] = array(
    '#type' => 'submit',
    '#value' => 'Save add another',
  );
  return $form;
}

/**
 * Element validator for the helper path field of the add and edit forms.
 *
 * Checks that the command contains the %file% token and that the program
 * named before the first whitespace exists on the server. Errors are filed
 * against the element itself so the offending field is highlighted.
 *
 * @param (array) $field the form element being validated; '#value' holds the
 *   command line and '#title' the field label
 */
function search_files_helpers_validate_add_edit($field) {
  if (strpos($field['#value'], '%file%') === FALSE) {
    form_error($field, t('"%field" must contain the token %file%', array(
      '%field' => $field['#title'],
    )));
  }

  // Check to see if helper app can be found: everything from the first
  // whitespace on is arguments, the part before it is the program.
  $helper_file = preg_replace('/\\s.+$/', '', $field['#value']);
  if (!file_exists($helper_file)) {
    form_error($field, t("Can't find helper app %helper -- please verify it is installed.", array(
      '%helper' => $helper_file,
    )));
  }
}

/**
 * Form builder for editing an existing helper app.
 *
 * Fetches the helper add form, alters it for use as an edit form and
 * populates the #default_value of each field from the stored helper. The
 * helper id is read from the sixth path component.
 *
 * Using #default_value (rather than forcing #value) lets the Form API put
 * the text the user submitted into $form_state['values'] and redisplay it
 * after a validation error.
 *
 * @return (array) $form
 */
function search_files_helper_edit() {
  $helper = db_fetch_object(db_query("SELECT * FROM {search_files_helpers} WHERE id = %d", arg(5)));

  // No such helper (missing or stale id): go back to the list instead of
  // showing an edit form that cannot update anything.
  if (!$helper) {
    drupal_set_message(t('The requested helper does not exist.'), 'error');
    drupal_goto('admin/settings/search_files/helpers/list');
  }

  $form = search_files_helper_add_form();

  // Carried server-side only; tells the submit handler which row to update.
  $form['search_file_id'] = array(
    '#type' => 'value',
    '#value' => $helper->id,
  );
  $form['search_files_helper_path']['#default_value'] = $helper->helper_path;
  $form['search_files_extension']['#default_value'] = $helper->extension;
  $form['search_files_name']['#default_value'] = $helper->name;

  // The edit form has a single button.
  unset($form['submit_done']);
  $form['submit']['#value'] = 'Update';
  return $form;
}

/**
 * Submit handler of the helper edit form.
 *
 * Updates the row in the table search_files_helpers for the given helper app
 * and, on success, reports the update and returns to the helper list.
 *
 * @param (array) $form the submitted form (unused)
 * @param (array) $form_state form state; reads the posted field values and
 *   $form_state['values']['search_file_id'], sets 'redirect' on success
 */
function search_files_helper_edit_submit($form, &$form_state) {

  // The new field contents are taken from the raw POST data attached to the
  // clicked button, as this handler has always done.
  // NOTE(review): switch to $form_state['values'] once it is confirmed that
  // the edit form populates them from user input.
  $posted = $form_state['clicked_button']['#post'];

  $sql = "UPDATE {search_files_helpers} SET name = '%s', extension = '%s', helper_path = '%s' WHERE id = %d";
  $result = db_query($sql, $posted['search_files_name'], $posted['search_files_extension'], $posted['search_files_helper_path'], $form_state['values']['search_file_id']);

  if ($result) {
    // Report the name that was actually written to the database.
    drupal_set_message(t('Helper app %helper_name has been updated', array(
      '%helper_name' => $posted['search_files_name'],
    )));

    // Hand the redirect to the Form API instead of calling drupal_goto(),
    // which would end the request before form processing has finished.
    $form_state['redirect'] = 'admin/settings/search_files/helpers/list';
  }
}

/**
 * Validate handler of the helper add form.
 *
 * Rejects the submission when another helper already uses the same name or
 * the same extension. Each problem is filed against its own field so both
 * can be reported at once and the right field is highlighted.
 *
 * @param (array) $form the submitted form (unused)
 * @param (array) $form_state form state; reads
 *   $form_state['values']['search_files_name'] and
 *   $form_state['values']['search_files_extension']
 */
function search_files_helper_add_form_validate($form, $form_state) {
  $name = $form_state['values']['search_files_name'];
  if (db_result(db_query("SELECT COUNT(*) FROM {search_files_helpers} WHERE name = '%s'", $name))) {
    form_set_error('search_files_name', t('Helper name already in list'));
  }

  $extension = $form_state['values']['search_files_extension'];
  if (db_result(db_query("SELECT COUNT(*) FROM {search_files_helpers} WHERE extension = '%s'", $extension))) {
    form_set_error('search_files_extension', t('Extension already in list'));
  }
}

/**
 * Submit handler of the helper add form.
 *
 * Inserts a row in the search_files_helpers table for the helper app given
 * by the form. After "Save and Done" the user is sent to the helper list;
 * after "Save add another" no redirect is set here.
 *
 * @param (array) $form the submitted form (unused)
 * @param (array) $form_state form state, taken by reference like the other
 *   submit handlers in this file so that 'redirect' can be set
 */
function search_files_helper_add_form_submit($form, &$form_state) {
  $values = $form_state['values'];
  $result = search_files_helper_db_add($values['search_files_name'], $values['search_files_extension'], $values['search_files_helper_path']);
  if ($result) {
    drupal_set_message(t('%helper helper added', array(
      '%helper' => $values['search_files_name'],
    )));
  }

  // The id 'edit-submit-done' belongs to the 'submit_done' button.
  if ($form_state['clicked_button']['#id'] == 'edit-submit-done') {
    // Hand the redirect to the Form API instead of calling drupal_goto(),
    // which would end the request before form processing has finished.
    $form_state['redirect'] = 'admin/settings/search_files/helpers/list';
  }
}

/**
 * Insert a row in the search_files_helpers table for the helper app given.
 *
 * @param (string) $name human-readable helper name
 * @param (string) $extension file extension handled by the helper
 * @param (string) $helper_path command line of the helper
 * @return the value returned by db_query() for the INSERT
 */
function search_files_helper_db_add($name, $extension, $helper_path) {
  return db_query(
    "INSERT INTO {search_files_helpers} (name, extension, helper_path) VALUES ('%s', '%s', '%s')",
    $name,
    $extension,
    $helper_path
  );
}

/**
 * Detect the character encoding of a text and convert the text from it to
 * UTF-8, Drupal's default HTTP and database character encoding.
 *
 * The text is returned unchanged when its encoding cannot be determined,
 * either because the optional mbstring extension is not available or
 * because detection fails.
 *
 * @param (string) $text text in an unknown encoding
 * @return the result of drupal_convert_to_utf8(), or $text unchanged when no
 *   encoding could be detected
 */
function search_files_convert_to_utf8($text) {

  // mbstring is an optional PHP extension; calling it blindly would be a
  // fatal error on servers without it.
  if (!function_exists('mb_detect_encoding')) {
    return $text;
  }

  $encoding = mb_detect_encoding($text);

  // FALSE means no candidate encoding matched; it is not a usable source
  // encoding for the converter.
  if ($encoding === FALSE) {
    return $text;
  }
  return drupal_convert_to_utf8($text, $encoding);
}

/**
 * Format a search result item before it is displayed.
 *
 * 'link' and 'title' are required. 'type', 'user', 'date', 'extra' and
 * 'snippet' are optional and simply left out of the output when absent or
 * empty. The info line lists type, date, the extra entries and finally the
 * user, separated by " - ".
 *
 * @param (array) $item the search result
 * @param (string) $type not used by this implementation
 * @return (string) $output formatted string (a <dt>/<dd> pair)
 */
function search_files_search_item_format($item, $type) {
  $output = ' <dt class="title"><a href="' . check_url($item['link']) . '">' . check_plain($item['title']) . '</a></dt>';
  $info = array();
  if (!empty($item['type'])) {
    $info[] = check_plain($item['type']);
  }
  if (!empty($item['user'])) {

    // Add this here so the user name appears at the end of the output
    $item['extra'][] = $item['user'];
  }
  if (!empty($item['date'])) {
    $info[] = format_date($item['date'], 'small');
  }
  if (isset($item['extra']) && is_array($item['extra'])) {
    $info = array_merge($info, $item['extra']);
  }
  $snippet = !empty($item['snippet']) ? '<p>' . $item['snippet'] . '</p>' : '';
  $output .= ' <dd>' . $snippet . '<p class="search-info">' . implode(' - ', $info) . '</p></dd>';
  return $output;
}

/**
 * Map every configured file extension to the name of its helper.
 *
 * The data is read from the search_files_helpers table on each call.
 *
 * @return (array) extension => helper name
 */
function search_files_get_file_extensions() {
  $names_by_extension = array();
  $rows = db_query("SELECT extension, name FROM {search_files_helpers}");
  while ($row = db_fetch_object($rows)) {
    $names_by_extension[$row->extension] = $row->name;
  }
  return $names_by_extension;
}

/**
 * Get the name of a helper for the given extension.
 *
 * @param (string) $ext file extension to look up
 * @return (string) name of the first matching helper, or NULL when no helper
 *   is registered for the extension
 */
function search_files_helper_name($ext) {
  $row = db_fetch_object(db_query("SELECT name FROM {search_files_helpers} WHERE extension = '%s'", $ext));
  return $row ? $row->name : NULL;
}
/**
 * Return an empty string.
 *
 * NOTE(review): looks like a placeholder; no caller is visible in this file.
 *
 * @return (string) always ''
 */
function search_files_settings() {
  $output = '';
  return $output;
}

/**
 * Mark all words indexed for $type as dirty, then update the search totals.
 *
 * Every space-separated token of each {search_dataset} row of the given type
 * is passed to search_dirty() before search_update_totals() is called once.
 *
 * @param (string) $type value of the type column in {search_dataset}
 */
function search_files_update_totals($type) {
  $datasets = db_query("SELECT data FROM {search_dataset} WHERE type = '%s'", $type);
  while ($dataset = db_fetch_object($datasets)) {
    $words = explode(" ", $dataset->data);
    array_map('search_dirty', $words);
  }
  search_update_totals();
}

Functions

Namesort descending Description
search_files_convert_to_utf8 Get the host system's character encoding and convert text from it to UTF-8, Drupal's default HTTP and database character encoding.
search_files_get_file_extensions returns an array of the file extensions with the extension as the key and the name of the file type as the value, this data is retrieved from the search_files_helpers table
search_files_get_helpers get an array of helper programs
search_files_help
search_files_helpers_validate_add_edit validate the existence of the helper app supplied by the helper_add or helper_edit form
search_files_helper_add_form generate form to add a helper app to the system
search_files_helper_add_form_submit insert a row in the search_files_helpers table for the helper app given by the form
search_files_helper_add_form_validate validates uniqueness of the helper name and extension
search_files_helper_autodetect generate form to autodetect helper apps
search_files_helper_autodetect_submit autodetect helper apps
search_files_helper_confirm_delete check to make sure the $helper_id is valid (a number greater than zero) then it fetches the deletion confirmation form, then returns it
search_files_helper_confirm_delete_form generate the deletion confirmation form for helper apps and return the form
search_files_helper_confirm_delete_form_submit remove row for the helper app from the search_files_helpers table
search_files_helper_db_add insert a row in the search_files_helpers table for the helper app given
search_files_helper_edit fetch the helper add form, alter the form for use as an edit form, populate the #default_values fields and return the form
search_files_helper_edit_submit update the row in the table search_files_helpers for the given helper app
search_files_helper_list display a themed table of the current helper apps set up in the system
search_files_helper_name Get the name of a helper for the given extension.
search_files_install_auto_helper_app_configuration automatically detect helper apps and configure them
search_files_issafemode Check whether we run in PHP safe_mode
search_files_menu Implementation of hook_menu().
search_files_overview
search_files_perm Implementation of hook_perm().
search_files_search_item_format format the result items before that are displayed
search_files_settings
search_files_update_totals search_files_update_totals($type) marks all words of $type dirty, then updates search totals