search_files.module in Search Files 7.2
Same filename and directory in other branches
Organizes and provides helper functions for extracting text from files.
File
search_files.module (View source)
<?php
/**
* @file
* Organizes and provides helper functions for extracting text from files.
*/
/**
 * Implements hook_help().
 *
 * Only the helper autodetection page gets help text: a short list of the
 * conditions under which autodetection can work. Every other path yields an
 * empty string.
 */
function search_files_help($path, $arg) {
  // Guard clause: nothing to say for any path but the autodetect page.
  if ($path != 'admin/config/search/search_files/helpers/autodetect') {
    return '';
  }

  $intro = t('This will only work:');
  $conditions = array(
    t('If PHP safemode is disabled (required by this module anyway)'),
    t('If your web server is running Unix (Linux, BSD, Solaris, ...), not Windows (required "which" tool missing)'),
    t('If the search PATH inside Apache/PHP environment is set to include the directories containing the helpers'),
  );

  $help = '<p>' . $intro . '</p>';
  $help .= '<ul>';
  foreach ($conditions as $condition) {
    $help .= '<li>' . $condition . '</li>';
  }
  $help .= '</ul>';

  return $help;
}
/**
 * Implements hook_menu().
 *
 * Registers the module overview page and the helper administration pages
 * (list, add, autodetect, edit, delete). Every item requires the
 * 'administer search_files configuration' permission, except the default
 * "List" tab, which declares no access arguments of its own.
 */
function search_files_menu() {
  // All callbacks in this module share the same access requirement.
  $admin_access = array(
    'administer search_files configuration',
  );

  $menu = array();

  // Module landing page.
  $menu['admin/config/search/search_files'] = array(
    'title' => 'Search Files',
    'description' => 'Manage searching for files in attachments and directories',
    'page callback' => 'search_files_overview',
    'access arguments' => $admin_access,
    'type' => MENU_NORMAL_ITEM,
  );

  // Helper list, with "List" as its default tab.
  $menu['admin/config/search/search_files/helpers'] = array(
    'title' => 'Helpers',
    'description' => 'List Search Files helper applications',
    'page callback' => 'search_files_helper_list',
    'access arguments' => $admin_access,
    'type' => MENU_NORMAL_ITEM,
    'weight' => 0,
  );
  $menu['admin/config/search/search_files/helpers/list'] = array(
    'title' => 'List',
    'type' => MENU_DEFAULT_LOCAL_TASK,
    'weight' => 0,
  );

  // Secondary tabs.
  $menu['admin/config/search/search_files/helpers/add'] = array(
    'title' => 'Add',
    'description' => 'Add Search Files helper application',
    'page callback' => 'search_files_edit',
    'access arguments' => $admin_access,
    'type' => MENU_LOCAL_TASK,
    'weight' => 1,
  );
  $menu['admin/config/search/search_files/helpers/autodetect'] = array(
    'title' => 'Autodetect',
    'description' => 'Autodetect Search Files helper application',
    'page callback' => 'drupal_get_form',
    'page arguments' => array(
      'search_files_helper_autodetect',
    ),
    'access arguments' => $admin_access,
    'type' => MENU_LOCAL_TASK,
    'weight' => 2,
  );

  // Callbacks reached from the operation links of the helper list.
  $menu['admin/config/search/search_files/helpers/edit'] = array(
    'title' => 'Edit',
    'description' => 'Edit Search Files helper application',
    'page callback' => 'search_files_edit',
    'access arguments' => $admin_access,
    'type' => MENU_CALLBACK,
  );
  $menu['admin/config/search/search_files/helpers/delete'] = array(
    'title' => 'Delete',
    'description' => 'Delete Search Files helper application',
    'page callback' => 'drupal_get_form',
    'page arguments' => array(
      'search_files_helper_confirm_delete_form',
    ),
    'access arguments' => $admin_access,
    'type' => MENU_CALLBACK,
  );

  return $menu;
}
/**
 * Implements hook_permission().
 *
 * Declares one permission for administering the module and one for viewing
 * its search results.
 */
function search_files_permission() {
  $permissions = array();

  $permissions['administer search_files configuration'] = array(
    'title' => t('Administer Search Files configuration'),
  );
  $permissions['view search_files results'] = array(
    'title' => t('View Search Files results'),
  );

  return $permissions;
}
/**
 * Form callback for path admin/config/search/search_files/helpers/autodetect.
 *
 * The form consists of a single "Autodetect" submit button.
 */
function search_files_helper_autodetect($form, &$form_state) {
  $autodetect_button = array(
    '#type' => 'submit',
    '#value' => t('Autodetect'),
  );
  $form['submit'] = $autodetect_button;

  return $form;
}
/**
 * Submit callback for search_files_helper_autodetect().
 *
 * Runs helper autodetection and then sends the user to the helper list.
 */
function search_files_helper_autodetect_submit($form, &$form_state) {
  // Detect the helper applications and store them in the database.
  search_files_install_auto_helper_app_configuration();

  // Let the Form API perform the redirect once form processing has finished
  // instead of ending the request from inside the submit handler.
  $form_state['redirect'] = 'admin/config/search/search_files/helpers/list';
}
/**
 * Detects helper applications and adds them to the database.
 *
 * When PHP safe mode is active, shell_exec() cannot be used, so only two
 * sample helpers (pdftotext and cat, resolved through /usr/bin/env) are
 * registered. Otherwise each known extractor is looked up with "which" and,
 * when found, registered for its file extension; a status message is shown
 * for every helper that gets configured.
 */
function search_files_install_auto_helper_app_configuration() {
  // safe_mode will inhibit shell_exec().
  if (search_files_issafemode()) {
    drupal_set_message(t('Since PHP is running in safe mode we cannot detect all possible helpers and most will have to be added manually.'));
    // Load sample helper apps into the database.
    search_files_helper_db_add(t("Portable Document Format"), "pdf", "/usr/bin/env pdftotext %file% -");
    search_files_helper_db_add(t("Text"), "txt", "/usr/bin/env cat %file%");
    return;
  }

  // On a Mac (Darwin) make sure the default install locations under
  // /opt/local are part of the PATH used for every lookup.
  $path = '';
  if (php_uname('s') == 'Darwin') {
    $path = 'export PATH=/opt/local/bin:/opt/local/sbin:$PATH;';
  }

  // PDF: prefer pdftotext and fall back to pstotext.
  $location = _search_files_which($path, 'pdftotext');
  if ($location) {
    search_files_helper_db_add(t("Portable Document Format"), "pdf", $location . " %file% -");
    drupal_set_message(t('Helper app pdftotext has been detected and configured'));
  }
  else {
    $location = _search_files_which($path, 'pstotext');
    if ($location) {
      search_files_helper_db_add(t("PDF"), "pdf", $location . " %file%");
      drupal_set_message(t('Helper app pstotext has been detected and configured'));
    }
  }

  // Plain text: cat.
  $location = _search_files_which($path, 'cat');
  if ($location) {
    search_files_helper_db_add(t("Text files"), "txt", $location . " %file%");
    drupal_set_message(t('Helper app cat has been detected and configured'));
  }

  // Word documents: catdoc.
  $location = _search_files_which($path, 'catdoc');
  if ($location) {
    search_files_helper_db_add(t("Word documents"), "doc", $location . " %file%");
    drupal_set_message(t('Helper app catdoc has been detected and configured'));
  }

  // Word 2007 documents: docx2txt.pl, which is run through perl, so both
  // have to be present.
  $location = _search_files_which($path, 'docx2txt.pl');
  $perl_location = _search_files_which($path, 'perl');
  if ($location && $perl_location) {
    search_files_helper_db_add(t("Word 2007 files"), "docx", $perl_location . ' ' . $location . " %file% -");
    drupal_set_message(t('Helper app docx2txt has been detected and configured'));
  }

  // Excel files: xls2csv.
  $location = _search_files_which($path, 'xls2csv');
  if ($location) {
    search_files_helper_db_add(t("Excel files"), "xls", $location . " %file%");
    drupal_set_message(t('Helper app xls2csv has been detected and configured'));
  }

  // PowerPoint presentations: catppt.
  $location = _search_files_which($path, 'catppt');
  if ($location) {
    search_files_helper_db_add(t("Power Point Presentations"), "ppt", $location . " %file%");
    drupal_set_message(t('Helper app catppt has been detected and configured'));
  }

  // Rich Text Format files: unrtf.
  $location = _search_files_which($path, 'unrtf');
  if ($location) {
    search_files_helper_db_add(t("Rich Text Format files"), "rtf", $location . " %file%");
    drupal_set_message(t('Helper app unrtf has been detected and configured'));
  }
}

/**
 * Locates an executable with the "which" shell command.
 *
 * Private helper of search_files_install_auto_helper_app_configuration();
 * only call it with trusted, literal program names because the name is put
 * into the shell command as-is.
 *
 * @param $path_prefix
 *   Shell snippet to run before "which" (for example a PATH export), or an
 *   empty string.
 * @param $program
 *   Name of the executable to look for.
 *
 * @return
 *   The trimmed output of "which", or an empty string when nothing was found.
 */
function _search_files_which($path_prefix, $program) {
  // shell_exec() returns NULL when the command fails or prints nothing.
  $location = trim((string) shell_exec($path_prefix . 'which ' . $program));
  // Discard output of the form "no <program> ...", which is a "not found"
  // report rather than a location.
  return preg_replace("/^no .*\$/", "", $location);
}
/**
 * Page callback for admin/config/search/search_files path.
 *
 * Returns a short description of the module followed by one paragraph per
 * configuration area (helpers, attachments, directories), each holding a link
 * to that area.
 */
function search_files_overview() {
  $page = t("Search Files in<ul><li>Attachments</li><li>Directories</li></ul> and extract their content for index and use with Drupal search.");

  $links = array(
    l(t('Set up helper applications'), 'admin/config/search/search_files/helpers'),
    l(t('Set up attachments search'), 'admin/config/search/search_files/attachments'),
    l(t('Set up directories search'), 'admin/config/search/search_files/directories'),
  );
  foreach ($links as $link) {
    $page .= '<p>' . $link . '</p>';
  }

  return $page;
}
/**
 * Returns an array of helper applications.
 *
 * The helper table is read once per request; the result is kept in a static
 * variable so that repeated calls do not query the database again.
 *
 * @return
 *   Array whose keys are extensions and values are the paths to helper apps.
 */
function search_files_get_helpers() {
  static $helpers;

  // Serve every call after the first one from the static cache.
  if (isset($helpers)) {
    return $helpers;
  }

  $helpers = array();
  $rows = db_query("SELECT * FROM {search_files_helpers}")->fetchAll();
  foreach ($rows as $row) {
    $helpers[$row->extension] = $row->helper_path;
  }

  return $helpers;
}
/**
 * Checks whether we are running in PHP safe_mode.
 *
 * The "safe_mode" ini value is matched, case-insensitively and unanchored,
 * against "1" or "on".
 *
 * @return
 *   1 if we're in safe mode, 0 if not, FALSE if there's an error.
 */
function search_files_issafemode() {
  // Read the setting quietly; the raw preg_match() result is returned.
  $safe_mode_setting = @ini_get("safe_mode");
  $is_enabled = preg_match('/(1|on)/i', $safe_mode_setting);

  return $is_enabled;
}
/**
 * Form callback for confirming deletion of a helper application.
 *
 * Looks up the helper's name, stores both the id and the name as form values
 * and wraps everything in a confirmation form whose cancel link leads back to
 * the helper list.
 *
 * @param $helper_id
 *   Id of the helper to delete; it is cast to an integer before use.
 */
function search_files_helper_confirm_delete_form($form, &$form_state, $helper_id) {
  $helper_id = intval($helper_id);

  $name_query = db_query("SELECT name FROM {search_files_helpers} WHERE id = :hid", array(
    ':hid' => $helper_id,
  ));
  $helper_name = $name_query->fetchField();

  // Carry the id and name along for the submit handler.
  $form['search_files_helper_id'] = array(
    '#type' => 'value',
    '#value' => $helper_id,
  );
  $form['search_files_helper_name'] = array(
    '#type' => 'value',
    '#value' => $helper_name,
  );

  $placeholders = array(
    '%name' => $helper_name,
  );
  $question = t('Delete %name helper?', $placeholders);
  $cancel_path = 'admin/config/search/search_files/helpers/list';
  $description = t('Are you sure you want to delete the %name helper? The text extracted by this helper will remain in the search index until the directory is reindexed. This action cannot be undone.', $placeholders);

  return confirm_form($form, $question, $cancel_path, $description, t('Delete'), t('Cancel'));
}
/**
 * Submit callback for search_files_helper_confirm_delete_form().
 *
 * Removes the helper application from the database table, reports the
 * deletion and sends the user back to the helper list.
 */
function search_files_helper_confirm_delete_form_submit($form, &$form_state) {
  db_delete('search_files_helpers')
    ->condition('id', $form_state['values']['search_files_helper_id'])
    ->execute();

  drupal_set_message(t('Helper has been deleted'));

  // Let the Form API perform the redirect once form processing has finished
  // instead of ending the request from inside the submit handler.
  $form_state['redirect'] = 'admin/config/search/search_files/helpers/list';
}
/**
 * Returns an HTML table of the current helper apps set up in the system.
 *
 * Each row shows the helper name and extension (both escaped with
 * check_plain()) plus edit and delete links. Below the table a note states
 * whether PHP safe mode allows helper applications to be used.
 */
function search_files_helper_list() {
  $header = array(
    t('Helper name'),
    t('Extension'),
    array(
      'data' => t('Operations'),
      // The edit and delete links share one header cell.
      'colspan' => '2',
    ),
  );

  $result = db_query("SELECT * FROM {search_files_helpers} ORDER BY extension")
    ->fetchAll();
  $rows = array();
  foreach ($result as $helper) {
    $rows[] = array(
      check_plain($helper->name),
      check_plain($helper->extension),
      l(t('edit'), 'admin/config/search/search_files/helpers/edit/' . $helper->id),
      l(t('delete'), 'admin/config/search/search_files/helpers/delete/' . $helper->id),
    );
  }

  $output = theme('table', array(
    'header' => $header,
    'rows' => $rows,
    // The tab is called "Autodetect"; the message previously read "Autodect".
    'empty' => t('No helpers are configured yet. Use the Add or Autodetect tab to configure helper functions.'),
  ));

  // safe_mode will inhibit shell_exec().
  if (search_files_issafemode()) {
    $output .= '<p>' . t('<b>WARNING!</b> This server has safe_mode enabled, which inhibits use of helper applications') . '</p>';
  }
  else {
    $output .= '<p>' . t("Good. This server has safe_mode disabled, which allows use of helper applications.") . '</p>';
  }

  return $output;
}
/**
 * Edits or adds a helper.
 *
 * @param $hid
 *   Id of the helper to edit. When empty, the form for adding a new helper is
 *   shown.
 *
 * @return
 *   The add/edit form, or MENU_NOT_FOUND when $hid does not match a stored
 *   helper.
 */
function search_files_edit($hid = 0) {
  if (!$hid) {
    return drupal_get_form('search_files_helper_edit_form');
  }

  $values = db_query('SELECT * FROM {search_files_helpers} WHERE id = :id', array(
    ':id' => $hid,
  ))
    ->fetchAssoc();

  // fetchAssoc() yields FALSE for an unknown id. Handing that to the form
  // builder would break its "$values += array(...)" defaulting, so answer
  // with a "page not found" instead.
  if (!$values) {
    return MENU_NOT_FOUND;
  }

  return drupal_get_form('search_files_helper_edit_form', $values);
}
/**
 * Form callback for adding/editing a file helper.
 *
 * @param $values
 *   Optional associative array with the keys 'name', 'extension',
 *   'helper_path' and 'id' of an existing helper. Missing keys fall back to
 *   sample values; an empty 'id' means a new helper is being added.
 */
function search_files_helper_edit_form($form, &$form_state, $values = array()) {
  // Sample defaults shown when adding a new helper.
  $values += array(
    'name' => t('Sample extractor'),
    'extension' => 'foo',
    'helper_path' => '/opt/bin/foo-extract %file%',
    'id' => 0,
  );

  $form['search_files_name'] = array(
    '#type' => 'textfield',
    '#title' => t('Helper name'),
    '#size' => 50,
    '#maxlength' => 50,
    '#default_value' => $values['name'],
    '#required' => TRUE,
    '#description' => t('A name for this helper configuration.'),
  );
  $form['search_files_extension'] = array(
    '#type' => 'textfield',
    '#title' => t('Extension'),
    '#size' => 10,
    '#maxlength' => 10,
    '#default_value' => $values['extension'],
    '#required' => TRUE,
    '#description' => t('Enter the extension for the files that you want the helper application to process. Do not include the period.'),
  );
  $form['search_files_helper_path'] = array(
    '#type' => 'textfield',
    '#title' => t('Helper path'),
    '#size' => 100,
    '#maxlength' => 255,
    '#default_value' => $values['helper_path'],
    // Per-element validators are registered through #element_validate as a
    // plain list of function names; the previous "#validate" entry on the
    // element was never executed.
    '#element_validate' => array(
      'search_files_helpers_validate_add_edit',
    ),
    '#required' => TRUE,
    '#description' => t('Enter the path to the helper application installed on your server. "%file%" is a placeholder for the path of the attachment file and is required. Include any command-line parameters as well (for example, pdftotext requires a - after the file to be processed).'),
  );

  if ($values['id']) {
    // Editing an existing helper: remember its id and offer one button.
    $form['search_files_id'] = array(
      '#type' => 'value',
      '#value' => $values['id'],
    );
    $form['submit_done'] = array(
      '#type' => 'submit',
      '#value' => t('Update'),
    );
  }
  else {
    // Adding a new helper: offer both save variants and check uniqueness.
    $form['submit_done'] = array(
      '#type' => 'submit',
      '#value' => t('Save and done'),
    );
    $form['submit'] = array(
      '#type' => 'submit',
      '#value' => t('Save and add another'),
    );
    $form['#validate'][] = 'search_files_helper_add_form_validate';
  }

  return $form;
}
/**
 * Validate callback for the helper path field of the add/edit helper form.
 *
 * Checks that the command contains the %file% token and that the program
 * named at the start of the command exists on the server.
 *
 * @param $field
 *   The form element being validated; its '#value' holds the command line.
 */
function search_files_helpers_validate_add_edit($field) {
  if (strpos($field['#value'], '%file%') === FALSE) {
    // form_error() attaches the message to the element itself, so the field
    // is the one flagged in the form.
    form_error($field, t('"%field" must contain the token %file%', array(
      '%field' => $field['#title'],
    )));
  }

  // Check to see if the helper app can be found: everything from the first
  // whitespace onwards is treated as arguments and cut off.
  $helper_file = preg_replace('/\\s.+$/', '', $field['#value']);
  if (!file_exists($helper_file)) {
    form_error($field, t("Can't find helper app %helper -- please verify it is installed.", array(
      '%helper' => $helper_file,
    )));
  }
}
/**
 * Submit callback for search_files_helper_edit_form().
 *
 * Updates the helper when the form carries an id, otherwise inserts a new
 * one. After an update, and after an insert made with the "Save and done"
 * button, the user is sent to the helper list; after "Save and add another"
 * no redirect target is set.
 */
function search_files_helper_edit_form_submit($form, &$form_state) {
  $values = $form_state['values'];
  $list_path = 'admin/config/search/search_files/helpers/list';

  $id = 0;
  if (isset($values['search_files_id'])) {
    $id = intval($values['search_files_id']);
  }

  if ($id) {
    // Existing helper: update it in place.
    $num = db_update('search_files_helpers')
      ->fields(array(
        'name' => $values['search_files_name'],
        'extension' => $values['search_files_extension'],
        'helper_path' => $values['search_files_helper_path'],
      ))
      ->condition('id', $id)
      ->execute();
    if ($num) {
      drupal_set_message(t('Helper app %helper_name has been updated', array(
        '%helper_name' => $values['search_files_name'],
      )));
    }
    else {
      drupal_set_message(t('An error occurred'), 'error');
    }
    // Let the Form API perform the redirect once form processing is done
    // instead of ending the request from inside the submit handler.
    $form_state['redirect'] = $list_path;
    return;
  }

  // New helper: insert it.
  $result = search_files_helper_db_add($values['search_files_name'], $values['search_files_extension'], $values['search_files_helper_path']);
  if ($result) {
    drupal_set_message(t('%helper helper added', array(
      '%helper' => $values['search_files_name'],
    )));
  }
  else {
    drupal_set_message(t('An error occurred'), 'error');
  }

  // Only "Save and done" goes back to the list.
  if (isset($form_state['clicked_button']['#id']) && $form_state['clicked_button']['#id'] == 'edit-submit-done') {
    $form_state['redirect'] = $list_path;
  }
}
/**
 * Validates uniqueness of name and extension.
 *
 * A helper name or extension that is already used by a different helper (one
 * whose id differs from the id carried by the form, or any helper when the
 * form carries no id) is reported as an error on the corresponding field.
 */
function search_files_helper_add_form_validate($form, $form_state) {
  $values = $form_state['values'];

  // The edit form stores the helper id under 'search_files_id'.
  $id = 0;
  if (isset($values['search_files_id'])) {
    $id = intval($values['search_files_id']);
  }

  // Only existence matters, so a single column of a single row is fetched.
  $name_taken = db_query("SELECT 1 FROM {search_files_helpers} WHERE name = :name AND id <> :id", array(
    ':name' => $values['search_files_name'],
    ':id' => $id,
  ))
    ->fetchField();
  if ($name_taken) {
    form_set_error('search_files_name', t('Helper name already in list'));
  }

  $extension_taken = db_query("SELECT 1 FROM {search_files_helpers} WHERE extension = :ext AND id <> :id", array(
    ':ext' => $values['search_files_extension'],
    ':id' => $id,
  ))
    ->fetchField();
  if ($extension_taken) {
    form_set_error('search_files_extension', t('Extension already in list'));
  }
}
/**
 * Inserts a helper into the database.
 *
 * Any helper already registered for the same extension is removed first, so
 * there is never more than one helper per extension. Removal and insertion
 * run inside one transaction: if the insert fails, the previous helper is
 * kept (on database engines that support transactions).
 *
 * @param $name
 *   Human-readable helper name.
 * @param $extension
 *   File extension the helper handles.
 * @param $helper_path
 *   Command line of the helper.
 *
 * @return
 *   The result of the insert query's execute() call.
 */
function search_files_helper_db_add($name, $extension, $helper_path) {
  $transaction = db_transaction();
  try {
    // If there is already a helper for that extension we remove it first so
    // we don't end up with multiple helpers for the same extension.
    db_delete('search_files_helpers')
      ->condition('extension', $extension)
      ->execute();

    return db_insert('search_files_helpers')
      ->fields(array(
        'name' => $name,
        'extension' => $extension,
        'helper_path' => $helper_path,
      ))
      ->execute();
  }
  catch (Exception $e) {
    // Restore the helper that was just deleted, then let the caller see the
    // original failure.
    $transaction->rollback();
    throw $e;
  }
}
/**
 * Converts text to UTF8.
 *
 * The source encoding is whatever mb_detect_encoding() reports for the text;
 * the conversion itself is done by drupal_convert_to_utf8().
 *
 * @param $text
 *   The text to convert.
 *
 * @return
 *   The value returned by drupal_convert_to_utf8().
 */
function search_files_convert_to_utf8($text) {
  $detected_encoding = mb_detect_encoding($text);

  return drupal_convert_to_utf8($text, $detected_encoding);
}
/**
 * Returns an array of file helper extensions and names.
 *
 * @return
 *   Array whose keys are file extensions, and whose values are the
 *   helper names assigned to them.
 */
function search_files_get_file_extensions() {
  $rows = db_query("SELECT extension, name FROM {search_files_helpers}")->fetchAll();

  $names_by_extension = array();
  foreach ($rows as $row) {
    $names_by_extension[$row->extension] = $row->name;
  }

  return $names_by_extension;
}
/**
 * Returns the name of a helper for a given extension.
 *
 * @param $ext
 *   Extension to find name of.
 *
 * @return
 *   Name of the helper registered for the extension. Returns nothing (NULL)
 *   if no helper is found or the stored name is empty.
 */
function search_files_helper_name($ext) {
  $name_query = db_query("SELECT name FROM {search_files_helpers} WHERE extension = :ext", array(
    ':ext' => $ext,
  ));
  $name = $name_query->fetchField();

  return $name ? $name : NULL;
}
/**
 * Updates the search totals for a given search type.
 *
 * Every space-separated word stored in the search dataset for the given type
 * is passed to search_dirty(), after which search_update_totals() is called.
 *
 * Maintainer note carried over from the original author: calling
 * search_dirty() here is probably unnecessary because search_index() is said
 * to call it for all indexed content already; calling only
 * search_update_totals() may be enough. This has not been changed.
 *
 * @param $type
 *   The search type (search module) to update.
 */
function search_files_update_totals($type) {
  $dataset_rows = db_query("SELECT data FROM {search_dataset} WHERE type = :type", array(
    ':type' => $type,
  ))->fetchAll();

  foreach ($dataset_rows as $row) {
    $words = explode(" ", $row->data);
    foreach ($words as $dirty_word) {
      search_dirty($dirty_word);
    }
  }

  search_update_totals();
}
/**
 * Returns the text of a file, extracted with the helper for its extension.
 *
 * The extension is the part of the file name after its last dot. The helper
 * registered for that extension is run with "%file%" replaced by the
 * shell-quoted real path of the file, and whatever the helper prints to
 * standard output is used as the file text. Error output of the helper is
 * appended to /tmp/error-output.txt.
 *
 * @param $path
 *   (string) The system path of the file to read.
 *
 * @return
 *   A string of the form 'file name: "<real path>", text: <extracted text>'.
 *   The text part is empty when the file does not exist, when no helper is
 *   registered for its extension, or when the helper cannot be started.
 */
function search_files_get_content($path) {
  $helpers = search_files_get_helpers();

  // The extension is whatever follows the last dot of the last path segment.
  $segments = explode('/', $path);
  $file_name = end($segments);
  $name_parts = explode('.', $file_name);
  $file_extension = end($name_parts);

  // realpath() returns FALSE for a file that does not exist.
  $real_path = realpath($path);
  $quoted_file_path = '"' . ($real_path === FALSE ? '' : $real_path) . '"';
  $text = '';

  // Prefer an exact match on the extension, then fall back to its lower-case
  // form so that "REPORT.PDF" is handled by the "pdf" helper.
  $helper = NULL;
  if (isset($helpers[$file_extension])) {
    $helper = $helpers[$file_extension];
  }
  elseif (isset($helpers[strtolower($file_extension)])) {
    $helper = $helpers[strtolower($file_extension)];
  }

  if ($helper !== NULL && $real_path !== FALSE) {
    // escapeshellarg() quotes the path as one shell argument, and
    // str_replace() inserts it literally ("$1" or a backslash in the path is
    // not treated as replacement syntax).
    $helper_command = str_replace('%file%', escapeshellarg($real_path), $helper);

    $descriptorspec = array(
      // stdin is a pipe that the child will read from.
      0 => array("pipe", "r"),
      // stdout is a pipe that the child will write to.
      1 => array("pipe", "w"),
      // stderr is appended to a file.
      2 => array("file", "/tmp/error-output.txt", "a"),
    );
    $cwd = '/tmp';

    $process = proc_open($helper_command, $descriptorspec, $pipes, $cwd, NULL);
    if (is_resource($process)) {
      // Nothing is sent to the helper; close its stdin right away so that a
      // helper reading from stdin sees end-of-file instead of waiting.
      fclose($pipes[0]);

      $output = stream_get_contents($pipes[1]);
      fclose($pipes[1]);

      // All pipes must be closed before proc_close() to avoid a deadlock.
      proc_close($process);

      if ($output !== FALSE) {
        $text = $output;
      }
    }
  }

  return 'file name: ' . $quoted_file_path . ', text: ' . $text;
}
Functions
Name | Description |
---|---|
search_files_convert_to_utf8 | Converts text to UTF8 using the system's character encoding. |
search_files_edit | Edits or adds a helper. |
search_files_get_content | Returns the file text using the appropriate helper. |
search_files_get_file_extensions | Returns an array of file helper extensions and names. |
search_files_get_helpers | Returns an array of helper applications. |
search_files_help | Implements hook_help(). |
search_files_helpers_validate_add_edit | Validate callback for search_files_helper_add_form field. |
search_files_helper_add_form_validate | Validates uniqueness of name and extension. |
search_files_helper_autodetect | Form callback for path admin/config/search/search_files/helpers/autodetect. |
search_files_helper_autodetect_submit | Submit callback for search_files_helper_autodetect(). |
search_files_helper_confirm_delete_form | Form callback for confirming deletion of a helper application. |
search_files_helper_confirm_delete_form_submit | Submit callback for search_files_helper_confirm_delete_form(). |
search_files_helper_db_add | Inserts a helper into the database. |
search_files_helper_edit_form | Form callback for adding/editing a file helper. |
search_files_helper_edit_form_submit | Updates the helper to the database. |
search_files_helper_list | Returns an HTML table of the current helper apps set up in the system. |
search_files_helper_name | Returns the name of a helper for a given extension. |
search_files_install_auto_helper_app_configuration | Detects helper applications and adds to database. |
search_files_issafemode | Checks whether we are running in PHP safe_mode. |
search_files_menu | Implements hook_menu(). |
search_files_overview | Page callback for admin/config/search/search_files path. |
search_files_permission | Implements hook_permission(). |
search_files_update_totals | Updates the search totals for a given search type. |