protected function SearchApiAttachmentsAlterSettings::getFileContent in Search API attachments 7
Extracts the file content.
Parameters
object $file: The file object.
Return value
string The extracted content.
10 calls to SearchApiAttachmentsAlterSettings::getFileContent()
- SearchApiAttachmentsAlterSettings::alterItems in includes/
callback_attachments_settings.inc - Alter items before indexing.
- SearchApiAttachmentsCommentAlterSettings::alterItems in contrib/
search_api_attachments_comment/ includes/ callback_attachments_comment_settings.inc - Alter items before indexing.
- SearchApiAttachmentsCommerceProductReferenceAlterSettings::alterItems in contrib/
search_api_attachments_commerce_product_reference/ includes/ callback_attachments_commerce_product_reference_settings.inc - Alter items before indexing.
- SearchApiAttachmentsEntityreferenceAlterSettings::getFilesContent in contrib/
search_api_attachments_entityreference/ includes/ callback_attachments_entityreference_settings.inc - Extracts and returns contents of files.
- SearchApiAttachmentsFieldCollectionsAlterSettings::alterItems in contrib/
search_api_attachments_field_collections/ includes/ callback_attachments_field_collections_settings.inc - Alter items before indexing.
File
- includes/
callback_attachments_settings.inc, line 268 - Search API data alteration callback.
Class
- SearchApiAttachmentsAlterSettings
- Indexes files content.
Code
/**
 * Extracts the file content, reusing a cached extraction when available.
 *
 * The file's MIME type is checked against the text and image MIME type lists
 * first; every other file is routed to the extraction method configured in
 * the 'search_api_attachments_extract_using' variable (default 'tika').
 * The PDF-only methods ('python_pdf2txt', 'pdftotext') skip files whose MIME
 * type is not in the PDF list; an unrecognised setting falls back to Solr.
 *
 * @param object $file
 *   The file object (an array is accepted too; it is cast to an array). The
 *   keys read here are 'fid', 'uri', 'filemime' and 'filename', all of which
 *   are treated as optional.
 *
 * @return string|false
 *   The cached or freshly extracted content as returned by the selected
 *   extractor (presumably a string - confirm against the extract*() methods),
 *   or FALSE when no extraction was performed.
 */
protected function getFileContent($file) {
  $extraction = FALSE;
  // Callers may hand over an object or an array; always work on an array.
  $file = (array) $file;

  // Before running the (performance-intensive) extraction process, check
  // if we already have a cached copy of the extracted data.
  if (isset($file['fid'])) {
    // The cache ID is derived from the file ID only.
    $cid = 'cached_extraction_:' . $file['fid'];
    $cached_extraction = cache_get($cid, self::CACHE_TABLE);
    // Only a non-empty cached value counts as a hit; an empty cached value
    // falls through so that extraction is attempted again.
    if (!empty($cached_extraction->data)) {
      return $cached_extraction->data;
    }
  }

  // Read the remaining keys defensively: an incomplete file record must not
  // trigger undefined-index notices or pass NULL to file_exists(). The $file
  // array itself is handed to the extractors unmodified.
  $uri = isset($file['uri']) ? $file['uri'] : '';
  $mime_type = isset($file['filemime']) ? $file['filemime'] : NULL;
  $debug = variable_get('search_api_attachments_debug', FALSE);

  if (file_exists($uri)) {
    if (in_array($mime_type, $this->textMimetypes())) {
      $extraction = $this->extractSimple($file);
    }
    elseif (in_array($mime_type, $this->imageMimetypes())) {
      $extraction = $this->extractExif($file);
    }
    else {
      // Send the extraction request to the right place depending on the
      // current setting.
      $extraction_method = variable_get('search_api_attachments_extract_using', 'tika');
      switch ($extraction_method) {
        case 'tika':
          $extraction = $this->extractTika($file);
          break;

        case 'tika_server':
          $extraction = $this->extractTikaServer($file);
          break;

        case 'python_pdf2txt':
        case 'pdftotext':
          // Both of these methods are only run for PDF MIME types.
          if (in_array($mime_type, $this->pdfMimetypes())) {
            $extraction = ($extraction_method == 'python_pdf2txt')
              ? $this->extractPythonPdf2txt($file)
              : $this->extractPdftotext($file);
          }
          elseif ($debug) {
            $message = ($extraction_method == 'python_pdf2txt')
              ? 'The python_pdf2txt extraction method does not support %mime_type'
              : 'The pdftotext extraction method does not support %mime_type';
            watchdog('search_api_attachments', $message, array(
              '%mime_type' => $mime_type,
            ), WATCHDOG_WARNING);
          }
          break;

        default:
          // Any other setting is handled by the Solr extractor.
          $extraction = $this->extractSolr($file);
          break;
      }
    }
  }
  else {
    // Log the missing file information; fall back to the URI when the file
    // record carries no file name.
    watchdog('search_api_attachments', "Couldn't index %filename content because this file was missing.", array(
      '%filename' => isset($file['filename']) ? $file['filename'] : $uri,
    ));
  }

  // Cache every result other than FALSE, provided a cache ID could be built
  // (i.e. the file has an fid).
  if ($extraction !== FALSE && isset($cid)) {
    cache_set($cid, $extraction, self::CACHE_TABLE);
  }

  if ($debug) {
    watchdog('search_api_attachments', "File: @filename\nExtraction: @extraction", array(
      '@filename' => $uri,
      '@extraction' => $extraction,
    ), WATCHDOG_DEBUG);
  }

  return $extraction;
}