public function SearchApiAttachmentsLinksAlterSettings::getLinkContent in Search API attachments 7
1 call to SearchApiAttachmentsLinksAlterSettings::getLinkContent()
- SearchApiAttachmentsLinksAlterSettings::alterItems in contrib/search_api_attachments_links/includes/callback_attachments_links_settings.inc
- Alter items before indexing.
File
- contrib/search_api_attachments_links/includes/callback_attachments_links_settings.inc, line 117
- Search API data alteration callback.
Class
- SearchApiAttachmentsLinksAlterSettings
- @file
Search API data alteration callback.
Code
/**
 * Extracts the text content of the file a link points to.
 *
 * A previously cached extraction for the same URL is returned immediately.
 * Otherwise the remote Content-Type header selects the extractor: plain
 * text and diffs use extract_simple(), JPEG/TIFF images use extract_exif(),
 * and everything else uses the method configured in the
 * 'search_api_attachments_extract_using' variable (tika by default; the two
 * PDF-only methods are skipped for non-PDF MIME types; any other value
 * falls through to extract_solr()).
 *
 * @param array $link
 *   Link item; the 'url' key holds the address of the file to extract.
 *
 * @return mixed
 *   The extraction result (from cache or from the selected extractor), or
 *   FALSE when the URL is missing, its headers cannot be fetched, or the
 *   configured extractor does not support the MIME type.
 */
public function getLinkContent($link) {
  $extraction = FALSE;
  $debug = variable_get('search_api_attachments_debug', FALSE);
  // Read the URL once so a link without one never triggers undefined-index
  // notices or a get_headers() call on NULL.
  $url = (isset($link['url']) && $link['url'] !== '') ? $link['url'] : NULL;
  $cid = NULL;

  if ($url !== NULL) {
    $cid = 'cached_extraction_:' . $url;
    $cached_extraction = cache_get($cid, self::CACHE_TABLE);
    if (!empty($cached_extraction->data)) {
      return $cached_extraction->data;
    }
  }

  $headers = ($url !== NULL) ? get_headers($url, 1) : FALSE;
  if ($headers) {
    // Header names are case-insensitive, so normalise the keys first.
    $headers = array_change_key_case($headers, CASE_LOWER);
    $mime_type = isset($headers['content-type']) ? $headers['content-type'] : '';
    // When redirects are followed get_headers() returns one value per
    // response; the last one describes the final document.
    if (is_array($mime_type)) {
      $mime_type = end($mime_type);
    }
    // Drop parameters such as "; charset=UTF-8" before comparing.
    $mime_parts = explode(';', (string) $mime_type);
    $mime_type = strtolower(trim($mime_parts[0]));

    if ($mime_type == 'text/plain' || $mime_type == 'text/x-diff') {
      $extraction = $this->extract_simple($link);
    }
    elseif (in_array($mime_type, array('image/jpeg', 'image/jpg', 'image/tiff'), TRUE)) {
      $extraction = $this->extract_exif($link);
    }
    else {
      $extraction_method = variable_get('search_api_attachments_extract_using', 'tika');
      if ($extraction_method == 'tika') {
        $extraction = $this->extract_tika($link);
      }
      elseif ($extraction_method == 'python_pdf2txt') {
        if (in_array($mime_type, $this->pdf_mimetypes())) {
          $extraction = $this->extract_python_pdf2txt($link);
        }
        elseif ($debug) {
          watchdog('search_api_attachments', 'The python_pdf2txt extraction method does not support %mime_type', array(
            '%mime_type' => $mime_type,
          ), WATCHDOG_WARNING);
        }
      }
      elseif ($extraction_method == 'pdftotext') {
        if (in_array($mime_type, $this->pdf_mimetypes())) {
          $extraction = $this->extract_pdftotext($link);
        }
        elseif ($debug) {
          watchdog('search_api_attachments', 'The pdftotext extraction method does not support %mime_type', array(
            '%mime_type' => $mime_type,
          ), WATCHDOG_WARNING);
        }
      }
      else {
        $extraction = $this->extract_solr($link);
      }
    }
  }
  else {
    watchdog('search_api_attachments', "Couldn't index %filename content because this link was missing.", array(
      '%filename' => $url,
    ));
  }

  // Only successful extractions are cached, so failures are retried on the
  // next call.
  if ($extraction !== FALSE && $cid !== NULL) {
    cache_set($cid, $extraction, self::CACHE_TABLE);
  }
  if ($debug) {
    watchdog('search_api_attachments', "File: @filename\nExtraction: @extraction", array(
      '@filename' => $url,
      '@extraction' => $extraction,
    ), WATCHDOG_DEBUG);
  }
  return $extraction;
}