function apachesolr_attachments_extract_using_solr in Apache Solr Attachments 7
Same name and namespace in other branches
- 6.3 apachesolr_attachments.index.inc \apachesolr_attachments_extract_using_solr()
- 6 apachesolr_attachments.admin.inc \apachesolr_attachments_extract_using_solr()
- 6.2 apachesolr_attachments.admin.inc \apachesolr_attachments_extract_using_solr()
For a file path, try to extract text using Solr 1.4+.
Throws
Exception
1 call to apachesolr_attachments_extract_using_solr()
- apachesolr_attachments_get_attachment_text in ./apachesolr_attachments.index.inc - Parse the attachment getting just the raw text.
File
- ./apachesolr_attachments.index.inc, line 154 - Indexing-related functions.
Code
/**
 * For a file path, try to extract text using Solr 1.4+.
 *
 * The file is sent to the Solr extracting servlet as a multipart/form-data
 * POST and the extracted text and metadata are read from the response.
 *
 * @param string $filepath
 *   Path of the file whose contents are sent to Solr.
 *
 * @return array
 *   A two-element array: the 'extracted' property of the response followed by
 *   its 'extracted_metadata' property.
 *
 * @throws Exception
 *   If the file cannot be read. Exceptions raised while talking to Solr are
 *   deliberately not caught here either.
 */
function apachesolr_attachments_extract_using_solr($filepath) {
  // Fail fast on a missing or unreadable file instead of silently POSTing an
  // empty body to Solr and treating the result as the attachment's text.
  if (!is_readable($filepath)) {
    throw new Exception('Unable to read file for text extraction: ' . $filepath);
  }

  // Extract using Solr.
  // We allow Solr to throw exceptions - they will be caught
  // by apachesolr.module.
  $env_id = apachesolr_default_environment();
  $solr = apachesolr_get_solr($env_id);
  $filename = basename($filepath);
  $params = array(
    'resource.name' => $filename,
    'extractFormat' => 'text',
  );

  // file_get_contents() returns FALSE on failure, which would otherwise be
  // concatenated into the request body as an empty string.
  $contents = file_get_contents($filepath);
  if ($contents === FALSE) {
    throw new Exception('Unable to read file for text extraction: ' . $filepath);
  }

  // Construct a multi-part form-data POST body in $data.
  // Note that the boundary token itself already starts with '--'; each
  // delimiter line then adds the '--' prefix on top of it.
  $boundary = '--' . hash('sha256', uniqid(REQUEST_TIME));
  $data = "--{$boundary}\r\n";
  // The 'filename' used here becomes the property name in the response.
  $data .= 'Content-Disposition: form-data; name="file"; filename="extracted"';
  $data .= "\r\nContent-Type: application/octet-stream\r\n\r\n";
  $data .= $contents;
  $data .= "\r\n--{$boundary}--\r\n";
  $headers = array(
    'Content-Type' => 'multipart/form-data; boundary=' . $boundary,
  );
  $options = array(
    'method' => 'POST',
    'headers' => $headers,
    'data' => $data,
  );
  $response = $solr
    ->makeServletRequest(EXTRACTING_SERVLET, $params, $options);
  return array(
    $response->extracted,
    $response->extracted_metadata,
  );
}