View source
<?php
/**
 * Sends an OAI-PMH "Identify" request and summarizes the repository's reply.
 *
 * A successful result is memoized per base URL in a static variable for the
 * rest of the request and is also written with cache_set(). Failures are
 * logged with watchdog() and are not cached.
 *
 * @param $baseurl
 *   Base URL of the OAI-PMH repository; "?verb=Identify" is appended to it.
 *
 * @return
 *   An associative array. On failure it contains:
 *   - output: The translated error message.
 *   - status: 1.
 *   On success it contains:
 *   - output: An empty array.
 *   - status: 0.
 *   - repository: An array with the keys protocol_version, deleted_record,
 *     earliest_timestamp (result of strtotime()), granularity ('days' or
 *     'seconds'), compression, compression_gzip, compression_deflate and sets
 *     (the result of feeds_oai_pmh_get_sets(), or an empty array).
 */
function feeds_oai_pmh_identify($baseurl) {
  static $cache = array();
  if (isset($cache[$baseurl])) {
    return $cache[$baseurl];
  }

  $cid = 'feeds_oai_pmh:' . str_replace('http://', '', $baseurl);
  if ($cached = cache_get($cid)) {
    if ($cached->expire > time()) {
      // Memoize the persistent-cache hit so that later calls in this request
      // do not have to call cache_get() again.
      $cache[$baseurl] = $cached->data;
      return $cached->data;
    }
  }

  $output = array();
  $repository = array(
    'deleted_record' => '',
    'compression' => FALSE,
    'compression_gzip' => FALSE,
    'compression_deflate' => FALSE,
    'earliest_timestamp' => '',
    'sets' => array(),
  );

  $result = drupal_http_request("{$baseurl}?verb=Identify");
  if ($result->code != 200) {
    // Untranslated message plus placeholders: watchdog() translates on display.
    $message = 'OAI repository %repo is not avaliable, please check the base URL %url is correct.';
    $args = array(
      '%repo' => $baseurl,
      '%url' => $baseurl,
    );
    watchdog('feeds_oai_pmh', $message, $args, WATCHDOG_ERROR);
    return array(
      'output' => t($message, $args),
      'status' => 1,
    );
  }

  // Parse warnings are suppressed; a failed parse is reported below instead.
  $xml = @simplexml_load_string($result->data);
  if (!$xml) {
    return _feeds_oai_pmh_identify_failure(t('OAI repository %repo returns invalid XML upon identify.', array(
      '%repo' => $baseurl,
    )));
  }

  $ident = $xml->Identify;

  // Cast SimpleXML nodes to strings before comparing or measuring them.
  $protocol_version = (string) $ident->protocolVersion;
  if ($protocol_version != '2.0') {
    return _feeds_oai_pmh_identify_failure(t('OAI repository %repo: Incorrect Identify Response -- Unsupported Protcool Version "@version"', array(
      '%repo' => $baseurl,
      '@version' => $protocol_version,
    )));
  }
  $repository['protocol_version'] = $protocol_version;

  if (!isset($ident->deletedRecord)) {
    return _feeds_oai_pmh_identify_failure(t('OAI repository %repo: Incorrect Identify Response -- No deleteRecord', array(
      '%repo' => $baseurl,
    )));
  }
  $repository['deleted_record'] = (string) $ident->deletedRecord;

  if (!isset($ident->earliestDatestamp)) {
    return _feeds_oai_pmh_identify_failure(t('OAI repository %repo: Incorrect Identify Response -- No earliest Datestamp', array(
      '%repo' => $baseurl,
    )));
  }
  $repository['earliest_timestamp'] = strtotime((string) $ident->earliestDatestamp);

  if (!isset($ident->granularity)) {
    return _feeds_oai_pmh_identify_failure(t('OAI repository %repo: Incorrect Identify Response -- No Granularity', array(
      '%repo' => $baseurl,
    )));
  }
  // The granularity is recognized by the length of its pattern string:
  // 10 characters (e.g. "YYYY-MM-DD") or 20 (e.g. "YYYY-MM-DDThh:mm:ssZ").
  $granularity_length = strlen((string) $ident->granularity);
  if ($granularity_length == 10) {
    $repository['granularity'] = 'days';
  }
  elseif ($granularity_length == 20) {
    $repository['granularity'] = 'seconds';
  }
  else {
    return _feeds_oai_pmh_identify_failure(t('OAI repository %repo: Incorrect Identify Response -- Invalid granularity', array(
      '%repo' => $baseurl,
    )));
  }

  if (isset($ident->compression)) {
    $repository['compression'] = TRUE;
    foreach ($ident->compression as $encoding) {
      $encoding = (string) $encoding;
      if ($encoding == 'gzip') {
        $repository['compression_gzip'] = TRUE;
      }
      elseif ($encoding == 'deflate') {
        $repository['compression_deflate'] = TRUE;
      }
    }
  }

  if ($sets = feeds_oai_pmh_get_sets($baseurl)) {
    $repository['sets'] = $sets;
  }

  $return = array(
    'output' => $output,
    'status' => 0,
    'repository' => $repository,
  );
  $cache[$baseurl] = $return;
  // Expiry is now + 3600 * 60 seconds, i.e. 60 hours.
  cache_set($cid, $return, 'cache', time() + 3600 * 60);
  return $return;
}

/**
 * Logs an already-translated Identify error and builds the failure result.
 *
 * @param $message
 *   The translated error message.
 *
 * @return
 *   An array with 'output' set to $message and 'status' set to 1.
 */
function _feeds_oai_pmh_identify_failure($message) {
  // The message is already translated, so $variables must be NULL rather than
  // an empty string; that is the value watchdog() documents for this case.
  watchdog('feeds_oai_pmh', $message, NULL, WATCHDOG_ERROR);
  return array(
    'output' => $message,
    'status' => 1,
  );
}
/**
 * Fetches the sets of an OAI-PMH repository with the "ListSets" verb.
 *
 * @param $baseurl
 *   Base URL of the OAI-PMH repository; "?verb=ListSets" is appended to it.
 *
 * @return
 *   FALSE if the HTTP status is not 200, the body cannot be parsed as XML, or
 *   the response contains an <error> element. Otherwise an array keyed by
 *   setSpec; each entry has a 'name' (the setName) and, when the set's
 *   setDescription contains a <dc:description> element with text, a
 *   'description'. The array is empty when the response lists no sets.
 */
function feeds_oai_pmh_get_sets($baseurl) {
  $sets = array();

  $result = drupal_http_request("{$baseurl}?verb=ListSets");
  if ($result->code != 200) {
    return FALSE;
  }

  // Collect libxml errors internally so malformed remote XML results in a
  // FALSE return value instead of PHP warnings.
  $previous = libxml_use_internal_errors(TRUE);
  $xml = simplexml_load_string($result->data);
  libxml_clear_errors();
  libxml_use_internal_errors($previous);
  if (!$xml) {
    return FALSE;
  }
  if (isset($xml->error)) {
    return FALSE;
  }

  // Without a <ListSets> element $xml->ListSets->set is NULL, which foreach
  // cannot iterate.
  if (!isset($xml->ListSets->set)) {
    return $sets;
  }

  foreach ($xml->ListSets->set as $set) {
    $spec = (string) $set->setSpec;
    $sets[$spec]['name'] = (string) $set->setName;
    if (isset($set->setDescription)) {
      $markup = $set->setDescription->asXML();
      // Only store a description when the <dc:description> text is actually
      // found; otherwise the raw XML markup would end up as the description.
      if (preg_match('/<dc:description>([^<]+)<.dc:description/s', $markup, $matches)) {
        $sets[$spec]['description'] = $matches[1];
      }
    }
  }
  return $sets;
}
/**
 * Parses an OAI-PMH ListRecords response carrying oai_dc metadata.
 *
 * Records whose header has status="deleted" are skipped.
 *
 * @param $raw_xml
 *   The raw XML body of the ListRecords response.
 *
 * @return
 *   FALSE if the XML cannot be parsed or contains an <error> element.
 *   Otherwise an array with a single key 'items' holding one array per record:
 *   - guid: The header identifier.
 *   - datestamp: The header datestamp passed through strtotime().
 *   - title: The first dc:title, or '' if there is none.
 *   - metadata_record_url: A GetRecord URL built from the <request> element.
 *   - setspec_raw: De-duplicated list of the header's setSpec values.
 *   - One de-duplicated list per Dublin Core element present in the record
 *     (creator, subject, description, publisher, contributor, date, type,
 *     format, identifier, source, language, relation, coverage, rights).
 *   - url: The dc:identifier values accepted by valid_url(); only set when
 *     there is at least one.
 */
function feeds_oai_pmh_parse($raw_xml) {
  $items = array();

  // Collect libxml errors internally so malformed XML results in a FALSE
  // return value instead of PHP warnings.
  $previous = libxml_use_internal_errors(TRUE);
  $xml = simplexml_load_string($raw_xml);
  libxml_clear_errors();
  libxml_use_internal_errors($previous);
  if (!$xml) {
    return FALSE;
  }
  if (isset($xml->error)) {
    return FALSE;
  }

  // Without a <ListRecords> element there is nothing to iterate over.
  if (!isset($xml->ListRecords->record)) {
    return array(
      'items' => $items,
    );
  }

  $record_request_base_url = (string) $xml->request . '?metadataPrefix=' . (string) $xml->request['metadataPrefix'];
  $elements = array(
    'creator',
    'subject',
    'description',
    'publisher',
    'contributor',
    'date',
    'type',
    'format',
    'identifier',
    'source',
    'language',
    'relation',
    'coverage',
    'rights',
  );

  foreach ($xml->ListRecords->record as $xml_item) {
    if ((string) $xml_item->header['status'] == 'deleted') {
      continue;
    }

    // Resolve the Dublin Core container step by step: children() returns NULL
    // when the record has no matching node, and chaining on NULL is fatal.
    $xml_dc_metadata = NULL;
    if (isset($xml_item->metadata)) {
      $oai_dc = $xml_item->metadata->children('http://www.openarchives.org/OAI/2.0/oai_dc/');
      if ($oai_dc instanceof SimpleXMLElement) {
        $dc = $oai_dc->children('http://purl.org/dc/elements/1.1/');
        if ($dc instanceof SimpleXMLElement) {
          $xml_dc_metadata = $dc;
        }
      }
    }

    $item = array(
      'guid' => (string) $xml_item->header->identifier,
      'datestamp' => strtotime((string) $xml_item->header->datestamp),
      'title' => $xml_dc_metadata !== NULL ? (string) $xml_dc_metadata->title : '',
    );
    $item['metadata_record_url'] = $record_request_base_url . '&verb=GetRecord&identifier=' . $item['guid'];

    $set_spec_values = array();
    foreach ($xml_item->header->setSpec as $value) {
      $set_spec_values[] = (string) $value;
    }
    $item['setspec_raw'] = array_values(array_unique($set_spec_values));

    if ($xml_dc_metadata !== NULL) {
      foreach ($elements as $element) {
        if (isset($xml_dc_metadata->{$element})) {
          $values = array();
          foreach ($xml_dc_metadata->{$element} as $value) {
            $values[] = (string) $value;
          }
          $item[$element] = array_values(array_unique($values));
        }
      }
    }

    // A record may carry no dc:identifier at all; only look for URLs when the
    // key exists.
    if (!empty($item['identifier'])) {
      foreach ($item['identifier'] as $value) {
        if (valid_url($value, TRUE)) {
          $item['url'][] = $value;
        }
      }
    }

    $items[] = $item;
  }

  return array(
    'items' => $items,
  );
}