View source
<?php
/**
 * Import batch that pulls one page of records from an OAI-PMH endpoint.
 *
 * Every call to getRaw() issues a single ListRecords request. Harvest
 * progress (the pending resumption token and the "from" timestamp to use for
 * the next harvest) is persisted with variable_set(), keyed by set and
 * endpoint URL.
 */
class FeedsOAIHTTPBatch extends FeedsImportBatch {

  /**
   * Base URL of the OAI-PMH endpoint.
   */
  protected $oai_endpoint_url;

  /**
   * Unix timestamp for the "from" argument; values <= 0 (or FALSE) omit it.
   */
  protected $from_timestamp;

  /**
   * Unix timestamp for the "until" argument; values <= 0 (or FALSE) omit it.
   */
  protected $until_timestamp;

  /**
   * The 'repository' entry returned by feeds_oai_pmh_identify(), or an empty
   * array when that entry is missing.
   */
  public $repository;

  /**
   * Set to harvest; an empty value or '*' means no "set" argument is sent.
   *
   * Declared explicitly (it used to be created dynamically in the
   * constructor) and kept public so existing outside access keeps working.
   */
  public $set;

  /**
   * Resumption token to send with the next request, if any.
   *
   * Declared explicitly for the same reason as $set.
   */
  public $resumption_token;

  /**
   * Stores the request parameters and looks up the repository description.
   *
   * @param $oai_endpoint_url
   *   Base URL of the OAI-PMH endpoint.
   * @param $from_timestamp
   *   Unix timestamp for the lower date bound, or FALSE/0 for none.
   * @param $until_timestamp
   *   Unix timestamp for the upper date bound, or FALSE/0 for none.
   * @param $resumption_token
   *   Token of a harvest in progress, or an empty value to start a new one.
   * @param $set
   *   Set to harvest.
   */
  public function __construct($oai_endpoint_url, $from_timestamp, $until_timestamp, $resumption_token, $set) {
    $this->oai_endpoint_url = $oai_endpoint_url;
    $this->from_timestamp = $from_timestamp;
    $this->until_timestamp = $until_timestamp;
    $this->set = $set;
    $this->resumption_token = $resumption_token;

    require_once drupal_get_path('module', 'feeds_oai_pmh') . '/feeds_oai_pmh.inc';
    $identify = feeds_oai_pmh_identify($oai_endpoint_url);
    // The identify helper lives outside this file; do not assume the
    // 'repository' entry is present (e.g. when the request failed).
    $this->repository = (is_array($identify) && isset($identify['repository'])) ? $identify['repository'] : array();

    parent::__construct('');
  }

  /**
   * Performs one ListRecords request and records the harvest progress.
   *
   * @return
   *   The raw response body on HTTP 200, or FALSE when the request failed
   *   (an error message is shown and logged in that case).
   */
  public function getRaw() {
    $url = $this->buildRequestUrl();
    $result = drupal_http_request($url);

    if ($result->code != 200) {
      $msg = 'OAI-PMH request failed: @error';
      $args = array(
        '@error' => isset($result->error) ? $result->error : '',
      );
      drupal_set_message(t($msg, $args), 'error');
      watchdog('feeds_oai_pmh', $msg, $args, WATCHDOG_ERROR, $url);
      return FALSE;
    }

    $resumption_token = '';
    if (preg_match_all('/<resumptionToken.*?>([^<]+)<\\/resumptionToken>/s', $result->data, $matches)) {
      // The token is XML character data: strip surrounding whitespace and
      // decode entities so it is sent back exactly as the repository issued
      // it. A whitespace-only element marks the end of the list and must not
      // be treated as a live token.
      $resumption_token = html_entity_decode(trim(array_pop($matches[1])), ENT_QUOTES, 'UTF-8');
    }

    // The stored "from" date is only advanced to "now" once a harvest
    // without an explicit upper bound has delivered its last page.
    $last_date = 0;
    if (!$resumption_token && !($this->until_timestamp > 0)) {
      $last_date = time();
    }
    $this->setLastDate($last_date);
    $this->setResumptionToken($resumption_token);

    return $result->data;
  }

  /**
   * Builds the ListRecords request URL for the current batch state.
   *
   * When a resumption token is pending it is the only argument sent besides
   * the verb; otherwise the metadata prefix and the optional from/until/set
   * arguments are added.
   *
   * @return
   *   The complete request URL.
   */
  protected function buildRequestUrl() {
    $url = $this->oai_endpoint_url . '?verb=ListRecords';

    if ($this->resumption_token) {
      return $url . '&resumptionToken=' . rawurlencode($this->resumption_token);
    }

    $url .= '&metadataPrefix=oai_dc';
    if ($this->from_timestamp > 0) {
      $url .= '&from=' . rawurlencode($this->formatDate($this->from_timestamp));
    }
    if ($this->until_timestamp > 0) {
      $url .= '&until=' . rawurlencode($this->formatDate($this->until_timestamp));
    }
    if ($this->set && $this->set != '*') {
      $url .= '&set=' . rawurlencode($this->set);
    }
    return $url;
  }

  /**
   * Remembers the resumption token on the batch and in a Drupal variable.
   *
   * @param $resumption_token
   *   Token for the next page, or '' when no further page is pending.
   */
  protected function setResumptionToken($resumption_token) {
    $this->resumption_token = $resumption_token;
    variable_set('feeds_oai:resumptionToken:' . $this->set . ':' . $this->oai_endpoint_url, $resumption_token);
  }

  /**
   * Persists the timestamp stored under the 'feeds_oai:from:' variable.
   *
   * @param $timestamp
   *   Unix timestamp, or 0 to clear it.
   */
  protected function setLastDate($timestamp) {
    variable_set('feeds_oai:from:' . $this->set . ':' . $this->oai_endpoint_url, $timestamp);
  }

  /**
   * Formats a timestamp according to the repository's date granularity.
   *
   * @param $timestamp
   *   Unix timestamp to format.
   *
   * @return
   *   'YYYY-MM-DDThh:mm:ssZ' for 'seconds' granularity, 'YYYY-MM-DD' for
   *   anything else (including an unknown granularity).
   */
  protected function formatDate($timestamp) {
    $granularity = isset($this->repository['granularity']) ? $this->repository['granularity'] : 'days';

    if ('seconds' == $granularity) {
      // 'i' is minutes ('m' is the month number), and the literal 'Z' suffix
      // denotes UTC, so the value has to be rendered with gmdate().
      return gmdate('Y-m-d\\TH:i:s\\Z', $timestamp);
    }

    // Day granularity doubles as the fallback so the format is never
    // undefined.
    // NOTE(review): date() renders the day in the site's local timezone;
    // confirm whether a UTC day would be more appropriate here.
    return date('Y-m-d', $timestamp);
  }

}
/**
 * Feeds fetcher that harvests records from an OAI-PMH repository.
 *
 * Harvest progress is read from the 'feeds_oai:resumptionToken:' and
 * 'feeds_oai:from:' Drupal variables, keyed by set and endpoint URL.
 */
class FeedsOAIHTTPFetcher extends FeedsHTTPFetcher {

  /**
   * Creates the batch for the next ListRecords request of a source.
   *
   * A stored resumption token takes precedence. Without one, the date range
   * comes either from the configured dates ('use_dates') or from the stored
   * timestamp of the previous harvest.
   *
   * @param FeedsSource $source
   *   The source being imported.
   *
   * @return FeedsOAIHTTPBatch
   *   Batch configured with endpoint, date bounds, token and set.
   */
  public function fetch(FeedsSource $source) {
    $source_config = $source->getConfigFor($this);
    $set = $source_config['set'];
    $endpoint_url = $source_config['source'];

    $from_timestamp = FALSE;
    $until_timestamp = FALSE;
    $resumption_token = variable_get('feeds_oai:resumptionToken:' . $set . ':' . $endpoint_url, '');

    if (!$resumption_token) {
      if ($source_config['use_dates']) {
        $from_timestamp = $this->dateFieldToTimestamp($source_config['dates']['from']);
        $until_timestamp = $this->dateFieldToTimestamp($source_config['dates']['to']);
      }
      else {
        $from_timestamp = (int) variable_get('feeds_oai:from:' . $set . ':' . $endpoint_url, FALSE);
        if ($from_timestamp > 0) {
          // Start one second after the stored timestamp.
          $from_timestamp = $from_timestamp + 1;
        }
      }
    }

    return new FeedsOAIHTTPBatch($endpoint_url, $from_timestamp, $until_timestamp, $resumption_token, $set);
  }

  /**
   * Returns the default configuration values of this fetcher.
   */
  public function configDefaults() {
    return array(
      'last_fetched_timestamp' => '',
      'earliest_timestamp' => '',
      'use_dates' => FALSE,
      'to' => array(),
      'from' => array(),
    );
  }

  /**
   * Returns an empty configuration form.
   */
  public function configForm(&$form_state) {
    $form = array();
    return $form;
  }

  /**
   * Builds the per-source form: endpoint URL, set, date range and reset.
   *
   * @param $source_config
   *   Current source configuration for this fetcher.
   *
   * @return
   *   The form array.
   */
  public function sourceForm($source_config) {
    ctools_include('dependent');
    $form = parent::sourceForm($source_config);

    // Pick the endpoint to describe: the configured source first, otherwise
    // whatever default the parent form carries.
    $endpoint_url = '';
    if (!empty($source_config['source'])) {
      $endpoint_url = $source_config['source'];
    }
    elseif (!empty($form['source']['#default_value'])) {
      $endpoint_url = $form['source']['#default_value'];
    }

    // Query the repository once. The set options and the merged repository
    // information are only used when the Identify call reports success
    // (status 0).
    $sets_options = array();
    if ($endpoint_url) {
      require_once drupal_get_path('module', 'feeds_oai_pmh') . '/feeds_oai_pmh.inc';
      $result = feeds_oai_pmh_identify($endpoint_url);
      if ($result['status'] == 0) {
        $source_config = array_merge($source_config, $result['repository']);
        if (isset($result['repository']['sets'])) {
          $sets_options = feeds_oai_pmh_sets_options($result['repository']['sets']);
        }
      }
      else {
        drupal_set_message(t('There was a problem fetching repository information: !list', array(
          '!list' => $result['output'],
        )));
      }
    }

    $form['source']['#title'] = t('URL of OAI-PMH endpoint');
    $form['source']['#description'] = t('You can use services like http://www.opendoar.org/ to get a list of repository OAI-PMH endpoints.');
    $form['source']['#ahah'] = array(
      'path' => 'feeds_oai_pmh/set_ahah',
      'wrapper' => 'ahah-element',
      'method' => 'replace',
      'effect' => 'fade',
      'event' => 'change',
    );

    $form['set'] = array(
      '#type' => 'select',
      '#title' => t('Set to fetch'),
      '#default_value' => isset($source_config['set']) ? $source_config['set'] : NULL,
      '#options' => $sets_options,
      '#ahah' => array(
        'path' => 'feeds_oai_pmh/set_ahah',
        'wrapper' => 'ahah-element',
        'method' => 'replace',
        'effect' => 'fade',
        'event' => 'change',
      ),
    );

    if (isset($source_config['source']) && isset($source_config['set'])) {
      $msg = feeds_oai_pmh_current_status_msg($source_config['source'], $source_config['set']);
      if ($msg) {
        $form['status'] = array(
          '#value' => '<div class="messages status">' . $msg . '</div>',
        );
      }
    }

    $form['use_dates'] = array(
      '#type' => 'checkbox',
      '#title' => t('Limit fetch by record creation date'),
      '#default_value' => isset($source_config['use_dates']) ? $source_config['use_dates'] : NULL,
    );

    // NOTE(review): #prefix opens one <div> while #suffix closes two. Left
    // unchanged here; confirm against the markup ctools dependent expects
    // before balancing it.
    $form['dates'] = array(
      '#type' => 'fieldset',
      '#title' => t('Record creation dates to fetch'),
      '#process' => array(
        'ctools_dependent_process',
      ),
      '#dependency' => array(
        'edit-feeds-FeedsOAIHTTPFetcher-use-dates' => array(
          1,
        ),
      ),
      '#prefix' => '<div id="edit-feeds-FeedsOAIHTTPFetcher-dates-wrapper">',
      '#suffix' => '</div></div>',
      '#input' => TRUE,
    );
    if (isset($source_config['earliest_timestamp'])) {
      $date = format_date($source_config['earliest_timestamp'], 'custom', 'M d, Y');
      $form['dates']['#description'] = t('Note: earliest record reported by repository is @date', array(
        '@date' => $date,
      ));
    }
    $form['dates']['from'] = array(
      '#type' => 'date',
      '#title' => t('Starting date'),
      '#default_value' => isset($source_config['dates']['from']) ? $source_config['dates']['from'] : NULL,
    );
    $form['dates']['to'] = array(
      '#type' => 'date',
      '#title' => t('Ending date'),
      '#default_value' => isset($source_config['dates']['to']) ? $source_config['dates']['to'] : NULL,
    );

    // The description below is a translatable runtime string; its line
    // breaks are part of the string and are kept exactly as they were.
    $form['restart'] = array(
      '#type' => 'checkbox',
      '#title' => t('Reset import for this repository/set to above settings'),
      '#description' => t('This forces any imports that are currently underway
for the chosen repository/set to start over from the beginning.
Normally, all imports that have already begun will only try to fetch
new items until this option is checked, or if the "Delete items"
option is used.'),
    );

    return $form;
  }

  /**
   * Validates the date range and handles the "reset import" checkbox.
   *
   * When 'restart' is checked, the stored resumption token and "from"
   * timestamp for the chosen set/endpoint are deleted and the flag is removed
   * from the submitted values.
   *
   * @param $values
   *   Submitted source form values, passed by reference.
   */
  public function sourceFormValidate(&$values) {
    if (!empty($values['use_dates'])) {
      $from_timestamp = $this->dateFieldToTimestamp($values['dates']['from']);
      $until_timestamp = $this->dateFieldToTimestamp($values['dates']['to']);
      if ($from_timestamp > $until_timestamp) {
        form_set_error('feeds][source', t('The ending date must be later than the starting date'));
      }
    }

    if (!empty($values['restart'])) {
      variable_del('feeds_oai:resumptionToken:' . $values['set'] . ':' . $values['source']);
      variable_del('feeds_oai:from:' . $values['set'] . ':' . $values['source']);
      unset($values['restart']);
      drupal_set_message(t('Import for this repository/set has been reset, ignoring any previous imports.'));
    }
  }

  /**
   * Converts a date form element value to a Unix timestamp at midnight.
   *
   * @param $field_value
   *   Array with 'month', 'day' and 'year' keys.
   *
   * @return
   *   Timestamp of 00:00:00 on that day, as computed by mktime().
   */
  protected function dateFieldToTimestamp($field_value) {
    // Explicit zeros pin the time to midnight. NULL arguments mean "current
    // minute/second" on PHP 8, which made the result vary between calls.
    return mktime(0, 0, 0, (int) $field_value['month'], (int) $field_value['day'], (int) $field_value['year']);
  }

}