You are here

class FeedsOAIHTTPBatch in Feeds OAI-PMH Fetcher and Parser 6

Defines the import batch object that FeedsOAIHTTPFetcher creates during the fetching stage.

Hierarchy

Expanded class hierarchy of FeedsOAIHTTPBatch

File

./FeedsOAIHTTPFetcher.inc, line 8

View source
/**
 * Import batch that fetches one page of OAI-PMH ListRecords results over HTTP.
 *
 * The batch issues an Identify request when constructed (to learn the
 * repository's properties, such as its datestamp granularity) and performs the
 * actual ListRecords request in getRaw(). After each successful fetch it
 * persists the resumption token and the "from" timestamp with variable_set(),
 * keyed by set and endpoint URL.
 */
class FeedsOAIHTTPBatch extends FeedsImportBatch {
  // Base URL of the OAI-PMH endpoint (the query string is appended to it).
  protected $oai_endpoint_url;
  // Unix timestamp for the "from" argument; values <= 0 omit the argument.
  protected $from_timestamp;
  // Unix timestamp for the "until" argument; values <= 0 omit the argument.
  protected $until_timestamp;
  // The 'repository' element of the feeds_oai_pmh_identify() result.
  public $repository;
  // Set to harvest; an empty value or '*' omits the "set" argument. Declared
  // public because it used to be created as a dynamic (public) property.
  public $set;
  // Resumption token for the next request; empty when starting a new list.
  // Declared public for the same reason as $set.
  public $resumption_token;

  /**
   * Constructor.
   *
   * @param $oai_endpoint_url
   *   Base URL of the OAI-PMH endpoint.
   * @param $from_timestamp
   *   Unix timestamp used for the "from" argument, or 0 for none.
   * @param $until_timestamp
   *   Unix timestamp used for the "until" argument, or 0 for none.
   * @param $resumption_token
   *   Resumption token to continue a list with, or an empty value.
   * @param $set
   *   Set to restrict the harvest to; empty or '*' means no restriction.
   */
  public function __construct($oai_endpoint_url, $from_timestamp, $until_timestamp, $resumption_token, $set) {
    $this->oai_endpoint_url = $oai_endpoint_url;
    $this->from_timestamp = $from_timestamp;
    $this->until_timestamp = $until_timestamp;
    $this->set = $set;
    $this->resumption_token = $resumption_token;

    // Run identify request to fill this repository's information.
    require_once drupal_get_path('module', 'feeds_oai_pmh') . '/feeds_oai_pmh.inc';
    $repository = feeds_oai_pmh_identify($oai_endpoint_url);
    $this->repository = $repository['repository'];
    parent::__construct('');
  }

  /**
   * Implementation of FeedsImportBatch::getRaw();
   *
   * Builds the ListRecords URL, fetches it, and records the paging state
   * (resumption token and next "from" timestamp) for the following request.
   *
   * @return
   *   The response body on HTTP 200, or FALSE when the request failed (a
   *   message is shown and a watchdog entry is logged in that case).
   */
  public function getRaw() {

    // TODO: Move the URL building and data fetching to feeds_oai_pmh.inc
    // Build the request URL
    $url = $this->oai_endpoint_url;
    $url .= '?verb=ListRecords';
    if ($this->resumption_token) {
      $url .= "&resumptionToken=" . rawurlencode($this->resumption_token);
    }
    else {

      // When a resumptionToken is issued, there can't be any other arguments
      // in the request.
      $url .= '&metadataPrefix=oai_dc';
      if ($this->from_timestamp > 0) {
        $url .= '&from=' . rawurlencode($this->formatDate($this->from_timestamp));
      }
      if ($this->until_timestamp > 0) {
        $url .= '&until=' . rawurlencode($this->formatDate($this->until_timestamp));
      }
      if ($this->set && $this->set != '*') {
        $url .= '&set=' . rawurlencode($this->set);
      }
    }

    // Fetch the feed's contents
    $result = drupal_http_request($url);
    if ($result->code != 200) {

      // OAI fetch failed
      $msg = 'OAI-PMH request failed: @error';
      $args = array(
        '@error' => $result->error,
      );
      drupal_set_message(t($msg, $args), 'error');
      watchdog('feeds_oai_pmh', $msg, $args, WATCHDOG_ERROR, $url);
      return FALSE;
    }

    $resumption_token = '';

    // TODO: Use simpleXML instead of regexp
    // Try to get resumptionToken. Example:
    // <resumptionToken completeListSize="478" cursor="0">0/300/478/oai_dc/eserev/null/null</resumptionToken>
    $ok = preg_match_all('/<resumptionToken.*?>([^<]+)<\\/resumptionToken>/s', $result->data, $matches);
    if ($ok) {
      // The token is captured from raw XML, so entity references (e.g. &amp;)
      // must be decoded before the value is URL-encoded into the next request.
      $resumption_token = htmlspecialchars_decode(array_pop($matches[1]), ENT_QUOTES);

      // More pages follow: do not advance the "from" date yet.
      $this->setLastDate(0);
    }
    elseif ($this->until_timestamp > 0) {

      // No resumption token in response.
      // Since specific dates were requested, set the last date to 0.
      $this->setLastDate(0);
    }
    else {

      // No resumption token in response.
      // Store current system timestamp so next request limits items returned.
      $this->setLastDate(time());
    }
    $this->setResumptionToken($resumption_token);

    // Return the feed's contents
    return $result->data;
  }

  /**
   * Stores the resumption token on this batch and persists it.
   *
   * @param $resumption_token
   *   Token to use for the next request; an empty string clears it.
   */
  protected function setResumptionToken($resumption_token) {
    $this->resumption_token = $resumption_token;
    variable_set('feeds_oai:resumptionToken:' . $this->set . ':' . $this->oai_endpoint_url, $resumption_token);
  }

  /**
   * Persists the timestamp stored under the "from" key for this set/endpoint.
   *
   * @param $timestamp
   *   Unix timestamp, or 0 to store no date.
   */
  protected function setLastDate($timestamp) {
    variable_set('feeds_oai:from:' . $this->set . ':' . $this->oai_endpoint_url, $timestamp);
  }

  /**
   * Formats a Unix timestamp as a datestamp in the repository's granularity.
   *
   * The value is always rendered in UTC, which is what the trailing "Z" of the
   * seconds-granularity form denotes.
   *
   * @param $timestamp
   *   Unix timestamp to format.
   *
   * @return
   *   'YYYY-MM-DDThh:mm:ssZ' when the repository reports 'seconds' granularity,
   *   otherwise 'YYYY-MM-DD'. Day granularity is also the fallback when the
   *   granularity is unknown (e.g. the Identify request failed).
   */
  protected function formatDate($timestamp) {
    $granularity = isset($this->repository['granularity']) ? $this->repository['granularity'] : 'days';
    if ('seconds' == $granularity) {
      // "i" is minutes; "m" would insert the month number instead.
      $date_format = 'Y-m-d\\TH:i:s\\Z';
    }
    else {
      $date_format = 'Y-m-d';
    }
    return gmdate($date_format, $timestamp);
  }

}

Members

Namesort descending Modifiers Type Description Overrides
FeedsBatch::$progress protected property
FeedsBatch::$total protected property
FeedsBatch::getProgress public function Report progress.
FeedsBatch::getTotal public function Get the total for a stage.
FeedsBatch::setProgress public function Set progress for a stage.
FeedsBatch::setTotal public function Set the total for a stage.
FeedsImportBatch::$created public property
FeedsImportBatch::$current_item protected property
FeedsImportBatch::$description public property
FeedsImportBatch::$feed_nid public property
FeedsImportBatch::$items public property
FeedsImportBatch::$link public property
FeedsImportBatch::$raw protected property
FeedsImportBatch::$title public property
FeedsImportBatch::$updated public property
FeedsImportBatch::addItem Deprecated public function Add an item.
FeedsImportBatch::currentItem public function
FeedsImportBatch::feedNode Deprecated public function
FeedsImportBatch::getDescription Deprecated public function
FeedsImportBatch::getFilePath public function 1
FeedsImportBatch::getItemCount Deprecated public function Get number of items.
FeedsImportBatch::getLink Deprecated public function
FeedsImportBatch::getTitle Deprecated public function
FeedsImportBatch::setDescription Deprecated public function Set description.
FeedsImportBatch::setItems Deprecated public function Set items.
FeedsImportBatch::setLink Deprecated public function Set link.
FeedsImportBatch::setTitle Deprecated public function Set title.
FeedsImportBatch::shiftItem public function @todo Move to a nextItem() based approach, not consuming the item array. Can only be done once we don't cache the entire batch object between page loads for batching anymore.
FeedsOAIHTTPBatch::$from_timestamp protected property
FeedsOAIHTTPBatch::$oai_endpoint_url protected property
FeedsOAIHTTPBatch::$repository public property
FeedsOAIHTTPBatch::$until_timestamp protected property
FeedsOAIHTTPBatch::formatDate protected function
FeedsOAIHTTPBatch::getRaw public function Implementation of FeedsImportBatch::getRaw(); Overrides FeedsImportBatch::getRaw
FeedsOAIHTTPBatch::setLastDate protected function
FeedsOAIHTTPBatch::setResumptionToken protected function
FeedsOAIHTTPBatch::__construct public function Constructor. Overrides FeedsImportBatch::__construct