You are here

function feeds_oai_pmh_identify in Feeds OAI-PMH Fetcher and Parser 7

Same name and namespace in other branches
  1. 6 feeds_oai_pmh.inc \feeds_oai_pmh_identify()

Returns an array of repository information obtained through the OAI-PMH Identify verb.

5 calls to feeds_oai_pmh_identify()
FeedsOAIHTTPFetcher::sourceForm in ./FeedsOAIHTTPFetcher.inc
Expose source form.
FeedsOAIHTTPFetcher::sourceFormValidate in ./FeedsOAIHTTPFetcher.inc
Override parent::sourceFormValidate().
FeedsOAIHTTPFetcherResult::__construct in ./FeedsOAIHTTPFetcher.inc
Constructor.
FeedsOAIParser::parse in ./FeedsOAIParser.inc
Implementation of FeedsParser::parse().
feeds_oai_pmh_ajax_callback in ./feeds_oai_pmh.module
Callback function for AJAX setSpec element in form.

File

./feeds_oai_pmh.inc, line 12

Code

/**
 * Returns an array of information obtained through the OAI-PMH Identify verb.
 *
 * Results are memoized per request in a static variable and persisted in the
 * Drupal cache for 24 hours. Only successful responses are cached; failures
 * are logged to watchdog and returned to the caller without being stored.
 *
 * @param string $baseurl
 *   Base URL of the OAI-PMH repository; "?verb=Identify" is appended to it.
 *
 * @return array
 *   On success:
 *   - output: An empty array.
 *   - status: 0.
 *   - repository: An array with the keys 'protocol_version',
 *     'deleted_record', 'earliest_timestamp' (result of strtotime()),
 *     'granularity' ('days' or 'seconds'), 'compression',
 *     'compression_gzip', 'compression_deflate' and 'sets'.
 *   On failure:
 *   - output: The translated error message.
 *   - status: 1.
 */
function feeds_oai_pmh_identify($baseurl) {
  static $cache = array();
  if (isset($cache[$baseurl])) {
    return $cache[$baseurl];
  }

  // Use the Drupal cache, but only while the entry is not yet stale.
  $cid = 'feeds_oai_pmh:' . str_replace('http://', '', $baseurl);
  $cached = cache_get($cid);
  if ($cached && $cached->expire > REQUEST_TIME) {
    // Remember the hit so later calls in this request skip the cache backend.
    $cache[$baseurl] = $cached->data;
    return $cached->data;
  }

  $output = array();
  $url = "{$baseurl}?verb=Identify";
  $repository = array(
    'deleted_record' => '',
    'compression' => FALSE,
    'compression_gzip' => FALSE,
    'compression_deflate' => FALSE,
    'earliest_timestamp' => '',
    'sets' => array(),
  );
  $repo_args = array(
    '%repo' => $baseurl,
  );

  $result = drupal_http_request($url);
  if ($result->code != 200) {
    return _feeds_oai_pmh_identify_error(
      'OAI repository %repo is not avaliable, please check the base URL %url is correct.',
      array(
        '%repo' => $baseurl,
        '%url' => $baseurl,
      )
    );
  }

  // simplexml_load_string() returns FALSE on error; the warning it emits for
  // malformed XML is suppressed because the failure is reported below.
  $xml = @simplexml_load_string($result->data);
  if (!$xml) {
    return _feeds_oai_pmh_identify_error('OAI repository %repo returns invalid XML upon identify.', $repo_args);
  }
  $ident = $xml->Identify;

  // Things which must come back, or die.
  // Protocol version. Cast to a string up front: a SimpleXMLElement cannot be
  // serialized, so it must never be handed to watchdog() as a variable.
  // The comparison is deliberately loose, matching the original behaviour.
  $protocol_version = (string) $ident->protocolVersion;
  if ($protocol_version != '2.0') {
    return _feeds_oai_pmh_identify_error(
      'OAI repository %repo: Incorrect Identify Response -- Unsupported Protcool Version "@version"',
      array(
        '%repo' => $baseurl,
        '@version' => $protocol_version,
      )
    );
  }
  $repository['protocol_version'] = $protocol_version;

  // Deleted record policy.
  if (!isset($ident->deletedRecord)) {
    return _feeds_oai_pmh_identify_error('OAI repository %repo: Incorrect Identify Response -- No deleteRecord', $repo_args);
  }
  $repository['deleted_record'] = (string) $ident->deletedRecord;

  // Earliest datestamp, stored as a Unix timestamp.
  if (!isset($ident->earliestDatestamp)) {
    return _feeds_oai_pmh_identify_error('OAI repository %repo: Incorrect Identify Response -- No earliest Datestamp', $repo_args);
  }
  $repository['earliest_timestamp'] = strtotime((string) $ident->earliestDatestamp);

  // Granularity.
  if (!isset($ident->granularity)) {
    return _feeds_oai_pmh_identify_error('OAI repository %repo: Incorrect Identify Response -- No Granularity', $repo_args);
  }
  // The granularity is recognised by the length of its pattern:
  // strlen('YYYY-MM-DD') is 10, strlen('YYYY-MM-DDThh:mm:ssZ') is 20.
  $granularity_length = strlen((string) $ident->granularity);
  if ($granularity_length == 10) {
    $repository['granularity'] = 'days';
  }
  elseif ($granularity_length == 20) {
    $repository['granularity'] = 'seconds';
  }
  else {
    return _feeds_oai_pmh_identify_error('OAI repository %repo: Incorrect Identify Response -- Invalid granularity', $repo_args);
  }

  // Optional things, which are nice to have.
  if (isset($ident->compression)) {

    // According to HTTP 1.1 RFC 2616 there is also the Lempel-Ziv-Welch
    // compression, which in theory could be supported. However, PHP doesn't
    // seem to play nice with it, and I haven't seen a repo with it. It is also
    // 14 years old.
    $repository['compression'] = TRUE;
    foreach ($ident->compression as $encoding) {
      $encoding = (string) $encoding;
      if ($encoding === 'gzip') {
        $repository['compression_gzip'] = TRUE;
      }
      elseif ($encoding === 'deflate') {
        $repository['compression_deflate'] = TRUE;
      }
    }
  }

  // Get and assign sets information.
  $sets = feeds_oai_pmh_get_sets($baseurl);
  if (!is_array($sets)) {
    return _feeds_oai_pmh_identify_error('OAI repository %repo: Could not get sets', $repo_args);
  }
  $repository['sets'] = $sets;

  $return = array(
    'output' => $output,
    'status' => 0,
    'repository' => $repository,
  );

  // Store in static cache.
  $cache[$baseurl] = $return;

  // Cache in the DB for 24 hours. REQUEST_TIME is used (rather than time()) so
  // the expiry is computed from the same clock the staleness check reads.
  cache_set($cid, $return, 'cache', REQUEST_TIME + 3600 * 24);
  return $return;
}

/**
 * Logs an Identify failure to watchdog and builds the error return value.
 *
 * @param string $message
 *   Untranslated message containing placeholders.
 * @param array $args
 *   Placeholder replacements for $message; values must be scalar strings.
 *
 * @return array
 *   An array with 'output' set to the translated message and 'status' set to 1.
 */
function _feeds_oai_pmh_identify_error($message, array $args) {
  watchdog('feeds_oai_pmh', $message, $args, WATCHDOG_ERROR);
  return array(
    'output' => t($message, $args),
    'status' => 1,
  );
}