You are here

function emvideo_archive_data in Embedded Media Field 6

hook emvideo_PROVIDER_data

Provides an array to be serialised and made available with $item elsewhere.

This data can be used to store any extraneous information available specifically to the archive provider.

1 call to emvideo_archive_data()
emvideo_archive_duration in contrib/emvideo/providers/archive.inc
hook emvideo_PROVIDER_duration($item): returns the duration of the video in seconds.

File

contrib/emvideo/providers/archive.inc, line 159
This is an archive.org provider include file for Embedded Media Video.

Code

/**
 * hook emvideo_PROVIDER_data
 *
 * Builds the provider-specific data array for an archive.org item by scraping
 * the item's public details page and reading the "_files" and "_meta" XML
 * documents found in the item's data directory.
 *
 * @param $field
 *   The field definition. Not used by this implementation.
 * @param $item
 *   The field item; $item['value'] is used as the archive.org item identifier
 *   when building URLs.
 * @param $error_field
 *   The form element name handed to form_set_error() when retrieval fails.
 *
 * @return
 *   An array that always contains 'emvideo_archive_version',
 *   'emvideo_data_version' and 'thumbnail'. 'metadata' is added when the
 *   metadata XML could be read, and 'url' when a playable derivative
 *   (mp4, ogv or mov, in that order of preference) was found. On failure the
 *   partially filled array is returned after an error has been recorded.
 */
function emvideo_archive_data($field, $item, $error_field = '') {

  // Initialize the data array.
  $data = array();

  // Create some version control. Thus if we make changes to the data array
  // down the road, we can respect older content. If allowed by Embedded Media
  // Field, any older content will automatically update this array as needed.
  // In any case, you should account for the version if you increment it.
  $data['emvideo_archive_version'] = $data['emvideo_data_version'] = EMVIDEO_ARCHIVE_DATA_VERSION;

  // Add the thumbnail data.
  $data['thumbnail'] = 'http://www.archive.org/download/' . $item['value'] . '/format=Thumbnail?.jpg';

  // Get the path to media files and XML files.
  // This is real kludgy, but without a real API there's no other way.
  // First we need to get the user facing HTML page.
  $html_page = drupal_http_request('http://www.archive.org/details/' . $item['value']);

  // The error property is only present on failed requests, so read both
  // properties defensively.
  $http_code = isset($html_page->code) ? $html_page->code : 0;
  $http_error = isset($html_page->error) ? $html_page->error : '';
  if ($http_error || $http_code != 200) {
    // The detail must be concatenated into the message: a third argument to
    // form_set_error() is its $reset flag, not extra message text. The detail
    // comes from the remote server, so escape it before it is displayed.
    $detail = htmlspecialchars($http_code . ': ' . $http_error, ENT_QUOTES, 'UTF-8');
    form_set_error($error_field, 'The HTML page for the item at archive.org could not be retrieved: ' . $detail);
    return $data;
  }

  // Scrape this page and find the path to the data directory: take the text
  // after the "All Files: " label, cut it at the first "HTTP", and use the
  // last link (href=) that precedes it.
  $data_url = '';
  $html = isset($html_page->data) ? $html_page->data : '';
  $html_chunks = explode('All Files: ', $html);
  if (isset($html_chunks[1])) {
    $html_chunks = explode('HTTP', $html_chunks[1]);
    $html_chunks = explode('href=', $html_chunks[0]);
    // Non-greedy, so the match stops at the quote that closes the URL even if
    // more quotes follow on the same line.
    if (preg_match('#https?://(.*?)"#', array_pop($html_chunks), $matches)) {
      $data_url = rtrim($matches[0], '"');
    }
  }
  if ($data_url === '') {
    form_set_error($error_field, 'The data directory for the item at archive.org could not be retrieved.');
    return $data;
  }

  // In this directory should be two XML files, one for the list of files, one for metadata.
  $xml_files_url = $data_url . '/' . $item['value'] . '_files.xml';
  $xml_meta_url = $data_url . '/' . $item['value'] . '_meta.xml';

  // Retrieve the XML files.
  $xml_files = emfield_request_xml('archive', $xml_files_url, array(), TRUE, TRUE, $item['value'] . '_files');
  $xml_meta = emfield_request_xml('archive', $xml_meta_url, array(), TRUE, TRUE, $item['value'] . '_meta');

  // Metadata is optional: warn, but carry on. Test for an empty result before
  // looking at its 'stat' key.
  if (empty($xml_meta) || (isset($xml_meta['stat']) && $xml_meta['stat'] == 'error')) {
    drupal_set_message('Additional information about the item at archive.org could not be retrieved.  The video can still be displayed.');
  }
  else {
    $data['metadata'] = isset($xml_meta['METADATA']) ? $xml_meta['METADATA'] : NULL;
  }

  // The file list is mandatory: without it there is nothing to play.
  if (empty($xml_files) || (isset($xml_files['stat']) && $xml_files['stat'] == 'error')) {
    form_set_error($error_field, 'The list of files for the item at archive.org could not be retrieved.  The video can not be displayed.');
    return $data;
  }

  // There are a tonne of useless thumbnails in this list that we don't want.
  // We also need to sort through the available derivatives and choose the best one.
  // The key order below is the order of preference.
  $derivative_files = array(
    'mp4' => '',
    'ogv' => '',
    'mov' => '',
  );
  // Only the keys of the file list (used here as file names) are needed.
  foreach (array_keys($xml_files) as $file_name) {
    $extension = substr($file_name, -3);
    if (array_key_exists($extension, $derivative_files)) {
      $derivative_files[$extension] = $file_name;
    }
  }
  foreach ($derivative_files as $file_name) {
    if ($file_name) {
      $data['url'] = 'http://www.archive.org/download/' . $item['value'] . '/' . $file_name;
      break;
    }
  }
  return $data;
}