
function _s3fs_refresh_cache in S3 File System 7.2

Same name and namespace in other branches
  1. 7.3 s3fs.module \_s3fs_refresh_cache()
  2. 7 s3fs.module \_s3fs_refresh_cache()

Refreshes the metadata cache.

Iterates over the full list of objects in the s3fs_root_folder within the S3 bucket (or the entire bucket, if no root folder has been set), caching their metadata in the database.

It then caches the ancestor folders for those files, since folders are not normally stored as actual objects in S3.

Parameters

array $config: An s3fs configuration array.
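The code below reads four keys from this array: 'bucket', 'root_folder', 'public_folder', and 'private_folder'. A minimal sketch of the expected shape, with placeholder values (in practice the array comes from the module's configuration helper rather than being built by hand):

$config = array(
  // Name of the S3 bucket to scan. Required.
  'bucket' => 'example-bucket',
  // Optional prefix. When set, only objects under this folder are cached.
  'root_folder' => 'my-root-folder',
  // Optional folder names for the public:// and private:// schemes. The
  // code falls back to 's3fs-public' and 's3fs-private' when these are empty.
  'public_folder' => 's3fs-public',
  'private_folder' => 's3fs-private',
);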

4 calls to _s3fs_refresh_cache()
drush_s3fs_refresh_cache in ./s3fs.drush.inc
Refreshes the file metadata cache.
s3fs.test in tests/s3fs.test
s3fs_update_7000 in ./s3fs.install
Allow large filesize values in the S3 File Metadata cache.
_s3fs_refresh_cache_submit in ./s3fs.admin.inc
Submit callback for the "Refresh file metadata cache" button.

File

./s3fs.module, line 384
Hook implementations and other primary functionality for S3 File System.

Code

function _s3fs_refresh_cache($config) {

  // Bomb out with an error if our configuration settings are invalid.
  if (!_s3fs_validate_config($config)) {
    form_set_error('s3fs_refresh_cache][refresh', t('Unable to validate S3 configuration settings.'));
    return;
  }
  if (function_exists('drush_log')) {
    drush_log('Getting Amazon S3 client...');
  }
  $s3 = _s3fs_get_amazons3_client($config);

  // Set up the iterator that will loop over all the objects in the bucket.
  $file_metadata_list = array();
  $iterator_args = array(
    'Bucket' => $config['bucket'],
  );
  if (!empty($config['root_folder'])) {

    // If the root_folder option has been set, retrieve from S3 only those files
    // which reside in the root folder.
    $iterator_args['Prefix'] = "{$config['root_folder']}/";
  }

  // Determine whether object versions should be included or excluded
  // as part of the ListObjects query.
  if (_s3fs_get_setting('use_versioning')) {
    $iterator = $s3
      ->getIterator('ListObjectVersions', $iterator_args);
  }
  else {
    $iterator = $s3
      ->getIterator('ListObjects', $iterator_args);
  }

  // NOTE: Setting the maximum page size lower than 1000 will have no effect,
  // as stated by the API docs.
  $iterator
    ->setPageSize(1000);
  if (function_exists('drush_log')) {
    drush_log('Creating temporary tables...');
  }

  // Create the temp table, into which all the refreshed data will be written.
  // After the full refresh is complete, the temp table will be swapped with
  // the real one.
  module_load_install('s3fs');
  $schema = s3fs_schema();
  try {
    db_create_table('s3fs_file_temp', $schema['s3fs_file']);
  } catch (DatabaseSchemaObjectExistsException $e) {

    // The table already exists, so we can simply truncate it to start fresh.
    db_truncate('s3fs_file_temp')
      ->execute();
  }

  // Create a temporary table for folders, which allows duplicate rows.
  // Folders will be written at the same time as the file data is written,
  // then merged with the files at the end.
  try {
    $folder_schema = $schema['s3fs_file'];
    unset($folder_schema['primary key'], $folder_schema['indexes']);
    db_create_table('s3fs_folder_temp', $folder_schema);
    $options = Database::getConnectionInfo('default');
    switch ($options['default']['driver']) {
      case 'pgsql':
        // PostgreSQL compares strings case-sensitively by default, so no
        // adjustment is needed.
        break;
      case 'sqlite':
        // SQLite also compares strings case-sensitively by default.
        break;
      case 'mysql':
        // MySQL's default collation is case-insensitive; use a binary
        // collation so folder URIs are compared case-sensitively, matching
        // S3's key semantics.
        db_query('ALTER TABLE {s3fs_folder_temp} CONVERT TO CHARACTER SET utf8 COLLATE utf8_bin');
        break;
    }
  } catch (DatabaseSchemaObjectExistsException $e) {
    db_truncate('s3fs_folder_temp')
      ->execute();
  }
  $file_metadata_list = $folders = array();

  // Start by gathering all the existing folders. If we didn't do this, empty
  // folders would be lost, because they'd have no files from which to rebuild
  // themselves.
  $existing_folders = db_select('s3fs_file', 's')
    ->fields('s', array(
    'uri',
  ))
    ->condition('dir', 1, '=');
  $folder_counter = 0;
  foreach ($existing_folders
    ->execute()
    ->fetchCol(0) as $folder_uri) {
    $folders[$folder_uri] = TRUE;
    if ($folder_counter++ % 1000 == 0) {
      _s3fs_write_metadata($file_metadata_list, $folders);
    }
  }

  // Set up an event listener to consume each page of results before the next
  // request is made.
  $dispatcher = $iterator
    ->getEventDispatcher();
  $dispatcher
    ->addListener('resource_iterator.before_send', function ($event) use (&$file_metadata_list, &$folders) {
    _s3fs_write_metadata($file_metadata_list, $folders);
  });

  // Set up an event listener for post request debugging.
  if (function_exists('drush_log')) {
    $dispatcher
      ->addListener('resource_iterator.after_send', function ($event) use ($iterator) {
      drush_log(dt('Request #@request_count, @records record(s) received in total.', array(
        '@request_count' => $iterator
          ->getRequestCount(),
        '@records' => $iterator
          ->count(),
      )));
    });
  }
  foreach ($iterator as $s3_metadata) {
    $key = $s3_metadata['Key'];

    // The root folder is an implementation detail that only appears on S3.
    // Files' URIs are not aware of it, so we need to remove it beforehand.
    if (!empty($config['root_folder'])) {
      $key = str_replace("{$config['root_folder']}/", '', $key);
    }

    // Figure out the scheme based on the key's folder prefix.
    $public_folder_name = !empty($config['public_folder']) ? $config['public_folder'] : 's3fs-public';
    $private_folder_name = !empty($config['private_folder']) ? $config['private_folder'] : 's3fs-private';
    if (strpos($key, "{$public_folder_name}/") === 0) {

      // Much like the root folder, the public folder name must be removed from URIs.
      $key = str_replace("{$public_folder_name}/", '', $key);
      $uri = "public://{$key}";
    }
    elseif (strpos($key, "{$private_folder_name}/") === 0) {
      $key = str_replace("{$private_folder_name}/", '', $key);
      $uri = "private://{$key}";
    }
    else {

      // No special prefix means it's an s3:// file.
      $uri = "s3://{$key}";
    }
    $max_uri_length = $schema['s3fs_file']['fields']['uri']['length'];
    if (strlen($uri) >= $max_uri_length) {
      watchdog('s3fs', 'URI "@uri" is too long, ignoring', array(
        '@uri' => $uri,
      ), WATCHDOG_WARNING);
      continue;
    }
    if ($uri[strlen($uri) - 1] == '/') {

      // Treat objects in S3 whose filenames end in a '/' as folders.
      // But don't store the '/' itself as part of the folder's uri.
      $folders[rtrim($uri, '/')] = TRUE;
    }
    else {

      // Only store the metadata for the latest version of the file.
      if (isset($s3_metadata['IsLatest']) && !$s3_metadata['IsLatest']) {
        continue;
      }

      // Files with no StorageClass are actually from the DeleteMarkers list,
    // rather than the Versions list. They represent a file which has been
      // deleted, so don't cache them.
      if (!isset($s3_metadata['StorageClass'])) {
        continue;
      }

      // Buckets with Versioning disabled set all files' VersionIds to "null".
      // If we see that, unset VersionId to prevent "null" from being written
      // to the DB.
      if (isset($s3_metadata['VersionId']) && $s3_metadata['VersionId'] == 'null') {
        unset($s3_metadata['VersionId']);
      }
      $file_metadata_list[] = _s3fs_convert_metadata($uri, $s3_metadata);
    }
  }

  // The event listener doesn't fire after the last page is done, so we have
  // to write the last page of metadata manually.
  _s3fs_write_metadata($file_metadata_list, $folders);

  // Write folders.
  $query = db_select('s3fs_folder_temp')
    ->distinct();
  $query
    ->fields('s3fs_folder_temp');
  $folder_counter = 0;
  $result = $query
    ->execute();
  $insert_query = db_insert('s3fs_file_temp')
    ->fields(array(
    'uri',
    'filesize',
    'timestamp',
    'dir',
    'version',
  ));
  foreach ($result as $record) {
    $insert_query
      ->values((array) $record);

    // Flush every 1000 records.
    if ($folder_counter++ % 1000 == 0) {
      $insert_query
        ->execute();
    }
  }

  // Write any remaining records.
  $insert_query
    ->execute();
  if (function_exists('drush_log')) {
    drush_log(dt('Flushed @folders folders to the file table.', array(
      '@folders' => $folder_counter,
    )));
  }

  // Cleanup.
  db_drop_table('s3fs_folder_temp');

  // Swap the temp table with the real table.
  db_rename_table('s3fs_file', 's3fs_file_old');
  db_rename_table('s3fs_file_temp', 's3fs_file');
  db_drop_table('s3fs_file_old');
  if (function_exists('drush_log')) {
    drush_log(dt('S3 File System cache refreshed.'));
  }
  else {
    drupal_set_message(t('S3 File System cache refreshed.'));
  }
}
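
In practice this function is normally reached through the callers listed above (the drush command, the update hook, or the admin form's submit callback) rather than invoked directly. A direct call from custom code would look something like the sketch below; it assumes _s3fs_get_config(), the helper s3fs.module uses to assemble the configuration array from the module's settings.

// Rebuild the S3 File System metadata cache from custom code (sketch).
$config = _s3fs_get_config();
_s3fs_refresh_cache($config);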