function _s3fs_refresh_cache in S3 File System 7

Same name and namespace in other branches
  1. 7.3 s3fs.module \_s3fs_refresh_cache()
  2. 7.2 s3fs.module \_s3fs_refresh_cache()

Refreshes the metadata cache.

Iterates over the full list of objects in the S3 bucket (or only a subset, if the 'prefix' option is set), storing their metadata in the database.

It then creates the ancestor folders for those files, since folders are not normally stored as actual objects in S3.
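For illustration, here is a minimal sketch of how the ancestor folders of a single file URI can be derived. The module performs this step internally while writing metadata; the helper name below is hypothetical:

  function example_ancestor_folders($uri) {
    $folders = array();

    // Strip the 's3://' scheme and split the object key into path segments.
    $parts = explode('/', substr($uri, strlen('s3://')));

    // Drop the filename itself; every remaining prefix of the path is an
    // ancestor folder.
    array_pop($parts);
    while ($parts) {
      $folders['s3://' . implode('/', $parts)] = TRUE;
      array_pop($parts);
    }
    return $folders;
  }

  // example_ancestor_folders('s3://images/2014/photo.jpg') returns
  // array('s3://images/2014' => TRUE, 's3://images' => TRUE).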

Parameters

array $config: An s3fs configuration array.
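
A typical invocation, matching the drush and admin-form callers listed below, passes the module's configuration array. A minimal sketch, assuming the _s3fs_get_config() helper from s3fs.module (the prefix value here is hypothetical):

  // Refresh the entire metadata cache.
  $config = _s3fs_get_config();
  _s3fs_refresh_cache($config);

  // Refresh only objects whose keys start with a given prefix.
  $config['prefix'] = 'images/';
  _s3fs_refresh_cache($config);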

4 calls to _s3fs_refresh_cache()
drush_s3fs_refresh_cache in ./s3fs.drush.inc
Refreshes the file metadata cache.
s3fs.test in tests/s3fs.test
s3fs_update_7000 in ./s3fs.install
Allow large filesize values in the S3 File Metadata cache.
_s3fs_refresh_cache_submit in ./s3fs.admin.inc
Submit callback for the "Refresh file metadata cache" button.

File

./s3fs.module, line 277
Sets up the S3fsStreamWrapper class to be used as a Drupal file system.

Code

function _s3fs_refresh_cache($config) {

  // Bomb out with an error if our configuration settings are invalid.
  if (!_s3fs_validate_config($config)) {
    form_set_error('s3fs_refresh_cache][refresh', t('Unable to validate S3 configuration settings.'));
    return;
  }
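
  // Get an S3 client object configured with these settings.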
  $s3 = _s3fs_get_amazons3_client($config);

  // Set up the iterator that will loop over all the objects in the bucket.
  $file_metadata_list = array();
  $iterator_args = array(
    'Bucket' => $config['bucket'],
  );
  if (!empty($config['prefix'])) {

    // If the 'prefix' option has been set, retrieve from S3 only those files
    // whose keys begin with the prefix.
    $iterator_args['Prefix'] = $config['prefix'];
  }
  $iterator = $s3
    ->getListObjectVersionsIterator($iterator_args);

  // NOTE: Changing the page size doesn't actually change how many objects
  // are pulled from S3 at a time. This line is here only as a reminder
  // that 1000 objects will be loaded at a time.
  $iterator
    ->setPageSize(1000);

  // The $folders array is an associative array keyed by folder names, which
  // is constructed as each filename is written to the DB. After all the files
  // are written, the folder names are converted to metadata and written.
  $folders = array();
  $existing_folders = db_select('s3fs_file', 's')
    ->fields('s', array(
    'uri',
  ))
    ->condition('dir', 1, '=');

  // If a prefix is set, only select folders which start with it.
  if (!empty($config['prefix'])) {
    $existing_folders = $existing_folders
      ->condition('uri', db_like("s3://{$config['prefix']}") . '%', 'LIKE');
  }
  foreach ($existing_folders
    ->execute()
    ->fetchCol(0) as $folder_uri) {
    $folders[$folder_uri] = TRUE;
  }

  // Create the temp table, into which all the refreshed data will be written.
  // After the full refresh is complete, the temp table will be swapped in.
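  // s3fs_schema() is defined in s3fs.install, so that file must be loaded
  // first.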
  module_load_install('s3fs');
  $schema = s3fs_schema();
  try {
    db_create_table('s3fs_file_temp', $schema['s3fs_file']);

    // Like hook_schema(), db_create_table() ignores the 'collation' setting.
    $options = Database::getConnectionInfo('default');
    switch ($options['default']['driver']) {
      case 'pgsql':

        // Postgres uses binary collation by default.
        break;
      case 'sqlite':

        // SQLite uses binary collation by default.
        break;
      case 'mysql':

        // Set MySQL tables to use binary collation, to approximate case-sensitivity.
        db_query("ALTER TABLE {s3fs_file_temp} CONVERT TO CHARACTER SET utf8 COLLATE utf8_bin");
        break;
    }
  } catch (DatabaseSchemaObjectExistsException $e) {

    // The table already exists, so truncate it.
    db_truncate('s3fs_file_temp')
      ->execute();
  }

  // Set up an event listener to consume each page of results before the next
  // request is made.
  $dispatcher = $iterator
    ->getEventDispatcher();
  $dispatcher
    ->addListener('resource_iterator.before_send', function ($event) use (&$file_metadata_list, &$folders) {
    _s3fs_write_metadata($file_metadata_list, $folders);
  });
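
  // Walk every object version in the list, recording files and folders.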
  foreach ($iterator as $s3_metadata) {
    $uri = "s3://{$s3_metadata['Key']}";
    if ($uri[strlen($uri) - 1] == '/') {

      // Treat objects in S3 whose filenames end in a '/' as folders.
      // But we don't store the '/' itself as part of the folder's metadata.
      $folders[rtrim($uri, '/')] = TRUE;
    }
    else {

      // Only store the metadata for the latest version of the file. Files
      // should be excluded only if IsLatest is set to FALSE.
      if (isset($s3_metadata['IsLatest']) && !$s3_metadata['IsLatest']) {
        continue;
      }

      // Files with no StorageClass are actually from the DeleteMarkers list,
      // rather than the Versions list. They represent a file which has been
      // deleted, so we don't cache them.
      if (!isset($s3_metadata['StorageClass'])) {
        continue;
      }

      // Buckets with Versioning disabled set all files' VersionIds to "null".
      // If we see that, unset VersionId to prevent "null" from being written
      // to the DB.
      if (isset($s3_metadata['VersionId']) && $s3_metadata['VersionId'] == 'null') {
        unset($s3_metadata['VersionId']);
      }
      $file_metadata_list[] = _s3fs_convert_metadata($uri, $s3_metadata);
    }
  }

  // Push the last page of metadata to the DB. The event listener doesn't fire
  // after the last page is done, so we have to do it manually.
  _s3fs_write_metadata($file_metadata_list, $folders);

  // Now that the $folders array contains all the ancestors of every file in
  // the cache, as well as the existing folders from before the refresh,
  // write those folders to the temp table.
  if ($folders) {
    $insert_query = db_insert('s3fs_file_temp')
      ->fields(array(
      'uri',
      'filesize',
      'timestamp',
      'dir',
      'mode',
      'uid',
      'version',
    ));
    foreach ($folders as $folder_uri => $ph) {

      // If a prefix is set, exclude any folders that don't match it.
      if (!empty($config['prefix']) && strpos($folder_uri, "s3://{$config['prefix']}") === FALSE) {
        continue;
      }
      $metadata = _s3fs_convert_metadata($folder_uri, array());
      $insert_query
        ->values($metadata);
    }

    // TODO: If this throws an integrity constraint violation, then the user's
    // S3 bucket has objects that represent folders using a different scheme
    // than the one we account for above. The best solution I can think of is
    // to convert any "files" in s3fs_file_temp which match an entry in the
    // $folders array (which would have been added in _s3fs_write_metadata())
    // to directories.
    $insert_query
      ->execute();
  }

  // We're done, so replace data in the real table with data from the temp table.
  if (empty($config['prefix'])) {

    // If this isn't a partial refresh, we can do a full table swap.
    db_rename_table('s3fs_file', 's3fs_file_old');
    db_rename_table('s3fs_file_temp', 's3fs_file');
    db_drop_table('s3fs_file_old');
  }
  else {

    // This is a partial refresh, so we can't just replace the s3fs_file table.
    // We wrap the whole thing in a transaction so that we can return the
    // database to its original state in case anything goes wrong.
    $transaction = db_transaction();
    try {
      $rows_to_copy = db_select('s3fs_file_temp', 's')
        ->fields('s', array(
        'uri',
        'filesize',
        'timestamp',
        'dir',
        'mode',
        'uid',
        'version',
      ));

      // Delete from s3fs_file only those rows which match the prefix.
      $delete_query = db_delete('s3fs_file')
        ->condition('uri', db_like("s3://{$config['prefix']}") . '%', 'LIKE')
        ->execute();

      // Copy the contents of s3fs_file_temp (which all have the prefix) into
      // s3fs_file (which was just cleared of all contents with the prefix).
      db_insert('s3fs_file')
        ->from($rows_to_copy)
        ->execute();
      db_drop_table('s3fs_file_temp');
    } catch (Exception $e) {
      $transaction
        ->rollback();
      watchdog_exception('S3 File System', $e);
      drupal_set_message(t('S3 File System cache refresh failed. Please see log messages for details.'), 'error');
      return;
    }

    // Destroying the transaction variable is the only way to explicitly commit.
    unset($transaction);
  }
  if (empty($config['prefix'])) {
    drupal_set_message(t('S3 File System cache refreshed.'));
  }
  else {
    drupal_set_message(t('Files in the S3 File System cache with prefix %prefix have been refreshed.', array(
      '%prefix' => $config['prefix'],
    )));
  }
}