You are here

public function MediaMigrateCommands::duplicateImageDetection in Migrate File Entities to Media Entities 8

Find duplicate file entities.

@command migrate:duplicate-file-detection @aliases migrate-duplicate

@option check-existing-media Check for existing media

Parameters

$migration_name: The plugin ID of the migration whose source files are checked for duplicates.

File

src/Commands/MediaMigrateCommands.php, line 236

Class

MediaMigrateCommands
Drush 9 commands for migrate_file_to_media.

Namespace

Drupal\migrate_file_to_media\Commands

Code

/**
 * Find duplicate file entities.
 *
 * Walks the source rows of the given migration, calculates a binary hash for
 * each referenced file and records it in the migrate_file_to_media_mapping
 * table. When a row with the same hash already exists and its file can be
 * loaded, that file's ID is stored as the target_fid of the new row.
 *
 * @param string $migration_name
 *   Plugin ID of the migration to process. Compared case-insensitively
 *   against the IDs of the available migration plugins.
 * @param array $options
 *   Command options. With 'check-existing-media' enabled, the
 *   migrate_file_to_media_mapping_media table is also searched for a row
 *   with the same binary hash and its entity_id is stored as media_id.
 *
 * @command migrate:duplicate-file-detection
 * @aliases migrate-duplicate
 * @option check-existing-media Check for existing media
 */
public function duplicateImageDetection($migration_name, $options = [
  'check-existing-media' => FALSE,
]) {
  // Plugin IDs are lowercased for the comparison, so the requested name has
  // to be normalised the same way or mixed-case input could never match.
  $requested_id = mb_strtolower((string) $migration_name);

  /** @var \Drupal\migrate\Plugin\Migration|null $migration_instance */
  $migration_instance = NULL;
  foreach ($this->migrationPluginManager->createInstances([]) as $id => $migration) {
    if (mb_strtolower((string) $id) === $requested_id) {
      $migration_instance = $migration;
      break;
    }
  }

  // Without this guard an unknown migration name ends in a fatal
  // "call to a member function on null" error below.
  if ($migration_instance === NULL) {
    $this
      ->output()
      ->writeln(dt('Migration @id not found.', ['@id' => $migration_name]));
    return;
  }

  $migration_id = $migration_instance->getPluginId();
  $check_existing_media = !empty($options['check-existing-media']);

  // Force update.
  $migration_instance
    ->getIdMap()
    ->prepareUpdate();

  // Use the migration source plugin to calculate the binary hash of
  // the related files only. The for-loop advances the source on every
  // iteration, including the ones that bail out early with "continue".
  $source = $migration_instance->getSourcePlugin();
  for ($source->rewind(); $source->valid(); $source->next()) {
    $row = $source->current();
    $file_path = $row->getSourceProperty('file_path');

    // Support remote images: they get an unsaved file entity that only
    // carries the source file ID and URI.
    if (!$this->isLocalUri($file_path)) {
      $file = File::create([
        'fid' => $row->getSourceProperty('target_id'),
        'uri' => $file_path,
      ]);
    }
    else {
      /** @var \Drupal\file\Entity\File $file */
      $file = File::load($row->getSourceProperty('target_id'));
    }

    if (!$file) {
      $this
        ->output()
        ->writeln(dt("File not found: Skipped binary hash for source {$row->getSourceProperty('target_id')}"));
      continue;
    }

    try {
      // Skip existing entries if the command is run multiple times.
      $already_mapped = $this->connection
        ->select('migrate_file_to_media_mapping', 'map')
        ->fields('map')
        ->condition('fid', $file->id(), '=')
        ->condition('migration_id', $migration_id, '=')
        ->execute()
        ->fetchObject();
      if (!empty($already_mapped)) {
        $this
          ->output()
          ->writeln(dt("File {$file->id()} already processed."));
        continue;
      }

      $binary_hash = $this->calculateBinaryHash($file);
      if (empty($binary_hash)) {
        $this
          ->output()
          ->writeln(dt("File empty: Skipped binary hash for file {$file->id()}"));
        continue;
      }

      // Query for duplicates: any mapping row with the same binary hash.
      $duplicate = $this->connection
        ->select('migrate_file_to_media_mapping', 'map')
        ->fields('map')
        ->condition('binary_hash', $binary_hash, '=')
        ->execute()
        ->fetchObject();

      // A file without a loadable duplicate points at itself.
      $duplicate_fid = $file->id();
      if ($duplicate) {
        $existing_file = File::load($duplicate->fid);
        if (!empty($existing_file)) {
          $duplicate_fid = $existing_file->id();
          $this
            ->output()
            ->writeln("Duplicate found for file {$existing_file->id()}");
        }
      }

      // Check for existing media entities from previous migrations.
      $existing_media = NULL;
      if ($check_existing_media) {
        $existing_media = $this->connection
          ->select('migrate_file_to_media_mapping_media', 'media')
          ->fields('media')
          ->condition('binary_hash', $binary_hash, '=')
          ->execute()
          ->fetchObject();
      }

      $this->connection
        ->insert('migrate_file_to_media_mapping')
        ->fields([
          'type' => 'image',
          'migration_id' => $migration_id,
          'fid' => $file->id(),
          'target_fid' => $duplicate_fid,
          'binary_hash' => $binary_hash,
          'media_id' => $existing_media ? $existing_media->entity_id : NULL,
        ])
        ->execute();
      $this
        ->output()
        ->writeln(dt("Added binary hash {$binary_hash} for file {$file->id()}"));
    }
    catch (\Exception $ex) {
      // Report the real cause: database failures also end up here, not just
      // missing files.
      $this
        ->output()
        ->writeln(dt("Error: Skipped binary hash for file {$file->id()}: {$ex->getMessage()}"));
    }
  }
}