class GdprSqlDump in General Data Protection Regulation 8
Same name and namespace in other branches
- 8.2 modules/gdpr_dump/src/Service/GdprSqlDump.php \Drupal\gdpr_dump\Service\GdprSqlDump
- 3.0.x modules/gdpr_dump/src/Service/GdprSqlDump.php \Drupal\gdpr_dump\Service\GdprSqlDump
Class GdprSqlDump.
@package Drupal\gdpr_dump\Service
Hierarchy
- class \Drupal\gdpr_dump\Service\GdprSqlDump
Expanded class hierarchy of GdprSqlDump
1 file declares its use of GdprSqlDump
- GdprSqlMysql.php in modules/gdpr_dump/src/Sql/GdprSqlMysql.php
1 string reference to 'GdprSqlDump'
- gdpr_dump.services.yml in modules/gdpr_dump/gdpr_dump.services.yml
- modules/gdpr_dump/gdpr_dump.services.yml
1 service uses GdprSqlDump
- gdpr_dump.sql_dump in modules/gdpr_dump/gdpr_dump.services.yml
- \Drupal\gdpr_dump\Service\GdprSqlDump
File
- modules/gdpr_dump/src/Service/GdprSqlDump.php, line 37
Namespace
Drupal\gdpr_dump\Service
View source
class GdprSqlDump {
  /**
   * Prefix prepended to a table name to build the name of its clone.
   *
   * Used when creating, filling and dropping the clone tables.
   */
  const GDPR_TABLE_PREFIX = 'gdpr_clone_';
  /**
   * The GDPR table settings.
   *
   * Read from the 'mapping' key of the module configuration. Iterated as
   * table name => (column name => anonymizer plugin ID).
   *
   * @var array
   */
  protected $tablesToAnonymize = [];
  /**
   * The list of tables needed to be skipped.
   *
   * Read from the 'empty_tables' key of the module configuration and extended
   * by buildTablesToSkip(). Lookups are done by array key (the table name).
   *
   * @var array
   */
  protected $tablesToSkip = [];
  /**
   * The database.
   *
   * @var \Drupal\Core\Database\Connection
   */
  protected $database;
  /**
   * GDPR database manager.
   *
   * Used to fetch the column names of the tables being anonymized.
   *
   * @var \Drupal\gdpr_dump\Service\GdprDatabaseManager
   */
  protected $databaseManager;
  /**
   * The anonymizer plugin factory.
   *
   * Resolves a plugin ID from the mapping to the plugin that anonymizes a
   * column value.
   *
   * @var \Drupal\anonymizer\Anonymizer\AnonymizerFactory
   */
  protected $pluginFactory;
  /**
   * The database driver.
   *
   * E.g mysql, pgsql, sqlite. Taken from the injected connection's driver().
   *
   * @var string
   */
  protected $driver;
  /**
   * GdprSqlDump constructor.
   *
   * @param \Drupal\Core\Config\ConfigFactoryInterface $configFactory
   *   Config factory.
   * @param \Drupal\Core\Database\Connection $database
   *   The database.
   * @param \Drupal\gdpr_dump\Service\GdprDatabaseManager $gdprDatabaseManager
   *   The GDPR database manager.
   * @param \Drupal\anonymizer\Anonymizer\AnonymizerFactory $pluginFactory
   *   The anonymizer plugin factory.
   */
  public function __construct(ConfigFactoryInterface $configFactory, Connection $database, GdprDatabaseManager $gdprDatabaseManager, AnonymizerFactory $pluginFactory) {
    // Both settings live in the same config object, so load it only once.
    $settings = $configFactory->get(SettingsForm::GDPR_DUMP_CONF_KEY);

    // A key that was never saved comes back as NULL. The rest of the class
    // passes these properties to array_keys(), array_key_exists() and the
    // array union operator, so always fall back to an empty array.
    $this->tablesToAnonymize = $settings->get('mapping') ?: [];
    $this->tablesToSkip = $settings->get('empty_tables') ?: [];

    $this->database = $database;
    // Cached because the clone query syntax depends on the driver.
    $this->driver = $this->database->driver();
    $this->databaseManager = $gdprDatabaseManager;
    $this->pluginFactory = $pluginFactory;
  }
  /**
   * Dump command.
   *
   * @throws \Drush\Sql\SqlException
   * @throws \InvalidArgumentException
   * @throws \Drupal\Core\Database\IntegrityConstraintViolationException
   * @throws \Drupal\Core\Database\DatabaseExceptionWrapper
   * @throws \Drupal\Core\Database\TransactionNoActiveException
   * @throws \Drupal\Core\Database\TransactionCommitFailedException
   * @throws \Exception
   */
  public function dump() {
    drush_sql_bootstrap_further();
    $sql = $this->getInstance();
    try {
      // Creates the clone tables and fills them with anonymized rows.
      $this->prepare();
      return $sql->dump(drush_get_option('result-file', FALSE));
    }
    finally {
      // Drop the clone tables even when preparing or dumping throws, so a
      // failed run does not leave clone tables behind in the database.
      // Note: an exception thrown by cleanup() itself replaces any exception
      // that was already propagating.
      $this->cleanup();
    }
  }
  /**
   * Get a SqlBase instance according to dbSpecs.
   *
   * @param array $dbSpec
   *   If known, specify a $dbSpec that the class can operate with.
   *
   * @throws \Drush\Sql\SqlException
   *
   * @return \Drush\Sql\SqlBase
   *   The Sql instance.
   *
   * @see \drush_sql_get_class()
   */
  protected function getInstance(array $dbSpec = NULL) {
    $database = drush_get_option('database', 'default');
    $target = drush_get_option('target', 'default');

    if (empty($dbSpec)) {
      // No usable spec was passed in: work one out, then recurse with it.
      // First choice: the 'db-url' option.
      $dbUrl = drush_get_option('db-url');
      if ($dbUrl) {
        if (\is_array($dbUrl)) {
          $dbUrl = $dbUrl[$database];
        }
        $specFromUrl = drush_convert_db_from_db_url($dbUrl);
        $specFromUrl['db_prefix'] = drush_get_option('db-prefix');
        return $this->getInstance($specFromUrl);
      }

      // Second choice: the 'databases' option, addressed by database/target.
      $databases = drush_get_option('databases');
      if ($databases && \array_key_exists($database, $databases) && \array_key_exists($target, $databases[$database])) {
        return $this->getInstance($databases[$database][$target]);
      }

      // Last resort: ask Drush for the spec.
      /** @var \Drush\Sql\SqlVersion $sqlVersion */
      $sqlVersion = drush_sql_get_version();
      if ($sqlVersion) {
        $detectedSpec = $sqlVersion->get_db_spec();
        if ($detectedSpec) {
          return $this->getInstance($detectedSpec);
        }
      }
    }
    elseif (!empty($dbSpec['driver'])) {
      // A spec with a driver: try loading our own implementation for it.
      $driverSuffix = \ucfirst($dbSpec['driver']);
      $instance = drush_get_class('\\Drupal\\gdpr_dump\\Sql\\GdprSql', [
        $dbSpec,
      ], [
        $driverSuffix,
      ]);
      if (!empty($instance)) {
        return $instance;
      }
    }

    // Every lookup path above fell through without producing an instance.
    throw new SqlException('Unable to find a matching SQL Class. Drush cannot find your database connection details.');
  }
  /**
   * Creates a query string for cloning.
   *
   * @param string $originalTable
   *   The table name.
   *
   * @return string|null
   *   The query string.
   *
   * @throws \Exception
   */
  protected function createCloneQueryString($originalTable) {
    if (\array_key_exists($originalTable, $this->tablesToSkip)) {
      // No need to clone tables that are excluded.
      return NULL;
    }
    $clonedTable = self::GDPR_TABLE_PREFIX . $originalTable;
    switch ($this->driver) {
      case 'mysql':
        return "CREATE TABLE IF NOT EXISTS `{$clonedTable}` LIKE `{$originalTable}`;";

      case 'pgsql':
        // PostgreSQL quotes identifiers with double quotes; backticks are a
        // syntax error there. "WHERE 1=2" copies the columns without rows.
        // @todo Test.
        return "CREATE TABLE IF NOT EXISTS \"{$clonedTable}\" AS SELECT * FROM \"{$originalTable}\" WHERE 1=2;";

      case 'sqlite':
        // Maybe get the original SQL of the table and apply that:
        // SELECT sql FROM sqlite_master WHERE type='table' AND name='mytable'.
        // @todo Test.
        return "CREATE TABLE IF NOT EXISTS `{$clonedTable}` AS SELECT * FROM `{$originalTable}` WHERE 1=2;";

      // These require a contrib module and are not supported yet; they fall
      // through to the exception below.
      case 'oracle':
        // @see: https://www.drupal.org/project/oracle
        break;

      case 'sqlsrv':
        // @see: https://www.drupal.org/project/sqlsrv
        break;
    }
    throw new SqlException("Unsupported database driver detected, can't clone table {$originalTable} for GDPR.");
  }
  /**
   * Creates table clones according to the config.
   *
   * @throws \Drupal\Core\Database\TransactionNoActiveException
   * @throws \Drupal\Core\Database\TransactionCommitFailedException
   * @throws \InvalidArgumentException
   * @throws \Drupal\Core\Database\IntegrityConstraintViolationException
   * @throws \Drupal\Core\Database\DatabaseExceptionWrapper
   * @throws \Exception
   */
  protected function createTableClones() {
    $tables = \array_keys($this->tablesToAnonymize);
    $transaction = $this->database->startTransaction('gdpr_clone_tables');
    foreach ($tables as $table) {
      $queryString = $this->createCloneQueryString($table);
      if (NULL === $queryString) {
        // The table is on the skip list, so there is nothing to clone.
        // @todo: Notify?
        continue;
      }
      try {
        if (drush_get_context('DRUSH_VERBOSE') || drush_get_context('DRUSH_SIMULATE')) {
          drush_print("Executing: '{$queryString}'", 0, STDERR);
        }
        // Connection::query() prepares AND executes the statement. Calling
        // execute() on its result again would run the CREATE a second time.
        $this->database->query($queryString);
      }
      catch (\Exception $e) {
        drush_print("Error while cloning the '{$table}' table.");
        $transaction->rollBack();
        // The transaction is gone after the rollback, so continuing the loop
        // would run the remaining clones outside of it and hide the cause of
        // the failure. Abort and let the caller see the original error.
        throw $e;
      }
    }
    $this->database->popTransaction($transaction->name());
  }
  /**
   * Go through the data and sanitize it.
   *
   * @throws \Exception
   */
  protected function sanitizeData() {
    /* @todo
     * Remote API call optimization:
     *   Prefetch the required amount of data from remote APIs.
     *   Maybe do it on a table level.
     */
    /** @var array $anonymizationOptions */
    foreach ($this->tablesToAnonymize as $table => $anonymizationOptions) {
      if (\array_key_exists($table, $this->tablesToSkip)) {
        continue;
      }

      // Read every row of the original table.
      $selectQuery = $this->database->select($table);
      $selectQuery->fields($table);
      $oldRows = $selectQuery->execute();
      if (NULL === $oldRows) {
        // @todo: notify
        continue;
      }

      // All rows of the table are collected into one insert on the clone.
      $clonedTable = self::GDPR_TABLE_PREFIX . $table;
      $tableColumns = $this->databaseManager->fetchColumnNames($table);
      $insertQuery = $this->database->insert($clonedTable);
      $insertQuery->fields($tableColumns);

      // Look up the maximum character length of each column, so generated
      // values that would not fit can be rejected and regenerated.
      $query = $this->database->select('information_schema.columns', 'columns');
      $query->fields('columns', [
        'COLUMN_NAME',
        'CHARACTER_MAXIMUM_LENGTH',
      ]);
      $query->condition('TABLE_SCHEMA', $this->database->getConnectionOptions()['database']);
      $query->condition('TABLE_NAME', $table);
      $columnDetails = $query->execute()->fetchAllAssoc('COLUMN_NAME');

      while ($row = $oldRows->fetchAssoc()) {
        foreach ($anonymizationOptions as $column => $pluginId) {
          /* @todo
           * Maybe it would be better to use 'per table' sanitation,
           * so username, email, etc can be the same.
           * E.g myuser could have myuser@example.com as a mail, not
           * somethingelse@example.com
           *
           * @todo:
           * Also add a way to make exceptions
           * e.g option for 'don't alter uid 1 name', etc.
           */
          $tries = 0;
          do {
            $isValid = TRUE;
            $value = $this->pluginFactory
              ->get($pluginId)
              ->anonymize($row[$column]);
            // CHARACTER_MAXIMUM_LENGTH counts characters, not bytes, so the
            // value has to be measured in characters too. A byte-wise
            // strlen() would wrongly reject multi-byte values that fit.
            if (!empty($columnDetails[$column]->CHARACTER_MAXIMUM_LENGTH) && \mb_strlen($value, 'UTF-8') > $columnDetails[$column]->CHARACTER_MAXIMUM_LENGTH) {
              $isValid = FALSE;
            }
            // $tries is only incremented after a failed attempt, so it ends
            // above 50 only when every attempt produced a too-long value.
          } while (!$isValid && $tries++ < 50);
          if ($tries > 50) {
            throw new GdprDumpAnonymizationException("Too many retries for column '{$column}'.");
          }
          $row[$column] = $value;
        }
        $insertQuery->values($row);
      }
      $insertQuery->execute();
    }
  }
  /**
   * Prepare the database for the dump.
   *
   * @throws \InvalidArgumentException
   * @throws \Drupal\Core\Database\IntegrityConstraintViolationException
   * @throws \Drupal\Core\Database\DatabaseExceptionWrapper
   * @throws \Drupal\Core\Database\TransactionNoActiveException
   * @throws \Drupal\Core\Database\TransactionCommitFailedException
   * @throws \Exception
   */
  protected function prepare() {
    // Drop clone tables that are still around before building new ones.
    $this->cleanup();

    // Extend the skip list with the table selection of the SQL instance.
    $this->buildTablesToSkip();

    // Create the empty clones, then fill them with anonymized rows.
    $this->createTableClones();
    $this->sanitizeData();
  }
  /**
   * Builds tablesToSkip array.
   */
  protected function buildTablesToSkip() {
    // Get table expanded selection.
    $selection = $this->getInstance()->get_expanded_table_selection();

    // Flip the merged 'skip' and 'structure' lists so the table names become
    // array keys, matching the key-based lookups done on tablesToSkip.
    $selectedTables = \array_flip(\array_merge($selection['skip'], $selection['structure']));

    // Array union: entries from the selection win over same-named entries
    // that are already in the list.
    $this->tablesToSkip = $selectedTables + $this->tablesToSkip;
  }
  /**
   * Cleanup the database after the dump.
   *
   * @throws \Drupal\Core\Database\TransactionNoActiveException
   * @throws \Drupal\Core\Database\TransactionCommitFailedException
   */
  protected function cleanup() {
    $dropTransaction = $this->database->startTransaction('gdpr_drop_table');

    // Every table in the anonymization mapping may have a clone to drop.
    $sourceTables = \array_keys($this->tablesToAnonymize);
    foreach ($sourceTables as $sourceTable) {
      $schema = $this->database->schema();
      $schema->dropTable(self::GDPR_TABLE_PREFIX . $sourceTable);
    }

    $transactionName = $dropTransaction->name();
    $this->database->popTransaction($transactionName);
  }
}
Members
| Name | Modifiers | Type | Description | Overrides |
|---|---|---|---|---|
| GdprSqlDump::$database | protected | property | The database. | |
| GdprSqlDump::$databaseManager | protected | property | GDPR database manager. | |
| GdprSqlDump::$driver | protected | property | The database driver. | |
| GdprSqlDump::$pluginFactory | protected | property | The Sanitizer plugin factory. | |
| GdprSqlDump::$tablesToAnonymize | protected | property | The GDPR table settings. | |
| GdprSqlDump::$tablesToSkip | protected | property | The list of tables needed to be skipped. | |
| GdprSqlDump::buildTablesToSkip | protected | function | Builds tablesToSkip array. | |
| GdprSqlDump::cleanup | protected | function | Cleanup the database after the dump. | |
| GdprSqlDump::createCloneQueryString | protected | function | Creates a query string for cloning. | |
| GdprSqlDump::createTableClones | protected | function | Creates table clones according to the config. | |
| GdprSqlDump::dump | public | function | Dump command. | |
| GdprSqlDump::GDPR_TABLE_PREFIX | | constant | | |
| GdprSqlDump::getInstance | protected | function | Get a SqlBase instance according to dbSpecs. | |
| GdprSqlDump::prepare | protected | function | Prepare the database for the dump. | |
| GdprSqlDump::sanitizeData | protected | function | Go through the data and sanitize it. | |
| GdprSqlDump::__construct | public | function | GdprSqlDump constructor. | |
