You are here

protected function GdprDumpGdprSqlDump::sanitizeData in General Data Protection Regulation 7

Go through the data and sanitize it.

1 call to GdprDumpGdprSqlDump::sanitizeData()
GdprDumpGdprSqlDump::prepare in modules/gdpr_dump/inc/GdprDumpGdprSqlDump.inc
Prepare the database for the dump.

File

modules/gdpr_dump/inc/GdprDumpGdprSqlDump.inc, line 186

Class

GdprDumpGdprSqlDump
Class GdprDumpGdprSqlDump.

Code

/**
 * Go through the data and sanitize it.
 *
 * For every table configured in $this->gdprOptions (except the ones present
 * as keys in $this->skipTables) all rows are read, each configured column is
 * passed through the 'sanitize callback' of its sanitizer plugin, and the
 * resulting rows are inserted into the table named
 * self::GDPR_TABLE_PREFIX . $table.
 *
 * Values generated for the primary key column, or for a column that has an
 * entry in the schema's 'unique keys', are regenerated (the first attempt
 * plus up to 50 retries) until they do not collide with a value already
 * generated for the same column of the same table. When no unique value can
 * be produced, an error is logged through drush_log() and the process exits.
 */
protected function sanitizeData() {

  /* @todo
   * Remote API call optimization:
   *   Prefetch the required amount of data from remote APIs.
   *   Maybe do it on a table level.
   */

  // Number of retries allowed after the first attempt for a unique column.
  $maxRetries = 50;

  /** @var array $sanitationOptions */
  foreach ($this->gdprOptions as $table => $sanitationOptions) {
    if (\array_key_exists($table, $this->skipTables)) {
      continue;
    }

    $selectQuery = db_select($table);
    $selectQuery->fields($table);
    $oldRows = $selectQuery->execute();
    if (NULL === $oldRows) {
      // @todo: notify
      continue;
    }

    $clonedTable = self::GDPR_TABLE_PREFIX . $table;

    // The curly braces are resolved by the database layer, so a configured
    // table prefix is applied here just like for db_select()/db_insert().
    $describeQuery = db_query('DESCRIBE {' . $table . '}');
    // 'Field' is the first column of the DESCRIBE result; fetchCol() expects
    // a numeric column index.
    $tableColumns = $describeQuery->fetchCol(0);

    $insertQuery = db_insert($clonedTable);
    $insertQuery->fields($tableColumns);

    $schema = drupal_get_schema($table);
    $uniq_keys = isset($schema['unique keys']) ? $schema['unique keys'] : [];
    $primary_key = isset($schema['primary key'][0]) ? (string) $schema['primary key'][0] : '';

    // Resolve the sanitizer of every column once per table instead of once
    // per row; columns without a usable callback are left untouched.
    $sanitizers = [];
    foreach ($sanitationOptions as $column => $pluginId) {

      /* @todo
       * Maybe it would be better to use 'per table' sanitation,
       * so username, email, etc can be the same.
       * E.g myuser could have myuser@example.com as a mail, not
       * somethingelse@example.com
       *
       * @todo:
       * Also add a way to make exceptions
       * e.g option for 'don't alter uid 1 name', etc.
       */
      $plugin = gdpr_dump_get_sanitizer_plugins($pluginId);
      if (!isset($plugin['sanitize callback'])
        || !is_string($plugin['sanitize callback'])
        || !function_exists($plugin['sanitize callback'])) {
        continue;
      }

      // NOTE(review): 'unique keys' is looked up by array key; in a Drupal
      // schema definition that key is presumably the index name, so only
      // indexes named after their column are recognised here - confirm.
      $isPrimary = $primary_key !== '' && (string) $column === $primary_key;
      $sanitizers[$column] = [
        'callback' => $plugin['sanitize callback'],
        'unique' => isset($uniq_keys[$column]) || $isPrimary,
        'length' => isset($schema['fields'][$column]['length']) ? $schema['fields'][$column]['length'] : NULL,
      ];
    }

    // Generated values of unique columns, as [column][value] => TRUE, so a
    // collision check is a hash lookup scoped to the column it belongs to.
    $unique_data = [];

    while ($row = $oldRows->fetchAssoc()) {
      foreach ($sanitizers as $column => $sanitizer) {
        $original = isset($row[$column]) ? $row[$column] : NULL;
        $tries = 0;
        do {
          $value = call_user_func($sanitizer['callback'], $original);
          if ($sanitizer['length'] !== NULL && strlen($value) > $sanitizer['length']) {
            $value = truncate_utf8($value, $sanitizer['length']);
          }
          // The check runs on the truncated value, i.e. on what is stored.
          $is_valid = !$sanitizer['unique'] || !isset($unique_data[$column][(string) $value]);
        } while (!$is_valid && $tries++ < $maxRetries);

        // Decide on the outcome of the last attempt, not on the counter: a
        // value found on the final retry is still a valid value.
        if (!$is_valid) {
          drush_log("Too many tries for column '{$column}'", 'error');
          exit;
        }

        if ($sanitizer['unique']) {
          $unique_data[$column][(string) $value] = TRUE;
        }
        $row[$column] = $value;
      }
      $insertQuery->values($row);
    }

    $insertQuery->execute();
  }
}