You are here

public function ImgTagToEmbedFilter::transform in Media Migration 8

Performs the associated process.

Parameters

mixed $value: The value to be transformed.

\Drupal\migrate\MigrateExecutableInterface $migrate_executable: The migration in which this process is being executed.

\Drupal\migrate\Row $row: The row from the source to process. Normally, transforming the value alone is sufficient; in rare cases a plugin may need to read or change other properties of the row at the same time (for example, when two columns must be updated together).

string $destination_property: The destination property currently worked on. This is only used together with the $row above.

Return value

string|array The newly transformed value.

Overrides ProcessPluginBase::transform

File

src/Plugin/migrate/process/ImgTagToEmbedFilter.php, line 85

Class

ImgTagToEmbedFilter
Transforms <img src="/files/cat.png"> tags to <drupal-media …>.

Namespace

Drupal\media_migration\Plugin\migrate\process

Code

/**
 * Replaces <img> tags pointing at source-site public files with embed nodes.
 *
 * The text is parsed with the HTML5 parser. Every <img> whose "src" path
 * starts with the source site's public files directory is looked up in the
 * source database's "file_managed" table and, when a file ID is found,
 * replaced by the node returned from ::createEmbedNode(). Images that cannot
 * be matched are left untouched.
 *
 * @param mixed $value
 *   The value to be transformed: either a string, or an array whose 'value'
 *   key holds the text.
 * @param \Drupal\migrate\MigrateExecutableInterface $migrate_executable
 *   The migration in which this process is being executed.
 * @param \Drupal\migrate\Row $row
 *   The row from the source to process. Only used for log messages here.
 * @param string $destination_property
 *   The destination property currently worked on. Only used for log messages
 *   here.
 *
 * @return string|array
 *   The transformed value, in the same shape (string or array) as the input.
 *   The input is returned unchanged when it contains no '<img ' substring,
 *   when the destination filter plugin is not valid, or when the source
 *   plugin is not an SqlBase instance.
 */
public function transform($value, MigrateExecutableInterface $migrate_executable, Row $row, $destination_property) {
  $value_is_array = is_array($value);
  // An array value without a 'value' key is treated as empty text instead of
  // triggering an undefined index notice.
  $text = (string) ($value_is_array ? ($value['value'] ?? '') : $value);
  if (strpos($text, '<img ') === FALSE) {
    return $value;
  }
  if (!MediaMigration::embedTokenDestinationFilterPluginIsValid($this->destinationFilterPluginId)) {
    return $value;
  }
  $source_plugin = $this->migration->getSourcePlugin();
  if (!$source_plugin instanceof SqlBase) {
    return $value;
  }
  $source_connection = $source_plugin->getDatabase();
  $probable_domain_names = $this->getProbableDomainNames($source_connection);

  // @todo Document why HTML5 is used instead of \DOMDocument.
  $html5 = new HTML5([
    'disable_html_ns' => TRUE,
  ]);

  // Compatibility for older HTML5 versions (e.g. in Drupal core 8.9.x): fall
  // back to passing an input stream when a plain string raises a TypeError.
  $dom_text = '<html><body>' . $text . '</body></html>';
  try {
    $dom = $html5->parse($dom_text);
  }
  catch (\TypeError $e) {
    $text_stream = new StringInputStream($dom_text);
    $dom = $html5->parse($text_stream);
  }

  $d7_file_public_path = $this->variableGet($source_connection, 'file_public_path', 'sites/default/files');
  // Loop-invariant values: the expected path prefix and the pattern that maps
  // a public file path to its "public://" URI.
  $public_path_prefix = '/' . $d7_file_public_path . '/';
  $public_path_pattern = '/^\\/' . preg_quote($d7_file_public_path, '/') . '\\/(.*)$/';

  // The loop treats the node list as live: replaced images drop out of it, so
  // the next candidate is always located at the index equal to the number of
  // images that were skipped (and therefore remain in the list).
  $images = $dom->getElementsByTagName('img');
  $images_count = $images->length;
  $skipped_images_count = 0;
  for ($i = 0; $i < $images_count; $i++) {
    $image = $images->item($skipped_images_count);
    // Removing a <figure> wrapper may remove more than one <img> from the
    // list at once, in which case the list is exhausted before $images_count
    // iterations have run. Stop instead of dereferencing NULL.
    if (!$image instanceof \DOMElement) {
      break;
    }

    $src = rawurldecode($image->getAttribute('src'));
    // parse_url() returns FALSE for seriously malformed URLs and omits the
    // 'path' component when there is none; both cases mean "not our file".
    $url_parts = parse_url($src);
    $path = is_array($url_parts) ? ($url_parts['path'] ?? '') : '';

    // Support transforming absolute image URLs without knowing the source
    // site's domain name: validate that the correct public files path is
    // present in file URLs, and then look up the file by using the filename.
    if (strpos($path, $public_path_prefix) !== 0) {
      $skipped_images_count++;
      continue;
    }

    // Support transforming absolute image URLs without knowing the source
    // site's domain, but do not attempt to transform absolute URLs if we were
    // able to deduce probable domain names from watchdog log entries.
    if (isset($url_parts['host']) && !empty($probable_domain_names) && !in_array($url_parts['host'], $probable_domain_names)) {
      $skipped_images_count++;
      continue;
    }

    $filesystem_location = preg_replace($public_path_pattern, 'public://$1', $path);
    $file_id = FALSE;
    try {
      if ($source_connection->schema()->tableExists('file_managed')) {
        $file_id = $source_connection
          ->select('file_managed', 'fm')
          ->fields('fm', [
            'fid',
          ])
          ->condition('fm.uri', $filesystem_location)
          ->execute()
          ->fetchField();
      }
    }
    catch (\Exception $e) {
      // Deliberately best-effort: a failing lookup is handled below exactly
      // like "no file found", because $file_id is still FALSE.
    }

    if ($file_id === FALSE) {
      // If no file was found, distinguish between absolute URLs and relative
      // URLs. The latter are definitely errors on the source site. The former
      // may be hotlinking or not; this is impossible to know without knowing
      // the source site's domain(s).
      $row_source_id_string = preg_replace('/\\s+/', ' ', Variable::export($row->getSourceIdValues()));
      $is_absolute_url = strpos($src, 'http') === 0 || strpos($src, '//') === 0;
      $this->logger->log(
        $is_absolute_url ? RfcLogLevel::INFO : RfcLogLevel::WARNING,
        sprintf(
          "No file found for the %s image URL in tag '%s' used in the '%s' migration's source row with source ID %s while processing the destination property '%s'.",
          $is_absolute_url ? 'absolute' : 'relative',
          $html5->saveHTML($image),
          $this->migration->id(),
          $row_source_id_string,
          $destination_property
        )
      );
      $skipped_images_count++;
      continue;
    }

    // Delete the consumed attribute.
    $image->removeAttribute('src');

    // Generate the <drupal-media> tag that will replace the <img> tag.
    $replacement_node = $this->createEmbedNode($dom, $file_id);

    // Best-effort support for data-align.
    // @see \Drupal\filter\Plugin\Filter\FilterAlign
    // @see https://developer.mozilla.org/en-US/docs/Web/HTML/Element/Img#attr-align
    if ($image->hasAttribute('align')) {
      $replacement_node->setAttribute('data-align', $image->getAttribute('align'));

      // Delete the consumed attribute.
      $image->removeAttribute('align');
    }
    if ($image->hasAttribute('style')) {
      $styles = explode(';', $image->getAttribute('style'));
      foreach ($styles as $index => $style) {

        // We have to get the last value of a float style property definition,
        // so we must not have a break here, after the first match.
        if (preg_match('/;float\\s*\\:\\s*(left|right);/', ';' . trim($style) . ';', $matches)) {
          $replacement_node->setAttribute('data-align', $matches[1]);
          // The float declaration is consumed: drop it from the style that
          // is carried over to the replacement node further below.
          unset($styles[$index]);
          $image->setAttribute('style', implode(';', $styles));
        }
      }
    }

    // Best-effort support for data-caption.
    // @see \Drupal\filter\Plugin\Filter\FilterCaption
    // @see https://developer.mozilla.org/en-US/docs/Web/HTML/Element/figcaption
    $target_node = $image;
    if ($image->parentNode->tagName === 'figure') {
      // The whole <figure> is replaced, not just the <img> inside it.
      $target_node = $image->parentNode;
      foreach ($image->parentNode->childNodes as $child) {
        if ($child instanceof \DOMElement && $child->tagName === 'figcaption') {
          $caption_html = $html5->saveHTML($child->childNodes);
          $replacement_node->setAttribute('data-caption', $caption_html);
          break;
        }
      }
    }

    // Retain all other attributes. Currently the media_embed filter
    // explicitly supports the `alt` and `title` attributes, but it may
    // support more attributes in the future. We avoid data loss and allow
    // contrib modules to add more filtering.
    // @see \Drupal\media\Plugin\Filter\MediaEmbed::applyPerEmbedMediaOverrides()
    foreach ($image->attributes as $attribute) {
      // A style attribute that became empty after extracting "float" is not
      // worth keeping.
      if ($attribute->name === 'style' && empty($attribute->value)) {
        continue;
      }
      $replacement_node->setAttribute($attribute->name, $attribute->value);
    }
    $target_node->parentNode->insertBefore($replacement_node, $target_node);
    $target_node->parentNode->removeChild($target_node);
  }

  // Serialize only the children of the wrapping element added above, so the
  // artificial <html><body> wrapper does not leak into the result.
  $result = $html5->saveHTML($dom->documentElement->firstChild->childNodes);
  if ($value_is_array) {
    $value['value'] = $result;
  }
  else {
    $value = $result;
  }
  return $value;
}