You are here

protected function HtmlLinkDetector::extractEntitiesFromText in Lingotek Translation 3.5.x

Same name and namespace in other branches
  1. 4.0.x src/Plugin/RelatedEntitiesDetector/HtmlLinkDetector.php \Drupal\lingotek\Plugin\RelatedEntitiesDetector\HtmlLinkDetector::extractEntitiesFromText()
  2. 3.6.x src/Plugin/RelatedEntitiesDetector/HtmlLinkDetector.php \Drupal\lingotek\Plugin\RelatedEntitiesDetector\HtmlLinkDetector::extractEntitiesFromText()
  3. 3.7.x src/Plugin/RelatedEntitiesDetector/HtmlLinkDetector.php \Drupal\lingotek\Plugin\RelatedEntitiesDetector\HtmlLinkDetector::extractEntitiesFromText()
  4. 3.8.x src/Plugin/RelatedEntitiesDetector/HtmlLinkDetector.php \Drupal\lingotek\Plugin\RelatedEntitiesDetector\HtmlLinkDetector::extractEntitiesFromText()

File

src/Plugin/RelatedEntitiesDetector/HtmlLinkDetector.php, line 96

Class

HtmlLinkDetector
Plugin annotation: @RelatedEntitiesDetector ( id = "html_link_detector", title = @Translation("Get editor linked entities with html links"), description = @translation("Get editor linked entities with html links."), weight = 7, )

Namespace

Drupal\lingotek\Plugin\RelatedEntitiesDetector

Code

/**
 * Extracts the entities referenced by HTML links in a piece of text.
 *
 * Every <a> element with a non-empty href is inspected. The href is resolved
 * either through a routed URL whose route name starts with "entity.", or
 * through a file entity whose URI lives under the public file directory.
 * Links whose data-entity-uuid attribute already matches the resolved entity
 * are skipped. Any exception raised while handling a link is swallowed, so
 * a single bad link never aborts the scan.
 *
 * @param string $text
 *   The HTML markup to scan.
 *
 * @return array
 *   Entity type IDs, keyed by the UUID of each linked entity.
 */
protected function extractEntitiesFromText($text) {

  // This method is adapted from \Drupal\entity_usage\Plugin\EntityUsage\Track\HtmlLink::parseEntitiesFromText().
  $dom = Html::load($text);
  $xpath = new \DOMXPath($dom);
  $entities = [];

  // Quote the directory so regex metacharacters in it (for example the dots
  // in "sites/default/files") are matched literally. preg_quote() escapes
  // the "{" and "}" delimiters by default.
  $public_file_pattern = '{^/?' . preg_quote((string) $this->publicFileDirectory) . '/}';

  // The scheme/host/base-path prefix is the same for every link. It is
  // resolved lazily inside the try block so that a failure while reading the
  // request stays swallowed per link instead of escaping this method.
  $domain = NULL;

  // Loop through all the <a> elements that have a non-empty href. Links
  // already handled by LinkIt are filtered out further below.
  $xpath_query = "//a[@href != '']";
  foreach ($xpath->query($xpath_query) as $element) {

    /** @var \DOMElement $element */
    try {

      // Get the href value of the <a> element.
      $href = $element->getAttribute('href');

      // Strip off the scheme and host, so we only get the path. Only the
      // leading occurrence is removed: the same string may legitimately
      // appear again later in the href (e.g. inside a query string).
      if ($domain === NULL) {
        $domain = $this->request->getSchemeAndHttpHost() . $this->request->getBasePath();
      }
      if (strpos($href, $domain) === 0) {
        $href = (string) substr($href, strlen($domain));
      }
      $target_type = $target_id = NULL;

      // Check if the href links to an entity.
      $url = $this->pathValidator->getUrlIfValidWithoutAccessCheck($href);
      if ($url && $url->isRouted() && preg_match('/^entity\\./', $url->getRouteName())) {

        // Get the target entity type and ID from the first route parameter.
        // Entity routes without parameters cannot be mapped to an entity.
        $route_parameters = $url->getRouteParameters();
        if ($route_parameters) {
          $target_type = array_keys($route_parameters)[0];
          $target_id = $route_parameters[$target_type];
        }
      }
      elseif (preg_match($public_file_pattern, $href)) {

        // Check if we can map the link to a public file.
        $file_uri = preg_replace($public_file_pattern, 'public://', urldecode($href));
        $files = $this->entityTypeManager
          ->getStorage('file')
          ->loadByProperties([
            'uri' => $file_uri,
          ]);
        if ($files) {

          // File entity found.
          $target_type = 'file';
          $target_id = array_keys($files)[0];
        }
      }

      if ($target_type && $target_id) {
        $entity = $this->entityTypeManager
          ->getStorage($target_type)
          ->load($target_id);
        if ($entity) {
          if ($element->hasAttribute('data-entity-uuid')) {

            // Normally the Linkit plugin handles when an element has this
            // attribute, but sometimes users may change the HREF manually and
            // leave behind the wrong UUID.
            $data_uuid = $element->getAttribute('data-entity-uuid');

            // If the UUID is the same as found in HREF, then skip it because
            // it's LinkIt's job to register this usage. Compare as strings to
            // avoid PHP's loose numeric-string equality.
            if ($data_uuid === (string) $entity->uuid()) {
              continue;
            }
          }
          $entities[$entity->uuid()] = $target_type;
        }
      }
    }
    catch (\Exception $e) {

      // Deliberately best-effort: a link that cannot be resolved is ignored.
    }
  }
  return $entities;
}