InlineLinkExtractor.php in Printer and PDF versions for Drupal 8+ 8
File
src/LinkExtractor/InlineLinkExtractor.php
View source
<?php
namespace Drupal\printable\LinkExtractor;
use wa72\htmlpagedom\HtmlPageCrawler;
use Drupal\Core\Url;
use Drupal\Core\Render\MetadataBubblingUrlGenerator;
use Drupal\Core\Path\AliasManager;
/**
 * Link extractor that rewrites or lists the anchors found in an HTML string.
 *
 * Every public method feeds its input into the same injected crawler instance
 * through addContent().
 *
 * NOTE(review): only listAttribute() calls remove() on the crawler afterwards.
 * Whether content from earlier calls accumulates in the crawler depends on the
 * crawler implementation -- confirm against the HtmlPageCrawler documentation.
 */
class InlineLinkExtractor implements LinkExtractorInterface {

  /**
   * The DOM crawler used to parse and manipulate the HTML.
   *
   * @var \wa72\htmlpagedom\HtmlPageCrawler
   */
  protected $crawler;

  /**
   * The injected URL generator (stored, not used by the methods of this class).
   *
   * @var \Drupal\Core\Render\MetadataBubblingUrlGenerator
   */
  protected $urlGenerator;

  /**
   * The path alias manager.
   *
   * The misspelled name is kept on purpose: the property is protected, so
   * renaming it could break subclasses that already reference it.
   *
   * @var \Drupal\Core\Path\AliasManager
   */
  protected $aliasMnager;

  /**
   * Constructs a new InlineLinkExtractor.
   *
   * @param \wa72\htmlpagedom\HtmlPageCrawler $crawler
   *   The crawler that parses the HTML handed to the public methods.
   * @param \Drupal\Core\Render\MetadataBubblingUrlGenerator $urlGenerator
   *   The URL generator.
   * @param \Drupal\Core\Path\AliasManager $aliasMnager
   *   The alias manager used by listAttribute() to look up path aliases.
   */
  public function __construct(HtmlPageCrawler $crawler, MetadataBubblingUrlGenerator $urlGenerator, AliasManager $aliasMnager) {
    $this->crawler = $crawler;
    $this->urlGenerator = $urlGenerator;
    $this->aliasMnager = $aliasMnager;
  }

  /**
   * Appends the absolute URL of every link, in parentheses, to its anchor.
   *
   * Anchors whose href is missing or empty are left untouched.
   *
   * @param string $string
   *   The HTML to process.
   *
   * @return string
   *   The crawler content cast to a string after the anchors were extended.
   */
  public function extract($string) {
    $this->crawler->addContent($string);

    $this->crawler->filter('a')->each(function (HtmlPageCrawler $anchor, $index) {
      $href = $anchor->attr('href');
      if (!$href) {
        return;
      }
      $anchor->append(' (' . $this->urlFromHref($href)->toString() . ')');
    });

    return (string) $this->crawler;
  }

  /**
   * Removes an attribute from every anchor in the given HTML.
   *
   * @param string $content
   *   The HTML to process.
   * @param string $attr
   *   The name of the attribute to remove. An empty value falls back to
   *   'href', which is what this method removed before it honoured $attr.
   *
   * @return string
   *   The crawler content cast to a string after the attribute was removed.
   */
  public function removeAttribute($content, $attr) {
    // Previously $attr was ignored and 'href' was always removed.
    $name = ($attr === NULL || $attr === '') ? 'href' : $attr;

    $this->crawler->addContent($content);

    $this->crawler->filter('a')->each(function (HtmlPageCrawler $anchor, $index) use ($name) {
      $anchor->removeAttribute($name);
    });

    return (string) $this->crawler;
  }

  /**
   * Builds a comma-separated list of the link targets found in the HTML.
   *
   * For each anchor the global $base_url is prefixed to the alias returned by
   * the alias manager for the href. If that lookup throws, the href is turned
   * into an absolute URL through urlFromHref() instead. Anchors whose href is
   * missing or empty are skipped, matching extract().
   *
   * @param string $content
   *   The HTML to process.
   *
   * @return string
   *   The collected URLs joined with ','; an empty string if there are none.
   */
  public function listAttribute($content) {
    $this->crawler->addContent($content);

    // Collected locally (by reference) instead of in an undeclared
    // $this->links property, so no state leaks onto the service object.
    $links = [];

    $this->crawler->filter('a')->each(function (HtmlPageCrawler $anchor, $index) use (&$links) {
      global $base_url;

      $href = $anchor->attr('href');
      if (!$href) {
        return;
      }

      try {
        $links[] = $base_url . $this->aliasMnager->getAliasByPath($href);
      }
      catch (\Exception $e) {
        // The alias lookup rejected the href; fall back to a plain URL.
        $links[] = $this->urlFromHref($href)->toString();
      }
    });

    $this->crawler->remove();

    return implode(',', $links);
  }

  /**
   * Builds an absolute Url object from an href value.
   *
   * Url::fromUri() is tried first. If it throws an \InvalidArgumentException,
   * Url::fromUserInput() is used instead. Anything thrown by that fallback is
   * not caught here and propagates to the caller.
   *
   * @param string $href
   *   The raw href attribute value.
   *
   * @return \Drupal\Core\Url
   *   The URL object, created with the 'absolute' option enabled.
   */
  private function urlFromHref($href) {
    $options = ['absolute' => TRUE];

    try {
      return Url::fromUri($href, $options);
    }
    catch (\InvalidArgumentException $e) {
      return Url::fromUserInput($href, $options);
    }
  }

}