View source
<?php
namespace Drupal\minisite;
use Drupal\Component\Utility\UrlHelper;
use Drupal\minisite\Exception\PageProcessorException;
/**
 * Processes the HTML of a single page and rewrites the URLs it contains.
 *
 * The page markup is parsed into a DOM document when the object is created.
 * process() then walks the <base>, <a>, <link>, <img>, <script> and <style>
 * tags and rewrites their URLs using the directories provided by the URL bag.
 * The resulting markup is available through content().
 */
class PageProcessor implements PageProcessorInterface {

  /**
   * The parsed HTML document that is being processed.
   *
   * @var \DOMDocument
   */
  protected $document;

  /**
   * The URL bag supplying the root dir, asset dir and parent alias.
   *
   * @var \Drupal\minisite\UrlBag
   */
  protected $urlBag;

  /**
   * Constructs a page processor.
   *
   * @param string $content
   *   The HTML markup of the page.
   * @param \Drupal\minisite\UrlBag $url_bag
   *   The URL bag used when rewriting URLs.
   *
   * @throws \Drupal\minisite\Exception\PageProcessorException
   *   When the markup cannot be parsed into a document.
   */
  public function __construct($content, UrlBag $url_bag) {
    $this->urlBag = $url_bag;
    $this->document = $this->loadDocument($content);
  }

  /**
   * Processes every supported tag of the loaded document.
   *
   * The <base> tag is handled first, followed by <a>, <link>, <img>,
   * <script> and <style> tags, in that order.
   */
  public function process() {
    $this->processTagBase();

    foreach ($this->document->getElementsByTagName('a') as $item) {
      $this->processTagA($item);
    }

    foreach ($this->document->getElementsByTagName('link') as $item) {
      $this->processTagLink($item);
    }

    foreach ($this->document->getElementsByTagName('img') as $item) {
      $this->processTagImg($item);
    }

    foreach ($this->document->getElementsByTagName('script') as $item) {
      $this->processTagScript($item);
    }

    foreach ($this->document->getElementsByTagName('style') as $item) {
      $this->processTagStyle($item);
    }
  }

  /**
   * Checks whether a URL ends with one of the non-HTML document extensions.
   *
   * The extensions are read from the space-separated list in
   * PageProcessorInterface::EXTENSIONS_NON_HTML_DOCUMENTS and are matched
   * case-insensitively against the very end of the URL.
   *
   * @param string $url
   *   The URL to check.
   *
   * @return bool
   *   TRUE if the URL ends with a listed extension, FALSE otherwise.
   */
  public static function urlIsDocumentFile($url) {
    // Pass the delimiter so that a "/" in the list cannot break the pattern.
    $extensions = preg_quote(PageProcessorInterface::EXTENSIONS_NON_HTML_DOCUMENTS, '/');
    $regex = '/\\.(' . preg_replace('/ +/', '|', $extensions) . ')$/i';

    return (bool) preg_match($regex, $url);
  }

  /**
   * Returns the document serialised as HTML.
   *
   * @return string|false
   *   The result of \DOMDocument::saveHTML().
   */
  public function content() {
    return $this->document->saveHTML();
  }

  /**
   * Removes the first <base> tag from the document, if there is one.
   */
  protected function processTagBase() {
    $base_tag = $this->document->getElementsByTagName('base')->item(0);

    // The tag has to be removed through its own parent: calling removeChild()
    // on the document throws a \DOMException for any node that is not a
    // direct child of the document, which is the case for <base> in <head>.
    if ($base_tag && $base_tag->parentNode) {
      $base_tag->parentNode->removeChild($base_tag);
    }
  }

  /**
   * Rewrites the 'href' attribute of an <a> tag.
   *
   * Empty and external URLs are left untouched. Root URLs are passed through
   * UrlValidator::rootToRelative(). Relative URLs are only rewritten when
   * they point to a document file (see urlIsDocumentFile()).
   *
   * @param \DOMNode $item
   *   The <a> element.
   */
  protected function processTagA(\DOMNode $item) {
    $url = $item->getAttribute('href');
    if (!$url) {
      return;
    }

    if (UrlValidator::urlIsExternal($url)) {
      return;
    }

    if (UrlValidator::urlIsRoot($url)) {
      $item->setAttribute('href', UrlValidator::rootToRelative($url, $this->urlBag->getRootDir(), $this->urlBag->getParentAlias()));
      return;
    }

    if (UrlValidator::urlIsRelative($url) && self::urlIsDocumentFile($url)) {
      $url = UrlValidator::relativeToRoot(self::urlExtractPath($url), $this->assetRoot());
      $item->setAttribute('href', $url);
    }
  }

  /**
   * Rewrites the 'href' attribute of a <link> tag.
   *
   * Empty and external URLs are left untouched. Root URLs are passed through
   * UrlValidator::rootToRelative(). Every other URL is reduced to its path
   * and passed through UrlValidator::relativeToRoot() with the asset root.
   *
   * @param \DOMNode $item
   *   The <link> element.
   */
  protected function processTagLink(\DOMNode $item) {
    $url = $item->getAttribute('href');
    if (!$url) {
      return;
    }

    if (UrlValidator::urlIsExternal($url)) {
      return;
    }

    if (UrlValidator::urlIsRoot($url)) {
      $item->setAttribute('href', UrlValidator::rootToRelative($url, $this->urlBag->getRootDir(), $this->urlBag->getParentAlias()));
      return;
    }

    $url = UrlValidator::relativeToRoot(self::urlExtractPath($url), $this->assetRoot());
    $item->setAttribute('href', $url);
  }

  /**
   * Rewrites the 'src' attribute of a <script> tag.
   *
   * @param \DOMNode $item
   *   The <script> element.
   */
  protected function processTagScript(\DOMNode $item) {
    $url = $item->getAttribute('src');
    if (!$url) {
      return;
    }

    // Leave external scripts alone, as the <a> and <link> handlers do:
    // extracting the path below would drop their query string.
    if (UrlValidator::urlIsExternal($url)) {
      return;
    }

    $url = UrlValidator::relativeToRoot(self::urlExtractPath($url), $this->assetRoot());
    $item->setAttribute('src', $url);
  }

  /**
   * Rewrites the URLs of '@import url(...)' statements inside a <style> tag.
   *
   * @param \DOMNode $item
   *   The <style> element.
   */
  protected function processTagStyle(\DOMNode $item) {
    $content = $item->textContent;

    preg_match_all('/@import url\\(([^)]+)\\)/i', $content, $matches, PREG_SET_ORDER);
    if (empty($matches)) {
      return;
    }

    $asset_root = $this->assetRoot();
    foreach ($matches as $match) {
      if (count($match) != 2) {
        continue;
      }

      // The captured value may be wrapped in quotes and whitespace, which are
      // not part of the URL and have to be preserved in the output.
      $raw = $match[1];
      $url = trim($raw, " \t\n\r\"'");

      // Leave external imports alone: extracting the path below would drop
      // their query string.
      if ($url === '' || UrlValidator::urlIsExternal($url)) {
        continue;
      }

      $path = UrlValidator::relativeToRoot(self::urlExtractPath($url), $asset_root);
      $statement = str_replace($raw, str_replace($url, $path, $raw), $match[0]);
      $content = str_replace($match[0], $statement, $content);
    }

    $item->textContent = $content;
  }

  /**
   * Rewrites the 'src' attribute of an <img> tag.
   *
   * Unlike the other handlers, the URL is passed on in full rather than being
   * reduced to its path first.
   *
   * @param \DOMNode $item
   *   The <img> element.
   */
  protected function processTagImg(\DOMNode $item) {
    $url = $item->getAttribute('src');
    if (!$url) {
      return;
    }

    // Leave external images alone, as the <a> and <link> handlers do.
    if (UrlValidator::urlIsExternal($url)) {
      return;
    }

    $item->setAttribute('src', UrlValidator::relativeToRoot($url, $this->assetRoot()));
  }

  /**
   * Parses HTML markup into a DOM document.
   *
   * @param string $content
   *   The HTML markup.
   *
   * @return \DOMDocument
   *   The loaded document.
   *
   * @throws \Drupal\minisite\Exception\PageProcessorException
   *   When the markup is empty, cannot be loaded, or yields a document
   *   without any text content.
   */
  protected function loadDocument($content) {
    $content = (string) $this->cleanupContent($content);

    // Collect parser errors internally instead of emitting warnings, and
    // restore the caller's setting afterwards so that this global switch does
    // not leak out of this method.
    $previous = libxml_use_internal_errors(TRUE);
    libxml_clear_errors();
    try {
      $document = new \DOMDocument();
      // loadHTML() rejects an empty string (a \ValueError as of PHP 8), so
      // empty content is treated as a failed load without calling it.
      $loaded = $content !== '' && $document->loadHTML($content);
      $error = libxml_get_last_error();
    }
    finally {
      libxml_clear_errors();
      libxml_use_internal_errors($previous);
    }

    if (!$loaded || (string) $document->textContent === '') {
      // libxml_get_last_error() returns a \LibXMLError object (or FALSE),
      // which cannot be passed to '%s' directly.
      $message = $error instanceof \LibXMLError ? trim($error->message) : 'no parser error reported';
      throw new PageProcessorException(sprintf('Unable to parse document: %s', $message));
    }

    return $document;
  }

  /**
   * Prepares raw markup for loading into a DOM document.
   *
   * A 'content-type' meta tag declaring the ISO-8859-1 charset is replaced
   * with one declaring utf-8, and every non-ASCII character is converted to
   * a numeric HTML entity.
   *
   * @param string $content
   *   The raw HTML markup, expected to be UTF-8 encoded.
   *
   * @return string
   *   The cleaned up markup.
   */
  protected function cleanupContent($content) {
    // The attribute value is matched with [^"]* so that the match cannot run
    // past the closing quote into the following tags on the same line.
    $pattern = '~<meta\s+http-equiv\s*=\s*"content-type"\s+content\s*=\s*"[^"]*charset=ISO-8859-1"\s*/?>~i';
    $cleaned = preg_replace($pattern, '<meta http-equiv="Content-Type" content="text/html; charset=utf-8">', $content);
    if ($cleaned === NULL) {
      // The replacement failed; carry on with the markup as provided.
      $cleaned = $content;
    }

    // The 'HTML-ENTITIES' target of mb_convert_encoding() is deprecated as of
    // PHP 8.2. Numeric entities for everything outside of ASCII are parsed to
    // the same characters.
    return mb_encode_numericentity((string) $cleaned, [0x80, 0x10FFFF, 0, 0x1FFFFF], 'UTF-8');
  }

  /**
   * Returns the 'path' element of a URL parsed with UrlHelper::parse().
   *
   * @param string $url
   *   The URL to parse.
   *
   * @return string|null
   *   The path, or NULL if the parsed URL has no 'path' element.
   */
  protected static function urlExtractPath($url) {
    $parsed = UrlHelper::parse($url);

    return isset($parsed['path']) ? $parsed['path'] : NULL;
  }

  /**
   * Builds the prefix for asset URLs: the asset dir followed by the root dir.
   *
   * @return string
   *   The asset dir and the root dir from the URL bag, joined with a slash.
   */
  protected function assetRoot() {
    return $this->urlBag->getAssetDir() . '/' . $this->urlBag->getRootDir();
  }

}