<?php
namespace Drupal\tmgmt_smartling\Context;
use Drupal;
/**
 * Fetches a page over HTTP and inlines its locally stored assets.
 *
 * The page is requested with cURL using the cookie supplied by the caller.
 * Stylesheets, scripts and images referenced by the returned markup that exist
 * below DRUPAL_ROOT are then embedded directly into the HTML (images as base64
 * data URIs).
 */
class HtmlAssetInliner {

  /**
   * User agent string sent with the page request.
   */
  const DEFAULT_USER_AGENT = 'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_5_8; en-US) AppleWebKit/534.10 (KHTML, like Gecko) Chrome/8.0.552.215 Safari/534.10';

  /**
   * Logger channel used for all debug messages of this class.
   */
  const DEBUG_CHANNEL = 'tmgmt_smartling_context_debug';

  /**
   * URL of the page being processed.
   *
   * @var string
   */
  private $url = '';

  /**
   * Cookie header value sent with the page request.
   *
   * @var string
   */
  private $cookie = '';

  /**
   * Working copy of the page markup.
   *
   * @var string
   */
  private $html = '';

  /**
   * DOM document used by the XPath based helpers.
   *
   * @var \DOMDocument
   */
  private $dom = NULL;

  /**
   * Payload returned when no cookie was supplied.
   *
   * @var array
   */
  protected static $authError = [
    "response" => [
      "code" => "AUTHENTICATION_ERROR",
      "data" => [
        "baseUrl" => NULL,
        "body" => NULL,
        "headers" => NULL,
      ],
      "messages" => [
        "Authentication token is empty or invalid.",
      ],
    ],
  ];

  /**
   * Payload returned when no URL was supplied.
   *
   * @var array
   */
  protected static $uriMissingError = [
    "response" => [
      "code" => "VALIDATION_ERROR",
      "data" => [
        "baseUrl" => NULL,
        "body" => NULL,
        "headers" => NULL,
      ],
      "messages" => [
        "fileUri parameter is missing.",
      ],
    ],
  ];

  /**
   * Prepares the DOM document and routes libxml errors to the internal buffer.
   */
  public function __construct() {
    libxml_use_internal_errors(TRUE);
    $this->dom = new \DOMDocument();
    $this->dom->preserveWhiteSpace = FALSE;
    $this->dom->strictErrorChecking = FALSE;
  }

  /**
   * Downloads a page and returns its HTML with local assets inlined.
   *
   * @param string $url
   *   Absolute URL of the page, including the scheme.
   * @param string $cookie
   *   Cookie header value used to authenticate the request.
   * @param bool $keepjs
   *   Accepted for interface compatibility; not used by this method.
   * @param bool $compress
   *   When TRUE, line breaks and tabs in the result are replaced by spaces.
   * @param array $settings
   *   Request settings. Keys read: 'context_skip_host_verifying',
   *   'enable_basic_auth' and 'basic_auth' ('login', 'password').
   * @param bool $debug
   *   When TRUE, diagnostic messages are written to the debug logger channel.
   *
   * @return string|array
   *   The processed HTML; an empty string when the processed markup is 300
   *   bytes or shorter (including a failed request); or one of the static
   *   error arrays when the URL or the cookie is missing.
   *
   * @throws \Exception
   *   When $url is non-empty but is not a valid URL.
   */
  public function getCompletePage($url, $cookie = '', $keepjs = TRUE, $compress = FALSE, array $settings = [], $debug = FALSE) {
    // The emptiness check must run before URL validation: an empty value never
    // passes FILTER_VALIDATE_URL, so the other order can never reach it.
    if (empty($url)) {
      $this->debugLog($debug, 'Url is missing.');
      return self::$uriMissingError;
    }
    if (!filter_var($url, FILTER_VALIDATE_URL)) {
      throw new \Exception('Invalid URL. Make sure to specify http(s) part.');
    }
    if (!$cookie) {
      $this->debugLog($debug, 'Auth error.');
      return self::$authError;
    }

    $this->url = $url;
    $this->cookie = $cookie;
    $this->html = $this->getUrlContents($this->url, 0, self::DEFAULT_USER_AGENT, $settings, $debug);

    $this->embedLocalCss();
    $this->embedLocalJs();
    $this->embedContentImages();

    // Very short markup (which includes a failed request) is treated as
    // unusable.
    if (strlen($this->html) <= 300) {
      $this->debugLog($debug, 'Response is too small.');
      return '';
    }

    return $compress ? $this->compress($this->html) : $this->html;
  }

  /**
   * Writes a message to the debug logger channel when debugging is enabled.
   *
   * @param bool $debug
   *   Whether debug logging is enabled.
   * @param string $message
   *   Message (may contain placeholders).
   * @param array $context
   *   Placeholder values.
   */
  private function debugLog($debug, $message, array $context = []) {
    if ($debug) {
      Drupal::logger(self::DEBUG_CHANNEL)->info($message, $context);
    }
  }

  /**
   * Replaces the src of every reachable <img> with a data URI.
   */
  private function convertImageToDataUri() {
    foreach ($this->getTags('//img') as $tag) {
      $src = $this->getFullUrl($tag->getAttribute('src'));
      if (!$this->remote_file_exists($src)) {
        continue;
      }
      $dataUri = $this->imageToDataUri($src);
      // Keep the original src when the target could not be converted, rather
      // than blanking the attribute.
      if ($dataUri !== NULL) {
        $tag->setAttribute('src', $dataUri);
      }
    }
    $this->html = $this->dom->saveHTML();
  }

  /**
   * Loads the working HTML into the DOM and runs an XPath query against it.
   *
   * @param string $selector
   *   XPath expression.
   *
   * @return \DOMNodeList|false
   *   Result of \DOMXPath::query().
   */
  private function getTags($selector) {
    // Make sure parser warnings for sloppy markup are buffered, not emitted.
    libxml_use_internal_errors(TRUE);
    $this->dom->loadHTML($this->html);
    // The leading backslash is required: inside this namespace an unqualified
    // class name would not resolve to the global DOMXPath class.
    $xpath = new \DOMXPath($this->dom);
    $tags = $xpath->query($selector);
    libxml_clear_errors();
    return $tags;
  }

  /**
   * Checks whether a URL answers a body-less request without an HTTP error.
   *
   * @param string $url
   *   URL to probe.
   *
   * @return bool
   *   TRUE when the request succeeded.
   */
  private function remote_file_exists($url) {
    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_NOBODY, 1);
    curl_setopt($ch, CURLOPT_FAILONERROR, 1);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    $exists = curl_exec($ch) !== FALSE;
    curl_close($ch);
    return $exists;
  }

  /**
   * Maps a known image file extension to its "image/*" MIME subtype.
   *
   * @param string $extension
   *   File extension without the dot.
   *
   * @return string|null
   *   The subtype (e.g. "jpeg", "svg+xml"), or NULL for unknown extensions.
   */
  private function imageSubtypeForExtension($extension) {
    $subtypes = [
      'gif' => 'gif',
      'png' => 'png',
      'jpg' => 'jpeg',
      'jpeg' => 'jpeg',
      'bmp' => 'bmp',
      'svg' => 'svg+xml',
      'ico' => 'x-icon',
    ];
    $extension = strtolower($extension);
    return isset($subtypes[$extension]) ? $subtypes[$extension] : NULL;
  }

  /**
   * Converts an image file or URL into a base64 data URI.
   *
   * @param string $path
   *   Path or URL of the image.
   *
   * @return string|null
   *   The data URI, or NULL when the target is not an image or cannot be read.
   */
  private function imageToDataUri($path) {
    $fileType = trim(strtolower(pathinfo($path, PATHINFO_EXTENSION)));
    $isKnownRaster = (bool) preg_match('#^(gif|png|jp[e]?g|bmp)$#i', $fileType);
    if (!$isKnownRaster && !$this->isImage($path)) {
      return NULL;
    }

    $mimType = $this->imageSubtypeForExtension($fileType);
    if ($mimType === NULL) {
      // Extension-less targets and PHP-generated images are labelled as JPEG;
      // any other unknown extension is used as the subtype verbatim.
      $mimType = ($fileType === '' || stripos($fileType, 'php') !== FALSE) ? 'jpeg' : $fileType;
    }

    $data = $this->getContents($path);
    if ($data === FALSE) {
      return NULL;
    }
    return 'data:image/' . $mimType . ';base64,' . base64_encode($data);
  }

  /**
   * Strips <link>, external <script> and non-charset <meta> tags.
   *
   * @param bool $keepjs
   *   When TRUE, inline scripts are kept and only scripts with a src are
   *   removed; when FALSE every script is removed.
   */
  private function removeUseless($keepjs = TRUE) {
    // Encode non-ASCII characters as numeric entities so the DOM parser does
    // not depend on detecting the input charset. This replaces the
    // 'HTML-ENTITIES' pseudo-encoding of mb_convert_encoding(), which is
    // deprecated as of PHP 8.2.
    $this->html = mb_encode_numericentity($this->html, [0x80, 0x10FFFF, 0, 0x1FFFFF], 'UTF-8');
    $tags = $this->getTags('//meta | //link | //script');
    $tagsLength = $tags->length;
    for ($i = 0; $i < $tagsLength; $i++) {
      $tag = $tags->item($i);
      $name = strtolower($tag->nodeName);
      if ($name === 'script') {
        $remove = !$keepjs || $tag->getAttribute('src') !== '';
      }
      elseif ($name === 'meta') {
        // Charset declarations come in two forms: the http-equiv variant with
        // "charset" inside content, and the HTML5 charset attribute.
        $declaresCharset = $tag->hasAttribute('charset')
          || stripos($tag->getAttribute('content'), 'charset') !== FALSE;
        $remove = !$declaresCharset;
      }
      else {
        $remove = TRUE;
      }
      if ($remove) {
        $tag->parentNode->removeChild($tag);
      }
    }
    $this->html = $this->dom->saveHTML();
  }

  /**
   * Rewrites the href of every link to an absolute URL.
   */
  private function toAbsoluteURLs() {
    foreach ($this->getTags('//a') as $link) {
      // Anchors without an href (e.g. named anchors) must not gain one.
      if (!$link->hasAttribute('href')) {
        continue;
      }
      $link->setAttribute('href', $this->getFullUrl($link->getAttribute('href')));
    }
    $this->html = $this->dom->saveHTML();
  }

  /**
   * Replaces every line break and tab with a single space.
   *
   * Runs of spaces are not collapsed.
   *
   * @param string $string
   *   Text to flatten.
   *
   * @return string
   *   The text on a single line.
   */
  private function compress($string) {
    // "\r\n" is listed first so a Windows line break becomes one space.
    return str_replace(["\r\n", "\r", "\n", "\t"], ' ', $string);
  }

  /**
   * Reads a file or URL, retrying once with surrounding whitespace trimmed.
   *
   * @param string $url
   *   Path or URL to read.
   *
   * @return string|false
   *   The contents, or FALSE when both attempts failed.
   */
  private function getContents($url) {
    // Failures are expected here (best-effort fetch), hence the suppression.
    $data = @file_get_contents($url);
    if ($data) {
      return $data;
    }
    return @file_get_contents(trim($url));
  }

  /**
   * Requests a URL with cURL and returns the response body.
   *
   * @param string $url
   *   URL to request.
   * @param int $timeout
   *   Value passed to CURLOPT_CONNECTTIMEOUT.
   * @param string $userAgent
   *   User agent header value.
   * @param array $settings
   *   Request settings; see getCompletePage().
   * @param bool $debug
   *   When TRUE, request info, response headers and the first 500 bytes of
   *   the body are logged.
   *
   * @return string
   *   The response body, or an empty string when the request failed or
   *   returned nothing.
   */
  private function getUrlContents($url, $timeout = 0, $userAgent = self::DEFAULT_USER_AGENT, array $settings = [], $debug = FALSE) {
    $crl = curl_init();
    if ($debug) {
      // Include request and response headers so they can be logged below.
      curl_setopt($crl, CURLINFO_HEADER_OUT, TRUE);
      curl_setopt($crl, CURLOPT_HEADER, 1);
    }
    if (!empty($settings['context_skip_host_verifying'])) {
      curl_setopt($crl, CURLOPT_SSL_VERIFYHOST, FALSE);
    }
    if (!empty($settings['enable_basic_auth'])) {
      $login = isset($settings['basic_auth']['login']) ? $settings['basic_auth']['login'] : '';
      $password = isset($settings['basic_auth']['password']) ? $settings['basic_auth']['password'] : '';
      curl_setopt($crl, CURLOPT_HTTPAUTH, CURLAUTH_BASIC);
      curl_setopt($crl, CURLOPT_USERPWD, $login . ':' . $password);
    }
    curl_setopt($crl, CURLOPT_URL, $url);
    curl_setopt($crl, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($crl, CURLOPT_CONNECTTIMEOUT, $timeout);
    curl_setopt($crl, CURLOPT_FOLLOWLOCATION, 1);
    curl_setopt($crl, CURLOPT_COOKIE, $this->cookie);
    curl_setopt($crl, CURLOPT_USERAGENT, $userAgent);
    // NOTE(review): peer verification is disabled unconditionally; confirm
    // this is intended for every environment the module runs in.
    curl_setopt($crl, CURLOPT_SSL_VERIFYPEER, FALSE);

    $output = curl_exec($crl);

    if ($debug) {
      $curlInfo = curl_getinfo($crl);
      $headers = '';
      // With CURLOPT_HEADER set the headers precede the body; split them off.
      // A failed request yields FALSE and has nothing to split.
      if (is_string($output)) {
        $header_size = $curlInfo['header_size'];
        $headers = substr($output, 0, $header_size);
        $output = substr($output, $header_size);
      }
      $logger = Drupal::logger(self::DEBUG_CHANNEL);
      $logger->info('Curl request info: @request_info:', [
        '@request_info' => print_r($curlInfo, TRUE),
      ]);
      $logger->info('Curl response headers: @response_headers', [
        '@response_headers' => $headers,
      ]);
      $logger->info('Curl response body: @response_body', [
        '@response_body' => substr((string) $output, 0, 500) . '*****',
      ]);
    }
    curl_close($crl);

    return is_string($output) ? $output : '';
  }

  /**
   * Resolves a possibly relative URL against the page URL.
   *
   * @param string $url
   *   URL taken from the markup.
   *
   * @return string
   *   The URL unchanged when it contains "//", otherwise the result of
   *   url_to_absolute() for the current page URL.
   */
  private function getFullUrl($url) {
    if (strpos($url, '//') === FALSE) {
      // url_to_absolute() is not defined in this file and must be provided by
      // the including code.
      return url_to_absolute($this->url, $url);
    }
    return $url;
  }

  /**
   * Tells whether getimagesize() recognises the target as an image.
   *
   * @param string $path
   *   Path or URL to inspect.
   *
   * @return bool
   *   TRUE when an image with a non-zero width was detected.
   */
  private function isImage($path) {
    $info = @getimagesize($path);
    return is_array($info) && !empty($info[0]);
  }

  /**
   * Maps a root-relative asset URL to an existing file below DRUPAL_ROOT.
   *
   * @param string $filename
   *   Asset URL as found in the markup; a query string is ignored.
   *
   * @return string|null
   *   The file system path, or NULL when no such file exists.
   */
  private function localAssetPath($filename) {
    list($filename) = explode('?', $filename, 2);
    $path = DRUPAL_ROOT . $filename;
    return is_file($path) ? $path : NULL;
  }

  /**
   * Loads a local stylesheet and inlines the images it references.
   *
   * @param string $filename
   *   Stylesheet URL as found in the markup.
   *
   * @return string|null
   *   The processed CSS, or NULL when the file is missing or unreadable.
   */
  private function loadLocalCss($filename) {
    $path = $this->localAssetPath($filename);
    if ($path === NULL) {
      return NULL;
    }
    $content = file_get_contents($path);
    if ($content === FALSE) {
      return NULL;
    }
    return $this->embedCssImages($content, $path);
  }

  /**
   * Replaces local stylesheet links and @import rules with the CSS itself.
   */
  private function embedLocalCss() {
    $links = [];
    if (preg_match_all('/<link rel="stylesheet" href="([^"]+)\\?.*" media="([a-zA-Z0-9]*)" \\/>/iU', $this->html, $links)) {
      foreach ($links[1] as $id => $filename) {
        $file_content = $this->loadLocalCss($filename);
        if ($file_content === NULL) {
          continue;
        }
        $this->html = str_replace($links[0][$id], "<style media='{$links[2][$id]}'>\n {$file_content} \n</style>", $this->html);
      }
    }

    $imports = [];
    if (preg_match_all('/@import url\\("([^"]+)"\\);/iU', $this->html, $imports)) {
      foreach ($imports[1] as $id => $filename) {
        $file_content = $this->loadLocalCss($filename);
        if ($file_content === NULL) {
          continue;
        }
        $this->html = str_replace($imports[0][$id], "\n\n {$file_content} \n\n", $this->html);
      }
    }
  }

  /**
   * Replaces url() references to local images in CSS with data URIs.
   *
   * @param string $css_content
   *   CSS source.
   * @param string $path
   *   File system path of the stylesheet; relative references are resolved
   *   against its directory, root-relative ones against DRUPAL_ROOT.
   *
   * @return string
   *   The CSS with every readable gif/png/jpg/jpeg/bmp/svg reference inlined.
   */
  private function embedCssImages($css_content, $path) {
    $matches = [];
    if (!preg_match_all('/url\\(([\\d\\D^)]+)\\)/iU', $css_content, $matches)) {
      return $css_content;
    }
    foreach ($matches[1] as $k => $img_url) {
      $img_url = trim($img_url, '\'"');
      $fileType = trim(strtolower(pathinfo($img_url, PATHINFO_EXTENSION)));
      // Also skips empty references, so $img_url[0] below is always defined.
      if (!preg_match('#^(gif|png|jp[e]?g|bmp|svg)$#i', $fileType)) {
        continue;
      }
      $src = $img_url[0] === '/' ? DRUPAL_ROOT . $img_url : pathinfo($path, PATHINFO_DIRNAME) . '/' . $img_url;
      if (!file_exists($src) || !($data = file_get_contents($src))) {
        continue;
      }
      // Declare the real image type instead of labelling everything as PNG.
      $mimType = $this->imageSubtypeForExtension($fileType);
      $dataUri = 'url("data:image/' . $mimType . ';base64,' . base64_encode($data) . '")';
      $css_content = str_replace($matches[0][$k], $dataUri, $css_content);
    }
    return $css_content;
  }

  /**
   * Replaces local <script src> tags with inline scripts.
   */
  private function embedLocalJs() {
    $js = [];
    if (!preg_match_all('/<script src="([^"]+)"><\\/script>/iU', $this->html, $js)) {
      return;
    }
    foreach ($js[1] as $id => $filename) {
      $path = $this->localAssetPath($filename);
      if ($path === NULL) {
        continue;
      }
      $file_content = file_get_contents($path);
      if ($file_content === FALSE) {
        continue;
      }
      $this->html = str_replace($js[0][$id], "<script>\n {$file_content} \n</script>", $this->html);
    }
  }

  /**
   * Replaces <img> tags that point at local files with data URI images.
   */
  private function embedContentImages() {
    $matches = [];
    if (!preg_match_all('/<img.*src="([^"]+)".*>/iU', $this->html, $matches)) {
      return;
    }
    $base_url = (string) $this->getBaseUrl();
    foreach ($matches[1] as $k => $img_url) {
      $img_url = trim($img_url, '\'"');
      // Turn absolute URLs of this site into root-relative paths.
      if ($base_url !== '') {
        $img_url = str_replace($base_url, '', $img_url);
      }
      $fileType = trim(strtolower(pathinfo($img_url, PATHINFO_EXTENSION)));
      if (!preg_match('#^(gif|png|jp[e]?g|bmp|svg)$#i', $fileType)) {
        continue;
      }
      $src = DRUPAL_ROOT . $img_url;
      if (!file_exists($src) || !($data = file_get_contents($src))) {
        continue;
      }
      // Declare the real image type instead of labelling everything as PNG.
      $mimType = $this->imageSubtypeForExtension($fileType);
      // NOTE(review): the whole tag is replaced, so other attributes of the
      // original <img> (alt, class, ...) are dropped — confirm this is wanted.
      $replacement = '<img src="data:image/' . $mimType . ';base64,' . base64_encode($data) . '" />';
      $this->html = str_replace($matches[0][$k], $replacement, $this->html);
    }
  }

  /**
   * Returns the value of the global $base_url variable.
   *
   * @return string|null
   *   The base URL, or NULL when the global is not set.
   */
  private function getBaseUrl() {
    global $base_url;
    return $base_url;
  }

}