HtmlLinkExtractor.php in Link checker 8
File
src/Plugin/LinkExtractor/HtmlLinkExtractor.php
View source
<?php
namespace Drupal\linkchecker\Plugin\LinkExtractor;
use Drupal\Component\Utility\Html;
use Drupal\linkchecker\Plugin\LinkExtractorBase;
/**
 * Extracts URLs from HTML markup stored in a field item.
 *
 * Which HTML elements are inspected is driven by the "extract.from_*" keys
 * read from $this->linkcheckerSetting.
 *
 * NOTE(review): no plugin annotation is visible in this view of the file;
 * confirm the plugin definition is preserved when applying this change.
 */
class HtmlLinkExtractor extends LinkExtractorBase {

  /**
   * Collects the URLs referenced by the HTML in a single field item.
   *
   * @param array $value
   *   The field item; the HTML markup is read from its 'value' key.
   *
   * @return string[]
   *   The unique, non-empty URL attribute values in document order per
   *   element type. Keys are the positions in the unfiltered collection and
   *   are therefore not guaranteed to be sequential.
   */
  protected function extractUrlFromField(array $value) {
    // empty() copes with a missing 'value' key without raising an
    // "undefined index" notice, and still short-circuits on '' and NULL.
    if (empty($value['value'])) {
      return [];
    }

    $html_dom = Html::load($value['value']);
    $urls = [];

    if ($this->linkcheckerSetting->get('extract.from_a')) {
      $urls = array_merge(
        $urls,
        $this->collectAttributeValues($html_dom, 'a', ['href']),
        $this->collectAttributeValues($html_dom, 'area', ['href'])
      );
    }

    if ($this->linkcheckerSetting->get('extract.from_audio')) {
      $urls = array_merge($urls, $this->collectMediaUrls($html_dom, 'audio', ['src']));
    }

    if ($this->linkcheckerSetting->get('extract.from_embed')) {
      $urls = array_merge(
        $urls,
        $this->collectAttributeValues($html_dom, 'embed', ['src', 'pluginurl', 'pluginspage'])
      );
    }

    if ($this->linkcheckerSetting->get('extract.from_iframe')) {
      $urls = array_merge($urls, $this->collectAttributeValues($html_dom, 'iframe', ['src']));
    }

    if ($this->linkcheckerSetting->get('extract.from_img')) {
      $urls = array_merge($urls, $this->collectAttributeValues($html_dom, 'img', ['src', 'longdesc']));
    }

    if ($this->linkcheckerSetting->get('extract.from_object')) {
      $urls = array_merge($urls, $this->collectObjectUrls($html_dom));
    }

    if ($this->linkcheckerSetting->get('extract.from_video')) {
      $urls = array_merge($urls, $this->collectMediaUrls($html_dom, 'video', ['poster', 'src']));
    }

    // Absent attributes were collected as empty strings; drop them first and
    // then collapse duplicates, keeping the first occurrence of each URL.
    return array_unique(array_filter($urls));
  }

  /**
   * Reads the given attributes from every descendant element with a tag name.
   *
   * @param \DOMDocument|\DOMElement $context
   *   The node whose descendants are searched.
   * @param string $tag
   *   The tag name to look for.
   * @param string[] $attributes
   *   The attribute names to read from each matching element, in order.
   *
   * @return string[]
   *   One entry per element and attribute; an attribute that is not set
   *   contributes an empty string.
   */
  private function collectAttributeValues($context, $tag, array $attributes) {
    $values = [];
    foreach ($context->getElementsByTagName($tag) as $element) {
      foreach ($attributes as $attribute) {
        $values[] = $element->getAttribute($attribute);
      }
    }
    return $values;
  }

  /**
   * Collects URLs from media elements and their nested source/track children.
   *
   * @param \DOMDocument $html_dom
   *   The parsed HTML document.
   * @param string $tag
   *   The media tag name ('audio' or 'video').
   * @param string[] $attributes
   *   The URL attributes to read from the media element itself.
   *
   * @return string[]
   *   For each media element: its own attribute values, followed by the
   *   'src' of its <source> descendants, then of its <track> descendants.
   */
  private function collectMediaUrls(\DOMDocument $html_dom, $tag, array $attributes) {
    $urls = [];
    foreach ($html_dom->getElementsByTagName($tag) as $media) {
      foreach ($attributes as $attribute) {
        $urls[] = $media->getAttribute($attribute);
      }
      $urls = array_merge(
        $urls,
        $this->collectAttributeValues($media, 'source', ['src']),
        $this->collectAttributeValues($media, 'track', ['src'])
      );
    }
    return $urls;
  }

  /**
   * Collects URLs from <object> elements and their <param> descendants.
   *
   * @param \DOMDocument $html_dom
   *   The parsed HTML document.
   *
   * @return string[]
   *   For each object: its 'data' and 'codebase' attribute values, followed
   *   by the 'value' of every <param> whose 'name' or 'src' attribute is one
   *   of the whitelisted identifiers.
   */
  private function collectObjectUrls(\DOMDocument $html_dom) {
    // Whitelists are loop-invariant, so they are built once up front.
    $param_names = [
      'archive',
      'filename',
      'href',
      'movie',
      'src',
      'url',
    ];
    $param_srcs = [
      'movie',
    ];

    $urls = [];
    foreach ($html_dom->getElementsByTagName('object') as $object) {
      $urls[] = $object->getAttribute('data');
      $urls[] = $object->getAttribute('codebase');

      foreach ($object->getElementsByTagName('param') as $param) {
        // getAttribute() yields '' for a missing attribute, which is never in
        // either whitelist, so no separate hasAttribute() check is needed.
        if (in_array($param->getAttribute('name'), $param_names, TRUE)) {
          $urls[] = $param->getAttribute('value');
        }
        // A <param> matching both whitelists contributes its value twice
        // here; the caller de-duplicates the final list.
        if (in_array($param->getAttribute('src'), $param_srcs, TRUE)) {
          $urls[] = $param->getAttribute('value');
        }
      }
    }
    return $urls;
  }

}