class HtmlLinkExtractor in Link checker 8
Class HtmlLinkExtractor.
Plugin annotation
@LinkExtractor(
id = "html_link_extractor",
label = @Translation("HTML extractor"),
field_types = {
"text",
"text_long",
"text_with_summary",
}
)
Hierarchy
- class \Drupal\Component\Plugin\PluginBase implements DerivativeInspectionInterface, PluginInspectionInterface
- class \Drupal\linkchecker\Plugin\LinkExtractorBase implements ContainerFactoryPluginInterface, LinkExtractorInterface
- class \Drupal\linkchecker\Plugin\LinkExtractor\HtmlLinkExtractor
- class \Drupal\linkchecker\Plugin\LinkExtractorBase implements ContainerFactoryPluginInterface, LinkExtractorInterface
Expanded class hierarchy of HtmlLinkExtractor
File
- src/
Plugin/ LinkExtractor/ HtmlLinkExtractor.php, line 21
Namespace
Drupal\linkchecker\Plugin\LinkExtractor
View source
class HtmlLinkExtractor extends LinkExtractorBase {

  /**
   * {@inheritdoc}
   */
  protected function extractUrlFromField(array $value) {
    // empty() tolerates a missing 'value' key, so an item without any text is
    // skipped quietly instead of raising an "undefined index" notice.
    if (empty($value['value'])) {
      return [];
    }

    $html_dom = Html::load($value['value']);
    $urls = [];

    // Finds all hyperlinks in the content.
    if ($this->isExtractionEnabled('a')) {
      $urls = array_merge(
        $urls,
        $this->collectAttributes($html_dom->getElementsByTagName('a'), ['href']),
        $this->collectAttributes($html_dom->getElementsByTagName('area'), ['href'])
      );
    }

    // Finds all audio links in the content, including nested source/track
    // tags.
    if ($this->isExtractionEnabled('audio')) {
      foreach ($html_dom->getElementsByTagName('audio') as $audio) {
        $urls = array_merge($urls, $this->collectMediaUrls($audio, ['src']));
      }
    }

    // Finds embed tags with links in the content.
    if ($this->isExtractionEnabled('embed')) {
      $urls = array_merge(
        $urls,
        $this->collectAttributes(
          $html_dom->getElementsByTagName('embed'),
          ['src', 'pluginurl', 'pluginspage']
        )
      );
    }

    // Finds iframe tags with links in the content.
    if ($this->isExtractionEnabled('iframe')) {
      $urls = array_merge(
        $urls,
        $this->collectAttributes($html_dom->getElementsByTagName('iframe'), ['src'])
      );
    }

    // Finds img tags with links in the content.
    if ($this->isExtractionEnabled('img')) {
      $urls = array_merge(
        $urls,
        $this->collectAttributes($html_dom->getElementsByTagName('img'), ['src', 'longdesc'])
      );
    }

    // Finds object/param tags with links in the content.
    if ($this->isExtractionEnabled('object')) {
      foreach ($html_dom->getElementsByTagName('object') as $object) {
        $urls[] = $object->getAttribute('data');
        $urls[] = $object->getAttribute('codebase');
        $urls = array_merge($urls, $this->collectParamUrls($object));
      }
    }

    // Finds video tags with links in the content, including nested
    // source/track tags.
    if ($this->isExtractionEnabled('video')) {
      foreach ($html_dom->getElementsByTagName('video') as $video) {
        $urls = array_merge($urls, $this->collectMediaUrls($video, ['poster', 'src']));
      }
    }

    // getAttribute() yields an empty string for a missing attribute, so the
    // list is first stripped of empty values and then of duplicate URLs.
    // Array keys are intentionally left as they are (not re-indexed).
    return array_unique(array_filter($urls));
  }

  /**
   * Checks whether link extraction is switched on for an HTML tag.
   *
   * @param string $tag
   *   Tag name used as the suffix of the "extract.from_" setting key, e.g.
   *   "a" or "img".
   *
   * @return bool
   *   TRUE when the "extract.from_<tag>" setting loosely equals TRUE.
   */
  private function isExtractionEnabled($tag) {
    // Loose comparison is kept on purpose: the stored type of the setting is
    // not visible from this class.
    return $this->linkcheckerSetting->get('extract.from_' . $tag) == TRUE;
  }

  /**
   * Reads the given attributes from every element of a node list.
   *
   * Values are returned element by element, and within one element in the
   * order of $attributes. Missing attributes contribute an empty string.
   *
   * @param \DOMNodeList $elements
   *   The elements to inspect.
   * @param string[] $attributes
   *   Names of the attributes to read from each element.
   *
   * @return string[]
   *   The raw attribute values.
   */
  private function collectAttributes(\DOMNodeList $elements, array $attributes) {
    $values = [];
    foreach ($elements as $element) {
      foreach ($attributes as $attribute) {
        $values[] = $element->getAttribute($attribute);
      }
    }
    return $values;
  }

  /**
   * Collects the URLs of one audio/video element and its nested tags.
   *
   * @param \DOMElement $media
   *   The audio or video element.
   * @param string[] $attributes
   *   Names of the URL attributes to read from the media element itself.
   *
   * @return string[]
   *   The media element's own attribute values, followed by the "src" of each
   *   nested source tag, followed by the "src" of each nested track tag.
   */
  private function collectMediaUrls(\DOMElement $media, array $attributes) {
    $urls = [];
    foreach ($attributes as $attribute) {
      $urls[] = $media->getAttribute($attribute);
    }

    return array_merge(
      $urls,
      // Finds source tags with links in the media tag.
      $this->collectAttributes($media->getElementsByTagName('source'), ['src']),
      // Finds track tags with links in the media tag.
      $this->collectAttributes($media->getElementsByTagName('track'), ['src'])
    );
  }

  /**
   * Collects URLs stored in the param tags of an object element.
   *
   * @param \DOMElement $object
   *   The object element whose param descendants are inspected.
   *
   * @return string[]
   *   The "value" attribute of every param that matches one of the known
   *   names. A param matching both checks contributes its value twice.
   *
   * @todo Try to extract links in unknown "flashvars" values
   *   (e.g., file=http://, data=http://).
   */
  private function collectParamUrls(\DOMElement $object) {
    // Built once per object rather than once per param.
    $names = [
      'archive',
      'filename',
      'href',
      'movie',
      'src',
      'url',
    ];
    $srcs = [
      'movie',
    ];

    $urls = [];
    foreach ($object->getElementsByTagName('param') as $param) {
      if ($param->hasAttribute('name') && in_array($param->getAttribute('name'), $names, TRUE)) {
        $urls[] = $param->getAttribute('value');
      }
      // NOTE(review): this matches on a "src" attribute of the param tag,
      // which looks like a legacy/non-standard markup variant. Kept as-is;
      // confirm whether it is still needed.
      if ($param->hasAttribute('src') && in_array($param->getAttribute('src'), $srcs, TRUE)) {
        $urls[] = $param->getAttribute('value');
      }
    }
    return $urls;
  }

}
Members
Name | Modifiers | Type | Description | Overrides |
---|---|---|---|---|
HtmlLinkExtractor:: |
protected | function |
Extracts URLs from a field. Overrides LinkExtractorBase:: |
|
LinkExtractorBase:: |
protected | property | The Linkchecker settings. | |
LinkExtractorBase:: |
public static | function |
Creates an instance of the plugin. Overrides ContainerFactoryPluginInterface:: |
|
LinkExtractorBase:: |
public | function |
Extracts links from field list. Overrides LinkExtractorInterface:: |
|
LinkExtractorBase:: |
public | function |
LinkExtractorBase plugin constructor. Overrides PluginBase:: |
|
PluginBase:: |
protected | property | Configuration information passed into the plugin. | 1 |
PluginBase:: |
protected | property | The plugin implementation definition. | 1 |
PluginBase:: |
protected | property | The plugin_id. | |
PluginBase:: |
constant | A string which is used to separate base plugin IDs from the derivative ID. | ||
PluginBase:: |
public | function |
Gets the base_plugin_id of the plugin instance. Overrides DerivativeInspectionInterface:: |
|
PluginBase:: |
public | function |
Gets the derivative_id of the plugin instance. Overrides DerivativeInspectionInterface:: |
|
PluginBase:: |
public | function |
Gets the definition of the plugin implementation. Overrides PluginInspectionInterface:: |
3 |
PluginBase:: |
public | function |
Gets the plugin_id of the plugin instance. Overrides PluginInspectionInterface:: |
|
PluginBase:: |
public | function | Determines if the plugin is configurable. |