ConvertUrlToEmbedFilter.php in URL Embed 8
Namespace
Drupal\url_embed\Plugin\Filter
File
src/Plugin/Filter/ConvertUrlToEmbedFilter.php
View source
<?php
namespace Drupal\url_embed\Plugin\Filter;
use Drupal\Component\Utility\Html;
use Drupal\Core\Form\FormStateInterface;
use Drupal\filter\FilterProcessResult;
use Drupal\filter\Plugin\FilterBase;
/**
 * Provides a filter that converts plain URLs into <drupal-url> embed elements.
 *
 * Only URLs for which the 'url_embed' service returns an embed are converted;
 * the resulting elements are meant to be rendered by the "Display embedded
 * URLs" filter.
 *
 * @Filter(
 *   id = "url_embed_convert_links",
 *   title = @Translation("Convert URLs to URL embeds"),
 *   description = @Translation("Convert plain URLs to embed elements that can be rendered with the <em>Display embedded URLs</em> filter."),
 *   type = Drupal\filter\Plugin\FilterInterface::TYPE_TRANSFORM_REVERSIBLE,
 *   settings = {
 *     "url_prefix" = "",
 *   },
 * )
 */
class ConvertUrlToEmbedFilter extends FilterBase {

  /**
   * {@inheritdoc}
   */
  public function settingsForm(array $form, FormStateInterface $form_state) {
    $form['url_prefix'] = [
      '#type' => 'textfield',
      '#title' => $this->t('URL prefix'),
      '#default_value' => $this->settings['url_prefix'],
      '#description' => $this->t('Optional prefix that will be used to indicate which URLs that apply. All URLs that are supported will be converted if empty. Example: EMBED-https://twitter.com/drupal/status/735873777683320832'),
    ];
    return $form;
  }

  /**
   * {@inheritdoc}
   */
  public function process($text, $langcode) {
    return new FilterProcessResult(static::convertUrls($text, $this->settings['url_prefix']));
  }

  /**
   * Replaces appearances of supported URLs with <drupal-url> embed elements.
   *
   * Logic of this function is copied from _filter_url() and slightly adapted
   * for our use case. _filter_url() is unfortunately not general enough to
   * re-use it.
   *
   * URLs inside HTML tags/attributes, HTML comments, and the contents of
   * <a>, <script>, <style>, <code> and <pre> elements are left untouched.
   * A matched URL (including its prefix) is replaced by a <drupal-url>
   * element when the 'url_embed' service returns an embed for it; otherwise
   * it is replaced by the bare URL without the prefix.
   *
   * @param string $text
   *   Text to be processed.
   * @param string $url_prefix
   *   (Optional) Prefix that should be used to manually choose which URLs
   *   should be converted. It is matched literally.
   *
   * @return string
   *   Processed text.
   */
  public static function convertUrls($text, $url_prefix = '') {
    // Tags to skip and not recurse into.
    $ignore_tags = 'a|script|style|code|pre';

    $pattern = static::buildUrlPattern($url_prefix);

    // Replacement callback, built once and reused for every text chunk.
    // $match[1] is the URL without the prefix. It is still HTML-encoded
    // because it comes from markup, so it is decoded only for the lookup and
    // written back unchanged.
    $replace_url = function ($match) {
      if (\Drupal::service('url_embed')->getEmbed(Html::decodeEntities($match[1]))) {
        return '<drupal-url data-embed-url="' . $match[1] . '"></drupal-url>';
      }
      return $match[1];
    };

    // HTML comments need to be handled separately, as they may contain HTML
    // markup, especially a '>'. Therefore, remove all comment contents and add
    // them back later.
    _filter_url_escape_comments('', TRUE);
    $text = preg_replace_callback('`<!--(.*?)-->`s', '_filter_url_escape_comments', $text);

    // Split at all tags; ensures that no tags or attributes are processed.
    $chunks = preg_split('/(<.+?>)/is', $text, -1, PREG_SPLIT_DELIM_CAPTURE);

    // preg_split() returns FALSE on a PCRE failure. In that case skip the
    // replacement pass entirely instead of iterating over a non-array; the
    // comments are still restored below.
    if ($chunks !== FALSE) {
      // If a tag of $ignore_tags is found, it is stored in $open_tag and only
      // removed when the closing tag is found. Until the closing tag is found,
      // no replacements are made.
      $open_tag = '';

      // PHP ensures that the array consists of alternating delimiters and
      // literals, and begins and ends with a literal. Therefore even indexes
      // are always text and odd indexes are always tags.
      foreach ($chunks as $i => $chunk) {
        if ($i % 2 === 0) {
          // Only process this text if there are no unclosed $ignore_tags.
          if ($open_tag === '') {
            $replaced = preg_replace_callback($pattern, $replace_url, $chunk);
            // preg_replace_callback() returns NULL on a PCRE error (for
            // example an exhausted backtrack limit). Keep the original text
            // in that case rather than silently dropping the chunk.
            if ($replaced !== NULL) {
              $chunks[$i] = $replaced;
            }
          }
        }
        elseif ($open_tag === '') {
          // Not inside an ignored element: check whether this tag opens one.
          if (preg_match("`<({$ignore_tags})(?:\\s|>)`i", $chunk, $matches)) {
            $open_tag = $matches[1];
          }
        }
        elseif (preg_match("`<\\/{$open_tag}>`i", $chunk)) {
          // Closing tag of the ignored element: resume processing.
          $open_tag = '';
        }
      }

      $text = implode('', $chunks);
    }

    // Revert to the original comment contents.
    _filter_url_escape_comments('', FALSE);
    return preg_replace_callback('`<!--(.*?)-->`', '_filter_url_escape_comments', $text);
  }

  /**
   * Builds the regular expression that matches (optionally prefixed) URLs.
   *
   * Capture group 1 of the returned pattern holds the absolute URL without
   * the prefix.
   *
   * @param string $url_prefix
   *   Literal prefix that must directly precede a URL, or an empty string.
   *
   * @return string
   *   A complete PCRE pattern, backtick-delimited, with the 'u' modifier.
   */
  protected static function buildUrlPattern($url_prefix) {
    // Prepare protocols pattern for absolute URLs.
    // \Drupal\Component\Utility\UrlHelper::stripDangerousProtocols() will
    // replace any bad protocols with HTTP, so we need to support the identical
    // list. While '//' is technically optional for MAILTO only, we cannot
    // cleanly differ between protocols here without hard-coding MAILTO, so
    // '//' is optional for all protocols.
    // @see \Drupal\Component\Utility\UrlHelper::stripDangerousProtocols()
    $protocols = \Drupal::getContainer()->getParameter('filter_protocols');
    $protocols = implode(':(?://)?|', $protocols) . ':(?://)?';

    $valid_url_path_characters = "[\\p{L}\\p{M}\\p{N}!\\*\\';:=\\+,\\.\$\\/%#\\[\\]\\-_~@&]";

    // Allow URL paths to contain balanced parens:
    // 1. Used in Wikipedia URLs like /Primer_(film)
    // 2. Used in IIS sessions like /S(dfd346)/
    $valid_url_balanced_parens = '\\(' . $valid_url_path_characters . '+\\)';

    // Valid end-of-path characters (so /foo. does not gobble the period).
    // 1. Allow =&# for empty URL parameters and other URL-join artifacts.
    $valid_url_ending_characters = '[\\p{L}\\p{M}\\p{N}:_+~#=/]|(?:' . $valid_url_balanced_parens . ')';

    $valid_url_query_chars = '[a-zA-Z0-9!?\\*\'@\\(\\);:&=\\+\\$\\/%#\\[\\]\\-_\\.,~|]';
    $valid_url_query_ending_chars = '[a-zA-Z0-9_&=#\\/]';

    // Full path. Also allow @ in a URL, but only in the middle, to catch
    // things like http://example.com/@user/
    $valid_url_path = '(?:(?:' . $valid_url_path_characters . '*(?:' . $valid_url_balanced_parens . $valid_url_path_characters . '*)*' . $valid_url_ending_characters . ')|(?:@' . $valid_url_path_characters . '+\\/))';

    // Prepare domain name pattern.
    // The ICANN seems to be on track towards accepting more diverse top level
    // domains, so this pattern has been "future-proofed" to allow for TLDs
    // of length 2-64.
    $domain = '(?:[\\p{L}\\p{M}\\p{N}._+-]+\\.)?[\\p{L}\\p{M}]{2,64}\\b';
    $ip = '(?:[0-9]{1,3}\\.){3}[0-9]{1,3}';
    $auth = '[\\p{L}\\p{M}\\p{N}:%_+*~#?&=.,/;-]+@';
    $trail = '(' . $valid_url_path . '*)?(\\?' . $valid_url_query_chars . '*' . $valid_url_query_ending_chars . ')?';

    // The prefix comes from site configuration and must be matched literally,
    // so escape regex metacharacters and the backtick delimiter used below.
    $quoted_prefix = preg_quote((string) $url_prefix, '`');

    // Match absolute URLs.
    $url_pattern = "(?:{$auth})?(?:{$domain}|{$ip})/?(?:{$trail})?";
    return "`{$quoted_prefix}((?:{$protocols})(?:{$url_pattern}))`u";
  }

  /**
   * {@inheritdoc}
   */
  public function tips($long = FALSE) {
    if ($long) {
      // The tip is HTML markup, so the element name has to be entity-encoded
      // to be displayed as text instead of being parsed as a tag.
      return $this->t('<p>You can convert plain URLs to &lt;drupal-url&gt; HTML elements. Those elements are later converted to embeds using "Display embedded URLs" text filter.</p>');
    }
    return $this->t('You can convert plain URLs to embed elements.');
  }

}
Classes
Name | Description |
---|---|
ConvertUrlToEmbedFilter | Provides a filter to display embedded entities based on data attributes. |