class TruncateHTML in Smart Trim 8
Class TruncateHTML.
Hierarchy
- class \Drupal\smart_trim\Truncate\TruncateHTML
Expanded class hierarchy of TruncateHTML
2 files declare their use of TruncateHTML
- SmartTrimFormatter.php in src/
Plugin/ Field/ FieldFormatter/ SmartTrimFormatter.php - TruncateHTMLTest.php in tests/
src/ Unit/ TruncateHTMLTest.php
File
- src/
Truncate/ TruncateHTML.php, line 37 - Contains trim functionality.
Namespace
Drupal\smart_trim\Truncate
View source
/**
 * Truncates an HTML fragment by characters or words, keeping markup intact.
 *
 * The fragment is loaded into a DOM document, the text nodes are walked in
 * document order until the limit is reached, the node holding the breakpoint
 * is shortened, every node after it is removed and an ellipsis is inserted.
 */
class TruncateHTML {

  /**
   * Total characters counted so far during a character truncation.
   *
   * @var int
   */
  protected $charCount = 0;

  /**
   * Total words counted so far during a word truncation.
   *
   * @var int
   */
  protected $wordCount = 0;

  /**
   * Character / Word limit.
   *
   * @var int
   */
  protected $limit;

  /**
   * Element to start on (the <body> element of the loaded document).
   *
   * @var \DOMElement
   */
  protected $startNode;

  /**
   * Ellipsis character(s) inserted at the breakpoint.
   *
   * @var string
   */
  protected $ellipsis;

  /**
   * Did we find the breakpoint?
   *
   * @var bool
   */
  protected $foundBreakpoint = FALSE;

  /**
   * Sets up object for use.
   *
   * Loads the markup into a DOM document and resets all per-run state so the
   * same instance can be reused for several truncations.
   *
   * @param string $html
   *   Text to be prepared.
   * @param int $limit
   *   Amount of text to return.
   * @param string $ellipsis
   *   Characters to use at the end of the text.
   *
   * @return \DOMDocument
   *   Prepared DOMDocument to work with.
   */
  protected function init($html, $limit, $ellipsis) {
    // Encode every non-ASCII character as a numeric entity before loading.
    // This replaces mb_convert_encoding($html, 'HTML-ENTITIES', 'UTF-8'),
    // which is deprecated as of PHP 8.2; the DOM parser decodes either form
    // back to the same characters.
    $encoded = mb_encode_numericentity($html, [0x80, 0x10FFFF, 0, 0x1FFFFF], 'UTF-8');
    $dom = Html::load($encoded);

    // The body tag node, our html fragment is automatically wrapped in
    // a <html><body> etc.
    $this->startNode = $dom->getElementsByTagName("body")->item(0);
    $this->limit = $limit;
    $this->ellipsis = $ellipsis;
    $this->charCount = 0;
    $this->wordCount = 0;
    $this->foundBreakpoint = FALSE;

    return $dom;
  }

  /**
   * Truncates HTML text by characters.
   *
   * @param string $html
   *   Text to be updated.
   * @param int $limit
   *   Amount of text to allow. Values <= 0 disable truncation.
   * @param string $ellipsis
   *   Characters to use at the end of the text.
   *
   * @return string
   *   Resulting text. The input is returned untouched when no truncation is
   *   needed.
   */
  public function truncateChars($html, $limit, $ellipsis = '...') {
    // Measure the visible text the same way domNodeTruncateChars() does:
    // entities decoded and counted in characters, not bytes. A byte count
    // would send multibyte text that already fits into the truncation path
    // and give it a spurious ellipsis.
    $visible = html_entity_decode(strip_tags($html), ENT_QUOTES, 'UTF-8');
    if ($limit <= 0 || $limit >= mb_strlen($visible, 'UTF-8')) {
      return $html;
    }

    $dom = $this->init($html, $limit, $ellipsis);
    // Pass the body node on to be processed.
    $this->domNodeTruncateChars($this->startNode);

    return Html::serialize($dom);
  }

  /**
   * Truncates HTML text by words.
   *
   * @param string $html
   *   Text to be updated.
   * @param int $limit
   *   Amount of text to allow. Values <= 0 disable truncation.
   * @param string $ellipsis
   *   Characters to use at the end of the text.
   *
   * @return string
   *   Resulting text. The input is returned untouched when no truncation is
   *   needed.
   */
  public function truncateWords($html, $limit, $ellipsis = '...') {
    if ($limit <= 0 || $limit >= $this->countWords(strip_tags($html))) {
      return $html;
    }

    $dom = $this->init($html, $limit, $ellipsis);
    // Pass the body node on to be processed.
    $this->domNodeTruncateWords($this->startNode);

    return Html::serialize($dom);
  }

  /**
   * Truncates a DOMNode by character count.
   *
   * Walks the children depth-first, accumulating the length of each leaf
   * node until the limit is reached, then cuts that leaf and prunes the rest
   * of the document.
   *
   * @param \DOMNode $domnode
   *   Object to be truncated.
   */
  protected function domNodeTruncateChars(\DOMNode $domnode) {
    foreach ($domnode->childNodes as $node) {
      if ($this->foundBreakpoint) {
        return;
      }

      if ($node->hasChildNodes()) {
        $this->domNodeTruncateChars($node);
        continue;
      }

      $text = html_entity_decode($node->nodeValue, ENT_QUOTES, 'UTF-8');
      $length = mb_strlen($text, 'UTF-8');

      if ($this->charCount + $length < $this->limit) {
        $this->charCount += $length;
        continue;
      }

      // We have found our end point.
      $node->nodeValue = Unicode::truncate($text, $this->limit - $this->charCount, TRUE);
      $this->removeTrailingPunctuation($node);
      $this->removeProceedingNodes($node);
      $this->insertEllipsis($node);
      $this->foundBreakpoint = TRUE;
      return;
    }
  }

  /**
   * Truncates a DOMNode by words.
   *
   * Walks the children depth-first, accumulating the word count of each leaf
   * node until the limit is reached, then cuts that leaf after the last
   * allowed word and prunes the rest of the document.
   *
   * @param \DOMNode $domnode
   *   Object to be truncated.
   */
  protected function domNodeTruncateWords(\DOMNode $domnode) {
    foreach ($domnode->childNodes as $node) {
      if ($this->foundBreakpoint) {
        return;
      }

      if ($node->hasChildNodes()) {
        $this->domNodeTruncateWords($node);
        continue;
      }

      $cur_count = $this->countWords($node->nodeValue);

      if ($this->wordCount + $cur_count < $this->limit) {
        $this->wordCount += $cur_count;
        continue;
      }

      // We have found our end point.
      $remaining = $this->limit - $this->wordCount;
      if ($cur_count > 1 && $remaining < $cur_count) {
        // Note that PREG_SPLIT_OFFSET_CAPTURE and UTF-8 is interesting.
        // preg_split() works on the string as an array of bytes therefore
        // in order to use its results we need to use non unicode aware
        // functions.
        // @see https://bugs.php.net/bug.php?id=67487
        $words = preg_split("/[\n\r\t ]+/", $node->nodeValue, $remaining + 1, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_OFFSET_CAPTURE);
        // The final piece is the unsplit remainder, so the last word to keep
        // is the one just before it.
        $last_word = $words[count($words) - 2];
        $node->nodeValue = substr($node->nodeValue, 0, $last_word[1] + strlen($last_word[0]));
      }

      $this->removeTrailingPunctuation($node);
      $this->removeProceedingNodes($node);
      $this->insertEllipsis($node);
      $this->foundBreakpoint = TRUE;
      return;
    }
  }

  /**
   * Removes certain punctuation from the end of the node value.
   *
   * @param \DOMNode $domnode
   *   Node to be altered.
   */
  protected function removeTrailingPunctuation(\DOMNode $domnode) {
    // The "u" modifier makes the class match whole characters. Without it
    // the multibyte "…" is treated as three separate bytes, so unrelated
    // characters ending in one of those bytes would match and then be cut in
    // half by a byte-wise substr().
    while (preg_match('/[.,:;?!…]$/u', $domnode->nodeValue)) {
      $domnode->nodeValue = mb_substr($domnode->nodeValue, 0, -1, 'UTF-8');
    }
  }

  /**
   * Removes every node that follows the given node in document order.
   *
   * Following siblings are removed first; when there are none, the ancestors
   * are scanned upwards (stopping at the start node) and their following
   * siblings are removed instead.
   *
   * @param \DOMNode $domnode
   *   Node after which everything is removed.
   */
  protected function removeProceedingNodes(\DOMNode $domnode) {
    $nextnode = $domnode->nextSibling;

    if ($nextnode !== NULL) {
      // Clear out everything after the sibling before dropping the sibling.
      $this->removeProceedingNodes($nextnode);
      $domnode->parentNode->removeChild($nextnode);
      return;
    }

    // Scan upwards till we find a sibling.
    $curnode = $domnode->parentNode;
    while ($curnode !== $this->startNode) {
      if ($curnode->nextSibling !== NULL) {
        $curnode = $curnode->nextSibling;
        $this->removeProceedingNodes($curnode);
        $curnode->parentNode->removeChild($curnode);
        break;
      }
      $curnode = $curnode->parentNode;
    }
  }

  /**
   * Inserts the ellipsis after the truncated text.
   *
   * @param \DOMNode $domnode
   *   Node to be altered.
   */
  protected function insertEllipsis(\DOMNode $domnode) {
    // HTML tags to avoid appending the ellipsis to.
    $avoid = [
      'a',
      'strong',
      'em',
      'h1',
      'h2',
      'h3',
      'h4',
      'h5',
    ];

    $parent = $domnode->parentNode;
    $grandparent = $parent !== NULL ? $parent->parentNode : NULL;

    // The original guard "(grandparent !== NULL || grandparent !== startNode)"
    // was always true; only the null check is meaningful, and keeping just
    // that preserves the existing placement (ellipsis outside the avoided
    // element even when it sits directly in the start node).
    if ($grandparent !== NULL && in_array($parent->nodeName, $avoid, TRUE)) {
      // Append as text node to parent instead.
      $textnode = new \DOMText($this->ellipsis);
      if ($grandparent->nextSibling) {
        // The reference node is a sibling of the grandparent, so the insert
        // has to happen on the grandparent's own parent; calling it on the
        // grandparent itself raises a DOMException.
        $grandparent->parentNode->insertBefore($textnode, $grandparent->nextSibling);
      }
      else {
        $grandparent->appendChild($textnode);
      }
      return;
    }

    // Append to current node.
    $domnode->nodeValue = rtrim($domnode->nodeValue) . $this->ellipsis;
  }

  /**
   * Gets number of words in text.
   *
   * Words are runs of characters separated by spaces, tabs or line breaks.
   *
   * @param string $text
   *   Text to be counted.
   *
   * @return int
   *   Number of words found.
   */
  protected function countWords($text) {
    $words = preg_split("/[\n\r\t ]+/", $text, -1, PREG_SPLIT_NO_EMPTY);
    return count($words);
  }

}
Members
Name | Modifiers | Type | Description | Overrides |
---|---|---|---|---|
TruncateHTML:: |
protected | property | Total characters. | |
TruncateHTML:: |
protected | property | Ellipsis character. | |
TruncateHTML:: |
protected | property | Did we find the breakpoint? | |
TruncateHTML:: |
protected | property | Character / Word limit. | |
TruncateHTML:: |
protected | property | Element to start on. | |
TruncateHTML:: |
protected | property | Total words. | |
TruncateHTML:: |
protected | function | Gets number of words in text. | |
TruncateHTML:: |
protected | function | Truncates a DOMNode by character count. | |
TruncateHTML:: |
protected | function | Truncates a DOMNode by words. | |
TruncateHTML:: |
protected | function | Sets up object for use. | |
TruncateHTML:: |
protected | function | Inserts the ellipsis character to the node. | |
TruncateHTML:: |
protected | function | Removes the nodes that follow the given node. | |
TruncateHTML:: |
protected | function | Removes certain punctuation from the end of the node value. | |
TruncateHTML:: |
public | function | Truncates HTML text by characters. | |
TruncateHTML:: |
public | function | Truncates HTML text by words. |