function _mailsystem_html_to_text in Mail System 6.2
Same name and namespace in other branches
- 8.2 html_to_text.inc \_mailsystem_html_to_text()
- 7.3 html_to_text.inc \_mailsystem_html_to_text()
- 7.2 html_to_text.inc \_mailsystem_html_to_text()
Helper function for mailsystem_html_to_text(), the Mail System copy of drupal_html_to_text().
Recursively converts $node to text, wrapping and indenting as necessary.
Parameters
$node: The source DOMNode.
$allowed_tags: A list of tags that will be transformed.
array &$notes: A writable array of footnote reference numbers, keyed by their respective hyperlink destination URLs.
$line_length: The maximum length of a line, for wrapping. Defaults to 80 characters.
array $parents: The list of ancestor tags, from nearest to most distant. Defaults to an empty array().
$count: The number to use for the next list item within an ordered list. Defaults to 1.
2 calls to _mailsystem_html_to_text()
- mailsystem_html_to_text in ./
html_to_text.inc - Transform an HTML string into plain text, preserving the structure of the markup. Useful for preparing the body of a node to be sent by e-mail.
- _mailsystem_html_to_text_table in ./
html_to_text.inc - Helper function for _mailsystem_html_to_text().
File
- ./
html_to_text.inc, line 161 - Copy of drupal_html_to_text improvements from issue #299138.
Code
/**
 * Helper function for mailsystem_html_to_text().
 *
 * Recursively converts $node to text, wrapping and indenting as necessary.
 *
 * @param DOMNode $node
 *   The source DOMNode.
 * @param array $allowed_tags
 *   A list of tags that will be transformed.  Elements whose tag is not in
 *   this list contribute only the text of their children.
 * @param array &$notes
 *   A writable array of footnote reference numbers, keyed by their
 *   respective hyperlink destination URLs.  New links are appended here.
 * @param int $line_length
 *   The maximum length of a line, for wrapping.  Defaults to 80 characters.
 * @param array $parents
 *   The list of ancestor tags, from nearest to most distant.  Defaults to an
 *   empty array().
 * @param int &$count
 *   The number to use for the next list item within an ordered list.
 *   Defaults to 1.  It is incremented after each ordered list item.
 *
 * @return string
 *   The text rendering of $node.  For <table> elements this is whatever
 *   _mailsystem_html_to_text_table() returns.
 */
function _mailsystem_html_to_text(DOMNode $node, array $allowed_tags, array &$notes, $line_length = 80, array $parents = array(), &$count = NULL) {
  if (!isset($count)) {
    $count = 1;
  }
  $eol = variable_get('mail_line_endings', MAIL_LINE_ENDINGS);
  // chr(160) is the non-breaking space character.  It is used wherever a
  // space must survive whitespace collapsing and must not be a wrap point.
  $nbsp = chr(160);

  if ($node->nodeType === XML_TEXT_NODE) {
    // For text nodes, we just copy the text content.
    $text = $node->textContent;
    // Convert line breaks and trim trailing spaces.
    $text = preg_replace('/ *\\r?\\n/', $eol, $text);
    if (in_array('pre', $parents)) {
      // Within <pre> tags, all spaces become non-breaking.
      $text = str_replace(' ', $nbsp, $text);
    }
    else {
      // Outside <pre> tags, collapse whitespace.
      $text = preg_replace('/[[:space:]]+/', ' ', $text);
    }
    return $text;
  }

  // Non-text node.
  $tag = '';
  $text = '';
  $child_text = '';
  $child_count = 1;
  $indent = '';
  $prefix = '';
  $suffix = '';
  $pad = '';

  if (isset($node->tagName) && in_array($node->tagName, $allowed_tags)) {
    $tag = $node->tagName;
    // Block-level elements start on a new line, except when they are the
    // first child of a list item (so the bullet and the text share a line).
    $first_in_li = isset($parents[0])
      && $parents[0] === 'li'
      && $node->parentNode
      && $node->isSameNode($node->parentNode->firstChild);

    switch ($tag) {
      // Turn links with valid hrefs into footnotes.
      case 'a':
        $test = !empty($node->attributes);
        $test = $test && ($href = $node->attributes->getNamedItem('href'));
        // Strip the site base path before handing the path to url().  The
        // base path is escaped so that characters such as "." or "+" in it
        // are matched literally rather than as regex syntax.
        $test = $test && ($url = url(preg_replace('|^' . preg_quote(base_path(), '|') . '|', '', $href->nodeValue), array(
          'absolute' => TRUE,
        )));
        $test = $test && valid_url($url);
        if ($test) {
          // Only add links that have not already been added.
          if (isset($notes[$url])) {
            $note = $notes[$url];
          }
          else {
            $note = count($notes) + 1;
            $notes[$url] = $note;
          }
          $suffix = ' [' . $note . ']';
        }
        break;

      // Generic block-level tags.
      case 'address':
      case 'caption':
      case 'div':
      case 'p':
      case 'pre':
        // Start on a new line except as the first child of a list item.
        if (!$first_in_li) {
          $text = $eol;
        }
        $suffix = $eol;
        break;

      // Forced line break.
      case 'br':
        $text = $eol;
        break;

      // Boldface by wrapping with "*" characters.
      case 'b':
      case 'strong':
        $prefix = '*';
        $suffix = '*';
        break;

      // Italicize by wrapping with "/" characters.
      case 'cite':
      case 'em':
      case 'i':
        $prefix = '/';
        $suffix = '/';
        break;

      // Underline by wrapping with "_" characters.
      case 'u':
        $prefix = '_';
        $suffix = '_';
        break;

      // Blockquotes are indented by "> " at each level.
      case 'blockquote':
        $text = $eol;
        $indent = '>' . $nbsp;
        $suffix = $eol;
        break;

      // Dictionary definitions are indented by four spaces.
      case 'dd':
        $indent = $nbsp . $nbsp . $nbsp . $nbsp;
        $suffix = $eol;
        break;

      // Dictionary list.
      case 'dl':
        // Start on a new line except as the first child of a list item.
        if (!$first_in_li) {
          $text = $eol;
        }
        $suffix = $eol;
        break;

      // Dictionary term.
      case 'dt':
        $suffix = $eol;
        break;

      // Header level 1 is prefixed by eight "=" characters.
      case 'h1':
        $text = "{$eol}{$eol}";
        $indent = '========' . $nbsp;
        $pad = $nbsp . '=';
        $suffix = $eol;
        break;

      // Header level 2 is prefixed by six "-" characters.
      case 'h2':
        $text = "{$eol}{$eol}";
        $indent = '------' . $nbsp;
        $pad = $nbsp . '-';
        $suffix = $eol;
        break;

      // Header level 3 is prefixed by four "." characters and a space.
      case 'h3':
        $text = "{$eol}{$eol}";
        $indent = '....' . $nbsp;
        $suffix = $eol;
        break;

      // Header level 4 is prefixed by three "." characters and a space.
      case 'h4':
        $text = "{$eol}{$eol}";
        $indent = '...' . $nbsp;
        $suffix = $eol;
        break;

      // Header level 5 is prefixed by two "." characters and a space.
      case 'h5':
        $text = "{$eol}{$eol}";
        $indent = '..' . $nbsp;
        $suffix = $eol;
        break;

      // Header level 6 is prefixed by one "." character and a space.
      case 'h6':
        $text = "{$eol}{$eol}";
        $indent = '.' . $nbsp;
        $suffix = $eol;
        break;

      // Horizontal rulers become a line of "-" characters.
      case 'hr':
        $text = $eol;
        $child_text = '-';
        $pad = '-';
        $suffix = $eol;
        break;

      // List items are treated differently depending on the parent tag.
      case 'li':
        // Ordered list item.
        if (reset($parents) === 'ol') {
          // Check the value attribute.  Only a numeric value overrides the
          // running counter; anything else is ignored so that the counter
          // stays an integer.
          $test = !empty($node->attributes);
          $test = $test && ($value = $node->attributes->getNamedItem('value'));
          if ($test && is_numeric(trim($value->nodeValue))) {
            $count = (int) trim($value->nodeValue);
          }
          // Single-digit numbers get an extra leading space so that the
          // ")" of one- and two-digit items line up.
          $indent = ($count < 10 ? $nbsp : '') . $nbsp . "{$count})" . $nbsp;
          $count++;
        }
        else {
          $indent = $nbsp . '*' . $nbsp;
        }
        $suffix = $eol;
        break;

      // Ordered lists.
      case 'ol':
        // Start on a new line except as the first child of a list item.
        if (!$first_in_li) {
          $text = $eol;
        }
        // Check the start attribute.  As with <li value>, only a numeric
        // value is honored.
        $test = !empty($node->attributes);
        $test = $test && ($value = $node->attributes->getNamedItem('start'));
        if ($test && is_numeric(trim($value->nodeValue))) {
          $child_count = (int) trim($value->nodeValue);
        }
        break;

      // Tables require special handling.
      case 'table':
        return _mailsystem_html_to_text_table($node, $allowed_tags, $notes, $line_length);

      // Separate adjacent table cells by two non-breaking spaces.
      case 'td':
        if (!empty($node->nextSibling)) {
          $suffix = $nbsp . $nbsp;
        }
        break;

      // End each table row with a newline.
      case 'tr':
        $suffix = $eol;
        break;

      // Unordered lists.
      case 'ul':
        // Start on a new line except as the first child of a list item.
        if (!$first_in_li) {
          $text = $eol;
        }
        break;

      default:
        // Coder review complains if there is no default case.
        break;
    }
    // Only add allowed tags to the $parents array.
    array_unshift($parents, $tag);
  }

  // Copy each child node to output.  Children wrap at the line length that
  // remains after this node's indent; $child_count is shared by reference
  // so that sibling <li> elements number consecutively.
  if ($node->hasChildNodes()) {
    foreach ($node->childNodes as $child) {
      $child_text .= _mailsystem_html_to_text($child, $allowed_tags, $notes, $line_length - drupal_strlen($indent), $parents, $child_count);
    }
  }

  // We only add prefix and suffix if the child nodes were non-empty.
  if ($child_text !== '') {
    // We capitalize the contents of h1 and h2 tags.
    if ($tag === 'h1' || $tag === 'h2') {
      $child_text = drupal_strtoupper($child_text);
    }
    // Don't add a newline to an existing newline.
    if ($suffix === $eol && drupal_substr($child_text, -drupal_strlen($eol)) === $eol) {
      $suffix = '';
    }
    // Trim spaces around newlines except with <pre> or inline tags.
    if (!in_array($tag, array(
      'a',
      'b',
      'cite',
      'em',
      'i',
      'pre',
      'strong',
      'u',
    ))) {
      $child_text = preg_replace('/ *' . $eol . ' */', $eol, $child_text);
    }
    // Soft-wrap at effective line length, but don't space-stuff.
    $child_text = mailsystem_wrap_mail($prefix . $child_text, array(
      'break' => $nbsp . $eol,
      'indent' => $indent,
      'max' => $line_length,
      'pad' => $pad,
      'stuff' => FALSE,
    )) . $suffix;
    if ($tag === 'pre') {
      // Perform RFC-3676 soft-wrapping.  Non-breaking spaces are turned
      // into ordinary spaces for the wrap and restored afterwards.
      $child_text = str_replace($nbsp, ' ', $child_text);
      $child_text = mailsystem_wrap_mail($child_text, array(
        'max' => $line_length,
        'stuff' => FALSE,
      ));
      $child_text = str_replace(' ', $nbsp, $child_text);
    }
    $text .= $child_text;
  }
  return $text;
}