html_to_text.inc in Mime Mail 5
File
html_to_text.inc (View source)
<?php
/**
* To generate this file from Drupal 6 mail.inc, delete mimemail_mail and
* mimemail_mail_send functions.
*/
/**
 * Soft-wrap plain text for e-mail as format=flowed (RFC 3676).
 *
 * Wrapping follows the delsp=yes convention, but non-spaced languages are
 * only broken when absolutely necessary, to avoid compatibility issues.
 *
 * Line endings are deliberately LF rather than CRLF, see mimemail_mail().
 *
 * @param $text
 *   The plain text to wrap.
 * @param $indent (optional)
 *   A string to prefix the text with. '>' characters are repeated on every
 *   wrapped line; all other characters appear on the first line only and are
 *   replaced by spaces on the lines that follow.
 *
 * @return
 *   The wrapped, space-stuffed and indented text.
 */
function mimemail_wrap_mail($text, $indent = '') {
  // Drop carriage returns so that CRLF becomes LF.
  $text = str_replace("\r", '', $text);

  // Soft wrapping is only allowed when the cleaned indentation contains no
  // spaces, i.e. it is empty or made up of '>' markers alone.
  $clean_indent = _mimemail_html_to_text_clean($indent);
  $indent_length = strlen($indent);
  $wrap_settings = array(
    'soft' => strpos($clean_indent, ' ') === FALSE,
    'length' => $indent_length,
  );

  if (strpos($text, "\n") === FALSE) {
    // A single line: wrap it directly.
    _mimemail_wrap_mail_line($text, 0, $wrap_settings);
  }
  else {
    // Strip trailing spaces so that the existing breaks stay hard breaks.
    $text = preg_replace('/ +\\n/m', "\n", $text);
    // Wrap every line on its own, then glue the lines back together.
    $lines = explode("\n", $text);
    array_walk($lines, '_mimemail_wrap_mail_line', $wrap_settings);
    $text = implode("\n", $lines);
  }

  // Blank out lines that consist of spaces only.
  $text = preg_replace('/^ +\\n/m', "\n", $text);
  // Space-stuff lines that start with '>', a space or 'From'.
  $text = preg_replace('/^(>| |From)/m', ' $1', $text);

  // Prefix every line with the cleaned indentation, then swap the prefix of
  // the first line for the full indentation string.
  $indented = preg_replace('/^/m', $clean_indent, $text);
  return $indent . substr($indented, $indent_length);
}
/**
 * Transform an HTML string into plain text, preserving the structure of the
 * markup. Useful for preparing the body of a node to be sent by e-mail.
 *
 * The output will be suitable for use as 'format=flowed; delsp=yes' text
 * (RFC 3676) and can be passed directly to mimemail_mail() for sending.
 *
 * We deliberately use LF rather than CRLF, see mimemail_mail().
 *
 * This function provides suitable alternatives for the following tags:
 * <a> <em> <i> <strong> <b> <br> <p> <blockquote> <ul> <ol> <li> <dl> <dt>
 * <dd> <h1> <h2> <h3> <h4> <h5> <h6> <hr>
 *
 * @param $string
 *   The string to be transformed.
 * @param $allowed_tags (optional)
 *   If supplied, a list of tags that will be transformed. If omitted, all
 *   supported tags are transformed.
 *
 * @return
 *   The transformed string, followed by the list of link footnotes (if any).
 */
function mimemail_html_to_text($string, $allowed_tags = NULL) {
  // Cache list of supported tags.
  static $supported_tags;
  if (empty($supported_tags)) {
    $supported_tags = array(
      'a',
      'em',
      'i',
      'strong',
      'b',
      'br',
      'p',
      'blockquote',
      'ul',
      'ol',
      'li',
      'dl',
      'dt',
      'dd',
      'h1',
      'h2',
      'h3',
      'h4',
      'h5',
      'h6',
      'hr',
    );
  }

  // Make sure only supported tags are kept.
  $allowed_tags = isset($allowed_tags) ? array_intersect($supported_tags, $allowed_tags) : $supported_tags;

  // Make sure tags, entities and attributes are well-formed and properly nested.
  $string = _mimemail_filter_htmlcorrector(filter_xss($string, $allowed_tags));

  // Apply inline styles.
  $string = preg_replace('!</?(em|i)>!i', '/', $string);
  $string = preg_replace('!</?(strong|b)>!i', '*', $string);

  // Replace inline <a> tags with the text of link and a footnote.
  // 'See <a href="http://mimemail.org">the Drupal site</a>' becomes
  // 'See the Drupal site [1]' with the URL included as a footnote.
  // The helper keeps the collected URLs in a static list: reset it first,
  // let the callback fill it, then read it back.
  _mimemail_html_to_mail_urls(NULL, TRUE);
  $pattern = '@(<a[^>]+?href="([^"]*)"[^>]*?>(.+?)</a>)@i';
  $string = preg_replace_callback($pattern, '_mimemail_html_to_mail_urls', $string);
  $urls = _mimemail_html_to_mail_urls();
  $footnotes = '';
  if (count($urls)) {
    $footnotes .= "\n";
    for ($i = 0, $max = count($urls); $i < $max; $i++) {
      $footnotes .= '[' . ($i + 1) . '] ' . $urls[$i] . "\n";
    }
  }

  // Split tags from text.
  $split = preg_split('/<([^>]+?)>/', $string, -1, PREG_SPLIT_DELIM_CAPTURE);
  // Note: PHP ensures the array consists of alternating delimiters and literals
  // and begins and ends with a literal (inserting $null as required).

  // Odd/even counter (tag or no tag).
  $tag = FALSE;
  // Case conversion function.
  $casing = NULL;
  $output = '';
  // All current indentation string chunks.
  $indent = array();
  // Array of counters for opened lists.
  $lists = array();

  foreach ($split as $value) {
    // Holds a string ready to be formatted and output.
    $chunk = NULL;

    // Process HTML tags (but don't output any literally).
    if ($tag) {
      list($tagname) = explode(' ', strtolower($value), 2);
      switch ($tagname) {
        // List counters
        case 'ul':
          array_unshift($lists, '*');
          break;

        case 'ol':
          array_unshift($lists, 1);
          break;

        case '/ul':
        case '/ol':
          array_shift($lists);
          // Ensure blank new-line.
          $chunk = '';
          break;

        // Quotation/list markers, non-fancy headers
        case 'blockquote':
          // Format=flowed indentation cannot be mixed with lists.
          $indent[] = count($lists) ? ' "' : '>';
          break;

        case 'li':
          // A stray <li> outside of any <ul>/<ol> leaves $lists empty; check
          // for that before reading the innermost counter so no undefined
          // offset is accessed. Such an item gets a bullet.
          $indent[] = (isset($lists[0]) && is_numeric($lists[0])) ? ' ' . $lists[0]++ . ') ' : ' * ';
          break;

        case 'dd':
          $indent[] = ' ';
          break;

        case 'h3':
          $indent[] = '.... ';
          break;

        case 'h4':
          $indent[] = '.. ';
          break;

        case '/blockquote':
          if (count($lists)) {
            // Append closing quote for inline quotes (immediately).
            $output = rtrim($output, "> \n") . "\"\n";
            // Ensure blank new-line.
            $chunk = '';
          }
          // Fall-through
        case '/li':
        case '/dd':
          array_pop($indent);
          break;

        case '/h3':
        case '/h4':
          array_pop($indent);
          // Fall-through
        case '/h5':
        case '/h6':
          // Ensure blank new-line.
          $chunk = '';
          break;

        // Fancy headers
        case 'h1':
          $indent[] = '======== ';
          $casing = 'drupal_strtoupper';
          break;

        case 'h2':
          $indent[] = '-------- ';
          $casing = 'drupal_strtoupper';
          break;

        case '/h1':
        case '/h2':
          $casing = NULL;
          // Pad the line with dashes.
          $output = _mimemail_html_to_text_pad($output, $tagname == '/h1' ? '=' : '-', ' ');
          array_pop($indent);
          // Ensure blank new-line.
          $chunk = '';
          break;

        // Horizontal rulers
        case 'hr':
          // Insert immediately.
          $output .= mimemail_wrap_mail('', implode('', $indent)) . "\n";
          $output = _mimemail_html_to_text_pad($output, '-');
          break;

        // Paragraphs and definition lists
        case '/p':
        case '/dl':
          // Ensure blank new-line.
          $chunk = '';
          break;
      }
    }
    else {
      // Convert inline HTML text to plain text.
      $value = trim(preg_replace('/\\s+/', ' ', decode_entities($value)));
      if (strlen($value)) {
        $chunk = $value;
      }
    }

    // See if there is something waiting to be output.
    if (isset($chunk)) {
      // Apply any necessary case conversion.
      if (isset($casing)) {
        $chunk = $casing($chunk);
      }
      // Format it and apply the current indentation.
      $output .= mimemail_wrap_mail($chunk, implode('', $indent)) . "\n";
      // Remove non-quotation markers from indentation.
      $indent = array_map('_mimemail_html_to_text_clean', $indent);
    }

    $tag = !$tag;
  }

  return $output . $footnotes;
}
/**
 * Helper function for array_walk in mimemail_wrap_mail().
 *
 * Wraps a single line in place: first at word boundaries, then by cutting any
 * word that is still longer than the maximum line width.
 *
 * @param $line
 *   The line to wrap. Passed by reference and modified in place.
 * @param $key
 *   The array key supplied by array_walk(). Unused.
 * @param $values
 *   An array with the keys:
 *   - 'soft': TRUE to break lines with " \n", FALSE to break with "\n".
 *   - 'length': The indentation length, subtracted from both widths.
 */
function _mimemail_wrap_mail_line(&$line, $key, $values) {
  // Use soft-breaks only for purely quoted or unindented text.
  $break = $values['soft'] ? " \n" : "\n";
  $line = wordwrap($line, 77 - $values['length'], $break);
  // Break really long words at the maximum width allowed. This needs the
  // fourth argument: without it wordwrap() never splits inside a word, so
  // over-long words were left untouched. The width is kept at 1 or more
  // because wordwrap() rejects a zero width when cutting is enabled.
  $line = wordwrap($line, max(1, 996 - $values['length']), $break, TRUE);
}
/**
 * Helper function for mimemail_html_to_text().
 *
 * Collects link URLs in a static list and replaces each link with its label
 * followed by a numbered placeholder token.
 *
 * @param $match (optional)
 *   A regular expression match array in which index 2 holds the URL and
 *   index 3 holds the link label.
 * @param $reset (optional)
 *   TRUE to empty the collected list of URLs.
 *
 * @return
 *   When a match is given (and $reset is not set), the label followed by
 *   ' [N]' where N is the number of URLs collected so far. Otherwise the
 *   array of collected URLs.
 */
function _mimemail_html_to_mail_urls($match = NULL, $reset = FALSE) {
  global $base_url, $base_path;
  static $urls = array(), $regexp;

  // A reset empties the internal URL list and does nothing else.
  if ($reset) {
    $urls = array();
    return $urls;
  }

  // Build the pattern that matches a leading base path once, then reuse it.
  if (empty($regexp)) {
    $regexp = '@^' . preg_quote($base_path, '@') . '@';
  }

  // Without a match there is nothing to record: hand back the list.
  if (!$match) {
    return $urls;
  }

  $url = $match[2];
  $label = $match[3];

  // Ensure all URLs are absolute: a URL containing a scheme separator is kept
  // as it is, any other URL has its leading base path replaced by the base
  // URL.
  if (strpos($url, '://')) {
    $urls[] = $url;
  }
  else {
    $urls[] = preg_replace($regexp, $base_url . '/', $url);
  }

  return $label . ' [' . count($urls) . ']';
}
/**
 * Helper function for mimemail_wrap_mail() and mimemail_html_to_text().
 *
 * Blanks out a piece of indentation: everything that is not a '>' quotation
 * marker is replaced with a space, so the result has the same length.
 *
 * @param $indent
 *   The indentation string to clean.
 *
 * @return
 *   The indentation with only '>' markers and spaces left.
 */
function _mimemail_html_to_text_clean($indent) {
  $non_quotation_marker = '/[^>]/';
  $blank = ' ';
  return preg_replace($non_quotation_marker, $blank, $indent);
}
/**
 * Helper function for mimemail_html_to_text().
 *
 * Pad the last line with the given character.
 *
 * @param $text
 *   The text whose last line is padded. Its final character (expected to be
 *   the trailing line break) is removed before padding and a line break is
 *   appended again afterwards.
 * @param $pad
 *   The character to pad with.
 * @param $prefix (optional)
 *   A string inserted between the text and the padding. Its length is taken
 *   out of the padding.
 *
 * @return
 *   The text with its last line padded, ending in a line break.
 */
function _mimemail_html_to_text_pad($text, $pad, $prefix = '') {
  // Remove last line break.
  $text = substr($text, 0, -1);
  // Calculate needed padding space and add it. The last line starts after the
  // last remaining line break; -1 stands for "no line break at all".
  if (($p = strrpos($text, "\n")) === FALSE) {
    $p = -1;
  }
  $n = max(0, 79 - (strlen($text) - $p));
  // The prefix uses up part of the padding space. Never go below zero: a last
  // line that already fills the width would otherwise hand str_repeat() a
  // negative count, which is an error.
  $fill = max(0, $n - strlen($prefix));
  // Add prefix and padding, and restore linebreak.
  return $text . $prefix . str_repeat($pad, $fill) . "\n";
}
/**
 * Scan input and make sure that all HTML tags are properly closed and nested.
 *
 * Copied from Drupal 6 filter.module.
 *
 * @param $text
 *   The HTML string to correct.
 *
 * @return
 *   The corrected HTML: stray '<' characters are entified, unclosed tags are
 *   closed, unmatched closing tags are dropped and single use tags are
 *   written in their self-closing form.
 */
function _mimemail_filter_htmlcorrector($text) {
  // Prepare tag lists. Only the keys matter: they are looked up with isset().
  static $no_nesting, $single_use;
  if (!isset($no_nesting)) {
    // Tags which cannot be nested but are typically left unclosed.
    $no_nesting = array_flip(array(
      'li',
      'p',
    ));
    // Single use tags in HTML4
    $single_use = array_flip(array(
      'base',
      'meta',
      'link',
      'hr',
      'br',
      'param',
      'img',
      'area',
      'input',
      'col',
      'frame',
    ));
  }

  // Properly entify angles: a '<' that is not followed by a letter or a slash
  // cannot start a tag, so it is turned into an entity.
  $text = preg_replace('!<([^a-zA-Z/])!', '&lt;\\1', $text);

  // Split tags from text.
  $split = preg_split('/<([^>]+?)>/', $text, -1, PREG_SPLIT_DELIM_CAPTURE);
  // Note: PHP ensures the array consists of alternating delimiters and literals
  // and begins and ends with a literal (inserting $null as required).

  // Odd/even counter. Tag or no tag.
  $tag = FALSE;
  // Open tags, innermost first.
  $stack = array();
  $output = '';

  foreach ($split as $value) {
    // Process HTML tags.
    if ($tag) {
      list($tagname) = explode(' ', strtolower($value), 2);
      // Closing tag
      if ($tagname[0] == '/') {
        $tagname = substr($tagname, 1);
        // Discard XHTML closing tags for single use tags.
        if (!isset($single_use[$tagname])) {
          // See if we possibly have a matching opening tag on the stack.
          if (in_array($tagname, $stack)) {
            // Close other tags lingering first.
            do {
              $output .= '</' . $stack[0] . '>';
            } while (array_shift($stack) != $tagname);
          }
          // Otherwise, discard it.
        }
      }
      else {
        // See if we have an identical 'no nesting' tag already open and close it if found.
        if (count($stack) && $stack[0] == $tagname && isset($no_nesting[$stack[0]])) {
          $output .= '</' . array_shift($stack) . '>';
        }
        // Push non-single-use tags onto the stack
        if (!isset($single_use[$tagname])) {
          array_unshift($stack, $tagname);
        }
        else {
          // Single use tags are written in self-closing form.
          $value = rtrim($value, ' /') . ' /';
        }
        $output .= '<' . $value . '>';
      }
    }
    else {
      // Passthrough all text.
      $output .= $value;
    }
    $tag = !$tag;
  }

  // Close remaining tags.
  while (count($stack) > 0) {
    $output .= '</' . array_shift($stack) . '>';
  }

  return $output;
}
Functions
Name | Description |
---|---|
mimemail_html_to_text | Transform an HTML string into plain text, preserving the structure of the markup. Useful for preparing the body of a node to be sent by e-mail. |
mimemail_wrap_mail | Perform format=flowed soft wrapping for mail (RFC 3676). |
_mimemail_filter_htmlcorrector | Scan input and make sure that all HTML tags are properly closed and nested. |
_mimemail_html_to_mail_urls | Helper function for mimemail_html_to_text(). |
_mimemail_html_to_text_clean | Helper function for mimemail_wrap_mail() and mimemail_html_to_text(). |
_mimemail_html_to_text_pad | Helper function for mimemail_html_to_text(). |
_mimemail_wrap_mail_line | Helper function for array_walk in mimemail_wrap_mail(). |