class SmartyPants in Typogrify 8
SmartyPants - Smart punctuation for web sites.
By John Gruber <http://daringfireball.net>
PHP port by Michel Fortin <http://www.michelf.com/>
Copyright (c) 2003-2004 John Gruber Copyright (c) 2004-2005 Michel Fortin Copyright (c) 2012 Micha Glave - i18n-quote-style
Re-released under GPLv2 for Drupal.
Wrapped in a class for D8. Do we trust the GPLv2 notice above?
Hierarchy
- class \Drupal\typogrify\SmartyPants
Expanded class hierarchy of SmartyPants
2 files declare their use of SmartyPants
- Typogrify.php in src/
TwigExtension/ Typogrify.php - TypogrifyFilter.php in src/
Plugin/ Filter/ TypogrifyFilter.php
File
- src/
SmartyPants.php, line 22
Namespace
Drupal\typogrifyView source
class SmartyPants {
/**
* Fri 9 Dec 2005.
*/
const SMARTYPANTS_PHP_VERSION = '1.5.1e';
/**
* Fri 12 Mar 2004.
*/
const SMARTYPANTS_SYNTAX_VERSION = '1.5.1';
/**
* Regex-pattern for tags we don't mess with.
*/
const SMARTYPANTS_TAGS_TO_SKIP = '@<(/?)(?:pre|code|kbd|script|textarea|tt|math)[\\s>]@';
/**
* Current SmartyPants configuration setting.
*
* Change this to configure.
* 1 => "--" for em-dashes; no en-dash support
* 2 => "---" for em-dashes; "--" for en-dashes
* 3 => "--" for em-dashes; "---" for en-dashes
* See docs for more configuration options.
*
* @var string
*/
protected static $smartypantsAttr = "1";
/* -- Smarty Modifier Interface ------------------------------------------*/
/**
* Wrapper method for SmartyPants.
*
* @param string $text
* Text to be parsed.
* @param string $attr
* Value of the smart_quotes="" attribute.
*
* @return string
* The modified text.
*/
public static function modifierSmartypants($text, $attr = NULL) {
return self::process($text, $attr);
}
/**
* Returns a locale-specific array of quotes.
*
* @param string $langcode
* Optional. The language code, such as 'en' or 'fr', or the special string
* 'all'.
*
* @return array
* If $langcode is a recognized language code, return string[] the opening
* and closing quote characters.
* If $langcode is NULL or an unrecognized code, return string[] the opening
* and closing quote characters for English.
* If $langcode is 'all', return string[][] an array of such arrays, keyed
* by the recognized language codes.
*/
public static function i18nQuotes($langcode = NULL) {
// Ignore all english-equivalents served by fallback.
$quotes = [
// Arabic.
'ar' => [
'«',
'»',
'‹',
'›',
],
// Belarusian.
'be' => [
'«',
'»',
'„',
'“',
],
// Bulgarian.
'bg' => [
'„',
'“',
'‚',
'‘',
],
// Danish.
'da' => [
'»',
'«',
'›',
'‹',
],
// German.
'de' => [
'„',
'“',
'‚',
'‘',
],
// Greek.
'el' => [
'«',
'»',
'‹',
'›',
],
// English.
'en' => [
'“',
'”',
'‘',
'’',
],
// Esperanto.
'eo' => [
'“',
'”',
'“',
'”',
],
// Spanish.
'es' => [
'«',
'»',
'“',
'“',
],
// Estonian.
'et' => [
'„',
'“',
'„',
'“',
],
// Finnish.
'fi' => [
'”',
'”',
'’',
'’',
],
// French.
'fr' => [
'«',
'»',
'‹',
'›',
],
// Swiss German.
'gsw-berne' => [
'„',
'“',
'‚',
'‘',
],
// Hebrew.
'he' => [
'“',
'“',
'«',
'»',
],
// Croatian.
'hr' => [
'»',
'«',
'›',
'‹',
],
// Hungarian.
'hu' => [
'„',
'“',
'„',
'“',
],
// Icelandic.
'is' => [
'„',
'“',
'‚',
'‘',
],
// Italian.
'it' => [
'«',
'»',
'‘',
'’',
],
// Lithuanian.
'lt' => [
'„',
'“',
'‚',
'‘',
],
// Latvian.
'lv' => [
'„',
'“',
'„',
'“',
],
// Dutch.
'nl' => [
'„',
'”',
'‘',
'’',
],
// Norwegian.
'no' => [
'„',
'“',
'„',
'“',
],
// Polish.
'pl' => [
'„',
'”',
'«',
'»',
],
// Portuguese.
'pt' => [
'“',
'”',
'‘',
'’',
],
// Romanian.
'ro' => [
'„',
'“',
'«',
'»',
],
// Russian.
'ru' => [
'«',
'»',
'„',
'“',
],
// Slovak.
'sk' => [
'„',
'“',
'‚',
'‘',
],
// Slovenian.
'sl' => [
'„',
'“',
'‚',
'‘',
],
// Albanian.
'sq' => [
'«',
'»',
'‹',
'›',
],
// Serbian.
'sr' => [
'„',
'“',
'‚',
'‘',
],
// Swedish.
'sv' => [
'”',
'”',
'’',
'’',
],
// Turkish.
'tr' => [
'«',
'»',
'‹',
'›',
],
// Ukrainian.
'uk' => [
'«',
'»',
'„',
'“',
],
];
if ($langcode == 'all') {
return $quotes;
}
if (isset($quotes[$langcode])) {
return $quotes[$langcode];
}
return $quotes['en'];
}
/**
* SmartyPants.
*
* @param string $text
* Text to be parsed.
* @param string $attr
* Value of the smart_quotes="" attribute.
* @param string $ctx
* MT context object (unused).
*
* @return string
* The modified text.
*/
public static function process($text, $attr = NULL, $ctx = NULL) {
if ($attr == NULL) {
$attr = self::$smartypantsAttr;
}
// Options to specify which transformations to make.
$do_stupefy = FALSE;
// Should we translate " entities into normal quotes?
$convert_quot = 0;
// Parse attributes:
// 0 : do nothing
// 1 : set all
// 2 : set all, using old school en- and em- dash shortcuts
// 3 : set all, using inverted old school en and em- dash shortcuts
//
// q : quotes
// b : backtick quotes (``double'' and ,,double`` only)
// B : backtick quotes (``double'', ,,double``, ,single` and `single')
// d : dashes
// D : old school dashes
// i : inverted old school dashes
// e : ellipses
// w : convert " entities to " for Dreamweaver users.
if ($attr == "0") {
// Do nothing.
return $text;
}
elseif ($attr == "1") {
// Do everything, turn all options on.
$do_quotes = 2;
$do_backticks = 1;
$do_dashes = 1;
$do_ellipses = 1;
}
elseif ($attr == "2") {
// Do everything, turn all options on, use old school dash shorthand.
$do_quotes = 2;
$do_backticks = 1;
$do_dashes = 2;
$do_ellipses = 1;
}
elseif ($attr == "3") {
// Do everything, turn all options on,
// use inverted old school dash shorthand.
$do_quotes = 2;
$do_backticks = 1;
$do_dashes = 3;
$do_ellipses = 1;
}
elseif ($attr == "-1") {
// Special "stupefy" mode.
$do_stupefy = 1;
}
else {
$chars = preg_split('//', $attr);
foreach ($chars as $c) {
if ($c == "q") {
$do_quotes = 1;
}
elseif ($c == "Q") {
$do_quotes = 2;
}
elseif ($c == "b") {
$do_backticks = 1;
}
elseif ($c == "B") {
$do_backticks = 2;
}
elseif ($c == "d") {
$do_dashes = 1;
}
elseif ($c == "D") {
$do_dashes = 2;
}
elseif ($c == "i") {
$do_dashes = 3;
}
elseif ($c == "e") {
$do_ellipses = 1;
}
elseif ($c == "w") {
$convert_quot = 1;
}
}
}
if ($do_quotes == 2) {
$doc_lang = $ctx['langcode'];
}
else {
$doc_lang = 'en';
}
$tokens = self::tokenizeHtml($text);
$result = '';
// Keep track of when we're inside <pre> or <code> tags.
$in_pre = 0;
// This is a cheat, used to get some context
// for one-character tokens that consist of
// just a quote char. What we do is remember
// the last character of the previous text
// token, to use as context to curl single-
// character quote tokens correctly.
$prev_token_last_char = '';
foreach ($tokens as $cur_token) {
if ($cur_token[0] == 'tag') {
// Don't mess with quotes inside tags.
$result .= $cur_token[1];
if (preg_match(self::SMARTYPANTS_TAGS_TO_SKIP, $cur_token[1], $matches)) {
$in_pre = isset($matches[1]) && $matches[1] == '/' ? 0 : 1;
}
else {
// Reading language from span.
if (preg_match('/<span .*(xml:)?lang="(..)"/', $cur_token[1], $matches)) {
$span_lang = $matches[2];
}
elseif ($cur_token[1] == '</span>') {
unset($span_lang);
}
}
}
else {
$t = $cur_token[1];
// Remember last char of this token before processing.
$last_char = mb_substr($t, -1);
if (!$in_pre) {
$quotes = self::i18nQuotes(isset($span_lang) ? $span_lang : $doc_lang);
$t = self::processEscapes($t);
if ($convert_quot) {
$t = preg_replace('/"/', '"', $t);
}
if ($do_dashes) {
if ($do_dashes == 1) {
$t = self::educateDashes($t);
}
elseif ($do_dashes == 2) {
$t = self::educateDashesOldSchool($t);
}
elseif ($do_dashes == 3) {
$t = self::educateDashesOldSchoolInverted($t);
}
}
if ($do_ellipses) {
$t = self::educateEllipses($t);
}
// Note: backticks need to be processed before quotes.
if ($do_backticks) {
$t = self::educateBackticks($t);
if ($do_backticks == 2) {
$t = self::educateSingleBackticks($t);
}
}
if ($do_quotes) {
$t = self::educateBackticks($t);
if ($t == "'") {
// Special case: single-character ' token.
if (preg_match('/\\S/', $prev_token_last_char)) {
$t = $quotes[3];
}
else {
$t = $quotes[2];
}
}
elseif ($t == '"') {
// Special case: single-character " token.
if (preg_match('/\\S/', $prev_token_last_char)) {
$t = $quotes[1];
}
else {
$t = $quotes[0];
}
}
else {
// Normal case:
$t = self::educateQuotes($t, $quotes);
}
}
if ($do_stupefy) {
$t = self::stupefyEntities($t);
}
}
$prev_token_last_char = $last_char;
$result .= $t;
}
}
return $result;
}
/**
* SmartQuotes.
*
* @param string $text
* Text to be parsed.
* @param string $attr
* Value of the smart_quotes="" attribute.
* @param string $ctx
* MT context object (unused).
*
* @return string
* $text with replacements.
*/
protected static function smartQuotes($text, $attr = NULL, $ctx = NULL) {
if ($attr == NULL) {
$attr = self::$smartypantsAttr;
}
// Should we educate ``backticks'' -style quotes?
$do_backticks;
if ($attr == 0) {
// Do nothing.
return $text;
}
elseif ($attr == 2) {
// Smarten ``backticks'' -style quotes.
$do_backticks = 1;
}
else {
$do_backticks = 0;
}
// Special case to handle quotes at the very end of $text when preceded by
// an HTML tag. Add a space to give the quote education algorithm a bit of
// context, so that it can guess correctly that it's a closing quote:
$add_extra_space = 0;
if (preg_match("/>['\"]\\z/", $text)) {
// Remember, so we can trim the extra space later.
$add_extra_space = 1;
$text .= " ";
}
$tokens = self::tokenizeHtml($text);
$result = '';
// Keep track of when we're inside <pre> or <code> tags.
$in_pre = 0;
// This is a cheat, used to get some context
// for one-character tokens that consist of
// just a quote char. What we do is remember
// the last character of the previous text
// token, to use as context to curl single-
// character quote tokens correctly.
$prev_token_last_char = "";
foreach ($tokens as $cur_token) {
if ($cur_token[0] == "tag") {
// Don't mess with quotes inside tags.
$result .= $cur_token[1];
if (preg_match(self::SMARTYPANTS_TAGS_TO_SKIP, $cur_token[1], $matches)) {
$in_pre = isset($matches[1]) && $matches[1] == '/' ? 0 : 1;
}
}
else {
$t = $cur_token[1];
// Remember last char of this token before processing.
$last_char = mb_substr($t, -1);
if (!$in_pre) {
$t = self::processEscapes($t);
if ($do_backticks) {
$t = self::educateBackticks($t);
}
if ($t == "'") {
// Special case: single-character ' token.
if (preg_match('/\\S/', $prev_token_last_char)) {
$t = "’";
}
else {
$t = "‘";
}
}
elseif ($t == '"') {
// Special case: single-character " token.
if (preg_match('/\\S/', $prev_token_last_char)) {
$t = "”";
}
else {
$t = "“";
}
}
else {
// Normal case:
$t = self::educateQuotes($t);
}
}
$prev_token_last_char = $last_char;
$result .= $t;
}
}
if ($add_extra_space) {
// Trim trailing space if we added one earlier.
preg_replace('/ \\z/', '', $result);
}
return $result;
}
/**
* SmartDashes.
*
* @param string $text
* Text to be parsed.
* @param string $attr
* Value of the smart_quotes="" attribute.
* @param string $ctx
* MT context object (unused).
*
* @return string
* $text with replacements.
*/
protected static function smartDashes($text, $attr = NULL, $ctx = NULL) {
if ($attr == NULL) {
$attr = self::$smartypantsAttr;
}
// Reference to the subroutine to use for dash education,
// default to educateDashes:
$dash_sub_ref = 'educateDashes';
if ($attr == 0) {
// Do nothing.
return $text;
}
elseif ($attr == 2) {
// Use old smart dash shortcuts, "--" for en, "---" for em.
$dash_sub_ref = 'educateDashesOldSchool';
}
elseif ($attr == 3) {
// Inverse of 2, "--" for em, "---" for en.
$dash_sub_ref = 'educateDashesOldSchoolInverted';
}
$tokens;
$tokens = self::tokenizeHtml($text);
$result = '';
// Keep track of when we're inside <pre> or <code> tags.
$in_pre = 0;
foreach ($tokens as $cur_token) {
if ($cur_token[0] == "tag") {
// Don't mess with quotes inside tags.
$result .= $cur_token[1];
if (preg_match(self::SMARTYPANTS_TAGS_TO_SKIP, $cur_token[1], $matches)) {
$in_pre = isset($matches[1]) && $matches[1] == '/' ? 0 : 1;
}
}
else {
$t = $cur_token[1];
if (!$in_pre) {
$t = self::processEscapes($t);
$t = self::$dash_sub_ref($t);
}
$result .= $t;
}
}
return $result;
}
/**
* SmartEllipses.
*
* @param string $text
* Text to be parsed.
* @param string $attr
* Value of the smart_quotes="" attribute.
* @param string $ctx
* MT context object (unused).
*
* @return string
* $text with replacements.
*/
protected static function smartEllipses($text, $attr = NULL, $ctx = NULL) {
if ($attr == NULL) {
$attr = self::$smartypantsAttr;
}
if ($attr == 0) {
// Do nothing.
return $text;
}
$tokens;
$tokens = self::tokenizeHtml($text);
$result = '';
// Keep track of when we're inside <pre> or <code> tags.
$in_pre = 0;
foreach ($tokens as $cur_token) {
if ($cur_token[0] == "tag") {
// Don't mess with quotes inside tags.
$result .= $cur_token[1];
if (preg_match(self::SMARTYPANTS_TAGS_TO_SKIP, $cur_token[1], $matches)) {
$in_pre = isset($matches[1]) && $matches[1] == '/' ? 0 : 1;
}
}
else {
$t = $cur_token[1];
if (!$in_pre) {
$t = self::processEscapes($t);
$t = self::educateEllipses($t);
}
$result .= $t;
}
}
return $result;
}
/**
* Replaces '=' with '­' for easier manual hyphenating.
*
* @param string $text
* The string to prettify.
*
* @return string
* The modified text.
*/
public static function hyphenate($text) {
$tokens;
$tokens = self::tokenizeHtml($text);
$equal_finder = '/(\\p{L})=(\\p{L})/u';
$result = '';
// Keep track of when we're inside <pre> or <code> tags.
$in_pre = 0;
foreach ($tokens as $cur_token) {
if ($cur_token[0] == "tag") {
// Don't mess with quotes inside tags.
$result .= $cur_token[1];
if (preg_match(self::SMARTYPANTS_TAGS_TO_SKIP, $cur_token[1], $matches)) {
$in_pre = isset($matches[1]) && $matches[1] == '/' ? 0 : 1;
}
}
else {
$t = $cur_token[1];
if (!$in_pre) {
$t = preg_replace($equal_finder, '\\1­\\2', $t);
}
$result .= $t;
}
}
return $result;
}
/**
* Adding space in numbers for easier reading aka digit grouping.
*
* @param string $text
* The string to prettify.
* @param int $attr
* The kind of space to use.
* @param array $ctx
* Not used.
*
* @return string
* The modified text.
*/
public static function smartNumbers($text, $attr = 0, array $ctx = NULL) {
$tokens;
$tokens = self::tokenizeHtml($text);
if ($attr == 0) {
return $text;
}
elseif ($attr == 1) {
$method = 'numberNarrownbsp';
}
elseif ($attr == 2) {
$method = 'numberThinsp';
}
elseif ($attr == 3) {
$method = 'numberSpan';
}
elseif ($attr == 4) {
$method = 'numberJustSpan';
}
$result = '';
// Keep track of when we're inside <pre> or <code> tags.
$in_pre = 0;
$span_stop = 0;
foreach ($tokens as $cur_token) {
if ($cur_token[0] == "tag") {
// Don't mess with quotes inside tags.
$result .= $cur_token[1];
if (preg_match(self::SMARTYPANTS_TAGS_TO_SKIP, $cur_token[1], $matches)) {
$in_pre = isset($matches[1]) && $matches[1] == '/' ? 0 : 1;
}
elseif (preg_match('/<span .*class="[^"]*\\b(number|phone|ignore)\\b[^"]*"/', $cur_token[1], $matches)) {
$span_stop = isset($matches[1]) && $matches[1] == '/' ? 0 : 1;
}
elseif ($cur_token[1] == '</span>') {
$span_stop = 0;
}
}
else {
$t = $cur_token[1];
if (!$in_pre && !$span_stop) {
$number_finder = '@(?:(&#\\d{2,4};|&(#x[0-9a-fA-F]{2,4}|frac\\d\\d);)|' . '(\\d{4}-\\d\\d-\\d\\d)|(\\d\\d\\.\\d\\d\\.\\d{4})|' . '(0[ \\d\\-/]+)|([+-]?\\d+)([.,]\\d+|))@';
$t = preg_replace_callback($number_finder, [
__CLASS__,
$method,
], $t);
}
$result .= $t;
}
}
return $result;
}
/**
* Internal: wraps and inserts space in numbers.
*
* @param array $hit
* A matcher-array from preg_replace_callback.
* @param string $thbl
* The kind of whitespace to add.
*
* @return string
* The first non-empty numeric string in $hit. Depending on where in the
* array it is, add the $thbl whitespace and wrap in a <span>.
*/
protected static function numberReplacer(array $hit, $thbl) {
if ($hit[1] != '') {
// Html-entity number: don't touch.
return $hit[1];
}
elseif ($hit[2] != '') {
// Html-entity number: don't touch.
return $hit[2];
}
elseif ($hit[3] != '') {
// Don't format german phone-numbers.
return $hit[3];
}
elseif ($hit[4] != '') {
// Date -`iso don't touch.
return $hit[4];
}
elseif ($hit[5] != '') {
// Date -`dd.mm.yyyy don't touch.
return $hit[5];
}
if (preg_match('/[+-]?\\d{5,}/', $hit[6]) == 1) {
$dec = preg_replace('/[+-]?\\d{1,3}(?=(\\d{3})+(?!\\d))/', '\\0' . $thbl, $hit[6]);
}
else {
$dec = $hit[6];
}
$frac = preg_replace('/\\d{3}/', '\\0' . $thbl, $hit[7]);
return '<span class="number">' . $dec . $frac . '</span>';
}
/**
* Wrapping numbers and adding whitespace '&narrownbsp;'.
*
* @param array $hit
* A matcher-array from preg_replace_callback.
*
* @return string
* The first non-empty numeric string in $hit. Depending on where in the
* array it is, add whitespace and wrap in a <span>.
*/
protected static function numberNarrownbsp(array $hit) {
return self::numberReplacer($hit, ' ');
}
/**
* Wrapping numbers and adding whitespace ' '.
*
* @param array $hit
* A matcher-array from preg_replace_callback.
*
* @return string
* The first non-empty numeric string in $hit. Depending on where in the
* array it is, add whitespace and wrap in a <span>.
*/
protected static function numberThinsp(array $hit) {
return self::numberReplacer($hit, ' ');
}
/**
* Wrapping numbers and adding whitespace by setting margin-left in a span.
*
* @param array $hit
* A matcher-array from preg_replace_callback.
*
* @return string
* The first non-empty numeric string in $hit. Depending on where in the
* array it is, add whitespace and wrap in a <span>.
*/
protected static function numberSpan(array $hit) {
$thbl = '<span style="margin-left:0.167em"></span>';
return self::numberReplacer($hit, $thbl);
}
/**
* Wrapping numbers and adding whitespace by setting margin-left in a span.
*
* @param array $hit
* A matcher-array from preg_replace_callback.
*
* @return string
* The first non-empty numeric string in $hit. Depending on where in the
* array it is, add whitespace and wrap in a <span>.
*/
protected static function numberJustSpan(array $hit) {
return self::numberReplacer($hit, '');
}
/**
* Wrapping abbreviations and adding half space between digit grouping.
*
* @param string $text
* The string to prettify.
* @param int $attr
* The kind of space to use.
* @param array $ctx
* Not used.
*
* @return string
* The modified text.
*/
public static function smartAbbreviation($text, $attr = 0, array $ctx = NULL) {
$tokens;
$tokens = self::tokenizeHtml($text);
$replace_method = 'abbrAsis';
if ($attr == 1) {
$replace_method = 'abbrNarrownbsp';
}
elseif ($attr == 2) {
$replace_method = 'abbrThinsp';
}
elseif ($attr == 3) {
$replace_method = 'abbrSpan';
}
$result = '';
// Keep track of when we're inside <pre> or <code> tags.
$in_pre = 0;
foreach ($tokens as $cur_token) {
if ($cur_token[0] == "tag") {
// Don't mess with quotes inside tags.
$result .= $cur_token[1];
if (preg_match(self::SMARTYPANTS_TAGS_TO_SKIP, $cur_token[1], $matches)) {
$in_pre = isset($matches[1]) && $matches[1] == '/' ? 0 : 1;
}
}
else {
$t = $cur_token[1];
if (!$in_pre) {
$abbr_finder = '/(?<=\\s|)(\\p{L}+\\.)(\\p{L}+\\.)+(?=\\s|)/u';
$t = preg_replace_callback($abbr_finder, [
__CLASS__,
$replace_method,
], $t);
}
$result .= $t;
}
}
return $result;
}
/**
* Wrapping abbreviations without adding whitespace.
*
* @param array $hit
* A matcher-array from preg_replace_callback.
*
* @return string
* The first element of the array, wrapped in a span.
*/
public static function abbrAsis(array $hit) {
return '<span class="abbr">' . $hit[0] . '</span>';
}
/**
* Wrapping abbreviations adding whitespace ' '.
*
* @param array $hit
* A matcher-array from preg_replace_callback.
*
* @return string
* The first element of the array, wrapped in a span.
*/
protected static function abbrThinsp(array $hit) {
$res = preg_replace('/\\.(\\p{L})/u', '. \\1', $hit[0]);
return '<span class="abbr">' . $res . '</span>';
}
/**
* Wrapping abbreviations adding whitespace '&narrownbsp;'.
*
* @param array $hit
* A matcher-array from preg_replace_callback.
*
* @return string
* The first element of the array, wrapped in a span.
*/
protected static function abbrNarrownbsp(array $hit) {
$res = preg_replace('/\\.(\\p{L})/u', '. \\1', $hit[0]);
return '<span class="abbr">' . $res . '</span>';
}
/**
* Wrapping abbreviations adding whitespace by setting margin-left in a span.
*
* @param array $hit
* A matcher-array from preg_replace_callback.
*
* @return string
* The first element of the array, wrapped in a span.
*/
protected static function abbrSpan(array $hit) {
$thbl = '.<span style="margin-left:0.167em"><span style="display:none"> </span></span>';
$res = preg_replace('/\\.(\\p{L})/u', $thbl . '\\1', $hit[0]);
return '<span class="abbr">' . $res . '</span>';
}
/**
* Wrapping ampersands.
*
* @param string $text
* The text to work on.
*
* @return string
* The text with '&' characters replaced, except when inside <pre> or <code>
* tags.
*/
public static function smartAmpersand($text) {
$tokens;
$tokens = self::tokenizeHtml($text);
$result = '';
// Keep track of when we're inside <pre> or <code> tags.
$in_pre = 0;
foreach ($tokens as $cur_token) {
if ($cur_token[0] == "tag") {
// Don't mess with quotes inside tags.
$result .= $cur_token[1];
if (preg_match(self::SMARTYPANTS_TAGS_TO_SKIP, $cur_token[1], $matches)) {
$in_pre = isset($matches[1]) && $matches[1] == '/' ? 0 : 1;
}
}
else {
$t = $cur_token[1];
if (!$in_pre) {
$t = Typogrify::amp($t);
}
$result .= $t;
}
}
return $result;
}
/**
* EducatedQuotes.
*
* Example input: "Isn't this fun?"
* Example output: [0]Isn’t this fun?[1];
*
* @param string $_
* Input text.
* @param string[] $quotes
* An array of four quote characters: open/close, single/double.
*
* @return string
* The string, with "educated" curly quote HTML entities.
*/
protected static function educateQuotes($_, array $quotes) {
// Make our own "punctuation" character class, because the POSIX-style
// [:PUNCT:] is only available in Perl 5.6 or later:
$punct_class = "[!\"#\\\$\\%'()*+,-.\\/:;<=>?\\@\\[\\\\\\]\\^_`{|}~]";
// Special case if the very first character is a quote
// followed by punctuation at a non-word-break.
// Close the quotes by brute force:
$_ = preg_replace([
"/^'(?={$punct_class}\\B)/",
"/^\"(?={$punct_class}\\B)/",
], [
$quotes[3],
$quotes[1],
], $_);
// Special case for double sets of quotes, e.g.:
// <p>They said, "'Quoted' words in a larger quote."</p>.
$spacer = ' ';
$_ = preg_replace([
"/\"'(?=\\p{L})/u",
"/'\"(?=\\p{L})/u",
"/(\\p{L})\"'/u",
"/(\\p{L})'\"/u",
], [
$quotes[0] . $spacer . $quotes[2],
$quotes[2] . $spacer . $quotes[0],
'\\1' . $quotes[1] . $spacer . $quotes[3],
'\\1' . $quotes[3] . $spacer . $quotes[1],
], $_);
// Special case for decade abbreviations (the '80s):
$_ = preg_replace("/'(?=\\d{2}s)/", '’', $_);
// Special case for apostroph.
$_ = preg_replace("/(\\p{L}|\\p{N})(')(?=\\p{L}|\$)/u", '\\1’', $_);
$close_class = '[^\\ \\t\\r\\n\\[\\{\\(\\-]';
$dec_dashes = '&\\#8211;|&\\#8212;';
// Get most opening single quotes:
$_ = preg_replace("{\n (\n \\s | # a whitespace char, or\n | # a non-breaking space entity, or\n -- | # dashes, or\n &[mn]dash; | # named dash entities\n {$dec_dashes} | # or decimal entities\n &\\#x201[34]; # or hex\n )\n ' # the quote\n (?=\\p{L}) # followed by a word character\n }x", '\\1' . $quotes[2], $_);
// Single closing quotes:
$_ = preg_replace("{\n ({$close_class})?\n '\n (?(1)| # If \$1 captured, then do nothing;\n (?=\\s | s\\b) # otherwise, positive lookahead for a whitespace\n ) # char or an 's' at a word ending position. This\n # is a special case to handle something like:\n # \"<i>Custer</i>'s Last Stand.\"\n }xi", '\\1' . $quotes[3], $_);
// Any remaining single quotes should be opening ones:
$_ = str_replace("'", $quotes[2], $_);
// Get most opening double quotes:
$_ = preg_replace("{\n (\n \\s | # a whitespace char, or\n | # a non-breaking space entity, or\n -- | # dashes, or\n &[mn]dash; | # named dash entities\n {$dec_dashes} | # or decimal entities\n &\\#x201[34]; # or hex\n )\n \" # the quote\n (?=\\p{L}) # followed by a word character\n }x", '\\1' . $quotes[0], $_);
// Double closing quotes:
$_ = preg_replace("{\n ({$close_class})?\n \"\n (?(1)|(?=\\s)) # If \$1 captured, then do nothing;\n # if not, then make sure the next char is whitespace.\n }x", '\\1' . $quotes[1], $_);
// Any remaining quotes should be opening ones.
$_ = str_replace('"', $quotes[0], $_);
return $_;
}
/**
* Educate backticks.
*
* Example input: ``Isn't this fun?''
* Example output: “Isn't this fun?”.
*
* @param string $_
* Input text.
*
* @return string
* The string, with ``backticks'' -style double quotes
* translated into HTML curly quote entities.
*/
protected static function educateBackticks($_) {
$_ = str_replace([
"``",
"''",
",,",
], [
'“',
'”',
'„',
], $_);
return $_;
}
/**
* Educate single backticks.
*
* Example input: `Isn't this fun?'
* Example output: ‘Isn’t this fun?’
*
* @param string $_
* Input text.
*
* @return string
* The string, with `backticks' -style single quotes
* translated into HTML curly quote entities.
*/
protected static function educateSingleBackticks($_) {
$_ = str_replace([
"`",
"'",
], [
'‘',
'’',
], $_);
return $_;
}
/**
* Educate dashes.
*
* Example input: `Isn't this fun?'
* Example output: ‘Isn’t this fun?’
*
* @param string $_
* Input text.
*
* @return string
* The string, with each instance of "--" translated to
* an em-dash HTML entity.
*/
protected static function educateDashes($_) {
$_ = str_replace('--', '—', $_);
return $_;
}
/**
* EducateDashesOldSchool.
*
* @param string $_
* Input text.
*
* @return string
* The string, with each instance of "--" translated to
* an en-dash HTML entity, and each "---" translated to
* an em-dash HTML entity.
*/
protected static function educateDashesOldSchool($_) {
$_ = str_replace([
"---",
"--",
], [
'—',
'–',
], $_);
return $_;
}
/**
* EducateDashesOldSchoolInverted.
*
* @param string $_
* Input text.
*
* @return string
* The string, with each instance of "--" translated to an em-dash HTML
* entity, and each "---" translated to an en-dash HTML entity. Two
* reasons why: First, unlike the en- and em-dash syntax supported by
* educateDashesOldSchool(), it's compatible with existing entries written
* before SmartyPants 1.1, back when "--" was only used for em-dashes.
* Second, em-dashes are more common than en-dashes, and so it sort of makes
* sense that the shortcut should be shorter to type. (Thanks to Aaron
* Swartz for the idea.)
*/
public static function educateDashesOldSchoolInverted($_) {
// Dashes en em.
$_ = str_replace([
"---",
"--",
], [
'–',
'—',
], $_);
return $_;
}
/**
* EducateEllipses.
*
* Example input: Huh...?
* Example output: Huh…?
*
* @param string $_
* Input text.
*
* @return string
* The string, with each instance of "..." translated to
* an ellipsis HTML entity. Also converts the case where
* there are spaces between the dots.
*/
protected static function educateEllipses($_) {
$_ = str_replace([
"...",
". . .",
], '…', $_);
return $_;
}
/**
* StupefyEntities.
*
* Example input: “Hello — world.”
* Example output: "Hello -- world."
*
* @param string $_
* Input text.
*
* @return string
* The string, with each SmartyPants HTML entity translated to
* it's ASCII counterpart.
*/
protected static function stupefyEntities($_) {
// Dashes en-dash em-dash.
$_ = str_replace([
'–',
'—',
], [
'-',
'--',
], $_);
// Single quote open close.
$_ = str_replace([
'‘',
'’',
'‚',
], "'", $_);
// Double quote open close.
$_ = str_replace([
'“',
'”',
'„',
], '"', $_);
// Ellipsis.
$_ = str_replace('…', '...', $_);
return $_;
}
/**
* Process escapes.
*
* Escape Value
* ------ -----
* \\ \
* \" "
* \' '
* \. .
* \- -
* \` `
* \, ,
*
* @param string $_
* Input text.
*
* @return string
* The string, with after processing the following backslash
* escape sequences. This is useful if you want to force a "dumb"
* quote or other character to appear.
*/
public static function processEscapes($_) {
$_ = str_replace([
'\\\\',
'\\"',
"\\'",
'\\.',
'\\-',
'\\`',
'\\,',
'<',
'>',
'"',
''',
], [
'\',
'"',
''',
'.',
'-',
'`',
',',
'<',
'>',
'"',
'\'',
], $_);
return $_;
}
/**
* Space to nbsp.
*
* Replaces the space before a "double punctuation mark" (!?:;) with
* `` ``
* Especially useful in french.
*
* @param string $text
* The text to work on.
*
* @return string
* The modified text.
*/
public static function spaceToNbsp($text) {
$tokens;
$tokens = self::tokenizeHtml($text);
$result = '';
// Keep track of when we're inside <pre> or <code> tags.
$in_pre = 0;
foreach ($tokens as $cur_token) {
if ($cur_token[0] == "tag") {
// Don't mess with quotes inside tags.
$result .= $cur_token[1];
if (preg_match(self::SMARTYPANTS_TAGS_TO_SKIP, $cur_token[1], $matches)) {
$in_pre = isset($matches[1]) && $matches[1] == '/' ? 0 : 1;
}
}
else {
$t = $cur_token[1];
if (!$in_pre) {
$t = preg_replace("/\\s([\\!\\?\\:;])/", ' $1', $t);
}
$result .= $t;
}
}
return $result;
}
/**
* Space hyphens.
*
* Replaces a normal dash with em-dash between whitespaces.
*
* @param string $text
* The text to work on.
*
* @return string
* The modified text.
*/
public static function spaceHyphens($text) {
$tokens;
$tokens = self::tokenizeHtml($text);
$result = '';
// Keep track of when we're inside <pre> or <code> tags.
$in_pre = 0;
foreach ($tokens as $cur_token) {
if ($cur_token[0] == "tag") {
// Don't mess with quotes inside tags.
$result .= $cur_token[1];
if (preg_match(self::SMARTYPANTS_TAGS_TO_SKIP, $cur_token[1], $matches)) {
$in_pre = isset($matches[1]) && $matches[1] == '/' ? 0 : 1;
}
}
else {
$t = $cur_token[1];
if (!$in_pre) {
$t = preg_replace("/\\s(-{1,3})\\s/", ' — ', $t);
}
$result .= $t;
}
}
return $result;
}
/**
* Fallback Tokenizer if Markdown not present.
*
* Regular expression derived from the _tokenize() subroutine in
* Brad Choate's MTRegex plugin.
* <http://www.bradchoate.com/past/mtregex.php>
*
* @param string $str
* String containing HTML markup.
*
* @return array
* An array of the tokens comprising the input string. Each token is either
* a tag (possibly with nested, tags contained therein, such as
* <a href="<MTFoo>" />, or a run of text between tags. Each element of the
* array is a two-element array; the first is either 'tag' or 'text'; the
* second is the actual value.
*/
public static function tokenizeHtml($str) {
$index = 0;
$tokens = [];
// Comment
// Processing instruction
// Regular tags.
$match = '(?s:<!(?:--.*?--\\s*)+>)|';
$match .= '(?s:<\\?.*?\\?>)|';
$match .= '(?:<[/!$]?[-a-zA-Z0-9:]+\\b(?>[^"\'>]+|"[^"]*"|\'[^\']*\')*>)';
$parts = preg_split("{({$match})}", $str, -1, PREG_SPLIT_DELIM_CAPTURE);
foreach ($parts as $part) {
if (++$index % 2 && $part != '') {
$tokens[] = [
'text',
$part,
];
}
else {
$tokens[] = [
'tag',
$part,
];
}
}
return $tokens;
}
}
Members
Name | Modifiers | Type | Description | Overrides |
---|---|---|---|---|
SmartyPants:: |
protected static | property | Current SmartyPants configuration setting. | |
SmartyPants:: |
public static | function | Wrapping abbreviations without adding whitespace. | |
SmartyPants:: |
protected static | function | Wrapping abbreviations adding whitespace '&narrownbsp;'. | |
SmartyPants:: |
protected static | function | Wrapping abbreviations adding whitespace by setting margin-left in a span. | |
SmartyPants:: |
protected static | function | Wrapping abbreviations adding whitespace ' '. | |
SmartyPants:: |
protected static | function | Educate backticks. | |
SmartyPants:: |
protected static | function | Educate dashes. | |
SmartyPants:: |
protected static | function | EducateDashesOldSchool. | |
SmartyPants:: |
public static | function | EducateDashesOldSchoolInverted. | |
SmartyPants:: |
protected static | function | EducateEllipses. | |
SmartyPants:: |
protected static | function | EducatedQuotes. | |
SmartyPants:: |
protected static | function | Educate single backticks. | |
SmartyPants:: |
public static | function | Replaces '=' with '­' for easier manual hyphenating. | |
SmartyPants:: |
public static | function | Returns a locale-specific array of quotes. | |
SmartyPants:: |
public static | function | Wrapper method for SmartyPants. | |
SmartyPants:: |
protected static | function | Wrapping numbers and adding whitespace by setting margin-left in a span. | |
SmartyPants:: |
protected static | function | Wrapping numbers and adding whitespace '&narrownbsp;'. | |
SmartyPants:: |
protected static | function | Internal: wraps and inserts space in numbers. | |
SmartyPants:: |
protected static | function | Wrapping numbers and adding whitespace by setting margin-left in a span. | |
SmartyPants:: |
protected static | function | Wrapping numbers and adding whitespace ' '. | |
SmartyPants:: |
public static | function | SmartyPants. | |
SmartyPants:: |
public static | function | Process escapes. | |
SmartyPants:: |
public static | function | Wrapping abbreviations and adding half space between digit grouping. | |
SmartyPants:: |
public static | function | Wrapping ampersands. | |
SmartyPants:: |
protected static | function | SmartDashes. | |
SmartyPants:: |
protected static | function | SmartEllipses. | |
SmartyPants:: |
public static | function | Adding space in numbers for easier reading aka digit grouping. | |
SmartyPants:: |
protected static | function | SmartQuotes. | |
SmartyPants:: |
constant | Fri 9 Dec 2005. | ||
SmartyPants:: |
constant | Fri 12 Mar 2004. | ||
SmartyPants:: |
constant | Regex-pattern for tags we don't mess with. | ||
SmartyPants:: |
public static | function | Space hyphens. | |
SmartyPants:: |
public static | function | Space to nbsp. | |
SmartyPants:: |
protected static | function | StupefyEntities. | |
SmartyPants:: |
public static | function | Fallback Tokenizer if Markdown not present. |