protected function MarkdownExtra::_hashHTMLBlocks_inHTML in Markdown 7
Parse HTML, calling _hashHTMLBlocks_inMarkdown for block tags. Calls $hash_method to convert any blocks. Stops when the first opening tag closes. $md_attr indicates whether use of the `markdown="1"` attribute is allowed (it is not allowed inside clean tags). Returns an array of the form: ( processed text, remaining text ).
Parameters
string $text
string $hash_method
string $md_attr
Return value: array
1 call to MarkdownExtra::_hashHTMLBlocks_inHTML()
- MarkdownExtra::_hashHTMLBlocks_inMarkdown in includes/
MarkdownExtra.php - Parse markdown text, calling _hashHTMLBlocks_inHTML for block tags. $indent is the number of spaces to be ignored when checking for code blocks. This is important because if we don't take the indent into account,…
File
- includes/
MarkdownExtra.php, line 595
Class
- MarkdownExtra
- Markdown Extra Parser Class
Namespace
MichelfCode
/**
 * Parse HTML, calling _hashHTMLBlocks_inMarkdown for block tags.
 *
 * Walks $text tag by tag, starting from the tag at its beginning, and stops
 * once that first opening tag has been closed (nesting is tracked only for
 * tags sharing the first tag's name). Text that is plain HTML is passed
 * through $this->{$hash_method}(); the content of any tag carrying a
 * `markdown="1"`, `markdown="block"` or `markdown="span"` attribute is
 * handed to _hashHTMLBlocks_inMarkdown() instead.
 *
 * Side effect: when a `markdown` attribute is honoured, its value is stored
 * in $this->mode.
 *
 * @param string $text        Text to parse.
 * @param string $hash_method Name of the method on $this used to convert
 *                            each collected HTML block.
 * @param string $md_attr     Evaluated as a boolean: whether the `markdown`
 *                            attribute is allowed here.
 * @return array Two elements: ( processed text, remaining text ). If the
 *               tags are unbalanced, the first character of the original
 *               text is returned as "processed" and the rest as remaining.
 */
protected function _hashHTMLBlocks_inHTML($text, $hash_method, $md_attr) {
    if ($text === '') {
        return array(
            '',
            '',
        );
    }

    // Regex to match `markdown` attribute inside of a tag.
    $markdown_attr_re = '
        {
            \\s* # Eat whitespace before the `markdown` attribute
            markdown
            \\s*=\\s*
            (?>
                (["\']) # $1: quote delimiter
                (.*?) # $2: attribute value
                \\1 # matching delimiter
            |
                ([^\\s>]*) # $3: unquoted attribute value
            )
            () # $4: make $3 always defined (avoid warnings)
        }xs';

    // Regex to match any tag.
    $tag_re = '{
            ( # $2: Capture whole tag.
                </? # Any opening or closing tag.
                    [\\w:$]+ # Tag name.
                    (?:
                        (?=[\\s"\'/a-zA-Z0-9]) # Allowed characters after tag name.
                        (?>
                            ".*?" | # Double quotes (can contain `>`)
                            \'.*?\' | # Single quotes (can contain `>`)
                            .+? # Anything but quotes and `>`.
                        )*?
                    )?
                > # End of tag.
            |
                <!-- .*? --> # HTML Comment
            |
                <\\?.*?\\?> | <%.*?%> # Processing instruction
            |
                <!\\[CDATA\\[.*?\\]\\]> # CData Block
            )
        }xs';

    // Save original text in case of failure.
    $original_text = $text;
    // Current depth inside the tag tree.
    $depth = 0;
    // Temporary text holder for current text.
    $block_text = "";
    // Parsed text that will be returned.
    $parsed = "";

    // Get the name of the starting tag.
    // (This pattern makes $base_tag_name_re safe without quoting.)
    // Default to an empty name so the variable is always defined, even when
    // $text does not begin with a named tag (e.g. a comment).
    $base_tag_name_re = '';
    if (preg_match('/^<([\\w:$]*)\\b/', $text, $matches)) {
        $base_tag_name_re = $matches[1];
    }

    // Loop through every tag until we find the corresponding closing tag.
    do {
        // Split the text using the first $tag_re pattern found.
        // Text before pattern will be first in the array, text after
        // pattern will be at the end, and between will be any catches made
        // by the pattern.
        $parts = preg_split($tag_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE);

        if (count($parts) < 3) {
            // End of $text reached with unbalanced tag(s).
            // In that case, we return original text unchanged and pass the
            // first character as filtered to prevent an infinite loop in the
            // parent function.
            return array(
                $original_text[0],
                substr($original_text, 1),
            );
        }

        // Text before current tag.
        $block_text .= $parts[0];
        // Tag to handle.
        $tag = $parts[1];
        // Remaining text after current tag.
        $text = $parts[2];

        // Check for: Auto-close tag (like <hr/>)
        // Comments and Processing Instructions.
        if (preg_match('{^</?(?:' . $this->auto_close_tags_re . ')\\b}', $tag) || $tag[1] === '!' || $tag[1] === '?') {
            // Just add the tag to the block as if it was text.
            $block_text .= $tag;
            continue;
        }

        // Increase/decrease nested tag count. Only do so if
        // the tag's name matches the base tag's.
        if (preg_match('{^</?' . $base_tag_name_re . '\\b}', $tag)) {
            if ($tag[1] === '/') {
                $depth--;
            }
            elseif ($tag[strlen($tag) - 2] !== '/') {
                // Self-closed tags (`<foo />`) do not open a new level.
                $depth++;
            }
        }

        // Check for `markdown="1"` attribute and handle it.
        // The alternation is grouped so the anchors apply to every
        // alternative: only exactly `1`, `block` or `span` is accepted.
        if ($md_attr && preg_match($markdown_attr_re, $tag, $attr_m) && preg_match('/^(?:1|block|span)$/', $attr_m[2] . $attr_m[3])) {
            // Remove `markdown` attribute from opening tag.
            $tag = preg_replace($markdown_attr_re, '', $tag);

            // Check if text inside this tag must be parsed in span mode.
            $this->mode = $attr_m[2] . $attr_m[3];
            $span_mode = $this->mode === 'span' || $this->mode !== 'block' && preg_match('{^<(?:' . $this->contain_span_tags_re . ')\\b}', $tag);

            // Calculate indent before tag.
            if (preg_match('/(?:^|\\n)( *?)(?! ).*?$/', $block_text, $matches)) {
                $strlen = $this->utf8_strlen;
                $indent = $strlen($matches[1], 'UTF-8');
            }
            else {
                $indent = 0;
            }

            // End preceding block with this tag.
            $block_text .= $tag;
            $parsed .= $this->{$hash_method}($block_text);

            // Get enclosing tag name for the ParseMarkdown function.
            // (This pattern makes $tag_name_re safe without quoting.)
            // Fall back to an empty name when the tag has no leading name
            // (e.g. a closing tag), rather than reading an undefined index.
            $tag_name_re = preg_match('/^<([\\w:$]*)\\b/', $tag, $matches) ? $matches[1] : '';

            // Parse the content using the HTML-in-Markdown parser.
            list($block_text, $text) = $this->_hashHTMLBlocks_inMarkdown($text, $indent, $tag_name_re, $span_mode);

            // Outdent markdown text.
            if ($indent > 0) {
                $block_text = preg_replace("/^[ ]{1,{$indent}}/m", "", $block_text);
            }

            // Append tag content to parsed text.
            if (!$span_mode) {
                $parsed .= "\n\n{$block_text}\n\n";
            }
            else {
                $parsed .= "{$block_text}";
            }

            // Start over with a new block.
            $block_text = "";
        }
        else {
            $block_text .= $tag;
        }
    } while ($depth > 0);

    // Hash last block text that wasn't processed inside the loop.
    $parsed .= $this->{$hash_method}($block_text);

    return array(
        $parsed,
        $text,
    );
}