protected function MarkdownExtra::_hashHTMLBlocks_inMarkdown in Markdown 7
* Parse Markdown text, calling _hashHTMLBlocks_inHTML for block tags. * * * $indent is the number of spaces to be ignored when checking for code * blocks. This is important because if we don't take the indent into * account, something like this (which looks right) won't work as expected: * * <div> * <div markdown="1"> * Hello World. <-- Is this a Markdown code block or text? * </div> <-- Is this a Markdown code block or a real tag? * <div> * * If you don't like this, just don't indent the tag on which * you apply the markdown="1" attribute. * * * If $enclosing_tag_re is not empty, parsing stops at the first unmatched closing * tag with that name. Nested tags are supported. * * * If $span is true, the text inside must be treated as span-level content. So any double * newline will be replaced by a single newline so that it does not create * paragraphs. * * Returns an array of the form: ( processed text , remaining text ) * *
Parameters
string $text: * @param integer $indent * @param string $enclosing_tag_re * @param boolean $span * @return array
2 calls to MarkdownExtra::_hashHTMLBlocks_inMarkdown()
- MarkdownExtra::hashHTMLBlocks in includes/
MarkdownExtra.php - * Hashify HTML Blocks and "clean tags". * * We only want to do this for block-level HTML tags, such as headers, * lists, and tables. That's because we still want to wrap <p>s around * "paragraphs" that are…
- MarkdownExtra::_hashHTMLBlocks_inHTML in includes/
MarkdownExtra.php - * Parse HTML, calling _hashHTMLBlocks_inMarkdown for block tags. * * * Calls $hash_method to convert any blocks. * * Stops when the first opening tag closes. * * $md_attr indicates whether the use of the `markdown="1"` attribute is…
File
- includes/
MarkdownExtra.php, line 382
Class
- MarkdownExtra
- Markdown Extra Parser Class
Namespace
MichelfCode
/**
 * Parses Markdown text, handing HTML tags over to _hashHTMLBlocks_inHTML().
 *
 * The text is scanned one "tag" at a time, where a tag is an HTML tag, an
 * HTML comment, a processing instruction, a CDATA block, a code span marker
 * or (outside span mode) an indented or fenced code block. Code is passed
 * through untouched; block and clean HTML tags are delegated to
 * _hashHTMLBlocks_inHTML() and replaced by whatever it returns.
 *
 * @param string $text
 *   The Markdown text to scan.
 * @param int $indent
 *   Number of leading spaces to ignore when looking for code blocks: an
 *   indented code block needs $indent + 4 spaces and a fence marker may be
 *   preceded by up to $indent + 3 spaces.
 * @param string $enclosing_tag_re
 *   Regex fragment matching the name of the enclosing tag. When not empty,
 *   nesting of that tag is tracked and scanning stops at its first unmatched
 *   closing tag, which is left at the start of the remaining text.
 * @param bool $span
 *   When TRUE the text is treated as span-level content: the code block
 *   patterns are left out of the tag regex, and the token returned by
 *   hashPart("", ':') is wrapped around each text chunk and inserted after
 *   every newline in it so that no block element gets triggered.
 *
 * @return array
 *   A two-element array: (processed text, remaining text).
 */
protected function _hashHTMLBlocks_inMarkdown($text, $indent = 0, $enclosing_tag_re = '', $span = false) {
  if ($text === '') {
    return array('', '');
  }

  // NOTE: the continuation lines of the multi-line patterns below are part
  // of the string literals and are deliberately left flush with the margin.

  // Regex to check for the presence of newlines around a block tag.
  $newline_before_re = '/(?:^\\n?|\\n\\n)*$/';
  $newline_after_re = '{
^ # Start of text following the tag.
(?>[ ]*<!--.*?-->)? # Optional comment.
[ ]*\\n # Must be followed by newline.
}xs';

  // Regex to match any tag.
  $block_tag_re = '{
( # $2: Capture whole tag.
</? # Any opening or closing tag.
(?> # Tag name.
' . $this->block_tags_re . ' |
' . $this->context_block_tags_re . ' |
' . $this->clean_tags_re . ' |
(?!\\s)' . $enclosing_tag_re . '
)
(?:
(?=[\\s"\'/a-zA-Z0-9]) # Allowed characters after tag name.
(?>
".*?" | # Double quotes (can contain `>`)
\'.*?\' | # Single quotes (can contain `>`)
.+? # Anything but quotes and `>`.
)*?
)?
> # End of tag.
|
<!-- .*? --> # HTML Comment
|
<\\?.*?\\?> | <%.*?%> # Processing instruction
|
<!\\[CDATA\\[.*?\\]\\]> # CData Block
' . (!$span ? ' # If not in span.
|
# Indented code block
(?: ^[ ]*\\n | ^ | \\n[ ]*\\n )
[ ]{' . ($indent + 4) . '}[^\\n]* \\n
(?>
(?: [ ]{' . ($indent + 4) . '}[^\\n]* | [ ]* ) \\n
)*
|
# Fenced code block marker
(?<= ^ | \\n )
[ ]{0,' . ($indent + 3) . '}(?:~{3,}|`{3,})
[ ]*
(?: \\.?[-_:a-zA-Z0-9]+ )? # standalone class name
[ ]*
(?: ' . $this->id_class_attr_nocatch_re . ' )? # extra attributes
[ ]*
(?= \\n )
' : '') . ' # End (if not is span).
|
# Code span marker
# Note, this regex needs to go after backtick fenced
# code blocks but it should also be kept outside of the
# "if not in span" condition adding backticks to the parser
`+
)
}xs';

  // Current depth inside the tag tree.
  $depth = 0;
  // Parsed text that will be returned.
  $parsed = "";

  // Loop through every tag until we find the closing tag of the parent
  // or loop until reaching the end of text if no parent tag specified.
  do {
    // Split the text at the first match of $block_tag_re. The text before
    // the match comes first in the array, the captured tag second and the
    // text after the match last.
    $parts = preg_split($block_tag_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE);
    if ($parts === false) {
      // preg_split() reports failure (for instance a PCRE limit being hit)
      // by returning false. Treat the rest of the text as containing no
      // further tags so that it is passed through instead of being lost.
      $parts = array($text);
    }

    // If in Markdown span mode, add an empty-string span-level hash
    // after each newline to prevent triggering any block element.
    if ($span) {
      $void = $this->hashPart("", ':');
      $newline = "\n{$void}";
      $parts[0] = $void . str_replace("\n", $newline, $parts[0]) . $void;
    }

    // Text before current tag.
    $parsed .= $parts[0];

    // If end of $text has been reached. Stop loop.
    if (count($parts) < 3) {
      $text = "";
      break;
    }

    // Tag to handle.
    $tag = $parts[1];
    // Remaining text after current tag.
    $text = $parts[2];

    // Check for: Fenced code block marker.
    // Note: need to recheck the whole tag to disambiguate backtick
    // fences from code spans.
    if (preg_match('{^\\n?([ ]{0,' . ($indent + 3) . '})(~{3,}|`{3,})[ ]*(?:\\.?[-_:a-zA-Z0-9]+)?[ ]*(?:' . $this->id_class_attr_nocatch_re . ')?[ ]*\\n?$}', $tag, $capture)) {
      // Fenced code block marker: find matching end marker, which must use
      // the captured indent and the captured fence.
      $fence_indent = strlen($capture[1]);
      $fence_re = $capture[2];
      if (preg_match('{^(?>.*\\n)*?[ ]{' . $fence_indent . '}' . $fence_re . '[ ]*(?:\\n|$)}', $text, $matches)) {
        // End marker found: pass text unchanged until marker.
        $parsed .= $tag . $matches[0];
        $text = substr($text, strlen($matches[0]));
      }
      else {
        // No end marker: just skip it.
        $parsed .= $tag;
      }
    }
    elseif ($tag[0] === "\n" || $tag[0] === " ") {
      // Indented code block: pass it unchanged, will be handled later.
      $parsed .= $tag;
    }
    elseif ($tag[0] === "`") {
      // Code span marker: find corresponding end marker, i.e. a run of
      // backticks of the same length before the next blank line.
      $tag_re = preg_quote($tag);
      if (preg_match('{^(?>.+?|\\n(?!\\n))*?(?<!`)' . $tag_re . '(?!`)}', $text, $matches)) {
        // End marker found: pass text unchanged until marker.
        $parsed .= $tag . $matches[0];
        $text = substr($text, strlen($matches[0]));
      }
      else {
        // Unmatched marker: just skip it.
        $parsed .= $tag;
      }
    }
    elseif (
      preg_match('{^<(?:' . $this->block_tags_re . ')\\b}', $tag)
      || (
        preg_match('{^<(?:' . $this->context_block_tags_re . ')\\b}', $tag)
        && preg_match($newline_before_re, $parsed)
        && preg_match($newline_after_re, $text)
      )
    ) {
      // Opening block tag, or opening context block tag that has newlines
      // around it: parse tag and following text using the HTML parser.
      list($block_text, $text) = $this->_hashHTMLBlocks_inHTML($tag . $text, "hashBlock", true);
      // Make sure it stays outside of any paragraph by adding newlines.
      $parsed .= "\n\n{$block_text}\n\n";
    }
    elseif (preg_match('{^<(?:' . $this->clean_tags_re . ')\\b}', $tag) || $tag[1] === '!' || $tag[1] === '?') {
      // Clean tag, comment, CDATA block or processing instruction: parse
      // tag and following text using the HTML parser.
      // (don't check for markdown attribute)
      list($block_text, $text) = $this->_hashHTMLBlocks_inHTML($tag . $text, "hashClean", false);
      $parsed .= $block_text;
    }
    elseif ($enclosing_tag_re !== '' && preg_match('{^</?(?:' . $enclosing_tag_re . ')\\b}', $tag)) {
      // Tag with the same name as the enclosing one: increase/decrease
      // nested tag count. A self-closing tag ("/>") leaves it unchanged.
      if ($tag[1] === '/') {
        $depth--;
      }
      elseif ($tag[strlen($tag) - 2] !== '/') {
        $depth++;
      }

      if ($depth < 0) {
        // Going out of parent element. Put the closing tag back in front of
        // the remaining text and return to the calling function.
        $text = $tag . $text;
        break;
      }
      $parsed .= $tag;
    }
    else {
      // Any other tag is passed through unchanged.
      $parsed .= $tag;
    }
  } while ($depth >= 0);

  return array($parsed, $text);
}