You are here

function MarkdownExtra_Parser::_hashHTMLBlocks_inMarkdown in Markdown 5

Same name and namespace in other branches
  1. 6 markdown.php \MarkdownExtra_Parser::_hashHTMLBlocks_inMarkdown()
2 calls to MarkdownExtra_Parser::_hashHTMLBlocks_inMarkdown()
MarkdownExtra_Parser::hashHTMLBlocks in ./markdown.php
MarkdownExtra_Parser::_hashHTMLBlocks_inHTML in ./markdown.php

File

./markdown.php, line 1772

Class

MarkdownExtra_Parser

Code

function _hashHTMLBlocks_inMarkdown($text, $indent = 0, $enclosing_tag_re = '', $span = false) {
  #
  # Parse Markdown text, calling _hashHTMLBlocks_inHTML for block tags.
  # Code spans, indented code blocks and fenced code blocks are copied
  # through untouched so that tags inside them are not treated as HTML.
  #
  # *   $text is the Markdown source to scan.
  #
  # *   $indent is the number of spaces to be ignored when checking for code
  #     blocks. This is important because if we don't take the indent into
  #     account, something like this (which looks right) won't work as expected:
  #
  #     <div>
  #         <div markdown="1">
  #         Hello World.  <-- Is this a Markdown code block or text?
  #         </div>  <-- Is this a Markdown code block or a real tag?
  #     </div>
  #
  #     If you don't like this, just don't indent the tag on which
  #     you apply the markdown="1" attribute.
  #
  # *   If $enclosing_tag_re is not empty, stops at the first unmatched closing
  #     tag with that name. Nested tags are supported.
  #
  # *   If $span is true, text inside must be treated as span-level content:
  #     every newline gets an empty span-level hash attached so that it cannot
  #     start a block element, and code blocks are not looked for.
  #
  # Returns an array of that form: ( processed text , remaining text )
  #
  if ($text === '') {
    return array(
      '',
      '',
    );
  }

  # Regex to check for the presence of newlines around a block tag.
  $newline_before_re = '/(?:^\\n?|\\n\\n)*$/';
  $newline_after_re = '{
				^						# Start of text following the tag.
				(?>[ ]*<!--.*?-->)?		# Optional comment.
				[ ]*\\n					# Must be followed by newline.
			}xs';

  # Regex to match any tag (or code marker) the loop below must react to.
  # The whole match is captured as group 1 so preg_split() returns it.
  $block_tag_re = '{
				(					# $2: Capture hole tag.
					</?					# Any opening or closing tag.
						(?>				# Tag name.
							' . $this->block_tags_re . '			|
							' . $this->context_block_tags_re . '	|
							' . $this->clean_tags_re . '        	|
							(?!\\s)' . $enclosing_tag_re . '
						)
						(?:
							(?=[\\s"\'/])		# Allowed characters after tag name.
							(?>
								".*?"		|	# Double quotes (can contain `>`)
								\'.*?\'   	|	# Single quotes (can contain `>`)
								.+?				# Anything but quotes and `>`.
							)*?
						)?
					>					# End of tag.
				|
					<!--    .*?     -->	# HTML Comment
				|
					<\\?.*?\\?> | <%.*?%>	# Processing instruction
				|
					<!\\[CDATA\\[.*?\\]\\]>	# CData Block
				|
					# Code span marker
					`+
				' . (!$span ? ' # If not in span.
				|
					# Indented code block
					(?> ^[ ]*\\n? | \\n[ ]*\\n )
					[ ]{' . ($indent + 4) . '}[^\\n]* \\n
					(?>
						(?: [ ]{' . ($indent + 4) . '}[^\\n]* | [ ]* ) \\n
					)*
				|
					# Fenced code block marker
					(?> ^ | \\n )
					[ ]{' . $indent . '}~~~+[ ]*\\n
				' : '') . ' # End (if not is span).
				)
			}xs';

  # Recognises a captured fenced code block marker. The marker is indented
  # by exactly $indent spaces, so looking at the second character of the
  # match (as was done previously) confuses it with an indented code block
  # whenever $indent > 0. An indented code block always carries at least
  # $indent + 4 spaces, hence it can never match this pattern.
  $fence_marker_re = '{^\\n?[ ]{' . $indent . '}~~~+[ ]*\\n\\z}';

  # Current depth inside the tag tree.
  $depth = 0;

  # Parsed text that will be returned.
  $parsed = "";

  #
  # Loop through every tag until we find the closing tag of the parent
  # or loop until reaching the end of text if no parent tag specified.
  #
  do {
    #
    # Split the text using the first $block_tag_re match found.
    # Text before the match will be first in the array, text after the
    # match will be at the end, and between will be the captured tag.
    #
    $parts = preg_split($block_tag_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE);
    if ($parts === false) {
      # preg_split() reports PCRE failures (e.g. backtrack limit) with
      # false. Pass the remaining text through unchanged rather than
      # silently dropping it.
      $parts = array($text);
    }

    # If in Markdown span mode, add an empty-string span-level hash
    # after each newline to prevent triggering any block element.
    if ($span) {
      $void = $this
        ->hashPart("", ':');
      $newline = "{$void}\n";
      $parts[0] = $void . str_replace("\n", $newline, $parts[0]) . $void;
    }

    # Text before current tag.
    $parsed .= $parts[0];

    # If end of $text has been reached. Stop loop.
    if (count($parts) < 3) {
      $text = "";
      break;
    }

    # Tag to handle.
    $tag = $parts[1];

    # Remaining text after current tag.
    $text = $parts[2];

    if ($tag[0] == "`") {
      #
      # Code span marker: find corresponding end marker.
      #
      $tag_re = preg_quote($tag);
      if (preg_match('{^(?>.+?|\\n(?!\\n))*?(?<!`)' . $tag_re . '(?!`)}', $text, $matches)) {
        # End marker found: pass text unchanged until marker.
        $parsed .= $tag . $matches[0];
        $text = substr($text, strlen($matches[0]));
      }
      else {
        # Unmatched marker: just skip it.
        $parsed .= $tag;
      }
    }
    elseif (preg_match($fence_marker_re, $tag)) {
      #
      # Fenced code block marker: find matching end marker, which is
      # expected at the same $indent as the opening marker.
      #
      $tag_re = preg_quote(trim($tag));
      if (preg_match('{^(?>.*\\n)+?[ ]{' . $indent . '}' . $tag_re . ' *\\n}', $text, $matches)) {
        # End marker found: pass text unchanged until marker.
        $parsed .= $tag . $matches[0];
        $text = substr($text, strlen($matches[0]));
      }
      else {
        # No end marker: just skip it.
        $parsed .= $tag;
      }
    }
    elseif ($tag[0] == "\n" || $tag[0] == " ") {
      #
      # Indented code block: pass it unchanged, will be handled later.
      #
      $parsed .= $tag;
    }
    elseif (preg_match('{^<(?:' . $this->block_tags_re . ')\\b}', $tag) || (preg_match('{^<(?:' . $this->context_block_tags_re . ')\\b}', $tag) && preg_match($newline_before_re, $parsed) && preg_match($newline_after_re, $text))) {
      #
      # Opening block tag, or opening context block tag standing on its
      # own line: parse tag and following text using the HTML parser.
      #
      list($block_text, $text) = $this
        ->_hashHTMLBlocks_inHTML($tag . $text, "hashBlock", true);

      # Make sure it stays outside of any paragraph by adding newlines.
      $parsed .= "\n\n{$block_text}\n\n";
    }
    elseif (preg_match('{^<(?:' . $this->clean_tags_re . ')\\b}', $tag) || $tag[1] == '!' || $tag[1] == '?') {
      #
      # Clean tag, comment, CDATA or processing instruction: parse tag
      # and following text using the HTML parser (don't check for the
      # markdown attribute).
      #
      list($block_text, $text) = $this
        ->_hashHTMLBlocks_inHTML($tag . $text, "hashClean", false);
      $parsed .= $block_text;
    }
    elseif ($enclosing_tag_re !== '' && preg_match('{^</?(?:' . $enclosing_tag_re . ')\\b}', $tag)) {
      #
      # Increase/decrease nested tag count. Self-closing tags (ending
      # in "/>") leave the depth untouched.
      #
      if ($tag[1] == '/') {
        $depth--;
      }
      elseif ($tag[strlen($tag) - 2] != '/') {
        $depth++;
      }

      if ($depth < 0) {
        #
        # Going out of parent element. Put the closing tag back in
        # front of the remaining text and break so we return to the
        # calling function.
        #
        $text = $tag . $text;
        break;
      }
      $parsed .= $tag;
    }
    else {
      # Any other tag (e.g. a closing block tag): copy it as is.
      $parsed .= $tag;
    }
  } while ($depth >= 0);

  return array(
    $parsed,
    $text,
  );
}