class MarkdownExtra_Parser in Markdown 5

Same name and namespace in other branches

6 markdown.php \MarkdownExtra_Parser

Hierarchy

class \Markdown_Parser
- class \MarkdownExtra_Parser

Expanded class hierarchy of MarkdownExtra_Parser

1 string reference to 'MarkdownExtra_Parser'

markdown.php in ./markdown.php

File

./markdown.php, line 1640

View source

class MarkdownExtra_Parser extends Markdown_Parser {

  # Prefix for footnote ids.
  var $fn_id_prefix = "";

  # Optional title attribute for footnote links and backlinks.
  var $fn_link_title = MARKDOWN_FN_LINK_TITLE;
  var $fn_backlink_title = MARKDOWN_FN_BACKLINK_TITLE;

  # Optional class attribute for footnote links and backlinks.
  var $fn_link_class = MARKDOWN_FN_LINK_CLASS;
  var $fn_backlink_class = MARKDOWN_FN_BACKLINK_CLASS;

  # Predefined abbreviations.
  var $predef_abbr = array();
  function MarkdownExtra_Parser() {

    #

    # Constructor function. Initialize the parser object.

    #

    # Add extra escapable characters before parent constructor

    # initialize the table.
    $this->escape_chars .= ':|';

    # Insert extra document, block, and span transformations.

    # Parent constructor will do the sorting.
    $this->document_gamut += array(
      "doFencedCodeBlocks" => 5,
      "stripFootnotes" => 15,
      "stripAbbreviations" => 25,
      "appendFootnotes" => 50,
    );
    $this->block_gamut += array(
      "doFencedCodeBlocks" => 5,
      "doTables" => 15,
      "doDefLists" => 45,
    );
    $this->span_gamut += array(
      "doFootnotes" => 5,
      "doAbbreviations" => 70,
    );
    parent::Markdown_Parser();
  }

  # Extra variables used during extra transformations.
  var $footnotes = array();
  var $footnotes_ordered = array();
  var $abbr_desciptions = array();
  var $abbr_word_re = '';

  # Give the current footnote number.
  var $footnote_counter = 1;
  function setup() {

    #

    # Setting up Extra-specific variables.

    #
    parent::setup();
    $this->footnotes = array();
    $this->footnotes_ordered = array();
    $this->abbr_desciptions = array();
    $this->abbr_word_re = '';
    $this->footnote_counter = 1;
    foreach ($this->predef_abbr as $abbr_word => $abbr_desc) {
      if ($this->abbr_word_re) {
        $this->abbr_word_re .= '|';
      }
      $this->abbr_word_re .= preg_quote($abbr_word);
      $this->abbr_desciptions[$abbr_word] = trim($abbr_desc);
    }
  }
  function teardown() {

    #

    # Clearing Extra-specific variables.

    #
    $this->footnotes = array();
    $this->footnotes_ordered = array();
    $this->abbr_desciptions = array();
    $this->abbr_word_re = '';
    parent::teardown();
  }

  ### HTML Block Parser ###

  # Tags that are always treated as block tags:
  var $block_tags_re = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|form|fieldset|iframe|hr|legend';

  # Tags treated as block tags only if the opening tag is alone on it's line:
  var $context_block_tags_re = 'script|noscript|math|ins|del';

  # Tags where markdown="1" default to span mode:
  var $contain_span_tags_re = 'p|h[1-6]|li|dd|dt|td|th|legend|address';

  # Tags which must not have their contents modified, no matter where

  # they appear:
  var $clean_tags_re = 'script|math';

  # Tags that do not need to be closed.
  var $auto_close_tags_re = 'hr|img';
  function hashHTMLBlocks($text) {

    #

    # Hashify HTML Blocks and "clean tags".

    #

    # We only want to do this for block-level HTML tags, such as headers,

    # lists, and tables. That's because we still want to wrap <p>s around

    # "paragraphs" that are wrapped in non-block-level tags, such as anchors,

    # phrase emphasis, and spans. The list of tags we're looking for is

    # hard-coded.

    #

    # This works by calling _HashHTMLBlocks_InMarkdown, which then calls

    # _HashHTMLBlocks_InHTML when it encounter block tags. When the markdown="1"

    # attribute is found whitin a tag, _HashHTMLBlocks_InHTML calls back

    #  _HashHTMLBlocks_InMarkdown to handle the Markdown syntax within the tag.

    # These two functions are calling each other. It's recursive!

    #

    #

    # Call the HTML-in-Markdown hasher.

    #
    list($text, ) = $this
      ->_hashHTMLBlocks_inMarkdown($text);
    return $text;
  }
  function _hashHTMLBlocks_inMarkdown($text, $indent = 0, $enclosing_tag_re = '', $span = false) {

    #

    # Parse markdown text, calling _HashHTMLBlocks_InHTML for block tags.

    #

    # *   $indent is the number of space to be ignored when checking for code

    #     blocks. This is important because if we don't take the indent into

    #     account, something like this (which looks right) won't work as expected:

    #

    #     <div>

    #         <div markdown="1">

    #         Hello World.  <-- Is this a Markdown code block or text?

    #         </div>  <-- Is this a Markdown code block or a real tag?

    #     <div>

    #

    #     If you don't like this, just don't indent the tag on which

    #     you apply the markdown="1" attribute.

    #

    # *   If $enclosing_tag_re is not empty, stops at the first unmatched closing

    #     tag with that name. Nested tags supported.

    #

    # *   If $span is true, text inside must treated as span. So any double

    #     newline will be replaced by a single newline so that it does not create

    #     paragraphs.

    #

    # Returns an array of that form: ( processed text , remaining text )

    #
    if ($text === '') {
      return array(
        '',
        '',
      );
    }

    # Regex to check for the presense of newlines around a block tag.
    $newline_before_re = '/(?:^\\n?|\\n\\n)*$/';
    $newline_after_re = '{
				^						# Start of text following the tag.
				(?>[ ]*<!--.*?-->)?		# Optional comment.
				[ ]*\\n					# Must be followed by newline.
			}xs';

    # Regex to match any tag.
    $block_tag_re = '{
				(					# $2: Capture hole tag.
					</?					# Any opening or closing tag.
						(?>				# Tag name.
							' . $this->block_tags_re . '			|
							' . $this->context_block_tags_re . '	|
							' . $this->clean_tags_re . '        	|
							(?!\\s)' . $enclosing_tag_re . '
						)
						(?:
							(?=[\\s"\'/])		# Allowed characters after tag name.
							(?>
								".*?"		|	# Double quotes (can contain `>`)
								\'.*?\'   	|	# Single quotes (can contain `>`)
								.+?				# Anything but quotes and `>`.
							)*?
						)?
					>					# End of tag.
				|
					<!--    .*?     -->	# HTML Comment
				|
					<\\?.*?\\?> | <%.*?%>	# Processing instruction
				|
					<!\\[CDATA\\[.*?\\]\\]>	# CData Block
				|
					# Code span marker
					`+
				' . (!$span ? ' # If not in span.
				|
					# Indented code block
					(?> ^[ ]*\\n? | \\n[ ]*\\n )
					[ ]{' . ($indent + 4) . '}[^\\n]* \\n
					(?>
						(?: [ ]{' . ($indent + 4) . '}[^\\n]* | [ ]* ) \\n
					)*
				|
					# Fenced code block marker
					(?> ^ | \\n )
					[ ]{' . $indent . '}~~~+[ ]*\\n
				' : '') . ' # End (if not is span).
				)
			}xs';
    $depth = 0;

    # Current depth inside the tag tree.
    $parsed = "";

    # Parsed text that will be returned.

    #

    # Loop through every tag until we find the closing tag of the parent

    # or loop until reaching the end of text if no parent tag specified.

    #
    do {

      #

      # Split the text using the first $tag_match pattern found.

      # Text before  pattern will be first in the array, text after

      # pattern will be at the end, and between will be any catches made

      # by the pattern.

      #
      $parts = preg_split($block_tag_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE);

      # If in Markdown span mode, add a empty-string span-level hash

      # after each newline to prevent triggering any block element.
      if ($span) {
        $void = $this
          ->hashPart("", ':');
        $newline = "{$void}\n";
        $parts[0] = $void . str_replace("\n", $newline, $parts[0]) . $void;
      }
      $parsed .= $parts[0];

      # Text before current tag.

      # If end of $text has been reached. Stop loop.
      if (count($parts) < 3) {
        $text = "";
        break;
      }
      $tag = $parts[1];

      # Tag to handle.
      $text = $parts[2];

      # Remaining text after current tag.
      $tag_re = preg_quote($tag);

      # For use in a regular expression.

      #

      # Check for: Code span marker

      #
      if ($tag[0] == "`") {

        # Find corresponding end marker.
        $tag_re = preg_quote($tag);
        if (preg_match('{^(?>.+?|\\n(?!\\n))*?(?<!`)' . $tag_re . '(?!`)}', $text, $matches)) {

          # End marker found: pass text unchanged until marker.
          $parsed .= $tag . $matches[0];
          $text = substr($text, strlen($matches[0]));
        }
        else {

          # Unmatched marker: just skip it.
          $parsed .= $tag;
        }
      }
      else {
        if ($tag[0] == "\n" || $tag[0] == "~") {
          if ($tag[1] == "\n" || $tag[1] == " ") {

            # Indented code block: pass it unchanged, will be handled

            # later.
            $parsed .= $tag;
          }
          else {

            # Fenced code block marker: find matching end marker.
            $tag_re = preg_quote(trim($tag));
            if (preg_match('{^(?>.*\\n)+?' . $tag_re . ' *\\n}', $text, $matches)) {

              # End marker found: pass text unchanged until marker.
              $parsed .= $tag . $matches[0];
              $text = substr($text, strlen($matches[0]));
            }
            else {

              # No end marker: just skip it.
              $parsed .= $tag;
            }
          }
        }
        else {
          if (preg_match('{^<(?:' . $this->block_tags_re . ')\\b}', $tag) || preg_match('{^<(?:' . $this->context_block_tags_re . ')\\b}', $tag) && preg_match($newline_before_re, $parsed) && preg_match($newline_after_re, $text)) {

            # Need to parse tag and following text using the HTML parser.
            list($block_text, $text) = $this
              ->_hashHTMLBlocks_inHTML($tag . $text, "hashBlock", true);

            # Make sure it stays outside of any paragraph by adding newlines.
            $parsed .= "\n\n{$block_text}\n\n";
          }
          else {
            if (preg_match('{^<(?:' . $this->clean_tags_re . ')\\b}', $tag) || $tag[1] == '!' || $tag[1] == '?') {

              # Need to parse tag and following text using the HTML parser.

              # (don't check for markdown attribute)
              list($block_text, $text) = $this
                ->_hashHTMLBlocks_inHTML($tag . $text, "hashClean", false);
              $parsed .= $block_text;
            }
            else {
              if ($enclosing_tag_re !== '' && preg_match('{^</?(?:' . $enclosing_tag_re . ')\\b}', $tag)) {

                #

                # Increase/decrease nested tag count.

                #
                if ($tag[1] == '/') {
                  $depth--;
                }
                else {
                  if ($tag[strlen($tag) - 2] != '/') {
                    $depth++;
                  }
                }
                if ($depth < 0) {

                  #

                  # Going out of parent element. Clean up and break so we

                  # return to the calling function.

                  #
                  $text = $tag . $text;
                  break;
                }
                $parsed .= $tag;
              }
              else {
                $parsed .= $tag;
              }
            }
          }
        }
      }
    } while ($depth >= 0);
    return array(
      $parsed,
      $text,
    );
  }
  function _hashHTMLBlocks_inHTML($text, $hash_method, $md_attr) {

    #

    # Parse HTML, calling _HashHTMLBlocks_InMarkdown for block tags.

    #

    # *   Calls $hash_method to convert any blocks.

    # *   Stops when the first opening tag closes.

    # *   $md_attr indicate if the use of the `markdown="1"` attribute is allowed.

    #     (it is not inside clean tags)

    #

    # Returns an array of that form: ( processed text , remaining text )

    #
    if ($text === '') {
      return array(
        '',
        '',
      );
    }

    # Regex to match `markdown` attribute inside of a tag.
    $markdown_attr_re = '
			{
				\\s*			# Eat whitespace before the `markdown` attribute
				markdown
				\\s*=\\s*
				(?>
					(["\'])		# $1: quote delimiter		
					(.*?)		# $2: attribute value
					\\1			# matching delimiter	
				|
					([^\\s>]*)	# $3: unquoted attribute value
				)
				()				# $4: make $3 always defined (avoid warnings)
			}xs';

    # Regex to match any tag.
    $tag_re = '{
				(					# $2: Capture hole tag.
					</?					# Any opening or closing tag.
						[\\w:$]+			# Tag name.
						(?:
							(?=[\\s"\'/])		# Allowed characters after tag name.
							(?>
								".*?"		|	# Double quotes (can contain `>`)
								\'.*?\'   	|	# Single quotes (can contain `>`)
								.+?				# Anything but quotes and `>`.
							)*?
						)?
					>					# End of tag.
				|
					<!--    .*?     -->	# HTML Comment
				|
					<\\?.*?\\?> | <%.*?%>	# Processing instruction
				|
					<!\\[CDATA\\[.*?\\]\\]>	# CData Block
				)
			}xs';
    $original_text = $text;

    # Save original text in case of faliure.
    $depth = 0;

    # Current depth inside the tag tree.
    $block_text = "";

    # Temporary text holder for current text.
    $parsed = "";

    # Parsed text that will be returned.

    #

    # Get the name of the starting tag.

    # (This pattern makes $base_tag_name_re safe without quoting.)

    #
    if (preg_match('/^<([\\w:$]*)\\b/', $text, $matches)) {
      $base_tag_name_re = $matches[1];
    }

    #

    # Loop through every tag until we find the corresponding closing tag.

    #
    do {

      #

      # Split the text using the first $tag_match pattern found.

      # Text before  pattern will be first in the array, text after

      # pattern will be at the end, and between will be any catches made

      # by the pattern.

      #
      $parts = preg_split($tag_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE);
      if (count($parts) < 3) {

        #

        # End of $text reached with unbalenced tag(s).

        # In that case, we return original text unchanged and pass the

        # first character as filtered to prevent an infinite loop in the

        # parent function.

        #
        return array(
          $original_text[0],
          substr($original_text, 1),
        );
      }
      $block_text .= $parts[0];

      # Text before current tag.
      $tag = $parts[1];

      # Tag to handle.
      $text = $parts[2];

      # Remaining text after current tag.

      #

      # Check for: Auto-close tag (like <hr/>)

      #			 Comments and Processing Instructions.

      #
      if (preg_match('{^</?(?:' . $this->auto_close_tags_re . ')\\b}', $tag) || $tag[1] == '!' || $tag[1] == '?') {

        # Just add the tag to the block as if it was text.
        $block_text .= $tag;
      }
      else {

        #

        # Increase/decrease nested tag count. Only do so if

        # the tag's name match base tag's.

        #
        if (preg_match('{^</?' . $base_tag_name_re . '\\b}', $tag)) {
          if ($tag[1] == '/') {
            $depth--;
          }
          else {
            if ($tag[strlen($tag) - 2] != '/') {
              $depth++;
            }
          }
        }

        #

        # Check for `markdown="1"` attribute and handle it.

        #
        if ($md_attr && preg_match($markdown_attr_re, $tag, $attr_m) && preg_match('/^1|block|span$/', $attr_m[2] . $attr_m[3])) {

          # Remove `markdown` attribute from opening tag.
          $tag = preg_replace($markdown_attr_re, '', $tag);

          # Check if text inside this tag must be parsed in span mode.
          $this->mode = $attr_m[2] . $attr_m[3];
          $span_mode = $this->mode == 'span' || $this->mode != 'block' && preg_match('{^<(?:' . $this->contain_span_tags_re . ')\\b}', $tag);

          # Calculate indent before tag.
          if (preg_match('/(?:^|\\n)( *?)(?! ).*?$/', $block_text, $matches)) {
            $strlen = $this->utf8_strlen;
            $indent = $strlen($matches[1], 'UTF-8');
          }
          else {
            $indent = 0;
          }

          # End preceding block with this tag.
          $block_text .= $tag;
          $parsed .= $this
            ->{$hash_method}($block_text);

          # Get enclosing tag name for the ParseMarkdown function.

          # (This pattern makes $tag_name_re safe without quoting.)
          preg_match('/^<([\\w:$]*)\\b/', $tag, $matches);
          $tag_name_re = $matches[1];

          # Parse the content using the HTML-in-Markdown parser.
          list($block_text, $text) = $this
            ->_hashHTMLBlocks_inMarkdown($text, $indent, $tag_name_re, $span_mode);

          # Outdent markdown text.
          if ($indent > 0) {
            $block_text = preg_replace("/^[ ]{1,{$indent}}/m", "", $block_text);
          }

          # Append tag content to parsed text.
          if (!$span_mode) {
            $parsed .= "\n\n{$block_text}\n\n";
          }
          else {
            $parsed .= "{$block_text}";
          }

          # Start over a new block.
          $block_text = "";
        }
        else {
          $block_text .= $tag;
        }
      }
    } while ($depth > 0);

    #

    # Hash last block text that wasn't processed inside the loop.

    #
    $parsed .= $this
      ->{$hash_method}($block_text);
    return array(
      $parsed,
      $text,
    );
  }
  function hashClean($text) {

    #

    # Called whenever a tag must be hashed when a function insert a "clean" tag

    # in $text, it pass through this function and is automaticaly escaped,

    # blocking invalid nested overlap.

    #
    return $this
      ->hashPart($text, 'C');
  }
  function doHeaders($text) {

    #

    # Redefined to add id attribute support.

    #

    # Setext-style headers:

    #	  Header 1  {#header1}

    #	  ========

    #

    #	  Header 2  {#header2}

    #	  --------

    #
    $text = preg_replace_callback('{
				(^.+?)								# $1: Header text
				(?:[ ]+\\{\\#([-_:a-zA-Z0-9]+)\\})?	# $2: Id attribute
				[ ]*\\n(=+|-+)[ ]*\\n+				# $3: Header footer
			}mx', array(
      &$this,
      '_doHeaders_callback_setext',
    ), $text);

    # atx-style headers:

    #	# Header 1        {#header1}

    #	## Header 2       {#header2}

    #	## Header 2 with closing hashes ##  {#header3}

    #	...

    #	###### Header 6   {#header2}

    #
    $text = preg_replace_callback('{
				^(\\#{1,6})	# $1 = string of #\'s
				[ ]*
				(.+?)		# $2 = Header text
				[ ]*
				\\#*			# optional closing #\'s (not counted)
				(?:[ ]+\\{\\#([-_:a-zA-Z0-9]+)\\})? # id attribute
				[ ]*
				\\n+
			}xm', array(
      &$this,
      '_doHeaders_callback_atx',
    ), $text);
    return $text;
  }
  function _doHeaders_attr($attr) {
    if (empty($attr)) {
      return "";
    }
    return " id=\"{$attr}\"";
  }
  function _doHeaders_callback_setext($matches) {
    if ($matches[3] == '-' && preg_match('{^- }', $matches[1])) {
      return $matches[0];
    }
    $level = $matches[3][0] == '=' ? 1 : 2;
    $attr = $this
      ->_doHeaders_attr($id =& $matches[2]);
    $block = "<h{$level}{$attr}>" . $this
      ->runSpanGamut($matches[1]) . "</h{$level}>";
    return "\n" . $this
      ->hashBlock($block) . "\n\n";
  }
  function _doHeaders_callback_atx($matches) {
    $level = strlen($matches[1]);
    $attr = $this
      ->_doHeaders_attr($id =& $matches[3]);
    $block = "<h{$level}{$attr}>" . $this
      ->runSpanGamut($matches[2]) . "</h{$level}>";
    return "\n" . $this
      ->hashBlock($block) . "\n\n";
  }
  function doTables($text) {

    #

    # Form HTML tables.

    #
    $less_than_tab = $this->tab_width - 1;

    #

    # Find tables with leading pipe.

    #

    #	| Header 1 | Header 2

    #	| -------- | --------

    #	| Cell 1   | Cell 2

    #	| Cell 3   | Cell 4

    #
    $text = preg_replace_callback('
			{
				^							# Start of a line
				[ ]{0,' . $less_than_tab . '}	# Allowed whitespace.
				[|]							# Optional leading pipe (present)
				(.+) \\n						# $1: Header row (at least one pipe)
				
				[ ]{0,' . $less_than_tab . '}	# Allowed whitespace.
				[|] ([ ]*[-:]+[-| :]*) \\n	# $2: Header underline
				
				(							# $3: Cells
					(?>
						[ ]*				# Allowed whitespace.
						[|] .* \\n			# Row content.
					)*
				)
				(?=\\n|\\Z)					# Stop at final double newline.
			}xm', array(
      &$this,
      '_doTable_leadingPipe_callback',
    ), $text);

    #

    # Find tables without leading pipe.

    #

    #	Header 1 | Header 2

    #	-------- | --------

    #	Cell 1   | Cell 2

    #	Cell 3   | Cell 4

    #
    $text = preg_replace_callback('
			{
				^							# Start of a line
				[ ]{0,' . $less_than_tab . '}	# Allowed whitespace.
				(\\S.*[|].*) \\n				# $1: Header row (at least one pipe)
				
				[ ]{0,' . $less_than_tab . '}	# Allowed whitespace.
				([-:]+[ ]*[|][-| :]*) \\n	# $2: Header underline
				
				(							# $3: Cells
					(?>
						.* [|] .* \\n		# Row content
					)*
				)
				(?=\\n|\\Z)					# Stop at final double newline.
			}xm', array(
      &$this,
      '_DoTable_callback',
    ), $text);
    return $text;
  }
  function _doTable_leadingPipe_callback($matches) {
    $head = $matches[1];
    $underline = $matches[2];
    $content = $matches[3];

    # Remove leading pipe for each row.
    $content = preg_replace('/^ *[|]/m', '', $content);
    return $this
      ->_doTable_callback(array(
      $matches[0],
      $head,
      $underline,
      $content,
    ));
  }
  function _doTable_callback($matches) {
    $head = $matches[1];
    $underline = $matches[2];
    $content = $matches[3];

    # Remove any tailing pipes for each line.
    $head = preg_replace('/[|] *$/m', '', $head);
    $underline = preg_replace('/[|] *$/m', '', $underline);
    $content = preg_replace('/[|] *$/m', '', $content);

    # Reading alignement from header underline.
    $separators = preg_split('/ *[|] */', $underline);
    foreach ($separators as $n => $s) {
      if (preg_match('/^ *-+: *$/', $s)) {
        $attr[$n] = ' align="right"';
      }
      else {
        if (preg_match('/^ *:-+: *$/', $s)) {
          $attr[$n] = ' align="center"';
        }
        else {
          if (preg_match('/^ *:-+ *$/', $s)) {
            $attr[$n] = ' align="left"';
          }
          else {
            $attr[$n] = '';
          }
        }
      }
    }

    # Parsing span elements, including code spans, character escapes,

    # and inline HTML tags, so that pipes inside those gets ignored.
    $head = $this
      ->parseSpan($head);
    $headers = preg_split('/ *[|] */', $head);
    $col_count = count($headers);

    # Write column headers.
    $text = "<table>\n";
    $text .= "<thead>\n";
    $text .= "<tr>\n";
    foreach ($headers as $n => $header) {
      $text .= "  <th{$attr[$n]}>" . $this
        ->runSpanGamut(trim($header)) . "</th>\n";
    }
    $text .= "</tr>\n";
    $text .= "</thead>\n";

    # Split content by row.
    $rows = explode("\n", trim($content, "\n"));
    $text .= "<tbody>\n";
    foreach ($rows as $row) {

      # Parsing span elements, including code spans, character escapes,

      # and inline HTML tags, so that pipes inside those gets ignored.
      $row = $this
        ->parseSpan($row);

      # Split row by cell.
      $row_cells = preg_split('/ *[|] */', $row, $col_count);
      $row_cells = array_pad($row_cells, $col_count, '');
      $text .= "<tr>\n";
      foreach ($row_cells as $n => $cell) {
        $text .= "  <td{$attr[$n]}>" . $this
          ->runSpanGamut(trim($cell)) . "</td>\n";
      }
      $text .= "</tr>\n";
    }
    $text .= "</tbody>\n";
    $text .= "</table>";
    return $this
      ->hashBlock($text) . "\n";
  }
  function doDefLists($text) {

    #

    # Form HTML definition lists.

    #
    $less_than_tab = $this->tab_width - 1;

    # Re-usable pattern to match any entire dl list:
    $whole_list_re = '(?>
			(								# $1 = whole list
			  (								# $2
				[ ]{0,' . $less_than_tab . '}
				((?>.*\\S.*\\n)+)				# $3 = defined term
				\\n?
				[ ]{0,' . $less_than_tab . '}:[ ]+ # colon starting definition
			  )
			  (?s:.+?)
			  (								# $4
				  \\z
				|
				  \\n{2,}
				  (?=\\S)
				  (?!						# Negative lookahead for another term
					[ ]{0,' . $less_than_tab . '}
					(?: \\S.*\\n )+?			# defined term
					\\n?
					[ ]{0,' . $less_than_tab . '}:[ ]+ # colon starting definition
				  )
				  (?!						# Negative lookahead for another definition
					[ ]{0,' . $less_than_tab . '}:[ ]+ # colon starting definition
				  )
			  )
			)
		)';

    // mx
    $text = preg_replace_callback('{
				(?>\\A\\n?|(?<=\\n\\n))
				' . $whole_list_re . '
			}mx', array(
      &$this,
      '_doDefLists_callback',
    ), $text);
    return $text;
  }
  function _doDefLists_callback($matches) {

    # Re-usable patterns to match list item bullets and number markers:
    $list = $matches[1];

    # Turn double returns into triple returns, so that we can make a

    # paragraph for the last item in a list, if necessary:
    $result = trim($this
      ->processDefListItems($list));
    $result = "<dl>\n" . $result . "\n</dl>";
    return $this
      ->hashBlock($result) . "\n\n";
  }
  function processDefListItems($list_str) {

    #

    #	Process the contents of a single definition list, splitting it

    #	into individual term and definition list items.

    #
    $less_than_tab = $this->tab_width - 1;

    # trim trailing blank lines:
    $list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str);

    # Process definition terms.
    $list_str = preg_replace_callback('{
			(?>\\A\\n?|\\n\\n+)					# leading line
			(								# definition terms = $1
				[ ]{0,' . $less_than_tab . '}	# leading whitespace
				(?![:][ ]|[ ])				# negative lookahead for a definition
											#   mark (colon) or more whitespace.
				(?> \\S.* \\n)+?				# actual term (not whitespace).	
			)			
			(?=\\n?[ ]{0,3}:[ ])				# lookahead for following line feed
											#   with a definition mark.
			}xm', array(
      &$this,
      '_processDefListItems_callback_dt',
    ), $list_str);

    # Process actual definitions.
    $list_str = preg_replace_callback('{
			\\n(\\n+)?						# leading line = $1
			(								# marker space = $2
				[ ]{0,' . $less_than_tab . '}	# whitespace before colon
				[:][ ]+						# definition mark (colon)
			)
			((?s:.+?))						# definition text = $3
			(?= \\n+ 						# stop at next definition mark,
				(?:							# next term or end of text
					[ ]{0,' . $less_than_tab . '} [:][ ]	|
					<dt> | \\z
				)						
			)					
			}xm', array(
      &$this,
      '_processDefListItems_callback_dd',
    ), $list_str);
    return $list_str;
  }
  function _processDefListItems_callback_dt($matches) {
    $terms = explode("\n", trim($matches[1]));
    $text = '';
    foreach ($terms as $term) {
      $term = $this
        ->runSpanGamut(trim($term));
      $text .= "\n<dt>" . $term . "</dt>";
    }
    return $text . "\n";
  }
  function _processDefListItems_callback_dd($matches) {
    $leading_line = $matches[1];
    $marker_space = $matches[2];
    $def = $matches[3];
    if ($leading_line || preg_match('/\\n{2,}/', $def)) {

      # Replace marker with the appropriate whitespace indentation
      $def = str_repeat(' ', strlen($marker_space)) . $def;
      $def = $this
        ->runBlockGamut($this
        ->outdent($def . "\n\n"));
      $def = "\n" . $def . "\n";
    }
    else {
      $def = rtrim($def);
      $def = $this
        ->runSpanGamut($this
        ->outdent($def));
    }
    return "\n<dd>" . $def . "</dd>\n";
  }
  function doFencedCodeBlocks($text) {

    #

    # Adding the fenced code block syntax to regular Markdown:

    #

    # ~~~

    # Code block

    # ~~~

    #
    $less_than_tab = $this->tab_width;
    $text = preg_replace_callback('{
				(?:\\n|\\A)
				# 1: Opening marker
				(
					~{3,} # Marker: three tilde or more.
				)
				[ ]* \\n # Whitespace and newline following marker.
				
				# 2: Content
				(
					(?>
						(?!\\1 [ ]* \\n)	# Not a closing marker.
						.*\\n+
					)+
				)
				
				# Closing marker.
				\\1 [ ]* \\n
			}xm', array(
      &$this,
      '_doFencedCodeBlocks_callback',
    ), $text);
    return $text;
  }
  function _doFencedCodeBlocks_callback($matches) {
    $codeblock = $matches[2];
    $codeblock = htmlspecialchars($codeblock, ENT_NOQUOTES);
    $codeblock = preg_replace_callback('/^\\n+/', array(
      &$this,
      '_doFencedCodeBlocks_newlines',
    ), $codeblock);
    $codeblock = "<pre><code>{$codeblock}</code></pre>";
    return "\n\n" . $this
      ->hashBlock($codeblock) . "\n\n";
  }
  function _doFencedCodeBlocks_newlines($matches) {
    return str_repeat("<br{$this->empty_element_suffix}", strlen($matches[0]));
  }

  #

  # Redefining emphasis markers so that emphasis by underscore does not

  # work in the middle of a word.

  #
  var $em_relist = array(
    '' => '(?:(?<!\\*)\\*(?!\\*)|(?<![a-zA-Z0-9_])_(?!_))(?=\\S)(?![.,:;]\\s)',
    '*' => '(?<=\\S)(?<!\\*)\\*(?!\\*)',
    '_' => '(?<=\\S)(?<!_)_(?![a-zA-Z0-9_])',
  );
  var $strong_relist = array(
    '' => '(?:(?<!\\*)\\*\\*(?!\\*)|(?<![a-zA-Z0-9_])__(?!_))(?=\\S)(?![.,:;]\\s)',
    '**' => '(?<=\\S)(?<!\\*)\\*\\*(?!\\*)',
    '__' => '(?<=\\S)(?<!_)__(?![a-zA-Z0-9_])',
  );
  var $em_strong_relist = array(
    '' => '(?:(?<!\\*)\\*\\*\\*(?!\\*)|(?<![a-zA-Z0-9_])___(?!_))(?=\\S)(?![.,:;]\\s)',
    '***' => '(?<=\\S)(?<!\\*)\\*\\*\\*(?!\\*)',
    '___' => '(?<=\\S)(?<!_)___(?![a-zA-Z0-9_])',
  );
  function formParagraphs($text) {

    #

    #	Params:

    #		$text - string to process with html <p> tags

    #

    # Strip leading and trailing lines:
    $text = preg_replace('/\\A\\n+|\\n+\\z/', '', $text);
    $grafs = preg_split('/\\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY);

    #

    # Wrap <p> tags and unhashify HTML blocks

    #
    foreach ($grafs as $key => $value) {
      $value = trim($this
        ->runSpanGamut($value));

      # Check if this should be enclosed in a paragraph.

      # Clean tag hashes & block tag hashes are left alone.
      $is_p = !preg_match('/^B\\x1A[0-9]+B|^C\\x1A[0-9]+C$/', $value);
      if ($is_p) {
        $value = "<p>{$value}</p>";
      }
      $grafs[$key] = $value;
    }

    # Join grafs in one text, then unhash HTML tags.
    $text = implode("\n\n", $grafs);

    # Finish by removing any tag hashes still present in $text.
    $text = $this
      ->unhash($text);
    return $text;
  }

  ### Footnotes
  function stripFootnotes($text) {

    #

    # Strips link definitions from text, stores the URLs and titles in

    # hash references.

    #
    $less_than_tab = $this->tab_width - 1;

    # Link defs are in the form: [^id]: url "optional title"
    $text = preg_replace_callback('{
			^[ ]{0,' . $less_than_tab . '}\\[\\^(.+?)\\][ ]?:	# note_id = $1
			  [ ]*
			  \\n?					# maybe *one* newline
			(						# text = $2 (no blank lines allowed)
				(?:					
					.+				# actual text
				|
					\\n				# newlines but
					(?!\\[\\^.+?\\]:\\s)# negative lookahead for footnote marker.
					(?!\\n+[ ]{0,3}\\S)# ensure line is not blank and followed
									# by non-indented content
				)*
			)		
			}xm', array(
      &$this,
      '_stripFootnotes_callback',
    ), $text);
    return $text;
  }
  function _stripFootnotes_callback($matches) {
    $note_id = $this->fn_id_prefix . $matches[1];
    $this->footnotes[$note_id] = $this
      ->outdent($matches[2]);
    return '';

    # String that will replace the block
  }
  function doFootnotes($text) {

    #

    # Replace footnote references in $text [^id] with a special text-token

    # which will be replaced by the actual footnote marker in appendFootnotes.

    #
    if (!$this->in_anchor) {
      $text = preg_replace('{\\[\\^(.+?)\\]}', "F\32fn:\\1\32:", $text);
    }
    return $text;
  }
  function appendFootnotes($text) {

    #

    # Append footnote list to text.

    #
    $text = preg_replace_callback('{F\\x1Afn:(.*?)\\x1A:}', array(
      &$this,
      '_appendFootnotes_callback',
    ), $text);
    if (!empty($this->footnotes_ordered)) {
      $text .= "\n\n";
      $text .= "<div class=\"footnotes\">\n";
      $text .= "<hr" . MARKDOWN_EMPTY_ELEMENT_SUFFIX . "\n";
      $text .= "<ol>\n\n";
      $attr = " rev=\"footnote\"";
      if ($this->fn_backlink_class != "") {
        $class = $this->fn_backlink_class;
        $class = $this
          ->encodeAttribute($class);
        $attr .= " class=\"{$class}\"";
      }
      if ($this->fn_backlink_title != "") {
        $title = $this->fn_backlink_title;
        $title = $this
          ->encodeAttribute($title);
        $attr .= " title=\"{$title}\"";
      }
      $num = 0;
      while (!empty($this->footnotes_ordered)) {
        $footnote = reset($this->footnotes_ordered);
        $note_id = key($this->footnotes_ordered);
        unset($this->footnotes_ordered[$note_id]);
        $footnote .= "\n";

        # Need to append newline before parsing.
        $footnote = $this
          ->runBlockGamut("{$footnote}\n");
        $footnote = preg_replace_callback('{F\\x1Afn:(.*?)\\x1A:}', array(
          &$this,
          '_appendFootnotes_callback',
        ), $footnote);
        $attr = str_replace("%%", ++$num, $attr);
        $note_id = $this
          ->encodeAttribute($note_id);

        # Add backlink to last paragraph; create new paragraph if needed.
        $backlink = "<a href=\"#fnref:{$note_id}\"{$attr}>&#8617;</a>";
        if (preg_match('{</p>$}', $footnote)) {
          $footnote = substr($footnote, 0, -4) . "&#160;{$backlink}</p>";
        }
        else {
          $footnote .= "\n\n<p>{$backlink}</p>";
        }
        $text .= "<li id=\"fn:{$note_id}\">\n";
        $text .= $footnote . "\n";
        $text .= "</li>\n\n";
      }
      $text .= "</ol>\n";
      $text .= "</div>";
    }
    return $text;
  }
  function _appendFootnotes_callback($matches) {
    $node_id = $this->fn_id_prefix . $matches[1];

    # Create footnote marker only if it has a corresponding footnote *and*

    # the footnote hasn't been used by another marker.
    if (isset($this->footnotes[$node_id])) {

      # Transfert footnote content to the ordered list.
      $this->footnotes_ordered[$node_id] = $this->footnotes[$node_id];
      unset($this->footnotes[$node_id]);
      $num = $this->footnote_counter++;
      $attr = " rel=\"footnote\"";
      if ($this->fn_link_class != "") {
        $class = $this->fn_link_class;
        $class = $this
          ->encodeAttribute($class);
        $attr .= " class=\"{$class}\"";
      }
      if ($this->fn_link_title != "") {
        $title = $this->fn_link_title;
        $title = $this
          ->encodeAttribute($title);
        $attr .= " title=\"{$title}\"";
      }
      $attr = str_replace("%%", $num, $attr);
      $node_id = $this
        ->encodeAttribute($node_id);
      return "<sup id=\"fnref:{$node_id}\">" . "<a href=\"#fn:{$node_id}\"{$attr}>{$num}</a>" . "</sup>";
    }
    return "[^" . $matches[1] . "]";
  }

  ### Abbreviations ###
  function stripAbbreviations($text) {

    #

    # Strips abbreviations from text, stores titles in hash references.

    #
    $less_than_tab = $this->tab_width - 1;

    # Link defs are in the form: [id]*: url "optional title"
    $text = preg_replace_callback('{
			^[ ]{0,' . $less_than_tab . '}\\*\\[(.+?)\\][ ]?:	# abbr_id = $1
			(.*)					# text = $2 (no blank lines allowed)	
			}xm', array(
      &$this,
      '_stripAbbreviations_callback',
    ), $text);
    return $text;
  }
  function _stripAbbreviations_callback($matches) {
    $abbr_word = $matches[1];
    $abbr_desc = $matches[2];
    if ($this->abbr_word_re) {
      $this->abbr_word_re .= '|';
    }
    $this->abbr_word_re .= preg_quote($abbr_word);
    $this->abbr_desciptions[$abbr_word] = trim($abbr_desc);
    return '';

    # String that will replace the block
  }
  function doAbbreviations($text) {

    #

    # Find defined abbreviations in text and wrap them in <abbr> elements.

    #
    if ($this->abbr_word_re) {

      // cannot use the /x modifier because abbr_word_re may
      // contain significant spaces:
      $text = preg_replace_callback('{' . '(?<![\\w\\x1A])' . '(?:' . $this->abbr_word_re . ')' . '(?![\\w\\x1A])' . '}', array(
        &$this,
        '_doAbbreviations_callback',
      ), $text);
    }
    return $text;
  }
  function _doAbbreviations_callback($matches) {
    $abbr = $matches[0];
    if (isset($this->abbr_desciptions[$abbr])) {
      $desc = $this->abbr_desciptions[$abbr];
      if (empty($desc)) {
        return $this
          ->hashPart("<abbr>{$abbr}</abbr>");
      }
      else {
        $desc = $this
          ->encodeAttribute($desc);
        return $this
          ->hashPart("<abbr title=\"{$desc}\">{$abbr}</abbr>");
      }
    }
    else {
      return $matches[0];
    }
  }

}

Members

Name	Type	Description
MarkdownExtra_Parser::$abbr_desciptions	property
MarkdownExtra_Parser::$abbr_word_re	property
MarkdownExtra_Parser::$auto_close_tags_re	property
MarkdownExtra_Parser::$block_tags_re	property
MarkdownExtra_Parser::$clean_tags_re	property
MarkdownExtra_Parser::$contain_span_tags_re	property
MarkdownExtra_Parser::$context_block_tags_re	property
MarkdownExtra_Parser::$em_relist	property	Overrides Markdown_Parser::$em_relist
MarkdownExtra_Parser::$em_strong_relist	property	Overrides Markdown_Parser::$em_strong_relist
MarkdownExtra_Parser::$fn_backlink_class	property
MarkdownExtra_Parser::$fn_backlink_title	property
MarkdownExtra_Parser::$fn_id_prefix	property
MarkdownExtra_Parser::$fn_link_class	property
MarkdownExtra_Parser::$fn_link_title	property
MarkdownExtra_Parser::$footnotes	property
MarkdownExtra_Parser::$footnotes_ordered	property
MarkdownExtra_Parser::$footnote_counter	property
MarkdownExtra_Parser::$predef_abbr	property
MarkdownExtra_Parser::$strong_relist	property	Overrides Markdown_Parser::$strong_relist
MarkdownExtra_Parser::appendFootnotes	function
MarkdownExtra_Parser::doAbbreviations	function
MarkdownExtra_Parser::doDefLists	function
MarkdownExtra_Parser::doFencedCodeBlocks	function
MarkdownExtra_Parser::doFootnotes	function
MarkdownExtra_Parser::doHeaders	function	Overrides Markdown_Parser::doHeaders
MarkdownExtra_Parser::doTables	function
MarkdownExtra_Parser::formParagraphs	function	Overrides Markdown_Parser::formParagraphs
MarkdownExtra_Parser::hashClean	function
MarkdownExtra_Parser::hashHTMLBlocks	function	Overrides Markdown_Parser::hashHTMLBlocks
MarkdownExtra_Parser::MarkdownExtra_Parser	function
MarkdownExtra_Parser::processDefListItems	function
MarkdownExtra_Parser::setup	function	Overrides Markdown_Parser::setup
MarkdownExtra_Parser::stripAbbreviations	function
MarkdownExtra_Parser::stripFootnotes	function
MarkdownExtra_Parser::teardown	function	Overrides Markdown_Parser::teardown
MarkdownExtra_Parser::_appendFootnotes_callback	function
MarkdownExtra_Parser::_doAbbreviations_callback	function
MarkdownExtra_Parser::_doDefLists_callback	function
MarkdownExtra_Parser::_doFencedCodeBlocks_callback	function
MarkdownExtra_Parser::_doFencedCodeBlocks_newlines	function
MarkdownExtra_Parser::_doHeaders_attr	function
MarkdownExtra_Parser::_doHeaders_callback_atx	function	Overrides Markdown_Parser::_doHeaders_callback_atx
MarkdownExtra_Parser::_doHeaders_callback_setext	function	Overrides Markdown_Parser::_doHeaders_callback_setext
MarkdownExtra_Parser::_doTable_callback	function
MarkdownExtra_Parser::_doTable_leadingPipe_callback	function
MarkdownExtra_Parser::_hashHTMLBlocks_inHTML	function
MarkdownExtra_Parser::_hashHTMLBlocks_inMarkdown	function
MarkdownExtra_Parser::_processDefListItems_callback_dd	function
MarkdownExtra_Parser::_processDefListItems_callback_dt	function
MarkdownExtra_Parser::_stripAbbreviations_callback	function
MarkdownExtra_Parser::_stripFootnotes_callback	function
Markdown_Parser::$block_gamut	property
Markdown_Parser::$document_gamut	property
Markdown_Parser::$empty_element_suffix	property
Markdown_Parser::$em_strong_prepared_relist	property
Markdown_Parser::$escape_chars	property
Markdown_Parser::$escape_chars_re	property
Markdown_Parser::$html_hashes	property
Markdown_Parser::$in_anchor	property
Markdown_Parser::$list_level	property
Markdown_Parser::$nested_brackets_depth	property
Markdown_Parser::$nested_brackets_re	property
Markdown_Parser::$nested_url_parenthesis_depth	property
Markdown_Parser::$nested_url_parenthesis_re	property
Markdown_Parser::$no_entities	property
Markdown_Parser::$no_markup	property
Markdown_Parser::$predef_titles	property
Markdown_Parser::$predef_urls	property
Markdown_Parser::$span_gamut	property
Markdown_Parser::$tab_width	property
Markdown_Parser::$titles	property
Markdown_Parser::$urls	property
Markdown_Parser::$utf8_strlen	property
Markdown_Parser::detab	function
Markdown_Parser::doAnchors	function
Markdown_Parser::doAutoLinks	function
Markdown_Parser::doBlockQuotes	function
Markdown_Parser::doCodeBlocks	function
Markdown_Parser::doHardBreaks	function
Markdown_Parser::doHorizontalRules	function
Markdown_Parser::doImages	function
Markdown_Parser::doItalicsAndBold	function
Markdown_Parser::doLists	function
Markdown_Parser::encodeAmpsAndAngles	function
Markdown_Parser::encodeAttribute	function
Markdown_Parser::encodeEmailAddress	function
Markdown_Parser::handleSpanToken	function
Markdown_Parser::hashBlock	function
Markdown_Parser::hashPart	function
Markdown_Parser::makeCodeSpan	function
Markdown_Parser::Markdown_Parser	function
Markdown_Parser::outdent	function
Markdown_Parser::parseSpan	function
Markdown_Parser::prepareItalicsAndBold	function
Markdown_Parser::processListItems	function
Markdown_Parser::runBasicBlockGamut	function
Markdown_Parser::runBlockGamut	function
Markdown_Parser::runSpanGamut	function
Markdown_Parser::stripLinkDefinitions	function
Markdown_Parser::transform	function
Markdown_Parser::unhash	function
Markdown_Parser::_detab_callback	function
Markdown_Parser::_doAnchors_inline_callback	function
Markdown_Parser::_doAnchors_reference_callback	function
Markdown_Parser::_doAutoLinks_email_callback	function
Markdown_Parser::_doAutoLinks_url_callback	function
Markdown_Parser::_doBlockQuotes_callback	function
Markdown_Parser::_doBlockQuotes_callback2	function
Markdown_Parser::_doCodeBlocks_callback	function
Markdown_Parser::_doHardBreaks_callback	function
Markdown_Parser::_doImages_inline_callback	function
Markdown_Parser::_doImages_reference_callback	function
Markdown_Parser::_doLists_callback	function
Markdown_Parser::_hashHTMLBlocks_callback	function
Markdown_Parser::_initDetab	function
Markdown_Parser::_processListItems_callback	function
Markdown_Parser::_stripLinkDefinitions_callback	function
Markdown_Parser::_unhash_callback	function

You are here

class MarkdownExtra_Parser in Markdown 5

Hierarchy

File

Members

API Navigation