#!/usr/bin/env python # Copyright (c) 2012 Trent Mick. # Copyright (c) 2007-2008 ActiveState Corp. # License: MIT (http://www.opensource.org/licenses/mit-license.php) from __future__ import generators r"""A fast and complete Python implementation of Markdown. [from http://daringfireball.net/projects/markdown/] > Markdown is a text-to-HTML filter; it translates an easy-to-read / > easy-to-write structured text format into HTML. Markdown's text > format is most similar to that of plain text email, and supports > features such as headers, *emphasis*, code blocks, blockquotes, and > links. > > Markdown's syntax is designed not as a generic markup language, but > specifically to serve as a front-end to (X)HTML. You can use span-level > HTML tags anywhere in a Markdown document, and you can use block level > HTML tags (like
%s
' def _task_list_item_sub(self, match): marker = match.group(1) item_text = match.group(2) if marker == '[x]': return self._task_list_warpper_str % ('checked ', item_text) elif marker == '[ ]': return self._task_list_warpper_str % ('', item_text) _last_li_endswith_two_eols = False def _list_item_sub(self, match): item = match.group(4) leading_line = match.group(1) if leading_line or "\n\n" in item or self._last_li_endswith_two_eols: item = self._run_block_gamut(self._outdent(item)) else: # Recursion for sub-lists: item = self._do_lists(self._outdent(item)) if item.endswith('\n'): item = item[:-1] item = self._run_span_gamut(item) self._last_li_endswith_two_eols = (len(match.group(5)) == 2) if "task_list" in self.extras: item = self._task_list_item_re.sub(self._task_list_item_sub, item) return " tags.
                """
                yield 0, ""
                for tup in inner:
                    yield tup
                yield 0, ""
            def wrap(self, source, outfile):
                """Wrap `source` in code markup, then pre, then div.

                `outfile` is accepted for interface compatibility and is
                not used here.
                """
                in_code = self._wrap_code(source)
                in_pre = self._wrap_pre(in_code)
                return self._wrap_div(in_pre)
        formatter_opts.setdefault("cssclass", "codehilite")
        formatter = HtmlCodeFormatter(**formatter_opts)
        return pygments.highlight(codeblock, lexer, formatter)
    def _code_block_sub(self, match, is_fenced_code_block=False):
        """Render one matched code block as HTML.

        `match` is the regex match for the block. For a fenced block
        (`is_fenced_code_block=True`) group 1 is the optional lexer name
        and group 2 the content; otherwise group 1 is the indented block.

        When a lexer name is available and `_get_pygments_lexer` returns
        a lexer for it, the block is highlighted via
        `_color_with_pygments`. Otherwise the content goes through
        `_encode_code` and is wrapped in `<pre><code>`, with optional
        class attributes from `_html_class_str_from_tag`.

        Returns the replacement text, padded with blank lines.
        """
        lexer_name = None
        formatter_opts = {}
        if is_fenced_code_block:
            lexer_name = match.group(1)
            if lexer_name:
                formatter_opts = self.extras['fenced-code-blocks'] or {}
            codeblock = match.group(2)
            codeblock = codeblock[:-1]  # drop one trailing newline
        else:
            codeblock = match.group(1)
            codeblock = self._outdent(codeblock)
            codeblock = self._detab(codeblock)
            codeblock = codeblock.lstrip('\n')  # trim leading newlines
            codeblock = codeblock.rstrip()      # trim trailing whitespace

            # Note: "code-color" extra is DEPRECATED.
            if "code-color" in self.extras and codeblock.startswith(":::"):
                # `partition` (unlike a two-way `split` unpack) also copes
                # with a block that consists of the declaration line only.
                lexer_line, _, rest = codeblock.partition('\n')
                lexer_name = lexer_line[3:].strip()
                codeblock = rest.lstrip("\n")   # Remove lexer declaration line.
                formatter_opts = self.extras['code-color'] or {}

        if lexer_name:
            def unhash_code(codeblock):
                # Put back any hashed spans (presumably stored by an
                # earlier sanitizing pass -- confirm against the code that
                # fills `html_spans`), then undo the HTML entity escaping
                # so the highlighter sees the literal source characters.
                for key, sanitized in list(self.html_spans.items()):
                    codeblock = codeblock.replace(key, sanitized)
                replacements = [
                    ("&amp;", "&"),
                    ("&lt;", "<"),
                    ("&gt;", ">")
                ]
                for old, new in replacements:
                    codeblock = codeblock.replace(old, new)
                return codeblock

            lexer = self._get_pygments_lexer(lexer_name)
            if lexer:
                codeblock = unhash_code(codeblock)
                colored = self._color_with_pygments(codeblock, lexer,
                                                    **formatter_opts)
                return "\n\n%s\n\n" % colored

        codeblock = self._encode_code(codeblock)
        pre_class_str = self._html_class_str_from_tag("pre")
        code_class_str = self._html_class_str_from_tag("code")
        return "\n\n<pre%s><code%s>%s\n</code></pre>\n\n" % (
            pre_class_str, code_class_str, codeblock)
    def _html_class_str_from_tag(self, tag):
        """Get the appropriate ' class="..."' string (note the leading
        space), if any, for the given tag.

        Returns "" when the "html-classes" extra is absent, when its
        value is not a container supporting `in` and indexing (e.g.
        None), or when it has no entry for `tag`.
        """
        if "html-classes" not in self.extras:
            return ""
        html_classes_from_tag = self.extras["html-classes"]
        try:
            # The membership test and lookup are what raise TypeError for
            # a non-mapping value, so they are the statements guarded.
            if tag not in html_classes_from_tag:
                return ""
            css_class = html_classes_from_tag[tag]
        except TypeError:
            return ""
        return ' class="%s"' % css_class
    def _do_code_blocks(self, text):
        """Process Markdown `<pre><code>` blocks.

        Finds runs of lines indented by a tab or by `self.tab_width`
        spaces and replaces each run with the result of
        `self._code_block_sub`. Returns the transformed text.
        """
        code_block_re = re.compile(r'''
            (?:\n\n|\A\n?)
            (               # $1 = the code block -- one or more lines, starting with a space/tab
              (?:
                (?:[ ]{%d} | \t)  # Lines must start with a tab or a tab-width of spaces
                .*\n+
              )+
            )
            ((?=^[ ]{0,%d}\S)|\Z)   # Lookahead for non-space at line-start, or end of doc
            # Lookahead to make sure this block isn't already in a code block.
            # Needed when syntax highlighting is being used.
            (?![^<]*\</code\>)
            ''' % (self.tab_width, self.tab_width),
            re.M | re.X)
        return code_block_re.sub(self._code_block_sub, text)
    # Pattern for a ```-fenced code block: an opening fence line with an
    # optional language name (group 1), the content matched non-greedily
    # (group 2), and a closing fence line. Flags: re.M anchors `^` at
    # every line start, re.S lets `.` cross newlines, re.X allows the
    # commented layout below.
    _fenced_code_block_re = re.compile(r'''
        (?:\n+|\A\n?)
        ^```([\w+-]+)?[ \t]*\n      # opening fence, $1 = optional lang
        (.*?)                       # $2 = code block content
        ^```[ \t]*\n                # closing fence
        ''', re.M | re.X | re.S)
    def _fenced_code_block_sub(self, match):
        """Substitution callback for fenced blocks: delegate to
        `_code_block_sub` with the fenced flag switched on.
        """
        rendered = self._code_block_sub(match, is_fenced_code_block=True)
        return rendered
    def _do_fenced_code_blocks(self, text):
        """Replace each ```-fenced, unindented code block in `text`
        ('fenced-code-blocks' extra) using `_fenced_code_block_sub`.
        """
        fence_re = self._fenced_code_block_re
        return fence_re.sub(self._fenced_code_block_sub, text)
    # Rules for a code span:
    # - backslash escapes are not interpreted in a code span
    # - to include one backtick, or a run of several, the delimiters must
    #   be a longer run of backticks
    # - cannot start or end a code span with a backtick; pad with a
    #   space and that space will be removed in the emitted HTML
    # See `test/tm-cases/escapes.text` for a number of edge-case
    # examples.
    _code_span_re = re.compile(r'''
            (?%s
" % c
    def _do_code_spans(self, text):
        """Replace backtick-delimited code spans in `text`.

        Every match of `self._code_span_re` is replaced with the result
        of `self._code_span_sub`. Returns the transformed text.
        """
        #   *   Backtick quotes are used for <code></code> spans.
        #
        #   *   You can use multiple backticks as the delimiters if you want to
        #       include literal backticks in the code span. So, this input:
        #
        #         Just type ``foo `bar` baz`` at the prompt.
        #
        #       Will translate to:
        #
        #         <p>Just type <code>foo `bar` baz</code> at the prompt.</p>
        #
        #   *   You can use spaces to get literal backticks at the edges:
        #
        #         ... type `` `bar` `` ...
        #
        #       Turns to:
        #
        #         ... type <code>`bar`</code> ...
        return self._code_span_re.sub(self._code_span_sub, text)
    def _encode_code(self, text):
        """Encode/escape certain characters inside Markdown code runs.
        The point is that in code, these characters are literals,
        and lose their special Markdown meanings.

        The escaped text is recorded in `self._escape_table` under the
        value returned by `_hash_text`, and that hashed value is what
        gets returned.
        """
        replacements = [
            # Encode all ampersands; HTML entities are not
            # entities within a Markdown code span. This must come first
            # so the entities produced below are not escaped again.
            ('&', '&amp;'),
            # Do the angle bracket song and dance:
            ('<', '&lt;'),
            ('>', '&gt;'),
        ]
        for before, after in replacements:
            text = text.replace(before, after)
        hashed = _hash_text(text)
        self._escape_table[text] = hashed
        return hashed
    # Matches `~~text~~` where the enclosed text neither starts nor ends
    # with whitespace; group 1 is the enclosed text, and re.S lets it
    # span newlines.
    _strike_re = re.compile(r"~~(?=\S)(.+?)(?<=\S)~~", re.S)
    def _do_strike(self, text):
        text = self._strike_re.sub(r".+?)', re.S) def _dedent_two_spaces_sub(self, match): return re.sub(r'(?m)^ ', '', match.group(1)) def _block_quote_sub(self, match): bq = match.group(1) is_spoiler = 'spoiler' in self.extras and self._bq_all_lines_spoilers.match(bq) # trim one level of quoting if is_spoiler: bq = self._bq_one_level_re_spoiler.sub('', bq) else: bq = self._bq_one_level_re.sub('', bq) # trim whitespace-only lines bq = self._ws_only_line_re.sub('', bq) bq = self._run_block_gamut(bq) # recurse bq = re.sub('(?m)^', ' ', bq) # These leading spaces screw with
 content, so we need to fix that:
        bq = self._html_pre_block_re.sub(self._dedent_two_spaces_sub, bq)
        if is_spoiler:
            return '\n%s\n
\n\n' % bq
        else:
            return '\n%s\n
\n\n' % bq
    def _do_block_quotes(self, text):
        """Apply the block-quote substitution to `text`.

        Uses the spoiler-aware pattern when the 'spoiler' extra is
        enabled and the regular pattern otherwise; each match is replaced
        via `self._block_quote_sub`.
        """
        if '>' not in text:
            # Cheap early exit: text without '>' is returned untouched.
            return text
        quote_re = (self._block_quote_re_spoiler
                    if 'spoiler' in self.extras
                    else self._block_quote_re)
        return quote_re.sub(self._block_quote_sub, text)
    def _form_paragraphs(self, text):
        # Strip leading and trailing lines:
        text = text.strip('\n')
        # Wrap  tags.
        grafs = []
        for i, graf in enumerate(re.split(r"\n{2,}", text)):
            if graf in self.html_blocks:
                # Unhashify HTML blocks
                grafs.append(self.html_blocks[graf])
            else:
                cuddled_list = None
                if "cuddled-lists" in self.extras:
                    # Need to put back trailing '\n' for `_list_item_re`
                    # match at the end of the paragraph.
                    li = self._list_item_re.search(graf + '\n')
                    # Two of the same list marker in this paragraph: a likely
                    # candidate for a list cuddled to preceding paragraph
                    # text (issue 33). Note the `[-1]` is a quick way to
                    # consider numeric bullets (e.g. "1." and "2.") to be
                    # equal.
                    if (li and len(li.group(2)) <= 3 and li.group("next_marker")
                        and li.group("marker")[-1] == li.group("next_marker")[-1]):
                        start = li.start()
                        cuddled_list = self._do_lists(graf[start:]).rstrip("\n")
                        assert cuddled_list.startswith("
tags. graf = self._run_span_gamut(graf) grafs.append("
" + graf.lstrip(" \t") + "
") if cuddled_list: grafs.append(cuddled_list) return "\n\n".join(grafs) def _add_footnotes(self, text): if self.footnotes: footer = [ '%s
" % backlink) footer.append('