#!/usr/bin/env python # Copyright (c) 2012 Trent Mick. # Copyright (c) 2007-2008 ActiveState Corp. # License: MIT (http://www.opensource.org/licenses/mit-license.php) from __future__ import generators r"""A fast and complete Python implementation of Markdown. [from http://daringfireball.net/projects/markdown/] > Markdown is a text-to-HTML filter; it translates an easy-to-read / > easy-to-write structured text format into HTML. Markdown's text > format is most similar to that of plain text email, and supports > features such as headers, *emphasis*, code blocks, blockquotes, and > links. > > Markdown's syntax is designed not as a generic markup language, but > specifically to serve as a front-end to (X)HTML. You can use span-level > HTML tags anywhere in a Markdown document, and you can use block level > HTML tags (like
%s
' def _task_list_item_sub(self, match): marker = match.group(1) item_text = match.group(2) if marker == '[x]': return self._task_list_warpper_str % ('checked ', item_text) elif marker == '[ ]': return self._task_list_warpper_str % ('', item_text) _last_li_endswith_two_eols = False def _list_item_sub(self, match): item = match.group(4) leading_line = match.group(1) if leading_line or "\n\n" in item or self._last_li_endswith_two_eols: item = self._run_block_gamut(self._outdent(item)) else: # Recursion for sub-lists: item = self._do_lists(self._outdent(item)) if item.endswith('\n'): item = item[:-1] item = self._run_span_gamut(item) self._last_li_endswith_two_eols = (len(match.group(5)) == 2) if "task_list" in self.extras: item = self._task_list_item_re.sub(self._task_list_item_sub, item) return " tags.
"""
yield 0, ""
for tup in inner:
yield tup
yield 0, ""
def wrap(self, source, outfile):
    """Wrap `source` in code, pre, and div layers, innermost first.

    `outfile` is accepted as part of the signature but is not used here.
    """
    in_code = self._wrap_code(source)
    in_pre = self._wrap_pre(in_code)
    return self._wrap_div(in_pre)
formatter_opts.setdefault("cssclass", "codehilite")
formatter = HtmlCodeFormatter(**formatter_opts)
return pygments.highlight(codeblock, lexer, formatter)
def _code_block_sub(self, match, is_fenced_code_block=False):
lexer_name = None
if is_fenced_code_block:
lexer_name = match.group(1)
if lexer_name:
formatter_opts = self.extras['fenced-code-blocks'] or {}
codeblock = match.group(2)
codeblock = codeblock[:-1] # drop one trailing newline
else:
codeblock = match.group(1)
codeblock = self._outdent(codeblock)
codeblock = self._detab(codeblock)
codeblock = codeblock.lstrip('\n') # trim leading newlines
codeblock = codeblock.rstrip() # trim trailing whitespace
# Note: "code-color" extra is DEPRECATED.
if "code-color" in self.extras and codeblock.startswith(":::"):
lexer_name, rest = codeblock.split('\n', 1)
lexer_name = lexer_name[3:].strip()
codeblock = rest.lstrip("\n") # Remove lexer declaration line.
formatter_opts = self.extras['code-color'] or {}
if lexer_name:
def unhash_code(codeblock):
for key, sanitized in list(self.html_spans.items()):
codeblock = codeblock.replace(key, sanitized)
replacements = [
("&", "&"),
("<", "<"),
(">", ">")
]
for old, new in replacements:
codeblock = codeblock.replace(old, new)
return codeblock
lexer = self._get_pygments_lexer(lexer_name)
if lexer:
codeblock = unhash_code( codeblock )
colored = self._color_with_pygments(codeblock, lexer,
**formatter_opts)
return "\n\n%s\n\n" % colored
codeblock = self._encode_code(codeblock)
pre_class_str = self._html_class_str_from_tag("pre")
code_class_str = self._html_class_str_from_tag("code")
return "\n\n%s\n
\n\n" % (
pre_class_str, code_class_str, codeblock)
def _html_class_str_from_tag(self, tag):
"""Get the appropriate ' class="..."' string (note the leading
space), if any, for the given tag.
"""
if "html-classes" not in self.extras:
return ""
try:
html_classes_from_tag = self.extras["html-classes"]
except TypeError:
return ""
else:
if tag in html_classes_from_tag:
return ' class="%s"' % html_classes_from_tag[tag]
return ""
def _do_code_blocks(self, text):
"""Process Markdown `` blocks."""
code_block_re = re.compile(r'''
(?:\n\n|\A\n?)
( # $1 = the code block -- one or more lines, starting with a space/tab
(?:
(?:[ ]{%d} | \t) # Lines must start with a tab or a tab-width of spaces
.*\n+
)+
)
((?=^[ ]{0,%d}\S)|\Z) # Lookahead for non-space at line-start, or end of doc
# Lookahead to make sure this block isn't already in a code block.
# Needed when syntax highlighting is being used.
(?![^<]*\)
''' % (self.tab_width, self.tab_width),
re.M | re.X)
return code_block_re.sub(self._code_block_sub, text)
# Pattern for a ```-fenced code block: an opening ``` line with an optional
# language tag (group 1), the lazily matched content (group 2; re.S lets `.`
# span newlines), and a closing ``` line that must end with a newline.
_fenced_code_block_re = re.compile(r'''
(?:\n+|\A\n?)
^```([\w+-]+)?[ \t]*\n # opening fence, $1 = optional lang
(.*?) # $2 = code block content
^```[ \t]*\n # closing fence
''', re.M | re.X | re.S)
def _fenced_code_block_sub(self, match):
return self._code_block_sub(match, is_fenced_code_block=True)
def _do_fenced_code_blocks(self, text):
"""Process ```-fenced unindented code blocks ('fenced-code-blocks' extra)."""
return self._fenced_code_block_re.sub(self._fenced_code_block_sub, text)
# Rules for a code span:
# - backslash escapes are not interpreted in a code span
# - to include one backtick, or a run of several, the delimiters must
# be a longer run of backticks
# - cannot start or end a code span with a backtick; pad with a
# space and that space will be removed in the emitted HTML
# See `test/tm-cases/escapes.text` for a number of edge-case
# examples.
_code_span_re = re.compile(r'''
(?%s
" % c
def _do_code_spans(self, text):
# * Backtick quotes are used for spans.
#
# * You can use multiple backticks as the delimiters if you want to
# include literal backticks in the code span. So, this input:
#
# Just type ``foo `bar` baz`` at the prompt.
#
# Will translate to:
#
# Just type foo `bar` baz at the prompt.
`bar` ...
return self._code_span_re.sub(self._code_span_sub, text)
def _encode_code(self, text):
    """Encode/escape certain characters inside Markdown code runs.
    The point is that in code, these characters are literals,
    and lose their special Markdown meanings.

    Returns the value of `_hash_text` for the escaped text, and records
    the escaped text -> hash pair in `self._escape_table`.
    """
    replacements = [
        # Encode all ampersands; HTML entities are not
        # entities within a Markdown code span. This must run first so
        # the '&' introduced by the entities below is not escaped again.
        ('&', '&amp;'),
        # Do the angle bracket song and dance:
        ('<', '&lt;'),
        ('>', '&gt;'),
    ]
    for before, after in replacements:
        text = text.replace(before, after)
    hashed = _hash_text(text)
    self._escape_table[text] = hashed
    return hashed
# Matches ~~text~~ where the enclosed text neither starts nor ends with
# whitespace; group 1 is the enclosed text (lazy; re.S lets it span lines).
_strike_re = re.compile(r"~~(?=\S)(.+?)(?<=\S)~~", re.S)
def _do_strike(self, text):
text = self._strike_re.sub(r".+?)', re.S) def _dedent_two_spaces_sub(self, match): return re.sub(r'(?m)^ ', '', match.group(1)) def _block_quote_sub(self, match): bq = match.group(1) is_spoiler = 'spoiler' in self.extras and self._bq_all_lines_spoilers.match(bq) # trim one level of quoting if is_spoiler: bq = self._bq_one_level_re_spoiler.sub('', bq) else: bq = self._bq_one_level_re.sub('', bq) # trim whitespace-only lines bq = self._ws_only_line_re.sub('', bq) bq = self._run_block_gamut(bq) # recurse bq = re.sub('(?m)^', ' ', bq) # These leading spaces screw with
content, so we need to fix that:
bq = self._html_pre_block_re.sub(self._dedent_two_spaces_sub, bq)
if is_spoiler:
return '\n%s\n
\n\n' % bq
else:
return '\n%s\n
\n\n' % bq
def _do_block_quotes(self, text):
if '>' not in text:
return text
if 'spoiler' in self.extras:
return self._block_quote_re_spoiler.sub(self._block_quote_sub, text)
else:
return self._block_quote_re.sub(self._block_quote_sub, text)
def _form_paragraphs(self, text):
# Strip leading and trailing lines:
text = text.strip('\n')
# Wrap tags.
grafs = []
for i, graf in enumerate(re.split(r"\n{2,}", text)):
if graf in self.html_blocks:
# Unhashify HTML blocks
grafs.append(self.html_blocks[graf])
else:
cuddled_list = None
if "cuddled-lists" in self.extras:
# Need to put back trailing '\n' for `_list_item_re`
# match at the end of the paragraph.
li = self._list_item_re.search(graf + '\n')
# Two of the same list marker in this paragraph: a likely
# candidate for a list cuddled to preceding paragraph
# text (issue 33). Note the `[-1]` is a quick way to
# consider numeric bullets (e.g. "1." and "2.") to be
# equal.
if (li and len(li.group(2)) <= 3 and li.group("next_marker")
and li.group("marker")[-1] == li.group("next_marker")[-1]):
start = li.start()
cuddled_list = self._do_lists(graf[start:]).rstrip("\n")
assert cuddled_list.startswith("
tags. graf = self._run_span_gamut(graf) grafs.append("
" + graf.lstrip(" \t") + "
") if cuddled_list: grafs.append(cuddled_list) return "\n\n".join(grafs) def _add_footnotes(self, text): if self.footnotes: footer = [ '%s
" % backlink) footer.append('