Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve, normalise mathjax handling, yapf the file #609

Merged
merged 5 commits into from
Jul 27, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 22 additions & 14 deletions nbconvert/filters/markdown_mistune.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,16 +19,20 @@
from pygments.util import ClassNotFound

from nbconvert.filters.strings import add_anchor
from nbconvert.utils.exceptions import ConversionException

block_math = re.compile(r"^\$\$(.*?)\$\$|^\\\\\[(.*?)\\\\\]", re.DOTALL)
inline_math = re.compile(r"^\$(.+?)\$|^\\\\\((.+?)\\\\\)", re.DOTALL)


class MathBlockGrammar(mistune.BlockGrammar):
block_math = re.compile(r"^\$\$(.*?)\$\$", re.DOTALL)
block_math = block_math
latex_environment = re.compile(r"^\\begin\{([a-z]*\*?)\}(.*?)\\end\{\1\}",
re.DOTALL)
re.DOTALL)


class MathBlockLexer(mistune.BlockLexer):
default_rules = ['block_math', 'latex_environment'] + mistune.BlockLexer.default_rules
default_rules = ['block_math', 'latex_environment'
] + mistune.BlockLexer.default_rules

def __init__(self, rules=None, **kwargs):
if rules is None:
Expand All @@ -39,7 +43,7 @@ def parse_block_math(self, m):
"""Parse a $$math$$ block"""
self.tokens.append({
'type': 'block_math',
'text': m.group(1)
'text': m.group(1) or m.group(2)
})

def parse_latex_environment(self, m):
Expand All @@ -51,24 +55,25 @@ def parse_latex_environment(self, m):


class MathInlineGrammar(mistune.InlineGrammar):
math = re.compile(r"^\$(.+?)\$", re.DOTALL)
block_math = re.compile(r"^\$\$(.+?)\$\$", re.DOTALL)
inline_math = inline_math
block_math = block_math
text = re.compile(r'^[\s\S]+?(?=[\\<!\[_*`~$]|https?://| {2,}\n|$)')


class MathInlineLexer(mistune.InlineLexer):
default_rules = ['block_math', 'math'] + mistune.InlineLexer.default_rules
default_rules = (['block_math', 'inline_math']
+ mistune.InlineLexer.default_rules)

def __init__(self, renderer, rules=None, **kwargs):
if rules is None:
rules = MathInlineGrammar()
super(MathInlineLexer, self).__init__(renderer, rules, **kwargs)

def output_math(self, m):
return self.renderer.inline_math(m.group(1))
def output_inline_math(self, m):
return self.renderer.inline_math(m.group(1) or m.group(2))

def output_block_math(self, m):
return self.renderer.block_math(m.group(1))
return self.renderer.block_math(m.group(1) or m.group(2))


class MarkdownWithMath(mistune.Markdown):
Expand All @@ -83,7 +88,8 @@ def output_block_math(self):
return self.renderer.block_math(self.token['text'])

def output_latex_environment(self):
return self.renderer.latex_environment(self.token['name'], self.token['text'])
return self.renderer.latex_environment(self.token['name'],
self.token['text'])


class IPythonRenderer(mistune.Renderer):
Expand Down Expand Up @@ -111,7 +117,7 @@ def header(self, text, level, raw=None):
# html.escape() is not available on python 2.7
# For more details, see:
# https://wiki.python.org/moin/EscapingHtml
def escape_html(self,text):
def escape_html(self, text):
return cgi.escape(text)

def block_math(self, text):
Expand All @@ -125,6 +131,8 @@ def latex_environment(self, name, text):
def inline_math(self, text):
return '$%s$' % self.escape_html(text)


def markdown2html_mistune(source):
"""Convert a markdown string to HTML using mistune"""
return MarkdownWithMath(renderer=IPythonRenderer(escape=False)).render(source)
return MarkdownWithMath(renderer=IPythonRenderer(
escape=False)).render(source)
94 changes: 53 additions & 41 deletions nbconvert/filters/tests/test_markdown.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,9 +52,10 @@ class TestMarkdown(TestsBase):
def test_markdown2latex(self):
"""markdown2latex test"""
for index, test in enumerate(self.tests):
self._try_markdown(partial(convert_pandoc, from_format='markdown',
to_format='latex'), test,
self.tokens[index])
self._try_markdown(
partial(
convert_pandoc, from_format='markdown', to_format='latex'),
test, self.tokens[index])

@dec.onlyif_cmds_exist('pandoc')
def test_markdown2latex_markup(self):
Expand Down Expand Up @@ -108,34 +109,35 @@ def test_markdown2html(self):
self._try_markdown(markdown2html, test, self.tokens[index])

def test_markdown2html_heading_anchors(self):
for md, tokens in [
('# test',
('<h1', '>test', 'id="test"', u'&#182;</a>', "anchor-link")
),
('###test head space',
('<h3', '>test head space', 'id="test-head-space"', u'&#182;</a>', "anchor-link")
)
]:
for md, tokens in [('# test', ('<h1', '>test', 'id="test"',
u'&#182;</a>', "anchor-link")),
('###test head space',
('<h3', '>test head space', 'id="test-head-space"',
u'&#182;</a>', "anchor-link"))]:
self._try_markdown(markdown2html, md, tokens)

def test_markdown2html_math(self):
# Mathematical expressions not containing <, >, & should be passed through unaltered
# Mathematical expressions not containing <, >, &
# should be passed through unaltered
# all the "<", ">", "&" must be escaped correctly
cases = [("\\begin{equation*}\n"
"\\left( \\sum_{k=1}^n a_k b_k \\right)^2 \\leq \\left( \\sum_{k=1}^n a_k^2 \\right) \\left( \\sum_{k=1}^n b_k^2 \\right)\n"
"\\end{equation*}"),
("$$\n"
"a = 1 *3* 5\n"
"$$"),
"$ a = 1 *3* 5 $",
"$s_i = s_{i}\n$",
"$a<b&b<lt$",
"$a<b&lt;b>a;a-b<0$",
"$<k'>$",
"$$a<b&b<lt$$",
"$$a<b&lt;b>a;a-b<0$$",
"$$<k'>$$",
"""$
cases = [(
"\\begin{equation*}\n" +
("\\left( \\sum_{k=1}^n a_k b_k \\right)^2 "
"\\leq \\left( \\sum_{k=1}^n a_k^2 \\right) "
"\\left( \\sum_{k=1}^n b_k^2 \\right)\n") +
"\\end{equation*}"),
("$$\n"
"a = 1 *3* 5\n"
"$$"),
"$ a = 1 *3* 5 $",
"$s_i = s_{i}\n$",
"$a<b&b<lt$",
"$a<b&lt;b>a;a-b<0$",
"$<k'>$",
"$$a<b&b<lt$$",
"$$a<b&lt;b>a;a-b<0$$",
"$$<k'>$$",
"""$
\\begin{tabular}{ l c r }
1 & 2 & 3 \\
4 & 5 & 6 \\
Expand All @@ -145,9 +147,11 @@ def test_markdown2html_math(self):
for case in cases:
result = markdown2html(case)
# find the equation in the generated texts
search_result = re.search("\$.*\$",result,re.DOTALL)
search_result = re.search("\$.*\$", result, re.DOTALL)
if search_result is None:
search_result = re.search("\\\\begin\\{equation.*\\}.*\\\\end\\{equation.*\\}",result,re.DOTALL)
search_result = re.search(
"\\\\begin\\{equation.*\\}.*\\\\end\\{equation.*\\}",
result, re.DOTALL)
math = search_result.group(0)
# the resulting math part can not contain "<", ">" or
# "&" not followed by "lt;", "gt;", or "amp;".
Expand All @@ -156,19 +160,25 @@ def test_markdown2html_math(self):
# python 2.7 has assertNotRegexpMatches instead of assertNotRegex
if not hasattr(self, 'assertNotRegex'):
self.assertNotRegex = self.assertNotRegexpMatches
self.assertNotRegex(math,"&(?![gt;|lt;|amp;])")
self.assertNotRegex(math, "&(?![gt;|lt;|amp;])")
# the result should be able to be unescaped correctly
self.assertEquals(case,self._unescape(math))
self.assertEquals(case, self._unescape(math))

def test_markdown2html_math_mixed(self):
"""ensure markdown between inline and inline-block math"""
case = """The entries of $C$ are given by the exact formula:
"""ensure markdown between inline and inline-block math works and
test multiple LaTeX markup syntaxes.
"""
case = """The entries of \\\\(C\\\\) are given by the exact formula:
$$
C_{ik} = \sum_{j=1}^n A_{ij} B_{jk}
C_{ik} = \sum_{j=1}^n A_{ij} B_{jk},
$$
but there are many ways to _implement_ this computation. $\approx 2mnp$ flops"""
self._try_markdown(markdown2html, case,
case.replace("_implement_", "<em>implement</em>"))
but you can _implement_ this computation in many ways.
$\approx 2mnp$ flops are needed for \\\\[ C_{ik} = \sum_{j=1}^n A_{ij} B_{jk} \\\\]."""
output_check = (case.replace("_implement_", "<em>implement</em>")
.replace("\\\\(", "$").replace("\\\\)", "$")
.replace("\\\\[", "$$").replace("\\\\]", "$$"))
# these replacements are needed because we use $ and $$ in our html output
self._try_markdown(markdown2html, case, output_check)

def test_markdown2html_math_paragraph(self):
"""these should all parse without modification"""
Expand Down Expand Up @@ -200,7 +210,7 @@ def test_markdown2html_math_paragraph(self):

for case in cases:
s = markdown2html(case)
self.assertIn(case,self._unescape(s))
self.assertIn(case, self._unescape(s))

@dec.onlyif_cmds_exist('pandoc')
def test_markdown2rst(self):
Expand All @@ -212,8 +222,10 @@ def test_markdown2rst(self):
tokens[1] = r'\*\*test'

for index, test in enumerate(self.tests):
self._try_markdown(partial(convert_pandoc, from_format='markdown',
to_format='rst'), test, tokens[index])
self._try_markdown(
partial(
convert_pandoc, from_format='markdown', to_format='rst'),
test, tokens[index])

def _try_markdown(self, method, test, tokens):
results = method(test)
Expand All @@ -223,7 +235,7 @@ def _try_markdown(self, method, test, tokens):
for token in tokens:
self.assertIn(token, results)

def _unescape(self,s):
def _unescape(self, s):
# undo cgi.escape() manually
# We must be careful here for compatibility
# html.unescape() is not available on python 2.7
Expand Down