Skip to content

Commit

Permalink
Merge pull request #609 from mpacer/better_mjax
Browse files Browse the repository at this point in the history
Improve, normalise mathjax handling, yapf the file
  • Loading branch information
takluyver authored Jul 27, 2017
2 parents 0fec557 + 83cb693 commit e5ba024
Show file tree
Hide file tree
Showing 2 changed files with 75 additions and 55 deletions.
36 changes: 22 additions & 14 deletions nbconvert/filters/markdown_mistune.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,16 +19,20 @@
from pygments.util import ClassNotFound

from nbconvert.filters.strings import add_anchor
from nbconvert.utils.exceptions import ConversionException

block_math = re.compile(r"^\$\$(.*?)\$\$|^\\\\\[(.*?)\\\\\]", re.DOTALL)
inline_math = re.compile(r"^\$(.+?)\$|^\\\\\((.+?)\\\\\)", re.DOTALL)


class MathBlockGrammar(mistune.BlockGrammar):
block_math = re.compile(r"^\$\$(.*?)\$\$", re.DOTALL)
block_math = block_math
latex_environment = re.compile(r"^\\begin\{([a-z]*\*?)\}(.*?)\\end\{\1\}",
re.DOTALL)
re.DOTALL)


class MathBlockLexer(mistune.BlockLexer):
default_rules = ['block_math', 'latex_environment'] + mistune.BlockLexer.default_rules
default_rules = ['block_math', 'latex_environment'
] + mistune.BlockLexer.default_rules

def __init__(self, rules=None, **kwargs):
if rules is None:
Expand All @@ -39,7 +43,7 @@ def parse_block_math(self, m):
"""Parse a $$math$$ block"""
self.tokens.append({
'type': 'block_math',
'text': m.group(1)
'text': m.group(1) or m.group(2)
})

def parse_latex_environment(self, m):
Expand All @@ -51,24 +55,25 @@ def parse_latex_environment(self, m):


class MathInlineGrammar(mistune.InlineGrammar):
math = re.compile(r"^\$(.+?)\$", re.DOTALL)
block_math = re.compile(r"^\$\$(.+?)\$\$", re.DOTALL)
inline_math = inline_math
block_math = block_math
text = re.compile(r'^[\s\S]+?(?=[\\<!\[_*`~$]|https?://| {2,}\n|$)')


class MathInlineLexer(mistune.InlineLexer):
default_rules = ['block_math', 'math'] + mistune.InlineLexer.default_rules
default_rules = (['block_math', 'inline_math']
+ mistune.InlineLexer.default_rules)

def __init__(self, renderer, rules=None, **kwargs):
if rules is None:
rules = MathInlineGrammar()
super(MathInlineLexer, self).__init__(renderer, rules, **kwargs)

def output_math(self, m):
return self.renderer.inline_math(m.group(1))
def output_inline_math(self, m):
return self.renderer.inline_math(m.group(1) or m.group(2))

def output_block_math(self, m):
return self.renderer.block_math(m.group(1))
return self.renderer.block_math(m.group(1) or m.group(2))


class MarkdownWithMath(mistune.Markdown):
Expand All @@ -83,7 +88,8 @@ def output_block_math(self):
return self.renderer.block_math(self.token['text'])

def output_latex_environment(self):
return self.renderer.latex_environment(self.token['name'], self.token['text'])
return self.renderer.latex_environment(self.token['name'],
self.token['text'])


class IPythonRenderer(mistune.Renderer):
Expand Down Expand Up @@ -111,7 +117,7 @@ def header(self, text, level, raw=None):
# html.escape() is not available on python 2.7
# For more details, see:
# https://wiki.python.org/moin/EscapingHtml
def escape_html(self,text):
def escape_html(self, text):
return cgi.escape(text)

def block_math(self, text):
Expand All @@ -125,6 +131,8 @@ def latex_environment(self, name, text):
def inline_math(self, text):
return '$%s$' % self.escape_html(text)


def markdown2html_mistune(source):
"""Convert a markdown string to HTML using mistune"""
return MarkdownWithMath(renderer=IPythonRenderer(escape=False)).render(source)
return MarkdownWithMath(renderer=IPythonRenderer(
escape=False)).render(source)
94 changes: 53 additions & 41 deletions nbconvert/filters/tests/test_markdown.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,9 +52,10 @@ class TestMarkdown(TestsBase):
def test_markdown2latex(self):
"""markdown2latex test"""
for index, test in enumerate(self.tests):
self._try_markdown(partial(convert_pandoc, from_format='markdown',
to_format='latex'), test,
self.tokens[index])
self._try_markdown(
partial(
convert_pandoc, from_format='markdown', to_format='latex'),
test, self.tokens[index])

@dec.onlyif_cmds_exist('pandoc')
def test_markdown2latex_markup(self):
Expand Down Expand Up @@ -108,34 +109,35 @@ def test_markdown2html(self):
self._try_markdown(markdown2html, test, self.tokens[index])

def test_markdown2html_heading_anchors(self):
for md, tokens in [
('# test',
('<h1', '>test', 'id="test"', u'&#182;</a>', "anchor-link")
),
('###test head space',
('<h3', '>test head space', 'id="test-head-space"', u'&#182;</a>', "anchor-link")
)
]:
for md, tokens in [('# test', ('<h1', '>test', 'id="test"',
u'&#182;</a>', "anchor-link")),
('###test head space',
('<h3', '>test head space', 'id="test-head-space"',
u'&#182;</a>', "anchor-link"))]:
self._try_markdown(markdown2html, md, tokens)

def test_markdown2html_math(self):
# Mathematical expressions not containing <, >, & should be passed through unaltered
# Mathematical expressions not containing <, >, &
# should be passed through unaltered
# all the "<", ">", "&" must be escaped correctly
cases = [("\\begin{equation*}\n"
"\\left( \\sum_{k=1}^n a_k b_k \\right)^2 \\leq \\left( \\sum_{k=1}^n a_k^2 \\right) \\left( \\sum_{k=1}^n b_k^2 \\right)\n"
"\\end{equation*}"),
("$$\n"
"a = 1 *3* 5\n"
"$$"),
"$ a = 1 *3* 5 $",
"$s_i = s_{i}\n$",
"$a<b&b<lt$",
"$a<b&lt;b>a;a-b<0$",
"$<k'>$",
"$$a<b&b<lt$$",
"$$a<b&lt;b>a;a-b<0$$",
"$$<k'>$$",
"""$
cases = [(
"\\begin{equation*}\n" +
("\\left( \\sum_{k=1}^n a_k b_k \\right)^2 "
"\\leq \\left( \\sum_{k=1}^n a_k^2 \\right) "
"\\left( \\sum_{k=1}^n b_k^2 \\right)\n") +
"\\end{equation*}"),
("$$\n"
"a = 1 *3* 5\n"
"$$"),
"$ a = 1 *3* 5 $",
"$s_i = s_{i}\n$",
"$a<b&b<lt$",
"$a<b&lt;b>a;a-b<0$",
"$<k'>$",
"$$a<b&b<lt$$",
"$$a<b&lt;b>a;a-b<0$$",
"$$<k'>$$",
"""$
\\begin{tabular}{ l c r }
1 & 2 & 3 \\
4 & 5 & 6 \\
Expand All @@ -145,9 +147,11 @@ def test_markdown2html_math(self):
for case in cases:
result = markdown2html(case)
# find the equation in the generated texts
search_result = re.search("\$.*\$",result,re.DOTALL)
search_result = re.search("\$.*\$", result, re.DOTALL)
if search_result is None:
search_result = re.search("\\\\begin\\{equation.*\\}.*\\\\end\\{equation.*\\}",result,re.DOTALL)
search_result = re.search(
"\\\\begin\\{equation.*\\}.*\\\\end\\{equation.*\\}",
result, re.DOTALL)
math = search_result.group(0)
# the resulting math part can not contain "<", ">" or
# "&" not followed by "lt;", "gt;", or "amp;".
Expand All @@ -156,19 +160,25 @@ def test_markdown2html_math(self):
# python 2.7 has assertNotRegexpMatches instead of assertNotRegex
if not hasattr(self, 'assertNotRegex'):
self.assertNotRegex = self.assertNotRegexpMatches
self.assertNotRegex(math,"&(?![gt;|lt;|amp;])")
self.assertNotRegex(math, "&(?![gt;|lt;|amp;])")
# the result should be able to be unescaped correctly
self.assertEquals(case,self._unescape(math))
self.assertEquals(case, self._unescape(math))

def test_markdown2html_math_mixed(self):
"""ensure markdown between inline and inline-block math"""
case = """The entries of $C$ are given by the exact formula:
"""ensure markdown between inline and inline-block math works and
test multiple LaTeX markup syntaxes.
"""
case = """The entries of \\\\(C\\\\) are given by the exact formula:
$$
C_{ik} = \sum_{j=1}^n A_{ij} B_{jk}
C_{ik} = \sum_{j=1}^n A_{ij} B_{jk},
$$
but there are many ways to _implement_ this computation. $\approx 2mnp$ flops"""
self._try_markdown(markdown2html, case,
case.replace("_implement_", "<em>implement</em>"))
but you can _implement_ this computation in many ways.
$\approx 2mnp$ flops are needed for \\\\[ C_{ik} = \sum_{j=1}^n A_{ij} B_{jk} \\\\]."""
output_check = (case.replace("_implement_", "<em>implement</em>")
.replace("\\\\(", "$").replace("\\\\)", "$")
.replace("\\\\[", "$$").replace("\\\\]", "$$"))
# these replacements are needed because we use $ and $$ in our html output
self._try_markdown(markdown2html, case, output_check)

def test_markdown2html_math_paragraph(self):
"""these should all parse without modification"""
Expand Down Expand Up @@ -200,7 +210,7 @@ def test_markdown2html_math_paragraph(self):

for case in cases:
s = markdown2html(case)
self.assertIn(case,self._unescape(s))
self.assertIn(case, self._unescape(s))

@dec.onlyif_cmds_exist('pandoc')
def test_markdown2rst(self):
Expand All @@ -212,8 +222,10 @@ def test_markdown2rst(self):
tokens[1] = r'\*\*test'

for index, test in enumerate(self.tests):
self._try_markdown(partial(convert_pandoc, from_format='markdown',
to_format='rst'), test, tokens[index])
self._try_markdown(
partial(
convert_pandoc, from_format='markdown', to_format='rst'),
test, tokens[index])

def _try_markdown(self, method, test, tokens):
results = method(test)
Expand All @@ -223,7 +235,7 @@ def _try_markdown(self, method, test, tokens):
for token in tokens:
self.assertIn(token, results)

def _unescape(self,s):
def _unescape(self, s):
# undo cgi.escape() manually
# We must be careful here for compatibility
# html.unescape() is not available on python 2.7
Expand Down

0 comments on commit e5ba024

Please sign in to comment.