Merge pull request #609 from mpacer/better_mjax

Improve, normalise mathjax handling, yapf the file
jupyter · Jul 27, 2017 · e5ba024 · e5ba024
2 parents 0fec557 + 83cb693
commit e5ba024
Show file tree

Hide file tree

Showing 2 changed files with 75 additions and 55 deletions.
diff --git a/nbconvert/filters/markdown_mistune.py b/nbconvert/filters/markdown_mistune.py
@@ -19,16 +19,20 @@
 from pygments.util import ClassNotFound
 
 from nbconvert.filters.strings import add_anchor
-from nbconvert.utils.exceptions import ConversionException
+
+block_math = re.compile(r"^\$\$(.*?)\$\$|^\\\\\[(.*?)\\\\\]", re.DOTALL)
+inline_math = re.compile(r"^\$(.+?)\$|^\\\\\((.+?)\\\\\)", re.DOTALL)
 
 
 class MathBlockGrammar(mistune.BlockGrammar):
-    block_math = re.compile(r"^\$\$(.*?)\$\$", re.DOTALL)
+    block_math = block_math
     latex_environment = re.compile(r"^\\begin\{([a-z]*\*?)\}(.*?)\\end\{\1\}",
-                                                re.DOTALL)
+                                   re.DOTALL)
+
 
 class MathBlockLexer(mistune.BlockLexer):
-    default_rules = ['block_math', 'latex_environment'] + mistune.BlockLexer.default_rules
+    default_rules = ['block_math', 'latex_environment'
+                     ] + mistune.BlockLexer.default_rules
 
     def __init__(self, rules=None, **kwargs):
         if rules is None:
@@ -39,7 +43,7 @@ def parse_block_math(self, m):
         """Parse a $$math$$ block"""
         self.tokens.append({
             'type': 'block_math',
-            'text': m.group(1)
+            'text': m.group(1) or m.group(2)
         })
 
     def parse_latex_environment(self, m):
@@ -51,24 +55,25 @@ def parse_latex_environment(self, m):
 
 
 class MathInlineGrammar(mistune.InlineGrammar):
-    math = re.compile(r"^\$(.+?)\$", re.DOTALL)
-    block_math = re.compile(r"^\$\$(.+?)\$\$", re.DOTALL)
+    inline_math = inline_math
+    block_math = block_math
     text = re.compile(r'^[\s\S]+?(?=[\\<!\[_*`~$]|https?://| {2,}\n|$)')
 
 
 class MathInlineLexer(mistune.InlineLexer):
-    default_rules = ['block_math', 'math'] + mistune.InlineLexer.default_rules
+    default_rules = (['block_math', 'inline_math']
+                     + mistune.InlineLexer.default_rules)
 
     def __init__(self, renderer, rules=None, **kwargs):
         if rules is None:
             rules = MathInlineGrammar()
         super(MathInlineLexer, self).__init__(renderer, rules, **kwargs)
 
-    def output_math(self, m):
-        return self.renderer.inline_math(m.group(1))
+    def output_inline_math(self, m):
+        return self.renderer.inline_math(m.group(1) or m.group(2))
 
     def output_block_math(self, m):
-        return self.renderer.block_math(m.group(1))
+        return self.renderer.block_math(m.group(1) or m.group(2))
 
 
 class MarkdownWithMath(mistune.Markdown):
@@ -83,7 +88,8 @@ def output_block_math(self):
         return self.renderer.block_math(self.token['text'])
 
     def output_latex_environment(self):
-        return self.renderer.latex_environment(self.token['name'], self.token['text'])
+        return self.renderer.latex_environment(self.token['name'],
+                                               self.token['text'])
 
 
 class IPythonRenderer(mistune.Renderer):
@@ -111,7 +117,7 @@ def header(self, text, level, raw=None):
     # html.escape() is not available on python 2.7
     # For more details, see:
     # https://wiki.python.org/moin/EscapingHtml
-    def escape_html(self,text):
+    def escape_html(self, text):
         return cgi.escape(text)
 
     def block_math(self, text):
@@ -125,6 +131,8 @@ def latex_environment(self, name, text):
     def inline_math(self, text):
         return '$%s$' % self.escape_html(text)
 
+
 def markdown2html_mistune(source):
     """Convert a markdown string to HTML using mistune"""
-    return MarkdownWithMath(renderer=IPythonRenderer(escape=False)).render(source)
+    return MarkdownWithMath(renderer=IPythonRenderer(
+        escape=False)).render(source)
diff --git a/nbconvert/filters/tests/test_markdown.py b/nbconvert/filters/tests/test_markdown.py
@@ -52,9 +52,10 @@ class TestMarkdown(TestsBase):
     def test_markdown2latex(self):
         """markdown2latex test"""
         for index, test in enumerate(self.tests):
-            self._try_markdown(partial(convert_pandoc, from_format='markdown',
-                                       to_format='latex'), test,
-                               self.tokens[index])
+            self._try_markdown(
+                partial(
+                    convert_pandoc, from_format='markdown', to_format='latex'),
+                test, self.tokens[index])
 
     @dec.onlyif_cmds_exist('pandoc')
     def test_markdown2latex_markup(self):
@@ -108,34 +109,35 @@ def test_markdown2html(self):
             self._try_markdown(markdown2html, test, self.tokens[index])
 
     def test_markdown2html_heading_anchors(self):
-        for md, tokens in [
-            ('# test',
-                ('<h1', '>test', 'id="test"', u'&#182;</a>', "anchor-link")
-            ),
-            ('###test head space',
-                ('<h3', '>test head space', 'id="test-head-space"', u'&#182;</a>', "anchor-link")
-            )
-        ]:
+        for md, tokens in [('# test', ('<h1', '>test', 'id="test"',
+                                       u'&#182;</a>', "anchor-link")),
+                           ('###test head space',
+                            ('<h3', '>test head space', 'id="test-head-space"',
+                             u'&#182;</a>', "anchor-link"))]:
             self._try_markdown(markdown2html, md, tokens)
 
     def test_markdown2html_math(self):
-        # Mathematical expressions not containing <, >, & should be passed through unaltered
+        # Mathematical expressions not containing <, >, &
+        # should be passed through unaltered
         # all the "<", ">", "&" must be escaped correctly
-        cases = [("\\begin{equation*}\n"
-                  "\\left( \\sum_{k=1}^n a_k b_k \\right)^2 \\leq \\left( \\sum_{k=1}^n a_k^2 \\right) \\left( \\sum_{k=1}^n b_k^2 \\right)\n"
-                  "\\end{equation*}"),
-                 ("$$\n"
-                  "a = 1 *3* 5\n"
-                  "$$"),
-                  "$ a = 1 *3* 5 $",
-                  "$s_i = s_{i}\n$",
-                  "$a<b&b<lt$",
-                  "$a<b&lt;b>a;a-b<0$",
-                  "$<k'>$",
-                  "$$a<b&b<lt$$",
-                  "$$a<b&lt;b>a;a-b<0$$",
-                  "$$<k'>$$",
-                  """$
+        cases = [(
+            "\\begin{equation*}\n" +
+            ("\\left( \\sum_{k=1}^n a_k b_k \\right)^2 "
+             "\\leq \\left( \\sum_{k=1}^n a_k^2 \\right) "
+             "\\left( \\sum_{k=1}^n b_k^2 \\right)\n") +
+            "\\end{equation*}"),
+            ("$$\n"
+             "a = 1 *3* 5\n"
+             "$$"),
+            "$ a = 1 *3* 5 $",
+            "$s_i = s_{i}\n$",
+            "$a<b&b<lt$",
+            "$a<b&lt;b>a;a-b<0$",
+            "$<k'>$",
+            "$$a<b&b<lt$$",
+            "$$a<b&lt;b>a;a-b<0$$",
+            "$$<k'>$$",
+            """$
 \\begin{tabular}{ l c r }
   1 & 2 & 3 \\
   4 & 5 & 6 \\
@@ -145,9 +147,11 @@ def test_markdown2html_math(self):
         for case in cases:
             result = markdown2html(case)
             # find the equation in the generated texts
-            search_result = re.search("\$.*\$",result,re.DOTALL)
+            search_result = re.search("\$.*\$", result, re.DOTALL)
             if search_result is None:
-                search_result = re.search("\\\\begin\\{equation.*\\}.*\\\\end\\{equation.*\\}",result,re.DOTALL)
+                search_result = re.search(
+                    "\\\\begin\\{equation.*\\}.*\\\\end\\{equation.*\\}",
+                    result, re.DOTALL)
             math = search_result.group(0)
             # the resulting math part can not contain "<", ">" or
             # "&" not followed by "lt;", "gt;", or "amp;".
@@ -156,19 +160,25 @@ def test_markdown2html_math(self):
             # python 2.7 has assertNotRegexpMatches instead of assertNotRegex
             if not hasattr(self, 'assertNotRegex'):
                 self.assertNotRegex = self.assertNotRegexpMatches
-            self.assertNotRegex(math,"&(?![gt;|lt;|amp;])")
+            self.assertNotRegex(math, "&(?![gt;|lt;|amp;])")
             # the result should be able to be unescaped correctly
-            self.assertEquals(case,self._unescape(math))
+            self.assertEquals(case, self._unescape(math))
 
     def test_markdown2html_math_mixed(self):
-        """ensure markdown between inline and inline-block math"""
-        case = """The entries of $C$ are given by the exact formula:
+        """ensure markdown between inline and inline-block math works and
+        test multiple LaTeX markup syntaxes.
+        """
+        case = """The entries of \\\\(C\\\\) are given by the exact formula:
 $$
-C_{ik} = \sum_{j=1}^n A_{ij} B_{jk}
+C_{ik} = \sum_{j=1}^n A_{ij} B_{jk},
 $$
-but there are many ways to _implement_ this computation.   $\approx 2mnp$ flops"""
-        self._try_markdown(markdown2html, case,
-                           case.replace("_implement_", "<em>implement</em>"))
+but you can _implement_ this computation in many ways.
+$\approx 2mnp$ flops are needed for \\\\[ C_{ik} = \sum_{j=1}^n A_{ij} B_{jk} \\\\]."""
+        output_check = (case.replace("_implement_", "<em>implement</em>")
+                            .replace("\\\\(", "$").replace("\\\\)", "$")
+                            .replace("\\\\[", "$$").replace("\\\\]", "$$"))
+        # these replacements are needed because we use $ and $$ in our html output
+        self._try_markdown(markdown2html, case, output_check)
 
     def test_markdown2html_math_paragraph(self):
         """these should all parse without modification"""
@@ -200,7 +210,7 @@ def test_markdown2html_math_paragraph(self):
 
         for case in cases:
             s = markdown2html(case)
-            self.assertIn(case,self._unescape(s))
+            self.assertIn(case, self._unescape(s))
 
     @dec.onlyif_cmds_exist('pandoc')
     def test_markdown2rst(self):
@@ -212,8 +222,10 @@ def test_markdown2rst(self):
         tokens[1] = r'\*\*test'
 
         for index, test in enumerate(self.tests):
-            self._try_markdown(partial(convert_pandoc, from_format='markdown',
-                                       to_format='rst'), test, tokens[index])
+            self._try_markdown(
+                partial(
+                    convert_pandoc, from_format='markdown', to_format='rst'),
+                test, tokens[index])
 
     def _try_markdown(self, method, test, tokens):
         results = method(test)
@@ -223,7 +235,7 @@ def _try_markdown(self, method, test, tokens):
             for token in tokens:
                 self.assertIn(token, results)
 
-    def _unescape(self,s):
+    def _unescape(self, s):
         # undo cgi.escape() manually
         # We must be careful here for compatibility
         # html.unescape() is not available on python 2.7