Skip to content

Commit

Permalink
Split BLOCK_MATH and INLINE_MATH into four regexes
Browse files Browse the repository at this point in the history
This is important for avoiding problems with TeX-style block math, where the trailing '$$' is not removed.
  • Loading branch information
marmitar committed Apr 27, 2022
1 parent 58f7861 commit b226489
Showing 1 changed file with 25 additions and 27 deletions.
52 changes: 25 additions & 27 deletions nbconvert/filters/markdown_mistune.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,19 +66,6 @@ def _dotall(pattern):
return f"(?s:{pattern})"


def _strip(text, *, prefix, suffix):
"""Remove prefix and suffix from text, if present.
`InlineParser` sometimes return these affixes, even though it shouldn't.
"""
np, ns = len(prefix), len(suffix)
if text[:np] == prefix:
text = text[np:]
if text[-ns:] == suffix:
text = text[:-ns]
return text


class MathInlineParser(InlineParser):
r"""This interprets the content of LaTeX style math objects.
Expand All @@ -87,27 +74,38 @@ class MathInlineParser(InlineParser):
delimiters from all these varieties, and extracts the type of environment
in the last case (``foo`` in this example).
"""
# All patterns below are passed through `_dotall`, which wraps them in
# `(?s:...)` so that `.` also matches newlines.
#
# INLINE_MATH handles both inline delimiter styles in one pattern:
#   - `$...$`: opening `$` not preceded by a backslash or `$`, closing `$`
#     not preceded by a backslash; content (non-empty) is group 1.
#   - two backslashes + `(` ... two backslashes + `)`; content is group 2.
INLINE_MATH = _dotall(r"(?<![\\$])[$](.+?)(?<!\\)[$]|[\\]{2}[(](.+?)[\\]{2}[)]")
# BLOCK_MATH handles both block delimiter styles in one pattern:
#   - `$$...$$` with neither delimiter preceded by a backslash; group 1.
#   - two backslashes + `[` ... two backslashes + `]`; group 2.
BLOCK_MATH = _dotall(r"(?<!\\)[$]{2}(.*?)(?<!\\)[$]{2}|\\\\\[(.*?)\\\\\]")
# The four patterns below cover the same delimiters one style at a time, each
# with a single capture group holding the math content.
# `$$...$$`, neither `$$` preceded by a backslash.
BLOCK_MATH_TEX = _dotall(r"(?<!\\)\$\$(.*?)(?<!\\)\$\$")
# Two backslashes + `[` ... two backslashes + `]`, neither delimiter preceded
# by a further backslash.
BLOCK_MATH_LATEX = _dotall(r"(?<!\\)\\\\\[(.*?)(?<!\\)\\\\\]")
# `$...$` with non-empty content; neither `$` may be preceded by `$` or a
# backslash.
INLINE_MATH_TEX = _dotall(r"(?<![$\\])\$(.+?)(?<![$\\])\$")
# Two backslashes + `(` ... two backslashes + `)`, neither delimiter preceded
# by a further backslash.
INLINE_MATH_LATEX = _dotall(r"(?<!\\)\\\\\((.*?)(?<!\\)\\\\\)")
# `\begin{name}...\end{name}`: group 1 is the environment name (lowercase
# letters, optionally followed by `*`), group 2 is the body; the `\1`
# backreference requires the closing name to equal the opening one.
LATEX_ENVIRONMENT = _dotall(r"\\begin\{([a-z]*\*?)\}(.*?)\\end\{\1\}")

# NOTE(review): RULE_NAMES is assigned twice in this span; as written, the
# second assignment below replaces this one. Confirm which is intended.
RULE_NAMES = ("block_math", "inline_math", "latex_environment") + InlineParser.RULE_NAMES
# The order is important here
# (presumably rules are tried in the listed order, so the block-math rules
# must come before the inline ones — TODO confirm against InlineParser).
RULE_NAMES = (
"block_math_tex",
"block_math_latex",
"inline_math_tex",
"inline_math_latex",
"latex_environment",
) + InlineParser.RULE_NAMES

def parse_block_math_tex(self, m, state):
    """Handle a ``$$...$$`` match and emit it as a ``"block_math"`` token.

    The content is cut out of the complete matched string instead of being
    read from the capture group, because the Scanner sometimes keeps the
    final ``$$``. Dropping two characters from each end removes both math
    markers.

    Returns a ``(token_name, content)`` tuple.
    """
    whole_match = m.group(0)
    return "block_math", whole_match[2:-2]

def parse_inline_math(self, m, state):
def parse_block_math_latex(self, m, state):
    """Emit the first capture group of the match as a ``"block_math"`` token.

    Returns a ``(token_name, content)`` tuple.
    """
    return "block_math", m.group(1)

# NOTE(review): this span appears to mix lines from two versions of the
# handler — the branch below reads m.group(2) and calls `_strip`, while the
# INLINE_MATH_TEX pattern has a single capture group. The diff's +/- markers
# are not visible here; confirm which lines are current before relying on it.
#
# As written: read capture group 1; if it is non-empty, strip one leading and
# one trailing "$" from it; otherwise strip the backslash-backslash-paren
# affixes from capture group 2. The result is returned tagged "inline_math".
def parse_inline_math_tex(self, m, state):
text = m.group(1)
if text:
text = _strip(text, prefix="$", suffix="$")
else:
text = _strip(m.group(2), prefix="\\\\(", suffix="\\\\)")
return "inline_math", text

def parse_block_math(self, m, state):
# NOTE(review): this span appears to mix lines from two versions of the
# handler — it ends in two consecutive `return` statements with different
# tags, and the branch reads m.group(2) although INLINE_MATH_LATEX has a
# single capture group. The diff's +/- markers are not visible here; confirm
# which lines are current before relying on it.
#
# As written: read capture group 1; if it is non-empty, strip one leading and
# one trailing "$$"; otherwise strip the backslash-backslash-bracket affixes
# from capture group 2. The first `return` tags the result "block_math", so
# the final `return "inline_math", text` is never reached.
def parse_inline_math_latex(self, m, state):
text = m.group(1)
if text:
text = _strip(text, prefix="$$", suffix="$$")
else:
text = _strip(m.group(2), prefix="\\\\[", suffix="\\\\]")
return "block_math", text
return "inline_math", text

def parse_latex_environment(self, m, state):
name, text = m.group(1), m.group(2)
Expand Down

0 comments on commit b226489

Please sign in to comment.