diff --git a/nbconvert/filters/markdown_mistune.py b/nbconvert/filters/markdown_mistune.py index 382a53882..636e1e8cc 100644 --- a/nbconvert/filters/markdown_mistune.py +++ b/nbconvert/filters/markdown_mistune.py @@ -21,7 +21,7 @@ from cgi import escape as html_escape import bs4 -import mistune +from mistune import BlockParser, HTMLRenderer, InlineParser, Markdown from pygments import highlight from pygments.formatters import HtmlFormatter from pygments.lexers import get_lexer_by_name @@ -34,158 +34,183 @@ class InvalidNotebook(Exception): pass -class MathBlockGrammar(mistune.BlockGrammar): - """This defines a single regex comprised of the different patterns that - identify math content spanning multiple lines. These are used by the - MathBlockLexer. +class MathBlockParser(BlockParser): + """This acts as a pass-through to the MathInlineParser. It is needed in + order to avoid other block level rules splitting math sections apart. """ - multi_math_str = "|".join( - [r"^\$\$.*?\$\$", r"^\\\\\[.*?\\\\\]", r"^\\begin\{([a-z]*\*?)\}(.*?)\\end\{\1\}"] + MULTILINE_MATH = re.compile( + r"(?=2.0.0`, where the pattern is passed + to the undocumented `re.Scanner`. """ - - inline_math = re.compile(r"^\$(.+?)\$|^\\\\\((.+?)\\\\\)", re.DOTALL) - block_math = re.compile(r"^\$\$(.*?)\$\$|^\\\\\[(.*?)\\\\\]", re.DOTALL) - latex_environment = re.compile(r"^\\begin\{([a-z]*\*?)\}(.*?)\\end\{\1\}", re.DOTALL) - text = re.compile(r"^[\s\S]+?(?=[\\%s\n" % mistune.escape(code) + return super().block_code(code) formatter = HtmlFormatter() return highlight(code, lexer, formatter) def block_html(self, html): - embed_images = self.options.get("embed_images", False) - - if embed_images: + if self.embed_images: html = self._html_embed_images(html) return super().block_html(html) def inline_html(self, html): - embed_images = self.options.get("embed_images", False) - - if embed_images: + if self.embed_images: html = self._html_embed_images(html) return super().inline_html(html) - def header(self, text, level, raw=None): - html = super().header(text, level, raw=raw) - if self.options.get("exclude_anchor_links"): + def heading(self, text, level): + html = super().heading(text, level) + if self.exclude_anchor_links: return html - anchor_link_text = self.options.get("anchor_link_text", "¶") - return add_anchor(html, anchor_link_text=anchor_link_text) + return add_anchor(html, anchor_link_text=self.anchor_link_text) def escape_html(self, text): return html_escape(text) + def multiline_math(self, text): + return text + def block_math(self, text): - return "$$%s$$" % self.escape_html(text) + return f"$${self.escape_html(text)}$$" def latex_environment(self, name, text): - name = self.escape_html(name) - text = self.escape_html(text) - return rf"\begin{{{name}}}{text}\end{{{name}}}" + name, text = self.escape_html(name), self.escape_html(text) + return f"\\begin{{{name}}}{text}\\end{{{name}}}" def inline_math(self, text): - return "$%s$" % self.escape_html(text) + return f"${self.escape_html(text)}$" - def image(self, src, title, text): + def image(self, src, text, title): """Rendering a image with title and text. :param src: source link of the image. - :param title: title text of the image. :param text: alt text of the image. + :param title: title text of the image. """ - attachments = self.options.get("attachments", {}) attachment_prefix = "attachment:" - embed_images = self.options.get("embed_images", False) if src.startswith(attachment_prefix): name = src[len(attachment_prefix) :] - if name not in attachments: + if name not in self.attachments: raise InvalidNotebook(f"missing attachment: {name}") - attachment = attachments[name] + attachment = self.attachments[name] # we choose vector over raster, and lossless over lossy preferred_mime_types = ["image/svg+xml", "image/png", "image/jpeg"] for preferred_mime_type in preferred_mime_types: @@ -197,13 +222,13 @@ def image(self, src, title, text): data = attachment[mime_type] src = "data:" + mime_type + ";base64," + data - elif embed_images: + elif self.embed_images: base64_url = self._src_to_base64(src) if base64_url is not None: src = base64_url - return super().image(src, title, text) + return super().image(src, text, title) def _src_to_base64(self, src): """Turn the source file into a base64 url. @@ -211,8 +236,7 @@ def _src_to_base64(self, src): :param src: source link of the file. :return: the base64 url or None if the file was not found. """ - path = self.options.get("path", "") - src_path = os.path.join(path, src) + src_path = os.path.join(self.path, src) if not os.path.exists(src_path): return None diff --git a/pyproject.toml b/pyproject.toml index 060bda2c7..ae51a7970 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -29,7 +29,7 @@ dependencies = [ "jupyter_core>=4.7", "jupyterlab_pygments", "MarkupSafe>=2.0", - "mistune>=0.8.1,<2", + "mistune>=2.0.2", "nbclient>=0.5.0", "nbformat>=5.1", "packaging",