"""The sphinx parser implementation for myst-nb."""
from __future__ import annotations
from collections import defaultdict
from html import escape
import json
from pathlib import Path
import re
from typing import Any, DefaultDict, cast
from docutils import nodes
from markdown_it.token import Token
from markdown_it.tree import SyntaxTreeNode
from myst_parser.config.main import MdParserConfig, merge_file_level
from myst_parser.mdit_to_docutils.base import token_line
from myst_parser.mdit_to_docutils.sphinx_ import SphinxRenderer
from myst_parser.parsers.mdit import create_md_parser
from myst_parser.parsers.sphinx_ import MystParser
import nbformat
from sphinx.application import Sphinx
from sphinx.environment import BuildEnvironment
from sphinx.environment.collectors import EnvironmentCollector
from sphinx.transforms.post_transforms import SphinxPostTransform
from sphinx.util import logging as sphinx_logging
from sphinx.util.docutils import SphinxTranslator
from myst_nb._compat import findall
from myst_nb.core.config import NbParserConfig
from myst_nb.core.execute import ExecutionResult, create_client
from myst_nb.core.loggers import DEFAULT_LOG_TYPE, SphinxDocLogger
from myst_nb.core.nb_to_tokens import nb_node_to_dict, notebook_to_tokens
from myst_nb.core.read import create_nb_reader
from myst_nb.core.render import (
MditRenderMixin,
MimeData,
NbElementRenderer,
create_figure_context,
get_mime_priority,
load_renderer,
)
from myst_nb.warnings_ import MystNBWarnings, create_warning

SPHINX_LOGGER = sphinx_logging.getLogger(__name__)


class SphinxEnvType(BuildEnvironment):
"""Sphinx build environment, including attributes set by myst_nb."""
myst_config: MdParserConfig
mystnb_config: NbParserConfig
nb_metadata: DefaultDict[str, dict]
nb_new_exec_data: bool
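
# A minimal sketch (the event handler and printed attributes are illustrative)
# of how another extension might read these attributes once myst-nb has
# populated them during a build:
#
#     from typing import cast
#
#     def on_env_updated(app, env):
#         env = cast(SphinxEnvType, env)
#         if getattr(env, "nb_new_exec_data", False):
#             print(dict(env.nb_metadata))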


class Parser(MystParser):
"""Sphinx parser for Jupyter Notebook formats, containing MyST Markdown."""
supported = ("myst-nb",)
translate_section_name = None
config_section = "myst-nb parser"
config_section_dependencies = ("parsers",)
env: SphinxEnvType
def parse(self, inputstring: str, document: nodes.document) -> None:
"""Parse source text.
:param inputstring: The source string to parse
:param document: The root docutils node to add AST elements to
"""
assert self.env is not None, "env not set"
document_path = self.env.doc2path(self.env.docname)
# get a logger for this document
logger = SphinxDocLogger(document)
# get markdown parsing configuration
md_config: MdParserConfig = self.env.myst_config
# get notebook rendering configuration
nb_config: NbParserConfig = self.env.mystnb_config
# create a reader for the notebook
nb_reader = create_nb_reader(document_path, md_config, nb_config, inputstring)
# If the nb_reader is None, then we default to a standard Markdown parser
if nb_reader is None or document.current_source.endswith("<translated>"):
return super().parse(inputstring, document)
notebook = nb_reader.read(inputstring)
# update the global markdown config with the file-level config
warning = lambda wtype, msg: create_warning( # noqa: E731
document, msg, line=1, append_to=document, subtype=wtype
)
nb_reader.md_config = merge_file_level(
nb_reader.md_config, notebook.metadata, warning
)
# potentially replace kernel name with alias
kernel_name = notebook.metadata.get("kernelspec", {}).get("name", None)
if kernel_name is not None and nb_config.kernel_rgx_aliases:
for rgx, alias in nb_config.kernel_rgx_aliases.items():
if re.fullmatch(rgx, kernel_name):
logger.debug(
f"Replaced kernel name: {kernel_name!r} -> {alias!r}",
subtype="kernel",
)
notebook.metadata["kernelspec"]["name"] = alias
break
# Update mystnb configuration with notebook level metadata
if nb_config.metadata_key in notebook.metadata:
overrides = nb_node_to_dict(notebook.metadata[nb_config.metadata_key])
overrides.pop("output_folder", None) # this should not be overridden
try:
nb_config = nb_config.copy(**overrides)
except Exception as exc:
logger.warning(
f"Failed to update configuration with notebook metadata: {exc}",
subtype="config",
)
else:
logger.debug(
"Updated configuration with notebook metadata", subtype="config"
)
# Setup the parser
mdit_parser = create_md_parser(nb_reader.md_config, SphinxNbRenderer)
mdit_parser.options["document"] = document
mdit_parser.options["nb_config"] = nb_config
mdit_renderer: SphinxNbRenderer = mdit_parser.renderer # type: ignore
mdit_env: dict[str, Any] = {}
# load notebook element renderer class from entry-point name
# this is separate from SphinxNbRenderer, so that users can override it
renderer_name = nb_config.render_plugin
nb_renderer: NbElementRenderer = load_renderer(renderer_name)(
mdit_renderer, logger
)
# we temporarily store nb_renderer on the document,
# so that roles/directives can access it
document.attributes["nb_renderer"] = nb_renderer
        # we currently call setup_render early, so that the nb_renderer already
        # has access to the renderer's document and options
mdit_renderer.setup_render(mdit_parser.options, mdit_env) # type: ignore
# parse notebook structure to markdown-it tokens
# note, this does not assume that the notebook has been executed yet
mdit_tokens = notebook_to_tokens(notebook, mdit_parser, mdit_env, logger)
# open the notebook execution client,
# this may execute the notebook immediately or during the page render
with create_client(
notebook, document_path, nb_config, logger, nb_reader.read_fmt
) as nb_client:
mdit_parser.options["nb_client"] = nb_client
# convert to docutils AST, which is added to the document
mdit_renderer.render(mdit_tokens, mdit_parser.options, mdit_env)
# save final execution data
if nb_client.exec_metadata:
NbMetadataCollector.set_exec_data(
self.env, self.env.docname, nb_client.exec_metadata
)
if nb_client.exec_metadata["traceback"]:
# store error traceback in outdir and log its path
reports_file = Path(self.env.app.outdir).joinpath(
"reports", *(self.env.docname + ".err.log").split("/")
)
reports_file.parent.mkdir(parents=True, exist_ok=True)
reports_file.write_text(
nb_client.exec_metadata["traceback"], encoding="utf8"
)
logger.warning(
f"Notebook exception traceback saved in: {reports_file}",
subtype="exec",
)
# write final (updated) notebook to output folder (utf8 is standard encoding)
path = self.env.docname.split("/")
ipynb_path = path[:-1] + [path[-1] + ".ipynb"]
content = nbformat.writes(notebook).encode("utf-8")
nb_renderer.write_file(ipynb_path, content, overwrite=True)
# write glue data to the output folder,
# and store the keys to environment doc metadata,
# so that they may be used in any post-transform steps
if nb_client.glue_data:
glue_path = path[:-1] + [path[-1] + ".glue.json"]
nb_renderer.write_file(
glue_path,
json.dumps(nb_client.glue_data, cls=BytesEncoder).encode("utf8"),
overwrite=True,
)
NbMetadataCollector.set_doc_data(
self.env, self.env.docname, "glue", list(nb_client.glue_data.keys())
)
# move some document metadata to environment metadata,
# so that we can later read it from the environment,
# rather than having to load the whole doctree
for key, (uri, kwargs) in document.attributes.pop("nb_js_files", {}).items():
NbMetadataCollector.add_js_file(
self.env, self.env.docname, key, uri, kwargs
)
# remove temporary state
document.attributes.pop("nb_renderer")
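
# A minimal conf.py sketch showing how this parser is typically activated, and
# how the kernel-name aliasing handled in ``Parser.parse`` above is configured;
# the ``nb_kernel_rgx_aliases`` option name and the regex are assumptions, not
# taken from this module:
#
#     extensions = ["myst_nb"]
#     # map any "python3.x" kernelspec name onto the installed "python3" kernel
#     nb_kernel_rgx_aliases = {"python3.*": "python3"}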


class SphinxNbRenderer(SphinxRenderer, MditRenderMixin):
"""A sphinx renderer for Jupyter Notebooks."""
def render_nb_initialise(self, token: SyntaxTreeNode) -> None:
env = cast(BuildEnvironment, self.sphinx_env)
metadata = self.nb_client.nb_metadata
special_keys = ["kernelspec", "language_info", "source_map"]
for key in special_keys:
if key in metadata:
# save these special keys on the metadata, rather than as docinfo
# note, sphinx_book_theme checks kernelspec is in the metadata
env.metadata[env.docname][key] = metadata.get(key)
# forward the remaining metadata to the front_matter renderer
special_keys.append("widgets")
top_matter = {k: v for k, v in metadata.items() if k not in special_keys}
self.render_front_matter(
Token( # type: ignore
"front_matter",
"",
0,
map=[0, 0],
content=top_matter, # type: ignore[arg-type]
),
)
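    # For illustration (a hypothetical notebook metadata dict, not taken from a
    # real notebook): given {"kernelspec": {...}, "language_info": {...},
    # "authors": ["..."]}, the first two keys are stored on
    # env.metadata[env.docname] above, and only {"authors": [...]} is forwarded
    # to render_front_matter as document front matter.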
def _render_nb_cell_code_outputs(
self, token: SyntaxTreeNode, outputs: list[nbformat.NotebookNode]
) -> None:
"""Render a notebook code cell's outputs."""
line = token_line(token, 0)
cell_index = token.meta["index"]
metadata = token.meta["metadata"]
# render the outputs
for output_index, output in enumerate(outputs):
if output.output_type == "stream":
if output.name == "stdout":
_nodes = self.nb_renderer.render_stdout(
output, metadata, cell_index, line
)
self.add_line_and_source_path_r(_nodes, token)
self.current_node.extend(_nodes)
elif output.name == "stderr":
_nodes = self.nb_renderer.render_stderr(
output, metadata, cell_index, line
)
self.add_line_and_source_path_r(_nodes, token)
self.current_node.extend(_nodes)
else:
pass # TODO warning
elif output.output_type == "error":
_nodes = self.nb_renderer.render_error(
output, metadata, cell_index, line
)
self.add_line_and_source_path_r(_nodes, token)
self.current_node.extend(_nodes)
elif output.output_type in ("display_data", "execute_result"):
# Note, this is different to the docutils implementation,
# where we directly select a single output, based on the mime_priority.
# Here, we do not know the mime priority until we know the output format
# so we output all the outputs during this parsing phase
# (this is what sphinx caches as "output format agnostic" AST),
# and replace the mime_bundle with the format specific output
# in a post-transform (run per output format on the cached AST)
figure_options = (
self.get_cell_level_config(
"render_figure_options", metadata, line=line
)
or None
)
with create_figure_context(self, figure_options, line):
mime_bundle = nodes.container(nb_element="mime_bundle")
with self.current_node_context(mime_bundle):
for mime_type, data in output["data"].items():
mime_container = nodes.container(mime_type=mime_type)
with self.current_node_context(mime_container):
_nodes = self.nb_renderer.render_mime_type(
MimeData(
mime_type,
data,
cell_metadata=metadata,
output_metadata=output.get("metadata", {}),
cell_index=cell_index,
output_index=output_index,
line=line,
)
)
self.current_node.extend(_nodes)
if mime_container.children:
self.current_node.append(mime_container)
if mime_bundle.children:
self.add_line_and_source_path_r([mime_bundle], token)
self.current_node.append(mime_bundle)
else:
create_warning(
self.document,
f"Unsupported output type: {output.output_type}",
line=line,
append_to=self.current_node,
# wtype=DEFAULT_LOG_TYPE,
subtype=MystNBWarnings.OUTPUT_TYPE,
)
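
# A rough, abbreviated sketch (pseudo-XML; the exact child nodes depend on the
# active NbElementRenderer) of the "output format agnostic" AST produced above
# for a single display_data/execute_result output; every available MIME
# representation is kept until SelectMimeType prunes it per builder:
#
#     <container nb_element="mime_bundle">
#         <container mime_type="image/png">
#             <image uri="...">
#         <container mime_type="text/plain">
#             <literal_block ...>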


class SelectMimeType(SphinxPostTransform):
"""Select the mime type to render from mime bundles,
based on the builder and its associated priority list.
"""
default_priority = 4 # TODO set correct priority
def run(self, **kwargs: Any) -> None:
"""Run the transform."""
# get priority list for this builder
# TODO allow for per-notebook/cell priority dicts?
bname = self.app.builder.name
priority_list = get_mime_priority(
bname, self.config["nb_mime_priority_overrides"]
)
def condition(node):
return (
isinstance(node, nodes.container)
and node.attributes.get("nb_element", "") == "mime_bundle"
)
# remove/replace_self will not work with an iterator
for node in list(findall(self.document)(condition)):
# get available mime types
            mime_types = [child["mime_type"] for child in node.children]
if not mime_types:
node.parent.remove(node)
continue
# select top priority
index = None
for mime_type in priority_list:
try:
index = mime_types.index(mime_type)
except ValueError:
continue
else:
break
if index is None:
mime_string = ",".join(repr(m) for m in mime_types)
SPHINX_LOGGER.warning(
f"No mime type available in priority list for builder {bname!r} "
f"({mime_string}) [{DEFAULT_LOG_TYPE}.mime_priority]",
type=DEFAULT_LOG_TYPE,
subtype="mime_priority",
location=node,
)
node.parent.remove(node)
elif not node.children[index].children:
node.parent.remove(node)
else:
node.replace_self(node.children[index].children)
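
# A minimal conf.py sketch for the override option read above; the
# (builder name, MIME type, priority) tuple form and the ordering (lower
# numbers ranked first) are assumptions here, and the values are purely
# illustrative:
#
#     nb_mime_priority_overrides = [
#         ("html", "application/vnd.plotly.v1+json", 0),
#         ("latex", "image/jpeg", 10),
#     ]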


class NbMetadataCollector(EnvironmentCollector):
"""Collect myst-nb specific metadata, and handle merging of parallel builds."""
@staticmethod
def set_doc_data(env: SphinxEnvType, docname: str, key: str, value: Any) -> None:
"""Add nb metadata for a docname to the environment."""
if not hasattr(env, "nb_metadata"):
env.nb_metadata = defaultdict(dict)
env.nb_metadata.setdefault(docname, {})[key] = value
@staticmethod
def get_doc_data(env: SphinxEnvType) -> DefaultDict[str, dict]:
"""Get myst-nb docname -> metadata dict."""
if not hasattr(env, "nb_metadata"):
env.nb_metadata = defaultdict(dict)
return env.nb_metadata
@classmethod
def set_exec_data(
cls, env: SphinxEnvType, docname: str, value: ExecutionResult
) -> None:
"""Add nb metadata for a docname to the environment."""
cls.set_doc_data(env, docname, "exec_data", value)
# TODO this does not take account of cache data
cls.note_exec_update(env)
@classmethod
def get_exec_data(cls, env: SphinxEnvType, docname: str) -> ExecutionResult | None:
"""Get myst-nb docname -> execution data."""
return cls.get_doc_data(env)[docname].get("exec_data")
def get_outdated_docs( # type: ignore[override]
self,
app: Sphinx,
env: SphinxEnvType,
added: set[str],
changed: set[str],
removed: set[str],
) -> list[str]:
# called before any docs are read
env.nb_new_exec_data = False
return []
@staticmethod
def note_exec_update(env: SphinxEnvType) -> None:
"""Note that a notebook has been executed."""
env.nb_new_exec_data = True
@staticmethod
def new_exec_data(env: SphinxEnvType) -> bool:
"""Return whether any notebooks have updated execution data."""
return getattr(env, "nb_new_exec_data", False)
@classmethod
def add_js_file(
cls,
env: SphinxEnvType,
docname: str,
key: str,
uri: str | None,
kwargs: dict[str, str],
):
"""Register a JavaScript file to include in the HTML output."""
if not hasattr(env, "nb_metadata"):
env.nb_metadata = defaultdict(dict)
js_files = env.nb_metadata.setdefault(docname, {}).setdefault("js_files", {})
# TODO handle whether overrides are allowed
js_files[key] = (uri, kwargs)
@classmethod
def get_js_files(
cls, env: SphinxEnvType, docname: str
) -> dict[str, tuple[str | None, dict[str, str]]]:
"""Get myst-nb docname -> execution data."""
return cls.get_doc_data(env)[docname].get("js_files", {})
def clear_doc( # type: ignore[override]
self,
app: Sphinx,
env: SphinxEnvType,
docname: str,
) -> None:
if not hasattr(env, "nb_metadata"):
env.nb_metadata = defaultdict(dict)
env.nb_metadata.pop(docname, None)
def process_doc(self, app: Sphinx, doctree: nodes.document) -> None:
pass
def merge_other( # type: ignore[override]
self,
app: Sphinx,
env: SphinxEnvType,
docnames: set[str],
other: SphinxEnvType,
) -> None:
if not hasattr(env, "nb_metadata"):
env.nb_metadata = defaultdict(dict)
other_metadata = getattr(other, "nb_metadata", defaultdict(dict))
for docname in docnames:
env.nb_metadata[docname] = other_metadata[docname]
if other.nb_new_exec_data:
env.nb_new_exec_data = True
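
# A minimal sketch (the event handler and docname are illustrative) of reading
# the collected execution data back out of the environment, e.g. to report
# failed notebooks after a build:
#
#     def on_env_updated(app, env):
#         data = NbMetadataCollector.get_exec_data(env, "notebooks/example")
#         if data is not None and data["traceback"]:
#             print("execution failed for notebooks/example")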


class BytesEncoder(json.JSONEncoder):
"""A JSON encoder that accepts b64 (and other *ascii*) bytestrings."""
def default(self, obj):
if isinstance(obj, bytes):
return obj.decode("ascii")
return json.JSONEncoder.default(self, obj)
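
# For illustration, how the encoder above behaves when writing glue data (the
# base64 payload is made up):
#
#     json.dumps({"image/png": b"iVBORw0KGgo="}, cls=BytesEncoder)
#     # -> '{"image/png": "iVBORw0KGgo="}'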


class HideCodeCellNode(nodes.Element):
"""Node for hiding cell input."""
@classmethod
def add_to_app(cls, app: Sphinx):
app.add_node(cls, html=(visit_HideCellInput, depart_HideCellInput))


def visit_HideCellInput(self: SphinxTranslator, node: HideCodeCellNode):
classes = " ".join(node["classes"])
self.body.append(f'<details class="hide {classes}">\n')
self.body.append('<summary aria-label="Toggle hidden content">\n')
self.body.append(f'<span class="collapsed">{escape(node["prompt_show"])}</span>\n')
self.body.append(f'<span class="expanded">{escape(node["prompt_hide"])}</span>\n')
self.body.append("</summary>\n")


def depart_HideCellInput(self: SphinxTranslator, node: HideCodeCellNode):
self.body.append("</details>\n")


class HideInputCells(SphinxPostTransform):
"""Hide input cells in the HTML output."""
default_priority = 199
formats = ("html",)
def run(self, **kwargs):
for node in findall(self.document)(nodes.container):
if (
node.get("nb_element") == "cell_code"
and node.get("hide_mode")
and node.children
):
hide_mode = node.get("hide_mode")
has_input = node.children[0].get("nb_element") == "cell_code_source"
has_output = node.children[-1].get("nb_element") == "cell_code_output"
if has_input and hide_mode == "all":
# wrap everything and place a summary above the input
wrap_node = HideCodeCellNode(
prompt_show=node["prompt_show"].replace("{type}", "content"),
prompt_hide=node["prompt_hide"].replace("{type}", "content"),
)
wrap_node["classes"].append("above-input")
wrap_node.extend(node.children)
node.children = [wrap_node]
if has_input and has_output and hide_mode in ("output", "input+output"):
node.children[0]["classes"].append("above-output-prompt")
if has_input and hide_mode in ("input", "input+output"):
# wrap just the input and place a summary above the input
wrap_node = HideCodeCellNode(
prompt_show=node["prompt_show"].replace("{type}", "source"),
prompt_hide=node["prompt_hide"].replace("{type}", "source"),
)
wrap_node["classes"].append("above-input")
code = node.children[0]
wrap_node.append(code)
node.replace(code, wrap_node)
if has_input and has_output and hide_mode in ("output", "input+output"):
# wrap just the output and place a summary below the input
wrap_node = HideCodeCellNode(
prompt_show=node["prompt_show"].replace("{type}", "output"),
prompt_hide=node["prompt_hide"].replace("{type}", "output"),
)
wrap_node["classes"].append("below-input")
output = node.children[-1]
wrap_node.append(output)
node.replace(output, wrap_node)
if (
(not has_input)
and has_output
and hide_mode in ("all", "input+output", "output")
):
# wrap just the output and place a summary above the output
wrap_node = HideCodeCellNode(
prompt_show=node["prompt_show"].replace("{type}", "outputs"),
prompt_hide=node["prompt_hide"].replace("{type}", "outputs"),
)
wrap_node["classes"].append("above-output")
output = node.children[-1]
wrap_node.append(output)
node.replace(output, wrap_node)
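
# For reference, the hide_mode values handled above ("input", "output",
# "input+output", "all") are set on the cell container earlier in the pipeline;
# assuming the standard myst-nb cell tags, a code cell tagged like the following
# would end up with hide_mode="input" and have its source wrapped in a
# HideCodeCellNode:
#
#     {"cell_type": "code", "metadata": {"tags": ["hide-input"]}, "source": "..."}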