From 392ba779ffa9b0c84a74f05bd2d6002c40892140 Mon Sep 17 00:00:00 2001 From: Rowan Cockett Date: Tue, 19 Sep 2023 14:16:07 -0600 Subject: [PATCH] =?UTF-8?q?=F0=9F=93=91=20Add=20`literalinclude`=20directi?= =?UTF-8?q?ve=20(#610)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Supports `literalinclude` directive and options across RST and Sphinx. - [RST documentation](https://docutils.sourceforge.io/docs/ref/rst/directives.html#including-an-external-document-fragment) - [Sphinx documentation](https://www.sphinx-doc.org/en/master/usage/restructuredtext/directives.html#directive-literalinclude) See executablebooks/jupyterlab-myst#189 and https://github.com/orgs/executablebooks/discussions/1026 --- .changeset/cool-cooks-act.md | 6 + .changeset/flat-suits-allow.md | 5 + .changeset/gold-weeks-appear.md | 5 + .changeset/orange-planes-promise.md | 6 + docs/code.md | 70 +++++++- packages/myst-cli/src/process/mdast.ts | 4 +- packages/myst-cli/src/transforms/include.ts | 42 ++--- packages/myst-directives/package.json | 2 + packages/myst-directives/src/code.spec.ts | 54 ++++++ packages/myst-directives/src/code.ts | 176 +++++++++---------- packages/myst-directives/src/include.spec.ts | 19 ++ packages/myst-directives/src/include.ts | 171 +++++++++++++++++- packages/myst-directives/src/index.ts | 7 +- packages/myst-spec-ext/src/types.ts | 26 +++ packages/myst-transforms/src/include.spec.ts | 36 ++++ packages/myst-transforms/src/include.ts | 165 +++++++++++++++++ packages/myst-transforms/src/index.ts | 1 + 17 files changed, 669 insertions(+), 126 deletions(-) create mode 100644 .changeset/cool-cooks-act.md create mode 100644 .changeset/flat-suits-allow.md create mode 100644 .changeset/gold-weeks-appear.md create mode 100644 .changeset/orange-planes-promise.md create mode 100644 packages/myst-directives/src/code.spec.ts create mode 100644 packages/myst-directives/src/include.spec.ts create mode 100644 
packages/myst-transforms/src/include.spec.ts create mode 100644 packages/myst-transforms/src/include.ts diff --git a/.changeset/cool-cooks-act.md b/.changeset/cool-cooks-act.md new file mode 100644 index 000000000..57b54c826 --- /dev/null +++ b/.changeset/cool-cooks-act.md @@ -0,0 +1,6 @@ +--- +'myst-directives': patch +'myst-transforms': patch +--- + +Move includeDirective transform to myst-transforms and make it generic for use in JupyterLab diff --git a/.changeset/flat-suits-allow.md b/.changeset/flat-suits-allow.md new file mode 100644 index 000000000..f6721c133 --- /dev/null +++ b/.changeset/flat-suits-allow.md @@ -0,0 +1,5 @@ +--- +'myst-spec-ext': patch +--- + +Add `include` node, that implements the `literalinclude` directive diff --git a/.changeset/gold-weeks-appear.md b/.changeset/gold-weeks-appear.md new file mode 100644 index 000000000..c832e942c --- /dev/null +++ b/.changeset/gold-weeks-appear.md @@ -0,0 +1,5 @@ +--- +'myst-directives': patch +--- + +Remove the codeBlockDirective, this is now the same as the `codeDirective`. diff --git a/.changeset/orange-planes-promise.md b/.changeset/orange-planes-promise.md new file mode 100644 index 000000000..4e12fe6f1 --- /dev/null +++ b/.changeset/orange-planes-promise.md @@ -0,0 +1,6 @@ +--- +'myst-directives': patch +'myst-cli': patch +--- + +Add `literalinclude` directive diff --git a/docs/code.md b/docs/code.md index 19bb3f9c2..735459810 100644 --- a/docs/code.md +++ b/docs/code.md @@ -83,9 +83,8 @@ caption (string) name (string) : The target label for the code-block, can be used by `ref` and `numref` roles. -```{note} +```{note} Alternative implementations :class: dropdown -# Alternative implementations The parser also supports the `docutils` implementation (see [docutils documentation](https://docutils.sourceforge.io/docs/ref/rst/directives.html#code)) of a `{code}` directive, which only supports the `number-lines` option. 
@@ -93,3 +92,70 @@ It is recommended to use the more fully featured `code-block` directive document All implementations are resolved to the same `code` type in the abstract syntax tree. ``` + +## Including Files + +If your code is in a separate file you can use the `literalinclude` directive (or the `include` directive with the `literal` flag). +This directive is helpful for showing code snippets without duplicating your content. + +For example, a `literalinclude` of a snippet of the `myst.yml` such as: + +````markdown +```{literalinclude} myst.yml +:start-at: project +:end-before: references +:lineno-match: +``` +```` + +creates a snippet that has matching line numbers, and starts at a line including `"project"` and ends before the line including `"references"`. + +```{literalinclude} myst.yml +:start-at: project +:end-before: references +:lineno-match: +``` + +:::{note} Auto Reload +If you are working with the auto-reload (e.g. `myst start`), currently you will need to save the file with the `literalinclude` directive for the contents to update. +::: + +## `include` Reference + +The argument of an include directive is the file path, relative to the file from which it was referenced. +By default the file will be parsed using MyST, you can also set the file to be `literal`, which will show as a code-block; this is the same as using the `literalinclude` directive. +If in literal mode, the directive also accepts all of the options from the `code-block` (e.g. `:linenos:`). + +To select a portion of the file to be shown, use the `start-at`/`start-after` selectors with the `end-before`/`end-at` selectors, which match a snippet of text in the included file. +Alternatively, you can explicitly select the lines (e.g. `1,3,5-10,20-`) or the `start-line`/`end-line` (which is zero based for compatibility with Sphinx). + +literal (boolean) +: Flag the include block as literal, and show the contents as a code block. 
This can also be set automatically by setting the `language` or using the `literalinclude` directive. + +lang (string) +: The language of the code to be highlighted as. If set, this automatically changes an `include` into a `literalinclude`. +: You can alias this as `language` or `code` + +start-line (number) +: Only the content starting from this line will be included. The first line has index 0 and negative values count from the end. + +start-at (string) +: Only the content after and including the first occurrence of the specified text in the external data file will be included. + +start-after (string) +: Only the content after the first occurrence of the specified text in the external data file will be included. + +end-line (number) +: Only the content up to (but excluding) this line will be included. + +end-at (string) +: Only the content up to and including the first occurrence of the specified text in the external data file (but after any start-after text) will be included. + +end-before (string) +: Only the content before the first occurrence of the specified text in the external data file (but after any start-after text) will be included. + +lines (string) +: Specify exactly which lines to include from the original file, starting at 1. For example, `1,3,5-10,20-` includes the lines 1, 3, 5 to 10 and lines 20 to the last line of the original file. + +lineno-match (boolean) +: Display the original line numbers, correct only when the selection consists of contiguous lines. 
diff --git a/packages/myst-cli/src/process/mdast.ts b/packages/myst-cli/src/process/mdast.ts index 81d76ab5f..56df06e7f 100644 --- a/packages/myst-cli/src/process/mdast.ts +++ b/packages/myst-cli/src/process/mdast.ts @@ -37,7 +37,7 @@ import { checkLinksTransform, embedTransform, importMdastFromJson, - includeFilesDirective, + includeFilesTransform, liftCodeMetadataToBlock, transformLinkedDOIs, transformOutputs, @@ -155,7 +155,7 @@ export async function transformMdast( cache.$internalReferences[file] = state; // Import additional content from mdast or other files importMdastFromJson(session, file, mdast); - includeFilesDirective(session, file, mdast); + includeFilesTransform(session, file, mdast, vfile); // This needs to come before basic transformations since it may add labels to blocks liftCodeMetadataToBlock(session, file, mdast); diff --git a/packages/myst-cli/src/transforms/include.ts b/packages/myst-cli/src/transforms/include.ts index 2f7a4fa44..3a1539e24 100644 --- a/packages/myst-cli/src/transforms/include.ts +++ b/packages/myst-cli/src/transforms/include.ts @@ -1,27 +1,29 @@ +import path from 'node:path'; import fs from 'node:fs'; -import type { GenericNode, GenericParent } from 'myst-common'; +import type { GenericParent } from 'myst-common'; +import { fileError } from 'myst-common'; import { parseMyst } from '../process/index.js'; -import { selectAll } from 'unist-util-select'; -import { join, dirname } from 'node:path'; import type { ISession } from '../session/types.js'; +import type { VFile } from 'vfile'; +import { includeDirectiveTransform } from 'myst-transforms'; -/** - * This is the {include} directive, that loads from disk. 
- * - * RST documentation: - * - https://docutils.sourceforge.io/docs/ref/rst/directives.html#including-an-external-document-fragment - */ -export function includeFilesDirective(session: ISession, filename: string, mdast: GenericParent) { - const includeNodes = selectAll('include', mdast) as GenericNode[]; - const dir = dirname(filename); - includeNodes.forEach((node) => { - const file = join(dir, node.file); - if (!fs.existsSync(file)) { - session.log.error(`Include Directive: Could not find "${file}" in "${filename}"`); +export function includeFilesTransform( + session: ISession, + baseFile: string, + tree: GenericParent, + vfile: VFile, +) { + const dir = path.dirname(baseFile); + const loadFile = (filename: string) => { + const fullFile = path.join(dir, filename); + if (!fs.existsSync(fullFile)) { + fileError(vfile, `Include Directive: Could not find "${fullFile}" in "${baseFile}"`); return; } - const content = fs.readFileSync(file).toString(); - const children = parseMyst(session, content, filename).children as GenericNode[]; - node.children = children; - }); + return fs.readFileSync(fullFile).toString(); + }; + const parseContent = (filename: string, content: string) => { + return parseMyst(session, content, filename).children; + }; + includeDirectiveTransform(tree, vfile, { loadFile, parseContent }); } diff --git a/packages/myst-directives/package.json b/packages/myst-directives/package.json index 5e9edeaf0..b97b29469 100644 --- a/packages/myst-directives/package.json +++ b/packages/myst-directives/package.json @@ -23,6 +23,8 @@ "clean": "rimraf dist", "lint": "eslint \"src/**/!(*.spec).ts\" -c ./.eslintrc.cjs", "lint:format": "npx prettier --check \"src/**/*.ts\"", + "test": "vitest run", + "test:watch": "vitest watch", "build:esm": "tsc --project ./tsconfig.json --module es2015 --outDir dist --declaration", "build": "npm-run-all -l clean -p build:esm" }, diff --git a/packages/myst-directives/src/code.spec.ts b/packages/myst-directives/src/code.spec.ts new 
file mode 100644 index 000000000..191945f1a --- /dev/null +++ b/packages/myst-directives/src/code.spec.ts @@ -0,0 +1,54 @@ +import { describe, expect, test } from 'vitest'; +import { getCodeBlockOptions } from './code.js'; +import { VFile } from 'vfile'; + +describe('Code block options', () => { + test('default options', () => { + const vfile = new VFile(); + const opts = getCodeBlockOptions({}, vfile); + expect(opts).toEqual({}); + expect(vfile.messages.length).toEqual(0); + }); + test('number-lines', () => { + const vfile = new VFile(); + const opts = getCodeBlockOptions({ 'number-lines': 1 }, vfile); + expect(opts).toEqual({ showLineNumbers: true }); + expect(vfile.messages.length).toEqual(0); + }); + test('number-lines: 2', () => { + const vfile = new VFile(); + const opts = getCodeBlockOptions({ 'number-lines': 2 }, vfile); + expect(opts).toEqual({ showLineNumbers: true, startingLineNumber: 2 }); + expect(vfile.messages.length).toEqual(0); + }); + test('number-lines clashes with lineno-start', () => { + const vfile = new VFile(); + const opts = getCodeBlockOptions({ 'number-lines': 1, 'lineno-start': 2 }, vfile); + expect(opts).toEqual({ showLineNumbers: true, startingLineNumber: 2 }); + // Show warning! 
+ expect(vfile.messages.length).toEqual(1); + }); + test('lineno-start activates showLineNumbers', () => { + const vfile = new VFile(); + const opts = getCodeBlockOptions({ 'lineno-start': 1 }, vfile); + expect(opts).toEqual({ showLineNumbers: true }); + expect(vfile.messages.length).toEqual(0); + }); + test('emphasize-lines', () => { + const vfile = new VFile(); + const opts = getCodeBlockOptions({ 'emphasize-lines': '3,5' }, vfile); + expect(opts).toEqual({ emphasizeLines: [3, 5] }); + expect(vfile.messages.length).toEqual(0); + }); + // See https://github.com/executablebooks/jupyterlab-myst/issues/174 + test(':lineno-start: 10, :emphasize-lines: 12,13', () => { + const vfile = new VFile(); + const opts = getCodeBlockOptions({ 'lineno-start': 10, 'emphasize-lines': '12,13' }, vfile); + expect(opts).toEqual({ + showLineNumbers: true, + emphasizeLines: [12, 13], + startingLineNumber: 10, + }); + expect(vfile.messages.length).toEqual(0); + }); +}); diff --git a/packages/myst-directives/src/code.ts b/packages/myst-directives/src/code.ts index f6a3d766a..c8478778e 100644 --- a/packages/myst-directives/src/code.ts +++ b/packages/myst-directives/src/code.ts @@ -1,56 +1,83 @@ import type { Caption, Container } from 'myst-spec'; import type { Code } from 'myst-spec-ext'; import yaml from 'js-yaml'; -import type { DirectiveSpec, GenericNode } from 'myst-common'; +import type { DirectiveData, DirectiveSpec, GenericNode } from 'myst-common'; import { fileError, fileWarn, normalizeLabel, ParseTypesEnum } from 'myst-common'; +import type { VFile } from 'vfile'; -export const codeDirective: DirectiveSpec = { - name: 'code', - arg: { - type: ParseTypesEnum.string, +function parseEmphasizeLines(emphasizeLinesString?: string | undefined): number[] | undefined { + if (!emphasizeLinesString) return undefined; + const emphasizeLines = emphasizeLinesString + ?.split(',') + .map((val) => Number(val.trim())) + .filter((val) => Number.isInteger(val)); + return emphasizeLines; +} + +/** 
This function parses both sphinx and RST code-block options */ +export function getCodeBlockOptions( + options: DirectiveData['options'], + vfile: VFile, +): Pick { + if (options?.['lineno-start'] != null && options?.['number-lines'] != null) { + fileWarn(vfile, 'Cannot use both "lineno-start" and "number-lines"', { + source: 'code-block:options', + }); + } + const emphasizeLines = parseEmphasizeLines(options?.['emphasize-lines'] as string | undefined); + const numberLines = options?.['number-lines'] as number | undefined; + // Only include this in mdast if it is `true` + const showLineNumbers = + options?.linenos || options?.['lineno-start'] || options?.['lineno-match'] || numberLines + ? true + : undefined; + let startingLineNumber: number | undefined = + numberLines != null && numberLines > 1 ? numberLines : (options?.['lineno-start'] as number); + if (options?.['lineno-match']) { + startingLineNumber = 'match' as any; + } else if (startingLineNumber == null || startingLineNumber <= 1) { + startingLineNumber = undefined; + } + return { + emphasizeLines, + showLineNumbers, + startingLineNumber, + }; +} + +export const CODE_DIRECTIVE_OPTIONS: DirectiveSpec['options'] = { + caption: { + type: ParseTypesEnum.parsed, }, - options: { - label: { - type: ParseTypesEnum.string, - alias: ['name'], - }, - class: { - type: ParseTypesEnum.string, - // class_option: list of strings? - }, - // force: { - // type: ParseTypesEnum.boolean, - // doc: 'Ignore minor errors on highlighting', - // }, - 'number-lines': { - type: ParseTypesEnum.number, - }, + linenos: { + type: ParseTypesEnum.boolean, + doc: 'Show line numbers', }, - body: { - type: ParseTypesEnum.string, + 'lineno-start': { + type: ParseTypesEnum.number, + doc: 'Start line numbering from a particular value, default is 1. 
If present, line numbering is activated.', }, - run(data): GenericNode[] { - const { label, identifier } = normalizeLabel(data.options?.label as string | undefined) || {}; - const numberLines = data.options?.['number-lines'] as number | undefined; - const showLineNumbers = !!numberLines; - const startingLineNumber = numberLines && numberLines > 1 ? numberLines : undefined; - return [ - { - type: 'code', - lang: data.arg, - identifier, - label, - class: data.options?.class, - showLineNumbers, - startingLineNumber, - value: data.body as string | undefined, - }, - ]; + 'number-lines': { + type: ParseTypesEnum.number, + doc: 'Alternative for "lineno-start", turns on line numbering and can be an integer that is the start of the line numbering.', }, + 'emphasize-lines': { + type: ParseTypesEnum.string, + doc: 'Emphasize particular lines (comma-separated numbers), e.g. "3,5"', + }, + // dedent: { + // type: ParseTypesEnum.number, + // doc: 'Strip indentation characters from the code block', + // }, + // force: { + // type: ParseTypesEnum.boolean, + // doc: 'Ignore minor errors on highlighting', + // }, }; -export const codeBlockDirective: DirectiveSpec = { - name: 'code-block', +export const codeDirective: DirectiveSpec = { + name: 'code', + alias: ['code-block', 'sourcecode'], arg: { type: ParseTypesEnum.string, }, @@ -63,55 +90,25 @@ export const codeBlockDirective: DirectiveSpec = { type: ParseTypesEnum.string, // class_option: list of strings? 
}, - // force: { - // type: ParseTypesEnum.boolean, - // doc: 'Ignore minor errors on highlighting', - // }, - caption: { - type: ParseTypesEnum.parsed, - }, - linenos: { - type: ParseTypesEnum.boolean, - doc: 'Add line numbers', - }, - 'lineno-start': { - type: ParseTypesEnum.number, - doc: 'Start line numbering from a particular value', - }, - // dedent: { - // type: ParseTypesEnum.number, - // doc: 'Strip indentation characters from the code block', - // }, - 'emphasize-lines': { - type: ParseTypesEnum.string, - doc: 'Emphasize particular lines (comma-separated numbers)', - }, + ...CODE_DIRECTIVE_OPTIONS, }, body: { type: ParseTypesEnum.string, }, - run(data): GenericNode[] { + run(data, vfile): GenericNode[] { const { label, identifier } = normalizeLabel(data.options?.label as string | undefined) || {}; - // Validating this should probably happen first - const emphasizeLinesString = data.options?.['emphasize-lines'] as string | undefined; - const emphasizeLines = emphasizeLinesString - ?.split(',') - .map((val) => Number(val.trim())) - .filter((val) => Number.isInteger(val)); + const opts = getCodeBlockOptions(data.options, vfile); + const code: Code = { + type: 'code', + lang: data.arg as string, + class: data.options?.class as string, + ...opts, + value: data.body as string, + }; if (!data.options?.caption) { - return [ - { - type: 'code', - lang: data.arg, - identifier, - label, - class: data.options?.class, - showLineNumbers: data.options?.linenos, - startingLineNumber: data.options?.['lineno-start'], - emphasizeLines, - value: data.body as string, - }, - ]; + code.label = label; + code.identifier = identifier; + return [code]; } const caption: Caption = { type: 'caption', @@ -122,15 +119,6 @@ export const codeBlockDirective: DirectiveSpec = { }, ], }; - const code: Code = { - type: 'code', - lang: data.arg as string, - class: data.options?.class as string, - showLineNumbers: data.options?.linenos as boolean, - startingLineNumber: 
data.options?.['lineno-start'] as number, - emphasizeLines, - value: data.body as string, - }; const container: Container = { type: 'container', kind: 'code' as any, diff --git a/packages/myst-directives/src/include.spec.ts b/packages/myst-directives/src/include.spec.ts new file mode 100644 index 000000000..d83bcc03a --- /dev/null +++ b/packages/myst-directives/src/include.spec.ts @@ -0,0 +1,19 @@ +import { describe, expect, test } from 'vitest'; +import { parseLinesString } from './include.js'; +import { VFile } from 'vfile'; + +describe('parseLinesString', () => { + test.each([ + ['', undefined, 0], + ['1,2', [1, 2], 0], + ['1,2-', [1, [2]], 0], + ['1,2-4', [1, [2, 4]], 0], + ['1,2-4,a', [1, [2, 4]], 1], + ['1,3,5-10,20-', [1, 3, [5, 10], [20]], 0], + ['1,3,5 10,20-', [1, 3, [20]], 1], + ])('"%s"', (t, a, w) => { + const vfile = new VFile(); + expect(parseLinesString(vfile, t)).toEqual(a); + expect(vfile.messages.length).toEqual(w); + }); +}); diff --git a/packages/myst-directives/src/include.ts b/packages/myst-directives/src/include.ts index 8a885fe44..b51711510 100644 --- a/packages/myst-directives/src/include.ts +++ b/packages/myst-directives/src/include.ts @@ -1,18 +1,181 @@ -import type { DirectiveSpec, DirectiveData, GenericNode } from 'myst-common'; -import { ParseTypesEnum } from 'myst-common'; +import type { DirectiveSpec } from 'myst-common'; +import { ParseTypesEnum, fileWarn, normalizeLabel } from 'myst-common'; +import { CODE_DIRECTIVE_OPTIONS, getCodeBlockOptions } from './code.js'; +import type { Include } from 'myst-spec-ext'; +import type { VFile } from 'vfile'; +/** + * RST documentation: + * - https://docutils.sourceforge.io/docs/ref/rst/directives.html#including-an-external-document-fragment + * + * Sphinx documentation: + * - https://www.sphinx-doc.org/en/master/usage/restructuredtext/directives.html#directive-literalinclude + */ export const includeDirective: DirectiveSpec = { name: 'include', + alias: ['literalinclude'], arg: { type: 
ParseTypesEnum.string, required: true, }, - run(data: DirectiveData): GenericNode[] { + options: { + label: { + type: ParseTypesEnum.string, + alias: ['name'], + }, + literal: { + type: ParseTypesEnum.boolean, + doc: 'Flag the include block as literal, and show the contents as a code block. This can also be set automatically by setting the `language` or using the `literalinclude` directive.', + }, + lang: { + type: ParseTypesEnum.string, + doc: 'The language of the code to be highlighted as. If set, this automatically changes an `include` into a `literalinclude`.', + alias: ['language', 'code'], + }, + ...CODE_DIRECTIVE_OPTIONS, + 'start-line': { + type: ParseTypesEnum.number, + doc: 'Only the content starting from this line will be included. The first line has index 0 and negative values count from the end.', + }, + 'start-at': { + type: ParseTypesEnum.string, + doc: 'Only the content after and including the first occurrence of the specified text in the external data file will be included.', + }, + 'start-after': { + type: ParseTypesEnum.string, + doc: 'Only the content after the first occurrence of the specified text in the external data file will be included.', + }, + 'end-line': { + type: ParseTypesEnum.number, + doc: 'Only the content up to (but excluding) this line will be included.', + }, + 'end-at': { + type: ParseTypesEnum.string, + doc: 'Only the content up to and including the first occurrence of the specified text in the external data file (but after any start-after text) will be included.', + }, + 'end-before': { + type: ParseTypesEnum.string, + doc: 'Only the content before the first occurrence of the specified text in the external data file (but after any start-after text) will be included.', + }, + lines: { + type: ParseTypesEnum.string, + doc: 'Specify exactly which lines to include from the original file, starting at 1. 
For example, `1,3,5-10,20-` includes the lines 1, 3, 5 to 10 and lines 20 to the last line of the original file.', + }, + 'lineno-match': { + type: ParseTypesEnum.boolean, + doc: 'Display the original line numbers, correct only when the selection consists of contiguous lines.', + }, + }, + run(data, vfile): Include[] { + const { label, identifier } = normalizeLabel(data.options?.label as string | undefined) || {}; + const literal = + data.name === 'literalinclude' || !!data.options?.literal || !!data.options?.lang; + + const file = data.arg as string; + if (!literal) { + // TODO: warn on unused options + return [ + { + type: 'include', + file, + label, + identifier, + }, + ]; + } + const lang = (data.options?.lang as string) ?? extToLanguage(file.split('.').pop()); + const opts = getCodeBlockOptions(data.options, vfile); + const filter: Include['filter'] = {}; + ensureOnlyOneOf(vfile, data.options, ['start-at', 'start-line', 'start-after', 'lines']); + ensureOnlyOneOf(vfile, data.options, ['end-at', 'end-line', 'end-before', 'lines']); + filter.startAt = data.options?.['start-at'] as string; + filter.startAfter = data.options?.['start-after'] as string; + filter.endAt = data.options?.['end-at'] as string; + filter.endBefore = data.options?.['end-before'] as string; + if (data.options?.lines) { + filter.lines = parseLinesString(vfile, data.options?.lines as string); + } else { + const startLine = data.options?.['start-line'] as number; + const endLine = data.options?.['end-line'] as number; + const lines = []; + if (startLine != null) lines.push(startLine); + if (startLine == null && endLine != null) lines.push(0); + if (endLine != null) lines.push(endLine); + if (lines.length > 0) { + filter.lines = [ + lines.map((n) => { + if (n >= 0) return n + 1; + return n; + }) as [number, number?], + ]; + } + } return [ { type: 'include', - file: data.arg as string, + file, + literal, + lang, + label, + identifier, + caption: data.options?.caption as any[], + filter: 
Object.keys(filter).length > 0 ? filter : undefined, + ...opts, }, ]; }, }; + +type Lines = Required['filter']['lines']; + +export function parseLinesString(vfile: VFile, linesString: string | undefined): Lines { + if (!linesString) return undefined; + return linesString + .split(',') + .map((l) => { + const line = l.trim(); + const match = line.match(/^([0-9]+)(?:\s*(-)\s*([0-9]+)?)?$/); + if (!match) { + fileWarn(vfile, `Unknown lines match "${line}"`); + return undefined; + } + const [, first, dash, last] = match; + if (!dash && !last) { + return Number.parseInt(first); + } + if (dash && !last) { + return [Number.parseInt(first)]; + } + return [Number.parseInt(first), Number.parseInt(last)]; + }) + .filter((l) => !!l) as Lines; +} + +function ensureOnlyOneOf( + vfile: VFile, + options: Record | undefined, + exclusive: string[], +): void { + if (!options) return; + const set1 = new Set(exclusive); + const set2 = new Set(Object.keys(options)); + const intersection = new Set([...set1].filter((x) => set2.has(x))); + if (intersection.size > 1) { + fileWarn(vfile, `Conflicting options for directive: ["${[...intersection].join('", "')}"]`, { + note: `Choose a single option out of ["${[...exclusive].join('", "')}"]`, + }); + } +} + +function extToLanguage(ext?: string): string | undefined { + return ( + { + ts: 'typescript', + js: 'javascript', + tex: 'latex', + py: 'python', + md: 'markdown', + yml: 'yaml', + }[ext ?? ''] ?? 
ext + ); +} diff --git a/packages/myst-directives/src/index.ts b/packages/myst-directives/src/index.ts index 7e81607dc..75ba96677 100644 --- a/packages/myst-directives/src/index.ts +++ b/packages/myst-directives/src/index.ts @@ -1,6 +1,6 @@ import { admonitionDirective } from './admonition.js'; import { bibliographyDirective } from './bibliography.js'; -import { codeBlockDirective, codeDirective, codeCellDirective } from './code.js'; +import { codeDirective, codeCellDirective } from './code.js'; import { dropdownDirective } from './dropdown.js'; import { embedDirective } from './embed.js'; import { figureDirective } from './figure.js'; @@ -19,9 +19,8 @@ import { outputDirective } from './output.js'; export const defaultDirectives = [ admonitionDirective, bibliographyDirective, - codeBlockDirective, - codeCellDirective, codeDirective, + codeCellDirective, dropdownDirective, embedDirective, figureDirective, @@ -40,7 +39,7 @@ export const defaultDirectives = [ export { admonitionDirective } from './admonition.js'; export { bibliographyDirective } from './bibliography.js'; -export { codeBlockDirective, codeDirective } from './code.js'; +export { codeDirective } from './code.js'; export { dropdownDirective } from './dropdown.js'; export { embedDirective } from './embed.js'; export { figureDirective } from './figure.js'; diff --git a/packages/myst-spec-ext/src/types.ts b/packages/myst-spec-ext/src/types.ts index 2b6cd9002..2462d8594 100644 --- a/packages/myst-spec-ext/src/types.ts +++ b/packages/myst-spec-ext/src/types.ts @@ -150,6 +150,32 @@ export type Embed = { children?: (FlowContent | ListContent | PhrasingContent)[]; }; +type IncludeFilter = { + startAfter?: string; + startAt?: string; + endBefore?: string; + endAt?: string; + /** Lines start at 1 and can be negative (-1 is the last line). For example, [1, 3, [10]] will select lines 1, 3 and 10 until the end. 
*/ + lines?: (number | [number, number?])[]; +}; + +export type Include = { + type: 'include'; + file: string; + literal?: boolean; + filter?: IncludeFilter; + lang?: string; + showLineNumbers?: boolean; + /** The `match` will be removed in a transform */ + startingLineNumber?: number | 'match'; + emphasizeLines?: number[]; + identifier?: string; + label?: string; + children?: (FlowContent | ListContent | PhrasingContent)[]; + /** `caption` is temporary, and is used before a transform */ + caption?: (FlowContent | ListContent | PhrasingContent)[]; +}; + export type Container = SpecContainer & { source?: Dependency; }; diff --git a/packages/myst-transforms/src/include.spec.ts b/packages/myst-transforms/src/include.spec.ts new file mode 100644 index 000000000..3bcba6bbc --- /dev/null +++ b/packages/myst-transforms/src/include.spec.ts @@ -0,0 +1,36 @@ +import { describe, expect, test } from 'vitest'; +import { filterIncludedContent } from './include'; +import { VFile } from 'vfile'; + +describe('filterIncludedContent', () => { + test.each([ + [{ startAt: 'ok' }, 'ok\nreally\ncool', 2, 0], + [{ startAt: 'ok', endBefore: 'cool' }, 'ok\nreally', 2, 0], + [{ startAt: 'ok', endBefore: 'ok' }, 'ok\nreally\ncool', 2, 1], + [{ startAt: 'ok', endAt: 'cool' }, 'ok\nreally\ncool', 2, 0], + [{ startAfter: 'k', endBefore: 'cool' }, 'really', 3, 0], + [{ endBefore: 'cool' }, 'some\nok\nreally', 1, 0], + [{ startAt: 'really' }, 'really\ncool', 3, 0], + [{ startAfter: 'really' }, 'cool', 4, 0], + [{ lines: [1, 3] }, 'some\nreally', 1, 0], + [{ lines: [[1, 3]] }, 'some\nok\nreally', 1, 0], + [{ lines: [1, [3]] }, 'some\nreally\ncool', 1, 0], + [{ lines: [2, 1, 2] }, 'ok\nsome\nok', 2, 0], + [{ lines: [1, -1] }, 'some\ncool', 1, 0], + [{ lines: [-1] }, 'cool', 4, 0], + [{ lines: [1, [-1]] }, 'some\ncool', 1, 0], + [{ lines: [1, [-2]] }, 'some\nreally\ncool', 1, 0], + [{ lines: [1, [-2, -1]] }, 'some\nreally\ncool', 1, 0], + [{ lines: [1, [-1, -2]] }, 'some', 1, 1], + ])('%s', (t, a, 
sln, w) => {
+    const vfile = new VFile();
+    const { content, startingLineNumber } = filterIncludedContent(
+      vfile,
+      t as any,
+      'some\nok\nreally\ncool',
+    );
+    expect(content).toEqual(a);
+    expect(startingLineNumber).toEqual(sln);
+    expect(vfile.messages.length).toBe(w);
+  });
+});
diff --git a/packages/myst-transforms/src/include.ts b/packages/myst-transforms/src/include.ts
new file mode 100644
index 000000000..764a2abf7
--- /dev/null
+++ b/packages/myst-transforms/src/include.ts
@@ -0,0 +1,212 @@
+import { fileWarn, type GenericNode, type GenericParent } from 'myst-common';
+import type { Code, Container, Include } from 'myst-spec-ext';
+import { selectAll } from 'unist-util-select';
+import type { Caption } from 'myst-spec';
+import type { Plugin } from 'unified';
+import type { VFile } from 'vfile';
+
+/**
+ * Host-provided callbacks used to resolve `include` nodes.
+ *
+ * Each callback may answer synchronously or with a promise; the transform awaits both.
+ */
+export type Options = {
+  /** Returns the raw text of `filename`; a nullish result leaves the include node unchanged. */
+  loadFile: (filename: string) => Promise<string | undefined> | string | undefined;
+  /** Parses the (already filtered) text of `filename` into the children of the include node. */
+  parseContent: (filename: string, content: string) => Promise<GenericNode[]> | GenericNode[];
+};
+
+/**
+ * This is the {include} directive, that loads from disk.
+ *
+ * Every `include` node in the tree is resolved concurrently: its file is loaded with
+ * `opts.loadFile`, narrowed with `filterIncludedContent`, and the result is stored as the
+ * children of the include node. Literal includes become a `code` node (wrapped in a `code`
+ * container when a caption is present); all other includes are parsed with `opts.parseContent`.
+ *
+ * RST documentation:
+ * - https://docutils.sourceforge.io/docs/ref/rst/directives.html#including-an-external-document-fragment
+ *
+ * @param tree - tree that is searched for `include` nodes; matching nodes are mutated in place
+ * @param file - receives the warnings raised while filtering the included content
+ * @param opts - callbacks used to load and parse the included files
+ */
+export async function includeDirectiveTransform(tree: GenericParent, file: VFile, opts: Options) {
+  const includeNodes = selectAll('include', tree) as Include[];
+  await Promise.all(
+    includeNodes.map(async (node) => {
+      const rawContent = await opts.loadFile(node.file);
+      if (rawContent == null) return;
+      const { content, startingLineNumber } = filterIncludedContent(file, node.filter, rawContent);
+      let children: GenericNode[];
+      if (node.literal) {
+        const code: Code = {
+          type: 'code',
+          value: content,
+        };
+        if (node.startingLineNumber === 'match') {
+          // Replace the starting line number if it should match
+          node.startingLineNumber = startingLineNumber;
+        }
+        // Move the code attributes to the code block
+        (
+          [
+            'lang',
+            'emphasizeLines',
+            'showLineNumbers',
+            'startingLineNumber',
+            'label',
+            'identifier',
+          ] as const
+        ).forEach((attr) => {
+          if (!node[attr]) return;
+          code[attr] = node[attr] as any;
+          delete node[attr];
+        });
+        if (!node.caption) {
+          children = [code];
+        } else {
+          const caption: Caption = {
+            type: 'caption',
+            children: [
+              {
+                type: 'paragraph',
+                children: node.caption as any[],
+              },
+            ],
+          };
+          const container: Container = {
+            type: 'container',
+            kind: 'code' as any,
+            // Move the label to the container
+            label: code.label,
+            identifier: code.identifier,
+            children: [code as any, caption],
+          };
+          delete code.label;
+          delete code.identifier;
+          children = [container];
+        }
+      } else {
+        children = await opts.parseContent(node.file, content);
+      }
+      node.children = children as any;
+    }),
+  );
+}
+
+/**
+ * Convert a 1-based line number into a `[start, end)` pair for `Array.prototype.slice`.
+ *
+ * Negative numbers count from the end (`-1` is the last of `total` lines); `0` is not a
+ * valid line number and yields `null`.
+ */
+function index(n: number, total: number): [number, number] | null {
+  if (n > 0) return [n - 1, n];
+  if (n < 0) return [total + n, total + n + 1];
+  return null;
+}
+
+/**
+ * Select the part of `rawContent` described by `filter`.
+ *
+ * - No filter (or an empty one) returns the content unchanged, without a starting line number.
+ * - `filter.lines` takes precedence over the marker options: every entry is a 1-based line
+ *   number or a `[start, end]` range (negative values count from the end, an omitted `end`
+ *   means the last line) and the selected lines are concatenated in the order given.
+ * - Otherwise the content starts at the first line containing `startAt` / `startAfter` and the
+ *   end marker (`endAt` / `endBefore`) is searched on the lines after the starting line.
+ *
+ * Problems (line number `0`, empty ranges, markers that cannot be found) are reported as
+ * warnings on `vfile`; a missing marker falls back to the start or end of the file.
+ *
+ * @returns the filtered text and the 1-based line number of the first selected line
+ */
+export function filterIncludedContent(
+  vfile: VFile,
+  filter: Include['filter'],
+  rawContent: string,
+): { content: string; startingLineNumber?: number } {
+  if (!filter || Object.keys(filter).length === 0) {
+    return { content: rawContent, startingLineNumber: undefined };
+  }
+  const lines = rawContent.split('\n');
+  let startingLineNumber: number | undefined;
+  if (filter.lines) {
+    const filtered = filter.lines.map((f) => {
+      if (typeof f === 'number') {
+        const ind = index(f, lines.length);
+        if (!ind) {
+          fileWarn(vfile, 'Invalid line number "0", indexing starts at 1');
+          return [];
+        }
+        if (!startingLineNumber) startingLineNumber = ind[0] + 1;
+        return lines.slice(...ind);
+      }
+      const ind0 = index(f[0], lines.length);
+      const ind1 = index(f[1] ?? lines.length, lines.length);
+      if (!ind0 || !ind1) {
+        fileWarn(vfile, 'Invalid line number "0", indexing starts at 1');
+        return [];
+      }
+      if (!startingLineNumber) startingLineNumber = ind0[0] + 1;
+      const slice = lines.slice(ind0[0], ind1[1]);
+      if (slice.length === 0) {
+        fileWarn(vfile, `Unexpected lines, from "${f[0]}" to "${f[1] ?? ''}"`);
+      }
+      return slice;
+    });
+    return { content: filtered.flat().join('\n'), startingLineNumber };
+  }
+  let startLine = 0;
+  if (filter.startAt || filter.startAfter) {
+    const found = lines.findIndex(
+      (line) =>
+        (filter.startAt && line.includes(filter.startAt)) ||
+        (filter.startAfter && line.includes(filter.startAfter)),
+    );
+    if (found === -1) {
+      fileWarn(
+        vfile,
+        `Could not find starting line including "${filter.startAt || filter.startAfter}"`,
+      );
+    } else {
+      // Skip the marker line for `startAfter`, but only when the marker was actually found;
+      // an unmatched marker falls back to the first line of the file.
+      startLine = filter.startAfter ? found + 1 : found;
+    }
+  }
+  let endLine =
+    filter.endAt || filter.endBefore
+      ? lines
+          .slice(startLine + 1)
+          .findIndex(
+            (line) =>
+              (filter.endAt && line.includes(filter.endAt)) ||
+              (filter.endBefore && line.includes(filter.endBefore)),
+          )
+      : lines.length;
+  if (endLine === -1) {
+    fileWarn(vfile, `Could not find ending line including "${filter.endAt || filter.endBefore}"`);
+    endLine = lines.length;
+  } else if (filter.endAt || filter.endBefore) {
+    // `endLine` is relative to `startLine + 1`; convert it so that the exclusive `slice` end
+    // below (`endLine + 1`) stops before an `endBefore` match and after an `endAt` match.
+    endLine += startLine;
+    if (filter.endAt) endLine += 1;
+  }
+  startingLineNumber = startLine + 1;
+  return { content: lines.slice(startLine, endLine + 1).join('\n'), startingLineNumber };
+}
+
+/**
+ * Unified plugin wrapper around `includeDirectiveTransform`.
+ *
+ * The transformer returns the transform's promise so that the processor waits for every
+ * include to be loaded before the pipeline continues, and so that failures are propagated.
+ */
+export const includeDirectivePlugin: Plugin<[Options], GenericParent, GenericParent> =
+  (opts) => async (tree, file) => {
+    await includeDirectiveTransform(tree as GenericParent, file, opts);
+  };
diff --git a/packages/myst-transforms/src/index.ts b/packages/myst-transforms/src/index.ts
index 6d85cf343..c3b01336a 100644
--- a/packages/myst-transforms/src/index.ts
+++ b/packages/myst-transforms/src/index.ts
@@ -40,6 +40,7 @@ export {
 export { joinGatesPlugin, joinGatesTransform } from './joinGates.js';
 export { glossaryPlugin, glossaryTransform } from './glossary.js';
 export { abbreviationPlugin, abbreviationTransform } from './abbreviations.js';
+export { includeDirectivePlugin, includeDirectiveTransform } from './include.js';
 
 // Enumeration
 export type { IReferenceState, NumberingOptions, ReferenceKind } from './enumerate.js';