From 1880a465afbf24fd15cadc02b5fd38721a856ce6 Mon Sep 17 00:00:00 2001 From: Franklin Koch Date: Thu, 21 Mar 2024 13:50:59 -0600 Subject: [PATCH] =?UTF-8?q?=F0=9F=9B=B8=20Update=20extractParts=20to=20pul?= =?UTF-8?q?l=20implicit=20parts=20based=20on=20heading=20(#986)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Rowan Cockett --- .changeset/big-peas-crash.md | 5 + .changeset/clean-dots-glow.md | 5 + .changeset/tidy-windows-smell.md | 5 + docs/_toc.yml | 1 + docs/blocks.md | 2 +- docs/document-parts.md | 91 ++++ docs/quickstart-myst-documents.md | 22 +- packages/myst-common/src/extractParts.spec.ts | 434 +++++++++++++++++- packages/myst-common/src/extractParts.ts | 112 ++++- .../myst-frontmatter/src/site/validators.ts | 10 +- packages/tex-to-myst/src/frontmatter.ts | 2 +- 11 files changed, 663 insertions(+), 26 deletions(-) create mode 100644 .changeset/big-peas-crash.md create mode 100644 .changeset/clean-dots-glow.md create mode 100644 .changeset/tidy-windows-smell.md create mode 100644 docs/document-parts.md diff --git a/.changeset/big-peas-crash.md b/.changeset/big-peas-crash.md new file mode 100644 index 000000000..b51ce6be3 --- /dev/null +++ b/.changeset/big-peas-crash.md @@ -0,0 +1,5 @@ +--- +'myst-common': patch +--- + +Update extractParts to pull implicit parts from sections based on heading diff --git a/.changeset/clean-dots-glow.md b/.changeset/clean-dots-glow.md new file mode 100644 index 000000000..1667692e7 --- /dev/null +++ b/.changeset/clean-dots-glow.md @@ -0,0 +1,5 @@ +--- +'tex-to-myst': patch +--- + +Fix bug where certain affiliations without authors crash tex-to-myst diff --git a/.changeset/tidy-windows-smell.md b/.changeset/tidy-windows-smell.md new file mode 100644 index 000000000..d39a274c7 --- /dev/null +++ b/.changeset/tidy-windows-smell.md @@ -0,0 +1,5 @@ +--- +"myst-frontmatter": patch +--- + +Expand parts to other aliases diff --git a/docs/_toc.yml b/docs/_toc.yml index 
484516910..cb07cf438 100644 --- a/docs/_toc.yml +++ b/docs/_toc.yml @@ -59,6 +59,7 @@ parts: - file: syntax-overview - file: directives - file: frontmatter + - file: document-parts - file: settings - file: glossary - caption: Contribute diff --git a/docs/blocks.md b/docs/blocks.md index 8f4a0a90b..b4bbf0137 100644 --- a/docs/blocks.md +++ b/docs/blocks.md @@ -15,7 +15,7 @@ cell 2 ``` ```{tip} -To identify a part of a document, like an abstract, use `+++ {"part": "abstract"}`, this will allow tools like the [](./creating-pdf-documents.md) to be created with the appropriate parts of information. +To identify a [part of a document](./document-parts.md), like an abstract, use `+++ { "part": "abstract" }`, this will allow tools like the [](./creating-pdf-documents.md) to be created with the appropriate parts of information. ``` ## Comments diff --git a/docs/document-parts.md b/docs/document-parts.md new file mode 100644 index 000000000..5dd069203 --- /dev/null +++ b/docs/document-parts.md @@ -0,0 +1,91 @@ +--- +title: Document Parts +description: Parts allow you to specify special parts of your document, like abstract, keypoints, and acknowledgments. +--- + +Document parts allow you to add metadata to your documents with specific components of your page, for example, abstract, dedication, or acknowledgments. Many templates put these in specific places. + +There are three ways that you can define parts of a document: (1) in your page frontmatter; (2) implicitly using a section heading; and (3) on a block using a `part` or `tag` annotation. + +## Parts in Frontmatter + +On any page, you can add a part to your document directly in the frontmatter, for example, the `abstract`: + +```yaml +--- +title: My document +abstract: | + This is a multi-line + abstract, with _markdown_! 
+--- +``` + +### Known Frontmatter Parts + +The known parts that are recognized as _top-level_ document frontmatter keys are: + +abstract +: A concise overview of the entire document, highlighting the main objectives, methods, results, and conclusions. It's meant to give readers a quick snapshot of what to expect without having to read the entire document. + +summary +: Similar to an abstract, but can either be slightly longer and more detailed or used as a plain-language summary, depending on the context. It summarizes the document's content, including the background, purpose, methodology, results, and conclusions. +: Alias: `plain_language_summary`, `lay_summary` + +keypoints +: A brief list that highlights the main findings, conclusions, or contributions of the document. Key points are often used to quickly convey the core message or most important aspects to the reader. + +dedication +: A short section where the author dedicates the document to someone, often as a gesture of honor or respect. + +epigraph +: A quote or poem that the author includes at the beginning of the document to set a tone or theme, or to hint at the document’s underlying message. It is often relevant to the content but not directly related to it. +: Alias: `quote` + +data_availability +: A statement or section that details how readers can access the data sets and resources used in the document. This can include links to repositories, conditions for access, and any restrictions on the data. It's crucial for transparency and reproducibility in research documents. +: Alias: `availability` + +acknowledgments +: A section where the author thanks individuals, organizations, or agencies that contributed to the completion of the document. This can include support in the form of funding, expertise, feedback, or moral support. 
+: Alias: `ack`, `acknowledgements` + +### Custom Frontmatter Parts + +If you have a custom part name for a template, you can nest it under `parts:`, which takes arbitrary keys. + +```yaml +--- +title: My document +parts: + special_part: | + This is a multi-line + abstract, with _markdown_! +--- +``` + +The advantage of this method is that the content is not rendered in your document. + +## Implicit Parts using a Title + +If you are rendering your project in other places, it can be helpful to leave these sections directly in the document. +To do so, add a heading as usual: + +``` +# Abstract + +This is my abstract! +``` + +Note that frontmatter parts and explicitly tagged cells/blocks will take precedence over this method. + +## In Jupyter Notebook cells and blocks + +When using a Jupyter Notebook, you can add a `tag` to the cell with the part name; if multiple cells share that tag, they will be extracted and merged. This can also be represented in a [block](./blocks.md): + +```markdown ++++ { "part": "abstract" } + +This is my abstract block. + ++++ +``` diff --git a/docs/quickstart-myst-documents.md b/docs/quickstart-myst-documents.md index 6b86c4a03..36c6d6666 100644 --- a/docs/quickstart-myst-documents.md +++ b/docs/quickstart-myst-documents.md @@ -151,23 +151,25 @@ Once these are added, the myst theme (in this case the `book-theme` template) ca The myst theme for the `01-paper.md` page after the frontmatter changes are added. Compare this to what it looked like before in [](#frontmatter-before-pdf). The structure of the HTML page has also been improved, including meta tags that are available to search engines and other programmatic indexers. ::: -### Add an abstract block +### Add an abstract part -We will also add data about the "parts" of our document, for example, the abstract. This will be important when we export to PDF and also visually changes the `book-theme`. 
+We will also add metadata about the "parts" of our document, for example, the abstract. +This will be important when we export to PDF and also visually changes the `book-theme`. -🛠 In `01-paper.md`: surround the abstract in a block `+++ {"part": "abstract"}` +🛠 In `01-paper.md`: move the abstract into the frontmatter using a multiline YAML syntax `abstract: |` ```{code-block} markdown :linenos: -:emphasize-lines: 1,5 -+++ {"part": "abstract"} - -We introduce, a set of open-source, community-driven ... - -+++ +:emphasize-lines: 4,5 +--- +title: How to MyST, without being mystified 🧙 +... +abstract: | + We introduce, a set of open-source, community-driven ... +--- ``` -You can make other blocks, like `data-availability` or `acknowledgments` or `key-points`, templates will treat these differently and may require specific parts to fully render. +You can make other parts, like `data_availability` or `acknowledgments` or `keypoints`, templates will treat these differently and may require specific parts to fully render. See [document parts](./document-parts.md) for additional information. 
### Add a citation diff --git a/packages/myst-common/src/extractParts.spec.ts b/packages/myst-common/src/extractParts.spec.ts index 535db35d4..447c4bd02 100644 --- a/packages/myst-common/src/extractParts.spec.ts +++ b/packages/myst-common/src/extractParts.spec.ts @@ -1,6 +1,7 @@ import { describe, expect, it } from 'vitest'; -import { extractPart } from './extractParts'; +import { extractImplicitPart, extractPart } from './extractParts'; import type { GenericParent } from '../dist'; +import { copyNode } from './utils'; describe('extractPart', () => { it('no part returns undefined', async () => { @@ -174,3 +175,434 @@ describe('extractPart', () => { }); }); }); + +describe('extractImplicitPart', () => { + it('no part returns undefined', async () => { + expect( + extractImplicitPart({ + type: 'root', + children: [{ type: 'text', value: 'untagged content' }], + }), + ).toEqual(undefined); + }); + it('part heading/content is removed from tree at top level and returned', async () => { + const tree: GenericParent = { + type: 'root', + children: [ + { + type: 'paragraph', + children: [{ type: 'text', value: 'one' }], + }, + { + type: 'heading', + children: [{ type: 'text', value: 'abstract' }], + }, + { + type: 'paragraph', + children: [{ type: 'text', value: 'two' }], + }, + { + type: 'paragraph', + children: [{ type: 'text', value: 'three' }], + }, + { + type: 'heading', + children: [{ type: 'text', value: 'intro' }], + }, + { + type: 'paragraph', + children: [{ type: 'text', value: 'four' }], + }, + ], + }; + expect(extractImplicitPart(tree, 'abstract')).toEqual({ + type: 'root', + children: [ + { + type: 'block', + data: { part: 'abstract' }, + children: [ + { + type: 'paragraph', + children: [{ type: 'text', value: 'two' }], + }, + { + type: 'paragraph', + children: [{ type: 'text', value: 'three' }], + }, + ], + }, + ], + }); + expect(tree).toEqual({ + type: 'root', + children: [ + { + type: 'paragraph', + children: [{ type: 'text', value: 'one' }], + }, + { + type: 
'heading', + children: [{ type: 'text', value: 'intro' }], + }, + { + type: 'paragraph', + children: [{ type: 'text', value: 'four' }], + }, + ], + }); + }); + it('part heading/content is removed from top level block and returned', async () => { + const tree: GenericParent = { + type: 'root', + children: [ + { + type: 'block', + children: [ + { + type: 'paragraph', + children: [{ type: 'text', value: 'one' }], + }, + { + type: 'heading', + children: [{ type: 'text', value: 'abstract' }], + }, + { + type: 'paragraph', + children: [{ type: 'text', value: 'two' }], + }, + { + type: 'paragraph', + children: [{ type: 'text', value: 'three' }], + }, + { + type: 'heading', + children: [{ type: 'text', value: 'intro' }], + }, + { + type: 'paragraph', + children: [{ type: 'text', value: 'four' }], + }, + ], + }, + ], + }; + expect(extractImplicitPart(tree, 'abstract')).toEqual({ + type: 'root', + children: [ + { + type: 'block', + data: { part: 'abstract' }, + children: [ + { + type: 'paragraph', + children: [{ type: 'text', value: 'two' }], + }, + { + type: 'paragraph', + children: [{ type: 'text', value: 'three' }], + }, + ], + }, + ], + }); + expect(tree).toEqual({ + type: 'root', + children: [ + { + type: 'block', + children: [ + { + type: 'paragraph', + children: [{ type: 'text', value: 'one' }], + }, + { + type: 'heading', + children: [{ type: 'text', value: 'intro' }], + }, + { + type: 'paragraph', + children: [{ type: 'text', value: 'four' }], + }, + ], + }, + ], + }); + }); + it('part heading/content is not removed when nested', async () => { + const tree: GenericParent = { + type: 'root', + children: [ + { + type: 'block', + children: [ + { + type: 'block', + children: [ + { + type: 'paragraph', + children: [{ type: 'text', value: 'one' }], + }, + { + type: 'heading', + children: [{ type: 'text', value: 'abstract' }], + }, + { + type: 'paragraph', + children: [{ type: 'text', value: 'two' }], + }, + { + type: 'paragraph', + children: [{ type: 'text', value: 
'three' }], + }, + { + type: 'heading', + children: [{ type: 'text', value: 'intro' }], + }, + { + type: 'paragraph', + children: [{ type: 'text', value: 'four' }], + }, + ], + }, + ], + }, + ], + }; + const treeCopy = copyNode(tree); + expect(extractImplicitPart(tree, 'abstract')).toEqual(undefined); + expect(tree).toEqual(treeCopy); + }); + it('parts from top and block levels concatenate', async () => { + const tree: GenericParent = { + type: 'root', + children: [ + { + type: 'block', + children: [ + { + type: 'paragraph', + children: [{ type: 'text', value: 'one' }], + }, + { + type: 'heading', + children: [{ type: 'text', value: 'abstract' }], + }, + { + type: 'paragraph', + children: [{ type: 'text', value: 'two' }], + }, + ], + }, + { + type: 'paragraph', + children: [{ type: 'text', value: 'three' }], + }, + { + type: 'heading', + children: [{ type: 'text', value: 'abstract' }], + }, + { + type: 'paragraph', + children: [{ type: 'text', value: 'four' }], + }, + ], + }; + expect(extractImplicitPart(tree, 'abstract')).toEqual({ + type: 'root', + children: [ + { + type: 'block', + data: { part: 'abstract' }, + children: [ + { + type: 'paragraph', + children: [{ type: 'text', value: 'two' }], + }, + ], + }, + { + type: 'block', + data: { part: 'abstract' }, + children: [ + { + type: 'paragraph', + children: [{ type: 'text', value: 'four' }], + }, + ], + }, + ], + }); + expect(tree).toEqual({ + type: 'root', + children: [ + { + type: 'block', + children: [ + { + type: 'paragraph', + children: [{ type: 'text', value: 'one' }], + }, + ], + }, + { + type: 'paragraph', + children: [{ type: 'text', value: 'three' }], + }, + ], + }); + }); + it('part headings with no content remain', async () => { + const tree: GenericParent = { + type: 'root', + children: [ + { + type: 'block', + children: [ + { + type: 'paragraph', + children: [{ type: 'text', value: 'one' }], + }, + { + type: 'heading', + children: [{ type: 'text', value: 'abstract' }], + }, + { + type: 'heading', + 
children: [{ type: 'text', value: 'not abstract' }], + }, + { + type: 'paragraph', + children: [{ type: 'text', value: 'two' }], + }, + ], + }, + { + type: 'paragraph', + children: [{ type: 'text', value: 'three' }], + }, + { + type: 'paragraph', + children: [{ type: 'text', value: 'four' }], + }, + { + type: 'heading', + children: [{ type: 'text', value: 'abstract' }], + }, + ], + }; + const treeCopy = copyNode(tree); + expect(extractImplicitPart(tree, 'abstract')).toEqual(undefined); + expect(tree).toEqual(treeCopy); + }); + it('part is not removed from block with part', async () => { + const tree: GenericParent = { + type: 'root', + children: [ + { + type: 'block', + data: { part: 'explicit' }, + children: [ + { + type: 'paragraph', + children: [{ type: 'text', value: 'one' }], + }, + { + type: 'heading', + children: [{ type: 'text', value: 'abstract' }], + }, + { + type: 'paragraph', + children: [{ type: 'text', value: 'two' }], + }, + { + type: 'paragraph', + children: [{ type: 'text', value: 'three' }], + }, + { + type: 'heading', + children: [{ type: 'text', value: 'intro' }], + }, + { + type: 'paragraph', + children: [{ type: 'text', value: 'four' }], + }, + ], + }, + ], + }; + const treeCopy = copyNode(tree); + expect(extractImplicitPart(tree, 'abstract')).toEqual(undefined); + expect(tree).toEqual(treeCopy); + }); + it('part heading with format removed', async () => { + const tree: GenericParent = { + type: 'root', + children: [ + { + type: 'paragraph', + children: [{ type: 'text', value: 'one' }], + }, + { + type: 'heading', + children: [ + { type: 'text', value: 'abs' }, + { type: 'strong', children: [{ type: 'text', value: 'TRACT' }] }, + ], + }, + { + type: 'paragraph', + children: [{ type: 'text', value: 'two' }], + }, + { + type: 'paragraph', + children: [{ type: 'text', value: 'three' }], + }, + { + type: 'heading', + children: [{ type: 'text', value: 'intro' }], + }, + { + type: 'paragraph', + children: [{ type: 'text', value: 'four' }], + }, + ], 
+ }; + expect(extractImplicitPart(tree, 'abstract')).toEqual({ + type: 'root', + children: [ + { + type: 'block', + data: { part: 'abstract' }, + children: [ + { + type: 'paragraph', + children: [{ type: 'text', value: 'two' }], + }, + { + type: 'paragraph', + children: [{ type: 'text', value: 'three' }], + }, + ], + }, + ], + }); + expect(tree).toEqual({ + type: 'root', + children: [ + { + type: 'paragraph', + children: [{ type: 'text', value: 'one' }], + }, + { + type: 'heading', + children: [{ type: 'text', value: 'intro' }], + }, + { + type: 'paragraph', + children: [{ type: 'text', value: 'four' }], + }, + ], + }); + }); +}); diff --git a/packages/myst-common/src/extractParts.ts b/packages/myst-common/src/extractParts.ts index 3922465df..cc31f007d 100644 --- a/packages/myst-common/src/extractParts.ts +++ b/packages/myst-common/src/extractParts.ts @@ -1,20 +1,25 @@ import type { Block } from 'myst-spec-ext'; -import type { GenericParent } from './types.js'; +import type { GenericNode, GenericParent } from './types.js'; import { remove } from 'unist-util-remove'; import { selectAll } from 'unist-util-select'; -import { copyNode } from './utils.js'; +import { copyNode, toText } from './utils.js'; -/** - * Selects the block node(s) based on part (string) or tags (string[]). - * If `part` is a string array, any of the parts will be treated equally. - */ -export function selectBlockParts(tree: GenericParent, part: string | string[]): Block[] { +function coercePart(part?: string | string[]): string[] { if (!part) { // Prevent an undefined, null or empty part comparison return []; } + return typeof part === 'string' ? [part] : part; +} + +/** + * Selects the block node(s) based on part (string) or tags (string[]). + * If `part` is a string array, any of the parts will be treated equally. 
+ */ +export function selectBlockParts(tree: GenericParent, part?: string | string[]): Block[] { + const parts = coercePart(part); + if (parts.length === 0) return []; const blockParts = selectAll('block', tree).filter((block) => { - const parts = typeof part === 'string' ? [part] : part; return parts .map((p) => { return ( @@ -27,22 +32,105 @@ export function selectBlockParts(tree: GenericParent, part: string | string[]): return blockParts as Block[]; } +function createPartBlock( + children: GenericNode[], + part: string, + opts?: { + removePartData?: boolean; + }, +) { + const block: GenericParent = { type: 'block', children }; + if (!opts?.removePartData) { + block.data ??= {}; + block.data.part = part; + } + return block; +} + +/** + * Extract implicit part based on heading name + * + * Given a tree, search children at the root or block level for a heading + * with text matching parts. If such heading is encountered, return a copy of + * the subsequent paragraph nodes until a non-paragraph node is encountered. + * Heading and paragraph nodes in the original tree are marked for deletion. + * + * Ignores anything that is already part of a block with explicit part. 
+ */ +export function extractImplicitPart( + tree: GenericParent, + part?: string | string[], + opts?: { + removePartData?: boolean; + }, +): GenericParent | undefined { + const parts = coercePart(part); + if (parts.length === 0) return; + let insideImplicitPart = false; + const blockParts: GenericNode[] = []; + let paragraphs: GenericNode[] = []; + tree.children.forEach((child, index) => { + // Add this paragraph to the part + if (insideImplicitPart && child.type === 'paragraph') { + paragraphs.push(copyNode(child)); + child.type = '__part_delete__'; + } + // Stop adding things if we didn't just add a paragraph OR we are at the last child + if (child.type !== '__part_delete__' || index === tree.children.length - 1) { + insideImplicitPart = false; + if (paragraphs.length > 0) { + blockParts.push(createPartBlock(paragraphs, parts[0], opts)); + paragraphs = []; + selectAll('__part_heading__', tree).forEach((node) => { + node.type = '__part_delete__'; + }); + } + } + if (child.type === 'block') { + // Do not search blocks already marked explicitly as parts + if (child.data?.part) return; + // Do not recursively search beyond top-level blocks on root node + if (tree.type !== 'root') return; + const blockPartsTree = extractImplicitPart(child as GenericParent, parts); + if (blockPartsTree) blockParts.push(...blockPartsTree.children); + } else if (child.type === 'heading' && parts.includes(toText(child).toLowerCase())) { + // Start adding paragraphs to the part after this heading + insideImplicitPart = true; + child.type = '__part_heading__'; + } + }); + // Restore part headings if they did not contain any paragraphs + selectAll('__part_heading__', tree).forEach((node) => { + node.type = 'heading'; + }); + if (blockParts.length === 0) return; + const partsTree = { type: 'root', children: blockParts } as GenericParent; + remove(tree, '__part_delete__'); + return partsTree; +} + /** * Returns a copy of the block parts and removes them from the tree. 
*/ export function extractPart( tree: GenericParent, - part: string | string[], + part?: string | string[], opts?: { /** Helpful for when we are doing recursions, we don't want to extract the part again. */ removePartData?: boolean; - /** Ensure that blocks are by default turned to visible */ + /** Ensure that blocks are by default turned to visible within the part */ keepVisibility?: boolean; + /** Provide an option so implicit section-to-part behavior can be disabled */ + requireExplicitPart?: boolean; }, ): GenericParent | undefined { - const partStrings = typeof part === 'string' ? [part] : part; + const partStrings = coercePart(part); + if (partStrings.length === 0) return; const blockParts = selectBlockParts(tree, part); - if (blockParts.length === 0) return undefined; + if (blockParts.length === 0) { + if (opts?.requireExplicitPart) return; + return extractImplicitPart(tree, partStrings); + } const children = copyNode(blockParts).map((block) => { // Ensure the block always has the `part` defined, as it might be in the tags block.data ??= {}; diff --git a/packages/myst-frontmatter/src/site/validators.ts b/packages/myst-frontmatter/src/site/validators.ts index b75e9fea0..23e8b5d16 100644 --- a/packages/myst-frontmatter/src/site/validators.ts +++ b/packages/myst-frontmatter/src/site/validators.ts @@ -46,9 +46,17 @@ export const FRONTMATTER_ALIASES = { ack: 'acknowledgments', acknowledgements: 'acknowledgments', availability: 'data_availability', - plain_language_summary: 'summary', + dataAvailability: 'data_availability', + 'data-availability': 'data_availability', quote: 'epigraph', + plain_language_summary: 'summary', + 'plain-language-summary': 'summary', + plainLanguageSummary: 'summary', lay_summary: 'summary', + 'lay-summary': 'summary', + keyPoints: 'keypoints', + key_points: 'keypoints', + 'key-points': 'keypoints', image: 'thumbnail', }; diff --git a/packages/tex-to-myst/src/frontmatter.ts b/packages/tex-to-myst/src/frontmatter.ts index 
894f16765..13832b230 100644 --- a/packages/tex-to-myst/src/frontmatter.ts +++ b/packages/tex-to-myst/src/frontmatter.ts @@ -88,7 +88,7 @@ function addAffiliation(node: GenericNode, state: ITexParser) { state.closeParagraph(); const renderedAffil = state.stack.pop(); const fmAffil = { id: affilNumber, name: getContentFromRenderedSpan(renderedAffil) }; - if (!affilNumber) { + if (!affilNumber && fm.authors.length > 0) { const lastAuthor = fm.authors[fm.authors.length - 1]; if (!lastAuthor.affiliations) lastAuthor.affiliations = []; lastAuthor.affiliations.push(childrenOrString(fmAffil.name) as any);