From f464bae4d29b8a59af9c0de08f6efb6383227bc9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Germ=C3=A1n=20Jablo=C3=B1ski?= <43938777+GermanJablo@users.noreply.github.com> Date: Fri, 6 Sep 2024 18:21:58 -0300 Subject: [PATCH 1/9] code-md --- .../lexical-markdown/src/MarkdownTransformers.ts | 3 ++- .../src/__tests__/unit/LexicalMarkdown.test.ts | 12 ++++++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/packages/lexical-markdown/src/MarkdownTransformers.ts b/packages/lexical-markdown/src/MarkdownTransformers.ts index fc0662726ae..46ccb94e8d6 100644 --- a/packages/lexical-markdown/src/MarkdownTransformers.ts +++ b/packages/lexical-markdown/src/MarkdownTransformers.ts @@ -318,10 +318,11 @@ export const CODE: MultilineElementTransformer = { return null; } const textContent = node.getTextContent(); + const removeSingleNewLine = textContent.replace(/(? { html: '
Code
',
md: '```javascript\nCode\n```',
},
+ {
+ html: 'A[B\nC](https://www.google.com)
',
+ md: ['```javascript', 'A[B', 'C](https://www.google.com)', '```'].join(
+ '\n',
+ ),
+ skipExport: true,
+ },
+ {
+ html: 'A[B\nC](https://www.google.com)
',
+ md: ['```javascript', 'A[BC](https://www.google.com)', '```'].join('\n'),
+ skipImport: true,
+ },
{
// Should always preserve language in md but keep data-highlight-language only for supported languages
html: 'Code
',
From f6830a34916c6f4335417be378993d88228d7664 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Germ=C3=A1n=20Jablo=C3=B1ski?=
<43938777+GermanJablo@users.noreply.github.com>
Date: Sat, 7 Sep 2024 10:30:44 -0300
Subject: [PATCH 2/9] sanitize markdown
---
.../__tests__/unit/LexicalMarkdown.test.ts | 45 +++++++++++++++++++
packages/lexical-markdown/src/utils.ts | 34 ++++++++++++++
2 files changed, 79 insertions(+)
diff --git a/packages/lexical-markdown/src/__tests__/unit/LexicalMarkdown.test.ts b/packages/lexical-markdown/src/__tests__/unit/LexicalMarkdown.test.ts
index fe1e322d526..6aa02db5ed3 100644
--- a/packages/lexical-markdown/src/__tests__/unit/LexicalMarkdown.test.ts
+++ b/packages/lexical-markdown/src/__tests__/unit/LexicalMarkdown.test.ts
@@ -23,6 +23,7 @@ import {
TRANSFORMERS,
} from '../..';
import {MultilineElementTransformer} from '../../MarkdownTransformers';
+import {sanitizeMarkdown} from '../../utils';
// Matches html within a mdx file
const MDX_HTML_TRANSFORMER: MultilineElementTransformer = {
@@ -419,3 +420,47 @@ describe('Markdown', () => {
});
}
});
+
+describe('sanitizeMarkdown', () => { // adjacent lines merge; fenced code stays verbatim
+  it('should combine lines separated by a single \\n unless they are in a codeblock', () => {
+    const markdown = `
+1
+2
+
+3
+
+\`\`\`md
+1
+2
+
+3
+\`\`\`
+
+\`\`\`js
+1
+2
+
+3
+\`\`\`
+`;
+    expect(sanitizeMarkdown(markdown)).toBe(`
+12
+
+3
+
+\`\`\`md
+1
+2
+
+3
+\`\`\`
+
+\`\`\`js
+1
+2
+
+3
+\`\`\`
+`);
+  });
+});
diff --git a/packages/lexical-markdown/src/utils.ts b/packages/lexical-markdown/src/utils.ts
index 812d61e0269..83dc8acee02 100644
--- a/packages/lexical-markdown/src/utils.ts
+++ b/packages/lexical-markdown/src/utils.ts
@@ -456,3 +456,37 @@ export function isEmptyParagraph(node: LexicalNode): boolean {
MARKDOWN_EMPTY_LINE_REG_EXP.test(firstChild.getTextContent()))
);
}
+
+/**
+ * Merges adjacent non-empty lines into a single line, because markdown has no
+ * "empty paragraphs" and blocks must be separated by an empty line. Lines are
+ * joined with no separator, and fenced (```) code blocks are kept verbatim.
+ * @param input - Markdown text to normalize.
+ * @returns The markdown with adjacent paragraph lines merged.
+ */
+export function sanitizeMarkdown(input: string): string {
+  const lines = input.split('\n');
+  let inCodeBlock = false;
+  const sanitizedLines: string[] = [];
+
+  for (const line of lines) {
+    // Detect fences BEFORE the in-code-block check: otherwise the closing
+    // fence is kept as plain code and the block is never left again.
+    if (line.startsWith('```')) {
+      inCodeBlock = !inCodeBlock;
+      sanitizedLines.push(line);
+      continue;
+    }
+    const lastLine = sanitizedLines[sanitizedLines.length - 1];
+    // Code lines, empty lines, and lines after an empty line or a fence start
+    // a new output line; `!lastLine` covers both '' and "no previous line".
+    const startsNewLine =
+      inCodeBlock || line === '' || !lastLine || lastLine.startsWith('```');
+    if (startsNewLine) {
+      sanitizedLines.push(line);
+    } else {
+      sanitizedLines[sanitizedLines.length - 1] = lastLine + line;
+    }
+  }
+  return sanitizedLines.join('\n');
+}
From cb29c2d26e70f81c15102adae23e8762d4d730c7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Germ=C3=A1n=20Jablo=C3=B1ski?=
<43938777+GermanJablo@users.noreply.github.com>
Date: Sat, 7 Sep 2024 10:43:11 -0300
Subject: [PATCH 3/9] save
---
.../lexical-markdown/src/MarkdownTransformers.ts | 3 +--
.../src/__tests__/unit/LexicalMarkdown.test.ts | 12 ------------
packages/lexical-markdown/src/index.ts | 4 +++-
3 files changed, 4 insertions(+), 15 deletions(-)
diff --git a/packages/lexical-markdown/src/MarkdownTransformers.ts b/packages/lexical-markdown/src/MarkdownTransformers.ts
index 46ccb94e8d6..fc0662726ae 100644
--- a/packages/lexical-markdown/src/MarkdownTransformers.ts
+++ b/packages/lexical-markdown/src/MarkdownTransformers.ts
@@ -318,11 +318,10 @@ export const CODE: MultilineElementTransformer = {
return null;
}
const textContent = node.getTextContent();
- const removeSingleNewLine = textContent.replace(/(? {
html: 'Code
',
md: '```javascript\nCode\n```',
},
- {
- html: 'A[B\nC](https://www.google.com)
',
- md: ['```javascript', 'A[B', 'C](https://www.google.com)', '```'].join(
- '\n',
- ),
- skipExport: true,
- },
- {
- html: 'A[B\nC](https://www.google.com)
',
- md: ['```javascript', 'A[BC](https://www.google.com)', '```'].join('\n'),
- skipImport: true,
- },
{
// Should always preserve language in md but keep data-highlight-language only for supported languages
html: 'Code
',
diff --git a/packages/lexical-markdown/src/index.ts b/packages/lexical-markdown/src/index.ts
index dac5b260478..57e286c72d9 100644
--- a/packages/lexical-markdown/src/index.ts
+++ b/packages/lexical-markdown/src/index.ts
@@ -36,6 +36,7 @@ import {
STRIKETHROUGH,
UNORDERED_LIST,
} from './MarkdownTransformers';
+import {sanitizeMarkdown} from './utils';
const ELEMENT_TRANSFORMERS: ArrayHelloworld!
', + md: ['Hello', 'world', '!'].join('\n'), + skipExport: true, + }, { // Multiline paragraphs - html: 'Hello
world
!
Hello\\\nworld\\\n!
', + html: 'Hello
world
!
Hello', md: '> Hello\n> world!', }, + // TO-DO:
world!
Hello', + // https://spec.commonmark.org/dingus/?text=%3E%20Hello%0Aworld%0A! + html: '
world
!
Helloworld!', md: '> Hello\nworld\n!', skipExport: true, }, @@ -299,8 +316,9 @@ describe('Markdown', () => { }, { customTransformers: [MDX_HTML_TRANSFORMER], - html: '
Some HTML in mdx:
From HTML: Line 1\nSome Text
',
+ html: 'Some HTML in mdx:
From HTML: Line 1Some Text
',
md: 'Some HTML in mdx:\n\n- And can be nested -
- and multiline as well + + And can be nested and multiline as well + From eb16b01161107146b1d9929c652614ae30393c45 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Germ=C3=A1n=20Jablo=C3=B1ski?= <43938777+GermanJablo@users.noreply.github.com> Date: Sat, 7 Sep 2024 23:59:59 -0300 Subject: [PATCH 5/9] nit comment --- packages/lexical-markdown/src/MarkdownTransformers.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/lexical-markdown/src/MarkdownTransformers.ts b/packages/lexical-markdown/src/MarkdownTransformers.ts index 9ef448cdaa7..d012ee46e62 100644 --- a/packages/lexical-markdown/src/MarkdownTransformers.ts +++ b/packages/lexical-markdown/src/MarkdownTransformers.ts @@ -535,7 +535,7 @@ export function sanitizeMarkdown(input: string): string { const line = lines[i]; const lastLine = sanitizedLines[sanitizedLines.length - 1]; - // Detect the start or end of a markdown code block + // Detect the start or end of a code block if (line.includes('```')) { inCodeBlock = !inCodeBlock; sanitizedLines.push(line); From e9a741bcf4e257a82605d2b94313e7518d2c2be4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Germ=C3=A1n=20Jablo=C3=B1ski?= <43938777+GermanJablo@users.noreply.github.com> Date: Sun, 8 Sep 2024 00:04:17 -0300 Subject: [PATCH 6/9] fix import --- .../src/__tests__/unit/LexicalMarkdown.test.ts | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/packages/lexical-markdown/src/__tests__/unit/LexicalMarkdown.test.ts b/packages/lexical-markdown/src/__tests__/unit/LexicalMarkdown.test.ts index 79d26f6beb2..656d8910841 100644 --- a/packages/lexical-markdown/src/__tests__/unit/LexicalMarkdown.test.ts +++ b/packages/lexical-markdown/src/__tests__/unit/LexicalMarkdown.test.ts @@ -22,8 +22,10 @@ import { Transformer, TRANSFORMERS, } from '../..'; -import {MultilineElementTransformer} from '../../MarkdownTransformers'; -import {sanitizeMarkdown} from '../../utils'; +import { + MultilineElementTransformer, + sanitizeMarkdown, +} from 
'../../MarkdownTransformers'; // Matches html within a mdx file const MDX_HTML_TRANSFORMER: MultilineElementTransformer = { From e15a4d87609e4c8e989bdd34c5a7fd81fd06cf5b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Germ=C3=A1n=20Jablo=C3=B1ski?= <43938777+GermanJablo@users.noreply.github.com> Date: Mon, 9 Sep 2024 09:29:29 -0300 Subject: [PATCH 7/9] rename sanitizeMarkdown to normalizeMarkdown --- packages/lexical-markdown/src/MarkdownTransformers.ts | 2 +- .../src/__tests__/unit/LexicalMarkdown.test.ts | 6 +++--- packages/lexical-markdown/src/index.ts | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/packages/lexical-markdown/src/MarkdownTransformers.ts b/packages/lexical-markdown/src/MarkdownTransformers.ts index d012ee46e62..40f0efc23f0 100644 --- a/packages/lexical-markdown/src/MarkdownTransformers.ts +++ b/packages/lexical-markdown/src/MarkdownTransformers.ts @@ -526,7 +526,7 @@ export const LINK: TextMatchTransformer = { type: 'text-match', }; -export function sanitizeMarkdown(input: string): string { +export function normalizeMarkdown(input: string): string { const lines = input.split('\n'); let inCodeBlock = false; const sanitizedLines: string[] = []; diff --git a/packages/lexical-markdown/src/__tests__/unit/LexicalMarkdown.test.ts b/packages/lexical-markdown/src/__tests__/unit/LexicalMarkdown.test.ts index 656d8910841..8d1a3a7bdb2 100644 --- a/packages/lexical-markdown/src/__tests__/unit/LexicalMarkdown.test.ts +++ b/packages/lexical-markdown/src/__tests__/unit/LexicalMarkdown.test.ts @@ -24,7 +24,7 @@ import { } from '../..'; import { MultilineElementTransformer, - sanitizeMarkdown, + normalizeMarkdown, } from '../../MarkdownTransformers'; // Matches html within a mdx file @@ -429,7 +429,7 @@ describe('Markdown', () => { } }); -describe('sanitizeMarkdown', () => { +describe('normalizeMarkdown', () => { it('should combine lines separated by a single \n unless they are in a codeblock', () => { const markdown = ` 1 @@ -451,7 +451,7 @@ 
describe('sanitizeMarkdown', () => { 3 \`\`\` `; - expect(sanitizeMarkdown(markdown)).toBe(` + expect(normalizeMarkdown(markdown)).toBe(` 12 3 diff --git a/packages/lexical-markdown/src/index.ts b/packages/lexical-markdown/src/index.ts index 95abb04df08..3fc2f21da17 100644 --- a/packages/lexical-markdown/src/index.ts +++ b/packages/lexical-markdown/src/index.ts @@ -31,9 +31,9 @@ import { ITALIC_STAR, ITALIC_UNDERSCORE, LINK, + normalizeMarkdown, ORDERED_LIST, QUOTE, - sanitizeMarkdown, STRIKETHROUGH, UNORDERED_LIST, } from './MarkdownTransformers'; @@ -83,7 +83,7 @@ function $convertFromMarkdownString( node?: ElementNode, shouldPreserveNewLines = false, ): void { - const sanitizedMarkdown = sanitizeMarkdown(markdown); + const sanitizedMarkdown = normalizeMarkdown(markdown); const importMarkdown = createMarkdownImport( transformers, shouldPreserveNewLines, From b9b6ee3da440c8ed8771a6737167d46df08533ac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Germ=C3=A1n=20Jablo=C3=B1ski?= <43938777+GermanJablo@users.noreply.github.com> Date: Mon, 9 Sep 2024 09:36:06 -0300 Subject: [PATCH 8/9] fix regex for code block --- packages/lexical-markdown/src/MarkdownTransformers.ts | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/packages/lexical-markdown/src/MarkdownTransformers.ts b/packages/lexical-markdown/src/MarkdownTransformers.ts index 40f0efc23f0..718b89dd63c 100644 --- a/packages/lexical-markdown/src/MarkdownTransformers.ts +++ b/packages/lexical-markdown/src/MarkdownTransformers.ts @@ -158,6 +158,8 @@ const UNORDERED_LIST_REGEX = /^(\s*)[-*+]\s/; const CHECK_LIST_REGEX = /^(\s*)(?:-\s)?\s?(\[(\s|x)?\])\s/i; const HEADING_REGEX = /^(#{1,6})\s/; const QUOTE_REGEX = /^>\s/; +const CODE_START_REGEX = /^[ \t]*```(\w+)?/; +const CODE_END_REGEX = /^[ \t]*```$/; const createBlockNode = ( createNode: (match: Array) => ElementNode, @@ -334,9 +336,9 @@ export const CODE: MultilineElementTransformer = { }, regExpEnd: { optional: true, - regExp: /[ \t]*```$/, + 
regExp: CODE_END_REGEX, }, - regExpStart: /^[ \t]*```(\w+)?/, + regExpStart: CODE_START_REGEX, replace: ( rootNode, children, @@ -536,7 +538,7 @@ export function normalizeMarkdown(input: string): string { const lastLine = sanitizedLines[sanitizedLines.length - 1]; // Detect the start or end of a code block - if (line.includes('```')) { + if (CODE_START_REGEX.test(line) || CODE_END_REGEX.test(line)) { inCodeBlock = !inCodeBlock; sanitizedLines.push(line); continue; From c936c1163e744c5cba6a4820caf6eee1b8a9bcea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Germ=C3=A1n=20Jablo=C3=B1ski?= <43938777+GermanJablo@users.noreply.github.com> Date: Mon, 9 Sep 2024 10:29:52 -0300 Subject: [PATCH 9/9] fix tests --- packages/lexical-markdown/src/MarkdownTransformers.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/lexical-markdown/src/MarkdownTransformers.ts b/packages/lexical-markdown/src/MarkdownTransformers.ts index 718b89dd63c..efff5770e18 100644 --- a/packages/lexical-markdown/src/MarkdownTransformers.ts +++ b/packages/lexical-markdown/src/MarkdownTransformers.ts @@ -159,7 +159,7 @@ const CHECK_LIST_REGEX = /^(\s*)(?:-\s)?\s?(\[(\s|x)?\])\s/i; const HEADING_REGEX = /^(#{1,6})\s/; const QUOTE_REGEX = /^>\s/; const CODE_START_REGEX = /^[ \t]*```(\w+)?/; -const CODE_END_REGEX = /^[ \t]*```$/; +const CODE_END_REGEX = /[ \t]*```$/; const createBlockNode = ( createNode: (match: Array ) => ElementNode,