facebook · potatowagon · Sep 13, 2024 · Sep 6, 2024 · Sep 7, 2024 · Sep 7, 2024
@@ -153,6 +153,18 @@ export type TextMatchTransformer = Readonly<{
   type: 'text-match';
 }>;
 
+const ORDERED_LIST_REGEX = /^(\s*)(\d{1,})\.\s/;
+const UNORDERED_LIST_REGEX = /^(\s*)[-*+]\s/;
+const CHECK_LIST_REGEX = /^(\s*)(?:-\s)?\s?(\[(\s|x)?\])\s/i;
+const HEADING_REGEX = /^(#{1,6})\s/;
+const QUOTE_REGEX = /^>\s/;
+const CODE_START_REGEX = /^[ \t]*```(\w+)?/;
+const CODE_END_REGEX = /[ \t]*```$/;
+const CODE_SINGLE_LINE_REGEX =
+  /^[ \t]*```[^`]+(?:(?:`{1,2}|`{4,})[^`]+)*```(?:[^`]|$)/;
+const TABLE_ROW_REG_EXP = /^(?:\|)(.+)(?:\|)\s?$/;
+const TABLE_ROW_DIVIDER_REG_EXP = /^(\| ?:?-*:? ?)+\|\s?$/;
+
 const createBlockNode = (
   createNode: (match: Array<string>) => ElementNode,
 ): ElementTransformer['replace'] => {
@@ -266,7 +278,7 @@ export const HEADING: ElementTransformer = {
     const level = Number(node.getTag().slice(1));
     return '#'.repeat(level) + ' ' + exportChildren(node);
   },
-  regExp: /^(#{1,6})\s/,
+  regExp: HEADING_REGEX,
   replace: createBlockNode((match) => {
     const tag = ('h' + match[1].length) as HeadingTagType;
     return $createHeadingNode(tag);
@@ -288,7 +300,7 @@ export const QUOTE: ElementTransformer = {
     }
     return output.join('\n');
   },
-  regExp: /^>\s/,
+  regExp: QUOTE_REGEX,
   replace: (parentNode, children, _match, isImport) => {
     if (isImport) {
       const previousNode = parentNode.getPreviousSibling();
@@ -328,9 +340,9 @@ export const CODE: MultilineElementTransformer = {
   },
   regExpEnd: {
     optional: true,
-    regExp: /[ \t]*```$/,
+    regExp: CODE_END_REGEX,
   },
-  regExpStart: /^[ \t]*```(\w+)?/,
+  regExpStart: CODE_START_REGEX,
   replace: (
     rootNode,
     children,
@@ -399,7 +411,7 @@ export const UNORDERED_LIST: ElementTransformer = {
   export: (node, exportChildren) => {
     return $isListNode(node) ? listExport(node, exportChildren, 0) : null;
   },
-  regExp: /^(\s*)[-*+]\s/,
+  regExp: UNORDERED_LIST_REGEX,
   replace: listReplace('bullet'),
   type: 'element',
 };
@@ -409,7 +421,7 @@ export const CHECK_LIST: ElementTransformer = {
   export: (node, exportChildren) => {
     return $isListNode(node) ? listExport(node, exportChildren, 0) : null;
   },
-  regExp: /^(\s*)(?:-\s)?\s?(\[(\s|x)?\])\s/i,
+  regExp: CHECK_LIST_REGEX,
   replace: listReplace('check'),
   type: 'element',
 };
@@ -419,7 +431,7 @@ export const ORDERED_LIST: ElementTransformer = {
   export: (node, exportChildren) => {
     return $isListNode(node) ? listExport(node, exportChildren, 0) : null;
   },
-  regExp: /^(\s*)(\d{1,})\.\s/,
+  regExp: ORDERED_LIST_REGEX,
   replace: listReplace('number'),
   type: 'element',
 };
@@ -519,3 +531,55 @@ export const LINK: TextMatchTransformer = {
   trigger: ')',
   type: 'text-match',
 };
+
+export function normalizeMarkdown(input: string): string {
+  const lines = input.split('\n');
+  let inCodeBlock = false;
+  const sanitizedLines: string[] = [];
+
+  for (let i = 0; i < lines.length; i++) {
+    const line = lines[i];
+    const lastLine = sanitizedLines[sanitizedLines.length - 1];
+
+    // Code blocks of ```single line``` don't toggle the inCodeBlock flag
+    if (CODE_SINGLE_LINE_REGEX.test(line)) {
+      sanitizedLines.push(line);
+      continue;
+    }
+
+    // Detect the start or end of a code block
+    if (CODE_START_REGEX.test(line) || CODE_END_REGEX.test(line)) {
+      inCodeBlock = !inCodeBlock;
+      sanitizedLines.push(line);
+      continue;
+    }
+
+    // If we are inside a code block, keep the line unchanged
+    if (inCodeBlock) {
+      sanitizedLines.push(line);
+      continue;
+    }
+
+    // In markdown the concept of "empty paragraphs" does not exist.
+    // Blocks must be separated by an empty line. Non-empty adjacent lines must be merged.
+    if (
+      line === '' ||
+      lastLine === '' ||
+      !lastLine ||
+      HEADING_REGEX.test(lastLine) ||
+      HEADING_REGEX.test(line) ||
+      QUOTE_REGEX.test(line) ||
+      ORDERED_LIST_REGEX.test(line) ||
+      UNORDERED_LIST_REGEX.test(line) ||
+      CHECK_LIST_REGEX.test(line) ||
+      TABLE_ROW_REG_EXP.test(line) ||
+      TABLE_ROW_DIVIDER_REG_EXP.test(line)
+    ) {
+      sanitizedLines.push(line);
+    } else {
+      sanitizedLines[sanitizedLines.length - 1] = lastLine + line;
+    }
+  }
+
+  return sanitizedLines.join('\n');
+}
@@ -22,7 +22,10 @@ import {
   Transformer,
   TRANSFORMERS,
 } from '../..';
-import {MultilineElementTransformer} from '../../MarkdownTransformers';
+import {
+  MultilineElementTransformer,
+  normalizeMarkdown,
+} from '../../MarkdownTransformers';
 
 // Matches html within a mdx file
 const MDX_HTML_TRANSFORMER: MultilineElementTransformer = {
@@ -92,19 +95,36 @@ describe('Markdown', () => {
       html: '<h6><span style="white-space: pre-wrap;">Hello world</span></h6>',
       md: '###### Hello world',
     },
+    {
+      // Multiline paragraphs: https://spec.commonmark.org/dingus/?text=Hello%0Aworld%0A!
+      html: '<p><span style="white-space: pre-wrap;">Helloworld!</span></p>',
+      md: ['Hello', 'world', '!'].join('\n'),
+      skipExport: true,
+    },
     {
       // Multiline paragraphs
-      html: '<p><span style="white-space: pre-wrap;">Hello</span><br><span style="white-space: pre-wrap;">world</span><br><span style="white-space: pre-wrap;">!</span></p>',
+      // TO-DO: It would be nice to support also hard line breaks (<br>) as \ or double spaces
+      // See https://spec.commonmark.org/0.31.2/#hard-line-breaks.
+      // Example: '<p><span style="white-space: pre-wrap;">Hello\\\nworld\\\n!</span></p>',
+      html: '<p><span style="white-space: pre-wrap;">Hello<br>world<br>!</span></p>',
       md: ['Hello', 'world', '!'].join('\n'),
+      skipImport: true,
     },
     {
       html: '<blockquote><span style="white-space: pre-wrap;">Hello</span><br><span style="white-space: pre-wrap;">world!</span></blockquote>',
       md: '> Hello\n> world!',
     },
+    // TO-DO: <br> should be preserved
+    // {
+    //   html: '<ul><li value="1"><span style="white-space: pre-wrap;">Hello</span></li><li value="2"><span style="white-space: pre-wrap;">world<br>!<br>!</span></li></ul>',
+    //   md: '- Hello\n- world<br>!<br>!',
+    //   skipImport: true,
+    // },
     {
-      // Multiline list items
-      html: '<ul><li value="1"><span style="white-space: pre-wrap;">Hello</span></li><li value="2"><span style="white-space: pre-wrap;">world</span><br><span style="white-space: pre-wrap;">!</span><br><span style="white-space: pre-wrap;">!</span></li></ul>',
+      // Multiline list items: https://spec.commonmark.org/dingus/?text=-%20Hello%0A-%20world%0A!%0A!
+      html: '<ul><li value="1"><span style="white-space: pre-wrap;">Hello</span></li><li value="2"><span style="white-space: pre-wrap;">world!!</span></li></ul>',
       md: '- Hello\n- world\n!\n!',
+      skipExport: true,
     },
     {
       html: '<ul><li value="1"><span style="white-space: pre-wrap;">Hello</span></li><li value="2"><span style="white-space: pre-wrap;">world</span></li></ul>',
@@ -274,8 +294,8 @@ describe('Markdown', () => {
       skipExport: true,
     },
     {
-      // Import only: multiline quote will be prefixed with ">" on each line during export
-      html: '<blockquote><span style="white-space: pre-wrap;">Hello</span><br><span style="white-space: pre-wrap;">world</span><br><span style="white-space: pre-wrap;">!</span></blockquote>',
+      // https://spec.commonmark.org/dingus/?text=%3E%20Hello%0Aworld%0A!
+      html: '<blockquote><span style="white-space: pre-wrap;">Helloworld!</span></blockquote>',
       md: '> Hello\nworld\n!',
       skipExport: true,
     },
@@ -298,8 +318,9 @@ describe('Markdown', () => {
     },
     {
       customTransformers: [MDX_HTML_TRANSFORMER],
-      html: '<p><span style="white-space: pre-wrap;">Some HTML in mdx:</span></p><pre spellcheck="false" data-language="MyComponent"><span style="white-space: pre-wrap;">From HTML: Line 1\nSome Text</span></pre>',
+      html: '<p><span style="white-space: pre-wrap;">Some HTML in mdx:</span></p><pre spellcheck="false" data-language="MyComponent"><span style="white-space: pre-wrap;">From HTML: Line 1Some Text</span></pre>',
       md: 'Some HTML in mdx:\n\n<MyComponent>Line 1\nSome Text</MyComponent>',
+      skipExport: true,
     },
   ];
 
@@ -407,3 +428,78 @@ describe('Markdown', () => {
     });
   }
 });
+
+describe('normalizeMarkdown', () => {
+  it('should combine lines separated by a single \n unless they are in a codeblock', () => {
+    const markdown = `
+A1
+A2
+
+A3
+
+\`\`\`md
+B1
+B2
+
+B3
+\`\`\`
+
+C1
+C2
+
+C3
+
+\`\`\`js
+D1
+D2
+
+D3
+\`\`\`
+
+\`\`\`single line code\`\`\`
+
+E1
+E2
+
+E3
+`;
+    expect(normalizeMarkdown(markdown)).toBe(`
+A1A2
+
+A3
+
+\`\`\`md
+B1
+B2
+
+B3
+\`\`\`
+
+C1C2
+
+C3
+
+\`\`\`js
+D1
+D2
+
+D3
+\`\`\`
+
+\`\`\`single line code\`\`\`
+
+E1E2
+
+E3
+`);
+  });
+
+  it('tables', () => {
+    const markdown = `
+| a | b |
+| --- | --- |
+| c | d |
+`;
+    expect(normalizeMarkdown(markdown)).toBe(markdown);
+  });
+});
@@ -31,6 +31,7 @@ import {
   ITALIC_STAR,
   ITALIC_UNDERSCORE,
   LINK,
+  normalizeMarkdown,
   ORDERED_LIST,
   QUOTE,
   STRIKETHROUGH,
@@ -82,11 +83,12 @@ function $convertFromMarkdownString(
   node?: ElementNode,
   shouldPreserveNewLines = false,
 ): void {
+  const sanitizedMarkdown = normalizeMarkdown(markdown);
   const importMarkdown = createMarkdownImport(
     transformers,
     shouldPreserveNewLines,
   );
-  return importMarkdown(markdown, node);
+  return importMarkdown(sanitizedMarkdown, node);
 }
 
 /**

@@ -1310,7 +1310,6 @@ const IMPORTED_MARKDOWN_HTML = html`
       bold italic strikethrough
     </strong>
     <span data-lexical-text="true">text,</span>
-    <br />
     <strong
       class="PlaygroundEditorTheme__textBold PlaygroundEditorTheme__textItalic PlaygroundEditorTheme__textStrikethrough"
       data-lexical-text="true">
@@ -1408,9 +1407,7 @@ const IMPORTED_MARKDOWN_HTML = html`
     dir="ltr">
     <span data-lexical-text="true">Blockquotes text goes here</span>
     <br />
-    <span data-lexical-text="true">And second</span>
-    <br />
-    <span data-lexical-text="true">line after</span>
+    <span data-lexical-text="true">And secondline after</span>
   </blockquote>
   <blockquote
     class="PlaygroundEditorTheme__quote PlaygroundEditorTheme__ltr"
@@ -1488,9 +1485,9 @@ const IMPORTED_MARKDOWN_HTML = html`
           class="PlaygroundEditorTheme__listItem PlaygroundEditorTheme__ltr"
           dir="ltr"
           value="1">
-          <span data-lexical-text="true">And can be nested</span>
-          <br />
-          <span data-lexical-text="true">and multiline as well</span>
+          <span data-lexical-text="true">
+            And can be nested and multiline as well
+          </span>
         </li>
       </ol>
     </li>