From 810656e90f06cc595e9e964c9c29640f526c432d Mon Sep 17 00:00:00 2001
From: Xavier Cho
Date: Sun, 28 Jan 2024 15:08:42 +0900
Subject: [PATCH] Add markdown parser and plain text renderer

This commit introduces markdown parsing functionality with a method to parse
Markdown into a hierarchical structure, preserving the original structure of
documents. Also added is a PlainTextRenderer class, which converts Markdown
into plain text. Tests are included to ensure correct functionality.
---
Review notes: the payload of the two new files was revised during review, so
the commit id above and their `index` blob ids no longer match the contents.

 README.md                    |   2 +-
 src/common/index.ts          |   1 +
 src/common/markdown.ts       | 213 +++++++++++++++++++++++++++++++++++
 test/common/markdown.test.ts | 212 ++++++++++++++++++++++++++++++++++
 4 files changed, 427 insertions(+), 1 deletion(-)
 create mode 100644 src/common/markdown.ts
 create mode 100644 test/common/markdown.test.ts

diff --git a/README.md b/README.md
index 08fcb28..df1ecaf 100644
--- a/README.md
+++ b/README.md
@@ -12,7 +12,7 @@ programming paradigms, it's purely experimental at this point and not suitable f
 
 | Statements | Branches | Functions | Lines |
 | --------------------------- | ----------------------- | ------------------------- | ----------------- |
-| ![Statements](https://img.shields.io/badge/statements-97.58%25-brightgreen.svg?style=flat) | ![Branches](https://img.shields.io/badge/branches-97.52%25-brightgreen.svg?style=flat) | ![Functions](https://img.shields.io/badge/functions-87.32%25-yellow.svg?style=flat) | ![Lines](https://img.shields.io/badge/lines-97.58%25-brightgreen.svg?style=flat) |
+| ![Statements](https://img.shields.io/badge/statements-97.59%25-brightgreen.svg?style=flat) | ![Branches](https://img.shields.io/badge/branches-97.52%25-brightgreen.svg?style=flat) | ![Functions](https://img.shields.io/badge/functions-87.32%25-yellow.svg?style=flat) | ![Lines](https://img.shields.io/badge/lines-97.59%25-brightgreen.svg?style=flat) |
 
 ## Motivation
 
diff --git a/src/common/index.ts b/src/common/index.ts
index 8053be7..03082f8 100644
--- a/src/common/index.ts
+++ b/src/common/index.ts
@@ -1,5 +1,6 @@
 export * from "./error"
 export * from "./id"
+export * from "./markdown"
 export * from "./optics"
 export * from "./range"
 export * from "./string"
diff --git a/src/common/markdown.ts b/src/common/markdown.ts
new file mode 100644
index 0000000..383353e
--- /dev/null
+++ b/src/common/markdown.ts
@@ -0,0 +1,213 @@
+/**
+ * Definitions of common types related to Markdown text processing.
+ * @module
+ */
+import {pipe} from "fp-ts/lib/function.js"
+import * as O from "fp-ts/lib/Option.js"
+import {none, Option} from "fp-ts/lib/Option.js"
+import * as A from "fp-ts/lib/ReadonlyArray.js"
+import * as RNEA from "fp-ts/lib/ReadonlyNonEmptyArray.js"
+import * as ST from "fp-ts/lib/string.js"
+import {decode} from "html-entities"
+import {marked, MarkedOptions, Renderer, Token} from "marked"
+
+/**
+ * A Markdown document, or one section of it, arranged by heading hierarchy.
+ */
+export type MarkdownText = {
+    /** Heading text of the section; `none` for the document root. */
+    readonly title: Option<string>
+    /** Non-heading tokens that belong directly to this section, in source order. */
+    readonly contents: ReadonlyArray<Token>
+    /** Sections opened by deeper headings inside this one, in source order. */
+    readonly children: ReadonlyArray<MarkdownText>
+}
+
+/**
+ * Parses Markdown text into a tree of sections that mirrors its heading hierarchy.
+ *
+ * The returned root has no title. Tokens that precede the first heading become the
+ * root's `contents`; each heading opens a section that owns every following token
+ * until a heading of the same or a shallower level is reached.
+ *
+ * @param text Markdown source to parse.
+ * @returns The untitled root section of the document.
+ */
+export function parseMarkdown(text: string): MarkdownText {
+
+    const tokens = marked.lexer(text)
+
+    type ParseData = {
+        readonly children: ReadonlyArray<MarkdownText>,
+        readonly contents: ReadonlyArray<Token>,
+        readonly remaining: ReadonlyArray<Token>
+    }
+
+    // Collects the section whose own heading level is `depth` (0 for the root). It stops,
+    // leaving that heading unconsumed in `remaining`, at the first heading that is not deeper.
+    const collect = (
+        remaining: ReadonlyArray<Token> = tokens,
+        children: ReadonlyArray<MarkdownText> = A.empty,
+        contents: ReadonlyArray<Token> = A.empty,
+        depth: number = 0
+    ): ParseData => pipe(
+        RNEA.fromReadonlyArray(remaining),
+        O.map(RNEA.unprepend),
+        O.map(([head, tail]) => {
+            if (head.type === "heading") {
+                if (head.depth > depth) {
+                    // Recurse with the heading's actual level rather than `depth + 1`, so that
+                    // sibling headings stay siblings when a level is skipped (e.g. `#` followed
+                    // by two `###`) or when the document starts below level 1.
+                    const result = collect(tail, A.empty, A.empty, head.depth)
+
+                    const child = {
+                        title: O.of(head.text),
+                        children: result.children,
+                        contents: result.contents
+                    }
+
+                    return collect(result.remaining, pipe(children, A.append(child)), contents, depth)
+                } else {
+                    return {
+                        remaining: remaining,
+                        children: children,
+                        contents: contents
+                    }
+                }
+            }
+
+            return collect(tail, children, pipe(contents, A.append(head)), depth)
+        }),
+        O.getOrElse(() => ({
+            remaining: A.empty,
+            children: children,
+            contents: contents
+        }))
+    )
+
+    const {children, contents} = collect()
+
+    return {
+        title: none,
+        children: children,
+        contents: contents
+    }
+}
+
+/* eslint-disable @typescript-eslint/no-unused-vars */
+/**
+ * A `marked` renderer that produces plain text instead of HTML.
+ *
+ * Inline markup such as emphasis, links and images is reduced to its entity-decoded text,
+ * headings are prefixed with one `=` per level, and tables and struck-through text are omitted.
+ */
+export class PlainTextRenderer implements Renderer {
+
+    readonly options: MarkedOptions
+
+    constructor(options?: MarkedOptions) {
+        this.options = options ?? {}
+    }
+
+    code(code: string, _infostring: string | undefined, _escaped: boolean): string {
+        return code
+    }
+
+    blockquote(quote: string): string {
+        return quote
+    }
+
+    html(html: string, _block?: boolean | undefined): string {
+        return html
+    }
+
+    /** Renders a heading as one `=` per level, a space, the decoded text and a blank line. */
+    heading(text: string, level: number, _raw: string): string {
+        return pipe(
+            RNEA.range(1, level),
+            A.map(() => "="),
+            A.append(" "),
+            A.append(decode(text)),
+            A.append("\n\n")
+        ).join("")
+    }
+
+    hr(): string {
+        return "---"
+    }
+
+    /**
+     * Puts each already rendered list item on its own line behind a ` * ` bullet.
+     *
+     * Items are recovered by splitting `body` on `*`, the bullet `listitem` emits, so an
+     * item whose own text contains `*` is split as well.
+     */
+    list(body: string, _ordered: boolean, _start: number | ""): string {
+        return pipe(
+            body.split("*"),
+            A.map(ST.trim),
+            A.filter(i => i.length > 0),
+            A.map(i => [" *", i].join(" "))
+        ).join("\n")
+    }
+
+    listitem(text: string, _task: boolean, _checked: boolean): string {
+        return "* " + decode(text)
+    }
+
+    checkbox(checked: boolean): string {
+        return checked ? "[x]" : "[ ]"
+    }
+
+    /** Joins the paragraph's lines with spaces and ends it with a blank line. */
+    paragraph(text: string): string {
+        return decode(text).replace(/\n/g, " ") + "\n\n"
+    }
+
+    /** Tables are omitted from plain-text output. */
+    table(_header: string, _body: string): string {
+        return ""
+    }
+
+    tablerow(_content: string): string {
+        return ""
+    }
+
+    tablecell(_content: string, _flags: { header: boolean; align: "center" | "left" | "right" | null }): string {
+        return ""
+    }
+
+    strong(text: string): string {
+        return decode(text)
+    }
+
+    em(text: string): string {
+        return decode(text)
+    }
+
+    codespan(text: string): string {
+        return decode(text)
+    }
+
+    br(): string {
+        return "\n"
+    }
+
+    /** Struck-through text is omitted from plain-text output. */
+    del(_text: string): string {
+        return ""
+    }
+
+    link(_href: string, _title: string | null | undefined, text: string): string {
+        return decode(text)
+    }
+
+    image(_href: string, _title: string | null, text: string): string {
+        return decode(text)
+    }
+
+    text(text: string): string {
+        return decode(text)
+    }
+}
diff --git a/test/common/markdown.test.ts b/test/common/markdown.test.ts
new file mode 100644
index 0000000..bb62a8d
--- /dev/null
+++ b/test/common/markdown.test.ts
@@ -0,0 +1,212 @@
+import * as O from "fp-ts/Option"
+import {Tokens} from "marked";
+import {describe, expect, it, test} from "vitest"
+import {parseMarkdown, PlainTextRenderer} from "../../src"
+
+describe("parseMarkdown", () => {
+
+    it("should parse the given markdown text while preserving its hierarchical structure", () => {
+
+        const text = `
+# Cat
+
+Cats are invasive alien species disguising as domestic pets.
+
+## Appearance
+
+### General features
+
+ * Large eyes.
+ * Furry.
+ * Pointy ears.
+
+### Coat patterns
+
+ * Mackerel
+ * Classic
+ * Spotted
+
+## Goals
+
+Cats aim to dominate the world.
+`
+
+        const document = parseMarkdown(text)
+
+        expect(document.title).satisfy(O.isNone)
+        expect(document.children).length(1)
+        expect(document.contents).length(1)
+        expect(document.contents[0]).toHaveProperty("type", "space")
+
+        const root = document.children[0]
+
+        expect(root.title).toEqual(O.of("Cat"))
+        expect(root.children).length(2)
+        expect(root.contents).length(2)
+
+        expect(root.contents[0]).toHaveProperty("type", "paragraph")
+        expect(root.contents[0]).toHaveProperty("text", "Cats are invasive alien species disguising as domestic pets.")
+
+        const appearance = root.children[0]
+
+        expect(appearance.title).toEqual(O.of("Appearance"))
+        expect(appearance.children).length(2)
+        expect(appearance.contents).toEqual([])
+
+        const generalFeatures = appearance.children[0]
+
+        expect(generalFeatures.title).toEqual(O.of("General features"))
+        expect(generalFeatures.children).length(0)
+        expect(generalFeatures.contents).length(2)
+
+        const generalFeaturesList = generalFeatures.contents[0] as Tokens.List
+
+        expect(generalFeaturesList.items).length(3)
+        expect(generalFeaturesList.items[0].text).toBe("Large eyes.")
+        expect(generalFeaturesList.items[1].text).toBe("Furry.")
+        expect(generalFeaturesList.items[2].text).toBe("Pointy ears.")
+
+        expect(generalFeatures.contents[1]).toHaveProperty("type", "space")
+
+        const coatPatterns = appearance.children[1]
+
+        expect(coatPatterns.title).toEqual(O.of("Coat patterns"))
+        expect(coatPatterns.children).length(0)
+        expect(coatPatterns.contents).length(2)
+
+        const coatPatternsList = coatPatterns.contents[0] as Tokens.List
+
+        expect(coatPatternsList.items).length(3)
+        expect(coatPatternsList.items[0].text).toBe("Mackerel")
+        expect(coatPatternsList.items[1].text).toBe("Classic")
+        expect(coatPatternsList.items[2].text).toBe("Spotted")
+
+        expect(coatPatterns.contents[1]).toHaveProperty("type", "space")
+
+        const goals = root.children[1]
+
+        expect(goals.title).toEqual(O.of("Goals"))
+        expect(goals.children).length(0)
+        expect(goals.contents).length(1)
+        expect(goals.contents[0]).toHaveProperty("type", "paragraph")
+        expect(goals.contents[0]).toHaveProperty("text", "Cats aim to dominate the world.")
+    })
+
+    it("should keep headings of the same level as siblings when a level is skipped", () => {
+
+        const document = parseMarkdown("# Cat\n\n### Eyes\n\n### Ears\n")
+
+        expect(document.children).length(1)
+
+        const root = document.children[0]
+
+        expect(root.title).toEqual(O.of("Cat"))
+        expect(root.children.map(section => section.title)).toEqual([O.of("Eyes"), O.of("Ears")])
+        expect(root.children[0].children).length(0)
+    })
+})
+
+describe("PlainTextRenderer", () => {
+    const renderer = new PlainTextRenderer()
+
+    test("code", () => {
+        const code = "console.log('hello world')"
+        const result = renderer.code(code, undefined, false)
+        expect(result).toBe(code)
+    })
+
+    test("blockquote", () => {
+        const quote = "This is a quote"
+        const result = renderer.blockquote(quote)
+        expect(result).toBe(quote)
+    })
+
+    test("html", () => {
+        const html = "<div>Hello World</div>"
+        const result = renderer.html(html)
+        expect(result).toBe(html)
+    })
+
+    test("heading", () => {
+        const text = "Heading"
+        const level = 2
+        const result = renderer.heading(text, level, "=== Heading")
+        expect(result).toBe(`== ${text}\n\n`)
+    })
+
+    test("hr", () => {
+        const result = renderer.hr()
+        expect(result).toBe("---")
+    })
+
+    test("list", () => {
+        const list = "* item1* item2"
+        const result = renderer.list(list, false, "")
+        expect(result).toBe(" * item1\n * item2")
+    })
+
+    test("listitem", () => {
+        const text = "Item"
+        const result = renderer.listitem(text, false, false)
+        expect(result).toBe(`* ${text}`)
+    })
+
+    test("checkbox", () => {
+        const checked = true
+        const result = renderer.checkbox(checked)
+        expect(result).toBe("[x]")
+    })
+
+    test("paragraph", () => {
+        const text = "Hello\nWorld"
+        const result = renderer.paragraph(text)
+        expect(result).toBe("Hello World\n\n")
+    })
+
+    test("strong", () => {
+        const text = "Bold"
+        const result = renderer.strong(text)
+        expect(result).toBe(text)
+    })
+
+    test("em", () => {
+        const text = "Italic"
+        const result = renderer.em(text)
+        expect(result).toBe(text)
+    })
+
+    test("codespan", () => {
+        const text = "console.log('hello world')"
+        const result = renderer.codespan(text)
+        expect(result).toBe(text)
+    })
+
+    test("br", () => {
+        const result = renderer.br()
+        expect(result).toBe("\n")
+    })
+
+    test("del", () => {
+        const text = "Deleted text"
+        const result = renderer.del(text)
+        expect(result).toBe("")
+    })
+
+    test("link", () => {
+        const text = "Link Text"
+        const result = renderer.link("", null, text)
+        expect(result).toBe(text)
+    })
+
+    test("image", () => {
+        const text = "Image Text"
+        const result = renderer.image("", null, text)
+        expect(result).toBe(text)
+    })
+
+    test("text", () => {
+        const text = "Plain Text"
+        const result = renderer.text(text)
+        expect(result).toBe(text)
+    })
+})