This repository has been archived by the owner on Sep 15, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add markdown parser and plain text renderer
This commit introduces markdown parsing functionality with a method to parse Markdown into a hierarchical structure, preserving the original structure of documents. Also added is a PlainTextRenderer class, which converts Markdown into plain text. Tests are included to ensure correct functionality.
- Loading branch information
1 parent
9bdedd0
commit 810656e
Showing
4 changed files
with
382 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,6 @@ | ||
export * from "./error" | ||
export * from "./id" | ||
export * from "./markdown" | ||
export * from "./optics" | ||
export * from "./range" | ||
export * from "./string" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,181 @@ | ||
/** | ||
* Definitions of common types related to Markdown text processing. | ||
* @module | ||
*/ | ||
import {pipe} from "fp-ts/lib/function.js" | ||
import * as O from "fp-ts/lib/Option.js" | ||
import {none, Option} from "fp-ts/lib/Option.js" | ||
import * as A from "fp-ts/lib/ReadonlyArray.js" | ||
import * as RNEA from "fp-ts/lib/ReadonlyNonEmptyArray.js" | ||
import * as ST from "fp-ts/lib/string.js" | ||
import {decode} from "html-entities" | ||
import {marked, MarkedOptions, Renderer, Token} from "marked" | ||
|
||
/**
 * A Markdown document, or one heading-delimited section of it, arranged as a tree.
 */
export type MarkdownText = {
  /** Heading text of this section; `none` for the document root produced by `parseMarkdown`. */
  readonly title: Option<string>
  /** Non-heading tokens that belong directly to this section, in source order. */
  readonly contents: readonly Token[]
  /** Sections introduced by deeper headings found inside this section. */
  readonly children: readonly MarkdownText[]
}
|
||
/**
 * Parses Markdown text into a tree of sections delimited by headings.
 *
 * The text is tokenized with `marked.lexer`. Every heading token opens a new
 * section whose `title` is the heading text; non-heading tokens are appended to
 * the `contents` of the section that is open when they are encountered. A
 * heading that is deeper than the current section becomes one of its
 * `children`; a heading at the same or a shallower depth closes the section.
 *
 * @param text Markdown source to parse.
 * @returns The document root: `title` is `none`, `contents` holds the tokens
 *   seen before the first heading, and `children` holds the top-level sections.
 */
export function parseMarkdown(text: string): MarkdownText {

  const tokens = marked.lexer(text)

  type ParseData = {
    readonly children: ReadonlyArray<MarkdownText>,
    readonly contents: ReadonlyArray<Token>,
    readonly remaining: ReadonlyArray<Token>
  }

  /**
   * Consumes tokens for the section currently open at `depth`.
   * Stops (without consuming it) at the first heading whose depth is not
   * greater than `depth`, handing the unconsumed tokens back in `remaining`.
   */
  const collect = (
    remaining: ReadonlyArray<Token> = tokens,
    children: ReadonlyArray<MarkdownText> = A.empty,
    contents: ReadonlyArray<Token> = A.empty,
    depth: number = 0
  ): ParseData => pipe(
    RNEA.fromReadonlyArray(remaining),
    O.map(RNEA.unprepend),
    O.map(([head, tail]) => {
      if (head.type === "heading") {
        if (head.depth > depth) {
          // Collect the new section at the heading's OWN depth rather than
          // `depth + 1`. When a level is skipped (e.g. "# A" followed by
          // "### B" and "### C"), using `depth + 1` would leave the nested
          // collector at depth 2, so the sibling "### C" (3 > 2) would be
          // wrongly nested under "### B" instead of next to it.
          const result = collect(tail, A.empty, A.empty, head.depth)

          const child: MarkdownText = {
            title: O.of(head.text),
            children: result.children,
            contents: result.contents
          }

          // Continue with whatever the child did not consume, still at our depth.
          return collect(result.remaining, pipe(children, A.append(child)), contents, depth)
        }

        // Same-level or shallower heading: this section is finished. The
        // heading itself stays in `remaining` so an ancestor can handle it.
        return {
          remaining: remaining,
          children: children,
          contents: contents
        }
      }

      // Any other token is plain content of the current section.
      return collect(tail, children, pipe(contents, A.append(head)), depth)
    }),
    // No tokens left: close the section with what has been gathered so far.
    O.getOrElse<ParseData>(() => ({
      remaining: A.empty,
      children: children,
      contents: contents
    }))
  )

  const {children, contents} = collect()

  return {
    title: none,
    children: children,
    contents: contents
  }
}
|
||
/* eslint-disable @typescript-eslint/no-unused-vars */ | ||
/**
 * A `marked` renderer that produces plain text instead of HTML.
 *
 * Inline markup (strong, emphasis, code spans, links, images) is reduced to its
 * HTML-entity-decoded text, deleted text is dropped, tables render as nothing,
 * and headings are prefixed with one `=` per heading level.
 */
export class PlainTextRenderer implements Renderer {

  readonly options: MarkedOptions

  /**
   * @param options Options to expose on the renderer; an empty object is used when omitted.
   */
  constructor(options?: MarkedOptions) {
    this.options = options ? options : {}
  }

  /** Returns the code block content unchanged. */
  code(code: string, _infostring: string | undefined, _escaped: boolean): string {
    return code
  }

  /** Returns the already-rendered quote body unchanged. */
  blockquote(quote: string): string {
    return quote
  }

  /** Passes raw HTML through untouched. */
  html(html: string, _block?: boolean | undefined): string {
    return html
  }

  /**
   * Renders a heading as `=` markers, a space, the decoded text and a blank line.
   * The marker run comes from `RNEA.range(1, level)`, i.e. one `=` per level.
   */
  heading(text: string, level: number, _raw: string): string {
    const markers = RNEA.range(1, level).map(() => "=").join("")
    return markers + " " + decode(text) + "\n\n"
  }

  /** Renders a thematic break. */
  hr(): string {
    return "---"
  }

  /**
   * Re-indents the items of an already-rendered list body, one item per line.
   *
   * The body is cut at every `*` (the marker `listitem` emits), blank pieces
   * are discarded and each remaining piece is emitted as ` * <piece>`.
   * NOTE(review): an item whose own text contains `*` is presumably split into
   * several entries by this — confirm whether that matters for callers.
   */
  list(body: string, _ordered: boolean, _start: number | ""): string {
    const lines: string[] = []
    for (const piece of body.split("*")) {
      const item = piece.trim()
      if (item.length > 0) {
        lines.push(" * " + item)
      }
    }
    return lines.join("\n")
  }

  /** Renders one list item as `* ` followed by its decoded text. */
  listitem(text: string, _task: boolean, _checked: boolean): string {
    return "* " + decode(text)
  }

  /** Renders a task-list checkbox as `[x]` or `[ ]`. */
  checkbox(checked: boolean): string {
    if (checked) {
      return "[x]"
    }
    return "[ ]"
  }

  /** Decodes the paragraph, joins its lines with spaces and appends a blank line. */
  paragraph(text: string): string {
    const singleLine = decode(text).replace(/\n/g, " ")
    return singleLine + "\n\n"
  }

  /** Tables are omitted from the plain-text output. */
  table(_header: string, _body: string): string {
    return ""
  }

  /** Table rows are omitted from the plain-text output. */
  tablerow(_content: string): string {
    return ""
  }

  /** Table cells are omitted from the plain-text output. */
  tablecell(_content: string, _flags: { header: boolean; align: "center" | "left" | "right" | null }): string {
    return ""
  }

  /** Strips strong emphasis, keeping the decoded text. */
  strong(text: string): string {
    return decode(text)
  }

  /** Strips emphasis, keeping the decoded text. */
  em(text: string): string {
    return decode(text)
  }

  /** Strips code-span markup, keeping the decoded text. */
  codespan(text: string): string {
    return decode(text)
  }

  /** Renders a hard line break as a newline. */
  br(): string {
    return "\n"
  }

  /** Deleted (struck-through) text is dropped entirely. */
  del(_text: string): string {
    return ""
  }

  /** Renders a link as its decoded link text; the target and title are discarded. */
  link(_href: string, _title: string | null | undefined, text: string): string {
    return decode(text)
  }

  /** Renders an image as its decoded alternative text. */
  image(_href: string, _title: string | null, text: string): string {
    return decode(text)
  }

  /** Decodes HTML entities in plain text. */
  text(text: string): string {
    return decode(text)
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,199 @@ | ||
import * as O from "fp-ts/Option" | ||
import {Tokens} from "marked"; | ||
import {describe, expect, it, test} from "vitest" | ||
import {parseMarkdown, PlainTextRenderer} from "../../src" | ||
|
||
// Test suite for parseMarkdown: feeds a small multi-level document through the
// parser and walks the resulting tree (root -> "Cat" -> "Appearance"/"Goals" ->
// "General features"/"Coat patterns"), checking titles, child counts and the
// tokens collected as each section's contents.
describe("parseMarkdown", () => { | ||
|
||
it("should parse the given markdown text while preserving its hierarchical structure", () => { | ||
|
||
// Fixture document with three heading levels.
// NOTE(review): the assertions below expect `space` tokens after the first
// paragraph and after each list, which suggests the fixture contains blank
// lines between blocks — presumably lost in this view; confirm against the
// original file before editing the template literal.
const text = ` | ||
# Cat | ||
Cats are invasive alien species disguising as domestic pets. | ||
## Appearance | ||
### General features | ||
* Large eyes. | ||
* Furry. | ||
* Pointy ears. | ||
### Coat patterns | ||
* Mackerel | ||
* Classic | ||
* Spotted | ||
## Goals | ||
Cats aim to dominate the world. | ||
` | ||
|
||
const document = parseMarkdown(text) | ||
|
||
// The root has no title; everything before the first heading is its contents.
expect(document.title).satisfy(O.isNone) | ||
expect(document.children).length(1) | ||
expect(document.contents).length(1) | ||
expect(document.contents[0]).toHaveProperty("type", "space") | ||
|
||
// Level-1 section "Cat".
const root = document.children[0] | ||
|
||
expect(root.title).toEqual(O.of("Cat")) | ||
expect(root.children).length(2) | ||
expect(root.contents).length(2) | ||
|
||
expect(root.contents[0]).toHaveProperty("type", "paragraph") | ||
expect(root.contents[0]).toHaveProperty("text", "Cats are invasive alien species disguising as domestic pets.") | ||
|
||
// Level-2 section "Appearance": no contents of its own, only sub-sections.
const appearance = root.children[0] | ||
|
||
expect(appearance.title).toEqual(O.of("Appearance")) | ||
expect(appearance.children).length(2) | ||
expect(appearance.contents).toEqual([]) | ||
|
||
// Level-3 section "General features": a three-item list followed by a space token.
const generalFeatures = appearance.children[0] | ||
|
||
expect(generalFeatures.title).toEqual(O.of("General features")) | ||
expect(generalFeatures.children).length(0) | ||
expect(generalFeatures.contents).length(2) | ||
|
||
const generalFeaturesList = generalFeatures.contents[0] as Tokens.List | ||
|
||
expect(generalFeaturesList.items).length(3) | ||
expect(generalFeaturesList.items[0].text).toBe("Large eyes.") | ||
expect(generalFeaturesList.items[1].text).toBe("Furry.") | ||
expect(generalFeaturesList.items[2].text).toBe("Pointy ears.") | ||
|
||
expect(generalFeatures.contents[1]).toHaveProperty("type", "space") | ||
|
||
// Level-3 section "Coat patterns": sibling of "General features", same shape.
const coatPatterns = appearance.children[1] | ||
|
||
expect(coatPatterns.title).toEqual(O.of("Coat patterns")) | ||
expect(coatPatterns.children).length(0) | ||
expect(coatPatterns.contents).length(2) | ||
|
||
const coatPatternsList = coatPatterns.contents[0] as Tokens.List | ||
|
||
expect(coatPatternsList.items).length(3) | ||
expect(coatPatternsList.items[0].text).toBe("Mackerel") | ||
expect(coatPatternsList.items[1].text).toBe("Classic") | ||
expect(coatPatternsList.items[2].text).toBe("Spotted") | ||
|
||
expect(coatPatterns.contents[1]).toHaveProperty("type", "space") | ||
|
||
// Level-2 section "Goals": closes "Appearance" and holds a single paragraph.
const goals = root.children[1] | ||
|
||
expect(goals.title).toEqual(O.of("Goals")) | ||
expect(goals.children).length(0) | ||
expect(goals.contents).length(1) | ||
expect(goals.contents[0]).toHaveProperty("type", "paragraph") | ||
expect(goals.contents[0]).toHaveProperty("text", "Cats aim to dominate the world.") | ||
}) | ||
}) | ||
|
||
// Unit tests for PlainTextRenderer: each case calls one renderer method
// directly with a small literal input and checks the exact string it returns.
describe("PlainTextRenderer", () => {
  const renderer = new PlainTextRenderer()

  test("code", () => {
    const source = "console.log('hello world')"
    expect(renderer.code(source, undefined, false)).toBe(source)
  })

  test("blockquote", () => {
    const quoted = "This is a quote"
    expect(renderer.blockquote(quoted)).toBe(quoted)
  })

  test("html", () => {
    const markup = "<h1>Hello World</h1>"
    expect(renderer.html(markup)).toBe(markup)
  })

  test("heading", () => {
    // A level-2 heading is expected to carry two "=" markers.
    const text = "Heading"
    const rendered = renderer.heading(text, 2, "=== Heading")
    expect(rendered).toBe(`== ${text}\n\n`)
  })

  test("hr", () => {
    expect(renderer.hr()).toBe("---")
  })

  test("list", () => {
    // Input mimics two concatenated `listitem` outputs.
    const rendered = renderer.list("* item1* item2", false, "")
    expect(rendered).toBe(" * item1\n * item2")
  })

  test("listitem", () => {
    const text = "Item"
    expect(renderer.listitem(text, false, false)).toBe(`* ${text}`)
  })

  test("checkbox", () => {
    expect(renderer.checkbox(true)).toBe("[x]")
  })

  test("paragraph", () => {
    // The inner newline becomes a space; a blank line is appended.
    expect(renderer.paragraph("Hello\nWorld")).toBe("Hello World\n\n")
  })

  test("strong", () => {
    const bold = "Bold"
    expect(renderer.strong(bold)).toBe(bold)
  })

  test("em", () => {
    const italic = "Italic"
    expect(renderer.em(italic)).toBe(italic)
  })

  test("codespan", () => {
    const snippet = "console.log('hello world')"
    expect(renderer.codespan(snippet)).toBe(snippet)
  })

  test("br", () => {
    expect(renderer.br()).toBe("\n")
  })

  test("del", () => {
    // Deleted text is expected to vanish from the output.
    expect(renderer.del("Deleted text")).toBe("")
  })

  test("link", () => {
    const label = "Link Text"
    expect(renderer.link("", null, label)).toBe(label)
  })

  test("image", () => {
    const alt = "Image Text"
    expect(renderer.image("", null, alt)).toBe(alt)
  })

  test("text", () => {
    const plain = "Plain Text"
    expect(renderer.text(plain)).toBe(plain)
  })
})