Skip to content
This repository has been archived by the owner on Sep 15, 2024. It is now read-only.

Commit

Permalink
Add markdown parser and plain text renderer
Browse files Browse the repository at this point in the history
This commit introduces markdown parsing functionality with a method to parse Markdown into a hierarchical structure, preserving the original structure of documents. Also added is a PlainTextRenderer class, which converts Markdown into plain text. Tests are included to ensure correct functionality.
  • Loading branch information
mysticfall committed Jan 28, 2024
1 parent 9bdedd0 commit 810656e
Show file tree
Hide file tree
Showing 4 changed files with 382 additions and 1 deletion.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ programming paradigms, it's purely experimental at this point and not suitable f

| Statements | Branches | Functions | Lines |
| --------------------------- | ----------------------- | ------------------------- | ----------------- |
| ![Statements](https://img.shields.io/badge/statements-97.58%25-brightgreen.svg?style=flat) | ![Branches](https://img.shields.io/badge/branches-97.52%25-brightgreen.svg?style=flat) | ![Functions](https://img.shields.io/badge/functions-87.32%25-yellow.svg?style=flat) | ![Lines](https://img.shields.io/badge/lines-97.58%25-brightgreen.svg?style=flat) |
| ![Statements](https://img.shields.io/badge/statements-97.59%25-brightgreen.svg?style=flat) | ![Branches](https://img.shields.io/badge/branches-97.52%25-brightgreen.svg?style=flat) | ![Functions](https://img.shields.io/badge/functions-87.32%25-yellow.svg?style=flat) | ![Lines](https://img.shields.io/badge/lines-97.59%25-brightgreen.svg?style=flat) |

## Motivation

Expand Down
1 change: 1 addition & 0 deletions src/common/index.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
export * from "./error"
export * from "./id"
export * from "./markdown"
export * from "./optics"
export * from "./range"
export * from "./string"
181 changes: 181 additions & 0 deletions src/common/markdown.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,181 @@
/**
* Definitions of common types related to Markdown text processing.
* @module
*/
import {pipe} from "fp-ts/lib/function.js"
import * as O from "fp-ts/lib/Option.js"
import {none, Option} from "fp-ts/lib/Option.js"
import * as A from "fp-ts/lib/ReadonlyArray.js"
import * as RNEA from "fp-ts/lib/ReadonlyNonEmptyArray.js"
import * as ST from "fp-ts/lib/string.js"
import {decode} from "html-entities"
import {marked, MarkedOptions, Renderer, Token} from "marked"

/**
 * A Markdown document, or one section of it, arranged as a tree that follows its headings.
 */
export type MarkdownText = {
/** Heading text of the section; `none` on the root node returned by `parseMarkdown`. */
readonly title: Option<string>
/** Tokens that belong directly to this section rather than to one of its `children`. */
readonly contents: ReadonlyArray<Token>
/** Subsections nested under this section. */
readonly children: ReadonlyArray<MarkdownText>
}

/**
 * Parses Markdown source into a tree of sections that mirrors its heading hierarchy.
 *
 * The text is tokenised with `marked.lexer`. Every heading token opens a new section, which
 * becomes a child of the nearest preceding section whose heading level is strictly smaller,
 * or of the root when there is none. Every other token is appended to the `contents` of the
 * section that is open at that point, so tokens that appear before the first heading end up
 * in the root.
 *
 * Nesting is decided by the actual heading levels, so documents that skip levels are handled
 * correctly: in `### A` followed by `### B` the two sections are siblings.
 *
 * Tokens are processed in a loop, so the length of the document does not affect the depth of
 * the call stack.
 *
 * @param text Markdown source to parse.
 * @returns The root section; its `title` is always `none`.
 */
export function parseMarkdown(text: string): MarkdownText {

    // Working state of a section that can still receive tokens. The two arrays are the very
    // same instances exposed by the `MarkdownText` node attached to the parent, so pushing
    // into them fills that node in.
    type OpenSection = {
        readonly depth: number
        readonly parent: OpenSection | undefined
        readonly children: Array<MarkdownText>
        readonly contents: Array<Token>
    }

    // Walks up from `section` to the closest section that may contain a heading of `depth`,
    // i.e. the first one with a strictly smaller level. The root has no parent and is the
    // fallback. The chain is at most as long as the number of distinct heading levels.
    const enclosing = (section: OpenSection, depth: number): OpenSection =>
        section.parent !== undefined && section.depth >= depth
            ? enclosing(section.parent, depth)
            : section

    const root: OpenSection = {depth: 0, parent: undefined, children: [], contents: []}

    let current: OpenSection = root

    for (const token of marked.lexer(text)) {
        if (token.type !== "heading") {
            current.contents.push(token)
            continue
        }

        const parent = enclosing(current, token.depth)

        // Record the heading's own level (not `parent.depth + 1`) so that a later heading of
        // the same level closes this section instead of being nested inside it.
        const section: OpenSection = {
            depth: token.depth,
            parent: parent,
            children: [],
            contents: []
        }

        parent.children.push({
            title: O.some(token.text),
            children: section.children,
            contents: section.contents
        })

        current = section
    }

    return {
        title: none,
        children: root.children,
        contents: root.contents
    }
}

/* eslint-disable @typescript-eslint/no-unused-vars */
/**
 * A renderer for `marked` that emits plain text instead of HTML.
 *
 * Inline formatting is dropped and only the HTML-entity decoded text is kept. Struck-through
 * text and tables produce no output at all. Headings are prefixed with `=` characters and list
 * items with `*`.
 * @implements {Renderer}
 */
export class PlainTextRenderer implements Renderer {

    readonly options: MarkedOptions

    /**
     * @param options Options stored on the renderer; an empty object is used when omitted.
     */
    constructor(options?: MarkedOptions) {
        this.options = options ? options : {}
    }

    /** Returns the code unchanged; the info string and the escaped flag are ignored. */
    code(code: string, _infostring: string | undefined, _escaped: boolean): string {
        return code
    }

    /** Returns the quoted content unchanged, without any quote marker. */
    blockquote(quote: string): string {
        return quote
    }

    /** Returns the HTML fragment unchanged; tags are not stripped. */
    html(html: string, _block?: boolean | undefined): string {
        return html
    }

    /**
     * Renders a heading as a run of `=` characters, a space, the decoded text and a blank line.
     * For levels of one and above the run contains one `=` per level.
     */
    heading(text: string, level: number, _raw: string): string {
        const marker = pipe(
            RNEA.range(1, level),
            A.map(() => "=")
        ).join("")

        return `${marker} ${decode(text)}\n\n`
    }

    /** Renders a horizontal rule as three dashes, without a trailing line break. */
    hr(): string {
        return "---"
    }

    /**
     * Renders a list with one ` * item` line per entry, joined by line breaks.
     *
     * The entries are recovered by splitting the body on every `*`, so an asterisk inside the
     * text of an item also starts a new line. Empty entries are dropped. Ordered lists are not
     * distinguished: `_ordered` and `_start` are ignored.
     */
    list(body: string, _ordered: boolean, _start: number | ""): string {
        const lines = pipe(
            body.split("*"),
            A.map(ST.trim),
            A.filter(entry => entry.length > 0),
            A.map(entry => ` * ${entry}`)
        )

        return lines.join("\n")
    }

    /** Prefixes the decoded text with `* `; the task and checked flags are ignored. */
    listitem(text: string, _task: boolean, _checked: boolean): string {
        return `* ${decode(text)}`
    }

    /** Renders a task list checkbox as `[x]` when checked and `[ ]` otherwise. */
    checkbox(checked: boolean): string {
        if (checked) {
            return "[x]"
        }

        return "[ ]"
    }

    /** Decodes the text, puts it on a single line and appends a blank line. */
    paragraph(text: string): string {
        const singleLine = decode(text).split("\n").join(" ")

        return `${singleLine}\n\n`
    }

    /** Tables are omitted from the output. */
    table(_header: string, _body: string): string {
        return ""
    }

    /** Table rows are omitted from the output. */
    tablerow(_content: string): string {
        return ""
    }

    /** Table cells are omitted from the output. */
    tablecell(_content: string, _flags: { header: boolean; align: "center" | "left" | "right" | null }): string {
        return ""
    }

    /** Returns the decoded text without any emphasis marker. */
    strong(text: string): string {
        return decode(text)
    }

    /** Returns the decoded text without any emphasis marker. */
    em(text: string): string {
        return decode(text)
    }

    /** Returns the decoded text of an inline code span. */
    codespan(text: string): string {
        return decode(text)
    }

    /** Renders a hard line break as a newline character. */
    br(): string {
        return "\n"
    }

    /** Discards struck-through text: the argument is ignored and an empty string is decoded. */
    del(_text: string): string {
        return decode("")
    }

    /** Keeps only the decoded link text; the target and the title are dropped. */
    link(_href: string, _title: string | null | undefined, text: string): string {
        return decode(text)
    }

    /** Keeps only the decoded alternative text; the source and the title are dropped. */
    image(_href: string, _title: string | null, text: string): string {
        return decode(text)
    }

    /** Returns the decoded text. */
    text(text: string): string {
        return decode(text)
    }
}
199 changes: 199 additions & 0 deletions test/common/markdown.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,199 @@
import * as O from "fp-ts/Option"
import {Tokens} from "marked";
import {describe, expect, it, test} from "vitest"
import {parseMarkdown, PlainTextRenderer} from "../../src"

describe("parseMarkdown", () => {

    // Texts of the items of a token that is expected to be a list.
    const itemTexts = (token: unknown): ReadonlyArray<string> =>
        (token as Tokens.List).items.map(item => item.text)

    it("should parse the given markdown text while preserving its hierarchical structure", () => {

        // Three heading levels, with body content under several of them.
        const source = `
# Cat
Cats are invasive alien species disguising as domestic pets.
## Appearance
### General features
* Large eyes.
* Furry.
* Pointy ears.
### Coat patterns
* Mackerel
* Classic
* Spotted
## Goals
Cats aim to dominate the world.
`

        const parsed = parseMarkdown(source)

        // Root: untitled, holding only the blank line that precedes the first heading.
        expect(O.isNone(parsed.title)).toBe(true)
        expect(parsed.children).toHaveLength(1)
        expect(parsed.contents).toHaveLength(1)
        expect(parsed.contents[0]).toHaveProperty("type", "space")

        // Level 1: "Cat".
        const [cat] = parsed.children

        expect(cat.title).toEqual(O.of("Cat"))
        expect(cat.children).toHaveLength(2)
        expect(cat.contents).toHaveLength(2)

        const [intro] = cat.contents

        expect(intro).toHaveProperty("type", "paragraph")
        expect(intro).toHaveProperty("text", "Cats are invasive alien species disguising as domestic pets.")

        // Level 2: "Appearance" and "Goals".
        const [appearance, goals] = cat.children

        expect(appearance.title).toEqual(O.of("Appearance"))
        expect(appearance.children).toHaveLength(2)
        expect(appearance.contents).toEqual([])

        // Level 3: the two subsections of "Appearance", each a list followed by a blank line.
        const [features, patterns] = appearance.children

        expect(features.title).toEqual(O.of("General features"))
        expect(features.children).toHaveLength(0)
        expect(features.contents).toHaveLength(2)
        expect(itemTexts(features.contents[0])).toEqual(["Large eyes.", "Furry.", "Pointy ears."])
        expect(features.contents[1]).toHaveProperty("type", "space")

        expect(patterns.title).toEqual(O.of("Coat patterns"))
        expect(patterns.children).toHaveLength(0)
        expect(patterns.contents).toHaveLength(2)
        expect(itemTexts(patterns.contents[0])).toEqual(["Mackerel", "Classic", "Spotted"])
        expect(patterns.contents[1]).toHaveProperty("type", "space")

        expect(goals.title).toEqual(O.of("Goals"))
        expect(goals.children).toHaveLength(0)
        expect(goals.contents).toHaveLength(1)

        const [statement] = goals.contents

        expect(statement).toHaveProperty("type", "paragraph")
        expect(statement).toHaveProperty("text", "Cats aim to dominate the world.")
    })
})

// One test per renderer method, plus checks for the constructor defaults and for HTML entity
// decoding, which the per-method inputs do not exercise.
describe("PlainTextRenderer", () => {
    const renderer = new PlainTextRenderer()

    test("constructor", () => {
        expect(renderer.options).toEqual({})

        const options = {gfm: true}
        expect(new PlainTextRenderer(options).options).toBe(options)
    })

    test("code", () => {
        const code = "console.log('hello world')"
        const result = renderer.code(code, undefined, false)
        expect(result).toBe(code)
    })

    test("blockquote", () => {
        const quote = "This is a quote"
        const result = renderer.blockquote(quote)
        expect(result).toBe(quote)
    })

    test("html", () => {
        const html = "<h1>Hello World</h1>"
        const result = renderer.html(html)
        expect(result).toBe(html)
    })

    test("heading", () => {
        const text = "Heading"
        const level = 2
        // The raw argument is the Markdown source of the heading; the renderer ignores it.
        const result = renderer.heading(text, level, "## Heading")
        expect(result).toBe(`== ${text}\n\n`)
    })

    test("hr", () => {
        const result = renderer.hr()
        expect(result).toBe("---")
    })

    test("list", () => {
        const list = "* item1* item2"
        const result = renderer.list(list, false, "")
        expect(result).toBe(" * item1\n * item2")
    })

    test("listitem", () => {
        const text = "Item"
        const result = renderer.listitem(text, false, false)
        expect(result).toBe(`* ${text}`)
    })

    test("checkbox", () => {
        expect(renderer.checkbox(true)).toBe("[x]")
        expect(renderer.checkbox(false)).toBe("[ ]")
    })

    test("paragraph", () => {
        const text = "Hello\nWorld"
        const result = renderer.paragraph(text)
        expect(result).toBe("Hello World\n\n")
    })

    test("table", () => {
        const result = renderer.table("header", "body")
        expect(result).toBe("")
    })

    test("tablerow", () => {
        const result = renderer.tablerow("row")
        expect(result).toBe("")
    })

    test("tablecell", () => {
        const result = renderer.tablecell("cell", {header: true, align: null})
        expect(result).toBe("")
    })

    test("strong", () => {
        const text = "Bold"
        const result = renderer.strong(text)
        expect(result).toBe(text)
    })

    test("em", () => {
        const text = "Italic"
        const result = renderer.em(text)
        expect(result).toBe(text)
    })

    test("codespan", () => {
        const text = "console.log('hello world')"
        const result = renderer.codespan(text)
        expect(result).toBe(text)
    })

    test("br", () => {
        const result = renderer.br()
        expect(result).toBe("\n")
    })

    test("del", () => {
        const text = "Deleted text"
        const result = renderer.del(text)
        expect(result).toBe("")
    })

    test("link", () => {
        const text = "Link Text"
        const result = renderer.link("", null, text)
        expect(result).toBe(text)
    })

    test("image", () => {
        const text = "Image Text"
        const result = renderer.image("", null, text)
        expect(result).toBe(text)
    })

    test("text", () => {
        const text = "Plain Text"
        const result = renderer.text(text)
        expect(result).toBe(text)
    })

    test("HTML entities are decoded", () => {
        expect(renderer.text("Tom &amp; Jerry")).toBe("Tom & Jerry")
        expect(renderer.paragraph("1 &lt; 2")).toBe("1 < 2\n\n")
        expect(renderer.heading("Q&amp;A", 1, "# Q&A")).toBe("= Q&A\n\n")
        expect(renderer.listitem("&quot;quoted&quot;", false, false)).toBe("* \"quoted\"")
    })
})

0 comments on commit 810656e

Please sign in to comment.