Skip to content
This repository has been archived by the owner on Sep 15, 2024. It is now read-only.

Commit

Permalink
Refine markdown parsing and add substitution feature
Browse files Browse the repository at this point in the history
Updated markdown parsing to include a space after header separators and ensure correct rendering of blockquotes and lists. Added a substitution feature to apply replacements in document content, enhancing the flexibility of text processing in Lore documents. The parsing now also respects second-level headings and adjusts the rendering of code blocks and horizontal rules.
  • Loading branch information
mysticfall committed Mar 20, 2024
1 parent f499f24 commit 03834ac
Show file tree
Hide file tree
Showing 8 changed files with 268 additions and 29 deletions.
31 changes: 23 additions & 8 deletions src/common/markdown.ts
Original file line number Diff line number Diff line change
Expand Up @@ -101,25 +101,30 @@ export function parseMarkdown(text: string): MarkdownText {
}
}

/**
* Options accepted by {@link PlainTextRenderer}: everything in {@link MarkedOptions}, plus control over how
* lists are rendered.
*/
export interface PlainTextRendererOptions extends MarkedOptions {

/**
* When set to `true`, a list is rendered on a single line with its items joined by "; ", and a trailing
* period is removed from every item except the last. For any other value (including when omitted), each
* item is rendered on its own line, prefixed with "* " or with its 1-based number for ordered lists.
*/
concatenateList?: boolean
}

/* eslint-disable @typescript-eslint/no-unused-vars */
/**
* An implementation of {@link Renderer} which converts a Markdown input into plain text output.
* @implements {Renderer}
*/
export class PlainTextRenderer implements Renderer {

readonly options: MarkedOptions
readonly options: PlainTextRendererOptions

constructor(options?: MarkedOptions) {
constructor(options?: PlainTextRendererOptions) {
this.options = options || {}
}

code(code: string, _infostring: string | undefined, _escaped: boolean): string {
return code
return [code, "\n\n"].join("")
}

blockquote(quote: string): string {
return quote
return [">", decode(quote)].join(" ")
}

html(html: string, _block?: boolean | undefined): string {
Expand All @@ -137,10 +142,11 @@ export class PlainTextRenderer implements Renderer {
}

hr(): string {
return "---"
return "---\n\n"
}

list(body: string, _ordered: boolean, _start: number | ""): string {
list(body: string, ordered: boolean, _start: number | ""): string {

const items = pipe(
body.split("*"),
A.map(i => i.trim()),
Expand All @@ -150,10 +156,19 @@ export class PlainTextRenderer implements Renderer {

const removePeriod = (s: string) => s.endsWith(".") ? s.substring(0, s.length - 1) : s

const multiline = this.options.concatenateList !== true
const itemSeparator = multiline ? "\n" : "; "

return pipe(
items,
A.mapWithIndex((i, text) => i < items.length - 1 ? removePeriod(text) : text)
).join("; ").trim()
A.mapWithIndex((i, text) => {
if (multiline) {
return ordered ? [i + 1, ". ", text].join("") : ["*", text].join(" ")
}

return i < items.length - 1 ? removePeriod(text) : text
})
).join(itemSeparator).trim() + "\n\n"
}

listitem(text: string, _task: boolean, _checked: boolean): string {
Expand Down
56 changes: 49 additions & 7 deletions src/lore/document.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import {Document} from "@langchain/core/documents"
import * as E from "fp-ts/Either"
import {Either} from "fp-ts/Either"
import {flow, pipe} from "fp-ts/function"
import {Reader} from "fp-ts/Reader"
import * as A from "fp-ts/ReadonlyArray"
import * as TE from "fp-ts/TaskEither"
import {TaskEither} from "fp-ts/TaskEither"
Expand All @@ -21,33 +22,72 @@ import {LoreParser} from "./parser"
*/
export type LoreDocument = Document<LoreEntryMetadata>

/**
* Represents the options for the {@link LoreDocumentLoader}.
*/
export interface LoreDocumentLoaderOptions {

/**
* The parser with which to convert the source documents into lore entries.
* Defaults to an instance of {@link MarkdownLoreParser}.
*
* @readonly
*/
readonly parser?: LoreParser

/**
* The separator to use when joining an entry title with its content. Defaults to ": ".
*
* @readonly
*/
readonly titleSeparator?: string

/**
* An optional function that performs text substitutions. It receives the page content of a source document
* and returns the substituted text, which is then handed to the parser. Defaults to the identity function,
* i.e. the content is parsed unchanged.
*
* @readonly
*/
readonly substitutions?: Reader<string, string>
}

/**
* An implementation of {@link DocumentLoader} that acts as a wrapper for a given {@link DocumentLoader} to
* convert its output into {@link LoreDocument}s.
*/

export class LoreDocumentLoader extends BaseDocumentLoader {

/**
* The parser with which to convert the source documents into lore entries.
*/
readonly parser: LoreParser

readonly titleSeparator: string

readonly substitutions: Reader<string, string>

/**
* Constructs a new instance of {@link LoreDocumentLoader}.
*
* @param {DocumentLoader} source The loader from which to load source documents.
* @param {LoreParser} parser The parser with which to convert the source documents into lore entries.
* Defaults to an instance of {@link MarkdownLoreParser}.
* @param {string} [titleSeparator] The separator to use when joining an entry title with its content.
* Defaults to ": ".
* @param {LoreDocumentLoaderOptions} options Configuration options for the loader.
*/
constructor(
readonly source: DocumentLoader,
readonly parser: LoreParser = new MarkdownLoreParser(),
readonly titleSeparator: string = ": "
options: LoreDocumentLoaderOptions = {}
) {
super()

this.load = this.load.bind(this)
this.createTask = this.createTask.bind(this)
this.parse = this.parse.bind(this)
this.toDocument = this.toDocument.bind(this)

this.parser = options.parser ?? new MarkdownLoreParser()
this.titleSeparator = options.titleSeparator ?? ": "
this.substitutions = options.substitutions ?? (t => t)
}

/**
Expand Down Expand Up @@ -96,7 +136,9 @@ export class LoreDocumentLoader extends BaseDocumentLoader {
const {pageContent, metadata} = source

return pipe(
this.parser.parseText(pageContent),
pageContent,
this.substitutions,
this.parser.parseText,
E.map(A.map(this.toDocument)),
E.map(A.map(d => ({
pageContent: d.pageContent,
Expand Down
2 changes: 1 addition & 1 deletion src/lore/markdown.ts
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ export class MarkdownLoreParser extends AbstractLoreParser {
constructor(options: MarkdownLoreParserOptions = {}) {
super()

this.headerSeparator = options?.headerSeparator ?? ">"
this.headerSeparator = options?.headerSeparator ?? "> "
this.renderer = options?.renderer ?? new PlainTextRenderer()
}

Expand Down
34 changes: 30 additions & 4 deletions src/prompt/markdown.ts
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,31 @@ export class MarkdownMessageParser implements ChatMessageParser {
parse(text: string): Either<PromptParseError, ReadonlyArray<BaseMessage>> {
const root = parseMarkdown(text)

const createMessage = ({title, contents}: MarkdownText): Option<BaseMessage> => {
/**
 * Renders a list of Markdown sections (and, recursively, their subsections) back into text.
 *
 * Each section contributes, in order: a heading line of the form "\n## Title\n" when it has a title
 * (with as many "#" characters as `level`), its trimmed body rendered with this parser's renderer, and
 * the rendered output of its children one heading level deeper. All pieces are joined with newlines.
 *
 * @param texts The sections to render.
 * @param level The heading depth to use for the given sections. Defaults to 2.
 * @returns The rendered text.
 */
const renderChildren = (texts: ReadonlyArray<MarkdownText>, level = 2): string => {
    const renderSection = ({title, contents, children}: MarkdownText) => {
        const body = marked.parser(A.toArray(contents), {renderer: this.renderer})

        // Zero or one heading line, depending on whether the section is titled.
        const heading = pipe(
            title,
            O.map(t => `\n${"#".repeat(level)} ${t}\n`),
            A.fromOption
        )

        // Subsections are rendered as a single pre-joined chunk, one level deeper.
        const nested = A.isEmpty(children) ? A.empty : A.of(renderChildren(children, level + 1))

        return [...heading, body.trim(), ...nested]
    }

    return pipe(texts, A.flatMap(renderSection)).join("\n")
}

const createMessage = ({title, contents, children}: MarkdownText): Option<BaseMessage> => {
const tokens = A.toArray(contents)
const body = marked.parser(tokens, {renderer: this.renderer})

Expand All @@ -55,6 +79,8 @@ export class MarkdownMessageParser implements ChatMessageParser {
O.map(ST.trim),
O.filter(not(ST.isEmpty)),
O.map(t => {
const tt = O.isSome(title) && A.isNonEmpty(children) ? [t, renderChildren(children)].join("\n"): t

const isAI = pipe(title, O.exists(ST.startsWith("AI")))
const isHuman = pipe(title, O.exists(ST.startsWith("Human")))

Expand All @@ -72,11 +98,11 @@ export class MarkdownMessageParser implements ChatMessageParser {

switch (role) {
case "AI":
return new AIMessage({name: name, content: t})
return new AIMessage({name: name, content: tt})
case "Human":
return new HumanMessage({name: name, content: t})
return new HumanMessage({name: name, content: tt})
case "System":
return new SystemMessage({name: name, content: t})
return new SystemMessage({name: name, content: tt})
}
})
)
Expand Down
16 changes: 12 additions & 4 deletions test/common/markdown.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -99,13 +99,13 @@ describe("PlainTextRenderer", () => {
test("code", () => {
const code = "console.log('hello world')"
const result = renderer.code(code, undefined, false)
expect(result).toBe(code)
expect(result).toBe(code + "\n\n")
})

test("blockquote", () => {
const quote = "This is a quote"
const result = renderer.blockquote(quote)
expect(result).toBe(quote)
expect(result).toBe(`> ${quote}`)
})

test("html", () => {
Expand All @@ -123,13 +123,21 @@ describe("PlainTextRenderer", () => {

test("hr", () => {
const result = renderer.hr()
expect(result).toBe("---")
expect(result).toBe("---\n\n")
})

test("list(concatenateList = true)", () => {
const renderer = new PlainTextRenderer({concatenateList: true})

const list = "* item1.\n* item2\n* item3."
const result = renderer.list(list, false, "")
expect(result).toBe("item1; item2; item3.\n\n")
})

test("list", () => {
const list = "* item1.\n* item2\n* item3."
const result = renderer.list(list, false, "")
expect(result).toBe("item1; item2; item3.")
expect(result).toBe("* item1.\n* item2\n* item3.\n\n")
})

test("listitem", () => {
Expand Down
36 changes: 35 additions & 1 deletion test/lore/document.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ import {Document} from "@langchain/core/documents"
import * as E from "fp-ts/Either"
import {BaseDocumentLoader} from "langchain/document_loaders/base"
import {describe, expect, it} from "vitest"
import {LoreDocument, LoreDocumentLoader, LoreParseError, LoreParseErrorT} from "../../src"
import {LoreDocument, LoreDocumentLoader, LoreParseError, LoreParseErrorT, substitute} from "../../src"

class MockDocumentLoader extends BaseDocumentLoader {

Expand Down Expand Up @@ -110,6 +110,40 @@ Title is too long!
expect(details).contain("Must be equal to or shorter than 200 characters.")
}
})

it("should apply the substitute table to documents when the relevant option is given", async () => {

const source = new MockDocumentLoader([{
pageContent: `
## {name}
{name} is the main protagonist of {title}.`,
metadata: {
id: 1
}
}])

const substitutions = substitute({
name: "Max",
title: "Life is Strange"
})

const loader = new LoreDocumentLoader(source, {substitutions})

const result = await loader.createTask()()

expect(result).toSatisfy(E.isRight<ReadonlyArray<LoreDocument>>)

if (E.isRight(result)) {
const docs = result.right

expect(docs).length(1)

const {pageContent} = docs[0]

expect(pageContent).toBe("Max: Max is the main protagonist of Life is Strange.")
}
})
})

describe("load", () => {
Expand Down
Loading

0 comments on commit 03834ac

Please sign in to comment.