diff --git a/src/common/markdown.ts b/src/common/markdown.ts index 80b75b2..3801723 100644 --- a/src/common/markdown.ts +++ b/src/common/markdown.ts @@ -101,6 +101,11 @@ export function parseMarkdown(text: string): MarkdownText { } } +export interface PlainTextRendererOptions extends MarkedOptions { + + concatenateList?: boolean +} + /* eslint-disable @typescript-eslint/no-unused-vars */ /** * An implementation of {@link Renderer} which converts a Markdown input into plain text output. @@ -108,18 +113,18 @@ export function parseMarkdown(text: string): MarkdownText { */ export class PlainTextRenderer implements Renderer { - readonly options: MarkedOptions + readonly options: PlainTextRendererOptions - constructor(options?: MarkedOptions) { + constructor(options?: PlainTextRendererOptions) { this.options = options || {} } code(code: string, _infostring: string | undefined, _escaped: boolean): string { - return code + return [code, "\n\n"].join("") } blockquote(quote: string): string { - return quote + return [">", decode(quote)].join(" ") } html(html: string, _block?: boolean | undefined): string { @@ -137,10 +142,11 @@ export class PlainTextRenderer implements Renderer { } hr(): string { - return "---" + return "---\n\n" } - list(body: string, _ordered: boolean, _start: number | ""): string { + list(body: string, ordered: boolean, _start: number | ""): string { + const items = pipe( body.split("*"), A.map(i => i.trim()), @@ -150,10 +156,19 @@ export class PlainTextRenderer implements Renderer { const removePeriod = (s: string) => s.endsWith(".") ? s.substring(0, s.length - 1) : s + const multiline = this.options.concatenateList !== true + const itemSeparator = multiline ? "\n" : "; " + return pipe( items, - A.mapWithIndex((i, text) => i < items.length - 1 ? removePeriod(text) : text) - ).join("; ").trim() + A.mapWithIndex((i, text) => { + if (multiline) { + return ordered ? [i + 1, ". ", text].join("") : ["*", text].join(" ") + } + + return i < items.length - 1 ? removePeriod(text) : text + }) + ).join(itemSeparator).trim() + "\n\n" } listitem(text: string, _task: boolean, _checked: boolean): string { diff --git a/src/lore/document.ts b/src/lore/document.ts index a920b25..966d121 100644 --- a/src/lore/document.ts +++ b/src/lore/document.ts @@ -6,6 +6,7 @@ import {Document} from "@langchain/core/documents" import * as E from "fp-ts/Either" import {Either} from "fp-ts/Either" import {flow, pipe} from "fp-ts/function" +import {Reader} from "fp-ts/Reader" import * as A from "fp-ts/ReadonlyArray" import * as TE from "fp-ts/TaskEither" import {TaskEither} from "fp-ts/TaskEither" @@ -21,6 +22,35 @@ import {LoreParser} from "./parser" */ export type LoreDocument = Document +/** + * Represents the options for the {@link LoreDocumentLoader}. + */ +export interface LoreDocumentLoaderOptions { + + /** + * The parser with which to convert the source documents into lore entries. + * Defaults to an instance of {@link MarkdownLoreParser}. + * + * @readonly + */ + readonly parser?: LoreParser + + /** + * The separator to use when joining an entry title with its content. Defaults to ": ". + * + * @readonly + */ + readonly titleSeparator?: string + + /** + * An optional function that performs text substitutions. The function takes a string as input and returns the + * substituted string. + * + * @readonly + */ + readonly substitutions?: Reader +} + /** * An implementation of {@link DocumentLoader} that acts as a wrapper for a given {@link DocumentLoader} to * convert its output into {@link LoreDocument}s. @@ -28,19 +58,25 @@ export type LoreDocument = Document export class LoreDocumentLoader extends BaseDocumentLoader { + /** + * @class LoreParser + * @description A class that parses and extracts information from a given Lore file. + */ + readonly parser: LoreParser + + readonly titleSeparator: string + + readonly substitutions: Reader + /** * Constructs a new instance of {@link LoreDocumentLoader}. * * @param {DocumentLoader} source The loader from which to load source documents. - * @param {LoreParser} parser The parser with which to convert the source documents into lore entries. - * Defaults to an instance of {@link MarkdownLoreParser}. - * @param {string} [titleSeparator] The separator to use when joining an entry title with its content. - * Defaults to ": ". + * @param {LoreDocumentLoaderOptions} options Configuration options for the loader. */ constructor( readonly source: DocumentLoader, - readonly parser: LoreParser = new MarkdownLoreParser(), - readonly titleSeparator: string = ": " + options: LoreDocumentLoaderOptions = {} ) { super() @@ -48,6 +84,10 @@ export class LoreDocumentLoader extends BaseDocumentLoader { this.createTask = this.createTask.bind(this) this.parse = this.parse.bind(this) this.toDocument = this.toDocument.bind(this) + + this.parser = options.parser ?? new MarkdownLoreParser() + this.titleSeparator = options.titleSeparator ?? ": " + this.substitutions = options.substitutions ?? (t => t) } /** @@ -96,7 +136,9 @@ export class LoreDocumentLoader extends BaseDocumentLoader { const {pageContent, metadata} = source return pipe( - this.parser.parseText(pageContent), + pageContent, + this.substitutions, + this.parser.parseText, E.map(A.map(this.toDocument)), E.map(A.map(d => ({ pageContent: d.pageContent, diff --git a/src/lore/markdown.ts b/src/lore/markdown.ts index fde5385..208abf0 100644 --- a/src/lore/markdown.ts +++ b/src/lore/markdown.ts @@ -48,7 +48,7 @@ export class MarkdownLoreParser extends AbstractLoreParser { constructor(options: MarkdownLoreParserOptions = {}) { super() - this.headerSeparator = options?.headerSeparator ?? ">" + this.headerSeparator = options?.headerSeparator ?? "> " this.renderer = options?.renderer ?? new PlainTextRenderer() } diff --git a/src/prompt/markdown.ts b/src/prompt/markdown.ts index 720d067..cc20a30 100644 --- a/src/prompt/markdown.ts +++ b/src/prompt/markdown.ts @@ -46,7 +46,31 @@ export class MarkdownMessageParser implements ChatMessageParser { parse(text: string): Either> { const root = parseMarkdown(text) - const createMessage = ({title, contents}: MarkdownText): Option => { + const renderChildren = (texts: ReadonlyArray, level = 2): string => { + return pipe( + texts, + A.flatMap(({title, contents, children}) => { + const tokens = A.toArray(contents) + const body = marked.parser(tokens, {renderer: this.renderer}) + + return pipe( + title, + O.map(t => pipe( + A.of("#".repeat(level)), + A.prepend("\n"), + A.append(" "), + A.append(t), + A.append("\n") + ).join("")), + A.fromOption, + A.append(body.trim()), + A.concat(A.isEmpty(children) ? A.empty : A.of(renderChildren(children, level + 1))) + ) + }) + ).join("\n") + } + + const createMessage = ({title, contents, children}: MarkdownText): Option => { const tokens = A.toArray(contents) const body = marked.parser(tokens, {renderer: this.renderer}) @@ -55,6 +79,8 @@ export class MarkdownMessageParser implements ChatMessageParser { O.map(ST.trim), O.filter(not(ST.isEmpty)), O.map(t => { + const tt = O.isSome(title) && A.isNonEmpty(children) ? [t, renderChildren(children)].join("\n"): t + const isAI = pipe(title, O.exists(ST.startsWith("AI"))) const isHuman = pipe(title, O.exists(ST.startsWith("Human"))) @@ -72,11 +98,11 @@ export class MarkdownMessageParser implements ChatMessageParser { switch (role) { case "AI": - return new AIMessage({name: name, content: t}) + return new AIMessage({name: name, content: tt}) case "Human": - return new HumanMessage({name: name, content: t}) + return new HumanMessage({name: name, content: tt}) case "System": - return new SystemMessage({name: name, content: t}) + return new SystemMessage({name: name, content: tt}) } }) ) diff --git a/test/common/markdown.test.ts b/test/common/markdown.test.ts index 0bbbf43..053d16b 100644 --- a/test/common/markdown.test.ts +++ b/test/common/markdown.test.ts @@ -99,13 +99,13 @@ describe("PlainTextRenderer", () => { test("code", () => { const code = "console.log('hello world')" const result = renderer.code(code, undefined, false) - expect(result).toBe(code) + expect(result).toBe(code + "\n\n") }) test("blockquote", () => { const quote = "This is a quote" const result = renderer.blockquote(quote) - expect(result).toBe(quote) + expect(result).toBe(`> ${quote}`) }) test("html", () => { @@ -123,13 +123,21 @@ describe("PlainTextRenderer", () => { test("hr", () => { const result = renderer.hr() - expect(result).toBe("---") + expect(result).toBe("---\n\n") + }) + + test("list(concatenateList = true)", () => { + const renderer = new PlainTextRenderer({concatenateList: true}) + + const list = "* item1.\n* item2\n* item3." + const result = renderer.list(list, false, "") + expect(result).toBe("item1; item2; item3.\n\n") }) test("list", () => { const list = "* item1.\n* item2\n* item3." const result = renderer.list(list, false, "") - expect(result).toBe("item1; item2; item3.") + expect(result).toBe("* item1.\n* item2\n* item3.\n\n") }) test("listitem", () => { diff --git a/test/lore/document.test.ts b/test/lore/document.test.ts index 8b0f6fb..bead7f3 100644 --- a/test/lore/document.test.ts +++ b/test/lore/document.test.ts @@ -2,7 +2,7 @@ import {Document} from "@langchain/core/documents" import * as E from "fp-ts/Either" import {BaseDocumentLoader} from "langchain/document_loaders/base" import {describe, expect, it} from "vitest" -import {LoreDocument, LoreDocumentLoader, LoreParseError, LoreParseErrorT} from "../../src" +import {LoreDocument, LoreDocumentLoader, LoreParseError, LoreParseErrorT, substitute} from "../../src" class MockDocumentLoader extends BaseDocumentLoader { @@ -110,6 +110,40 @@ Title is too long! expect(details).contain("Must be equal to or shorter than 200 characters.") } }) + + it("should apply the substitute table to documents when the relevant option is given", async () => { + + const source = new MockDocumentLoader([{ + pageContent: ` +## {name} + +{name} is the main protagonist of {title}.`, + metadata: { + id: 1 + } + }]) + + const substitutions = substitute({ + name: "Max", + title: "Life is Strange" + }) + + const loader = new LoreDocumentLoader(source, {substitutions}) + + const result = await loader.createTask()() + + expect(result).toSatisfy(E.isRight>) + + if (E.isRight(result)) { + const docs = result.right + + expect(docs).length(1) + + const {pageContent} = docs[0] + + expect(pageContent).toBe("Max: Max is the main protagonist of Life is Strange.") + } + }) }) describe("load", () => { diff --git a/test/lore/markdown.test.ts b/test/lore/markdown.test.ts index 014bb97..7733aea 100644 --- a/test/lore/markdown.test.ts +++ b/test/lore/markdown.test.ts @@ -87,19 +87,19 @@ Text 2.2.2 expect(entries[0].content).toBe("Text 1.1.") // Second section - expect(entries[1].title).toEqual(O.of("Heading 1.1>Heading 2.1")) + expect(entries[1].title).toEqual(O.of("Heading 1.1> Heading 2.1")) expect(entries[1].content).toBe("Text 2.1.") // Third section - expect(entries[2].title).toEqual(O.of("Heading 1.1>Heading 2.1>Heading 3.1")) + expect(entries[2].title).toEqual(O.of("Heading 1.1> Heading 2.1> Heading 3.1")) expect(entries[2].content).toBe("Text 3.1") // Fourth section - expect(entries[3].title).toEqual(O.of("Heading 1.1>Heading 2.1>Heading 3.2")) + expect(entries[3].title).toEqual(O.of("Heading 1.1> Heading 2.1> Heading 3.2")) expect(entries[3].content).toBe("Text 3.2") // Fifth section - expect(entries[4].title).toEqual(O.of("Heading 1.1>Heading 2.2")) + expect(entries[4].title).toEqual(O.of("Heading 1.1> Heading 2.2")) expect(entries[4].content).toBe("Text 2.2.1 Text 2.2.2") } }) @@ -245,5 +245,78 @@ Text 2.1. expect(title).toMatchObject(O.of("Heading 1.1 // Heading 2.1")) } }) + + it("should use compact Markdown content", () => { + const text = ` +# Heading 1.1 + +## Heading 2.1 + +This is a list: + + * item 1. + * item 2. + * item 3. + +End of the list. + +------ + +This is a code block: + +\`\`\`json +{ + value: true +} +\`\`\` + +End of the code block. + +This is a blockquote: + +> Dialogue example + +End of the blockquote. + +` + + const parser = new MarkdownLoreParser() + const result = parser.parseText(text) + + expect(result).toSatisfy(E.isRight) + + if (E.isRight(result)) { + const entries = result.right + + expect(entries).toHaveLength(1) + + const {title, content} = entries[0] + + expect(title).toMatchObject(O.of("Heading 1.1> Heading 2.1")) + expect(content).toBe(`This is a list: + +* item 1. +* item 2. +* item 3. + +End of the list. + +--- + +This is a code block: + +{ + value: true +} + +End of the code block. + +This is a blockquote: + +> Dialogue example + +End of the blockquote.`) + } + }) }) }) diff --git a/test/prompt/markdown.test.ts b/test/prompt/markdown.test.ts index 2e66ede..22c3dba 100644 --- a/test/prompt/markdown.test.ts +++ b/test/prompt/markdown.test.ts @@ -109,5 +109,46 @@ Ready for the mosh pit, shaka brah! expect(messages[1].name).toBe("Chloe") } }) + + it("should preserve second and lower level headings verbatim", () => { + const text = ` +# Human + +Tell me about Life is Strange. + +# AI + +Here's some basic information about the game: + +## Location +Arcadia Bay + +## Characters + * Max Caulfield + * Chloe Price +` + + const result = parser.parse(text) + + expect(result).toSatisfy(E.isRight) + + if (E.isRight(result)) { + const messages = result.right + + expect(messages).toHaveLength(2) + + expect(messages[0].content).toBe("Tell me about Life is Strange.") + expect(messages[1].content).toBe(`Here's some basic information about the game: + +## Location + +Arcadia Bay + +## Characters + +* Max Caulfield +* Chloe Price`) + } + }) }) })