From d665c1b0aa499f351d5dbfa9831b43cadb2c14c6 Mon Sep 17 00:00:00 2001 From: Oleg Solomko Date: Fri, 10 Jan 2025 20:46:57 -0800 Subject: [PATCH] support `markdown links` syntax in prompt files (#237698) * [markdown links]: add II of `MarkdownDecoder` * [markdown links]: add `Colon` and `Hash` simple tokens, implement the new logic inside `ChatPromptDecoder` * [markdown links]: add unit tests for the `MarkdownDecoder`, update other unit tests to account for the new MD links * [markdown links]: refactor and improve docs * [markdown links]: improve unit tests of the `MarkdownDecoder` * [markdown links]: fix recursion issue caused by `##` in `ChatPromptDecoder` and improve unit tests * [markdown links]: improve docs --- src/vs/editor/common/codecs/baseToken.ts | 5 + .../linesCodec/tokens/carriageReturn.ts | 7 + .../codecs/linesCodec/tokens/newLine.ts | 7 + .../codecs/markdownCodec/markdownDecoder.ts | 301 ++++++++++++++++ .../markdownCodec/tokens/markdownLink.ts | 101 ++++++ .../markdownCodec/tokens/markdownToken.ts | 12 + .../common/codecs/simpleCodec/parserBase.ts | 73 ++++ .../codecs/simpleCodec/simpleDecoder.ts | 67 ++-- .../codecs/simpleCodec/tokens/brackets.ts | 99 ++++++ .../common/codecs/simpleCodec/tokens/colon.ts | 54 +++ .../codecs/simpleCodec/tokens/formFeed.ts | 7 + .../common/codecs/simpleCodec/tokens/hash.ts | 54 +++ .../codecs/simpleCodec/tokens/parentheses.ts | 99 ++++++ .../common/codecs/simpleCodec/tokens/space.ts | 7 + .../common/codecs/simpleCodec/tokens/tab.ts | 7 + .../codecs/simpleCodec/tokens/verticalTab.ts | 7 + .../common/codecs/markdownDecoder.test.ts | 332 ++++++++++++++++++ .../test/common/codecs/simpleDecoder.test.ts | 23 +- .../editor/test/common/utils/testDecoder.ts | 14 +- .../browser/contrib/chatDynamicVariables.ts | 2 +- .../chatPromptCodec/chatPromptDecoder.ts | 245 ++++++++++++- .../chatPromptCodec/tokens/fileReference.ts | 8 +- .../common/codecs/chatPromptDecoder.test.ts | 25 +- .../test/common/promptFileReference.test.ts | 11 +- 
24 files changed, 1500 insertions(+), 67 deletions(-) create mode 100644 src/vs/editor/common/codecs/markdownCodec/markdownDecoder.ts create mode 100644 src/vs/editor/common/codecs/markdownCodec/tokens/markdownLink.ts create mode 100644 src/vs/editor/common/codecs/markdownCodec/tokens/markdownToken.ts create mode 100644 src/vs/editor/common/codecs/simpleCodec/parserBase.ts create mode 100644 src/vs/editor/common/codecs/simpleCodec/tokens/brackets.ts create mode 100644 src/vs/editor/common/codecs/simpleCodec/tokens/colon.ts create mode 100644 src/vs/editor/common/codecs/simpleCodec/tokens/hash.ts create mode 100644 src/vs/editor/common/codecs/simpleCodec/tokens/parentheses.ts create mode 100644 src/vs/editor/test/common/codecs/markdownDecoder.test.ts diff --git a/src/vs/editor/common/codecs/baseToken.ts b/src/vs/editor/common/codecs/baseToken.ts index 9ebe3ad8abc3c..6430ffb61a5ef 100644 --- a/src/vs/editor/common/codecs/baseToken.ts +++ b/src/vs/editor/common/codecs/baseToken.ts @@ -18,6 +18,11 @@ export abstract class BaseToken { return this._range; } + /** + * Return text representation of the token. + */ + public abstract get text(): string; + /** * Check if this token has the same range as another one. */ diff --git a/src/vs/editor/common/codecs/linesCodec/tokens/carriageReturn.ts b/src/vs/editor/common/codecs/linesCodec/tokens/carriageReturn.ts index 5120f4ac322b9..a509940bc4e87 100644 --- a/src/vs/editor/common/codecs/linesCodec/tokens/carriageReturn.ts +++ b/src/vs/editor/common/codecs/linesCodec/tokens/carriageReturn.ts @@ -31,6 +31,13 @@ export class CarriageReturn extends BaseToken { return CarriageReturn.byte; } + /** + * Return text representation of the token. + */ + public get text(): string { + return CarriageReturn.symbol; + } + /** * Create new `CarriageReturn` token with range inside * the given `Line` at the given `column number`. 
diff --git a/src/vs/editor/common/codecs/linesCodec/tokens/newLine.ts b/src/vs/editor/common/codecs/linesCodec/tokens/newLine.ts index 19b80dd88a3c3..fb826b759ca29 100644 --- a/src/vs/editor/common/codecs/linesCodec/tokens/newLine.ts +++ b/src/vs/editor/common/codecs/linesCodec/tokens/newLine.ts @@ -24,6 +24,13 @@ export class NewLine extends BaseToken { */ public static readonly byte = VSBuffer.fromString(NewLine.symbol); + /** + * Return text representation of the token. + */ + public get text(): string { + return NewLine.symbol; + } + /** * The byte representation of the token. */ diff --git a/src/vs/editor/common/codecs/markdownCodec/markdownDecoder.ts b/src/vs/editor/common/codecs/markdownCodec/markdownDecoder.ts new file mode 100644 index 0000000000000..3703fa1df29ce --- /dev/null +++ b/src/vs/editor/common/codecs/markdownCodec/markdownDecoder.ts @@ -0,0 +1,301 @@ +/*--------------------------------------------------------------------------------------------- + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT License. See License.txt in the project root for license information. 
+ *--------------------------------------------------------------------------------------------*/ + +import { MarkdownLink } from './tokens/markdownLink.js'; +import { NewLine } from '../linesCodec/tokens/newLine.js'; +import { assert } from '../../../../base/common/assert.js'; +import { FormFeed } from '../simpleCodec/tokens/formFeed.js'; +import { VSBuffer } from '../../../../base/common/buffer.js'; +import { VerticalTab } from '../simpleCodec/tokens/verticalTab.js'; +import { ReadableStream } from '../../../../base/common/stream.js'; +import { CarriageReturn } from '../linesCodec/tokens/carriageReturn.js'; +import { BaseDecoder } from '../../../../base/common/codecs/baseDecoder.js'; +import { LeftBracket, RightBracket } from '../simpleCodec/tokens/brackets.js'; +import { SimpleDecoder, TSimpleToken } from '../simpleCodec/simpleDecoder.js'; +import { ParserBase, TAcceptTokenResult } from '../simpleCodec/parserBase.js'; +import { LeftParenthesis, RightParenthesis } from '../simpleCodec/tokens/parentheses.js'; + +/** + * Tokens handled by this decoder. + */ +export type TMarkdownToken = MarkdownLink | TSimpleToken; + +/** + * List of characters that stop a markdown link sequence. + */ +const MARKDOWN_LINK_STOP_CHARACTERS: readonly string[] = [CarriageReturn, NewLine, VerticalTab, FormFeed] + .map((token) => { return token.symbol; }); + +/** + * The parser responsible for parsing a `markdown link caption` part of a markdown + * link (e.g., the `[caption text]` part of the `[caption text](./some/path)` link). + * + * The parsing process starts with single `[` token and collects all tokens until + * the first `]` token is encountered. In this successful case, the parser transitions + * into the {@linkcode MarkdownLinkCaption} parser type which continues the general + * parsing process of the markdown link. 
+ * + * Otherwise, if one of the stop characters defined in the {@linkcode MARKDOWN_LINK_STOP_CHARACTERS} + * is encountered before the `]` token, the parsing process is aborted which is communicated to + * the caller by returning a `failure` result. In this case, the caller is assumed to be responsible + * for re-emitting the {@link tokens} accumulated so far as standalone entities since they no + * longer represent a coherent token entity of a larger size. + */ +class PartialMarkdownLinkCaption extends ParserBase { + constructor(token: LeftBracket) { + super([token]); + } + + public accept(token: TSimpleToken): TAcceptTokenResult { + // any of the stop characters breaks a markdown link caption sequence + if (MARKDOWN_LINK_STOP_CHARACTERS.includes(token.text)) { + return { + result: 'failure', + wasTokenConsumed: false, + }; + } + + // the `]` character ends the caption of a markdown link + if (token instanceof RightBracket) { + return { + result: 'success', + nextParser: new MarkdownLinkCaption([...this.tokens, token]), + wasTokenConsumed: true, + }; + } + + // otherwise, include the token in the sequence + // and keep the current parser object instance + this.currentTokens.push(token); + return { + result: 'success', + nextParser: this, + wasTokenConsumed: true, + }; + } +} + +/** + * The parser responsible for transitioning from a {@linkcode PartialMarkdownLinkCaption} + * parser to the {@link PartialMarkdownLink} one, therefore serves as a parser glue between + * the `[caption]` and the `(./some/path)` parts of the `[caption](./some/path)` link. + * + * The only successful case of this parser is the `(` token that initiates the process + * of parsing the `reference` part of a markdown link and in this case the parser + * transitions into the `PartialMarkdownLink` parser type. + * + * Any other character is considered a failure result.
In this case, the caller is assumed + * to be responsible for re-emitting the {@link tokens} accumulated so far as standalone + * entities since they no longer represent a coherent token entity of a larger size. + */ +class MarkdownLinkCaption extends ParserBase { + public accept(token: TSimpleToken): TAcceptTokenResult { + // the `(` character starts the link part of a markdown link + // that is the only character that can follow the caption + if (token instanceof LeftParenthesis) { + return { + result: 'success', + wasTokenConsumed: true, + nextParser: new PartialMarkdownLink([...this.tokens], token), + }; + } + + return { + result: 'failure', + wasTokenConsumed: false, + }; + } +} + +/** + * The parser responsible for parsing a `link reference` part of a markdown link + * (e.g., the `(./some/path)` part of the `[caption text](./some/path)` link). + * + * The parsing process starts with tokens that represent the `[caption]` part of a markdown + * link, followed by the `(` token. The parser collects all subsequent tokens until the final closing + * parenthesis (`)`) is encountered (*\*see [1] below*). In this successful case, the parser object + * transitions into the {@linkcode MarkdownLink} token type which signifies the end of the entire + * parsing process of the link text. + * + * Otherwise, if one of the stop characters defined in the {@linkcode MARKDOWN_LINK_STOP_CHARACTERS} + * is encountered before the final `)` token, the parsing process is aborted which is communicated to + * the caller by returning a `failure` result. In this case, the caller is assumed to be responsible + * for re-emitting the {@link tokens} accumulated so far as standalone entities since they no + * longer represent a coherent token entity of a larger size.
+ * + * `[1]` The `reference` part of the markdown link can contain any number of nested parentheses, e.g., + * `[caption](/some/p()th/file.md)` is a valid markdown link and a valid folder name, hence the number + * of open parentheses must match the number of closing ones and the path sequence is considered + * to be complete as soon as this requirement is met. Therefore the `final` word is used in + * the description comments above to highlight this important detail. + */ +class PartialMarkdownLink extends ParserBase { + /** + * Number of open parentheses in the sequence. + * See comment in the {@linkcode accept} method for more details. + */ + private openParensCount: number = 1; + + constructor( + protected readonly captionTokens: TSimpleToken[], + token: LeftParenthesis, + ) { + super([token]); + } + + public override get tokens(): readonly TSimpleToken[] { + return [...this.captionTokens, ...this.currentTokens]; + } + + public accept(token: TSimpleToken): TAcceptTokenResult { + // markdown links allow for nested parentheses inside the link reference part, but + // the number of open parentheses must match the number of closing parentheses, e.g.: + // - `[caption](/some/p()th/file.md)` is a valid markdown link + // - `[caption](/some/p(th/file.md)` is an invalid markdown link + // hence we use the `openParensCount` variable to keep track of the number of open + // parentheses encountered so far; then upon encountering a closing parenthesis we + // decrement the `openParensCount` and if it reaches 0 - we consider the link reference + // to be complete + + if (token instanceof LeftParenthesis) { + this.openParensCount += 1; + } + + if (token instanceof RightParenthesis) { + this.openParensCount -= 1; + + // sanity check!
this must always hold true because we return a complete markdown + // link as soon as we encounter a matching number of closing parentheses, hence + // we must never have `openParensCount` that is less than 0 + assert( + this.openParensCount >= 0, + `Unexpected right parenthesis token encountered: '${token}'.`, + ); + + // the markdown link is complete as soon as we get the same number of closing parentheses + if (this.openParensCount === 0) { + const { startLineNumber, startColumn } = this.captionTokens[0].range; + + // create link caption string + const caption = this.captionTokens + .map((token) => { return token.text; }) + .join(''); + + // create link reference string + this.currentTokens.push(token); + const reference = this.currentTokens + .map((token) => { return token.text; }).join(''); + + // return complete markdown link object + return { + result: 'success', + wasTokenConsumed: true, + nextParser: new MarkdownLink( + startLineNumber, + startColumn, + caption, + reference, + ), + }; + } + } + + // any of the stop characters breaks a markdown link reference sequence + if (MARKDOWN_LINK_STOP_CHARACTERS.includes(token.text)) { + return { + result: 'failure', + wasTokenConsumed: false, + }; + } + + // the rest of the tokens can be included in the sequence + this.currentTokens.push(token); + return { + result: 'success', + nextParser: this, + wasTokenConsumed: true, + }; + } +} + +/** + * Decoder capable of parsing markdown entities (e.g., links) from a sequence of simpler tokens. + */ +export class MarkdownDecoder extends BaseDecoder { + /** + * Current parser object that is responsible for parsing a sequence of tokens + * into some markdown entity.
+ */ + private current?: PartialMarkdownLinkCaption | MarkdownLinkCaption | PartialMarkdownLink; + + constructor( + stream: ReadableStream, + ) { + super(new SimpleDecoder(stream)); + } + + protected override onStreamData(token: TSimpleToken): void { + // markdown links start with the `[` character, so here we can + // initiate the process of parsing a markdown link + if (token instanceof LeftBracket && !this.current) { + this.current = new PartialMarkdownLinkCaption(token); + + return; + } + + // if the current parser was not initiated before, we are not inside a + // sequence of tokens we care about, therefore re-emit the token + // immediately and continue to the next one + if (!this.current) { + this._onData.fire(token); + return; + } + + // if there is a current parser object, submit the token to it + // so it can progress with parsing the tokens sequence + const parseResult = this.current.accept(token); + if (parseResult.result === 'success') { + // if got a parsed out `MarkdownLink` back, emit it + // then reset the current parser object + if (parseResult.nextParser instanceof MarkdownLink) { + this._onData.fire(parseResult.nextParser); + delete this.current; + } else { + // otherwise, update the current parser object + this.current = parseResult.nextParser; + } + } else { + // if failed to parse a sequence of tokens as a single markdown + // entity (e.g., a link), re-emit the tokens accumulated so far + // then reset the current parser object + // NOTE(review): `delete this.current` sits inside the loop body, so it runs once per + // re-emitted token; the `tokens` iterable is obtained once when the loop starts, so all + // tokens are still re-emitted - consider hoisting the reset out of the loop + for (const token of this.current.tokens) { + this._onData.fire(token); + delete this.current; + } + } + + // if token was not consumed by the parser, call `onStreamData` again + // so the token is properly handled by the decoder in the case when a + // new sequence starts with this token + if (!parseResult.wasTokenConsumed) { + this.onStreamData(token); + } + } + + protected override onStreamEnd(): void { + // if the stream has ended and there is a current incomplete parser + // object present, then re-emit its tokens as
standalone entities + if (this.current) { + const { tokens } = this.current; + delete this.current; + + for (const token of [...tokens]) { + this._onData.fire(token); + } + } + + super.onStreamEnd(); + } +} diff --git a/src/vs/editor/common/codecs/markdownCodec/tokens/markdownLink.ts b/src/vs/editor/common/codecs/markdownCodec/tokens/markdownLink.ts new file mode 100644 index 0000000000000..174365c45599b --- /dev/null +++ b/src/vs/editor/common/codecs/markdownCodec/tokens/markdownLink.ts @@ -0,0 +1,101 @@ +/*--------------------------------------------------------------------------------------------- + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT License. See License.txt in the project root for license information. + *--------------------------------------------------------------------------------------------*/ + +import { BaseToken } from '../../baseToken.js'; +import { Range } from '../../../core/range.js'; +import { MarkdownToken } from './markdownToken.js'; +import { assert } from '../../../../../base/common/assert.js'; + +/** + * A token that represents a `markdown link` with a `range`. The `range` + * value reflects the position of the token in the original data. + */ +export class MarkdownLink extends MarkdownToken { + constructor( + /** + * The starting line number of the link (1-based indexing). + */ + lineNumber: number, + /** + * The starting column number of the link (1-based indexing). + */ + columnNumber: number, + /** + * The caption of the link, including the square brackets. + */ + private readonly caption: string, + /** + * The reference of the link, including the parentheses.
+ */ + private readonly reference: string, + ) { + assert( + !isNaN(lineNumber), + `The line number must not be a NaN.`, + ); + + assert( + lineNumber > 0, + `The line number must be >= 1, got "${lineNumber}".`, + ); + + assert( + columnNumber > 0, + `The column number must be >= 1, got "${columnNumber}".`, + ); + + assert( + caption[0] === '[' && caption[caption.length - 1] === ']', + `The caption must be enclosed in square brackets, got "${caption}".`, + ); + + assert( + reference[0] === '(' && reference[reference.length - 1] === ')', + `The reference must be enclosed in parentheses, got "${reference}".`, + ); + + super( + new Range( + lineNumber, + columnNumber, + lineNumber, + columnNumber + caption.length + reference.length, + ), + ); + } + + public override get text(): string { + return `${this.caption}${this.reference}`; + } + + /** + * Returns the `reference` part of the link without enclosing parentheses. + */ + public get path(): string { + return this.reference.slice(1, this.reference.length - 1); + } + + /** + * Check if this token is equal to another one. + */ + public override equals(other: T): boolean { + if (!super.sameRange(other.range)) { + return false; + } + + if (!(other instanceof MarkdownLink)) { + return false; + } + + return this.text === other.text; + } + + /** + * Returns a string representation of the token. + */ + public override toString(): string { + return `md-link("${this.text}")${this.range}`; + } +} diff --git a/src/vs/editor/common/codecs/markdownCodec/tokens/markdownToken.ts b/src/vs/editor/common/codecs/markdownCodec/tokens/markdownToken.ts new file mode 100644 index 0000000000000..fc1935d081bf6 --- /dev/null +++ b/src/vs/editor/common/codecs/markdownCodec/tokens/markdownToken.ts @@ -0,0 +1,12 @@ +/*--------------------------------------------------------------------------------------------- + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT License. 
See License.txt in the project root for license information. + *--------------------------------------------------------------------------------------------*/ + +import { BaseToken } from '../../baseToken.js'; + +/** + * Common base token that all `markdown` tokens should + * inherit from. + */ +export abstract class MarkdownToken extends BaseToken { } diff --git a/src/vs/editor/common/codecs/simpleCodec/parserBase.ts b/src/vs/editor/common/codecs/simpleCodec/parserBase.ts new file mode 100644 index 0000000000000..9e864177f9fd5 --- /dev/null +++ b/src/vs/editor/common/codecs/simpleCodec/parserBase.ts @@ -0,0 +1,73 @@ +/*--------------------------------------------------------------------------------------------- + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT License. See License.txt in the project root for license information. + *--------------------------------------------------------------------------------------------*/ + +import { BaseToken } from '../baseToken.js'; + +/** + * Common interface for a result of accepting a next token + * in a sequence. + */ +export interface IAcceptTokenResult { + /** + * The result type of accepting a next token in a sequence. + */ + result: 'success' | 'failure'; + + /** + * Whether the token to accept was consumed by the parser + * during the accept operation. + */ + wasTokenConsumed: boolean; +} + +/** + * Successful result of accepting a next token in a sequence. + */ +export interface IAcceptTokenSuccess extends IAcceptTokenResult { + result: 'success'; + nextParser: T; +} + +/** + * Failure result of accepting a next token in a sequence. + */ +export interface IAcceptTokenFailure extends IAcceptTokenResult { + result: 'failure'; +} + +/** + * The result of operation of accepting a next token in a sequence. 
+ */ +export type TAcceptTokenResult = IAcceptTokenSuccess | IAcceptTokenFailure; + +/** + * An abstract parser class that is able to parse a sequence of + * tokens into a new single entity. + */ +export abstract class ParserBase { + constructor( + /** + * Set of tokens that were accumulated so far. + */ + protected readonly currentTokens: TToken[] = [], + ) { } + + /** + * Get the tokens that were accumulated so far. + */ + public get tokens(): readonly TToken[] { + return this.currentTokens; + } + + /** + * Accept a new token returning parsing result: + * - successful result must include the next parser object or a fully parsed out token + * - failure result must indicate that the token was not consumed + * + * @param token The token to accept. + * @returns The parsing result. + */ + public abstract accept(token: TToken): TAcceptTokenResult; +} diff --git a/src/vs/editor/common/codecs/simpleCodec/simpleDecoder.ts b/src/vs/editor/common/codecs/simpleCodec/simpleDecoder.ts index 64173eceabdaa..88ad129850170 100644 --- a/src/vs/editor/common/codecs/simpleCodec/simpleDecoder.ts +++ b/src/vs/editor/common/codecs/simpleCodec/simpleDecoder.ts @@ -3,6 +3,8 @@ * Licensed under the MIT License. See License.txt in the project root for license information. 
*--------------------------------------------------------------------------------------------*/ +import { Hash } from './tokens/hash.js'; +import { Colon } from './tokens/colon.js'; import { FormFeed } from './tokens/formFeed.js'; import { Tab } from '../simpleCodec/tokens/tab.js'; import { Word } from '../simpleCodec/tokens/word.js'; @@ -10,22 +12,39 @@ import { VerticalTab } from './tokens/verticalTab.js'; import { Space } from '../simpleCodec/tokens/space.js'; import { NewLine } from '../linesCodec/tokens/newLine.js'; import { VSBuffer } from '../../../../base/common/buffer.js'; +import { LeftBracket, RightBracket } from './tokens/brackets.js'; import { ReadableStream } from '../../../../base/common/stream.js'; import { CarriageReturn } from '../linesCodec/tokens/carriageReturn.js'; import { LinesDecoder, TLineToken } from '../linesCodec/linesDecoder.js'; import { BaseDecoder } from '../../../../base/common/codecs/baseDecoder.js'; +import { LeftParenthesis, RightParenthesis } from './tokens/parentheses.js'; /** * A token type that this decoder can handle. */ -export type TSimpleToken = Word | Space | Tab | VerticalTab | NewLine | FormFeed | CarriageReturn; +export type TSimpleToken = Word | Space | Tab | VerticalTab | NewLine | FormFeed | CarriageReturn | LeftBracket + | RightBracket | LeftParenthesis | RightParenthesis | Colon | Hash; + +/** + * List of well-known distinct tokens that this decoder emits (excluding + * the word stop characters defined below). Everything else is considered + * an arbitrary "text" sequence and is emitted as a single `Word` token. + */ +const WELL_KNOWN_TOKENS = [ + Space, Tab, VerticalTab, FormFeed, LeftBracket, RightBracket, + LeftParenthesis, RightParenthesis, Colon, Hash, +]; /** * Characters that stop a "word" sequence. * Note! the `\r` and `\n` are excluded from the list because this decoder based on `LinesDecoder` which * already handles the `carriagereturn`/`newline` cases and emits lines that don't contain them. 
*/ -const STOP_CHARACTERS = [Space.symbol, Tab.symbol, VerticalTab.symbol, FormFeed.symbol]; +const WORD_STOP_CHARACTERS = [ + Space.symbol, Tab.symbol, VerticalTab.symbol, FormFeed.symbol, + LeftBracket.symbol, RightBracket.symbol, LeftParenthesis.symbol, + RightParenthesis.symbol, Colon.symbol, Hash.symbol, +]; /** * A decoder that can decode a stream of `Line`s into a stream @@ -39,7 +58,7 @@ export class SimpleDecoder extends BaseDecoder { } protected override onStreamData(token: TLineToken): void { - // re-emit new line tokens + // re-emit new line tokens immediately if (token instanceof CarriageReturn || token instanceof NewLine) { this._onData.fire(token); @@ -52,46 +71,30 @@ export class SimpleDecoder extends BaseDecoder { // index is 0-based, but column numbers are 1-based const columnNumber = i + 1; - // if a space character, emit a `Space` token and continue - if (token.text[i] === Space.symbol) { - this._onData.fire(Space.newOnLine(token, columnNumber)); - - i++; - continue; - } - - // if a tab character, emit a `Tab` token and continue - if (token.text[i] === Tab.symbol) { - this._onData.fire(Tab.newOnLine(token, columnNumber)); - - i++; - continue; - } - - // if a vertical tab character, emit a `VerticalTab` token and continue - if (token.text[i] === VerticalTab.symbol) { - this._onData.fire(VerticalTab.newOnLine(token, columnNumber)); - - i++; - continue; - } + // check if the current character is a well-known token + const tokenConstructor = WELL_KNOWN_TOKENS + .find((wellKnownToken) => { + return wellKnownToken.symbol === token.text[i]; + }); - // if a form feed character, emit a `FormFeed` token and continue - if (token.text[i] === FormFeed.symbol) { - this._onData.fire(FormFeed.newOnLine(token, columnNumber)); + // if it is a well-known token, emit it and continue to the next one + if (tokenConstructor) { + this._onData.fire(tokenConstructor.newOnLine(token, columnNumber)); i++; continue; } - // if a non-space character, parse out the whole word 
and - // emit it, then continue from the last word character position + // otherwise, it is an arbitrary "text" sequence of characters, + // that needs to be collected into a single `Word` token, hence + // read all the characters until a stop character is encountered let word = ''; - while (i < token.text.length && !(STOP_CHARACTERS.includes(token.text[i]))) { + while (i < token.text.length && !(WORD_STOP_CHARACTERS.includes(token.text[i]))) { word += token.text[i]; i++; } + // emit a "text" sequence of characters as a single `Word` token this._onData.fire( Word.newOnLine(word, token, columnNumber), ); diff --git a/src/vs/editor/common/codecs/simpleCodec/tokens/brackets.ts b/src/vs/editor/common/codecs/simpleCodec/tokens/brackets.ts new file mode 100644 index 0000000000000..5c6c1e46a5d3d --- /dev/null +++ b/src/vs/editor/common/codecs/simpleCodec/tokens/brackets.ts @@ -0,0 +1,99 @@ +/*--------------------------------------------------------------------------------------------- + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT License. See License.txt in the project root for license information. + *--------------------------------------------------------------------------------------------*/ + +import { BaseToken } from '../../baseToken.js'; +import { Range } from '../../../core/range.js'; +import { Position } from '../../../core/position.js'; +import { Line } from '../../linesCodec/tokens/line.js'; + +/** + * A token that represent a `[` with a `range`. The `range` + * value reflects the position of the token in the original data. + */ +export class LeftBracket extends BaseToken { + /** + * The underlying symbol of the `LeftBracket` token. + */ + public static readonly symbol: string = '['; + + /** + * Return text representation of the token. + */ + public get text(): string { + return LeftBracket.symbol; + } + + /** + * Create new `LeftBracket` token with range inside + * the given `Line` at the given `column number`. 
+ */ + public static newOnLine( + line: Line, + atColumnNumber: number, + ): LeftBracket { + const { range } = line; + + const startPosition = new Position(range.startLineNumber, atColumnNumber); + // the end column is the start column plus the symbol length (1 for `[`) + const endPosition = new Position(range.startLineNumber, atColumnNumber + this.symbol.length); + + return new LeftBracket(Range.fromPositions( + startPosition, + endPosition, + )); + } + + /** + * Returns a string representation of the token. + */ + public override toString(): string { + return `left-bracket${this.range}`; + } +} + +/** + * A token that represents a `]` with a `range`. The `range` + * value reflects the position of the token in the original data. + */ +export class RightBracket extends BaseToken { + /** + * The underlying symbol of the `RightBracket` token. + */ + public static readonly symbol: string = ']'; + + /** + * Return text representation of the token. + */ + public get text(): string { + return RightBracket.symbol; + } + + /** + * Create new `RightBracket` token with range inside + * the given `Line` at the given `column number`. + */ + public static newOnLine( + line: Line, + atColumnNumber: number, + ): RightBracket { + const { range } = line; + + const startPosition = new Position(range.startLineNumber, atColumnNumber); + // the end column is the start column plus the symbol length (1 for `]`) + const endPosition = new Position(range.startLineNumber, atColumnNumber + this.symbol.length); + + return new RightBracket(Range.fromPositions( + startPosition, + endPosition, + )); + } + + /** + * Returns a string representation of the token.
+ */ + public override toString(): string { + return `right-bracket${this.range}`; + } +} diff --git a/src/vs/editor/common/codecs/simpleCodec/tokens/colon.ts b/src/vs/editor/common/codecs/simpleCodec/tokens/colon.ts new file mode 100644 index 0000000000000..2c4b89d9ce522 --- /dev/null +++ b/src/vs/editor/common/codecs/simpleCodec/tokens/colon.ts @@ -0,0 +1,54 @@ +/*--------------------------------------------------------------------------------------------- + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT License. See License.txt in the project root for license information. + *--------------------------------------------------------------------------------------------*/ + +import { BaseToken } from '../../baseToken.js'; +import { Range } from '../../../core/range.js'; +import { Position } from '../../../core/position.js'; +import { Line } from '../../linesCodec/tokens/line.js'; + +/** + * A token that represents a `:` with a `range`. The `range` + * value reflects the position of the token in the original data. + */ +export class Colon extends BaseToken { + /** + * The underlying symbol of the `Colon` token. + */ + public static readonly symbol: string = ':'; + + /** + * Return text representation of the token. + */ + public get text(): string { + return Colon.symbol; + } + + /** + * Create new `Colon` token with range inside + * the given `Line` at the given `column number`. + */ + public static newOnLine( + line: Line, + atColumnNumber: number, + ): Colon { + const { range } = line; + + const startPosition = new Position(range.startLineNumber, atColumnNumber); + // the end column is the start column plus the symbol length (1 for `:`) + const endPosition = new Position(range.startLineNumber, atColumnNumber + this.symbol.length); + + return new Colon(Range.fromPositions( + startPosition, + endPosition, + )); + } + + /** + * Returns a string representation of the token.
+ */ + public override toString(): string { + return `colon${this.range}`; + } +} diff --git a/src/vs/editor/common/codecs/simpleCodec/tokens/formFeed.ts b/src/vs/editor/common/codecs/simpleCodec/tokens/formFeed.ts index ab40192f459e4..35f55dd8a2ab4 100644 --- a/src/vs/editor/common/codecs/simpleCodec/tokens/formFeed.ts +++ b/src/vs/editor/common/codecs/simpleCodec/tokens/formFeed.ts @@ -18,6 +18,13 @@ export class FormFeed extends BaseToken { */ public static readonly symbol: string = '\f'; + /** + * Return text representation of the token. + */ + public get text(): string { + return FormFeed.symbol; + } + /** * Create new `FormFeed` token with range inside * the given `Line` at the given `column number`. diff --git a/src/vs/editor/common/codecs/simpleCodec/tokens/hash.ts b/src/vs/editor/common/codecs/simpleCodec/tokens/hash.ts new file mode 100644 index 0000000000000..372e0b2ee3d9c --- /dev/null +++ b/src/vs/editor/common/codecs/simpleCodec/tokens/hash.ts @@ -0,0 +1,54 @@ +/*--------------------------------------------------------------------------------------------- + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT License. See License.txt in the project root for license information. + *--------------------------------------------------------------------------------------------*/ + +import { BaseToken } from '../../baseToken.js'; +import { Range } from '../../../core/range.js'; +import { Position } from '../../../core/position.js'; +import { Line } from '../../linesCodec/tokens/line.js'; + +/** + * A token that represents a `#` with a `range`. The `range` + * value reflects the position of the token in the original data. + */ +export class Hash extends BaseToken { + /** + * The underlying symbol of the `Hash` token. + */ + public static readonly symbol: string = '#'; + + /** + * Return text representation of the token. 
+ */ + public get text(): string { + return Hash.symbol; + } + + /** + * Create new `Hash` token with range inside + * the given `Line` at the given `column number`. + */ + public static newOnLine( + line: Line, + atColumnNumber: number, + ): Hash { + const { range } = line; + + const startPosition = new Position(range.startLineNumber, atColumnNumber); + // the hash token length is 1, hence `+ 1` + const endPosition = new Position(range.startLineNumber, atColumnNumber + this.symbol.length); + + return new Hash(Range.fromPositions( + startPosition, + endPosition, + )); + } + + /** + * Returns a string representation of the token. + */ + public override toString(): string { + return `hash${this.range}`; + } +} diff --git a/src/vs/editor/common/codecs/simpleCodec/tokens/parentheses.ts b/src/vs/editor/common/codecs/simpleCodec/tokens/parentheses.ts new file mode 100644 index 0000000000000..b67f4e10f5c71 --- /dev/null +++ b/src/vs/editor/common/codecs/simpleCodec/tokens/parentheses.ts @@ -0,0 +1,99 @@ +/*--------------------------------------------------------------------------------------------- + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT License. See License.txt in the project root for license information. + *--------------------------------------------------------------------------------------------*/ + +import { BaseToken } from '../../baseToken.js'; +import { Range } from '../../../core/range.js'; +import { Position } from '../../../core/position.js'; +import { Line } from '../../linesCodec/tokens/line.js'; + +/** + * A token that represents a `(` with a `range`. The `range` + * value reflects the position of the token in the original data. + */ +export class LeftParenthesis extends BaseToken { + /** + * The underlying symbol of the `LeftParenthesis` token. + */ + public static readonly symbol: string = '('; + + /** + * Return text representation of the token. 
+ */ + public get text(): string { + return LeftParenthesis.symbol; + } + + /** + * Create new `LeftParenthesis` token with range inside + * the given `Line` at the given `column number`. + */ + public static newOnLine( + line: Line, + atColumnNumber: number, + ): LeftParenthesis { + const { range } = line; + + const startPosition = new Position(range.startLineNumber, atColumnNumber); + // the parenthesis token length is 1, hence `+ 1` + const endPosition = new Position(range.startLineNumber, atColumnNumber + this.symbol.length); + + return new LeftParenthesis(Range.fromPositions( + startPosition, + endPosition, + )); + } + + /** + * Returns a string representation of the token. + */ + public override toString(): string { + return `left-parenthesis${this.range}`; + } +} + +/** + * A token that represents a `)` with a `range`. The `range` + * value reflects the position of the token in the original data. + */ +export class RightParenthesis extends BaseToken { + /** + * The underlying symbol of the `RightParenthesis` token. + */ + public static readonly symbol: string = ')'; + + /** + * Return text representation of the token. + */ + public get text(): string { + return RightParenthesis.symbol; + } + + /** + * Create new `RightParenthesis` token with range inside + * the given `Line` at the given `column number`. + */ + public static newOnLine( + line: Line, + atColumnNumber: number, + ): RightParenthesis { + const { range } = line; + + const startPosition = new Position(range.startLineNumber, atColumnNumber); + // the parenthesis token length is 1, hence `+ 1` + const endPosition = new Position(range.startLineNumber, atColumnNumber + this.symbol.length); + + return new RightParenthesis(Range.fromPositions( + startPosition, + endPosition, + )); + } + + /** + * Returns a string representation of the token. 
+ */ + public override toString(): string { + return `right-parenthesis${this.range}`; + } +} diff --git a/src/vs/editor/common/codecs/simpleCodec/tokens/space.ts b/src/vs/editor/common/codecs/simpleCodec/tokens/space.ts index 9961c38ece9ce..18a5dff4a0a94 100644 --- a/src/vs/editor/common/codecs/simpleCodec/tokens/space.ts +++ b/src/vs/editor/common/codecs/simpleCodec/tokens/space.ts @@ -18,6 +18,13 @@ export class Space extends BaseToken { */ public static readonly symbol: string = ' '; + /** + * Return text representation of the token. + */ + public get text(): string { + return Space.symbol; + } + /** * Create new `Space` token with range inside * the given `Line` at the given `column number`. diff --git a/src/vs/editor/common/codecs/simpleCodec/tokens/tab.ts b/src/vs/editor/common/codecs/simpleCodec/tokens/tab.ts index aab11327bc156..7f511c2626bfd 100644 --- a/src/vs/editor/common/codecs/simpleCodec/tokens/tab.ts +++ b/src/vs/editor/common/codecs/simpleCodec/tokens/tab.ts @@ -18,6 +18,13 @@ export class Tab extends BaseToken { */ public static readonly symbol: string = '\t'; + /** + * Return text representation of the token. + */ + public get text(): string { + return Tab.symbol; + } + /** * Create new `Tab` token with range inside * the given `Line` at the given `column number`. diff --git a/src/vs/editor/common/codecs/simpleCodec/tokens/verticalTab.ts b/src/vs/editor/common/codecs/simpleCodec/tokens/verticalTab.ts index 11e5ca6efabfa..c6b87db0e37be 100644 --- a/src/vs/editor/common/codecs/simpleCodec/tokens/verticalTab.ts +++ b/src/vs/editor/common/codecs/simpleCodec/tokens/verticalTab.ts @@ -18,6 +18,13 @@ export class VerticalTab extends BaseToken { */ public static readonly symbol: string = '\v'; + /** + * Return text representation of the token. + */ + public get text(): string { + return VerticalTab.symbol; + } + /** * Create new `VerticalTab` token with range inside * the given `Line` at the given `column number`. 
diff --git a/src/vs/editor/test/common/codecs/markdownDecoder.test.ts b/src/vs/editor/test/common/codecs/markdownDecoder.test.ts new file mode 100644 index 0000000000000..bff4b428ae1d6 --- /dev/null +++ b/src/vs/editor/test/common/codecs/markdownDecoder.test.ts @@ -0,0 +1,332 @@ +/*--------------------------------------------------------------------------------------------- + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT License. See License.txt in the project root for license information. + *--------------------------------------------------------------------------------------------*/ + +import { TestDecoder } from '../utils/testDecoder.js'; +import { Range } from '../../../common/core/range.js'; +import { VSBuffer } from '../../../../base/common/buffer.js'; +import { newWriteableStream } from '../../../../base/common/stream.js'; +import { Tab } from '../../../common/codecs/simpleCodec/tokens/tab.js'; +import { Word } from '../../../common/codecs/simpleCodec/tokens/word.js'; +import { Space } from '../../../common/codecs/simpleCodec/tokens/space.js'; +import { NewLine } from '../../../common/codecs/linesCodec/tokens/newLine.js'; +import { VerticalTab } from '../../../common/codecs/simpleCodec/tokens/verticalTab.js'; +import { MarkdownLink } from '../../../common/codecs/markdownCodec/tokens/markdownLink.js'; +import { ensureNoDisposablesAreLeakedInTestSuite } from '../../../../base/test/common/utils.js'; +import { MarkdownDecoder, TMarkdownToken } from '../../../common/codecs/markdownCodec/markdownDecoder.js'; +import { FormFeed } from '../../../common/codecs/simpleCodec/tokens/formFeed.js'; +import { LeftParenthesis, RightParenthesis } from '../../../common/codecs/simpleCodec/tokens/parentheses.js'; +import { LeftBracket, RightBracket } from '../../../common/codecs/simpleCodec/tokens/brackets.js'; +import { CarriageReturn } from '../../../common/codecs/linesCodec/tokens/carriageReturn.js'; +import assert from 'assert'; + 
+/** + * A reusable test utility that asserts that a `TestMarkdownDecoder` instance + * correctly decodes `inputData` into a stream of `TMarkdownToken` tokens. + * + * ## Examples + * + * ```typescript + * // create a new test utility instance + * const test = testDisposables.add(new TestMarkdownDecoder()); + * + * // run the test + * await test.run( + * ' hello [world](/etc/hosts)!', + * [ + * new Space(new Range(1, 1, 1, 2)), + * new Word(new Range(1, 2, 1, 7), 'hello'), + * new Space(new Range(1, 7, 1, 8)), + * new MarkdownLink(1, 8, '[world]', '(/etc/hosts)'), + * new Word(new Range(1, 27, 1, 28), '!'), + * new NewLine(new Range(1, 28, 1, 29)), + * ], + * ); + */ +export class TestMarkdownDecoder extends TestDecoder { + constructor() { + const stream = newWriteableStream(null); + + super(stream, new MarkdownDecoder(stream)); + } +} + +suite('MarkdownDecoder', () => { + const testDisposables = ensureNoDisposablesAreLeakedInTestSuite(); + + test('produces expected tokens', async () => { + const test = testDisposables.add( + new TestMarkdownDecoder(), + ); + + await test.run( + ' hello world\nhow are\t you [caption text](./some/file/path/refer🎨nce.md)?\v\n\n[(example)](another/path/with[-and-]-chars/folder)\t \n\t[#file:something.txt](/absolute/path/to/something.txt)', + [ + // first line + new Space(new Range(1, 1, 1, 2)), + new Word(new Range(1, 2, 1, 7), 'hello'), + new Space(new Range(1, 7, 1, 8)), + new Word(new Range(1, 8, 1, 13), 'world'), + new NewLine(new Range(1, 13, 1, 14)), + // second line + new Word(new Range(2, 1, 2, 4), 'how'), + new Space(new Range(2, 4, 2, 5)), + new Word(new Range(2, 5, 2, 8), 'are'), + new Tab(new Range(2, 8, 2, 9)), + new Space(new Range(2, 9, 2, 10)), + new Word(new Range(2, 10, 2, 13), 'you'), + new Space(new Range(2, 13, 2, 14)), + new MarkdownLink(2, 14, '[caption text]', '(./some/file/path/refer🎨nce.md)'), + new Word(new Range(2, 60, 2, 61), '?'), + new VerticalTab(new Range(2, 61, 2, 62)), + new NewLine(new Range(2, 62, 
2, 63)), + // third line + new NewLine(new Range(3, 1, 3, 2)), + // fourth line + new MarkdownLink(4, 1, '[(example)]', '(another/path/with[-and-]-chars/folder)'), + new Tab(new Range(4, 51, 4, 52)), + new Space(new Range(4, 52, 4, 53)), + new NewLine(new Range(4, 53, 4, 54)), + // fifth line + new Tab(new Range(5, 1, 5, 2)), + new MarkdownLink(5, 2, '[#file:something.txt]', '(/absolute/path/to/something.txt)'), + ], + ); + }); + + test('handles complex cases', async () => { + const test = testDisposables.add( + new TestMarkdownDecoder(), + ); + + const inputLines = [ + // tests that the link caption contain a chat prompt `#file:` reference, while + // the file path can contain other `graphical characters` + '\v\t[#file:./another/path/to/file.txt](./real/filepath/file◆name.md)', + // tests that the link file path contain a chat prompt `#file:` reference, + // `spaces`, `emojies`, and other `graphical characters` + ' [reference ∘ label](/absolute/pa th/to-#file:file.txt/f🥸⚡️le.md)', + // tests that link caption and file path can contain `parentheses`, `spaces`, and + // `emojies` + '\f[!(hello)!](./w(())rld/nice-🦚-filen(a)me.git))\n\t', + // tests that the link caption can be empty, while the file path can contain `square brackets` + '[](./s[]me/pa[h!) 
', + ]; + + await test.run( + inputLines, + [ + // `1st` line + new VerticalTab(new Range(1, 1, 1, 2)), + new Tab(new Range(1, 2, 1, 3)), + new MarkdownLink(1, 3, '[#file:./another/path/to/file.txt]', '(./real/filepath/file◆name.md)'), + new NewLine(new Range(1, 67, 1, 68)), + // `2nd` line + new Space(new Range(2, 1, 2, 2)), + new MarkdownLink(2, 2, '[reference ∘ label]', '(/absolute/pa th/to-#file:file.txt/f🥸⚡️le.md)'), + new NewLine(new Range(2, 67, 2, 68)), + // `3rd` line + new FormFeed(new Range(3, 1, 3, 2)), + new MarkdownLink(3, 2, '[!(hello)!]', '(./w(())rld/nice-🦚-filen(a)me.git)'), + new RightParenthesis(new Range(3, 48, 3, 49)), + new NewLine(new Range(3, 49, 3, 50)), + // `4th` line + new Tab(new Range(4, 1, 4, 2)), + new NewLine(new Range(4, 2, 4, 3)), + // `5th` line + new MarkdownLink(5, 1, '[]', '(./s[]me/pa[h!)'), + new Space(new Range(5, 18, 5, 19)), + ], + ); + }); + + suite('broken links', () => { + test('incomplete/invalid links', async () => { + const test = testDisposables.add( + new TestMarkdownDecoder(), + ); + + const inputLines = [ + // incomplete link reference with empty caption + '[ ](./real/file path/file⇧name.md', + // space between caption and reference is disallowed + '[link text] (./file path/name.txt)', + ]; + + await test.run( + inputLines, + [ + // `1st` line + new LeftBracket(new Range(1, 1, 1, 2)), + new Space(new Range(1, 2, 1, 3)), + new RightBracket(new Range(1, 3, 1, 4)), + new LeftParenthesis(new Range(1, 4, 1, 5)), + new Word(new Range(1, 5, 1, 5 + 11), './real/file'), + new Space(new Range(1, 16, 1, 17)), + new Word(new Range(1, 17, 1, 17 + 17), 'path/file⇧name.md'), + new NewLine(new Range(1, 34, 1, 35)), + // `2nd` line + new LeftBracket(new Range(2, 1, 2, 2)), + new Word(new Range(2, 2, 2, 2 + 4), 'link'), + new Space(new Range(2, 6, 2, 7)), + new Word(new Range(2, 7, 2, 7 + 4), 'text'), + new RightBracket(new Range(2, 11, 2, 12)), + new Space(new Range(2, 12, 2, 13)), + new LeftParenthesis(new Range(2, 13, 2, 
14)), + new Word(new Range(2, 14, 2, 14 + 6), './file'), + new Space(new Range(2, 20, 2, 21)), + new Word(new Range(2, 21, 2, 21 + 13), 'path/name.txt'), + new RightParenthesis(new Range(2, 34, 2, 35)), + ], + ); + }); + + suite('stop characters inside caption/reference (new lines)', () => { + for (const stopCharacter of [CarriageReturn, NewLine]) { + let characterName = ''; + + if (stopCharacter === CarriageReturn) { + characterName = '\\r'; + } + if (stopCharacter === NewLine) { + characterName = '\\n'; + } + + assert( + characterName !== '', + 'The "characterName" must be set, got "empty line".', + ); + + test(`stop character - "${characterName}"`, async () => { + const test = testDisposables.add( + new TestMarkdownDecoder(), + ); + + const inputLines = [ + // stop character inside link caption + `[haa${stopCharacter.symbol}loů](./real/💁/name.txt)`, + // stop character inside link reference + `[ref text](/etc/pat${stopCharacter.symbol}h/to/file.md)`, + // stop character between line caption and link reference is disallowed + `[text]${stopCharacter.symbol}(/etc/ path/file.md)`, + ]; + + + await test.run( + inputLines, + [ + // `1st` input line + new LeftBracket(new Range(1, 1, 1, 2)), + new Word(new Range(1, 2, 1, 2 + 3), 'haa'), + new stopCharacter(new Range(1, 5, 1, 6)), // <- stop character + new Word(new Range(2, 1, 2, 1 + 3), 'loů'), + new RightBracket(new Range(2, 4, 2, 5)), + new LeftParenthesis(new Range(2, 5, 2, 6)), + new Word(new Range(2, 6, 2, 6 + 18), './real/💁/name.txt'), + new RightParenthesis(new Range(2, 24, 2, 25)), + new NewLine(new Range(2, 25, 2, 26)), + // `2nd` input line + new LeftBracket(new Range(3, 1, 3, 2)), + new Word(new Range(3, 2, 3, 2 + 3), 'ref'), + new Space(new Range(3, 5, 3, 6)), + new Word(new Range(3, 6, 3, 6 + 4), 'text'), + new RightBracket(new Range(3, 10, 3, 11)), + new LeftParenthesis(new Range(3, 11, 3, 12)), + new Word(new Range(3, 12, 3, 12 + 8), '/etc/pat'), + new stopCharacter(new Range(3, 20, 3, 21)), // <- stop 
character + new Word(new Range(4, 1, 4, 1 + 12), 'h/to/file.md'), + new RightParenthesis(new Range(4, 13, 4, 14)), + new NewLine(new Range(4, 14, 4, 15)), + // `3nd` input line + new LeftBracket(new Range(5, 1, 5, 2)), + new Word(new Range(5, 2, 5, 2 + 4), 'text'), + new RightBracket(new Range(5, 6, 5, 7)), + new stopCharacter(new Range(5, 7, 5, 8)), // <- stop character + new LeftParenthesis(new Range(6, 1, 6, 2)), + new Word(new Range(6, 2, 6, 2 + 5), '/etc/'), + new Space(new Range(6, 7, 6, 8)), + new Word(new Range(6, 8, 6, 8 + 12), 'path/file.md'), + new RightParenthesis(new Range(6, 20, 6, 21)), + ], + ); + }); + } + }); + + /** + * Same as above but these stop characters do not move the caret to the next line. + */ + suite('stop characters inside caption/reference (same line)', () => { + for (const stopCharacter of [VerticalTab, FormFeed]) { + let characterName = ''; + + if (stopCharacter === VerticalTab) { + characterName = '\\v'; + } + if (stopCharacter === FormFeed) { + characterName = '\\f'; + } + + assert( + characterName !== '', + 'The "characterName" must be set, got "empty line".', + ); + + test(`stop character - "${characterName}"`, async () => { + const test = testDisposables.add( + new TestMarkdownDecoder(), + ); + + const inputLines = [ + // stop character inside link caption + `[haa${stopCharacter.symbol}loů](./real/💁/name.txt)`, + // stop character inside link reference + `[ref text](/etc/pat${stopCharacter.symbol}h/to/file.md)`, + // stop character between line caption and link reference is disallowed + `[text]${stopCharacter.symbol}(/etc/ path/file.md)`, + ]; + + + await test.run( + inputLines, + [ + // `1st` input line + new LeftBracket(new Range(1, 1, 1, 2)), + new Word(new Range(1, 2, 1, 2 + 3), 'haa'), + new stopCharacter(new Range(1, 5, 1, 6)), // <- stop character + new Word(new Range(1, 6, 1, 6 + 3), 'loů'), + new RightBracket(new Range(1, 9, 1, 10)), + new LeftParenthesis(new Range(1, 10, 1, 11)), + new Word(new Range(1, 11, 1, 11 + 
18), './real/💁/name.txt'), + new RightParenthesis(new Range(1, 29, 1, 30)), + new NewLine(new Range(1, 30, 1, 31)), + // `2nd` input line + new LeftBracket(new Range(2, 1, 2, 2)), + new Word(new Range(2, 2, 2, 2 + 3), 'ref'), + new Space(new Range(2, 5, 2, 6)), + new Word(new Range(2, 6, 2, 6 + 4), 'text'), + new RightBracket(new Range(2, 10, 2, 11)), + new LeftParenthesis(new Range(2, 11, 2, 12)), + new Word(new Range(2, 12, 2, 12 + 8), '/etc/pat'), + new stopCharacter(new Range(2, 20, 2, 21)), // <- stop character + new Word(new Range(2, 21, 2, 21 + 12), 'h/to/file.md'), + new RightParenthesis(new Range(2, 33, 2, 34)), + new NewLine(new Range(2, 34, 2, 35)), + // `3nd` input line + new LeftBracket(new Range(3, 1, 3, 2)), + new Word(new Range(3, 2, 3, 2 + 4), 'text'), + new RightBracket(new Range(3, 6, 3, 7)), + new stopCharacter(new Range(3, 7, 3, 8)), // <- stop character + new LeftParenthesis(new Range(3, 8, 3, 9)), + new Word(new Range(3, 9, 3, 9 + 5), '/etc/'), + new Space(new Range(3, 14, 3, 15)), + new Word(new Range(3, 15, 3, 15 + 12), 'path/file.md'), + new RightParenthesis(new Range(3, 27, 3, 28)), + ], + ); + }); + } + }); + }); +}); diff --git a/src/vs/editor/test/common/codecs/simpleDecoder.test.ts b/src/vs/editor/test/common/codecs/simpleDecoder.test.ts index 2e57a1c8219ba..b0804a2fe5fa5 100644 --- a/src/vs/editor/test/common/codecs/simpleDecoder.test.ts +++ b/src/vs/editor/test/common/codecs/simpleDecoder.test.ts @@ -8,6 +8,7 @@ import { Range } from '../../../common/core/range.js'; import { VSBuffer } from '../../../../base/common/buffer.js'; import { newWriteableStream } from '../../../../base/common/stream.js'; import { Tab } from '../../../common/codecs/simpleCodec/tokens/tab.js'; +import { Hash } from '../../../common/codecs/simpleCodec/tokens/hash.js'; import { Word } from '../../../common/codecs/simpleCodec/tokens/word.js'; import { Space } from '../../../common/codecs/simpleCodec/tokens/space.js'; import { NewLine } from 
'../../../common/codecs/linesCodec/tokens/newLine.js'; @@ -16,6 +17,8 @@ import { VerticalTab } from '../../../common/codecs/simpleCodec/tokens/verticalT import { CarriageReturn } from '../../../common/codecs/linesCodec/tokens/carriageReturn.js'; import { ensureNoDisposablesAreLeakedInTestSuite } from '../../../../base/test/common/utils.js'; import { SimpleDecoder, TSimpleToken } from '../../../common/codecs/simpleCodec/simpleDecoder.js'; +import { LeftBracket, RightBracket } from '../../../common/codecs/simpleCodec/tokens/brackets.js'; +import { LeftParenthesis, RightParenthesis } from '../../../common/codecs/simpleCodec/tokens/parentheses.js'; /** * A reusable test utility that asserts that a `SimpleDecoder` instance @@ -57,7 +60,7 @@ suite('SimpleDecoder', () => { ); await test.run( - ' hello world\nhow are\t you?\v\n\n (test) [!@#$%^&*_+=]\f \n\t\t🤗❤ \t\n hey\vthere\r\n\r\n', + ' hello world\nhow are\t you?\v\n\n (test) [!@#$%^🦄&*_+=]\f \n\t\t🤗❤ \t\n hey\vthere\r\n\r\n', [ // first line new Space(new Range(1, 1, 1, 2)), @@ -80,14 +83,20 @@ suite('SimpleDecoder', () => { new Space(new Range(4, 1, 4, 2)), new Space(new Range(4, 2, 4, 3)), new Space(new Range(4, 3, 4, 4)), - new Word(new Range(4, 4, 4, 10), '(test)'), + new LeftParenthesis(new Range(4, 4, 4, 5)), + new Word(new Range(4, 5, 4, 5 + 4), 'test'), + new RightParenthesis(new Range(4, 9, 4, 10)), new Space(new Range(4, 10, 4, 11)), new Space(new Range(4, 11, 4, 12)), - new Word(new Range(4, 12, 4, 25), '[!@#$%^&*_+=]'), - new FormFeed(new Range(4, 25, 4, 26)), - new Space(new Range(4, 26, 4, 27)), - new Space(new Range(4, 27, 4, 28)), - new NewLine(new Range(4, 28, 4, 29)), + new LeftBracket(new Range(4, 12, 4, 13)), + new Word(new Range(4, 13, 4, 13 + 2), '!@'), + new Hash(new Range(4, 15, 4, 16)), + new Word(new Range(4, 16, 4, 16 + 10), '$%^🦄&*_+='), + new RightBracket(new Range(4, 26, 4, 27)), + new FormFeed(new Range(4, 27, 4, 28)), + new Space(new Range(4, 28, 4, 29)), + new Space(new Range(4, 29, 
4, 30)), + new NewLine(new Range(4, 30, 4, 31)), // fifth line new Tab(new Range(5, 1, 5, 2)), new Tab(new Range(5, 2, 5, 3)), diff --git a/src/vs/editor/test/common/utils/testDecoder.ts b/src/vs/editor/test/common/utils/testDecoder.ts index e9ee9ce1067ea..bd0a7a07e6ccb 100644 --- a/src/vs/editor/test/common/utils/testDecoder.ts +++ b/src/vs/editor/test/common/utils/testDecoder.ts @@ -7,6 +7,7 @@ import assert from 'assert'; import { VSBuffer } from '../../../../base/common/buffer.js'; import { randomInt } from '../../../../base/common/numbers.js'; import { BaseToken } from '../../../common/codecs/baseToken.js'; +import { assertDefined } from '../../../../base/common/types.js'; import { Disposable } from '../../../../base/common/lifecycle.js'; import { WriteableStream } from '../../../../base/common/stream.js'; import { BaseDecoder } from '../../../../base/common/codecs/baseDecoder.js'; @@ -61,9 +62,15 @@ export class TestDecoder> extends * that the decoder produces the `expectedTokens` sequence of tokens. 
*/ public async run( - inputData: string, + inputData: string | string[], expectedTokens: readonly T[], ): Promise { + // if input data was passed as an array of lines, + // join them into a single string with newlines + if (Array.isArray(inputData)) { + inputData = inputData.join('\n'); + } + // write the data to the stream after a short delay to ensure // that the the data is sent after the reading loop below setTimeout(() => { @@ -100,6 +107,11 @@ export class TestDecoder> extends const expectedToken = expectedTokens[i]; const receivedtoken = receivedTokens[i]; + assertDefined( + receivedtoken, + `Expected token '${i}' to be '${expectedToken}', got 'undefined'.`, + ); + assert( receivedtoken.equals(expectedToken), `Expected token '${i}' to be '${expectedToken}', got '${receivedtoken}'.`, diff --git a/src/vs/workbench/contrib/chat/browser/contrib/chatDynamicVariables.ts b/src/vs/workbench/contrib/chat/browser/contrib/chatDynamicVariables.ts index f4a6d525ce8e8..8a557e382a929 100644 --- a/src/vs/workbench/contrib/chat/browser/contrib/chatDynamicVariables.ts +++ b/src/vs/workbench/contrib/chat/browser/contrib/chatDynamicVariables.ts @@ -140,7 +140,7 @@ export class ChatDynamicVariableModel extends Disposable implements IChatWidgetC this.widget.refreshParsedInput(); // if the `prompt snippets` feature is enabled, and file is a `prompt snippet`, - // start resolving nested file references immediatelly and subscribe to updates + // start resolving nested file references immediately and subscribe to updates if (variable instanceof ChatFileReference && variable.isPromptSnippetFile) { // subscribe to variable changes variable.onUpdate(() => { diff --git a/src/vs/workbench/contrib/chat/common/codecs/chatPromptCodec/chatPromptDecoder.ts b/src/vs/workbench/contrib/chat/common/codecs/chatPromptCodec/chatPromptDecoder.ts index 57b7f0955b82c..06d8e1620ec6b 100644 --- a/src/vs/workbench/contrib/chat/common/codecs/chatPromptCodec/chatPromptDecoder.ts +++ 
b/src/vs/workbench/contrib/chat/common/codecs/chatPromptCodec/chatPromptDecoder.ts @@ -5,41 +5,258 @@ import { FileReference } from './tokens/fileReference.js'; import { VSBuffer } from '../../../../../../base/common/buffer.js'; +import { Range } from '../../../../../../editor/common/core/range.js'; import { ReadableStream } from '../../../../../../base/common/stream.js'; import { BaseDecoder } from '../../../../../../base/common/codecs/baseDecoder.js'; +import { Tab } from '../../../../../../editor/common/codecs/simpleCodec/tokens/tab.js'; import { Word } from '../../../../../../editor/common/codecs/simpleCodec/tokens/word.js'; -import { SimpleDecoder, TSimpleToken } from '../../../../../../editor/common/codecs/simpleCodec/simpleDecoder.js'; +import { Hash } from '../../../../../../editor/common/codecs/simpleCodec/tokens/hash.js'; +import { Space } from '../../../../../../editor/common/codecs/simpleCodec/tokens/space.js'; +import { Colon } from '../../../../../../editor/common/codecs/simpleCodec/tokens/colon.js'; +import { NewLine } from '../../../../../../editor/common/codecs/linesCodec/tokens/newLine.js'; +import { FormFeed } from '../../../../../../editor/common/codecs/simpleCodec/tokens/formFeed.js'; +import { VerticalTab } from '../../../../../../editor/common/codecs/simpleCodec/tokens/verticalTab.js'; +import { MarkdownLink } from '../../../../../../editor/common/codecs/markdownCodec/tokens/markdownLink.js'; +import { CarriageReturn } from '../../../../../../editor/common/codecs/linesCodec/tokens/carriageReturn.js'; +import { ParserBase, TAcceptTokenResult } from '../../../../../../editor/common/codecs/simpleCodec/parserBase.js'; +import { MarkdownDecoder, TMarkdownToken } from '../../../../../../editor/common/codecs/markdownCodec/markdownDecoder.js'; /** - * Tokens handled by the `ChatPromptDecoder` decoder. + * Tokens produced by this decoder. 
*/ -export type TChatPromptToken = FileReference; +export type TChatPromptToken = MarkdownLink | FileReference; + +/** + * The Parser responsible for processing a `prompt variable name` syntax from + * a sequence of tokens (e.g., `#variable:`). + * + * The parsing process starts with single `#` token, then can accept `file` word, + * followed by the `:` token, resulting in the tokens sequence equivalent to + * the `#file:` text sequence. In this successful case, the parser transitions into + * the {@linkcode PartialPromptFileReference} parser to continue the parsing process. + */ +class PartialPromptVariableName extends ParserBase { + constructor(token: Hash) { + super([token]); + } + + public accept(token: TMarkdownToken): TAcceptTokenResult { + // given we currently hold the `#` token, if we receive a `file` word, + // we can successfully proceed to the next token in the sequence + if (token instanceof Word) { + if (token.text === 'file') { + this.currentTokens.push(token); + + return { + result: 'success', + nextParser: this, + wasTokenConsumed: true, + }; + } + + return { + result: 'failure', + wasTokenConsumed: false, + }; + } + + // if we receive the `:` token, we can successfully proceed to the next + // token in the sequence `only if` the previous token was a `file` word + // therefore for currently tokens sequence equivalent to the `#file` text + if (token instanceof Colon) { + const lastToken = this.currentTokens[this.currentTokens.length - 1]; + + if (lastToken instanceof Word) { + this.currentTokens.push(token); + + return { + result: 'success', + nextParser: new PartialPromptFileReference(this.currentTokens), + wasTokenConsumed: true, + }; + } + } + + // all other cases are failures and we don't consume the offending token + return { + result: 'failure', + wasTokenConsumed: false, + }; + } +} + +/** + * List of characters that stop a prompt variable sequence. 
+ */ +const PROMPT_FILE_REFERENCE_STOP_CHARACTERS: readonly string[] = [Space, Tab, CarriageReturn, NewLine, VerticalTab, FormFeed] + .map((token) => { return token.symbol; }); + +/** + * Parser responsible for processing the `file reference` syntax part from + * a sequence of tokens (e.g., #variable:`./some/file/path.md`). + * + * The parsing process starts with the sequence of `#`, `file`, and `:` tokens, + * then can accept a sequence of tokens until one of the tokens defined in + * the {@linkcode PROMPT_FILE_REFERENCE_STOP_CHARACTERS} list is encountered. + * This sequence of tokens is treated as a `file path` part of the `#file:` variable, + * and in the successful case, the parser transitions into the {@linkcode FileReference} + * token which signifies the end of the file reference text parsing process. + */ +class PartialPromptFileReference extends ParserBase { + /** + * Set of tokens that were accumulated so far. + */ + private readonly fileReferenceTokens: (Hash | Word | Colon)[]; + + constructor(tokens: (Hash | Word | Colon)[]) { + super([]); + + this.fileReferenceTokens = tokens; + } + + /** + * List of tokens that were accumulated so far. + */ + public override get tokens(): readonly (Hash | Word | Colon)[] { + return [...this.fileReferenceTokens, ...this.currentTokens]; + } + + /** + * Return the `FileReference` instance created from the current object. 
+ */ + public asFileReference(): FileReference { + // use only tokens in the `currentTokens` list to + // create the path component of the file reference + const path = this.currentTokens + .map((token) => { return token.text; }) + .join(''); + + const firstToken = this.tokens[0]; + + const range = new Range( + firstToken.range.startLineNumber, + firstToken.range.startColumn, + firstToken.range.startLineNumber, + firstToken.range.startColumn + FileReference.TOKEN_START.length + path.length, + ); + + return new FileReference(range, path); + } + + public accept(token: TMarkdownToken): TAcceptTokenResult { + // any of the stop characters breaks a prompt variable sequence + if (PROMPT_FILE_REFERENCE_STOP_CHARACTERS.includes(token.text)) { + return { + result: 'success', + wasTokenConsumed: false, + nextParser: this.asFileReference(), + }; + } + + // any other token can be included in the sequence so accumulate + // it and continue with using the current parser instance + this.currentTokens.push(token); + return { + result: 'success', + wasTokenConsumed: true, + nextParser: this, + }; + } +} /** * Decoder for the common chatbot prompt message syntax. * For instance, the file references `#file:./path/file.md` are handled by this decoder. */ -export class ChatPromptDecoder extends BaseDecoder { +export class ChatPromptDecoder extends BaseDecoder { + /** + * Currently active parser object that is used to parse a well-known sequence of + * tokens, for instance, a `file reference` that consists of `hash`, `word`, and + * `colon` tokens sequence plus following file path part. 
+ */ + private current?: PartialPromptVariableName; + constructor( stream: ReadableStream, ) { - super(new SimpleDecoder(stream)); + super(new MarkdownDecoder(stream)); } - protected override onStreamData(simpleToken: TSimpleToken): void { - // handle the word tokens only - if (!(simpleToken instanceof Word)) { + protected override onStreamData(token: TMarkdownToken): void { + // prompt variables always start with the `#` character, hence + // initiate a parser object if we encounter respective token and + // there is no active parser object present at the moment + if (token instanceof Hash && !this.current) { + this.current = new PartialPromptVariableName(token); + return; } - // handle file references only for now - const { text } = simpleToken; - if (!text.startsWith(FileReference.TOKEN_START)) { + // if current parser was not yet initiated, we are in the general + // "text" mode, therefore re-emit the token immediately and return + if (!this.current) { + // at the moment, the decoder outputs only specific markdown tokens, like + // the `markdown link` one, so re-emit only these tokens ignoring the rest + // + // note!
to make the decoder consistent with others we would need to: + // - re-emit all tokens here + // - collect all "text" sequences of tokens and emit them as a single + // "text" sequence token + if (token instanceof MarkdownLink) { + this._onData.fire(token); + } + return; } - this._onData.fire( - FileReference.fromWord(simpleToken), - ); + // if there is a current parser object, submit the token to it + // so it can progress with parsing the tokens sequence + const parseResult = this.current.accept(token); + + // process the parse result next + switch (parseResult.result) { + // in the case of success there might be 2 cases: + // 1) parsing fully completed and a parsed entity is returned back, in this case, + // emit the parsed token (e.g., a `link`) and reset current parser object + // 2) parsing is still in progress and the next parser object is returned, hence + // we need to update the current parser object with a new one and continue + case 'success': { + if (parseResult.nextParser instanceof FileReference) { + this._onData.fire(parseResult.nextParser); + delete this.current; + } else { + this.current = parseResult.nextParser; + } + + break; + } + // in the case of failure, reset the current parser object + case 'failure': { + delete this.current; + + // note!
when this decoder becomes consistent with other ones and hence starts emitting + // all token types, not just links, we would need to re-emit all the tokens that + // the parser object has accumulated so far + break; + } + } + + // if token was not consumed by the parser, call `onStreamData` again + // so the token is properly handled by the decoder in the case when a + // new sequence starts with this token + if (!parseResult.wasTokenConsumed) { + this.onStreamData(token); + } + } + + protected override onStreamEnd(): void { + // if the stream has ended and there is a current `PartialPromptFileReference` + // parser object, then the file reference was terminated by the end of the stream + if (this.current && this.current instanceof PartialPromptFileReference) { + this._onData.fire(this.current.asFileReference()); + delete this.current; + } + + super.onStreamEnd(); } } diff --git a/src/vs/workbench/contrib/chat/common/codecs/chatPromptCodec/tokens/fileReference.ts b/src/vs/workbench/contrib/chat/common/codecs/chatPromptCodec/tokens/fileReference.ts index 5a68344a7575b..224e0086851c1 100644 --- a/src/vs/workbench/contrib/chat/common/codecs/chatPromptCodec/tokens/fileReference.ts +++ b/src/vs/workbench/contrib/chat/common/codecs/chatPromptCodec/tokens/fileReference.ts @@ -8,14 +8,18 @@ import { Range } from '../../../../../../../editor/common/core/range.js'; import { BaseToken } from '../../../../../../../editor/common/codecs/baseToken.js'; import { Word } from '../../../../../../../editor/common/codecs/simpleCodec/tokens/word.js'; -// Start sequence for a file reference token in a prompt. +/** + * Start sequence for a file reference token in a prompt. + */ const TOKEN_START: string = '#file:'; /** * Object represents a file reference token inside a chatbot prompt. */ export class FileReference extends BaseToken { - // Start sequence for a file reference token in a prompt. + /** + * Start sequence for a file reference token in a prompt. 
+ */ public static readonly TOKEN_START = TOKEN_START; constructor( diff --git a/src/vs/workbench/contrib/chat/test/common/codecs/chatPromptDecoder.test.ts b/src/vs/workbench/contrib/chat/test/common/codecs/chatPromptDecoder.test.ts index 2ebadb798f189..56f4e30059cdb 100644 --- a/src/vs/workbench/contrib/chat/test/common/codecs/chatPromptDecoder.test.ts +++ b/src/vs/workbench/contrib/chat/test/common/codecs/chatPromptDecoder.test.ts @@ -9,6 +9,7 @@ import { newWriteableStream } from '../../../../../../base/common/stream.js'; import { TestDecoder } from '../../../../../../editor/test/common/utils/testDecoder.js'; import { FileReference } from '../../../common/codecs/chatPromptCodec/tokens/fileReference.js'; import { ensureNoDisposablesAreLeakedInTestSuite } from '../../../../../../base/test/common/utils.js'; +import { MarkdownLink } from '../../../../../../editor/common/codecs/markdownCodec/tokens/markdownLink.js'; import { ChatPromptDecoder, TChatPromptToken } from '../../../common/codecs/chatPromptCodec/chatPromptDecoder.js'; /** @@ -50,23 +51,39 @@ suite('ChatPromptDecoder', () => { new TestChatPromptDecoder(), ); + const contents = [ + '', + 'haalo!', + ' message 👾 message #file:./path/to/file1.md', + '', + '## Heading Title', + ' \t#file:a/b/c/filename2.md\t🖖\t#file:other-file.md', + ' [#file:reference.md](./reference.md)some text #file:/some/file/with/absolute/path.md', + ]; + await test.run( - '\nhaalo!\n message 👾 message #file:./path/to/file1.md \n\n \t#file:a/b/c/filename2.md\t🖖\t#file:other-file.md\nsome text #file:/some/file/with/absolute/path.md\t', + contents, [ new FileReference( new Range(3, 21, 3, 21 + 24), './path/to/file1.md', ), new FileReference( - new Range(5, 3, 5, 3 + 24), + new Range(6, 3, 6, 3 + 24), 'a/b/c/filename2.md', ), new FileReference( - new Range(5, 31, 5, 31 + 19), + new Range(6, 31, 6, 31 + 19), 'other-file.md', ), + new MarkdownLink( + 7, + 2, + '[#file:reference.md]', + '(./reference.md)', + ), new FileReference( - new Range(6, 
11, 6, 11 + 38), + new Range(7, 48, 7, 48 + 38), '/some/file/with/absolute/path.md', ), ], diff --git a/src/vs/workbench/contrib/chat/test/common/promptFileReference.test.ts b/src/vs/workbench/contrib/chat/test/common/promptFileReference.test.ts index 4a3b710e8ceb4..e281ff697fc2f 100644 --- a/src/vs/workbench/contrib/chat/test/common/promptFileReference.test.ts +++ b/src/vs/workbench/contrib/chat/test/common/promptFileReference.test.ts @@ -118,8 +118,7 @@ class TestPromptFileReference extends Disposable { )); // resolve the root file reference including all nested references - const resolvedReferences = (await rootReference.resolve(true)) - .flatten(); + const resolvedReferences = (await rootReference.resolve(true)).flatten(); assert.strictEqual( resolvedReferences.length, @@ -237,7 +236,7 @@ suite('PromptFileReference (Unix)', function () { }, { name: 'file2.prompt.md', - contents: '## Files\n\t- this file #file:folder1/file3.prompt.md \n\t- also this #file:./folder1/some-other-folder/file4.prompt.md please!\n ', + contents: '## Files\n\t- this file #file:folder1/file3.prompt.md \n\t- also this [file4.prompt.md](./folder1/some-other-folder/file4.prompt.md) please!\n ', }, { name: 'folder1', @@ -262,7 +261,7 @@ suite('PromptFileReference (Unix)', function () { children: [ { name: 'another-file.prompt.md', - contents: 'another-file.prompt.md contents\t #file:../file.txt', + contents: 'another-file.prompt.md contents\t [#file:file.txt](../file.txt)', }, { name: 'one_more_file_just_in_case.prompt.md', @@ -353,14 +352,14 @@ suite('PromptFileReference (Unix)', function () { }, { name: 'file2.prompt.md', - contents: `## Files\n\t- this file #file:folder1/file3.prompt.md \n\t- also this #file:./folder1/some-other-folder/file4.prompt.md\n\n#file:${rootFolder}/folder1/some-other-folder/file5.prompt.md\t please!\n\t#file:./file1.md `, + contents: `## Files\n\t- this file #file:folder1/file3.prompt.md \n\t- also this 
#file:./folder1/some-other-folder/file4.prompt.md\n\n#file:${rootFolder}/folder1/some-other-folder/file5.prompt.md\t please!\n\t[some (snippet!) #name))](./file1.md)`, }, { name: 'folder1', children: [ { name: 'file3.prompt.md', - contents: `\n\n\t- some seemingly random #file:${rootFolder}/folder1/some-other-folder/yetAnotherFolder🤭/another-file.prompt.md contents\n some more\t content`, + contents: `\n\n\t- some seemingly random [another-file.prompt.md](${rootFolder}/folder1/some-other-folder/yetAnotherFolder🤭/another-file.prompt.md) contents\n some more\t content`, }, { name: 'some-other-folder',