Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Expand monarch functionality to allow state access within rules #183463

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 21 additions & 1 deletion src/vs/editor/standalone/common/monarch/monarchCommon.ts
Original file line number Diff line number Diff line change
Expand Up @@ -68,10 +68,10 @@ export function isIAction(what: FuzzyAction): what is IAction {
}

/**
 * A compiled tokenizer rule as consumed by the Monarch lexer.
 */
export interface IRule {
// NOTE(review): the Rule class in this change keeps its regex in a private field and
// exposes it through resolveRegex(); confirm whether this member is still meant to be
// part of the interface.
regex: RegExp;
// Action taken by the lexer when this rule matches.
action: FuzzyAction;
// When true, the lexer only tries this rule at position 0 of a line.
matchOnlyAtLineStart: boolean;
// Rule name; the Rule class appends ': ' plus the regex source text to it.
name: string;
// Returns the regular expression to match with while the tokenizer is in `state`.
resolveRegex(state: string): RegExp;
}

export interface IAction {
Expand Down Expand Up @@ -175,6 +175,26 @@ export function substituteMatches(lexer: ILexerMin, str: string, id: string, mat
});
}

/**
 * substituteMatchesRe is used on lexer regex rules and can substitute predefined patterns:
 * 		$Sn  => n'th part of state ($S0 is the complete state, $S1 the first dot-separated part, ...)
 *
 * Both `$Sn` and `$sn` are recognized, with `n` being one or two decimal digits.
 * References to a part that does not exist are replaced with the empty string.
 *
 * @param lexer the lexer whose case settings are applied to the substituted text
 * @param str the regex source text that may contain `$Sn` references
 * @param state the current tokenizer state, e.g. `raw.delimiter`
 * @returns `str` with every `$Sn` reference replaced
 */
export function substituteMatchesRe(lexer: ILexerMin, str: string, state: string): string {
	const re = /\$[sS](\d\d?)/g;
	let stateMatches: string[] | null = null;
	return str.replace(re, function (_full: string, digits: string) {
		if (stateMatches === null) { // split state on demand
			stateMatches = state.split('.');
			stateMatches.unshift(state);
		}
		// Parse the index numerically: using the raw digits as a key would make a
		// zero-padded reference such as $S01 look up the property "01", which is
		// undefined and would end up as the literal text "undefined" in the regex.
		// The capture group always holds one or two digits, so it is never empty.
		const index = parseInt(digits, 10);
		if (index < stateMatches.length) {
			return fixCase(lexer, stateMatches[index]); //$Sn
		}
		return '';
	});
}

/**
* Find the tokenizer rules for a specific state (i.e. next action)
*/
Expand Down
39 changes: 34 additions & 5 deletions src/vs/editor/standalone/common/monarch/monarchCompile.ts
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,8 @@ function createKeywordMatcher(arr: string[], caseInsensitive: boolean = false):
* @example /@attr/ will be replaced with the value of lexer[attr]
* @example /@@text/ will not be replaced and will become /@text/.
*/
function compileRegExp(lexer: monarchCommon.ILexerMin, str: string): RegExp {
function compileRegExp<S extends true | false>(lexer: monarchCommon.ILexerMin, str: string, handleSn: S): S extends true ? RegExp | DynamicRegExp : RegExp;
function compileRegExp(lexer: monarchCommon.ILexerMin, str: string, handleSn: true | false): RegExp | DynamicRegExp {
// @@ must be interpreted as a literal @, so we replace all occurrences of @@ with a placeholder character
str = str.replace(/@@/g, `\x01`);

Expand Down Expand Up @@ -116,6 +117,24 @@ function compileRegExp(lexer: monarchCommon.ILexerMin, str: string): RegExp {
str = str.replace(/\x01/g, '@');

const flags = (lexer.ignoreCase ? 'i' : '') + (lexer.unicode ? 'u' : '');

// handle $Sn
if (handleSn) {
const match = str.match(/\$[sS](\d\d?)/g);
if (match) {
let lastState: string | null = null;
let lastRegEx: RegExp | null = null;
return (state: string) => {
if (lastRegEx && lastState === state) {
return lastRegEx;
}
lastState = state;
lastRegEx = new RegExp(monarchCommon.substituteMatchesRe(lexer, str, state), flags);
return lastRegEx;
};
}
}

return new RegExp(str, flags);
}

Expand Down Expand Up @@ -196,12 +215,12 @@ function createGuard(lexer: monarchCommon.ILexerMin, ruleName: string, tkey: str
else if (op === '~' || op === '!~') {
if (pat.indexOf('$') < 0) {
// precompile regular expression
const re = compileRegExp(lexer, '^' + pat + '$');
const re = compileRegExp(lexer, '^' + pat + '$', false);
tester = function (s) { return (op === '~' ? re.test(s) : !re.test(s)); };
}
else {
tester = function (s, id, matches, state) {
const re = compileRegExp(lexer, '^' + monarchCommon.substituteMatches(lexer, pat, id, matches, state) + '$');
const re = compileRegExp(lexer, '^' + monarchCommon.substituteMatches(lexer, pat, id, matches, state) + '$', false);
return re.test(s);
};
}
Expand Down Expand Up @@ -355,11 +374,13 @@ function compileAction(lexer: monarchCommon.ILexerMin, ruleName: string, action:
}
}

/**
 * A regular expression that depends on the tokenizer state: called with the current
 * state, it returns the RegExp to use. compileRegExp returns one of these (instead of a
 * plain RegExp) when `handleSn` is set and the pattern contains a `$Sn` reference.
 */
type DynamicRegExp = (state: string) => RegExp;

/**
* Helper class for creating matching rules
*/
class Rule implements monarchCommon.IRule {
public regex: RegExp = new RegExp('');
private regex: RegExp | DynamicRegExp = new RegExp('');
public action: monarchCommon.FuzzyAction = { token: '' };
public matchOnlyAtLineStart: boolean = false;
public name: string = '';
Expand All @@ -382,12 +403,20 @@ class Rule implements monarchCommon.IRule {

this.matchOnlyAtLineStart = (sregex.length > 0 && sregex[0] === '^');
this.name = this.name + ': ' + sregex;
this.regex = compileRegExp(lexer, '^(?:' + (this.matchOnlyAtLineStart ? sregex.substr(1) : sregex) + ')');
this.regex = compileRegExp(lexer, '^(?:' + (this.matchOnlyAtLineStart ? sregex.substr(1) : sregex) + ')', true);
}

/**
 * Compiles `act` for this rule (using the rule name for context) and stores the result
 * as the rule's action.
 */
public setAction(lexer: monarchCommon.ILexerMin, act: monarchCommon.IAction) {
	const compiledAction = compileAction(lexer, this.name, act);
	this.action = compiledAction;
}

/**
 * Returns the regular expression to match with for the given tokenizer state.
 * A static regex is returned as is; a state-dependent one is evaluated with `state`.
 */
public resolveRegex(state: string): RegExp {
	const compiled = this.regex;
	return compiled instanceof RegExp ? compiled : compiled(state);
}
}

/**
Expand Down
6 changes: 3 additions & 3 deletions src/vs/editor/standalone/common/monarch/monarchLexer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -519,8 +519,8 @@ export class MonarchTokenizer extends Disposable implements languages.ITokenizat
}
hasEmbeddedPopRule = true;

let regex = rule.regex;
const regexSource = rule.regex.source;
let regex = rule.resolveRegex(state.stack.state);
const regexSource = regex.source;
if (regexSource.substr(0, 4) === '^(?:' && regexSource.substr(regexSource.length - 1, 1) === ')') {
const flags = (regex.ignoreCase ? 'i' : '') + (regex.unicode ? 'u' : '');
regex = new RegExp(regexSource.substr(4, regexSource.length - 5), flags);
Expand Down Expand Up @@ -643,7 +643,7 @@ export class MonarchTokenizer extends Disposable implements languages.ITokenizat
const restOfLine = line.substr(pos);
for (const rule of rules) {
if (pos === 0 || !rule.matchOnlyAtLineStart) {
matches = restOfLine.match(rule.regex);
matches = restOfLine.match(rule.resolveRegex(state));
if (matches) {
matched = matches[0];
action = rule.action;
Expand Down
49 changes: 49 additions & 0 deletions src/vs/editor/standalone/test/browser/monarch.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -346,4 +346,53 @@ suite('Monarch', () => {
disposables.dispose();
});

// Verifies that a rule regex can reference the tokenizer state through $Sn: the raw-string
// delimiter captured by the opening rule is stored in the state name, and the closing
// rule only matches when the same delimiter follows the ')'.
test('microsoft/monaco-editor#3128: allow state access within rules', () => {
const disposables = new DisposableStore();
const configurationService = new StandaloneConfigurationService();
const languageService = disposables.add(new LanguageService());

const tokenizer = disposables.add(createMonarchTokenizer(languageService, 'test', {
ignoreCase: false,
encoding: /u|u8|U|L/,
tokenizer: {
root: [
// C++ 11 Raw String
// The delimiter between the opening quote and '(' is captured as $1 and becomes
// the second part of the next state ('raw.<delimiter>').
[/@encoding?R\"(?:([^ ()\\\t]*))\(/, { token: 'string.raw.begin', next: '@raw.$1' }],
],

raw: [
// $S2 is replaced with the second dot-separated part of the current state,
// i.e. the delimiter captured when the raw string was opened.
[/.*\)$S2\"/, 'string.raw', '@pop'],
[/.*/, 'string.raw']
],
},
}, configurationService));

const lines = [
`int main(){`,
``,
` auto s = R""""(`,
` Hello World`,
` )"""";`,
``,
` std::cout << "hello";`,
``,
`}`,
];

const actualTokens = getTokens(tokenizer, lines);
// The raw string spans three lines and must end right after the closing delimiter,
// so that the trailing ';' and the following lines are tokenized as plain source again.
assert.deepStrictEqual(actualTokens, [
[new Token(0, 'source.test', 'test')],
[],
[new Token(0, 'source.test', 'test'), new Token(10, 'string.raw.begin.test', 'test')],
[new Token(0, 'string.raw.test', 'test')],
[new Token(0, 'string.raw.test', 'test'), new Token(6, 'source.test', 'test')],
[],
[new Token(0, 'source.test', 'test')],
[],
[new Token(0, 'source.test', 'test')],
]);

disposables.dispose();
});

});
Loading