Skip to content

Commit

Permalink
Positional groups: new syntax, added index validating. The "m" flag i…
Browse files Browse the repository at this point in the history
…s not enabled by default now.
  • Loading branch information
kildom committed Feb 29, 2024
1 parent e78af3f commit 5eef61c
Show file tree
Hide file tree
Showing 5 changed files with 87 additions and 25 deletions.
16 changes: 14 additions & 2 deletions scripts/generate.ts
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,13 @@ repeat ${whitespaceWithoutNewLine};
} or {
newLine: \n;
} or {
label: ([a-zA-Z_], repeat [a-zA-Z0-9_]);
label: {
{
[a-zA-Z_], repeat [a-zA-Z0-9_];
} or {
[1-9], repeat [0-9];
}
}
":";
} or {
keyword: at-least-1 [a-zA-Z0-9\u2011\\-];
Expand Down Expand Up @@ -76,7 +82,13 @@ repeat ${whitespaceWithoutNewLine};
} or {
newLine: \n;
} or {
label: ([a-zA-Z_], repeat [a-zA-Z0-9_]);
label: {
{
[a-zA-Z_], repeat [a-zA-Z0-9_];
} or {
[1-9], repeat [0-9];
}
}
":";
} or {
keyword: at-least-1 [a-zA-Z0-9\u2011\\-];
Expand Down
52 changes: 39 additions & 13 deletions src/con-reg-exp.ts
Original file line number Diff line number Diff line change
Expand Up @@ -94,9 +94,9 @@ class TokenizerError extends Error {
// #region Generated Regular Expressions


const tokenRegExpBase = /[\r\t\v\f \xA0\uFEFF]*(?:(?<begin>[{(])|(?<end>[)}])|(?<separator>[,;])|(?<newLine>\n)|(?<label>[a-zA-Z_][a-zA-Z0-9_]*):|(?<keyword>[a-zA-Z0-9\u2011\\-]+)|(?<literal>(?<_literalQuote>["'])(?:\\.|.)*?\k<_literalQuote>)|<(?<identifier>.*?)>|\[(?<complement>\^)?(?<characterClass>(?:\\.|.)*?)\]|(?<prefix>`[A-Z]{3,})(?<index>[0-9]+)\}|(?<comment1>\/\*.*?\*\/)|(?<comment2>\/\/.*?)$)[\r\t\v\f \xA0\uFEFF]*/msy;
const tokenRegExpBase = /[\r\t\v\f \xA0\uFEFF]*(?:(?<begin>[{(])|(?<end>[)}])|(?<separator>[,;])|(?<newLine>\n)|(?<label>[a-zA-Z_][a-zA-Z0-9_]*|[1-9][0-9]*):|(?<keyword>[a-zA-Z0-9\u2011\\-]+)|(?<literal>(?<_literalQuote>["'])(?:\\.|.)*?\k<_literalQuote>)|<(?<identifier>.*?)>|\[(?<complement>\^)?(?<characterClass>(?:\\.|.)*?)\]|(?<prefix>`[A-Z]{3,})(?<index>[0-9]+)\}|(?<comment1>\/\*.*?\*\/)|(?<comment2>\/\/.*?)$)[\r\t\v\f \xA0\uFEFF]*/msy;

const tokenRegExpVMode = /[\r\t\v\f \xA0\uFEFF]*(?:(?<begin>[{(])|(?<end>[)}])|(?<separator>[,;])|(?<newLine>\n)|(?<label>[a-zA-Z_][a-zA-Z0-9_]*):|(?<keyword>[a-zA-Z0-9\u2011\\-]+)|(?<literal>(?<_literalQuote>["'])(?:\\.|.)*?\k<_literalQuote>)|<(?<identifier>.*?)>|(?<characterClassVMode>\[)(?<complement>\^)?|(?<prefix>`[A-Z]{3,})(?<index>[0-9]+)\}|(?<comment1>\/\*.*?\*\/)|(?<comment2>\/\/.*?)$)[\r\t\v\f \xA0\uFEFF]*/msy;
const tokenRegExpVMode = /[\r\t\v\f \xA0\uFEFF]*(?:(?<begin>[{(])|(?<end>[)}])|(?<separator>[,;])|(?<newLine>\n)|(?<label>[a-zA-Z_][a-zA-Z0-9_]*|[1-9][0-9]*):|(?<keyword>[a-zA-Z0-9\u2011\\-]+)|(?<literal>(?<_literalQuote>["'])(?:\\.|.)*?\k<_literalQuote>)|<(?<identifier>.*?)>|(?<characterClassVMode>\[)(?<complement>\^)?|(?<prefix>`[A-Z]{3,})(?<index>[0-9]+)\}|(?<comment1>\/\*.*?\*\/)|(?<comment2>\/\/.*?)$)[\r\t\v\f \xA0\uFEFF]*/msy;

const quantifierRegExp = /^(?<lazy>lazy-|non-greedy-)?(?:(?<optional>optional)|(?<repeat>repeat)|(?:repeat-)?(?:(?:at-)?(?:(?<least>least-)|(?<most>most-))(?<count>\d+)|(?<min>\d+)(?:-to-(?<max>\d+))?)(?:-times?)?)$/su;

Expand Down Expand Up @@ -431,10 +431,20 @@ function tokenize(text: string, interpolationPrefix: string, values: (string | E
// #region Parser Context


/**
 * Bit flags describing which kinds of boundary assertions have been seen.
 * The values are combined with bitwise OR, so `BOTH` is exactly `LINE | TEXT`.
 */
enum ContextBoundaryTypes {
    NONE = 0,
    LINE = 1 << 0,
    TEXT = 1 << 1,
    BOTH = LINE | TEXT,
}


class Context {

private index: number;
private interpolationStack: InterpolationBeginToken[] = [];
private lastGroupIndex = 0;
private boundaryTypes = ContextBoundaryTypes.NONE;

public constructor(
public info: ExpressionTokenized,
Expand Down Expand Up @@ -542,6 +552,16 @@ class Context {
}
} while (true);
}

/**
 * Advances the capturing-group counter by one and returns the new value.
 * The counter starts at 0, so the first call returns 1.
 */
public reserveGroupIndex() {
    return ++this.lastGroupIndex;
}

/**
 * Records that a boundary assertion of the given kind was used and updates
 * the `multiline` flag to match.
 *
 * @param type Kind of boundary being used; OR-ed into the kinds seen so far.
 */
public useBoundary(type: ContextBoundaryTypes) {
    const seen = this.boundaryTypes | type;
    this.boundaryTypes = seen;
    // The flag is set only while line boundaries are the sole kind seen;
    // once a text boundary is also recorded it becomes false.
    const lineOnly = (seen === ContextBoundaryTypes.LINE);
    this.info.flags.multiline = lineOnly;
}
}


Expand Down Expand Up @@ -835,6 +855,7 @@ class LineBoundary extends InvertibleNode {
) {
obj = new LineBoundary(false);
obj.start = (token.text !== 'end-of-line');
ctx.useBoundary(ContextBoundaryTypes.LINE);
obj.flags = ctx.info.flags;
}
return obj;
Expand Down Expand Up @@ -865,7 +886,7 @@ class TextBoundary extends InvertibleNode {
) {
obj = new TextBoundary(false);
obj.start = (token.text !== 'end-of-text');
ctx.info.flags.multiline = false;
ctx.useBoundary(ContextBoundaryTypes.TEXT);
}
return obj;
}
Expand All @@ -880,28 +901,33 @@ class TextBoundary extends InvertibleNode {

class Group extends Node {

private id!: string | undefined;
private name?: string;
private index!: number;
private child!: Node;

public static create(token: Token, ctx: Context) {
let obj: Group | undefined = undefined;
if (token.type === TokenType.Keyword && token.text === 'group') {
obj = new Group();
obj.id = undefined;
obj.child = parseLeaf(ctx);
} else if (token.type === TokenType.Label) {
if (token.type === TokenType.Label) {
obj = new Group();
obj.id = token.text;
obj.index = ctx.reserveGroupIndex();
if (token.text.match(/^[0-9]+$/)) {
let providedIndex = parseInt(token.text);
if (providedIndex != obj.index) {
throw ctx.error(token, `Mismatching positional capturing group. Provided ${providedIndex}, expected ${obj.index}.`);
}
} else {
obj.name = token.text;
}
obj.child = parseLeaf(ctx);
}
return obj;
}

public generateAtom(): string {
if (!this.id) {
if (!this.name) {
return `(${this.child.generate()})`;
} else {
return `(?<${this.id}>${this.child.generate()})`;
return `(?<${this.name}>${this.child.generate()})`;
}
}
}
Expand Down Expand Up @@ -1143,7 +1169,7 @@ function parse(text: string, interpolationPrefix: string, values: (string | Expr
sourceCode: text,
tokens: [],
flags: {
multiline: true,
multiline: false,
indices: false,
global: false,
ignoreCase: false,
Expand Down
1 change: 1 addition & 0 deletions test/unit/assertions.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import cre from '../../src/con-reg-exp';

describe('Assertions', () => {
test('Input/line boundary assertion', () => {
expect(cre`any`).toStrictEqual(/./su);
expect(cre`begin-of-text, end-of-text`).toStrictEqual(/^$/su);
expect(cre`start-of-text`).toStrictEqual(/^/su);
expect(cre`begin-of-line, end-of-line`).toStrictEqual(/^$/msu);
Expand Down
23 changes: 23 additions & 0 deletions test/unit/capture.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@

import { describe, expect, test } from 'vitest'
import cre from '../../src/con-reg-exp';


// Capturing groups introduced by a label: either a number ("positional") or a name.
describe('Capture', () => {
// A numeric label yields an unnamed capturing group.
test('Positional', () => {
expect(cre`1: any`).toStrictEqual(/(.)/su);
expect(cre`1: any, 2: digit`).toStrictEqual(/(.)(\d)/su);
// Nested groups are numbered in order of appearance, outer group first.
expect(cre`1: (any, 2: digit)`).toStrictEqual(/(.(\d))/su);
});
// Numeric labels that do not match the group's actual position must throw.
test('Positional failure', () => {
// Index 0 is rejected.
expect(() => cre`0: any`).toThrow();
// The first group cannot be labelled 2.
expect(() => cre`2: any`).toThrow();
// The nested group is the second one, so labelling it 1 is a mismatch.
expect(() => cre`1: (any, 1: digit)`).toThrow();
// A named group occupies position 1, so the following group cannot be 1.
expect(() => cre`first: digit, 1: any`).toThrow();
});
// Named and numeric labels share a single sequence of positions.
test('Mixed', () => {
expect(cre`first: any, 2: digit`).toStrictEqual(/(?<first>.)(\d)/su);
expect(cre`first: (any, 2: digit)`).toStrictEqual(/(?<first>.(\d))/su);
expect(cre`1: any, two: digit, 3: word-char`).toStrictEqual(/(.)(?<two>\d)(\w)/su);
});
});
20 changes: 10 additions & 10 deletions test/unit/separators.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,11 @@ describe('Separators', () => {

expect(cre`
"abc"
group "def"
group "ghi";
(group "jkl")
group "mno"
`).toStrictEqual(/abc(def)(ghi)(jkl)(mno)/msu)
1: "def"
2: "ghi";
(3: "jkl")
4: "mno"
`).toStrictEqual(/abc(def)(ghi)(jkl)(mno)/su)

});

Expand All @@ -40,10 +40,10 @@ describe('Separators', () => {
expect(() => cre`${inner2} "ghi"`).toThrow();
expect(cre`
${inner}
group ${inner2}
group ${inner};
(group ${inner})
group ${inner2}
`).toStrictEqual(/def(def)(def)(def)(def)/msu)
1: ${inner2}
2: ${inner};
(3: ${inner})
4: ${inner2}
`).toStrictEqual(/def(def)(def)(def)(def)/su)
});
});

0 comments on commit 5eef61c

Please sign in to comment.