Skip to content

Commit

Permalink
feat: add codepoint tests
Browse files Browse the repository at this point in the history
Testing by codepoint may make sense in some cases.
  • Loading branch information
lddubeau committed Jul 23, 2018
1 parent 3c538e9 commit 881e552
Show file tree
Hide file tree
Showing 2 changed files with 147 additions and 8 deletions.
78 changes: 78 additions & 0 deletions src/xmlchars.ts
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,84 @@ export namespace XML_1_0 {
export const NMTOKEN = new RegExp(`^[${fragments.NAME_CHAR}]+$`, "u");
}

// Code points of the individual characters that the codepoint tests below
// compare against by name.
const TAB = 0x9; // U+0009, horizontal tab
const NL = 0xA; // U+000A, line feed
const CR = 0xD; // U+000D, carriage return
const SPACE = 0x20; // U+0020, " "
const MINUS = 0x2D; // U+002D, "-"
const COLON = 0x3A; // U+003A, ":"

/**
 * Tests a single codepoint against the ``CHAR`` production.
 *
 * The accepted values are tab, newline, carriage return, and the ranges
 * 0x20-0xD7FF, 0xE000-0xFFFD and 0x10000-0x10FFFF. Everything else is
 * rejected.
 *
 * @param c The code point.
 *
 * @returns ``true`` if the codepoint matches ``CHAR``.
 */
export function isChar(c: number): boolean {
  // Below SPACE only three control characters are allowed.
  if (c < SPACE) {
    return c === TAB || c === NL || c === CR;
  }

  // SPACE up to 0xD7FF is accepted wholesale.
  if (c <= 0xD7FF) {
    return true;
  }

  // The gap between 0xD7FF and 0xE000 is excluded.
  if (c < 0xE000) {
    return false;
  }

  if (c <= 0xFFFD) {
    return true;
  }

  // 0xFFFE and 0xFFFF fall through to here and fail the lower bound.
  return c >= 0x10000 && c <= 0x10FFFF;
}

/**
 * Tests a single codepoint against the ``S`` (space) production.
 *
 * Only space, newline, carriage return and tab qualify.
 *
 * @param c The code point.
 *
 * @returns ``true`` if the codepoint matches ``S``.
 */
export function isS(c: number): boolean {
  switch (c) {
    case SPACE:
    case NL:
    case CR:
    case TAB:
      return true;
    default:
      return false;
  }
}

/**
 * Tests a single codepoint against the ``NAME_START_CHAR`` production.
 *
 * The check is organized in three tiers: codepoints below 0x80, the rest of
 * the range below 0x10000, and the codepoints from 0x10000 up.
 *
 * @param c The code point.
 *
 * @returns ``true`` if the codepoint matches ``NAME_START_CHAR``.
 */
// tslint:disable-next-line:cyclomatic-complexity
export function isNameStartChar(c: number): boolean {
  // Below 0x80: colon, underscore, and the upper and lower case letters.
  if (c < 0x80) {
    return c === COLON ||
      c === 0x5F ||
      (c >= 0x41 && c <= 0x5A) ||
      (c >= 0x61 && c <= 0x7A);
  }

  // From 0x80 up to, but not including, 0x10000.
  if (c < 0x10000) {
    return (c >= 0xC0 && c <= 0xD6) ||
      (c >= 0xD8 && c <= 0xF6) ||
      (c >= 0x00F8 && c <= 0x02FF) ||
      (c >= 0x0370 && c <= 0x037D) ||
      (c >= 0x037F && c <= 0x1FFF) ||
      (c >= 0x200C && c <= 0x200D) ||
      (c >= 0x2070 && c <= 0x218F) ||
      (c >= 0x2C00 && c <= 0x2FEF) ||
      (c >= 0x3001 && c <= 0xD7FF) ||
      (c >= 0xF900 && c <= 0xFDCF) ||
      (c >= 0xFDF0 && c <= 0xFFFD);
  }

  // From 0x10000 up. The lower bound is kept explicit so that a NaN argument
  // is rejected here too.
  return c >= 0x10000 && c <= 0xEFFFF;
}

/**
 * Tests a single codepoint against the ``NAME_CHAR`` production.
 *
 * A codepoint qualifies if it already satisfies ``NAME_START_CHAR`` or if it
 * is one of the additional codepoints that are only allowed after the first
 * position of a name.
 *
 * @param c The code point.
 *
 * @returns ``true`` if the codepoint matches ``NAME_CHAR``.
 */
export function isNameChar(c: number): boolean {
  if (isNameStartChar(c)) {
    return true;
  }

  // The individual extra codepoints: "-", "." and 0xB7.
  if (c === MINUS || c === 0x2E || c === 0x00B7) {
    return true;
  }

  // The extra ranges: the digits 0-9, 0x0300-0x036F and 0x203F-0x2040.
  return (c >= 0x30 && c <= 0x39) ||
    (c >= 0x0300 && c <= 0x036F) ||
    (c >= 0x203F && c <= 0x2040);
}
}

/**
Expand Down
77 changes: 69 additions & 8 deletions test/xmlchars.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,11 @@ const leadingDigit: Fixture = {
data: "1bar",
};

// Despite its name, this fixture holds U+FFFE, not the byte order mark itself
// (U+FEFF).
// NOTE(review): presumably it serves as a single character that the character
// tests must reject -- confirm that U+FFFE rather than U+FEFF is intended.
const bom: Fixture = {
name: "bom",
data: "\uFFFE",
};

// tslint:disable-next-line:mocha-no-side-effect-code
const ALL_FIXTURES = new Set([
x,
Expand All @@ -99,6 +104,7 @@ const ALL_FIXTURES = new Set([
leadingDot,
leadingDash,
leadingDigit,
bom,
]);

interface Case {
Expand Down Expand Up @@ -132,8 +138,39 @@ function makeTests(re: RegExp, testCase: Case): void {
});
}

/**
 * Registers ``it`` tests that exercise a codepoint predicate against the
 * fixtures.
 *
 * Every fixture listed in ``testCase.matching`` is expected to be accepted by
 * the predicate, and every other fixture in ``ALL_FIXTURES`` is expected to be
 * rejected. A predicate takes a single codepoint, so only fixtures whose data
 * is exactly one character can be used: empty fixtures and fixtures that
 * contain more than one character are skipped.
 *
 * @param codePointTest The predicate under test.
 *
 * @param testCase The case listing the fixtures that the predicate must match.
 */
function makeCodePointTestTests(codePointTest: (c: number) => boolean,
                                testCase: Case): void {
  // Returns the codepoint of data that holds exactly one character, and
  // undefined otherwise. Empty data is checked explicitly: codePointAt(0)
  // yields undefined for it, and passing that on to String.fromCodePoint would
  // throw a RangeError while the tests are being registered.
  const singleCodePoint = (data: string): number | undefined => {
    const code = data.codePointAt(0);
    if (code === undefined ||
        data.length > String.fromCodePoint(code).length) {
      return undefined;
    }

    return code;
  };

  for (const { data, name } of testCase.matching) {
    const code = singleCodePoint(data);
    if (code === undefined) {
      // We skip those fixtures that do not contain exactly one character.
      continue;
    }
    it(`matches ${name}`, () => {
      expect(codePointTest(code)).to.be.true;
    });
  }

  for (const fixture of ALL_FIXTURES) {
    if (testCase.matching.includes(fixture)) {
      continue;
    }

    const code = singleCodePoint(fixture.data);
    if (code === undefined) {
      // We skip those fixtures that do not contain exactly one character.
      continue;
    }
    it(`does not match ${fixture.name}`, () => {
      expect(codePointTest(code)).to.be.false;
    });
  }
}

describe("XML_1_0", () => {
describe("ED5.regexes", () => {
describe("ED5", () => {
// tslint:disable-next-line:mocha-no-side-effect-code
const cases: Record<ED5_REGEX_NAMES, Case> = {
CHAR: {
Expand All @@ -156,15 +193,38 @@ describe("XML_1_0", () => {
matching: [x, abc, ideographic, nameWithColon, leadingDot, leadingDash,
leadingDigit, combining, extender, digit, poo],
},
// tslint:disable-next-line:no-any
} as any;
describe(".regexes", () => {
// tslint:disable-next-line:mocha-no-side-effect-code
for (const name of (Object.keys(cases) as (keyof typeof cases)[])) {
describe(name, () => {
// tslint:disable-next-line:mocha-no-side-effect-code
makeTests(XML_1_0.ED5.regexes[name], cases[name]);
});
}
});

// tslint:disable-next-line:mocha-no-side-effect-code
for (const name of (Object.keys(cases) as (keyof typeof cases)[])) {
describe(name, () => {
// tslint:disable-next-line:mocha-no-side-effect-code
makeTests(XML_1_0.ED5.regexes[name], cases[name]);
});
}
describe(".isChar", () => {
// tslint:disable-next-line:mocha-no-side-effect-code
makeCodePointTestTests(XML_1_0.ED5.isChar, cases.CHAR);
});

describe(".isS", () => {
// tslint:disable-next-line:mocha-no-side-effect-code
makeCodePointTestTests(XML_1_0.ED5.isS, cases.S);
});

describe(".isNameStartChar", () => {
// tslint:disable-next-line:mocha-no-side-effect-code
makeCodePointTestTests(XML_1_0.ED5.isNameStartChar,
cases.NAME_START_CHAR);
});

describe(".isNameChar", () => {
// tslint:disable-next-line:mocha-no-side-effect-code
makeCodePointTestTests(XML_1_0.ED5.isNameChar, cases.NAME_CHAR);
});
});

describe("ED4.regexes", () => {
Expand Down Expand Up @@ -205,6 +265,7 @@ describe("XML_1_0", () => {
matching: [x, abc, ideographic, nameWithColon, leadingDot, leadingDash,
leadingDigit, combining, extender, digit],
},
// tslint:disable-next-line:no-any
} as any;

// tslint:disable-next-line:mocha-no-side-effect-code
Expand Down

0 comments on commit 881e552

Please sign in to comment.