Skip to content

Commit

Permalink
feat: add initial support for XML 1.1 edition 2
Browse files Browse the repository at this point in the history
  • Loading branch information
lddubeau committed Jun 23, 2019
1 parent b9f8285 commit baaa734
Show file tree
Hide file tree
Showing 3 changed files with 184 additions and 0 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ last updated, we had:

* `xmlchars/xml/1.0/ed5` which covers XML 1.0 edition 5.
* `xmlchars/xml/1.0/ed4` which covers XML 1.0 edition 4.
* `xmlchars/xml/1.1/ed2` which covers XML 1.1 edition 2.
* `xmlchars/xmlns/1.0/ed3` which covers XML Namespaces 1.0 edition 3.

## Features
Expand Down
116 changes: 116 additions & 0 deletions src/xml/1.1/ed2.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
/**
* Character classes and associated utilities for the 2nd edition of XML 1.1.
*
* @author Louis-Dominique Dubeau
* @license MIT
* @copyright Louis-Dominique Dubeau
*/

//
// Fragments.
//
// Each fragment is the *body* of a regular expression character class (the
// part that goes between ``[`` and ``]``). They are exported so that callers
// can assemble their own expressions. Any expression built from them needs the
// ``u`` flag, because some ranges extend beyond the Basic Multilingual Plane.
//

/** Character-class body for the ``CHAR`` production. */
export const CHAR = "\u0001-\uD7FF\uE000-\uFFFD\u{10000}-\u{10FFFF}";

/** Character-class body for the ``S`` production: space, tab, CR and NL. */
export const S = " \t\r\n";

/** Character-class body for the ``NAME_START_CHAR`` production. */
// tslint:disable-next-line:max-line-length
export const NAME_START_CHAR = ":A-Z_a-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD\u{10000}-\u{EFFFF}";

/**
 * Character-class body for the ``NAME_CHAR`` production. The dash comes first
 * so that it is read as a literal character and not as a range operator.
 */
export const NAME_CHAR =
  `-${NAME_START_CHAR}.0-9\u00B7\u0300-\u036F\u203F-\u2040`;

//
// Regular expressions.
//
// All the expressions are anchored at both ends: they match only if the
// *whole* string conforms to the production.
//

/** Wraps a fragment in brackets to form a character class. */
const charClass = (fragment: string): string => `[${fragment}]`;

/** Compiles ``pattern`` anchored at both ends, with the ``u`` flag set. */
const anchored = (pattern: string): RegExp => new RegExp(`^${pattern}$`, "u");

/** Matches a string made of exactly one ``CHAR``. */
export const CHAR_RE = anchored(charClass(CHAR));

/** Matches a string made of one or more ``S`` characters. */
export const S_RE = anchored(`${charClass(S)}+`);

/** Matches a string made of exactly one ``NAME_START_CHAR``. */
export const NAME_START_CHAR_RE = anchored(charClass(NAME_START_CHAR));

/** Matches a string made of exactly one ``NAME_CHAR``. */
export const NAME_CHAR_RE = anchored(charClass(NAME_CHAR));

/** Matches one ``NAME_START_CHAR`` followed by any number of ``NAME_CHAR``. */
export const NAME_RE =
  anchored(`${charClass(NAME_START_CHAR)}${charClass(NAME_CHAR)}*`);

/** Matches a string made of one or more ``NAME_CHAR``. */
export const NMTOKEN_RE = anchored(`${charClass(NAME_CHAR)}+`);

// Code points of the four characters that make up the ``S`` production.
const TAB = 0x9;    // "\t"
const NL = 0xA;     // "\n"
const CR = 0xD;     // "\r"
const SPACE = 0x20; // " "

//
// Lists.
//

/**
 * All characters in the ``S`` production, as code points. The order is: space,
 * newline, carriage return, tab.
 */
export const S_LIST = [SPACE, NL, CR, TAB];

/**
 * Tests a code point against the ``CHAR`` production of XML 1.1.
 *
 * The production is made of three disjoint ranges. They are checked from the
 * highest to the lowest: once ``c`` reaches the lower bound of a range, only
 * the upper bound of that same range remains to be checked.
 *
 * @param c The code point.
 *
 * @returns ``true`` if the codepoint matches ``CHAR``.
 */
export function isChar(c: number): boolean {
  // Supplementary planes.
  if (c >= 0x10000) {
    return c <= 0x10FFFF;
  }

  // Upper part of the BMP: above the surrogates, and excluding U+FFFE and
  // U+FFFF.
  if (c >= 0xE000) {
    return c <= 0xFFFD;
  }

  // Lower part of the BMP: U+0000 and the surrogates are excluded.
  return c >= 0x0001 && c <= 0xD7FF;
}

/**
 * Tests a code point against the ``S`` (space) production.
 *
 * The code point is compared with each member of ``S_LIST`` in turn.
 *
 * @param c The code point.
 *
 * @returns ``true`` if the codepoint matches ``S``.
 */
export function isS(c: number): boolean {
  for (const space of S_LIST) {
    if (space === c) {
      return true;
    }
  }

  return false;
}

/** Isolated code points of ``NAME_START_CHAR``: ":", "_", U+200C and U+200D. */
const NAME_START_CHAR_LIST = [0x3A, 0x5F, 0x200C, 0x200D];

/**
 * Inclusive ``[low, high]`` ranges of ``NAME_START_CHAR``, in ascending order.
 */
const NAME_START_CHAR_RANGES: [number, number][] = [
  [0x41, 0x5A], // A-Z
  [0x61, 0x7A], // a-z
  [0xC0, 0xD6],
  [0xD8, 0xF6],
  [0x00F8, 0x02FF],
  [0x0370, 0x037D],
  [0x037F, 0x1FFF],
  [0x2070, 0x218F],
  [0x2C00, 0x2FEF],
  [0x3001, 0xD7FF],
  [0xF900, 0xFDCF],
  [0xFDF0, 0xFFFD],
  [0x10000, 0xEFFFF],
];

/**
 * Tests a code point against the ``NAME_START_CHAR`` production.
 *
 * The isolated code points are tested by equality first, then the code point
 * is looked up in the table of ranges.
 *
 * @param c The code point.
 *
 * @returns ``true`` if the codepoint matches ``NAME_START_CHAR``.
 */
export function isNameStartChar(c: number): boolean {
  if (NAME_START_CHAR_LIST.includes(c)) {
    return true;
  }

  for (const [low, high] of NAME_START_CHAR_RANGES) {
    if (c >= low && c <= high) {
      return true;
    }
  }

  return false;
}

/** Isolated code points that ``NAME_CHAR`` adds: "-", "." and U+00B7. */
const NAME_CHAR_LIST = [0x2D, 0x2E, 0xB7];

/**
 * Tests a code point against the ``NAME_CHAR`` production.
 *
 * Anything that matches ``NAME_START_CHAR`` also matches ``NAME_CHAR``. The
 * remaining checks cover what ``NAME_CHAR`` adds to it.
 *
 * @param c The code point.
 *
 * @returns ``true`` if the codepoint matches ``NAME_CHAR``.
 */
export function isNameChar(c: number): boolean {
  if (isNameStartChar(c)) {
    return true;
  }

  // ASCII digits.
  if (c >= 0x30 && c <= 0x39) {
    return true;
  }

  if (NAME_CHAR_LIST.includes(c)) {
    return true;
  }

  if (c >= 0x0300 && c <= 0x036F) {
    return true;
  }

  return c >= 0x203F && c <= 0x2040;
}
67 changes: 67 additions & 0 deletions test/xmlchars.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import { expect } from "chai";
import * as xml_1_0_ed4 from "../src/xml/1.0/ed4";
import * as xml_1_0_ed5 from "../src/xml/1.0/ed5";
import * as xml_1_1_ed2 from "../src/xml/1.1/ed2";
import * as xmlns_1_0_ed3 from "../src/xmlns/1.0/ed3";

interface Fixture {
Expand Down Expand Up @@ -94,6 +95,11 @@ const bom: Fixture = {
data: "\uFFFE",
};

// U+0001: the lowest code point in the CHAR range of the xml/1.1/ed2 module.
const one: Fixture = {
name: "\\u0001",
data: "\u0001",
};

// tslint:disable-next-line:mocha-no-side-effect-code
const ALL_FIXTURES = new Set([
x,
Expand All @@ -113,6 +119,7 @@ const ALL_FIXTURES = new Set([
leadingDash,
leadingDigit,
bom,
one,
]);

interface Case {
Expand Down Expand Up @@ -287,6 +294,66 @@ describe("xml/1.0", () => {
});
});

// Tests for the character classes of XML 1.1, 2nd edition.
describe("xml/1.1", () => {
describe("ed2", () => {
// One entry per RegExp exported by the module. ``matching`` lists the
// fixtures that the expression is expected to match.
// NOTE(review): presumably the fixtures of ALL_FIXTURES that are not listed
// are expected not to match; confirm against makeTests, which is not visible
// here.
// tslint:disable-next-line:mocha-no-side-effect-code
const cases: Record<FilterPropertyName<typeof xml_1_1_ed2, RegExp>,
Case> = {
CHAR_RE: {
matching: [one, x, poo, colon, space, tab, newline, cr, ideographic,
combining, digit, extender],
},
S_RE: {
matching: [space, tab, newline, cr],
},
NAME_START_CHAR_RE: {
matching: [x, ideographic, poo, colon],
},
NAME_CHAR_RE: {
matching: [x, ideographic, poo, colon, combining, extender, digit],
},
NAME_RE: {
matching: [x, abc, ideographic, colon, nameWithColon, poo],
},
NMTOKEN_RE: {
matching: [x, abc, ideographic, colon, nameWithColon, leadingDot,
leadingDash, leadingDigit, combining, extender, digit, poo],
},
};

// One nested suite per exported regular expression, named after the export.
describe("regexes", () => {
// tslint:disable-next-line:mocha-no-side-effect-code
for (const name of (Object.keys(cases) as (keyof typeof cases)[])) {
describe(name, () => {
// tslint:disable-next-line:mocha-no-side-effect-code
makeTests(xml_1_1_ed2[name], cases[name]);
});
}
});

// Each code point predicate below is checked with the same case as the
// regular expression for the same production.
describe(".isChar", () => {
// tslint:disable-next-line:mocha-no-side-effect-code
makeCodePointTestTests(xml_1_1_ed2.isChar, cases.CHAR_RE);
});

describe(".isS", () => {
// tslint:disable-next-line:mocha-no-side-effect-code
makeCodePointTestTests(xml_1_1_ed2.isS, cases.S_RE);
});

describe(".isNameStartChar", () => {
// tslint:disable-next-line:mocha-no-side-effect-code
makeCodePointTestTests(xml_1_1_ed2.isNameStartChar,
cases.NAME_START_CHAR_RE);
});

describe(".isNameChar", () => {
// tslint:disable-next-line:mocha-no-side-effect-code
makeCodePointTestTests(xml_1_1_ed2.isNameChar, cases.NAME_CHAR_RE);
});
});
});

describe("xmlns/1.0", () => {
describe("ed3", () => {
// tslint:disable-next-line:mocha-no-side-effect-code
Expand Down

0 comments on commit baaa734

Please sign in to comment.