Skip to content

Commit cfe87ba

Browse files
chunninghamldthomasw4ll3
authored
ABNF parser rewrite (#213)
* A complete re-write of the ABNF parser. Removed dependencies on uri-js and valid-url. Added many new unit tests for validating both the message URI and the resource URIs. Added a script to re-generate the ABNF grammar object whenever the ABNF grammar needs to be changed. * Updated package/siwe-parser. package.json: 1) the dependency on apg-js is the newly published version 4.4.0, 2) the script "apg" will generate a TypeScript grammar object from the ABNF grammar siwe-abnf.txt. Added an optional "doTrace" parameter to the constructor of ParsedMessage. The default is "false". If set to "true" apg-js will trace the parse tree of the parsed message and write it to output/siwe-parser-trace.html. Consequently the output directory has been added to ".gitignore" and ".npmignore". * Committed four files missed in the last commit. * Modified the ABNF grammar and parser to match the spec for (present/empty/missing) statement/resources. * Added new function "isUri()" to "packages/siwe-parser/lib/abnf.ts". Added "packages/siwe-parser/lib/t-isUri.test.ts" for unit testing of the "isUri" function. Added tests in the object block of the “SiweMessage” constructor for testing the EIP-55 address, the URI and the resources URIs. Added “packages/siwe/lib/objects.test.ts” for unit testing of the object block. * Modified the ParsedMessage callbacks to validate the semantics as well as the syntax of date times. Removed the SiweMessage private function validateMessage(). Replaced it in the message object block of the constructor by parsing the stringified message object. In this way both the message and the message object get exactly the same validation. Fixed a bug for the case of an empty statement in the function toMessage(). * Updated validation of date-times in ParsedMessage. Fixed message object validation in SiweMessage. * Removed debugging (doTrace) code from "t-chars.test.ts". * Fixed siwe/lib/utils.ts bug. Converted all unit tests to JSON file methodology.
* Removed a development-only script from siwe package.json. * remove commented code * fix invalid opts check * fix tests, SiweErrorType has no values, is not object * Fix test * Fix typos * Lint files * Typos * Upgrade to ES2018 * Remove deprecated method * Bump version * Bump siwe-parser version --------- Co-authored-by: Lowell D. Thomas <ldt@sabnf.com> Co-authored-by: Gregorio <greg.magalhaes@gmail.com>
1 parent 940a66a commit cfe87ba

27 files changed

+3434
-1210
lines changed

package-lock.json

+483-621
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

packages/siwe-parser/lib/abnf.ts

+145-362
Large diffs are not rendered by default.

packages/siwe-parser/lib/callbacks.ts

+529
Large diffs are not rendered by default.
+27-24
Original file line numberDiff line numberDiff line change
@@ -1,34 +1,37 @@
11
import { ParsedMessage } from "./abnf";
22
import * as fs from "fs";
33

4-
const parsingPositive: object = JSON.parse(fs.readFileSync('../../test/parsing_positive.json', 'utf8'));
5-
const parsingNegative: object = JSON.parse(fs.readFileSync('../../test/parsing_negative.json', 'utf8'));
4+
const parsingPositive: object = JSON.parse(
5+
fs.readFileSync("../../test/parsing_positive.json", "utf8")
6+
);
7+
const parsingNegative: object = JSON.parse(
8+
fs.readFileSync("../../test/parsing_negative.json", "utf8")
9+
);
610

711
//
812
describe("Successfully parses with ABNF Client", () => {
9-
test.concurrent.each(Object.entries(parsingPositive))(
10-
"Parses message successfully: %s",
11-
(test_name, test) => {
12-
const parsedMessage = new ParsedMessage(test.message);
13-
for (const [field, value] of Object.entries(test.fields)) {
14-
if (value === null) {
15-
expect(parsedMessage[field]).toBeUndefined();
16-
}
17-
else if (typeof value === "object") {
18-
expect(parsedMessage[field]).toStrictEqual(value);
19-
} else {
20-
expect(parsedMessage[field]).toBe(value);
21-
}
22-
}
23-
}
24-
);
13+
test.concurrent.each(Object.entries(parsingPositive))(
14+
"Parses message successfully: %s",
15+
(test_name, test) => {
16+
const parsedMessage = new ParsedMessage(test.message);
17+
for (const [field, value] of Object.entries(test.fields)) {
18+
if (value === null) {
19+
expect(parsedMessage[field]).toBeUndefined();
20+
} else if (typeof value === "object") {
21+
expect(parsedMessage[field]).toStrictEqual(value);
22+
} else {
23+
expect(parsedMessage[field]).toBe(value);
24+
}
25+
}
26+
}
27+
);
2528
});
2629

2730
describe("Successfully fails with ABNF Client", () => {
28-
test.concurrent.each(Object.entries(parsingNegative))(
29-
"Fails to parse message: %s",
30-
(test_name, test) => {
31-
expect(() => new ParsedMessage(test)).toThrow();
32-
}
33-
);
31+
test.concurrent.each(Object.entries(parsingNegative))(
32+
"Fails to parse message: %s",
33+
(test_name, test) => {
34+
expect(() => new ParsedMessage(test)).toThrow();
35+
}
36+
);
3437
});

packages/siwe-parser/lib/parsers.ts

+4-6
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,5 @@
11
import { ParsedMessage as ABNFParsedMessage } from "./abnf";
2-
export * from './utils';
3-
export {
4-
ABNFParsedMessage as ParsedMessage
5-
};
6-
7-
2+
import { isUri as ABNFisUri } from "./abnf";
3+
export * from "./utils";
4+
export { ABNFParsedMessage as ParsedMessage };
5+
export { ABNFisUri as isUri };
+212
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,212 @@
1+
; LDT 05/06/2024
2+
; modified in several significant ways
3+
; 1) Literal strings are replaced with numbers and ranges (%d32 & %d32-126, etc.) when possible.
4+
; TBS and especially TRG operators are much more efficient than TLS operators.
5+
; 2) Two rules, authority and URI, are used multiple times in different contexts. These rules will be reproduced and renamed
6+
; in order to a) recognize the context and b) remove unnecessary callback functions for certain contexts.
7+
; This will simplify recognizing contexts AND remove unnecessary callbacks
8+
; 2.a) domain is defined as authority-d which is identical to authority except that there will be no
9+
; callback functions defined on authority-d or any of its *-d components.
10+
; 2.b) The resource URI is defined as URI-r and its components defined as *-r.
11+
; In this way, callback functions can be defined on URI and its components while
12+
; leaving URI-r to be parsed identically with no unnecessary callback functions to slow it down.
13+
; 3) IPv6address does not work because of APG's "first-success disambiguation" and "greedy" repetitions.
14+
; IPv6address redefined and validations moved to callback functions (semantic vs syntactic validation)
15+
; Redefinition requires negative look-ahead operators, https://en.wikipedia.org/wiki/Syntactic_predicate
16+
; That is SABNF instead of simple ABNF.
17+
; 4) IPv4address fails because of "first-success disambiguation".
18+
; This could be fixed with rearrangement of the alternative terms. However, it would still not
19+
; accept zero-padded (leading zeros) decimal octets.
20+
; Therefore, IPv4address is also done with callback functions and semantic validation.
21+
; 5) The negative look-ahead operator is also needed in the definition of host to
22+
; prevent failure with a reg-name that begins with an IPv4 address.
23+
; 6) NOTE: host = 1.1.1.256 is a valid host name even though it is an invalid IPv4address.
24+
; The IPv4address alternative fails but the reg-name alternative succeeds.
25+
; 7) The Ethereum spec (https://eips.ethereum.org/EIPS/eip-4361) message format ABNF
26+
; allows for empty statements. Because of the "first success disambiguation" of APG
27+
; an explicit "empty-statement" rule is required to match the spec's intent.
28+
29+
30+
sign-in-with-ethereum =
31+
oscheme domain %s" wants you to sign in with your Ethereum account:" LF
32+
address LF
33+
((LF statement LF LF) / empty-statement / (LF LF))
34+
%s"URI: " URI LF
35+
%s"Version: " version LF
36+
%s"Chain ID: " chain-id LF
37+
%s"Nonce: " nonce LF
38+
%s"Issued At: " issued-at
39+
[ LF ex-title expiration-time ]
40+
[ LF nb-title not-before ]
41+
[ LF ri-title request-id ]
42+
[ LF re-title resources ]
43+
ex-title = %s"Expiration Time: "
44+
nb-title = %s"Not Before: "
45+
ri-title = %s"Request ID: "
46+
re-title = %s"Resources:"
47+
oscheme = [ ALPHA *( ALPHA / DIGIT / %d43 / %d45-46 ) "://" ]
48+
domain = authority-d
49+
address = "0x" 40*40HEXDIG
50+
; Must also conform to capitalization
51+
; checksum encoding specified in EIP-55
52+
; where applicable (EOAs).
53+
54+
statement = 1*( %d97-122 / %d65-90 / %d48-57 / %d32-33 / %d35-36 / %d38-59 / %d61 / %d63-64 / %d91 / %d93 / %d95 / %d126)
55+
; The purpose is to exclude LF (line breaks).
56+
; LDT 10/04/2023: Do you mean %d32-126? All printing characters
57+
empty-statement = LF LF LF
58+
version = "1"
59+
nonce = 8*( ALPHA / DIGIT )
60+
issued-at = date-time
61+
expiration-time = date-time
62+
not-before = date-time
63+
request-id = *pchar
64+
chain-id = 1*DIGIT
65+
; See EIP-155 for valid CHAIN_IDs.
66+
resources = *( LF resource )
67+
resource = "- " URI-r
68+
69+
; ------------------------------------------------------------------------------
70+
; RFC 3986
71+
72+
URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
73+
hier-part = "//" authority path-abempty
74+
/ path-absolute
75+
/ path-rootless
76+
/ path-empty
77+
scheme = ALPHA *( ALPHA / DIGIT / %d43 / %d45-46 )
78+
authority = [ userinfo-at ] host [ ":" port ]
79+
path-abempty = *( "/" segment )
80+
path-absolute = "/" [ segment-nz *( "/" segment ) ]
81+
path-rootless = segment-nz *( "/" segment )
82+
path-empty = ""
83+
userinfo-at = userinfo %d64
84+
; userinfo redefined to include the "@" so that it will fail without it
85+
; otherwise userinfo can match host and then the parser will backtrack
86+
; incorrectly keeping the captured userinfo phrase
87+
userinfo = *(%d97-122 / %d65-90 / %d48-57 / pct-encoded / %d33 / %d36 / %d38-46 / %d58-59 / %d61 / %d95 / %d126)
88+
host = IP-literal / (IPv4address !reg-name-char) / reg-name
89+
; negative look-ahead required to prevent IPv4address from being recognized as first part of reg-name
90+
; same fix as https://github.com/garycourt/uri-js/issues/4
91+
IP-literal = "[" ( IPv6address / IPvFuture ) "]"
92+
IPvFuture = "v" 1*HEXDIG "." 1*( %d97-122 / %d65-90 / %d48-57 / %d33 / %d36 /%d38-46 / %d58-59 /%d61 /%d95 / %d126 )
93+
IPv6address = nodcolon / dcolon
94+
nodcolon = (h16n *h16cn) [%d58 IPv4address]
95+
dcolon = [h16 *h16c] %d58.58 (((h16n *h16cn) [%d58 IPv4address]) / [IPv4address])
96+
h16 = 1*4HEXDIG
97+
h16c = %d58 1*4HEXDIG
98+
h16n = 1*4HEXDIG !%d46
99+
h16cn = %d58 1*4HEXDIG !%d46
100+
IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet
101+
; Here we will use callback functions to evaluate and validate the (possibly zero-padded) dec-octet.
102+
dec-octet = *3dec-digit
103+
dec-digit = %d48-57
104+
reg-name = *reg-name-char
105+
reg-name-char = %d97-122 / %d65-90 / %d48-57 / pct-encoded / %d33 / %d36 / %d38-46 / %d59 / %d61 /%d95 / %d126
106+
port = *DIGIT
107+
query = *(pchar / %d47 / %d63)
108+
fragment = *(pchar / %d47 / %d63)
109+
110+
; URI-r is a redefinition of URI but without the callback functions attached to it
111+
; it reuses authority-d from domain
112+
URI-r = scheme-r ":" hier-part-r [ "?" query-r ] [ "#" fragment-r ]
113+
hier-part-r = "//" authority-d path-abempty-r
114+
/ path-absolute-r
115+
/ path-rootless-r
116+
/ path-empty-r
117+
scheme-r = ALPHA *( ALPHA / DIGIT / %d43 / %d45-46 )
118+
query-r = *(pchar / %d47 / %d63)
119+
fragment-r = *(pchar / %d47 / %d63)
120+
121+
; authority-d is a redefinition of authority for capturing the domain phrase
122+
; but without callback functions
123+
; it is reused for URI-r for the same reason
124+
authority-d = [ userinfo-d %d64 ] host-d [ ":" port-d ]
125+
userinfo-d = *(%d97-122 / %d65-90 / %d48-57 / pct-encoded / %d33 / %d36 / %d38-46 / %d58-59 / %d61 / %d95 / %d126)
126+
host-d = IP-literal / (IPv4address !reg-name-char) / reg-name
127+
port-d = *DIGIT
128+
129+
; for use with URI-r
130+
path-abempty-r = *( "/" segment )
131+
path-absolute-r = "/" [ segment-nz *( "/" segment ) ]
132+
path-rootless-r = segment-nz *( "/" segment )
133+
path-empty-r = ""
134+
segment = *pchar
135+
segment-nz = 1*pchar
136+
pchar = (%d97-122 / %d65-90 / %d48-57 / pct-encoded / %d33 / %d36 / %d38-46 /%d58-59 / %d61 / %d64 / %d95 / %d126)
137+
pct-encoded = %d37 HEXDIG HEXDIG
138+
139+
; no longer needed - expanded for all usage for fewer branches in the parse tree
140+
; and more efficient use of the TBS & TRG operators in place of TLS and rule names
141+
; does not work with APG probably because of "first-success disambiguation" and greedy repetitions.
142+
; will replace with semantic checking of valid number of h16s
143+
;IPv6address = 6( h16 ":" ) ls32
144+
; / "::" 5( h16 ":" ) ls32
145+
; / [ h16 ] "::" 4( h16 ":" ) ls32
146+
; / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32
147+
; / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32
148+
; / [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32
149+
; / [ *4( h16 ":" ) h16 ] "::" ls32
150+
; / [ *5( h16 ":" ) h16 ] "::" h16
151+
; / [ *6( h16 ":" ) h16 ] "::"
152+
;ls32 = ( h16 ":" h16 ) / IPv4address
153+
; dec-octet does not work because of "first-success disambiguation".
154+
; Must have the longest (3-digit) numbers first.
155+
; Even so, this form does not accept leading zeros.
156+
; There does not seem to be a clear standard for this (https://en.wikipedia.org/wiki/Dot-decimal_notation)
157+
; however, the early RFC 790 did show leading-zero padding of the three digits.
158+
;dec-octet = DIGIT ; 0-9
159+
; / %x31-39 DIGIT ; 10-99
160+
; / "1" 2DIGIT ; 100-199
161+
; / "2" %x30-34 DIGIT ; 200-249
162+
; / "25" %x30-35 ; 250-255
163+
;statement = 1*( reserved / unreserved / " " )
164+
;scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
165+
;authority = [ userinfo "@" ] host [ ":" port ]
166+
;userinfo = *( unreserved / pct-encoded / sub-delims / ":" )
167+
;query = *( pchar / "/" / "?" )
168+
;fragment = *( pchar / "/" / "?" )
169+
;IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" )
170+
;reg-name = *( unreserved / pct-encoded / sub-delims )
171+
;pct-encoded = "%" HEXDIG HEXDIG
172+
;pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
173+
;path-empty = 0pchar; deprecated - empty literal string, "", is more efficient
174+
;unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
175+
;reserved = gen-delims / sub-delims
176+
;gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@"
177+
;sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
178+
; / "*" / "+" / "," / ";" / "="
179+
;HEXDIG = DIGIT / "A" / "B" / "C" / "D" / "E" / "F"
180+
181+
; ------------------------------------------------------------------------------
182+
; RFC 3339
183+
184+
date-fullyear = 4DIGIT
185+
date-month = 2DIGIT ; 01-12
186+
date-mday = 2DIGIT ; 01-28, 01-29, 01-30, 01-31 based on
187+
; month/year
188+
time-hour = 2DIGIT ; 00-23
189+
time-minute = 2DIGIT ; 00-59
190+
time-second = 2DIGIT ; 00-58, 00-59, 00-60 based on leap second
191+
; rules
192+
time-secfrac = "." 1*DIGIT
193+
time-numoffset = ("+" / "-") time-hour ":" time-minute
194+
time-offset = "Z" / time-numoffset
195+
196+
partial-time = time-hour ":" time-minute ":" time-second
197+
[time-secfrac]
198+
full-date = date-fullyear "-" date-month "-" date-mday
199+
full-time = partial-time time-offset
200+
201+
date-time = full-date "T" full-time
202+
203+
; ------------------------------------------------------------------------------
204+
; RFC 5234
205+
206+
ALPHA = %x41-5A / %x61-7A ; A-Z / a-z
207+
LF = %x0A
208+
; linefeed
209+
DIGIT = %x30-39
210+
; 0-9
211+
HEXDIG = %d48-57 / %d65-70 / %d97-102
212+

0 commit comments

Comments
 (0)