Skip to content

Commit

Permalink
Normative: move checks for invalid character up (#38)
Browse files Browse the repository at this point in the history
  • Loading branch information
bakkot authored Jan 22, 2024
1 parent fc401c9 commit d0a1833
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 20 deletions.
27 changes: 17 additions & 10 deletions playground/polyfill-core.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -74,23 +74,18 @@ export function uint8ArrayToBase64(arr, options) {
return result;
}

function decodeChunk(chunk, alphabet, throwOnExtraBits) {
function decodeBase64Chunk(chunk, throwOnExtraBits) {
let actualChunkLength = chunk.length;
if (actualChunkLength < 4) {
chunk += actualChunkLength === 2 ? 'AA' : 'A';
}

let map = new Map((alphabet === 'base64' ? base64Characters : base64UrlCharacters).split('').map((c, i) => [c, i]));
let map = new Map(base64Characters.split('').map((c, i) => [c, i]));

let c1 = chunk[0];
let c2 = chunk[1];
let c3 = chunk[2];
let c4 = chunk[3];
[c1, c2, c3, c4].forEach(c => {
if (!map.has(c)) {
throw new SyntaxError(`unexpected character ${JSON.stringify(c)}`);
}
});

let triplet =
(map.get(c1) << 18) +
Expand Down Expand Up @@ -147,7 +142,7 @@ function fromBase64(string, alphabet, lastChunkHandling, maxLength) {
if (chunk.length === 1) {
throw new SyntaxError('malformed padding: exactly one additional character');
}
bytes.push(...decodeChunk(chunk, alphabet, false));
bytes.push(...decodeBase64Chunk(chunk, false));
} else {
assert(lastChunkHandling === 'strict');
throw new SyntaxError('missing padding');
Expand Down Expand Up @@ -178,10 +173,22 @@ function fromBase64(string, alphabet, lastChunkHandling, maxLength) {
if (index < string.length) {
throw new SyntaxError('unexpected character after padding');
}
bytes.push(...decodeChunk(chunk, alphabet, lastChunkHandling === 'strict'));
bytes.push(...decodeBase64Chunk(chunk, lastChunkHandling === 'strict'));
assert(bytes.length <= maxLength);
return { bytes, read: string.length };
}
if (alphabet === 'base64url') {
if (char === '+' || char === '/') {
throw new SyntaxError(`unexpected character ${JSON.stringify(char)}`);
} else if (char === '-') {
char = '+';
} else if (char === '_') {
char = '/';
}
}
if (!base64Characters.includes(char)) {
throw new SyntaxError(`unexpected character ${JSON.stringify(char)}`);
}
let remainingBytes = maxLength - bytes.length;
if (remainingBytes === 1 && chunk.length === 2 || remainingBytes === 2 && chunk.length === 3) {
// special case: we can fit exactly the number of bytes currently represented by chunk, so we were just checking for `=`
Expand All @@ -190,7 +197,7 @@ function fromBase64(string, alphabet, lastChunkHandling, maxLength) {

chunk += char;
if (chunk.length === 4) {
bytes.push(...decodeChunk(chunk, alphabet, false));
bytes.push(...decodeBase64Chunk(chunk, false));
chunk = '';
read = index;
assert(bytes.length <= maxLength);
Expand Down
18 changes: 8 additions & 10 deletions spec.html
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,6 @@ <h1>
<h1>
DecodeBase64Chunk (
_chunk_: a string,
_alphabet_: *"base64"* or *"base64url"*,
optional _throwOnExtraBits_: a boolean,
): either a normal completion containing a List of byte values, or a throw completion
</h1>
Expand All @@ -217,12 +216,6 @@ <h1>
1. Set _chunk_ to the string-concatenation of _chunk_ and *"A"*.
1. Else,
1. Assert: _chunkLength_ is 4.
1. If _alphabet_ is *"base64url"*, then
1. TODO fix the types here - these are code points, not code units.
1. If _chunk_ contains U+002B (PLUS SIGN) or U+002F (SOLIDUS), throw a *SyntaxError* exception.
1. Replace all occurrences of U+002D (HYPHEN-MINUS) in _chunk_ with U+002B (PLUS SIGN).
1. Replace all occurrences of U+005F (LOW LINE) in _chunk_ with U+002F (SOLIDUS).
1. If any element of _chunk_ is not an element of the standard base64 alphabet, throw a *SyntaxError* exception.
1. Let _byteSequence_ be the unique sequence of 3 bytes resulting from decoding _chunk_ as base64 (such that applying the base64 encoding specified in section 4 of <a href="https://datatracker.ietf.org/doc/html/rfc4648">RFC 4648</a> to _byteSequence_ would produce _chunk_).
1. Let _bytes_ be a List whose elements are the elements of _byteSequence_, in order.
1. If _chunkLength_ is 2, then
Expand Down Expand Up @@ -273,7 +266,7 @@ <h1>
1. Else if _lastChunkHandling_ is *"loose"*, then
1. If _chunkLength_ is 1, then
1. Throw a *SyntaxError* exception.
1. Set _bytes_ to the list-concatenation of _bytes_ and ? DecodeBase64Chunk(_chunk_, _alphabet_, *false*).
1. Set _bytes_ to the list-concatenation of _bytes_ and ! DecodeBase64Chunk(_chunk_, *false*).
1. Else,
1. Assert: _lastChunkHandling_ is *"strict"*.
1. Throw a *SyntaxError* exception.
Expand All @@ -296,15 +289,20 @@ <h1>
1. Throw a *SyntaxError* exception.
1. If _lastChunkHandling_ is *"strict"*, let _throwOnExtraBits_ be *true*.
1. Else, let _throwOnExtraBits_ be *false*.
1. Set _bytes_ to the list-concatenation of _bytes_ and ? DecodeBase64Chunk(_chunk_, _alphabet_, _throwOnExtraBits_).
1. Set _bytes_ to the list-concatenation of _bytes_ and ? DecodeBase64Chunk(_chunk_, _throwOnExtraBits_).
1. Return the Record { [[Read]]: _length_, [[Bytes]]: _bytes_ }.
1. If _alphabet_ is *"base64url"*, then
1. If _char_ is either *"+"* or *"/"*, throw a *SyntaxError* exception.
1. Else if _char_ is *"-"*, set _char_ to *"+"*.
1. Else if _char_ is *"_"*, set _char_ to *"/"*.
1. If _char_ is not an element of the standard base64 alphabet, throw a *SyntaxError* exception.
1. Let _remaining_ be _maxLength_ - the length of _bytes_.
1. If _remaining_ = 1 and _chunkLength_ = 2, or if _remaining_ = 2 and _chunkLength_ = 3, then
1. Return the Record { [[Read]]: _read_, [[Bytes]]: _bytes_ }.
1. Set _chunk_ to the string-concatenation of _chunk_ and _char_.
1. Set _chunkLength_ to the length of _chunk_.
1. If _chunkLength_ = 4, then
1. Set _bytes_ to the list-concatenation of _bytes_ and ? DecodeBase64Chunk(_chunk_, _alphabet_).
1. Set _bytes_ to the list-concatenation of _bytes_ and ! DecodeBase64Chunk(_chunk_).
1. Set _chunk_ to the empty String.
1. Set _chunkLength_ to 0.
1. Set _read_ to _index_.
Expand Down

0 comments on commit d0a1833

Please sign in to comment.