From 142601cec2aa6dfe2308a3577ce22446ce551cfb Mon Sep 17 00:00:00 2001 From: Kevin Gibbons Date: Thu, 9 May 2024 23:04:01 -0700 Subject: [PATCH] Normative: add option to omit padding --- README.md | 6 ++---- playground/index-raw.html | 6 +++++- playground/polyfill-core.mjs | 5 +++-- spec.html | 5 +++-- test-polyfill.mjs | 8 ++++++++ 5 files changed, 21 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index e6484ec..7b5f78b 100644 --- a/README.md +++ b/README.md @@ -40,6 +40,8 @@ Additional options are supplied in an options bag argument: - `lastChunkHandling`: Recall that base64 decoding operates on chunks of 4 characters at a time, but the input may have some characters which don't fit evenly into such a chunk of 4 characters. This option determines how the final chunk of characters should be handled. The three options are `"loose"` (the default), which treats the chunk as if it had any necessary `=` padding (but throws if this is not possible, i.e. there is exactly one extra character); `"strict"`, which enforces that the chunk has exactly 4 characters (counting `=` padding) and that [overflow bits](https://datatracker.ietf.org/doc/html/rfc4648#section-3.5) are 0; and `"stop-before-partial"`, which stops decoding before the final chunk unless the final chunk has exactly 4 characters. +- `omitPadding`: When encoding, whether to omit `=` padding. Defaults to `false`, i.e., padding is included. + The hex methods do not take any options. ## Writing to an existing Uint8Array @@ -89,10 +91,6 @@ For base64, you can specify either base64 or base64url for both the encoder and For hex, both lowercase and uppercase characters (including mixed within the same string) will decode successfully. Output is always lowercase. -### How is `=` padding handled? - -Padding is always generated. The base64 decoder allows specifying how to handle inputs without it with the `lastChunkHandling` option. - ### How are the extra padding bits handled? 
If the length of your input data isn't exactly a multiple of 3 bytes, then encoding it will use either 2 or 3 base64 characters to encode the final 1 or 2 bytes. Since each base64 character is 6 bits, this means you'll be using either 12 or 18 bits to represent 8 or 16 bits, which means you have an extra 4 or 2 bits which don't encode anything. diff --git a/playground/index-raw.html b/playground/index-raw.html index 36cc439..1f03f82 100644 --- a/playground/index-raw.html +++ b/playground/index-raw.html @@ -86,7 +86,8 @@

Basic usage

Options

The base64 methods take an optional options bag which allows specifying the alphabet as either "base64" (the default) or "base64url" (the URL-safe variant).

-

The base64 decoder also allows specifying the behavior for the final chunk with lastChunkHandling. Recall that base64 decoding operates on chunks of 4 characters at a time, but the input may have some characters which don't fit evenly into such a chunk of 4 characters. This option determines how the final chunk of characters should be handled. The three options are "loose" (the default), which treats the chunk as if it had any necessary = padding (but throws if this is not possible, i.e. there is exactly one extra character); "strict", which enforces that the chunk has exactly 4 characters (counting = padding) and that overflow bits are 0; and "stop-before-partial", which stops decoding before the final chunk unless the final chunk has exactly 4 characters. +

The base64 decoder also allows specifying the behavior for the final chunk with lastChunkHandling. Recall that base64 decoding operates on chunks of 4 characters at a time, but the input may have some characters which don't fit evenly into such a chunk of 4 characters. This option determines how the final chunk of characters should be handled. The three options are "loose" (the default), which treats the chunk as if it had any necessary = padding (but throws if this is not possible, i.e. there is exactly one extra character); "strict", which enforces that the chunk has exactly 4 characters (counting = padding) and that overflow bits are 0; and "stop-before-partial", which stops decoding before the final chunk unless the final chunk has exactly 4 characters.

+

The base64 encoder allows omitting padding by specifying omitPadding: true. The default is to include padding.

The hex methods do not have any options.


@@ -109,6 +110,9 @@ 

Options

} catch { console.log('with lastChunkHandling: "strict", overflow bits are rejected'); } + +console.log((new Uint8Array([72])).toBase64()); // 'SA==' +console.log((new Uint8Array([72])).toBase64({ omitPadding: true })); // 'SA'

Writing to an existing Uint8Array

diff --git a/playground/polyfill-core.mjs b/playground/polyfill-core.mjs index 1e2fbb0..bff97c1 100644 --- a/playground/polyfill-core.mjs +++ b/playground/polyfill-core.mjs @@ -40,6 +40,7 @@ export function uint8ArrayToBase64(arr, options) { if (alphabet !== 'base64' && alphabet !== 'base64url') { throw new TypeError('expected alphabet to be either "base64" or "base64url"'); } + let omitPadding = !!opts.omitPadding; if ('detached' in arr.buffer && arr.buffer.detached) { throw new TypeError('toBase64 called on array backed by detached buffer'); @@ -63,13 +64,13 @@ export function uint8ArrayToBase64(arr, options) { lookup[(triplet >> 18) & 63] + lookup[(triplet >> 12) & 63] + lookup[(triplet >> 6) & 63] + - '='; + (omitPadding ? '' : '='); } else if (i + 1 === arr.length) { let triplet = arr[i] << 16; result += lookup[(triplet >> 18) & 63] + lookup[(triplet >> 12) & 63] + - '=='; + (omitPadding ? '' : '=='); } return result; } diff --git a/spec.html b/spec.html index 0044dd3..bd6746b 100644 --- a/spec.html +++ b/spec.html @@ -23,12 +23,13 @@

Uint8Array.prototype.toBase64 ( [ _options_ ] )

1. Let _alphabet_ be ? Get(_opts_, *"alphabet"*). 1. If _alphabet_ is *undefined*, set _alphabet_ to *"base64"*. 1. If _alphabet_ is neither *"base64"* nor *"base64url"*, throw a *TypeError* exception. + 1. Let _omitPadding_ be ToBoolean(? Get(_opts_, *"omitPadding"*)). 1. Let _toEncode_ be ? GetUint8ArrayBytes(_O_). 1. If _alphabet_ is *"base64"*, then - 1. Let _outAscii_ be the sequence of code points which results from encoding _toEncode_ according to the base64 encoding specified in section 4 of RFC 4648. Padding is included. + 1. Let _outAscii_ be the sequence of code points which results from encoding _toEncode_ according to the base64 encoding specified in section 4 of RFC 4648. Padding is included if and only if _omitPadding_ is *false*. 1. Else, 1. Assert: _alphabet_ is *"base64url"*. - 1. Let _outAscii_ be the sequence of code points which results from encoding _toEncode_ according to the base64url encoding specified in section 5 of RFC 4648. Padding is included. + 1. Let _outAscii_ be the sequence of code points which results from encoding _toEncode_ according to the base64url encoding specified in section 5 of RFC 4648. Padding is included if and only if _omitPadding_ is *false*. 1. Return CodePointsToString(_outAscii_). diff --git a/test-polyfill.mjs b/test-polyfill.mjs index 82f5ed6..b9646cb 100644 --- a/test-polyfill.mjs +++ b/test-polyfill.mjs @@ -26,6 +26,14 @@ test('standard vectors', async t => { } }); +test('omitPadding', async t => { + for (let [string, result] of standardBase64Vectors) { + await t.test(JSON.stringify(string), () => { + assert.strictEqual(stringToBytes(string).toBase64({ omitPadding: true }), result.replace(/=/g, '')); + }); + } +}); + let malformedPadding = ['=', 'Zg=', 'Z===', 'Zm8==', 'Zm9v=']; test('malformed padding', async t => { for (let string of malformedPadding) {