From e365cd9513565f090226badeeeba3c80ed4a3d4d Mon Sep 17 00:00:00 2001 From: Doctor <44320105+BlackAsLight@users.noreply.github.com> Date: Mon, 6 Jan 2025 16:54:34 +1100 Subject: [PATCH] perf(cbor): encodeCbor & encodeCborSequence (#6311) --- cbor/_common_encode.ts | 366 ++++++++++++++++++++--------------- cbor/encode_cbor.ts | 35 +--- cbor/encode_cbor_sequence.ts | 13 +- cbor/encode_cbor_test.ts | 6 +- 4 files changed, 229 insertions(+), 191 deletions(-) diff --git a/cbor/_common_encode.ts b/cbor/_common_encode.ts index 973fdd055eb6..dc2ed858aef8 100644 --- a/cbor/_common_encode.ts +++ b/cbor/_common_encode.ts @@ -1,188 +1,250 @@ // Copyright 2018-2025 the Deno authors. MIT license. -import { concat } from "@std/bytes"; -import { numberToArray } from "./_common.ts"; -import { encodeCbor } from "./encode_cbor.ts"; -import type { CborTag } from "./tag.ts"; +import { CborTag } from "./tag.ts"; import type { CborType } from "./types.ts"; -export function encodeNumber(x: number): Uint8Array { - if (x % 1 === 0) { - const isNegative = x < 0; - const majorType = isNegative ? 0b001_00000 : 0b000_00000; - if (isNegative) x = -x - 1; +function calcHeaderSize(x: number | bigint): number { + if (x < 24) return 1; + if (x < 2 ** 8) return 2; + if (x < 2 ** 16) return 3; + if (x < 2 ** 32) return 5; + return 9; +} + +export function calcEncodingSize(x: CborType): number { + if (x == undefined || typeof x === "boolean") return 1; + if (typeof x === "number") { + return x % 1 === 0 ? calcHeaderSize(x < 0 ? -x - 1 : x) : 9; + } + if (typeof x === "bigint") return calcHeaderSize(x < 0n ? -x - 1n : x); + if (typeof x === "string" || x instanceof Uint8Array) { + return calcHeaderSize(x.length) + x.length; + } + if (x instanceof Date) return 1 + calcEncodingSize(x.getTime() / 1000); + if (x instanceof CborTag) { + return calcHeaderSize(x.tagNumber) + calcEncodingSize(x.tagContent); + } + if (x instanceof Array) { + let size = calcHeaderSize(x.length); + for (const y of x) size += calcEncodingSize(y); + return size; + } + if (x instanceof Map) { + let size = 3 + calcHeaderSize(x.size); + for (const y of x) size += calcEncodingSize(y[0]) + calcEncodingSize(y[1]); + return size; + } + let pairs = 0; + let size = 0; + for (const y in x) { + ++pairs; + size += calcHeaderSize(y.length) + y.length + calcEncodingSize(x[y]); + } + return size + calcHeaderSize(pairs); +} - if (x < 24) return Uint8Array.from([majorType + x]); - if (x < 2 ** 8) return Uint8Array.from([majorType + 24, x]); - const output = new Uint8Array(9); +export function encode( + input: CborType, + output: Uint8Array, + offset: number, +): number { + switch (typeof input) { + case "undefined": + output[offset++] = 0b111_10111; + break; + case "boolean": + output[offset++] = input ? 0b111_10101 : 0b111_10100; + break; + case "number": + return encodeNumber(input, output, offset); + case "bigint": + return encodeBigInt(input, output, offset); + case "string": + return encodeString(input, output, offset); + default: + if (input === null) output[offset++] = 0b111_10110; + else if (input instanceof Uint8Array) { + return encodeUint8Array(input, output, offset); + } else if (input instanceof Date) { + return encodeDate(input, output, offset); + } else if (input instanceof CborTag) { + return encodeTag(input, output, offset); + } else if (input instanceof Map) return encodeMap(input, output, offset); + else if (input instanceof Array) { + return encodeArray(input, output, offset); + } else return encodeObject(input, output, offset); + } + return offset; +} + +function encodeHeader( + majorType: number, + input: number | bigint, + output: Uint8Array, + offset: number, +): number { + if (input < 24) output[offset++] = majorType + Number(input); + else if (input < 2 ** 8) { + output[offset++] = majorType + 0b000_11000; + output[offset++] = Number(input); + } else { const view = new DataView(output.buffer); - if (x < 2 ** 16) { - output[0] = majorType + 25; - view.setUint16(1, x); - return output.subarray(0, 3); - } - if (x < 2 ** 32) { - output[0] = majorType + 26; - view.setUint32(1, x); - return output.subarray(0, 5); + if (input < 2 ** 16) { + output[offset++] = majorType + 0b000_11001; + view.setUint16(offset, Number(input)); + offset += 2; + } else if (input < 2 ** 32) { + output[offset++] = majorType + 0b000_11010; + view.setUint32(offset, Number(input)); + offset += 4; + } else { + output[offset++] = majorType + 0b000_11011; + view.setBigUint64(offset, BigInt(input)); + offset += 8; } - if (x < 2 ** 64) { - output[0] = majorType + 27; - // Due to possible precision loss with numbers this large, it's best to do conversion under BigInt or end up with 1n off. - view.setBigUint64(1, BigInt(x)); - return output; + } + return offset; +} + +function encodeNumber( + input: number, + output: Uint8Array, + offset: number, +): number { + if (input % 1 === 0) { + const isNegative = input < 0; + if (isNegative && input <= -(2 ** 64)) { + throw new RangeError( + `Cannot encode number: It (${input}) exceeds -(2 ** 64) - 1`, + ); + } else if (input >= 2 ** 64) { + throw new RangeError( + `Cannot encode number: It (${input}) exceeds 2 ** 64 - 1`, + ); } - throw new RangeError( - `Cannot encode number: It (${isNegative ? -x - 1 : x}) exceeds ${ - isNegative ? "-" : "" - }2 ** 64 - 1`, + return encodeHeader( + isNegative ? 0b001_00000 : 0b000_00000, + isNegative ? -input - 1 : input, + output, + offset, ); } - const output = new Uint8Array(9); const view = new DataView(output.buffer); - output[0] = 0b111_11011; - view.setFloat64(1, x); - return output; + output[offset++] = 0b111_11011; + view.setFloat64(offset, input); + return offset + 8; } -export function encodeBigInt(x: bigint): Uint8Array { - const isNegative = x < 0n; - if ((isNegative ? -x : x) < 2n ** 32n) return encodeNumber(Number(x)); - - const output = new Uint8Array(9); - const view = new DataView(output.buffer); - if (isNegative) x = -x - 1n; - if (x < 2n ** 64n) { - output[0] = isNegative ? 0b001_11011 : 0b000_11011; - view.setBigUint64(1, x); - return output; +function encodeBigInt( + input: bigint, + output: Uint8Array, + offset: number, +): number { + const isNegative = input < 0n; + if (isNegative && input <= -(2n ** 64n)) { + throw new RangeError( + `Cannot encode bigint: It (${input}) exceeds -(2n ** 64n) - 1n`, + ); + } else if (input >= 2n ** 64n) { + throw new RangeError( + `Cannot encode bigint: It (${input}) exceeds 2n ** 64n - 1n`, + ); } - throw new RangeError( - `Cannot encode bigint: It (${isNegative ? -x - 1n : x}) exceeds ${ - isNegative ? "-" : "" - }2 ** 64 - 1`, + return encodeHeader( + isNegative ? 0b001_00000 : 0b000_00000, + isNegative ? -input - 1n : input, + output, + offset, ); } -export function encodeUint8Array(x: Uint8Array): Uint8Array { - if (x.length < 24) { - return concat([new Uint8Array([0b010_00000 + x.length]), x]); - } - if (x.length < 2 ** 8) { - return concat([new Uint8Array([0b010_11000, x.length]), x]); - } - if (x.length < 2 ** 16) { - return concat([ - new Uint8Array([0b010_11001]), - numberToArray(2, x.length), - x, - ]); - } - if (x.length < 2 ** 32) { - return concat([ - new Uint8Array([0b010_11010]), - numberToArray(4, x.length), - x, - ]); - } - // Can safely assume `x.length < 2 ** 64` as JavaScript doesn't support a `Uint8Array` being that large. - return concat([ - new Uint8Array([0b010_11011]), - numberToArray(8, x.length), - x, - ]); +function encodeUint8Array( + input: Uint8Array, + output: Uint8Array, + offset: number, +): number { + offset = encodeHeader(0b010_00000, input.length, output, offset); + output.set(input, offset); + return offset + input.length; } -export function encodeString(x: string): Uint8Array { - const array = encodeUint8Array(new TextEncoder().encode(x)); - array[0]! += 1 << 5; - return array; +function encodeString( + input: string, + output: Uint8Array, + offset: number, +): number { + const binary = new TextEncoder().encode(input); + offset = encodeHeader(0b011_00000, binary.length, output, offset); + output.set(binary, offset); + return offset + binary.length; } -export function encodeDate(x: Date): Uint8Array { - const input = encodeNumber(x.getTime() / 1000); - // deno-lint-ignore no-explicit-any - const output = new Uint8Array((input.buffer as any) - .transfer(input.length + 1)); - output.set(output.subarray(0, -1), 1); - output[0] = 0b110_00001; - return output; +function encodeArray( + input: CborType[], + output: Uint8Array, + offset: number, +): number { + offset = encodeHeader(0b100_00000, input.length, output, offset); + for (const value of input) offset = encode(value, output, offset); + return offset; } -export function encodeMap(x: Map): Uint8Array { - const len = x.size; - let head: Uint8Array; - if (len < 24) head = Uint8Array.from([0b101_00000 + len]); - else if (len < 2 ** 8) head = Uint8Array.from([0b101_11000, len]); - else { - head = new Uint8Array(9); - const view = new DataView(head.buffer); - if (len < 2 ** 16) { - head[0] = 0b101_11001; - view.setUint16(1, len); - head = head.subarray(0, 3); - } else if (len < 2 ** 32) { - head[0] = 0b101_11010; - view.setUint32(1, len); - head = head.subarray(0, 5); - } else { - head[0] = 0b101_11011; - view.setBigUint64(1, BigInt(len)); - } +function encodeObject( + input: { [k: string]: CborType }, + output: Uint8Array, + offset: number, +): number { + output[offset] = 0b101_00000; + offset = encodeHeader(0b101_00000, Object.keys(input).length, output, offset); + for (const key in input) { + offset = encodeString(key, output, offset); + offset = encode(input[key], output, offset); } - return concat([ - Uint8Array.from([217, 1, 3]), // TagNumber 259 - head, - ...Array.from(x - .entries()) - .map(([k, v]) => [encodeCbor(k), encodeCbor(v)]) - .flat(), - ]); -} - -export function encodeArray(x: CborType[]): Uint8Array { - let head: number[]; - if (x.length < 24) head = [0b100_00000 + x.length]; - else if (x.length < 2 ** 8) head = [0b100_11000, x.length]; - else if (x.length < 2 ** 16) { - head = [0b100_11001, ...numberToArray(2, x.length)]; - } else if (x.length < 2 ** 32) { - head = [0b100_11010, ...numberToArray(4, x.length)]; - } // Can safely assume `x.length < 2 ** 64` as JavaScript doesn't support an `Array` being that large. - else head = [0b100_11011, ...numberToArray(8, x.length)]; - return concat([Uint8Array.from(head), ...x.map((x) => encodeCbor(x))]); + return offset; } -export function encodeObject(x: { [k: string]: CborType }): Uint8Array { - const len = Object.keys(x).length; - let head: number[]; - if (len < 24) head = [0b101_00000 + len]; - else if (len < 2 ** 8) head = [0b101_11000, len]; - else if (len < 2 ** 16) head = [0b101_11001, ...numberToArray(2, len)]; - else if (len < 2 ** 32) head = [0b101_11010, ...numberToArray(4, len)]; - // Can safely assume `len < 2 ** 64` as JavaScript doesn't support an `Object` being that Large. - else head = [0b101_11011, ...numberToArray(8, len)]; - return concat([ - Uint8Array.from(head), - ...Object.entries(x).map(( - [k, v], - ) => [encodeString(k), encodeCbor(v)]).flat(), - ]); +function encodeDate(input: Date, output: Uint8Array, offset: number): number { + output[offset++] = 0b110_00001; + return encodeNumber(input.getTime() / 1000, output, offset); } -export function encodeTag(x: CborTag) { - const tagNumber = BigInt(x.tagNumber); +function encodeTag( + input: CborTag, + output: Uint8Array, + offset: number, +): number { + const tagNumber = BigInt(input.tagNumber); if (tagNumber < 0n) { throw new RangeError( - `Cannot encode Tag Item: Tag Number (${x.tagNumber}) is less than zero`, + `Cannot encode Tag Item: Tag Number (${input.tagNumber}) is less than zero`, ); } - if (tagNumber > 2n ** 64n) { + if (tagNumber >= 2n ** 64n) { throw new RangeError( - `Cannot encode Tag Item: Tag Number (${x.tagNumber}) exceeds 2 ** 64 - 1`, + `Cannot encode Tag Item: Tag Number (${input.tagNumber}) exceeds 2 ** 64 - 1`, ); } - const head = encodeBigInt(tagNumber); - head[0]! += 0b110_00000; - return concat([head, encodeCbor(x.tagContent)]); + offset = encodeHeader(0b110_00000, tagNumber, output, offset); + return encode( + input.tagContent, + output, + offset, + ); +} + +function encodeMap( + input: Map, + output: Uint8Array, + offset: number, +): number { + // Tag Number 259 = [217, 1, 3] + output[offset++] = 217; + output[offset++] = 1; + output[offset++] = 3; + offset = encodeHeader(0b101_00000, input.size, output, offset); + for (const pair of input) { + offset = encode(pair[0], output, offset); + offset = encode(pair[1], output, offset); + } + return offset; } diff --git a/cbor/encode_cbor.ts b/cbor/encode_cbor.ts index 5641f5b354fa..b05ab372879c 100644 --- a/cbor/encode_cbor.ts +++ b/cbor/encode_cbor.ts @@ -1,17 +1,6 @@ // Copyright 2018-2025 the Deno authors. MIT license. -import { - encodeArray, - encodeBigInt, - encodeDate, - encodeMap, - encodeNumber, - encodeObject, - encodeString, - encodeTag, - encodeUint8Array, -} from "./_common_encode.ts"; -import { CborTag } from "./tag.ts"; +import { calcEncodingSize, encode } from "./_common_encode.ts"; import type { CborType } from "./types.ts"; /** @@ -47,23 +36,7 @@ import type { CborType } from "./types.ts"; * @returns A {@link Uint8Array} representing the encoded data. */ export function encodeCbor(value: CborType): Uint8Array { - switch (typeof value) { - case "number": - return encodeNumber(value); - case "string": - return encodeString(value); - case "boolean": - return new Uint8Array([value ? 0b111_10101 : 0b111_10100]); - case "undefined": - return new Uint8Array([0b111_10111]); - case "bigint": - return encodeBigInt(value); - } - if (value === null) return new Uint8Array([0b111_10110]); - if (value instanceof Date) return encodeDate(value); - if (value instanceof Uint8Array) return encodeUint8Array(value); - if (value instanceof Array) return encodeArray(value); - if (value instanceof CborTag) return encodeTag(value); - if (value instanceof Map) return encodeMap(value); - return encodeObject(value); + const output = new Uint8Array(calcEncodingSize(value)); + encode(value, output, 0); + return output; } diff --git a/cbor/encode_cbor_sequence.ts b/cbor/encode_cbor_sequence.ts index 721fc2c2bff4..e11d12616ca7 100644 --- a/cbor/encode_cbor_sequence.ts +++ b/cbor/encode_cbor_sequence.ts @@ -1,7 +1,6 @@ // Copyright 2018-2025 the Deno authors. MIT license. -import { concat } from "@std/bytes/concat"; -import { encodeCbor } from "./encode_cbor.ts"; +import { calcEncodingSize, encode } from "./_common_encode.ts"; import type { CborType } from "./types.ts"; /** @@ -36,7 +35,11 @@ import type { CborType } from "./types.ts"; * @returns A {@link Uint8Array} representing the encoded data. */ export function encodeCborSequence(values: CborType[]): Uint8Array { - const output: Uint8Array[] = []; - for (const value of values) output.push(encodeCbor(value)); - return concat(output); + let x = 0; + for (const value of values) x += calcEncodingSize(value); + const output = new Uint8Array(x); + + x = 0; + for (const value of values) x = encode(value, output, x); + return output; } diff --git a/cbor/encode_cbor_test.ts b/cbor/encode_cbor_test.ts index ec0b96f5d56c..7ac1e136ff90 100644 --- a/cbor/encode_cbor_test.ts +++ b/cbor/encode_cbor_test.ts @@ -440,7 +440,7 @@ Deno.test("encodeCbor() rejecting numbers as Int", () => { encodeCbor(num); }, RangeError, - `Cannot encode number: It (${num}) exceeds -2 ** 64 - 1`, + `Cannot encode number: It (${num}) exceeds -(2 ** 64) - 1`, ); }); @@ -451,7 +451,7 @@ Deno.test("encodeCbor() rejecting bigints as Uint", () => { encodeCbor(num); }, RangeError, - `Cannot encode bigint: It (${num}) exceeds 2 ** 64 - 1`, + `Cannot encode bigint: It (${num}) exceeds 2n ** 64n - 1n`, ); }); @@ -462,7 +462,7 @@ Deno.test("encodeCbor() rejecting bigints as Int", () => { encodeCbor(num); }, RangeError, - `Cannot encode bigint: It (${num}) exceeds -2 ** 64 - 1`, + `Cannot encode bigint: It (${num}) exceeds -(2n ** 64n) - 1n`, ); });