diff --git a/etc/benchmarks/bson_versions.json b/etc/benchmarks/bson_versions.json index 7058882f..1e61d11c 100644 --- a/etc/benchmarks/bson_versions.json +++ b/etc/benchmarks/bson_versions.json @@ -1,10 +1,6 @@ { "versions": [ - "1.1.6", "4.6", - "5.0", - "5.1", - "5.2", "5.3" ] } diff --git a/etc/benchmarks/install_bson_versions.sh b/etc/benchmarks/install_bson_versions.sh index 71a2f8f5..c8e966c8 100755 --- a/etc/benchmarks/install_bson_versions.sh +++ b/etc/benchmarks/install_bson_versions.sh @@ -1,5 +1,6 @@ #!/bin/bash -versions=$(jq '.versions' < bson_versions.json | sed -E 's/(\[|\]|,|")//g') +# To be run from repo root +versions=$(jq '.versions' < etc/benchmarks/bson_versions.json | sed -E 's/(\[|\]|,|")//g') installVersions='' for bson in $versions; do versionNoDot=$(echo $bson | tr -d '.') diff --git a/etc/benchmarks/main.mjs b/etc/benchmarks/main.mjs index 6a927cb6..6f48cb1f 100644 --- a/etc/benchmarks/main.mjs +++ b/etc/benchmarks/main.mjs @@ -129,6 +129,61 @@ await runner({ } }); +await runner({ + skip: true, + name: 'deserialize a large batch of documents each with an array of many Int32s', + iterations, + setup(libs) { + const bson = libs[0].lib; + return bson.serialize({ + nextBatch: Array.from({ length: 1000 }, () => ({ + _id: new bson.ObjectId(), + arrayField: Array.from({ length: 100 }, (_, i) => i) + })) + }); + }, + async run(i, bson, document) { + await Promise.all( + Array.from( + { length: 100 }, + (_, i) => + new Promise(resolve => { + setTimeout(() => { + resolve(bson.lib.deserialize(document, { validation: { utf8: false } })); + }, 20); + }) + ) + ); + } +}); + +await runner({ + skip: true, + name: 'deserialize a large batch of documents each with an array of many Int64s', + iterations, + setup(libs) { + const bson = libs[0].lib; + return bson.serialize({ + nextBatch: Array.from({ length: 1000 }, () => ({ + _id: new bson.ObjectId(), + arrayField: Array.from({ length: 100 }, (_, i) => bson.Long.fromInt(i)) + })) + }); + }, + async run(i, bson, document) { + await Promise.all( + Array.from( + { length: 100 }, + (_, i) => + new Promise(resolve => { + setTimeout(() => { + resolve(bson.lib.deserialize(document, { validation: { utf8: false } })); + }, 20); + }) + ) + ); + } +}); // End console.log( 'Total time taken to benchmark:', diff --git a/src/binary.ts b/src/binary.ts index f69f62bf..1324fd41 100644 --- a/src/binary.ts +++ b/src/binary.ts @@ -223,8 +223,9 @@ export class Binary extends BSONValue { toString(encoding?: 'hex' | 'base64' | 'utf8' | 'utf-8'): string { if (encoding === 'hex') return ByteUtils.toHex(this.buffer); if (encoding === 'base64') return ByteUtils.toBase64(this.buffer); - if (encoding === 'utf8' || encoding === 'utf-8') return ByteUtils.toUTF8(this.buffer); - return ByteUtils.toUTF8(this.buffer); + if (encoding === 'utf8' || encoding === 'utf-8') + return ByteUtils.toUTF8(this.buffer, 0, this.buffer.byteLength); + return ByteUtils.toUTF8(this.buffer, 0, this.buffer.byteLength); } /** @internal */ diff --git a/src/parser/deserializer.ts b/src/parser/deserializer.ts index 7ffd3636..abb61046 100644 --- a/src/parser/deserializer.ts +++ b/src/parser/deserializer.ts @@ -236,7 +236,7 @@ function deserializeObject( if (i >= buffer.byteLength) throw new BSONError('Bad BSON Document: illegal CString'); // Represents the key - const name = isArray ? arrayIndex++ : ByteUtils.toUTF8(buffer.subarray(index, i)); + const name = isArray ? arrayIndex++ : ByteUtils.toUTF8(buffer, index, i); // shouldValidateKey is true if the key should be validated, false otherwise let shouldValidateKey = true; @@ -476,7 +476,7 @@ function deserializeObject( // If are at the end of the buffer there is a problem with the document if (i >= buffer.length) throw new BSONError('Bad BSON Document: illegal CString'); // Return the C string - const source = ByteUtils.toUTF8(buffer.subarray(index, i)); + const source = ByteUtils.toUTF8(buffer, index, i); // Create the regexp index = i + 1; @@ -489,7 +489,7 @@ function deserializeObject( // If are at the end of the buffer there is a problem with the document if (i >= buffer.length) throw new BSONError('Bad BSON Document: illegal CString'); // Return the C string - const regExpOptions = ByteUtils.toUTF8(buffer.subarray(index, i)); + const regExpOptions = ByteUtils.toUTF8(buffer, index, i); index = i + 1; // For each option add the corresponding one for javascript @@ -521,7 +521,7 @@ function deserializeObject( // If are at the end of the buffer there is a problem with the document if (i >= buffer.length) throw new BSONError('Bad BSON Document: illegal CString'); // Return the C string - const source = ByteUtils.toUTF8(buffer.subarray(index, i)); + const source = ByteUtils.toUTF8(buffer, index, i); index = i + 1; // Get the start search index @@ -533,7 +533,7 @@ function deserializeObject( // If are at the end of the buffer there is a problem with the document if (i >= buffer.length) throw new BSONError('Bad BSON Document: illegal CString'); // Return the C string - const regExpOptions = ByteUtils.toUTF8(buffer.subarray(index, i)); + const regExpOptions = ByteUtils.toUTF8(buffer, index, i); index = i + 1; // Set the object @@ -678,7 +678,7 @@ function deserializeObject( throw new BSONError('Invalid UTF-8 string in BSON document'); } } - const namespace = ByteUtils.toUTF8(buffer.subarray(index, index + stringSize - 1)); + const namespace = ByteUtils.toUTF8(buffer, index, index + stringSize - 1); // Update parse index position index = index + stringSize; @@ -735,7 +735,7 @@ function getValidatedString( end: number, shouldValidateUtf8: boolean ) { - const value = ByteUtils.toUTF8(buffer.subarray(start, end)); + const value = ByteUtils.toUTF8(buffer, start, end); // if utf8 validation is on, do the check if (shouldValidateUtf8) { for (let i = 0; i < value.length; i++) { diff --git a/src/utils/byte_utils.ts b/src/utils/byte_utils.ts index 7c44b38a..41ec2c6a 100644 --- a/src/utils/byte_utils.ts +++ b/src/utils/byte_utils.ts @@ -26,7 +26,7 @@ export type ByteUtils = { /** Create a Uint8Array containing utf8 code units from a string */ fromUTF8: (text: string) => Uint8Array; /** Create a string from utf8 code units */ - toUTF8: (buffer: Uint8Array) => string; + toUTF8: (buffer: Uint8Array, start: number, end: number) => string; /** Get the utf8 code unit count from a string if it were to be transformed to utf8 */ utf8ByteLength: (input: string) => number; /** Encode UTF8 bytes generated from `source` string into `destination` at byteOffset. Returns the number of bytes encoded. */ diff --git a/src/utils/node_byte_utils.ts b/src/utils/node_byte_utils.ts index 468acf89..214b1e39 100644 --- a/src/utils/node_byte_utils.ts +++ b/src/utils/node_byte_utils.ts @@ -5,7 +5,7 @@ type NodeJsBuffer = ArrayBufferView & Uint8Array & { write(string: string, offset: number, length: undefined, encoding: 'utf8'): number; copy(target: Uint8Array, targetStart: number, sourceStart: number, sourceEnd: number): number; - toString: (this: Uint8Array, encoding: NodeJsEncoding) => string; + toString: (this: Uint8Array, encoding: NodeJsEncoding, start?: number, end?: number) => string; equals: (this: Uint8Array, other: Uint8Array) => boolean; }; type NodeJsBufferConstructor = Omit & { @@ -125,8 +125,8 @@ export const nodeJsByteUtils = { return Buffer.from(text, 'utf8'); }, - toUTF8(buffer: Uint8Array): string { - return nodeJsByteUtils.toLocalBufferType(buffer).toString('utf8'); + toUTF8(buffer: Uint8Array, start: number, end: number): string { + return nodeJsByteUtils.toLocalBufferType(buffer).toString('utf8', start, end); }, utf8ByteLength(input: string): number { diff --git a/src/utils/web_byte_utils.ts b/src/utils/web_byte_utils.ts index 6e45f91e..cf93e43a 100644 --- a/src/utils/web_byte_utils.ts +++ b/src/utils/web_byte_utils.ts @@ -172,8 +172,8 @@ export const webByteUtils = { return new TextEncoder().encode(text); }, - toUTF8(uint8array: Uint8Array): string { - return new TextDecoder('utf8', { fatal: false }).decode(uint8array); + toUTF8(uint8array: Uint8Array, start: number, end: number): string { + return new TextDecoder('utf8', { fatal: false }).decode(uint8array.slice(start, end)); }, utf8ByteLength(input: string): number {