From 6768a50f5ba4955221953d705912f89a77227493 Mon Sep 17 00:00:00 2001 From: Aapo Alasuutari Date: Wed, 30 Aug 2023 23:24:56 +0300 Subject: [PATCH] feat(bytes): Deprecate BytesList class --- bytes/bytes_list.ts | 41 ++++++++++ msgpack/encode.ts | 58 +++++++------- streams/_common.ts | 5 +- streams/delimiter_stream.ts | 132 ++++++++++++++++++++++++------- streams/delimiter_stream_test.ts | 69 ++++++++-------- 5 files changed, 214 insertions(+), 91 deletions(-) diff --git a/bytes/bytes_list.ts b/bytes/bytes_list.ts index da71708f8491..c18d29fdc649 100644 --- a/bytes/bytes_list.ts +++ b/bytes/bytes_list.ts @@ -3,6 +3,8 @@ /** * An abstraction of multiple Uint8Arrays + * + * @deprecated (will be removed in 0.205.0) Use a plain array of Uint8Arrays instead. */ export class BytesList { #len = 0; @@ -16,12 +18,20 @@ export class BytesList { /** * Total size of bytes + * + * @deprecated Use a plain array of Uint8Arrays instead and sum the `byteLength` of its items. */ size() { return this.#len; } /** * Push bytes with given offset infos + * + * @deprecated Use a plain array of Uint8Arrays instead. + * Adding into the array can be done with {@linkcode Array#push}. + * If the `start` or `end` parameters are + * used then use {@linkcode Uint8Array#subarray} + * to slice the needed part without copying. */ add(value: Uint8Array, start = 0, end = value.byteLength) { if (value.byteLength === 0 || end - start === 0) { @@ -39,6 +49,16 @@ export class BytesList { /** * Drop head `n` bytes. + * + * @deprecated Use a plain array of Uint8Arrays instead. + * Shifting from the array can be done using conditional + * {@linkcode Array#shift}s against the number of bytes left + * to be dropped. + * + * If the next item in the array is longer than the number + * of bytes left to be dropped, then instead of shifting it out + * it should be replaced in-place with a subarray of itself that + * drops the remaining bytes from the front.
*/ shift(n: number) { if (n === 0) { @@ -67,6 +87,12 @@ export class BytesList { /** * Find chunk index in which `pos` locates by binary-search * returns -1 if out of range + * + * @deprecated Use a plain array of Uint8Arrays instead. + * Finding the index of a chunk in the array can be + * done using {@linkcode Array#findIndex} with a counter + * for the number of bytes already encountered from past + * chunks' {@linkcode Uint8Array#byteLength}. */ getChunkIndex(pos: number): number { let max = this.#chunks.length; @@ -90,6 +116,10 @@ export class BytesList { /** * Get indexed byte from chunks + * + * @deprecated Use a plain array of Uint8Arrays instead. + * See {@linkcode getChunkIndex} for finding a chunk + * by number of bytes. */ get(i: number): number { if (i < 0 || this.#len <= i) { @@ -102,6 +132,8 @@ export class BytesList { /** * Iterator of bytes from given position + * + * @deprecated Use a plain array of Uint8Arrays instead. */ *iterator(start = 0): IterableIterator { const startIdx = this.getChunkIndex(start); @@ -119,6 +151,13 @@ export class BytesList { /** * Returns subset of bytes copied + * + * @deprecated Use a plain array of Uint8Arrays instead. + * For copying the whole list see {@linkcode concat}. + * For copying subarrays find the start and end chunk indexes + * and the internal indexes within those Uint8Arrays, prepare + * a Uint8Array of size `end - start` and set the chunks (or + * chunk subarrays) into that at proper offsets. */ slice(start: number, end: number = this.#len): Uint8Array { if (end === start) { @@ -146,6 +185,8 @@ export class BytesList { } /** * Concatenate chunks into single Uint8Array copied. + * + * @deprecated Use a plain array of Uint8Arrays and the `concat.ts` module instead. 
*/ concat(): Uint8Array { const result = new Uint8Array(this.#len); diff --git a/msgpack/encode.ts b/msgpack/encode.ts index 55b06e74a876..cce432b26448 100644 --- a/msgpack/encode.ts +++ b/msgpack/encode.ts @@ -1,6 +1,6 @@ // Copyright 2018-2023 the Deno authors. All rights reserved. MIT license. -import { BytesList } from "../bytes/bytes_list.ts"; +import { concat } from "../bytes/concat.ts"; export type ValueType = | number @@ -48,9 +48,9 @@ const encoder = new TextEncoder(); * ``` */ export function encode(object: ValueType) { - const byteList = new BytesList(); - encodeSlice(object, byteList); - return byteList.concat(); + const byteParts: Uint8Array[] = []; + encodeSlice(object, byteParts); + return concat(...byteParts); } function encodeFloat64(num: number) { @@ -119,24 +119,24 @@ function encodeNumber(num: number) { return encodeFloat64(num); } -function encodeSlice(object: ValueType, byteList: BytesList) { +function encodeSlice(object: ValueType, byteParts: Uint8Array[]) { if (object === null) { - byteList.add(new Uint8Array([0xc0])); + byteParts.push(new Uint8Array([0xc0])); return; } if (object === false) { - byteList.add(new Uint8Array([0xc2])); + byteParts.push(new Uint8Array([0xc2])); return; } if (object === true) { - byteList.add(new Uint8Array([0xc3])); + byteParts.push(new Uint8Array([0xc3])); return; } if (typeof object === "number") { - byteList.add(encodeNumber(object)); + byteParts.push(encodeNumber(object)); return; } @@ -149,7 +149,7 @@ function encodeSlice(object: ValueType, byteList: BytesList) { const dataView = new DataView(new ArrayBuffer(9)); dataView.setBigInt64(1, object); dataView.setUint8(0, 0xd3); - byteList.add(new Uint8Array(dataView.buffer)); + byteParts.push(new Uint8Array(dataView.buffer)); return; } @@ -160,7 +160,7 @@ function encodeSlice(object: ValueType, byteList: BytesList) { const dataView = new DataView(new ArrayBuffer(9)); dataView.setBigUint64(1, object); dataView.setUint8(0, 0xcf); - byteList.add(new 
Uint8Array(dataView.buffer)); + byteParts.push(new Uint8Array(dataView.buffer)); return; } @@ -169,63 +169,63 @@ function encodeSlice(object: ValueType, byteList: BytesList) { const len = encoded.length; if (len < FIVE_BITS) { // fixstr - byteList.add(new Uint8Array([0xa0 | len])); + byteParts.push(new Uint8Array([0xa0 | len])); } else if (len < EIGHT_BITS) { // str 8 - byteList.add(new Uint8Array([0xd9, len])); + byteParts.push(new Uint8Array([0xd9, len])); } else if (len < SIXTEEN_BITS) { // str 16 const dataView = new DataView(new ArrayBuffer(3)); dataView.setUint16(1, len); dataView.setUint8(0, 0xda); - byteList.add(new Uint8Array(dataView.buffer)); + byteParts.push(new Uint8Array(dataView.buffer)); } else if (len < THIRTY_TWO_BITS) { // str 32 const dataView = new DataView(new ArrayBuffer(5)); dataView.setUint32(1, len); dataView.setUint8(0, 0xdb); - byteList.add(new Uint8Array(dataView.buffer)); + byteParts.push(new Uint8Array(dataView.buffer)); } else { throw new Error( "Cannot safely encode string with size larger than 32 bits", ); } - byteList.add(encoded); + byteParts.push(encoded); return; } if (object instanceof Uint8Array) { if (object.length < EIGHT_BITS) { // bin 8 - byteList.add(new Uint8Array([0xc4, object.length])); + byteParts.push(new Uint8Array([0xc4, object.length])); } else if (object.length < SIXTEEN_BITS) { // bin 16 const dataView = new DataView(new ArrayBuffer(3)); dataView.setUint16(1, object.length); dataView.setUint8(0, 0xc5); - byteList.add(new Uint8Array(dataView.buffer)); + byteParts.push(new Uint8Array(dataView.buffer)); } else if (object.length < THIRTY_TWO_BITS) { // bin 32 const dataView = new DataView(new ArrayBuffer(5)); dataView.setUint32(1, object.length); dataView.setUint8(0, 0xc6); - byteList.add(new Uint8Array(dataView.buffer)); + byteParts.push(new Uint8Array(dataView.buffer)); } else { throw new Error( "Cannot safely encode Uint8Array with size larger than 32 bits", ); } - byteList.add(object); + byteParts.push(object); 
return; } if (Array.isArray(object)) { if (object.length < FOUR_BITS) { // fixarray - byteList.add(new Uint8Array([0x90 | object.length])); + byteParts.push(new Uint8Array([0x90 | object.length])); } else if (object.length < SIXTEEN_BITS) { // array 16 const dataView = new DataView(new ArrayBuffer(3)); dataView.setUint16(1, object.length); dataView.setUint8(0, 0xdc); - byteList.add(new Uint8Array(dataView.buffer)); + byteParts.push(new Uint8Array(dataView.buffer)); } else if (object.length < THIRTY_TWO_BITS) { // array 32 const dataView = new DataView(new ArrayBuffer(5)); dataView.setUint32(1, object.length); dataView.setUint8(0, 0xdd); - byteList.add(new Uint8Array(dataView.buffer)); + byteParts.push(new Uint8Array(dataView.buffer)); } else { throw new Error( "Cannot safely encode array with size larger than 32 bits", @@ -233,7 +233,7 @@ function encodeSlice(object: ValueType, byteList: BytesList) { } for (const obj of object) { - encodeSlice(obj, byteList); + encodeSlice(obj, byteParts); } return; } @@ -243,24 +243,24 @@ function encodeSlice(object: ValueType, byteList: BytesList) { const numKeys = Object.keys(object).length; if (numKeys < FOUR_BITS) { // fixarray - byteList.add(new Uint8Array([0x80 | numKeys])); + byteParts.push(new Uint8Array([0x80 | numKeys])); } else if (numKeys < SIXTEEN_BITS) { // map 16 const dataView = new DataView(new ArrayBuffer(3)); dataView.setUint16(1, numKeys); dataView.setUint8(0, 0xde); - byteList.add(new Uint8Array(dataView.buffer)); + byteParts.push(new Uint8Array(dataView.buffer)); } else if (numKeys < THIRTY_TWO_BITS) { // map 32 const dataView = new DataView(new ArrayBuffer(5)); dataView.setUint32(1, numKeys); dataView.setUint8(0, 0xdf); - byteList.add(new Uint8Array(dataView.buffer)); + byteParts.push(new Uint8Array(dataView.buffer)); } else { throw new Error("Cannot safely encode map with size larger than 32 bits"); } for (const [key, value] of Object.entries(object)) { - encodeSlice(key, byteList); - encodeSlice(value, 
byteList); + encodeSlice(key, byteParts); + encodeSlice(value, byteParts); } return; } diff --git a/streams/_common.ts b/streams/_common.ts index e5d394ff7534..fcd7f0b520cb 100644 --- a/streams/_common.ts +++ b/streams/_common.ts @@ -6,11 +6,12 @@ export const DEFAULT_BUFFER_SIZE = 32 * 1024; /** Generate longest proper prefix which is also suffix array. */ export function createLPS(pat: Uint8Array): Uint8Array { - const lps = new Uint8Array(pat.length); + const length = pat.length; + const lps = new Uint8Array(length); lps[0] = 0; let prefixEnd = 0; let i = 1; - while (i < lps.length) { + while (i < length) { if (pat[i] === pat[prefixEnd]) { prefixEnd++; lps[i] = prefixEnd; diff --git a/streams/delimiter_stream.ts b/streams/delimiter_stream.ts index 8f6b963e077b..1aefbfc72acb 100644 --- a/streams/delimiter_stream.ts +++ b/streams/delimiter_stream.ts @@ -1,7 +1,7 @@ // Copyright 2018-2023 the Deno authors. All rights reserved. MIT license. // This module is browser compatible. -import { BytesList } from "../bytes/bytes_list.ts"; +import { concat } from "../bytes/concat.ts"; import { createLPS } from "./_common.ts"; /** Disposition of the delimiter. */ @@ -52,13 +52,11 @@ export interface DelimiterStreamOptions { * @returns Transform stream */ export class DelimiterStream extends TransformStream { - #bufs = new BytesList(); + #bufs: Uint8Array[] = []; #delimiter: Uint8Array; - #inspectIndex = 0; #matchIndex = 0; - #delimLen: number; #delimLPS: Uint8Array; - #disp?: DelimiterDisposition; + #disp: DelimiterDisposition; constructor( delimiter: Uint8Array, @@ -69,12 +67,11 @@ export class DelimiterStream extends TransformStream { this.#handle(chunk, controller); }, flush: (controller) => { - controller.enqueue(this.#bufs.concat()); + controller.enqueue(concat(...this.#bufs)); }, }); this.#delimiter = delimiter; - this.#delimLen = delimiter.length; this.#delimLPS = createLPS(delimiter); this.#disp = options?.disposition ?? 
"discard"; } @@ -83,32 +80,109 @@ export class DelimiterStream extends TransformStream { chunk: Uint8Array, controller: TransformStreamDefaultController, ) { - this.#bufs.add(chunk); - let localIndex = 0; - while (this.#inspectIndex < this.#bufs.size()) { - if (chunk[localIndex] === this.#delimiter[this.#matchIndex]) { - this.#inspectIndex++; - localIndex++; - this.#matchIndex++; - if (this.#matchIndex === this.#delimLen) { + const bufs = this.#bufs; + const length = chunk.byteLength; + const disposition = this.#disp; + const delimiter = this.#delimiter; + const delimLen = delimiter.length; + const lps = this.#delimLPS; + let chunkStart = 0; + let matchIndex = this.#matchIndex; + let inspectIndex = 0; + while (inspectIndex < length) { + if (chunk[inspectIndex] === delimiter[matchIndex]) { + // Next byte matched our next delimiter byte + inspectIndex++; + matchIndex++; + if (matchIndex === delimLen) { // Full match - const start = this.#inspectIndex - this.#delimLen; - const end = this.#disp === "suffix" ? this.#inspectIndex : start; - const copy = this.#bufs.slice(0, end); - controller.enqueue(copy); - const shift = this.#disp === "prefix" ? start : this.#inspectIndex; - this.#bufs.shift(shift); - this.#inspectIndex = this.#disp === "prefix" ? this.#delimLen : 0; - this.#matchIndex = 0; + matchIndex = 0; + const delimiterStartIndex = inspectIndex - delimLen; + const delimitedChunkEnd = disposition === "suffix" + ? inspectIndex + : delimiterStartIndex; + if (delimitedChunkEnd <= 0 && bufs.length === 0) { + // Our chunk started with a delimiter and no previous chunks exist: enqueue an empty chunk and skip the delimiter unless it must prefix the next chunk. + controller.enqueue(new Uint8Array()); + chunkStart = disposition === "prefix" ? 0 : inspectIndex; + } else if (delimitedChunkEnd > 0 && bufs.length === 0) { + // No previous chunks, slice from current chunk. + controller.enqueue(chunk.subarray(chunkStart, delimitedChunkEnd)); + // Our chunk may have more than one delimiter; we must remember where + // the next delimited chunk begins.
+ chunkStart = disposition === "prefix" + ? delimiterStartIndex + : inspectIndex; + } else if (delimitedChunkEnd === 0 && bufs.length > 0) { + // Our chunk started with a delimiter, previous chunks are passed as + // they are (with concatenation). + if (bufs.length === 1) { + // Concat not needed when a single buffer is passed. + controller.enqueue(bufs[0]); + } else { + controller.enqueue(concat(...bufs)); + } + // Drop all previous chunks. + bufs.length = 0; + if (disposition !== "prefix") { + // suffix or discard: The next chunk starts where our inspection finished. + // We should only ever end up here with a discard disposition as + // for a suffix disposition this branch would mean that the previous + // chunk ended with a full match but was not enqueued. + chunkStart = inspectIndex; + } + } else if (delimitedChunkEnd < 0 && bufs.length > 0) { + // Our chunk started by finishing a partial delimiter match. + const lastIndex = bufs.length - 1; + const last = bufs[lastIndex]; + const lastSliceIndex = last.byteLength + delimitedChunkEnd; + const lastSliced = last.subarray(0, lastSliceIndex); + if (lastIndex === 0) { + controller.enqueue(lastSliced); + } else { + bufs[lastIndex] = lastSliced; + controller.enqueue(concat(...bufs)); + } + bufs.length = 0; + if (disposition === "prefix") { + // Must keep last bytes of last chunk. + bufs.push(last.subarray(lastSliceIndex)); + } else { + chunkStart = inspectIndex; + } + } else if (delimitedChunkEnd > 0 && bufs.length > 0) { + // Previous chunks and current chunk together form a delimited chunk. + const chunkSliced = chunk.subarray(chunkStart, delimitedChunkEnd); + const result = concat(...bufs, chunkSliced); + bufs.length = 0; + chunkStart = disposition === "prefix" + ? delimitedChunkEnd + : inspectIndex; + controller.enqueue(result); + } else { + throw new Error("unreachable"); + } } + } else if (matchIndex === 0) { + // No match ongoing, keep going through the buffer.
+ inspectIndex++; } else { - if (this.#matchIndex === 0) { - this.#inspectIndex++; - localIndex++; - } else { - this.#matchIndex = this.#delimLPS[this.#matchIndex - 1]; - } + // Ongoing match: Degrade to the previous possible match. + // eg. If we're looking for 'AAB' and had matched 'AA' previously + // but now got a new 'A', then we'll drop down to having matched + // just 'A'. The while loop will turn around again and we'll rematch + // to 'AA' and proceed onwards to try and match on 'B' again. + matchIndex = lps[matchIndex - 1]; } } + // Save match index. + this.#matchIndex = matchIndex; + if (chunkStart === 0) { + bufs.push(chunk); + } else if (chunkStart < length) { + // Bytes remain after the last delimiter in this chunk: buffer them. + // When chunkStart === length nothing is left, so nothing is pushed. + bufs.push(chunk.subarray(chunkStart)); + } } } diff --git a/streams/delimiter_stream_test.ts b/streams/delimiter_stream_test.ts index 034544828a25..709f441a6c10 100644 --- a/streams/delimiter_stream_test.ts +++ b/streams/delimiter_stream_test.ts @@ -3,71 +3,78 @@ import { DelimiterStream } from "./delimiter_stream.ts"; import { testTransformStream } from "./_test_common.ts"; +const DELIMITER_STREAM_INPUTS = [ + "a", // more than one subsequent chunks with no delimiters + "b", // more than one subsequent chunks with no delimiters + "cCRLF", // more than one subsequent chunks with no delimiters + "CRLF", // chunk with only delimiter + "qwertzu", // no delimiter + "iopasdCRLFmnbvc", // one delimiter in the middle + "xylkjhCRLFgfdsapCRLFoiuzt", // two separate delimiters + "euoiCRLFCRLFaueiou", // two consecutive delimiters + "rewq098765432CR", // split delimiter (1/2) + "LF349012i491290", // split delimiter (2/2) + "asdfghjkliopCR", // split delimiter with followup (1/2) + "LFytrewqCRLFmnbvcxz", // split delimiter with followup (2/2) + "CRLFasd", // chunk starts with delimiter +].map((s) => new TextEncoder().encode(s)); + Deno.test("[streams] DelimiterStream, discard", async
() => { const crlf = new TextEncoder().encode("CRLF"); const delimStream = new DelimiterStream(crlf, { disposition: "discard" }); - const inputs = [ - "qwertzu", // no delimiter - "iopasdCRLFmnbvc", // one delimiter in the middle - "xylkjhCRLFgfdsapCRLFoiuzt", // two separate delimiters - "euoiCRLFCRLFaueiou", // two consecutive delimiters - "rewq098765432CR", // split delimiter (1/2) - "LF349012i491290", // split delimiter (2/2) - ].map((s) => new TextEncoder().encode(s)); const outputs = [ + "abc", + "", "qwertzuiopasd", "mnbvcxylkjh", "gfdsap", "oiuzteuoi", "", "aueiourewq098765432", - "349012i491290", + "349012i491290asdfghjkliop", + "ytrewq", + "mnbvcxz", + "asd", ].map((s) => new TextEncoder().encode(s)); - await testTransformStream(delimStream, inputs, outputs); + await testTransformStream(delimStream, DELIMITER_STREAM_INPUTS, outputs); }); Deno.test("[streams] DelimiterStream, suffix", async () => { const crlf = new TextEncoder().encode("CRLF"); const delimStream = new DelimiterStream(crlf, { disposition: "suffix" }); - const inputs = [ - "qwertzu", // no delimiter - "iopasdCRLFmnbvc", // one delimiter in the middle - "xylkjhCRLFgfdsapCRLFoiuzt", // two separate delimiters - "euoiCRLFCRLFaueiou", // two consecutive delimiters - "rewq098765432CR", // split delimiter (1/2) - "LF349012i491290", // split delimiter (2/2) - ].map((s) => new TextEncoder().encode(s)); const outputs = [ + "abcCRLF", + "CRLF", "qwertzuiopasdCRLF", "mnbvcxylkjhCRLF", "gfdsapCRLF", "oiuzteuoiCRLF", "CRLF", "aueiourewq098765432CRLF", - "349012i491290", + "349012i491290asdfghjkliopCRLF", + "ytrewqCRLF", + "mnbvcxzCRLF", + "asd", ].map((s) => new TextEncoder().encode(s)); - await testTransformStream(delimStream, inputs, outputs); + await testTransformStream(delimStream, DELIMITER_STREAM_INPUTS, outputs); }); Deno.test("[streams] DelimiterStream, prefix", async () => { const crlf = new TextEncoder().encode("CRLF"); const delimStream = new DelimiterStream(crlf, { disposition: "prefix" }); - 
const inputs = [ - "qwertzu", // no delimiter - "iopasdCRLFmnbvc", // one delimiter in the middle - "xylkjhCRLFgfdsapCRLFoiuzt", // two separate delimiters - "euoiCRLFCRLFaueiou", // two consecutive delimiters - "rewq098765432CR", // split delimiter (1/2) - "LF349012i491290", // split delimiter (2/2) - ].map((s) => new TextEncoder().encode(s)); const outputs = [ - "qwertzuiopasd", + "abc", + "CRLF", + "CRLFqwertzuiopasd", "CRLFmnbvcxylkjh", "CRLFgfdsap", "CRLFoiuzteuoi", "CRLF", "CRLFaueiourewq098765432", - "CRLF349012i491290", + "CRLF349012i491290asdfghjkliop", + "CRLFytrewq", + "CRLFmnbvcxz", + "CRLFasd", ].map((s) => new TextEncoder().encode(s)); - await testTransformStream(delimStream, inputs, outputs); + await testTransformStream(delimStream, DELIMITER_STREAM_INPUTS, outputs); });