diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index b61a7a8..116e981 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -14,7 +14,7 @@ jobs: strategy: matrix: os: [ubuntu-latest, windows-latest, macOS-latest] - node: ["15", "14", "12", engines] + node: ["16", "15", "14", engines] exclude: # On Windows, run tests with only the LTS environments. - os: windows-latest diff --git a/CHANGELOG.md b/CHANGELOG.md index b1bb459..384f23a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,8 +1,18 @@ Changelog ========= -## next -- Fixed a bug where in BlobDataItem when the file was empty (#86) +## v3.0.0 +- Changed WeakMap for private fields (requires node 12) +- Switched to ESM +- blob.stream() returns a subset of whatwg stream which is the async iterable + (it no longer returns a node stream) +- Reduced the dependency on Buffer by changing to global TextEncoder/Decoder (requires node 11) +- Disabled xo since it couldn't understand private fields (#) +- No longer transform the type to lowercase (https://github.com/w3c/FileAPI/issues/43) + This is more loose than strict, keys should be lowercased, but values should not. + It would require a more proper mime type parser - so we just made it loose. +- index.js can now be imported by browser & deno since it no longer depends on any + core node features (but why would you? other environments can benefit from it) ## v2.1.2 - Fixed a bug where `start` in BlobDataItem was undefined (#85) diff --git a/README.md b/README.md index 53c81f5..41d8150 100644 --- a/README.md +++ b/README.md @@ -13,18 +13,95 @@ A Blob implementation in Node.js, originally from [node-fetch](https://github.co npm install fetch-blob ``` +
+ Upgrading from 2x to 3x + + Updating from 2 to 3 should be a breeze since there are not many changes to the blob specification. + The main reason for a major release is coding standards. + - internal WeakMaps were replaced with private fields + - internal Buffer.from was replaced with TextEncoder/Decoder + - internal buffers were replaced with Uint8Arrays + - CommonJS was replaced with ESM + - The node stream returned by calling `blob.stream()` was replaced with a simple generator function that yields Uint8Array (Breaking change) + (Read "Differences from other Blobs" for more info.) + + All of these changes have made it free of any dependency on core node modules, so it is possible to just import it using http-import from a CDN without any bundling + +
+ +
+ Differences from other Blobs + + - Unlike NodeJS `buffer.Blob` (Added in: v15.7.0) and the browser's native Blob, this polyfilled version can't be sent via PostMessage + - This blob version is more arbitrary: it can be constructed with blob parts that aren't instances of itself; + a part only has to look and behave like a blob to be accepted as a blob part. + - The benefit of this is that you can create other types of blobs that don't hold any data internally and have to be read in other ways, such as the `BlobDataItem` created in `from.js` that wraps a file path into a blob-like item and is read lazily (nodejs plans to [implement this][fs-blobs] as well) + - The `blob.stream()` is the most noticeable difference. It returns an AsyncGenerator that yields Uint8Arrays + + The reasoning behind `Blob.prototype.stream()` is that NodeJS readable streams + aren't spec compatible with whatwg streams and we didn't want to import the whole whatwg stream polyfill for node + or browserify NodeJS streams for the browsers, picking one flavor over the other. So we decided to opt out + of any stream and just implement the bare minimum of what both streams have in common, which is the asyncIterator + that yields Uint8Arrays in both. This is the most isomorphic way, with the use of `for-await-of` loops. + It would be redundant to convert anything to whatwg streams and then convert it back to + node streams since you work inside of Node. + It will probably stay like this until nodejs gets native support for whatwg[1](https://github.com/nodejs/whatwg-stream) streams and whatwg streams add the node + equivalent for `Readable.from(iterable)`[2](https://github.com/whatwg/streams/issues/1018) + + But for now if you really need a Node Stream then you can do so using this transformation + ```js + import {Readable} from 'stream' + const stream = Readable.from(blob.stream()) + ``` + But if you don't need it to be a stream then you can just use the asyncIterator part of it that is isomorphic. 
+ ```js + for await (const chunk of blob.stream()) { + console.log(chunk) // uInt8Array + } + ``` + If you need feature detection to work around this difference in behavior + ```js + if (Blob.prototype.stream?.constructor?.name === 'AsyncGeneratorFunction') { + // not spec compatible, monkey patch it... + // (Alternative you could extend the Blob and use super.stream()) + let orig = Blob.prototype.stream + Blob.prototype.stream = function () { + const iterator = orig.call(this) + return new ReadableStream({ + async pull (ctrl) { + const next = await iterator.next() + return next.done ? ctrl.close() : ctrl.enqueue(next.value) + } + }) + } + } + ``` + Possible future whatwg version: `ReadableStream.from(iterator)` + It's also possible to delete this method and instead use `.slice()` and `.arrayBuffer()` since it has both a public and private stream method +
+ ## Usage ```js -const Blob = require('fetch-blob'); -const fetch = require('node-fetch'); - -fetch('https://httpbin.org/post', { - method: 'POST', - body: new Blob(['Hello World'], { type: 'text/plain' }) -}) - .then(res => res.json()); - .then(json => console.log(json)); +// Ways to import +// (PS it's dependency free ESM package so regular http-import from CDN works too) +import Blob from 'fetch-blob' +import {Blob} from 'fetch-blob' +const {Blob} = await import('fetch-blob') + + +// Ways to read the blob: +const blob = new Blob(['hello, world']) + +await blob.text() +await blob.arrayBuffer() +for await (let chunk of blob.stream()) { ... } + +// turn the async iterator into a node stream +stream.Readable.from(blob.stream()) + +// turn the async iterator into a whatwg stream (feature) +globalThis.ReadableStream.from(blob.stream()) ``` ### Blob part backed up by filesystem @@ -35,13 +112,16 @@ npm install fetch-blob domexception ``` ```js -const blobFrom = require('fetch-blob/from.js'); -const blob1 = blobFrom('./2-GiB-file.bin'); -const blob2 = blobFrom('./2-GiB-file.bin'); +// The default export is sync and use fs.stat to retrieve size & last modified +import blobFromSync from 'fetch-blob/from.js' +import {Blob, blobFrom, blobFromSync} from 'fetch-blob/from.js' + +const fsBlob1 = blobFromSync('./2-GiB-file.bin') +const fsBlob2 = await blobFrom('./2-GiB-file.bin') // Not a 4 GiB memory snapshot, just holds 3 references // points to where data is located on the disk -const blob = new Blob([blob1, blob2]); +const blob = new Blob([fsBlob1, fsBlob2, 'memory']) console.log(blob.size) // 4 GiB ``` @@ -55,3 +135,4 @@ See the [MDN documentation](https://developer.mozilla.org/en-US/docs/Web/API/Blo [codecov-url]: https://codecov.io/gh/node-fetch/fetch-blob [install-size-image]: https://flat.badgen.net/packagephobia/install/fetch-blob [install-size-url]: https://packagephobia.now.sh/result?p=fetch-blob +[fs-blobs]: https://github.com/nodejs/node/issues/37340 diff --git 
a/from.js b/from.js index ca94205..e8798ca 100644 --- a/from.js +++ b/from.js @@ -1,56 +1,69 @@ -const {statSync, createReadStream} = require('fs'); -const Blob = require('./index.js'); -const DOMException = require('domexception'); +import {statSync, createReadStream} from 'fs'; +import {stat} from 'fs/promises'; +import DOMException from 'domexception'; +import Blob from './index.js'; /** * @param {string} path filepath on the disk * @returns {Blob} */ -function blobFrom(path) { - const {size, mtime} = statSync(path); - const blob = new BlobDataItem({path, size, mtime}); + const blobFromSync = path => from(statSync(path), path); - return new Blob([blob]); -} +/** + * @param {string} path filepath on the disk + * @returns {Promise} + */ + const blobFrom = path => stat(path).then(stat => from(stat, path)); + +const from = (stat, path) => new Blob([new BlobDataItem({ + path, + size: stat.size, + lastModified: stat.mtimeMs, + start: 0 +})]); /** * This is a blob backed up by a file on the disk - * with minium requirement + * with minium requirement. Its wrapped around a Blob as a blobPart + * so you have no direct access to this. 
* * @private */ class BlobDataItem { + #path; + #start; + constructor(options) { + this.#path = options.path; + this.#start = options.start; this.size = options.size; - this.path = options.path; - this.start = options.start || 0; - this.mtime = options.mtime; + this.lastModified = options.lastModified } - // Slicing arguments is first validated and formated - // to not be out of range by Blob.prototype.slice + /** + * Slicing arguments is first validated and formatted + * to not be out of range by Blob.prototype.slice + */ slice(start, end) { return new BlobDataItem({ - path: this.path, - start, - mtime: this.mtime, - size: end - start + path: this.#path, + lastModified: this.lastModified, + size: end - start, + start }); } - stream() { - if (statSync(this.path).mtime > this.mtime) { + async * stream() { + const {mtimeMs} = await stat(this.#path) + if (mtimeMs > this.lastModified) { throw new DOMException('The requested file could not be read, typically due to permission problems that have occurred after a reference to a file was acquired.', 'NotReadableError'); } - - if (!this.size) { - return new Blob().stream(); + if (this.size) { + yield * createReadStream(this.#path, { + start: this.#start, + end: this.#start + this.size - 1 + }); } - - return createReadStream(this.path, { - start: this.start, - end: this.start + this.size - 1 - }); } get [Symbol.toStringTag]() { @@ -58,4 +71,5 @@ class BlobDataItem { } } -module.exports = blobFrom; +export default blobFromSync; +export {Blob, blobFrom, blobFromSync}; diff --git a/index.js b/index.js index 4a5cdfe..ff00f4f 100644 --- a/index.js +++ b/index.js @@ -1,58 +1,76 @@ -const {Readable} = require('stream'); +// 64 KiB (same size chrome slice theirs blob into Uint8array's) +const POOL_SIZE = 65536; -/** - * @type {WeakMap} - */ -const wm = new WeakMap(); - -async function * read(parts) { - for (const part of parts) { +/** @param {(Blob | Uint8Array)[]} parts */ +async function * toIterator (parts, clone = true) { + for 
(let part of parts) { if ('stream' in part) { yield * part.stream(); + } else if (ArrayBuffer.isView(part)) { + if (clone) { + let position = part.byteOffset; + let end = part.byteOffset + part.byteLength; + while (position !== end) { + const size = Math.min(end - position, POOL_SIZE); + const chunk = part.buffer.slice(position, position + size); + yield new Uint8Array(chunk); + position += chunk.byteLength; + } + } else { + yield part; + } } else { - yield part; + // For blobs that have arrayBuffer but no stream method (nodes buffer.Blob) + let position = 0; + while (position !== part.size) { + const chunk = part.slice(position, Math.min(part.size, position + POOL_SIZE)); + const buffer = await chunk.arrayBuffer(); + position += buffer.byteLength; + yield new Uint8Array(buffer); + } } } } -class Blob { +export default class Blob { + + /** @type {Array.<(Blob|Uint8Array)>} */ + #parts = []; + #type = ''; + #size = 0; + /** * The Blob() constructor returns a new Blob object. The content * of the blob consists of the concatenation of the values given * in the parameter array. * - * @param {(ArrayBufferLike | ArrayBufferView | Blob | Buffer | string)[]} blobParts + * @param {*} blobParts * @param {{ type?: string }} [options] */ constructor(blobParts = [], options = {}) { let size = 0; const parts = blobParts.map(element => { - let buffer; - if (element instanceof Buffer) { - buffer = element; - } else if (ArrayBuffer.isView(element)) { - buffer = Buffer.from(element.buffer, element.byteOffset, element.byteLength); + let part; + if (ArrayBuffer.isView(element)) { + part = new Uint8Array(element.buffer.slice(element.byteOffset, element.byteOffset + element.byteLength)); } else if (element instanceof ArrayBuffer) { - buffer = Buffer.from(element); + part = new Uint8Array(element.slice(0)); } else if (element instanceof Blob) { - buffer = element; + part = element; } else { - buffer = Buffer.from(typeof element === 'string' ? 
element : String(element)); + part = new TextEncoder().encode(String(element)); } - // eslint-disable-next-line unicorn/explicit-length-check - size += buffer.length || buffer.size || 0; - return buffer; + size += ArrayBuffer.isView(part) ? part.byteLength : part.size; + return part; }); - const type = options.type === undefined ? '' : String(options.type).toLowerCase(); + const type = options.type === undefined ? '' : String(options.type); - wm.set(this, { - type: /[^\u0020-\u007E]/.test(type) ? '' : type, - size, - parts - }); + this.#type = /[^\u0020-\u007E]/.test(type) ? '' : type; + this.#size = size; + this.#parts = parts; } /** @@ -60,14 +78,14 @@ class Blob { * size of the Blob in bytes. */ get size() { - return wm.get(this).size; + return this.#size; } /** * The type property of a Blob object returns the MIME type of the file. */ get type() { - return wm.get(this).type; + return this.#type; } /** @@ -78,7 +96,16 @@ class Blob { * @return {Promise} */ async text() { - return Buffer.from(await this.arrayBuffer()).toString(); + // More optimized than using this.arrayBuffer() + // that requires twice as much ram + const decoder = new TextDecoder(); + let str = ''; + for await (let part of toIterator(this.#parts, false)) { + str += decoder.decode(part, { stream: true }); + } + // Remaining + str += decoder.decode(); + return str; } /** @@ -91,7 +118,7 @@ class Blob { async arrayBuffer() { const data = new Uint8Array(this.size); let offset = 0; - for await (const chunk of this.stream()) { + for await (const chunk of toIterator(this.#parts, false)) { data.set(chunk, offset); offset += chunk.length; } @@ -100,13 +127,13 @@ class Blob { } /** - * The Blob interface's stream() method is difference from native - * and uses node streams instead of whatwg streams. + * The Blob stream() implements partial support of the whatwg stream + * by only being async iterable. 
* - * @returns {Readable} Node readable stream + * @returns {AsyncGenerator} */ - stream() { - return Readable.from(read(wm.get(this).parts)); + async * stream() { + yield * toIterator(this.#parts, true); } /** @@ -125,7 +152,7 @@ class Blob { let relativeEnd = end < 0 ? Math.max(size + end, 0) : Math.min(end, size); const span = Math.max(relativeEnd - relativeStart, 0); - const parts = wm.get(this).parts.values(); + const parts = this.#parts; const blobParts = []; let added = 0; @@ -137,9 +164,15 @@ class Blob { relativeStart -= size; relativeEnd -= size; } else { - const chunk = part.slice(relativeStart, Math.min(size, relativeEnd)); + let chunk + if (ArrayBuffer.isView(part)) { + chunk = part.subarray(relativeStart, Math.min(size, relativeEnd)); + added += chunk.byteLength + } else { + chunk = part.slice(relativeStart, Math.min(size, relativeEnd)); + added += chunk.size + } blobParts.push(chunk); - added += ArrayBuffer.isView(chunk) ? chunk.byteLength : chunk.size; relativeStart = 0; // All next sequental parts should start at 0 // don't add the overflow to new blobParts @@ -150,7 +183,8 @@ class Blob { } const blob = new Blob([], {type: String(type).toLowerCase()}); - Object.assign(wm.get(blob), {size: span, parts: blobParts}); + blob.#size = span; + blob.#parts = blobParts; return blob; } @@ -163,9 +197,11 @@ class Blob { return ( object && typeof object === 'object' && - typeof object.stream === 'function' && - object.stream.length === 0 && typeof object.constructor === 'function' && + ( + typeof object.stream === 'function' || + typeof object.arrayBuffer === 'function' + ) && /^(Blob|File)$/.test(object[Symbol.toStringTag]) ); } @@ -177,4 +213,4 @@ Object.defineProperties(Blob.prototype, { slice: {enumerable: true} }); -module.exports = Blob; +export { Blob }; diff --git a/package.json b/package.json index bceea12..a48fab6 100644 --- a/package.json +++ b/package.json @@ -1,8 +1,9 @@ { "name": "fetch-blob", - "version": "2.1.2", + "version": "3.0.0-rc.0", 
"description": "A Blob implementation in Node.js, originally from node-fetch.", "main": "index.js", + "type": "module", "files": [ "from.js", "index.js", @@ -10,8 +11,8 @@ "from.d.ts" ], "scripts": { - "lint": "xo", - "test": "xo && ava", + "lint": "xo test.js", + "test": "npm run lint && ava", "report": "c8 --reporter json --reporter text ava", "coverage": "c8 --reporter json --reporter text ava && codecov -f coverage/coverage-final.json", "prepublishOnly": "tsc --declaration --emitDeclarationOnly --allowJs index.js from.js" @@ -22,7 +23,7 @@ "node-fetch" ], "engines": { - "node": "^10.17.0 || >=12.3.0" + "node": ">=14.0.0" }, "author": "David Frank", "license": "MIT", @@ -61,8 +62,7 @@ "c8": "^7.7.1", "codecov": "^3.8.1", "domexception": "^2.0.1", - "get-stream": "^6.0.1", - "node-fetch": "^2.6.1", + "node-fetch": "^3.0.0-beta.9", "typescript": "^4.2.4", "xo": "^0.38.2" } diff --git a/test.js b/test.js index ebbfad9..2e67bc9 100644 --- a/test.js +++ b/test.js @@ -1,10 +1,12 @@ -const fs = require('fs'); -const test = require('ava'); -const getStream = require('get-stream'); -const {Response} = require('node-fetch'); -const {TextDecoder} = require('util'); -const Blob = require('./index.js'); -const blobFrom = require('./from.js'); +import fs from 'fs'; +import test from 'ava'; +import {Response} from 'node-fetch'; +import {Readable} from 'stream'; +import buffer from 'buffer'; +import Blob from './index.js'; +import syncBlob, {blobFromSync, blobFrom} from './from.js'; + +const license = fs.readFileSync('./LICENSE', 'utf-8'); test('new Blob()', t => { const blob = new Blob(); // eslint-disable-line no-unused-vars @@ -25,11 +27,12 @@ test('Blob ctor parts', async t => { new Uint8Array([101]).buffer, Buffer.from('f'), new Blob(['g']), - {} + {}, + new URLSearchParams('foo') ]; const blob = new Blob(parts); - t.is(await blob.text(), 'abcdefg[object Object]'); + t.is(await blob.text(), 'abcdefg[object Object]foo='); }); test('Blob size', t => { @@ -81,8 +84,10 @@ 
test('Blob stream()', async t => { const data = 'a=1'; const type = 'text/plain'; const blob = new Blob([data], {type}); - const result = await getStream(blob.stream()); - t.is(result, data); + + for await (const chunk of blob.stream()) { + t.is(chunk.join(), [97, 61, 49].join()); + } }); test('Blob toString()', t => { @@ -131,7 +136,7 @@ test('Blob works with node-fetch Response.blob()', async t => { const data = 'a=1'; const type = 'text/plain'; const blob = new Blob([data], {type}); - const response = new Response(blob); + const response = new Response(Readable.from(blob.stream())); const blob2 = await response.blob(); t.is(await blob2.text(), data); }); @@ -140,19 +145,19 @@ test('Blob works with node-fetch Response.text()', async t => { const data = 'a=1'; const type = 'text/plain'; const blob = new Blob([data], {type}); - const response = new Response(blob); + const response = new Response(Readable.from(blob.stream())); const text = await response.text(); t.is(text, data); }); test('blob part backed up by filesystem', async t => { - const blob = blobFrom('./LICENSE'); - t.is(await blob.slice(0, 3).text(), 'MIT'); - t.is(await blob.slice(4, 11).text(), 'License'); + const blob = blobFromSync('./LICENSE'); + t.is(await blob.slice(0, 3).text(), license.slice(0, 3)); + t.is(await blob.slice(4, 11).text(), license.slice(4, 11)); }); test('Reading after modified should fail', async t => { - const blob = blobFrom('./LICENSE'); + const blob = blobFromSync('./LICENSE'); await new Promise(resolve => { setTimeout(resolve, 100); }); @@ -160,20 +165,24 @@ test('Reading after modified should fail', async t => { // Change modified time fs.utimesSync('./LICENSE', now, now); const error = await blob.text().catch(error => error); + t.is(error instanceof Error, true); t.is(error.name, 'NotReadableError'); }); test('Reading from the stream created by blobFrom', async t => { - const blob = blobFrom('./LICENSE'); - const expected = await fs.promises.readFile('./LICENSE', 'utf-8'); 
- - const actual = await getStream(blob.stream()); + const blob = blobFromSync('./LICENSE'); + const actual = await blob.text(); + t.is(actual, license); +}); - t.is(actual, expected); +test('create a blob from path asynchronous', async t => { + const blob = await blobFrom('./LICENSE'); + const actual = await blob.text(); + t.is(actual, license); }); test('Reading empty blobs', async t => { - const blob = blobFrom('./LICENSE').slice(0, 0); + const blob = blobFromSync('./LICENSE').slice(0, 0); const actual = await blob.text(); t.is(actual, ''); }); @@ -193,3 +202,55 @@ test('Blob-ish class is an instance of Blob', t => { test('Instanceof check returns false for nullish values', t => { t.false(null instanceof Blob); }); + +/** @see https://github.com/w3c/FileAPI/issues/43 - important to keep boundary value */ +test('Dose not lowercase the blob values', t => { + const type = 'multipart/form-data; boundary=----WebKitFormBoundaryTKqdrVt01qOBltBd'; + t.is(new Blob([], {type}).type, type); +}); + +test('Parts are immutable', async t => { + const buf = new Uint8Array([97]); + const blob = new Blob([buf]); + buf[0] = 98; + t.is(await blob.text(), 'a'); +}); + +test('Blobs are immutable', async t => { + const buf = new Uint8Array([97]); + const blob = new Blob([buf]); + const chunk = await blob.stream().next(); + t.is(chunk.value[0], 97); + chunk.value[0] = 98; + t.is(await blob.text(), 'a'); +}); + +// This was necessary to avoid large ArrayBuffer clones (slice) +test('Large chunks are divided into smaller chunks', async t => { + const buf = new Uint8Array(65590); + const blob = new Blob([buf]); + let i = 0; + // eslint-disable-next-line no-unused-vars + for await (const chunk of blob.stream()) { + i++; + } + + t.is(i === 2, true); +}); + +test('Can use named import - as well as default', async t => { + const {Blob, default: def} = await import('./index.js'); + t.is(Blob, def); +}); + +test('default from.js exports blobFromSync', t => { + t.is(blobFromSync, syncBlob); +}); 
+ +if (buffer.Blob) { + test('Can wrap buffer.Blob to a fetch-blob', async t => { + const blob1 = new buffer.Blob(['blob part']); + const blob2 = new Blob([blob1]); + t.is(await blob2.text(), 'blob part'); + }); +}