diff --git a/README.md b/README.md index 13fa1b6..1878dd8 100644 --- a/README.md +++ b/README.md @@ -92,7 +92,7 @@ const data = createTar( ### Compression -You can optionaly use `createTarGzip` or `createTarGzipStream` to create a compressed tar data stream (returned value is a [`Promise`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Uint8Array) or [`RedableStream`](https://developer.mozilla.org/en-US/docs/Web/API/ReadableStream) piped to [`CompressionStream`](https://developer.mozilla.org/en-US/docs/Web/API/CompressionStream)) +You can optionally use `createTarGzip` or `createTarGzipStream` to create a compressed tar data stream (returned value is a [`Promise`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Uint8Array) or [`ReadableStream`](https://developer.mozilla.org/en-US/docs/Web/API/ReadableStream) piped to [`CompressionStream`](https://developer.mozilla.org/en-US/docs/Web/API/CompressionStream)) ```js import { createTarGzip, createTarGzipStream } from "nanotar"; @@ -139,6 +139,14 @@ const files = parseTar(data); Parsed files array has two additional properties: `size` file size and `text`, a lazy getter that decodes `data` view as a string. 
+You can filter items to read using the `filter` option: + +```ts +const files = parseTar(data, { + filter: (file) => file.name.startsWith("dir/"), +}); +``` + ### Decompression If input is compressed, you can use `parseTarGzip` utility instead to parse it (it used [`DecompressionStream`](https://developer.mozilla.org/en-US/docs/Web/API/DecompressionStream) internally and return a `Promise` value) diff --git a/src/parse.ts b/src/parse.ts index ab34e9a..5dc4b00 100644 --- a/src/parse.ts +++ b/src/parse.ts @@ -1,19 +1,31 @@ -import type { ParsedTarFileItem } from "./types"; +import type { ParsedTarFileItem, ParsedTarFileItemMeta } from "./types"; const TAR_TYPE_FILE = 0; const TAR_TYPE_DIR = 5; +export interface ParseTarOptions { + /** + * A filter function called with each entry's metadata: return `true` to include the entry, or `false` to skip it. + */ + filter?: (file: ParsedTarFileItemMeta) => boolean; +} + /** * Parses a TAR file from a binary buffer and returns an array of {@link TarFileItem} objects. * * @param {ArrayBuffer | Uint8Array} data - The binary data of the TAR file. * @returns {ParsedTarFileItem[]} An array of file items contained in the TAR file. */ -export function parseTar(data: ArrayBuffer | Uint8Array): ParsedTarFileItem[] { +export function parseTar( + data: ArrayBuffer | Uint8Array, + opts?: ParseTarOptions, +): ParsedTarFileItem[] { const buffer = (data as Uint8Array).buffer || data; const files: ParsedTarFileItem[] = []; + const filter = opts?.filter; + let offset = 0; while (offset < buffer.byteLength - 512) { @@ -35,6 +47,9 @@ export function parseTar(data: ArrayBuffer | Uint8Array): ParsedTarFileItem[] { // File size (offset: 124 - length: 12) const size = _readNumber(buffer, offset + 124, 12); + // Calculate next seek offset based on size + const seek = 512 + 512 * Math.trunc(size / 512) + (size % 512 ? 
512 : 0); + // File mtime (offset: 136 - length: 12) const mtime = _readNumber(buffer, offset + 136, 12); @@ -54,20 +69,11 @@ export function parseTar(data: ArrayBuffer | Uint8Array): ParsedTarFileItem[] { // File owner group (offset: 297 - length: 32) const group = _readString(buffer, offset + 297, 32); - // File data (offset: 512 - length: size) - const data = - _type === TAR_TYPE_DIR - ? undefined - : new Uint8Array(buffer, offset + 512, size); - - files.push({ + // Group all file metadata + const meta: ParsedTarFileItemMeta = { name, type, size, - data, - get text() { - return new TextDecoder().decode(this.data); - }, attrs: { mode, uid, @@ -76,12 +82,29 @@ export function parseTar(data: ArrayBuffer | Uint8Array): ParsedTarFileItem[] { user, group, }, - }); + }; - offset += 512 + 512 * Math.trunc(size / 512); - if (size % 512) { - offset += 512; + // Filter + if (filter && !filter(meta)) { + offset += seek; + continue; } + + // File data (offset: 512 - length: size) + const data = + _type === TAR_TYPE_DIR + ? 
undefined + : new Uint8Array(buffer, offset + 512, size); + + files.push({ + ...meta, + data, + get text() { + return new TextDecoder().decode(this.data); + }, + }); + + offset += seek; } return files; @@ -98,7 +121,7 @@ export function parseTar(data: ArrayBuffer | Uint8Array): ParsedTarFileItem[] { */ export async function parseTarGzip( data: ArrayBuffer | Uint8Array, - opts: { compression?: CompressionFormat } = {}, + opts: ParseTarOptions & { compression?: CompressionFormat } = {}, ): Promise { const stream = new ReadableStream({ start(controller) { @@ -109,7 +132,7 @@ export async function parseTarGzip( const decompressedData = await new Response(stream).arrayBuffer(); - return parseTar(decompressedData); + return parseTar(decompressedData, opts); } function _readString(buffer: ArrayBufferLike, offset: number, size: number) { diff --git a/src/types.ts b/src/types.ts index 4123240..8060be0 100644 --- a/src/types.ts +++ b/src/types.ts @@ -34,6 +34,8 @@ export interface ParsedTarFileItem extends TarFileItem { readonly text: string; } +export type ParsedTarFileItemMeta = Omit; + export interface TarFileAttrs { /** * File mode in octal (e.g., `664`) represents read, write, and execute permissions for the owner, group, and others. diff --git a/test/index.test.ts b/test/index.test.ts index 676c23d..387356e 100644 --- a/test/index.test.ts +++ b/test/index.test.ts @@ -92,4 +92,17 @@ describe("nanotar", () => { ] `); }); + + it("parseTar (with filter)", async () => { + const data = await createTarGzip(fixture); + const files = ( + await parseTarGzip(data, { + filter: (file) => file.name.startsWith("foo/"), + }) + ).map((f) => ({ + ...f, + data: f.data ? inspect(f.data).replace(/\s+/g, " ") : undefined, + })); + expect(files.map((f) => f.name)).toMatchObject(["foo/bar.txt"]); + }); });