Skip to content

Commit

Permalink
feat(parse): support filter option
Browse files Browse the repository at this point in the history
resolves #28
  • Loading branch information
pi0 committed Jan 20, 2025
1 parent 71917b8 commit 1eddc34
Show file tree
Hide file tree
Showing 4 changed files with 66 additions and 20 deletions.
10 changes: 9 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ const data = createTar(

### Compression

You can optionaly use `createTarGzip` or `createTarGzipStream` to create a compressed tar data stream (returned value is a [`Promise<Uint8Array>`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Uint8Array) or [`RedableStream`](https://developer.mozilla.org/en-US/docs/Web/API/ReadableStream) piped to [`CompressionStream`](https://developer.mozilla.org/en-US/docs/Web/API/CompressionStream))
You can optionally use `createTarGzip` or `createTarGzipStream` to create a compressed tar data stream (returned value is a [`Promise<Uint8Array>`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Uint8Array) or [`ReadableStream`](https://developer.mozilla.org/en-US/docs/Web/API/ReadableStream) piped to [`CompressionStream`](https://developer.mozilla.org/en-US/docs/Web/API/CompressionStream))

```js
import { createTarGzip, createTarGzipStream } from "nanotar";
Expand Down Expand Up @@ -139,6 +139,14 @@ const files = parseTar(data);

Each item in the parsed files array has two additional properties: `size`, the file size, and `text`, a lazy getter that decodes the `data` view as a string.

You can filter which items to read using the `filter` option:

```ts
const files = parseTar(data, {
  filter: (file) => file.name.startsWith("dir/"),
});
```

### Decompression

If the input is compressed, you can use the `parseTarGzip` utility instead to parse it (it uses [`DecompressionStream`](https://developer.mozilla.org/en-US/docs/Web/API/DecompressionStream) internally and returns a `Promise<ParsedTarFileItem[]>` value)
Expand Down
61 changes: 42 additions & 19 deletions src/parse.ts
Original file line number Diff line number Diff line change
@@ -1,19 +1,31 @@
import type { ParsedTarFileItem } from "./types";
import type { ParsedTarFileItem, ParsedTarFileItemMeta } from "./types";

const TAR_TYPE_FILE = 0;
const TAR_TYPE_DIR = 5;

/**
* Options accepted by `parseTar`; `parseTarGzip` accepts the same options and forwards them.
*/
export interface ParseTarOptions {
/**
* Predicate called with each entry's metadata (everything except `data` and `text`)
* before the entry's contents are read.
*
* Return `true` to include the entry in the result; a falsy return value skips it.
* When omitted, every entry is included.
*/
filter?: (file: ParsedTarFileItemMeta) => boolean;
}

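/**
* Parses a TAR file from a binary buffer and returns an array of {@link ParsedTarFileItem} objects.
*
* @param {ArrayBuffer | Uint8Array} data - The binary data of the TAR file.
* @param {ParseTarOptions} [opts] - Optional parsing options. When `opts.filter` is set, entries for which it returns a falsy value are skipped.
* @returns {ParsedTarFileItem[]} An array of file items contained in the TAR file.
*/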
export function parseTar(data: ArrayBuffer | Uint8Array): ParsedTarFileItem[] {
export function parseTar(
data: ArrayBuffer | Uint8Array,
opts?: ParseTarOptions,
): ParsedTarFileItem[] {
const buffer = (data as Uint8Array).buffer || data;

const files: ParsedTarFileItem[] = [];

const filter = opts?.filter;

let offset = 0;

while (offset < buffer.byteLength - 512) {
Expand All @@ -35,6 +47,9 @@ export function parseTar(data: ArrayBuffer | Uint8Array): ParsedTarFileItem[] {
// File size (offset: 124 - length: 12)
const size = _readNumber(buffer, offset + 124, 12);

// Calculate next seek offset based on size
const seek = 512 + 512 * Math.trunc(size / 512) + (size % 512 ? 512 : 0);

// File mtime (offset: 136 - length: 12)
const mtime = _readNumber(buffer, offset + 136, 12);

Expand All @@ -54,20 +69,11 @@ export function parseTar(data: ArrayBuffer | Uint8Array): ParsedTarFileItem[] {
// File owner group (offset: 297 - length: 32)
const group = _readString(buffer, offset + 297, 32);

// File data (offset: 512 - length: size)
const data =
_type === TAR_TYPE_DIR
? undefined
: new Uint8Array(buffer, offset + 512, size);

files.push({
// Group all file metadata
const meta: ParsedTarFileItemMeta = {
name,
type,
size,
data,
get text() {
return new TextDecoder().decode(this.data);
},
attrs: {
mode,
uid,
Expand All @@ -76,12 +82,29 @@ export function parseTar(data: ArrayBuffer | Uint8Array): ParsedTarFileItem[] {
user,
group,
},
});
};

offset += 512 + 512 * Math.trunc(size / 512);
if (size % 512) {
offset += 512;
// Filter
if (filter && !filter(meta)) {
offset += seek;
continue;
}

// File data (offset: 512 - length: size)
const data =
_type === TAR_TYPE_DIR
? undefined
: new Uint8Array(buffer, offset + 512, size);

files.push({
...meta,
data,
get text() {
return new TextDecoder().decode(this.data);
},
});

offset += seek;
}

return files;
Expand All @@ -98,7 +121,7 @@ export function parseTar(data: ArrayBuffer | Uint8Array): ParsedTarFileItem[] {
*/
export async function parseTarGzip(
data: ArrayBuffer | Uint8Array,
opts: { compression?: CompressionFormat } = {},
opts: ParseTarOptions & { compression?: CompressionFormat } = {},
): Promise<ParsedTarFileItem[]> {
const stream = new ReadableStream({
start(controller) {
Expand All @@ -109,7 +132,7 @@ export async function parseTarGzip(

const decompressedData = await new Response(stream).arrayBuffer();

return parseTar(decompressedData);
return parseTar(decompressedData, opts);
}

function _readString(buffer: ArrayBufferLike, offset: number, size: number) {
Expand Down
2 changes: 2 additions & 0 deletions src/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@ export interface ParsedTarFileItem extends TarFileItem {
readonly text: string;
}

/**
* Metadata of a parsed TAR entry: every field of {@link ParsedTarFileItem} except `data` and `text`.
*/
export type ParsedTarFileItemMeta = Omit<ParsedTarFileItem, "data" | "text">;

export interface TarFileAttrs {
/**
* File mode in octal (e.g., `664`) represents read, write, and execute permissions for the owner, group, and others.
Expand Down
13 changes: 13 additions & 0 deletions test/index.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -92,4 +92,17 @@ describe("nanotar", () => {
]
`);
});

it("parseTar (with filter)", async () => {
  // Round-trip the fixture through gzip: parseTarGzip forwards its options
  // to parseTar, so this exercises the filter on both entry points.
  const data = await createTarGzip(fixture);
  const files = await parseTarGzip(data, {
    filter: (file) => file.name.startsWith("foo/"),
  });

  // Only names are asserted, so the items are not post-processed; use an
  // exact equality matcher because the expectation is an array of strings.
  expect(files.map((f) => f.name)).toEqual(["foo/bar.txt"]);
});
});

0 comments on commit 1eddc34

Please sign in to comment.