Skip to content

Commit

Permalink
Refactor: don't extract DATs in zip archives to temp file (#67)
Browse files Browse the repository at this point in the history
  • Loading branch information
emmercm authored Sep 22, 2022
1 parent 1988212 commit e5bb4e5
Show file tree
Hide file tree
Showing 21 changed files with 294 additions and 121 deletions.
1 change: 1 addition & 0 deletions .eslintrc
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
}]
}],
"@typescript-eslint/explicit-function-return-type": "error",
"@typescript-eslint/return-await": "error",
"@typescript-eslint/no-floating-promises": ["error"],
"@typescript-eslint/prefer-readonly": "error",
"simple-import-sort/exports": "error",
Expand Down
2 changes: 2 additions & 0 deletions src/constants.ts
Original file line number Diff line number Diff line change
Expand Up @@ -27,4 +27,6 @@ export default class Constants {
);

static readonly FILE_READING_CHUNK_SIZE = 1024 * 1024; // 1MiB

static readonly MAX_STREAM_EXTRACTION_SIZE = 1024 * 1024 * 100; // 100MiB
}
52 changes: 28 additions & 24 deletions src/modules/datScanner.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import { promises as fsPromises } from 'fs';
import xml2js from 'xml2js';

import { Symbols } from '../console/progressBar.js';
import bufferPoly from '../polyfill/bufferPoly.js';
import File from '../types/files/file.js';
import DAT from '../types/logiqx/dat.js';
import DataFile from '../types/logiqx/dataFile.js';
Expand All @@ -26,7 +26,19 @@ export default class DATScanner extends Scanner {
await this.progressBar.setSymbol(Symbols.SEARCHING);
await this.progressBar.reset(datFilePaths.length);

const parsedXml: DataFile[] = [];
const parsedDataFiles = await this.parseDataFiles(datFilePaths);

await this.progressBar.logInfo('Deserializing DAT XML to objects');
const dats = parsedDataFiles
.filter((xmlObject) => xmlObject)
.map((xmlObject) => DAT.fromObject(xmlObject.datafile))
.sort((a, b) => a.getNameShort().localeCompare(b.getNameShort()));
await this.progressBar.logInfo(dats.map((dat) => `${dat.getName()}: ${dat.getGames().length} games, ${dat.getParents().length} parents parsed`).join('\n'));
return dats;
}

private async parseDataFiles(datFilePaths: string[]): Promise<DataFile[]> {
const results: DataFile[] = [];

/* eslint-disable no-await-in-loop */
for (let i = 0; i < datFilePaths.length; i += 1) {
Expand All @@ -37,38 +49,30 @@ export default class DATScanner extends Scanner {
const datFiles = await this.getFilesFromPath(datFilePath);
for (let j = 0; j < datFiles.length; j += 1) {
const datFile = datFiles[j];

const xmlObject = await this.parseDatFile(datFile);
if (xmlObject) {
parsedXml.push(xmlObject);
results.push(xmlObject);
}
}
}

await this.progressBar.logInfo('Deserializing DAT XML to objects');
const dats = parsedXml
.filter((xmlObject) => xmlObject)
.map((xmlObject) => DAT.fromObject(xmlObject.datafile))
.sort((a, b) => a.getNameShort().localeCompare(b.getNameShort()));
await this.progressBar.logInfo(dats.map((dat) => `${dat.getName()}: ${dat.getGames().length} games, ${dat.getParents().length} parents parsed`).join('\n'));
return dats;
return results;
}

private async parseDatFile(datFile: File): Promise<DataFile | undefined> {
return datFile.extract(async (localFile) => {
const xmlContents = await fsPromises.readFile(localFile);

try {
await this.progressBar.logDebug(`${datFile.toString()}: parsing XML`);
return await xml2js.parseStringPromise(xmlContents.toString(), {
try {
await this.progressBar.logDebug(`${datFile.toString()}: parsing XML`);
return await datFile.extractToStream(async (stream) => {
const xmlContents = await bufferPoly.fromReadable(stream);
return xml2js.parseStringPromise(xmlContents.toString(), {
mergeAttrs: true,
explicitArray: false,
}) as DataFile;
} catch (err) {
const message = (err as Error).message.split('\n').join(', ');
await this.progressBar.logError(`Failed to parse DAT ${datFile.toString()} : ${message}`);
return undefined;
}
});
});
});
} catch (err) {
const message = (err as Error).message.split('\n').join(', ');
await this.progressBar.logError(`Failed to parse DAT ${datFile.toString()} : ${message}`);
return Promise.resolve(undefined);
}
}
}
10 changes: 7 additions & 3 deletions src/modules/headerProcessor.ts
Original file line number Diff line number Diff line change
Expand Up @@ -37,16 +37,20 @@ export default class HeaderProcessor {
// Can get FileHeader from extension, use that
const headerForExtension = FileHeader.getForFilename(inputFile.getExtractedFilePath());
if (headerForExtension) {
const fileWithHeader = await inputFile.withFileHeader(headerForExtension).resolve();
const fileWithHeader = await (
await inputFile.withFileHeader(headerForExtension)
).resolve();
return callback(null, fileWithHeader);
}

// Should get FileHeader from File, try to
if (this.options.shouldReadFileForHeader(inputFile.getExtractedFilePath())) {
const headerForFile = await inputFile
.extract(async (localFile) => FileHeader.getForFileContents(localFile));
.extractToFile(async (localFile) => FileHeader.getForFileContents(localFile));
if (headerForFile) {
const fileWithHeader = await inputFile.withFileHeader(headerForFile).resolve();
const fileWithHeader = await (
await inputFile.withFileHeader(headerForFile)
).resolve();
return callback(null, fileWithHeader);
}
await this.progressBar.logWarn(`Couldn't detect header for ${inputFile.toString()}`);
Expand Down
11 changes: 8 additions & 3 deletions src/modules/romWriter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,7 @@ export default class ROMWriter {
outputFile = new ArchiveEntry(
new Zip(outputFilePath),
entryPath,
inputFile.getSize(),
await inputFile.getCrc32(),
);
} else {
Expand All @@ -127,7 +128,11 @@ export default class ROMWriter {
inputFile.getFilePath(),
rom.getName(),
);
outputFile = new File(outputFilePath, await inputFile.getCrc32());
outputFile = new File(
outputFilePath,
inputFile.getSize(),
await inputFile.getCrc32(),
);
}

acc.set(inputFile, outputFile);
Expand Down Expand Up @@ -232,7 +237,7 @@ export default class ROMWriter {

// Write the entry
try {
await inputRomFile.extract(async (localFile) => {
await inputRomFile.extractToFile(async (localFile) => {
await this.progressBar.logDebug(`${outputZipPath}: adding ${localFile}`);
outputZip.addLocalFile(
localFile,
Expand Down Expand Up @@ -338,7 +343,7 @@ export default class ROMWriter {

private async writeRawFile(inputRomFile: File, outputFilePath: string): Promise<boolean> {
try {
await inputRomFile.extract(async (localFile) => {
await inputRomFile.extractToFile(async (localFile) => {
await this.progressBar.logDebug(`${localFile}: copying to ${outputFilePath}`);
await fsPromises.copyFile(localFile, outputFilePath);
});
Expand Down
20 changes: 20 additions & 0 deletions src/polyfill/bufferPoly.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
import { Readable } from 'stream';

/**
 * Buffer helpers that Node.js does not ship out of the box.
 */
export default class BufferPoly {
  /**
   * Drain a readable stream and return everything it emitted as one Buffer.
   *
   * The returned promise always settles: it resolves on 'end', and rejects on 'error' or when
   * the stream is destroyed/closed before 'end' was emitted (otherwise the caller would wait
   * forever on a stream that will never finish).
   *
   * @param readable the stream to consume; it is switched into flowing mode
   * @returns every emitted chunk, concatenated in arrival order
   */
  static async fromReadable(readable: Readable): Promise<Buffer> {
    return new Promise((resolve, reject) => {
      // A destroyed stream emits no further events, so waiting on it would hang
      if (readable.destroyed) {
        reject(new Error('stream was destroyed before it could be read'));
        return;
      }

      const chunks: Buffer[] = [];
      let ended = false;

      readable.on('data', (chunk) => {
        // Only convert non-Buffer chunks (e.g. strings); copying Buffers would double memory use
        chunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk));
      });

      readable.on('end', () => {
        ended = true;
        resolve(Buffer.concat(chunks));
      });

      readable.on('error', (err) => reject(err));

      readable.on('close', () => {
        // 'close' without a preceding 'end' means the data is incomplete. If 'error' already
        // rejected the promise this call is a no-op.
        if (!ended) {
          reject(new Error('stream closed before it ended'));
        }
      });

      // Start flowing only once every listener is attached (also un-pauses a paused stream)
      readable.resume();
    });
  }
}
18 changes: 17 additions & 1 deletion src/types/archives/archive.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
import fs from 'fs';
import { Readable } from 'stream';

import ArchiveEntry from '../files/archiveEntry.js';

export default abstract class Archive {
Expand All @@ -13,9 +16,22 @@ export default abstract class Archive {

abstract getArchiveEntries(): Promise<ArchiveEntry[]>;

abstract extractEntry<T>(
abstract extractEntryToFile<T>(
archiveEntry: ArchiveEntry,
tempDir: string,
callback: (localFile: string) => (T | Promise<T>),
): Promise<T>;

/**
 * Default stream extraction: extract the entry to a local file with
 * {@link extractEntryToFile}, open a read stream on that file, and hand the stream to the
 * callback.
 *
 * @param archiveEntry the entry to extract
 * @param tempDir directory passed through to {@link extractEntryToFile}
 * @param callback consumer of the entry's contents; its result becomes this method's result
 * @returns whatever the callback returns (awaited)
 */
extractEntryToStream<T>(
  archiveEntry: ArchiveEntry,
  tempDir: string,
  callback: (stream: Readable) => (Promise<T> | T),
): Promise<T> {
  return this.extractEntryToFile(archiveEntry, tempDir, async (localFile) => {
    const stream = fs.createReadStream(localFile);
    try {
      return await callback(stream);
    } finally {
      // Release the file descriptor even when the callback throws or rejects, otherwise the
      // extracted file stays open
      stream.destroy();
    }
  });
}
}
3 changes: 2 additions & 1 deletion src/types/archives/rar.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,12 @@ export default class Rar extends Archive {
.map((fileHeader) => new ArchiveEntry(
this,
fileHeader.name,
fileHeader.unpSize,
fileHeader.crc.toString(16),
));
}

async extractEntry<T>(
async extractEntryToFile<T>(
archiveEntry: ArchiveEntry,
tempDir: string,
callback: (localFile: string) => (T | Promise<T>),
Expand Down
9 changes: 7 additions & 2 deletions src/types/archives/sevenZip.ts
Original file line number Diff line number Diff line change
Expand Up @@ -47,11 +47,16 @@ export default class SevenZip extends Archive {
}
});
}) as Result[];
return filesIn7z.map((result) => new ArchiveEntry(this, result.name, result.crc));
return filesIn7z.map((result) => new ArchiveEntry(
this,
result.name,
parseInt(result.size, 10),
result.crc,
));
});
}

async extractEntry<T>(
async extractEntryToFile<T>(
archiveEntry: ArchiveEntry,
tempDir: string,
callback: (localFile: string) => (T | Promise<T>),
Expand Down
24 changes: 22 additions & 2 deletions src/types/archives/zip.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import AdmZip, { IZipEntry } from 'adm-zip';
import path from 'path';
import { Readable } from 'stream';

import ArchiveEntry from '../files/archiveEntry.js';
import Archive from './archive.js';
Expand All @@ -8,28 +9,32 @@ export default class Zip extends Archive {
static readonly SUPPORTED_EXTENSIONS = ['.zip'];

/**
 * List the entries of this zip file.
 *
 * Declared `async` so that a failure while opening/reading the zip surfaces as a rejected
 * promise rather than a synchronous throw from a Promise-returning method.
 *
 * @returns one {@link ArchiveEntry} per zip entry, carrying the entry name, the size from the
 * entry header, and the header CRC rendered as a hexadecimal string
 */
async getArchiveEntries(): Promise<ArchiveEntry[]> {
  // WARN(cemmer): every constructor causes a full file read!
  const zip = new AdmZip(this.getFilePath());
  return zip.getEntries()
    .map((entry) => new ArchiveEntry(
      this,
      entry.entryName,
      entry.header.size,
      entry.header.crc.toString(16),
    ));
}

async extractEntry<T>(
async extractEntryToFile<T>(
archiveEntry: ArchiveEntry,
tempDir: string,
callback: (localFile: string) => (T | Promise<T>),
): Promise<T> {
const localFile = path.join(tempDir, archiveEntry.getEntryPath());

// WARN(cemmer): every constructor causes a full file read!
const zip = new AdmZip(this.getFilePath());
const entry = zip.getEntry(archiveEntry.getEntryPath());
if (!entry) {
throw new Error(`Entry path ${archiveEntry.getEntryPath()} does not exist in ${this.getFilePath()}`);
}

zip.extractEntryTo(
entry as IZipEntry,
tempDir,
Expand All @@ -38,7 +43,22 @@ export default class Zip extends Archive {
false,
archiveEntry.getEntryPath(),
);

return callback(localFile);
}

/**
 * Hand the contents of a zip entry to the callback as a stream, without writing the entry to
 * disk: the entry's data is read into memory and wrapped in a Readable.
 *
 * @param archiveEntry the entry whose path is looked up in this zip
 * @param tempDir unused by this implementation; present to satisfy the shared signature
 * @param callback consumer of the entry's contents; its result becomes this method's result
 * @returns whatever the callback returns
 * @throws Error when the entry path is not present in the zip
 */
async extractEntryToStream<T>(
  archiveEntry: ArchiveEntry,
  tempDir: string,
  callback: (stream: Readable) => (Promise<T> | T),
): Promise<T> {
  // WARN(cemmer): every constructor causes a full file read!
  const archive = new AdmZip(this.getFilePath());
  const zipEntry = archive.getEntry(archiveEntry.getEntryPath());

  if (zipEntry) {
    const contents = zipEntry.getData();
    return callback(Readable.from(contents));
  }

  throw new Error(`Entry path ${archiveEntry.getEntryPath()} does not exist in ${this.getFilePath()}`);
}
}
40 changes: 33 additions & 7 deletions src/types/files/archiveEntry.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import { promises as fsPromises } from 'fs';
import fs, { promises as fsPromises } from 'fs';
import { Readable } from 'stream';

import Constants from '../../constants.js';
import fsPoly from '../../polyfill/fsPoly.js';
Expand All @@ -11,8 +12,14 @@ export default class ArchiveEntry extends File {

private readonly entryPath: string;

constructor(archive: Archive, entryPath: string, crc?: string, fileHeader?: FileHeader) {
super(archive.getFilePath(), crc, fileHeader);
constructor(
archive: Archive,
entryPath: string,
size: number,
crc?: string,
fileHeader?: FileHeader,
) {
super(archive.getFilePath(), size, crc, fileHeader);
this.archive = archive;
this.entryPath = entryPath;
}
Expand All @@ -25,21 +32,40 @@ export default class ArchiveEntry extends File {
return this.entryPath;
}

async extract<T>(callback: (localFile: string) => (T | Promise<T>)): Promise<T> {
/**
 * Extract this entry into a freshly created temporary directory, run the callback against
 * the extracted file, and remove the directory afterwards whether or not the callback
 * succeeded.
 *
 * @param callback receives the path of the extracted local file
 * @returns whatever the callback returns (awaited)
 */
async extractToFile<T>(callback: (localFile: string) => (T | Promise<T>)): Promise<T> {
  const scratchDir = await fsPromises.mkdtemp(Constants.GLOBAL_TEMP_DIR);
  try {
    const outcome = await this.archive.extractEntryToFile(this, scratchDir, callback);
    return outcome;
  } finally {
    // Runs after the extraction + callback have settled, on success and on failure alike
    fsPoly.rmSync(scratchDir, { recursive: true });
  }
}

async extractToStream<T>(callback: (stream: Readable) => (Promise<T> | T)): Promise<T> {
// Don't extract to memory if this archive entry size is too large
if (this.getSize() > Constants.MAX_STREAM_EXTRACTION_SIZE) {
return this.extractToFile(async (localFile) => {
const stream = fs.createReadStream(localFile);
const result = await callback(stream);
stream.destroy();
return result;
});
}

const tempDir = await fsPromises.mkdtemp(Constants.GLOBAL_TEMP_DIR);
try {
return await this.archive.extractEntry(this, tempDir, callback);
return await this.archive.extractEntryToStream(this, tempDir, callback);
} finally {
fsPoly.rmSync(tempDir, { recursive: true });
}
}

withFileHeader(fileHeader: FileHeader): File {
async withFileHeader(fileHeader: FileHeader): Promise<File> {
return new ArchiveEntry(
this.archive,
this.entryPath,
undefined, // the old CRC can't be used, a header will change it
this.getSize(),
await this.getCrc32(),
fileHeader,
);
}
Expand Down
Loading

0 comments on commit e5bb4e5

Please sign in to comment.