From c76ea33fd868e1350d183d4ce923ad6b2b7e937a Mon Sep 17 00:00:00 2001 From: Colin Date: Thu, 20 Feb 2025 10:41:24 -0500 Subject: [PATCH] Add status updates and optimization to PAFAdapter --- .../BlastTabularAdapter.ts | 2 +- .../src/MashMapAdapter/MashMapAdapter.ts | 2 +- .../src/PAFAdapter/PAFAdapter.ts | 2 +- plugins/comparative-adapters/src/util.ts | 68 +++++++++---------- 4 files changed, 34 insertions(+), 40 deletions(-) diff --git a/plugins/comparative-adapters/src/BlastTabularAdapter/BlastTabularAdapter.ts b/plugins/comparative-adapters/src/BlastTabularAdapter/BlastTabularAdapter.ts index 319f1644c8..ecc33a0d14 100755 --- a/plugins/comparative-adapters/src/BlastTabularAdapter/BlastTabularAdapter.ts +++ b/plugins/comparative-adapters/src/BlastTabularAdapter/BlastTabularAdapter.ts @@ -216,7 +216,7 @@ export default class BlastTabularAdapter extends BaseFeatureDataAdapter { opts, ) const columns: string = readConfObject(this.config, 'columns') - return parseLineByLine(buf, createBlastLineParser(columns)) + return parseLineByLine(buf, createBlastLineParser(columns), opts) } async hasDataForRefName() { diff --git a/plugins/comparative-adapters/src/MashMapAdapter/MashMapAdapter.ts b/plugins/comparative-adapters/src/MashMapAdapter/MashMapAdapter.ts index 99951e3526..01a9072326 100644 --- a/plugins/comparative-adapters/src/MashMapAdapter/MashMapAdapter.ts +++ b/plugins/comparative-adapters/src/MashMapAdapter/MashMapAdapter.ts @@ -10,7 +10,7 @@ export default class MashMapAdapter extends PAFAdapter { async setupPre(opts?: BaseOptions) { const outLoc = openLocation(this.getConf('outLocation'), this.pluginManager) const buf = await fetchAndMaybeUnzip(outLoc, opts) - return parseLineByLine(buf, parseMashMapLine) + return parseLineByLine(buf, parseMashMapLine, opts) } } diff --git a/plugins/comparative-adapters/src/PAFAdapter/PAFAdapter.ts b/plugins/comparative-adapters/src/PAFAdapter/PAFAdapter.ts index 672d564bcd..af91dae0ff 100644 --- 
a/plugins/comparative-adapters/src/PAFAdapter/PAFAdapter.ts +++ b/plugins/comparative-adapters/src/PAFAdapter/PAFAdapter.ts @@ -46,7 +46,7 @@ export default class PAFAdapter extends BaseFeatureDataAdapter { const pm = this.pluginManager const pafLocation = openLocation(this.getConf('pafLocation'), pm) const buf = await fetchAndMaybeUnzip(pafLocation, opts) - return parseLineByLine(buf, parsePAFLine) + return parseLineByLine(buf, parsePAFLine, opts) } async hasDataForRefName() { diff --git a/plugins/comparative-adapters/src/util.ts b/plugins/comparative-adapters/src/util.ts index af8a1db574..c7b16bc77c 100644 --- a/plugins/comparative-adapters/src/util.ts +++ b/plugins/comparative-adapters/src/util.ts @@ -37,10 +37,14 @@ export function zip(a: number[], b: number[]) { export function parseLineByLine<T>( buffer: Uint8Array, cb: (line: string) => T | undefined, + opts?: BaseOptions, ): T[] { + const { statusCallback = () => {} } = opts || {} let blockStart = 0 const entries: T[] = [] const decoder = new TextDecoder('utf8') + + let i = 0 while (blockStart < buffer.length) { const n = buffer.indexOf(10, blockStart) if (n === -1) { @@ -54,52 +58,42 @@ export function parseLineByLine<T>( entries.push(entry) } } - + if (i++ % 10_000 === 0) { + statusCallback( + `Loading ${Math.floor(blockStart / 1_000_000).toLocaleString('en-US')}/${Math.floor(buffer.length / 1_000_000).toLocaleString('en-US')} MB`, + ) + } blockStart = n + 1 } return entries } export function parsePAFLine(line: string) { - const [ - qname, - , - qstart, - qend, - strand, - tname, - , - tstart, - tend, - numMatches, - blockLen, - mappingQual, - ...fields - ] = line.split('\t') + const parts = line.split('\t') + const extraFields = parts.slice(12) + const extra: Record<string, string | number> = { + numMatches: +parts[9]!, + blockLen: +parts[10]!, + mappingQual: +parts[11]!, + } - const rest = Object.fromEntries( - fields.map(field => { - const r = field.indexOf(':') - const fieldName = field.slice(0, r) - const fieldValue = 
field.slice(r + 3) - return [fieldName, fieldValue] - }), - ) + // Process extra fields only if they exist + if (extraFields.length) { + for (const field of extraFields) { + const colonIndex = field.indexOf(':') + extra[field.slice(0, colonIndex)] = field.slice(colonIndex + 3) + } + } return { - tname, - tstart: +tstart!, - tend: +tend!, - qname, - qstart: +qstart!, - qend: +qend!, - strand: strand === '-' ? -1 : 1, - extra: { - numMatches: +numMatches!, - blockLen: +blockLen!, - mappingQual: +mappingQual!, - ...rest, - }, + tname: parts[5], + tstart: +parts[7]!, + tend: +parts[8]!, + qname: parts[0], + qstart: +parts[2]!, + qend: +parts[3]!, + strand: parts[4] === '-' ? -1 : 1, + extra, } as PAFRecord }