Skip to content

Commit

Permalink
Add status updates and optimization to PAFAdapter
Browse files: browse the repository at this point in the history
  • Loading branch information
cmdcolin committed Feb 20, 2025
1 parent aa6d9cd commit c76ea33
Show file tree
Hide file tree
Showing 4 changed files with 34 additions and 40 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -216,7 +216,7 @@ export default class BlastTabularAdapter extends BaseFeatureDataAdapter {
opts,
)
const columns: string = readConfObject(this.config, 'columns')
return parseLineByLine(buf, createBlastLineParser(columns))
return parseLineByLine(buf, createBlastLineParser(columns), opts)
}

async hasDataForRefName() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ export default class MashMapAdapter extends PAFAdapter {
/**
 * Load the file configured at `outLocation` and parse it one line at a time
 * with `parseMashMapLine`.
 *
 * @param opts - Optional adapter options. They are handed to
 *   `fetchAndMaybeUnzip` and also to `parseLineByLine`, which reads
 *   `statusCallback` from them to report loading progress.
 * @returns Whatever `parseLineByLine` produces for the fetched buffer.
 */
async setupPre(opts?: BaseOptions) {
  const outLoc = openLocation(this.getConf('outLocation'), this.pluginManager)
  const buf = await fetchAndMaybeUnzip(outLoc, opts)
  // A single return that forwards `opts`; an earlier `return` without `opts`
  // would make this call unreachable and silence progress reporting.
  return parseLineByLine(buf, parseMashMapLine, opts)
}
}

Expand Down
2 changes: 1 addition & 1 deletion plugins/comparative-adapters/src/PAFAdapter/PAFAdapter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ export default class PAFAdapter extends BaseFeatureDataAdapter {
const pm = this.pluginManager
const pafLocation = openLocation(this.getConf('pafLocation'), pm)
const buf = await fetchAndMaybeUnzip(pafLocation, opts)
return parseLineByLine(buf, parsePAFLine)
return parseLineByLine(buf, parsePAFLine, opts)
}

async hasDataForRefName() {
Expand Down
68 changes: 31 additions & 37 deletions plugins/comparative-adapters/src/util.ts
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,14 @@ export function zip(a: number[], b: number[]) {
export function parseLineByLine<T>(
buffer: Uint8Array,
cb: (line: string) => T | undefined,
opts?: BaseOptions,
): T[] {
const { statusCallback = () => {} } = opts || {}
let blockStart = 0
const entries: T[] = []
const decoder = new TextDecoder('utf8')

let i = 0
while (blockStart < buffer.length) {
const n = buffer.indexOf(10, blockStart)
if (n === -1) {
Expand All @@ -54,52 +58,42 @@ export function parseLineByLine<T>(
entries.push(entry)
}
}

if (i++ % 10_000 === 0) {
statusCallback(
`Loading ${Math.floor(blockStart / 1_000_000).toLocaleString('en-US')}/${Math.floor(buffer.length / 1_000_000).toLocaleString('en-US')} MB`,
)
}
blockStart = n + 1
}
return entries
}

/**
 * Parse a single tab-separated PAF line into a record.
 *
 * Column mapping (0-based, as read by this function):
 *   0 -> qname, 2 -> qstart, 3 -> qend, 4 -> strand,
 *   5 -> tname, 7 -> tstart, 8 -> tend,
 *   9 -> extra.numMatches, 10 -> extra.blockLen, 11 -> extra.mappingQual.
 * Columns 1 and 6 are not used. Every column from 12 onward is treated as an
 * optional tag and stored in `extra` as a string.
 *
 * @param line - One line of text, without its trailing newline.
 * @returns The parsed record. `strand` is -1 when column 4 is exactly '-',
 *   and 1 otherwise. Numeric columns are converted with unary `+`, so a
 *   missing or non-numeric column yields NaN rather than throwing.
 */
export function parsePAFLine(line: string) {
  const parts = line.split('\t')
  const extra: Record<string, string | number> = {
    numMatches: +parts[9]!,
    blockLen: +parts[10]!,
    mappingQual: +parts[11]!,
  }

  // Walk the optional tag columns in place instead of allocating a slice.
  for (let i = 12; i < parts.length; i++) {
    const field = parts[i]!
    const colonIndex = field.indexOf(':')
    if (colonIndex === -1) {
      // Not a tag (e.g. the empty field produced by a trailing tab); slicing
      // with -1 would otherwise store a bogus key.
      continue
    }
    // Key is the text before the first colon. The value starts three
    // characters later, i.e. past the colon and the two characters after it
    // (presumably the one-letter type and second colon of TAG:TYPE:VALUE).
    extra[field.slice(0, colonIndex)] = field.slice(colonIndex + 3)
  }

  return {
    tname: parts[5],
    tstart: +parts[7]!,
    tend: +parts[8]!,
    qname: parts[0],
    qstart: +parts[2]!,
    qend: +parts[3]!,
    strand: parts[4] === '-' ? -1 : 1,
    extra,
  } as PAFRecord
}

Expand Down

0 comments on commit c76ea33

Please sign in to comment.