Skip to content

Commit

Permalink
Avoid stream polyfill for GFF3/GTF parsing (#4547)
Browse files: browse the repository at this point in the history
  • Loading branch information
cmdcolin authored Sep 4, 2024
1 parent 0523cac commit b063487
Show file tree
Hide file tree
Showing 12 changed files with 155 additions and 183 deletions.
1 change: 1 addition & 0 deletions eslint.config.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ export default tseslint.config(
'**/dist/**/*',
'**/esm/**/*',
'**/public/**/*',
'**/storybook-static/**',
'website/*',
'packages/core/util/nanoid.js',
'products/**/webpack.config.js',
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,30 +56,32 @@ test('renders', () => {
}, 20000)

test('can handle a custom UCSC trackHub URL', async () => {
jest.spyOn(global, 'fetch').mockImplementation(async url => {
const urlText = `${url}`
if (urlText.endsWith('hub.txt')) {
return new Response(`hub TestHub
jest
.spyOn(global, 'fetch')
.mockImplementation(async (url: string | Request | URL) => {
const urlText = `${url}`
if (urlText.endsWith('hub.txt')) {
return new Response(`hub TestHub
shortLabel Test Hub
longLabel Test Genome Informatics Hub for human DNase and RNAseq data
genomesFile genomes.txt
email genome@test.com
descriptionUrl test.html
`)
} else if (urlText.endsWith('genomes.txt')) {
return new Response(`genome volMyt1
} else if (urlText.endsWith('genomes.txt')) {
return new Response(`genome volMyt1
trackDb hg19/trackDb.txt
`)
} else if (urlText.endsWith('trackDb.txt')) {
return new Response(`track dnaseSignal
} else if (urlText.endsWith('trackDb.txt')) {
return new Response(`track dnaseSignal
bigDataUrl dnaseSignal.bigWig
shortLabel DNAse Signal
longLabel Depth of alignments of DNAse reads
type bigWig
`)
}
throw new Error('unknown')
})
}
throw new Error('unknown')
})

const {
session,
Expand Down
2 changes: 1 addition & 1 deletion plugins/gff3/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
"dependencies": {
"@flatten-js/interval-tree": "^1.0.15",
"@gmod/bgzf-filehandle": "^1.4.3",
"@gmod/gff": "^1.3.0",
"gff-nostream": "^1.3.3",
"@gmod/tabix": "^1.5.6"
},
"peerDependencies": {
Expand Down
11 changes: 2 additions & 9 deletions plugins/gff3/src/Gff3Adapter/Gff3Adapter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ import { ObservableCreate } from '@jbrowse/core/util/rxjs'
import IntervalTree from '@flatten-js/interval-tree'
import SimpleFeature, { Feature } from '@jbrowse/core/util/simpleFeature'
import { unzip } from '@gmod/bgzf-filehandle'
import gff from '@gmod/gff'
import { parseStringSync } from 'gff-nostream'
import { isGzip, updateStatus } from '@jbrowse/core/util'

import { featureData } from '../featureData'
Expand Down Expand Up @@ -76,14 +76,7 @@ export default class Gff3Adapter extends BaseFeatureDataAdapter {
if (!this.calculatedIntervalTreeMap[refName]) {
sc?.('Parsing GFF data')
const intervalTree = new IntervalTree()
gff
.parseStringSync(lines, {
parseFeatures: true,
parseComments: false,
parseDirectives: false,
parseSequences: false,
disableDerivesFromReferences: true,
})
parseStringSync(lines)
.flat()
.map(
(f, i) =>
Expand Down
40 changes: 13 additions & 27 deletions plugins/gff3/src/Gff3TabixAdapter/Gff3TabixAdapter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ import { openLocation } from '@jbrowse/core/util/io'
import { ObservableCreate } from '@jbrowse/core/util/rxjs'
import SimpleFeature, { Feature } from '@jbrowse/core/util/simpleFeature'
import { TabixIndexedFile } from '@gmod/tabix'
import gff, { GFF3Feature } from '@gmod/gff'
import { parseStringSync } from 'gff-nostream'
import { Observer } from 'rxjs'
import {
readConfObject,
Expand Down Expand Up @@ -87,14 +87,14 @@ export default class Gff3TabixAdapter extends BaseFeatureDataAdapter {
query.refName,
query.start,
query.end,
(line: string, fileOffset: number) => {
(line, fileOffset) => {
lines.push(this.parseLine(metadata.columnNumbers, line, fileOffset))
},
)
if (allowRedispatch && lines.length) {
let minStart = Number.POSITIVE_INFINITY
let maxEnd = Number.NEGATIVE_INFINITY
lines.forEach(line => {
for (const line of lines) {
const featureType = line.fields[2]!
// only expand redispatch range if feature is not a "dontRedispatch"
// type skips large regions like chromosome,region
Expand All @@ -107,7 +107,7 @@ export default class Gff3TabixAdapter extends BaseFeatureDataAdapter {
maxEnd = line.end
}
}
})
}
if (maxEnd > query.end || minStart < query.start) {
// make a new feature callback to only return top-level features
// in the original query range
Expand All @@ -124,7 +124,7 @@ export default class Gff3TabixAdapter extends BaseFeatureDataAdapter {
}

const gff3 = lines
.map((lineRecord: LineFeature) => {
.map(lineRecord => {
if (lineRecord.fields[8] && lineRecord.fields[8] !== '.') {
if (!lineRecord.fields[8].includes('_lineHash')) {
lineRecord.fields[8] += `;_lineHash=${lineRecord.lineHash}`
Expand All @@ -136,16 +136,12 @@ export default class Gff3TabixAdapter extends BaseFeatureDataAdapter {
})
.join('\n')

const features = gff.parseStringSync(gff3, {
parseFeatures: true,
parseComments: false,
parseDirectives: false,
parseSequences: false,
disableDerivesFromReferences: true,
})

features.forEach(featureLocs => {
this.formatFeatures(featureLocs).forEach(f => {
for (const featureLocs of parseStringSync(gff3)) {
for (const featureLoc of featureLocs) {
const f = new SimpleFeature({
data: featureData(featureLoc),
id: `${this.id}-offset-${featureLoc.attributes?._lineHash?.[0]}`,
})
if (
doesIntersect2(
f.get('start'),
Expand All @@ -156,8 +152,8 @@ export default class Gff3TabixAdapter extends BaseFeatureDataAdapter {
) {
observer.next(f)
}
})
})
}
}
observer.complete()
} catch (e) {
observer.error(e)
Expand All @@ -180,15 +176,5 @@ export default class Gff3TabixAdapter extends BaseFeatureDataAdapter {
}
}

private formatFeatures(featureLocs: GFF3Feature) {
return featureLocs.map(
featureLoc =>
new SimpleFeature({
data: featureData(featureLoc),
id: `${this.id}-offset-${featureLoc.attributes?._lineHash?.[0]}`,
}),
)
}

public freeResources(/* { region } */) {}
}
2 changes: 1 addition & 1 deletion plugins/gff3/src/featureData.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { GFF3FeatureLineWithRefs } from '@gmod/gff'
import { GFF3FeatureLineWithRefs } from 'gff-nostream'

export function featureData(data: GFF3FeatureLineWithRefs) {
const f: Record<string, unknown> = { ...data }
Expand Down
2 changes: 1 addition & 1 deletion plugins/gtf/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
"dependencies": {
"@flatten-js/interval-tree": "^1.0.15",
"@gmod/bgzf-filehandle": "^1.4.3",
"@gmod/gtf": "^0.0.9"
"gtf-nostream": "^1.0.0"
},
"peerDependencies": {
"@jbrowse/core": "^2.0.0",
Expand Down
11 changes: 2 additions & 9 deletions plugins/gtf/src/GtfAdapter/GtfAdapter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ import {
isGzip,
} from '@jbrowse/core/util'
import { unzip } from '@gmod/bgzf-filehandle'
import gtf from '@gmod/gtf'
import { parseStringSync } from 'gtf-nostream'

// locals
import { FeatureLoc, featureData } from '../util'
Expand Down Expand Up @@ -82,14 +82,7 @@ export default class GtfAdapter extends BaseFeatureDataAdapter {
if (!this.calculatedIntervalTreeMap[refName]) {
sc?.('Parsing GTF data')
const intervalTree = new IntervalTree()
;(
gtf.parseStringSync(lines, {
parseFeatures: true,
parseComments: false,
parseDirectives: false,
parseSequences: false,
}) as FeatureLoc[][]
)
;(parseStringSync(lines) as FeatureLoc[][])
.flat()
.map(
(f, i) =>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,12 @@ test('read from meta', async () => {
.join(__dirname, '..', '..', '..', '..', 'test_data', 'names')
.replaceAll('\\', '\\\\')

jest.spyOn(global, 'fetch').mockImplementation(url => {
const response = `${url}`.includes('names/meta.json') ? meta : {}
return Promise.resolve(new Response(JSON.stringify(response)))
})
jest
.spyOn(global, 'fetch')
.mockImplementation((url: string | Request | URL) => {
const response = `${url}`.includes('names/meta.json') ? meta : {}
return Promise.resolve(new Response(JSON.stringify(response)))
})
const hashMap = new HttpMap({ url: rootTemplate })
await hashMap.getBucket('apple')

Expand All @@ -25,20 +27,21 @@ test('get bucket contents', async () => {
.join(__dirname, '..', '..', '..', '..', 'test_data', 'names')
.replaceAll('\\', '\\\\')

const spy = jest.spyOn(global, 'fetch')
spy.mockImplementation(url => {
let response = {}
if (`${url}`.includes('names/meta.json')) {
response = meta
}
if (`${url}`.includes('names/0.json')) {
response = first
}
if (`${url}`.includes('names/f.json')) {
response = last
}
return Promise.resolve(new Response(JSON.stringify(response)))
})
const spy = jest
.spyOn(global, 'fetch')
.mockImplementation((url: string | Request | URL) => {
let response = {}
if (`${url}`.includes('names/meta.json')) {
response = meta
}
if (`${url}`.includes('names/0.json')) {
response = first
}
if (`${url}`.includes('names/f.json')) {
response = last
}
return Promise.resolve(new Response(JSON.stringify(response)))
})
const hashMap = new HttpMap({ url: rootTemplate })

await hashMap.getBucket('apple')
Expand Down
111 changes: 57 additions & 54 deletions products/jbrowse-web/src/tests/Loader.test.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -25,62 +25,65 @@ jest.mock('../makeWorkerInstance', () => () => {})

const delay = { timeout: 20000 }

jest.spyOn(global, 'fetch').mockImplementation(async (url, args) => {
if (/plugin-store/.exec(`${url}`)) {
return new Response(
JSON.stringify({
plugins: [
{
url: 'https://unpkg.com/jbrowse-plugin-msaview/dist/jbrowse-plugin-msaview.umd.production.min.js',
},
],
}),
)
}
if (`${url}`.includes('testid')) {
return new Response(
`{"session":"U2FsdGVkX1+9+Hsy+o75Cdyb1jGYB/N1/h6Jr5ARZRF02uH2AN70Uc/yTXAEo4PQMVypDZMLqO+LJcnF6k2FKfRo9w3oeL+EbWZsXgsTrP5IrE+xYN1wfdTKoIohbQMI+zcIZGLVNf7UqNZjwzsIracm5DkgZh9EWo4MAkBP10ZZEWSdV7gmg95a5ofta2bOMpL4T5yOdukBa+6Uvv9qYXt2KdZPR4PoVLQUTE67zIdc0A9n9BuXiTOFUmczfJVvkoQSOGaXGgSUVoK31Ei12lk67a55YtbG3ClENIMcSK/YbMH7w9HtqImzPY0jaQZSZ6ikKW8fXIbXmqX0oadOKS70RNVcF5JcDMYKx6zPxAf7WjpuFh+cNNr7j6bizRoTbuZi+xNsPpnA2QmbtOXCQzbOao1Oj3HzriBAIGC56bSxx0YfJ0en751LV6yrLPsnMmmmowTIjkbH5c+QRJId9sdYQb9Ytqr2dWBKixHSGhLBfdNr0yt3t5GQRu11Rlq6OekrA9KcmHv9QU3AhDtj9TYjG5vqveYCDfS7uSc3TJLEczwF8p02wjuGapYV5QpX+Lm9ADO8X+qW+bFZj3EGKoQBTUSfV1fd3t5oH3KWWuWYpMuRLbSYgcjKC29DOUJA43k+Ufmio+wO7CufcgGkIWlpejojX8f28UsPXaONmd3t8H4bmzXkB631E1EVS4y+RZGxc2uSVedS446qq/9tV9XJW9tkwNINwbpMHAG0OZk="}`,
)
}
if (`${url}`.includes('testcustomcallback')) {
return new Response(
`{"session":"eJzVVm1v2zYQ_isCvzQB7NiK4zTQt9TNVm9p59puUmANAlo6SdwoSiPptwb-7zuSsiw7dhpk2YB9MGAe7-W5e053fCAsIgHRX5fwOfslnJAGETQDFPWFhkRSzXLhaVDagwXNCg6e37ponbZP_YbXDfyLoPvWG3xEs4zKhAkStBskknQO8pZFOiVB5-KsQWYM5ooEvz-4cGzj-974RnO9LEzUayaAyp9B5BncoBHe5HGsQA8WJDhtt9H7pBiANMf2SbuLwZgqOF1CNIQE_bkoEuJPLo1QJ5foRGkqtQUHAhF00ZPfQLUZSAUoiClX0CBUKcgmfFkaz3I-yxdkdYf4JA3_rKUwHOpr-XF0c7PBfslZIjIQWo2NMl6EuYhZMnWZVv6aPBdJUwKNVFPNmhOaNf1zv312cX7WOe-87TQVKIUGay9lhrXgf_XTL1dt6Y-Gu5XbYHjvrFBhwDhMi_U5KH2o3kD9OhmLSO362NZvkBRYkmLxfFP9nZQenjYtofejp5I_gHy7KPeFdX2_WH43zZZPFeRIHvqNpyI0YI5ioHoq4dh7-CY8TwIehFcKTxLQR28UsiiiN8ffxIoY9kUEEhvApOGQD0tRLbGdi1WDjGbJT87rHvU9l6sVGqk0n4_yWPc4KwomkqrlYsY1yHeWmJjTpC9CPo3A9qo5Xy3Ks9_tnBtPo0-DnkmdJrDLaZq__0qv2Kw72eV0j9GG2LMf8rrX_BXJVaIIS___GsO1HPbxtufWEQccQm3Gi5Fa5oh1rXI-dcXybflMaBxsJtbqBbQ-e1Y8q6I1dk-7bdd-6_xIoOUUnMx1t5Os7sygS1kEHzCeqU7ZohvRb-jBzPLqyo7FkS1RLseulCnDMDJMWUg5KVWu6QQ4loYYDJy6T6CEhUmANFmVXg2KOYuQT8tb3d24Hm7T9gc1qq_gwyGVWxvIzmqOuOwy2PA3hoV2fLusH2-uirmriNUxbQkrGL06ye6yAoAbyvx52ZZAGCwrcqlHTlp6XaPZd1eB6u-5NB1J8aObwW3FhEtVgP0W-9jvVIRQXtTBuEV1cCnafnh6aDy5DkV9N1tjzxp7c6ZTb3TjHYV5sTwm29vcgFrvc-wvGtFCbw2AdzS7LIX4xqDZdR5Ws3Aq2QG4J6hp9RV8sUqp1kXQanG05mmudNDBp0bLtMp9RDVtOSct1x4nfygMYLjD2bIwkfizo-KP_ePIhuat58UPHxOvN_T_d2u88cIV8npvpC3TR9X8L98zr_w-eGz_dHYvIsKut3KY9KpBZvr-bvU3l2iNHA"}`,
)
}
if (`${url}`.includes('nonexist')) {
return new Response('', {
status: 404,
statusText: 'failed to find session',
})
}
// this is the analytics
if (`${url}`.includes('jb2=true')) {
return new Response('{}')
}
try {
const file = getFile(`${url}`)
const maxRangeRequest = 2000000 // kind of arbitrary, part of the rangeParser
if (args?.headers && 'range' in args.headers) {
const range = rangeParser(maxRangeRequest, args.headers.range)
if (range === -2 || range === -1) {
throw new Error(`Error parsing range "${args.headers.range}"`)
}
const { start, end } = range[0]!
const len = end - start + 1
const buf = Buffer.alloc(len)
const { bytesRead } = await file.read(buf, 0, len, start)
const stat = await file.stat()
return new Response(buf.subarray(0, bytesRead), {
status: 206,
headers: [['content-range', `${start}-${end}/${stat.size}`]],
jest
.spyOn(global, 'fetch')

.mockImplementation(async (url: any, args: any) => {
if (/plugin-store/.exec(`${url}`)) {
return new Response(
JSON.stringify({
plugins: [
{
url: 'https://unpkg.com/jbrowse-plugin-msaview/dist/jbrowse-plugin-msaview.umd.production.min.js',
},
],
}),
)
}
if (`${url}`.includes('testid')) {
return new Response(
`{"session":"U2FsdGVkX1+9+Hsy+o75Cdyb1jGYB/N1/h6Jr5ARZRF02uH2AN70Uc/yTXAEo4PQMVypDZMLqO+LJcnF6k2FKfRo9w3oeL+EbWZsXgsTrP5IrE+xYN1wfdTKoIohbQMI+zcIZGLVNf7UqNZjwzsIracm5DkgZh9EWo4MAkBP10ZZEWSdV7gmg95a5ofta2bOMpL4T5yOdukBa+6Uvv9qYXt2KdZPR4PoVLQUTE67zIdc0A9n9BuXiTOFUmczfJVvkoQSOGaXGgSUVoK31Ei12lk67a55YtbG3ClENIMcSK/YbMH7w9HtqImzPY0jaQZSZ6ikKW8fXIbXmqX0oadOKS70RNVcF5JcDMYKx6zPxAf7WjpuFh+cNNr7j6bizRoTbuZi+xNsPpnA2QmbtOXCQzbOao1Oj3HzriBAIGC56bSxx0YfJ0en751LV6yrLPsnMmmmowTIjkbH5c+QRJId9sdYQb9Ytqr2dWBKixHSGhLBfdNr0yt3t5GQRu11Rlq6OekrA9KcmHv9QU3AhDtj9TYjG5vqveYCDfS7uSc3TJLEczwF8p02wjuGapYV5QpX+Lm9ADO8X+qW+bFZj3EGKoQBTUSfV1fd3t5oH3KWWuWYpMuRLbSYgcjKC29DOUJA43k+Ufmio+wO7CufcgGkIWlpejojX8f28UsPXaONmd3t8H4bmzXkB631E1EVS4y+RZGxc2uSVedS446qq/9tV9XJW9tkwNINwbpMHAG0OZk="}`,
)
}
if (`${url}`.includes('testcustomcallback')) {
return new Response(
`{"session":"eJzVVm1v2zYQ_isCvzQB7NiK4zTQt9TNVm9p59puUmANAlo6SdwoSiPptwb-7zuSsiw7dhpk2YB9MGAe7-W5e053fCAsIgHRX5fwOfslnJAGETQDFPWFhkRSzXLhaVDagwXNCg6e37ponbZP_YbXDfyLoPvWG3xEs4zKhAkStBskknQO8pZFOiVB5-KsQWYM5ooEvz-4cGzj-974RnO9LEzUayaAyp9B5BncoBHe5HGsQA8WJDhtt9H7pBiANMf2SbuLwZgqOF1CNIQE_bkoEuJPLo1QJ5foRGkqtQUHAhF00ZPfQLUZSAUoiClX0CBUKcgmfFkaz3I-yxdkdYf4JA3_rKUwHOpr-XF0c7PBfslZIjIQWo2NMl6EuYhZMnWZVv6aPBdJUwKNVFPNmhOaNf1zv312cX7WOe-87TQVKIUGay9lhrXgf_XTL1dt6Y-Gu5XbYHjvrFBhwDhMi_U5KH2o3kD9OhmLSO362NZvkBRYkmLxfFP9nZQenjYtofejp5I_gHy7KPeFdX2_WH43zZZPFeRIHvqNpyI0YI5ioHoq4dh7-CY8TwIehFcKTxLQR28UsiiiN8ffxIoY9kUEEhvApOGQD0tRLbGdi1WDjGbJT87rHvU9l6sVGqk0n4_yWPc4KwomkqrlYsY1yHeWmJjTpC9CPo3A9qo5Xy3Ks9_tnBtPo0-DnkmdJrDLaZq__0qv2Kw72eV0j9GG2LMf8rrX_BXJVaIIS___GsO1HPbxtufWEQccQm3Gi5Fa5oh1rXI-dcXybflMaBxsJtbqBbQ-e1Y8q6I1dk-7bdd-6_xIoOUUnMx1t5Os7sygS1kEHzCeqU7ZohvRb-jBzPLqyo7FkS1RLseulCnDMDJMWUg5KVWu6QQ4loYYDJy6T6CEhUmANFmVXg2KOYuQT8tb3d24Hm7T9gc1qq_gwyGVWxvIzmqOuOwy2PA3hoV2fLusH2-uirmriNUxbQkrGL06ye6yAoAbyvx52ZZAGCwrcqlHTlp6XaPZd1eB6u-5NB1J8aObwW3FhEtVgP0W-9jvVIRQXtTBuEV1cCnafnh6aDy5DkV9N1tjzxp7c6ZTb3TjHYV5sTwm29vcgFrvc-wvGtFCbw2AdzS7LIX4xqDZdR5Ws3Aq2QG4J6hp9RV8sUqp1kXQanG05mmudNDBp0bLtMp9RDVtOSct1x4nfygMYLjD2bIwkfizo-KP_ePIhuat58UPHxOvN_T_d2u88cIV8npvpC3TR9X8L98zr_w-eGz_dHYvIsKut3KY9KpBZvr-bvU3l2iNHA"}`,
)
}
if (`${url}`.includes('nonexist')) {
return new Response('', {
status: 404,
statusText: 'failed to find session',
})
}
return new Response(await file.readFile(), { status: 200 })
} catch (e) {
console.error(e)
return new Response(undefined, { status: 404 })
}
})
// this is the analytics
if (`${url}`.includes('jb2=true')) {
return new Response('{}')
}
try {
const file = getFile(`${url}`)
const maxRangeRequest = 2000000 // kind of arbitrary, part of the rangeParser
if (args?.headers && 'range' in args.headers) {
const range = rangeParser(maxRangeRequest, args.headers.range)
if (range === -2 || range === -1) {
throw new Error(`Error parsing range "${args.headers.range}"`)
}
const { start, end } = range[0]!
const len = end - start + 1
const buf = Buffer.alloc(len)
const { bytesRead } = await file.read(buf, 0, len, start)
const stat = await file.stat()
return new Response(buf.subarray(0, bytesRead), {
status: 206,
headers: [['content-range', `${start}-${end}/${stat.size}`]],
})
}
return new Response(await file.readFile(), { status: 200 })
} catch (e) {
console.error(e)
return new Response(undefined, { status: 404 })
}
})

afterEach(() => {
localStorage.clear()
Expand Down
Loading

0 comments on commit b063487

Please sign in to comment.