Skip to content

Commit

Permalink
Non-streaming parsers
Browse files (browse the repository at this point in the history)
  • Loading branch information
cmdcolin committed Sep 4, 2024
1 parent 0523cac commit 1bf74cc
Show file tree
Hide file tree
Showing 11 changed files with 165 additions and 189 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -56,30 +56,32 @@ test('renders', () => {
}, 20000)

test('can handle a custom UCSC trackHub URL', async () => {
jest.spyOn(global, 'fetch').mockImplementation(async url => {
const urlText = `${url}`
if (urlText.endsWith('hub.txt')) {
return new Response(`hub TestHub
jest
.spyOn(global, 'fetch')
.mockImplementation(async (url: string | Request | URL) => {
const urlText = `${url}`
if (urlText.endsWith('hub.txt')) {
return new Response(`hub TestHub
shortLabel Test Hub
longLabel Test Genome Informatics Hub for human DNase and RNAseq data
genomesFile genomes.txt
email genome@test.com
descriptionUrl test.html
`)
} else if (urlText.endsWith('genomes.txt')) {
return new Response(`genome volMyt1
} else if (urlText.endsWith('genomes.txt')) {
return new Response(`genome volMyt1
trackDb hg19/trackDb.txt
`)
} else if (urlText.endsWith('trackDb.txt')) {
return new Response(`track dnaseSignal
} else if (urlText.endsWith('trackDb.txt')) {
return new Response(`track dnaseSignal
bigDataUrl dnaseSignal.bigWig
shortLabel DNAse Signal
longLabel Depth of alignments of DNAse reads
type bigWig
`)
}
throw new Error('unknown')
})
}
throw new Error('unknown')
})

const {
session,
Expand Down
2 changes: 1 addition & 1 deletion plugins/gff3/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
"dependencies": {
"@flatten-js/interval-tree": "^1.0.15",
"@gmod/bgzf-filehandle": "^1.4.3",
"@gmod/gff": "^1.3.0",
"gff-nostream": "^1.3.3",
"@gmod/tabix": "^1.5.6"
},
"peerDependencies": {
Expand Down
11 changes: 2 additions & 9 deletions plugins/gff3/src/Gff3Adapter/Gff3Adapter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ import { ObservableCreate } from '@jbrowse/core/util/rxjs'
import IntervalTree from '@flatten-js/interval-tree'
import SimpleFeature, { Feature } from '@jbrowse/core/util/simpleFeature'
import { unzip } from '@gmod/bgzf-filehandle'
import gff from '@gmod/gff'
import { parseStringSync } from 'gff-nostream'
import { isGzip, updateStatus } from '@jbrowse/core/util'

import { featureData } from '../featureData'
Expand Down Expand Up @@ -76,14 +76,7 @@ export default class Gff3Adapter extends BaseFeatureDataAdapter {
if (!this.calculatedIntervalTreeMap[refName]) {
sc?.('Parsing GFF data')
const intervalTree = new IntervalTree()
gff
.parseStringSync(lines, {
parseFeatures: true,
parseComments: false,
parseDirectives: false,
parseSequences: false,
disableDerivesFromReferences: true,
})
parseStringSync(lines)
.flat()
.map(
(f, i) =>
Expand Down
57 changes: 24 additions & 33 deletions plugins/gff3/src/Gff3TabixAdapter/Gff3TabixAdapter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ import { openLocation } from '@jbrowse/core/util/io'
import { ObservableCreate } from '@jbrowse/core/util/rxjs'
import SimpleFeature, { Feature } from '@jbrowse/core/util/simpleFeature'
import { TabixIndexedFile } from '@gmod/tabix'
import gff, { GFF3Feature } from '@gmod/gff'
import { parseStringSync, GFF3Feature } from 'gff-nostream'

Check warning on line 11 in plugins/gff3/src/Gff3TabixAdapter/Gff3TabixAdapter.ts

View workflow job for this annotation

GitHub Actions / Lint

'GFF3Feature' is defined but never used
import { Observer } from 'rxjs'
import {
readConfObject,
Expand Down Expand Up @@ -124,7 +124,7 @@ export default class Gff3TabixAdapter extends BaseFeatureDataAdapter {
}

const gff3 = lines
.map((lineRecord: LineFeature) => {
.map(lineRecord => {
if (lineRecord.fields[8] && lineRecord.fields[8] !== '.') {
if (!lineRecord.fields[8].includes('_lineHash')) {
lineRecord.fields[8] += `;_lineHash=${lineRecord.lineHash}`
Expand All @@ -136,27 +136,28 @@ export default class Gff3TabixAdapter extends BaseFeatureDataAdapter {
})
.join('\n')

const features = gff.parseStringSync(gff3, {
parseFeatures: true,
parseComments: false,
parseDirectives: false,
parseSequences: false,
disableDerivesFromReferences: true,
})

features.forEach(featureLocs => {
this.formatFeatures(featureLocs).forEach(f => {
if (
doesIntersect2(
f.get('start'),
f.get('end'),
originalQuery.start,
originalQuery.end,
)
) {
observer.next(f)
}
})
parseStringSync(gff3).forEach(featureLocs => {
console.log({ featureLocs })

Check warning on line 140 in plugins/gff3/src/Gff3TabixAdapter/Gff3TabixAdapter.ts

View workflow job for this annotation

GitHub Actions / Lint

Unexpected console statement
featureLocs
.map(
featureLoc =>
new SimpleFeature({
data: featureData(featureLoc),
id: `${this.id}-offset-${featureLoc.attributes?._lineHash?.[0]}`,
}),
)
.forEach(f => {
if (
doesIntersect2(
f.get('start'),
f.get('end'),
originalQuery.start,
originalQuery.end,
)
) {
observer.next(f)
}
})
})
observer.complete()
} catch (e) {
Expand All @@ -180,15 +181,5 @@ export default class Gff3TabixAdapter extends BaseFeatureDataAdapter {
}
}

private formatFeatures(featureLocs: GFF3Feature) {
return featureLocs.map(
featureLoc =>
new SimpleFeature({
data: featureData(featureLoc),
id: `${this.id}-offset-${featureLoc.attributes?._lineHash?.[0]}`,
}),
)
}

public freeResources(/* { region } */) {}
}
2 changes: 1 addition & 1 deletion plugins/gff3/src/featureData.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { GFF3FeatureLineWithRefs } from '@gmod/gff'
import { GFF3FeatureLineWithRefs } from 'gff-nostream'

export function featureData(data: GFF3FeatureLineWithRefs) {
const f: Record<string, unknown> = { ...data }
Expand Down
2 changes: 1 addition & 1 deletion plugins/gtf/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
"dependencies": {
"@flatten-js/interval-tree": "^1.0.15",
"@gmod/bgzf-filehandle": "^1.4.3",
"@gmod/gtf": "^0.0.9"
"gtf-nostream": "^1.0.0"
},
"peerDependencies": {
"@jbrowse/core": "^2.0.0",
Expand Down
11 changes: 2 additions & 9 deletions plugins/gtf/src/GtfAdapter/GtfAdapter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ import {
isGzip,
} from '@jbrowse/core/util'
import { unzip } from '@gmod/bgzf-filehandle'
import gtf from '@gmod/gtf'
import { parseStringSync } from 'gtf-nostream'

// locals
import { FeatureLoc, featureData } from '../util'
Expand Down Expand Up @@ -82,14 +82,7 @@ export default class GtfAdapter extends BaseFeatureDataAdapter {
if (!this.calculatedIntervalTreeMap[refName]) {
sc?.('Parsing GTF data')
const intervalTree = new IntervalTree()
;(
gtf.parseStringSync(lines, {
parseFeatures: true,
parseComments: false,
parseDirectives: false,
parseSequences: false,
}) as FeatureLoc[][]
)
;(parseStringSync(lines) as FeatureLoc[][])
.flat()
.map(
(f, i) =>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,12 @@ test('read from meta', async () => {
.join(__dirname, '..', '..', '..', '..', 'test_data', 'names')
.replaceAll('\\', '\\\\')

jest.spyOn(global, 'fetch').mockImplementation(url => {
const response = `${url}`.includes('names/meta.json') ? meta : {}
return Promise.resolve(new Response(JSON.stringify(response)))
})
jest
.spyOn(global, 'fetch')
.mockImplementation((url: string | Request | URL) => {
const response = `${url}`.includes('names/meta.json') ? meta : {}
return Promise.resolve(new Response(JSON.stringify(response)))
})
const hashMap = new HttpMap({ url: rootTemplate })
await hashMap.getBucket('apple')

Expand All @@ -25,20 +27,21 @@ test('get bucket contents', async () => {
.join(__dirname, '..', '..', '..', '..', 'test_data', 'names')
.replaceAll('\\', '\\\\')

const spy = jest.spyOn(global, 'fetch')
spy.mockImplementation(url => {
let response = {}
if (`${url}`.includes('names/meta.json')) {
response = meta
}
if (`${url}`.includes('names/0.json')) {
response = first
}
if (`${url}`.includes('names/f.json')) {
response = last
}
return Promise.resolve(new Response(JSON.stringify(response)))
})
const spy = jest
.spyOn(global, 'fetch')
.mockImplementation((url: string | Request | URL) => {
let response = {}
if (`${url}`.includes('names/meta.json')) {
response = meta
}
if (`${url}`.includes('names/0.json')) {
response = first
}
if (`${url}`.includes('names/f.json')) {
response = last
}
return Promise.resolve(new Response(JSON.stringify(response)))
})
const hashMap = new HttpMap({ url: rootTemplate })

await hashMap.getBucket('apple')
Expand Down
111 changes: 57 additions & 54 deletions products/jbrowse-web/src/tests/Loader.test.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -25,62 +25,65 @@ jest.mock('../makeWorkerInstance', () => () => {})

const delay = { timeout: 20000 }

jest.spyOn(global, 'fetch').mockImplementation(async (url, args) => {
if (/plugin-store/.exec(`${url}`)) {
return new Response(
JSON.stringify({
plugins: [
{
url: 'https://unpkg.com/jbrowse-plugin-msaview/dist/jbrowse-plugin-msaview.umd.production.min.js',
},
],
}),
)
}
if (`${url}`.includes('testid')) {
return new Response(
`{"session":"U2FsdGVkX1+9+Hsy+o75Cdyb1jGYB/N1/h6Jr5ARZRF02uH2AN70Uc/yTXAEo4PQMVypDZMLqO+LJcnF6k2FKfRo9w3oeL+EbWZsXgsTrP5IrE+xYN1wfdTKoIohbQMI+zcIZGLVNf7UqNZjwzsIracm5DkgZh9EWo4MAkBP10ZZEWSdV7gmg95a5ofta2bOMpL4T5yOdukBa+6Uvv9qYXt2KdZPR4PoVLQUTE67zIdc0A9n9BuXiTOFUmczfJVvkoQSOGaXGgSUVoK31Ei12lk67a55YtbG3ClENIMcSK/YbMH7w9HtqImzPY0jaQZSZ6ikKW8fXIbXmqX0oadOKS70RNVcF5JcDMYKx6zPxAf7WjpuFh+cNNr7j6bizRoTbuZi+xNsPpnA2QmbtOXCQzbOao1Oj3HzriBAIGC56bSxx0YfJ0en751LV6yrLPsnMmmmowTIjkbH5c+QRJId9sdYQb9Ytqr2dWBKixHSGhLBfdNr0yt3t5GQRu11Rlq6OekrA9KcmHv9QU3AhDtj9TYjG5vqveYCDfS7uSc3TJLEczwF8p02wjuGapYV5QpX+Lm9ADO8X+qW+bFZj3EGKoQBTUSfV1fd3t5oH3KWWuWYpMuRLbSYgcjKC29DOUJA43k+Ufmio+wO7CufcgGkIWlpejojX8f28UsPXaONmd3t8H4bmzXkB631E1EVS4y+RZGxc2uSVedS446qq/9tV9XJW9tkwNINwbpMHAG0OZk="}`,
)
}
if (`${url}`.includes('testcustomcallback')) {
return new Response(
`{"session":"eJzVVm1v2zYQ_isCvzQB7NiK4zTQt9TNVm9p59puUmANAlo6SdwoSiPptwb-7zuSsiw7dhpk2YB9MGAe7-W5e053fCAsIgHRX5fwOfslnJAGETQDFPWFhkRSzXLhaVDagwXNCg6e37ponbZP_YbXDfyLoPvWG3xEs4zKhAkStBskknQO8pZFOiVB5-KsQWYM5ooEvz-4cGzj-974RnO9LEzUayaAyp9B5BncoBHe5HGsQA8WJDhtt9H7pBiANMf2SbuLwZgqOF1CNIQE_bkoEuJPLo1QJ5foRGkqtQUHAhF00ZPfQLUZSAUoiClX0CBUKcgmfFkaz3I-yxdkdYf4JA3_rKUwHOpr-XF0c7PBfslZIjIQWo2NMl6EuYhZMnWZVv6aPBdJUwKNVFPNmhOaNf1zv312cX7WOe-87TQVKIUGay9lhrXgf_XTL1dt6Y-Gu5XbYHjvrFBhwDhMi_U5KH2o3kD9OhmLSO362NZvkBRYkmLxfFP9nZQenjYtofejp5I_gHy7KPeFdX2_WH43zZZPFeRIHvqNpyI0YI5ioHoq4dh7-CY8TwIehFcKTxLQR28UsiiiN8ffxIoY9kUEEhvApOGQD0tRLbGdi1WDjGbJT87rHvU9l6sVGqk0n4_yWPc4KwomkqrlYsY1yHeWmJjTpC9CPo3A9qo5Xy3Ks9_tnBtPo0-DnkmdJrDLaZq__0qv2Kw72eV0j9GG2LMf8rrX_BXJVaIIS___GsO1HPbxtufWEQccQm3Gi5Fa5oh1rXI-dcXybflMaBxsJtbqBbQ-e1Y8q6I1dk-7bdd-6_xIoOUUnMx1t5Os7sygS1kEHzCeqU7ZohvRb-jBzPLqyo7FkS1RLseulCnDMDJMWUg5KVWu6QQ4loYYDJy6T6CEhUmANFmVXg2KOYuQT8tb3d24Hm7T9gc1qq_gwyGVWxvIzmqOuOwy2PA3hoV2fLusH2-uirmriNUxbQkrGL06ye6yAoAbyvx52ZZAGCwrcqlHTlp6XaPZd1eB6u-5NB1J8aObwW3FhEtVgP0W-9jvVIRQXtTBuEV1cCnafnh6aDy5DkV9N1tjzxp7c6ZTb3TjHYV5sTwm29vcgFrvc-wvGtFCbw2AdzS7LIX4xqDZdR5Ws3Aq2QG4J6hp9RV8sUqp1kXQanG05mmudNDBp0bLtMp9RDVtOSct1x4nfygMYLjD2bIwkfizo-KP_ePIhuat58UPHxOvN_T_d2u88cIV8npvpC3TR9X8L98zr_w-eGz_dHYvIsKut3KY9KpBZvr-bvU3l2iNHA"}`,
)
}
if (`${url}`.includes('nonexist')) {
return new Response('', {
status: 404,
statusText: 'failed to find session',
})
}
// this is the analytics
if (`${url}`.includes('jb2=true')) {
return new Response('{}')
}
try {
const file = getFile(`${url}`)
const maxRangeRequest = 2000000 // kind of arbitrary, part of the rangeParser
if (args?.headers && 'range' in args.headers) {
const range = rangeParser(maxRangeRequest, args.headers.range)
if (range === -2 || range === -1) {
throw new Error(`Error parsing range "${args.headers.range}"`)
}
const { start, end } = range[0]!
const len = end - start + 1
const buf = Buffer.alloc(len)
const { bytesRead } = await file.read(buf, 0, len, start)
const stat = await file.stat()
return new Response(buf.subarray(0, bytesRead), {
status: 206,
headers: [['content-range', `${start}-${end}/${stat.size}`]],
jest
.spyOn(global, 'fetch')

.mockImplementation(async (url: any, args: any) => {
if (/plugin-store/.exec(`${url}`)) {
return new Response(
JSON.stringify({
plugins: [
{
url: 'https://unpkg.com/jbrowse-plugin-msaview/dist/jbrowse-plugin-msaview.umd.production.min.js',
},
],
}),
)
}
if (`${url}`.includes('testid')) {
return new Response(
`{"session":"U2FsdGVkX1+9+Hsy+o75Cdyb1jGYB/N1/h6Jr5ARZRF02uH2AN70Uc/yTXAEo4PQMVypDZMLqO+LJcnF6k2FKfRo9w3oeL+EbWZsXgsTrP5IrE+xYN1wfdTKoIohbQMI+zcIZGLVNf7UqNZjwzsIracm5DkgZh9EWo4MAkBP10ZZEWSdV7gmg95a5ofta2bOMpL4T5yOdukBa+6Uvv9qYXt2KdZPR4PoVLQUTE67zIdc0A9n9BuXiTOFUmczfJVvkoQSOGaXGgSUVoK31Ei12lk67a55YtbG3ClENIMcSK/YbMH7w9HtqImzPY0jaQZSZ6ikKW8fXIbXmqX0oadOKS70RNVcF5JcDMYKx6zPxAf7WjpuFh+cNNr7j6bizRoTbuZi+xNsPpnA2QmbtOXCQzbOao1Oj3HzriBAIGC56bSxx0YfJ0en751LV6yrLPsnMmmmowTIjkbH5c+QRJId9sdYQb9Ytqr2dWBKixHSGhLBfdNr0yt3t5GQRu11Rlq6OekrA9KcmHv9QU3AhDtj9TYjG5vqveYCDfS7uSc3TJLEczwF8p02wjuGapYV5QpX+Lm9ADO8X+qW+bFZj3EGKoQBTUSfV1fd3t5oH3KWWuWYpMuRLbSYgcjKC29DOUJA43k+Ufmio+wO7CufcgGkIWlpejojX8f28UsPXaONmd3t8H4bmzXkB631E1EVS4y+RZGxc2uSVedS446qq/9tV9XJW9tkwNINwbpMHAG0OZk="}`,
)
}
if (`${url}`.includes('testcustomcallback')) {
return new Response(
`{"session":"eJzVVm1v2zYQ_isCvzQB7NiK4zTQt9TNVm9p59puUmANAlo6SdwoSiPptwb-7zuSsiw7dhpk2YB9MGAe7-W5e053fCAsIgHRX5fwOfslnJAGETQDFPWFhkRSzXLhaVDagwXNCg6e37ponbZP_YbXDfyLoPvWG3xEs4zKhAkStBskknQO8pZFOiVB5-KsQWYM5ooEvz-4cGzj-974RnO9LEzUayaAyp9B5BncoBHe5HGsQA8WJDhtt9H7pBiANMf2SbuLwZgqOF1CNIQE_bkoEuJPLo1QJ5foRGkqtQUHAhF00ZPfQLUZSAUoiClX0CBUKcgmfFkaz3I-yxdkdYf4JA3_rKUwHOpr-XF0c7PBfslZIjIQWo2NMl6EuYhZMnWZVv6aPBdJUwKNVFPNmhOaNf1zv312cX7WOe-87TQVKIUGay9lhrXgf_XTL1dt6Y-Gu5XbYHjvrFBhwDhMi_U5KH2o3kD9OhmLSO362NZvkBRYkmLxfFP9nZQenjYtofejp5I_gHy7KPeFdX2_WH43zZZPFeRIHvqNpyI0YI5ioHoq4dh7-CY8TwIehFcKTxLQR28UsiiiN8ffxIoY9kUEEhvApOGQD0tRLbGdi1WDjGbJT87rHvU9l6sVGqk0n4_yWPc4KwomkqrlYsY1yHeWmJjTpC9CPo3A9qo5Xy3Ks9_tnBtPo0-DnkmdJrDLaZq__0qv2Kw72eV0j9GG2LMf8rrX_BXJVaIIS___GsO1HPbxtufWEQccQm3Gi5Fa5oh1rXI-dcXybflMaBxsJtbqBbQ-e1Y8q6I1dk-7bdd-6_xIoOUUnMx1t5Os7sygS1kEHzCeqU7ZohvRb-jBzPLqyo7FkS1RLseulCnDMDJMWUg5KVWu6QQ4loYYDJy6T6CEhUmANFmVXg2KOYuQT8tb3d24Hm7T9gc1qq_gwyGVWxvIzmqOuOwy2PA3hoV2fLusH2-uirmriNUxbQkrGL06ye6yAoAbyvx52ZZAGCwrcqlHTlp6XaPZd1eB6u-5NB1J8aObwW3FhEtVgP0W-9jvVIRQXtTBuEV1cCnafnh6aDy5DkV9N1tjzxp7c6ZTb3TjHYV5sTwm29vcgFrvc-wvGtFCbw2AdzS7LIX4xqDZdR5Ws3Aq2QG4J6hp9RV8sUqp1kXQanG05mmudNDBp0bLtMp9RDVtOSct1x4nfygMYLjD2bIwkfizo-KP_ePIhuat58UPHxOvN_T_d2u88cIV8npvpC3TR9X8L98zr_w-eGz_dHYvIsKut3KY9KpBZvr-bvU3l2iNHA"}`,
)
}
if (`${url}`.includes('nonexist')) {
return new Response('', {
status: 404,
statusText: 'failed to find session',
})
}
return new Response(await file.readFile(), { status: 200 })
} catch (e) {
console.error(e)
return new Response(undefined, { status: 404 })
}
})
// this is the analytics
if (`${url}`.includes('jb2=true')) {
return new Response('{}')
}
try {
const file = getFile(`${url}`)
const maxRangeRequest = 2000000 // kind of arbitrary, part of the rangeParser
if (args?.headers && 'range' in args.headers) {
const range = rangeParser(maxRangeRequest, args.headers.range)
if (range === -2 || range === -1) {
throw new Error(`Error parsing range "${args.headers.range}"`)
}
const { start, end } = range[0]!
const len = end - start + 1
const buf = Buffer.alloc(len)
const { bytesRead } = await file.read(buf, 0, len, start)
const stat = await file.stat()
return new Response(buf.subarray(0, bytesRead), {
status: 206,
headers: [['content-range', `${start}-${end}/${stat.size}`]],
})
}
return new Response(await file.readFile(), { status: 200 })
} catch (e) {
console.error(e)
return new Response(undefined, { status: 404 })
}
})

afterEach(() => {
localStorage.clear()
Expand Down
Loading

0 comments on commit 1bf74cc

Please sign in to comment.