Skip to content

Commit

Permalink
Split VCF
Browse files Browse the repository at this point in the history
  • Loading branch information
cmdcolin committed Feb 20, 2025
1 parent 7f4c816 commit 099278c
Show file tree
Hide file tree
Showing 4 changed files with 185 additions and 0 deletions.
111 changes: 111 additions & 0 deletions plugins/variants/src/SplitVcfTabixAdapter/SplitVcfTabixAdapter.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
import { TabixIndexedFile } from '@gmod/tabix'
import VcfParser from '@gmod/vcf'
import { BaseFeatureDataAdapter } from '@jbrowse/core/data_adapters/BaseAdapter'
import { fetchAndMaybeUnzipText, updateStatus } from '@jbrowse/core/util'
import { openLocation } from '@jbrowse/core/util/io'
import { ObservableCreate } from '@jbrowse/core/util/rxjs'

import VcfFeature from '../VcfFeature'

import type { BaseOptions } from '@jbrowse/core/data_adapters/BaseAdapter'
import type { Feature } from '@jbrowse/core/util'
import type { NoAssemblyRegion } from '@jbrowse/core/util/types'

/**
 * Feature adapter for a VCF data set that is split across several
 * tabix-indexed files, one file per reference sequence name.
 *
 * The `vcfGzLocationMap` config slot maps a refName to the location of the
 * file holding that reference's variants. `indexLocationMap` optionally maps a
 * refName to its index location; when a refName has no entry there, the index
 * URI is derived from the data file URI plus the lower-cased `indexType`
 * as an extension.
 */
export default class SplitVcfTabixAdapter extends BaseFeatureDataAdapter {
  /**
   * Per-refName cache of in-flight or completed set-up work, so the tabix
   * file handle (and its chunk cache) and the parsed header are created once
   * per reference instead of once per query.
   */
  private configured = new Map<
    string,
    Promise<{ vcf: TabixIndexedFile; parser: VcfParser }>
  >()

  /**
   * Opens the data file and index configured for `refName` and builds a VCF
   * parser from that file's header.
   *
   * @param refName - key to look up in `vcfGzLocationMap`
   * @returns the tabix file handle and a parser built from its header
   * @throws Error if `vcfGzLocationMap` has no entry for `refName`
   */
  private async configurePre(refName: string) {
    const indexType = this.getConf('indexType')
    const vcfGzLocation = this.getConf('vcfGzLocationMap')[refName]
    if (!vcfGzLocation) {
      throw new Error(
        `SplitVcfTabixAdapter: no entry for refName "${refName}" in vcfGzLocationMap`,
      )
    }
    const indexLocation = this.getConf('indexLocationMap')[refName] || {
      uri: `${vcfGzLocation.uri}.${indexType.toLowerCase()}`,
    }

    const filehandle = openLocation(vcfGzLocation, this.pluginManager)
    const isCSI = indexType === 'CSI'
    // exactly one of csiFilehandle / tbiFilehandle is supplied, chosen by
    // the configured index type
    const vcf = new TabixIndexedFile({
      filehandle,
      csiFilehandle: isCSI
        ? openLocation(indexLocation, this.pluginManager)
        : undefined,
      tbiFilehandle: !isCSI
        ? openLocation(indexLocation, this.pluginManager)
        : undefined,
      chunkCacheSize: 50 * 2 ** 20,
    })

    return {
      vcf,
      parser: new VcfParser({
        header: await vcf.getHeader(),
      }),
    }
  }

  /**
   * Returns the (memoized) tabix handle and parser for `refName`, reporting
   * 'Downloading index' through the status callback while the first set-up
   * for that refName runs.
   *
   * A failed set-up is evicted from the cache so a later call can retry.
   *
   * @param refName - reference sequence name to configure
   * @param opts - optional base options; only `statusCallback` is read here
   */
  async configure(refName: string, opts?: BaseOptions) {
    const cached = this.configured.get(refName)
    if (cached) {
      return cached
    }
    const { statusCallback = () => {} } = opts || {}
    const pending = updateStatus('Downloading index', statusCallback, () =>
      this.configurePre(refName),
    ).catch((e: unknown) => {
      // do not keep a rejected promise around, otherwise every later query
      // for this refName would fail without retrying
      this.configured.delete(refName)
      throw e
    })
    this.configured.set(refName, pending)
    return pending
  }

  /**
   * @returns the reference names that have a data file configured, i.e. the
   * keys of `vcfGzLocationMap`
   */
  public async getRefNames() {
    return Object.keys(this.getConf('vcfGzLocationMap'))
  }

  /**
   * Streams the variants overlapping `query` from the file configured for
   * `query.refName`.
   *
   * @param query - region to fetch (refName, start, end)
   * @param opts - base options; `statusCallback` and `stopToken` are read
   * here and the whole object is also forwarded to `getLines`
   * @returns an observable that emits one VcfFeature per line delivered by
   * `getLines`, then completes
   */
  public getFeatures(query: NoAssemblyRegion, opts: BaseOptions = {}) {
    return ObservableCreate<Feature>(async observer => {
      const { refName, start, end } = query
      const { statusCallback = () => {} } = opts
      const { vcf, parser } = await this.configure(refName, opts)

      await updateStatus('Downloading variants', statusCallback, () =>
        vcf.getLines(refName, start, end, {
          lineCallback: (line, fileOffset) => {
            observer.next(
              new VcfFeature({
                variant: parser.parseLine(line),
                parser,
                // the file offset of the line is used to build the feature id
                id: `${this.id}-vcf-${fileOffset}`,
              }),
            )
          },
          ...opts,
        }),
      )
      observer.complete()
    }, opts.stopToken)
  }

  /**
   * Lists the samples of the data set.
   *
   * Sample names are taken from the header of the file configured for the
   * first key of `vcfGzLocationMap`. When `samplesTsvLocation` is left empty
   * or at its placeholder default, each sample is returned as `{ name }`.
   * Otherwise the TSV is fetched, its first column is treated as the sample
   * name, remaining columns are keyed by the TSV header row, and rows whose
   * name is not among the VCF samples are dropped.
   *
   * @throws Error if `vcfGzLocationMap` has no entries
   */
  async getSources() {
    const conf = this.getConf('samplesTsvLocation')
    const [firstRefName] = Object.keys(this.getConf('vcfGzLocationMap'))
    if (firstRefName === undefined) {
      throw new Error(
        'SplitVcfTabixAdapter: vcfGzLocationMap is empty, cannot determine samples',
      )
    }
    if (conf.uri === '' || conf.uri === '/path/to/samples.tsv') {
      const { parser } = await this.configure(firstRefName)
      return parser.samples.map(name => ({
        name,
      }))
    } else {
      const txt = await fetchAndMaybeUnzipText(
        openLocation(conf, this.pluginManager),
      )
      const lines = txt.split(/\n|\r\n|\r/)
      const header = lines[0]!.split('\t')
      const { parser } = await this.configure(firstRefName)
      const s = new Set(parser.samples)
      return lines
        .slice(1)
        .map(line => {
          const cols = line.split('\t')
          return {
            name: cols[0]!,
            ...Object.fromEntries(
              // force col 0 to be called name
              cols.slice(1).map((c, idx) => [header[idx + 1]!, c] as const),
            ),
          }
        })
        .filter(f => s.has(f.name))
    }
  }

  /** Intentionally a no-op. */
  public freeResources(/* { region } */): void {}
}
52 changes: 52 additions & 0 deletions plugins/variants/src/SplitVcfTabixAdapter/configSchema.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
import { ConfigurationSchema } from '@jbrowse/core/configuration'

/**
 * #config SplitVcfTabixAdapter
 */
// NOTE(review): this empty function appears to exist only so the #config doc
// comment above has a declaration to attach to — confirm against the docs
// tooling before removing it
function x() {} // eslint-disable-line @typescript-eslint/no-unused-vars

/**
 * Configuration schema for the adapter that reads a VCF data set split across
 * multiple tabix-indexed files, keyed by reference sequence name.
 */
const SplitVcfTabixAdapter = ConfigurationSchema(
  'SplitVcfTabixAdapter',
  {
    /**
     * #slot
     * object like {chr1:{uri:'url to file'}}
     */
    vcfGzLocationMap: {
      type: 'frozen',
      defaultValue: {},
    },
    /**
     * #slot
     * object like {chr1:{uri:'url to index'}}
     */
    indexLocationMap: {
      type: 'frozen',
      defaultValue: {},
    },

    /**
     * #slot
     * index format of the per-reference index files; defaults to 'TBI'
     */
    indexType: {
      type: 'string',
      defaultValue: 'TBI',
    },

    /**
     * #slot
     * optional TSV of per-sample metadata; the default uri is a placeholder
     */
    samplesTsvLocation: {
      type: 'fileLocation',
      // the description belongs to the slot itself, not inside the default
      // location value
      description:
        'tsv with header like name\tpopulation\tetc. where the first column is required, and is the sample names',
      defaultValue: {
        uri: '/path/to/samples.tsv',
        locationType: 'UriLocation',
      },
    },
  },
  { explicitlyTyped: true },
)

export default SplitVcfTabixAdapter
20 changes: 20 additions & 0 deletions plugins/variants/src/SplitVcfTabixAdapter/index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
import AdapterType from '@jbrowse/core/pluggableElementTypes/AdapterType'

import configSchema from './configSchema'

import type PluginManager from '@jbrowse/core/PluginManager'

export { default as configSchema } from './configSchema'

/**
 * Registers the 'SplitVcfTabixAdapter' adapter type with the given plugin
 * manager. The adapter class itself is pulled in through a dynamic import
 * when `getAdapterClass` is invoked.
 *
 * @param pluginManager - plugin manager to register the adapter type on
 */
export default function VcfTabixAdapterF(pluginManager: PluginManager) {
  const createAdapterType = () =>
    new AdapterType({
      name: 'SplitVcfTabixAdapter',
      displayName: 'VCF tabix adapter (split across multiple files)',
      configSchema,
      getAdapterClass: async () => {
        const adapterModule = await import('./SplitVcfTabixAdapter')
        return adapterModule.default
      },
    })

  pluginManager.addAdapterType(createAdapterType)
}
2 changes: 2 additions & 0 deletions plugins/variants/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import MultiLinearVariantDisplayF from './MultiLinearVariantDisplay'
import LinearVariantMatrixDisplayF from './MultiLinearVariantMatrixDisplay'
import LinearVariantMatrixRendererF from './MultiLinearVariantMatrixRenderer'
import MultiVariantRendererF from './MultiLinearVariantRenderer'
import SplitVcfTabixAdapterF from './SplitVcfTabixAdapter'
import StructuralVariantChordRendererF from './StructuralVariantChordRenderer'
import VariantFeatureWidgetF from './VariantFeatureWidget'
import { MultiVariantGetGenotypeMatrix } from './VariantRPC/MultiVariantGetGenotypeMatrix'
Expand All @@ -24,6 +25,7 @@ export default class VariantsPlugin extends Plugin {
install(pluginManager: PluginManager) {
VcfAdapterF(pluginManager)
VcfTabixAdapterF(pluginManager)
SplitVcfTabixAdapterF(pluginManager)
VariantFeatureWidgetF(pluginManager)
VariantTrackF(pluginManager)
ExtensionPointsF(pluginManager)
Expand Down

0 comments on commit 099278c

Please sign in to comment.