-
Notifications
You must be signed in to change notification settings - Fork 64
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add BedGraphAdapter and BedGraphTabixAdapter (#4672)
- Loading branch information
Showing
10 changed files
with
917 additions
and
502 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,152 @@ | ||
import { ObservableCreate } from '@jbrowse/core/util/rxjs' | ||
import { | ||
Feature, | ||
fetchAndMaybeUnzip, | ||
Region, | ||
SimpleFeature, | ||
} from '@jbrowse/core/util' | ||
import IntervalTree from '@flatten-js/interval-tree' | ||
import { | ||
BaseFeatureDataAdapter, | ||
BaseOptions, | ||
} from '@jbrowse/core/data_adapters/BaseAdapter' | ||
import { openLocation } from '@jbrowse/core/util/io' | ||
|
||
/**
 * Data adapter for a non-indexed bedGraph file.
 *
 * The whole file is fetched once (via `fetchAndMaybeUnzip`), decoded to text
 * and grouped by reference name. An interval tree per reference name is then
 * built lazily the first time that reference is queried.
 *
 * Each tab-separated column after the first three (refName, start, end) is
 * emitted as its own feature, with `source` set to the matching column name.
 */
export default class BedGraphAdapter extends BaseFeatureDataAdapter {
  /**
   * Memoized result of `loadDataP`. Reset to `undefined` when loading fails
   * so that a later call can retry.
   */
  protected bedFeatures?: Promise<{
    header: string
    features: Record<string, string[]>
    columnNames: string[]
  }>

  /**
   * Memoized interval trees keyed by reference name. An entry is reset to
   * `undefined` when building it fails so that a later call can retry.
   */
  protected intervalTrees: Record<
    string,
    Promise<IntervalTree | undefined> | undefined
  > = {}

  /**
   * Resolves the column names for the file.
   *
   * @returns the configured `columnNames` when non-empty; otherwise the
   * tab-separated fields of the last header line (with its leading character
   * removed), or `undefined` when there is no tab-separated header line.
   */
  async getNames() {
    const { header, columnNames } = await this.loadData()
    if (columnNames.length) {
      return columnNames
    }
    const defs = header.split(/\n|\r\n|\r/).filter(f => !!f)
    const defline = defs.at(-1)
    return defline?.includes('\t')
      ? defline
          // drop the leading '#' that marks a header line
          .slice(1)
          .split('\t')
          .map(field => field.trim())
      : undefined
  }

  /**
   * Builds the interval tree for one reference name.
   *
   * @param refName - reference name whose lines should be indexed
   * @returns the populated tree, or `undefined` when the file has no lines
   * for `refName`
   */
  private async loadFeatureIntervalTreeHelper(refName: string) {
    const { features } = await this.loadData()
    const lines = features[refName]
    if (!lines) {
      return undefined
    }
    // the first three names describe refName/start/end, not score columns
    const names = (await this.getNames())?.slice(3) || []
    const intervalTree = new IntervalTree()
    for (const [i, line] of lines.entries()) {
      // the first column is the key the line was bucketed under, i.e. refName
      const [, s, e, ...rest] = line.split('\t')
      // coordinates are shared by every score column of the line: parse once
      const start = Number(s)
      const end = Number(e)
      if (!Number.isFinite(start) || !Number.isFinite(end)) {
        // a row without numeric coordinates cannot be placed in the tree
        continue
      }
      for (const [j, column] of rest.entries()) {
        const score = Number(column)
        // zero and non-numeric scores are skipped
        if (score) {
          intervalTree.insert(
            [start, end],
            new SimpleFeature({
              id: `${this.id}-${refName}-${i}-${j}`,
              data: {
                refName,
                start,
                end,
                score,
                source: names[j] || `col${j}`,
              },
            }),
          )
        }
      }
    }

    return intervalTree
  }

  /**
   * @param opts - options forwarded to the initial file load
   * @returns the reference names that have at least one data line
   */
  public async getRefNames(opts: BaseOptions = {}) {
    const { features } = await this.loadData(opts)
    return Object.keys(features)
  }

  /**
   * Fetches and parses the bedGraph file.
   *
   * @param opts - options forwarded to `fetchAndMaybeUnzip`
   * @returns the leading `#` header lines joined with newlines, the raw data
   * lines grouped by reference name, and the configured column names
   * @throws Error when the data is larger than the maximum string length,
   * or (from `TextDecoder`) when it is not valid UTF-8
   */
  private async loadDataP(opts: BaseOptions = {}) {
    const pm = this.pluginManager
    const bedLoc = this.getConf('bedGraphLocation')
    const buffer = await fetchAndMaybeUnzip(openLocation(bedLoc, pm), opts)
    // Chrome's maximum string length is about 512MB, so fail early with a
    // clear message instead of failing while decoding
    if (buffer.length > 536_870_888) {
      throw new Error('Data exceeds maximum string length (512MB)')
    }
    const data = new TextDecoder('utf8', { fatal: true }).decode(buffer)
    const lines = data.split(/\n|\r\n|\r/).filter(f => !!f)

    // the header is the run of '#' lines at the very top of the file
    const headerLines: string[] = []
    let i = 0
    for (; i < lines.length && lines[i]!.startsWith('#'); i++) {
      headerLines.push(lines[i]!)
    }
    const header = headerLines.join('\n')

    const features: Record<string, string[]> = {}
    for (; i < lines.length; i++) {
      const line = lines[i]!
      const tab = line.indexOf('\t')
      if (tab === -1) {
        // Lines without a tab (e.g. `track type=bedGraph ...` definition
        // lines) carry no data; slicing with -1 would otherwise register a
        // bogus reference name made of the line minus its last character
        continue
      }
      const refName = line.slice(0, tab)
      let bucket = features[refName]
      if (!bucket) {
        bucket = []
        features[refName] = bucket
      }
      bucket.push(line)
    }

    const columnNames = this.getConf('columnNames')

    return {
      header,
      features,
      columnNames,
    }
  }

  /**
   * Returns the memoized interval tree for `refName`, building it on first
   * use.
   *
   * @param refName - reference name to look up
   */
  async loadFeatureIntervalTree(refName: string) {
    if (!this.intervalTrees[refName]) {
      this.intervalTrees[refName] = this.loadFeatureIntervalTreeHelper(
        refName,
      ).catch((e: unknown) => {
        // forget the failed attempt so the next call retries
        this.intervalTrees[refName] = undefined
        throw e
      })
    }
    return this.intervalTrees[refName]
  }

  /**
   * Returns the memoized parse result, loading the file on first use.
   *
   * @param opts - options used only by the call that triggers the load
   */
  async loadData(opts: BaseOptions = {}) {
    if (!this.bedFeatures) {
      this.bedFeatures = this.loadDataP(opts).catch((e: unknown) => {
        // forget the failed attempt so the next call retries
        this.bedFeatures = undefined
        throw e
      })
    }

    return this.bedFeatures
  }

  /**
   * Emits every feature whose interval is returned by the tree search for
   * the queried region, then completes.
   *
   * @param query - region to search
   * @param _opts - currently unused
   */
  public getFeatures(query: Region, _opts: BaseOptions = {}) {
    return ObservableCreate<Feature>(async observer => {
      const { start, end, refName } = query
      const intervalTree = await this.loadFeatureIntervalTree(refName)
      intervalTree?.search([start, end]).forEach(f => {
        observer.next(f)
      })
      observer.complete()
    })
  }

  /** Nothing to release. */
  public freeResources(): void {}
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
import { ConfigurationSchema } from '@jbrowse/core/configuration' | ||
|
||
/**
 * #config BedGraphAdapter
 */
function x() {} // eslint-disable-line @typescript-eslint/no-unused-vars

// Configuration schema for the BedGraphAdapter: where the bedGraph file
// lives, plus optional explicit names for its columns.
const BedGraphAdapter = ConfigurationSchema(
  'BedGraphAdapter',
  {
    /**
     * #slot
     * Location of the bedGraph file to load.
     */
    bedGraphLocation: {
      type: 'fileLocation',
      defaultValue: {
        uri: '/path/to/my.bedgraph',
        locationType: 'UriLocation',
      },
    },

    /**
     * #slot
     * Names for the columns of the file; empty by default.
     */
    columnNames: {
      type: 'stringArray',
      description: 'List of column names',
      defaultValue: [],
    },
  },
  { explicitlyTyped: true },
)

export default BedGraphAdapter
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
import PluginManager from '@jbrowse/core/PluginManager' | ||
import AdapterType from '@jbrowse/core/pluggableElementTypes/AdapterType' | ||
|
||
import configSchema from './configSchema' | ||
|
||
/**
 * Registers the `BedGraphAdapter` adapter type with the plugin manager.
 *
 * The adapter class is pulled in through a dynamic import, so its module is
 * only loaded when `getAdapterClass` is actually invoked.
 *
 * @param pluginManager - plugin manager that receives the adapter type
 */
export default function BedGraphAdapterF(pluginManager: PluginManager) {
  // lazy loader for the adapter implementation
  const getAdapterClass = async () => {
    const adapterModule = await import('./BedGraphAdapter')
    return adapterModule.default
  }

  pluginManager.addAdapterType(() => {
    return new AdapterType({
      name: 'BedGraphAdapter',
      displayName: 'BedGraph adapter',
      configSchema,
      getAdapterClass,
    })
  })
}
114 changes: 114 additions & 0 deletions
114
plugins/bed/src/BedGraphTabixAdapter/BedGraphTabixAdapter.ts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,114 @@ | ||
import { ObservableCreate } from '@jbrowse/core/util/rxjs' | ||
import { Feature, Region, SimpleFeature } from '@jbrowse/core/util' | ||
import { | ||
BaseFeatureDataAdapter, | ||
BaseOptions, | ||
} from '@jbrowse/core/data_adapters/BaseAdapter' | ||
import { openLocation } from '@jbrowse/core/util/io' | ||
import { TabixIndexedFile } from '@gmod/tabix' | ||
|
||
/**
 * Data adapter for a tabix-indexed bedGraph file.
 *
 * Lines are fetched per query region through `TabixIndexedFile`. Each
 * tab-separated column after the first three (refName, start, end) is
 * emitted as its own feature, with `source` set to the matching column name.
 */
export default class BedGraphAdapter extends BaseFeatureDataAdapter {
  /**
   * Memoized result of `configurePre`. Reset to `undefined` when setup fails
   * so that a later call can retry.
   */
  private configured?: Promise<{
    bedGraph: TabixIndexedFile
    header: string
    columnNames: string[]
  }>

  /**
   * Opens the data file and its index according to the adapter
   * configuration and reads the file header.
   */
  private async configurePre() {
    const pm = this.pluginManager
    const bedGraphGzLocation = this.getConf('bedGraphGzLocation')
    const location = this.getConf(['index', 'location'])
    const indexType = this.getConf(['index', 'indexType'])

    const filehandle = openLocation(bedGraphGzLocation, pm)
    const isCSI = indexType === 'CSI'
    const bedGraph = new TabixIndexedFile({
      filehandle,
      // exactly one of the two index handles is supplied
      csiFilehandle: isCSI ? openLocation(location, pm) : undefined,
      tbiFilehandle: !isCSI ? openLocation(location, pm) : undefined,
      chunkCacheSize: 50 * 2 ** 20,
    })
    const columnNames = this.getConf('columnNames')

    const header = await bedGraph.getHeader()
    return {
      columnNames,
      bedGraph,
      header,
    }
  }

  /** Returns the memoized setup result, performing the setup on first use. */
  protected async configure() {
    if (!this.configured) {
      this.configured = this.configurePre().catch((e: unknown) => {
        // forget the failed attempt so the next call retries
        this.configured = undefined
        throw e
      })
    }
    return this.configured
  }

  /**
   * Resolves the column names for the file.
   *
   * @returns the configured `columnNames` when non-empty; otherwise the
   * tab-separated fields of the last header line (with its leading character
   * removed), or `undefined` when there is no tab-separated header line.
   */
  async getNames() {
    // use the header already read by configure() rather than requesting it
    // from the file again on every call (this runs once per getFeatures)
    const { header, columnNames } = await this.configure()
    if (columnNames.length) {
      return columnNames
    }
    const defs = header.split(/\n|\r\n|\r/).filter(f => !!f)
    const defline = defs.at(-1)
    return defline?.includes('\t')
      ? defline
          // drop the leading '#' that marks a header line
          .slice(1)
          .split('\t')
          .map(f => f.trim())
      : undefined
  }

  /**
   * @param opts - options forwarded to the tabix file
   * @returns the reference sequence names reported by the tabix file
   */
  public async getRefNames(opts: BaseOptions = {}) {
    const { bedGraph } = await this.configure()
    return bedGraph.getReferenceSequenceNames(opts)
  }

  /** @returns the header text read when the adapter was configured */
  async getHeader() {
    const { header } = await this.configure()
    return header
  }

  /**
   * Emits one feature per non-zero score column of every line that the
   * tabix file returns for the queried region, then completes.
   *
   * @param query - region to fetch
   * @param opts - options forwarded to `getLines`
   */
  public getFeatures(query: Region, opts: BaseOptions = {}) {
    return ObservableCreate<Feature>(async observer => {
      const { refName, start, end } = query
      const { bedGraph } = await this.configure()
      // the first three names describe refName/start/end, not score columns
      const names = (await this.getNames())?.slice(3) || []
      await bedGraph.getLines(refName, start, end, {
        // spread first so caller options can never replace the line parser
        ...opts,
        lineCallback: (line, fileOffset) => {
          const [lineRefName, s, e, ...rest] = line.split('\t')
          // coordinates are shared by every score column: parse once
          const lineStart = Number(s)
          const lineEnd = Number(e)
          for (const [j, column] of rest.entries()) {
            const score = Number(column)
            // zero and non-numeric scores are skipped
            if (score) {
              observer.next(
                new SimpleFeature({
                  id: `${this.id}-${fileOffset}-${j}`,
                  data: {
                    refName: lineRefName,
                    start: lineStart,
                    end: lineEnd,
                    score,
                    source: names[j] || `col${j}`,
                  },
                }),
              )
            }
          }
        },
      })
      observer.complete()
    })
  }

  /** Nothing to release. */
  public freeResources(): void {}
}
Oops, something went wrong.