Skip to content

Commit

Permalink
Add BedGraphAdapter and BedGraphTabixAdapter (#4672)
Browse files Browse the repository at this point in the history
  • Loading branch information
cmdcolin authored Nov 20, 2024
1 parent e3522a8 commit 3872967
Show file tree
Hide file tree
Showing 10 changed files with 917 additions and 502 deletions.
15 changes: 7 additions & 8 deletions plugins/bed/src/BedAdapter/BedAdapter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ export default class BedAdapter extends BaseFeatureDataAdapter {
}
}

private async loadData(opts: BaseOptions = {}) {
async loadData(opts: BaseOptions = {}) {
if (!this.bedFeatures) {
this.bedFeatures = this.loadDataP(opts).catch((e: unknown) => {
this.bedFeatures = undefined
Expand Down Expand Up @@ -128,9 +128,10 @@ export default class BedAdapter extends BaseFeatureDataAdapter {
const names = await this.getNames()

const intervalTree = new IntervalTree()
const ret = lines.map((line, i) => {
for (let i = 0; i < lines.length; i++) {
const line = lines[i]!
const uniqueId = `${this.id}-${refName}-${i}`
return new SimpleFeature(
const feat = new SimpleFeature(
featureData({
line,
colRef,
Expand All @@ -142,15 +143,13 @@ export default class BedAdapter extends BaseFeatureDataAdapter {
names,
}),
)
})

for (const obj of ret) {
intervalTree.insert([obj.get('start'), obj.get('end')], obj)
intervalTree.insert([feat.get('start'), feat.get('end')], feat)
}

return intervalTree
}

private async loadFeatureIntervalTree(refName: string) {
async loadFeatureIntervalTree(refName: string) {
if (!this.intervalTrees[refName]) {
this.intervalTrees[refName] = this.loadFeatureIntervalTreeHelper(
refName,
Expand Down
152 changes: 152 additions & 0 deletions plugins/bed/src/BedGraphAdapter/BedGraphAdapter.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
import { ObservableCreate } from '@jbrowse/core/util/rxjs'
import {
Feature,
fetchAndMaybeUnzip,
Region,
SimpleFeature,
} from '@jbrowse/core/util'
import IntervalTree from '@flatten-js/interval-tree'
import {
BaseFeatureDataAdapter,
BaseOptions,
} from '@jbrowse/core/data_adapters/BaseAdapter'
import { openLocation } from '@jbrowse/core/util/io'

/**
 * Feature adapter for a bedGraph text file that is loaded into memory in full.
 *
 * The file at the `bedGraphLocation` config slot is fetched once, decoded as
 * UTF-8 and bucketed by reference name; an interval tree is then built lazily
 * per reference name the first time it is queried. Each value column after
 * the first three (refName, start, end) becomes its own feature with a
 * `score` and a `source` (the column's name).
 */
export default class BedGraphAdapter extends BaseFeatureDataAdapter {
  /** Cached result of parsing the whole file; reset to undefined if loading fails */
  protected bedFeatures?: Promise<{
    header: string
    features: Record<string, string[]>
    columnNames: string[]
  }>

  /** Cached per-refName interval trees; an entry is reset if building it fails */
  protected intervalTrees: Record<
    string,
    Promise<IntervalTree | undefined> | undefined
  > = {}

  /**
   * Resolves the column names of the file.
   *
   * @returns the configured `columnNames` when non-empty; otherwise the
   * tab-separated fields of the last `#` header line (with its first
   * character stripped), or undefined when there is no such tab-separated
   * header line.
   */
  async getNames() {
    const { header, columnNames } = await this.loadData()
    if (columnNames.length) {
      return columnNames
    }
    const defs = header.split(/\n|\r\n|\r/).filter(f => !!f)
    const defline = defs.at(-1)
    return defline?.includes('\t')
      ? defline
          .slice(1)
          .split('\t')
          .map(field => field.trim())
      : undefined
  }

  /**
   * Builds the interval tree for one reference name.
   *
   * @param refName - reference name whose lines should be indexed
   * @returns the populated tree, or undefined when the file has no lines for
   * `refName`
   */
  private async loadFeatureIntervalTreeHelper(refName: string) {
    const { features } = await this.loadData()
    const lines = features[refName]
    if (!lines) {
      return undefined
    }
    // the first three names describe refName/start/end; the rest label the
    // value columns
    const names = (await this.getNames())?.slice(3) ?? []
    const intervalTree = new IntervalTree()
    for (const [i, line] of lines.entries()) {
      const [lineRefName, s, e, ...scores] = line.split('\t')
      // start/end are the same for every value column of the line, so parse
      // them once per line
      const start = Number(s)
      const end = Number(e)
      for (const [j, rawScore] of scores.entries()) {
        const score = Number(rawScore)
        // falsy scores (0 and NaN) are not emitted as features
        if (score) {
          intervalTree.insert(
            [start, end],
            new SimpleFeature({
              id: `${this.id}-${lineRefName}-${i}-${j}`,
              data: {
                refName: lineRefName,
                start,
                end,
                score,
                source: names[j] || `col${j}`,
              },
            }),
          )
        }
      }
    }

    return intervalTree
  }

  /**
   * @param opts - forwarded to the (cached) file load
   * @returns every reference name that has at least one data line
   */
  public async getRefNames(opts: BaseOptions = {}) {
    const { features } = await this.loadData(opts)
    return Object.keys(features)
  }

  /**
   * Fetches and parses the whole file: leading `#` lines form the header and
   * the remaining lines are grouped by their first tab-separated column.
   *
   * @param opts - forwarded to `fetchAndMaybeUnzip`
   * @throws Error when the fetched buffer is too large to decode into a
   * single string, or (from the fatal TextDecoder) when it is not valid UTF-8
   */
  private async loadDataP(opts: BaseOptions = {}) {
    const pm = this.pluginManager
    const bedLoc = this.getConf('bedGraphLocation')
    const buffer = await fetchAndMaybeUnzip(openLocation(bedLoc, pm), opts)
    // Chrome's maximum string length is about 512MB, so a larger buffer
    // cannot be decoded into one string
    if (buffer.length > 536_870_888) {
      throw new Error('Data exceeds maximum string length (512MB)')
    }
    const data = new TextDecoder('utf8', { fatal: true }).decode(buffer)
    const lines = data.split(/\n|\r\n|\r/).filter(f => !!f)
    const headerLines: string[] = []
    let i = 0
    for (; i < lines.length && lines[i]!.startsWith('#'); i++) {
      headerLines.push(lines[i]!)
    }
    const header = headerLines.join('\n')
    const features: Record<string, string[]> = {}
    for (; i < lines.length; i++) {
      const line = lines[i]!
      const tab = line.indexOf('\t')
      // A line without any tab (e.g. a `track`/`browser` line) has no value
      // columns and can never yield a feature. Slicing with -1 would also
      // register it under a bogus refName (the line minus its last
      // character), so skip it.
      if (tab === -1) {
        continue
      }
      const refName = line.slice(0, tab)
      const bucket = features[refName]
      if (bucket) {
        bucket.push(line)
      } else {
        features[refName] = [line]
      }
    }

    const columnNames = this.getConf('columnNames')

    return {
      header,
      features,
      columnNames,
    }
  }

  /**
   * Returns the cached interval tree promise for `refName`, creating it on
   * first use. A failed build clears the cache entry so that a later call
   * retries.
   */
  async loadFeatureIntervalTree(refName: string) {
    if (!this.intervalTrees[refName]) {
      this.intervalTrees[refName] = this.loadFeatureIntervalTreeHelper(
        refName,
      ).catch((e: unknown) => {
        this.intervalTrees[refName] = undefined
        throw e
      })
    }
    return this.intervalTrees[refName]
  }

  /**
   * Returns the cached parse of the file, starting the load on first use. A
   * failed load clears the cache so that a later call retries.
   *
   * @param opts - used only by the call that actually starts the load
   */
  async loadData(opts: BaseOptions = {}) {
    if (!this.bedFeatures) {
      this.bedFeatures = this.loadDataP(opts).catch((e: unknown) => {
        this.bedFeatures = undefined
        throw e
      })
    }

    return this.bedFeatures
  }

  /**
   * Emits every feature returned by the interval tree search for the queried
   * range.
   *
   * @param query - region (refName, start, end) to search
   * @param _opts - currently unused
   */
  public getFeatures(query: Region, _opts: BaseOptions = {}) {
    return ObservableCreate<Feature>(async observer => {
      const { start, end, refName } = query
      const intervalTree = await this.loadFeatureIntervalTree(refName)
      for (const feature of intervalTree?.search([start, end]) ?? []) {
        observer.next(feature)
      }
      observer.complete()
    })
  }

  /** Nothing is released here; the cached data stays in memory. */
  public freeResources(): void {}
}
32 changes: 32 additions & 0 deletions plugins/bed/src/BedGraphAdapter/configSchema.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
import { ConfigurationSchema } from '@jbrowse/core/configuration'

// NOTE(review): `x` below is an empty, unused function; it presumably exists
// only so the `#config` doc comment has a declaration to attach to for a docs
// generator — TODO confirm before removing.
/**
* #config BedGraphAdapter
*/
function x() {} // eslint-disable-line @typescript-eslint/no-unused-vars

// Configuration schema named 'BedGraphAdapter'. It declares the two slots the
// BedGraphAdapter class reads through getConf: `bedGraphLocation` and
// `columnNames`.
const BedGraphAdapter = ConfigurationSchema(
'BedGraphAdapter',
{
/**
* #slot
*/
bedGraphLocation: {
// location of the bedGraph file; the default is a placeholder URI
type: 'fileLocation',
defaultValue: {
uri: '/path/to/my.bedgraph',
locationType: 'UriLocation',
},
},
/**
* #slot
*/
columnNames: {
// empty by default; the adapter only uses this list when it is non-empty
// and otherwise derives names from the file header
type: 'stringArray',
description: 'List of column names',
defaultValue: [],
},
},
{ explicitlyTyped: true },
)
export default BedGraphAdapter
16 changes: 16 additions & 0 deletions plugins/bed/src/BedGraphAdapter/index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
import PluginManager from '@jbrowse/core/PluginManager'
import AdapterType from '@jbrowse/core/pluggableElementTypes/AdapterType'

import configSchema from './configSchema'

/**
 * Registers the `BedGraphAdapter` adapter type with the given plugin manager.
 *
 * The adapter class itself is loaded through a dynamic import, so the module
 * is only fetched when `getAdapterClass` is invoked.
 *
 * @param pluginManager - plugin manager that receives the adapter type factory
 */
export default function BedGraphAdapterF(pluginManager: PluginManager) {
  // resolves to the default export of the adapter module
  const loadAdapterClass = async () => {
    const adapterModule = await import('./BedGraphAdapter')
    return adapterModule.default
  }

  // factory handed to the plugin manager; builds the adapter type on demand
  const createAdapterType = () =>
    new AdapterType({
      name: 'BedGraphAdapter',
      displayName: 'BedGraph adapter',
      configSchema,
      getAdapterClass: loadAdapterClass,
    })

  pluginManager.addAdapterType(createAdapterType)
}
114 changes: 114 additions & 0 deletions plugins/bed/src/BedGraphTabixAdapter/BedGraphTabixAdapter.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
import { ObservableCreate } from '@jbrowse/core/util/rxjs'
import { Feature, Region, SimpleFeature } from '@jbrowse/core/util'
import {
BaseFeatureDataAdapter,
BaseOptions,
} from '@jbrowse/core/data_adapters/BaseAdapter'
import { openLocation } from '@jbrowse/core/util/io'
import { TabixIndexedFile } from '@gmod/tabix'

/**
 * Feature adapter for a bedGraph file that is read through a
 * `TabixIndexedFile` (data file from the `bedGraphGzLocation` slot, index
 * from the `index` slot).
 *
 * Lines overlapping a query are fetched on demand; each value column after
 * the first three (refName, start, end) becomes its own feature with a
 * `score` and a `source` (the column's name).
 *
 * NOTE(review): the class is named `BedGraphAdapter` although it lives in the
 * BedGraphTabixAdapter module; the name is kept so the class's runtime name
 * does not change.
 */
export default class BedGraphAdapter extends BaseFeatureDataAdapter {
  /** Cached result of opening the file; reset to undefined if setup fails */
  private configured?: Promise<{
    bedGraph: TabixIndexedFile
    header: string
    columnNames: string[]
  }>

  /**
   * Opens the data and index file handles, creates the `TabixIndexedFile` and
   * reads the file header.
   */
  private async configurePre() {
    const pm = this.pluginManager
    const bedGraphGzLocation = this.getConf('bedGraphGzLocation')
    const location = this.getConf(['index', 'location'])
    const indexType = this.getConf(['index', 'indexType'])

    const filehandle = openLocation(bedGraphGzLocation, pm)
    // the same index location serves as either the CSI or the TBI handle,
    // depending on the configured index type
    const indexFilehandle = openLocation(location, pm)
    const isCSI = indexType === 'CSI'
    const bedGraph = new TabixIndexedFile({
      filehandle,
      csiFilehandle: isCSI ? indexFilehandle : undefined,
      tbiFilehandle: isCSI ? undefined : indexFilehandle,
      chunkCacheSize: 50 * 2 ** 20,
    })
    const columnNames = this.getConf('columnNames')

    const header = await bedGraph.getHeader()
    return {
      columnNames,
      bedGraph,
      header,
    }
  }

  /**
   * Returns the cached setup promise, starting the setup on first use. A
   * failed setup clears the cache so that a later call retries.
   */
  protected async configure() {
    if (!this.configured) {
      this.configured = this.configurePre().catch((e: unknown) => {
        this.configured = undefined
        throw e
      })
    }
    return this.configured
  }

  /**
   * Resolves the column names of the file.
   *
   * @returns the configured `columnNames` when non-empty; otherwise the
   * tab-separated fields of the last header line (with its first character
   * stripped), or undefined when there is no such tab-separated header line.
   */
  async getNames() {
    // the header was already read during configure(), so reuse it instead of
    // asking the tabix file for it again
    const { header, columnNames } = await this.configure()
    if (columnNames.length) {
      return columnNames
    }
    const defs = header.split(/\n|\r\n|\r/).filter(f => !!f)
    const defline = defs.at(-1)
    return defline?.includes('\t')
      ? defline
          .slice(1)
          .split('\t')
          .map(f => f.trim())
      : undefined
  }

  /**
   * @param opts - forwarded to `getReferenceSequenceNames`
   * @returns the reference sequence names reported by the tabix file
   */
  public async getRefNames(opts: BaseOptions = {}) {
    const { bedGraph } = await this.configure()
    return bedGraph.getReferenceSequenceNames(opts)
  }

  /** @returns the header text read from the file during setup */
  async getHeader() {
    const { header } = await this.configure()
    return header
  }

  /**
   * Emits one feature per non-zero, numeric value column of every line that
   * the tabix file returns for the queried range.
   *
   * @param query - region (refName, start, end) to fetch
   * @param opts - forwarded to `getLines`
   */
  public getFeatures(query: Region, opts: BaseOptions = {}) {
    return ObservableCreate<Feature>(async observer => {
      const { refName, start, end } = query
      const { bedGraph } = await this.configure()
      // the first three names describe refName/start/end; the rest label the
      // value columns
      const names = (await this.getNames())?.slice(3) ?? []
      await bedGraph.getLines(refName, start, end, {
        // spread first so a stray `lineCallback` key in opts cannot replace
        // the callback that emits the features
        ...opts,
        lineCallback: (line, fileOffset) => {
          const [lineRefName, s, e, ...scores] = line.split('\t')
          // start/end are the same for every value column of the line, so
          // parse them once per line
          const featureStart = Number(s)
          const featureEnd = Number(e)
          for (const [j, rawScore] of scores.entries()) {
            const score = Number(rawScore)
            // falsy scores (0 and NaN) are not emitted as features
            if (score) {
              observer.next(
                new SimpleFeature({
                  id: `${this.id}-${fileOffset}-${j}`,
                  data: {
                    refName: lineRefName,
                    start: featureStart,
                    end: featureEnd,
                    score,
                    source: names[j] || `col${j}`,
                  },
                }),
              )
            }
          }
        },
      })
      observer.complete()
    })
  }

  /** Nothing is released here. */
  public freeResources(): void {}
}
Loading

0 comments on commit 3872967

Please sign in to comment.