Skip to content

Commit

Permalink
Move stats calculation to baseadapter
Browse files Browse the repository at this point in the history
  • Loading branch information
cmdcolin committed Jan 26, 2021
1 parent 80e4526 commit 66547c1
Show file tree
Hide file tree
Showing 10 changed files with 347 additions and 186 deletions.
41 changes: 41 additions & 0 deletions packages/core/data_adapters/BaseAdapter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import { Feature } from '../util/simpleFeature'
import { AnyConfigurationModel } from '../configuration/configurationSchema'
import { getSubAdapterType } from './dataAdapterCache'
import { Region, NoAssemblyRegion } from '../util/types'
import { blankStats, rectifyStats, scoresToStats } from '../util/stats'

export interface BaseOptions {
signal?: AbortSignal
Expand Down Expand Up @@ -196,6 +197,46 @@ export abstract class BaseFeatureDataAdapter {
const refNames = await this.getRefNames(opts)
return refNames.includes(refName)
}

/**
 * Summarize the scores of the features found in a single region.
 *
 * @param region - region whose features are fetched via `getFeatures`
 * @param opts - options forwarded unchanged to `getFeatures`
 * @returns promise of the stats computed by `scoresToStats`
 */
public getRegionStats(region: Region, opts?: BaseOptions) {
  return scoresToStats(region, this.getFeatures(region, opts))
}

/**
 * Combine the per-region stats of several regions into one stats object.
 *
 * Extremes are merged with max/min, the additive fields are summed, and the
 * derived fields are recomputed by `rectifyStats` from the merged totals.
 *
 * @param regions - regions to summarize; an empty list yields `blankStats()`
 * @param opts - options forwarded to `getRegionStats` for every region
 * @returns promise of the merged stats
 */
public async getMultiRegionStats(regions: Region[] = [], opts?: BaseOptions) {
  if (regions.length === 0) {
    return blankStats()
  }

  // all regions are queried concurrently
  const perRegion = await Promise.all(
    regions.map(region => this.getRegionStats(region, opts)),
  )

  // extremes start from the first region and fold in the remaining ones
  const [head, ...tail] = perRegion
  let scoreMax = head.scoreMax
  let scoreMin = head.scoreMin
  for (const stats of tail) {
    scoreMax = Math.max(scoreMax, stats.scoreMax)
    scoreMin = Math.min(scoreMin, stats.scoreMin)
  }

  // additive fields are accumulated from zero over every region
  let scoreSum = 0
  let scoreSumSquares = 0
  let featureCount = 0
  let basesCovered = 0
  for (const stats of perRegion) {
    scoreSum += stats.scoreSum
    scoreSumSquares += stats.scoreSumSquares
    featureCount += stats.featureCount
    basesCovered += stats.basesCovered
  }

  return rectifyStats({
    scoreMin,
    scoreMax,
    featureCount,
    basesCovered,
    scoreSumSquares,
    scoreSum,
  })
}
}

export interface RegionsAdapter extends BaseFeatureDataAdapter {
Expand Down
88 changes: 88 additions & 0 deletions packages/core/util/stats.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
import SimpleFeature from '@jbrowse/core/util/simpleFeature'
import { from } from 'rxjs'
import {
calcStdFromSums,
rectifyStats,
scoresToStats,
calcPerBaseStats,
UnrectifiedFeatureStats,
} from './stats'

test('calc std', () => {
  const s = [1, 2, 3]
  // Seed both folds with 0. Without a seed, reduce uses the first element as
  // the initial accumulator, so it would never be squared; the unseeded form
  // only produced the right sum of squares because s[0] is 1.
  const sum = s.reduce((a, b) => a + b, 0)
  const sumSq = s.reduce((a, b) => a + b * b, 0)
  expect(calcStdFromSums(sum, sumSq, s.length, true)).toBeCloseTo(0.8164965809) // calculated from a webapp
  expect(calcStdFromSums(sum, sumSq, s.length, false)).toBeCloseTo(1) // calculated from a webapp
  expect(calcStdFromSums(100, 100, 0)).toEqual(0) // fake thing where list "n" is 0
  expect(calcStdFromSums(100000, 100, 5)).toEqual(0) // fake thing where sumSq probably wrong
})

test('test rectify', () => {
  // mean of 0 bases covered = 0
  const nothingCovered = rectifyStats({
    basesCovered: 0,
  } as UnrectifiedFeatureStats)
  expect(nothingCovered.scoreMean).toEqual(0)

  // mean is scoreSum / featureCount, and the input fields are passed through
  const tenFeatures = rectifyStats({
    featureCount: 10,
    scoreSum: 1000,
  } as UnrectifiedFeatureStats)
  expect(tenFeatures.scoreMean).toEqual(100)
  expect(tenFeatures.featureCount).toEqual(10)

  // scores 1, 2, 3: sum 6, sum of squares 14
  const { scoreStdDev } = rectifyStats({
    featureCount: 3,
    scoreSum: 6,
    scoreSumSquares: 14,
  } as UnrectifiedFeatureStats)
  expect(scoreStdDev).toEqual(1) // calculated from a webapp about sample standard deviations
})

test('scores to stats', async () => {
  // three adjacent 1bp features whose score equals their id
  const features = [1, 2, 3].map(
    n =>
      new SimpleFeature({
        id: n,
        data: { start: n - 1, end: n, score: n },
      }),
  )
  const region = { refName: 'ctgA', start: 0, end: 2 }

  const stats = await scoresToStats(region, from(features))

  expect(stats.scoreMean).toEqual(2)
  expect(stats.featureDensity).toEqual(1)
  expect(stats.scoreMax).toEqual(3)
  expect(stats.scoreMin).toEqual(1)
  expect(stats.scoreStdDev).toEqual(1) // calculated from a webapp
})

// peter TODO: fix this test
test('calc per base stats', () => {
  // small builders so each case reads as data
  const region = (start: number, end: number) => ({
    refName: 'ctgA',
    start,
    end,
  })
  const feature = (id: number, start: number, end: number, score: number) =>
    new SimpleFeature({ id, data: { start, end, score } })

  // one score at start
  const single = calcPerBaseStats(region(0, 9), [feature(1, 0, 1, 10)])
  expect(single).toEqual([10, 0, 0, 0, 0, 0, 0, 0, 0])

  // multiple features
  const bothEnds = calcPerBaseStats(region(0, 9), [
    feature(1, 0, 1, 10),
    feature(2, 8, 9, 10),
  ])
  expect(bothEnds).toEqual([10, 0, 0, 0, 0, 0, 0, 0, 10])

  // multiple features
  const offsetRegion = calcPerBaseStats(region(15, 30), [
    feature(1, 10, 20, 10),
    feature(2, 25, 26, 10),
  ])
  expect(offsetRegion).toEqual([10, 10, 10, 10, 10, 0, 0, 0, 0, 0, 10, 0, 0, 0, 0])

  // feature starts before region
  const clipped = calcPerBaseStats(region(10, 19), [
    feature(1, 5, 15, 10),
    feature(1, 18, 26, 10),
  ])
  expect(clipped).toEqual([10, 10, 10, 10, 10, 0, 0, 0, 10])
})
183 changes: 183 additions & 0 deletions packages/core/util/stats.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,183 @@
import { Observable } from 'rxjs'
import { reduce } from 'rxjs/operators'
import { NoAssemblyRegion } from './types'
import { Feature } from './simpleFeature'

/**
 * Raw accumulated score statistics, before the derived fields (mean, standard
 * deviation, density) are added by rectifyStats.
 */
export interface UnrectifiedFeatureStats {
// smallest score seen
scoreMin: number
// largest score seen
scoreMax: number
// sum of the scores
scoreSum: number
// sum of the squared scores; consumed by calcStdFromSums
scoreSumSquares: number
// number of features accumulated
featureCount: number
// number of bases the stats span; scoresToStats derives it from the region's start and end
basesCovered: number
}
/**
 * Score statistics including the derived fields that rectifyStats computes
 * from the raw sums and counts.
 */
export interface FeatureStats extends UnrectifiedFeatureStats {
// scoreSum divided by featureCount (falling back to basesCovered, then 1)
scoreMean: number
// standard deviation computed by calcStdFromSums from scoreSum and scoreSumSquares
scoreStdDev: number
// feature count (a zero count is treated as 1) divided by basesCovered
featureDensity: number
}

/**
 * Calculate a standard deviation with the 'shortcut method', which needs only
 * the sum and the sum of squares of the elements rather than the elements
 * themselves.
 *
 * @param sum - sum(i, 1..n)
 * @param sumSquares - sum(i^2, 1..n)
 * @param n - number of elements
 * @param population - use the population formula instead of the sample
 *   (n - 1) correction
 * @returns the estimated standard deviation; 0 when n is 0 or when the
 *   computed variance is negative
 */
export function calcStdFromSums(
  sum: number,
  sumSquares: number,
  n: number,
  population = false,
): number {
  if (n === 0) {
    return 0
  }

  let variance: number
  if (!population) {
    // sample correction is n-1; a single element has nothing to divide by
    const spread = sumSquares - (sum * sum) / n
    variance = n > 1 ? spread / (n - 1) : spread
  } else {
    variance = sumSquares / n - (sum * sum) / (n * n)
  }

  // inconsistent inputs can yield a negative variance; report that as 0
  if (variance < 0) {
    return 0
  }
  return Math.sqrt(variance)
}

/**
 * Add the derived fields to a raw stats object.
 *
 * @param s - a summary stats object with scoreSum, featureCount,
 *   scoreSumSquares, and basesCovered
 * @returns a copy of the input with scoreMean, scoreStdDev, and
 *   featureDensity added
 */
export function rectifyStats(s: UnrectifiedFeatureStats): FeatureStats {
  const { scoreSum, scoreSumSquares, featureCount, basesCovered } = s

  // number of observations: the feature count, or the covered bases when no
  // features were counted
  const observations = featureCount || basesCovered

  const scoreMean = (scoreSum || 0) / (observations || 1)
  const scoreStdDev = calcStdFromSums(scoreSum, scoreSumSquares, observations)
  // a zero feature count is treated as 1 here
  const featureDensity = (featureCount || 1) / basesCovered

  return { ...s, scoreMean, scoreStdDev, featureDensity }
}

/**
 * Calculate per-base scores for variable width features over a region.
 *
 * Positions run from region.start (inclusive) to region.end (exclusive). A
 * position takes the score of the feature covering it, or 0 when no feature
 * does. Features are visited in start order and a feature is only left behind
 * once the position has passed its end.
 *
 * @param region - object contains start, end
 * @param features - list of features with start, end, score; the array is
 *   not modified
 * @returns array of numeric scores, one per position in the region
 */
export function calcPerBaseStats(
  region: NoAssemblyRegion,
  features: Feature[],
): number[] {
  const { start, end } = region
  // Array.prototype.sort works in place, so sort a copy to avoid reordering
  // the caller's array
  const feats = [...features].sort((a, b) => a.get('start') - b.get('start'))
  const scores: number[] = []
  let currentFeat = 0

  for (let pos = start; pos < end; pos += 1) {
    // skip features that end at or before the current position
    while (currentFeat < feats.length && pos >= feats[currentFeat].get('end')) {
      currentFeat += 1
    }
    const f = feats[currentFeat]
    const covered = f && pos >= f.get('start') && pos < f.get('end')
    scores.push(covered ? f.get('score') : 0)
  }
  return scores
}

/**
 * Transform a stream of scored features into summary statistics.
 *
 * Every feature contributes its `score` to the sum, the sum of squares and
 * the feature count. For the extremes, a feature flagged `summary`
 * contributes its `maxScore` / `minScore` instead of its `score`.
 *
 * @param region - object with start, end
 * @param features - observable of features which are possibly summary features
 * @returns object with scoreMax, scoreMin, scoreSum, scoreSumSquares,
 *   featureCount and basesCovered, plus the derived fields added by
 *   rectifyStats
 */
export async function scoresToStats(
  region: NoAssemblyRegion,
  features: Observable<Feature>,
): Promise<FeatureStats> {
  const { start, end } = region

  const initial = {
    scoreMin: Number.MAX_VALUE,
    // Number.MIN_VALUE is the smallest *positive* double, so using it as the
    // seed made the maximum of all-negative (or all-zero) scores come out as
    // 5e-324. The most negative finite double is -Number.MAX_VALUE.
    scoreMax: -Number.MAX_VALUE,
    scoreSum: 0,
    scoreSumSquares: 0,
    featureCount: 0,
  }

  const {
    scoreMin,
    scoreMax,
    scoreSum,
    scoreSumSquares,
    featureCount,
  } = await features
    .pipe(
      reduce((acc: typeof initial, f: Feature) => {
        const score = f.get('score')
        const isSummary = f.get('summary')
        acc.scoreMax = Math.max(
          acc.scoreMax,
          isSummary ? f.get('maxScore') : score,
        )
        acc.scoreMin = Math.min(
          acc.scoreMin,
          isSummary ? f.get('minScore') : score,
        )
        acc.scoreSum += score
        acc.scoreSumSquares += score * score
        acc.featureCount += 1
        return acc
      }, initial),
    )
    .toPromise()

  return rectifyStats({
    scoreMax,
    scoreMin,
    scoreSum,
    scoreSumSquares,
    featureCount,
    // NOTE(review): this counts the region as a closed interval
    // (end - start + 1) — confirm against the region coordinate convention
    basesCovered: end - start + 1,
  })
}

/**
 * Build a stats object with every field set to 0.
 *
 * @returns a fresh all-zero FeatureStats object on each call
 */
export function blankStats(): FeatureStats {
  const zeroed: FeatureStats = {
    scoreMin: 0,
    scoreMax: 0,
    scoreMean: 0,
    scoreStdDev: 0,
    scoreSum: 0,
    scoreSumSquares: 0,
    featureCount: 0,
    featureDensity: 0,
    basesCovered: 0,
  }
  return zeroed
}
Loading

0 comments on commit 66547c1

Please sign in to comment.