-
Notifications
You must be signed in to change notification settings - Fork 64
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Move stats calculation to baseadapter
- Loading branch information
Showing
10 changed files
with
347 additions
and
186 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,88 @@ | ||
import SimpleFeature from '@jbrowse/core/util/simpleFeature' | ||
import { from } from 'rxjs' | ||
import { | ||
calcStdFromSums, | ||
rectifyStats, | ||
scoresToStats, | ||
calcPerBaseStats, | ||
UnrectifiedFeatureStats, | ||
} from './stats' | ||
|
||
test('calc std', () => {
  const values = [1, 2, 3]
  // Seed both folds with 0 so the reducer runs on every element. Without a
  // seed, reduce() uses the first element as the accumulator unchanged, which
  // means it would never be squared in the sum of squares.
  const sum = values.reduce((acc, v) => acc + v, 0)
  const sumSq = values.reduce((acc, v) => acc + v * v, 0)
  expect(calcStdFromSums(sum, sumSq, values.length, true)).toBeCloseTo(
    0.8164965809,
  ) // population std dev, calculated from a webapp
  expect(calcStdFromSums(sum, sumSq, values.length, false)).toBeCloseTo(1) // sample std dev, calculated from a webapp
  expect(calcStdFromSums(100, 100, 0)).toEqual(0) // fake thing where list "n" is 0
  expect(calcStdFromSums(100000, 100, 5)).toEqual(0) // fake thing where sumSq probably wrong
})
|
||
test('test rectify', () => {
  // mean of 0 bases covered = 0
  const empty = rectifyStats({ basesCovered: 0 } as UnrectifiedFeatureStats)
  expect(empty.scoreMean).toEqual(0)

  // the mean is scoreSum / featureCount and the inputs are passed through
  const rectified = rectifyStats({
    featureCount: 10,
    scoreSum: 1000,
  } as UnrectifiedFeatureStats)
  expect(rectified.scoreMean).toEqual(100)
  expect(rectified.featureCount).toEqual(10)

  // sums matching the scores [1, 2, 3]
  const { scoreStdDev } = rectifyStats({
    featureCount: 3,
    scoreSum: 6,
    scoreSumSquares: 14,
  } as UnrectifiedFeatureStats)
  expect(scoreStdDev).toEqual(1) // calculated from a webapp about sample standard deviations
})
|
||
test('scores to stats', async () => {
  const region = { refName: 'ctgA', start: 0, end: 2 }
  // three single-base features scoring 1, 2 and 3
  const feats = [
    new SimpleFeature({ id: 1, data: { start: 0, end: 1, score: 1 } }),
    new SimpleFeature({ id: 2, data: { start: 1, end: 2, score: 2 } }),
    new SimpleFeature({ id: 3, data: { start: 2, end: 3, score: 3 } }),
  ]

  const stats = await scoresToStats(region, from(feats))

  expect(stats.scoreMean).toEqual(2)
  expect(stats.featureDensity).toEqual(1)
  expect(stats.scoreMax).toEqual(3)
  expect(stats.scoreMin).toEqual(1)
  expect(stats.scoreStdDev).toEqual(1) // calculated from a webapp
})
|
||
// peter TODO: fix this test
test('calc per base stats', () => {
  // one score at start
  expect(
    calcPerBaseStats({ refName: 'ctgA', start: 0, end: 9 }, [
      new SimpleFeature({ id: 1, data: { start: 0, end: 1, score: 10 } }),
    ]),
  ).toEqual([10, 0, 0, 0, 0, 0, 0, 0, 0])
  // multiple features, one at each edge of the region
  expect(
    calcPerBaseStats({ refName: 'ctgA', start: 0, end: 9 }, [
      new SimpleFeature({ id: 1, data: { start: 0, end: 1, score: 10 } }),
      new SimpleFeature({ id: 2, data: { start: 8, end: 9, score: 10 } }),
    ]),
  ).toEqual([10, 0, 0, 0, 0, 0, 0, 0, 10])
  // multiple features, the first one overlapping the region start
  expect(
    calcPerBaseStats({ refName: 'ctgA', start: 15, end: 30 }, [
      new SimpleFeature({ id: 1, data: { start: 10, end: 20, score: 10 } }),
      new SimpleFeature({ id: 2, data: { start: 25, end: 26, score: 10 } }),
    ]),
  ).toEqual([10, 10, 10, 10, 10, 0, 0, 0, 0, 0, 10, 0, 0, 0, 0])
  // one feature starts before the region, another runs past its end
  expect(
    calcPerBaseStats({ refName: 'ctgA', start: 10, end: 19 }, [
      new SimpleFeature({ id: 1, data: { start: 5, end: 15, score: 10 } }),
      // distinct id: the two features in this case previously shared id 1
      new SimpleFeature({ id: 2, data: { start: 18, end: 26, score: 10 } }),
    ]),
  ).toEqual([10, 10, 10, 10, 10, 0, 0, 0, 10])
})
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,183 @@ | ||
import { Observable } from 'rxjs' | ||
import { reduce } from 'rxjs/operators' | ||
import { NoAssemblyRegion } from './types' | ||
import { Feature } from './simpleFeature' | ||
|
||
/**
 * Raw running totals collected over a set of scored features, before any
 * derived values (mean, standard deviation, density) have been computed.
 */
export interface UnrectifiedFeatureStats {
  /** smallest score encountered */
  scoreMin: number
  /** largest score encountered */
  scoreMax: number
  /** sum of all scores */
  scoreSum: number
  /** sum of the squares of all scores */
  scoreSumSquares: number
  /** number of features that contributed to the totals */
  featureCount: number
  /** number of bases in the region the totals were collected over */
  basesCovered: number
}

/**
 * Running totals plus the values derived from them.
 */
export interface FeatureStats extends UnrectifiedFeatureStats {
  /** average score */
  scoreMean: number
  /** standard deviation of the scores */
  scoreStdDev: number
  /** features per base covered */
  featureDensity: number
}
|
||
/**
 * Standard deviation via the "shortcut method", which only needs the sum and
 * the sum of squares of the elements rather than the elements themselves.
 *
 * @param sum - sum(i, 1..n)
 * @param sumSquares - sum(i^2, 1..n)
 * @param n - number of elements
 * @param population - when true use the population formula (divide by n),
 *   otherwise apply the sample correction (divide by n - 1)
 * @returns the estimated standard deviation; 0 when n is 0 or when the
 *   computed variance comes out negative
 */
export function calcStdFromSums(
  sum: number,
  sumSquares: number,
  n: number,
  population = false,
): number {
  if (n === 0) {
    return 0
  }

  const squaredSum = sum * sum
  let variance: number
  if (!population) {
    // sample correction is n-1; a single element is left undivided
    variance = sumSquares - squaredSum / n
    if (n > 1) {
      variance = variance / (n - 1)
    }
  } else {
    variance = sumSquares / n - squaredSum / (n * n)
  }

  // inconsistent inputs can yield a negative variance; report 0 for those
  if (variance < 0) {
    return 0
  }
  return Math.sqrt(variance)
}
|
||
/**
 * Adds the derived values (mean, standard deviation, density) to a set of
 * running totals.
 *
 * The element count used for the mean and the standard deviation is
 * featureCount, falling back to basesCovered when featureCount is 0 or
 * missing.
 *
 * @param s - a summary stats object with scoreSum, featureCount,
 *   scoreSumSquares, and basesCovered
 * @returns a copy of the summary stats object with scoreMean, scoreStdDev,
 *   and featureDensity added
 */
export function rectifyStats(s: UnrectifiedFeatureStats): FeatureStats {
  // `||` rather than `??` on purpose: a count of 0 must fall through to the
  // next candidate, and callers are known to pass partially filled objects
  const n = s.featureCount || s.basesCovered || 0
  const scoreSum = s.scoreSum || 0
  const scoreSumSquares = s.scoreSumSquares || 0

  return {
    ...s,
    // dividing by 1 when there are no elements yields a mean of 0
    scoreMean: scoreSum / (n || 1),
    // missing sums are treated as 0 (as the mean already does) so that the
    // result is never NaN
    scoreStdDev: calcStdFromSums(scoreSum, scoreSumSquares, n),
    // no bases covered means no meaningful density: report 0, matching
    // blankStats(), instead of dividing by zero
    featureDensity: s.basesCovered
      ? (s.featureCount || 1) / s.basesCovered
      : 0,
  }
}
|
||
/**
 * Calculates per-base scores for variable width features over a region.
 *
 * Each position from region.start up to (but not including) region.end gets
 * the score of the feature covering it, or 0 when the current candidate
 * feature does not cover it. A feature covers the positions from its start up
 * to (but not including) its end.
 *
 * @param region - object containing start, end
 * @param features - list of features with start, end, score; the list is not
 *   modified
 * @returns array of numeric scores, one entry per position in the region
 */
export function calcPerBaseStats(
  region: NoAssemblyRegion,
  features: Feature[],
): number[] {
  const { start, end } = region
  const scores: number[] = []
  // sort a copy: Array.prototype.sort works in place and would otherwise
  // reorder the caller's array
  const feats = [...features].sort((a, b) => a.get('start') - b.get('start'))
  let currentFeat = 0

  for (let pos = start; pos < end; pos += 1) {
    // skip features that end at or before the current position
    while (currentFeat < feats.length && pos >= feats[currentFeat].get('end')) {
      currentFeat += 1
    }
    const f = feats[currentFeat]
    if (f !== undefined && pos >= f.get('start') && pos < f.get('end')) {
      scores.push(f.get('score'))
    } else {
      // either all features are exhausted or the next one starts further on
      scores.push(0)
    }
  }
  return scores
}
|
||
/**
 * Transforms a stream of scored features into summary statistics.
 *
 * For features flagged as `summary`, the feature's `maxScore` / `minScore`
 * are used for the running max / min instead of its `score`; the sums always
 * use `score`.
 *
 * When the stream emits no features, scoreMin and scoreMax are returned as
 * their initial sentinels (Number.MAX_VALUE and -Number.MAX_VALUE).
 *
 * @param region - object with start, end
 * @param features - observable of features which are possibly summary
 *   features
 * @returns object with scoreMax, scoreMin, scoreSum, scoreSumSquares, etc
 */
export async function scoresToStats(
  region: NoAssemblyRegion,
  features: Observable<Feature>,
): Promise<FeatureStats> {
  const { start, end } = region

  const {
    scoreMin,
    scoreMax,
    scoreSum,
    scoreSumSquares,
    featureCount,
  } = await features
    .pipe(
      reduce(
        (
          seed: {
            scoreMin: number
            scoreMax: number
            scoreSum: number
            scoreSumSquares: number
            featureCount: number
          },
          f: Feature,
        ) => {
          const score = f.get('score')
          const isSummary = f.get('summary')
          seed.scoreMax = Math.max(
            seed.scoreMax,
            isSummary ? f.get('maxScore') : score,
          )
          seed.scoreMin = Math.min(
            seed.scoreMin,
            isSummary ? f.get('minScore') : score,
          )
          seed.scoreSum += score
          seed.scoreSumSquares += score * score
          seed.featureCount += 1

          return seed
        },
        {
          scoreMin: Number.MAX_VALUE,
          // Number.MIN_VALUE is the smallest *positive* double, so seeding
          // the max with it would win against every negative score; use the
          // most negative finite double instead
          scoreMax: -Number.MAX_VALUE,
          scoreSum: 0,
          scoreSumSquares: 0,
          featureCount: 0,
        },
      ),
    )
    .toPromise()

  return rectifyStats({
    scoreMax,
    scoreMin,
    scoreSum,
    scoreSumSquares,
    featureCount,
    // NOTE(review): this counts `end` as part of the region — confirm that
    // matches the coordinate convention callers use
    basesCovered: end - start + 1,
  })
}
|
||
/**
 * Creates a stats object in which every total and every derived value is 0.
 *
 * @returns a freshly allocated, all-zero FeatureStats object
 */
export function blankStats(): FeatureStats {
  const zeroed: FeatureStats = {
    scoreMin: 0,
    scoreMax: 0,
    scoreMean: 0,
    scoreStdDev: 0,
    scoreSum: 0,
    scoreSumSquares: 0,
    featureCount: 0,
    featureDensity: 0,
    basesCovered: 0,
  }
  return zeroed
}
Oops, something went wrong.