From 9184274066170eebfea0585461c1f3107a633d9b Mon Sep 17 00:00:00 2001 From: Jules Jacobsen Date: Wed, 13 Mar 2019 11:42:14 +0000 Subject: [PATCH] Refactor FrequencyData to use array-based implementation --- .../core/model/frequency/FrequencyData.java | 51 ++++++++++++++----- 1 file changed, 37 insertions(+), 14 deletions(-) diff --git a/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/model/frequency/FrequencyData.java b/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/model/frequency/FrequencyData.java index 4054fb07c..dd6a20bd6 100644 --- a/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/model/frequency/FrequencyData.java +++ b/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/model/frequency/FrequencyData.java @@ -36,6 +36,8 @@ public class FrequencyData { private static final FrequencyData EMPTY_DATA = new FrequencyData(RsId.empty(), Collections.emptyList()); + private static final int NUM_FREQ_SOURCES = FrequencySource.values().length; + private static final float VERY_RARE_SCORE = 1f; private static final float NOT_RARE_SCORE = 0f; @@ -46,7 +48,7 @@ public class FrequencyData { private final float[] values; public static FrequencyData of(RsId rsId, Collection frequencies) { - return validate(rsId, new ArrayList<>(frequencies)); + return validate(rsId, frequencies); } public static FrequencyData of(RsId rsId, Frequency frequency) { @@ -69,10 +71,7 @@ public static FrequencyData empty() { return EMPTY_DATA; } - /** - * IMPORTANT ensure that a copy of the input data has been provided to this method as the contents will be sorted - */ - private static FrequencyData validate(RsId rsId, List frequencies) { + private static FrequencyData validate(RsId rsId, Collection frequencies) { Objects.requireNonNull(rsId, "RsId cannot be null"); Objects.requireNonNull(frequencies, "frequency data cannot be null"); @@ -86,21 +85,45 @@ private static FrequencyData validate(RsId rsId, List frequencies) { return new FrequencyData(rsId, frequencies); } - private FrequencyData(RsId rsId, List knownFrequencies) { + private FrequencyData(RsId rsId, Collection frequencies) { this.rsId = rsId.getId(); - this.size = knownFrequencies.size(); + // use natural ordering by FrequencySource - this class is a kind of EnumMap, but we're using primitives to store + // the values. This means that duplicated FrequencySource will be overwritten. In practice this shouldn't happen + // as they are extracted from a map instance. + Frequency[] sorted = orderByFrequencySource(frequencies); + + this.size = countNotNullFrequencies(sorted); this.sources = new FrequencySource[size]; this.values = new float[size]; - // use natural ordering by FrequencySource - this class is a kind of EnumMap - knownFrequencies.sort(Comparator.comparing(Frequency::getSource)); - for (int i = 0; i < knownFrequencies.size(); i++) { - Frequency entry = knownFrequencies.get(i); - // consider replacing this with FrequencySource.ordinal(). - sources[i] = entry.getSource(); - values[i] = entry.getFrequency(); + int pos = 0; + for (Frequency entry : sorted) { + if (entry != null) { + sources[pos] = entry.getSource(); + values[pos] = entry.getFrequency(); + pos++; + } + } + } + + private Frequency[] orderByFrequencySource(Collection frequencies) { + Frequency[] sorted = new Frequency[NUM_FREQ_SOURCES]; + for (Frequency frequency : frequencies) { + FrequencySource frequencySource = frequency.getSource(); + sorted[frequencySource.ordinal()] = frequency; + } + return sorted; + } + + private int countNotNullFrequencies(Frequency[] sorted) { + int notNull = 0; + for (Frequency frequency : sorted) { + if (frequency != null) { + notNull++; + } } + return notNull; } //RSID ought to belong to the Variant, not the frequencyData, but its here for convenience