diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/GenotypeGVCFsEngine.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/GenotypeGVCFsEngine.java index abc370a23dd..c2a4d5783c7 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/GenotypeGVCFsEngine.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/GenotypeGVCFsEngine.java @@ -12,7 +12,6 @@ import org.broadinstitute.hellbender.tools.walkers.annotator.*; import org.broadinstitute.hellbender.tools.walkers.annotator.allelespecific.AS_RMSMappingQuality; import org.broadinstitute.hellbender.tools.walkers.genotyper.*; -import org.broadinstitute.hellbender.tools.walkers.genotyper.afcalc.GeneralPloidyFailOverAFCalculatorProvider; import org.broadinstitute.hellbender.utils.GATKProtectedVariantContextUtils; import org.broadinstitute.hellbender.utils.SimpleInterval; import org.broadinstitute.hellbender.utils.Utils; @@ -94,7 +93,7 @@ private void initialize() } // We only want the engine to generate the AS_QUAL key if we are using AlleleSpecific annotations. - genotypingEngine = new MinimalGenotypingEngine(createUAC(), samples, new GeneralPloidyFailOverAFCalculatorProvider(genotypeArgs), annotationEngine.isRequestedReducibleRawKey(GATKVCFConstants.AS_QUAL_KEY)); + genotypingEngine = new MinimalGenotypingEngine(createUAC(), samples, annotationEngine.isRequestedReducibleRawKey(GATKVCFConstants.AS_QUAL_KEY)); if ( includeNonVariants ) { // Save INFO header names that require alt alleles diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/GenotypeCalculationArgumentCollection.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/GenotypeCalculationArgumentCollection.java index 9dc09b56123..ce1cbfea744 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/GenotypeCalculationArgumentCollection.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/GenotypeCalculationArgumentCollection.java @@ -36,7 +36,6 @@ public GenotypeCalculationArgumentCollection( final GenotypeCalculationArgumentC Utils.nonNull(other); this.useNewAFCalculator = other.useNewAFCalculator; - this.useOldAFCalculator = other.useOldAFCalculator; this.ANNOTATE_NUMBER_OF_ALLELES_DISCOVERED = other.ANNOTATE_NUMBER_OF_ALLELES_DISCOVERED; this.snpHeterozygosity = other.snpHeterozygosity; this.indelHeterozygosity = other.indelHeterozygosity; @@ -56,12 +55,6 @@ public GenotypeCalculationArgumentCollection( final GenotypeCalculationArgumentC @Argument(fullName = "use-new-qual-calculator", shortName = "new-qual", doc = "Use the new AF model instead of the so-called exact model", optional = true) public boolean useNewAFCalculator = true; - /** - * Use the old GATK 3 qual score aka the "exact model" - */ - @Argument(fullName = "use-old-qual-calculator", shortName = "old-qual", doc = "Use the old AF model", optional = true) - public boolean useOldAFCalculator = false; - /** * Depending on the value of the --max_alternate_alleles argument, we may genotype only a fraction of the alleles being sent on for genotyping. * Using this argument instructs the genotyper to annotate (in the INFO field) the number of alternate alleles that were originally discovered at the site. 
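For context on the call-site change above, a minimal sketch (not part of the patch) of how engine construction looks after it; createUAC(), samples, and annotationEngine stand in for members the calling tool already has, and nothing beyond the three-argument constructor shown in the hunk is assumed.

    // Sketch only: construction no longer takes an AFCalculatorProvider.
    final boolean doAlleleSpecificCalcs =
            annotationEngine.isRequestedReducibleRawKey(GATKVCFConstants.AS_QUAL_KEY);
    final MinimalGenotypingEngine genotypingEngine =
            new MinimalGenotypingEngine(createUAC(), samples, doAlleleSpecificCalcs);
    // There is no use-old-qual-calculator flag left to branch on; the engine builds its
    // AlleleFrequencyCalculator from the GenotypeCalculationArgumentCollection it is given.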
diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/GenotypingEngine.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/GenotypingEngine.java index ea94ffcea3f..2fa86f2a12d 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/GenotypingEngine.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/GenotypingEngine.java @@ -30,9 +30,7 @@ */ public abstract class GenotypingEngine { - protected final AFCalculator newAFCalculator; - - protected final AFCalculatorProvider afCalculatorProvider; + protected final AlleleFrequencyCalculator alleleFrequencyCalculator; protected final Config configuration; @@ -46,8 +44,6 @@ public abstract class GenotypingEngine upstreamDeletionsLoc = new LinkedList<>(); private final boolean doAlleleSpecificCalcs; @@ -64,66 +60,15 @@ public abstract class GenotypingEngine inputPriors) { - double sum = 0.0; - - if (!inputPriors.isEmpty()) { - // user-specified priors - if (inputPriors.size() != N) { - throw new CommandLineException.BadArgumentValue("inputPrior", "Invalid length of inputPrior vector: vector length must be equal to # samples +1 "); - } - - int idx = 1; - for (final double prior: inputPriors) { - if (prior < 0.0) { - throw new CommandLineException.BadArgumentValue("Bad argument: negative values not allowed", "inputPrior"); - } - priors[idx++] = Math.log10(prior); - sum += prior; - } - } - else { - // for each i - for (int i = 1; i <= N; i++) { - final double value = heterozygosity / (double)i; - priors[i] = Math.log10(value); - sum += value; - } - } - - // protection against the case of heterozygosity too high or an excessive number of samples (which break population genetics assumptions) - if (sum > 1.0) { - throw new CommandLineException.BadArgumentValue("heterozygosity","The heterozygosity value is set too high relative to the number of samples to be processed, or invalid values specified if input priors were provided - try reducing heterozygosity value or correct input priors."); - } - // null frequency for AF=0 is (1 - sum(all other frequencies)) - priors[0] = Math.log10(1.0 - sum); + alleleFrequencyCalculator = AlleleFrequencyCalculator.makeCalculator(configuration.genotypeArgs); } /** @@ -250,20 +195,18 @@ protected VariantCallContext calculateGenotypes(final FeatureContext features, } - final AFCalculator afCalculator = configuration.genotypeArgs.useOldAFCalculator ? - afCalculatorProvider.getInstance(vc,defaultPloidy,maxAltAlleles) : newAFCalculator; - final AFCalculationResult AFresult = afCalculator.getLog10PNonRef(reducedVC, defaultPloidy, maxAltAlleles, getAlleleFrequencyPriors(vc,defaultPloidy,model)); + final AFCalculationResult AFresult = alleleFrequencyCalculator.calculate(reducedVC, defaultPloidy); final OutputAlleleSubset outputAlternativeAlleles = calculateOutputAlleleSubset(AFresult, vc); // posterior probability that at least one alt allele exists in the samples - final double probOfAtLeastOneAltAllele = Math.pow(10, AFresult.getLog10PosteriorOfAFGT0()); + final double probOfAtLeastOneAltAllele = Math.pow(10, AFresult.log10ProbVariantPresent()); // note the math.abs is necessary because -10 * 0.0 => -0.0 which isn't nice final double log10Confidence = ! outputAlternativeAlleles.siteIsMonomorphic || configuration.genotypingOutputMode == GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES || configuration.annotateAllSitesWithPLs - ? AFresult.getLog10PosteriorOfAFEq0() + 0.0 - : AFresult.getLog10PosteriorOfAFGT0() + 0.0 ; + ? 
AFresult.log10ProbOnlyRefAlleleExists() + 0.0 + : AFresult.log10ProbVariantPresent() + 0.0 ; // Add 0.0 removes -0.0 occurrences. @@ -272,13 +215,8 @@ protected VariantCallContext calculateGenotypes(final FeatureContext features, // return a null call if we don't pass the confidence cutoff or the most likely allele frequency is zero // skip this if we are already looking at a vc with NON_REF as the first alt allele i.e. if we are in GenotypeGVCFs if ( !passesEmitThreshold(phredScaledConfidence, outputAlternativeAlleles.siteIsMonomorphic) - && !forceSiteEmission() - && noAllelesOrFirstAlleleIsNotNonRef(outputAlternativeAlleles.alleles)) { - // technically, at this point our confidence in a reference call isn't accurately estimated - // because it didn't take into account samples with no data, so let's get a better estimate - final double[] AFpriors = getAlleleFrequencyPriors(vc, defaultPloidy, model); - final int INDEX_FOR_AC_EQUALS_1 = 1; - return limitedContext ? null : estimateReferenceConfidence(vc, stratifiedContexts, AFpriors[INDEX_FOR_AC_EQUALS_1], true, probOfAtLeastOneAltAllele); + && !forceSiteEmission() && noAllelesOrFirstAlleleIsNotNonRef(outputAlternativeAlleles.alleles)) { + return null; } // return a null call if we aren't forcing site emission and the only alt allele is a spanning deletion @@ -385,7 +323,7 @@ private OutputAlleleSubset calculateOutputAlleleSubset(final AFCalculationResult // we want to keep the NON_REF symbolic allele but only in the absence of a non-symbolic allele, e.g. // if we combined a ref / NON_REF gVCF with a ref / alt gVCF final boolean isNonRefWhichIsLoneAltAllele = alternativeAlleleCount == 1 && allele.equals(Allele.NON_REF_ALLELE); - final boolean isPlausible = afCalculationResult.isPolymorphicPhredScaledQual(allele, configuration.genotypeArgs.STANDARD_CONFIDENCE_FOR_CALLING); + final boolean isPlausible = afCalculationResult.passesThreshold(allele, configuration.genotypeArgs.STANDARD_CONFIDENCE_FOR_CALLING); //it's possible that the upstream deletion that spanned this site was not emitted, mooting the symbolic spanning deletion allele final boolean isSpuriousSpanningDeletion = GATKVCFConstants.isSpanningDeletion(allele) && !isVcCoveredByDeletion(vc); @@ -567,30 +505,6 @@ protected final VariantCallContext estimateReferenceConfidence(final VariantCont return new VariantCallContext(vc, passesCallThreshold(QualityUtils.phredScaleLog10CorrectRate(log10POfRef)), false); } - /** - * Returns the log10 prior probability for all possible allele counts from 0 to N where N is the total number of - * genomes (total-ploidy). - * - * @param vc the target variant-context, use to determine the total ploidy thus the possible ACs. - * @param defaultPloidy default ploidy to be assume if we do not have the ploidy for some sample in {@code vc}. - * @param model the calculation model (SNP,INDEL or MIXED) whose priors are to be retrieved. - * @throws java.lang.NullPointerException if either {@code vc} or {@code model} is {@code null} - * @return never {@code null}, an array with exactly total-ploidy(vc) + 1 positions. 
- */ - protected final double[] getAlleleFrequencyPriors( final VariantContext vc, final int defaultPloidy, final GenotypeLikelihoodsCalculationModel model ) { - final int totalPloidy = GATKVariantContextUtils.totalPloidy(vc, defaultPloidy); - switch (model) { - case SNP: - case GENERALPLOIDYSNP: - return log10AlleleFrequencyPriorsSNPs.forTotalPloidy(totalPloidy); - case INDEL: - case GENERALPLOIDYINDEL: - return log10AlleleFrequencyPriorsIndels.forTotalPloidy(totalPloidy); - default: - throw new IllegalArgumentException("Unexpected GenotypeCalculationModel " + model); - } - } - /** * Compute the log10 probability of a sample with sequencing depth and no alt allele is actually truly homozygous reference * @@ -646,12 +560,12 @@ protected Map composeCallAttributes(final boolean inheritAttribut if (AFresult.getAllelesUsedInGenotyping().size() > 2) { for (final Allele a : allAllelesToUse) { if (a.isNonReference()) { - perAlleleQuals.add(AFresult.getLog10PosteriorOfAFEq0ForAllele(a)); + perAlleleQuals.add(AFresult.getLog10PosteriorOfAlleleAbsent(a)); } } } else { - perAlleleQuals.add(AFresult.getLog10PosteriorOfAFEq0()); + perAlleleQuals.add(AFresult.log10ProbOnlyRefAlleleExists()); } attributes.put(GATKVCFConstants.AS_QUAL_KEY, perAlleleQuals); @@ -708,7 +622,7 @@ public double calculateSingleSampleRefVsAnyActiveStateProfileValue(final double[ //TODO End of lousy part. final double normalizedLog10ACeq0Posterior = log10ACeq0Posterior - log10PosteriorNormalizationConstant; - // This is another condition to return a 0.0 also present in AFCalculator code as well. + // This is another condition to return a 0.0 also present in AlleleFrequencyCalculator code as well. if (normalizedLog10ACeq0Posterior >= QualityUtils.qualToErrorProbLog10(configuration.genotypeArgs.STANDARD_CONFIDENCE_FOR_CALLING)) { return 0.0; } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/MinimalGenotypingEngine.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/MinimalGenotypingEngine.java index ab355d9e5b8..f141b51402f 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/MinimalGenotypingEngine.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/MinimalGenotypingEngine.java @@ -2,7 +2,6 @@ import htsjdk.variant.variantcontext.Allele; import org.broadinstitute.hellbender.exceptions.UserException; -import org.broadinstitute.hellbender.tools.walkers.genotyper.afcalc.AFCalculatorProvider; import org.broadinstitute.hellbender.utils.genotyper.SampleList; @@ -19,9 +18,8 @@ public final class MinimalGenotypingEngine extends GenotypingEngine contaminat return false; } - /** - * Controls the model used to calculate the probability that a site is variant plus the various sample genotypes in the data at a given locus. 
- */ - @Hidden - @Argument(fullName = "p-nonref-model", doc = "Non-reference probability calculation model to employ", optional = true) - public AFCalculatorImplementation requestedAlleleFrequencyCalculationModel; - @Hidden @Argument(shortName = "log-exact-calls", optional=true) public File exactCallsLog = null; diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/afcalc/AFCalculationResult.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/afcalc/AFCalculationResult.java index aabd6d29ba3..4115a46d934 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/afcalc/AFCalculationResult.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/afcalc/AFCalculationResult.java @@ -1,8 +1,8 @@ package org.broadinstitute.hellbender.tools.walkers.genotyper.afcalc; import htsjdk.variant.variantcontext.Allele; -import org.apache.commons.math3.util.MathArrays; import org.broadinstitute.hellbender.utils.MathUtils; +import org.broadinstitute.hellbender.utils.QualityUtils; import org.broadinstitute.hellbender.utils.Utils; import java.util.*; @@ -17,19 +17,13 @@ * that users of this code can rely on the values coming out of these functions. */ public final class AFCalculationResult { - private static final int AF0 = 0; - private static final int AF1p = 1; - private static final int LOG_10_ARRAY_SIZES = 2; - // In GVCF mode the STANDARD_CONFIDENCE_FOR_CALLING is 0 by default, and it's nice having this easily-interpretable // threshold that says "call anything with any evidence at all." The problem is that *everything* has at least some evidence, // so this would end up putting every site, or at least too many sites, in the gvcf. Thus this parameter is in place to say // that "0" really means "epsilon." 
private static final double EPSILON = 1.0e-10; - private final double[] log10LikelihoodsOfAC; - private final double[] log10PriorsOfAC; - private final double[] log10PosteriorsOfAC; + private final double log10PosteriorOfNoVariant; private final Map log10pRefByAllele; @@ -48,60 +42,34 @@ public final class AFCalculationResult { */ public AFCalculationResult(final int[] alleleCountsOfMLE, final List allelesUsedInGenotyping, - final double[] log10LikelihoodsOfAC, - final double[] log10PriorsOfAC, + final double log10PosteriorOfNoVariant, final Map log10pRefByAllele) { Utils.nonNull(alleleCountsOfMLE, "alleleCountsOfMLE cannot be null"); - Utils.nonNull(log10PriorsOfAC, "log10PriorsOfAC cannot be null"); - Utils.nonNull(log10LikelihoodsOfAC, "log10LikelihoodsOfAC cannot be null"); - Utils.nonNull(log10LikelihoodsOfAC, "log10LikelihoodsOfAC cannot be null"); Utils.nonNull(log10pRefByAllele, "log10pRefByAllele cannot be null"); Utils.nonNull(allelesUsedInGenotyping, "allelesUsedInGenotyping cannot be null"); + Utils.validateArg(MathUtils.isValidLog10Probability(log10PosteriorOfNoVariant), "log10 posterior must be a valid log probability"); + if ( allelesUsedInGenotyping.isEmpty() ) { throw new IllegalArgumentException("allelesUsedInGenotyping must be non-null list of at least 1 value " + allelesUsedInGenotyping); } if ( alleleCountsOfMLE.length != allelesUsedInGenotyping.size() - 1) { throw new IllegalArgumentException("alleleCountsOfMLE.length " + alleleCountsOfMLE.length + " != allelesUsedInGenotyping.size() " + allelesUsedInGenotyping.size()); } - if ( log10LikelihoodsOfAC.length != 2 ) { - throw new IllegalArgumentException("log10LikelihoodsOfAC must have length equal 2"); - } - if ( log10PriorsOfAC.length != 2 ) { - throw new IllegalArgumentException("log10PriorsOfAC must have length equal 2"); - } if ( log10pRefByAllele.size() != allelesUsedInGenotyping.size() - 1 ) { throw new IllegalArgumentException("log10pRefByAllele has the wrong number of elements: log10pRefByAllele " + log10pRefByAllele + " but allelesUsedInGenotyping " + allelesUsedInGenotyping); } if ( ! allelesUsedInGenotyping.containsAll(log10pRefByAllele.keySet()) ) { throw new IllegalArgumentException("log10pRefByAllele doesn't contain all of the alleles used in genotyping: log10pRefByAllele " + log10pRefByAllele + " but allelesUsedInGenotyping " + allelesUsedInGenotyping); - }if ( ! MathUtils.isValidLog10ProbabilityVector(log10LikelihoodsOfAC, LOG_10_ARRAY_SIZES, false) ) { - throw new IllegalArgumentException("log10LikelihoodsOfAC are bad " + Utils.join(",", log10LikelihoodsOfAC)); - } - if ( ! 
MathUtils.isValidLog10ProbabilityVector(log10PriorsOfAC, LOG_10_ARRAY_SIZES, false) ) { - throw new IllegalArgumentException("log10priors are bad " + Utils.join(",", log10PriorsOfAC)); } //make defensive copies of all arguments this.alleleCountsOfMLE = alleleCountsOfMLE.clone(); this.allelesUsedInGenotyping = Collections.unmodifiableList(new ArrayList<>(allelesUsedInGenotyping)); - this.log10LikelihoodsOfAC = Arrays.copyOf(log10LikelihoodsOfAC, LOG_10_ARRAY_SIZES); - this.log10PriorsOfAC = Arrays.copyOf(log10PriorsOfAC, LOG_10_ARRAY_SIZES); - this.log10PosteriorsOfAC = computePosteriors(log10LikelihoodsOfAC, log10PriorsOfAC); + this.log10PosteriorOfNoVariant = log10PosteriorOfNoVariant; this.log10pRefByAllele = Collections.unmodifiableMap(new LinkedHashMap<>(log10pRefByAllele)); } - /** - * Return a new AFCalcResult with a new prior probability - * - * @param log10PriorsOfAC - * @return - */ - public AFCalculationResult copyWithNewPriors(final double[] log10PriorsOfAC) { - Utils.nonNull(log10PriorsOfAC); - return new AFCalculationResult(alleleCountsOfMLE, allelesUsedInGenotyping, log10LikelihoodsOfAC, log10PriorsOfAC, log10pRefByAllele); - } - /** * Returns a vector with maxAltAlleles values containing AC values at the MLE * @@ -127,14 +95,15 @@ public int[] getAlleleCountsOfMLE() { */ public int getAlleleCountAtMLE(final Allele allele) { Utils.nonNull(allele); - return alleleCountsOfMLE[altAlleleIndex(allele)]; + Utils.validate( allele.isNonReference(), () -> "Cannot get the alt allele index for reference allele " + allele); + final int indexInAllAllelesIncludingRef = allelesUsedInGenotyping.indexOf(allele); + Utils.validateArg(indexInAllAllelesIncludingRef != -1, () -> "could not find allele " + allele + " in " + allelesUsedInGenotyping); + final int indexInAltAlleles = indexInAllAllelesIncludingRef - 1; + return alleleCountsOfMLE[indexInAltAlleles]; } /** - * Get the list of alleles actually used in genotyping. 
- * - * Due to computational / implementation constraints this may be smaller than - * the actual list of alleles requested + * Get the list of alleles actually used in genotyping, which may be smaller than the actual list of alleles requested * * @return a non-empty list of alleles used during genotyping, the first of which is the reference allele */ @@ -142,46 +111,12 @@ public List getAllelesUsedInGenotyping() { return allelesUsedInGenotyping; } - /** - * Get the log10 normalized -- across all ACs -- posterior probability of AC == 0 for all alleles - */ - public double getLog10PosteriorOfAFEq0() { - return log10PosteriorsOfAC[AF0]; - } - - /** - * Get the log10 normalized -- across all ACs -- posterior probability of AC > 0 for any alleles - */ - public double getLog10PosteriorOfAFGT0() { - return log10PosteriorsOfAC[AF1p]; - } - - /** - * Get the log10 unnormalized -- across all ACs -- likelihood of AC == 0 for all alleles - */ - public double getLog10LikelihoodOfAFEq0() { - return log10LikelihoodsOfAC[AF0]; - } - - /** - * Get the log10 unnormalized -- across all ACs -- likelihood of AC > 0 for any alleles - */ - public double getLog10LikelihoodOfAFGT0() { - return log10LikelihoodsOfAC[AF1p]; + public double log10ProbOnlyRefAlleleExists() { + return log10PosteriorOfNoVariant; } - /** - * Get the log10 unnormalized -- across all ACs -- prior probability of AC == 0 for all alleles - */ - public double getLog10PriorOfAFEq0() { - return log10PriorsOfAC[AF0]; - } - - /** - * Get the log10 unnormalized -- across all ACs -- prior probability of AC > 0 - */ - public double getLog10PriorOfAFGT0() { - return log10PriorsOfAC[AF1p]; + public double log10ProbVariantPresent() { + return MathUtils.log10OneMinusPow10(log10PosteriorOfNoVariant); } @Override @@ -189,42 +124,18 @@ public String toString() { final List byAllele = new LinkedList<>(); for ( final Allele a : allelesUsedInGenotyping) { if (a.isNonReference()) { - byAllele.add(String.format("%s => MLE %d / posterior %.2f", a, getAlleleCountAtMLE(a), getLog10PosteriorOfAFEq0ForAllele(a))); + byAllele.add(String.format("%s => MLE %d / posterior %.2f", a, getAlleleCountAtMLE(a), getLog10PosteriorOfAlleleAbsent(a))); } } - return String.format("AFCalc%n\t\tlog10PosteriorOfAFGT0=%.2f%n\t\t%s", getLog10LikelihoodOfAFGT0(), Utils.join("\n\t\t", byAllele)); - } - - /** - * Are we sufficiently confident in being non-ref that the site is considered polymorphic? - * - * We are non-ref if the probability of being non-ref > the emit confidence (often an argument). - * Suppose posterior AF > 0 is log10: -5 => 10^-5 - * And that log10minPNonRef is -3. - * We are considered polymorphic since 10^-5 < 10^-3 => -5 < -3 - * - * Note that log10minPNonRef is really the minimum confidence, scaled as an error rate, so - * if you want to be 99% confidence, then log10PNonRef should be log10(0.01) = -2. 
- * - * @param log10minPNonRef the log10 scaled min pr of being non-ref to be considered polymorphic - * - * @return true if there's enough confidence (relative to log10minPNonRef) to reject AF == 0 - */ - public boolean isPolymorphic(final Allele allele, final double log10minPNonRef) { - Utils.nonNull(allele); - return getLog10PosteriorOfAFEq0ForAllele(allele) + EPSILON < log10minPNonRef; + return String.format("AFCalc%n\t\tlog10PosteriorOfVariant=%.2f%n\t\t%s", log10ProbVariantPresent(), Utils.join("\n\t\t", byAllele)); } /** - * Same as #isPolymorphic but takes a phred-scaled quality score as input + * Are we confident that an allele is present */ - public boolean isPolymorphicPhredScaledQual(final Allele allele, final double minPNonRefPhredScaledQual) { + public boolean passesThreshold(final Allele allele, final double phredScaleQualThreshold) { Utils.nonNull(allele); - if ( minPNonRefPhredScaledQual < 0 ) { - throw new IllegalArgumentException("phredScaledQual " + minPNonRefPhredScaledQual + " < 0 "); - } - final double log10Threshold = minPNonRefPhredScaledQual / -10; - return isPolymorphic(allele, log10Threshold); + return getLog10PosteriorOfAlleleAbsent(allele) + EPSILON < QualityUtils.qualToErrorProbLog10(phredScaleQualThreshold); } /** @@ -248,46 +159,10 @@ public boolean isPolymorphicPhredScaledQual(final Allele allele, final double mi * @param allele the allele we're interested in, must be in getAllelesUsedInGenotyping * @return the log10 probability that allele is not segregating at this site */ - public double getLog10PosteriorOfAFEq0ForAllele(final Allele allele) { + public double getLog10PosteriorOfAlleleAbsent(final Allele allele) { Utils.nonNull(allele); final Double log10pNonRef = log10pRefByAllele.get(allele); Utils.nonNull(log10pNonRef, "Unknown allele " + allele); return log10pNonRef; } - - /** - * Returns the log10 normalized posteriors given the log10 likelihoods and priors - * - * @param log10LikelihoodsOfAC - * @param log10PriorsOfAC - * - * @return freshly allocated log10 normalized posteriors vector - */ - private static double[] computePosteriors(final double[] log10LikelihoodsOfAC, final double[] log10PriorsOfAC) { - final double[] log10UnnormalizedPosteriors = MathArrays.ebeAdd(log10LikelihoodsOfAC, log10PriorsOfAC); - return MathUtils.normalizeLog10(log10UnnormalizedPosteriors); - } - - /** - * Computes the offset into linear vectors indexed by alt allele for allele - * - * Things like our MLE allele count vector are indexed by alt allele index, with - * the first alt allele being 0, the second 1, etc. This function computes the index - * associated with allele. 
- * - * @param allele the allele whose alt index we'd like to know - * @throws IllegalArgumentException if allele isn't in allelesUsedInGenotyping - * @return an index value greater than 0 suitable for indexing into the MLE and other alt allele indexed arrays - */ - private int altAlleleIndex(final Allele allele) { - if ( allele.isReference() ) { - throw new IllegalArgumentException("Cannot get the alt allele index for reference allele " + allele); - } - final int index = allelesUsedInGenotyping.indexOf(allele); - if ( index == -1 ) { - throw new IllegalArgumentException("could not find allele " + allele + " in " + allelesUsedInGenotyping); - } else { - return index - 1; - } - } } \ No newline at end of file diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/afcalc/AFCalculator.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/afcalc/AFCalculator.java deleted file mode 100644 index b0dbbcd3d8a..00000000000 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/afcalc/AFCalculator.java +++ /dev/null @@ -1,104 +0,0 @@ -package org.broadinstitute.hellbender.tools.walkers.genotyper.afcalc; - -import com.google.common.annotations.VisibleForTesting; -import htsjdk.variant.variantcontext.VariantContext; -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; -import org.broadinstitute.hellbender.utils.Utils; - -/** - * Generic interface for calculating the probability of alleles segregating given priors and genotype likelihoods - */ -public abstract class AFCalculator { - - protected static final Logger logger = LogManager.getLogger(AFCalculator.class); - - private StateTracker stateTracker; - - /** - * Compute the probability of the alleles segregating given the genotype likelihoods of the samples in vc - * - * @param vc the VariantContext holding the alleles and sample information. 
The VariantContext - * must have at least 1 alternative allele - * @param log10AlleleFrequencyPriors a prior vector nSamples x 2 in length indicating the Pr(AF = i) - * @return result (for programming convenience) - */ - public AFCalculationResult getLog10PNonRef(final VariantContext vc, final int defaultPloidy, final int maximumAlternativeAlleles, final double[] log10AlleleFrequencyPriors) { - Utils.nonNull(vc, "VariantContext cannot be null"); - Utils.nonNull(log10AlleleFrequencyPriors, "priors vector cannot be null"); - Utils.validateArg( vc.getNAlleles() > 1, () -> "VariantContext has only a single reference allele, but getLog10PNonRef requires at least one alt allele " + vc); - - // reset the result, so we can store our new result there - final StateTracker stateTracker = getStateTracker(true, maximumAlternativeAlleles); - return computeLog10PNonRef(vc, defaultPloidy, log10AlleleFrequencyPriors, stateTracker); - } - - /** - * Convert the final state of the state tracker into our result as an AFCalculationResult - * - * Assumes that stateTracker has been updated accordingly - * - * @param vc the VariantContext used as input to the calc model - * @param log10AlleleFrequencyPriors the priors by AC vector - * @return a AFCalculationResult describing the result of this calculation - */ - protected AFCalculationResult getResultFromFinalState(final VariantContext vc, final double[] log10AlleleFrequencyPriors, final StateTracker stateTracker) { - Utils.nonNull(vc, "vc cannot be null"); - Utils.nonNull(log10AlleleFrequencyPriors, "log10AlleleFrequencyPriors cannot be null"); - - stateTracker.setAllelesUsedInGenotyping(vc.getAlleles()); - return stateTracker.toAFCalculationResult(log10AlleleFrequencyPriors); - } - - // --------------------------------------------------------------------------- - // - // Abstract methods that should be implemented by concrete implementations - // to actually calculate the AF - // - // --------------------------------------------------------------------------- - - /** - * Actually carry out the log10PNonRef calculation on vc, storing results in results - * - * @param vc variant context with alleles and genotype likelihoods, - * must have at least one alt allele - * @param log10AlleleFrequencyPriors priors - * @return a AFCalcResult object describing the results of this calculation - */ - protected abstract AFCalculationResult computeLog10PNonRef(final VariantContext vc, final int defaultPloidy, - final double[] log10AlleleFrequencyPriors, final StateTracker stateTracker); - - /** - * Retrieves the state tracker. - * - *

- * <p>
- * The tracker will be reset if so requested or if it needs to be resized due to an increase in the
- * maximum number of alleles is must be able to handle.
- * </p>
- * - * @param reset make sure the tracker is reset. - * @param maximumAlternativeAlleleCount the maximum alternative allele count it must be able to handle. Has no effect if - * the current tracker is able to handle that number. - * - * @return {@code null} iff this calculator implementation does not use a state tracker. - */ - protected StateTracker getStateTracker(final boolean reset, final int maximumAlternativeAlleleCount) { - if (stateTracker == null) { - stateTracker = new StateTracker(maximumAlternativeAlleleCount); - } else if (reset) { - stateTracker.reset(maximumAlternativeAlleleCount); - } else { - stateTracker.ensureMaximumAlleleCapacity(maximumAlternativeAlleleCount); - } - return stateTracker; - } - - /** - * Please don't use this method in production. - */ - @VisibleForTesting - int getAltAlleleCountOfMAP(final int allele) { - return getStateTracker(false,allele + 1).getAlleleCountsOfMAP()[allele]; - } - -} diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/afcalc/AFCalculatorImplementation.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/afcalc/AFCalculatorImplementation.java deleted file mode 100644 index b500f325c7f..00000000000 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/afcalc/AFCalculatorImplementation.java +++ /dev/null @@ -1,145 +0,0 @@ -package org.broadinstitute.hellbender.tools.walkers.genotyper.afcalc; - -import org.broadinstitute.hellbender.utils.Utils; - -import java.lang.reflect.Modifier; -import java.util.function.Supplier; - -/** - * Enumeration of usable AF calculation, their constraints (i.e. ploidy). - * - * Note: this is an enum so that it can be used by the CLI system as an argument. - * - * Note that the order these occur in the enum is the order of preference, so - * the first value is taken over the second when multiple calculations satisfy - * the needs of the request (i.e., considering ploidy). - */ -public enum AFCalculatorImplementation { - - /** default implementation */ - EXACT_INDEPENDENT(IndependentAllelesDiploidExactAFCalculator::new, 2), - - /** reference implementation of multi-allelic EXACT model. Extremely slow for many alternate alleles */ - EXACT_REFERENCE(ReferenceDiploidExactAFCalculator::new, 2), - - /** original biallelic exact model, for testing only */ - EXACT_ORIGINAL(OriginalDiploidExactAFCalculator::new, 2, 2), - - /** implementation that supports any sample ploidy. Currently not available for the HaplotypeCaller */ - EXACT_GENERAL_PLOIDY(GeneralPloidyExactAFCalculator::new); - - /** - * Special max alt allele count indicating that this maximum is in fact unbound (can be anything). - */ - public static final int UNBOUND_ALTERNATIVE_ALLELE_COUNT = -1; - - /** - * Special ploidy constant that indicates that in fact the ploidy is unbound (can be anything). - */ - public static final int UNBOUND_PLOIDY = -1; - - /** - * Maximum number of supported alternative alleles. - */ - private final int maxAltAlleles; - - /** - * Supported ploidy. - * - * This is equal to {@link #UNBOUND_PLOIDY} if the class can handle any ploidy. - */ - private final int requiredPloidy; - - /** - * Reference to the default implementation. - */ - public static final AFCalculatorImplementation DEFAULT = EXACT_INDEPENDENT; - - private final Supplier afCalculatorSupplier; - - /** - * Constructs a new instance given all its properties - * @param afCalculatorSupplier the calculator class that realizes this implementation. 
- * @param requiredPloidy the required ploidy; zero or greater or {@link #UNBOUND_PLOIDY} to indicate that any ploidy is supported. - * @param maxAltAlleles the maximum alternative alleles; zero or greater or {@link #UNBOUND_ALTERNATIVE_ALLELE_COUNT} to indicate that any maximum number of alternative alleles is supported. - */ - AFCalculatorImplementation(final Supplier afCalculatorSupplier, final int requiredPloidy, final int maxAltAlleles) { - Utils.nonNull(afCalculatorSupplier); - this.afCalculatorSupplier = afCalculatorSupplier; - this.requiredPloidy = requiredPloidy; - this.maxAltAlleles = maxAltAlleles; - } - - /** - * Constructs a new instance leaving ploidy and max-allele count unbound. - * @param afCalculatorSupplier the calculator class that realizes this implementation. - */ - AFCalculatorImplementation(final Supplier afCalculatorSupplier) { - this(afCalculatorSupplier,UNBOUND_PLOIDY, UNBOUND_ALTERNATIVE_ALLELE_COUNT); - } - - /** Constructs a new instance leaving max-allele count unbound. - * @param afCalculatorSupplier the calculator class that realizes this implementation. - * @param requiredPloidy the required ploidy; zero or greater or {@link #UNBOUND_PLOIDY} to indicate that any ploidy is supported. - */ - AFCalculatorImplementation(final Supplier afCalculatorSupplier, final int requiredPloidy) { - this(afCalculatorSupplier,requiredPloidy,UNBOUND_PLOIDY); - } - - /** - * Checks whether a given ploidy and max alternative alleles combination is supported or not. - * @param requestedPloidy the targeted ploidy. - * @param requestedMaxAltAlleles the targeted max alternative alleles. - * @return {@code true} iff this calculator implementation satisfies both requirements. - */ - public boolean usableForParams(final int requestedPloidy, final int requestedMaxAltAlleles) { - return (requiredPloidy == UNBOUND_PLOIDY || requiredPloidy == requestedPloidy) - && (maxAltAlleles == UNBOUND_ALTERNATIVE_ALLELE_COUNT || maxAltAlleles >= requestedMaxAltAlleles); - } - - public AFCalculator newInstance() { return this.afCalculatorSupplier.get(); } - - /** - * Returns the best (fastest) model give the required ploidy and alternative allele count. - * - * @param requiredPloidy required ploidy - * @param requiredAlternativeAlleleCount required alternative allele count. - * @param preferred a preferred mode if any. A {@code null} indicate that we should be try to use the default instead. - * @return never {@code null} - */ - public static AFCalculatorImplementation bestValue(final int requiredPloidy, final int requiredAlternativeAlleleCount, final AFCalculatorImplementation preferred) { - final AFCalculatorImplementation preferredValue = preferred == null ? DEFAULT : preferred; - if (preferredValue.usableForParams(requiredPloidy,requiredAlternativeAlleleCount)) { - return preferredValue; - } - if (EXACT_INDEPENDENT.usableForParams(requiredPloidy,requiredAlternativeAlleleCount)) { - return EXACT_INDEPENDENT; - } - if (EXACT_REFERENCE.usableForParams(requiredPloidy,requiredAlternativeAlleleCount)) { //TODO: this seems to be dead code. EXACT_REFERENCE will always lose to EXACT_INDEPENDENT. - return EXACT_REFERENCE; - } - return EXACT_GENERAL_PLOIDY; - } - - /** - * Returns the value that corresponds to a given implementation calculator class. - * - * @param clazz the target class. - * - * @throws IllegalArgumentException if {@code clazz} is {@code null} or if it is abstract. - * - * @return never {@code null}. 
- */ - public static AFCalculatorImplementation fromCalculatorClass(final Class clazz) { - Utils.nonNull(clazz, "input class cannot be null"); - Utils.validateArg(!Modifier.isAbstract(clazz.getModifiers()), "class " + clazz.getCanonicalName() + " should not be abstract"); - - //Using iteration instead of a static map to avoid static state. - for (final AFCalculatorImplementation impl : AFCalculatorImplementation.values()){ - if (clazz.equals(impl.newInstance().getClass())){ - return impl; - } - } - throw new IllegalArgumentException("Attempt to retrieve AFCalculatorImplementation instance from a non-registered calculator class " + clazz.getName()); - } -} diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/afcalc/AFCalculatorProvider.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/afcalc/AFCalculatorProvider.java deleted file mode 100644 index 6695ad8fa65..00000000000 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/afcalc/AFCalculatorProvider.java +++ /dev/null @@ -1,57 +0,0 @@ -package org.broadinstitute.hellbender.tools.walkers.genotyper.afcalc; - -import htsjdk.variant.variantcontext.Genotype; -import htsjdk.variant.variantcontext.GenotypesContext; -import htsjdk.variant.variantcontext.VariantContext; -import org.broadinstitute.hellbender.utils.Utils; - -/** - * Instantiates Exact AF calculators given the required ploidy specs. - * - *

<p> This class might return the same instance several times
- * and so the client code might need to make sure that there are no collisions or race conditions.</p>
- */ -public abstract class AFCalculatorProvider { - - /** - * Returns a AF calculator capable to handle a particular variant-context. - * @param variantContext the target context build. - * @param defaultPloidy the assumed ploidy in case that there is no a GT call present to determine it. - * @return never {@code null} - */ - public AFCalculator getInstance(final VariantContext variantContext, final int defaultPloidy, final int maximumAltAlleles) { - Utils.nonNull(variantContext, "variant context cannot be null"); - - final int sampleCount = variantContext.getNSamples(); - if (sampleCount == 0) { - return getInstance(defaultPloidy, maximumAltAlleles); - } - - final GenotypesContext genotypes = variantContext.getGenotypes(); - - final Genotype firstGenotype = genotypes.get(0); - int ploidy = firstGenotype.getPloidy(); - if (ploidy <= 0) { - ploidy = defaultPloidy; - } - for (int i = 1 ; i < sampleCount; i++) { - final Genotype genotype = genotypes.get(i); - final int declaredPloidy = genotype.getPloidy(); - final int actualPloidy = declaredPloidy <= 0 ? defaultPloidy : declaredPloidy; - if (actualPloidy != ploidy) { - ploidy = AFCalculatorImplementation.UNBOUND_PLOIDY; - break; - } - } - return getInstance(ploidy, Math.min(variantContext.getNAlleles() - 1, maximumAltAlleles)); - } - - /** - * Returns a AF calculator given the required homogeneous ploidy and maximum alt allele count. - * @param ploidy the required ploidy. - * @param maximumAltAlleles the maximum alt allele count. - * @return never {@code null} - */ - public abstract AFCalculator getInstance(final int ploidy, final int maximumAltAlleles); - -} diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/afcalc/AlleleFrequencyCalculator.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/afcalc/AlleleFrequencyCalculator.java index 1e3437dcec9..0f9a3f04d9c 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/afcalc/AlleleFrequencyCalculator.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/afcalc/AlleleFrequencyCalculator.java @@ -7,6 +7,7 @@ import it.unimi.dsi.fastutil.ints.Int2ObjectArrayMap; import org.apache.commons.math3.util.MathArrays; import org.broadinstitute.hellbender.tools.walkers.genotyper.GenotypeAlleleCounts; +import org.broadinstitute.hellbender.tools.walkers.genotyper.GenotypeCalculationArgumentCollection; import org.broadinstitute.hellbender.tools.walkers.genotyper.GenotypeLikelihoodCalculator; import org.broadinstitute.hellbender.tools.walkers.genotyper.GenotypeLikelihoodCalculators; import org.broadinstitute.hellbender.utils.Dirichlet; @@ -23,7 +24,7 @@ /** * @author David Benjamin <davidben@broadinstitute.org> */ -public final class AlleleFrequencyCalculator extends AFCalculator { +public final class AlleleFrequencyCalculator { private static final GenotypeLikelihoodCalculators GL_CALCS = new GenotypeLikelihoodCalculators(); private static final double THRESHOLD_FOR_ALLELE_COUNT_CONVERGENCE = 0.1; private static final int HOM_REF_GENOTYPE_INDEX = 0; @@ -41,22 +42,26 @@ public AlleleFrequencyCalculator(final double refPseudocount, final double snpPs this.defaultPloidy = defaultPloidy; } - public AFCalculationResult getLog10PNonRef(final VariantContext vc) { + public static AlleleFrequencyCalculator makeCalculator(final GenotypeCalculationArgumentCollection genotypeArgs) { + final double refPseudocount = genotypeArgs.snpHeterozygosity / Math.pow(genotypeArgs.heterozygosityStandardDeviation,2); + final double 
snpPseudocount = genotypeArgs.snpHeterozygosity * refPseudocount; + final double indelPseudocount = genotypeArgs.indelHeterozygosity * refPseudocount; + return new AlleleFrequencyCalculator(refPseudocount, snpPseudocount, indelPseudocount, genotypeArgs.samplePloidy); + } + + public AFCalculationResult calculate(final VariantContext vc) { // maxAltAlleles is not used by getLog10PNonRef, so don't worry about the 0 - return getLog10PNonRef(vc, defaultPloidy, 0, null); + return calculate(vc, defaultPloidy); } - //TODO: this should be a class of static methods once the old AFCalculator is gone. + /** * Compute the probability of the alleles segregating given the genotype likelihoods of the samples in vc * * @param vc the VariantContext holding the alleles and sample information. The VariantContext * must have at least 1 alternative allele - * @param refSnpIndelPseudocounts a total hack. A length-3 vector containing Dirichlet prior pseudocounts to - * be given to ref, alt SNP, and alt indel alleles. Hack won't be necessary when we destroy the old AF calculators * @return result (for programming convenience) */ - @Override - public AFCalculationResult getLog10PNonRef(final VariantContext vc, final int defaultPloidy, final int maximumAlternativeAlleles, final double[] refSnpIndelPseudocounts) { + public AFCalculationResult calculate(final VariantContext vc, final int defaultPloidy) { Utils.nonNull(vc, "VariantContext cannot be null"); final int numAlleles = vc.getNAlleles(); final List alleles = vc.getAlleles(); @@ -146,12 +151,7 @@ public AFCalculationResult getLog10PNonRef(final VariantContext vc, final int de final Map log10PRefByAllele = IntStream.range(1, numAlleles).boxed() .collect(Collectors.toMap(alleles::get, a -> log10POfZeroCountsByAllele[a])); - // we compute posteriors here and don't have the same prior that AFCalculationResult expects. 
Therefore, we - // give it our posterior as its "likelihood" along with a flat dummy prior - final double[] dummyFlatPrior = {-1e-10, -1e-10}; //TODO: HACK must be negative for AFCalcResult - final double[] log10PosteriorOfNoVariantYesVariant = {log10PNoVariant, MathUtils.log10OneMinusPow10(log10PNoVariant)}; - - return new AFCalculationResult(integerAltAlleleCounts, alleles, log10PosteriorOfNoVariantYesVariant, dummyFlatPrior, log10PRefByAllele); + return new AFCalculationResult(integerAltAlleleCounts, alleles, log10PNoVariant, log10PRefByAllele); } // effectiveAlleleCounts[allele a] = SUM_{genotypes g} (posterior_probability(g) * num_copies of a in g), which we denote as SUM [n_g p_g] @@ -199,15 +199,4 @@ private static int[] genotypeIndicesWithOnlyRefAndSpanDel(final int ploidy, fina return new IndexRange(0, ploidy).mapToInteger(n -> glCalc.alleleCountsToIndex(new int[]{0, ploidy - n, spanDelIndex, n})); } } - - @Override //Note: unused - protected AFCalculationResult getResultFromFinalState(final VariantContext vc, final double[] priors, final StateTracker st) { return null; } - - @Override//Note: unused - protected AFCalculationResult computeLog10PNonRef(final VariantContext vc, final int defaultPloidy, - final double[] priors, final StateTracker st) { return null; } - - @Override //Note: unused - protected StateTracker getStateTracker(final boolean reset, final int maximumAlternativeAlleleCount) { return null; } - } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/afcalc/ConcurrentAFCalculatorProvider.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/afcalc/ConcurrentAFCalculatorProvider.java deleted file mode 100644 index 3d22c300e1e..00000000000 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/afcalc/ConcurrentAFCalculatorProvider.java +++ /dev/null @@ -1,39 +0,0 @@ -package org.broadinstitute.hellbender.tools.walkers.genotyper.afcalc; - -import htsjdk.variant.variantcontext.VariantContext; - -/** - * Produces independent AF calculators per thread. - * - * @author Valentin Ruano-Rubio <valentin@broadinstitute.org> - */ -public abstract class ConcurrentAFCalculatorProvider extends AFCalculatorProvider { - - private final ThreadLocal threadLocal; - - /** - * Create a new concurrent af-calculator provider instance. - */ - public ConcurrentAFCalculatorProvider() { - threadLocal = new ThreadLocal() { - @Override - public AFCalculatorProvider initialValue() { - return createProvider(); - } - }; - } - - @Override - public AFCalculator getInstance(final VariantContext vc, final int defaultPloidy, final int maxAltAlleleCount) { - return threadLocal.get().getInstance(vc,defaultPloidy,maxAltAlleleCount); - } - - - @Override - public AFCalculator getInstance(final int ploidy, final int maxAltAlleleCount) { - return threadLocal.get().getInstance(ploidy, maxAltAlleleCount); - } - - protected abstract AFCalculatorProvider createProvider(); -} - diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/afcalc/ExactACcounts.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/afcalc/ExactACcounts.java deleted file mode 100644 index c239f7cccf3..00000000000 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/afcalc/ExactACcounts.java +++ /dev/null @@ -1,51 +0,0 @@ -package org.broadinstitute.hellbender.tools.walkers.genotyper.afcalc; - -import java.util.Arrays; - -/** - * Stores a vector of counts. 
It's a thin wrapper around int[] to give it a cached hashcode and equals. - */ -public final class ExactACcounts { - private final int[] counts; - private int hashcode = -1; - - /** - * Note: this constructor does not make a copy of the argument and stores a live pointer to the given array. - * Callers must make sure the array is not mutated to maintain semantics of hashcode. - * The array must be not null and longer than 0 elements. - */ - public ExactACcounts(final int[] counts) { - if (counts == null || counts.length == 0){ - throw new IllegalArgumentException("counts should not be null or empty"); - } - this.counts = counts; - } - - public int[] getCounts() { - return counts; - } - - @Override - public boolean equals(final Object obj) { - return (obj instanceof ExactACcounts) && Arrays.equals(counts, ((ExactACcounts) obj).counts); - } - - @Override - public int hashCode() { - if ( hashcode == -1 ) { - hashcode = Arrays.hashCode(counts); - } - return hashcode; - } - - @Override - public String toString() { - final StringBuffer sb = new StringBuffer(); - sb.append(counts[0]); - for ( int i = 1; i < counts.length; i++ ) { - sb.append("/"); - sb.append(counts[i]); - } - return sb.toString(); - } -} diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/afcalc/ExactACset.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/afcalc/ExactACset.java deleted file mode 100644 index d9978b6e474..00000000000 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/afcalc/ExactACset.java +++ /dev/null @@ -1,52 +0,0 @@ -package org.broadinstitute.hellbender.tools.walkers.genotyper.afcalc; - -import org.broadinstitute.hellbender.utils.MathUtils; -import org.broadinstitute.hellbender.utils.Utils; - -import java.util.Arrays; - -/** - * This class represents a column in the Exact AC calculation matrix - */ -public final class ExactACset { - // the counts of the various alternate alleles which this column represents - private final ExactACcounts acCounts; - - // the column of the matrix - private final double[] log10Likelihoods; - - private final long sum; - - public ExactACset(final int size, final ExactACcounts acCounts) { - Utils.nonNull(acCounts); - this.acCounts = acCounts; - log10Likelihoods = new double[size]; - Arrays.fill(log10Likelihoods, Double.NEGATIVE_INFINITY); - sum = MathUtils.sum(acCounts.getCounts()); - } - - /** - * sum of all the non-reference alleles - */ - public int getACsum() { - return (int)sum; - } - - @Override - public boolean equals(final Object obj) { - return (obj instanceof ExactACset) && acCounts.equals(((ExactACset) obj).acCounts); - } - - @Override - public int hashCode() { - return acCounts.hashCode(); - } - - public ExactACcounts getACcounts() { - return acCounts; - } - - public double[] getLog10Likelihoods() { - return log10Likelihoods; - } -} diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/afcalc/ExactAFCalculator.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/afcalc/ExactAFCalculator.java deleted file mode 100644 index 93e0bf732a6..00000000000 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/afcalc/ExactAFCalculator.java +++ /dev/null @@ -1,48 +0,0 @@ -package org.broadinstitute.hellbender.tools.walkers.genotyper.afcalc; - -import htsjdk.variant.variantcontext.*; -import org.broadinstitute.hellbender.utils.Utils; -import org.broadinstitute.hellbender.utils.variant.GATKVariantContextUtils; - -import java.util.*; - -/** 
- * Uses the Exact calculation of Heng Li - */ -abstract class ExactAFCalculator extends AFCalculator { - - protected static final int HOM_REF_INDEX = 0; // AA likelihoods are always first - - - /** - * Unpack GenotypesContext into arraylist of double values - * @param GLs Input genotype context - * @param includeDummy //TODO: Does anyone have any clue what this is????? - * @return ArrayList of doubles corresponding to GL vectors - */ - public static List getGLs(final GenotypesContext GLs, final boolean includeDummy) { - return getGLs(GLs, includeDummy, false); - } - - /** - * Unpack GenotypesContext into arraylist of double values - * @param GLs Input genotype context - * @param includeDummy //TODO: Does anyone have any clue what this is????? - * @param keepUninformative - * @return ArrayList of doubles corresponding to GL vectors - */ - public static List getGLs(final GenotypesContext GLs, final boolean includeDummy, boolean keepUninformative) { - final List genotypeLikelihoods = new ArrayList<>(GLs.size() + 1); - if (includeDummy) { - genotypeLikelihoods.add((new double[]{0.0, 0.0, 0.0})); - } - - Utils.stream(GLs.iterateInSampleNameOrder()) - .filter(Genotype::hasLikelihoods) - .map(gt -> gt.getLikelihoods().getAsVector()) - .filter(gls -> keepUninformative || GATKVariantContextUtils.isInformative(gls)) - .forEach(genotypeLikelihoods::add); - - return genotypeLikelihoods; - } -} \ No newline at end of file diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/afcalc/FixedAFCalculatorProvider.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/afcalc/FixedAFCalculatorProvider.java deleted file mode 100644 index 09502b2f3b8..00000000000 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/afcalc/FixedAFCalculatorProvider.java +++ /dev/null @@ -1,121 +0,0 @@ -package org.broadinstitute.hellbender.tools.walkers.genotyper.afcalc; - -import htsjdk.variant.variantcontext.VariantContext; -import org.broadinstitute.hellbender.tools.walkers.genotyper.GenotypeCalculationArgumentCollection; -import org.broadinstitute.hellbender.tools.walkers.genotyper.StandardCallerArgumentCollection; -import org.broadinstitute.hellbender.utils.Utils; - -/** - * A single fixed instance AF calculator provider. - */ -public final class FixedAFCalculatorProvider extends AFCalculatorProvider { - - private final AFCalculator calculator; - - private final boolean verifyRequests; - - private final int maximumAltAlleleCount; - - private final int ploidy; - - /** - * Constructs a fixed AF Calculator provider. - * @param configuration the called configuration. This is the source of the fixed ploidy and maximum number of - * supported alleles. - * @param verifyRequests whether this provider will verify that each request for the AF calculator meets the - * initial parameter values (ploidy, sample-count and maximum number of alleles. - * - * @throws NullPointerException if {@code configuration} is {@code null}, or it contains invalid values for - * sample ploidy and maximum number of alternative alleles, or {@code sampleCount} is less than 0. - */ - public FixedAFCalculatorProvider(final StandardCallerArgumentCollection configuration, - final boolean verifyRequests) { - this(configuration.requestedAlleleFrequencyCalculationModel, configuration.genotypeArgs, verifyRequests); - - } - - /** - * Constructs a fixed AF Calculator provider. - * - * @param configuration the called configuration. 
This is the source of the fixed ploidy and maximum number of - * supported alleles. - * @param verifyRequests whether this provider will verify that each request for the AF calculator meets the - * initial parameter values (ploidy, sample-count and maximum number of alleles. - * - * @throws IllegalArgumentException if {@code configuration} is {@code null}, or it contains invalid values for - * sample ploidy and maximum number of alternative alleles, or {@code sampleCount} is less than 0. - */ - public FixedAFCalculatorProvider(final GenotypeCalculationArgumentCollection configuration, final boolean verifyRequests) { - this(null,configuration,verifyRequests); - } - - /** - * Constructs a fixed AF Calculator provider. - * - * @param preferred preferred implementation. - * @param configuration the called configuration. This is the source of the fixed ploidy and maximum number of - * supported alleles. - * @param verifyRequests whether this provider will verify that each request for the AF calculator meets the - * initial parameter values (ploidy, sample-count and maximum number of alleles. - * - * @throws IllegalArgumentException if {@code configuration} is {@code null}, or it contains invalid values for - * sample ploidy and maximum number of alternative alleles, or {@code sampleCount} is less than 0. - */ - public FixedAFCalculatorProvider(final AFCalculatorImplementation preferred, final GenotypeCalculationArgumentCollection configuration, final boolean verifyRequests) { - Utils.nonNull(configuration, "null configuration"); - if (configuration.samplePloidy < 1) { - throw new IllegalArgumentException("invalid sample ploidy " + configuration.samplePloidy); - } - if (configuration.MAX_ALTERNATE_ALLELES < 0) { - throw new IllegalArgumentException("invalid maximum number of alleles " + (configuration.MAX_ALTERNATE_ALLELES + 1)); - } - - ploidy = configuration.samplePloidy; - maximumAltAlleleCount = configuration.MAX_ALTERNATE_ALLELES; - calculator = AFCalculatorImplementation.bestValue(ploidy,maximumAltAlleleCount,preferred).newInstance(); - this.verifyRequests = verifyRequests; - } - - @Override - public AFCalculator getInstance(final VariantContext vc, final int defaultPloidy, final int maximumAlleleCount) { - if (verifyRequests){ - // supers implementation will call eventually one of the other methods, so no need to verify anything here. - return super.getInstance(vc, defaultPloidy, maximumAlleleCount); - } - return calculator; - } - - @Override - public AFCalculator getInstance(final int ploidy, final int maxAltAlleleCount) { - if (verifyRequests) { - if (this.ploidy != AFCalculatorImplementation.UNBOUND_PLOIDY && ploidy != this.ploidy) { - throw new IllegalArgumentException("non-supported ploidy:" + ploidy + " Only " + this.ploidy + " or " + AFCalculatorImplementation.UNBOUND_PLOIDY); - } - if (maximumAltAlleleCount != AFCalculatorImplementation.UNBOUND_ALTERNATIVE_ALLELE_COUNT && maxAltAlleleCount > maximumAltAlleleCount) { - throw new IllegalArgumentException("non-supported alleleCount"); - } - } - return calculator; - } - - /** - * Creates a fixed AF calculator provider that is thread safe. - * - * @param config the caller configuration. - * - * @throws IllegalArgumentException if any of the input argument is {@code null} or contain invalid configuration - * like zero-samples, zero or negative ploidy or negative-zero maximum number of alleles. 
- * - * @return never {@code null} - */ - public static AFCalculatorProvider createThreadSafeProvider( final StandardCallerArgumentCollection config ) { - Utils.nonNull(config); - - return new ConcurrentAFCalculatorProvider() { - @Override - protected AFCalculatorProvider createProvider() { - return new FixedAFCalculatorProvider(config, false); - } - }; - } -} diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/afcalc/GeneralPloidyExactAFCalculator.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/afcalc/GeneralPloidyExactAFCalculator.java deleted file mode 100644 index f4ce562642c..00000000000 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/afcalc/GeneralPloidyExactAFCalculator.java +++ /dev/null @@ -1,331 +0,0 @@ -package org.broadinstitute.hellbender.tools.walkers.genotyper.afcalc; - -import com.google.common.annotations.VisibleForTesting; -import htsjdk.variant.variantcontext.*; -import org.broadinstitute.hellbender.exceptions.GATKException; -import org.broadinstitute.hellbender.tools.walkers.genotyper.GenotypeAlleleCounts; -import org.broadinstitute.hellbender.tools.walkers.genotyper.GenotypeLikelihoodCalculator; -import org.broadinstitute.hellbender.tools.walkers.genotyper.GenotypeLikelihoodCalculators; -import org.broadinstitute.hellbender.utils.MathUtils; -import org.broadinstitute.hellbender.utils.Utils; -import org.broadinstitute.hellbender.utils.variant.GATKVariantContextUtils; - -import java.util.*; - -public final class GeneralPloidyExactAFCalculator extends ExactAFCalculator { - - private static final boolean VERBOSE = false; - private static final GenotypeLikelihoodCalculators GL_CALCS = new GenotypeLikelihoodCalculators(); - - @Override - protected AFCalculationResult computeLog10PNonRef(final VariantContext vc, final int defaultPloidy, final double[] log10AlleleFrequencyPriors, final StateTracker stateTracker) { - Utils.nonNull(vc, "vc is null"); - Utils.nonNull(log10AlleleFrequencyPriors, "log10AlleleFrequencyPriors is null"); - Utils.nonNull(stateTracker, "stateTracker is null"); - combineSinglePools(vc.getGenotypes(), defaultPloidy, vc.getNAlleles(), log10AlleleFrequencyPriors); - return getResultFromFinalState(vc, log10AlleleFrequencyPriors, stateTracker); - } - - /** - * Simple wrapper class to hold values of combined pool likelihoods. - * For fast hashing and fast retrieval, there's a hash map that shadows main list. 
- * - */ - private static final class CombinedPoolLikelihoods { - private final List alleleCountSetList; - private final Map conformationMap; - private double maxLikelihood; - - CombinedPoolLikelihoods() { - // final int numElements = GenotypeLikelihoods.numLikelihoods(); - alleleCountSetList = new LinkedList<>(); - conformationMap = new LinkedHashMap<>(); - maxLikelihood = Double.NEGATIVE_INFINITY; - } - - public void add(final ExactACset set) { - alleleCountSetList.add(set); - conformationMap.put(set.getACcounts(), set); - final double likelihood = set.getLog10Likelihoods()[0]; - - if (likelihood > maxLikelihood ) { - maxLikelihood = likelihood; - } - } - - public boolean hasConformation(final int[] ac) { - return conformationMap.containsKey(new ExactACcounts(ac)); - } - - public double getLikelihoodOfConformation(final int[] ac) { - return conformationMap.get(new ExactACcounts(ac)).getLog10Likelihoods()[0]; - } - - public double getGLOfACZero() { - return alleleCountSetList.get(0).getLog10Likelihoods()[0]; // AC 0 is always at beginning of list - } - - public int getLength() { - return alleleCountSetList.size(); - } - } - - - - /** - * Simple non-optimized version that combines GLs from several pools and produces global AF distribution. - * @param GLs Inputs genotypes context with per-pool GLs - * @param numAlleles Number of alternate alleles - * @param log10AlleleFrequencyPriors Frequency priors - */ - @VisibleForTesting - void combineSinglePools(final GenotypesContext GLs, - final int defaultPloidy, - final int numAlleles, - final double[] log10AlleleFrequencyPriors) { - - // Combine each pool incrementally - likelihoods will be renormalized at each step - - // first element: zero ploidy, e.g. trivial degenerate distribution - final int numAltAlleles = numAlleles - 1; - final int[] zeroCounts = new int[numAlleles]; - final ExactACset set = new ExactACset(1, new ExactACcounts(zeroCounts)); - set.getLog10Likelihoods()[0] = 0.0; - final StateTracker stateTracker = getStateTracker(false,numAltAlleles); - int combinedPloidy = 0; - CombinedPoolLikelihoods combinedPoolLikelihoods = new CombinedPoolLikelihoods(); - combinedPoolLikelihoods.add(set); - - for (final Genotype genotype : GLs.iterateInSampleNameOrder()) { - // recover gls and check if they qualify. - if (!genotype.hasPL()) { - continue; - } - final double[] gls = genotype.getLikelihoods().getAsVector(); - if (!GATKVariantContextUtils.isInformative(gls)) { - continue; - } - stateTracker.reset(); - final int declaredPloidy = genotype.getPloidy(); - final int ploidy = declaredPloidy < 1 ? defaultPloidy : declaredPloidy; - // they do qualify so we proceed. 
- combinedPoolLikelihoods = fastCombineMultiallelicPool(combinedPoolLikelihoods, gls, - combinedPloidy, ploidy, numAlleles, log10AlleleFrequencyPriors, stateTracker); - combinedPloidy = ploidy + combinedPloidy; // total number of chromosomes in combinedLikelihoods - } - if (combinedPloidy == 0) { - stateTracker.setLog10LikelihoodOfAFzero(0.0); - } - } - - private CombinedPoolLikelihoods fastCombineMultiallelicPool(final CombinedPoolLikelihoods originalPool, - final double[] newGL, - final int originalPloidy, - final int newGLPloidy, - final int numAlleles, - final double[] log10AlleleFrequencyPriors, - final StateTracker stateTracker) { - final Deque ACqueue = new LinkedList<>(); - // mapping of ExactACset indexes to the objects - final Map indexesToACset = new LinkedHashMap<>(); - final CombinedPoolLikelihoods newPool = new CombinedPoolLikelihoods(); - - // add AC=0 to the queue - final int[] zeroCounts = new int[numAlleles]; - final int newPloidy = originalPloidy + newGLPloidy; - zeroCounts[0] = newPloidy; - - final ExactACset zeroSet = new ExactACset(1, new ExactACcounts(zeroCounts)); - - ACqueue.add(zeroSet); - indexesToACset.put(zeroSet.getACcounts(), zeroSet); - - // keep processing while we have AC conformations that need to be calculated - while ( !ACqueue.isEmpty() ) { - // compute log10Likelihoods - final ExactACset ACset = ACqueue.remove(); - - calculateACConformationAndUpdateQueue(ACset, newPool, originalPool, newGL, log10AlleleFrequencyPriors, originalPloidy, newGLPloidy, ACqueue, indexesToACset, stateTracker); - - // clean up memory - indexesToACset.remove(ACset.getACcounts()); - if ( VERBOSE ) { - System.out.printf(" *** removing used set=%s%n", ACset.getACcounts()); - } - - } - return newPool; - } - - /** - * - * @param set ExactACset holding conformation to be computed - * @param newPool New pool likelihood holder - * @param originalPool Original likelihood holder - * @param newGL New pool GL vector to combine - * @param log10AlleleFrequencyPriors Prior object - * @param originalPloidy Total ploidy of original combined pool - * @param newGLPloidy Ploidy of GL vector - * @param ACqueue Queue of conformations to compute - * @param indexesToACset AC indices of objects in queue - * @return max log likelihood - */ - private double calculateACConformationAndUpdateQueue(final ExactACset set, - final CombinedPoolLikelihoods newPool, - final CombinedPoolLikelihoods originalPool, - final double[] newGL, - final double[] log10AlleleFrequencyPriors, - final int originalPloidy, - final int newGLPloidy, - final Deque ACqueue, - final Map indexesToACset, - final StateTracker stateTracker) { - - // compute likelihood in "set" of new set based on original likelihoods - final int numAlleles = set.getACcounts().getCounts().length; - final int newPloidy = set.getACsum(); - final double log10LofK = computeLofK(set, originalPool, newGL, log10AlleleFrequencyPriors, numAlleles, originalPloidy, newGLPloidy, stateTracker); - - - // add to new pool - if (!Double.isInfinite(log10LofK)) { - newPool.add(set); - } - - if ( stateTracker.abort(log10LofK, set.getACcounts(), true, true) ) { - return log10LofK; - } - - // iterate over higher frequencies if possible - // by convention, ACcounts contained in set have full vector of possible pool ac counts including ref count. 
- // so, if first element is zero, it automatically means we have no wiggle since we're in a corner of the conformation space - final int ACwiggle = set.getACcounts().getCounts()[0]; - if ( ACwiggle == 0 ) // all alternate alleles already sum to 2N so we cannot possibly go to higher frequencies - { - return log10LofK; - } - - - // add conformations for other cases - for ( int allele = 1; allele < numAlleles; allele++ ) { - final int[] ACcountsClone = set.getACcounts().getCounts().clone(); - ACcountsClone[allele]++; - // is this a valid conformation? - final int altSum = (int)MathUtils.sum(ACcountsClone) - ACcountsClone[0]; - ACcountsClone[0] = newPloidy - altSum; - if (ACcountsClone[0] < 0) { - continue; - } - - updateACset(ACcountsClone, ACqueue, indexesToACset); - } - - - return log10LofK; - } - - /** - * Compute likelihood of a particular AC conformation and update AFresult object - * @param set Set of AC counts to compute - * @param firstGLs Original pool likelihoods before combining - * @param secondGL New GL vector with additional pool - * @param log10AlleleFrequencyPriors Allele frequency priors - * @param numAlleles Number of alleles (including ref) - * @param ploidy1 Ploidy of original pool (combined) - * @param ploidy2 Ploidy of new pool - * @return log-likelihood of requested conformation - */ - private double computeLofK(final ExactACset set, - final CombinedPoolLikelihoods firstGLs, - final double[] secondGL, - final double[] log10AlleleFrequencyPriors, - final int numAlleles, final int ploidy1, final int ploidy2, final StateTracker stateTracker) { - - final int newPloidy = ploidy1 + ploidy2; - - // sanity check - int totalAltK = set.getACsum(); - if (newPloidy != totalAltK) { - throw new GATKException("BUG: inconsistent sizes of set.getACsum and passed ploidy values"); - } - - totalAltK -= set.getACcounts().getCounts()[0]; - // totalAltK has sum of alt alleles of conformation now - - - // special case for k = 0 over all k - if ( totalAltK == 0 ) { // all-ref case - final double log10Lof0 = firstGLs.getGLOfACZero() + secondGL[HOM_REF_INDEX]; - set.getLog10Likelihoods()[0] = log10Lof0; - stateTracker.setLog10LikelihoodOfAFzero(log10Lof0); - stateTracker.setLog10PosteriorOfAFzero(log10Lof0 + log10AlleleFrequencyPriors[0]); - return log10Lof0; - - } else { - // initialize result with denominator - // ExactACset holds by convention the conformation of all alleles, and the sum of all allele count is just the ploidy. - // To compute n!/k1!k2!k3!... we need to compute first n!/(k2!k3!...) and then further divide by k1! 
where k1=ploidy-sum_k_i - - final int[] currentCount = set.getACcounts().getCounts(); - final double denom = -MathUtils.log10MultinomialCoefficient(newPloidy, currentCount); - - final GenotypeLikelihoodCalculator glCalc = GL_CALCS.getInstance(ploidy2, numAlleles); - for (int PLIndex = 0; PLIndex < glCalc.genotypeCount(); PLIndex++) { - final GenotypeAlleleCounts alleleCounts = glCalc.genotypeAlleleCountsAt(PLIndex); - - final int[] acCount2 = alleleCounts.alleleCountsByIndex(numAlleles - 1); - final int[] acCount1 = MathUtils.vectorDiff(currentCount, acCount2); - // see if conformation is valid and if original pool had this conformation - // for conformation to be valid, all elements of g2 have to be <= elements of current AC set - if (isValidConformation(acCount1,ploidy1) && firstGLs.hasConformation(acCount1)) { - final double gl2 = secondGL[PLIndex]; - if (!Double.isInfinite(gl2)) { - final double firstGL = firstGLs.getLikelihoodOfConformation(acCount1); - final double num1 = MathUtils.log10MultinomialCoefficient(ploidy1, acCount1); - final double num2 = MathUtils.log10MultinomialCoefficient(ploidy2, acCount2); - final double sum = firstGL + gl2 + num1 + num2; - set.getLog10Likelihoods()[0] = MathUtils.approximateLog10SumLog10(set.getLog10Likelihoods()[0], sum); - } - } - } - set.getLog10Likelihoods()[0] += denom; - } - - double log10LofK = set.getLog10Likelihoods()[0]; - - // update the MLE if necessary - final int[] altCounts = Arrays.copyOfRange(set.getACcounts().getCounts(), 1, set.getACcounts().getCounts().length); - // TODO -- GUILLERMO THIS CODE MAY PRODUCE POSITIVE LIKELIHOODS OR -INFINITY - stateTracker.updateMLEifNeeded(Math.max(log10LofK, -Double.MAX_VALUE), altCounts); - - // apply the priors over each alternate allele - for (final int ACcount : altCounts ) { - if ( ACcount > 0 ) { - log10LofK += log10AlleleFrequencyPriors[ACcount]; - } - } - // TODO -- GUILLERMO THIS CODE MAY PRODUCE POSITIVE LIKELIHOODS OR -INFINITY - stateTracker.updateMAPifNeeded(Math.max(log10LofK, -Double.MAX_VALUE), altCounts); - - return log10LofK; - } - - //Small helper routine - is a particular AC conformation vector valid? ie are all elements non-negative and sum to ploidy? 
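For clarity, the pool-combination step that computeLofK above evaluates can be restated (our notation, not taken from the sources; the code works with the log10 of each term):

    L_{1+2}(\mathbf{k}) = \binom{n_1+n_2}{\mathbf{k}}^{-1} \sum_{\mathbf{k}_2} \binom{n_1}{\mathbf{k}-\mathbf{k}_2} \binom{n_2}{\mathbf{k}_2}\, L_1(\mathbf{k}-\mathbf{k}_2)\, L_2(\mathbf{k}_2)

where \mathbf{k} is the vector of allele counts (reference included) summing to n_1 + n_2, \binom{n}{\mathbf{k}} = n!/\prod_i k_i! is the multinomial coefficient computed via MathUtils.log10MultinomialCoefficient, n_1 and n_2 are the ploidies of the already-combined pool and of the new pool, and the sum runs over the new pool's genotype conformations \mathbf{k}_2; only splits whose remainder \mathbf{k} - \mathbf{k}_2 is a valid conformation of the first pool contribute.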
- private static boolean isValidConformation(final int[] alleleCounts, final int ploidy) { - return (Arrays.stream(alleleCounts).noneMatch(ac -> ac < 0) && MathUtils.sum(alleleCounts) == ploidy); - } - - - private static void updateACset(final int[] newSetCounts, - final Deque ACqueue, - final Map indexesToACset) { - - final ExactACcounts index = new ExactACcounts(newSetCounts); - if ( !indexesToACset.containsKey(index) ) { - final ExactACset newSet = new ExactACset(1, index); - indexesToACset.put(index, newSet); - ACqueue.add(newSet); - } - - } -} diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/afcalc/GeneralPloidyFailOverAFCalculatorProvider.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/afcalc/GeneralPloidyFailOverAFCalculatorProvider.java deleted file mode 100644 index c2d9ce34af9..00000000000 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/afcalc/GeneralPloidyFailOverAFCalculatorProvider.java +++ /dev/null @@ -1,41 +0,0 @@ -package org.broadinstitute.hellbender.tools.walkers.genotyper.afcalc; - -import org.broadinstitute.hellbender.tools.walkers.genotyper.GenotypeCalculationArgumentCollection; -import org.broadinstitute.hellbender.utils.Utils; - -/** - * Provider that defaults to the general ploidy implementation when the preferred one does not handle the required - * ploidy. - * - * @author Valentin Ruano-Rubio <valentin@broadinstitute.org> - */ -public final class GeneralPloidyFailOverAFCalculatorProvider extends AFCalculatorProvider { - - private final AFCalculator preferred; - private final AFCalculatorImplementation preferredImplementation; - private final AFCalculator failOver; - - /** - * Creates a new AF calculator provider given the genotyping arguments and logger reference. - * @param genotypeArgs genotyping parameter collection. - * @throws IllegalArgumentException if {@code genotypeArgs} is {@code null}. - */ - public GeneralPloidyFailOverAFCalculatorProvider(final GenotypeCalculationArgumentCollection genotypeArgs) { - Utils.nonNull(genotypeArgs); - preferredImplementation = AFCalculatorImplementation.bestValue(genotypeArgs.samplePloidy,genotypeArgs.MAX_ALTERNATE_ALLELES, null); - preferred = preferredImplementation.newInstance(); - failOver = AFCalculatorImplementation.EXACT_GENERAL_PLOIDY.newInstance(); - } - - /** - * {@inheritDoc} - * @param ploidy {@inheritDoc} - * @param maximumAlternativeAlleles {@inheritDoc} - * @return {@inheritDoc} - */ - @Override - public AFCalculator getInstance(final int ploidy, final int maximumAlternativeAlleles) { - return preferredImplementation.usableForParams(ploidy,maximumAlternativeAlleles) ? 
preferred : failOver; - } - -} diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/afcalc/IndependentAllelesDiploidExactAFCalculator.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/afcalc/IndependentAllelesDiploidExactAFCalculator.java deleted file mode 100644 index 7636368e483..00000000000 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/afcalc/IndependentAllelesDiploidExactAFCalculator.java +++ /dev/null @@ -1,392 +0,0 @@ -package org.broadinstitute.hellbender.tools.walkers.genotyper.afcalc; - -import com.google.common.annotations.VisibleForTesting; -import htsjdk.variant.variantcontext.*; -import org.broadinstitute.hellbender.tools.walkers.genotyper.GenotypeLikelihoodCalculators; -import org.broadinstitute.hellbender.utils.MathUtils; -import org.broadinstitute.hellbender.utils.Utils; - -import java.util.*; - -/** - * Computes the conditional bi-allelic exact results - * - * Suppose vc contains 2 alt allele: A* with C and T. This function first computes: - * - * (1) P(D | AF_c > 0 && AF_t == *) [i.e., T can be anything] - * - * it then computes the conditional probability on AF_c == 0: - * - * (2) P(D | AF_t > 0 && AF_c == 0) - * - * Thinking about this visually, we have the following likelihood matrix where each cell is - * the P(D | AF_c == i && AF_t == j): - * - * 0 AF_c > 0 - * ----------------- - * 0 | | - * |--|------------- - * a | | - * f | | - * _ | | - * t | | - * > | | - * 0 | | - * - * What we really want to know how - * - * (3) P(D | AF_c == 0 & AF_t == 0) - * - * compares with - * - * (4) P(D | AF_c > 0 || AF_t > 0) - * - * This is effectively asking for the value in the upper left vs. the sum of all cells. - * - * This class implements the conditional likelihoods summation for any number of alt - * alleles, where each alt allele has its EXACT probability of segregating calculated by - * reducing each alt B into the case XB and computing P(D | AF_b > 0 ) as follows: - * - * Suppose we have for a A/B/C site the following GLs: - * - * AA AB BB AC BC CC - * - * and we want to get the bi-allelic GLs for X/B, where X is everything not B - * - * XX = AA + AC + CC (since X = A or C) - * XB = AB + BC - * BB = BB - * - * After each allele has its probability calculated we compute the joint posterior - * as P(D | AF_* == 0) = prod_i P (D | AF_i == 0), after applying the theta^i - * prior for the ith least likely allele. 
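As a concrete illustration of the XX/XB/BB reduction described above, here is a minimal, self-contained sketch; the class name, the simplified log10SumLog10 helper, and the example likelihood values are stand-ins chosen for this note, not code from this diff or from the GATK sources.

// Illustrative sketch only: collapse tri-allelic diploid GLs (PL order AA AB BB AC BC CC) to X/B.
public class CollapseToBiallelicSketch {

    // Stable log10(10^a + 10^b + ...); a simplified stand-in for the MathUtils utility the real code uses.
    static double log10SumLog10(final double... values) {
        double max = Double.NEGATIVE_INFINITY;
        for (final double v : values) {
            max = Math.max(max, v);
        }
        if (max == Double.NEGATIVE_INFINITY) {
            return max;
        }
        double sum = 0.0;
        for (final double v : values) {
            sum += Math.pow(10.0, v - max);
        }
        return max + Math.log10(sum);
    }

    public static void main(final String[] args) {
        // Hypothetical log10 genotype likelihoods for one sample at an A/B/C site.
        final double[] gl = {-0.1, -2.0, -6.0, -3.0, -4.0, -9.0}; // AA AB BB AC BC CC
        final double xx = log10SumLog10(gl[0], gl[3], gl[5]);     // XX = AA + AC + CC
        final double xb = log10SumLog10(gl[1], gl[4]);            // XB = AB + BC
        final double bb = gl[2];                                  // BB = BB
        System.out.printf("XX=%.3f XB=%.3f BB=%.3f%n", xx, xb, bb);
    }
}

Each allele's bi-allelic result is then re-ranked and re-prioritized as the last paragraph above describes: applyMultiAllelicPriors (further down in this file) gives the allele ranked i-th by P(AF > 0) the prior log10 P_i(AF > 0) = (i + 1) * log10 P(AF > 0) of a single allele, with P_i(AF = 0) = 1 - P_i(AF > 0).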
- */ - public final class IndependentAllelesDiploidExactAFCalculator extends ExactAFCalculator { - - private static final int[] BIALLELIC_NON_INFORMATIVE_PLS = {0,0,0}; - private static final List BIALLELIC_NOCALL = Arrays.asList(Allele.NO_CALL, Allele.NO_CALL); - private static final double PHRED_2_LOG10_COEFF = -.1; - - /** - * Sorts AFCalcResults by their posteriors of AF > 0, so the - */ - private static final Comparator compareAFCalcResultsByPNonRef = Comparator.comparingDouble(o -> o.getLog10PosteriorOfAFGT0()).reversed(); - - /** - * The AFCalc model we are using to do the bi-allelic computation - */ - private final AFCalculator biAlleleExactModel; - private final GenotypeLikelihoodCalculators calculators; - - IndependentAllelesDiploidExactAFCalculator() { - biAlleleExactModel = new ReferenceDiploidExactAFCalculator(); - calculators = new GenotypeLikelihoodCalculators(); - } - - @Override - public AFCalculationResult computeLog10PNonRef(final VariantContext vc, - final int defaultPloidy, - final double[] log10AlleleFrequencyPriors, - final StateTracker stateTracker) { - Utils.nonNull(vc, "vc is null"); - Utils.nonNull(log10AlleleFrequencyPriors, "log10AlleleFrequencyPriors is null"); - Utils.nonNull(stateTracker, "stateTracker is null"); - - final List independentResultTrackers = computeAlleleIndependentExact(vc, defaultPloidy, log10AlleleFrequencyPriors); - - if ( independentResultTrackers.isEmpty() ) { - throw new IllegalStateException("Independent alleles model returned an empty list of results at VC " + vc); - } - - if ( independentResultTrackers.size() == 1 ) { - // fast path for the very common bi-allelic use case - return independentResultTrackers.get(0); - } else { - final AFCalculationResult combinedAltAllelesResult = combineAltAlleleIndependentExact(vc,defaultPloidy,log10AlleleFrequencyPriors); - // we are a multi-allelic, so we need to actually combine the results - final List withMultiAllelicPriors = applyMultiAllelicPriors(independentResultTrackers); - return combineIndependentPNonRefs(vc, withMultiAllelicPriors, combinedAltAllelesResult); - } - } - - private AFCalculationResult combineAltAlleleIndependentExact(final VariantContext vc, final int defaultPloidy, final double[] log10AlleleFrequencyPriors) { - final VariantContext combinedAltAllelesVariantContext = makeCombinedAltAllelesVariantContext(vc); - return biAlleleExactModel.getLog10PNonRef(combinedAltAllelesVariantContext, defaultPloidy, vc.getNAlleles() - 1, log10AlleleFrequencyPriors); - } - - private VariantContext makeCombinedAltAllelesVariantContext(final VariantContext vc) { - final int nAltAlleles = vc.getNAlleles() - 1; - - if ( nAltAlleles == 1 ) { - return vc; - } - final VariantContextBuilder vcb = new VariantContextBuilder(vc); - final Allele reference = vcb.getAlleles().get(0); - vcb.alleles(Arrays.asList(reference, Allele.NON_REF_ALLELE)); - final int genotypeCount = calculators.genotypeCount(2, vc.getNAlleles()); - final double[] hetLikelihoods = new double[vc.getNAlleles() - 1]; - final double[] homAltLikelihoods = new double[genotypeCount - hetLikelihoods.length - 1]; - final double[] newLikelihoods = new double[3]; - final List newGenotypes = new ArrayList<>(vc.getNSamples()); - for (final Genotype oldGenotype : vc.getGenotypes()) { - final GenotypeBuilder gb = new GenotypeBuilder(oldGenotype); - final List oldAlleles = oldGenotype.getAlleles(); - if (oldAlleles != null) { - final List newAlleles = new ArrayList<>(oldAlleles.size()); - for (int i = 0; i < oldAlleles.size(); i++) { - final Allele 
oldAllele = oldAlleles.get(i); - if (oldAllele.isReference()) { - newAlleles.add(reference); - } else if (oldAllele.isNoCall()) { - newAlleles.add(Allele.NO_CALL); - } else { - newAlleles.add(Allele.NON_REF_ALLELE); - } - } - gb.alleles(newAlleles); - } - if (oldGenotype.isNonInformative()) { - gb.PL(BIALLELIC_NON_INFORMATIVE_PLS); - } else if (combineAltAlleleLikelihoods(oldGenotype, genotypeCount, newLikelihoods, hetLikelihoods, homAltLikelihoods)) { - gb.PL(newLikelihoods); - } - - newGenotypes.add(gb.make()); - } - return vcb.genotypes(newGenotypes).make(); - } - - /** - * Compute the conditional exact AFCalcResult for each allele in vc independently, returning - * the result of each, in order of the alt alleles in VC - * - * @param vc the VariantContext we want to analyze, with at least 1 alt allele - * @param log10AlleleFrequencyPriors the priors - * @return a list of the AFCalcResults for each bi-allelic sub context of vc - */ - private List computeAlleleIndependentExact(final VariantContext vc, final int defaultPloidy, - final double[] log10AlleleFrequencyPriors) { - final List results = new LinkedList<>(); - - for ( final VariantContext subvc : makeAlleleConditionalContexts(vc) ) { - final AFCalculationResult resultTracker = biAlleleExactModel.getLog10PNonRef(subvc, defaultPloidy, vc.getNAlleles() - 1, log10AlleleFrequencyPriors); - results.add(resultTracker); - } - - return results; - } - - /** - * Returns the bi-allelic variant context for each alt allele in vc with bi-allelic likelihoods, in order - * - * @param vc the variant context to split. Must have n.alt.alleles > 1 - * @return a bi-allelic variant context for each alt allele in vc - */ - @VisibleForTesting - static List makeAlleleConditionalContexts(final VariantContext vc) { - final int nAltAlleles = vc.getNAlleles() - 1; - - if ( nAltAlleles == 1 ) { - // fast path for bi-allelic case. - return Collections.singletonList(vc); - } else { - // go through the work of ripping up the VC into its biallelic components - final List vcs = new LinkedList<>(); - - for ( int altI = 0; altI < nAltAlleles; altI++ ) { - vcs.add(biallelicCombinedGLs(vc, altI + 1)); - } - - return vcs; - } - } - - /** - * Create a single bi-allelic variant context from rootVC with alt allele with index altAlleleIndex - * - * @param rootVC the root (potentially multi-allelic) variant context - * @param altAlleleIndex index of the alt allele, from 0 == first alt allele - * @return a bi-allelic variant context based on rootVC - */ - private static VariantContext biallelicCombinedGLs(final VariantContext rootVC, final int altAlleleIndex) { - if ( rootVC.isBiallelic() ) { - return rootVC; - } - final int nAlts = rootVC.getNAlleles() - 1; - final List biallelicGenotypes = new ArrayList<>(rootVC.getNSamples()); - for ( final Genotype g : rootVC.getGenotypes() ) { - biallelicGenotypes.add(combineGLsPrecise(g, altAlleleIndex, nAlts)); - } - - final VariantContextBuilder vcb = new VariantContextBuilder(rootVC); - final Allele altAllele = rootVC.getAlternateAllele(altAlleleIndex - 1); - vcb.alleles(Arrays.asList(rootVC.getReference(), altAllele)); - vcb.genotypes(biallelicGenotypes); - return vcb.make(); - } - - /** - * Returns a new Genotype with the PLs of the multi-allelic original reduced to a bi-allelic case. - * - *

Uses the log-sum-exp trick in order to work well with very low PLs. - *
- * This is handled in the following way: - *
- * Suppose we have for an A/B/C site the following GLs: - *
- * AA AB BB AC BC CC - *
- * and we want to get the bi-allelic GLs for X/B, where X is everything not B: - *
- * XX = AA + AC + CC (since X = A or C) - * XB = AB + BC - * BB = BB - *
- * This implementation uses the log-sum trick in order to avoid numeric instability. - *

- * - * @param original the original multi-allelic genotype - * @param altIndex the index of the alt allele we wish to keep in the bialleic case -- with ref == 0 - * @param nAlts the total number of alt alleles - * @return a new biallelic genotype with appropriate PLs - */ - @VisibleForTesting - static Genotype combineGLsPrecise(final Genotype original, final int altIndex, final int nAlts) { - - if ( original.isNonInformative() ) { - return new GenotypeBuilder(original).PL(BIALLELIC_NON_INFORMATIVE_PLS).alleles(BIALLELIC_NOCALL).make(); - } - - if ( altIndex < 1 || altIndex > nAlts ) { - throw new IllegalStateException("altIndex must be between 1 and nAlts " + nAlts); - } - - final int[] pls = original.getPL(); - - final int nAlleles = nAlts + 1; - - final int plCount = pls.length; - - double BB = 0; - final double[] XBvalues = new double[nAlleles - 1]; - final double[] XXvalues = new double[plCount - nAlleles]; - - int xbOffset = 0; - int xxOffset = 0; - for ( int index = 0; index < plCount; index++ ) { - final GenotypeLikelihoods.GenotypeLikelihoodsAllelePair pair = GenotypeLikelihoods.getAllelePair(index); - final int i = pair.alleleIndex1; - final int j = pair.alleleIndex2; - if (i == j) { - if (i == altIndex) { - BB = PHRED_2_LOG10_COEFF * pls[index]; - } else { - XXvalues[xxOffset++] = PHRED_2_LOG10_COEFF * pls[index]; - } - } else if (i == altIndex || j == altIndex) { - XBvalues[xbOffset++] = PHRED_2_LOG10_COEFF * pls[index]; - } else { - XXvalues[xxOffset++] = PHRED_2_LOG10_COEFF * pls[index]; - } - } - - final double XB = MathUtils.log10SumLog10(XBvalues); - final double XX = MathUtils.log10SumLog10(XXvalues); - - final double[] GLs = { XX, XB, BB}; - return new GenotypeBuilder(original).PL(GLs).alleles(BIALLELIC_NOCALL).make(); - } - - @VisibleForTesting - static List applyMultiAllelicPriors(final List conditionalPNonRefResults) { - final List sorted = new ArrayList<>(conditionalPNonRefResults); - - // sort the results, so the most likely allele is first - Collections.sort(sorted, compareAFCalcResultsByPNonRef); - - final double lastPosteriorGt0 = sorted.get(0).getLog10PosteriorOfAFGT0(); - final double log10SingleAllelePriorOfAFGt0 = conditionalPNonRefResults.get(0).getLog10PriorOfAFGT0(); - - for ( int i = 0; i < sorted.size(); i++ ) { - if ( sorted.get(i).getLog10PosteriorOfAFGT0() > lastPosteriorGt0 ) { - throw new IllegalStateException("pNonRefResults not sorted: lastPosteriorGt0 " + lastPosteriorGt0 + " but current is " + sorted.get(i).getLog10PosteriorOfAFGT0()); - } - - final double log10PriorAFGt0 = (i + 1) * log10SingleAllelePriorOfAFGt0; - final double log10PriorAFEq0 = Math.log10(1 - Math.pow(10, log10PriorAFGt0)); - final double[] thetaTONPriors = new double[] { log10PriorAFEq0, log10PriorAFGt0 }; - - // bind pNonRef for allele to the posterior value of the AF > 0 with the new adjusted prior - sorted.set(i, sorted.get(i).copyWithNewPriors(MathUtils.normalizeLog10(thetaTONPriors))); - } - - return sorted; - } - - /** - * Take the independent estimates of pNonRef for each alt allele and combine them into a single result - * - * Given n independent calculations for each of n alternate alleles create a single - * combined AFCalcResult with: - * - * priors for AF == 0 equal to theta^N for the nth least likely allele - * posteriors that reflect the combined chance that any alleles are segregating and corresponding - * likelihoods - * combined MLEs in the order of the alt alleles in vc - * - * @param sortedResultsWithThetaNPriors the pNonRef result for each allele 
independently - */ - private static AFCalculationResult combineIndependentPNonRefs(final VariantContext vc, - final List sortedResultsWithThetaNPriors, - final AFCalculationResult combinedAltAllelesResult) { - - - final int nAltAlleles = sortedResultsWithThetaNPriors.size(); - final int[] alleleCountsOfMLE = new int[nAltAlleles]; - final Map log10pRefByAllele = new LinkedHashMap<>(nAltAlleles); - - // the sum of the log10 posteriors for AF == 0 and AF > 0 to determine joint probs - - for ( final AFCalculationResult sortedResultWithThetaNPriors : sortedResultsWithThetaNPriors ) { - final Allele altAllele = sortedResultWithThetaNPriors.getAllelesUsedInGenotyping().get(1); - final int altI = vc.getAlleles().indexOf(altAllele) - 1; - - // MLE of altI allele is simply the MLE of this allele in altAlleles - alleleCountsOfMLE[altI] = sortedResultWithThetaNPriors.getAlleleCountAtMLE(altAllele); - - // bind pNonRef for allele to the posterior value of the AF > 0 with the new adjusted prior - log10pRefByAllele.put(altAllele, sortedResultWithThetaNPriors.getLog10PosteriorOfAFEq0()); - } - - return new AFCalculationResult(alleleCountsOfMLE, vc.getAlleles(), - // necessary to ensure all values < 0 - MathUtils.normalizeLog10(new double[] { combinedAltAllelesResult.getLog10LikelihoodOfAFEq0(), combinedAltAllelesResult.getLog10LikelihoodOfAFGT0() }), - // priors incorporate multiple alt alleles, must be normalized - MathUtils.normalizeLog10(new double[] { combinedAltAllelesResult.getLog10PriorOfAFEq0(), combinedAltAllelesResult.getLog10PriorOfAFGT0() }), - log10pRefByAllele); - } - - private static boolean combineAltAlleleLikelihoods(final Genotype g, final int plMaxIndex, final double[] dest, - final double[] hetLikelihoods, final double[] homAltLikelihoods) { - - final int[] pls = g.getPL(); - if (pls == null) { - return false; - } - int hetNextIndex = 0; - int homAltNextIndex = 0; - for (int plIndex = 1; plIndex < plMaxIndex; plIndex++) { - final GenotypeLikelihoods.GenotypeLikelihoodsAllelePair alleles = GenotypeLikelihoods.getAllelePair(plIndex); - if (alleles.alleleIndex1 == 0 || alleles.alleleIndex2 == 0) { - hetLikelihoods[hetNextIndex++] = pls[plIndex] * PHRED_2_LOG10_COEFF; - } else { - homAltLikelihoods[homAltNextIndex++] = pls[plIndex] * PHRED_2_LOG10_COEFF; - } - } - dest[0] = pls[0] * PHRED_2_LOG10_COEFF; - dest[1] = MathUtils.approximateLog10SumLog10(hetLikelihoods); - dest[2] = MathUtils.approximateLog10SumLog10(homAltLikelihoods); - return true; - } -} diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/afcalc/OriginalDiploidExactAFCalculator.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/afcalc/OriginalDiploidExactAFCalculator.java deleted file mode 100644 index 85ae7e2e906..00000000000 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/afcalc/OriginalDiploidExactAFCalculator.java +++ /dev/null @@ -1,162 +0,0 @@ -package org.broadinstitute.hellbender.tools.walkers.genotyper.afcalc; - -import htsjdk.variant.variantcontext.Allele; -import htsjdk.variant.variantcontext.VariantContext; -import org.apache.commons.lang3.tuple.Pair; -import org.apache.commons.math3.util.MathArrays; -import org.broadinstitute.hellbender.utils.MathUtils; -import org.broadinstitute.hellbender.utils.Utils; - -import java.util.Collections; -import java.util.List; -import java.util.Map; - -/** - * Original bi-allelic ~O(N) implementation. 
Kept here for posterity and reference - */ -final class OriginalDiploidExactAFCalculator extends ExactAFCalculator { - - @Override - protected AFCalculationResult computeLog10PNonRef(final VariantContext vc, - @SuppressWarnings("unused") - final int defaultPloidy, - final double[] log10AlleleFrequencyPriors, - final StateTracker stateTracker) { - Utils.nonNull(vc, "vc is null"); - Utils.nonNull(log10AlleleFrequencyPriors, "log10AlleleFrequencyPriors is null"); - Utils.nonNull(stateTracker, "stateTracker is null"); - - final double[] log10AlleleFrequencyLikelihoods = new double[log10AlleleFrequencyPriors.length]; - final double[] log10AlleleFrequencyPosteriors = new double[log10AlleleFrequencyPriors.length]; - final Pair result = linearExact(vc, log10AlleleFrequencyPriors, log10AlleleFrequencyLikelihoods, log10AlleleFrequencyPosteriors); - final int lastK = result.getLeft(); - final int mleK = result.getRight(); - - final double log10LikelihoodAFGt0 = lastK == 0 ? MathUtils.LOG10_P_OF_ZERO : MathUtils.log10SumLog10(log10AlleleFrequencyLikelihoods, 1, lastK+1); - final double[] log10Likelihoods = new double[]{log10AlleleFrequencyLikelihoods[0], log10LikelihoodAFGt0}; - final double[] log10Priors = new double[]{log10AlleleFrequencyPriors[0], MathUtils.log10SumLog10(log10AlleleFrequencyPriors, 1)}; - final double[] log10Posteriors = MathArrays.ebeAdd(log10Likelihoods, log10Priors); - - final double log10PRef = log10Posteriors[1] > log10Posteriors[0] ? MathUtils.LOG10_P_OF_ZERO : 0.0; - final Map log10pRefByAllele = Collections.singletonMap(vc.getAlternateAllele(0), log10PRef); - - return new AFCalculationResult(new int[]{mleK}, vc.getAlleles(), - MathUtils.normalizeLog10(log10Likelihoods), - MathUtils.normalizeLog10(log10Priors), - log10pRefByAllele); - } - - /** - * A simple data structure that holds the current, prev, and prev->prev likelihoods vectors - * for the exact model calculation - */ - private static final class ExactACCache { - double[] kMinus2, kMinus1, kMinus0; - - private static double[] create(final int n) { - return new double[n]; - } - - ExactACCache(final int n) { - kMinus2 = create(n); - kMinus1 = create(n); - kMinus0 = create(n); - } - - public void rotate() { - final double[] tmp = kMinus2; - kMinus2 = kMinus1; - kMinus1 = kMinus0; - kMinus0 = tmp; - } - - public double[] getkMinus2() { - return kMinus2; - } - - public double[] getkMinus1() { - return kMinus1; - } - - public double[] getkMinus0() { - return kMinus0; - } - } - - private static Pair linearExact(final VariantContext vc, - final double[] log10AlleleFrequencyPriors, - final double[] log10AlleleFrequencyLikelihoods, - final double[] log10AlleleFrequencyPosteriors) { - final List genotypeLikelihoods = getGLs(vc.getGenotypes(), true, vc.hasAllele(Allele.NON_REF_ALLELE)); - final int numSamples = genotypeLikelihoods.size()-1; - final int numChr = 2*numSamples; - - final ExactACCache logY = new ExactACCache(numSamples+1); - logY.getkMinus0()[0] = 0.0; // the zero case - - double maxLog10L = Double.NEGATIVE_INFINITY; - boolean done = false; - int lastK = -1, mleK = -1; - - for (int k=0; k <= numChr && ! 
done; k++ ) { - final double[] kMinus0 = logY.getkMinus0(); - - if ( k == 0 ) { // special case for k = 0 - for ( int j=1; j <= numSamples; j++ ) { - kMinus0[j] = kMinus0[j-1] + genotypeLikelihoods.get(j)[0]; - } - } else { // k > 0 - final double[] kMinus1 = logY.getkMinus1(); - final double[] kMinus2 = logY.getkMinus2(); - - for ( int j=1; j <= numSamples; j++ ) { - final double[] gl = genotypeLikelihoods.get(j); - final double logDenominator = MathUtils.log10(2*j) + MathUtils.log10(2*j-1); - - double aa = Double.NEGATIVE_INFINITY; - double ab = Double.NEGATIVE_INFINITY; - if (k < 2*j-1) { - aa = MathUtils.log10(2 * j - k) + MathUtils.log10(2 * j - k - 1) + kMinus0[j - 1] + gl[0]; - } - - if (k < 2*j) { - ab = MathUtils.log10(2 * k) + MathUtils.log10(2 * j - k) + kMinus1[j - 1] + gl[1]; - } - - final double log10Max; - if (k > 1) { - final double bb = MathUtils.log10(k) + MathUtils.log10(k-1) + kMinus2[j-1] + gl[2]; - log10Max = MathUtils.approximateLog10SumLog10(aa, ab, bb); - } else { - // we know we aren't considering the BB case, so we can use an optimized log10 function - log10Max = MathUtils.approximateLog10SumLog10(aa, ab); - } - - // finally, update the L(j,k) value - kMinus0[j] = log10Max - logDenominator; - } - } - - // update the posteriors vector - final double log10LofK = kMinus0[numSamples]; - log10AlleleFrequencyLikelihoods[k] = log10LofK; - log10AlleleFrequencyPosteriors[k] = log10LofK + log10AlleleFrequencyPriors[k]; - - // can we abort early? - lastK = k; - if ( log10LofK > maxLog10L ) { - maxLog10L = log10LofK; - mleK = k; - } - - if ( log10LofK < maxLog10L - StateTracker.MAX_LOG10_ERROR_TO_STOP_EARLY ) { - //if ( DEBUG ) System.out.printf(" *** breaking early k=%d log10L=%.2f maxLog10L=%.2f%n", k, log10LofK, maxLog10L); - done = true; - } - - logY.rotate(); - } - - return Pair.of(lastK, mleK); - } -} diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/afcalc/ReferenceDiploidExactAFCalculator.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/afcalc/ReferenceDiploidExactAFCalculator.java deleted file mode 100644 index 0c99b0f8d8f..00000000000 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/afcalc/ReferenceDiploidExactAFCalculator.java +++ /dev/null @@ -1,262 +0,0 @@ -package org.broadinstitute.hellbender.tools.walkers.genotyper.afcalc; - -import htsjdk.variant.variantcontext.Allele; -import htsjdk.variant.variantcontext.GenotypeLikelihoods; -import htsjdk.variant.variantcontext.VariantContext; -import org.broadinstitute.hellbender.utils.MathUtils; -import org.broadinstitute.hellbender.utils.Utils; - -import java.util.*; - -/** - * Reference implementation of multi-allelic EXACT model. Extremely slow for many alternate alleles. 
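Restating the linearExact update above in linear space may help readers following the log10 arithmetic (our notation; the code evaluates the log10 of each term):

    Y_{j,k} = \frac{(2j-k)(2j-k-1)\,Y_{j-1,k}\,g_j^{AA} + 2k(2j-k)\,Y_{j-1,k-1}\,g_j^{AB} + k(k-1)\,Y_{j-1,k-2}\,g_j^{BB}}{2j\,(2j-1)}

Here Y_{0,0} = 1, g_j^{AA}, g_j^{AB}, g_j^{BB} are sample j's genotype likelihoods, and Y_{N,k} for N samples is the likelihood of alternate allele count k; the AA and AB terms are included only under the k < 2j-1 and k < 2j guards seen in the loop, and the BB term only for k > 1.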
- */ -public final class ReferenceDiploidExactAFCalculator extends ExactAFCalculator { - - private static final double LOG10_OF_2 = MathUtils.log10(2); - - protected ReferenceDiploidExactAFCalculator() { - } - - @Override - protected AFCalculationResult computeLog10PNonRef(final VariantContext vc, final int defaultPloidy, - final double[] log10AlleleFrequencyPriors, final StateTracker stateTracker) { - Utils.nonNull(vc, "vc is null"); - Utils.nonNull(log10AlleleFrequencyPriors, "log10AlleleFrequencyPriors is null"); - Utils.nonNull(stateTracker, "stateTracker is null"); - final int numAlternateAlleles = vc.getNAlleles() - 1; - - final List genotypeLikelihoods = getGLs(vc.getGenotypes(), true, vc.hasAllele(Allele.NON_REF_ALLELE)); - final int numSamples = genotypeLikelihoods.size()-1; - final int numChr = 2*numSamples; - - // queue of AC conformations to process - final Deque ACqueue = new LinkedList<>(); - - // mapping of ExactACset indexes to the objects - final Map indexesToACset = new LinkedHashMap<>(numChr+1); - - // add AC=0 to the queue - final int[] zeroCounts = new int[numAlternateAlleles]; - final ExactACset zeroSet = new ExactACset(numSamples+1, new ExactACcounts(zeroCounts)); - ACqueue.add(zeroSet); - indexesToACset.put(zeroSet.getACcounts(), zeroSet); - - while ( !ACqueue.isEmpty() ) { - - // compute log10Likelihoods - final ExactACset set = ACqueue.remove(); - - calculateAlleleCountConformation(set, genotypeLikelihoods, numChr, ACqueue, indexesToACset, log10AlleleFrequencyPriors,stateTracker); - - // clean up memory - indexesToACset.remove(set.getACcounts()); - } - - return getResultFromFinalState(vc, log10AlleleFrequencyPriors, stateTracker); - } - - - private double calculateAlleleCountConformation(final ExactACset set, - final List genotypeLikelihoods, - final int numChr, - final Deque ACqueue, - final Map indexesToACset, - final double[] log10AlleleFrequencyPriors, - final StateTracker stateTracker) { - - // compute the log10Likelihoods - computeLofK(set, genotypeLikelihoods, log10AlleleFrequencyPriors, stateTracker); - - final double log10LofK = set.getLog10Likelihoods()[set.getLog10Likelihoods().length-1]; - - // can we abort early because the log10Likelihoods are so small? 
- if ( stateTracker.abort(log10LofK, set.getACcounts(), true, false) ) { - return log10LofK; - } - - // iterate over higher frequencies if possible - final int ACwiggle = numChr - set.getACsum(); - if ( ACwiggle == 0 ){ // all alternate alleles already sum to 2N so we cannot possibly go to higher frequencies - return log10LofK; - } - - final int numAltAlleles = set.getACcounts().getCounts().length; - - // add conformations for the k+1 case - for ( int allele = 0; allele < numAltAlleles; allele++ ) { - final int[] ACcountsClone = set.getACcounts().getCounts().clone(); - ACcountsClone[allele]++; - // to get to this conformation, a sample would need to be AB (remember that ref=0) - final int PLindex = GenotypeLikelihoods.calculatePLindex(0, allele + 1); - updateACset(ACcountsClone, numChr, set, PLindex, ACqueue, indexesToACset, genotypeLikelihoods); - } - - // add conformations for the k+2 case if it makes sense; note that the 2 new alleles may be the same or different - if ( ACwiggle > 1 ) { - final List differentAlleles = new ArrayList<>(numAltAlleles * numAltAlleles); - final List sameAlleles = new ArrayList<>(numAltAlleles); - - for ( int allele_i = 0; allele_i < numAltAlleles; allele_i++ ) { - for ( int allele_j = allele_i; allele_j < numAltAlleles; allele_j++ ) { - final int[] ACcountsClone = set.getACcounts().getCounts().clone(); - ACcountsClone[allele_i]++; - ACcountsClone[allele_j]++; - - // to get to this conformation, a sample would need to be BB or BC (remember that ref=0, so add one to the index) - final int PLindex = GenotypeLikelihoods.calculatePLindex(allele_i + 1, allele_j + 1); - if ( allele_i == allele_j ) { - sameAlleles.add(new DependentSet(ACcountsClone, PLindex)); - } else { - differentAlleles.add(new DependentSet(ACcountsClone, PLindex)); - } - } - } - - // IMPORTANT: we must first add the cases where the 2 new alleles are different so that the queue maintains its ordering - for ( final DependentSet dependent : differentAlleles ) { - updateACset(dependent.ACcounts, numChr, set, dependent.PLindex, ACqueue, indexesToACset, genotypeLikelihoods); - } - for ( final DependentSet dependent : sameAlleles ) { - updateACset(dependent.ACcounts, numChr, set, dependent.PLindex, ACqueue, indexesToACset, genotypeLikelihoods); - } - } - - return log10LofK; - } - - private static void computeLofK(final ExactACset set, - final List genotypeLikelihoods, - final double[] log10AlleleFrequencyPriors, - final StateTracker stateTracker) { - - final double[] setLog10Likelihoods = set.getLog10Likelihoods(); - setLog10Likelihoods[0] = 0.0; // the zero case - final int totalK = set.getACsum(); - - // special case for k = 0 over all k - if ( totalK == 0 ) { - for (int j = 1, n = setLog10Likelihoods.length; j < n; j++ ) { - setLog10Likelihoods[j] = setLog10Likelihoods[j - 1] + genotypeLikelihoods.get(j)[HOM_REF_INDEX]; - } - - final double log10Lof0 = setLog10Likelihoods[setLog10Likelihoods.length-1]; - stateTracker.setLog10LikelihoodOfAFzero(log10Lof0); - stateTracker.setLog10PosteriorOfAFzero(log10Lof0 + log10AlleleFrequencyPriors[0]); - return; - } - - // if we got here, then k > 0 for at least one k. 
- // the non-AA possible conformations were already dealt with by pushes from dependent sets; - // now deal with the AA case (which depends on previous cells in this column) and then update the L(j,k) value - for (int j = 1, n = setLog10Likelihoods.length; j < n; j++ ) { - if ( totalK < 2*j-1 ) { - final double[] gl = genotypeLikelihoods.get(j); - final double conformationValue = MathUtils.log10(2*j-totalK) + MathUtils.log10(2*j-totalK-1) + setLog10Likelihoods[j-1] + gl[HOM_REF_INDEX]; - setLog10Likelihoods[j] = MathUtils.approximateLog10SumLog10(setLog10Likelihoods[j], conformationValue); - } - - final double logDenominator = MathUtils.log10(2*j) + MathUtils.log10(2*j-1); - setLog10Likelihoods[j] = setLog10Likelihoods[j] - logDenominator; - } - - double log10LofK = setLog10Likelihoods[setLog10Likelihoods.length-1]; - - // update the MLE if necessary - stateTracker.updateMLEifNeeded(log10LofK, set.getACcounts().getCounts()); - - // apply the priors over each alternate allele - for ( final int ACcount : set.getACcounts().getCounts() ) { - if ( ACcount > 0 ) { - log10LofK += log10AlleleFrequencyPriors[ACcount]; - } - } - - stateTracker.updateMAPifNeeded(log10LofK, set.getACcounts().getCounts()); - } - - private static final class DependentSet { - public final int[] ACcounts; - public final int PLindex; - - DependentSet(final int[] ACcounts, final int PLindex) { - this.ACcounts = ACcounts; - this.PLindex = PLindex; - } - } - - - // adds the ExactACset represented by the ACcounts to the ACqueue if not already there (creating it if needed) and - // also pushes its value to the given callingSetIndex. - private static void updateACset(final int[] newSetCounts, - final int numChr, - final ExactACset dependentSet, - final int PLsetIndex, - final Queue ACqueue, - final Map indexesToACset, - final List genotypeLikelihoods) { - final ExactACcounts index = new ExactACcounts(newSetCounts); - if ( !indexesToACset.containsKey(index) ) { - final ExactACset set = new ExactACset(numChr/2 +1, index); - indexesToACset.put(index, set); - ACqueue.add(set); - } - - // push data from the dependency to the new set - pushData(indexesToACset.get(index), dependentSet, PLsetIndex, genotypeLikelihoods); - } - - private static void pushData(final ExactACset targetSet, - final ExactACset dependentSet, - final int PLsetIndex, - final List genotypeLikelihoods) { - final int totalK = targetSet.getACsum(); - - final double[] targetSetLog10Likelihoods = targetSet.getLog10Likelihoods(); - final double[] dependentSetLog10Likelihoods = dependentSet.getLog10Likelihoods(); - final int[] counts = targetSet.getACcounts().getCounts(); - - for ( int j = 1, n = targetSetLog10Likelihoods.length; j < n; j++ ) { - if (2 * j >= totalK) { // skip impossible conformations - final double[] gl = genotypeLikelihoods.get(j); - final double conformationValue = - determineCoefficient(PLsetIndex, j, counts, totalK) + dependentSetLog10Likelihoods[j-1] + gl[PLsetIndex]; - targetSetLog10Likelihoods[j] = MathUtils.approximateLog10SumLog10(targetSetLog10Likelihoods[j], conformationValue); - } - } - } - - private static double determineCoefficient(final int PLindex, final int j, final int[] ACcounts, final int totalK) { - // the closed form representation generalized for multiple alleles is as follows: - // AA: (2j - totalK) * (2j - totalK - 1) - // AB: 2k_b * (2j - totalK) - // AC: 2k_c * (2j - totalK) - // BB: k_b * (k_b - 1) - // BC: 2 * k_b * k_c - // CC: k_c * (k_c - 1) - - // find the 2 alleles that are represented by this PL index - final 
GenotypeLikelihoods.GenotypeLikelihoodsAllelePair alleles = GenotypeLikelihoods.getAllelePair(PLindex); - - // *** note that throughout this method we subtract one from the alleleIndex because ACcounts *** - // *** doesn't consider the reference allele whereas the GenotypeLikelihoods PL cache does. *** - - // the AX het case - if ( alleles.alleleIndex1 == 0 ) { - return MathUtils.log10(2 * ACcounts[alleles.alleleIndex2 - 1]) + MathUtils.log10(2 * j - totalK); - } - - final int k_i = ACcounts[alleles.alleleIndex1-1]; - - // the hom var case (e.g. BB, CC, DD) - final double coeff; - if ( alleles.alleleIndex1 == alleles.alleleIndex2 ) { - coeff = MathUtils.log10(k_i) + MathUtils.log10(k_i - 1); - } else { // the het non-ref case (e.g. BC, BD, CD) - final int k_j = ACcounts[alleles.alleleIndex2-1]; - coeff = LOG10_OF_2 + MathUtils.log10(k_i) + MathUtils.log10(k_j); - } - - return coeff; - } -} diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/afcalc/StateTracker.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/afcalc/StateTracker.java deleted file mode 100644 index c6465f5deaf..00000000000 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/afcalc/StateTracker.java +++ /dev/null @@ -1,302 +0,0 @@ -package org.broadinstitute.hellbender.tools.walkers.genotyper.afcalc; - -import com.google.common.annotations.VisibleForTesting; -import htsjdk.variant.variantcontext.Allele; -import org.broadinstitute.hellbender.utils.MathUtils; - -import java.util.Arrays; -import java.util.LinkedHashMap; -import java.util.List; -import java.util.Map; - -/** - * TODO this class (+AFCalculator) is a bit messy... it seems that it combines "debugging" (unnecessarily adding CPU cost in production) - * TODO but it also contains important part of the AF calculation state... why mix both!!!? It seems that the second - * TODO part could be just blend into AFCalculator ... one one hand you want to reduce classes code size ... but these - * TODO two classes code seems to be quite intertwine and makes it difficult to understand what is going on. - * in the production setting without much need - * - * Keeps track of the state information during the exact model AF calculation. - * - * Tracks things like the MLE and MAP AC values, their corresponding likelihood and posterior - * values, the likelihood of the AF == 0 state, and the number of evaluations needed - * by the calculation to compute the P(AF == 0) - */ -final class StateTracker { - private static final double VALUE_NOT_CALCULATED = Double.NEGATIVE_INFINITY; - static final double MAX_LOG10_ERROR_TO_STOP_EARLY = 6; // we want the calculation to be accurate to 1 / 10^6 - - /** - * These variables are intended to contain the MLE and MAP (and their corresponding allele counts) - * of the site over all alternate alleles - */ - private double log10MLE; - private double log10MAP; - - /** - * Returns a vector with maxAltAlleles values containing AC values at the MLE - * - * The values of the ACs for this call are stored in the getAllelesUsedInGenotyping order, - * starting from index 0 (i.e., the first alt allele is at 0). The vector is always - * maxAltAlleles in length, and so only the first getAllelesUsedInGenotyping.size() - 1 values - * are meaningful. - */ - private int[] alleleCountsOfMLE; - private int[] alleleCountsOfMAP; - - /** - * A vector of log10 likelihood values seen, for future summation. 
When the size of the - * vector is exceeed -- because we've pushed more posteriors than there's space to hold - * -- we simply sum up the existing values, make that the first value, and continue. - */ - private final double[] log10LikelihoodsForAFGt0 = new double[LIKELIHOODS_CACHE_SIZE]; - private static final int LIKELIHOODS_CACHE_SIZE = 5000; - private int log10LikelihoodsForAFGt0CacheIndex = 0; - - /** - * The actual sum of the likelihoods. Null if the sum hasn't been computed yet - */ - private Double log10LikelihoodsForAFGt0Sum = null; - - /** - * Contains the likelihood for the site's being monomorphic (i.e. AF=0 for all alternate alleles) - */ - private double log10LikelihoodOfAFzero = 0.0; - - /** - * The list of alleles actually used in computing the AF - */ - private List allelesUsedInGenotyping = null; - - /** - * Create a results object capability of storing results for calls with up to maxAltAlleles - * - * @param maxAltAlleles an integer >= 1 - */ - StateTracker(final int maxAltAlleles) { - if ( maxAltAlleles < 0 ) { - throw new IllegalArgumentException("maxAltAlleles must be >= 0, saw " + maxAltAlleles); - } - - alleleCountsOfMLE = new int[maxAltAlleles]; - alleleCountsOfMAP = new int[maxAltAlleles]; - - reset(); - } - - /** - * Is the likelihood of configuration K too low to consider, related to the - * maximum likelihood seen already? - * - * @param log10LofK the log10 likelihood of the configuration we're considering analyzing - * @return true if the configuration cannot meaningfully contribute to our likelihood sum - */ - private boolean tooLowLikelihood(final double log10LofK) { - return log10LofK < log10MLE - MAX_LOG10_ERROR_TO_STOP_EARLY; - } - - /** - * @return true iff all ACs in this object are less than or equal to their corresponding ACs in the provided set - */ - private boolean isLowerAC(final ExactACcounts otherACs, final boolean otherACsContainsReference) { - final int[] otherACcounts = otherACs.getCounts(); - - final int firstAltAlleleIndex = otherACsContainsReference ? 1 : 0; - - for ( int i = firstAltAlleleIndex; i < otherACcounts.length; i++ ) { - if ( alleleCountsOfMLE[i - firstAltAlleleIndex] > otherACcounts[i] ) { - return false; - } - } - - return true; - } - - /** - * Should we stop exploring paths from ACs, given it's log10LofK - * - * @param log10LofK the log10LofK of these ACs - * @param ACs the ACs of this state - * @param exactACcountsContainReference whether the {@code ACs} contains the reference allele count (index == 0) beside all other alternative alleles. 
- * @return return true if there's no reason to continue with subpaths of AC, or false otherwise - */ - @VisibleForTesting - boolean abort(final double log10LofK, final ExactACcounts ACs, final boolean enforceLowerACs, final boolean exactACcountsContainReference) { - return tooLowLikelihood(log10LofK) && (!enforceLowerACs || isLowerAC(ACs,exactACcountsContainReference)); - } - - @VisibleForTesting - int[] getAlleleCountsOfMAP() { - return alleleCountsOfMAP; - } - - /** - * @return the likelihoods summed across all AC values for AC > 0 - */ - private double getLog10LikelihoodOfAFNotZero() { - if ( log10LikelihoodsForAFGt0Sum == null ) { - if ( log10LikelihoodsForAFGt0CacheIndex == 0 ){ // there's nothing to sum up, so make the sum equal to the smallest thing we have - log10LikelihoodsForAFGt0Sum = MathUtils.LOG10_P_OF_ZERO; - } else { - log10LikelihoodsForAFGt0Sum = MathUtils.log10SumLog10(log10LikelihoodsForAFGt0, 0, log10LikelihoodsForAFGt0CacheIndex); - } - } - return log10LikelihoodsForAFGt0Sum; - } - - /** - * @return the log10 likelihood of AF == 0 - */ - private double getLog10LikelihoodOfAFzero() { - return log10LikelihoodOfAFzero; - } - - /** - * Convert this state to an corresponding AFCalcResult. - * - * Assumes that the values in this state have been filled in with meaningful values during the calculation. - * For example, that the allelesUsedInGenotyping has been set, that the alleleCountsOfMLE contains meaningful - * values, etc. - * - * @param log10PriorsByAC the priors by AC - * - * @return an AFCalcResult summarizing the final results of this calculation - */ - AFCalculationResult toAFCalculationResult(final double[] log10PriorsByAC) { - final int [] subACOfMLE = Arrays.copyOf(alleleCountsOfMLE, allelesUsedInGenotyping.size() - 1); - final double[] log10Likelihoods = MathUtils.normalizeLog10(new double[]{getLog10LikelihoodOfAFzero(), getLog10LikelihoodOfAFNotZero()}); - final double[] log10Priors = MathUtils.normalizeLog10(new double[]{log10PriorsByAC[0], MathUtils.log10SumLog10(log10PriorsByAC, 1)}); - - final Map log10pRefByAllele = new LinkedHashMap<>(allelesUsedInGenotyping.size()); - for ( int i = 0; i < subACOfMLE.length; i++ ) { - final Allele allele = allelesUsedInGenotyping.get(i+1); - final double log10PRef = alleleCountsOfMAP[i] > 0 ? -10000 : 0; // TODO -- a total hack but in effect what the old behavior was - log10pRefByAllele.put(allele, log10PRef); - } - - return new AFCalculationResult(subACOfMLE, allelesUsedInGenotyping, log10Likelihoods, log10Priors, log10pRefByAllele); - } - - // -------------------------------------------------------------------------------- - // - // Protected mutational methods only for use within the calculation models themselves - // - // -------------------------------------------------------------------------------- - - /** - * Reset the data in this results object, so that it can be used in a subsequent AF calculation - * - * Resetting of the data is done by the calculation model itself, so shouldn't be done by callers any longer - * - * @param ensureAltAlleleCapacity indicate the minimum number of alt-alleles that should be supported by the - * tracker. 
- */ - void reset(final int ensureAltAlleleCapacity) { - log10MLE = log10MAP = log10LikelihoodOfAFzero = VALUE_NOT_CALCULATED; - log10LikelihoodsForAFGt0CacheIndex = 0; - log10LikelihoodsForAFGt0Sum = null; - allelesUsedInGenotyping = null; - if (alleleCountsOfMAP.length < ensureAltAlleleCapacity) { - final int newCapacity = Math.max(ensureAltAlleleCapacity, alleleCountsOfMAP.length << 1); - alleleCountsOfMAP = new int[newCapacity]; - alleleCountsOfMLE = new int[newCapacity]; - } else { - Arrays.fill(alleleCountsOfMLE, 0); - Arrays.fill(alleleCountsOfMAP, 0); - } - Arrays.fill(log10LikelihoodsForAFGt0, Double.POSITIVE_INFINITY); - } - - /** - * Reset the data in this results object, so that it can be used in a subsequent AF calculation - * - * Resetting of the data is done by the calculation model itself, so shouldn't be done by callers any longer - */ - void reset() { - log10MLE = log10MAP = log10LikelihoodOfAFzero = VALUE_NOT_CALCULATED; - log10LikelihoodsForAFGt0CacheIndex = 0; - log10LikelihoodsForAFGt0Sum = null; - allelesUsedInGenotyping = null; - Arrays.fill(alleleCountsOfMLE, 0); - Arrays.fill(alleleCountsOfMAP, 0); - Arrays.fill(log10LikelihoodsForAFGt0, Double.POSITIVE_INFINITY); - } - - /** - * Update the maximum log10 likelihoods seen, if log10LofKs is higher, and the corresponding ACs of this state - * - * @param log10LofK the likelihood of our current configuration state, cannot be the 0 state - * @param alleleCountsForK the allele counts for this state - */ - void updateMLEifNeeded(final double log10LofK, final int[] alleleCountsForK) { - addToLikelihoodsCache(log10LofK); - - if ( log10LofK > log10MLE ) { - log10MLE = log10LofK; - System.arraycopy(alleleCountsForK, 0, alleleCountsOfMLE, 0, alleleCountsForK.length); - } - } - - /** - * Update the maximum log10 posterior seen, if log10PofKs is higher, and the corresponding ACs of this state - * - * @param log10PofK the posterior of our current configuration state - * @param alleleCountsForK the allele counts for this state - */ - void updateMAPifNeeded(final double log10PofK, final int[] alleleCountsForK) { - if ( log10PofK > log10MAP ) { - log10MAP = log10PofK; - System.arraycopy(alleleCountsForK, 0, alleleCountsOfMAP, 0, alleleCountsForK.length); - } - } - - private void addToLikelihoodsCache(final double log10LofK) { - // add to the cache - log10LikelihoodsForAFGt0[log10LikelihoodsForAFGt0CacheIndex++] = log10LofK; - - // if we've filled up the cache, then condense by summing up all of the values and placing the sum back into the first cell - if ( log10LikelihoodsForAFGt0CacheIndex == LIKELIHOODS_CACHE_SIZE) { - final double temporarySum = MathUtils.log10SumLog10(log10LikelihoodsForAFGt0, 0, log10LikelihoodsForAFGt0CacheIndex); - Arrays.fill(log10LikelihoodsForAFGt0, Double.POSITIVE_INFINITY); - log10LikelihoodsForAFGt0[0] = temporarySum; - log10LikelihoodsForAFGt0CacheIndex = 1; - } - } - - void setLog10LikelihoodOfAFzero(final double log10LikelihoodOfAFzero) { - this.log10LikelihoodOfAFzero = log10LikelihoodOfAFzero; - if ( log10LikelihoodOfAFzero > log10MLE ) { - log10MLE = log10LikelihoodOfAFzero; - Arrays.fill(alleleCountsOfMLE, 0); - } - } - - void setLog10PosteriorOfAFzero(final double log10PosteriorOfAFzero) { - if ( log10PosteriorOfAFzero > log10MAP ) { - log10MAP = log10PosteriorOfAFzero; - Arrays.fill(alleleCountsOfMAP, 0); - } - } - - /** - * Set the list of alleles used in genotyping - * - * @param allelesUsedInGenotyping the list of alleles, where the first allele is reference - */ - void 
setAllelesUsedInGenotyping(final List allelesUsedInGenotyping) { - if ( allelesUsedInGenotyping == null || allelesUsedInGenotyping.isEmpty() ) { - throw new IllegalArgumentException("allelesUsedInGenotyping cannot be null or empty"); - } - if ( allelesUsedInGenotyping.get(0).isNonReference() ) { - throw new IllegalArgumentException("The first element of allelesUsedInGenotyping must be the reference allele"); - } - - this.allelesUsedInGenotyping = allelesUsedInGenotyping; - } - - public void ensureMaximumAlleleCapacity(final int capacity) { - if (this.alleleCountsOfMAP.length < capacity) { - reset(capacity); - } - } -} diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerEngine.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerEngine.java index ffc22168928..aa1eb4bbbed 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerEngine.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerEngine.java @@ -22,7 +22,6 @@ import org.broadinstitute.hellbender.exceptions.UserException; import org.broadinstitute.hellbender.tools.walkers.annotator.*; import org.broadinstitute.hellbender.tools.walkers.genotyper.*; -import org.broadinstitute.hellbender.tools.walkers.genotyper.afcalc.FixedAFCalculatorProvider; import org.broadinstitute.hellbender.tools.walkers.haplotypecaller.readthreading.ReadThreadingAssembler; import org.broadinstitute.hellbender.utils.MathUtils; import org.broadinstitute.hellbender.utils.QualityUtils; @@ -201,7 +200,7 @@ private void initialize(boolean createBamOutIndex, final boolean createBamOutMD5 initializeActiveRegionEvaluationGenotyperEngine(); - genotypingEngine = new HaplotypeCallerGenotypingEngine(hcArgs, samplesList, FixedAFCalculatorProvider.createThreadSafeProvider(hcArgs.standardArgs), ! hcArgs.doNotRunPhysicalPhasing); + genotypingEngine = new HaplotypeCallerGenotypingEngine(hcArgs, samplesList, ! hcArgs.doNotRunPhysicalPhasing); genotypingEngine.setAnnotationEngine(annotationEngine); referenceConfidenceModel = new ReferenceConfidenceModel(samplesList, readsHeader, hcArgs.indelSizeToEliminateInRefModel, hcArgs.standardArgs.genotypeArgs.numRefIfMissing); @@ -315,8 +314,7 @@ private void initializeActiveRegionEvaluationGenotyperEngine() { // UGs engine with ploidy == 1 simpleUAC.genotypeArgs.samplePloidy = Math.max(MINIMUM_PUTATIVE_PLOIDY_FOR_ACTIVE_REGION_DISCOVERY, hcArgs.standardArgs.genotypeArgs.samplePloidy); - activeRegionEvaluationGenotyperEngine = new MinimalGenotypingEngine(simpleUAC, samplesList, - FixedAFCalculatorProvider.createThreadSafeProvider(simpleUAC)); + activeRegionEvaluationGenotyperEngine = new MinimalGenotypingEngine(simpleUAC, samplesList); activeRegionEvaluationGenotyperEngine.setLogger(logger); } @@ -496,8 +494,9 @@ public ActivityProfileState isActive( final AlignmentContext context, final Refe final double isActiveProb; if (genotypes.size() == 1) { - // Faster implementation avoiding the costly and over complicated Exact AFCalculator machinery: + // Faster implementation avoiding the AlleleFrequencyCalculator // This is the case when doing GVCF output. + // TODO: now that old qual is gone, do we still need this? 
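For reference while reading the engine hunks above, a minimal, self-contained sketch of the two-argument construction this patch standardizes on. Every class, field, and constructor used here appears in the surrounding hunks; the wrapper class, the package choice (the same one the genotyper unit tests use), and the sample names are illustrative only.

    package org.broadinstitute.hellbender.tools.walkers.genotyper;

    import java.util.Arrays;
    import org.broadinstitute.hellbender.utils.genotyper.IndexedSampleList;
    import org.broadinstitute.hellbender.utils.genotyper.SampleList;

    final class MinimalEngineConstructionSketch {
        static MinimalGenotypingEngine build() {
            // Sample list and genotyping arguments, as in the updated unit tests further down.
            final SampleList samples = new IndexedSampleList(Arrays.asList("sample1", "sample2"));
            final UnifiedArgumentCollection uac = new UnifiedArgumentCollection();
            uac.genotypeArgs = new GenotypeCalculationArgumentCollection();
            // No AFCalculatorProvider argument any more: the engine is built from the
            // argument collection and the sample list alone.
            return new MinimalGenotypingEngine(uac, samples);
        }
    }
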
isActiveProb = activeRegionEvaluationGenotyperEngine.calculateSingleSampleRefVsAnyActiveStateProfileValue(genotypes.get(0).getLikelihoods().getAsVector()); } else { final VariantCallContext vcOut = activeRegionEvaluationGenotyperEngine.calculateGenotypes(new VariantContextBuilder("HCisActive!", context.getContig(), context.getLocation().getStart(), context.getLocation().getEnd(), alleles).genotypes(genotypes).make(), GenotypeLikelihoodsCalculationModel.SNP, readsHeader); diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerGenotypingEngine.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerGenotypingEngine.java index d3eef7f972e..69b25fbb6d1 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerGenotypingEngine.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerGenotypingEngine.java @@ -11,7 +11,6 @@ import org.broadinstitute.hellbender.engine.ReferenceMemorySource; import org.broadinstitute.hellbender.tools.walkers.annotator.VariantAnnotatorEngine; import org.broadinstitute.hellbender.tools.walkers.genotyper.*; -import org.broadinstitute.hellbender.tools.walkers.genotyper.afcalc.AFCalculatorProvider; import org.broadinstitute.hellbender.utils.SimpleInterval; import org.broadinstitute.hellbender.utils.Utils; import org.broadinstitute.hellbender.utils.genotyper.AlleleList; @@ -57,9 +56,8 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine VCs, Var VariantAnnotatorEngine annotatorEngine = new VariantAnnotatorEngine(getAnnotationsToUse(), null, Collections.emptyList(), false, false); final UnifiedArgumentCollection uac = new UnifiedArgumentCollection(); uac.genotypeArgs = new GenotypeCalculationArgumentCollection(); - GeneralPloidyFailOverAFCalculatorProvider calculatorProvider = new GeneralPloidyFailOverAFCalculatorProvider(uac.genotypeArgs); - GenotypingEngine genotypingEngine = new MinimalGenotypingEngine(uac, new IndexedSampleList(result.getSampleNamesOrderedByName()), calculatorProvider); + GenotypingEngine genotypingEngine = new MinimalGenotypingEngine(uac, new IndexedSampleList(result.getSampleNamesOrderedByName())); genotypingEngine.setAnnotationEngine(annotatorEngine); GenotypeLikelihoodsCalculationModel model = result.getType() == VariantContext.Type.INDEL ? 
GenotypeLikelihoodsCalculationModel.INDEL diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/genotyper/GenotypingEngineUnitTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/genotyper/GenotypingEngineUnitTest.java index 4fbd70a59db..baf92db4691 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/genotyper/GenotypingEngineUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/genotyper/GenotypingEngineUnitTest.java @@ -1,7 +1,6 @@ package org.broadinstitute.hellbender.tools.walkers.genotyper; import org.broadinstitute.hellbender.GATKBaseTest; -import org.broadinstitute.hellbender.tools.walkers.genotyper.afcalc.GeneralPloidyFailOverAFCalculatorProvider; import org.broadinstitute.hellbender.utils.genotyper.IndexedSampleList; import org.broadinstitute.hellbender.utils.genotyper.SampleList; import org.broadinstitute.hellbender.utils.SimpleInterval; @@ -50,14 +49,14 @@ private static GenotypingEngine getGenotypingEngine() { final GenotypeCalculationArgumentCollection genotypeArgs = new GenotypeCalculationArgumentCollection(); final UnifiedArgumentCollection uac = new UnifiedArgumentCollection(); uac.genotypeArgs = new GenotypeCalculationArgumentCollection(genotypeArgs); - return new MinimalGenotypingEngine(uac, SAMPLES, new GeneralPloidyFailOverAFCalculatorProvider(genotypeArgs)); + return new MinimalGenotypingEngine(uac, SAMPLES); } private static GenotypingEngine getNewQualGenotypingEngine() { final GenotypeCalculationArgumentCollection genotypeArgs = new GenotypeCalculationArgumentCollection(); final UnifiedArgumentCollection uac = new UnifiedArgumentCollection(); uac.genotypeArgs = new GenotypeCalculationArgumentCollection(genotypeArgs); - return new MinimalGenotypingEngine(uac, SAMPLES, new GeneralPloidyFailOverAFCalculatorProvider(genotypeArgs)); + return new MinimalGenotypingEngine(uac, SAMPLES); } @DataProvider(name="testCoveredByDeletionData") diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/genotyper/afcalc/AFCalculationResultUnitTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/genotyper/afcalc/AFCalculationResultUnitTest.java index 7a2a2f06c9f..d7c2d64c558 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/genotyper/afcalc/AFCalculationResultUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/genotyper/afcalc/AFCalculationResultUnitTest.java @@ -1,158 +1,47 @@ package org.broadinstitute.hellbender.tools.walkers.genotyper.afcalc; import htsjdk.variant.variantcontext.Allele; -import org.broadinstitute.hellbender.utils.MathUtils; -import org.broadinstitute.hellbender.utils.QualityUtils; -import org.broadinstitute.hellbender.utils.Utils; import org.broadinstitute.hellbender.GATKBaseTest; import org.testng.Assert; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; +import org.testng.internal.junit.ArrayAsserts; import java.util.*; public final class AFCalculationResultUnitTest extends GATKBaseTest { - private static class MyTest { - final double[] Ls, expectedPosteriors; - private MyTest(double[] ls, double[] expectedPosteriors) { - Ls = ls; - this.expectedPosteriors = expectedPosteriors; - } - - @Override - public String toString() { - return "Ls [" + Utils.join(",", Ls) + "] expectedPosteriors [" + Utils.join(",", expectedPosteriors) + "]"; - } - } - - @DataProvider(name = "TestComputePosteriors") - public Object[][] makeTestCombineGLs() { - List tests = new ArrayList<>(); - - tests.add(new 
Object[]{new MyTest(log10Even, log10Even)}); - - for ( double L0 = -1e9; L0 < 0.0; L0 /= 10.0 ) { - for ( double L1 = -1e2; L1 < 0.0; L1 /= 100.0 ) { - final double[] input = new double[]{L0, L1}; - final double[] expected = MathUtils.normalizeLog10(input); - tests.add(new Object[]{new MyTest(input, expected)}); - } - } - - for ( double bigBadL = -1e50; bigBadL < -1e200; bigBadL *= 10 ) { - // test that a huge bad likelihood remains, even with a massive better result - for ( final double betterL : Arrays.asList(-1000.0, -100.0, -10.0, -1.0, -0.1, -0.01, -0.001, 0.0)) { - tests.add(new Object[]{new MyTest(new double[]{bigBadL, betterL}, new double[]{bigBadL, 0.0})}); - tests.add(new Object[]{new MyTest(new double[]{betterL, bigBadL}, new double[]{0.0, bigBadL})}); - } - } - // test that a modest bad likelihood with an ~0.0 value doesn't get lost - for ( final double badL : Arrays.asList(-10000.0, -1000.0, -100.0, -10.0)) { - tests.add(new Object[]{new MyTest(new double[]{badL, -1e-9}, new double[]{badL, 0.0})}); - tests.add(new Object[]{new MyTest(new double[]{-1e-9, badL}, new double[]{0.0, badL})}); - } - - // test that a non-ref site gets reasonable posteriors with an ~0.0 value doesn't get lost - for ( final double nonRefL : Arrays.asList(-100.0, -50.0, -10.0, -9.0, -8.0, -7.0, -6.0, -5.0)) { - tests.add(new Object[]{new MyTest(new double[]{0.0, nonRefL}, new double[]{0.0, nonRefL})}); - } - - return tests.toArray(new Object[][]{}); - } - - - static final double[] log10Even = MathUtils.normalizeLog10(new double[]{0.5, 0.5}); - private static final Allele C = Allele.create("C"); private static final Allele A = Allele.create("A", true); - static final List alleles = Arrays.asList(A, C); - - @Test(dataProvider = "TestComputePosteriors") - private void testComputingPosteriors(final MyTest data) { - final int[] zeroAC = {0}; - final AFCalculationResult result = new AFCalculationResult(zeroAC, alleles, data.Ls, log10Even, Collections.singletonMap(C, -1.0)); - - Assert.assertEquals(result.getLog10PosteriorOfAFEq0(), data.expectedPosteriors[0], 1e-3, "AF = 0 not expected"); - Assert.assertEquals(result.getLog10PosteriorOfAFGT0(), data.expectedPosteriors[1], 1e-3, "AF > 0 not expected"); - - Assert.assertEquals(result.getLog10PriorOfAFEq0(), log10Even[0], 1e-3, "prior for AF > 0 not expected"); - Assert.assertEquals(result.getLog10PriorOfAFGT0(), log10Even[1], 1e-3, "prior for AF > 0 not expected"); - - Assert.assertEquals(result.getLog10LikelihoodOfAFEq0(), data.Ls[0], 1e-3, "likelihood for AF > 0 not expected"); - Assert.assertEquals(result.getLog10LikelihoodOfAFGT0(), data.Ls[1], 1e-3, "likelihood for AF > 0 not expected"); - - Assert.assertEquals(result.getAllelesUsedInGenotyping(), alleles, "alleles are different"); - - Assert.assertNotNull(result.toString());//just making sure it does not blow up, ignoring contents - + private static final Allele C = Allele.create("C"); + private static final Allele T = Allele.create("T"); + private static final List TWO_ALLELES = Arrays.asList(A, C); + private static final List THREE_ALLELES = Arrays.asList(A, C, T); - Assert.assertEquals(result.getAlleleCountAtMLE(C), zeroAC[0]); - //getLog10PosteriorOfAFEq0ForAllele - //withNewPriors - Assert.assertEquals(result.getAlleleCountsOfMLE(), zeroAC, "getAlleleCountsOfMLE not as expected"); - final double[] actualPosteriors = {result.getLog10PosteriorOfAFEq0(), result.getLog10PosteriorOfAFGT0()}; - Assert.assertEquals(MathUtils.sumLog10(actualPosteriors), 1.0, 1e-3, "Posteriors don't sum to 1 with 1e-3 
precision"); + @DataProvider(name = "AFCalculationResults") + public Object[][] createAFCalculationResultTestData() { + return new Object[][]{ + {new int[] {2}, TWO_ALLELES, 0.4, new double[] {0.4}}, + {new int[] {2,3}, THREE_ALLELES, 0.4, new double[] {0.5, 0.7}} + }; } - @DataProvider(name = "TestIsPolymorphic") - public Object[][] makeTestIsPolymorphic() { - List tests = new ArrayList<>(); - - final List pValues = new LinkedList<>(); - for ( final double p : Arrays.asList(0.01, 0.1, 0.9, 0.99, 0.999, 1 - 1e-4, 1 - 1e-5, 1 - 1e-6) ) - for ( final double espilon : Arrays.asList(-1e-7, 0.0, 1e-7) ) - pValues.add(p + espilon); - - for ( final double pNonRef : pValues ) { - for ( final double pThreshold : pValues ) { - final boolean shouldBePoly = pNonRef >= pThreshold; - if ( pNonRef != pThreshold) - // let's not deal with numerical instability - tests.add(new Object[]{ pNonRef, pThreshold, shouldBePoly }); - } + @Test(dataProvider = "AFCalculationResults") + private void test(final int[] mleCounts, final List alleles, final double probabilityOfNoVariant, final double[] probabilityOfNoVariantByAllele) { + final Map log10pRefByAllele = new HashMap<>(); + for (int n = 1; n < alleles.size(); n++) { + log10pRefByAllele.put(alleles.get(n), Math.log10(probabilityOfNoVariantByAllele[n-1])); } + final AFCalculationResult result = new AFCalculationResult(mleCounts, alleles, Math.log10(probabilityOfNoVariant), log10pRefByAllele); - return tests.toArray(new Object[][]{}); - } - - private AFCalculationResult makePolymorphicTestData(final double pNonRef) { - return new AFCalculationResult( - new int[]{0}, - alleles, - MathUtils.normalizeLog10(new double[]{1 - pNonRef, pNonRef}), - log10Even, - Collections.singletonMap(C, Math.log10(1 - pNonRef))); - } - - @Test(dataProvider = "TestIsPolymorphic") - private void testIsPolymorphic(final double pNonRef, final double pThreshold, final boolean shouldBePoly) { - final AFCalculationResult result = makePolymorphicTestData(pNonRef); - final boolean actualIsPoly = result.isPolymorphic(C, Math.log10(1 - pThreshold)); - Assert.assertEquals(actualIsPoly, shouldBePoly, - "isPolymorphic with pNonRef " + pNonRef + " and threshold " + pThreshold + " returned " - + actualIsPoly + " but the expected result is " + shouldBePoly); - } + ArrayAsserts.assertArrayEquals(result.getAlleleCountsOfMLE(), mleCounts); + Assert.assertEquals(result.getAllelesUsedInGenotyping(), alleles); - @Test(dataProvider = "TestIsPolymorphic") - private void testIsPolymorphicQual(final double pNonRef, final double pThreshold, final boolean shouldBePoly) { - final AFCalculationResult result = makePolymorphicTestData(pNonRef); - final double qual = QualityUtils.phredScaleCorrectRate(pThreshold); - final boolean actualIsPoly = result.isPolymorphicPhredScaledQual(C, qual); - Assert.assertEquals(actualIsPoly, shouldBePoly, - "isPolymorphic with pNonRef " + pNonRef + " and threshold " + pThreshold + " returned " - + actualIsPoly + " but the expected result is " + shouldBePoly); - } - - @Test(dataProvider = "TestComputePosteriors") - private void test(final MyTest data) { - final AFCalculationResult result = new AFCalculationResult(new int[]{0}, alleles, data.Ls, log10Even, Collections.singletonMap(C, -1.0)); - - Assert.assertEquals(result.getLog10PosteriorOfAFEq0(), data.expectedPosteriors[0], 1e-3, "AF = 0 not expected"); - Assert.assertEquals(result.getLog10PosteriorOfAFGT0(), data.expectedPosteriors[1], 1e-3, "AF > 0 not expected"); + for (int n = 1; n < alleles.size(); n++) { + 
Assert.assertEquals(result.getAlleleCountAtMLE(alleles.get(n)), mleCounts[n-1]); + } - final double[] actualPosteriors = {result.getLog10PosteriorOfAFEq0(), result.getLog10PosteriorOfAFGT0()}; - Assert.assertEquals(MathUtils.sumLog10(actualPosteriors), 1.0, 1e-3, "Posteriors don't sum to 1 with 1e-3 precision"); + Assert.assertEquals(result.log10ProbVariantPresent(), Math.log10(1 - probabilityOfNoVariant), 1.0e-10); } -} +} \ No newline at end of file diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/genotyper/afcalc/AFCalculationUnitTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/genotyper/afcalc/AFCalculationUnitTest.java deleted file mode 100644 index 0221d56adc0..00000000000 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/genotyper/afcalc/AFCalculationUnitTest.java +++ /dev/null @@ -1,657 +0,0 @@ -package org.broadinstitute.hellbender.tools.walkers.genotyper.afcalc; - -import htsjdk.variant.variantcontext.*; -import org.apache.commons.lang.ArrayUtils; -import org.apache.commons.lang.math.NumberUtils; -import org.broadinstitute.hellbender.tools.walkers.genotyper.GenotypingEngine; -import org.broadinstitute.hellbender.utils.MathUtils; -import org.broadinstitute.hellbender.utils.QualityUtils; -import org.broadinstitute.hellbender.utils.Utils; -import org.broadinstitute.hellbender.GATKBaseTest; -import org.testng.Assert; -import org.testng.annotations.BeforeClass; -import org.testng.annotations.DataProvider; -import org.testng.annotations.Test; - -import java.util.*; - - -public final class AFCalculationUnitTest extends GATKBaseTest { - static Allele A = Allele.create("A", true); - static Allele C = Allele.create("C"); - static Allele G = Allele.create("G"); - - static int sampleNameCounter = 0; - static Genotype AA1, AB1, BB1, NON_INFORMATIVE1; - static Genotype AA2, AB2, AC2, BB2, BC2, CC2, NON_INFORMATIVE2; - final double[] FLAT_3SAMPLE_PRIORS = MathUtils.normalizeLog10(new double[2 * 3 + 1]); // flat priors - - final private static boolean INCLUDE_BIALLELIC = true; - final private static boolean INCLUDE_TRIALLELIC = true; - final private static boolean Guillermo_FIXME = false; // TODO -- can only be enabled when GdA fixes bug - final private static boolean DEBUG_ONLY = false; - - protected static List createAFCalculators(final List calcs, final int maxAltAlleles, final int ploidy) { - final List AFCalculators = new LinkedList<>(); - - for ( final AFCalculatorImplementation calc : calcs ) { - if (calc.usableForParams(ploidy,maxAltAlleles)) - AFCalculators.add(calc.newInstance()); - else - throw new IllegalStateException("cannot use " + calc + " calculator instance with combination " + maxAltAlleles + " " + ploidy); - } - - return AFCalculators; - } - - @BeforeClass - public void before() { - AA1 = makePL(Arrays.asList(A, A), 0, 20, 20); - AB1 = makePL(Arrays.asList(A, C), 20, 0, 20); - BB1 = makePL(Arrays.asList(C, C), 20, 20, 0); - NON_INFORMATIVE1 = makePL(Arrays.asList(Allele.NO_CALL, Allele.NO_CALL), 0, 0, 0); - - AA2 = makePL(Arrays.asList(A, A), 0, 20, 20, 20, 20, 20); - AB2 = makePL(Arrays.asList(A, C), 20, 0, 20, 20, 20, 20); - BB2 = makePL(Arrays.asList(C, C), 20, 20, 0, 20, 20, 20); - AC2 = makePL(Arrays.asList(A, G), 20, 20, 20, 0, 20, 20); - BC2 = makePL(Arrays.asList(C, G), 20, 20, 20, 20, 0, 20); - CC2 = makePL(Arrays.asList(G, G), 20, 20, 20, 20, 20, 0); - NON_INFORMATIVE2 = makePL(Arrays.asList(Allele.NO_CALL, Allele.NO_CALL), 0, 0, 0, 0, 0, 0); - } - - protected static Genotype makePL(final List expectedGT, int 
... pls) { - GenotypeBuilder gb = new GenotypeBuilder("sample" + sampleNameCounter++); - gb.alleles(expectedGT); - gb.PL(pls); - return gb.make(); - } - - - private static final int MAX_ALT_ALLELES = 2; - private static final int PLOIDY = 2; - - - @DataProvider(name = "wellFormedGLs") - public Object[][] createSimpleGLsData() { - final List biAllelicSamples = Arrays.asList(AA1, AB1, BB1); - final List triAllelicSamples = Arrays.asList(AA2, AB2, BB2, AC2, BC2, CC2); - - for ( final int nSamples : Arrays.asList(1, 2, 3, 4) ) { - List calcs = createAFCalculators(Arrays.asList(AFCalculatorImplementation.values()), MAX_ALT_ALLELES, PLOIDY); - - final int nPriorValues = 2*nSamples+1; - final double[] flatPriors = MathUtils.normalizeLog10(new double[nPriorValues]); // flat priors - final double[] humanPriors = new double[nPriorValues]; - GenotypingEngine.computeAlleleFrequencyPriors(nPriorValues - 1, humanPriors, 0.001, new ArrayList<>()); - - for ( final double[] priors : Arrays.asList(flatPriors, humanPriors) ) { // , humanPriors) ) { - for ( AFCalculator model : calcs ) { - final String priorName = priors == humanPriors ? "human" : "flat"; - - // bi-allelic - if ( INCLUDE_BIALLELIC && nSamples <= biAllelicSamples.size() ) - for ( List genotypes : Utils.makePermutations(biAllelicSamples, nSamples, true) ) - new GetGLsTest(model, 1, genotypes, priors, priorName); - - // tri-allelic - if ( INCLUDE_TRIALLELIC && ( ! priorName.equals("human") || Guillermo_FIXME ) && ! ( model instanceof OriginalDiploidExactAFCalculator) ) // || model != generalCalc ) ) - for ( List genotypes : Utils.makePermutations(triAllelicSamples, nSamples, true) ) - new GetGLsTest(model, 2, genotypes, priors, priorName); - } - } - } - - return GetGLsTest.getTests(GetGLsTest.class); - } - - @Test(enabled = true && ! DEBUG_ONLY, dataProvider = "wellFormedGLs") - public void testBiallelicGLs(GetGLsTest cfg) { - if ( cfg.getAlleles().size() == 2 ) - testResultSimple(cfg); - } - - @Test(enabled = true && ! 
DEBUG_ONLY, dataProvider = "wellFormedGLs") - public void testTriallelicGLs(GetGLsTest cfg) { - if ( cfg.getAlleles().size() > 2 ) - testResultSimple(cfg); - } - - private static class NonInformativeData { - final Genotype nonInformative; - final List called; - final int nAltAlleles; - - private NonInformativeData(List called, Genotype nonInformative, int nAltAlleles) { - this.called = called; - this.nonInformative = nonInformative; - this.nAltAlleles = nAltAlleles; - } - } - - @DataProvider(name = "GLsWithNonInformative") - public Object[][] makeGLsWithNonInformative() { - List tests = new ArrayList<>(); - - final List nonInformativeTests = new LinkedList<>(); - nonInformativeTests.add(new NonInformativeData(Arrays.asList(AB1), NON_INFORMATIVE1, 1)); - nonInformativeTests.add(new NonInformativeData(Arrays.asList(AB2), NON_INFORMATIVE2, 2)); - nonInformativeTests.add(new NonInformativeData(Arrays.asList(AB2, BC2), NON_INFORMATIVE2, 2)); - - for ( final int nNonInformative : Arrays.asList(1, 10, 100) ) { - for ( final NonInformativeData testData : nonInformativeTests ) { - final List samples = new ArrayList<>(); - samples.addAll(testData.called); - samples.addAll(Collections.nCopies(nNonInformative, testData.nonInformative)); - - final int nSamples = samples.size(); - List calcs = createAFCalculators(Arrays.asList(AFCalculatorImplementation.values()), MAX_ALT_ALLELES, PLOIDY); - - final double[] priors = MathUtils.normalizeLog10(new double[2*nSamples+1]); // flat priors - - for ( AFCalculator model : calcs ) { - if ( testData.nAltAlleles > 1 && model instanceof OriginalDiploidExactAFCalculator) - continue; - - final GetGLsTest onlyInformative = new GetGLsTest(model, testData.nAltAlleles, testData.called, priors, "flat"); - - for ( int rotation = 0; rotation < nSamples; rotation++ ) { - Collections.rotate(samples, 1); - final GetGLsTest withNonInformative = new GetGLsTest(model, testData.nAltAlleles, samples, priors, "flat"); - tests.add(new Object[]{onlyInformative, withNonInformative}); - } - } - } - } - - return tests.toArray(new Object[][]{}); - } - - @Test(enabled = true && ! 
DEBUG_ONLY, dataProvider = "GLsWithNonInformative", dependsOnMethods = {"testBiallelicGLs", "testTriallelicGLs"}) - public void testGLsWithNonInformative(GetGLsTest onlyInformative, GetGLsTest withNonInformative) { - final AFCalculationResult expected = onlyInformative.execute(); - final AFCalculationResult actual = withNonInformative.execute(); - - testResultSimple(withNonInformative); - compareAFCalcResults(actual, expected, onlyInformative.getCalc(), onlyInformative.numAltAlleles, true); - } - - private void testResultSimple(final GetGLsTest cfg) { - final AFCalculationResult refResultTracker = cfg.executeRef(); - final AFCalculationResult resultTracker = cfg.execute(); - try { - compareAFCalcResults(resultTracker, refResultTracker, cfg.getCalc(), cfg.numAltAlleles, true); - } catch (Throwable t) { - cfg.execute(); - throw new RuntimeException(t); - } - Assert.assertNotNull(resultTracker.getAllelesUsedInGenotyping()); - Assert.assertTrue(cfg.getAlleles().containsAll(resultTracker.getAllelesUsedInGenotyping()), "Result object has alleles not in our initial allele list"); - - for ( int altAlleleI = 0; altAlleleI < cfg.numAltAlleles; altAlleleI++ ) { - int expectedAlleleCount = cfg.getExpectedAltAC(altAlleleI); - int calcAC_MLE = resultTracker.getAlleleCountsOfMLE()[altAlleleI]; - - final Allele allele = cfg.getAlleles().get(altAlleleI+1); - Assert.assertEquals(calcAC_MLE, expectedAlleleCount, "MLE AC not equal to expected AC for allele " + allele); - } - } - - private void compareAFCalcResults(final AFCalculationResult actual, final AFCalculationResult expected, final AFCalculator calc, final int maxAltAlleles, final boolean onlyPosteriorsShouldBeEqual) { - // note we cannot really test the multi-allelic case because we actually meaningfully differ among the models here - final double TOLERANCE = maxAltAlleles > 1 ? 1000 : 0.1; // much tighter constraints on bi-allelic results - - if ( ! onlyPosteriorsShouldBeEqual ) { - Assert.assertEquals(actual.getLog10PriorOfAFEq0(), expected.getLog10PriorOfAFEq0(), TOLERANCE, "Priors AF == 0"); - Assert.assertEquals(actual.getLog10PriorOfAFGT0(), expected.getLog10PriorOfAFGT0(), TOLERANCE, "Priors AF > 0"); - Assert.assertEquals(actual.getLog10LikelihoodOfAFEq0(), expected.getLog10LikelihoodOfAFEq0(), TOLERANCE, "Likelihoods AF == 0"); - Assert.assertEquals(actual.getLog10LikelihoodOfAFGT0(), expected.getLog10LikelihoodOfAFGT0(), TOLERANCE, "Likelihoods AF > 0"); - } - Assert.assertEquals(actual.getLog10PosteriorOfAFEq0(), expected.getLog10PosteriorOfAFEq0(), TOLERANCE, "Posteriors AF == 0"); - Assert.assertEquals(actual.getLog10PosteriorOfAFGT0(), expected.getLog10PosteriorOfAFGT0(), TOLERANCE, "Posteriors AF > 0"); - Assert.assertTrue(Arrays.equals(actual.getAlleleCountsOfMLE(), expected.getAlleleCountsOfMLE()), "MLE ACs "); - Assert.assertEquals(actual.getAllelesUsedInGenotyping(), expected.getAllelesUsedInGenotyping(), "Alleles used in genotyping"); - - for ( final Allele a : expected.getAllelesUsedInGenotyping() ) { - if ( ! a.isReference() ) { - Assert.assertEquals(actual.getAlleleCountAtMLE(a), expected.getAlleleCountAtMLE(a), "MLE AC for allele " + a); - // TODO -- enable me when IndependentAllelesDiploidExactAFCalc works properly -// if ( ! ( calc instanceof GeneralPloidyExactAFCalc ) ) -// // TODO -- delete when general ploidy works properly with multi-allelics -// Assert.assertEquals(actual.isPolymorphic(a, 0.0), expected.isPolymorphic(a, 0.0), "isPolymorphic with thread 0.0 for allele " + a); - } - } - } - - @Test(enabled = true && ! 
DEBUG_ONLY, dataProvider = "Models") - public void testLargeGLs(final ExactAFCalculator calc) { - final Genotype BB = makePL(Arrays.asList(C, C), 20000000, 20000000, 0); - GetGLsTest cfg = new GetGLsTest(calc, 1, Arrays.asList(BB, BB, BB), FLAT_3SAMPLE_PRIORS, "flat"); - - final AFCalculationResult resultTracker = cfg.execute(); - - int calculatedAlleleCount = resultTracker.getAlleleCountsOfMLE()[0]; - Assert.assertEquals(calculatedAlleleCount, 6); - } - - @Test(enabled = true && ! DEBUG_ONLY, dataProvider = "Models") - public void testMismatchedGLs(final ExactAFCalculator calc) { - final Genotype AB = makePL(Arrays.asList(A, C), 2000, 0, 2000, 2000, 2000, 2000); - final Genotype AC = makePL(Arrays.asList(A, G), 100, 100, 100, 0, 100, 100); - GetGLsTest cfg = new GetGLsTest(calc, 2, Arrays.asList(AB, AC), FLAT_3SAMPLE_PRIORS, "flat"); - - final AFCalculationResult resultTracker = cfg.execute(); - - Assert.assertEquals(resultTracker.getAlleleCountsOfMLE()[0], 1); - Assert.assertEquals(resultTracker.getAlleleCountsOfMLE()[1], 1); - } - - // -------------------------------------------------------------------------------- - // - // Code to test that the pNonRef value is meaningful - // - // -------------------------------------------------------------------------------- - - private static class PNonRefData { - final Genotype g; - final double pNonRef, tolerance; - final boolean canScale; - final List badModels; - final VariantContext vc; - - private PNonRefData(final VariantContext vc, Genotype g, double pNonRef, double tolerance, final boolean canScale) { - this(vc, g, pNonRef, tolerance, canScale, Collections.emptyList()); - } - - private PNonRefData(final VariantContext vc, Genotype g, double pNonRef, double tolerance, final boolean canScale, final List badModels) { - this.g = g; - this.pNonRef = pNonRef; - this.tolerance = tolerance; - this.canScale = canScale; - this.badModels = badModels; - this.vc = vc; - } - - public PNonRefData scale(final int scaleFactor) { - if ( canScale ) { - final int[] PLs = new int[g.getPL().length]; - for ( int i = 0; i < PLs.length; i++ ) PLs[i] = g.getPL()[i] * ((int) Math.log10(scaleFactor)+1); - final Genotype scaledG = new GenotypeBuilder(g).PL(PLs).make(); - final double scaledPNonRef = pNonRef < 0.5 ? 
pNonRef / scaleFactor : 1 - ((1-pNonRef) / scaleFactor); - return new PNonRefData(vc, scaledG, scaledPNonRef, tolerance, true); - } else { - return this; - } - } - } - - @DataProvider(name = "PNonRef") - public Object[][] makePNonRefTest() { - List tests = new ArrayList<>(); - - final List AA = Arrays.asList(A, A); - final List AC = Arrays.asList(A, C); - final List CC = Arrays.asList(C, C); - final List AG = Arrays.asList(A, G); - final List GG = Arrays.asList(G, G); - final List CG = Arrays.asList(C, G); - - final VariantContext vc2 = new VariantContextBuilder("x","1", 1, 1, Arrays.asList(A, C)).make(); - final VariantContext vc3 = new VariantContextBuilder("x","1", 1, 1, Arrays.asList(A, C, G)).make(); - final AFCalculatorTestBuilder.PriorType priorType = AFCalculatorTestBuilder.PriorType.flat; - - final double TOLERANCE = 0.5; - - final List initialPNonRefData = Arrays.asList( - // bi-allelic sites - new PNonRefData(vc2, makePL(AA, 0, 10, 10), 0.1666667, TOLERANCE, true), - new PNonRefData(vc2, makePL(AA, 0, 1, 10), 0.4721084, TOLERANCE, false), - new PNonRefData(vc2, makePL(AA, 0, 1, 1), 0.6136992, TOLERANCE, false), - new PNonRefData(vc2, makePL(AA, 0, 5, 5), 0.3874259, TOLERANCE, false), - new PNonRefData(vc2, makePL(AC, 10, 0, 10), 0.9166667, TOLERANCE, true), - new PNonRefData(vc2, makePL(CC, 10, 10, 0), 0.9166667, TOLERANCE, true), - - // tri-allelic sites -- cannot scale because of the naivety of our scaling estimator - new PNonRefData(vc3, makePL(AA, 0, 10, 10, 10, 10, 10), 0.3023255813953489, TOLERANCE * 2, false), // more tolerance because constrained model is a bit inaccurate - new PNonRefData(vc3, makePL(AC, 10, 0, 10, 10, 10, 10), 0.9166667, TOLERANCE, false), - new PNonRefData(vc3, makePL(CC, 10, 10, 0, 10, 10, 10), 0.9166667, TOLERANCE, false), - new PNonRefData(vc3, makePL(AG, 10, 10, 10, 0, 10, 10), 0.9166667, TOLERANCE, false), - new PNonRefData(vc3, makePL(CG, 10, 10, 10, 10, 0, 10), 0.80, TOLERANCE, false), - new PNonRefData(vc3, makePL(GG, 10, 10, 10, 10, 10, 0), 0.9166667, TOLERANCE, false) - ); - - for ( AFCalculatorImplementation modelType : Arrays.asList(AFCalculatorImplementation.EXACT_REFERENCE, AFCalculatorImplementation.EXACT_INDEPENDENT) ) { - for ( int nNonInformative = 0; nNonInformative < 3; nNonInformative++ ) { - for ( final PNonRefData rootData : initialPNonRefData ) { - for ( int plScale = 1; plScale <= 100000; plScale *= 10 ) { - if ( ! rootData.badModels.contains(modelType) && (plScale == 1 || rootData.canScale) ) { - final PNonRefData data = rootData.scale(plScale); - tests.add(new Object[]{data.vc, modelType, priorType, Arrays.asList(data.g), data.pNonRef, data.tolerance, nNonInformative}); - } - } - } - } - } - - return tests.toArray(new Object[][]{}); - } - - @Test(enabled = true && ! 
DEBUG_ONLY, dataProvider = "PNonRef") - public void testPNonRef(final VariantContext vcRoot, - AFCalculatorImplementation modelType, - AFCalculatorTestBuilder.PriorType priorType, - final List genotypes, - final double expectedPNonRef, - final double tolerance, - final int nNonInformative) { - final AFCalculatorTestBuilder testBuilder - = new AFCalculatorTestBuilder(1, vcRoot.getNAlleles()-1, modelType, priorType); - - final VariantContextBuilder vcb = new VariantContextBuilder(vcRoot); - vcb.genotypes(genotypes); - - final AFCalculationResult resultTracker = testBuilder.makeModel().getLog10PNonRef(vcb.make(), PLOIDY, MAX_ALT_ALLELES, testBuilder.makePriors()); - - Assert.assertEquals(resultTracker.getLog10PosteriorOfAFGT0(), Math.log10(expectedPNonRef), tolerance, - "Actual pNonRef not within tolerance " + tolerance + " of expected"); - } - - @DataProvider(name = "PNonRefBiallelicSystematic") - public Object[][] makePNonRefBiallelicSystematic() { - List tests = new ArrayList<>(); - - final List bigNonRefPLs = Arrays.asList(0, 1, 2, 3, 4, 5, 10, 15, 20, 25, 50, 100, 1000); - final List> bigDiploidPLs = removeBadPLs(Utils.makePermutations(bigNonRefPLs, 3, true)); - - for ( AFCalculatorImplementation modelType : AFCalculatorImplementation.values() ) { - - if ( false ) { // for testing only - tests.add(new Object[]{modelType, toGenotypes(Arrays.asList(Arrays.asList(0, 100, 0)))}); - } else { - if ( modelType == AFCalculatorImplementation.EXACT_GENERAL_PLOIDY ) continue; // TODO -- GENERAL_PLOIDY DOESN'T WORK - - // test all combinations of PLs for 1 sample - for ( final List> PLsPerSample : Utils.makePermutations(bigDiploidPLs, 1, true) ) { - tests.add(new Object[]{modelType, toGenotypes(PLsPerSample)}); - } - - - final List> smallDiploidPLs = new LinkedList<>(); - for ( final int nonRefPL : Arrays.asList(5, 10, 20, 30) ) { - for ( int i = 0; i < 2; i++ ) { - List pls = new ArrayList<>(Collections.nCopies(3, nonRefPL)); - pls.set(i, 0); - smallDiploidPLs.add(pls); - } - } - - for ( final List> PLsPerSample : Utils.makePermutations(smallDiploidPLs, 5, false) ) { - tests.add(new Object[]{modelType, toGenotypes(PLsPerSample)}); - } - } - } - - return tests.toArray(new Object[][]{}); - } - - final List> removeBadPLs(List> listOfPLs) { - List> clean = new LinkedList<>(); - - for ( final List PLs : listOfPLs ) { - int x = PLs.get(0); - boolean bad = false; - for ( int pl1 : PLs ) - if ( pl1 > x ) - bad = true; - else - x = pl1; - if ( ! bad ) clean.add(PLs); - } - - return clean; - } - - private List toGenotypes(final List> PLsPerSample) { - final List nocall = Arrays.asList(Allele.NO_CALL, Allele.NO_CALL); - final List genotypes = new ArrayList<>(PLsPerSample.size()); - - for ( final List PLs : PLsPerSample ) { - final int[] pls = ArrayUtils.toPrimitive(PLs.toArray(new Integer[3])); - final int min = NumberUtils.min(pls); - for ( int i = 0; i < pls.length; i++ ) pls[i] -= min; - genotypes.add(makePL(nocall, pls)); - } - - return genotypes; - } - - @Test(enabled = true && ! 
DEBUG_ONLY, dataProvider = "PNonRefBiallelicSystematic") - public void PNonRefBiallelicSystematic(AFCalculatorImplementation modelType, final List genotypes) { - //logger.warn("Running " + modelType + " with " + genotypes); - final AFCalculatorTestBuilder refBuilder = new AFCalculatorTestBuilder(genotypes.size(), 1, AFCalculatorImplementation.EXACT_REFERENCE, AFCalculatorTestBuilder.PriorType.human); - final AFCalculatorTestBuilder testBuilder = new AFCalculatorTestBuilder(genotypes.size(), 1, modelType, AFCalculatorTestBuilder.PriorType.human); - - final VariantContextBuilder vcb = new VariantContextBuilder("x", "1", 1, 1, Arrays.asList(A, C)); - vcb.genotypes(genotypes); - - final AFCalculationResult refResult = refBuilder.makeModel().getLog10PNonRef(vcb.make(), PLOIDY, MAX_ALT_ALLELES, testBuilder.makePriors()); - final AFCalculationResult testResult = testBuilder.makeModel().getLog10PNonRef(vcb.make(), PLOIDY, MAX_ALT_ALLELES, testBuilder.makePriors()); - - final double tolerance = 1e-3; - Assert.assertEquals(testResult.getLog10PosteriorOfAFGT0(), refResult.getLog10PosteriorOfAFGT0(), tolerance, - "Actual pNonRef not within tolerance " + tolerance + " of expected"); - Assert.assertEquals(testResult.getAlleleCountsOfMLE(), refResult.getAlleleCountsOfMLE(), - "Actual MLE " + Utils.join(",", testResult.getAlleleCountsOfMLE()) + " not equal to expected " + Utils.join(",", refResult.getAlleleCountsOfMLE())); - } - - // -------------------------------------------------------------------------------- - // - // Test priors - // - // -------------------------------------------------------------------------------- - - @DataProvider(name = "Models") - public Object[][] makeModels() { - List tests = new ArrayList<>(); - - for ( final AFCalculatorImplementation calc : AFCalculatorImplementation.values() ) { - if ( calc.usableForParams(2, 4) ) - tests.add(new Object[]{AFCalculatorImplementation.bestValue(2, AFCalculatorImplementation.UNBOUND_ALTERNATIVE_ALLELE_COUNT, null).newInstance()}); - } - - return tests.toArray(new Object[][]{}); - } - - - @Test(enabled = true, dataProvider = "Models") - public void testNoPrior(final AFCalculator model) { - for ( int REF_PL = 10; REF_PL <= 20; REF_PL += 10 ) { - final Genotype AB = makePL(Arrays.asList(A, C), REF_PL, 0, 10000); - - final double[] flatPriors = new double[]{0.0,0.0,0.0}; - final double[] noPriors = new double[3]; - // test that function computeAlleleFrequency correctly operates when the flat prior option is set - // computeAlleleFrequencyPriors takes linear priors - final ArrayList inputPrior = new ArrayList<>(); - inputPrior.add(1.0/3); - inputPrior.add(1.0/3); - GenotypingEngine.computeAlleleFrequencyPriors(2, noPriors, 0.0, inputPrior); - - GetGLsTest cfgFlatPrior = new GetGLsTest(model, 1, Arrays.asList(AB), flatPriors, "flatPrior"); - GetGLsTest cfgNoPrior = new GetGLsTest(model, 1, Arrays.asList(AB), flatPriors, "noPrior"); - final AFCalculationResult resultTrackerFlat = cfgFlatPrior.execute(); - final AFCalculationResult resultTrackerNoPrior = cfgNoPrior.execute(); - - final double pRefWithNoPrior = AB.getLikelihoods().getAsVector()[0]; - final double pHetWithNoPrior = AB.getLikelihoods().getAsVector()[1] - Math.log10(0.5); - final double nonRefPost = Math.pow(10, pHetWithNoPrior) / (Math.pow(10, pRefWithNoPrior) + Math.pow(10, pHetWithNoPrior)); - final double log10NonRefPost = Math.log10(nonRefPost); - - if ( ! 
Double.isInfinite(log10NonRefPost) ) { - // check that the no-prior and flat-prior constructions yield same result - Assert.assertEquals(resultTrackerFlat.getLog10PosteriorOfAFGT0(), resultTrackerNoPrior.getLog10PosteriorOfAFGT0()); - } - - } - } - - @Test(enabled = true && !DEBUG_ONLY, dataProvider = "Models") - public void testBiallelicPriors(final AFCalculator model) { - - for ( int REF_PL = 10; REF_PL <= 20; REF_PL += 10 ) { - final Genotype AB = makePL(Arrays.asList(A, C), REF_PL, 0, 10000); - - for ( int log10NonRefPrior = 1; log10NonRefPrior < 10*REF_PL; log10NonRefPrior += 1 ) { - final double refPrior = 1 - QualityUtils.qualToErrorProb(log10NonRefPrior); - final double nonRefPrior = (1-refPrior) / 2; - final double[] priors = MathUtils.normalizeLog10(MathUtils.toLog10(new double[]{refPrior, nonRefPrior, nonRefPrior})); - if ( ! Double.isInfinite(priors[1]) ) { - GetGLsTest cfg = new GetGLsTest(model, 1, Arrays.asList(AB), priors, "pNonRef" + log10NonRefPrior); - final AFCalculationResult resultTracker = cfg.execute(); - final int actualAC = resultTracker.getAlleleCountsOfMLE()[0]; - - final double pRefWithPrior = AB.getLikelihoods().getAsVector()[0] + priors[0]; - final double pHetWithPrior = AB.getLikelihoods().getAsVector()[1] + priors[1] - Math.log10(0.5); - final double nonRefPost = Math.pow(10, pHetWithPrior) / (Math.pow(10, pRefWithPrior) + Math.pow(10, pHetWithPrior)); - final double log10NonRefPost = Math.log10(nonRefPost); - - if ( ! Double.isInfinite(log10NonRefPost) ) - Assert.assertEquals(resultTracker.getLog10PosteriorOfAFGT0(), log10NonRefPost, 1e-2); - - if ( nonRefPost >= 0.9 ) - Assert.assertTrue(resultTracker.isPolymorphic(C, -1)); - - final int expectedMLEAC = 1; // the MLE is independent of the prior - Assert.assertEquals(actualAC, expectedMLEAC, - "actual AC with priors " + log10NonRefPrior + " not expected " - + expectedMLEAC + " priors " + Utils.join(",", priors)); - } - } - } - } - - @Test(enabled = true && ! 
DEBUG_ONLY, dataProvider = "Models") - - // -------------------------------------------------------------------------------- - // - // Test that polymorphic sites (bi and tri) are properly called - // - // -------------------------------------------------------------------------------- - - @DataProvider(name = "polyTestProvider") - public Object[][] makePolyTestProvider() { - List tests = new ArrayList<>(); - - // list of all high-quality models in the system - final List models = Arrays.asList( - AFCalculatorImplementation.DEFAULT, - AFCalculatorImplementation.EXACT_REFERENCE, - AFCalculatorImplementation.EXACT_INDEPENDENT); - - // note that we cannot use small PLs here or the thresholds are hard to set - for ( final int nonTypePLs : Arrays.asList(100, 1000) ) { - for ( final AFCalculatorImplementation model : models ) { - for ( final int allele1AC : Arrays.asList(0, 1, 2, 10, 100, 1000, 10000) ) { - for ( final int nSamples : Arrays.asList(1, 10, 100, 1000, 10000) ) { -// for ( final int nonTypePLs : Arrays.asList(10) ) { -// for ( final AFCalcFactory.Calculation model : models ) { -// for ( final int allele1AC : Arrays.asList(100) ) { -// for ( final int nSamples : Arrays.asList(1000) ) { - if ( nSamples < allele1AC ) continue; - - final double pPerSample = Math.pow(10, nonTypePLs / -10.0); - final double errorFreq = pPerSample * nSamples; - final boolean poly1 = allele1AC > errorFreq && (nonTypePLs * allele1AC) > 30; - - // bi-allelic tests - { - final AFCalculatorTestBuilder testBuilder - = new AFCalculatorTestBuilder(nSamples, 1, model, AFCalculatorTestBuilder.PriorType.human); - final List ACs = Arrays.asList(allele1AC); - tests.add(new Object[]{testBuilder, ACs, nonTypePLs, Arrays.asList(poly1)}); - } - - // multi-allelic tests - for ( final int allele2AC : Arrays.asList(0, 1, 2, 10, 20, 50) ) { - if ( nSamples < allele2AC || allele1AC + allele2AC > nSamples || nSamples > 100 || nSamples == 1) - continue; - - final AFCalculatorTestBuilder testBuilder - = new AFCalculatorTestBuilder(nSamples, 2, model, AFCalculatorTestBuilder.PriorType.human); - final List ACs = Arrays.asList(allele1AC, allele2AC); - final boolean poly2 = allele2AC > errorFreq && (nonTypePLs * allele2AC) > 90; - tests.add(new Object[]{testBuilder, ACs, nonTypePLs, Arrays.asList(poly1, poly2)}); - } - } - } - } - } - - return tests.toArray(new Object[][]{}); - } - - @Test(enabled = true && ! 
DEBUG_ONLY, dataProvider = "polyTestProvider") - public void testCallingGeneral(final AFCalculatorTestBuilder testBuilder, final List ACs, final int nonTypePL, final List expectedPoly ) { - testCalling(testBuilder, ACs, nonTypePL, expectedPoly); - } - - @DataProvider(name = "polyTestProviderLotsOfAlleles") - public Object[][] makepolyTestProviderLotsOfAlleles() { - List tests = new ArrayList<>(); - - // list of all high-quality models in the system - final List models = Arrays.asList(AFCalculatorImplementation.EXACT_INDEPENDENT); - - final List alleleCounts = Arrays.asList(0, 1, 2, 3, 4, 5, 10, 20); - - final int nonTypePLs = 1000; - final int nAlleles = 4; - for ( final AFCalculatorImplementation model : models ) { - for ( final List ACs : Utils.makePermutations(alleleCounts, nAlleles, true) ) { - final List isPoly = new ArrayList<>(ACs.size()); - for ( final int ac : ACs ) { - isPoly.add(ac > 0); - } - - final double acSum = ACs.stream().mapToDouble(i->i).sum(); - for ( final int nSamples : Arrays.asList(1, 10, 100) ) { - if ( nSamples < acSum ) { - continue; - } - final AFCalculatorTestBuilder testBuilder - = new AFCalculatorTestBuilder(nSamples, nAlleles, model, AFCalculatorTestBuilder.PriorType.human); - tests.add(new Object[]{testBuilder, ACs, nonTypePLs, isPoly}); - } - } - } - - return tests.toArray(new Object[][]{}); - } - - @Test(enabled = true && ! DEBUG_ONLY, dataProvider = "polyTestProviderLotsOfAlleles") - public void testCallingLotsOfAlleles(final AFCalculatorTestBuilder testBuilder, final List ACs, final int nonTypePL, final List expectedPoly ) { - testCalling(testBuilder, ACs, nonTypePL, expectedPoly); - } - - private void testCalling(final AFCalculatorTestBuilder testBuilder, final List ACs, final int nonTypePL, final List expectedPoly) { - final AFCalculator calc = testBuilder.makeModel(); - final double[] priors = testBuilder.makePriors(); - final VariantContext vc = testBuilder.makeACTest(ACs, 0, nonTypePL); - final AFCalculationResult result = calc.getLog10PNonRef(vc, PLOIDY, testBuilder.numAltAlleles, priors); - - boolean anyPoly = false; - for ( final boolean onePoly : expectedPoly ) anyPoly = anyPoly || onePoly; - - if ( anyPoly ) - Assert.assertTrue(result.getLog10PosteriorOfAFGT0() > -1); - - for ( int altI = 1; altI < result.getAllelesUsedInGenotyping().size(); altI++ ) { - final int i = altI - 1; - final Allele alt = result.getAllelesUsedInGenotyping().get(altI); - - // must be getCalledChrCount because we cannot ensure that the VC made has our desired ACs - Assert.assertEquals(result.getAlleleCountAtMLE(alt), vc.getCalledChrCount(alt)); - Assert.assertEquals(result.isPolymorphic(alt, -1), (boolean) expectedPoly.get(i), "isPolymorphic for allele " + alt + " " + result.getLog10PosteriorOfAFEq0ForAllele(alt)); - } - } -} \ No newline at end of file diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/genotyper/afcalc/AFCalculatorImplementationUnitTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/genotyper/afcalc/AFCalculatorImplementationUnitTest.java deleted file mode 100644 index b54fb890eb2..00000000000 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/genotyper/afcalc/AFCalculatorImplementationUnitTest.java +++ /dev/null @@ -1,68 +0,0 @@ -package org.broadinstitute.hellbender.tools.walkers.genotyper.afcalc; - -import org.testng.Assert; -import org.testng.annotations.DataProvider; -import org.testng.annotations.Test; - -import java.util.ArrayList; -import java.util.Iterator; -import java.util.List; - -import 
static org.broadinstitute.hellbender.tools.walkers.genotyper.afcalc.AFCalculatorImplementation.*; - -public final class AFCalculatorImplementationUnitTest { - - @DataProvider(name = "AFCalculatorImplementation") - public Iterator AFCalculatorImplementation() { - final List list = new ArrayList<>(); - list.add(new Object[]{2, 2, EXACT_ORIGINAL, EXACT_ORIGINAL}); - list.add(new Object[]{2, 2, EXACT_REFERENCE, EXACT_REFERENCE}); - list.add(new Object[]{2, 6, EXACT_REFERENCE, EXACT_REFERENCE}); - list.add(new Object[]{2, 10, EXACT_REFERENCE, EXACT_REFERENCE}); - list.add(new Object[]{2, 100, EXACT_REFERENCE, EXACT_REFERENCE}); - list.add(new Object[]{1, 6, EXACT_REFERENCE, EXACT_GENERAL_PLOIDY}); - list.add(new Object[]{1, 10, EXACT_REFERENCE, EXACT_GENERAL_PLOIDY}); - list.add(new Object[]{1, 100, EXACT_REFERENCE, EXACT_GENERAL_PLOIDY}); - - list.add(new Object[]{2, 3, EXACT_ORIGINAL, EXACT_INDEPENDENT}); - return list.iterator(); - } - - @Test(dataProvider = "AFCalculatorImplementation") - public void testPickBestOne(final int ploidy, final int ac, final AFCalculatorImplementation preferred, final AFCalculatorImplementation expected) { - Assert.assertEquals(expected, AFCalculatorImplementation.bestValue(ploidy, ac, preferred)); - } - - @DataProvider(name = "impls") - public Iterator impls() { - final List list = new ArrayList<>(); - list.add(new Object[]{EXACT_ORIGINAL, OriginalDiploidExactAFCalculator.class}); - list.add(new Object[]{EXACT_GENERAL_PLOIDY, GeneralPloidyExactAFCalculator.class}); - list.add(new Object[]{EXACT_INDEPENDENT, IndependentAllelesDiploidExactAFCalculator.class}); - list.add(new Object[]{EXACT_REFERENCE, ReferenceDiploidExactAFCalculator.class}); - return list.iterator(); - } - - @Test(dataProvider = "impls") - public void instance(final AFCalculatorImplementation impl, final Class clazz) { - Assert.assertEquals(impl.newInstance().getClass(), clazz); - } - - @Test - public void testFromCalcClass() throws Exception { - Assert.assertEquals(EXACT_INDEPENDENT, fromCalculatorClass(IndependentAllelesDiploidExactAFCalculator.class)); - Assert.assertEquals(EXACT_REFERENCE, fromCalculatorClass(ReferenceDiploidExactAFCalculator.class)); - Assert.assertEquals(EXACT_ORIGINAL, fromCalculatorClass(OriginalDiploidExactAFCalculator.class)); - Assert.assertEquals(EXACT_GENERAL_PLOIDY, fromCalculatorClass(GeneralPloidyExactAFCalculator.class)); - } - - @Test(expectedExceptions = IllegalArgumentException.class) - public void testFromCalcClassNull() throws Exception { - fromCalculatorClass(null); - } - - @Test(expectedExceptions = IllegalArgumentException.class) - public void testFromCalcClassAbstract() throws Exception { - fromCalculatorClass(ExactAFCalculator.class); - } -} diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/genotyper/afcalc/AFCalculatorTestBuilder.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/genotyper/afcalc/AFCalculatorTestBuilder.java deleted file mode 100644 index e5a54a82420..00000000000 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/genotyper/afcalc/AFCalculatorTestBuilder.java +++ /dev/null @@ -1,208 +0,0 @@ -package org.broadinstitute.hellbender.tools.walkers.genotyper.afcalc; - -import htsjdk.variant.variantcontext.*; -import org.apache.commons.lang.ArrayUtils; -import org.broadinstitute.hellbender.tools.walkers.genotyper.GenotypingEngine; -import org.broadinstitute.hellbender.utils.MathUtils; -import org.broadinstitute.hellbender.utils.Utils; -import 
org.broadinstitute.hellbender.utils.variant.HomoSapiensConstants; - -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.List; - -public final class AFCalculatorTestBuilder { - final static Allele A = Allele.create("A", true); - final static Allele C = Allele.create("C"); - final static Allele G = Allele.create("G"); - final static Allele T = Allele.create("T"); - final static Allele AA = Allele.create("AA"); - final static Allele AT = Allele.create("AT"); - final static Allele AG = Allele.create("AG"); - - static int sampleNameCounter = 0; - - final int nSamples; - final int numAltAlleles; - final AFCalculatorImplementation modelType; - final PriorType priorType; - - public AFCalculatorTestBuilder(final int nSamples, final int numAltAlleles, - final AFCalculatorImplementation modelType, final PriorType priorType) { - this.nSamples = nSamples; - this.numAltAlleles = numAltAlleles; - this.modelType = modelType; - this.priorType = priorType; - } - - @Override - public String toString() { - return String.format("AFCalcTestBuilder nSamples=%d nAlts=%d model=%s prior=%s", nSamples, numAltAlleles, modelType, priorType); - } - - public enum PriorType { - flat, - human - } - - public int getNumAltAlleles() { - return numAltAlleles; - } - - public int getnSamples() { - return nSamples; - } - - public AFCalculator makeModel() { - return createCalculator(modelType, nSamples, getNumAltAlleles(), HomoSapiensConstants.DEFAULT_PLOIDY); - } - - /** - * Create a new AFCalc - * - * @param implementation the calculation to use - * @param nSamples the number of samples we'll be using - * @param maxAltAlleles the max. alt alleles to consider for SNPs - * @param ploidy the sample ploidy. Must be consistent with the implementation - * - * @return an initialized AFCalc - */ - private static AFCalculator createCalculator(final AFCalculatorImplementation implementation, final int nSamples, final int maxAltAlleles, final int ploidy) { - if ( implementation == null ) { - throw new IllegalArgumentException("Calculation cannot be null"); - } - if ( nSamples < 0 ) { - throw new IllegalArgumentException("nSamples must be greater than zero " + nSamples); - } - if ( maxAltAlleles < 1 ) { - throw new IllegalArgumentException("maxAltAlleles must be greater than zero " + maxAltAlleles); - } - if ( ploidy < 1 ) { - throw new IllegalArgumentException("sample ploidy must be greater than zero " + ploidy); - } - - if ( ! implementation.usableForParams(ploidy, maxAltAlleles) ) { - throw new IllegalArgumentException("AFCalc " + implementation + " does not support requested ploidy " + ploidy); - } - - return implementation.newInstance(); - } - - public double[] makePriors() { - final int nPriorValues = 2*nSamples+1; - - switch ( priorType ) { - case flat: - return MathUtils.normalizeLog10(new double[nPriorValues]); // flat priors - - //TODO break dependency with human... avoid special reference to this species. 
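The deleted makePriors() above switches between flat priors and heterozygosity-based "human" priors. As a worked example of what that prior model computes, here is a small sketch consistent with the assertions retained in the deleted AFPriorProviderUnitTest later in this diff: the prior for allele count j > 0 is het / j, stored in log10 space, and AC = 0 takes the remaining probability mass. The class and method names are illustrative.

    final class HeterozygosityPriorSketch {
        // Log10 allele-frequency priors for a single sample of the given ploidy.
        static double[] log10HeterozygosityPriors(final double het, final int ploidy) {
            final double[] log10Priors = new double[ploidy + 1];
            double sum = 0.0;
            for (int j = 1; j <= ploidy; j++) {
                final double p = het / j;           // prior mass for allele count j
                log10Priors[j] = Math.log10(p);
                sum += p;
            }
            log10Priors[0] = Math.log10(1.0 - sum); // AC = 0 gets whatever mass is left
            return log10Priors;
        }
    }

For het = 0.001 and ploidy 2 this yields roughly {-0.0007, -3.0, -3.3} in log10 space, i.e. priors of about 0.9985, 0.001, and 0.0005 for AC = 0, 1, and 2.
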
- case human: - final double[] humanPriors = new double[nPriorValues]; - GenotypingEngine.computeAlleleFrequencyPriors(nPriorValues - 1, humanPriors, 0.001, new ArrayList<>()); - return humanPriors; - default: - throw new RuntimeException("Unexpected type " + priorType); - } - } - - public VariantContext makeACTest(final List ACs, final int nNonInformative, final int nonTypePL) { - return makeACTest(ArrayUtils.toPrimitive(ACs.toArray(new Integer[]{})), nNonInformative, nonTypePL); - } - - public VariantContext makeACTest(final int[] ACs, final int nNonInformative, final int nonTypePL) { - final int nChrom = nSamples * 2; - - final int[] nhet = new int[numAltAlleles]; - final int[] nhomvar = new int[numAltAlleles]; - - for ( int i = 0; i < ACs.length; i++ ) { - final double p = ACs[i] / (1.0 * nChrom); - nhomvar[i] = (int) Math.floor((nSamples - nNonInformative) * p * p); - nhet[i] = ACs[i] - 2 * nhomvar[i]; - - if ( nhet[i] < 0 ) - throw new IllegalStateException("Bug! nhet[i] < 0"); - } - - final long calcAC = MathUtils.sum(nhet) + 2 * MathUtils.sum(nhomvar); - if ( calcAC != MathUtils.sum(ACs) ) - throw new IllegalStateException("calculated AC " + calcAC + " not equal to desired AC " + Utils.join(",", ACs)); - - return makeACTest(nhet, nhomvar, nNonInformative, nonTypePL); - } - - public VariantContext makeACTest(final int[] nhet, final int[] nhomvar, final int nNonInformative, final int nonTypePL) { - List samples = new ArrayList<>(nSamples); - - for ( int altI = 0; altI < nhet.length; altI++ ) { - for ( int i = 0; i < nhet[altI]; i++ ) - samples.add(makePL(GenotypeType.HET, nonTypePL, altI+1)); - for ( int i = 0; i < nhomvar[altI]; i++ ) - samples.add(makePL(GenotypeType.HOM_VAR, nonTypePL, altI+1)); - } - - final Genotype nonInformative = makeNonInformative(); - samples.addAll(Collections.nCopies(nNonInformative, nonInformative)); - - final int nRef = Math.max((int) (nSamples - nNonInformative - MathUtils.sum(nhet) - MathUtils.sum(nhomvar)), 0); - samples.addAll(Collections.nCopies(nRef, makePL(GenotypeType.HOM_REF, nonTypePL, 0))); - - samples = samples.subList(0, nSamples); - - if ( samples.size() > nSamples ) - throw new IllegalStateException("too many samples"); - - VariantContextBuilder vcb = new VariantContextBuilder("x", "1", 1, 1, getAlleles()); - vcb.genotypes(samples); - return vcb.make(); - } - - public List getAlleles() { - return Arrays.asList(A, C, G, T, AA, AT, AG).subList(0, numAltAlleles+1); - } - - public List getAlleles(final GenotypeType type, final int altI) { - switch (type) { - case HOM_REF: return Arrays.asList(getAlleles().get(0), getAlleles().get(0)); - case HET: return Arrays.asList(getAlleles().get(0), getAlleles().get(altI)); - case HOM_VAR: return Arrays.asList(getAlleles().get(altI), getAlleles().get(altI)); - default: throw new IllegalArgumentException("Unexpected type " + type); - } - } - - public Genotype makePL(final List expectedGT, int ... 
pls) { - GenotypeBuilder gb = new GenotypeBuilder("sample" + sampleNameCounter++); - gb.alleles(expectedGT); - gb.PL(pls); - return gb.make(); - } - - private int numPLs() { - return GenotypeLikelihoods.numLikelihoods(numAltAlleles + 1, 2); - } - - public Genotype makeNonInformative() { - final int[] nonInformativePLs = new int[GenotypeLikelihoods.numLikelihoods(numAltAlleles, 2)]; - return makePL(Arrays.asList(Allele.NO_CALL, Allele.NO_CALL), nonInformativePLs); - } - - public Genotype makePL(final GenotypeType type, final int nonTypePL, final int altI) { - GenotypeBuilder gb = new GenotypeBuilder("sample" + sampleNameCounter++); - gb.alleles(getAlleles(type, altI)); - - final int[] pls = new int[numPLs()]; - Arrays.fill(pls, nonTypePL); - - int index = 0; - switch ( type ) { - case HOM_REF: index = GenotypeLikelihoods.calculatePLindex(0, 0); break; - case HET: index = GenotypeLikelihoods.calculatePLindex(0, altI); break; - case HOM_VAR: index = GenotypeLikelihoods.calculatePLindex(altI, altI); break; - } - pls[index] = 0; - gb.PL(pls); - - return gb.make(); - } -} \ No newline at end of file diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/genotyper/afcalc/AFPriorProviderUnitTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/genotyper/afcalc/AFPriorProviderUnitTest.java deleted file mode 100644 index 2ead2c15643..00000000000 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/genotyper/afcalc/AFPriorProviderUnitTest.java +++ /dev/null @@ -1,144 +0,0 @@ -package org.broadinstitute.hellbender.tools.walkers.genotyper.afcalc; - -import org.broadinstitute.hellbender.utils.MathUtils; -import org.broadinstitute.hellbender.utils.Utils; -import org.broadinstitute.hellbender.GATKBaseTest; -import org.testng.Assert; -import org.testng.annotations.DataProvider; -import org.testng.annotations.Test; - -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; -import java.util.Random; - -public final class AFPriorProviderUnitTest extends GATKBaseTest { - - private static final double TOLERANCE = 0.0001; - - @Test(dataProvider="HeterozygosityProviderData") - public void testHeterozygosityProvider(final double h, final int useCount, final int minPloidy, final int maxPloidy) { - final double het = h / maxPloidy; - final Random rdn = Utils.getRandomGenerator(); - final int[] plodies = new int[useCount]; - for (int i = 0; i < useCount; i++) - plodies[i] = rdn.nextInt(maxPloidy - minPloidy + 1) + minPloidy; - - final AFPriorProvider provider = new HeterozygosityAFPriorProvider(het); - for (int i = 0; i < useCount; i++) { - final int ploidy = plodies[i]; - double[] priors = provider.forTotalPloidy(ploidy); - Assert.assertNotNull(priors); - Assert.assertEquals(priors.length, ploidy + 1); - Assert.assertEquals(MathUtils.approximateLog10SumLog10(priors), 0, TOLERANCE); - for (int j = 0; j < priors.length; j++) { - Assert.assertTrue(!Double.isNaN(priors[j])); - Assert.assertTrue(priors[j] < 0); - if (j > 0) Assert.assertEquals(priors[j], Math.log10(het) - Math.log10(j), TOLERANCE); - } - } - } - - @Test(expectedExceptions = IllegalArgumentException.class) - public void testErrorNegativeHet() throws Exception { - new HeterozygosityAFPriorProvider(-0.1); - } - - @Test(expectedExceptions = IllegalArgumentException.class) - public void testErrorTooHighHet() throws Exception { - new HeterozygosityAFPriorProvider(1.1); - } - - @Test(expectedExceptions = IllegalArgumentException.class) - public void testErrorNaNHet() throws Exception { - new 
HeterozygosityAFPriorProvider(Double.NaN); - } - - @Test(expectedExceptions = IllegalArgumentException.class) - public void testErrorHeterozygosityTooHighForPloidy() throws Exception { - new HeterozygosityAFPriorProvider(0.999).buildPriors(2); - } - - @Test(dataProvider="CustomProviderData") - public void testCustomProvider(final int ploidy) { - final double[] priors = new double[ploidy]; - final Random rdn = Utils.getRandomGenerator(); - double remaining = 1; - final List priorsList = new ArrayList<>(); - for (int i = 0; i < priors.length; i++) { - priors[i] = remaining * rdn.nextDouble() * (.1 / ploidy ); - remaining -= priors[i]; - priorsList.add(priors[i]); - } - - final AFPriorProvider provider = new CustomAFPriorProvider(priorsList); - - final double[] providedPriors = provider.forTotalPloidy(ploidy); - Assert.assertNotNull(providedPriors); - Assert.assertEquals(providedPriors.length, priors.length + 1); - for (int i = 0; i < priors.length; i++) - Assert.assertEquals(providedPriors[i + 1], Math.log10(priors[i]), TOLERANCE); - Assert.assertEquals(MathUtils.approximateLog10SumLog10(providedPriors), 0, TOLERANCE); - } - - - private double[] hets = { 0.00001, 0.001, 0.1, 0.5, 0.99, 0.999 }; - private int[] useCounts = { 10, 100, 1000 }; - - private int[] ploidy = { 1 , 2, 3, 10, 100, 200, 500}; - - @DataProvider(name="CustomProviderData") - public Object[][] customProviderData() { - final Object[][] result = new Object[ploidy.length][]; - for (int i = 0; i < result.length; i++) - result[i] = new Object[] { ploidy[i] }; - return result; - } - - @DataProvider(name="HeterozygosityProviderData") - public Object[][] heterozygosityProviderData() { - final Object[][] result = new Object[hets.length * useCounts.length * ((ploidy.length + 1) * (ploidy.length) / 2)][]; - int idx = 0; - for (double h : hets) - for (int sc : useCounts) - for (int i = 0; i < ploidy.length; i++) - for (int j = i; j < ploidy.length; j++) - result[idx++] = new Object[] { h, sc, ploidy[i], ploidy[j]}; - return result; - } - - @Test(expectedExceptions = IllegalArgumentException.class) - public void testCustomErrorPloidy() throws Exception { - new CustomAFPriorProvider(Arrays.asList(0.5)).forTotalPloidy(-1); - } - - @Test(expectedExceptions = IllegalArgumentException.class) - public void testCustomErrorNull() throws Exception { - new CustomAFPriorProvider(null); - } - - @Test(expectedExceptions = IllegalArgumentException.class) - public void testCustomHetError() throws Exception { - new CustomAFPriorProvider(Arrays.asList(-1.0)); - } - - @Test(expectedExceptions = IllegalArgumentException.class) - public void testCustomNaNError() throws Exception { - new CustomAFPriorProvider(Arrays.asList(Double.NaN)); - } - - @Test(expectedExceptions = IllegalArgumentException.class) - public void testCustomHetTooHighError() throws Exception { - new CustomAFPriorProvider(Arrays.asList(0.5, 0.6)); - } - - @Test - public void testCustomPriors() throws Exception { - final List PRIORS = Arrays.asList(0.5, 0.4); - double[] priors = new CustomAFPriorProvider(PRIORS).buildPriors(17); - for ( int i = 0; i < priors.length; i++ ) { - final double value = i == 0 ? 
1 - PRIORS.stream().mapToDouble(Double::doubleValue).sum() : PRIORS.get(i-1); - Assert.assertEquals(priors[i], Math.log10(value), TOLERANCE); - } - } -} diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/genotyper/afcalc/AlleleFrequencyCalculatorUnitTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/genotyper/afcalc/AlleleFrequencyCalculatorUnitTest.java index 04d6791103a..55dec5c5061 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/genotyper/afcalc/AlleleFrequencyCalculatorUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/genotyper/afcalc/AlleleFrequencyCalculatorUnitTest.java @@ -63,11 +63,11 @@ public void testSymmetries() { for (final Pair pair : switchBWithCPairs) { final VariantContext vc1 = pair.getLeft(); final VariantContext vc2 = pair.getRight(); - final AFCalculationResult result1 = afCalc.getLog10PNonRef(vc1); - final AFCalculationResult result2 = afCalc.getLog10PNonRef(vc2); - Assert.assertEquals(result1.getLog10PosteriorOfAFEq0(), result2.getLog10PosteriorOfAFEq0(), EPS); - Assert.assertEquals(result1.getLog10PosteriorOfAFEq0ForAllele(B), result2.getLog10PosteriorOfAFEq0ForAllele(C), EPS); - Assert.assertEquals(result1.getLog10PosteriorOfAFEq0ForAllele(C), result2.getLog10PosteriorOfAFEq0ForAllele(B), EPS); + final AFCalculationResult result1 = afCalc.calculate(vc1); + final AFCalculationResult result2 = afCalc.calculate(vc2); + Assert.assertEquals(result1.log10ProbOnlyRefAlleleExists(), result2.log10ProbOnlyRefAlleleExists(), EPS); + Assert.assertEquals(result1.getLog10PosteriorOfAlleleAbsent(B), result2.getLog10PosteriorOfAlleleAbsent(C), EPS); + Assert.assertEquals(result1.getLog10PosteriorOfAlleleAbsent(C), result2.getLog10PosteriorOfAlleleAbsent(B), EPS); } } @@ -98,7 +98,7 @@ public void testMLECounts() { for (final Pair pair : vcWithExpectedCounts) { final VariantContext vc = pair.getLeft(); final int[] expected = pair.getRight(); - final int[] actual = afCalc.getLog10PNonRef(vc).getAlleleCountsOfMLE(); + final int[] actual = afCalc.calculate(vc).getAlleleCountsOfMLE(); Assert.assertEquals(actual, expected); } } @@ -122,7 +122,7 @@ public void testManySamplesWithLowConfidence() { final List vcsWithDifferentNumbersOfSamples = IntStream.range(1, 11) .mapToObj(n -> makeVC(alleles, Collections.nCopies(n, AB))).collect(Collectors.toList()); - final int[] counts = vcsWithDifferentNumbersOfSamples.stream().mapToInt(vc -> afCalc.getLog10PNonRef(vc).getAlleleCountAtMLE(B)).toArray(); + final int[] counts = vcsWithDifferentNumbersOfSamples.stream().mapToInt(vc -> afCalc.calculate(vc).getAlleleCountAtMLE(B)).toArray(); Assert.assertEquals(counts[0],0); // one sample Assert.assertEquals(counts[1],0); // two samples Assert.assertEquals(counts[4],2); // five samples @@ -143,11 +143,11 @@ public void testManyVeryConfidentSamples() { for (final int numSamples : new int[] {100, 1000}) { final VariantContext vc = makeVC(alleles, Collections.nCopies(numSamples, AC)); - final AFCalculationResult result = afCalc.getLog10PNonRef(vc); + final AFCalculationResult result = afCalc.calculate(vc); Assert.assertEquals(result.getAlleleCountAtMLE(B), 0); Assert.assertEquals(result.getAlleleCountAtMLE(C), numSamples); - Assert.assertEquals(result.getLog10LikelihoodOfAFEq0(), result.getLog10PosteriorOfAFEq0ForAllele(C), numSamples * 0.01); + Assert.assertEquals(result.log10ProbOnlyRefAlleleExists(), result.getLog10PosteriorOfAlleleAbsent(C), numSamples * 0.01); // with a large number of samples all with the AC 
genotype, the calculator will learn that the frequencies of the A and C alleles // are 1/2, while the frequency of the B allele is 0. Thus the only genotypes with appreciable priors are AA, AC, and CC @@ -159,7 +159,7 @@ public void testManyVeryConfidentSamples() { // thus the probability that N identical samples don't have the C allele is (x/2)^N, and the log-10 probability of this is // N * [log_10(1/2) - PL/10] final double expectedLog10ProbabilityOfNoCAllele = numSamples * (MathUtils.LOG10_ONE_HALF - EXTREMELY_CONFIDENT_PL / 10); - Assert.assertEquals(result.getLog10PosteriorOfAFEq0ForAllele(C), expectedLog10ProbabilityOfNoCAllele, numSamples * 0.01); + Assert.assertEquals(result.getLog10PosteriorOfAlleleAbsent(C), expectedLog10ProbabilityOfNoCAllele, numSamples * 0.01); } } @@ -183,7 +183,7 @@ public void testApproximateMultiplicativeConfidence() { // since we maintain a flat allele frequency distribution, the probability of being ref as each successive sample is added // is multiplied by the probability of any one. Thus we get an arithmetic series in log space final double[] log10PRefs = vcsWithDifferentNumbersOfSamples.stream() - .mapToDouble(vc -> afCalc.getLog10PNonRef(vc).getLog10LikelihoodOfAFEq0()).toArray(); + .mapToDouble(vc -> afCalc.calculate(vc).log10ProbOnlyRefAlleleExists()).toArray(); for (int n = 0; n < 9; n++) { Assert.assertEquals(log10PRefs[n+1] - log10PRefs[n], log10PRefs[0], 0.01); @@ -200,7 +200,7 @@ public void testManyRefSamplesDontKillGoodVariant() { final List genotypeList = new ArrayList<>(Collections.nCopies(numRef, AA)); genotypeList.add(AB); final VariantContext vc = makeVC(alleles, genotypeList); - final double log10PRef = afCalc.getLog10PNonRef(vc).getLog10LikelihoodOfAFEq0(); + final double log10PRef = afCalc.calculate(vc).log10ProbOnlyRefAlleleExists(); Assert.assertTrue(log10PRef < (-EXTREMELY_CONFIDENT_PL/10) + Math.log10(numRef) + 1); } } @@ -226,7 +226,7 @@ public void testSpanningDeletionIsNotConsideredVariant() { // first test the span del genotype alone. 
Its best PL containing the SNP is 100, so we expect a variant probability // of about 10^(-100/10) -- a bit less due to the prior bias in favor of the reference final VariantContext vcSpanDel = makeVC(alleles, Arrays.asList(spanDel)); - final double log10PVariant = afCalc.getLog10PNonRef(vcSpanDel).getLog10LikelihoodOfAFGT0(); + final double log10PVariant = afCalc.calculate(vcSpanDel).log10ProbVariantPresent(); Assert.assertTrue(log10PVariant < - 10); // now test a realistic situation of two samples, one with a low-quality SNP and one with the spanning deletion @@ -235,9 +235,9 @@ public void testSpanningDeletionIsNotConsideredVariant() { // Furthermore, to be precise it should be really behave almost identically to a hom ref *haploid* sample, // so we check that, too final VariantContext vcLowQualSnp = makeVC(alleles, Arrays.asList(lowQualSNP)); - final double lowQualSNPQualScore = afCalc.getLog10PNonRef(vcLowQualSnp).getLog10LikelihoodOfAFGT0(); + final double lowQualSNPQualScore = afCalc.calculate(vcLowQualSnp).log10ProbVariantPresent(); final VariantContext vcBoth = makeVC(alleles, Arrays.asList(lowQualSNP, spanDel)); - final double bothQualScore = afCalc.getLog10PNonRef(vcBoth).getLog10LikelihoodOfAFGT0(); + final double bothQualScore = afCalc.calculate(vcBoth).log10ProbVariantPresent(); Assert.assertEquals(lowQualSNPQualScore, bothQualScore, 0.1); Assert.assertTrue(bothQualScore < lowQualSNPQualScore); @@ -245,7 +245,7 @@ public void testSpanningDeletionIsNotConsideredVariant() { final Genotype haploidRef = makeGenotype(1, haploidRefPls); final VariantContext vcLowQualSnpAndHaploidRef = makeVC(alleles, Arrays.asList(lowQualSNP, haploidRef)); - final double lowQualSNPAndHaplpidRefQualScore = afCalc.getLog10PNonRef(vcLowQualSnpAndHaploidRef).getLog10LikelihoodOfAFGT0(); + final double lowQualSNPAndHaplpidRefQualScore = afCalc.calculate(vcLowQualSnpAndHaploidRef).log10ProbVariantPresent(); Assert.assertEquals(bothQualScore, lowQualSNPAndHaplpidRefQualScore, 1e-5); // as a final test, we check that getting rid of the spanning deletion allele, in the sense that @@ -256,7 +256,7 @@ public void testSpanningDeletionIsNotConsideredVariant() { final int[] snpPlsWithoutSpanDel = new int[] {10, 0, 40}; final VariantContext vcNoSpanDel = makeVC(Arrays.asList(A,B), Arrays.asList(makeGenotype(ploidy, snpPlsWithoutSpanDel), makeGenotype(1, haploidRefPlsWithoutSpanDel))); - final double noSpanDelQualScore = afCalc.getLog10PNonRef(vcNoSpanDel).getLog10LikelihoodOfAFGT0(); + final double noSpanDelQualScore = afCalc.calculate(vcNoSpanDel).log10ProbVariantPresent(); Assert.assertEquals(bothQualScore, noSpanDelQualScore, 1e-6); } @@ -274,8 +274,8 @@ public void testPresenceOfUnlikelySpanningDeletionDoesntAffectResults() { final Genotype genotypeWithSpanDel = makeGenotype(ploidy, plsWithSpanDel); final VariantContext vcWithoutSpanDel = makeVC(allelesWithoutSpanDel, Arrays.asList(genotypeWithoutSpanDel)); final VariantContext vcWithSpanDel = makeVC(allelesWithSpanDel, Arrays.asList(genotypeWithSpanDel)); - final double log10PVariantWithoutSpanDel = afCalc.getLog10PNonRef(vcWithoutSpanDel).getLog10LikelihoodOfAFGT0(); - final double log10PVariantWithSpanDel = afCalc.getLog10PNonRef(vcWithSpanDel).getLog10LikelihoodOfAFGT0(); + final double log10PVariantWithoutSpanDel = afCalc.calculate(vcWithoutSpanDel).log10ProbVariantPresent(); + final double log10PVariantWithSpanDel = afCalc.calculate(vcWithSpanDel).log10ProbVariantPresent(); Assert.assertEquals(log10PVariantWithoutSpanDel, log10PVariantWithSpanDel, 
0.0001); } @@ -289,7 +289,7 @@ public void testSpanningDeletionWithVeryUnlikelyAltAllele() { // make PLs that don't support the alt allele final List pls = Arrays.asList(new int[] {0,10000,10000,10000,10000, 10000,10000,10000,10000,10000,10000,10000,10000,10000,10000}); final VariantContext vc = makeVC(alleles, pls.stream().map(pl -> makeGenotype(ploidy, pl)).collect(Collectors.toList())); - final double log10PVariant = afCalc.getLog10PNonRef(vc).getLog10LikelihoodOfAFGT0(); + final double log10PVariant = afCalc.calculate(vc).log10ProbVariantPresent(); } // make PLs that correspond to an obvious call i.e. one PL is relatively big and the rest are zero @@ -304,7 +304,7 @@ private static int[] PLsForObviousCall(final int ploidy, final int numAlleles, f private static Genotype genotypeWithObviousCall(final int ploidy, final int numAlleles, final int[] alleles, final int PL) { return makeGenotype(ploidy, PLsForObviousCall(ploidy, numAlleles, alleles, PL)); } - //note the call is irrelevant to the AFCalculator, which only looks at PLs + //note the call is irrelevant to the AlleleFrequencyCalculator, which only looks at PLs private static Genotype makeGenotype(final int ploidy, int ... pls) { return new GenotypeBuilder("sample" + sampleNameCounter++).alleles(Collections.nCopies(ploidy, Allele.NO_CALL)).PL(pls).make(); } diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/genotyper/afcalc/ExactACSetUnitTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/genotyper/afcalc/ExactACSetUnitTest.java deleted file mode 100644 index 75543c2c113..00000000000 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/genotyper/afcalc/ExactACSetUnitTest.java +++ /dev/null @@ -1,27 +0,0 @@ -package org.broadinstitute.hellbender.tools.walkers.genotyper.afcalc; - -import org.broadinstitute.hellbender.utils.MathUtils; -import org.testng.Assert; -import org.testng.annotations.Test; - -public final class ExactACSetUnitTest { - @Test - public void test1() throws Exception { - final int[] c1 = {1,2,3}; - final int size1 = 7; - ExactACcounts ec1= new ExactACcounts(c1); - ExactACset acs1 = new ExactACset(size1, ec1); - Assert.assertEquals(acs1.getACsum(), MathUtils.sum(c1)); - Assert.assertEquals(acs1.getACcounts(), ec1); - Assert.assertEquals(acs1.getLog10Likelihoods().length, size1); - - final int[] c2 = {1,2,3}; - final int size2 = 7; - ExactACcounts ec2= new ExactACcounts(c2); - ExactACset acs2 = new ExactACset(size2, ec2); - Assert.assertEquals(acs1, acs2); - Assert.assertEquals(acs1.hashCode(), acs2.hashCode()); - Assert.assertEquals(acs2, acs1); - - } -} diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/genotyper/afcalc/ExactACcountsUnitTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/genotyper/afcalc/ExactACcountsUnitTest.java deleted file mode 100644 index 4024a3d2e23..00000000000 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/genotyper/afcalc/ExactACcountsUnitTest.java +++ /dev/null @@ -1,50 +0,0 @@ -package org.broadinstitute.hellbender.tools.walkers.genotyper.afcalc; - -import org.testng.Assert; -import org.testng.annotations.Test; - -public final class ExactACcountsUnitTest { - @Test - public void test1() throws Exception { - final int[] c1 = {1, 2, 3}; - final int[] c1equal = {1, 2, 3}; - final int[] c2 = {5,6,7}; - final ExactACcounts ec1 = new ExactACcounts(c1); - final ExactACcounts ec1Same = new ExactACcounts(c1);//same array - final ExactACcounts ec1Equal = new ExactACcounts(c1equal); - final 
ExactACcounts ec2 = new ExactACcounts(c2); - - Assert.assertEquals(ec1, ec1); - Assert.assertEquals(ec1Equal, ec1Equal); - Assert.assertEquals(ec2, ec2); - - Assert.assertTrue(ec1.getCounts() == ec1Same.getCounts()); - Assert.assertFalse(ec1.getCounts() == ec1Equal.getCounts()); - Assert.assertEquals(ec1.getCounts(), ec1Equal.getCounts()); - - Assert.assertEquals(ec1, ec1Equal); - Assert.assertEquals(ec1Equal, ec1); - Assert.assertFalse(ec1 == ec1Equal); - Assert.assertEquals(ec1.hashCode(), ec1Equal.hashCode()); - - Assert.assertNotEquals(ec1, ec2); - Assert.assertNotEquals(ec2, ec1); - Assert.assertNotEquals(ec1Equal, ec2); - Assert.assertNotEquals(ec2, ec1Equal); - - Assert.assertNotNull(ec1.toString()); - Assert.assertNotNull(ec1Equal.toString()); - Assert.assertNotNull(ec2.toString()); - } - - @Test(expectedExceptions = IllegalArgumentException.class) - public void testEmpty() throws Exception { - new ExactACcounts(new int[0]); - } - - @Test(expectedExceptions = IllegalArgumentException.class) - public void testNull() throws Exception { - new ExactACcounts(null); - } - -} diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/genotyper/afcalc/FixedAFCalculatorProviderUnitTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/genotyper/afcalc/FixedAFCalculatorProviderUnitTest.java deleted file mode 100644 index 03ffb7715d0..00000000000 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/genotyper/afcalc/FixedAFCalculatorProviderUnitTest.java +++ /dev/null @@ -1,138 +0,0 @@ -package org.broadinstitute.hellbender.tools.walkers.genotyper.afcalc; - -import htsjdk.variant.variantcontext.*; -import org.broadinstitute.hellbender.tools.walkers.genotyper.GenotypeCalculationArgumentCollection; -import org.broadinstitute.hellbender.tools.walkers.genotyper.StandardCallerArgumentCollection; -import org.testng.Assert; -import org.testng.annotations.DataProvider; -import org.testng.annotations.Test; - -import java.util.Arrays; -import java.util.List; - -/** - * Tests {@link org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.FixedAFCalculatorProvider} - * - * @author Valentin Ruano-Rubio <valentin@broadinstitute.org> - */ -public final class FixedAFCalculatorProviderUnitTest { - - @Test(dataProvider="nonThreadSafeConstructorsData") - public void testNonThreadSafeConstructors(final int ploidy, final int maxAltAlleles, final AFCalculatorImplementation preferred) { - final GenotypeCalculationArgumentCollection args = new GenotypeCalculationArgumentCollection(); - args.MAX_ALTERNATE_ALLELES = maxAltAlleles; - args.samplePloidy = ploidy; - final StandardCallerArgumentCollection callerArgs = new StandardCallerArgumentCollection(); - if (preferred != null ) { - callerArgs.requestedAlleleFrequencyCalculationModel = preferred; - } - callerArgs.genotypeArgs = args; - final FixedAFCalculatorProvider providerCallerArgs = new FixedAFCalculatorProvider(callerArgs, true); - final FixedAFCalculatorProvider providerCallerArgsNoVerify = new FixedAFCalculatorProvider(callerArgs, false); - final FixedAFCalculatorProvider providerGenotypingArgs = new FixedAFCalculatorProvider(args, true); - - Assert.assertNotNull(providerCallerArgs.getInstance(ploidy, maxAltAlleles)); - Assert.assertNotNull(providerCallerArgsNoVerify.getInstance(ploidy, maxAltAlleles)); - Assert.assertTrue(AFCalculatorImplementation.fromCalculatorClass(providerCallerArgs.getInstance(ploidy, maxAltAlleles).getClass()).usableForParams(ploidy, maxAltAlleles)); - 
Assert.assertTrue(AFCalculatorImplementation.fromCalculatorClass(providerCallerArgsNoVerify.getInstance(ploidy, maxAltAlleles).getClass()).usableForParams(ploidy, maxAltAlleles)); - Assert.assertNotNull(providerGenotypingArgs.getInstance(ploidy, maxAltAlleles)); - Assert.assertTrue(AFCalculatorImplementation.fromCalculatorClass(providerGenotypingArgs.getInstance(ploidy, maxAltAlleles).getClass()).usableForParams(ploidy, maxAltAlleles)); - - final VariantContext vc= new VariantContextBuilder().chr("chr1").alleles("A", "T").make(); - Assert.assertEquals(providerCallerArgs.getInstance(vc, ploidy, maxAltAlleles), providerCallerArgs.getInstance(ploidy, maxAltAlleles));//equal because there's no samples in vc - Assert.assertEquals(providerCallerArgsNoVerify.getInstance(vc, ploidy, maxAltAlleles), providerCallerArgsNoVerify.getInstance(ploidy, maxAltAlleles));//equal because there's no samples in vc - - if (preferred != null && preferred.usableForParams(ploidy,maxAltAlleles)) { - Assert.assertEquals(AFCalculatorImplementation.fromCalculatorClass(providerCallerArgs.getInstance(ploidy, maxAltAlleles).getClass()), preferred); - Assert.assertEquals(AFCalculatorImplementation.fromCalculatorClass(providerCallerArgsNoVerify.getInstance(ploidy, maxAltAlleles).getClass()), preferred); - } - } - - - private static final int[] PLOIDIES = { 1,2,3,4,10 }; - private static final int[] MAX_ALT_ALLELES = { 1,2,3,4,10}; - - @DataProvider(name="nonThreadSafeConstructorsData") - public Object[][] nonThreadSafeConstructorsData() { - final Object[][] result = new Object[PLOIDIES.length * MAX_ALT_ALLELES.length * (AFCalculatorImplementation.values().length + 1)][]; - int idx = 0; - for (int i = 0; i < PLOIDIES.length; i++) { - for (int j = 0; j < MAX_ALT_ALLELES.length; j++) { - result[idx++] = new Object[] { PLOIDIES[i], MAX_ALT_ALLELES[j], null }; - for (final AFCalculatorImplementation impl : AFCalculatorImplementation.values()) { - result[idx++] = new Object[]{PLOIDIES[i], MAX_ALT_ALLELES[j], impl}; - } - } - } - return result; - } - - @Test(expectedExceptions = IllegalArgumentException.class) - public void testPloidyError() throws Exception { - final GenotypeCalculationArgumentCollection args = new GenotypeCalculationArgumentCollection(); - args.samplePloidy = -2; - final StandardCallerArgumentCollection callerArgs = new StandardCallerArgumentCollection(); - callerArgs.genotypeArgs = args; - - new FixedAFCalculatorProvider(callerArgs, false); - } - - @Test(expectedExceptions = IllegalArgumentException.class) - public void testMaxAltAllelesError() throws Exception { - final GenotypeCalculationArgumentCollection args = new GenotypeCalculationArgumentCollection(); - args.samplePloidy = 2; - args.MAX_ALTERNATE_ALLELES = -2; - final StandardCallerArgumentCollection callerArgs = new StandardCallerArgumentCollection(); - callerArgs.genotypeArgs = args; - - new FixedAFCalculatorProvider(callerArgs, false); - } - - @Test(expectedExceptions = IllegalArgumentException.class) - public void testTesInstanceInvalidPloidyError() throws Exception { - final GenotypeCalculationArgumentCollection args = new GenotypeCalculationArgumentCollection(); - args.samplePloidy = 2; - args.MAX_ALTERNATE_ALLELES = 2; - final StandardCallerArgumentCollection callerArgs = new StandardCallerArgumentCollection(); - callerArgs.genotypeArgs = args; - - final FixedAFCalculatorProvider p = new FixedAFCalculatorProvider(callerArgs, true); - p.getInstance(5, 2); - } - - @Test - public void testTesInstanceInvalidPloidyError_noVerify() throws Exception { - 
final GenotypeCalculationArgumentCollection args = new GenotypeCalculationArgumentCollection(); - args.samplePloidy = 2; - args.MAX_ALTERNATE_ALLELES = 2; - final StandardCallerArgumentCollection callerArgs = new StandardCallerArgumentCollection(); - callerArgs.genotypeArgs = args; - - final FixedAFCalculatorProvider p = new FixedAFCalculatorProvider(callerArgs, false); - p.getInstance(5, 2); //this passes - no validation - } - - @Test(expectedExceptions = IllegalArgumentException.class) - public void testTesInstanceInvalidAlleleNumberError() throws Exception { - final GenotypeCalculationArgumentCollection args = new GenotypeCalculationArgumentCollection(); - args.samplePloidy = 2; - args.MAX_ALTERNATE_ALLELES = 2; - final StandardCallerArgumentCollection callerArgs = new StandardCallerArgumentCollection(); - callerArgs.genotypeArgs = args; - - final FixedAFCalculatorProvider p = new FixedAFCalculatorProvider(callerArgs, true); - p.getInstance(2, 18); - } - - @Test - public void testTesInstanceInvalidAlleleNumberError_noVerify() throws Exception { - final GenotypeCalculationArgumentCollection args = new GenotypeCalculationArgumentCollection(); - args.samplePloidy = 2; - args.MAX_ALTERNATE_ALLELES = 2; - final StandardCallerArgumentCollection callerArgs = new StandardCallerArgumentCollection(); - callerArgs.genotypeArgs = args; - - final FixedAFCalculatorProvider p = new FixedAFCalculatorProvider(callerArgs, false); - p.getInstance(2, 18); //this passes - no validation - } -} diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/genotyper/afcalc/GeneralPloidyAFCalculationModelUnitTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/genotyper/afcalc/GeneralPloidyAFCalculationModelUnitTest.java deleted file mode 100644 index f5b562494da..00000000000 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/genotyper/afcalc/GeneralPloidyAFCalculationModelUnitTest.java +++ /dev/null @@ -1,143 +0,0 @@ -package org.broadinstitute.hellbender.tools.walkers.genotyper.afcalc; - -import htsjdk.variant.variantcontext.*; -import org.broadinstitute.hellbender.GATKBaseTest; -import org.testng.Assert; -import org.testng.annotations.BeforeClass; -import org.testng.annotations.DataProvider; -import org.testng.annotations.Test; - -import java.util.Arrays; - -public final class GeneralPloidyAFCalculationModelUnitTest extends GATKBaseTest { - - static double[] AA1, AB1, BB1; - static double[] AA2, AB2, AC2, BB2, BC2, CC2; - static double[] A4_1, B4_1, C4_1, D4_1, E4_1,F4_1; - static double[] A4_400, B4_310, C4_220, D4_130, E4_121, F4_013; - static final int numSamples = 4; - static final int samplePloidy = 4; // = 2*samplesPerPool - - @BeforeClass - public void before() { - // legacy diploid cases - AA1 = new double[]{-5.0, -20.0, -20.0}; - AB1 = new double[]{-20.0, 0.0, -20.0}; - BB1 = new double[]{-20.0, -20.0, 0.0}; - - // diploid, nAlleles = 3. Ordering is [2 0 0] [1 1 0] [0 2 0] [1 0 1] [0 1 1] [0 0 2], ie AA AB BB AC BC CC - AA2 = new double[]{0.0, -20.0, -20.0, -20.0, -20.0, -20.0}; - AB2 = new double[]{-20.0, 0.0, -20.0, -20.0, -20.0, -20.0}; - AC2 = new double[]{-20.0, -20.0, -20.0, 0.0, -20.0, -20.0}; - BB2 = new double[]{-20.0, -20.0, 0.0, -20.0, -20.0, -20.0}; - BC2 = new double[]{-20.0, -20.0, -20.0, -20.0, 0.0, -20.0}; - CC2 = new double[]{-20.0, -20.0, -20.0, -20.0, -20.0, 0.0}; - - // pool (i.e. 
polyploid cases) - // NAlleles = 2, ploidy=4 - // ordering is [4 0] [3 1] [2 2 ] [1 3] [0 4] - - A4_1 = new double[]{-3.0, -20.0, -20.0, -20.0, -20.0}; - B4_1 = new double[]{-20.0, 0.0, -20.0, -20.0, -20.0}; - C4_1 = new double[]{-20.0, -20.0, 0.0, -20.0, -20.0}; - D4_1 = new double[]{-20.0, -20.0, 0.0, 0.0, -20.0}; - E4_1 = new double[]{-20.0, -20.0, 0.0, 0.0, -20.0}; - F4_1 = new double[]{-20.0, -20.0, -20.0, -20.0, 0.0}; - - // NAlleles = 3, ploidy = 4 - // ordering is [4 0 0] [3 1 0] [2 2 0] [1 3 0] [0 4 0] [3 0 1] [2 1 1] [1 2 1] [0 3 1] [2 0 2] [1 1 2] [0 2 2] [1 0 3] [0 1 3] [0 0 4] - A4_400 = new double[]{0.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0}; - B4_310 = new double[]{-20.0, 0.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0}; - C4_220 = new double[]{-20.0, -20.0, 0.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0}; - D4_130 = new double[]{-20.0, -20.0, -20.0, 0.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0}; - E4_121 = new double[]{-20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, 0.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0}; - F4_013 = new double[]{-20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, 0.0, -20.0}; - - } - - private class GetGLsTest extends TestDataProvider { - GenotypesContext GLs; - int numAltAlleles; - String name; - int ploidy; - private GetGLsTest(String name, int numAltAlleles, int ploidy, Genotype... arg) { - super(GetGLsTest.class, name); - GLs = GenotypesContext.create(arg); - this.name = name; - this.numAltAlleles = numAltAlleles; - this.ploidy = ploidy; - } - - public String toString() { - return String.format("%s input=%s", super.toString(), GLs); - } - } - - private static Genotype createGenotype(String name, double[] gls, int ploidy) { - Allele[] alleles = new Allele[ploidy]; - - for (int i=0; i < ploidy; i++) - alleles[i] = Allele.NO_CALL; - - return new GenotypeBuilder(name, Arrays.asList(alleles)).PL(gls).make(); - } - - @DataProvider(name = "getGLs") - public Object[][] createGLsData() { - - // bi-allelic diploid case - new GetGLsTest("B0", 1, 2, createGenotype("AA1", AA1,2), createGenotype("AA2", AA1,2), createGenotype("AA3", AA1,2)); - new GetGLsTest("B1", 1, 2, createGenotype("AA1", AA1,2), createGenotype("AA2", AA1,2), createGenotype("AB", AB1,2)); - new GetGLsTest("B2", 1, 2, createGenotype("AA1", AA1,2), createGenotype("BB", BB1,2), createGenotype("AA2", AA1,2)); - new GetGLsTest("B3a", 1, 2, createGenotype("AB", AB1,2), createGenotype("AA", AA1,2), createGenotype("BB", BB1,2)); - new GetGLsTest("B3b", 1, 2, createGenotype("AB1", AB1,2), createGenotype("AB2", AB1,2), createGenotype("AB3", AB1,2)); - new GetGLsTest("B4", 1, 2, createGenotype("BB1", BB1,2), createGenotype("BB2", BB1,2), createGenotype("AA", AA1,2)); - new GetGLsTest("B5", 1, 2, createGenotype("BB1", BB1,2), createGenotype("AB", AB1,2), createGenotype("BB2", BB1,2)); - new GetGLsTest("B6", 1, 2, createGenotype("BB1", BB1,2), createGenotype("BB2", BB1,2), createGenotype("BB3", BB1,2)); - - // tri-allelic diploid case - new GetGLsTest("B1C0", 2, 2, createGenotype("AA1", AA2,2), createGenotype("AA2", AA2,2), createGenotype("AB", AB2,2)); - new GetGLsTest("B0C1", 2, 2, createGenotype("AA1", AA2,2), createGenotype("AA2", AA2,2), createGenotype("AC", AC2,2)); - new GetGLsTest("B1C1a", 2,2, createGenotype("AA", AA2,2), createGenotype("AB", AB2,2), 
createGenotype("AC", AC2,2)); - new GetGLsTest("B1C1b", 2,2, createGenotype("AA1", AA2,2), createGenotype("AA2", AA2,2), createGenotype("BC", BC2,2)); - new GetGLsTest("B2C1", 2, 2, createGenotype("AB1", AB2,2), createGenotype("AB2", AB2,2), createGenotype("AC", AC2,2)); - new GetGLsTest("B3C2a", 2, 2, createGenotype("AB", AB2,2), createGenotype("BC1", BC2,2), createGenotype("BC2", BC2,2)); - new GetGLsTest("B3C2b", 2, 2, createGenotype("AB", AB2,2), createGenotype("BB", BB2,2), createGenotype("CC", CC2,2)); - - // bi-allelic pool case - new GetGLsTest("P0", 1, samplePloidy, createGenotype("A4_1", A4_1,samplePloidy), createGenotype("A4_1", A4_1,samplePloidy), createGenotype("A4_1", A4_1,samplePloidy)); - new GetGLsTest("P1", 1, samplePloidy,createGenotype("A4_1", A4_1,samplePloidy), createGenotype("B4_1", B4_1,samplePloidy), createGenotype("A4_1", A4_1,samplePloidy)); - new GetGLsTest("P2a", 1,samplePloidy, createGenotype("A4_1", A4_1,samplePloidy), createGenotype("C4_1", C4_1,samplePloidy), createGenotype("A4_1", A4_1,samplePloidy)); - new GetGLsTest("P2b", 1, samplePloidy,createGenotype("B4_1", B4_1,samplePloidy), createGenotype("B4_1", B4_1,samplePloidy), createGenotype("A4_1", A4_1,samplePloidy)); - new GetGLsTest("P4", 1, samplePloidy,createGenotype("A4_1", A4_1,samplePloidy), createGenotype("C4_1", C4_1,samplePloidy), createGenotype("C4_1", C4_1,samplePloidy)); - new GetGLsTest("P6", 1, samplePloidy,createGenotype("A4_1", A4_1,samplePloidy), createGenotype("F4_1", F4_1,samplePloidy), createGenotype("C4_1", C4_1,samplePloidy)); - new GetGLsTest("P8", 1, samplePloidy,createGenotype("A4_1", A4_1,samplePloidy), createGenotype("F4_1", F4_1,samplePloidy), createGenotype("F4_1", F4_1,samplePloidy)); - - // multi-allelic pool case - new GetGLsTest("B1C3", 2, samplePloidy,createGenotype("A4_400", A4_400,samplePloidy), createGenotype("A4_400", A4_400,samplePloidy), createGenotype("F4_013", F4_013,samplePloidy)); - new GetGLsTest("B3C9", 2, samplePloidy,createGenotype("F4_013", F4_013,samplePloidy), createGenotype("F4_013", F4_013,samplePloidy), createGenotype("F4_013", F4_013,samplePloidy)); - new GetGLsTest("B6C0", 2, samplePloidy,createGenotype("B4_310", B4_310,samplePloidy), createGenotype("C4_220", C4_220,samplePloidy), createGenotype("D4_130", D4_130,samplePloidy)); - new GetGLsTest("B6C4", 2, samplePloidy,createGenotype("D4_130", D4_130,samplePloidy), createGenotype("E4_121", E4_121,samplePloidy), createGenotype("F4_013", F4_013,samplePloidy)); - new GetGLsTest("B4C7", 2, samplePloidy,createGenotype("F4_013", F4_013,samplePloidy), createGenotype("E4_121", E4_121,samplePloidy), createGenotype("F4_013", F4_013,samplePloidy)); - new GetGLsTest("B2C3", 2, samplePloidy,createGenotype("A4_400", A4_400,samplePloidy), createGenotype("F4_013", F4_013,samplePloidy), createGenotype("B4_310", B4_310,samplePloidy)); - - return GetGLsTest.getTests(GetGLsTest.class); - } - - @Test(dataProvider = "getGLs") - public void testGLs(GetGLsTest cfg) { - final int len = GenotypeLikelihoods.numLikelihoods(1 + cfg.numAltAlleles, cfg.ploidy * cfg.GLs.size()); - double[] priors = new double[len]; // flat priors - - final GeneralPloidyExactAFCalculator calc = new GeneralPloidyExactAFCalculator(); - calc.combineSinglePools(cfg.GLs, cfg.ploidy,cfg.numAltAlleles + 1, priors); - int nameIndex = 1; - - for ( int allele = 0; allele < cfg.numAltAlleles; allele++, nameIndex+=2 ) { - int expectedAlleleCount = Integer.valueOf(cfg.name.substring(nameIndex, nameIndex + 1)); - int calculatedAlleleCount = 
calc.getAltAlleleCountOfMAP(allele); - Assert.assertEquals(calculatedAlleleCount, expectedAlleleCount); - } - } - -} diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/genotyper/afcalc/GeneralPloidyFailOverAFCalculatorProviderUnitTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/genotyper/afcalc/GeneralPloidyFailOverAFCalculatorProviderUnitTest.java deleted file mode 100644 index 1a013467b6f..00000000000 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/genotyper/afcalc/GeneralPloidyFailOverAFCalculatorProviderUnitTest.java +++ /dev/null @@ -1,58 +0,0 @@ -package org.broadinstitute.hellbender.tools.walkers.genotyper.afcalc; - -import org.broadinstitute.hellbender.tools.walkers.genotyper.GenotypeCalculationArgumentCollection; -import org.broadinstitute.hellbender.GATKBaseTest; -import org.testng.Assert; -import org.testng.annotations.DataProvider; -import org.testng.annotations.Test; - -/** - * Tests {@link GeneralPloidyFailOverAFCalculatorProvider} - * - * @author Valentin Ruano-Rubio <valentin@broadinstitute.org> - */ -public class GeneralPloidyFailOverAFCalculatorProviderUnitTest extends GATKBaseTest { - - private final static int[] PLOIDIES = new int[] { AFCalculatorImplementation.UNBOUND_PLOIDY,1,2,3,4,10 }; - private final static int[] MAX_ALT_ALLELES = new int[] { AFCalculatorImplementation.UNBOUND_ALTERNATIVE_ALLELE_COUNT,1,2,3,4,10}; - - @Test(dataProvider= "getMatrixOfPlodiesAndMaxAltAlleles") - public void testAFCalculatorProvider(final int ploidy, final int maxAltAlleles) { - final GenotypeCalculationArgumentCollection args = new GenotypeCalculationArgumentCollection(); - args.MAX_ALTERNATE_ALLELES = maxAltAlleles; - args.samplePloidy = ploidy; - - final GeneralPloidyFailOverAFCalculatorProvider provider = new GeneralPloidyFailOverAFCalculatorProvider(args); - - final AFCalculator calculator = provider.getInstance(ploidy,maxAltAlleles); - Assert.assertNotNull(calculator); - final AFCalculatorImplementation implementation = AFCalculatorImplementation.fromCalculatorClass(calculator.getClass()); - Assert.assertTrue(implementation.usableForParams(ploidy,maxAltAlleles)); - for (final int PLOIDY : PLOIDIES) { - for (final int MAX_ALT_ALLELE : MAX_ALT_ALLELES) { - if (implementation.usableForParams(PLOIDY, MAX_ALT_ALLELE)) { - Assert.assertSame(provider.getInstance(PLOIDY, MAX_ALT_ALLELE), calculator); - } else { - final AFCalculator failOver = provider.getInstance(PLOIDY, MAX_ALT_ALLELE); - Assert.assertNotNull(failOver); - final AFCalculatorImplementation failOverImplementation = AFCalculatorImplementation.fromCalculatorClass(failOver.getClass()); - Assert.assertTrue(failOverImplementation.usableForParams(PLOIDY, MAX_ALT_ALLELE)); - Assert.assertEquals(failOverImplementation, AFCalculatorImplementation.EXACT_GENERAL_PLOIDY); - } - } - } - } - - @DataProvider(name="getMatrixOfPlodiesAndMaxAltAlleles") - public Object[][] getMatrixOfPlodiesAndMaxAltAlleles() { - final Object[][] result = new Object[PLOIDIES.length * MAX_ALT_ALLELES.length][]; - int idx = 0; - for (final int PLOIDY : PLOIDIES) { - for (final int MAX_ALT_ALLELE : MAX_ALT_ALLELES) { - result[idx++] = new Object[]{PLOIDY, MAX_ALT_ALLELE}; - } - } - return result; - } - -} diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/genotyper/afcalc/GetGLsTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/genotyper/afcalc/GetGLsTest.java deleted file mode 100644 index ff5cea51238..00000000000 --- 
a/src/test/java/org/broadinstitute/hellbender/tools/walkers/genotyper/afcalc/GetGLsTest.java +++ /dev/null @@ -1,76 +0,0 @@ -package org.broadinstitute.hellbender.tools.walkers.genotyper.afcalc; - -import htsjdk.variant.variantcontext.*; -import org.broadinstitute.hellbender.GATKBaseTest; -import org.broadinstitute.hellbender.utils.variant.HomoSapiensConstants; - -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.List; - -public final class GetGLsTest extends GATKBaseTest.TestDataProvider { - GenotypesContext GLs; - int numAltAlleles; - final AFCalculator calc; - final int[] expectedACs; - final double[] priors; - final String priorName; - - GetGLsTest(final AFCalculator calc, int numAltAlleles, List arg, final double[] priors, final String priorName) { - super(GetGLsTest.class); - GLs = GenotypesContext.create(new ArrayList<>(arg)); - this.numAltAlleles = numAltAlleles; - this.calc = calc; - this.priors = priors; - this.priorName = priorName; - - expectedACs = new int[numAltAlleles+1]; - for ( int alleleI = 0; alleleI < expectedACs.length; alleleI++ ) { - expectedACs[alleleI] = 0; - final Allele allele = getAlleles().get(alleleI); - for ( Genotype g : arg ) { - expectedACs[alleleI] += Collections.frequency(g.getAlleles(), allele); - } - } - } - - public AFCalculationResult execute() { - return getCalc().getLog10PNonRef(getVC(), HomoSapiensConstants.DEFAULT_PLOIDY, numAltAlleles, getPriors()); - } - - public AFCalculationResult executeRef() { - final AFCalculator ref = AFCalculatorImplementation.EXACT_REFERENCE.newInstance(); - return ref.getLog10PNonRef(getVC(), HomoSapiensConstants.DEFAULT_PLOIDY, numAltAlleles, getPriors()); - } - - public double[] getPriors() { - return priors; - } - - public AFCalculator getCalc() { - return calc; - } - - public VariantContext getVC() { - VariantContextBuilder builder = new VariantContextBuilder("test", "1", 1, 1, getAlleles()); - builder.genotypes(GLs); - return builder.make(); - } - - public List getAlleles() { - return Arrays.asList(Allele.create("A", true), - Allele.create("C"), - Allele.create("G"), - Allele.create("T")).subList(0, numAltAlleles+1); - } - - public int getExpectedAltAC(final int alleleI) { - return expectedACs[alleleI+1]; - } - - public String toString() { - return String.format("%s model=%s prior=%s input=%s", super.toString(), calc.getClass().getSimpleName(), - priorName, GLs.size() > 5 ? 
String.format("%d samples", GLs.size()) : GLs); - } -} diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/genotyper/afcalc/IndependentAllelesDiploidExactAFCalculatorUnitTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/genotyper/afcalc/IndependentAllelesDiploidExactAFCalculatorUnitTest.java deleted file mode 100644 index 16678f3c8c2..00000000000 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/genotyper/afcalc/IndependentAllelesDiploidExactAFCalculatorUnitTest.java +++ /dev/null @@ -1,181 +0,0 @@ -package org.broadinstitute.hellbender.tools.walkers.genotyper.afcalc; - -import htsjdk.variant.variantcontext.Allele; -import htsjdk.variant.variantcontext.Genotype; -import htsjdk.variant.variantcontext.VariantContext; -import htsjdk.variant.variantcontext.VariantContextBuilder; -import org.broadinstitute.hellbender.utils.MathUtils; -import org.broadinstitute.hellbender.utils.Utils; -import org.broadinstitute.hellbender.GATKBaseTest; -import org.testng.Assert; -import org.testng.annotations.DataProvider; -import org.testng.annotations.Test; - -import java.util.*; - - -// SEE private/R/pls.R if you want the truth output for these tests -public final class IndependentAllelesDiploidExactAFCalculatorUnitTest extends GATKBaseTest { - @DataProvider(name = "TestCombineGLs") - public Object[][] makeTestCombineGLs() { - List tests = new ArrayList<>(); - - tests.add(new Object[]{1, 1, makePL( 0, 10, 20), makePL( 0, 10, 20)}); - tests.add(new Object[]{1, 1, makePL(10, 0, 20), makePL(10, 0, 20)}); - tests.add(new Object[]{1, 1, makePL(20, 10, 0), makePL(20, 10, 0)}); - - // AA AB BB AC BC CC => AA AB+BC CC - tests.add(new Object[]{1, 2, makePL( 0, 10, 20, 30, 40, 50), makePL(0, 10, 20)}); - tests.add(new Object[]{2, 2, makePL( 0, 10, 20, 30, 40, 50), makePL(0, 30, 50)}); - - tests.add(new Object[]{1, 2, makePL( 0, 10, 10, 10, 10, 10), makePL(0, 8, 11)}); - tests.add(new Object[]{2, 2, makePL( 0, 10, 10, 10, 10, 10), makePL(0, 8, 11)}); - - tests.add(new Object[]{1, 2, makePL( 0, 1, 2, 3, 4, 5), makePL(0, 2, 5)}); - tests.add(new Object[]{2, 2, makePL( 0, 1, 2, 3, 4, 5), makePL(0, 4, 9)}); - - tests.add(new Object[]{1, 2, makePL( 0, 50, 50, 50, 50, 50), makePL( 0, 47, 50)}); - tests.add(new Object[]{2, 2, makePL( 0, 50, 50, 50, 50, 50), makePL( 0, 47, 50)}); - - tests.add(new Object[]{1, 2, makePL( 50, 0, 50, 50, 50, 50), makePL(45, 0, 50)}); - tests.add(new Object[]{2, 2, makePL( 50, 0, 50, 50, 50, 50), makePL( 0, 47, 50)}); - - tests.add(new Object[]{1, 2, makePL( 50, 50, 0, 50, 50, 50), makePL(45, 47, 0)}); - tests.add(new Object[]{2, 2, makePL( 50, 50, 0, 50, 50, 50), makePL( 0, 47, 50)}); - - tests.add(new Object[]{1, 2, makePL( 50, 50, 50, 0, 50, 50), makePL(0, 47, 50)}); - tests.add(new Object[]{2, 2, makePL( 50, 50, 50, 0, 50, 50), makePL(45, 0, 50)}); - - tests.add(new Object[]{1, 2, makePL( 50, 50, 50, 50, 0, 50), makePL(45, 0, 50)}); - tests.add(new Object[]{2, 2, makePL( 50, 50, 50, 50, 0, 50), makePL(45, 0, 50)}); - - tests.add(new Object[]{1, 2, makePL( 50, 50, 50, 50, 50, 0), makePL(0, 47, 50)}); - tests.add(new Object[]{2, 2, makePL( 50, 50, 50, 50, 50, 0), makePL(45, 47, 0)}); - - return tests.toArray(new Object[][]{}); - } - - private Genotype makePL(final int ... 
PLs) { - return AFCalculationUnitTest.makePL(Arrays.asList(Allele.NO_CALL, Allele.NO_CALL), PLs); - } - - @Test(enabled = true, dataProvider = "TestCombineGLs") - public void testCombineGLsPrecise(final int altIndex, final int nAlts, final Genotype testg, final Genotype expected) { - final Genotype combined = IndependentAllelesDiploidExactAFCalculator.combineGLsPrecise(testg, altIndex, nAlts); - - Assert.assertEquals(combined.getPL(), expected.getPL(), - "Combined PLs " + Utils.join(",", combined.getPL()) + " != expected " + Utils.join(",", expected.getPL())); - } - - @Test(enabled = true, dataProvider = "TestCombineGLs") - public void testCombinePrecise(final int altIndex, final int nAlts, final Genotype testg, final Genotype expected) { - final Genotype combined = IndependentAllelesDiploidExactAFCalculator.combineGLsPrecise(testg, altIndex, nAlts); - - Assert.assertEquals(combined.getPL(), expected.getPL(), - "Combined PLs " + Utils.join(",", combined.getPL()) + " != expected " + Utils.join(",", expected.getPL())); - } - - static Allele A = Allele.create("A", true); - static Allele C = Allele.create("C"); - static Allele G = Allele.create("G"); - - @DataProvider(name = "TestMakeAlleleConditionalContexts") - public Object[][] makeTestMakeAlleleConditionalContexts() { - List tests = new ArrayList<>(); - - final VariantContextBuilder root = new VariantContextBuilder("x", "1", 1, 1, Arrays.asList(A)); - final VariantContextBuilder vcAC = new VariantContextBuilder(root).alleles(Arrays.asList(A, C)); - final VariantContextBuilder vcAG = new VariantContextBuilder(root).alleles(Arrays.asList(A, G)); - final VariantContextBuilder vcACG = new VariantContextBuilder(root).alleles(Arrays.asList(A, C, G)); - final VariantContextBuilder vcAGC = new VariantContextBuilder(root).alleles(Arrays.asList(A, G, C)); - - final Genotype gACG = makePL( 0, 1, 2, 3, 4, 5); - final Genotype gAGC = makePL( 0, 4, 5, 1, 3, 2); - final Genotype gACcombined = makePL(0, 2, 5); - final Genotype gACcombined2 = makePL(0, 1, 4); - final Genotype gAGcombined = makePL(0, 4, 9); - - // biallelic - tests.add(new Object[]{vcAC.genotypes(gACcombined).make(), Arrays.asList(vcAC.genotypes(gACcombined).make())}); - - // tri-allelic - tests.add(new Object[]{vcACG.genotypes(gACG).make(), Arrays.asList(vcAC.genotypes(gACcombined).make(), vcAG.genotypes(gAGcombined).make())}); - tests.add(new Object[]{vcAGC.genotypes(gAGC).make(), Arrays.asList(vcAG.genotypes(gAGcombined).make(), vcAC.genotypes(gACcombined2).make())}); - - return tests.toArray(new Object[][]{}); - } - - - @Test(enabled = true, dataProvider = "TestMakeAlleleConditionalContexts") - public void testMakeAlleleConditionalContexts(final VariantContext vc, final List expectedVCs) { - final List biAllelicVCs = IndependentAllelesDiploidExactAFCalculator.makeAlleleConditionalContexts(vc); - - Assert.assertEquals(biAllelicVCs.size(), expectedVCs.size()); - - for ( int i = 0; i < biAllelicVCs.size(); i++ ) { - final VariantContext actual = biAllelicVCs.get(i); - final VariantContext expected = expectedVCs.get(i); - Assert.assertEquals(actual.getAlleles(), expected.getAlleles()); - - for ( int j = 0; j < actual.getNSamples(); j++ ) - Assert.assertEquals(actual.getGenotype(j).getPL(), expected.getGenotype(j).getPL(), - "expected PLs " + Utils.join(",", expected.getGenotype(j).getPL()) + " not equal to actual " + Utils.join(",", actual.getGenotype(j).getPL())); - } - } - - - @DataProvider(name = "ThetaNTests") - public Object[][] makeThetaNTests() { - List tests = new ArrayList<>(); - - 
final List log10LAlleles = Arrays.asList(0.0, -1.0, -2.0, -3.0, -4.0); - - for ( final double log10pRef : Arrays.asList(-1, -2, -3) ) { - for ( final int ploidy : Arrays.asList(1, 2, 3, 4) ) { - for ( List permutations : Utils.makePermutations(log10LAlleles, ploidy, true)) { - tests.add(new Object[]{permutations, Math.pow(10, log10pRef)}); - } - } - } - - return tests.toArray(new Object[][]{}); - } - - @Test(dataProvider = "ThetaNTests") - public void testThetaNTests(final List log10LAlleles, final double pRef) { - // biallelic - final double[] rawPriors = MathUtils.toLog10(new double[]{pRef, 1 - pRef}); - - final double log10pNonRef = Math.log10(1 - pRef); - - final List originalPriors = new LinkedList<>(); - final List pNonRefN = new LinkedList<>(); - for ( int i = 0; i < log10LAlleles.size(); i++ ) { - final double log10LAllele1 = log10LAlleles.get(i); - final double[] L1 = MathUtils.normalizeLog10(new double[]{log10LAllele1, 0.0}); - final AFCalculationResult result1 = new AFCalculationResult(new int[]{1}, Arrays.asList(A, C), L1, rawPriors, Collections.singletonMap(C, -10000.0)); - originalPriors.add(result1); - pNonRefN.add(log10pNonRef*(i+1)); - } - - final List thetaNPriors = IndependentAllelesDiploidExactAFCalculator.applyMultiAllelicPriors(originalPriors); - - double prevPosterior = 0.0; - for ( int i = 0; i < log10LAlleles.size(); i++ ) { - final AFCalculationResult thetaN = thetaNPriors.get(i); - AFCalculationResult orig = null; - for ( final AFCalculationResult x : originalPriors ) - if ( x.getAllelesUsedInGenotyping().equals(thetaN.getAllelesUsedInGenotyping())) - orig = x; - - Assert.assertNotNull(orig, "couldn't find original AFCalc"); - - Assert.assertEquals(orig.getLog10PriorOfAFGT0(), log10pNonRef, 1e-6); - Assert.assertEquals(thetaN.getLog10PriorOfAFGT0(), pNonRefN.get(i), 1e-6); - - Assert.assertTrue(orig.getLog10PosteriorOfAFGT0() <= prevPosterior, "AFCalc results should be sorted but " + prevPosterior + " is > original posterior " + orig.getLog10PosteriorOfAFGT0()); - prevPosterior = orig.getLog10PosteriorOfAFGT0(); - } - } - -} diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/variantutils/PosteriorProbabilitiesUtilsUnitTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/variantutils/PosteriorProbabilitiesUtilsUnitTest.java index ee9165efd1b..aae4e9baa69 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/variantutils/PosteriorProbabilitiesUtilsUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/variantutils/PosteriorProbabilitiesUtilsUnitTest.java @@ -2,16 +2,17 @@ import htsjdk.variant.variantcontext.*; import htsjdk.variant.vcf.VCFConstants; -import org.apache.commons.math3.util.MathArrays; +import org.broadinstitute.hellbender.GATKBaseTest; import org.broadinstitute.hellbender.exceptions.UserException; import org.broadinstitute.hellbender.utils.MathUtils; -import org.broadinstitute.hellbender.GATKBaseTest; import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants; import org.broadinstitute.hellbender.utils.variant.HomoSapiensConstants; import org.testng.Assert; import org.testng.annotations.Test; -import java.util.*; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; @SuppressWarnings("unchecked") public final class PosteriorProbabilitiesUtilsUnitTest extends GATKBaseTest { @@ -98,17 +99,6 @@ private VariantContext makeDeletionVC(final String source, final List al return new VariantContextBuilder(source, "1", start, stop, 
alleles).genotypes(Arrays.asList(genotypes)).unfiltered().make(); } - private VariantContext makeHomRefBlock(final String source, final Allele refAllele, final Genotype... genotypes) { - final int start = 10; - final int stop = start; - final Map infoMap = new HashMap<>(); - infoMap.put(VCFConstants.END_KEY,100); - final List alleles = new ArrayList<>(); - alleles.add(refAllele); - alleles.add(Allele.NON_REF_ALLELE); - return new VariantContextBuilder(source, "1", start, stop, alleles).genotypes(Arrays.asList(genotypes)).unfiltered().attributes(infoMap).make(); - } - @Test public void testCalculatePosteriorNoExternalData() { final int numSamples = 12; @@ -541,24 +531,6 @@ public void testCalculatePosterior() { } } - private boolean arraysApproxEqual(final double[] a, final double[] b, final double tol) { - if ( a.length != b.length ) { - return false; - } - - for ( int idx = 0; idx < a.length; idx++ ) { - if ( Math.abs(a[idx]-b[idx]) > tol ) { - return false; - } - } - - return true; - } - - private String errMsgArray(final double[] a, final double[] b) { - return String.format("Expected %s, Observed %s", Arrays.toString(a), Arrays.toString(b)); - } - @Test public void testPosteriorMultiAllelic() { // AA AB BB AC BC CC AD BD CD DD @@ -591,12 +563,12 @@ public void testPosteriorMultiAllelic() { -3.8952723, -1.5445506, -3.4951749, -2.6115263, -2.9125508, -0.5618292, -2.2135895, -1.5316722}; - Assert.assertTrue(arraysApproxEqual(expecPrior5, PosteriorProbabilitiesUtils.getDirichletPrior(counts_five,2,false),1e-5),errMsgArray(expecPrior5, PosteriorProbabilitiesUtils.getDirichletPrior(counts_five,2,false))); + assertEqualsDoubleArray(expecPrior5, PosteriorProbabilitiesUtils.getDirichletPrior(counts_five,2,false),1e-5); - Assert.assertTrue(arraysApproxEqual(expected_one,post1,1e-6),errMsgArray(expected_one,post1)); - Assert.assertTrue(arraysApproxEqual(expected_two,post2,1e-5),errMsgArray(expected_two,post2)); - Assert.assertTrue(arraysApproxEqual(expected_three,post3,1e-5),errMsgArray(expected_three,post3)); - Assert.assertTrue(arraysApproxEqual(expected_four,post4,1e-5),errMsgArray(expected_four,post4)); - Assert.assertTrue(arraysApproxEqual(expected_five,post5,1e-5),errMsgArray(expected_five,post5)); + assertEqualsDoubleArray(expected_one,post1,1e-6); + assertEqualsDoubleArray(expected_two,post2,1e-5); + assertEqualsDoubleArray(expected_three,post3,1e-5); + assertEqualsDoubleArray(expected_four,post4,1e-5); + assertEqualsDoubleArray(expected_five,post5,1e-5); } } diff --git a/src/testUtils/java/org/broadinstitute/hellbender/testutils/BaseTest.java b/src/testUtils/java/org/broadinstitute/hellbender/testutils/BaseTest.java index 86febc3766f..47fcbbff616 100644 --- a/src/testUtils/java/org/broadinstitute/hellbender/testutils/BaseTest.java +++ b/src/testUtils/java/org/broadinstitute/hellbender/testutils/BaseTest.java @@ -336,22 +336,6 @@ protected static void assertEqualsDoubleArray(final double[] actual, final doubl Assert.assertEquals(actual[i],expected[i],tolerance,"array position " + i); } - /** - * Checks whether two long arrays contain the same values or not. - * @param actual actual produced array. - * @param expected expected array. 
- */ - protected static void assertEqualsLongArray(final long[] actual, final long[] expected) { - if (expected == null) - Assert.assertNull(actual); - else { - Assert.assertNotNull(actual); - Assert.assertEquals(actual.length, expected.length,"array length "); - } - for (int i = 0; i < actual.length; i++) - Assert.assertEquals(actual[i],expected[i],"array position " + i); - } - public static void assertEqualsDoubleSmart(final Object actual, final Double expected, final double tolerance) { Assert.assertTrue(actual instanceof Double, "Not a double"); assertEqualsDoubleSmart((double) (Double) actual, (double) expected, tolerance);