Skip to content

Commit

Permalink
Delete the old exact AF calculation model (#6099)
Browse files Browse the repository at this point in the history
  • Loading branch information
davidbenjamin authored Aug 29, 2019
1 parent 764ce4e commit 0f9f925
Show file tree
Hide file tree
Showing 42 changed files with 136 additions and 4,397 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
import org.broadinstitute.hellbender.tools.walkers.annotator.*;
import org.broadinstitute.hellbender.tools.walkers.annotator.allelespecific.AS_RMSMappingQuality;
import org.broadinstitute.hellbender.tools.walkers.genotyper.*;
import org.broadinstitute.hellbender.tools.walkers.genotyper.afcalc.GeneralPloidyFailOverAFCalculatorProvider;
import org.broadinstitute.hellbender.utils.GATKProtectedVariantContextUtils;
import org.broadinstitute.hellbender.utils.SimpleInterval;
import org.broadinstitute.hellbender.utils.Utils;
Expand Down Expand Up @@ -94,7 +93,7 @@ private void initialize()
}

// We only want the engine to generate the AS_QUAL key if we are using AlleleSpecific annotations.
genotypingEngine = new MinimalGenotypingEngine(createUAC(), samples, new GeneralPloidyFailOverAFCalculatorProvider(genotypeArgs), annotationEngine.isRequestedReducibleRawKey(GATKVCFConstants.AS_QUAL_KEY));
genotypingEngine = new MinimalGenotypingEngine(createUAC(), samples, annotationEngine.isRequestedReducibleRawKey(GATKVCFConstants.AS_QUAL_KEY));

if ( includeNonVariants ) {
// Save INFO header names that require alt alleles
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@ public GenotypeCalculationArgumentCollection( final GenotypeCalculationArgumentC
Utils.nonNull(other);

this.useNewAFCalculator = other.useNewAFCalculator;
this.useOldAFCalculator = other.useOldAFCalculator;
this.ANNOTATE_NUMBER_OF_ALLELES_DISCOVERED = other.ANNOTATE_NUMBER_OF_ALLELES_DISCOVERED;
this.snpHeterozygosity = other.snpHeterozygosity;
this.indelHeterozygosity = other.indelHeterozygosity;
Expand All @@ -56,12 +55,6 @@ public GenotypeCalculationArgumentCollection( final GenotypeCalculationArgumentC
@Argument(fullName = "use-new-qual-calculator", shortName = "new-qual", doc = "Use the new AF model instead of the so-called exact model", optional = true)
public boolean useNewAFCalculator = true;

/**
* Use the old GATK 3 qual score aka the "exact model"
*/
@Argument(fullName = "use-old-qual-calculator", shortName = "old-qual", doc = "Use the old AF model", optional = true)
public boolean useOldAFCalculator = false;

/**
* Depending on the value of the --max_alternate_alleles argument, we may genotype only a fraction of the alleles being sent on for genotyping.
* Using this argument instructs the genotyper to annotate (in the INFO field) the number of alternate alleles that were originally discovered at the site.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,7 @@
*/
public abstract class GenotypingEngine<Config extends StandardCallerArgumentCollection> {

protected final AFCalculator newAFCalculator;

protected final AFCalculatorProvider afCalculatorProvider;
protected final AlleleFrequencyCalculator alleleFrequencyCalculator;

protected final Config configuration;

Expand All @@ -46,8 +44,6 @@ public abstract class GenotypingEngine<Config extends StandardCallerArgumentColl

private final AFPriorProvider log10AlleleFrequencyPriorsSNPs;

private final AFPriorProvider log10AlleleFrequencyPriorsIndels;

private final List<SimpleInterval> upstreamDeletionsLoc = new LinkedList<>();

private final boolean doAlleleSpecificCalcs;
Expand All @@ -64,66 +60,15 @@ public abstract class GenotypingEngine<Config extends StandardCallerArgumentColl
*/
protected GenotypingEngine(final Config configuration,
final SampleList samples,
final AFCalculatorProvider afCalculatorProvider,
final boolean doAlleleSpecificCalcs) {
this.configuration = Utils.nonNull(configuration, "the configuration cannot be null");
this.samples = Utils.nonNull(samples, "the sample list cannot be null");
this.afCalculatorProvider = Utils.nonNull(afCalculatorProvider, "the AF calculator provider cannot be null");
this.doAlleleSpecificCalcs = doAlleleSpecificCalcs;
logger = LogManager.getLogger(getClass());
numberOfGenomes = this.samples.numberOfSamples() * configuration.genotypeArgs.samplePloidy;
log10AlleleFrequencyPriorsSNPs = composeAlleleFrequencyPriorProvider(numberOfGenomes,
configuration.genotypeArgs.snpHeterozygosity, configuration.genotypeArgs.inputPrior);
log10AlleleFrequencyPriorsIndels = composeAlleleFrequencyPriorProvider(numberOfGenomes,
configuration.genotypeArgs.indelHeterozygosity, configuration.genotypeArgs.inputPrior);

final double refPseudocount = configuration.genotypeArgs.snpHeterozygosity / Math.pow(configuration.genotypeArgs.heterozygosityStandardDeviation,2);
final double snpPseudocount = configuration.genotypeArgs.snpHeterozygosity * refPseudocount;
final double indelPseudocount = configuration.genotypeArgs.indelHeterozygosity * refPseudocount;
newAFCalculator = new AlleleFrequencyCalculator(refPseudocount, snpPseudocount, indelPseudocount, configuration.genotypeArgs.samplePloidy);
}

/**
* Function that fills vector with allele frequency priors. By default, infinite-sites, neutral variation prior is used,
* where Pr(AC=i) = theta/i where theta is heterozygosity
* @param N Number of chromosomes
* @param priors (output) array to be filled with priors
* @param heterozygosity default heterozygosity to use, if inputPriors is empty
* @param inputPriors Input priors to use (in which case heterozygosity is ignored)
*/
public static void computeAlleleFrequencyPriors(final int N, final double[] priors, final double heterozygosity, final List<Double> inputPriors) {
double sum = 0.0;

if (!inputPriors.isEmpty()) {
// user-specified priors
if (inputPriors.size() != N) {
throw new CommandLineException.BadArgumentValue("inputPrior", "Invalid length of inputPrior vector: vector length must be equal to # samples +1 ");
}

int idx = 1;
for (final double prior: inputPriors) {
if (prior < 0.0) {
throw new CommandLineException.BadArgumentValue("Bad argument: negative values not allowed", "inputPrior");
}
priors[idx++] = Math.log10(prior);
sum += prior;
}
}
else {
// for each i
for (int i = 1; i <= N; i++) {
final double value = heterozygosity / (double)i;
priors[i] = Math.log10(value);
sum += value;
}
}

// protection against the case of heterozygosity too high or an excessive number of samples (which break population genetics assumptions)
if (sum > 1.0) {
throw new CommandLineException.BadArgumentValue("heterozygosity","The heterozygosity value is set too high relative to the number of samples to be processed, or invalid values specified if input priors were provided - try reducing heterozygosity value or correct input priors.");
}
// null frequency for AF=0 is (1 - sum(all other frequencies))
priors[0] = Math.log10(1.0 - sum);
alleleFrequencyCalculator = AlleleFrequencyCalculator.makeCalculator(configuration.genotypeArgs);
}

/**
Expand Down Expand Up @@ -250,20 +195,18 @@ protected VariantCallContext calculateGenotypes(final FeatureContext features,
}


final AFCalculator afCalculator = configuration.genotypeArgs.useOldAFCalculator ?
afCalculatorProvider.getInstance(vc,defaultPloidy,maxAltAlleles) : newAFCalculator;
final AFCalculationResult AFresult = afCalculator.getLog10PNonRef(reducedVC, defaultPloidy, maxAltAlleles, getAlleleFrequencyPriors(vc,defaultPloidy,model));
final AFCalculationResult AFresult = alleleFrequencyCalculator.calculate(reducedVC, defaultPloidy);
final OutputAlleleSubset outputAlternativeAlleles = calculateOutputAlleleSubset(AFresult, vc);

// posterior probability that at least one alt allele exists in the samples
final double probOfAtLeastOneAltAllele = Math.pow(10, AFresult.getLog10PosteriorOfAFGT0());
final double probOfAtLeastOneAltAllele = Math.pow(10, AFresult.log10ProbVariantPresent());

// note: adding 0.0 is necessary because -10 * 0.0 => -0.0, which isn't nice
final double log10Confidence =
! outputAlternativeAlleles.siteIsMonomorphic ||
configuration.genotypingOutputMode == GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES || configuration.annotateAllSitesWithPLs
? AFresult.getLog10PosteriorOfAFEq0() + 0.0
: AFresult.getLog10PosteriorOfAFGT0() + 0.0 ;
? AFresult.log10ProbOnlyRefAlleleExists() + 0.0
: AFresult.log10ProbVariantPresent() + 0.0 ;


// Add 0.0 removes -0.0 occurrences.
Expand All @@ -272,13 +215,8 @@ protected VariantCallContext calculateGenotypes(final FeatureContext features,
// return a null call if we don't pass the confidence cutoff or the most likely allele frequency is zero
// skip this if we are already looking at a vc with NON_REF as the first alt allele i.e. if we are in GenotypeGVCFs
if ( !passesEmitThreshold(phredScaledConfidence, outputAlternativeAlleles.siteIsMonomorphic)
&& !forceSiteEmission()
&& noAllelesOrFirstAlleleIsNotNonRef(outputAlternativeAlleles.alleles)) {
// technically, at this point our confidence in a reference call isn't accurately estimated
// because it didn't take into account samples with no data, so let's get a better estimate
final double[] AFpriors = getAlleleFrequencyPriors(vc, defaultPloidy, model);
final int INDEX_FOR_AC_EQUALS_1 = 1;
return limitedContext ? null : estimateReferenceConfidence(vc, stratifiedContexts, AFpriors[INDEX_FOR_AC_EQUALS_1], true, probOfAtLeastOneAltAllele);
&& !forceSiteEmission() && noAllelesOrFirstAlleleIsNotNonRef(outputAlternativeAlleles.alleles)) {
return null;
}

// return a null call if we aren't forcing site emission and the only alt allele is a spanning deletion
Expand Down Expand Up @@ -385,7 +323,7 @@ private OutputAlleleSubset calculateOutputAlleleSubset(final AFCalculationResult
// we want to keep the NON_REF symbolic allele but only in the absence of a non-symbolic allele, e.g.
// if we combined a ref / NON_REF gVCF with a ref / alt gVCF
final boolean isNonRefWhichIsLoneAltAllele = alternativeAlleleCount == 1 && allele.equals(Allele.NON_REF_ALLELE);
final boolean isPlausible = afCalculationResult.isPolymorphicPhredScaledQual(allele, configuration.genotypeArgs.STANDARD_CONFIDENCE_FOR_CALLING);
final boolean isPlausible = afCalculationResult.passesThreshold(allele, configuration.genotypeArgs.STANDARD_CONFIDENCE_FOR_CALLING);

//it's possible that the upstream deletion that spanned this site was not emitted, mooting the symbolic spanning deletion allele
final boolean isSpuriousSpanningDeletion = GATKVCFConstants.isSpanningDeletion(allele) && !isVcCoveredByDeletion(vc);
Expand Down Expand Up @@ -567,30 +505,6 @@ protected final VariantCallContext estimateReferenceConfidence(final VariantCont
return new VariantCallContext(vc, passesCallThreshold(QualityUtils.phredScaleLog10CorrectRate(log10POfRef)), false);
}

/**
 * Looks up the log10 allele-frequency priors that apply to a variant context under a given
 * calculation model, one entry per possible allele count from 0 up to the total ploidy.
 *
 * @param vc the target variant context, used to determine the total ploidy and thus the possible ACs.
 * @param defaultPloidy ploidy to be assumed for any sample in {@code vc} whose ploidy is not known.
 * @param model the calculation model (SNP, INDEL, or their general-ploidy variants) whose priors are to be retrieved.
 * @throws java.lang.NullPointerException if either {@code vc} or {@code model} is {@code null}
 * @throws java.lang.IllegalArgumentException if {@code model} is not one of the SNP/INDEL models handled here.
 * @return never {@code null}, an array with exactly <code>total-ploidy(vc) + 1</code> positions.
 */
protected final double[] getAlleleFrequencyPriors( final VariantContext vc, final int defaultPloidy, final GenotypeLikelihoodsCalculationModel model ) {
    // Total ploidy is resolved first, before the model is inspected.
    final int ploidySum = GATKVariantContextUtils.totalPloidy(vc, defaultPloidy);

    // Pick the prior provider that matches the model family; the lookup itself is shared below.
    final AFPriorProvider priorProvider;
    switch (model) {
        case SNP:
        case GENERALPLOIDYSNP:
            priorProvider = log10AlleleFrequencyPriorsSNPs;
            break;
        case INDEL:
        case GENERALPLOIDYINDEL:
            priorProvider = log10AlleleFrequencyPriorsIndels;
            break;
        default:
            throw new IllegalArgumentException("Unexpected GenotypeCalculationModel " + model);
    }
    return priorProvider.forTotalPloidy(ploidySum);
}

/**
* Compute the log10 probability that a sample with sequencing depth and no alt allele is actually truly homozygous reference
*
Expand Down Expand Up @@ -646,12 +560,12 @@ protected Map<String,Object> composeCallAttributes(final boolean inheritAttribut
if (AFresult.getAllelesUsedInGenotyping().size() > 2) {
for (final Allele a : allAllelesToUse) {
if (a.isNonReference()) {
perAlleleQuals.add(AFresult.getLog10PosteriorOfAFEq0ForAllele(a));
perAlleleQuals.add(AFresult.getLog10PosteriorOfAlleleAbsent(a));
}
}
}
else {
perAlleleQuals.add(AFresult.getLog10PosteriorOfAFEq0());
perAlleleQuals.add(AFresult.log10ProbOnlyRefAlleleExists());
}

attributes.put(GATKVCFConstants.AS_QUAL_KEY, perAlleleQuals);
Expand Down Expand Up @@ -708,7 +622,7 @@ public double calculateSingleSampleRefVsAnyActiveStateProfileValue(final double[
//TODO End of lousy part.

final double normalizedLog10ACeq0Posterior = log10ACeq0Posterior - log10PosteriorNormalizationConstant;
// This is another condition to return a 0.0 also present in AFCalculator code as well.
// This is another condition to return a 0.0 also present in AlleleFrequencyCalculator code as well.
if (normalizedLog10ACeq0Posterior >= QualityUtils.qualToErrorProbLog10(configuration.genotypeArgs.STANDARD_CONFIDENCE_FOR_CALLING)) {
return 0.0;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

import htsjdk.variant.variantcontext.Allele;
import org.broadinstitute.hellbender.exceptions.UserException;
import org.broadinstitute.hellbender.tools.walkers.genotyper.afcalc.AFCalculatorProvider;
import org.broadinstitute.hellbender.utils.genotyper.SampleList;


Expand All @@ -19,9 +18,8 @@ public final class MinimalGenotypingEngine extends GenotypingEngine<UnifiedArgum
* @param configuration the UG configuration.
* @param samples list of samples
*/
public MinimalGenotypingEngine(final UnifiedArgumentCollection configuration, final SampleList samples,
final AFCalculatorProvider afCalculatorProvider) {
this(configuration, samples, afCalculatorProvider, false);
public MinimalGenotypingEngine(final UnifiedArgumentCollection configuration, final SampleList samples) {
this(configuration, samples, false);
}

/**
Expand All @@ -31,9 +29,8 @@ public MinimalGenotypingEngine(final UnifiedArgumentCollection configuration, fi
* @param samples list of samples
* @param doAlleleSpecificCalcs Whether to calculate genotyping annotations needed for allele specific annotations
*/
public MinimalGenotypingEngine(final UnifiedArgumentCollection configuration, final SampleList samples,
final AFCalculatorProvider afCalculatorProvider, boolean doAlleleSpecificCalcs ) {
super(configuration, samples, afCalculatorProvider, doAlleleSpecificCalcs);
public MinimalGenotypingEngine(final UnifiedArgumentCollection configuration, final SampleList samples, boolean doAlleleSpecificCalcs ) {
super(configuration, samples, doAlleleSpecificCalcs);

if ( configuration.genotypingOutputMode == GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES ) {
throw new UserException("GENOTYPE_GIVEN_ALLELES mode not supported in the MinimalGenotypingEngine");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
import org.broadinstitute.barclay.argparser.ArgumentCollection;
import org.broadinstitute.barclay.argparser.Hidden;
import org.broadinstitute.hellbender.engine.FeatureInput;
import org.broadinstitute.hellbender.tools.walkers.genotyper.afcalc.AFCalculatorImplementation;
import org.broadinstitute.hellbender.utils.Utils;

import java.io.File;
Expand Down Expand Up @@ -37,7 +36,6 @@ public void copyStandardCallerArgsFrom( final StandardCallerArgumentCollection o
if ( other.sampleContamination != null ) {
setSampleContamination(other.sampleContamination);
}
this.requestedAlleleFrequencyCalculationModel = other.requestedAlleleFrequencyCalculationModel;
this.exactCallsLog = other.exactCallsLog != null ? new File(other.exactCallsLog.getAbsolutePath()) : null;
this.outputMode = other.outputMode;
this.annotateAllSitesWithPLs = other.annotateAllSitesWithPLs;
Expand Down Expand Up @@ -150,13 +148,6 @@ private boolean contaminationIsPresentInMap(final Map<String, Double> contaminat
return false;
}

/**
* Controls the model used to calculate the probability that a site is variant plus the various sample genotypes in the data at a given locus.
*/
@Hidden
@Argument(fullName = "p-nonref-model", doc = "Non-reference probability calculation model to employ", optional = true)
public AFCalculatorImplementation requestedAlleleFrequencyCalculationModel;

@Hidden
@Argument(shortName = "log-exact-calls", optional=true)
public File exactCallsLog = null;
Expand Down
Loading

0 comments on commit 0f9f925

Please sign in to comment.