-
Notifications
You must be signed in to change notification settings - Fork 596
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
GGVCFs changes to revert "no data" calls to "./." and change GQ0 hom-refs to no-calls #8741
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -150,8 +150,9 @@ private VariantContext regenotypeVC(final VariantContext originalVC, final Refer | |
|
||
final VariantContext result; | ||
|
||
// only re-genotype polymorphic sites | ||
if ( originalVC.isVariant() && originalVC.getAttributeAsInt(VCFConstants.DEPTH_KEY,0) > 0 ) { | ||
// only re-genotype polymorphic sites | ||
// note that the calculateGenotypes method also calculates the QUAL score | ||
final VariantContext regenotypedVC = calculateGenotypes(originalVC, includeNonVariants); | ||
if (regenotypedVC == null) { | ||
return null; | ||
|
@@ -186,7 +187,7 @@ private VariantContext regenotypeVC(final VariantContext originalVC, final Refer | |
//don't count sites with no depth and no confidence towards things like AN and InbreedingCoeff | ||
vcBuilder.genotypes(assignNoCallsAnnotationExcludedGenotypes(result.getGenotypes())); | ||
VariantContext annotated = annotationEngine.annotateContext(vcBuilder.make(), features, ref, null, a -> true); | ||
return new VariantContextBuilder(annotated).genotypes(cleanupGenotypeAnnotations(result, false, keepSB)).make(); | ||
return new VariantContextBuilder(annotated).genotypes(cleanupGenotypeAnnotations(annotated, false, keepSB)).make(); | ||
} else if (includeNonVariants) { | ||
// For monomorphic sites we need to make sure e.g. the hom ref genotypes are created and only then are passed to the annotation engine. | ||
VariantContext preannotated = new VariantContextBuilder(result).genotypes(cleanupGenotypeAnnotations(result, true, false)).make(); | ||
|
@@ -461,24 +462,29 @@ static List<Genotype> cleanupGenotypeAnnotations(final VariantContext vc, final | |
attrs.put(GATKVCFConstants.HAPLOTYPE_CALLER_PHASING_GT_KEY, GenotypeGVCFs.PHASED_HOM_VAR_STRING); | ||
} | ||
|
||
// create AD if it's not there | ||
if ( !oldGT.hasAD() && vc.isVariant() ) { | ||
// create AD if it's not there, but only if there's data | ||
if ( !oldGT.hasAD() && vc.isVariant() && depth > 0) { | ||
final int[] AD = new int[vc.getNAlleles()]; | ||
AD[0] = depth; | ||
builder.AD(AD); | ||
} | ||
|
||
if ( createRefGTs ) { | ||
// move the GQ to RGQ | ||
if (oldGT.hasGQ()) { | ||
//keep 0 depth samples and 0 GQ samples as no-call | ||
if (depth > 0 && oldGT.hasGQ()) { | ||
if (oldGT.getGQ() > 0) { | ||
final List<Allele> refAlleles = Collections.nCopies(oldGT.getPloidy(), vc.getReference()); | ||
builder.alleles(refAlleles); | ||
} else { | ||
builder.alleles(Collections.nCopies(oldGT.getPloidy(),Allele.NO_CALL)); | ||
} | ||
|
||
// move the GQ to RGQ | ||
builder.noGQ(); | ||
attrs.put(GATKVCFConstants.REFERENCE_GENOTYPE_QUALITY, oldGT.getGQ()); | ||
} | ||
|
||
//keep 0 depth samples and 0 GQ samples as no-call | ||
if (depth > 0 && oldGT.hasGQ() && oldGT.getGQ() > 0) { | ||
final List<Allele> refAlleles = Collections.nCopies(oldGT.getPloidy(), vc.getReference()); | ||
builder.alleles(refAlleles); | ||
} else { | ||
builder.alleles(Collections.nCopies(oldGT.getPloidy(),Allele.NO_CALL)); | ||
builder.noGQ().noDP(); | ||
} | ||
|
||
// also, the PLs are technically no longer usable | ||
|
@@ -494,8 +500,8 @@ static List<Genotype> cleanupGenotypeAnnotations(final VariantContext vc, final | |
* Does this genotype look like it has no reads and should be excluded from annotations? | ||
*/ | ||
static boolean excludeFromAnnotations(Genotype oldGT) { | ||
return oldGT.isHomRef() && !oldGT.hasPL() | ||
&& ((oldGT.hasDP() && oldGT.getDP() == 0) || !oldGT.hasDP()) | ||
return (oldGT.isHomRef() || oldGT.isNoCall()) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. EXTREMELY minor comment... There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done This method is probably on the chopping block for the refactor |
||
&& (!oldGT.hasDP() || oldGT.getDP() == 0) | ||
&& oldGT.hasGQ() && oldGT.getGQ() == 0; | ||
} | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -7,6 +7,7 @@ | |
import org.broadinstitute.hellbender.exceptions.UserException; | ||
import org.broadinstitute.hellbender.tools.walkers.ReferenceConfidenceVariantContextMerger; | ||
import org.broadinstitute.hellbender.tools.walkers.genotyper.GenotypeAssignmentMethod; | ||
import org.broadinstitute.hellbender.tools.walkers.genotyper.GenotypesCache; | ||
import org.broadinstitute.hellbender.utils.MathUtils; | ||
import org.broadinstitute.hellbender.utils.SimpleInterval; | ||
import org.broadinstitute.hellbender.utils.Utils; | ||
|
@@ -134,9 +135,16 @@ else if (a.isSymbolic()) { //use the greater of the priors for non-ref | |
builder.phased(vc1.getGenotype(genoIdx).isPhased()); | ||
if ( posteriors.get(genoIdx) != null ) { | ||
GATKVariantContextUtils.makeGenotypeCall(vc1.getMaxPloidy(HomoSapiensConstants.DEFAULT_PLOIDY), builder, | ||
GenotypeAssignmentMethod.USE_PLS_TO_ASSIGN, posteriors.get(genoIdx), vc1.getAlleles(), null); | ||
GenotypeAssignmentMethod.USE_PLS_TO_ASSIGN, posteriors.get(genoIdx), vc1.getAlleles(), vc1.getGenotype(genoIdx), null); | ||
|
||
builder.attribute(GATKVCFConstants.PHRED_SCALED_POSTERIORS_KEY, | ||
Utils.listFromPrimitives(GenotypeLikelihoods.fromLog10Likelihoods(posteriors.get(genoIdx)).getAsPLs())); | ||
Utils.listFromPrimitives(GenotypeLikelihoods.fromLog10Likelihoods(posteriors.get(genoIdx)).getAsPLs())); | ||
//TODO: refactor me -- this is to make up for the GGVCFs changes fixing GQ0 hom-refs | ||
if (!builder.makeWithShallowCopy().hasPL()) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this one is in the path of a lot of tools (HC M2 all of the genotypers) from what I could see. So if there is no PL when calculating the posterior probabilities for the genotypes we give it a score anyway so it gets included in the QUAL score calculations? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Did you check using IntelliJ? It has a fairly generic name, but it's just in CalculateGenotypePosteriors and a HaplotypeCaller GVCF feature I tried and ought to deprecate because it actually made for bigger GVCFs. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||
final int maxPosteriorIndex = MathUtils.maxElementIndex(posteriors.get(genoIdx)); | ||
builder.log10PError(GenotypeLikelihoods.getGQLog10FromLikelihoods(maxPosteriorIndex, posteriors.get(genoIdx))); | ||
builder.alleles(GenotypesCache.get(vc1.getGenotype(genoIdx).getPloidy(), maxPosteriorIndex).asAlleleList(vc1.getAlleles())); | ||
} | ||
} | ||
newContext.add(builder.make()); | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nice, though why specifically only IllegalStateException? Would we not want to wrap all exceptions?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There's a GGVCFs test that looks for a UserException in a specific case. In my experience it's the IllegalStateExceptions that are the most annoying. We can always add more later.