-
Notifications
You must be signed in to change notification settings - Fork 244
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
make VariantContextBuilder safer #1344
Changes from 10 commits
625e4f4
dc03bb7
5d310a2
143fc8e
4eb67f5
46e5b9c
7c91604
3a7518e
2aa9d3b
b2730d8
266a62f
0a03bda
74eea13
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -36,7 +36,17 @@ | |||||
import htsjdk.variant.vcf.VCFHeaderLineType; | ||||||
|
||||||
import java.io.Serializable; | ||||||
import java.util.*; | ||||||
import java.util.ArrayList; | ||||||
import java.util.Collection; | ||||||
import java.util.Collections; | ||||||
import java.util.Comparator; | ||||||
import java.util.EnumSet; | ||||||
import java.util.HashMap; | ||||||
import java.util.HashSet; | ||||||
import java.util.List; | ||||||
import java.util.Map; | ||||||
import java.util.Set; | ||||||
import java.util.regex.Pattern; | ||||||
import java.util.stream.Collectors; | ||||||
|
||||||
/** | ||||||
|
@@ -225,7 +235,7 @@ public class VariantContext implements Feature, Serializable { | |||||
protected CommonInfo commonInfo = null; | ||||||
public final static double NO_LOG10_PERROR = CommonInfo.NO_LOG10_PERROR; | ||||||
|
||||||
public final static Set<String> PASSES_FILTERS = Collections.unmodifiableSet(new LinkedHashSet<String>()); | ||||||
public final static Set<String> PASSES_FILTERS = Collections.emptySet(); | ||||||
|
||||||
/** The location of this VariantContext */ | ||||||
final protected String contig; | ||||||
|
@@ -260,6 +270,8 @@ public class VariantContext implements Feature, Serializable { | |||||
* Determine which genotype fields are in use in the genotypes in VC | ||||||
* @return an ordered list of genotype fields in use in VC. If vc has genotypes this will always include GT first | ||||||
*/ | ||||||
|
||||||
|
||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. weird extra linebreaks. |
||||||
public List<String> calcVCFGenotypeKeys(final VCFHeader header) { | ||||||
final Set<String> keys = new HashSet<>(); | ||||||
|
||||||
|
@@ -310,9 +322,88 @@ public List<String> calcVCFGenotypeKeys(final VCFHeader header) { | |||||
// | ||||||
// --------------------------------------------------------------------------------------------------------- | ||||||
|
||||||
//no controls and white-spaces characters, no semicolon. | ||||||
public static final Pattern VALID_FILTER = Pattern.compile("^[!-:<-~]+$"); | ||||||
|
||||||
public enum Validation { | ||||||
ALLELES, | ||||||
GENOTYPES | ||||||
ALLELES() { | ||||||
void validate(VariantContext variantContext) { | ||||||
validateAlleles(variantContext); | ||||||
} | ||||||
}, | ||||||
GENOTYPES() { | ||||||
void validate(VariantContext variantContext) { | ||||||
validateGenotypes(variantContext); | ||||||
} | ||||||
}, | ||||||
FILTERS { | ||||||
void validate(VariantContext variantContext) { | ||||||
validateFilters(variantContext); | ||||||
} | ||||||
}; | ||||||
|
||||||
abstract void validate(VariantContext variantContext); | ||||||
|
||||||
|
||||||
static private void validateAlleles(final VariantContext vc) { | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
|
||||||
boolean alreadySeenRef = false; | ||||||
|
||||||
for (final Allele allele : vc.alleles) { | ||||||
// make sure there's only one reference allele | ||||||
if (allele.isReference()) { | ||||||
if (alreadySeenRef) { | ||||||
throw new IllegalArgumentException("BUG: Received two reference tagged alleles in VariantContext " + vc.alleles + " vc=" + vc); | ||||||
} | ||||||
alreadySeenRef = true; | ||||||
} | ||||||
|
||||||
if (allele.isNoCall()) { | ||||||
throw new IllegalArgumentException("BUG: Cannot add a no call allele to a variant context " + vc.alleles + " vc=" + vc); | ||||||
} | ||||||
} | ||||||
|
||||||
// make sure there's one reference allele | ||||||
if (!alreadySeenRef) { | ||||||
throw new IllegalArgumentException("No reference allele found in VariantContext"); | ||||||
} | ||||||
} | ||||||
|
||||||
static private void validateGenotypes(final VariantContext variantContext) { | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
|
||||||
final ArrayList<Genotype> genotypes = variantContext.genotypes.getGenotypes(); | ||||||
|
||||||
if (genotypes == null) { | ||||||
throw new IllegalStateException("Genotypes is null"); | ||||||
} | ||||||
|
||||||
for (int i = 0; i < genotypes.size(); i++) { | ||||||
final Genotype genotype = genotypes.get(i); | ||||||
if (genotype.isAvailable()) { | ||||||
final List<Allele> alleles = genotype.getAlleles(); | ||||||
for (int j = 0, size = alleles.size(); j < size; j++) { | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Now that I'm reading this code, due to the move, I have a comment. It looks like this can be replaced with a foreach loop, e.g.
|
||||||
final Allele gAllele = alleles.get(j); | ||||||
if (!variantContext.hasAllele(gAllele) && gAllele.isCalled()) { | ||||||
throw new IllegalStateException("Allele in genotype " + gAllele + " not in the variant context " + alleles); | ||||||
} | ||||||
} | ||||||
} | ||||||
} | ||||||
} | ||||||
|
||||||
static private void validateFilters(final VariantContext variantContext) { | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
final Set<String> filters = variantContext.getFilters(); | ||||||
if (filters == null) { | ||||||
return; | ||||||
} | ||||||
|
||||||
for (String filter : filters) { | ||||||
if (!VALID_FILTER.matcher(filter).matches()) { | ||||||
throw new IllegalStateException("Filter '" + filter + | ||||||
"' contains an illegal character. It must conform to the regex ;'" + VALID_FILTER); | ||||||
} | ||||||
} | ||||||
} | ||||||
} | ||||||
|
||||||
private final static EnumSet<Validation> NO_VALIDATION = EnumSet.noneOf(Validation.class); | ||||||
|
@@ -1202,7 +1293,7 @@ public void validateAlternateAlleles() { | |||||
|
||||||
// maintain a list of non-symbolic alleles expected in the REF and ALT fields of the record | ||||||
// (we exclude symbolic alleles because it's commonly expected that they don't show up in the genotypes, e.g. with GATK gVCFs) | ||||||
final List<Allele> reportedAlleles = new ArrayList<Allele>(); | ||||||
final List<Allele> reportedAlleles = new ArrayList<>(); | ||||||
for ( final Allele allele : getAlleles() ) { | ||||||
if ( !allele.isSymbolic() ) | ||||||
reportedAlleles.add(allele); | ||||||
|
@@ -1292,17 +1383,12 @@ public void validateChromosomeCounts() { | |||||
|
||||||
private boolean validate(final EnumSet<Validation> validationToPerform) { | ||||||
validateStop(); | ||||||
for (final Validation val : validationToPerform ) { | ||||||
switch (val) { | ||||||
case ALLELES: validateAlleles(); break; | ||||||
case GENOTYPES: validateGenotypes(); break; | ||||||
default: throw new IllegalArgumentException("Unexpected validation mode " + val); | ||||||
} | ||||||
} | ||||||
validationToPerform.forEach(v->v.validate(this)); | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. whitespace
Suggested change
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. also validation(s)ToPerform would make more sense |
||||||
|
||||||
return true; | ||||||
} | ||||||
|
||||||
|
||||||
/** | ||||||
* Check that getEnd() == END from the info field, if it's present | ||||||
*/ | ||||||
|
@@ -1329,43 +1415,6 @@ private void validateStop() { | |||||
} | ||||||
} | ||||||
|
||||||
private void validateAlleles() { | ||||||
|
||||||
boolean alreadySeenRef = false; | ||||||
|
||||||
for ( final Allele allele : alleles ) { | ||||||
// make sure there's only one reference allele | ||||||
if ( allele.isReference() ) { | ||||||
if ( alreadySeenRef ) throw new IllegalArgumentException("BUG: Received two reference tagged alleles in VariantContext " + alleles + " this=" + this); | ||||||
alreadySeenRef = true; | ||||||
} | ||||||
|
||||||
if ( allele.isNoCall() ) { | ||||||
throw new IllegalArgumentException("BUG: Cannot add a no call allele to a variant context " + alleles + " this=" + this); | ||||||
} | ||||||
} | ||||||
|
||||||
// make sure there's one reference allele | ||||||
if ( ! alreadySeenRef ) | ||||||
throw new IllegalArgumentException("No reference allele found in VariantContext"); | ||||||
} | ||||||
|
||||||
private void validateGenotypes() { | ||||||
if ( this.genotypes == null ) throw new IllegalStateException("Genotypes is null"); | ||||||
|
||||||
for ( int i = 0; i < genotypes.size(); i++ ) { | ||||||
Genotype genotype = genotypes.get(i); | ||||||
if ( genotype.isAvailable() ) { | ||||||
final List<Allele> alleles = genotype.getAlleles(); | ||||||
for ( int j = 0, size = alleles.size(); j < size; j++ ) { | ||||||
final Allele gAllele = alleles.get(j); | ||||||
if ( ! hasAllele(gAllele) && gAllele.isCalled() ) | ||||||
throw new IllegalStateException("Allele in genotype " + gAllele + " not in the variant context " + alleles); | ||||||
} | ||||||
} | ||||||
} | ||||||
} | ||||||
|
||||||
// --------------------------------------------------------------------------------------------------------- | ||||||
// | ||||||
// utility routines | ||||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
{}