Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

make VariantContextBuilder safer #1344

Merged
merged 13 commits into from
Jul 16, 2019
4 changes: 2 additions & 2 deletions src/main/java/htsjdk/variant/variantcontext/CommonInfo.java
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ public boolean filtersWereApplied() {
}

public boolean isFiltered() {
return filters == null ? false : !filters.isEmpty();
return filters != null && !filters.isEmpty();
}

public boolean isNotFiltered() {
Expand All @@ -114,7 +114,7 @@ public boolean isNotFiltered() {

public void addFilter(String filter) {
if ( filters == null ) // immutable -> mutable
filters = new HashSet<String>();
filters = new HashSet<>();
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

{}


if ( filter == null ) throw new IllegalArgumentException("BUG: Attempting to add null filter " + this);
if ( getFilters().contains(filter) ) throw new IllegalArgumentException("BUG: Attempting to add duplicate filter " + filter + " at " + this);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@
package htsjdk.variant.variantcontext;

import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.Serializable;
import java.util.ArrayList;
Expand Down
147 changes: 98 additions & 49 deletions src/main/java/htsjdk/variant/variantcontext/VariantContext.java
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,17 @@
import htsjdk.variant.vcf.VCFHeaderLineType;

import java.io.Serializable;
import java.util.*;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

/**
Expand Down Expand Up @@ -225,7 +235,7 @@ public class VariantContext implements Feature, Serializable {
protected CommonInfo commonInfo = null;
public final static double NO_LOG10_PERROR = CommonInfo.NO_LOG10_PERROR;

public final static Set<String> PASSES_FILTERS = Collections.unmodifiableSet(new LinkedHashSet<String>());
public final static Set<String> PASSES_FILTERS = Collections.emptySet();

/** The location of this VariantContext */
final protected String contig;
Expand Down Expand Up @@ -260,6 +270,8 @@ public class VariantContext implements Feature, Serializable {
* Determine which genotype fields are in use in the genotypes in VC
* @return an ordered list of genotype fields in use in VC. If vc has genotypes this will always include GT first
*/


Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

weird extra linebreaks.

public List<String> calcVCFGenotypeKeys(final VCFHeader header) {
final Set<String> keys = new HashSet<>();

Expand Down Expand Up @@ -310,9 +322,88 @@ public List<String> calcVCFGenotypeKeys(final VCFHeader header) {
//
// ---------------------------------------------------------------------------------------------------------

//no controls and white-spaces characters, no semicolon.
public static final Pattern VALID_FILTER = Pattern.compile("^[!-:<-~]+$");

public enum Validation {
ALLELES,
GENOTYPES
ALLELES() {
void validate(VariantContext variantContext) {
validateAlleles(variantContext);
}
},
GENOTYPES() {
void validate(VariantContext variantContext) {
validateGenotypes(variantContext);
}
},
FILTERS {
void validate(VariantContext variantContext) {
validateFilters(variantContext);
}
};

abstract void validate(VariantContext variantContext);


static private void validateAlleles(final VariantContext vc) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
static private void validateAlleles(final VariantContext vc) {
private static void validateAlleles(final VariantContext vc) {


boolean alreadySeenRef = false;

for (final Allele allele : vc.alleles) {
// make sure there's only one reference allele
if (allele.isReference()) {
if (alreadySeenRef) {
throw new IllegalArgumentException("BUG: Received two reference tagged alleles in VariantContext " + vc.alleles + " vc=" + vc);
}
alreadySeenRef = true;
}

if (allele.isNoCall()) {
throw new IllegalArgumentException("BUG: Cannot add a no call allele to a variant context " + vc.alleles + " vc=" + vc);
}
}

// make sure there's one reference allele
if (!alreadySeenRef) {
throw new IllegalArgumentException("No reference allele found in VariantContext");
}
}

static private void validateGenotypes(final VariantContext variantContext) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
static private void validateGenotypes(final VariantContext variantContext) {
private static void validateGenotypes(final VariantContext variantContext) {


final ArrayList<Genotype> genotypes = variantContext.genotypes.getGenotypes();

if (genotypes == null) {
throw new IllegalStateException("Genotypes is null");
}

for (int i = 0; i < genotypes.size(); i++) {
final Genotype genotype = genotypes.get(i);
if (genotype.isAvailable()) {
final List<Allele> alleles = genotype.getAlleles();
for (int j = 0, size = alleles.size(); j < size; j++) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Now that I'm reading this code, due to the move, I have a comment. It looks like this can be replaced with a foreach loop, e.g.

for (Allele gAllele : genotype.getAlleles()) {
    ...
}

final Allele gAllele = alleles.get(j);
if (!variantContext.hasAllele(gAllele) && gAllele.isCalled()) {
throw new IllegalStateException("Allele in genotype " + gAllele + " not in the variant context " + alleles);
}
}
}
}
}

static private void validateFilters(final VariantContext variantContext) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
static private void validateFilters(final VariantContext variantContext) {
private static void validateFilters(final VariantContext variantContext) {

final Set<String> filters = variantContext.getFilters();
if (filters == null) {
return;
}

for (String filter : filters) {
if (!VALID_FILTER.matcher(filter).matches()) {
throw new IllegalStateException("Filter '" + filter +
"' contains an illegal character. It must conform to the regex ;'" + VALID_FILTER);
}
}
}
}

private final static EnumSet<Validation> NO_VALIDATION = EnumSet.noneOf(Validation.class);
Expand Down Expand Up @@ -1202,7 +1293,7 @@ public void validateAlternateAlleles() {

// maintain a list of non-symbolic alleles expected in the REF and ALT fields of the record
// (we exclude symbolic alleles because it's commonly expected that they don't show up in the genotypes, e.g. with GATK gVCFs)
final List<Allele> reportedAlleles = new ArrayList<Allele>();
final List<Allele> reportedAlleles = new ArrayList<>();
for ( final Allele allele : getAlleles() ) {
if ( !allele.isSymbolic() )
reportedAlleles.add(allele);
Expand Down Expand Up @@ -1292,17 +1383,12 @@ public void validateChromosomeCounts() {

private boolean validate(final EnumSet<Validation> validationToPerform) {
validateStop();
for (final Validation val : validationToPerform ) {
switch (val) {
case ALLELES: validateAlleles(); break;
case GENOTYPES: validateGenotypes(); break;
default: throw new IllegalArgumentException("Unexpected validation mode " + val);
}
}
validationToPerform.forEach(v->v.validate(this));
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

whitespace

Suggested change
validationToPerform.forEach(v->v.validate(this));
validationToPerform.forEach(v -> v.validate(this));

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

also validation(s)ToPerform would make more sense


return true;
}


/**
* Check that getEnd() == END from the info field, if it's present
*/
Expand All @@ -1329,43 +1415,6 @@ private void validateStop() {
}
}

private void validateAlleles() {

boolean alreadySeenRef = false;

for ( final Allele allele : alleles ) {
// make sure there's only one reference allele
if ( allele.isReference() ) {
if ( alreadySeenRef ) throw new IllegalArgumentException("BUG: Received two reference tagged alleles in VariantContext " + alleles + " this=" + this);
alreadySeenRef = true;
}

if ( allele.isNoCall() ) {
throw new IllegalArgumentException("BUG: Cannot add a no call allele to a variant context " + alleles + " this=" + this);
}
}

// make sure there's one reference allele
if ( ! alreadySeenRef )
throw new IllegalArgumentException("No reference allele found in VariantContext");
}

private void validateGenotypes() {
if ( this.genotypes == null ) throw new IllegalStateException("Genotypes is null");

for ( int i = 0; i < genotypes.size(); i++ ) {
Genotype genotype = genotypes.get(i);
if ( genotype.isAvailable() ) {
final List<Allele> alleles = genotype.getAlleles();
for ( int j = 0, size = alleles.size(); j < size; j++ ) {
final Allele gAllele = alleles.get(j);
if ( ! hasAllele(gAllele) && gAllele.isCalled() )
throw new IllegalStateException("Allele in genotype " + gAllele + " not in the variant context " + alleles);
}
}
}
}

// ---------------------------------------------------------------------------------------------------------
//
// utility routines
Expand Down
Loading