Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Replaced several for-each loops in VariantContext.Make() based on HaplotypeCaller profiling #1234

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 12 additions & 10 deletions src/main/java/htsjdk/variant/variantcontext/Genotype.java
Original file line number Diff line number Diff line change
Expand Up @@ -197,31 +197,33 @@ public GenotypeType getType() {
protected GenotypeType determineType() {
// TODO -- this code is slow and could be optimized for the diploid case
final List<Allele> alleles = getAlleles();
if ( alleles.isEmpty() )
if ( alleles.isEmpty() ) {
return GenotypeType.UNAVAILABLE;
}

boolean sawNoCall = false, sawMultipleAlleles = false;
Allele observedAllele = null;
Allele firstCallAllele = null;

for ( final Allele allele : alleles ) {
if ( allele.isNoCall() )
for ( int i = 0; i < alleles.size(); i++ ) {
final Allele allele = alleles.get(i);
if ( allele.isNoCall() ) {
sawNoCall = true;
else if ( observedAllele == null )
observedAllele = allele;
else if ( !allele.equals(observedAllele) )
} else if ( firstCallAllele == null ) {
firstCallAllele = allele;
} else if ( !allele.equals(firstCallAllele) )
sawMultipleAlleles = true;
}

if ( sawNoCall ) {
if ( observedAllele == null )
if ( firstCallAllele == null )
return GenotypeType.NO_CALL;
return GenotypeType.MIXED;
}

if ( observedAllele == null )
if ( firstCallAllele == null )
throw new IllegalStateException("BUG: there are no alleles present in this genotype but the alleles list is not null");

return sawMultipleAlleles ? GenotypeType.HET : observedAllele.isReference() ? GenotypeType.HOM_REF : GenotypeType.HOM_VAR;
return sawMultipleAlleles ? GenotypeType.HET : firstCallAllele.isReference() ? GenotypeType.HOM_REF : GenotypeType.HOM_VAR;
}

/**
Expand Down
28 changes: 19 additions & 9 deletions src/main/java/htsjdk/variant/variantcontext/VariantContext.java
Original file line number Diff line number Diff line change
Expand Up @@ -822,8 +822,9 @@ private boolean hasAllele(final Allele allele, final boolean ignoreRefState, fin
return true;

final List<Allele> allelesToConsider = considerRefAllele ? getAlleles() : getAlternateAlleles();
for ( Allele a : allelesToConsider ) {
if ( a.equals(allele, ignoreRefState) )
for (int i = 0, allelesToConsiderSize = allelesToConsider.size(); i < allelesToConsiderSize; i++) {
Allele anAllelesToConsider = allelesToConsider.get(i);
if (anAllelesToConsider.equals(allele, ignoreRefState))
return true;
}

Expand Down Expand Up @@ -1352,9 +1353,12 @@ private void validateAlleles() {
private void validateGenotypes() {
if ( this.genotypes == null ) throw new IllegalStateException("Genotypes is null");

for ( final Genotype g : this.genotypes ) {
if ( g.isAvailable() ) {
for ( Allele gAllele : g.getAlleles() ) {
for ( int i = 0; i < genotypes.size(); i++ ) {
Genotype genotype = genotypes.get(i);
if ( genotype.isAvailable() ) {
final List<Allele> alleles = genotype.getAlleles();
for ( int j = 0, size = alleles.size(); j < size; j++ ) {
final Allele gAllele = alleles.get(j);
if ( ! hasAllele(gAllele) && gAllele.isCalled() )
throw new IllegalStateException("Allele in genotype " + gAllele + " not in the variant context " + alleles);
}
Expand Down Expand Up @@ -1484,9 +1488,10 @@ private static List<Allele> makeAlleles(Collection<Allele> alleles) {

boolean sawRef = false;
for ( final Allele a : alleles ) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should this be made into a doubly indexed array? wouldn't that be faster?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would, but the input to that method is a collection, not a list so I don't actually have the means to access it by index and I don't want to change the VariantContext API to only accept a list. Getting rid of the iteration over the second list seems to have accounted for ~half of the runtime this method was taking up.

for ( final Allele b : alleleList ) {
if ( a.equals(b, true) )
for (int i = 0, alleleListSize = alleleList.size(); i < alleleListSize; i++) {
if (a.equals(alleleList.get(i), true)) {
throw new IllegalArgumentException("Duplicate allele added to VariantContext: " + a);
}
}

// deal with the case where the first allele isn't the reference
Expand Down Expand Up @@ -1696,8 +1701,13 @@ public boolean hasSymbolicAlleles() {
return hasSymbolicAlleles(getAlleles());
}

public static boolean hasSymbolicAlleles(final List<Allele> alleles) {
return alleles.stream().anyMatch(Allele::isSymbolic);
public static boolean hasSymbolicAlleles( final List<Allele> alleles ) {
for (int i = 0, size = alleles.size(); i < size; i++ ) {
if (alleles.get(i).isSymbolic()) {
return true;
}
}
return false;
}

public Allele getAltAlleleWithHighestAlleleCount() {
Expand Down