Skip to content

Commit

Permalink
Use allele info in VariantContext comparisons for stable sorts (#1593)
Browse files Browse the repository at this point in the history
* Use allele info in VariantContext comparisons for more stable sorts
* Ensure a more stable sort for varying length alleles, add unit tests
  • Loading branch information
clintval authored Oct 28, 2022
1 parent 02942a9 commit 14a8cc0
Show file tree
Hide file tree
Showing 3 changed files with 90 additions and 5 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
htsjdk.iws
.command_tmp
.DS_Store
atlassian-ide-plugin.xml
/htsjdk.version.properties
/test-output/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -83,11 +83,20 @@ public int compare(final VariantContext firstVariantContext, final VariantContex
// Will throw NullPointerException -- happily -- if either of the chromosomes/contigs aren't
// present. This error checking should already have been done in the constructor but it's left
// in as defence anyway.
final int contigCompare =
this.contigIndexLookup.get(firstVariantContext.getContig()) - this.contigIndexLookup.get(secondVariantContext.getContig());
return contigCompare != 0
? contigCompare
: firstVariantContext.getStart() - secondVariantContext.getStart();
int contigCompare = this.contigIndexLookup.get(firstVariantContext.getContig()).compareTo(this.contigIndexLookup.get(secondVariantContext.getContig()));
contigCompare = contigCompare == 0 ? firstVariantContext.getStart() - secondVariantContext.getStart() : contigCompare;
if (contigCompare == 0) {
// Compare variants that have the same genomic span (chr:start-end) lexicographically by all alleles (ref and alts).
for (int i = 0; i < firstVariantContext.getAlleles().size(); i++) {
// If all previous alleles are identical and the first variant has additional alleles, make the first variant greater.
if (i >= secondVariantContext.getAlleles().size()) { return 1; }
contigCompare = firstVariantContext.getAlleles().get(i).compareTo(secondVariantContext.getAlleles().get(i));
if (contigCompare != 0) return contigCompare;
}
}
// If all previous alleles are identical and the second variant has additional alleles, make the second variant greater.
if (firstVariantContext.getAlleles().size() < secondVariantContext.getAlleles().size()) { return -1; }
return contigCompare;
}

/**
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
package htsjdk.variant.variantcontext;

import htsjdk.HtsjdkTest;
import org.testng.Assert;
import org.testng.annotations.BeforeSuite;
import org.testng.annotations.Test;

import java.util.Arrays;
import java.util.Collections;

/**
* Unit tests for VariantContextComparator.
*/
public class VariantContextComparatorUnitTest extends HtsjdkTest {
private Allele refA, altG, altT;

@BeforeSuite
public void before() {
refA = Allele.create("A", true);
altG = Allele.create("G", false);
altT = Allele.create("T", false);
}

@Test
public void testVariantContextsWithSameSiteSortLexicographicallyByAlleleIdentical() {
final String contig = "chr1";
final VariantContextComparator comparator = new VariantContextComparator(Collections.singletonList(contig));
final VariantContextBuilder builder = new VariantContextBuilder("test", contig, 1, 1, Collections.emptyList());

final VariantContext variant1 = builder.alleles(Arrays.asList(refA, altG)).make();
final VariantContext variant2 = builder.alleles(Arrays.asList(refA, altG)).make();

final int compare = comparator.compare(variant1, variant2);
Assert.assertEquals(compare, 0); // TODO: What other criteria might we sort by to break this tie?
}

@Test
public void testVariantContextsWithSameSiteSortLexicographicallyByAllele() {
final String contig = "chr1";
final VariantContextComparator comparator = new VariantContextComparator(Collections.singletonList(contig));
final VariantContextBuilder builder = new VariantContextBuilder("test", contig, 1, 1, Collections.emptyList());

final VariantContext variant1 = builder.alleles(Arrays.asList(refA, altG)).make();
final VariantContext variant2 = builder.alleles(Arrays.asList(refA, altT)).make();

final int compare = comparator.compare(variant1, variant2);
Assert.assertEquals(compare, -1);
}

@Test
public void testVariantContextsWithSameSiteSortLexicographicallyByAlleleThenExtraAllelesForFirstVariant() {
final String contig = "chr1";
final VariantContextComparator comparator = new VariantContextComparator(Collections.singletonList(contig));
final VariantContextBuilder builder = new VariantContextBuilder("test", contig, 1, 1, Collections.emptyList());

final VariantContext variant1 = builder.alleles(Arrays.asList(refA, altG, altT)).make();
final VariantContext variant2 = builder.alleles(Arrays.asList(refA, altG)).make();

final int compare = comparator.compare(variant1, variant2);
Assert.assertEquals(compare, 1);
}

@Test
public void testVariantContextsWithSameSiteSortLexicographicallyByAlleleThenExtraAllelesForSecondVariant() {
final String contig = "chr1";
final VariantContextComparator comparator = new VariantContextComparator(Collections.singletonList(contig));
final VariantContextBuilder builder = new VariantContextBuilder("test", contig, 1, 1, Collections.emptyList());

final VariantContext variant1 = builder.alleles(Arrays.asList(refA, altG)).make();
final VariantContext variant2 = builder.alleles(Arrays.asList(refA, altG, altT)).make();

final int compare = comparator.compare(variant1, variant2);
Assert.assertEquals(compare, -1);
}
}

0 comments on commit 14a8cc0

Please sign in to comment.