From 14a8cc022c98caa27df9d5aa876aeb5394f197ad Mon Sep 17 00:00:00 2001 From: Clint Valentine Date: Fri, 28 Oct 2022 11:56:28 -0700 Subject: [PATCH] Use allele info in VariantContext comparisons for stable sorts (#1593) * Use allele info in VariantContext comparisons for more stable sorts * Ensure a more stable sort for varying length alleles, add unit tests --- .gitignore | 1 + .../VariantContextComparator.java | 19 +++-- .../VariantContextComparatorUnitTest.java | 75 +++++++++++++++++++ 3 files changed, 90 insertions(+), 5 deletions(-) create mode 100644 src/test/java/htsjdk/variant/variantcontext/VariantContextComparatorUnitTest.java diff --git a/.gitignore b/.gitignore index f10fff99c0..694f85de06 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,6 @@ htsjdk.iws .command_tmp +.DS_Store atlassian-ide-plugin.xml /htsjdk.version.properties /test-output/ diff --git a/src/main/java/htsjdk/variant/variantcontext/VariantContextComparator.java b/src/main/java/htsjdk/variant/variantcontext/VariantContextComparator.java index d4e288f978..bcccbeab4e 100644 --- a/src/main/java/htsjdk/variant/variantcontext/VariantContextComparator.java +++ b/src/main/java/htsjdk/variant/variantcontext/VariantContextComparator.java @@ -83,11 +83,20 @@ public int compare(final VariantContext firstVariantContext, final VariantContex // Will throw NullPointerException -- happily -- if either of the chromosomes/contigs aren't // present. This error checking should already have been done in the constructor but it's left // in as defence anyway. - final int contigCompare = - this.contigIndexLookup.get(firstVariantContext.getContig()) - this.contigIndexLookup.get(secondVariantContext.getContig()); - return contigCompare != 0 - ? contigCompare - : firstVariantContext.getStart() - secondVariantContext.getStart(); + int contigCompare = this.contigIndexLookup.get(firstVariantContext.getContig()).compareTo(this.contigIndexLookup.get(secondVariantContext.getContig())); + contigCompare = contigCompare == 0 ? firstVariantContext.getStart() - secondVariantContext.getStart() : contigCompare; + if (contigCompare == 0) { + // Compare variants that have the same genomic span (chr:start-end) lexicographically by all alleles (ref and alts). + for (int i = 0; i < firstVariantContext.getAlleles().size(); i++) { + // If all previous alleles are identical and the first variant has additional alleles, make the first variant greater. + if (i >= secondVariantContext.getAlleles().size()) { return 1; } + contigCompare = firstVariantContext.getAlleles().get(i).compareTo(secondVariantContext.getAlleles().get(i)); + if (contigCompare != 0) return contigCompare; + } + } + // If all previous alleles are identical and the second variant has additional alleles, make the second variant greater. + if (firstVariantContext.getAlleles().size() < secondVariantContext.getAlleles().size()) { return -1; } + return contigCompare; } /** diff --git a/src/test/java/htsjdk/variant/variantcontext/VariantContextComparatorUnitTest.java b/src/test/java/htsjdk/variant/variantcontext/VariantContextComparatorUnitTest.java new file mode 100644 index 0000000000..578f673ae4 --- /dev/null +++ b/src/test/java/htsjdk/variant/variantcontext/VariantContextComparatorUnitTest.java @@ -0,0 +1,75 @@ +package htsjdk.variant.variantcontext; + +import htsjdk.HtsjdkTest; +import org.testng.Assert; +import org.testng.annotations.BeforeSuite; +import org.testng.annotations.Test; + +import java.util.Arrays; +import java.util.Collections; + +/** + * Unit tests for VariantContextComparator. + */ +public class VariantContextComparatorUnitTest extends HtsjdkTest { + private Allele refA, altG, altT; + + @BeforeSuite + public void before() { + refA = Allele.create("A", true); + altG = Allele.create("G", false); + altT = Allele.create("T", false); + } + + @Test + public void testVariantContextsWithSameSiteSortLexicographicallyByAlleleIdentical() { + final String contig = "chr1"; + final VariantContextComparator comparator = new VariantContextComparator(Collections.singletonList(contig)); + final VariantContextBuilder builder = new VariantContextBuilder("test", contig, 1, 1, Collections.emptyList()); + + final VariantContext variant1 = builder.alleles(Arrays.asList(refA, altG)).make(); + final VariantContext variant2 = builder.alleles(Arrays.asList(refA, altG)).make(); + + final int compare = comparator.compare(variant1, variant2); + Assert.assertEquals(compare, 0); // TODO: What other criteria might we sort by to break this tie? + } + + @Test + public void testVariantContextsWithSameSiteSortLexicographicallyByAllele() { + final String contig = "chr1"; + final VariantContextComparator comparator = new VariantContextComparator(Collections.singletonList(contig)); + final VariantContextBuilder builder = new VariantContextBuilder("test", contig, 1, 1, Collections.emptyList()); + + final VariantContext variant1 = builder.alleles(Arrays.asList(refA, altG)).make(); + final VariantContext variant2 = builder.alleles(Arrays.asList(refA, altT)).make(); + + final int compare = comparator.compare(variant1, variant2); + Assert.assertEquals(compare, -1); + } + + @Test + public void testVariantContextsWithSameSiteSortLexicographicallyByAlleleThenExtraAllelesForFirstVariant() { + final String contig = "chr1"; + final VariantContextComparator comparator = new VariantContextComparator(Collections.singletonList(contig)); + final VariantContextBuilder builder = new VariantContextBuilder("test", contig, 1, 1, Collections.emptyList()); + + final VariantContext variant1 = builder.alleles(Arrays.asList(refA, altG, altT)).make(); + final VariantContext variant2 = builder.alleles(Arrays.asList(refA, altG)).make(); + + final int compare = comparator.compare(variant1, variant2); + Assert.assertEquals(compare, 1); + } + + @Test + public void testVariantContextsWithSameSiteSortLexicographicallyByAlleleThenExtraAllelesForSecondVariant() { + final String contig = "chr1"; + final VariantContextComparator comparator = new VariantContextComparator(Collections.singletonList(contig)); + final VariantContextBuilder builder = new VariantContextBuilder("test", contig, 1, 1, Collections.emptyList()); + + final VariantContext variant1 = builder.alleles(Arrays.asList(refA, altG)).make(); + final VariantContext variant2 = builder.alleles(Arrays.asList(refA, altG, altT)).make(); + + final int compare = comparator.compare(variant1, variant2); + Assert.assertEquals(compare, -1); + } +}