-
Notifications
You must be signed in to change notification settings - Fork 244
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Fixing zero-length interval bug in IntervalList.merge #1318
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -379,13 +379,18 @@ private static List<Interval> breakIntervalAtBandMultiples(final Interval interv | |
} | ||
|
||
/** | ||
* Merges a sorted collection of intervals and optionally concatenates unique names or takes the first name. | ||
* Merges a collection of intervals and optionally concatenates unique names or takes the first name. | ||
* * | ||
* @param concatenateNames if true, combine the names of all the intervals with |, otherwise use the name of the first interval. | ||
* @return a single interval which spans from the minimum input start position to the maximum input end position. | ||
* The resulting strandedness and contig are those of the first input with no validation. | ||
* | ||
*/ | ||
static Interval merge(final Iterable<Interval> intervals, final boolean concatenateNames) { | ||
final Interval first = intervals.iterator().next(); | ||
final String chrom = first.getContig(); | ||
int start = first.getStart(); | ||
int end = start; | ||
int end = first.getEnd(); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nice catch. sorry. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I definitely didn't notice it in review. It makes sense, you can't have an end before start.. |
||
final boolean neg = first.isNegativeStrand(); | ||
final LinkedHashSet<String> names = new LinkedHashSet<>(); | ||
final String name; | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -564,22 +564,47 @@ public void testFromSequenceName(final Path intervalList, final String reference | |
Assert.assertEquals(test.getIntervals(), CollectionUtil.makeList(new Interval(referenceName, 1, length))); | ||
} | ||
|
||
@Test | ||
public void testMerges() { | ||
final SortedSet<Interval> intervals = new TreeSet<Interval>() {{ | ||
add(new Interval("1", 500, 600, false, "foo")); | ||
add(new Interval("1", 550, 650, false, "bar")); | ||
add(new Interval("1", 625, 699, false, "splat")); | ||
}}; | ||
|
||
Interval out = IntervalList.merge(intervals, false); | ||
Assert.assertEquals(out.getStart(), 500); | ||
Assert.assertEquals(out.getEnd(), 699); | ||
@DataProvider | ||
public Object[][] getMergeTestCases() { | ||
final String contig = "1"; | ||
final Interval foo = new Interval(contig, 500, 600, false, "foo"); | ||
final Interval bar = new Interval(contig, 550, 650, false, "bar"); | ||
final Interval splat = new Interval(contig, 625, 699, false, "splat"); | ||
final List<Interval> threeInOrderIntervals = Arrays.asList(foo, bar, splat); | ||
final List<Interval> threeOutOfOrderIntervals = Arrays.asList(bar, foo, splat); | ||
final Interval zeroLengthInterval = new Interval(contig, 626, 625); | ||
final Interval interval600To601 = new Interval(contig, 600, 601); | ||
final Interval interval600To625 = new Interval(contig, 600, 625); | ||
final Interval normalInterval = new Interval(contig, 626, 629, true, "whee"); | ||
final Interval zeroInterval10To9 = new Interval(contig, 10, 9); | ||
return new Object[][]{ | ||
{threeInOrderIntervals, true, new Interval(contig, 500, 699, false, "foo|bar|splat")}, | ||
{threeInOrderIntervals, false, new Interval(contig, 500, 699, false, "foo")}, | ||
{threeOutOfOrderIntervals, true, new Interval(contig, 500, 699, false, "bar|foo|splat")}, | ||
{threeOutOfOrderIntervals, false, new Interval(contig, 500, 699, false, "bar")}, | ||
{Collections.singletonList(normalInterval), true, normalInterval}, | ||
{Collections.singletonList(normalInterval), false, normalInterval}, | ||
{Collections.singletonList(zeroLengthInterval), true, zeroLengthInterval}, | ||
{Collections.singletonList(zeroLengthInterval), false, zeroLengthInterval}, | ||
{Arrays.asList(zeroLengthInterval, interval600To601), true, interval600To625}, | ||
{Arrays.asList(zeroLengthInterval, interval600To601), false, interval600To625}, | ||
{Arrays.asList(zeroLengthInterval, interval600To601), true, interval600To625}, | ||
{Arrays.asList(interval600To601, new Interval(contig, 100, 200, false, "hasName")), true, new Interval(contig, 100, 601, false, "hasName")}, | ||
{Arrays.asList(interval600To601, new Interval(contig, 100, 200, false, "hasName")), false, new Interval(contig, 100, 601, false, "hasName")}, | ||
{Arrays.asList(zeroInterval10To9, new Interval(contig, 11, 15)), false, new Interval(contig, 10, 15)}, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't think that this is right. adding a zero-length interval shouldn't add bases to another interval. |
||
{Arrays.asList(zeroInterval10To9, new Interval(contig, 10, 15)), true, new Interval(contig, 10, 15)}, | ||
{Arrays.asList(zeroInterval10To9, new Interval(contig, 9,15)), false, new Interval(contig, 9, 15)}, | ||
{Arrays.asList(zeroInterval10To9, new Interval(contig, 8, 9)), true, new Interval(contig, 8, 9)} | ||
}; | ||
} | ||
|
||
intervals.add(new Interval("1", 626, 629, false, "whee")); | ||
out = IntervalList.merge(intervals, false); | ||
Assert.assertEquals(out.getStart(), 500); | ||
Assert.assertEquals(out.getEnd(), 699); | ||
@Test(dataProvider = "getMergeTestCases") | ||
public void testMerges(Iterable<Interval> intervals, boolean concatNames, Interval expected) { | ||
final Interval merged = IntervalList.merge(intervals, concatNames); | ||
Assert.assertEquals(merged.getContig(), expected.getContig()); | ||
Assert.assertEquals(merged.getStart(), expected.getStart()); | ||
Assert.assertEquals(merged.getStrand(), expected.getStrand()); | ||
Assert.assertEquals(merged.getName(), expected.getName()); | ||
} | ||
|
||
@Test | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
what should happen when we merge a zero-length interval with a non-zerolength one after it?
so,
10-9 merged with 11-15 should remain those two intervals
10-9 merged with 10-15 should become one interval 10-15
10-9 merged with 9-15 should become one interval 9-15
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The first case, you mean it should become 10-15? Merge never leaves them separate.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
no...I think that it should not become 10-15....since the size of the first interval is 0 it shouldn't change the total number of bases covered...
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think that zero length intervals need to either be dropped when merging, or ignored...normal rules of merging do not apply.