Skip to content

Commit

Permalink
Adding some sanity checking to IntervalList reading. (#1230)
Browse files Browse the repository at this point in the history
* auto formatted IntervalList
* adding additional checks to IntervalList to prevent invalid values
  • Loading branch information
Yossi Farjoun authored and lbergelson committed Dec 17, 2018
1 parent 28dde96 commit 4a7cb03
Show file tree
Hide file tree
Showing 7 changed files with 313 additions and 177 deletions.
347 changes: 221 additions & 126 deletions src/main/java/htsjdk/samtools/util/IntervalList.java

Large diffs are not rendered by default.

18 changes: 10 additions & 8 deletions src/test/java/htsjdk/samtools/util/EdgeReadIteratorTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,11 @@

import htsjdk.samtools.SAMRecordSetBuilder;
import htsjdk.samtools.SamReader;
import org.testng.Assert;
import org.testng.annotations.Test;

import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.Arrays;
import java.util.List;
Expand Down Expand Up @@ -74,7 +74,7 @@ public void testEmitUncoveredLoci() {
// add a negative-strand fragment mapped on chrM with base quality of 10
builder.addFrag("record" + i, 0, startPosition, true, false, "36M", null, 10);
}
final int coveredEnd = CoordMath.getEnd(startPosition, readLength) +1;
final int coveredEnd = CoordMath.getEnd(startPosition, readLength) + 1;
final EdgeReadIterator sli = new EdgeReadIterator(builder.getSamReader());

int pos = 1;
Expand All @@ -97,7 +97,8 @@ public void testEmitUncoveredLoci() {
*/
@Override
@Test
public void testSimpleGappedAlignment() {final SAMRecordSetBuilder builder = getRecordBuilder();
public void testSimpleGappedAlignment() {
final SAMRecordSetBuilder builder = getRecordBuilder();
// add records up to coverage for the test in that position
final int startPosition = 165;
for (int i = 0; i < coverage; i++) {
Expand Down Expand Up @@ -332,7 +333,6 @@ public void testIntersectingAndNotInterval() {
assertEquals(81, locusPosition);
}


/**
* Test for intersecting interval for read with a deletion in the middle
*/
Expand Down Expand Up @@ -377,7 +377,6 @@ public void testIntersectingIntervalWithComplicatedCigar() {
assertEquals(21, locusPosition);
}


private void fillEmptyLocus(int[] expectedReferencePositions, int[] expectedDepths, int[][] expectedReadOffsets, int i) {
expectedReferencePositions[i] = i + 1;
expectedDepths[i] = 0;
Expand All @@ -395,8 +394,11 @@ private SamReader createSamFileReader() {
return builder.getSamReader();
}


private IntervalList createIntervalList(String s) {
return IntervalList.fromReader(new BufferedReader(new InputStreamReader(new ByteArrayInputStream(s.getBytes()))));
private IntervalList createIntervalList(final String s) {
try (BufferedReader br = new BufferedReader(new InputStreamReader(new ByteArrayInputStream(s.getBytes())))) {
return IntervalList.fromReader(br);
} catch (IOException e) {
throw new RuntimeException("Trouble closing reader: " + s, e);
}
}
}
97 changes: 54 additions & 43 deletions src/test/java/htsjdk/samtools/util/IntervalListTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -25,23 +25,18 @@
package htsjdk.samtools.util;

import htsjdk.HtsjdkTest;
import htsjdk.samtools.*;
import htsjdk.samtools.SAMException;
import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.SAMSequenceRecord;
import htsjdk.variant.vcf.VCFFileReader;
import org.testng.Assert;
import org.testng.annotations.BeforeTest;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;

import java.io.File;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.LinkedList;
import java.util.List;
import java.util.SortedSet;
import java.util.TreeSet;
import java.io.IOException;
import java.nio.file.Path;
import java.util.*;

/**
* Tests the IntervalList class
Expand All @@ -50,9 +45,10 @@ public class IntervalListTest extends HtsjdkTest {

final SAMFileHeader fileHeader;
final IntervalList list1, list2, list3;
static final public Path TEST_DIR = new File("src/test/resources/htsjdk/samtools/intervallist").toPath();

public IntervalListTest() {
fileHeader = IntervalList.fromFile(new File("src/test/resources/htsjdk/samtools/intervallist/IntervalListchr123_empty.interval_list")).getHeader();
fileHeader = IntervalList.fromPath(TEST_DIR.resolve("IntervalListchr123_empty.interval_list")).getHeader();
fileHeader.setSortOrder(SAMFileHeader.SortOrder.unsorted);

list1 = new IntervalList(fileHeader);
Expand All @@ -76,11 +72,12 @@ public IntervalListTest() {
list3.add(new Interval("3", 50, 470));
}


@Test
public void testIntervalListFrom() {
final String testPath = "src/test/resources/htsjdk/samtools/intervallist/IntervalListFromVCFTestComp.interval_list";
public void testIntervalListFrom() throws IOException {
final String testPath = TEST_DIR.resolve("IntervalListFromVCFTestComp.interval_list").toString();
final IntervalList fromFileList = IntervalList.fromFile(new File(testPath));
final IntervalList fromPathList = IntervalList.fromPath(Paths.get(testPath));
final IntervalList fromPathList = IntervalList.fromPath(IOUtil.getPath(testPath));
fromFileList.getHeader().getSequenceDictionary().assertSameDictionary(fromPathList.getHeader().getSequenceDictionary());
Assert.assertEquals(CollectionUtil.makeCollection(fromFileList.iterator()), CollectionUtil.makeCollection(fromPathList.iterator()));
}
Expand Down Expand Up @@ -225,18 +222,16 @@ public Object[][] unionData() {
union13.add(new Interval("2", 200, 600));
union13.add(new Interval("3", 50, 470));


return new Object[][]{
new Object[]{Arrays.asList(list1, list2, list3), union123},
new Object[]{Arrays.asList(list1, list2), union12},
new Object[]{Arrays.asList(list1, list2), union12},
new Object[]{Arrays.asList(list2, list3), union23},
new Object[]{Arrays.asList(list2, list3), union23},
new Object[]{Arrays.asList(list1, list3), union13},
new Object[]{Arrays.asList(list1, list3), union13}
};
}

@Test(dataProvider = "unionData", enabled = true)
@Test(dataProvider = "unionData")
public void testUnionIntervalLists(final List<IntervalList> lists, final IntervalList list) {
Assert.assertEquals(
CollectionUtil.makeCollection(IntervalList.union(lists).iterator()),
Expand Down Expand Up @@ -467,22 +462,23 @@ public void testOverlapsEmptySecondList() {

@DataProvider(name = "VCFCompData")
public Object[][] VCFCompData() {
final Path intervalListFromVcf = TEST_DIR.resolve("IntervalListFromVCFTest.vcf");
final Path intervalListFromVcfManual = TEST_DIR.resolve("IntervalListFromVCFTestManual.vcf");

return new Object[][]{
new Object[]{"src/test/resources/htsjdk/samtools/intervallist/IntervalListFromVCFTest.vcf", "src/test/resources/htsjdk/samtools/intervallist/IntervalListFromVCFTestComp.interval_list", false},
new Object[]{"src/test/resources/htsjdk/samtools/intervallist/IntervalListFromVCFTest.vcf", "src/test/resources/htsjdk/samtools/intervallist/IntervalListFromVCFTestCompInverse.interval_list", true},
new Object[]{"src/test/resources/htsjdk/samtools/intervallist/IntervalListFromVCFTestManual.vcf", "src/test/resources/htsjdk/samtools/intervallist/IntervalListFromVCFTestManualComp.interval_list", false},
new Object[]{"src/test/resources/htsjdk/samtools/intervallist/IntervalListFromVCFTestManual.vcf", "src/test/resources/htsjdk/samtools/intervallist/IntervalListFromVCFTestCompInverseManual.interval_list", true}
new Object[]{intervalListFromVcf, TEST_DIR.resolve("IntervalListFromVCFTestComp.interval_list"), false},
new Object[]{intervalListFromVcf, TEST_DIR.resolve("IntervalListFromVCFTestCompInverse.interval_list"), true},
new Object[]{intervalListFromVcfManual, TEST_DIR.resolve("IntervalListFromVCFTestManualComp.interval_list"), false},
new Object[]{intervalListFromVcfManual, TEST_DIR.resolve("IntervalListFromVCFTestCompInverseManual.interval_list"), true}
};
}

@Test(dataProvider = "VCFCompData")
public void testFromVCF(final String vcf, final String compInterval, final boolean invertVCF) {
public void testFromVCF(final Path vcf, final Path compInterval, final boolean invertVCF) {

final File vcfFile = new File(vcf);
final File compIntervalFile = new File(compInterval);

final IntervalList compList = IntervalList.fromFile(compIntervalFile);
final IntervalList list = invertVCF ? IntervalList.invert(VCFFileReader.toIntervalList(vcfFile.toPath())) : VCFFileReader.toIntervalList(vcfFile.toPath());
final IntervalList compList = IntervalList.fromPath(compInterval);
final IntervalList list = invertVCF ? IntervalList.invert(VCFFileReader.toIntervalList(vcf)) : VCFFileReader.toIntervalList(vcf);

compList.getHeader().getSequenceDictionary().assertSameDictionary(list.getHeader().getSequenceDictionary());

Expand All @@ -492,8 +488,8 @@ public void testFromVCF(final String vcf, final String compInterval, final boole
//assert that the intervals correspond
Assert.assertEquals(intervals, compIntervals);

final List<String> intervalNames = new LinkedList<String>();
final List<String> compIntervalNames = new LinkedList<String>();
final List<String> intervalNames = new LinkedList<>();
final List<String> compIntervalNames = new LinkedList<>();

for (final Interval interval : intervals) {
intervalNames.add(interval.getName());
Expand All @@ -507,13 +503,11 @@ public void testFromVCF(final String vcf, final String compInterval, final boole


@Test(dataProvider = "VCFCompData")
public void testFromVCFWithPath(final String vcf, final String compInterval, final boolean invertVCF) {
public void testFromVCFWithPath(final Path vcf, final Path compInterval, final boolean invertVCF) {

final File vcfFile = new File(vcf);
final File compIntervalFile = new File(compInterval);

final IntervalList compList = IntervalList.fromFile(compIntervalFile);
final IntervalList list = invertVCF ? IntervalList.invert(VCFFileReader.toIntervalList(vcfFile.toPath())) : VCFFileReader.toIntervalList(vcfFile.toPath());
final IntervalList compList = IntervalList.fromPath(compInterval);
final IntervalList list = invertVCF ? IntervalList.invert(VCFFileReader.toIntervalList(vcf)) : VCFFileReader.toIntervalList(vcf);

compList.getHeader().getSequenceDictionary().assertSameDictionary(list.getHeader().getSequenceDictionary());

Expand All @@ -523,8 +517,8 @@ public void testFromVCFWithPath(final String vcf, final String compInterval, fin
//assert that the intervals correspond
Assert.assertEquals(intervals, compIntervals);

final List<String> intervalNames = new LinkedList<String>();
final List<String> compIntervalNames = new LinkedList<String>();
final List<String> intervalNames = new LinkedList<>();
final List<String> compIntervalNames = new LinkedList<>();

for (final Interval interval : intervals) {
intervalNames.add(interval.getName());
Expand All @@ -539,17 +533,18 @@ public void testFromVCFWithPath(final String vcf, final String compInterval, fin

@DataProvider
public Object[][] testFromSequenceData() {
final Path intervalList = TEST_DIR.resolve("IntervalListFromVCFTestComp.interval_list");
return new Object[][]{
new Object[]{"src/test/resources/htsjdk/samtools/intervallist/IntervalListFromVCFTestComp.interval_list", "1", 249250621},
new Object[]{"src/test/resources/htsjdk/samtools/intervallist/IntervalListFromVCFTestComp.interval_list", "2", 243199373},
new Object[]{"src/test/resources/htsjdk/samtools/intervallist/IntervalListFromVCFTestComp.interval_list", "3", 198022430},
new Object[]{intervalList, "1", 249250621},
new Object[]{intervalList, "2", 243199373},
new Object[]{intervalList, "3", 198022430},
};
}

@Test(dataProvider = "testFromSequenceData")
public void testFromSequenceName(final String intervalList, final String referenceName, final Integer length) {
public void testFromSequenceName(final Path intervalList, final String referenceName, final Integer length) {

final IntervalList intervals = IntervalList.fromFile(new File(intervalList));
final IntervalList intervals = IntervalList.fromPath(intervalList);
final IntervalList test = IntervalList.fromName(intervals.getHeader(), referenceName);
Assert.assertEquals(test.getIntervals(), CollectionUtil.makeList(new Interval(referenceName, 1, length)));
}
Expand Down Expand Up @@ -659,8 +654,24 @@ public void changeHeader() {

@Test(expectedExceptions = IllegalArgumentException.class)
public void testContigsAbsentInHeader() {
String vcf = "src/test/resources/htsjdk/samtools/intervallist/IntervalListFromVCFNoContigLines.vcf";
String vcf = TEST_DIR.resolve("IntervalListFromVCFNoContigLines.vcf").toString();
final File vcfFile = new File(vcf);
VCFFileReader.toIntervalList(vcfFile.toPath());
}


@DataProvider
public static Object[][] brokenFiles() {
return new Object[][]{
{TEST_DIR.resolve("broken.end.extends.too.far.interval_list")},
{TEST_DIR.resolve("broken.start.bigger.than.end.interval_list")},
{TEST_DIR.resolve("broken.unallowed.strand.interval_list")},
{TEST_DIR.resolve("broken.zero.start.interval_list")},
};
}

@Test(dataProvider = "brokenFiles", expectedExceptions = IllegalArgumentException.class)
public void testBreaks(final Path brokenIntervalFile){
IntervalList.fromPath(brokenIntervalFile);
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
@SQ SN:1 LN:249250621
@SQ SN:2 LN:243199373
@SQ SN:3 LN:198022430
1 8216712 8216712 + rs11121115
1 17032814 17032814 + rs2773183
2 1143476 1243476 * borked
2 9240279 9240279 + rs56249990
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
@SQ SN:1 LN:249250621
@SQ SN:2 LN:243199373
@SQ SN:3 LN:198022430
1 8216712 8216712 + rs11121115
1 17032816 17032814 + borked
2 1143476 1143476 + rs4998209
2 9240279 9240279 + rs56249990
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
@SQ SN:1 LN:249250621
@SQ SN:2 LN:243199373
@SQ SN:3 LN:198022430
1 8216712 8216712 + rs11121115
1 17032814 17032814 + rs2773183
2 1143476 243199374 + borked
2 9240279 9240279 + rs56249990
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
@SQ SN:1 LN:249250621
@SQ SN:2 LN:243199373
@SQ SN:3 LN:198022430
1 8216712 8216712 + rs11121115
1 0 17032814 + borked
2 1143476 1143476 + rs4998209
2 9240279 9240279 + rs56249990

0 comments on commit 4a7cb03

Please sign in to comment.