From df6d0c0f38e65f820ea0c67a06c6315ad251593a Mon Sep 17 00:00:00 2001 From: Yossi Farjoun Date: Thu, 14 Mar 2019 23:20:52 -0400 Subject: [PATCH 1/9] Implemented a tribble (co)dec for IntervalList. - moved the business part of the dec from IntervalList to the Tribble dec. - testing the Tribble directly. --- .../java/htsjdk/samtools/util/Interval.java | 19 ++- .../htsjdk/samtools/util/IntervalList.java | 73 +-------- .../java/htsjdk/samtools/util/LineReader.java | 4 +- .../htsjdk/tribble/AsciiSamFeatureCodec.java | 91 ++++++++++ .../IntervalList/IntervalListCodec.java | 155 ++++++++++++++++++ .../htsjdk/variant/vcf/AbstractVCFCodec.java | 49 ++---- .../IntervalList/IntervalListCodecTest.java | 115 +++++++++++++ .../htsjdk/tribble/interval_list/example.dict | 3 + .../interval_list/shortExample.interval_list | 7 + .../shortExampleWithEmptyLine.interval_list | 8 + 10 files changed, 425 insertions(+), 99 deletions(-) create mode 100644 src/main/java/htsjdk/tribble/AsciiSamFeatureCodec.java create mode 100644 src/main/java/htsjdk/tribble/IntervalList/IntervalListCodec.java create mode 100644 src/test/java/htsjdk/tribble/IntervalList/IntervalListCodecTest.java create mode 100644 src/test/resources/htsjdk/tribble/interval_list/example.dict create mode 100644 src/test/resources/htsjdk/tribble/interval_list/shortExample.interval_list create mode 100644 src/test/resources/htsjdk/tribble/interval_list/shortExampleWithEmptyLine.interval_list diff --git a/src/main/java/htsjdk/samtools/util/Interval.java b/src/main/java/htsjdk/samtools/util/Interval.java index c0493745a9..a0ffb59790 100644 --- a/src/main/java/htsjdk/samtools/util/Interval.java +++ b/src/main/java/htsjdk/samtools/util/Interval.java @@ -24,6 +24,7 @@ package htsjdk.samtools.util; import htsjdk.samtools.SAMException; +import htsjdk.tribble.Feature; import htsjdk.tribble.annotation.Strand; import java.util.Collection; @@ -33,7 +34,7 @@ * * @author Tim Fennell */ -public class Interval implements Comparable, Cloneable, Locatable { +public class Interval implements Comparable, Cloneable, Feature { private final boolean negativeStrand; private final String name; private final String contig; @@ -209,6 +210,22 @@ public boolean equals(final Object other) { } } + /** + * Equals method that also checks strand and name + */ + public boolean equalsWithStrandAndName(final Object other) { + if (!this.equals(other)) { + return false; + } + + final Interval that = (Interval) other; + if (this.negativeStrand != that.negativeStrand) { + return false; + } + + return this.name.equals(that.name); + } + @Override public int hashCode() { int result = getContig().hashCode(); diff --git a/src/main/java/htsjdk/samtools/util/IntervalList.java b/src/main/java/htsjdk/samtools/util/IntervalList.java index 47b3a1189c..9bf7c31c51 100644 --- a/src/main/java/htsjdk/samtools/util/IntervalList.java +++ b/src/main/java/htsjdk/samtools/util/IntervalList.java @@ -28,6 +28,7 @@ import htsjdk.samtools.SAMSequenceDictionary; import htsjdk.samtools.SAMSequenceRecord; import htsjdk.samtools.SAMTextHeaderCodec; +import htsjdk.tribble.IntervalList.IntervalListCodec; import java.io.*; import java.nio.file.Path; @@ -38,8 +39,6 @@ import java.util.Iterator; import java.util.LinkedHashSet; import java.util.List; -import java.util.SortedSet; -import java.util.TreeSet; /** * Represents a list of intervals against a reference sequence that can be written to @@ -512,11 +511,6 @@ public static IntervalList fromFiles(final Collection intervalListFiles) { */ public static IntervalList fromReader(final BufferedReader in) { - final int SEQUENCE_POS=0; - final int START_POS=1; - final int END_POS=2; - final int STRAND_POS=3; - final int NAME_POS=4; try { // Setup a reader and parse the header final StringBuilder builder = new StringBuilder(4096); @@ -539,71 +533,18 @@ public static IntervalList fromReader(final BufferedReader in) { final IntervalList list = new IntervalList(codec.decode(headerReader, "BufferedReader")); final SAMSequenceDictionary dict = list.getHeader().getSequenceDictionary(); - //there might not be any lines after the header, in which case we should return an empty list + // There might not be any lines after the header, in which case we should return an empty list if (line == null) { return list; } + final IntervalListCodec intervalListCodec = new IntervalListCodec(dict); + // Then read in the intervals - final FormatUtil format = new FormatUtil(); - String lastSeq = null; do { - if (line.trim().isEmpty()) { - continue; // skip over blank lines - } - - // Make sure we have the right number of fields - final String[] fields = line.split("\t"); - if (fields.length != 5) { - throw new SAMException("Invalid interval record contains " + - fields.length + " fields: " + line); - } - - // Then parse them out - String seq = fields[SEQUENCE_POS]; - if (seq.equals(lastSeq)) { - seq = lastSeq; - } - lastSeq = seq; - - final int start = format.parseInt(fields[START_POS]); - final int end = format.parseInt(fields[END_POS]); - if (start < 1) { - throw new IllegalArgumentException("Coordinate less than 1: start value of " + start + - " is less than 1 and thus illegal"); - } - - if (start > end + 1) { - throw new IllegalArgumentException("Start value of " + start + - " is greater than end + 1 for end of value: " + end + - ". I'm afraid I cannot let you do that."); - } - - final boolean negative; - switch (fields[STRAND_POS]) { - case "-": - negative = true; - break; - case "+": - negative = false; - break; - default: - throw new IllegalArgumentException("Invalid strand field: " + fields[STRAND_POS]); - } - - final String name = fields[NAME_POS]; - - final Interval interval = new Interval(seq, start, end, negative, name); - final SAMSequenceRecord sequence = dict.getSequence(seq); - if (sequence == null) { - log.warn("Ignoring interval for unknown reference: " + interval); - } else { - final int sequenceLength = sequence.getSequenceLength(); - if (sequenceLength > 0 && sequenceLength < end) { - throw new IllegalArgumentException("interval with end: " + end + " extends beyond end of sequence with length: " + sequenceLength); - } - - list.intervals.add(interval); + Interval nullableInterval = intervalListCodec.decode(line); + if (nullableInterval != null) { + list.intervals.add(nullableInterval); } } while ((line = in.readLine()) != null); diff --git a/src/main/java/htsjdk/samtools/util/LineReader.java b/src/main/java/htsjdk/samtools/util/LineReader.java index 4a07f15b86..7e84bc2b14 100644 --- a/src/main/java/htsjdk/samtools/util/LineReader.java +++ b/src/main/java/htsjdk/samtools/util/LineReader.java @@ -28,7 +28,9 @@ /** * Interface allows for implementations that read lines from a String, an ASCII file, or somewhere else. */ -public interface LineReader extends Closeable{ +public interface LineReader extends Closeable { + + int EOF_VALUE = -1; /** * Read a line and remove the line terminator diff --git a/src/main/java/htsjdk/tribble/AsciiSamFeatureCodec.java b/src/main/java/htsjdk/tribble/AsciiSamFeatureCodec.java new file mode 100644 index 0000000000..3fd7112204 --- /dev/null +++ b/src/main/java/htsjdk/tribble/AsciiSamFeatureCodec.java @@ -0,0 +1,91 @@ +/* + * Copyright (c) 2019 by The Broad Institute, Inc. and the Massachusetts Institute of Technology. + * All Rights Reserved. + * + * This software is licensed under the terms of the GNU Lesser General Public License (LGPL), Version 2.1 which + * is available at http://www.opensource.org/licenses/lgpl-2.1.php. + * + * THE SOFTWARE IS PROVIDED "AS IS." THE BROAD AND MIT MAKE NO REPRESENTATIONS OR WARRANTIES OF + * ANY KIND CONCERNING THE SOFTWARE, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT + * OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. IN NO EVENT SHALL THE BROAD OR MIT, OR THEIR + * RESPECTIVE TRUSTEES, DIRECTORS, OFFICERS, EMPLOYEES, AND AFFILIATES BE LIABLE FOR ANY DAMAGES OF + * ANY KIND, INCLUDING, WITHOUT LIMITATION, INCIDENTAL OR CONSEQUENTIAL DAMAGES, ECONOMIC + * DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER THE BROAD OR MIT SHALL + * BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE + * FOREGOING. + */ + +package htsjdk.tribble; + +import htsjdk.samtools.util.*; +import htsjdk.tribble.readers.AsciiLineReader; +import htsjdk.tribble.readers.AsciiLineReaderIterator; + +import java.io.IOException; +import java.io.InputStream; + +/** + * A convenience base class for codecs that want to read in features from ASCII lines. + *

+ * This class overrides the general decode Features for streams and presents instead + * Strings to decode(String) and readHeader(LineReader) functions. + * + * @param The feature type this codec reads + */ +public abstract class AsciiSamFeatureCodec extends AbstractFeatureCodec { + private static final Log log = Log.getInstance(AsciiSamFeatureCodec.class); + + protected AsciiSamFeatureCodec(final Class myClass) { + super(myClass); + } + + @Override + public void close(final LineReader lineIterator) { + CloserUtil.close(lineIterator); + } + + @Override + public boolean isDone(final LineReader lineIterator) { + return lineIterator.peek() == LineReader.EOF_VALUE; + } + + @Override + public LocationAware makeIndexableSourceFromStream(final InputStream inputStream) { + return new AsciiLineReaderIterator(AsciiLineReader.from(inputStream)); + } + + @Override + public LineReader makeSourceFromStream(final InputStream bufferedInputStream) { + return new BufferedLineReader(bufferedInputStream); + } + + /** + * Convenience method. Decoding in ASCII files operates line-by-line, so obviate the need to call + * {@link LineReader#readLine()} in implementing classes and, instead, have them implement + * {@link AsciiSamFeatureCodec#decode(String)}. + */ + @Override + public T decode(final LineReader lineIterator) { + return decode(lineIterator.readLine()); + } + + /** + * @see AsciiSamFeatureCodec#decode(LineReader) + */ + public abstract T decode(String s); + + @Override + public FeatureCodecHeader readHeader(final LineReader lineReader) throws IOException { + // TODO: Track header end here, rather than assuming there isn't one...need to maintain length of header... + final Object header = readActualHeader(lineReader); + return new FeatureCodecHeader(header, FeatureCodecHeader.NO_HEADER_END); + } + + /** + * Read and return the header, or null if there is no header. + * + * @return the actual header data in the file, or null if none is available + */ + abstract public Object readActualHeader(final LineReader reader); +} diff --git a/src/main/java/htsjdk/tribble/IntervalList/IntervalListCodec.java b/src/main/java/htsjdk/tribble/IntervalList/IntervalListCodec.java new file mode 100644 index 0000000000..73f857149a --- /dev/null +++ b/src/main/java/htsjdk/tribble/IntervalList/IntervalListCodec.java @@ -0,0 +1,155 @@ +/* + * Copyright (c) 2019, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package htsjdk.tribble.IntervalList; + +import htsjdk.samtools.*; +import htsjdk.samtools.util.FormatUtil; +import htsjdk.samtools.util.Interval; +import htsjdk.samtools.util.LineReader; +import htsjdk.samtools.util.Log; +import htsjdk.tribble.AsciiSamFeatureCodec; +import htsjdk.tribble.TribbleException; + +/** + * A tribble codec for IntervalLists. + * + * Also contains the parseing code for the non-tribble parsing of IntervalLists + */ + +public class IntervalListCodec extends AsciiSamFeatureCodec { + + static final Log log = Log.getInstance(IntervalListCodec.class); + + final char[] shortArray; + private SAMSequenceDictionary dictionary = null; + + public IntervalListCodec() { + super(Interval.class); + shortArray = new char[1]; + } + + public IntervalListCodec(final SAMSequenceDictionary dict) { + this(); + dictionary = dict; + } + + String lastSeq = null; + + private Interval parseIntervalString(final String line, final SAMSequenceDictionary dict) { + final int SEQUENCE_POS = 0; + final int START_POS = 1; + final int END_POS = 2; + final int STRAND_POS = 3; + final int NAME_POS = 4; + + final FormatUtil format = new FormatUtil(); + + // Make sure we have the right number of fields + final String[] fields = line.split("\t"); + if (fields.length != 5) { + throw new SAMException("Invalid interval record contains " + + fields.length + " fields: " + line); + } + + // Then parse them out + String seq = fields[SEQUENCE_POS]; + if (seq.equals(lastSeq)) { + seq = lastSeq; + } + lastSeq = seq; + + final int start = format.parseInt(fields[START_POS]); + final int end = format.parseInt(fields[END_POS]); + if (start < 1) { + throw new IllegalArgumentException("Coordinate less than 1: start value of " + start + + " is less than 1 and thus illegal"); + } + + if (start > end + 1) { + throw new IllegalArgumentException("Start value of " + start + + " is greater than end + 1 for end of value: " + end + + ". I'm afraid I cannot let you do that."); + } + + final boolean negative; + switch (fields[STRAND_POS]) { + case "-": + negative = true; + break; + case "+": + negative = false; + break; + default: + throw new IllegalArgumentException("Invalid strand field: " + fields[STRAND_POS]); + } + + final String name = fields[NAME_POS]; + + final Interval interval = new Interval(seq, start, end, negative, name); + final SAMSequenceRecord sequence = dict.getSequence(seq); + if (sequence == null) { + log.warn("Ignoring interval for unknown reference: " + interval); + return null; + } else { + final int sequenceLength = sequence.getSequenceLength(); + if (sequenceLength > 0 && sequenceLength < end) { + throw new IllegalArgumentException("interval with end: " + end + " extends beyond end of sequence with length: " + sequenceLength); + } + return interval; + } + } + + @Override + public Interval decode(final String line) { + if (line.startsWith("@")) { + return null; + } + + if (line.trim().isEmpty()) { + return null; + } + // our header cannot be null, we need the dictionary from the header + if (dictionary == null) { + throw new TribbleException("IntervalList dictionary cannot be null when decoding a record"); + } + + return parseIntervalString(line, dictionary); + } + + @Override + public Object readActualHeader(LineReader lineReader) { + final SAMTextHeaderCodec headerCodec = new SAMTextHeaderCodec(); + + final SAMFileHeader header = headerCodec.decode(lineReader, ""); + dictionary = header.getSequenceDictionary(); + return header; + } + + @Override + public boolean canDecode(String s) { + return s.endsWith(".interval_list"); + } +} + diff --git a/src/main/java/htsjdk/variant/vcf/AbstractVCFCodec.java b/src/main/java/htsjdk/variant/vcf/AbstractVCFCodec.java index bfa48b4c87..1b89929dae 100644 --- a/src/main/java/htsjdk/variant/vcf/AbstractVCFCodec.java +++ b/src/main/java/htsjdk/variant/vcf/AbstractVCFCodec.java @@ -34,30 +34,14 @@ import htsjdk.tribble.index.tabix.TabixFormat; import htsjdk.tribble.util.ParsingUtils; import htsjdk.variant.utils.GeneralUtils; -import htsjdk.variant.variantcontext.Allele; -import htsjdk.variant.variantcontext.Genotype; -import htsjdk.variant.variantcontext.GenotypeBuilder; -import htsjdk.variant.variantcontext.GenotypeLikelihoods; -import htsjdk.variant.variantcontext.LazyGenotypesContext; -import htsjdk.variant.variantcontext.VariantContext; -import htsjdk.variant.variantcontext.VariantContextBuilder; +import htsjdk.variant.variantcontext.*; import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; import java.nio.file.Files; import java.nio.file.Path; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Iterator; -import java.util.LinkedHashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.StringTokenizer; +import java.util.*; import java.util.zip.GZIPInputStream; @@ -244,19 +228,22 @@ public VCFHeaderVersion getVersion() { } /** - * Explicitly set the VCFHeader on this codec. This will overwrite the header read from the file - * and the version state stored in this instance; conversely, reading the header from a file will - * overwrite whatever is set here. The returned header may not be identical to the header argument - * since the header lines may be "repaired" (i.e., rewritten) if doOnTheFlyModifications is set. - */ - public VCFHeader setVCFHeader(final VCFHeader header, final VCFHeaderVersion version) { - this.version = version; - - if (this.doOnTheFlyModifications) this.header = VCFStandardHeaderLines.repairStandardHeaderLines(header); - else this.header = header; - - return this.header; - } + * Explicitly set the VCFHeader on this codec. This will overwrite the header read from the file + * and the version state stored in this instance; conversely, reading the header from a file will + * overwrite whatever is set here. The returned header may not be identical to the header argument + * since the header lines may be "repaired" (i.e., rewritten) if doOnTheFlyModifications is set. + */ + public VCFHeader setVCFHeader(final VCFHeader header, final VCFHeaderVersion version) { + this.version = version; + + if (this.doOnTheFlyModifications) { + this.header = VCFStandardHeaderLines.repairStandardHeaderLines(header); + } else { + this.header = header; + } + + return this.header; + } /** * the fast decode function diff --git a/src/test/java/htsjdk/tribble/IntervalList/IntervalListCodecTest.java b/src/test/java/htsjdk/tribble/IntervalList/IntervalListCodecTest.java new file mode 100644 index 0000000000..ea9ab77003 --- /dev/null +++ b/src/test/java/htsjdk/tribble/IntervalList/IntervalListCodecTest.java @@ -0,0 +1,115 @@ +/* + * Copyright (c) 2019, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package htsjdk.tribble.IntervalList; + +import htsjdk.HtsjdkTest; +import htsjdk.samtools.SAMSequenceDictionary; +import htsjdk.samtools.util.IOUtil; +import htsjdk.samtools.util.Interval; +import htsjdk.samtools.util.IntervalList; +import htsjdk.samtools.util.IntervalListTest; +import htsjdk.tribble.*; +import htsjdk.variant.utils.SAMSequenceDictionaryExtractor; +import org.testng.Assert; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import java.io.File; +import java.io.IOException; +import java.nio.file.Path; + +public class IntervalListCodecTest extends HtsjdkTest { + + @DataProvider() + public Object[][] simpleDecodeData() { + return new Object[][]{ + {"chr1\t1\t3\t-\thi, Mom!", new Interval("chr1", 1, 3, true, "hi, Mom!")}, + {"chr1\t1\t3\t+\thi, Mom!", new Interval("chr1", 1, 3, false, "hi, Mom!")}, + {"chr1\t4\t3\t-\thi, Mom!", new Interval("chr1", 4, 3, true, "hi, Mom!")}, + {"chr2\t1\t0\t-\thi, Mom!", new Interval("chr2", 1, 0, true, "hi, Mom!")}, + }; + } + + @Test(dataProvider = "simpleDecodeData") + public void testSimpleDecode(final String decodeThis, final Interval expectedInterval) throws IOException { + final SAMSequenceDictionary dict = SAMSequenceDictionaryExtractor.extractDictionary(IOUtil.getPath(TestUtils.DATA_DIR + "interval_list/example.dict")); + final IntervalListCodec codec = new IntervalListCodec(dict); + final Interval interval; + + interval = codec.decode(decodeThis); + + Assert.assertTrue(interval.equalsWithStrandAndName(expectedInterval)); + } + + @DataProvider + Object[][] TribbleDecodeData(){ + return new Object[][]{ + {new File(TestUtils.DATA_DIR, "interval_list/shortExample.interval_list")}, + {new File(TestUtils.DATA_DIR, "interval_list/shortExampleWithEmptyLine.interval_list")} + }; + } + + @Test(dataProvider = "TribbleDecodeData") + public void testTribbleDecode(final File file) throws IOException { + final IntervalList intervalListLocal = IntervalList.fromFile(file); + try (final FeatureReader intervalListReader = AbstractFeatureReader.getFeatureReader(file.getAbsolutePath(), new IntervalListCodec(), false); + final CloseableTribbleIterator iterator = intervalListReader.iterator()) { + Assert.assertEquals(intervalListLocal.getHeader(), intervalListReader.getHeader()); + + for (final Interval interval : intervalListLocal) { + Assert.assertTrue(iterator.hasNext()); + Assert.assertTrue(interval.equalsWithStrandAndName(iterator.next())); + } + Assert.assertFalse(iterator.hasNext()); + } + } + + /** + * Test reading a IntervalList file which is malformed. + */ + @Test(expectedExceptions = RuntimeException.class, dataProvider = "brokenFiles", dataProviderClass = IntervalListTest.class) + public void testDecodeIntervalListFile_bad(Path file) throws Exception { + IntervalListCodec codec = new IntervalListCodec(); + + try (FeatureReader intervalListReader = AbstractFeatureReader.getFeatureReader(IOUtil.getFullCanonicalPath(file.toFile()), codec, false); + CloseableTribbleIterator iter = intervalListReader.iterator()) { + for (final Feature unused : iter) { + } + } + } + + // Once someone implement tabix interval-lists, this should fail (and they should make a test that passes...) + @Test(expectedExceptions = TribbleException.class) + public void testGetTabixFormat() { + new IntervalListCodec().getTabixFormat(); + } + + @Test + public void testCanDecode() { + final IntervalListCodec codec = new IntervalListCodec(); + final String pattern = "filename.interval_list"; + Assert.assertTrue(codec.canDecode(pattern)); + } +} diff --git a/src/test/resources/htsjdk/tribble/interval_list/example.dict b/src/test/resources/htsjdk/tribble/interval_list/example.dict new file mode 100644 index 0000000000..6e25d82e40 --- /dev/null +++ b/src/test/resources/htsjdk/tribble/interval_list/example.dict @@ -0,0 +1,3 @@ +@SQ SN:chr1 LN:249250621 +@SQ SN:chr2 LN:243199373 +@SQ SN:chr3 LN:198022430 \ No newline at end of file diff --git a/src/test/resources/htsjdk/tribble/interval_list/shortExample.interval_list b/src/test/resources/htsjdk/tribble/interval_list/shortExample.interval_list new file mode 100644 index 0000000000..e63eb8f1e2 --- /dev/null +++ b/src/test/resources/htsjdk/tribble/interval_list/shortExample.interval_list @@ -0,0 +1,7 @@ +@SQ SN:1 LN:249250621 +@SQ SN:2 LN:243199373 +@SQ SN:3 LN:198022430 +1 8216712 8216712 + rs11121115 +1 17032814 17032814 - rs2773183 +2 1143476 1143476 + rs4998209 +2 9240279 9240279 + rs56249990 diff --git a/src/test/resources/htsjdk/tribble/interval_list/shortExampleWithEmptyLine.interval_list b/src/test/resources/htsjdk/tribble/interval_list/shortExampleWithEmptyLine.interval_list new file mode 100644 index 0000000000..cf101f064f --- /dev/null +++ b/src/test/resources/htsjdk/tribble/interval_list/shortExampleWithEmptyLine.interval_list @@ -0,0 +1,8 @@ +@SQ SN:1 LN:249250621 +@SQ SN:2 LN:243199373 +@SQ SN:3 LN:198022430 +1 8216712 8216712 + rs11121115 +1 17032814 17032814 + rs2773183 + +2 1143476 1143476 + rs4998209 +2 9240279 9240279 + rs56249990 From 841bad30414e7b27528bf03f5523da1882ce3ab5 Mon Sep 17 00:00:00 2001 From: Yossi Farjoun Date: Thu, 14 Mar 2019 23:09:14 -0400 Subject: [PATCH 2/9] -cleanup --- .../htsjdk/samtools/util/IntervalList.java | 14 ++------ .../htsjdk/tribble/AsciiSamFeatureCodec.java | 33 +++++++++++-------- .../htsjdk/tribble/interval_list/example.dict | 2 +- 3 files changed, 24 insertions(+), 25 deletions(-) diff --git a/src/main/java/htsjdk/samtools/util/IntervalList.java b/src/main/java/htsjdk/samtools/util/IntervalList.java index 9bf7c31c51..741420b551 100644 --- a/src/main/java/htsjdk/samtools/util/IntervalList.java +++ b/src/main/java/htsjdk/samtools/util/IntervalList.java @@ -32,13 +32,7 @@ import java.io.*; import java.nio.file.Path; -import java.util.ArrayList; -import java.util.Collection; -import java.util.Collections; -import java.util.Comparator; -import java.util.Iterator; -import java.util.LinkedHashSet; -import java.util.List; +import java.util.*; /** * Represents a list of intervals against a reference sequence that can be written to @@ -542,10 +536,8 @@ public static IntervalList fromReader(final BufferedReader in) { // Then read in the intervals do { - Interval nullableInterval = intervalListCodec.decode(line); - if (nullableInterval != null) { - list.intervals.add(nullableInterval); - } + final Optional maybeInterval = Optional.ofNullable(intervalListCodec.decode(line)); + maybeInterval.ifPresent(list.intervals::add); } while ((line = in.readLine()) != null); diff --git a/src/main/java/htsjdk/tribble/AsciiSamFeatureCodec.java b/src/main/java/htsjdk/tribble/AsciiSamFeatureCodec.java index 3fd7112204..fb2209a8fd 100644 --- a/src/main/java/htsjdk/tribble/AsciiSamFeatureCodec.java +++ b/src/main/java/htsjdk/tribble/AsciiSamFeatureCodec.java @@ -1,21 +1,28 @@ /* - * Copyright (c) 2019 by The Broad Institute, Inc. and the Massachusetts Institute of Technology. - * All Rights Reserved. + * Copyright (c) 2019, The Broad Institute * - * This software is licensed under the terms of the GNU Lesser General Public License (LGPL), Version 2.1 which - * is available at http://www.opensource.org/licenses/lgpl-2.1.php. + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: * - * THE SOFTWARE IS PROVIDED "AS IS." THE BROAD AND MIT MAKE NO REPRESENTATIONS OR WARRANTIES OF - * ANY KIND CONCERNING THE SOFTWARE, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT - * OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. IN NO EVENT SHALL THE BROAD OR MIT, OR THEIR - * RESPECTIVE TRUSTEES, DIRECTORS, OFFICERS, EMPLOYEES, AND AFFILIATES BE LIABLE FOR ANY DAMAGES OF - * ANY KIND, INCLUDING, WITHOUT LIMITATION, INCIDENTAL OR CONSEQUENTIAL DAMAGES, ECONOMIC - * DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER THE BROAD OR MIT SHALL - * BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE - * FOREGOING. + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. */ + package htsjdk.tribble; import htsjdk.samtools.util.*; diff --git a/src/test/resources/htsjdk/tribble/interval_list/example.dict b/src/test/resources/htsjdk/tribble/interval_list/example.dict index 6e25d82e40..53e60f3a84 100644 --- a/src/test/resources/htsjdk/tribble/interval_list/example.dict +++ b/src/test/resources/htsjdk/tribble/interval_list/example.dict @@ -1,3 +1,3 @@ @SQ SN:chr1 LN:249250621 @SQ SN:chr2 LN:243199373 -@SQ SN:chr3 LN:198022430 \ No newline at end of file +@SQ SN:chr3 LN:198022430 From 6f75f7b5f512ca22df64521ab257efab29e4a413 Mon Sep 17 00:00:00 2001 From: Yossi Farjoun Date: Tue, 19 Mar 2019 14:00:18 -0400 Subject: [PATCH 3/9] - responding to review comments. - using Strand class to parse the strand --- .../samtools/util/BufferedLineReader.java | 2 +- .../java/htsjdk/samtools/util/LineReader.java | 3 +- .../htsjdk/tribble/AsciiSamFeatureCodec.java | 98 ------------------- .../IntervalList/IntervalListCodec.java | 62 ++++++++---- 4 files changed, 44 insertions(+), 121 deletions(-) delete mode 100644 src/main/java/htsjdk/tribble/AsciiSamFeatureCodec.java diff --git a/src/main/java/htsjdk/samtools/util/BufferedLineReader.java b/src/main/java/htsjdk/samtools/util/BufferedLineReader.java index de3c187b4c..f5a1835629 100644 --- a/src/main/java/htsjdk/samtools/util/BufferedLineReader.java +++ b/src/main/java/htsjdk/samtools/util/BufferedLineReader.java @@ -74,7 +74,7 @@ public String readLine() { /** * Non-destructive one-character look-ahead. * - * @return If not eof, the next character that would be read. If eof, -1. + * @return If not eof, the next character that would be read. If eof, {@value EOF_VALUE}. */ @Override public int peek() { diff --git a/src/main/java/htsjdk/samtools/util/LineReader.java b/src/main/java/htsjdk/samtools/util/LineReader.java index 7e84bc2b14..5555c59f2f 100644 --- a/src/main/java/htsjdk/samtools/util/LineReader.java +++ b/src/main/java/htsjdk/samtools/util/LineReader.java @@ -30,6 +30,7 @@ */ public interface LineReader extends Closeable { + // value to return in call to peek, if eof has been reached. int EOF_VALUE = -1; /** @@ -45,7 +46,7 @@ public interface LineReader extends Closeable { /** * Non-destructive one-character look-ahead. - * @return If not eof, the next character that would be read. If eof, -1. + * @return If not eof, the next character that would be read. If eof, {@value EOF_VALUE}. */ int peek(); diff --git a/src/main/java/htsjdk/tribble/AsciiSamFeatureCodec.java b/src/main/java/htsjdk/tribble/AsciiSamFeatureCodec.java deleted file mode 100644 index fb2209a8fd..0000000000 --- a/src/main/java/htsjdk/tribble/AsciiSamFeatureCodec.java +++ /dev/null @@ -1,98 +0,0 @@ -/* - * Copyright (c) 2019, The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - - -package htsjdk.tribble; - -import htsjdk.samtools.util.*; -import htsjdk.tribble.readers.AsciiLineReader; -import htsjdk.tribble.readers.AsciiLineReaderIterator; - -import java.io.IOException; -import java.io.InputStream; - -/** - * A convenience base class for codecs that want to read in features from ASCII lines. - *

- * This class overrides the general decode Features for streams and presents instead - * Strings to decode(String) and readHeader(LineReader) functions. - * - * @param The feature type this codec reads - */ -public abstract class AsciiSamFeatureCodec extends AbstractFeatureCodec { - private static final Log log = Log.getInstance(AsciiSamFeatureCodec.class); - - protected AsciiSamFeatureCodec(final Class myClass) { - super(myClass); - } - - @Override - public void close(final LineReader lineIterator) { - CloserUtil.close(lineIterator); - } - - @Override - public boolean isDone(final LineReader lineIterator) { - return lineIterator.peek() == LineReader.EOF_VALUE; - } - - @Override - public LocationAware makeIndexableSourceFromStream(final InputStream inputStream) { - return new AsciiLineReaderIterator(AsciiLineReader.from(inputStream)); - } - - @Override - public LineReader makeSourceFromStream(final InputStream bufferedInputStream) { - return new BufferedLineReader(bufferedInputStream); - } - - /** - * Convenience method. Decoding in ASCII files operates line-by-line, so obviate the need to call - * {@link LineReader#readLine()} in implementing classes and, instead, have them implement - * {@link AsciiSamFeatureCodec#decode(String)}. - */ - @Override - public T decode(final LineReader lineIterator) { - return decode(lineIterator.readLine()); - } - - /** - * @see AsciiSamFeatureCodec#decode(LineReader) - */ - public abstract T decode(String s); - - @Override - public FeatureCodecHeader readHeader(final LineReader lineReader) throws IOException { - // TODO: Track header end here, rather than assuming there isn't one...need to maintain length of header... - final Object header = readActualHeader(lineReader); - return new FeatureCodecHeader(header, FeatureCodecHeader.NO_HEADER_END); - } - - /** - * Read and return the header, or null if there is no header. - * - * @return the actual header data in the file, or null if none is available - */ - abstract public Object readActualHeader(final LineReader reader); -} diff --git a/src/main/java/htsjdk/tribble/IntervalList/IntervalListCodec.java b/src/main/java/htsjdk/tribble/IntervalList/IntervalListCodec.java index 73f857149a..a93e7724fb 100644 --- a/src/main/java/htsjdk/tribble/IntervalList/IntervalListCodec.java +++ b/src/main/java/htsjdk/tribble/IntervalList/IntervalListCodec.java @@ -29,25 +29,25 @@ import htsjdk.samtools.util.Interval; import htsjdk.samtools.util.LineReader; import htsjdk.samtools.util.Log; -import htsjdk.tribble.AsciiSamFeatureCodec; +import htsjdk.tribble.AsciiFeatureCodec; import htsjdk.tribble.TribbleException; +import htsjdk.tribble.annotation.Strand; +import htsjdk.tribble.readers.LineIterator; /** * A tribble codec for IntervalLists. * - * Also contains the parseing code for the non-tribble parsing of IntervalLists + * Also contains the parsing code for the non-tribble parsing of IntervalLists */ -public class IntervalListCodec extends AsciiSamFeatureCodec { +public class IntervalListCodec extends AsciiFeatureCodec { static final Log log = Log.getInstance(IntervalListCodec.class); - final char[] shortArray; private SAMSequenceDictionary dictionary = null; public IntervalListCodec() { super(Interval.class); - shortArray = new char[1]; } public IntervalListCodec(final SAMSequenceDictionary dict) { @@ -69,7 +69,7 @@ private Interval parseIntervalString(final String line, final SAMSequenceDiction // Make sure we have the right number of fields final String[] fields = line.split("\t"); if (fields.length != 5) { - throw new SAMException("Invalid interval record contains " + + throw new TribbleException("Invalid interval record contains " + fields.length + " fields: " + line); } @@ -93,21 +93,12 @@ private Interval parseIntervalString(final String line, final SAMSequenceDiction ". I'm afraid I cannot let you do that."); } - final boolean negative; - switch (fields[STRAND_POS]) { - case "-": - negative = true; - break; - case "+": - negative = false; - break; - default: - throw new IllegalArgumentException("Invalid strand field: " + fields[STRAND_POS]); - } + Strand strand = Strand.decode(fields[STRAND_POS]); + if (strand==Strand.NONE) throw new IllegalArgumentException("Invalid strand field: " + fields[STRAND_POS]); final String name = fields[NAME_POS]; - final Interval interval = new Interval(seq, start, end, negative, name); + final Interval interval = new Interval(seq, start, end, strand==Strand.NEGATIVE, name); final SAMSequenceRecord sequence = dict.getSequence(seq); if (sequence == null) { log.warn("Ignoring interval for unknown reference: " + interval); @@ -138,11 +129,39 @@ public Interval decode(final String line) { return parseIntervalString(line, dictionary); } + +// @Override +// public Object readActualHeader(LineReader lineReader) { +// final SAMTextHeaderCodec headerCodec = new SAMTextHeaderCodec(); +// +// final SAMFileHeader header = headerCodec.decode(lineReader, ""); +// dictionary = header.getSequenceDictionary(); +// return header; +// } + @Override - public Object readActualHeader(LineReader lineReader) { + public Object readActualHeader(LineIterator lineIterator) { final SAMTextHeaderCodec headerCodec = new SAMTextHeaderCodec(); - - final SAMFileHeader header = headerCodec.decode(lineReader, ""); + final SAMFileHeader header = headerCodec.decode(new LineReader() { + int lineNo = 0; + @Override + public String readLine() { + lineNo++; + return lineIterator.next(); + } + @Override + public int getLineNumber() { + return lineNo; + } + @Override + public int peek() { + return lineIterator.hasNext() ? + lineIterator.peek().charAt(0) : + LineReader.EOF_VALUE; + } + @Override + public void close() { } + }, "IntervalListCodec"); dictionary = header.getSequenceDictionary(); return header; } @@ -151,5 +170,6 @@ public Object readActualHeader(LineReader lineReader) { public boolean canDecode(String s) { return s.endsWith(".interval_list"); } + } From d5e317f6f1b2433ce5b291a3020772533558ebd2 Mon Sep 17 00:00:00 2001 From: Yossi Farjoun Date: Tue, 19 Mar 2019 14:19:36 -0400 Subject: [PATCH 4/9] -rebased, removed commented code --- .../tribble/IntervalList/IntervalListCodec.java | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/src/main/java/htsjdk/tribble/IntervalList/IntervalListCodec.java b/src/main/java/htsjdk/tribble/IntervalList/IntervalListCodec.java index a93e7724fb..291b2ee9cb 100644 --- a/src/main/java/htsjdk/tribble/IntervalList/IntervalListCodec.java +++ b/src/main/java/htsjdk/tribble/IntervalList/IntervalListCodec.java @@ -130,15 +130,6 @@ public Interval decode(final String line) { } -// @Override -// public Object readActualHeader(LineReader lineReader) { -// final SAMTextHeaderCodec headerCodec = new SAMTextHeaderCodec(); -// -// final SAMFileHeader header = headerCodec.decode(lineReader, ""); -// dictionary = header.getSequenceDictionary(); -// return header; -// } - @Override public Object readActualHeader(LineIterator lineIterator) { final SAMTextHeaderCodec headerCodec = new SAMTextHeaderCodec(); @@ -170,6 +161,4 @@ public void close() { } public boolean canDecode(String s) { return s.endsWith(".interval_list"); } - } - From d71b5c7012896cf5740e70428116ddacb197f189 Mon Sep 17 00:00:00 2001 From: Yossi Farjoun Date: Mon, 25 Mar 2019 17:20:24 -0400 Subject: [PATCH 5/9] - responding to review comments --- .../htsjdk/tribble/IntervalList/IntervalListCodec.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/main/java/htsjdk/tribble/IntervalList/IntervalListCodec.java b/src/main/java/htsjdk/tribble/IntervalList/IntervalListCodec.java index 291b2ee9cb..89d4ee3147 100644 --- a/src/main/java/htsjdk/tribble/IntervalList/IntervalListCodec.java +++ b/src/main/java/htsjdk/tribble/IntervalList/IntervalListCodec.java @@ -42,16 +42,16 @@ public class IntervalListCodec extends AsciiFeatureCodec { - static final Log log = Log.getInstance(IntervalListCodec.class); + private final Log log = Log.getInstance(IntervalListCodec.class); - private SAMSequenceDictionary dictionary = null; + private SAMSequenceDictionary dictionary; public IntervalListCodec() { - super(Interval.class); + this(null); } public IntervalListCodec(final SAMSequenceDictionary dict) { - this(); + super(Interval.class); dictionary = dict; } From ffb6dc549729a873aae198f884db64369c0557ba Mon Sep 17 00:00:00 2001 From: Yossi Farjoun Date: Tue, 2 Apr 2019 14:00:50 -0400 Subject: [PATCH 6/9] - responding to review comments (added test) --- .../IntervalList/IntervalListCodecTest.java | 31 +++++++++++++++++++ .../TribbleIndexFeatureReaderTest.java | 10 ++---- 2 files changed, 33 insertions(+), 8 deletions(-) diff --git a/src/test/java/htsjdk/tribble/IntervalList/IntervalListCodecTest.java b/src/test/java/htsjdk/tribble/IntervalList/IntervalListCodecTest.java index ea9ab77003..787d5925f4 100644 --- a/src/test/java/htsjdk/tribble/IntervalList/IntervalListCodecTest.java +++ b/src/test/java/htsjdk/tribble/IntervalList/IntervalListCodecTest.java @@ -31,6 +31,7 @@ import htsjdk.samtools.util.IntervalList; import htsjdk.samtools.util.IntervalListTest; import htsjdk.tribble.*; +import htsjdk.tribble.readers.LineIterator; import htsjdk.variant.utils.SAMSequenceDictionaryExtractor; import org.testng.Assert; import org.testng.annotations.DataProvider; @@ -100,12 +101,42 @@ public void testDecodeIntervalListFile_bad(Path file) throws Exception { } } + + + // Once someone implement tabix interval-lists, this should fail (and they should make a test that passes...) @Test(expectedExceptions = TribbleException.class) public void testGetTabixFormat() { new IntervalListCodec().getTabixFormat(); } + + @DataProvider() + public Object[][] createFeatureFileStrings() { + return new Object[][]{ + {new File(TestUtils.DATA_DIR, "interval_list/shortExample.interval_list"), 4} + }; + } + + @Test(dataProvider = "createFeatureFileStrings") + public void testIndexedIntervalList(final File testPath, final int expectedCount) throws IOException { + final IntervalListCodec codec = new IntervalListCodec(); + try (final TribbleIndexedFeatureReader featureReader = + new TribbleIndexedFeatureReader<>(testPath.getAbsolutePath(), codec, false)) { + Assert.assertEquals(featureReader.iterator().stream().count(),expectedCount); + } + } + + @Test(dataProvider = "createFeatureFileStrings", expectedExceptions = TribbleException.class) + public void testIndexedIntervalListWithQuery(final File testPath, final int ignored) throws IOException { + final IntervalListCodec codec = new IntervalListCodec(); + try (final TribbleIndexedFeatureReader featureReader = + new TribbleIndexedFeatureReader<>(testPath.getAbsolutePath(), codec, false)) { + + Assert.assertEquals(featureReader.query("1",17032814,17032814).stream().count(),1); + } + } + @Test public void testCanDecode() { final IntervalListCodec codec = new IntervalListCodec(); diff --git a/src/test/java/htsjdk/tribble/TribbleIndexFeatureReaderTest.java b/src/test/java/htsjdk/tribble/TribbleIndexFeatureReaderTest.java index 37a5295dcf..7b537275ab 100644 --- a/src/test/java/htsjdk/tribble/TribbleIndexFeatureReaderTest.java +++ b/src/test/java/htsjdk/tribble/TribbleIndexFeatureReaderTest.java @@ -28,14 +28,8 @@ public void testIndexedGZIPVCF(final String testPath, final int expectedCount) t final VCFCodec codec = new VCFCodec(); try (final TribbleIndexedFeatureReader featureReader = new TribbleIndexedFeatureReader<>(testPath, codec, false)) { - final CloseableTribbleIterator localIterator = featureReader.iterator(); - int count = 0; - for (final Feature feat : featureReader.iterator()) { - localIterator.next(); - count++; - } - Assert.assertEquals(count, expectedCount); + + Assert.assertEquals(featureReader.iterator().stream().count(),1); } } - } From cdd5501a9e0b134264dfabc9be7537c13948cce5 Mon Sep 17 00:00:00 2001 From: Yossi Farjoun Date: Tue, 2 Apr 2019 14:01:43 -0400 Subject: [PATCH 7/9] - spaces --- .../tribble/IntervalList/IntervalListCodecTest.java | 8 ++------ .../htsjdk/tribble/TribbleIndexFeatureReaderTest.java | 4 ++-- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/src/test/java/htsjdk/tribble/IntervalList/IntervalListCodecTest.java b/src/test/java/htsjdk/tribble/IntervalList/IntervalListCodecTest.java index 787d5925f4..94d6d9d293 100644 --- a/src/test/java/htsjdk/tribble/IntervalList/IntervalListCodecTest.java +++ b/src/test/java/htsjdk/tribble/IntervalList/IntervalListCodecTest.java @@ -60,7 +60,6 @@ public void testSimpleDecode(final String decodeThis, final Interval expectedInt final Interval interval; interval = codec.decode(decodeThis); - Assert.assertTrue(interval.equalsWithStrandAndName(expectedInterval)); } @@ -101,9 +100,6 @@ public void testDecodeIntervalListFile_bad(Path file) throws Exception { } } - - - // Once someone implement tabix interval-lists, this should fail (and they should make a test that passes...) @Test(expectedExceptions = TribbleException.class) public void testGetTabixFormat() { @@ -123,7 +119,7 @@ public void testIndexedIntervalList(final File testPath, final int expectedCount final IntervalListCodec codec = new IntervalListCodec(); try (final TribbleIndexedFeatureReader featureReader = new TribbleIndexedFeatureReader<>(testPath.getAbsolutePath(), codec, false)) { - Assert.assertEquals(featureReader.iterator().stream().count(),expectedCount); + Assert.assertEquals(featureReader.iterator().stream().count(), expectedCount); } } @@ -133,7 +129,7 @@ public void testIndexedIntervalListWithQuery(final File testPath, final int igno try (final TribbleIndexedFeatureReader featureReader = new TribbleIndexedFeatureReader<>(testPath.getAbsolutePath(), codec, false)) { - Assert.assertEquals(featureReader.query("1",17032814,17032814).stream().count(),1); + Assert.assertEquals(featureReader.query("1", 17032814, 17032814).stream().count(), 1); } } diff --git a/src/test/java/htsjdk/tribble/TribbleIndexFeatureReaderTest.java b/src/test/java/htsjdk/tribble/TribbleIndexFeatureReaderTest.java index 7b537275ab..1c13384952 100644 --- a/src/test/java/htsjdk/tribble/TribbleIndexFeatureReaderTest.java +++ b/src/test/java/htsjdk/tribble/TribbleIndexFeatureReaderTest.java @@ -27,9 +27,9 @@ public Object[][] createFeatureFileStrings() { public void testIndexedGZIPVCF(final String testPath, final int expectedCount) throws IOException { final VCFCodec codec = new VCFCodec(); try (final TribbleIndexedFeatureReader featureReader = - new TribbleIndexedFeatureReader<>(testPath, codec, false)) { + new TribbleIndexedFeatureReader<>(testPath, codec, false)) { - Assert.assertEquals(featureReader.iterator().stream().count(),1); + Assert.assertEquals(featureReader.iterator().stream().count(), 1); } } } From 5ee772316588109fce9a67ab006237f7c4343f63 Mon Sep 17 00:00:00 2001 From: Yossi Farjoun Date: Tue, 2 Apr 2019 15:30:45 -0400 Subject: [PATCH 8/9] - fixed a typo, moved tests to same file --- .../IntervalList/IntervalListCodecTest.java | 26 --------------- .../TribbleIndexFeatureReaderTest.java | 32 ++++++++++++++++++- 2 files changed, 31 insertions(+), 27 deletions(-) diff --git a/src/test/java/htsjdk/tribble/IntervalList/IntervalListCodecTest.java b/src/test/java/htsjdk/tribble/IntervalList/IntervalListCodecTest.java index 94d6d9d293..98e462a994 100644 --- a/src/test/java/htsjdk/tribble/IntervalList/IntervalListCodecTest.java +++ b/src/test/java/htsjdk/tribble/IntervalList/IntervalListCodecTest.java @@ -107,32 +107,6 @@ public void testGetTabixFormat() { } - @DataProvider() - public Object[][] createFeatureFileStrings() { - return new Object[][]{ - {new File(TestUtils.DATA_DIR, "interval_list/shortExample.interval_list"), 4} - }; - } - - @Test(dataProvider = "createFeatureFileStrings") - public void testIndexedIntervalList(final File testPath, final int expectedCount) throws IOException { - final IntervalListCodec codec = new IntervalListCodec(); - try (final TribbleIndexedFeatureReader featureReader = - new TribbleIndexedFeatureReader<>(testPath.getAbsolutePath(), codec, false)) { - Assert.assertEquals(featureReader.iterator().stream().count(), expectedCount); - } - } - - @Test(dataProvider = "createFeatureFileStrings", expectedExceptions = TribbleException.class) - public void testIndexedIntervalListWithQuery(final File testPath, final int ignored) throws IOException { - final IntervalListCodec codec = new IntervalListCodec(); - try (final TribbleIndexedFeatureReader featureReader = - new TribbleIndexedFeatureReader<>(testPath.getAbsolutePath(), codec, false)) { - - Assert.assertEquals(featureReader.query("1", 17032814, 17032814).stream().count(), 1); - } - } - @Test public void testCanDecode() { final IntervalListCodec codec = new IntervalListCodec(); diff --git a/src/test/java/htsjdk/tribble/TribbleIndexFeatureReaderTest.java b/src/test/java/htsjdk/tribble/TribbleIndexFeatureReaderTest.java index 1c13384952..3be4381918 100644 --- a/src/test/java/htsjdk/tribble/TribbleIndexFeatureReaderTest.java +++ b/src/test/java/htsjdk/tribble/TribbleIndexFeatureReaderTest.java @@ -1,6 +1,8 @@ package htsjdk.tribble; import htsjdk.HtsjdkTest; +import htsjdk.samtools.util.Interval; +import htsjdk.tribble.IntervalList.IntervalListCodec; import htsjdk.tribble.readers.LineIterator; import htsjdk.variant.variantcontext.VariantContext; import htsjdk.variant.vcf.VCFCodec; @@ -8,6 +10,7 @@ import org.testng.annotations.DataProvider; import org.testng.annotations.Test; +import java.io.File; import java.io.IOException; @@ -29,7 +32,34 @@ public void testIndexedGZIPVCF(final String testPath, final int expectedCount) t try (final TribbleIndexedFeatureReader featureReader = new TribbleIndexedFeatureReader<>(testPath, codec, false)) { - Assert.assertEquals(featureReader.iterator().stream().count(), 1); + Assert.assertEquals(featureReader.iterator().stream().count(), expectedCount); + } + } + + + @DataProvider() + public Object[][] createIntervalFileStrings() { + return new Object[][]{ + {new File(TestUtils.DATA_DIR, "interval_list/shortExample.interval_list"), 4} + }; + } + + @Test(dataProvider = "createFeatureFileStrings") + public void testIndexedIntervalList(final File testPath, final int expectedCount) throws IOException { + final IntervalListCodec codec = new IntervalListCodec(); + try (final TribbleIndexedFeatureReader featureReader = + new TribbleIndexedFeatureReader<>(testPath.getAbsolutePath(), codec, false)) { + Assert.assertEquals(featureReader.iterator().stream().count(), expectedCount); + } + } + + @Test(dataProvider = "createFeatureFileStrings", expectedExceptions = TribbleException.class) + public void testIndexedIntervalListWithQuery(final File testPath, final int ignored) throws IOException { + final IntervalListCodec codec = new IntervalListCodec(); + try (final TribbleIndexedFeatureReader featureReader = + new TribbleIndexedFeatureReader<>(testPath.getAbsolutePath(), codec, false)) { + + Assert.assertEquals(featureReader.query("1", 17032814, 17032814).stream().count(), 1); } } } From 0c7b5283c1fdc09ce7ba8316dff54738966465dc Mon Sep 17 00:00:00 2001 From: Yossi Farjoun Date: Tue, 2 Apr 2019 15:48:52 -0400 Subject: [PATCH 9/9] - fixed a typo, moved tests to same file --- .../java/htsjdk/tribble/TribbleIndexFeatureReaderTest.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/test/java/htsjdk/tribble/TribbleIndexFeatureReaderTest.java b/src/test/java/htsjdk/tribble/TribbleIndexFeatureReaderTest.java index 3be4381918..ba04dff3b6 100644 --- a/src/test/java/htsjdk/tribble/TribbleIndexFeatureReaderTest.java +++ b/src/test/java/htsjdk/tribble/TribbleIndexFeatureReaderTest.java @@ -44,7 +44,7 @@ public Object[][] createIntervalFileStrings() { }; } - @Test(dataProvider = "createFeatureFileStrings") + @Test(dataProvider = "createIntervalFileStrings") public void testIndexedIntervalList(final File testPath, final int expectedCount) throws IOException { final IntervalListCodec codec = new IntervalListCodec(); try (final TribbleIndexedFeatureReader featureReader = @@ -53,7 +53,7 @@ public void testIndexedIntervalList(final File testPath, final int expectedCount } } - @Test(dataProvider = "createFeatureFileStrings", expectedExceptions = TribbleException.class) + @Test(dataProvider = "createIntervalFileStrings", expectedExceptions = TribbleException.class) public void testIndexedIntervalListWithQuery(final File testPath, final int ignored) throws IOException { final IntervalListCodec codec = new IntervalListCodec(); try (final TribbleIndexedFeatureReader featureReader =