From 6f75f7b5f512ca22df64521ab257efab29e4a413 Mon Sep 17 00:00:00 2001 From: Yossi Farjoun Date: Tue, 19 Mar 2019 14:00:18 -0400 Subject: [PATCH] - responding to review comments. - using Strand class to parse the strand --- .../samtools/util/BufferedLineReader.java | 2 +- .../java/htsjdk/samtools/util/LineReader.java | 3 +- .../htsjdk/tribble/AsciiSamFeatureCodec.java | 98 ------------------- .../IntervalList/IntervalListCodec.java | 62 ++++++++---- 4 files changed, 44 insertions(+), 121 deletions(-) delete mode 100644 src/main/java/htsjdk/tribble/AsciiSamFeatureCodec.java diff --git a/src/main/java/htsjdk/samtools/util/BufferedLineReader.java b/src/main/java/htsjdk/samtools/util/BufferedLineReader.java index de3c187b4c..f5a1835629 100644 --- a/src/main/java/htsjdk/samtools/util/BufferedLineReader.java +++ b/src/main/java/htsjdk/samtools/util/BufferedLineReader.java @@ -74,7 +74,7 @@ public String readLine() { /** * Non-destructive one-character look-ahead. * - * @return If not eof, the next character that would be read. If eof, -1. + * @return If not eof, the next character that would be read. If eof, {@value EOF_VALUE}. */ @Override public int peek() { diff --git a/src/main/java/htsjdk/samtools/util/LineReader.java b/src/main/java/htsjdk/samtools/util/LineReader.java index 7e84bc2b14..5555c59f2f 100644 --- a/src/main/java/htsjdk/samtools/util/LineReader.java +++ b/src/main/java/htsjdk/samtools/util/LineReader.java @@ -30,6 +30,7 @@ */ public interface LineReader extends Closeable { + // value to return in call to peek, if eof has been reached. int EOF_VALUE = -1; /** @@ -45,7 +46,7 @@ public interface LineReader extends Closeable { /** * Non-destructive one-character look-ahead. - * @return If not eof, the next character that would be read. If eof, -1. + * @return If not eof, the next character that would be read. If eof, {@value EOF_VALUE}. 
*/ int peek(); diff --git a/src/main/java/htsjdk/tribble/AsciiSamFeatureCodec.java b/src/main/java/htsjdk/tribble/AsciiSamFeatureCodec.java deleted file mode 100644 index fb2209a8fd..0000000000 --- a/src/main/java/htsjdk/tribble/AsciiSamFeatureCodec.java +++ /dev/null @@ -1,98 +0,0 @@ -/* - * Copyright (c) 2019, The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - - -package htsjdk.tribble; - -import htsjdk.samtools.util.*; -import htsjdk.tribble.readers.AsciiLineReader; -import htsjdk.tribble.readers.AsciiLineReaderIterator; - -import java.io.IOException; -import java.io.InputStream; - -/** - * A convenience base class for codecs that want to read in features from ASCII lines. - *

- * This class overrides the general decode Features for streams and presents instead - * Strings to decode(String) and readHeader(LineReader) functions. - * - * @param The feature type this codec reads - */ -public abstract class AsciiSamFeatureCodec extends AbstractFeatureCodec { - private static final Log log = Log.getInstance(AsciiSamFeatureCodec.class); - - protected AsciiSamFeatureCodec(final Class myClass) { - super(myClass); - } - - @Override - public void close(final LineReader lineIterator) { - CloserUtil.close(lineIterator); - } - - @Override - public boolean isDone(final LineReader lineIterator) { - return lineIterator.peek() == LineReader.EOF_VALUE; - } - - @Override - public LocationAware makeIndexableSourceFromStream(final InputStream inputStream) { - return new AsciiLineReaderIterator(AsciiLineReader.from(inputStream)); - } - - @Override - public LineReader makeSourceFromStream(final InputStream bufferedInputStream) { - return new BufferedLineReader(bufferedInputStream); - } - - /** - * Convenience method. Decoding in ASCII files operates line-by-line, so obviate the need to call - * {@link LineReader#readLine()} in implementing classes and, instead, have them implement - * {@link AsciiSamFeatureCodec#decode(String)}. - */ - @Override - public T decode(final LineReader lineIterator) { - return decode(lineIterator.readLine()); - } - - /** - * @see AsciiSamFeatureCodec#decode(LineReader) - */ - public abstract T decode(String s); - - @Override - public FeatureCodecHeader readHeader(final LineReader lineReader) throws IOException { - // TODO: Track header end here, rather than assuming there isn't one...need to maintain length of header... - final Object header = readActualHeader(lineReader); - return new FeatureCodecHeader(header, FeatureCodecHeader.NO_HEADER_END); - } - - /** - * Read and return the header, or null if there is no header. 
- * - * @return the actual header data in the file, or null if none is available - */ - abstract public Object readActualHeader(final LineReader reader); -} diff --git a/src/main/java/htsjdk/tribble/IntervalList/IntervalListCodec.java b/src/main/java/htsjdk/tribble/IntervalList/IntervalListCodec.java index 73f857149a..a93e7724fb 100644 --- a/src/main/java/htsjdk/tribble/IntervalList/IntervalListCodec.java +++ b/src/main/java/htsjdk/tribble/IntervalList/IntervalListCodec.java @@ -29,25 +29,25 @@ import htsjdk.samtools.util.Interval; import htsjdk.samtools.util.LineReader; import htsjdk.samtools.util.Log; -import htsjdk.tribble.AsciiSamFeatureCodec; +import htsjdk.tribble.AsciiFeatureCodec; import htsjdk.tribble.TribbleException; +import htsjdk.tribble.annotation.Strand; +import htsjdk.tribble.readers.LineIterator; /** * A tribble codec for IntervalLists. * - * Also contains the parseing code for the non-tribble parsing of IntervalLists + * Also contains the parsing code for the non-tribble parsing of IntervalLists */ -public class IntervalListCodec extends AsciiSamFeatureCodec { +public class IntervalListCodec extends AsciiFeatureCodec { static final Log log = Log.getInstance(IntervalListCodec.class); - final char[] shortArray; private SAMSequenceDictionary dictionary = null; public IntervalListCodec() { super(Interval.class); - shortArray = new char[1]; } public IntervalListCodec(final SAMSequenceDictionary dict) { @@ -69,7 +69,7 @@ private Interval parseIntervalString(final String line, final SAMSequenceDiction // Make sure we have the right number of fields final String[] fields = line.split("\t"); if (fields.length != 5) { - throw new SAMException("Invalid interval record contains " + + throw new TribbleException("Invalid interval record contains " + fields.length + " fields: " + line); } @@ -93,21 +93,12 @@ private Interval parseIntervalString(final String line, final SAMSequenceDiction ". 
I'm afraid I cannot let you do that."); } - final boolean negative; - switch (fields[STRAND_POS]) { - case "-": - negative = true; - break; - case "+": - negative = false; - break; - default: - throw new IllegalArgumentException("Invalid strand field: " + fields[STRAND_POS]); - } + Strand strand = Strand.decode(fields[STRAND_POS]); + if (strand==Strand.NONE) throw new IllegalArgumentException("Invalid strand field: " + fields[STRAND_POS]); final String name = fields[NAME_POS]; - final Interval interval = new Interval(seq, start, end, negative, name); + final Interval interval = new Interval(seq, start, end, strand==Strand.NEGATIVE, name); final SAMSequenceRecord sequence = dict.getSequence(seq); if (sequence == null) { log.warn("Ignoring interval for unknown reference: " + interval); @@ -138,11 +129,39 @@ public Interval decode(final String line) { return parseIntervalString(line, dictionary); } + /** + * Reads the SAM-style text header and keeps its sequence dictionary for later decode calls. + * + * The {@link LineIterator} is adapted to the {@link LineReader} that {@link SAMTextHeaderCodec} + * consumes. An empty next line is reported by peek() as a newline character, because calling + * charAt(0) on an empty string throws StringIndexOutOfBoundsException. + * + * @param lineIterator iterator over the lines of the interval list + * @return the decoded {@link SAMFileHeader} + */ @Override - public Object readActualHeader(LineReader lineReader) { + public Object readActualHeader(LineIterator lineIterator) { final SAMTextHeaderCodec headerCodec = new SAMTextHeaderCodec(); - - final SAMFileHeader header = headerCodec.decode(lineReader, ""); + final SAMFileHeader header = headerCodec.decode(new LineReader() { + int lineNo = 0; + @Override + public String readLine() { + lineNo++; + return lineIterator.next(); + } + @Override + public int getLineNumber() { + return lineNo; + } + @Override + public int peek() { + return lineIterator.hasNext() ?
+ (lineIterator.peek().isEmpty() ? '\n' : lineIterator.peek().charAt(0)) : + LineReader.EOF_VALUE; + } + @Override + public void close() { } + }, "IntervalListCodec"); dictionary = header.getSequenceDictionary(); return header; } @@ -151,5 +170,6 @@ public Object readActualHeader(LineReader lineReader) { public boolean canDecode(String s) { return s.endsWith(".interval_list"); } + }