From 46b1a00878087501757d28e2036a01a9c969193f Mon Sep 17 00:00:00 2001 From: Kevin Savage Date: Mon, 26 Aug 2019 21:22:00 +0100 Subject: [PATCH] Add LiftOver constructor that takes an input stream (#1412) * Add LiftOver constructor that takes an InputStream --- .../java/htsjdk/samtools/liftover/Chain.java | 39 ++++++++++++------- .../htsjdk/samtools/liftover/LiftOver.java | 22 +++++++---- .../samtools/liftover/LiftOverTest.java | 21 +++++++--- 3 files changed, 56 insertions(+), 26 deletions(-) diff --git a/src/main/java/htsjdk/samtools/liftover/Chain.java b/src/main/java/htsjdk/samtools/liftover/Chain.java index 8ac87fb356..45c51ba6bd 100644 --- a/src/main/java/htsjdk/samtools/liftover/Chain.java +++ b/src/main/java/htsjdk/samtools/liftover/Chain.java @@ -36,6 +36,7 @@ import java.util.List; import java.util.regex.Pattern; + /** * Holds a single chain from a UCSC chain file. Chain file format is described here: http://genome.ucsc.edu/goldenPath/help/chain.html * @@ -309,26 +310,36 @@ public int hashCode() { /** * Read all the chains and load into an OverlapDetector. * @param chainFile File in UCSC chain format. - * @return OverlapDetector will all Chains from reader loaded into it. + * @return OverlapDetector with all Chains from reader loaded into it. */ static OverlapDetector loadChains(final File chainFile) { - final BufferedLineReader reader = new BufferedLineReader(IOUtil.openFileForReading(chainFile)); + IOUtil.assertFileIsReadable(chainFile); + try(final BufferedLineReader reader = new BufferedLineReader(IOUtil.openFileForReading(chainFile))){ + return loadChains(reader, chainFile.toString()); + } + } + + /** + * Read all the chains and load into an OverlapDetector. + * @param reader reader of file in UCSC chain format. + * @return OverlapDetector with all Chains from reader loaded into it. + */ + static OverlapDetector loadChains(final BufferedLineReader reader, String sourceName) { final OverlapDetector ret = new OverlapDetector(0, 0); Chain chain; - while ((chain = Chain.loadChain(reader, chainFile.toString())) != null) { + while ((chain = Chain.loadChain(reader, sourceName)) != null) { ret.addLhs(chain, chain.interval); } - reader.close(); return ret; } /** * Read a single Chain from reader. * @param reader Text representation of chains. - * @param chainFile For error messages only. + * @param sourceName For error messages only. * @return New Chain with associated ContinuousBlocks. */ - private static Chain loadChain(final BufferedLineReader reader, final String chainFile) { + private static Chain loadChain(final BufferedLineReader reader, final String sourceName) { String line; while (true) { line = reader.readLine(); @@ -342,10 +353,10 @@ private static Chain loadChain(final BufferedLineReader reader, final String cha } final String[] chainFields = SPLITTER.split(line); if (chainFields.length != 13) { - throwChainFileParseException("chain line has wrong number of fields", chainFile, reader.getLineNumber()); + throwChainFileParseException("chain line has wrong number of fields", sourceName, reader.getLineNumber()); } if (!"chain".equals(chainFields[0])) { - throwChainFileParseException("chain line does not start with 'chain'", chainFile, reader.getLineNumber()); + throwChainFileParseException("chain line does not start with 'chain'", sourceName, reader.getLineNumber()); } double score = 0; String fromSequenceName = null; @@ -372,7 +383,7 @@ private static Chain loadChain(final BufferedLineReader reader, final String cha toChainEnd = Integer.parseInt(chainFields[11]); id = Integer.parseInt(chainFields[12]); } catch (NumberFormatException e) { - throwChainFileParseException("Invalid field", chainFile, reader.getLineNumber()); + throwChainFileParseException("Invalid field", sourceName, reader.getLineNumber()); } final Chain chain = new Chain(score, fromSequenceName, fromSequenceSize, fromChainStart, fromChainEnd, toSequenceName, toSequenceSize, toNegativeStrand, toChainStart, toChainEnd, id); @@ -383,18 +394,18 @@ private static Chain loadChain(final BufferedLineReader reader, final String cha line = reader.readLine(); if (line == null || line.equals("")) { if (!sawLastLine) { - throwChainFileParseException("Reached end of chain without seeing terminal block", chainFile, reader.getLineNumber()); + throwChainFileParseException("Reached end of chain without seeing terminal block", sourceName, reader.getLineNumber()); } break; } if (sawLastLine) { - throwChainFileParseException("Terminal block seen before end of chain", chainFile, reader.getLineNumber()); + throwChainFileParseException("Terminal block seen before end of chain", sourceName, reader.getLineNumber()); } String[] blockFields = SPLITTER.split(line); if (blockFields.length == 1) { sawLastLine = true; } else if (blockFields.length != 3) { - throwChainFileParseException("Block line has unexpected number of fields", chainFile, reader.getLineNumber()); + throwChainFileParseException("Block line has unexpected number of fields", sourceName, reader.getLineNumber()); } int size = Integer.parseInt(blockFields[0]); chain.addBlock(fromBlockStart, toBlockStart, size); @@ -408,7 +419,7 @@ private static Chain loadChain(final BufferedLineReader reader, final String cha return chain; } - private static void throwChainFileParseException(final String message, final String chainFile, final int lineNumber) { - throw new SAMException(message + " in chain file " + chainFile + " at line " + lineNumber); + private static void throwChainFileParseException(final String message, final String sourceName, final int lineNumber) { + throw new SAMException(message + " in chain file " + sourceName + " at line " + lineNumber); } } diff --git a/src/main/java/htsjdk/samtools/liftover/LiftOver.java b/src/main/java/htsjdk/samtools/liftover/LiftOver.java index 13ea9dafbf..9afebd63ca 100644 --- a/src/main/java/htsjdk/samtools/liftover/LiftOver.java +++ b/src/main/java/htsjdk/samtools/liftover/LiftOver.java @@ -25,12 +25,10 @@ import htsjdk.samtools.SAMException; import htsjdk.samtools.SAMSequenceDictionary; -import htsjdk.samtools.util.IOUtil; -import htsjdk.samtools.util.Interval; -import htsjdk.samtools.util.Log; -import htsjdk.samtools.util.OverlapDetector; +import htsjdk.samtools.util.*; import java.io.File; +import java.io.InputStream; import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; @@ -39,6 +37,7 @@ import java.util.Map; import java.util.Set; + /** * Java port of UCSC liftOver. Only the most basic liftOver functionality is implemented. * Internally coordinates are 0-based, half-open. The API is standard Picard 1-based, inclusive. @@ -84,10 +83,19 @@ public long getFailedIntervalsBelowThreshold() { /** * Load UCSC chain file in order to lift over Intervals. */ - public LiftOver(File chainFile) { - IOUtil.assertFileIsReadable(chainFile); - chains = Chain.loadChains(chainFile); + public LiftOver(File chainFile){ + this(Chain.loadChains(chainFile)); + } + + /** + * Load UCSC chain file in order to lift over Intervals. + */ + public LiftOver(InputStream chainFileInputStream, String sourceName) { + this(Chain.loadChains(new BufferedLineReader(chainFileInputStream), sourceName)); + } + private LiftOver(OverlapDetector chains) { + this.chains = chains; for (final Chain chain : this.chains.getAll()) { final String from = chain.fromSequenceName; final String to = chain.toSequenceName; diff --git a/src/test/java/htsjdk/samtools/liftover/LiftOverTest.java b/src/test/java/htsjdk/samtools/liftover/LiftOverTest.java index c1af58cf07..5cd57e7962 100644 --- a/src/test/java/htsjdk/samtools/liftover/LiftOverTest.java +++ b/src/test/java/htsjdk/samtools/liftover/LiftOverTest.java @@ -31,8 +31,7 @@ import org.testng.annotations.DataProvider; import org.testng.annotations.Test; -import java.io.File; -import java.io.PrintWriter; +import java.io.*; import java.util.*; import java.util.stream.Stream; @@ -44,7 +43,8 @@ public class LiftOverTest extends HtsjdkTest { private static final File CHAIN_FILE = new File(TEST_DATA_DIR, "hg18ToHg19.over.chain"); private LiftOver liftOver; - Map> contigMap; + private Map> contigMap; + private LiftOver liftOverFromInputStream; @BeforeClass public void initLiftOver() { @@ -52,6 +52,12 @@ public void initLiftOver() { contigMap = liftOver.getContigMap(); } + @BeforeClass + public void initLiftOverFromInputStream() throws FileNotFoundException { + InputStream chainFileInputStream = new FileInputStream(CHAIN_FILE); + liftOverFromInputStream = new LiftOver(chainFileInputStream, CHAIN_FILE.toString()); + } + @Test(dataProvider = "testIntervals") public void testBasic(final Interval in, final Interval expected) { Assert.assertEquals(liftOver.liftOver(in), expected); @@ -461,7 +467,7 @@ public void testWriteChain() throws Exception { File outFile = File.createTempFile("test.", ".chain"); outFile.deleteOnExit(); PrintWriter pw = new PrintWriter(outFile); - final Map originalChainMap = new TreeMap(); + final Map originalChainMap = new TreeMap<>(); for (final Chain chain : chains.getAll()) { chain.write(pw); originalChainMap.put(chain.id, chain); @@ -469,13 +475,18 @@ public void testWriteChain() throws Exception { pw.close(); final OverlapDetector newChains = Chain.loadChains(outFile); - final Map newChainMap = new TreeMap(); + final Map newChainMap = new TreeMap<>(); for (final Chain chain : newChains.getAll()) { newChainMap.put(chain.id, chain); } Assert.assertEquals(newChainMap, originalChainMap); } + @Test(dataProvider = "testIntervals") + public void loadLiftOverFromInputStream(final Interval in, final Interval expected) { + Assert.assertEquals(liftOverFromInputStream.liftOver(in), expected); + } + @Test(dataProvider = "testIntervals") public void testGetContigMap(final Interval in, final Interval expected) { if (expected != null) {