Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add LiftOver constructor that takes an input stream #1412

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 25 additions & 14 deletions src/main/java/htsjdk/samtools/liftover/Chain.java
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
import java.util.List;
import java.util.regex.Pattern;


/**
* Holds a single chain from a UCSC chain file. Chain file format is described here: http://genome.ucsc.edu/goldenPath/help/chain.html
*
Expand Down Expand Up @@ -309,26 +310,36 @@ public int hashCode() {
/**
* Read all the chains and load into an OverlapDetector.
* @param chainFile File in UCSC chain format.
* @return OverlapDetector will all Chains from reader loaded into it.
* @return OverlapDetector with all Chains from reader loaded into it.
*/
static OverlapDetector<Chain> loadChains(final File chainFile) {
kevinpetersavage marked this conversation as resolved.
Show resolved Hide resolved
final BufferedLineReader reader = new BufferedLineReader(IOUtil.openFileForReading(chainFile));
IOUtil.assertFileIsReadable(chainFile);
try(final BufferedLineReader reader = new BufferedLineReader(IOUtil.openFileForReading(chainFile))){
return loadChains(reader, chainFile.toString());
}
}

/**
* Read all the chains and load into an OverlapDetector.
* @param reader reader of file in UCSC chain format.
* @param sourceName name of the source being read; used for error messages only.
* @return OverlapDetector with all Chains from reader loaded into it.
*/
static OverlapDetector<Chain> loadChains(final BufferedLineReader reader, String sourceName) {
final OverlapDetector<Chain> ret = new OverlapDetector<Chain>(0, 0);
Chain chain;
while ((chain = Chain.loadChain(reader, chainFile.toString())) != null) {
while ((chain = Chain.loadChain(reader, sourceName)) != null) {
ret.addLhs(chain, chain.interval);
}
reader.close();
return ret;
}

/**
* Read a single Chain from reader.
* @param reader Text representation of chains.
* @param chainFile For error messages only.
* @param sourceName For error messages only.
* @return New Chain with associated ContinuousBlocks.
*/
private static Chain loadChain(final BufferedLineReader reader, final String chainFile) {
private static Chain loadChain(final BufferedLineReader reader, final String sourceName) {
String line;
while (true) {
line = reader.readLine();
Expand All @@ -342,10 +353,10 @@ private static Chain loadChain(final BufferedLineReader reader, final String cha
}
final String[] chainFields = SPLITTER.split(line);
if (chainFields.length != 13) {
throwChainFileParseException("chain line has wrong number of fields", chainFile, reader.getLineNumber());
throwChainFileParseException("chain line has wrong number of fields", sourceName, reader.getLineNumber());
}
if (!"chain".equals(chainFields[0])) {
throwChainFileParseException("chain line does not start with 'chain'", chainFile, reader.getLineNumber());
throwChainFileParseException("chain line does not start with 'chain'", sourceName, reader.getLineNumber());
}
double score = 0;
String fromSequenceName = null;
Expand All @@ -372,7 +383,7 @@ private static Chain loadChain(final BufferedLineReader reader, final String cha
toChainEnd = Integer.parseInt(chainFields[11]);
id = Integer.parseInt(chainFields[12]);
} catch (NumberFormatException e) {
throwChainFileParseException("Invalid field", chainFile, reader.getLineNumber());
throwChainFileParseException("Invalid field", sourceName, reader.getLineNumber());
}
final Chain chain = new Chain(score, fromSequenceName, fromSequenceSize, fromChainStart, fromChainEnd, toSequenceName, toSequenceSize, toNegativeStrand, toChainStart,
toChainEnd, id);
Expand All @@ -383,18 +394,18 @@ private static Chain loadChain(final BufferedLineReader reader, final String cha
line = reader.readLine();
if (line == null || line.equals("")) {
if (!sawLastLine) {
throwChainFileParseException("Reached end of chain without seeing terminal block", chainFile, reader.getLineNumber());
throwChainFileParseException("Reached end of chain without seeing terminal block", sourceName, reader.getLineNumber());
}
break;
}
if (sawLastLine) {
throwChainFileParseException("Terminal block seen before end of chain", chainFile, reader.getLineNumber());
throwChainFileParseException("Terminal block seen before end of chain", sourceName, reader.getLineNumber());
}
String[] blockFields = SPLITTER.split(line);
if (blockFields.length == 1) {
sawLastLine = true;
} else if (blockFields.length != 3) {
throwChainFileParseException("Block line has unexpected number of fields", chainFile, reader.getLineNumber());
throwChainFileParseException("Block line has unexpected number of fields", sourceName, reader.getLineNumber());
}
int size = Integer.parseInt(blockFields[0]);
chain.addBlock(fromBlockStart, toBlockStart, size);
Expand All @@ -408,7 +419,7 @@ private static Chain loadChain(final BufferedLineReader reader, final String cha
return chain;
}

private static void throwChainFileParseException(final String message, final String chainFile, final int lineNumber) {
throw new SAMException(message + " in chain file " + chainFile + " at line " + lineNumber);
private static void throwChainFileParseException(final String message, final String sourceName, final int lineNumber) {
throw new SAMException(message + " in chain file " + sourceName + " at line " + lineNumber);
}
}
22 changes: 15 additions & 7 deletions src/main/java/htsjdk/samtools/liftover/LiftOver.java
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,10 @@

import htsjdk.samtools.SAMException;
import htsjdk.samtools.SAMSequenceDictionary;
import htsjdk.samtools.util.IOUtil;
import htsjdk.samtools.util.Interval;
import htsjdk.samtools.util.Log;
import htsjdk.samtools.util.OverlapDetector;
import htsjdk.samtools.util.*;

import java.io.File;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
Expand All @@ -39,6 +37,7 @@
import java.util.Map;
import java.util.Set;


/**
* Java port of UCSC liftOver. Only the most basic liftOver functionality is implemented.
* Internally coordinates are 0-based, half-open. The API is standard Picard 1-based, inclusive.
Expand Down Expand Up @@ -84,10 +83,19 @@ public long getFailedIntervalsBelowThreshold() {
/**
* Load UCSC chain file in order to lift over Intervals.
*/
public LiftOver(File chainFile) {
IOUtil.assertFileIsReadable(chainFile);
chains = Chain.loadChains(chainFile);
public LiftOver(File chainFile){
this(Chain.loadChains(chainFile));
}

/**
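* Load UCSC chain data from an input stream in order to lift over Intervals.
* @param chainFileInputStream stream of data in UCSC chain format.
* @param sourceName name of the source being read; used for error messages only.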
*/
public LiftOver(InputStream chainFileInputStream, String sourceName) {
// Wrap the supplied stream in a line reader and hand it, together with sourceName,
// to Chain.loadChains; the resulting OverlapDetector goes to the private constructor.
// NOTE(review): nothing in this constructor closes the stream -- confirm the caller is expected to.
this(Chain.loadChains(new BufferedLineReader(chainFileInputStream), sourceName));
}

private LiftOver(OverlapDetector<Chain> chains) {
this.chains = chains;
for (final Chain chain : this.chains.getAll()) {
final String from = chain.fromSequenceName;
final String to = chain.toSequenceName;
Expand Down
21 changes: 16 additions & 5 deletions src/test/java/htsjdk/samtools/liftover/LiftOverTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,7 @@
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;

import java.io.File;
import java.io.PrintWriter;
import java.io.*;
import java.util.*;
import java.util.stream.Stream;

Expand All @@ -44,14 +43,21 @@ public class LiftOverTest extends HtsjdkTest {
private static final File CHAIN_FILE = new File(TEST_DATA_DIR, "hg18ToHg19.over.chain");

private LiftOver liftOver;
Map<String, Set<String>> contigMap;
private Map<String, Set<String>> contigMap;
private LiftOver liftOverFromInputStream;

/**
 * Builds the file-backed LiftOver fixture shared by the tests and caches its contig map.
 */
@BeforeClass
public void initLiftOver() {
    final LiftOver fromFile = new LiftOver(CHAIN_FILE);
    this.liftOver = fromFile;
    this.contigMap = fromFile.getContigMap();
}

/**
 * Builds the stream-backed LiftOver fixture from the same chain file used by the
 * file-backed fixture.
 *
 * The stream is opened in a try-with-resources block so the file handle is released
 * once the LiftOver has been constructed; the LiftOver constructor loads the chains
 * before it returns, so the stream is no longer needed afterwards.
 *
 * @throws IOException if the chain file cannot be opened or the stream fails to close.
 */
@BeforeClass
public void initLiftOverFromInputStream() throws IOException {
    try (InputStream chainFileInputStream = new FileInputStream(CHAIN_FILE)) {
        liftOverFromInputStream = new LiftOver(chainFileInputStream, CHAIN_FILE.toString());
    }
}

@Test(dataProvider = "testIntervals")
public void testBasic(final Interval in, final Interval expected) {
Assert.assertEquals(liftOver.liftOver(in), expected);
Expand Down Expand Up @@ -461,21 +467,26 @@ public void testWriteChain() throws Exception {
File outFile = File.createTempFile("test.", ".chain");
outFile.deleteOnExit();
PrintWriter pw = new PrintWriter(outFile);
final Map<Integer, Chain> originalChainMap = new TreeMap<Integer, Chain>();
final Map<Integer, Chain> originalChainMap = new TreeMap<>();
for (final Chain chain : chains.getAll()) {
chain.write(pw);
originalChainMap.put(chain.id, chain);
}
pw.close();

final OverlapDetector<Chain> newChains = Chain.loadChains(outFile);
final Map<Integer, Chain> newChainMap = new TreeMap<Integer, Chain>();
final Map<Integer, Chain> newChainMap = new TreeMap<>();
for (final Chain chain : newChains.getAll()) {
newChainMap.put(chain.id, chain);
}
Assert.assertEquals(newChainMap, originalChainMap);
}

/**
 * Checks that the stream-backed LiftOver lifts each test interval to the expected result.
 */
@Test(dataProvider = "testIntervals")
public void loadLiftOverFromInputStream(final Interval in, final Interval expected) {
    final Interval lifted = liftOverFromInputStream.liftOver(in);
    Assert.assertEquals(lifted, expected);
}

@Test(dataProvider = "testIntervals")
public void testGetContigMap(final Interval in, final Interval expected) {
if (expected != null) {
Expand Down