Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding getPathToDataFile default method to FeatureCodec #1223

Merged
merged 3 commits into from
Nov 14, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 11 additions & 4 deletions src/main/java/htsjdk/tribble/AbstractFeatureReader.java
Original file line number Diff line number Diff line change
Expand Up @@ -43,9 +43,12 @@ public abstract class AbstractFeatureReader<T extends Feature, SOURCE> implement
// the logging destination for this source
//private final static Logger log = Logger.getLogger("BasicFeatureSource");

// the path to underlying data source
/**
* The path to the underlying data file. This must be the input path converted with {@link FeatureCodec#getPathToDataFile(String)}.
*/
String path;


// a wrapper to apply to the raw stream of the Feature file to allow features like prefetching and caching to be injected
final Function<SeekableByteChannel, SeekableByteChannel> wrapper;
// a wrapper to apply to the raw stream of the index file
Expand Down Expand Up @@ -102,8 +105,12 @@ public static <FEATURE extends Feature, SOURCE> AbstractFeatureReader<FEATURE, S
*/
public static <FEATURE extends Feature, SOURCE> AbstractFeatureReader<FEATURE, SOURCE> getFeatureReader(final String featureResource, String indexResource, final FeatureCodec<FEATURE, SOURCE> codec, final boolean requireIndex, Function<SeekableByteChannel, SeekableByteChannel> wrapper, Function<SeekableByteChannel, SeekableByteChannel> indexWrapper) throws TribbleException {
try {
// Test for tabix index
if (methods.isTabix(featureResource, indexResource)) {

// Test for tabix index.
// Note that we use pathToDataFile here when determining the file type, but featureResource when constructing the readers.
// This is because the reader's constructor will convert the path and it needs to be converted exactly once.
final String pathToDataFile = codec.getPathToDataFile(featureResource);
if (methods.isTabix(pathToDataFile, indexResource)) {
if ( ! (codec instanceof AsciiFeatureCodec) )
throw new TribbleException("Tabix indexed files only work with ASCII codecs, but received non-Ascii codec " + codec.getClass().getSimpleName());
return new TabixFeatureReader<>(featureResource, indexResource, (AsciiFeatureCodec) codec, wrapper, indexWrapper);
Expand Down Expand Up @@ -145,7 +152,7 @@ protected AbstractFeatureReader(final String path, final FeatureCodec<T, SOURCE>
protected AbstractFeatureReader(final String path, final FeatureCodec<T, SOURCE> codec,
final Function<SeekableByteChannel, SeekableByteChannel> wrapper,
final Function<SeekableByteChannel, SeekableByteChannel> indexWrapper) {
this.path = path;
this.path = codec.getPathToDataFile(path);
lbergelson marked this conversation as resolved.
Show resolved Hide resolved
this.codec = codec;
this.wrapper = wrapper;
this.indexWrapper = indexWrapper;
Expand Down
17 changes: 17 additions & 0 deletions src/main/java/htsjdk/tribble/FeatureCodec.java
Original file line number Diff line number Diff line change
Expand Up @@ -156,4 +156,21 @@ public interface FeatureCodec<FEATURE_TYPE extends Feature, SOURCE> {
default public TabixFormat getTabixFormat() {
    // Default behaviour for codecs that do not override this method: fail and name the concrete codec class.
    // The message starts with a space so the class name does not run straight into the following word.
    throw new TribbleException(this.getClass().getSimpleName() + " does not have defined tabix format");
}

/**
 * Maps the path a codec was asked about onto the path of the file that actually holds the data to parse.
 *
 * Override this when the file recognized by {@link #canDecode(String)} is not the file containing the data,
 * for example when the input is a configuration file that defines how to locate and handle the data file.
 *
 * This default implementation performs no translation: the path that was passed in is handed back unchanged.
 *
 * @param path the path to a file that this codec {@link #canDecode}
 * @return the path to the data file that should be parsed by this codec to produce Features.
 * @throws TribbleException codecs may throw if they cannot decode the path.
 */
default String getPathToDataFile(String path) {
    // Identity mapping: by default the recognized file and the data file are one and the same.
    return path;
}
}
2 changes: 1 addition & 1 deletion src/main/java/htsjdk/tribble/TabixFeatureReader.java
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ public TabixFeatureReader(final String featureFile, final String indexFile, fina
final Function<SeekableByteChannel, SeekableByteChannel> wrapper,
final Function<SeekableByteChannel, SeekableByteChannel> indexWrapper) throws IOException {
super(featureFile, codec, wrapper, indexWrapper);
tabixReader = new TabixReader(featureFile, indexFile, wrapper, indexWrapper);
tabixReader = new TabixReader(this.path, indexFile, wrapper, indexWrapper);
sequenceNames = new ArrayList<>(tabixReader.getChromosomes());
readHeader();
}
Expand Down
50 changes: 50 additions & 0 deletions src/test/java/htsjdk/tribble/AbstractFeatureReaderTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import htsjdk.samtools.FileTruncatedException;
import htsjdk.samtools.util.IOUtil;
import htsjdk.samtools.util.IOUtilTest;
import htsjdk.samtools.util.RuntimeIOException;
import htsjdk.samtools.util.TestUtil;
import htsjdk.tribble.bed.BEDCodec;
import htsjdk.tribble.bed.BEDFeature;
Expand Down Expand Up @@ -51,6 +52,7 @@ public class AbstractFeatureReaderTest extends HtsjdkTest {

//wrapper which skips the first byte of a file and leaves the rest unchanged
private static final Function<SeekableByteChannel, SeekableByteChannel> WRAPPER = SkippingByteChannel::new;
public static final String REDIRECTING_CODEC_TEST_FILES = "src/test/resources/htsjdk/tribble/AbstractFeatureReaderTest/redirectingCodecTest/";

/**
* Asserts readability and correctness of VCF over HTTP. The VCF is indexed and requires an index.
Expand Down Expand Up @@ -228,4 +230,52 @@ public SeekableByteChannel truncate(long size) throws IOException {
}
}

/**
 * Supplies the redirect files used by the redirecting-codec test, one file path per test invocation.
 */
@DataProvider
public Object[][] getVcfRedirects() {
    final String plainRedirect = REDIRECTING_CODEC_TEST_FILES + "vcf.redirect";
    final String gzippedRedirect = REDIRECTING_CODEC_TEST_FILES + "vcf.gz.redirect";
    return new Object[][]{
            {plainRedirect},
            {gzippedRedirect}
    };
}

/**
 * Test a codec that uses {@link FeatureCodec#getPathToDataFile(String)} in order to specify a data file that's
 * different than the file it identifies with {@link FeatureCodec#canDecode}.
 *
 * A reader opened on the redirect file must report the same header, the same number of records and the same
 * number of query hits as a reader opened directly on {@code dataFiles/test.vcf} with a plain {@link VCFCodec}.
 *
 * @param vcfRedirect path to a redirect file, as supplied by the {@code getVcfRedirects} data provider
 */
@Test(dataProvider = "getVcfRedirects")
public void testCodecWithGetPathToDataFile(String vcfRedirect) throws IOException {
    final VcfRedirectCodec vcfRedirectCodec = new VcfRedirectCodec();
    final String vcf = REDIRECTING_CODEC_TEST_FILES + "dataFiles/test.vcf";
    Assert.assertTrue(vcfRedirectCodec.canDecode(vcfRedirect), "should have been able to decode " + vcfRedirect);
    try(FeatureReader<VariantContext> redirectReader = AbstractFeatureReader.getFeatureReader(vcfRedirect, vcfRedirectCodec, false);
        FeatureReader<VariantContext> directReader = AbstractFeatureReader.getFeatureReader(vcf, new VCFCodec(), false)){
        Assert.assertEquals(redirectReader.getHeader().toString(), directReader.getHeader().toString());

        // Full iteration: guard against a vacuous pass by requiring at least one record before comparing counts.
        final int redirectVcfSize = redirectReader.iterator().toList().size();
        Assert.assertTrue( redirectVcfSize > 0, "iterator found " + redirectVcfSize + " records");
        Assert.assertEquals(redirectVcfSize, directReader.iterator().toList().size());

        // Interval query: the failure message reports the query count, not the full-iteration count.
        final int redirectQuerySize = redirectReader.query("20", 1, 20000).toList().size();
        Assert.assertTrue(redirectQuerySize > 0, "query found " + redirectQuerySize + " records");
        Assert.assertEquals(redirectQuerySize, directReader.query("20", 1, 20000).toList().size() );
    }
}

/**
 * Codec which redirects to another location after reading the input file: the first line of the input file is
 * used as the path of the file that is actually decoded.
 */
private static class VcfRedirectCodec extends VCFCodec{
    /**
     * Decides decodability from the redirect target rather than from the redirect file itself.
     */
    @Override
    public boolean canDecode(String potentialInput) {
        return super.canDecode(this.getPathToDataFile(potentialInput));
    }

    /**
     * @param path path to a redirect file
     * @return the first line of the redirect file
     * @throws TribbleException if the redirect file contains no lines
     * @throws RuntimeIOException if the redirect file cannot be read
     */
    @Override
    public String getPathToDataFile(String path) {
        try {
            // An empty redirect file is reported explicitly instead of surfacing as an IndexOutOfBoundsException.
            return Files.readAllLines(IOUtil.getPath(path))
                    .stream()
                    .findFirst()
                    .orElseThrow(() -> new TribbleException("Redirect file " + path + " is empty; expected the path to a data file on its first line"));
        } catch (IOException e) {
            throw new RuntimeIOException(e);
        }
    }
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
##fileformat=VCFv4.1
##fileDate=20090805
##source=myImputationProgramV3.1
##reference=file:///seq/references/1000GenomesPilot-NCBI36.fasta
##contig=<ID=20,length=62435964,assembly=B36,md5=f126cdf8a6e0c7f379d618ff66beb2da,species="Homo sapiens",taxonomy=x>
##phasing=partial
##INFO=<ID=NS,Number=1,Type=Integer,Description="Number of Samples With Data">
##INFO=<ID=DP,Number=1,Type=Integer,Description="Total Depth">
##INFO=<ID=AF,Number=A,Type=Float,Description="Allele Frequency">
##INFO=<ID=AA,Number=1,Type=String,Description="Ancestral Allele">
##INFO=<ID=DB,Number=0,Type=Flag,Description="dbSNP membership, build 129">
##INFO=<ID=H2,Number=0,Type=Flag,Description="HapMap2 membership">
##FILTER=<ID=q10,Description="Quality below 10">
##FILTER=<ID=s50,Description="Less than 50% of samples have data">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">
##FORMAT=<ID=HQ,Number=2,Type=Integer,Description="Haplotype Quality">
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA00001 NA00002 NA00003
20 14370 rs6054257 G A 29 PASS NS=3;DP=14;AF=0.5;DB;H2 GT:GQ:DP:HQ 0|0:48:1:51,51 1|0:48:8:51,51 1/1:43:5:.,.
20 17330 . T A 3 q10 NS=3;DP=11;AF=0.017 GT:GQ:DP:HQ 0|0:49:3:58,50 0|1:3:5:65,3 0/0:41:3
20 1110696 rs6040355 A G,T 67 PASS NS=2;DP=10;AF=0.333,0.667;AA=T;DB GT:GQ:DP:HQ 1|2:21:6:23,27 2|1:2:0:18,2 2/2:35:4
20 1230237 . T . 47 PASS NS=3;DP=13;AA=T GT:GQ:DP:HQ 0|0:54:7:56,60 0|0:48:4:51,51 0/0:61:2
20 1234567 microsat1 GTC G,GTCT 50 PASS NS=3;DP=9;AA=G GT:GQ:DP 0/1:35:4 0/2:17:2 1/1:40:3
Binary file not shown.
Binary file not shown.
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
src/test/resources/htsjdk/tribble/AbstractFeatureReaderTest/redirectingCodecTest/dataFiles/test.vcf.gz
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
src/test/resources/htsjdk/tribble/AbstractFeatureReaderTest/redirectingCodecTest/dataFiles/test.vcf