diff --git a/src/main/java/htsjdk/tribble/AbstractFeatureReader.java b/src/main/java/htsjdk/tribble/AbstractFeatureReader.java index 958135fe05..3351bb1579 100755 --- a/src/main/java/htsjdk/tribble/AbstractFeatureReader.java +++ b/src/main/java/htsjdk/tribble/AbstractFeatureReader.java @@ -43,9 +43,12 @@ public abstract class AbstractFeatureReader implement // the logging destination for this source //private final static Logger log = Logger.getLogger("BasicFeatureSource"); - // the path to underlying data source + /** + * The path to the underlying data file; this must be the input path converted with {@link FeatureCodec#getPathToDataFile(String)}. + */ String path; + // a wrapper to apply to the raw stream of the Feature file to allow features like prefetching and caching to be injected final Function wrapper; // a wrapper to apply to the raw stream of the index file @@ -102,8 +105,12 @@ public static AbstractFeatureReader AbstractFeatureReader getFeatureReader(final String featureResource, String indexResource, final FeatureCodec codec, final boolean requireIndex, Function wrapper, Function indexWrapper) throws TribbleException { try { - // Test for tabix index - if (methods.isTabix(featureResource, indexResource)) { + + // Test for tabix index. + // Note that we use pathToDataFile here when determining the file type, but featureResource when constructing the readers. + // This is because the reader's constructor will convert the path and it needs to be converted exactly once. + final String pathToDataFile = codec.getPathToDataFile(featureResource); + if (methods.isTabix(pathToDataFile, indexResource)) { if ( ! 
(codec instanceof AsciiFeatureCodec) ) throw new TribbleException("Tabix indexed files only work with ASCII codecs, but received non-Ascii codec " + codec.getClass().getSimpleName()); return new TabixFeatureReader<>(featureResource, indexResource, (AsciiFeatureCodec) codec, wrapper, indexWrapper); @@ -145,7 +152,7 @@ protected AbstractFeatureReader(final String path, final FeatureCodec protected AbstractFeatureReader(final String path, final FeatureCodec codec, final Function wrapper, final Function indexWrapper) { - this.path = path; + this.path = codec.getPathToDataFile(path); this.codec = codec; this.wrapper = wrapper; this.indexWrapper = indexWrapper; diff --git a/src/main/java/htsjdk/tribble/FeatureCodec.java b/src/main/java/htsjdk/tribble/FeatureCodec.java index e33d21bb4a..fc3e8f6181 100644 --- a/src/main/java/htsjdk/tribble/FeatureCodec.java +++ b/src/main/java/htsjdk/tribble/FeatureCodec.java @@ -156,4 +156,21 @@ public interface FeatureCodec { default public TabixFormat getTabixFormat() { throw new TribbleException(this.getClass().getSimpleName() + "does not have defined tabix format"); } + + /** + * Codecs may override this method if the file that they recognize with {@link #canDecode(String)} is different than + * the file that contains the data they parse. + * + * This enables a class of codecs where the input file is a configuration that defines how to locate and handle the + * datafile. + * + * The default implementation returns the same path which was passed in. + * + * @param path the path to a file that this codec {@link #canDecode} + * @return the path to the data file that should be parsed by this codec to produce Features. + * @throws TribbleException codecs may throw if they cannot decode the path. 
+ */ + default String getPathToDataFile(String path){ + return path; + } } diff --git a/src/main/java/htsjdk/tribble/TabixFeatureReader.java b/src/main/java/htsjdk/tribble/TabixFeatureReader.java index d222c99eed..cd846e4f8b 100644 --- a/src/main/java/htsjdk/tribble/TabixFeatureReader.java +++ b/src/main/java/htsjdk/tribble/TabixFeatureReader.java @@ -77,7 +77,7 @@ public TabixFeatureReader(final String featureFile, final String indexFile, fina final Function wrapper, final Function indexWrapper) throws IOException { super(featureFile, codec, wrapper, indexWrapper); - tabixReader = new TabixReader(featureFile, indexFile, wrapper, indexWrapper); + tabixReader = new TabixReader(this.path, indexFile, wrapper, indexWrapper); sequenceNames = new ArrayList<>(tabixReader.getChromosomes()); readHeader(); } diff --git a/src/test/java/htsjdk/tribble/AbstractFeatureReaderTest.java b/src/test/java/htsjdk/tribble/AbstractFeatureReaderTest.java index e4d3d2bfd8..a51460df2b 100644 --- a/src/test/java/htsjdk/tribble/AbstractFeatureReaderTest.java +++ b/src/test/java/htsjdk/tribble/AbstractFeatureReaderTest.java @@ -6,6 +6,7 @@ import htsjdk.samtools.FileTruncatedException; import htsjdk.samtools.util.IOUtil; import htsjdk.samtools.util.IOUtilTest; +import htsjdk.samtools.util.RuntimeIOException; import htsjdk.samtools.util.TestUtil; import htsjdk.tribble.bed.BEDCodec; import htsjdk.tribble.bed.BEDFeature; @@ -51,6 +52,7 @@ public class AbstractFeatureReaderTest extends HtsjdkTest { //wrapper which skips the first byte of a file and leaves the rest unchanged private static final Function WRAPPER = SkippingByteChannel::new; + public static final String REDIRECTING_CODEC_TEST_FILES = "src/test/resources/htsjdk/tribble/AbstractFeatureReaderTest/redirectingCodecTest/"; /** * Asserts readability and correctness of VCF over HTTP. The VCF is indexed and requires and index. 
@@ -228,4 +230,52 @@ public SeekableByteChannel truncate(long size) throws IOException { } } + @DataProvider + public Object[][] getVcfRedirects(){ + return new Object[][]{ + {REDIRECTING_CODEC_TEST_FILES + "vcf.redirect"}, + {REDIRECTING_CODEC_TEST_FILES + "vcf.gz.redirect"} + }; + } + + /** + * Test a codec that uses {@link FeatureCodec#getPathToDataFile(String)} in order to specify a data file that's + * different than the file it identifies with {@link FeatureCodec#canDecode}. + */ + @Test(dataProvider = "getVcfRedirects") + public void testCodecWithGetPathToDataFile(String vcfRedirect) throws IOException { + final VcfRedirectCodec vcfRedirectCodec = new VcfRedirectCodec(); + final String vcf = REDIRECTING_CODEC_TEST_FILES + "dataFiles/test.vcf"; + Assert.assertTrue(vcfRedirectCodec.canDecode(vcfRedirect), "should have been able to decode " + vcfRedirect); + try(FeatureReader redirectReader = AbstractFeatureReader.getFeatureReader(vcfRedirect, vcfRedirectCodec, false); + FeatureReader directReader = AbstractFeatureReader.getFeatureReader(vcf, new VCFCodec(), false)){ + Assert.assertEquals(redirectReader.getHeader().toString(), directReader.getHeader().toString()); + final int redirectVcfSize = redirectReader.iterator().toList().size(); + Assert.assertTrue( redirectVcfSize > 0, "iterator found " + redirectVcfSize + " records"); + Assert.assertEquals(redirectVcfSize, directReader.iterator().toList().size()); + + final int redirectQuerySize = redirectReader.query("20", 1, 20000).toList().size(); + Assert.assertTrue(redirectQuerySize > 0, "query found " + redirectQuerySize + " records"); + Assert.assertEquals(redirectQuerySize, directReader.query("20", 1, 20000).toList().size() ); + } + } + + /** + * codec which redirects to another location after reading the input file + */ + private static class VcfRedirectCodec extends VCFCodec{ + @Override + public boolean canDecode(String potentialInput) { + return super.canDecode(this.getPathToDataFile(potentialInput)); + } + 
+ @Override + public String getPathToDataFile(String path) { + try { + return Files.readAllLines(IOUtil.getPath(path)).get(0); + } catch (IOException e) { + throw new RuntimeIOException(e); + } + } + } } diff --git a/src/test/resources/htsjdk/tribble/AbstractFeatureReaderTest/redirectingCodecTest/dataFiles/test.vcf b/src/test/resources/htsjdk/tribble/AbstractFeatureReaderTest/redirectingCodecTest/dataFiles/test.vcf new file mode 100644 index 0000000000..27d45004ca --- /dev/null +++ b/src/test/resources/htsjdk/tribble/AbstractFeatureReaderTest/redirectingCodecTest/dataFiles/test.vcf @@ -0,0 +1,24 @@ +##fileformat=VCFv4.1 +##fileDate=20090805 +##source=myImputationProgramV3.1 +##reference=file:///seq/references/1000GenomesPilot-NCBI36.fasta +##contig= +##phasing=partial +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##FILTER= +##FILTER= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA00001 NA00002 NA00003 +20 14370 rs6054257 G A 29 PASS NS=3;DP=14;AF=0.5;DB;H2 GT:GQ:DP:HQ 0|0:48:1:51,51 1|0:48:8:51,51 1/1:43:5:.,. +20 17330 . T A 3 q10 NS=3;DP=11;AF=0.017 GT:GQ:DP:HQ 0|0:49:3:58,50 0|1:3:5:65,3 0/0:41:3 +20 1110696 rs6040355 A G,T 67 PASS NS=2;DP=10;AF=0.333,0.667;AA=T;DB GT:GQ:DP:HQ 1|2:21:6:23,27 2|1:2:0:18,2 2/2:35:4 +20 1230237 . T . 
47 PASS NS=3;DP=13;AA=T GT:GQ:DP:HQ 0|0:54:7:56,60 0|0:48:4:51,51 0/0:61:2 +20 1234567 microsat1 GTC G,GTCT 50 PASS NS=3;DP=9;AA=G GT:GQ:DP 0/1:35:4 0/2:17:2 1/1:40:3 diff --git a/src/test/resources/htsjdk/tribble/AbstractFeatureReaderTest/redirectingCodecTest/dataFiles/test.vcf.gz b/src/test/resources/htsjdk/tribble/AbstractFeatureReaderTest/redirectingCodecTest/dataFiles/test.vcf.gz new file mode 100644 index 0000000000..44072dc94d Binary files /dev/null and b/src/test/resources/htsjdk/tribble/AbstractFeatureReaderTest/redirectingCodecTest/dataFiles/test.vcf.gz differ diff --git a/src/test/resources/htsjdk/tribble/AbstractFeatureReaderTest/redirectingCodecTest/dataFiles/test.vcf.gz.tbi b/src/test/resources/htsjdk/tribble/AbstractFeatureReaderTest/redirectingCodecTest/dataFiles/test.vcf.gz.tbi new file mode 100644 index 0000000000..ed46415e21 Binary files /dev/null and b/src/test/resources/htsjdk/tribble/AbstractFeatureReaderTest/redirectingCodecTest/dataFiles/test.vcf.gz.tbi differ diff --git a/src/test/resources/htsjdk/tribble/AbstractFeatureReaderTest/redirectingCodecTest/dataFiles/test.vcf.idx b/src/test/resources/htsjdk/tribble/AbstractFeatureReaderTest/redirectingCodecTest/dataFiles/test.vcf.idx new file mode 100644 index 0000000000..344ae573fc Binary files /dev/null and b/src/test/resources/htsjdk/tribble/AbstractFeatureReaderTest/redirectingCodecTest/dataFiles/test.vcf.idx differ diff --git a/src/test/resources/htsjdk/tribble/AbstractFeatureReaderTest/redirectingCodecTest/vcf.gz.redirect b/src/test/resources/htsjdk/tribble/AbstractFeatureReaderTest/redirectingCodecTest/vcf.gz.redirect new file mode 100644 index 0000000000..e552554b0f --- /dev/null +++ b/src/test/resources/htsjdk/tribble/AbstractFeatureReaderTest/redirectingCodecTest/vcf.gz.redirect @@ -0,0 +1 @@ +src/test/resources/htsjdk/tribble/AbstractFeatureReaderTest/redirectingCodecTest/dataFiles/test.vcf.gz \ No newline at end of file diff --git 
a/src/test/resources/htsjdk/tribble/AbstractFeatureReaderTest/redirectingCodecTest/vcf.redirect b/src/test/resources/htsjdk/tribble/AbstractFeatureReaderTest/redirectingCodecTest/vcf.redirect new file mode 100644 index 0000000000..a94bc44d13 --- /dev/null +++ b/src/test/resources/htsjdk/tribble/AbstractFeatureReaderTest/redirectingCodecTest/vcf.redirect @@ -0,0 +1 @@ +src/test/resources/htsjdk/tribble/AbstractFeatureReaderTest/redirectingCodecTest/dataFiles/test.vcf \ No newline at end of file