diff --git a/src/main/java/genepi/imputationserver/steps/vcf/FastVCFFileReader.java b/src/main/java/genepi/imputationserver/steps/vcf/FastVCFFileReader.java index f747893..3c4d605 100644 --- a/src/main/java/genepi/imputationserver/steps/vcf/FastVCFFileReader.java +++ b/src/main/java/genepi/imputationserver/steps/vcf/FastVCFFileReader.java @@ -5,11 +5,16 @@ import java.util.List; import java.util.Vector; -import genepi.io.text.LineReader; +import genepi.io.FileUtil; +import java.io.BufferedReader; +import java.io.FileInputStream; +import java.io.InputStream; +import java.io.InputStreamReader; + import htsjdk.variant.vcf.VCFFileReader; import htsjdk.variant.vcf.VCFHeader; -public class FastVCFFileReader extends LineReader { +public class FastVCFFileReader { private List samples; @@ -21,13 +26,19 @@ public class FastVCFFileReader extends LineReader { private List header = new Vector<>(); - private VCFLineParser parser; + private String filename; + + protected BufferedReader in; + + private int lineNumber; - public FastVCFFileReader(String vcfFilename) throws IOException { + private String line; - super(vcfFilename); + private VCFLineParser parser; + + public FastVCFFileReader(String filename) throws IOException { // load header - VCFFileReader reader = new VCFFileReader(new File(vcfFilename), false); + VCFFileReader reader = new VCFFileReader(new File(filename), false); VCFHeader header = reader.getFileHeader(); samples = header.getGenotypeSamples(); samplesCount = samples.size(); @@ -36,6 +47,11 @@ public FastVCFFileReader(String vcfFilename) throws IOException { parser = new VCFLineParser(samplesCount); + this.filename = filename; + FileInputStream inputStream = new FileInputStream(filename); + InputStream in2 = FileUtil.decompressStream(inputStream); + this.in = new BufferedReader(new InputStreamReader(in2)); + } public List getGenotypedSamples() { @@ -54,27 +70,46 @@ public int getSamplesCount() { return samplesCount; } - @Override - protected void parseLine(String line) throws IOException { - - // not a header line - if (line.charAt(0) != '#') { - - variantContext = parser.parseLine(line); - - if (variantContext.getNSamples() != samplesCount) { - throw new IOException("Line " + getLineNumber() + ": different number of samples."); + public boolean next() throws IOException { + while(true) { + if ((this.line = this.in.readLine()) != null) { + try { + this.lineNumber++; + if (this.line.trim().isEmpty()) { + continue; + } + + // Check if the line starts with '#' and skip processing for header lines + if (this.line.startsWith("#")) { + header.add(this.line); + continue; + } + + // Parse non-header lines + this.parseLine(this.line); + return true; + } catch (Exception var2) { + throw new IOException(this.filename + ": Line " + this.lineNumber + ": " + var2.getMessage()); + } } - snpsCount++; + return false; + } + } - } else { - header.add(line); - next(); + protected void parseLine(String line) throws IOException { + variantContext = parser.parseLine(line); + if (variantContext.getNSamples() != samplesCount) { + throw new IOException("Line " + lineNumber + ": different number of samples."); } + snpsCount++; + } + public void close() throws IOException { + in.close(); } + public List getFileHeader() { return header; } diff --git a/src/test/java/genepi/imputationserver/steps/QualityControlCommandTest.java b/src/test/java/genepi/imputationserver/steps/QualityControlCommandTest.java index 600b3bc..9455463 100644 --- a/src/test/java/genepi/imputationserver/steps/QualityControlCommandTest.java +++ b/src/test/java/genepi/imputationserver/steps/QualityControlCommandTest.java @@ -516,6 +516,20 @@ public void testQcStatisticsFilterSampleCallrate() throws Exception { } + @Test + public void testBigHeader() throws Exception { + + String inputFolder = "test-data/data/big-header"; + + QualityControlCommand command = buildCommand(inputFolder); + command.setReference("test-data/configs/hapmap-chr1/hapmap2-region-simple.json"); + assertEquals(1, (int) command.call()); + + OutputReader log = new OutputReader(CLOUDGENE_LOG); + log.view(); + + } + @Test public void testChr23PipelineLifting() throws Exception { diff --git a/test-data/data/big-header/minimac_test.50.vcf.gz b/test-data/data/big-header/minimac_test.50.vcf.gz new file mode 100644 index 0000000..627da29 Binary files /dev/null and b/test-data/data/big-header/minimac_test.50.vcf.gz differ