From aa8980937e80bee82edd344df1435ba82ab670c2 Mon Sep 17 00:00:00 2001 From: jmthibault79 Date: Mon, 1 Apr 2019 10:51:12 -0400 Subject: [PATCH] Follow-up to #1276: Remove multiple versions of Slice/Container getCRAIEntries() (#1329) * Add bai and crai index init checks to Slice * Remove multiple versions of Slice/Container getCRAIEntries() * move Container.setByteOffset() call inside ContainerIO.readContainer() * populateSlicesAndIndexingParameters -> distributeIndexingParametersToSlices - called on Container write now as well * Change setSlices signature to take a List * rm duplicate test code --- .../htsjdk/samtools/BAMIndexMetaData.java | 2 +- .../java/htsjdk/samtools/CRAMBAIIndexer.java | 109 +++++++------- .../java/htsjdk/samtools/CRAMCRAIIndexer.java | 31 ++-- .../samtools/CRAMContainerStreamWriter.java | 5 +- .../java/htsjdk/samtools/CRAMFileReader.java | 4 +- .../java/htsjdk/samtools/CRAMIterator.java | 7 +- .../java/htsjdk/samtools/cram/CRAIEntry.java | 4 +- .../java/htsjdk/samtools/cram/CRAIIndex.java | 16 +- .../samtools/cram/build/ContainerFactory.java | 17 ++- .../samtools/cram/build/ContainerParser.java | 2 +- .../build/CramContainerHeaderIterator.java | 17 ++- .../cram/build/CramContainerIterator.java | 30 ++-- .../htsjdk/samtools/cram/build/CramIO.java | 66 +++++---- .../cram/build/CramSpanContainerIterator.java | 5 +- .../samtools/cram/structure/Container.java | 112 +++++++------- .../cram/structure/ContainerHeaderIO.java | 56 ++++++- .../samtools/cram/structure/ContainerIO.java | 108 +++++++------- .../htsjdk/samtools/cram/structure/Slice.java | 112 +++++++++----- .../htsjdk/samtools/CRAMBAIIndexerTest.java | 138 ++++++++++++------ .../htsjdk/samtools/CRAMSliceMD5Test.java | 13 +- .../CramContainerHeaderIteratorTest.java | 6 +- .../htsjdk/samtools/cram/CRAIEntryTest.java | 84 ++++++----- .../htsjdk/samtools/cram/VersionTest.java | 3 +- .../cram/build/ContainerFactoryTest.java | 40 ++--- .../cram/build/ContainerParserTest.java | 18 ++- .../cram/structure/CRAMStructureTestUtil.java | 81 ++++++++-- .../cram/structure/ContainerTest.java | 86 +++++++---- .../samtools/cram/structure/SliceTests.java | 10 ++ 28 files changed, 738 insertions(+), 444 deletions(-) diff --git a/src/main/java/htsjdk/samtools/BAMIndexMetaData.java b/src/main/java/htsjdk/samtools/BAMIndexMetaData.java index 4007e8bf7a..09391b6515 100644 --- a/src/main/java/htsjdk/samtools/BAMIndexMetaData.java +++ b/src/main/java/htsjdk/samtools/BAMIndexMetaData.java @@ -159,7 +159,7 @@ void recordMetaData(final Slice slice) { unAlignedRecords += slice.unmappedReadsCount; } - final long start = slice.offset; + final long start = slice.byteOffsetFromContainer; if (BlockCompressedFilePointerUtil.compare(start, firstOffset) < 1 || firstOffset == -1) { this.firstOffset = start; diff --git a/src/main/java/htsjdk/samtools/CRAMBAIIndexer.java b/src/main/java/htsjdk/samtools/CRAMBAIIndexer.java index 68db624702..3d6822a0d5 100755 --- a/src/main/java/htsjdk/samtools/CRAMBAIIndexer.java +++ b/src/main/java/htsjdk/samtools/CRAMBAIIndexer.java @@ -38,7 +38,8 @@ */ package htsjdk.samtools; -import htsjdk.samtools.cram.build.ContainerParser; +import htsjdk.samtools.cram.CRAIEntry; +import htsjdk.samtools.cram.CRAIIndex; import htsjdk.samtools.cram.build.CramIO; import htsjdk.samtools.cram.ref.ReferenceContext; import htsjdk.samtools.cram.structure.AlignmentSpan; @@ -52,7 +53,7 @@ import htsjdk.samtools.util.ProgressLogger; import java.io.File; -import java.io.IOException; +import java.io.InputStream; import java.io.OutputStream; import java.util.Arrays; import java.util.List; @@ -61,11 +62,22 @@ /** * Class for both constructing BAM index content and writing it out. + * * There are two usage patterns: - * 1) Building a bam index from an existing cram file - * 2) Building a bam index while building the cram file - * In both cases, processAlignment is called for each cram slice and - * finish() is called at the end. + * 1) Building a bam index (BAI) while building the CRAM file + * 2) Building a bam index (BAI) from an existing CRAI file + * + * 1) is driven by {@link CRAMContainerStreamWriter} and proceeds by calling {@link CRAMBAIIndexer#processContainer} + * after each {@link Container} is built, and {@link CRAMBAIIndexer#finish()} is called at the end. + * + * 2) is driven by {@link CRAIIndex#openCraiFileAsBaiStream(InputStream, SAMSequenceDictionary)} + * and proceeds by processing {@link CRAIEntry} elements obtained from + * {@link CRAMCRAIIndexer#readIndex(InputStream)}. {@link CRAMBAIIndexer#processAsSingleReferenceSlice(Slice)} + * is called on each {@link CRAIEntry} and {@link CRAMBAIIndexer#finish()} is called at the end. + * + * NOTE: a third pattern of building a BAI from a CRAM file is also supported by this class, + * but it is unused. This would be accomplished via {@link #createIndex(SeekableStream, File, Log, ValidationStringency)}. + * */ public class CRAMBAIIndexer { @@ -78,7 +90,7 @@ public class CRAMBAIIndexer { private int currentReference = 0; // content is built up from the input bam file using this - private final BAMIndexBuilder indexBuilder; + private final CRAMBAIIndexBuilder indexBuilder; /** * Create a CRAM indexer that writes BAI to a file. @@ -91,7 +103,7 @@ private CRAMBAIIndexer(final File output, final SAMFileHeader fileHeader) { throw new SAMException("CRAM file must be coordinate-sorted for indexing."); } numReferences = fileHeader.getSequenceDictionary().size(); - indexBuilder = new BAMIndexBuilder(fileHeader); + indexBuilder = new CRAMBAIIndexBuilder(fileHeader); outputWriter = new BinaryBAMIndexWriter(numReferences, output); } @@ -106,7 +118,7 @@ public CRAMBAIIndexer(final OutputStream output, final SAMFileHeader fileHeader) throw new SAMException("CRAM file must be coordinate-sorted for indexing."); } numReferences = fileHeader.getSequenceDictionary().size(); - indexBuilder = new BAMIndexBuilder(fileHeader); + indexBuilder = new CRAMBAIIndexBuilder(fileHeader); outputWriter = new BinaryBAMIndexWriter(numReferences, output); } @@ -118,21 +130,19 @@ public CRAMBAIIndexer(final OutputStream output, final SAMFileHeader fileHeader) * * @param container container to be indexed */ - public void processContainer(final Container container, final ValidationStringency validationStringency) { + void processContainer(final Container container, final ValidationStringency validationStringency) { if (container == null || container.isEOF()) { return; } int sliceIndex = 0; - for (final Slice slice : container.slices) { - slice.containerOffset = container.offset; + for (final Slice slice : container.getSlices()) { slice.index = sliceIndex++; if (slice.getReferenceContext().isMultiRef()) { final Map spanMap = container.getSpans(validationStringency); // TODO why are we updating the original slice here? - slice.containerOffset = container.offset; slice.index = sliceIndex++; /** @@ -142,8 +152,8 @@ public void processContainer(final Container container, final ValidationStringen for (final ReferenceContext refContext : new TreeSet<>(spanMap.keySet())) { final AlignmentSpan span = spanMap.get(refContext); final Slice fakeSlice = new Slice(refContext); - fakeSlice.containerOffset = slice.containerOffset; - fakeSlice.offset = slice.offset; + fakeSlice.containerByteOffset = slice.containerByteOffset; + fakeSlice.byteOffsetFromContainer = slice.byteOffsetFromContainer; fakeSlice.index = slice.index; fakeSlice.alignmentStart = span.getStart(); @@ -156,8 +166,8 @@ public void processContainer(final Container container, final ValidationStringen if (unmappedSpan != null) { final Slice fakeSlice = new Slice(ReferenceContext.UNMAPPED_UNPLACED_CONTEXT); - fakeSlice.containerOffset = slice.containerOffset; - fakeSlice.offset = slice.offset; + fakeSlice.containerByteOffset = slice.containerByteOffset; + fakeSlice.byteOffsetFromContainer = slice.byteOffsetFromContainer; fakeSlice.index = slice.index; fakeSlice.alignmentStart = SAMRecord.NO_ALIGNMENT_START; @@ -183,6 +193,9 @@ public void processContainer(final Container container, final ValidationStringen * @throws htsjdk.samtools.SAMException if slice refers to multiple reference sequences. */ public void processAsSingleReferenceSlice(final Slice slice) { + // validate that this Slice is ready to be indexed + slice.baiIndexInitializationCheck(); + final ReferenceContext sliceContext = slice.getReferenceContext(); if (sliceContext.isMultiRef()) { throw new SAMException("Expecting a single reference or unmapped slice."); @@ -250,40 +263,31 @@ public static void createIndex(final SeekableStream stream, } final CRAMBAIIndexer indexer = new CRAMBAIIndexer(output, cramHeader.getSamFileHeader()); - int totalRecords = 0; Container container = null; ProgressLogger progressLogger = new ProgressLogger(log, 1, "indexed", "slices"); do { - try { - final long offset = stream.position(); - container = ContainerIO.readContainer(cramHeader.getVersion(), stream); - if (container == null || container.isEOF()) { - break; - } + container = ContainerIO.readContainer(cramHeader.getVersion(), stream); + if (container == null || container.isEOF()) { + break; + } - container.offset = offset; - - indexer.processContainer(container, validationStringency); - - if (null != log) { - String sequenceName; - final ReferenceContext containerContext = container.getReferenceContext(); - switch (containerContext.getType()) { - case UNMAPPED_UNPLACED_TYPE: - sequenceName = "?"; - break; - case MULTIPLE_REFERENCE_TYPE: - sequenceName = "???"; - break; - default: - sequenceName = cramHeader.getSamFileHeader().getSequence(containerContext.getSequenceId()).getSequenceName(); - break; - } - progressLogger.record(sequenceName, container.alignmentStart); + indexer.processContainer(container, validationStringency); + + if (null != log) { + String sequenceName; + final ReferenceContext containerContext = container.getReferenceContext(); + switch (containerContext.getType()) { + case UNMAPPED_UNPLACED_TYPE: + sequenceName = "?"; + break; + case MULTIPLE_REFERENCE_TYPE: + sequenceName = "???"; + break; + default: + sequenceName = cramHeader.getSamFileHeader().getSequence(containerContext.getSequenceId()).getSequenceName(); + break; } - - } catch (final IOException e) { - throw new RuntimeException("Failed to read cram container", e); + progressLogger.record(sequenceName, container.alignmentStart); } } while (!container.isEOF()); @@ -296,8 +300,11 @@ public static void createIndex(final SeekableStream stream, * One instance is used to construct an entire index. * processAlignment is called for each alignment until a new reference is encountered, then * processReference is called when all records for the reference have been processed. + * + * TODO: this was copied from BAMIndexer.BAMIndexBuilder + * so perhaps they should be merged back together */ - private class BAMIndexBuilder { + private class CRAMBAIIndexBuilder { private final SAMFileHeader bamHeader; @@ -315,7 +322,7 @@ private class BAMIndexBuilder { /** * @param header SAMFileHeader used for reference name (in index stats) and for max bin number */ - private BAMIndexBuilder(final SAMFileHeader header) { + private CRAMBAIIndexBuilder(final SAMFileHeader header) { this.bamHeader = header; } @@ -353,7 +360,7 @@ private int computeIndexingBin(final Slice slice) { * Record any index information for a given CRAM slice * * Reads these Slice fields: - * sequenceId, alignmentStart, alignmentSpan, containerOffset, index + * sequenceId, alignmentStart, alignmentSpan, containerByteOffset, index * * @param slice CRAM slice, single ref only. */ @@ -396,8 +403,8 @@ private void processSingleReferenceSlice(final Slice slice) { // process chunks - final long chunkStart = (slice.containerOffset << 16) | slice.index; - final long chunkEnd = ((slice.containerOffset << 16) | slice.index) + 1; + final long chunkStart = (slice.containerByteOffset << 16) | slice.index; + final long chunkEnd = ((slice.containerByteOffset << 16) | slice.index) + 1; final Chunk newChunk = new Chunk(chunkStart, chunkEnd); diff --git a/src/main/java/htsjdk/samtools/CRAMCRAIIndexer.java b/src/main/java/htsjdk/samtools/CRAMCRAIIndexer.java index f4a6c3363c..0ac24929ee 100644 --- a/src/main/java/htsjdk/samtools/CRAMCRAIIndexer.java +++ b/src/main/java/htsjdk/samtools/CRAMCRAIIndexer.java @@ -13,7 +13,6 @@ import java.io.OutputStream; import java.io.IOException; import java.util.Collection; -import java.util.List; import java.util.Scanner; import java.util.zip.GZIPInputStream; import java.util.zip.GZIPOutputStream; @@ -94,27 +93,19 @@ public void finish() { * @param craiStream stream for output index */ public static void writeIndex(final SeekableStream cramStream, OutputStream craiStream) { - try { - final CramHeader cramHeader = CramIO.readCramHeader(cramStream); - final CRAMCRAIIndexer indexer = new CRAMCRAIIndexer(craiStream, cramHeader.getSamFileHeader()); - final Version cramVersion = cramHeader.getVersion(); - - // get the first container and its offset - long offset = cramStream.position(); - Container container = ContainerIO.readContainer(cramVersion, cramStream); - - while (container != null && !container.isEOF()) { - container.offset = offset; - indexer.processContainer(container); - offset = cramStream.position(); - container = ContainerIO.readContainer(cramVersion, cramStream); - } + final CramHeader cramHeader = CramIO.readCramHeader(cramStream); + final CRAMCRAIIndexer indexer = new CRAMCRAIIndexer(craiStream, cramHeader.getSamFileHeader()); + final Version cramVersion = cramHeader.getVersion(); - indexer.finish(); - } - catch (IOException e) { - throw new RuntimeIOException("Error writing CRAI index to output stream"); + // get the first container + Container container = ContainerIO.readContainer(cramVersion, cramStream); + + while (container != null && !container.isEOF()) { + indexer.processContainer(container); + container = ContainerIO.readContainer(cramVersion, cramStream); } + + indexer.finish(); } /** diff --git a/src/main/java/htsjdk/samtools/CRAMContainerStreamWriter.java b/src/main/java/htsjdk/samtools/CRAMContainerStreamWriter.java index 42e9b38e0d..b87744e45d 100644 --- a/src/main/java/htsjdk/samtools/CRAMContainerStreamWriter.java +++ b/src/main/java/htsjdk/samtools/CRAMContainerStreamWriter.java @@ -442,11 +442,10 @@ protected void flushContainer() throws IllegalArgumentException { } } - final Container container = containerFactory.buildContainer(cramRecords); - for (final Slice slice : container.slices) { + final Container container = containerFactory.buildContainer(cramRecords, offset); + for (final Slice slice : container.getSlices()) { slice.setRefMD5(referenceBases); } - container.offset = offset; offset += ContainerIO.writeContainer(cramVersion, container, outputStream); if (indexer != null) { /** diff --git a/src/main/java/htsjdk/samtools/CRAMFileReader.java b/src/main/java/htsjdk/samtools/CRAMFileReader.java index b0b746b1bc..5c782e12af 100644 --- a/src/main/java/htsjdk/samtools/CRAMFileReader.java +++ b/src/main/java/htsjdk/samtools/CRAMFileReader.java @@ -21,7 +21,7 @@ import htsjdk.samtools.cram.ref.CRAMReferenceSource; import htsjdk.samtools.cram.ref.ReferenceSource; import htsjdk.samtools.cram.structure.Container; -import htsjdk.samtools.cram.structure.ContainerIO; +import htsjdk.samtools.cram.structure.ContainerHeaderIO; import htsjdk.samtools.seekablestream.SeekableFileStream; import htsjdk.samtools.seekablestream.SeekableStream; import htsjdk.samtools.util.CloseableIterator; @@ -400,7 +400,7 @@ public CloseableIterator queryUnmapped() { seekableStream.seek(0); newIterator = new CRAMIterator(seekableStream, referenceSource, validationStringency); seekableStream.seek(startOfLastLinearBin >>> 16); - final Container container = ContainerIO.readContainerHeader(newIterator.getCramHeader().getVersion().major, seekableStream); + final Container container = ContainerHeaderIO.readContainerHeader(newIterator.getCramHeader().getVersion().major, seekableStream); seekableStream.seek(seekableStream.position() + container.containerByteSize); iterator = newIterator; boolean atAlignments; diff --git a/src/main/java/htsjdk/samtools/CRAMIterator.java b/src/main/java/htsjdk/samtools/CRAMIterator.java index 4119b56298..1f4e5b6833 100644 --- a/src/main/java/htsjdk/samtools/CRAMIterator.java +++ b/src/main/java/htsjdk/samtools/CRAMIterator.java @@ -177,8 +177,7 @@ void nextContainer() throws IllegalArgumentException, CRAMException { } } - for (int i = 0; i < container.slices.length; i++) { - final Slice slice = container.slices[i]; + for (final Slice slice : container.getSlices()) { final ReferenceContext sliceContext = slice.getReferenceContext(); if (! sliceContext.isMappedSingleRef()) @@ -212,8 +211,8 @@ void nextContainer() throws IllegalArgumentException, CRAMException { samRecord.setValidationStringency(validationStringency); if (mReader != null) { - final long chunkStart = (container.offset << 16) | cramRecord.sliceIndex; - final long chunkEnd = ((container.offset << 16) | cramRecord.sliceIndex) + 1; + final long chunkStart = (container.byteOffset << 16) | cramRecord.sliceIndex; + final long chunkEnd = ((container.byteOffset << 16) | cramRecord.sliceIndex) + 1; samRecord.setFileSource(new SAMFileSource(mReader, new BAMFileSpan(new Chunk(chunkStart, chunkEnd)))); } diff --git a/src/main/java/htsjdk/samtools/cram/CRAIEntry.java b/src/main/java/htsjdk/samtools/cram/CRAIEntry.java index 4453d680ab..460ee6e04a 100644 --- a/src/main/java/htsjdk/samtools/cram/CRAIEntry.java +++ b/src/main/java/htsjdk/samtools/cram/CRAIEntry.java @@ -16,10 +16,10 @@ public class CRAIEntry implements Comparable { private final int alignmentSpan; // this Slice's Container's offset in bytes from the beginning of the stream - // equal to Slice.containerOffset and Container.offset + // equal to Slice.containerByteOffset and Container.byteOffset private final long containerStartByteOffset; // this Slice's offset in bytes from the beginning of its Container - // equal to Slice.offset and Container.landmarks[Slice.index] + // equal to Slice.byteOffsetFromContainer and Container.landmarks[Slice.index] private final int sliceByteOffset; private final int sliceByteSize; diff --git a/src/main/java/htsjdk/samtools/cram/CRAIIndex.java b/src/main/java/htsjdk/samtools/cram/CRAIIndex.java index 7a4f021cc0..9f9b548a34 100644 --- a/src/main/java/htsjdk/samtools/cram/CRAIIndex.java +++ b/src/main/java/htsjdk/samtools/cram/CRAIIndex.java @@ -59,7 +59,7 @@ public void writeIndex(final OutputStream os) { * @param container the container to index */ public void processContainer(final Container container) { - addEntries(container.getCRAIEntriesSplittingMultiRef()); + addEntries(container.getCRAIEntries()); } public static SeekableStream openCraiFileAsBaiStream(final File cramIndexFile, final SAMSequenceDictionary dictionary) { @@ -84,14 +84,15 @@ public static SeekableStream openCraiFileAsBaiStream(final InputStream indexStre for (final CRAIEntry entry : full) { final Slice slice = new Slice(new ReferenceContext(entry.getSequenceId())); - slice.containerOffset = entry.getContainerStartByteOffset(); + slice.containerByteOffset = entry.getContainerStartByteOffset(); slice.alignmentStart = entry.getAlignmentStart(); slice.alignmentSpan = entry.getAlignmentSpan(); - slice.offset = entry.getSliceByteOffset(); + slice.byteOffsetFromContainer = entry.getSliceByteOffset(); // NOTE: the sliceIndex and read count fields can't be derived from the CRAM index // so we can only set them to zero // see https://github.com/samtools/htsjdk/issues/531 + slice.mappedReadsCount = 0; slice.unmappedReadsCount = 0; slice.unplacedReadsCount = 0; @@ -106,12 +107,8 @@ public static SeekableStream openCraiFileAsBaiStream(final InputStream indexStre public static List find(final List list, final int seqId, final int start, final int span) { final boolean matchEntireSequence = start < 1 || span < 1; - final CRAIEntry query = new CRAIEntry(seqId, - start, - span, - Long.MAX_VALUE, - Integer.MAX_VALUE, - Integer.MAX_VALUE); + final int dummyValue = 1; + final CRAIEntry query = new CRAIEntry(seqId, start, span, dummyValue, dummyValue, dummyValue); return list.stream() .filter(e -> e.getSequenceId() == seqId) @@ -124,6 +121,7 @@ public static CRAIEntry getLeftmost(final List list) { if (list == null || list.isEmpty()) { return null; } + return list.stream() .sorted() .findFirst() diff --git a/src/main/java/htsjdk/samtools/cram/build/ContainerFactory.java b/src/main/java/htsjdk/samtools/cram/build/ContainerFactory.java index ce4329f831..9a7c7c08c9 100644 --- a/src/main/java/htsjdk/samtools/cram/build/ContainerFactory.java +++ b/src/main/java/htsjdk/samtools/cram/build/ContainerFactory.java @@ -37,7 +37,19 @@ public ContainerFactory(final SAMFileHeader samFileHeader, final int recordsPerS this.recordsPerSlice = recordsPerSlice; } - public Container buildContainer(final List records) { + /** + * Build a Container (and its constituent Slices) from {@link CramCompressionRecord}s. + * Note that this will always result in a single Container, regardless of how many Slices + * are created. It is up to the caller to divide the records into multiple Containers, + * if that is desired. + * + * TODO: enable a setting to automate this, perhaps "recordsPerContainer" + * + * @param records the records used to build the Container + * @param containerByteOffset the Container's byte offset from the start of the stream + * @return the container built from these records + */ + public Container buildContainer(final List records, final long containerByteOffset) { // sets header APDelta final boolean coordinateSorted = samFileHeader.getSortOrder() == SAMFileHeader.SortOrder.coordinate; final CompressionHeader compressionHeader = new CompressionHeaderFactory().build(records, null, coordinateSorted); @@ -58,8 +70,7 @@ public Container buildContainer(final List records) { slices.add(slice); } - final Container container = Container.initializeFromSlices(slices); - container.compressionHeader = compressionHeader; + final Container container = Container.initializeFromSlices(slices, compressionHeader, containerByteOffset); container.nofRecords = records.size(); container.globalRecordCounter = lastGlobalRecordCounter; container.blockCount = 0; diff --git a/src/main/java/htsjdk/samtools/cram/build/ContainerParser.java b/src/main/java/htsjdk/samtools/cram/build/ContainerParser.java index 9766f37eae..3f37cb5cbd 100644 --- a/src/main/java/htsjdk/samtools/cram/build/ContainerParser.java +++ b/src/main/java/htsjdk/samtools/cram/build/ContainerParser.java @@ -50,7 +50,7 @@ public List getRecords(final Container container, records = new ArrayList<>(container.nofRecords); } - for (final Slice slice : container.slices) { + for (final Slice slice : container.getSlices()) { records.addAll(getRecords(slice, container.compressionHeader, validationStringency)); } diff --git a/src/main/java/htsjdk/samtools/cram/build/CramContainerHeaderIterator.java b/src/main/java/htsjdk/samtools/cram/build/CramContainerHeaderIterator.java index 7a57aa0029..6d12be154a 100644 --- a/src/main/java/htsjdk/samtools/cram/build/CramContainerHeaderIterator.java +++ b/src/main/java/htsjdk/samtools/cram/build/CramContainerHeaderIterator.java @@ -1,10 +1,9 @@ package htsjdk.samtools.cram.build; -import htsjdk.samtools.cram.common.Version; import htsjdk.samtools.cram.io.CountingInputStream; import htsjdk.samtools.cram.io.InputStreamUtils; import htsjdk.samtools.cram.structure.Container; -import htsjdk.samtools.cram.structure.ContainerIO; +import htsjdk.samtools.cram.structure.ContainerHeaderIO; import java.io.InputStream; @@ -22,8 +21,18 @@ public CramContainerHeaderIterator(final InputStream inputStream) { super(inputStream); } - protected Container containerFromStream(final Version cramVersion, final CountingInputStream countingStream) { - final Container container = ContainerIO.readContainerHeader(cramVersion.major, countingStream); + /** + * Consume the entirety of the next container from the stream, but retain only the header. + * This is intended as a performance optimization, because it does not decode block data. + * + * @see CramContainerIterator#containerFromStream(CountingInputStream) + * + * @param countingStream the {@link CountingInputStream} to read from + * @return The next Container's header from the stream, returned as a Container. + */ + @Override + protected Container containerFromStream(final CountingInputStream countingStream) { + final Container container = ContainerHeaderIO.readContainerHeader(getCramHeader().getVersion().major, countingStream); InputStreamUtils.skipFully(countingStream, container.containerByteSize); return container; } diff --git a/src/main/java/htsjdk/samtools/cram/build/CramContainerIterator.java b/src/main/java/htsjdk/samtools/cram/build/CramContainerIterator.java index 88468b0dba..964d8b7a0e 100644 --- a/src/main/java/htsjdk/samtools/cram/build/CramContainerIterator.java +++ b/src/main/java/htsjdk/samtools/cram/build/CramContainerIterator.java @@ -1,6 +1,5 @@ package htsjdk.samtools.cram.build; -import htsjdk.samtools.cram.common.Version; import htsjdk.samtools.cram.io.CountingInputStream; import htsjdk.samtools.cram.structure.Container; import htsjdk.samtools.cram.structure.ContainerIO; @@ -17,20 +16,14 @@ public class CramContainerIterator implements Iterator { private CountingInputStream countingInputStream; private Container nextContainer; private boolean eof = false; - private long offset = 0; public CramContainerIterator(final InputStream inputStream) { this.countingInputStream = new CountingInputStream(inputStream); cramHeader = CramIO.readCramHeader(countingInputStream); - this.offset = countingInputStream.getCount(); } - void readNextContainer() { - nextContainer = containerFromStream(cramHeader.getVersion(), countingInputStream); - final long containerSizeInBytes = countingInputStream.getCount() - offset; - - nextContainer.offset = offset; - offset += containerSizeInBytes; + private void readNextContainer() { + nextContainer = containerFromStream(countingInputStream); if (nextContainer.isEOF()) { eof = true; @@ -40,18 +33,27 @@ void readNextContainer() { /** * Consume the entirety of the next container from the stream. - * @param cramVersion - * @param countingStream + * + * @see CramContainerIterator#containerFromStream(CountingInputStream) + * + * @param countingStream the {@link CountingInputStream} to read from * @return The next Container from the stream. */ - protected Container containerFromStream(final Version cramVersion, final CountingInputStream countingStream) { + protected Container containerFromStream(final CountingInputStream countingStream) { return ContainerIO.readContainer(cramHeader.getVersion(), countingStream); } @Override public boolean hasNext() { - if (eof) return false; - if (nextContainer == null) readNextContainer(); + if (eof) { + return false; + } + + if (nextContainer == null) { + readNextContainer(); + } + + // readNextContainer() may set eof return !eof; } diff --git a/src/main/java/htsjdk/samtools/cram/build/CramIO.java b/src/main/java/htsjdk/samtools/cram/build/CramIO.java index 51a4161217..612d1ffe86 100644 --- a/src/main/java/htsjdk/samtools/cram/build/CramIO.java +++ b/src/main/java/htsjdk/samtools/cram/build/CramIO.java @@ -24,14 +24,12 @@ import htsjdk.samtools.cram.io.CountingInputStream; import htsjdk.samtools.cram.io.InputStreamUtils; import htsjdk.samtools.cram.ref.ReferenceContext; +import htsjdk.samtools.cram.structure.*; import htsjdk.samtools.cram.structure.block.Block; -import htsjdk.samtools.cram.structure.Container; -import htsjdk.samtools.cram.structure.ContainerIO; -import htsjdk.samtools.cram.structure.CramHeader; -import htsjdk.samtools.cram.structure.Slice; import htsjdk.samtools.seekablestream.SeekableFileStream; import htsjdk.samtools.seekablestream.SeekableStream; import htsjdk.samtools.util.BufferedLineReader; +import htsjdk.samtools.util.LineReader; import htsjdk.samtools.util.Log; import htsjdk.samtools.util.RuntimeIOException; @@ -215,7 +213,14 @@ public static CramHeader readCramHeader(final InputStream inputStream) { try { final CramHeader header = readFormatDefinition(inputStream); - final SAMFileHeader samFileHeader = readSAMFileHeader(header.getVersion(), inputStream, new String(header.getId())); + // the location of the stream pointer after the CramHeader has been read + final long containerByteOffset = CramIO.DEFINITION_LENGTH; + + final SAMFileHeader samFileHeader = readSAMFileHeader( + header.getVersion(), + inputStream, + new String(header.getId()), + containerByteOffset); return new CramHeader(header.getVersion(), new String(header.getId()), samFileHeader); } catch (final IOException e) { @@ -262,7 +267,6 @@ private static long writeContainerForSamFileHeader(final int major, final SAMFil container.blockCount = 1; container.blocks = new Block[]{block}; container.landmarks = new int[0]; - container.slices = new Slice[0]; container.bases = 0; container.globalRecordCounter = 0; container.nofRecords = 0; @@ -271,7 +275,7 @@ private static long writeContainerForSamFileHeader(final int major, final SAMFil block.write(major, byteArrayOutputStream); container.containerByteSize = byteArrayOutputStream.size(); - final int containerHeaderByteSize = ContainerIO.writeContainerHeader(major, container, os); + final int containerHeaderByteSize = ContainerHeaderIO.writeContainerHeader(major, container, os); try { os.write(byteArrayOutputStream.toByteArray(), 0, byteArrayOutputStream.size()); } catch (final IOException e) { @@ -281,8 +285,11 @@ private static long writeContainerForSamFileHeader(final int major, final SAMFil return containerHeaderByteSize + byteArrayOutputStream.size(); } - private static SAMFileHeader readSAMFileHeader(final Version version, InputStream inputStream, final String id) { - final Container container = ContainerIO.readContainerHeader(version.major, inputStream); + private static SAMFileHeader readSAMFileHeader(final Version version, + final InputStream inputStream, + final String id, + final long containerByteOffset) { + final Container container = ContainerHeaderIO.readContainerHeader(version.major, inputStream, containerByteOffset); final Block block; { if (version.compatibleWith(CramVersions.CRAM_v3)) { @@ -293,36 +300,39 @@ private static SAMFileHeader readSAMFileHeader(final Version version, InputStrea } else { /* * pending issue: container.containerByteSize inputStream 2 bytes shorter - * then needed in the v21 test cram files. - */ + * than needed in the v21 test cram files. + */ block = Block.read(version.major, inputStream); } } - inputStream = new ByteArrayInputStream(block.getUncompressedContent()); + byte[] bytes; + try (final InputStream blockStream = new ByteArrayInputStream(block.getUncompressedContent())) { + + final ByteBuffer buffer = ByteBuffer.allocate(4); + buffer.order(ByteOrder.LITTLE_ENDIAN); - final ByteBuffer buffer = ByteBuffer.allocate(4); - buffer.order(ByteOrder.LITTLE_ENDIAN); - try { for (int i = 0; i < 4; i++) - buffer.put((byte) inputStream.read()); - } catch (final IOException e) { - throw new RuntimeIOException(e); - } - buffer.flip(); - final int size = buffer.asIntBuffer().get(); + buffer.put((byte) blockStream.read()); - final DataInputStream dataInputStream = new DataInputStream(inputStream); - final byte[] bytes = new byte[size]; - try { + buffer.flip(); + final int size = buffer.asIntBuffer().get(); + + final DataInputStream dataInputStream = new DataInputStream(blockStream); + bytes = new byte[size]; dataInputStream.readFully(bytes); } catch (final IOException e) { throw new RuntimeIOException(e); } - final BufferedLineReader bufferedLineReader = new BufferedLineReader(new ByteArrayInputStream(bytes)); final SAMTextHeaderCodec codec = new SAMTextHeaderCodec(); - return codec.decode(bufferedLineReader, id); + + try (final InputStream byteStream = new ByteArrayInputStream(bytes); + final LineReader lineReader = new BufferedLineReader(byteStream)) { + return codec.decode(lineReader, id); + } catch (final IOException e) { + throw new RuntimeIOException(e); + } } /** @@ -337,8 +347,8 @@ private static SAMFileHeader readSAMFileHeader(final Version version, InputStrea public static boolean replaceCramHeader(final File file, final CramHeader newHeader) { try (final CountingInputStream countingInputStream = new CountingInputStream(new FileInputStream(file)); final RandomAccessFile randomAccessFile = new RandomAccessFile(file, "rw")) { - final CramHeader header = readFormatDefinition(countingInputStream); - final Container c = ContainerIO.readContainerHeader(header.getVersion().major, countingInputStream); + final CramHeader cramHeader = readFormatDefinition(countingInputStream); + final Container c = ContainerHeaderIO.readContainerHeader(cramHeader.getVersion().major, countingInputStream); final long pos = countingInputStream.getCount(); countingInputStream.close(); diff --git a/src/main/java/htsjdk/samtools/cram/build/CramSpanContainerIterator.java b/src/main/java/htsjdk/samtools/cram/build/CramSpanContainerIterator.java index 240449155d..a237f63f4d 100644 --- a/src/main/java/htsjdk/samtools/cram/build/CramSpanContainerIterator.java +++ b/src/main/java/htsjdk/samtools/cram/build/CramSpanContainerIterator.java @@ -99,10 +99,7 @@ public Container next() { throw new RuntimeException("No more containers in this boundary."); } - final long offset = seekableStream.position(); - final Container c = ContainerIO.readContainer(cramHeader.getVersion(), seekableStream); - c.offset = offset; - return c; + return ContainerIO.readContainer(cramHeader.getVersion(), seekableStream); } catch (final IOException e) { throw new RuntimeIOException(e); } diff --git a/src/main/java/htsjdk/samtools/cram/structure/Container.java b/src/main/java/htsjdk/samtools/cram/structure/Container.java index 36acbca9fa..229db1eac1 100644 --- a/src/main/java/htsjdk/samtools/cram/structure/Container.java +++ b/src/main/java/htsjdk/samtools/cram/structure/Container.java @@ -34,7 +34,7 @@ public class Container { /** * Byte size of the content excluding header. */ - public int containerByteSize; + public int containerByteSize = 0; // minimum alignment start of the reads in this Container // uses a 1-based coordinate system @@ -65,13 +65,29 @@ public class Container { public CompressionHeader compressionHeader; // slices found in the container: - public Slice[] slices; + private Slice[] slices; + + // this Container's byte offset from the the start of the stream. + // Used for indexing. + public long byteOffset; + + public Slice[] getSlices() { + return slices; + } - // for indexing: /** - * Container start in the stream, in bytes. + * Assign {@link Slice}s to this Container and set its byteOffset. + * Also distribute the Container's byte offset to the {@link Slice}s, for indexing. + * @param slices the Slices belonging to this container + * @param byteOffset the byte location in the stream where this Container begins */ - public long offset; + void setSlicesAndByteOffset(final List slices, final long byteOffset) { + for (final Slice slice : slices) { + slice.containerByteOffset = byteOffset; + } + this.slices = slices.toArray(new Slice[0]); + this.byteOffset = byteOffset; + } /** * Construct this Container by providing its {@link ReferenceContext} @@ -105,10 +121,14 @@ public ReferenceContext getReferenceContext() { * TODO for general Container refactoring: make this part of construction * * @param containerSlices the constituent Slices of the Container + * @param compressionHeader the CRAM {@link CompressionHeader} to assign to the Container + * @param containerByteOffset the Container's byte offset from the start of the stream * @throws CRAMException for invalid Container states * @return the initialized Container */ - public static Container initializeFromSlices(final List containerSlices) { + public static Container initializeFromSlices(final List containerSlices, + final CompressionHeader compressionHeader, + final long containerByteOffset) { final Set sliceRefContexts = containerSlices.stream() .map(Slice::getReferenceContext) .collect(Collectors.toSet()); @@ -123,7 +143,8 @@ else if (sliceRefContexts.size() > 1) { final ReferenceContext commonRefContext = sliceRefContexts.iterator().next(); final Container container = new Container(commonRefContext); - container.slices = containerSlices.toArray(new Slice[0]); + container.setSlicesAndByteOffset(containerSlices, containerByteOffset); + container.compressionHeader = compressionHeader; if (commonRefContext.isMappedSingleRef()) { int start = Integer.MAX_VALUE; @@ -147,72 +168,61 @@ else if (sliceRefContexts.size() > 1) { } /** - * Assign this Container's slices, and populate those slices' - * indexing parameters from this Container - * @param slicesToPopulate the slices to populate + * Populate the indexing parameters of this Container's slices + * + * Requires: valid landmarks and containerByteSize + * + * @throws CRAMException when the Container is in an invalid state */ - void populateSlicesAndIndexingParameters(final ArrayList slicesToPopulate) { + public void distributeIndexingParametersToSlices() { + if (slices.length == 0) { + return; + } - slices = new Slice[slicesToPopulate.size()]; + if (landmarks == null) { + throw new CRAMException("Cannot set Slice indexing parameters if this Container does not have landmarks"); + } - if (slicesToPopulate.isEmpty()) { - return; + if (landmarks.length != slices.length) { + final String format = "This Container's landmark and slice counts do not match: %d landmarks and %d slices"; + throw new CRAMException(String.format(format, landmarks.length, slices.length)); } - final int lastSliceIndex = slicesToPopulate.size() - 1; + if (containerByteSize == 0) { + throw new CRAMException("Cannot set Slice indexing parameters if this Container's byte size is unknown"); + } + + final int lastSliceIndex = slices.length - 1; for (int i = 0; i < lastSliceIndex; i++) { - final Slice slice = slicesToPopulate.get(i); - slice.containerOffset = offset; + final Slice slice = slices[i]; slice.index = i; - slice.offset = landmarks[i]; - slice.size = landmarks[i + 1] - slice.offset; - slices[i] = slice; + slice.byteOffsetFromContainer = landmarks[i]; + slice.byteSize = landmarks[i + 1] - slice.byteOffsetFromContainer; } - final Slice lastSlice = slicesToPopulate.get(lastSliceIndex); - lastSlice.containerOffset = offset; + final Slice lastSlice = slices[lastSliceIndex]; lastSlice.index = lastSliceIndex; - lastSlice.offset = landmarks[lastSliceIndex]; + lastSlice.byteOffsetFromContainer = landmarks[lastSliceIndex]; // calculate a "final landmark" indicating the byte offset of the end of the container // equivalent to the container's total byte size final int containerHeaderSize = landmarks[0]; final int containerTotalByteSize = containerHeaderSize + containerByteSize; - lastSlice.size = containerTotalByteSize - lastSlice.offset; - - this.slices[lastSliceIndex] = lastSlice; + lastSlice.byteSize = containerTotalByteSize - lastSlice.byteOffsetFromContainer; } /** - * Retrieve the list of CRAI Index entries corresponding to this Container. - * + * Retrieve the list of CRAI Index entries corresponding to this Container * @return the list of CRAI Index entries */ public List getCRAIEntries() { - return Arrays.stream(slices) - .map(Slice::getCRAIEntry) - .collect(Collectors.toList()); - } - - /** - * Retrieve the list of CRAI Index entries corresponding to this Container. - * - * TODO: investigate why we sometimes split multi-ref Slices - * into different entries and sometimes do not - * - * TODO: clearly identify and enforce preconditions, e.g. - * a Container built from Slices which were in turn built from records - * - * @return the list of CRAI Index entries - */ - public List getCRAIEntriesSplittingMultiRef() { if (isEOF()) { return Collections.emptyList(); } - return Arrays.stream(slices) - .map(s -> s.getCRAIEntriesSplittingMultiRef(compressionHeader, landmarks, offset)) + return Arrays.stream(getSlices()) + .map(s -> s.getCRAIEntries(compressionHeader)) .flatMap(List::stream) .sorted() .collect(Collectors.toList()); @@ -223,17 +233,17 @@ public String toString() { return String .format("seqID=%s, start=%d, span=%d, records=%d, slices=%d, blocks=%d.", referenceContext, alignmentStart, alignmentSpan, nofRecords, - slices == null ? -1 : slices.length, blockCount); + getSlices() == null ? -1 : getSlices().length, blockCount); } public boolean isEOF() { final boolean v3 = containerByteSize == 15 && referenceContext.isUnmappedUnplaced() && alignmentStart == 4542278 && blockCount == 1 - && nofRecords == 0 && (slices == null || slices.length == 0); + && nofRecords == 0 && (getSlices() == null || getSlices().length == 0); final boolean v2 = containerByteSize == 11 && referenceContext.isUnmappedUnplaced() && alignmentStart == 4542278 && blockCount == 1 - && nofRecords == 0 && (slices == null || slices.length == 0); + && nofRecords == 0 && (getSlices() == null || getSlices().length == 0); return v3 || v2; } @@ -248,7 +258,7 @@ public boolean isEOF() { */ public Map getSpans(final ValidationStringency validationStringency) { final Map containerSpanMap = new HashMap<>(); - for (final Slice slice : slices) { + for (final Slice slice : getSlices()) { switch (slice.getReferenceContext().getType()) { case UNMAPPED_UNPLACED_TYPE: containerSpanMap.put(ReferenceContext.UNMAPPED_UNPLACED_CONTEXT, AlignmentSpan.UNPLACED_SPAN); diff --git a/src/main/java/htsjdk/samtools/cram/structure/ContainerHeaderIO.java b/src/main/java/htsjdk/samtools/cram/structure/ContainerHeaderIO.java index 543d8bc7d7..572fd93b7f 100644 --- a/src/main/java/htsjdk/samtools/cram/structure/ContainerHeaderIO.java +++ b/src/main/java/htsjdk/samtools/cram/structure/ContainerHeaderIO.java @@ -18,12 +18,9 @@ package htsjdk.samtools.cram.structure; import htsjdk.samtools.cram.build.CramIO; -import htsjdk.samtools.cram.io.CRC32OutputStream; -import htsjdk.samtools.cram.io.CramIntArray; -import htsjdk.samtools.cram.io.CramInt; -import htsjdk.samtools.cram.io.ITF8; -import htsjdk.samtools.cram.io.LTF8; +import htsjdk.samtools.cram.io.*; import htsjdk.samtools.cram.ref.ReferenceContext; +import htsjdk.samtools.seekablestream.SeekableStream; import htsjdk.samtools.util.RuntimeIOException; import java.io.ByteArrayInputStream; @@ -31,8 +28,18 @@ import java.io.InputStream; import java.io.OutputStream; -class ContainerHeaderIO { - public static Container readContainerHeader(final int major, final InputStream inputStream) { +public class ContainerHeaderIO { + /** + * Reads container header only from an {@link InputStream}. + * + * @param major the CRAM version to assume + * @param inputStream the input stream to read from + * @param containerByteOffset the byte offset from the start of the stream + * @return a new {@link Container} object with container header values filled out but empty body (no slices and blocks). + */ + public static Container readContainerHeader(final int major, + final InputStream inputStream, + final long containerByteOffset) { final byte[] peek = new byte[4]; try { int character = inputStream.read(); @@ -41,7 +48,7 @@ public static Container readContainerHeader(final int major, final InputStream i final byte[] eofMarker = major >= 3 ? CramIO.ZERO_F_EOF_MARKER : CramIO.ZERO_B_EOF_MARKER; try (final ByteArrayInputStream eofBAIS = new ByteArrayInputStream(eofMarker)) { - return readContainerHeader(majorVersionForEOF, eofBAIS); + return readContainerHeader(majorVersionForEOF, eofBAIS, containerByteOffset); } } peek[0] = (byte) character; @@ -70,9 +77,42 @@ public static Container readContainerHeader(final int major, final InputStream i if (major >= 3) container.checksum = CramInt.readInt32(inputStream); + container.byteOffset = containerByteOffset; return container; } + // convenience methods for SeekableStream and CountingInputStream + // TODO: merge these two classes? + + /** + * Reads container header only from a {@link SeekableStream}. + * + * @param major the CRAM version to assume + * @param seekableStream the seekable input stream to read from + * @return a new {@link Container} object with container header values filled out but empty body (no slices and blocks). + */ + public static Container readContainerHeader(final int major, final SeekableStream seekableStream) { + try { + final long containerByteOffset = seekableStream.position(); + return readContainerHeader(major, seekableStream, containerByteOffset); + } + catch (final IOException e) { + throw new RuntimeIOException(e); + } + } + + /** + * Reads container header only from a {@link CountingInputStream}. + * + * @param major the CRAM version to assume + * @param countingStream the counting input stream to read from + * @return a new {@link Container} object with container header values filled out but empty body (no slices and blocks). + */ + public static Container readContainerHeader(final int major, final CountingInputStream countingStream) { + final long containerByteOffset = countingStream.getCount(); + return readContainerHeader(major, countingStream, containerByteOffset); + } + /** * Write CRAM {@link Container} out into the given {@link OutputStream}. * @param major CRAM major version diff --git a/src/main/java/htsjdk/samtools/cram/structure/ContainerIO.java b/src/main/java/htsjdk/samtools/cram/structure/ContainerIO.java index a9697f29df..cd65c7442d 100644 --- a/src/main/java/htsjdk/samtools/cram/structure/ContainerIO.java +++ b/src/main/java/htsjdk/samtools/cram/structure/ContainerIO.java @@ -3,8 +3,10 @@ import htsjdk.samtools.cram.build.CramIO; import htsjdk.samtools.cram.common.CramVersionPolicies; import htsjdk.samtools.cram.common.Version; +import htsjdk.samtools.cram.io.CountingInputStream; import htsjdk.samtools.cram.structure.block.Block; import htsjdk.samtools.cram.structure.block.BlockContentType; +import htsjdk.samtools.seekablestream.SeekableStream; import htsjdk.samtools.util.Log; import htsjdk.samtools.util.RuntimeIOException; @@ -19,89 +21,98 @@ public class ContainerIO { private static final Log log = Log.getInstance(ContainerIO.class); /** - * Reads a CRAM container from the input stream. Returns an EOF container when there is no more data or the EOF marker found. + * Reads a CRAM container from an {@link InputStream}. + * Returns an EOF container when there is no more data or the EOF marker found. * * @param version CRAM version to expect - * @param inputStream the stream to read from + * @param inputStream the {@link InputStream} stream to read from + * @param containerByteOffset the byte offset from the start of the stream * @return a new container object read from the stream */ - public static Container readContainer(final Version version, final InputStream inputStream) { - final Container container = readContainer(version.major, inputStream); + private static Container readContainer(final Version version, + final InputStream inputStream, + final long containerByteOffset) { + Container container = readContainerInternal(version.major, inputStream, containerByteOffset); if (container == null) { // this will cause System.exit(1): CramVersionPolicies.eofNotFound(version); - return readContainer(version.major, new ByteArrayInputStream(CramIO.ZERO_B_EOF_MARKER)); + return readContainerInternal(version.major, new ByteArrayInputStream(CramIO.ZERO_B_EOF_MARKER), containerByteOffset); + } + + if (container.isEOF()) { + log.debug("EOF marker found, file/stream is complete."); } - if (container.isEOF()) log.debug("EOF marker found, file/stream is complete."); return container; } + // convenience methods for SeekableStream and CountingInputStream + // TODO: merge these two classes? + /** - * Reads next container from the stream. + * Reads a CRAM container from a Seekable input stream. + * Returns an EOF container when there is no more data or the EOF marker found. * - * @param inputStream the stream to read from - * @return CRAM container or null if no more data + * @param version CRAM version to expect + * @param seekableInputStream the {@link SeekableStream} stream to read from + * @return a new container object read from the stream */ - private static Container readContainer(final int major, final InputStream inputStream) { - return readContainer(major, inputStream, 0, Integer.MAX_VALUE); + public static Container readContainer(final Version version, final SeekableStream seekableInputStream) { + try { + final long containerByteOffset = seekableInputStream.position(); + return readContainer(version, seekableInputStream, containerByteOffset); + } + catch (final IOException e) { + throw new RuntimeIOException(e); + } } /** - * Reads container header only from a {@link InputStream}. + * Reads a CRAM container from a Counting input stream. + * Returns an EOF container when there is no more data or the EOF marker found. * - * @param major the CRAM version to assume - * @param inputStream the input stream to read from - * @return a new {@link Container} object with container header values filled out but empty body (no slices and blocks). + * @param version CRAM version to expect + * @param countingInputStream the {@link CountingInputStream} stream to read from + * @return a new container object read from the stream */ - public static Container readContainerHeader(final int major, final InputStream inputStream) { - return ContainerHeaderIO.readContainerHeader(major, inputStream); + public static Container readContainer(final Version version, final CountingInputStream countingInputStream) { + final long containerByteOffset = countingInputStream.getCount(); + return readContainer(version, countingInputStream, containerByteOffset); } - @SuppressWarnings("SameParameterValue") - private static Container readContainer(final int major, final InputStream inputStream, final int fromSlice, int howManySlices) { + /** + * Reads next container from the stream. + * + * @param major the CRAM version to assume + * @param inputStream the stream to read from + * @param containerByteOffset the byte offset from the start of the stream + * @return CRAM container or null if no more data + */ + private static Container readContainerInternal(final int major, + final InputStream inputStream, + final long containerByteOffset) { - final Container container = readContainerHeader(major, inputStream); + final Container container = ContainerHeaderIO.readContainerHeader(major, inputStream, containerByteOffset); if (container.isEOF()) { return container; } container.compressionHeader = CompressionHeader.read(major, inputStream); - howManySlices = Math.min(container.landmarks.length, howManySlices); - - try { - if (fromSlice > 0) //noinspection ResultOfMethodCallIgnored - inputStream.skip(container.landmarks[fromSlice]); - } catch (final IOException e) { - throw new RuntimeIOException(e); - } - final ArrayList slices = new ArrayList<>(); - for (int sliceCount = fromSlice; sliceCount < howManySlices - fromSlice; sliceCount++) { + for (int sliceCounter = 0; sliceCounter < container.landmarks.length; sliceCounter++) { slices.add(SliceIO.read(major, inputStream)); } - container.populateSlicesAndIndexingParameters(slices); + container.setSlicesAndByteOffset(slices, containerByteOffset); + container.distributeIndexingParametersToSlices(); log.debug("READ CONTAINER: " + container.toString()); return container; } - /** - * Writes a {@link Container} header information to a {@link OutputStream}. - * - * @param major the CRAM version to assume - * @param container the container holding the header to write - * @param outputStream the stream to write to - * @return the number of bytes written - */ - public static int writeContainerHeader(final int major, final Container container, final OutputStream outputStream) { - return ContainerHeaderIO.writeContainerHeader(major, container, outputStream); - } - /** * Writes a complete {@link Container} with it's header to a {@link OutputStream}. The method is aware of file header containers and is * suitable for general purpose use: basically any container is allowed. @@ -139,8 +150,7 @@ public static int writeContainer(final Version version, final Container containe container.blockCount = 1; final List landmarks = new ArrayList<>(); - for (int i = 0; i < container.slices.length; i++) { - final Slice slice = container.slices[i]; + for (final Slice slice : container.getSlices()) { landmarks.add(byteArrayOutputStream.size()); SliceIO.write(version.major, slice, byteArrayOutputStream); container.blockCount++; @@ -148,12 +158,12 @@ public static int writeContainer(final Version version, final Container containe if (slice.embeddedRefBlock != null) container.blockCount++; container.blockCount += slice.external.size(); } - container.landmarks = new int[landmarks.size()]; - for (int i = 0; i < container.landmarks.length; i++) - container.landmarks[i] = landmarks.get(i); - + container.landmarks = landmarks.stream().mapToInt(Integer::intValue).toArray(); container.containerByteSize = byteArrayOutputStream.size(); + // Slices require the Container's landmarks and containerByteSize before indexing + container.distributeIndexingParametersToSlices(); + int length = ContainerHeaderIO.writeContainerHeader(version.major, container, outputStream); try { outputStream.write(byteArrayOutputStream.toByteArray(), 0, byteArrayOutputStream.size()); diff --git a/src/main/java/htsjdk/samtools/cram/structure/Slice.java b/src/main/java/htsjdk/samtools/cram/structure/Slice.java index 760d2e23bb..8cb8ca15dd 100644 --- a/src/main/java/htsjdk/samtools/cram/structure/Slice.java +++ b/src/main/java/htsjdk/samtools/cram/structure/Slice.java @@ -75,16 +75,22 @@ public class Slice { // for indexing purposes + public static final int UNINITIALIZED_INDEXING_PARAMETER = -1; + // the Slice's offset in bytes from the beginning of its Container // equal to Container.landmarks[Slice.index] of its enclosing Container - public int offset = -1; + // BAI and CRAI + public int byteOffsetFromContainer = UNINITIALIZED_INDEXING_PARAMETER; // this Slice's Container's offset in bytes from the beginning of the stream - // equal to Container.offset of its enclosing Container - public long containerOffset = -1; + // equal to Container.byteOffset of its enclosing Container + // BAI and CRAI + public long containerByteOffset = UNINITIALIZED_INDEXING_PARAMETER; // this Slice's size in bytes - public int size = -1; - // this Slice's index within its Container - public int index = -1; + // CRAI only + public int byteSize = UNINITIALIZED_INDEXING_PARAMETER; + // this Slice's index number within its Container + // BAI only + public int index = UNINITIALIZED_INDEXING_PARAMETER; // to pass this to the container: public long bases; @@ -113,6 +119,56 @@ public ReferenceContext getReferenceContext() { return referenceContext; } + /** + * Confirm that we have initialized the 3 BAI index parameters: + * byteOffsetFromContainer, containerByteOffset, and index + */ + public void baiIndexInitializationCheck() { + final StringBuilder error = new StringBuilder(); + + if (byteOffsetFromContainer == UNINITIALIZED_INDEXING_PARAMETER) { + error.append("Cannot index this Slice for BAI because its byteOffsetFromContainer is unknown.").append(System.lineSeparator()); + } + + if (containerByteOffset == UNINITIALIZED_INDEXING_PARAMETER) { + error.append("Cannot index this Slice for BAI because its containerByteOffset is unknown.").append(System.lineSeparator()); + } + + if (index == UNINITIALIZED_INDEXING_PARAMETER) { + error.append("Cannot index this Slice for BAI because its index is unknown.").append(System.lineSeparator()); + } + + if (error.length() > 0) { + throw new CRAMException(error.toString()); + } + } + + /** + * Confirm that we have initialized the 3 CRAI index parameters: + * byteOffsetFromContainer, containerByteOffset, and byteSize + * + * NOTE: this is currently unused because we always use BAI + */ + void craiIndexInitializationCheck() { + final StringBuilder error = new StringBuilder(); + + if (byteOffsetFromContainer == UNINITIALIZED_INDEXING_PARAMETER) { + error.append("Cannot index this Slice for CRAI because its byteOffsetFromContainer is unknown.").append(System.lineSeparator()); + } + + if (containerByteOffset == UNINITIALIZED_INDEXING_PARAMETER) { + error.append("Cannot index this Slice for CRAI because its containerByteOffset is unknown.").append(System.lineSeparator()); + } + + if (byteSize == UNINITIALIZED_INDEXING_PARAMETER) { + error.append("Cannot index this Slice for CRAI because its byteSize is unknown.").append(System.lineSeparator()); + } + + if (error.length() > 0) { + throw new CRAMException(error.toString()); + } + } + private void alignmentBordersSanityCheck(final byte[] ref) { if (referenceContext.isUnmappedUnplaced()) { return; @@ -327,7 +383,7 @@ public CramRecordReader createCramRecordReader(final CompressionHeader header, * @param validationStringency how strict to be when reading CRAM records */ public Map getMultiRefAlignmentSpans(final CompressionHeader header, - final ValidationStringency validationStringency) { + final ValidationStringency validationStringency) { final MultiRefSliceAlignmentSpanReader reader = new MultiRefSliceAlignmentSpanReader(getCoreBlockInputStream(), getExternalBlockInputMap(), header, @@ -337,35 +393,20 @@ public Map getMultiRefAlignmentSpans(final Comp return reader.getReferenceSpans(); } - /** - * Generate a CRAI Index entry from this Slice - * @return a new CRAI Index Entry derived from this Slice - */ - public CRAIEntry getCRAIEntry() { - return new CRAIEntry(referenceContext.getSerializableId(), alignmentStart, alignmentSpan, containerOffset, offset, size); - } - /** * Generate a CRAI Index entry from this Slice and other container parameters, * splitting Multiple Reference slices into constituent reference sequence entries. * - * TODO: investigate why we sometimes need to pass in an external containerStartByteOffset - * because this Slice's containerOffset is incorrect - * - * TODO: investigate why we sometimes split multi-ref and sometimes do not - * * @param compressionHeader the enclosing {@link Container}'s CompressionHeader - * @param landmarks the enclosing {@link Container}'s landmarks - * @param containerStartByteOffset the byte offset of the enclosing {@link Container} * @return a list of CRAI Index Entries derived from this Slice */ - public List getCRAIEntriesSplittingMultiRef(final CompressionHeader compressionHeader, - final int[] landmarks, - final long containerStartByteOffset) { + public List getCRAIEntries(final CompressionHeader compressionHeader) { if (! compressionHeader.isCoordinateSorted()) { throw new CRAMException("Cannot construct index if the CRAM is not Coordinate Sorted"); } + craiIndexInitializationCheck(); + if (referenceContext.isMultiRef()) { final Map spans = getMultiRefAlignmentSpans(compressionHeader, ValidationStringency.DEFAULT_STRINGENCY); @@ -373,15 +414,21 @@ public List getCRAIEntriesSplittingMultiRef(final CompressionHeader c .map(e -> new CRAIEntry(e.getKey().getSerializableId(), e.getValue().getStart(), e.getValue().getSpan(), - containerStartByteOffset, - landmarks[index], - size)) + containerByteOffset, + byteOffsetFromContainer, + byteSize)) .sorted() .collect(Collectors.toList()); } else { // single ref or unmapped final int sequenceId = referenceContext.getSerializableId(); - return Collections.singletonList(new CRAIEntry(sequenceId, alignmentStart, alignmentSpan, containerStartByteOffset, offset, size)); + return Collections.singletonList(new CRAIEntry( + sequenceId, + alignmentStart, + alignmentSpan, + containerByteOffset, + byteOffsetFromContainer, + byteSize)); } } @@ -474,18 +521,15 @@ private static Slice initializeFromRecords(final Collection records = CRAMStructureTestUtil.getMultiRefRecordsWithOneUnmapped(RECORDS_PER_SLICE * 2); - final Container container1 = FACTORY.buildContainer(records.subList(0, RECORDS_PER_SLICE)); + final long dummyByteOffset = 0; + final Container container1 = FACTORY.buildContainer(records.subList(0, RECORDS_PER_SLICE), dummyByteOffset); Assert.assertTrue(container1.getReferenceContext().isMultiRef()); - final Container container2 = FACTORY.buildContainer(records.subList(RECORDS_PER_SLICE, RECORDS_PER_SLICE * 2)); + final Container container2 = FACTORY.buildContainer(records.subList(RECORDS_PER_SLICE, RECORDS_PER_SLICE * 2), dummyByteOffset); Assert.assertTrue(container2.getReferenceContext().isMultiRef()); final AbstractBAMFileIndex index = getAbstractBAMFileIndex(indexMethod.index(container1, container2)); @@ -143,8 +177,9 @@ public void test_processUnplacedContainersAsSlices() { unplacedContainers(this::indexContainersAsSingleRefSlices); } - public void unplacedContainers(final IndexContainers indexMethod) { - final Container unplacedContainer = FACTORY.buildContainer(CRAMStructureTestUtil.getUnplacedRecords(RECORDS_PER_SLICE)); + private void unplacedContainers(final IndexContainers indexMethod) { + final long dummyByteOffset = 0; + final Container unplacedContainer = FACTORY.buildContainer(CRAMStructureTestUtil.getUnplacedRecords(RECORDS_PER_SLICE), dummyByteOffset); Assert.assertTrue(unplacedContainer.getReferenceContext().isUnmappedUnplaced()); // these two sets of records are "half" unplaced: they have either a valid reference index or start position, @@ -153,12 +188,16 @@ public void unplacedContainers(final IndexContainers indexMethod) { // these will be considered unplaced by CRAMBAIIndexer - final Container halfUnplacedNoStartContainer = FACTORY.buildContainer(CRAMStructureTestUtil.getHalfUnplacedNoStartRecords(RECORDS_PER_SLICE, 0)); + final Container halfUnplacedNoStartContainer = FACTORY.buildContainer( + CRAMStructureTestUtil.getHalfUnplacedNoStartRecords(RECORDS_PER_SLICE, 0), + dummyByteOffset); Assert.assertTrue(halfUnplacedNoStartContainer.getReferenceContext().isUnmappedUnplaced()); // these will NOT be considered unplaced by CRAMBAIIndexer - final Container halfUnplacedNoRefContainer = FACTORY.buildContainer(CRAMStructureTestUtil.getHalfUnplacedNoRefRecords(RECORDS_PER_SLICE)); + final Container halfUnplacedNoRefContainer = FACTORY.buildContainer( + CRAMStructureTestUtil.getHalfUnplacedNoRefRecords(RECORDS_PER_SLICE), + dummyByteOffset); Assert.assertTrue(halfUnplacedNoRefContainer.getReferenceContext().isUnmappedUnplaced()); final AbstractBAMFileIndex index = getAbstractBAMFileIndex(indexMethod.index( @@ -177,52 +216,59 @@ public void testRequireCoordinateSortOrder() { new CRAMBAIIndexer(new ByteArrayOutputStream(), header); } + private Slice getSlice(final ReferenceContext refContext, + final int mappedReadsCount, + final int unmappedReadsCount, + final int unplacedReadsCount) { + final Slice mapped = new Slice(refContext); + mapped.mappedReadsCount = mappedReadsCount; + mapped.unmappedReadsCount = unmappedReadsCount; + mapped.unplacedReadsCount = unplacedReadsCount; + // arbitrary - need these for indexing + mapped.byteOffsetFromContainer = 789; + mapped.containerByteOffset = 123; + mapped.index = 456; + return mapped; + } + private interface IndexContainers { byte[] index(final Container... containers); } private byte[] indexSingleRefSlice(final Slice slice) { - byte[] indexBytes; - try (final ByteArrayOutputStream indexBAOS = new ByteArrayOutputStream()) { - - final CRAMBAIIndexer indexer = new CRAMBAIIndexer(indexBAOS, SAM_FILE_HEADER); + return getIndexerOutput(indexer -> { indexer.processAsSingleReferenceSlice(slice); - indexer.finish(); - - indexBytes = indexBAOS.toByteArray(); - } - catch (final IOException e) { - throw new RuntimeIOException(e); - } - return indexBytes; + }); } private byte[] indexContainers(final Container... containers) { - byte[] indexBytes; - try (final ByteArrayOutputStream indexBAOS = new ByteArrayOutputStream()) { - - final CRAMBAIIndexer indexer = new CRAMBAIIndexer(indexBAOS, SAM_FILE_HEADER); + return getIndexerOutput(indexer -> { for (final Container container : containers) { + // this sets up the Container's landmarks, required for indexing + // readContainer() also does this + ContainerIO.writeContainer(CramVersions.DEFAULT_CRAM_VERSION, container, new ByteArrayOutputStream()); indexer.processContainer(container, ValidationStringency.STRICT); } - indexer.finish(); - - indexBytes = indexBAOS.toByteArray(); - } - catch (final IOException e) { - throw new RuntimeIOException(e); - } - return indexBytes; + }); } private byte[] indexContainersAsSingleRefSlices(final Container... containers) { + return getIndexerOutput(indexer -> { + for (final Container container : containers) { + // this sets up the Container's landmarks, required for indexing + // readContainer() also does this + ContainerIO.writeContainer(CramVersions.DEFAULT_CRAM_VERSION, container, new ByteArrayOutputStream()); + indexer.processAsSingleReferenceSlice(container.getSlices()[0]); + } + }); + } + + private byte[] getIndexerOutput(final Consumer indexerFunction) { byte[] indexBytes; try (final ByteArrayOutputStream indexBAOS = new ByteArrayOutputStream()) { final CRAMBAIIndexer indexer = new CRAMBAIIndexer(indexBAOS, SAM_FILE_HEADER); - for (final Container container : containers) { - indexer.processAsSingleReferenceSlice(container.slices[0]); - } + indexerFunction.accept(indexer); indexer.finish(); indexBytes = indexBAOS.toByteArray(); diff --git a/src/test/java/htsjdk/samtools/CRAMSliceMD5Test.java b/src/test/java/htsjdk/samtools/CRAMSliceMD5Test.java index 4833ac613d..39569cf5a6 100644 --- a/src/test/java/htsjdk/samtools/CRAMSliceMD5Test.java +++ b/src/test/java/htsjdk/samtools/CRAMSliceMD5Test.java @@ -3,6 +3,7 @@ import htsjdk.HtsjdkTest; import htsjdk.samtools.cram.CRAMException; import htsjdk.samtools.cram.build.CramIO; +import htsjdk.samtools.cram.io.CountingInputStream; import htsjdk.samtools.cram.ref.CRAMReferenceSource; import htsjdk.samtools.cram.ref.ReferenceSource; import htsjdk.samtools.cram.structure.Container; @@ -30,10 +31,14 @@ public void testSliceMD5() throws IOException { final CramTestCase test = new CramTestCase(); // read the CRAM: - final ByteArrayInputStream bais = new ByteArrayInputStream(test.cramData); - final CramHeader cramHeader = CramIO.readCramHeader(bais); - final Container container = ContainerIO.readContainer(cramHeader.getVersion(), bais); - final Slice slice = container.slices[0]; + Container container; + try (final ByteArrayInputStream bais = new ByteArrayInputStream(test.cramData); + final CountingInputStream inputStream = new CountingInputStream(bais)) { + final CramHeader cramHeader = CramIO.readCramHeader(inputStream); + container = ContainerIO.readContainer(cramHeader.getVersion(), inputStream); + } + + final Slice slice = container.getSlices()[0]; Assert.assertEquals(slice.alignmentStart, 1); Assert.assertEquals(slice.alignmentSpan, test.referenceBases.length); // check the slice MD5 is the MD5 of upper-cased ref bases: diff --git a/src/test/java/htsjdk/samtools/CramContainerHeaderIteratorTest.java b/src/test/java/htsjdk/samtools/CramContainerHeaderIteratorTest.java index 55cedc2c03..ec876b141c 100644 --- a/src/test/java/htsjdk/samtools/CramContainerHeaderIteratorTest.java +++ b/src/test/java/htsjdk/samtools/CramContainerHeaderIteratorTest.java @@ -48,14 +48,14 @@ public void test() throws IOException { Assert.assertEquals(headerOnlyContainer.blockCount, fullContainer.blockCount); Assert.assertEquals(headerOnlyContainer.landmarks, fullContainer.landmarks); Assert.assertEquals(headerOnlyContainer.checksum, fullContainer.checksum); - Assert.assertEquals(headerOnlyContainer.offset, fullContainer.offset); + Assert.assertEquals(headerOnlyContainer.byteOffset, fullContainer.byteOffset); // unpopulated fields Assert.assertNull(headerOnlyContainer.blocks); Assert.assertNull(headerOnlyContainer.compressionHeader); - Assert.assertNull(headerOnlyContainer.slices); + Assert.assertNull(headerOnlyContainer.getSlices()); // try to read a container from the offset to check it's correct try (SeekableFileStream seekableFileStream = new SeekableFileStream(cramFile)) { - seekableFileStream.seek(headerOnlyContainer.offset); + seekableFileStream.seek(headerOnlyContainer.byteOffset); Container container = ContainerIO.readContainer(actualHeader.getVersion(), seekableFileStream); Assert.assertEquals(container.alignmentStart, fullContainer.alignmentStart); Assert.assertEquals(container.alignmentSpan, fullContainer.alignmentSpan); diff --git a/src/test/java/htsjdk/samtools/cram/CRAIEntryTest.java b/src/test/java/htsjdk/samtools/cram/CRAIEntryTest.java index a49f9cc71f..223f9b4ada 100644 --- a/src/test/java/htsjdk/samtools/cram/CRAIEntryTest.java +++ b/src/test/java/htsjdk/samtools/cram/CRAIEntryTest.java @@ -3,6 +3,7 @@ import htsjdk.samtools.cram.build.CompressionHeaderFactory; import htsjdk.samtools.cram.ref.ReferenceContext; import htsjdk.samtools.cram.structure.*; +import htsjdk.samtools.util.TestUtil; import org.testng.Assert; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; @@ -13,13 +14,24 @@ * Created by vadim on 25/08/2015. */ public class CRAIEntryTest extends CramRecordTestHelper { + private static final Random RANDOM = new Random(TestUtil.RANDOM_SEED); + + private static final CompressionHeader COMPRESSION_HEADER = + new CompressionHeaderFactory().build(Collections.EMPTY_LIST, null, true); + + @Test(dataProvider = "uninitializedCRAIParameterTestCases", dataProviderClass = CRAMStructureTestUtil.class, expectedExceptions = CRAMException.class) + public void uninitializedSliceParameterTest(final Slice s) { + s.getCRAIEntries(COMPRESSION_HEADER); + } @Test public void singleRefTestGetCRAIEntries() { - final Slice slice1 = createSingleRefSlice(0); - final Slice slice2 = createSingleRefSlice(0); + final ReferenceContext refContext = new ReferenceContext(0); + final Slice slice1 = createSliceWithArbitraryValues(refContext); + final Slice slice2 = createSliceWithArbitraryValues(refContext); - final Container container = Container.initializeFromSlices(Arrays.asList(slice1, slice2)); + final long containerByteOffset = 12345; + final Container container = Container.initializeFromSlices(Arrays.asList(slice1, slice2), COMPRESSION_HEADER, containerByteOffset); final List entries = container.getCRAIEntries(); Assert.assertNotNull(entries); @@ -35,31 +47,18 @@ public void multiRefExceptionTest() { new CRAIEntry(ReferenceContext.MULTIPLE_REFERENCE_ID, dummy, dummy, dummy, dummy, dummy); } - @Test(expectedExceptions = CRAMException.class) - public void multiRefTestGetCRAIEntries() { - final Slice multi = new Slice(ReferenceContext.MULTIPLE_REFERENCE_CONTEXT); - - final Container container = Container.initializeFromSlices(Arrays.asList(multi)); - container.getCRAIEntries(); - } - - // requirement for getCRAIEntriesSplittingMultiRef(): - // a Container built from Slices which were in turn built from records - - // TODO: clearly identify and enforce preconditions - @Test - public void testGetCRAIEntriesSplittingMultiRef() { - final int[] landmarks = new int[] { 100, 200 }; + public void multiRefTestGetCRAIEntries() { + final int sliceAlnSpan = CRAMStructureTestUtil.READ_LENGTH_FOR_TEST_RECORDS; + final int sliceByteSize = 500; - // the indices of the above landmarks array final int slice1Index = 0; - final int slice2Index = 1; - final int slice1AlnStartOffset = 10; + final int slice1ByteOffsetFromContainer = 750; + + final int slice2Index = 1; final int slice2AlnStartOffset = 20; - final int sliceAlnSpan = CRAMStructureTestUtil.READ_LENGTH_FOR_TEST_RECORDS; - final int sliceByteSize = 500; + final int slice2ByteOffsetFromContainer = slice1ByteOffsetFromContainer + sliceByteSize; final int containerOffset = 1000; @@ -80,31 +79,30 @@ public void testGetCRAIEntriesSplittingMultiRef() { final Slice slice2 = Slice.buildSlice(records2, compressionHeader); slice1.index = slice1Index; - slice1.size = sliceByteSize; + slice1.byteSize = sliceByteSize; + slice1.byteOffsetFromContainer = slice1ByteOffsetFromContainer; slice2.index = slice2Index; - slice2.size = sliceByteSize; + slice2.byteSize = sliceByteSize; + slice2.byteOffsetFromContainer = slice2ByteOffsetFromContainer; - final Container container = Container.initializeFromSlices(Arrays.asList(slice1, slice2)); - container.compressionHeader = compressionHeader; - container.landmarks = landmarks; - container.offset = containerOffset; + final Container container = Container.initializeFromSlices(Arrays.asList(slice1, slice2), compressionHeader, containerOffset); - final List entries = container.getCRAIEntriesSplittingMultiRef(); + final List entries = container.getCRAIEntries(); Assert.assertNotNull(entries); Assert.assertEquals(entries.size(), 6); // slice 1 has index entries for refs 0, 1, 2 - assertEntryForSlice(entries.get(0), 0, slice1AlnStartOffset, sliceAlnSpan, containerOffset, landmarks[0], sliceByteSize); - assertEntryForSlice(entries.get(1), 1, slice1AlnStartOffset + 1, sliceAlnSpan, containerOffset, landmarks[0], sliceByteSize); - assertEntryForSlice(entries.get(2), 2, slice1AlnStartOffset + 2, sliceAlnSpan, containerOffset, landmarks[0], sliceByteSize); + assertEntryForSlice(entries.get(0), 0, slice1AlnStartOffset, sliceAlnSpan, containerOffset, slice1ByteOffsetFromContainer, sliceByteSize); + assertEntryForSlice(entries.get(1), 1, slice1AlnStartOffset + 1, sliceAlnSpan, containerOffset, slice1ByteOffsetFromContainer, sliceByteSize); + assertEntryForSlice(entries.get(2), 2, slice1AlnStartOffset + 2, sliceAlnSpan, containerOffset, slice1ByteOffsetFromContainer, sliceByteSize); // slice 2 has index entries for refs 2, 3, 4 - assertEntryForSlice(entries.get(3), 2, slice2AlnStartOffset + 3, sliceAlnSpan, containerOffset, landmarks[1], sliceByteSize); - assertEntryForSlice(entries.get(4), 3, slice2AlnStartOffset + 4, sliceAlnSpan, containerOffset, landmarks[1], sliceByteSize); - assertEntryForSlice(entries.get(5), 4, slice2AlnStartOffset + 5, sliceAlnSpan, containerOffset, landmarks[1], sliceByteSize); + assertEntryForSlice(entries.get(3), 2, slice2AlnStartOffset + 3, sliceAlnSpan, containerOffset, slice2ByteOffsetFromContainer, sliceByteSize); + assertEntryForSlice(entries.get(4), 3, slice2AlnStartOffset + 4, sliceAlnSpan, containerOffset, slice2ByteOffsetFromContainer, sliceByteSize); + assertEntryForSlice(entries.get(5), 4, slice2AlnStartOffset + 5, sliceAlnSpan, containerOffset, slice2ByteOffsetFromContainer, sliceByteSize); } @Test @@ -264,15 +262,15 @@ public void testCompareTo() { Assert.assertEquals(testEntries, expected); } - private static Slice createSingleRefSlice(final int sequenceId) { - int counter = sequenceId; + private static Slice createSliceWithArbitraryValues(final ReferenceContext refContext) { + int counter = RANDOM.nextInt(100); - final Slice single = new Slice(new ReferenceContext(sequenceId)); + final Slice single = new Slice(refContext); single.alignmentStart = counter++; single.alignmentSpan = counter++; - single.containerOffset = counter++; - single.offset = counter++; - single.size = counter++; + single.containerByteOffset = counter++; + single.byteOffsetFromContainer = counter++; + single.byteSize = counter++; return single; } @@ -290,7 +288,7 @@ private List createMultiRefRecords(final int indexStart, private void assertEntryForSlice(final CRAIEntry entry, final Slice slice) { assertEntryForSlice(entry, slice.getReferenceContext().getSerializableId(), - slice.alignmentStart, slice.alignmentSpan, slice.containerOffset, slice.offset, slice.size); + slice.alignmentStart, slice.alignmentSpan, slice.containerByteOffset, slice.byteOffsetFromContainer, slice.byteSize); } private void assertEntryForSlice(final CRAIEntry entry, diff --git a/src/test/java/htsjdk/samtools/cram/VersionTest.java b/src/test/java/htsjdk/samtools/cram/VersionTest.java index c58f22b162..c74e24e425 100644 --- a/src/test/java/htsjdk/samtools/cram/VersionTest.java +++ b/src/test/java/htsjdk/samtools/cram/VersionTest.java @@ -10,6 +10,7 @@ import htsjdk.samtools.cram.io.CramInt; import htsjdk.samtools.cram.io.InputStreamUtils; import htsjdk.samtools.cram.ref.ReferenceSource; +import htsjdk.samtools.cram.structure.ContainerHeaderIO; import htsjdk.samtools.cram.structure.block.Block; import htsjdk.samtools.cram.structure.Container; import htsjdk.samtools.cram.structure.ContainerIO; @@ -74,7 +75,7 @@ public void test_V3() throws IOException { // position stream at the start of the 1st container: cramSeekableStream.seek(containerStart); // read only container header: - ContainerIO.readContainerHeader(version.major, cramSeekableStream); + ContainerHeaderIO.readContainerHeader(version.major, cramSeekableStream, containerStart); // read the following 4 bytes of CRC32: int crcByteSize = 4; diff --git a/src/test/java/htsjdk/samtools/cram/build/ContainerFactoryTest.java b/src/test/java/htsjdk/samtools/cram/build/ContainerFactoryTest.java index dcf6f66f5e..2d68abd77e 100644 --- a/src/test/java/htsjdk/samtools/cram/build/ContainerFactoryTest.java +++ b/src/test/java/htsjdk/samtools/cram/build/ContainerFactoryTest.java @@ -27,22 +27,23 @@ public void recordsPerSliceTest() { // build a container with the max records per slice final List records = CRAMStructureTestUtil.getSingleRefRecords(recordsPerSlice, 0); - final Container container = factory.buildContainer(records); + final long dummyByteOffset = 0; + final Container container = factory.buildContainer(records, dummyByteOffset); Assert.assertEquals(container.nofRecords, recordsPerSlice); - Assert.assertEquals(container.slices.length, 1); - Assert.assertEquals(container.slices[0].nofRecords, recordsPerSlice); + Assert.assertEquals(container.getSlices().length, 1); + Assert.assertEquals(container.getSlices()[0].nofRecords, recordsPerSlice); // build a container with 1 too many records to fit into a slice // 2 slices: recordsPerSlice records and 1 record records.add(CRAMStructureTestUtil.createMappedRecord(recordsPerSlice, 0, 1)); - final Container container2 = factory.buildContainer(records); + final Container container2 = factory.buildContainer(records, dummyByteOffset); Assert.assertEquals(container2.nofRecords, recordsPerSlice + 1); - Assert.assertEquals(container2.slices.length, 2); - Assert.assertEquals(container2.slices[0].nofRecords, recordsPerSlice); - Assert.assertEquals(container2.slices[1].nofRecords, 1); + Assert.assertEquals(container2.getSlices().length, 2); + Assert.assertEquals(container2.getSlices()[0].nofRecords, recordsPerSlice); + Assert.assertEquals(container2.getSlices()[1].nofRecords, 1); } @DataProvider(name = "containerStateTests") @@ -103,18 +104,21 @@ public void testContainerState(final List records, final ReferenceContext expectedReferenceContext, final int expectedAlignmentStart, final int expectedAlignmentSpan) { - final Container container = buildFromNewFactory(records); + final ContainerFactory factory = new ContainerFactory(CRAMStructureTestUtil.getSAMFileHeaderForTests(), TEST_RECORD_COUNT); + final long byteOffset = 9999; + final Container container = factory.buildContainer(records, byteOffset); final int globalRecordCounter = 0; // first Container final int baseCount = TEST_RECORD_COUNT * READ_LENGTH_FOR_TEST_RECORDS; CRAMStructureTestUtil.assertContainerState(container, expectedReferenceContext, expectedAlignmentStart, expectedAlignmentSpan, - TEST_RECORD_COUNT, baseCount, globalRecordCounter); + TEST_RECORD_COUNT, baseCount, globalRecordCounter, byteOffset); } @Test public void testMultiRefWithStateTransitions() { - final List containers = CRAMStructureTestUtil.getMultiRefContainersForStateTest(); + final long firstContainerByteOffset = 737735342; + final List containers = CRAMStructureTestUtil.getMultiRefContainersForStateTest(firstContainerByteOffset); // first container is single-ref @@ -124,25 +128,21 @@ public void testMultiRefWithStateTransitions() { int recordCount = 1; int globalRecordCount = 0; // first container - no records yet CRAMStructureTestUtil.assertContainerState(containers.get(0), refContext, alignmentStart, alignmentSpan, - recordCount, READ_LENGTH_FOR_TEST_RECORDS * recordCount, globalRecordCount); + recordCount, READ_LENGTH_FOR_TEST_RECORDS * recordCount, globalRecordCount, + firstContainerByteOffset); // when other refs are added, subsequent containers are multiref recordCount++; // this container has 2 records globalRecordCount = containers.get(0).nofRecords; // we've seen 1 record before this container CRAMStructureTestUtil.assertContainerState(containers.get(1), ReferenceContext.MULTIPLE_REFERENCE_CONTEXT, - Slice.NO_ALIGNMENT_START, Slice.NO_ALIGNMENT_SPAN, - recordCount, READ_LENGTH_FOR_TEST_RECORDS * recordCount, globalRecordCount); + Slice.NO_ALIGNMENT_START, Slice.NO_ALIGNMENT_SPAN, recordCount, + READ_LENGTH_FOR_TEST_RECORDS * recordCount, globalRecordCount, firstContainerByteOffset + 1); recordCount++; // this container has 3 records globalRecordCount = containers.get(0).nofRecords + containers.get(1).nofRecords; // we've seen 3 records before this container CRAMStructureTestUtil.assertContainerState(containers.get(2), ReferenceContext.MULTIPLE_REFERENCE_CONTEXT, - Slice.NO_ALIGNMENT_START, Slice.NO_ALIGNMENT_SPAN, - recordCount, READ_LENGTH_FOR_TEST_RECORDS * recordCount, globalRecordCount); - } - - private Container buildFromNewFactory(final List records) { - final ContainerFactory factory = new ContainerFactory(CRAMStructureTestUtil.getSAMFileHeaderForTests(), TEST_RECORD_COUNT); - return factory.buildContainer(records); + Slice.NO_ALIGNMENT_START, Slice.NO_ALIGNMENT_SPAN, recordCount, + READ_LENGTH_FOR_TEST_RECORDS * recordCount, globalRecordCount, firstContainerByteOffset + 2); } } diff --git a/src/test/java/htsjdk/samtools/cram/build/ContainerParserTest.java b/src/test/java/htsjdk/samtools/cram/build/ContainerParserTest.java index fd39b71f26..a6122c367a 100644 --- a/src/test/java/htsjdk/samtools/cram/build/ContainerParserTest.java +++ b/src/test/java/htsjdk/samtools/cram/build/ContainerParserTest.java @@ -2,6 +2,7 @@ import htsjdk.HtsjdkTest; import htsjdk.samtools.ValidationStringency; +import htsjdk.samtools.cram.io.CountingInputStream; import htsjdk.samtools.cram.structure.CRAMStructureTestUtil; import htsjdk.samtools.cram.common.CramVersions; import htsjdk.samtools.cram.common.Version; @@ -36,9 +37,15 @@ private Object[][] cramVersions() { @Test(dataProvider = "cramVersions") public void testEOF(final Version version) throws IOException { + byte[] eofBytes; try (final ByteArrayOutputStream baos = new ByteArrayOutputStream()) { CramIO.issueEOF(version, baos); - final Container container = ContainerIO.readContainer(version, new ByteArrayInputStream(baos.toByteArray())); + eofBytes = baos.toByteArray(); + } + + try (final ByteArrayInputStream bais = new ByteArrayInputStream(eofBytes); + final CountingInputStream inputStream = new CountingInputStream(bais)) { + final Container container = ContainerIO.readContainer(version, inputStream); Assert.assertTrue(container.isEOF()); Assert.assertTrue(PARSER.getRecords(container, null, ValidationStringency.STRICT).isEmpty()); } @@ -77,7 +84,8 @@ private Object[][] getRecordsTestCases() { @Test(dataProvider = "getRecordsTestCases") public void getRecordsTest(final List records) { - final Container container = FACTORY.buildContainer(records); + final long dummyByteOffset = 0; + final Container container = FACTORY.buildContainer(records, dummyByteOffset); final List roundTripRecords = PARSER.getRecords(container, null, ValidationStringency.STRICT); // TODO this fails. return to this when refactoring Container and CramCompressionRecord @@ -96,7 +104,8 @@ public void testMultirefContainer() { } } - final Container container = FACTORY.buildContainer(CRAMStructureTestUtil.getMultiRefRecords(TEST_RECORD_COUNT)); + final long dummyByteOffset = 0; + final Container container = FACTORY.buildContainer(CRAMStructureTestUtil.getMultiRefRecords(TEST_RECORD_COUNT), dummyByteOffset); final Map spanMap = container.getSpans(ValidationStringency.STRICT); Assert.assertEquals(spanMap, expectedSpans); @@ -108,7 +117,8 @@ public void testMultirefContainerWithUnmapped() { expectedSpans.add(new AlignmentSpan(1, READ_LENGTH_FOR_TEST_RECORDS, 1, 0)); expectedSpans.add(new AlignmentSpan(2, READ_LENGTH_FOR_TEST_RECORDS, 1, 0)); - final List containers = CRAMStructureTestUtil.getMultiRefContainersForStateTest(); + final long firstContainerByteOffset = 999; + final List containers = CRAMStructureTestUtil.getMultiRefContainersForStateTest(firstContainerByteOffset); // first container is single-ref diff --git a/src/test/java/htsjdk/samtools/cram/structure/CRAMStructureTestUtil.java b/src/test/java/htsjdk/samtools/cram/structure/CRAMStructureTestUtil.java index bef39a8c04..baf5e784db 100644 --- a/src/test/java/htsjdk/samtools/cram/structure/CRAMStructureTestUtil.java +++ b/src/test/java/htsjdk/samtools/cram/structure/CRAMStructureTestUtil.java @@ -1,5 +1,6 @@ package htsjdk.samtools.cram.structure; +import htsjdk.HtsjdkTest; import htsjdk.samtools.SAMFileHeader; import htsjdk.samtools.SAMRecord; import htsjdk.samtools.SAMSequenceDictionary; @@ -7,12 +8,13 @@ import htsjdk.samtools.cram.build.ContainerFactory; import htsjdk.samtools.cram.ref.ReferenceContext; import org.testng.Assert; +import org.testng.annotations.DataProvider; import java.util.ArrayList; import java.util.Collections; import java.util.List; -public class CRAMStructureTestUtil { +public class CRAMStructureTestUtil extends HtsjdkTest { public static final int READ_LENGTH_FOR_TEST_RECORDS = 123; private static final SAMFileHeader header = initializeSAMFileHeaderForTests(); @@ -141,7 +143,7 @@ public static List getMultiRefRecordsWithOneUnmapped(fina return retval; } - public static List getMultiRefContainersForStateTest() { + public static List getMultiRefContainersForStateTest(final long firstContainerByteOffset) { final ContainerFactory factory = new ContainerFactory(getSAMFileHeaderForTests(), 10); final List testContainers = new ArrayList<>(3); @@ -149,15 +151,15 @@ public static List getMultiRefContainersForStateTest() { int index = 0; records.add(createMappedRecord(index, index, index + 1)); - final Container container0 = factory.buildContainer(records); + final Container container0 = factory.buildContainer(records, firstContainerByteOffset); index++; records.add(createMappedRecord(index, index, index + 1)); - final Container container1 = factory.buildContainer(records); + final Container container1 = factory.buildContainer(records, firstContainerByteOffset + 1); index++; records.add(createUnmappedUnplacedRecord(index)); - final Container container2 = factory.buildContainer(records); + final Container container2 = factory.buildContainer(records, firstContainerByteOffset + 2); testContainers.add(container0); testContainers.add(container1); @@ -165,6 +167,62 @@ public static List getMultiRefContainersForStateTest() { return testContainers; } + private static Slice getIndexInitializedSlice() { + final ReferenceContext refContext = new ReferenceContext(0); + + final Slice slice = new Slice(refContext); + slice.byteOffsetFromContainer = 1; + slice.containerByteOffset = 1; + slice.byteSize = 1; + slice.index = 1; + + return slice; + } + + private static Slice getNoContainerOffsetSlice() { + final Slice noContainerOffset = getIndexInitializedSlice(); + noContainerOffset.containerByteOffset = Slice.UNINITIALIZED_INDEXING_PARAMETER; + return noContainerOffset; + } + + private static Slice getNoOffsetFromContainerSlice() { + final Slice noOffsetFromContainer = getIndexInitializedSlice(); + noOffsetFromContainer.byteOffsetFromContainer = Slice.UNINITIALIZED_INDEXING_PARAMETER; + return noOffsetFromContainer; + } + + private static Slice getNoSizeSlice() { + final Slice noSize = getIndexInitializedSlice(); + noSize.byteSize = Slice.UNINITIALIZED_INDEXING_PARAMETER; + return noSize; + } + + private static Slice getNoIndexSlice() { + final Slice noIndex = getIndexInitializedSlice(); + noIndex.index = Slice.UNINITIALIZED_INDEXING_PARAMETER; + return noIndex; + } + + @DataProvider(name = "uninitializedBAIParameterTestCases") + static Object[][] uninitializedBAIParameterTestCases() { + + return new Object[][] { + { getNoContainerOffsetSlice() }, + { getNoOffsetFromContainerSlice() }, + { getNoIndexSlice() } + }; + } + + @DataProvider(name = "uninitializedCRAIParameterTestCases") + static Object[][] uninitializedCRAIParameterTestCases() { + + return new Object[][] { + { getNoContainerOffsetSlice() }, + { getNoOffsetFromContainerSlice() }, + { getNoSizeSlice() } + }; + } + // assert that slices and containers have values equal to what the caller expects public static void assertSliceState(final Slice slice, @@ -194,10 +252,12 @@ public static void assertSliceState(final Slice slice, public static void assertContainerState(final Container container, final ReferenceContext expectedReferenceContext, final int expectedAlignmentStart, - final int expectedAlignmentSpan) { + final int expectedAlignmentSpan, + final long expectedByteOffset) { Assert.assertEquals(container.getReferenceContext(), expectedReferenceContext); Assert.assertEquals(container.alignmentStart, expectedAlignmentStart); Assert.assertEquals(container.alignmentSpan, expectedAlignmentSpan); + Assert.assertEquals(container.byteOffset, expectedByteOffset); } public static void assertContainerState(final Container container, @@ -206,18 +266,19 @@ public static void assertContainerState(final Container container, final int expectedAlignmentSpan, final int expectedRecordCount, final int expectedBaseCount, - final int expectedGlobalRecordCounter) { - assertContainerState(container, expectedReferenceContext, expectedAlignmentStart, expectedAlignmentSpan); + final int expectedGlobalRecordCounter, + final long expectedByteOffset) { + assertContainerState(container, expectedReferenceContext, expectedAlignmentStart, expectedAlignmentSpan, expectedByteOffset); Assert.assertEquals(container.nofRecords, expectedRecordCount); Assert.assertEquals(container.bases, expectedBaseCount); Assert.assertEquals(container.globalRecordCounter, expectedGlobalRecordCounter); - Assert.assertEquals(container.slices.length, 1); + Assert.assertEquals(container.getSlices().length, 1); // verify the underlying slice too - assertSliceState(container.slices[0], expectedReferenceContext, expectedAlignmentStart, expectedAlignmentSpan, + assertSliceState(container.getSlices()[0], expectedReferenceContext, expectedAlignmentStart, expectedAlignmentSpan, expectedRecordCount, expectedBaseCount, expectedGlobalRecordCounter); } } \ No newline at end of file diff --git a/src/test/java/htsjdk/samtools/cram/structure/ContainerTest.java b/src/test/java/htsjdk/samtools/cram/structure/ContainerTest.java index 789c739f0f..b1caa0d441 100644 --- a/src/test/java/htsjdk/samtools/cram/structure/ContainerTest.java +++ b/src/test/java/htsjdk/samtools/cram/structure/ContainerTest.java @@ -3,14 +3,15 @@ import htsjdk.HtsjdkTest; import htsjdk.samtools.ValidationStringency; import htsjdk.samtools.cram.CRAMException; +import htsjdk.samtools.cram.build.CompressionHeaderFactory; import htsjdk.samtools.cram.build.ContainerFactory; import htsjdk.samtools.cram.ref.ReferenceContext; import org.testng.Assert; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; -import java.util.ArrayList; import java.util.Arrays; +import java.util.Collections; import java.util.List; import java.util.Map; @@ -20,6 +21,9 @@ public class ContainerTest extends HtsjdkTest { private static final ContainerFactory FACTORY = new ContainerFactory(CRAMStructureTestUtil.getSAMFileHeaderForTests(), TEST_RECORD_COUNT); + private static final CompressionHeader COMPRESSION_HEADER = + new CompressionHeaderFactory().build(Collections.EMPTY_LIST, null, true); + @DataProvider(name = "containerStateTestCases") private Object[][] containerStateTestCases() { final ReferenceContext mappedReferenceContext = new ReferenceContext(5); // arbitrary @@ -62,8 +66,9 @@ public void initializeFromSlicesTest(final List slices, final ReferenceContext expectedReferenceContext, final int expectedAlignmentStart, final int expectedAlignmentSpan) { - final Container container = Container.initializeFromSlices(slices); - CRAMStructureTestUtil.assertContainerState(container, expectedReferenceContext, expectedAlignmentStart, expectedAlignmentSpan); + final long byteOffset = 536635; + final Container container = Container.initializeFromSlices(slices, COMPRESSION_HEADER, byteOffset); + CRAMStructureTestUtil.assertContainerState(container, expectedReferenceContext, expectedAlignmentStart, expectedAlignmentSpan, byteOffset); } @DataProvider(name = "illegalCombinationTestCases") @@ -90,7 +95,8 @@ private Object[][] illegalCombinationTestCases() { @Test(dataProvider = "illegalCombinationTestCases", expectedExceptions = CRAMException.class) public static void illegalCombinationsStateTest(final Slice one, final Slice another) { - Container.initializeFromSlices(Arrays.asList(one, another)); + final long dummyByteOffset = 0; + Container.initializeFromSlices(Arrays.asList(one, another), COMPRESSION_HEADER, dummyByteOffset); } @DataProvider(name = "getSpansTestCases") @@ -143,7 +149,8 @@ private Object[][] getSpansTestCases() { public void getSpansTest(final List records, final ReferenceContext expectedReferenceContext, final AlignmentSpan expectedAlignmentSpan) { - final Container container = FACTORY.buildContainer(records); + final long dummyByteOffset = 0; + final Container container = FACTORY.buildContainer(records, dummyByteOffset); final Map spanMap = container.getSpans(ValidationStringency.STRICT); Assert.assertEquals(spanMap.size(), 1); @@ -154,12 +161,12 @@ public void getSpansTest(final List records, // show that we can populate all of the slice indexing fields from the // values in the container's header - // this is part of the deserialization process, and supports index creation + // this is part of the serialization/deserialization process, and supports index creation // single slice @Test - public static void populateSlicesAndIndexingParametersOneSlice() { + public static void distributeIndexingParametersToSlicesOneSlice() { // this container starts 100,000 bytes into the CRAM stream final int containerStreamByteOffset = 100000; @@ -172,13 +179,13 @@ public static void populateSlicesAndIndexingParametersOneSlice() { final Container container = createOneSliceContainer(containerStreamByteOffset, containerHeaderSize, sliceSize); - assertSliceIndexingParams(container.slices[0], 0, containerStreamByteOffset, sliceSize, containerHeaderSize); + assertSliceIndexingParams(container.getSlices()[0], 0, containerStreamByteOffset, sliceSize, containerHeaderSize); } // two slices @Test - public static void populateSlicesAndIndexingParametersTwoSlices() { + public static void distributeIndexingParametersToSlicesTwoSlices() { // this container starts 200,000 bytes into the CRAM stream final int containerStreamByteOffset = 200000; @@ -193,8 +200,44 @@ public static void populateSlicesAndIndexingParametersTwoSlices() { final Container container = createTwoSliceContainer(containerStreamByteOffset, containerHeaderSize, slice0size, slice1size); - assertSliceIndexingParams(container.slices[0], 0, containerStreamByteOffset, slice0size, containerHeaderSize); - assertSliceIndexingParams(container.slices[1], 1, containerStreamByteOffset, slice1size, containerHeaderSize + slice0size); + assertSliceIndexingParams(container.getSlices()[0], 0, containerStreamByteOffset, slice0size, containerHeaderSize); + assertSliceIndexingParams(container.getSlices()[1], 1, containerStreamByteOffset, slice1size, containerHeaderSize + slice0size); + } + + @DataProvider(name = "containerDistributeNegative") + private Object[][] containerDistributeNegative() { + final ReferenceContext refContext = new ReferenceContext(0); + + final Container nullLandmarks = new Container(refContext); + nullLandmarks.containerByteSize = 6789; + nullLandmarks.landmarks = null; + nullLandmarks.setSlicesAndByteOffset(Collections.singletonList(new Slice(refContext)), 999); + + final Container tooManyLandmarks = new Container(refContext); + tooManyLandmarks.containerByteSize = 111; + tooManyLandmarks.landmarks = new int[]{ 1, 2, 3, 4, 5 }; + tooManyLandmarks.setSlicesAndByteOffset(Collections.singletonList(new Slice(refContext)), 12345); + + final Container tooManySlices = new Container(refContext); + tooManySlices.containerByteSize = 675345389; + tooManySlices.landmarks = new int[]{ 1 }; + tooManySlices.setSlicesAndByteOffset(Arrays.asList(new Slice(refContext), new Slice(refContext)), 12345); + + final Container noByteSize = new Container(refContext); + noByteSize.landmarks = new int[]{ 1, 2 }; + noByteSize.setSlicesAndByteOffset(Arrays.asList(new Slice(refContext), new Slice(refContext)), 12345); + + return new Object[][] { + { nullLandmarks }, + { tooManyLandmarks }, + { tooManySlices }, + { noByteSize }, + }; + } + + @Test(expectedExceptions = CRAMException.class, dataProvider = "containerDistributeNegative") + public static void distributeIndexingParametersToSlicesNegative(final Container container) { + container.distributeIndexingParametersToSlices(); } private static Container createOneSliceContainer(final int containerStreamByteOffset, @@ -203,16 +246,13 @@ private static Container createOneSliceContainer(final int containerStreamByteOf final ReferenceContext refContext = new ReferenceContext(0); final Container container = new Container(refContext); - container.offset = containerStreamByteOffset; container.containerByteSize = slice0size; container.landmarks = new int[]{ containerHeaderSize, // beginning of slice }; - final ArrayList slices = new ArrayList() {{ - add(new Slice(refContext)); - }}; - container.populateSlicesAndIndexingParameters(slices); + container.setSlicesAndByteOffset(Collections.singletonList(new Slice(refContext)), containerStreamByteOffset); + container.distributeIndexingParametersToSlices(); return container; } @@ -225,18 +265,14 @@ private static Container createTwoSliceContainer(final int containerStreamByteOf final ReferenceContext refContext = new ReferenceContext(0); final Container container = new Container(refContext); - container.offset = containerStreamByteOffset; container.containerByteSize = containerDataSize; container.landmarks = new int[]{ containerHeaderSize, // beginning of slice 1 containerHeaderSize + slice0size // beginning of slice 2 }; - final ArrayList slices = new ArrayList() {{ - add(new Slice(refContext)); - add(new Slice(refContext)); - }}; - container.populateSlicesAndIndexingParameters(slices); + container.setSlicesAndByteOffset(Arrays.asList(new Slice(refContext), new Slice(refContext)), containerStreamByteOffset); + container.distributeIndexingParametersToSlices(); return container; } @@ -246,8 +282,8 @@ private static void assertSliceIndexingParams(final Slice slice, final int expectedSize, final int expectedOffset) { Assert.assertEquals(slice.index, expectedIndex); - Assert.assertEquals(slice.containerOffset, expectedContainerOffset); - Assert.assertEquals(slice.size, expectedSize); - Assert.assertEquals(slice.offset, expectedOffset); + Assert.assertEquals(slice.containerByteOffset, expectedContainerOffset); + Assert.assertEquals(slice.byteSize, expectedSize); + Assert.assertEquals(slice.byteOffsetFromContainer, expectedOffset); } } diff --git a/src/test/java/htsjdk/samtools/cram/structure/SliceTests.java b/src/test/java/htsjdk/samtools/cram/structure/SliceTests.java index de225160ce..fdd64bca43 100644 --- a/src/test/java/htsjdk/samtools/cram/structure/SliceTests.java +++ b/src/test/java/htsjdk/samtools/cram/structure/SliceTests.java @@ -189,6 +189,16 @@ public void testSingleAndUnmappedBuild() { Slice.NO_ALIGNMENT_START, Slice.NO_ALIGNMENT_SPAN, records.size(), expectedBaseCount); } + @Test(dataProvider = "uninitializedBAIParameterTestCases", dataProviderClass = CRAMStructureTestUtil.class, expectedExceptions = CRAMException.class) + public void uninitializedBAIParameterTest(final Slice s) { + s.baiIndexInitializationCheck(); + } + + @Test(dataProvider = "uninitializedCRAIParameterTestCases", dataProviderClass = CRAMStructureTestUtil.class, expectedExceptions = CRAMException.class) + public void uninitializedCRAIParameterTest(final Slice s) { + s.craiIndexInitializationCheck(); + } + private static void buildSliceAndAssert(final List records, final ReferenceContext expectedReferenceContext, final int expectedAlignmentStart,