From 5442f78cda87e742a7cab1579187471f4ab6be73 Mon Sep 17 00:00:00 2001 From: jmthibault79 Date: Wed, 3 Apr 2019 10:16:38 -0400 Subject: [PATCH] CRAM: revert #1326 and fix tests and comments (#1341) * Revert #1326 because it was correct before - added comments clarifying the real situation - see https://github.com/samtools/hts-specs/issues/396 - see https://github.com/samtools/hts-specs/pull/398 - comments and rename CRAIEntry.sliceByteOffset - blockCount comments --- .../htsjdk/samtools/BAMIndexMetaData.java | 2 +- .../java/htsjdk/samtools/CRAMBAIIndexer.java | 4 +- .../java/htsjdk/samtools/CRAMFileReader.java | 2 +- .../java/htsjdk/samtools/cram/CRAIEntry.java | 24 ++++---- .../java/htsjdk/samtools/cram/CRAIIndex.java | 2 +- .../build/CramContainerHeaderIterator.java | 2 +- .../htsjdk/samtools/cram/build/CramIO.java | 12 ++-- .../samtools/cram/structure/Container.java | 56 +++++++++++-------- .../cram/structure/ContainerHeaderIO.java | 4 +- .../samtools/cram/structure/ContainerIO.java | 30 ++++++---- .../htsjdk/samtools/cram/structure/Slice.java | 50 ++++++++++------- .../samtools/cram/structure/SliceIO.java | 6 ++ .../htsjdk/samtools/CRAMBAIIndexerTest.java | 6 +- .../CramContainerHeaderIteratorTest.java | 2 +- .../htsjdk/samtools/cram/CRAIEntryTest.java | 11 ++-- .../cram/structure/CRAMStructureTestUtil.java | 4 +- .../cram/structure/ContainerTest.java | 48 ++++++++-------- 17 files changed, 152 insertions(+), 113 deletions(-) diff --git a/src/main/java/htsjdk/samtools/BAMIndexMetaData.java b/src/main/java/htsjdk/samtools/BAMIndexMetaData.java index 09391b6515..3a502e93e3 100644 --- a/src/main/java/htsjdk/samtools/BAMIndexMetaData.java +++ b/src/main/java/htsjdk/samtools/BAMIndexMetaData.java @@ -159,7 +159,7 @@ void recordMetaData(final Slice slice) { unAlignedRecords += slice.unmappedReadsCount; } - final long start = slice.byteOffsetFromContainer; + final long start = slice.byteOffsetFromCompressionHeaderStart; if (BlockCompressedFilePointerUtil.compare(start, firstOffset) < 1 || firstOffset == -1) { this.firstOffset = start; diff --git a/src/main/java/htsjdk/samtools/CRAMBAIIndexer.java b/src/main/java/htsjdk/samtools/CRAMBAIIndexer.java index 3d6822a0d5..cd0cf3fe69 100755 --- a/src/main/java/htsjdk/samtools/CRAMBAIIndexer.java +++ b/src/main/java/htsjdk/samtools/CRAMBAIIndexer.java @@ -153,7 +153,7 @@ void processContainer(final Container container, final ValidationStringency vali final AlignmentSpan span = spanMap.get(refContext); final Slice fakeSlice = new Slice(refContext); fakeSlice.containerByteOffset = slice.containerByteOffset; - fakeSlice.byteOffsetFromContainer = slice.byteOffsetFromContainer; + fakeSlice.byteOffsetFromCompressionHeaderStart = slice.byteOffsetFromCompressionHeaderStart; fakeSlice.index = slice.index; fakeSlice.alignmentStart = span.getStart(); @@ -167,7 +167,7 @@ void processContainer(final Container container, final ValidationStringency vali if (unmappedSpan != null) { final Slice fakeSlice = new Slice(ReferenceContext.UNMAPPED_UNPLACED_CONTEXT); fakeSlice.containerByteOffset = slice.containerByteOffset; - fakeSlice.byteOffsetFromContainer = slice.byteOffsetFromContainer; + fakeSlice.byteOffsetFromCompressionHeaderStart = slice.byteOffsetFromCompressionHeaderStart; fakeSlice.index = slice.index; fakeSlice.alignmentStart = SAMRecord.NO_ALIGNMENT_START; diff --git a/src/main/java/htsjdk/samtools/CRAMFileReader.java b/src/main/java/htsjdk/samtools/CRAMFileReader.java index 5c782e12af..bf4fe6f0a1 100644 --- a/src/main/java/htsjdk/samtools/CRAMFileReader.java +++ b/src/main/java/htsjdk/samtools/CRAMFileReader.java @@ -401,7 +401,7 @@ public CloseableIterator queryUnmapped() { newIterator = new CRAMIterator(seekableStream, referenceSource, validationStringency); seekableStream.seek(startOfLastLinearBin >>> 16); final Container container = ContainerHeaderIO.readContainerHeader(newIterator.getCramHeader().getVersion().major, seekableStream); - seekableStream.seek(seekableStream.position() + container.containerByteSize); + seekableStream.seek(seekableStream.position() + container.containerBlocksByteSize); iterator = newIterator; boolean atAlignments; do { diff --git a/src/main/java/htsjdk/samtools/cram/CRAIEntry.java b/src/main/java/htsjdk/samtools/cram/CRAIEntry.java index 460ee6e04a..834e6050e1 100644 --- a/src/main/java/htsjdk/samtools/cram/CRAIEntry.java +++ b/src/main/java/htsjdk/samtools/cram/CRAIEntry.java @@ -18,9 +18,9 @@ public class CRAIEntry implements Comparable { // this Slice's Container's offset in bytes from the beginning of the stream // equal to Slice.containerByteOffset and Container.byteOffset private final long containerStartByteOffset; - // this Slice's offset in bytes from the beginning of its Container - // equal to Slice.byteOffsetFromContainer and Container.landmarks[Slice.index] - private final int sliceByteOffset; + // this Slice's offset in bytes from the beginning of its Container's Compression Header + // equal to Slice.byteOffsetFromCompressionHeaderStart and Container.landmarks[Slice.index] + private final int sliceByteOffsetFromCompressionHeaderStart; private final int sliceByteSize; private static final int CRAI_INDEX_COLUMNS = 6; @@ -30,7 +30,7 @@ public CRAIEntry(final int sequenceId, final int alignmentStart, final int alignmentSpan, final long containerStartByteOffset, - final int sliceByteOffset, + final int sliceByteOffsetFromCompressionHeaderStart, final int sliceByteSize) { if (sequenceId == ReferenceContext.MULTIPLE_REFERENCE_ID) { throw new CRAMException("Cannot directly index a multiref slice. Index by its constituent references instead."); @@ -40,7 +40,7 @@ public CRAIEntry(final int sequenceId, this.alignmentStart = alignmentStart; this.alignmentSpan = alignmentSpan; this.containerStartByteOffset = containerStartByteOffset; - this.sliceByteOffset = sliceByteOffset; + this.sliceByteOffsetFromCompressionHeaderStart = sliceByteOffsetFromCompressionHeaderStart; this.sliceByteSize = sliceByteSize; } @@ -62,7 +62,7 @@ public CRAIEntry(final String line) { alignmentStart = Integer.parseInt(chunks[1]); alignmentSpan = Integer.parseInt(chunks[2]); containerStartByteOffset = Long.parseLong(chunks[3]); - sliceByteOffset = Integer.parseInt(chunks[4]); + sliceByteOffsetFromCompressionHeaderStart = Integer.parseInt(chunks[4]); sliceByteSize = Integer.parseInt(chunks[5]); } catch (final NumberFormatException e) { throw new CRAIIndex.CRAIIndexException(e); @@ -89,7 +89,7 @@ public void writeToStream(OutputStream os) { private String serializeToString() { return String.format(ENTRY_FORMAT, sequenceId, alignmentStart, alignmentSpan, - containerStartByteOffset, sliceByteOffset, sliceByteSize); + containerStartByteOffset, sliceByteOffsetFromCompressionHeaderStart, sliceByteSize); } @Override @@ -130,7 +130,7 @@ public int compareTo(final CRAIEntry other) { return Long.compare(containerStartByteOffset, other.containerStartByteOffset); } - return Long.compare(sliceByteOffset, other.sliceByteOffset); + return Long.compare(sliceByteOffsetFromCompressionHeaderStart, other.sliceByteOffsetFromCompressionHeaderStart); }; public static boolean intersect(final CRAIEntry e0, final CRAIEntry e1) { @@ -169,8 +169,8 @@ public long getContainerStartByteOffset() { return containerStartByteOffset; } - public int getSliceByteOffset() { - return sliceByteOffset; + public int getSliceByteOffsetFromCompressionHeaderStart() { + return sliceByteOffsetFromCompressionHeaderStart; } public int getSliceByteSize() { @@ -188,7 +188,7 @@ public boolean equals(Object o) { if (alignmentStart != entry.alignmentStart) return false; if (alignmentSpan != entry.alignmentSpan) return false; if (containerStartByteOffset != entry.containerStartByteOffset) return false; - if (sliceByteOffset != entry.sliceByteOffset) return false; + if (sliceByteOffsetFromCompressionHeaderStart != entry.sliceByteOffsetFromCompressionHeaderStart) return false; return sliceByteSize == entry.sliceByteSize; } @@ -198,7 +198,7 @@ public int hashCode() { result = 31 * result + alignmentStart; result = 31 * result + alignmentSpan; result = 31 * result + (int) (containerStartByteOffset ^ (containerStartByteOffset >>> 32)); - result = 31 * result + sliceByteOffset; + result = 31 * result + sliceByteOffsetFromCompressionHeaderStart; result = 31 * result + sliceByteSize; return result; } diff --git a/src/main/java/htsjdk/samtools/cram/CRAIIndex.java b/src/main/java/htsjdk/samtools/cram/CRAIIndex.java index 9f9b548a34..f5f92d35dc 100644 --- a/src/main/java/htsjdk/samtools/cram/CRAIIndex.java +++ b/src/main/java/htsjdk/samtools/cram/CRAIIndex.java @@ -87,7 +87,7 @@ public static SeekableStream openCraiFileAsBaiStream(final InputStream indexStre slice.containerByteOffset = entry.getContainerStartByteOffset(); slice.alignmentStart = entry.getAlignmentStart(); slice.alignmentSpan = entry.getAlignmentSpan(); - slice.byteOffsetFromContainer = entry.getSliceByteOffset(); + slice.byteOffsetFromCompressionHeaderStart = entry.getSliceByteOffsetFromCompressionHeaderStart(); // NOTE: the sliceIndex and read count fields can't be derived from the CRAM index // so we can only set them to zero diff --git a/src/main/java/htsjdk/samtools/cram/build/CramContainerHeaderIterator.java b/src/main/java/htsjdk/samtools/cram/build/CramContainerHeaderIterator.java index 6d12be154a..370d565103 100644 --- a/src/main/java/htsjdk/samtools/cram/build/CramContainerHeaderIterator.java +++ b/src/main/java/htsjdk/samtools/cram/build/CramContainerHeaderIterator.java @@ -33,7 +33,7 @@ public CramContainerHeaderIterator(final InputStream inputStream) { @Override protected Container containerFromStream(final CountingInputStream countingStream) { final Container container = ContainerHeaderIO.readContainerHeader(getCramHeader().getVersion().major, countingStream); - InputStreamUtils.skipFully(countingStream, container.containerByteSize); + InputStreamUtils.skipFully(countingStream, container.containerBlocksByteSize); return container; } diff --git a/src/main/java/htsjdk/samtools/cram/build/CramIO.java b/src/main/java/htsjdk/samtools/cram/build/CramIO.java index 612d1ffe86..478713011e 100644 --- a/src/main/java/htsjdk/samtools/cram/build/CramIO.java +++ b/src/main/java/htsjdk/samtools/cram/build/CramIO.java @@ -273,16 +273,16 @@ private static long writeContainerForSamFileHeader(final int major, final SAMFil final ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream(); block.write(major, byteArrayOutputStream); - container.containerByteSize = byteArrayOutputStream.size(); + container.containerBlocksByteSize = byteArrayOutputStream.size(); final int containerHeaderByteSize = ContainerHeaderIO.writeContainerHeader(major, container, os); try { - os.write(byteArrayOutputStream.toByteArray(), 0, byteArrayOutputStream.size()); + os.write(byteArrayOutputStream.toByteArray(), 0, container.containerBlocksByteSize); } catch (final IOException e) { throw new RuntimeIOException(e); } - return containerHeaderByteSize + byteArrayOutputStream.size(); + return containerHeaderByteSize + container.containerBlocksByteSize; } private static SAMFileHeader readSAMFileHeader(final Version version, @@ -293,13 +293,13 @@ private static SAMFileHeader readSAMFileHeader(final Version version, final Block block; { if (version.compatibleWith(CramVersions.CRAM_v3)) { - final byte[] bytes = new byte[container.containerByteSize]; + final byte[] bytes = new byte[container.containerBlocksByteSize]; InputStreamUtils.readFully(inputStream, bytes, 0, bytes.length); block = Block.read(version.major, new ByteArrayInputStream(bytes)); // ignore the rest of the container } else { /* - * pending issue: container.containerByteSize inputStream 2 bytes shorter + * pending issue: container.containerBlocksByteSize inputStream 2 bytes shorter * than needed in the v21 test cram files. */ block = Block.read(version.major, inputStream); @@ -355,7 +355,7 @@ public static boolean replaceCramHeader(final File file, final CramHeader newHea final Block block = Block.createRawFileHeaderBlock(toByteArray(newHeader.getSamFileHeader())); final ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream(); block.write(newHeader.getVersion().major, byteArrayOutputStream); - if (byteArrayOutputStream.size() > c.containerByteSize) { + if (byteArrayOutputStream.size() > c.containerBlocksByteSize) { log.error("Failed to replace CRAM header because the new header does not fit."); return false; } diff --git a/src/main/java/htsjdk/samtools/cram/structure/Container.java b/src/main/java/htsjdk/samtools/cram/structure/Container.java index 229db1eac1..9848b64632 100644 --- a/src/main/java/htsjdk/samtools/cram/structure/Container.java +++ b/src/main/java/htsjdk/samtools/cram/structure/Container.java @@ -32,9 +32,13 @@ public class Container { // container header as defined in the specs, in addition to sequenceId from ReferenceContext /** - * Byte size of the content excluding header. + * The total length of all blocks in this container, of all types. + * + * Equal to the total length of this container, minus the Container Header. + * + * @see htsjdk.samtools.cram.structure.block.BlockContentType */ - public int containerByteSize = 0; + public int containerBlocksByteSize = 0; // minimum alignment start of the reads in this Container // uses a 1-based coordinate system @@ -46,13 +50,23 @@ public class Container { public long bases = 0; public int blockCount = -1; - // Slice byte boundaries within this container, after the header. Equal to Slice.offset. - // e.g. if landmarks[0] = 9000 and landmarks[1] = 109000, we know: - // the container's header size = 9000 - // Slice[0].offset = 9000 - // Slice[0].size = 109000 - 9000 = 100000 - // Slice[1].offset = 109000 - + /** + * Slice byte boundaries as offsets within this container, + * counted after the container header. The start of the compression header + * has offset 0. + * + * Equal to {@link Slice#byteOffsetFromCompressionHeaderStart}. + * + * As an example, suppose we have: + * - landmarks[0] = 9000 + * - landmarks[1] = 109000 + * - containerBlocksByteSize = 123456 + * + * We therefore know: + * - the compression header size = 9000 + * - Slice 0 has offset 9000 and size 100000 (109000 - 9000) + * - Slice 1 has offset 109000 and size 14456 (123456 - 109000) + */ public int[] landmarks; public int checksum = 0; @@ -170,7 +184,7 @@ else if (sliceRefContexts.size() > 1) { /** * Populate the indexing parameters of this Container's slices * - * Requires: valid landmarks and containerByteSize + * Requires: valid landmarks and containerBlocksByteSize * * @throws CRAMException when the Container is in an invalid state */ @@ -188,28 +202,22 @@ public void distributeIndexingParametersToSlices() { throw new CRAMException(String.format(format, landmarks.length, slices.length)); } - if (containerByteSize == 0) { - throw new CRAMException("Cannot set Slice indexing parameters if this Container's byte size is unknown"); + if (containerBlocksByteSize == 0) { + throw new CRAMException("Cannot set Slice indexing parameters if the byte size of this Container's blocks is unknown"); } final int lastSliceIndex = slices.length - 1; for (int i = 0; i < lastSliceIndex; i++) { final Slice slice = slices[i]; slice.index = i; - slice.byteOffsetFromContainer = landmarks[i]; - slice.byteSize = landmarks[i + 1] - slice.byteOffsetFromContainer; + slice.byteOffsetFromCompressionHeaderStart = landmarks[i]; + slice.byteSize = landmarks[i + 1] - slice.byteOffsetFromCompressionHeaderStart; } final Slice lastSlice = slices[lastSliceIndex]; lastSlice.index = lastSliceIndex; - lastSlice.byteOffsetFromContainer = landmarks[lastSliceIndex]; - - // calculate a "final landmark" indicating the byte offset of the end of the container - // equivalent to the container's total byte size - - final int containerHeaderSize = landmarks[0]; - final int containerTotalByteSize = containerHeaderSize + containerByteSize; - lastSlice.byteSize = containerTotalByteSize - lastSlice.byteOffsetFromContainer; + lastSlice.byteOffsetFromCompressionHeaderStart = landmarks[lastSliceIndex]; + lastSlice.byteSize = containerBlocksByteSize - lastSlice.byteOffsetFromCompressionHeaderStart; } /** @@ -237,11 +245,11 @@ public String toString() { } public boolean isEOF() { - final boolean v3 = containerByteSize == 15 && referenceContext.isUnmappedUnplaced() + final boolean v3 = containerBlocksByteSize == 15 && referenceContext.isUnmappedUnplaced() && alignmentStart == 4542278 && blockCount == 1 && nofRecords == 0 && (getSlices() == null || getSlices().length == 0); - final boolean v2 = containerByteSize == 11 && referenceContext.isUnmappedUnplaced() + final boolean v2 = containerBlocksByteSize == 11 && referenceContext.isUnmappedUnplaced() && alignmentStart == 4542278 && blockCount == 1 && nofRecords == 0 && (getSlices() == null || getSlices().length == 0); diff --git a/src/main/java/htsjdk/samtools/cram/structure/ContainerHeaderIO.java b/src/main/java/htsjdk/samtools/cram/structure/ContainerHeaderIO.java index 572fd93b7f..26c52f44a2 100644 --- a/src/main/java/htsjdk/samtools/cram/structure/ContainerHeaderIO.java +++ b/src/main/java/htsjdk/samtools/cram/structure/ContainerHeaderIO.java @@ -65,7 +65,7 @@ public static Container readContainerHeader(final int major, final int containerByteSize = CramInt.readInt32(peek); final ReferenceContext refContext = new ReferenceContext(ITF8.readUnsignedITF8(inputStream)); final Container container = new Container(refContext); - container.containerByteSize = containerByteSize; + container.containerBlocksByteSize = containerByteSize; container.alignmentStart = ITF8.readUnsignedITF8(inputStream); container.alignmentSpan = ITF8.readUnsignedITF8(inputStream); @@ -123,7 +123,7 @@ public static Container readContainerHeader(final int major, final CountingInput public static int writeContainerHeader(final int major, final Container container, final OutputStream outputStream) { final CRC32OutputStream crc32OutputStream = new CRC32OutputStream(outputStream); - int length = (CramInt.writeInt32(container.containerByteSize, crc32OutputStream) + 7) / 8; + int length = (CramInt.writeInt32(container.containerBlocksByteSize, crc32OutputStream) + 7) / 8; length += (ITF8.writeUnsignedITF8(container.getReferenceContext().getSerializableId(), crc32OutputStream) + 7) / 8; length += (ITF8.writeUnsignedITF8(container.alignmentStart, crc32OutputStream) + 7) / 8; length += (ITF8.writeUnsignedITF8(container.alignmentSpan, crc32OutputStream) + 7) / 8; diff --git a/src/main/java/htsjdk/samtools/cram/structure/ContainerIO.java b/src/main/java/htsjdk/samtools/cram/structure/ContainerIO.java index cd65c7442d..a591d77661 100644 --- a/src/main/java/htsjdk/samtools/cram/structure/ContainerIO.java +++ b/src/main/java/htsjdk/samtools/cram/structure/ContainerIO.java @@ -131,49 +131,59 @@ public static int writeContainer(final Version version, final Container containe if (isFileHeaderContainer) { final ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream(); firstBlock.write(version.major, byteArrayOutputStream); - container.containerByteSize = byteArrayOutputStream.size(); + container.containerBlocksByteSize = byteArrayOutputStream.size(); final int containerHeaderByteSize = ContainerHeaderIO.writeContainerHeader(version.major, container, outputStream); try { - outputStream.write(byteArrayOutputStream.toByteArray(), 0, byteArrayOutputStream.size()); + outputStream.write(byteArrayOutputStream.toByteArray(), 0, container.containerBlocksByteSize); } catch (final IOException e) { throw new RuntimeIOException(e); } - return containerHeaderByteSize + byteArrayOutputStream.size(); + return containerHeaderByteSize + container.containerBlocksByteSize; } } } + // use this BAOS for two purposes: writing out and counting bytes for landmarks/containerBlocksByteSize final ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream(); - container.compressionHeader.write(version, byteArrayOutputStream); + + // TODO: ensure that the Container blockCount stays in sync with the + // Slice's blockCount in SliceIO.write() + // 1 Compression Header Block container.blockCount = 1; final List landmarks = new ArrayList<>(); for (final Slice slice : container.getSlices()) { + // landmark 0 = byte length of the compression header + // landmarks after 0 = byte length of the compression header plus all slices before this one landmarks.add(byteArrayOutputStream.size()); SliceIO.write(version.major, slice, byteArrayOutputStream); + // 1 Slice Header Block container.blockCount++; + // 1 Core Data Block per Slice container.blockCount++; + // TODO: should we count the embedded reference block as an additional block? if (slice.embeddedRefBlock != null) container.blockCount++; + // Each Slice has a variable number of External Data Blocks container.blockCount += slice.external.size(); } container.landmarks = landmarks.stream().mapToInt(Integer::intValue).toArray(); - container.containerByteSize = byteArrayOutputStream.size(); + // compression header plus all slices, if any (EOF Containers do not; File Header Containers are handled above) + container.containerBlocksByteSize = byteArrayOutputStream.size(); - // Slices require the Container's landmarks and containerByteSize before indexing + // Slices require the Container's landmarks and containerBlocksByteSize before indexing container.distributeIndexingParametersToSlices(); - int length = ContainerHeaderIO.writeContainerHeader(version.major, container, outputStream); + final int containerHeaderLength = ContainerHeaderIO.writeContainerHeader(version.major, container, outputStream); try { - outputStream.write(byteArrayOutputStream.toByteArray(), 0, byteArrayOutputStream.size()); + outputStream.write(byteArrayOutputStream.toByteArray(), 0, container.containerBlocksByteSize); } catch (final IOException e) { throw new RuntimeIOException(e); } - length += byteArrayOutputStream.size(); log.debug("CONTAINER WRITTEN: " + container.toString()); - return length; + return containerHeaderLength + container.containerBlocksByteSize; } } diff --git a/src/main/java/htsjdk/samtools/cram/structure/Slice.java b/src/main/java/htsjdk/samtools/cram/structure/Slice.java index 8cb8ca15dd..1c68375d38 100644 --- a/src/main/java/htsjdk/samtools/cram/structure/Slice.java +++ b/src/main/java/htsjdk/samtools/cram/structure/Slice.java @@ -77,19 +77,31 @@ public class Slice { public static final int UNINITIALIZED_INDEXING_PARAMETER = -1; - // the Slice's offset in bytes from the beginning of its Container - // equal to Container.landmarks[Slice.index] of its enclosing Container - // BAI and CRAI - public int byteOffsetFromContainer = UNINITIALIZED_INDEXING_PARAMETER; - // this Slice's Container's offset in bytes from the beginning of the stream - // equal to Container.byteOffset of its enclosing Container - // BAI and CRAI + /** + * The Slice's offset in bytes from the beginning of the Container's Compression Header + * (or the end of the Container Header), equal to {@link Container#landmarks} + * + * Used by BAI and CRAI indexing + */ + public int byteOffsetFromCompressionHeaderStart = UNINITIALIZED_INDEXING_PARAMETER; + /** + * The Slice's Container's offset in bytes from the beginning of the stream + * equal to {@link Container#byteOffset} + * + * Used by BAI and CRAI indexing + */ public long containerByteOffset = UNINITIALIZED_INDEXING_PARAMETER; - // this Slice's size in bytes - // CRAI only + /** + * The Slice's size in bytes + * + * Used by CRAI indexing only + */ public int byteSize = UNINITIALIZED_INDEXING_PARAMETER; - // this Slice's index number within its Container - // BAI only + /** + * The Slice's index number within its Container + * + * Used by BAI indexing only + */ public int index = UNINITIALIZED_INDEXING_PARAMETER; // to pass this to the container: @@ -121,13 +133,13 @@ public ReferenceContext getReferenceContext() { /** * Confirm that we have initialized the 3 BAI index parameters: - * byteOffsetFromContainer, containerByteOffset, and index + * byteOffsetFromCompressionHeaderStart, containerByteOffset, and index */ public void baiIndexInitializationCheck() { final StringBuilder error = new StringBuilder(); - if (byteOffsetFromContainer == UNINITIALIZED_INDEXING_PARAMETER) { - error.append("Cannot index this Slice for BAI because its byteOffsetFromContainer is unknown.").append(System.lineSeparator()); + if (byteOffsetFromCompressionHeaderStart == UNINITIALIZED_INDEXING_PARAMETER) { + error.append("Cannot index this Slice for BAI because its byteOffsetFromCompressionHeaderStart is unknown.").append(System.lineSeparator()); } if (containerByteOffset == UNINITIALIZED_INDEXING_PARAMETER) { @@ -145,15 +157,15 @@ public void baiIndexInitializationCheck() { /** * Confirm that we have initialized the 3 CRAI index parameters: - * byteOffsetFromContainer, containerByteOffset, and byteSize + * byteOffsetFromCompressionHeaderStart, containerByteOffset, and byteSize * * NOTE: this is currently unused because we always use BAI */ void craiIndexInitializationCheck() { final StringBuilder error = new StringBuilder(); - if (byteOffsetFromContainer == UNINITIALIZED_INDEXING_PARAMETER) { - error.append("Cannot index this Slice for CRAI because its byteOffsetFromContainer is unknown.").append(System.lineSeparator()); + if (byteOffsetFromCompressionHeaderStart == UNINITIALIZED_INDEXING_PARAMETER) { + error.append("Cannot index this Slice for CRAI because its byteOffsetFromCompressionHeaderStart is unknown.").append(System.lineSeparator()); } if (containerByteOffset == UNINITIALIZED_INDEXING_PARAMETER) { @@ -415,7 +427,7 @@ public List getCRAIEntries(final CompressionHeader compressionHeader) e.getValue().getStart(), e.getValue().getSpan(), containerByteOffset, - byteOffsetFromContainer, + byteOffsetFromCompressionHeaderStart, byteSize)) .sorted() .collect(Collectors.toList()); @@ -427,7 +439,7 @@ public List getCRAIEntries(final CompressionHeader compressionHeader) alignmentStart, alignmentSpan, containerByteOffset, - byteOffsetFromContainer, + byteOffsetFromCompressionHeaderStart, byteSize)); } } diff --git a/src/main/java/htsjdk/samtools/cram/structure/SliceIO.java b/src/main/java/htsjdk/samtools/cram/structure/SliceIO.java index 7b606e28d1..c1d5024232 100644 --- a/src/main/java/htsjdk/samtools/cram/structure/SliceIO.java +++ b/src/main/java/htsjdk/samtools/cram/structure/SliceIO.java @@ -137,7 +137,13 @@ private static void readSliceBlocks(final int major, final Slice slice, final In } public static void write(final int major, final Slice slice, final OutputStream outputStream) { + // TODO: ensure that the Slice blockCount stays in sync with the + // Container's blockCount in ContainerIO.writeContainer() + // 1 Core Data Block per Slice + // Each Slice has a variable number of External Data Blocks + // TODO: should we count the embedded reference block as an additional block? + slice.nofBlocks = 1 + slice.external.size() + (slice.embeddedRefBlock == null ? 0 : 1); { diff --git a/src/test/java/htsjdk/samtools/CRAMBAIIndexerTest.java b/src/test/java/htsjdk/samtools/CRAMBAIIndexerTest.java index 54c9e9b307..48aecc75fd 100644 --- a/src/test/java/htsjdk/samtools/CRAMBAIIndexerTest.java +++ b/src/test/java/htsjdk/samtools/CRAMBAIIndexerTest.java @@ -68,11 +68,11 @@ private Object[][] missingIndexParams() { noByteOffsetFromContainer.index = 456; final Slice noContainerByteOffset = new Slice(refContext); - noContainerByteOffset.byteOffsetFromContainer = 789; + noContainerByteOffset.byteOffsetFromCompressionHeaderStart = 789; noContainerByteOffset.index = 456; final Slice noIndex = new Slice(refContext); - noIndex.byteOffsetFromContainer = 789; + noIndex.byteOffsetFromCompressionHeaderStart = 789; noIndex.containerByteOffset = 123; return new Object[][] { @@ -225,7 +225,7 @@ private Slice getSlice(final ReferenceContext refContext, mapped.unmappedReadsCount = unmappedReadsCount; mapped.unplacedReadsCount = unplacedReadsCount; // arbitrary - need these for indexing - mapped.byteOffsetFromContainer = 789; + mapped.byteOffsetFromCompressionHeaderStart = 789; mapped.containerByteOffset = 123; mapped.index = 456; return mapped; diff --git a/src/test/java/htsjdk/samtools/CramContainerHeaderIteratorTest.java b/src/test/java/htsjdk/samtools/CramContainerHeaderIteratorTest.java index ec876b141c..87aabd47bb 100644 --- a/src/test/java/htsjdk/samtools/CramContainerHeaderIteratorTest.java +++ b/src/test/java/htsjdk/samtools/CramContainerHeaderIteratorTest.java @@ -38,7 +38,7 @@ public void test() throws IOException { for (int i = 0; i < fullContainers.size(); i++) { Container fullContainer = fullContainers.get(i); Container headerOnlyContainer = headerOnlyContainers.get(i); - Assert.assertEquals(headerOnlyContainer.containerByteSize, fullContainer.containerByteSize); + Assert.assertEquals(headerOnlyContainer.containerBlocksByteSize, fullContainer.containerBlocksByteSize); Assert.assertEquals(headerOnlyContainer.getReferenceContext(), fullContainer.getReferenceContext()); Assert.assertEquals(headerOnlyContainer.alignmentStart, fullContainer.alignmentStart); Assert.assertEquals(headerOnlyContainer.alignmentSpan, fullContainer.alignmentSpan); diff --git a/src/test/java/htsjdk/samtools/cram/CRAIEntryTest.java b/src/test/java/htsjdk/samtools/cram/CRAIEntryTest.java index 223f9b4ada..bdece0837b 100644 --- a/src/test/java/htsjdk/samtools/cram/CRAIEntryTest.java +++ b/src/test/java/htsjdk/samtools/cram/CRAIEntryTest.java @@ -80,11 +80,11 @@ public void multiRefTestGetCRAIEntries() { slice1.index = slice1Index; slice1.byteSize = sliceByteSize; - slice1.byteOffsetFromContainer = slice1ByteOffsetFromContainer; + slice1.byteOffsetFromCompressionHeaderStart = slice1ByteOffsetFromContainer; slice2.index = slice2Index; slice2.byteSize = sliceByteSize; - slice2.byteOffsetFromContainer = slice2ByteOffsetFromContainer; + slice2.byteOffsetFromCompressionHeaderStart = slice2ByteOffsetFromContainer; final Container container = Container.initializeFromSlices(Arrays.asList(slice1, slice2), compressionHeader, containerOffset); @@ -269,7 +269,7 @@ private static Slice createSliceWithArbitraryValues(final ReferenceContext refCo single.alignmentStart = counter++; single.alignmentSpan = counter++; single.containerByteOffset = counter++; - single.byteOffsetFromContainer = counter++; + single.byteOffsetFromCompressionHeaderStart = counter++; single.byteSize = counter++; return single; } @@ -288,7 +288,8 @@ private List createMultiRefRecords(final int indexStart, private void assertEntryForSlice(final CRAIEntry entry, final Slice slice) { assertEntryForSlice(entry, slice.getReferenceContext().getSerializableId(), - slice.alignmentStart, slice.alignmentSpan, slice.containerByteOffset, slice.byteOffsetFromContainer, slice.byteSize); + slice.alignmentStart, slice.alignmentSpan, slice.containerByteOffset, + slice.byteOffsetFromCompressionHeaderStart, slice.byteSize); } private void assertEntryForSlice(final CRAIEntry entry, @@ -302,7 +303,7 @@ private void assertEntryForSlice(final CRAIEntry entry, Assert.assertEquals(entry.getAlignmentStart(), alignmentStart); Assert.assertEquals(entry.getAlignmentSpan(), alignmentSpan); Assert.assertEquals(entry.getContainerStartByteOffset(), containerOffset); - Assert.assertEquals(entry.getSliceByteOffset(), sliceByteOffset); + Assert.assertEquals(entry.getSliceByteOffsetFromCompressionHeaderStart(), sliceByteOffset); Assert.assertEquals(entry.getSliceByteSize(), sliceByteSize); } } diff --git a/src/test/java/htsjdk/samtools/cram/structure/CRAMStructureTestUtil.java b/src/test/java/htsjdk/samtools/cram/structure/CRAMStructureTestUtil.java index baf5e784db..2059dbd775 100644 --- a/src/test/java/htsjdk/samtools/cram/structure/CRAMStructureTestUtil.java +++ b/src/test/java/htsjdk/samtools/cram/structure/CRAMStructureTestUtil.java @@ -171,7 +171,7 @@ private static Slice getIndexInitializedSlice() { final ReferenceContext refContext = new ReferenceContext(0); final Slice slice = new Slice(refContext); - slice.byteOffsetFromContainer = 1; + slice.byteOffsetFromCompressionHeaderStart = 1; slice.containerByteOffset = 1; slice.byteSize = 1; slice.index = 1; @@ -187,7 +187,7 @@ private static Slice getNoContainerOffsetSlice() { private static Slice getNoOffsetFromContainerSlice() { final Slice noOffsetFromContainer = getIndexInitializedSlice(); - noOffsetFromContainer.byteOffsetFromContainer = Slice.UNINITIALIZED_INDEXING_PARAMETER; + noOffsetFromContainer.byteOffsetFromCompressionHeaderStart = Slice.UNINITIALIZED_INDEXING_PARAMETER; return noOffsetFromContainer; } diff --git a/src/test/java/htsjdk/samtools/cram/structure/ContainerTest.java b/src/test/java/htsjdk/samtools/cram/structure/ContainerTest.java index b1caa0d441..bf5ecbd799 100644 --- a/src/test/java/htsjdk/samtools/cram/structure/ContainerTest.java +++ b/src/test/java/htsjdk/samtools/cram/structure/ContainerTest.java @@ -171,15 +171,16 @@ public static void distributeIndexingParametersToSlicesOneSlice() { final int containerStreamByteOffset = 100000; // this Container consists of: - // a header of size 1234 bytes + // a header (size irrelevant) + // a Compression Header of size 7552 bytes // a Slice of size 6262 bytes - final int containerHeaderSize = 1234; + final int compressionHeaderSize = 7552; final int sliceSize = 6262; - final Container container = createOneSliceContainer(containerStreamByteOffset, containerHeaderSize, sliceSize); + final Container container = createOneSliceContainer(containerStreamByteOffset, sliceSize, compressionHeaderSize); - assertSliceIndexingParams(container.getSlices()[0], 0, containerStreamByteOffset, sliceSize, containerHeaderSize); + assertSliceIndexingParams(container.getSlices()[0], 0, containerStreamByteOffset, sliceSize, compressionHeaderSize); } // two slices @@ -190,18 +191,19 @@ public static void distributeIndexingParametersToSlicesTwoSlices() { final int containerStreamByteOffset = 200000; // this Container consists of: - // a header of size 3234 bytes + // a header (size irrelevant) + // a Compression Header of size 64343 bytes // a Slice of size 7890 bytes // a Slice of size 5555 bytes - final int containerHeaderSize = 3234; + final int compressionHeaderSize = 64343; final int slice0size = 7890; final int slice1size = 5555; - final Container container = createTwoSliceContainer(containerStreamByteOffset, containerHeaderSize, slice0size, slice1size); + final Container container = createTwoSliceContainer(containerStreamByteOffset, slice0size, slice1size, compressionHeaderSize); - assertSliceIndexingParams(container.getSlices()[0], 0, containerStreamByteOffset, slice0size, containerHeaderSize); - assertSliceIndexingParams(container.getSlices()[1], 1, containerStreamByteOffset, slice1size, containerHeaderSize + slice0size); + assertSliceIndexingParams(container.getSlices()[0], 0, containerStreamByteOffset, slice0size, compressionHeaderSize); + assertSliceIndexingParams(container.getSlices()[1], 1, containerStreamByteOffset, slice1size, compressionHeaderSize + slice0size); } @DataProvider(name = "containerDistributeNegative") @@ -209,17 +211,17 @@ private Object[][] containerDistributeNegative() { final ReferenceContext refContext = new ReferenceContext(0); final Container nullLandmarks = new Container(refContext); - nullLandmarks.containerByteSize = 6789; + nullLandmarks.containerBlocksByteSize = 6789; nullLandmarks.landmarks = null; nullLandmarks.setSlicesAndByteOffset(Collections.singletonList(new Slice(refContext)), 999); final Container tooManyLandmarks = new Container(refContext); - tooManyLandmarks.containerByteSize = 111; + tooManyLandmarks.containerBlocksByteSize = 111; tooManyLandmarks.landmarks = new int[]{ 1, 2, 3, 4, 5 }; tooManyLandmarks.setSlicesAndByteOffset(Collections.singletonList(new Slice(refContext)), 12345); final Container tooManySlices = new Container(refContext); - tooManySlices.containerByteSize = 675345389; + tooManySlices.containerBlocksByteSize = 675345389; tooManySlices.landmarks = new int[]{ 1 }; tooManySlices.setSlicesAndByteOffset(Arrays.asList(new Slice(refContext), new Slice(refContext)), 12345); @@ -241,14 +243,14 @@ public static void distributeIndexingParametersToSlicesNegative(final Container } private static Container createOneSliceContainer(final int containerStreamByteOffset, - final int containerHeaderSize, - final int slice0size) { + final int slice0size, + final int compressionHeaderSize) { final ReferenceContext refContext = new ReferenceContext(0); final Container container = new Container(refContext); - container.containerByteSize = slice0size; + container.containerBlocksByteSize = compressionHeaderSize + slice0size; container.landmarks = new int[]{ - containerHeaderSize, // beginning of slice + compressionHeaderSize, // beginning of slice }; container.setSlicesAndByteOffset(Collections.singletonList(new Slice(refContext)), containerStreamByteOffset); @@ -257,18 +259,18 @@ private static Container createOneSliceContainer(final int containerStreamByteOf } private static Container createTwoSliceContainer(final int containerStreamByteOffset, - final int containerHeaderSize, final int slice0size, - final int slice1size) { - final int containerDataSize = slice0size + slice1size; + final int slice1size, + final int compressionHeaderSize) { + final int containerDataSize = compressionHeaderSize + slice0size + slice1size; final ReferenceContext refContext = new ReferenceContext(0); final Container container = new Container(refContext); - container.containerByteSize = containerDataSize; + container.containerBlocksByteSize = containerDataSize; container.landmarks = new int[]{ - containerHeaderSize, // beginning of slice 1 - containerHeaderSize + slice0size // beginning of slice 2 + compressionHeaderSize, // beginning of slice 1 + compressionHeaderSize + slice0size // beginning of slice 2 }; container.setSlicesAndByteOffset(Arrays.asList(new Slice(refContext), new Slice(refContext)), containerStreamByteOffset); @@ -284,6 +286,6 @@ private static void assertSliceIndexingParams(final Slice slice, Assert.assertEquals(slice.index, expectedIndex); Assert.assertEquals(slice.containerByteOffset, expectedContainerOffset); Assert.assertEquals(slice.byteSize, expectedSize); - Assert.assertEquals(slice.byteOffsetFromContainer, expectedOffset); + Assert.assertEquals(slice.byteOffsetFromCompressionHeaderStart, expectedOffset); } }