Skip to content

Commit

Permalink
CRAM: revert #1326 and fix tests and comments (#1341)
Browse files Browse the repository at this point in the history
* Revert #1326 because it was correct before
- added comments clarifying the real situation
- see samtools/hts-specs#396
- see samtools/hts-specs#398
- comments and rename CRAIEntry.sliceByteOffset
- blockCount comments
  • Loading branch information
jmthibault79 authored Apr 3, 2019
1 parent 1ece54c commit 5442f78
Show file tree
Hide file tree
Showing 17 changed files with 152 additions and 113 deletions.
2 changes: 1 addition & 1 deletion src/main/java/htsjdk/samtools/BAMIndexMetaData.java
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@ void recordMetaData(final Slice slice) {
unAlignedRecords += slice.unmappedReadsCount;
}

final long start = slice.byteOffsetFromContainer;
final long start = slice.byteOffsetFromCompressionHeaderStart;

if (BlockCompressedFilePointerUtil.compare(start, firstOffset) < 1 || firstOffset == -1) {
this.firstOffset = start;
Expand Down
4 changes: 2 additions & 2 deletions src/main/java/htsjdk/samtools/CRAMBAIIndexer.java
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@ void processContainer(final Container container, final ValidationStringency vali
final AlignmentSpan span = spanMap.get(refContext);
final Slice fakeSlice = new Slice(refContext);
fakeSlice.containerByteOffset = slice.containerByteOffset;
fakeSlice.byteOffsetFromContainer = slice.byteOffsetFromContainer;
fakeSlice.byteOffsetFromCompressionHeaderStart = slice.byteOffsetFromCompressionHeaderStart;
fakeSlice.index = slice.index;

fakeSlice.alignmentStart = span.getStart();
Expand All @@ -167,7 +167,7 @@ void processContainer(final Container container, final ValidationStringency vali
if (unmappedSpan != null) {
final Slice fakeSlice = new Slice(ReferenceContext.UNMAPPED_UNPLACED_CONTEXT);
fakeSlice.containerByteOffset = slice.containerByteOffset;
fakeSlice.byteOffsetFromContainer = slice.byteOffsetFromContainer;
fakeSlice.byteOffsetFromCompressionHeaderStart = slice.byteOffsetFromCompressionHeaderStart;
fakeSlice.index = slice.index;

fakeSlice.alignmentStart = SAMRecord.NO_ALIGNMENT_START;
Expand Down
2 changes: 1 addition & 1 deletion src/main/java/htsjdk/samtools/CRAMFileReader.java
Original file line number Diff line number Diff line change
Expand Up @@ -401,7 +401,7 @@ public CloseableIterator<SAMRecord> queryUnmapped() {
newIterator = new CRAMIterator(seekableStream, referenceSource, validationStringency);
seekableStream.seek(startOfLastLinearBin >>> 16);
final Container container = ContainerHeaderIO.readContainerHeader(newIterator.getCramHeader().getVersion().major, seekableStream);
seekableStream.seek(seekableStream.position() + container.containerByteSize);
seekableStream.seek(seekableStream.position() + container.containerBlocksByteSize);
iterator = newIterator;
boolean atAlignments;
do {
Expand Down
24 changes: 12 additions & 12 deletions src/main/java/htsjdk/samtools/cram/CRAIEntry.java
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,9 @@ public class CRAIEntry implements Comparable<CRAIEntry> {
// this Slice's Container's offset in bytes from the beginning of the stream
// equal to Slice.containerByteOffset and Container.byteOffset
private final long containerStartByteOffset;
// this Slice's offset in bytes from the beginning of its Container
// equal to Slice.byteOffsetFromContainer and Container.landmarks[Slice.index]
private final int sliceByteOffset;
// this Slice's offset in bytes from the beginning of its Container's Compression Header
// equal to Slice.byteOffsetFromCompressionHeaderStart and Container.landmarks[Slice.index]
private final int sliceByteOffsetFromCompressionHeaderStart;
private final int sliceByteSize;

private static final int CRAI_INDEX_COLUMNS = 6;
Expand All @@ -30,7 +30,7 @@ public CRAIEntry(final int sequenceId,
final int alignmentStart,
final int alignmentSpan,
final long containerStartByteOffset,
final int sliceByteOffset,
final int sliceByteOffsetFromCompressionHeaderStart,
final int sliceByteSize) {
if (sequenceId == ReferenceContext.MULTIPLE_REFERENCE_ID) {
throw new CRAMException("Cannot directly index a multiref slice. Index by its constituent references instead.");
Expand All @@ -40,7 +40,7 @@ public CRAIEntry(final int sequenceId,
this.alignmentStart = alignmentStart;
this.alignmentSpan = alignmentSpan;
this.containerStartByteOffset = containerStartByteOffset;
this.sliceByteOffset = sliceByteOffset;
this.sliceByteOffsetFromCompressionHeaderStart = sliceByteOffsetFromCompressionHeaderStart;
this.sliceByteSize = sliceByteSize;
}

Expand All @@ -62,7 +62,7 @@ public CRAIEntry(final String line) {
alignmentStart = Integer.parseInt(chunks[1]);
alignmentSpan = Integer.parseInt(chunks[2]);
containerStartByteOffset = Long.parseLong(chunks[3]);
sliceByteOffset = Integer.parseInt(chunks[4]);
sliceByteOffsetFromCompressionHeaderStart = Integer.parseInt(chunks[4]);
sliceByteSize = Integer.parseInt(chunks[5]);
} catch (final NumberFormatException e) {
throw new CRAIIndex.CRAIIndexException(e);
Expand All @@ -89,7 +89,7 @@ public void writeToStream(OutputStream os) {
private String serializeToString() {
return String.format(ENTRY_FORMAT,
sequenceId, alignmentStart, alignmentSpan,
containerStartByteOffset, sliceByteOffset, sliceByteSize);
containerStartByteOffset, sliceByteOffsetFromCompressionHeaderStart, sliceByteSize);
}

@Override
Expand Down Expand Up @@ -130,7 +130,7 @@ public int compareTo(final CRAIEntry other) {
return Long.compare(containerStartByteOffset, other.containerStartByteOffset);
}

return Long.compare(sliceByteOffset, other.sliceByteOffset);
return Long.compare(sliceByteOffsetFromCompressionHeaderStart, other.sliceByteOffsetFromCompressionHeaderStart);
};

public static boolean intersect(final CRAIEntry e0, final CRAIEntry e1) {
Expand Down Expand Up @@ -169,8 +169,8 @@ public long getContainerStartByteOffset() {
return containerStartByteOffset;
}

public int getSliceByteOffset() {
return sliceByteOffset;
public int getSliceByteOffsetFromCompressionHeaderStart() {
return sliceByteOffsetFromCompressionHeaderStart;
}

public int getSliceByteSize() {
Expand All @@ -188,7 +188,7 @@ public boolean equals(Object o) {
if (alignmentStart != entry.alignmentStart) return false;
if (alignmentSpan != entry.alignmentSpan) return false;
if (containerStartByteOffset != entry.containerStartByteOffset) return false;
if (sliceByteOffset != entry.sliceByteOffset) return false;
if (sliceByteOffsetFromCompressionHeaderStart != entry.sliceByteOffsetFromCompressionHeaderStart) return false;
return sliceByteSize == entry.sliceByteSize;
}

Expand All @@ -198,7 +198,7 @@ public int hashCode() {
result = 31 * result + alignmentStart;
result = 31 * result + alignmentSpan;
result = 31 * result + (int) (containerStartByteOffset ^ (containerStartByteOffset >>> 32));
result = 31 * result + sliceByteOffset;
result = 31 * result + sliceByteOffsetFromCompressionHeaderStart;
result = 31 * result + sliceByteSize;
return result;
}
Expand Down
2 changes: 1 addition & 1 deletion src/main/java/htsjdk/samtools/cram/CRAIIndex.java
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ public static SeekableStream openCraiFileAsBaiStream(final InputStream indexStre
slice.containerByteOffset = entry.getContainerStartByteOffset();
slice.alignmentStart = entry.getAlignmentStart();
slice.alignmentSpan = entry.getAlignmentSpan();
slice.byteOffsetFromContainer = entry.getSliceByteOffset();
slice.byteOffsetFromCompressionHeaderStart = entry.getSliceByteOffsetFromCompressionHeaderStart();

// NOTE: the sliceIndex and read count fields can't be derived from the CRAM index
// so we can only set them to zero
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ public CramContainerHeaderIterator(final InputStream inputStream) {
@Override
protected Container containerFromStream(final CountingInputStream countingStream) {
final Container container = ContainerHeaderIO.readContainerHeader(getCramHeader().getVersion().major, countingStream);
InputStreamUtils.skipFully(countingStream, container.containerByteSize);
InputStreamUtils.skipFully(countingStream, container.containerBlocksByteSize);
return container;
}

Expand Down
12 changes: 6 additions & 6 deletions src/main/java/htsjdk/samtools/cram/build/CramIO.java
Original file line number Diff line number Diff line change
Expand Up @@ -273,16 +273,16 @@ private static long writeContainerForSamFileHeader(final int major, final SAMFil

final ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
block.write(major, byteArrayOutputStream);
container.containerByteSize = byteArrayOutputStream.size();
container.containerBlocksByteSize = byteArrayOutputStream.size();

final int containerHeaderByteSize = ContainerHeaderIO.writeContainerHeader(major, container, os);
try {
os.write(byteArrayOutputStream.toByteArray(), 0, byteArrayOutputStream.size());
os.write(byteArrayOutputStream.toByteArray(), 0, container.containerBlocksByteSize);
} catch (final IOException e) {
throw new RuntimeIOException(e);
}

return containerHeaderByteSize + byteArrayOutputStream.size();
return containerHeaderByteSize + container.containerBlocksByteSize;
}

private static SAMFileHeader readSAMFileHeader(final Version version,
Expand All @@ -293,13 +293,13 @@ private static SAMFileHeader readSAMFileHeader(final Version version,
final Block block;
{
if (version.compatibleWith(CramVersions.CRAM_v3)) {
final byte[] bytes = new byte[container.containerByteSize];
final byte[] bytes = new byte[container.containerBlocksByteSize];
InputStreamUtils.readFully(inputStream, bytes, 0, bytes.length);
block = Block.read(version.major, new ByteArrayInputStream(bytes));
// ignore the rest of the container
} else {
/*
* pending issue: container.containerByteSize inputStream 2 bytes shorter
* pending issue: container.containerBlocksByteSize inputStream 2 bytes shorter
* than needed in the v21 test cram files.
*/
block = Block.read(version.major, inputStream);
Expand Down Expand Up @@ -355,7 +355,7 @@ public static boolean replaceCramHeader(final File file, final CramHeader newHea
final Block block = Block.createRawFileHeaderBlock(toByteArray(newHeader.getSamFileHeader()));
final ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
block.write(newHeader.getVersion().major, byteArrayOutputStream);
if (byteArrayOutputStream.size() > c.containerByteSize) {
if (byteArrayOutputStream.size() > c.containerBlocksByteSize) {
log.error("Failed to replace CRAM header because the new header does not fit.");
return false;
}
Expand Down
56 changes: 32 additions & 24 deletions src/main/java/htsjdk/samtools/cram/structure/Container.java
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,13 @@ public class Container {
// container header as defined in the specs, in addition to sequenceId from ReferenceContext

/**
* Byte size of the content excluding header.
* The total length of all blocks in this container, of all types.
*
* Equal to the total length of this container, minus the Container Header.
*
* @see htsjdk.samtools.cram.structure.block.BlockContentType
*/
public int containerByteSize = 0;
public int containerBlocksByteSize = 0;

// minimum alignment start of the reads in this Container
// uses a 1-based coordinate system
Expand All @@ -46,13 +50,23 @@ public class Container {
public long bases = 0;
public int blockCount = -1;

// Slice byte boundaries within this container, after the header. Equal to Slice.offset.
// e.g. if landmarks[0] = 9000 and landmarks[1] = 109000, we know:
// the container's header size = 9000
// Slice[0].offset = 9000
// Slice[0].size = 109000 - 9000 = 100000
// Slice[1].offset = 109000

/**
* Slice byte boundaries as offsets within this container,
* counted after the container header. The start of the compression header
* has offset 0.
*
* Equal to {@link Slice#byteOffsetFromCompressionHeaderStart}.
*
* As an example, suppose we have:
* - landmarks[0] = 9000
* - landmarks[1] = 109000
* - containerBlocksByteSize = 123456
*
* We therefore know:
* - the compression header size = 9000
* - Slice 0 has offset 9000 and size 100000 (109000 - 9000)
* - Slice 1 has offset 109000 and size 14456 (123456 - 109000)
*/
public int[] landmarks;

public int checksum = 0;
Expand Down Expand Up @@ -170,7 +184,7 @@ else if (sliceRefContexts.size() > 1) {
/**
* Populate the indexing parameters of this Container's slices
*
* Requires: valid landmarks and containerByteSize
* Requires: valid landmarks and containerBlocksByteSize
*
* @throws CRAMException when the Container is in an invalid state
*/
Expand All @@ -188,28 +202,22 @@ public void distributeIndexingParametersToSlices() {
throw new CRAMException(String.format(format, landmarks.length, slices.length));
}

if (containerByteSize == 0) {
throw new CRAMException("Cannot set Slice indexing parameters if this Container's byte size is unknown");
if (containerBlocksByteSize == 0) {
throw new CRAMException("Cannot set Slice indexing parameters if the byte size of this Container's blocks is unknown");
}

final int lastSliceIndex = slices.length - 1;
for (int i = 0; i < lastSliceIndex; i++) {
final Slice slice = slices[i];
slice.index = i;
slice.byteOffsetFromContainer = landmarks[i];
slice.byteSize = landmarks[i + 1] - slice.byteOffsetFromContainer;
slice.byteOffsetFromCompressionHeaderStart = landmarks[i];
slice.byteSize = landmarks[i + 1] - slice.byteOffsetFromCompressionHeaderStart;
}

final Slice lastSlice = slices[lastSliceIndex];
lastSlice.index = lastSliceIndex;
lastSlice.byteOffsetFromContainer = landmarks[lastSliceIndex];

// calculate a "final landmark" indicating the byte offset of the end of the container
// equivalent to the container's total byte size

final int containerHeaderSize = landmarks[0];
final int containerTotalByteSize = containerHeaderSize + containerByteSize;
lastSlice.byteSize = containerTotalByteSize - lastSlice.byteOffsetFromContainer;
lastSlice.byteOffsetFromCompressionHeaderStart = landmarks[lastSliceIndex];
lastSlice.byteSize = containerBlocksByteSize - lastSlice.byteOffsetFromCompressionHeaderStart;
}

/**
Expand Down Expand Up @@ -237,11 +245,11 @@ public String toString() {
}

public boolean isEOF() {
final boolean v3 = containerByteSize == 15 && referenceContext.isUnmappedUnplaced()
final boolean v3 = containerBlocksByteSize == 15 && referenceContext.isUnmappedUnplaced()
&& alignmentStart == 4542278 && blockCount == 1
&& nofRecords == 0 && (getSlices() == null || getSlices().length == 0);

final boolean v2 = containerByteSize == 11 && referenceContext.isUnmappedUnplaced()
final boolean v2 = containerBlocksByteSize == 11 && referenceContext.isUnmappedUnplaced()
&& alignmentStart == 4542278 && blockCount == 1
&& nofRecords == 0 && (getSlices() == null || getSlices().length == 0);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ public static Container readContainerHeader(final int major,
final int containerByteSize = CramInt.readInt32(peek);
final ReferenceContext refContext = new ReferenceContext(ITF8.readUnsignedITF8(inputStream));
final Container container = new Container(refContext);
container.containerByteSize = containerByteSize;
container.containerBlocksByteSize = containerByteSize;

container.alignmentStart = ITF8.readUnsignedITF8(inputStream);
container.alignmentSpan = ITF8.readUnsignedITF8(inputStream);
Expand Down Expand Up @@ -123,7 +123,7 @@ public static Container readContainerHeader(final int major, final CountingInput
public static int writeContainerHeader(final int major, final Container container, final OutputStream outputStream) {
final CRC32OutputStream crc32OutputStream = new CRC32OutputStream(outputStream);

int length = (CramInt.writeInt32(container.containerByteSize, crc32OutputStream) + 7) / 8;
int length = (CramInt.writeInt32(container.containerBlocksByteSize, crc32OutputStream) + 7) / 8;
length += (ITF8.writeUnsignedITF8(container.getReferenceContext().getSerializableId(), crc32OutputStream) + 7) / 8;
length += (ITF8.writeUnsignedITF8(container.alignmentStart, crc32OutputStream) + 7) / 8;
length += (ITF8.writeUnsignedITF8(container.alignmentSpan, crc32OutputStream) + 7) / 8;
Expand Down
Loading

0 comments on commit 5442f78

Please sign in to comment.