Skip to content

Commit

Permalink
Change CRAM validation error reporting granularity from container to …
Browse files Browse the repository at this point in the history
…record (#1091)

Defer validation checking until records are retrieved, rather than validating the entire container.
  • Loading branch information
Anton Mazur authored and cmnbroad committed Dec 10, 2018
1 parent 1126e5c commit 6ac7a60
Show file tree
Hide file tree
Showing 3 changed files with 67 additions and 18 deletions.
40 changes: 22 additions & 18 deletions src/main/java/htsjdk/samtools/CRAMIterator.java
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,10 @@ public void setValidationStringency(
this.validationStringency = validationStringency;
}

/**
* `samRecordIndex` only used when validation is not `SILENT`
* (for identification by the validator which records are invalid)
*/
private long samRecordIndex;
private ArrayList<CramCompressionRecord> cramRecords;

Expand All @@ -84,7 +88,7 @@ public CRAMIterator(final InputStream inputStream, final CRAMReferenceSource ref
this.containerIterator = containerIterator;

firstContainerOffset = this.countingInputStream.getCount();
records = new ArrayList<SAMRecord>(10000);
records = new ArrayList<SAMRecord>(CRAMContainerStreamWriter.DEFAULT_RECORDS_PER_SLICE);
normalizer = new CramNormalizer(cramHeader.getSamFileHeader(),
referenceSource);
parser = new ContainerParser(cramHeader.getSamFileHeader());
Expand All @@ -103,7 +107,7 @@ public CRAMIterator(final SeekableStream seekableStream, final CRAMReferenceSour
this.containerIterator = containerIterator;

firstContainerOffset = containerIterator.getFirstContainerOffset();
records = new ArrayList<SAMRecord>(10000);
records = new ArrayList<SAMRecord>(CRAMContainerStreamWriter.DEFAULT_RECORDS_PER_SLICE);
normalizer = new CramNormalizer(cramHeader.getSamFileHeader(),
referenceSource);
parser = new ContainerParser(cramHeader.getSamFileHeader());
Expand Down Expand Up @@ -143,10 +147,7 @@ void nextContainer() throws IOException, IllegalArgumentException,
}
}

if (records == null)
records = new ArrayList<SAMRecord>(container.nofRecords);
else
records.clear();
records.clear();
if (cramRecords == null)
cramRecords = new ArrayList<CramCompressionRecord>(container.nofRecords);
else
Expand All @@ -172,15 +173,17 @@ void nextContainer() throws IOException, IllegalArgumentException,

for (int i = 0; i < container.slices.length; i++) {
final Slice slice = container.slices[i];

if (slice.sequenceId < 0)
continue;

if (!slice.validateRefMD5(refs)) {
final String msg = String.format(
"Reference sequence MD5 mismatch for slice: sequence id %d, start %d, span %d, expected MD5 %s",
slice.sequenceId,
slice.alignmentStart,
slice.alignmentSpan,
String.format("%032x", new BigInteger(1, slice.refMD5)));
slice.sequenceId,
slice.alignmentStart,
slice.alignmentSpan,
String.format("%032x", new BigInteger(1, slice.refMD5)));
throw new CRAMException(msg);
}
}
Expand All @@ -201,12 +204,6 @@ void nextContainer() throws IOException, IllegalArgumentException,

samRecord.setValidationStringency(validationStringency);

if (validationStringency != ValidationStringency.SILENT) {
final List<SAMValidationError> validationErrors = samRecord.isValid();
SAMUtils.processValidationErrors(validationErrors,
samRecordIndex, validationStringency);
}

if (mReader != null) {
final long chunkStart = (container.offset << 16) | cramRecord.sliceIndex;
final long chunkEnd = ((container.offset << 16) | cramRecord.sliceIndex) + 1;
Expand All @@ -215,7 +212,6 @@ void nextContainer() throws IOException, IllegalArgumentException,
}

records.add(samRecord);
samRecordIndex++;
}
cramRecords.clear();
iterator = records.iterator();
Expand Down Expand Up @@ -267,7 +263,15 @@ public boolean hasNext() {
@Override
public SAMRecord next() {
if (hasNext()) {
return iterator.next();

SAMRecord samRecord = iterator.next();

if (validationStringency != ValidationStringency.SILENT) {
SAMUtils.processValidationErrors(samRecord.isValid(), samRecordIndex++, validationStringency);
}

return samRecord;

} else {
throw new NoSuchElementException();
}
Expand Down
45 changes: 45 additions & 0 deletions src/test/java/htsjdk/samtools/CRAMIteratorTest.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
package htsjdk.samtools;

import htsjdk.HtsjdkTest;
import htsjdk.samtools.cram.ref.ReferenceSource;
import htsjdk.samtools.seekablestream.SeekableStream;
import org.testng.Assert;
import org.testng.annotations.Test;

import java.io.File;


/**
* This test serves for verifying CRAMIterator records validation using strict validation strategy
*
* @author Anton_Mazur@epam.com, EPAM Systems, Inc.
**/

public class CRAMIteratorTest extends HtsjdkTest {

@Test(description = "This test checks that records validation is deferred until they are retrieved")
public void noValidationFailureOnContainerOpen() {
try (SAMRecordIterator cramIteratorOverInvalidRecords = getCramFileIterator(ValidationStringency.STRICT)) {
Assert.assertTrue(cramIteratorOverInvalidRecords.hasNext());
}
}

@Test(expectedExceptions = SAMException.class)
public void throwOnRecordValidationFailure() {
try (SAMRecordIterator cramIteratorOverInvalidRecords = getCramFileIterator(ValidationStringency.STRICT)) {
while (cramIteratorOverInvalidRecords.hasNext()) {
cramIteratorOverInvalidRecords.next();
}
}
}

private SAMRecordIterator getCramFileIterator(ValidationStringency valStringency) {
final File refFile = new File("src/test/resources/htsjdk/samtools/cram/ce.fa");
final File cramFile = new File("src/test/resources/htsjdk/samtools/cram/ce#containsInvalidRecords.3.0.cram");
final ReferenceSource source = new ReferenceSource(refFile);

final CRAMFileReader cramFileReader = new CRAMFileReader(cramFile, (SeekableStream) null, source);
cramFileReader.setValidationStringency(valStringency);
return cramFileReader.getIterator();
}
}
Binary file not shown.

0 comments on commit 6ac7a60

Please sign in to comment.