Skip to content

Commit

Permalink
CRAM queryAlignmentStart/queryMate fix. (#1164)
Browse files Browse the repository at this point in the history
  • Loading branch information
cmnbroad authored Jun 25, 2019
1 parent 765728e commit 3a35b89
Show file tree
Hide file tree
Showing 13 changed files with 1,684 additions and 93 deletions.
42 changes: 0 additions & 42 deletions src/main/java/htsjdk/samtools/BAMFileReader.java
Original file line number Diff line number Diff line change
Expand Up @@ -1087,48 +1087,6 @@ SAMRecord advance() {
}
}

/**
 * A record filter that accepts only records located on one particular reference sequence
 * whose alignment start is exactly equal to a requested position.
 */
private class BAMStartingAtIteratorFilter implements BAMIteratorFilter {

    private final int mReferenceIndex;
    private final int mRegionStart;

    /**
     * @param referenceIndex index of the reference sequence a record must be on to match
     * @param start alignment start a record must have to match
     */
    public BAMStartingAtIteratorFilter(final int referenceIndex, final int start) {
        this.mReferenceIndex = referenceIndex;
        this.mRegionStart = start;
    }

    /**
     * Classifies a record relative to the target reference index and start position.
     *
     * @param record the record to classify
     * @return MATCHES_FILTER if this record matches the filter;
     * CONTINUE_ITERATION if does not match filter but iteration should continue;
     * STOP_ITERATION if does not match filter and iteration should end.
     */
    @Override
    public FilteringIteratorState compareToFilter(final SAMRecord record) {
        final int recordReference = record.getReferenceIndex();

        // A negative reference index, or one past the target reference, ends the iteration.
        if (recordReference < 0 || recordReference > mReferenceIndex) {
            return FilteringIteratorState.STOP_ITERATION;
        }

        // Still on a reference that precedes the target one: keep scanning.
        if (recordReference != mReferenceIndex) {
            return FilteringIteratorState.CONTINUE_ITERATION;
        }

        final int recordStart = record.getAlignmentStart();
        if (recordStart == mRegionStart) {
            return FilteringIteratorState.MATCHES_FILTER;
        }

        // Past the requested start means the target has been overshot; before it means keep going.
        return recordStart > mRegionStart
                ? FilteringIteratorState.STOP_ITERATION
                : FilteringIteratorState.CONTINUE_ITERATION;
    }
}

private class BAMFileIndexUnmappedIterator extends BAMFileIterator {
private BAMFileIndexUnmappedIterator() {
while (this.hasNext() && peek().getReferenceIndex() != SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) {
Expand Down
66 changes: 66 additions & 0 deletions src/main/java/htsjdk/samtools/BAMStartingAtIteratorFilter.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
/*
* The MIT License
*
* Copyright (c) 2019 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/

package htsjdk.samtools;

/**
 * Filter that matches only those records aligned to a specific reference sequence at a
 * specific alignment start position.
 */
public class BAMStartingAtIteratorFilter implements BAMIteratorFilter {

    private final int mReferenceIndex;
    private final int mRegionStart;

    /**
     * @param referenceIndex index of the reference sequence that matching records are on
     * @param start alignment start that matching records have
     */
    public BAMStartingAtIteratorFilter(final int referenceIndex, final int start) {
        this.mReferenceIndex = referenceIndex;
        this.mRegionStart = start;
    }

    /**
     * Decides whether a record matches, and if not, whether scanning should go on.
     *
     * @param record the record to compare against the target reference index and start
     * @return MATCHES_FILTER if this record matches the filter;
     * CONTINUE_ITERATION if does not match filter but iteration should continue;
     * STOP_ITERATION if does not match filter and iteration should end.
     */
    @Override
    public FilteringIteratorState compareToFilter(final SAMRecord record) {
        final int candidateReference = record.getReferenceIndex();

        final boolean beyondTargetReference =
                candidateReference < 0 || candidateReference > mReferenceIndex;
        if (beyondTargetReference) {
            // Negative reference index or a later reference: nothing further can match.
            return FilteringIteratorState.STOP_ITERATION;
        }
        if (candidateReference < mReferenceIndex) {
            // Earlier reference than the target: skip this record and keep going.
            return FilteringIteratorState.CONTINUE_ITERATION;
        }

        // Same reference: three-way comparison of the record's start against the target start.
        final int startOrder = Integer.compare(record.getAlignmentStart(), mRegionStart);
        if (startOrder == 0) {
            return FilteringIteratorState.MATCHES_FILTER;
        }
        if (startOrder > 0) {
            // Scanned beyond the target position.
            return FilteringIteratorState.STOP_ITERATION;
        }
        return FilteringIteratorState.CONTINUE_ITERATION;
    }
}
65 changes: 53 additions & 12 deletions src/main/java/htsjdk/samtools/CRAMFileReader.java
Original file line number Diff line number Diff line change
Expand Up @@ -387,7 +387,9 @@ public CloseableIterator<SAMRecord> queryAlignmentStart(final String sequence,
final int start) {
final SAMFileHeader fileHeader = getFileHeader();
final int referenceIndex = fileHeader.getSequenceIndex(sequence);
return new CRAMIntervalIterator(new QueryInterval[]{new QueryInterval(referenceIndex, start, -1)}, true);
// alignment start requires a filtering iterator to ensure that records in the
// same container that start AFTER the requested start are filtered out
return new CRAMAlignmentStartIterator(referenceIndex, start);
}

@Override
Expand Down Expand Up @@ -489,21 +491,32 @@ private static long[] coordinatesFromQueryIntervals(BAMIndex index, QueryInterva
return BAMFileSpan.merge(spanArray).toCoordinateArray();
}

private class CRAMIntervalIterator extends BAMQueryMultipleIntervalsIteratorFilter
/**
* This class is intended to be a base class for various CRAM filtering iterators. Subclasses must
* ensure that {@link CRAMIntervalIteratorBase#initializeIterator} is called once after the subclass'
* construction is complete, preferably at the end of the subclass' constructor, but before any
* attempt is made to use the iterator.
*/
private abstract class CRAMIntervalIteratorBase extends BAMQueryMultipleIntervalsIteratorFilter
implements CloseableIterator<SAMRecord> {

// the granularity of this iterator is the container, so the records returned
// by it must still be filtered to find those matching the filter criteria
private CRAMIterator unfilteredIterator;
SAMRecord nextRec = null;
private SAMRecord nextRec = null;

public CRAMIntervalIterator(final QueryInterval[] queries, final boolean contained) {
this(queries, contained, coordinatesFromQueryIntervals(getIndex(), queries));
}

public CRAMIntervalIterator(final QueryInterval[] queries, final boolean contained, final long[] coordinates) {
public CRAMIntervalIteratorBase(final QueryInterval[] queries, final boolean contained) {
super(queries, contained);
}

/**
* Subclasses must call this method in their constructors AFTER construction of this class is complete.
* It can't be called directly by this class's constructor because it calls getRecord(), which may be
* overridden in subclasses, and can depend on state established by the subclass' constructor (specifically,
* it may need to establish a filter comparator).
* @param coordinates array of coordinates as produced by {@link BAMFileSpan#toCoordinateArray}
*/
protected void initializeIterator(final long[] coordinates) {
if (coordinates != null && coordinates.length != 0) {

unfilteredIterator = new CRAMIterator(
Expand Down Expand Up @@ -537,19 +550,19 @@ public SAMRecord next() {
return getNextRecord();
}

private SAMRecord getNextRecord() {
protected SAMRecord getNextRecord() {
final SAMRecord result = nextRec;
nextRec = null;
while(nextRec == null && unfilteredIterator.hasNext()) {
while (nextRec == null && unfilteredIterator.hasNext()) {
SAMRecord nextRecord = unfilteredIterator.next();
switch(compareToFilter(nextRecord)) {
switch (compareToFilter(nextRecord)) {
case MATCHES_FILTER:
nextRec = nextRecord;
break;
case CONTINUE_ITERATION:
continue;
case STOP_ITERATION:
break;
return result;
default:
throw new SAMException("Unexpected return from compareToFilter");
}
Expand All @@ -562,4 +575,32 @@ public void remove() {
throw new RuntimeException("Method \"remove\" not implemented for CRAMIntervalIterator.");
}
}

// An iterator for querying reads that match a set of query intervals
// It adds no filtering logic of its own: both constructors only forward to the base class
// and then trigger iterator initialization.
private class CRAMIntervalIterator extends CRAMIntervalIteratorBase {
/**
* Creates an iterator whose coordinates are derived from the query intervals by calling
* coordinatesFromQueryIntervals with the index returned by getIndex().
*
* @param queries the query intervals; forwarded to the base class and used to compute coordinates
* @param contained forwarded unchanged to the base class constructor
*/
public CRAMIntervalIterator(final QueryInterval[] queries, final boolean contained) {
this(queries, contained, coordinatesFromQueryIntervals(getIndex(), queries));
}

/**
* Creates an iterator over caller-supplied coordinates.
*
* @param queries the query intervals; forwarded to the base class
* @param contained forwarded unchanged to the base class constructor
* @param coordinates coordinate array handed to initializeIterator
*/
public CRAMIntervalIterator(final QueryInterval[] queries, final boolean contained, final long[] coordinates) {
super(queries, contained);
// Must be the last step: the base class requires initializeIterator to be called only
// after construction is complete.
initializeIterator(coordinates);
}
}

/**
 * An iterator for querying reads that match a given alignment start.
 *
 * The records to scan are located with a query interval built from the requested reference
 * index and start, but each candidate record is accepted or rejected by a
 * {@link BAMStartingAtIteratorFilter} rather than by the inherited interval comparison.
 */
private class CRAMAlignmentStartIterator extends CRAMIntervalIteratorBase {
    // Private: only this iterator's compareToFilter override needs the delegate.
    private final BAMStartingAtIteratorFilter startingAtIteratorFilter;

    /**
     * @param referenceIndex index of the reference sequence to query
     * @param start alignment start that returned records must have
     */
    public CRAMAlignmentStartIterator(final int referenceIndex, final int start) {
        // NOTE(review): an end of -1 presumably means "open-ended" - confirm against QueryInterval.
        super(new QueryInterval[]{new QueryInterval(referenceIndex, start, -1)}, true);
        // The filter must exist before initializeIterator runs, because initialization
        // may already invoke compareToFilter (overridden below) on candidate records.
        startingAtIteratorFilter = new BAMStartingAtIteratorFilter(referenceIndex, start);
        initializeIterator(coordinatesFromQueryIntervals(getIndex(), intervals));
    }

    /**
     * Delegates the match decision to the starting-at filter.
     *
     * @param record the record to classify
     * @return the state reported by the {@link BAMStartingAtIteratorFilter} for this record
     */
    @Override
    public FilteringIteratorState compareToFilter(final SAMRecord record) {
        return startingAtIteratorFilter.compareToFilter(record);
    }
}
}
33 changes: 18 additions & 15 deletions src/test/java/htsjdk/samtools/CRAMFileBAIIndexTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -46,49 +46,52 @@ public class CRAMFileBAIIndexTest extends HtsjdkTest {
private int nofReads = 10000 ;
private int nofReadsPerContainer = 1000 ;

private final static String TEST_QUERY_ALIGNMENT_CONTIG = "chrM";
private final static int TEST_QUERY_ALIGNMENT_START = 1519;


// Mixes testing queryAlignmentStart with each CRAMFileReaderConstructor
// Separate into individual tests
@Test
public void testConstructors () throws IOException {
CRAMFileReader reader = new CRAMFileReader(cramFile, indexFile, source, ValidationStringency.SILENT);
CloseableIterator<SAMRecord> iterator = reader.queryAlignmentStart("chrM", 1500);
CloseableIterator<SAMRecord> iterator = reader.queryAlignmentStart(TEST_QUERY_ALIGNMENT_CONTIG, TEST_QUERY_ALIGNMENT_START);
Assert.assertTrue(iterator.hasNext());
SAMRecord record = iterator.next();

Assert.assertEquals(record.getReferenceName(), "chrM");
Assert.assertTrue(record.getAlignmentStart() >= 1500);
Assert.assertEquals(record.getReferenceName(), TEST_QUERY_ALIGNMENT_CONTIG);
Assert.assertEquals(record.getAlignmentStart(), TEST_QUERY_ALIGNMENT_START);
reader.close();

reader = new CRAMFileReader(new SeekableFileStream(cramFile), indexFile, source, ValidationStringency.SILENT);
iterator = reader.queryAlignmentStart("chrM", 1500);
iterator = reader.queryAlignmentStart(TEST_QUERY_ALIGNMENT_CONTIG, TEST_QUERY_ALIGNMENT_START);
Assert.assertTrue(iterator.hasNext());
record = iterator.next();

Assert.assertEquals(record.getReferenceName(), "chrM");
Assert.assertTrue(record.getAlignmentStart() >= 1500);
Assert.assertEquals(record.getReferenceName(), TEST_QUERY_ALIGNMENT_CONTIG);
Assert.assertEquals(record.getAlignmentStart(), TEST_QUERY_ALIGNMENT_START);
reader.close();

reader = new CRAMFileReader(new SeekableFileStream(cramFile), new SeekableFileStream(indexFile), source, ValidationStringency.SILENT);
iterator = reader.queryAlignmentStart("chrM", 1500);
iterator = reader.queryAlignmentStart(TEST_QUERY_ALIGNMENT_CONTIG, TEST_QUERY_ALIGNMENT_START);
Assert.assertTrue(iterator.hasNext());
record = iterator.next();

Assert.assertEquals(record.getReferenceName(), "chrM");
Assert.assertTrue(record.getAlignmentStart() >= 1500);
Assert.assertEquals(record.getReferenceName(), TEST_QUERY_ALIGNMENT_CONTIG);
Assert.assertEquals(record.getAlignmentStart(), TEST_QUERY_ALIGNMENT_START);
reader.close();

reader = new CRAMFileReader(new SeekableFileStream(cramFile), (File)null, source, ValidationStringency.SILENT);
try {
reader.queryAlignmentStart("chrM", 1500);
reader.queryAlignmentStart(TEST_QUERY_ALIGNMENT_CONTIG, TEST_QUERY_ALIGNMENT_START);
Assert.fail("Expecting query to fail when there is no index");
} catch (SAMException e) {
}
reader.close();

reader = new CRAMFileReader(new SeekableFileStream(cramFile), (SeekableFileStream)null, source, ValidationStringency.SILENT);
try {
reader.queryAlignmentStart("chrM", 1500);
reader.queryAlignmentStart(TEST_QUERY_ALIGNMENT_CONTIG, TEST_QUERY_ALIGNMENT_START);
Assert.fail("Expecting query to fail when there is no index");
} catch (SAMException e) {
}
Expand All @@ -100,12 +103,12 @@ record = iterator.next();
public void test_chrM_1500_location() throws IOException {
CRAMFileReader reader = new CRAMFileReader(cramFile, indexFile, source);
reader.setValidationStringency(ValidationStringency.SILENT);
CloseableIterator<SAMRecord> iterator = reader.queryAlignmentStart("chrM", 1500);
CloseableIterator<SAMRecord> iterator = reader.queryAlignmentStart(TEST_QUERY_ALIGNMENT_CONTIG, TEST_QUERY_ALIGNMENT_START);
Assert.assertTrue(iterator.hasNext());
SAMRecord record = iterator.next();

Assert.assertEquals(record.getReferenceName(), "chrM");
Assert.assertTrue(record.getAlignmentStart() >= 1500);
Assert.assertEquals(record.getReferenceName(), TEST_QUERY_ALIGNMENT_CONTIG);
Assert.assertEquals(record.getAlignmentStart(), TEST_QUERY_ALIGNMENT_START);
}

@Test
Expand Down Expand Up @@ -214,7 +217,7 @@ public void testIteratorFromFileSpan_SecondContainer() throws IOException {
@Test
public void testQueryInterval() throws IOException {
CRAMFileReader reader = new CRAMFileReader(new ByteArraySeekableStream(cramBytes), new ByteArraySeekableStream(baiBytes), source, ValidationStringency.SILENT);
QueryInterval[] query = new QueryInterval[]{new QueryInterval(0, 1519, 1520), new QueryInterval(1, 470535, 470536)};
QueryInterval[] query = new QueryInterval[]{new QueryInterval(0, TEST_QUERY_ALIGNMENT_START, TEST_QUERY_ALIGNMENT_START+1), new QueryInterval(1, 470535, 470536)};
final CloseableIterator<SAMRecord> iterator = reader.query(query, false);
Assert.assertTrue(iterator.hasNext());
SAMRecord r1 = iterator.next();
Expand Down
16 changes: 8 additions & 8 deletions src/test/java/htsjdk/samtools/CRAMFileCRAIIndexTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -53,12 +53,12 @@ public void testFileFileConstructor () throws IOException {
tmpCraiFile,
source,
ValidationStringency.STRICT);
CloseableIterator<SAMRecord> iterator = reader.queryAlignmentStart("chrM", 1500);
CloseableIterator<SAMRecord> iterator = reader.queryAlignmentStart("chrM", 1519);

Assert.assertTrue(iterator.hasNext());
SAMRecord record = iterator.next();
Assert.assertEquals(record.getReferenceName(), "chrM");
Assert.assertTrue(record.getAlignmentStart() >= 1500);
Assert.assertEquals(record.getAlignmentStart(), 1519);
reader.close();
}

Expand All @@ -69,12 +69,12 @@ public void testStreamFileConstructor () throws IOException {
tmpCraiFile,
source,
ValidationStringency.STRICT);
CloseableIterator<SAMRecord> iterator = reader.queryAlignmentStart("chrM", 1500);
CloseableIterator<SAMRecord> iterator = reader.queryAlignmentStart("chrM", 1519);
Assert.assertTrue(iterator.hasNext());
SAMRecord record = iterator.next();

Assert.assertEquals(record.getReferenceName(), "chrM");
Assert.assertTrue(record.getAlignmentStart() >= 1500);
Assert.assertEquals(record.getAlignmentStart(), 1519);
reader.close();
}

Expand All @@ -85,12 +85,12 @@ public void testStreamStreamConstructor() throws IOException {
new SeekableFileStream(tmpCraiFile),
source,
ValidationStringency.STRICT);
CloseableIterator<SAMRecord> iterator = reader.queryAlignmentStart("chrM", 1500);
CloseableIterator<SAMRecord> iterator = reader.queryAlignmentStart("chrM", 1519);
Assert.assertTrue(iterator.hasNext());
SAMRecord record = iterator.next();

Assert.assertEquals(record.getReferenceName(), "chrM");
Assert.assertTrue(record.getAlignmentStart() >= 1500);
Assert.assertEquals(record.getAlignmentStart(), 1519);
reader.close();
}

Expand All @@ -102,7 +102,7 @@ public void testFileFileConstructorNoIndex () throws IOException {
source,
ValidationStringency.STRICT);
try {
reader.queryAlignmentStart("chrM", 1500);
reader.queryAlignmentStart("chrM", 1519);
}
finally {
reader.close();
Expand All @@ -117,7 +117,7 @@ public void testStreamStreamConstructorNoIndex () throws IOException {
source,
ValidationStringency.STRICT);
try {
reader.queryAlignmentStart("chrM", 1500);
reader.queryAlignmentStart("chrM", 1519);
}
finally {
reader.close();
Expand Down
Loading

0 comments on commit 3a35b89

Please sign in to comment.