Skip to content

Commit

Permalink
Consolidate common code into CRAMStructureTestUtil: (#1312)
Browse files Browse the repository at this point in the history
- ContainerTest, ContainerFactoryTest, ContainerParserTest, SliceTests, and CRAMBAIIndexerTest
  • Loading branch information
jmthibault79 authored Mar 2, 2019
1 parent b1cb410 commit 1509dcc
Show file tree
Hide file tree
Showing 6 changed files with 290 additions and 380 deletions.
93 changes: 37 additions & 56 deletions src/test/java/htsjdk/samtools/CRAMBAIIndexerTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -4,84 +4,65 @@
import htsjdk.samtools.cram.build.ContainerFactory;
import htsjdk.samtools.cram.structure.Container;
import htsjdk.samtools.cram.structure.CramCompressionRecord;
import htsjdk.samtools.cram.structure.Slice;
import htsjdk.samtools.cram.structure.CRAMStructureTestUtil;
import htsjdk.samtools.seekablestream.SeekableMemoryStream;
import org.testng.Assert;
import org.testng.annotations.Test;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

/**
* Created by vadim on 12/01/2016.
*
* Feeds two multi-reference containers through {@link CRAMBAIIndexer} and checks the
* aligned record count that the resulting index reports for each reference sequence.
*
* NOTE(review): this listing carries both the removed and the added lines of the change
* without +/- markers (for example, test_processMultiContainer is declared twice), so it
* does not compile as shown.
*/
public class CRAMBAIIndexerTest extends HtsjdkTest {
@Test
public void test_processMultiContainer() throws IOException {
// Expected aligned record counts per reference, summed over both containers:
// 1 record with ref id 0
// 3 records with ref id 1
// 2 records with ref id 2

// Builds a mapped, single-fragment record on reference seqId at position start.
// The same five 'A' bytes serve as both the read bases and the quality scores,
// and the record index (as a string) becomes the read name.
private static CramCompressionRecord createRecord(int recordIndex, int seqId, int start) {
byte[] bases = "AAAAA".getBytes();
int readLength = bases.length;

final CramCompressionRecord record = new CramCompressionRecord();
record.setSegmentUnmapped(false);
record.setMultiFragment(false);
record.sequenceId = seqId;
record.alignmentStart = start;
record.readBases = record.qualityScores = bases;
record.readName = Integer.toString(recordIndex);
record.readLength = readLength;
record.readFeatures = Collections.emptyList();
final int expected0 = 1;
final int expected1 = 3;
final int expected2 = 2;

return record;
}
@Test
public void test_processMultiContainer() {
// Header with three reference sequences, so reference ids 0..2 are valid.
SAMFileHeader samFileHeader = new SAMFileHeader();
samFileHeader.addSequence(new SAMSequenceRecord("1", 10));
samFileHeader.addSequence(new SAMSequenceRecord("2", 10));
samFileHeader.addSequence(new SAMSequenceRecord("3", 10));
ByteArrayOutputStream indexBAOS = new ByteArrayOutputStream();
CRAMBAIIndexer indexer = new CRAMBAIIndexer(indexBAOS, samFileHeader);
int recordsPerContainer = 3;
ContainerFactory containerFactory = new ContainerFactory(samFileHeader, recordsPerContainer);
// First container: one record on ref 0 and two on ref 1.
List<CramCompressionRecord> records = new ArrayList<>();
records.add(createRecord(0, 0, 1));
records.add(createRecord(1, 1, 2));
records.add(createRecord(2, 1, 3));
final List<CramCompressionRecord> records1 = new ArrayList<>();
records1.add(CRAMStructureTestUtil.createMappedRecord(0, 0, 1));
records1.add(CRAMStructureTestUtil.createMappedRecord(1, 1, 2));
records1.add(CRAMStructureTestUtil.createMappedRecord(2, 1, 3));

final Container container1 = containerFactory.buildContainer(records);
Assert.assertNotNull(container1);
Assert.assertEquals(container1.nofRecords, records.size());
Assert.assertTrue(container1.getReferenceContext().isMultiRef());
// Second container: one record on ref 1 and two on ref 2.
final List<CramCompressionRecord> records2 = new ArrayList<>();
records2.add(CRAMStructureTestUtil.createMappedRecord(3, 1, 3));
records2.add(CRAMStructureTestUtil.createMappedRecord(4, 2, 3));
records2.add(CRAMStructureTestUtil.createMappedRecord(5, 2, 4));

indexer.processContainer(container1, ValidationStringency.STRICT);
final SAMFileHeader samFileHeader = CRAMStructureTestUtil.getSAMFileHeaderForTests();

// The record list is reused for the second container.
records.clear();
records.add(createRecord(3, 1, 3));
records.add(createRecord(4, 2, 3));
records.add(createRecord(5, 2, 4));
final Container container2 = containerFactory.buildContainer(records);
Assert.assertNotNull(container2);
Assert.assertEquals(container2.nofRecords, records.size());
Assert.assertTrue(container2.getReferenceContext().isMultiRef());
final int recordsPerContainer = 3;
final ContainerFactory containerFactory = new ContainerFactory(samFileHeader, recordsPerContainer);

indexer.processContainer(container2, ValidationStringency.STRICT);
// Both containers mix reference ids, so each is expected to be multi-ref.
final Container container1 = containerFactory.buildContainer(records1);
Assert.assertTrue(container1.getReferenceContext().isMultiRef());

indexer.finish();
final Container container2 = containerFactory.buildContainer(records2);
Assert.assertTrue(container2.getReferenceContext().isMultiRef());

// Read the index back from the in-memory bytes and check the per-reference counts.
BAMIndex index = new CachingBAMFileIndex(new SeekableMemoryStream(indexBAOS.toByteArray(), null), samFileHeader.getSequenceDictionary());
final BAMIndexMetaData metaData_0 = index.getMetaData(0);
Assert.assertNotNull(metaData_0);
Assert.assertEquals(metaData_0.getAlignedRecordCount(), 1);
byte[] indexBytes;
try (final ByteArrayOutputStream indexBAOS = new ByteArrayOutputStream()) {
final CRAMBAIIndexer indexer = new CRAMBAIIndexer(indexBAOS, samFileHeader);
indexer.processContainer(container1, ValidationStringency.STRICT);
indexer.processContainer(container2, ValidationStringency.STRICT);
indexer.finish();
// Capture the bytes after finish() and before the try block closes the stream.
indexBytes = indexBAOS.toByteArray();
}

final BAMIndexMetaData metaData_1 = index.getMetaData(1);
Assert.assertNotNull(metaData_1);
Assert.assertEquals(metaData_1.getAlignedRecordCount(), 3);
final BAMIndex index = new CachingBAMFileIndex(new SeekableMemoryStream(indexBytes, null), samFileHeader.getSequenceDictionary());

final BAMIndexMetaData metaData_2 = index.getMetaData(2);
Assert.assertNotNull(metaData_2);
Assert.assertEquals(metaData_2.getAlignedRecordCount(), 2);
// TestNG argument order is (actual, expected).
Assert.assertEquals(index.getMetaData(0).getAlignedRecordCount(), expected0);
Assert.assertEquals(index.getMetaData(1).getAlignedRecordCount(), expected1);
Assert.assertEquals(index.getMetaData(2).getAlignedRecordCount(), expected2);
}

}
90 changes: 31 additions & 59 deletions src/test/java/htsjdk/samtools/cram/build/ContainerFactoryTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,8 @@
import htsjdk.samtools.cram.ref.ReferenceContext;
import htsjdk.samtools.cram.structure.Container;
import htsjdk.samtools.cram.structure.CramCompressionRecord;
import htsjdk.samtools.cram.structure.CramCompressionRecordUtil;
import htsjdk.samtools.cram.structure.CRAMStructureTestUtil;
import htsjdk.samtools.cram.structure.Slice;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;

Expand All @@ -17,51 +16,56 @@
*/
public class ContainerFactoryTest extends HtsjdkTest {
private static final int TEST_RECORD_COUNT = 10;
private static final int READ_LENGTH_FOR_TEST_RECORDS = CRAMStructureTestUtil.READ_LENGTH_FOR_TEST_RECORDS;

@DataProvider(name = "containerStateTests")
private Object[][] containerStateTests() {
final int mappedSequenceId = 0; // arbitrary
final ReferenceContext mappedRefContext = new ReferenceContext(mappedSequenceId);
final int mappedAlignmentStart = 1;
final int mappedAlignmentSpan = CramCompressionRecordUtil.READ_LENGTH_FOR_TEST_RECORDS + TEST_RECORD_COUNT - 1;
// record spans:
// [1 to READ_LENGTH_FOR_TEST_RECORDS]
// [2 to READ_LENGTH_FOR_TEST_RECORDS + 1]
// up to [TEST_RECORD_COUNT to READ_LENGTH_FOR_TEST_RECORDS + TEST_RECORD_COUNT - 1]
final int mappedAlignmentSpan = READ_LENGTH_FOR_TEST_RECORDS + TEST_RECORD_COUNT - 1;

return new Object[][]{
{
CramCompressionRecordUtil.getSingleRefRecords(TEST_RECORD_COUNT, mappedSequenceId),
CRAMStructureTestUtil.getSingleRefRecords(TEST_RECORD_COUNT, mappedSequenceId),
mappedRefContext, mappedAlignmentStart, mappedAlignmentSpan
},
{
CramCompressionRecordUtil.getMultiRefRecords(TEST_RECORD_COUNT),
CRAMStructureTestUtil.getMultiRefRecords(TEST_RECORD_COUNT),
ReferenceContext.MULTIPLE_REFERENCE_CONTEXT, Slice.NO_ALIGNMENT_START, Slice.NO_ALIGNMENT_SPAN
},
{
CramCompressionRecordUtil.getUnmappedRecords(TEST_RECORD_COUNT),
CRAMStructureTestUtil.getUnmappedRecords(TEST_RECORD_COUNT),
ReferenceContext.UNMAPPED_UNPLACED_CONTEXT, Slice.NO_ALIGNMENT_START, Slice.NO_ALIGNMENT_SPAN
},

// these two sets of records are "half" unplaced: they have either a valid reference index or start position,
// but not both. We treat these weird edge cases as unplaced.

{
CramCompressionRecordUtil.getHalfUnmappedNoRefRecords(TEST_RECORD_COUNT),
CRAMStructureTestUtil.getHalfUnmappedNoRefRecords(TEST_RECORD_COUNT),
ReferenceContext.UNMAPPED_UNPLACED_CONTEXT, Slice.NO_ALIGNMENT_START, Slice.NO_ALIGNMENT_SPAN
},
{
CramCompressionRecordUtil.getHalfUnmappedNoStartRecords(TEST_RECORD_COUNT, mappedSequenceId),
CRAMStructureTestUtil.getHalfUnmappedNoStartRecords(TEST_RECORD_COUNT, mappedSequenceId),
ReferenceContext.UNMAPPED_UNPLACED_CONTEXT, Slice.NO_ALIGNMENT_START, Slice.NO_ALIGNMENT_SPAN
},

// show that unmapped-unplaced reads cause a single ref slice/container to become multiref

{
CramCompressionRecordUtil.getSingleRefRecordsWithOneUnmapped(TEST_RECORD_COUNT, mappedSequenceId),
CRAMStructureTestUtil.getSingleRefRecordsWithOneUnmapped(TEST_RECORD_COUNT, mappedSequenceId),
ReferenceContext.MULTIPLE_REFERENCE_CONTEXT, Slice.NO_ALIGNMENT_START, Slice.NO_ALIGNMENT_SPAN
},

// show that unmapped-unplaced reads don't change the state of a multi-ref slice/container

{
CramCompressionRecordUtil.getMultiRefRecordsWithOneUnmapped(TEST_RECORD_COUNT),
CRAMStructureTestUtil.getMultiRefRecordsWithOneUnmapped(TEST_RECORD_COUNT),
ReferenceContext.MULTIPLE_REFERENCE_CONTEXT, Slice.NO_ALIGNMENT_START, Slice.NO_ALIGNMENT_SPAN
},
};
Expand All @@ -73,77 +77,45 @@ public void testContainerState(final List<CramCompressionRecord> records,
final int expectedAlignmentStart,
final int expectedAlignmentSpan) {
final Container container = buildFromNewFactory(records);
assertContainerState(container, expectedReferenceContext, expectedAlignmentStart, expectedAlignmentSpan);
final int globalRecordCounter = 0; // first Container
final int baseCount = TEST_RECORD_COUNT * READ_LENGTH_FOR_TEST_RECORDS;

CRAMStructureTestUtil.assertContainerState(container, expectedReferenceContext,
expectedAlignmentStart, expectedAlignmentSpan,
TEST_RECORD_COUNT, baseCount, globalRecordCounter);
}

/**
* Checks the state of three consecutive containers returned by
* getMultiRefContainersForStateTest(): the first is expected to be single-reference
* (reference 0, start 1, span of one test read), the second and third multi-reference
* with no alignment start or span. The expected record counts are 1, 2 and 3, and each
* container's global record counter is the number of records in the containers before it.
*
* NOTE(review): the private assertContainerState helper is called with
* (globalRecordCount, recordCount, baseCount) while the CRAMStructureTestUtil variant is
* called with (recordCount, baseCount, globalRecordCount) - keep the order straight
* when switching between them.
*/
@Test
public void testMultiRefWithStateTransitions() {
final List<Container> containers = CramCompressionRecordUtil.getMultiRefContainersForStateTest();
final List<Container> containers = CRAMStructureTestUtil.getMultiRefContainersForStateTest();

// first container is single-ref

final ReferenceContext refContext = new ReferenceContext(0);
final int alignmentStart = 1;
final int alignmentSpan = CramCompressionRecordUtil.READ_LENGTH_FOR_TEST_RECORDS;
final int alignmentSpan = READ_LENGTH_FOR_TEST_RECORDS;
int recordCount = 1;
int globalRecordCount = 0; // first container - no records yet
assertContainerState(containers.get(0), refContext, alignmentStart, alignmentSpan,
globalRecordCount, recordCount, CramCompressionRecordUtil.READ_LENGTH_FOR_TEST_RECORDS * recordCount);
CRAMStructureTestUtil.assertContainerState(containers.get(0), refContext, alignmentStart, alignmentSpan,
recordCount, READ_LENGTH_FOR_TEST_RECORDS * recordCount, globalRecordCount);

// when other refs are added, subsequent containers are multiref

recordCount++; // this container has 2 records
globalRecordCount = containers.get(0).nofRecords; // we've seen 1 record before this container
assertContainerState(containers.get(1), ReferenceContext.MULTIPLE_REFERENCE_CONTEXT, Slice.NO_ALIGNMENT_START, Slice.NO_ALIGNMENT_SPAN,
globalRecordCount, recordCount, CramCompressionRecordUtil.READ_LENGTH_FOR_TEST_RECORDS * recordCount);
CRAMStructureTestUtil.assertContainerState(containers.get(1), ReferenceContext.MULTIPLE_REFERENCE_CONTEXT,
Slice.NO_ALIGNMENT_START, Slice.NO_ALIGNMENT_SPAN,
recordCount, READ_LENGTH_FOR_TEST_RECORDS * recordCount, globalRecordCount);

recordCount++; // this container has 3 records
globalRecordCount = containers.get(0).nofRecords + containers.get(1).nofRecords; // we've seen 3 records before this container
assertContainerState(containers.get(2), ReferenceContext.MULTIPLE_REFERENCE_CONTEXT, Slice.NO_ALIGNMENT_START, Slice.NO_ALIGNMENT_SPAN,
globalRecordCount, recordCount, CramCompressionRecordUtil.READ_LENGTH_FOR_TEST_RECORDS * recordCount);
CRAMStructureTestUtil.assertContainerState(containers.get(2), ReferenceContext.MULTIPLE_REFERENCE_CONTEXT,
Slice.NO_ALIGNMENT_START, Slice.NO_ALIGNMENT_SPAN,
recordCount, READ_LENGTH_FOR_TEST_RECORDS * recordCount, globalRecordCount);
}

/**
* Builds one Container from the given records using a newly constructed ContainerFactory,
* so no state carries over from earlier builds. The factory is given the shared test
* SAM file header and TEST_RECORD_COUNT as its records-per-container setting.
*
* @param records the records to place in the container
* @return the Container produced by the factory
*/
private Container buildFromNewFactory(final List<CramCompressionRecord> records) {
final ContainerFactory factory = new ContainerFactory(CramCompressionRecordUtil.getSAMFileHeaderForTests(), TEST_RECORD_COUNT);
final ContainerFactory factory = new ContainerFactory(CRAMStructureTestUtil.getSAMFileHeaderForTests(), TEST_RECORD_COUNT);
return factory.buildContainer(records);
}

/**
 * Asserts the state of a container that is the first one built by its factory and that
 * holds TEST_RECORD_COUNT test records.
 *
 * Delegates to the seven-argument overload with a global record counter of 0, a record
 * count of TEST_RECORD_COUNT, and a base count of TEST_RECORD_COUNT reads of
 * READ_LENGTH_FOR_TEST_RECORDS bases each.
 *
 * @param container the Container under test
 * @param referenceContext the expected reference context
 * @param alignmentStart the expected alignment start
 * @param alignmentSpan the expected alignment span
 */
private void assertContainerState(final Container container,
                                  final ReferenceContext referenceContext,
                                  final int alignmentStart,
                                  final int alignmentSpan) {
    assertContainerState(
            container,
            referenceContext,
            alignmentStart,
            alignmentSpan,
            0, // first Container: no records precede it
            TEST_RECORD_COUNT,
            TEST_RECORD_COUNT * CramCompressionRecordUtil.READ_LENGTH_FOR_TEST_RECORDS);
}

/**
 * Asserts the state of a Container and of the single Slice it is expected to hold.
 *
 * The container must be non-null, must report the given reference context, alignment
 * start and span, record count, global record counter and base count, and must contain
 * exactly one slice. That slice must share the container's reference context and carry
 * the same alignment, counter and count values.
 *
 * @param container the Container under test
 * @param referenceContext the expected reference context
 * @param alignmentStart the expected alignment start
 * @param alignmentSpan the expected alignment span
 * @param globalRecordCounter the expected number of records preceding this container
 * @param recordCount the expected number of records in this container
 * @param baseCount the expected number of bases in this container
 */
private static void assertContainerState(final Container container,
                                         final ReferenceContext referenceContext,
                                         final int alignmentStart,
                                         final int alignmentSpan,
                                         final int globalRecordCounter,
                                         final int recordCount,
                                         final int baseCount) {
    // TestNG argument order is (actual, expected).
    Assert.assertNotNull(container);
    Assert.assertEquals(container.getReferenceContext(), referenceContext);
    Assert.assertEquals(container.alignmentStart, alignmentStart);
    Assert.assertEquals(container.alignmentSpan, alignmentSpan);
    Assert.assertEquals(container.nofRecords, recordCount);
    Assert.assertEquals(container.globalRecordCounter, globalRecordCounter);
    Assert.assertEquals(container.bases, baseCount);

    Assert.assertEquals(container.slices.length, 1);

    // verify the underlying slice too

    final Slice slice = container.slices[0];
    Assert.assertEquals(slice.getReferenceContext(), container.getReferenceContext());
    Assert.assertEquals(slice.alignmentStart, alignmentStart);
    Assert.assertEquals(slice.alignmentSpan, alignmentSpan);
    Assert.assertEquals(slice.nofRecords, recordCount);
    // globalRecordCounter was previously asserted twice for the slice; once is enough
    Assert.assertEquals(slice.globalRecordCounter, globalRecordCounter);
    Assert.assertEquals(slice.bases, baseCount);
}
}
Loading

0 comments on commit 1509dcc

Please sign in to comment.