Skip to content

Commit

Permalink
Make VCFHeader not throw exception if contig header lines lack length…
Browse files Browse the repository at this point in the history
… field (#1418)

* Make VCFHeader not throw exception if contig header lines lack length field
  • Loading branch information
cwhelan authored and lbergelson committed Oct 7, 2019
1 parent e357c42 commit 4d73aff
Show file tree
Hide file tree
Showing 3 changed files with 37 additions and 5 deletions.
17 changes: 15 additions & 2 deletions src/main/java/htsjdk/variant/vcf/VCFContigHeaderLine.java
Original file line number Diff line number Diff line change
Expand Up @@ -76,10 +76,23 @@ public Integer getContigIndex() {
return contigIndex;
}

/**
* Get the SAMSequenceRecord that corresponds to this VCF header line.
* If the VCF header line does not have a length tag, the SAMSequenceRecord returned will be set to have a length of
* SAMSequenceRecord.UNKNOWN_SEQUENCE_LENGTH. Records with unknown length will match any record with the same name
* when evaluated by SAMSequenceRecord.isSameSequence.
* @return The SAMSequenceRecord containing the ID, length, assembly, and index of this contig. If the contig header
* line does not have a length, the returned record's length is SAMSequenceRecord.UNKNOWN_SEQUENCE_LENGTH.
*/
public SAMSequenceRecord getSAMSequenceRecord() {
final String lengthString = this.getGenericFieldValue("length");
if (lengthString == null) throw new TribbleException("Contig " + this.getID() + " does not have a length field.");
final SAMSequenceRecord record = new SAMSequenceRecord(this.getID(), Integer.parseInt(lengthString));
final int length;
if (lengthString == null) {
length = SAMSequenceRecord.UNKNOWN_SEQUENCE_LENGTH;
} else {
length = Integer.parseInt(lengthString);
}
final SAMSequenceRecord record = new SAMSequenceRecord(this.getID(), length);
record.setAssembly(this.getGenericFieldValue("assembly"));
record.setSequenceIndex(this.contigIndex);
return record;
Expand Down
8 changes: 5 additions & 3 deletions src/main/java/htsjdk/variant/vcf/VCFHeader.java
Original file line number Diff line number Diff line change
Expand Up @@ -261,16 +261,18 @@ public List<VCFContigHeaderLine> getContigLines() {

/**
* Returns the contigs in this VCF file as a SAMSequenceDictionary. Returns null if contig lines are
* not present in the header. Throws SAMException if one or more contig lines do not have length
* information.
* not present in the header. If contig lines are missing length tags, they will be created with
* length set to SAMSequenceRecord.UNKNOWN_SEQUENCE_LENGTH. Records with unknown length will match any record with
* the same name when evaluated by SAMSequenceRecord.isSameSequence.
*/
public SAMSequenceDictionary getSequenceDictionary() {
final List<VCFContigHeaderLine> contigHeaderLines = this.getContigLines();
if (contigHeaderLines.isEmpty()) return null;

final List<SAMSequenceRecord> sequenceRecords = new ArrayList<SAMSequenceRecord>(contigHeaderLines.size());
for (final VCFContigHeaderLine contigHeaderLine : contigHeaderLines) {
sequenceRecords.add(contigHeaderLine.getSAMSequenceRecord());
final SAMSequenceRecord samSequenceRecord = contigHeaderLine.getSAMSequenceRecord();
sequenceRecords.add(samSequenceRecord);
}

return new SAMSequenceDictionary(sequenceRecords);
Expand Down
17 changes: 17 additions & 0 deletions src/test/java/htsjdk/variant/vcf/VCFHeaderUnitTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@

package htsjdk.variant.vcf;

import htsjdk.samtools.SAMSequenceDictionary;
import htsjdk.samtools.SAMSequenceRecord;
import htsjdk.samtools.util.CloseableIterator;
import htsjdk.samtools.util.FileExtensions;
import htsjdk.samtools.util.IOUtil;
Expand Down Expand Up @@ -239,6 +241,21 @@ public void testVCFHeaderAddContigLine() {
}

@Test
public void testVCFHeaderContigLineMissingLength() {
    // Add a contig line that carries only an ID (no length tag) to the header from getHiSeqVCFHeader().
    final VCFHeader vcfHeader = getHiSeqVCFHeader();
    final VCFContigHeaderLine lengthlessContigLine =
            new VCFContigHeaderLine("<ID=chr1>", VCFHeaderVersion.VCF4_0, VCFHeader.CONTIG_KEY, 0);
    vcfHeader.addMetaDataLine(lengthlessContigLine);

    // The new line must be visible both among the contig lines and among all metadata lines.
    Assert.assertTrue(vcfHeader.getContigLines().contains(lengthlessContigLine), "Test contig line not found in contig header lines");
    Assert.assertTrue(vcfHeader.getMetaDataInInputOrder().contains(lengthlessContigLine), "Test contig line not found in set of all header lines");

    // Building the dictionary must succeed, and chr1 must report the unknown-length sentinel.
    final SAMSequenceDictionary dictionary = vcfHeader.getSequenceDictionary();
    Assert.assertNotNull(dictionary);
    final SAMSequenceRecord chr1Record = dictionary.getSequence("chr1");
    Assert.assertEquals(chr1Record.getSequenceLength(), SAMSequenceRecord.UNKNOWN_SEQUENCE_LENGTH);
}

@Test
public void testVCFHeaderHonorContigLineOrder() throws IOException {
try (final VCFFileReader vcfReader = new VCFFileReader(new File(variantTestDataRoot + "dbsnp_135.b37.1000.vcf"), false)) {
// start with a header with a bunch of contig lines
Expand Down

0 comments on commit 4d73aff

Please sign in to comment.