From 0b16296654e4c67a2ae3b13785f6801a5941464a Mon Sep 17 00:00:00 2001 From: Chris Norman Date: Wed, 27 Feb 2019 10:01:27 -0500 Subject: [PATCH] Simplify CRAM sequence dictionary extractor to not require a fake reference. (#1308) --- .../java/htsjdk/samtools/CRAMIterator.java | 25 ------------------- .../utils/SAMSequenceDictionaryExtractor.java | 23 ++++++++++++----- 2 files changed, 17 insertions(+), 31 deletions(-) diff --git a/src/main/java/htsjdk/samtools/CRAMIterator.java b/src/main/java/htsjdk/samtools/CRAMIterator.java index 72b1082288..3e99e8e755 100644 --- a/src/main/java/htsjdk/samtools/CRAMIterator.java +++ b/src/main/java/htsjdk/samtools/CRAMIterator.java @@ -32,24 +32,13 @@ import java.io.InputStream; import java.math.BigInteger; -import java.nio.file.Files; -import java.nio.file.Path; import java.util.*; import htsjdk.samtools.cram.CRAMException; -import htsjdk.samtools.util.IOUtil; import htsjdk.samtools.util.RuntimeIOException; public class CRAMIterator implements SAMRecordIterator { - /** A CRAMReferenceSource that is never invoked . It's used in {@link #extractDictionary(Path)}*/ - private static final CRAMReferenceSource NIL_CRAM_REFERENCE_SRC = new CRAMReferenceSource() { - @Override - public byte[] getReferenceBases(final SAMSequenceRecord sequenceRecord, boolean tryNameVariants) { - throw new IllegalStateException("CRAMReferenceSource.getReferenceBases shouldn't be called"); - } - }; - private final CountingInputStream countingInputStream; private final CramHeader cramHeader; private final ArrayList records; @@ -315,18 +304,4 @@ public SAMFileHeader getSAMFileHeader() { return cramHeader.getSamFileHeader(); } - /** extracts a {@link SAMSequenceDictionary} from a cram file. 
- * @return the dictionary of the cram file - * @throws SAMException if a dictionary cannot be extracted - */ - public static SAMSequenceDictionary extractDictionary(final Path cramPath) { - IOUtil.assertFileIsReadable(cramPath); - try (final InputStream in = Files.newInputStream(cramPath)) { - try(final CRAMIterator iter= new CRAMIterator(in, NIL_CRAM_REFERENCE_SRC, ValidationStringency.SILENT)) { - return iter.getSAMFileHeader().getSequenceDictionary(); - } - } catch (final Exception err) { - throw new SAMException("Cannot extract dictionary from "+cramPath, err); - } - } } diff --git a/src/main/java/htsjdk/variant/utils/SAMSequenceDictionaryExtractor.java b/src/main/java/htsjdk/variant/utils/SAMSequenceDictionaryExtractor.java index e236fc6303..46744efd7c 100644 --- a/src/main/java/htsjdk/variant/utils/SAMSequenceDictionaryExtractor.java +++ b/src/main/java/htsjdk/variant/utils/SAMSequenceDictionaryExtractor.java @@ -26,19 +26,20 @@ import htsjdk.samtools.*; import htsjdk.samtools.cram.build.CramIO; +import htsjdk.samtools.cram.structure.CramHeader; import htsjdk.samtools.reference.ReferenceSequenceFileFactory; -import htsjdk.samtools.util.BufferedLineReader; -import htsjdk.samtools.util.CollectionUtil; -import htsjdk.samtools.util.IOUtil; -import htsjdk.samtools.util.IntervalList; +import htsjdk.samtools.util.*; import htsjdk.tribble.util.ParsingUtils; import htsjdk.variant.vcf.VCFFileReader; import java.io.File; import java.io.IOException; +import java.io.InputStream; +import java.nio.file.Files; import java.nio.file.Path; import java.util.Arrays; import java.util.Collection; +import java.util.Optional; /** * Small class for loading a SAMSequenceDictionary from a file @@ -74,8 +75,18 @@ SAMSequenceDictionary extractDictionary(final Path dictionary) { CRAM(CramIO.CRAM_FILE_EXTENSION) { @Override - SAMSequenceDictionary extractDictionary(final Path cram) { - return CRAMIterator.extractDictionary(cram); + SAMSequenceDictionary extractDictionary(final Path 
cramPath) { + IOUtil.assertFileIsReadable(cramPath); + try (final InputStream in = Files.newInputStream(cramPath)) { + final CramHeader cramHeader = CramIO.readCramHeader(in); + final Optional<SAMFileHeader> samHeader = Optional.ofNullable(cramHeader.getSamFileHeader()); + if (samHeader.isPresent()) { + return samHeader.get().getSequenceDictionary(); + } + } catch (IOException e) { + throw new RuntimeIOException(e); + } + throw new SAMException(String.format("Can't retrieve sequence dictionary from %s", cramPath)); } }, SAM(IOUtil.SAM_FILE_EXTENSION, BamFileIoUtils.BAM_FILE_EXTENSION) {