Skip to content

Commit

Permalink
Redux: use internal fast BAM writer
Browse files Browse the repository at this point in the history
  • Loading branch information
charlesshale committed Dec 21, 2024
1 parent f91a7bd commit 684dc0b
Show file tree
Hide file tree
Showing 2 changed files with 131 additions and 5 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
package com.hartwig.hmftools.common.bam;

import static com.hartwig.hmftools.common.bam.SamRecordUtils.SAM_LOGGER;

import java.io.File;
import java.io.OutputStream;
import java.io.StringWriter;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Iterator;

import htsjdk.samtools.BAMRecordCodec;
import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.SAMFileWriter;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.SAMSequenceRecord;
import htsjdk.samtools.SAMTextHeaderCodec;
import htsjdk.samtools.util.BinaryCodec;
import htsjdk.samtools.util.BlockCompressedOutputStream;
import htsjdk.samtools.util.IOUtil;
import htsjdk.samtools.util.ProgressLoggerInterface;
import htsjdk.samtools.util.zip.DeflaterFactory;

public class FastBamWriter implements SAMFileWriter
{
private final String mFilename;
private final SAMFileHeader mHeader;

private BinaryCodec mOutputBinaryCodec;
private BAMRecordCodec mBamRecordCodec;
private BlockCompressedOutputStream mBlockCompressedOutputStream;
private boolean mClosed;

// taken from HTS JDK
private static final int HTSJDK_COMPRESSION_LEVEL = 5;
private static final byte[] HTSJDK_BAM_MAGIC = "BAM\u0001".getBytes();

// unused:
private static final int BUFFER_SIZE = 131072;

public FastBamWriter(final SAMFileHeader header, final String filename)
{
mHeader = header;
mFilename = filename;

try
{
DeflaterFactory deflaterFactory = BlockCompressedOutputStream.getDefaultDeflaterFactory();
File outputFile = new File(filename);

OutputStream os = IOUtil.maybeBufferOutputStream(Files.newOutputStream(outputFile.toPath()), BUFFER_SIZE);
mBlockCompressedOutputStream = new BlockCompressedOutputStream(os, (Path) null, HTSJDK_COMPRESSION_LEVEL, deflaterFactory);
mOutputBinaryCodec = new BinaryCodec(mBlockCompressedOutputStream);
mOutputBinaryCodec.setOutputFileName(filename);

writeHeader();

mBamRecordCodec = new BAMRecordCodec(mHeader);
mBamRecordCodec.setOutputStream(mOutputBinaryCodec.getOutputStream(), mFilename);
}
catch(Exception e)
{
SAM_LOGGER.error("failed to open BAM({}) for writing: {}", filename, e.toString());

mOutputBinaryCodec = null;
mBamRecordCodec = null;
mBlockCompressedOutputStream = null;
}

mClosed = false;
}

public String filename() { return mFilename; }

@Override
public void addAlignment(SAMRecord record)
{
writeAlignment(record);
}

@Override
public SAMFileHeader getFileHeader() { return mHeader; }

@Override
public void setProgressLogger(final ProgressLoggerInterface loggerInterface) {}

@Override
public void close()
{
if(mClosed)
return;

mOutputBinaryCodec.close();
mClosed = true;
}

private void writeHeader()
{
StringWriter headerTextBuffer = new StringWriter();
(new SAMTextHeaderCodec()).encode(headerTextBuffer, mHeader);

mOutputBinaryCodec.writeBytes(HTSJDK_BAM_MAGIC);
mOutputBinaryCodec.writeString(headerTextBuffer.toString(), true, false);
mOutputBinaryCodec.writeInt(mHeader.getSequenceDictionary().size());

Iterator sequenceIter = mHeader.getSequenceDictionary().getSequences().iterator();

while(sequenceIter.hasNext())
{
SAMSequenceRecord sequenceRecord = (SAMSequenceRecord)sequenceIter.next();
mOutputBinaryCodec.writeString(sequenceRecord.getSequenceName(), true, true);
mOutputBinaryCodec.writeInt(sequenceRecord.getSequenceLength());
}
}

private void writeAlignment(final SAMRecord record)
{
mBamRecordCodec.encode(record);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import java.util.stream.Collectors;

import com.google.common.collect.Lists;
import com.hartwig.hmftools.common.bam.FastBamWriter;
import com.hartwig.hmftools.common.bamops.BamOperations;
import com.hartwig.hmftools.common.bamops.BamToolName;
import com.hartwig.hmftools.common.basequal.jitter.JitterAnalyser;
Expand Down Expand Up @@ -194,26 +195,31 @@ public SAMFileWriter initialiseSamFileWriter(final String filename, boolean isSo
{
SAMFileHeader fileHeader = buildCombinedHeader(mConfig.BamFiles, mConfig.RefGenomeFile);

// the sorted BAM writers make use of the pre-sorted mode, which still checks that each read is after the previous
// however since this check is redundant given that Redux sorts records itself, this presort checking is disabled after
// the first sorted BAM has been written. The first BAM needs to retain this presorted setting so the header is 'coordinate' sorted

/*
if(isSorted)
{
// the sorted BAM writers make use of the pre-sorted mode, which still checks that each read is after the previous
// however since this check is redundant given that Redux sorts records itself, this presort checking is disabled after
// the first sorted BAM has been written. The first BAM needs to retain this presorted setting so the header is 'coordinate' sorted
if(!mHasWrittenFirstSorted)
mHasWrittenFirstSorted = true;
else
isSorted = false;
}
*/

if(isSorted)
fileHeader.setSortOrder(SAMFileHeader.SortOrder.coordinate);
else
fileHeader.setSortOrder(SAMFileHeader.SortOrder.unsorted);

/*
boolean presorted = isSorted;

SAMFileWriter samFileWriter = new SAMFileWriterFactory().makeBAMWriter(fileHeader, presorted, new File(filename));
*/

SAMFileWriter samFileWriter = new FastBamWriter(fileHeader, filename);

return samFileWriter;
}
Expand Down

0 comments on commit 684dc0b

Please sign in to comment.