From dec84c3ead1e663852ea79f8c5a4dde40d6b7c20 Mon Sep 17 00:00:00 2001 From: Florian Battke Date: Mon, 4 Jul 2022 14:30:42 +0200 Subject: [PATCH] Filter bad-cigar reads in bam streamer These reads are ok according to SAM/BAM spec, but Manta can't work with them --- src/c++/lib/htsapi/bam_streamer.cpp | 36 ++++++++++++++++++++++++++++- src/c++/lib/htsapi/bam_streamer.hpp | 1 + 2 files changed, 36 insertions(+), 1 deletion(-) diff --git a/src/c++/lib/htsapi/bam_streamer.cpp b/src/c++/lib/htsapi/bam_streamer.cpp index cddb2998..d61943cc 100644 --- a/src/c++/lib/htsapi/bam_streamer.cpp +++ b/src/c++/lib/htsapi/bam_streamer.cpp @@ -186,7 +186,7 @@ void bam_streamer::resetRegion(int referenceContigId, int beginPos, int endPos) _record_no = 0; } -bool bam_streamer::next() +bool bam_streamer::next0() { if (nullptr == _hfp) return false; @@ -223,6 +223,40 @@ bool bam_streamer::next() return _is_record_set; } +bool bam_streamer::next() { + bool ret; + while ((ret=next0())) { + const auto cigar = _brec.raw_cigar(); + if (_brec.n_cigar() > 1) { + int op0 = bam_cigar_op(cigar[0]); + int op1 = bam_cigar_op(cigar[1]); + int op_n0 = bam_cigar_op(cigar[_brec.n_cigar() - 1]); + int op_n1 = bam_cigar_op(cigar[_brec.n_cigar() - 2]); + if ( + (op0 == BAM_CINS && op1 == BAM_CDEL) || //starts with ID + (op0 == BAM_CDEL && op1 == BAM_CINS) || //starts with DI + (op_n1 == BAM_CDEL && op_n0 == BAM_CINS) || //ends with DI + (op_n1 == BAM_CINS && op_n0 == BAM_CDEL) || //ends with ID + (op0 == BAM_CSOFT_CLIP && (op1==BAM_CDEL || op1==BAM_CINS)) || // starts with SD or SI + ((op_n1 == BAM_CDEL || op_n1==BAM_CINS) && op_n0 == BAM_CSOFT_CLIP) // ends with SD or SI + ) { + char op0cigar = (op0==BAM_CINS?'I':(op0==BAM_CDEL?'D':(op0==BAM_CSOFT_CLIP?'S':'x'))); + char op1cigar = (op1==BAM_CINS?'I':(op1==BAM_CDEL?'D':(op1==BAM_CSOFT_CLIP?'S':'x'))); + char opn1cigar = (op_n1==BAM_CINS?'I':(op_n1==BAM_CDEL?'D':(op_n1==BAM_CSOFT_CLIP?'S':'x'))); + char opn0cigar = (op_n0==BAM_CINS?'I':(op_n0==BAM_CDEL?'D':(op_n0==BAM_CSOFT_CLIP?'S':'x'))); + std::cout << "Skipping read: " << _brec.qname() << " CIGAR edges: " + << " " << op0cigar << op1cigar <<"..."<< opn1cigar<header->n_targets); diff --git a/src/c++/lib/htsapi/bam_streamer.hpp b/src/c++/lib/htsapi/bam_streamer.hpp index f181e202..81f68369 100644 --- a/src/c++/lib/htsapi/bam_streamer.hpp +++ b/src/c++/lib/htsapi/bam_streamer.hpp @@ -76,6 +76,7 @@ struct bam_streamer : public stream_state_reporter, public boost::noncopyable { void resetRegion(int referenceContigId, int beginPos, int endPos); bool next(); + bool next0(); const bam_record* get_record_ptr() const {