Skip to content

Commit

Permalink
handle bad cigar strs
Browse files (browse the repository at this point in the history)
  • Loading branch information
psathyrella committed Mar 17, 2024
1 parent ed7749d commit d0ea114
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 2 deletions.
2 changes: 1 addition & 1 deletion python/indelutils.py
Original file line number Diff line number Diff line change
Expand Up @@ -268,7 +268,7 @@ def check_cigar_len(cigars, qrseq, glseq, uid=None, debug=False): # check consi
cigar_len = sum([length for code, length in cigars if code != tmpcode])
if cigar_len != len(tmpseq):
# raise Exception('cigar length %d (without %s) doesn\'t match %s seq length %d%s' % (cigar_len, tmpcode, seqtype, len(tmpseq), (' for %s' % uid) if uid is not None else ''))
# print 'cigar length %d (without %s) doesn\'t match %s seq length %d%s' % (cigar_len, tmpcode, seqtype, len(tmpseq), (' for %s' % uid) if uid is not None else '')
# print('cigar length %d (without %s) doesn\'t match %s seq length %d%s' % (cigar_len, tmpcode, seqtype, len(tmpseq), (' for %s' % uid) if uid is not None else ''))
raise IndelfoReconstructionError() # ok i still don't like this but it happens

# ----------------------------------------------------------------------------------------
Expand Down
8 changes: 7 additions & 1 deletion python/waterer.py
Original file line number Diff line number Diff line change
Expand Up @@ -548,7 +548,7 @@ def read_query(self, references, reads):
'new_indels' : {}
}

last_scores = {r : None for r in utils.regions}
last_scores, bad_cigars = {r : None for r in utils.regions}, []
for read in reads: # loop over the matches found for each query sequence
read.seq = qinfo['seq'] # only the first one has read.seq set by default, so we need to set the rest by hand
gene = references[read.tid]
Expand All @@ -566,6 +566,9 @@ def read_query(self, references, reads):
assert len(qinfo['matches'][region]) == self.args.n_max_per_region[utils.regions.index(region)] # there better not be a way to get more than we asked for
continue

if 'M' not in read.cigarstring: # cigar str doesn't actually have any matches, which means the cigar parsing stuff will fail
bad_cigars.append(read.cigarstring)
continue
indelfo = indelutils.get_indelfo_from_cigar(read.cigarstring, qinfo['seq'], qrbounds, self.glfo['seqs'][region][gene], glbounds, {region : gene}, uid=qinfo['name']) # note that qinfo['seq'] differs from self.input_info[qinfo['name']]['seqs'][0] if we've already reversed an indel in this sequence
if indelutils.has_indels(indelfo):
if len(qinfo['matches'][region]) > 0: # skip any gene matches with indels after the first one for each region (if we want to handle [i.e. reverse] an indel, we will have stored the indel info for the first match, and we'll be rerunning)
Expand All @@ -578,6 +581,9 @@ def read_query(self, references, reads):
qinfo['qrbounds'][gene] = qrbounds
qinfo['glbounds'][gene] = glbounds

if len(bad_cigars) > 0:
print(' %s no M in %d / %d cigar strs for %s: %s' % (utils.wrnstr(), len(bad_cigars), len(reads), qinfo['name'], ' '.join(bad_cigars)))

if not utils.has_d_gene(self.args.locus) and len(qinfo['matches']['v']) > 0:
_, first_v_match = qinfo['matches']['v'][0]
self.add_dummy_d_match(qinfo, first_v_qr_end=qinfo['qrbounds'][first_v_match][1])
Expand Down

0 comments on commit d0ea114

Please sign in to comment.