Skip to content

Commit

Permalink
fix(call): better multi-stage strategy for determining TR consensus seqs
Browse files Browse the repository at this point in the history
  • Loading branch information
davidlougheed committed Jun 7, 2024
1 parent 9e2b311 commit 04dc445
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 6 deletions.
17 changes: 12 additions & 5 deletions strkit/call/consensus.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,17 @@ def best_representative(seqs: Iterable[str]) -> Optional[str]:

  def consensus_seq(seqs: Iterable[str], logger: logging.Logger) -> Optional[str]:
  # Return a stringified, gapless version of the column-wise mode for the MSA
- # - Filter out blanks and if the consensus fails, try eliminating the outlier VERY naively
- # via just comparing sorted values
- seqs_t = tuple(sorted(filter(neq_blank, seqs)))
- res = _consensus_seq(seqs_t)
+ # If the consensus fails, try a best-representative strategy instead. If that fails, something's gone wrong...
+
+ seqs_l = list(seqs)
+ if len(set(seqs_l)) == 1:
+ return seqs_l[0]
+
+ seqs_l.sort()
+ res = _consensus_seq(seqs_l)
  if res is None:
- logger.error(f"Got no consensus sequence from sequences: {seqs_t}")
+ logger.error(f"Got no consensus sequence from sequences {seqs_l}; trying best representative strategy")
+ res = best_representatives(seqs_l)
+ if res is None:
+ logger.debug(f"Got no best representative from sequences {seqs_l}")
  return res
2 changes: 1 addition & 1 deletion strkit/call/output/vcf.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ def output_contig_vcf_lines(
  n_alleles: int = get_n_alleles(2, params.sex_chroms, contig) or 2

  peak_seqs = (result["peaks"] or {}).get("seqs", ())
- if any(map(is_none, peak_seqs)):
+ if any(map(is_none, peak_seqs)): # Occurs when no consensus for one of the peaks
  logger.error(f"Encountered None in results[{result_idx}].peaks.seqs: {peak_seqs}")
  continue

Expand Down

0 comments on commit 04dc445

Please sign in to comment.