diff --git a/src/aligner.cpp b/src/aligner.cpp index f10c8512a27..808a4d189e5 100644 --- a/src/aligner.cpp +++ b/src/aligner.cpp @@ -1421,7 +1421,8 @@ void Aligner::align_pinned_multi(Alignment& alignment, vector& alt_al } void Aligner::align_global_banded(Alignment& alignment, const HandleGraph& g, - int32_t band_padding, bool permissive_banding) const { + int32_t band_padding, bool permissive_banding, + const unordered_map* left_align_strand) const { if (alignment.sequence().empty()) { // we can save time by using a specialized deletion aligner for empty strings @@ -1446,7 +1447,8 @@ void Aligner::align_global_banded(Alignment& alignment, const HandleGraph& g, g, band_padding, permissive_banding, - false); + false, + left_align_strand); band_graph.align(score_matrix, nt_table, gap_open, gap_extension); } else if (best_score <= numeric_limits::max() && worst_score >= numeric_limits::min()) { @@ -1455,7 +1457,8 @@ void Aligner::align_global_banded(Alignment& alignment, const HandleGraph& g, g, band_padding, permissive_banding, - false); + false, + left_align_strand); band_graph.align(score_matrix, nt_table, gap_open, gap_extension); } else if (best_score <= numeric_limits::max() && worst_score >= numeric_limits::min()) { @@ -1464,7 +1467,8 @@ void Aligner::align_global_banded(Alignment& alignment, const HandleGraph& g, g, band_padding, permissive_banding, - false); + false, + left_align_strand); band_graph.align(score_matrix, nt_table, gap_open, gap_extension); } else { @@ -1473,14 +1477,16 @@ void Aligner::align_global_banded(Alignment& alignment, const HandleGraph& g, g, band_padding, permissive_banding, - false); + false, + left_align_strand); band_graph.align(score_matrix, nt_table, gap_open, gap_extension); } } void Aligner::align_global_banded_multi(Alignment& alignment, vector& alt_alignments, const HandleGraph& g, - int32_t max_alt_alns, int32_t band_padding, bool permissive_banding) const { + int32_t max_alt_alns, int32_t band_padding, bool permissive_banding, + const unordered_map* left_align_strand) const { if (alignment.sequence().empty()) { // we can save time by using a specialized deletion aligner for empty strings @@ -1505,7 +1511,8 @@ void Aligner::align_global_banded_multi(Alignment& alignment, vector& max_alt_alns, band_padding, permissive_banding, - false); + false, + left_align_strand); band_graph.align(score_matrix, nt_table, gap_open, gap_extension); } else if (best_score <= numeric_limits::max() && worst_score >= numeric_limits::min()) { @@ -1516,7 +1523,8 @@ void Aligner::align_global_banded_multi(Alignment& alignment, vector& max_alt_alns, band_padding, permissive_banding, - false); + false, + left_align_strand); band_graph.align(score_matrix, nt_table, gap_open, gap_extension); } else if (best_score <= numeric_limits::max() && worst_score >= numeric_limits::min()) { @@ -1527,7 +1535,8 @@ void Aligner::align_global_banded_multi(Alignment& alignment, vector& max_alt_alns, band_padding, permissive_banding, - false); + false, + left_align_strand); band_graph.align(score_matrix, nt_table, gap_open, gap_extension); } else { @@ -1538,7 +1547,8 @@ void Aligner::align_global_banded_multi(Alignment& alignment, vector& max_alt_alns, band_padding, permissive_banding, - false); + false, + left_align_strand); band_graph.align(score_matrix, nt_table, gap_open, gap_extension); } @@ -2095,7 +2105,8 @@ void QualAdjAligner::align_pinned_multi(Alignment& alignment, vector& } void QualAdjAligner::align_global_banded(Alignment& alignment, const HandleGraph& g, - int32_t band_padding, bool permissive_banding) const { + int32_t band_padding, bool permissive_banding, + const unordered_map* left_align_strand) const { if (alignment.sequence().empty()) { // we can save time by using a specialized deletion aligner for empty strings @@ -2118,7 +2129,8 @@ void QualAdjAligner::align_global_banded(Alignment& alignment, const HandleGraph g, band_padding, permissive_banding, - true); + true, + left_align_strand); band_graph.align(score_matrix, nt_table, gap_open, gap_extension); } else if (best_score <= numeric_limits::max() && worst_score >= numeric_limits::min()) { @@ -2127,7 +2139,8 @@ void QualAdjAligner::align_global_banded(Alignment& alignment, const HandleGraph g, band_padding, permissive_banding, - true); + true, + left_align_strand); band_graph.align(score_matrix, nt_table, gap_open, gap_extension); } else if (best_score <= numeric_limits::max() && worst_score >= numeric_limits::min()) { @@ -2136,7 +2149,8 @@ void QualAdjAligner::align_global_banded(Alignment& alignment, const HandleGraph g, band_padding, permissive_banding, - true); + true, + left_align_strand); band_graph.align(score_matrix, nt_table, gap_open, gap_extension); } else { @@ -2145,14 +2159,16 @@ void QualAdjAligner::align_global_banded(Alignment& alignment, const HandleGraph g, band_padding, permissive_banding, - true); + true, + left_align_strand); band_graph.align(score_matrix, nt_table, gap_open, gap_extension); } } void QualAdjAligner::align_global_banded_multi(Alignment& alignment, vector& alt_alignments, const HandleGraph& g, - int32_t max_alt_alns, int32_t band_padding, bool permissive_banding) const { + int32_t max_alt_alns, int32_t band_padding, bool permissive_banding, + const unordered_map* left_align_strand) const { if (alignment.sequence().empty()) { // we can save time by using a specialized deletion aligner for empty strings @@ -2177,7 +2193,8 @@ void QualAdjAligner::align_global_banded_multi(Alignment& alignment, vector::max() && worst_score >= numeric_limits::min()) { @@ -2188,7 +2205,8 @@ void QualAdjAligner::align_global_banded_multi(Alignment& alignment, vector::max() && worst_score >= numeric_limits::min()) { @@ -2199,7 +2217,8 @@ void QualAdjAligner::align_global_banded_multi(Alignment& alignment, vector* left_align_strand = nullptr) const = 0; /// store top scoring global alignments in the vector in descending score order up to a maximum number /// of alternate alignments (including the optimal alignment). if there are fewer than the maximum @@ -161,7 +162,8 @@ namespace vg { /// optimal alignment will be stored in both the vector and the original alignment object virtual void align_global_banded_multi(Alignment& alignment, vector& alt_alignments, const HandleGraph& g, int32_t max_alt_alns, int32_t band_padding = 0, - bool permissive_banding = true) const = 0; + bool permissive_banding = true, + const unordered_map* left_align_strand = nullptr) const = 0; /// xdrop aligner virtual void align_xdrop(Alignment& alignment, const HandleGraph& g, const vector& mems, bool reverse_complemented, uint16_t max_gap_length = default_xdrop_max_gap_length) const = 0; @@ -358,14 +360,16 @@ namespace vg { /// permissive banding auto detects the width of band needed so that paths can travel /// through every node in the graph void align_global_banded(Alignment& alignment, const HandleGraph& g, - int32_t band_padding = 0, bool permissive_banding = true) const; + int32_t band_padding = 0, bool permissive_banding = true, + const unordered_map* left_align_strand = nullptr) const; /// store top scoring global alignments in the vector in descending score order up to a maximum number /// of alternate alignments (including the optimal alignment). if there are fewer than the maximum /// number of alignments in the return value, then the vector contains all possible alignments. the /// optimal alignment will be stored in both the vector and the original alignment object void align_global_banded_multi(Alignment& alignment, vector& alt_alignments, const HandleGraph& g, - int32_t max_alt_alns, int32_t band_padding = 0, bool permissive_banding = true) const; + int32_t max_alt_alns, int32_t band_padding = 0, bool permissive_banding = true, + const unordered_map* left_align_strand = nullptr) const; /// xdrop aligner void align_xdrop(Alignment& alignment, const HandleGraph& g, const vector& mems, @@ -428,11 +432,13 @@ namespace vg { void align(Alignment& alignment, const HandleGraph& g, bool traceback_aln) const; void align_global_banded(Alignment& alignment, const HandleGraph& g, - int32_t band_padding = 0, bool permissive_banding = true) const; + int32_t band_padding = 0, bool permissive_banding = true, + const unordered_map* left_align_strand = nullptr) const; void align_pinned(Alignment& alignment, const HandleGraph& g, bool pin_left, bool xdrop = false, uint16_t xdrop_max_gap_length = default_xdrop_max_gap_length) const; void align_global_banded_multi(Alignment& alignment, vector& alt_alignments, const HandleGraph& g, - int32_t max_alt_alns, int32_t band_padding = 0, bool permissive_banding = true) const; + int32_t max_alt_alns, int32_t band_padding = 0, bool permissive_banding = true, + const unordered_map* left_align_strand = nullptr) const; void align_pinned_multi(Alignment& alignment, vector& alt_alignments, const HandleGraph& g, bool pin_left, int32_t max_alt_alns) const; diff --git a/src/banded_global_aligner.cpp b/src/banded_global_aligner.cpp index 0dca6eb05bb..fb8ff9f73b9 100644 --- a/src/banded_global_aligner.cpp +++ b/src/banded_global_aligner.cpp @@ -209,13 +209,14 @@ void BandedGlobalAligner::BABuilder::finalize_alignment(const list BandedGlobalAligner::BAMatrix::BAMatrix(Alignment& alignment, handle_t node, int64_t top_diag, int64_t bottom_diag, - const vector& seeds, int64_t cumulative_seq_len) : + const vector& seeds, int64_t cumulative_seq_len, bool left_alignment_strand) : node(node), top_diag(top_diag), bottom_diag(bottom_diag), seeds(seeds), alignment(alignment), cumulative_seq_len(cumulative_seq_len), + left_alignment_strand(left_alignment_strand), match(nullptr), insert_col(nullptr), insert_row(nullptr) @@ -810,6 +811,11 @@ void BandedGlobalAligner::BAMatrix::traceback(const HandleGraph& graph, continue; } + vector prev_mats{InsertCol, Match, InsertRow}; + if (left_alignment_strand) { + std::swap(prev_mats[0], prev_mats[2]); + } + // find optimal traceback idx = i * ncols + j; bool found_trace = false; @@ -843,51 +849,71 @@ void BandedGlobalAligner::BAMatrix::traceback(const HandleGraph& graph, cerr << "[BAMatrix::traceback] transitioning from match, current score " << (int) match[idx] << " match/mismatch score " << (int) match_score << " from node char " << j << " (" << node_seq[j] << ") and read char " << i + top_diag + j << " (" << read[i + top_diag + j] << ")" << endl; #endif - source_score = match[next_idx]; - score_diff = curr_score - (source_score + match_score); - if (score_diff == 0) { + for (auto prev_mat : prev_mats) { + + switch (prev_mat) { + case Match: + { + source_score = match[next_idx]; + score_diff = curr_score - (source_score + match_score); + if (score_diff == 0) { #ifdef debug_banded_aligner_traceback - cerr << "[BAMatrix::traceback] found next cell in match matrix with score " << (int) match[next_idx] << endl; -#endif - mat = Match; - found_trace = true; - } - else if (source_score != min_inf) { - alt_score = curr_traceback_score - score_diff; - traceback_stack.propose_deflection(alt_score, node_id, i, j, node_id, Match); - } - - source_score = insert_row[next_idx]; - if (source_score > min_inf) { - score_diff = curr_score - (source_score + match_score); - if (!found_trace && score_diff == 0) { + cerr << "[BAMatrix::traceback] found next cell in match matrix with score " << (int) match[next_idx] << endl; +#endif + mat = Match; + found_trace = true; + } + else if (source_score != min_inf) { + alt_score = curr_traceback_score - score_diff; + traceback_stack.propose_deflection(alt_score, node_id, i, j, node_id, Match); + } + break; + } + case InsertRow: + { + source_score = insert_row[next_idx]; + if (source_score > min_inf) { + score_diff = curr_score - (source_score + match_score); + if (!found_trace && score_diff == 0) { #ifdef debug_banded_aligner_traceback - cerr << "[BAMatrix::traceback] found next cell in insert row matrix with score " << (int) insert_row[next_idx] << endl; -#endif - mat = InsertRow; - found_trace = true; - } - else { - alt_score = curr_traceback_score - score_diff; - traceback_stack.propose_deflection(alt_score, node_id, i, j, node_id, InsertRow); - } - } - - source_score = insert_col[next_idx]; - if (source_score > min_inf) { - score_diff = curr_score - (source_score + match_score); - if (!found_trace && score_diff == 0) { + cerr << "[BAMatrix::traceback] found next cell in insert row matrix with score " << (int) insert_row[next_idx] << endl; +#endif + mat = InsertRow; + found_trace = true; + } + else { + alt_score = curr_traceback_score - score_diff; + traceback_stack.propose_deflection(alt_score, node_id, i, j, node_id, InsertRow); + } + } + break; + } + case InsertCol: + { + source_score = insert_col[next_idx]; + if (source_score > min_inf) { + score_diff = curr_score - (source_score + match_score); + if (!found_trace && score_diff == 0) { #ifdef debug_banded_aligner_traceback - cerr << "[BAMatrix::traceback] found next cell in insert column matrix with score " << (int) insert_col[next_idx] << endl; -#endif - mat = InsertCol; - found_trace = true; - } - else if (source_score != min_inf) { - alt_score = curr_traceback_score - score_diff; - traceback_stack.propose_deflection(alt_score, node_id, i, j, node_id, InsertCol); + cerr << "[BAMatrix::traceback] found next cell in insert column matrix with score " << (int) insert_col[next_idx] << endl; +#endif + mat = InsertCol; + found_trace = true; + } + else if (source_score != min_inf) { + alt_score = curr_traceback_score - score_diff; + traceback_stack.propose_deflection(alt_score, node_id, i, j, node_id, InsertCol); + } + + } + break; + } + default: + { + cerr << "error: invalid previous matrix" << endl; + exit(1); + } } - } if (!found_trace) { @@ -918,41 +944,61 @@ void BandedGlobalAligner::BAMatrix::traceback(const HandleGraph& graph, curr_score = insert_row[idx]; next_idx = (i - 1) * ncols + j; - - source_score = match[next_idx]; - score_diff = curr_score - (source_score - gap_open); - if (score_diff == 0) { - mat = Match; - found_trace = true; - } - else if (source_score != min_inf) { - alt_score = curr_traceback_score - score_diff; - traceback_stack.propose_deflection(alt_score, node_id, i, j, node_id, Match); - } - - source_score = insert_row[next_idx]; - if (source_score > min_inf) { - score_diff = curr_score - (source_score - gap_extend); - if (!found_trace && score_diff == 0) { - mat = InsertRow; - found_trace = true; - } - else if (source_score != min_inf) { - alt_score = curr_traceback_score - score_diff; - traceback_stack.propose_deflection(alt_score, node_id, i, j, node_id, InsertRow); - } - } - - source_score = insert_col[next_idx]; - if (source_score > min_inf) { - score_diff = curr_score - (source_score - gap_open); - if (!found_trace && score_diff == 0) { - mat = InsertCol; - found_trace = true; - } - else if (source_score != min_inf) { - alt_score = curr_traceback_score - score_diff; - traceback_stack.propose_deflection(alt_score, node_id, i, j, node_id, InsertCol); + + for (auto prev_mat : prev_mats) { + + switch (prev_mat) { + case Match: + { + source_score = match[next_idx]; + score_diff = curr_score - (source_score - gap_open); + if (score_diff == 0) { + mat = Match; + found_trace = true; + } + else if (source_score != min_inf) { + alt_score = curr_traceback_score - score_diff; + traceback_stack.propose_deflection(alt_score, node_id, i, j, node_id, Match); + } + break; + } + case InsertRow: + { + source_score = insert_row[next_idx]; + if (source_score > min_inf) { + score_diff = curr_score - (source_score - gap_extend); + if (!found_trace && score_diff == 0) { + mat = InsertRow; + found_trace = true; + } + else if (source_score != min_inf) { + alt_score = curr_traceback_score - score_diff; + traceback_stack.propose_deflection(alt_score, node_id, i, j, node_id, InsertRow); + } + } + break; + } + case InsertCol: + { + source_score = insert_col[next_idx]; + if (source_score > min_inf) { + score_diff = curr_score - (source_score - gap_open); + if (!found_trace && score_diff == 0) { + mat = InsertCol; + found_trace = true; + } + else if (source_score != min_inf) { + alt_score = curr_traceback_score - score_diff; + traceback_stack.propose_deflection(alt_score, node_id, i, j, node_id, InsertCol); + } + } + break; + } + default: + { + cerr << "error: invalid previous matrix" << endl; + exit(1); + } } } @@ -978,40 +1024,59 @@ void BandedGlobalAligner::BAMatrix::traceback(const HandleGraph& graph, curr_score = insert_col[idx]; next_idx = (i + 1) * ncols + j - 1; - source_score = match[next_idx]; - score_diff = curr_score - (source_score - gap_open); - if (score_diff == 0) { - mat = Match; - found_trace = true; - } - else if (source_score != min_inf) { - alt_score = curr_traceback_score - score_diff; - traceback_stack.propose_deflection(alt_score, node_id, i, j, node_id, Match); - } - - source_score = insert_row[next_idx]; - if (source_score > min_inf) { - score_diff = curr_score - (source_score - gap_open); - if (!found_trace && score_diff == 0) { - mat = InsertRow; - found_trace = true; - } - else if (source_score != min_inf) { - alt_score = curr_traceback_score - score_diff; - traceback_stack.propose_deflection(alt_score, node_id, i, j, node_id, InsertRow); - } - } - - source_score = insert_col[next_idx]; - if (source_score > min_inf) { - score_diff = curr_score - (source_score - gap_extend); - if (!found_trace && score_diff == 0) { - mat = InsertCol; - found_trace = true; - } - else if (source_score != min_inf) { - alt_score = curr_traceback_score - score_diff; - traceback_stack.propose_deflection(alt_score, node_id, i, j, node_id, InsertCol); + for (auto prev_mat : prev_mats) { + switch (prev_mat) { + case Match: + { + source_score = match[next_idx]; + score_diff = curr_score - (source_score - gap_open); + if (score_diff == 0) { + mat = Match; + found_trace = true; + } + else if (source_score != min_inf) { + alt_score = curr_traceback_score - score_diff; + traceback_stack.propose_deflection(alt_score, node_id, i, j, node_id, Match); + } + break; + } + case InsertRow: + { + source_score = insert_row[next_idx]; + if (source_score > min_inf) { + score_diff = curr_score - (source_score - gap_open); + if (!found_trace && score_diff == 0) { + mat = InsertRow; + found_trace = true; + } + else if (source_score != min_inf) { + alt_score = curr_traceback_score - score_diff; + traceback_stack.propose_deflection(alt_score, node_id, i, j, node_id, InsertRow); + } + } + break; + } + case InsertCol: + { + source_score = insert_col[next_idx]; + if (source_score > min_inf) { + score_diff = curr_score - (source_score - gap_extend); + if (!found_trace && score_diff == 0) { + mat = InsertCol; + found_trace = true; + } + else if (source_score != min_inf) { + alt_score = curr_traceback_score - score_diff; + traceback_stack.propose_deflection(alt_score, node_id, i, j, node_id, InsertCol); + } + } + break; + } + default: + { + cerr << "error: invalid previous matrix" << endl; + exit(1); + } } } @@ -1352,6 +1417,10 @@ void BandedGlobalAligner::BAMatrix::traceback_over_edge(const HandleGra #ifdef debug_banded_aligner_traceback cerr << "[BAMatrix::traceback_over_edge] checking seed rectangular coordinates (" << seed_row << ", " << seed_col << "), with indices calculated from current diagonal " << curr_diag << " (top diag " << top_diag << " + offset " << i << "), seed top diagonal " << seed->top_diag << ", seed seq length " << seed_ncols << " with insert column offset " << (mat == InsertCol) << endl; #endif + vector prev_mats{InsertCol, Match, InsertRow}; + if (left_alignment_strand) { + std::swap(prev_mats[0], prev_mats[2]); + } switch (mat) { case Match: @@ -1388,64 +1457,83 @@ void BandedGlobalAligner::BAMatrix::traceback_over_edge(const HandleGra break; } - source_score = seed->match[next_idx]; - // don't need to check edge condition because match does not have min inf - score_diff = curr_score - (source_score + match_score); - if (score_diff == 0 && !found_trace) { - traceback_mat = Match; - traceback_seed = seed; - traceback_seed_row = seed_row; - traceback_seed_col = seed_col; - found_trace = true; - empty_intermediate_nodes = seed_record.second; + for (auto prev_mat : prev_mats) { + switch (prev_mat) { + case Match: + { + source_score = seed->match[next_idx]; + // don't need to check edge condition because match does not have min inf + score_diff = curr_score - (source_score + match_score); + if (score_diff == 0 && !found_trace) { + traceback_mat = Match; + traceback_seed = seed; + traceback_seed_row = seed_row; + traceback_seed_col = seed_col; + found_trace = true; + empty_intermediate_nodes = seed_record.second; #ifdef debug_banded_aligner_traceback - cerr << "[BAMatrix::traceback_over_edge] hit found in match matrix with score " << (int) seed->match[next_idx] << endl; -#endif - } - else if (source_score != min_inf) { - alt_score = curr_traceback_score - score_diff; - traceback_stack.propose_deflection(alt_score, node_id, i, j, seed_node_id, Match); - } - - source_score = seed->insert_col[next_idx]; - // check edge condition - if (source_score > min_inf) { - score_diff = curr_score - (source_score + match_score); - if (score_diff == 0 && !found_trace) { + cerr << "[BAMatrix::traceback_over_edge] hit found in match matrix with score " << (int) seed->match[next_idx] << endl; +#endif + } + else if (source_score != min_inf) { + alt_score = curr_traceback_score - score_diff; + traceback_stack.propose_deflection(alt_score, node_id, i, j, seed_node_id, Match); + } + break; + } + case InsertRow: + { + source_score = seed->insert_row[next_idx]; + // check edge condition + if (source_score > min_inf) { + score_diff = curr_score - (source_score + match_score); + if (score_diff == 0 && !found_trace) { #ifdef debug_banded_aligner_traceback - cerr << "[BAMatrix::traceback_over_edge] hit found in insert column matrix with score " << (int) seed->insert_col[next_idx] << endl; -#endif - traceback_mat = InsertCol; - traceback_seed = seed; - traceback_seed_row = seed_row; - traceback_seed_col = seed_col; - found_trace = true; - empty_intermediate_nodes = seed_record.second; - } - else { - alt_score = curr_traceback_score - score_diff; - traceback_stack.propose_deflection(alt_score, node_id, i, j, seed_node_id, InsertCol); - } - } - - source_score = seed->insert_row[next_idx]; - // check edge condition - if (source_score > min_inf) { - score_diff = curr_score - (source_score + match_score); - if (score_diff == 0 && !found_trace) { + cerr << "[BAMatrix::traceback_over_edge] hit found in insert row matrix with score " << (int) seed->insert_row[next_idx] << endl; +#endif + traceback_mat = InsertRow; + traceback_seed = seed; + traceback_seed_row = seed_row; + traceback_seed_col = seed_col; + found_trace = true; + empty_intermediate_nodes = seed_record.second; + } + else { + alt_score = curr_traceback_score - score_diff; + traceback_stack.propose_deflection(alt_score, node_id, i, j, seed_node_id, InsertRow); + } + } + break; + } + case InsertCol: + { + source_score = seed->insert_col[next_idx]; + // check edge condition + if (source_score > min_inf) { + score_diff = curr_score - (source_score + match_score); + if (score_diff == 0 && !found_trace) { #ifdef debug_banded_aligner_traceback - cerr << "[BAMatrix::traceback_over_edge] hit found in insert row matrix with score " << (int) seed->insert_row[next_idx] << endl; -#endif - traceback_mat = InsertRow; - traceback_seed = seed; - traceback_seed_row = seed_row; - traceback_seed_col = seed_col; - found_trace = true; - empty_intermediate_nodes = seed_record.second; - } - else { - alt_score = curr_traceback_score - score_diff; - traceback_stack.propose_deflection(alt_score, node_id, i, j, seed_node_id, InsertRow); + cerr << "[BAMatrix::traceback_over_edge] hit found in insert column matrix with score " << (int) seed->insert_col[next_idx] << endl; +#endif + traceback_mat = InsertCol; + traceback_seed = seed; + traceback_seed_row = seed_row; + traceback_seed_col = seed_col; + found_trace = true; + empty_intermediate_nodes = seed_record.second; + } + else { + alt_score = curr_traceback_score - score_diff; + traceback_stack.propose_deflection(alt_score, node_id, i, j, seed_node_id, InsertCol); + } + } + break; + } + default: + { + cerr << "error: invalid matrix type" << endl; + exit(1); + } } } @@ -1454,73 +1542,92 @@ void BandedGlobalAligner::BAMatrix::traceback_over_edge(const HandleGra case InsertCol: { - source_score = seed->match[next_idx]; - // don't need to check edge condition because match does not have min inf - score_diff = curr_score - (source_score - gap_open); - if (score_diff == 0 && !found_trace) { + for (auto prev_mat : prev_mats) { + switch (prev_mat) { + case Match: + { + source_score = seed->match[next_idx]; + // don't need to check edge condition because match does not have min inf + score_diff = curr_score - (source_score - gap_open); + if (score_diff == 0 && !found_trace) { #ifdef debug_banded_aligner_traceback - cerr << "[BAMatrix::traceback_over_edge] hit found in match matrix with score " << (int) seed->match[next_idx] << endl; -#endif - traceback_mat = Match; - traceback_seed = seed; - traceback_seed_row = seed_row; - traceback_seed_col = seed_col; - found_trace = true; - empty_intermediate_nodes = seed_record.second; - } - else if (source_score != min_inf) { - alt_score = curr_traceback_score - score_diff; + cerr << "[BAMatrix::traceback_over_edge] hit found in match matrix with score " << (int) seed->match[next_idx] << endl; +#endif + traceback_mat = Match; + traceback_seed = seed; + traceback_seed_row = seed_row; + traceback_seed_col = seed_col; + found_trace = true; + empty_intermediate_nodes = seed_record.second; + } + else if (source_score != min_inf) { + alt_score = curr_traceback_score - score_diff; #ifdef debug_banded_aligner_traceback - cerr << "[BAMatrix::traceback_over_edge] no hit in match matrix, proposing deflection with alt score " << (int) alt_score << " from current traceback score " << (int) curr_traceback_score << " and score diff " << (int) score_diff << endl; -#endif - traceback_stack.propose_deflection(alt_score, node_id, i, j, seed_node_id, Match); - } - - source_score = seed->insert_col[next_idx]; - // check edge condition - if (source_score > min_inf) { - score_diff = curr_score - (source_score - gap_extend); - if (score_diff == 0 && !found_trace) { + cerr << "[BAMatrix::traceback_over_edge] no hit in match matrix, proposing deflection with alt score " << (int) alt_score << " from current traceback score " << (int) curr_traceback_score << " and score diff " << (int) score_diff << endl; +#endif + traceback_stack.propose_deflection(alt_score, node_id, i, j, seed_node_id, Match); + } + break; + } + case InsertRow: + { + source_score = seed->insert_row[next_idx]; + // check edge condition + if (source_score > min_inf) { + score_diff = curr_score - (source_score - gap_open); + if (score_diff == 0 && !found_trace) { #ifdef debug_banded_aligner_traceback - cerr << "[BAMatrix::traceback_over_edge] hit found in insert column matrix with score " << (int) seed->match[next_idx] << endl; -#endif - traceback_mat = InsertCol; - traceback_seed = seed; - traceback_seed_row = seed_row; - traceback_seed_col = seed_col; - found_trace = true; - empty_intermediate_nodes = seed_record.second; - } - else { - alt_score = curr_traceback_score - score_diff; + cerr << "[BAMatrix::traceback_over_edge] hit found in insert row matrix with score " << (int) seed->match[next_idx] << endl; +#endif + traceback_mat = InsertRow; + traceback_seed = seed; + traceback_seed_row = seed_row; + traceback_seed_col = seed_col; + found_trace = true; + empty_intermediate_nodes = seed_record.second; + } + else { + alt_score = curr_traceback_score - score_diff; #ifdef debug_banded_aligner_traceback - cerr << "[BAMatrix::traceback_over_edge] no hit in insert row matrix, proposing deflection with alt score " << (int) alt_score << " from current traceback score " << (int) curr_traceback_score << " and score diff " << (int) score_diff << endl; -#endif - traceback_stack.propose_deflection(alt_score, node_id, i, j, seed_node_id, InsertCol); - } - } - - source_score = seed->insert_row[next_idx]; - // check edge condition - if (source_score > min_inf) { - score_diff = curr_score - (source_score - gap_open); - if (score_diff == 0 && !found_trace) { + cerr << "[BAMatrix::traceback_over_edge] no hit in insert column matrix, proposing deflection with alt score " << (int) alt_score << " from current traceback score " << (int) curr_traceback_score << " and score diff " << (int) score_diff << endl; +#endif + traceback_stack.propose_deflection(alt_score, node_id, i, j, seed_node_id, InsertRow); + } + } + break; + } + case InsertCol: + { + source_score = seed->insert_col[next_idx]; + // check edge condition + if (source_score > min_inf) { + score_diff = curr_score - (source_score - gap_extend); + if (score_diff == 0 && !found_trace) { #ifdef debug_banded_aligner_traceback - cerr << "[BAMatrix::traceback_over_edge] hit found in insert row matrix with score " << (int) seed->match[next_idx] << endl; -#endif - traceback_mat = InsertRow; - traceback_seed = seed; - traceback_seed_row = seed_row; - traceback_seed_col = seed_col; - found_trace = true; - empty_intermediate_nodes = seed_record.second; - } - else { - alt_score = curr_traceback_score - score_diff; + cerr << "[BAMatrix::traceback_over_edge] hit found in insert column matrix with score " << (int) seed->match[next_idx] << endl; +#endif + traceback_mat = InsertCol; + traceback_seed = seed; + traceback_seed_row = seed_row; + traceback_seed_col = seed_col; + found_trace = true; + empty_intermediate_nodes = seed_record.second; + } + else { + alt_score = curr_traceback_score - score_diff; #ifdef debug_banded_aligner_traceback - cerr << "[BAMatrix::traceback_over_edge] no hit in insert column matrix, proposing deflection with alt score " << (int) alt_score << " from current traceback score " << (int) curr_traceback_score << " and score diff " << (int) score_diff << endl; -#endif - traceback_stack.propose_deflection(alt_score, node_id, i, j, seed_node_id, InsertRow); + cerr << "[BAMatrix::traceback_over_edge] no hit in insert row matrix, proposing deflection with alt score " << (int) alt_score << " from current traceback score " << (int) curr_traceback_score << " and score diff " << (int) score_diff << endl; +#endif + traceback_stack.propose_deflection(alt_score, node_id, i, j, seed_node_id, InsertCol); + } + } + break; + } + default: + { + cerr << "error: invalid matrix type" << endl; + exit(1); + } } } @@ -1809,12 +1916,14 @@ void BandedGlobalAligner::BAMatrix::print_band(const HandleGraph& graph template BandedGlobalAligner::BandedGlobalAligner(Alignment& alignment, const HandleGraph& g, int64_t band_padding, bool permissive_banding, - bool adjust_for_base_quality) : + bool adjust_for_base_quality, + const unordered_map* left_align_strand) : BandedGlobalAligner(alignment, g, nullptr, 1, band_padding, permissive_banding, - adjust_for_base_quality) + adjust_for_base_quality, + left_align_strand) { // nothing to do, just funnel into internal constructor } @@ -1824,13 +1933,15 @@ BandedGlobalAligner::BandedGlobalAligner(Alignment& alignment, const Ha vector& alt_alignments, int64_t max_multi_alns, int64_t band_padding, bool permissive_banding, - bool adjust_for_base_quality) : + bool adjust_for_base_quality, + const unordered_map* left_align_strand) : BandedGlobalAligner(alignment, g, &alt_alignments, max_multi_alns, band_padding, permissive_banding, - adjust_for_base_quality) + adjust_for_base_quality, + left_align_strand) { // check data integrity and funnel into internal constructor if (!alt_alignments.empty()) { @@ -1845,7 +1956,8 @@ BandedGlobalAligner::BandedGlobalAligner(Alignment& alignment, const Ha int64_t max_multi_alns, int64_t band_padding, bool permissive_banding, - bool adjust_for_base_quality) : + bool adjust_for_base_quality, + const unordered_map* left_align_strand) : graph(g), alignment(alignment), alt_alignments(alt_alignments), @@ -1929,12 +2041,20 @@ BandedGlobalAligner::BandedGlobalAligner(Alignment& alignment, const Ha seeds.push_back(banded_matrices[node_id_to_idx[graph.get_id(prev)]]); }); + bool strand = false; + if (left_align_strand) { + auto it = left_align_strand->find(node); + if (it != left_align_strand->end()) { + strand = it->second; + } + } banded_matrices[i] = new BAMatrix(alignment, node, band_ends[i].first, band_ends[i].second, std::move(seeds), - shortest_seqs[i]); + shortest_seqs[i], + strand); } } @@ -2367,17 +2487,43 @@ BandedGlobalAligner::AltTracebackStack::AltTracebackStack(const HandleG } else { // let the insert routine figure out which one is the best and which ones to keep in the stack - if (band_matrix->match[final_idx] != min_inf) { - insert_traceback(null_prefix, band_matrix->match[final_idx], - node_id, final_row, final_col, node_id, Match, path); + vector mats{InsertCol, Match, InsertRow}; + if (band_matrix->left_alignment_strand) { + std::swap(mats[0], mats[2]); } - if (band_matrix->insert_row[final_idx] != min_inf) { - insert_traceback(null_prefix, band_matrix->insert_row[final_idx], - node_id, final_row, final_col, node_id, InsertRow, path); - } - if (band_matrix->insert_col[final_idx] != min_inf) { - insert_traceback(null_prefix, band_matrix->insert_col[final_idx], - node_id, final_row, final_col, node_id, InsertCol, path); + for (auto mat : mats) { + switch (mat) + { + case Match: + { + if (band_matrix->match[final_idx] != min_inf) { + insert_traceback(null_prefix, band_matrix->match[final_idx], + node_id, final_row, final_col, node_id, Match, path); + } + break; + } + case InsertCol: + { + if (band_matrix->insert_row[final_idx] != min_inf) { + insert_traceback(null_prefix, band_matrix->insert_row[final_idx], + node_id, final_row, final_col, node_id, InsertRow, path); + } + break; + } + case InsertRow: + { + if (band_matrix->insert_col[final_idx] != min_inf) { + insert_traceback(null_prefix, band_matrix->insert_col[final_idx], + node_id, final_row, final_col, node_id, InsertCol, path); + } + break; + } + default: + { + cerr << "error: invalid matrix type" << endl; + exit(1); + } + } } } } diff --git a/src/banded_global_aligner.hpp b/src/banded_global_aligner.hpp index b77f9fe449f..74ff2628ce7 100644 --- a/src/banded_global_aligner.hpp +++ b/src/banded_global_aligner.hpp @@ -60,7 +60,8 @@ namespace vg { /// BandedGlobalAligner(Alignment& alignment, const HandleGraph& g, int64_t band_padding, bool permissive_banding = false, - bool adjust_for_base_quality = false); + bool adjust_for_base_quality = false, + const unordered_map* left_align_strand = nullptr); /// Initializes banded multi-alignment, which computes the top scoring alternate alignments in addition @@ -78,7 +79,8 @@ namespace vg { BandedGlobalAligner(Alignment& alignment, const HandleGraph& g, vector& alt_alignments, int64_t max_multi_alns, int64_t band_padding, bool permissive_banding = false, - bool adjust_for_base_quality = false); + bool adjust_for_base_quality = false, + const unordered_map* left_align_strand = nullptr); ~BandedGlobalAligner(); @@ -133,7 +135,8 @@ namespace vg { BandedGlobalAligner(Alignment& alignment, const HandleGraph& g, vector* alt_alignments, int64_t max_multi_alns, int64_t band_padding, bool permissive_banding = false, - bool adjust_for_base_quality = false); + bool adjust_for_base_quality = false, + const unordered_map* left_align_strand = nullptr); /// Traceback through dynamic programming matrices to compute alignment void traceback(int8_t* score_mat, int8_t* nt_table, int8_t gap_open, int8_t gap_extend, IntType min_inf); @@ -156,7 +159,7 @@ namespace vg { public: BAMatrix(Alignment& alignment, handle_t node, int64_t top_diag, int64_t bottom_diag, - const vector& seeds, int64_t cumulative_seq_len); + const vector& seeds, int64_t cumulative_seq_len, bool left_alignment_strand); ~BAMatrix(); /// Use DP to fill the band with alignment scores @@ -188,6 +191,8 @@ namespace vg { handle_t node; + bool left_alignment_strand; + Alignment& alignment; /// Length of shortest sequence leading to matrix from a source node diff --git a/src/main.cpp b/src/main.cpp index dc87d7381ad..514f8b89760 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -1,6 +1,3 @@ -// Needed for crash.hpp to work because it uses newer types -#define _POSIX_C_SOURCE 200809L - #include #include #include diff --git a/src/multipath_alignment_graph.cpp b/src/multipath_alignment_graph.cpp index a4d1c3e0cd2..a9cf989d726 100644 --- a/src/multipath_alignment_graph.cpp +++ b/src/multipath_alignment_graph.cpp @@ -4227,7 +4227,7 @@ void MultipathAlignmentGraph::align(const Alignment& alignment, const HandleGrap double pessimistic_tail_gap_multiplier, bool simplify_topologies, size_t unmergeable_len, size_t band_padding, multipath_alignment_t& multipath_aln_out, SnarlManager* cutting_snarls, SnarlDistanceIndex* dist_index, const function(id_t)>* project, - bool allow_negative_scores) { + bool allow_negative_scores, unordered_map* left_align_strand) { // don't dynamically choose band padding, shim constant value into a function type function constant_padding = [&](const Alignment& seq, const HandleGraph& graph) { @@ -4248,7 +4248,8 @@ void MultipathAlignmentGraph::align(const Alignment& alignment, const HandleGrap cutting_snarls, dist_index, project, - allow_negative_scores); + allow_negative_scores, + left_align_strand); } void MultipathAlignmentGraph::deduplicate_alt_alns(vector>& alt_alns, @@ -5184,7 +5185,7 @@ void MultipathAlignmentGraph::align(const Alignment& alignment, const HandleGrap function band_padding_function, multipath_alignment_t& multipath_aln_out, SnarlManager* cutting_snarls, SnarlDistanceIndex* dist_index, const function(id_t)>* project, - bool allow_negative_scores) { + bool allow_negative_scores, unordered_map* left_align_strand) { // TODO: magic number // how many tails we need to have before we try the more complicated but @@ -5351,7 +5352,8 @@ void MultipathAlignmentGraph::align(const Alignment& alignment, const HandleGrap vector alt_alignments; aligner->align_global_banded_multi(intervening_sequence, alt_alignments, connecting_graph, num_alns_iter, - band_padding_function(intervening_sequence, connecting_graph), true); + band_padding_function(intervening_sequence, connecting_graph), true, + left_align_strand); // remove alignments with the same path deduplicated = convert_and_deduplicate(alt_alignments, false, false); diff --git a/src/multipath_alignment_graph.hpp b/src/multipath_alignment_graph.hpp index bb7abbd9889..90b3de547a7 100644 --- a/src/multipath_alignment_graph.hpp +++ b/src/multipath_alignment_graph.hpp @@ -197,7 +197,7 @@ namespace vg { size_t max_alt_alns, bool dynamic_alt_alns, size_t max_gap, double pessimistic_tail_gap_multiplier, bool simplify_topologies, size_t unmergeable_len, size_t band_padding, multipath_alignment_t& multipath_aln_out, SnarlManager* cutting_snarls = nullptr, SnarlDistanceIndex* dist_index = nullptr, const function(id_t)>* project = nullptr, - bool allow_negative_scores = false); + bool allow_negative_scores = false, unordered_map* left_align_strand = nullptr); /// Do intervening and tail alignments between the anchoring paths and /// store the result in a multipath_alignment_t. Reachability edges must @@ -215,7 +215,8 @@ namespace vg { size_t max_alt_alns, bool dynamic_alt_alns, size_t max_gap, double pessimistic_tail_gap_multiplier, bool simplify_topologies, size_t unmergeable_len, function band_padding_function, multipath_alignment_t& multipath_aln_out, SnarlManager* cutting_snarls = nullptr, SnarlDistanceIndex* dist_index = nullptr, - const function(id_t)>* project = nullptr, bool allow_negative_scores = false); + const function(id_t)>* project = nullptr, bool allow_negative_scores = false, + unordered_map* left_align_strand = nullptr); /// Converts a MultipathAlignmentGraph to a GraphViz Dot representation, output to the given ostream. /// If given the Alignment query we are working on, can produce information about subpath iterators. diff --git a/src/surjector.cpp b/src/surjector.cpp index 0f1f407daab..83f008ce335 100644 --- a/src/surjector.cpp +++ b/src/surjector.cpp @@ -3068,6 +3068,13 @@ using namespace std; #endif } + // left align on forward strands and right align on reverse strands + unordered_map left_align_strand; + left_align_strand.reserve(aln_graph->get_node_count()); + aln_graph->for_each_handle([&](const handle_t& handle) { + left_align_strand[handle] = node_trans.at(aln_graph->get_id(handle)).second; + }); + // align the intervening segments and store the result in a multipath alignment multipath_alignment_t mp_aln; mp_aln_graph.align(source, *aln_graph, get_aligner(), @@ -3083,7 +3090,8 @@ using namespace std; nullptr, // snarl manager nullptr, // distance index nullptr, // projector - allow_negative_scores); + allow_negative_scores, // subpath local + &left_align_strand); // strand to left align against topologically_order_subpaths(mp_aln);