Skip to content

Commit

Permalink
Merge pull request #612 from ahehn-nv/ahehn/approximate_alignments_bu…
Browse files Browse the repository at this point in the history
…gfix

[cudaaligner] Fixed a bug in approximate alignment backtrace
  • Loading branch information
Joyjit Daw authored Dec 4, 2020
2 parents fe07172 + 8ccae4b commit c68960e
Show file tree
Hide file tree
Showing 2 changed files with 90 additions and 7 deletions.
25 changes: 18 additions & 7 deletions cudaaligner/src/myers_gpu.cu
Original file line number Diff line number Diff line change
Expand Up @@ -425,19 +425,25 @@ __device__ int32_t myers_backtrace_banded(int8_t* path, device_matrix_view<WordT
assert(pv.num_cols() == score.num_cols());
assert(mv.num_cols() == score.num_cols());
assert(score.num_rows() == ceiling_divide(band_width, word_size));
assert(diagonal_begin >= 0);
assert(diagonal_end >= diagonal_begin);
assert(diagonal_end >= 2); // this should only break if target_size == 0 - which is not valid input.

int32_t i = band_width;
int32_t j = target_size;

const WordType last_entry_mask = band_width % word_size != 0 ? (WordType(1) << (band_width % word_size)) - 1 : ~WordType(0);

nw_score_t myscore = score((i - 1) / word_size, j); // row 0 is implicit, NW matrix is shifted by i -> i-1 (see get_myers_score)
int32_t pos = 0;
const nw_score_t last_diagonal_score = diagonal_end < 2 ? out_of_band : get_myers_score(1, diagonal_end - 2, pv, mv, score, last_entry_mask) + 2;
nw_score_t myscore = score((i - 1) / word_size, j); // row 0 is implicit, NW matrix is shifted by i -> i-1, i.e. i \in [1,band_width] for get_myers_score. (see get_myers_score)
int32_t pos = 0;
while (j >= diagonal_end)
{
int8_t r = 0;
nw_score_t const above = i <= 1 ? j : get_myers_score(i - 1, j, pv, mv, score, last_entry_mask);
nw_score_t const diag = i <= 1 ? j - 1 : get_myers_score(i - 1, j - 1, pv, mv, score, last_entry_mask);
nw_score_t const left = get_myers_score(i, j - 1, pv, mv, score, last_entry_mask);
int8_t r = 0;
// Worst case for the implicit top row (i == 0) of the bottom right block of the NW is the last diagonal entry on the same row + (j - diagonal_end) * indel cost.
nw_score_t const above = i <= 1 ? (last_diagonal_score + j - diagonal_end) : get_myers_score(i - 1, j, pv, mv, score, last_entry_mask);
nw_score_t const diag = i <= 1 ? (last_diagonal_score + j - 1 - diagonal_end) : get_myers_score(i - 1, j - 1, pv, mv, score, last_entry_mask);
nw_score_t const left = i < 1 ? (last_diagonal_score + j - 1 - diagonal_end) : get_myers_score(i, j - 1, pv, mv, score, last_entry_mask);
if (left + 1 == myscore)
{
r = static_cast<int8_t>(AlignmentState::insertion);
Expand All @@ -452,6 +458,7 @@ __device__ int32_t myers_backtrace_banded(int8_t* path, device_matrix_view<WordT
}
else
{
assert(diag == myscore || diag + 1 == myscore);
r = (diag == myscore ? static_cast<int8_t>(AlignmentState::match) : static_cast<int8_t>(AlignmentState::mismatch));
myscore = diag;
--i;
Expand All @@ -463,9 +470,10 @@ __device__ int32_t myers_backtrace_banded(int8_t* path, device_matrix_view<WordT
while (j >= diagonal_begin)
{
int8_t r = 0;
nw_score_t const above = i <= 1 ? j : get_myers_score(i - 1, j, pv, mv, score, last_entry_mask);
nw_score_t const above = i <= 1 ? out_of_band : get_myers_score(i - 1, j, pv, mv, score, last_entry_mask);
nw_score_t const diag = i <= 0 ? j - 1 : get_myers_score(i, j - 1, pv, mv, score, last_entry_mask);
nw_score_t const left = i >= band_width ? out_of_band : get_myers_score(i + 1, j - 1, pv, mv, score, last_entry_mask);
// out-of-band cases: diag always preferable, since worst-case-(above|left) - myscore >= diag - myscore always holds.
if (left + 1 == myscore)
{
r = static_cast<int8_t>(AlignmentState::insertion);
Expand All @@ -481,6 +489,7 @@ __device__ int32_t myers_backtrace_banded(int8_t* path, device_matrix_view<WordT
}
else
{
assert(diag == myscore || diag + 1 == myscore);
r = (diag == myscore ? static_cast<int8_t>(AlignmentState::match) : static_cast<int8_t>(AlignmentState::mismatch));
myscore = diag;
--j;
Expand All @@ -494,6 +503,7 @@ __device__ int32_t myers_backtrace_banded(int8_t* path, device_matrix_view<WordT
nw_score_t const above = i == 1 ? j : get_myers_score(i - 1, j, pv, mv, score, last_entry_mask);
nw_score_t const diag = i == 1 ? j - 1 : get_myers_score(i - 1, j - 1, pv, mv, score, last_entry_mask);
nw_score_t const left = i > band_width ? out_of_band : get_myers_score(i, j - 1, pv, mv, score, last_entry_mask);
// out-of-band cases: diag always preferable, since worst-case-(above|left) - myscore >= diag - myscore always holds.
if (left + 1 == myscore)
{
r = static_cast<int8_t>(AlignmentState::insertion);
Expand All @@ -508,6 +518,7 @@ __device__ int32_t myers_backtrace_banded(int8_t* path, device_matrix_view<WordT
}
else
{
assert(diag == myscore || diag + 1 == myscore);
r = (diag == myscore ? static_cast<int8_t>(AlignmentState::match) : static_cast<int8_t>(AlignmentState::mismatch));
myscore = diag;
--i;
Expand Down
72 changes: 72 additions & 0 deletions cudaaligner/tests/Test_ApproximateBandedMyers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,80 @@ class TestApproximateBandedMyers : public ::testing::TestWithParam<TestCase>
{
};

void implicit_new_entries_test_impl(const std::string& query, const std::string& target, const std::string& expected_cigar)
{
using namespace claraparabricks::genomeworks::cudaaligner;
using namespace claraparabricks::genomeworks;
const int32_t max_bw = 7;
DefaultDeviceAllocator allocator = create_default_device_allocator();
std::unique_ptr<Aligner> aligner = std::make_unique<AlignerGlobalMyersBanded>(-1,
max_bw,
allocator,
nullptr,
0);
ASSERT_EQ(StatusType::success, aligner->add_alignment(query.c_str(), query.length(), target.c_str(), target.length()))
<< "Could not add alignment to aligner";
aligner->align_all();
aligner->sync_alignments();
const std::vector<std::shared_ptr<Alignment>>& alignments = aligner->get_alignments();
ASSERT_EQ(get_size(alignments), 1);
ASSERT_EQ(alignments[0]->get_status(), StatusType::success);
ASSERT_EQ(alignments[0]->is_optimal(), false);
ASSERT_EQ(alignments[0]->convert_to_cigar(), expected_cigar);
}

} // namespace

TEST(TestApproximateBandedMyersStatic, ImplicitNWEntries1)
{
    // The banded Myers implementation does not store the 0-th row of the NW matrix (and of the band);
    // its entries are implicit and assumed to take the worst case value. This is harmless in the
    // top left block and in the diagonal band. In the bottom right block, however, the backtrace can
    // run through such a worst case entry - which is technically still part of the band.
    // This test covers that specific corner case.

    // * = tested implicit 0-row entry
    // Band of the NW matrix:
    // top left block of NW matrix | diagonal band (each column shifted by one row) | bottom right block
    // NW_(i,j), index i of first shown row:
    // 1 1 1 1 1 1 1 | 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 | 20 20 20 20 20 20
    //
    // A A C C G G | T T A A A A C C C C G G G G G T T A A | A C G G T T
    // row 0 (implicit): | * | *
    // A 1 0 1 2 3 4 5 |A 5 C 5 C 5 G 5 G 5 T 5 T 5 A 5 A 5 C 5 C 5 G 5 G 5 T 6 T 7 A 8 A 9 C 10 C 11 |C 12 12 13 14 15 16
    // A 2 1 0 1 2 3 4 |C 4 C 4 G 4 G 4 T 4 T 4 A 4 A 4 C 4 C 4 G 4 G 4 T 5 T 6 A 7 A 8 C 9 C 10 G 11 |G 12 13 12 13 14 15
    // C 3 2 1 0 1 2 3 |C 3 G 3 G 3 T 3 T 3 A 3 A 3 C 3 C 3 G 4 G 4 T 5 T 6 A 7 A 8 C 9 C 10 G 11 G 12 |G 12 13 13 12 13 14
    // C 4 3 2 1 0 1 2 |G 2 G 2 T 2 T 2 A 2 A 2 C 2 C 2 G 3 G 4 T 5 T 6 A 7 A 8 C 9 C 10 G 11 G 12 T 13 |T 13 13 14 13 12 13
    // G 5 4 3 2 1 0 1 |G 1 T 1 T 1 A 1 A 1 C 2 C 2 G 3 G 4 T 5 T 6 A 7 A 8 C 9 C 10 G 11 G 12 T 12 T 12 |T 13 14 14 14 13 12
    // G 6 5 4 3 2 1 0 |T 0 T 0 A 0 A 0 C 1 C 2 G 3 G 4 T 5 T 6 A 7 A 8 C 9 C 10 G 10 G 11 T 11 T 11 T 12 |T 13 14 15 15 14 13
    // T 7 6 5 4 3 2 1 |T 1 A 1 A 1 C 1 C 2 G 3 G 4 T 5 T 6 A 7 A 8 C 9 C 10 G 10 G 10 T 10 T 10 T 11 T 12 |T 13 14 15 16 15 14

    const std::string query          = "AACCGGTTAACCGGTTAACCGGTTTT";
    const std::string target         = "AACCGGTTAAAACCCCGGGGGTTAAACGGTT";
    const std::string expected_cigar = "10M2I2M2I7M3I5M2D";

    implicit_new_entries_test_impl(query, target, expected_cigar);
}

TEST(TestApproximateBandedMyersStatic, ImplicitNWEntries2)
{
    // Checks the CIGAR produced for a query/target pair whose band contains an
    // implicit (not stored) 0-th row entry, marked with * in the layout below.

    // * = tested implicit 0-row entry
    // Band of the NW matrix:
    // top left block of NW matrix | diagonal band (each column shifted by one row) | bottom right block
    // NW_(i,j), index i of first shown row:
    // 1 1 1 1 1 1 1 1 | 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 | 19 19 19 19 19 19
    //
    // A A C C G G T | T A A A A C C C C G G G G G T T A A | C C G G T T
    // row 0 (implicit): | * |
    // A 1 0 1 2 3 4 5 6 |A 6 C 6 C 6 G 6 G 6 T 6 T 6 A 6 A 6 C 6 C 6 G 6 G 6 T 7 T 7 A 8 A 8 C 9 |C 9 10 11 12 13 14
    // A 2 1 0 1 2 3 4 5 |C 5 C 5 G 5 G 5 T 5 T 5 A 5 A 5 C 5 C 5 G 5 G 5 T 6 T 7 A 8 A 9 C 9 C 10 |C 9 9 10 11 12 13
    // C 3 2 1 0 1 2 3 4 |C 4 G 4 G 4 T 4 T 4 A 4 A 4 C 4 C 4 G 4 G 4 T 5 T 6 A 7 A 8 C 9 C 10 G 11 |G 10 10 9 10 11 12
    // C 4 3 2 1 0 1 2 3 |G 3 G 3 T 3 T 3 A 3 A 3 C 3 C 3 G 4 G 4 T 5 T 6 A 7 A 8 C 9 C 10 G 11 G 12 |G 11 11 10 9 10 11
    // G 5 4 3 2 1 0 1 2 |G 2 T 2 T 2 A 2 A 2 C 2 C 2 G 3 G 4 T 5 T 6 A 7 A 8 C 9 C 10 G 11 G 12 T 13 |T 12 12 11 10 9 10
    // G 6 5 4 3 2 1 0 1 |T 1 T 1 A 1 A 1 C 2 C 2 G 3 G 4 T 5 T 6 A 7 A 8 C 9 C 10 G 11 G 12 T 12 T 13 |T 13 13 12 11 10 9
    // T 7 6 5 4 3 2 1 0 |T 0 A 0 A 0 C 1 C 2 G 3 G 4 T 5 T 6 A 7 A 8 C 9 C 10 G 10 G 11 T 11 T 12 T 13 |T 14 14 13 12 11 10

    const std::string query          = "AACCGGTTAACCGGTTAACCGGTTT";
    const std::string target         = "AACCGGTTAAAACCCCGGGGGTTAACCGGTT";
    const std::string expected_cigar = "10M2I2M2I3M2I3M1I6M1D";

    implicit_new_entries_test_impl(query, target, expected_cigar);
}

TEST_P(TestApproximateBandedMyers, EditDistanceMonotonicallyDecreasesWithBandWidth)
{
using namespace claraparabricks::genomeworks::cudaaligner;
Expand Down

0 comments on commit c68960e

Please sign in to comment.