Skip to content

Commit

Permalink
review update & add single rhs test
Browse files Browse the repository at this point in the history
  • Loading branch information
yhmtsai committed May 15, 2020
1 parent 33abb09 commit 966568c
Show file tree
Hide file tree
Showing 4 changed files with 98 additions and 54 deletions.
14 changes: 5 additions & 9 deletions cuda/solver/gmres_kernels.cu
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,8 @@ void finish_arnoldi(std::shared_ptr<const CudaExecutor> exec,
krylov_bases->get_const_values() +
k * num_rows * hessenberg_iter->get_size()[1];
if (hessenberg_iter->get_size()[1] > 1) {
// TODO: single rhs will use vendor's dot, otherwise, use our own
// TODO: this condition should be tuned
// single rhs will use vendor's dot, otherwise, use our own
// multidot_kernel which parallelize multiple rhs.
zero_array(hessenberg_iter->get_size()[1],
hessenberg_iter->get_values() + k * stride_hessenberg);
Expand All @@ -166,14 +167,9 @@ void finish_arnoldi(std::shared_ptr<const CudaExecutor> exec,
stride_krylov, as_cuda_type(hessenberg_iter->get_values()),
stride_hessenberg, as_cuda_type(stop_status));
} else {
for (size_type col = 0; col < hessenberg_iter->get_size()[1];
++col) {
cublas::dot(exec->get_cublas_handle(), num_rows,
k_krylov_bases + col, stride_krylov,
next_krylov_basis + col, stride_krylov,
hessenberg_iter->get_values() +
k * stride_hessenberg + col);
}
cublas::dot(exec->get_cublas_handle(), num_rows, k_krylov_bases,
stride_krylov, next_krylov_basis, stride_krylov,
hessenberg_iter->get_values() + k * stride_hessenberg);
}
update_next_krylov_kernel<default_block_size>
<<<ceildiv(num_rows * stride_krylov, default_block_size),
Expand Down
62 changes: 44 additions & 18 deletions cuda/test/solver/gmres_kernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -81,32 +81,31 @@ class Gmres : public ::testing::Test {
std::normal_distribution<>(-1.0, 1.0), rand_engine, ref);
}

void initialize_data()
void initialize_data(int nrhs = 43)
{
int m = 597;
int n = 43;
x = gen_mtx(m, n);
y = gen_mtx(gko::solver::default_krylov_dim, n);
x = gen_mtx(m, nrhs);
y = gen_mtx(gko::solver::default_krylov_dim, nrhs);
before_preconditioner = Mtx::create_with_config_of(x.get());
b = gen_mtx(m, n);
b_norm = gen_mtx(1, n);
krylov_bases = gen_mtx(m * (gko::solver::default_krylov_dim + 1), n);
b = gen_mtx(m, nrhs);
b_norm = gen_mtx(1, nrhs);
krylov_bases = gen_mtx(m * (gko::solver::default_krylov_dim + 1), nrhs);
hessenberg = gen_mtx(gko::solver::default_krylov_dim + 1,
gko::solver::default_krylov_dim * n);
hessenberg_iter = gen_mtx(gko::solver::default_krylov_dim + 1, n);
residual = gen_mtx(m, n);
residual_norm = gen_mtx(1, n);
gko::solver::default_krylov_dim * nrhs);
hessenberg_iter = gen_mtx(gko::solver::default_krylov_dim + 1, nrhs);
residual = gen_mtx(m, nrhs);
residual_norm = gen_mtx(1, nrhs);
residual_norm_collection =
gen_mtx(gko::solver::default_krylov_dim + 1, n);
givens_sin = gen_mtx(gko::solver::default_krylov_dim, n);
givens_cos = gen_mtx(gko::solver::default_krylov_dim, n);
gen_mtx(gko::solver::default_krylov_dim + 1, nrhs);
givens_sin = gen_mtx(gko::solver::default_krylov_dim, nrhs);
givens_cos = gen_mtx(gko::solver::default_krylov_dim, nrhs);
stop_status = std::unique_ptr<gko::Array<gko::stopping_status>>(
new gko::Array<gko::stopping_status>(ref, n));
new gko::Array<gko::stopping_status>(ref, nrhs));
for (size_t i = 0; i < stop_status->get_num_elems(); ++i) {
stop_status->get_data()[i].reset();
}
final_iter_nums = std::unique_ptr<gko::Array<gko::size_type>>(
new gko::Array<gko::size_type>(ref, n));
new gko::Array<gko::size_type>(ref, nrhs));
for (size_t i = 0; i < final_iter_nums->get_num_elems(); ++i) {
final_iter_nums->get_data()[i] = 5;
}
Expand Down Expand Up @@ -137,10 +136,10 @@ class Gmres : public ::testing::Test {
d_givens_cos = Mtx::create(cuda);
d_givens_cos->copy_from(givens_cos.get());
d_stop_status = std::unique_ptr<gko::Array<gko::stopping_status>>(
new gko::Array<gko::stopping_status>(cuda, n));
new gko::Array<gko::stopping_status>(cuda, nrhs));
*d_stop_status = *stop_status;
d_final_iter_nums = std::unique_ptr<gko::Array<gko::size_type>>(
new gko::Array<gko::size_type>(cuda, n));
new gko::Array<gko::size_type>(cuda, nrhs));
*d_final_iter_nums = *final_iter_nums;
}

Expand Down Expand Up @@ -251,6 +250,33 @@ TEST_F(Gmres, CudaGmresStep1IsEquivalentToRef)
}


TEST_F(Gmres, CudaGmresStep1OnSingleRHSIsEquivalentToRef)
{
initialize_data(1);
int iter = 5;

gko::kernels::reference::gmres::step_1(
ref, x->get_size()[0], givens_sin.get(), givens_cos.get(),
residual_norm.get(), residual_norm_collection.get(), krylov_bases.get(),
hessenberg_iter.get(), b_norm.get(), iter, final_iter_nums.get(),
stop_status.get());
gko::kernels::cuda::gmres::step_1(
cuda, d_x->get_size()[0], d_givens_sin.get(), d_givens_cos.get(),
d_residual_norm.get(), d_residual_norm_collection.get(),
d_krylov_bases.get(), d_hessenberg_iter.get(), d_b_norm.get(), iter,
d_final_iter_nums.get(), d_stop_status.get());

GKO_ASSERT_MTX_NEAR(d_givens_sin, givens_sin, 1e-14);
GKO_ASSERT_MTX_NEAR(d_givens_cos, givens_cos, 1e-14);
GKO_ASSERT_MTX_NEAR(d_residual_norm, residual_norm, 1e-14);
GKO_ASSERT_MTX_NEAR(d_residual_norm_collection, residual_norm_collection,
1e-14);
GKO_ASSERT_MTX_NEAR(d_hessenberg_iter, hessenberg_iter, 1e-14);
GKO_ASSERT_MTX_NEAR(d_krylov_bases, krylov_bases, 1e-14);
GKO_ASSERT_ARRAY_EQ(d_final_iter_nums, final_iter_nums);
}


TEST_F(Gmres, CudaGmresStep2IsEquivalentToRef)
{
initialize_data();
Expand Down
14 changes: 5 additions & 9 deletions hip/solver/gmres_kernels.hip.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,8 @@ void finish_arnoldi(std::shared_ptr<const HipExecutor> exec, size_type num_rows,
krylov_bases->get_const_values() +
k * num_rows * hessenberg_iter->get_size()[1];
if (hessenberg_iter->get_size()[1] > 1) {
// TODO: single rhs will use vendor's dot, otherwise, use our own
// TODO: this condition should be tuned
// single rhs will use vendor's dot, otherwise, use our own
// multidot_kernel which parallelize multiple rhs.
zero_array(hessenberg_iter->get_size()[1],
hessenberg_iter->get_values() + k * stride_hessenberg);
Expand All @@ -172,14 +173,9 @@ void finish_arnoldi(std::shared_ptr<const HipExecutor> exec, size_type num_rows,
stride_krylov, as_hip_type(hessenberg_iter->get_values()),
stride_hessenberg, as_hip_type(stop_status));
} else {
for (size_type col = 0; col < hessenberg_iter->get_size()[1];
++col) {
hipblas::dot(exec->get_hipblas_handle(), num_rows,
k_krylov_bases + col, stride_krylov,
next_krylov_basis + col, stride_krylov,
hessenberg_iter->get_values() +
k * stride_hessenberg + col);
}
hipblas::dot(exec->get_hipblas_handle(), num_rows, k_krylov_bases,
stride_krylov, next_krylov_basis, stride_krylov,
hessenberg_iter->get_values() + k * stride_hessenberg);
}
hipLaunchKernelGGL(
HIP_KERNEL_NAME(update_next_krylov_kernel<default_block_size>),
Expand Down
62 changes: 44 additions & 18 deletions hip/test/solver/gmres_kernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -81,32 +81,31 @@ class Gmres : public ::testing::Test {
std::normal_distribution<>(-1.0, 1.0), rand_engine, ref);
}

void initialize_data()
void initialize_data(int nrhs = 43)
{
int m = 597;
int n = 43;
x = gen_mtx(m, n);
y = gen_mtx(gko::solver::default_krylov_dim, n);
x = gen_mtx(m, nrhs);
y = gen_mtx(gko::solver::default_krylov_dim, nrhs);
before_preconditioner = Mtx::create_with_config_of(x.get());
b = gen_mtx(m, n);
b_norm = gen_mtx(1, n);
krylov_bases = gen_mtx(m * (gko::solver::default_krylov_dim + 1), n);
b = gen_mtx(m, nrhs);
b_norm = gen_mtx(1, nrhs);
krylov_bases = gen_mtx(m * (gko::solver::default_krylov_dim + 1), nrhs);
hessenberg = gen_mtx(gko::solver::default_krylov_dim + 1,
gko::solver::default_krylov_dim * n);
hessenberg_iter = gen_mtx(gko::solver::default_krylov_dim + 1, n);
residual = gen_mtx(m, n);
residual_norm = gen_mtx(1, n);
gko::solver::default_krylov_dim * nrhs);
hessenberg_iter = gen_mtx(gko::solver::default_krylov_dim + 1, nrhs);
residual = gen_mtx(m, nrhs);
residual_norm = gen_mtx(1, nrhs);
residual_norm_collection =
gen_mtx(gko::solver::default_krylov_dim + 1, n);
givens_sin = gen_mtx(gko::solver::default_krylov_dim, n);
givens_cos = gen_mtx(gko::solver::default_krylov_dim, n);
gen_mtx(gko::solver::default_krylov_dim + 1, nrhs);
givens_sin = gen_mtx(gko::solver::default_krylov_dim, nrhs);
givens_cos = gen_mtx(gko::solver::default_krylov_dim, nrhs);
stop_status = std::unique_ptr<gko::Array<gko::stopping_status>>(
new gko::Array<gko::stopping_status>(ref, n));
new gko::Array<gko::stopping_status>(ref, nrhs));
for (size_t i = 0; i < stop_status->get_num_elems(); ++i) {
stop_status->get_data()[i].reset();
}
final_iter_nums = std::unique_ptr<gko::Array<gko::size_type>>(
new gko::Array<gko::size_type>(ref, n));
new gko::Array<gko::size_type>(ref, nrhs));
for (size_t i = 0; i < final_iter_nums->get_num_elems(); ++i) {
final_iter_nums->get_data()[i] = 5;
}
Expand Down Expand Up @@ -137,10 +136,10 @@ class Gmres : public ::testing::Test {
d_givens_cos = Mtx::create(hip);
d_givens_cos->copy_from(givens_cos.get());
d_stop_status = std::unique_ptr<gko::Array<gko::stopping_status>>(
new gko::Array<gko::stopping_status>(hip, n));
new gko::Array<gko::stopping_status>(hip, nrhs));
*d_stop_status = *stop_status;
d_final_iter_nums = std::unique_ptr<gko::Array<gko::size_type>>(
new gko::Array<gko::size_type>(hip, n));
new gko::Array<gko::size_type>(hip, nrhs));
*d_final_iter_nums = *final_iter_nums;
}

Expand Down Expand Up @@ -251,6 +250,33 @@ TEST_F(Gmres, HipGmresStep1IsEquivalentToRef)
}


TEST_F(Gmres, HipGmresStep1OnSingleRHSIsEquivalentToRef)
{
initialize_data(1);
int iter = 5;

gko::kernels::reference::gmres::step_1(
ref, x->get_size()[0], givens_sin.get(), givens_cos.get(),
residual_norm.get(), residual_norm_collection.get(), krylov_bases.get(),
hessenberg_iter.get(), b_norm.get(), iter, final_iter_nums.get(),
stop_status.get());
gko::kernels::hip::gmres::step_1(
hip, d_x->get_size()[0], d_givens_sin.get(), d_givens_cos.get(),
d_residual_norm.get(), d_residual_norm_collection.get(),
d_krylov_bases.get(), d_hessenberg_iter.get(), d_b_norm.get(), iter,
d_final_iter_nums.get(), d_stop_status.get());

GKO_ASSERT_MTX_NEAR(d_givens_sin, givens_sin, 1e-14);
GKO_ASSERT_MTX_NEAR(d_givens_cos, givens_cos, 1e-14);
GKO_ASSERT_MTX_NEAR(d_residual_norm, residual_norm, 1e-14);
GKO_ASSERT_MTX_NEAR(d_residual_norm_collection, residual_norm_collection,
1e-14);
GKO_ASSERT_MTX_NEAR(d_hessenberg_iter, hessenberg_iter, 1e-14);
GKO_ASSERT_MTX_NEAR(d_krylov_bases, krylov_bases, 1e-14);
GKO_ASSERT_ARRAY_EQ(d_final_iter_nums, final_iter_nums);
}


TEST_F(Gmres, HipGmresStep2IsEquivalentToRef)
{
initialize_data();
Expand Down

0 comments on commit 966568c

Please sign in to comment.