Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update Permutation class and kernels. #469

Merged
merged 6 commits into from
Mar 7, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
78 changes: 77 additions & 1 deletion common/matrix/dense_kernels.hpp.inc
Original file line number Diff line number Diff line change
Expand Up @@ -361,4 +361,80 @@ __global__ __launch_bounds__(default_block_size) void reduce_total_cols(
}


} // namespace kernel
template <size_type block_size, typename IndexType, typename ValueType>
__global__ __launch_bounds__(block_size) void row_permute(
size_type num_rows, size_type num_cols,
const IndexType *__restrict__ perm_idxs, const ValueType *__restrict__ orig,
size_type stride_orig, ValueType *__restrict__ result,
size_type stride_result)
{
constexpr auto warps_per_block = block_size / config::warp_size;
const auto global_id =
thread::get_thread_id<config::warp_size, warps_per_block>();
const auto row_id = global_id / num_cols;
const auto col_id = global_id % num_cols;
if (row_id < num_rows) {
result[row_id * stride_result + col_id] =
orig[perm_idxs[row_id] * stride_orig + col_id];
}
}


template <size_type block_size, typename IndexType, typename ValueType>
__global__ __launch_bounds__(block_size) void column_permute(
size_type num_rows, size_type num_cols,
const IndexType *__restrict__ perm_idxs, const ValueType *__restrict__ orig,
size_type stride_orig, ValueType *__restrict__ result,
size_type stride_result)
{
constexpr auto warps_per_block = block_size / config::warp_size;
const auto global_id =
thread::get_thread_id<config::warp_size, warps_per_block>();
const auto row_id = global_id / num_cols;
const auto col_id = global_id % num_cols;
if (row_id < num_rows) {
result[row_id * stride_result + col_id] =
orig[row_id * stride_orig + perm_idxs[col_id]];
}
}


template <size_type block_size, typename IndexType, typename ValueType>
__global__ __launch_bounds__(block_size) void inverse_row_permute(
size_type num_rows, size_type num_cols,
const IndexType *__restrict__ perm_idxs, const ValueType *__restrict__ orig,
size_type stride_orig, ValueType *__restrict__ result,
size_type stride_result)
{
constexpr auto warps_per_block = block_size / config::warp_size;
const auto global_id =
thread::get_thread_id<config::warp_size, warps_per_block>();
const auto row_id = global_id / num_cols;
const auto col_id = global_id % num_cols;
if (row_id < num_rows) {
result[perm_idxs[row_id] * stride_result + col_id] =
orig[row_id * stride_orig + col_id];
}
}


template <size_type block_size, typename IndexType, typename ValueType>
__global__ __launch_bounds__(block_size) void inverse_column_permute(
size_type num_rows, size_type num_cols,
const IndexType *__restrict__ perm_idxs, const ValueType *__restrict__ orig,
size_type stride_orig, ValueType *__restrict__ result,
size_type stride_result)
{
constexpr auto warps_per_block = block_size / config::warp_size;
const auto global_id =
thread::get_thread_id<config::warp_size, warps_per_block>();
const auto row_id = global_id / num_cols;
const auto col_id = global_id % num_cols;
if (row_id < num_rows) {
result[row_id * stride_result + perm_idxs[col_id]] =
orig[row_id * stride_orig + col_id];
}
}


} // namespace kernel
88 changes: 88 additions & 0 deletions core/test/matrix/permutation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -118,8 +118,19 @@ TYPED_TEST(Permutation, CanBeConstructedWithSize)
}


TYPED_TEST(Permutation, FactorySetsCorrectPermuteMask)
{
using i_type = typename TestFixture::i_type;
auto m = gko::matrix::Permutation<i_type>::create(this->exec);
auto mask = m->get_permute_mask();

ASSERT_EQ(mask, gko::matrix::row_permute);
}


TYPED_TEST(Permutation, PermutationCanBeConstructedFromExistingData)
{
using i_type = typename TestFixture::i_type;
using i_type = typename TestFixture::i_type;
i_type data[] = {1, 0, 2};

Expand All @@ -131,6 +142,35 @@ TYPED_TEST(Permutation, PermutationCanBeConstructedFromExistingData)
}


TYPED_TEST(Permutation, CanBeConstructedWithSizeAndMask)
{
using i_type = typename TestFixture::i_type;
auto m = gko::matrix::Permutation<i_type>::create(
this->exec, gko::dim<2>{2, 3}, gko::matrix::column_permute);

ASSERT_EQ(m->get_size(), gko::dim<2>(2, 3));
ASSERT_EQ(m->get_permutation_size(), 2);
ASSERT_EQ(m->get_permute_mask(), gko::matrix::column_permute);
}


TYPED_TEST(Permutation, CanExplicitlyOverrideSetPermuteMask)
{
using i_type = typename TestFixture::i_type;
auto m = gko::matrix::Permutation<i_type>::create(
this->exec, gko::dim<2>{2, 3}, gko::matrix::column_permute);

auto mask = m->get_permute_mask();
ASSERT_EQ(mask, gko::matrix::column_permute);

m->set_permute_mask(gko::matrix::row_permute |
gko::matrix::inverse_permute);

auto s_mask = m->get_permute_mask();
ASSERT_EQ(s_mask, gko::matrix::row_permute | gko::matrix::inverse_permute);
}


TYPED_TEST(Permutation, PermutationThrowsforWrongRowPermDimensions)
{
using i_type = typename TestFixture::i_type;
Expand All @@ -143,6 +183,28 @@ TYPED_TEST(Permutation, PermutationThrowsforWrongRowPermDimensions)
}


TYPED_TEST(Permutation, SettingMaskDoesNotModifyData)
{
using i_type = typename TestFixture::i_type;
i_type data[] = {1, 0, 2};

auto m = gko::matrix::Permutation<i_type>::create(
this->exec, gko::dim<2>{3, 5},
gko::Array<i_type>::view(this->exec, 3, data));

auto mask = m->get_permute_mask();
ASSERT_EQ(m->get_const_permutation(), data);
ASSERT_EQ(mask, gko::matrix::row_permute);

m->set_permute_mask(gko::matrix::row_permute |
gko::matrix::inverse_permute);

auto s_mask = m->get_permute_mask();
ASSERT_EQ(s_mask, gko::matrix::row_permute | gko::matrix::inverse_permute);
ASSERT_EQ(m->get_const_permutation(), data);
}


TYPED_TEST(Permutation, PermutationThrowsforWrongColPermDimensions)
{
using i_type = typename TestFixture::i_type;
Expand Down Expand Up @@ -186,6 +248,32 @@ TYPED_TEST(Permutation, CanBeMoved)
}


TYPED_TEST(Permutation, CopyingPreservesMask)
{
using i_type = typename TestFixture::i_type;
auto mtx_copy = gko::matrix::Permutation<i_type>::create(this->exec);

mtx_copy->copy_from(this->mtx.get());

auto o_mask = this->mtx->get_permute_mask();
auto n_mask = mtx_copy->get_permute_mask();
ASSERT_EQ(o_mask, gko::matrix::row_permute);
ASSERT_EQ(o_mask, n_mask);

this->mtx->set_permute_mask(gko::matrix::column_permute);

o_mask = this->mtx->get_permute_mask();
n_mask = mtx_copy->get_permute_mask();
ASSERT_EQ(o_mask, gko::matrix::column_permute);
ASSERT_NE(o_mask, n_mask);

mtx_copy->copy_from(this->mtx.get());

n_mask = mtx_copy->get_permute_mask();
ASSERT_EQ(o_mask, n_mask);
}


TYPED_TEST(Permutation, CanBeCloned)
{
auto mtx_clone = this->mtx->clone();
Expand Down
52 changes: 48 additions & 4 deletions cuda/matrix/dense_kernels.cu
Original file line number Diff line number Diff line change
Expand Up @@ -562,7 +562,18 @@ template <typename ValueType, typename IndexType>
void row_permute(std::shared_ptr<const CudaExecutor> exec,
const Array<IndexType> *permutation_indices,
matrix::Dense<ValueType> *row_permuted,
const matrix::Dense<ValueType> *orig) GKO_NOT_IMPLEMENTED;
const matrix::Dense<ValueType> *orig)
{
constexpr auto block_size = default_block_size;
const dim3 grid_dim =
ceildiv(orig->get_size()[0] * orig->get_size()[1], block_size);
const dim3 block_dim{config::warp_size, 1, block_size / config::warp_size};
kernel::row_permute<block_size><<<grid_dim, block_dim>>>(
orig->get_size()[0], orig->get_size()[1],
as_cuda_type(permutation_indices->get_const_data()),
as_cuda_type(orig->get_const_values()), orig->get_stride(),
as_cuda_type(row_permuted->get_values()), row_permuted->get_stride());
}

GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_ROW_PERMUTE_KERNEL);

Expand All @@ -571,7 +582,19 @@ template <typename ValueType, typename IndexType>
void column_permute(std::shared_ptr<const CudaExecutor> exec,
const Array<IndexType> *permutation_indices,
matrix::Dense<ValueType> *column_permuted,
const matrix::Dense<ValueType> *orig) GKO_NOT_IMPLEMENTED;
const matrix::Dense<ValueType> *orig)
{
constexpr auto block_size = default_block_size;
const dim3 grid_dim =
ceildiv(orig->get_size()[0] * orig->get_size()[1], block_size);
const dim3 block_dim{config::warp_size, 1, block_size / config::warp_size};
kernel::column_permute<block_size><<<grid_dim, block_dim>>>(
orig->get_size()[0], orig->get_size()[1],
as_cuda_type(permutation_indices->get_const_data()),
as_cuda_type(orig->get_const_values()), orig->get_stride(),
as_cuda_type(column_permuted->get_values()),
column_permuted->get_stride());
}

GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(
GKO_DECLARE_COLUMN_PERMUTE_KERNEL);
Expand All @@ -582,7 +605,17 @@ void inverse_row_permute(std::shared_ptr<const CudaExecutor> exec,
const Array<IndexType> *permutation_indices,
matrix::Dense<ValueType> *row_permuted,
const matrix::Dense<ValueType> *orig)
GKO_NOT_IMPLEMENTED;
{
constexpr auto block_size = default_block_size;
const dim3 grid_dim =
ceildiv(orig->get_size()[0] * orig->get_size()[1], block_size);
const dim3 block_dim{config::warp_size, 1, block_size / config::warp_size};
kernel::inverse_row_permute<block_size><<<grid_dim, block_dim>>>(
orig->get_size()[0], orig->get_size()[1],
as_cuda_type(permutation_indices->get_const_data()),
as_cuda_type(orig->get_const_values()), orig->get_stride(),
as_cuda_type(row_permuted->get_values()), row_permuted->get_stride());
}

GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(
GKO_DECLARE_INVERSE_ROW_PERMUTE_KERNEL);
Expand All @@ -593,7 +626,18 @@ void inverse_column_permute(std::shared_ptr<const CudaExecutor> exec,
const Array<IndexType> *permutation_indices,
matrix::Dense<ValueType> *column_permuted,
const matrix::Dense<ValueType> *orig)
GKO_NOT_IMPLEMENTED;
{
constexpr auto block_size = default_block_size;
const dim3 grid_dim =
ceildiv(orig->get_size()[0] * orig->get_size()[1], block_size);
const dim3 block_dim{config::warp_size, 1, block_size / config::warp_size};
kernel::inverse_column_permute<block_size><<<grid_dim, block_dim>>>(
orig->get_size()[0], orig->get_size()[1],
as_cuda_type(permutation_indices->get_const_data()),
as_cuda_type(orig->get_const_values()), orig->get_stride(),
as_cuda_type(column_permuted->get_values()),
column_permuted->get_stride());
}

GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(
GKO_DECLARE_INVERSE_COLUMN_PERMUTE_KERNEL);
Expand Down
75 changes: 73 additions & 2 deletions cuda/test/matrix/dense_kernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,11 @@ namespace {

class Dense : public ::testing::Test {
protected:
using Mtx = gko::matrix::Dense<>;
using ComplexMtx = gko::matrix::Dense<std::complex<double>>;
using itype = int;
using vtype = double;
using Mtx = gko::matrix::Dense<vtype>;
using Arr = gko::Array<itype>;
using ComplexMtx = gko::matrix::Dense<std::complex<vtype>>;

Dense() : rand_engine(15) {}

Expand Down Expand Up @@ -123,6 +126,22 @@ class Dense : public ::testing::Test {
dalpha->copy_from(alpha.get());
dbeta = Mtx::create(cuda);
dbeta->copy_from(beta.get());

std::vector<itype> tmp(x->get_size()[0], 0);
auto rng = std::default_random_engine{};
std::iota(tmp.begin(), tmp.end(), 0);
std::shuffle(tmp.begin(), tmp.end(), rng);
std::vector<itype> tmp2(x->get_size()[1], 0);
std::iota(tmp2.begin(), tmp2.end(), 0);
std::shuffle(tmp2.begin(), tmp2.end(), rng);
rpermute_idxs =
std::unique_ptr<Arr>(new Arr{ref, tmp.begin(), tmp.end()});
drpermute_idxs =
std::unique_ptr<Arr>(new Arr{cuda, tmp.begin(), tmp.end()});
cpermute_idxs =
std::unique_ptr<Arr>(new Arr{ref, tmp2.begin(), tmp2.end()});
dcpermute_idxs =
std::unique_ptr<Arr>(new Arr{cuda, tmp2.begin(), tmp2.end()});
}

std::shared_ptr<gko::ReferenceExecutor> ref;
Expand All @@ -142,6 +161,10 @@ class Dense : public ::testing::Test {
std::unique_ptr<Mtx> dy;
std::unique_ptr<Mtx> dalpha;
std::unique_ptr<Mtx> dbeta;
std::unique_ptr<Arr> rpermute_idxs;
std::unique_ptr<Arr> drpermute_idxs;
std::unique_ptr<Arr> cpermute_idxs;
std::unique_ptr<Arr> dcpermute_idxs;
};


Expand Down Expand Up @@ -463,4 +486,52 @@ TEST_F(Dense, CalculateTotalColsIsEquivalentToRef)
}


TEST_F(Dense, IsRowPermutable)
{
set_up_apply_data();

auto r_permute = x->row_permute(rpermute_idxs.get());
thoasm marked this conversation as resolved.
Show resolved Hide resolved
auto dr_permute = dx->row_permute(drpermute_idxs.get());

GKO_ASSERT_MTX_NEAR(static_cast<Mtx *>(r_permute.get()),
static_cast<Mtx *>(dr_permute.get()), 0);
}


TEST_F(Dense, IsColPermutable)
{
set_up_apply_data();

auto c_permute = x->column_permute(cpermute_idxs.get());
auto dc_permute = dx->column_permute(dcpermute_idxs.get());

GKO_ASSERT_MTX_NEAR(static_cast<Mtx *>(c_permute.get()),
static_cast<Mtx *>(dc_permute.get()), 0);
}


TEST_F(Dense, IsInverseRowPermutable)
{
set_up_apply_data();

auto inverse_r_permute = x->inverse_row_permute(rpermute_idxs.get());
auto d_inverse_r_permute = dx->inverse_row_permute(drpermute_idxs.get());

GKO_ASSERT_MTX_NEAR(static_cast<Mtx *>(inverse_r_permute.get()),
static_cast<Mtx *>(d_inverse_r_permute.get()), 0);
}


TEST_F(Dense, IsInverseColPermutable)
{
set_up_apply_data();

auto inverse_c_permute = x->inverse_column_permute(cpermute_idxs.get());
auto d_inverse_c_permute = dx->inverse_column_permute(dcpermute_idxs.get());

GKO_ASSERT_MTX_NEAR(static_cast<Mtx *>(inverse_c_permute.get()),
static_cast<Mtx *>(d_inverse_c_permute.get()), 0);
}


} // namespace
Loading