Skip to content

Commit

Permalink
montgomery conversion for cuda
Browse files Browse the repository at this point in the history
  • Loading branch information
yshekel committed Jun 13, 2024
1 parent dfc3a53 commit 876d52f
Show file tree
Hide file tree
Showing 13 changed files with 183 additions and 147 deletions.
4 changes: 2 additions & 2 deletions icicle_v3/backend/cpu/src/curve/cpu_mont_conversion.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@ using namespace curve_config;
using namespace icicle;

template <typename T>
eIcicleError cpu_convert_mont(
const Device& device, const T* input, size_t n, bool is_into, const ConvertMontgomeryConfig& config, T* output)
eIcicleError
cpu_convert_mont(const Device& device, const T* input, size_t n, bool is_into, const VecOpsConfig& config, T* output)
{
for (size_t i = 0; i < n; ++i) {
output[i] = is_into ? T::to_montgomery(input[i]) : T::from_montgomery(input[i]);
Expand Down
5 changes: 4 additions & 1 deletion icicle_v3/backend/cuda/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,10 @@ target_link_libraries(icicle_cuda_field PRIVATE ${CUDA_LIBRARIES}) # Link to CUD

# curve API library
if (CURVE)
add_library(icicle_cuda_curve SHARED src/curve/cuda_msm.cu)
add_library(icicle_cuda_curve SHARED
src/curve/cuda_msm.cu
src/curve/cuda_mont.cu
)
target_include_directories(icicle_cuda_curve PRIVATE include)
target_link_libraries(icicle_cuda_curve PUBLIC icicle_device icicle_curve)
set_target_properties(icicle_cuda_curve PROPERTIES OUTPUT_NAME "icicle_cuda_curve_${FIELD}")
Expand Down
54 changes: 54 additions & 0 deletions icicle_v3/backend/cuda/include/cuda_mont.cuh
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
#include <cuda.h>
#include <stdexcept>

#include "icicle/errors.h"
#include "icicle/vec_ops.h"
#include "gpu-utils/error_handler.h"

namespace montgomery {
#define MAX_THREADS_PER_BLOCK 256

template <typename E, bool is_into>
__global__ void MontgomeryKernel(const E* input, int n, E* output)
{
int tid = blockIdx.x * blockDim.x + threadIdx.x;
if (tid < n) { output[tid] = is_into ? E::to_montgomery(input[tid]) : E::from_montgomery(input[tid]); }
}

template <typename E, bool is_into>
cudaError_t ConvertMontgomery(const E* input, size_t n, const VecOpsConfig& config, E* output)
{
cudaStream_t cuda_stream = reinterpret_cast<cudaStream_t>(config.stream);

E *d_alloc_out = nullptr, *d_alloc_in = nullptr, *d_out;
const E* d_in;
if (!config.is_a_on_device) {
CHK_IF_RETURN(cudaMallocAsync(&d_alloc_in, n * sizeof(E), cuda_stream));
CHK_IF_RETURN(cudaMemcpyAsync(d_alloc_in, input, n * sizeof(E), cudaMemcpyHostToDevice, cuda_stream));
d_in = d_alloc_in;
} else {
d_in = input;
}

if (!config.is_result_on_device) {
CHK_IF_RETURN(cudaMallocAsync(&d_alloc_out, n * sizeof(E), cuda_stream));
d_out = d_alloc_out;
} else {
d_out = output;
}

int num_threads = MAX_THREADS_PER_BLOCK;
int num_blocks = (n + num_threads - 1) / num_threads;
MontgomeryKernel<E, is_into><<<num_blocks, num_threads, 0, cuda_stream>>>(d_in, n, d_out);

if (d_alloc_in) { CHK_IF_RETURN(cudaFreeAsync(d_alloc_in, cuda_stream)); }
if (d_alloc_out) {
CHK_IF_RETURN(cudaMemcpyAsync(output, d_out, n * sizeof(E), cudaMemcpyDeviceToHost, cuda_stream));
CHK_IF_RETURN(cudaFreeAsync(d_out, cuda_stream));
}
if (!config.is_async) return CHK_STICKY(cudaStreamSynchronize(cuda_stream));

return CHK_LAST();
}

} // namespace montgomery
33 changes: 33 additions & 0 deletions icicle_v3/backend/cuda/src/curve/cuda_mont.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
#include <cuda.h>
#include <stdexcept>

#include "icicle/errors.h"
#include "icicle/curves/montgomery_conversion.h"
#include "gpu-utils/error_handler.h"
#include "error_translation.h"
#include "cuda_mont.cuh"

#include "icicle/curves/curve_config.h"
using namespace curve_config;
using namespace icicle;

namespace icicle {

template <typename T>
eIcicleError
cuda_convert_mont(const Device& device, const T* input, size_t n, bool is_into, const VecOpsConfig& config, T* output)
{
cudaError_t err = is_into ? montgomery::ConvertMontgomery<T, true>(input, n, config, output)
: montgomery::ConvertMontgomery<T, false>(input, n, config, output);
return translateCudaError(err);
}

REGISTER_AFFINE_CONVERT_MONTGOMERY_BACKEND("CUDA", cuda_convert_mont<affine_t>);
REGISTER_PROJECTIVE_CONVERT_MONTGOMERY_BACKEND("CUDA", cuda_convert_mont<projective_t>);

#ifdef G2
REGISTER_AFFINE_G2_CONVERT_MONTGOMERY_BACKEND("CUDA", cuda_convert_mont<g2_affine_t>);
REGISTER_PROJECTIVE_G2_CONVERT_MONTGOMERY_BACKEND("CUDA", cuda_convert_mont<g2_projective_t>);
#endif // G2

} // namespace icicle
51 changes: 3 additions & 48 deletions icicle_v3/backend/cuda/src/field/cuda_mont.cu
Original file line number Diff line number Diff line change
Expand Up @@ -5,64 +5,19 @@
#include "icicle/vec_ops.h"
#include "gpu-utils/error_handler.h"
#include "error_translation.h"
#include "cuda_mont.cuh"

namespace icicle {

#define MAX_THREADS_PER_BLOCK 256

template <typename E>
__global__ void MontgomeryKernel(const E* input, int n, bool is_into, E* output)
{
int tid = blockIdx.x * blockDim.x + threadIdx.x;
if (tid < n) { output[tid] = is_into ? E::to_montgomery(input[tid]) : E::from_montgomery(input[tid]); }
}

template <typename E>
cudaError_t ConvertMontgomery(const E* input, int n, bool is_into, const VecOpsConfig& config, E* output)
{
cudaStream_t cuda_stream = reinterpret_cast<cudaStream_t>(config.stream);

E *d_alloc_out = nullptr, *d_alloc_in = nullptr, *d_out;
const E* d_in;
if (!config.is_a_on_device) {
CHK_IF_RETURN(cudaMallocAsync(&d_alloc_in, n * sizeof(E), cuda_stream));
CHK_IF_RETURN(cudaMemcpyAsync(d_alloc_in, input, n * sizeof(E), cudaMemcpyHostToDevice, cuda_stream));
d_in = d_alloc_in;
} else {
d_in = input;
}

if (!config.is_result_on_device) {
CHK_IF_RETURN(cudaMallocAsync(&d_alloc_out, n * sizeof(E), cuda_stream));
d_out = d_alloc_out;
} else {
d_out = output;
}

int num_threads = MAX_THREADS_PER_BLOCK;
int num_blocks = (n + num_threads - 1) / num_threads;
MontgomeryKernel<E><<<num_blocks, num_threads, 0, cuda_stream>>>(d_in, n, is_into, d_out);

if (d_alloc_in) { CHK_IF_RETURN(cudaFreeAsync(d_alloc_in, cuda_stream)); }
if (d_alloc_out) {
CHK_IF_RETURN(cudaMemcpyAsync(output, d_out, n * sizeof(E), cudaMemcpyDeviceToHost, cuda_stream));
CHK_IF_RETURN(cudaFreeAsync(d_out, cuda_stream));
}
if (!config.is_async) return CHK_STICKY(cudaStreamSynchronize(cuda_stream));

return CHK_LAST();
}

/************************************ REGISTRATION ************************************/

#include "icicle/fields/field_config.h"
using namespace field_config;

template <typename F>
eIcicleError convert_montgomery_cuda(
const Device& device, const F* input, uint64_t n, bool is_into, const VecOpsConfig& config, F* output)
{
auto err = ConvertMontgomery<F>(input, n, is_into, config, output);
auto err = is_into ? montgomery::ConvertMontgomery<F, true>(input, n, config, output)
: montgomery::ConvertMontgomery<F, false>(input, n, config, output);
return translateCudaError(err);
}

Expand Down
40 changes: 5 additions & 35 deletions icicle_v3/include/icicle/curves/montgomery_conversion.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,48 +9,18 @@

#include "icicle/curves/affine.h"
#include "icicle/curves/projective.h"
#include "icicle/vec_ops.h"
#include "icicle/fields/field.h"
#include "icicle/curves/curve_config.h"

using namespace curve_config;

namespace icicle {

/*************************** Frontend APIs ***************************/

struct ConvertMontgomeryConfig {
icicleStreamHandle stream; /**< stream for async execution. */
bool is_input_on_device;
bool is_output_on_device;
bool is_async;

ConfigExtension ext; /** backend specific extensions*/
};

static ConvertMontgomeryConfig default_convert_montgomery_config()
{
ConvertMontgomeryConfig config = {
nullptr, // stream
false, // is_input_on_device
false, // is_output_on_device
false, // is_async
};
return config;
}

template <typename T>
eIcicleError
points_convert_montgomery(const T* input, size_t n, bool is_into, const ConvertMontgomeryConfig& config, T* output);

/*************************** Backend registration ***************************/

using AffineConvertMontImpl = std::function<eIcicleError(
const Device& device,
const affine_t* input,
size_t n,
bool is_into,
const ConvertMontgomeryConfig& config,
affine_t* output)>;
const Device& device, const affine_t* input, size_t n, bool is_into, const VecOpsConfig& config, affine_t* output)>;

void register_affine_convert_montgomery(const std::string& deviceType, AffineConvertMontImpl);

Expand All @@ -67,7 +37,7 @@ namespace icicle {
const projective_t* input,
size_t n,
bool is_into,
const ConvertMontgomeryConfig& config,
const VecOpsConfig& config,
projective_t* output)>;

void register_projective_convert_montgomery(const std::string& deviceType, ProjectiveConvertMontImpl);
Expand All @@ -86,7 +56,7 @@ namespace icicle {
const g2_affine_t* input,
size_t n,
bool is_into,
const ConvertMontgomeryConfig& config,
const VecOpsConfig& config,
g2_affine_t* output)>;

void register_affine_g2_convert_montgomery(const std::string& deviceType, AffineG2ConvertMontImpl);
Expand All @@ -104,7 +74,7 @@ namespace icicle {
const g2_projective_t* input,
size_t n,
bool is_into,
const ConvertMontgomeryConfig& config,
const VecOpsConfig& config,
g2_projective_t* output)>;

void register_projective_g2_convert_montgomery(const std::string& deviceType, ProjectiveG2ConvertMontImpl);
Expand Down
2 changes: 1 addition & 1 deletion icicle_v3/include/icicle/curves/projective.h
Original file line number Diff line number Diff line change
Expand Up @@ -231,7 +231,7 @@ class Projective
out[i] = (i % size < 100) ? rand_host() : out[i - 100];
}

static void rand_host_many_affine(Affine<FF>* out, int size)
static void rand_host_many(Affine<FF>* out, int size)
{
for (int i = 0; i < size; i++)
out[i] = (i % size < 100) ? to_affine(rand_host()) : out[i - 100];
Expand Down
3 changes: 1 addition & 2 deletions icicle_v3/include/icicle/vec_ops.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,7 @@ namespace icicle {
eIcicleError vector_mul(const S* vec_a, const S* vec_b, uint64_t n, const VecOpsConfig& config, S* output);

template <typename S>
eIcicleError
scalar_convert_montgomery(const S* input, uint64_t size, bool is_into, const VecOpsConfig& config, S* output);
eIcicleError convert_montgomery(const S* input, uint64_t size, bool is_into, const VecOpsConfig& config, S* output);

/*************************** Backend registration ***************************/

Expand Down
4 changes: 2 additions & 2 deletions icicle_v3/src/curves/ffi_extern.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ extern "C" void CONCAT_EXPAND(CURVE, generate_projective_points)(projective_t* p

extern "C" void CONCAT_EXPAND(CURVE, generate_affine_points)(affine_t* points, int size)
{
projective_t::rand_host_many_affine(points, size);
projective_t::rand_host_many(points, size);
}

/********************************** G2 **********************************/
Expand All @@ -53,6 +53,6 @@ extern "C" void CONCAT_EXPAND(CURVE, g2_generate_projective_points)(g2_projectiv

extern "C" void CONCAT_EXPAND(CURVE, g2_generate_affine_points)(g2_affine_t* points, int size)
{
g2_projective_t::rand_host_many_affine(points, size);
g2_projective_t::rand_host_many(points, size);
}
#endif // G2
32 changes: 12 additions & 20 deletions icicle_v3/src/curves/montgomery_conversion.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,14 @@ namespace icicle {
ICICLE_DISPATCHER_INST(AffineConvertMont, affine_convert_montgomery, AffineConvertMontImpl);

extern "C" eIcicleError CONCAT_EXPAND(CURVE, affine_convert_montgomery)(
const affine_t* input, size_t n, bool is_into, const ConvertMontgomeryConfig& config, affine_t* output)
const affine_t* input, size_t n, bool is_into, const VecOpsConfig& config, affine_t* output)
{
return AffineConvertMont::execute(input, n, is_into, config, output);
}

template <>
eIcicleError points_convert_montgomery(
const affine_t* input, size_t n, bool is_into, const ConvertMontgomeryConfig& config, affine_t* output)
eIcicleError
convert_montgomery(const affine_t* input, size_t n, bool is_into, const VecOpsConfig& config, affine_t* output)
{
return CONCAT_EXPAND(CURVE, affine_convert_montgomery)(input, n, is_into, config, output);
}
Expand All @@ -26,15 +26,15 @@ namespace icicle {
ICICLE_DISPATCHER_INST(AffineG2ConvertMont, affine_g2_convert_montgomery, AffineG2ConvertMontImpl);

extern "C" eIcicleError CONCAT_EXPAND(CURVE, affine_g2_convert_montgomery)(
const g2_affine_t* input, size_t n, bool is_into, const ConvertMontgomeryConfig& config, g2_affine_t* output)
const g2_affine_t* input, size_t n, bool is_into, const VecOpsConfig& config, g2_affine_t* output)
{
return AffineG2ConvertMont::execute(input, n, is_into, config, output);
}

#ifndef G1_AFFINE_SAME_TYPE_AS_G2_AFFINE
template <>
eIcicleError points_convert_montgomery(
const g2_affine_t* input, size_t n, bool is_into, const ConvertMontgomeryConfig& config, g2_affine_t* output)
eIcicleError
convert_montgomery(const g2_affine_t* input, size_t n, bool is_into, const VecOpsConfig& config, g2_affine_t* output)
{
return CONCAT_EXPAND(CURVE, affine_g2_convert_montgomery)(input, n, is_into, config, output);
}
Expand All @@ -44,14 +44,14 @@ namespace icicle {
ICICLE_DISPATCHER_INST(ProjectiveConvertMont, projective_convert_montgomery, ProjectiveConvertMontImpl);

extern "C" eIcicleError CONCAT_EXPAND(CURVE, projective_convert_montgomery)(
const projective_t* input, size_t n, bool is_into, const ConvertMontgomeryConfig& config, projective_t* output)
const projective_t* input, size_t n, bool is_into, const VecOpsConfig& config, projective_t* output)
{
return ProjectiveConvertMont::execute(input, n, is_into, config, output);
}

template <>
eIcicleError points_convert_montgomery(
const projective_t* input, size_t n, bool is_into, const ConvertMontgomeryConfig& config, projective_t* output)
eIcicleError convert_montgomery(
const projective_t* input, size_t n, bool is_into, const VecOpsConfig& config, projective_t* output)
{
return CONCAT_EXPAND(CURVE, projective_convert_montgomery)(input, n, is_into, config, output);
}
Expand All @@ -60,22 +60,14 @@ namespace icicle {
ICICLE_DISPATCHER_INST(ProjectiveG2ConvertMont, projective_g2_convert_montgomery, ProjectiveG2ConvertMontImpl);

extern "C" eIcicleError CONCAT_EXPAND(CURVE, projective_g2_convert_montgomery)(
const g2_projective_t* input,
size_t n,
bool is_into,
const ConvertMontgomeryConfig& config,
g2_projective_t* output)
const g2_projective_t* input, size_t n, bool is_into, const VecOpsConfig& config, g2_projective_t* output)
{
return ProjectiveG2ConvertMont::execute(input, n, is_into, config, output);
}

template <>
eIcicleError points_convert_montgomery(
const g2_projective_t* input,
size_t n,
bool is_into,
const ConvertMontgomeryConfig& config,
g2_projective_t* output)
eIcicleError convert_montgomery(
const g2_projective_t* input, size_t n, bool is_into, const VecOpsConfig& config, g2_projective_t* output)
{
return CONCAT_EXPAND(CURVE, projective_g2_convert_montgomery)(input, n, is_into, config, output);
}
Expand Down
4 changes: 2 additions & 2 deletions icicle_v3/src/vec_ops.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -113,8 +113,8 @@ namespace icicle {
}

template <>
eIcicleError scalar_convert_montgomery(
const scalar_t* input, uint64_t size, bool is_into, const VecOpsConfig& config, scalar_t* output)
eIcicleError
convert_montgomery(const scalar_t* input, uint64_t size, bool is_into, const VecOpsConfig& config, scalar_t* output)
{
return CONCAT_EXPAND(FIELD, scalar_convert_montgomery)(input, size, is_into, config, output);
}
Expand Down
Loading

0 comments on commit 876d52f

Please sign in to comment.