From c6b437f30de2cb69afce5133ece9c516299b993a Mon Sep 17 00:00:00 2001 From: DmytroTym Date: Thu, 12 Oct 2023 14:02:13 +0300 Subject: [PATCH 1/2] Hotfix to go regression when 2 curves are imported --- icicle/appUtils/msm/msm.cu | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/icicle/appUtils/msm/msm.cu b/icicle/appUtils/msm/msm.cu index c58381cc0..644778576 100644 --- a/icicle/appUtils/msm/msm.cu +++ b/icicle/appUtils/msm/msm.cu @@ -151,7 +151,8 @@ __global__ void add_ones_kernel(A* points, S* scalars, P* results, const unsigne results[tid] = sum; } -__global__ void find_cutoff_kernel(unsigned* v, unsigned size, unsigned cutoff, unsigned run_length, unsigned* result) +template <typename S> +__global__ void find_cutoff_kernel(unsigned* v, unsigned size, unsigned cutoff, unsigned run_length, S* fake_param, unsigned* result) { unsigned tid = (blockIdx.x * blockDim.x) + threadIdx.x; const unsigned nof_threads = (size + run_length - 1) / run_length; @@ -166,8 +167,8 @@ __global__ void find_cutoff_kernel(unsigned* v, unsigned size, unsigned cutoff, if (tid == 0 && v[size - 1] > cutoff) { result[0] = size; } } -__global__ void -find_max_size(unsigned* bucket_sizes, unsigned* single_bucket_indices, unsigned c, unsigned* largest_bucket_size) +template <typename S> __global__ void +find_max_size(unsigned* bucket_sizes, unsigned* single_bucket_indices, unsigned c, S* fake_param, unsigned* largest_bucket_size) { for (int i = 0;; i++) { if (single_bucket_indices[i] & ((1 << c) - 1)) { @@ -525,14 +526,14 @@ void bucket_method_msm( NUM_THREADS = min(1 << 5, cutoff_nof_runs); NUM_BLOCKS = (cutoff_nof_runs + NUM_THREADS - 1) / NUM_THREADS; find_cutoff_kernel<<<NUM_BLOCKS, NUM_THREADS, 0, stream>>>( - sorted_bucket_sizes, h_nof_buckets_to_compute, bucket_th, cutoff_run_length, nof_large_buckets); + sorted_bucket_sizes, h_nof_buckets_to_compute, bucket_th, cutoff_run_length, d_scalars, nof_large_buckets); unsigned h_nof_large_buckets; cudaMemcpyAsync(&h_nof_large_buckets, nof_large_buckets, sizeof(unsigned), 
cudaMemcpyDeviceToHost, stream); unsigned* max_res; cudaMallocAsync(&max_res, sizeof(unsigned) * 2, stream); - find_max_size<<<1, 1, 0, stream>>>(sorted_bucket_sizes, sorted_single_bucket_indices, c, max_res); + find_max_size<<<1, 1, 0, stream>>>(sorted_bucket_sizes, sorted_single_bucket_indices, c, d_scalars, max_res); unsigned h_max_res[2]; cudaMemcpyAsync(h_max_res, max_res, sizeof(unsigned) * 2, cudaMemcpyDeviceToHost, stream); From 4a02a04f1809a61299e2ace55e30f6ac5e178dd6 Mon Sep 17 00:00:00 2001 From: DmytroTym Date: Thu, 12 Oct 2023 14:37:00 +0300 Subject: [PATCH 2/2] clang fmt --- icicle/appUtils/msm/msm.cu | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/icicle/appUtils/msm/msm.cu b/icicle/appUtils/msm/msm.cu index 644778576..1d6fbf1bf 100644 --- a/icicle/appUtils/msm/msm.cu +++ b/icicle/appUtils/msm/msm.cu @@ -152,7 +152,8 @@ __global__ void add_ones_kernel(A* points, S* scalars, P* results, const unsigne } template <typename S> -__global__ void find_cutoff_kernel(unsigned* v, unsigned size, unsigned cutoff, unsigned run_length, S* fake_param, unsigned* result) +__global__ void +find_cutoff_kernel(unsigned* v, unsigned size, unsigned cutoff, unsigned run_length, S* fake_param, unsigned* result) { unsigned tid = (blockIdx.x * blockDim.x) + threadIdx.x; const unsigned nof_threads = (size + run_length - 1) / run_length; @@ -167,8 +168,9 @@ __global__ void find_cutoff_kernel(unsigned* v, unsigned size, unsigned cutoff, if (tid == 0 && v[size - 1] > cutoff) { result[0] = size; } } -template <typename S> __global__ void -find_max_size(unsigned* bucket_sizes, unsigned* single_bucket_indices, unsigned c, S* fake_param, unsigned* largest_bucket_size) +template <typename S> +__global__ void find_max_size( + unsigned* bucket_sizes, unsigned* single_bucket_indices, unsigned c, S* fake_param, unsigned* largest_bucket_size) { for (int i = 0;; i++) { if (single_bucket_indices[i] & ((1 << c) - 1)) {