Skip to content

Commit

Permalink
feat: use latest rust-gpu-tools
Browse files Browse the repository at this point in the history
rust-gpu-tools had a few breaking changes, most notably:

When creating kernels, the way the global work size is specified has changed.
It is no longer the total number of threads, but the number of
local-work-size-sized groups of threads.
  • Loading branch information
vmx committed Jul 9, 2021
1 parent 5196224 commit 8f133be
Show file tree
Hide file tree
Showing 3 changed files with 8 additions and 9 deletions.
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ paired = { version = "0.22.0", optional = true }

# gpu feature
#rust-gpu-tools = { version = "0.3.0", optional = true }
rust-gpu-tools = { git = "https://github.com/filecoin-project/rust-gpu-tools", rev = "2827a11196dd638c9afe5aeb99f6425c0d1a7670", optional = true }
rust-gpu-tools = { git = "https://github.com/filecoin-project/rust-gpu-tools", branch = "master", default-features = false, optional = true }
ff-cl-gen = { version = "0.3.0", optional = true }
fs2 = { version = "0.4.3", optional = true }

Expand Down
4 changes: 2 additions & 2 deletions src/gpu/fft.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ where
// Select the first device for FFT
let device = devices[0];

let src = sources::kernel::<E>(device.brand() == opencl::Brand::Nvidia);
let src = sources::kernel::<E>(device.vendor() == opencl::Vendor::Nvidia);

let program = opencl::Program::from_opencl(&device, &src)?;
let pq_buffer = program.create_buffer::<E::Fr>(1 << MAX_LOG2_RADIX >> 1)?;
Expand Down Expand Up @@ -77,7 +77,7 @@ where

let n = 1u32 << log_n;
let local_work_size = 1 << cmp::min(deg - 1, MAX_LOG2_LOCAL_WORK_SIZE);
let global_work_size = (n >> deg) * local_work_size;
let global_work_size = n >> deg;
let kernel = self.program.create_kernel(
"radix_fft",
global_work_size as usize,
Expand Down
11 changes: 5 additions & 6 deletions src/gpu/multiexp.rs
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ where
E: Engine,
{
pub fn create(d: opencl::Device, priority: bool) -> GPUResult<SingleMultiexpKernel<E>> {
let src = sources::kernel::<E>(d.brand() == opencl::Brand::Nvidia);
let src = sources::kernel::<E>(d.vendor() == opencl::Vendor::Nvidia);

let exp_bits = exp_size::<E>() * 8;
let core_count = utils::get_core_count(&d);
Expand Down Expand Up @@ -156,10 +156,9 @@ where
.program
.create_buffer::<<G as CurveAffine>::Projective>(2 * self.core_count)?;

// Make global work size divisible by `LOCAL_WORK_SIZE`
let mut global_work_size = num_windows * num_groups;
global_work_size +=
(LOCAL_WORK_SIZE - (global_work_size % LOCAL_WORK_SIZE)) % LOCAL_WORK_SIZE;
// The global work size follows CUDA's definition and is the number of `LOCAL_WORK_SIZE`
// sized thread groups.
let global_work_size = (num_windows * num_groups + LOCAL_WORK_SIZE - 1) / LOCAL_WORK_SIZE;

let kernel = self.program.create_kernel(
if TypeId::of::<G>() == TypeId::of::<E::G1Affine>() {
Expand All @@ -184,7 +183,7 @@ where
.arg(&(window_size as u32))
.run()?;

let mut results = vec![<G as CurveAffine>::Projective::zero(); num_groups * num_windows];
let mut results = vec![<G as CurveAffine>::Projective::zero(); 2 * self.core_count];
self.program
.read_into_buffer(&result_buffer, 0, &mut results)?;

Expand Down

0 comments on commit 8f133be

Please sign in to comment.