Skip to content

Commit

Permalink
Add SVM fine grain system support for issue #38
Browse files Browse the repository at this point in the history
  • Loading branch information
kenba committed Oct 3, 2021
1 parent 1fa9173 commit f4f0cf9
Show file tree
Hide file tree
Showing 2 changed files with 158 additions and 23 deletions.
76 changes: 56 additions & 20 deletions src/svm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ use cl3::types::{cl_device_svm_capabilities, cl_svm_mem_flags, cl_uint};
use libc::c_void;
#[cfg(feature = "serde")]
use serde::ser::{Serialize, SerializeSeq, Serializer};
use std::alloc::{self, Layout};
use std::fmt;
use std::fmt::Debug;
use std::iter::IntoIterator;
Expand All @@ -41,6 +42,7 @@ struct SvmRawVec<'a, T> {
cap: usize,
context: &'a Context,
fine_grain_buffer: bool,
fine_grain_system: bool,
atomics: bool,
}

Expand All @@ -56,16 +58,16 @@ impl<'a, T> SvmRawVec<'a, T> {
"No OpenCL SVM, use OpenCL buffers"
);

let fine_grain_system: bool = svm_capabilities & CL_DEVICE_SVM_FINE_GRAIN_SYSTEM != 0;
assert!(!fine_grain_system, "SVM supports system memory, use Vec!");

let fine_grain_buffer: bool = svm_capabilities & CL_DEVICE_SVM_FINE_GRAIN_BUFFER != 0;
let atomics: bool = fine_grain_buffer && (svm_capabilities & CL_DEVICE_SVM_ATOMICS != 0);
let fine_grain_system: bool = svm_capabilities & CL_DEVICE_SVM_FINE_GRAIN_SYSTEM != 0;
let atomics: bool = (fine_grain_buffer || fine_grain_system)
&& (svm_capabilities & CL_DEVICE_SVM_ATOMICS != 0);
SvmRawVec {
ptr: ptr::null_mut(),
cap: 0,
context,
fine_grain_buffer,
fine_grain_system,
atomics,
}
}
Expand Down Expand Up @@ -106,27 +108,44 @@ impl<'a, T> SvmRawVec<'a, T> {
// Ensure within capacity.
assert!(size <= (isize::MAX as usize) / 2, "capacity overflow");

let svm_mem_flags: cl_svm_mem_flags = if self.fine_grain_buffer {
if self.atomics {
CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_READ_WRITE | CL_MEM_SVM_ATOMICS
} else {
CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_READ_WRITE
// allocation, determine whether to use svm_alloc or not
let ptr = if self.fine_grain_system {
let new_layout = Layout::array::<T>(new_cap).unwrap();
let new_ptr = unsafe { alloc::alloc(new_layout) as *mut c_void };
if new_ptr.is_null() {
alloc::handle_alloc_error(new_layout);
}
new_ptr
} else {
CL_MEM_READ_WRITE
let svm_mem_flags: cl_svm_mem_flags = if self.fine_grain_buffer {
if self.atomics {
CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_READ_WRITE | CL_MEM_SVM_ATOMICS
} else {
CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_READ_WRITE
}
} else {
CL_MEM_READ_WRITE
};
let alignment = mem::align_of::<T>();
svm_alloc(
self.context.get(),
svm_mem_flags,
size,
alignment as cl_uint,
)?
};
let alignment = mem::align_of::<T>();
let ptr = svm_alloc(
self.context.get(),
svm_mem_flags,
size,
alignment as cl_uint,
)?;

// reallocation, copy old data to new pointer and free old memory
if 0 < self.cap {
unsafe { ptr::copy(self.ptr, ptr as *mut T, self.cap) };
svm_free(self.context.get(), self.ptr as *mut c_void);
if self.fine_grain_system {
let layout = Layout::array::<T>(self.cap).unwrap();
unsafe {
alloc::dealloc(self.ptr as *mut u8, layout);
}
} else {
svm_free(self.context.get(), self.ptr as *mut c_void);
}
}

self.ptr = ptr as *mut T;
Expand All @@ -143,7 +162,14 @@ impl<'a, T> SvmRawVec<'a, T> {
impl<'a, T> Drop for SvmRawVec<'a, T> {
fn drop(&mut self) {
if !self.ptr.is_null() {
svm_free(self.context.get(), self.ptr as *mut c_void);
if self.fine_grain_system {
let layout = Layout::array::<T>(self.cap).unwrap();
unsafe {
alloc::dealloc(self.ptr as *mut u8, layout);
}
} else {
svm_free(self.context.get(), self.ptr as *mut c_void);
}
self.ptr = ptr::null_mut();
}
}
Expand Down Expand Up @@ -241,9 +267,19 @@ impl<'a, T> SvmVec<'a, T> {
self.len == 0
}

/// Whether the vector is fine grain buffer.
///
/// Returns the `fine_grain_buffer` flag stored on the backing buffer (`self.buf`).
pub fn is_fine_grain_buffer(&self) -> bool {
self.buf.fine_grain_buffer
}

/// Whether the vector is fine grain system.
///
/// Returns the `fine_grain_system` flag stored on the backing buffer (`self.buf`).
pub fn is_fine_grain_system(&self) -> bool {
self.buf.fine_grain_system
}

/// Whether the vector is fine grained
pub fn is_fine_grained(&self) -> bool {
self.buf.fine_grain_buffer
self.buf.fine_grain_buffer || self.buf.fine_grain_system
}

/// Whether the vector can use atomics
Expand Down
105 changes: 102 additions & 3 deletions tests/opencl2_kernel_test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,14 @@

extern crate opencl3;

use cl3::device::{CL_DEVICE_SVM_FINE_GRAIN_BUFFER, CL_DEVICE_TYPE_GPU};
use cl3::device::{
CL_DEVICE_SVM_FINE_GRAIN_BUFFER, CL_DEVICE_SVM_FINE_GRAIN_SYSTEM, CL_DEVICE_TYPE_ALL,
CL_DEVICE_TYPE_GPU,
};
use opencl3::command_queue::CommandQueue;
use opencl3::context::Context;
use opencl3::device::Device;
use opencl3::kernel::{create_program_kernels, ExecuteKernel};
use opencl3::kernel::{create_program_kernels, ExecuteKernel, Kernel};
use opencl3::platform::get_platforms;
use opencl3::program::{Program, CL_STD_2_0};
use opencl3::svm::SvmVec;
Expand Down Expand Up @@ -149,7 +152,7 @@ fn test_opencl_2_kernel_example() -> Result<()> {
// Copy into an OpenCL SVM vector
let mut test_values =
SvmVec::<cl_int>::allocate(&context, ARRAY_SIZE).expect("SVM allocation failed");
test_values.clone_from_slice(&value_array);
test_values.copy_from_slice(&value_array);

// Make test_values immutable
let test_values = test_values;
Expand Down Expand Up @@ -191,3 +194,99 @@ fn test_opencl_2_kernel_example() -> Result<()> {

Ok(())
}

/// Runs the sum kernel on SVM vectors using the first device that reports
/// fine grained system SVM support.
///
/// The test is ignored by default. When no such device exists it prints a
/// message and passes without running the kernel.
#[test]
#[ignore]
fn test_opencl_2_system_svm_example() -> Result<()> {
    let platforms = get_platforms()?;
    assert!(!platforms.is_empty());

    /////////////////////////////////////////////////////////////////////
    // Query OpenCL compute environment
    let opencl_2: &str = "OpenCL 2";
    let opencl_3: &str = "OpenCL 3";

    // Find an OpenCL platform and device with fine grained system SVM
    let mut svm_device_id = None;
    'platforms: for p in platforms {
        let platform_version = p.version()?;

        if platform_version.contains(opencl_2) || platform_version.contains(opencl_3) {
            let devices = p
                .get_devices(CL_DEVICE_TYPE_ALL)
                .expect("Platform::get_devices failed");

            for dev_id in devices {
                let device = Device::new(dev_id);
                let svm_mem_capability = device.svm_mem_capability();
                if 0 < svm_mem_capability & CL_DEVICE_SVM_FINE_GRAIN_SYSTEM {
                    svm_device_id = Some(dev_id);
                    // Leave both loops at the first match, so that devices on
                    // later platforms cannot overwrite the result.
                    break 'platforms;
                }
            }
        }
    }

    if let Some(device_id) = svm_device_id {
        // Create OpenCL context from the OpenCL svm device
        let device = Device::new(device_id);
        let vendor = device.vendor().expect("Device.vendor failed");
        let vendor_id = device.vendor_id().expect("Device.vendor_id failed");
        println!("OpenCL device vendor name: {}", vendor);
        println!("OpenCL device vendor id: {:X}", vendor_id);

        /////////////////////////////////////////////////////////////////////
        // Initialise OpenCL compute environment

        // Create a Context on the OpenCL svm device
        let context = Context::from_device(&device).expect("Context::from_device failed");

        // Build the OpenCL program source and create the kernel.
        let program = Program::create_and_build_from_source(&context, PROGRAM_SOURCE, "")
            .expect("Program::create_and_build_from_source failed");

        let kernel = Kernel::create(&program, SUM_KERNEL_NAME).expect("Kernel::create failed");

        // Create a command_queue on the Context's device
        let queue = CommandQueue::create_with_properties(&context, context.default_device(), 0, 0)
            .expect("CommandQueue::create_with_properties failed");

        // The input data
        const ARRAY_SIZE: usize = 8;
        let value_array: [cl_int; ARRAY_SIZE] = [3, 2, 5, 9, 7, 1, 4, 2];

        // Copy into an OpenCL SVM vector
        let mut test_values =
            SvmVec::<cl_int>::allocate(&context, ARRAY_SIZE).expect("SVM allocation failed");
        test_values.copy_from_slice(&value_array);

        // Make test_values immutable
        let test_values = test_values;

        // The output data, an OpenCL SVM vector
        let mut results =
            SvmVec::<cl_int>::allocate_zeroed(&context, ARRAY_SIZE).expect("SVM allocation failed");

        // Run the sum kernel on the input data
        let sum_kernel_event = ExecuteKernel::new(&kernel)
            .set_arg_svm(results.as_mut_ptr())
            .set_arg_svm(test_values.as_ptr())
            .set_global_work_size(ARRAY_SIZE)
            .enqueue_nd_range(&queue)?;

        // Wait for the kernel to complete execution on the device
        sum_kernel_event.wait()?;

        // Can access OpenCL SVM directly, no need to map or read the results
        println!("sum results: {:?}", results);
        assert_eq!(33, results[0]);
        assert_eq!(0, results[ARRAY_SIZE - 1]);
    } else {
        println!("OpenCL fine grained system SVM device not found");
    }

    Ok(())
}

0 comments on commit f4f0cf9

Please sign in to comment.