Skip to content

Commit

Permalink
[bug] Fix device memory allocation for numpy array on CUDA backend (taichi-dev#7008)
Browse files Browse the repository at this point in the history

Issue: fix taichi-dev#6924

### Brief Summary
  • Loading branch information
jim19930609 authored Jan 2, 2023
1 parent 4433118 commit 6e166fb
Show file tree
Hide file tree
Showing 3 changed files with 15 additions and 2 deletions.
11 changes: 9 additions & 2 deletions taichi/codegen/cuda/codegen_cuda.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -627,6 +627,7 @@ FunctionType CUDAModuleToFunctionConverter::convert(
CUDAContext::get_instance().make_current();
std::vector<void *> arg_buffers(args.size(), nullptr);
std::vector<void *> device_buffers(args.size(), nullptr);
std::vector<DeviceAllocation> temporary_devallocs(args.size());

bool transferred = false;
for (int i = 0; i < (int)args.size(); i++) {
Expand Down Expand Up @@ -655,7 +656,13 @@ FunctionType CUDAModuleToFunctionConverter::convert(
// host.
// See CUDA driver API `cuPointerGetAttribute` for more details.
transferred = true;
CUDADriver::get_instance().malloc(&device_buffers[i], arr_sz);

auto result_buffer = context.result_buffer;
DeviceAllocation devalloc =
executor->allocate_memory_ndarray(arr_sz, result_buffer);
device_buffers[i] = executor->get_ndarray_alloc_info_ptr(devalloc);
temporary_devallocs[i] = devalloc;

CUDADriver::get_instance().memcpy_host_to_device(
(void *)device_buffers[i], arg_buffers[i], arr_sz);
} else {
Expand Down Expand Up @@ -703,7 +710,7 @@ FunctionType CUDAModuleToFunctionConverter::convert(
CUDADriver::get_instance().memcpy_device_to_host(
arg_buffers[i], (void *)device_buffers[i],
context.array_runtime_sizes[i]);
CUDADriver::get_instance().mem_free((void *)device_buffers[i]);
executor->deallocate_memory_ndarray(temporary_devallocs[i]);
}
}
}
Expand Down
4 changes: 4 additions & 0 deletions taichi/runtime/llvm/llvm_runtime_executor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -494,6 +494,10 @@ DeviceAllocation LlvmRuntimeExecutor::allocate_memory_ndarray(
result_buffer});
}

// Releases the device allocation identified by `handle` by forwarding it to
// the CUDA device's dealloc_memory().
//
// NOTE(review): this always goes through cuda_device(); confirm that callers
// only reach it when the CUDA backend is active.
void LlvmRuntimeExecutor::deallocate_memory_ndarray(DeviceAllocation handle) {
  auto *const device = cuda_device();
  device->dealloc_memory(handle);
}

void LlvmRuntimeExecutor::fill_ndarray(const DeviceAllocation &alloc,
std::size_t size,
uint32_t data) {
Expand Down
2 changes: 2 additions & 0 deletions taichi/runtime/llvm/llvm_runtime_executor.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@ class LlvmRuntimeExecutor {
DeviceAllocation allocate_memory_ndarray(std::size_t alloc_size,
uint64 *result_buffer);

// Frees the device allocation referred to by `handle`, e.g. one previously
// returned by allocate_memory_ndarray().
// NOTE(review): the implementation routes through the CUDA device — confirm
// whether non-CUDA backends are expected to call this.
void deallocate_memory_ndarray(DeviceAllocation handle);

void check_runtime_error(uint64 *result_buffer);

uint64_t *get_ndarray_alloc_info_ptr(const DeviceAllocation &alloc);
Expand Down

0 comments on commit 6e166fb

Please sign in to comment.