From 0b94ecb5dc89c5d09b5ea86840110410d69d6e36 Mon Sep 17 00:00:00 2001 From: He Sichao <1310722434@qq.com> Date: Fri, 9 Aug 2024 12:14:54 +0800 Subject: [PATCH 1/7] Add `stream_` for `CUDAContext` to use a specific CUDA stream to launch CUDA kernel --- c_api/include/taichi/taichi_cuda.h | 2 ++ c_api/src/taichi_llvm_impl.cpp | 7 +++++++ taichi/rhi/cuda/cuda_context.cpp | 6 +++--- taichi/rhi/cuda/cuda_context.h | 9 +++++++++ 4 files changed, 21 insertions(+), 3 deletions(-) diff --git a/c_api/include/taichi/taichi_cuda.h b/c_api/include/taichi/taichi_cuda.h index 136f6ce9e1f9e..cf3a1429d42c6 100644 --- a/c_api/include/taichi/taichi_cuda.h +++ b/c_api/include/taichi/taichi_cuda.h @@ -24,6 +24,8 @@ TI_DLL_EXPORT TiMemory TI_API_CALL ti_import_cuda_memory(TiRuntime runtime, void *ptr, size_t memory_size); +TI_DLL_EXPORT void TI_API_CALL ti_set_cuda_stream(void *stream); + #ifdef __cplusplus } // extern "C" #endif // __cplusplus diff --git a/c_api/src/taichi_llvm_impl.cpp b/c_api/src/taichi_llvm_impl.cpp index 277ba50df70d7..6a135029a9416 100644 --- a/c_api/src/taichi_llvm_impl.cpp +++ b/c_api/src/taichi_llvm_impl.cpp @@ -14,6 +14,7 @@ #ifdef TI_WITH_CUDA #include "taichi/rhi/cuda/cuda_device.h" +#include "taichi/rhi/cuda/cuda_context.h" #include "taichi/runtime/cuda/kernel_launcher.h" #endif @@ -242,4 +243,10 @@ TI_DLL_EXPORT TiMemory TI_API_CALL ti_import_cuda_memory(TiRuntime runtime, #endif } +// function.set_cuda_stream +TI_DLL_EXPORT void TI_API_CALL ti_set_cuda_stream(void *stream) { +#ifdef TI_WITH_CUDA + CUDAContext::get_instance().set_stream(stream); +} + #endif // TI_WITH_LLVM diff --git a/taichi/rhi/cuda/cuda_context.cpp b/taichi/rhi/cuda/cuda_context.cpp index 587f737e935e4..bf79a13e18a8c 100644 --- a/taichi/rhi/cuda/cuda_context.cpp +++ b/taichi/rhi/cuda/cuda_context.cpp @@ -12,7 +12,7 @@ namespace taichi::lang { CUDAContext::CUDAContext() - : profiler_(nullptr), driver_(CUDADriver::get_instance_without_context()) { + : profiler_(nullptr), 
driver_(CUDADriver::get_instance_without_context()), stream_(nullptr) { // CUDA initialization dev_count_ = 0; driver_.init(0); @@ -156,14 +156,14 @@ void CUDAContext::launch(void *func, dynamic_shared_mem_bytes); } driver_.launch_kernel(func, grid_dim, 1, 1, block_dim, 1, 1, - dynamic_shared_mem_bytes, nullptr, + dynamic_shared_mem_bytes, stream_, arg_pointers.data(), nullptr); } if (profiler_) profiler_->stop(task_handle); if (debug_) { - driver_.stream_synchronize(nullptr); + driver_.stream_synchronize(stream_); } } diff --git a/taichi/rhi/cuda/cuda_context.h b/taichi/rhi/cuda/cuda_context.h index e912cca7aa0c1..fa7d2fa93e936 100644 --- a/taichi/rhi/cuda/cuda_context.h +++ b/taichi/rhi/cuda/cuda_context.h @@ -29,6 +29,7 @@ class CUDAContext { int max_shared_memory_bytes_; bool debug_; bool supports_mem_pool_; + void *stream_; public: CUDAContext(); @@ -108,6 +109,14 @@ class CUDAContext { } static CUDAContext &get_instance(); + + void set_stream(void *stream) { + stream_ = stream; + } + + void *get_stream() const { + return stream_; + } }; } // namespace taichi::lang From 8741bac231f6aa958dc4cc8ff15da9f16fe7e14a Mon Sep 17 00:00:00 2001 From: He Sichao <1310722434@qq.com> Date: Fri, 9 Aug 2024 13:05:58 +0800 Subject: [PATCH 2/7] Add test case --- c_api/include/taichi/taichi_cuda.h | 5 +++++ c_api/src/taichi_llvm_impl.cpp | 16 +++++++++++++++- c_api/tests/c_api_interop_test.cpp | 15 +++++++++++++++ 3 files changed, 35 insertions(+), 1 deletion(-) diff --git a/c_api/include/taichi/taichi_cuda.h b/c_api/include/taichi/taichi_cuda.h index cf3a1429d42c6..0cf45bb9b18c8 100644 --- a/c_api/include/taichi/taichi_cuda.h +++ b/c_api/include/taichi/taichi_cuda.h @@ -20,12 +20,17 @@ ti_export_cuda_memory(TiRuntime runtime, TiMemory memory, TiCudaMemoryInteropInfo *interop_info); +// Function `ti_import_cuda_memory` TI_DLL_EXPORT TiMemory TI_API_CALL ti_import_cuda_memory(TiRuntime runtime, void *ptr, size_t memory_size); +// Function `ti_set_cuda_stream` TI_DLL_EXPORT void 
TI_API_CALL ti_set_cuda_stream(void *stream); +// Function `ti_get_cuda_stream` +TI_DLL_EXPORT void TI_API_CALL *ti_get_cuda_stream(); + #ifdef __cplusplus } // extern "C" #endif // __cplusplus diff --git a/c_api/src/taichi_llvm_impl.cpp b/c_api/src/taichi_llvm_impl.cpp index 6a135029a9416..573c9c7a970d8 100644 --- a/c_api/src/taichi_llvm_impl.cpp +++ b/c_api/src/taichi_llvm_impl.cpp @@ -246,7 +246,21 @@ TI_DLL_EXPORT TiMemory TI_API_CALL ti_import_cuda_memory(TiRuntime runtime, // function.set_cuda_stream TI_DLL_EXPORT void TI_API_CALL ti_set_cuda_stream(void *stream) { #ifdef TI_WITH_CUDA - CUDAContext::get_instance().set_stream(stream); + CUDAContext::get_instance().set_stream(stream); + +#else + TI_NOT_IMPLEMENTED; +#endif +} + +// function.get_cuda_stream +TI_DLL_EXPORT void TI_API_CALL *ti_get_cuda_stream() { +#ifdef TI_WITH_CUDA + return CUDAContext::get_instance().get_stream(); +#else + TI_NOT_IMPLEMENTED; + +#endif } #endif // TI_WITH_LLVM diff --git a/c_api/tests/c_api_interop_test.cpp b/c_api/tests/c_api_interop_test.cpp index 73ae75c58bd31..ff3a1227f2bd1 100644 --- a/c_api/tests/c_api_interop_test.cpp +++ b/c_api/tests/c_api_interop_test.cpp @@ -160,3 +160,18 @@ TEST_F(CapiTest, TestCUDAImport) { EXPECT_EQ(data_out[3], 4.0); } #endif // TI_WITH_CUDA + +#ifdef TI_WITH_CUDA +TEST_F(CapiTest, TestCUDAStreamSet) { + EXPECT_EQ(ti_get_cuda_stream(), nullptr); + + void *stream1 = reinterpret_cast<void *>(0x12345678); + void *stream2 = reinterpret_cast<void *>(0x87654321); + + ti_set_cuda_stream(stream1); + EXPECT_EQ(ti_get_cuda_stream(), stream1); + + ti_set_cuda_stream(stream2); + EXPECT_EQ(ti_get_cuda_stream(), stream2); +} +#endif From 28f169c405b193cb635705d63c8c5cb125defbe5 Mon Sep 17 00:00:00 2001 From: He Sichao <1310722434@qq.com> Date: Fri, 9 Aug 2024 14:18:05 +0800 Subject: [PATCH 3/7] Update taichi_llvm_impl.cpp --- c_api/src/taichi_llvm_impl.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/c_api/src/taichi_llvm_impl.cpp 
b/c_api/src/taichi_llvm_impl.cpp index 573c9c7a970d8..d027d8c14712f 100644 --- a/c_api/src/taichi_llvm_impl.cpp +++ b/c_api/src/taichi_llvm_impl.cpp @@ -246,7 +246,7 @@ TI_DLL_EXPORT TiMemory TI_API_CALL ti_import_cuda_memory(TiRuntime runtime, // function.set_cuda_stream TI_DLL_EXPORT void TI_API_CALL ti_set_cuda_stream(void *stream) { #ifdef TI_WITH_CUDA - CUDAContext::get_instance().set_stream(stream); + taichi::lang::CUDAContext::get_instance().set_stream(stream); #else TI_NOT_IMPLEMENTED; @@ -256,7 +256,7 @@ TI_DLL_EXPORT void TI_API_CALL ti_set_cuda_stream(void *stream) { // function.get_cuda_stream TI_DLL_EXPORT void TI_API_CALL *ti_get_cuda_stream() { #ifdef TI_WITH_CUDA - return CUDAContext::get_instance().get_stream(); + return taichi::lang::CUDAContext::get_instance().get_stream(); #else TI_NOT_IMPLEMENTED; From 67760c87e0b35c5020b465613df26e9474823ab3 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 9 Aug 2024 05:10:53 +0000 Subject: [PATCH 4/7] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- c_api/tests/c_api_interop_test.cpp | 2 +- taichi/rhi/cuda/cuda_context.cpp | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/c_api/tests/c_api_interop_test.cpp b/c_api/tests/c_api_interop_test.cpp index ff3a1227f2bd1..45fcaf08400b8 100644 --- a/c_api/tests/c_api_interop_test.cpp +++ b/c_api/tests/c_api_interop_test.cpp @@ -164,7 +164,7 @@ TEST_F(CapiTest, TestCUDAImport) { #ifdef TI_WITH_CUDA TEST_F(CapiTest, TestCUDAStreamSet) { EXPECT_EQ(ti_get_cuda_stream(), nullptr); - + void *stream1 = reinterpret_cast<void *>(0x12345678); void *stream2 = reinterpret_cast<void *>(0x87654321); diff --git a/taichi/rhi/cuda/cuda_context.cpp b/taichi/rhi/cuda/cuda_context.cpp index bf79a13e18a8c..a71833e89c18a 100644 --- a/taichi/rhi/cuda/cuda_context.cpp +++ b/taichi/rhi/cuda/cuda_context.cpp @@ -12,7 +12,9 @@ namespace taichi::lang { 
CUDAContext::CUDAContext() - : profiler_(nullptr), driver_(CUDADriver::get_instance_without_context()), stream_(nullptr) { + : profiler_(nullptr), + driver_(CUDADriver::get_instance_without_context()), + stream_(nullptr) { // CUDA initialization dev_count_ = 0; driver_.init(0); From 6e5f3f79a72600dfe8c5f020b25b96739b813f93 Mon Sep 17 00:00:00 2001 From: He Sichao <1310722434@qq.com> Date: Wed, 14 Aug 2024 11:25:50 +0800 Subject: [PATCH 5/7] Fix test --- c_api/include/taichi/taichi_cuda.h | 2 +- c_api/src/taichi_llvm_impl.cpp | 4 ++-- c_api/tests/c_api_interop_test.cpp | 11 ++++++++--- 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/c_api/include/taichi/taichi_cuda.h b/c_api/include/taichi/taichi_cuda.h index 0cf45bb9b18c8..a67b4f22e8d02 100644 --- a/c_api/include/taichi/taichi_cuda.h +++ b/c_api/include/taichi/taichi_cuda.h @@ -29,7 +29,7 @@ TI_DLL_EXPORT TiMemory TI_API_CALL ti_import_cuda_memory(TiRuntime runtime, TI_DLL_EXPORT void TI_API_CALL ti_set_cuda_stream(void *stream); // Function `ti_get_cuda_stream` -TI_DLL_EXPORT void TI_API_CALL *ti_get_cuda_stream(); +TI_DLL_EXPORT void TI_API_CALL ti_get_cuda_stream(void **stream); #ifdef __cplusplus } // extern "C" diff --git a/c_api/src/taichi_llvm_impl.cpp b/c_api/src/taichi_llvm_impl.cpp index d027d8c14712f..cc36704f9bff1 100644 --- a/c_api/src/taichi_llvm_impl.cpp +++ b/c_api/src/taichi_llvm_impl.cpp @@ -254,9 +254,9 @@ TI_DLL_EXPORT void TI_API_CALL ti_set_cuda_stream(void *stream) { } // function.get_cuda_stream -TI_DLL_EXPORT void TI_API_CALL *ti_get_cuda_stream() { +TI_DLL_EXPORT void TI_API_CALL ti_get_cuda_stream(void **stream) { #ifdef TI_WITH_CUDA - return taichi::lang::CUDAContext::get_instance().get_stream(); + *stream = taichi::lang::CUDAContext::get_instance().get_stream(); #else TI_NOT_IMPLEMENTED; diff --git a/c_api/tests/c_api_interop_test.cpp b/c_api/tests/c_api_interop_test.cpp index 45fcaf08400b8..34bb1de7adb0c 100644 --- a/c_api/tests/c_api_interop_test.cpp +++ 
b/c_api/tests/c_api_interop_test.cpp @@ -163,15 +163,20 @@ TEST_F(CapiTest, TestCUDAImport) { #ifdef TI_WITH_CUDA TEST_F(CapiTest, TestCUDAStreamSet) { - EXPECT_EQ(ti_get_cuda_stream(), nullptr); + void* temp_stream = nullptr; + + ti_get_cuda_stream(&temp_stream) + EXPECT_EQ(temp_stream, nullptr); void *stream1 = reinterpret_cast<void *>(0x12345678); void *stream2 = reinterpret_cast<void *>(0x87654321); ti_set_cuda_stream(stream1); - EXPECT_EQ(ti_get_cuda_stream(), stream1); + ti_get_cuda_stream(&temp_stream); + EXPECT_EQ(temp_stream, stream1); ti_set_cuda_stream(stream2); - EXPECT_EQ(ti_get_cuda_stream(), stream2); + ti_get_cuda_stream(&temp_stream); + EXPECT_EQ(temp_stream, stream2); } #endif From 68f5d6122fab6e74d216407b313375011a3391a0 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 14 Aug 2024 03:26:59 +0000 Subject: [PATCH 6/7] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- c_api/tests/c_api_interop_test.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/c_api/tests/c_api_interop_test.cpp b/c_api/tests/c_api_interop_test.cpp index 34bb1de7adb0c..fdf700911f328 100644 --- a/c_api/tests/c_api_interop_test.cpp +++ b/c_api/tests/c_api_interop_test.cpp @@ -163,10 +163,9 @@ TEST_F(CapiTest, TestCUDAImport) { #ifdef TI_WITH_CUDA TEST_F(CapiTest, TestCUDAStreamSet) { - void* temp_stream = nullptr; + void *temp_stream = nullptr; - ti_get_cuda_stream(&temp_stream) - EXPECT_EQ(temp_stream, nullptr); + ti_get_cuda_stream(&temp_stream) EXPECT_EQ(temp_stream, nullptr); void *stream1 = reinterpret_cast<void *>(0x12345678); void *stream2 = reinterpret_cast<void *>(0x87654321); From 22f891d56c9af492a7e54295ab9c5175a92534a5 Mon Sep 17 00:00:00 2001 From: Sichao He <1310722434@qq.com> Date: Wed, 14 Aug 2024 11:28:53 +0800 Subject: [PATCH 7/7] Update c_api_interop_test.cpp --- c_api/tests/c_api_interop_test.cpp | 3 ++- 1 file changed, 2 
insertions(+), 1 
deletion(-) diff --git a/c_api/tests/c_api_interop_test.cpp b/c_api/tests/c_api_interop_test.cpp index fdf700911f328..16bae248dbb90 100644 --- a/c_api/tests/c_api_interop_test.cpp +++ b/c_api/tests/c_api_interop_test.cpp @@ -165,7 +165,8 @@ TEST_F(CapiTest, TestCUDAImport) { TEST_F(CapiTest, TestCUDAStreamSet) { void *temp_stream = nullptr; - ti_get_cuda_stream(&temp_stream) EXPECT_EQ(temp_stream, nullptr); + ti_get_cuda_stream(&temp_stream); + EXPECT_EQ(temp_stream, nullptr); void *stream1 = reinterpret_cast<void *>(0x12345678); void *stream2 = reinterpret_cast<void *>(0x87654321);