diff --git a/taichi/codegen/amdgpu/CMakeLists.txt b/taichi/codegen/amdgpu/CMakeLists.txt
index fa30cd7f9c736..9c5a04d589c2b 100644
--- a/taichi/codegen/amdgpu/CMakeLists.txt
+++ b/taichi/codegen/amdgpu/CMakeLists.txt
@@ -14,4 +14,4 @@ target_include_directories(amdgpu_codegen
   )
 
 target_link_libraries(amdgpu_codegen PRIVATE taichi_util)
-# target_link_libraries(amdgpu_codegen PRIVATE amdgpu_runtime)
+target_link_libraries(amdgpu_codegen PRIVATE amdgpu_runtime)
diff --git a/taichi/runtime/amdgpu/jit_amdgpu.h b/taichi/runtime/amdgpu/jit_amdgpu.h
index 9aa06627f43b6..6bf15fcbd621f 100644
--- a/taichi/runtime/amdgpu/jit_amdgpu.h
+++ b/taichi/runtime/amdgpu/jit_amdgpu.h
@@ -66,15 +66,20 @@ class JITModuleAMDGPU : public JITModule {
   }
 
   void call(const std::string &name,
-            const std::vector<void *> &arg_pointers) override {
-    launch(name, 1, 1, 0, arg_pointers);
+            const std::vector<void *> &arg_pointers,
+            const std::vector<int> &arg_sizes) override {
+    launch(name, 1, 1, 0, arg_pointers, arg_sizes);
   }
 
+  // NOTE(review): arg_sizes is accepted by this override, but the
+  // AMDGPUContext::launch call below does not receive it -- confirm that is
+  // intended.
   void launch(const std::string &name,
               std::size_t grid_dim,
               std::size_t block_dim,
               std::size_t dynamic_shared_mem_bytes,
-              std::vector<void *> &arg_pointers) override {
+              std::vector<void *> &arg_pointers,
+              std::vector<int> &arg_sizes) override {
     auto func = lookup_function(name);
     AMDGPUContext::get_instance().launch(func, name, arg_pointers, grid_dim,
                                          block_dim, dynamic_shared_mem_bytes);
diff --git a/tests/cpp/backends/amdgpu_device_test.cpp b/tests/cpp/backends/amdgpu_device_test.cpp
index 9851d7fd4322c..6c1bc6a273dab 100644
--- a/tests/cpp/backends/amdgpu_device_test.cpp
+++ b/tests/cpp/backends/amdgpu_device_test.cpp
@@ -195,6 +195,89 @@ TEST(AMDGPU, ConvertFuncParamAddressSpacePass) {
   }
 }
 
+// Parses a small AMDGPU kernel that adds two doubles, compiles it through
+// JITSessionAMDGPU, launches it once and checks the sum copied back to host.
+TEST(AMDGPU, ConvertProgramAndLaunch) {
+  std::string program =
+      "target datalayout = \"e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7\"\n"
+      "target triple = \"amdgcn-amd-amdhsa\"\n"
+      "define amdgpu_kernel void @runtime_add(double addrspace(1)* %0, double addrspace(1)* %1, double addrspace(1)* %2) #0 {\n"
+      " %4 = alloca double*, align 8, addrspace(5)\n"
+      " %5 = addrspacecast double addrspace(1)* %2 to double*\n"
+      " %6 = addrspacecast double addrspace(1)* %1 to double*\n"
+      " %7 = addrspacecast double addrspace(1)* %0 to double*\n"
+      " %8 = addrspacecast double* addrspace(5)* %4 to double**\n"
+      " %9 = alloca double*, align 8, addrspace(5)\n"
+      " %10 = addrspacecast double* addrspace(5)* %9 to double**\n"
+      " %11 = alloca double*, align 8, addrspace(5)\n"
+      " %12 = addrspacecast double* addrspace(5)* %11 to double**\n"
+      " store double* %7, double** %8, align 8\n"
+      " store double* %6, double** %10, align 8\n"
+      " store double* %5, double** %12, align 8\n"
+      " %13 = load double*, double** %8, align 8\n"
+      " %14 = load double, double* %13, align 8\n"
+      " %15 = load double*, double** %10, align 8\n"
+      " %16 = load double, double* %15, align 8\n"
+      " %17 = fadd contract double %14, %16\n"
+      " %18 = load double*, double** %12, align 8\n"
+      " store double %17, double* %18, align 8\n"
+      " ret void\n"
+      "}\n";
+  llvm::LLVMContext llvm_context;
+  llvm::SMDiagnostic diagnostic_err;
+  std::unique_ptr<llvm::Module> llvm_module = llvm::parseIR(
+      llvm::MemoryBuffer::getMemBuffer(program)->getMemBufferRef(),
+      diagnostic_err, llvm_context);
+  // parseIR returns null on a parse error; stop here instead of handing a null
+  // module to the JIT session.
+  ASSERT_TRUE(llvm_module != nullptr) << diagnostic_err.getMessage().str();
+
+  LLVMInitializeAMDGPUTarget();
+  LLVMInitializeAMDGPUTargetMC();
+  LLVMInitializeAMDGPUTargetInfo();
+  LLVMInitializeAMDGPUAsmPrinter();
+  LLVMInitializeAMDGPUAsmParser();
+
+  // NOTE(review): assumes JITSessionAMDGPU does not take ownership of the
+  // config pointer, so a stack object that outlives the session suffices.
+  CompileConfig config;
+  auto amdgpu_session = std::make_unique<JITSessionAMDGPU>(
+      nullptr, &config, llvm::DataLayout(""));
+  auto amdgpu_module = amdgpu_session->add_module(std::move(llvm_module), 0);
+
+  // One device allocation holds both inputs and the result: args[0] and
+  // args[1] are the operands, args[2] receives the sum.
+  double *args[3];
+  AMDGPUDriver::get_instance().malloc(reinterpret_cast<void **>(&args[0]),
+                                      sizeof(double) * 3);
+  args[1] = args[0] + 1;
+  args[2] = args[0] + 2;
+  double a = 10.0;
+  double b = 7.0;
+  double ret = 0.0;
+  AMDGPUDriver::get_instance().memcpy_host_to_device(args[0], &a,
+                                                     sizeof(double));
+  AMDGPUDriver::get_instance().memcpy_host_to_device(args[1], &b,
+                                                     sizeof(double));
+
+  // Each launch argument is passed as the address of the host variable that
+  // holds the device pointer.
+  // NOTE(review): the kernel parameters are pointers, so sizeof(double *) may
+  // be the intended size; both are 8 bytes on a 64-bit host -- confirm.
+  std::vector<void *> arg_pointers;
+  std::vector<int> arg_sizes;
+  for (auto &arg : args) {
+    arg_pointers.push_back(&arg);
+    arg_sizes.push_back(static_cast<int>(sizeof(double)));
+  }
+  amdgpu_module->call("runtime_add", arg_pointers, arg_sizes);
+  AMDGPUDriver::get_instance().stream_synchronize(nullptr);
+  AMDGPUDriver::get_instance().memcpy_device_to_host(&ret, args[2],
+                                                     sizeof(double));
+  EXPECT_DOUBLE_EQ(ret, 17.0);
+  AMDGPUDriver::get_instance().mem_free(args[0]);
+}
+
 }  // namespace lang
 }  // namespace taichi
 #endif