From 924403d7912787ba2b84c79ba67483e5835b6f33 Mon Sep 17 00:00:00 2001 From: pengyu <6712304+FantasyVR@users.noreply.github.com> Date: Fri, 2 Dec 2022 20:13:04 +0800 Subject: [PATCH] [lang] Use less gpu memory when building sparse matrix (#6781) Issue: #2906 ### Brief Summary The ndarray `read_int` function consumes more than 100 MB of GPU memory. It's better to use the `memcpy_device_to_host` function to obtain `num_triplets_`. --- taichi/program/sparse_matrix.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/taichi/program/sparse_matrix.cpp b/taichi/program/sparse_matrix.cpp index 8b470f9171c69..e631d76bb895f 100644 --- a/taichi/program/sparse_matrix.cpp +++ b/taichi/program/sparse_matrix.cpp @@ -156,7 +156,8 @@ std::unique_ptr SparseMatrixBuilder::build_cuda() { built_ = true; auto sm = make_cu_sparse_matrix(rows_, cols_, dtype_); #ifdef TI_WITH_CUDA - num_triplets_ = ndarray_data_base_ptr_->read_int(std::vector{0}); + CUDADriver::get_instance().memcpy_device_to_host( + &num_triplets_, (void *)get_ndarray_data_ptr(), sizeof(int)); auto len = 3 * num_triplets_ + 1; std::vector trips(len); CUDADriver::get_instance().memcpy_device_to_host(