From a5097b4529499cf6579a5516006b6cadf800c7fa Mon Sep 17 00:00:00 2001 From: Amadeusz Szymko Date: Thu, 1 Aug 2024 10:04:54 +0900 Subject: [PATCH] fix(autoware_lidar_centerpoint): place device vector in CUDA device system (#8272) Signed-off-by: amadeuszsz Signed-off-by: Kenzo Lobos-Tsunekawa --- .../postprocess/postprocess_kernel.hpp | 3 --- .../lib/postprocess/postprocess_kernel.cu | 17 ++++++++--------- 2 files changed, 8 insertions(+), 12 deletions(-) diff --git a/perception/lidar_centerpoint/include/lidar_centerpoint/postprocess/postprocess_kernel.hpp b/perception/lidar_centerpoint/include/lidar_centerpoint/postprocess/postprocess_kernel.hpp index e15d3022c947c..58431b48eb917 100644 --- a/perception/lidar_centerpoint/include/lidar_centerpoint/postprocess/postprocess_kernel.hpp +++ b/perception/lidar_centerpoint/include/lidar_centerpoint/postprocess/postprocess_kernel.hpp @@ -19,7 +19,6 @@ #include "cuda_runtime_api.h" #include "lidar_centerpoint/centerpoint_config.hpp" #include "lidar_centerpoint/utils.hpp" -#include "thrust/device_vector.h" #include @@ -37,8 +36,6 @@ class PostProcessCUDA private: CenterPointConfig config_; - thrust::device_vector boxes3d_d_; - thrust::device_vector yaw_norm_thresholds_d_; }; } // namespace centerpoint diff --git a/perception/lidar_centerpoint/lib/postprocess/postprocess_kernel.cu b/perception/lidar_centerpoint/lib/postprocess/postprocess_kernel.cu index 12835bab038a6..fb8e364b352fc 100644 --- a/perception/lidar_centerpoint/lib/postprocess/postprocess_kernel.cu +++ b/perception/lidar_centerpoint/lib/postprocess/postprocess_kernel.cu @@ -137,10 +137,6 @@ __global__ void generateBoxes3D_kernel( PostProcessCUDA::PostProcessCUDA(const CenterPointConfig & config) : config_(config) { - const auto num_raw_boxes3d = config.down_grid_size_y_ * config.down_grid_size_x_; - boxes3d_d_ = thrust::device_vector(num_raw_boxes3d); - yaw_norm_thresholds_d_ = thrust::device_vector( - config_.yaw_norm_thresholds_.begin(), config_.yaw_norm_thresholds_.end()); } // cspell: ignore divup @@ -153,23 +149,26 @@ cudaError_t PostProcessCUDA::generateDetectedBoxes3D_launch( divup(config_.down_grid_size_y_, THREADS_PER_BLOCK), divup(config_.down_grid_size_x_, THREADS_PER_BLOCK)); dim3 threads(THREADS_PER_BLOCK, THREADS_PER_BLOCK); + auto boxes3d_d = + thrust::device_vector(config_.down_grid_size_y_ * config_.down_grid_size_x_); + auto yaw_norm_thresholds_d = thrust::device_vector( + config_.yaw_norm_thresholds_.begin(), config_.yaw_norm_thresholds_.end()); generateBoxes3D_kernel<<>>( out_heatmap, out_offset, out_z, out_dim, out_rot, out_vel, config_.voxel_size_x_, config_.voxel_size_y_, config_.range_min_x_, config_.range_min_y_, config_.down_grid_size_x_, config_.down_grid_size_y_, config_.downsample_factor_, config_.class_size_, - config_.has_variance_, thrust::raw_pointer_cast(yaw_norm_thresholds_d_.data()), - thrust::raw_pointer_cast(boxes3d_d_.data())); + config_.has_variance_, thrust::raw_pointer_cast(yaw_norm_thresholds_d.data()), + thrust::raw_pointer_cast(boxes3d_d.data())); // suppress by score const auto num_det_boxes3d = thrust::count_if( - thrust::device, boxes3d_d_.begin(), boxes3d_d_.end(), - is_score_greater(config_.score_threshold_)); + thrust::device, boxes3d_d.begin(), boxes3d_d.end(), is_score_greater(config_.score_threshold_)); if (num_det_boxes3d == 0) { return cudaGetLastError(); } thrust::device_vector det_boxes3d_d(num_det_boxes3d); thrust::copy_if( - thrust::device, boxes3d_d_.begin(), boxes3d_d_.end(), det_boxes3d_d.begin(), + thrust::device, boxes3d_d.begin(), boxes3d_d.end(), det_boxes3d_d.begin(), is_score_greater(config_.score_threshold_)); // sort by score