Commit
feat(lidar_centerpoint_tvm): add lidar centerpoint tvm package (autowarefoundation#2385)

* add centerpoint tvm
* fixed typo in cmakelist and some formats

Signed-off-by: Xinyu Wang <xinyu.wang@tier4.jp>
Signed-off-by: kminoda <koji.minoda@tier4.jp>
1 parent df6e2ff, commit 04f8c4a
Showing 29 changed files with 2,364 additions and 0 deletions.
perception/lidar_centerpoint_tvm/.gitignore
@@ -0,0 +1 @@
data/

perception/lidar_centerpoint_tvm/CMakeLists.txt
@@ -0,0 +1,77 @@
cmake_minimum_required(VERSION 3.14)
project(lidar_centerpoint_tvm)

find_package(autoware_cmake REQUIRED)
autoware_package()

set(tvm_runtime_DIR ${tvm_vendor_DIR})
find_package(tvm_runtime CONFIG REQUIRED)

# Gather neural network information.
set(${PROJECT_NAME}_BACKEND llvm CACHE STRING "${PROJECT_NAME} neural network backend")
set(MODEL_NAME_ENCODER centerpoint_encoder)

# Get neural network.
set(NN_DEPENDENCY_ENCODER "")
get_neural_network(${MODEL_NAME_ENCODER} ${${PROJECT_NAME}_BACKEND} NN_DEPENDENCY_ENCODER)

set(MODEL_NAME_BACKBONE centerpoint_backbone)

# Get neural network.
set(NN_DEPENDENCY_BACKBONE "")
get_neural_network(${MODEL_NAME_BACKBONE} ${${PROJECT_NAME}_BACKEND} NN_DEPENDENCY_BACKBONE)

if((NOT NN_DEPENDENCY_ENCODER STREQUAL "") AND (NOT NN_DEPENDENCY_BACKBONE STREQUAL ""))
  ## centerpoint_tvm ##
  ament_auto_add_library(${PROJECT_NAME} SHARED
    data/models/${MODEL_NAME_ENCODER}/inference_engine_tvm_config.hpp
    data/models/${MODEL_NAME_BACKBONE}/inference_engine_tvm_config.hpp
    lib/centerpoint_tvm.cpp
    lib/utils.cpp
    lib/ros_utils.cpp
    lib/network/scatter.cpp
    lib/preprocess/pointcloud_densification.cpp
    lib/preprocess/voxel_generator.cpp
    lib/preprocess/generate_features.cpp
    lib/postprocess/circle_nms.cpp
    lib/postprocess/generate_detected_boxes.cpp
  )

  add_dependencies(${PROJECT_NAME} ${NN_DEPENDENCY_ENCODER})
  add_dependencies(${PROJECT_NAME} ${NN_DEPENDENCY_BACKBONE})

  target_compile_options(${PROJECT_NAME} PRIVATE "-Wno-sign-conversion" "-Wno-conversion")

  target_include_directories(${PROJECT_NAME} SYSTEM PUBLIC
    "${tvm_vendor_INCLUDE_DIRS}"
  )

  target_link_libraries(${PROJECT_NAME}
    ${tvm_runtime_LIBRARIES}
  )

  target_include_directories(${PROJECT_NAME} PRIVATE
    data/models
  )

  ## node ##
  ament_auto_add_library(lidar_centerpoint_tvm_component SHARED
    src/node.cpp
  )

  target_link_libraries(lidar_centerpoint_tvm_component
    ${PROJECT_NAME}
  )

  rclcpp_components_register_node(lidar_centerpoint_tvm_component
    PLUGIN "autoware::perception::lidar_centerpoint_tvm::LidarCenterPointTVMNode"
    EXECUTABLE lidar_centerpoint_tvm_node
  )

  ament_auto_package(INSTALL_TO_SHARE
    launch
    config
  )
else()
  message(WARNING "Neural network not found, skipping package.")
endif()

perception/lidar_centerpoint_tvm/README.md
@@ -0,0 +1,73 @@
# lidar_centerpoint_tvm

## Design

### Usage
lidar_centerpoint_tvm is a package for detecting dynamic 3D objects using a TVM-compiled CenterPoint module for different backends. To use this package, replace `lidar_centerpoint` with `lidar_centerpoint_tvm` in the perception launch files (for example, in `lidar_based_detection.launch.xml` if lidar-based detection is chosen).
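In practice this amounts to pointing the detector `<include>` at the `lidar_centerpoint_tvm` launch file instead of the `lidar_centerpoint` one (the exact file names depend on the launch tree in use; check your configuration).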

#### Neural network

This package will not build without a neural network for its inference.
The network is provided by the `tvm_utility` package.
See its design page for more information on how to enable downloading pre-compiled networks (by setting the `DOWNLOAD_ARTIFACTS` cmake variable) or how to handle user-compiled networks.

#### Backend

The backend used for the inference can be selected by setting the `lidar_centerpoint_tvm_BACKEND` cmake variable.
The currently available options are `llvm` for a CPU backend, and `vulkan` or `opencl` for a GPU backend.
It defaults to `llvm`.
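As an illustrative build invocation (not from this commit; adjust to your workspace), selecting a GPU backend and enabling the artifact download might look like `colcon build --packages-up-to lidar_centerpoint_tvm --cmake-args -Dlidar_centerpoint_tvm_BACKEND=vulkan -DDOWNLOAD_ARTIFACTS=ON`.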

### Inputs / Outputs

#### Input

| Name                 | Type                            | Description      |
| -------------------- | ------------------------------- | ---------------- |
| `~/input/pointcloud` | `sensor_msgs::msg::PointCloud2` | input pointcloud |

#### Output

| Name                       | Type                                                  | Description          |
| -------------------------- | ----------------------------------------------------- | -------------------- |
| `~/output/objects`         | `autoware_auto_perception_msgs::msg::DetectedObjects` | detected objects     |
| `debug/cyclic_time_ms`     | `tier4_debug_msgs::msg::Float64Stamped`               | cyclic time (ms)     |
| `debug/processing_time_ms` | `tier4_debug_msgs::msg::Float64Stamped`               | processing time (ms) |
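As a consumption sketch (not part of this commit; the node name and the fully resolved topic below are illustrative, since `~/output/objects` is typically remapped at launch time), a ROS 2 node subscribing to the detector output could look like:

```cpp
// Minimal consumer sketch -- node name and resolved topic are assumptions.
#include <autoware_auto_perception_msgs/msg/detected_objects.hpp>
#include <rclcpp/rclcpp.hpp>

#include <memory>

class ObjectsListener : public rclcpp::Node
{
public:
  ObjectsListener() : Node("objects_listener")
  {
    using autoware_auto_perception_msgs::msg::DetectedObjects;
    // Subscribe to the (remapped) detector output and log the object count.
    sub_ = create_subscription<DetectedObjects>(
      "/lidar_centerpoint_tvm/output/objects", rclcpp::QoS{1},
      [this](const DetectedObjects::SharedPtr msg) {
        RCLCPP_INFO(get_logger(), "received %zu detected objects", msg->objects.size());
      });
  }

private:
  rclcpp::Subscription<autoware_auto_perception_msgs::msg::DetectedObjects>::SharedPtr sub_;
};

int main(int argc, char ** argv)
{
  rclcpp::init(argc, argv);
  rclcpp::spin(std::make_shared<ObjectsListener>());
  rclcpp::shutdown();
  return 0;
}
```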

## Parameters

### Core Parameters

| Name                            | Type   | Default Value | Description                                                      |
| ------------------------------- | ------ | ------------- | ---------------------------------------------------------------- |
| `score_threshold`               | float  | `0.1`         | detected objects with score less than the threshold are ignored |
| `densification_world_frame_id`  | string | `map`         | the world frame id used to fuse multi-frame pointclouds         |
| `densification_num_past_frames` | int    | `1`           | the number of past frames to fuse with the current frame        |

### Bounding Box

The lidar segmentation node establishes a bounding box for the detected obstacles, using the `L-fit` method to fit a bounding box to each cluster.

### Limitation and Known Issue

Due to an accuracy issue with the `centerpoint` model, the `vulkan` backend cannot be used at the moment.
As for the `llvm` backend, real-time performance cannot be achieved.

## Reference

[1] Yin, Tianwei, Xingyi Zhou, and Philipp Krähenbühl. "Center-based 3D object detection and tracking." arXiv preprint arXiv:2006.11275 (2020).

[2] Lang, Alex H., et al. "PointPillars: Fast encoders for object detection from point clouds." Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition. 2019.

[3] <https://github.com/tianweiy/CenterPoint>

[4] <https://github.com/Abraham423/CenterPoint>

[5] <https://github.com/open-mmlab/OpenPCDet>

## Related issues

<!-- Required -->

- #908: Run Lidar Centerpoint with TVM

perception/lidar_centerpoint_tvm/config/centerpoint.param.yaml (10 additions, 0 deletions)
@@ -0,0 +1,10 @@
/**:
  ros__parameters:
    class_names: ["CAR", "PEDESTRIAN", "BICYCLE"]
    rename_car_to_truck_and_bus: true
    point_feature_size: 4
    max_voxel_size: 40000
    point_cloud_range: [-89.6, -89.6, -3.0, 89.6, 89.6, 5.0]
    voxel_size: [0.32, 0.32, 8.0]
    downsample_factor: 1
    encoder_in_feature_size: 9

perception/lidar_centerpoint_tvm/config/centerpoint_tiny.param.yaml (10 additions, 0 deletions)
@@ -0,0 +1,10 @@
/**:
  ros__parameters:
    class_names: ["CAR", "PEDESTRIAN", "BICYCLE"]
    rename_car_to_truck_and_bus: true
    point_feature_size: 4
    max_voxel_size: 40000
    point_cloud_range: [-89.6, -89.6, -3.0, 89.6, 89.6, 5.0]
    voxel_size: [0.32, 0.32, 8.0]
    downsample_factor: 2
    encoder_in_feature_size: 9

perception/lidar_centerpoint_tvm/include/lidar_centerpoint_tvm/centerpoint_config.hpp (126 additions, 0 deletions)
@@ -0,0 +1,126 @@
// Copyright 2021-2022 AutoCore Ltd., TIER IV, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef LIDAR_CENTERPOINT_TVM__CENTERPOINT_CONFIG_HPP_
#define LIDAR_CENTERPOINT_TVM__CENTERPOINT_CONFIG_HPP_

#include <cstddef>
#include <vector>

namespace autoware
{
namespace perception
{
namespace lidar_centerpoint_tvm
{
class CenterPointConfig
{
public:
  explicit CenterPointConfig(
    const std::size_t class_size, const float point_feature_size, const std::size_t max_voxel_size,
    const std::vector<double> & point_cloud_range, const std::vector<double> & voxel_size,
    const std::size_t downsample_factor, const std::size_t encoder_in_feature_size,
    const float score_threshold, const float circle_nms_dist_threshold,
    const float yaw_norm_threshold)
  {
    class_size_ = class_size;
    point_feature_size_ = point_feature_size;
    max_voxel_size_ = max_voxel_size;
    if (point_cloud_range.size() == 6) {
      range_min_x_ = static_cast<float>(point_cloud_range[0]);
      range_min_y_ = static_cast<float>(point_cloud_range[1]);
      range_min_z_ = static_cast<float>(point_cloud_range[2]);
      range_max_x_ = static_cast<float>(point_cloud_range[3]);
      range_max_y_ = static_cast<float>(point_cloud_range[4]);
      range_max_z_ = static_cast<float>(point_cloud_range[5]);
    }
    if (voxel_size.size() == 3) {
      voxel_size_x_ = static_cast<float>(voxel_size[0]);
      voxel_size_y_ = static_cast<float>(voxel_size[1]);
      voxel_size_z_ = static_cast<float>(voxel_size[2]);
    }
    downsample_factor_ = downsample_factor;
    encoder_in_feature_size_ = encoder_in_feature_size;

    if (score_threshold > 0 && score_threshold < 1) {
      score_threshold_ = score_threshold;
    }

    if (circle_nms_dist_threshold > 0) {
      circle_nms_dist_threshold_ = circle_nms_dist_threshold;
    }

    if (yaw_norm_threshold >= 0 && yaw_norm_threshold < 1) {
      yaw_norm_threshold_ = yaw_norm_threshold;
    }

    grid_size_x_ = static_cast<std::size_t>((range_max_x_ - range_min_x_) / voxel_size_x_);
    grid_size_y_ = static_cast<std::size_t>((range_max_y_ - range_min_y_) / voxel_size_y_);
    grid_size_z_ = static_cast<std::size_t>((range_max_z_ - range_min_z_) / voxel_size_z_);
    offset_x_ = range_min_x_ + voxel_size_x_ / 2;
    offset_y_ = range_min_y_ + voxel_size_y_ / 2;
    offset_z_ = range_min_z_ + voxel_size_z_ / 2;
    down_grid_size_x_ = grid_size_x_ / downsample_factor_;
    down_grid_size_y_ = grid_size_y_ / downsample_factor_;
  };

  // input params
  std::size_t class_size_{3};
  const std::size_t point_dim_size_{3};  // x, y and z
  std::size_t point_feature_size_{4};    // x, y, z and time lag
  std::size_t max_point_in_voxel_size_{32};
  std::size_t max_voxel_size_{40000};
  float range_min_x_{-89.6f};
  float range_min_y_{-89.6f};
  float range_min_z_{-3.0f};
  float range_max_x_{89.6f};
  float range_max_y_{89.6f};
  float range_max_z_{5.0f};
  float voxel_size_x_{0.32f};
  float voxel_size_y_{0.32f};
  float voxel_size_z_{8.0f};

  // network params
  const std::size_t batch_size_{1};
  std::size_t downsample_factor_{2};
  std::size_t encoder_in_feature_size_{9};
  const std::size_t encoder_out_feature_size_{32};
  const std::size_t head_out_size_{6};
  const std::size_t head_out_offset_size_{2};
  const std::size_t head_out_z_size_{1};
  const std::size_t head_out_dim_size_{3};
  const std::size_t head_out_rot_size_{2};
  const std::size_t head_out_vel_size_{2};

  // post-process params
  float score_threshold_{0.35f};
  float circle_nms_dist_threshold_{1.5f};
  float yaw_norm_threshold_{0.0f};

  // calculated params
  std::size_t grid_size_x_ = (range_max_x_ - range_min_x_) / voxel_size_x_;
  std::size_t grid_size_y_ = (range_max_y_ - range_min_y_) / voxel_size_y_;
  std::size_t grid_size_z_ = (range_max_z_ - range_min_z_) / voxel_size_z_;
  float offset_x_ = range_min_x_ + voxel_size_x_ / 2;
  float offset_y_ = range_min_y_ + voxel_size_y_ / 2;
  float offset_z_ = range_min_z_ + voxel_size_z_ / 2;
  std::size_t down_grid_size_x_ = grid_size_x_ / downsample_factor_;
  std::size_t down_grid_size_y_ = grid_size_y_ / downsample_factor_;
};

}  // namespace lidar_centerpoint_tvm
}  // namespace perception
}  // namespace autoware

#endif  // LIDAR_CENTERPOINT_TVM__CENTERPOINT_CONFIG_HPP_
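
For reference, a minimal usage sketch of this header (the `main` body and the asserted numbers are illustrative, not part of the commit; the parameter values mirror `config/centerpoint_tiny.param.yaml` and the header's own defaults):

```cpp
#include "lidar_centerpoint_tvm/centerpoint_config.hpp"

#include <cassert>

int main()
{
  using autoware::perception::lidar_centerpoint_tvm::CenterPointConfig;

  // Values taken from config/centerpoint_tiny.param.yaml; the three
  // post-process thresholds are illustrative (they match the header defaults).
  const CenterPointConfig config(
    /* class_size */ 3, /* point_feature_size */ 4, /* max_voxel_size */ 40000,
    /* point_cloud_range */ {-89.6, -89.6, -3.0, 89.6, 89.6, 5.0},
    /* voxel_size */ {0.32, 0.32, 8.0},
    /* downsample_factor */ 2, /* encoder_in_feature_size */ 9,
    /* score_threshold */ 0.35f, /* circle_nms_dist_threshold */ 1.5f,
    /* yaw_norm_threshold */ 0.0f);

  // Derived grid: (89.6 - (-89.6)) / 0.32 = 560 cells per axis, downsampled
  // by a factor of 2 to a 280 x 280 bird's-eye-view feature map.
  assert(config.grid_size_x_ == 560);
  assert(config.grid_size_y_ == 560);
  assert(config.down_grid_size_x_ == 280);

  return 0;
}
```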