diff --git a/zed_components/CMakeLists.txt b/zed_components/CMakeLists.txt
index 098be870..aadb922c 100644
--- a/zed_components/CMakeLists.txt
+++ b/zed_components/CMakeLists.txt
@@ -75,7 +75,7 @@ endif()
 #############################################
 # Dependencies
 
-find_package(ZED 3 REQUIRED)
+find_package(ZED 4 REQUIRED)
 exec_program(uname ARGS -p OUTPUT_VARIABLE CMAKE_SYSTEM_NAME2)
 if(CMAKE_SYSTEM_NAME2 MATCHES "aarch64") # Jetson TX
@@ -83,6 +83,7 @@ if(CMAKE_SYSTEM_NAME2 MATCHES "aarch64") # Jetson TX
 endif()
 
 find_package(CUDA REQUIRED)
+find_package(OpenCV REQUIRED)
 
 set(DEPENDENCIES
   rclcpp
@@ -150,6 +151,7 @@ add_custom_target(all_${PROJECT_NAME}_files SOURCES ${all_files})
 ###############################################################################
 # INCLUDES and LIBS
 include_directories(
+  ${OpenCV_INCLUDE_DIRS}
   ${CUDA_INCLUDE_DIRS}
   ${ZED_INCLUDE_DIRS}
   ${CMAKE_CURRENT_SOURCE_DIR}/src/include
@@ -160,6 +162,7 @@ include_directories(
 # create ament index resource which references the libraries in the binary dir
 set(node_plugins "")
 
+link_directories(${OpenCV_LIBRARY_DIRS})
 link_directories(${ZED_LIBRARY_DIR})
 link_directories(${CUDA_LIBRARY_DIRS})
 
@@ -184,13 +187,21 @@ set(SL_TOOLS_SRC
 set(ZED_CAMERA_INC
   ${CMAKE_CURRENT_SOURCE_DIR}/src/include/visibility_control.hpp
   ${CMAKE_CURRENT_SOURCE_DIR}/src/zed_camera/include/sl_types.hpp
-  ${CMAKE_CURRENT_SOURCE_DIR}/src/zed_camera/include/zed_camera_component.hpp
+  ${CMAKE_CURRENT_SOURCE_DIR}/src/zed_camera/include/zed_camera_component.hpp
+
+  ${CMAKE_CURRENT_SOURCE_DIR}/src/zed_camera/include/yolo.hpp
+  ${CMAKE_CURRENT_SOURCE_DIR}/src/zed_camera/include/cuda_utils.h
+  ${CMAKE_CURRENT_SOURCE_DIR}/src/zed_camera/include/logging.h
+  ${CMAKE_CURRENT_SOURCE_DIR}/src/zed_camera/include/utils.h
 )
 
 set(ZED_CAMERA_SRC
   ${CMAKE_CURRENT_SOURCE_DIR}/src/zed_camera/src/zed_camera_component.cpp
+  ${CMAKE_CURRENT_SOURCE_DIR}/src/zed_camera/src/yolo.cpp
 )
 
+SET(TRT_LIBS nvinfer nvonnxparser)
+
 ###############################################################################
 # Bin and Install
@@ -204,6 +215,8 @@ target_compile_definitions(zed_camera_component
   PRIVATE "COMPOSITION_BUILDING_DLL"
 )
 target_link_libraries(zed_camera_component
+  ${TRT_LIBS}
+  ${OpenCV_LIBS}
   ${ZED_LIBS}
 )
 ament_target_dependencies(zed_camera_component
diff --git a/zed_components/src/zed_camera/include/cuda_utils.h b/zed_components/src/zed_camera/include/cuda_utils.h
new file mode 100644
index 00000000..c4edd066
--- /dev/null
+++ b/zed_components/src/zed_camera/include/cuda_utils.h
@@ -0,0 +1,17 @@
+#ifndef TRTX_CUDA_UTILS_H_
+#define TRTX_CUDA_UTILS_H_
+
+#include <cuda_runtime_api.h>
+
+#ifndef CUDA_CHECK
+#define CUDA_CHECK(callstr)\
+    {\
+        cudaError_t error_code = callstr;\
+        if (error_code != cudaSuccess) {\
+            std::cerr << "CUDA error " << error_code << " at " << __FILE__ << ":" << __LINE__;\
+            assert(0);\
+        }\
+    }
+#endif  // CUDA_CHECK
+
+#endif  // TRTX_CUDA_UTILS_H_
\ No newline at end of file
diff --git a/zed_components/src/zed_camera/include/logging.h b/zed_components/src/zed_camera/include/logging.h
new file mode 100644
index 00000000..1339ee2f
--- /dev/null
+++ b/zed_components/src/zed_camera/include/logging.h
@@ -0,0 +1,509 @@
+/*
+ * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef TENSORRT_LOGGING_H
+#define TENSORRT_LOGGING_H
+
+#include "NvInferRuntimeCommon.h"
+#include <cassert>
+#include <ctime>
+#include <iomanip>
+#include <iostream>
+#include <ostream>
+#include <sstream>
+#include <string>
+
+#if NV_TENSORRT_MAJOR >= 8
+#define TRT_NOEXCEPT noexcept
+#else
+#define TRT_NOEXCEPT
+#endif
+
+using Severity = nvinfer1::ILogger::Severity;
+
+class LogStreamConsumerBuffer : public std::stringbuf
+{
+public:
+    LogStreamConsumerBuffer(std::ostream& stream, const std::string& prefix, bool shouldLog)
+        : mOutput(stream)
+        , mPrefix(prefix)
+        , mShouldLog(shouldLog)
+    {
+    }
+
+    LogStreamConsumerBuffer(LogStreamConsumerBuffer&& other)
+        : mOutput(other.mOutput)
+    {
+    }
+
+    ~LogStreamConsumerBuffer()
+    {
+        // std::streambuf::pbase() gives a pointer to the beginning of the buffered part of the output sequence
+        // std::streambuf::pptr() gives a pointer to the current position of the output sequence
+        // if the pointer to the beginning is not equal to the pointer to the current position,
+        // call putOutput() to log the output to the stream
+        if (pbase() != pptr())
+        {
+            putOutput();
+        }
+    }
+
+    // synchronizes the stream buffer and returns 0 on success
+    // synchronizing the stream buffer consists of inserting the buffer contents into the stream,
+    // resetting the buffer and flushing the stream
+    virtual int sync()
+    {
+        putOutput();
+        return 0;
+    }
+
+    void putOutput()
+    {
+        if (mShouldLog)
+        {
+            // prepend timestamp
+            std::time_t timestamp = std::time(nullptr);
+            tm* tm_local = std::localtime(&timestamp);
+            std::cout << "[";
+            std::cout << std::setw(2) << std::setfill('0') << 1 + tm_local->tm_mon << "/";
+            std::cout << std::setw(2) << std::setfill('0') << tm_local->tm_mday << "/";
+            std::cout << std::setw(4) << std::setfill('0') << 1900 + tm_local->tm_year << "-";
+            std::cout << std::setw(2) << std::setfill('0') << tm_local->tm_hour << ":";
+            std::cout << std::setw(2) << std::setfill('0') << tm_local->tm_min << ":";
+            std::cout << std::setw(2) << std::setfill('0') << tm_local->tm_sec << "] ";
+            // std::stringbuf::str() gets the string contents of the buffer
+            // insert the buffer contents pre-appended by the appropriate prefix into the stream
+            mOutput << mPrefix << str();
+            // set the buffer to empty
+            str("");
+            // flush the stream
+            mOutput.flush();
+        }
+    }
+
+    void setShouldLog(bool shouldLog)
+    {
+        mShouldLog = shouldLog;
+    }
+
+private:
+    std::ostream& mOutput;
+    std::string mPrefix;
+    bool mShouldLog;
+};
+
+//!
+//! \class LogStreamConsumerBase
+//! \brief Convenience object used to initialize LogStreamConsumerBuffer before std::ostream in LogStreamConsumer
+//!
+class LogStreamConsumerBase
+{
+public:
+    LogStreamConsumerBase(std::ostream& stream, const std::string& prefix, bool shouldLog)
+        : mBuffer(stream, prefix, shouldLog)
+    {
+    }
+
+protected:
+    LogStreamConsumerBuffer mBuffer;
+};
+
+//!
+//! \class LogStreamConsumer
+//! \brief Convenience object used to facilitate use of C++ stream syntax when logging messages.
+//!        Order of base classes is LogStreamConsumerBase and then std::ostream.
+//!        This is because the LogStreamConsumerBase class is used to initialize the LogStreamConsumerBuffer member field
+//!        in LogStreamConsumer and then the address of the buffer is passed to std::ostream.
+//!        This is necessary to prevent the address of an uninitialized buffer from being passed to std::ostream.
+//!        Please do not change the order of the parent classes.
+//!
+class LogStreamConsumer : protected LogStreamConsumerBase, public std::ostream
+{
+public:
+    //! \brief Creates a LogStreamConsumer which logs messages with level severity.
+    //!  Reportable severity determines if the messages are severe enough to be logged.
+    LogStreamConsumer(Severity reportableSeverity, Severity severity)
+        : LogStreamConsumerBase(severityOstream(severity), severityPrefix(severity), severity <= reportableSeverity)
+        , std::ostream(&mBuffer) // links the stream buffer with the stream
+        , mShouldLog(severity <= reportableSeverity)
+        , mSeverity(severity)
+    {
+    }
+
+    LogStreamConsumer(LogStreamConsumer&& other)
+        : LogStreamConsumerBase(severityOstream(other.mSeverity), severityPrefix(other.mSeverity), other.mShouldLog)
+        , std::ostream(&mBuffer) // links the stream buffer with the stream
+        , mShouldLog(other.mShouldLog)
+        , mSeverity(other.mSeverity)
+    {
+    }
+
+    void setReportableSeverity(Severity reportableSeverity)
+    {
+        mShouldLog = mSeverity <= reportableSeverity;
+        mBuffer.setShouldLog(mShouldLog);
+    }
+
+private:
+    static std::ostream& severityOstream(Severity severity)
+    {
+        return severity >= Severity::kINFO ? std::cout : std::cerr;
+    }
+
+    static std::string severityPrefix(Severity severity)
+    {
+        switch (severity)
+        {
+        case Severity::kINTERNAL_ERROR: return "[F] ";
+        case Severity::kERROR: return "[E] ";
+        case Severity::kWARNING: return "[W] ";
+        case Severity::kINFO: return "[I] ";
+        case Severity::kVERBOSE: return "[V] ";
+        default: assert(0); return "";
+        }
+    }
+
+    bool mShouldLog;
+    Severity mSeverity;
+};
+
+//! \class Logger
+//!
+//! \brief Class which manages logging of TensorRT tools and samples
+//!
+//! \details This class provides a common interface for TensorRT tools and samples to log information to the console,
+//! and supports logging two types of messages:
+//!
+//! - Debugging messages with an associated severity (info, warning, error, or internal error/fatal)
+//! - Test pass/fail messages
+//!
+//! The advantage of having all samples use this class for logging as opposed to emitting directly to stdout/stderr is
+//! that the logic for controlling the verbosity and formatting of sample output is centralized in one location.
+//!
+//! In the future, this class could be extended to support dumping test results to a file in some standard format
+//! (for example, JUnit XML), and providing additional metadata (e.g. timing the duration of a test run).
+//!
+//! TODO: For backwards compatibility with existing samples, this class inherits directly from the nvinfer1::ILogger
+//! interface, which is problematic since there isn't a clean separation between messages coming from the TensorRT
+//! library and messages coming from the sample.
+//!
+//! In the future (once all samples are updated to use Logger::getTRTLogger() to access the ILogger) we can refactor the
+//! class to eliminate the inheritance and instead make the nvinfer1::ILogger implementation a member of the Logger
+//! object.
+
+class Logger : public nvinfer1::ILogger
+{
+public:
+    Logger(Severity severity = Severity::kWARNING)
+        : mReportableSeverity(severity)
+    {
+    }
+
+    //!
+    //! \enum TestResult
+    //! \brief Represents the state of a given test
+    //!
+    enum class TestResult
+    {
+        kRUNNING, //!< The test is running
+        kPASSED,  //!< The test passed
+        kFAILED,  //!< The test failed
+        kWAIVED   //!< The test was waived
+    };
+
+    //!
+    //! \brief Forward-compatible method for retrieving the nvinfer::ILogger associated with this Logger
+    //! \return The nvinfer1::ILogger associated with this Logger
+    //!
+    //! TODO Once all samples are updated to use this method to register the logger with TensorRT,
+    //! we can eliminate the inheritance of Logger from ILogger
+    //!
+    nvinfer1::ILogger& getTRTLogger()
+    {
+        return *this;
+    }
+
+    //!
+    //! \brief Implementation of the nvinfer1::ILogger::log() virtual method
+    //!
+    //! Note samples should not be calling this function directly; it will eventually go away once we eliminate the
+    //! inheritance from nvinfer1::ILogger
+    //!
+    void log(Severity severity, const char* msg) TRT_NOEXCEPT override
+    {
+        LogStreamConsumer(mReportableSeverity, severity) << "[TRT] " << std::string(msg) << std::endl;
+    }
+
+    //!
+    //! \brief Method for controlling the verbosity of logging output
+    //!
+    //! \param severity The logger will only emit messages that have severity of this level or higher.
+    //!
+    void setReportableSeverity(Severity severity)
+    {
+        mReportableSeverity = severity;
+    }
+
+    //!
+    //! \brief Opaque handle that holds logging information for a particular test
+    //!
+    //! This object is an opaque handle to information used by the Logger to print test results.
+    //! The sample must call Logger::defineTest() in order to obtain a TestAtom that can be used
+    //! with Logger::reportTest{Start,End}().
+    //!
+    class TestAtom
+    {
+    public:
+        TestAtom(TestAtom&&) = default;
+
+    private:
+        friend class Logger;
+
+        TestAtom(bool started, const std::string& name, const std::string& cmdline)
+            : mStarted(started)
+            , mName(name)
+            , mCmdline(cmdline)
+        {
+        }
+
+        bool mStarted;
+        std::string mName;
+        std::string mCmdline;
+    };
+
+    //!
+    //! \brief Define a test for logging
+    //!
+    //! \param[in] name The name of the test.  This should be a string starting with
+    //!                  "TensorRT" and containing dot-separated strings containing
+    //!                  the characters [A-Za-z0-9_].
+    //!                  For example, "TensorRT.sample_googlenet"
+    //! \param[in] cmdline The command line used to reproduce the test
+    //
+    //! \return a TestAtom that can be used in Logger::reportTest{Start,End}().
+    //!
+    static TestAtom defineTest(const std::string& name, const std::string& cmdline)
+    {
+        return TestAtom(false, name, cmdline);
+    }
+
+    //!
+    //! \brief A convenience overloaded version of defineTest() that accepts an array of command-line arguments
+    //!        as input
+    //!
+    //! \param[in] name The name of the test
+    //! \param[in] argc The number of command-line arguments
+    //! \param[in] argv The array of command-line arguments (given as C strings)
+    //!
+    //! \return a TestAtom that can be used in Logger::reportTest{Start,End}().
+    static TestAtom defineTest(const std::string& name, int argc, char const* const* argv)
+    {
+        auto cmdline = genCmdlineString(argc, argv);
+        return defineTest(name, cmdline);
+    }
+
+    //!
+    //! \brief Report that a test has started.
+    //!
+    //! \pre reportTestStart() has not been called yet for the given testAtom
+    //!
+    //! \param[in] testAtom The handle to the test that has started
+    //!
+    static void reportTestStart(TestAtom& testAtom)
+    {
+        reportTestResult(testAtom, TestResult::kRUNNING);
+        assert(!testAtom.mStarted);
+        testAtom.mStarted = true;
+    }
+
+    //!
+    //! \brief Report that a test has ended.
+    //!
+    //! \pre reportTestStart() has been called for the given testAtom
+    //!
+    //! \param[in] testAtom The handle to the test that has ended
+    //! \param[in] result The result of the test. Should be one of TestResult::kPASSED,
+    //!                   TestResult::kFAILED, TestResult::kWAIVED
+    //!
+    static void reportTestEnd(const TestAtom& testAtom, TestResult result)
+    {
+        assert(result != TestResult::kRUNNING);
+        assert(testAtom.mStarted);
+        reportTestResult(testAtom, result);
+    }
+
+    static int reportPass(const TestAtom& testAtom)
+    {
+        reportTestEnd(testAtom, TestResult::kPASSED);
+        return EXIT_SUCCESS;
+    }
+
+    static int reportFail(const TestAtom& testAtom)
+    {
+        reportTestEnd(testAtom, TestResult::kFAILED);
+        return EXIT_FAILURE;
+    }
+
+    static int reportWaive(const TestAtom& testAtom)
+    {
+        reportTestEnd(testAtom, TestResult::kWAIVED);
+        return EXIT_SUCCESS;
+    }
+
+    static int reportTest(const TestAtom& testAtom, bool pass)
+    {
+        return pass ? reportPass(testAtom) : reportFail(testAtom);
+    }
+
+    Severity getReportableSeverity() const
+    {
+        return mReportableSeverity;
+    }
+
+private:
+    //!
+    //! \brief returns an appropriate string for prefixing a log message with the given severity
+    //!
+    static const char* severityPrefix(Severity severity)
+    {
+        switch (severity)
+        {
+        case Severity::kINTERNAL_ERROR: return "[F] ";
+        case Severity::kERROR: return "[E] ";
+        case Severity::kWARNING: return "[W] ";
+        case Severity::kINFO: return "[I] ";
+        case Severity::kVERBOSE: return "[V] ";
+        default: assert(0); return "";
+        }
+    }
+
+    //!
+    //! \brief returns an appropriate string for prefixing a test result message with the given result
+    //!
+    static const char* testResultString(TestResult result)
+    {
+        switch (result)
+        {
+        case TestResult::kRUNNING: return "RUNNING";
+        case TestResult::kPASSED: return "PASSED";
+        case TestResult::kFAILED: return "FAILED";
+        case TestResult::kWAIVED: return "WAIVED";
+        default: assert(0); return "";
+        }
+    }
+
+    //!
+    //! \brief returns an appropriate output stream (cout or cerr) to use with the given severity
+    //!
+    static std::ostream& severityOstream(Severity severity)
+    {
+        return severity >= Severity::kINFO ? std::cout : std::cerr;
+    }
+
+    //!
+    //! \brief method that implements logging test results
+    //!
+    static void reportTestResult(const TestAtom& testAtom, TestResult result)
+    {
+        severityOstream(Severity::kINFO) << "&&&& " << testResultString(result) << " " << testAtom.mName << " # "
+                                         << testAtom.mCmdline << std::endl;
+    }
+
+    //!
+    //! \brief generate a command line string from the given (argc, argv) values
+    //!
+    static std::string genCmdlineString(int argc, char const* const* argv)
+    {
+        std::stringstream ss;
+        for (int i = 0; i < argc; i++)
+        {
+            if (i > 0)
+                ss << " ";
+            ss << argv[i];
+        }
+        return ss.str();
+    }
+
+    Severity mReportableSeverity;
+};
+
+namespace
+{
+
+//!
+//! \brief produces a LogStreamConsumer object that can be used to log messages of severity kVERBOSE
+//!
+//! Example usage:
+//!
+//!     LOG_VERBOSE(logger) << "hello world" << std::endl;
+//!
+inline LogStreamConsumer LOG_VERBOSE(const Logger& logger)
+{
+    return LogStreamConsumer(logger.getReportableSeverity(), Severity::kVERBOSE);
+}
+
+//!
+//! \brief produces a LogStreamConsumer object that can be used to log messages of severity kINFO
+//!
+//! Example usage:
+//!
+//!     LOG_INFO(logger) << "hello world" << std::endl;
+//!
+inline LogStreamConsumer LOG_INFO(const Logger& logger)
+{
+    return LogStreamConsumer(logger.getReportableSeverity(), Severity::kINFO);
+}
+
+//!
+//! \brief produces a LogStreamConsumer object that can be used to log messages of severity kWARNING
+//!
+//! Example usage:
+//!
+//!     LOG_WARN(logger) << "hello world" << std::endl;
+//!
+inline LogStreamConsumer LOG_WARN(const Logger& logger)
+{
+    return LogStreamConsumer(logger.getReportableSeverity(), Severity::kWARNING);
+}
+
+//!
+//! \brief produces a LogStreamConsumer object that can be used to log messages of severity kERROR
+//!
+//! Example usage:
+//!
+//!     LOG_ERROR(logger) << "hello world" << std::endl;
+//!
+inline LogStreamConsumer LOG_ERROR(const Logger& logger)
+{
+    return LogStreamConsumer(logger.getReportableSeverity(), Severity::kERROR);
+}
+
+//!
+//! \brief produces a LogStreamConsumer object that can be used to log messages of severity kINTERNAL_ERROR
+//!        ("fatal" severity)
+//!
+//! Example usage:
+//!
+//!     LOG_FATAL(logger) << "hello world" << std::endl;
+//!
+inline LogStreamConsumer LOG_FATAL(const Logger& logger)
+{
+    return LogStreamConsumer(logger.getReportableSeverity(), Severity::kINTERNAL_ERROR);
+}
+
+} // anonymous namespace
+
+#endif // TENSORRT_LOGGING_H
diff --git a/zed_components/src/zed_camera/include/utils.h b/zed_components/src/zed_camera/include/utils.h
new file mode 100644
index 00000000..14094e55
--- /dev/null
+++ b/zed_components/src/zed_camera/include/utils.h
@@ -0,0 +1,140 @@
+#ifndef TRTX_YOLOV5_UTILS_H_
+#define TRTX_YOLOV5_UTILS_H_
+
+#include <fstream>
+#include <opencv2/opencv.hpp>
+#include <sl/Camera.hpp>
+#include "yolo.hpp"
+
+
+static inline cv::Mat preprocess_img(cv::Mat& img, int input_w, int input_h) {
+    int w, h, x, y;
+    float r_w = input_w / (img.cols * 1.0);
+    float r_h = input_h / (img.rows * 1.0);
+    if (r_h > r_w) {
+        w = input_w;
+        h = r_w * img.rows;
+        x = 0;
+        y = (input_h - h) / 2;
+    } else {
+        w = r_h * img.cols;
+        h = input_h;
+        x = (input_w - w) / 2;
+        y = 0;
+    }
+    cv::Mat re(h, w, CV_8UC3);
+    cv::resize(img, re, re.size(), 0, 0, cv::INTER_LINEAR);
+    cv::Mat out(input_h, input_w, CV_8UC3, cv::Scalar(128, 128, 128));
+    re.copyTo(out(cv::Rect(x, y, re.cols, re.rows)));
+    return out;
+}
+
+float const id_colors[5][3] = {
+    { 232.0f, 176.0f, 59.0f},
+    { 175.0f, 208.0f, 25.0f},
+    { 102.0f, 205.0f, 105.0f},
+    { 185.0f, 0.0f, 255.0f},
+    { 99.0f, 107.0f, 252.0f}
+};
+
+inline cv::Scalar generateColorID_u(int idx) {
+    if (idx < 0) return cv::Scalar(236, 184, 36, 255);
+    int color_idx = idx % 5;
+    return cv::Scalar(id_colors[color_idx][0], id_colors[color_idx][1], id_colors[color_idx][2], 255);
+}
+
+inline sl::float4 generateColorID_f(int idx) {
+    auto clr_u = generateColorID_u(idx);
+    return sl::float4(static_cast<float>(clr_u.val[0]) / 255.f, static_cast<float>(clr_u.val[1]) / 255.f, static_cast<float>(clr_u.val[2]) / 255.f, 1.f);
+}
+
+inline bool renderObject(const sl::ObjectData& i, const bool isTrackingON) {
+    if (isTrackingON)
+        return (i.tracking_state == sl::OBJECT_TRACKING_STATE::OK);
+    else
+        return (i.tracking_state == sl::OBJECT_TRACKING_STATE::OK || i.tracking_state == sl::OBJECT_TRACKING_STATE::OFF);
+}
+
+float const class_colors[6][3] = {
+    { 44.0f, 117.0f, 255.0f}, // PEOPLE
+    { 255.0f, 0.0f, 255.0f}, // VEHICLE
+    { 0.0f, 0.0f, 255.0f},
+    { 0.0f, 255.0f, 255.0f},
+    { 0.0f, 255.0f, 0.0f},
+    { 255.0f, 255.0f, 255.0f}
+};
+
+inline sl::float4 getColorClass(int idx) {
+    idx = std::min(5, idx);
+    sl::float4 clr(class_colors[idx][0], class_colors[idx][1], class_colors[idx][2], 1.f);
+    return clr / 255.f;
+}
+
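+// Fading helpers: blend a pixel channel toward a target overlay color by
+// 'current_alpha'; used to fade detection overlays into the camera image.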
+template<typename T>
+inline uchar _applyFading(T val, float current_alpha, double current_clr) {
+    return static_cast<uchar>(current_alpha * current_clr + (1.0 - current_alpha) * val);
+}
+
+inline cv::Vec4b applyFading(cv::Scalar val, float current_alpha, cv::Scalar current_clr) {
+    cv::Vec4b out;
+    out[0] = _applyFading(val.val[0], current_alpha, current_clr.val[0]);
+    out[1] = _applyFading(val.val[1], current_alpha, current_clr.val[1]);
+    out[2] = _applyFading(val.val[2], current_alpha, current_clr.val[2]);
+    out[3] = 255;
+    return out;
+}
+
+inline void drawVerticalLine(
+    cv::Mat &left_display,
+    cv::Point2i start_pt,
+    cv::Point2i end_pt,
+    cv::Scalar clr,
+    int thickness) {
+    int n_steps = 7;
+    cv::Point2i pt1, pt4;
+    pt1.x = ((n_steps - 1) * start_pt.x + end_pt.x) / n_steps;
+    pt1.y = ((n_steps - 1) * start_pt.y + end_pt.y) / n_steps;
+
+    pt4.x = (start_pt.x + (n_steps - 1) * end_pt.x) / n_steps;
+    pt4.y = (start_pt.y + (n_steps - 1) * end_pt.y) / n_steps;
+
+    cv::line(left_display, start_pt, pt1, clr, thickness);
+    cv::line(left_display, pt4, end_pt, clr, thickness);
+}
+
+inline cv::Mat slMat2cvMat(sl::Mat& input) {
+    // Mapping between MAT_TYPE and CV_TYPE
+    int cv_type = -1;
+    switch (input.getDataType()) {
+        case sl::MAT_TYPE::F32_C1: cv_type = CV_32FC1;
+            break;
+        case sl::MAT_TYPE::F32_C2: cv_type = CV_32FC2;
+            break;
+        case sl::MAT_TYPE::F32_C3: cv_type = CV_32FC3;
+            break;
+        case sl::MAT_TYPE::F32_C4: cv_type = CV_32FC4;
+            break;
+        case sl::MAT_TYPE::U8_C1: cv_type = CV_8UC1;
+            break;
+        case sl::MAT_TYPE::U8_C2: cv_type = CV_8UC2;
+            break;
+        case sl::MAT_TYPE::U8_C3: cv_type = CV_8UC3;
+            break;
+        case sl::MAT_TYPE::U8_C4: cv_type = CV_8UC4;
+            break;
+        default: break;
+    }
+
+    return cv::Mat(input.getHeight(), input.getWidth(), cv_type, input.getPtr<sl::uchar1>(sl::MEM::CPU));
+}
+
+inline bool readFile(std::string filename, std::vector<uint8_t> &file_content) {
+    // open the file:
+    std::ifstream instream(filename, std::ios::in | std::ios::binary);
+    if (!instream.is_open()) return true;
+    file_content = std::vector<uint8_t>((std::istreambuf_iterator<char>(instream)), std::istreambuf_iterator<char>());
+    return false;
+}
+
+#endif // TRTX_YOLOV5_UTILS_H_
+
diff --git a/zed_components/src/zed_camera/include/yolo.hpp b/zed_components/src/zed_camera/include/yolo.hpp
new file mode 100644
index 00000000..9d1e1f16
--- /dev/null
+++ b/zed_components/src/zed_camera/include/yolo.hpp
@@ -0,0 +1,125 @@
+#ifndef YOLO_HPP
+#define YOLO_HPP
+
+#include <sl/Camera.hpp>
+#include <NvInfer.h>
+#include <opencv2/opencv.hpp>
+
+#include "cuda_utils.h"
+#include "logging.h"
+#include "utils.h"
+
+enum class YOLO_MODEL_VERSION_OUTPUT_STYLE {
+    YOLOV6,
+    YOLOV8_V5
+};
+
+struct BBox {
+    float x1, y1, x2, y2;
+};
+
+struct BBoxInfo {
+    BBox box;
+    int label;
+    float prob;
+};
+
+inline std::vector<std::string> split_str(std::string s, std::string delimiter) {
+    size_t pos_start = 0, pos_end, delim_len = delimiter.length();
+    std::string token;
+    std::vector<std::string> res;
+
+    while ((pos_end = s.find(delimiter, pos_start)) != std::string::npos) {
+        token = s.substr(pos_start, pos_end - pos_start);
+        pos_start = pos_end + delim_len;
+        res.push_back(token);
+    }
+
+    res.push_back(s.substr(pos_start));
+    return res;
+}
+
+
+struct OptimDim {
+    nvinfer1::Dims4 size;
+    std::string tensor_name;
+
+    bool setFromString(std::string &arg) {
+        // "images:1x3x512x512"
+        std::vector<std::string> v_ = split_str(arg, ":");
+        if (v_.size() != 2) return true;
+
+        std::string dims_str = v_.back();
+        std::vector<std::string> v = split_str(dims_str, "x");
+
+        size.nbDims = 4;
+        // assuming batch is 1 and channel is 3
+        size.d[0] = 1;
+        size.d[1] = 3;
+
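+        // Accept "HxW", "CxHxW", or "NxCxHxW": the last two fields are always
+        // interpreted as height and width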
+        if (v.size() == 2) {
+            size.d[2] = stoi(v[0]);
+            size.d[3] = stoi(v[1]);
+        } else if (v.size() == 3) {
+            size.d[2] = stoi(v[1]);
+            size.d[3] = stoi(v[2]);
+        } else if (v.size() == 4) {
+            size.d[2] = stoi(v[2]);
+            size.d[3] = stoi(v[3]);
+        } else return true;
+
+        if (size.d[2] != size.d[3]) std::cerr << "Warning: only square inputs are currently supported" << std::endl;
+
+        tensor_name = v_.front();
+        return false;
+    }
+};
+
+class Yolo {
+public:
+    Yolo();
+    ~Yolo();
+
+    static int build_engine(std::string onnx_path, std::string engine_path, OptimDim dyn_dim_profile);
+
+    int init(std::string engine_path);
+    std::vector<BBoxInfo> run(sl::Mat left_sl, int orig_image_h, int orig_image_w, float thres);
+
+    sl::Resolution getInferenceSize() {
+        return sl::Resolution(input_width, input_height);
+    }
+
+private:
+
+    cv::Mat left_cv_rgb;
+
+    float nms = 0.4;
+
+    // Get input dimension size
+    std::string input_binding_name = "images"; // input
+    std::string output_name = "classes";
+    int inputIndex, outputIndex;
+    size_t input_width = 0, input_height = 0, batch_size = 1;
+    // Yolov6 1x8400x85 // 85=5+80=cxcy+cwch+obj_conf+cls_conf // https://github.com/DefTruth/lite.ai.toolkit/blob/1267584d5dae6269978e17ffd5ec29da496e503e/lite/ort/cv/yolov6.cpp#L97
+    // Yolov8/yolov5 1x84x8400
+    size_t out_dim = 8400, out_class_number = 80 /*for COCO*/, out_box_struct_number = 4; // https://github.com/ultralytics/yolov3/issues/750#issuecomment-569783354
+    size_t output_size = 0;
+
+    YOLO_MODEL_VERSION_OUTPUT_STYLE yolo_model_version;
+
+
+    float *h_input, *h_output;
+    float *d_input, *d_output;
+
+    nvinfer1::IRuntime* runtime;
+    nvinfer1::ICudaEngine* engine;
+    nvinfer1::IExecutionContext* context;
+    cudaStream_t stream;
+
+    bool is_init = false;
+
+
+};
+
+#endif /* YOLO_HPP */
+
diff --git a/zed_components/src/zed_camera/include/zed_camera_component.hpp b/zed_components/src/zed_camera/include/zed_camera_component.hpp
index eb3b6b68..33562f32 100644
--- a/zed_components/src/zed_camera/include/zed_camera_component.hpp
+++ b/zed_components/src/zed_camera/include/zed_camera_component.hpp
@@ -21,6 +21,7 @@
 #include "sl_tools.hpp"
 #include "sl_types.hpp"
 #include "visibility_control.hpp"
+#include "yolo.hpp"
 
@@ -315,24 +316,27 @@ class ZedCamera : public rclcpp::Node
   float mMappingRes = 0.05f;
   float mMappingRangeMax = 10.0f;
 
-  bool mObjDetEnabled = false;
+  bool mObjDetEnabled = true;
   bool mObjDetTracking = true;
   float mObjDetConfidence = 40.0f;
   double mObjDetPredTimeout = 0.5;
   bool mObjDetReducedPrecision = false;
   float mObjDetMaxRange = 15.0f;
   std::vector<sl::OBJECT_CLASS> mObjDetFilter;
-  bool mObjDetPeopleEnable = true;
-  bool mObjDetVehiclesEnable = true;
-  bool mObjDetBagsEnable = true;
-  bool mObjDetAnimalsEnable = true;
-  bool mObjDetElectronicsEnable = true;
-  bool mObjDetFruitsEnable = true;
-  bool mObjDetSportEnable = true;
+  bool mObjDetPeopleEnable = false;
+  bool mObjDetVehiclesEnable = false;
+  bool mObjDetBagsEnable = false;
+  bool mObjDetAnimalsEnable = false;
+  bool mObjDetElectronicsEnable = false;
+  bool mObjDetFruitsEnable = false;
+  bool mObjDetSportEnable = false;
   bool mObjDetBodyFitting = false;
-  sl::OBJECT_DETECTION_MODEL mObjDetModel = sl::OBJECT_DETECTION_MODEL::MULTI_CLASS_BOX_FAST;
+  sl::OBJECT_DETECTION_MODEL mObjDetModel = sl::OBJECT_DETECTION_MODEL::CUSTOM_BOX_OBJECTS;
   sl::OBJECT_FILTERING_MODE mObjFilterMode = sl::OBJECT_FILTERING_MODE::NMS3D;
 
+  Yolo detector;
+  std::string mObjDetEnginePath = "";
+
   bool mBodyTrkEnabled = false;
   sl::BODY_TRACKING_MODEL mBodyTrkModel = sl::BODY_TRACKING_MODEL::HUMAN_BODY_FAST;
   sl::BODY_FORMAT mBodyTrkFmt = sl::BODY_FORMAT::BODY_38;
diff --git a/zed_components/src/zed_camera/src/yolo.cpp b/zed_components/src/zed_camera/src/yolo.cpp
new file mode 100644
index 00000000..00d396e4
--- /dev/null
+++ b/zed_components/src/zed_camera/src/yolo.cpp
@@ -0,0 +1,498 @@
+#include "yolo.hpp"
+#include "logging.h"
+#include "NvOnnxParser.h"
+
+using namespace nvinfer1;
+
+static Logger gLogger;
+
+inline float clamp(float val, float min, float max) {
+    if (val <= min) return min;
+    if (val >= max) return max;
+    return val;
+}
+
+
+#define WEIGHTED_NMS
+
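+// Weighted NMS: boxes suppressed by a kept box are not simply discarded;
+// their corners contribute to a confidence-weighted average of the kept box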
+std::vector<BBoxInfo> nonMaximumSuppression(const float nmsThresh, std::vector<BBoxInfo> binfo) {
+
+    auto overlap1D = [](float x1min, float x1max, float x2min, float x2max) -> float {
+        if (x1min > x2min) {
+            std::swap(x1min, x2min);
+            std::swap(x1max, x2max);
+        }
+        return x1max < x2min ? 0 : std::min(x1max, x2max) - x2min;
+    };
+
+    auto computeIoU = [&overlap1D](BBox& bbox1, BBox & bbox2) -> float {
+        float overlapX = overlap1D(bbox1.x1, bbox1.x2, bbox2.x1, bbox2.x2);
+        float overlapY = overlap1D(bbox1.y1, bbox1.y2, bbox2.y1, bbox2.y2);
+        float area1 = (bbox1.x2 - bbox1.x1) * (bbox1.y2 - bbox1.y1);
+        float area2 = (bbox2.x2 - bbox2.x1) * (bbox2.y2 - bbox2.y1);
+        float overlap2D = overlapX * overlapY;
+        float u = area1 + area2 - overlap2D;
+        return u == 0 ? 0 : overlap2D / u;
+    };
+
+    std::stable_sort(binfo.begin(), binfo.end(), [](const BBoxInfo& b1, const BBoxInfo & b2) {
+        return b1.prob > b2.prob; });
+
+    std::vector<BBoxInfo> out;
+
+#if defined(WEIGHTED_NMS)
+    std::vector<std::vector<BBoxInfo>> weighted_nms_candidates;
+#endif
+    for (auto& i : binfo) {
+        bool keep = true;
+
+#if defined(WEIGHTED_NMS)
+        int j_index = 0;
+#endif
+
+        for (auto& j : out) {
+            if (keep) {
+                float overlap = computeIoU(i.box, j.box);
+                keep = overlap <= nmsThresh;
+#if defined(WEIGHTED_NMS)
+                if (!keep && fabs(j.prob - i.prob) < 0.52f) // add label similarity check
+                    weighted_nms_candidates[j_index].push_back(i);
+#endif
+            } else
+                break;
+
+#if defined(WEIGHTED_NMS)
+            j_index++;
+#endif
+
+        }
+        if (keep) {
+            out.push_back(i);
+#if defined(WEIGHTED_NMS)
+            weighted_nms_candidates.emplace_back();
+            weighted_nms_candidates.back().clear();
+#endif
+        }
+    }
+
+#if defined(WEIGHTED_NMS)
+
+    for (size_t i = 0; i < out.size(); i++) {
+        // the best confidence
+        BBoxInfo& best = out[i];
+        float sum_tl_x = best.box.x1 * best.prob;
+        float sum_tl_y = best.box.y1 * best.prob;
+        float sum_br_x = best.box.x2 * best.prob;
+        float sum_br_y = best.box.y2 * best.prob;
+
+        float weight = best.prob;
+        for (auto& it : weighted_nms_candidates[i]) {
+            sum_tl_x += it.box.x1 * it.prob;
+            sum_tl_y += it.box.y1 * it.prob;
+            sum_br_x += it.box.x2 * it.prob;
+            sum_br_y += it.box.y2 * it.prob;
+            weight += it.prob;
+        }
+
+        weight = 1.f / weight;
+        best.box.x1 = sum_tl_x * weight;
+        best.box.y1 = sum_tl_y * weight;
+        best.box.x2 = sum_br_x * weight;
+        best.box.y2 = sum_br_y * weight;
+    }
+
+#endif
+
+    return out;
+}
+
+Yolo::Yolo() {
+}
+
+Yolo::~Yolo() {
+    if (is_init) {
+
+        // Release stream and buffers
+        cudaStreamDestroy(stream);
+        CUDA_CHECK(cudaFree(d_input));
+        CUDA_CHECK(cudaFree(d_output));
+        // Destroy the engine
+        context->destroy();
+        engine->destroy();
+        runtime->destroy();
+
+        delete[] h_input;
+        delete[] h_output;
+    }
+    is_init = false;
+}
+
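+// Builds a TensorRT engine from an ONNX model and serializes it to engine_path.
+// This is a slow, one-time step (per model and per GPU); at runtime init()
+// deserializes the cached engine instead of re-parsing the ONNX file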
+int Yolo::build_engine(std::string onnx_path, std::string engine_path, OptimDim dyn_dim_profile) {
+
+
+    std::vector<uint8_t> onnx_file_content;
+    if (readFile(onnx_path, onnx_file_content)) return 1;
+
+    if ((!onnx_file_content.empty())) {
+
+        ICudaEngine * engine;
+        // Create engine (onnx)
+        std::cout << "Creating engine from onnx model" << std::endl;
+
+        gLogger.setReportableSeverity(Severity::kINFO);
+        auto builder = nvinfer1::createInferBuilder(gLogger);
+        if (!builder) {
+            std::cerr << "createInferBuilder failed" << std::endl;
+            return 1;
+        }
+
+        auto explicitBatch = 1U << static_cast<uint32_t>(nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH);
+        auto network = builder->createNetworkV2(explicitBatch);
+
+        if (!network) {
+            std::cerr << "createNetwork failed" << std::endl;
+            return 1;
+        }
+
+        auto config = builder->createBuilderConfig();
+        if (!config) {
+            std::cerr << "createBuilderConfig failed" << std::endl;
+            return 1;
+        }
+
+        ////////// Dynamic dimensions handling : support only 1 size at a time
+        if (!dyn_dim_profile.tensor_name.empty()) {
+
+            IOptimizationProfile* profile = builder->createOptimizationProfile();
+
+            profile->setDimensions(dyn_dim_profile.tensor_name.c_str(), OptProfileSelector::kMIN, dyn_dim_profile.size);
+            profile->setDimensions(dyn_dim_profile.tensor_name.c_str(), OptProfileSelector::kOPT, dyn_dim_profile.size);
+            profile->setDimensions(dyn_dim_profile.tensor_name.c_str(), OptProfileSelector::kMAX, dyn_dim_profile.size);
+
+            config->addOptimizationProfile(profile);
+            builder->setMaxBatchSize(1);
+        }
+
+        auto parser = nvonnxparser::createParser(*network, gLogger);
+        if (!parser) {
+            std::cerr << "nvonnxparser::createParser failed" << std::endl;
+            return 1;
+        }
+
+        bool parsed = false;
+        unsigned char *onnx_model_buffer = onnx_file_content.data();
+        size_t onnx_model_buffer_size = onnx_file_content.size() * sizeof (char);
+        parsed = parser->parse(onnx_model_buffer, onnx_model_buffer_size);
+
+        if (!parsed) {
+            std::cerr << "onnx file parsing failed" << std::endl;
+            return 1;
+        }
+
+        if (builder->platformHasFastFp16()) {
+            std::cout << "FP16 enabled!" << std::endl;
+            config->setFlag(BuilderFlag::kFP16);
+        }
+
+        //////////////// Actual engine building
+
+        engine = builder->buildEngineWithConfig(*network, *config);
+
+        onnx_file_content.clear();
+
+        // write plan file if it is specified
+        if (engine == nullptr) return 1;
+        IHostMemory* ptr = engine->serialize();
+        assert(ptr);
+        if (ptr == nullptr) return 1;
+
+        FILE *fp = fopen(engine_path.c_str(), "wb");
+        fwrite(reinterpret_cast<const char *>(ptr->data()), ptr->size() * sizeof (char), 1, fp);
+        fclose(fp);
+
+        parser->destroy();
+        network->destroy();
+        config->destroy();
+        builder->destroy();
+
+        engine->destroy();
+
+        return 0;
+    } else return 1;
+
+
+}
+
+int Yolo::init(std::string engine_name) {
+
+
+    // deserialize the .engine and run inference
+    std::ifstream file(engine_name, std::ios::binary);
+    if (!file.good()) {
+        std::cerr << "read " << engine_name << " error!" << std::endl;
+        return -1;
+    }
+    char *trtModelStream = nullptr;
+    size_t size = 0;
+    file.seekg(0, file.end);
+    size = file.tellg();
+    file.seekg(0, file.beg);
+    trtModelStream = new char[size];
+    if (!trtModelStream) return 1;
+    file.read(trtModelStream, size);
+    file.close();
+
+    // prepare input data ---------------------------
+    runtime = createInferRuntime(gLogger);
+    if (runtime == nullptr) return 1;
+    engine = runtime->deserializeCudaEngine(trtModelStream, size);
+    if (engine == nullptr) return 1;
+    context = engine->createExecutionContext();
+    if (context == nullptr) return 1;
+
+    delete[] trtModelStream;
+    if (engine->getNbBindings() != 2) return 1;
+
+
+    const int bindings = engine->getNbBindings();
+    for (int i = 0; i < bindings; i++) {
+        if (engine->bindingIsInput(i)) {
+            input_binding_name = engine->getBindingName(i);
+            Dims bind_dim = engine->getBindingDimensions(i);
+            input_width = bind_dim.d[3];
+            input_height = bind_dim.d[2];
+            inputIndex = i;
+            std::cout << "Inference size : " << input_height << "x" << input_width << std::endl;
+        }//if (engine->getTensorIOMode(engine->getBindingName(i)) == TensorIOMode::kOUTPUT)
+        else {
+            output_name = engine->getBindingName(i);
+            // fill size, allocation must be done externally
+            outputIndex = i;
+            Dims bind_dim = engine->getBindingDimensions(i);
+            size_t batch = bind_dim.d[0];
+            if (batch > batch_size) {
+                std::cout << "batch > 1 not supported" << std::endl;
+                return 1;
+            }
+            size_t dim1 = bind_dim.d[1];
+            size_t dim2 = bind_dim.d[2];
+
+            if (dim1 > dim2) {
+                // Yolov6 1x8400x85 // 85=5+80=cxcy+cwch+obj_conf+cls_conf
+                out_dim = dim1;
+                out_box_struct_number = 5;
+                out_class_number = dim2 - out_box_struct_number;
+                yolo_model_version = YOLO_MODEL_VERSION_OUTPUT_STYLE::YOLOV6;
+                std::cout << "YOLOV6 format" << std::endl;
+            } else {
+                // Yolov8 1x84x8400
+                out_dim = dim2;
+                out_box_struct_number = 4;
+                out_class_number = dim1 - out_box_struct_number;
+                yolo_model_version = YOLO_MODEL_VERSION_OUTPUT_STYLE::YOLOV8_V5;
+                std::cout << "YOLOV8/YOLOV5 format" << std::endl;
+            }
+        }
+    }
+    output_size = out_dim * (out_class_number + out_box_struct_number);
+    h_input = new float[batch_size * 3 * input_height * input_width];
+    h_output = new float[batch_size * output_size];
+    // In order to bind the buffers, we need to know the names of the input and output tensors.
+    // Note that indices are guaranteed to be less than IEngine::getNbBindings()
+    assert(inputIndex == 0);
+    assert(outputIndex == 1);
+    // Create GPU buffers on device
+    CUDA_CHECK(cudaMalloc(&d_input, batch_size * 3 * input_height * input_width * sizeof (float)));
+    CUDA_CHECK(cudaMalloc(&d_output, batch_size * output_size * sizeof (float)));
+    // Create stream
+    CUDA_CHECK(cudaStreamCreate(&stream));
+
+    if (batch_size != 1) return 1; // This sample only supports batch 1 for now
+
+    is_init = true;
+    return 0;
+}
+
+std::vector<BBoxInfo> Yolo::run(sl::Mat left_sl, int orig_image_h, int orig_image_w, float thres) {
+    std::vector<BBoxInfo> binfo;
+
+    size_t frame_s = input_height * input_width;
+
+    /////// Preparing inference
+    cv::Mat left_cv_rgba = slMat2cvMat(left_sl);
+    cv::cvtColor(left_cv_rgba, left_cv_rgb, cv::COLOR_BGRA2BGR);
+    if (left_cv_rgb.empty()) return binfo;
+    cv::Mat pr_img = preprocess_img(left_cv_rgb, input_width, input_height); // letterbox BGR to RGB
+    int i = 0;
+    int batch = 0;
+    for (int row = 0; row < input_height; ++row) {
+        uchar* uc_pixel = pr_img.data + row * pr_img.step;
+        for (int col = 0; col < input_width; ++col) {
+            h_input[batch * 3 * frame_s + i] = (float) uc_pixel[2] / 255.0;
+            h_input[batch * 3 * frame_s + i + frame_s] = (float) uc_pixel[1] / 255.0;
+            h_input[batch * 3 * frame_s + i + 2 * frame_s] = (float) uc_pixel[0] / 255.0;
+            uc_pixel += 3;
+            ++i;
+        }
+    }
+
+    /////// INFERENCE
+    // DMA input batch data to device, infer on the batch asynchronously, and DMA output back to host
+    CUDA_CHECK(cudaMemcpyAsync(d_input, h_input, batch_size * 3 * frame_s * sizeof (float), cudaMemcpyHostToDevice, stream));
+
+    std::vector<void *> d_buffers_nvinfer(2);
+    d_buffers_nvinfer[inputIndex] = d_input;
+    d_buffers_nvinfer[outputIndex] = d_output;
+    context->enqueueV2(&d_buffers_nvinfer[0], stream, nullptr);
+
+    CUDA_CHECK(cudaMemcpyAsync(h_output, d_output, batch_size * output_size * sizeof (float), cudaMemcpyDeviceToHost, stream));
+    cudaStreamSynchronize(stream);
+
+
+    /////// Extraction
+
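+    // Map network-space boxes back to the original image: remove the letterbox
+    // padding (xOffset/yOffset), then apply the inverse of the resize scale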
+    float scalingFactor = std::min(static_cast<float>(input_width) / orig_image_w, static_cast<float>(input_height) / orig_image_h);
+    float xOffset = (input_width - scalingFactor * orig_image_w) * 0.5f;
+    float yOffset = (input_height - scalingFactor * orig_image_h) * 0.5f;
+    scalingFactor = 1.f / scalingFactor;
+    float scalingFactor_x = scalingFactor;
+    float scalingFactor_y = scalingFactor;
+
+
+    switch (yolo_model_version) {
+        default:
+        case YOLO_MODEL_VERSION_OUTPUT_STYLE::YOLOV8_V5:
+        {
+            // https://github.com/triple-Mu/YOLOv8-TensorRT/blob/df11cec3abaab7fefb28fb760f1cebbddd5ec826/csrc/detect/normal/include/yolov8.hpp#L343
+            auto num_channels = out_class_number + out_box_struct_number;
+            auto num_anchors = out_dim;
+            auto num_labels = out_class_number;
+
+            auto& dw = xOffset;
+            auto& dh = yOffset;
+
+            auto& width = orig_image_w;
+            auto& height = orig_image_h;
+
+            cv::Mat output = cv::Mat(
+                    num_channels,
+                    num_anchors,
+                    CV_32F,
+                    static_cast<float *>(h_output)
+                    );
+            output = output.t();
+            for (int i = 0; i < num_anchors; i++) {
+                auto row_ptr = output.row(i).ptr<float>();
+                auto bboxes_ptr = row_ptr;
+                auto scores_ptr = row_ptr + out_box_struct_number;
+                auto max_s_ptr = std::max_element(scores_ptr, scores_ptr + num_labels);
+                float score = *max_s_ptr;
+                if (score > thres) {
+                    int label = max_s_ptr - scores_ptr;
+
+                    BBoxInfo bbi;
+
+                    float x = *bboxes_ptr++ - dw;
+                    float y = *bboxes_ptr++ - dh;
+                    float w = *bboxes_ptr++;
+                    float h = *bboxes_ptr;
+
+                    float x0 = clamp((x - 0.5f * w) * scalingFactor_x, 0.f, width);
+                    float y0 = clamp((y - 0.5f * h) * scalingFactor_y, 0.f, height);
+                    float x1 = clamp((x + 0.5f * w) * scalingFactor_x, 0.f, width);
+                    float y1 = clamp((y + 0.5f * h) * scalingFactor_y, 0.f, height);
+
+                    cv::Rect_<float> bbox;
+                    bbox.x = x0;
+                    bbox.y = y0;
+                    bbox.width = x1 - x0;
+                    bbox.height = y1 - y0;
+
+                    bbi.box.x1 = x0;
+                    bbi.box.y1 = y0;
+                    bbi.box.x2 = x1;
+                    bbi.box.y2 = y1;
+
+                    if ((bbi.box.x1 > bbi.box.x2) || (bbi.box.y1 > bbi.box.y2)) break;
+
+                    bbi.label = label;
+                    bbi.prob = score;
+
+                    binfo.push_back(bbi);
+                }
+            }
+            break;
+        }
+        case YOLO_MODEL_VERSION_OUTPUT_STYLE::YOLOV6:
+        {
+            // https://github.com/DefTruth/lite.ai.toolkit/blob/1267584d5dae6269978e17ffd5ec29da496e503e/lite/ort/cv/yolov6.cpp#L97
+
+            auto& dw_ = xOffset;
+            auto& dh_ = yOffset;
+
+            auto& width = orig_image_w;
+            auto& height = orig_image_h;
+
+            const unsigned int num_anchors = out_dim; // n = ?
+            const unsigned int num_classes = out_class_number; // - out_box_struct_number; // 80
+
+            for (unsigned int i = 0; i < num_anchors; ++i) {
+                const float *offset_obj_cls_ptr = h_output + (i * (num_classes + 5)); // row ptr
+                float obj_conf = offset_obj_cls_ptr[4]; /*always == 1 for some reasons*/
+                float cls_conf = offset_obj_cls_ptr[5];
+
+                // The confidence is remapped because the model outputs basically garbage
+                // below conf ~0.1, and we don't want to clamp it either
+                const float conf_offset = 0.1;
+                const float input_start = 0;
+                const float output_start = input_start;
+                const float output_end = 1;
+                const float input_end = output_end - conf_offset;
+
+                float conf = (obj_conf * cls_conf) - conf_offset;
+                if (conf < 0) conf = 0;
+                conf = (conf - input_start) / (input_end - input_start) * (output_end - output_start) + output_start;
+
+                if (conf > thres) {
+
+                    unsigned int label = 0;
+                    for (unsigned int j = 0; j < num_classes; ++j) {
+                        float tmp_conf = offset_obj_cls_ptr[j + 5];
+                        if (tmp_conf > cls_conf) {
+                            cls_conf = tmp_conf;
+                            label = j;
+                        }
+                    } // argmax
+
+                    BBoxInfo bbi;
+
+                    float cx = offset_obj_cls_ptr[0];
+                    float cy = offset_obj_cls_ptr[1];
+                    float w = offset_obj_cls_ptr[2];
+                    float h = offset_obj_cls_ptr[3];
+                    float x1 = ((cx - w / 2.f) - (float) dw_) * scalingFactor_x;
+                    float y1 = ((cy - h / 2.f) - (float) dh_) * scalingFactor_y;
+                    float x2 = ((cx + w / 2.f) - (float) dw_) * scalingFactor_x;
+                    float y2 = ((cy + h / 2.f) - (float) dh_) * scalingFactor_y;
+
+                    bbi.box.x1 = std::max(0.f, x1);
+                    bbi.box.y1 = std::max(0.f, y1);
+                    bbi.box.x2 = std::min(x2, (float) width - 1.f);
+                    bbi.box.y2 = std::min(y2, (float) height - 1.f);
+
+                    if ((bbi.box.x1 > bbi.box.x2) || (bbi.box.y1 > bbi.box.y2)) break;
+
+                    bbi.label = label;
+                    bbi.prob = conf;
+
+                    binfo.push_back(bbi);
+                }
+            }
+            break;
+        }
+    };
+
+    /// NMS
+    binfo = nonMaximumSuppression(nms, binfo);
+
+    return binfo;
+}
diff --git a/zed_components/src/zed_camera/src/zed_camera_component.cpp b/zed_components/src/zed_camera/src/zed_camera_component.cpp
index 429e5d81..dbe40453 100644
--- a/zed_components/src/zed_camera/src/zed_camera_component.cpp
+++ b/zed_components/src/zed_camera/src/zed_camera_component.cpp
@@ -1459,9 +1459,13 @@ void ZedCamera::getOdParams()
 
   DEBUG_STREAM_OD(" 'object_detection.model': " << model_str.c_str());
 
+  getParam("object_detection.custom_model_path", mObjDetEnginePath, mObjDetEnginePath);
+
+  DEBUG_STREAM_OD(" 'object_detection.custom_model_path': " << mObjDetEnginePath.c_str());
+
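+  // NOTE: '<=' below so that CUSTOM_BOX_OBJECTS itself is accepted as a valid
+  // value of 'object_detection.model'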
   bool matched = false;
   for (int idx = static_cast<int>(sl::OBJECT_DETECTION_MODEL::MULTI_CLASS_BOX_FAST);
-    idx < static_cast<int>(sl::OBJECT_DETECTION_MODEL::CUSTOM_BOX_OBJECTS); idx++)
+    idx <= static_cast<int>(sl::OBJECT_DETECTION_MODEL::CUSTOM_BOX_OBJECTS); idx++)
   {
     sl::OBJECT_DETECTION_MODEL test_model = static_cast<sl::OBJECT_DETECTION_MODEL>(idx);
     std::string test_model_str = sl::toString(test_model).c_str();
@@ -4039,7 +4043,7 @@ bool ZedCamera::startPosTracking()
 
     sl::PositionalTrackingFusionParameters params;
     params.enable_GNSS_fusion = mGnssFusionEnabled;
-    params.gnss_initialisation_distance = mGnssInitDistance;
+    // params.gnss_initialisation_distance = mGnssInitDistance;
 
     sl::FUSION_ERROR_CODE fus_err = mFusion.enablePositionalTracking(params);
     if (fus_err != sl::FUSION_ERROR_CODE::SUCCESS) {
@@ -4224,29 +4228,41 @@ bool ZedCamera::startObjDetect()
   mObjDetInstID = ++mAiInstanceID;
   od_p.instance_module_id = mObjDetInstID;
 
-  mObjDetFilter.clear();
-  if (mObjDetPeopleEnable) {
-    mObjDetFilter.push_back(sl::OBJECT_CLASS::PERSON);
-  }
-  if (mObjDetVehiclesEnable) {
-    mObjDetFilter.push_back(sl::OBJECT_CLASS::VEHICLE);
-  }
-  if (mObjDetBagsEnable) {
-    mObjDetFilter.push_back(sl::OBJECT_CLASS::BAG);
-  }
-  if (mObjDetAnimalsEnable) {
-    mObjDetFilter.push_back(sl::OBJECT_CLASS::ANIMAL);
-  }
-  if (mObjDetElectronicsEnable) {
-    mObjDetFilter.push_back(sl::OBJECT_CLASS::ELECTRONICS);
-  }
-  if (mObjDetFruitsEnable) {
-    mObjDetFilter.push_back(sl::OBJECT_CLASS::FRUIT_VEGETABLE);
-  }
-  if (mObjDetSportEnable) {
-    mObjDetFilter.push_back(sl::OBJECT_CLASS::SPORT);
+  if (mObjDetModel == sl::OBJECT_DETECTION_MODEL::CUSTOM_BOX_OBJECTS) {
+    if (detector.init(mObjDetEnginePath)) {
+      RCLCPP_ERROR_STREAM(get_logger(), "Custom Object detection error: failed to initialize Yolo with the provided engine!");
+      RCLCPP_ERROR_STREAM(get_logger(), "Check that the dependencies are installed and that the engine file path is correct.");
+      RCLCPP_ERROR_STREAM(get_logger(), "Disabling object detection to prevent further issues");
+      mObjDetRunning = false;
+      return false;
+    }
+    RCLCPP_INFO(get_logger(), "Custom Object Detection model loaded from path %s", mObjDetEnginePath.c_str());
+  } else {
+    mObjDetFilter.clear();
+    if (mObjDetPeopleEnable) {
+      mObjDetFilter.push_back(sl::OBJECT_CLASS::PERSON);
+    }
+    if (mObjDetVehiclesEnable) {
+      mObjDetFilter.push_back(sl::OBJECT_CLASS::VEHICLE);
+    }
+    if (mObjDetBagsEnable) {
+      mObjDetFilter.push_back(sl::OBJECT_CLASS::BAG);
+    }
+    if (mObjDetAnimalsEnable) {
+      mObjDetFilter.push_back(sl::OBJECT_CLASS::ANIMAL);
+    }
+    if (mObjDetElectronicsEnable) {
+      mObjDetFilter.push_back(sl::OBJECT_CLASS::ELECTRONICS);
+    }
+    if (mObjDetFruitsEnable) {
+      mObjDetFilter.push_back(sl::OBJECT_CLASS::FRUIT_VEGETABLE);
+    }
+    if (mObjDetSportEnable) {
+      mObjDetFilter.push_back(sl::OBJECT_CLASS::SPORT);
+    }
   }
+
   sl::ERROR_CODE objDetError = mZed.enableObjectDetection(od_p);
 
   if (objDetError != sl::ERROR_CODE::SUCCESS) {
@@ -6543,7 +6559,7 @@ void ZedCamera::processGeoPose()
     getGnss2BaseTransform();
   }
 
-  mGeoPoseStatus = mFusion.getGeoPose(mLastGeoPose);
+  // mGeoPoseStatus = mFusion.getGeoPose(mLastGeoPose);
 
   publishGeoPoseStatus();
 
@@ -6746,6 +6762,15 @@ void ZedCamera::publishGnssPose()
   }
 }
 
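+// Convert a detector BBox (two opposite corners) into the four-corner 2D
+// bounding box (clockwise from top-left) expected by sl::CustomBoxObjectData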
+std::vector<sl::uint2> cvt(const BBox &bbox_in) {
+  std::vector<sl::uint2> bbox_out(4);
+  bbox_out[0] = sl::uint2(bbox_in.x1, bbox_in.y1);
+  bbox_out[1] = sl::uint2(bbox_in.x2, bbox_in.y1);
+  bbox_out[2] = sl::uint2(bbox_in.x2, bbox_in.y2);
+  bbox_out[3] = sl::uint2(bbox_in.x1, bbox_in.y2);
+  return bbox_out;
+}
+
 void ZedCamera::processDetectedObjects(rclcpp::Time t)
 {
   size_t objdet_sub_count = 0;
@@ -6772,31 +6797,59 @@ void ZedCamera::processDetectedObjects(rclcpp::Time t)
 
   // ----> Process realtime dynamic parameters
   objectTracker_parameters_rt.detection_confidence_threshold = mObjDetConfidence;
-  mObjDetFilter.clear();
-  if (mObjDetPeopleEnable) {
-    mObjDetFilter.push_back(sl::OBJECT_CLASS::PERSON);
-  }
-  if (mObjDetVehiclesEnable) {
-    mObjDetFilter.push_back(sl::OBJECT_CLASS::VEHICLE);
-  }
-  if (mObjDetBagsEnable) {
-    mObjDetFilter.push_back(sl::OBJECT_CLASS::BAG);
-  }
-  if (mObjDetAnimalsEnable) {
-    mObjDetFilter.push_back(sl::OBJECT_CLASS::ANIMAL);
-  }
-  if (mObjDetElectronicsEnable) {
-    mObjDetFilter.push_back(sl::OBJECT_CLASS::ELECTRONICS);
-  }
-  if (mObjDetFruitsEnable) {
-    mObjDetFilter.push_back(sl::OBJECT_CLASS::FRUIT_VEGETABLE);
-  }
-  if (mObjDetSportEnable) {
-    mObjDetFilter.push_back(sl::OBJECT_CLASS::SPORT);
+  if (mObjDetModel != sl::OBJECT_DETECTION_MODEL::CUSTOM_BOX_OBJECTS) {
+    mObjDetFilter.clear();
+    if (mObjDetPeopleEnable) {
+      mObjDetFilter.push_back(sl::OBJECT_CLASS::PERSON);
+    }
+    if (mObjDetVehiclesEnable) {
+      mObjDetFilter.push_back(sl::OBJECT_CLASS::VEHICLE);
+    }
+    if (mObjDetBagsEnable) {
+      mObjDetFilter.push_back(sl::OBJECT_CLASS::BAG);
+    }
+    if (mObjDetAnimalsEnable) {
+      mObjDetFilter.push_back(sl::OBJECT_CLASS::ANIMAL);
+    }
+    if (mObjDetElectronicsEnable) {
+      mObjDetFilter.push_back(sl::OBJECT_CLASS::ELECTRONICS);
+    }
+    if (mObjDetFruitsEnable) {
+      mObjDetFilter.push_back(sl::OBJECT_CLASS::FRUIT_VEGETABLE);
+    }
+    if (mObjDetSportEnable) {
+      mObjDetFilter.push_back(sl::OBJECT_CLASS::SPORT);
+    }
+    objectTracker_parameters_rt.object_class_filter = mObjDetFilter;
   }
-  objectTracker_parameters_rt.object_class_filter = mObjDetFilter;
   // <---- Process realtime dynamic parameters
+
+  // ----> Custom Object Detection: get 2D bounding boxes, convert them to
+  //       custom box objects, and ingest them into the ZED SDK
+  if (mObjDetModel == sl::OBJECT_DETECTION_MODEL::CUSTOM_BOX_OBJECTS) {
+    sl::Mat left_sl;
+    mZed.retrieveImage(left_sl, sl::VIEW::LEFT);
+
+    // The detector threshold expects [0,1] while mObjDetConfidence is [0,100]
+    auto detections = detector.run(left_sl, mCamHeight, mCamWidth, mObjDetConfidence / 100);
+
+    std::vector<sl::CustomBoxObjectData> objects2d;
+    for (auto &detection : detections) {
+      sl::CustomBoxObjectData object2d;
+      object2d.unique_object_id = sl::generate_unique_id();
+      object2d.probability = detection.prob;
+      object2d.label = (int) detection.label;
+      object2d.bounding_box_2d = cvt(detection.box);
+      object2d.is_grounded = ((int) detection.label == 0);
+      objects2d.push_back(object2d);
+    }
+
+    mZed.ingestCustomBoxObjects(objects2d, mObjDetInstID);
+  }
+  // <---- Custom Object Detection
+
 
   sl::Objects objects;
 
   sl::ERROR_CODE objDetRes = mZed.retrieveObjects(
diff --git a/zed_wrapper/CMakeLists.txt b/zed_wrapper/CMakeLists.txt
index 30254951..3187db4f 100644
--- a/zed_wrapper/CMakeLists.txt
+++ b/zed_wrapper/CMakeLists.txt
@@ -75,8 +75,9 @@ endif()
 #############################################
 # Dependencies
 
-find_package(ZED 3 REQUIRED)
+find_package(ZED 4 REQUIRED)
 find_package(CUDA REQUIRED)
+find_package(OpenCV REQUIRED)
 
 find_package(ament_cmake_auto REQUIRED)
 ament_auto_find_build_dependencies()
@@ -100,6 +101,7 @@ add_custom_target(all_${PROJECT_NAME}_files SOURCES ${all_files})
 ###############################################################################
 # INCLUDES and LIBS
 include_directories(
+  ${OpenCV_INCLUDE_DIRS}
   ${CUDA_INCLUDE_DIRS}
   ${ZED_INCLUDE_DIRS}
   ${CMAKE_CURRENT_SOURCE_DIR}/src
diff --git a/zed_wrapper/config/common.yaml b/zed_wrapper/config/common.yaml
index ba9275ee..a2ac930f 100644
--- a/zed_wrapper/config/common.yaml
+++ b/zed_wrapper/config/common.yaml
@@ -112,7 +112,8 @@
     object_detection:
         od_enabled: false # True to enable Object Detection
-        model: 'MULTI_CLASS_BOX_MEDIUM' # 'MULTI_CLASS_BOX_FAST', 'MULTI_CLASS_BOX_MEDIUM', 'MULTI_CLASS_BOX_ACCURATE', 'PERSON_HEAD_BOX_FAST', 'PERSON_HEAD_BOX_ACCURATE'
+        model: 'MULTI_CLASS_BOX_MEDIUM' # 'MULTI_CLASS_BOX_FAST', 'MULTI_CLASS_BOX_MEDIUM', 'MULTI_CLASS_BOX_ACCURATE', 'PERSON_HEAD_BOX_FAST', 'PERSON_HEAD_BOX_ACCURATE', 'CUSTOM_BOX_OBJECTS'
+        custom_model_path: '' # The path to your custom Yolo engine file (if using CUSTOM_BOX_OBJECTS)
         allow_reduced_precision_inference: true # Allow inference to run at a lower precision to improve runtime and memory usage
         max_range: 20.0 # [m] Defines a upper depth range for detections
         confidence_threshold: 50.0 # [DYNAMIC] - Minimum value of the detection confidence of an object [0,99]