Skip to content

Commit

Permalink
[NVIDIA] Make USE_CUDA_GRAPH a public option and set its default as true
Browse files · Browse the repository at this point in the history
  • Loading branch information
Andrii Pavliuk authored and nkogteva committed Jul 19, 2023
1 parent 0393ec8 commit bf8bce2
Show file tree
Hide file tree
Showing 6 changed files with 14 additions and 13 deletions.
1 change: 1 addition & 0 deletions modules/nvidia_plugin/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,7 @@ Please refer to OpenVINO documentation for details.

### Plugin specific parameters
* `ov::nvidia_gpu::operation_benchmark` - specifies if operation level benchmark should be run for increasing performance of network (`false` by default)
* `ov::nvidia_gpu::use_cuda_graph` - specifies if NVIDIA plugin attempts to use CUDA Graph feature to speed up sequential network inferences (`true` by default)

All parameters must be set before calling `ov::Core::compile_model()` in order to take effect.

Expand Down
5 changes: 5 additions & 0 deletions modules/nvidia_plugin/include/nvidia/properties.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,5 +24,10 @@ namespace nvidia_gpu {
*/
static constexpr Property<bool, PropertyMutability::RW> operation_benchmark{"NVIDIA_OPERATION_BENCHMARK"};

/**
* @brief Specifies if NVIDIA plugin attempts to use CUDA Graph feature to speed up sequential network inferences
*/
static constexpr ov::Property<bool, ov::PropertyMutability::RW> use_cuda_graph{"NVIDIA_USE_CUDA_GRAPH"};

} // namespace nvidia_gpu
} // namespace ov
4 changes: 2 additions & 2 deletions modules/nvidia_plugin/src/cuda_compiled_model.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,8 @@ CompiledModel::CompiledModel(const std::shared_ptr<const ov::Model>& model,
config_(std::move(cfg)),
cuda_stream_executor_(std::move(wait_executor)),
loaded_from_cache_(loaded_from_cache),
use_cuda_graph_{get_property(ov::nvidia_gpu::internal::use_cuda_graph.name()).as<bool>() &&
!get_property(ov::enable_profiling.name()).as<bool>()} {
use_cuda_graph_{get_property(ov::nvidia_gpu::use_cuda_graph.name()).as<bool>() &&
!get_property(ov::enable_profiling.name()).as<bool>()} {
try {
compile_model(model);
init_executor(); // creates thread-based executor using for async requests
Expand Down
5 changes: 3 additions & 2 deletions modules/nvidia_plugin/src/cuda_config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ std::vector<ov::PropertyName> Configuration::get_rw_properties() {
ov::PropertyName{ov::hint::execution_mode.name(), ov::PropertyMutability::RW},
ov::PropertyName{ov::enable_profiling.name(), ov::PropertyMutability::RW},
ov::PropertyName{ov::nvidia_gpu::operation_benchmark.name(), ov::PropertyMutability::RW},
ov::PropertyName{ov::nvidia_gpu::use_cuda_graph.name(), ov::PropertyMutability::RW},
};
return rw_properties;
}
Expand Down Expand Up @@ -166,7 +167,7 @@ Configuration::Configuration(const ov::AnyMap& config, const Configuration& defa
streams_executor_config_.set_property(key, value);
} else if (ov::nvidia_gpu::operation_benchmark == key) {
operation_benchmark = value.as<bool>();
} else if (internal::use_cuda_graph == key) {
} else if (ov::nvidia_gpu::use_cuda_graph == key) {
use_cuda_graph = value.as<bool>();
} else if (ov::enable_profiling == key) {
is_profiling_enabled = value.as<bool>();
Expand Down Expand Up @@ -200,7 +201,7 @@ ov::Any Configuration::get(const std::string& name) const {
return is_profiling_enabled;
} else if (name == ov::nvidia_gpu::operation_benchmark) {
return operation_benchmark;
} else if (name == internal::use_cuda_graph) {
} else if (name == ov::nvidia_gpu::use_cuda_graph) {
return use_cuda_graph;
} else if (name == ov::num_streams) {
return (num_streams == 0) ?
Expand Down
9 changes: 1 addition & 8 deletions modules/nvidia_plugin/src/cuda_config.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,6 @@
namespace ov {
namespace nvidia_gpu {

namespace internal {
/**
* @brief Defines if NVIDIA Plugin should use CUDA graphs for performance acceleration
*/
static constexpr ov::Property<bool, ov::PropertyMutability::RW> use_cuda_graph{"NVIDIA_USE_CUDA_GRAPH"};

} // namespace internal
struct Configuration {
using Ptr = std::shared_ptr<Configuration>;

Expand Down Expand Up @@ -58,7 +51,7 @@ struct Configuration {
int device_id = 0;
bool is_profiling_enabled = false;
bool operation_benchmark = false;
bool use_cuda_graph = false;
bool use_cuda_graph = true;
uint32_t hint_num_requests = 0;
ov::streams::Num num_streams = 0;
ov::hint::PerformanceMode performance_mode = ov::hint::PerformanceMode::LATENCY;
Expand Down
3 changes: 2 additions & 1 deletion — (filename not captured in this extraction; content suggests a NVIDIA plugin behavior/properties functional test source — verify against the original commit)
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,8 @@ const std::vector<ov::AnyMap> default_properties = {
{ov::hint::execution_mode(ov::hint::ExecutionMode::PERFORMANCE)},
{ov::enable_profiling(false)},
{ov::device::id("0")},
{ov::nvidia_gpu::operation_benchmark(false)}
{ov::nvidia_gpu::operation_benchmark(false)},
{ov::nvidia_gpu::use_cuda_graph(true)}
};

INSTANTIATE_TEST_SUITE_P(smoke_BehaviorTests,
Expand Down

0 comments on commit bf8bce2

Please sign in to comment.