Skip to content

Commit

Permalink
[NVIDIA] Make USE_CUDA_GRAPH a public option and set its default as true
Browse files · Browse the repository at this point in the history
  • Loading branch information
Andrii Pavliuk authored and nkogteva committed Jul 19, 2023
1 parent 0393ec8 commit bf8bce2
Show file tree
Hide file tree
Showing 6 changed files with 14 additions and 13 deletions.
1 change: 1 addition & 0 deletions modules/nvidia_plugin/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,7 @@ Please refer to OpenVINO documentation for details.

### Plugin specific parameters
* `ov::nvidia_gpu::operation_benchmark` - specifies if operation level benchmark should be run for increasing performance of network (`false` by default)
* `ov::nvidia_gpu::use_cuda_graph` - specifies if NVIDIA plugin attempts to use CUDA Graph feature to speed up sequential network inferences (`true` by default)

All parameters must be set before calling `ov::Core::compile_model()` in order to take effect.

Expand Down
5 changes: 5 additions & 0 deletions modules/nvidia_plugin/include/nvidia/properties.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,5 +24,10 @@ namespace nvidia_gpu {
*/
static constexpr Property<bool, PropertyMutability::RW> operation_benchmark{"NVIDIA_OPERATION_BENCHMARK"};

/**
* @brief Specifies if NVIDIA plugin attempts to use CUDA Graph feature to speed up sequential network inferences
*/
static constexpr ov::Property<bool, ov::PropertyMutability::RW> use_cuda_graph{"NVIDIA_USE_CUDA_GRAPH"};

} // namespace nvidia_gpu
} // namespace ov
4 changes: 2 additions & 2 deletions modules/nvidia_plugin/src/cuda_compiled_model.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,8 @@ CompiledModel::CompiledModel(const std::shared_ptr<const ov::Model>& model,
config_(std::move(cfg)),
cuda_stream_executor_(std::move(wait_executor)),
loaded_from_cache_(loaded_from_cache),
use_cuda_graph_{get_property(ov::nvidia_gpu::internal::use_cuda_graph.name()).as<bool>() &&
!get_property(ov::enable_profiling.name()).as<bool>()} {
use_cuda_graph_{get_property(ov::nvidia_gpu::use_cuda_graph.name()).as<bool>() &&
!get_property(ov::enable_profiling.name()).as<bool>()} {
try {
compile_model(model);
init_executor(); // creates thread-based executor using for async requests
Expand Down
5 changes: 3 additions & 2 deletions modules/nvidia_plugin/src/cuda_config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ std::vector<ov::PropertyName> Configuration::get_rw_properties() {
ov::PropertyName{ov::hint::execution_mode.name(), ov::PropertyMutability::RW},
ov::PropertyName{ov::enable_profiling.name(), ov::PropertyMutability::RW},
ov::PropertyName{ov::nvidia_gpu::operation_benchmark.name(), ov::PropertyMutability::RW},
ov::PropertyName{ov::nvidia_gpu::use_cuda_graph.name(), ov::PropertyMutability::RW},
};
return rw_properties;
}
Expand Down Expand Up @@ -166,7 +167,7 @@ Configuration::Configuration(const ov::AnyMap& config, const Configuration& defa
streams_executor_config_.set_property(key, value);
} else if (ov::nvidia_gpu::operation_benchmark == key) {
operation_benchmark = value.as<bool>();
} else if (internal::use_cuda_graph == key) {
} else if (ov::nvidia_gpu::use_cuda_graph == key) {
use_cuda_graph = value.as<bool>();
} else if (ov::enable_profiling == key) {
is_profiling_enabled = value.as<bool>();
Expand Down Expand Up @@ -200,7 +201,7 @@ ov::Any Configuration::get(const std::string& name) const {
return is_profiling_enabled;
} else if (name == ov::nvidia_gpu::operation_benchmark) {
return operation_benchmark;
} else if (name == internal::use_cuda_graph) {
} else if (name == ov::nvidia_gpu::use_cuda_graph) {
return use_cuda_graph;
} else if (name == ov::num_streams) {
return (num_streams == 0) ?
Expand Down
9 changes: 1 addition & 8 deletions modules/nvidia_plugin/src/cuda_config.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,6 @@
namespace ov {
namespace nvidia_gpu {

namespace internal {
/**
* @brief Defines if NVIDIA Plugin should use CUDA graphs for performance acceleration
*/
static constexpr ov::Property<bool, ov::PropertyMutability::RW> use_cuda_graph{"NVIDIA_USE_CUDA_GRAPH"};

} // namespace internal
struct Configuration {
using Ptr = std::shared_ptr<Configuration>;

Expand Down Expand Up @@ -58,7 +51,7 @@ struct Configuration {
int device_id = 0;
bool is_profiling_enabled = false;
bool operation_benchmark = false;
bool use_cuda_graph = false;
bool use_cuda_graph = true;
uint32_t hint_num_requests = 0;
ov::streams::Num num_streams = 0;
ov::hint::PerformanceMode performance_mode = ov::hint::PerformanceMode::LATENCY;
Expand Down
3 changes: 2 additions & 1 deletion — (filename not captured in this extraction; content suggests a NVIDIA plugin behavior/properties functional test source — verify against the original commit)
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,8 @@ const std::vector<ov::AnyMap> default_properties = {
{ov::hint::execution_mode(ov::hint::ExecutionMode::PERFORMANCE)},
{ov::enable_profiling(false)},
{ov::device::id("0")},
{ov::nvidia_gpu::operation_benchmark(false)}
{ov::nvidia_gpu::operation_benchmark(false)},
{ov::nvidia_gpu::use_cuda_graph(true)}
};

INSTANTIATE_TEST_SUITE_P(smoke_BehaviorTests,
Expand Down

0 comments on commit bf8bce2

Please sign in to comment.