-
Notifications
You must be signed in to change notification settings - Fork 144
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[NVIDIA] Add Multi-graph Feature to NVIDIA Plugin (#710)
* [NVIDIA] Change CudaGraphTopologyRunner/SubGraph to aggregation from inheritance
* [NVIDIA] Add new constructor to SubGraph
* [NVIDIA] Update CudaGraphContext to use vectors
* [NVIDIA] Extract inputs/outputs to TensorMappingContext
* [NVIDIA] Update SubGraph to use direct exec_sequence_ and std::shared_ptr<MemoryManager>
* [NVIDIA] Add CudaGraphInfo and update CudaGraphContext to use them
* [NVIDIA] Fix single-graph tests
* [NVIDIA] Add CudaMultiGraphTest test
* [NVIDIA] Add SubGraph::IsCudaGraphCompatible() cache
* [NVIDIA] Add execute_sequence/capture_sequence member functions to Profiler
* [NVIDIA] Enable TensorIterator to use Profiler::execute_sequence()
* [NVIDIA] Enable SubGraph to use Profiler::execute/capture_sequence()
* [NVIDIA] Extract ITopologyRunner into a separate header
* [NVIDIA] Update tests to include cuda_eager_topology_runner.hpp
* [NVIDIA] Add IExecutionDelegator
* [NVIDIA] Add cuda_perf_counts.hpp
* [NVIDIA] Add SimpleExecutionDelegator class and use it when profiling is not needed
* [NVIDIA] Update tests to use SimpleExecutionDelegator
* [NVIDIA] Add updateExecSequence() to TensorIteratorOp
* [NVIDIA] Update TensorIteratorOp::IsCudaGraphCompatible() to use SubGraph implementation
* [NVIDIA] Add rebase fixes
* [NVIDIA] Add comment fixes
* [NVIDIA] Rename functions to correspond to OV coding style
* [NVIDIA] Add number_of_cuda_graphs property
* [NVIDIA] Fix and update SimpleExecutionDelegator
* [NVIDIA] Fix build error on some configurations
* [NVIDIA] Temporarily disable CUDA graph compatibility for TensorIterator
- Loading branch information
1 parent
ebaf9dd
commit 394a8cf
Showing 48 changed files with 1,291 additions and 411 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,132 @@ | ||
// Copyright (C) 2018-2023 Intel Corporation | ||
// SPDX-License-Identifier: Apache-2.0 | ||
// | ||
|
||
#include "cuda_graph_context.hpp" | ||
|
||
namespace ov { | ||
namespace nvidia_gpu { | ||
|
||
// Discards every captured graph and rewinds the current index to the first
// slot, returning the context to its freshly-constructed state.
void CudaGraphContext::reset() {
    currentGraphIndex_ = 0;
    graphs_.clear();
}
|
||
// Appends a new, empty CudaGraphInfo and points the current index at it so
// that subsequent add_parameter()/add_result()/add_graph() calls target the
// fresh entry.
void CudaGraphContext::start_next_graph_addition() {
    const auto freshIndex = graphs_.size();
    graphs_.emplace_back();
    currentGraphIndex_ = freshIndex;
}
|
||
void CudaGraphContext::add_parameter(const std::string& tensorName, | ||
const CUDA::Stream& stream, | ||
CUDA::DevicePointer<void*> dst, | ||
const void* src, | ||
std::size_t size) { | ||
OPENVINO_ASSERT(currentGraphIndex_ < graphs_.size(), "Graph index/vector size incosistency"); | ||
graphs_[currentGraphIndex_].add_parameter(tensorName, stream, dst, src, size); | ||
} | ||
|
||
void CudaGraphContext::add_result(const std::string& tensorName, | ||
const CUDA::Stream& stream, | ||
void* dst, | ||
CUDA::DevicePointer<const void*> src, | ||
std::size_t size) { | ||
OPENVINO_ASSERT(currentGraphIndex_ < graphs_.size(), "Graph index/vector size incosistency"); | ||
graphs_[currentGraphIndex_].add_result(tensorName, stream, dst, src, size); | ||
} | ||
|
||
// Stores the fully captured CUDA graph into the current CudaGraphInfo slot
// (which also instantiates its executable form — see CudaGraphInfo::set_graph).
// Fix: corrected the "incosistency" typo in the assertion message.
void CudaGraphContext::add_graph(const CUDA::Graph& graph) {
    OPENVINO_ASSERT(currentGraphIndex_ < graphs_.size(), "Graph index/vector size inconsistency");
    graphs_[currentGraphIndex_].set_graph(graph);
}
|
||
bool CudaGraphContext::is_initialized() const { | ||
const auto size = graphs_.size(); | ||
return size != 0 && graphs_[size - 1].is_initialized(); | ||
} | ||
|
||
// Walks every captured graph and re-binds its memcpy nodes to the tensor
// addresses currently held in `context`. Note that currentGraphIndex_ is left
// equal to graphs_.size() afterwards, matching the original loop's exit state.
void CudaGraphContext::update_capture(const TensorMappingContext& context) {
    currentGraphIndex_ = 0;
    while (currentGraphIndex_ < graphs_.size()) {
        graphs_[currentGraphIndex_].update_capture(context);
        ++currentGraphIndex_;
    }
}
|
||
// Launches the graph stored at `index` on `stream`. currentGraphIndex_ is
// updated first (it is mutable, hence assignable in this const member).
// Fix: corrected the "incosistency" typo in the assertion message.
void CudaGraphContext::launch(std::size_t index, const CUDA::Stream& stream) const {
    currentGraphIndex_ = index;
    OPENVINO_ASSERT(currentGraphIndex_ < graphs_.size(), "Graph index/vector size inconsistency");
    graphs_[currentGraphIndex_].launch(stream);
}
|
||
std::size_t CudaGraphContext::get_params_count() const { | ||
std::size_t res = 0; | ||
for (const auto& graph : graphs_) { | ||
res += graph.get_params_count(); | ||
} | ||
return res; | ||
} | ||
|
||
std::size_t CudaGraphContext::get_results_count() const { | ||
std::size_t res = 0; | ||
for (const auto& graph : graphs_) { | ||
res += graph.get_results_count(); | ||
} | ||
return res; | ||
} | ||
|
||
std::size_t CudaGraphContext::get_graphs_count() const { return graphs_.size(); } | ||
|
||
void CudaGraphContext::CudaGraphInfo::add_parameter(const std::string& tensorName, | ||
const CUDA::Stream& stream, | ||
CUDA::DevicePointer<void*> dst, | ||
const void* src, | ||
std::size_t size) { | ||
CUDA::CaptureInfo captureInfo{stream}; | ||
parameterNodes_.emplace(tensorName, captureInfo.addUploadNode(dst, src, size)); | ||
} | ||
|
||
void CudaGraphContext::CudaGraphInfo::add_result(const std::string& tensorName, | ||
const CUDA::Stream& stream, | ||
void* dst, | ||
CUDA::DevicePointer<const void*> src, | ||
std::size_t size) { | ||
CUDA::CaptureInfo captureInfo{stream}; | ||
resultNodes_.emplace(tensorName, captureInfo.addDownloadNode(dst, src, size)); | ||
} | ||
|
||
// Stores a copy of the captured graph and builds its executable instance;
// once both optionals are engaged, is_initialized() reports true.
void CudaGraphContext::CudaGraphInfo::set_graph(const CUDA::Graph& graph) {
    graphExec_.emplace(graph);
    graph_.emplace(graph);
}
|
||
bool CudaGraphContext::CudaGraphInfo::is_initialized() const { return graph_.has_value() && graphExec_.has_value(); } | ||
|
||
void CudaGraphContext::CudaGraphInfo::update_capture(const TensorMappingContext& context) { | ||
for (auto&& [tensorName, node] : parameterNodes_) { | ||
node.update_src(graphExec_.value(), (context.get_input_tensor(tensorName)->data())); | ||
} | ||
for (auto&& [tensorName, node] : resultNodes_) { | ||
node.update_dst(graphExec_.value(), context.get_output_tensor(tensorName)->data()); | ||
} | ||
} | ||
|
||
void CudaGraphContext::CudaGraphInfo::launch(const CUDA::Stream& stream) const { graphExec_.value().launch(stream); } | ||
|
||
std::size_t CudaGraphContext::CudaGraphInfo::get_params_count() const { return parameterNodes_.size(); } | ||
|
||
std::size_t CudaGraphContext::CudaGraphInfo::get_results_count() const { return resultNodes_.size(); } | ||
|
||
// Two infos are equal when the captured graph/exec pair and all recorded
// upload/download node maps compare equal (== only; comparisons are pure).
bool operator==(const CudaGraphContext::CudaGraphInfo& lhs, const CudaGraphContext::CudaGraphInfo& rhs) {
    const bool sameGraphs = (lhs.graph_ == rhs.graph_) && (lhs.graphExec_ == rhs.graphExec_);
    const bool sameNodes = (lhs.parameterNodes_ == rhs.parameterNodes_) && (lhs.resultNodes_ == rhs.resultNodes_);
    return sameGraphs && sameNodes;
}
|
||
// Inequality is defined as the negation of operator== to keep the pair in sync.
bool operator!=(const CudaGraphContext::CudaGraphInfo& lhs, const CudaGraphContext::CudaGraphInfo& rhs) {
    return !(lhs == rhs);
}
|
||
bool operator==(const CudaGraphContext& lhs, const CudaGraphContext& rhs) { return lhs.graphs_ == rhs.graphs_; } | ||
|
||
bool operator!=(const CudaGraphContext& lhs, const CudaGraphContext& rhs) { return !(lhs == rhs); } | ||
|
||
} // namespace nvidia_gpu | ||
} // namespace ov |
Oops, something went wrong.