[NVIDIA] Some legacy code cleanup #708

Merged: 1 commit, Aug 28, 2023
5 changes: 2 additions & 3 deletions modules/nvidia_plugin/src/cuda/dnn_be_algo.hpp
@@ -10,7 +10,6 @@

#include "dnn_be.hpp"
#include "event.hpp"
#include "ie_common.h"

namespace CUDA {

@@ -27,7 +26,7 @@ inline std::vector<std::shared_ptr<DnnBEExecutionPlan>> getAllExecutionPlansFrom
try {
auto plan = CUDA::DnnBEExecutionPlanBuilder().setDnnHandle(dnnHandle).setEngineConfig(config).build();
plans.push_back(std::move(plan));
} catch (const InferenceEngine::Exception&) {
} catch (const ov::Exception&) {
continue;
}
}
@@ -42,7 +41,7 @@ inline std::vector<std::shared_ptr<DnnBEExecutionPlan>> getAllExecutionPlansFrom
auto plan =
CUDA::DnnBEExecutionPlanBuilder().setDnnHandle(dnnHandle).setEngineConfig(engineConfig).build();
plans.push_back(std::move(plan));
} catch (const InferenceEngine::Exception&) {
} catch (const ov::Exception&) {
continue;
}
}
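The plan-enumeration loops above now swallow ov::Exception (declared in openvino/core/except.hpp) instead of the legacy InferenceEngine::Exception. A minimal sketch of the same pattern, with Plan, Config and buildPlan() as hypothetical stand-ins for the DnnBE types and builder:

    #include <vector>
    #include "openvino/core/except.hpp"

    struct Plan {};    // stand-in for DnnBEExecutionPlan
    struct Config {};  // stand-in for a backend engine config

    // Hypothetical helper standing in for CUDA::DnnBEExecutionPlanBuilder().build();
    // assume it throws ov::Exception when the backend rejects a config.
    Plan buildPlan(const Config& config);

    std::vector<Plan> collectPlans(const std::vector<Config>& configs) {
        std::vector<Plan> plans;
        for (const auto& config : configs) {
            try {
                plans.push_back(buildPlan(config));
            } catch (const ov::Exception&) {
                continue;  // skip configurations the backend cannot realize
            }
        }
        return plans;
    }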
@@ -13,8 +13,6 @@
namespace ov {
namespace nvidia_gpu {

using Blob = InferenceEngine::Blob;

class Profiler;

class InferenceRequestContext {
@@ -45,8 +43,6 @@ class InferenceRequestContext {
is_benchmark_mode_{isBenchmarkMode} {}
// don't allow storing references to temporary
template <typename... Args>
InferenceRequestContext(InferenceEngine::BlobMap&& inputs, Args... args) = delete;
template <typename... Args>
InferenceRequestContext(std::vector<std::shared_ptr<ov::Tensor>>&& inputs,
std::map<std::string, std::size_t>&& inputMapping,
std::vector<std::shared_ptr<ov::Tensor>>&& outputs,
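With the Blob alias and the BlobMap constructor overload removed, callers hand inputs and outputs to InferenceRequestContext as vectors of ov::Tensor plus name-to-index maps. A hedged sketch of assembling those containers (the tensor name and shape are illustrative, not taken from the plugin):

    #include <cstddef>
    #include <map>
    #include <memory>
    #include <string>
    #include <vector>
    #include "openvino/runtime/tensor.hpp"

    // Illustrative only: build the containers the remaining constructor expects.
    void prepareInputs(std::vector<std::shared_ptr<ov::Tensor>>& inputs,
                       std::map<std::string, std::size_t>& inputMapping) {
        auto tensor = std::make_shared<ov::Tensor>(ov::element::f32, ov::Shape{1, 3, 224, 224});
        inputMapping["input_0"] = inputs.size();  // map the input name to its position in the vector
        inputs.push_back(std::move(tensor));
    }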
2 changes: 1 addition & 1 deletion modules/nvidia_plugin/src/cuda_op_buffers_extractor.cpp
@@ -273,7 +273,7 @@ void OperationBuffersExtractor::initConstantMemory(DeviceMemBlock::Ptr memory_bl
for (const auto& buffer_id : memory_block->bufferIds()) {
auto span = immutableBuffer(buffer_id);
void* device_ptr = memory_block->deviceBufferPtr(buffer_id);
IE_ASSERT(device_ptr != nullptr);
OPENVINO_ASSERT(device_ptr != nullptr);
throwIfError(::cudaMemcpy(device_ptr, span.data(), span.size_bytes(), cudaMemcpyHostToDevice));
}
}
11 changes: 6 additions & 5 deletions modules/nvidia_plugin/src/cuda_op_buffers_extractor.hpp
@@ -14,6 +14,7 @@
#include <unordered_map>
#include <vector>

#include "openvino/core/node.hpp"
#include "memory_manager/cuda_workbuffers.hpp"

namespace ov {
@@ -36,7 +37,7 @@ class OperationBuffersExtractor {
* Nodes are ordered in their execution order.
* @param [in] is_stable_params Makes input parameters alive for whole graph's life time
* @param [in] is_stable_results Makes output results alive for till end of the graph's life time
* @throws InferenceEngineException if the given subgraph is bad formed
* @throws ov::Exception if the given subgraph is bad formed
*/
OperationBuffersExtractor(gsl::span<const NodePtr> ordered_nodes,
bool is_stable_params = false,
@@ -61,7 +62,7 @@ class OperationBuffersExtractor {
* @param buffer_id Identifier of a buffer.
* Can be obtained via InputBufferIds or OutputBufferIds
* @returns Lifespan start of the given buffer
* @throws InferenceEngine::details::InferenceEngineException
* @throws ov::Exception
* if buffer with the provided index doesn't exist
*/
int mutableBufferLifespanStart(BufferID buffer_id) const;
@@ -71,7 +72,7 @@ class OperationBuffersExtractor {
* @param buffer_id Identifier of a buffer.
* Can be obtained via InputBufferIds or OutputBufferIds
* @returns Lifespan end of the given buffer
* @throws InferenceEngine::details::InferenceEngineException
* @throws ov::Exception
* if buffer with the provided index doesn't exist
*/
int mutableBufferLifespanEnd(BufferID buffer_id) const;
@@ -81,7 +82,7 @@ class OperationBuffersExtractor {
* @param buffer_id Identifier of a buffer.
* Can be obtained via InputBufferIds or OutputBufferIds
* @returns Size of the given buffer
* @throws InferenceEngine::details::InferenceEngineException
* @throws ov::Exception
* if buffer with the provided index doesn't exist
*/
std::size_t mutableBufferSize(BufferID buffer_id) const;
@@ -90,7 +91,7 @@ class OperationBuffersExtractor {
* Provides mutable buffer content
* @param buffer_id Identifier of a buffer.
* @returns mutable buffer content
* @throws InferenceEngine::details::InferenceEngineException
* @throws ov::Exception
* if buffer with the provided index doesn't exist
*/
gsl::span<const Byte> immutableBuffer(BufferID buffer_id) const;
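The extractor header now includes openvino/core/node.hpp directly for the node type behind NodePtr, instead of relying on a legacy InferenceEngine header to drag it in. Assuming NodePtr is an alias along these lines (the actual alias is defined elsewhere in the plugin, so treat this as an assumption):

    #include <memory>
    #include "openvino/core/node.hpp"

    // Assumed alias; the real definition lives elsewhere in the plugin sources.
    using NodePtr = std::shared_ptr<ov::Node>;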
@@ -23,7 +23,7 @@ class DeviceMemBlock {
using Ptr = std::shared_ptr<DeviceMemBlock>;

/**
* @throws InferenceEngineException if device memory block allocation
* @throws ov::Exception if device memory block allocation
* failed.
*/
DeviceMemBlock(MemoryModel::Ptr model);
@@ -4,13 +4,13 @@

#include "cuda_immutable_memory_block_builder.hpp"

#include <details/ie_exception.hpp>
#include "openvino/core/except.hpp"

namespace ov {
namespace nvidia_gpu {

void ImmutableMemoryBlockBuilder::addAllocation(BufferID id, const void* data, size_t bsize) {
IE_ASSERT(data != nullptr);
OPENVINO_ASSERT(data != nullptr);
model_builder_.addAllocation(id, bsize);
allocations_.emplace_back(AllocRecord{id, data, bsize});
}
@@ -22,7 +22,7 @@ std::pair<DeviceMemBlock::Ptr, MemoryModel::Ptr> ImmutableMemoryBlockBuilder::bu
auto memory_block = std::make_shared<DeviceMemBlock>(memory_model);
for (const auto& allocation : allocations_) {
void* device_ptr = memory_block->deviceBufferPtr(allocation.id);
IE_ASSERT(device_ptr != nullptr);
OPENVINO_ASSERT(device_ptr != nullptr);
throwIfError(::cudaMemcpy(device_ptr, allocation.data, allocation.bsize, cudaMemcpyHostToDevice));
}
return {memory_block, memory_model};
@@ -30,7 +30,7 @@ class ImmutableMemoryBlockBuilder {
* pointer is still valid when `ImmutableMemoryBlockBuilder::build()` method
* is invoked.
* @param [in] bsize Tensor memory size in bytes.
* @throws InferenceEngineException if
* @throws ov::Exception if
* - allocation size is zero
* - tensor with specified id is already added
* - data pointer is nullptr
@@ -3,9 +3,6 @@
//

#include "cuda_memory_manager.hpp"

#include <details/ie_exception.hpp>

#include "cuda_operation_base.hpp"

namespace ov {
@@ -24,8 +21,7 @@ MemoryManager::InputTensors MemoryManager::inputTensorPointers(const IOperationM
for (auto id : operation.GetInputIds()) {
const void* ptr = immutable_tensors_->deviceTensorPtr(id);
if (ptr == nullptr) ptr = mutable_tensors_model_->deviceTensorPtr(mutableBufferPtr.cast<uint8_t*>(), id);

IE_ASSERT(ptr != nullptr) << "Tensor not found. ID is " << id;
OPENVINO_ASSERT(ptr != nullptr, "Tensor not found. ID is " + to_string(id));
result.emplace_back(ptr);
}
return result;
@@ -37,7 +33,7 @@ MemoryManager::OutputTensors MemoryManager::outputTensorPointers(const IOperatio
for (auto id : operation.GetOutputIds()) {
void* ptr = mutable_tensors_model_->deviceTensorPtr(mutableBufferPtr.cast<uint8_t*>(), id);

IE_ASSERT(ptr != nullptr) << "Tensor not found. ID is " << id;
OPENVINO_ASSERT(ptr != nullptr, "Tensor not found. ID is " + to_string(id));
result.emplace_back(ptr);
}
return result;
@@ -49,12 +45,12 @@ Workbuffers MemoryManager::workBuffers(const IOperationExec& operation,
const auto& indices = operation.GetWorkbufferIds();
for (const auto immutable_id : indices.immutableIds) {
void* ptr = immutable_workbuffers_->deviceBufferPtr(immutable_id);
IE_ASSERT(ptr != nullptr) << "Workbuffer not found. ID is " << immutable_id;
OPENVINO_ASSERT(ptr != nullptr, "Workbuffer not found. ID is " + std::to_string(immutable_id));
result.immutable_buffers.emplace_back(ptr);
}
for (const auto mutable_id : indices.mutableIds) {
void* ptr = mutable_tensors_model_->deviceBufferPtr(mutableBufferPtr.cast<uint8_t*>(), mutable_id);
IE_ASSERT(ptr != nullptr) << "Workbuffer not found. ID is " << mutable_id;
OPENVINO_ASSERT(ptr != nullptr, "Workbuffer not found. ID is " + std::to_string(mutable_id));
result.mutable_buffers.emplace_back(ptr);
}
return result;
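Note the change in how the assertion message is built: IE_ASSERT streamed extra context with operator<<, while OPENVINO_ASSERT takes the condition followed by the message. A minimal sketch of the new form with a hypothetical id:

    #include <cstddef>
    #include <string>
    #include "openvino/core/except.hpp"

    void checkPointer(const void* ptr, std::size_t id) {
        // On failure this throws ov::AssertFailure, which derives from ov::Exception,
        // so the catch sites shown earlier in this diff still apply.
        OPENVINO_ASSERT(ptr != nullptr, "Tensor not found. ID is " + std::to_string(id));
    }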
@@ -50,7 +50,7 @@ class MemoryManager {
* @param[in] operation An operation which defines input tensors.
* @param[in] mutableBufferPtr A memory block based on which mapping is performed.
* @returns An array of corresponding input tensor pointers.
* @throws InferenceEngineException if any of tensor pointers is not found
* @throws ov::Exception if any of tensor pointers is not found
*/
InputTensors inputTensorPointers(const IOperationMeta& operation,
CUDA::DevicePointer<void*> mutableBufferPtr) const;
@@ -60,7 +60,7 @@ class MemoryManager {
* @param[in] operation An operation which defines output tensors.
* @param[in] mutableBufferPtr A memory block based on which mapping is performed.
* @returns An array of corresponding output tensor pointers.
* @throws InferenceEngineException if any of tensor pointers is not found
* @throws ov::Exception if any of tensor pointers is not found
*/
OutputTensors outputTensorPointers(const IOperationMeta& operation,
CUDA::DevicePointer<void*> mutableBufferPtr) const;
@@ -70,7 +70,7 @@ class MemoryManager {
* @param[in] operation An operation
* @param[in] mutableBufferPtr A memory block based on which mapping is performed.
* @returns Work buffer pointers
* @throws InferenceEngineException if any of tensor pointers is not found
* @throws ov::Exception if any of tensor pointers is not found
*/
Workbuffers workBuffers(const IOperationExec& operation, CUDA::DevicePointer<void*> mutableBufferPtr) const;

@@ -3,9 +3,7 @@
//

#include "cuda_immutable_memory_model_builder.hpp"

#include <details/ie_exception.hpp>

#include "openvino/core/except.hpp"
#include "memory_manager/model/details/cuda_memory_utils.hpp"

namespace ov {
@@ -14,9 +12,9 @@ namespace nvidia_gpu {
ImmutableMemoryModelBuilder::ImmutableMemoryModelBuilder() : end_offset_{0} {}

void ImmutableMemoryModelBuilder::addAllocation(BufferID id, size_t bsize) {
IE_ASSERT(bsize > 0); // Verify that allocation size isn't zero.
OPENVINO_ASSERT(bsize > 0, "Allocation size is zero!"); // Verify that allocation size isn't zero.
auto res = offsets_.emplace(id, end_offset_);
IE_ASSERT(res.second); // Verify that "id" is unique.
OPENVINO_ASSERT(res.second, "ID is not unique!"); // Verify that "id" is unique.
end_offset_ += applyAllignment(bsize);
}

@@ -23,7 +23,7 @@ class ImmutableMemoryModelBuilder {
* @param [in] id Buffer identifier. Will be used to obtain device side
* tensor pointer.
* @param [in] bsize Tensor memory size in bytes.
* @throws InferenceEngineException if allocation size is zero or tensor
* @throws ov::Exception if allocation size is zero or tensor
* with specified id is already added.
*/
void addAllocation(BufferID id, size_t bsize);
@@ -3,18 +3,16 @@
//

#include "cuda_memory_model_builder.hpp"

#include <details/ie_exception.hpp>

#include "openvino/core/except.hpp"
#include "memory_manager/model/details/cuda_memory_utils.hpp"

namespace ov {
namespace nvidia_gpu {

void MemoryModelBuilder::addAllocation(BufferID id, int producerIndex, int lastConsumerIndex, size_t bsize) {
IE_ASSERT(bsize > 0); // Verify that allocation size isn't zero.
OPENVINO_ASSERT(bsize > 0, "Allocation size is zero!"); // Verify that allocation size isn't zero.
auto res = offsets_.emplace(id, 0);
IE_ASSERT(res.second); // Verify that "id" is unique.
OPENVINO_ASSERT(res.second, "ID is not unique!"); // Verify that "id" is unique.
const int64_t aligned_size = static_cast<int64_t>(applyAllignment(bsize));
boxes_.emplace_back(MemorySolver::Box{producerIndex, lastConsumerIndex, aligned_size, id});
}
@@ -26,7 +26,7 @@ class MemoryModelBuilder {
* @param [in] lastConsumerIndex The execution order index of last use. After that
* data will be released. -1 is a reserved value for "till to end".
* @param [in] bsize Tensor memory size in bytes.
* @throws InferenceEngineException if allocation size is zero or tensor
* @throws ov::Exception if allocation size is zero or tensor
* with specified id is already added.
*/
void addAllocation(BufferID id, int producerIndex, int lastConsumerIndex, size_t bsize);
9 changes: 7 additions & 2 deletions modules/nvidia_plugin/src/memory_manager/tensor_types.hpp
@@ -4,13 +4,12 @@

#pragma once

#include <ie_extension.h>

#include <cstdint>
#include <error.hpp>
#include <iostream>
#include <memory>
#include <optional>
#include <sstream>
#include <unordered_map>
#include <utility>

@@ -78,5 +77,11 @@ inline std::ostream& operator<<(std::ostream& s, const TensorID& t) {
return s;
}

inline std::string to_string(const TensorID& x) {
std::ostringstream ss;
ss << x;
return ss.str();
}

} // namespace nvidia_gpu
} // namespace ov
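The new to_string overload reuses the existing operator<< for TensorID, which is what lets the memory-manager assertions earlier in this diff write "Tensor not found. ID is " + to_string(id). A generic sketch of the same idiom for any type that already has an operator<<:

    #include <sstream>
    #include <string>

    // Generic form of the helper: stringify any value that supports operator<<.
    template <typename T>
    std::string stream_to_string(const T& value) {
        std::ostringstream ss;
        ss << value;
        return ss.str();
    }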
@@ -7,6 +7,7 @@
#include <vector>

#include "kernels/details/numpy_broadcast_mapper.cuh"
#include "openvino/core/shape.hpp"
#include "workbuffer_desc.hpp"

namespace ov {
@@ -109,8 +109,8 @@ void ConvolutionDescriptorsCuDnn::BenchmarkOptimalAlgo(const CUDA::DnnHandle& dn
for (auto& algo : cudnnAlgos) {
FindAlgo(dnnHandle);
algo = algo_perf_;
IE_ASSERT(algo_perf_.algo >= 0);
IE_ASSERT(algo_perf_.algo < convForwardAlgorithmMaxCount);
OPENVINO_ASSERT(algo_perf_.algo >= 0);
OPENVINO_ASSERT(algo_perf_.algo < convForwardAlgorithmMaxCount);
timesCuDNNAlgosSelected[algo_perf_.algo] += 1;
}
auto maxAlgoIter = std::max_element(timesCuDNNAlgosSelected.begin(), timesCuDNNAlgosSelected.end());
@@ -340,8 +340,8 @@ void ConvolutionBackpropDataDescriptorCuDnn::BenchmarkOptimalAlgo(const CUDA::Dn
for (auto& algo : cudnnAlgos) {
FindAlgo(dnnHandle);
algo = algo_perf_;
IE_ASSERT(algo_perf_.algo >= 0);
IE_ASSERT(algo_perf_.algo < convBackwardDataAlgorithmMaxCount);
OPENVINO_ASSERT(algo_perf_.algo >= 0);
OPENVINO_ASSERT(algo_perf_.algo < convBackwardDataAlgorithmMaxCount);
timesCuDNNAlgosSelected[algo_perf_.algo] += 1;
}
auto maxAlgoIter = std::max_element(timesCuDNNAlgosSelected.begin(), timesCuDNNAlgosSelected.end());
2 changes: 1 addition & 1 deletion modules/nvidia_plugin/src/ops/subgraph.cpp
@@ -116,7 +116,7 @@ std::vector<DevicePointer<void*>> SubGraph::getSharedWorkbuffers(const IOperatio
result.reserve(ids.immutableIds.size());
for (const auto immutable_id : ids.immutableIds) {
void* ptr = memory_manager_->immutableWorkbuffers().deviceBufferPtr(immutable_id);
IE_ASSERT(ptr != nullptr) << "Workbuffer not found. ID is " << immutable_id;
OPENVINO_ASSERT(ptr != nullptr, "Workbuffer not found. ID is " + std::to_string(immutable_id));
result.emplace_back(ptr);
}
return result;
5 changes: 0 additions & 5 deletions modules/nvidia_plugin/src/ops/tensor_iterator.cpp
@@ -4,8 +4,6 @@

#include "tensor_iterator.hpp"

#include <cpp/ie_cnn_network.h>

#include <cstdint>
#include <cuda_op_buffers_extractor.hpp>
#include <cuda_profiler.hpp>
@@ -43,9 +41,6 @@ TensorIteratorOp::TensorIteratorOp(const CreationContext& context,
outputs_info_.emplace_back(getTensorByteSize(output), output.get_element_type(), output.get_shape());
}

// Get body topology from ngraph func1tion
InferenceEngine::CNNNetwork body_network(op.get_body());

// Setup input_primitive_maps/ output_primitive_maps and back_edges
const auto& loop_input_descs = op.get_input_descriptions();
const auto& loop_output_descs = op.get_output_descriptions();
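The CNNNetwork wrapper around the loop body is dropped because, in the OpenVINO 2.0 API, the body returned by get_body() is already an ov::Model that can be inspected directly. A hedged sketch (the surrounding op type and variable names are illustrative, not the plugin's actual code):

    #include <memory>
    #include "openvino/core/model.hpp"
    #include "openvino/op/tensor_iterator.hpp"

    void inspectBody(ov::op::v0::TensorIterator& op) {
        // get_body() hands back std::shared_ptr<ov::Model>; no CNNNetwork conversion step needed.
        std::shared_ptr<ov::Model> body = op.get_body();
        const auto& params = body->get_parameters();   // body inputs
        const auto& results = body->get_results();     // body outputs
        (void)params;
        (void)results;
    }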