Dynamic plugin reshape blob (#6969)
* Added nGraph as a public dependency

* Fixed Windows warning

* Fixed CMake

* Fixed constant op

* Fixed typo

* Added reshape to PartialShape to CNNNetwork

* Added SetShape to InferRequest

* Enable support of DynamicShape in IE Data

* Add support of dynamic shapes to template plugin

* Fixed build

* Enable support of dynamic rank

* Fixed test for dynamic rank

* Fixed some tests

* Fix preprocess tests

* Fixed SetBlob

* Fixed code style

* Add more tests

* Fixed accuracy tests

* Fixed documentation

* Added tests for custom operation

* Added new tests

* WIP: move setShape from infer request to Blob

* Returned isApplicable check back

* Removed obsolete tests for InferRequest::SetShape and added a new (trivial) test for Blob::setShape

* Fixed artifacts

* Break code style

* Revert "Break code style"

This reverts commit 71ee638.

* Added -j8 for fix_all

* Applied code style fixes

* Added doxygen items

* Fixed style

* Applied codestyle patch

* Reverted irrelevant commit with template extension

* Fixed cmake file for shared func tests (pick from master)

* Revert all changes in template_extension

* Deleted some old commented-out code that won't be used

* Fixed wrong exception throwing

* Code style fix

* Fixed preprocessing part

* Fixed incorrect blob reshape in GetBlob

* Deleted incorrect assert in GAPI that prevented some tests from passing in Debug

* Fixed issues identified during review

* Removed SetShape, replaced getLayoutByDims with getLayoutByRank, and removed problematic modification from IE preprocessing

* Fixed comments

* Removed obsolete setShape

* [VPU] Fixed allocating dynamic blobs in myriad_infer_request

* Fixed comments

* Fixed CNNNgraphImpl and comments

Co-authored-by: Ilya Churaev <ilya.churaev@intel.com>
Co-authored-by: Polina <polina.brzezinskaya@intel.com>
3 people authored Sep 1, 2021
1 parent 26eea3d commit bfae017
Showing 22 changed files with 1,625 additions and 78 deletions.
252 changes: 222 additions & 30 deletions docs/template_plugin/src/template_infer_request.cpp
@@ -4,6 +4,9 @@

#include "template_infer_request.hpp"

#include <debug.h>
#include <ie_compound_blob.h>

#include <algorithm>
#include <map>
#include <memory>
@@ -45,8 +48,6 @@ TemplateInferRequest::TemplateInferRequest(const InferenceEngine::InputsDataMap&
    };

    _executable = _executableNetwork->_plugin->_backend->compile(_executableNetwork->_function);
-    _parameters = _executableNetwork->_function->get_parameters();
-    _results = _executableNetwork->_function->get_results();

    allocateDeviceBuffers();
    allocateBlobs();
@@ -65,35 +66,51 @@ void TemplateInferRequest::allocateDeviceBuffers() {
    _outputTensors.resize(_networkOutputs.size());
}

template <typename BlobData, typename GetNetworkPrecisionF>
static void AllocateImplSingle(BlobMap& blobMap,
                               BlobMap& networkBlobMap,
                               const BlobData& blobData,
                               GetNetworkPrecisionF&& GetNetworkPrecision,
                               const SizeVector& dims) {
    const auto& precision = blobData.second->getTensorDesc().getPrecision();
    auto layout = blobData.second->getTensorDesc().getLayout();
    if (dims.size() > 0 && layout == InferenceEngine::Layout::SCALAR) {
        layout = InferenceEngine::Layout::ANY;
    }
    const auto deviceLayout = TensorDesc::getLayoutByDims(dims);
    Blob::Ptr blob = make_blob_with_precision({precision, dims, layout});
    blob->allocate();
    blobMap[blobData.first] = blob;

    auto networkPrecision = InferenceEngine::details::convertPrecision(GetNetworkPrecision(blobData.first));
    Blob::Ptr networkBlob;
    if (precision == networkPrecision && layout == deviceLayout) {
        networkBlob = blob;
    } else {
        networkBlob = make_blob_with_precision({networkPrecision, dims, deviceLayout});
        networkBlob->allocate();
    }
    networkBlobMap[blobData.first] = networkBlob;
}

template <typename BlobDataMap, typename GetNetworkPrecisionF>
static void AllocateImpl(const BlobDataMap& userDataMap,
                         BlobMap& userBlobMap,
                         BlobMap& deviceBlobMap,
                         GetNetworkPrecisionF&& GetNetworkPrecision,
                         bool isInputBlob = true) {
-    for (auto&& userData : userDataMap) {
-        const auto& dims = userData.second->getTensorDesc().getDims();
-        const auto deviceLayout = TensorDesc::getLayoutByDims(dims);
-        const auto userPrecision = userData.second->getTensorDesc().getPrecision();
-        const auto userLayout = userData.second->getTensorDesc().getLayout();
-        const auto networkPrecision = InferenceEngine::details::convertPrecision(GetNetworkPrecision(userData.first));
-        Blob::Ptr userBlob = make_blob_with_precision({userPrecision, dims, userLayout});
-        userBlob->allocate();
-        userBlobMap[userData.first] = userBlob;
-        Blob::Ptr deviceBlob;
-        if (userPrecision == networkPrecision && userLayout == deviceLayout) {
-            deviceBlob = userBlob;
-        } else {
-            if (userLayout != deviceLayout && !isInputBlob) {
-                IE_THROW(NotImplemented) << "Template Plugin: does not support setLayout for outputs";
-            }
-            deviceBlob = make_blob_with_precision({networkPrecision, dims, deviceLayout});
-            deviceBlob->allocate();
-        }
-        deviceBlobMap[userData.first] = deviceBlob;
+    for (const auto& userData : userDataMap) {
+        auto partialShape = userData.second->getPartialShape();
+        SizeVector dims;
+        if (partialShape.is_static()) {
+            dims = userData.second->getTensorDesc().getDims();
+        } else if (partialShape.rank().is_static()) {
+            dims = SizeVector(partialShape.rank().get_length(), 0);
+        } else {
+            dims = SizeVector{0};
+        }
+        AllocateImplSingle(userBlobMap, deviceBlobMap, userData, GetNetworkPrecision, dims);
    }
}
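
AllocateImpl above reduces a possibly-dynamic shape to placeholder dims before delegating to AllocateImplSingle. A self-contained sketch of that convention (the helper name dimsForShape is illustrative, not from this commit):

#include <ie_common.h>
#include <ngraph/partial_shape.hpp>

// Placeholder-dims convention used by AllocateImpl:
//   fully static shape -> the real dims, e.g. {1, 3, 224, 224}
//   static rank only   -> one zero per dimension, e.g. {0, 0, 0, 0}
//   dynamic rank       -> a single zero, {0}
InferenceEngine::SizeVector dimsForShape(const ngraph::PartialShape& shape) {
    if (shape.is_static())
        return shape.get_shape();  // ngraph::Shape converts to SizeVector
    if (shape.rank().is_static())
        return InferenceEngine::SizeVector(shape.rank().get_length(), 0);
    return InferenceEngine::SizeVector{0};
}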

@@ -239,8 +256,8 @@ void TemplateInferRequest::inferPreprocess() {
    IInferRequestInternal::execDataPreprocessing(_deviceInputs);
    for (auto&& networkInput : _deviceInputs) {
        auto index = _executableNetwork->_inputIndex[networkInput.first];
-        const auto& parameter = _parameters[index];
-        const auto& parameterShape = parameter->get_shape();
+        const auto& parameter = _executableNetwork->_function->get_parameters()[index];
+        auto parameterShape = networkInput.second->getTensorDesc().getDims();
        const auto& parameterType = parameter->get_element_type();
        _inputTensors[index] = _executableNetwork->_plugin->_backend->create_tensor(
            parameterType,
@@ -254,7 +271,11 @@
        if (outputBlob->getTensorDesc().getPrecision() == networkOutput->getTensorDesc().getPrecision()) {
            networkOutput = outputBlob;
        }
-        const auto& result = _results[index];
+        const auto& result = _executableNetwork->_function->get_results()[index];
+        if (result->get_output_partial_shape(0).is_dynamic()) {
+            _outputTensors[index] = _executableNetwork->_plugin->_backend->create_tensor();
+            continue;
+        }
        const auto& resultShape = result->get_shape();
        const auto& resultType = result->get_element_type();
        _outputTensors[index] = _executableNetwork->_plugin->_backend->create_tensor(
@@ -287,19 +308,190 @@ void TemplateInferRequest::waitPipeline() {
void TemplateInferRequest::inferPostprocess() {
    OV_ITT_SCOPED_TASK(itt::domains::TemplatePlugin, _profilingTask[Postprocess]);
    auto start = Time::now();
-    for (auto&& output : _outputs) {
-        auto outputBlob = output.second;
+    for (auto&& output : _networkOutputs) {
+        auto index = _executableNetwork->_outputIndex[output.first];
+        const auto& result = _executableNetwork->_function->get_results()[index];
+        if (result->get_output_partial_shape(0).is_dynamic()) {
+            // Touch blob to allocate it
+            GetBlob(output.first);
+        }
+        auto outputBlob = _outputs.at(output.first);
        auto networkOutput = _networkOutputBlobs[output.first];
        // perform precision conversion if network output's precision and computational
        // graph output's precision are different
        if (outputBlob->getTensorDesc().getPrecision() != networkOutput->getTensorDesc().getPrecision()) {
            blobCopy(networkOutput, outputBlob);
+        } else if (result->get_output_partial_shape(0).is_dynamic()) {
+            auto tensor = _outputTensors[_executableNetwork->_outputIndex.at(output.first)];
+            tensor->read(InferenceEngine::as<InferenceEngine::MemoryBlob>(outputBlob)->wmap().as<char*>(),
+                         tensor->get_size_in_bytes());
        }
    }
    _durations[Postprocess] = Time::now() - start;
}
// ! [infer_request:infer_postprocess]
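
The dynamic branch above copies the inferred result from the backend tensor into the user's blob. A reduced sketch of that copy, assuming tensor and outputBlob are as in the loop body (illustrative, not an exact excerpt):

// "tensor" is a std::shared_ptr<ngraph::runtime::Tensor>; "outputBlob" is a Blob::Ptr.
auto memBlob = InferenceEngine::as<InferenceEngine::MemoryBlob>(outputBlob);
if (memBlob) {
    auto locked = memBlob->wmap();  // writable view over the blob's buffer
    tensor->read(locked.as<char*>(), tensor->get_size_in_bytes());
}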

// ! [infer_request:get_blob]
InferenceEngine::Blob::Ptr TemplateInferRequest::GetBlob(const std::string& name) {
    OV_ITT_SCOPED_TASK(itt::domains::TemplatePlugin, "GetBlob");
    InputInfo::Ptr foundInput;
    DataPtr foundOutput;
    Blob::Ptr data;
    const SizeVector oneVector = {1};
    if (findInputAndOutputBlobByName(name, foundInput, foundOutput)) {
        // ROI blob is returned only if it was set previously. Otherwise default blob is returned.
        auto it = _preProcData.find(name);
        if (it != _preProcData.end()) {
            data = it->second->getRoiBlob();
        } else {
            data = _inputs[name];
            SizeVector dims;
            if (!data) {
                auto&& parameters = _executableNetwork->_function->get_parameters();
                const auto& pshape = parameters.at(_executableNetwork->_inputIndex.at(name))->get_partial_shape();
                dims = pshape.is_dynamic() ? SizeVector({0}) : pshape.get_shape();
                AllocateImplSingle(
                    _inputs,
                    _deviceInputs,
                    *_networkInputs.find(name),
                    [&](const std::string& blobName) {
                        return parameters.at(_executableNetwork->_inputIndex.at(blobName))->get_element_type();
                    },
                    dims);
                data = _inputs[name];
            } else {
                dims = data->getTensorDesc().getDims();
            }
            checkBlob(data, name, true, foundInput->getTensorDesc().getLayout() != SCALAR ? dims : oneVector);
            auto& devBlob = _deviceInputs[name];
            if (preProcessingRequired(foundInput, data, devBlob)) {
                // if there is no devBlob, preprocessing is performed in place
                addInputPreProcessingFor(name, data, devBlob ? devBlob : _inputs[name]);
            }
        }
    } else {
        data = _outputs[name];
        SizeVector dims;
        if (!foundOutput->isDynamic()) {
            dims = foundOutput->getTensorDesc().getDims();
        } else if (_outputTensors[_executableNetwork->_outputIndex.at(name)]->get_partial_shape().is_static()) {
            dims = _outputTensors[_executableNetwork->_outputIndex.at(name)]->get_shape();
        } else {
            IE_THROW() << "Output blob dimensions are not all known for output name " << name
                       << " with partial shape: " << foundOutput->getPartialShape();
        }

        if (data) {
            if (data->getTensorDesc().getDims() != dims) {
                // TODO: implement something smart here instead of raw re-allocation
                data.reset();
            }
        }

        if (!data) {
            auto&& results = _executableNetwork->_function->get_results();
            AllocateImplSingle(
                _outputs,
                _networkOutputBlobs,
                *_networkOutputs.find(name),
                [&](const std::string& blobName) {
                    return results.at(_executableNetwork->_outputIndex.at(blobName))->get_element_type();
                },
                dims);
            data = _outputs[name];
        }
        checkBlob(data, name, false, foundOutput->getTensorDesc().getLayout() != SCALAR ? dims : oneVector);
    }
    return data;
}
// ! [infer_request:get_blob]
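
Usage-wise, a blob for a dynamic output only has concrete dims after inference, so GetBlob is typically called post-Infer. A minimal sketch (the request variable and the output name "out" are illustrative assumptions):

request.Infer();
InferenceEngine::Blob::Ptr out = request.GetBlob("out");
// dims now hold the concrete shape inferred for this particular request
InferenceEngine::SizeVector dims = out->getTensorDesc().getDims();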

// ! [infer_request:set_blob]
void TemplateInferRequest::SetBlob(const std::string& name, const InferenceEngine::Blob::Ptr& userBlob) {
    OV_ITT_SCOPED_TASK(itt::domains::TemplatePlugin, "SetBlob");
    if (name.empty()) {
        IE_THROW(NotFound) << "Failed to set blob with empty name";
    }
    if (!userBlob)
        IE_THROW(NotAllocated) << "Failed to set empty blob with name: \'" << name << "\'";
    const bool compoundBlobPassed = userBlob->is<CompoundBlob>();
    const bool remoteBlobPassed = userBlob->is<RemoteBlob>();
    if (!compoundBlobPassed && !remoteBlobPassed && userBlob->buffer() == nullptr)
        IE_THROW(NotAllocated) << "Input data was not allocated. Input name: \'" << name << "\'";
    if (userBlob->size() == 0) {
        IE_THROW() << "Input data is empty. Input name: \'" << name << "\'";
    }

    InputInfo::Ptr foundInput;
    DataPtr foundOutput;
    size_t dataSize = userBlob->size();
    if (findInputAndOutputBlobByName(name, foundInput, foundOutput)) {
        // ilavreno: the condition below is obsolete, but we need an exact list of precisions
        // which are supported by G-API preprocessing
        if (foundInput->getPrecision() != userBlob->getTensorDesc().getPrecision()) {
            IE_THROW(ParameterMismatch)
                << "Failed to set Blob with precision not corresponding to user input precision";
        }

        auto& devBlob = _deviceInputs[name];
        auto usrDims = userBlob->getTensorDesc().getDims();
        auto usrLayout = userBlob->getTensorDesc().getLayout();
        auto devDims = devBlob->getTensorDesc().getDims();
        auto devLayout = devBlob->getTensorDesc().getLayout();
        auto devPrecision = devBlob->getTensorDesc().getPrecision();
        if (foundInput->getInputData()->isDynamic() && (devDims != usrDims || devLayout != usrLayout)) {
            devBlob = make_blob_with_precision({devPrecision, usrDims, TensorDesc::getLayoutByDims(usrDims)});
            devBlob->allocate();
            _deviceInputs[name] = devBlob;
        }
        const bool preProcRequired = preProcessingRequired(foundInput, userBlob, devBlob);
        if (compoundBlobPassed && !preProcRequired) {
            IE_THROW(NotImplemented) << "cannot set compound blob: supported only for input pre-processing";
        }

        if (preProcRequired) {
            addInputPreProcessingFor(name, userBlob, devBlob ? devBlob : _inputs[name]);
        } else {
            size_t inputSize = devBlob->getTensorDesc().getLayout() != InferenceEngine::Layout::SCALAR
                                   ? InferenceEngine::details::product(devBlob->getTensorDesc().getDims())
                                   : 1;
            if (dataSize != inputSize) {
                IE_THROW() << "Input blob size is not equal to network input size (" << dataSize
                           << "!=" << inputSize << ").";
            }
            _inputs[name] = userBlob;
            devBlob = userBlob;
        }
    } else {
        if (compoundBlobPassed) {
            IE_THROW(NotImplemented) << "cannot set compound blob: supported only for input pre-processing";
        }
        auto& devBlob = _networkOutputBlobs[name];
        auto usrDims = userBlob->getTensorDesc().getDims();
        auto usrLayout = userBlob->getTensorDesc().getLayout();
        auto devDims = devBlob->getTensorDesc().getDims();
        auto devLayout = devBlob->getTensorDesc().getLayout();
        auto devPrecision = devBlob->getTensorDesc().getPrecision();
        if (foundOutput->isDynamic() && (devDims != usrDims || devLayout != usrLayout)) {
            devBlob = make_blob_with_precision({devPrecision, usrDims, TensorDesc::getLayoutByDims(usrDims)});
            devBlob->allocate();
            _networkOutputBlobs[name] = devBlob;
        }
        size_t outputSize = devBlob->getTensorDesc().getLayout() != InferenceEngine::Layout::SCALAR
                                ? details::product(devBlob->getTensorDesc().getDims())
                                : 1;
        if (dataSize != outputSize) {
            IE_THROW() << "Output blob size is not equal to network output size (" << dataSize
                       << "!=" << outputSize << ").";
        }
        if (foundOutput->getPrecision() != userBlob->getTensorDesc().getPrecision()) {
            IE_THROW(ParameterMismatch)
                << "Failed to set Blob with precision not corresponding to user output precision";
        }
        _outputs[name] = userBlob;
    }
}
// ! [infer_request:set_blob]
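
A hedged usage sketch of the dynamic-input path above: successive SetBlob calls may carry different dims, and the plugin re-allocates its device-side blob when they change. The request variable, the input name "data", and the shapes are illustrative assumptions:

using namespace InferenceEngine;

// First inference at 224x224.
Blob::Ptr b1 = make_shared_blob<float>(TensorDesc{Precision::FP32, {1, 3, 224, 224}, Layout::NCHW});
b1->allocate();
request.SetBlob("data", b1);
request.Infer();

// A different spatial size is accepted because the input is dynamic;
// SetBlob re-allocates the device blob to match the new dims.
Blob::Ptr b2 = make_shared_blob<float>(TensorDesc{Precision::FP32, {1, 3, 320, 320}, Layout::NCHW});
b2->allocate();
request.SetBlob("data", b2);
request.Infer();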

// ! [infer_request:get_performance_counts]
std::map<std::string, InferenceEngineProfileInfo> TemplateInferRequest::GetPerformanceCounts() const {
std::map<std::string, InferenceEngineProfileInfo> perfMap;
5 changes: 3 additions & 2 deletions docs/template_plugin/src/template_infer_request.hpp
@@ -40,6 +40,9 @@ class TemplateInferRequest : public InferenceEngine::IInferRequestInternal {
    void waitPipeline();
    void inferPostprocess();

+    InferenceEngine::Blob::Ptr GetBlob(const std::string& name) override;
+    void SetBlob(const std::string& name, const InferenceEngine::Blob::Ptr& userBlob) override;

private:
    void allocateDeviceBuffers();
    void allocateBlobs();
@@ -52,8 +55,6 @@ class TemplateInferRequest : public InferenceEngine::IInferRequestInternal {
    std::array<std::chrono::duration<float, std::micro>, numOfStages> _durations;

    InferenceEngine::BlobMap _networkOutputBlobs;
-    ngraph::ParameterVector _parameters;
-    ngraph::ResultVector _results;

    std::vector<std::shared_ptr<ngraph::runtime::Tensor>> _inputTensors;
    std::vector<std::shared_ptr<ngraph::runtime::Tensor>> _outputTensors;
@@ -0,0 +1,30 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include <vector>

#include "behavior/infer_request_dynamic.hpp"

using namespace BehaviorTestsDefinitions;

namespace {

const std::vector<InferenceEngine::Precision> netPrecisions = {
    InferenceEngine::Precision::FP32,
    InferenceEngine::Precision::FP16
};

const std::vector<std::map<std::string, std::string>> configs = {
    {}
};

INSTANTIATE_TEST_CASE_P(smoke_BehaviorTests, InferRequestDynamicTests,
                        ::testing::Combine(
                            ::testing::ValuesIn(netPrecisions),
                            ::testing::Values(CommonTestUtils::DEVICE_TEMPLATE),
                            ::testing::ValuesIn(configs)),
                        InferRequestDynamicTests::getTestCaseName);

} // namespace

@@ -182,8 +182,20 @@
     * @param inputShapes A map of pairs: name of corresponding data and its dimension.
     */
    void reshape(const ICNNNetwork::InputShapes& inputShapes);

+    /**
+     * @brief Run shape inference with new input shapes for the network
+     * @param inputShapes A map of pairs: name of corresponding data and its dimension.
+     */
+    void reshape(const std::initializer_list<ICNNNetwork::InputShapes::value_type>& inputShapes);
    IE_SUPPRESS_DEPRECATED_END

+    /**
+     * @brief Run shape inference with new input partial shapes for the network
+     * @param inputShapes A map of pairs: name of corresponding data and its new partial shape.
+     */
+    void reshape(const std::map<std::string, ngraph::PartialShape>& inputShapes);

    /**
     * @brief Serialize network to IR and weights files.
     *
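
A minimal end-to-end sketch of the new PartialShape overload. The model path, the input name "data", and the device name "TEMPLATE" are illustrative assumptions:

#include <map>
#include <ie_core.hpp>
#include <ngraph/partial_shape.hpp>

int main() {
    InferenceEngine::Core core;
    auto network = core.ReadNetwork("model.xml");  // placeholder path

    // Make the batch dimension dynamic via the new overload.
    std::map<std::string, ngraph::PartialShape> shapes;
    shapes["data"] = ngraph::PartialShape{ngraph::Dimension::dynamic(), 3, 224, 224};
    network.reshape(shapes);

    auto executable = core.LoadNetwork(network, "TEMPLATE");
    auto request = executable.CreateInferRequest();
    // SetBlob with any concrete batch size, then Infer(), as sketched earlier.
    return 0;
}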
