Dynamic plugin reshape blob (#6969)
* Added nGraph as a public dependency

* Fixed Windows warning

* Fixed CMake

* Fixed constant op

* Fixed typo

* Added reshape to PartialShape to CNNNetwork

* Added SetShape to InferRequest

* Enable support of DynamicShape in IE Data

* Add support of dynamic shapes to template plugin

* Fixed build

* Enable support of dynamic rank

* Fixed test for dynamic rank

* Fixed some tests

* Fix preprocess tests

* Fixed SetBlob

* Fixed code style

* Add more tests

* Fixed accuracy tests

* Fixed documentation

* Added tests for custom operation

* Added new tests

* WIP: move setShape from infer request to Blob

* Returned isApplicable check back

* Removed obsolete tests for InferRequest::SetShape and added a new (trivial) test for Blob::setShape

* Fixed artifacts

* Break code style

* Revert "Break code style"

This reverts commit 71ee638.

* Added -j8 for fix_all

* Applied code style fixes

* Added doxygen items

* Fixed style

* Applied codestyle patch

* Reverted irrelevant commit with template extension

* Fixed cmake file for shared func tests (pick from master)

* Revert all changes in template_extension

* Deleted some old commented-out code that won't be used

* Fixed wrong exception throwing

* Code style fix

* Fixed preprocessing part

* Fixed incorrect blob reshape in GetBlob

* Deleted incorrect assert in GAPI that prevented some tests from passing in Debug

* Fixed issues identified during review

* Removed SetShape, replaced getLayoutByDims with getLayoutByRank, and removed problematic modification from IE preprocessing

* Fixed comments

* Removed obsolete setShape

* [VPU] Fixed allocating dynamic blobs in myriad_infer_request

* Fixed comments

* Fixed CNNNgraphImpl and comments

Co-authored-by: Ilya Churaev <ilya.churaev@intel.com>
Co-authored-by: Polina <polina.brzezinskaya@intel.com>
3 people authored Sep 1, 2021
1 parent 26eea3d commit bfae017
Showing 22 changed files with 1,625 additions and 78 deletions.
252 changes: 222 additions & 30 deletions docs/template_plugin/src/template_infer_request.cpp
@@ -4,6 +4,9 @@

#include "template_infer_request.hpp"

#include <debug.h>
#include <ie_compound_blob.h>

#include <algorithm>
#include <map>
#include <memory>
@@ -45,8 +48,6 @@ TemplateInferRequest::TemplateInferRequest(const InferenceEngine::InputsDataMap&
    };

    _executable = _executableNetwork->_plugin->_backend->compile(_executableNetwork->_function);
-    _parameters = _executableNetwork->_function->get_parameters();
-    _results = _executableNetwork->_function->get_results();

    allocateDeviceBuffers();
    allocateBlobs();
@@ -65,35 +66,51 @@ void TemplateInferRequest::allocateDeviceBuffers() {
    _outputTensors.resize(_networkOutputs.size());
}

template <typename BlobData, typename GetNetworkPrecisionF>
static void AllocateImplSingle(BlobMap& blobMap,
                               BlobMap& networkBlobMap,
                               const BlobData& blobData,
                               GetNetworkPrecisionF&& GetNetworkPrecision,
                               const SizeVector& dims) {
    const auto& precision = blobData.second->getTensorDesc().getPrecision();
    auto layout = blobData.second->getTensorDesc().getLayout();
    if (dims.size() > 0 && layout == InferenceEngine::Layout::SCALAR) {
        layout = InferenceEngine::Layout::ANY;
    }
    const auto deviceLayout = TensorDesc::getLayoutByDims(dims);
    Blob::Ptr blob = make_blob_with_precision({precision, dims, layout});
    blob->allocate();
    blobMap[blobData.first] = blob;

    auto networkPrecision = InferenceEngine::details::convertPrecision(GetNetworkPrecision(blobData.first));
    Blob::Ptr networkBlob;
    if (precision == networkPrecision && layout == deviceLayout) {
        networkBlob = blob;
    } else {
        networkBlob = make_blob_with_precision({networkPrecision, dims, deviceLayout});
        networkBlob->allocate();
    }
    networkBlobMap[blobData.first] = networkBlob;
}

template <typename BlobDataMap, typename GetNetworkPrecisionF>
static void AllocateImpl(const BlobDataMap& userDataMap,
                         BlobMap& userBlobMap,
                         BlobMap& deviceBlobMap,
                         GetNetworkPrecisionF&& GetNetworkPrecision,
                         bool isInputBlob = true) {
-    for (auto&& userData : userDataMap) {
-        const auto& dims = userData.second->getTensorDesc().getDims();
-        const auto deviceLayout = TensorDesc::getLayoutByDims(dims);
-        const auto userPrecision = userData.second->getTensorDesc().getPrecision();
-        const auto userLayout = userData.second->getTensorDesc().getLayout();
-        const auto networkPrecision = InferenceEngine::details::convertPrecision(GetNetworkPrecision(userData.first));
-        Blob::Ptr userBlob = make_blob_with_precision({userPrecision, dims, userLayout});
-        userBlob->allocate();
-        userBlobMap[userData.first] = userBlob;
-        Blob::Ptr deviceBlob;
-        if (userPrecision == networkPrecision && userLayout == deviceLayout) {
-            deviceBlob = userBlob;
-        } else {
-            if (userLayout != deviceLayout && !isInputBlob) {
-                IE_THROW(NotImplemented) << "Template Plugin: does not support setLayout for outputs";
-            }
-            deviceBlob = make_blob_with_precision({networkPrecision, dims, deviceLayout});
-            deviceBlob->allocate();
-        }
-        deviceBlobMap[userData.first] = deviceBlob;
+    for (const auto& userData : userDataMap) {
+        auto partialShape = userData.second->getPartialShape();
+        SizeVector dims;
+        if (partialShape.is_static()) {
+            dims = userData.second->getTensorDesc().getDims();
+        } else if (partialShape.rank().is_static()) {
+            dims = SizeVector(partialShape.rank().get_length(), 0);
+        } else {
+            dims = SizeVector{0};
+        }
+        AllocateImplSingle(userBlobMap, deviceBlobMap, userData, GetNetworkPrecision, dims);
    }
}
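
AllocateImpl above reduces a possibly-dynamic shape to placeholder dims before delegating to AllocateImplSingle. A self-contained sketch of that convention (the helper name dimsForShape is illustrative, not from this commit):

#include <ie_common.h>
#include <ngraph/partial_shape.hpp>

// Placeholder-dims convention used by AllocateImpl:
//   fully static shape -> the real dims, e.g. {1, 3, 224, 224}
//   static rank only   -> one zero per dimension, e.g. {0, 0, 0, 0}
//   dynamic rank       -> a single zero, {0}
InferenceEngine::SizeVector dimsForShape(const ngraph::PartialShape& shape) {
    if (shape.is_static())
        return shape.get_shape();  // ngraph::Shape converts to SizeVector
    if (shape.rank().is_static())
        return InferenceEngine::SizeVector(shape.rank().get_length(), 0);
    return InferenceEngine::SizeVector{0};
}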

@@ -239,8 +256,8 @@ void TemplateInferRequest::inferPreprocess() {
    IInferRequestInternal::execDataPreprocessing(_deviceInputs);
    for (auto&& networkInput : _deviceInputs) {
        auto index = _executableNetwork->_inputIndex[networkInput.first];
-        const auto& parameter = _parameters[index];
-        const auto& parameterShape = parameter->get_shape();
+        const auto& parameter = _executableNetwork->_function->get_parameters()[index];
+        auto parameterShape = networkInput.second->getTensorDesc().getDims();
        const auto& parameterType = parameter->get_element_type();
        _inputTensors[index] = _executableNetwork->_plugin->_backend->create_tensor(
            parameterType,
@@ -254,7 +271,11 @@
        if (outputBlob->getTensorDesc().getPrecision() == networkOutput->getTensorDesc().getPrecision()) {
            networkOutput = outputBlob;
        }
-        const auto& result = _results[index];
+        const auto& result = _executableNetwork->_function->get_results()[index];
+        if (result->get_output_partial_shape(0).is_dynamic()) {
+            _outputTensors[index] = _executableNetwork->_plugin->_backend->create_tensor();
+            continue;
+        }
        const auto& resultShape = result->get_shape();
        const auto& resultType = result->get_element_type();
        _outputTensors[index] = _executableNetwork->_plugin->_backend->create_tensor(
@@ -287,19 +308,190 @@ void TemplateInferRequest::waitPipeline() {
void TemplateInferRequest::inferPostprocess() {
    OV_ITT_SCOPED_TASK(itt::domains::TemplatePlugin, _profilingTask[Postprocess]);
    auto start = Time::now();
-    for (auto&& output : _outputs) {
-        auto outputBlob = output.second;
+    for (auto&& output : _networkOutputs) {
+        auto index = _executableNetwork->_outputIndex[output.first];
+        const auto& result = _executableNetwork->_function->get_results()[index];
+        if (result->get_output_partial_shape(0).is_dynamic()) {
+            // Touch blob to allocate it
+            GetBlob(output.first);
+        }
+        auto outputBlob = _outputs.at(output.first);
        auto networkOutput = _networkOutputBlobs[output.first];
        // perform precision conversion if network output's precision and computational
        // graph output's precision are different
        if (outputBlob->getTensorDesc().getPrecision() != networkOutput->getTensorDesc().getPrecision()) {
            blobCopy(networkOutput, outputBlob);
+        } else if (result->get_output_partial_shape(0).is_dynamic()) {
+            auto tensor = _outputTensors[_executableNetwork->_outputIndex.at(output.first)];
+            tensor->read(InferenceEngine::as<InferenceEngine::MemoryBlob>(outputBlob)->wmap().as<char*>(),
+                         tensor->get_size_in_bytes());
        }
    }
    _durations[Postprocess] = Time::now() - start;
}
// ! [infer_request:infer_postprocess]
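
The dynamic branch above copies the inferred result from the backend tensor into the user's blob. A reduced sketch of that copy, assuming tensor and outputBlob are as in the loop body (illustrative, not an exact excerpt):

// "tensor" is a std::shared_ptr<ngraph::runtime::Tensor>; "outputBlob" is a Blob::Ptr.
auto memBlob = InferenceEngine::as<InferenceEngine::MemoryBlob>(outputBlob);
if (memBlob) {
    auto locked = memBlob->wmap();  // writable view over the blob's buffer
    tensor->read(locked.as<char*>(), tensor->get_size_in_bytes());
}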

// ! [infer_request:get_blob]
InferenceEngine::Blob::Ptr TemplateInferRequest::GetBlob(const std::string& name) {
    OV_ITT_SCOPED_TASK(itt::domains::TemplatePlugin, "GetBlob");
    InputInfo::Ptr foundInput;
    DataPtr foundOutput;
    Blob::Ptr data;
    const SizeVector oneVector = {1};
    if (findInputAndOutputBlobByName(name, foundInput, foundOutput)) {
        // ROI blob is returned only if it was set previously. Otherwise default blob is returned.
        auto it = _preProcData.find(name);
        if (it != _preProcData.end()) {
            data = it->second->getRoiBlob();
        } else {
            data = _inputs[name];
            SizeVector dims;
            if (!data) {
                auto&& parameters = _executableNetwork->_function->get_parameters();
                const auto& pshape = parameters.at(_executableNetwork->_inputIndex.at(name))->get_partial_shape();
                dims = pshape.is_dynamic() ? SizeVector({0}) : pshape.get_shape();
                AllocateImplSingle(
                    _inputs,
                    _deviceInputs,
                    *_networkInputs.find(name),
                    [&](const std::string& blobName) {
                        return parameters.at(_executableNetwork->_inputIndex.at(blobName))->get_element_type();
                    },
                    dims);
                data = _inputs[name];
            } else {
                dims = data->getTensorDesc().getDims();
            }
            checkBlob(data, name, true, foundInput->getTensorDesc().getLayout() != SCALAR ? dims : oneVector);
            auto& devBlob = _deviceInputs[name];
            if (preProcessingRequired(foundInput, data, devBlob)) {
                // if there is no devBlob, preprocessing is performed in place
                addInputPreProcessingFor(name, data, devBlob ? devBlob : _inputs[name]);
            }
        }
    } else {
        data = _outputs[name];
        SizeVector dims;
        if (!foundOutput->isDynamic()) {
            dims = foundOutput->getTensorDesc().getDims();
        } else if (_outputTensors[_executableNetwork->_outputIndex.at(name)]->get_partial_shape().is_static()) {
            dims = _outputTensors[_executableNetwork->_outputIndex.at(name)]->get_shape();
        } else {
            IE_THROW() << "Output blob dimensions are not all known for output name " << name
                       << " with partial shape: " << foundOutput->getPartialShape();
        }

        if (data) {
            if (data->getTensorDesc().getDims() != dims) {
                // TODO: implement something smart here instead of raw re-allocation
                data.reset();
            }
        }

        if (!data) {
            auto&& results = _executableNetwork->_function->get_results();
            AllocateImplSingle(
                _outputs,
                _networkOutputBlobs,
                *_networkOutputs.find(name),
                [&](const std::string& blobName) {
                    return results.at(_executableNetwork->_outputIndex.at(blobName))->get_element_type();
                },
                dims);
            data = _outputs[name];
        }
        checkBlob(data, name, false, foundOutput->getTensorDesc().getLayout() != SCALAR ? dims : oneVector);
    }
    return data;
}
// ! [infer_request:get_blob]
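
Usage-wise, a blob for a dynamic output only has concrete dims after inference, so GetBlob is typically called post-Infer. A minimal sketch (the request variable and the output name "out" are illustrative assumptions):

request.Infer();
InferenceEngine::Blob::Ptr out = request.GetBlob("out");
// dims now hold the concrete shape inferred for this particular request
InferenceEngine::SizeVector dims = out->getTensorDesc().getDims();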

// ! [infer_request:set_blob]
void TemplateInferRequest::SetBlob(const std::string& name, const InferenceEngine::Blob::Ptr& userBlob) {
    OV_ITT_SCOPED_TASK(itt::domains::TemplatePlugin, "SetBlob");
    if (name.empty()) {
        IE_THROW(NotFound) << "Failed to set blob with empty name";
    }
    if (!userBlob)
        IE_THROW(NotAllocated) << "Failed to set empty blob with name: \'" << name << "\'";
    const bool compoundBlobPassed = userBlob->is<CompoundBlob>();
    const bool remoteBlobPassed = userBlob->is<RemoteBlob>();
    if (!compoundBlobPassed && !remoteBlobPassed && userBlob->buffer() == nullptr)
        IE_THROW(NotAllocated) << "Input data was not allocated. Input name: \'" << name << "\'";
    if (userBlob->size() == 0) {
        IE_THROW() << "Input data is empty. Input name: \'" << name << "\'";
    }

    InputInfo::Ptr foundInput;
    DataPtr foundOutput;
    size_t dataSize = userBlob->size();
    if (findInputAndOutputBlobByName(name, foundInput, foundOutput)) {
        // ilavreno: the condition below is obsolete, but we need an exact list of precisions
        // which are supported by G-API preprocessing
        if (foundInput->getPrecision() != userBlob->getTensorDesc().getPrecision()) {
            IE_THROW(ParameterMismatch)
                << "Failed to set Blob with precision not corresponding to user input precision";
        }

        auto& devBlob = _deviceInputs[name];
        auto usrDims = userBlob->getTensorDesc().getDims();
        auto usrLayout = userBlob->getTensorDesc().getLayout();
        auto devDims = devBlob->getTensorDesc().getDims();
        auto devLayout = devBlob->getTensorDesc().getLayout();
        auto devPrecision = devBlob->getTensorDesc().getPrecision();
        if (foundInput->getInputData()->isDynamic() && (devDims != usrDims || devLayout != usrLayout)) {
            devBlob = make_blob_with_precision({devPrecision, usrDims, TensorDesc::getLayoutByDims(usrDims)});
            devBlob->allocate();
            _deviceInputs[name] = devBlob;
        }
        const bool preProcRequired = preProcessingRequired(foundInput, userBlob, devBlob);
        if (compoundBlobPassed && !preProcRequired) {
            IE_THROW(NotImplemented) << "cannot set compound blob: supported only for input pre-processing";
        }

        if (preProcRequired) {
            addInputPreProcessingFor(name, userBlob, devBlob ? devBlob : _inputs[name]);
        } else {
            size_t inputSize = devBlob->getTensorDesc().getLayout() != InferenceEngine::Layout::SCALAR
                                   ? InferenceEngine::details::product(devBlob->getTensorDesc().getDims())
                                   : 1;
            if (dataSize != inputSize) {
                IE_THROW() << "Input blob size is not equal to network input size (" << dataSize
                           << "!=" << inputSize << ").";
            }
            _inputs[name] = userBlob;
            devBlob = userBlob;
        }
    } else {
        if (compoundBlobPassed) {
            IE_THROW(NotImplemented) << "cannot set compound blob: supported only for input pre-processing";
        }
        auto& devBlob = _networkOutputBlobs[name];
        auto usrDims = userBlob->getTensorDesc().getDims();
        auto usrLayout = userBlob->getTensorDesc().getLayout();
        auto devDims = devBlob->getTensorDesc().getDims();
        auto devLayout = devBlob->getTensorDesc().getLayout();
        auto devPrecision = devBlob->getTensorDesc().getPrecision();
        if (foundOutput->isDynamic() && (devDims != usrDims || devLayout != usrLayout)) {
            devBlob = make_blob_with_precision({devPrecision, usrDims, TensorDesc::getLayoutByDims(usrDims)});
            devBlob->allocate();
            _networkOutputBlobs[name] = devBlob;
        }
        size_t outputSize = devBlob->getTensorDesc().getLayout() != InferenceEngine::Layout::SCALAR
                                ? details::product(devBlob->getTensorDesc().getDims())
                                : 1;
        if (dataSize != outputSize) {
            IE_THROW() << "Output blob size is not equal to network output size (" << dataSize
                       << "!=" << outputSize << ").";
        }
        if (foundOutput->getPrecision() != userBlob->getTensorDesc().getPrecision()) {
            IE_THROW(ParameterMismatch)
                << "Failed to set Blob with precision not corresponding to user output precision";
        }
        _outputs[name] = userBlob;
    }
}
// ! [infer_request:set_blob]
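
A hedged usage sketch of the dynamic-input path above: successive SetBlob calls may carry different dims, and the plugin re-allocates its device-side blob when they change. The request variable, the input name "data", and the shapes are illustrative assumptions:

using namespace InferenceEngine;

// First inference at 224x224.
Blob::Ptr b1 = make_shared_blob<float>(TensorDesc{Precision::FP32, {1, 3, 224, 224}, Layout::NCHW});
b1->allocate();
request.SetBlob("data", b1);
request.Infer();

// A different spatial size is accepted because the input is dynamic;
// SetBlob re-allocates the device blob to match the new dims.
Blob::Ptr b2 = make_shared_blob<float>(TensorDesc{Precision::FP32, {1, 3, 320, 320}, Layout::NCHW});
b2->allocate();
request.SetBlob("data", b2);
request.Infer();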

// ! [infer_request:get_performance_counts]
std::map<std::string, InferenceEngineProfileInfo> TemplateInferRequest::GetPerformanceCounts() const {
std::map<std::string, InferenceEngineProfileInfo> perfMap;
5 changes: 3 additions & 2 deletions docs/template_plugin/src/template_infer_request.hpp
@@ -40,6 +40,9 @@ class TemplateInferRequest : public InferenceEngine::IInferRequestInternal {
    void waitPipeline();
    void inferPostprocess();

+    InferenceEngine::Blob::Ptr GetBlob(const std::string& name) override;
+    void SetBlob(const std::string& name, const InferenceEngine::Blob::Ptr& userBlob) override;

private:
    void allocateDeviceBuffers();
    void allocateBlobs();
@@ -52,8 +55,6 @@ class TemplateInferRequest : public InferenceEngine::IInferRequestInternal {
    std::array<std::chrono::duration<float, std::micro>, numOfStages> _durations;

    InferenceEngine::BlobMap _networkOutputBlobs;
-    ngraph::ParameterVector _parameters;
-    ngraph::ResultVector _results;

    std::vector<std::shared_ptr<ngraph::runtime::Tensor>> _inputTensors;
    std::vector<std::shared_ptr<ngraph::runtime::Tensor>> _outputTensors;
@@ -0,0 +1,30 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include <vector>

#include "behavior/infer_request_dynamic.hpp"

using namespace BehaviorTestsDefinitions;

namespace {

const std::vector<InferenceEngine::Precision> netPrecisions = {
    InferenceEngine::Precision::FP32,
    InferenceEngine::Precision::FP16
};

const std::vector<std::map<std::string, std::string>> configs = {
    {}
};

INSTANTIATE_TEST_CASE_P(smoke_BehaviorTests, InferRequestDynamicTests,
                        ::testing::Combine(
                            ::testing::ValuesIn(netPrecisions),
                            ::testing::Values(CommonTestUtils::DEVICE_TEMPLATE),
                            ::testing::ValuesIn(configs)),
                        InferRequestDynamicTests::getTestCaseName);

} // namespace

@@ -182,8 +182,20 @@
     * @param inputShapes A map of pairs: name of corresponding data and its dimension.
     */
    void reshape(const ICNNNetwork::InputShapes& inputShapes);

+    /**
+     * @brief Run shape inference with new input shapes for the network
+     * @param inputShapes A map of pairs: name of corresponding data and its dimension.
+     */
+    void reshape(const std::initializer_list<ICNNNetwork::InputShapes::value_type>& inputShapes);
    IE_SUPPRESS_DEPRECATED_END

+    /**
+     * @brief Run shape inference with new input partial shapes for the network
+     * @param inputShapes A map of pairs: name of corresponding data and its new partial shape.
+     */
+    void reshape(const std::map<std::string, ngraph::PartialShape>& inputShapes);

    /**
     * @brief Serialize network to IR and weights files.
     *
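
A minimal end-to-end sketch of the new PartialShape overload. The model path, the input name "data", and the device name "TEMPLATE" are illustrative assumptions:

#include <map>
#include <ie_core.hpp>
#include <ngraph/partial_shape.hpp>

int main() {
    InferenceEngine::Core core;
    auto network = core.ReadNetwork("model.xml");  // placeholder path

    // Make the batch dimension dynamic via the new overload.
    std::map<std::string, ngraph::PartialShape> shapes;
    shapes["data"] = ngraph::PartialShape{ngraph::Dimension::dynamic(), 3, 224, 224};
    network.reshape(shapes);

    auto executable = core.LoadNetwork(network, "TEMPLATE");
    auto request = executable.CreateInferRequest();
    // SetBlob with any concrete batch size, then Infer(), as sketched earlier.
    return 0;
}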
