Enable no-copy API in Model API OVMS Adapter to optimize Embedding endpoint #92
Merged

12 commits:
af21831 Use move (atobiszei)
c76640a Cleanup (atobiszei)
ea42f22 Cleanup (atobiszei)
da73147 Bump Model API sha (atobiszei)
e4b6b5e Review fixes (atobiszei)
2d9f997 Fix Model API & OVMS (atobiszei)
e3b15ff Review fixes (atobiszei)
bf2123e Make model_api dep loadable (atobiszei)
f005f1a Bazel cache (atobiszei)
4fae66e Update commit sha to merged change (atobiszei)
9ce4014 Fix (atobiszei)
e7e2787 Remove commented out line (atobiszei)
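The change adds a second infer overload, void infer(const InferenceInput& input, InferenceOutput& output), which registers the caller's pre-allocated output tensors with OVMS through OVMS_InferenceRequestAddOutput and OVMS_InferenceRequestOutputSetData, so the server writes results directly into those buffers instead of allocating new tensors that would then be copied. A minimal caller-side sketch of the pattern follows; the adapter construction, the tensor names "input_ids" and "embeddings", and the [1, 512] f32 shape are illustrative assumptions, not taken from this PR.

// Sketch only: pre-allocate the output tensor and let the adapter bind its buffer
// to the OVMS request, so inference writes straight into it (no extra copy).
// Assumes an OVMSInferenceAdapter already constructed for an embedding servable
// (include its header from this repository; path omitted here).
#include <openvino/openvino.hpp>

void runNoCopy(OVMSInferenceAdapter& adapter, const ov::Tensor& inputIds) {
    InferenceInput input;                  // std::map<std::string, ov::Tensor> in Model API
    input.emplace("input_ids", inputIds);  // tensor name is an assumption

    ov::Tensor embeddings(ov::element::f32, ov::Shape{1, 512});  // shape/type assumed
    InferenceOutput output;
    output.emplace("embeddings", embeddings);  // pre-registered output buffer

    adapter.infer(input, output);  // new two-argument overload from this PR

    // embeddings.data<float>() now holds the result written by OVMS,
    // because ov::Tensor copies share the same underlying memory.
}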
@@ -70,31 +70,109 @@ OVMSInferenceAdapter::~OVMSInferenceAdapter() {
LOG(INFO) << "OVMSAdapter destr";
}

InferenceOutput OVMSInferenceAdapter::infer(const InferenceInput& input) {
inline std::vector<int64_t> getShapeAcceptableByCAPI(const ov::Shape& shape) {
if (std::any_of(shape.begin(), shape.end(), [](size_t dim) {
return dim > std::numeric_limits<int64_t>::max();})) {
throw std::runtime_error("Cannot use C-API with dimension size greater than int64_t max value");
}
return std::vector<int64_t>{shape.begin(), shape.end()};
}

void OVMSInferenceAdapter::infer(const InferenceInput& input, InferenceOutput& output) {
/////////////////////
// PREPARE REQUEST
/////////////////////
OVMS_InferenceRequest* request{nullptr};
ASSERT_CAPI_STATUS_NULL(OVMS_InferenceRequestNew(&request, cserver, servableName.c_str(), servableVersion));
CREATE_GUARD(requestGuard, OVMS_InferenceRequest, request);

InferenceOutput output;

OVMS_Status* status{nullptr};
std::vector<std::string> outputsSet;
// PREPARE EACH INPUT
// extract single tensor
for (const auto& [name, input_tensor] : input) {
const char* realInputName = name.c_str();
const auto& ovinputShape = input_tensor.get_shape();
if (std::any_of(ovinputShape.begin(), ovinputShape.end(), [](size_t dim) {
return dim > std::numeric_limits<int64_t>::max();})) {
throw std::runtime_error("Cannot use C-API with dimension size greater than int64_t max value");
const char* realName = name.c_str();
const auto& ovShape = input_tensor.get_shape();
std::vector<int64_t> capiShape = getShapeAcceptableByCAPI(ovShape);
OVMS_DataType inputDataType = OVPrecision2CAPI(input_tensor.get_element_type());
ASSERT_CAPI_STATUS_NULL(OVMS_InferenceRequestAddInput(request, realName, inputDataType, capiShape.data(), capiShape.size()));
const uint32_t NOT_USED_NUM = 0;
ASSERT_CAPI_STATUS_NULL(OVMS_InferenceRequestInputSetData(request,
realName,
reinterpret_cast<void*>(input_tensor.data()),
input_tensor.get_byte_size(),
OVMS_BUFFERTYPE_CPU,
NOT_USED_NUM));
}
for (const auto& [name, output_tensor] : output) {
outputsSet.emplace_back(name);
const char* realName = name.c_str();
const auto& ovShape = output_tensor.get_shape();
std::vector<int64_t> capiShape = getShapeAcceptableByCAPI(ovShape);
OVMS_DataType inputDataType = OVPrecision2CAPI(output_tensor.get_element_type());
ASSERT_CAPI_STATUS_NULL(OVMS_InferenceRequestAddOutput(request, realName, inputDataType, capiShape.data(), capiShape.size()));
const uint32_t NOT_USED_NUM = 0;
ASSERT_CAPI_STATUS_NULL(OVMS_InferenceRequestOutputSetData(request,
realName,
reinterpret_cast<void*>(output_tensor.data()),
output_tensor.get_byte_size(),
OVMS_BUFFERTYPE_CPU,
NOT_USED_NUM));

}
#if (OVMS_DUMP_TO_FILE == 1)
dumpOvTensorInput(input,"input");
#endif
//////////////////
// INFERENCE
//////////////////
OVMS_InferenceResponse* response = nullptr;
ASSERT_CAPI_STATUS_NULL(OVMS_Inference(cserver, request, &response));
CREATE_GUARD(responseGuard, OVMS_InferenceResponse, response);
uint32_t outputCount = 42;
ASSERT_CAPI_STATUS_NULL(OVMS_InferenceResponseOutputCount(response, &outputCount));
uint32_t parameterCount = 42;
ASSERT_CAPI_STATUS_NULL(OVMS_InferenceResponseParameterCount(response, &parameterCount));
const void* voutputData;
size_t bytesize = 42;
OVMS_DataType datatype = (OVMS_DataType)199;
const int64_t* shape{nullptr};
size_t dimCount = 42;
OVMS_BufferType bufferType = (OVMS_BufferType)199;
uint32_t deviceId = 42;
const char* outputName{nullptr};
for (size_t i = 0; i < outputCount; ++i) {
ASSERT_CAPI_STATUS_NULL(OVMS_InferenceResponseOutput(response, i, &outputName, &datatype, &shape, &dimCount, &voutputData, &bytesize, &bufferType, &deviceId));
if (std::find(outputsSet.begin(), outputsSet.end(), outputName) == outputsSet.end()) {
output.emplace(outputName, std::move(makeOvTensor(datatype, shape, dimCount, voutputData, bytesize)));
} else {
//output.emplace(outputName, input.at(outputName));
Review comment: This line should probably be removed
}
std::vector<int64_t> inputShape{ovinputShape.begin(), ovinputShape.end()};
}
#if (OVMS_DUMP_TO_FILE == 1)
dumpOvTensorInput(output,"output");
#endif
}
InferenceOutput OVMSInferenceAdapter::infer(const InferenceInput& input) {
/////////////////////
// PREPARE REQUEST
/////////////////////
OVMS_InferenceRequest* request{nullptr};
ASSERT_CAPI_STATUS_NULL(OVMS_InferenceRequestNew(&request, cserver, servableName.c_str(), servableVersion));
CREATE_GUARD(requestGuard, OVMS_InferenceRequest, request);

InferenceOutput output;
OVMS_Status* status{nullptr};
// PREPARE EACH INPUT
for (const auto& [name, input_tensor] : input) {
const char* realName = name.c_str();
const auto& ovShape = input_tensor.get_shape();
std::vector<int64_t> capiShape = getShapeAcceptableByCAPI(ovShape);
OVMS_DataType inputDataType = OVPrecision2CAPI(input_tensor.get_element_type());
ASSERT_CAPI_STATUS_NULL(OVMS_InferenceRequestAddInput(request, realInputName, inputDataType, inputShape.data(), inputShape.size()));
ASSERT_CAPI_STATUS_NULL(OVMS_InferenceRequestAddInput(request, realName, inputDataType, capiShape.data(), capiShape.size()));
const uint32_t NOT_USED_NUM = 0;
ASSERT_CAPI_STATUS_NULL(OVMS_InferenceRequestInputSetData(request,
realInputName,
realName,
reinterpret_cast<void*>(input_tensor.data()),
input_tensor.get_byte_size(),
OVMS_BUFFERTYPE_CPU,
@@ -135,7 +213,7 @@ InferenceOutput OVMSInferenceAdapter::infer(const InferenceInput& input) {
const char* outputName{nullptr};
for (size_t i = 0; i < outputCount; ++i) {
ASSERT_CAPI_STATUS_NULL(OVMS_InferenceResponseOutput(response, i, &outputName, &datatype, &shape, &dimCount, &voutputData, &bytesize, &bufferType, &deviceId));
output[outputName] = makeOvTensor(datatype, shape, dimCount, voutputData, bytesize);
output.emplace(outputName, std::move(makeOvTensor(datatype, shape, dimCount, voutputData, bytesize)));
}
#if (OVMS_DUMP_TO_FILE == 1)
dumpOvTensorInput(output,"output");
@@ -169,16 +247,32 @@ void OVMSInferenceAdapter::loadModel(const std::shared_ptr<const ov::Model>& mod
inputMinMax.second.emplace_back(shapeMax[i]);
}
this->inShapesMinMaxes.insert({tensorName, std::move(inputMinMax)});
this->inputDatatypes.insert({tensorName, CAPI2OVPrecision(datatype)});
}
for (id = 0; id < outputCount; ++id) {
ASSERT_CAPI_STATUS_NULL(OVMS_ServableMetadataOutput(servableMetadata, id, &tensorName, &datatype, &dimCount, &shapeMin, &shapeMax));
outputNames.emplace_back(tensorName);
shape_min_max_t outputMinMax;
for (size_t i = 0; i < dimCount; ++i) {
outputMinMax.first.emplace_back(shapeMin[i]);
outputMinMax.second.emplace_back(shapeMax[i]);
}
this->outShapesMinMaxes.insert({tensorName, std::move(outputMinMax)});
this->outputDatatypes.insert({tensorName, CAPI2OVPrecision(datatype)});
}
const ov::AnyMap* servableMetadataRtInfo;
ASSERT_CAPI_STATUS_NULL(OVMS_ServableMetadataInfo(servableMetadata, reinterpret_cast<const void**>(&servableMetadataRtInfo)));
this->modelConfig = *servableMetadataRtInfo;
}

ov::element::Type_t OVMSInferenceAdapter::getInputDatatype(const std::string& inputName) const {
return inputDatatypes.at(inputName);
}

ov::element::Type_t OVMSInferenceAdapter::getOutputDatatype(const std::string& outputName) const {
return outputDatatypes.at(outputName);
}

ov::PartialShape OVMSInferenceAdapter::getInputShape(const std::string& inputName) const {
auto it = inShapesMinMaxes.find(inputName);
if (it == inShapesMinMaxes.end()) {
@@ -194,6 +288,21 @@ ov::PartialShape OVMSInferenceAdapter::getInputShape(const std::string& inputNam
}
return ovShape;
}
ov::PartialShape OVMSInferenceAdapter::getOutputShape(const std::string& outputName) const {
auto it = outShapesMinMaxes.find(outputName);
if (it == outShapesMinMaxes.end()) {
LOG(INFO) << "Could not find output:" << outputName;
throw std::runtime_error(std::string("Adapter could not find output:") + outputName);
}

ov::PartialShape ovShape;
const auto& [minBorder, maxBorder] = it->second;
ovShape.reserve(minBorder.size());
for (size_t i = 0; i < minBorder.size(); ++i) {
ovShape.emplace_back(ov::Dimension{minBorder[i], maxBorder[i]});
}
return ovShape;
}

std::vector<std::string> OVMSInferenceAdapter::getInputNames() const { return inputNames; }
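The loadModel changes also record each output's datatype and min/max shape, exposed through the new getOutputDatatype and getOutputShape getters; that is what lets a caller size an output tensor before invoking the no-copy infer overload. A small sketch of that step, assuming the servable reports a static shape for the requested output (for dynamic dimensions, where the min and max borders differ, the caller would have to pick a concrete size itself):

// Sketch only: allocate an output tensor from the adapter's reported metadata,
// then hand it to the two-argument infer so OVMS fills it in place.
ov::Tensor allocateOutput(const OVMSInferenceAdapter& adapter, const std::string& name) {
    ov::PartialShape pshape = adapter.getOutputShape(name);    // getter added in this PR
    ov::element::Type type = adapter.getOutputDatatype(name);  // getter added in this PR
    return ov::Tensor(type, pshape.get_shape());  // get_shape() throws if any dimension is dynamic
}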
Review comment: ?
Review comment: does it mean that we only get selected outputs?