From 955f13267ea921a2ce0794dceeab0f01095c8c82 Mon Sep 17 00:00:00 2001
From: sivanov-work
Date: Thu, 31 Oct 2024 12:21:25 +0000
Subject: [PATCH] Add possibility to assign multiple images to fill in batched
 tensor data

---
 .../tools/common/include/tensor_utils.hpp     |   8 +
 .../tools/common/src/tensor_utils.cpp         |  24 +++
 .../tools/single-image-test/main.cpp          | 148 ++++++++++++++----
 3 files changed, 150 insertions(+), 30 deletions(-)

diff --git a/src/plugins/intel_npu/tools/common/include/tensor_utils.hpp b/src/plugins/intel_npu/tools/common/include/tensor_utils.hpp
index 87b2301a7ae4fb..39034384a0397d 100644
--- a/src/plugins/intel_npu/tools/common/include/tensor_utils.hpp
+++ b/src/plugins/intel_npu/tools/common/include/tensor_utils.hpp
@@ -68,5 +68,13 @@ std::vector<std::vector<float>> parseTensorsAsFP32(const std::map<std::string, ov::Tensor>& tensors,
                                                    const ov::Layout& layout);
+
+/**
+ * @brief Split a batched tensor into several non-batched tensors having the same shapes and precisions.
+ *
+ * @param tensor The source batched tensor
+ * @return The list of non-batched tensors the source tensor was split into
+ */
+std::list<ov::Tensor> splitBatchedTensor(const ov::Tensor &tensor, const ov::Layout& layout, size_t parts);
 } // namespace utils
 } // namespace npu
diff --git a/src/plugins/intel_npu/tools/common/src/tensor_utils.cpp b/src/plugins/intel_npu/tools/common/src/tensor_utils.cpp
index 32616b86135243..527ebb05116be6 100644
--- a/src/plugins/intel_npu/tools/common/src/tensor_utils.cpp
+++ b/src/plugins/intel_npu/tools/common/src/tensor_utils.cpp
@@ -492,5 +492,29 @@ ov::Tensor joinTensors(const std::list<ov::Tensor>& tensors, const ov::Layout& layout) {
     }
     return out;
 }
+
+std::list<ov::Tensor> splitBatchedTensor(const ov::Tensor &tensor, const ov::Layout& layout, size_t parts) {
+    if (!parts) {
+        OPENVINO_THROW("Cannot split tensor on parts: ", parts);
+    }
+    auto pivotShape = tensor.get_shape();
+    if (!ov::layout::has_batch(layout)) {
+        OPENVINO_THROW("Cannot split tensor: has no batch_idx in layout", layout.to_string());
+    }
+    auto pivotPrecision = tensor.get_element_type();
+    if
(pivotShape[ov::layout::batch_idx(layout)] % parts != 0) {
+        OPENVINO_THROW("Cannot split tensor with batch size: ", pivotShape[ov::layout::batch_idx(layout)], " on: ", parts ," equal tensors");
+    }
+    pivotShape[ov::layout::batch_idx(layout)] /= parts;
+    std::list<ov::Tensor> ret;
+    const auto *inputBuffer = tensor.data();
+    for (size_t i = 0; i < parts; i ++) {
+        ov::Tensor out(pivotPrecision, pivotShape);
+        memcpy(out.data(), inputBuffer, out.get_byte_size());
+        inputBuffer += out.get_byte_size();
+        ret.push_back(std::move(out));
+    }
+    return ret;
+}
 } // namespace utils
 } // namespace npu
diff --git a/src/plugins/intel_npu/tools/single-image-test/main.cpp b/src/plugins/intel_npu/tools/single-image-test/main.cpp
index 4018982b022ed3..70405231573fef 100644
--- a/src/plugins/intel_npu/tools/single-image-test/main.cpp
+++ b/src/plugins/intel_npu/tools/single-image-test/main.cpp
@@ -171,6 +171,22 @@ std::vector<std::string> splitStringList(const std::string& str, char delim) {
     return out;
 }
 
+template <template <class...> class Container, class T>
+std::string to_string(const Container<T>& c) {
+    std::stringstream stream;
+    std::string ret;
+    if (!c.empty()) {
+        stream << "[";
+        for (const auto &elem : c) {
+            stream << elem << ",";
+        }
+        ret = stream.str();
+        ret.pop_back();
+        ret += "]";
+    }
+    return ret;
+}
+
 std::map<std::string, std::string> parseArgMap(std::string argMap) {
     argMap.erase(std::remove_if(argMap.begin(), argMap.end(), ::isspace), argMap.end());
 
@@ -374,7 +390,7 @@ void convertBufferType(OutT* destination, const InT* source, size_t numberOfElements)
     });
 }
 
-void cvToOV(const cv::Mat& cvImg, const ov::Tensor& tensor, const ov::Shape& shape, const ov::Layout& layout,
+void cvToOV(const cv::Mat& cvImg, size_t imgIdx, size_t imgNumber, const ov::Tensor& tensor, const ov::Shape& shape, const ov::Layout& layout,
             const std::string& colorFormat) {
     const ov::element::Type& precision = tensor.get_element_type();
 
@@ -437,8 +453,12 @@ void cvToOV(const cv::Mat& cvImg, const ov::Tensor& tensor, const ov::Shape& shape, const ov::Layout& layout,
     if (layout ==
ov::Layout("NHWC")) {
         const auto dataBuffer = reinterpret_cast<uint8_t*>(tensor.data());
-
-        cv::Mat out(static_cast<int>(H), static_cast<int>(W), cvType, dataBuffer);
+        cv::Mat auxOut(static_cast<int>(H), static_cast<int>(W), cvType);
+        cv::Mat tensorOut(static_cast<int>(H), static_cast<int>(W), cvType, dataBuffer);
+        // only a first image from an input image array fills an original input tensor up.
+        // Subsequent images (if exist) will fill batch slices of the input tensor
+        // by their number in the input array respectively
+        cv::Mat &out = (imgIdx == 0 ? tensorOut : auxOut);
 
         if (precision == ov::element::Type_t::f16) {
             const auto inPtr = in.ptr<float>();
@@ -454,13 +474,32 @@ void cvToOV(const cv::Mat& cvImg, const ov::Tensor& tensor, const ov::Shape& shape, const ov::Layout& layout,
             in.copyTo(out);
         }
 
-        for (size_t n = 1; n < N; ++n) {
+        // being called sequentially with ascending `imgIdx` values, it fills up the rest of the batched tensor with
+        // the last requested image data until its end, starting from the batched slice position
+        // determined by parameter 'imgIdx'; so that filling an N-batched tensor by an array of images of size M, where M < N,
+        // will make the final batched tensor comprise [imgIdx_0, imgIdx_1,..., imgIdx_M, imgIdx_M,...,imgIdx_M] as its slices
+        if (imgIdx == 0 && N != 1) {
+            std::cout << "Fill up all input batch slices up to " << N
+                      << " with image data from the array: [" << imgIdx
+                      << "/" << imgNumber << "]" << std::endl;
+        }
+        for (size_t n = std::max<size_t>(1, imgIdx); n < N; ++n) {
+            if (n == std::max<size_t>(1, imgIdx) && imgIdx >= 1) {
+                std::cout << "Fill input batch slices starting from index "
+                          << n << " up to " << N << " with image data from the array: ["
+                          << imgIdx << "/" << imgNumber << "]" << std::endl;
+            }
             cv::Mat batch(static_cast<int>(H), static_cast<int>(W), cvType,
                           dataBuffer + n * (out.size().area() * out.elemSize()));
             out.copyTo(batch);
         }
     } else if (layout == ov::Layout("NCHW")) {
-        auto tensorPlanes = ovToCV(tensor, shape, layout, 0);
+        ov::Tensor auxTensor(precision, shape);
+        const ov::Tensor &outTensor = (imgIdx == 0 ?
tensor : auxTensor);
+        // only a first image from an input image array fills an original input tensor up.
+        // Subsequent images (if exist) will fill batch slices of the input tensor
+        // by their number in the input array respectively
+        auto tensorPlanes = ovToCV(outTensor, shape, layout, 0);
 
         if (!(precision == ov::element::Type_t::f16 ||
               precision == ov::element::Type_t::bf16)) {
@@ -483,7 +522,21 @@ void cvToOV(const cv::Mat& cvImg, const ov::Tensor& tensor, const ov::Shape& shape, const ov::Layout& layout,
             }
         }
 
-        for (size_t n = 1; n < N; ++n) {
+        // being called sequentially with ascending `imgIdx` values, it fills up the rest of the batched tensor with
+        // the last requested image data until its end, starting from the batched slice position
+        // determined by parameter 'imgIdx'; so that filling an N-batched tensor by an array of images of size M, where M < N,
+        // will make the final batched tensor comprise [imgIdx_0, imgIdx_1,..., imgIdx_M, imgIdx_M,...,imgIdx_M] as its slices
+        if (imgIdx == 0 && N != 1) {
+            std::cout << "Fill up all input batch slices planes up to " << N
+                      << " with image data from the array: [" << imgIdx
+                      << "/" << imgNumber << "]" << std::endl;
+        }
+        for (size_t n = std::max<size_t>(1, imgIdx); n < N; ++n) {
+            if (n == std::max<size_t>(1, imgIdx) && imgIdx >= 1) {
+                std::cout << "Fill input batch slices planes starting from index "
+                          << n << " up to " << N << " with image data from the array: ["
+                          << imgIdx << "/" << imgNumber << "]" << std::endl;
+            }
             const auto batchPlanes = ovToCV(tensor, shape, layout, n);
 
             OPENVINO_ASSERT(batchPlanes.size() == tensorPlanes.size());
@@ -642,27 +695,28 @@ std::string cleanName(std::string&& name) {
     return std::move(name);
 }
 
-ov::Tensor loadImage(const ov::element::Type& precision, const ov::Shape& shape, const ov::Layout& layout,
-                     const std::string& filePath, const std::string& colorFormat) {
-    const auto frame = cv::imread(filePath, cv::IMREAD_COLOR);
-    OPENVINO_ASSERT(!frame.empty(), "Failed to open input image file ", filePath);
-
+ov::Tensor loadImages(const
ov::element::Type& precision, const ov::Shape& shape, const ov::Layout& layout,
+                      const std::vector<std::string>& filePaths, const std::string& colorFormat) {
     const ov::Tensor tensor(precision, shape);
+    for (size_t fileIndex = 0; fileIndex != filePaths.size(); fileIndex++) {
+        const auto &filePath = filePaths[fileIndex];
+        const auto frame = cv::imread(filePath, cv::IMREAD_COLOR);
+        OPENVINO_ASSERT(!frame.empty(), "Failed to open input image file ", filePath);
 
-    cvToOV(frame, tensor, shape, layout, colorFormat);
-
+        cvToOV(frame, fileIndex, filePaths.size(), tensor, shape, layout, colorFormat);
+    }
     return tensor;
 }
 
-ov::Tensor loadBinary(const ov::element::Type& modelPrecision, const ov::Shape& shape, const ov::Layout& layout,
-                      const std::string& filePath, const ov::element::Type& dataPrecision) {
+void loadBinary(const std::string& filePath, size_t imgIdx, size_t imgNumber, ov::Tensor &requestedTensor,
+                const ov::element::Type& modelPrecision, const ov::Shape& shape,
+                const ov::Layout& layout, const ov::element::Type& dataPrecision) {
     std::ifstream binaryFile(filePath, std::ios_base::binary | std::ios_base::ate);
     OPENVINO_ASSERT(binaryFile, "Failed to open input binary file: ", filePath);
     const auto fileSize = binaryFile.tellg();
     binaryFile.seekg(0, std::ios_base::beg);
     OPENVINO_ASSERT(binaryFile.good(), "While reading a file an error is encountered");
     const size_t fileBytes = static_cast<size_t>(fileSize);
-    ov::Tensor requestedTensor(modelPrecision, shape);
     const size_t reqTensorBytes = static_cast<size_t>(requestedTensor.get_byte_size());
 
     if (dataPrecision != modelPrecision && dataPrecision != ov::element::Type_t::undefined) {
@@ -676,7 +730,7 @@ ov::Tensor loadBinary(const ov::element::Type& modelPrecision, const ov::Shape& shape, const ov::Layout& layout,
         std::cout << "File contains " << fileBytes << " bytes, but it expected to be: " << inputTensor.get_byte_size()
                   << " while converting precision from " << dataPrecision << " to " << modelPrecision
-                  << ". Check whether it is possible to batch loading " << std::endl;
+                  << ". Check whether it is possible to fit it into batch loading " << std::endl;
 
         OPENVINO_ASSERT(ov::layout::has_batch(layout), "Input layout has no batch dimenstion: ", layout.to_string());
         size_t N = shape[ov::layout::batch_idx(layout)];
@@ -691,9 +745,15 @@ ov::Tensor loadBinary(const ov::element::Type& modelPrecision, const ov::Shape& shape, const ov::Layout& layout,
         const ov::Tensor convertedPrecisionTensor(modelPrecision, debatchedInputTensorShape);
         npu::utils::convertTensorPrecision(inputDebatchedTensor, convertedPrecisionTensor);
         std::list<ov::Tensor> tensorsToJoin;
-        std::generate_n(std::back_inserter(tensorsToJoin), N, [&convertedPrecisionTensor]() {
-            return convertedPrecisionTensor;
-        });
+        std::list<ov::Tensor> tensorsFromSplit = npu::utils::splitBatchedTensor(requestedTensor, layout, N);
+        // Constitute a new batched tensor of size N from its parts enumerated by indices from the interval [0...imgIdx], where imgIdx < N
+        // The rest of the parts of the new tensor [imgIdx+1...N] will be filled up with the same content as the image at imgIdx
+        std::copy_n(tensorsFromSplit.begin(), std::min(imgIdx, N), std::back_inserter(tensorsToJoin));
+        if (imgIdx < N) {
+            std::generate_n(std::back_inserter(tensorsToJoin), N - imgIdx, [&convertedPrecisionTensor]() {
+                return convertedPrecisionTensor;
+            });
+        }
         requestedTensor = npu::utils::joinTensors(tensorsToJoin, layout);
     }
 
@@ -704,22 +764,41 @@ ov::Tensor loadBinary(const ov::element::Type& modelPrecision, const ov::Shape& shape, const ov::Layout& layout,
     } else {
         std::cout << "File contains " << fileBytes << " bytes, but it expected to be: " << reqTensorBytes
                   << " when datatypes match. "
-                  << ". Check whether it is possible to batch loading " << std::endl;
+                  << "Check whether it is possible to fit it into batch loading " << std::endl;
 
         OPENVINO_ASSERT(ov::layout::has_batch(layout), "Input layout has no batch dimenstion: ", layout.to_string());
         size_t N = shape[ov::layout::batch_idx(layout)];
 
         OPENVINO_ASSERT(fileBytes * N == reqTensorBytes, "File contains ", fileBytes, " bytes, but ", reqTensorBytes,
                         " in batch size ", N, " expected");
 
-        // duplicate a binary into tensor memory if the tensor batched
-        for (size_t n = 0; n < N; ++n) {
+        // duplicate a binary into tensor memory if the tensor is batched, starting from imgIdx
+        if (imgIdx == 0 && N != 1) {
+            std::cout << "Fill up all input batch slices up to " << N
+                      << " with binary data from the array: [" << imgIdx
+                      << "/" << imgNumber << "]" << std::endl;
+        }
+        for (size_t n = std::max<size_t>(0, imgIdx); n < N; ++n) {
+            if (n == std::max<size_t>(1, imgIdx) && imgIdx >= 1) {
+                std::cout << "Fill input batch slices starting from index "
+                          << n << " up to " << N
+                          << " with binary data from the data sources array: ["
+                          << imgIdx << "/" << imgNumber << "]" << std::endl;
+            }
             binaryFile.seekg(0, std::ios_base::beg);
             binaryFile.read(reinterpret_cast<char*>(requestedTensor.data()) + fileBytes * n,
                             static_cast<std::streamsize>(fileBytes));
         }
     }
 }
 
+ov::Tensor loadBinaries(const ov::element::Type& modelPrecision, const ov::Shape& shape, const ov::Layout& layout,
+                        const std::vector<std::string>& filePaths, const ov::element::Type& dataPrecision) {
+    ov::Tensor requestedTensor(modelPrecision, shape);
+    for (size_t fileIndex = 0; fileIndex != filePaths.size(); fileIndex++) {
+        const auto &filePath = filePaths[fileIndex];
+        loadBinary(filePath, fileIndex, filePaths.size(), requestedTensor, modelPrecision, shape, layout, dataPrecision);
+    }
     return requestedTensor;
 }
 
@@ -740,12 +819,12 @@ ov::Tensor loadBinary(const ov::element::Type& modelPrecision, const ov::Shape& shape, const ov::Layout& layout,
  * @return The tensor containing the loaded data.
 */
 ov::Tensor loadInput(const ov::element::Type& modelPrecision, const ov::Shape& shape, const ov::Layout& layout,
-                     const std::string& filePath, const std::string& colorFormat,
+                     const std::vector<std::string>& filePaths, const std::string& colorFormat,
                      const ov::element::Type& dataPrecision = ov::element::Type_t::undefined) {
     if (isImage(shape, layout) && !FLAGS_img_as_bin) {
-        return loadImage(modelPrecision, shape, layout, filePath, colorFormat);
+        return loadImages(modelPrecision, shape, layout, filePaths, colorFormat);
     } else {
-        return loadBinary(modelPrecision, shape, layout, filePath, dataPrecision);
+        return loadBinaries(modelPrecision, shape, layout, filePaths, dataPrecision);
     }
 }
 
@@ -1756,11 +1835,20 @@ static int runSingleImageTest() {
     std::map<std::string, ov::Layout> outModelLayouts = parseLayoutRegex(FLAGS_oml);
 
     std::vector<std::string> inputFilesPerCase;
-    std::vector<std::vector<std::string>> inputFilesForOneInfer;
+    using FilesPerInput = std::vector<std::string>;
+    using FilesForModelInputs = std::vector<FilesPerInput>;
+    std::vector<FilesForModelInputs> inputFilesForOneInfer;
 
     inputFilesPerCase = splitStringList(FLAGS_input, ';');
     for (const auto& images : inputFilesPerCase) {
-        inputFilesForOneInfer.push_back(splitStringList(images, ','));
+        std::vector<std::string> filesPerModel = splitStringList(images, ',');
+        FilesForModelInputs entireModelFiles;
+        entireModelFiles.reserve(filesPerModel.size());
+        for (auto &&filesPerInput : filesPerModel) {
+            // from now on, each input of a model supports multiple image files as content of a batched input
+            entireModelFiles.push_back(splitStringList(filesPerInput, '|'));
+        }
+        inputFilesForOneInfer.push_back(std::move(entireModelFiles));
     }
 
     std::vector<std::string> inputBinPrecisionStrPerCase;
@@ -1983,7 +2071,7 @@ static int runSingleImageTest() {
     for (size_t numberOfTestCase = 0; numberOfTestCase < inputFilesPerCase.size(); ++numberOfTestCase) {
         const auto inputsInfo = compiledModel.inputs();
         const auto outputsInfo = compiledModel.outputs();
-        std::vector<std::string> inputFiles = inputFilesForOneInfer[numberOfTestCase];
+        const FilesForModelInputs &inputFiles =
inputFilesForOneInfer[numberOfTestCase];
         OPENVINO_ASSERT(inputFiles.size() == inputsInfo.size(), "Number of input files ", inputFiles.size(),
                         " doesn't match network configuration ", inputsInfo.size());
@@ -2018,7 +2106,7 @@ static int runSingleImageTest() {
             inputDescriptors.emplace(inputInfo.get_any_name(), TensorDescriptor{precision, shape, inputLayout});
 
-            std::cout << "Load input #" << inputInd << " from " << inputFiles[inputInd] << " as " << precision
+            std::cout << "Load input #" << inputInd << " from " << to_string(inputFiles[inputInd]) << " as " << precision
                       << " " << inputLayout.to_string() << " " << shape << std::endl;
 
             const ov::Tensor tensor =