Add PPHumanSeg new model and background blur (#166)
* add model and bg blur

* lint

* Update src/background-filter.cpp

Co-authored-by: Kaito Udagawa <umireon@gmail.com>

* Update src/models/Model.h

Co-authored-by: Kaito Udagawa <umireon@gmail.com>

* Update background-filter.cpp

* Update ModelPPHumanSeg.h

* Update Model.h

---------

Co-authored-by: Kaito Udagawa <umireon@gmail.com>
royshil and umireon committed Mar 10, 2023
1 parent 074f7d3 commit 8591913
Showing 12 changed files with 275 additions and 180 deletions.
2 changes: 1 addition & 1 deletion CMakeLists.txt
@@ -4,7 +4,7 @@ set(CMAKE_CXX_STANDARD 20)

# Change obs-plugintemplate to your plugin's name in a machine-readable format (e.g.:
# obs-myawesomeplugin) and set
project(obs-backgroundremoval VERSION 0.5.2)
project(obs-backgroundremoval VERSION 0.5.3)
add_library(${CMAKE_PROJECT_NAME} MODULE)

# Replace `Your Name Here` with the name (yours or your organization's) you want to see as the
2 changes: 1 addition & 1 deletion buildspec.json
@@ -82,5 +82,5 @@
}
},
"name": "obs-backgroundremoval",
"version": "0.5.2"
"version": "0.5.3"
}
6 changes: 4 additions & 2 deletions cmake/BuildMyOnnxruntime.cmake
@@ -76,8 +76,10 @@ ExternalProject_Add(
GIT_TAG v1.13.1
GIT_SHALLOW ON
CONFIGURE_COMMAND "${Onnxruntime_PLATFORM_CONFIGURE}"
BUILD_COMMAND ${PYTHON3} <SOURCE_DIR>/tools/ci_build/build.py --build_dir <BINARY_DIR> --config
${Onnxruntime_BUILD_TYPE} --parallel --skip_tests ${Onnxruntime_PLATFORM_OPTIONS}
BUILD_COMMAND
${PYTHON3} <SOURCE_DIR>/tools/ci_build/build.py --build_dir <BINARY_DIR> --config
${Onnxruntime_BUILD_TYPE} --parallel --skip_tests --skip_submodule_sync
${Onnxruntime_PLATFORM_OPTIONS}
BUILD_BYPRODUCTS
<INSTALL_DIR>/lib/${CMAKE_STATIC_LIBRARY_PREFIX}onnxruntime_session${CMAKE_STATIC_LIBRARY_SUFFIX}
<INSTALL_DIR>/lib/${CMAKE_STATIC_LIBRARY_PREFIX}onnxruntime_framework${CMAKE_STATIC_LIBRARY_SUFFIX}
Binary file added data/models/pphumanseg_fp32.onnx
Binary file not shown.
34 changes: 32 additions & 2 deletions src/background-filter.cpp
@@ -29,13 +29,19 @@
#include <fstream>

#include "plugin-macros.generated.h"
#include "Model.h"
#include "models/ModelMODNET.h"
#include "models/ModelSINET.h"
#include "models/ModelMediapipe.h"
#include "models/ModelSelfie.h"
#include "models/ModelRVM.h"
#include "models/ModelPPHumanSeg.h"

const char *MODEL_SINET = "models/SINet_Softmax_simple.onnx";
const char *MODEL_MODNET = "models/modnet_simple.onnx";
const char *MODEL_MEDIAPIPE = "models/mediapipe.onnx";
const char *MODEL_SELFIE = "models/selfie_segmentation.onnx";
const char *MODEL_RVM = "models/rvm_mobilenetv3_fp32.onnx";
const char *MODEL_PPHUMANSEG = "models/pphumanseg_fp32.onnx";

const char *USEGPU_CPU = "cpu";
const char *USEGPU_DML = "dml";
@@ -72,6 +78,7 @@ struct background_removal_filter {
cv::Mat backgroundMask;
int maskEveryXFrames = 1;
int maskEveryXFramesCount = 0;
int64_t blurBackground = 0;

#if _WIN32
const wchar_t *modelFilepath = nullptr;
@@ -130,11 +137,16 @@ static obs_properties_t *filter_properties(void *data)
obs_property_list_add_string(p_model_select, obs_module_text("MediaPipe"), MODEL_MEDIAPIPE);
obs_property_list_add_string(p_model_select, obs_module_text("Selfie Segmentation"),
MODEL_SELFIE);
obs_property_list_add_string(p_model_select, obs_module_text("PPHumanSeg"), MODEL_PPHUMANSEG);
obs_property_list_add_string(p_model_select, obs_module_text("Robust Video Matting"), MODEL_RVM);

obs_properties_add_int(props, "mask_every_x_frames",
obs_module_text("Calculate mask every X frame"), 1, 300, 1);

obs_properties_add_int_slider(props, "blur_background",
obs_module_text("Blur background factor (0 - no blur, use color)"),
0, 100, 1);

UNUSED_PARAMETER(data);
return props;
}
@@ -263,7 +275,9 @@ static void filter_update(void *data, obs_data_t *settings)
tf->smoothContour = (float)obs_data_get_double(settings, "smooth_contour");
tf->feather = (float)obs_data_get_double(settings, "feather");
tf->maskEveryXFrames = (int)obs_data_get_int(settings, "mask_every_x_frames");
tf->maskEveryXFramesCount = (int)(0);
tf->blurBackground = obs_data_get_int(settings, "blur_background");

const std::string newUseGpu = obs_data_get_string(settings, "useGPU");
const std::string newModel = obs_data_get_string(settings, "model_select");
@@ -289,6 +303,9 @@ static void filter_update(void *data, obs_data_t *settings)
if (tf->modelSelection == MODEL_RVM) {
tf->model.reset(new ModelRVM);
}
if (tf->modelSelection == MODEL_PPHUMANSEG) {
tf->model.reset(new ModelPPHumanSeg);
}

createOrtSession(tf);
}
@@ -469,6 +486,14 @@ static struct obs_source_frame *filter_render(void *data, struct obs_source_fram

// Apply the mask back to the main image.
try {
cv::Mat blurredBackground;
if (tf->blurBackground > 0.0) {
// Blur the background (fast box filter)
int k_size = (int)(5 + tf->blurBackground);
k_size = k_size % 2 == 0 ? k_size + 1 : k_size;
cv::boxFilter(imageBGR, blurredBackground, imageBGR.depth(), cv::Size(k_size, k_size));
}

if (tf->feather > 0.0) {
// If we're going to feather/alpha blend, we need to do some processing that
// will combine the blended "foreground" and "masked background" images onto the main image.
@@ -495,7 +520,12 @@ static struct obs_source_frame *filter_render(void *data, struct obs_source_fram
} else {
// If we're not feathering/alpha blending, we can
// apply the mask as-is back onto the main image.
imageBGR.setTo(tf->backgroundColor, backgroundMask);
if (tf->blurBackground > 0.0) {
// copy the blurred background to the main image where the mask is 0
blurredBackground.copyTo(imageBGR, backgroundMask);
} else {
imageBGR.setTo(tf->backgroundColor, backgroundMask);
}
}
} catch (const std::exception &e) {
blog(LOG_ERROR, "%s", e.what());
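The blur compositing added above is self-contained enough to sketch outside OBS. A minimal illustration of the same technique, assuming an 8-bit BGR frame and a mask that is non-zero on background pixels (the names frame, backgroundMask, and blurFactor are illustrative, not the plugin's):

#include <opencv2/core.hpp>
#include <opencv2/imgproc.hpp>

// Composite a box-blurred copy of the frame under the segmented foreground.
// blurFactor mirrors the plugin's 0-100 "blur_background" slider.
void compositeBlurredBackground(cv::Mat &frame, const cv::Mat &backgroundMask, int blurFactor)
{
	if (blurFactor <= 0)
		return; // 0 means "no blur, use color" in the plugin UI

	// Kernel size grows with the slider and is forced odd, as in the filter code.
	int kSize = 5 + blurFactor;
	if (kSize % 2 == 0)
		kSize += 1;

	cv::Mat blurred;
	cv::boxFilter(frame, blurred, frame.depth(), cv::Size(kSize, kSize));

	// Overwrite only the pixels the mask marks as background.
	blurred.copyTo(frame, backgroundMask);
}

As the in-diff comment notes, a box filter is used rather than a Gaussian because it is considerably cheaper at large kernel sizes, and for a defocused background the visual difference is minor.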
174 changes: 0 additions & 174 deletions src/Model.h → src/models/Model.h
@@ -228,52 +228,6 @@ class Model {
}
};

class ModelSelfie : public Model {
private:
/* data */
public:
ModelSelfie(/* args */) {}
~ModelSelfie() {}

virtual void postprocessOutput(cv::Mat &outputImage)
{
cv::normalize(outputImage, outputImage, 1.0, 0.0, cv::NORM_MINMAX);
}
};

class ModelMediaPipe : public Model {
private:
/* data */
public:
ModelMediaPipe(/* args */) {}
~ModelMediaPipe() {}

virtual cv::Mat getNetworkOutput(const std::vector<std::vector<int64_t>> &outputDims,
std::vector<std::vector<float>> &outputTensorValues)
{
uint32_t outputWidth = (int)outputDims[0].at(2);
uint32_t outputHeight = (int)outputDims[0].at(1);
int32_t outputChannels = CV_32FC2;

return cv::Mat(outputHeight, outputWidth, outputChannels, outputTensorValues[0].data());
}

virtual void postprocessOutput(cv::Mat &outputImage)
{
// take 1st channel
std::vector<cv::Mat> outputImageSplit;
cv::split(outputImage, outputImageSplit);

// "Softmax"
cv::Mat outputA, outputB;
cv::exp(outputImageSplit[0], outputA);
cv::exp(outputImageSplit[1], outputB);
outputImage = outputA / (outputA + outputB);

cv::normalize(outputImage, outputImage, 1.0, 0.0, cv::NORM_MINMAX);
}
};

class ModelBCHW : public Model {
public:
ModelBCHW(/* args */) {}
@@ -305,132 +259,4 @@ class ModelBCHW {
}
};

class ModelSINET : public ModelBCHW {
public:
ModelSINET(/* args */) {}
~ModelSINET() {}

virtual void prepareInputToNetwork(cv::Mat &resizedImage, cv::Mat &preprocessedImage)
{
cv::subtract(resizedImage, cv::Scalar(102.890434, 111.25247, 126.91212), resizedImage);
cv::multiply(resizedImage, cv::Scalar(1.0 / 62.93292, 1.0 / 62.82138, 1.0 / 66.355705) / 255.0,
resizedImage);
hwc_to_chw(resizedImage, preprocessedImage);
}
};

class ModelMODNET : public ModelBCHW {
public:
ModelMODNET(/* args */) {}
~ModelMODNET() {}

virtual void prepareInputToNetwork(cv::Mat &resizedImage, cv::Mat &preprocessedImage)
{
cv::subtract(resizedImage, cv::Scalar::all(127.5), resizedImage);
resizedImage = resizedImage / 127.5;
hwc_to_chw(resizedImage, preprocessedImage);
}
};

class ModelRVM : public ModelBCHW {
private:
/* data */
public:
ModelRVM(/* args */) {}
~ModelRVM() {}

virtual void prepareInputToNetwork(cv::Mat &resizedImage, cv::Mat &preprocessedImage)
{
resizedImage = resizedImage / 256.0;
hwc_to_chw(resizedImage, preprocessedImage);
}

virtual void populateInputOutputNames(const std::unique_ptr<Ort::Session> &session,
std::vector<Ort::AllocatedStringPtr> &inputNames,
std::vector<Ort::AllocatedStringPtr> &outputNames)
{
Ort::AllocatorWithDefaultOptions allocator;

inputNames.clear();
outputNames.clear();

for (size_t i = 0; i < session->GetInputCount(); i++) {
inputNames.push_back(session->GetInputNameAllocated(i, allocator));
}
for (size_t i = 1; i < session->GetOutputCount(); i++) {
outputNames.push_back(session->GetOutputNameAllocated(i, allocator));
}
}

virtual bool populateInputOutputShapes(const std::unique_ptr<Ort::Session> &session,
std::vector<std::vector<int64_t>> &inputDims,
std::vector<std::vector<int64_t>> &outputDims)
{
// Assuming model only has one input and one output image

inputDims.clear();
outputDims.clear();

for (size_t i = 0; i < session->GetInputCount(); i++) {
// Get input shape
const Ort::TypeInfo inputTypeInfo = session->GetInputTypeInfo(i);
const auto inputTensorInfo = inputTypeInfo.GetTensorTypeAndShapeInfo();
inputDims.push_back(inputTensorInfo.GetShape());
}

for (size_t i = 1; i < session->GetOutputCount(); i++) {
// Get output shape
const Ort::TypeInfo outputTypeInfo = session->GetOutputTypeInfo(i);
const auto outputTensorInfo = outputTypeInfo.GetTensorTypeAndShapeInfo();
outputDims.push_back(outputTensorInfo.GetShape());
}

inputDims[0][0] = 1;
inputDims[0][2] = 192;
inputDims[0][3] = 192;
for (size_t i = 1; i < 5; i++) {
inputDims[i][0] = 1;
inputDims[i][1] = (i == 1) ? 16 : (i == 2) ? 20 : (i == 3) ? 40 : 64;
inputDims[i][2] = 192 / (2 << (i - 1));
inputDims[i][3] = 192 / (2 << (i - 1));
}

outputDims[0][0] = 1;
outputDims[0][2] = 192;
outputDims[0][3] = 192;
for (size_t i = 1; i < 5; i++) {
outputDims[i][0] = 1;
outputDims[i][2] = 192 / (2 << (i - 1));
outputDims[i][3] = 192 / (2 << (i - 1));
}
return true;
}

virtual void loadInputToTensor(const cv::Mat &preprocessedImage, uint32_t, uint32_t,
std::vector<std::vector<float>> &inputTensorValues)
{
inputTensorValues[0].assign(preprocessedImage.begin<float>(), preprocessedImage.end<float>());
inputTensorValues[5][0] = 1.0f;
}

virtual cv::Mat getNetworkOutput(const std::vector<std::vector<int64_t>> &outputDims,
std::vector<std::vector<float>> &outputTensorValues)
{
// BCHW
uint32_t outputWidth = (int)outputDims[0].at(3);
uint32_t outputHeight = (int)outputDims[0].at(2);
int32_t outputChannels = CV_32FC1;

return cv::Mat(outputHeight, outputWidth, outputChannels, outputTensorValues[0].data());
}

virtual void assignOutputToInput(std::vector<std::vector<float>> &outputTensorValues,
std::vector<std::vector<float>> &inputTensorValues)
{
for (size_t i = 1; i < 5; i++) {
inputTensorValues[i].assign(outputTensorValues[i].begin(), outputTensorValues[i].end());
}
}
};

#endif
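The ModelBCHW subclasses kept in this file, and the new per-model headers below, all funnel their preprocessed frame through a hwc_to_chw helper that is defined earlier in Model.h, outside this hunk. For reference, a conversion with that shape contract could look like the following sketch (an illustration of the HWC-to-CHW repacking, not necessarily the plugin's exact helper):

#include <opencv2/core.hpp>
#include <vector>

// Repack an interleaved H x W x C image (HWC) into planar C x H x W (CHW),
// the memory layout expected by ONNX models with BCHW input tensors.
static void hwc_to_chw(const cv::Mat &hwc, cv::Mat &chw)
{
	std::vector<cv::Mat> planes;
	cv::split(hwc, planes); // one H x W plane per channel

	// Stacking the planes vertically yields a (C*H) x W matrix whose
	// flat buffer is exactly the C x H x W tensor order.
	cv::vconcat(planes, chw);
}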
19 changes: 19 additions & 0 deletions src/models/ModelMODNET.h
@@ -0,0 +1,19 @@
#ifndef MODELMODNET_H
#define MODELMODNET_H

#include "Model.h"

class ModelMODNET : public ModelBCHW {
public:
ModelMODNET(/* args */) {}
~ModelMODNET() {}

virtual void prepareInputToNetwork(cv::Mat &resizedImage, cv::Mat &preprocessedImage)
{
cv::subtract(resizedImage, cv::Scalar::all(127.5), resizedImage);
resizedImage = resizedImage / 127.5;
hwc_to_chw(resizedImage, preprocessedImage);
}
};

#endif // MODELMODNET_H
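The preprocessing above is the usual zero-centered scaling for MODNet: (x - 127.5) / 127.5 maps 8-bit pixel values from [0, 255] onto [-1, 1]. The endpoints check out exactly, since 127.5 is representable in binary floating point:

// Endpoint check for (x - 127.5) / 127.5; compiles under the project's C++20.
static_assert((0.0 - 127.5) / 127.5 == -1.0);
static_assert((127.5 - 127.5) / 127.5 == 0.0);
static_assert((255.0 - 127.5) / 127.5 == 1.0);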
39 changes: 39 additions & 0 deletions src/models/ModelMediapipe.h
@@ -0,0 +1,39 @@
#ifndef MODELMEDIAPIPE_H
#define MODELMEDIAPIPE_H

#include "Model.h"

class ModelMediaPipe : public Model {
private:
/* data */
public:
ModelMediaPipe(/* args */) {}
~ModelMediaPipe() {}

virtual cv::Mat getNetworkOutput(const std::vector<std::vector<int64_t>> &outputDims,
std::vector<std::vector<float>> &outputTensorValues)
{
uint32_t outputWidth = (int)outputDims[0].at(2);
uint32_t outputHeight = (int)outputDims[0].at(1);
int32_t outputChannels = CV_32FC2;

return cv::Mat(outputHeight, outputWidth, outputChannels, outputTensorValues[0].data());
}

virtual void postprocessOutput(cv::Mat &outputImage)
{
// take 1st channel
std::vector<cv::Mat> outputImageSplit;
cv::split(outputImage, outputImageSplit);

// "Softmax"
cv::Mat outputA, outputB;
cv::exp(outputImageSplit[0], outputA);
cv::exp(outputImageSplit[1], outputB);
outputImage = outputA / (outputA + outputB);

cv::normalize(outputImage, outputImage, 1.0, 0.0, cv::NORM_MINMAX);
}
};

#endif // MODELMEDIAPIPE_H
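The postprocess above is a per-pixel two-class softmax: with logits a and b in the two output channels, the kept value is exp(a) / (exp(a) + exp(b)), the probability assigned to the first class, followed by a min-max normalize. A scalar version of the same formula, written with the customary max-subtraction that guards std::exp against overflow (illustrative, not the plugin's code):

#include <algorithm>
#include <cmath>

// Two-class softmax for one pixel: probability assigned to logit a.
// Shifting both logits by the max does not change the ratio but keeps exp() in range.
float softmax2(float a, float b)
{
	const float m = std::max(a, b);
	const float ea = std::exp(a - m);
	const float eb = std::exp(b - m);
	return ea / (ea + eb);
}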
