diff --git a/CMakeLists.txt b/CMakeLists.txt
index b555be71..35ef1d23 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -4,7 +4,7 @@ set(CMAKE_CXX_STANDARD 20)
 
 # Change obs-plugintemplate to your plugin's name in a machine-readable format (e.g.:
 # obs-myawesomeplugin) and set
-project(obs-backgroundremoval VERSION 0.5.2)
+project(obs-backgroundremoval VERSION 0.5.3)
 
 add_library(${CMAKE_PROJECT_NAME} MODULE)
 
 # Replace `Your Name Here` with the name (yours or your organization's) you want to see as the
diff --git a/buildspec.json b/buildspec.json
index be923a25..8073f58c 100644
--- a/buildspec.json
+++ b/buildspec.json
@@ -82,5 +82,5 @@
     }
   },
   "name": "obs-backgroundremoval",
-  "version": "0.5.2"
+  "version": "0.5.3"
 }
diff --git a/cmake/BuildMyOnnxruntime.cmake b/cmake/BuildMyOnnxruntime.cmake
index 60be77cc..e232496b 100644
--- a/cmake/BuildMyOnnxruntime.cmake
+++ b/cmake/BuildMyOnnxruntime.cmake
@@ -76,8 +76,10 @@ ExternalProject_Add(
  GIT_TAG v1.13.1
  GIT_SHALLOW ON
  CONFIGURE_COMMAND "${Onnxruntime_PLATFORM_CONFIGURE}"
-  BUILD_COMMAND ${PYTHON3} <SOURCE_DIR>/tools/ci_build/build.py --build_dir <BINARY_DIR> --config
-                ${Onnxruntime_BUILD_TYPE} --parallel --skip_tests ${Onnxruntime_PLATFORM_OPTIONS}
+  BUILD_COMMAND
+    ${PYTHON3} <SOURCE_DIR>/tools/ci_build/build.py --build_dir <BINARY_DIR> --config
+    ${Onnxruntime_BUILD_TYPE} --parallel --skip_tests --skip_submodule_sync
+    ${Onnxruntime_PLATFORM_OPTIONS}
  BUILD_BYPRODUCTS
    <BINARY_DIR>/lib/${CMAKE_STATIC_LIBRARY_PREFIX}onnxruntime_session${CMAKE_STATIC_LIBRARY_SUFFIX}
    <BINARY_DIR>/lib/${CMAKE_STATIC_LIBRARY_PREFIX}onnxruntime_framework${CMAKE_STATIC_LIBRARY_SUFFIX}
diff --git a/data/models/pphumanseg_fp32.onnx b/data/models/pphumanseg_fp32.onnx
new file mode 100644
index 00000000..b1b077f2
Binary files /dev/null and b/data/models/pphumanseg_fp32.onnx differ
diff --git a/src/background-filter.cpp b/src/background-filter.cpp
index 902d7774..1c18b4f4 100644
--- a/src/background-filter.cpp
+++ b/src/background-filter.cpp
@@ -29,13 +29,19 @@ #include
 #include "plugin-macros.generated.h"
-#include "Model.h"
+#include "models/ModelMODNET.h"
+#include "models/ModelSINET.h"
+#include "models/ModelMediapipe.h"
+#include "models/ModelSelfie.h"
+#include "models/ModelRVM.h"
+#include "models/ModelPPHumanSeg.h"
 
 const char *MODEL_SINET = "models/SINet_Softmax_simple.onnx";
 const char *MODEL_MODNET = "models/modnet_simple.onnx";
 const char *MODEL_MEDIAPIPE = "models/mediapipe.onnx";
 const char *MODEL_SELFIE = "models/selfie_segmentation.onnx";
 const char *MODEL_RVM = "models/rvm_mobilenetv3_fp32.onnx";
+const char *MODEL_PPHUMANSEG = "models/pphumanseg_fp32.onnx";
 
 const char *USEGPU_CPU = "cpu";
 const char *USEGPU_DML = "dml";
@@ -72,6 +78,7 @@ struct background_removal_filter {
 	cv::Mat backgroundMask;
 	int maskEveryXFrames = 1;
 	int maskEveryXFramesCount = 0;
+	int64_t blurBackground = 0;
 
 #if _WIN32
 	const wchar_t *modelFilepath = nullptr;
@@ -130,11 +137,16 @@ static obs_properties_t *filter_properties(void *data)
 	obs_property_list_add_string(p_model_select, obs_module_text("MediaPipe"), MODEL_MEDIAPIPE);
 	obs_property_list_add_string(p_model_select, obs_module_text("Selfie Segmentation"), MODEL_SELFIE);
+	obs_property_list_add_string(p_model_select, obs_module_text("PPHumanSeg"), MODEL_PPHUMANSEG);
 	obs_property_list_add_string(p_model_select, obs_module_text("Robust Video Matting"), MODEL_RVM);
 
 	obs_properties_add_int(props, "mask_every_x_frames",
 			       obs_module_text("Calculate mask every X frame"), 1, 300, 1);
 
+	obs_properties_add_int_slider(props, "blur_background",
+				      obs_module_text("Blur background factor (0 - no blur, use color)"),
+				      0, 100, 1);
+
 	UNUSED_PARAMETER(data);
 
 	return props;
 }
@@ -263,7 +275,8 @@ static void filter_update(void *data, obs_data_t *settings)
 	tf->smoothContour = (float)obs_data_get_double(settings, "smooth_contour");
 	tf->feather = (float)obs_data_get_double(settings, "feather");
 	tf->maskEveryXFrames = (int)obs_data_get_int(settings, "mask_every_x_frames");
 	tf->maskEveryXFramesCount = (int)(0);
+	tf->blurBackground = obs_data_get_int(settings, "blur_background");
 
 	const std::string newUseGpu = obs_data_get_string(settings, "useGPU");
 	const std::string newModel = obs_data_get_string(settings, "model_select");
@@ -289,6 +303,9 @@ static void filter_update(void *data, obs_data_t *settings)
 	if (tf->modelSelection == MODEL_RVM) {
 		tf->model.reset(new ModelRVM);
 	}
+	if (tf->modelSelection == MODEL_PPHUMANSEG) {
+		tf->model.reset(new ModelPPHumanSeg);
+	}
 
 	createOrtSession(tf);
 }
@@ -469,6 +486,14 @@ static struct obs_source_frame *filter_render(void *data, struct obs_source_fram
 	// Apply the mask back to the main image.
 	try {
+		cv::Mat blurredBackground;
+		if (tf->blurBackground > 0.0) {
+			// Blur the background (fast box filter)
+			int k_size = (int)(5 + tf->blurBackground);
+			k_size = k_size % 2 == 0 ? k_size + 1 : k_size;
+			cv::boxFilter(imageBGR, blurredBackground, imageBGR.depth(), cv::Size(k_size, k_size));
+		}
+
 		if (tf->feather > 0.0) {
 			// If we're going to feather/alpha blend, we need to do some processing that
 			// will combine the blended "foreground" and "masked background" images onto the main image.
@@ -495,7 +520,12 @@ static struct obs_source_frame *filter_render(void *data, struct obs_source_fram
 		} else {
 			// If we're not feathering/alpha blending, we can
 			// apply the mask as-is back onto the main image.
-			imageBGR.setTo(tf->backgroundColor, backgroundMask);
+			if (tf->blurBackground > 0.0) {
+				// copy the blurred background to the main image where the mask is 0
+				blurredBackground.copyTo(imageBGR, backgroundMask);
+			} else {
+				imageBGR.setTo(tf->backgroundColor, backgroundMask);
+			}
 		}
 	} catch (const std::exception &e) {
 		blog(LOG_ERROR, "%s", e.what());
diff --git a/src/Model.h b/src/models/Model.h
similarity index 61%
rename from src/Model.h
rename to src/models/Model.h
index cea01c96..9a7ea02f 100644
--- a/src/Model.h
+++ b/src/models/Model.h
@@ -228,52 +228,6 @@ class Model {
 	}
 };
 
-class ModelSelfie : public Model {
-      private:
-	/* data */
-      public:
-	ModelSelfie(/* args */) {}
-	~ModelSelfie() {}
-
-	virtual void postprocessOutput(cv::Mat &outputImage)
-	{
-		cv::normalize(outputImage, outputImage, 1.0, 0.0, cv::NORM_MINMAX);
-	}
-};
-
-class ModelMediaPipe : public Model {
-      private:
-	/* data */
-      public:
-	ModelMediaPipe(/* args */) {}
-	~ModelMediaPipe() {}
-
-	virtual cv::Mat getNetworkOutput(const std::vector<std::vector<int64_t>> &outputDims,
-					 std::vector<std::vector<float>> &outputTensorValues)
-	{
-		uint32_t outputWidth = (int)outputDims[0].at(2);
-		uint32_t outputHeight = (int)outputDims[0].at(1);
-		int32_t outputChannels = CV_32FC2;
-
-		return cv::Mat(outputHeight, outputWidth, outputChannels, outputTensorValues[0].data());
-	}
-
-	virtual void postprocessOutput(cv::Mat &outputImage)
-	{
-		// take 1st channel
-		std::vector<cv::Mat> outputImageSplit;
-		cv::split(outputImage, outputImageSplit);
-
-		// "Softmax"
-		cv::Mat outputA, outputB;
-		cv::exp(outputImageSplit[0], outputA);
-		cv::exp(outputImageSplit[1], outputB);
-		outputImage = outputA / (outputA + outputB);
-
-		cv::normalize(outputImage, outputImage, 1.0, 0.0, cv::NORM_MINMAX);
-	}
-};
-
 class ModelBCHW : public Model {
       public:
 	ModelBCHW(/* args */) {}
@@ -305,132 +259,4 @@ class ModelBCHW : public Model {
 	}
 };
 
-class ModelSINET : public ModelBCHW {
-      public:
-	ModelSINET(/* args */) {}
-	~ModelSINET() {}
-
-	virtual void prepareInputToNetwork(cv::Mat &resizedImage, cv::Mat &preprocessedImage)
-	{
-		cv::subtract(resizedImage, cv::Scalar(102.890434, 111.25247, 126.91212), resizedImage);
-		cv::multiply(resizedImage, cv::Scalar(1.0 / 62.93292, 1.0 / 62.82138, 1.0 / 66.355705) / 255.0,
-			     resizedImage);
-		hwc_to_chw(resizedImage, preprocessedImage);
-	}
-};
-
-class ModelMODNET : public ModelBCHW {
-      public:
-	ModelMODNET(/* args */) {}
-	~ModelMODNET() {}
-
-	virtual void prepareInputToNetwork(cv::Mat &resizedImage, cv::Mat &preprocessedImage)
-	{
-		cv::subtract(resizedImage, cv::Scalar::all(127.5), resizedImage);
-		resizedImage = resizedImage / 127.5;
-		hwc_to_chw(resizedImage, preprocessedImage);
-	}
-};
-
-class ModelRVM : public ModelBCHW {
-      private:
-	/* data */
-      public:
-	ModelRVM(/* args */) {}
-	~ModelRVM() {}
-
-	virtual void prepareInputToNetwork(cv::Mat &resizedImage, cv::Mat &preprocessedImage)
-	{
-		resizedImage = resizedImage / 256.0;
-		hwc_to_chw(resizedImage, preprocessedImage);
-	}
-
-	virtual void populateInputOutputNames(const std::unique_ptr<Ort::Session> &session,
-					      std::vector<Ort::AllocatedStringPtr> &inputNames,
-					      std::vector<Ort::AllocatedStringPtr> &outputNames)
-	{
-		Ort::AllocatorWithDefaultOptions allocator;
-
-		inputNames.clear();
-		outputNames.clear();
-
-		for (size_t i = 0; i < session->GetInputCount(); i++) {
-			inputNames.push_back(session->GetInputNameAllocated(i, allocator));
-		}
-		for (size_t i = 1; i < session->GetOutputCount(); i++) {
-			outputNames.push_back(session->GetOutputNameAllocated(i, allocator));
-		}
-	}
-
-	virtual bool populateInputOutputShapes(const std::unique_ptr<Ort::Session> &session,
-					       std::vector<std::vector<int64_t>> &inputDims,
-					       std::vector<std::vector<int64_t>> &outputDims)
-	{
-		// Assuming model only has one input and one output image
-
-		inputDims.clear();
-		outputDims.clear();
-
-		for (size_t i = 0; i < session->GetInputCount(); i++) {
-			// Get input shape
-			const Ort::TypeInfo inputTypeInfo = session->GetInputTypeInfo(i);
-			const auto inputTensorInfo = inputTypeInfo.GetTensorTypeAndShapeInfo();
-			inputDims.push_back(inputTensorInfo.GetShape());
-		}
-
-		for (size_t i = 1; i < session->GetOutputCount(); i++) {
-			// Get output shape
-			const Ort::TypeInfo outputTypeInfo = session->GetOutputTypeInfo(i);
-			const auto outputTensorInfo = outputTypeInfo.GetTensorTypeAndShapeInfo();
-			outputDims.push_back(outputTensorInfo.GetShape());
-		}
-
-		inputDims[0][0] = 1;
-		inputDims[0][2] = 192;
-		inputDims[0][3] = 192;
-		for (size_t i = 1; i < 5; i++) {
-			inputDims[i][0] = 1;
-			inputDims[i][1] = (i == 1) ? 16 : (i == 2) ? 20 : (i == 3) ? 40 : 64;
-			inputDims[i][2] = 192 / (2 << (i - 1));
-			inputDims[i][3] = 192 / (2 << (i - 1));
-		}
-
-		outputDims[0][0] = 1;
-		outputDims[0][2] = 192;
-		outputDims[0][3] = 192;
-		for (size_t i = 1; i < 5; i++) {
-			outputDims[i][0] = 1;
-			outputDims[i][2] = 192 / (2 << (i - 1));
-			outputDims[i][3] = 192 / (2 << (i - 1));
-		}
-		return true;
-	}
-
-	virtual void loadInputToTensor(const cv::Mat &preprocessedImage, uint32_t, uint32_t,
-				       std::vector<std::vector<float>> &inputTensorValues)
-	{
-		inputTensorValues[0].assign(preprocessedImage.begin<float>(), preprocessedImage.end<float>());
-		inputTensorValues[5][0] = 1.0f;
-	}
-
-	virtual cv::Mat getNetworkOutput(const std::vector<std::vector<int64_t>> &outputDims,
-					 std::vector<std::vector<float>> &outputTensorValues)
-	{
-		// BCHW
-		uint32_t outputWidth = (int)outputDims[0].at(3);
-		uint32_t outputHeight = (int)outputDims[0].at(2);
-		int32_t outputChannels = CV_32FC1;
-
-		return cv::Mat(outputHeight, outputWidth, outputChannels, outputTensorValues[0].data());
-	}
-
-	virtual void assignOutputToInput(std::vector<std::vector<float>> &outputTensorValues,
-					 std::vector<std::vector<float>> &inputTensorValues)
-	{
-		for (size_t i = 1; i < 5; i++) {
-			inputTensorValues[i].assign(outputTensorValues[i].begin(), outputTensorValues[i].end());
-		}
-	}
-};
-
 #endif
diff --git a/src/models/ModelMODNET.h b/src/models/ModelMODNET.h
new file mode 100644
index 00000000..23e87bef
--- /dev/null
+++ b/src/models/ModelMODNET.h
@@ -0,0 +1,19 @@
+#ifndef MODELMODNET_H
+#define MODELMODNET_H
+
+#include "Model.h"
+
+class ModelMODNET : public ModelBCHW {
+      public:
+	ModelMODNET(/* args */) {}
+	~ModelMODNET() {}
+
+	virtual void prepareInputToNetwork(cv::Mat &resizedImage, cv::Mat &preprocessedImage)
+	{
+		cv::subtract(resizedImage, cv::Scalar::all(127.5), resizedImage);
+		resizedImage = resizedImage / 127.5;
+		hwc_to_chw(resizedImage, preprocessedImage);
+	}
+};
+
+#endif // MODELMODNET_H
diff --git a/src/models/ModelMediapipe.h b/src/models/ModelMediapipe.h
new file mode 100644
index 00000000..899f875e
--- /dev/null
+++ b/src/models/ModelMediapipe.h
@@ -0,0 +1,39 @@
+#ifndef MODELMEDIAPIPE_H
+#define MODELMEDIAPIPE_H
+
+#include "Model.h"
+
+class ModelMediaPipe : public Model {
+      private:
+	/* data */
+      public:
+	ModelMediaPipe(/* args */) {}
+	~ModelMediaPipe() {}
+
+	virtual cv::Mat getNetworkOutput(const std::vector<std::vector<int64_t>> &outputDims,
+					 std::vector<std::vector<float>> &outputTensorValues)
+	{
+		uint32_t outputWidth = (int)outputDims[0].at(2);
+		uint32_t outputHeight = (int)outputDims[0].at(1);
+		int32_t outputChannels = CV_32FC2;
+
+		return cv::Mat(outputHeight, outputWidth, outputChannels, outputTensorValues[0].data());
+	}
+
+	virtual void postprocessOutput(cv::Mat &outputImage)
+	{
+		// take 1st channel
+		std::vector<cv::Mat> outputImageSplit;
+		cv::split(outputImage, outputImageSplit);
+
+		// "Softmax"
+		cv::Mat outputA, outputB;
+		cv::exp(outputImageSplit[0], outputA);
+		cv::exp(outputImageSplit[1], outputB);
+		outputImage = outputA / (outputA + outputB);
+
+		cv::normalize(outputImage, outputImage, 1.0, 0.0, cv::NORM_MINMAX);
+	}
+};
+
+#endif // MODELMEDIAPIPE_H
diff --git a/src/models/ModelPPHumanSeg.h b/src/models/ModelPPHumanSeg.h
new file mode 100644
index 00000000..1ead21de
--- /dev/null
+++ b/src/models/ModelPPHumanSeg.h
@@ -0,0 +1,44 @@
+#ifndef MODELPPHUMANSEG_H
+#define MODELPPHUMANSEG_H
+
+#include "Model.h"
+
+class ModelPPHumanSeg : public ModelBCHW {
+      public:
+	ModelPPHumanSeg(/* args */) {}
+	~ModelPPHumanSeg() {}
+
+	virtual void prepareInputToNetwork(cv::Mat &resizedImage, cv::Mat &preprocessedImage)
+	{
+		resizedImage = (resizedImage / 256.0 - cv::Scalar(0.5, 0.5, 0.5)) / cv::Scalar(0.5, 0.5, 0.5);
+
+		hwc_to_chw(resizedImage, preprocessedImage);
+	}
+
+	virtual cv::Mat getNetworkOutput(const std::vector<std::vector<int64_t>> &outputDims,
+					 std::vector<std::vector<float>> &outputTensorValues)
+	{
+		uint32_t outputWidth = (int)outputDims[0].at(2);
+		uint32_t outputHeight = (int)outputDims[0].at(1);
+		int32_t outputChannels = CV_32FC2;
+
+		return cv::Mat(outputHeight, outputWidth, outputChannels, outputTensorValues[0].data());
+	}
+
+	virtual void postprocessOutput(cv::Mat &outputImage)
+	{
+		// take 1st channel
+		std::vector<cv::Mat> outputImageSplit;
+		cv::split(outputImage, outputImageSplit);
+
+		// "Softmax"
+		cv::Mat outputA, outputB;
+		cv::exp(outputImageSplit[1], outputA);
+		cv::exp(outputImageSplit[0], outputB);
+		outputImage = outputA / (outputA + outputB);
+
+		cv::normalize(outputImage, outputImage, 1.0, 0.0, cv::NORM_MINMAX);
+	}
+};
+
+#endif // MODELPPHUMANSEG_H
diff --git a/src/models/ModelRVM.h b/src/models/ModelRVM.h
new file mode 100644
index 00000000..656b7950
--- /dev/null
+++ b/src/models/ModelRVM.h
@@ -0,0 +1,96 @@
+#ifndef MODELRVM_H
+#define MODELRVM_H
+
+#include "Model.h"
+
+class ModelRVM : public ModelBCHW {
+      private:
+	/* data */
+      public:
+	ModelRVM(/* args */) {}
+	~ModelRVM() {}
+
+	virtual void prepareInputToNetwork(cv::Mat &resizedImage, cv::Mat &preprocessedImage)
+	{
+		resizedImage = resizedImage / 256.0;
+		hwc_to_chw(resizedImage, preprocessedImage);
+	}
+
+	virtual void populateInputOutputNames(const std::unique_ptr<Ort::Session> &session,
+					      std::vector<Ort::AllocatedStringPtr> &inputNames,
+					      std::vector<Ort::AllocatedStringPtr> &outputNames)
+	{
+		Ort::AllocatorWithDefaultOptions allocator;
+
+		inputNames.clear();
+		outputNames.clear();
+
+		for (size_t i = 0; i < session->GetInputCount(); i++) {
+			inputNames.push_back(session->GetInputNameAllocated(i, allocator));
+		}
+		for (size_t i = 1; i < session->GetOutputCount(); i++) {
+			outputNames.push_back(session->GetOutputNameAllocated(i, allocator));
+		}
+	}
+
+	virtual bool populateInputOutputShapes(const std::unique_ptr<Ort::Session> &session,
+					       std::vector<std::vector<int64_t>> &inputDims,
+					       std::vector<std::vector<int64_t>> &outputDims)
+	{
+		// Assuming model only has one input and one output image
+
+		inputDims.clear();
+		outputDims.clear();
+
+		for (size_t i = 0; i < session->GetInputCount(); i++) {
+			// Get input shape
+			const Ort::TypeInfo inputTypeInfo = session->GetInputTypeInfo(i);
+			const auto inputTensorInfo = inputTypeInfo.GetTensorTypeAndShapeInfo();
+			inputDims.push_back(inputTensorInfo.GetShape());
+		}
+
+		for (size_t i = 1; i < session->GetOutputCount(); i++) {
+			// Get output shape
+			const Ort::TypeInfo outputTypeInfo = session->GetOutputTypeInfo(i);
+			const auto outputTensorInfo = outputTypeInfo.GetTensorTypeAndShapeInfo();
+			outputDims.push_back(outputTensorInfo.GetShape());
+		}
+
+		inputDims[0][0] = 1;
+		inputDims[0][2] = 192;
+		inputDims[0][3] = 192;
+		for (size_t i = 1; i < 5; i++) {
+			inputDims[i][0] = 1;
+			inputDims[i][1] = (i == 1) ? 16 : (i == 2) ? 20 : (i == 3) ? 40 : 64;
+			inputDims[i][2] = 192 / (2 << (i - 1));
+			inputDims[i][3] = 192 / (2 << (i - 1));
+		}
+
+		outputDims[0][0] = 1;
+		outputDims[0][2] = 192;
+		outputDims[0][3] = 192;
+		for (size_t i = 1; i < 5; i++) {
+			outputDims[i][0] = 1;
+			outputDims[i][2] = 192 / (2 << (i - 1));
+			outputDims[i][3] = 192 / (2 << (i - 1));
+		}
+		return true;
+	}
+
+	virtual void loadInputToTensor(const cv::Mat &preprocessedImage, uint32_t, uint32_t,
+				       std::vector<std::vector<float>> &inputTensorValues)
+	{
+		inputTensorValues[0].assign(preprocessedImage.begin<float>(), preprocessedImage.end<float>());
+		inputTensorValues[5][0] = 1.0f;
+	}
+
+	virtual void assignOutputToInput(std::vector<std::vector<float>> &outputTensorValues,
+					 std::vector<std::vector<float>> &inputTensorValues)
+	{
+		for (size_t i = 1; i < 5; i++) {
+			inputTensorValues[i].assign(outputTensorValues[i].begin(), outputTensorValues[i].end());
+		}
+	}
+};
+
+#endif /* MODELRVM_H */
diff --git a/src/models/ModelSINET.h b/src/models/ModelSINET.h
new file mode 100644
index 00000000..5d9fb781
--- /dev/null
+++ b/src/models/ModelSINET.h
@@ -0,0 +1,20 @@
+#ifndef MODELSINET_H
+#define MODELSINET_H
+
+#include "Model.h"
+
+class ModelSINET : public ModelBCHW {
+      public:
+	ModelSINET(/* args */) {}
+	~ModelSINET() {}
+
+	virtual void prepareInputToNetwork(cv::Mat &resizedImage, cv::Mat &preprocessedImage)
+	{
+		cv::subtract(resizedImage, cv::Scalar(102.890434, 111.25247, 126.91212), resizedImage);
+		cv::multiply(resizedImage, cv::Scalar(1.0 / 62.93292, 1.0 / 62.82138, 1.0 / 66.355705) / 255.0,
+			     resizedImage);
+		hwc_to_chw(resizedImage, preprocessedImage);
+	}
+};
+
+#endif // MODELSINET_H
diff --git a/src/models/ModelSelfie.h b/src/models/ModelSelfie.h
new file mode 100644
index 00000000..d49c8c02
--- /dev/null
+++ b/src/models/ModelSelfie.h
@@ -0,0 +1,19 @@
+#ifndef MODELSELFIE_H
+#define MODELSELFIE_H
+
+#include "Model.h"
+
+class ModelSelfie : public Model {
+      private:
+	/* data */
+      public:
+	ModelSelfie(/* args */) {}
+	~ModelSelfie() {}
+
+	virtual void postprocessOutput(cv::Mat &outputImage)
+	{
+		cv::normalize(outputImage, outputImage, 1.0, 0.0, cv::NORM_MINMAX);
+	}
+};
+
+#endif // MODELSELFIE_H
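
Note on the blur path added to filter_render: it composites in two steps — box-filter the whole frame, then mask-copy the blurred pixels back over the background region. Below is a minimal standalone sketch of that compositing, assuming a BGR cv::Mat frame and a CV_8U mask whose non-zero pixels mark the background; the function name blurBackgroundComposite is illustrative and not part of the plugin.

```cpp
#include <opencv2/core.hpp>
#include <opencv2/imgproc.hpp>

// Sketch of the diff's blur-background compositing, outside the OBS filter.
cv::Mat blurBackgroundComposite(const cv::Mat &imageBGR, const cv::Mat &backgroundMask,
				int64_t blurBackground)
{
	cv::Mat result = imageBGR.clone();
	if (blurBackground > 0) {
		// Same kernel-size derivation as the diff: offset by 5, force odd
		// so the box filter stays centered on each pixel.
		int k_size = (int)(5 + blurBackground);
		k_size = k_size % 2 == 0 ? k_size + 1 : k_size;

		cv::Mat blurredBackground;
		cv::boxFilter(imageBGR, blurredBackground, imageBGR.depth(), cv::Size(k_size, k_size));

		// Copy blurred pixels onto the output only where the mask marks background.
		blurredBackground.copyTo(result, backgroundMask);
	}
	return result;
}
```

The box filter is the cheap choice here (the diff's own comment calls it a "fast box filter"); growing k_size linearly with the slider value gives roughly linear blur strength, and a value of 0 falls through to the existing solid-color fill.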
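Note on the segmentation postprocessing: ModelMediaPipe and the new ModelPPHumanSeg both emit a two-channel class map and reduce it with a two-class softmax; the only difference is which channel is exponentiated as the numerator (outputImageSplit[0] for MediaPipe, outputImageSplit[1] for PPHumanSeg). A sketch of that reduction follows; the foreground/background channel labels are assumptions for illustration, not something the diff states.

```cpp
#include <opencv2/core.hpp>

// logits: CV_32FC2 map from the network; fgChannel selects the plane treated
// as foreground (0 for the MediaPipe model, 1 for PPHumanSeg in the diff above).
cv::Mat twoClassSoftmax(const cv::Mat &logits, int fgChannel)
{
	std::vector<cv::Mat> planes;
	cv::split(logits, planes);

	// softmax over two classes: exp(fg) / (exp(fg) + exp(bg)), per pixel
	cv::Mat expFg, expBg;
	cv::exp(planes[fgChannel], expFg);
	cv::exp(planes[1 - fgChannel], expBg);
	return expFg / (expFg + expBg);
}
```

For well-scaled logits this is fine as-is; a more defensive version would subtract the per-pixel maximum before cv::exp for numerical stability. The trailing cv::normalize in both models then stretches the probability map to the full [0, 1] range before it is thresholded into the mask.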