From 68c7b5e361393734104ad9bc6d9d991aa102f5d4 Mon Sep 17 00:00:00 2001 From: Wendell Hom Date: Mon, 2 Dec 2024 14:22:02 -0800 Subject: [PATCH] Holoscan SDK v2.7.0 Release Co-authored-by: Alexis Girault Co-authored-by: Andreas Heumann Co-authored-by: Cristiana Dinea Co-authored-by: Gigon Bae Co-authored-by: Gregory Lee Co-authored-by: Ian Stewart Co-authored-by: Ilies Chergui Co-authored-by: Julien Jomier Co-authored-by: Shekhar Dwivedi Co-authored-by: Soham Sinha Co-authored-by: Tom Birdsong Co-authored-by: Victor Chang Co-authored-by: Wendell Hom --- .vscode/c_cpp_properties.json | 6 +- .vscode/settings.json | 3 +- CMakeLists.txt | 22 +- DEVELOP.md | 2 +- Dockerfile | 4 +- NOTICE.txt | 2 +- VERSION | 2 +- cmake/deps/gxf.cmake | 29 + cmake/modules/HoloscanCPack.cmake | 12 +- cmake/modules/PrintProjectSettings.cmake | 7 +- .../modules/WrapOperatorAsGXFExtension.cmake | 210 +++++- cmake/modules/cpack/NOTICE.txt | 2 +- cmake/modules/cpack/README.md | 35 + docs/_templates/layout.html | 8 + docs/api/holoscan_cpp_api.md | 5 +- docs/cli/package.md | 44 +- docs/components/conditions.md | 52 +- docs/components/resources.md | 10 + docs/components/schedulers.md | 3 + docs/examples/ping_multi_port.md | 2 +- docs/flow_tracking.md | 195 ++++- docs/holoscan_create_app.md | 67 +- docs/holoscan_create_distributed_app.md | 16 +- docs/holoscan_debugging.md | 2 +- docs/hsdk_faq.md | 9 +- docs/inference.md | 20 +- docs/sdk_installation.md | 6 +- docs/use_igpu_with_dgpu.md | 2 +- docs/visualization.md | 4 +- examples/README.md | 2 + examples/conditions/CMakeLists.txt | 2 +- .../asynchronous/cpp/CMakeLists.min.txt | 49 +- .../asynchronous/cpp/CMakeLists.txt | 52 +- .../asynchronous/cpp/ping_async.cpp | 5 +- .../conditions/multi_message/CMakeLists.txt | 30 + examples/conditions/multi_message/README.md | 68 ++ .../multi_message/cpp/CMakeLists.min.txt | 89 +++ .../multi_message/cpp/CMakeLists.txt | 116 +++ .../multi_message/cpp/common_ops.hpp | 51 ++ .../cpp/multi_message_per_receiver.cpp | 115 +++ .../cpp/multi_message_sum_of_all.cpp | 125 ++++ .../cpp/single_message_timeout.cpp | 92 +++ .../multi_message/python/CMakeLists.txt | 87 +++ .../python/multi_message_per_receiver.py | 98 +++ .../python/multi_message_sum_of_all.py | 101 +++ .../python/single_message_timeout.py | 85 +++ examples/flow_tracker/cpp/CMakeLists.min.txt | 5 +- examples/flow_tracker/cpp/CMakeLists.txt | 5 +- .../flow_tracker/python/CMakeLists.min.txt | 3 +- examples/flow_tracker/python/CMakeLists.txt | 3 +- examples/multithread/cpp/CMakeLists.min.txt | 18 +- examples/multithread/cpp/CMakeLists.txt | 18 +- examples/multithread/cpp/multithread.cpp | 3 +- .../multithread/python/CMakeLists.min.txt | 72 +- examples/multithread/python/CMakeLists.txt | 60 +- examples/multithread/python/multithread.py | 18 +- .../ping_conditional/cpp/ping_conditional.cpp | 4 +- .../ping_distributed/cpp/CMakeLists.min.txt | 2 +- .../ping_distributed/cpp/ping_distributed.cpp | 10 +- .../ping_multi_port/cpp/ping_multi_port.cpp | 3 +- examples/ping_vector/cpp/ping_vector.cpp | 3 +- examples/resources/CMakeLists.txt | 1 + examples/resources/clock/cpp/ping_clock.cpp | 6 +- .../resources/native/cpp/native_resource.cpp | 2 +- examples/resources/thread_pool/CMakeLists.txt | 30 + examples/resources/thread_pool/README.md | 51 ++ .../thread_pool/cpp/CMakeLists.min.txt | 50 ++ .../resources/thread_pool/cpp/CMakeLists.txt | 75 ++ .../cpp/ping_simple_thread_pool.cpp | 66 ++ .../thread_pool/python/CMakeLists.min.txt | 31 + .../thread_pool/python/CMakeLists.txt | 55 ++ 
.../python/ping_simple_thread_pool.py | 60 ++ .../tensor_interop/cpp/tensor_interop.cpp | 4 +- examples/v4l2_camera/cpp/CMakeLists.min.txt | 2 +- examples/v4l2_camera/cpp/v4l2_camera.cpp | 6 +- .../CMakeLists.txt | 5 +- .../gxf_extension/CMakeLists.txt | 15 + .../gxf_registry/CMakeLists.txt | 64 ++ .../gxf_registry/ping_installable.yaml.in | 64 ++ .../gxf_registry/target.yaml.in | 6 + .../ping_rx_native_op/ping_rx_native_op.cpp | 4 +- .../ping_tx_native_op/ping_tx_native_op.cpp | 2 +- .../gxf_holoscan_wrapper/CMakeLists.txt | 59 ++ gxf_extensions/ucx/CMakeLists.txt | 22 + include/holoscan/core/app_driver.hpp | 4 +- include/holoscan/core/application.hpp | 3 +- include/holoscan/core/arg.hpp | 5 + include/holoscan/core/argument_setter.hpp | 2 + include/holoscan/core/component.hpp | 2 + include/holoscan/core/condition.hpp | 17 +- .../core/conditions/gxf/expiring_message.hpp | 2 + .../core/conditions/gxf/message_available.hpp | 2 +- .../gxf/multi_message_available.hpp | 94 +++ .../gxf/multi_message_available_timeout.hpp | 74 ++ include/holoscan/core/config.hpp | 2 + include/holoscan/core/dataflow_tracker.hpp | 24 + .../core/executors/gxf/gxf_executor.hpp | 5 + .../executors/gxf/gxf_parameter_adaptor.hpp | 2 + include/holoscan/core/forward_def.hpp | 9 +- include/holoscan/core/fragment.hpp | 25 +- include/holoscan/core/gxf/entity_group.hpp | 128 ++++ include/holoscan/core/gxf/gxf_component.hpp | 8 + include/holoscan/core/gxf/gxf_condition.hpp | 13 + .../core/gxf/gxf_extension_registrar.hpp | 1 + include/holoscan/core/gxf/gxf_io_context.hpp | 2 + .../holoscan/core/gxf/gxf_network_context.hpp | 10 + include/holoscan/core/gxf/gxf_operator.hpp | 14 + include/holoscan/core/gxf/gxf_resource.hpp | 33 +- include/holoscan/core/gxf/gxf_scheduler.hpp | 12 +- include/holoscan/core/gxf/gxf_utils.hpp | 34 +- include/holoscan/core/gxf/gxf_wrapper.hpp | 2 +- include/holoscan/core/io_context.hpp | 5 +- include/holoscan/core/io_spec.hpp | 9 + include/holoscan/core/messagelabel.hpp | 22 +- include/holoscan/core/network_context.hpp | 2 + .../core/network_contexts/gxf/ucx_context.hpp | 2 +- include/holoscan/core/operator.hpp | 3 + include/holoscan/core/operator_spec.hpp | 25 + include/holoscan/core/resource.hpp | 2 + .../core/resources/gxf/cpu_thread.hpp | 57 ++ .../core/resources/gxf/system_resources.hpp | 163 +++++ include/holoscan/core/scheduler.hpp | 2 + .../schedulers/gxf/multithread_scheduler.hpp | 4 +- .../core/services/app_driver/server.hpp | 5 +- .../services/common/network_constants.hpp | 5 +- include/holoscan/holoscan.hpp | 4 + include/holoscan/logger/logger.hpp | 1 - .../holoscan/operators/holoviz/holoviz.hpp | 16 +- .../operators/inference/inference.hpp | 2 +- modules/holoinfer/src/CMakeLists.txt | 1 + .../src/include/holoinfer_constants.hpp | 3 +- .../holoinfer/src/include/holoinfer_utils.hpp | 17 +- modules/holoinfer/src/infer/onnx/core.cpp | 346 ++++++--- modules/holoinfer/src/infer/onnx/core.hpp | 8 +- modules/holoinfer/src/infer/torch/core.cpp | 11 +- modules/holoinfer/src/infer/trt/core.cpp | 30 +- .../holoinfer/src/manager/infer_manager.cpp | 65 +- .../holoinfer/src/manager/infer_manager.hpp | 4 + .../src/process/transforms/generate_boxes.cpp | 2 + modules/holoinfer/src/utils/infer_buffer.cpp | 2 + modules/holoinfer/src/utils/work_queue.cpp | 85 +++ modules/holoinfer/src/utils/work_queue.hpp | 129 ++++ modules/holoviz/src/CMakeLists.txt | 1 + .../src/cuda/gen_primitive_vertices.cu | 171 +++++ .../src/cuda/gen_primitive_vertices.hpp | 51 ++ modules/holoviz/src/export.map | 1 + 
modules/holoviz/src/glfw_window.cpp | 14 +- modules/holoviz/src/holoviz.cpp | 6 + modules/holoviz/src/holoviz/holoviz.hpp | 17 +- modules/holoviz/src/layers/geometry_layer.cpp | 340 ++++----- modules/holoviz/src/layers/geometry_layer.hpp | 12 + modules/holoviz/src/layers/im_gui_layer.cpp | 4 +- modules/holoviz/src/vulkan/buffer.cpp | 18 +- modules/holoviz/src/vulkan/texture.cpp | 28 +- modules/holoviz/src/vulkan/vulkan_app.cpp | 25 +- modules/holoviz/src/vulkan/vulkan_app.hpp | 15 +- .../tests/functional/geometry_layer_test.cpp | 62 +- .../tests/functional/image_layer_test.cpp | 11 - .../holoviz/tests/functional/init_test.cpp | 20 + python/holoscan/CMakeLists.txt | 10 +- .../holoscan/{__init__.py => __init__.py.in} | 2 +- .../holoscan/cli/common/artifact_sources.py | 19 +- python/holoscan/cli/common/constants.py | 1 + python/holoscan/cli/common/sdk_utils.py | 19 +- python/holoscan/cli/holoscan | 2 +- python/holoscan/cli/packager/arguments.py | 1 + .../cli/packager/container_builder.py | 30 + .../holoscan/cli/packager/package_command.py | 7 + python/holoscan/cli/packager/parameters.py | 29 + .../cli/packager/templates/Dockerfile.jinja2 | 18 +- python/holoscan/conditions/CMakeLists.txt | 2 + python/holoscan/conditions/__init__.py | 12 + python/holoscan/conditions/conditions.cpp | 4 + .../downstream_message_affordable.cpp | 8 +- .../holoscan/conditions/expiring_message.cpp | 14 +- .../holoscan/conditions/message_available.cpp | 6 +- .../conditions/multi_message_available.cpp | 136 ++++ .../multi_message_available_pydoc.hpp | 95 +++ .../multi_message_available_timeout.cpp | 105 +++ .../multi_message_available_timeout_pydoc.hpp | 74 ++ python/holoscan/core/__init__.py | 115 +-- python/holoscan/core/application.cpp | 20 +- python/holoscan/core/application_pydoc.hpp | 3 + python/holoscan/core/arg.cpp | 5 +- python/holoscan/core/arg_pydoc.hpp | 8 +- python/holoscan/core/condition.cpp | 4 +- python/holoscan/core/dl_converter.cpp | 55 +- python/holoscan/core/dl_converter.hpp | 4 + python/holoscan/core/fragment.cpp | 7 + python/holoscan/core/fragment_pydoc.hpp | 16 + python/holoscan/core/io_context.cpp | 287 ++++---- python/holoscan/core/io_context.hpp | 31 + python/holoscan/core/kwarg_handling.cpp | 84 ++- python/holoscan/core/metadata.cpp | 245 ++++--- python/holoscan/core/operator.cpp | 61 +- python/holoscan/core/operator_pydoc.hpp | 19 + python/holoscan/core/tensor.cpp | 90 ++- python/holoscan/gxf/__init__.py | 3 + python/holoscan/gxf/gxf.cpp | 17 + python/holoscan/gxf/gxf_operator.cpp | 6 +- python/holoscan/gxf/gxf_operator_pydoc.hpp | 9 + python/holoscan/gxf/gxf_pydoc.hpp | 25 + python/holoscan/operators/holoviz/__init__.py | 14 +- python/holoscan/operators/holoviz/holoviz.cpp | 19 +- python/holoscan/operators/holoviz/pydoc.hpp | 6 + .../operators/video_stream_replayer/pydoc.hpp | 5 +- python/holoscan/resources/CMakeLists.txt | 1 + python/holoscan/resources/__init__.py | 3 + .../gxf_component_resource_pydoc.hpp | 2 +- python/holoscan/resources/resources.cpp | 2 + .../holoscan/resources/system_resources.cpp | 78 ++ .../resources/system_resources_pydoc.hpp | 64 ++ .../schedulers/multithread_scheduler.cpp | 6 +- .../multithread_scheduler_pydoc.hpp | 4 + python/requirements.dev.txt | 8 +- python/requirements.lint.txt | 8 +- python/requirements.txt | 17 +- .../tests/cli/unit/common/test_sdk_utils.py | 15 +- .../tests/cli/unit/packager/test_arguments.py | 5 + .../tests/system/test_application_minimal.py | 40 ++ .../tests/system/test_holoviz_dual_window.py | 129 ++++ 
python/tests/unit/test_conditions.py | 54 ++ python/tests/unit/test_core.py | 186 +++-- python/tests/unit/test_resources.py | 27 +- python/tests/unit/test_schedulers.py | 1 + run | 35 +- runtime_docker/Dockerfile | 14 +- scripts/.gitignore | 3 + scripts/CMakeLists.txt | 2 + scripts/README.md | 40 ++ scripts/ctest_time_comparison.py | 81 +++ scripts/generate_gxf_manifest.py | 670 ++++++++++++++++++ scripts/graph_surgeon.py | 8 +- src/CMakeLists.txt | 5 + src/core/app_driver.cpp | 12 +- src/core/application.cpp | 25 +- .../gxf/multi_message_available.cpp | 163 +++++ .../gxf/multi_message_available_timeout.cpp | 142 ++++ src/core/executors/gxf/gxf_executor.cpp | 310 ++++++-- src/core/flow_tracking_annotation.cpp | 29 +- src/core/fragment.cpp | 48 +- src/core/gxf/entity_group.cpp | 53 ++ src/core/gxf/gxf_component.cpp | 12 + src/core/gxf/gxf_condition.cpp | 8 + src/core/gxf/gxf_network_context.cpp | 8 + src/core/gxf/gxf_operator.cpp | 15 + src/core/gxf/gxf_resource.cpp | 26 +- src/core/gxf/gxf_scheduler.cpp | 8 + src/core/gxf/gxf_utils.cpp | 55 +- src/core/resources/gxf/cpu_thread.cpp | 44 ++ src/core/resources/gxf/system_resources.cpp | 110 +++ .../schedulers/gxf/multithread_scheduler.cpp | 7 + src/core/services/app_driver/server.cpp | 28 +- src/core/services/app_worker/server.cpp | 11 +- src/operators/holoviz/holoviz.cpp | 201 ++++-- src/operators/inference/inference.cpp | 5 - src/utils/holoinfer_utils.cpp | 11 +- tests/CMakeLists.txt | 25 +- tests/core/condition_classes.cpp | 157 ++++ tests/core/entity_group.cpp | 130 ++++ tests/core/fragment.cpp | 34 + tests/core/io_spec.cpp | 6 + tests/core/resource_classes.cpp | 45 ++ tests/core/scheduler_classes.cpp | 1 + tests/flow_tracking/flow_tracking_cycle.cpp | 373 +--------- .../flow_tracking/limited_tracking_tests.cpp | 70 ++ tests/flow_tracking/sample_test_graphs.hpp | 427 +++++++++++ tests/holoinfer/inference/test_core.hpp | 6 +- tests/holoinfer/inference/test_inference.cpp | 381 +++++----- tests/holoinfer/inference/test_parameters.cpp | 10 +- tests/operators/operator_classes.cpp | 91 ++- .../multi_receiver_operator_ping_app.cpp | 8 +- 272 files changed, 9685 insertions(+), 1882 deletions(-) create mode 100644 cmake/modules/cpack/README.md create mode 100644 docs/_templates/layout.html create mode 100644 examples/conditions/multi_message/CMakeLists.txt create mode 100644 examples/conditions/multi_message/README.md create mode 100644 examples/conditions/multi_message/cpp/CMakeLists.min.txt create mode 100644 examples/conditions/multi_message/cpp/CMakeLists.txt create mode 100644 examples/conditions/multi_message/cpp/common_ops.hpp create mode 100644 examples/conditions/multi_message/cpp/multi_message_per_receiver.cpp create mode 100644 examples/conditions/multi_message/cpp/multi_message_sum_of_all.cpp create mode 100644 examples/conditions/multi_message/cpp/single_message_timeout.cpp create mode 100644 examples/conditions/multi_message/python/CMakeLists.txt create mode 100644 examples/conditions/multi_message/python/multi_message_per_receiver.py create mode 100644 examples/conditions/multi_message/python/multi_message_sum_of_all.py create mode 100644 examples/conditions/multi_message/python/single_message_timeout.py create mode 100644 examples/resources/thread_pool/CMakeLists.txt create mode 100644 examples/resources/thread_pool/README.md create mode 100644 examples/resources/thread_pool/cpp/CMakeLists.min.txt create mode 100644 examples/resources/thread_pool/cpp/CMakeLists.txt create mode 100644 
examples/resources/thread_pool/cpp/ping_simple_thread_pool.cpp create mode 100644 examples/resources/thread_pool/python/CMakeLists.min.txt create mode 100644 examples/resources/thread_pool/python/CMakeLists.txt create mode 100644 examples/resources/thread_pool/python/ping_simple_thread_pool.py create mode 100644 examples/wrap_operator_as_gxf_extension/gxf_registry/CMakeLists.txt create mode 100644 examples/wrap_operator_as_gxf_extension/gxf_registry/ping_installable.yaml.in create mode 100644 examples/wrap_operator_as_gxf_extension/gxf_registry/target.yaml.in create mode 100644 include/holoscan/core/conditions/gxf/multi_message_available.hpp create mode 100644 include/holoscan/core/conditions/gxf/multi_message_available_timeout.hpp create mode 100644 include/holoscan/core/gxf/entity_group.hpp create mode 100644 include/holoscan/core/resources/gxf/cpu_thread.hpp create mode 100644 include/holoscan/core/resources/gxf/system_resources.hpp create mode 100644 modules/holoinfer/src/utils/work_queue.cpp create mode 100644 modules/holoinfer/src/utils/work_queue.hpp create mode 100644 modules/holoviz/src/cuda/gen_primitive_vertices.cu create mode 100644 modules/holoviz/src/cuda/gen_primitive_vertices.hpp rename python/holoscan/{__init__.py => __init__.py.in} (98%) create mode 100644 python/holoscan/conditions/multi_message_available.cpp create mode 100644 python/holoscan/conditions/multi_message_available_pydoc.hpp create mode 100644 python/holoscan/conditions/multi_message_available_timeout.cpp create mode 100644 python/holoscan/conditions/multi_message_available_timeout_pydoc.hpp create mode 100644 python/holoscan/resources/system_resources.cpp create mode 100644 python/holoscan/resources/system_resources_pydoc.hpp create mode 100644 python/tests/system/test_holoviz_dual_window.py create mode 100644 scripts/.gitignore create mode 100755 scripts/ctest_time_comparison.py create mode 100755 scripts/generate_gxf_manifest.py create mode 100644 src/core/conditions/gxf/multi_message_available.cpp create mode 100644 src/core/conditions/gxf/multi_message_available_timeout.cpp create mode 100644 src/core/gxf/entity_group.cpp create mode 100644 src/core/resources/gxf/cpu_thread.cpp create mode 100644 src/core/resources/gxf/system_resources.cpp create mode 100644 tests/core/entity_group.cpp create mode 100644 tests/flow_tracking/limited_tracking_tests.cpp create mode 100644 tests/flow_tracking/sample_test_graphs.hpp diff --git a/.vscode/c_cpp_properties.json b/.vscode/c_cpp_properties.json index 48359173..7038bef4 100644 --- a/.vscode/c_cpp_properties.json +++ b/.vscode/c_cpp_properties.json @@ -4,7 +4,8 @@ { "name": "x86_64", "includePath": [ - "${workspaceFolder}/${env:HOLOSCAN_PUBLIC_FOLDER}/include" + "${workspaceFolder}/${env:HOLOSCAN_PUBLIC_FOLDER}/include", + "/usr/local/cuda/include", ], "defines": [], "compilerPath": "/usr/bin/gcc", @@ -21,7 +22,8 @@ { "name": "arm64", "includePath": [ - "${workspaceFolder}/${env:HOLOSCAN_PUBLIC_FOLDER}/include" + "${workspaceFolder}/${env:HOLOSCAN_PUBLIC_FOLDER}/include", + "/usr/local/cuda/include", ], "defines": [], "compilerPath": "/usr/bin/gcc", diff --git a/.vscode/settings.json b/.vscode/settings.json index 91e12916..e7515183 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -124,7 +124,8 @@ "syncstream": "cpp", "__functional_base_03": "cpp", "annotated_ptr": "cpp", - "stream_ref": "cpp" + "stream_ref": "cpp", + "expected": "cpp" }, "git.alwaysSignOff": true, "git.untrackedChanges": "separate", diff --git a/CMakeLists.txt b/CMakeLists.txt 
index ad320d71..b9d16f80 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -25,7 +25,6 @@ option(BUILD_SHARED_LIBS "Build Shared Libraries" ON)
 option(HOLOSCAN_BUILD_LIBTORCH "Build support for the LibTorch backend" ON)
 option(HOLOSCAN_BUILD_ORT "Build support for the ONNX Runtime backend" ON)
 option(HOLOSCAN_BUILD_AJA "Build support for AJA" ON)
-option(HOLOSCAN_BUILD_GXF_EXTENSIONS "Build GXF Extensions" ON)
 option(HOLOSCAN_BUILD_EXAMPLES "Build Holoscan SDK Examples" ON)
 option(HOLOSCAN_BUILD_PYTHON "Build Holoscan SDK Python Bindings" ON)
 option(HOLOSCAN_DOWNLOAD_DATASETS "Download SDK Datasets" ON)
@@ -33,6 +32,9 @@ option(HOLOSCAN_BUILD_TESTS "Build Holoscan SDK Tests" ON)
 option(HOLOSCAN_USE_CCACHE "Use ccache for building Holoscan SDK" OFF)
 option(HOLOSCAN_INSTALL_EXAMPLE_SOURCE "Install the example source code" ON)
 option(HOLOSCAN_ENABLE_CLANG_TIDY "Enable use of clang-tidy" OFF)
+option(HOLOSCAN_ENABLE_GOOGLE_SANITIZER "Enable use of google sanitizer" OFF)
+option(HOLOSCAN_BUILD_GXF_EXTENSIONS "Build GXF Extensions" ON)
+option(HOLOSCAN_REGISTER_GXF_EXTENSIONS "Register extensions with the Graph Composer registry" OFF)
 # ##############################################################################
 # # Prerequisite statements
@@ -65,6 +67,18 @@ project(${HOLOSCAN_PACKAGE_NAME}
 )
 include(SetupCUDA) # CUDA Language enabled there after additional setup
+# If enabling google sanitizer
+if(HOLOSCAN_ENABLE_GOOGLE_SANITIZER)
+  # Note: Before running the tests, the following command must be executed:
+  #   export ASAN_OPTIONS=symbolize=1:protect_shadow_gap=0
+  # This ensures that stack traces are symbolized and prevents false positives related to shadow gap
+  # protection.
+  # Without this, CUDA-related tests may fail (e.g., CUDA runtime API error "out of memory").
+  # (See https://github.com/NVIDIA/DALI/pull/362).
+  # You may still see some false positives when running the tests that use the Vulkan-related APIs.
+  list(APPEND CMAKE_CXX_FLAGS "-g -O1 -Wno-stringop-truncation -fsanitize=address -fno-omit-frame-pointer")
+endif()
+
 # ##############################################################################
 # # Global properties (CMAKE_*)
 # ##############################################################################
@@ -302,6 +316,12 @@ foreach(_component ${HOLOSCAN_GXF_COMPONENTS})
   endif()
 endforeach()
+install(
+  DIRECTORY ${GXF_PYTHON_MODULE_PATH}
+  DESTINATION python/${HOLOSCAN_INSTALL_LIB_DIR}
+  COMPONENT "holoscan-gxf_libs"
+)
+
 # Install CMake script to build GXE applications
 install(FILES "${CMAKE_SOURCE_DIR}/cmake/modules/GenerateGXEAppInstall.cmake"
   RENAME GenerateGXEApp.cmake
diff --git a/DEVELOP.md b/DEVELOP.md
index cfaf1b63..d578c9a7 100644
--- a/DEVELOP.md
+++ b/DEVELOP.md
@@ -200,7 +200,7 @@ Visual Studio Code can be utilized to develop the Holoscan SDK. The `.devcontain
 The `./run` script contains `vscode` and `vscode_remote` commands for launching Visual Studio Code in a container or from a remote machine, respectively.
-- To launch Visual Studio Code in a dev container, use `./run vscode`.
+- To launch Visual Studio Code in a dev container, use `./run vscode` (`-j <# of workers>` or `--parallel <# of workers>` can be used to specify the number of parallel jobs to run during the build process). For more information, refer to the instructions from `./run vscode -h`.
 - To attach to an existing dev container from a remote machine, use `./run vscode_remote`. For more information, refer to the instructions from `./run vscode_remote -h`.
Once Visual Studio Code is launched, the development container will be built and the recommended extensions will be installed automatically, along with CMake being configured. diff --git a/Dockerfile b/Dockerfile index a96dc1d1..421e6703 100644 --- a/Dockerfile +++ b/Dockerfile @@ -24,7 +24,7 @@ ARG ONNX_RUNTIME_VERSION=1.18.1_38712740_24.08-cuda-12.6 ARG LIBTORCH_VERSION=2.5.0_24.08 ARG TORCHVISION_VERSION=0.20.0_24.08 ARG GRPC_VERSION=1.54.2 -ARG GXF_VERSION=447_20241004_bf72709 +ARG GXF_VERSION=447_20241029_bf72709 ARG MOFED_VERSION=24.07-0.6.1.0 ############################################################ @@ -182,7 +182,6 @@ FROM build-tools AS ucx-patcher # the necessary rpath for non-containerized applications. We patch RPATH # for portability when we later repackage these libraries for distribution # outside of the container. -WORKDIR /opt/ucx/${UCX_VERSION} RUN patchelf --set-rpath '$ORIGIN' /opt/hpcx/ucx/lib/libuc*.so* \ && patchelf --set-rpath '$ORIGIN:$ORIGIN/..' /opt/hpcx/ucx/lib/ucx/libuc*.so* \ && patchelf --set-rpath '$ORIGIN/../lib' /opt/hpcx/ucx/bin/* @@ -256,6 +255,7 @@ ARG GXF_VERSION ENV GXF=/opt/nvidia/gxf/${GXF_VERSION} COPY --from=gxf-downloader ${GXF} ${GXF} ENV CMAKE_PREFIX_PATH="${CMAKE_PREFIX_PATH}:${GXF}" +ENV PYTHONPATH="${PYTHONPATH}:/opt/nvidia/gxf/${GXF_VERSION}/python" # Setup Docker & NVIDIA Container Toolkit's apt repositories to enable DooD # for packaging & running applications with the CLI diff --git a/NOTICE.txt b/NOTICE.txt index fc6ad65d..305df187 100644 --- a/NOTICE.txt +++ b/NOTICE.txt @@ -1,5 +1,5 @@ Holoscan SDK -Copyright (c) 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +Copyright (c) 2021-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. This product includes software developed at NVIDIA CORPORATION (https://www.nvidia.com/). diff --git a/VERSION b/VERSION index e70b4523..24ba9a38 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -2.6.0 +2.7.0 diff --git a/cmake/deps/gxf.cmake b/cmake/deps/gxf.cmake index de94264f..e3c242c5 100644 --- a/cmake/deps/gxf.cmake +++ b/cmake/deps/gxf.cmake @@ -111,8 +111,37 @@ foreach(component ${HOLOSCAN_GXF_COMPONENTS}) endif() endforeach() +if(NOT GXF_ROOT) + cmake_path(GET "${GXF_DIR}" PARENT_PATH PARENT_PATH PARENT_PATH GXF_ROOT) +endif() +find_path(GXF_PYTHON_MODULE_PATH + NAMES + core/__init__.py + core/Gxf.py + PATHS ${GXF_ROOT}/python/gxf + REQUIRED +) + +# Test that the GXF Python module is in PYTHONPATH +find_package(Python3 COMPONENTS Interpreter REQUIRED) +execute_process( + COMMAND "${Python3_EXECUTABLE}" -c "import os; import gxf; print(os.pathsep.join(gxf.__path__).strip())" + RESULT_VARIABLE GXF_MODULE_FOUND + OUTPUT_VARIABLE GXF_MODULE_DIR + OUTPUT_STRIP_TRAILING_WHITESPACE +) +if(NOT GXF_MODULE_FOUND EQUAL 0) + message(FATAL_ERROR "GXF Python module not found in PYTHONPATH") +endif() +if(NOT GXF_MODULE_DIR STREQUAL "${GXF_PYTHON_MODULE_PATH}") + message(WARNING + "Expected GXF Python module at ${GXF_PYTHON_MODULE_PATH} but found at ${GXF_MODULE_DIR}." 
+ " Do you need to update your PYTHONPATH?") +endif() + # Set variables in parent scope for use throughout the Holoscan project set(GXF_INCLUDE_DIR ${GXF_INCLUDE_DIR} PARENT_SCOPE) +set(GXF_PYTHON_MODULE_PATH ${GXF_PYTHON_MODULE_PATH} PARENT_SCOPE) set(HOLOSCAN_GXF_LIB_DIR ${HOLOSCAN_GXF_LIB_DIR} PARENT_SCOPE) set(HOLOSCAN_GXF_BIN_DIR ${HOLOSCAN_GXF_BIN_DIR} PARENT_SCOPE) set(HOLOSCAN_GXE_LOCATION ${HOLOSCAN_GXE_LOCATION} PARENT_SCOPE) diff --git a/cmake/modules/HoloscanCPack.cmake b/cmake/modules/HoloscanCPack.cmake index ca695054..beadf6a5 100644 --- a/cmake/modules/HoloscanCPack.cmake +++ b/cmake/modules/HoloscanCPack.cmake @@ -13,9 +13,17 @@ # See the License for the specific language governing permissions and # limitations under the License. -# Copy NOTICE file for installation -install(FILES ${CMAKE_CURRENT_LIST_DIR}/cpack/NOTICE.txt +# Copy NOTICE file for packaging +install(FILES "${CMAKE_CURRENT_LIST_DIR}/cpack/NOTICE.txt" DESTINATION . + RENAME NOTICE + COMPONENT holoscan-cpack +) + +# Copy LICENSE file for installation +install(FILES "${CMAKE_BINARY_DIR}/LICENSE.txt" + DESTINATION "/usr/share/doc/holoscan/" + RENAME copyright COMPONENT holoscan-cpack ) diff --git a/cmake/modules/PrintProjectSettings.cmake b/cmake/modules/PrintProjectSettings.cmake index b8008af2..41bf317b 100644 --- a/cmake/modules/PrintProjectSettings.cmake +++ b/cmake/modules/PrintProjectSettings.cmake @@ -46,11 +46,16 @@ message(STATUS "CMAKE_FIND_ROOT_PATH : ${CMAKE_FIND_ROOT_PATH}") message(STATUS "CMAKE_FIND_ROOT_PATH_MODE_INCLUDE: ${CMAKE_FIND_ROOT_PATH_MODE_INCLUDE}") message(STATUS "") message(STATUS "BUILD_SHARED_LIBS : ${BUILD_SHARED_LIBS}") +message(STATUS "HOLOSCAN_BUILD_PYTHON : ${HOLOSCAN_BUILD_PYTHON}") message(STATUS "HOLOSCAN_BUILD_AJA : ${HOLOSCAN_BUILD_AJA}") message(STATUS "HOLOSCAN_BUILD_EXAMPLES : ${HOLOSCAN_BUILD_EXAMPLES}") message(STATUS "HOLOSCAN_BUILD_TESTS : ${HOLOSCAN_BUILD_TESTS}") +message(STATUS "HOLOSCAN_ENABLE_CLANG_TIDY : ${HOLOSCAN_ENABLE_CLANG_TIDY}") +message(STATUS "HOLOSCAN_ENABLE_GOOGLE_SANITIZER : ${HOLOSCAN_ENABLE_GOOGLE_SANITIZER}") message(STATUS "HOLOSCAN_USE_CCACHE : ${HOLOSCAN_USE_CCACHE}") message(STATUS "HOLOSCAN_USE_CCACHE_SKIPPED : ${HOLOSCAN_USE_CCACHE_SKIPPED}") message(STATUS "HOLOSCAN_CACHE_DIR : ${HOLOSCAN_CACHE_DIR}") message(STATUS "HOLOSCAN_TOP : ${HOLOSCAN_TOP}") -message(STATUS "HOLOSCAN_INSTALL_LIB_DIR : ${HOLOSCAN_INSTALL_LIB_DIR}") \ No newline at end of file +message(STATUS "HOLOSCAN_INSTALL_LIB_DIR : ${HOLOSCAN_INSTALL_LIB_DIR}") +message(STATUS "HOLOSCAN_BUILD_GXF_EXTENSIONS : ${HOLOSCAN_BUILD_GXF_EXTENSIONS}") +message(STATUS "HOLOSCAN_REGISTER_GXF_EXTENSIONS : ${HOLOSCAN_REGISTER_GXF_EXTENSIONS}") \ No newline at end of file diff --git a/cmake/modules/WrapOperatorAsGXFExtension.cmake b/cmake/modules/WrapOperatorAsGXFExtension.cmake index 588974f1..ae44d301 100644 --- a/cmake/modules/WrapOperatorAsGXFExtension.cmake +++ b/cmake/modules/WrapOperatorAsGXFExtension.cmake @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2023-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -13,6 +13,172 @@ # See the License for the specific language governing permissions and # limitations under the License. +# Generates a Graph Composer registry manifest file to accompany a GXF extension. 
+#
+# The GXF / Graph Composer registry allows GXF-based applications such as Graph Composer to access
+# a local or remote cache of GXF extensions. Each extension in the local registry must provide
+# a metadata YAML file for inspection.
+#
+# See also:
+# - scripts/generate_gxf_manifest.py
+# - https://docs.nvidia.com/metropolis/deepstream/dev-guide/graphtools-docs/docs/text/GraphComposer_Registry.html
+#
+# Inputs are:
+#   MANIFEST_NAME: name of the manifest file to generate
+#   EXTENSION_NAME: name of the extension library
+#   EXTENSION_TARGET: name of the CMake target for the extension library
+#   BINARY_FILES: list of binary files to include in the manifest. Defaults to the library target output.
+#   FORWARD_ARGS: Arguments to pass directly to the generation script
+#
+# Outputs:
+#   ${MANIFEST_NAME}: the generated manifest YAML file in the current CMake binary directory.
+#     Default: "{extension_name}_manifest.yaml"
+#
+# Limitations: The `generate_gxf_manifest.py` utility does not support cross-compilation.
+# If cross-compilation is detected, this function will skip and do nothing.
+#
+function(generate_gxf_registry_manifest)
+  # Skip if HOLOSCAN_ENABLE_GOOGLE_SANITIZER is enabled
+  if(HOLOSCAN_ENABLE_GOOGLE_SANITIZER)
+    return()
+  endif()
+  if(CMAKE_CROSSCOMPILING)
+    message(STATUS "Skipping GXF registry manifest generation due to cross-compilation.")
+    return()
+  endif()
+
+  set(SINGLE_VALUE_VARS
+    MANIFEST_NAME
+    EXTENSION_NAME
+    EXTENSION_TARGET
+  )
+  set(MULTI_VALUE_VARS
+    BINARY_FILES
+    FORWARD_ARGS
+  )
+  cmake_parse_arguments(ARG "" "${SINGLE_VALUE_VARS}" "${MULTI_VALUE_VARS}" ${ARGN})
+
+  if(NOT ARG_MANIFEST_NAME)
+    set(ARG_MANIFEST_NAME "${ARG_EXTENSION_NAME}_manifest.yaml")
+  endif()
+  if(NOT ARG_BINARY_FILES)
+    set(ARG_BINARY_FILES "$<TARGET_FILE:${ARG_EXTENSION_TARGET}>")
+  endif()
+
+  if(GXF_EXTENSIONS_DIR)
+    set(CORE_EXT_SEARCH_PATH "${GXF_EXTENSIONS_DIR}")
+  elseif(GXF_DIR)
+    get_filename_component(CORE_EXT_SEARCH_PATH "${GXF_DIR}/../../../" ABSOLUTE)
+  endif()
+
+  find_package(Python3 COMPONENTS Interpreter REQUIRED)
+  find_program(GENERATE_MANIFEST_FILE_PY
+    generate_gxf_manifest.py
+    HINTS
+      "${CMAKE_SOURCE_DIR}/scripts"
+      "${CMAKE_CURRENT_FUNCTION_LIST_DIR}"
+      "${CMAKE_CURRENT_FUNCTION_LIST_DIR}/../../../bin"
+    REQUIRED
+  )
+  add_custom_command(
+    OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/${ARG_MANIFEST_NAME}"
+    DEPENDS
+      ${ARG_EXTENSION_TARGET}
+      ${ARG_BINARY_FILES}
+      ${GENERATE_MANIFEST_FILE_PY}
+    COMMAND ${Python3_EXECUTABLE} "${GENERATE_MANIFEST_FILE_PY}"
+      --output "${CMAKE_CURRENT_BINARY_DIR}/${ARG_MANIFEST_NAME}"
+      --name ${ARG_EXTENSION_NAME}
+      --extension-library $<TARGET_FILE:${ARG_EXTENSION_TARGET}>
+      --arch ${CMAKE_HOST_SYSTEM_PROCESSOR}
+      --binaries ${ARG_BINARY_FILES}
+      --search-path "${CORE_EXT_SEARCH_PATH}"
+      --search-path "${CMAKE_BINARY_DIR}/lib/gxf_extensions"
+      --db "${CMAKE_BINARY_DIR}/gxf_extension_cache.pickle"
+      --quiet
+      ${ARG_FORWARD_ARGS}
+  )
+  add_custom_target(
+    generate_${ARG_EXTENSION_NAME}_gxf_registry_manifest ALL
+    DEPENDS "${CMAKE_CURRENT_BINARY_DIR}/${ARG_MANIFEST_NAME}"
+    COMMENT "Generating GXF registry manifest for ${ARG_EXTENSION_NAME}"
+  )
+endfunction()
+
+# Invoke the GXF registry CLI to register a GXF extension manifest to the local cache.
+#
+# Requirements:
+# - The `registry` executable is available in the system PATH.
+# - Extension dependencies are registered to the local registry cache in advance.
+# +# Inputs: +# EXTENSION_NAME: name of the extension to register +# MANIFEST: path to the manifest file to register +# DEPENDS: list of CMake targets or generated files on which the registration depends. +# Defaults to input manifest file. +# +# Outputs: +# - `register_${EXTENSION_NAME}` CMake target that registers the extension manifest and can be used +# as a dependency for subsequent extension registration operations. +# - register_${EXTENSION_NAME}.stamp: a stamp file indicating the registration target ran. +# +function(register_gxf_extension) + # Skip if HOLOSCAN_ENABLE_GOOGLE_SANITIZER is enabled + if(HOLOSCAN_ENABLE_GOOGLE_SANITIZER) + return() + endif() + + cmake_parse_arguments(ARG "" "EXTENSION_NAME;MANIFEST" "DEPENDS" ${ARGN}) + + find_program(GXF_REGISTRY_EXECUTABLE registry + HINTS /usr/bin + REQUIRED + ) + if(NOT GXF_REGISTRY_EXECUTABLE) + message(FATAL_ERROR "Could not find GXF registry executable") + endif() + + # Mark placeholder file as "dirty" so that the registration target always runs after each reconfigure + file(WRITE "${CMAKE_CURRENT_BINARY_DIR}/register_${ARG_EXTENSION_NAME}_configure.stamp" "") + add_custom_command( + OUTPUT "register_${ARG_EXTENSION_NAME}.stamp" + COMMAND ${GXF_REGISTRY_EXECUTABLE} extn add -m "${ARG_MANIFEST}" + COMMAND ${CMAKE_COMMAND} -E touch "register_${ARG_EXTENSION_NAME}.stamp" + DEPENDS + "${ARG_MANIFEST}" + ${ARG_DEPENDS} + "${CMAKE_CURRENT_BINARY_DIR}/register_${ARG_EXTENSION_NAME}_configure.stamp" + COMMENT "Registering ${ARG_EXTENSION_NAME} to the local GXF registry cache" + ) + add_custom_target( + register_${ARG_EXTENSION_NAME} ALL + DEPENDS "register_${ARG_EXTENSION_NAME}.stamp" + COMMENT "Registering GXF registry manifest for ${ARG_EXTENSION_NAME}" + ) +endfunction() + +# Convert UUID hash pair to IETF UUID format +# +# https://tools.ietf.org/html/rfc4122 +# +# Inputs: +# HASH1: first 64-bit hash in format '0x1234567890abcdef' +# HASH2: second 64-bit hash in format '0x1234567890abcdef' +# Output: +# String in IETF UUID format '12345678-90ab-cdef-1234-567890abcdef' +function(convert_uuid_hashes_to_ietf HASH1 HASH2 OUTPUT) + string(REGEX REPLACE "^0x" "" HASH1 "${HASH1}") + string(REGEX REPLACE "^0x" "" HASH2 "${HASH2}") + + string(SUBSTRING "${HASH1}" 0 8 part1) + string(SUBSTRING "${HASH1}" 8 4 part2) + string(SUBSTRING "${HASH1}" 12 4 part3) + string(SUBSTRING "${HASH2}" 0 4 part4) + string(SUBSTRING "${HASH2}" 4 12 part5) + + set(${OUTPUT} "${part1}-${part2}-${part3}-${part4}-${part5}" PARENT_SCOPE) +endfunction() + # Generates a GXF extension that includes a GXF codelet which wraps the input C++ operator # # Inputs are: @@ -36,14 +202,19 @@ # EXTENSION_TARGET_NAME: name of the cmake target to generate for the gxf extension library # Note: optional, defaults to EXTENSION_NAME lowercase # EXTENSION_TARGET_PROPERTIES: any valid cmake properties that should affect the target above. +# EXTENSION_DEPENDS: list of GXF extensions on which this extension depends. Accepts generator expressions. +# MANIFEST_ARGS: list of additional arguments to pass to the manifest generation script +# REGISTER: whether to automatically register the extension with the local GXF registry cache. +# REGISTER_DEPENDS: list of CMake targets on which GXF extension registration depends. 
#
# Outputs are:
#   lib<extension_target_name>.so
#   lib<codelet_target_name>.so
+#   <extension_name>_manifest.yaml
 function(wrap_operator_as_gxf_extension)
   # Define arguments
-  list(APPEND OPTION_VARS "")
+  list(APPEND OPTION_VARS REGISTER)
   list(APPEND REQUIRED_SINGLE_VALUE_VARS
     EXTENSION_ID_HASH1
     EXTENSION_ID_HASH2
@@ -68,6 +239,9 @@ function(wrap_operator_as_gxf_extension)
   list(APPEND MULTI_VALUE_VARS
     EXTENSION_TARGET_PROPERTIES
     CODELET_TARGET_PROPERTIES
+    EXTENSION_DEPENDS
+    MANIFEST_ARGS
+    REGISTER_DEPENDS
   )
   # Parse arguments
@@ -170,6 +344,38 @@ function(wrap_operator_as_gxf_extension)
     )
   endif()
+  convert_uuid_hashes_to_ietf(${EXTENSION_ID_HASH1} ${EXTENSION_ID_HASH2} CODELET_UUID)
+  find_package(CUDAToolkit REQUIRED)
+  set(MANIFEST_FORWARD_ARGS
+    --uuid "${CODELET_UUID}"
+    --version "${EXTENSION_VERSION}"
+    --cuda "${CUDAToolkit_VERSION}"
+    ${ARG_MANIFEST_ARGS}
+  )
+  if(ARG_EXTENSION_DEPENDS)
+    list(APPEND MANIFEST_FORWARD_ARGS
+      "--extension-dependencies"
+      ${ARG_EXTENSION_DEPENDS}
+    )
+  endif()
+
+  generate_gxf_registry_manifest(
+    EXTENSION_NAME ${EXTENSION_NAME}
+    EXTENSION_TARGET ${EXTENSION_TARGET_NAME}
+    MANIFEST_NAME ${EXTENSION_NAME}_manifest.yaml
+    BINARY_FILES
+      $<TARGET_FILE:${EXTENSION_TARGET_NAME}>
+      $<TARGET_FILE:${CODELET_TARGET_NAME}>
+    FORWARD_ARGS ${MANIFEST_FORWARD_ARGS}
+  )
+  if(ARG_REGISTER AND NOT CMAKE_CROSSCOMPILING)
+    register_gxf_extension(
+      EXTENSION_NAME ${EXTENSION_NAME}
+      MANIFEST "${EXTENSION_NAME}_manifest.yaml"
+      DEPENDS ${ARG_REGISTER_DEPENDS}
+    )
+  endif()
+
   # Add link directories property to find the gxf wrapping library from the install tree
   get_target_property(EXTENSION_LINK_DIRECTORIES ${EXTENSION_TARGET_NAME} LINK_DIRECTORIES)
   # Sets the EXTENSION_LINK_DIRECTORIES to an empty string
diff --git a/cmake/modules/cpack/NOTICE.txt b/cmake/modules/cpack/NOTICE.txt
index 9b8e3906..6d9dca3d 100644
--- a/cmake/modules/cpack/NOTICE.txt
+++ b/cmake/modules/cpack/NOTICE.txt
@@ -1,5 +1,5 @@ Holoscan SDK
-Copyright (c) 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+Copyright (c) 2021-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 This product includes software developed at NVIDIA CORPORATION (https://www.nvidia.com/).
diff --git a/cmake/modules/cpack/README.md b/cmake/modules/cpack/README.md
new file mode 100644
index 00000000..07e34dd5
--- /dev/null
+++ b/cmake/modules/cpack/README.md
@@ -0,0 +1,35 @@
+# Holoscan SDK
+
+The **Holoscan SDK** is part of [NVIDIA Holoscan](https://developer.nvidia.com/holoscan-sdk), the AI sensor processing platform that combines hardware systems for low-latency sensor and network connectivity, optimized libraries for data processing and AI, and core microservices to run streaming, imaging, and other applications, from embedded to edge to cloud. It can be used to build streaming AI pipelines for a variety of domains, including Medical Devices, High Performance Computing at the Edge, Industrial Inspection and more.
+
+## Table of Contents
+
+- [Getting Started](#getting-started)
+- [Obtaining the Holoscan SDK](#obtaining-the-holoscan-sdk)
+- [Troubleshooting and Feedback](#troubleshooting-and-feedback)
+- [Additional Notes](#additional-notes)
+
+## Getting Started
+
+Visit the Holoscan User Guide to get started with the Holoscan SDK:
+
+The Holoscan User Guide includes:
+- An introduction to the NVIDIA Holoscan platform, including the Holoscan C++/Python SDK;
+- Requirements and setup steps;
+- Detailed SDK documentation, including a developer introduction, examples, and API details.
+ +We also recommend visiting [NVIDIA HoloHub](https://github.com/nvidia-holoscan/holohub) to view +community projects and reusable components available for your Holoscan project. + +## Troubleshooting and Feedback + +We appreciate community discussion and feedback in support of Holoscan platform users and developers. We ask that users: +- Review the [Holoscan SDK Frequently Asked Questions](FAQ.md) document for common solutions and workarounds. +- Direct questions to the [NVIDIA Support Forum](https://forums.developer.nvidia.com/c/healthcare/holoscan-sdk/320/all). +- Enter SDK issues on the [SDK GitHub Issues board](https://github.com/nvidia-holoscan/holoscan-sdk/issues). + +## Contributing to Holoscan SDK + +Holoscan SDK is developed internally and released as open source software. We welcome community contributions +and may include them in Holoscan SDK releases at our discretion. Please refer to the Holoscan SDK +[Contributing Guide](/CONTRIBUTING.md) for more information. diff --git a/docs/_templates/layout.html b/docs/_templates/layout.html new file mode 100644 index 00000000..8317b604 --- /dev/null +++ b/docs/_templates/layout.html @@ -0,0 +1,8 @@ +{% extends "!layout.html" %} + +{% block extrahead %} + +{% endblock %} +{% block footer %} + +{% endblock %} \ No newline at end of file diff --git a/docs/api/holoscan_cpp_api.md b/docs/api/holoscan_cpp_api.md index b5c4ba92..5490cf81 100644 --- a/docs/api/holoscan_cpp_api.md +++ b/docs/api/holoscan_cpp_api.md @@ -97,6 +97,7 @@ - {ref}`exhale_class_classholoscan_1_1ParameterWrapper` - {ref}`exhale_class_classholoscan_1_1Resource` - {ref}`exhale_class_classholoscan_1_1Scheduler` +- {ref}`exhale_class_classholoscan_1_1ThreadPool` ### Operators @@ -164,6 +165,7 @@ - {ref}`exhale_class_classholoscan_1_1StdComponentSerializer` - {ref}`exhale_class_classholoscan_1_1StdEntitySerializer` - {ref}`exhale_class_classholoscan_1_1StreamOrderedAllocator` +- {ref}`exhale_class_classholoscan_1_1ThreadPool` - {ref}`exhale_class_classholoscan_1_1Transmitter` - {ref}`exhale_class_classholoscan_1_1UcxComponentSerializer` - {ref}`exhale_class_classholoscan_1_1UcxEntitySerializer` @@ -294,7 +296,6 @@ ### Inference Module Typedefs -- {ref}`exhale_typedef_data__processor_8hpp_1a52a96a33d28d5268514788dc66953631` - {ref}`exhale_typedef_data__processor_8hpp_1aebc6df65b363c69857e1a735ea8108ce` - {ref}`exhale_typedef_holoinfer__buffer_8hpp_1a087e5c16b34b9ed56caef479b684c421` - {ref}`exhale_typedef_holoinfer__buffer_8hpp_1a33b28575b822fc2e74dd30eab1ae22bf` @@ -320,6 +321,7 @@ - {ref}`exhale_variable_dataflow__tracker_8hpp_1af7cd18b9eb2b9d9b76d328b59900f566` - {ref}`exhale_variable_expected_8hpp_1ae6efc4444700a9a08911d884857bb06c` - {ref}`exhale_variable_gpu__resource__monitor_8hpp_1ae9c4ec64e9b50146f256c3e70eccb823` +- {ref}`exhale_variable_io__context_8hpp_1a7d68812a7241b94af93ec46784458585` - {ref}`exhale_variable_serialization__buffer_8hpp_1aa7a8ceba3b1b28fd04e0139b78701b36` - {ref}`exhale_variable_type__traits_8hpp_1a2b61ac0c36bd39ca398dde9664e65e33` - {ref}`exhale_variable_type__traits_8hpp_1a3891b0c8d38e9c0a11b23dc8edd31ceb` @@ -337,7 +339,6 @@ - {ref}`exhale_variable_infer__manager_8hpp_1a25cde569b0d251fbd30765ec68766a0b` - {ref}`exhale_variable_infer__manager_8hpp_1a52921e7945bc7ee74cb281271e8fbeb4` - {ref}`exhale_variable_modules_2holoinfer_2src_2include_2holoinfer__utils_8hpp_1aed7f62ec8a46ab6cbe3334ac26c719c6` -- {ref}`exhale_variable_process__manager_8hpp_1ab99c5b36d1bbf94cb7b4d231de096fdb` - 
{ref}`exhale_variable_utils_8hpp_1aba4496e4cd0c7966ca1730727c109373`
```{toctree}
diff --git a/docs/cli/package.md b/docs/cli/package.md
index dde5f01b..70725a7d 100755
--- a/docs/cli/package.md
+++ b/docs/cli/package.md
@@ -6,7 +6,7 @@ ## Synopsis
-`holoscan package` [](#cli-help) [](#cli-log-level) [](#cli-package-config) [](#cli-package-docs) [](#cli-package-models) [](#cli-package-platform) [](#cli-package-platform-config) [](#cli-package-timeout) [](#cli-package-version) [](#cli-package-base-image) [](#cli-package-build-image) [](#cli-package-includes) [](#cli-package-build-cache) [](#cli-package-cmake-args) [](#cli-package-no-cache) [](#cli-package-sdk) [](#cli-package-source) [](#cli-package-sdk-version) [](#cli-package-holoscan-sdk-file) [](#cli-package-monai-deploy-sdk-file) [](#cli-package-output) [](#cli-package-tag) [](#cli-package-username) [](#cli-package-uid) [](#cli-package-gid) [](#cli-package-application) [](#cli-package-source)
+`holoscan package` [](#cli-help) [](#cli-log-level) [](#cli-package-config) [](#cli-package-docs) [](#cli-package-add) [](#cli-package-models) [](#cli-package-platform) [](#cli-package-platform-config) [](#cli-package-timeout) [](#cli-package-version) [](#cli-package-base-image) [](#cli-package-build-image) [](#cli-package-includes) [](#cli-package-build-cache) [](#cli-package-cmake-args) [](#cli-package-no-cache) [](#cli-package-sdk) [](#cli-package-source) [](#cli-package-sdk-version) [](#cli-package-holoscan-sdk-file) [](#cli-package-monai-deploy-sdk-file) [](#cli-package-output) [](#cli-package-tag) [](#cli-package-username) [](#cli-package-uid) [](#cli-package-gid) [](#cli-package-application) [](#cli-package-source)
 ## Examples
@@ -82,6 +82,48 @@ Path to the application's [configuration file](./run_config.md). The configurati
 An optional directory path of documentation, README, licenses that shall be included in the package.
+(#cli-package-add)=
+
+### `[--add DIR_PATH]`
+
+`--add` enables additional files to be added to the application package. Use this option to include additional Python modules, files, or shared objects (.so) on which the application depends.
+
+- `DIR_PATH` must be a directory path. The packager recursively copies all the files and directories inside `DIR_PATH` to `/opt/holoscan/app/lib`.
+- `--add` may be specified multiple times.
+
+For example:
+```bash
+holoscan package --add /path/to/python/module-1 --add /path/to/static-objects
+```
+
+With the example above, assuming the directories contain the following:
+
+```bash
+/path/to/
+├── python
+│   ├── module-1
+│   │   ├── __init__.py
+│   │   └── main.py
+└── static-objects
+    ├── my-lib.so
+    └── my-other-lib.so
+```
+
+The resulting package will contain the following:
+
+```bash
+/opt/holoscan/
+├── app
+│   └── my-app
+└── lib/
+    ├── module-1
+    │   ├── __init__.py
+    │   └── main.py
+    ├── my-lib.so
+    └── my-other-lib.so
+
+```
+
 (#cli-package-models)=
 ### `[--models|-m MODELS]`
diff --git a/docs/components/conditions.md b/docs/components/conditions.md
index 81a6a080..d8feae00 100644
--- a/docs/components/conditions.md
+++ b/docs/components/conditions.md
@@ -21,13 +21,15 @@ By default, operators are always `READY`, meaning they are scheduled to continuo
 - MessageAvailableCondition
 - ExpiringMessageAvailableCondition
+- MultiMessageAvailableCondition
+- MultiMessageAvailableTimeoutCondition
 - DownstreamMessageAffordableCondition
 - CountCondition
 - BooleanCondition
 - PeriodicCondition
 - AsynchronousCondition
-These conditions fall under various types as detailed below.
Often, conditions are explicitly added to an operator by the application author, but it should also be noted that unless the default is overridden a `MessageAvailableCondition` is automatically added for each of an operator's input ports and a `DownstreamMessageAffordableCondition` is automatically added for each of it's output ports.
+These conditions fall under various types as detailed below. Often, conditions are explicitly added to an operator by the application author, but it should also be noted that unless the default is overridden, a `MessageAvailableCondition` is automatically added for each of an operator's input ports and a `DownstreamMessageAffordableCondition` is automatically added for each of its output ports.
 :::{note}
 Detailed APIs can be found here: {ref}`C++ `/{py:mod}`Python `.
 :::
@@ -43,17 +45,21 @@ conversely, the operator is unscheduled from execution whenever any one of the s
 The following table gives a rough categorization of the available condition types to help better understand their purpose and how they are assigned. More detailed descriptions of the individual conditions are given in the following sections.
-| Condition Name | Classification | Associated With |
-|--------------------------------------|------------------|---------------------|
-| MessageAvailableCondition | message-driven | single input port |
-| ExpiringMessageAvailableCondition | message-driven | single input port |
-| DownstreamMessageAffordableCondition | message-driven | single output port |
-| PeriodicCondition | clock-driven | operator as a whole |
-| CountCondition | other | operator as a whole |
-| BooleanCondition | execution-driven | operator as a whole |
-| AsynchronousCondition | execution-driven | operator as a whole |
+| Condition Name | Classification | Associated With |
+|----------------------------------------|------------------|--------------------------------|
+| MessageAvailableCondition | message-driven | single input port |
+| ExpiringMessageAvailableCondition | message-driven | single input port |
+| MultiMessageAvailableCondition | message-driven | multiple input ports |
+| MultiMessageAvailableTimeoutCondition | message-driven | single or multiple input ports |
+| DownstreamMessageAffordableCondition | message-driven | single output port |
+| PeriodicCondition | clock-driven | operator as a whole |
+| CountCondition | other | operator as a whole |
+| BooleanCondition | execution-driven | operator as a whole |
+| AsynchronousCondition | execution-driven | operator as a whole |
-Here, the various message-driven conditions are associated with an input port (receiver) or output port (transmitter). Message-driven conditions are typically assigned via the `IOSpec::condition` method ({cpp:func}`C++ `/{py:func}`Python `) method as called from an operator's `setup` ({cpp:func}`C++ `/{py:func}`Python `) method. All other condition types are typically passed as either a positional or keyword argument during operator construction in the application's `compose` method (i.e. passed to {cpp:func}`~holoscan::Fragment::make_operator` in C++ or the operator class's constructor in Python). Once these conditions are assigned, they automatically enforce the associated criteria for that transmitter/receiver as part of the conditions controlling whether the operator will call `compute`. Due to the AND combination of conditions discussed above, all ports must meet their associated conditions in order for an operator to call `compute`.
+Here, the various message-driven conditions are associated with an input port (receiver) or output port (transmitter). Message-driven conditions that are associated with a single input port are assigned via the `IOSpec::condition` method ({cpp:func}`C++ `/{py:func}`Python `) as called from an operator's `setup` ({cpp:func}`C++ `/{py:func}`Python `) method. Those associated with multiple input ports are instead assigned via the `OperatorSpec::multi_port_condition` method ({cpp:func}`C++ `/{py:func}`Python `) as called from an operator's `setup` ({cpp:func}`C++ `/{py:func}`Python `) method.
+
+All other condition types are typically passed as either a positional or keyword argument during operator construction in the application's `compose` method (i.e. passed to {cpp:func}`~holoscan::Fragment::make_operator` in C++ or the operator class's constructor in Python). Once these conditions are assigned, they automatically enforce the associated criteria for that transmitter/receiver as part of the conditions controlling whether the operator will call `compute`. Due to the AND combination of conditions discussed above, all ports must meet their associated conditions in order for an operator to call `compute`.
 The `PeriodicCondition` is clock-driven. It automatically takes effect based on timing from its associated clock. The `CountCondition` is another condition type that automatically takes effect, stopping execution of an operator after a specified count is reached.
@@ -72,7 +78,9 @@ An operator associated with `ExpiringMessageAvailableCondition` ({cpp:class}`C++
 This condition is associated with a specific input or output port of an operator through the `condition()` method on the return value (IOSpec) of the OperatorSpec's `input()` or `output()` method. The parameters ``max_batch_size`` and ``max_delay_ns`` dictate the maximum number of messages to be batched together and the maximum delay from first message to wait before executing the entity respectively.
-Please note that `ExpiringMessageAvailableCondition` requires that the input messages sent to any port using this condition must contain a timestamp. This means that the upstream operator has to emit using a timestamp .
+Please note that `ExpiringMessageAvailableCondition` requires that the input messages sent to any port using this condition must contain a timestamp. This means that the upstream operator has to emit using a timestamp.
+
+To obtain a similar capability without the need for a timestamp, the `MultiMessageAvailableTimeoutCondition` described below can be used with only a single input port assigned. The difference in the timing computation is that `MultiMessageAvailableTimeoutCondition` measures the elapsed time since `compute` was last called on the operator, while `ExpiringMessageAvailableCondition` is instead based on the elapsed time since a message arrived in the operator's input queue.
 ## DownstreamMessageAffordableCondition
 The `DownstreamMessageAffordableCondition` ({cpp:class}`C++ `/{py:class}`Python `) is executed when the
@@ -80,6 +88,26 @@
+## MultiMessageAvailableCondition
+
+An operator associated with `MultiMessageAvailableCondition` ({cpp:class}`C++ `/{py:class}`Python `) is executed when the associated queues of multiple user-specified input ports have the required number of elements.
+
+This condition is associated with multiple input ports of an operator through the `multi_port_condition()` method on OperatorSpec. The `port_names` argument to `multi_port_condition` controls which input ports are associated with this condition.
+
+This condition has two operating modes. The first mode is `MultiMessageAvailableCondition::SamplingMode::SumOfAll` (C++) or `holoscan.conditions.MultiMessageAvailableCondition.SamplingMode.SUM_OF_ALL` (Python). In this mode, the `min_sum` parameter is used to specify the total number of messages that must be received across all the ports included in `port_names` for the operator to execute. The second available mode is `MultiMessageAvailableCondition::SamplingMode::PerReceiver` (C++) or `holoscan.conditions.MultiMessageAvailableCondition.SamplingMode.PER_RECEIVER` (Python). This mode instead takes a vector/list of `min_sizes` equal in length to the `port_names`. This controls the number of messages that must arrive at each individual port in order for the operator to execute. This latter, "per-receiver" mode is equivalent to setting a `MessageAvailableCondition` on each input port individually.
+
+For more details see the [C++ example](https://github.com/nvidia-holoscan/holoscan-sdk/blob/main/examples/conditions/multi_message/cpp/multi_message_per_receiver.cpp) or [Python example](https://github.com/nvidia-holoscan/holoscan-sdk/blob/main/examples/conditions/multi_message/python/multi_message_per_receiver.py).
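+As an illustration, a minimal C++ `setup` sketch for the "per-receiver" mode might look like the following (the `ConditionType::kMultiMessageAvailable` enum value and the exact `multi_port_condition` signature are assumptions based on the SDK's multi-message examples, not a verbatim excerpt):
+
+```cpp
+void setup(holoscan::OperatorSpec& spec) override {
+  // Replace the default per-port MessageAvailableCondition on each input
+  spec.input<std::shared_ptr<std::string>>("in1").condition(holoscan::ConditionType::kNone);
+  spec.input<std::shared_ptr<std::string>>("in2").condition(holoscan::ConditionType::kNone);
+
+  // Execute once each of the two ports has received at least one message
+  spec.multi_port_condition(
+      holoscan::ConditionType::kMultiMessageAvailable, {"in1", "in2"},
+      holoscan::ArgList{
+          holoscan::Arg("min_sizes", std::vector<size_t>{1, 1}),
+          holoscan::Arg("sampling_mode",
+                        holoscan::MultiMessageAvailableCondition::SamplingMode::PerReceiver)});
+}
+```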
+
+## MultiMessageAvailableTimeoutCondition
+
+This condition is the same as `MultiMessageAvailableCondition` described above, but has one additional parameter, `execution_frequency`, that can be used to specify a timeout interval after which the operator will be allowed to execute even if the condition on the number of messages received has not yet been met.
+
+For more details see the [C++ example](https://github.com/nvidia-holoscan/holoscan-sdk/blob/main/examples/conditions/multi_message/cpp/multi_message_sum_of_all.cpp) or [Python example](https://github.com/nvidia-holoscan/holoscan-sdk/blob/main/examples/conditions/multi_message/python/multi_message_sum_of_all.py).
+
+:::{note}
+This condition can also be assigned via `IOSpec::condition` instead of `OperatorSpec::multi_port_condition` to support the use case where there is only one port to consider. This provides a way for a single input port to support a message available condition that has a timeout interval.
+:::
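+A sketch of this single-port usage in C++ (the `ConditionType::kMultiMessageAvailableTimeout` enum value and the `"30Hz"` frequency string format are assumptions based on the SDK examples, not guaranteed API):
+
+```cpp
+void setup(holoscan::OperatorSpec& spec) override {
+  // Execute when a message arrives, or at roughly 30 Hz even if none has arrived
+  spec.input<std::shared_ptr<std::string>>("in")
+      .condition(holoscan::ConditionType::kMultiMessageAvailableTimeout,
+                 holoscan::Arg("execution_frequency", std::string("30Hz")),
+                 holoscan::Arg("min_sum", static_cast<size_t>(1)));
+}
+```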
+
 ## CountCondition
 An operator associated with `CountCondition` ({cpp:class}`C++ `/{py:class}`Python `) is executed for a specific number of times specified using its `count` parameter.
diff --git a/docs/components/resources.md b/docs/components/resources.md
index 5cc3efea..5661e5bf 100644
--- a/docs/components/resources.md
+++ b/docs/components/resources.md
@@ -97,3 +97,13 @@ This is the receiver class used by input ports of operators within a fragment.
 ### UcxReceiver
 This is the receiver class used by input ports of operators that connect fragments in distributed applications. It takes care of receiving UCX active messages and deserializing their contents.
+
+## System Resources
+
+The components in this "system resources" section are related to system resources such as CPU threads that can be used by operators.
+
+### ThreadPool
+
+This resource represents a thread pool that can be used to pin operators to run using specific CPU threads. This functionality is not supported by the `GreedyScheduler` because it is single-threaded, but it is supported by both the `EventBasedScheduler` and `MultiThreadScheduler`. Unlike other resource types, a ThreadPool should **not** be created via `make_resource` ({cpp:func}`C++ `/{py:func}`Python `), but should instead use the dedicated `make_thread_pool` ({cpp:func}`C++ `/{py:func}`Python `) method. This dedicated method is necessary as the thread pool requires some additional initialization logic that is not required by the other resource types. See the section on {ref}`configuring thread pools ` in the user guide for usage.
+
+- The parameter `initial_size` indicates the number of threads to initialize the thread pool with (a usage sketch follows below).
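+As a brief C++ sketch within an application's `compose` method (the thread pool's `add` method for pinning operators is an assumption based on the SDK's thread pool example):
+
+```cpp
+// Within Application::compose(), after creating operators tx and rx
+auto pool = make_thread_pool("pool1", 2);  // pool with an initial_size of 2 threads
+pool->add(tx, true);  // pin tx to a thread in this pool
+pool->add(rx, true);  // pin rx to a thread in this pool
+```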
diff --git a/docs/components/schedulers.md b/docs/components/schedulers.md
index 8914a01d..51e3a158 100644
--- a/docs/components/schedulers.md
+++ b/docs/components/schedulers.md
@@ -31,6 +31,7 @@ The multithread scheduler has several parameters that the user can configure. Th
 - The number of worker threads used by the scheduler can be set via `worker_thread_number`, which defaults to `1`. This should be set based on a consideration of both the workflow and the available hardware. For example, the topology of the computation graph will determine how many operators it may be possible to run in parallel. Some operators may potentially launch multiple threads internally, so some amount of performance profiling may be required to determine optimal parameters for a given workflow.
 - The value of `check_recession_period_ms` controls how long the scheduler will sleep before checking a given condition again. In other words, this is the polling interval for operators that are in a `WAIT` state. The default value for this parameter is `5` ms.
+- The value of `strict_job_thread_pinning` controls the behavior when user-defined thread pools with thread pinning are used. If this value is `false` (the default), then whenever an operator pinned to a thread is not in a READY state, some other unpinned operator could make use of that thread. If `true`, only the pinned operator can make use of the thread (see the sketch below).
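+For example, a hedged C++ sketch of configuring these parameters when composing an application (the `int64_t` argument type is an assumption):
+
+```cpp
+auto scheduler = app->make_scheduler<holoscan::MultiThreadScheduler>(
+    "multithread-scheduler",
+    holoscan::Arg("worker_thread_number", static_cast<int64_t>(4)),
+    holoscan::Arg("strict_job_thread_pinning", true));
+app->scheduler(scheduler);
+```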
## Receiving Any Number of Inputs -In this workflow, `PingRxOp` has a single input port - `receivers` - that is connected to two upstream ports from `PingMxOp`. When an input port needs to connect to multiple upstream ports, we define it with `spec.param()` instead of `spec.input()`. The inputs are then stored in a vector, following the order they were added with `add_flow()`. +In this workflow, `PingRxOp` has a single input port - `receivers` - that is connected to two upstream ports from `PingMxOp`. When an input port needs to connect to multiple upstream ports, we define it with `spec.input()` and set the size to `IOSpec::kAnySize` (or `IOSpec.ANY_SIZE` in Python). This allows the input port to receive data from multiple sources. The inputs are then stored in a vector, following the order they were added with `add_flow()`. `````{tab-set} ````{tab-item} C++ diff --git a/docs/flow_tracking.md index b45e6bab..c259939d 100644 --- a/docs/flow_tracking.md +++ b/docs/flow_tracking.md @@ -1,10 +1,6 @@ (holoscan-flow-tracking)= # Data Flow Tracking -:::{warning} -Data Flow Tracking is currently only supported between multiple fragments in a [distributed application](./holoscan_create_distributed_app.md) in a single machine. -::: - The Holoscan SDK provides the Data Flow Tracking APIs as a mechanism to profile your application and analyze the fine-grained timing properties and data flow between operators in the graph of a fragment. Currently, data flow tracking is only supported between the root operators and leaf operators of a graph and in simple cycles in a graph (support for tracking data flow between any pair of operators in a graph is planned for the future). @@ -187,7 +183,11 @@ with Tracker(app) as trackers: ## Customizing Data Flow Tracking -Data flow tracking can be customized using a few, optional configuration parameters. The `track()` method ({cpp:func}`C++ `//{py:func}`Python `) (or `track_distributed` method ({cpp:func}`C++ `/{py:func}`Python `)` for distributed apps) can be configured to skip a few messages at the beginning of an application's execution as a *warm-up* period. It is also possible to discard a few messages at the end of an application's run as a *wrap-up* period. Additionally, outlier end-to-end latencies can be ignored by setting a latency threshold value (in ms) which is the minimum latency below which the observed latencies are ignored. +Data flow tracking can be customized using a few optional configuration parameters. The `track()` method ({cpp:func}`C++ `/{py:func}`Python `) (or the `track_distributed` method ({cpp:func}`C++ `/{py:func}`Python `) for distributed apps) can be configured to skip a few messages at the beginning of an application's execution as a *warm-up* period. It is also possible to discard a few messages at the end of an application's run as a *wrap-up* period. Additionally, outlier end-to-end latencies can be ignored by setting a latency threshold value (in ms), which is the minimum latency below which the observed latencies are ignored. +Finally, it is possible to restrict the timestamping of messages to only the root and leaf +operators, so that the overhead of timestamping and sending timestamped messages is reduced. In +this way, end-to-end latencies are still calculated, but pathwise fine-grained data are not stored +for unique pairs of root and leaf operators. For Python, it is recommended to use the {py:class}`Tracker` context manager class instead of the `track` or `track_distributed` methods.
This class will autodetect whether the application is a single-fragment or distributed app, using the appropriate method for each. @@ -201,7 +201,8 @@ For effective benchmarking, it is common practice to include warm-up and cool-do :caption: Optional parameters to `track()` Fragment::track(uint64_t num_start_messages_to_skip = kDefaultNumStartMessagesToSkip, uint64_t num_last_messages_to_discard = kDefaultNumLastMessagesToDiscard, - int latency_threshold = kDefaultLatencyThreshold); + int latency_threshold = kDefaultLatencyThreshold, + bool is_limited_tracking = false); ``` ```` ````{tab-item} Python @@ -209,7 +210,8 @@ Fragment::track(uint64_t num_start_messages_to_skip = kDefaultNumStartMessagesTo :caption: Optional parameters to `Tracker` Tracker(num_start_messages_to_skip=num_start_messages_to_skip, num_last_messages_to_discard=num_last_messages_to_discard, - latency_threshold=latency_threshold) + latency_threshold=latency_threshold, + is_limited_tracking=False) ``` ```` ````` @@ -217,11 +219,13 @@ The default values of these parameters of `track()` are as follows: - `kDefaultNumStartMessagesToSkip`: 10 - `kDefaultNumLastMessagesToDiscard`: 10 - `kDefaultLatencyThreshold`: 0 (do not filter out any latency values) +- `is_limited_tracking`: false These parameters can also be configured using the helper functions: {cpp:func}`set_skip_starting_messages `, -{cpp:func}`set_discard_last_messages ` -and {cpp:func}`set_skip_latencies `. +{cpp:func}`set_discard_last_messages `, +{cpp:func}`set_skip_latencies `, +and {cpp:func}`set_limited_tracking `. ## Logging @@ -259,4 +263,175 @@ The logger file logs the paths of the messages after a leaf operator has finishe >"(root operator name, message receive timestamp, message publish timestamp) -> ... -> (leaf operator name, message receive timestamp, message publish timestamp)". -This log file can further be analyzed to understand latency distributions, bottlenecks, data flow, and other characteristics of an application. +This log file can further be analyzed to understand latency distributions, bottlenecks, data flow, +and other characteristics of an application. + +## Configuring Clock Synchronization in Multiple Machines for Distributed Application Flow Tracking + +For flow tracking in distributed applications that span multiple machines, system administrators +must ensure that the clocks of all machines are synchronized. The choice of clock synchronization +mechanism is up to the administrator. [Linux PTP](https://linuxptp.sourceforge.net/) is a +popular and commonly used mechanism for clock synchronization. + +Install `linuxptp` on all machines, for example by building it from source: + +```bash +git clone http://git.code.sf.net/p/linuxptp/code linuxptp +cd linuxptp/ +make +sudo make install +``` + +:::{tip} +The Ubuntu `linuxptp` package can also be used. However, the above repository provides access to +different PTP configurations.
+::: + +### Check PTP Hardware Timestamping Support + +Check if your machine and network interface card support PTP hardware timestamping: + +```bash +$ sudo apt-get update && sudo apt-get install ethtool +$ ethtool -T <interface> +``` + +If the output of the above command is like the one provided below, it means PTP hardware +timestamping may be supported: + +```bash +$ ethtool -T eno1 +Time stamping parameters for eno1: +Capabilities: + hardware-transmit (SOF_TIMESTAMPING_TX_HARDWARE) + software-transmit (SOF_TIMESTAMPING_TX_SOFTWARE) + hardware-receive (SOF_TIMESTAMPING_RX_HARDWARE) + software-receive (SOF_TIMESTAMPING_RX_SOFTWARE) + software-system-clock (SOF_TIMESTAMPING_SOFTWARE) + hardware-raw-clock (SOF_TIMESTAMPING_RAW_HARDWARE) +PTP Hardware Clock: 0 +Hardware Transmit Timestamp Modes: + off (HWTSTAMP_TX_OFF) + on (HWTSTAMP_TX_ON) +Hardware Receive Filter Modes: + none (HWTSTAMP_FILTER_NONE) + all (HWTSTAMP_FILTER_ALL) + ptpv1-l4-sync (HWTSTAMP_FILTER_PTP_V1_L4_SYNC) + ptpv1-l4-delay-req (HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ) + ptpv2-l4-sync (HWTSTAMP_FILTER_PTP_V2_L4_SYNC) + ptpv2-l4-delay-req (HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ) + ptpv2-l2-sync (HWTSTAMP_FILTER_PTP_V2_L2_SYNC) + ptpv2-l2-delay-req (HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ) + ptpv2-event (HWTSTAMP_FILTER_PTP_V2_EVENT) + ptpv2-sync (HWTSTAMP_FILTER_PTP_V2_SYNC) + ptpv2-delay-req (HWTSTAMP_FILTER_PTP_V2_DELAY_REQ) +``` + +However, if the output is like the one provided below, it means PTP hardware timestamping is not supported: + +```bash +$ ethtool -T eno1 +Time stamping parameters for eno1: +Capabilities: + software-transmit + software-receive + software-system-clock +PTP Hardware Clock: none +Hardware Transmit Timestamp Modes: none +Hardware Receive Filter Modes: none +``` + +### Without PTP Hardware Timestamping Support + +Even if PTP hardware timestamping is not supported, it is possible to synchronize the clocks of +different machines using software-based clock synchronization. Here, we show an example of how to synchronize the clocks of two machines using +the [automotive PTP profiles](https://linuxptp.nwtime.org/documentation/). Developers and administrators can use their own profiles. + +Select one machine as the clock server and the others as the clients. On the server, run the following command: + +```bash +sudo ptp4l -i eno1 -f linuxptp/configs/automotive-master.cfg -m -S +ptp4l[7526757.990]: port 1 (eno1): INITIALIZING to MASTER on INIT_COMPLETE +ptp4l[7526757.991]: port 0 (/var/run/ptp4l): INITIALIZING to LISTENING on INIT_COMPLETE +ptp4l[7526757.991]: port 0 (/var/run/ptp4lro): INITIALIZING to LISTENING on INIT_COMPLETE +``` + +On the clients, run the following command: + +```bash +$ sudo ptp4l -i eno1 -f linuxptp/configs/automotive-slave.cfg -m -S +ptp4l[7370954.836]: port 1 (eno1): INITIALIZING to SLAVE on INIT_COMPLETE +ptp4l[7370954.836]: port 0 (/var/run/ptp4l): INITIALIZING to LISTENING on INIT_COMPLETE +ptp4l[7370954.836]: port 0 (/var/run/ptp4lro): INITIALIZING to LISTENING on INIT_COMPLETE +ptp4l[7370956.785]: rms 5451145770 max 5451387307 freq -32919 +/- 0 delay 72882 +/- 0 +ptp4l[7370957.785]: rms 5451209853 max 5451525811 freq -32919 +/- 0 delay 71671 +/- 0 +... +...
(wait until the rms value drops to the order of microseconds) +ptp4l[7371017.791]: rms 196201 max 324853 freq -13722 +/- 34129 delay 73814 +/- 0 +ptp4l[7371018.791]: rms 167568 max 249998 freq +6509 +/- 30532 delay 73609 +/- 0 +ptp4l[7371019.791]: rms 158762 max 216309 freq -8778 +/- 28459 delay 73060 +/- 0 +``` + +`CLOCK_REALTIME` on both Linux machines is now synchronized to within the range of microseconds. +Different fragments of a distributed application can now be run on these machines with flow +tracking, and the end-to-end latency of the application can be measured across these machines. + +The `ptp4l` commands can eventually be added as systemd services so that they start automatically on boot. + +### With PTP Hardware Timestamping Support + +If PTP hardware timestamping is supported, the physical clock of the network interface card can be +synchronized to the system clock, `CLOCK_REALTIME`. This can be done by running the following +commands: + +```bash +$ sudo ptp4l -i eno1 -f linuxptp/configs/gPTP.cfg --step_threshold=1 -m & +ptp4l[7527677.746]: port 1 (eno1): INITIALIZING to LISTENING on INIT_COMPLETE +ptp4l[7527677.747]: port 0 (/var/run/ptp4l): INITIALIZING to LISTENING on INIT_COMPLETE +ptp4l[7527677.747]: port 0 (/var/run/ptp4lro): INITIALIZING to LISTENING on INIT_COMPLETE +ptp4l[7527681.663]: port 1 (eno1): LISTENING to MASTER on ANNOUNCE_RECEIPT_TIMEOUT_EXPIRES +ptp4l[7527681.663]: selected local clock f02f74.fffe.cb3590 as best master +ptp4l[7527681.663]: port 1 (eno1): assuming the grand master role + + +$ sudo pmc -u -b 0 -t 1 "SET GRANDMASTER_SETTINGS_NP clockClass 248 \ + clockAccuracy 0xfe offsetScaledLogVariance 0xffff \ + currentUtcOffset 37 leap61 0 leap59 0 currentUtcOffsetValid 1 \ + ptpTimescale 1 timeTraceable 1 frequencyTraceable 0 \ + timeSource 0xa0" +sending: SET GRANDMASTER_SETTINGS_NP +ptp4l[7527704.409]: port 1 (eno1): assuming the grand master role + f02f74.fffe.cb3590-0 seq 0 RESPONSE MANAGEMENT GRANDMASTER_SETTINGS_NP + clockClass 248 + clockAccuracy 0xfe + offsetScaledLogVariance 0xffff + currentUtcOffset 37 + leap61 0 + leap59 0 + currentUtcOffsetValid 1 + ptpTimescale 1 + timeTraceable 1 + frequencyTraceable 0 + timeSource 0xa0 + + +$ sudo phc2sys -s eno1 -c CLOCK_REALTIME --step_threshold=1 --transportSpecific=1 -w -m +phc2sys[7527727.996]: ioctl PTP_SYS_OFFSET_PRECISE: Invalid argument +phc2sys[7527728.997]: CLOCK_REALTIME phc offset 7422791 s0 freq +628 delay 1394 +phc2sys[7527729.997]: CLOCK_REALTIME phc offset 7422778 s1 freq +615 delay 1474 +phc2sys[7527730.997]: CLOCK_REALTIME phc offset 118 s2 freq +733 delay 1375 +phc2sys[7527731.997]: CLOCK_REALTIME phc offset 57 s2 freq +708 delay 1294 +phc2sys[7527732.998]: CLOCK_REALTIME phc offset -42 s2 freq +626 delay 1422 +phc2sys[7527733.998]: CLOCK_REALTIME phc offset 52 s2 freq +707 delay 1392 +phc2sys[7527734.998]: CLOCK_REALTIME phc offset -65 s2 freq +606 delay 1421 +phc2sys[7527735.998]: CLOCK_REALTIME phc offset -48 s2 freq +603 delay 1453 +phc2sys[7527736.999]: CLOCK_REALTIME phc offset -2 s2 freq +635 delay 1392 +``` + +From here on, clocks on other machines can also be synchronized to the above server clock.
+ +Further references: + +- [Synchronizing Time with Linux PTP](https://tsn.readthedocs.io/timesync.html) +- [Linux PTP Documentation and Configurations](https://linuxptp.nwtime.org/documentation/) diff --git a/docs/holoscan_create_app.md index b4a4cf80..f14e61e3 100644 --- a/docs/holoscan_create_app.md +++ b/docs/holoscan_create_app.md @@ -710,6 +710,69 @@ app.run() This is also illustrated in the [multithread](https://github.com/nvidia-holoscan/holoscan-sdk/blob/main/examples/multithread) example. ::: + +(configuring-app-thread-pools)= +### Configuring worker thread pools + +Both the `MultiThreadScheduler` and `EventBasedScheduler` discussed in the previous section automatically create an internal worker thread pool by default. In some scenarios, it may be desirable for users to instead assign operators to specific user-defined thread pools. This also allows optionally pinning operators to a specific thread. + +Suppose we have three operators, `op1`, `op2` and `op3`, that we want to assign to a thread pool, with `op2` and `op3` pinned to specific threads in that pool. The code for configuring thread pools from the Fragment `compose` method is shown in the example below. + +`````{tab-set} +````{tab-item} C++ +We create thread pools via calls to the {cpp:func}`~holoscan::Fragment::make_thread_pool` method. The first argument is a user-defined name for the thread pool while the second is the number of threads initially in the thread pool. This `make_thread_pool` method returns a shared pointer to a {cpp:class}`~holoscan::ThreadPool` object. The {cpp:func}`~holoscan::ThreadPool::add` method of that object can then be used to add a single operator or a vector of operators to the thread pool. The second argument to the `add` function is a boolean indicating whether the given operators should be pinned to always run on a specific thread within the thread pool. + +```{code-block} cpp +:name: holoscan-thread-pool-example-cpp + + // The following code would be within `Fragment::compose` after operators have been defined + // Assume op1, op2 and op3 are operators as returned by `make_operator` + + // create a thread pool with three threads + auto pool1 = make_thread_pool("pool1", 3); + // assign a single operator to the thread pool (unpinned) + pool1->add(op1, false); + // assign multiple operators to this thread pool (pinned) + pool1->add({op2, op3}, true); + +``` + +```` + +````{tab-item} Python +We create thread pools via calls to the {py:func}`~holoscan.core.Fragment.make_thread_pool` method. The first argument is a user-defined name for the thread pool while the second is the initial size of the thread pool. It is not usually necessary to tune this, as the size will be incremented automatically as needed. This `make_thread_pool` method returns a {py:class}`~holoscan.resources.ThreadPool` object. The {py:func}`~holoscan.resources.ThreadPool.add` method of that object can then be used to add a single operator or a list of operators to the thread pool. The second argument to the `add` function is a boolean indicating whether the given operators should be pinned to always run on a specific thread within the thread pool.
+ +```{code-block} python +:name: holoscan-thread-pool-example-python + # The following code would be within the application's `compose` method, after operators have been defined + # Assume op1, op2 and op3 are operators returned by the operator constructors + + # create a thread pool with a single thread + pool1 = self.make_thread_pool("pool1", 1) + # assign a single operator to the thread pool (unpinned) + pool1.add(op1, False) + # assign multiple operators to this thread pool (pinned) + pool1.add([op2, op3], True) +``` +```` +````` +:::{note} +It is not necessary to define a thread pool for Holoscan applications. There is a default thread pool that gets used for any operators the user did not explicitly assign to a thread pool. The use of thread pools provides a way to explicitly indicate that threads should be pinned. + +One case where separate thread pools **must** be used is to support pinning of operators that use separate GPU devices. Only a single GPU device should be used from any given thread pool. Operators associated with a GPU device resource are those using one of the CUDA-based allocators like +`BlockMemoryPool`, `CudaStreamPool`, `RMMAllocator` or `StreamOrderedAllocator`. +::: + +:::{tip} +A concrete example of a simple application with two pairs of operators in separate thread pools is given in the [thread pool resource example](https://github.com/nvidia-holoscan/holoscan-sdk/blob/main/examples/resources/thread_pool). +::: + +Note that any given operator can only belong to a single thread pool. Assigning the same operator to multiple thread pools may result in errors being logged at application startup time. + +There is also a related boolean parameter, `strict_thread_pinning`, that can be passed as a `holoscan::Arg` to the `MultiThreadScheduler` constructor. When this argument is set to `false` and an operator is pinned to a specific thread, other operators are allowed to also run on that same thread whenever the pinned operator is not ready to execute. When `strict_thread_pinning` is `true`, the thread can ONLY be used by the operator that was pinned to the thread. The `EventBasedScheduler` is always in strict pinning mode and has no such parameter. + +If a thread pool is configured but the single-threaded `GreedyScheduler` is used, a warning will be logged indicating that the user-defined thread pools will be ignored. Only the `MultiThreadScheduler` and `EventBasedScheduler` can make use of thread pools. + (configuring-app-runtime)= ### Configuring runtime properties @@ -1035,7 +1098,7 @@ Given a CMake project, a pre-built executable, or a Python application, you can ## Dynamic Application Metadata -As of Holoscan v2.3 it is possible to send metadata alongside the data emitted from an operator's output ports. This metadata can then be used and/or modified by any downstream operators. Currently this feature is only available for C++ applications, but will also be available to Python applications in a future release. The subsections below describe how this feature can be enabled and used. +As of Holoscan v2.3 (for C++) or v2.4 (for Python), it is possible to send metadata alongside the data emitted from an operator's output ports. This metadata can then be used and/or modified by any downstream operators. The subsections below describe how this feature can be enabled and used.
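Before those subsections, here is a rough end-to-end sketch of the C++ side of the feature (the operator and application names are illustrative; it assumes the v2.x API in which metadata is disabled by default and enabled via `is_metadata_enabled(true)`, with `Operator::metadata()` returning the metadata dictionary associated with the current `compute` call):

```cpp
#include <memory>
#include <string>

#include "holoscan/holoscan.hpp"

namespace holoscan::ops {

// Illustrative transmitter: attaches a metadata entry to its outgoing message.
class MetaTxOp : public Operator {
 public:
  HOLOSCAN_OPERATOR_FORWARD_ARGS(MetaTxOp)
  MetaTxOp() = default;

  void setup(OperatorSpec& spec) override { spec.output<int>("out"); }

  void compute([[maybe_unused]] InputContext& op_input, OutputContext& op_output,
               [[maybe_unused]] ExecutionContext& context) override {
    metadata()->set("source", std::string{"MetaTxOp"});  // sent alongside the emitted value
    op_output.emit(1, "out");
  }
};

// Illustrative receiver: reads the metadata entry set upstream.
class MetaRxOp : public Operator {
 public:
  HOLOSCAN_OPERATOR_FORWARD_ARGS(MetaRxOp)
  MetaRxOp() = default;

  void setup(OperatorSpec& spec) override { spec.input<int>("in"); }

  void compute(InputContext& op_input, [[maybe_unused]] OutputContext& op_output,
               [[maybe_unused]] ExecutionContext& context) override {
    auto value = op_input.receive<int>("in");
    auto source = metadata()->get<std::string>("source");
    HOLOSCAN_LOG_INFO("received {} (source metadata: {})", value.value(), source);
  }
};

}  // namespace holoscan::ops

class MetadataApp : public holoscan::Application {
 public:
  void compose() override {
    using namespace holoscan;
    auto tx = make_operator<ops::MetaTxOp>("tx", make_condition<CountCondition>("count", 3));
    auto rx = make_operator<ops::MetaRxOp>("rx");
    add_flow(tx, rx);
  }
};

int main() {
  auto app = holoscan::make_application<MetadataApp>();
  app->is_metadata_enabled(true);  // assumption: metadata transmission is off by default
  app->run();
  return 0;
}
```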
### Enabling application metadata @@ -1236,7 +1299,7 @@ The metadata policy would typically be set during {py:func}`~holoscan.core.Appli # Example for setting metadata policy from Application.compose() my_op = MyOperator(self, name="my_op") -my_op.metadata_policy = holoscan.MetadataPolicy.RAISE +my_op.metadata_policy = holoscan.core.MetadataPolicy.RAISE ``` ```` diff --git a/docs/holoscan_create_distributed_app.md index dbcfcf4c..039d0fcf 100644 --- a/docs/holoscan_create_distributed_app.md +++ b/docs/holoscan_create_distributed_app.md @@ -201,7 +201,7 @@ ip -o -4 addr show | awk '{print $2, $4}' # to show interface name and IP `````{warning} ### Known limitations -The following are known limitations of the distributed application support in the SDK, which will be addressed in future updates: +The following are known limitations of the distributed application support in the SDK, some of which will be addressed in future updates: #### 1. A connection error message is displayed even when the distributed application is running correctly. @@ -213,7 +213,15 @@ By default, device ID 0 is used by the UCX extensions to send/receive data betwe #### 3. "Address already in use" errors in distributed applications due to the health check service. -In scenarios where distributed applications have both the driver and workers running on the same host, either within a Docker container or directly on the host, there's a possibility of encountering "Address already in use" errors. A potential solution is to assign a different port number to the `HOLOSCAN_HEALTH_CHECK_PORT` environment variable (default: `8777`), for example, by using `export HOLOSCAN_HEALTH_CHECK_PORT=8780`. +When the driver or a worker is running, the health check service is launched by default. Alternatively, if the environment variable `HOLOSCAN_ENABLE_HEALTH_CHECK` is set to "true" or "false" ("1"/"0" and "on"/"off" also work, case-insensitively), the health check service is enabled or disabled accordingly. If the environment variable is not set or is invalid, the default value is used. + +In scenarios where distributed applications have both the driver and workers running on the same host, either within a Docker container or directly on the host, there's a possibility of encountering "Address already in use" errors. +This issue can be avoided by setting the `HOLOSCAN_HEALTH_CHECK_PORT` environment variable to a different port number for the health check service. The default port number is `8777`. For example, the port number can be set to `8780` by using `export HOLOSCAN_HEALTH_CHECK_PORT=8780`. +Alternatively, the health check service can be disabled by setting the `HOLOSCAN_ENABLE_HEALTH_CHECK` environment variable to `false`. + +#### 4. The use of the management port is unsupported on the NVIDIA IGX Orin Developer Kit. + +IGX devices come with two Ethernet ports, noted as ports #4 and #5 in the [NVIDIA IGX Orin User Guide](https://docs.nvidia.com/igx-orin/user-guide/latest/system-overview.html#i-o-and-external-interfaces). To run distributed applications on these devices, the user must ensure that Ethernet port #4 is used to connect the driver and the workers.
````` `````{note} @@ -292,7 +300,7 @@ A table of the types that have codecs pre-registered so that they can be seriali | std::vector<T> | T is std::string or any of the boolean, integer or floating point types above | | std::vector<std::vector<T>> | T is std::string or any of the boolean, integer or floating point types above | | std::vector<HolovizOp::InputSpec> | a vector of InputSpec objects that are specific to HolovizOp | -| std::shared_ptr<%> | T is any of the scalar, vector or std::string types above | +| std::shared_ptr<T> | T is any of the scalar, vector or std::string types above | | tensor types | holoscan::Tensor, nvidia::gxf::Tensor, nvidia::gxf::VideoBuffer, nvidia::gxf::AudioBuffer | | GXF-specific types | nvidia::gxf::TimeStamp, nvidia::gxf::EndOfStream | @@ -689,7 +697,7 @@ if __name__ == "__main__": args = parser.parse_args(app_argv[1:]) main(on_gpu=args.gpu) ``` -For Python, `app.argv[1:]` can be used with an `ArgumentParser` from Python's [argparse](https://docs.python.org/3/library/argparse.html) module. +For Python, `app.argv[1:]` can be used with an `ArgumentParser` from Python's [argparse](https://docs.python.org/3/library/argparse.html) module. Alternatively, it may be preferable to use `parser.parse_known_args()` to allow any arguments not defined by the user's parser to pass through to the application class itself. If one also sets `add_help=False` when constructing the `ArgumentParser`, it is possible to print the parser's help while still preserving the default application help (covering the default set of distributed application arguments). An example of this style is shown in the code block below. ```{code-block} Python diff --git a/docs/holoscan_debugging.md index 77f91392..bba59121 100644 --- a/docs/holoscan_debugging.md +++ b/docs/holoscan_debugging.md @@ -18,7 +18,7 @@ The [Holoscan SDK](https://github.com/nvidia-holoscan/holoscan-sdk) can be effec ### Launching VSCode with the Holoscan SDK -- **Local Development**: Use the `./run vscode` command to launch Visual Studio Code in a development container. +- **Local Development**: Use the `./run vscode` command to launch Visual Studio Code in a development container (`-j <# of workers>` or `--parallel <# of workers>` can be used to specify the number of parallel jobs to run during the build process). For more information, refer to the instructions from `./run vscode -h`. - **Remote Development**: For attaching to an existing dev container from a remote machine, use `./run vscode_remote`. Additional instructions can be accessed via `./run vscode_remote -h`. Upon launching Visual Studio Code, the development container will automatically be built. This process also involves the installation of recommended extensions and the configuration of CMake.
diff --git a/docs/hsdk_faq.md index 3009864b..c7e87612 100644 --- a/docs/hsdk_faq.md +++ b/docs/hsdk_faq.md @@ -32,13 +32,13 @@ A1: There are multiple ways to install the Holoscan SDK: * For **dGPU** (x86_64, IGX Orin dGPU, Clara AGX dGPU, GH200) ``` -docker pull nvcr.io/nvidia/clara-holoscan/holoscan:v2.6.0-dgpu +docker pull nvcr.io/nvidia/clara-holoscan/holoscan:v2.7.0-dgpu ``` * For **iGPU** (Jetson, IGX Orin iGPU, Clara AGX iGPU) ``` -docker pull nvcr.io/nvidia/clara-holoscan/holoscan:v2.6.0-igpu +docker pull nvcr.io/nvidia/clara-holoscan/holoscan:v2.7.0-igpu ``` For more information, please refer to details and usage instructions on [**NGC**](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/clara-holoscan/containers/holoscan). @@ -827,6 +827,9 @@ A1: You can use the command to launch VSCode in a development container. Configure CMake, build the source code, and use the Run and Debug view to start debugging sessions. +`-j <# of workers>` or `--parallel <# of workers>` can be used to specify the number of parallel jobs to run during the build process. +For more information, refer to the instructions from `./run vscode -h`. + **Q2: How can I get started with debugging my Holoscan application?** For debugging applications in Holoscan repo, refer to the [Debugging Section](https://docs.nvidia.com/holoscan/sdk-user-guide/holoscan_debugging.html). For debugging applications in Holohub, refer to HoloHub [tutorials](https://github.com/nvidia-holoscan/holohub/tree/main/tutorials/debugging) for strategies to set up debugging with Visual Studio Code or other tools such as GDB. @@ -1074,7 +1077,7 @@ To achieve synchronized termination across all Holoviz instances: 1. Create a shared boolean scheduling condition. 1. For each HolovizOp in your application: * Set this condition as a general execution condition. - * Importantly, also set it as the `window_close_scheduling_term` parameter. + * Importantly, also set it as the `window_close_condition` parameter (Note: this parameter was named `window_close_scheduling_term` in releases prior to v2.7). **Q16: I'm trying to use the `render_buffer_output` from Holoviz Python operator, but I get the following error :** diff --git a/docs/inference.md index 557bf1bb..72def417 100644 --- a/docs/inference.md +++ b/docs/inference.md @@ -42,25 +42,19 @@ Required parameters and related features available with the Holoscan Inference M - It is recommended to use the same version of torch for `torchscript` model generation, as used in the HOLOSCAN SDK on the respective architectures. - Additionally, it is recommended to generate the `torchscript` model on the same architecture on which it will be executed. For example, `torchscript` model must be generated on `x86_64` to be executed in an application running on `x86_64` only. - ONNX runtime: - - Data flow via host only. `input_on_cuda`, `output_on_cuda` and `transmit_on_cuda` must be `false`. - - CUDA-based inference (supported on x86_64). - - CPU-based inference (supported on x86_64 and aarch64). + - CUDA- and CPU-based inference supported on both x86_64 and aarch64. + - End-to-end CUDA-based data movement is supported; set `input_on_cuda`, `output_on_cuda` and `transmit_on_cuda` all to `true` for end-to-end CUDA-based data movement. + - `input_on_cuda`, `output_on_cuda` and `transmit_on_cuda` can be either `true` or `false`. - `infer_on_cpu` parameter is set to `true` if CPU based inference is desired.
- The tables below demonstrate the supported features related to the data buffer and the inference with `trt` and `onnxrt` based backend, on x86 and aarch64 system respectively. - - | x86 | `input_on_cuda` | `output_on_cuda` | `transmit_on_cuda` | `infer_on_cpu` | - |---|---|---|---|---| - | Supported values for `trt` | `true` or `false` | `true` or `false` | `true` or `false` | `false` | - | Supported values for `torch` | `true` or `false` | `true` or `false` | `true` or `false` | `true` or `false` | - | Supported values for `onnxrt` | `false` | `false` | `true` or `false` | `true` or `false` | + The table below demonstrates the supported features related to the data buffer and the inference with the `trt`, `torch` and `onnxrt` based backends. - | Aarch64 | `input_on_cuda` | `output_on_cuda` | `transmit_on_cuda` | `infer_on_cpu` | + | | `input_on_cuda` | `output_on_cuda` | `transmit_on_cuda` | `infer_on_cpu` | |---|---|---|---|---| | Supported values for `trt` | `true` or `false` | `true` or `false` | `true` or `false` | `false` | | Supported values for `torch` | `true` or `false` | `true` or `false` | `true` or `false` | `true` or `false` | - | Supported values for `onnxrt` | `false` | `false` | `true` or `false` | `true` | + | Supported values for `onnxrt` | `true` or `false` | `true` or `false` | `true` or `false` | `true` or `false` | - `model_path_map`: User can design single or multi AI inference pipeline by populating `model_path_map` in the config file. - With a single entry, it is single inference; with more than one entry, multi AI inference is enabled. @@ -78,7 +72,7 @@ Required parameters and related features available with the Holoscan Inference M - Parameter `parallel_inference` can be either `true` or `false`. Default value is `true`. - Inferences are launched in parallel without any check of the available GPU resources. You must ensure that there is enough memory and compute available to run all the inferences in parallel. - `enable_fp16`: Generation of the TensorRT engine files with FP16 option - - If `backend` is set to `trt`, and if the input models are in __onnx__ format, then you can generate the engine file with fp16 option to accelerate inferencing. + - If `backend` is set to `onnxrt` or `trt` and the input models are in __onnx__ format, then you can generate the engine file with the fp16 option to accelerate inferencing. - It takes few minutes to generate the engine files for the first time. - It can be either `true` or `false`. Default value is `false`. - `is_engine_path`: if the input models are specified in __trt engine format__ in `model_path_map`, this flag must be set to `true`. Default value is `false`. diff --git a/docs/sdk_installation.md index 2dab76f9..98d83e9b 100644 --- a/docs/sdk_installation.md +++ b/docs/sdk_installation.md @@ -18,7 +18,7 @@ Setup your developer kit: Developer Kit | User Guide | OS | GPU Mode ------------- | ---------- | --- | --- [NVIDIA IGX Orin][igx] | [Guide][igx-guide] | [IGX Software][igx-sw] 1.0 Production Release | iGPU **or*** dGPU -[NVIDIA Jetson AGX Orin and Orin Nano][jetson-orin] | [Guide][jetson-guide] | [JetPack][jp] 6.0 | iGPU +[NVIDIA Jetson AGX Orin and Orin Nano][jetson-orin] | [Guide][jetson-guide] | [JetPack][jp] 6.1 | iGPU
_Only supporting the NGC container_ | [Guide][clara-guide] | [HoloPack][sdkm] 1.2
_[Upgrade to 535+ drivers required][cagx-upgrade]_ | dGPU [clara-agx]: https://www.nvidia.com/en-gb/clara/intelligent-medical-instruments @@ -82,11 +82,11 @@ We provide multiple ways to install and run the Holoscan SDK: ````{tab-item} NGC Container - **dGPU** (x86_64, IGX Orin dGPU, Clara AGX dGPU, GH200) ```bash - docker pull nvcr.io/nvidia/clara-holoscan/holoscan:v2.6.0-dgpu + docker pull nvcr.io/nvidia/clara-holoscan/holoscan:v2.7.0-dgpu ``` - **iGPU** (Jetson, IGX Orin iGPU, Clara AGX iGPU) ```bash - docker pull nvcr.io/nvidia/clara-holoscan/holoscan:v2.6.0-igpu + docker pull nvcr.io/nvidia/clara-holoscan/holoscan:v2.7.0-igpu ``` See details and usage instructions on [NGC][container]. ```` diff --git a/docs/use_igpu_with_dgpu.md index de2c4a0e..983bcd5a 100644 --- a/docs/use_igpu_with_dgpu.md +++ b/docs/use_igpu_with_dgpu.md @@ -34,7 +34,7 @@ COMMON_DOCKER_FLAGS="--rm -i --init --net=host --runtime=nvidia -e NVIDIA_DRIVER_CAPABILITIES=all --cap-add CAP_SYS_PTRACE --ipc=host --ulimit memlock=-1 --ulimit stack=67108864 " -HOLOSCAN_VERSION=2.6.0 +HOLOSCAN_VERSION=2.7.0 HOLOSCAN_IMG="nvcr.io/nvidia/clara-holoscan/holoscan:v$HOLOSCAN_VERSION" HOLOSCAN_DGPU_IMG="$HOLOSCAN_IMG-dgpu" HOLOSCAN_IGPU_IMG="$HOLOSCAN_IMG-igpu" diff --git a/docs/visualization.md index f4a04e7c..426ede1d 100644 --- a/docs/visualization.md +++ b/docs/visualization.md @@ -192,7 +192,7 @@ Image format detection for `nvidia::gxf::Tensor`. Tensors don't have image forma | kInt8 | 4 | signed RGBA-8-8-8-8 single plane | - | | kUnsigned16 | 4 | RGBA-16-16-16-16 single plane | - | | kInt16 | 4 | signed RGBA-16-16-16-16 single plane | - | -| kFloat32 | 4 | RGBA-16-16-16-16 single plane | - | +| kFloat32 | 4 | RGBA float 32 single plane | - | ```` ````{tab-item} Module @@ -460,7 +460,7 @@ When providing CUDA resources to Holoviz through (e.g., {func}`viz::ImageCudaDev ## Reading the Frame Buffer -The rendered frame buffer can be read back. This is useful when when doing offscreen rendering or running Holoviz in a headless environment. +The rendered frame buffer can be read back. This is useful when doing offscreen rendering or running Holoviz in a headless environment. :::{note} Reading the depth buffer is not supported when using the Holoviz operator. diff --git a/examples/README.md index 23d45ed9..ce7bcc60 100644 --- a/examples/README.md +++ b/examples/README.md @@ -67,6 +67,8 @@ The following examples illustrate the use of specific **conditions** to modify t * [**PeriodicCondition**](conditions/periodic): trigger an operator at a user-defined time interval * [**AsynchronousCondition**](conditions/asynchronous): allow operators to run asynchronously (C++ API only) +* [**ExpiringMessageAvailableCondition**](conditions/expiring_message): allow operators to run when a certain number of messages have arrived or after a specified time interval has elapsed +* [**MultiMessageAvailableCondition**, **MultiMessageAvailableTimeoutCondition**](conditions/multi_message): allow operators to run only once a certain number of messages have arrived across multiple associated input ports, optionally with a timeout on the interval to wait for messages (a minimal sketch of attaching such a condition is shown below)
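As a minimal sketch of what attaching such a condition looks like (an illustrative operator, not part of the SDK; the port names and message counts here are arbitrary, and the linked multi_message example contains the full working version):

```cpp
#include <memory>
#include <string>
#include <vector>

#include "holoscan/holoscan.hpp"

namespace holoscan::ops {

// Hypothetical receiver used only for illustration.
class SumOfAllRxOp : public Operator {
 public:
  HOLOSCAN_OPERATOR_FORWARD_ARGS(SumOfAllRxOp)
  SumOfAllRxOp() = default;

  void setup(OperatorSpec& spec) override {
    spec.input<std::shared_ptr<std::string>>("in1");
    // give in2 a queue capacity of 2 so that 3 messages can be buffered across both ports
    spec.input<std::shared_ptr<std::string>>("in2", IOSpec::IOSize(2));

    // Execute once 3 messages have arrived in total across in1 and in2 ("SumOfAll" mode).
    ArgList args{Arg("min_sum", static_cast<size_t>(3)),
                 Arg("sampling_mode", MultiMessageAvailableCondition::SamplingMode::kSumOfAll)};
    std::vector<std::string> port_names{"in1", "in2"};
    spec.multi_port_condition(ConditionType::kMultiMessageAvailable, port_names, args);
  }

  void compute(InputContext& op_input, [[maybe_unused]] OutputContext& op_output,
               [[maybe_unused]] ExecutionContext& context) override {
    // drain whichever queues currently hold messages
    while (auto msg = op_input.receive<std::shared_ptr<std::string>>("in1")) {
      HOLOSCAN_LOG_INFO("in1: {}", *msg.value());
    }
    while (auto msg = op_input.receive<std::shared_ptr<std::string>>("in2")) {
      HOLOSCAN_LOG_INFO("in2: {}", *msg.value());
    }
  }
};

}  // namespace holoscan::ops
```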
The following examples illustrate the use of specific resource classes that can be passed to operators or schedulers: diff --git a/examples/conditions/CMakeLists.txt b/examples/conditions/CMakeLists.txt index bc240d35..e9ef2f9a 100644 --- a/examples/conditions/CMakeLists.txt +++ b/examples/conditions/CMakeLists.txt @@ -15,5 +15,5 @@ add_subdirectory(asynchronous) add_subdirectory(expiring_message) +add_subdirectory(multi_message) add_subdirectory(periodic) - diff --git a/examples/conditions/asynchronous/cpp/CMakeLists.min.txt b/examples/conditions/asynchronous/cpp/CMakeLists.min.txt index 0d25e198..0e974dbe 100644 --- a/examples/conditions/asynchronous/cpp/CMakeLists.min.txt +++ b/examples/conditions/asynchronous/cpp/CMakeLists.min.txt @@ -43,10 +43,47 @@ add_dependencies(ping_async ping_async_yaml) # Testing if(BUILD_TESTING) - add_test(NAME EXAMPLE_CPP_PING_ASYNC_TEST - COMMAND ping_async - WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} - ) - set_tests_properties(EXAMPLE_CPP_PING_ASYNC_TEST PROPERTIES - PASS_REGULAR_EXPRESSION "Rx message value: 20") + # Test all the combinations for the async test + # RT: receive/transmit + # R: receive only + # T: transmit only + # O: receive and transmit async set to false + # M: Multi-thread + # G: Greedy + set(testconfigs RT T R O MRT GRT) + foreach(config IN LISTS testconfigs) + file(READ ${CMAKE_CURRENT_SOURCE_DIR}/ping_async.yaml CONFIG_STRING) + string(REPLACE "async_receive: true" "async_receive: false" CONFIG_STRING "${CONFIG_STRING}") + + string(FIND "${config}" "R" HAS_R) + if(HAS_R GREATER -1) + string(REPLACE "async_receive: false" "async_receive: true" CONFIG_STRING "${CONFIG_STRING}") + endif() + + string(FIND "${config}" "T" HAS_T) + if(HAS_T GREATER -1) + string(REPLACE "async_transmit: false" "async_transmit: true" CONFIG_STRING "${CONFIG_STRING}") + endif() + + string(FIND "${config}" "M" HAS_M) + if(HAS_M GREATER -1) + string(REPLACE "scheduler: event_based" "scheduler: multi_thread" CONFIG_STRING "${CONFIG_STRING}") + endif() + + string(FIND "${config}" "G" HAS_G) + if(HAS_G GREATER -1) + string(REPLACE "scheduler: event_based" "scheduler: greedy" CONFIG_STRING "${CONFIG_STRING}") + endif() + + # Write the config + set(CONFIG_FILE ${CMAKE_CURRENT_BINARY_DIR}/cpp_video_replayer_config_${config}.yaml) + file(WRITE ${CONFIG_FILE} "${CONFIG_STRING}") + + add_test(NAME EXAMPLE_CPP_PING_ASYNC_${config}_TEST + COMMAND ping_async ${CONFIG_FILE} + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} + ) + set_tests_properties(EXAMPLE_CPP_PING_ASYNC_${config}_TEST PROPERTIES + PASS_REGULAR_EXPRESSION "Rx message value: 20") + endforeach() endif() diff --git a/examples/conditions/asynchronous/cpp/CMakeLists.txt b/examples/conditions/asynchronous/cpp/CMakeLists.txt index b1dd7b9c..90ffc458 100644 --- a/examples/conditions/asynchronous/cpp/CMakeLists.txt +++ b/examples/conditions/asynchronous/cpp/CMakeLists.txt @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2023-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. @@ -74,11 +74,47 @@ install(FILES # Testing if(HOLOSCAN_BUILD_TESTS) - add_test(NAME EXAMPLE_CPP_PING_ASYNC_TEST - COMMAND ping_async - WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} - ) - set_tests_properties(EXAMPLE_CPP_PING_ASYNC_TEST PROPERTIES - PASS_REGULAR_EXPRESSION "Rx message value: 20") -endif() + # Test all the combinations for the async test + # RT: receive/transmit + # R: receive only + # T: transmit only + # O: receive and transmit async set to false + # M: Multi-thread + # G: Greedy + set(testconfigs RT T R O MRT GRT) + foreach(config IN LISTS testconfigs) + file(READ ${CMAKE_CURRENT_SOURCE_DIR}/ping_async.yaml CONFIG_STRING) + string(REPLACE "async_receive: true" "async_receive: false" CONFIG_STRING "${CONFIG_STRING}") + + string(FIND "${config}" "R" HAS_R) + if(HAS_R GREATER -1) + string(REPLACE "async_receive: false" "async_receive: true" CONFIG_STRING "${CONFIG_STRING}") + endif() + + string(FIND "${config}" "T" HAS_T) + if(HAS_T GREATER -1) + string(REPLACE "async_transmit: false" "async_transmit: true" CONFIG_STRING "${CONFIG_STRING}") + endif() + + string(FIND "${config}" "M" HAS_M) + if(HAS_M GREATER -1) + string(REPLACE "scheduler: event_based" "scheduler: multi_thread" CONFIG_STRING "${CONFIG_STRING}") + endif() + string(FIND "${config}" "G" HAS_G) + if(HAS_G GREATER -1) + string(REPLACE "scheduler: event_based" "scheduler: greedy" CONFIG_STRING "${CONFIG_STRING}") + endif() + + # Write the config + set(CONFIG_FILE ${CMAKE_CURRENT_BINARY_DIR}/cpp_video_replayer_config_${config}.yaml) + file(WRITE ${CONFIG_FILE} "${CONFIG_STRING}") + + add_test(NAME EXAMPLE_CPP_PING_ASYNC_${config}_TEST + COMMAND ping_async ${CONFIG_FILE} + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} + ) + set_tests_properties(EXAMPLE_CPP_PING_ASYNC_${config}_TEST PROPERTIES + PASS_REGULAR_EXPRESSION "Rx message value: 20") + endforeach() +endif() diff --git a/examples/conditions/asynchronous/cpp/ping_async.cpp index a6ba0c4a..be20260b 100644 --- a/examples/conditions/asynchronous/cpp/ping_async.cpp +++ b/examples/conditions/asynchronous/cpp/ping_async.cpp @@ -63,9 +63,10 @@ class App : public holoscan::Application { int main([[maybe_unused]] int argc, char** argv) { auto app = holoscan::make_application<App>(); - // Get the configuration + // Get the yaml configuration file auto config_path = std::filesystem::canonical(argv[0]).parent_path(); - config_path += "/ping_async.yaml"; + config_path /= std::filesystem::path("ping_async.yaml"); + if (argc >= 2) { config_path = argv[1]; } app->config(config_path); // set customizable application parameters via the YAML diff --git a/examples/conditions/multi_message/CMakeLists.txt new file mode 100644 index 00000000..0fa63c4d --- /dev/null +++ b/examples/conditions/multi_message/CMakeLists.txt @@ -0,0 +1,30 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +if(HOLOSCAN_CPP_EXAMPLES) + add_subdirectory(cpp) +endif() + +if(HOLOSCAN_PYTHON_EXAMPLES) + add_subdirectory(python) +endif() + +file(RELATIVE_PATH app_relative_dest_path ${CMAKE_SOURCE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}) + +install( + FILES README.md + DESTINATION "${app_relative_dest_path}" + COMPONENT "holoscan-examples" +) diff --git a/examples/conditions/multi_message/README.md new file mode 100644 index 00000000..7d7390db --- /dev/null +++ b/examples/conditions/multi_message/README.md @@ -0,0 +1,68 @@ +# Holoscan::MultiMessageAvailableCondition + +This example demonstrates how to use Holoscan::MultiMessageAvailableCondition. + +This condition type is unique among the conditions that are currently provided by the SDK in that it is a condition that applies across **multiple** input ports in combination, as compared to `MessageAvailableCondition` or `ExpiringMessageAvailableCondition`, which apply only to a **single** input port. Since a `holoscan::IOSpec` object is associated with a single input (or output) port, the existing `IOSpec::condition` API cannot be used to support this multi-port condition type. Instead there is a dedicated `OperatorSpec::multi_port_condition` method for handling such conditions. The application here shows how the operator's `setup` method can call `multi_port_condition`, passing a list of the input port names to which the condition should apply. + +The multi-message available condition has two modes of operation. + + 1. The "SumOfAll" mode will be satisfied when the **total** number of messages arriving across the associated input ports reaches a specified **min_sum**. In that case, it doesn't matter which of the inputs the messages arrive on, only that the specified number have arrived. + 2. In the "PerReceiver" mode, you instead specify a **min_sizes** vector of the minimum number of messages that must arrive on each individual port associated with the condition. The "PerReceiver" mode behaves equivalently to having a default `MessageAvailableCondition` with the corresponding `min_size` on each receiver individually. + +When using such a multi-receiver condition, one should set a "None" condition on the individual input ports that are being passed as the "port_names" argument to `OperatorSpec::multi_port_condition`. Otherwise, the ports will **also** have Holoscan's default `MessageAvailable` condition, which is likely undesired in any scenario where the multi-receiver condition is being used. + +*Visit the [SDK User Guide](https://docs.nvidia.com/holoscan/sdk-user-guide/components/conditions.html) to learn more about these condition types.* + +## App Description + +The "multi_message_per_receiver" example in this folder has two types of operators involved: + 1. Multiple instances of `StringTxOp`, each of which simply transmits a single, user-configurable message string. + 2. A single `PerReceiverRxOp` which has three input ports, one of which has a queue capacity of 2 while the others have the default capacity of 1.
A `MultiMessageAvailableCondition` is used to set this operator to only call `compute` once the expected number of messages have arrived on each port. + +The `StringTxOp` operators use periodic conditions with different rates, so messages are emitted at different temporal frequencies by each. Each transmitter will have the default `DownstreamMessageAffordableCondition`, so each `StringTxOp` will only call compute when there would be space in the corresponding receiver queue of the `PerReceiverRxOp`. + +The "multi_message_sum_of_all" example is similar except the receiving operator is configured to use the "SumOfAll" mode with a variant of the multi-message available condition called `MultiMessageAvailableTimeoutCondition`. This condition operates exactly like `MultiMessageAvailableCondition` except that it has one additional required parameter named "execution_frequency". The operator will be ready to execute again after the time interval specified by "execution_frequency", **even if** the specified number of messages has not yet arrived. + +The final, "single_message_timeout" example shows a simpler case where the default `MessageAvailableCondition` on a single input port receiver is instead replaced with the `MultiMessageAvailableTimeoutCondition`. In this case there is only one receiver (input port) associated with the multi-message condition, so it acts like the standard `MessageAvailableCondition` except with an additional "execution_frequency" parameter that will allow the operator to execute at that frequency even if not all messages have arrived. When there is only a single port associated with the condition, there is no difference in behavior between the "PerReceiver" and "SumOfAll" modes. + +### Build instructions + +Built with the SDK, see instructions from the top level README. + +### Run instructions (C++) + +First, go in your `build` or `install` directory (automatically done by `./run launch`). + +To run the "multi_message_per_receiver" example with `MultiMessageAvailableCondition` in "PerReceiver" mode: +```bash +./examples/conditions/multi_message/cpp/multi_message_per_receiver +``` + +To run the "multi_message_sum_of_all" example with `MultiMessageAvailableTimeoutCondition` in "SumOfAll" mode: +```bash +./examples/conditions/multi_message/cpp/multi_message_sum_of_all +``` + +To run the "single_message_timeout" example, which uses `MultiMessageAvailableTimeoutCondition` with only a single input port: +```bash +./examples/conditions/multi_message/cpp/single_message_timeout +``` + +### Run instructions (Python) + +First, go in your `build` or `install` directory (automatically done by `./run launch`).
+ +To run the "multi_message_per_receiver" example with `MultiMessageAvailableCondition` in "PerReceiver" mode: +```bash +python ./examples/conditions/multi_message/python/multi_message_per_receiver.py +``` + +To run the "multi_message_sum_of_all" example with `MultiMessageAvailableTimeoutCondition` in "SumOfAll" mode: +```bash +python ./examples/conditions/multi_message/python/multi_message_sum_of_all.py +``` + +To run the "single_message_timeout" example, which uses `MultiMessageAvailableTimeoutCondition` with only a single input port: +```bash +python ./examples/conditions/multi_message/python/single_message_timeout.py +``` diff --git a/examples/conditions/multi_message/cpp/CMakeLists.min.txt new file mode 100644 index 00000000..ba1b3da3 --- /dev/null +++ b/examples/conditions/multi_message/cpp/CMakeLists.min.txt @@ -0,0 +1,89 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +cmake_minimum_required(VERSION 3.20) +project(multi_message CXX) + +# Finds the package holoscan +find_package(holoscan REQUIRED CONFIG + PATHS "/opt/nvidia/holoscan" "/workspace/holoscan-sdk/install") + +add_executable(multi_message_per_receiver + multi_message_per_receiver.cpp + common_ops.hpp +) +target_link_libraries(multi_message_per_receiver + PRIVATE + holoscan::core +) + +add_executable(multi_message_sum_of_all + multi_message_sum_of_all.cpp + common_ops.hpp +) +target_link_libraries(multi_message_sum_of_all + PRIVATE + holoscan::core +) + +add_executable(single_message_timeout + single_message_timeout.cpp + common_ops.hpp +) +target_link_libraries(single_message_timeout + PRIVATE + holoscan::core +) + +# Testing +if(BUILD_TESTING) + add_test(NAME EXAMPLE_CPP_MULTI_MESSAGE_PER_RECEIVER_TEST + COMMAND multi_message_per_receiver + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} + ) + set_tests_properties(EXAMPLE_CPP_MULTI_MESSAGE_PER_RECEIVER_TEST PROPERTIES + PASS_REGULAR_EXPRESSION "message received on in1: Hello from tx1" + PASS_REGULAR_EXPRESSION "message received on in2: Hello from tx2" + PASS_REGULAR_EXPRESSION "message received on in3: Hello from tx3" + FAIL_REGULAR_EXPRESSION "Unable to convert argument type" + ) + + add_test(NAME EXAMPLE_CPP_MULTI_MESSAGE_SUM_OF_ALL_TEST + COMMAND multi_message_sum_of_all + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} + ) + set_tests_properties( + EXAMPLE_CPP_MULTI_MESSAGE_SUM_OF_ALL_TEST PROPERTIES + # check that multiple messages arrived on all ports + # (expect several messages on in1, 4-5 on in2 and at least one on in3 with the timings specified) + PASS_REGULAR_EXPRESSION "messages received on in1: \\[\"tx1\", \"tx1\", \"tx1\", \"tx1\"" + PASS_REGULAR_EXPRESSION "messages received on in2: \\[\"tx2\", \"tx2\", \"tx2\"" + PASS_REGULAR_EXPRESSION "messages received on in3: \\[\"tx3\"" + # with the given periodic conditions, tx3 should not arrive more than 3 times +
FAIL_REGULAR_EXPRESSION "messages received on in3: \\[\"tx3\", \"tx3\", \"tx3\", \"tx3\"" + FAIL_REGULAR_EXPRESSION "Unable to convert argument type" + ) + + add_test(NAME EXAMPLE_CPP_SINGLE_MESSAGE_TIMEOUT_TEST + COMMAND single_message_timeout + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} + ) + set_tests_properties( + EXAMPLE_CPP_SINGLE_MESSAGE_TIMEOUT_TEST PROPERTIES + # check that despite min_sum=5, only 3 messages arrived due to the execution_frequency + PASS_REGULAR_EXPRESSION "3 messages received on in" + FAIL_REGULAR_EXPRESSION "Unable to convert argument type" + ) +endif() diff --git a/examples/conditions/multi_message/cpp/CMakeLists.txt b/examples/conditions/multi_message/cpp/CMakeLists.txt new file mode 100644 index 00000000..e8b71cb7 --- /dev/null +++ b/examples/conditions/multi_message/cpp/CMakeLists.txt @@ -0,0 +1,116 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Create examples +add_executable(multi_message_per_receiver + multi_message_per_receiver.cpp + common_ops.hpp +) +target_link_libraries(multi_message_per_receiver + PRIVATE + holoscan::core +) + +add_executable(multi_message_sum_of_all + multi_message_sum_of_all.cpp + common_ops.hpp +) +target_link_libraries(multi_message_sum_of_all + PRIVATE + holoscan::core +) + +add_executable(single_message_timeout + single_message_timeout.cpp + common_ops.hpp +) +target_link_libraries(single_message_timeout + PRIVATE + holoscan::core +) + +# Install examples + +# Set the install RPATH based on the location of the Holoscan SDK libraries +# The GXF extensions are loaded by the GXF libraries - no need to include here +file(RELATIVE_PATH install_lib_relative_path ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_SOURCE_DIR}/${HOLOSCAN_INSTALL_LIB_DIR}) +set_target_properties(multi_message_per_receiver PROPERTIES INSTALL_RPATH "\$ORIGIN/${install_lib_relative_path}") +set_target_properties(multi_message_sum_of_all PROPERTIES INSTALL_RPATH "\$ORIGIN/${install_lib_relative_path}") +set_target_properties(single_message_timeout PROPERTIES INSTALL_RPATH "\$ORIGIN/${install_lib_relative_path}") + +# Install following the relative folder path +file(RELATIVE_PATH app_relative_dest_path ${CMAKE_SOURCE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}) + +if(HOLOSCAN_INSTALL_EXAMPLE_SOURCE) +# Install the source +install(FILES multi_message_per_receiver.cpp multi_message_sum_of_all.cpp single_message_timeout.cpp common_ops.hpp + DESTINATION "${app_relative_dest_path}" + COMPONENT holoscan-examples +) + +# Install the minimal CMakeLists.txt file +install(FILES CMakeLists.min.txt + RENAME "CMakeLists.txt" + DESTINATION "${app_relative_dest_path}" + COMPONENT holoscan-examples +) +endif() + +# Install the compiled examples +install(TARGETS multi_message_per_receiver multi_message_sum_of_all single_message_timeout + DESTINATION "${app_relative_dest_path}" + COMPONENT holoscan-examples +) + +# Testing +if(HOLOSCAN_BUILD_TESTS) + 
add_test(NAME EXAMPLE_CPP_MULTI_MESSAGE_PER_RECEIVER_TEST + COMMAND multi_message_per_receiver + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} + ) + set_tests_properties(EXAMPLE_CPP_MULTI_MESSAGE_PER_RECEIVER_TEST PROPERTIES + PASS_REGULAR_EXPRESSION "message received on in1: Hello from tx1" + PASS_REGULAR_EXPRESSION "message received on in2: Hello from tx2" + PASS_REGULAR_EXPRESSION "message received on in3: Hello from tx3" + FAIL_REGULAR_EXPRESSION "Unable to convert argument type" + ) + + add_test(NAME EXAMPLE_CPP_MULTI_MESSAGE_SUM_OF_ALL_TEST + COMMAND multi_message_sum_of_all + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} + ) + set_tests_properties( + EXAMPLE_CPP_MULTI_MESSAGE_SUM_OF_ALL_TEST PROPERTIES + # check that multiple messages arrived on all ports + # (expect several messages on in1, 4-5 on in2 and at least one on in3 with the timings specified) + PASS_REGULAR_EXPRESSION "messages received on in1: \\[\"tx1\", \"tx1\", \"tx1\", \"tx1\"" + PASS_REGULAR_EXPRESSION "messages received on in2: \\[\"tx2\", \"tx2\", \"tx2\"" + PASS_REGULAR_EXPRESSION "messages received on in3: \\[\"tx3\"" + # with the given periodic conditions, tx3 should not arrive more than 3 times + FAIL_REGULAR_EXPRESSION "messages received on in3: \\[\"tx3\", \"tx3\", \"tx3\", \"tx3\"" + FAIL_REGULAR_EXPRESSION "Unable to convert argument type" + ) + + add_test(NAME EXAMPLE_CPP_SINGLE_MESSAGE_TIMEOUT_TEST + COMMAND single_message_timeout + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} + ) + set_tests_properties( + EXAMPLE_CPP_SINGLE_MESSAGE_TIMEOUT_TEST PROPERTIES + # check that despite min_sum=5, only 3 messages arrived due to the execution_frequency + PASS_REGULAR_EXPRESSION "3 messages received on in" + FAIL_REGULAR_EXPRESSION "Unable to convert argument type" + ) +endif() diff --git a/examples/conditions/multi_message/cpp/common_ops.hpp new file mode 100644 index 00000000..299a520a --- /dev/null +++ b/examples/conditions/multi_message/cpp/common_ops.hpp @@ -0,0 +1,51 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ +#include <memory> +#include <string> + +#include "holoscan/holoscan.hpp" + +namespace holoscan::ops { + +class StringTxOp : public Operator { + public: + HOLOSCAN_OPERATOR_FORWARD_ARGS(StringTxOp) + + StringTxOp() = default; + + void setup(OperatorSpec& spec) override { spec.output<std::shared_ptr<std::string>>("out"); } + + void compute([[maybe_unused]] InputContext& op_input, OutputContext& op_output, + [[maybe_unused]] ExecutionContext& context) override { + auto value = std::make_shared<std::string>(message_); + if (verbose_) { HOLOSCAN_LOG_INFO("{}: sending message", name()); } + op_output.emit(value, "out"); + }; + + void set_message(const std::string& message, bool verbose = false) { + message_ = message; + verbose_ = verbose; + } + + private: + std::string message_{}; + bool verbose_ = false; +}; + +} // namespace holoscan::ops diff --git a/examples/conditions/multi_message/cpp/multi_message_per_receiver.cpp b/examples/conditions/multi_message/cpp/multi_message_per_receiver.cpp new file mode 100644 index 00000000..ed0e8dbe --- /dev/null +++ b/examples/conditions/multi_message/cpp/multi_message_per_receiver.cpp @@ -0,0 +1,115 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include <chrono> +#include <memory> +#include <string> +#include <vector> + +#include "./common_ops.hpp" +#include "holoscan/holoscan.hpp" + +namespace holoscan::ops { + +class PerReceiverRxOp : public Operator { + public: + HOLOSCAN_OPERATOR_FORWARD_ARGS(PerReceiverRxOp) + + PerReceiverRxOp() = default; + + void setup(OperatorSpec& spec) override { + // Using size argument to explicitly set the receiver message queue size for each input. + spec.input<std::shared_ptr<std::string>>("in1"); + spec.input<std::shared_ptr<std::string>>("in2", IOSpec::IOSize(2)); + spec.input<std::shared_ptr<std::string>>("in3"); + + // Configure a MultiMessageAvailableCondition in "PerReceiver" mode so the operator will run + // only when 1, 2 and 1 messages have arrived on ports "in1", "in2" and "in3", respectively. + // This per-receiver mode is equivalent to putting a MessageAvailable condition on each input + // individually.
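+    // For comparison, a roughly equivalent per-port setup would attach one condition to each
+    // input instead (sketch only, not used in this example):
+    //   spec.input<std::shared_ptr<std::string>>("in2", IOSpec::IOSize(2))
+    //       .condition(ConditionType::kMessageAvailable, Arg("min_size", static_cast<size_t>(2)));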
+ ArgList multi_message_args{ + holoscan::Arg("min_sizes", std::vector<size_t>{1, 2, 1}), + holoscan::Arg("sampling_mode", MultiMessageAvailableCondition::SamplingMode::kPerReceiver)}; + std::vector<std::string> input_port_names{"in1", "in2", "in3"}; + spec.multi_port_condition( + ConditionType::kMultiMessageAvailable, input_port_names, multi_message_args); + } + + void compute(InputContext& op_input, [[maybe_unused]] OutputContext& op_output, + [[maybe_unused]] ExecutionContext& context) override { + std::string msg1{}; + std::string msg2{}; + std::string msg3{}; + std::string msg4{}; + auto in_value1 = op_input.receive<std::shared_ptr<std::string>>("in1"); + if (in_value1) { msg1 = *in_value1.value(); } + + // receive twice from in2 because there are 2 messages due to size = 2 + auto in_value2 = op_input.receive<std::shared_ptr<std::string>>("in2"); + if (in_value2) { msg2 = *in_value2.value(); } + auto in_value3 = op_input.receive<std::shared_ptr<std::string>>("in2"); + if (in_value3) { msg3 = *in_value3.value(); } + + auto in_value4 = op_input.receive<std::shared_ptr<std::string>>("in3"); + if (in_value4) { msg4 = *in_value4.value(); } + + HOLOSCAN_LOG_INFO("message received on in1: {}", msg1); + HOLOSCAN_LOG_INFO("first message received on in2: {}", msg2); + HOLOSCAN_LOG_INFO("second message received on in2: {}", msg3); + HOLOSCAN_LOG_INFO("message received on in3: {}", msg4); + }; +}; + +} // namespace holoscan::ops + +class MultiMessageApp : public holoscan::Application { + public: + void compose() override { + using namespace holoscan; + using namespace std::chrono_literals; + + auto tx1 = make_operator<ops::StringTxOp>( + "tx1", make_condition<PeriodicCondition>("periodic-condition1", 0.05s)); + tx1->set_message("Hello from tx1"); + + auto tx2 = make_operator<ops::StringTxOp>( + "tx2", make_condition<PeriodicCondition>("periodic-condition2", 0.025s)); + tx2->set_message("Hello from tx2"); + + auto tx3 = make_operator<ops::StringTxOp>( + "tx3", make_condition<PeriodicCondition>("periodic-condition3", 0.1s)); + tx3->set_message("Hello from tx3"); + + auto multi_rx = make_operator<ops::PerReceiverRxOp>( + "multi_rx", make_condition<CountCondition>("count-condition4", 4)); + + add_flow(tx1, multi_rx, {{"out", "in1"}}); + add_flow(tx2, multi_rx, {{"out", "in2"}}); + add_flow(tx3, multi_rx, {{"out", "in3"}}); + } +}; + +int main([[maybe_unused]] int argc, [[maybe_unused]] char** argv) { + auto app = holoscan::make_application<MultiMessageApp>(); + + // use the event-based scheduler so multiple operators can run simultaneously + app->scheduler(app->make_scheduler<EventBasedScheduler>( + "event-based-scheduler", holoscan::Arg("worker_thread_number", static_cast<int64_t>(4)))); + + app->run(); + + return 0; +} diff --git a/examples/conditions/multi_message/cpp/multi_message_sum_of_all.cpp b/examples/conditions/multi_message/cpp/multi_message_sum_of_all.cpp new file mode 100644 index 00000000..42af6ab4 --- /dev/null +++ b/examples/conditions/multi_message/cpp/multi_message_sum_of_all.cpp @@ -0,0 +1,125 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ +#include <chrono> +#include <memory> +#include <string> +#include <vector> + +#include "./common_ops.hpp" +#include "holoscan/holoscan.hpp" + +namespace holoscan::ops { + +class SumOfAllThrottledRxOp : public Operator { + public: + HOLOSCAN_OPERATOR_FORWARD_ARGS(SumOfAllThrottledRxOp) + + SumOfAllThrottledRxOp() = default; + + void setup(OperatorSpec& spec) override { + // Using size argument to explicitly set the receiver message queue size for each input. + spec.input<std::shared_ptr<std::string>>("in1", IOSpec::IOSize(10)); + spec.input<std::shared_ptr<std::string>>("in2", IOSpec::IOSize(10)); + spec.input<std::shared_ptr<std::string>>("in3", IOSpec::IOSize(10)); + + // Use kMultiMessageAvailableTimeout to consider all three ports together. In this + // "SumOfAll" mode, it only matters that `min_sum` messages have arrived across all the ports + // that are listed in `input_port_names` below, but it does not matter which ports the messages + // arrived on. The "execution_frequency" is set to 30ms, so the operator can run once 30 ms has + // elapsed even if 20 messages have not arrived. Use ConditionType::kMultiMessageAvailable + // instead if the timeout interval is not desired. + ArgList multi_message_args{ + holoscan::Arg("execution_frequency", std::string{"30ms"}), + holoscan::Arg("min_sum", static_cast<size_t>(20)), + holoscan::Arg("sampling_mode", + MultiMessageAvailableTimeoutCondition::SamplingMode::kSumOfAll)}; + std::vector<std::string> input_port_names{"in1", "in2", "in3"}; + spec.multi_port_condition( + ConditionType::kMultiMessageAvailableTimeout, input_port_names, multi_message_args); + } + + void compute(InputContext& op_input, [[maybe_unused]] OutputContext& op_output, + [[maybe_unused]] ExecutionContext& context) override { + // We haven't set a specific number of messages per port, so loop over each + // input until no more messages are in the queue. + auto in_value1 = op_input.receive<std::shared_ptr<std::string>>("in1"); + std::vector<std::string> msgs_input1; + while (in_value1) { + msgs_input1.push_back(*in_value1.value()); + in_value1 = op_input.receive<std::shared_ptr<std::string>>("in1"); + } + + auto in_value2 = op_input.receive<std::shared_ptr<std::string>>("in2"); + std::vector<std::string> msgs_input2; + while (in_value2) { + msgs_input2.push_back(*in_value2.value()); + in_value2 = op_input.receive<std::shared_ptr<std::string>>("in2"); + } + + auto in_value3 = op_input.receive<std::shared_ptr<std::string>>("in3"); + std::vector<std::string> msgs_input3; + while (in_value3) { + msgs_input3.push_back(*in_value3.value()); + in_value3 = op_input.receive<std::shared_ptr<std::string>>("in3"); + } + + HOLOSCAN_LOG_INFO("messages received on in1: {}", msgs_input1); + HOLOSCAN_LOG_INFO("messages received on in2: {}", msgs_input2); + HOLOSCAN_LOG_INFO("messages received on in3: {}", msgs_input3); + }; +}; + +} // namespace holoscan::ops + +class MultiMessageThrottledApp : public holoscan::Application { + public: + void compose() override { + using namespace holoscan; + using namespace std::chrono_literals; + + auto tx1 = make_operator<ops::StringTxOp>( + "tx1", make_condition<PeriodicCondition>("periodic-condition1", 4ms)); + tx1->set_message("tx1"); + + auto tx2 = make_operator<ops::StringTxOp>( + "tx2", make_condition<PeriodicCondition>("periodic-condition2", 8ms)); + tx2->set_message("tx2"); + + auto tx3 = make_operator<ops::StringTxOp>( + "tx3", make_condition<PeriodicCondition>("periodic-condition3", 16ms)); + tx3->set_message("tx3"); + + auto multi_rx_timeout = make_operator<ops::SumOfAllThrottledRxOp>( + "multi_rx_timeout", make_condition<CountCondition>("count-condition", 5)); + + add_flow(tx1, multi_rx_timeout, {{"out", "in1"}}); + add_flow(tx2, multi_rx_timeout, {{"out", "in2"}}); + add_flow(tx3, multi_rx_timeout, {{"out", "in3"}}); + } +}; + +int main([[maybe_unused]] int argc, [[maybe_unused]] char** argv) { + auto app = holoscan::make_application<MultiMessageThrottledApp>(); + + // use the event-based scheduler so multiple operators can run simultaneously +
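+  // (Four worker threads is an illustrative choice here: it allows the three transmitters and
+  // the receiver to all be scheduled concurrently.)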
app->scheduler(app->make_scheduler<EventBasedScheduler>( + "event-based-scheduler", holoscan::Arg("worker_thread_number", static_cast<int64_t>(4)))); + + app->run(); + + return 0; +} diff --git a/examples/conditions/multi_message/cpp/single_message_timeout.cpp b/examples/conditions/multi_message/cpp/single_message_timeout.cpp new file mode 100644 index 00000000..c162a1ad --- /dev/null +++ b/examples/conditions/multi_message/cpp/single_message_timeout.cpp @@ -0,0 +1,92 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include <chrono> +#include <memory> +#include <string> +#include <vector> + +#include "./common_ops.hpp" +#include "holoscan/holoscan.hpp" + +namespace holoscan::ops { + +class RxTimeoutOp : public Operator { + public: + HOLOSCAN_OPERATOR_FORWARD_ARGS(RxTimeoutOp) + + RxTimeoutOp() = default; + + void setup(OperatorSpec& spec) override { + // Set a condition to allow execution once 5 messages have arrived or at least 250 ms has + // elapsed since the prior time operator::compute was called. + spec.input<std::shared_ptr<std::string>>("in", IOSpec::IOSize(5)) + .condition(ConditionType::kMultiMessageAvailableTimeout, + holoscan::Arg("execution_frequency", std::string{"250ms"}), + holoscan::Arg("min_sum", static_cast<size_t>(5))); + } + + void compute(InputContext& op_input, [[maybe_unused]] OutputContext& op_output, + [[maybe_unused]] ExecutionContext& context) override { + // We haven't set a specific number of messages per port, so loop over the + // input until no more messages are in the queue. + auto in_value = op_input.receive<std::shared_ptr<std::string>>("in"); + if (!in_value) { HOLOSCAN_LOG_INFO("No message available"); } + std::vector<std::string> msgs_input; + size_t message_count = 0; + while (in_value) { + msgs_input.push_back(*in_value.value()); + message_count++; + in_value = op_input.receive<std::shared_ptr<std::string>>("in"); + } + + HOLOSCAN_LOG_INFO("{} messages received on in: {}", message_count, msgs_input); + }; +}; + +} // namespace holoscan::ops + +class RxTimeoutApp : public holoscan::Application { + public: + void compose() override { + using namespace holoscan; + using namespace std::chrono_literals; + + auto tx = make_operator<ops::StringTxOp>( + "tx", + make_condition<PeriodicCondition>("periodic-condition1", 100ms), + make_condition<CountCondition>("count", 18)); + tx->set_message("hello from tx", true); + + auto rx_timeout = make_operator<ops::RxTimeoutOp>("rx_timeout"); + add_flow(tx, rx_timeout); + } +}; + +int main([[maybe_unused]] int argc, [[maybe_unused]] char** argv) { + auto app = holoscan::make_application<RxTimeoutApp>(); + + // Add a timeout slightly less than the execution_frequency so any final messages have time to + // arrive after tx stops calling compute. If the deadlock timeout here is > execution_frequency + // then the receive operator will continue to call compute indefinitely with 0 messages at the + // execution frequency.
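+  // With count=18 and a 100 ms period, compute is expected to fire for three full batches of
+  // 5 messages; the remaining 3 (5 + 5 + 5 + 3 = 18) are then flushed once the 250 ms
+  // execution_frequency elapses, which is why the final log line reports 3 messages.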
+ app->scheduler(app->make_scheduler<GreedyScheduler>( + "greedy-scheduler", holoscan::Arg("stop_on_deadlock_timeout", static_cast<int64_t>(245)))); + + app->run(); + + return 0; +} diff --git a/examples/conditions/multi_message/python/CMakeLists.txt b/examples/conditions/multi_message/python/CMakeLists.txt new file mode 100644 index 00000000..d749f27f --- /dev/null +++ b/examples/conditions/multi_message/python/CMakeLists.txt @@ -0,0 +1,87 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Get relative folder path for the app +file(RELATIVE_PATH app_relative_dest_path ${CMAKE_SOURCE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}) + +# Copy the per-receiver application +add_custom_target(python_multi_message_per_receiver ALL + COMMAND ${CMAKE_COMMAND} -E copy "${CMAKE_CURRENT_SOURCE_DIR}/multi_message_per_receiver.py" ${CMAKE_CURRENT_BINARY_DIR} + DEPENDS "multi_message_per_receiver.py" + BYPRODUCTS "multi_message_per_receiver.py" +) + +# Copy the sum-of-all application +add_custom_target(python_multi_message_sum_of_all ALL + COMMAND ${CMAKE_COMMAND} -E copy "${CMAKE_CURRENT_SOURCE_DIR}/multi_message_sum_of_all.py" ${CMAKE_CURRENT_BINARY_DIR} + DEPENDS "multi_message_sum_of_all.py" + BYPRODUCTS "multi_message_sum_of_all.py" +) + +# Copy the single-message-timeout application +add_custom_target(python_single_message_timeout ALL + COMMAND ${CMAKE_COMMAND} -E copy "${CMAKE_CURRENT_SOURCE_DIR}/single_message_timeout.py" ${CMAKE_CURRENT_BINARY_DIR} + DEPENDS "single_message_timeout.py" + BYPRODUCTS "single_message_timeout.py" +) + +# Install the apps +install(FILES + "${CMAKE_CURRENT_SOURCE_DIR}/multi_message_per_receiver.py" + "${CMAKE_CURRENT_SOURCE_DIR}/multi_message_sum_of_all.py" + "${CMAKE_CURRENT_SOURCE_DIR}/single_message_timeout.py" + DESTINATION "${app_relative_dest_path}" + COMPONENT "holoscan-examples" +) + +# Testing +if(HOLOSCAN_BUILD_TESTS) + add_test(NAME EXAMPLE_PYTHON_MULTI_MESSAGE_PER_RECEIVER_TEST + COMMAND python3 multi_message_per_receiver.py + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} + ) + set_tests_properties(EXAMPLE_PYTHON_MULTI_MESSAGE_PER_RECEIVER_TEST PROPERTIES + PASS_REGULAR_EXPRESSION "message received on in1: Hello from tx1" + PASS_REGULAR_EXPRESSION "message received on in2: Hello from tx2" + PASS_REGULAR_EXPRESSION "message received on in3: Hello from tx3" + FAIL_REGULAR_EXPRESSION "Unable to convert argument type" + ) + + add_test(NAME EXAMPLE_PYTHON_MULTI_MESSAGE_SUM_OF_ALL_TEST + COMMAND python3 multi_message_sum_of_all.py + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} + ) + set_tests_properties(EXAMPLE_PYTHON_MULTI_MESSAGE_SUM_OF_ALL_TEST PROPERTIES + # check that multiple messages arrived on all ports + # (expect several messages on in1, 4-5 on in2 and at least one on in3 with the timings specified) + PASS_REGULAR_EXPRESSION "messages received on in1: \\(\'tx1\', \'tx1\', \'tx1\', \'tx1\'" + PASS_REGULAR_EXPRESSION "messages received on in2: \\(\'tx2\', \'tx2\', 
\'tx2\'" + PASS_REGULAR_EXPRESSION "messages received on in3: \\(\'tx3\'" + # with the given periodic conditions it should not be possible for tx3 to arrive more than 3 times + FAIL_REGULAR_EXPRESSION "messages received on in3: \\(\'tx3\', \'tx3\', \'tx3\', \'tx3\'" + FAIL_REGULAR_EXPRESSION "Unable to convert argument type" + ) + + add_test(NAME EXAMPLE_PYTHON_SINGLE_MESSAGE_TIMEOUT_TEST + COMMAND python3 single_message_timeout.py + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} + ) + set_tests_properties( + EXAMPLE_PYTHON_SINGLE_MESSAGE_TIMEOUT_TEST PROPERTIES + # check that despite min_sum=5, only 3 messages arrived due to the execution_frequency + PASS_REGULAR_EXPRESSION "3 messages received on in" + FAIL_REGULAR_EXPRESSION "Unable to convert argument type" + ) +endif() diff --git a/examples/conditions/multi_message/python/multi_message_per_receiver.py b/examples/conditions/multi_message/python/multi_message_per_receiver.py new file mode 100644 index 00000000..16d8c29d --- /dev/null +++ b/examples/conditions/multi_message/python/multi_message_per_receiver.py @@ -0,0 +1,98 @@ +""" +SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +SPDX-License-Identifier: Apache-2.0 + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" # noqa: E501 + +import datetime + +from holoscan.conditions import CountCondition, PeriodicCondition +from holoscan.core import Application, ConditionType, IOSpec, Operator +from holoscan.schedulers import EventBasedScheduler + +# Define simple transmit and receive operators, then an application using them + + +class StringTxOp(Operator): + def __init__(self, fragment, *args, message="", **kwargs): + self.message = message + super().__init__(fragment, *args, **kwargs) + + def setup(self, spec): + spec.output("out") + + def compute(self, op_input, op_output, context): + print(f"{self.name}: sending message") + op_output.emit(self.message, "out") + + +class PerReceiverRxOp(Operator): + def __init__(self, fragment, *args, message="", **kwargs): + self.message = message + super().__init__(fragment, *args, **kwargs) + + def setup(self, spec): + # Using size argument to explicitly set the receiver message queue size for each input. + spec.input("in1") + spec.input("in2", size=IOSpec.IOSize(2)) + spec.input("in3") + + # Configure a MultiMessageAvailableCondition in "PerReceiver" mode so the operator will run + # only when 1, 2 and 1 messages have arrived on ports "in1", "in2" and "in3", respectively. + # This per-receiver mode is equivalent to putting a MessageAvailable condition on each input + # individually.
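+        # For comparison, a roughly equivalent per-port sketch (not used here) would be:
+        #   spec.input("in2", size=IOSpec.IOSize(2)).condition(
+        #       ConditionType.MESSAGE_AVAILABLE, min_size=2)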
+ spec.multi_port_condition( + kind=ConditionType.MULTI_MESSAGE_AVAILABLE, + port_names=["in1", "in2", "in3"], + sampling_mode="PerReceiver", + min_sizes=[1, 2, 1], + ) + + def compute(self, op_input, op_output, context): + msg1 = op_input.receive("in1") + msg2 = op_input.receive("in2") + msg3 = op_input.receive("in3") + print(f"message received on in1: {msg1}") + print(f"first message received on in2: {msg2[0]}") + print(f"second message received on in2: {msg2[1]}") + print(f"message received on in3: {msg3}") + + +class MultiMessageApp(Application): + def compose(self): + period1 = PeriodicCondition(self, recess_period=datetime.timedelta(milliseconds=50)) + tx1 = StringTxOp(self, period1, message="Hello from tx1", name="tx1") + + period2 = PeriodicCondition(self, recess_period=datetime.timedelta(milliseconds=25)) + tx2 = StringTxOp(self, period2, message="Hello from tx2", name="tx2") + + period3 = PeriodicCondition(self, recess_period=datetime.timedelta(milliseconds=100)) + tx3 = StringTxOp(self, period3, message="Hello from tx3", name="tx3") + + multi_rx = PerReceiverRxOp(self, CountCondition(self, count=4), name="multi_rx") + + # Connect the operators into the workflow + self.add_flow(tx1, multi_rx, {("out", "in1")}) + self.add_flow(tx2, multi_rx, {("out", "in2")}) + self.add_flow(tx3, multi_rx, {("out", "in3")}) + + +def main(): + app = MultiMessageApp() + app.scheduler(EventBasedScheduler(app, worker_thread_number=4)) + app.run() + + +if __name__ == "__main__": + main() diff --git a/examples/conditions/multi_message/python/multi_message_sum_of_all.py b/examples/conditions/multi_message/python/multi_message_sum_of_all.py new file mode 100644 index 00000000..a4b48bbc --- /dev/null +++ b/examples/conditions/multi_message/python/multi_message_sum_of_all.py @@ -0,0 +1,101 @@ +""" +SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +SPDX-License-Identifier: Apache-2.0 + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" # noqa: E501 + +import datetime + +from holoscan.conditions import CountCondition, PeriodicCondition +from holoscan.core import Application, ConditionType, IOSpec, Operator +from holoscan.schedulers import EventBasedScheduler + +# Define simple transmit and receive operators, then an application using them + + +class StringTxOp(Operator): + def __init__(self, fragment, *args, message="", **kwargs): + self.message = message + super().__init__(fragment, *args, **kwargs) + + def setup(self, spec): + spec.output("out") + + def compute(self, op_input, op_output, context): + op_output.emit(self.message, "out") + + +class SumOfAllThrottledRxOp(Operator): + def __init__(self, fragment, *args, message="", **kwargs): + self.message = message + super().__init__(fragment, *args, **kwargs) + + def setup(self, spec): + # Using size argument to explicitly set the receiver message queue size for each input.
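+        # (A queue size of 20 is an illustrative choice matching min_sum below, so that a full
+        # batch can be buffered even if it arrives entirely on a single port.)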
+ spec.input("in1", size=IOSpec.IOSize(20)) + spec.input("in2", size=IOSpec.IOSize(20)) + spec.input("in3", size=IOSpec.IOSize(20)) + + # Use kMultiMessageAvailableTimeout to considers all three ports together. In this + # "SumOfAll" mode, it only matters that `min_sum` messages have arrived across all the + # ports that are listed in `input_port_names` below, but it does not matter which ports the + # messages arrived on. The "execution_frequency" is set to 30ms, so the operator can run + # once 30 ms has elapsed even if 20 messages have not arrived. Use + # ConditionType.MULTI_MESSAGE_AVAILABLE instead if the timeout interval is not desired. + spec.multi_port_condition( + kind=ConditionType.MULTI_MESSAGE_AVAILABLE_TIMEOUT, + execution_frequency="30ms", + port_names=["in1", "in2", "in3"], + sampling_mode="SumOfAll", + min_sum=20, + ) + + def compute(self, op_input, op_output, context): + msg1 = op_input.receive("in1") + msg2 = op_input.receive("in2") + msg3 = op_input.receive("in3") + print(f"messages received on in1: {msg1}") + print(f"messages received on in2: {msg2}") + print(f"messages received on in3: {msg3}") + + +class MultiMessageThrottledApp(Application): + def compose(self): + period1 = PeriodicCondition(self, recess_period=datetime.timedelta(milliseconds=4)) + tx1 = StringTxOp(self, period1, message="tx1", name="tx1") + + period2 = PeriodicCondition(self, recess_period=datetime.timedelta(milliseconds=8)) + tx2 = StringTxOp(self, period2, message="tx2", name="tx2") + + period3 = PeriodicCondition(self, recess_period=datetime.timedelta(milliseconds=16)) + tx3 = StringTxOp(self, period3, message="tx3", name="tx3") + + multi_rx_timeout = SumOfAllThrottledRxOp( + self, CountCondition(self, count=5), name="multi_rx_timeout" + ) + + # Connect the operators into the workflow + self.add_flow(tx1, multi_rx_timeout, {("out", "in1")}) + self.add_flow(tx2, multi_rx_timeout, {("out", "in2")}) + self.add_flow(tx3, multi_rx_timeout, {("out", "in3")}) + + +def main(): + app = MultiMessageThrottledApp() + app.scheduler(EventBasedScheduler(app, worker_thread_number=4)) + app.run() + + +if __name__ == "__main__": + main() diff --git a/examples/conditions/multi_message/python/single_message_timeout.py b/examples/conditions/multi_message/python/single_message_timeout.py new file mode 100644 index 00000000..e6f65aee --- /dev/null +++ b/examples/conditions/multi_message/python/single_message_timeout.py @@ -0,0 +1,85 @@ +""" +SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +SPDX-License-Identifier: Apache-2.0 + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" # noqa: E501 + +import datetime + +from holoscan.conditions import CountCondition, PeriodicCondition +from holoscan.core import Application, ConditionType, IOSpec, Operator +from holoscan.schedulers import GreedyScheduler + +# Now define a simple application using the operators defined above + + +class StringTxOp(Operator): + def __init__(self, fragment, *args, message="", **kwargs): + self.message = message + super().__init__(fragment, *args, **kwargs) + + def setup(self, spec): + spec.output("out") + + def compute(self, op_input, op_output, context): + print(f"{self.name}: sending message") + op_output.emit(self.message, "out") + + +class RxTimeoutOp(Operator): + def __init__(self, fragment, *args, message="", **kwargs): + self.message = message + super().__init__(fragment, *args, **kwargs) + + def setup(self, spec): + # Set a condition to allow execution once 5 messages have arrived or at least 250 ms has + # elapsed since the prior time operator::compute was called. + spec.input("in", size=IOSpec.IOSize(5)).condition( + ConditionType.MULTI_MESSAGE_AVAILABLE_TIMEOUT, execution_frequency="250ms", min_sum=5 + ) + + def compute(self, op_input, op_output, context): + messages = op_input.receive("in") + print(f"{len(messages)} messages received on in: {messages}") + + +class RxTimeoutApp(Application): + def compose(self): + tx = StringTxOp( + self, + PeriodicCondition( + self, recess_period=datetime.timedelta(milliseconds=100), name="tx_period" + ), + CountCondition(self, count=18, name="tx_count"), + message="tx", + name="tx", + ) + rx_timeout = RxTimeoutOp(self, name="rx_timeout") + + # Connect the operators into the workflow + self.add_flow(tx, rx_timeout) + + +def main(): + app = RxTimeoutApp() + # Add a timeout slightly less than the execution_frequency so any final messages have time to + # arrive after tx stops calling compute. If the deadlock timeout here is > execution_frequency + # than the receive operator will continue to call compute indefinitely with 0 messages at the + # execution frequency. + app.scheduler(GreedyScheduler(app, stop_on_deadlock_timeout=245)) + app.run() + + +if __name__ == "__main__": + main() diff --git a/examples/flow_tracker/cpp/CMakeLists.min.txt b/examples/flow_tracker/cpp/CMakeLists.min.txt index 42535ae0..db3aa171 100644 --- a/examples/flow_tracker/cpp/CMakeLists.min.txt +++ b/examples/flow_tracker/cpp/CMakeLists.min.txt @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2022-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2022-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the \"License\"); @@ -36,5 +36,6 @@ if(BUILD_TESTING) WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} ) set_tests_properties(EXAMPLE_CPP_FLOW_TRACKER_TEST PROPERTIES - PASS_REGULAR_EXPRESSION "Data Flow Tracking Results") + PASS_REGULAR_EXPRESSION "Data Flow Tracking Results" + PASS_REGULAR_EXPRESSION "Path 1: root1,middle1,leaf1") endif() diff --git a/examples/flow_tracker/cpp/CMakeLists.txt b/examples/flow_tracker/cpp/CMakeLists.txt index 05977a49..0d9e40b9 100644 --- a/examples/flow_tracker/cpp/CMakeLists.txt +++ b/examples/flow_tracker/cpp/CMakeLists.txt @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2022-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2022-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -58,5 +58,6 @@ if(HOLOSCAN_BUILD_TESTS) WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} ) set_tests_properties(EXAMPLE_CPP_FLOW_TRACKER_TEST PROPERTIES - PASS_REGULAR_EXPRESSION "Data Flow Tracking Results") + PASS_REGULAR_EXPRESSION "Data Flow Tracking Results" + PASS_REGULAR_EXPRESSION "Path 1: root1,middle1,leaf1") endif() diff --git a/examples/flow_tracker/python/CMakeLists.min.txt b/examples/flow_tracker/python/CMakeLists.min.txt index 74a7295a..2b6c7920 100644 --- a/examples/flow_tracker/python/CMakeLists.min.txt +++ b/examples/flow_tracker/python/CMakeLists.min.txt @@ -20,5 +20,6 @@ if(BUILD_TESTING) WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} ) set_tests_properties(EXAMPLE_PYTHON_FLOW_TRACKER_TEST PROPERTIES - PASS_REGULAR_EXPRESSION "Data Flow Tracking Results") + PASS_REGULAR_EXPRESSION "Data Flow Tracking Results" + PASS_REGULAR_EXPRESSION "Path 1: root1,middle1,leaf1") endif() diff --git a/examples/flow_tracker/python/CMakeLists.txt b/examples/flow_tracker/python/CMakeLists.txt index d9377ce1..d46ec0ad 100644 --- a/examples/flow_tracker/python/CMakeLists.txt +++ b/examples/flow_tracker/python/CMakeLists.txt @@ -44,5 +44,6 @@ if(HOLOSCAN_BUILD_TESTS) WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} ) set_tests_properties(EXAMPLE_PYTHON_FLOW_TRACKER_TEST PROPERTIES - PASS_REGULAR_EXPRESSION "Data Flow Tracking Results") + PASS_REGULAR_EXPRESSION "Data Flow Tracking Results" + PASS_REGULAR_EXPRESSION "Path 1: root1,middle1,leaf1") endif() diff --git a/examples/multithread/cpp/CMakeLists.min.txt b/examples/multithread/cpp/CMakeLists.min.txt index 9e0cd5cd..3ab5b02e 100644 --- a/examples/multithread/cpp/CMakeLists.min.txt +++ b/examples/multithread/cpp/CMakeLists.min.txt @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2023-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the \"License\"); @@ -44,4 +44,20 @@ if(BUILD_TESTING) ) set_tests_properties(EXAMPLE_CPP_MULTITHREAD_OPERATOR_TEST PROPERTIES PASS_REGULAR_EXPRESSION "sum of received values: 496") + + # Test the flow tracking + file(READ ${CMAKE_CURRENT_SOURCE_DIR}/multithread.yaml CONFIG_STRING) + string(REPLACE "tracking: false" "tracking: true" CONFIG_STRING "${CONFIG_STRING}") + + # Write the config + set(CONFIG_FILE ${CMAKE_CURRENT_BINARY_DIR}/multithread_tracking.yaml) + file(WRITE ${CONFIG_FILE} "${CONFIG_STRING}") + + add_test(NAME EXAMPLE_CPP_MULTITHREAD_OPERATOR_TRACKING_TEST + COMMAND multithread ${CONFIG_FILE} + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} + ) + set_tests_properties(EXAMPLE_CPP_MULTITHREAD_OPERATOR_TRACKING_TEST PROPERTIES + PASS_REGULAR_EXPRESSION "Data Flow Tracking Results:" + PASS_REGULAR_EXPRESSION "tx->out: 1") endif() diff --git a/examples/multithread/cpp/CMakeLists.txt b/examples/multithread/cpp/CMakeLists.txt index 899c4622..f47834b5 100644 --- a/examples/multithread/cpp/CMakeLists.txt +++ b/examples/multithread/cpp/CMakeLists.txt @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2023-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -74,4 +74,20 @@ if(HOLOSCAN_BUILD_TESTS) ) set_tests_properties(EXAMPLE_CPP_MULTITHREAD_OPERATOR_TEST PROPERTIES PASS_REGULAR_EXPRESSION "sum of received values: 496") + + # Test the flow tracking + file(READ ${CMAKE_CURRENT_SOURCE_DIR}/multithread.yaml CONFIG_STRING) + string(REPLACE "tracking: false" "tracking: true" CONFIG_STRING "${CONFIG_STRING}") + + # Write the config + set(CONFIG_FILE ${CMAKE_CURRENT_BINARY_DIR}/multithread_tracking.yaml) + file(WRITE ${CONFIG_FILE} "${CONFIG_STRING}") + + add_test(NAME EXAMPLE_CPP_MULTITHREAD_OPERATOR_TRACKING_TEST + COMMAND multithread ${CONFIG_FILE} + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} + ) + set_tests_properties(EXAMPLE_CPP_MULTITHREAD_OPERATOR_TRACKING_TEST PROPERTIES + PASS_REGULAR_EXPRESSION "Data Flow Tracking Results:" + PASS_REGULAR_EXPRESSION "tx->out: 1") endif() diff --git a/examples/multithread/cpp/multithread.cpp b/examples/multithread/cpp/multithread.cpp index bc324769..f253eb79 100644 --- a/examples/multithread/cpp/multithread.cpp +++ b/examples/multithread/cpp/multithread.cpp @@ -167,7 +167,8 @@ int main([[maybe_unused]] int argc, char** argv) { // Get the configuration auto config_path = std::filesystem::canonical(argv[0]).parent_path(); - config_path += "/multithread.yaml"; + config_path /= std::filesystem::path("multithread.yaml"); + if (argc >= 2) { config_path = argv[1]; } app->config(config_path); // Turn on data flow tracking if it is specified in the YAML diff --git a/examples/multithread/python/CMakeLists.min.txt b/examples/multithread/python/CMakeLists.min.txt index e8cf2a22..f54a419e 100644 --- a/examples/multithread/python/CMakeLists.min.txt +++ b/examples/multithread/python/CMakeLists.min.txt @@ -13,19 +13,85 @@ # See the License for the specific language governing permissions and # limitations under the License. 
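+# (The copy below lets the tests run from the build tree; their WORKING_DIRECTORY is switched
+# from the source dir to ${CMAKE_CURRENT_BINARY_DIR} in this change.)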
+# Copy native operator multithread application +add_custom_target(python_multithread ALL + COMMAND ${CMAKE_COMMAND} -E copy "${CMAKE_CURRENT_SOURCE_DIR}/multithread.py" ${CMAKE_CURRENT_BINARY_DIR} + DEPENDS "multithread.py" + BYPRODUCTS "multithread.py" +) + # Testing if(BUILD_TESTING) add_test(NAME EXAMPLE_PYTHON_MULTITHREAD_TEST - COMMAND python3 multithread.py - WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} + COMMAND python3 multithread.py --name EXAMPLE_PYTHON_MULTITHREAD_TEST + --output_file multithread_times + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} ) set_tests_properties(EXAMPLE_PYTHON_MULTITHREAD_TEST PROPERTIES PASS_REGULAR_EXPRESSION "sum of received values: 496") + add_test(NAME EXAMPLE_PYTHON_MULTITHREAD_8_TEST + COMMAND python3 multithread.py --threads 8 --num_delay_ops 8 --delay 0.25 --delay_step 0.1 + --name EXAMPLE_PYTHON_MULTITHREAD_8_TEST --output_file multithread_times + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} + ) + set_tests_properties(EXAMPLE_PYTHON_MULTITHREAD_8_TEST PROPERTIES + PASS_REGULAR_EXPRESSION "sum of received values: 28" + DEPENDS EXAMPLE_PYTHON_MULTITHREAD_TEST) + + add_test(NAME EXAMPLE_PYTHON_MULTITHREAD_1_TEST + COMMAND python3 multithread.py --threads 1 --num_delay_ops 8 --delay 0.25 --delay_step 0.1 + --name EXAMPLE_PYTHON_MULTITHREAD_1_TEST --output_file multithread_times + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} + ) + set_tests_properties(EXAMPLE_PYTHON_MULTITHREAD_1_TEST PROPERTIES + PASS_REGULAR_EXPRESSION "sum of received values: 28" + DEPENDS EXAMPLE_PYTHON_MULTITHREAD_8_TEST) + + add_test(NAME EXAMPLE_PYTHON_MULTITHREAD_0_TEST + COMMAND python3 multithread.py --threads 0 --num_delay_ops 8 --delay 0.25 --delay_step 0.1 + --name EXAMPLE_PYTHON_MULTITHREAD_0_TEST --output_file multithread_times + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) + set_tests_properties(EXAMPLE_PYTHON_MULTITHREAD_0_TEST PROPERTIES + PASS_REGULAR_EXPRESSION "sum of received values: 28" + DEPENDS EXAMPLE_PYTHON_MULTITHREAD_1_TEST) + + # Add a test to check the validity of the timing + # This is only valid on dgpu + execute_process(COMMAND nvidia-smi -L + OUTPUT_VARIABLE HOST_GPU) + if(NOT HOST_GPU MATCHES "nvgpu") + add_test(NAME EXAMPLE_PYTHON_MULTITHREAD_TIMES_VALIDATION_TEST + COMMAND python3 ${CMAKE_SOURCE_DIR}/../bin/ctest_time_comparison.py + multithread_times + EXAMPLE_PYTHON_MULTITHREAD_TEST LESS EXAMPLE_PYTHON_MULTITHREAD_8_TEST + EXAMPLE_PYTHON_MULTITHREAD_8_TEST LESS EXAMPLE_PYTHON_MULTITHREAD_1_TEST + EXAMPLE_PYTHON_MULTITHREAD_8_TEST LESS EXAMPLE_PYTHON_MULTITHREAD_0_TEST + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} + ) + + set_tests_properties(EXAMPLE_PYTHON_MULTITHREAD_TIMES_VALIDATION_TEST PROPERTIES + DEPENDS EXAMPLE_PYTHON_MULTITHREAD_TEST + DEPENDS EXAMPLE_PYTHON_MULTITHREAD_8_TEST + DEPENDS EXAMPLE_PYTHON_MULTITHREAD_1_TEST + DEPENDS EXAMPLE_PYTHON_MULTITHREAD_0_TEST + PASS_REGULAR_EXPRESSION "Timing for tests matches expectations" + ) + endif() + add_test(NAME EXAMPLE_PYTHON_MULTITHREAD_EVENT_BASED_TEST COMMAND python3 multithread.py --event_based - WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} ) set_tests_properties(EXAMPLE_PYTHON_MULTITHREAD_TEST PROPERTIES PASS_REGULAR_EXPRESSION "sum of received values: 496") + + add_test(NAME EXAMPLE_PYTHON_MULTITHREAD_TRACK_TEST + COMMAND python3 multithread.py --track --count 5 + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} + ) + set_tests_properties(EXAMPLE_PYTHON_MULTITHREAD_TRACK_TEST PROPERTIES + PASS_REGULAR_EXPRESSION "Number of 
messages: 6" + PASS_REGULAR_EXPRESSION "tx->out: 5") + endif() diff --git a/examples/multithread/python/CMakeLists.txt b/examples/multithread/python/CMakeLists.txt index cdef557f..21236dca 100644 --- a/examples/multithread/python/CMakeLists.txt +++ b/examples/multithread/python/CMakeLists.txt @@ -40,16 +40,74 @@ install(FILES CMakeLists.min.txt # Testing if(HOLOSCAN_BUILD_TESTS) add_test(NAME EXAMPLE_PYTHON_MULTITHREAD_TEST - COMMAND python3 multithread.py + COMMAND python3 multithread.py --name EXAMPLE_PYTHON_MULTITHREAD_TEST + --output_file multithread_times WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} ) set_tests_properties(EXAMPLE_PYTHON_MULTITHREAD_TEST PROPERTIES PASS_REGULAR_EXPRESSION "sum of received values: 496") + add_test(NAME EXAMPLE_PYTHON_MULTITHREAD_8_TEST + COMMAND python3 multithread.py --threads 8 --num_delay_ops 8 --delay 0.25 --delay_step 0.1 + --name EXAMPLE_PYTHON_MULTITHREAD_8_TEST --output_file multithread_times + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} + ) + set_tests_properties(EXAMPLE_PYTHON_MULTITHREAD_8_TEST PROPERTIES + PASS_REGULAR_EXPRESSION "sum of received values: 28" + DEPENDS EXAMPLE_PYTHON_MULTITHREAD_TEST) + + add_test(NAME EXAMPLE_PYTHON_MULTITHREAD_1_TEST + COMMAND python3 multithread.py --threads 1 --num_delay_ops 8 --delay 0.25 --delay_step 0.1 + --name EXAMPLE_PYTHON_MULTITHREAD_1_TEST --output_file multithread_times + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} + ) + set_tests_properties(EXAMPLE_PYTHON_MULTITHREAD_1_TEST PROPERTIES + PASS_REGULAR_EXPRESSION "sum of received values: 28" + DEPENDS EXAMPLE_PYTHON_MULTITHREAD_8_TEST) + + add_test(NAME EXAMPLE_PYTHON_MULTITHREAD_0_TEST + COMMAND python3 multithread.py --threads 0 --num_delay_ops 8 --delay 0.25 --delay_step 0.1 + --name EXAMPLE_PYTHON_MULTITHREAD_0_TEST --output_file multithread_times + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) + set_tests_properties(EXAMPLE_PYTHON_MULTITHREAD_0_TEST PROPERTIES + PASS_REGULAR_EXPRESSION "sum of received values: 28" + DEPENDS EXAMPLE_PYTHON_MULTITHREAD_1_TEST) + + # Add a test to check the validity of the timing + # This test is only valid for dgpu + execute_process(COMMAND nvidia-smi -L + OUTPUT_VARIABLE HOST_GPU) + if(NOT HOST_GPU MATCHES "nvgpu") + add_test(NAME EXAMPLE_PYTHON_MULTITHREAD_TIMES_VALIDATION_TEST + COMMAND python3 ${Holoscan-examples_SOURCE_DIR}/../scripts/ctest_time_comparison.py + multithread_times + EXAMPLE_PYTHON_MULTITHREAD_TEST LESS EXAMPLE_PYTHON_MULTITHREAD_8_TEST + EXAMPLE_PYTHON_MULTITHREAD_8_TEST LESS EXAMPLE_PYTHON_MULTITHREAD_1_TEST + EXAMPLE_PYTHON_MULTITHREAD_8_TEST LESS EXAMPLE_PYTHON_MULTITHREAD_0_TEST + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} + ) + + set_tests_properties(EXAMPLE_PYTHON_MULTITHREAD_TIMES_VALIDATION_TEST PROPERTIES + DEPENDS EXAMPLE_PYTHON_MULTITHREAD_TEST + DEPENDS EXAMPLE_PYTHON_MULTITHREAD_8_TEST + DEPENDS EXAMPLE_PYTHON_MULTITHREAD_1_TEST + DEPENDS EXAMPLE_PYTHON_MULTITHREAD_0_TEST + PASS_REGULAR_EXPRESSION "Timing for tests matches expectations" + ) + endif() + add_test(NAME EXAMPLE_PYTHON_MULTITHREAD_EVENT_BASED_TEST COMMAND python3 multithread.py --event_based WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} ) set_tests_properties(EXAMPLE_PYTHON_MULTITHREAD_TEST PROPERTIES PASS_REGULAR_EXPRESSION "sum of received values: 496") + + add_test(NAME EXAMPLE_PYTHON_MULTITHREAD_TRACK_TEST + COMMAND python3 multithread.py --track --count 5 + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} + ) + set_tests_properties(EXAMPLE_PYTHON_MULTITHREAD_TRACK_TEST PROPERTIES + PASS_REGULAR_EXPRESSION 
"Number of messages: 6" + PASS_REGULAR_EXPRESSION "tx->out: 5") endif() diff --git a/examples/multithread/python/multithread.py b/examples/multithread/python/multithread.py index 2dd30f6b..368f19f1 100644 --- a/examples/multithread/python/multithread.py +++ b/examples/multithread/python/multithread.py @@ -143,7 +143,9 @@ def compose(self): self.add_flow(d, rx, {("out_val", "values"), ("out_name", "names")}) -def main(threads, num_delays, delay, delay_step, event_based, count, silent, track): +def main( + threads, num_delays, delay, delay_step, event_based, count, silent, track, name, output_file +): app = ParallelPingApp( num_delays=num_delays, delay=delay, delay_step=delay_step, count=count, silent=silent ) @@ -173,6 +175,10 @@ def main(threads, num_delays, delay, delay_step, event_based, count, silent, tra duration = time.time() - tstart print(f"Total app runtime = {duration:0.3f} s") + if name and output_file: + with open(output_file, "a") as file: + file.write(f"{name} {duration:0.3f}\n") + if __name__ == "__main__": # Parse args @@ -242,6 +248,14 @@ def main(threads, num_delays, delay, delay_step, event_based, count, silent, tra action="store_true", help="enable data flow tracking", ) + parser.add_argument( + "--name", + help="specify the name of the specific run for the output file", + ) + parser.add_argument( + "--output_file", + help="store the output timing to a file", + ) args = parser.parse_args() if args.delay < 0: @@ -267,4 +281,6 @@ def main(threads, num_delays, delay, delay_step, event_based, count, silent, tra count=args.count, silent=args.silent, track=args.track, + name=args.name, + output_file=args.output_file, ) diff --git a/examples/ping_conditional/cpp/ping_conditional.cpp b/examples/ping_conditional/cpp/ping_conditional.cpp index 2039bd16..873074f0 100644 --- a/examples/ping_conditional/cpp/ping_conditional.cpp +++ b/examples/ping_conditional/cpp/ping_conditional.cpp @@ -39,8 +39,8 @@ class PingTxOp : public Operator { } else { op_output.emit(nullptr, "out"); // emit nullptr for even values } - index_++; - } // NOLINT(clang-analyzer-cplusplus.NewDeleteLeaks) + index_++; // NOLINT(clang-analyzer-cplusplus.NewDeleteLeaks) + } private: int index_ = 0; diff --git a/examples/ping_distributed/cpp/CMakeLists.min.txt b/examples/ping_distributed/cpp/CMakeLists.min.txt index 1fb4e91d..b911f7b2 100644 --- a/examples/ping_distributed/cpp/CMakeLists.min.txt +++ b/examples/ping_distributed/cpp/CMakeLists.min.txt @@ -32,7 +32,7 @@ target_link_libraries(ping_distributed ) # Testing -if(HOLOSCAN_BUILD_TESTS) +if(BUILD_TESTING) # For iGPU, a call to `ucp_init_version` with CUDA_VISIBLE_DEVICES="" seems to cause a segfault. # Limit the following test case to x86_64 systems to avoid this. 
if(CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|amd64") diff --git a/examples/ping_distributed/cpp/ping_distributed.cpp b/examples/ping_distributed/cpp/ping_distributed.cpp index 0c78d37b..28a5d62c 100644 --- a/examples/ping_distributed/cpp/ping_distributed.cpp +++ b/examples/ping_distributed/cpp/ping_distributed.cpp @@ -18,6 +18,7 @@ #include #include #include +#include <utility> #include #include @@ -26,16 +27,16 @@ class Fragment1 : public holoscan::Fragment { public: - // NOLINTNEXTLINE(modernize-pass-by-value,bugprone-easily-swappable-parameters) + // NOLINTNEXTLINE(bugprone-easily-swappable-parameters) Fragment1(bool gpu_tensor, int64_t count, int32_t batch_size, int32_t rows, int32_t columns, - int32_t channels, const std::string& data_type) + int32_t channels, std::string data_type) : gpu_tensor_(gpu_tensor), batch_size_(batch_size), count_(count), rows_(rows), columns_(columns), channels_(channels), - data_type_(data_type) {} + data_type_(std::move(data_type)) {} void compose() override { using namespace holoscan; @@ -78,7 +79,7 @@ class App : public holoscan::Application { // Inherit the constructor using Application::Application; - // NOLINTNEXTLINE(modernize-pass-by-value,bugprone-easily-swappable-parameters) + // NOLINTNEXTLINE(bugprone-easily-swappable-parameters) void set_options(bool gpu_tensor = false, int64_t count = 10, int32_t batch_size = 0, int32_t rows = 32, int32_t columns = 1024, int32_t channels = 0, const std::string& data_type = "uint8_t") { @@ -91,7 +92,6 @@ class App : public holoscan::Application { channels_ = channels; data_type_ = data_type; } - // NOLINTEND(fuchsia-default-arguments-declarations) void compose() override { using namespace holoscan; diff --git a/examples/ping_multi_port/cpp/ping_multi_port.cpp b/examples/ping_multi_port/cpp/ping_multi_port.cpp index 1dea98cd..0ade4e6e 100644 --- a/examples/ping_multi_port/cpp/ping_multi_port.cpp +++ b/examples/ping_multi_port/cpp/ping_multi_port.cpp @@ -88,7 +88,8 @@ class PingMxOp : public Operator { spec.param(multiplier_, "multiplier", "Multiplier", "Multiply the input by this value", 2); } - void compute(InputContext& op_input, OutputContext& op_output, ExecutionContext&) override { + void compute(InputContext& op_input, OutputContext& op_output, + [[maybe_unused]] ExecutionContext& context) override { auto value1 = op_input.receive<std::shared_ptr<ValueData>>("in1").value(); auto value2 = op_input.receive<std::shared_ptr<ValueData>>("in2").value(); diff --git a/examples/ping_vector/cpp/ping_vector.cpp b/examples/ping_vector/cpp/ping_vector.cpp index 748e0fc6..fff34dd4 100644 --- a/examples/ping_vector/cpp/ping_vector.cpp +++ b/examples/ping_vector/cpp/ping_vector.cpp @@ -58,7 +58,8 @@ class PingMxOp : public Operator { spec.param(multiplier_, "multiplier", "Multiplier", "Multiply the input by this value", 2); } - void compute(InputContext& op_input, OutputContext& op_output, ExecutionContext&) override { + void compute(InputContext& op_input, OutputContext& op_output, + [[maybe_unused]] ExecutionContext& context) override { auto values1 = op_input.receive<std::vector<int>>("in").value(); HOLOSCAN_LOG_INFO("Middle message received (count: {})", count_++); diff --git a/examples/resources/CMakeLists.txt b/examples/resources/CMakeLists.txt index 01485913..774ec398 100644 --- a/examples/resources/CMakeLists.txt +++ b/examples/resources/CMakeLists.txt @@ -15,3 +15,4 @@ add_subdirectory(clock) add_subdirectory(native) +add_subdirectory(thread_pool) diff --git a/examples/resources/clock/cpp/ping_clock.cpp b/examples/resources/clock/cpp/ping_clock.cpp index 18d74864..4e1471f2 100644 ---
a/examples/resources/clock/cpp/ping_clock.cpp +++ b/examples/resources/clock/cpp/ping_clock.cpp @@ -51,9 +51,9 @@ void TimedPingRxOp::compute(InputContext& op_input, [[maybe_unused]] OutputConte auto scheduler = fragment_->scheduler(); // To get the clock we currently have to cast the scheduler to gxf::GXFScheduler. - // TODO: Refactor C++ lib so the clock method is on Scheduler rather than GXFScheduler. - // That would allow us to avoid this dynamic_pointer_cast, but might require adding - // renaming Clock->GXFClock and then adding a new holoscan::Clock independent of GXF. + // TODO(unknown): Refactor C++ lib so the clock method is on Scheduler rather than GXFScheduler. + // That would allow us to avoid this dynamic_pointer_cast, but might require renaming + // Clock->GXFClock and then adding a new holoscan::Clock independent of GXF. auto gxf_scheduler = std::dynamic_pointer_cast<gxf::GXFScheduler>(scheduler); auto clock = gxf_scheduler->clock(); diff --git a/examples/resources/native/cpp/native_resource.cpp b/examples/resources/native/cpp/native_resource.cpp index 72f092bb..f2d675d8 100644 --- a/examples/resources/native/cpp/native_resource.cpp +++ b/examples/resources/native/cpp/native_resource.cpp @@ -45,7 +45,7 @@ class MinimalNativeResourceOp : public Operator { MinimalNativeResourceOp() = default; - void compute(InputContext& op_input, [[maybe_unused]] OutputContext& op_output, + void compute([[maybe_unused]] InputContext& op_input, [[maybe_unused]] OutputContext& op_output, [[maybe_unused]] ExecutionContext& context) override { auto res = resource("string_native_resource"); if (res) { diff --git a/examples/resources/thread_pool/CMakeLists.txt b/examples/resources/thread_pool/CMakeLists.txt new file mode 100644 index 00000000..0fa63c4d --- /dev/null +++ b/examples/resources/thread_pool/CMakeLists.txt @@ -0,0 +1,30 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +if(HOLOSCAN_CPP_EXAMPLES) + add_subdirectory(cpp) +endif() + +if(HOLOSCAN_PYTHON_EXAMPLES) + add_subdirectory(python) +endif() + +file(RELATIVE_PATH app_relative_dest_path ${CMAKE_SOURCE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}) + +install( + FILES README.md + DESTINATION "${app_relative_dest_path}" + COMPONENT "holoscan-examples" +) diff --git a/examples/resources/thread_pool/README.md b/examples/resources/thread_pool/README.md new file mode 100644 index 00000000..416fe2f3 --- /dev/null +++ b/examples/resources/thread_pool/README.md @@ -0,0 +1,51 @@ +# Ping Simple Thread Pool + +This example demonstrates a simple ping application whose operators are pinned to dedicated worker threads using ThreadPool resources. Two transmitter/receiver pairs are connected using add_flow(). + +Each pair involves two operators: + 1. a transmitter, set to transmit a sequence of integers to its 'out' port + 2.
a receiver that prints the received values to the terminal + +## C++ Run instructions + +* **using deb package install or NGC container**: + ```bash + /opt/nvidia/holoscan/examples/resources/thread_pool/cpp/ping_simple_thread_pool + ``` +* **source (dev container)**: + ```bash + ./run launch # optional: append `install` for install tree + ./examples/resources/thread_pool/cpp/ping_simple_thread_pool + ``` +* **source (local env)**: + ```bash + ${BUILD_OR_INSTALL_DIR}/examples/resources/thread_pool/cpp/ping_simple_thread_pool + ``` + +## Python Run instructions + +* **using python wheel**: + ```bash + # [Prerequisite] Download example .py file below to `APP_DIR` + # [Optional] Start the virtualenv where holoscan is installed + python3 <APP_DIR>/ping_simple_thread_pool.py + ``` +* **using deb package install**: + ```bash + export PYTHONPATH=/opt/nvidia/holoscan/python/lib + python3 /opt/nvidia/holoscan/examples/resources/thread_pool/python/ping_simple_thread_pool.py + ``` +* **from NGC container**: + ```bash + python3 /opt/nvidia/holoscan/examples/resources/thread_pool/python/ping_simple_thread_pool.py + ``` +* **source (dev container)**: + ```bash + ./run launch # optional: append `install` for install tree + python3 ./examples/resources/thread_pool/python/ping_simple_thread_pool.py + ``` +* **source (local env)**: + ```bash + export PYTHONPATH=${BUILD_OR_INSTALL_DIR}/python/lib + python3 ${BUILD_OR_INSTALL_DIR}/examples/resources/thread_pool/python/ping_simple_thread_pool.py + ``` diff --git a/examples/resources/thread_pool/cpp/CMakeLists.min.txt b/examples/resources/thread_pool/cpp/CMakeLists.min.txt new file mode 100644 index 00000000..1be40815 --- /dev/null +++ b/examples/resources/thread_pool/cpp/CMakeLists.min.txt @@ -0,0 +1,50 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the \"License\"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an \"AS IS\" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.
+ +cmake_minimum_required(VERSION 3.20) +project(holoscan_ping_simple_thread_pool CXX) + +# Finds the package holoscan +find_package(holoscan REQUIRED CONFIG + PATHS "/opt/nvidia/holoscan" "/workspace/holoscan-sdk/install") + +add_executable(ping_simple_thread_pool + ping_simple_thread_pool.cpp +) + +target_link_libraries(ping_simple_thread_pool + PRIVATE + holoscan::core + holoscan::ops::ping_rx + holoscan::ops::ping_tx +) + +# Testing +if(BUILD_TESTING) + add_test(NAME EXAMPLE_CPP_PING_SIMPLE_THREAD_POOL_TEST + COMMAND ping_simple_thread_pool + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} + ) + set_tests_properties(EXAMPLE_CPP_PING_SIMPLE_THREAD_POOL_TEST PROPERTIES + ENVIRONMENT "HOLOSCAN_LOG_LEVEL=DEBUG" + # Note: the following regular expressions are specific to the logging within GXF itself, not Holoscan + PASS_REGULAR_EXPRESSION "ThreadPool \\[cid: \\d+, name: pool1\\] created thread \\[uid: \\d+\\] for pinned entity \\[eid: \\d+, name: tx1\\]" + PASS_REGULAR_EXPRESSION "ThreadPool \\[cid: \\d+, name: pool1\\] created thread \\[uid: \\d+\\] for pinned entity \\[eid: \\d+, name: rx1\\]" + PASS_REGULAR_EXPRESSION "ThreadPool \\[cid: \\d+, name: pool2\\] created thread \\[uid: \\d+\\] for pinned entity \\[eid: \\d+, name: tx2\\]" + PASS_REGULAR_EXPRESSION "ThreadPool \\[cid: \\d+, name: pool2\\] created thread \\[uid: \\d+\\] for pinned entity \\[eid: \\d+, name: rx2\\]" + PASS_REGULAR_EXPRESSION "Rx message value: 10" + PASS_REGULAR_EXPRESSION "Rx message value: 15" + ) +endif() diff --git a/examples/resources/thread_pool/cpp/CMakeLists.txt b/examples/resources/thread_pool/cpp/CMakeLists.txt new file mode 100644 index 00000000..4997236f --- /dev/null +++ b/examples/resources/thread_pool/cpp/CMakeLists.txt @@ -0,0 +1,75 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.
+ +# Create examples +add_executable(ping_simple_thread_pool + ping_simple_thread_pool.cpp +) +target_link_libraries(ping_simple_thread_pool + PUBLIC + holoscan::core + holoscan::ops::ping_tx + holoscan::ops::ping_rx +) + +# Install examples + +# Set the install RPATH based on the location of the Holoscan SDK libraries +# The GXF extensions are loaded by the GXF libraries - no need to include here +file(RELATIVE_PATH install_lib_relative_path ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_SOURCE_DIR}/${HOLOSCAN_INSTALL_LIB_DIR}) +set_target_properties(ping_simple_thread_pool PROPERTIES INSTALL_RPATH "\$ORIGIN/${install_lib_relative_path}") + +# Install following the relative folder path +file(RELATIVE_PATH app_relative_dest_path ${CMAKE_SOURCE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}) + +if(HOLOSCAN_INSTALL_EXAMPLE_SOURCE) +# Install the source +install(FILES ping_simple_thread_pool.cpp + DESTINATION "${app_relative_dest_path}" + COMPONENT holoscan-examples +) + +# Install the minimal CMakeLists.txt file +install(FILES CMakeLists.min.txt + RENAME "CMakeLists.txt" + DESTINATION "${app_relative_dest_path}" + COMPONENT holoscan-examples +) +endif() + +# Install the compiled example +install(TARGETS ping_simple_thread_pool + DESTINATION "${app_relative_dest_path}" + COMPONENT holoscan-examples +) + +# Testing +if(HOLOSCAN_BUILD_TESTS) + add_test(NAME EXAMPLE_CPP_PING_SIMPLE_THREAD_POOL_TEST + COMMAND ping_simple_thread_pool + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} + ) + set_tests_properties(EXAMPLE_CPP_PING_SIMPLE_THREAD_POOL_TEST PROPERTIES + ENVIRONMENT "HOLOSCAN_LOG_LEVEL=DEBUG" + # Note: the following regular expressions are specific to the logging within GXF itself, not Holoscan + PASS_REGULAR_EXPRESSION "ThreadPool \\[cid: \\d+, name: pool1\\] created thread \\[uid: \\d+\\] for pinned entity \\[eid: \\d+, name: tx1\\]" + PASS_REGULAR_EXPRESSION "ThreadPool \\[cid: \\d+, name: pool1\\] created thread \\[uid: \\d+\\] for pinned entity \\[eid: \\d+, name: rx1\\]" + PASS_REGULAR_EXPRESSION "ThreadPool \\[cid: \\d+, name: pool2\\] created thread \\[uid: \\d+\\] for pinned entity \\[eid: \\d+, name: tx2\\]" + PASS_REGULAR_EXPRESSION "ThreadPool \\[cid: \\d+, name: pool2\\] created thread \\[uid: \\d+\\] for pinned entity \\[eid: \\d+, name: rx2\\]" + PASS_REGULAR_EXPRESSION "Rx message value: 10" + PASS_REGULAR_EXPRESSION "Rx message value: 15" + ) +endif() + diff --git a/examples/resources/thread_pool/cpp/ping_simple_thread_pool.cpp b/examples/resources/thread_pool/cpp/ping_simple_thread_pool.cpp new file mode 100644 index 00000000..30f5d256 --- /dev/null +++ b/examples/resources/thread_pool/cpp/ping_simple_thread_pool.cpp @@ -0,0 +1,66 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */
+
+#include <holoscan/holoscan.hpp>
+#include <holoscan/operators/ping_rx/ping_rx.hpp>
+#include <holoscan/operators/ping_tx/ping_tx.hpp>
+
+class SampleThreadPoolApp : public holoscan::Application {
+ public:
+  void compose() override {
+    using namespace holoscan;
+    // Define the tx and rx operators, allowing tx1 to execute 10 times and tx2 15 times
+    auto tx1 = make_operator<ops::PingTxOp>("tx1", make_condition<CountCondition>(10));
+    auto rx1 = make_operator<ops::PingRxOp>("rx1");
+
+    auto tx2 = make_operator<ops::PingTxOp>("tx2", make_condition<CountCondition>(15));
+    auto rx2 = make_operator<ops::PingRxOp>("rx2");
+
+    // Create a thread pool with two threads
+    auto pool1 = make_thread_pool("pool1", 2);
+    // Operators can be assigned individually to this thread pool (setting pinning to true)
+    pool1->add(tx1, true);
+    pool1->add(rx1, true);
+
+    // Create a second thread pool with two threads. We use two separate pools in this example
+    // purely for demonstration purposes. In practice, all operators can typically be added to the
+    // same thread pool. The one exception to this is that all operators in a thread pool using a
+    // GPU-based allocator like BlockMemoryPool, CudaStreamPool, RMMAllocator or
+    // StreamOrderedAllocator must be using a common CUDA Device ID ("dev_id" parameter). If
+    // operators involving different devices exist, these should be assigned to separate thread
+    // pools.
+    auto pool2 = make_thread_pool("pool2", 2);
+    // Assign multiple operators to the pool in a single call
+    pool2->add({tx2, rx2}, true);
+
+    // Define the workflow: tx1 -> rx1 and tx2 -> rx2
+    add_flow(tx1, rx1);
+    add_flow(tx2, rx2);
+  }
+};
+
+int main([[maybe_unused]] int argc, [[maybe_unused]] char** argv) {
+  auto app = holoscan::make_application<SampleThreadPoolApp>();
+
+  // The default greedy scheduler is single-threaded, so the ThreadPool would not be utilized.
+  // Instead we configure the app to use the EventBasedScheduler.
+  app->scheduler(app->make_scheduler<holoscan::EventBasedScheduler>(
+      "event-based", holoscan::Arg("worker_thread_number", static_cast<int64_t>(4))));
+  app->run();
+
+  return 0;
+}
diff --git a/examples/resources/thread_pool/python/CMakeLists.min.txt b/examples/resources/thread_pool/python/CMakeLists.min.txt
new file mode 100644
index 00000000..e6c58435
--- /dev/null
+++ b/examples/resources/thread_pool/python/CMakeLists.min.txt
@@ -0,0 +1,31 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
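+
+# Usage sketch (assumed workflow): this minimal file only registers the test below;
+# the application itself can be run directly with `python3 ping_simple_thread_pool.py`.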
+
+# Testing
+if(BUILD_TESTING)
+  add_test(NAME EXAMPLE_PYTHON_PING_SIMPLE_THREAD_POOL_TEST
+    COMMAND python3 ping_simple_thread_pool.py
+    WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
+  )
+  set_tests_properties(EXAMPLE_PYTHON_PING_SIMPLE_THREAD_POOL_TEST PROPERTIES
+    # Note: the following regular expressions are specific to the logging within GXF itself, not Holoscan
+    PASS_REGULAR_EXPRESSION "ThreadPool \\[cid: \\d+, name: pool1\\] created thread \\[uid: \\d+\\] for pinned entity \\[eid: \\d+, name: tx1\\]"
+    PASS_REGULAR_EXPRESSION "ThreadPool \\[cid: \\d+, name: pool1\\] created thread \\[uid: \\d+\\] for pinned entity \\[eid: \\d+, name: rx1\\]"
+    PASS_REGULAR_EXPRESSION "ThreadPool \\[cid: \\d+, name: pool2\\] created thread \\[uid: \\d+\\] for pinned entity \\[eid: \\d+, name: tx2\\]"
+    PASS_REGULAR_EXPRESSION "ThreadPool \\[cid: \\d+, name: pool2\\] created thread \\[uid: \\d+\\] for pinned entity \\[eid: \\d+, name: rx2\\]"
+    PASS_REGULAR_EXPRESSION "Rx message value: 10"
+    PASS_REGULAR_EXPRESSION "Rx message value: 15"
+  )
+endif()
diff --git a/examples/resources/thread_pool/python/CMakeLists.txt b/examples/resources/thread_pool/python/CMakeLists.txt
new file mode 100644
index 00000000..3524532d
--- /dev/null
+++ b/examples/resources/thread_pool/python/CMakeLists.txt
@@ -0,0 +1,55 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
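+
+# Note: the Python example requires no build step; this file just copies the script into
+# the build tree, installs it, and registers a test. The app can also be run directly:
+#   python3 ping_simple_thread_pool.py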
+
+# Get relative folder path for the app
+file(RELATIVE_PATH app_relative_dest_path ${CMAKE_SOURCE_DIR} ${CMAKE_CURRENT_SOURCE_DIR})
+
+# Copy native operator ping application
+add_custom_target(python_ping_simple_thread_pool ALL
+  COMMAND ${CMAKE_COMMAND} -E copy "${CMAKE_CURRENT_SOURCE_DIR}/ping_simple_thread_pool.py" ${CMAKE_CURRENT_BINARY_DIR}
+  DEPENDS "ping_simple_thread_pool.py"
+  BYPRODUCTS "ping_simple_thread_pool.py"
+)
+
+# Install the app
+install(FILES
+  "${CMAKE_CURRENT_SOURCE_DIR}/ping_simple_thread_pool.py"
+  DESTINATION "${app_relative_dest_path}"
+  COMPONENT "holoscan-examples"
+)
+
+# Install the minimal CMakeLists.txt file
+install(FILES CMakeLists.min.txt
+  RENAME "CMakeLists.txt"
+  DESTINATION "${app_relative_dest_path}"
+  COMPONENT holoscan-examples
+)
+
+# Testing
+if(HOLOSCAN_BUILD_TESTS)
+  add_test(NAME EXAMPLE_PYTHON_PING_SIMPLE_THREAD_POOL_TEST
+    COMMAND python3 ping_simple_thread_pool.py
+    WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
+  )
+  set_tests_properties(EXAMPLE_PYTHON_PING_SIMPLE_THREAD_POOL_TEST PROPERTIES
+    # Note: the following regular expressions are specific to the logging within GXF itself, not Holoscan
+    PASS_REGULAR_EXPRESSION "ThreadPool \\[cid: \\d+, name: pool1\\] created thread \\[uid: \\d+\\] for pinned entity \\[eid: \\d+, name: tx1\\]"
+    PASS_REGULAR_EXPRESSION "ThreadPool \\[cid: \\d+, name: pool1\\] created thread \\[uid: \\d+\\] for pinned entity \\[eid: \\d+, name: rx1\\]"
+    PASS_REGULAR_EXPRESSION "ThreadPool \\[cid: \\d+, name: pool2\\] created thread \\[uid: \\d+\\] for pinned entity \\[eid: \\d+, name: tx2\\]"
+    PASS_REGULAR_EXPRESSION "ThreadPool \\[cid: \\d+, name: pool2\\] created thread \\[uid: \\d+\\] for pinned entity \\[eid: \\d+, name: rx2\\]"
+    PASS_REGULAR_EXPRESSION "Rx message value: 10"
+    PASS_REGULAR_EXPRESSION "Rx message value: 15"
+  )
+endif()
diff --git a/examples/resources/thread_pool/python/ping_simple_thread_pool.py b/examples/resources/thread_pool/python/ping_simple_thread_pool.py
new file mode 100644
index 00000000..36ef50a0
--- /dev/null
+++ b/examples/resources/thread_pool/python/ping_simple_thread_pool.py
@@ -0,0 +1,60 @@
+"""
+SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+SPDX-License-Identifier: Apache-2.0
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""  # noqa: E501
+
+from holoscan.conditions import CountCondition
+from holoscan.core import Application
+from holoscan.operators import PingRxOp, PingTxOp
+from holoscan.schedulers import EventBasedScheduler
+
+
+class SampleThreadPoolApp(Application):
+    def compose(self):
+        # Define the tx and rx operators, allowing tx1 to execute 10 times and tx2 15 times
+        tx1 = PingTxOp(self, CountCondition(self, 10), name="tx1")
+        tx2 = PingTxOp(self, CountCondition(self, 15), name="tx2")
+        rx1 = PingRxOp(self, name="rx1")
+        rx2 = PingRxOp(self, name="rx2")
+
+        # Create a thread pool with two threads and pin two operators to these threads.
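+        # (Pinning -- the boolean second argument to add() below -- dedicates one worker
+        # thread per operator; unpinned operators run on any available worker thread.)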
+        pool1 = self.make_thread_pool("pool1", 2)
+        pool1.add(tx1, True)
+        pool1.add(rx1, True)
+
+        # Create a second thread pool with two threads. We use two separate pools in this example
+        # purely for demonstration purposes. In practice, all operators can typically be added to
+        # the same thread pool. The one exception to this is that all operators in a thread pool
+        # using a GPU-based allocator like BlockMemoryPool, CudaStreamPool, RMMAllocator or
+        # StreamOrderedAllocator must be using a common CUDA Device ID ("dev_id" parameter). If
+        # operators involving different devices exist, these should be assigned to separate thread
+        # pools.
+        pool2 = self.make_thread_pool("pool2", 2)
+        pool2.add([tx2, rx2], True)
+
+        # Define the workflow: tx1 -> rx1 and tx2 -> rx2
+        self.add_flow(tx1, rx1)
+        self.add_flow(tx2, rx2)
+
+
+def main():
+    app = SampleThreadPoolApp()
+    scheduler = EventBasedScheduler(app, worker_thread_number=3, name="ebs")
+    app.scheduler(scheduler)
+    app.run()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/examples/tensor_interop/cpp/tensor_interop.cpp b/examples/tensor_interop/cpp/tensor_interop.cpp
index 8cb68f09..305b808e 100644
--- a/examples/tensor_interop/cpp/tensor_interop.cpp
+++ b/examples/tensor_interop/cpp/tensor_interop.cpp
@@ -32,6 +32,7 @@
 #include "./receive_tensor_gxf.hpp"
 #include "./send_tensor_gxf.hpp"
 
+// NOLINTBEGIN(cppcoreguidelines-macro-usage)
 #ifdef CUDA_TRY
 #undef CUDA_TRY
 #define CUDA_TRY(stmt) \
@@ -47,6 +48,7 @@
     } \
   }
 #endif
+// NOLINTEND(cppcoreguidelines-macro-usage)
 
 namespace holoscan::ops {
@@ -84,7 +86,7 @@ class ProcessTensorOp : public Operator {
   }
 
   void compute(InputContext& op_input, OutputContext& op_output,
-               ExecutionContext& context) override {
+               [[maybe_unused]] ExecutionContext& context) override {
     // The type of `in_message` is 'holoscan::TensorMap'.
     auto in_message = op_input.receive("in").value();
     // The type of out_message is TensorMap
diff --git a/examples/v4l2_camera/cpp/CMakeLists.min.txt b/examples/v4l2_camera/cpp/CMakeLists.min.txt
index 57231d8e..9f9320eb 100644
--- a/examples/v4l2_camera/cpp/CMakeLists.min.txt
+++ b/examples/v4l2_camera/cpp/CMakeLists.min.txt
@@ -43,7 +43,7 @@ add_dependencies(v4l2_camera v4l2_camera_yaml)
 
 # Testing
 option(HOLOSCAN_BUILD_V4L2_TESTS "Build tests for V4L2 loopback" OFF)
-if(HOLOSCAN_BUILD_TESTS AND HOLOSCAN_BUILD_V4L2_TESTS)
+if(BUILD_TESTING AND HOLOSCAN_BUILD_V4L2_TESTS)
   # Assumes that the v4l2 video loopback has already been mounted and the yaml files have been
   # updated to use the virtual loopback device.
diff --git a/examples/v4l2_camera/cpp/v4l2_camera.cpp b/examples/v4l2_camera/cpp/v4l2_camera.cpp index 28907853..b6fe4171 100644 --- a/examples/v4l2_camera/cpp/v4l2_camera.cpp +++ b/examples/v4l2_camera/cpp/v4l2_camera.cpp @@ -55,11 +55,7 @@ class App : public holoscan::Application { // Set Holoviz width and height from source resolution auto viz_args = from_config("visualizer"); for (auto& arg : from_config("source")) { - if (arg.name() == "width") { - viz_args.add(arg); - } else if (arg.name() == "height") { - viz_args.add(arg); - } + if (arg.name() == "width" || arg.name() == "height") { viz_args.add(arg); } } visualizer = make_operator("visualizer", viz_args, Arg("allocator") = allocator); diff --git a/examples/wrap_operator_as_gxf_extension/CMakeLists.txt b/examples/wrap_operator_as_gxf_extension/CMakeLists.txt index e5029ad7..d85b2fb7 100644 --- a/examples/wrap_operator_as_gxf_extension/CMakeLists.txt +++ b/examples/wrap_operator_as_gxf_extension/CMakeLists.txt @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2022-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2022-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -17,6 +17,9 @@ add_subdirectory(gxf_extension) add_subdirectory(gxf_app) add_subdirectory(ping_rx_native_op) add_subdirectory(ping_tx_native_op) +if(HOLOSCAN_REGISTER_GXF_EXTENSIONS) + add_subdirectory(gxf_registry) +endif() file(RELATIVE_PATH app_relative_dest_path ${CMAKE_SOURCE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}) diff --git a/examples/wrap_operator_as_gxf_extension/gxf_extension/CMakeLists.txt b/examples/wrap_operator_as_gxf_extension/gxf_extension/CMakeLists.txt index f30d2952..8eaea7e0 100644 --- a/examples/wrap_operator_as_gxf_extension/gxf_extension/CMakeLists.txt +++ b/examples/wrap_operator_as_gxf_extension/gxf_extension/CMakeLists.txt @@ -14,6 +14,9 @@ # limitations under the License. include(WrapOperatorAsGXFExtension) +if(HOLOSCAN_REGISTER_GXF_EXTENSIONS) + set(REGISTER_ARG "REGISTER") +endif() wrap_operator_as_gxf_extension( OPERATOR_CLASS "myops::PingTxNativeOp" OPERATOR_HEADER_INCLUDE "ping_tx_native_op/ping_tx_native_op.hpp" @@ -37,6 +40,12 @@ wrap_operator_as_gxf_extension( EXTENSION_TARGET_NAME "gxf_wrapped_ping_tx_native_op" # optional, defaults to EXTENSION_NAME lowercase EXTENSION_TARGET_PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} + EXTENSION_DEPENDS + $ + $ + ${REGISTER_ARG} + REGISTER_DEPENDS + register_HoloscanWrapperExtension ) wrap_operator_as_gxf_extension( @@ -62,6 +71,12 @@ wrap_operator_as_gxf_extension( EXTENSION_TARGET_NAME "gxf_wrapped_ping_rx_native_op" # optional, defaults to EXTENSION_NAME lowercase EXTENSION_TARGET_PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} + EXTENSION_DEPENDS + $ + $ + ${REGISTER_ARG} + REGISTER_DEPENDS + register_HoloscanWrapperExtension ) diff --git a/examples/wrap_operator_as_gxf_extension/gxf_registry/CMakeLists.txt b/examples/wrap_operator_as_gxf_extension/gxf_registry/CMakeLists.txt new file mode 100644 index 00000000..e55369ab --- /dev/null +++ b/examples/wrap_operator_as_gxf_extension/gxf_registry/CMakeLists.txt @@ -0,0 +1,64 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +if(BUILD_TESTING AND HOLOSCAN_REGISTER_GXF_EXTENSIONS) + find_package(CUDAToolkit) + configure_file(target.yaml.in target.yaml @ONLY) + configure_file(ping_installable.yaml.in ping_installable.yaml @ONLY) + + find_program(GXF_REGISTRY_EXECUTABLE registry REQUIRED) + add_test(NAME gxf_registry_inspection_test + COMMAND registry extn info -n PingRxNativeOpExtension + ) + set_tests_properties(gxf_registry_inspection_test PROPERTIES + PASS_REGULAR_EXPRESSION "name : PingRxNativeOpExtension" + PASS_REGULAR_EXPRESSION "uuid : 2e62c3ee-c4f0-4784-aed1-83505e49dc73" + PASS_REGULAR_EXPRESSION "version : ${holoscan_VERSION}" + PASS_REGULAR_EXPRESSION "myexts::PingRxNativeOpCodelet :" + ) + + add_test(NAME gxf_registry_install_graph_test + COMMAND registry + graph install + -g ping_installable.yaml + -m test_install_app/manifest.yaml + -u test_install_app + -i "" + -d target.yaml + ) + set_tests_properties(gxf_registry_install_graph_test PROPERTIES + PASS_REGULAR_EXPRESSION "Graph installed to output directory" + ) + + find_program(GXE_EXECUTABLE gxe + HINTS + /usr/local/bin + $ + ${HOLOSCAN_GXE_LOCATION} + REQUIRED + ) + add_test(NAME gxf_registry_run_graph_test + COMMAND ${GXE_EXECUTABLE} + -app ping_installable.yaml + -manifest test_install_app/manifest.yaml + -app_root ${CMAKE_CURRENT_BINARY_DIR} + ) + set_tests_properties(gxf_registry_run_graph_test PROPERTIES + DEPENDS gxf_registry_install_graph_test + PASS_REGULAR_EXPRESSION "Number of pings received: 10" + FAIL_REGULAR_EXPRESSION "[^a-z]Error;ERROR;Failed" + ) + +endif() \ No newline at end of file diff --git a/examples/wrap_operator_as_gxf_extension/gxf_registry/ping_installable.yaml.in b/examples/wrap_operator_as_gxf_extension/gxf_registry/ping_installable.yaml.in new file mode 100644 index 00000000..4108005e --- /dev/null +++ b/examples/wrap_operator_as_gxf_extension/gxf_registry/ping_installable.yaml.in @@ -0,0 +1,64 @@ +%YAML 1.2 +# SPDX-FileCopyrightText: Copyright (c) 2022-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
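+#
+# Note: the @holoscan_VERSION@ placeholders below are substituted by CMake's
+# configure_file() when this template is configured into ping_installable.yaml.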
+--- +dependencies: +- extension: PingRxNativeOpExtension + uuid: 2e62c3ee-c4f0-4784-aed1-83505e49dc73 + version: @holoscan_VERSION@ +- extension: PingTxNativeOpExtension + uuid: 2f3f69b2-7c2c-4fd8-b119-237f5110572d + version: @holoscan_VERSION@ +name: tx +components: + # spec.output("out"); + - name: out + type: nvidia::gxf::DoubleBufferTransmitter + - type: nvidia::gxf::DownstreamReceptiveSchedulingTerm + parameters: + transmitter: out + min_size: 1 + - name: ping_tx_native_op + type: myexts::PingTxNativeOpCodelet + - name: count_condition + type: nvidia::gxf::CountSchedulingTerm + parameters: + count: 10 +--- +name: rx +components: + # spec.input("in"); + - name: in + type: nvidia::gxf::DoubleBufferReceiver + - type: nvidia::gxf::MessageAvailableSchedulingTerm + parameters: + receiver: in + min_size: 1 + - name: ping_rx_native_op + type: myexts::PingRxNativeOpCodelet +--- +components: + - type: nvidia::gxf::Connection + parameters: + source: tx/out + target: rx/in +--- +components: + - name: rt_clock + type: nvidia::gxf::RealtimeClock + - type: nvidia::gxf::GreedyScheduler + parameters: + clock: rt_clock + max_duration_ms: 1000000 diff --git a/examples/wrap_operator_as_gxf_extension/gxf_registry/target.yaml.in b/examples/wrap_operator_as_gxf_extension/gxf_registry/target.yaml.in new file mode 100644 index 00000000..b44937ae --- /dev/null +++ b/examples/wrap_operator_as_gxf_extension/gxf_registry/target.yaml.in @@ -0,0 +1,6 @@ +platform: + arch: @CMAKE_SYSTEM_PROCESSOR@ + os: linux + distribution: ubuntu_22.04 +compute: + cuda: @CUDAToolkit_VERSION@ diff --git a/examples/wrap_operator_as_gxf_extension/ping_rx_native_op/ping_rx_native_op.cpp b/examples/wrap_operator_as_gxf_extension/ping_rx_native_op/ping_rx_native_op.cpp index b0baee89..1cd3ccc0 100644 --- a/examples/wrap_operator_as_gxf_extension/ping_rx_native_op/ping_rx_native_op.cpp +++ b/examples/wrap_operator_as_gxf_extension/ping_rx_native_op/ping_rx_native_op.cpp @@ -26,8 +26,8 @@ void PingRxNativeOp::setup(OperatorSpec& spec) { spec.input("in"); } -void PingRxNativeOp::compute(InputContext& op_input, OutputContext& op_output, - ExecutionContext& context) { +void PingRxNativeOp::compute(InputContext& op_input, [[maybe_unused]] OutputContext& op_output, + [[maybe_unused]] ExecutionContext& context) { HOLOSCAN_LOG_INFO("PingRxNativeOp::compute() called."); // The type of `in_message` is 'holoscan::gxf::Entity'. 
diff --git a/examples/wrap_operator_as_gxf_extension/ping_tx_native_op/ping_tx_native_op.cpp b/examples/wrap_operator_as_gxf_extension/ping_tx_native_op/ping_tx_native_op.cpp index 4dbdcfc3..7d7937a7 100644 --- a/examples/wrap_operator_as_gxf_extension/ping_tx_native_op/ping_tx_native_op.cpp +++ b/examples/wrap_operator_as_gxf_extension/ping_tx_native_op/ping_tx_native_op.cpp @@ -26,7 +26,7 @@ void PingTxNativeOp::setup(OperatorSpec& spec) { spec.output("out"); } -void PingTxNativeOp::compute(InputContext& op_input, OutputContext& op_output, +void PingTxNativeOp::compute([[maybe_unused]] InputContext& op_input, OutputContext& op_output, ExecutionContext& context) { HOLOSCAN_LOG_INFO("PingTxNativeOp::compute() called."); diff --git a/gxf_extensions/gxf_holoscan_wrapper/CMakeLists.txt b/gxf_extensions/gxf_holoscan_wrapper/CMakeLists.txt index 1022c014..156b7381 100644 --- a/gxf_extensions/gxf_holoscan_wrapper/CMakeLists.txt +++ b/gxf_extensions/gxf_holoscan_wrapper/CMakeLists.txt @@ -47,6 +47,65 @@ target_include_directories(gxf_holoscan_wrapper_lib $ ) +# Generate the GXF registry manifest +include(WrapOperatorAsGXFExtension) +generate_gxf_registry_manifest( + EXTENSION_TARGET gxf_holoscan_wrapper + EXTENSION_NAME HoloscanWrapperExtension + BINARY_FILES + $ + $ + $ + $ + $ + $ + $ + $ + $ + $ + $ + $ + $ + $ + $ + $ + $ + $ + $ + $ + $ + $ + $ + $ + $ + $ + $ + $ + $ + $ + $ + FORWARD_ARGS + --uuid "12d01b4e-e06f-49ef-93c4-961834347385" + --version "${holoscan_VERSION}" + --extension-dependencies + $ + $ + $ + $ + $ + $ + --headers + "${CMAKE_CURRENT_SOURCE_DIR}/operator_wrapper.hpp" + "${CMAKE_CURRENT_SOURCE_DIR}/operator_wrapper_fragment.hpp" +) +if(HOLOSCAN_REGISTER_GXF_EXTENSIONS) + register_gxf_extension( + EXTENSION_NAME HoloscanWrapperExtension + MANIFEST "${CMAKE_CURRENT_BINARY_DIR}/HoloscanWrapperExtension_manifest.yaml" + DEPENDS register_UcxHoloscanExtension + ) +endif() + # Install the header files install(FILES operator_wrapper.hpp diff --git a/gxf_extensions/ucx/CMakeLists.txt b/gxf_extensions/ucx/CMakeLists.txt index 08b58d35..5282c767 100644 --- a/gxf_extensions/ucx/CMakeLists.txt +++ b/gxf_extensions/ucx/CMakeLists.txt @@ -39,5 +39,27 @@ target_link_libraries(gxf_ucx_holoscan PUBLIC gxf_ucx_holoscan_lib PRIVATE holoscan_security_flags ) + +include(WrapOperatorAsGXFExtension) +generate_gxf_registry_manifest( + EXTENSION_TARGET gxf_ucx_holoscan + EXTENSION_NAME UcxHoloscanExtension + FORWARD_ARGS + --uuid "e549f7ce-9ecf-4d53-8156-418727c176df" + --version "${holoscan_VERSION}" + --extension-dependencies + $ + $ + $ + --headers + "${CMAKE_CURRENT_SOURCE_DIR}/ucx_holoscan_component_serializer.hpp" +) +if(HOLOSCAN_REGISTER_GXF_EXTENSIONS) + register_gxf_extension( + EXTENSION_NAME UcxHoloscanExtension + MANIFEST "${CMAKE_CURRENT_BINARY_DIR}/UcxHoloscanExtension_manifest.yaml" + ) +endif() + # Install GXF extension as a component 'holoscan-gxf_extensions' install_gxf_extension(gxf_ucx_holoscan) diff --git a/include/holoscan/core/app_driver.hpp b/include/holoscan/core/app_driver.hpp index f1b61cb6..95455114 100644 --- a/include/holoscan/core/app_driver.hpp +++ b/include/holoscan/core/app_driver.hpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2023-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -18,6 +18,8 @@ #ifndef HOLOSCAN_CORE_APP_DRIVER_HPP #define HOLOSCAN_CORE_APP_DRIVER_HPP +#include + #include #include #include diff --git a/include/holoscan/core/application.hpp b/include/holoscan/core/application.hpp index 083a8a17..fe6b59a1 100644 --- a/include/holoscan/core/application.hpp +++ b/include/holoscan/core/application.hpp @@ -302,7 +302,7 @@ class Application : public Fragment { std::unordered_map track_distributed( uint64_t num_start_messages_to_skip = kDefaultNumStartMessagesToSkip, uint64_t num_last_messages_to_discard = kDefaultNumLastMessagesToDiscard, - int latency_threshold = kDefaultLatencyThreshold); + int latency_threshold = kDefaultLatencyThreshold, bool is_limited_tracking = false); protected: friend class AppDriver; @@ -373,6 +373,7 @@ class Application : public Fragment { * @brief Configure UCX environment variables */ void set_ucx_env(); + void set_v4l2_env(); }; } // namespace holoscan diff --git a/include/holoscan/core/arg.hpp b/include/holoscan/core/arg.hpp index a04feeb5..caaf0223 100644 --- a/include/holoscan/core/arg.hpp +++ b/include/holoscan/core/arg.hpp @@ -287,6 +287,11 @@ class Arg { */ const std::string& name() const { return name_; } + /** + * @brief Set the name of the argument + */ + void name(const std::string& arg_name) { name_ = arg_name; } + /** * @brief Get the type of the argument. * diff --git a/include/holoscan/core/argument_setter.hpp b/include/holoscan/core/argument_setter.hpp index d9331a55..421c18e6 100644 --- a/include/holoscan/core/argument_setter.hpp +++ b/include/holoscan/core/argument_setter.hpp @@ -18,6 +18,8 @@ #ifndef HOLOSCAN_CORE_ARGUMENT_SETTER_HPP #define HOLOSCAN_CORE_ARGUMENT_SETTER_HPP +#include + #include #include #include diff --git a/include/holoscan/core/component.hpp b/include/holoscan/core/component.hpp index 75879d93..6d7a0e81 100644 --- a/include/holoscan/core/component.hpp +++ b/include/holoscan/core/component.hpp @@ -18,6 +18,8 @@ #ifndef HOLOSCAN_CORE_COMPONENT_HPP #define HOLOSCAN_CORE_COMPONENT_HPP +#include + #include #include #include diff --git a/include/holoscan/core/condition.hpp b/include/holoscan/core/condition.hpp index fea7eb49..ad2cb1e5 100644 --- a/include/holoscan/core/condition.hpp +++ b/include/holoscan/core/condition.hpp @@ -19,6 +19,7 @@ #define HOLOSCAN_CORE_CONDITION_HPP #include +#include #include #include @@ -104,13 +105,15 @@ class Resource; enum class ConditionType { kNone, ///< No condition kMessageAvailable, ///< Default for input port (nvidia::gxf::MessageAvailableSchedulingTerm) - kDownstreamMessageAffordable, ///< Default for output port - ///< (nvidia::gxf::DownstreamReceptiveSchedulingTerm) - kCount, ///< nvidia::gxf::CountSchedulingTerm - kBoolean, ///< nvidia::gxf::BooleanSchedulingTerm - kPeriodic, ///< nvidia::gxf::PeriodicSchedulingTerm - kAsynchronous, ///< nvidia::gxf::AsynchronousSchedulingTerm - kExpiringMessageAvailable, ///< nvidia::gxf::ExpiringMessageAvailableSchedulingTerm + kDownstreamMessageAffordable, ///< Default for output port + ///< (nvidia::gxf::DownstreamReceptiveSchedulingTerm) + kCount, ///< nvidia::gxf::CountSchedulingTerm + kBoolean, ///< nvidia::gxf::BooleanSchedulingTerm + kPeriodic, ///< nvidia::gxf::PeriodicSchedulingTerm + kAsynchronous, ///< nvidia::gxf::AsynchronousSchedulingTerm + kExpiringMessageAvailable, ///< nvidia::gxf::ExpiringMessageAvailableSchedulingTerm + kMultiMessageAvailable, ///< 
nvidia::gxf::MultiMessageAvailableSchedulingTerm
+  kMultiMessageAvailableTimeout,  ///< nvidia::gxf::MessageAvailableFrequencyThrottler
 };
 
 /**
diff --git a/include/holoscan/core/conditions/gxf/expiring_message.hpp b/include/holoscan/core/conditions/gxf/expiring_message.hpp
index 467bb212..ff95920e 100644
--- a/include/holoscan/core/conditions/gxf/expiring_message.hpp
+++ b/include/holoscan/core/conditions/gxf/expiring_message.hpp
@@ -20,6 +20,8 @@
 
 #include
 
+#include
+
 #include "../../gxf/gxf_condition.hpp"
 #include "../../resources/gxf/clock.hpp"
 #include "../../resources/gxf/realtime_clock.hpp"
diff --git a/include/holoscan/core/conditions/gxf/message_available.hpp b/include/holoscan/core/conditions/gxf/message_available.hpp
index 2126eb0b..4e4dd171 100644
--- a/include/holoscan/core/conditions/gxf/message_available.hpp
+++ b/include/holoscan/core/conditions/gxf/message_available.hpp
@@ -40,7 +40,7 @@ class MessageAvailableCondition : public gxf::GXFCondition {
   std::shared_ptr receiver() { return receiver_.get(); }
 
   void min_size(uint64_t min_size);
-  size_t min_size() { return min_size_; }
+  uint64_t min_size() { return min_size_; }
 
   void front_stage_max_size(size_t front_stage_max_size);
   size_t front_stage_max_size() { return front_stage_max_size_; }
diff --git a/include/holoscan/core/conditions/gxf/multi_message_available.hpp b/include/holoscan/core/conditions/gxf/multi_message_available.hpp
new file mode 100644
index 00000000..8d43a06e
--- /dev/null
+++ b/include/holoscan/core/conditions/gxf/multi_message_available.hpp
@@ -0,0 +1,94 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef HOLOSCAN_CORE_CONDITIONS_GXF_MULTI_MESSAGE_AVAILABLE_HPP
+#define HOLOSCAN_CORE_CONDITIONS_GXF_MULTI_MESSAGE_AVAILABLE_HPP
+
+#include <memory>
+#include <stdexcept>
+#include <string>
+#include <vector>
+
+#include <gxf/std/scheduling_terms.hpp>
+
+#include "../../gxf/gxf_condition.hpp"
+#include "../../resource.hpp"
+#include "../../resources/gxf/realtime_clock.hpp"
+
+namespace holoscan {
+
+class MultiMessageAvailableCondition : public gxf::GXFCondition {
+ public:
+  HOLOSCAN_CONDITION_FORWARD_ARGS_SUPER(MultiMessageAvailableCondition, GXFCondition)
+
+  /**
+   * @brief Sampling mode to apply to the conditions across the input ports (receivers).
+   *
+   * SamplingMode::kSumOfAll - min_sum specifies the minimum sum of messages across all receivers
+   * SamplingMode::kPerReceiver - min_sizes specifies a minimum number of messages per connected receiver
+   */
+  using SamplingMode = nvidia::gxf::SamplingMode;
+
+  MultiMessageAvailableCondition() = default;
+
+  const char* gxf_typename() const override {
+    return "nvidia::gxf::MultiMessageAvailableSchedulingTerm";
+  }
+
+  void receivers(std::vector<std::shared_ptr<Receiver>> receivers) {
+    receivers_ = receivers;
+  }
+  std::vector<std::shared_ptr<Receiver>>& receivers() { return receivers_.get(); }
+
+  void initialize() override;
+
+  void setup(ComponentSpec& spec) override;
+
+  // wrap setters available on the underlying nvidia::gxf::MultiMessageAvailableSchedulingTerm
+  // void min_size(size_t value);  // min_size parameter is deprecated
+  void min_sum(size_t value);
+  size_t min_sum() { return min_sum_; }
+
+  void sampling_mode(SamplingMode value);
+  SamplingMode sampling_mode() {
+    std::string mode = sampling_mode_.get().as<std::string>();
+    if (mode == "SumOfAll") {
+      return SamplingMode::kSumOfAll;
+    } else if (mode == "PerReceiver") {
+      return SamplingMode::kPerReceiver;
+    } else {
+      throw std::runtime_error(fmt::format("unknown mode: {}", mode));
+    }
+  }
+
+  void add_min_size(size_t value);
+
+  std::vector<size_t> min_sizes() { return min_sizes_; }
+
+  nvidia::gxf::MultiMessageAvailableSchedulingTerm* get() const;
+
+ private:
+  Parameter<std::vector<std::shared_ptr<Receiver>>> receivers_;
+  Parameter<size_t> min_sum_;
+  Parameter<std::vector<size_t>> min_sizes_;
+  // use YAML::Node because GXFParameterAdaptor doesn't have a type specific to SamplingMode
+  Parameter<YAML::Node> sampling_mode_;  // corresponds to nvidia::gxf::SamplingMode
+};
+
+}  // namespace holoscan
+
+#endif /* HOLOSCAN_CORE_CONDITIONS_GXF_MULTI_MESSAGE_AVAILABLE_HPP */
diff --git a/include/holoscan/core/conditions/gxf/multi_message_available_timeout.hpp b/include/holoscan/core/conditions/gxf/multi_message_available_timeout.hpp
new file mode 100644
index 00000000..84af1c30
--- /dev/null
+++ b/include/holoscan/core/conditions/gxf/multi_message_available_timeout.hpp
@@ -0,0 +1,74 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef HOLOSCAN_CORE_CONDITIONS_GXF_MULTI_MESSAGE_AVAILABLE_TIMEOUT_HPP
+#define HOLOSCAN_CORE_CONDITIONS_GXF_MULTI_MESSAGE_AVAILABLE_TIMEOUT_HPP
+
+#include <memory>
+#include <stdexcept>
+#include <string>
+#include <vector>
+
+#include <gxf/std/scheduling_terms.hpp>
+
+#include "../../gxf/gxf_condition.hpp"
+#include "../../resource.hpp"
+#include "../../resources/gxf/realtime_clock.hpp"
+
+namespace holoscan {
+
+class MultiMessageAvailableTimeoutCondition : public gxf::GXFCondition {
+ public:
+  HOLOSCAN_CONDITION_FORWARD_ARGS_SUPER(MultiMessageAvailableTimeoutCondition, GXFCondition)
+
+  /**
+   * @brief Sampling mode to apply to the conditions across the input ports (receivers).
+   *
+   * SamplingMode::kSumOfAll - min_sum specifies the minimum sum of messages across all receivers
+   * SamplingMode::kPerReceiver - min_sizes specifies a minimum number of messages per connected receiver
+   */
+  using SamplingMode = nvidia::gxf::SamplingMode;
+
+  MultiMessageAvailableTimeoutCondition() = default;
+
+  const char* gxf_typename() const override {
+    return "nvidia::gxf::MessageAvailableFrequencyThrottler";
+  }
+
+  void receivers(std::vector<std::shared_ptr<Receiver>> receivers) {
+    receivers_ = receivers;
+  }
+  std::vector<std::shared_ptr<Receiver>>& receivers() { return receivers_.get(); }
+
+  void initialize() override;
+
+  void setup(ComponentSpec& spec) override;
+
+  nvidia::gxf::MessageAvailableFrequencyThrottler* get() const;
+
+ private:
+  Parameter<std::vector<std::shared_ptr<Receiver>>> receivers_;
+  Parameter<std::string> execution_frequency_;
+  Parameter<size_t> min_sum_;
+  Parameter<std::vector<size_t>> min_sizes_;
+  // use YAML::Node because GXFParameterAdaptor doesn't have a type specific to SamplingMode
+  Parameter<YAML::Node> sampling_mode_;  // corresponds to nvidia::gxf::SamplingMode
+};
+
+}  // namespace holoscan
+
+#endif /* HOLOSCAN_CORE_CONDITIONS_GXF_MULTI_MESSAGE_AVAILABLE_TIMEOUT_HPP */
diff --git a/include/holoscan/core/config.hpp b/include/holoscan/core/config.hpp
index 8c355ff9..171558cf 100644
--- a/include/holoscan/core/config.hpp
+++ b/include/holoscan/core/config.hpp
@@ -18,6 +18,8 @@
 #ifndef HOLOSCAN_CORE_CONFIG_HPP
 #define HOLOSCAN_CORE_CONFIG_HPP
 
+#include
+
 #include
 #include
 #include
diff --git a/include/holoscan/core/dataflow_tracker.hpp b/include/holoscan/core/dataflow_tracker.hpp
index cb6a530b..1854027e 100644
--- a/include/holoscan/core/dataflow_tracker.hpp
+++ b/include/holoscan/core/dataflow_tracker.hpp
@@ -123,6 +123,24 @@
   */
  void set_discard_last_messages(uint64_t num) { num_last_messages_to_discard_ = num; }
 
+  /**
+   * @brief Set the limited tracking option, which enables tracking only at root and leaf
+   * operators. This also means that the distinction between paths is ignored if multiple paths
+   * have the same root and leaf operators.
+   *
+   * @param limited_tracking The boolean value to set the limited tracking option. True enables
+   * tracking only at root and leaf operators.
+   */
+  void set_limited_tracking(bool limited_tracking) { is_limited_tracking = limited_tracking; }
+
+  /**
+   * @brief Get whether the limited tracking option is enabled or not.
+   *
+   * @return true if limited tracking is enabled.
+   * @return false if limited tracking is not enabled.
+   */
+  bool limited_tracking() { return is_limited_tracking; }
+
  /**
   * @brief Enable message logging at the end of the every execution of a leaf
   * Operator.
@@ -239,6 +257,12 @@
   void write_to_logfile(std::string text);
 
  private:
+  bool is_limited_tracking =
+      false;  ///< Indicates whether tracking is performed only at the root and leaf operators,
+              ///< so that intermediate operators are not timestamped. This minimizes the
+              ///< overhead of timestamping at every operator, and of serializing and
+              ///< transferring a large amount of timestamp data.
+
   std::map source_messages_;  ///< The map of source names to the number of published messages.
   std::mutex source_messages_mutex_;  ///< The mutex for the source_messages_.
diff --git a/include/holoscan/core/executors/gxf/gxf_executor.hpp b/include/holoscan/core/executors/gxf/gxf_executor.hpp
index ecbca966..0a9d8a15 100644
--- a/include/holoscan/core/executors/gxf/gxf_executor.hpp
+++ b/include/holoscan/core/executors/gxf/gxf_executor.hpp
@@ -43,6 +43,7 @@ namespace holoscan {
 // Forward declarations
 class Arg;
 class Condition;
+class GPUDevice;
 class Resource;
 }  // namespace holoscan
@@ -350,6 +351,10 @@ class GXFExecutor : public holoscan::Executor {
   */
  void add_component_args_to_graph_entity(std::vector& args, std::shared_ptr graph_entity);
+
+  std::shared_ptr<GPUDevice> add_gpu_device_to_graph_entity(
+      const std::string& device_name, std::shared_ptr<nvidia::gxf::GraphEntity> graph_entity,
+      std::optional<int32_t> device_id = std::nullopt);
 };
 
 }  // namespace holoscan::gxf
diff --git a/include/holoscan/core/executors/gxf/gxf_parameter_adaptor.hpp b/include/holoscan/core/executors/gxf/gxf_parameter_adaptor.hpp
index 031637e0..99eed93c 100644
--- a/include/holoscan/core/executors/gxf/gxf_parameter_adaptor.hpp
+++ b/include/holoscan/core/executors/gxf/gxf_parameter_adaptor.hpp
@@ -18,6 +18,8 @@
 #ifndef HOLOSCAN_CORE_EXECUTORS_GXF_GXF_PARAMETER_ADAPTOR_HPP
 #define HOLOSCAN_CORE_EXECUTORS_GXF_GXF_PARAMETER_ADAPTOR_HPP
 
+#include
+
 #include
 #include
 #include
diff --git a/include/holoscan/core/forward_def.hpp b/include/holoscan/core/forward_def.hpp
index 91d35d6a..44e4eb0b 100644
--- a/include/holoscan/core/forward_def.hpp
+++ b/include/holoscan/core/forward_def.hpp
@@ -73,6 +73,7 @@ class Scheduler;
 // holoscan::gxf
 namespace gxf {
 class Entity;
+class EntityGroup;
 class GXFComponent;
 class GXFCondition;
 class GXFInputContext;
@@ -81,6 +82,7 @@ class GXFResource;
 class GXFExtensionManager;
 class GXFNetworkContext;
 class GXFScheduler;
+class GXFSystemResourceBase;
 }  // namespace gxf
 
 // Distributed Application
@@ -119,18 +121,22 @@ class CudaStreamCondition;
 class DownstreamMessageAffordableCondition;
 class ExpiringMessageAvailableCondition;
 class MessageAvailableCondition;
+class MultiMessageAvailableCondition;
+class MultiMessageAvailableTimeoutCondition;
 class PeriodicCondition;
 
 // Resources
 class Allocator;
 class AnnotatedDoubleBufferReceiver;
 class AnnotatedDoubleBufferTransmitter;
+class BlockMemoryPool;
 class Clock;
 class CudaAllocator;
-class BlockMemoryPool;
 class CudaStreamPool;
+class CPUThread;
 class DoubleBufferReceiver;
 class DoubleBufferTransmitter;
+class GPUDevice;
 class HoloscanUcxReceiver;
 class HoloscanUcxTransmitter;
 class ManualClock;
@@ -141,6 +147,7 @@ class SerializationBuffer;
 class StdComponentSerializer;
 class StdEntitySerializer;
 class StreamOrderedAllocator;
+class ThreadPool;
 class Transmitter;
 class UcxComponentSerializer;
 class UcxEntitySerializer;
diff --git a/include/holoscan/core/fragment.hpp b/include/holoscan/core/fragment.hpp
index f0a8e1d8..2e7a5588 100644
--- a/include/holoscan/core/fragment.hpp
+++ b/include/holoscan/core/fragment.hpp
@@ -28,6 +28,7 @@
 #include
 #include
 #include  // for std::pair
+#include
 
 #include "common.hpp"
 #include "config.hpp"
@@ -44,6 +45,8 @@ namespace gxf {
 class GXFExecutor;
 }  // namespace gxf
 
+class ThreadPool;
+
 // key = operator name, value = (input port names, output port names, multi-receiver names)
 using FragmentPortMap = std::unordered_map make_thread_pool(const std::string& name, int64_t initial_size = 1);
+
   /**
    * @brief Add an operator to the graph.
    *
@@ -640,12 +652,15 @@ class Fragment {
    * @param num_last_messages_to_discard The number of messages to discard at the end.
   * @param latency_threshold The minimum end-to-end latency in milliseconds to account for
   * in the end-to-end latency metric calculations.
+   * @param is_limited_tracking If true, tracking is limited to root and leaf operators,
+   * reducing timestamping overhead by skipping intermediate operators.
    * @return A reference to the DataFlowTracker object in which results will be
    * stored.
    */
   DataFlowTracker& track(uint64_t num_start_messages_to_skip = kDefaultNumStartMessagesToSkip,
                          uint64_t num_last_messages_to_discard = kDefaultNumLastMessagesToDiscard,
-                         int latency_threshold = kDefaultLatencyThreshold);
+                         int latency_threshold = kDefaultLatencyThreshold,
+                         bool is_limited_tracking = false);
 
  /**
   * @brief Get the DataFlowTracker object for this fragment.
@@ -706,6 +721,8 @@ class Fragment {
   /// Load the GXF extensions specified in the configuration.
   void load_extensions_from_config();
 
+  std::vector<std::shared_ptr<ThreadPool>>& thread_pools() { return thread_pools_; }
+
   // Note: Maintain the order of declarations (executor_ and graph_) to ensure proper destruction
   // of the executor's context.
   std::string name_;  ///< The name of the fragment.
@@ -716,8 +733,10 @@ class Fragment {
   std::shared_ptr scheduler_;  ///< The scheduler used by the executor
   std::shared_ptr network_context_;  ///< The network_context used by the executor
   std::shared_ptr data_flow_tracker_;  ///< The DataFlowTracker for the fragment
-  bool is_composed_ = false;  ///< Whether the graph is composed or not.
-  bool is_metadata_enabled_ = false;  ///< Whether metadata is enabled or not.
+  std::vector<std::shared_ptr<ThreadPool>>
+      thread_pools_;  ///< Any thread pools used by the fragment
+  bool is_composed_ = false;  ///< Whether the graph is composed or not.
+  bool is_metadata_enabled_ = false;  ///< Whether metadata is enabled or not.
 };
 
 }  // namespace holoscan
diff --git a/include/holoscan/core/gxf/entity_group.hpp b/include/holoscan/core/gxf/entity_group.hpp
new file mode 100644
index 00000000..b0026e0f
--- /dev/null
+++ b/include/holoscan/core/gxf/entity_group.hpp
@@ -0,0 +1,128 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef HOLOSCAN_CORE_GXF_ENTITY_GROUP_HPP
+#define HOLOSCAN_CORE_GXF_ENTITY_GROUP_HPP
+
+#include <gxf/core/gxf.h>
+
+#include <memory>
+#include <string>
+
+#include "./gxf_condition.hpp"
+#include "./gxf_operator.hpp"
+#include "../operator.hpp"
+
+namespace holoscan::gxf {
+
+/**
+ * @brief GXF entity group.
+ *
+ * Define an entity group for the underlying GXF runtime. Entity groups are used to associate
+ * components with resources inheriting from nvidia::gxf::ResourceBase. The resources of this
+ * type exposed in the Holoscan SDK API are GPUDevice and ThreadPool.
+ */
+class EntityGroup {
+ public:
+  EntityGroup() = delete;
+
+  EntityGroup(gxf_context_t context, const std::string& name);
+
+  /**
+   * @brief Get the group id of the entity group.
+   *
+   * @return The GXF group id of the entity group.
+   */
+  gxf_uid_t gxf_gid() const { return gxf_gid_; }
+
+  /**
+   * @brief Get the GXF context of the entity group.
+   *
+   * @return The GXF context of the entity group.
+   */
+  gxf_context_t gxf_context() const { return gxf_context_; }
+
+  /**
+   * @brief Get the name of the entity group.
+   *
+   * @return The name of the entity group.
+   */
+  std::string name() const { return name_; }
+
+  /**
+   * @brief Add a GXF entity to the entity group.
+   *
+   * If the entity is already a member of a different entity group, it will be removed from that
+   * group and added to this one.
+   *
+   * Will raise a runtime_error if the entity is already a member of this entity group.
+   *
+   * @param eid The GXF unique id corresponding to the entity.
+   */
+  void add(gxf_uid_t eid);
+
+  /**
+   * @brief Add a GXFComponent to the entity group.
+   *
+   * If the component is already a member of a different entity group, it will be removed from that
+   * group and the entity it belongs to will be added to this one.
+   *
+   * Will raise a runtime_error if the entity is already a member of this entity group.
+   *
+   * @param component The component to add to the entity group.
+   */
+  void add(const GXFComponent& component);
+
+  /**
+   * @brief Add an Operator to the entity group.
+   *
+   * If the operator is already a member of a different entity group, it will be removed from that
+   * group and added to this one.
+   *
+   * Will raise a runtime_error if the entity is already a member of this entity group.
+   *
+   * @param op The operator to add to the entity group.
+   * @param entity_prefix A string prefix that can be used to indicate the entity the operator
+   * belongs to.
+   */
+  void add(std::shared_ptr<Operator> op, const std::string& entity_prefix = "");
+
+  // TODO:
+  // There is also the following related runtime GXF method
+  //   gxf_result_t Runtime::GxfEntityGroupFindResources(gxf_uid_t eid,
+  //                                                     uint64_t* num_resource_cids,
+  //                                                     gxf_uid_t* resource_cids)
+  // It takes an entity's eid, determines the corresponding group id and then returns all of the
+  // component ids associated with resource_components for that group.
+  //
+  // should this find_resources API be supported as a static method of EntityGroup?
+  //   static std::vector<gxf_uid_t> find_resources(gxf_uid_t eid);
+  //
+  // or perhaps even better if we could return the actual SystemResource objects
+  //   static std::vector<SystemResource> find_resources(gxf_uid_t eid);
+
+ private:
+  std::string name_;              ///< The name of the entity group.
+  gxf_context_t gxf_context_;     ///< The GXF context
+  gxf_uid_t gxf_gid_ = kNullUid;  ///< The GXF group id.
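+
+  // Usage sketch (hypothetical, for illustration only):
+  //   holoscan::gxf::EntityGroup group(context, "my_group");
+  //   group.add(*thread_pool);  // move a resource component's entity into this group
+  //   group.add(my_operator);   // move an operator's entity into the same group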
+};
+
+}  // namespace holoscan::gxf
+
+#endif /* HOLOSCAN_CORE_GXF_ENTITY_GROUP_HPP */
diff --git a/include/holoscan/core/gxf/gxf_component.hpp b/include/holoscan/core/gxf/gxf_component.hpp
index f0a296be..6c3e3d5e 100644
--- a/include/holoscan/core/gxf/gxf_component.hpp
+++ b/include/holoscan/core/gxf/gxf_component.hpp
@@ -24,8 +24,10 @@
 #include
 #include
 #include
+#include
 
 #include
+#include
 #include "../parameter.hpp"
 #include "./gxf_utils.hpp"
@@ -59,6 +61,12 @@ class GXFComponent {
     gxf_graph_entity_ = std::move(graph_entity);
   }
 
+  /// @brief The name of the entity group this component belongs to
+  std::string gxf_entity_group_name();
+
+  /// @brief The group id of the entity group this component belongs to
+  gxf_uid_t gxf_entity_group_id();
+
   void* gxf_cptr() { return gxf_cptr_; }
 
   nvidia::gxf::Handle gxf_component() { return gxf_component_; }
diff --git a/include/holoscan/core/gxf/gxf_condition.hpp b/include/holoscan/core/gxf/gxf_condition.hpp
index 2a4d930e..f02c4900 100644
--- a/include/holoscan/core/gxf/gxf_condition.hpp
+++ b/include/holoscan/core/gxf/gxf_condition.hpp
@@ -18,6 +18,9 @@
 #ifndef HOLOSCAN_CORE_GXF_GXF_CONDITION_HPP
 #define HOLOSCAN_CORE_GXF_GXF_CONDITION_HPP
 
+#include
+
+#include
 #include
 
 #include "../condition.hpp"
@@ -36,6 +39,16 @@ class GXFCondition : public holoscan::Condition, public gxf::GXFComponent {
   void initialize() override;
 
   void add_to_graph_entity(Operator* op);
+  void add_to_graph_entity(Fragment* fragment,
+                           std::shared_ptr<nvidia::gxf::GraphEntity> graph_entity);
+
+  /**
+   * @brief Get a YAML representation of the condition.
+   *
+   * @return YAML node including type and spec of the condition in addition to the base component
+   * properties.
+   */
+  YAML::Node to_yaml_node() const override;
 };
 
 }  // namespace holoscan::gxf
diff --git a/include/holoscan/core/gxf/gxf_extension_registrar.hpp b/include/holoscan/core/gxf/gxf_extension_registrar.hpp
index bdf6525c..60f21b41 100644
--- a/include/holoscan/core/gxf/gxf_extension_registrar.hpp
+++ b/include/holoscan/core/gxf/gxf_extension_registrar.hpp
@@ -24,6 +24,7 @@
 #include
 #include
 
+#include
 #include
 
 #include "../common.hpp"
diff --git a/include/holoscan/core/gxf/gxf_io_context.hpp b/include/holoscan/core/gxf/gxf_io_context.hpp
index 02ab6d95..d3f73930 100644
--- a/include/holoscan/core/gxf/gxf_io_context.hpp
+++ b/include/holoscan/core/gxf/gxf_io_context.hpp
@@ -23,6 +23,8 @@
 #include
 
 #include "../io_context.hpp"
+#include "gxf/core/handle.hpp"
+#include "gxf/std/receiver.hpp"
 
 namespace holoscan::gxf {
diff --git a/include/holoscan/core/gxf/gxf_network_context.hpp b/include/holoscan/core/gxf/gxf_network_context.hpp
index d12a6c2a..1ff74233 100644
--- a/include/holoscan/core/gxf/gxf_network_context.hpp
+++ b/include/holoscan/core/gxf/gxf_network_context.hpp
@@ -18,6 +18,8 @@
 #ifndef HOLOSCAN_CORE_GXF_GXF_NETWORK_CONTEXT_HPP
 #define HOLOSCAN_CORE_GXF_GXF_NETWORK_CONTEXT_HPP
 
+#include
+
 #include
 #include
 #include
@@ -46,6 +48,14 @@ class GXFNetworkContext : public holoscan::NetworkContext, public GXFComponent {
   */
  virtual const char* gxf_typename() const = 0;
 
+  /**
+   * @brief Get a YAML representation of the network context.
+   *
+   * @return YAML node including type, specs, resources of the network context in addition
+   * to the base component properties.
+   */
+  YAML::Node to_yaml_node() const override;
+
 protected:
   // Make Fragment a friend class so it can call reset_graph_entities
   friend class holoscan::Fragment;
diff --git a/include/holoscan/core/gxf/gxf_operator.hpp b/include/holoscan/core/gxf/gxf_operator.hpp
index f953845b..11b2154f 100644
--- a/include/holoscan/core/gxf/gxf_operator.hpp
+++ b/include/holoscan/core/gxf/gxf_operator.hpp
@@ -19,6 +19,7 @@
 #define HOLOSCAN_CORE_GXF_GXF_OPERATOR_HPP
 
 #include
+#include
 #include
 #include
@@ -27,6 +28,8 @@
 #include "../executors/gxf/gxf_parameter_adaptor.hpp"
 #include "../operator.hpp"
 #include "./gxf_utils.hpp"
+#include "gxf/core/handle.hpp"
+#include "gxf/std/codelet.hpp"
 
 namespace holoscan::ops {
@@ -103,6 +106,9 @@
   */
  gxf_uid_t gxf_cid() const { return gxf_cid_; }
 
+  /// @brief The name of the entity group this operator belongs to
+  std::string gxf_entity_group_name() const;
+
  /**
   * @brief Register the argument setter and the GXF parameter adaptor for the given type.
   *
@@ -177,6 +183,14 @@
     register_parameter_adaptor();
   }
 
+  /**
+   * @brief Get a YAML representation of the operator.
+   *
+   * @return YAML node including type, specs, conditions and resources of the operator in addition
+   * to the base component properties.
+   */
+  YAML::Node to_yaml_node() const override;
+
  protected:
  /**
   * This method is invoked by 'GXFExecutor::initialize_operator(Operator* op)' during
diff --git a/include/holoscan/core/gxf/gxf_resource.hpp b/include/holoscan/core/gxf/gxf_resource.hpp
index be710ba5..bfb12006 100644
--- a/include/holoscan/core/gxf/gxf_resource.hpp
+++ b/include/holoscan/core/gxf/gxf_resource.hpp
@@ -18,12 +18,15 @@
 #ifndef HOLOSCAN_CORE_GXF_GXF_RESOURCE_HPP
 #define HOLOSCAN_CORE_GXF_GXF_RESOURCE_HPP
 
+#include
+
+#include
 #include
 #include
+#include
 
 #include "../resource.hpp"
-
 #include "./gxf_component.hpp"
 #include "./gxf_utils.hpp"
@@ -40,10 +43,21 @@ class GXFResource : public holoscan::Resource, public gxf::GXFComponent {
 protected:
   // Make GXFExecutor a friend class so it can call protected initialization methods
   friend class holoscan::gxf::GXFExecutor;
-  // Operator::initialize_resources() needs to call add_to_graph_entity()
+  // Operator::initialize_resources() and Fragment::make_thread_pool call add_to_graph_entity()
   friend class holoscan::Operator;
+  friend class holoscan::Fragment;
 
   virtual void add_to_graph_entity(Operator* op);
+  void add_to_graph_entity(Fragment* fragment,
+                           std::shared_ptr<nvidia::gxf::GraphEntity> graph_entity);
+
+  /**
+   * @brief Get a YAML representation of the resource.
+   *
+   * @return YAML node including type and specs of the resource in addition to the base
+   * component properties.
+   */
+  YAML::Node to_yaml_node() const override;
 
  /**
   * This method is invoked by `GXFResource::initialize()`.
@@ -56,6 +70,21 @@ class GXFResource : public holoscan::Resource, public gxf::GXFComponent {
   std::string gxf_typename_ = "unknown_gxf_typename";
 };
 
+/**
+ * @brief Base class to be used with Resource types that inherit from nvidia::gxf::ResourceBase
+ *
+ * Resource components that can be registered with GXF's Registrar::resource should inherit from
+ * this class. Any other resource components should just use GXFResource directly.
+ */
+class GXFSystemResourceBase : public GXFResource {
+ public:
+  HOLOSCAN_RESOURCE_FORWARD_ARGS_SUPER(GXFSystemResourceBase, GXFResource)
+  GXFSystemResourceBase() = default;
+  GXFSystemResourceBase(const std::string& name, nvidia::gxf::ResourceBase* component);
+
+  const char* gxf_typename() const override { return "nvidia::gxf::ResourceBase"; }
+};
+
 }  // namespace holoscan::gxf
 
 #endif /* HOLOSCAN_CORE_GXF_GXF_RESOURCE_HPP */
diff --git a/include/holoscan/core/gxf/gxf_scheduler.hpp b/include/holoscan/core/gxf/gxf_scheduler.hpp
index b3ca5c61..bb9b507d 100644
--- a/include/holoscan/core/gxf/gxf_scheduler.hpp
+++ b/include/holoscan/core/gxf/gxf_scheduler.hpp
@@ -18,14 +18,16 @@
 #ifndef HOLOSCAN_CORE_GXF_GXF_SCHEDULER_HPP
 #define HOLOSCAN_CORE_GXF_GXF_SCHEDULER_HPP
 
+#include
+
 #include
 #include
 #include
 
+#include "../resources/gxf/clock.hpp"
 #include "../scheduler.hpp"
 #include "./gxf_component.hpp"
 #include "gxf/std/clock.hpp"
-#include "../resources/gxf/clock.hpp"
 
 namespace holoscan::gxf {
@@ -62,6 +64,14 @@ class GXFScheduler : public holoscan::Scheduler, public GXFComponent {
   */
  virtual nvidia::gxf::Clock* gxf_clock();
 
+  /**
+   * @brief Get a YAML representation of the scheduler.
+   *
+   * @return YAML node including type, specs, and resources of the scheduler in addition
+   * to the base component properties.
+   */
+  YAML::Node to_yaml_node() const override;
+
 protected:
   // Make Fragment a friend class so it can call reset_graph_entities
   friend class holoscan::Fragment;
diff --git a/include/holoscan/core/gxf/gxf_utils.hpp b/include/holoscan/core/gxf/gxf_utils.hpp
index 5d81e3a9..112f111f 100644
--- a/include/holoscan/core/gxf/gxf_utils.hpp
+++ b/include/holoscan/core/gxf/gxf_utils.hpp
@@ -19,8 +19,10 @@
 #define HOLOSCAN_CORE_GXF_GXF_UTILS_HPP
 
 #include
+#include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -33,19 +35,19 @@
 // macro like GXF_ASSERT_SUCCESS, but uses HOLOSCAN_LOG_ERROR and includes line/filename info
 // Note: HOLOSCAN_GXF_CALL depends on GNU C statement expressions ({ })
 // https://gcc.gnu.org/onlinedocs/gcc/Statement-Exprs.html
-#define HOLOSCAN_GXF_CALL(stmt) \
-  ({ \
-    gxf_result_t code = (stmt); \
-    if (code != GXF_SUCCESS) { \
-      HOLOSCAN_LOG_ERROR("GXF call '{}' in line {} of file {} failed with '{}' ({})", \
-                         #stmt, \
-                         __LINE__, \
-                         __FILE__, \
-                         GxfResultStr(code), \
-                         static_cast<int>(code)); \
-      if (!std::getenv("HOLOSCAN_DISABLE_BACKTRACE")) { PrettyPrintBacktrace(); } \
-    } \
-    code; \
+#define HOLOSCAN_GXF_CALL(stmt) \
+  ({ \
+    gxf_result_t code = (stmt); \
+    if (code != GXF_SUCCESS) { \
+      HOLOSCAN_LOG_ERROR("GXF call '{}' in line {} of file {} failed with '{}' ({})", \
+                         #stmt, \
+                         __LINE__, \
+                         __FILE__, \
+                         GxfResultStr(code), \
+                         static_cast<int>(code)); \
+      if (!std::getenv("HOLOSCAN_DISABLE_BACKTRACE")) { PrettyPrintBacktrace(); } \
+    } \
+    code; \
   })
 
 #define HOLOSCAN_GXF_CALL_FATAL(stmt) \
@@ -264,6 +266,12 @@ gxf_uid_t add_entity_group(void* context, std::string name);
  */
 uint64_t get_default_queue_policy();
 
+std::optional<int32_t> gxf_device_id(gxf_context_t context, gxf_uid_t eid);
+
+std::string gxf_entity_group_name(gxf_context_t context, gxf_uid_t eid);
+
+gxf_uid_t gxf_entity_group_id(gxf_context_t context, gxf_uid_t eid);
+
 }  // namespace holoscan::gxf
 
 #endif /* HOLOSCAN_CORE_GXF_GXF_UTILS_HPP */
diff --git a/include/holoscan/core/gxf/gxf_wrapper.hpp b/include/holoscan/core/gxf/gxf_wrapper.hpp
index f5637833..94bcd9b1 100644
--- a/include/holoscan/core/gxf/gxf_wrapper.hpp
+++ b/include/holoscan/core/gxf/gxf_wrapper.hpp
@@ -20,8 +20,8 @@
 
 #include
"holoscan/core/gxf/gxf_operator.hpp" +#include "gxf/core/registrar.hpp" #include "gxf/std/codelet.hpp" -#include "gxf/core/parameter_parser_std.hpp" #include "holoscan/profiler/profiler.hpp" namespace holoscan::gxf { diff --git a/include/holoscan/core/io_context.hpp b/include/holoscan/core/io_context.hpp index 42c81d95..6e60999b 100644 --- a/include/holoscan/core/io_context.hpp +++ b/include/holoscan/core/io_context.hpp @@ -494,8 +494,9 @@ class InputContext { } inline holoscan::RuntimeError create_receive_error(const char* name, const char* message) { - auto error_message = fmt::format("ReceiveError on input port '{}': {}", name, message); - HOLOSCAN_LOG_DEBUG(error_message); + auto error_message = + fmt::format("Failure receiving message from input port '{}': {}", name, message); + HOLOSCAN_LOG_TRACE(error_message); return holoscan::RuntimeError(holoscan::ErrorCode::kReceiveError, error_message.c_str()); } diff --git a/include/holoscan/core/io_spec.hpp b/include/holoscan/core/io_spec.hpp index bb8adc9f..955b9412 100644 --- a/include/holoscan/core/io_spec.hpp +++ b/include/holoscan/core/io_spec.hpp @@ -36,6 +36,7 @@ #include "./conditions/gxf/downstream_affordable.hpp" #include "./conditions/gxf/expiring_message.hpp" #include "./conditions/gxf/message_available.hpp" +#include "./conditions/gxf/multi_message_available_timeout.hpp" #include "./conditions/gxf/periodic.hpp" #include "./gxf/entity.hpp" #include "./resource.hpp" @@ -206,6 +207,14 @@ class IOSpec { type, std::make_shared(std::forward(args)...)); break; + case ConditionType::kMultiMessageAvailableTimeout: + // May want to use this multi-message condition even with a single port as a way to have + // a timeout on the condition. Unlike ExpiringMessageAvailableCondition, this one does not + // require a timestamp to be emitted by the upstream operator. + conditions_.emplace_back( + type, + std::make_shared(std::forward(args)...)); + break; case ConditionType::kNone: conditions_.emplace_back(type, nullptr); break; diff --git a/include/holoscan/core/messagelabel.hpp b/include/holoscan/core/messagelabel.hpp index 530d9c84..fc460f2f 100644 --- a/include/holoscan/core/messagelabel.hpp +++ b/include/holoscan/core/messagelabel.hpp @@ -18,13 +18,14 @@ #ifndef HOLOSCAN_CORE_MESSAGELABEL_HPP #define HOLOSCAN_CORE_MESSAGELABEL_HPP -#include +#include #include #include #include #include #include "./forward_def.hpp" +#include "holoscan/logger/logger.hpp" namespace holoscan { @@ -35,14 +36,23 @@ namespace holoscan { /** * @brief Return the current time in microseconds since the epoch. -This function uses the C++11 standard library's chrono library. + * This function previously used the C++11 standard library's chrono library. + * This function now uses CLOCK_REALTIME to get the current time, because the clock chrono uses is + * implementation-dependent. We want a known clock like CLOCK_REALTIME so that we can use the clock + * across machines. CLOCK_REALTIME is a clock that is supposed to be synchronized with PTP + * synchronization. * - * @return The current time in microseconds since epoch. + * @return The current time in microseconds returned by CLOCK_REALTIME. If CLOCK_REALTIME is not + * available, it returns -1. 
  */
 static inline int64_t get_current_time_us() {
-  return static_cast<int64_t>(std::chrono::duration_cast<std::chrono::microseconds>(
-                                  std::chrono::system_clock::now().time_since_epoch())
-                                  .count());
+  struct timespec ts;
+  if (clock_gettime(CLOCK_REALTIME, &ts) == 0) {
+    return static_cast<int64_t>(ts.tv_sec) * 1000000 + static_cast<int64_t>(ts.tv_nsec) / 1000;
+  } else {
+    HOLOSCAN_LOG_ERROR("Error in clock_gettime");
+    return -1;
+  }
 }

 /** @brief This struct represents a timestamp label for a Holoscan Operator.
diff --git a/include/holoscan/core/network_context.hpp b/include/holoscan/core/network_context.hpp
index 6bf3b57e..4411b31f 100644
--- a/include/holoscan/core/network_context.hpp
+++ b/include/holoscan/core/network_context.hpp
@@ -18,6 +18,8 @@
 #ifndef HOLOSCAN_CORE_NETWORK_CONTEXT_HPP
 #define HOLOSCAN_CORE_NETWORK_CONTEXT_HPP

+#include
+
 #include
 #include
 #include
diff --git a/include/holoscan/core/network_contexts/gxf/ucx_context.hpp b/include/holoscan/core/network_contexts/gxf/ucx_context.hpp
index f1866cdb..c9d22ac1 100644
--- a/include/holoscan/core/network_contexts/gxf/ucx_context.hpp
+++ b/include/holoscan/core/network_contexts/gxf/ucx_context.hpp
@@ -56,7 +56,7 @@ class UcxContext : public gxf::GXFNetworkContext {
   Parameter<bool> cpu_data_only_;  ///< Support CPU memory only for UCX communication
   Parameter<bool> enable_async_;   ///< Control whether UCX transmit/receive uses asynchronous mode

-  // TODO: support GPUDevice nvidia::gxf::Resource
+  // TODO(unknown): support GPUDevice nvidia::gxf::Resource
   //  nvidia::gxf::Resource<nvidia::gxf::Handle<nvidia::gxf::GPUDevice>> gpu_device_;
 };
diff --git a/include/holoscan/core/operator.hpp b/include/holoscan/core/operator.hpp
index edd63aba..b02b784e 100644
--- a/include/holoscan/core/operator.hpp
+++ b/include/holoscan/core/operator.hpp
@@ -18,6 +18,8 @@
 #ifndef HOLOSCAN_CORE_OPERATOR_HPP
 #define HOLOSCAN_CORE_OPERATOR_HPP

+#include
+
 #include
 #include
 #include
@@ -37,6 +39,7 @@
 #include "./condition.hpp"
 #include "./forward_def.hpp"
 #include "./graph.hpp"
+#include "./io_spec.hpp"
 #include "./messagelabel.hpp"
 #include "./metadata.hpp"
 #include "./operator_spec.hpp"
diff --git a/include/holoscan/core/operator_spec.hpp b/include/holoscan/core/operator_spec.hpp
index 60e9017a..25e77e14 100644
--- a/include/holoscan/core/operator_spec.hpp
+++ b/include/holoscan/core/operator_spec.hpp
@@ -18,6 +18,8 @@
 #ifndef HOLOSCAN_CORE_OPERATOR_SPEC_HPP
 #define HOLOSCAN_CORE_OPERATOR_SPEC_HPP

+#include
+
 #include
 #include
 #include
@@ -32,6 +34,12 @@
 #include "./io_spec.hpp"

 namespace holoscan {
+struct MultiMessageConditionInfo {
+  ConditionType kind;
+  std::vector<std::string> port_names;
+  ArgList args;
+};
+
 /**
  * @brief Class to define the specification of an operator.
  */
@@ -62,6 +70,20 @@ class OperatorSpec : public ComponentSpec {
     return input("__iospec_input");
   }

+  /**
+   * @brief Add a Condition that depends on the status of multiple input ports.
+   *
+   * A usage sketch is shown below, after this class excerpt.
+   *
+   * @param kind The type of multi-message condition (currently only kMultiMessageAvailable)
+   * @param port_names The names of the input ports the condition will apply to
+   * @param args ArgList of arguments to pass to the MultiMessageAvailableCondition
+   */
+  void multi_port_condition(ConditionType kind, std::vector<std::string> port_names,
+                            ArgList args) {
+    multi_port_conditions_.emplace_back(
+        MultiMessageConditionInfo{kind, std::move(port_names), std::move(args)});
+  }
+
+  std::vector<MultiMessageConditionInfo>& multi_port_conditions() {
+    return multi_port_conditions_;
+  }
+
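For readers wiring this up, here is a minimal sketch of how an operator's `setup()` might use the new API, modeled on the `examples/conditions/multi_message` samples added in this release. The operator and port names are hypothetical, and the `sampling_mode`/`min_sizes` argument names are assumptions to verify against `MultiMessageAvailableCondition`:

```cpp
// Hypothetical operator that should only execute once a message has arrived on
// each of its two input ports.
void MyMergeOp::setup(holoscan::OperatorSpec& spec) {
  // Disable the default per-port MessageAvailable conditions so that the single
  // multi-port condition below is the only scheduling constraint on these ports.
  spec.input<int>("in1").condition(holoscan::ConditionType::kNone);
  spec.input<int>("in2").condition(holoscan::ConditionType::kNone);

  // One condition spanning both ports: execute once each receiver holds >= 1 message.
  spec.multi_port_condition(
      holoscan::ConditionType::kMultiMessageAvailable,
      {"in1", "in2"},
      holoscan::ArgList{holoscan::Arg("sampling_mode", std::string("PerReceiver")),
                        holoscan::Arg("min_sizes", std::vector<size_t>{1, 1})});
}
```

The `kMultiMessageAvailableTimeout` variant follows the same pattern and, as the `io_spec.hpp` comment above notes, can also be attached to a single port through `IOSpec::condition` when a receive timeout is wanted.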
   /**
    * @brief Define an input specification for this operator.
    *
@@ -287,6 +309,9 @@ class OperatorSpec : public ComponentSpec {
   std::unordered_map<std::string, std::shared_ptr<IOSpec>> inputs_;   ///< Input specs
   std::unordered_map<std::string, std::shared_ptr<IOSpec>> outputs_;  ///< Outputs specs

+  // multi-message conditions span multiple IOSpec objects, so store them on OperatorSpec instead
+  std::vector<MultiMessageConditionInfo> multi_port_conditions_;
+
   /// Container for receivers parameters
   std::list<Parameter<std::vector<IOSpec*>>> receivers_params_;
 };
diff --git a/include/holoscan/core/resource.hpp b/include/holoscan/core/resource.hpp
index 0a591885..dfc45f60 100644
--- a/include/holoscan/core/resource.hpp
+++ b/include/holoscan/core/resource.hpp
@@ -18,6 +18,8 @@
 #ifndef HOLOSCAN_CORE_RESOURCE_HPP
 #define HOLOSCAN_CORE_RESOURCE_HPP

+#include
+
 #include
 #include
 #include
diff --git a/include/holoscan/core/resources/gxf/cpu_thread.hpp b/include/holoscan/core/resources/gxf/cpu_thread.hpp
new file mode 100644
index 00000000..b5c78254
--- /dev/null
+++ b/include/holoscan/core/resources/gxf/cpu_thread.hpp
@@ -0,0 +1,57 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef HOLOSCAN_CORE_RESOURCES_GXF_CPU_THREAD_HPP
+#define HOLOSCAN_CORE_RESOURCES_GXF_CPU_THREAD_HPP
+
+#include
+
+#include
+#include "../../component_spec.hpp"
+#include "../../gxf/gxf_resource.hpp"
+#include "../../parameter.hpp"
+
+namespace holoscan {
+
+/**
+ * @brief CPU thread class.
+ *
+ * A CPUThread resource can be added to an operator to control whether it will be pinned to a
+ * specific thread in a ThreadPool (as used by MultiThreadScheduler). See the ThreadPool API
+ * documentation for a more detailed description of its usage.
+ *
+ */
+class CPUThread : public gxf::GXFResource {
+ public:
+  HOLOSCAN_RESOURCE_FORWARD_ARGS_SUPER(CPUThread, gxf::GXFResource)
+
+  explicit CPUThread(bool pin_entity = true) : pin_entity_(pin_entity) { name_ = "cpu_thread"; }
+
+  CPUThread(const std::string& name, nvidia::gxf::CPUThread* component);
+
+  /// @brief The underlying GXF component's name.
+  const char* gxf_typename() const override { return "nvidia::gxf::CPUThread"; }
+
+  void setup(ComponentSpec& spec) override;
+
+ private:
+  Parameter<bool> pin_entity_{false};  ///< Whether or not to pin an operator to a specific thread
+};
+
+}  // namespace holoscan
+
+#endif /* HOLOSCAN_CORE_RESOURCES_GXF_CPU_THREAD_HPP */
diff --git a/include/holoscan/core/resources/gxf/system_resources.hpp b/include/holoscan/core/resources/gxf/system_resources.hpp
new file mode 100644
index 00000000..1854dbc6
--- /dev/null
+++ b/include/holoscan/core/resources/gxf/system_resources.hpp
@@ -0,0 +1,163 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef HOLOSCAN_CORE_RESOURCES_GXF_SYSTEM_RESOURCES_HPP
+#define HOLOSCAN_CORE_RESOURCES_GXF_SYSTEM_RESOURCES_HPP
+
+#include
+
+#include
+#include
+#include
+#include
+
+#include
+#include "../../component_spec.hpp"
+#include "../../gxf/entity_group.hpp"
+#include "../../gxf/gxf_resource.hpp"
+#include "../../operator.hpp"
+
+namespace holoscan {
+
+/**
+ * @brief Thread pool resource.
+ *
+ * This is a thread pool for use with the EventBasedScheduler or MultiThreadScheduler. This
+ * resource should be created via the `Fragment::make_thread_pool` method instead of the usual
+ * `Fragment::make_resource` method, as it requires additional configuration of an associated
+ * GXF EntityGroup.
+ *
+ *     pool1 = make_thread_pool("pool1", Arg("initial_size", static_cast<int64_t>(2)));
+ *
+ * The operators can be added via the `add` method. For strict thread pinning, the `pin_operator`
+ * argument should be true and the initial_size of the thread pool should be at least as large as
+ * the number of operators that will be pinned to threads.
+ *
+ *     pool1.add(op1, true);
+ *     pool1.add(op2, true);
+ *
+ * This add method takes care of adding any needed holoscan::CPUThread resource to the operator.
+ *
+ * The MultiThreadScheduler's `strict_job_thread_pinning` argument can be set true to disallow
+ * execution of any other entities on the pinned thread. The EventBasedScheduler always uses
+ * strict thread pinning.
+ *
+ */
+class ThreadPool : public gxf::GXFSystemResourceBase {
+ public:
+  HOLOSCAN_RESOURCE_FORWARD_ARGS_SUPER(ThreadPool, gxf::GXFSystemResourceBase)
+  ThreadPool() = default;
+  ThreadPool(const std::string& name, nvidia::gxf::ThreadPool* component);
+
+  /// @brief The underlying GXF component's name.
+  const char* gxf_typename() const override { return "nvidia::gxf::ThreadPool"; }
+
+  void setup(ComponentSpec& spec) override;
+
+  /// @brief The number of threads currently in the thread pool.
+  int64_t size() const;
+
+  /**
+   * @brief Add an operator to the thread pool.
+   *
+   * @param op The operator to add.
+   * @param pin_operator Whether the operator should be pinned to a specific thread in the pool.
+   */
+  void add(const std::shared_ptr<Operator>& op, bool pin_operator = true);
+
+  /**
+   * @brief Add multiple operators to the thread pool.
+   *
+   * @param ops The operators to add.
+   * @param pin_operator Whether the operators should be pinned to a specific thread in the pool.
+   */
+  void add(std::vector<std::shared_ptr<Operator>> ops, bool pin_operator = true);
+
+  /// @brief The entity group that this thread pool is associated with.
+  std::shared_ptr<gxf::EntityGroup> entity_group() const { return entity_group_; }
+
+  /// @brief The operators associated with this thread pool.
+  std::vector<std::shared_ptr<Operator>> operators() const { return operators_; }
+
+  /**
+   * @brief Get a YAML representation of the thread pool.
+   *
+   * @return YAML node including properties of the base resource and the operators in the pool.
+   */
+  YAML::Node to_yaml_node() const override;
+
+ protected:
+  friend class Fragment;  // allow Fragment::make_thread_pool to set entity_group_
+
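To illustrate the workflow the ThreadPool documentation above describes, here is a hedged sketch of an `Application::compose()` body that pins two operators to dedicated threads. Operator types and argument values are placeholders, and `make_thread_pool` is assumed to return a `std::shared_ptr<ThreadPool>`:

```cpp
void MyApp::compose() {
  using namespace holoscan;

  auto tx = make_operator<ops::PingTxOp>("tx", make_condition<CountCondition>(10));
  auto rx = make_operator<ops::PingRxOp>("rx");
  add_flow(tx, rx);

  // initial_size must be at least the number of operators pinned below.
  auto pool = make_thread_pool("pool", Arg("initial_size", static_cast<int64_t>(2)));
  pool->add(tx, true);  // also attaches a CPUThread resource with pin_entity=true
  pool->add(rx, true);

  // The EventBasedScheduler always honors strict thread pinning.
  scheduler(make_scheduler<EventBasedScheduler>(
      "event-based", Arg("worker_thread_number", static_cast<int64_t>(2))));
}
```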
+  /// @brief Set the entity group for this thread pool.
+  void entity_group(const std::shared_ptr<gxf::EntityGroup>& entity_group) {
+    entity_group_ = entity_group;
+  }
+
+ private:
+  Parameter<int64_t> initial_size_;  ///< Initial size of the thread pool.
+  // Note: The GXF priority parameter is not currently functional, so we don't expose it here.
+  // Parameter<int64_t> priority_;  ///< priority of the thread pool (0=low, 1=medium, 2=high)
+
+  std::shared_ptr<gxf::EntityGroup> entity_group_;    ///< The entity group associated with the
+                                                      ///< thread pool.
+  std::vector<std::shared_ptr<Operator>> operators_;  ///< The operators associated with the
+                                                      ///< thread pool.
+};
+
+/**
+ * @brief GPU device resource.
+ *
+ * This resource can be used to associate a set of components with a particular GPU device ID.
+ *
+ * The Holoscan SDK components which will use a GPUDevice resource if found include:
+ *   - BlockMemoryPool
+ *   - CudaStreamPool
+ *   - UcxContext
+ *   - UcxReceiver
+ *   - UcxTransmitter
+ *
+ *     dev0 = make_resource<GPUDevice>("dev0", Arg("dev_id", static_cast<int32_t>(0)));
+ *
+ *     gpu0_group = EntityGroup("gpu0_group");
+ *     gpu0_group.add(*dev0);
+ *
+ * Then any other components that need to be associated with this device can be added to that
+ * same entity group.
+ *
+ */
+class GPUDevice : public gxf::GXFSystemResourceBase {
+ public:
+  HOLOSCAN_RESOURCE_FORWARD_ARGS_SUPER(GPUDevice, gxf::GXFSystemResourceBase)
+  GPUDevice() = default;
+  GPUDevice(const std::string& name, nvidia::gxf::GPUDevice* component);
+
+  /// @brief The underlying GXF component's name.
+  const char* gxf_typename() const override { return "nvidia::gxf::GPUDevice"; }
+
+  void setup(ComponentSpec& spec) override;
+
+  /// @brief The GPU device ID.
+  int32_t device_id() const;
+
+ private:
+  Parameter<int32_t> dev_id_;  ///< The GPU device ID.
+};
+
+}  // namespace holoscan
+
+#endif /* HOLOSCAN_CORE_RESOURCES_GXF_SYSTEM_RESOURCES_HPP */
diff --git a/include/holoscan/core/scheduler.hpp b/include/holoscan/core/scheduler.hpp
index 0860868b..3d7c921d 100644
--- a/include/holoscan/core/scheduler.hpp
+++ b/include/holoscan/core/scheduler.hpp
@@ -18,6 +18,8 @@
 #ifndef HOLOSCAN_CORE_SCHEDULER_HPP
 #define HOLOSCAN_CORE_SCHEDULER_HPP

+#include
+
 #include
 #include
 #include
diff --git a/include/holoscan/core/schedulers/gxf/multithread_scheduler.hpp b/include/holoscan/core/schedulers/gxf/multithread_scheduler.hpp
index f299ac80..b996e4a0 100644
--- a/include/holoscan/core/schedulers/gxf/multithread_scheduler.hpp
+++ b/include/holoscan/core/schedulers/gxf/multithread_scheduler.hpp
@@ -58,9 +58,7 @@ class MultiThreadScheduler : public gxf::GXFScheduler {
   Parameter<double> check_recession_period_ms_;
   Parameter<int64_t> max_duration_ms_;
   Parameter<int64_t> stop_on_deadlock_timeout_;  // in ms
-  // The following two parameters need to wait on ThreadPool support
-  // Parameter<bool> thread_pool_allocation_auto_;
-  // Parameter<bool> strict_job_thread_pinning_;
+  Parameter<bool> strict_job_thread_pinning_;
 };

 }  // namespace holoscan
diff --git a/include/holoscan/core/services/app_driver/server.hpp b/include/holoscan/core/services/app_driver/server.hpp
index 62d9fbfb..e820563b 100644
--- a/include/holoscan/core/services/app_driver/server.hpp
+++ b/include/holoscan/core/services/app_driver/server.hpp
@@ -1,5 +1,5 @@
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2023-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  * SPDX-License-Identifier: Apache-2.0
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
@@ -35,9 +35,6 @@ class Server;

 namespace holoscan::service {

-constexpr int32_t kDefaultAppDriverPort = 8765;
-constexpr int32_t kDefaultHealthCheckingPort = 8777;
-
 class AppDriverServer {
  public:
   explicit AppDriverServer(holoscan::AppDriver* app_driver, bool need_driver = true,
diff --git a/include/holoscan/core/services/common/network_constants.hpp b/include/holoscan/core/services/common/network_constants.hpp
index 9a641555..a21a0b3c 100644
--- a/include/holoscan/core/services/common/network_constants.hpp
+++ b/include/holoscan/core/services/common/network_constants.hpp
@@ -1,5 +1,5 @@
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2023-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  * SPDX-License-Identifier: Apache-2.0
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
@@ -25,6 +25,9 @@ namespace holoscan::service {
 constexpr uint32_t kMinNetworkPort = 10000;
 constexpr uint32_t kMaxNetworkPort = 32767;

+constexpr int32_t kDefaultAppDriverPort = 8765;
+constexpr int32_t kDefaultHealthCheckingPort = 8777;
+
 }  // namespace holoscan::service

 #endif /* HOLOSCAN_CORE_SERVICES_COMMON_NETWORK_CONSTANTS_HPP */
diff --git a/include/holoscan/holoscan.hpp b/include/holoscan/holoscan.hpp
index 523f4033..9f4658cc 100644
--- a/include/holoscan/holoscan.hpp
+++ b/include/holoscan/holoscan.hpp
@@ -39,6 +39,7 @@

 // Domain objects
 #include "./core/gxf/entity.hpp"
+#include "./core/gxf/entity_group.hpp"

 // Conditions
 #include "./core/conditions/gxf/asynchronous.hpp"
@@ -50,6 +51,8 @@
 #include "./core/conditions/gxf/downstream_affordable.hpp"
 #include "./core/conditions/gxf/expiring_message.hpp"
 #include "./core/conditions/gxf/message_available.hpp"
+#include "./core/conditions/gxf/multi_message_available.hpp"
+#include "./core/conditions/gxf/multi_message_available_timeout.hpp"
 #include "./core/conditions/gxf/periodic.hpp"

 // NetworkContexts
@@ -69,6 +72,7 @@
 #include "./core/resources/gxf/std_component_serializer.hpp"
 #include "./core/resources/gxf/std_entity_serializer.hpp"
 #include "./core/resources/gxf/stream_ordered_allocator.hpp"
+#include "./core/resources/gxf/system_resources.hpp"
 #include "./core/resources/gxf/ucx_component_serializer.hpp"
 #include "./core/resources/gxf/ucx_entity_serializer.hpp"
 #include "./core/resources/gxf/ucx_holoscan_component_serializer.hpp"
diff --git a/include/holoscan/logger/logger.hpp b/include/holoscan/logger/logger.hpp
index 7f06bb2f..4713b74d 100644
--- a/include/holoscan/logger/logger.hpp
+++ b/include/holoscan/logger/logger.hpp
@@ -21,7 +21,6 @@

 #include
 #include   // allows fmt to format std::array, std::vector, etc.
-#include
 #include
 #include
 #include
diff --git a/include/holoscan/operators/holoviz/holoviz.hpp b/include/holoscan/operators/holoviz/holoviz.hpp
index 8aa9738b..217bdb33 100644
--- a/include/holoscan/operators/holoviz/holoviz.hpp
+++ b/include/holoscan/operators/holoviz/holoviz.hpp
@@ -187,7 +187,7 @@ struct BufferInfo;
 * - **window_title**: Title on window canvas (default: `"Holoviz"`)
 *   - type: `std::string`
 * - **display_name**: In exclusive display or fullscreen mode, name of display to use as shown
- *   with `xrandr` or `hwinfo --monitor` (default: ``)
+ *   with `xrandr` or `hwinfo --monitor` (default: `""`)
 *   - type: `std::string`
 * - **width**: Window width or display resolution width if in exclusive display or fullscreen mode
 *   (default: `1920`)
@@ -216,8 +216,15 @@ struct BufferInfo;
 *   'ColorSpace::PASS_THROUGH' is supported since there is no display. For other color spaces the
 *   display needs to be configured for HDR (default: `ColorSpace::AUTO`)
 *   - type: `std::string`
- * - **window_close_scheduling_term**: BooleanSchedulingTerm to stop the codelet from ticking
- *   when the window is closed
+ * - **window_close_condition**: BooleanCondition on the operator that will cause it to stop
+ *   executing when the display window is closed. By default, this condition is created
+ *   automatically during HolovizOp::initialize. The user may want to provide it if, for example,
+ *   there are multiple HolovizOp operators and the same window close condition should be shared
+ *   across them. When the condition is shared, closing one of the display windows also closes
+ *   the other(s).
+ * - **window_close_scheduling_term**: A deprecated parameter name for
+ *   `window_close_condition`. Please use `window_close_condition` instead, as
+ *   `window_close_scheduling_term` will be removed in a future release.
 *   - type: `gxf::Handle`
 * - **allocator**: Allocator used to allocate memory for `render_buffer_output`
 *   - type: `gxf::Handle`
@@ -913,6 +920,7 @@ class HolovizOp : public Operator {
   Parameter<bool> vsync_;
   Parameter display_color_space_;
   Parameter<std::shared_ptr<BooleanCondition>> window_close_scheduling_term_;
+  Parameter<std::shared_ptr<BooleanCondition>> window_close_condition_;
   Parameter<std::shared_ptr<Allocator>> allocator_;
   Parameter<std::string> font_path_;
   Parameter<std::string> camera_pose_output_type_;
@@ -939,8 +947,6 @@ class HolovizOp : public Operator {
   bool camera_pose_output_enabled_ = false;
   bool is_first_tick_ = true;

-  static std::mutex mutex_;  ///< mutex to protect start method
-
   static std::remove_pointer_t key_callback_handler;
   static std::remove_pointer_t unicode_char_callback_handler;
   static std::remove_pointer_t mouse_button_callback_handler;
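A hedged sketch of the window-close sharing described above, inside `compose()`; the exact way the condition is passed is an assumption based on the `Parameter<std::shared_ptr<BooleanCondition>>` declaration:

```cpp
// One BooleanCondition shared by two Holoviz windows: closing either window
// disables the condition, which stops both operators.
auto window_close = make_condition<holoscan::BooleanCondition>("window_close_shared");

auto viz_left = make_operator<holoscan::ops::HolovizOp>(
    "viz_left",
    holoscan::Arg("window_title", std::string("Left view")),
    holoscan::Arg("window_close_condition", window_close));
auto viz_right = make_operator<holoscan::ops::HolovizOp>(
    "viz_right",
    holoscan::Arg("window_title", std::string("Right view")),
    holoscan::Arg("window_close_condition", window_close));
```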
diff --git a/include/holoscan/operators/inference/inference.hpp b/include/holoscan/operators/inference/inference.hpp
index f72c66df..01cf0743 100644
--- a/include/holoscan/operators/inference/inference.hpp
+++ b/include/holoscan/operators/inference/inference.hpp
@@ -168,7 +168,7 @@ class InferenceOp : public holoscan::Operator {
   /// @brief Memory allocator
   Parameter<std::shared_ptr<Allocator>> allocator_;

-  /// @brief Flag to enable inference on CPU (only supported by onnxruntime).
+  /// @brief Flag to enable inference on CPU (only supported by ONNX Runtime and LibTorch).
   /// Default is False.
   Parameter<bool> infer_on_cpu_;
diff --git a/modules/holoinfer/src/CMakeLists.txt b/modules/holoinfer/src/CMakeLists.txt
index f8ad6ae0..29c65f30 100644
--- a/modules/holoinfer/src/CMakeLists.txt
+++ b/modules/holoinfer/src/CMakeLists.txt
@@ -32,6 +32,7 @@ set(holoinfer_src
     manager/process_manager.cpp
     utils/infer_utils.cpp
     utils/infer_buffer.cpp
+    utils/work_queue.cpp
     )

 add_library(${PROJECT_NAME} SHARED ${holoinfer_src})
diff --git a/modules/holoinfer/src/include/holoinfer_constants.hpp b/modules/holoinfer/src/include/holoinfer_constants.hpp
index ed5fda55..b6a5793b 100644
--- a/modules/holoinfer/src/include/holoinfer_constants.hpp
+++ b/modules/holoinfer/src/include/holoinfer_constants.hpp
@@ -45,7 +45,8 @@ enum class holoinfer_datatype {
   h_Int32 = 2,
   h_Int64 = 3,
   h_UInt8 = 4,
-  h_Unsupported = 5
+  h_Float16 = 5,
+  h_Unsupported = 6
 };

 /// @brief Data processor implementation codes
 enum class holoinfer_data_processor { h_CUDA = 0, h_HOST = 1, h_CUDA_AND_HOST = 2 };
diff --git a/modules/holoinfer/src/include/holoinfer_utils.hpp b/modules/holoinfer/src/include/holoinfer_utils.hpp
index b4c79143..400e7c65 100644
--- a/modules/holoinfer/src/include/holoinfer_utils.hpp
+++ b/modules/holoinfer/src/include/holoinfer_utils.hpp
@@ -24,21 +24,7 @@
 #include
 #include

-#include "gxf/core/entity.hpp"
 #include "gxf/core/gxf.h"
-#include "gxf/core/parameter.hpp"
-#include "gxf/cuda/cuda_stream.hpp"
-#include "gxf/cuda/cuda_stream_id.hpp"
-#include "gxf/cuda/cuda_stream_pool.hpp"
-#include "gxf/multimedia/video.hpp"
-#include "gxf/std/allocator.hpp"
-#include "gxf/std/clock.hpp"
-#include "gxf/std/codelet.hpp"
-#include "gxf/core/parameter_parser_std.hpp"
-#include "gxf/std/receiver.hpp"
-#include "gxf/std/tensor.hpp"
-#include "gxf/std/timestamp.hpp"
-#include "gxf/std/transmitter.hpp"

 #include "holoinfer_buffer.hpp"
 #include "holoinfer_constants.hpp"
@@ -121,7 +107,8 @@
 static const std::map<std::string, holoinfer_datatype> kHoloInferDataTypeMap = {
     {"kInt32", holoinfer_datatype::h_Int32},
     {"kInt8", holoinfer_datatype::h_Int8},
     {"kUInt8", holoinfer_datatype::h_UInt8},
-    {"kInt64", holoinfer_datatype::h_Int64}};
+    {"kInt64", holoinfer_datatype::h_Int64},
+    {"kFloat16", holoinfer_datatype::h_Float16}};

 InferStatus parse_yaml_node(const node_type& in_config, std::vector<std::string>& names,
                             std::vector<std::vector<int64_t>>& dims,
diff --git a/modules/holoinfer/src/infer/onnx/core.cpp b/modules/holoinfer/src/infer/onnx/core.cpp
index 1eb28f2c..01263f34 100644
--- a/modules/holoinfer/src/infer/onnx/core.cpp
+++ b/modules/holoinfer/src/infer/onnx/core.cpp
@@ -18,6 +18,7 @@

 #include

+#include <filesystem>
 #include
 #include
 #include
@@ -33,16 +34,23 @@ namespace inference {

 class OnnxInferImpl {
  public:
   // Internal only
-  OnnxInferImpl(const std::string& model_file_path, bool cuda_flag);
+  OnnxInferImpl(const std::string& model_file_path, bool enable_fp16, bool cuda_flag,
+                bool cuda_buf_in, bool cuda_buf_out);
+  ~OnnxInferImpl();

-  std::string model_path_{""};
-  bool use_cuda_ = true;
+  const std::string model_path_;
+  const bool enable_fp16_;
+  const bool use_cuda_;
+  const bool cuda_buf_in_;
+  const bool cuda_buf_out_;
+
+  std::unique_ptr<Ort::Env> env_;

   Ort::SessionOptions session_options_;
-  OrtCUDAProviderOptions cuda_options_{};
+  OrtCUDAProviderOptionsV2* cuda_options_ = nullptr;
+  OrtTensorRTProviderOptionsV2* tensor_rt_options_ = nullptr;

-  std::unique_ptr<Ort::Env> env_ = nullptr;
-  std::unique_ptr<Ort::Session> session_ = nullptr;
+  std::unique_ptr<Ort::Session> session_;

   Ort::AllocatorWithDefaultOptions allocator_;

@@ -62,22 +70,17 @@ class OnnxInferImpl {
   std::vector<Ort::Value> input_tensors_;
   std::vector<Ort::Value> output_tensors_;

-  std::vector<Ort::Value> input_tensors_gpu_;
-  std::vector<Ort::Value> output_tensors_gpu_;
-
   Ort::MemoryInfo memory_info_ =
       Ort::MemoryInfo::CreateCpu(OrtAllocatorType::OrtArenaAllocator, OrtMemType::OrtMemTypeDefault);
   Ort::MemoryInfo memory_info_cuda_ =
       Ort::MemoryInfo("Cuda", OrtAllocatorType::OrtArenaAllocator, 0, OrtMemTypeDefault);

-  std::unique_ptr<Ort::Allocator> memory_allocator_cuda_;
-
-  holoinfer_datatype get_holoinfer_datatype(ONNXTensorElementDataType datatype);
+  cudaStream_t cuda_stream_ = nullptr;
+  cudaEvent_t cuda_event_ = nullptr;

-  Ort::Value create_tensor(const std::shared_ptr<DataBuffer>& input_buffer,
-                           const std::vector<int64_t>& dims);
-  void transfer_to_output(std::vector<std::shared_ptr<DataBuffer>>& output_buffer,
-                          const size_t& index);
+  Ort::Value create_tensor(const std::shared_ptr<DataBuffer>& data_buffer,
+                           const std::vector<int64_t>& dims, bool cuda_buf);

   // Wrapped Public APIs
   InferStatus do_inference(const std::vector<std::shared_ptr<DataBuffer>>& input_buffer,
@@ -93,26 +96,6 @@ class OnnxInferImpl {
   void cleanup();
 };

-template <typename T>
-Ort::Value create_tensor_core(const std::shared_ptr<DataBuffer>& input_buffer,
-                              const std::vector<int64_t>& dims, Ort::MemoryInfo& memory_info_) {
-  size_t input_tensor_size = accumulate(dims.begin(), dims.end(), 1, std::multiplies<size_t>());
-
-  return Ort::Value::CreateTensor<T>(memory_info_,
-                                     static_cast<T*>(input_buffer->host_buffer_->data()),
-                                     input_tensor_size,
-                                     dims.data(),
-                                     dims.size());
-}
-
-template <typename T>
-void transfer_to_host(std::shared_ptr<DataBuffer>& output_buffer, Ort::Value& output_tensor,
-                      const size_t& output_tensor_size) {
-  memcpy(output_buffer->host_buffer_->data(),
-         output_tensor.GetTensorMutableData<T>(),
-         output_tensor_size * sizeof(T));
-}
-
 void OnnxInfer::print_model_details() {
   impl_->print_model_details();
 }
@@ -133,7 +116,7 @@ void OnnxInferImpl::print_model_details() {
   }
 }

-holoinfer_datatype OnnxInferImpl::get_holoinfer_datatype(ONNXTensorElementDataType data_type) {
+static holoinfer_datatype get_holoinfer_datatype(ONNXTensorElementDataType data_type) {
   switch (data_type) {
     case ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT:
       return holoinfer_datatype::h_Float32;
@@ -145,11 +128,36 @@ holoinfer_datatype OnnxInferImpl::get_holoinfer_datatype(ONNXTensorElementDataTy
     case ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64:
       return holoinfer_datatype::h_Int64;
     case ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT8:
       return holoinfer_datatype::h_UInt8;
+    case ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT16:
+      return holoinfer_datatype::h_Float16;
     default:
       return holoinfer_datatype::h_Unsupported;
   }
 }

+static ONNXTensorElementDataType get_onnx_datatype(holoinfer_datatype data_type) {
+  switch (data_type) {
+    case holoinfer_datatype::h_Float32:
+      return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT;
+    case holoinfer_datatype::h_Int8:
+      return ONNX_TENSOR_ELEMENT_DATA_TYPE_INT8;
+    case holoinfer_datatype::h_Int32:
+      return ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32;
+    case holoinfer_datatype::h_Int64:
+      return ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64;
+    case holoinfer_datatype::h_UInt8:
+      return ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT8;
+    case holoinfer_datatype::h_Float16:
+      return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT16;
+    default:
+      HOLOSCAN_LOG_INFO(
+          "The ONNX Runtime backend supports the following input data types: float, float16, "
+          "int8, int32, int64, uint8");
+      HOLOSCAN_LOG_ERROR("Unsupported datatype in Onnx backend tensor creation.");
+      return ONNX_TENSOR_ELEMENT_DATA_TYPE_UNDEFINED;
+  }
+}
+
 void OnnxInfer::populate_model_details() {
   impl_->populate_model_details();
 }
@@ -199,17 +207,59 @@ int OnnxInfer::set_holoscan_inf_onnx_session_options() {
 int OnnxInferImpl::set_holoscan_inf_onnx_session_options() {
   session_options_.SetIntraOpNumThreads(1);
   session_options_.SetInterOpNumThreads(1);
-  if (use_cuda_) { session_options_.AppendExecutionProvider_CUDA(cuda_options_); }
+  if (use_cuda_) {
+    // create and initialize TensorRT provider options
+    Ort::ThrowOnError(Ort::GetApi().CreateTensorRTProviderOptions(&tensor_rt_options_));
+
+    const std::filesystem::path path(model_path_);
+    std::filesystem::path trt_engine_cache_path(model_path_);
+    trt_engine_cache_path.replace_extension("");
+    trt_engine_cache_path += "_onnx_cache_" + Ort::GetVersionString();
+
+    const std::vector<const char*> option_keys = {
+        "trt_fp16_enable",
+        "trt_engine_cache_enable",
+        "trt_engine_cache_path",
+        "trt_timing_cache_enable",
+        "trt_timing_cache_path",
+    };
+    const std::vector<const char*> option_values = {
+        enable_fp16_ ? "1" : "0",       // trt_fp16_enable
+        "1",                            // trt_engine_cache_enable
+        trt_engine_cache_path.c_str(),  // trt_engine_cache_path
+        "1",                            // trt_timing_cache_enable
+        trt_engine_cache_path.c_str(),  // trt_timing_cache_path
+    };
+    assert(option_keys.size() == option_values.size());
+    Ort::ThrowOnError(Ort::GetApi().UpdateTensorRTProviderOptions(
+        tensor_rt_options_, option_keys.data(), option_values.data(), option_keys.size()));
+    Ort::ThrowOnError(Ort::GetApi().UpdateTensorRTProviderOptionsWithValue(
+        tensor_rt_options_, "user_compute_stream", cuda_stream_));
+
+    // add the TensorRT provider
+    session_options_.AppendExecutionProvider_TensorRT_V2(*tensor_rt_options_);
+
+    // create and initialize CUDA provider options
+    Ort::ThrowOnError(Ort::GetApi().CreateCUDAProviderOptions(&cuda_options_));
+    Ort::ThrowOnError(Ort::GetApi().UpdateCUDAProviderOptionsWithValue(
+        cuda_options_, "user_compute_stream", cuda_stream_));
+
+    // add the CUDA provider
+    session_options_.AppendExecutionProvider_CUDA_V2(*cuda_options_);
+  }
   session_options_.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_EXTENDED);
   return 0;
 }

-extern "C" OnnxInfer* NewOnnxInfer(const std::string& model_file_path, bool cuda_flag) {
-  return new OnnxInfer(model_file_path, cuda_flag);
+extern "C" OnnxInfer* NewOnnxInfer(const std::string& model_file_path, bool enable_fp16,
+                                   bool cuda_flag, bool cuda_buf_in, bool cuda_buf_out) {
+  return new OnnxInfer(model_file_path, enable_fp16, cuda_flag, cuda_buf_in, cuda_buf_out);
 }

-OnnxInfer::OnnxInfer(const std::string& model_file_path, bool cuda_flag)
-    : impl_(new OnnxInferImpl(model_file_path, cuda_flag)) {}
+OnnxInfer::OnnxInfer(const std::string& model_file_path, bool enable_fp16, bool cuda_flag,
+                     bool cuda_buf_in, bool cuda_buf_out)
+    : impl_(new OnnxInferImpl(model_file_path, enable_fp16, cuda_flag, cuda_buf_in, cuda_buf_out)) {
+}

 OnnxInfer::~OnnxInfer() {
   if (impl_) {
@@ -218,21 +268,73 @@ OnnxInfer::~OnnxInfer() {
   }
 }

-OnnxInferImpl::OnnxInferImpl(const std::string& model_file_path, bool cuda_flag)
-    : model_path_(model_file_path), use_cuda_(cuda_flag) {
+static void logging_function(void* param, OrtLoggingLevel severity, const char* category,
+                             const char* logid, const char* code_location, const char* message) {
+  LogLevel log_level;
+  switch (severity) {
+    case OrtLoggingLevel::ORT_LOGGING_LEVEL_FATAL:
+      log_level = LogLevel::CRITICAL;
+      break;
+    case OrtLoggingLevel::ORT_LOGGING_LEVEL_ERROR:
+      log_level = LogLevel::ERROR;
+      break;
+    case OrtLoggingLevel::ORT_LOGGING_LEVEL_WARNING:
+      log_level = LogLevel::WARN;
+      break;
+    case OrtLoggingLevel::ORT_LOGGING_LEVEL_INFO:
+      log_level = LogLevel::INFO;
+      break;
+    case OrtLoggingLevel::ORT_LOGGING_LEVEL_VERBOSE:
+      log_level = LogLevel::DEBUG;
+      break;
+  }
+  HOLOSCAN_LOG_CALL(log_level, "Onnxruntime {} {}", code_location, message);
+}
+
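For orientation, the `enable_fp16`/`cuda_buf_in`/`cuda_buf_out` flags plumbed through `NewOnnxInfer` above correspond to existing `InferenceOp` parameters; a hedged application-side sketch follows (the YAML key `"inference"` and the allocator choice are placeholders):

```cpp
auto inference = make_operator<holoscan::ops::InferenceOp>(
    "inference",
    from_config("inference"),  // model_path_map, pre_processor_map, etc.
    holoscan::Arg("backend", std::string("onnxrt")),
    holoscan::Arg("enable_fp16", true),     // forwarded as enable_fp16 -> trt_fp16_enable
    holoscan::Arg("input_on_cuda", true),   // forwarded as cuda_buf_in
    holoscan::Arg("output_on_cuda", true),  // forwarded as cuda_buf_out
    holoscan::Arg("allocator", make_resource<holoscan::UnboundedAllocator>("allocator")));
```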
+OnnxInferImpl::OnnxInferImpl(const std::string& model_file_path, bool enable_fp16, bool cuda_flag,
+                             bool cuda_buf_in, bool cuda_buf_out)
+    : model_path_(model_file_path),
+      enable_fp16_(enable_fp16),
+      use_cuda_(cuda_flag),
+      cuda_buf_in_(cuda_buf_in),
+      cuda_buf_out_(cuda_buf_out) {
   try {
-    set_holoscan_inf_onnx_session_options();
+    OrtLoggingLevel logging_level;
+    switch (log_level()) {
+      case LogLevel::OFF:
+      case LogLevel::CRITICAL:
+        logging_level = OrtLoggingLevel::ORT_LOGGING_LEVEL_FATAL;
+        break;
+      case LogLevel::ERROR:
+        logging_level = OrtLoggingLevel::ORT_LOGGING_LEVEL_ERROR;
+        break;
+      case LogLevel::WARN:
+        logging_level = OrtLoggingLevel::ORT_LOGGING_LEVEL_WARNING;
+        break;
+      case LogLevel::INFO:
+        logging_level = OrtLoggingLevel::ORT_LOGGING_LEVEL_INFO;
+        break;
+      case LogLevel::DEBUG:
+      case LogLevel::TRACE:
+        logging_level = OrtLoggingLevel::ORT_LOGGING_LEVEL_VERBOSE;
+        break;
+    }
+    env_ = std::make_unique<Ort::Env>(logging_level, "onnx", logging_function, nullptr);
+    if (!env_) {
+      HOLOSCAN_LOG_ERROR("Env creation failed in Onnx inference constructor");
+      throw std::runtime_error("Onnxruntime env creation failed");
+    }

-    auto env_local = std::make_unique<Ort::Env>(ORT_LOGGING_LEVEL_WARNING, "test");
-    env_ = std::move(env_local);
+    check_cuda(cudaStreamCreate(&cuda_stream_));
+    check_cuda(cudaEventCreateWithFlags(&cuda_event_, cudaEventDisableTiming));
+
+    set_holoscan_inf_onnx_session_options();

-    auto _session =
-        std::make_unique<Ort::Session>(*env_, model_file_path.c_str(), session_options_);
-    if (!_session) {
+    session_ = std::make_unique<Ort::Session>(*env_, model_file_path.c_str(), session_options_);
+    if (!session_) {
       HOLOSCAN_LOG_ERROR("Session creation failed in Onnx inference constructor");
       throw std::runtime_error("Onnxruntime session creation failed");
     }
-    session_ = std::move(_session);

     populate_model_details();
   } catch (const Ort::Exception& exception) {
     HOLOSCAN_LOG_ERROR(exception.what());
@@ -240,60 +342,55 @@ OnnxInferImpl::OnnxInferImpl(const std::string& model_file_path, bool cuda_flag)
   }
 }

-Ort::Value OnnxInferImpl::create_tensor(const std::shared_ptr<DataBuffer>& input_buffer,
-                                        const std::vector<int64_t>& dims) {
-  auto data_type = input_buffer->get_datatype();
+OnnxInferImpl::~OnnxInferImpl() {
+  if (tensor_rt_options_) { Ort::GetApi().ReleaseTensorRTProviderOptions(tensor_rt_options_); }
+  if (cuda_options_) { Ort::GetApi().ReleaseCUDAProviderOptions(cuda_options_); }
+  if (cuda_stream_) { cudaStreamDestroy(cuda_stream_); }
+  if (cuda_event_) { cudaEventDestroy(cuda_event_); }
+}

-  switch (data_type) {
-    case holoinfer_datatype::h_Float32:
-      return create_tensor_core<float>(input_buffer, dims, memory_info_);
-    case holoinfer_datatype::h_Int8:
-      return create_tensor_core<int8_t>(input_buffer, dims, memory_info_);
-    case holoinfer_datatype::h_Int32:
-      return create_tensor_core<int32_t>(input_buffer, dims, memory_info_);
-    case holoinfer_datatype::h_Int64:
-      return create_tensor_core<int64_t>(input_buffer, dims, memory_info_);
-    case holoinfer_datatype::h_UInt8:
-      return create_tensor_core<uint8_t>(input_buffer, dims, memory_info_);
-    default: {
-      HOLOSCAN_LOG_INFO(
-          "Onnxruntime backend is supported with following data types: float, int8, int32, int64, "
-          "uint8");
-      HOLOSCAN_LOG_ERROR("Unsupported datatype in Onnx backend tensor creation.");
+Ort::Value OnnxInferImpl::create_tensor(const std::shared_ptr<DataBuffer>& data_buffer,
+                                        const std::vector<int64_t>& dims, bool cuda_buf) {
+  const size_t tensor_size = accumulate(dims.begin(), dims.end(), 1, std::multiplies<size_t>());
+
+  const OrtMemoryInfo* info;
+  void* p_data;
+  if (cuda_buf) {
+    if (data_buffer->device_buffer_->size() != tensor_size) {
+      HOLOSCAN_LOG_ERROR("Onnx: Device buffer size mismatch, expected {}, but is {}.",
+                         tensor_size,
+                         data_buffer->device_buffer_->size());
       return Ort::Value(nullptr);
     }
+    p_data = data_buffer->device_buffer_->data();
+    info = memory_info_cuda_;
+  } else {
+    if (data_buffer->host_buffer_->size() != tensor_size) {
+      HOLOSCAN_LOG_ERROR("Onnx: Host buffer size mismatch, expected {}, but is {}.",
+                         tensor_size,
+                         data_buffer->host_buffer_->size());
+      return Ort::Value(nullptr);
+    }
+    p_data = data_buffer->host_buffer_->data();
+    info = memory_info_;
   }
-}
-
-void OnnxInferImpl::transfer_to_output(std::vector<std::shared_ptr<DataBuffer>>& output_buffer,
-                                       const size_t& index) {
-  size_t output_tensor_size = accumulate(
-      output_dims_[index].begin(), output_dims_[index].end(), 1, std::multiplies<size_t>());
-
-  auto data_type = output_buffer[index]->get_datatype();
-  switch (data_type) {
-    case holoinfer_datatype::h_Float32:
-      transfer_to_host<float>(output_buffer[index], output_tensors_[index], output_tensor_size);
-      break;
-    case holoinfer_datatype::h_Int8:
-      transfer_to_host<int8_t>(output_buffer[index], output_tensors_[index], output_tensor_size);
-      break;
-    case holoinfer_datatype::h_Int32:
-      transfer_to_host<int32_t>(output_buffer[index], output_tensors_[index], output_tensor_size);
-      break;
-    case holoinfer_datatype::h_Int64:
-      transfer_to_host<int64_t>(output_buffer[index], output_tensors_[index], output_tensor_size);
-      break;
-    case holoinfer_datatype::h_UInt8:
-      transfer_to_host<uint8_t>(output_buffer[index], output_tensors_[index], output_tensor_size);
-      break;
-    default:
-      HOLOSCAN_LOG_INFO(
-          "Onnxruntime backend is supported with following data types: float, int8, int32, int64, "
-          "uint8");
-      throw std::runtime_error("Unsupported datatype in output transfer with onnxrt backend.");
+  Ort::Value tensor(nullptr);
+  const ONNXTensorElementDataType element_data_type =
+      get_onnx_datatype(data_buffer->get_datatype());
+  if (cuda_buf && !use_cuda_) {
+    // create a tensor in CPU memory, we copy to/from this buffer before/after inference
+    tensor = Ort::Value::CreateTensor(allocator_, dims.data(), dims.size(), element_data_type);
+  } else {
+    // wrap the buffer
+    tensor = Ort::Value::CreateTensor(info,
+                                      static_cast<void*>(p_data),
+                                      tensor_size * get_element_size(data_buffer->get_datatype()),
+                                      dims.data(),
+                                      dims.size(),
+                                      element_data_type);
   }
+  return tensor;
 }

 InferStatus OnnxInfer::do_inference(const std::vector<std::shared_ptr<DataBuffer>>& input_buffer,
@@ -310,7 +407,9 @@ InferStatus OnnxInferImpl::do_inference(
   InferStatus status = InferStatus(holoinfer_code::H_ERROR);

   try {
-    check_cuda(cudaEventSynchronize(cuda_event_data));
+    // synchronize the CUDA stream used for inference with the CUDA event recorded when preparing
+    // the input data
+    check_cuda(cudaStreamWaitEvent(cuda_stream_, cuda_event_data));

     input_tensors_.clear();
     output_tensors_.clear();
@@ -325,27 +424,27 @@ InferStatus OnnxInferImpl::do_inference(
     }

     for (size_t a = 0; a < input_buffer.size(); a++) {
-      if (input_buffer[a]->host_buffer_->size() == 0) {
-        status.set_message("ONNX inference core: Input Host buffer empty.");
-        return status;
-      }
-
-      Ort::Value i_tensor = create_tensor(input_buffer[a], input_dims_[a]);
-
+      Ort::Value i_tensor = create_tensor(input_buffer[a], input_dims_[a], cuda_buf_in_);
       if (!i_tensor) {
         status.set_message("Onnxruntime: Error creating Ort tensor.");
         return status;
       }
+      if (cuda_buf_in_ && !use_cuda_) {
+        // Copy the input data to the input Ort tensor when inference runs on the CPU but the
+        // input buffer is on the device.
+        // Note: there is a bug in the C++ API: GetTensorRawData() returns a `const void*`
+        // instead of a `void*` as in the C API.
+        check_cuda(cudaMemcpyAsync(const_cast<void*>(i_tensor.GetTensorRawData()),
+                                   input_buffer[a]->device_buffer_->data(),
+                                   input_buffer[a]->device_buffer_->get_bytes(),
+                                   cudaMemcpyDeviceToHost,
+                                   cuda_stream_));
+      }
       input_tensors_.push_back(std::move(i_tensor));
     }

     for (unsigned int a = 0; a < output_buffer.size(); a++) {
-      if (output_buffer[a]->host_buffer_->size() == 0) {
-        status.set_message("ONNX inference core: Output Host buffer empty.");
-        return status;
-      }
-
-      Ort::Value o_tensor = create_tensor(output_buffer[a], output_dims_[a]);
+      Ort::Value o_tensor = create_tensor(output_buffer[a], output_dims_[a], cuda_buf_out_);

       if (!o_tensor) {
         status.set_message("Onnxruntime: Error creating output Ort tensor.");
@@ -354,6 +453,11 @@ InferStatus OnnxInferImpl::do_inference(
       output_tensors_.push_back(std::move(o_tensor));
     }

+    if (!use_cuda_) {
+      // synchronize CUDA with CPU if using CPU inference
+      check_cuda(cudaStreamSynchronize(cuda_stream_));
+    }
+
     session_->Run(Ort::RunOptions{nullptr},
                   input_names_.data(),
                   input_tensors_.data(),
@@ -362,8 +466,22 @@ InferStatus OnnxInferImpl::do_inference(
                   output_tensors_.data(),
                   output_tensors_.size());

-    for (unsigned int a = 0; a < output_buffer.size(); a++) {
-      transfer_to_output(output_buffer, a);
+    if (cuda_buf_out_ && !use_cuda_) {
+      for (size_t index = 0; index < output_buffer.size(); ++index) {
+        // Copy the output Ort tensor to the output device buffer when inference ran on the CPU
+        // but the output buffer is on the device.
+        check_cuda(cudaMemcpyAsync(output_buffer[index]->device_buffer_->data(),
+                                   output_tensors_[index].GetTensorRawData(),
+                                   output_buffer[index]->device_buffer_->get_bytes(),
+                                   cudaMemcpyHostToDevice,
+                                   cuda_stream_));
+      }
+    }
+
+    if (cuda_buf_out_ || use_cuda_) {
+      // record a CUDA event and pass it back to the caller
+      check_cuda(cudaEventRecord(cuda_event_, cuda_stream_));
+      *cuda_event_inference = cuda_event_;
     }
   } catch (const Ort::Exception& exception) {
     HOLOSCAN_LOG_ERROR(exception.what());
diff --git a/modules/holoinfer/src/infer/onnx/core.hpp b/modules/holoinfer/src/infer/onnx/core.hpp
index 5d5840ae..ee731ee4 100644
--- a/modules/holoinfer/src/infer/onnx/core.hpp
+++ b/modules/holoinfer/src/infer/onnx/core.hpp
@@ -43,9 +43,13 @@ class OnnxInfer : public InferBase {
   /**
    * @brief Constructor
    * @param model_file_path Path to onnx model file
+   * @param enable_fp16 Flag indicating whether the TensorRT engine conversion will use FP16.
    * @param cuda_flag Flag to show if inference will happen using CUDA
+   * @param cuda_buf_in Flag indicating whether the input data buffer is in CUDA memory
+   * @param cuda_buf_out Flag indicating whether the output data buffer will be in CUDA memory
    *
    */
-  OnnxInfer(const std::string& model_file_path, bool cuda_flag);
+  OnnxInfer(const std::string& model_file_path, bool enable_fp16, bool cuda_flag, bool cuda_buf_in,
+            bool cuda_buf_out);

   /**
    * @brief Destructor
@@ -65,7 +69,7 @@ class OnnxInfer : public InferBase {
    *
    */
   InferStatus do_inference(const std::vector<std::shared_ptr<DataBuffer>>& input_data,
                            std::vector<std::shared_ptr<DataBuffer>>& output_buffer,
-                           cudaEvent_t cuda_event_data, cudaEvent_t *cuda_event_inference);
+                           cudaEvent_t cuda_event_data, cudaEvent_t* cuda_event_inference);

   /**
    * @brief Populate class parameters with model details and values
diff --git a/modules/holoinfer/src/infer/torch/core.cpp b/modules/holoinfer/src/infer/torch/core.cpp
index 539b8f4e..8d36ef0c 100644
--- a/modules/holoinfer/src/infer/torch/core.cpp
+++ b/modules/holoinfer/src/infer/torch/core.cpp
@@ -21,6 +21,8 @@

 #include

+#include
+
 #include
 #include
 #include
@@ -162,6 +164,9 @@ torch::Tensor TorchInferImpl::create_tensor(const std::shared_ptr<DataBuffer>& i
     case holoinfer_datatype::h_Float32:
       return create_tensor_core(
           input_buffer, dims, torch::kF32, infer_device_, input_device_, cstream);
+    case holoinfer_datatype::h_Float16:
+      return create_tensor_core(
+          input_buffer, dims, torch::kF16, infer_device_, input_device_, cstream);
     case holoinfer_datatype::h_Int8:
       return create_tensor_core(
           input_buffer, dims, torch::kI8, infer_device_, input_device_, cstream);
@@ -176,7 +181,8 @@ torch::Tensor TorchInferImpl::create_tensor(const std::shared_ptr<DataBuffer>& i
           input_buffer, dims, torch::kUInt8, infer_device_, input_device_, cstream);
     default: {
       HOLOSCAN_LOG_INFO(
-          "Torch backend is supported with following data types: float, int8, int32, int64, uint8");
+          "Torch backend supports the following input data types: float, float16, int8, int32, "
+          "int64, uint8");
       HOLOSCAN_LOG_ERROR("Unsupported datatype in Torch backend tensor creation.");
       return torch::empty({0});
     }
@@ -295,7 +301,8 @@ InferStatus TorchInferImpl::transfer_to_output(
       cstream);
     default:
       HOLOSCAN_LOG_INFO(
-          "Torch backend is supported with following data types: float, int8, int32, int64, uint8");
+          "Torch backend supports the following output data types: float, int8, int32, "
+          "int64, uint8");
       return InferStatus(holoinfer_code::H_ERROR, "Unsupported datatype for transfer.");
   }
 }
diff --git a/modules/holoinfer/src/infer/trt/core.cpp b/modules/holoinfer/src/infer/trt/core.cpp
index b4c691d9..06caa5f3 100644
--- a/modules/holoinfer/src/infer/trt/core.cpp
+++ b/modules/holoinfer/src/infer/trt/core.cpp
@@ -191,8 +191,13 @@ bool TrtInfer::initialize_parameters() {
         holoinfer_type = holoinfer_datatype::h_UInt8;
         break;
       }
+      case nvinfer1::DataType::kHALF: {
+        holoinfer_type = holoinfer_datatype::h_Float16;
+        break;
+      }
       default: {
-        HOLOSCAN_LOG_INFO("TensorRT backend supports float, int8, int32, uint8 data types.");
+        HOLOSCAN_LOG_INFO(
+            "TensorRT backend supports float, float16, int8, int32, uint8 data types.");
         HOLOSCAN_LOG_ERROR("Data type not supported.");
         return false;
       }
@@ -259,26 +264,23 @@ InferStatus TrtInfer::do_inference(const std::vector<std::shared_ptr<DataBuffer>
       return status;
     }

-    if (input_buffer->device_buffer_->data() == nullptr) {
-      status.set_message(" TRT inference core: Data in Input Device buffer is null.");
-      return status;
-    }
-
-    // Host to Device transfer
-    if (!cuda_buf_in_) {
+    if (cuda_buf_in_) {
+      if (input_buffer->device_buffer_->data() == nullptr) {
+        status.set_message(" TRT inference core: Data in Input Device buffer is null.");
+        return status;
+      }
+    } else {
+      // Host to Device transfer
       if (input_buffer->host_buffer_->size() == 0) {
         status.set_message(" TRT inference core: Empty input host buffer.");
         return status;
       }

-      if (input_buffer->device_buffer_->size() != input_buffer->host_buffer_->size()) {
-        status.set_message(" TRT inference core: Input Host and Device buffer size mismatch.");
-        return status;
-      }
+      input_buffer->device_buffer_->resize(input_buffer->host_buffer_->size());

       auto cstatus = cudaMemcpyAsync(input_buffer->device_buffer_->data(),
                                      input_buffer->host_buffer_->data(),
-                                     input_buffer->device_buffer_->get_bytes(),
+                                     input_buffer->host_buffer_->get_bytes(),
                                      cudaMemcpyHostToDevice,
                                      cuda_stream_);
       if (cstatus != cudaSuccess) {
@@ -377,7 +379,7 @@ InferStatus TrtInfer::do_inference(const std::vector<std::shared_ptr<DataBuffer>

     // Instantiate graphExec from graph. The error node and error message parameters are unused
     // here.
-    check_cuda(cudaGraphInstantiate(&cuda_graph_instance_, cuda_graph, nullptr, nullptr, 0));
+    check_cuda(cudaGraphInstantiate(&cuda_graph_instance_, cuda_graph, 0));
   }

   check_cuda(cudaGraphDestroy(cuda_graph));
diff --git a/modules/holoinfer/src/manager/infer_manager.cpp b/modules/holoinfer/src/manager/infer_manager.cpp
index 6082eb2d..763ceaf4 100644
--- a/modules/holoinfer/src/manager/infer_manager.cpp
+++ b/modules/holoinfer/src/manager/infer_manager.cpp
@@ -17,7 +17,9 @@
 #include "infer_manager.hpp"

 #include
+
+#include <sys/sysinfo.h>
 #include
 #include
 #include
@@ -249,11 +251,6 @@ InferStatus ManagerInfer::set_inference_params(std::shared_ptr<InferenceSpecs>&
     }

     case holoinfer_backend::h_onnx: {
-      if (cuda_buffer_in_ || cuda_buffer_out_) {
-        status.set_message(
-            "Inference manager, Cuda based in and out buffer not supported in onnxrt");
-        return status;
-      }
       if (inference_specs->is_engine_path_) {
         status.set_message(
             "Inference manager, Engine path cannot be true with onnx runtime backend");
@@ -266,12 +263,6 @@ InferStatus ManagerInfer::set_inference_params(std::shared_ptr<InferenceSpecs>&
         return status;
       }

-      bool is_aarch64 = is_platform_aarch64();
-      if (is_aarch64 && inference_specs->oncuda_) {
-        status.set_message("Onnxruntime with CUDA not supported on aarch64.");
-        return status;
-      }
-
 #if use_onnxruntime
       HOLOSCAN_LOG_INFO("Searching for ONNX Runtime libraries");
       void* handle = dlopen("libholoscan_infer_onnx_runtime.so", RTLD_NOW);
@@ -281,7 +272,7 @@ InferStatus ManagerInfer::set_inference_params(std::shared_ptr<InferenceSpecs>&
         return status;
       }
       HOLOSCAN_LOG_INFO("Found ONNX Runtime libraries");
-      using NewOnnxInfer = OnnxInfer* (*)(const std::string&, bool);
+      using NewOnnxInfer = OnnxInfer* (*)(const std::string&, bool, bool, bool, bool);
       auto new_ort_infer = reinterpret_cast<NewOnnxInfer>(dlsym(handle, "NewOnnxInfer"));
       if (!new_ort_infer) {
         HOLOSCAN_LOG_ERROR(dlerror());
@@ -290,7 +281,11 @@ InferStatus ManagerInfer::set_inference_params(std::shared_ptr<InferenceSpecs>&
         return status;
       }
       dlclose(handle);
-      auto context = new_ort_infer(model_path, inference_specs->oncuda_);
+      auto context = new_ort_infer(model_path,
+                                   inference_specs->use_fp16_,
+                                   inference_specs->oncuda_,
+                                   cuda_buffer_in_,
+                                   cuda_buffer_out_);
       holo_infer_context_[model_name] = std::unique_ptr<OnnxInfer>(context);
 #else
       HOLOSCAN_LOG_ERROR("Onnxruntime backend not supported or incorrectly installed.");
@@ -363,9 +358,6 @@ InferStatus ManagerInfer::set_inference_params(std::shared_ptr<InferenceSpecs>&
       // if the GPU for inference is not same as GPU-dt
       DataMap dm;

-      // in and out buffer on cuda not supported for onnxrt.
-      bool allocate_cuda = (backend_type.compare("onnxrt") == 0) ? false : true;
-
       for (unsigned int d = 0; d < out_tensor_names.size(); d++) {
         std::vector<int64_t> dims = holo_infer_context_.at(model_name)->get_output_dims()[d];
         auto datatype = holo_infer_context_.at(model_name)->get_output_datatype()[d];
@@ -378,7 +370,7 @@ InferStatus ManagerInfer::set_inference_params(std::shared_ptr<InferenceSpecs>&
                                        dims,
                                        datatype,
                                        out_tensor_names[d],
-                                       allocate_cuda,
+                                       true /* allocate_cuda */,
                                        device_id);
         if (astatus.get_code() != holoinfer_code::H_SUCCESS) {
           astatus.display_message();
@@ -390,8 +382,8 @@ InferStatus ManagerInfer::set_inference_params(std::shared_ptr<InferenceSpecs>&

         if (device_id != device_gpu_dt_) {
           check_cuda(cudaSetDevice(device_id));
-          auto astatus =
-              allocate_buffers(dm, dims, datatype, out_tensor_names[d], allocate_cuda, device_id);
+          auto astatus = allocate_buffers(
+              dm, dims, datatype, out_tensor_names[d], true /* allocate_cuda */, device_id);
           if (astatus.get_code() != holoinfer_code::H_SUCCESS) {
             astatus.display_message();
             status.set_message("Allocation failed for output tensor: " + out_tensor_names[d]);
@@ -459,8 +451,8 @@ InferStatus ManagerInfer::set_inference_params(std::shared_ptr<InferenceSpecs>&
           return status;
         }

-        auto astatus =
-            allocate_buffers(dm_in, dims, datatype, in_tensor_names[d], allocate_cuda, device_id);
+        auto astatus = allocate_buffers(
+            dm_in, dims, datatype, in_tensor_names[d], true /* allocate_cuda */, device_id);
         if (astatus.get_code() != holoinfer_code::H_SUCCESS) {
           astatus.display_message();
           status.set_message("Allocation failed for output tensor: " + out_tensor_names[d]);
@@ -487,6 +479,13 @@ InferStatus ManagerInfer::set_inference_params(std::shared_ptr<InferenceSpecs>&
     }

     check_cuda(cudaEventCreateWithFlags(&cuda_event_, cudaEventDisableTiming));
+
+    if (inference_specs->parallel_processing_) {
+      // create the work queue for parallel processing, limiting the worker count to the
+      // available core count
+      work_queue_ = std::make_unique<WorkQueue>(
+          std::min(infer_param_.size(), static_cast<size_t>(get_nprocs())));
+    }
   } catch (const std::runtime_error& rt) {
     raise_error("Inference Manager", "Setting Inference parameters: " + std::string(rt.what()));
   } catch (...)
 {
@@ -728,8 +727,7 @@ InferStatus ManagerInfer::execute_inference(std::shared_ptr<InferenceSpecs>& inf

   std::chrono::steady_clock::time_point s_time;
   std::chrono::steady_clock::time_point e_time;
-
-  std::map<std::string, std::future<InferStatus>> inference_futures;
+  std::map<std::string, std::shared_ptr<std::packaged_task<InferStatus()>>> inference_futures;

   s_time = std::chrono::steady_clock::now();
   for (const auto& [model_instance, _] : infer_param_) {
     bool process_model = true;
@@ -766,13 +764,12 @@ InferStatus ManagerInfer::execute_inference(std::shared_ptr<InferenceSpecs>& inf
       }
     } else {
       inference_futures.insert({model_instance,
-                                std::async(std::launch::async,
-                                           std::bind(&ManagerInfer::run_core_inference,
-                                                     this,
-                                                     model_instance,
-                                                     permodel_preprocess_data,
-                                                     permodel_output_data,
-                                                     cuda_stream))});
+                                work_queue_->async(std::bind(&ManagerInfer::run_core_inference,
+                                                             this,
+                                                             model_instance,
+                                                             permodel_preprocess_data,
+                                                             permodel_output_data,
+                                                             cuda_stream))});
     }
   }

@@ -780,7 +777,7 @@ InferStatus ManagerInfer::execute_inference(std::shared_ptr<InferenceSpecs>& inf
   if (parallel_processing_) {
     std::string failed_models;
     for (auto& inf_fut : inference_futures) {
-      InferStatus infer_status = inf_fut.second.get();
+      InferStatus infer_status = inf_fut.second->get_future().get();
       if (infer_status.get_code() != holoinfer_code::H_SUCCESS) {
         status.set_code(holoinfer_code::H_ERROR);
         infer_status.display_message();
@@ -788,8 +785,8 @@ InferStatus ManagerInfer::execute_inference(std::shared_ptr<InferenceSpecs>& inf
       }
     }
     if (status.get_code() != holoinfer_code::H_SUCCESS) {
-      status.set_message("Inference manager, Inference failed in execution for" + failed_models);
-      return status;
+      status.set_message("Inference manager, Inference failed in execution for " + failed_models);
+      return status;
     }
   }

@@ -841,7 +838,7 @@ InferStatus InferContext::execute_inference(std::shared_ptr<InferenceSpecs>& inf
     status = g_manager->execute_inference(inference_specs, cuda_stream);
   } catch (const std::exception& e) {
     status.set_code(holoinfer_code::H_ERROR);
-    status.set_message(std::string("Inference manager, Error in inference setup: ") + e.what());
+    status.set_message(std::string("Inference manager, Error in inference execution: ") + e.what());
     return status;
   }
diff --git a/modules/holoinfer/src/manager/infer_manager.hpp b/modules/holoinfer/src/manager/infer_manager.hpp
index 71cb5d8f..ff20d892 100644
--- a/modules/holoinfer/src/manager/infer_manager.hpp
+++ b/modules/holoinfer/src/manager/infer_manager.hpp
@@ -30,6 +30,7 @@
 #include
 #include
 #include
+#include

 #if use_onnxruntime
 #include
@@ -177,6 +178,9 @@ class ManagerInfer {
   /// Map storing inferred output dimension per tensor
   DimType models_output_dims_;

+  /// Work queue used for parallel processing
+  std::unique_ptr<WorkQueue> work_queue_;
+
   /// Map storing Backends supported with holoinfer mapping
   inline static std::map<std::string, holoinfer_backend> supported_backend_{
       {"onnxrt", holoinfer_backend::h_onnx},
diff --git a/modules/holoinfer/src/process/transforms/generate_boxes.cpp b/modules/holoinfer/src/process/transforms/generate_boxes.cpp
index 584e5323..6a181f3e 100644
--- a/modules/holoinfer/src/process/transforms/generate_boxes.cpp
+++ b/modules/holoinfer/src/process/transforms/generate_boxes.cpp
@@ -16,6 +16,8 @@
  */
 #include "generate_boxes.hpp"

+#include
+
 #include
 #include
 #include
diff --git a/modules/holoinfer/src/utils/infer_buffer.cpp b/modules/holoinfer/src/utils/infer_buffer.cpp
index 3c3305f7..ceb1bbda 100644
--- a/modules/holoinfer/src/utils/infer_buffer.cpp
+++ b/modules/holoinfer/src/utils/infer_buffer.cpp
@@ -43,6 +43,8 @@ uint32_t get_element_size(holoinfer_datatype element_type) noexcept {
     case holoinfer_datatype::h_Int8:
    case holoinfer_datatype::h_UInt8:
       return 1;
+    case holoinfer_datatype::h_Float16:
+      return 2;
   }
   return 0;
 }
diff --git a/modules/holoinfer/src/utils/work_queue.cpp b/modules/holoinfer/src/utils/work_queue.cpp
new file mode 100644
index 00000000..1ec0cc23
--- /dev/null
+++ b/modules/holoinfer/src/utils/work_queue.cpp
@@ -0,0 +1,85 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "work_queue.hpp"
+
+#include
+
+namespace holoscan {
+namespace inference {
+
+WorkQueue::WorkQueue(uint32_t threads) {
+  threads_.resize(threads);
+  for (uint32_t i = 0; i < threads; ++i) { add_thread(i); }
+}
+
+WorkQueue::~WorkQueue() {
+  stop();
+}
+
+void WorkQueue::add_thread(int i) {
+  auto f = [this, i]() {
+    std::optional<std::unique_ptr<std::function<void()>>> func;
+
+    func = queue_.pop();
+
+    while (true) {
+      while (func) {
+        (**func)();
+        if (done_) { return; }
+        func = queue_.pop();
+      }
+
+      // queue is empty, wait
+      std::unique_lock<std::mutex> lock(mutex_);
+      condition_.wait(lock, [this, &func]() {
+        func = queue_.pop();
+        return func || done_;
+      });
+      // if there is no function to execute, then `done_` was true; exit.
+      if (!func) {
+        return;
+      }
+    }
+  };
+  threads_[i].reset(new std::thread(f));
+}
+
+void WorkQueue::stop() {
+  if (done_) { return; }
+
+  // signal the threads to finish
+  done_ = true;
+
+  // wake up all threads
+  {
+    std::unique_lock<std::mutex> lock(mutex_);
+    condition_.notify_all();
+  }
+
+  // wait for the threads to finish
+  for (size_t i = 0; i < threads_.size(); ++i) {
+    if (threads_[i]->joinable()) { threads_[i]->join(); }
+  }
+
+  // clear the queue
+  while (queue_.pop()) {}
+
+  threads_.clear();
+}
+
+}  // namespace inference
+}  // namespace holoscan
diff --git a/modules/holoinfer/src/utils/work_queue.hpp b/modules/holoinfer/src/utils/work_queue.hpp
new file mode 100644
index 00000000..eac9c016
--- /dev/null
+++ b/modules/holoinfer/src/utils/work_queue.hpp
@@ -0,0 +1,129 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef MODULES_HOLOINFER_SRC_UTILS_WORK_QUEUE_HPP
+#define MODULES_HOLOINFER_SRC_UTILS_WORK_QUEUE_HPP
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace holoscan {
+namespace inference {
+
+/**
+ * A thread-safe queue.
+ *
+ * @tparam T type of object contained in the queue
+ */
+template <typename T>
+class ThreadSafeQueue {
+ public:
+  /**
+   * Add an object to the queue.
+   *
+   * @param value object to add
+   */
+  void push(T& value) {
+    std::unique_lock<std::mutex> lock(mutex_);
+    queue_.push(std::move(value));
+  }
+
+  /**
+   * Pop an object off the queue.
+   *
+   * @return std::optional holding the dequeued object, or empty if the queue was empty
+   */
+  std::optional<T> pop() {
+    std::unique_lock<std::mutex> lock(mutex_);
+    if (queue_.empty()) { return {}; }
+    T item = std::move(queue_.front());
+    queue_.pop();
+    return std::optional<T>(std::move(item));
+  }
+
+ private:
+  std::queue<T> queue_;
+  std::mutex mutex_;
+};
+
+/**
+ * The WorkQueue class maintains a pool of threads which execute jobs added with the `async()`
+ * function.
+ */
+class WorkQueue {
+ public:
+  /**
+   * Construct a new work queue.
+   *
+   * @param threads number of worker threads
+   */
+  explicit WorkQueue(uint32_t threads);
+  WorkQueue() = delete;
+
+  /**
+   * Destroy the work queue.
+   */
+  ~WorkQueue();
+
+  /**
+   * Enqueue a function to be executed asynchronously by a thread of the pool.
+   *
+   * @tparam F function type
+   * @tparam Args argument types
+   * @param f function (can be any callable object)
+   * @param args function arguments
+   * @return std::shared_ptr holding a std::packaged_task
+   */
+  template <typename F, typename... Args>
+  auto async(F&& f, Args&&... args)
+      -> std::shared_ptr<
+          std::packaged_task<std::invoke_result_t<std::decay_t<F>, std::decay_t<Args>...>()>> {
+    auto packed_task = std::make_shared<
+        std::packaged_task<std::invoke_result_t<std::decay_t<F>, std::decay_t<Args>...>()>>(
+        std::bind(std::forward<F>(f), std::forward<Args>(args)...));
+    auto function = std::make_unique<std::function<void()>>([packed_task]() { (*packed_task)(); });
+    queue_.push(function);
+
+    std::unique_lock<std::mutex> lock(mutex_);
+    condition_.notify_one();
+    return packed_task;
+  }
+
+  /**
+   * Stop all threads.
+   */
+  void stop();
+
+ private:
+  void add_thread(int i);
+
+  std::vector<std::unique_ptr<std::thread>> threads_;
+  ThreadSafeQueue<std::unique_ptr<std::function<void()>>> queue_;
+  std::atomic<bool> done_ = false;
+  std::mutex mutex_;
+  std::condition_variable condition_;
+};
+
+}  // namespace inference
+}  // namespace holoscan
+
+#endif /* MODULES_HOLOINFER_SRC_UTILS_WORK_QUEUE_HPP */
diff --git a/modules/holoviz/src/CMakeLists.txt b/modules/holoviz/src/CMakeLists.txt
index 4288e6c2..782262fc 100644
--- a/modules/holoviz/src/CMakeLists.txt
+++ b/modules/holoviz/src/CMakeLists.txt
@@ -62,6 +62,7 @@ target_sources(${PROJECT_NAME}
     cuda/convert.cu
     cuda/cuda_service.cpp
     cuda/gen_depth_map.cu
+    cuda/gen_primitive_vertices.cu

     layers/geometry_layer.cpp
     layers/image_layer.cpp
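A brief usage sketch of the `WorkQueue` API defined above; the callable and arguments are illustrative. Note that `async()` returns the `std::packaged_task` wrapper itself, so the caller obtains the future explicitly:

```cpp
#include "work_queue.hpp"

int main() {
  holoscan::inference::WorkQueue queue(4);  // pool of four worker threads

  // Enqueue a job; async() returns a shared_ptr to the packaged_task.
  auto task = queue.async([](int a, int b) { return a + b; }, 2, 3);

  // Block until a worker has executed the job, then fetch the result.
  const int sum = task->get_future().get();  // sum == 5

  queue.stop();  // joins all workers; also invoked by the destructor
  return sum == 5 ? 0 : 1;
}
```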
diff --git a/modules/holoviz/src/CMakeLists.txt b/modules/holoviz/src/CMakeLists.txt
index 4288e6c2..782262fc 100644
--- a/modules/holoviz/src/CMakeLists.txt
+++ b/modules/holoviz/src/CMakeLists.txt
@@ -62,6 +62,7 @@ target_sources(${PROJECT_NAME}
         cuda/convert.cu
         cuda/cuda_service.cpp
         cuda/gen_depth_map.cu
+        cuda/gen_primitive_vertices.cu
         layers/geometry_layer.cpp
         layers/image_layer.cpp
diff --git a/modules/holoviz/src/cuda/gen_primitive_vertices.cu b/modules/holoviz/src/cuda/gen_primitive_vertices.cu
new file mode 100644
index 00000000..89b3d21d
--- /dev/null
+++ b/modules/holoviz/src/cuda/gen_primitive_vertices.cu
@@ -0,0 +1,171 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "gen_primitive_vertices.hpp"
+
+#include <cmath>
+
+#include "cuda_service.hpp"
+
+namespace holoscan::viz {
+
+namespace {
+
+__global__ void copy_and_add_zero(uint32_t vertex_count, const float* src, float* dst) {
+  const uint vertex_index = blockIdx.x * blockDim.x + threadIdx.x;
+  if (vertex_index >= vertex_count) { return; }
+
+  // just copy and add zero for Z
+  src += vertex_index * 2;
+  dst += vertex_index * 3;
+  dst[0] = src[0];
+  dst[1] = src[1];
+  dst[2] = 0.F;
+}
+
+// generate crosses
+__global__ void gen_cross_list_vertices(uint32_t primitive_count, float aspect_ratio,
+                                        const float* src, float* dst) {
+  const uint primitive_index = blockIdx.x * blockDim.x + threadIdx.x;
+  if (primitive_index >= primitive_count) { return; }
+
+  src += primitive_index * 3;
+  const float x = src[0];
+  const float y = src[1];
+  const float sy = src[2] * 0.5F;
+  const float sx = sy / aspect_ratio;
+
+  dst += primitive_index * 12;
+  dst[0] = x - sx;
+  dst[1] = y;
+  dst[2] = 0.F;
+  dst[3] = x + sx;
+  dst[4] = y;
+  dst[5] = 0.F;
+  dst[6] = x;
+  dst[7] = y - sy;
+  dst[8] = 0.F;
+  dst[9] = x;
+  dst[10] = y + sy;
+  dst[11] = 0.F;
+}
+// generate rectangles
+__global__ void gen_rectangle_list_vertices(uint32_t primitive_count, const float* src,
+                                            float* dst) {
+  const uint primitive_index = blockIdx.x * blockDim.x + threadIdx.x;
+  if (primitive_index >= primitive_count) { return; }
+
+  src += primitive_index * 4;
+  const float x0 = src[0];
+  const float y0 = src[1];
+  const float x1 = src[2];
+  const float y1 = src[3];
+
+  dst += primitive_index * 15;
+  dst[0] = x0;
+  dst[1] = y0;
+  dst[2] = 0.F;
+  dst[3] = x1;
+  dst[4] = y0;
+  dst[5] = 0.F;
+  dst[6] = x1;
+  dst[7] = y1;
+  dst[8] = 0.F;
+  dst[9] = x0;
+  dst[10] = y1;
+  dst[11] = 0.F;
+  dst[12] = x0;
+  dst[13] = y0;
+  dst[14] = 0.F;
+}
+
+__global__ void gen_oval_list_vertices(uint32_t primitive_count, const float* src, float* dst) {
+  const uint primitive_index = blockIdx.x * blockDim.x + threadIdx.x;
+  if (primitive_index >= primitive_count) { return; }
+
+  src += primitive_index * 4;
+  const float x = src[0];
+  const float y = src[1];
+  const float rx = src[2] * 0.5F;
+  const float ry = src[3] * 0.5F;
+
+  dst += primitive_index * (CIRCLE_SEGMENTS + 1) * 3;
+  for (uint32_t segment = 0; segment <= CIRCLE_SEGMENTS; ++segment) {
+    const float rad = (2.F * M_PI) / CIRCLE_SEGMENTS * segment;
+    const float px = x + std::cos(rad) * rx;
+    const float py = y + std::sin(rad) * ry;
+    dst[0] = px;
+    dst[1] = py;
+    dst[2] = 0.F;
+    dst += 3;
+  }
+}
+
+}  // namespace
+
+void gen_primitive_vertices(PrimitiveTopology topology, uint32_t primitive_count,
+                            const std::vector<uint32_t>& vertex_counts, float aspect_ratio,
+                            CUdeviceptr src, CUdeviceptr dst, CUstream stream) {
+  const dim3 block_dim(32, 1);
+
+  switch (topology) {
+    case PrimitiveTopology::POINT_LIST:
+    case PrimitiveTopology::LINE_LIST:
+    case PrimitiveTopology::LINE_STRIP:
+    case PrimitiveTopology::TRIANGLE_LIST: {
+      if (vertex_counts.size() != 1) {
+        throw std::runtime_error("Unexpected vertex count vector.");
+      }
+      const dim3 launch_grid((vertex_counts[0] + (block_dim.x - 1)) / block_dim.x);
+      copy_and_add_zero<<<launch_grid, block_dim, 0, stream>>>(
+          vertex_counts[0], reinterpret_cast<const float*>(src), reinterpret_cast<float*>(dst));
+    } break;
+    case PrimitiveTopology::CROSS_LIST: {
+      const dim3 launch_grid((primitive_count + (block_dim.x - 1)) / block_dim.x);
+      gen_cross_list_vertices<<<launch_grid, block_dim, 0, stream>>>(
+          primitive_count,
+          aspect_ratio,
+          reinterpret_cast<const float*>(src),
+          reinterpret_cast<float*>(dst));
+    } break;
+    case PrimitiveTopology::OVAL_LIST: {
+      const dim3 launch_grid((primitive_count + (block_dim.x - 1)) / block_dim.x);
+      gen_oval_list_vertices<<<launch_grid, block_dim, 0, stream>>>(
+          primitive_count, reinterpret_cast<const float*>(src), reinterpret_cast<float*>(dst));
+    } break;
+    case PrimitiveTopology::RECTANGLE_LIST: {
+      const dim3 launch_grid((primitive_count + (block_dim.x - 1)) / block_dim.x);
+      gen_rectangle_list_vertices<<<launch_grid, block_dim, 0, stream>>>(
+          primitive_count, reinterpret_cast<const float*>(src), reinterpret_cast<float*>(dst));
+    } break;
+    case PrimitiveTopology::POINT_LIST_3D:
+    case PrimitiveTopology::LINE_LIST_3D:
+    case PrimitiveTopology::LINE_STRIP_3D:
+    case PrimitiveTopology::TRIANGLE_LIST_3D:
+      if (vertex_counts.size() != 1) {
+        throw std::runtime_error("Unexpected vertex count vector.");
+      }
+      CudaCheck(cuMemcpyAsync(dst, src, vertex_counts[0] * 3 * sizeof(float), stream));
+      break;
+    default:
+      throw std::runtime_error("Unsupported primitive topology.");
+  }
+}
+
+}  // namespace holoscan::viz
diff --git a/modules/holoviz/src/cuda/gen_primitive_vertices.hpp b/modules/holoviz/src/cuda/gen_primitive_vertices.hpp
new file mode 100644
index 00000000..6b951ccc
--- /dev/null
+++ b/modules/holoviz/src/cuda/gen_primitive_vertices.hpp
@@ -0,0 +1,51 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MODULES_HOLOVIZ_SRC_CUDA_GEN_PRIMITIVE_VERTICES_HPP
+#define MODULES_HOLOVIZ_SRC_CUDA_GEN_PRIMITIVE_VERTICES_HPP
+
+#include <cuda.h>
+
+#include <cstddef>
+#include <cstdint>
+#include <vector>
+
+#include "../holoviz/primitive_topology.hpp"
+
+namespace holoscan::viz {
+
+/// the segment count a circle is made of
+constexpr uint32_t CIRCLE_SEGMENTS = 32;
+
+/**
+ * @brief Generate vertex coordinates for geometric primitives.
+ *
+ * @param topology primitive topology
+ * @param primitive_count primitive count
+ * @param vertex_counts vertex count for each primitive
+ * @param aspect_ratio aspect ratio
+ * @param src memory containing source coordinates
+ * @param dst memory to write generated coordinates to
+ * @param stream CUDA stream to use
+ */
+void gen_primitive_vertices(PrimitiveTopology topology, uint32_t primitive_count,
+                            const std::vector<uint32_t>& vertex_counts, float aspect_ratio,
+                            CUdeviceptr src, CUdeviceptr dst, CUstream stream);
+
+}  // namespace holoscan::viz
+
+#endif /* MODULES_HOLOVIZ_SRC_CUDA_GEN_PRIMITIVE_VERTICES_HPP */
diff --git a/modules/holoviz/src/export.map b/modules/holoviz/src/export.map
index 5c16b801..938ceb81 100644
--- a/modules/holoviz/src/export.map
+++ b/modules/holoviz/src/export.map
@@ -70,6 +70,7 @@
     "holoscan::viz::LineWidth(float)";
     "holoscan::viz::PointSize(float)";
     "holoscan::viz::Primitive(holoscan::viz::PrimitiveTopology, unsigned int, unsigned long, float const*)";
+    "holoscan::viz::PrimitiveCudaDevice(holoscan::viz::PrimitiveTopology, unsigned int, unsigned long, unsigned long long)";
     "holoscan::viz::Text(float, float, float, char const*)";
     "holoscan::viz::DepthMap(holoscan::viz::DepthMapRenderMode, unsigned int, unsigned int, holoscan::viz::ImageFormat, unsigned long long, holoscan::viz::ImageFormat, unsigned long long)";
diff --git a/modules/holoviz/src/glfw_window.cpp b/modules/holoviz/src/glfw_window.cpp
index 8bbab6f9..9350121a 100644
--- a/modules/holoviz/src/glfw_window.cpp
+++ b/modules/holoviz/src/glfw_window.cpp
@@ -58,6 +58,9 @@ struct GLFWWindow::Impl {
   Impl() = delete;

   ~Impl() {
+    // glfwDestroyWindow() and glfwTerminate() are not thread-safe, take the lock
+    std::lock_guard guard(mutex_);
+
     if (intern_window_ && window_) {
       if (init_flags_ & InitFlags::FULLSCREEN) {
         // GLFW is not switching back to the original mode when just destroying the window,
@@ -69,7 +72,6 @@ struct GLFWWindow::Impl {
       glfwDestroyWindow(window_);
     }

-    std::lock_guard guard(mutex_);
     --glfw_init_count_;
     if (glfw_init_count_ == 0) { glfwTerminate(); }
   }
@@ -182,7 +184,11 @@ GLFWWindow::GLFWWindow(uint32_t width, uint32_t height, const char* title, InitF
     if (!monitor) { monitor = glfwGetPrimaryMonitor(); }
   }

-  impl_->window_ = glfwCreateWindow(width, height, title, monitor, NULL);
+  {
+    // glfwCreateWindow() is not thread-safe, take the lock
+    std::lock_guard guard(impl_->mutex_);
+    impl_->window_ = glfwCreateWindow(width, height, title, monitor, NULL);
+  }
   if (!impl_->window_) { throw std::runtime_error("Failed to create glfw window"); }

   impl_->intern_window_ = true;
@@ -199,6 +205,8 @@ GLFWWindow::GLFWWindow(uint32_t width, uint32_t height, const char* title, InitF
 GLFWWindow::~GLFWWindow() {}

 void GLFWWindow::init_im_gui() {
+  // ImGui is calling glfwCreateStandardCursor() which is not thread-safe, take the lock
+  std::lock_guard guard(impl_->mutex_);
+
   ImGui_ImplGlfw_InitForVulkan(impl_->window_, true);
 }

@@ -527,6 +535,8 @@ void GLFWWindow::im_gui_new_frame() {
 }

 void GLFWWindow::begin() {
+  // GLFW event processing is not thread-safe, take the lock
+  std::lock_guard guard(impl_->mutex_);
+
   glfwPollEvents();
 }

diff --git a/modules/holoviz/src/holoviz.cpp b/modules/holoviz/src/holoviz.cpp
index 88074290..ab46bf15 100644
--- a/modules/holoviz/src/holoviz.cpp
+++ b/modules/holoviz/src/holoviz.cpp
@@ -259,6 +259,12 @@ void Primitive(PrimitiveTopology topology, uint32_t primitive_count, size_t data
   Context::get().get_active_geometry_layer()->primitive(topology, primitive_count, data_size, data);
 }

+void
PrimitiveCudaDevice(PrimitiveTopology topology, uint32_t primitive_count, size_t data_size, + CUdeviceptr data) { + Context::get().get_active_geometry_layer()->primitive_cuda_device( + topology, primitive_count, data_size, data); +} + void DepthMap(DepthMapRenderMode render_mode, uint32_t width, uint32_t height, ImageFormat depth_fmt, CUdeviceptr depth_device_ptr, ImageFormat color_fmt, CUdeviceptr color_device_ptr) { diff --git a/modules/holoviz/src/holoviz/holoviz.hpp b/modules/holoviz/src/holoviz/holoviz.hpp index aa5c89d7..99301b5a 100644 --- a/modules/holoviz/src/holoviz/holoviz.hpp +++ b/modules/holoviz/src/holoviz/holoviz.hpp @@ -383,10 +383,10 @@ void BeginImageLayer(); * @param row_pitch the number of bytes between each row, if zero then data is * assumed to be contiguous in memory * @param device_ptr_plane_1 CUDA device memory pointer for plane 1 - * @param row_pitch_1 the number of bytes between each row for plane 1, if zero then data is + * @param row_pitch_plane_1 the number of bytes between each row for plane 1, if zero then data is * assumed to be contiguous in memory * @param device_ptr_plane_2 CUDA device memory pointer for plane 2 - * @param row_pitch_2 the number of bytes between each row for plane 2, if zero then data is + * @param row_pitch_plane_2 the number of bytes between each row for plane 2, if zero then data is * assumed to be contiguous in memory */ void ImageCudaDevice(uint32_t width, uint32_t height, ImageFormat fmt, CUdeviceptr device_ptr, @@ -567,6 +567,19 @@ void PointSize(float size); */ void Primitive(PrimitiveTopology topology, uint32_t primitive_count, size_t data_size, const float* data); + +/** + * Draw a geometric primitive, source is CUDA device memory. + * + * @param topology primitive topology + * @param primitive_count primitive count + * @param data_size size of the data array in floats + * @param data CUDA device memory pointer to data, the format and size of the array + * depends on the primitive count and topology + */ +void PrimitiveCudaDevice(PrimitiveTopology topology, uint32_t primitive_count, size_t data_size, + CUdeviceptr data); + /** * Draw text. 
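A hypothetical end-to-end sketch of the PrimitiveCudaDevice() call documented above (error handling and the surrounding Init()/Shutdown() elided): one LINE_LIST primitive consumes 2 * 2 = 4 floats, matching the data_size convention described here.

  #include <cuda.h>

  #include <holoviz/holoviz.hpp>

  namespace viz = holoscan::viz;

  void draw_line_from_device() {
    const float line[4] = {0.1F, 0.1F, 0.9F, 0.9F};  // x0, y0, x1, y1
    CUdeviceptr ptr;
    cuMemAlloc(&ptr, sizeof(line));
    cuMemcpyHtoD(ptr, line, sizeof(line));

    viz::Begin();
    viz::BeginGeometryLayer();
    viz::PrimitiveCudaDevice(viz::PrimitiveTopology::LINE_LIST, 1, 4, ptr);
    viz::EndLayer();
    viz::End();

    cuMemFree(ptr);
  }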
* diff --git a/modules/holoviz/src/layers/geometry_layer.cpp b/modules/holoviz/src/layers/geometry_layer.cpp index 50ebacfe..0a089d05 100644 --- a/modules/holoviz/src/layers/geometry_layer.cpp +++ b/modules/holoviz/src/layers/geometry_layer.cpp @@ -28,19 +28,18 @@ #include #include #include +#include #include #include "../context.hpp" #include "../cuda/cuda_service.hpp" #include "../cuda/gen_depth_map.hpp" +#include "../cuda/gen_primitive_vertices.hpp" #include "../vulkan/buffer.hpp" #include "../vulkan/vulkan_app.hpp" namespace holoscan::viz { -/// the segment count a circle is made of -constexpr uint32_t CIRCLE_SEGMENTS = 32; - class Attributes { public: Attributes() : color_({1.F, 1.F, 1.F, 1.F}), line_width_(1.F), point_size_(1.F) {} @@ -58,15 +57,84 @@ class Attributes { class Primitive { public: Primitive(const Attributes& attributes, PrimitiveTopology topology, uint32_t primitive_count, - size_t data_size, const float* data, uint32_t vertex_offset, - std::vector& vertex_counts, vk::PrimitiveTopology vk_topology) + size_t data_size, const float* host_data, CUdeviceptr device_data, + uint32_t vertex_offset, CUstream cuda_stream) : attributes_(attributes), topology_(topology), primitive_count_(primitive_count), + device_data_(device_data), vertex_offset_(vertex_offset), - vertex_counts_(vertex_counts), - vk_topology_(vk_topology) { - data_.assign(data, data + data_size); + cuda_stream_(cuda_stream) { + size_t required_data_size; + switch (topology) { + case PrimitiveTopology::POINT_LIST: + required_data_size = primitive_count * 2; + vertex_counts_.push_back(required_data_size / 2); + vk_topology_ = vk::PrimitiveTopology::ePointList; + break; + case PrimitiveTopology::LINE_LIST: + required_data_size = primitive_count * 2 * 2; + vertex_counts_.push_back(required_data_size / 2); + vk_topology_ = vk::PrimitiveTopology::eLineList; + break; + case PrimitiveTopology::LINE_STRIP: + required_data_size = 2 + primitive_count * 2; + vertex_counts_.push_back(required_data_size / 2); + vk_topology_ = vk::PrimitiveTopology::eLineStrip; + break; + case PrimitiveTopology::TRIANGLE_LIST: + required_data_size = primitive_count * 3 * 2; + vertex_counts_.push_back(required_data_size / 2); + vk_topology_ = vk::PrimitiveTopology::eTriangleList; + break; + case PrimitiveTopology::CROSS_LIST: + required_data_size = primitive_count * 3; + vertex_counts_.push_back(primitive_count * 4); + vk_topology_ = vk::PrimitiveTopology::eLineList; + break; + case PrimitiveTopology::RECTANGLE_LIST: + required_data_size = primitive_count * 2 * 2; + for (uint32_t i = 0; i < primitive_count; ++i) { vertex_counts_.push_back(5); } + vk_topology_ = vk::PrimitiveTopology::eLineStrip; + break; + case PrimitiveTopology::OVAL_LIST: + required_data_size = primitive_count * 4; + for (uint32_t i = 0; i < primitive_count; ++i) { + vertex_counts_.push_back(CIRCLE_SEGMENTS + 1); + } + vk_topology_ = vk::PrimitiveTopology::eLineStrip; + break; + case PrimitiveTopology::POINT_LIST_3D: + required_data_size = primitive_count * 3; + vertex_counts_.push_back(required_data_size / 3); + vk_topology_ = vk::PrimitiveTopology::ePointList; + break; + case PrimitiveTopology::LINE_LIST_3D: + required_data_size = primitive_count * 2 * 3; + vertex_counts_.push_back(required_data_size / 3); + vk_topology_ = vk::PrimitiveTopology::eLineList; + break; + case PrimitiveTopology::LINE_STRIP_3D: + required_data_size = 3 + primitive_count * 3; + vertex_counts_.push_back(required_data_size / 3); + vk_topology_ = vk::PrimitiveTopology::eLineStrip; + break; + case 
PrimitiveTopology::TRIANGLE_LIST_3D:
+        required_data_size = primitive_count * 3 * 3;
+        vertex_counts_.push_back(required_data_size / 3);
+        vk_topology_ = vk::PrimitiveTopology::eTriangleList;
+        break;
+    }
+
+    if (data_size < required_data_size) {
+      std::stringstream buf;
+      buf << "Required data array size is " << required_data_size << " but only " << data_size
+          << " were specified";
+      throw std::runtime_error(buf.str().c_str());
+    }
+
+    if (host_data) { host_data_.assign(host_data, host_data + required_data_size); }
+    data_size_ = required_data_size;
   }

   Primitive() = delete;
@@ -87,20 +155,28 @@ class Primitive {
   }

   bool operator==(const Primitive& rhs) const {
+    // we can reuse if the attributes, topology and primitive count match and
+    // if we did not switch from host to device memory and vice versa
     return ((attributes_ == rhs.attributes_) && (topology_ == rhs.topology_) &&
-            (primitive_count_ == rhs.primitive_count_) && (data_ == rhs.data_));
+            (primitive_count_ == rhs.primitive_count_) &&
+            ((host_data_.empty() && !device_data_) ||
+             (((!host_data_.empty()) == (rhs.device_data_ == 0)) &&
+              ((device_data_ != 0) == (rhs.host_data_.empty())))));
   }

   const Attributes attributes_;
   const PrimitiveTopology topology_;
   const uint32_t primitive_count_;
-  std::vector<float> data_;
+  std::vector<float> host_data_;
+  CUdeviceptr device_data_;
+  CUstream cuda_stream_ = 0;

   // internal state
-  const uint32_t vertex_offset_;
-  const std::vector<uint32_t> vertex_counts_;
-  const vk::PrimitiveTopology vk_topology_;
+  const uint32_t vertex_offset_;  ///< vertex start offset (in units of 3 * float)
+  size_t data_size_;              ///< size of input data
+  std::vector<uint32_t> vertex_counts_;
+  vk::PrimitiveTopology vk_topology_;
 };

 class Text {
@@ -169,11 +245,19 @@ class GeometryLayer::Impl {
     // Data will be uploaded when drawing regardless if the layer is reused or not
     /// @todo this should be made explicit, first check if the layer can be reused and then
     ///       update the reused layer with these properties below which don't prevent reusing
-    auto it = other.depth_maps_.begin();
+    auto depth_map_it = other.depth_maps_.begin();
     for (auto&& depth_map : depth_maps_) {
-      it->depth_device_ptr_ = depth_map.depth_device_ptr_;
-      it->color_device_ptr_ = depth_map.color_device_ptr_;
-      it->cuda_stream_ = depth_map.cuda_stream_;
+      depth_map_it->depth_device_ptr_ = depth_map.depth_device_ptr_;
+      depth_map_it->color_device_ptr_ = depth_map.color_device_ptr_;
+      depth_map_it->cuda_stream_ = depth_map.cuda_stream_;
+      ++depth_map_it;
+    }
+    auto primitive_it = other.primitives_.begin();
+    for (auto&& primitive : primitives_) {
+      primitive_it->host_data_ = primitive.host_data_;
+      primitive_it->device_data_ = primitive.device_data_;
+      primitive_it->cuda_stream_ = primitive.cuda_stream_;
+      ++primitive_it;
     }

     return true;
   }
@@ -226,86 +310,34 @@ void GeometryLayer::primitive(PrimitiveTopology topology, uint32_t primitive_cou
   if (data_size == 0) { throw std::invalid_argument("data_size should not be zero"); }
   if (data == nullptr) { throw std::invalid_argument("data should not be nullptr"); }

-  uint32_t required_data_size;
-  std::vector<uint32_t> vertex_counts;
-  vk::PrimitiveTopology vkTopology;
-  switch (topology) {
-    case PrimitiveTopology::POINT_LIST:
-      required_data_size = primitive_count * 2;
-      vertex_counts.push_back(required_data_size / 2);
-      vkTopology = vk::PrimitiveTopology::ePointList;
-      break;
-    case PrimitiveTopology::LINE_LIST:
-      required_data_size = primitive_count * 2 * 2;
-      vertex_counts.push_back(required_data_size / 2);
-      vkTopology = vk::PrimitiveTopology::eLineList;
- break; - case PrimitiveTopology::LINE_STRIP: - required_data_size = 2 + primitive_count * 2; - vertex_counts.push_back(required_data_size / 2); - vkTopology = vk::PrimitiveTopology::eLineStrip; - break; - case PrimitiveTopology::TRIANGLE_LIST: - required_data_size = primitive_count * 3 * 2; - vertex_counts.push_back(required_data_size / 2); - vkTopology = vk::PrimitiveTopology::eTriangleList; - break; - case PrimitiveTopology::CROSS_LIST: - required_data_size = primitive_count * 3; - vertex_counts.push_back(primitive_count * 4); - vkTopology = vk::PrimitiveTopology::eLineList; - break; - case PrimitiveTopology::RECTANGLE_LIST: - required_data_size = primitive_count * 2 * 2; - for (uint32_t i = 0; i < primitive_count; ++i) { vertex_counts.push_back(5); } - vkTopology = vk::PrimitiveTopology::eLineStrip; - break; - case PrimitiveTopology::OVAL_LIST: - required_data_size = primitive_count * 4; - for (uint32_t i = 0; i < primitive_count; ++i) { - vertex_counts.push_back(CIRCLE_SEGMENTS + 1); - } - vkTopology = vk::PrimitiveTopology::eLineStrip; - break; - case PrimitiveTopology::POINT_LIST_3D: - required_data_size = primitive_count * 3; - vertex_counts.push_back(required_data_size / 3); - vkTopology = vk::PrimitiveTopology::ePointList; - break; - case PrimitiveTopology::LINE_LIST_3D: - required_data_size = primitive_count * 2 * 3; - vertex_counts.push_back(required_data_size / 3); - vkTopology = vk::PrimitiveTopology::eLineList; - break; - case PrimitiveTopology::LINE_STRIP_3D: - required_data_size = 3 + primitive_count * 3; - vertex_counts.push_back(required_data_size / 3); - vkTopology = vk::PrimitiveTopology::eLineStrip; - break; - case PrimitiveTopology::TRIANGLE_LIST_3D: - required_data_size = primitive_count * 3 * 3; - vertex_counts.push_back(required_data_size / 3); - vkTopology = vk::PrimitiveTopology::eTriangleList; - break; - } - - if (data_size < required_data_size) { - std::stringstream buf; - buf << "Required data array size is " << required_data_size << " but only " << data_size - << " where specified"; - throw std::runtime_error(buf.str().c_str()); - } + const auto& primitive = impl_->primitives_.emplace_back(impl_->attributes_, + topology, + primitive_count, + data_size, + data, + 0, + impl_->vertex_count_, + Context::get().get_cuda_stream()); - impl_->primitives_.emplace_back(impl_->attributes_, - topology, - primitive_count, - data_size, - data, - impl_->vertex_count_, - vertex_counts, - vkTopology); + for (auto&& vertex_count : primitive.vertex_counts_) { impl_->vertex_count_ += vertex_count; } +} - for (auto&& vertex_count : vertex_counts) { impl_->vertex_count_ += vertex_count; } +void GeometryLayer::primitive_cuda_device(PrimitiveTopology topology, uint32_t primitive_count, + size_t data_size, CUdeviceptr data) { + if (primitive_count == 0) { throw std::invalid_argument("primitive_count should not be zero"); } + if (data_size == 0) { throw std::invalid_argument("data_size should not be zero"); } + if (data == 0) { throw std::invalid_argument("data should not be 0"); } + + const auto& primitive = impl_->primitives_.emplace_back(impl_->attributes_, + topology, + primitive_count, + data_size, + nullptr, + data, + impl_->vertex_count_, + Context::get().get_cuda_stream()); + + for (auto&& vertex_count : primitive.vertex_counts_) { impl_->vertex_count_ += vertex_count; } } void GeometryLayer::text(float x, float y, float size, const char* text) { @@ -349,7 +381,7 @@ bool GeometryLayer::can_be_reused(Layer& other) const { } void GeometryLayer::end(Vulkan* vulkan) { - // if the 
aspect ratio changed, re-create the text and primitive buffers because the generated + // if the aspect ratio changed, re-create the text buffers because the generated // vertex positions depend on the aspect ratio if (impl_->aspect_ratio_ != vulkan->get_window()->get_aspect_ratio()) { impl_->aspect_ratio_ = vulkan->get_window()->get_aspect_ratio(); @@ -357,84 +389,56 @@ void GeometryLayer::end(Vulkan* vulkan) { impl_->text_draw_list_.reset(); impl_->text_vertex_buffer_.reset(); impl_->text_index_buffer_.reset(); - - // only crosses depend on the aspect ratio - bool has_crosses = false; - for (auto&& primitive : impl_->primitives_) { - if (primitive.topology_ == PrimitiveTopology::CROSS_LIST) { - has_crosses = true; - break; - } - } - if (has_crosses) { impl_->vertex_buffer_.reset(); } } if (!impl_->primitives_.empty()) { if (!impl_->vertex_buffer_) { - // setup the vertex buffer - std::vector vertices; - vertices.reserve(impl_->vertex_count_ * 3); - - for (auto&& primitive : impl_->primitives_) { - switch (primitive.topology_) { - case PrimitiveTopology::POINT_LIST: - case PrimitiveTopology::LINE_LIST: - case PrimitiveTopology::LINE_STRIP: - case PrimitiveTopology::TRIANGLE_LIST: - // just copy - for (uint32_t index = 0; index < primitive.data_.size() / 2; ++index) { - vertices.insert( - vertices.end(), - {primitive.data_[index * 2 + 0], primitive.data_[index * 2 + 1], 0.F}); - } - break; - case PrimitiveTopology::CROSS_LIST: - // generate crosses - for (uint32_t index = 0; index < primitive.primitive_count_; ++index) { - const float x = primitive.data_[index * 3 + 0]; - const float y = primitive.data_[index * 3 + 1]; - const float sy = primitive.data_[index * 3 + 2] * 0.5F; - const float sx = sy / impl_->aspect_ratio_; - vertices.insert(vertices.end(), - {x - sx, y, 0.F, x + sx, y, 0.F, x, y - sy, 0.F, x, y + sy, 0.F}); - } - break; - case PrimitiveTopology::RECTANGLE_LIST: - // generate rectangles - for (uint32_t index = 0; index < primitive.primitive_count_; ++index) { - const float x0 = primitive.data_[index * 4 + 0]; - const float y0 = primitive.data_[index * 4 + 1]; - const float x1 = primitive.data_[index * 4 + 2]; - const float y1 = primitive.data_[index * 4 + 3]; - vertices.insert(vertices.end(), - {x0, y0, 0.F, x1, y0, 0.F, x1, y1, 0.F, x0, y1, 0.F, x0, y0, 0.F}); - } - break; - case PrimitiveTopology::OVAL_LIST: - for (uint32_t index = 0; index < primitive.primitive_count_; ++index) { - const float x = primitive.data_[index * 4 + 0]; - const float y = primitive.data_[index * 4 + 1]; - const float rx = primitive.data_[index * 4 + 2] * 0.5F; - const float ry = primitive.data_[index * 4 + 3] * 0.5F; - for (uint32_t segment = 0; segment <= CIRCLE_SEGMENTS; ++segment) { - const float rad = (2.F * M_PI) / CIRCLE_SEGMENTS * segment; - const float px = x + std::cos(rad) * rx; - const float py = y + std::sin(rad) * ry; - vertices.insert(vertices.end(), {px, py, 0.F}); - } - } - break; - case PrimitiveTopology::POINT_LIST_3D: - case PrimitiveTopology::LINE_LIST_3D: - case PrimitiveTopology::LINE_STRIP_3D: - case PrimitiveTopology::TRIANGLE_LIST_3D: - vertices.insert(vertices.end(), primitive.data_.begin(), primitive.data_.end()); - break; - } + // allocate the vertex buffer + impl_->vertex_buffer_ = vulkan->create_buffer_for_cuda_interop( + impl_->vertex_count_ * 3 * sizeof(float), vk::BufferUsageFlagBits::eVertexBuffer); + } + + // generate vertex data + CudaService* const cuda_service = vulkan->get_cuda_service(); + const CudaService::ScopedPush cuda_context = 
cuda_service->PushContext(); + + for (auto&& primitive : impl_->primitives_) { + // select the stream to be used by CUDA operations + const CUstream stream = cuda_service->select_cuda_stream(primitive.cuda_stream_); + impl_->vertex_buffer_->begin_access_with_cuda(stream); + + UniqueAsyncCUdeviceptr tmp_src_device_ptr; + CUdeviceptr src_device_ptr; + + // if the data is on host, allocate temporary memory and copy the data to it + if (!primitive.host_data_.empty()) { + const size_t size = primitive.data_size_ * sizeof(float); + tmp_src_device_ptr.reset([size, stream] { + CUdeviceptr device_ptr; + CudaCheck(cuMemAllocAsync(&device_ptr, size, stream)); + return std::pair(device_ptr, stream); + }()); + src_device_ptr = tmp_src_device_ptr.get().first; + // copy from host to device + CudaCheck(cuMemcpyHtoDAsync(src_device_ptr, + reinterpret_cast(primitive.host_data_.data()), + size, + stream)); + } else { + src_device_ptr = primitive.device_data_; } - impl_->vertex_buffer_ = vulkan->create_buffer( - vertices.size() * sizeof(float), vertices.data(), vk::BufferUsageFlagBits::eVertexBuffer); + // generate vertex data + gen_primitive_vertices( + primitive.topology_, + primitive.primitive_count_, + primitive.vertex_counts_, + impl_->aspect_ratio_, + src_device_ptr, + impl_->vertex_buffer_->device_ptr_.get() + primitive.vertex_offset_ * sizeof(float) * 3, + stream); + + impl_->vertex_buffer_->end_access_with_cuda(stream); } } diff --git a/modules/holoviz/src/layers/geometry_layer.hpp b/modules/holoviz/src/layers/geometry_layer.hpp index 48cfe358..4942baca 100644 --- a/modules/holoviz/src/layers/geometry_layer.hpp +++ b/modules/holoviz/src/layers/geometry_layer.hpp @@ -79,6 +79,18 @@ class GeometryLayer : public Layer { void primitive(PrimitiveTopology topology, uint32_t primitive_count, size_t data_size, const float* data); + /** + * Draw a geometric primitive, source is CUDA device memory. + * + * @param topology primitive topology + * @param primitive_count primitive count + * @param data_size size of the data array in floats + * @param data CUDA device memory pointer to data, the format and size of the array + * depends on the primitive count and topology + */ + void primitive_cuda_device(PrimitiveTopology topology, uint32_t primitive_count, size_t data_size, + CUdeviceptr data); + /** * Draw text. 
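For reference, the input sizes validated by the Primitive constructor above can be summarized in a small helper; a sketch derived from the constructor's switch in this patch (`n` is the primitive count; the helper name is illustrative, not part of the patch):

  #include <cstddef>
  #include <cstdint>

  #include "holoviz/primitive_topology.hpp"

  // Input floats each topology consumes; 2D topologies carry x/y pairs, 3D ones x/y/z.
  size_t required_floats(holoscan::viz::PrimitiveTopology topology, uint32_t n) {
    using holoscan::viz::PrimitiveTopology;
    switch (topology) {
      case PrimitiveTopology::POINT_LIST:       return n * 2;      // x, y per point
      case PrimitiveTopology::LINE_LIST:        return n * 2 * 2;  // two x/y endpoints per line
      case PrimitiveTopology::LINE_STRIP:       return 2 + n * 2;  // first point plus one per segment
      case PrimitiveTopology::TRIANGLE_LIST:    return n * 3 * 2;  // three x/y corners per triangle
      case PrimitiveTopology::CROSS_LIST:       return n * 3;      // x, y, size
      case PrimitiveTopology::RECTANGLE_LIST:   return n * 2 * 2;  // x0, y0, x1, y1
      case PrimitiveTopology::OVAL_LIST:        return n * 4;      // x, y, width, height
      case PrimitiveTopology::POINT_LIST_3D:    return n * 3;
      case PrimitiveTopology::LINE_LIST_3D:     return n * 2 * 3;
      case PrimitiveTopology::LINE_STRIP_3D:    return 3 + n * 3;
      case PrimitiveTopology::TRIANGLE_LIST_3D: return n * 3 * 3;
    }
    return 0;
  }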
* diff --git a/modules/holoviz/src/layers/im_gui_layer.cpp b/modules/holoviz/src/layers/im_gui_layer.cpp index fa2f64be..eac62693 100644 --- a/modules/holoviz/src/layers/im_gui_layer.cpp +++ b/modules/holoviz/src/layers/im_gui_layer.cpp @@ -56,8 +56,8 @@ void ImGuiLayer::end(Vulkan* vulkan) { // nothing to do if there are no vertices if (impl_->draw_data_->TotalVtxCount > 0) { // copy all vertex and index data to one host buffer - std::unique_ptr vertex_data(new ImDrawVert[impl_->draw_data_->TotalVtxCount]); - std::unique_ptr index_data(new ImDrawIdx[impl_->draw_data_->TotalIdxCount]); + std::unique_ptr vertex_data(new ImDrawVert[impl_->draw_data_->TotalVtxCount]); + std::unique_ptr index_data(new ImDrawIdx[impl_->draw_data_->TotalIdxCount]); ImDrawVert* vertex = vertex_data.get(); ImDrawIdx* index = index_data.get(); diff --git a/modules/holoviz/src/vulkan/buffer.cpp b/modules/holoviz/src/vulkan/buffer.cpp index 1d700e86..c364c2ca 100644 --- a/modules/holoviz/src/vulkan/buffer.cpp +++ b/modules/holoviz/src/vulkan/buffer.cpp @@ -27,14 +27,16 @@ Buffer::Buffer(Vulkan* vulkan, nvvk::ResourceAllocator* alloc, size_t size) : Resource(vulkan, alloc), size_(size) {} Buffer::~Buffer() { - wait(); - - // check if this buffer had been imported to CUDA - if (device_ptr_) { - const CudaService::ScopedPush cuda_context = vulkan_->get_cuda_service()->PushContext(); - device_ptr_.reset(); - } - alloc_->destroy(buffer_); + try { + wait(); + + // check if this buffer had been imported to CUDA + if (device_ptr_) { + const CudaService::ScopedPush cuda_context = vulkan_->get_cuda_service()->PushContext(); + device_ptr_.reset(); + } + alloc_->destroy(buffer_); + } catch (const std::exception& e) {} // ignore potential exceptions } void Buffer::import_to_cuda(const std::unique_ptr& cuda_service) { diff --git a/modules/holoviz/src/vulkan/texture.cpp b/modules/holoviz/src/vulkan/texture.cpp index 8e89fa31..77036289 100644 --- a/modules/holoviz/src/vulkan/texture.cpp +++ b/modules/holoviz/src/vulkan/texture.cpp @@ -31,14 +31,16 @@ Texture::Texture(Vulkan* vulkan, nvvk::ResourceAllocator* alloc, uint32_t width, : Resource(vulkan, alloc), width_(width), height_(height), format_(format) {} Texture::~Texture() { - wait(); + try { + wait(); - // check if this texture had been imported to CUDA - if (!mipmaps_.empty()) { - const CudaService::ScopedPush cuda_context = vulkan_->get_cuda_service()->PushContext(); - mipmaps_.clear(); - } - alloc_->destroy(texture_); + // check if this texture had been imported to CUDA + if (!mipmaps_.empty()) { + const CudaService::ScopedPush cuda_context = vulkan_->get_cuda_service()->PushContext(); + mipmaps_.clear(); + } + alloc_->destroy(texture_); + } catch (const std::exception& e) {} // ignore potential exceptions } void Texture::import_to_cuda(const std::unique_ptr& cuda_service) { @@ -142,20 +144,14 @@ void Texture::upload(CUstream ext_stream, const std::array& devi if (!device_ptr[plane]) { break; } uint32_t channels, hw_channels, component_size, width_divisor, height_divisior; - format_info(format_, - &channels, - &hw_channels, - &component_size, - &width_divisor, - &height_divisior, - plane); + format_info( + format_, &channels, &hw_channels, &component_size, &width_divisor, &height_divisior, plane); // the width and height might be different for each plane for Y'CbCr formats const uint32_t width = width_ / width_divisor; const uint32_t height = height_ / height_divisior; - size_t src_pitch = - row_pitch[plane] != 0 ? 
row_pitch[plane] : width * channels * component_size; + size_t src_pitch = row_pitch[plane] != 0 ? row_pitch[plane] : width * channels * component_size; if (!mipmaps_.empty()) { // direct upload to CUDA imported Vulkan texture by copying to CUDA array diff --git a/modules/holoviz/src/vulkan/vulkan_app.cpp b/modules/holoviz/src/vulkan/vulkan_app.cpp index 4ad466a3..951be8d8 100644 --- a/modules/holoviz/src/vulkan/vulkan_app.cpp +++ b/modules/holoviz/src/vulkan/vulkan_app.cpp @@ -221,6 +221,7 @@ class Vulkan::Impl { bool export_alloc_dedicated_initialized_ = false; nvvk::BatchSubmission batch_submission_; + nvvk::BatchSubmission transfer_batch_submission_; nvvk::CommandPool transfer_cmd_pool_; bool transfer_cmd_pool_initialized_ = false; @@ -444,6 +445,7 @@ void Vulkan::Impl::setup(Window* window, const std::string& font_path, float fon // init batch submission nvvk_.batch_submission_.init(nvvk_.vk_ctx_.m_queueGCT); + nvvk_.transfer_batch_submission_.init(nvvk_.vk_ctx_.m_queueT); // init command pool nvvk_.transfer_cmd_pool_.init(device_, nvvk_.vk_ctx_.m_queueT.familyIndex); @@ -733,13 +735,18 @@ void Vulkan::Impl::end_transfer_pass() { // associates all current staging resources with the transfer fence nvvk_.alloc_.finalizeStaging(transfer_job.fence_.get()); + // add the command buffer to the batch submission + nvvk_.transfer_batch_submission_.enqueue(transfer_job.cmd_buffer_); + + // signal the transfer job semaphore on completion + nvvk_.transfer_batch_submission_.enqueueSignal(transfer_job.semaphore_.get()); + // submit staged transfers - vk::SubmitInfo submit_info; - submit_info.commandBufferCount = 1; - submit_info.pCommandBuffers = &transfer_job.cmd_buffer_; - submit_info.signalSemaphoreCount = 1; - submit_info.pSignalSemaphores = &transfer_job.semaphore_.get(); - queue_t_.submit(submit_info, transfer_job.fence_.get()); + const vk::Result result = + vk::Result(nvvk_.transfer_batch_submission_.execute(transfer_job.fence_.get(), 0b0000'0001)); + if (result != vk::Result::eSuccess) { + vk::throwResultException(result, "Failed to execute batch submission"); + } // next graphics submission must wait for transfer completion nvvk_.batch_submission_.enqueueWait(transfer_job.semaphore_.get(), @@ -890,7 +897,7 @@ void Vulkan::Impl::submit_frame() { const vk::Result result = vk::Result(nvvk_.batch_submission_.execute(wait_fences_[image_index].get(), 0b0000'0001)); if (result != vk::Result::eSuccess) { - vk::throwResultException(result, "Failed to execute bach submission"); + vk::throwResultException(result, "Failed to execute batch submission"); } // Presenting frame @@ -1645,6 +1652,8 @@ void Vulkan::Impl::upload_to_texture(Texture* texture, const std::arrayaccess_with_vulkan(nvvk_.transfer_batch_submission_); + vk::BufferImageCopy buffer_image_copy; buffer_image_copy.imageSubresource = image_subresource_layers; buffer_image_copy.imageExtent = vk::Extent3D{width, height, 1}; @@ -2187,7 +2196,7 @@ void Vulkan::Impl::read_framebuffer(Vulkan* vulkan, ImageFormat fmt, uint32_t wi const vk::Result result = vk::Result(nvvk_.batch_submission_.execute(read_job.fence_.get(), 0b0000'0001)); if (result != vk::Result::eSuccess) { - vk::throwResultException(result, "Failed to execute bach submission"); + vk::throwResultException(result, "Failed to execute batch submission"); } // copy the buffer to CUDA memory diff --git a/modules/holoviz/src/vulkan/vulkan_app.hpp b/modules/holoviz/src/vulkan/vulkan_app.hpp index 5c64d6e2..e13b29e0 100644 --- a/modules/holoviz/src/vulkan/vulkan_app.hpp +++ 
b/modules/holoviz/src/vulkan/vulkan_app.hpp @@ -161,12 +161,15 @@ class Vulkan { bool normalized_ = true; //< if true, then texture coordinates are normalize (0...1), else //< (0...width, 0...height) bool cuda_interop_ = false; //< used for interop with CUDA - vk::SamplerYcbcrModelConversion ycbcr_model_conversion_; ///< YCbCr model conversion - vk::SamplerYcbcrRange ycbcr_range_; ///< YCbCR range - vk::ChromaLocation x_chroma_location_; ///< chroma location in x direction for formats which - ///< are chroma downsampled in width (420 and 422) - vk::ChromaLocation y_chroma_location_; ///< chroma location in y direction for formats which - ///< are chroma downsampled in height (420) + vk::SamplerYcbcrModelConversion ycbcr_model_conversion_ = + vk::SamplerYcbcrModelConversion::eYcbcr601; ///< YCbCr model conversion + vk::SamplerYcbcrRange ycbcr_range_ = vk::SamplerYcbcrRange::eItuFull; ///< YCbCR range + vk::ChromaLocation x_chroma_location_ = + vk::ChromaLocation::eCositedEven; ///< chroma location in x direction for formats which + ///< are chroma downsampled in width (420 and 422) + vk::ChromaLocation y_chroma_location_ = + vk::ChromaLocation::eCositedEven; ///< chroma location in y direction for formats which + ///< are chroma downsampled in height (420) }; /** diff --git a/modules/holoviz/tests/functional/geometry_layer_test.cpp b/modules/holoviz/tests/functional/geometry_layer_test.cpp index b44ba8b7..b617d201 100644 --- a/modules/holoviz/tests/functional/geometry_layer_test.cpp +++ b/modules/holoviz/tests/functional/geometry_layer_test.cpp @@ -58,12 +58,16 @@ std::ostream& operator<<(std::ostream& os, const PrimitiveTopology& topology) { } // namespace holoscan::viz +enum class Source { HOST, CUDA_DEVICE }; + // Fixture that initializes Holoviz -class PrimitiveTopology : public TestHeadless, - public testing::WithParamInterface {}; +class PrimitiveTopology + : public TestHeadless, + public testing::WithParamInterface> {}; TEST_P(PrimitiveTopology, Primitive) { - const viz::PrimitiveTopology topology = GetParam(); + const viz::PrimitiveTopology topology = std::get<0>(GetParam()); + const Source source = std::get<1>(GetParam()); std::vector color_crc, depth_crc; uint32_t primitive_count; @@ -273,21 +277,47 @@ TEST_P(PrimitiveTopology, Primitive) { EXPECT_NO_THROW(viz::Begin()); - EXPECT_NO_THROW(viz::BeginGeometryLayer()); + viz::CudaService::ScopedPush cuda_context; + viz::UniqueCUdeviceptr device_ptr; + + viz::CudaService cuda_service(0); + + if (source == Source::CUDA_DEVICE) { + cuda_context = cuda_service.PushContext(); + device_ptr.reset([size = data.size() * sizeof(float)] { + CUdeviceptr device_ptr; + EXPECT_EQ(cuMemAlloc(&device_ptr, size), CUDA_SUCCESS); + return device_ptr; + }()); + + EXPECT_EQ(cuMemcpyHtoD(device_ptr.get(), data.data(), data.size() * sizeof(float)), + CUDA_SUCCESS); + } for (uint32_t i = 0; i < 3; ++i) { - if (i == 1) { - EXPECT_NO_THROW(viz::Color(1.F, 0.5F, 0.25F, 0.75F)); - } else if (i == 2) { + EXPECT_NO_THROW(viz::BeginGeometryLayer()); + + if (i != 0) { EXPECT_NO_THROW(viz::Color(1.F, 0.5F, 0.25F, 0.75F)); } + if (i == 2) { EXPECT_NO_THROW(viz::PointSize(4.F)); EXPECT_NO_THROW(viz::LineWidth(3.F)); } - EXPECT_NO_THROW(viz::Primitive(topology, primitive_count, data.size(), data.data())); + if (source == Source::CUDA_DEVICE) { + EXPECT_NO_THROW( + viz::PrimitiveCudaDevice(topology, primitive_count, data.size(), device_ptr.get())); + } else { + EXPECT_NO_THROW(viz::Primitive(topology, primitive_count, data.size(), data.data())); + } + + 
EXPECT_NO_THROW(viz::EndLayer()); for (auto&& item : data) { item += 0.1F; } + if (source == Source::CUDA_DEVICE) { + EXPECT_EQ(cuMemcpyHtoD(device_ptr.get(), data.data(), data.size() * sizeof(float)), + CUDA_SUCCESS); + } } - EXPECT_NO_THROW(viz::EndLayer()); EXPECT_NO_THROW(viz::End()); @@ -297,12 +327,14 @@ TEST_P(PrimitiveTopology, Primitive) { INSTANTIATE_TEST_SUITE_P( GeometryLayer, PrimitiveTopology, - testing::Values(viz::PrimitiveTopology::POINT_LIST, viz::PrimitiveTopology::LINE_LIST, - viz::PrimitiveTopology::LINE_STRIP, viz::PrimitiveTopology::TRIANGLE_LIST, - viz::PrimitiveTopology::CROSS_LIST, viz::PrimitiveTopology::RECTANGLE_LIST, - viz::PrimitiveTopology::OVAL_LIST, viz::PrimitiveTopology::POINT_LIST_3D, - viz::PrimitiveTopology::LINE_LIST_3D, viz::PrimitiveTopology::LINE_STRIP_3D, - viz::PrimitiveTopology::TRIANGLE_LIST_3D)); + testing::Combine( + testing::Values(viz::PrimitiveTopology::POINT_LIST, viz::PrimitiveTopology::LINE_LIST, + viz::PrimitiveTopology::LINE_STRIP, viz::PrimitiveTopology::TRIANGLE_LIST, + viz::PrimitiveTopology::CROSS_LIST, viz::PrimitiveTopology::RECTANGLE_LIST, + viz::PrimitiveTopology::OVAL_LIST, viz::PrimitiveTopology::POINT_LIST_3D, + viz::PrimitiveTopology::LINE_LIST_3D, viz::PrimitiveTopology::LINE_STRIP_3D, + viz::PrimitiveTopology::TRIANGLE_LIST_3D), + testing::Values(Source::HOST, Source::CUDA_DEVICE))); // Fixture that initializes Holoviz class GeometryLayer : public TestHeadless {}; diff --git a/modules/holoviz/tests/functional/image_layer_test.cpp b/modules/holoviz/tests/functional/image_layer_test.cpp index b2c63b16..68b0068d 100644 --- a/modules/holoviz/tests/functional/image_layer_test.cpp +++ b/modules/holoviz/tests/functional/image_layer_test.cpp @@ -269,17 +269,6 @@ TEST_P(ImageLayer, Image) { SetupData(color_format); SetupData(depth_format); } else if (is_yuv) { - // Skip test on iGPU, there is a Vulkan driver issue. The test fails on the first run only, the - // second run (within the same container) passes. The Vulkan driver has a shader cache, if the - // shader for the YUV format exists, the test passes, if the shader is not in the cache it - // fails. - CUdevice device = 0; - ASSERT_EQ(cuDeviceGet(&device, 0), CUDA_SUCCESS); - int is_integrated = false; - ASSERT_EQ(cuDeviceGetAttribute(&is_integrated, CU_DEVICE_ATTRIBUTE_INTEGRATED, device), - CUDA_SUCCESS); - if (is_integrated) { GTEST_SKIP() << "YUV tests fail on integrated devices, test skipped"; } - color_format = image_format; // create a smooth RGB pattern so we don't need to deal with linear chroma filtering when diff --git a/modules/holoviz/tests/functional/init_test.cpp b/modules/holoviz/tests/functional/init_test.cpp index 60632086..fde7620a 100644 --- a/modules/holoviz/tests/functional/init_test.cpp +++ b/modules/holoviz/tests/functional/init_test.cpp @@ -21,6 +21,9 @@ #include #include +#include +#include + #include namespace viz = holoscan::viz; @@ -143,3 +146,20 @@ TEST(Init, Errors) { std::runtime_error); EXPECT_NO_THROW(viz::Shutdown()); } + +TEST(Init, MultiThreaded) { + if (glfwInit() == GLFW_FALSE) { + const char* description; + int code = glfwGetError(&description); + ASSERT_EQ(code, GLFW_PLATFORM_UNAVAILABLE) + << "Expected `GLFW_PLATFORM_UNAVAILABLE` but got `" << code << "`: `" << description << "`"; + GTEST_SKIP() << "No display server available, skipping test." 
<< description; + } + + // create multiple windows from threads + std::vector> futures; + for (int i = 0; i < 8; ++i) { + futures.push_back(std::async(std::launch::async, [] { viz::Init(640, 480, "Holoviz test"); })); + } + for (auto&& future : futures) { future.wait(); } +} diff --git a/python/holoscan/CMakeLists.txt b/python/holoscan/CMakeLists.txt index 3fa6bc2c..63cfd588 100644 --- a/python/holoscan/CMakeLists.txt +++ b/python/holoscan/CMakeLists.txt @@ -80,12 +80,12 @@ function(holoscan_pybind11_module pybind11_module_name) endfunction() # custom target for top-level __init__.py file is copied -set(CMAKE_PYBIND11_PRIMARY_INIT_FILE ${CMAKE_CURRENT_LIST_DIR}/__init__.py) -add_custom_target(holoscan-python-pyinit - COMMAND ${CMAKE_COMMAND} -E copy "${CMAKE_PYBIND11_PRIMARY_INIT_FILE}" "${HOLOSCAN_PYTHON_MODULE_BINARY_DIR}/" - DEPENDS "${CMAKE_PYBIND11_PRIMARY_INIT_FILE}" +set(CMAKE_PYBIND11_PRIMARY_INIT_FILE ${CMAKE_CURRENT_LIST_DIR}/__init__.py.in) +configure_file( + ${CMAKE_PYBIND11_PRIMARY_INIT_FILE} + ${HOLOSCAN_PYTHON_MODULE_BINARY_DIR}/__init__.py + @ONLY ) -add_dependencies(holoscan-python holoscan-python-pyinit) # custom target for top-level decorator.py file is copied set(CMAKE_PYBIND11_DECORATORS_PY_FILE ${CMAKE_CURRENT_LIST_DIR}/decorator.py) diff --git a/python/holoscan/__init__.py b/python/holoscan/__init__.py.in similarity index 98% rename from python/holoscan/__init__.py rename to python/holoscan/__init__.py.in index 8bd0dfe4..fc16204e 100644 --- a/python/holoscan/__init__.py +++ b/python/holoscan/__init__.py.in @@ -25,7 +25,7 @@ def _set_version(): try: return importlib.metadata.version("holoscan") except ImportError: - return "unknown version" + return "@holoscan_VERSION@" or "unknown version" __all__ = ["__version__", "as_tensor", "cli", "core", "gxf"] diff --git a/python/holoscan/cli/common/artifact_sources.py b/python/holoscan/cli/common/artifact_sources.py index e0693d57..7221c83d 100644 --- a/python/holoscan/cli/common/artifact_sources.py +++ b/python/holoscan/cli/common/artifact_sources.py @@ -20,7 +20,7 @@ from typing import Any, Optional import requests -from packaging.version import Version +from packaging.version import InvalidVersion, Version from holoscan import __version__ as holoscan_version_string @@ -41,12 +41,19 @@ class ArtifactSources: ManifestFileUrl = None def __init__(self) -> None: - ArtifactSources.HoloscanVersion = ".".join( - str(i) for i in Version(holoscan_version_string).release[0:3] - ) - ArtifactSources.ManifestFileUrl = f"https://edge.urm.nvidia.com/artifactory/sw-holoscan-cli-generic/{ArtifactSources.HoloscanVersion}/artifacts.json" self._logger = logging.getLogger("common") - self._supported_holoscan_versions = ["2.6.0"] + self._supported_holoscan_versions = ["2.6.0", "2.7.0"] + try: + ArtifactSources.HoloscanVersion = ".".join( + str(i) for i in Version(holoscan_version_string).release[0:3] + ) + except InvalidVersion as ex: + raise RuntimeError( + "Unable to detect Holoscan version. Use --sdk-version to specify " + "a Holoscan SDK version to use." 
+ ) from ex + + ArtifactSources.ManifestFileUrl = f"https://edge.urm.nvidia.com/artifactory/sw-holoscan-cli-generic/{ArtifactSources.HoloscanVersion}/artifacts.json" @property def holoscan_versions(self) -> list[str]: diff --git a/python/holoscan/cli/common/constants.py b/python/holoscan/cli/common/constants.py index 319e7097..6baed731 100644 --- a/python/holoscan/cli/common/constants.py +++ b/python/holoscan/cli/common/constants.py @@ -34,6 +34,7 @@ class DefaultValues: ) # A local directory used for storing Docker build cache HOLOSCAN_APP_DIR = Path("/opt/holoscan/app") # Path to user's application + HOLOSCAN_LIB_DIR = Path("/opt/holoscan/lib") # Path to user's application HOLOSCAN_CONFIG_PATH = Path("/var/holoscan/app.yaml") # Path to the application config file HOLOSCAN_DOCS_DIR = Path("/opt/holoscan/docs") # Path to documentation HOLOSCAN_LOGS_DIR = Path("/var/holoscan/logs") # Path to application logs diff --git a/python/holoscan/cli/common/sdk_utils.py b/python/holoscan/cli/common/sdk_utils.py index a0fec094..b1d676a3 100644 --- a/python/holoscan/cli/common/sdk_utils.py +++ b/python/holoscan/cli/common/sdk_utils.py @@ -23,6 +23,8 @@ from packaging.version import Version +from holoscan import __version__ as holoscan_version_string + from .artifact_sources import ArtifactSources from .enum_types import SdkType from .exceptions import FailedToDetectSDKVersionError, InvalidSdkError @@ -51,7 +53,19 @@ def detect_sdk(sdk: Optional[SdkType] = None) -> SdkType: command = None try: - command = Path(sys.argv[0]).name.lower() + # For Python 3.10+, this check is to support use of the holoscan cli bash script bundled + # with the Debian package. + # Since the Debian package bundles with 3.10, we don't need to handle 3.9 but + # we still need to check if `orig_argv` is supported to avoid breaking unit test. + if ( + getattr(sys, "orig_argv", None) + and len(sys.orig_argv) >= 3 + and sys.orig_argv[0] == "python3" + and sys.orig_argv[2] == "holoscan.cli" + ): + command = "holoscan" + else: + command = Path(sys.argv[0]).name.lower() return SdkType(command) except Exception as ex: raise InvalidSdkError(f"Invalid SDK value provided: {command}") from ex @@ -114,8 +128,7 @@ def detect_holoscan_version( return sdk_version.base_version else: try: - ver_str = importlib.metadata.version("holoscan").title() - ver = Version(ver_str) + ver = Version(holoscan_version_string) ver_str = ".".join(str(i) for i in ver.release) if len(ver.release) == 1 and ver.major == ver.release[0]: diff --git a/python/holoscan/cli/holoscan b/python/holoscan/cli/holoscan index 439182c0..bd5d02b9 100755 --- a/python/holoscan/cli/holoscan +++ b/python/holoscan/cli/holoscan @@ -6,7 +6,7 @@ while IFS= read -r req; do # Check if the dependency is installed python3 -c "import pkg_resources; pkg_resources.require(\"$req\")" &>/dev/null if [ $? 
-ne 0 ]; then - missing_deps+="$req " + missing_deps+="\"$req\" " echo "Missing dependency: $req" >&2 fi done < None: self.build_parameters.tarball_output = args.output self.build_parameters.cmake_args = args.cmake_args self.build_parameters.includes = args.includes + self.build_parameters.additional_libs = args.additional_libs models = Models() platform = Platform(self._artifact_sources) diff --git a/python/holoscan/cli/packager/container_builder.py b/python/holoscan/cli/packager/container_builder.py index c1f7c3a9..819b070a 100644 --- a/python/holoscan/cli/packager/container_builder.py +++ b/python/holoscan/cli/packager/container_builder.py @@ -56,6 +56,7 @@ def __init__( self._copy_application() self._copy_model_files() self._copy_docs() + self._copy_libs() _ = self._write_dockerignore() _ = self._copy_script() @@ -239,6 +240,35 @@ def _copy_application(self): target_config_file_path = Path(os.path.join(self._temp_dir, "app.config")) shutil.copyfile(self._build_parameters.app_config_file_path, target_config_file_path) + def _copy_libs(self): + """ + - Copy additional libraries to the temporary application directory. + - Stores all subdirectories from the copied libraries to the 'additional_lib_paths' + parameter that will be used to set the LD_LIBRARY_PATH or PYTHONPATH environment variable + in the Dockerfile. + """ + if self._build_parameters.additional_libs is None: + return + target_libs_path = Path(os.path.join(self._temp_dir, "lib")) + if os.path.exists(target_libs_path): + shutil.rmtree(target_libs_path) + + for lib_path in self._build_parameters.additional_libs: + self._logger.debug( + f"Copying additional libraries from {lib_path} to {target_libs_path}" + ) + shutil.copytree(lib_path, target_libs_path, dirs_exist_ok=True) + + subdirectories = [ + os.path.join( + DefaultValues.HOLOSCAN_LIB_DIR, + os.path.join(root, subdir).replace(str(target_libs_path), "").lstrip("/"), + ) + for root, dirs, _ in os.walk(target_libs_path) + for subdir in dirs + ] + self._build_parameters.additional_lib_paths = ":".join(subdirectories) + def _copy_model_files(self): """Copy models to temporary location""" if self._build_parameters.models: diff --git a/python/holoscan/cli/packager/package_command.py b/python/holoscan/cli/packager/package_command.py index 67b06225..27efcc5d 100644 --- a/python/holoscan/cli/packager/package_command.py +++ b/python/holoscan/cli/packager/package_command.py @@ -79,6 +79,13 @@ def create_package_parser( help="target platform configuration for the build output. 
" f"Valid values: {str.join(', ', SDK.PLATFORM_CONFIGS)}.", ) + parser.add_argument( + "--add", + action="append", + dest="additional_libs", + type=valid_existing_dir_path, + help="include additional library files, python files into the application directory.", + ) parser.add_argument("--timeout", type=int, help="override default application timeout") parser.add_argument("--version", type=Version, help="set the version of the application") diff --git a/python/holoscan/cli/packager/parameters.py b/python/holoscan/cli/packager/parameters.py index c5e49b97..79613023 100644 --- a/python/holoscan/cli/packager/parameters.py +++ b/python/holoscan/cli/packager/parameters.py @@ -243,6 +243,7 @@ def __init__(self): self._logger = logging.getLogger("packager.parameters") self._data = {} self._data["app_dir"] = DefaultValues.HOLOSCAN_APP_DIR + self._data["lib_dir"] = DefaultValues.HOLOSCAN_LIB_DIR self._data["config_file_path"] = DefaultValues.HOLOSCAN_CONFIG_PATH self._data["docs_dir"] = DefaultValues.HOLOSCAN_DOCS_DIR self._data["logs_dir"] = DefaultValues.HOLOSCAN_LOGS_DIR @@ -262,6 +263,7 @@ def __init__(self): self._data["tarball_output"] = None self._data["cmake_args"] = "" self._data["includes"] = [] + self._data["additional_lib_paths"] = "" self._data["application_directory"] = None self._data["application_type"] = None @@ -276,6 +278,8 @@ def __init__(self): self._data["title"] = None self._data["version"] = None + self._additional_libs = [] + @property def build_cache(self) -> int: return self._data["build_cache"] @@ -519,6 +523,31 @@ def includes(self) -> str: def includes(self, value: str): self._data["includes"] = value + @property + def additional_lib_paths(self) -> str: + """ + Additional libraries that user wants to include in the package. + Stores the post-processed values to be injected into LD_LIBRARY_PATH and PYTHONPATH in the + Jinja2 template. + """ + return self._data["additional_lib_paths"] + + @additional_lib_paths.setter + def additional_lib_paths(self, value: str): + self._data["additional_lib_paths"] = value + + @property + def additional_libs(self) -> list[str]: + """ + Additional libraries that user wants to include in the package. + Stores paths entered from the command line before processing. 
+ """ + return self._additional_libs + + @additional_libs.setter + def additional_libs(self, value: list[str]): + self._additional_libs = value + @property def to_jinja(self) -> dict[str, Any]: return self._data diff --git a/python/holoscan/cli/packager/templates/Dockerfile.jinja2 b/python/holoscan/cli/packager/templates/Dockerfile.jinja2 index 9a7fcbdc..f9e0adba 100644 --- a/python/holoscan/cli/packager/templates/Dockerfile.jinja2 +++ b/python/holoscan/cli/packager/templates/Dockerfile.jinja2 @@ -295,7 +295,7 @@ RUN apt-get update \ libnvinfer10="10.3.*+cuda12.5" \ libnvinfer-plugin10="10.3.*+cuda12.5" \ libnvonnxparsers10="10.3.*+cuda12.5" \ - libcusparselt0="0.6.2.3-*" \ + libcusparselt0="0.6.3.2-*" \ libcudnn9-cuda-12 \ && rm -rf /var/lib/apt/lists/* \ && rm -f /usr/lib/*/libcudnn*train.so* @@ -336,7 +336,7 @@ RUN apt update \ && apt-get install -y --no-install-recommends --no-install-suggests \ gcc \ python3-dev \ - && rm -rf /var/lib/apt/lists/* + && rm -rf /var/lib/apt/lists/* {% endif %} {% endif %} @@ -412,8 +412,7 @@ WORKDIR {{ working_dir }} USER $UNAME ENV PATH=/home/${UNAME}/.local/bin:/opt/nvidia/holoscan/bin:$PATH -ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:{{ app_dir }}:/home/${UNAME}/.local/lib/python3.10/site-packages/holoscan/lib -ENV PYTHONPATH="{{ app_dir }}:${PYTHONPATH}" +ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/home/${UNAME}/.local/lib/python3.10/site-packages/holoscan/lib {% if application_type == 'PythonModule' or application_type == 'PythonFile' %} COPY ./pip/requirements.txt /tmp/requirements.txt @@ -467,4 +466,15 @@ COPY --from=builder /install {{ app_dir }} COPY ./app {{ app_dir }} {% endif %} +{% if additional_lib_paths != '' %} + +ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:{{ additional_lib_paths }}:{{ lib_dir }} +COPY ./lib {{ lib_dir }} + +{% if application_type == 'PythonModule' or application_type == 'PythonFile' %} +ENV PYTHONPATH=$PYTHONPATH:{{ additional_lib_paths }}:{{ lib_dir }} +{% endif %} + +{% endif %} + ENTRYPOINT ["/var/holoscan/tools"] diff --git a/python/holoscan/conditions/CMakeLists.txt b/python/holoscan/conditions/CMakeLists.txt index 973bbd83..aa303ce2 100644 --- a/python/holoscan/conditions/CMakeLists.txt +++ b/python/holoscan/conditions/CMakeLists.txt @@ -24,5 +24,7 @@ holoscan_pybind11_module(conditions downstream_message_affordable.cpp expiring_message.cpp message_available.cpp + multi_message_available.cpp + multi_message_available_timeout.cpp periodic.cpp ) diff --git a/python/holoscan/conditions/__init__.py b/python/holoscan/conditions/__init__.py index f4c70ebf..9d0b89d3 100644 --- a/python/holoscan/conditions/__init__.py +++ b/python/holoscan/conditions/__init__.py @@ -25,6 +25,8 @@ holoscan.conditions.DownstreamMessageAffordableCondition holoscan.conditions.ExpiringMessageAvailableCondition holoscan.conditions.MessageAvailableCondition + holoscan.conditions.MultiMessageAvailableCondition + holoscan.conditions.MultiMessageAvailableTimeoutCondition holoscan.conditions.PeriodicCondition """ @@ -39,6 +41,8 @@ DownstreamMessageAffordableCondition, ExpiringMessageAvailableCondition, MessageAvailableCondition, + MultiMessageAvailableCondition, + MultiMessageAvailableTimeoutCondition, PeriodicCondition, ) @@ -53,5 +57,13 @@ "DownstreamMessageAffordableCondition", "ExpiringMessageAvailableCondition", "MessageAvailableCondition", + "MultiMessageAvailableCondition", + "MultiMessageAvailableTimeoutCondition", "PeriodicCondition", ] + + +# expose the SamplingMode enum from MultiMessageAvailableCondition +# (done this way instead of redefinining it in the 
bindings to avoids error: +# ImportError: generic_type: type "SamplingMode" is already registered!) +MultiMessageAvailableTimeoutCondition.SamplingMode = MultiMessageAvailableCondition.SamplingMode diff --git a/python/holoscan/conditions/conditions.cpp b/python/holoscan/conditions/conditions.cpp index 5ff5a5ae..3d43586f 100644 --- a/python/holoscan/conditions/conditions.cpp +++ b/python/holoscan/conditions/conditions.cpp @@ -30,6 +30,8 @@ void init_cuda_stream(py::module_&); void init_periodic(py::module_&); void init_downstream_message_affordable(py::module_&); void init_message_available(py::module_&); +void init_multi_message_available(py::module_&); +void init_multi_message_available_timeout(py::module_&); void init_expiring_message_available(py::module_&); PYBIND11_MODULE(_conditions, m) { @@ -48,6 +50,8 @@ PYBIND11_MODULE(_conditions, m) { init_periodic(m); init_downstream_message_affordable(m); init_message_available(m); + init_multi_message_available(m); + init_multi_message_available_timeout(m); init_expiring_message_available(m); } // PYBIND11_MODULE } // namespace holoscan diff --git a/python/holoscan/conditions/downstream_message_affordable.cpp b/python/holoscan/conditions/downstream_message_affordable.cpp index a45ea6cb..55bdce83 100644 --- a/python/holoscan/conditions/downstream_message_affordable.cpp +++ b/python/holoscan/conditions/downstream_message_affordable.cpp @@ -51,15 +51,13 @@ class PyDownstreamMessageAffordableCondition : public DownstreamMessageAffordabl // Define a constructor that fully initializes the object. explicit PyDownstreamMessageAffordableCondition( - Fragment* fragment, - // std::shared_ptr transmitter, - // add transmitter here? gxf_uid_t eid, - uint64_t min_size = 1L, const std::string& name = "noname_downstream_affordable_condition") + Fragment* fragment, uint64_t min_size = 1L, + const std::string& name = "noname_downstream_affordable_condition") : DownstreamMessageAffordableCondition(Arg{"min_size", min_size}) { name_ = name; fragment_ = fragment; spec_ = std::make_shared(fragment); - // transmitter_ = transmitter; // e.g. DoubleBufferTransmitter + // Note "transmitter" parameter is set automatically from GXFExecutor setup(*spec_); } }; diff --git a/python/holoscan/conditions/expiring_message.cpp b/python/holoscan/conditions/expiring_message.cpp index 71932868..77b08648 100644 --- a/python/holoscan/conditions/expiring_message.cpp +++ b/python/holoscan/conditions/expiring_message.cpp @@ -54,9 +54,8 @@ class PyExpiringMessageAvailableCondition : public ExpiringMessageAvailableCondi // Define a constructor that fully initializes the object. explicit PyExpiringMessageAvailableCondition( - Fragment* fragment, - // std::shared_ptr receiver, - int64_t max_batch_size, int64_t max_delay_ns, std::shared_ptr clock = nullptr, + Fragment* fragment, int64_t max_batch_size, int64_t max_delay_ns, + std::shared_ptr clock = nullptr, const std::string& name = "noname_expiring_message_available_condition") : ExpiringMessageAvailableCondition(max_batch_size, max_delay_ns) { name_ = name; @@ -66,16 +65,15 @@ class PyExpiringMessageAvailableCondition : public ExpiringMessageAvailableCondi } else { this->add_arg(Arg{"clock", fragment_->make_resource("realtime_clock")}); } + // Note "receiver" parameter is set automatically from GXFExecutor spec_ = std::make_shared(fragment); - // receiver = receiver; // e.g. 
DoubleBufferReceiver setup(*spec_); } template PyExpiringMessageAvailableCondition( - Fragment* fragment, - // std::shared_ptr receiver, - int64_t max_batch_size, std::chrono::duration recess_period_duration, + Fragment* fragment, int64_t max_batch_size, + std::chrono::duration recess_period_duration, std::shared_ptr clock = nullptr, const std::string& name = "noname_expiring_message_available_condition") : ExpiringMessageAvailableCondition(max_batch_size, recess_period_duration) { @@ -87,7 +85,7 @@ class PyExpiringMessageAvailableCondition : public ExpiringMessageAvailableCondi this->add_arg(Arg{"clock", fragment_->make_resource("realtime_clock")}); } spec_ = std::make_shared(fragment); - // receiver = receiver; // e.g. DoubleBufferReceiver + // Note "receiver" parameter is set automatically from GXFExecutor setup(*spec_); } }; diff --git a/python/holoscan/conditions/message_available.cpp b/python/holoscan/conditions/message_available.cpp index fd90daf6..34bfb532 100644 --- a/python/holoscan/conditions/message_available.cpp +++ b/python/holoscan/conditions/message_available.cpp @@ -50,16 +50,14 @@ class PyMessageAvailableCondition : public MessageAvailableCondition { // Define a constructor that fully initializes the object. explicit PyMessageAvailableCondition( - Fragment* fragment, - // std::shared_ptr receiver, - uint64_t min_size = 1UL, size_t front_stage_max_size = 1UL, + Fragment* fragment, uint64_t min_size = 1UL, size_t front_stage_max_size = 1UL, const std::string& name = "noname_message_available_condition") : MessageAvailableCondition( ArgList{Arg{"min_size", min_size}, Arg{"front_stage_max_size", front_stage_max_size}}) { name_ = name; fragment_ = fragment; + // Note "receiver" parameter is set automatically from GXFExecutor spec_ = std::make_shared(fragment); - // receiver = receiver; // e.g. DoubleBufferReceiver setup(*spec_); } }; diff --git a/python/holoscan/conditions/multi_message_available.cpp b/python/holoscan/conditions/multi_message_available.cpp new file mode 100644 index 00000000..e5ddd387 --- /dev/null +++ b/python/holoscan/conditions/multi_message_available.cpp @@ -0,0 +1,136 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "./multi_message_available_pydoc.hpp" +#include "holoscan/core/component_spec.hpp" +#include "holoscan/core/conditions/gxf/multi_message_available.hpp" +#include "holoscan/core/fragment.hpp" +#include "holoscan/core/gxf/gxf_resource.hpp" + +using std::string_literals::operator""s; // NOLINT(misc-unused-using-decls) +using pybind11::literals::operator""_a; // NOLINT(misc-unused-using-decls) +namespace py = pybind11; + +namespace holoscan { + +/* Trampoline classes for handling Python kwargs + * + * These add a constructor that takes a Fragment for which to initialize the condition. 
+ * The explicit parameter list and default arguments take care of providing a Pythonic + * kwarg-based interface with appropriate default values matching the condition's + * default parameters in the C++ API `setup` method. + * + * The sequence of events in this constructor is based on Fragment::make_condition + */ + +class PyMultiMessageAvailableCondition : public MultiMessageAvailableCondition { + public: + /* Inherit the constructors */ + using MultiMessageAvailableCondition::MultiMessageAvailableCondition; + + // Define a constructor that fully initializes the object. + explicit PyMultiMessageAvailableCondition( + Fragment* fragment, + std::variant sampling_mode = + MultiMessageAvailableCondition::SamplingMode::kSumOfAll, + std::optional min_sum = std::nullopt, + std::optional> min_sizes = std::nullopt, + const std::string& name = "multi_message_condition") { + name_ = name; + fragment_ = fragment; + if (min_sum.has_value()) { this->add_arg(Arg("min_sum", min_sum.value())); } + if (min_sizes.has_value()) { this->add_arg(Arg("min_sizes", min_sizes.value())); } + + // need to pass mode via a YAML::Node. Can take either a string or enum from Python + if (std::holds_alternative(sampling_mode)) { + this->add_arg(Arg("sampling_mode", YAML::Node(std::get(sampling_mode)))); + } else { + auto mode_value = std::get(sampling_mode); + if (mode_value == MultiMessageAvailableCondition::SamplingMode::kSumOfAll) { + this->add_arg(Arg("sampling_mode", YAML::Node("SumOfAll"))); + } else if (mode_value == MultiMessageAvailableCondition::SamplingMode::kPerReceiver) { + this->add_arg(Arg("sampling_mode", YAML::Node("PerReceiver"))); + } else { + HOLOSCAN_LOG_ERROR("Invalid sampling mode: {}", static_cast(mode_value)); + } + } + // Note "receivers" parameter is set automatically from GXFExecutor + spec_ = std::make_shared(fragment); + setup(*spec_); + } +}; + +void init_multi_message_available(py::module_& m) { + py::class_> + multi_message_condition( + m, + "MultiMessageAvailableCondition", + doc::MultiMessageAvailableCondition::doc_MultiMessageAvailableCondition); + + // have to define the enum here before it is used as an argument type for py::init below + py::enum_(multi_message_condition, "SamplingMode") + .value("SUM_OF_ALL", MultiMessageAvailableCondition::SamplingMode::kSumOfAll) + .value("PER_RECEIVER", MultiMessageAvailableCondition::SamplingMode::kPerReceiver); + + multi_message_condition + .def(py::init, + std::optional, + std::optional>, + const std::string&>(), + "fragment"_a, + "sampling_mode"_a = MultiMessageAvailableCondition::SamplingMode::kSumOfAll, + "min_sum"_a = py::none(), + "min_sizes"_a = py::none(), + "name"_a = "multi_message_condition"s, + doc::MultiMessageAvailableCondition::doc_MultiMessageAvailableCondition) + .def_property("receivers", + py::overload_cast<>(&MultiMessageAvailableCondition::receivers), + py::overload_cast>>( + &MultiMessageAvailableCondition::receivers), + doc::MultiMessageAvailableCondition::doc_receivers) + .def_property("min_sum", + py::overload_cast<>(&MultiMessageAvailableCondition::min_sum), + py::overload_cast(&MultiMessageAvailableCondition::min_sum), + doc::MultiMessageAvailableCondition::doc_min_sum) + .def_property_readonly("min_sizes", + &MultiMessageAvailableCondition::min_sizes, + doc::MultiMessageAvailableCondition::doc_min_sizes) + .def("add_min_size", + &MultiMessageAvailableCondition::add_min_size, + "value"_a, + doc::MultiMessageAvailableCondition::doc_add_min_size) + .def_property("sampling_mode", + 
py::overload_cast<>(&MultiMessageAvailableCondition::sampling_mode), + py::overload_cast( + &MultiMessageAvailableCondition::sampling_mode), + doc::MultiMessageAvailableCondition::doc_sampling_mode); +} +} // namespace holoscan diff --git a/python/holoscan/conditions/multi_message_available_pydoc.hpp b/python/holoscan/conditions/multi_message_available_pydoc.hpp new file mode 100644 index 00000000..904e3f48 --- /dev/null +++ b/python/holoscan/conditions/multi_message_available_pydoc.hpp @@ -0,0 +1,95 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef PYHOLOSCAN_CONDITIONS_MULTI_MESSAGE_AVAILABLE_PYDOC_HPP +#define PYHOLOSCAN_CONDITIONS_MULTI_MESSAGE_AVAILABLE_PYDOC_HPP + +#include + +#include "../macros.hpp" + +namespace holoscan::doc { + +namespace MultiMessageAvailableCondition { + +PYDOC(MultiMessageAvailableCondition, R"doc( +Condition that checks the number of messages available across multiple inputs. + +This condition is used to check if a sufficient number of messages are available across multiple +input ports. It can operator in one of two modes: + + 1. ``SUM_OF_ALL``: The condition checks if the sum of messages available across all input ports + is greater than or equal to a given threshold. For this mode, `min_sum` should be specified. + 2. ``PER_RECEIVER``: The condition checks if the number of messages available at each input + port is greater than or equal to a given threshold. For this mode, `min_sizes` should be + specified. + +Parameters +---------- +fragment : holoscan.core.Fragment + The fragment the condition will be associated with +sampling_mode : {"SumOfAll", "PerReceiver"} or MultiMessageAvailableCondition.SamplingMode, optional + The sampling method to use when checking for messages in receiver queues. +min_sum : int, optional + The scheduling term permits execution if the sum of message counts of all receivers have at + least the given number of messages available. This option is only intended for use with + "SumOfAll" `sampling_mode`. +min_sizes : list of int, optional + The scheduling term permits execution if all given receivers have at least the given number of + messages available in this list. This option is only intended for use with + "PerReceiver" `sampling_mode`. The length of `min_sizes` must match the + number of receivers associated with the condition. +name : str, optional + The name of the condition. +)doc") + +PYDOC(receivers, R"doc( +The receivers associated with the condition. +)doc") + +PYDOC(sampling_mode, R"doc( +The sampling mode for the condition. This parameter determines how the minimum number of messages +is calculated. 
The two possible values are: +``MultiMessageAvailableCondition.SamplingMode.SUM_OF_ALL`` and +``MultiMessageAvailableCondition.SamplingMode.PER_RECEIVER.``)doc") + +PYDOC(min_sizes, R"doc( +Get the minimum number of messages that permits the execution of the entity. There is one value per +receiver associated with this condition. This parameter is only used when `sampling_mode` is set +to ``MultiMessageAvailableCondition.SamplingMode.PER_RECEIVER``; otherwise, it is ignored. +)doc") + +PYDOC(add_min_size, R"doc( +Append an integer value to the min_sizes vector. + +Parameters +---------- +value : int + The value to append to the min_sizes vector. +)doc") + +PYDOC(min_sum, R"doc( +The total number of messages that permits the execution of the entity. This total is over all +receivers associated with this condition. This parameter is only used when `sampling_mode` is set +to ``MultiMessageAvailableCondition.SamplingMode.SUM_OF_ALL``; otherwise, it is ignored. +)doc") + +} // namespace MultiMessageAvailableCondition + +} // namespace holoscan::doc + +#endif /* PYHOLOSCAN_CONDITIONS_MULTI_MESSAGE_AVAILABLE_PYDOC_HPP */ diff --git a/python/holoscan/conditions/multi_message_available_timeout.cpp b/python/holoscan/conditions/multi_message_available_timeout.cpp new file mode 100644 index 00000000..eb45a545 --- /dev/null +++ b/python/holoscan/conditions/multi_message_available_timeout.cpp @@ -0,0 +1,105 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "./multi_message_available_timeout_pydoc.hpp" +#include "holoscan/core/component_spec.hpp" +#include "holoscan/core/conditions/gxf/multi_message_available_timeout.hpp" +#include "holoscan/core/fragment.hpp" +#include "holoscan/core/gxf/gxf_resource.hpp" + +using std::string_literals::operator""s; // NOLINT(misc-unused-using-decls) +using pybind11::literals::operator""_a; // NOLINT(misc-unused-using-decls) +namespace py = pybind11; + +namespace holoscan { + +/* Trampoline classes for handling Python kwargs + * + * These add a constructor that takes a Fragment for which to initialize the condition. + * The explicit parameter list and default arguments take care of providing a Pythonic + * kwarg-based interface with appropriate default values matching the condition's + * default parameters in the C++ API `setup` method. + * + * The sequence of events in this constructor is based on Fragment::make_condition + */ + +class PyMultiMessageAvailableTimeoutCondition : public MultiMessageAvailableTimeoutCondition { + public: + /* Inherit the constructors */ + using MultiMessageAvailableTimeoutCondition::MultiMessageAvailableTimeoutCondition; + + // Define a constructor that fully initializes the object. 
+ explicit PyMultiMessageAvailableTimeoutCondition( + Fragment* fragment, const std::string& execution_frequency, + std::variant sampling_mode = + MultiMessageAvailableTimeoutCondition::SamplingMode::kSumOfAll, + std::optional min_sum = std::nullopt, + std::optional> min_sizes = std::nullopt, + const std::string& name = "multi_message_timeout_condition") + : MultiMessageAvailableTimeoutCondition(Arg("execution_frequency", execution_frequency)) { + name_ = name; + fragment_ = fragment; + if (min_sum.has_value()) { this->add_arg(Arg("min_sum", min_sum.value())); } + if (min_sizes.has_value()) { this->add_arg(Arg("min_sizes", min_sizes.value())); } + + // need to pass mode via a YAML::Node. Can take either a string or enum from Python + if (std::holds_alternative(sampling_mode)) { + this->add_arg(Arg("sampling_mode", YAML::Node(std::get(sampling_mode)))); + } else { + auto mode_value = + std::get(sampling_mode); + this->add_arg(Arg("sampling_mode", mode_value)); + } + // Note "receivers" parameter is set automatically from GXFExecutor + spec_ = std::make_shared(fragment); + setup(*spec_); + } +}; + +void init_multi_message_available_timeout(py::module_& m) { + py::class_>( + m, + "MultiMessageAvailableTimeoutCondition", + doc::MultiMessageAvailableTimeoutCondition::doc_MultiMessageAvailableTimeoutCondition) + .def(py::init, + std::optional, + std::optional>, + const std::string&>(), + "fragment"_a, + "execution_frequency"_a, + "sampling_mode"_a = MultiMessageAvailableTimeoutCondition::SamplingMode::kSumOfAll, + "min_sum"_a = py::none(), + "min_sizes"_a = py::none(), + "name"_a = "multi_message_timeout_condition"s, + doc::MultiMessageAvailableTimeoutCondition::doc_MultiMessageAvailableTimeoutCondition); +} +} // namespace holoscan diff --git a/python/holoscan/conditions/multi_message_available_timeout_pydoc.hpp b/python/holoscan/conditions/multi_message_available_timeout_pydoc.hpp new file mode 100644 index 00000000..9ceba25b --- /dev/null +++ b/python/holoscan/conditions/multi_message_available_timeout_pydoc.hpp @@ -0,0 +1,74 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef PYHOLOSCAN_CONDITIONS_MULTI_MESSAGE_AVAILABLE_TIMEOUT_PYDOC_HPP +#define PYHOLOSCAN_CONDITIONS_MULTI_MESSAGE_AVAILABLE_TIMEOUT_PYDOC_HPP + +#include + +#include "../macros.hpp" + +namespace holoscan::doc { + +namespace MultiMessageAvailableTimeoutCondition { + +PYDOC(MultiMessageAvailableTimeoutCondition, R"doc( +Condition that checks the number of messages available across multiple inputs. + +This condition is used to check if a sufficient number of messages are available across multiple +input ports. It can operator in one of two modes: + + 1. ``SUM_OF_ALL``: The condition checks if the sum of messages available across all input ports + is greater than or equal to a given threshold. For this mode, `min_sum` should be specified. + 2. 
``PER_RECEIVER``: The condition checks if the number of messages available at each input + port is greater than or equal to a given threshold. For this mode, `min_sizes` should be + specified. + +Parameters +---------- +fragment : holoscan.core.Fragment + The fragment the condition will be associated with +execution_frequency : std::string + The 'execution frequency' indicates the amount of time after which the entity will be allowed + to execute again, even if the specified number of messages have not yet been received. The + period is specified as a string containing of a number and an (optional) unit. If no unit is + given the value is assumed to be in nanoseconds. Supported units are: Hz, s, ms. + Examples: "10ms", "10000000", "0.2s", "50Hz". +sampling_mode : {"SumOfAll", "PerReceiver"} or MultiMessageAvailableTimeoutCondition.SamplingMode, optional + The sampling method to use when checking for messages in receiver queues. +min_sum : int, optional + The scheduling term permits execution if the sum of message counts of all receivers have at + least the given number of messages available. This option is only intended for use with + "SumOfAll" `sampling_mode`. +min_sizes : list of int, optional + The scheduling term permits execution if all given receivers have at least the given number of + messages available in this list. This option is only intended for use with + "PerReceiver" `sampling_mode`. The length of `min_sizes` must match the + number of receivers associated with the condition. +name : str, optional + The name of the condition. +)doc") + +PYDOC(receivers, R"doc( +The receivers associated with the condition. +)doc") + +} // namespace MultiMessageAvailableTimeoutCondition + +} // namespace holoscan::doc + +#endif /* PYHOLOSCAN_CONDITIONS_MULTI_MESSAGE_AVAILABLE_TIMEOUT_PYDOC_HPP */ diff --git a/python/holoscan/core/__init__.py b/python/holoscan/core/__init__.py index ff868177..4bc8b280 100644 --- a/python/holoscan/core/__init__.py +++ b/python/holoscan/core/__init__.py @@ -43,6 +43,7 @@ holoscan.core.IOSpec holoscan.core.Message holoscan.core.MetadataDictionary + holoscan.core.MultiMessageConditionInfo holoscan.core.MetadataPolicy holoscan.core.NetworkContext holoscan.core.Operator @@ -58,56 +59,76 @@ holoscan.core.py_object_to_arg """ +import os +import sys + # Note: Python 3.7+ expects the threading module to be initialized (imported) before additional # threads are created (by C++ modules using pybind11). # Otherwise you will get an assert tlock.locked() error on exit. 
# (CLARAHOLOS-765) import threading as _threading # noqa: F401, I001 -from ..graphs._graphs import FragmentGraph, OperatorGraph -from ._core import Application as _Application -from ._core import ( - Arg, - ArgContainerType, - ArgElementType, - ArgList, - ArgType, - CLIOptions, - Component, - Condition, - ConditionType, - Config, - DataFlowMetric, - DataFlowTracker, - DLDevice, - DLDeviceType, - ExecutionContext, - Executor, -) -from ._core import Fragment as _Fragment -from ._core import ( - InputContext, - IOSpec, - Message, - MetadataDictionary, - MetadataPolicy, - NetworkContext, -) -from ._core import Operator as _Operator -from ._core import OutputContext, ParameterFlag -from ._core import PyComponentSpec as ComponentSpec -from ._core import PyRegistryContext as _RegistryContext -from ._core import PyOperatorSpec as OperatorSpec -from ._core import PyTensor as Tensor -from ._core import Resource as _Resource -from ._core import ( - Scheduler, - arg_to_py_object, - arglist_to_kwargs, - kwargs_to_arglist, - py_object_to_arg, -) -from ._core import register_types as _register_types +# Temporarily set RTLD_GLOBAL to ensure that global symbols in the Holoscan C++ API +# (including logging-related symbols like nvidia::LoggingFunction) are shared +# across bindings. This is necessary because the Python interpreter loads the +# Pybind11 module with RTLD_LOCAL by default, which can duplicate symbols and +# lead to symbol resolution issues when the C++ API and global symbols are loaded +# as shared libraries by the Python interpreter. +original_flags = sys.getdlopenflags() # Save the current dlopen flags +try: + sys.setdlopenflags(os.RTLD_GLOBAL | os.RTLD_LAZY) + + # Import statements for the C++ API classes + from ..graphs._graphs import FragmentGraph, OperatorGraph + from ._core import Application as _Application + from ._core import ( + Arg, + ArgContainerType, + ArgElementType, + ArgList, + ArgType, + CLIOptions, + Component, + Condition, + ConditionType, + Config, + DataFlowMetric, + DataFlowTracker, + DLDevice, + DLDeviceType, + ExecutionContext, + Executor, + InputContext, + IOSpec, + Message, + MetadataDictionary, + MetadataPolicy, + NetworkContext, + OutputContext, + ParameterFlag, + Scheduler, + arg_to_py_object, + arglist_to_kwargs, + kwargs_to_arglist, + py_object_to_arg, + ) + from ._core import Fragment as _Fragment + from ._core import Operator as _Operator + from ._core import PyComponentSpec as ComponentSpec + from ._core import PyOperatorSpec as OperatorSpec + from ._core import PyRegistryContext as _RegistryContext + from ._core import PyTensor as Tensor + from ._core import Resource as _Resource + from ._core import register_types as _register_types +finally: + # Restore the original dlopen flags immediately after the imports + sys.setdlopenflags(original_flags) +del original_flags + +# need these imports for ThreadPool return type of Fragment.make_thread_pool to work +from ..gxf._gxf import GXFResource as _GXFResource # noqa: E402, F401, I001 +from ..resources import ThreadPool as _ThreadPool # noqa: E402, F401, I001 + Graph = OperatorGraph # define alias for backward compatibility @@ -138,6 +159,7 @@ "Message", "MetadataDictionary", "MetadataPolicy", + "MultiMessageConditionInfo", "NetworkContext", "Operator", "OperatorSpec", @@ -361,6 +383,7 @@ def __init__( num_start_messages_to_skip=10, num_last_messages_to_discard=10, latency_threshold=0, + is_limited_tracking=False, ): """ Parameters @@ -382,6 +405,9 @@ def __init__( latency_threshold : int, optional The minimum 
end-to-end latency in milliseconds to account for in the end-to-end latency metric calculations. + is_limited_tracking : bool, optional + If true, the tracking is limited to root and leaf nodes, minimizing the timestamps by + avoiding intermediate operators. """ self.app = app @@ -400,6 +426,7 @@ def __init__( num_start_messages_to_skip=num_start_messages_to_skip, num_last_messages_to_discard=num_last_messages_to_discard, latency_threshold=latency_threshold, + is_limited_tracking=False, ) def __enter__(self): diff --git a/python/holoscan/core/application.cpp b/python/holoscan/core/application.cpp index 7ee15ed6..64983d55 100644 --- a/python/holoscan/core/application.cpp +++ b/python/holoscan/core/application.cpp @@ -124,10 +124,13 @@ void init_application(py::module_& m) { [](Application& app, uint64_t num_start_messages_to_skip, uint64_t num_last_messages_to_discard, - int latency_threshold) + int latency_threshold, + bool is_limited_tracking) -> std::unordered_map> { - auto tracker_pointers = app.track_distributed( - num_start_messages_to_skip, num_last_messages_to_discard, latency_threshold); + auto tracker_pointers = app.track_distributed(num_start_messages_to_skip, + num_last_messages_to_discard, + latency_threshold, + is_limited_tracking); std::unordered_map> trackers; for (const auto& [name, tracker_ptr] : tracker_pointers) { trackers.emplace(name, std::ref(*tracker_ptr)); @@ -137,6 +140,7 @@ void init_application(py::module_& m) { "num_start_messages_to_skip"_a = kDefaultNumStartMessagesToSkip, "num_last_messages_to_discard"_a = kDefaultNumLastMessagesToDiscard, "latency_threshold"_a = kDefaultLatencyThreshold, + "is_limited_tracking"_a = false, // doc::Fragment::doc_track_distributed, py::return_value_policy::reference_internal) .def( @@ -198,6 +202,16 @@ void PyApplication::compose() { } void PyApplication::run() { + // Debug log to show that the run() function is executed + // (with the logging function pointer info to check if the logging function pointer address is + // the same as the one set in the Python side). + // This message is checked by the test_app_log_function in test_application_minimal.py. + + // NOLINTBEGIN(cppcoreguidelines-pro-type-reinterpret-cast) + HOLOSCAN_LOG_DEBUG("Executing PyApplication::run()... (log_func_ptr=0x{:x})", + reinterpret_cast(&nvidia::LoggingFunction)); + // NOLINTEND(cppcoreguidelines-pro-type-reinterpret-cast) + // Create a deleter for DLManagedTensor objects so that they can be deleted in a separate thread // to avoid blocking the GXF runtime mutex. LazyDLManagedTensorDeleter deleter; diff --git a/python/holoscan/core/application_pydoc.hpp b/python/holoscan/core/application_pydoc.hpp index ec54c814..9f6c5f6d 100644 --- a/python/holoscan/core/application_pydoc.hpp +++ b/python/holoscan/core/application_pydoc.hpp @@ -156,6 +156,9 @@ num_last_messages_to_discard : int latency_threshold : int The minimum end-to-end latency in milliseconds to account for in the end-to-end latency metric calculations +is_limited_tracking : bool + If true, the tracking is limited to root and leaf nodes, minimizing the timestamps by avoiding + intermediate operators. 
Returns ------- diff --git a/python/holoscan/core/arg.cpp b/python/holoscan/core/arg.cpp index 4ce52a4e..c580b899 100644 --- a/python/holoscan/core/arg.cpp +++ b/python/holoscan/core/arg.cpp @@ -100,7 +100,10 @@ void init_arg(py::module_& m) { // Instead, see py_object_to_arg() utility for getting an Arg object from a Python one // Arg& operator=(const ArgT& value) // Arg&& operator=(ArgT&& value) - .def_property_readonly("name", &Arg::name, doc::Arg::doc_name) + .def_property("name", + py::overload_cast<>(&Arg::name, py::const_), + py::overload_cast(&Arg::name), + doc::Arg::doc_name) .def_property_readonly("arg_type", &Arg::arg_type, doc::Arg::doc_arg_type) .def_property_readonly("has_value", &Arg::has_value, doc::Arg::doc_has_value) // std::any& value() diff --git a/python/holoscan/core/arg_pydoc.hpp b/python/holoscan/core/arg_pydoc.hpp index 63125529..33c6b1d7 100644 --- a/python/holoscan/core/arg_pydoc.hpp +++ b/python/holoscan/core/arg_pydoc.hpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2023-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -52,11 +52,7 @@ name : str, optional )doc") PYDOC(name, R"doc( -The name of the argument. - -Returns -------- -name : str +Name of the argument. )doc") PYDOC(arg_type, R"doc( diff --git a/python/holoscan/core/condition.cpp b/python/holoscan/core/condition.cpp index 170c0f69..0e1cc4a3 100644 --- a/python/holoscan/core/condition.cpp +++ b/python/holoscan/core/condition.cpp @@ -99,7 +99,9 @@ void init_condition(py::module_& m) { .value("BOOLEAN", ConditionType::kBoolean) .value("PERIODIC", ConditionType::kPeriodic) .value("ASYNCHRONOUS", ConditionType::kAsynchronous) - .value("EXPIRING_MESSAGE_AVAILABLE", ConditionType::kExpiringMessageAvailable); + .value("EXPIRING_MESSAGE_AVAILABLE", ConditionType::kExpiringMessageAvailable) + .value("MULTI_MESSAGE_AVAILABLE", ConditionType::kMultiMessageAvailable) + .value("MULTI_MESSAGE_AVAILABLE_TIMEOUT", ConditionType::kMultiMessageAvailableTimeout); py::class_>( m, "Condition", doc::Condition::doc_Condition) diff --git a/python/holoscan/core/dl_converter.cpp b/python/holoscan/core/dl_converter.cpp index 49f9d623..0c57f936 100644 --- a/python/holoscan/core/dl_converter.cpp +++ b/python/holoscan/core/dl_converter.cpp @@ -24,6 +24,7 @@ #include #include +#include #include "gxf/std/dlpack_utils.hpp" // nvidia::gxf::numpyTypestr #include "holoscan/core/common.hpp" @@ -119,49 +120,48 @@ void set_array_interface(const py::object& obj, } } -// NOLINTBEGIN(readability-function-cognitive-complexity) -py::capsule py_dlpack(Tensor* tensor, py::object stream) { - // TOIMPROVE: need to get current stream pointer and call with the stream - cudaStream_t curr_stream_ptr = nullptr; // legacy stream +void synchronize_streams(cudaStream_t stream1, cudaStream_t stream2) { + cudaEvent_t stream1_event{}; + HOLOSCAN_CUDA_CALL_THROW_ERROR(cudaEventCreateWithFlags(&stream1_event, cudaEventDisableTiming), + "Failure during call to cudaEventCreateWithFlags"); + HOLOSCAN_CUDA_CALL_THROW_ERROR(cudaEventRecord(stream1_event, stream1), + "Failure during call to cudaEventRecord"); + HOLOSCAN_CUDA_CALL_THROW_ERROR(cudaStreamWaitEvent(stream2, stream1_event, 0), + "Failure during call to cudaStreamWaitEvent"); + HOLOSCAN_CUDA_CALL_THROW_ERROR(cudaEventDestroy(stream1_event), + "Failure during call to 
cudaEventDestroy"); +} +void process_dlpack_stream(py::object stream_obj) { int64_t stream_id = 1; // legacy default stream cudaStream_t stream_ptr = nullptr; - if (stream.is_none()) { - stream = py::int_(1); // legacy default stream - } else if (py::isinstance(stream)) { - stream_id = stream.cast(); + if (stream_obj.is_none()) { + stream_obj = py::int_(1); // legacy default stream + } else if (py::isinstance(stream_obj)) { + stream_id = stream_obj.cast(); if (stream_id < -1) { throw std::runtime_error( "Invalid stream, valid stream should be -1 (non-blocking), 1 (legacy default stream), 2 " "(per-thread default stream), or a positive integer (stream pointer)"); } - if (stream_id <= 2) { - // Allow the stream id 0 as a special case for the default stream. - // This is to support the legacy behavior. - stream_ptr = nullptr; - } else { - // NOLINTNEXTLINE(performance-no-int-to-ptr,cppcoreguidelines-pro-type-reinterpret-cast) - stream_ptr = reinterpret_cast(stream_id); - } + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast,performance-no-int-to-ptr) + if (stream_id > 2) { stream_ptr = reinterpret_cast(stream_id); } } else { throw std::runtime_error(fmt::format("Invalid stream type: should be int type but given '{}'", - std::string(py::str(stream)))); + std::string(py::str(stream_obj)))); } // Wait for the current stream to finish before the provided stream starts consuming the memory. + cudaStream_t curr_stream_ptr = nullptr; // legacy stream if (stream_id >= 0 && curr_stream_ptr != stream_ptr) { - cudaEvent_t curr_stream_event{}; - HOLOSCAN_CUDA_CALL_THROW_ERROR( - cudaEventCreateWithFlags(&curr_stream_event, cudaEventDisableTiming), - "Failure during call to cudaEventCreateWithFlags"); - HOLOSCAN_CUDA_CALL_THROW_ERROR(cudaEventRecord(curr_stream_event, curr_stream_ptr), - "Failure during call to cudaEventRecord"); - HOLOSCAN_CUDA_CALL_THROW_ERROR(cudaStreamWaitEvent(stream_ptr, curr_stream_event, 0), - "Failure during call to cudaStreamWaitEvent"); - HOLOSCAN_CUDA_CALL_THROW_ERROR(cudaEventDestroy(curr_stream_event), - "Failure during call to cudaEventDestroy"); + synchronize_streams(curr_stream_ptr, stream_ptr); } +} + +py::capsule py_dlpack(Tensor* tensor, py::object stream) { + // determine stream and synchronize it with the default stream if necessary + process_dlpack_stream(std::move(stream)); DLManagedTensor* dl_managed_tensor = tensor->to_dlpack(); @@ -184,7 +184,6 @@ py::capsule py_dlpack(Tensor* tensor, py::object stream) { return dlpack_capsule; } -// NOLINTEND(readability-function-cognitive-complexity) py::tuple py_dlpack_device(Tensor* tensor) { auto& dl_tensor = tensor->dl_ctx()->tensor.dl_tensor; diff --git a/python/holoscan/core/dl_converter.hpp b/python/holoscan/core/dl_converter.hpp index ec257fbd..c03c410a 100644 --- a/python/holoscan/core/dl_converter.hpp +++ b/python/holoscan/core/dl_converter.hpp @@ -18,6 +18,7 @@ #ifndef PYHOLOSCAN_CORE_DL_CONVERTER_HPP #define PYHOLOSCAN_CORE_DL_CONVERTER_HPP +#include #include #include @@ -123,6 +124,9 @@ pybind11::tuple array2pytuple(const T* arr, size_t length) { return result; } +/// @brief Synchronize two CUDA streams +void synchronize_streams(cudaStream_t stream1, cudaStream_t stream2); + } // namespace holoscan #endif /* PYHOLOSCAN_CORE_DL_CONVERTER_HPP */ diff --git a/python/holoscan/core/fragment.cpp b/python/holoscan/core/fragment.cpp index c14d8dcd..93582a91 100644 --- a/python/holoscan/core/fragment.cpp +++ b/python/holoscan/core/fragment.cpp @@ -34,6 +34,7 @@ #include "holoscan/core/graph.hpp" #include 
"holoscan/core/network_context.hpp" #include "holoscan/core/operator.hpp" +#include "holoscan/core/resources/gxf/system_resources.hpp" #include "holoscan/core/scheduler.hpp" #include "kwarg_handling.hpp" @@ -138,12 +139,18 @@ void init_fragment(py::module_& m) { "num_start_messages_to_skip"_a = kDefaultNumStartMessagesToSkip, "num_last_messages_to_discard"_a = kDefaultNumLastMessagesToDiscard, "latency_threshold"_a = kDefaultLatencyThreshold, + "is_limited_tracking"_a = false, doc::Fragment::doc_track, py::return_value_policy::reference_internal) .def_property("is_metadata_enabled", py::overload_cast<>(&Fragment::is_metadata_enabled, py::const_), py::overload_cast(&Fragment::is_metadata_enabled), doc::Fragment::doc_is_metadata_enabled) + .def("make_thread_pool", + &Fragment::make_thread_pool, + "name"_a, + "initialize_size"_a = 1, + doc::Fragment::doc_make_thread_pool) .def("run", &Fragment::run, doc::Fragment::doc_run, diff --git a/python/holoscan/core/fragment_pydoc.hpp b/python/holoscan/core/fragment_pydoc.hpp index 4b541a18..728de05e 100644 --- a/python/holoscan/core/fragment_pydoc.hpp +++ b/python/holoscan/core/fragment_pydoc.hpp @@ -229,6 +229,9 @@ num_last_messages_to_discard : int latency_threshold : int The minimum end-to-end latency in milliseconds to account for in the end-to-end latency metric calculations +is_limited_tracking : bool, optional + If ``True``, the tracking is limited to root and leaf nodes, minimizing + the timestamps by avoiding intermediate operators. Returns ------- @@ -237,6 +240,19 @@ tracker : holoscan.core.DataFlowTracker different paths through the computation graph. )doc") +PYDOC(make_thread_pool, R"doc( +Create a ThreadPool associated with this Fragment. + +The add method must be used to add individual operators to the pool. + +Parameters +---------- +name : str + A name for the thread pool. +initialize_size : 1 + The initial number of threads in the pool. +)doc") + PYDOC(run, R"doc( The run method of the Fragment. 
diff --git a/python/holoscan/core/io_context.cpp b/python/holoscan/core/io_context.cpp index 203ba14c..4e75c4e2 100644 --- a/python/holoscan/core/io_context.cpp +++ b/python/holoscan/core/io_context.cpp @@ -105,11 +105,19 @@ static void register_py_object_codec() { "std::shared_ptr"s); } -// NOLINTBEGIN(readability-function-cognitive-complexity) -py::object PyInputContext::py_receive(const std::string& name, const std::string& kind) { - auto* py_op = py_op_.cast(); - auto py_op_spec = py_op->py_shared_spec(); +namespace { +/** + * @brief Determine if tuple of objects will be received based on kind or the OperatorSpec + * + * @param name The name of the input + * @param kind The kind of the input + * @param py_op_spec The OperatorSpec object + * + * @return True if the input should be returned as a tuple, false otherwise + */ +bool should_return_as_tuple(const std::string& name, const std::string& kind, + const std::shared_ptr& py_op_spec) { bool should_return_tuple = false; bool is_receivers = false; for (const auto& receivers : py_op_spec->py_receivers()) { @@ -123,27 +131,25 @@ py::object PyInputContext::py_receive(const std::string& name, const std::string if (!kind.empty()) { if (kind == "single") { if (is_receivers) { - HOLOSCAN_LOG_ERROR( + std::string err_msg = fmt::format( "Invalid kind '{}' for receive() method, cannot be 'single' for the input port with " "'IOSpec.ANY_SIZE'", kind); - throw std::runtime_error(fmt::format( - "Invalid kind '{}' for receive() method, cannot be 'single' for the input port with " - "'IOSpec.ANY_SIZE'", - kind)); + HOLOSCAN_LOG_ERROR(err_msg); + throw std::runtime_error(err_msg); } should_return_tuple = false; } else if (kind == "multi") { should_return_tuple = true; } else { - HOLOSCAN_LOG_ERROR("Invalid kind '{}' for receive() method, must be 'single' or 'multi'", - kind); - throw std::runtime_error( - fmt::format("Invalid kind '{}' for receive() method, must be 'single' or 'multi'", kind)); + std::string err_msg = + fmt::format("Invalid kind '{}' for receive() method, must be 'single' or 'multi'", kind); + HOLOSCAN_LOG_ERROR(err_msg); + throw std::runtime_error(err_msg); } } else { - // If the 'queue_size' equals IOSpec.PRECEDING_COUNT (0) or 'queue_size > 1', returns a tuple. if (!should_return_tuple) { + // If the 'queue_size' equals IOSpec.PRECEDING_COUNT (0) or 'queue_size > 1', returns a tuple. 
auto input_spec = py_op_spec->inputs().find(name); if (input_spec != py_op_spec->inputs().end()) { auto queue_size = input_spec->second->queue_size(); @@ -151,59 +157,146 @@ py::object PyInputContext::py_receive(const std::string& name, const std::string } } } + return should_return_tuple; +} +} // namespace - if (should_return_tuple) { - auto maybe_any_result = receive>(name.c_str()); - if (!maybe_any_result.has_value()) { - HOLOSCAN_LOG_ERROR("Unable to receive input (std::vector) with name '{}'", name); - return py::none(); - } - auto any_result = maybe_any_result.value(); - if (any_result.empty()) { return py::make_tuple(); } - - // Check element type (querying the first element using the name '{name}:0') - auto& element = any_result[0]; - const auto& element_type = element.type(); - auto& registry = holoscan::EmitterReceiverRegistry::get_instance(); - const auto& receiver_func = registry.get_receiver(element_type); - - py::tuple result_tuple(any_result.size()); - int counter = 0; - try { - for (const auto& any_item : any_result) { - const auto& item_type = any_item.type(); - if (item_type == typeid(kNoReceivedMessage) || item_type == typeid(std::nullptr_t)) { - // add None to the tuple - PyTuple_SET_ITEM(result_tuple.ptr(), counter++, py::none().release().ptr()); - continue; - } - // Get the Python object from the entity - py::object in_obj = receiver_func(any_item, name, *this); - - // Move the Python object into the tuple - PyTuple_SET_ITEM(result_tuple.ptr(), counter++, in_obj.release().ptr()); +py::object PyInputContext::receive_as_tuple(const std::string& name) { + auto maybe_any_result = receive>(name.c_str()); + if (!maybe_any_result.has_value()) { + HOLOSCAN_LOG_ERROR("Unable to receive input (std::vector) with name '{}'", name); + return py::none(); + } + auto any_result = maybe_any_result.value(); + if (any_result.empty()) { return py::make_tuple(); } + + // Get receiver from registry based only on the type of the first element + auto& registry = holoscan::EmitterReceiverRegistry::get_instance(); + const auto& receiver_func = registry.get_receiver(any_result[0].type()); + + py::tuple result_tuple(any_result.size()); + int counter = 0; + try { + for (const auto& any_item : any_result) { + const auto& item_type = any_item.type(); + if (item_type == typeid(kNoReceivedMessage) || item_type == typeid(std::nullptr_t)) { + PyTuple_SET_ITEM(result_tuple.ptr(), counter++, py::none().release().ptr()); + continue; } - } catch (const std::bad_any_cast& e) { - HOLOSCAN_LOG_ERROR( - "Unable to receive input (std::vector) with name " - "'{}' ({})", - name, - e.what()); + py::object in_obj = receiver_func(any_item, name, *this); + PyTuple_SET_ITEM(result_tuple.ptr(), counter++, in_obj.release().ptr()); } - return result_tuple; + } catch (const std::bad_any_cast& e) { + HOLOSCAN_LOG_ERROR( + "Unable to receive input (std::vector) with name " + "'{}' ({})", + name, + e.what()); } + return result_tuple; +} + +py::object PyInputContext::receive_as_single(const std::string& name) { auto maybe_result = receive(name.c_str()); if (!maybe_result.has_value()) { HOLOSCAN_LOG_DEBUG("Unable to receive input (std::any) with name '{}'", name); return py::none(); } auto result = maybe_result.value(); - const auto& result_type = result.type(); auto& registry = holoscan::EmitterReceiverRegistry::get_instance(); - const auto& receiver_func = registry.get_receiver(result_type); + const auto& receiver_func = registry.get_receiver(result.type()); return receiver_func(result, name, *this); } +py::object 
PyInputContext::py_receive(const std::string& name, const std::string& kind) { + auto* py_op = py_op_.cast(); + auto py_op_spec = py_op->py_shared_spec(); + + bool should_return_tuple = should_return_as_tuple(name, kind, py_op_spec); + if (should_return_tuple) { return receive_as_tuple(name); } + return receive_as_single(name); +} + +bool PyOutputContext::handle_py_entity(py::object& data, const std::string& name, + int64_t acq_timestamp, EmitterReceiverRegistry& registry) { + if (py::isinstance(data)) { + HOLOSCAN_LOG_DEBUG("py_emit: emitting a holoscan::PyEntity"); + const auto& emit_func = registry.get_emitter(typeid(holoscan::PyEntity)); + emit_func(data, name, *this, acq_timestamp); + return true; + } + return false; +} + +bool PyOutputContext::handle_py_dict(py::object& data, const std::string& name, + int64_t acq_timestamp, EmitterReceiverRegistry& registry) { + if (py::isinstance(data)) { + const auto& emit_func = registry.get_emitter(typeid(pybind11::dict)); + emit_func(data, name, *this, acq_timestamp); + return true; + } + return false; +} + +bool PyOutputContext::handle_holoviz_op(py::object& data, const std::string& name, + int64_t acq_timestamp, EmitterReceiverRegistry& registry) { + // Emit a sequence of HolovizOp.InputSpec as a C++ object without having to explicitly set + // emitter_name="std::vector" when calling emit. + if ((py::isinstance(data) || py::isinstance(data)) && py::len(data) > 0) { + auto seq = data.cast(); + if (py::isinstance(seq[0])) { + HOLOSCAN_LOG_DEBUG( + "py_emit: emitting a std::vector object"); + const auto& emit_func = + registry.get_emitter(typeid(std::vector)); + emit_func(data, name, *this, acq_timestamp); + return true; + } + } + return false; +} + +bool PyOutputContext::check_distributed_app(const std::string& name) { + bool is_ucx_connector = false; + if (outputs_.find(name) != outputs_.end()) { + auto connector_type = outputs_.at(name)->connector_type(); + is_ucx_connector = connector_type == IOSpec::ConnectorType::kUCX; + } + + if (is_ucx_connector) { return true; } + + // If this operator doesn't have a UCX connector, can still determine if the app is + // a multi-fragment app via the application pointer assigned to the fragment + auto* py_op = py_op_.cast(); + auto py_op_spec = py_op->py_shared_spec(); + auto* app_ptr = py_op_spec->fragment()->application(); + if (app_ptr != nullptr) { + // a non-empty fragment graph means that the application is multi-fragment + if (!(app_ptr->fragment_graph().is_empty())) { return true; } + } + return false; +} + +void PyOutputContext::emit_tensor_like_distributed(py::object& data, const std::string& name, + int64_t acq_timestamp, + EmitterReceiverRegistry& registry) { + HOLOSCAN_LOG_DEBUG("py_emit: emitting a tensor-like object over a UCX connector"); + const auto& emit_func = registry.get_emitter(typeid(holoscan::Tensor)); + emit_func(data, name, *this, acq_timestamp); +} + +void PyOutputContext::emit_python_object(py::object& data, const std::string& name, + int64_t acq_timestamp, EmitterReceiverRegistry& registry) { + // Note: issue 4290043 + // Instead of calling cloudpickle directly here to serialize to a string, we instead register + // a codec for type std::shared_ptr in this module, so that proper + // serialization will occur for distributed applications even in the case where an implicit + // broadcast codelet was inserted. 
+ HOLOSCAN_LOG_DEBUG("py_emit: emitting a std::shared_ptr"); + const auto& emit_func = registry.get_emitter(typeid(std::shared_ptr)); + emit_func(data, name, *this, acq_timestamp); +} + void PyOutputContext::py_emit(py::object& data, const std::string& name, const std::string& emitter_name, int64_t acq_timestamp) { // Note:: Issue 4206197 @@ -217,15 +310,16 @@ void PyOutputContext::py_emit(py::object& data, const std::string& name, // For this reason, we need to release the GIL before entity ref-count-related functions are // called. -// avoid overhead of retrieving operator name for release builds + // avoid overhead of retrieving operator name for release builds #ifdef NDEBUG - #else auto op_name = py_op_.attr("name").cast(); HOLOSCAN_LOG_DEBUG("py_emit (operator name={}, port name={}):", op_name, name); #endif auto& registry = holoscan::EmitterReceiverRegistry::get_instance(); + + // If the user specified emitter_name, emit using that if (!emitter_name.empty()) { HOLOSCAN_LOG_DEBUG("py_emit: emitting a {}", emitter_name); const auto& emit_func = registry.get_emitter(emitter_name); @@ -234,85 +328,35 @@ void PyOutputContext::py_emit(py::object& data, const std::string& name, } // If this is a PyEntity emit a gxf::Entity so that it can be consumed by non-Python operator. - if (py::isinstance(data)) { - HOLOSCAN_LOG_DEBUG("py_emit: emitting a holoscan::PyEntity"); - const auto& emit_func = registry.get_emitter(typeid(holoscan::PyEntity)); - emit_func(data, name, *this, acq_timestamp); - return; - } + if (handle_py_entity(data, name, acq_timestamp, registry)) { return; } /// @todo Workaround for HolovizOp which expects a list of input specs. /// If we don't do the cast here the operator receives a python list object. There should be a /// generic way for this, or the operator needs to register expected types. - if (py::isinstance(data) || py::isinstance(data)) { - if (py::len(data) > 0) { - auto seq = data.cast(); - if (py::isinstance(seq[0])) { - HOLOSCAN_LOG_DEBUG( - "py_emit: emitting a std::vector object"); - const auto& emit_func = - registry.get_emitter(typeid(std::vector)); - emit_func(data, name, *this, acq_timestamp); - return; - } - } - } + if (handle_holoviz_op(data, name, acq_timestamp, registry)) { return; } // handle pybind11::dict separately from other Python types for special TensorMap treatment - if (py::isinstance(data)) { - const auto& emit_func = registry.get_emitter(typeid(pybind11::dict)); - emit_func(data, name, *this, acq_timestamp); - return; - } + if (handle_py_dict(data, name, acq_timestamp, registry)) { return; } - bool is_ucx_connector = false; - if (outputs_.find(name) != outputs_.end()) { - auto connector_type = outputs_.at(name)->connector_type(); - is_ucx_connector = connector_type == IOSpec::ConnectorType::kUCX; - } - - bool is_distributed_app = false; - if (is_ucx_connector) { - is_distributed_app = true; - } else { - // If this operator doesn't have a UCX connector, can still determine if the app is - // a multi-fragment app via the application pointer assigned to the fragment. - auto* py_op = py_op_.cast(); - auto py_op_spec = py_op->py_shared_spec(); - auto* app_ptr = py_op_spec->fragment()->application(); - if (app_ptr != nullptr) { - // a non-empty fragment graph means that the application is multi-fragment - if (!(app_ptr->fragment_graph().is_empty())) { is_distributed_app = true; } - } - } + bool is_distributed_app = check_distributed_app(name); HOLOSCAN_LOG_DEBUG("py_emit: detected {}distributed app", is_distributed_app ? 
"" : "non-"); - - // Note: issue 4290043 - // For distributed applications, always convert tensor-like data to an entity containing a - // holoscan::Tensor. Previously this was only done on operators where `is_ucx_connector` was - // true, but that lead to a bug in cases where an implicit broadcast codelet was inserted at - // run time by the GXFExecutor. To ensure the UCX transmitter downstream of the broadcast - // will receive an entity containiner a holoscan::Tensor for any array-like object, we need to - // always make the conversion here. This would have additional overhead of entity creation for - // single fragment applications, where serialization of tensors is not necessary, so we guard - // this loop in an `is_distributed_app` condition. This way single fragment applications will - // still just directly pass the Python object. if (is_distributed_app && is_tensor_like(data)) { - HOLOSCAN_LOG_DEBUG("py_emit: emitting a tensor-like object over a UCX connector"); - const auto& emit_func = registry.get_emitter(typeid(holoscan::Tensor)); - emit_func(data, name, *this, acq_timestamp); + // Note: issue 4290043 + // For distributed applications, always convert tensor-like data to an entity containing a + // holoscan::Tensor. Previously this was only done on operators where `is_ucx_connector` was + // true, but that lead to a bug in cases where an implicit broadcast codelet was inserted at + // run time by the GXFExecutor. To ensure the UCX transmitter downstream of the broadcast + // will receive an entity containing a holoscan::Tensor for any array-like object, we need to + // always make the conversion here. This would have additional overhead of entity creation for + // single fragment applications, where serialization of tensors is not necessary, so we guard + // this loop in an `is_distributed_app` condition. This way single fragment applications will + // still just directly pass the Python object. + emit_tensor_like_distributed(data, name, acq_timestamp, registry); return; } // Emit everything else as a Python object. - // Note: issue 4290043 - // Instead of calling cloudpickle directly here to serialize to a string, we instead register - // a codec for type std::shared_ptr in this module, so that proper - // serialization will occur for distributed applications even in the case where an implicit - // broadcast codelet was inserted. 
- HOLOSCAN_LOG_DEBUG("py_emit: emitting a std::shared_ptr"); - const auto& emit_func = registry.get_emitter(typeid(std::shared_ptr)); - emit_func(data, name, *this, acq_timestamp); + emit_python_object(data, name, acq_timestamp, registry); } void init_io_context(py::module_& m) { @@ -400,7 +444,6 @@ void init_io_context(py::module_& m) { "Return a reference to the static EmitterReceiverRegistry", py::return_value_policy::reference_internal); } -// NOLINTEND(readability-function-cognitive-complexity) PyInputContext::PyInputContext(ExecutionContext* execution_context, Operator* op, std::unordered_map>& inputs, diff --git a/python/holoscan/core/io_context.hpp b/python/holoscan/core/io_context.hpp index 9e4e1758..2288ef6d 100644 --- a/python/holoscan/core/io_context.hpp +++ b/python/holoscan/core/io_context.hpp @@ -41,6 +41,8 @@ namespace py = pybind11; namespace holoscan { +class EmitterReceiverRegistry; // Forward declaration + void init_io_context(py::module_&); class PyInputContext : public gxf::GXFInputContext { @@ -55,6 +57,12 @@ class PyInputContext : public gxf::GXFInputContext { private: py::object py_op_ = py::none(); + + /// @brief Receive data as a Python object from the specified input port + py::object receive_as_single(const std::string& name); + + /// @brief Receive data as a tuple of Python objects from the specified input port + py::object receive_as_tuple(const std::string& name); }; class PyOutputContext : public gxf::GXFOutputContext { @@ -71,6 +79,29 @@ class PyOutputContext : public gxf::GXFOutputContext { private: py::object py_op_ = py::none(); + + /// @brief Handle emitting data if it is a PyEntity object + bool handle_py_entity(py::object& data, const std::string& name, int64_t acq_timestamp, + EmitterReceiverRegistry& registry); + + /// @brief Handle emitting data if it is a list or tuple of HolovizOp.InputSpec + bool handle_holoviz_op(py::object& data, const std::string& name, int64_t acq_timestamp, + EmitterReceiverRegistry& registry); + + /// @brief Handle emitting data if it is a Python dict + bool handle_py_dict(py::object& data, const std::string& name, int64_t acq_timestamp, + EmitterReceiverRegistry& registry); + + /// @brief Determine if the current operator belongs to a distributed application + bool check_distributed_app(const std::string& name); + + /// @brief emit tensor-like Python objects for distributed applications + void emit_tensor_like_distributed(py::object& data, const std::string& name, + int64_t acq_timestamp, EmitterReceiverRegistry& registry); + + /// @brief emit as a Python object + void emit_python_object(py::object& data, const std::string& name, int64_t acq_timestamp, + EmitterReceiverRegistry& registry); }; } // namespace holoscan diff --git a/python/holoscan/core/kwarg_handling.cpp b/python/holoscan/core/kwarg_handling.cpp index 80eacabc..91bc355e 100644 --- a/python/holoscan/core/kwarg_handling.cpp +++ b/python/holoscan/core/kwarg_handling.cpp @@ -117,56 +117,72 @@ void set_vector_arg_via_numpy_array(const py::array& obj, Arg& out) { } } -// NOLINTBEGIN(readability-function-cognitive-complexity) +namespace { + +template +YAML::Node process_nested_sequence_as_node(const py::sequence& sequence) { + YAML::Node yaml_node = YAML::Load("[]"); // Create an empty sequence + for (const auto& item : sequence) { + YAML::Node inner_yaml_node = YAML::Load("[]"); // Create an empty sequence + for (const auto& inner_item : item) { + inner_yaml_node.push_back(cast_to_yaml_node(inner_item)); + } + if (inner_yaml_node.size() > 0) { 
yaml_node.push_back(inner_yaml_node); } + } + return yaml_node; +} + +template +YAML::Node process_sequence_as_node(const py::sequence& sequence) { + YAML::Node yaml_node = YAML::Load("[]"); // Create an empty sequence + for (const auto& item : sequence) { yaml_node.push_back(cast_to_yaml_node(item)); } + return yaml_node; +} + +template +std::vector process_shared_ptr_sequence(const py::sequence& sequence) { + std::vector v; + size_t length = py::len(sequence); + v.reserve(length); + for (const auto& item : sequence) { v.push_back(item.cast()); } + return v; +} + +template +std::vector> process_nested_shared_ptr_sequence(const py::sequence& sequence) { + std::vector> v; + v.reserve(static_cast(py::len(sequence))); + for (const auto& item : sequence) { + std::vector vv; + vv.reserve(static_cast(py::len(item))); + for (const auto& inner_item : item) { vv.push_back(inner_item.cast()); } + v.push_back(vv); + } + return v; +} +} // namespace + template void set_vector_arg_via_py_sequence(const py::sequence& seq, Arg& out) { // not intended for images or other large tensors, just // for short arrays containing parameter settings to operators/resources + auto first_item = seq[0]; if constexpr (std::is_same_v> || std::is_same_v>) { - auto first_item = seq[0]; if (py::isinstance(first_item) && !py::isinstance(first_item)) { - // Handle list of list and other sequence of sequence types. - std::vector> v; - v.reserve(static_cast(py::len(seq))); - for (const auto& item : seq) { - std::vector vv; - vv.reserve(static_cast(py::len(item))); - for (const auto& inner_item : item) { vv.push_back(inner_item.cast()); } - v.push_back(vv); - } - out = v; + out = process_nested_shared_ptr_sequence(seq); } else { - // 1d vector to handle a sequence of elements - std::vector v; - size_t length = py::len(seq); - v.reserve(length); - for (const auto& item : seq) { v.push_back(item.cast()); } - out = v; + out = process_shared_ptr_sequence(seq); } } else { - auto first_item = seq[0]; if (py::isinstance(first_item) && !py::isinstance(first_item)) { - // Handle list of list and other sequence of sequence types. 
- YAML::Node yaml_node = YAML::Load("[]"); // Create an empty sequence - for (const auto& item : seq) { - YAML::Node inner_yaml_node = YAML::Load("[]"); // Create an empty sequence - for (const auto& inner_item : item) { - inner_yaml_node.push_back(cast_to_yaml_node(inner_item)); - } - if (inner_yaml_node.size() > 0) { yaml_node.push_back(inner_yaml_node); } - } - out = yaml_node; + out = process_nested_sequence_as_node(seq); } else { - // 1d vector to handle a sequence of elements - YAML::Node yaml_node = YAML::Load("[]"); // Create an empty sequence - for (const auto& item : seq) { yaml_node.push_back(cast_to_yaml_node(item)); } - out = yaml_node; + out = process_sequence_as_node(seq); } } } -// NOLINTEND(readability-function-cognitive-complexity) void set_vector_arg_via_iterable(const py::object& obj, Arg& out) { py::sequence seq; diff --git a/python/holoscan/core/metadata.cpp b/python/holoscan/core/metadata.cpp index 9cf287c5..977ff398 100644 --- a/python/holoscan/core/metadata.cpp +++ b/python/holoscan/core/metadata.cpp @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -245,119 +246,147 @@ void py_object_to_metadata_object(MetadataObject& meta_obj, const py::object& va } } -// NOLINTBEGIN(readability-function-cognitive-complexity) +using CastFunction = std::function; + +/// @brief Cast the value stored in C++ MetadataObject to a Python Object py::object metadata_obj_to_pyobject(MetadataObject& meta_obj) { + static const std::unordered_map cast_map = { + // Return a Python objects as-is. + {typeid(std::shared_ptr), + [](const std::any& value) { + return std::any_cast>(value)->obj(); + }}, + // For C++ types, this function currently supports casting T, vector, and + // vector> types where T is either std::string, bool or various integer or floating + // point types. 
+ + // Handle scalar types + {typeid(std::string), + [](const std::any& value) { return py::cast(std::any_cast(value)); }}, + {typeid(float), [](const std::any& value) { return py::cast(std::any_cast(value)); }}, + {typeid(double), + [](const std::any& value) { return py::cast(std::any_cast(value)); }}, + {typeid(bool), [](const std::any& value) { return py::cast(std::any_cast(value)); }}, + {typeid(int64_t), + [](const std::any& value) { return py::cast(std::any_cast(value)); }}, + {typeid(uint64_t), + [](const std::any& value) { return py::cast(std::any_cast(value)); }}, + {typeid(int32_t), + [](const std::any& value) { return py::cast(std::any_cast(value)); }}, + {typeid(uint32_t), + [](const std::any& value) { return py::cast(std::any_cast(value)); }}, + {typeid(int16_t), + [](const std::any& value) { return py::cast(std::any_cast(value)); }}, + {typeid(uint16_t), + [](const std::any& value) { return py::cast(std::any_cast(value)); }}, + {typeid(int8_t), + [](const std::any& value) { return py::cast(std::any_cast(value)); }}, + {typeid(uint8_t), + [](const std::any& value) { return py::cast(std::any_cast(value)); }}, + {typeid(std::complex), + [](const std::any& value) { return py::cast(std::any_cast>(value)); }}, + {typeid(std::complex), + [](const std::any& value) { return py::cast(std::any_cast>(value)); }}, + // Handle std::vector types + {typeid(std::vector), + [](const std::any& value) { + return py::cast(std::any_cast>(value)); + }}, + {typeid(std::vector), + [](const std::any& value) { return py::cast(std::any_cast>(value)); }}, + {typeid(std::vector), + [](const std::any& value) { return py::cast(std::any_cast>(value)); }}, + {typeid(std::vector), + [](const std::any& value) { return py::cast(std::any_cast>(value)); }}, + {typeid(std::vector), + [](const std::any& value) { return py::cast(std::any_cast>(value)); }}, + {typeid(std::vector), + [](const std::any& value) { return py::cast(std::any_cast>(value)); }}, + {typeid(std::vector), + [](const std::any& value) { return py::cast(std::any_cast>(value)); }}, + {typeid(std::vector), + [](const std::any& value) { return py::cast(std::any_cast>(value)); }}, + {typeid(std::vector), + [](const std::any& value) { return py::cast(std::any_cast>(value)); }}, + {typeid(std::vector), + [](const std::any& value) { return py::cast(std::any_cast>(value)); }}, + {typeid(std::vector), + [](const std::any& value) { return py::cast(std::any_cast>(value)); }}, + {typeid(std::vector), + [](const std::any& value) { return py::cast(std::any_cast>(value)); }}, + {typeid(std::vector>), + [](const std::any& value) { + return py::cast(std::any_cast>>(value)); + }}, + {typeid(std::vector>), + [](const std::any& value) { + return py::cast(std::any_cast>>(value)); + }}, + // Handle std::vector> types + {typeid(std::vector>), + [](const std::any& value) { + return py::cast(std::any_cast>>(value)); + }}, + {typeid(std::vector>), + [](const std::any& value) { + return py::cast(std::any_cast>>(value)); + }}, + {typeid(std::vector>), + [](const std::any& value) { + return py::cast(std::any_cast>>(value)); + }}, + {typeid(std::vector>), + [](const std::any& value) { + return py::cast(std::any_cast>>(value)); + }}, + {typeid(std::vector>), + [](const std::any& value) { + return py::cast(std::any_cast>>(value)); + }}, + {typeid(std::vector>), + [](const std::any& value) { + return py::cast(std::any_cast>>(value)); + }}, + {typeid(std::vector>), + [](const std::any& value) { + return py::cast(std::any_cast>>(value)); + }}, + {typeid(std::vector>), + [](const 
std::any& value) { + return py::cast(std::any_cast>>(value)); + }}, + {typeid(std::vector>), + [](const std::any& value) { + return py::cast(std::any_cast>>(value)); + }}, + {typeid(std::vector>), + [](const std::any& value) { + return py::cast(std::any_cast>>(value)); + }}, + {typeid(std::vector>), + [](const std::any& value) { + return py::cast(std::any_cast>>(value)); + }}, + {typeid(std::vector>), + [](const std::any& value) { + return py::cast(std::any_cast>>(value)); + }}, + {typeid(std::vector>>), + [](const std::any& value) { + return py::cast(std::any_cast>>>(value)); + }}, + {typeid(std::vector>>), [](const std::any& value) { + return py::cast(std::any_cast>>>(value)); + }}}; + std::any value = meta_obj.value(); const auto& id = value.type(); - // Return a Python objects as-is. - if (id == typeid(std::shared_ptr)) { - return std::any_cast>(value)->obj(); - } - // For C++ types, support casting T, vector, and vector> types - // where T is either std::string, bool or various integer or floating point types. - if (id == typeid(std::string)) { return py::cast(std::any_cast(value)); } - if (id == typeid(float)) { return py::cast(std::any_cast(value)); } - if (id == typeid(double)) { return py::cast(std::any_cast(value)); } - if (id == typeid(bool)) { return py::cast(std::any_cast(value)); } - if (id == typeid(int64_t)) { return py::cast(std::any_cast(value)); } - if (id == typeid(uint64_t)) { return py::cast(std::any_cast(value)); } - if (id == typeid(int32_t)) { return py::cast(std::any_cast(value)); } - if (id == typeid(uint32_t)) { return py::cast(std::any_cast(value)); } - if (id == typeid(int16_t)) { return py::cast(std::any_cast(value)); } - if (id == typeid(uint16_t)) { return py::cast(std::any_cast(value)); } - if (id == typeid(int8_t)) { return py::cast(std::any_cast(value)); } - if (id == typeid(uint8_t)) { return py::cast(std::any_cast(value)); } - if (id == typeid(std::complex)) { - return py::cast(std::any_cast>(value)); - } - if (id == typeid(std::complex)) { - return py::cast(std::any_cast>(value)); - } - if (id == typeid(std::vector)) { - return py::cast(std::any_cast>(value)); - } - if (id == typeid(std::vector)) { - return py::cast(std::any_cast>(value)); - } - if (id == typeid(std::vector)) { - return py::cast(std::any_cast>(value)); - } - if (id == typeid(std::vector)) { return py::cast(std::any_cast>(value)); } - if (id == typeid(std::vector)) { - return py::cast(std::any_cast>(value)); - } - if (id == typeid(std::vector)) { - return py::cast(std::any_cast>(value)); - } - if (id == typeid(std::vector)) { - return py::cast(std::any_cast>(value)); - } - if (id == typeid(std::vector)) { - return py::cast(std::any_cast>(value)); - } - if (id == typeid(std::vector)) { - return py::cast(std::any_cast>(value)); - } - if (id == typeid(std::vector)) { - return py::cast(std::any_cast>(value)); - } - if (id == typeid(std::vector)) { - return py::cast(std::any_cast>(value)); - } - if (id == typeid(std::vector)) { - return py::cast(std::any_cast>(value)); - } - if (id == typeid(std::vector>)) { - return py::cast(std::any_cast>>(value)); - } - if (id == typeid(std::vector>)) { - return py::cast(std::any_cast>>(value)); - } - if (id == typeid(std::vector>)) { - return py::cast(std::any_cast>>(value)); - } - if (id == typeid(std::vector>)) { - return py::cast(std::any_cast>>(value)); - } - if (id == typeid(std::vector>)) { - return py::cast(std::any_cast>>(value)); - } - if (id == typeid(std::vector>)) { - return py::cast(std::any_cast>>(value)); - } - if (id == 
typeid(std::vector>)) { - return py::cast(std::any_cast>>(value)); - } - if (id == typeid(std::vector>)) { - return py::cast(std::any_cast>>(value)); - } - if (id == typeid(std::vector>)) { - return py::cast(std::any_cast>>(value)); - } - if (id == typeid(std::vector>)) { - return py::cast(std::any_cast>>(value)); - } - if (id == typeid(std::vector>)) { - return py::cast(std::any_cast>>(value)); - } - if (id == typeid(std::vector>)) { - return py::cast(std::any_cast>>(value)); - } - if (id == typeid(std::vector>)) { - return py::cast(std::any_cast>>(value)); - } - if (id == typeid(std::vector>)) { - return py::cast(std::any_cast>>(value)); - } - if (id == typeid(std::vector>>)) { - return py::cast(std::any_cast>>>(value)); - } - if (id == typeid(std::vector>>)) { - return py::cast(std::any_cast>>>(value)); - } + + auto it = cast_map.find(id); + if (it != cast_map.end()) { return it->second(value); } + return py::none(); } -// NOLINTEND(readability-function-cognitive-complexity) void init_metadata(py::module_& m) { py::class_(m, "MetaNoneValue").def(py::init<>()); diff --git a/python/holoscan/core/operator.cpp b/python/holoscan/core/operator.cpp index 5745d47f..77a7c4f7 100644 --- a/python/holoscan/core/operator.cpp +++ b/python/holoscan/core/operator.cpp @@ -47,6 +47,15 @@ namespace py = pybind11; namespace holoscan { void init_operator(py::module_& m) { + py::class_>( + m, + "MultiMessageConditionInfo", + R"doc(Information associated with a multi-message condition.)doc") + .def(py::init<>()) + .def_readwrite("kind", &MultiMessageConditionInfo::kind) + .def_readwrite("port_names", &MultiMessageConditionInfo::port_names) + .def_readwrite("args", &MultiMessageConditionInfo::args); + py::class_>( m, "OperatorSpec", R"doc(Operator specification class.)doc") .def(py::init(), "fragment"_a, doc::OperatorSpec::doc_OperatorSpec) @@ -56,20 +65,21 @@ void init_operator(py::module_& m) { py::return_value_policy::reference_internal) .def( "input", - // Note: The return type needs to be specified explicitly because pybind11 can't deduce + // Note: The return type needs to be specified explicitly because pybind11 can't + // deduce // it. - // Otherwise, this method will return a new IOSpec object instead of a reference to - // the existing one. - [](OperatorSpec& op, const std::string& name, const py::object& size) -> IOSpec& { + // Otherwise, this method will return a new IOSpec object instead of a reference + // to the existing one. + [](OperatorSpec& spec, const std::string& name, const py::object& size) -> IOSpec& { // Check if 'size' is an int and convert to IOSpec::IOSize if necessary if (py::isinstance(size)) { auto size_int = size.cast(); // Assuming IOSpec::IOSize can be constructed from an int - return op.input(name, IOSpec::IOSize(size_int)); + return spec.input(name, IOSpec::IOSize(size_int)); } if (py::isinstance(size)) { // Directly pass IOSpec::IOSize if 'size' is already the correct type - return op.input(name, size.cast()); + return spec.input(name, size.cast()); } throw std::runtime_error( "Invalid type for 'size'. 
Expected 'int' or 'holoscan.core.IOSpec.IOSize'."); @@ -92,6 +102,45 @@ void init_operator(py::module_& m) { "name"_a, doc::OperatorSpec::doc_output_kwargs, py::return_value_policy::reference_internal) + .def( + "multi_port_condition", + [](OperatorSpec& spec, + ConditionType type, + const std::vector<std::string>& port_names, + const py::kwargs& kwargs) { + // special handling of str -> YAML::Node conversion for sampling_mode argument + ArgList extra_args{}; + for (const auto& [name, handle] : kwargs) { + auto arg_name = name.cast<std::string>(); + auto arg_value = handle.cast<py::object>(); + if (arg_name == std::string("sampling_mode")) { + if (py::isinstance<py::str>(arg_value)) { + auto mode_str = arg_value.cast<std::string>(); + if (mode_str == "SumOfAll") { + extra_args.add(Arg("sampling_mode", YAML::Node("SumOfAll"))); + } else if (mode_str == "PerReceiver") { + extra_args.add(Arg("sampling_mode", YAML::Node("PerReceiver"))); + } else { + throw std::runtime_error("Invalid sampling mode: " + mode_str); + } + } else { + throw std::runtime_error("Invalid type for 'sampling_mode'. Expected 'str'."); + } + kwargs.attr("pop")(arg_name); + } + } + // automatically convert the remaining arguments + ArgList args = kwargs_to_arglist(kwargs); + // append any arguments such as sampling_mode that were handled separately + args.add(extra_args); + return spec.multi_port_condition(type, port_names, args); + }, + "kind"_a, + "port_names"_a, + doc::OperatorSpec::doc_multi_port_condition) + .def("multi_port_conditions", + &OperatorSpec::multi_port_conditions, + doc::OperatorSpec::doc_multi_port_conditions) .def_property_readonly("outputs", &OperatorSpec::outputs, doc::OperatorSpec::doc_outputs, diff --git a/python/holoscan/core/operator_pydoc.hpp b/python/holoscan/core/operator_pydoc.hpp index b671e5d4..faa4a7f3 100644 --- a/python/holoscan/core/operator_pydoc.hpp +++ b/python/holoscan/core/operator_pydoc.hpp @@ -108,6 +108,25 @@ PYDOC(outputs, R"doc( Return the reference of the output port map. )doc") +PYDOC(multi_port_condition, R"doc( +Add a Condition that depends on the status of multiple input ports. + +Parameters +---------- +kind : holoscan.core.ConditionType + The kind of multi-message condition to add. +port_names : list of str + The names of the input ports this condition will apply to. +kwargs : dict or holoscan::ArgList + Additional arguments to pass to the multi-message condition. +)doc") + +PYDOC(multi_port_conditions, R"doc( +Returns a list of multi-message conditions associated with the operator. + +Returns +------- +conditions : list of holoscan.core.MultiMessageConditionInfo + The list of info structs for the multi-message conditions associated with the operator. +)doc") + PYDOC(param, R"doc( Add a parameter to the specification. diff --git a/python/holoscan/core/tensor.cpp b/python/holoscan/core/tensor.cpp index 36c2574f..ab0a2707 100644 --- a/python/holoscan/core/tensor.cpp +++ b/python/holoscan/core/tensor.cpp @@ -368,14 +368,9 @@ py::object PyTensor::from_dlpack_pyobj(const py::object& obj) { return py_tensor; } -// NOLINTBEGIN(readability-function-cognitive-complexity) -std::shared_ptr PyTensor::from_array_interface(const py::object& obj, bool cuda) { - auto memory_buf = std::make_shared(); - memory_buf->obj_ref = obj; // hold obj to prevent it from being garbage collected - - const char* interface_name = cuda ? 
"__cuda_array_interface__" : "__array_interface__"; - auto array_interface = obj.attr(interface_name).cast(); - +DLTensor init_dl_tensor_from_interface( + const std::shared_ptr& memory_buf, const py::dict& array_interface, + bool cuda) { // Process mandatory entries memory_buf->dl_shape = array_interface["shape"].cast>(); auto& shape = memory_buf->dl_shape; @@ -433,47 +428,49 @@ std::shared_ptr PyTensor::from_array_interface(const py::object& obj, local_dl_tensor.strides = strides.data(); // We do not process 'descr', 'mask', and 'offset' entries + return local_dl_tensor; +} +void process_array_interface_stream(const py::object& stream_obj) { + int64_t stream_id = 1; // legacy default stream + cudaStream_t stream_ptr = nullptr; + if (stream_obj.is_none()) { + stream_id = -1; + } else { + stream_id = stream_obj.cast<int64_t>(); + } + if (stream_id < -1) { + throw std::runtime_error( + "Invalid stream, valid stream should be None (no synchronization), 1 (legacy default " + "stream), 2 " + "(per-thread default stream), or a positive integer (stream pointer)"); + } + if (stream_id > 2) { + // NOLINTNEXTLINE(performance-no-int-to-ptr,cppcoreguidelines-pro-type-reinterpret-cast) + stream_ptr = reinterpret_cast<cudaStream_t>(stream_id); + } + + // Make the current (legacy default) stream wait until the provided stream has finished; this is the reverse of py_dlpack(). + cudaStream_t curr_stream_ptr = nullptr; // legacy stream + if (stream_id >= 0 && curr_stream_ptr != stream_ptr) { + synchronize_streams(curr_stream_ptr, stream_ptr); + } +} + +std::shared_ptr<PyTensor> PyTensor::from_array_interface(const py::object& obj, bool cuda) { + auto memory_buf = std::make_shared(); + memory_buf->obj_ref = obj; // hold obj to prevent it from being garbage collected + + const char* interface_name = cuda ? "__cuda_array_interface__" : "__array_interface__"; + auto array_interface = obj.attr(interface_name).cast<py::dict>(); + + DLTensor local_dl_tensor = init_dl_tensor_from_interface(memory_buf, array_interface, cuda); if (cuda) { - // Process 'stream' entry + // determine stream and synchronize it with the default stream if necessary py::object stream_obj = py::none(); - if (array_interface.contains("stream")) { stream_obj = array_interface["stream"]; } - - int64_t stream_id = 1; // legacy default stream - cudaStream_t stream_ptr = nullptr; - if (stream_obj.is_none()) { - stream_id = -1; - } else { - stream_id = stream_obj.cast(); - } - if (stream_id < -1) { - throw std::runtime_error( - "Invalid stream, valid stream should be None (no synchronization), 1 (legacy default " - "stream), 2 " - "(per-thread defaultstream), or a positive integer (stream pointer)"); - } - if (stream_id <= 2) { - stream_ptr = nullptr; - } else { - // NOLINTNEXTLINE(performance-no-int-to-ptr,cppcoreguidelines-pro-type-reinterpret-cast) - stream_ptr = reinterpret_cast(stream_id); - } - - cudaStream_t curr_stream_ptr = nullptr; // legacy stream - - if (stream_id >= 0 && curr_stream_ptr != stream_ptr) { - cudaEvent_t curr_stream_event{}; - HOLOSCAN_CUDA_CALL_THROW_ERROR( - cudaEventCreateWithFlags(&curr_stream_event, cudaEventDisableTiming), - "Failure during call to cudaEventCreateWithFlags"); - HOLOSCAN_CUDA_CALL_THROW_ERROR(cudaEventRecord(curr_stream_event, stream_ptr), - "Failure during call to cudaEventRecord"); - // Make current stream (curr_stream_ptr) to wait until the given stream (stream_ptr) - // is finished. This is a reverse of py_dlpack() method. 
- HOLOSCAN_CUDA_CALL_THROW_ERROR(cudaStreamWaitEvent(curr_stream_ptr, curr_stream_event, 0), - "Failure during call to cudaStreamWaitEvent"); - HOLOSCAN_CUDA_CALL_THROW_ERROR(cudaEventDestroy(curr_stream_event), - "Failure during call to cudaEventDestroy"); + if (array_interface.contains("stream")) { + stream_obj = array_interface["stream"]; + process_array_interface_stream(stream_obj); } } // Create DLManagedTensor object @@ -503,7 +500,6 @@ std::shared_ptr PyTensor::from_array_interface(const py::object& obj, return tensor; } -// NOLINTEND(readability-function-cognitive-complexity) std::shared_ptr PyTensor::from_dlpack(const py::object& obj) { // Pybind11 doesn't have a way to get/set a pointer with a name so we have to use the C API diff --git a/python/holoscan/gxf/__init__.py b/python/holoscan/gxf/__init__.py index 7c3ef440..3e188bd8 100644 --- a/python/holoscan/gxf/__init__.py +++ b/python/holoscan/gxf/__init__.py @@ -26,6 +26,7 @@ holoscan.gxf.GXFOutputContext holoscan.gxf.GXFResource holoscan.gxf.GXFScheduler + holoscan.gxf.GXFSystemResourceBase """ from ._gxf import ( # noqa: I001 @@ -38,6 +39,7 @@ GXFOutputContext, GXFResource, GXFScheduler, + GXFSystemResourceBase, ) from ._gxf import PyEntity as Entity from ._gxf import load_extensions @@ -53,5 +55,6 @@ "GXFOutputContext", "GXFResource", "GXFScheduler", + "GXFSystemResourceBase", "load_extensions", ] diff --git a/python/holoscan/gxf/gxf.cpp b/python/holoscan/gxf/gxf.cpp index 965f80f1..edaf659c 100644 --- a/python/holoscan/gxf/gxf.cpp +++ b/python/holoscan/gxf/gxf.cpp @@ -124,6 +124,23 @@ PYBIND11_MODULE(_gxf, m) { }, R"doc(Return repr(self).)doc"); + py::class_>( + m, "GXFSystemResourceBase", doc::GXFSystemResourceBase::doc_GXFSystemResourceBase) + .def(py::init<>(), doc::GXFSystemResourceBase::doc_GXFSystemResourceBase) + .def( + "__repr__", + [](const py::object& obj) { + // use py::object and obj.cast to avoid a segfault if object has not been initialized + auto resource = obj.cast>(); + if (resource) { return resource->description(); } + return std::string(""); + }, + R"doc(Return repr(self).)doc"); + py::class_>( m, "GXFCondition", doc::GXFCondition::doc_GXFCondition) .def(py::init<>(), doc::GXFCondition::doc_GXFCondition) diff --git a/python/holoscan/gxf/gxf_operator.cpp b/python/holoscan/gxf/gxf_operator.cpp index aab1704f..e258a913 100644 --- a/python/holoscan/gxf/gxf_operator.cpp +++ b/python/holoscan/gxf/gxf_operator.cpp @@ -65,12 +65,16 @@ void init_gxf_operator(py::module_& m) { py::overload_cast<>(&ops::GXFOperator::gxf_cid, py::const_), py::overload_cast(&ops::GXFOperator::gxf_cid), doc::GXFOperator::doc_gxf_cid) + .def_property_readonly("gxf_entity_group_name", + &ops::GXFOperator::gxf_entity_group_name, + doc::GXFOperator::doc_gxf_entity_group_name) .def_property_readonly( "description", &ops::GXFOperator::description, doc::GXFOperator::doc_description) .def( "__repr__", [](const py::object& obj) { - // use py::object and obj.cast to avoid a segfault if object has not been initialized + // use py::object and obj.cast to avoid a segfault if object has not been + // initialized auto op = obj.cast>(); if (op) { return op->description(); } return std::string(""); diff --git a/python/holoscan/gxf/gxf_operator_pydoc.hpp b/python/holoscan/gxf/gxf_operator_pydoc.hpp index 4b6a5537..af624d75 100644 --- a/python/holoscan/gxf/gxf_operator_pydoc.hpp +++ b/python/holoscan/gxf/gxf_operator_pydoc.hpp @@ -61,6 +61,15 @@ PYDOC(gxf_cid, R"doc( The GXF component ID. 
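The new `GXFSystemResourceBase` binding and the `gxf_entity_group_name` property make GXF entity-group membership visible from Python: system resources such as a `ThreadPool` or `GPUDevice` are shared by all entities in an `nvidia::gxf::EntityGroup`, and a GXF-backed operator can report which group it was placed in. A condensed sketch (assuming `app` is a running `Application` and `my_gxf_op` an already-initialized `holoscan.gxf.GXFOperator`-derived operator; both names are illustrative):

from holoscan.gxf import GXFSystemResourceBase

# ThreadPool (bound later in this patch) derives from GXFSystemResourceBase
pool = app.make_thread_pool("pool1", 1)
assert isinstance(pool, GXFSystemResourceBase)

# after initialization, a GXF-backed operator reports its entity group name
print(my_gxf_op.gxf_entity_group_name)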
)doc") +PYDOC(gxf_entity_group_name, R"doc( +The name of the GXF EntityGroup containing this operator. + +Returns +------- +str + The entity group name. +)doc") + PYDOC(description, R"doc( YAML formatted string describing the operator. )doc") diff --git a/python/holoscan/gxf/gxf_pydoc.hpp b/python/holoscan/gxf/gxf_pydoc.hpp index daf25a45..93899b94 100644 --- a/python/holoscan/gxf/gxf_pydoc.hpp +++ b/python/holoscan/gxf/gxf_pydoc.hpp @@ -98,6 +98,31 @@ Parameters } // namespace GXFResource +namespace GXFSystemResourceBase { + +// Constructor +PYDOC(GXFSystemResourceBase, R"doc( +A class considered a GXF nvidia::gxf::Resource. + +This represents a resource such as a ThreadPool or GPUDevice that may be shared amongst multiple +entities in an nvidia::gxf::EntityGroup. + +)doc") + +PYDOC(GXFSystemResourceBase_kwargs, R"doc( +A class considered a GXF nvidia::gxf::Resource. + +This represents a resource such as a ThreadPool or GPUDevice that may be shared amongst multiple +entities in an nvidia::gxf::EntityGroup. + +Parameters +---------- +**kwargs : dict + Keyword arguments to pass on to the parent resource class. +)doc") + +} // namespace GXFSystemResourceBase + namespace GXFInputContext { // Constructor diff --git a/python/holoscan/operators/holoviz/__init__.py b/python/holoscan/operators/holoviz/__init__.py index 3e578ac9..78b9f9f9 100644 --- a/python/holoscan/operators/holoviz/__init__.py +++ b/python/holoscan/operators/holoviz/__init__.py @@ -17,6 +17,9 @@ from collections.abc import MutableMapping, Sequence +# BooleanCondition, Allocator, CudaStreamPool are all used as argument types so have to be imported +# before HolovizOp's __init__() can be called. +from holoscan.conditions import BooleanCondition # noqa: F401 from holoscan.core import IOSpec, io_type_registry from holoscan.resources import Allocator, CudaStreamPool, UnboundedAllocator # noqa: F401 @@ -162,6 +165,7 @@ def __init__( window_size_callback=None, font_path="", cuda_stream_pool=None, + window_close_condition=None, name="holoviz_op", ): if allocator is None: @@ -370,7 +374,6 @@ def __init__( ispec.views = tensor.get("views", []) tensor_input_specs.append(ispec) - super().__init__( fragment, *args, @@ -405,17 +408,10 @@ def __init__( window_size_callback=window_size_callback, font_path=font_path, cuda_stream_pool=cuda_stream_pool, + window_close_condition=window_close_condition, name=name, ) - InputSpec = _HolovizOp.InputSpec - InputType = _HolovizOp.InputType - ImageFormat = _HolovizOp.ImageFormat - YuvModelConversion = _HolovizOp.YuvModelConversion - YuvRange = _HolovizOp.YuvRange - ChromaLocation = _HolovizOp.ChromaLocation - DepthMapRenderMode = _HolovizOp.DepthMapRenderMode - # copy docstrings defined in operators_pydoc.hpp HolovizOp.__doc__ = _HolovizOp.__doc__ diff --git a/python/holoscan/operators/holoviz/holoviz.cpp b/python/holoscan/operators/holoviz/holoviz.cpp index fabf091d..eaa0cc1c 100644 --- a/python/holoscan/operators/holoviz/holoviz.cpp +++ b/python/holoscan/operators/holoviz/holoviz.cpp @@ -40,6 +40,7 @@ #include "../../core/io_context.hpp" // PyOutputContext #include "holoscan/core/codec_registry.hpp" #include "holoscan/core/condition.hpp" +#include "holoscan/core/conditions/gxf/boolean.hpp" #include "holoscan/core/fragment.hpp" #include "holoscan/core/io_context.hpp" #include "holoscan/core/operator.hpp" @@ -118,6 +119,7 @@ class PyHolovizOp : public HolovizOp { // NOLINTEND(performance-unnecessary-value-param) const std::string& font_path = ""s, std::shared_ptr cuda_stream_pool = nullptr, + 
std::shared_ptr window_close_condition = nullptr, const std::string& name = "holoviz_op") : HolovizOp(ArgList{Arg{"allocator", allocator}, Arg{"color_lut", color_lut}, @@ -145,7 +147,9 @@ class PyHolovizOp : public HolovizOp { if (!tensors.empty()) { this->add_arg(Arg{"tensors", tensors}); } if (!receivers.empty()) { this->add_arg(Arg{"receivers", receivers}); } if (cuda_stream_pool) { this->add_arg(Arg{"cuda_stream_pool", cuda_stream_pool}); } - + if (window_close_condition) { + this->add_arg(Arg{"window_close_condition", window_close_condition}); + } // check if callbacks are provided, for each callback take the GIL before calling the function if (key_callback) { this->add_arg( @@ -202,7 +206,6 @@ class PyHolovizOp : public HolovizOp { window_size_callback(w, h); })}); } - add_positional_condition_and_resource_args(this, args); name_ = name; fragment_ = fragment; @@ -219,18 +222,10 @@ PYBIND11_MODULE(_holoviz, m) { -------------------------------------- .. currentmodule:: _holoviz )pbdoc"; - py::class_> holoviz_op( m, "HolovizOp", doc::HolovizOp::doc_HolovizOp); - py::enum_(holoviz_op, "ColorSpace") - .value("SRGB_NONLINEAR", HolovizOp::ColorSpace::SRGB_NONLINEAR) - .value("EXTENDED_SRGB_LINEAR", HolovizOp::ColorSpace::EXTENDED_SRGB_LINEAR) - .value("BT2020_LINEAR", HolovizOp::ColorSpace::BT2020_LINEAR) - .value("HDR10_ST2084", HolovizOp::ColorSpace::HDR10_ST2084) - .value("PASS_THROUGH", HolovizOp::ColorSpace::PASS_THROUGH) - .value("BT709_LINEAR", HolovizOp::ColorSpace::BT709_LINEAR) - .value("AUTO", HolovizOp::ColorSpace::AUTO); + export_enum(holoviz_op, "ColorSpace"); holoviz_op.def(py::init, + std::shared_ptr, const std::string&>(), "fragment"_a, "allocator"_a, @@ -298,6 +294,7 @@ PYBIND11_MODULE(_holoviz, m) { "window_size_callback"_a = HolovizOp::WindowSizeCallbackFunction(), "font_path"_a = ""s, "cuda_stream_pool"_a = py::none(), + "window_close_condition"_a = py::none(), "name"_a = "holoviz_op"s, doc::HolovizOp::doc_HolovizOp); diff --git a/python/holoscan/operators/holoviz/pydoc.hpp b/python/holoscan/operators/holoviz/pydoc.hpp index 4629155a..a02bd9ad 100644 --- a/python/holoscan/operators/holoviz/pydoc.hpp +++ b/python/holoscan/operators/holoviz/pydoc.hpp @@ -173,6 +173,12 @@ font_path : str, optional cuda_stream_pool : holoscan.resources.CudaStreamPool, optional ``holoscan.resources.CudaStreamPool`` instance to allocate CUDA streams. Default value is ``None``. +window_close_condition : holoscan.conditions.BooleanCondition, optional + The ``BooleanCondition`` on the operator that causes it to stop executing when the display + window is closed. By default, this condition is created automatically during initialization. + Provide it explicitly if, for example, there are multiple HolovizOp operators that should + share the same window close condition; with a shared condition, closing one of the display + windows also stops the other operator(s). name : str, optional (constructor only) The name of the operator. Default value is ``"holoviz_op"``. diff --git a/python/holoscan/operators/video_stream_replayer/pydoc.hpp b/python/holoscan/operators/video_stream_replayer/pydoc.hpp index c4f4ac4e..c6f1c5d0 100644 --- a/python/holoscan/operators/video_stream_replayer/pydoc.hpp +++ b/python/holoscan/operators/video_stream_replayer/pydoc.hpp @@ -44,8 +44,9 @@ Operator class to replay a video stream from a file. 1. One block of host memory equal in size to a single uncompressed video frame is needed. 
Note that for RMMAllocator, the memory sizes should be specified in MiB, so the - minimum value can be obtained by: - ``math.ceil(height * width * channels * element_size_bytes) / (1024 * 1024))``. + minimum value can be obtained by + ``math.ceil(height * width * channels * element_size_bytes / (1024 * 1024))`` + 2. One block of device memory equal in size to the host memory block. When declaring an `RMMAllocator` memory pool, `host_memory_initial_size` and diff --git a/python/holoscan/resources/CMakeLists.txt b/python/holoscan/resources/CMakeLists.txt index 9f7794a1..96d92e53 100644 --- a/python/holoscan/resources/CMakeLists.txt +++ b/python/holoscan/resources/CMakeLists.txt @@ -23,5 +23,6 @@ holoscan_pybind11_module(resources resources.cpp serialization_buffers.cpp std_entity_serializer.cpp + system_resources.cpp transmitters.cpp ) diff --git a/python/holoscan/resources/__init__.py b/python/holoscan/resources/__init__.py index 256a9120..8503b459 100644 --- a/python/holoscan/resources/__init__.py +++ b/python/holoscan/resources/__init__.py @@ -33,6 +33,7 @@ holoscan.resources.StdComponentSerializer holoscan.resources.StdEntitySerializer holoscan.resources.StreamOrderedAllocator + holoscan.resources.ThreadPool holoscan.resources.Transmitter holoscan.resources.UnboundedAllocator holoscan.resources.UcxComponentSerializer @@ -62,6 +63,7 @@ StdComponentSerializer, StdEntitySerializer, StreamOrderedAllocator, + ThreadPool, Transmitter, UcxComponentSerializer, UcxEntitySerializer, @@ -93,6 +95,7 @@ "StdComponentSerializer", "StdEntitySerializer", "StreamOrderedAllocator", + "ThreadPool", "Transmitter", "UcxComponentSerializer", "UcxEntitySerializer", diff --git a/python/holoscan/resources/gxf_component_resource_pydoc.hpp b/python/holoscan/resources/gxf_component_resource_pydoc.hpp index cf077fc1..e919ac07 100644 --- a/python/holoscan/resources/gxf_component_resource_pydoc.hpp +++ b/python/holoscan/resources/gxf_component_resource_pydoc.hpp @@ -37,7 +37,7 @@ gxf_typename : str The GXF type name that identifies the specific GXF Component being wrapped. name : str, optional (constructor only) The name of the resource. Default value is ``"gxf_component"``. -**kwargs : dict +kwargs : dict The additional keyword arguments that can be passed depend on the underlying GXF Component. These parameters can provide further customization and functionality to the resource. )doc") diff --git a/python/holoscan/resources/resources.cpp b/python/holoscan/resources/resources.cpp index 699ab8c1..f1fdcaad 100644 --- a/python/holoscan/resources/resources.cpp +++ b/python/holoscan/resources/resources.cpp @@ -31,6 +31,7 @@ void init_serialization_buffers(py::module_&); void init_component_serializers(py::module_&); void init_entity_serializers(py::module_&); void init_std_entity_serializer(py::module_&); +void init_system_resources(py::module_&); PYBIND11_MODULE(_resources, m) { m.doc() = R"pbdoc( @@ -48,5 +49,6 @@ PYBIND11_MODULE(_resources, m) { init_component_serializers(m); init_entity_serializers(m); init_std_entity_serializer(m); + init_system_resources(m); } // PYBIND11_MODULE } // namespace holoscan diff --git a/python/holoscan/resources/system_resources.cpp b/python/holoscan/resources/system_resources.cpp new file mode 100644 index 00000000..0c61a6cb --- /dev/null +++ b/python/holoscan/resources/system_resources.cpp @@ -0,0 +1,78 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+ * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + +#include +#include +#include +#include + +#include "./system_resources_pydoc.hpp" +#include "holoscan/core/component_spec.hpp" +#include "holoscan/core/fragment.hpp" +#include "holoscan/core/gxf/gxf_resource.hpp" +#include "holoscan/core/operator.hpp" +#include "holoscan/core/resources/gxf/system_resources.hpp" + +using std::string_literals::operator""s; // NOLINT(misc-unused-using-decls) +using pybind11::literals::operator""_a; // NOLINT(misc-unused-using-decls) + +namespace py = pybind11; + +namespace holoscan { + +class PyThreadPool : public ThreadPool { + public: + /* Inherit the constructors */ + using ThreadPool::ThreadPool; + + // Define a constructor that fully initializes the object. + explicit PyThreadPool(Fragment* fragment, int64_t initial_size = 1, + const std::string& name = "thread_pool") + : ThreadPool(ArgList{Arg("initial_size", initial_size)}) { + name_ = name; + fragment_ = fragment; + spec_ = std::make_shared(fragment); + setup(*spec_); + } +}; + +void init_system_resources(py::module_& m) { + py::class_>(m, "ThreadPool", doc::ThreadPool::doc_ThreadPool_kwargs) + .def(py::init(), + "fragment"_a, + "initial_size"_a = 1, + "name"_a = "thread_pool"s, + doc::ThreadPool::doc_ThreadPool_kwargs) + .def("add", + py::overload_cast&, bool>(&ThreadPool::add), + "op"_a, + "pin_operator"_a = false) + .def_property_readonly("operators", &ThreadPool::operators, doc::ThreadPool::doc_operators) + .def("add", + py::overload_cast>, bool>(&ThreadPool::add), + "ops"_a, + "pin_operator"_a = false, + doc::ThreadPool::doc_add); +} +} // namespace holoscan diff --git a/python/holoscan/resources/system_resources_pydoc.hpp b/python/holoscan/resources/system_resources_pydoc.hpp new file mode 100644 index 00000000..bd53007d --- /dev/null +++ b/python/holoscan/resources/system_resources_pydoc.hpp @@ -0,0 +1,64 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef PYHOLOSCAN_RESOURCES_SYSTEM_RESOURCES_HPP +#define PYHOLOSCAN_RESOURCES_SYSTEM_RESOURCES_HPP + +#include + +#include "../macros.hpp" + +namespace holoscan::doc { + +namespace ThreadPool { + +PYDOC(ThreadPool_kwargs, R"doc( +ThreadPool for operators scheduled by EventBasedScheduler or MultiThreadScheduler. 
+ +Parameters +---------- +initial_size : int, optional + The initial number of worker threads in the pool. +name : str, optional + The name of the thread pool. +)doc") + +PYDOC(add, R"doc( +Assign one or more operators to use the thread pool. + +Parameters +---------- +ops : Operator or list[Operator] + The operator(s) to add to the thread pool. +pin_operator : bool, optional + If True, the operator(s) will be pinned to a specific thread in the pool. +)doc") + +PYDOC(operators, R"doc( +The operators associated with this thread pool. + +Returns +------- +list[Operator] + The list of operators that have been added to this thread pool. +)doc") + +} // namespace ThreadPool + +} // namespace holoscan::doc + +#endif /* PYHOLOSCAN_RESOURCES_SYSTEM_RESOURCES_HPP */ diff --git a/python/holoscan/schedulers/multithread_scheduler.cpp b/python/holoscan/schedulers/multithread_scheduler.cpp index a03b7efa..a3957b15 100644 --- a/python/holoscan/schedulers/multithread_scheduler.cpp +++ b/python/holoscan/schedulers/multithread_scheduler.cpp @@ -58,11 +58,13 @@ class PyMultiThreadScheduler : public MultiThreadScheduler { double check_recession_period_ms = 5.0, int64_t max_duration_ms = -1LL, int64_t stop_on_deadlock_timeout = 0LL, + bool strict_job_thread_pinning = false, const std::string& name = "multithread_scheduler") : MultiThreadScheduler(ArgList{Arg{"worker_thread_number", worker_thread_number}, Arg{"stop_on_deadlock", stop_on_deadlock}, Arg{"check_recession_period_ms", check_recession_period_ms}, - Arg{"stop_on_deadlock_timeout", stop_on_deadlock_timeout}}) { + Arg{"stop_on_deadlock_timeout", stop_on_deadlock_timeout}, + Arg{"strict_job_thread_pinning", strict_job_thread_pinning}}) { // max_duration_ms is an optional argument in GXF. We use a negative value in this constructor // to indicate that the argument should not be set. if (max_duration_ms >= 0) { this->add_arg(Arg{"max_duration_ms", max_duration_ms}); } @@ -93,6 +95,7 @@ void init_multithread_scheduler(py::module_& m) { double, int64_t, int64_t, + bool, const std::string&>(), "fragment"_a, py::kw_only(), @@ -102,6 +105,7 @@ void init_multithread_scheduler(py::module_& m) { "check_recession_period_ms"_a = 5.0, "max_duration_ms"_a = -1LL, "stop_on_deadlock_timeout"_a = 0LL, + "strict_job_thread_pinning"_a = false, "name"_a = "multithread_scheduler"s, doc::MultiThreadScheduler::doc_MultiThreadScheduler) .def_property_readonly("clock", &MultiThreadScheduler::clock) diff --git a/python/holoscan/schedulers/multithread_scheduler_pydoc.hpp b/python/holoscan/schedulers/multithread_scheduler_pydoc.hpp index 47b533bc..7616cf05 100644 --- a/python/holoscan/schedulers/multithread_scheduler_pydoc.hpp +++ b/python/holoscan/schedulers/multithread_scheduler_pydoc.hpp @@ -53,6 +53,10 @@ stop_on_deadlock_timeout : int, optional The scheduler will wait this amount of time before determining that it is in deadlock and should stop. It will reset if a job comes in during the wait. A negative value means not stop on deadlock. This parameter only applies when `stop_on_deadlock=true`", +strict_job_thread_pinning : bool, optional + When true, the thread an operator is pinned to is not allowed to run any other operators. + When false, if the pinned operator is not in a READY state, another operator could run on the + thread. name : str, optional The name of the scheduler. 
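Taken together, a thread pool is created from a fragment, operators are optionally pinned to dedicated worker threads, and strict pinning then forbids a pinned operator's thread from running anything else. A condensed sketch based on the `test_make_thread_pool` test later in this patch (operator names are illustrative):

from holoscan.conditions import CountCondition
from holoscan.core import Application
from holoscan.operators import PingRxOp, PingTxOp
from holoscan.schedulers import MultiThreadScheduler

class PinnedPingApp(Application):
    def compose(self):
        tx = PingTxOp(self, CountCondition(self, 10), name="tx")
        rx = PingRxOp(self, name="rx")
        self.add_flow(tx, rx)
        # a pool with an initial_size of two worker threads
        pool = self.make_thread_pool("pool1", 2)
        pool.add(tx, True)   # pin tx to its own thread in the pool
        pool.add(rx, False)  # rx may run on any pool thread

app = PinnedPingApp()
app.scheduler(MultiThreadScheduler(
    app, worker_thread_number=2, strict_job_thread_pinning=True, name="multithread_scheduler"))
app.run()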
)doc") diff --git a/python/requirements.dev.txt b/python/requirements.dev.txt index 4d2a3775..e65e9dca 100644 --- a/python/requirements.dev.txt +++ b/python/requirements.dev.txt @@ -1,4 +1,4 @@ -coverage==6.5 -pytest==7.4.3 -pytest-cov==4.1.0 -pytest-mock==3.12.0 +coverage>=6.5,<8 +pytest>=7.4.3,<9.0 +pytest-cov>=4.1.0,<7.0 +pytest-mock>=3.12.0,<4.0 diff --git a/python/requirements.lint.txt b/python/requirements.lint.txt index e9a3b8b6..f682d417 100644 --- a/python/requirements.lint.txt +++ b/python/requirements.lint.txt @@ -1,4 +1,4 @@ -ruff==0.6.3 -cpplint==1.6.1 -cmakelint==1.4.2 -codespell==2.2.6 +ruff>=0.6.3,<1.0 +cpplint>=1.6.1,<2.0 +cmakelint>=1.4.2,<2.0 +codespell>=2.2.6,<3.0 diff --git a/python/requirements.txt b/python/requirements.txt index e3556196..d93cb895 100644 --- a/python/requirements.txt +++ b/python/requirements.txt @@ -1,9 +1,10 @@ pip>22.0.2 -cupy-cuda12x==12.2 -cloudpickle==2.2.1 -python-on-whales==0.60.1 -Jinja2==3.1.3 -packaging==23.1 -pyyaml==6.0 -requests==2.31.0 -psutil==6.0.0 +cupy-cuda12x>=12.2,<14.0 +numpy>=1.0.4,<2.0 +cloudpickle>=3.0,<4.0 +python-on-whales>=0.60.1,<1.0 +Jinja2>=3.1.3,<4.0 +packaging>=23.1 +pyyaml>=6.0,<7.0 +requests>=2.31.0,<3.0 +psutil>=6.0.0,<7.0 diff --git a/python/tests/cli/unit/common/test_sdk_utils.py b/python/tests/cli/unit/common/test_sdk_utils.py index a6863087..3ffb9e77 100644 --- a/python/tests/cli/unit/common/test_sdk_utils.py +++ b/python/tests/cli/unit/common/test_sdk_utils.py @@ -18,6 +18,7 @@ import pytest from packaging.version import Version +import holoscan.cli.common.sdk_utils from holoscan.cli.common.artifact_sources import ArtifactSources from holoscan.cli.common.enum_types import SdkType from holoscan.cli.common.exceptions import FailedToDetectSDKVersionError, InvalidSdkError @@ -87,32 +88,28 @@ def test_sdk_version_from_invalid_user_input(self, monkeypatch): def test_detect_sdk_version(self, monkeypatch): version = "1.0.0" - - monkeypatch.setattr("importlib.metadata.version", lambda x: version) + holoscan.cli.common.sdk_utils.holoscan_version_string = version result = detect_holoscan_version(self._artifact_source) assert result == version def test_detect_sdk_version_with_patch(self, monkeypatch): version = "1.0.0-beta-1" - - monkeypatch.setattr("importlib.metadata.version", lambda x: version) + holoscan.cli.common.sdk_utils.holoscan_version_string = version result = detect_holoscan_version(self._artifact_source) assert result == "1.0.0" def test_detect_sdk_version_with_unsupported_version(self, monkeypatch): version = "0.1.2" - - monkeypatch.setattr("importlib.metadata.version", lambda x: version) + holoscan.cli.common.sdk_utils.holoscan_version_string = version with pytest.raises(FailedToDetectSDKVersionError): detect_holoscan_version(self._artifact_source) def test_detect_sdk_version_with_no_match(self, monkeypatch): version = "100" - - monkeypatch.setattr("importlib.metadata.version", lambda x: version) + holoscan.cli.common.sdk_utils.holoscan_version_string = version with pytest.raises(FailedToDetectSDKVersionError): detect_holoscan_version(self._artifact_source) @@ -122,7 +119,7 @@ def test_detect_sdk_version_with_no_match(self, monkeypatch): [("1.0a2+4.gcaa3b3fe", "1.0.0"), ("1", "1.0.0"), ("1.0", "1.0.0"), ("1.0.0.1", "1.0.0")], ) def test_detect_sdk_version_with_non_semver_string(self, monkeypatch, version, expected): - monkeypatch.setattr("importlib.metadata.version", lambda x: version) + holoscan.cli.common.sdk_utils.holoscan_version_string = version result = detect_holoscan_version(self._artifact_source) 
assert result == expected diff --git a/python/tests/cli/unit/packager/test_arguments.py b/python/tests/cli/unit/packager/test_arguments.py index 55f4767c..3144e544 100644 --- a/python/tests/cli/unit/packager/test_arguments.py +++ b/python/tests/cli/unit/packager/test_arguments.py @@ -47,6 +47,10 @@ def _setup(self) -> None: self.input_args.platform = Platform.X64Workstation self.input_args.platform_config = PlatformConfiguration.dGPU self.input_args.includes = [] + self.input_args.additional_libs = [ + pathlib.Path("/path/to/lib"), + pathlib.Path("/path/to/so"), + ] self.source_load_called = False @@ -144,6 +148,7 @@ def test_input_args(self, monkeypatch): assert args.build_parameters.version == "HoloscanVersionNum" assert args.build_parameters.command_filename == "app" assert args.build_parameters.sdk == SdkType.Holoscan + assert args.build_parameters.additional_libs == self.input_args.additional_libs assert args.application_manifest is not None assert args.package_manifest is not None assert args.build_parameters.build_cache == self.input_args.build_cache diff --git a/python/tests/system/test_application_minimal.py b/python/tests/system/test_application_minimal.py index b4ee31e3..51e5bc2b 100644 --- a/python/tests/system/test_application_minimal.py +++ b/python/tests/system/test_application_minimal.py @@ -16,9 +16,11 @@ """ # noqa: E501 import pytest +from env_wrapper import env_var_context from holoscan.conditions import CountCondition from holoscan.core import Application, Operator, OperatorSpec +from holoscan.operators import PingRxOp, PingTxOp from holoscan.resources import ManualClock, RealtimeClock from holoscan.schedulers import EventBasedScheduler, GreedyScheduler, MultiThreadScheduler @@ -122,3 +124,41 @@ def test_app_config_keys(config_file): # other non-existent keys are not assert "abcdefg" not in keys + + +class MyPingApp(Application): + def compose(self): + # Define the tx and rx operators, allowing tx to execute 10 times + tx = PingTxOp(self, CountCondition(self, 10), name="tx") + rx = PingRxOp(self, name="rx") + + # Define the workflow: tx -> rx + self.add_flow(tx, rx) + + +def test_app_log_function(capfd): + """ + The following debug log messages are expected to be printed: + + Executing PyApplication::run()... (log_func_ptr=0x7ffff37dc660) + Executing Application::run()... (log_func_ptr=0x7ffff37dc660) + + The addresses (log_func_ptr=0x
) should be the same for both log messages. + """ + + env_var_settings = { + ("HOLOSCAN_LOG_LEVEL", "DEBUG"), + } + with env_var_context(env_var_settings): + # Application class's constructor reads the environment variable HOLOSCAN_LOG_LEVEL so + # wrap the app in the context manager to ensure the environment variables are set + app = MyPingApp() + app.run() + + captured = capfd.readouterr() + # Extract text (log_func_ptr=0x
) from the log message and check if the addresses are + # all same. + import re + + addresses = re.findall(r"log_func_ptr=0x[0-9a-fA-F]+", captured.err) + assert len(set(addresses)) == 1 diff --git a/python/tests/system/test_holoviz_dual_window.py b/python/tests/system/test_holoviz_dual_window.py new file mode 100644 index 00000000..11e0eae2 --- /dev/null +++ b/python/tests/system/test_holoviz_dual_window.py @@ -0,0 +1,129 @@ +""" +SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +SPDX-License-Identifier: Apache-2.0 + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" # noqa: E501 + +import math + +import cupy as cp +import numpy as np + +from holoscan.conditions import BooleanCondition, CountCondition +from holoscan.core import Application, Operator, OperatorSpec +from holoscan.operators import HolovizOp + + +def get_frame(xp, height, width, channels, dtype=np.uint8): + shape = (height, width, channels) if channels else (height, width) + size = math.prod(shape) + frame = xp.arange(size, dtype=dtype).reshape(shape) + return frame + + +class FrameGeneratorOp(Operator): + def __init__( + self, + fragment, + *args, + width=800, + height=640, + channels=3, + on_host=False, + dtype=np.uint8, + **kwargs, + ): + self.height = height + self.width = width + self.channels = channels + self.on_host = on_host + self.dtype = dtype + # Need to call the base class constructor last + super().__init__(fragment, *args, **kwargs) + + def setup(self, spec: OperatorSpec): + spec.output("frame") + + def compute(self, op_input, op_output, context): + xp = np if self.on_host else cp + frame = get_frame(xp, self.height, self.width, self.channels, self.dtype) + print(f"Emitting frame with shape: {frame.shape}") + op_output.emit(dict(frame=frame), "frame") + + +class HolovizHeadlessApp(Application): + def __init__( + self, + *args, + count=10, + width=800, + height=640, + on_host=False, + **kwargs, + ): + self.count = count + self.width = width + self.height = height + self.on_host = on_host + super().__init__(*args, **kwargs) + + def compose(self): + source = FrameGeneratorOp( + self, + CountCondition(self, count=self.count), + width=self.width, + height=self.height, + on_host=self.on_host, + dtype=np.uint8, + name="video_source", + ) + + common_visualizer_kwargs = dict( + headless=True, + width=self.width, + height=self.height, + window_close_condition=BooleanCondition(self, name="window_close"), + tensors=[ + # name="" here to match the output of FrameGenerationOp + dict(name="frame", type="color", opacity=0.5, priority=0), + ], + ) + + vizualizer = HolovizOp(self, **common_visualizer_kwargs, name="visualizer") + vizualizer2 = HolovizOp(self, **common_visualizer_kwargs, name="visualizer2") + visualizers = [vizualizer, vizualizer2] + for viz in visualizers: + self.add_flow(source, viz, {("frame", "receivers")}) + + +def test_holovizop_dual_window(capfd): + """Test HolovizOp with dual windows and shared window_close_condition.""" + count = 3 + width = 800 + height = 640 + holoviz_app = 
HolovizHeadlessApp( + count=count, + width=width, + height=height, + on_host=False, + ) + + holoviz_app.run() + + captured = capfd.readouterr() + + # assert that replayer_app emitted all frames + assert captured.out.count("Emitting frame") == count + # no warning about the deprecated parameter name is shown + assert "window_close_scheduling_term" not in captured.out diff --git a/python/tests/unit/test_conditions.py b/python/tests/unit/test_conditions.py index 165a9e66..a1e5675e 100644 --- a/python/tests/unit/test_conditions.py +++ b/python/tests/unit/test_conditions.py @@ -30,6 +30,8 @@ DownstreamMessageAffordableCondition, ExpiringMessageAvailableCondition, MessageAvailableCondition, + MultiMessageAvailableCondition, + MultiMessageAvailableTimeoutCondition, PeriodicCondition, ) from holoscan.core import Application, Condition, ConditionType, Operator @@ -237,6 +239,58 @@ def test_positional_initialization(self, app): ExpiringMessageAvailableCondition(app, 1, 4, RealtimeClock(app, name="clock"), "expiring") +class TestMultiMessageAvailableCondition: + def test_kwarg_based_initialization_sum_of_all(self, app, capfd): + name = "multi_message_available" + cond = MultiMessageAvailableCondition( + fragment=app, + name=name, + min_sum=4, + sampling_mode="SumOfAll", + ) + assert isinstance(cond, GXFCondition) + assert isinstance(cond, Condition) + assert cond.gxf_typename == "nvidia::gxf::MultiMessageAvailableSchedulingTerm" + + assert f""" +name: {name} +fragment: "" +""" in repr(cond) + + # assert no warnings or errors logged + captured = capfd.readouterr() + assert "error" not in captured.err + assert "warning" not in captured.err + + def test_default_initialization(self, app): + MultiMessageAvailableCondition(app) + + +class TestMultiMessageAvailableTimeoutCondition: + def test_kwarg_based_initialization_sum_of_all(self, app, capfd): + name = "multi_message_available_timeout" + cond = MultiMessageAvailableTimeoutCondition( + fragment=app, + execution_frequency="10Hz", + name=name, + min_sizes=[1, 2, 1], + sampling_mode="PerReceiver", + ) + assert isinstance(cond, GXFCondition) + assert isinstance(cond, Condition) + assert cond.gxf_typename == "nvidia::gxf::MessageAvailableFrequencyThrottler" + + assert f""" +name: {name} +fragment: "" +""" in repr(cond) + + # assert no warnings or errors logged + captured = capfd.readouterr() + assert "error" not in captured.err + assert "warning" not in captured.err + + class TestPeriodicCondition: def test_kwarg_based_initialization(self, app, capfd): name = "periodic" diff --git a/python/tests/unit/test_core.py b/python/tests/unit/test_core.py index d4aff1ce..f497b0c8 100644 --- a/python/tests/unit/test_core.py +++ b/python/tests/unit/test_core.py @@ -44,6 +44,7 @@ Resource, Scheduler, _Fragment, + arglist_to_kwargs, io_type_registry, py_object_to_arg, ) @@ -332,40 +333,40 @@ def test_dynamic_attribute_allowed(self, fragment): class TestOperatorSpecBase: def test_init(self, fragment): - c = OperatorSpecBase(fragment) - assert c.params == {} - assert c.fragment is fragment + spec = OperatorSpecBase(fragment) + assert spec.params == {} + assert spec.fragment is fragment def test_input(self, fragment, capfd): - c = OperatorSpecBase(fragment) - iospec = c.input() + spec = OperatorSpecBase(fragment) + iospec = spec.input() assert isinstance(iospec, IOSpec) assert iospec.name == "__iospec_input" assert iospec.io_type == IOSpec.IOType.INPUT assert iospec.queue_size == int(IOSpec.IOSize(1)) - iospec2 = c.input("input2") + iospec2 = spec.input("input2") assert 
iospec2.name == "input2" assert iospec.io_type == IOSpec.IOType.INPUT # Calling a second time with the same name will log an error to the # console. - iospec2 = c.input("input2") + iospec2 = spec.input("input2") captured = capfd.readouterr() assert "error" in captured.err assert "already exists" in captured.err - def test_input_condition_none(self, fragment, capfd): - c = OperatorSpecBase(fragment) - iospec = c.input("input_no_condition").condition(ConditionType.NONE) + def test_input_condition_none(self, fragment): + spec = OperatorSpecBase(fragment) + iospec = spec.input("input_no_condition").condition(ConditionType.NONE) assert isinstance(iospec, IOSpec) assert iospec.name == "input_no_condition" assert iospec.io_type == IOSpec.IOType.INPUT assert iospec.conditions == [(ConditionType.NONE, None)] - def test_input_condition_message_available(self, fragment, capfd): - c = OperatorSpecBase(fragment) - iospec = c.input("input_message_available_condition").condition( + def test_input_condition_message_available(self, fragment): + spec = OperatorSpecBase(fragment) + iospec = spec.input("input_message_available_condition").condition( ConditionType.MESSAGE_AVAILABLE, min_size=1 ) assert isinstance(iospec, IOSpec) @@ -375,18 +376,18 @@ def test_input_condition_message_available(self, fragment, capfd): assert iospec.conditions[0][0] == ConditionType.MESSAGE_AVAILABLE assert iospec.conditions[0][1] is not None - def test_input_connector_default(self, fragment, capfd): - c = OperatorSpecBase(fragment) - iospec = c.input("input_no_condition").connector(IOSpec.ConnectorType.DEFAULT) + def test_input_connector_default(self, fragment): + spec = OperatorSpecBase(fragment) + iospec = spec.input("input_no_condition").connector(IOSpec.ConnectorType.DEFAULT) assert isinstance(iospec, IOSpec) assert iospec.name == "input_no_condition" assert iospec.io_type == IOSpec.IOType.INPUT assert iospec.connector() is None @pytest.mark.parametrize("kwargs", [{}, dict(capacity=4), dict(capacity=1, policy=1)]) - def test_input_connector_double_buffer(self, fragment, capfd, kwargs): - c = OperatorSpecBase(fragment) - iospec = c.input("input_no_condition").connector( + def test_input_connector_double_buffer(self, fragment, kwargs): + spec = OperatorSpecBase(fragment) + iospec = spec.input("input_no_condition").connector( IOSpec.ConnectorType.DOUBLE_BUFFER, **kwargs ) assert isinstance(iospec, IOSpec) @@ -397,17 +398,17 @@ def test_input_connector_double_buffer(self, fragment, capfd, kwargs): @pytest.mark.parametrize( "kwargs", [{}, dict(capacity=4), dict(capacity=1, policy=1, address="0.0.0.0", port=13337)] ) - def test_input_connector_ucx(self, fragment, capfd, kwargs): - c = OperatorSpecBase(fragment) - iospec = c.input("input_no_condition").connector(IOSpec.ConnectorType.UCX, **kwargs) + def test_input_connector_ucx(self, fragment, kwargs): + spec = OperatorSpecBase(fragment) + iospec = spec.input("input_no_condition").connector(IOSpec.ConnectorType.UCX, **kwargs) assert isinstance(iospec, IOSpec) assert iospec.name == "input_no_condition" assert iospec.io_type == IOSpec.IOType.INPUT assert isinstance(iospec.connector(), UcxReceiver) - def test_input_connector_and_condition(self, fragment, capfd): - c = OperatorSpecBase(fragment) - iospec = c.input("in").connector( + def test_input_connector_and_condition(self, fragment): + spec = OperatorSpecBase(fragment) + iospec = spec.input("in").connector( IOSpec.ConnectorType.DOUBLE_BUFFER, capacity=5, policy=1, @@ -429,13 +430,13 @@ def test_input_connector_and_condition(self, 
fragment, capfd): assert iospec.conditions[0][0] == ConditionType.EXPIRING_MESSAGE_AVAILABLE assert iospec.conditions[0][1] is not None - assert c.inputs["in"] == iospec - assert len(c.inputs["in"].conditions) == 1 + assert spec.inputs["in"] == iospec + assert len(spec.inputs["in"].conditions) == 1 - def test_input_condition_and_connector(self, fragment, capfd): - c = OperatorSpecBase(fragment) + def test_input_condition_and_connector(self, fragment): + spec = OperatorSpecBase(fragment) iospec = ( - c.input("in") + spec.input("in") .condition( ConditionType.EXPIRING_MESSAGE_AVAILABLE, max_batch_size=5, @@ -456,9 +457,52 @@ def test_input_condition_and_connector(self, fragment, capfd): assert iospec.conditions[0][0] == ConditionType.EXPIRING_MESSAGE_AVAILABLE assert iospec.conditions[0][1] is not None - assert c.inputs["in"] == iospec + assert spec.inputs["in"] == iospec + + assert len(spec.inputs["in"].conditions) == 1 + + @pytest.mark.parametrize( + "kind", + [ConditionType.MULTI_MESSAGE_AVAILABLE, ConditionType.MULTI_MESSAGE_AVAILABLE_TIMEOUT], + ) + @pytest.mark.parametrize( + "sampling_mode", + ["SumOfAll", "PerReceiver"], + ) + def test_multi_port_condition(self, fragment, kind, sampling_mode): + spec = OperatorSpecBase(fragment) + spec.input("in1") + spec.input("in2") + spec.input("in3") + + if sampling_mode == "SumOfAll": + extra_kwargs = dict(min_sum=4) + elif sampling_mode == "PerReceiver": + extra_kwargs = dict(min_sizes=[1, 2, 1]) + + spec.multi_port_condition( + kind, + port_names=["in1", "in3"], + sampling_mode=sampling_mode, + **extra_kwargs, + ) + + # check that the condition was added + multi_port_conditions = spec.multi_port_conditions() + assert len(multi_port_conditions) == 1 + + # check the info on the condition + multi_port_condition_info = multi_port_conditions[0] + assert multi_port_condition_info.kind == kind + assert multi_port_condition_info.port_names == ["in1", "in3"] - assert len(c.inputs["in"].conditions) == 1 + # check that the expected kwargs are present in each case + kwargs = arglist_to_kwargs(multi_port_condition_info.args) + if sampling_mode == "SumOfAll": + assert kwargs["min_sum"] == 4 + elif sampling_mode == "PerReceiver": + assert kwargs["min_sizes"] == [1, 2, 1] + assert kwargs["sampling_mode"] == sampling_mode @pytest.mark.parametrize( "spec_args,spec_kwargs,expected_name,expected_size", @@ -480,8 +524,8 @@ def test_input_condition_and_connector(self, fragment, capfd): def test_input_queue_size( self, fragment, capfd, spec_args, spec_kwargs, expected_name, expected_size ): - c = OperatorSpecBase(fragment) - iospec = c.input(*spec_args, **spec_kwargs) + spec = OperatorSpecBase(fragment) + iospec = spec.input(*spec_args, **spec_kwargs) assert isinstance(iospec, IOSpec) assert iospec.name == expected_name assert iospec.io_type == IOSpec.IOType.INPUT @@ -489,39 +533,39 @@ def test_input_queue_size( # Calling a second time with the same name will log an error to the # console. 
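The test above drives `multi_port_condition` through `OperatorSpecBase` directly; in a real operator the same call is made from `setup()`, typically after disabling the default per-port conditions. A condensed sketch (the operator and port names are illustrative):

from holoscan.core import ConditionType, Operator, OperatorSpec

class SumGateOp(Operator):  # hypothetical operator, for illustration only
    def setup(self, spec: OperatorSpec):
        # disable the default MessageAvailable condition on each port
        spec.input("in1").condition(ConditionType.NONE)
        spec.input("in2").condition(ConditionType.NONE)
        # execute once four messages have arrived in total across both ports
        spec.multi_port_condition(
            kind=ConditionType.MULTI_MESSAGE_AVAILABLE,
            port_names=["in1", "in2"],
            sampling_mode="SumOfAll",
            min_sum=4,
        )

    def compute(self, op_input, op_output, context):
        # in "SumOfAll" mode an individual port may have no message pending,
        # so a receive call here may yield None
        msg1 = op_input.receive("in1")
        msg2 = op_input.receive("in2")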
- c.input(expected_name) + spec.input(expected_name) captured = capfd.readouterr() assert "error" in captured.err assert "already exists" in captured.err def test_output(self, fragment, capfd): - c = OperatorSpecBase(fragment) - iospec = c.output() + spec = OperatorSpecBase(fragment) + iospec = spec.output() assert isinstance(iospec, IOSpec) assert iospec.name == "__iospec_output" assert iospec.io_type == IOSpec.IOType.OUTPUT - iospec2 = c.input("output2") + iospec2 = spec.input("output2") assert iospec2.name == "output2" assert iospec.io_type == IOSpec.IOType.OUTPUT # Calling a second time with the same name will log an error - iospec2 = c.input("output2") + iospec2 = spec.input("output2") captured = capfd.readouterr() assert "error" in captured.err assert "already exists" in captured.err - def test_output_condition_none(self, fragment, capfd): - c = OperatorSpecBase(fragment) - iospec = c.output("output_no_condition").condition(ConditionType.NONE) + def test_output_condition_none(self, fragment): + spec = OperatorSpecBase(fragment) + iospec = spec.output("output_no_condition").condition(ConditionType.NONE) assert isinstance(iospec, IOSpec) assert iospec.name == "output_no_condition" assert iospec.io_type == IOSpec.IOType.OUTPUT assert iospec.conditions == [(ConditionType.NONE, None)] - def test_output_condition_downstream_message_affordable(self, fragment, capfd): - c = OperatorSpecBase(fragment) - iospec = c.output("output_downstream_message_affordable_condition").condition( + def test_output_condition_downstream_message_affordable(self, fragment): + spec = OperatorSpecBase(fragment) + iospec = spec.output("output_downstream_message_affordable_condition").condition( ConditionType.DOWNSTREAM_MESSAGE_AFFORDABLE, min_size=1 ) assert isinstance(iospec, IOSpec) @@ -531,18 +575,18 @@ def test_output_condition_downstream_message_affordable(self, fragment, capfd): assert iospec.conditions[0][0] == ConditionType.DOWNSTREAM_MESSAGE_AFFORDABLE assert iospec.conditions[0][1] is not None - def test_output_connector_default(self, fragment, capfd): - c = OperatorSpecBase(fragment) - iospec = c.output("output_no_condition").connector(IOSpec.ConnectorType.DEFAULT) + def test_output_connector_default(self, fragment): + spec = OperatorSpecBase(fragment) + iospec = spec.output("output_no_condition").connector(IOSpec.ConnectorType.DEFAULT) assert isinstance(iospec, IOSpec) assert iospec.name == "output_no_condition" assert iospec.io_type == IOSpec.IOType.OUTPUT assert iospec.connector() is None @pytest.mark.parametrize("kwargs", [{}, dict(capacity=4), dict(capacity=1, policy=1)]) - def test_output_connector_double_buffer(self, fragment, capfd, kwargs): - c = OperatorSpecBase(fragment) - iospec = c.output("output_no_condition").connector( + def test_output_connector_double_buffer(self, fragment, kwargs): + spec = OperatorSpecBase(fragment) + iospec = spec.output("output_no_condition").connector( IOSpec.ConnectorType.DOUBLE_BUFFER, **kwargs ) assert isinstance(iospec, IOSpec) @@ -553,9 +597,9 @@ def test_output_connector_double_buffer(self, fragment, capfd, kwargs): @pytest.mark.parametrize( "kwargs", [{}, dict(capacity=4), dict(capacity=1, policy=1, address="0.0.0.0", port=13337)] ) - def test_output_connector_ucx(self, fragment, capfd, kwargs): - c = OperatorSpecBase(fragment) - iospec = c.output("output_no_condition").connector(IOSpec.ConnectorType.UCX, **kwargs) + def test_output_connector_ucx(self, fragment, kwargs): + spec = OperatorSpecBase(fragment) + iospec = 
spec.output("output_no_condition").connector(IOSpec.ConnectorType.UCX, **kwargs) assert isinstance(iospec, IOSpec) assert iospec.name == "output_no_condition" assert iospec.io_type == IOSpec.IOType.OUTPUT @@ -568,8 +612,8 @@ def test_dynamic_attribute_not_allowed(self, fragment): def test_optional_parameter(self, fragment): op_tx, _ = get_tx_and_rx_ops(fragment) - c = PyOperatorSpec(fragment, op_tx) - c.param("optional_param", 5, flag=ParameterFlag.OPTIONAL) + spec = PyOperatorSpec(fragment, op_tx) + spec.param("optional_param", 5, flag=ParameterFlag.OPTIONAL) class TestInputContext: @@ -596,9 +640,14 @@ def test_condition_type(): ( # noqa: B018 ConditionType.NONE, ConditionType.MESSAGE_AVAILABLE, + ConditionType.EXPIRING_MESSAGE_AVAILABLE, + ConditionType.MULTI_MESSAGE_AVAILABLE, + ConditionType.MULTI_MESSAGE_AVAILABLE_TIMEOUT, ConditionType.DOWNSTREAM_MESSAGE_AFFORDABLE, ConditionType.COUNT, ConditionType.BOOLEAN, + ConditionType.PERIODIC, + ConditionType.ASYNCHRONOUS, ) @@ -700,6 +749,31 @@ def test_add_operator(self, fragment, config_file): fragment.add_operator(op_tx) fragment.add_operator(op_rx) + def test_make_thread_pool(self, fragment, config_file): + fragment.config(config_file) + + op_tx, op_rx = get_tx_and_rx_ops(fragment) + op_tx2, op_rx2 = get_tx_and_rx_ops(fragment) + + pool1 = fragment.make_thread_pool("pool1", 2) + pool1.add(op_tx, True) + pool1.add(op_rx, False) + + pool2 = fragment.make_thread_pool("pool2", 2) + pool2.add([op_tx2, op_rx2], True) + + assert pool1.name == "pool1" + assert pool2.name == "pool2" + + # check that the expected operators are associated with each pool + assert op_rx in pool1.operators + assert op_tx in pool1.operators + assert op_rx2 in pool2.operators + assert op_tx2 in pool2.operators + + assert "gxf_typename: nvidia::gxf::ThreadPool" in repr(pool1) + assert "operators in pool" in repr(pool1) + def test_add_flow(self, fragment, config_file, capfd): fragment.config(config_file) diff --git a/python/tests/unit/test_resources.py b/python/tests/unit/test_resources.py index 1ecdab65..c138580f 100644 --- a/python/tests/unit/test_resources.py +++ b/python/tests/unit/test_resources.py @@ -17,7 +17,7 @@ from holoscan.core import ComponentSpec, Resource from holoscan.core import _Resource as ResourceBase -from holoscan.gxf import GXFResource +from holoscan.gxf import GXFResource, GXFSystemResourceBase from holoscan.operators import PingTxOp from holoscan.resources import ( Allocator, @@ -36,6 +36,7 @@ StdComponentSerializer, StdEntitySerializer, StreamOrderedAllocator, + ThreadPool, Transmitter, UcxComponentSerializer, UcxEntitySerializer, @@ -367,6 +368,30 @@ def test_kwarg_based_initialization(self, app, capfd): assert "warning" not in captured.err +class TestThreadPool: + def test_kwarg_based_initialization(self, app, capfd): + name = "my_thread_pool" + pool = ThreadPool( + fragment=app, + name=name, + initial_size=1, + ) + assert isinstance(pool, GXFSystemResourceBase) + assert isinstance(pool, GXFResource) + assert isinstance(pool, ResourceBase) + assert pool.id == -1 + assert pool.gxf_typename == "nvidia::gxf::ThreadPool" + + assert f"name: {name}" in repr(pool) + + # assert no warnings or errors logged + captured = capfd.readouterr() + assert "error" not in captured.err + + def test_default_initialization(self, app): + ThreadPool(fragment=app) + + class TestUcxSerializationBuffer: def test_kwarg_based_initialization(self, app, capfd): name = "ucx_serialization_buffer" diff --git a/python/tests/unit/test_schedulers.py 
b/python/tests/unit/test_schedulers.py index 1e0165c8..1dde4420 100644 --- a/python/tests/unit/test_schedulers.py +++ b/python/tests/unit/test_schedulers.py @@ -95,6 +95,7 @@ def test_init_kwargs(self, app, ClockClass): # noqa: N803 check_recession_period_ms=2.0, max_duration_ms=10000, stop_on_deadlock_timeout=10, + strict_job_thread_pinning=True, name=name, ) assert isinstance(scheduler, GXFScheduler) diff --git a/run b/run index 835deb17..5758a96b 100755 --- a/run +++ b/run @@ -475,7 +475,7 @@ Arguments: Default: release Associated environment variable: CMAKE_BUILD_TYPE --parallel, -j : Specify the maximum number of concurrent processes to be used when building - Default: maximum + Default: maximum (number of CPUs, using 'nproc') Associated environment variable: CMAKE_BUILD_PARALLEL_LEVEL --buildpath, -d : Change the build path. Default: build-[-] @@ -1097,6 +1097,10 @@ launch() { # -e CUPY_CACHE_DIR # Define path for cupy' kernel cache, needed since $HOME does # not exist when running with `-u id:group` + # + # Container launch command steps: + # - Append the Holoscan bin folder to the existing `PATH` before running + # - Add the GXF module to PYTHONPATH before running img="$(get_build_img_name):$(get_git_sha)" run_command $HOLOSCAN_DOCKER_EXE run \ @@ -1125,7 +1129,10 @@ launch() { --ulimit memlock=-1 \ --ulimit stack=67108864 \ ${extra_args[@]} \ - $img -c "export PATH=\$PATH:${container_top}/${working_dir}/bin; $run_cmd" # Append the Holoscan bin folder to the existing `PATH` before running + $img -c " + export PATH=\$PATH:${container_top}/${working_dir}/bin; + export PYTHONPATH=\$PYTHONPATH:\$(realpath \$(find /opt/nvidia/gxf -name Gxf.py | head -n1 | xargs dirname)/../..); + $run_cmd" } vscode_desc() { c_echo 'Launch VSCode in DevContainer @@ -1133,8 +1140,8 @@ vscode_desc() { c_echo 'Launch VSCode in DevContainer Launch a VSCode instance in a Docker container with the development environment. 
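The thread-pool API and the new `strict_job_thread_pinning` scheduler option exercised above combine as follows; a sketch assuming the stock `PingTxOp`/`PingRxOp` operators and that `MultiThreadScheduler` accepts the fragment as its first argument:

```python
from holoscan.conditions import CountCondition
from holoscan.core import Application
from holoscan.operators import PingRxOp, PingTxOp
from holoscan.schedulers import MultiThreadScheduler


class PinnedPingApp(Application):
    def compose(self):
        tx = PingTxOp(self, CountCondition(self, 10), name="tx")
        rx = PingRxOp(self, name="rx")
        self.add_flow(tx, rx)
        # Dedicated pool with two initial threads: pin tx, let rx float.
        pool = self.make_thread_pool("pool1", 2)
        pool.add(tx, True)
        pool.add(rx, False)


app = PinnedPingApp()
app.scheduler(
    MultiThreadScheduler(app, worker_thread_number=2, strict_job_thread_pinning=True)
)
app.run()
```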
Arguments: - --parallel : Specify the maximum number of concurrent processes to be used when building - Default: maximum + --parallel, -j : Specify the maximum number of concurrent processes to be used when building + Default: maximum (number of CPUs, using 'nproc') Associated environment variable: CMAKE_BUILD_PARALLEL_LEVEL ' } @@ -1158,12 +1165,20 @@ vscode() { # Parse CLI arguments next local args=("$@") - local i - local arg - for i in "${!args[@]}"; do - if [ "${args[i]}" = "--parallel" ]; then - build_njobs="${args[i+1]}" - fi + while [[ $# -gt 0 ]]; do + case $1 in + --parallel|-j) + build_njobs="$2" + shift 2 + ;; + *) + # If $1 starts with a dash, it's an unknown option + if [[ $1 == -* ]]; then + c_echo_err R "Unknown option: $1" + fi + shift + ;; + esac done # These environment variables will be passed to the container via `devcontainer.json` diff --git a/runtime_docker/Dockerfile b/runtime_docker/Dockerfile index 290ec48c..f60e629f 100644 --- a/runtime_docker/Dockerfile +++ b/runtime_docker/Dockerfile @@ -65,7 +65,7 @@ FROM base AS runtime_cpp_no_mkl # libcufft - Holoscan-python-core/OnnxRT dependency # libcurand - libtorch & CuPy dependency # libcusparse - libtorch & CuPy dependency -# libcusparseLt - libtorch dependency (Patch package file installs to match TensorRT base container) +# libcusparseLt - libtorch dependency # cuda-nvrtc - libtorch & CuPy dependency # libnvjitlink - libtorch & CuPy dependency # libcusolver - libtorch & CuPy dependency @@ -75,16 +75,6 @@ FROM base AS runtime_cpp_no_mkl # libcudnn_train.so is removed since training is not needed in a runtime environment (saves ~200 MB) ARG GPU_TYPE RUN apt-get update \ - && if [ $(uname -m) = "aarch64" ] && [ ${GPU_TYPE} = "dgpu" ]; then \ - dpkg-divert --rename --divert /usr/local/cuda/lib64/libcusparseLt.so.0.6.2.3 \ - /usr/lib/sbsa-linux-gnu/libcusparseLt.so.0.6.2.3 \ - && dpkg-divert --rename --divert /usr/local/cuda/lib64/libcusparseLt.so.0 \ - /usr/lib/sbsa-linux-gnu/libcusparseLt.so.0 \ - && dpkg-divert --rename --divert /usr/local/cuda/lib64/libcusparseLt.so \ - /usr/lib/sbsa-linux-gnu/libcusparseLt.so \ - && dpkg-divert --rename --divert /usr/local/cuda/include/cusparseLt.h \ - /usr/include/cusparseLt.h \ - ; fi \ && apt-get install --no-install-recommends --allow-downgrades -y \ libx11-6="2:1.7.5-*" \ libxcursor1="1:1.2.0-*" \ @@ -112,7 +102,7 @@ RUN apt-get update \ libcufft-12-6 \ libcurand-12-6 \ libcusparse-12-6 \ - libcusparselt0="0.6.2.3-*" \ + libcusparselt0="0.6.3.2-*" \ cuda-nvrtc-12-6 \ libnvjitlink-12-6 \ libcusolver-12-6 \ diff --git a/scripts/.gitignore b/scripts/.gitignore new file mode 100644 index 00000000..23a23592 --- /dev/null +++ b/scripts/.gitignore @@ -0,0 +1,3 @@ +*.pickle +*.lock + diff --git a/scripts/CMakeLists.txt b/scripts/CMakeLists.txt index a720b24a..33e0a738 100644 --- a/scripts/CMakeLists.txt +++ b/scripts/CMakeLists.txt @@ -22,9 +22,11 @@ install( convert_video_to_gxf_entities.py download_ngc_data generate_extension_uuids.py + generate_gxf_manifest.py graph_surgeon.py gxf_entity_codec.py video_validation.py + ctest_time_comparison.py DESTINATION "bin" PERMISSIONS OWNER_EXECUTE OWNER_WRITE OWNER_READ COMPONENT "holoscan-core" diff --git a/scripts/README.md b/scripts/README.md index 3c9cadb0..7aeed516 100644 --- a/scripts/README.md +++ b/scripts/README.md @@ -9,6 +9,7 @@ This folder includes the following scripts: - [`convert_video_to_gxf_entities.py`](#convert_video_to_gxf_entitiespy) - [`download_ngc_data`](#download_ngc_data) - 
[`generate_extension_uuids.py`](#generate_extension_uuidspy)
+- [`generate_gxf_manifest.py`](#generate_gxf_manifestpy)
 - [`graph_surgeon.py`](#graph_surgeonpy)
 - [`gxf_entity_codec.py`](#gxf_entity_codecpy)
 - [`video_validation.py`](#video_validationpy)
@@ -109,6 +110,15 @@ Above commands will parse the video starting at 00:00:05 and ending at 00:00:10.
 
 ____
 
+## ctest_time_comparison.py
+
+This script compares recorded CTest run times for pairs of tests to verify that they satisfy the expected timing relationships (e.g., that one test ran faster than another).
+
+### Usage
+
+`python3 ./scripts/ctest_time_comparison.py <ctest_cost_file> "TEST1" "LESS" "TEST2"`
+____
+
 ## download_ngc_data
 
 Download and unzip datasets from NGC. This can optionally run a script to convert video files to GXF tensor files compatible with the `video_stream_replayer` operator.
@@ -140,6 +150,36 @@ python3 scripts/generate_extension_uuids.py
 
 ____
 
+## generate_gxf_manifest.py
+
+Generates a GXF extension registry manifest. Refer to [Graph Composer Registry documentation](https://docs.nvidia.com/metropolis/deepstream/dev-guide/graphtools-docs/docs/text/GraphComposer_Registry.html) for registry details.
+
+Holoscan SDK provides the CMake function `generate_gxf_registry_manifest` to call this script each time a target is updated.
+You can also call this script directly for manual testing with your own Holoscan GXF extensions.
+
+Note that GXF manifests are not portable and typically include filepaths relative to the
+build environment. All extensions must be available in the local environment to use this script.
+
+The script accepts a number of optional arguments, including manifest content, extension search paths,
+a custom Python `.pickle` database path, and more. See `generate_gxf_manifest.py -h` for help.
+
+The GXF registry CLI need not be present in the environment to generate an extension manifest.
+
+### Usage
+
+```sh
+python3 scripts/generate_gxf_manifest.py \
+    --output <manifest_file.yaml> \
+    --name <extension_name> \
+    --extension-library <libmy_extension.so> \
+    --uuid <extension_uuid> \
+    --version <extension_version> \
+    --extension-dependencies [libgxf_std.so,libgxf_ucx.so,...]
+    ...
+```
+
+____
+
 ## graph_surgeon.py
 
 When converting a model from PyTorch to ONNX, it is likely that the input of the model is in the form NCHW (batch, channels, height, width), and needs to be converted to NHWC (batch, height, width, channels). This script performs the conversion and generates a modified model.
diff --git a/scripts/ctest_time_comparison.py b/scripts/ctest_time_comparison.py
new file mode 100755
index 00000000..d93f916c
--- /dev/null
+++ b/scripts/ctest_time_comparison.py
@@ -0,0 +1,81 @@
+#!/usr/bin/env python3
+"""
+SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+SPDX-License-Identifier: Apache-2.0
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+""" # noqa: E501 + +import os +import sys + +script_dir = os.path.dirname(os.path.abspath(__file__)) + + +def main(): + # Define the valid operators + operators = ["LESS", "GREATER", "EQUAL"] + + # Read the CTestCost file + with open(sys.argv[1]) as f: + ctest_costs = [section.split() for section in f.read().split("\n")] + test_times = {} + for _, row in enumerate(ctest_costs): + if len(row) == 2: + test_times[row[0]] = row[1] + + if (len(sys.argv) - 2) % 3 != 0: + print("arguments should be a multiple of three") + return + + valid_output = True + index = 0 + operator = "" + time1 = 0 + time2 = 0 + for i in range(2, len(sys.argv)): + arg = sys.argv[i] + if arg in test_times: + if index == 0: + time1 = test_times[arg] + else: + time2 = test_times[arg] + elif arg in operators: + operator = arg + else: + valid_output = False + print("Argument " + arg + " is not recognized") + break + index += 1 + + if index == 3: + index = 0 + if ( + (operator == "LESS" and time1 >= time2) + or (operator == "EQUAL" and time1 != time2) + or (operator == "GREATER" and time1 <= time2) + ): + valid_output = False + break + operator = "" + time1 = 0 + time2 = 0 + + if valid_output: + print("Timing for tests matches expectations") + else: + print("Timing for tests does not match expectations") + + +if __name__ == "__main__": + main() diff --git a/scripts/generate_gxf_manifest.py b/scripts/generate_gxf_manifest.py new file mode 100755 index 00000000..47baf3ef --- /dev/null +++ b/scripts/generate_gxf_manifest.py @@ -0,0 +1,670 @@ +#!/usr/bin/env python3 +""" +SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +SPDX-License-Identifier: Apache-2.0 + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" # noqa: E501 + +import argparse +import atexit +import fcntl +import logging +import os +import pickle +from os.path import abspath, dirname, join +from pathlib import Path + +import gxf.core +import yaml + +try: + from registry.core.version import GXF_CORE_COMPATIBLE_VERSION, REGISTRY_CORE_VERSION +except ImportError: + GXF_CORE_COMPATIBLE_VERSION = "4.1.0" + REGISTRY_CORE_VERSION = "1.1" + +# Configure the logger +logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s") +logger = logging.getLogger(__name__) + +####################################################################################### +# Extension dependency caching support +####################################################################################### +LOCAL_CACHE_DB_PATH = abspath(join(dirname(__file__), "gxf_extension_cache.pickle")) +LOCAL_CACHE_DB_LOCK_PATH = abspath(join(dirname(__file__), "gxf_extension_cache.pickle.lock")) +LOCAL_CACHE_DB_LOCK_FILE = None # Lock file for the cache db + +# extension path to (name, uuid, version) map +global_ext_path_to_extinfo_map = {} +# extension path to extension dependencies map (set of extension paths) +global_ext_path_to_dep_paths_map = {} + +####################################################################################### +# Manifest generation data +####################################################################################### +DEFAULT_HOLOSCAN_DISTRIBUTION = "ubuntu_22.04" +DEFAULT_HOLOSCAN_CUDA = "12.6" +DEFAULT_HOLOSCAN_NAMESPACE = "holoscan" +DEFAULT_HOLOSCAN_LICENSE_FILE = abspath(join(dirname(__file__), "..", "LICENSE.txt")) +DEFAULT_HOLOSCAN_URL = "github.com/nvidia-holoscan" +DEFAULT_HOLOSCAN_GIT_REPOSITORY = "github.com/nvidia-holoscan/holoscan-sdk" + + +MANIFEST_HEADER = f"""# This file is auto-generated by generate_gxf_manifest.py +# GXF Core Version: {GXF_CORE_COMPATIBLE_VERSION} +# Registry Core Version: {REGISTRY_CORE_VERSION}""" + +MANIFEST_TEMPLATE = ( + MANIFEST_HEADER + + """ +name:{name} +extension_library:{extension_library} +uuid:{uuid} +version:{version} +license_file:{license_file} +url:{url} +git_repository:{git_repository} +labels:{labels} +badges:{badges} +priority:{priority} +platform: + arch:{arch} + os:{os} + distribution:{distribution} +compute: + cuda:{cuda} + tensorrt:{tensorrt} + cudnn:{cudnn} + deepstream:{deepstream} + triton:{triton} + vpi:{vpi} +dependencies:{dependencies} +headers:{headers} +binaries:{binaries} +python_alias:{python_alias} +namespace:{namespace} +python_bindings:{python_bindings} +python_sources:{python_sources} +data:{data} +""" +) + +DEFAULT_HOLOSCAN_MANIFEST_DATA = { + "name": None, + "extension_library": None, + "uuid": None, + "version": None, + "license_file": DEFAULT_HOLOSCAN_LICENSE_FILE, + "url": DEFAULT_HOLOSCAN_URL, + "git_repository": DEFAULT_HOLOSCAN_GIT_REPOSITORY, + "labels": ["holoscan"], + "badges": [], + "priority": 1, + "arch": "x86_64", + "os": "linux", + "distribution": DEFAULT_HOLOSCAN_DISTRIBUTION, + "cuda": DEFAULT_HOLOSCAN_CUDA, + "tensorrt": None, + "cudnn": None, + "deepstream": None, + "triton": None, + "vpi": None, + "dependencies": [], + "headers": [], + "binaries": [], + "python_alias": None, + "namespace": DEFAULT_HOLOSCAN_NAMESPACE, + "python_bindings": [], + "python_sources": [], + "data": [], +} + + +def load_cache_db() -> None: + """Import a local cache database with extension dependency information.""" + global global_ext_path_to_extinfo_map + global global_ext_path_to_dep_paths_map + global LOCAL_CACHE_DB_LOCK_FILE + global 
LOCAL_CACHE_DB_PATH + + if not LOCAL_CACHE_DB_PATH.endswith(".pickle"): + logger.warning( + f"Specified database file {LOCAL_CACHE_DB_PATH} does not look like a pickle file" + ) + + # Lock the cache db file + LOCAL_CACHE_DB_LOCK_FILE = open(LOCAL_CACHE_DB_LOCK_PATH, "w") # noqa: SIM115 + fcntl.flock(LOCAL_CACHE_DB_LOCK_FILE, fcntl.LOCK_EX) + + if Path(LOCAL_CACHE_DB_PATH).exists(): + try: + with open(LOCAL_CACHE_DB_PATH, "rb") as f: + cache_dict = pickle.load(f) + global_ext_path_to_extinfo_map = cache_dict["ext_path_to_extinfo_map"] + global_ext_path_to_dep_paths_map = cache_dict["ext_path_to_dep_paths_map"] + except Exception as e: + logger.warning(f"Failed to load cache from {LOCAL_CACHE_DB_PATH}: {e}") + global_ext_path_to_extinfo_map = {} + global_ext_path_to_dep_paths_map = {} + else: + global_ext_path_to_extinfo_map = {} + global_ext_path_to_dep_paths_map = {} + + +def save_cache_db() -> None: + """Export a local cache database with extension dependency information.""" + global LOCAL_CACHE_DB_LOCK_FILE + + try: + with open(LOCAL_CACHE_DB_PATH, "wb") as f: + pickle.dump( + { + "ext_path_to_extinfo_map": global_ext_path_to_extinfo_map, + "ext_path_to_dep_paths_map": global_ext_path_to_dep_paths_map, + }, + f, + ) + logger.info(f"Cache saved to {LOCAL_CACHE_DB_PATH}") + # print(f"global_ext_path_to_dep_paths_map: {global_ext_path_to_dep_paths_map}") + except Exception as e: + logger.error(f"Failed to save cache to {LOCAL_CACHE_DB_PATH}: {e}") + + # Unlock the cache db lock file + fcntl.flock(LOCAL_CACHE_DB_LOCK_FILE, fcntl.LOCK_UN) + LOCAL_CACHE_DB_LOCK_FILE.close() + LOCAL_CACHE_DB_LOCK_FILE = None + + +def parse_list(value) -> list: + if isinstance(value, str): + # Split by comma or semicolon, then strip whitespace + return [item.strip() for item in value.replace(";", ",").split(",") if item.strip()] + elif isinstance(value, list): + return value + return [] + + +def parse_dependencies(value) -> list: + if isinstance(value, str): + dependencies = [] + for dep_str in value.replace(";", ",").split(","): + dep_dict = {} + for pair in dep_str.strip().split(): + key, val = pair.split(":", 1) + dep_dict[key.strip()] = val.strip() + if dep_dict: + dependencies.append(dep_dict) + return dependencies + elif isinstance(value, list): + return value + return [] + + +def load_extensions(context, ext_path, loaded_ext_path_set, loaded_ext_uuid_set) -> None: + """ + Load an extension into a GXF context, optionally traversing a provided dependency graph + to pre-load known extension dependencies. + """ + global global_ext_path_to_dep_paths_map, global_ext_path_to_extinfo_map + ext_path = abspath(ext_path) + + # Check if the extension is already loaded + if ext_path in loaded_ext_path_set: + logger.info(f"Extension {ext_path} already loaded. 
Skipping.") + return + + # Load the extension recursively + logger.info(f"Loading extension '{ext_path}'") + dependencies = global_ext_path_to_dep_paths_map.get(ext_path, []) + logger.info(f"Dependencies of {ext_path}: {dependencies}") + + for dep_path in dependencies: + load_extensions(context, dep_path, loaded_ext_path_set, loaded_ext_uuid_set) + + logger.info(f"Loading extension {ext_path}") + gxf.core.load_extensions(context=context, extension_filenames=[ext_path]) + + loaded_ext_path_set.add(ext_path) + + # Cache identifying information about the loaded extension + try: + ext_list = gxf.core.get_extension_list(context) + # Identify the loaded extension uuid by comparing with loaded_ext_uuid_set + for uuid in ext_list: + if uuid not in loaded_ext_uuid_set: + loaded_ext_uuid_set.add(uuid) + ext_info = gxf.core.get_extension_info(context, uuid) + logger.info( + f"Loaded extension {ext_info['name']} with uuid {uuid} and version " + f"{ext_info['version']} from '{ext_path}'" + ) + global_ext_path_to_extinfo_map[ext_path] = ( + ext_info["name"], + uuid, + ext_info["version"], + ) + except ValueError as e: + logger.error(f"Failed to load extension {ext_path}: {e}") + raise e + + +def generate_extension_dependencies(arg_dict: dict) -> list[dict]: + """ + Generate a list of extension dependencies with version information for a given extension. + + :param arg_dict: Argument dictionary with expected keys: + - extension_library: Path to the extension library under consideration. + - search_paths: List of paths to search for extension libraries. + - extensions_to_preload: Ordered list of extension filepaths to pre-load in the GXF context + without including them explicitly in the dependency graph. + - extension_dependencies: Ordered list of known direct extension dependencies. + :returns: An ordered list of dictionaries containing a name, uuid, and version for each + direct extension dependency. + :throws: FileNotFoundError if an extension library cannot be found on the system. + :throws: ValueError if an extension dependency fails to load in the GXF context, + typically due to some missing or out-of-order transitive dependency. 
+ """ + global global_ext_path_to_dep_paths_map + + # List of (extension name, extension uuid, extension version) tuples to be returned + ext_uuid_name_list = [] + + ext_path = arg_dict["extension_library"] + search_paths = arg_dict["search_path"] + extensions_to_preload = arg_dict["extensions_preload"] or [] + extension_dependencies = arg_dict["extension_dependencies"] or [] + + logger.info(f"Generating extension dependencies for {extension_dependencies}") + + # Convert extension_dependencies to a list if it's a string + if isinstance(extension_dependencies, str): + extension_dependencies = extension_dependencies.replace(";", ",").split(",") + # Remove any empty strings from the list + extension_dependencies = [dep for dep in extension_dependencies if dep.strip()] + + # If extension_dependencies is empty, return an empty dictionary + if not extension_dependencies: + return {} + + # Set of loaded extension paths + loaded_ext_path_set = set() + # Set of loaded extension uuids + loaded_ext_uuid_set = set() + + # List of dependency paths + dep_ext_paths = [] + + def find_absolute_path(file: str, search_paths: list): + """Find a requested extension on the local system""" + if os.path.isabs(file): + return file + for search_path in search_paths: + potential_path = os.path.join(search_path, file) + if os.path.exists(potential_path): + return potential_path + raise FileNotFoundError(f"Could not find {file} on the system") + + preload_extension_filenames = [ + find_absolute_path(gxf_component, search_paths) for gxf_component in extensions_to_preload + ] + dep_extension_filenames = [ + find_absolute_path(gxf_component, search_paths) for gxf_component in extension_dependencies + ] + + # Create a context + context = gxf.core.context_create() + + # Pre-populate the context with specified dependencies and initialize cache entries + gxf.core.load_extensions(context=context, extension_filenames=preload_extension_filenames) + preloaded_ext_deps = {ext_path: [] for ext_path in preload_extension_filenames} + preloaded_ext_deps.update(global_ext_path_to_dep_paths_map) + global_ext_path_to_dep_paths_map = preloaded_ext_deps + + # Iteratively load specified extension dependencies + for extension_filepath in dep_extension_filenames: + # Add the extension path to the set of dependency paths + dep_ext_paths.append(extension_filepath) + + # Load the dependent extensions recursively + load_extensions( + context=context, + ext_path=extension_filepath, + loaded_ext_path_set=loaded_ext_path_set, + loaded_ext_uuid_set=loaded_ext_uuid_set, + ) + + # Store the dependency paths for the extension + global_ext_path_to_dep_paths_map[ext_path] = dep_extension_filenames + + # For each dependency, get the extension name, uuid, and version + for dep_ext_path in dep_ext_paths: + if dep_ext_path in global_ext_path_to_extinfo_map: + ext_name, ext_uuid, ext_version = global_ext_path_to_extinfo_map[dep_ext_path] + ext_uuid_name_list.append( + { + "extension": ext_name, + "uuid": ext_uuid, + "version": ext_version, + } + ) + else: + raise ValueError(f"Extension {dep_ext_path} not found in the cache") + + # Destroy the context + gxf.core.context_destroy(context) + + return ext_uuid_name_list + + +def generate_manifest_content(arg_dict) -> dict: + """Generate data to populate a GXF extension manifest file.""" + global global_ext_path_to_extinfo_map + manifest_data = DEFAULT_HOLOSCAN_MANIFEST_DATA.copy() + for key, value in arg_dict.items(): + if value is not None: + if key in [ + "labels", + "badges", + "headers", + "binaries", + 
"python_bindings", + "python_sources", + "data", + ]: + manifest_data[key] = parse_list(value) + elif key == "dependencies": + manifest_data[key] = parse_dependencies(value) + + ext_uuid_name_dict = None + # Generate extension dependencies + if arg_dict.get("extension_dependencies"): + ext_uuid_name_dict = generate_extension_dependencies(arg_dict) + + # If ext_uuid_name_dict not empty, replace the dependencies in arg_dict + if ext_uuid_name_dict: + logger.info(f"replacing dependencies with {ext_uuid_name_dict}") + manifest_data[key] = ext_uuid_name_dict + + else: + manifest_data[key] = value + else: + manifest_data[key] = None + + # Store uuid to extension info map + ext_path = abspath(manifest_data["extension_library"]) + ext_name = manifest_data["name"] + ext_uuid = manifest_data["uuid"] + ext_version = manifest_data["version"] + global_ext_path_to_extinfo_map[ext_path] = (ext_name, ext_uuid, ext_version) + + return manifest_data + + +def convert_to_yaml_filler(manifest_data) -> dict: + yaml_filler = {} + for key, value in manifest_data.items(): + if value is None: + yaml_filler[key] = "" + elif isinstance(value, list): + if key == "dependencies": + deps_yaml = yaml.dump(value, default_flow_style=False) + yaml_filler[key] = "\n" + deps_yaml.rstrip() if value else " []" + else: + yaml_filler[key] = ( + "\n" + "\n".join(f"- {item}" for item in value) if value else " []" + ) + else: + yaml_filler[key] = f" {value}" + + return yaml_filler + + +def write_manifest_file(content, output_file) -> None: + with open(output_file, "w") as f: + f.write(content) + logger.info(f"Manifest file generated: {output_file}") + + +def main(): + parser = argparse.ArgumentParser( + description="Generates a GXF manifest file for an extension." + "The manifest file can be used to register the extension with the GXF registry." 
+ "Refer to Graph Composer documentation for more information on the GXF registry.\n" + "https://docs.nvidia.com/metropolis/deepstream/dev-guide/graphtools-docs/docs/text/GraphComposer_Registry.html" + ) + + # Required arguments + parser.add_argument("--name", required=True, help="Name of the extension") + parser.add_argument( + "--extension-library", + required=True, + help="Path of the extension library", + ) + parser.add_argument("--uuid", required=True, help="UUID of the extension") + parser.add_argument("--version", required=True, help="Version of the extension") + parser.add_argument( + "--license-file", + required=False, + default=DEFAULT_HOLOSCAN_MANIFEST_DATA["license_file"], + help="Path of the license file", + ) + parser.add_argument( + "--url", + required=False, + default=DEFAULT_HOLOSCAN_MANIFEST_DATA["url"], + help="URL of the extension", + ) + parser.add_argument( + "--git-repository", + required=False, + default=DEFAULT_HOLOSCAN_MANIFEST_DATA["git_repository"], + help="Git repository of the extension", + ) + parser.add_argument( + "--labels", + required=False, + default=DEFAULT_HOLOSCAN_MANIFEST_DATA["labels"], + help="List of labels (comma or semicolon-separated)", + ) + parser.add_argument( + "--badges", + required=False, + default=DEFAULT_HOLOSCAN_MANIFEST_DATA["badges"], + help="List of badges (comma or semicolon-separated)", + ) + parser.add_argument( + "--priority", + required=False, + default=DEFAULT_HOLOSCAN_MANIFEST_DATA["priority"], + help="Priority of the extension", + ) + parser.add_argument( + "--arch", + required=False, + default=DEFAULT_HOLOSCAN_MANIFEST_DATA["arch"], + help="Architecture of the extension", + ) + parser.add_argument( + "--os", + required=False, + default=DEFAULT_HOLOSCAN_MANIFEST_DATA["os"], + help="Operating system of the extension", + ) + parser.add_argument( + "--distribution", + required=False, + default=DEFAULT_HOLOSCAN_MANIFEST_DATA["distribution"], + help="Distribution of the extension", + ) + parser.add_argument( + "--cuda", + required=False, + default=DEFAULT_HOLOSCAN_MANIFEST_DATA["cuda"], + help="CUDA version of the extension", + ) + parser.add_argument( + "--tensorrt", + required=False, + default=DEFAULT_HOLOSCAN_MANIFEST_DATA["tensorrt"], + help="TensorRT version of the extension", + ) + parser.add_argument( + "--cudnn", + required=False, + default=DEFAULT_HOLOSCAN_MANIFEST_DATA["cudnn"], + help="cuDNN version of the extension", + ) + parser.add_argument( + "--deepstream", + required=False, + default=DEFAULT_HOLOSCAN_MANIFEST_DATA["deepstream"], + help="DeepStream version of the extension", + ) + parser.add_argument( + "--triton", + required=False, + default=DEFAULT_HOLOSCAN_MANIFEST_DATA["triton"], + help="Triton version of the extension", + ) + parser.add_argument( + "--vpi", + required=False, + default=DEFAULT_HOLOSCAN_MANIFEST_DATA["vpi"], + help="VPI version of the extension", + ) + parser.add_argument( + "--dependencies", + required=False, + default=DEFAULT_HOLOSCAN_MANIFEST_DATA["dependencies"], + help=( + "List of dependencies (comma or semicolon-separated, 'key:value' pairs in each" + " dependency string separated by spaces)." + " Refer to Graph Composer documentation for more information on the" + " dependency format." 
+ ), + ) + parser.add_argument( + "--headers", + required=False, + nargs="+", + default=DEFAULT_HOLOSCAN_MANIFEST_DATA["headers"], + help="List of header files", + ) + parser.add_argument( + "--binaries", + required=False, + nargs="+", + default=DEFAULT_HOLOSCAN_MANIFEST_DATA["binaries"], + help="List of binary files", + ) + parser.add_argument( + "--python-alias", + required=False, + default=DEFAULT_HOLOSCAN_MANIFEST_DATA["python_alias"], + help="Python alias of the extension", + ) + parser.add_argument( + "--namespace", + required=False, + default=DEFAULT_HOLOSCAN_NAMESPACE, + help="Namespace of the extension", + ) + parser.add_argument( + "--python-bindings", + required=False, + default=DEFAULT_HOLOSCAN_MANIFEST_DATA["python_bindings"], + help="List of Python bindings (comma or semicolon-separated)", + ) + parser.add_argument( + "--python-sources", + required=False, + default=DEFAULT_HOLOSCAN_MANIFEST_DATA["python_sources"], + help="List of Python sources (comma or semicolon-separated)", + ) + parser.add_argument( + "--data", + required=False, + default=DEFAULT_HOLOSCAN_MANIFEST_DATA["data"], + help="List of data files (comma or semicolon-separated)", + ) + parser.add_argument( + "--extension-dependencies", + nargs="+", + required=False, + help="Ordered list of extension dependency filepaths (comma or semicolon-separated)." + " Filepaths must exist in the local environment.", + ) + # Optional arguments to assist in collecting extension dependencies + parser.add_argument( + "--search-path", + nargs="+", + required=False, + help="Root path to search for extensions", + ) + parser.add_argument( + "--extensions-preload", + nargs="+", + required=False, + help="Ordered list of extensions to pre-load in the GXF context before generating the" + " manifest. Can be used to load indirect transitive dependencies without adding them" + " to the manifest.", + ) + parser.add_argument( + "--db", + required=False, + help="Path to a database `.pickle` file with cached extension dependency graph information" + " from previous runs of this script. " + " Knowledge of transitive dependencies is required to load and query a GXF context." + " If provided, the database will be loaded and saved after generating the manifest." 
+ " The database file will be created if it does not exist.", + ) + # Add output file argument + parser.add_argument("--output", required=True, help="Output manifest file path") + parser.add_argument("-q", "--quiet", action="store_true", help="Suppress logging output") + + args = parser.parse_args() + + if args.quiet: + logger.setLevel(logging.WARNING) + + # Load cache db and register save_cache_db to be called when the program exits + if args.db: + global LOCAL_CACHE_DB_PATH + global LOCAL_CACHE_DB_LOCK_PATH + LOCAL_CACHE_DB_PATH = abspath(args.db) + LOCAL_CACHE_DB_LOCK_PATH = abspath(args.db + ".lock") + load_cache_db() + atexit.register(save_cache_db) + + arg_dict = vars(args).copy() + + manifest_content = generate_manifest_content(arg_dict) + + # Remove the unnecessary arguments from the manifest_content + for key in [ + "output", + "search_path", + "extension_dependencies", + "extensions_preload", + ]: + del manifest_content[key] + + yaml_filler = convert_to_yaml_filler(manifest_content) + final_manifest = MANIFEST_TEMPLATE.format(**yaml_filler) + + write_manifest_file(final_manifest, args.output) + + +if __name__ == "__main__": + main() diff --git a/scripts/graph_surgeon.py b/scripts/graph_surgeon.py index 3e419786..94a30751 100644 --- a/scripts/graph_surgeon.py +++ b/scripts/graph_surgeon.py @@ -30,7 +30,13 @@ # Insert a transpose at the network input tensor and rebind it to the new node (1 x 3 x 512 x 512) nhwc_to_nchw_in = gs.Node("Transpose", name="transpose_input", attrs={"perm": [0, 3, 1, 2]}) nhwc_to_nchw_in.outputs = graph.inputs -graph.inputs = [gs.Variable("INPUT__0", dtype=graph.inputs[0].dtype, shape=[1, 512, 512, 3])] +graph.inputs = [ + gs.Variable( + "INPUT__0", + dtype=graph.inputs[0].dtype, + shape=[graph.inputs[0].shape[i] for i in [0, 2, 3, 1]], + ) +] nhwc_to_nchw_in.inputs = graph.inputs graph.nodes.extend([nhwc_to_nchw_in]) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 761df246..369d8b4f 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -142,6 +142,8 @@ add_holoscan_library(core core/conditions/gxf/downstream_affordable.cpp core/conditions/gxf/periodic.cpp core/conditions/gxf/message_available.cpp + core/conditions/gxf/multi_message_available.cpp + core/conditions/gxf/multi_message_available_timeout.cpp core/conditions/gxf/expiring_message.cpp core/config.cpp core/dataflow_tracker.cpp @@ -155,6 +157,7 @@ add_holoscan_library(core core/fragment_scheduler.cpp core/graphs/flow_graph.cpp core/gxf/entity.cpp + core/gxf/entity_group.cpp core/gxf/gxf_component.cpp core/gxf/gxf_component_info.cpp core/gxf/gxf_condition.cpp @@ -180,6 +183,7 @@ add_holoscan_library(core core/resources/gxf/annotated_double_buffer_transmitter.cpp core/resources/gxf/block_memory_pool.cpp core/resources/gxf/clock.cpp + core/resources/gxf/cpu_thread.cpp core/resources/gxf/cuda_allocator.cpp core/resources/gxf/cuda_stream_pool.cpp core/resources/gxf/double_buffer_receiver.cpp @@ -196,6 +200,7 @@ add_holoscan_library(core core/resources/gxf/std_component_serializer.cpp core/resources/gxf/std_entity_serializer.cpp core/resources/gxf/stream_ordered_allocator.cpp + core/resources/gxf/system_resources.cpp core/resources/gxf/transmitter.cpp core/resources/gxf/ucx_component_serializer.cpp core/resources/gxf/ucx_entity_serializer.cpp diff --git a/src/core/app_driver.cpp b/src/core/app_driver.cpp index 19b8b2d4..c33ef682 100644 --- a/src/core/app_driver.cpp +++ b/src/core/app_driver.cpp @@ -20,7 +20,6 @@ #include // POSIX setenv #include -#include #include #include #include @@ 
-814,15 +813,16 @@ bool AppDriver::check_configuration() {
     return false;
   }
-  // If the environment variable HOLOSCAN_ENABLE_HEALTH_CHECK is set to true, we enable the
-  // health check service.
-  if (get_bool_env_var("HOLOSCAN_ENABLE_HEALTH_CHECK")) { need_health_check_ = true; }
-
   auto& app_options = *options_;
-  // Or, if the driver or worker is running, we need to launch the health check service.
+  // If the driver or worker is running, we launch the health check service by default.
   if (app_options.run_driver || app_options.run_worker) { need_health_check_ = true; }
+  // Or, if the environment variable HOLOSCAN_ENABLE_HEALTH_CHECK is set to true or false, we enable
+  // or disable the health check service. If the environment variable is not set or invalid, we use
+  // the default value.
+  need_health_check_ = get_bool_env_var("HOLOSCAN_ENABLE_HEALTH_CHECK", need_health_check_);
+
   // Check if the driver or worker service needs to be launched
   if (app_options.run_driver) { need_driver_ = true; }
   if (app_options.run_worker) { need_worker_ = true; }
diff --git a/src/core/application.cpp b/src/core/application.cpp
index e3e39ee1..7d0e4493 100644
--- a/src/core/application.cpp
+++ b/src/core/application.cpp
@@ -208,10 +208,27 @@ void Application::set_ucx_env() {
   setenv("UCX_CM_USE_ALL_DEVICES", "n", 0);
 }
 
+void Application::set_v4l2_env() {
+  const char* env_value = std::getenv("HOLOSCAN_DISABLE_V4L2_RTLD_NODELETE");
+  // Workaround to avoid v4l2 seg fault https://nvbugs/4210082
+  if (env_value == nullptr) {
+    HOLOSCAN_LOG_DEBUG("Enable the libnvv4l2 workaround by setting the "
+                       "`LIBV4L2_ENABLE_RTLD_NODELETE` environment variable.");
+    setenv("LIBV4L2_ENABLE_RTLD_NODELETE", "1", 0);
+  }
+}
+
 void Application::run() {
+  // Debug log to show that the run() function is executed
+  // (with the logging function pointer info to check if the logging function pointer address is
+  // the same as the one set on the Python side).
+  // This message is checked by the test_app_log_function in test_application_minimal.py.
+  HOLOSCAN_LOG_DEBUG("Executing Application::run()... (log_func_ptr=0x{:x})",
+                     reinterpret_cast<uint64_t>(&nvidia::LoggingFunction));
   if (cli_parser_.has_error()) { return; }
   set_ucx_env();
+  set_v4l2_env();
   driver().run();
 }
 
@@ -224,7 +241,7 @@ std::future<void> Application::run_async() {
 
 std::unordered_map<std::string, DataFlowTracker*> Application::track_distributed(
     uint64_t num_start_messages_to_skip, uint64_t num_last_messages_to_discard,
-    int latency_threshold) {
+    int latency_threshold, bool is_limited_tracking) {
   if (!is_composed_) { compose_graph(); }
   std::unordered_map<std::string, DataFlowTracker*> trackers;
   auto& frag_graph = fragment_graph();
@@ -232,8 +249,10 @@ std::unordered_map<std::string, DataFlowTracker*> Application::track_distributed
   for (const auto& each_fragment : frag_graph.get_nodes()) {
     // if track has not been called on the fragment, then call the tracker
     if (!each_fragment->data_flow_tracker()) {
-      each_fragment->track(
-          num_start_messages_to_skip, num_last_messages_to_discard, latency_threshold);
+      each_fragment->track(num_start_messages_to_skip,
+                           num_last_messages_to_discard,
+                           latency_threshold,
+                           is_limited_tracking);
     }
     trackers[each_fragment->name()] = each_fragment->data_flow_tracker();
   }
diff --git a/src/core/conditions/gxf/multi_message_available.cpp b/src/core/conditions/gxf/multi_message_available.cpp
new file mode 100644
index 00000000..0af58817
--- /dev/null
+++ b/src/core/conditions/gxf/multi_message_available.cpp
@@ -0,0 +1,163 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "holoscan/core/conditions/gxf/multi_message_available.hpp"
+
+#include <algorithm>
+#include <any>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include <yaml-cpp/yaml.h>
+
+#include "holoscan/core/component_spec.hpp"
+#include "holoscan/core/fragment.hpp"
+#include "holoscan/core/resource.hpp"
+
+namespace holoscan {
+
+void MultiMessageAvailableCondition::setup(ComponentSpec& spec) {
+  spec.param(receivers_,
+             "receivers",
+             "Receivers",
+             "The scheduling term permits execution if the given channels have at least a given "
+             "number of messages available. Note that this parameter is not intended to be passed "
+             "via an argument to `Fragment::make_condition`. Instead, from the Operator::setup "
+             "method, the receiver port names specified in the `port_names` argument to "
+             "`OperatorSpec::multi_port_condition` will be used. Holoscan will take "
+             "care of setting this parameter with the actual receiver objects created during "
+             "initialization of the application.");
+  spec.param(sampling_mode_,
+             "sampling_mode",
+             "Sampling Mode",
+             "The sampling method to use when checking for messages in receiver queues. "
+             "Options: YAML::Node(\"SumOfAll\"), YAML::Node(\"PerReceiver\")",
+             YAML::Node("SumOfAll"));
+  spec.param(min_sizes_,
+             "min_sizes",
+             "Minimum message counts",
+             "The scheduling term permits execution if all given receivers have at least the "
+             "given number of messages available in this list. This option is only intended for "
+             "use with `sampling_mode = 1` (per-receiver mode). The size of `min_sizes` must match "
+             "the number of receivers associated with the condition.",
+             ParameterFlag::kOptional);
+  spec.param(min_sum_,
+             "min_sum",
+             "Minimum sum of message counts",
+             "The scheduling term permits execution if the sum of message counts of all "
+             "receivers have at least the given number of messages available. This option is only "
+             "intended for use with `sampling_mode = 0` (sum-of-all mode).",
+             ParameterFlag::kOptional);
+}
+
+nvidia::gxf::MultiMessageAvailableSchedulingTerm* MultiMessageAvailableCondition::get() const {
+  return static_cast<nvidia::gxf::MultiMessageAvailableSchedulingTerm*>(gxf_cptr_);
+}
+
+void MultiMessageAvailableCondition::min_sum(size_t min_sum) {
+  auto cond = get();
+  if (cond) { cond->setMinSum(min_sum); }
+  min_sum_ = min_sum;
+}
+
+void MultiMessageAvailableCondition::initialize() {
+  // Automatically convert string or enum to YAML::Node for 'sampling_mode' argument
+  auto& current_args = args();
+
+  auto find_it = std::find_if(current_args.begin(), current_args.end(), [](const auto& arg) {
+    bool check = (arg.name() == "sampling_mode" &&
+                  (arg.arg_type().element_type() == ArgElementType::kString ||
+                   arg.arg_type().element_type() == ArgElementType::kCustom) &&
+                  arg.arg_type().container_type() == ArgContainerType::kNative);
+    return check;
+  });
+
+  if (find_it != current_args.end()) {
+    bool yaml_conversion_failed = false;
+    YAML::Node sampling_mode;
+    if (find_it->arg_type().element_type() == ArgElementType::kString) {
+      auto mode_string = std::any_cast<std::string>(find_it->value());
+      if (mode_string == "SumOfAll") {
+        sampling_mode = YAML::Node("SumOfAll");
+      } else if (mode_string == "PerReceiver") {
+        sampling_mode = YAML::Node("PerReceiver");
+      } else {
+        HOLOSCAN_LOG_ERROR("Unrecognized sampling mode string value: {}", mode_string);
+        yaml_conversion_failed = true;
+      }
+    } else {
+      try {
+        auto mode_enum =
+            std::any_cast<MultiMessageAvailableCondition::SamplingMode>(find_it->value());
+        if (mode_enum == MultiMessageAvailableCondition::SamplingMode::kSumOfAll) {
+          sampling_mode = YAML::Node("SumOfAll");
+        } else if (mode_enum == MultiMessageAvailableCondition::SamplingMode::kPerReceiver) {
+          sampling_mode = YAML::Node("PerReceiver");
+        } else {
+          HOLOSCAN_LOG_ERROR("Unrecognized sampling mode enum value: {}",
+                             static_cast<int>(mode_enum));
+          yaml_conversion_failed = true;
+        }
+      } catch (const std::bad_any_cast& e) {
+        HOLOSCAN_LOG_ERROR(
+            "Unable to cast 'sampling_mode' argument to a "
+            "MultiMessageAvailableCondition::SamplingMode enum: {}",
+            e.what());
+        yaml_conversion_failed = true;
+      }
+    }
+    if (!yaml_conversion_failed) {
+      // remove the old, non-YAML version of the argument
+      auto new_arg_end =
+          std::remove_if(current_args.begin(), current_args.end(), [](const auto& arg) {
+            return arg.name() == "sampling_mode";
+          });
+      current_args.erase(new_arg_end, current_args.end());
+      // add the YAML::Node argument
+      add_arg(Arg("sampling_mode", sampling_mode));
+    }
+  }
+
+  // parent class initialize() call must be after the argument modification above
+  GXFCondition::initialize();
+}
+
+void MultiMessageAvailableCondition::sampling_mode(SamplingMode sampling_mode) {
+  auto cond = get();
+  if (cond) { cond->setSamplingMode(sampling_mode); }
+  switch (sampling_mode) {
+    case SamplingMode::kSumOfAll:
+      sampling_mode_ = YAML::Node("SumOfAll");
+      break;
+    case SamplingMode::kPerReceiver:
+      sampling_mode_ = YAML::Node("PerReceiver");
+      break;
+    default:
+      HOLOSCAN_LOG_ERROR("Unrecognized sampling mode value: {}", static_cast<int>(sampling_mode));
+      break;
+  }
+}
+
+void MultiMessageAvailableCondition::add_min_size(size_t value) {
+  auto cond = get();
+  if (cond) { cond->addMinSize(value); }
+  auto min_sizes = std::any_cast<std::vector<size_t>>(min_sizes_);
+  min_sizes.push_back(value);
+  min_sizes_ = min_sizes;
+}
+
+}  // namespace holoscan
diff --git a/src/core/conditions/gxf/multi_message_available_timeout.cpp b/src/core/conditions/gxf/multi_message_available_timeout.cpp
new file mode 100644
index 00000000..2f3a75d1
--- /dev/null
+++ b/src/core/conditions/gxf/multi_message_available_timeout.cpp
@@ -0,0 +1,142 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "holoscan/core/conditions/gxf/multi_message_available_timeout.hpp"
+
+#include <algorithm>
+#include <any>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include <yaml-cpp/yaml.h>
+
+#include "holoscan/core/component_spec.hpp"
+#include "holoscan/core/fragment.hpp"
+#include "holoscan/core/resource.hpp"
+
+namespace holoscan {
+
+void MultiMessageAvailableTimeoutCondition::setup(ComponentSpec& spec) {
+  spec.param(receivers_,
+             "receivers",
+             "Receivers",
+             "The scheduling term permits execution if the given channels have at least a given "
+             "number of messages available. Note that this parameter is not intended to be passed "
+             "via an argument to `Fragment::make_condition`. Instead, from the Operator::setup "
+             "method, the receiver port names specified in the `port_names` argument to "
+             "`OperatorSpec::multi_port_condition` will be used. Holoscan will take "
+             "care of setting this parameter with the actual receiver objects created during "
+             "initialization of the application.");
+  spec.param(execution_frequency_,
+             "execution_frequency",
+             "Execution frequency of the entity",
+             "The 'execution frequency' indicates the amount of time after which the entity will "
+             "be allowed to execute again, even if the specified number of messages have not yet "
+             "been received. The period is specified as a string consisting of a number and an "
+             "(optional) unit. If no unit is given the value is assumed to be in nanoseconds. "
+             "Supported units are: Hz, s, ms. Example: 10ms, 10000000, 0.2s, 50Hz");
+  spec.param(sampling_mode_,
+             "sampling_mode",
+             "Sampling Mode",
+             "The sampling method to use when checking for messages in receiver queues. "
+             "Options: YAML::Node(\"SumOfAll\"), YAML::Node(\"PerReceiver\")",
+             YAML::Node("SumOfAll"));
+  spec.param(min_sizes_,
+             "min_sizes",
+             "Minimum message counts",
+             "The scheduling term permits execution if all given receivers have at least the "
+             "given number of messages available in this list. This option is only intended for "
+             "use with `sampling_mode = 1` (per-receiver mode). The size of `min_sizes` must match "
+             "the number of receivers.",
+             ParameterFlag::kOptional);
+  spec.param(min_sum_,
+             "min_sum",
+             "Minimum sum of message counts",
+             "The scheduling term permits execution if the sum of message counts of all "
+             "receivers have at least the given number of messages available. This option is only "
+             "intended for use with `sampling_mode = 0` (sum-of-all mode).",
+             ParameterFlag::kOptional);
+}
+
+nvidia::gxf::MessageAvailableFrequencyThrottler* MultiMessageAvailableTimeoutCondition::get()
+    const {
+  return static_cast<nvidia::gxf::MessageAvailableFrequencyThrottler*>(gxf_cptr_);
+}
+
+void MultiMessageAvailableTimeoutCondition::initialize() {
+  // Automatically convert string or enum to YAML::Node for 'sampling_mode' argument
+  auto& current_args = args();
+
+  auto find_it = std::find_if(current_args.begin(), current_args.end(), [](const auto& arg) {
+    bool check = (arg.name() == "sampling_mode" &&
+                  (arg.arg_type().element_type() == ArgElementType::kString ||
+                   arg.arg_type().element_type() == ArgElementType::kCustom) &&
+                  arg.arg_type().container_type() == ArgContainerType::kNative);
+    return check;
+  });
+
+  if (find_it != current_args.end()) {
+    bool yaml_conversion_failed = false;
+    YAML::Node sampling_mode;
+    if (find_it->arg_type().element_type() == ArgElementType::kString) {
+      auto mode_string = std::any_cast<std::string>(find_it->value());
+      if (mode_string == "SumOfAll") {
+        sampling_mode = YAML::Node("SumOfAll");
+      } else if (mode_string == "PerReceiver") {
+        sampling_mode = YAML::Node("PerReceiver");
+      } else {
+        HOLOSCAN_LOG_ERROR("Unrecognized sampling mode string value: {}", mode_string);
+        yaml_conversion_failed = true;
+      }
+    } else {
+      try {
+        auto mode_enum =
+            std::any_cast<MultiMessageAvailableTimeoutCondition::SamplingMode>(find_it->value());
+        if (mode_enum == MultiMessageAvailableTimeoutCondition::SamplingMode::kSumOfAll) {
+          sampling_mode = YAML::Node("SumOfAll");
+        } else if (mode_enum == MultiMessageAvailableTimeoutCondition::SamplingMode::kPerReceiver) {
+          sampling_mode = YAML::Node("PerReceiver");
+        } else {
+          HOLOSCAN_LOG_ERROR("Unrecognized sampling mode enum value: {}",
+                             static_cast<int>(mode_enum));
+          yaml_conversion_failed = true;
+        }
+      } catch (const std::bad_any_cast& e) {
+        HOLOSCAN_LOG_ERROR(
+            "Unable to cast 'sampling_mode' argument to a "
+            "MultiMessageAvailableTimeoutCondition::SamplingMode enum: {}",
+            e.what());
+        yaml_conversion_failed = true;
+      }
+    }
+    if (!yaml_conversion_failed) {
+      // remove the old, non-YAML version of the argument
+      auto new_arg_end =
+          std::remove_if(current_args.begin(), current_args.end(), [](const auto& arg) {
+            return arg.name() == "sampling_mode";
+          });
+      current_args.erase(new_arg_end, current_args.end());
+      // add the YAML::Node argument
+      add_arg(Arg("sampling_mode", sampling_mode));
+    }
+  }
+
+  // parent class initialize() call must be after the argument modification above
+  GXFCondition::initialize();
+}
+
+}  // namespace holoscan
diff --git a/src/core/executors/gxf/gxf_executor.cpp b/src/core/executors/gxf/gxf_executor.cpp
index 1a93753f..494a1447 100644
--- a/src/core/executors/gxf/gxf_executor.cpp
+++ b/src/core/executors/gxf/gxf_executor.cpp
@@ -27,6 +27,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -41,6 +42,8 @@
 #include "holoscan/core/conditions/gxf/downstream_affordable.hpp"
 #include "holoscan/core/conditions/gxf/expiring_message.hpp"
 #include "holoscan/core/conditions/gxf/message_available.hpp"
+#include "holoscan/core/conditions/gxf/multi_message_available.hpp"
+#include "holoscan/core/conditions/gxf/multi_message_available_timeout.hpp"
 #include "holoscan/core/config.hpp"
 #include "holoscan/core/domain/tensor.hpp"
 #include "holoscan/core/errors.hpp"
@@ -48,6 +51,7 @@
 #include "holoscan/core/graph.hpp"
 #include "holoscan/core/graphs/flow_graph.hpp"
 #include "holoscan/core/gxf/entity.hpp"
+#include "holoscan/core/gxf/entity_group.hpp"
 #include "holoscan/core/gxf/gxf_extension_registrar.hpp"
"holoscan/core/gxf/gxf_extension_registrar.hpp" #include "holoscan/core/gxf/gxf_network_context.hpp" #include "holoscan/core/gxf/gxf_operator.hpp" @@ -66,6 +70,8 @@ #include "holoscan/core/resources/gxf/double_buffer_transmitter.hpp" #include "holoscan/core/resources/gxf/holoscan_ucx_receiver.hpp" #include "holoscan/core/resources/gxf/holoscan_ucx_transmitter.hpp" +#include "holoscan/core/resources/gxf/system_resources.hpp" +#include "holoscan/core/schedulers/gxf/greedy_scheduler.hpp" #include "holoscan/core/services/common/forward_op.hpp" #include "holoscan/core/services/common/virtual_operator.hpp" #include "holoscan/core/signal_handler.hpp" @@ -75,6 +81,7 @@ #include "gxf/std/default_extension.hpp" #include "gxf/std/extension_factory_helper.hpp" #include "gxf/std/monitor.hpp" +#include "gxf/std/receiver.hpp" #include "gxf/test/components/entity_monitor.hpp" namespace holoscan::gxf { @@ -393,7 +400,7 @@ void bind_input_port(Fragment* fragment, gxf_context_t gxf_context, gxf_uid_t ei "Unable to support types other than ConnectorType::kDefault (rx_name: '{}')", rx_name)); } const char* entity_name = ""; - HOLOSCAN_GXF_CALL_FATAL(GxfComponentName(gxf_context, eid, &entity_name)); + HOLOSCAN_GXF_CALL_FATAL(GxfEntityGetName(gxf_context, eid, &entity_name)); gxf_tid_t receiver_find_tid{}; HOLOSCAN_GXF_CALL_FATAL( @@ -608,10 +615,22 @@ void GXFExecutor::create_input_port(Fragment* fragment, gxf_context_t gxf_contex // Set the default scheduling term for this input if (io_spec->conditions().empty()) { - ArgList args; - args.add(Arg("min_size") = static_cast(queue_size)); - io_spec->condition( - ConditionType::kMessageAvailable, Arg("receiver") = io_spec->connector(), args); + // Check if the receiver for this io_spec is already involved in a multi-message condition + bool port_has_multi_port_condition = false; + for (auto& condition_info : op->spec()->multi_port_conditions()) { + const auto& in_names = condition_info.port_names; + if (std::find(in_names.begin(), in_names.end(), rx_name) != in_names.end()) { + port_has_multi_port_condition = true; + break; + } + } + // Only add a MessageAvailable condition if it is not already associated with a condition + if (!port_has_multi_port_condition) { + ArgList args; + args.add(Arg("min_size") = static_cast(queue_size)); + io_spec->condition( + ConditionType::kMessageAvailable, Arg("receiver") = io_spec->connector(), args); + } } // Initialize conditions for this input @@ -624,14 +643,14 @@ void GXFExecutor::create_input_port(Fragment* fragment, gxf_context_t gxf_contex std::dynamic_pointer_cast(condition); // Note: GraphEntity::addSchedulingTerm requires a unique name here std::string cond_name = - fmt::format("__{}_{}_cond_{}", op->name(), rx_name, condition_index); + fmt::format("__{}_{}_message_available{}", op->name(), rx_name, condition_index); message_available_condition->receiver(connector); message_available_condition->name(cond_name); message_available_condition->fragment(fragment); auto rx_condition_spec = std::make_shared(fragment); message_available_condition->setup(*rx_condition_spec); message_available_condition->spec(std::move(rx_condition_spec)); - // Add to the same entity as the operator and initialize + // Add to the same entity as the operator. 
initialize() will be called later message_available_condition->add_to_graph_entity(op); break; } @@ -640,17 +659,36 @@ void GXFExecutor::create_input_port(Fragment* fragment, gxf_context_t gxf_contex std::dynamic_pointer_cast<ExpiringMessageAvailableCondition>(condition); // Note: GraphEntity::addSchedulingTerm requires a unique name here std::string cond_name = - fmt::format("__{}_{}_cond_{}", op->name(), rx_name, condition_index); + fmt::format("__{}_{}_expiring_message{}", op->name(), rx_name, condition_index); expiring_message_available_condition->receiver(connector); expiring_message_available_condition->name(cond_name); expiring_message_available_condition->fragment(fragment); auto rx_condition_spec = std::make_shared<ComponentSpec>(fragment); expiring_message_available_condition->setup(*rx_condition_spec); expiring_message_available_condition->spec(std::move(rx_condition_spec)); - // Add to the same entity as the operator and initialize + // Add to the same entity as the operator. initialize() will be called later expiring_message_available_condition->add_to_graph_entity(op); break; } + case ConditionType::kMultiMessageAvailableTimeout: { + std::shared_ptr<MultiMessageAvailableTimeoutCondition> multi_message_timeout_condition = + std::dynamic_pointer_cast<MultiMessageAvailableTimeoutCondition>(condition); + // Note: GraphEntity::addSchedulingTerm requires a unique name here + std::string cond_name = + fmt::format("__{}_{}_message_timeout{}", op->name(), rx_name, condition_index); + + // vector with a single receiver corresponding to this IOSpec + std::vector<std::shared_ptr<Receiver>> receivers({connector}); + multi_message_timeout_condition->receivers(receivers); + multi_message_timeout_condition->name(cond_name); + multi_message_timeout_condition->fragment(fragment); + auto rx_condition_spec = std::make_shared<ComponentSpec>(fragment); + multi_message_timeout_condition->setup(*rx_condition_spec); + multi_message_timeout_condition->spec(std::move(rx_condition_spec)); + // Add to the same entity as the operator. initialize() will be called later + multi_message_timeout_condition->add_to_graph_entity(op); + break; + } case ConditionType::kNone: // No condition break; @@ -679,7 +717,7 @@ void bind_output_port(Fragment* fragment, gxf_context_t gxf_context, gxf_uid_t e "Unable to support types other than ConnectorType::kDefault (tx_name: '{}')", tx_name)); } const char* entity_name = ""; - HOLOSCAN_GXF_CALL_FATAL(GxfComponentName(gxf_context, eid, &entity_name)); + HOLOSCAN_GXF_CALL_FATAL(GxfEntityGetName(gxf_context, eid, &entity_name)); gxf_tid_t transmitter_find_tid{}; HOLOSCAN_GXF_CALL_FATAL( @@ -1635,6 +1673,51 @@ bool GXFExecutor::initialize_fragment() { HOLOSCAN_LOG_DEBUG("No target of op {} has a UCX connector.", op_name); } } + + // Finish initialization of any thread pools after all operators have been initialized. + if (!fragment_->thread_pools_.empty()) { + if (typeid(*fragment_->scheduler()) == typeid(GreedyScheduler)) { + HOLOSCAN_LOG_WARN( + "The GreedyScheduler does not support thread pools. The thread pools defined by this " + "application will be ignored. 
To use thread pools, switch to either the " "EventBasedScheduler or MultiThreadScheduler."); + } + + // Update entity groups for operators that were assigned to a thread pool + for (const auto& pool : fragment_->thread_pools_) { + HOLOSCAN_LOG_DEBUG("Configuring thread pool: {}", pool->name()); + auto pool_entity_group = pool->entity_group(); + // add all operators associated with this pool to its entity group + // (Note: This will also remove the operator from whatever entity group it was in previously) + int32_t gpu_device = -1; + for (auto& op : pool->operators()) { + pool_entity_group->add(op, entity_prefix_); + + // Warn if the operators in the thread pool are not all on the same GPUDevice. + // (CudaStreamPool, RMMAllocator, StreamOrderedAllocator and BlockMemoryPool components for + // all operators in the thread pool must have been defined with the same integer "dev_id" + // parameter value). + auto current_dev_id = holoscan::gxf::gxf_device_id(context_, op->graph_entity()->eid()); + if (current_dev_id.has_value()) { + if (gpu_device == -1) { + gpu_device = current_dev_id.value(); + } else if (gpu_device != current_dev_id.value()) { + std::string err_msg = fmt::format( + "All operators in thread pool '{}' must be using the same GPU device. Operator " + "'{}' has a component using a GPUDevice with CUDA device id {} but a prior " + "operator in the pool was using a component with device id {}. Please use " + "separate thread pools for operators on different devices.", + pool->name(), + op->name(), + current_dev_id.value(), + gpu_device); + HOLOSCAN_LOG_ERROR(err_msg); + throw std::runtime_error(err_msg); + } + } + } + } + } return true; } @@ -1678,6 +1761,7 @@ bool GXFExecutor::initialize_operator(Operator* op) { // Create Components for input const auto& inputs = spec.inputs(); for (const auto& [name, io_spec] : inputs) { + HOLOSCAN_LOG_INFO("creating input IOSpec named '{}'", name); gxf::GXFExecutor::create_input_port(fragment(), context_, eid, io_spec.get(), op_eid_ != 0, op); } @@ -1688,6 +1772,66 @@ bool GXFExecutor::initialize_operator(Operator* op) { fragment(), context_, eid, io_spec.get(), op_eid_ != 0, op); } + // Add any multi-message conditions + size_t multi_port_condition_index = 0; + for (auto& condition_info : spec.multi_port_conditions()) { + HOLOSCAN_LOG_INFO("Found a multi-message condition, adding it..."); + // get receiver objects corresponding to the input port names specified + std::vector<std::shared_ptr<Receiver>> condition_receivers; + condition_receivers.reserve(condition_info.port_names.size()); + for (auto& input_port_name : condition_info.port_names) { + auto it = inputs.find(input_port_name); + if (it == inputs.end()) { + HOLOSCAN_LOG_ERROR("Input port '{}' requested by a multi-message condition was not found", + input_port_name); + break; + } + condition_receivers.push_back(it->second->connector()); + } + // skip adding the condition if any of the inputs was not found + if (condition_receivers.size() != condition_info.port_names.size()) { + HOLOSCAN_LOG_ERROR( + "Multi-message condition requested {} input ports, but {} were found. 
The requested " "condition will not be added.", + condition_info.port_names.size(), + condition_receivers.size()); + break; + } + // add the receiver objects to the argument list + condition_info.args.add(holoscan::Arg{"receivers", condition_receivers}); + switch (condition_info.kind) { + case ConditionType::kMultiMessageAvailable: { + HOLOSCAN_LOG_TRACE("Adding a MultiMessageAvailableCondition to operator '{}'", op->name()); + const std::string& condition_name = + fmt::format("__{}_multi_message{}", op->name(), multi_port_condition_index); + auto multi_port_condition = fragment()->make_condition<MultiMessageAvailableCondition>( + condition_name, condition_info.args); + // Add to the same entity as the operator + multi_port_condition->add_to_graph_entity(op); + op->add_arg(multi_port_condition); + break; + } + case ConditionType::kMultiMessageAvailableTimeout: { + HOLOSCAN_LOG_TRACE("Adding a MultiMessageAvailableTimeoutCondition to operator '{}'", + op->name()); + const std::string& condition_name = + fmt::format("__{}_multi_message_timeout{}", op->name(), multi_port_condition_index); + auto multi_port_condition = + fragment()->make_condition<MultiMessageAvailableTimeoutCondition>(condition_name, + condition_info.args); + // Add to the same entity as the operator + multi_port_condition->add_to_graph_entity(op); + op->add_arg(multi_port_condition); + break; + } + default: + throw std::runtime_error( + fmt::format("Condition type {} is not a supported multi-message condition", + static_cast<int>(condition_info.kind))); + } + multi_port_condition_index++; + } + HOLOSCAN_LOG_TRACE("Configuring operator: {}", op->name()); // add Component(s) and/or Resource(s) added as Arg/ArgList to the graph entity @@ -1744,6 +1888,25 @@ bool GXFExecutor::is_holoscan() const { return zero_eid && zero_cid; } +std::shared_ptr<GPUDevice> GXFExecutor::add_gpu_device_to_graph_entity( + const std::string& device_name, std::shared_ptr<nvidia::gxf::GraphEntity> graph_entity, + std::optional<int32_t> device_id) { + int32_t gpu_id; + if (device_id.has_value()) { + gpu_id = device_id.value(); + } else { + gpu_id = static_cast<int32_t>(AppDriver::get_int_env_var("HOLOSCAN_UCX_DEVICE_ID", 0)); + } + auto gpu_device = fragment_->make_resource<GPUDevice>( + device_name, holoscan::Arg("dev_id", static_cast<int32_t>(gpu_id))); + + gpu_device->gxf_eid(graph_entity->eid()); + gpu_device->add_to_graph_entity(fragment_, graph_entity); + gpu_device->initialize(); + + return gpu_device; +} + bool GXFExecutor::initialize_gxf_graph(OperatorGraph& graph) { if (is_gxf_graph_initialized_) { HOLOSCAN_LOG_WARN("GXF graph is already initialized. Skipping initialization."); @@ -1871,62 +2034,95 @@ bool GXFExecutor::initialize_gxf_graph(OperatorGraph& graph) { network_context->gxf_eid(eid); network_context->initialize(); - auto entity_group_gid = ::holoscan::gxf::add_entity_group(context_, "network_entity_group"); + // add network_context to the network_entity_group + auto network_entity_group = + std::make_shared<EntityGroup>(context_, "network_entity_group"); - int32_t gpu_id = - static_cast<int32_t>(AppDriver::get_int_env_var("HOLOSCAN_UCX_DEVICE_ID", 0)); - std::string device_entity_name = fmt::format("{}gpu_device_entity", entity_prefix_); + // add a GPUDevice to the network_entity_group + // This is for the NetworkContext and Broadcast codelets that have a UcxTransmitter or + // UcxReceiver. 
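+ // A minimal sketch (names `my_group`, `some_eid`, `some_gxf_component`, and
+ // `some_operator` are hypothetical) of the EntityGroup wrapper this patch adds in
+ // src/core/gxf/entity_group.cpp:
+ //
+ //   auto my_group = std::make_shared<EntityGroup>(context_, "my_group");
+ //   my_group->add(some_eid);                       // group an entity by its GXF eid
+ //   my_group->add(*some_gxf_component);            // group a GXFComponent
+ //   my_group->add(some_operator, entity_prefix_);  // group a holoscan::Operator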
+ network_entity_group->add(*network_context); + // create new Entity to hold the GPUDevice + std::string device_name = fmt::format("{}gpu_device_entity", entity_prefix_); gpu_device_entity_ = std::make_shared<nvidia::gxf::GraphEntity>(); - auto maybe = gpu_device_entity_->setup(context, device_entity_name.c_str()); - if (!maybe) { - throw std::runtime_error( - fmt::format("Failed to create GPU device entity: '{}'", device_entity_name)); - } - // TODO (GXF4): should have an addResource to add to resources_ member instead of components_? - auto device_handle = gpu_device_entity_->addComponent( - "nvidia::gxf::GPUDevice", "gpu_device_component", {nvidia::gxf::Arg("dev_id", gpu_id)}); - if (device_handle.is_null()) { - HOLOSCAN_LOG_ERROR("Failed to create GPU device resource for device {}", gpu_id); + auto maybe = gpu_device_entity_->setup(context_, device_name.c_str()); + if (maybe) { + auto gpu_device = add_gpu_device_to_graph_entity(device_name, gpu_device_entity_); + network_entity_group->add(*gpu_device); + } else { + // failed to create the GPUDevice entity; the default device (CUDA id 0) will be used. + HOLOSCAN_LOG_ERROR( + "Failed to generate a new GraphEntity to hold a GPUDevice. CUDA device id 0 will be " + "used."); } - // Note: GxfUpdateEntityGroup - // calls Runtime::GxfUpdateEntityGroup(gid, eid) - // which calls EntityGroups::groupAddEntity(gid, eid); (entity_groups_ in - // SharedContext) - // which calls EntityGroupItem::addEntity for the EntityGroupItem corresponding to - // gid - // any eid corresponding to a ResourceBase class like GPUDevice or ThreadPool is - // stored in internal resources_ vector - // all other eid are stored in the entities vector - - // add GPUDevice resource to the networking entity group - GXF_ASSERT_SUCCESS( - GxfUpdateEntityGroup(context_, entity_group_gid, gpu_device_entity_->eid())); - - // add the network context to the entity group - auto gxf_network_context = - std::dynamic_pointer_cast<GXFNetworkContext>(fragment_->network_context()); - HOLOSCAN_GXF_CALL_FATAL( - GxfUpdateEntityGroup(context, entity_group_gid, gxf_network_context->gxf_eid())); - - // Loop through all operators and add any operators with a UCX port to the entity group + // Loop through all operators and define a GPUDevice resource for any operators with a UCX + // port (if one does not already exist). 
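+ // A minimal sketch of the device lookup helper relied on below (gxf_device_id is
+ // added to src/core/gxf/gxf_utils.cpp by this patch; `some_eid` is a placeholder):
+ //
+ //   std::optional<int32_t> maybe_id = holoscan::gxf::gxf_device_id(context_, some_eid);
+ //   if (maybe_id.has_value()) {
+ //     HOLOSCAN_LOG_DEBUG("entity {} has a GPUDevice with CUDA id {}", some_eid,
+ //                        maybe_id.value());
+ //   }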
auto& operator_graph = static_cast(fragment_->graph()); + int generated_device_entity_count = 0; + std::unordered_set groups_with_device; for (auto& node : operator_graph.get_nodes()) { + // exit early for virtual operators + if (node->operator_type() == Operator::OperatorType::kVirtual) { continue; } + auto op_spec = node->spec(); - bool already_added = false; + bool has_ucx_connector = false; for (const auto& [_, io_spec] : op_spec->inputs()) { if (io_spec->connector_type() == IOSpec::ConnectorType::kUCX) { - add_operator_to_entity_group(context, entity_group_gid, node); - already_added = true; + has_ucx_connector = true; break; } } - if (already_added) { continue; } - for (const auto& [_, io_spec] : op_spec->outputs()) { - if (io_spec->connector_type() == IOSpec::ConnectorType::kUCX) { - add_operator_to_entity_group(context, entity_group_gid, node); - break; + if (!has_ucx_connector) { + for (const auto& [_, io_spec] : op_spec->outputs()) { + if (io_spec->connector_type() == IOSpec::ConnectorType::kUCX) { + has_ucx_connector = true; + break; + } + } + } + // done if there is no UCX connector + if (!has_ucx_connector) { continue; } + + // Add a GPUDevice to the entity group if one does not already exist there + // (UcxTransmitter and/or UcxReceiver expect to find a GPUDevice resource). + auto graph_entity = node->graph_entity(); + if (!graph_entity) { + HOLOSCAN_LOG_ERROR( + "Operator '{}' with UCX connectors does not have a graph entity, " + "could not add GPUDevice", + node->name()); + continue; + } + + auto op_eid = graph_entity->eid(); + auto maybe_device_id = holoscan::gxf::gxf_device_id(context_, op_eid); + auto entity_group_name = holoscan::gxf::gxf_entity_group_name(context_, op_eid); + if (groups_with_device.find(entity_group_name) != groups_with_device.end()) { + // already added a GPUDevice to this entity group + continue; + } + if (maybe_device_id) { + HOLOSCAN_LOG_DEBUG( + "operator '{}' is in EntityGroup '{}' with a GPUDevice having CUDA ID '{}'", + node->name(), + entity_group_name, + maybe_device_id.value()); + groups_with_device.insert(entity_group_name); + } else { + HOLOSCAN_LOG_DEBUG("operator '{}' is in EntityGroup '{}' without a GPUDevice resource", + node->name(), + entity_group_name); + std::string device_name = + fmt::format("{}gpu_device_entity{}", entity_prefix_, generated_device_entity_count); + auto maybe_gpu_device = add_gpu_device_to_graph_entity(device_name, graph_entity); + if (maybe_gpu_device) { + HOLOSCAN_LOG_DEBUG( + "Generated GPUDevice '{}' for operator '{}'", device_name, node->name()); + generated_device_entity_count++; } + // store in set to avoid adding multiple GPUDevice objects to the same entity group + groups_with_device.insert(entity_group_name); } } @@ -1935,10 +2131,12 @@ bool GXFExecutor::initialize_gxf_graph(OperatorGraph& graph) { // Add the entity to the entity group if it has a UCX connector if (has_ucx_connector(broadcast_entity)) { auto broadcast_eid = broadcast_entity->eid(); - HOLOSCAN_LOG_DEBUG("Adding implicit broadcast eid '{}' to entity group '{}'", + HOLOSCAN_LOG_DEBUG("Adding implicit broadcast eid '{}' to entity group '{}' with id '{}'", broadcast_eid, - entity_group_gid); - HOLOSCAN_GXF_CALL_FATAL(GxfUpdateEntityGroup(context, entity_group_gid, broadcast_eid)); + network_entity_group->name(), + network_entity_group->gxf_gid()); + HOLOSCAN_GXF_CALL_FATAL( + GxfUpdateEntityGroup(context, network_entity_group->gxf_gid(), broadcast_eid)); } } } else { diff --git a/src/core/flow_tracking_annotation.cpp 
b/src/core/flow_tracking_annotation.cpp index f0f7ff1e..bfde6dbd 100644 --- a/src/core/flow_tracking_annotation.cpp +++ b/src/core/flow_tracking_annotation.cpp @@ -17,6 +17,7 @@ #include "holoscan/core/flow_tracking_annotation.hpp" +#include #include #include "holoscan/core/fragment.hpp" @@ -50,13 +51,26 @@ gxf_result_t annotate_message(gxf_uid_t uid, const gxf_context_t& context, Opera // gxf_entity->deactivate(); MessageLabel m; m = std::move(op->get_consolidated_input_label()); - m.update_last_op_publish(); - // Check if a message_label component already exists in the entity + std::shared_ptr<Operator> op_shared_ptr(op, [](Operator*) {}); + + bool is_current_op_root = op->is_root() || op->is_user_defined_root() || + holoscan::Operator::is_all_operator_predecessor_virtual( + op_shared_ptr, op->fragment()->graph()); + if (!op->fragment()->data_flow_tracker()->limited_tracking() || + (op->fragment()->data_flow_tracker()->limited_tracking() && + is_current_op_root)) { // update the timestamp unless limited tracking is enabled and this is not a root + m.update_last_op_publish(); + } + + HOLOSCAN_LOG_DEBUG("annotate_message: MessageLabel: {}", m.to_string()); + static gxf_tid_t message_label_tid = GxfTidNull(); if (message_label_tid == GxfTidNull()) { GxfComponentTypeId(context, "holoscan::MessageLabel", &message_label_tid); } + + // Check if a message_label component already exists in the entity // If a message_label component already exists in the entity, just update the value of the // MessageLabel if (gxf::has_component(context, uid, message_label_tid, "message_label")) { @@ -108,7 +122,16 @@ gxf_result_t deannotate_message(gxf_uid_t* uid, const gxf_context_t& context, Op // Find whether current operator is already in the paths of message label m auto cyclic_path_indices = m.has_operator(op->qualified_name()); if (cyclic_path_indices.empty()) { // No cyclic paths - m.add_new_op_timestamp(cur_op_timestamp); + std::shared_ptr<Operator> op_shared_ptr(op, [](Operator*) {}); + bool is_current_op_leaf = + op->is_leaf() || holoscan::Operator::is_all_operator_successor_virtual( + op_shared_ptr, op->fragment()->graph()); + if (!op->fragment()->data_flow_tracker()->limited_tracking() || + (op->fragment()->data_flow_tracker()->limited_tracking() && + is_current_op_leaf)) { // add a new timestamp unless limited tracking is enabled and this is not a leaf + m.add_new_op_timestamp(cur_op_timestamp); + } + HOLOSCAN_LOG_DEBUG("deannotate_message: MessageLabel: {}", m.to_string()); op->update_input_message_label(receiver_name, m); } else { // Update the publish timestamp of current operator where the cycle ends, to be the same as diff --git a/src/core/fragment.cpp b/src/core/fragment.cpp index c72c3a68..fdfebcea 100644 --- a/src/core/fragment.cpp +++ b/src/core/fragment.cpp @@ -34,9 +34,11 @@ #include "holoscan/core/dataflow_tracker.hpp" #include "holoscan/core/executors/gxf/gxf_executor.hpp" #include "holoscan/core/graphs/flow_graph.hpp" -#include "holoscan/core/operator.hpp" +#include "holoscan/core/gxf/entity_group.hpp" #include "holoscan/core/gxf/gxf_network_context.hpp" #include "holoscan/core/gxf/gxf_scheduler.hpp" +#include "holoscan/core/operator.hpp" +#include "holoscan/core/resources/gxf/system_resources.hpp" #include "holoscan/core/schedulers/gxf/greedy_scheduler.hpp" using std::string_literals::operator""s; @@ -496,12 +498,13 @@ std::future<void> Fragment::run_async() { holoscan::DataFlowTracker& Fragment::track(uint64_t num_start_messages_to_skip, uint64_t num_last_messages_to_discard, - int latency_threshold) { + int latency_threshold, bool 
is_limited_tracking) { if (!data_flow_tracker_) { data_flow_tracker_ = std::make_shared<DataFlowTracker>(); data_flow_tracker_->set_skip_starting_messages(num_start_messages_to_skip); data_flow_tracker_->set_discard_last_messages(num_last_messages_to_discard); data_flow_tracker_->set_skip_latencies(latency_threshold); + data_flow_tracker_->set_limited_tracking(is_limited_tracking); } return *data_flow_tracker_; } @@ -589,4 +592,45 @@ void Fragment::load_extensions_from_config() { } } +/** + * @brief Create a new thread pool resource. + * + * @param name The name of the resource. + * @param initial_size The initial number of worker threads in the pool. + * @return The shared pointer to the ThreadPool resource. + */ +std::shared_ptr<ThreadPool> Fragment::make_thread_pool(const std::string& name, + int64_t initial_size) { + // Create a dedicated GXF Entity for the ThreadPool + // (unlike a typical Condition/Resource, it does not belong to the same entity as an operator) + auto pool_entity = std::make_shared<nvidia::gxf::GraphEntity>(); + auto pool_entity_name = fmt::format("{}_{}_entity", this->name(), name); + auto maybe_pool = pool_entity->setup(executor().context(), pool_entity_name.c_str()); + if (!maybe_pool) { + throw std::runtime_error( + fmt::format("Failed to create thread pool entity: '{}'", pool_entity_name)); + } + + // Create the ThreadPool resource + auto pool_resource = make_resource<ThreadPool>(name, holoscan::Arg("initial_size", initial_size)); + + // Assign the pool to the entity that was created above and initialize it via add_to_graph_entity + pool_resource->gxf_eid(pool_entity->eid()); + pool_resource->add_to_graph_entity(this, pool_entity); + + auto pool_group = std::make_shared<gxf::EntityGroup>(executor().context(), + fmt::format("{}_group", pool_entity_name)); + pool_resource->entity_group(std::move(pool_group)); + + // Add this ThreadPool into the entity group + pool_resource->entity_group()->add(*pool_resource); + + // Store pointers to all thread pools so initialization of entity groups can be + // performed later by GXFExecutor. We can only add operators to the entity group AFTER they have + // been initialized in GXFExecutor. + thread_pools_.push_back(pool_resource); + + return pool_resource; +} + } // namespace holoscan diff --git a/src/core/gxf/entity_group.cpp b/src/core/gxf/entity_group.cpp new file mode 100644 index 00000000..3236858c --- /dev/null +++ b/src/core/gxf/entity_group.cpp @@ -0,0 +1,53 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "holoscan/core/gxf/entity_group.hpp" + +#include +#include + +#include +#include + +namespace holoscan::gxf { + +EntityGroup::EntityGroup(gxf_context_t context, const std::string& name) { + name_ = name; + gxf_context_ = context; + HOLOSCAN_GXF_CALL_FATAL(GxfCreateEntityGroup(context, name.c_str(), &gxf_gid_)); +} + +void EntityGroup::add(gxf_uid_t eid) { + HOLOSCAN_GXF_CALL_FATAL(GxfUpdateEntityGroup(gxf_context_, gxf_gid_, eid)); +} + +void EntityGroup::add(const GXFComponent& component) { + HOLOSCAN_GXF_CALL_FATAL(GxfUpdateEntityGroup(gxf_context_, gxf_gid_, component.gxf_eid())); +} + +void EntityGroup::add(std::shared_ptr<Operator> op, const std::string& entity_prefix) { + gxf_uid_t op_eid = kNullUid; + if (op->operator_type() == Operator::OperatorType::kGXF) { + op_eid = std::dynamic_pointer_cast<ops::GXFOperator>(op)->gxf_eid(); + } else { + // get the GXF entity ID corresponding to the native operator's GXF Codelet + const std::string op_entity_name = fmt::format("{}{}", entity_prefix, op->name()); + HOLOSCAN_GXF_CALL_FATAL(GxfEntityFind(gxf_context_, op_entity_name.c_str(), &op_eid)); + } + add(op_eid); +} +} // namespace holoscan::gxf diff --git a/src/core/gxf/gxf_component.cpp b/src/core/gxf/gxf_component.cpp index 22bcff67..59e0e0e1 100644 --- a/src/core/gxf/gxf_component.cpp +++ b/src/core/gxf/gxf_component.cpp @@ -142,4 +142,16 @@ void GXFComponent::set_gxf_parameter(const std::string& component_name, const st // TODO(unknown): handle error } +std::string GXFComponent::gxf_entity_group_name() { + const char* name; + HOLOSCAN_GXF_CALL_FATAL(GxfEntityGroupName(gxf_context_, gxf_eid_, &name)); + return std::string{name}; +} + +gxf_uid_t GXFComponent::gxf_entity_group_id() { + gxf_uid_t gid; + HOLOSCAN_GXF_CALL_FATAL(GxfEntityGroupId(gxf_context_, gxf_eid_, &gid)); + return gid; +} + } // namespace holoscan::gxf diff --git a/src/core/gxf/gxf_condition.cpp b/src/core/gxf/gxf_condition.cpp index 7e8a7268..691409dd 100644 --- a/src/core/gxf/gxf_condition.cpp +++ b/src/core/gxf/gxf_condition.cpp @@ -99,4 +99,12 @@ void GXFCondition::add_to_graph_entity(Operator* op) { this->initialize(); } +YAML::Node GXFCondition::to_yaml_node() const { + YAML::Node node = Condition::to_yaml_node(); + node["gxf_eid"] = YAML::Node(gxf_eid()); + node["gxf_cid"] = YAML::Node(gxf_cid()); + node["gxf_typename"] = YAML::Node(gxf_typename()); + return node; +} + } // namespace holoscan::gxf diff --git a/src/core/gxf/gxf_network_context.cpp b/src/core/gxf/gxf_network_context.cpp index 0dc17e2b..fb77c81e 100644 --- a/src/core/gxf/gxf_network_context.cpp +++ b/src/core/gxf/gxf_network_context.cpp @@ -41,4 +41,12 @@ void GXFNetworkContext::reset_graph_entities() { reset_gxf_graph_entity(); } +YAML::Node GXFNetworkContext::to_yaml_node() const { + YAML::Node node = NetworkContext::to_yaml_node(); + node["gxf_eid"] = YAML::Node(gxf_eid()); + node["gxf_cid"] = YAML::Node(gxf_cid()); + node["gxf_typename"] = YAML::Node(gxf_typename()); + return node; +} + } // namespace holoscan::gxf diff --git a/src/core/gxf/gxf_operator.cpp b/src/core/gxf/gxf_operator.cpp index e11f027d..11bf7f8c 100644 --- a/src/core/gxf/gxf_operator.cpp +++ b/src/core/gxf/gxf_operator.cpp @@ -21,6 +21,12 @@ namespace holoscan::ops { +std::string GXFOperator::gxf_entity_group_name() const { + const char* name; + HOLOSCAN_GXF_CALL_FATAL(GxfEntityGroupName(gxf_context_, gxf_eid_, &name)); + return std::string{name}; +} + void GXFOperator::initialize() { // Call base class initialize function. 
Operator::initialize(); @@ -55,4 +61,13 @@ void GXFOperator::set_parameters() { } } +YAML::Node GXFOperator::to_yaml_node() const { + YAML::Node node = Operator::to_yaml_node(); + node["gxf_eid"] = YAML::Node(gxf_eid()); + node["gxf_cid"] = YAML::Node(gxf_cid()); + node["gxf_typename"] = YAML::Node(gxf_typename()); + node["gxf_entity_group_name"] = YAML::Node(gxf_entity_group_name()); + return node; +} + } // namespace holoscan::ops diff --git a/src/core/gxf/gxf_resource.cpp b/src/core/gxf/gxf_resource.cpp index b8a962d0..b80c2ebb 100644 --- a/src/core/gxf/gxf_resource.cpp +++ b/src/core/gxf/gxf_resource.cpp @@ -19,6 +19,7 @@ #include #include +#include #include #include #include @@ -101,15 +102,20 @@ void GXFResource::initialize() { } void GXFResource::add_to_graph_entity(Operator* op) { + add_to_graph_entity(op->fragment(), op->graph_entity()); +} + +void GXFResource::add_to_graph_entity(Fragment* fragment, + std::shared_ptr graph_entity) { if (gxf_context_ == nullptr) { // cannot reassign to a different graph entity if the resource was already initialized with GXF if (gxf_graph_entity_ && is_initialized_) { return; } - gxf_graph_entity_ = op->graph_entity(); - fragment_ = op->fragment(); + gxf_graph_entity_ = graph_entity; + fragment_ = fragment; if (gxf_graph_entity_) { - gxf_context_ = gxf_graph_entity_->context(); - gxf_eid_ = gxf_graph_entity_->eid(); + gxf_context_ = graph_entity->context(); + gxf_eid_ = graph_entity->eid(); } } this->initialize(); @@ -214,4 +220,16 @@ bool GXFResource::handle_dev_id(std::optional& dev_id_value) { return false; } +YAML::Node GXFResource::to_yaml_node() const { + YAML::Node node = Resource::to_yaml_node(); + node["gxf_eid"] = YAML::Node(gxf_eid()); + node["gxf_cid"] = YAML::Node(gxf_cid()); + node["gxf_typename"] = YAML::Node(gxf_typename()); + return node; +} + +GXFSystemResourceBase::GXFSystemResourceBase(const std::string& name, + nvidia::gxf::ResourceBase* component) + : GXFResource(name, component) {} + } // namespace holoscan::gxf diff --git a/src/core/gxf/gxf_scheduler.cpp b/src/core/gxf/gxf_scheduler.cpp index b9310eaf..f95139d9 100644 --- a/src/core/gxf/gxf_scheduler.cpp +++ b/src/core/gxf/gxf_scheduler.cpp @@ -51,4 +51,12 @@ void GXFScheduler::reset_graph_entities() { reset_gxf_graph_entity(); } +YAML::Node GXFScheduler::to_yaml_node() const { + YAML::Node node = Scheduler::to_yaml_node(); + node["gxf_eid"] = YAML::Node(gxf_eid()); + node["gxf_cid"] = YAML::Node(gxf_cid()); + node["gxf_typename"] = YAML::Node(gxf_typename()); + return node; +} + } // namespace holoscan::gxf diff --git a/src/core/gxf/gxf_utils.cpp b/src/core/gxf/gxf_utils.cpp index 12f46638..105c5a62 100644 --- a/src/core/gxf/gxf_utils.cpp +++ b/src/core/gxf/gxf_utils.cpp @@ -14,19 +14,21 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ +#include + #include #include #include #include +#include + #include "holoscan/core/gxf/gxf_utils.hpp" #include "holoscan/core/common.hpp" #include "holoscan/core/gxf/gxf_execution_context.hpp" #include "holoscan/core/io_context.hpp" -#include "gxf/std/transmitter.hpp" - namespace holoscan::gxf { gxf_uid_t get_component_eid(gxf_context_t context, gxf_uid_t cid) { @@ -100,4 +102,53 @@ uint64_t get_default_queue_policy() { return 2UL; // fail } +std::optional gxf_device_id(gxf_context_t context, gxf_uid_t eid) { + // Get handle to entity + auto maybe = nvidia::gxf::Entity::Shared(context, eid); + if (!maybe) { + HOLOSCAN_LOG_ERROR("Failed to create shared Entity for eid {}", eid); + return std::nullopt; + } + auto entity = maybe.value(); + // Find all GPUDevice components + auto maybe_resources = entity.findAllHeap(); + if (!maybe_resources) { + HOLOSCAN_LOG_ERROR("Failed to find resources in entity"); + return std::nullopt; + } + auto resources = std::move(maybe_resources.value()); + if (resources.empty()) { return std::nullopt; } + if (resources.size() > 1) { + HOLOSCAN_LOG_WARN( + "Multiple ({}) GPUDevice resources found in entity {}.", resources.size(), eid); + } + + int32_t device_id = resources.at(0).value()->device_id(); + // Loop over any additional device ID(s), warning if there are multiple conflicting IDs + for (size_t i = 1; i < resources.size(); i++) { + int32_t this_dev_id = resources.at(i).value()->device_id(); + if (this_dev_id != device_id) { + HOLOSCAN_LOG_WARN( + "Additional GPUDevice resources with conflicting CUDA device ID {} found in entity " + "{}. The CUDA device ID of the first device found ({}) will be returned.", + this_dev_id, + eid, + device_id); + } + } + return device_id; +} + +std::string gxf_entity_group_name(gxf_context_t context, gxf_uid_t eid) { + const char* name; + HOLOSCAN_GXF_CALL_FATAL(GxfEntityGroupName(context, eid, &name)); + return std::string{name}; +} + +gxf_uid_t gxf_entity_group_id(gxf_context_t context, gxf_uid_t eid) { + gxf_uid_t gid; + HOLOSCAN_GXF_CALL_FATAL(GxfEntityGroupId(context, eid, &gid)); + return gid; +} + } // namespace holoscan::gxf diff --git a/src/core/resources/gxf/cpu_thread.cpp b/src/core/resources/gxf/cpu_thread.cpp new file mode 100644 index 00000000..f4926872 --- /dev/null +++ b/src/core/resources/gxf/cpu_thread.cpp @@ -0,0 +1,44 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "holoscan/core/resources/gxf/cpu_thread.hpp" + +#include + +#include + +#include "holoscan/core/component_spec.hpp" +#include "holoscan/core/gxf/gxf_utils.hpp" + +namespace holoscan { + +CPUThread::CPUThread(const std::string& name, nvidia::gxf::CPUThread* component) + : gxf::GXFResource(name, component) { + bool pin_entity = false; + HOLOSCAN_GXF_CALL_FATAL(GxfParameterGetBool(gxf_context_, gxf_cid_, "pin_entity", &pin_entity)); + pin_entity_ = pin_entity; +} + +void CPUThread::setup(ComponentSpec& spec) { + spec.param(pin_entity_, + "pin_entity", + "Pin Entity", + "Whether to pin the entity to a dedicated worker thread.", + false); +} + +} // namespace holoscan diff --git a/src/core/resources/gxf/system_resources.cpp b/src/core/resources/gxf/system_resources.cpp new file mode 100644 index 00000000..03484808 --- /dev/null +++ b/src/core/resources/gxf/system_resources.cpp @@ -0,0 +1,110 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "holoscan/core/resources/gxf/system_resources.hpp" + +#include +#include +#include +#include +#include + +#include +#include "holoscan/core/component_spec.hpp" +#include "holoscan/core/fragment.hpp" +#include "holoscan/core/gxf/entity_group.hpp" +#include "holoscan/core/operator.hpp" +#include "holoscan/core/resource.hpp" +#include "holoscan/core/resources/gxf/cpu_thread.hpp" + +namespace holoscan { + +ThreadPool::ThreadPool(const std::string& name, nvidia::gxf::ThreadPool* component) + : gxf::GXFSystemResourceBase(name, component) {} + +int64_t ThreadPool::size() const { + if (gxf_cptr_) { + nvidia::gxf::ThreadPool* pool = static_cast<nvidia::gxf::ThreadPool*>(gxf_cptr_); + return pool->size(); + } + return 0; +} + +void ThreadPool::setup(ComponentSpec& spec) { + spec.param(initial_size_, + "initial_size", + "Initial ThreadPool Size", + "Initial number of worker threads in the pool", + 1L); +} + +void ThreadPool::add(const std::shared_ptr<Operator>& op, bool pin_operator) { + // add a CPUThread argument if one did not already exist + const auto& resource_map = op->resources(); + auto thread_arg_it = std::find_if(resource_map.begin(), resource_map.end(), [](const auto& r) { + // check whether the resource is a CPUThread by comparing typeid values + return typeid(*r.second) == typeid(CPUThread); + }); + if (thread_arg_it == resource_map.end()) { + // Create a CPUThread resource and add it to the Operator's list of arguments + const std::string thread_name = fmt::format("{}_cpu_thread", op->name()); + auto cpu_thread = + fragment_->make_resource<CPUThread>(thread_name, Arg{"pin_entity", pin_operator}); + auto cpu_thread_resource = std::dynamic_pointer_cast<Resource>(cpu_thread); + if (!cpu_thread_resource) { + throw std::runtime_error( + "Failed to cast std::shared_ptr<CPUThread> to std::shared_ptr<Resource>"); + } + op->add_arg(cpu_thread_resource); + } + + // store a pointer to the operator for later assignment to the entity group + // (need to do the assignment from 
GXFExecutor only after the Operator has been initialized) + operators_.push_back(op); +} + +void ThreadPool::add(std::vector<std::shared_ptr<Operator>> ops, bool pin_operator) { + for (const auto& op : ops) { add(op, pin_operator); } +} + +YAML::Node ThreadPool::to_yaml_node() const { + YAML::Node node = GXFSystemResourceBase::to_yaml_node(); + node["operators in pool"] = YAML::Node(YAML::NodeType::Sequence); + for (const auto& op : operators_) { node["operators in pool"].push_back(YAML::Node(op->name())); } + return node; +} + +GPUDevice::GPUDevice(const std::string& name, nvidia::gxf::GPUDevice* component) + : gxf::GXFSystemResourceBase(name, component) {} + +int32_t GPUDevice::device_id() const { + if (gxf_cptr_) { + nvidia::gxf::GPUDevice* gpu_device = static_cast<nvidia::gxf::GPUDevice*>(gxf_cptr_); + return gpu_device->device_id(); + } + return 0; +} + +void GPUDevice::setup(ComponentSpec& spec) { + spec.param(dev_id_, + "dev_id", + "Device Id", + "The CUDA device id of the device on which to create the CUDA stream.", + static_cast<int32_t>(0)); +} + +} // namespace holoscan diff --git a/src/core/schedulers/gxf/multithread_scheduler.cpp b/src/core/schedulers/gxf/multithread_scheduler.cpp index ca8b647d..d202fca1 100644 --- a/src/core/schedulers/gxf/multithread_scheduler.cpp +++ b/src/core/schedulers/gxf/multithread_scheduler.cpp @@ -61,6 +61,13 @@ void MultiThreadScheduler::setup(ComponentSpec& spec) { "negative value means not stop on deadlock. This parameter only applies when " "stop_on_deadlock=true", 0L); + spec.param(strict_job_thread_pinning_, + "strict_job_thread_pinning", + "Strict Job-Thread Pinning", + "When true, the thread an operator is pinned to is not allowed to run any other " + "operators. When false (the default), if the pinned operator is not in a READY state," + " another operator could run on the thread.", + false); } nvidia::gxf::MultiThreadScheduler* MultiThreadScheduler::get() const { diff --git a/src/core/services/app_driver/server.cpp b/src/core/services/app_driver/server.cpp index c14bf2f1..c2ef8b0d 100644 --- a/src/core/services/app_driver/server.cpp +++ b/src/core/services/app_driver/server.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2023-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -24,6 +24,7 @@ #include "holoscan/core/app_driver.hpp" #include "holoscan/core/cli_options.hpp" +#include "holoscan/core/system/network_utils.hpp" #include "holoscan/logger/logger.hpp" #include "../app_worker/client.hpp" @@ -94,14 +95,25 @@ void AppDriverServer::run() { std::unique_ptr server; AppDriverServiceImpl app_driver_service(app_driver_); if (need_driver_) { - grpc::ServerBuilder builder; - builder.AddListeningPort(server_address, grpc::InsecureServerCredentials()); - builder.RegisterService(&app_driver_service); - server = builder.BuildAndStart(); - if (server) { - HOLOSCAN_LOG_INFO("AppDriverServer listening on {}", server_address); + // Check if the listening port is already in use + int server_port_int = std::stoi(server_port); + auto unused_ports = + get_unused_network_ports(1, server_port_int, server_port_int, {}, {server_port_int}); + if (unused_ports.empty() || unused_ports[0] != server_port_int) { + HOLOSCAN_LOG_ERROR("Port {} is already in use", server_port_int); + should_stop_ = true; } else { - HOLOSCAN_LOG_ERROR("Failed to start AppDriverServer on {}", server_address); + // Start the gRPC server + grpc::ServerBuilder builder; + builder.AddListeningPort(server_address, grpc::InsecureServerCredentials()); + builder.RegisterService(&app_driver_service); + server = builder.BuildAndStart(); + if (server) { + HOLOSCAN_LOG_INFO("AppDriverServer listening on {}", server_address); + } else { + HOLOSCAN_LOG_ERROR("Failed to start AppDriverServer on {}", server_address); + should_stop_ = true; + } } } diff --git a/src/core/services/app_worker/server.cpp b/src/core/services/app_worker/server.cpp index b6bd17b4..42e745eb 100644 --- a/src/core/services/app_worker/server.cpp +++ b/src/core/services/app_worker/server.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2023-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -51,7 +51,14 @@ void AppWorkerServer::start() { // If server_port is empty, find an unused network port and use it as the default port. if (server_port.empty()) { - auto unused_ports = get_unused_network_ports(1, kMinNetworkPort, kMaxNetworkPort); + // Get the driver port to exclude it from the list of unused ports. 
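+ // For reference, a sketch of the port-probing helper as it is called here and in the
+ // driver server (argument roles inferred from the calls in this patch: count, min/max
+ // range, then ports to exclude; values illustrative):
+ //
+ //   auto ports = get_unused_network_ports(/*num_ports=*/1, kMinNetworkPort,
+ //                                         kMaxNetworkPort, /*exclude_ports=*/{driver_port});
+ //   if (ports.empty()) { HOLOSCAN_LOG_ERROR("No unused ports found"); }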
+ std::string driver_address = app_worker_->options()->driver_address; + auto driver_port_str = holoscan::CLIOptions::parse_port(driver_address); + auto driver_port = kDefaultAppDriverPort; + if (!driver_port_str.empty()) { driver_port = std::stoi(driver_port_str); } + const std::vector exclude_ports = {driver_port}; + auto unused_ports = + get_unused_network_ports(1, kMinNetworkPort, kMaxNetworkPort, exclude_ports); if (unused_ports.empty()) { HOLOSCAN_LOG_ERROR("No unused ports found"); return; diff --git a/src/operators/holoviz/holoviz.cpp b/src/operators/holoviz/holoviz.cpp index 08a2a97d..1a986cb6 100644 --- a/src/operators/holoviz/holoviz.cpp +++ b/src/operators/holoviz/holoviz.cpp @@ -709,9 +709,6 @@ YAML_CONVERTER(holoscan::ops::HolovizOp::LayerCallbackFunction); namespace holoscan::ops { -// Initialize static members -std::mutex HolovizOp::mutex_; - /*static*/ void HolovizOp::key_callback_handler(void* user_pointer, viz::Key key, viz::KeyAndButtonAction action, viz::KeyModifiers modifiers) { @@ -883,11 +880,22 @@ void HolovizOp::setup(OperatorSpec& spec) { spec.param(window_close_scheduling_term_, "window_close_scheduling_term", "WindowCloseSchedulingTerm", - "BooleanSchedulingTerm to stop the codelet from ticking when the window is closed."); - + "This is a deprecated parameter name for `window_close_condition`. Please use " + "`window_close_condition` instead as `window_close_scheduling_term` will be removed " + "in a future release.", + ParameterFlag::kOptional); + spec.param(window_close_condition_, + "window_close_condition", + "window close condition", + "BooleanCondition on the operator that will cause it to stop executing if the " + "display window is closed. By default, this condition is created automatically " + "during HolovizOp::initialize. The user may want to provide it if, for example, " + "there are multiple HolovizOp operators and you want to share the same window close " + "condition across both. By sharing the same condition, if one of the display " + "windows is closed it would also close the other(s).", + ParameterFlag::kOptional); spec.param( allocator_, "allocator", "Allocator", "Allocator used to allocate render buffer output."); - spec.param(font_path_, "font_path", "FontPath", @@ -1281,23 +1289,6 @@ void HolovizOp::render_geometry(const ExecutionContext& context, const InputSpec static_cast(buffer_info.element_type))); } - // get pointer to tensor buffer - std::vector host_buffer; - if (buffer_info.storage_type == nvidia::gxf::MemoryStorageType::kDevice) { - host_buffer.resize(buffer_info.bytes_size); - - // copy from device to host - HOLOSCAN_CUDA_CALL(cudaMemcpyAsync(static_cast(host_buffer.data()), - static_cast(buffer_info.buffer_ptr), - buffer_info.bytes_size, - cudaMemcpyDeviceToHost, - cuda_stream_handler_.get_cuda_stream(context.context()))); - // When copying from device memory to pagable memory the call is synchronous with the host - // execution. No need to synchronize here. 
- - buffer_info.buffer_ptr = host_buffer.data(); - } - // start a geometry layer viz::BeginGeometryLayer(); set_input_spec_geometry(input_spec); @@ -1315,7 +1306,25 @@ void HolovizOp::render_geometry(const ExecutionContext& context, const InputSpec throw std::runtime_error( fmt::format("No text has been specified by input spec '{}'.", input_spec.tensor_name_)); } - uintptr_t src_coord = reinterpret_cast(buffer_info.buffer_ptr); + + uintptr_t src_coord; + std::vector host_buffer; + if (buffer_info.storage_type == nvidia::gxf::MemoryStorageType::kDevice) { + host_buffer.resize(buffer_info.bytes_size); + + // copy from device to host + HOLOSCAN_CUDA_CALL(cudaMemcpyAsync(static_cast(host_buffer.data()), + static_cast(buffer_info.buffer_ptr), + buffer_info.bytes_size, + cudaMemcpyDeviceToHost, + cuda_stream_handler_.get_cuda_stream(context.context()))); + // When copying from device memory to pageable memory the call is synchronous with the host + // execution. No need to synchronize here. + + src_coord = reinterpret_cast(host_buffer.data()); + } else { + src_coord = reinterpret_cast(buffer_info.buffer_ptr); + } constexpr uint32_t values_per_coordinate = 3; float coords[values_per_coordinate]{0.F, 0.F, 0.05F}; for (uint32_t index = 0; index < coordinates; ++index) { @@ -1343,12 +1352,12 @@ void HolovizOp::render_geometry(const ExecutionContext& context, const InputSpec .c_str()); } } else { - std::vector coords; viz::PrimitiveTopology topology; uint32_t primitive_count; uint32_t coordinate_count; uint32_t values_per_coordinate; std::vector default_coord; + switch (input_spec.type_) { case InputType::POINTS: // point primitives, one coordinate (x, y) per primitive @@ -1495,36 +1504,74 @@ void HolovizOp::render_geometry(const ExecutionContext& context, const InputSpec fmt::format("Unhandled tensor type '{}'", inputTypeToString(input_spec.type_))); } - // copy coordinates - uintptr_t src_coord = reinterpret_cast(buffer_info.buffer_ptr); - coords.reserve(coordinate_count * values_per_coordinate); + if (primitive_count) { + if ((buffer_info.element_type == nvidia::gxf::PrimitiveType::kFloat32) && + (buffer_info.components == values_per_coordinate)) { + // can use the buffer directly, no copy required + if (buffer_info.storage_type == nvidia::gxf::MemoryStorageType::kSystem) { + viz::Primitive(topology, + primitive_count, + coordinate_count * values_per_coordinate, + reinterpret_cast(buffer_info.buffer_ptr)); + } else { + viz::PrimitiveCudaDevice(topology, + primitive_count, + coordinate_count * values_per_coordinate, + reinterpret_cast(buffer_info.buffer_ptr)); + } - for (uint32_t index = 0; index < coordinate_count; ++index) { - uint32_t component_index = 0; - // copy from source array - while (component_index < std::min(buffer_info.components, values_per_coordinate)) { - switch (buffer_info.element_type) { - case nvidia::gxf::PrimitiveType::kFloat32: - coords.push_back(reinterpret_cast(src_coord)[component_index]); - break; - case nvidia::gxf::PrimitiveType::kFloat64: - coords.push_back(reinterpret_cast(src_coord)[component_index]); - break; - default: - throw std::runtime_error("Unhandled element type"); + } else { + // copy coordinates, convert from double to float if needed and add defaults + uintptr_t src_coord; + std::vector host_buffer; + if (buffer_info.storage_type == nvidia::gxf::MemoryStorageType::kDevice) { + host_buffer.resize(buffer_info.bytes_size); + + // copy from device to host + HOLOSCAN_CUDA_CALL( + cudaMemcpyAsync(static_cast(host_buffer.data()), + 
static_cast(buffer_info.buffer_ptr), + buffer_info.bytes_size, + cudaMemcpyDeviceToHost, + cuda_stream_handler_.get_cuda_stream(context.context()))); + // When copying from device memory to pageable memory the call is synchronous with the + // host execution. No need to synchronize here. + + src_coord = reinterpret_cast(host_buffer.data()); + } else { + src_coord = reinterpret_cast(buffer_info.buffer_ptr); } - ++component_index; - } - // fill from default array - while (component_index < values_per_coordinate) { - coords.push_back(default_coord[component_index]); - ++component_index; - } - src_coord += buffer_info.stride[1]; - } - if (primitive_count) { - viz::Primitive(topology, primitive_count, coords.size(), coords.data()); + // copy coordinates + std::vector coords; + coords.reserve(coordinate_count * values_per_coordinate); + + for (uint32_t index = 0; index < coordinate_count; ++index) { + uint32_t component_index = 0; + // copy from source array + while (component_index < std::min(buffer_info.components, values_per_coordinate)) { + switch (buffer_info.element_type) { + case nvidia::gxf::PrimitiveType::kFloat32: + coords.push_back(reinterpret_cast(src_coord)[component_index]); + break; + case nvidia::gxf::PrimitiveType::kFloat64: + coords.push_back(reinterpret_cast(src_coord)[component_index]); + break; + default: + throw std::runtime_error("Unhandled element type"); + } + ++component_index; + } + // fill from default array + while (component_index < values_per_coordinate) { + coords.push_back(default_coord[component_index]); + ++component_index; + } + src_coord += buffer_info.stride[1]; + } + + viz::Primitive(topology, primitive_count, coords.size(), coords.data()); + } } } @@ -1630,16 +1677,46 @@ void HolovizOp::initialize() { // Set up prerequisite parameters before calling Operator::initialize() auto frag = fragment(); - // Find if there is an argument for 'window_close_scheduling_term' - auto has_window_close_scheduling_term = + // Find if there is an argument for the deprecated 'window_close_scheduling_term' name or + // the newer 'window_close_condition' name. + auto window_scheduling_term_iter = std::find_if(args().begin(), args().end(), [](const auto& arg) { return (arg.name() == "window_close_scheduling_term"); }); - // Create the BooleanCondition if there is no argument provided. - if (has_window_close_scheduling_term == args().end()) { - window_close_scheduling_term_ = - frag->make_condition("window_close_scheduling_term"); - add_arg(window_close_scheduling_term_.get()); + auto window_condition_iter = std::find_if(args().begin(), args().end(), [](const auto& arg) { + return (arg.name() == "window_close_condition"); + }); + bool has_window_close_scheduling_term = window_scheduling_term_iter != args().end(); + bool has_window_close_condition = window_condition_iter != args().end(); + if (has_window_close_scheduling_term) { + if (has_window_close_condition) { + HOLOSCAN_LOG_WARN( + "Both \"window_close_scheduling_term\" and \"window_close_condition\" arguments " + "were provided. Please provide only \"window_close_condition\". Now discarding the " + "duplicate \"window_close_scheduling_term\" argument."); + // remove the duplicate argument using the deprecated name + args().erase(window_scheduling_term_iter); + } else { + HOLOSCAN_LOG_WARN( + "An argument named \"window_close_scheduling_term\" was provided, but this parameter " + "name is deprecated. Please provide this argument via its new name, " + "\"window_close_condition\", instead. 
Now renaming the argument to " "\"window_close_condition\"."); + + // rename the existing argument in-place + std::string new_name{"window_close_condition"}; + window_scheduling_term_iter->name(new_name); + } + // in either case above, we now have only "window_close_condition" + has_window_close_condition = true; + has_window_close_scheduling_term = false; + } + + // Create the BooleanCondition if there was no window close argument provided. + if (!has_window_close_condition) { + window_close_condition_ = + frag->make_condition<BooleanCondition>("window_close_condition"); + add_arg(window_close_condition_.get()); } // Conditional inputs and outputs are enabled using a boolean argument @@ -1652,8 +1729,6 @@ } void HolovizOp::start() { - std::lock_guard<std::mutex> guard(mutex_); - // set the font to be used if (!font_path_.get().empty()) { viz::SetFont(font_path_.get().c_str(), 25.F); } @@ -1750,7 +1825,7 @@ } // cast Condition to BooleanCondition - window_close_scheduling_term_->enable_tick(); + window_close_condition_->enable_tick(); // Copy the user defined input spec list to the internal input spec list. If there is no user // defined input spec it will be generated from the first messages received. @@ -1802,7 +1877,7 @@ void HolovizOp::compute(InputContext& op_input, OutputContext& op_output, // cast Condition to BooleanCondition if (viz::WindowShouldClose()) { - window_close_scheduling_term_->disable_tick(); + window_close_condition_->disable_tick(); return; } diff --git a/src/operators/inference/inference.cpp b/src/operators/inference/inference.cpp index b2f6e2e6..3d300b05 100644 --- a/src/operators/inference/inference.cpp +++ b/src/operators/inference/inference.cpp @@ -196,11 +196,6 @@ void InferenceOp::start() { HoloInfer::raise_error(module_, "Parameter Validation failed: " + status.get_message()); } - bool is_aarch64 = HoloInfer::is_platform_aarch64(); - if (is_aarch64 && backend_.get().compare("onnxrt") == 0 && !infer_on_cpu_.get()) { - HoloInfer::raise_error(module_, "Onnxruntime with CUDA not supported on aarch64."); - } - // Create inference specification structure inference_specs_ = std::make_shared<HoloInfer::InferenceSpecs>(backend_.get(), diff --git a/src/utils/holoinfer_utils.cpp b/src/utils/holoinfer_utils.cpp index 607ca2e9..7c1f78f8 100644 --- a/src/utils/holoinfer_utils.cpp +++ b/src/utils/holoinfer_utils.cpp @@ -147,6 +147,11 @@ gxf_result_t get_data_per_model(InputContext& op_input, const std::vectorstorage_type(); + if (!in_tensor->isContiguous()) { + HOLOSCAN_LOG_ERROR("Input tensor {} must have row-major memory layout.", in_tensors[i]); + return HoloInfer::report_error(module, "Data extraction, Memory layout not row-major."); + } + if (storage_type != nvidia::gxf::MemoryStorageType::kHost && storage_type != nvidia::gxf::MemoryStorageType::kDevice) { return HoloInfer::report_error( @@ -176,8 +181,12 @@ gxf_result_t get_data_per_model(InputContext& op_input, const std::vector(); } +TEST(ConditionClasses, TestMultiMessageAvailableConditionSumOfAll) { + // Test supplying sampling_mode argument as an enum + Fragment F; + const std::string name{"multi-message-available"}; + ArgList arglist{Arg{"sampling_mode", MultiMessageAvailableCondition::SamplingMode::kSumOfAll}, + Arg{"min_sum", static_cast<size_t>(4)}}; + auto condition = F.make_condition<MultiMessageAvailableCondition>(name, arglist); + EXPECT_EQ(condition->name(), name); + EXPECT_EQ(typeid(condition), typeid(std::make_shared<MultiMessageAvailableCondition>(arglist))); + EXPECT_EQ(std::string(condition->gxf_typename()), + "nvidia::gxf::MultiMessageAvailableSchedulingTerm"s); + 
EXPECT_TRUE(condition->description().find("name: " + name) != std::string::npos); + + // verify that no error about failing to convert sampling_mode to YAML::Node was logged + testing::internal::CaptureStderr(); + + condition->initialize(); + + std::string log_output = testing::internal::GetCapturedStderr(); + EXPECT_TRUE(log_output.find("Unable to convert argument type") == std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; +} + +TEST(ConditionClasses, TestMultiMessageAvailableConditionPerReceiver) { + // Test supplying sampling_mode argument as a std::string + Fragment F; + const std::string name{"multi-message-available"}; + ArgList arglist{Arg{"sampling_mode", std::string("PerReceiver")}, + Arg{"min_sizes", std::vector<size_t>({1, 2, 1})}}; + auto condition = F.make_condition<MultiMessageAvailableCondition>(name, arglist); + EXPECT_EQ(condition->name(), name); + EXPECT_EQ(typeid(condition), typeid(std::make_shared<MultiMessageAvailableCondition>(arglist))); + EXPECT_EQ(std::string(condition->gxf_typename()), + "nvidia::gxf::MultiMessageAvailableSchedulingTerm"s); + EXPECT_TRUE(condition->description().find("name: " + name) != std::string::npos); + + // verify that no error about failing to convert sampling_mode to YAML::Node was logged + testing::internal::CaptureStderr(); + + condition->initialize(); + + std::string log_output = testing::internal::GetCapturedStderr(); + EXPECT_TRUE(log_output.find("Unable to convert argument type") == std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; +} + +TEST(ConditionClasses, TestMultiMessageAvailableConditionPerReceiverYAML) { + // Test supplying sampling_mode argument as a YAML::Node + Fragment F; + const std::string name{"multi-message-available"}; + ArgList arglist{Arg{"sampling_mode", YAML::Node(std::string("PerReceiver"))}, + Arg{"min_sizes", std::vector<size_t>({1, 2, 1})}}; + auto condition = F.make_condition<MultiMessageAvailableCondition>(name, arglist); + EXPECT_EQ(condition->name(), name); + EXPECT_EQ(typeid(condition), typeid(std::make_shared<MultiMessageAvailableCondition>(arglist))); + EXPECT_EQ(std::string(condition->gxf_typename()), + "nvidia::gxf::MultiMessageAvailableSchedulingTerm"s); + EXPECT_TRUE(condition->description().find("name: " + name) != std::string::npos); + + // verify that no error about failing to convert sampling_mode to YAML::Node was logged + testing::internal::CaptureStderr(); + + condition->initialize(); + + std::string log_output = testing::internal::GetCapturedStderr(); + EXPECT_TRUE(log_output.find("Unable to convert argument type") == std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; +} + +TEST(ConditionClasses, TestMultiMessageAvailableConditionDefaultConstructor) { + Fragment F; + auto condition = F.make_condition<MultiMessageAvailableCondition>(); +} + +TEST(ConditionClasses, TestMultiMessageAvailableTimeoutConditionSumOfAll) { + Fragment F; + const std::string name{"multi-message-available-timeout"}; + ArgList arglist{Arg{"execution_frequency", std::string("1000000")}, + Arg{"sampling_mode", std::string("SumOfAll")}, + Arg{"min_sum", static_cast<size_t>(4)}}; + auto condition = F.make_condition<MultiMessageAvailableTimeoutCondition>(name, arglist); + EXPECT_EQ(condition->name(), name); + EXPECT_EQ(typeid(condition), + typeid(std::make_shared<MultiMessageAvailableTimeoutCondition>(arglist))); + EXPECT_EQ(std::string(condition->gxf_typename()), + "nvidia::gxf::MessageAvailableFrequencyThrottler"s); + EXPECT_TRUE(condition->description().find("name: " + name) != std::string::npos); + + // verify that no error about failing to convert sampling_mode to YAML::Node was logged + testing::internal::CaptureStderr(); + + condition->initialize(); + + std::string log_output = 
+
+TEST(ConditionClasses, TestMultiMessageAvailableTimeoutConditionPerReceiver) {
+  // Test supplying sampling_mode argument as an enum
+  Fragment F;
+  const std::string name{"multi-message-available-timeout"};
+  ArgList arglist{
+      Arg{"execution_frequency", std::string("10ms")},
+      Arg{"sampling_mode", MultiMessageAvailableTimeoutCondition::SamplingMode::kPerReceiver},
+      Arg{"min_sizes", std::vector<size_t>({1, 2, 1})}};
+  auto condition = F.make_condition<MultiMessageAvailableTimeoutCondition>(name, arglist);
+  EXPECT_EQ(condition->name(), name);
+  EXPECT_EQ(typeid(condition),
+            typeid(std::make_shared<MultiMessageAvailableTimeoutCondition>(arglist)));
+  EXPECT_EQ(std::string(condition->gxf_typename()),
+            "nvidia::gxf::MessageAvailableFrequencyThrottler"s);
+  EXPECT_TRUE(condition->description().find("name: " + name) != std::string::npos);
+
+  // verify that no error about failing to convert sampling_mode to YAML::Node was logged
+  testing::internal::CaptureStderr();
+
+  condition->initialize();
+
+  std::string log_output = testing::internal::GetCapturedStderr();
+  EXPECT_TRUE(log_output.find("Unable to convert argument type") == std::string::npos)
+      << "=== LOG ===\n"
+      << log_output << "\n===========\n";
+}
+
+TEST(ConditionClasses, TestMultiMessageAvailableTimeoutConditionPerReceiverString) {
+  // Test supplying sampling_mode argument as a string
+  Fragment F;
+  const std::string name{"multi-message-available-timeout"};
+  ArgList arglist{Arg{"execution_frequency", std::string("10ms")},
+                  Arg{"sampling_mode", std::string("PerReceiver")},
+                  Arg{"min_sizes", std::vector<size_t>({1, 2, 1})}};
+  auto condition = F.make_condition<MultiMessageAvailableTimeoutCondition>(name, arglist);
+  EXPECT_EQ(condition->name(), name);
+  EXPECT_EQ(typeid(condition),
+            typeid(std::make_shared<MultiMessageAvailableTimeoutCondition>(arglist)));
+  EXPECT_EQ(std::string(condition->gxf_typename()),
+            "nvidia::gxf::MessageAvailableFrequencyThrottler"s);
+  EXPECT_TRUE(condition->description().find("name: " + name) != std::string::npos);
+
+  // verify that no error about failing to convert sampling_mode to YAML::Node was logged
+  testing::internal::CaptureStderr();
+
+  condition->initialize();
+
+  std::string log_output = testing::internal::GetCapturedStderr();
+  EXPECT_TRUE(log_output.find("Unable to convert argument type") == std::string::npos)
+      << "=== LOG ===\n"
+      << log_output << "\n===========\n";
+}
+
 TEST(ConditionClasses, TestPeriodicCondition) {
   Fragment F;
   const std::string name{"periodic-condition"};
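As the tests above exercise, sampling_mode accepts the native enum, a std::string, or a YAML::Node. A condensed sketch of the three forms (the fragment pointer, condition names, and sizes are illustrative):

    // Hypothetical usage of the three accepted sampling_mode argument forms.
    auto c1 = fragment->make_condition<MultiMessageAvailableCondition>(
        "mma_enum",
        Arg("sampling_mode", MultiMessageAvailableCondition::SamplingMode::kSumOfAll),
        Arg("min_sum", static_cast<size_t>(4)));
    auto c2 = fragment->make_condition<MultiMessageAvailableCondition>(
        "mma_string",
        Arg("sampling_mode", std::string("PerReceiver")),
        Arg("min_sizes", std::vector<size_t>({1, 2, 1})));
    auto c3 = fragment->make_condition<MultiMessageAvailableCondition>(
        "mma_yaml",
        Arg("sampling_mode", YAML::Node(std::string("SumOfAll"))),
        Arg("min_sum", static_cast<size_t>(4)));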
diff --git a/tests/core/entity_group.cpp b/tests/core/entity_group.cpp
new file mode 100644
index 00000000..115948d7
--- /dev/null
+++ b/tests/core/entity_group.cpp
@@ -0,0 +1,130 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2023-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+#include <gxf/core/gxf.h>
+
+#include <memory>
+#include <stdexcept>
+#include <string>
+#include <utility>
+
+#include "../config.hpp"
+#include "../utils.hpp"
+#include "holoscan/core/fragment.hpp"
+#include "holoscan/core/gxf/entity_group.hpp"
+#include "holoscan/core/resources/gxf/system_resources.hpp"
+#include "holoscan/core/resources/gxf/unbounded_allocator.hpp"
+
+using namespace std::string_literals;
+
+namespace holoscan {
+
+using EntityGroupWithGXFContext = TestWithGXFContext;
+
+TEST_F(EntityGroupWithGXFContext, TestEntityGroupAddBasedOnEid) {
+  auto g = gxf::EntityGroup(F.executor().context(), "group1");
+  auto allocator = F.make_resource<UnboundedAllocator>("allocator");
+  auto allocator2 = F.make_resource<UnboundedAllocator>("allocator2");
+
+  // need to initialize the resources before we can add them to an entity group.
+  allocator->initialize();
+  allocator2->initialize();
+
+  // add to the entity group via the GXF eid
+  g.add(allocator->gxf_eid());
+  g.add(allocator2->gxf_eid());
+
+  EXPECT_EQ(allocator->gxf_entity_group_name(), "group1"s);
+  EXPECT_EQ(allocator2->gxf_entity_group_name(), "group1"s);
+}
+
+TEST_F(EntityGroupWithGXFContext, TestEntityGroupAddBasedOnComponent) {
+  auto g = gxf::EntityGroup(F.executor().context(), "mygroup");
+  auto allocator = F.make_resource<UnboundedAllocator>("allocator");
+
+  // need to initialize the resource before we can add it to an entity group.
+  allocator->initialize();
+
+  // GXF will have initially assigned it to a default entity group
+  // (not sure we should test the exact name here since it could be subject to change)
+  EXPECT_EQ(allocator->gxf_entity_group_name(), "default_entity_group"s);
+
+  // add a GXFComponent object to the entity group
+  g.add(*allocator);
+
+  // retrieve the name of the entity group to which the component belongs
+  EXPECT_EQ(allocator->gxf_entity_group_name(), "mygroup"s);
+
+  // adding the same component a second time will raise an exception
+  EXPECT_THROW(
+      {
+        try {
+          g.add(*allocator);
+        } catch (const std::runtime_error& e) {
+          ASSERT_TRUE(std::string(e.what()).find("failure during GXF call") != std::string::npos);
+          throw;
+        }
+      },
+      std::runtime_error);
+}
+
+TEST_F(EntityGroupWithGXFContext, TestEntityGroupAddThreadPool) {
+  auto g = gxf::EntityGroup(F.executor().context(), "mygroup");
+  auto allocator = F.make_resource<UnboundedAllocator>("allocator");
+  auto thread_pool = F.make_resource<ThreadPool>("thread_pool");
+
+  // need to initialize the resources before we can add them to an entity group.
+  allocator->initialize();
+  thread_pool->initialize();
+
+  // GXF will have initially assigned it to a default entity group
+  // (not sure we should test the exact name here since it could be subject to change)
+  EXPECT_EQ(allocator->gxf_entity_group_name(), "default_entity_group"s);
+
+  // add GXFComponent objects to the entity group
+  g.add(*allocator);
+  g.add(*thread_pool);
+
+  // retrieve the name of the entity group to which the components belong
+  EXPECT_EQ(allocator->gxf_entity_group_name(), "mygroup"s);
+  EXPECT_EQ(thread_pool->gxf_entity_group_name(), "mygroup"s);
+}
+
+TEST_F(EntityGroupWithGXFContext, TestEntityGroupAddGPUDevice) {
+  auto g = gxf::EntityGroup(F.executor().context(), "mygroup");
+  auto allocator = F.make_resource<UnboundedAllocator>("allocator");
+  auto device = F.make_resource<GPUDevice>("device");
+
+  // need to initialize the resources before we can add them to an entity group.
+  allocator->initialize();
+  device->initialize();
+
+  // GXF will have initially assigned it to a default entity group
+  // (not sure we should test the exact name here since it could be subject to change)
+  EXPECT_EQ(allocator->gxf_entity_group_name(), "default_entity_group"s);
+
+  // add GXFComponent objects to the entity group
+  g.add(*allocator);
+  g.add(*device);
+
+  // retrieve the name of the entity group to which the components belong
+  EXPECT_EQ(allocator->gxf_entity_group_name(), "mygroup"s);
+  EXPECT_EQ(device->gxf_entity_group_name(), "mygroup"s);
+}
+
+}  // namespace holoscan
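The tests above encode two usage constraints: a resource must be initialized before it can join an entity group, and adding the same component twice throws. A condensed sketch (the names here are illustrative):

    // Hypothetical EntityGroup usage mirroring the tests above.
    auto group = holoscan::gxf::EntityGroup(fragment->executor().context(), "my_group");
    auto pool = fragment->make_resource<holoscan::UnboundedAllocator>("pool");
    pool->initialize();  // must happen before EntityGroup::add
    group.add(*pool);    // by component reference, or group.add(pool->gxf_eid())
    // a second group.add(*pool) throws std::runtime_error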
"holoscan/core/resources/gxf/stream_ordered_allocator.hpp" +#include "holoscan/core/resources/gxf/system_resources.hpp" #include "holoscan/core/resources/gxf/ucx_component_serializer.hpp" #include "holoscan/core/resources/gxf/ucx_entity_serializer.hpp" #include "holoscan/core/resources/gxf/ucx_holoscan_component_serializer.hpp" @@ -457,4 +459,47 @@ TEST_F(ResourceClassesWithGXFContext, TestUcxTransmitter) { TEST_F(ResourceClassesWithGXFContext, TestUcxTransmitterDefaultConstructor) { auto resource = F.make_resource(); } + +TEST_F(ResourceClassesWithGXFContext, TestCPUThread) { + const std::string name{"thread0"}; + Arg pin_arg{"pin_entity", true}; + auto resource = F.make_resource(name, pin_arg); + EXPECT_EQ(resource->name(), name); + EXPECT_EQ(typeid(resource), typeid(std::make_shared(pin_arg))); + EXPECT_EQ(std::string(resource->gxf_typename()), "nvidia::gxf::CPUThread"s); +} + +TEST_F(ResourceClassesWithGXFContext, TestCPUThreadDefaultConstructor) { + auto resource = F.make_resource(); +} + +TEST_F(ResourceClassesWithGXFContext, TestGPUDevice) { + const std::string name{"dev0"}; + Arg id_arg{"dev_id", static_cast(0)}; + auto resource = F.make_resource(name, id_arg); + EXPECT_EQ(resource->name(), name); + EXPECT_EQ(typeid(resource), typeid(std::make_shared(id_arg))); + EXPECT_EQ(std::string(resource->gxf_typename()), "nvidia::gxf::GPUDevice"s); +} + +TEST_F(ResourceClassesWithGXFContext, TestGPUDeviceDefaultConstructor) { + auto resource = F.make_resource(); +} + +TEST_F(ResourceClassesWithGXFContext, TestThreadPool) { + const std::string name{"thread-pool0"}; + ArgList arglist{ + Arg{"initial_size", static_cast(5)}, + Arg{"priority", static_cast(2)}, + }; + auto resource = F.make_resource(name, arglist); + EXPECT_EQ(resource->name(), name); + EXPECT_EQ(typeid(resource), typeid(std::make_shared(arglist))); + EXPECT_EQ(std::string(resource->gxf_typename()), "nvidia::gxf::ThreadPool"s); +} + +TEST_F(ResourceClassesWithGXFContext, TestThreadPoolDefaultConstructor) { + auto resource = F.make_resource(); +} + } // namespace holoscan diff --git a/tests/core/scheduler_classes.cpp b/tests/core/scheduler_classes.cpp index ddb1a8d6..98391a1b 100644 --- a/tests/core/scheduler_classes.cpp +++ b/tests/core/scheduler_classes.cpp @@ -94,6 +94,7 @@ TEST_F(SchedulerClassesWithGXFContext, TestMultiThreadSchedulerWithArgs) { Arg{"check_recession_period_ms", 5.0}, Arg{"max_duration_ms", 10000L}, Arg{"stop_on_deadlock_timeout", 100LL}, + Arg{"strict_job_thread_pinning", true}, }; auto scheduler = F.make_scheduler(name, arglist); EXPECT_TRUE(scheduler->description().find("name: " + name) != std::string::npos); diff --git a/tests/flow_tracking/flow_tracking_cycle.cpp b/tests/flow_tracking/flow_tracking_cycle.cpp index 9beb81db..acc57511 100644 --- a/tests/flow_tracking/flow_tracking_cycle.cpp +++ b/tests/flow_tracking/flow_tracking_cycle.cpp @@ -20,381 +20,10 @@ #include #include -#include -#include +#include "sample_test_graphs.hpp" namespace holoscan { -// Do not pollute holoscan namespace with utility classes -namespace { - -/////////////////////////////////////////////////////////////////////////////// -// Utility Operators -/////////////////////////////////////////////////////////////////////////////// - -class OneInOp : public Operator { - public: - HOLOSCAN_OPERATOR_FORWARD_ARGS(OneInOp) - - OneInOp() = default; - - void setup(OperatorSpec& spec) override { spec.input("in"); } - - void compute(InputContext& op_input, OutputContext& op_output, - ExecutionContext& context) override { - auto in_message = 
op_input.receive("in"); - - HOLOSCAN_LOG_INFO("OneInOp count {}", count_++); - } - - private: - int count_ = 1; -}; - -class OneOutOp : public Operator { - public: - HOLOSCAN_OPERATOR_FORWARD_ARGS(OneOutOp) - - OneOutOp() = default; - - void setup(OperatorSpec& spec) override { spec.output("out"); } - - void compute([[maybe_unused]] InputContext& op_input, OutputContext& op_output, - ExecutionContext& context) override { - auto out_message = gxf::Entity::New(&context); - op_output.emit(out_message); - - HOLOSCAN_LOG_INFO("{} count {}", name(), count_++); - } - - private: - int count_ = 1; -}; - -class TwoInOneOutOp : public Operator { - public: - HOLOSCAN_OPERATOR_FORWARD_ARGS(TwoInOneOutOp) - - TwoInOneOutOp() = default; - - void setup(OperatorSpec& spec) override { - spec.input("in0").condition(ConditionType::kNone); - spec.input("in1").condition(ConditionType::kNone); - spec.output("out"); - } - - void compute(InputContext& op_input, OutputContext& op_output, - ExecutionContext& context) override { - auto in_message1 = op_input.receive("in0"); - auto in_message2 = op_input.receive("in1"); - - auto out_message = gxf::Entity::New(&context); - op_output.emit(out_message); - - HOLOSCAN_LOG_INFO("{} count {}", name(), count_++); - } - - private: - int count_ = 1; -}; - -class ThreeInOneOutOp : public Operator { - public: - HOLOSCAN_OPERATOR_FORWARD_ARGS(ThreeInOneOutOp) - - ThreeInOneOutOp() = default; - - void setup(OperatorSpec& spec) override { - spec.input("in0"); - spec.input("in1"); - spec.input("in2").condition(ConditionType::kNone); // cycle in-port - spec.output("out"); - } - - void compute(InputContext& op_input, OutputContext& op_output, - ExecutionContext& context) override { - auto in_message1 = op_input.receive("in0"); - auto in_message2 = op_input.receive("in1"); - auto in_message3 = op_input.receive("in2"); - - auto out_message = gxf::Entity::New(&context); - op_output.emit(out_message); - - HOLOSCAN_LOG_INFO("{} count {}", name(), count_++); - } - - private: - int count_ = 1; -}; - -class TwoInTwoOutOp : public Operator { - public: - HOLOSCAN_OPERATOR_FORWARD_ARGS(TwoInTwoOutOp) - - TwoInTwoOutOp() = default; - - void setup(OperatorSpec& spec) override { - spec.input("in0").condition(ConditionType::kNone); - spec.input("in1").condition(ConditionType::kNone); - spec.output("out0"); - spec.output("out1"); - } - - void compute(InputContext& op_input, OutputContext& op_output, - ExecutionContext& context) override { - auto in_message1 = op_input.receive("in0"); - auto in_message2 = op_input.receive("in1"); - - auto out_message = gxf::Entity::New(&context); - op_output.emit(out_message, "out0"); - op_output.emit(out_message, "out1"); - - HOLOSCAN_LOG_INFO("{} count {}", name(), count_++); - } - - private: - int count_ = 1; -}; - -class OneInOneOutOp : public Operator { - public: - HOLOSCAN_OPERATOR_FORWARD_ARGS(OneInOneOutOp) - - OneInOneOutOp() = default; - - void setup(OperatorSpec& spec) override { - spec.input("in"); - spec.output("out"); - } - - void compute(InputContext& op_input, OutputContext& op_output, - ExecutionContext& context) override { - auto in_message = op_input.receive("in"); - - auto out_message = gxf::Entity::New(&context); - op_output.emit(out_message); - - HOLOSCAN_LOG_INFO("{} count {}", name(), count_++); - } - - private: - int count_ = 1; -}; - -class OneOptionalInOneOutOp : public Operator { - public: - HOLOSCAN_OPERATOR_FORWARD_ARGS(OneOptionalInOneOutOp) - - OneOptionalInOneOutOp() = default; - - void setup(OperatorSpec& spec) override { - 
spec.input("in").condition(ConditionType::kNone); - spec.output("out"); - } - - void compute(InputContext& op_input, OutputContext& op_output, - ExecutionContext& context) override { - auto in_message = op_input.receive("in"); - - auto out_message = gxf::Entity::New(&context); - op_output.emit(out_message); - - HOLOSCAN_LOG_INFO("{} count {}", name(), count_++); - } - - private: - int count_ = 1; -}; - -/////////////////////////////////////////////////////////////////////////////// -// Utility Applications -/////////////////////////////////////////////////////////////////////////////// - -// The ASCII graphs are Greg's excellent additions - -/* CycleWithSourceApp - * - * OneOut--->TwoInOneOut--->OneInOneOut - * ^ | - * | | - * +---------------+ - */ -class CycleWithSourceApp : public holoscan::Application { - public: - using Application::Application; - - void compose() override { - using namespace holoscan; - auto one_out = - make_operator("OneOut", make_condition("count-condition", 1)); - auto two_in_one_out = - make_operator("TwoInOneOut", make_condition(10)); - auto one_in_one_out = make_operator("OneInOneOut"); - - add_flow(one_out, two_in_one_out, {{"out", "in0"}}); - add_flow(two_in_one_out, one_in_one_out, {{"out", "in"}}); - add_flow(one_in_one_out, two_in_one_out, {{"out", "in1"}}); - } -}; - -/* MiddleCycleApp - * - * OneOut--->TwoInOneOut--->OneInOneOut--->rx - * ^ | - * | | - * +------------+ - */ -class MiddleCycleApp : public holoscan::Application { - public: - using Application::Application; - - void compose() override { - using namespace holoscan; - auto one_out = - make_operator("OneOut", make_condition("count-condition", 1)); - auto two_in_one_out = - make_operator("TwoInOneOut", make_condition(10)); - auto one_in_one_out = make_operator("OneInOneOut"); - auto rx = make_operator("PingRx"); - - add_flow(one_out, two_in_one_out, {{"out", "in0"}}); - add_flow(two_in_one_out, one_in_one_out, {{"out", "in"}}); - add_flow(one_in_one_out, two_in_one_out, {{"out", "in1"}}); - add_flow(one_in_one_out, rx); - } -}; - -/* CycleWithLeaf - * - * root--->middle--->leaf - * ^ | - * | | - * +-------+ - */ -class CycleWithLeaf : public holoscan::Application { - public: - using Application::Application; - - void compose() override { - using namespace holoscan; - auto root = make_operator( - "root", make_condition("count-condition", 5)); - auto middle = make_operator("middle"); - auto rx = make_operator("leaf"); - - add_flow(root, middle); - add_flow(middle, root); - add_flow(middle, rx); - } -}; - -/* TwoRootsOneCycle - * - * +--------+ - * | ^ - * | | - * root2--->middle2--->last - * ^ - * | - * | - * root1--->middle1 - */ -class TwoRootsOneCycle : public holoscan::Application { - public: - using Application::Application; - - void compose() override { - using namespace holoscan; - - auto root1 = - make_operator("root1", make_condition("count-condition", 5)); - - auto root2 = - make_operator("root2", make_condition("count-condition", 5)); - - auto middle1 = make_operator("middle1"); - auto middle2 = make_operator("middle2"); - - auto last = make_operator("last"); - - add_flow(root2, middle2, {{"out", "in0"}}); - add_flow(middle2, last); - add_flow(last, middle2, {{"out", "in2"}}); - - add_flow(root1, middle1); - add_flow(middle1, middle2, {{"out", "in1"}}); - } -}; - -/* TwoCyclesVariant1 - * - * start--->middle--->end - * ^ | ^ | - * | | | | - * +-------+ +------+ - * - * middle node is triggered first in this case as start and end have mandatory input ports - */ -class TwoCyclesVariant1 
 : public holoscan::Application {
- public:
-  using Application::Application;
-
-  void compose() override {
-    using namespace holoscan;
-
-    auto start = make_operator<OneInOneOutOp>("start", make_condition<CountCondition>(5));
-
-    auto middle = make_operator<TwoInTwoOutOp>("middle");
-
-    auto end = make_operator<OneInOneOutOp>("end");
-
-    // First cycle
-    add_flow(start, middle, {{"out", "in0"}});
-    add_flow(middle, end, {{"out0", "in"}});
-
-    // Second cycle
-    add_flow(middle, start, {{"out1", "in"}});
-    add_flow(end, middle, {{"out", "in1"}});
-  }
-};
-
-/* TwoCyclesVariant2
- *
- * same layout as TwoCyclesVariant1
- *
- * start--->middle--->end
- *   ^  |     ^  |
- *   |  |     |  |
- *   +-------+ +------+
- *
- * The difference is that start is triggered first in this case as start and end have optional
- * input ports.
- */
-class TwoCyclesVariant2 : public holoscan::Application {
- public:
-  using Application::Application;
-
-  void compose() override {
-    using namespace holoscan;
-
-    auto start = make_operator<OneOptionalInOneOutOp>("start", make_condition<CountCondition>(5));
-
-    auto middle = make_operator<TwoInTwoOutOp>("middle");
-
-    auto end = make_operator<OneOptionalInOneOutOp>("end");
-
-    // First cycle
-    add_flow(start, middle, {{"out", "in0"}});
-    add_flow(middle, end, {{"out0", "in"}});
-
-    // Second cycle
-    add_flow(middle, start, {{"out1", "in"}});
-    add_flow(end, middle, {{"out", "in1"}});
-  }
-};
-
-}  // namespace
-
 TEST(Graphs, TestFlowTrackingForCycleWithSource) {
   auto app = make_application<CycleWithSourceApp>();
   auto& tracker = app->track(0, 0, 0);
diff --git a/tests/flow_tracking/limited_tracking_tests.cpp b/tests/flow_tracking/limited_tracking_tests.cpp
new file mode 100644
index 00000000..834bd838
--- /dev/null
+++ b/tests/flow_tracking/limited_tracking_tests.cpp
@@ -0,0 +1,70 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#include <string>
+
+#include "sample_test_graphs.hpp"
+
+namespace holoscan {
+
+TEST(Graphs, TestThreePathsOneRootOneLeaf) {
+  auto app1 = make_application<ThreePathsOneRootOneLeaf>();
+  auto& tracker1 = app1->track(0, 0, 0, true);  // enable limited tracking
+
+  // capture output so that we can check that the expected value is present
+  testing::internal::CaptureStdout();
+
+  app1->run();
+
+  tracker1.print();
+
+  std::string log_output1 = testing::internal::GetCapturedStdout();
+  EXPECT_TRUE(log_output1.find("root,leaf") != std::string::npos)
+      << "=== LOG ===\n"
+      << log_output1 << "\n===========\n";
+
+  EXPECT_TRUE(log_output1.find("Number of messages: 15") != std::string::npos)
+      << "=== LOG ===\n"
+      << log_output1 << "\n===========\n";
+
+  // Try with the default option below
+  auto app2 = make_application<ThreePathsOneRootOneLeaf>();
+  auto& tracker2 = app2->track(0, 0, 0);  // default option
+
+  testing::internal::CaptureStdout();
+
+  app2->run();
+
+  tracker2.print();
+
+  std::string log_output2 = testing::internal::GetCapturedStdout();
+  EXPECT_TRUE(log_output2.find("root,middle1,middle4,leaf") != std::string::npos)
+      << "=== LOG ===\n"
+      << log_output2 << "\n===========\n";
+
+  EXPECT_TRUE(log_output2.find("root,middle2,middle4,leaf") != std::string::npos)
+      << "=== LOG ===\n"
+      << log_output2 << "\n===========\n";
+
+  EXPECT_TRUE(log_output2.find("root,middle3,middle4,leaf") != std::string::npos)
+      << "=== LOG ===\n"
+      << log_output2 << "\n===========\n";
+}
+
+}  // namespace holoscan
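The new fourth argument to track() toggles the limited tracking mode exercised above: with it enabled, only the root and leaf of each path are recorded ("root,leaf"), whereas the default mode reports full paths such as "root,middle1,middle4,leaf". A sketch (MyApp is a placeholder application type):

    // Hypothetical application-side usage of limited data flow tracking.
    auto app = holoscan::make_application<MyApp>();
    auto& tracker = app->track(0, 0, 0, true);  // fourth argument enables limited tracking
    app->run();
    tracker.print();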
diff --git a/tests/flow_tracking/sample_test_graphs.hpp b/tests/flow_tracking/sample_test_graphs.hpp
new file mode 100644
index 00000000..f58adf24
--- /dev/null
+++ b/tests/flow_tracking/sample_test_graphs.hpp
@@ -0,0 +1,427 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef TEST_FLOW_TRACKING_SAMPLE_TEST_GRAPHS_HPP
+#define TEST_FLOW_TRACKING_SAMPLE_TEST_GRAPHS_HPP
+
+#include <string>
+#include <holoscan/holoscan.hpp>
+
+///////////////////////////////////////////////////////////////////////////////
+// Utility Operators
+///////////////////////////////////////////////////////////////////////////////
+namespace holoscan {
+
+namespace {
+
+// Not polluting the Holoscan namespace with sample operators and applications
+// by using an anonymous namespace.
+
+class OneInOp : public Operator {
+ public:
+  HOLOSCAN_OPERATOR_FORWARD_ARGS(OneInOp)
+
+  OneInOp() = default;
+
+  void setup(OperatorSpec& spec) override { spec.input<gxf::Entity>("in"); }
+
+  void compute(InputContext& op_input, OutputContext& op_output,
+               ExecutionContext& context) override {
+    auto in_message = op_input.receive<gxf::Entity>("in");
+
+    HOLOSCAN_LOG_INFO("OneInOp count {}", count_++);
+  }
+
+ private:
+  int count_ = 1;
+};
+
+class OneOutOp : public Operator {
+ public:
+  HOLOSCAN_OPERATOR_FORWARD_ARGS(OneOutOp)
+
+  OneOutOp() = default;
+
+  void setup(OperatorSpec& spec) override { spec.output<gxf::Entity>("out"); }
+
+  void compute([[maybe_unused]] InputContext& op_input, OutputContext& op_output,
+               ExecutionContext& context) override {
+    auto out_message = gxf::Entity::New(&context);
+    op_output.emit(out_message);
+
+    HOLOSCAN_LOG_INFO("{} count {}", name(), count_++);
+  }
+
+ private:
+  int count_ = 1;
+};
+
+class TwoInOneOutOp : public Operator {
+ public:
+  HOLOSCAN_OPERATOR_FORWARD_ARGS(TwoInOneOutOp)
+
+  TwoInOneOutOp() = default;
+
+  void setup(OperatorSpec& spec) override {
+    spec.input<gxf::Entity>("in0").condition(ConditionType::kNone);
+    spec.input<gxf::Entity>("in1").condition(ConditionType::kNone);
+    spec.output<gxf::Entity>("out");
+  }
+
+  void compute(InputContext& op_input, OutputContext& op_output,
+               ExecutionContext& context) override {
+    auto in_message1 = op_input.receive<gxf::Entity>("in0");
+    auto in_message2 = op_input.receive<gxf::Entity>("in1");
+
+    auto out_message = gxf::Entity::New(&context);
+    op_output.emit(out_message);
+
+    HOLOSCAN_LOG_INFO("{} count {}", name(), count_++);
+  }
+
+ private:
+  int count_ = 1;
+};
+
+class ThreeInOneOutOp : public Operator {
+ public:
+  HOLOSCAN_OPERATOR_FORWARD_ARGS(ThreeInOneOutOp)
+
+  ThreeInOneOutOp() = default;
+
+  void setup(OperatorSpec& spec) override {
+    spec.input<gxf::Entity>("in0");
+    spec.input<gxf::Entity>("in1");
+    spec.input<gxf::Entity>("in2").condition(ConditionType::kNone);  // cycle in-port
+    spec.output<gxf::Entity>("out");
+  }
+
+  void compute(InputContext& op_input, OutputContext& op_output,
+               ExecutionContext& context) override {
+    auto in_message1 = op_input.receive<gxf::Entity>("in0");
+    auto in_message2 = op_input.receive<gxf::Entity>("in1");
+    auto in_message3 = op_input.receive<gxf::Entity>("in2");
+
+    auto out_message = gxf::Entity::New(&context);
+    op_output.emit(out_message);
+
+    HOLOSCAN_LOG_INFO("{} count {}", name(), count_++);
+  }
+
+ private:
+  int count_ = 1;
+};
+
+class TwoInTwoOutOp : public Operator {
+ public:
+  HOLOSCAN_OPERATOR_FORWARD_ARGS(TwoInTwoOutOp)
+
+  TwoInTwoOutOp() = default;
+
+  void setup(OperatorSpec& spec) override {
+    spec.input<gxf::Entity>("in0").condition(ConditionType::kNone);
+    spec.input<gxf::Entity>("in1").condition(ConditionType::kNone);
+    spec.output<gxf::Entity>("out0");
+    spec.output<gxf::Entity>("out1");
+  }
+
+  void compute(InputContext& op_input, OutputContext& op_output,
+               ExecutionContext& context) override {
+    auto in_message1 = op_input.receive<gxf::Entity>("in0");
+    auto in_message2 = op_input.receive<gxf::Entity>("in1");
+
+    auto out_message = gxf::Entity::New(&context);
+    op_output.emit(out_message, "out0");
+    op_output.emit(out_message, "out1");
+
+    HOLOSCAN_LOG_INFO("{} count {}", name(), count_++);
+  }
+
+ private:
+  int count_ = 1;
+};
HOLOSCAN_LOG_INFO("{} count {}", name(), count_++); + } + + private: + int count_ = 1; +}; + +class OneOptionalInOneOutOp : public Operator { + public: + HOLOSCAN_OPERATOR_FORWARD_ARGS(OneOptionalInOneOutOp) + + OneOptionalInOneOutOp() = default; + + void setup(OperatorSpec& spec) override { + spec.input("in").condition(ConditionType::kNone); + spec.output("out"); + } + + void compute(InputContext& op_input, OutputContext& op_output, + ExecutionContext& context) override { + auto in_message = op_input.receive("in"); + + auto out_message = gxf::Entity::New(&context); + op_output.emit(out_message); + + HOLOSCAN_LOG_INFO("{} count {}", name(), count_++); + } + + private: + int count_ = 1; +}; + +/////////////////////////////////////////////////////////////////////////////// +// Utility Applications +/////////////////////////////////////////////////////////////////////////////// + +// The ASCII graphs are Greg's excellent additions + +/* CycleWithSourceApp + * + * OneOut--->TwoInOneOut--->OneInOneOut + * ^ | + * | | + * +---------------+ + */ +class CycleWithSourceApp : public holoscan::Application { + public: + using Application::Application; + + void compose() override { + using namespace holoscan; + auto one_out = + make_operator("OneOut", make_condition("count-condition", 1)); + auto two_in_one_out = + make_operator("TwoInOneOut", make_condition(10)); + auto one_in_one_out = make_operator("OneInOneOut"); + + add_flow(one_out, two_in_one_out, {{"out", "in0"}}); + add_flow(two_in_one_out, one_in_one_out, {{"out", "in"}}); + add_flow(one_in_one_out, two_in_one_out, {{"out", "in1"}}); + } +}; + +/* MiddleCycleApp + * + * OneOut--->TwoInOneOut--->OneInOneOut--->rx + * ^ | + * | | + * +------------+ + */ +class MiddleCycleApp : public holoscan::Application { + public: + using Application::Application; + + void compose() override { + using namespace holoscan; + auto one_out = + make_operator("OneOut", make_condition("count-condition", 1)); + auto two_in_one_out = + make_operator("TwoInOneOut", make_condition(10)); + auto one_in_one_out = make_operator("OneInOneOut"); + auto rx = make_operator("PingRx"); + + add_flow(one_out, two_in_one_out, {{"out", "in0"}}); + add_flow(two_in_one_out, one_in_one_out, {{"out", "in"}}); + add_flow(one_in_one_out, two_in_one_out, {{"out", "in1"}}); + add_flow(one_in_one_out, rx); + } +}; + +/* CycleWithLeaf + * + * root--->middle--->leaf + * ^ | + * | | + * +-------+ + */ +class CycleWithLeaf : public holoscan::Application { + public: + using Application::Application; + + void compose() override { + using namespace holoscan; + auto root = make_operator( + "root", make_condition("count-condition", 5)); + auto middle = make_operator("middle"); + auto rx = make_operator("leaf"); + + add_flow(root, middle); + add_flow(middle, root); + add_flow(middle, rx); + } +}; + +/* TwoRootsOneCycle + * + * +--------+ + * | ^ + * | | + * root2--->middle2--->last + * ^ + * | + * | + * root1--->middle1 + */ +class TwoRootsOneCycle : public holoscan::Application { + public: + using Application::Application; + + void compose() override { + using namespace holoscan; + + auto root1 = + make_operator("root1", make_condition("count-condition", 5)); + + auto root2 = + make_operator("root2", make_condition("count-condition", 5)); + + auto middle1 = make_operator("middle1"); + auto middle2 = make_operator("middle2"); + + auto last = make_operator("last"); + + add_flow(root2, middle2, {{"out", "in0"}}); + add_flow(middle2, last); + add_flow(last, middle2, {{"out", "in2"}}); + + add_flow(root1, 
 middle1);
+    add_flow(middle1, middle2, {{"out", "in1"}});
+  }
+};
+
+/* TwoCyclesVariant1
+ *
+ * start--->middle--->end
+ *   ^  |     ^  |
+ *   |  |     |  |
+ *   +-------+ +------+
+ *
+ * middle node is triggered first in this case as start and end have mandatory input ports
+ */
+class TwoCyclesVariant1 : public holoscan::Application {
+ public:
+  using Application::Application;
+
+  void compose() override {
+    using namespace holoscan;
+
+    auto start = make_operator<OneInOneOutOp>("start", make_condition<CountCondition>(5));
+
+    auto middle = make_operator<TwoInTwoOutOp>("middle");
+
+    auto end = make_operator<OneInOneOutOp>("end");
+
+    // First cycle
+    add_flow(start, middle, {{"out", "in0"}});
+    add_flow(middle, end, {{"out0", "in"}});
+
+    // Second cycle
+    add_flow(middle, start, {{"out1", "in"}});
+    add_flow(end, middle, {{"out", "in1"}});
+  }
+};
+
+/* TwoCyclesVariant2
+ *
+ * same layout as TwoCyclesVariant1
+ *
+ * start--->middle--->end
+ *   ^  |     ^  |
+ *   |  |     |  |
+ *   +-------+ +------+
+ *
+ * The difference is that start is triggered first in this case as start and end have optional
+ * input ports.
+ */
+class TwoCyclesVariant2 : public holoscan::Application {
+ public:
+  using Application::Application;
+
+  void compose() override {
+    using namespace holoscan;
+
+    auto start = make_operator<OneOptionalInOneOutOp>("start", make_condition<CountCondition>(5));
+
+    auto middle = make_operator<TwoInTwoOutOp>("middle");
+
+    auto end = make_operator<OneOptionalInOneOutOp>("end");
+
+    // First cycle
+    add_flow(start, middle, {{"out", "in0"}});
+    add_flow(middle, end, {{"out0", "in"}});
+
+    // Second cycle
+    add_flow(middle, start, {{"out1", "in"}});
+    add_flow(end, middle, {{"out", "in1"}});
+  }
+};
+
+class ThreePathsOneRootOneLeaf : public holoscan::Application {
+ public:
+  using Application::Application;
+
+  void compose() override {
+    using namespace holoscan;
+
+    auto root = make_operator<OneOutOp>("root", make_condition<CountCondition>(5));
+
+    auto middle1 = make_operator<OneInOneOutOp>("middle1");
+    auto middle2 = make_operator<OneInOneOutOp>("middle2");
+    auto middle3 = make_operator<OneInOneOutOp>("middle3");
+
+    auto middle4 = make_operator<ThreeInOneOutOp>("middle4");
+
+    auto leaf = make_operator<OneInOp>("leaf");
+
+    add_flow(root, middle1);
+    add_flow(root, middle2);
+    add_flow(root, middle3);
+
+    add_flow(middle1, middle4, {{"out", "in0"}});
+    add_flow(middle2, middle4, {{"out", "in1"}});
+    add_flow(middle3, middle4, {{"out", "in2"}});
+
+    add_flow(middle4, leaf);
+  }
+};
+
+}  // namespace
+}  // namespace holoscan
+
+#endif /* TEST_FLOW_TRACKING_SAMPLE_TEST_GRAPHS_HPP */
diff --git a/tests/holoinfer/inference/test_core.hpp b/tests/holoinfer/inference/test_core.hpp
index 9e6533c6..46218f33 100644
--- a/tests/holoinfer/inference/test_core.hpp
+++ b/tests/holoinfer/inference/test_core.hpp
@@ -174,7 +174,11 @@ class HoloInferTests {
       {31, "TRT backend, Parallel inference on multi-GPU with Output on host"},
       {32, "TRT backend, multi rank test (rank 5)"},
       {33, "TRT backend, multi rank test (rank 9)"},
-      {34, "Torch backend, Basic inference"}};
+      {34, "Torch backend, Basic inference"},
+      {35, "ONNX backend, Basic parallel end-to-end cuda inference"},
+      {36, "ONNX backend, Input on host, cuda inference"},
+      {37, "ONNX backend, Output on host, cuda inference"},
+      {38, "ONNX backend, Input and output on device, CPU inference"}};
 };
 
 #endif /* HOLOINFER_INFERENCE_TESTS_HPP */
diff --git a/tests/holoinfer/inference/test_inference.cpp b/tests/holoinfer/inference/test_inference.cpp
index 21fbaa79..1f03eda5 100644
--- a/tests/holoinfer/inference/test_inference.cpp
+++ b/tests/holoinfer/inference/test_inference.cpp
@@ -17,6 +17,8 @@
 
 #include "test_core.hpp"
 
+#include
+
 #include
 #include
 #include
 
@@ -149,11 +151,45 @@ void
HoloInferTests::inference_tests() { inference_specs_->output_per_model_.at("m2_infer")->host_buffer_->resize(dbs); if (use_onnxruntime) { + backend = "onnxrt"; + + // Test: ONNX backend, Basic parallel end-to-end cuda inference + input_on_cuda = true; + output_on_cuda = true; + infer_on_cpu = false; + status = prepare_for_inference(); + status = do_inference(); + holoinfer_assert(status, + test_module, + 35, + test_identifier_infer.at(35), + HoloInfer::holoinfer_code::H_SUCCESS); + + // Test: ONNX backend, Input on host, cuda inference + input_on_cuda = false; + status = prepare_for_inference(); + status = do_inference(); + holoinfer_assert(status, + test_module, + 36, + test_identifier_infer.at(36), + HoloInfer::holoinfer_code::H_SUCCESS); + + // Test: ONNX backend, Output on host, cuda inference + input_on_cuda = true; + output_on_cuda = false; + status = prepare_for_inference(); + status = do_inference(); + holoinfer_assert(status, + test_module, + 37, + test_identifier_infer.at(37), + HoloInfer::holoinfer_code::H_SUCCESS); + // Test: ONNX backend, Basic parallel inference on CPU input_on_cuda = false; output_on_cuda = false; infer_on_cpu = true; - backend = "onnxrt"; status = prepare_for_inference(); status = do_inference(); holoinfer_assert(status, @@ -162,7 +198,21 @@ void HoloInferTests::inference_tests() { test_identifier_infer.at(17), HoloInfer::holoinfer_code::H_SUCCESS); + // Test: ONNX backend, Input and output on device, CPU inference + input_on_cuda = true; + output_on_cuda = true; + infer_on_cpu = true; + status = prepare_for_inference(); + status = do_inference(); + holoinfer_assert(status, + test_module, + 38, + test_identifier_infer.at(38), + HoloInfer::holoinfer_code::H_SUCCESS); + // Test: ONNX backend, Basic sequential inference on CPU + input_on_cuda = false; + output_on_cuda = false; parallel_inference = false; status = prepare_for_inference(); status = do_inference(); @@ -172,197 +222,172 @@ void HoloInferTests::inference_tests() { test_identifier_infer.at(18), HoloInfer::holoinfer_code::H_SUCCESS); - if (is_x86_64) { - // Test: ONNX backend, Basic sequential inference on GPU - infer_on_cpu = false; - status = prepare_for_inference(); - status = do_inference(); - holoinfer_assert(status, - test_module, - 19, - test_identifier_infer.at(19), - HoloInfer::holoinfer_code::H_SUCCESS); - - // Test: ONNX backend, Basic parallel inference on GPU - parallel_inference = true; - status = prepare_for_inference(); - status = do_inference(); - holoinfer_assert(status, - test_module, - 20, - test_identifier_infer.at(20), - HoloInfer::holoinfer_code::H_SUCCESS); - - // Test: ONNX backend, Empty host input - dbs = inference_specs_->data_per_tensor_.at("m1_pre_proc")->host_buffer_->size(); - inference_specs_->data_per_tensor_.at("m1_pre_proc")->host_buffer_->resize(0); - status = do_inference(); - holoinfer_assert(status, - test_module, - 21, - test_identifier_infer.at(21), - HoloInfer::holoinfer_code::H_ERROR); - inference_specs_->data_per_tensor_.at("m1_pre_proc")->host_buffer_->resize(dbs); - - // Test: ONNX backend, Empty host output - dbs = inference_specs_->output_per_model_.at("m2_infer")->host_buffer_->size(); - inference_specs_->output_per_model_.at("m2_infer")->host_buffer_->resize(0); - status = do_inference(); - holoinfer_assert(status, - test_module, - 22, - test_identifier_infer.at(22), - HoloInfer::holoinfer_code::H_ERROR); - inference_specs_->output_per_model_.at("m2_infer")->host_buffer_->resize(dbs); - } else { - // Test: ONNX backend on ARM, Basic sequential 
inference on GPU - infer_on_cpu = false; - status = prepare_for_inference(); - holoinfer_assert(status, - test_module, - 23, - test_identifier_infer.at(23), - HoloInfer::holoinfer_code::H_ERROR); - } + // Test: ONNX backend, Basic sequential inference on GPU + infer_on_cpu = false; + status = prepare_for_inference(); + status = do_inference(); + holoinfer_assert(status, + test_module, + 19, + test_identifier_infer.at(19), + HoloInfer::holoinfer_code::H_SUCCESS); - // Multi-GPU tests - cudaDeviceProp device_prop; - auto dev_id = 1; - backend = "trt"; - auto cstatus = cudaGetDeviceProperties(&device_prop, dev_id); - device_map.at("model_1") = "1"; - - if (cstatus == cudaSuccess) { - // Test: TRT backend, Basic sequential inference on multi-GPU - input_on_cuda = true; - output_on_cuda = true; - parallel_inference = false; - status = prepare_for_inference(); - status = do_inference(); - holoinfer_assert(status, - test_module, - 27, - test_identifier_infer.at(27), - HoloInfer::holoinfer_code::H_SUCCESS); - - // Test: TRT backend, Basic parallel inference on multi-GPU - parallel_inference = true; - status = prepare_for_inference(); - status = do_inference(); - holoinfer_assert(status, - test_module, - 28, - test_identifier_infer.at(28), - HoloInfer::holoinfer_code::H_SUCCESS); - - // Test: TRT backend, Parallel inference on multi-GPU with I/O on host - input_on_cuda = false; - output_on_cuda = false; - status = prepare_for_inference(); - status = do_inference(); - holoinfer_assert(status, - test_module, - 29, - test_identifier_infer.at(29), - HoloInfer::holoinfer_code::H_SUCCESS); - - // Test: TRT backend, Parallel inference on multi-GPU with Input on host - input_on_cuda = false; - output_on_cuda = true; - status = prepare_for_inference(); - status = do_inference(); - holoinfer_assert(status, - test_module, - 30, - test_identifier_infer.at(30), - HoloInfer::holoinfer_code::H_SUCCESS); - - // Test: TRT backend, Parallel inference on multi-GPU with Output on host - input_on_cuda = true; - output_on_cuda = false; - status = prepare_for_inference(); - status = do_inference(); - holoinfer_assert(status, - test_module, - 31, - test_identifier_infer.at(31), - HoloInfer::holoinfer_code::H_SUCCESS); - } else { - // make sure the last error is reset, else Torch tests below will fail since they check for - // the last error without doing a CUDA call before. 
- cudaGetLastError(); - } - device_map.at("model_1") = "0"; - - if (is_x86_64) { - device_map.at("model_2") = "1"; - if (cstatus == cudaSuccess) { - // Test: ONNX backend, Basic sequential inference on multi-GPU - status = prepare_for_inference(); - status = do_inference(); - holoinfer_assert(status, - test_module, - 24, - test_identifier_infer.at(24), - HoloInfer::holoinfer_code::H_SUCCESS); - - // Test: ONNX backend, Basic parallel inference on multi-GPU - parallel_inference = true; - status = prepare_for_inference(); - status = do_inference(); - holoinfer_assert(status, - test_module, - 26, - test_identifier_infer.at(26), - HoloInfer::holoinfer_code::H_SUCCESS); - } else { - // Test: ONNX backend, Inference single GPU with multi-GPU settings - status = prepare_for_inference(); - holoinfer_assert(status, - test_module, - 25, - test_identifier_infer.at(25), - HoloInfer::holoinfer_code::H_ERROR); - } - device_map.at("model_2") = "0"; - } + // Test: ONNX backend, Basic parallel inference on GPU + parallel_inference = true; + status = prepare_for_inference(); + status = do_inference(); + holoinfer_assert(status, + test_module, + 20, + test_identifier_infer.at(20), + HoloInfer::holoinfer_code::H_SUCCESS); - // test multi-rank + // Test: ONNX backend, Empty host input + dbs = inference_specs_->data_per_tensor_.at("m1_pre_proc")->host_buffer_->size(); + inference_specs_->data_per_tensor_.at("m1_pre_proc")->host_buffer_->resize(0); + status = do_inference(); + holoinfer_assert( + status, test_module, 21, test_identifier_infer.at(21), HoloInfer::holoinfer_code::H_ERROR); + inference_specs_->data_per_tensor_.at("m1_pre_proc")->host_buffer_->resize(dbs); - auto original_path = model_path_map["model_1"]; - auto original_dim = in_tensor_dimensions["m1_pre_proc"]; + // Test: ONNX backend, Empty host output + dbs = inference_specs_->output_per_model_.at("m2_infer")->host_buffer_->size(); + inference_specs_->output_per_model_.at("m2_infer")->host_buffer_->resize(0); + status = do_inference(); + holoinfer_assert( + status, test_module, 22, test_identifier_infer.at(22), HoloInfer::holoinfer_code::H_ERROR); + inference_specs_->output_per_model_.at("m2_infer")->host_buffer_->resize(dbs); + } - model_path_map["model_1"] = model_folder + "identity_model_5r.onnx"; - model_path_map["model_2"] = model_folder + "identity_model_5r.onnx"; + // Multi-GPU tests + cudaDeviceProp device_prop; + auto dev_id = 1; + backend = "trt"; + auto cstatus = cudaGetDeviceProperties(&device_prop, dev_id); + device_map.at("model_1") = "1"; - in_tensor_dimensions["m1_pre_proc"] = {1, 1, 1, 1, 1}; - in_tensor_dimensions["m2_pre_proc"] = {1, 1, 1, 1, 1}; + if (cstatus == cudaSuccess) { + // Test: TRT backend, Basic sequential inference on multi-GPU + input_on_cuda = true; + output_on_cuda = true; + parallel_inference = false; + status = prepare_for_inference(); + status = do_inference(); + holoinfer_assert(status, + test_module, + 27, + test_identifier_infer.at(27), + HoloInfer::holoinfer_code::H_SUCCESS); + // Test: TRT backend, Basic parallel inference on multi-GPU + parallel_inference = true; status = prepare_for_inference(); status = do_inference(); holoinfer_assert(status, test_module, - 32, - test_identifier_infer.at(32), + 28, + test_identifier_infer.at(28), HoloInfer::holoinfer_code::H_SUCCESS); - model_path_map["model_1"] = model_folder + "identity_model_9r.onnx"; - model_path_map["model_2"] = model_folder + "identity_model_9r.onnx"; + // Test: TRT backend, Parallel inference on multi-GPU with I/O on host + input_on_cuda = 
false; + output_on_cuda = false; + status = prepare_for_inference(); + status = do_inference(); + holoinfer_assert(status, + test_module, + 29, + test_identifier_infer.at(29), + HoloInfer::holoinfer_code::H_SUCCESS); - in_tensor_dimensions["m1_pre_proc"] = {1, 1, 1, 1, 1, 1, 1, 1, 1}; - in_tensor_dimensions["m2_pre_proc"] = {1, 1, 1, 1, 1, 1, 1, 1, 1}; + // Test: TRT backend, Parallel inference on multi-GPU with Input on host + input_on_cuda = false; + output_on_cuda = true; + status = prepare_for_inference(); + status = do_inference(); + holoinfer_assert(status, + test_module, + 30, + test_identifier_infer.at(30), + HoloInfer::holoinfer_code::H_SUCCESS); + // Test: TRT backend, Parallel inference on multi-GPU with Output on host + input_on_cuda = true; + output_on_cuda = false; status = prepare_for_inference(); status = do_inference(); - holoinfer_assert( - status, test_module, 33, test_identifier_infer.at(33), HoloInfer::holoinfer_code::H_ERROR); + holoinfer_assert(status, + test_module, + 31, + test_identifier_infer.at(31), + HoloInfer::holoinfer_code::H_SUCCESS); + } else { + // make sure the last error is reset, else Torch tests below will fail since they check for + // the last error without doing a CUDA call before. + cudaGetLastError(); + } + device_map.at("model_1") = "0"; - model_path_map["model_1"] = original_path; - model_path_map["model_2"] = original_path; + device_map.at("model_2") = "1"; + if (cstatus == cudaSuccess) { + // Test: ONNX backend, Basic sequential inference on multi-GPU + status = prepare_for_inference(); + status = do_inference(); + holoinfer_assert(status, + test_module, + 24, + test_identifier_infer.at(24), + HoloInfer::holoinfer_code::H_SUCCESS); - in_tensor_dimensions["m1_pre_proc"] = original_dim; - in_tensor_dimensions["m2_pre_proc"] = original_dim; + // Test: ONNX backend, Basic parallel inference on multi-GPU + parallel_inference = true; + status = prepare_for_inference(); + status = do_inference(); + holoinfer_assert(status, + test_module, + 26, + test_identifier_infer.at(26), + HoloInfer::holoinfer_code::H_SUCCESS); + } else { + // Test: ONNX backend, Inference single GPU with multi-GPU settings + status = prepare_for_inference(); + holoinfer_assert( + status, test_module, 25, test_identifier_infer.at(25), HoloInfer::holoinfer_code::H_ERROR); } + device_map.at("model_2") = "0"; + + // test multi-rank + + auto original_path = model_path_map["model_1"]; + auto original_dim = in_tensor_dimensions["m1_pre_proc"]; + + model_path_map["model_1"] = model_folder + "identity_model_5r.onnx"; + model_path_map["model_2"] = model_folder + "identity_model_5r.onnx"; + + in_tensor_dimensions["m1_pre_proc"] = {1, 1, 1, 1, 1}; + in_tensor_dimensions["m2_pre_proc"] = {1, 1, 1, 1, 1}; + + status = prepare_for_inference(); + status = do_inference(); + holoinfer_assert( + status, test_module, 32, test_identifier_infer.at(32), HoloInfer::holoinfer_code::H_SUCCESS); + + model_path_map["model_1"] = model_folder + "identity_model_9r.onnx"; + model_path_map["model_2"] = model_folder + "identity_model_9r.onnx"; + + in_tensor_dimensions["m1_pre_proc"] = {1, 1, 1, 1, 1, 1, 1, 1, 1}; + in_tensor_dimensions["m2_pre_proc"] = {1, 1, 1, 1, 1, 1, 1, 1, 1}; + + status = prepare_for_inference(); + status = do_inference(); + holoinfer_assert( + status, test_module, 33, test_identifier_infer.at(33), HoloInfer::holoinfer_code::H_ERROR); + + model_path_map["model_1"] = original_path; + model_path_map["model_2"] = original_path; + + in_tensor_dimensions["m1_pre_proc"] = original_dim; + 
in_tensor_dimensions["m2_pre_proc"] = original_dim; if (use_torch) { // Test: torch backend, Basic inference @@ -409,10 +434,16 @@ void HoloInferTests::inference_tests() { // cleaning engine files for (const auto& file : std::filesystem::directory_iterator(model_folder)) { if (file.is_regular_file()) { - auto filename = file.path().filename().string(); + const auto filename = file.path().filename().string(); if (filename.find(".engine.") != std::string::npos) { std::filesystem::remove(file.path()); - HOLOSCAN_LOG_INFO("Cleaning up engine file {}: ", filename); + HOLOSCAN_LOG_INFO("Cleaning up engine file: {}", filename); + } + } else if (file.is_directory()) { + const auto directory = file.path().string(); + if (directory.find("_onnx_cache_") != std::string::npos) { + std::filesystem::remove_all(file.path()); + HOLOSCAN_LOG_INFO("Cleaning up onnx cache directory: {}", directory); } } } diff --git a/tests/holoinfer/inference/test_parameters.cpp b/tests/holoinfer/inference/test_parameters.cpp index 3cf19fdd..47276cee 100644 --- a/tests/holoinfer/inference/test_parameters.cpp +++ b/tests/holoinfer/inference/test_parameters.cpp @@ -17,6 +17,8 @@ #include "test_core.hpp" +#include + #include #include @@ -263,8 +265,11 @@ void HoloInferTests::parameter_setup_test() { backend = "onnxrt"; status = create_specifications(); clear_specs(); - holoinfer_assert( - status, test_module, 20, test_identifier_params.at(20), HoloInfer::holoinfer_code::H_ERROR); + holoinfer_assert(status, + test_module, + 20, + test_identifier_params.at(20), + HoloInfer::holoinfer_code::H_SUCCESS); // Test: ONNX backend, incorrect model file format backend = "onnxrt"; @@ -285,7 +290,6 @@ void HoloInferTests::parameter_setup_test() { holoinfer_assert( status, test_module, 22, test_identifier_params.at(22), HoloInfer::holoinfer_code::H_ERROR); - if (!is_x86_64) { infer_on_cpu = true; } // Test: ONNX backend, Default is_engine_path = false; input_on_cuda = false; diff --git a/tests/operators/operator_classes.cpp b/tests/operators/operator_classes.cpp index f378bc85..8e936109 100644 --- a/tests/operators/operator_classes.cpp +++ b/tests/operators/operator_classes.cpp @@ -27,8 +27,10 @@ #include "../config.hpp" #include "../utils.hpp" +#include "common/assert.hpp" #include "holoscan/core/arg.hpp" #include "holoscan/core/component_spec.hpp" +#include "holoscan/core/conditions/gxf/boolean.hpp" #include "holoscan/core/config.hpp" #include "holoscan/core/executor.hpp" #include "holoscan/core/fragment.hpp" @@ -39,7 +41,6 @@ #include "holoscan/core/resources/gxf/block_memory_pool.hpp" #include "holoscan/core/resources/gxf/cuda_stream_pool.hpp" #include "holoscan/core/resources/gxf/unbounded_allocator.hpp" -#include "common/assert.hpp" #ifdef HOLOSCAN_BUILD_AJA #include "holoscan/operators/aja_source/aja_source.hpp" @@ -243,6 +244,94 @@ TEST_F(OperatorClassesWithGXFContext, TestHolovizOp) { << log_output << "\n===========\n"; } +TEST_F(OperatorClassesWithGXFContext, TestHolovizOpWindowCloseNone) { + const std::string name{"holoviz"}; + ArgList kwargs = F.from_config("holoviz"); + + testing::internal::CaptureStderr(); + + auto op = F.make_operator(name, kwargs); + op->initialize(); + std::string log_output = testing::internal::GetCapturedStderr(); + // error will be logged due to initialize before Fragment was composed + EXPECT_TRUE(log_output.find("error") != std::string::npos) << "=== LOG ===\n" + << log_output << "\n===========\n"; + // no warnings if no window close argument is provided + 
diff --git a/tests/operators/operator_classes.cpp b/tests/operators/operator_classes.cpp
index f378bc85..8e936109 100644
--- a/tests/operators/operator_classes.cpp
+++ b/tests/operators/operator_classes.cpp
@@ -27,8 +27,10 @@
 
 #include "../config.hpp"
 #include "../utils.hpp"
+#include "common/assert.hpp"
 #include "holoscan/core/arg.hpp"
 #include "holoscan/core/component_spec.hpp"
+#include "holoscan/core/conditions/gxf/boolean.hpp"
 #include "holoscan/core/config.hpp"
 #include "holoscan/core/executor.hpp"
 #include "holoscan/core/fragment.hpp"
@@ -39,7 +41,6 @@
 #include "holoscan/core/resources/gxf/block_memory_pool.hpp"
 #include "holoscan/core/resources/gxf/cuda_stream_pool.hpp"
 #include "holoscan/core/resources/gxf/unbounded_allocator.hpp"
-#include "common/assert.hpp"
 
 #ifdef HOLOSCAN_BUILD_AJA
 #include "holoscan/operators/aja_source/aja_source.hpp"
@@ -243,6 +244,94 @@ TEST_F(OperatorClassesWithGXFContext, TestHolovizOp) {
                                       << log_output << "\n===========\n";
 }
 
+TEST_F(OperatorClassesWithGXFContext, TestHolovizOpWindowCloseNone) {
+  const std::string name{"holoviz"};
+  ArgList kwargs = F.from_config("holoviz");
+
+  testing::internal::CaptureStderr();
+
+  auto op = F.make_operator<ops::HolovizOp>(name, kwargs);
+  op->initialize();
+  std::string log_output = testing::internal::GetCapturedStderr();
+  // error will be logged due to initialize before Fragment was composed
+  EXPECT_TRUE(log_output.find("error") != std::string::npos) << "=== LOG ===\n"
+                                                             << log_output << "\n===========\n";
+  // no warnings if no window close argument is provided
+  EXPECT_TRUE(log_output.find("window_close_condition") == std::string::npos)
+      << "=== LOG ===\n"
+      << log_output << "\n===========\n";
+  EXPECT_TRUE(log_output.find("window_close_scheduling_term") == std::string::npos)
+      << "=== LOG ===\n"
+      << log_output << "\n===========\n";
+}
+
+TEST_F(OperatorClassesWithGXFContext, TestHolovizOpWindowCloseCurrentName) {
+  const std::string name{"holoviz"};
+  ArgList kwargs = F.from_config("holoviz");
+  auto close_condition = F.make_condition<BooleanCondition>("window_close");
+  kwargs.add(Arg("window_close_condition", close_condition));
+
+  testing::internal::CaptureStderr();
+
+  auto op = F.make_operator<ops::HolovizOp>(name, kwargs);
+  op->initialize();
+  std::string log_output = testing::internal::GetCapturedStderr();
+  // error will be logged due to initialize before Fragment was composed
+  EXPECT_TRUE(log_output.find("error") != std::string::npos) << "=== LOG ===\n"
+                                                             << log_output << "\n===========\n";
+  // no warnings about window close arguments if the new name is provided
+  EXPECT_TRUE(log_output.find("window_close_condition") == std::string::npos)
+      << "=== LOG ===\n"
+      << log_output << "\n===========\n";
+  EXPECT_TRUE(log_output.find("window_close_scheduling_term") == std::string::npos)
+      << "=== LOG ===\n"
+      << log_output << "\n===========\n";
+}
+
+TEST_F(OperatorClassesWithGXFContext, TestHolovizOpWindowCloseDeprecatedName) {
+  const std::string name{"holoviz"};
+  ArgList kwargs = F.from_config("holoviz");
+  auto close_condition = F.make_condition<BooleanCondition>("window_close");
+  kwargs.add(Arg("window_close_scheduling_term", close_condition));
+
+  testing::internal::CaptureStderr();
+
+  auto op = F.make_operator<ops::HolovizOp>(name, kwargs);
+  op->initialize();
+  std::string log_output = testing::internal::GetCapturedStderr();
+  // error will be logged due to initialize before Fragment was composed
+  EXPECT_TRUE(log_output.find("error") != std::string::npos) << "=== LOG ===\n"
+                                                             << log_output << "\n===========\n";
+  // warning about the deprecated parameter name
+  EXPECT_TRUE(log_output.find("\"window_close_scheduling_term\" was provided, but this parameter "
+                              "name is deprecated") != std::string::npos)
+      << "=== LOG ===\n"
+      << log_output << "\n===========\n";
+}
+
+TEST_F(OperatorClassesWithGXFContext, TestHolovizOpWindowCloseBothNames) {
+  const std::string name{"holoviz"};
+  ArgList kwargs = F.from_config("holoviz");
+  auto close_condition = F.make_condition<BooleanCondition>("window_close");
+  kwargs.add(Arg("window_close_scheduling_term", close_condition));
+  kwargs.add(Arg("window_close_condition", close_condition));
+
+  testing::internal::CaptureStderr();
+
+  auto op = F.make_operator<ops::HolovizOp>(name, kwargs);
+  op->initialize();
+  std::string log_output = testing::internal::GetCapturedStderr();
+  // error will be logged due to initialize before Fragment was composed
+  EXPECT_TRUE(log_output.find("error") != std::string::npos) << "=== LOG ===\n"
+                                                             << log_output << "\n===========\n";
+  // warning about discarding the duplicate deprecated argument
+  EXPECT_TRUE(
+      log_output.find("discarding the duplicate \"window_close_scheduling_term\" argument") !=
+      std::string::npos)
+      << "=== LOG ===\n"
+      << log_output << "\n===========\n";
+}
+
 TEST_F(OperatorClassesWithGXFContext, TestHolovizOpInputSpec) {
   ops::HolovizOp::InputSpec tensor{"video", ops::HolovizOp::InputType::COLOR};
diff --git a/tests/system/multi_receiver_operator_ping_app.cpp b/tests/system/multi_receiver_operator_ping_app.cpp
index ae0cc60e..04d9f82b 100644
--- a/tests/system/multi_receiver_operator_ping_app.cpp
+++ b/tests/system/multi_receiver_operator_ping_app.cpp
@@ -641,7 +641,7 @@ TEST(MultiReceiverOperatorPingApp,
TestPingMultiPortSinglePrecedingCount) { TEST(MultiReceiverOperatorPingApp, TestPingMultiPortSingleSizeFive) { // make sure that debug messages are logged EnvVarWrapper wrapper({ - std::make_pair("HOLOSCAN_LOG_LEVEL", "DEBUG"), + std::make_pair("HOLOSCAN_LOG_LEVEL", "TRACE"), std::make_pair("HOLOSCAN_EXECUTOR_LOG_LEVEL", "INFO"), // quiet multi_thread_scheduler.cpp }); @@ -662,10 +662,8 @@ TEST(MultiReceiverOperatorPingApp, TestPingMultiPortSingleSizeFive) { << "=== LOG ===\n" << log_output << "\n===========\n"; - EXPECT_TRUE( - log_output.find( - "ReceiveError on input port 'receivers': No message received from the input port") != - std::string::npos) + EXPECT_TRUE(log_output.find("Failure receiving message from input port 'receivers': No message " + "received from the input port") != std::string::npos) << "=== LOG ===\n" << log_output << "\n===========\n";