From 3d673252095c11a99147a6e72b4c11150ebd0882 Mon Sep 17 00:00:00 2001
From: HailoRT-Automation
 <98901220+HailoRT-Automation@users.noreply.github.com>
Date: Sun, 29 Sep 2024 11:29:10 +0300
Subject: [PATCH] v4.19.0 (#21)

---
 CMakeLists.txt                                |   2 +-
 common/include/context_switch_defs.h          |   6 +
 common/include/control_protocol.h             |   2 +
 common/include/firmware_status.h              |   1 +
 hailort/.gitignore                            |   2 +
 hailort/CMakeLists.txt                        |  12 +-
 hailort/LICENSE-3RD-PARTY.md                  |   2 +-
 hailort/cmake/common_compiler_options.cmake   |   2 +-
 hailort/cmake/execute_cmake.cmake             |   2 +-
 hailort/cmake/external/pybind11.cmake         |   2 +-
 hailort/cmake/external/spdlog.cmake           |   2 +-
 hailort/common/CMakeLists.txt                 |   9 +-
 hailort/common/buffer_pool.cpp                |  58 ++
 hailort/common/buffer_pool.hpp                |  55 ++
 hailort/common/device_measurements.hpp        |   6 +
 hailort/common/env_vars.hpp                   |  33 +
 .../src/os => common}/file_descriptor.hpp     |   0
 hailort/common/file_utils.cpp                 |  12 +-
 hailort/common/file_utils.hpp                 |   6 +-
 hailort/common/internal_env_vars.hpp          | 106 +++
 hailort/common/logger_macros.hpp              |   2 +
 .../src/os => common}/mmap_buffer.hpp         |   6 +-
 .../os/posix/file_descriptor.cpp              |   2 +-
 .../src => common}/os/posix/mmap_buffer.cpp   |   3 +-
 .../common/os/posix/shared_memory_buffer.cpp  | 112 +++
 hailort/common/os/posix/socket.cpp            |  68 +-
 .../os/windows/file_descriptor.cpp            |   3 +-
 hailort/common/os/windows/mmap_buffer.cpp     |  48 ++
 .../common/os/windows/named_mutex_guard.cpp   |  45 ++
 .../common/os/windows/named_mutex_guard.hpp   |  39 ++
 .../os/windows/shared_memory_buffer.cpp       |  67 ++
 hailort/common/os/windows/socket.cpp          |  71 +-
 .../os/windows/virtual_alloc_guard.cpp        |   2 +-
 .../os/windows/virtual_alloc_guard.hpp        |   0
 hailort/common/shared_memory_buffer.hpp       |  81 +++
 hailort/common/socket.hpp                     |  13 +-
 hailort/common/string_utils.hpp               |   9 +
 hailort/common/thread_pool.hpp                |  90 +++
 .../utils => common}/thread_safe_queue.hpp    |   0
 hailort/common/utils.hpp                      |   5 +-
 hailort/drivers/common/hailo_ioctl_common.h   |  28 +-
 hailort/hailort_server/CMakeLists.txt         |  26 +-
 hailort/hailort_server/hailort_server.cpp     | 106 ++-
 hailort/hailort_service/CMakeLists.txt        |   4 +-
 hailort/hailort_service/cng_buffer_pool.cpp   |  78 +--
 hailort/hailort_service/cng_buffer_pool.hpp   |  41 +-
 .../hailort_service/hailort_rpc_service.cpp   | 186 ++++-
 .../hailort_service/hailort_rpc_service.hpp   |   7 +
 hailort/hailort_service/hailort_service       |   5 +-
 .../service_resource_manager.hpp              |   7 +
 .../hailort_service/unix/hailort_service.cpp  |  14 +-
 .../vdevice_callbacks_queue.hpp               |  10 +-
 .../windows/hailort_service.cpp               |  14 +-
 .../windows/hailort_service_env_vars.bat      |   7 +-
 hailort/hailortcli/CMakeLists.txt             |  13 +-
 hailort/hailortcli/benchmark_command.cpp      |  37 +-
 hailort/hailortcli/benchmark_command.hpp      |   4 +-
 hailort/hailortcli/board_config_command.cpp   |   3 -
 hailort/hailortcli/command.hpp                |   4 +-
 .../download_action_list_command.cpp          |  14 +-
 .../download_action_list_command.hpp          |   4 +
 hailort/hailortcli/fw_control_command.cpp     |  28 +-
 hailort/hailortcli/fw_control_command.hpp     |  13 +
 hailort/hailortcli/hailortcli.cpp             |   4 +-
 hailort/hailortcli/inference_result.hpp       |   3 +-
 .../measure_nnc_performance_command.cpp       |   9 +-
 hailort/hailortcli/mon_command.cpp            |   1 +
 hailort/hailortcli/run2/live_stats.cpp        |   2 +-
 .../run2/measurement_live_track.cpp           |   2 +-
 .../run2/measurement_live_track.hpp           |   4 +-
 hailort/hailortcli/run2/network_runner.cpp    | 144 ++--
 hailort/hailortcli/run2/run2_command.cpp      |  21 +-
 hailort/hailortcli/run2/run2_command.hpp      |   1 -
 hailort/hailortcli/run_command.cpp            |  51 +-
 hailort/hailortcli/run_command.hpp            |   4 +-
 hailort/hrpc/CMakeLists.txt                   |   2 +-
 hailort/hrpc/client.cpp                       |  50 +-
 hailort/hrpc/client.hpp                       |  11 +-
 .../hrpc/os/pcie/raw_connection_internal.cpp  |  75 +-
 .../hrpc/os/pcie/raw_connection_internal.hpp  |   9 +-
 .../hrpc/os/posix/raw_connection_internal.cpp | 182 ++++-
 .../hrpc/os/posix/raw_connection_internal.hpp |  18 +-
 .../os/windows/raw_connection_internal.cpp    |   6 +-
 .../os/windows/raw_connection_internal.hpp    |   6 +-
 hailort/hrpc/raw_connection.cpp               |  27 +-
 hailort/hrpc/raw_connection.hpp               |  15 +-
 hailort/hrpc/server.cpp                       |  21 +-
 hailort/hrpc/server.hpp                       |   8 +-
 hailort/hrpc_protocol/CMakeLists.txt          |   2 +-
 hailort/hrpc_protocol/rpc.proto               | 104 ++-
 hailort/hrpc_protocol/serializer.cpp          | 375 +++++++++-
 hailort/hrpc_protocol/serializer.hpp          |  74 +-
 hailort/libhailort/CMakeLists.txt             |   8 +-
 hailort/libhailort/bindings/CMakeLists.txt    |  13 +-
 .../bindings/gstreamer/CMakeLists.txt         |  85 ++-
 .../gstreamer/find_libs_for_gstreamer.cmake   |   9 +
 .../find_libs_for_gstreamer_linux.cmake       |   8 +
 .../find_libs_for_gstreamer_windows.cmake     |  87 +++
 .../bindings/gstreamer/gst-hailo/common.hpp   |  24 +-
 .../gst-hailo/gsthailo_allocator.cpp          |  26 +-
 .../gst-hailo/gsthailo_allocator.hpp          |   2 +-
 .../gst-hailo/gsthailo_dmabuf_allocator.cpp   |  33 +-
 .../gst-hailo/gsthailo_dmabuf_allocator.hpp   |   4 +-
 .../gst-hailo/gsthailodevicestats.cpp         |   4 +-
 .../gstreamer/gst-hailo/gsthailonet.cpp       | 654 ++++++++++--------
 .../gstreamer/gst-hailo/gsthailonet.hpp       |  25 +-
 .../gst-hailo/hailo_events/hailo_events.hpp   |   6 +-
 .../bindings/gstreamer/gst-hailo/hailo_gst.h  |  34 +
 .../metadata/hailo_buffer_flag_meta.hpp       |   5 +-
 .../gst-hailo/metadata/tensor_meta.hpp        |  17 +-
 .../os/linux/dma_buf_allocator_wrapper.cpp    |  43 ++
 .../os/linux/dma_buf_allocator_wrapper.hpp    |  37 +
 .../os/windows/dma_buf_allocator_wrapper.cpp  |  37 +
 .../os/windows/dma_buf_allocator_wrapper.hpp  |  36 +
 .../gstreamer/gst-hailo/sync_gsthailonet.cpp  |  16 +-
 .../gstreamer/gst-hailo/sync_gsthailonet.hpp  |   2 +-
 .../libhailort/bindings/python/CMakeLists.txt |   3 -
 .../platform/hailo_platform/__init__.py       |   2 +-
 .../hailo_platform/pyhailort/pyhailort.py     |  80 ++-
 ...b => HRT_0_Async_Inference_Tutorial.ipynb} |  20 +-
 ..._Inference_Multiple_Models_Tutorial.ipynb} |  37 +-
 ...2_Infer_Pipeline_Inference_Tutorial.ipynb} |  26 +-
 ..._Inference_Multiple_Models_Tutorial.ipynb} |  24 +-
 ...=> HRT_4_Power_measurement_Tutorial.ipynb} |  19 +-
 .../bindings/python/platform/setup.py         | 125 ++--
 .../bindings/python/src/CMakeLists.txt        |  89 +--
 .../bindings/python/src/__init__.py           |   0
 .../bindings/python/src/infer_model_api.cpp   |  14 +-
 .../bindings/python/src/infer_model_api.hpp   |   4 +-
 .../bindings/python/src/network_group_api.cpp |   3 +
 .../bindings/python/src/network_group_api.hpp |  22 +
 .../bindings/python/src/pyhailort.cpp         |   1 -
 .../bindings/python/src/vdevice_api.cpp       |   8 +-
 .../bindings/python/src/vdevice_api.hpp       |   4 +
 .../cmake/toolchains/linux.armv7l.cmake       |  13 +-
 .../cmake/toolchains/linux.armv7lhf.cmake     |   5 +-
 .../cmake/toolchains/qnx.aarch64.cmake        |   4 +-
 .../cmake/toolchains/qnx.x86_64.cmake         |   4 +-
 hailort/libhailort/doc/CMakeLists.txt         |   2 +-
 hailort/libhailort/examples/CMakeLists.txt    |   2 +-
 hailort/libhailort/examples/c/CMakeLists.txt  |   2 +-
 .../data_quantization_example/CMakeLists.txt  |   4 +-
 .../c/infer_pipeline_example/CMakeLists.txt   |   4 +-
 .../infer_pipeline_example.c                  |  56 +-
 .../c/multi_device_example/CMakeLists.txt     |   4 +-
 .../CMakeLists.txt                            |   4 +-
 .../CMakeLists.txt                            |   4 +-
 .../power_measurement_example/CMakeLists.txt  |   4 +-
 .../CMakeLists.txt                            |   4 +-
 .../c/raw_streams_example/CMakeLists.txt      |   4 +-
 .../CMakeLists.txt                            |   4 +-
 .../CMakeLists.txt                            |   4 +-
 .../c/vstreams_example/CMakeLists.txt         |   4 +-
 .../c/vstreams_example/vstreams_example.c     |   3 +-
 .../libhailort/examples/cpp/CMakeLists.txt    |   2 +-
 .../CMakeLists.txt                            |   4 +-
 .../async_infer_advanced_example.cpp          | 234 +++----
 .../async_infer_basic_example/CMakeLists.txt  |   4 +-
 .../async_infer_basic_example.cpp             | 117 ++--
 .../cpp/infer_pipeline_example/CMakeLists.txt |   4 +-
 .../infer_pipeline_example.cpp                |  47 +-
 .../cpp/multi_device_example/CMakeLists.txt   |   4 +-
 .../CMakeLists.txt                            |   4 +-
 .../cpp/multi_process_example/CMakeLists.txt  |   4 +-
 .../CMakeLists.txt                            |   4 +-
 .../power_measurement_example/CMakeLists.txt  |   4 +-
 .../CMakeLists.txt                            |   4 +-
 .../CMakeLists.txt                            |   4 +-
 .../cpp/raw_streams_example/CMakeLists.txt    |   4 +-
 .../CMakeLists.txt                            |   4 +-
 .../CMakeLists.txt                            |   4 +-
 .../cpp/vstreams_example/CMakeLists.txt       |   4 +-
 hailort/libhailort/hef.proto                  |  25 +-
 hailort/libhailort/include/hailo/buffer.hpp   |   6 +
 hailort/libhailort/include/hailo/device.hpp   |  22 +-
 hailort/libhailort/include/hailo/expected.hpp |  76 ++
 hailort/libhailort/include/hailo/hailort.h    |  15 +-
 .../include/hailo/hailort_common.hpp          |   4 -
 .../include/hailo/hailort_dma-heap.h          |   6 +-
 .../libhailort/include/hailo/infer_model.hpp  |  71 +-
 .../include/hailo/network_group.hpp           |   4 +
 hailort/libhailort/include/hailo/vdevice.hpp  |  14 +-
 hailort/libhailort/src/CMakeLists.txt         |  10 +-
 hailort/libhailort/src/core_op/CMakeLists.txt |   5 +-
 hailort/libhailort/src/core_op/core_op.cpp    |  16 +-
 hailort/libhailort/src/core_op/core_op.hpp    |   5 +-
 .../action_list_buffer_builder.cpp            | 105 +++
 .../action_list_buffer_builder.hpp            |  38 +-
 .../control_action_list_buffer_builder.cpp    |  70 --
 .../control_action_list_buffer_builder.hpp    |  55 --
 .../ddr_action_list_buffer_builder.cpp        |  98 ---
 .../ddr_action_list_buffer_builder.hpp        |  47 --
 .../core_op/resource_manager/cache_buffer.cpp |  49 +-
 .../core_op/resource_manager/cache_buffer.hpp |  11 +-
 .../resource_manager/cache_manager.cpp        | 376 ++++++----
 .../resource_manager/cache_manager.hpp        |  99 ++-
 .../resource_manager/config_buffer.cpp        |   3 +-
 .../resource_manager/intermediate_buffer.cpp  |  24 +-
 .../resource_manager/intermediate_buffer.hpp  |  14 +-
 .../internal_buffer_planner.cpp               |   9 +-
 .../resource_manager/resource_manager.cpp     | 132 ++--
 .../resource_manager/resource_manager.hpp     |  11 +-
 .../resource_manager_builder.cpp              |  34 +-
 .../src/device_common/CMakeLists.txt          |   2 +-
 .../libhailort/src/device_common/control.cpp  |  62 +-
 .../libhailort/src/device_common/control.hpp  |   4 +-
 .../src/device_common/d2h_event_queue.hpp     |   2 +-
 .../src/device_common/d2h_events_parser.cpp   |   2 +-
 .../libhailort/src/device_common/device.cpp   |  95 ++-
 .../src/device_common/device_internal.cpp     |  12 +-
 hailort/libhailort/src/eth/CMakeLists.txt     |   2 +-
 hailort/libhailort/src/eth/eth_device.cpp     |   2 +-
 .../libhailort/src/eth/hcp_config_core_op.cpp |  21 +
 .../libhailort/src/eth/hcp_config_core_op.hpp |   3 +
 hailort/libhailort/src/hailort.cpp            |   5 +
 hailort/libhailort/src/hailort_defaults.cpp   |  35 +-
 hailort/libhailort/src/hef/CMakeLists.txt     |   2 +-
 .../src/hef/context_switch_actions.cpp        |  58 +-
 .../src/hef/context_switch_actions.hpp        |  35 +
 hailort/libhailort/src/hef/hef.cpp            |  84 ++-
 hailort/libhailort/src/hef/hef_internal.hpp   |   4 +-
 hailort/libhailort/src/hef/layer_info.hpp     |  27 +
 hailort/libhailort/src/mipi/CMakeLists.txt    |   2 +-
 .../libhailort/src/net_flow/CMakeLists.txt    |   2 +-
 .../src/net_flow/ops/nms_post_process.cpp     |   2 -
 .../src/net_flow/ops/softmax_post_process.cpp |  20 +-
 .../src/net_flow/ops/ssd_post_process.cpp     |   2 +-
 .../src/net_flow/ops/ssd_post_process.hpp     |   2 +-
 .../ops/yolov5_bbox_only_post_process.cpp     |   4 +-
 .../src/net_flow/ops/yolov5_post_process.cpp  |   2 +-
 .../net_flow/ops/yolov5_seg_post_process.cpp  |  22 +-
 .../ops/yolov8_bbox_only_post_process.cpp     |   4 +-
 .../src/net_flow/ops/yolov8_post_process.cpp  |   2 +-
 .../src/net_flow/ops/yolox_post_process.cpp   |   2 +-
 .../net_flow/pipeline/async_infer_runner.cpp  |  15 +-
 .../net_flow/pipeline/async_infer_runner.hpp  |   5 +-
 .../pipeline/async_pipeline_builder.cpp       |  24 +-
 .../configured_infer_model_hrpc_client.cpp    | 264 +++----
 .../configured_infer_model_hrpc_client.hpp    |  81 +--
 .../src/net_flow/pipeline/infer_model.cpp     | 265 ++++---
 .../pipeline/infer_model_hrpc_client.cpp      |  30 +-
 .../pipeline/infer_model_hrpc_client.hpp      |  12 +-
 .../pipeline/infer_model_internal.hpp         |  33 +-
 .../net_flow/pipeline/multi_io_elements.cpp   |   7 +-
 .../src/net_flow/pipeline/pipeline.cpp        |   2 +-
 .../src/net_flow/pipeline/pipeline.hpp        |   2 +-
 .../src/net_flow/pipeline/queue_elements.cpp  |  49 +-
 .../src/net_flow/pipeline/queue_elements.hpp  |   5 +
 .../src/net_flow/pipeline/vstream.cpp         |  47 +-
 .../src/net_flow/pipeline/vstream_builder.cpp |   5 -
 .../net_flow/pipeline/vstream_internal.hpp    |  11 +-
 .../src/network_group/CMakeLists.txt          |   2 +-
 .../src/network_group/network_group.cpp       |  30 +-
 .../network_group/network_group_internal.hpp  | 142 +---
 hailort/libhailort/src/os/CMakeLists.txt      |   2 +-
 .../libhailort/src/os/posix/CMakeLists.txt    |   4 +-
 .../src/os/posix/linux/CMakeLists.txt         |   2 +-
 .../libhailort/src/os/posix/linux/event.cpp   |  31 +
 .../src/os/posix/qnx/CMakeLists.txt           |   2 +-
 .../libhailort/src/os/windows/CMakeLists.txt  |   5 +-
 .../libhailort/src/os/windows/mmap_buffer.cpp |  35 -
 .../src/rpc_callbacks/CMakeLists.txt          |   7 +
 .../rpc_callbacks_dispatcher.cpp              | 120 ++++
 .../rpc_callbacks_dispatcher.hpp              |  81 +++
 hailort/libhailort/src/service/CMakeLists.txt |   3 +-
 .../src/service/buffer_pool_per_stream.cpp    | 199 ++++++
 .../src/service/buffer_pool_per_stream.hpp    |  80 +++
 .../src/service/hailort_rpc_client.cpp        |  69 +-
 .../src/service/hailort_rpc_client.hpp        |  43 +-
 .../src/service/network_group_client.cpp      | 212 ++++--
 .../src/service/network_group_client.hpp      | 157 +++++
 .../src/stream_common/CMakeLists.txt          |   3 +-
 .../src/stream_common/async_stream_base.hpp   |   2 +-
 .../src/stream_common/nms_stream.cpp          |  15 +-
 .../src/stream_common/nms_stream.hpp          |   1 +
 .../src/stream_common/stream_buffer_pool.hpp  |   2 +-
 .../src/stream_common/stream_internal.cpp     |  12 +
 .../src/stream_common/stream_internal.hpp     |   6 +-
 .../libhailort/src/transform/CMakeLists.txt   |   2 +-
 hailort/libhailort/src/transform/eigen.hpp    |  25 +
 .../libhailort/src/transform/transform.cpp    |  74 +-
 hailort/libhailort/src/utils/CMakeLists.txt   |   6 +-
 .../libhailort/src/utils/buffer_storage.cpp   |  73 +-
 .../libhailort/src/utils/buffer_storage.hpp   |  29 +
 .../libhailort/src/utils/hailort_common.cpp   |  26 -
 .../libhailort/src/utils/hailort_logger.cpp   |  29 +-
 .../libhailort/src/utils/hailort_logger.hpp   |  11 +-
 .../src/utils/measurement_utils.cpp           |   1 +
 .../src/utils/profiler/CMakeLists.txt         |   2 +-
 .../libhailort/src/utils/profiler/handler.hpp |   5 +-
 .../src/utils/profiler/monitor_handler.cpp    |   9 +-
 .../src/utils/profiler/monitor_handler.hpp    |   4 +-
 .../src/utils/profiler/profiler_utils.hpp     |   2 +-
 .../profiler/scheduler_profiler_handler.cpp   | 225 ++----
 .../profiler/scheduler_profiler_handler.hpp   |  23 +-
 .../libhailort/src/utils/profiler/tracer.cpp  |  26 +-
 .../libhailort/src/utils/profiler/tracer.hpp  |  22 +-
 .../src/utils/profiler/tracer_macros.hpp      |  10 -
 .../soc_utils/partial_cluster_reader.cpp      |   6 +-
 .../libhailort/src/utils/thread_safe_map.hpp  |   1 +
 hailort/libhailort/src/vdevice/CMakeLists.txt |   2 +-
 .../src/vdevice/callback_reorder_queue.hpp    |   2 +-
 .../scheduler/infer_request_accumulator.hpp   |   2 +-
 .../scheduler/scheduled_core_op_state.cpp     |   4 +-
 .../scheduler/scheduled_core_op_state.hpp     |   2 +-
 .../src/vdevice/scheduler/scheduler.cpp       |  94 ++-
 .../src/vdevice/scheduler/scheduler.hpp       |  12 +-
 .../src/vdevice/scheduler/scheduler_base.hpp  |   1 +
 .../vdevice/scheduler/scheduler_oracle.cpp    |   3 +-
 hailort/libhailort/src/vdevice/vdevice.cpp    |  70 +-
 .../src/vdevice/vdevice_core_op.cpp           |  51 +-
 .../src/vdevice/vdevice_core_op.hpp           |  10 +-
 .../src/vdevice/vdevice_hrpc_client.cpp       |  93 ++-
 .../src/vdevice/vdevice_hrpc_client.hpp       |  15 +-
 .../src/vdevice/vdevice_internal.hpp          |   9 +-
 hailort/libhailort/src/vdma/CMakeLists.txt    |   4 +-
 .../src/vdma/channel/boundary_channel.cpp     | 143 +++-
 .../src/vdma/channel/boundary_channel.hpp     |  18 +-
 .../channel}/transfer_common.cpp              |  41 +-
 .../channel}/transfer_common.hpp              |   4 +-
 .../src/vdma/circular_stream_buffer_pool.cpp  |  17 +-
 .../src/vdma/circular_stream_buffer_pool.hpp  |   8 +-
 .../src/vdma/driver/hailort_driver.cpp        |  82 ++-
 .../src/vdma/driver/hailort_driver.hpp        |  32 +-
 .../src/vdma/driver/os/driver_os_specific.hpp |   2 +-
 .../driver/os/windows/driver_os_specific.cpp  |   3 +-
 .../src/vdma/integrated/integrated_device.cpp |  39 +-
 .../src/vdma/integrated/integrated_device.hpp |   7 +-
 .../src/vdma/memory/buffer_requirements.cpp   |  53 +-
 .../src/vdma/memory/buffer_requirements.hpp   |   3 +
 .../src/vdma/memory/continuous_buffer.hpp     |   1 -
 .../src/vdma/memory/continuous_edge_layer.hpp |   1 -
 .../src/vdma/memory/dma_able_buffer.cpp       |   3 +-
 .../src/vdma/memory/dma_able_buffer.hpp       |   1 -
 .../src/vdma/memory/mapped_buffer.cpp         |  22 +-
 .../src/vdma/memory/mapped_buffer.hpp         |   6 +-
 .../libhailort/src/vdma/pcie/pcie_device.cpp  |  48 +-
 .../libhailort/src/vdma/pcie/pcie_device.hpp  |   6 +-
 .../src/vdma/pcie/pcie_device_hrpc_client.cpp |  92 +++
 .../src/vdma/pcie/pcie_device_hrpc_client.hpp |  77 +++
 hailort/libhailort/src/vdma/pcie_session.cpp  |  16 +-
 hailort/libhailort/src/vdma/pcie_session.hpp  |  15 +-
 .../src/vdma/vdma_config_core_op.cpp          |  80 ++-
 .../src/vdma/vdma_config_core_op.hpp          |   7 +-
 .../src/vdma/vdma_config_manager.cpp          |   3 +-
 hailort/libhailort/src/vdma/vdma_device.cpp   |  19 -
 hailort/libhailort/src/vdma/vdma_device.hpp   |   2 -
 hailort/libhailort/src/vdma/vdma_stream.cpp   |  40 +-
 hailort/libhailort/src/vdma/vdma_stream.hpp   |   2 +
 hailort/prepare_externals.cmake               |   2 +-
 hailort/prepare_externals/CMakeLists.txt      |   2 +-
 hailort/rpc/hailort_rpc.proto                 |  18 +-
 hailort/rpc/rpc_definitions.hpp               |  13 +-
 hailort/scripts/download_firmware_eth.cmd     |   2 +-
 hailort/scripts/download_firmware_eth.sh      |   2 +-
 hailort/scripts/download_hefs.cmd             |   2 +-
 hailort/scripts/download_hefs.sh              |   2 +-
 357 files changed, 7489 insertions(+), 3690 deletions(-)
 create mode 100644 hailort/common/buffer_pool.cpp
 create mode 100644 hailort/common/buffer_pool.hpp
 create mode 100644 hailort/common/env_vars.hpp
 rename hailort/{libhailort/src/os => common}/file_descriptor.hpp (100%)
 create mode 100644 hailort/common/internal_env_vars.hpp
 rename hailort/{libhailort/src/os => common}/mmap_buffer.hpp (96%)
 rename hailort/{libhailort/src => common}/os/posix/file_descriptor.cpp (96%)
 rename hailort/{libhailort/src => common}/os/posix/mmap_buffer.cpp (97%)
 create mode 100644 hailort/common/os/posix/shared_memory_buffer.cpp
 rename hailort/{libhailort/src => common}/os/windows/file_descriptor.cpp (94%)
 create mode 100644 hailort/common/os/windows/mmap_buffer.cpp
 create mode 100644 hailort/common/os/windows/named_mutex_guard.cpp
 create mode 100644 hailort/common/os/windows/named_mutex_guard.hpp
 create mode 100644 hailort/common/os/windows/shared_memory_buffer.cpp
 rename hailort/{libhailort/src => common}/os/windows/virtual_alloc_guard.cpp (96%)
 rename hailort/{libhailort/src => common}/os/windows/virtual_alloc_guard.hpp (100%)
 create mode 100644 hailort/common/shared_memory_buffer.hpp
 create mode 100644 hailort/common/thread_pool.hpp
 rename hailort/{libhailort/src/utils => common}/thread_safe_queue.hpp (100%)
 create mode 100644 hailort/libhailort/bindings/gstreamer/find_libs_for_gstreamer.cmake
 create mode 100644 hailort/libhailort/bindings/gstreamer/find_libs_for_gstreamer_linux.cmake
 create mode 100644 hailort/libhailort/bindings/gstreamer/find_libs_for_gstreamer_windows.cmake
 create mode 100644 hailort/libhailort/bindings/gstreamer/gst-hailo/hailo_gst.h
 create mode 100644 hailort/libhailort/bindings/gstreamer/gst-hailo/os/linux/dma_buf_allocator_wrapper.cpp
 create mode 100644 hailort/libhailort/bindings/gstreamer/gst-hailo/os/linux/dma_buf_allocator_wrapper.hpp
 create mode 100644 hailort/libhailort/bindings/gstreamer/gst-hailo/os/windows/dma_buf_allocator_wrapper.cpp
 create mode 100644 hailort/libhailort/bindings/gstreamer/gst-hailo/os/windows/dma_buf_allocator_wrapper.hpp
 delete mode 100644 hailort/libhailort/bindings/python/CMakeLists.txt
 rename hailort/libhailort/bindings/python/platform/hailo_tutorials/notebooks/{HRT_3_Inference_Single_Model_Tutorial.ipynb => HRT_0_Async_Inference_Tutorial.ipynb} (76%)
 rename hailort/libhailort/bindings/python/platform/hailo_tutorials/notebooks/{HRT_4_Async_Inference_Multiple_Models_Tutorial.ipynb => HRT_1_Async_Inference_Multiple_Models_Tutorial.ipynb} (77%)
 rename hailort/libhailort/bindings/python/platform/hailo_tutorials/notebooks/{HRT_0_Inference_Tutorial.ipynb => HRT_2_Infer_Pipeline_Inference_Tutorial.ipynb} (86%)
 rename hailort/libhailort/bindings/python/platform/hailo_tutorials/notebooks/{HRT_2_Inference_Tutorial_Multi_Process_Service.ipynb => HRT_3_Infer_Pipeline_Inference_Multiple_Models_Tutorial.ipynb} (78%)
 rename hailort/libhailort/bindings/python/platform/hailo_tutorials/notebooks/{HRT_1_Power_Measurement_Tutorial.ipynb => HRT_4_Power_measurement_Tutorial.ipynb} (82%)
 delete mode 100644 hailort/libhailort/bindings/python/src/__init__.py
 create mode 100644 hailort/libhailort/src/core_op/resource_manager/action_list_buffer_builder/action_list_buffer_builder.cpp
 delete mode 100644 hailort/libhailort/src/core_op/resource_manager/action_list_buffer_builder/control_action_list_buffer_builder.cpp
 delete mode 100644 hailort/libhailort/src/core_op/resource_manager/action_list_buffer_builder/control_action_list_buffer_builder.hpp
 delete mode 100644 hailort/libhailort/src/core_op/resource_manager/action_list_buffer_builder/ddr_action_list_buffer_builder.cpp
 delete mode 100644 hailort/libhailort/src/core_op/resource_manager/action_list_buffer_builder/ddr_action_list_buffer_builder.hpp
 delete mode 100644 hailort/libhailort/src/os/windows/mmap_buffer.cpp
 create mode 100644 hailort/libhailort/src/rpc_callbacks/CMakeLists.txt
 create mode 100644 hailort/libhailort/src/rpc_callbacks/rpc_callbacks_dispatcher.cpp
 create mode 100644 hailort/libhailort/src/rpc_callbacks/rpc_callbacks_dispatcher.hpp
 create mode 100644 hailort/libhailort/src/service/buffer_pool_per_stream.cpp
 create mode 100644 hailort/libhailort/src/service/buffer_pool_per_stream.hpp
 create mode 100644 hailort/libhailort/src/service/network_group_client.hpp
 create mode 100644 hailort/libhailort/src/transform/eigen.hpp
 rename hailort/libhailort/src/{stream_common => vdma/channel}/transfer_common.cpp (73%)
 rename hailort/libhailort/src/{stream_common => vdma/channel}/transfer_common.hpp (97%)
 create mode 100644 hailort/libhailort/src/vdma/pcie/pcie_device_hrpc_client.cpp
 create mode 100644 hailort/libhailort/src/vdma/pcie/pcie_device_hrpc_client.hpp

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 16ebff31..9a08432c 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,4 +1,4 @@
-cmake_minimum_required(VERSION 3.0.0)
+cmake_minimum_required(VERSION 3.5.0)
 
 find_program(CCACHE_PROGRAM ccache)
 find_program(CLACHE_PROGRAM clcache)
diff --git a/common/include/context_switch_defs.h b/common/include/context_switch_defs.h
index 2f671636..61eff6c7 100644
--- a/common/include/context_switch_defs.h
+++ b/common/include/context_switch_defs.h
@@ -125,6 +125,8 @@ typedef enum __attribute__((packed)) {
     CONTEXT_SWITCH_DEFS__ACTION_TYPE_ACTIVATE_CACHE_INPUT,
     CONTEXT_SWITCH_DEFS__ACTION_TYPE_ACTIVATE_CACHE_OUTPUT,
     CONTEXT_SWITCH_DEFS__ACTION_TYPE_WAIT_FOR_CACHE_UPDATED,
+    CONTEXT_SWITCH_DEFS__ACTION_TYPE_SLEEP,
+    CONTEXT_SWITCH_DEFS__ACTION_TYPE_HALT,
 
     /* Must be last */
     CONTEXT_SWITCH_DEFS__ACTION_TYPE_COUNT
@@ -447,6 +449,10 @@ typedef struct {
     uint8_t packed_vdma_channel_id;
 } CONTEXT_SWITCH_DEFS__change_boundary_input_batch_t;
 
+typedef struct { /* Single-field action payload; presumably paired with CONTEXT_SWITCH_DEFS__ACTION_TYPE_SLEEP - confirm */
+    uint32_t sleep_time; /* NOTE(review): time unit is not visible in this hunk - confirm against firmware docs */
+} CONTEXT_SWITCH_DEFS__sleep_action_data_t;
+
 #pragma pack(pop)
 
 #ifdef __cplusplus
diff --git a/common/include/control_protocol.h b/common/include/control_protocol.h
index 1932eb72..7c2d49b0 100644
--- a/common/include/control_protocol.h
+++ b/common/include/control_protocol.h
@@ -1033,6 +1033,8 @@ typedef struct {
     uint8_t is_action_list_end;
     uint32_t batch_counter_length;
     uint32_t batch_counter;
+    uint32_t idle_time_length;
+    uint32_t idle_time;
     uint32_t action_list_length;
     uint8_t action_list[0];
 } CONTROL_PROTOCOL__download_context_action_list_response_t;
diff --git a/common/include/firmware_status.h b/common/include/firmware_status.h
index 10fa142c..45a814bc 100644
--- a/common/include/firmware_status.h
+++ b/common/include/firmware_status.h
@@ -769,6 +769,7 @@ Updating rules:
    FIRMWARE_STATUS__X(CONTEXT_SWITCH_STATUS_INVALID_EXTERNAL_ACTION_LIST_ADDRESS)\
    FIRMWARE_STATUS__X(CONTEXT_SWITCH_STATUS_INVALID_CACHE_SIZE)\
    FIRMWARE_STATUS__X(CONTEXT_SWITCH_STATUS_INVALID_READ_OFFSET_SIZE)\
+   FIRMWARE_STATUS__X(CONTEXT_SWITCH_STATUS_INVALID_SLEEP_TIME)\
    \
    FIRMWARE_MODULE__X(FIRMWARE_MODULE__D2H_EVENT_MANAGER)\
    FIRMWARE_STATUS__X(HAILO_D2H_EVENT_MANAGER_STATUS_MESSAGE_HIGH_PRIORITY_QUEUE_CREATE_FAILED)\
diff --git a/hailort/.gitignore b/hailort/.gitignore
index 1f1d25b2..8f3372c3 100644
--- a/hailort/.gitignore
+++ b/hailort/.gitignore
@@ -1,3 +1,5 @@
 build/
+build-*x86_64/
+build-*aarch64/
 dist/
 /external/
diff --git a/hailort/CMakeLists.txt b/hailort/CMakeLists.txt
index 5dcaa06c..60fa29be 100644
--- a/hailort/CMakeLists.txt
+++ b/hailort/CMakeLists.txt
@@ -1,14 +1,12 @@
-cmake_minimum_required(VERSION 3.0.0)
+cmake_minimum_required(VERSION 3.5.0)
 
-option(HAILO_BUILD_PYBIND "Build Python binding" OFF)
 option(HAILO_BUILD_EMULATOR "Build hailort for emulator" OFF)
 option(HAILO_BUILD_UT "Build Unit Tests" OFF)
-option(HAILO_BUILD_HW_DEBUG_TOOL "Build hw debug tool" OFF)
+option(HAILO_INTERNAL_BUILD "Build internal hailort components" OFF)
 option(HAILO_BUILD_GSTREAMER "Compile gstreamer plugins" OFF)
 option(HAILO_BUILD_EXAMPLES "Build examples" OFF)
 option(HAILO_OFFLINE_COMPILATION "Don't download external dependencies" OFF)
 option(HAILO_BUILD_SERVICE "Build hailort service" OFF)
-option(HAILO_BUILD_PROFILER "Build hailort profiler" ON)
 option(HAILO_COMPILE_WARNING_AS_ERROR "Add compilation flag for treating compilation warnings as errors" OFF)
 option(HAILO_SUPPORT_PACKAGING "Create HailoRT package (internal)" OFF)
 option(HAILO_BUILD_DOC "Build doc" OFF)
@@ -31,7 +29,7 @@ endif()
 
 # Set firmware version
 add_definitions( -DFIRMWARE_VERSION_MAJOR=4 )
-add_definitions( -DFIRMWARE_VERSION_MINOR=18 )
+add_definitions( -DFIRMWARE_VERSION_MINOR=19 )
 add_definitions( -DFIRMWARE_VERSION_REVISION=0 )
 if(HAILO_BUILD_SERVICE)
     add_definitions( -DHAILO_SUPPORT_MULTI_PROCESS )
@@ -75,8 +73,10 @@ add_subdirectory(hrpc)
 add_subdirectory(hrpc_protocol)
 add_subdirectory(libhailort)
 add_subdirectory(hailortcli)
-if(HAILO_BUILD_HW_DEBUG_TOOL)
+if(HAILO_INTERNAL_BUILD)
     add_subdirectory(tools/hw_debug)
+    add_subdirectory(tools/pcie_tunnel)
+    add_subdirectory(tools/loopback_server)
 endif()
 
 if(HAILO_BUILD_SERVICE)
diff --git a/hailort/LICENSE-3RD-PARTY.md b/hailort/LICENSE-3RD-PARTY.md
index 2d2b6934..ba467447 100644
--- a/hailort/LICENSE-3RD-PARTY.md
+++ b/hailort/LICENSE-3RD-PARTY.md
@@ -4,7 +4,7 @@
 | Catch2                           | Catch2 Authors                    | BSL-1.0                    | 2.13.7         | Cloned entire package                         | https://github.com/catchorg/Catch2                                            |
 | protobuf                         | Google Inc.                       | BSD                        | 21.12          | Cloned entire package                         | https://github.com/protocolbuffers/protobuf                                   |
 | pybind11                         | Wenzel Jakob                      | BSD                        | 2.10.1         | Cloned entire package                         | https://github.com/pybind/pybind11                                            |
-| spdlog                           | Gabi Melman                       | MIT                        | 1.6.1          | Cloned entire package                         | https://github.com/gabime/spdlog                                              |
+| spdlog                           | Gabi Melman                       | MIT                        | 1.14.1         | Cloned entire package                         | https://github.com/gabime/spdlog                                              |
 | folly                            | Facebook, Inc. and its affiliates | Apache License 2.0         | v2020.08.17.00 | Copied only the file `folly/TokenBucket.h`    | https://github.com/facebook/folly                                             |
 | nlohmann_json_cmake_fetchcontent | ArthurSonzogni                    | MIT License                | v3.9.1         | Cloned entire package                         | https://github.com/ArthurSonzogni/nlohmann_json_cmake_fetchcontent            |
 | readerwriterqueue                | Cameron Desrochers                | Simplified BSD             | 1.0.3          | Cloned entire package                         | https://github.com/cameron314/readerwriterqueue                               |
diff --git a/hailort/cmake/common_compiler_options.cmake b/hailort/cmake/common_compiler_options.cmake
index 071eed9d..a3e73fe0 100644
--- a/hailort/cmake/common_compiler_options.cmake
+++ b/hailort/cmake/common_compiler_options.cmake
@@ -1,4 +1,4 @@
-cmake_minimum_required(VERSION 3.0.0)
+cmake_minimum_required(VERSION 3.5.0)
 
 FUNCTION(disable_exceptions target)
     if(WIN32)
diff --git a/hailort/cmake/execute_cmake.cmake b/hailort/cmake/execute_cmake.cmake
index 12ed41cf..0898d2b0 100644
--- a/hailort/cmake/execute_cmake.cmake
+++ b/hailort/cmake/execute_cmake.cmake
@@ -1,4 +1,4 @@
-cmake_minimum_required(VERSION 3.0.0)
+cmake_minimum_required(VERSION 3.5.0)
 
 function(execute_process_in_clean_env)
     cmake_parse_arguments(execute_process_in_clean_env "" "RESULT_VARIABLE" "" ${ARGN})
diff --git a/hailort/cmake/external/pybind11.cmake b/hailort/cmake/external/pybind11.cmake
index 3e6c7297..51f49452 100644
--- a/hailort/cmake/external/pybind11.cmake
+++ b/hailort/cmake/external/pybind11.cmake
@@ -29,4 +29,4 @@ if(NOT pybind11_POPULATED)
         endif()
         add_subdirectory(${pybind11_SOURCE_DIR} ${pybind11_BINARY_DIR} EXCLUDE_FROM_ALL)
     endif()
-endif()
\ No newline at end of file
+endif()
diff --git a/hailort/cmake/external/spdlog.cmake b/hailort/cmake/external/spdlog.cmake
index 1e5502c6..14a0949e 100644
--- a/hailort/cmake/external/spdlog.cmake
+++ b/hailort/cmake/external/spdlog.cmake
@@ -5,7 +5,7 @@ include(FetchContent)
 FetchContent_Declare(
     spdlog
     GIT_REPOSITORY https://github.com/gabime/spdlog
-    GIT_TAG 22a169bc319ac06948e7ee0be6b9b0ac81386604
+    GIT_TAG 27cb4c76708608465c413f6d0e6b8d99a4d84302 # version 1.14.1
     GIT_SHALLOW TRUE
     SOURCE_DIR ${HAILO_EXTERNAL_DIR}/spdlog-src
     SUBBUILD_DIR ${HAILO_EXTERNAL_DIR}/spdlog-subbuild
diff --git a/hailort/common/CMakeLists.txt b/hailort/common/CMakeLists.txt
index dcaeaf6a..c04c6598 100644
--- a/hailort/common/CMakeLists.txt
+++ b/hailort/common/CMakeLists.txt
@@ -1,4 +1,4 @@
-cmake_minimum_required(VERSION 3.0.0)
+cmake_minimum_required(VERSION 3.5.0)
 
 if(WIN32)
     set(HAILORT_COMMON_OS_DIR "${CMAKE_CURRENT_SOURCE_DIR}/os/windows")
@@ -15,12 +15,16 @@ set(SRC_FILES
     ${HAILORT_COMMON_OS_DIR}/socket.cpp
     ${HAILORT_COMMON_OS_DIR}/process.cpp
     ${HAILORT_COMMON_OS_DIR}/os_utils.cpp
+    ${HAILORT_COMMON_OS_DIR}/file_descriptor.cpp
+    ${HAILORT_COMMON_OS_DIR}/mmap_buffer.cpp
+    ${HAILORT_COMMON_OS_DIR}/shared_memory_buffer.cpp
 
     ${CMAKE_CURRENT_SOURCE_DIR}/barrier.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/file_utils.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/string_utils.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/event_internal.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/fork_support.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/buffer_pool.cpp
 
     ${CMAKE_CURRENT_SOURCE_DIR}/device_measurements.cpp
 )
@@ -29,6 +33,8 @@ if(WIN32)
     # Windows only modules:
     set(SRC_FILES ${SRC_FILES}
         ${HAILORT_COMMON_OS_DIR}/string_conversion.cpp
+        ${HAILORT_COMMON_OS_DIR}/virtual_alloc_guard.cpp
+        ${HAILORT_COMMON_OS_DIR}/named_mutex_guard.cpp
     )
 elseif(UNIX)
     # Unix only modules
@@ -37,4 +43,5 @@ elseif(UNIX)
     )
 endif()
 
+
 set(HAILORT_COMMON_CPP_SOURCES ${SRC_FILES} PARENT_SCOPE)
\ No newline at end of file
diff --git a/hailort/common/buffer_pool.cpp b/hailort/common/buffer_pool.cpp
new file mode 100644
index 00000000..89a73b27
--- /dev/null
+++ b/hailort/common/buffer_pool.cpp
@@ -0,0 +1,58 @@
+/**
+ * Copyright (c) 2024 Hailo Technologies Ltd. All rights reserved.
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT)
+**/
+/**
+ * @file buffer_pool.cpp
+ * @brief Buffer pool implementation
+ **/
+
+#include "buffer_pool.hpp"
+#include "hailo/hailort.h"
+
+namespace hailort
+{
+
+BasicBufferPool::BasicBufferPool(size_t buffer_size, std::vector<BufferPtr> &&buffers,
+        SpscQueue<BufferPtr> &&free_buffers_queue, size_t buffers_count) :
+    m_buffer_size(buffer_size),
+    m_buffers_count(buffers_count),
+    m_buffers(std::move(buffers)),
+    m_free_buffers_queue(std::move(free_buffers_queue))
+{}
+
+Expected<BufferPtr> BasicBufferPool::acquire_buffer()
+{
+    TRY_WITH_ACCEPTABLE_STATUS(HAILO_SHUTDOWN_EVENT_SIGNALED, auto buffer,
+        m_free_buffers_queue.dequeue(DEFAULT_TRANSFER_TIMEOUT));
+    return buffer;
+}
+
+size_t BasicBufferPool::current_size()
+{
+    return m_free_buffers_queue.size_approx();
+}
+
+hailo_status BasicBufferPool::return_to_pool(BufferPtr buffer)
+{
+    CHECK(buffer->size() == m_buffer_size, HAILO_INTERNAL_FAILURE,
+        "Buffer size is not the same as expected for pool! ({} != {})", buffer->size(), m_buffer_size);
+
+    std::unique_lock<std::mutex> lock(m_mutex);
+    auto status = m_free_buffers_queue.enqueue(buffer);
+    CHECK_SUCCESS(status);
+
+    return HAILO_SUCCESS;
+}
+
+size_t BasicBufferPool::buffers_count()
+{
+    return m_buffers_count;
+}
+
+size_t BasicBufferPool::buffer_size()
+{
+    return m_buffer_size;
+}
+
+} /* namespace hailort */
diff --git a/hailort/common/buffer_pool.hpp b/hailort/common/buffer_pool.hpp
new file mode 100644
index 00000000..dadc3eb0
--- /dev/null
+++ b/hailort/common/buffer_pool.hpp
@@ -0,0 +1,55 @@
+/**
+ * Copyright (c) 2024 Hailo Technologies Ltd. All rights reserved.
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT)
+**/
+/**
+ * @file buffer_pool.hpp
+ * @brief Buffer pool
+ **/
+
+#ifndef _HAILO_BUFFER_POOL_HPP_
+#define _HAILO_BUFFER_POOL_HPP_
+
+#include "hailo/hailort.h"
+#include "hailo/hailort_common.hpp"
+#include "hailo/buffer.hpp"
+#include "hailo/vdevice.hpp"
+#include "hailo/dma_mapped_buffer.hpp"
+#include "common/thread_safe_queue.hpp"
+
+#include <mutex>
+
+namespace hailort
+{
+
+// TODO: HRT-12690 - Make other buffer pools to use this as base class
+class BasicBufferPool
+{
+public:
+    BasicBufferPool(size_t buffer_size, std::vector<BufferPtr> &&buffers,
+        SpscQueue<BufferPtr> &&free_buffers_queue, size_t buffers_count);
+
+    BasicBufferPool(BasicBufferPool &&) = delete;
+    BasicBufferPool(const BasicBufferPool &) = delete;
+    BasicBufferPool &operator=(BasicBufferPool &&) = delete;
+    BasicBufferPool &operator=(const BasicBufferPool &) = delete;
+    virtual ~BasicBufferPool() = default;
+
+    Expected<BufferPtr> acquire_buffer(); // Dequeues a free buffer, using DEFAULT_TRANSFER_TIMEOUT as the dequeue timeout
+    size_t current_size(); // Approximate number of buffers currently in the free queue
+    hailo_status return_to_pool(BufferPtr buffer); // Fails if buffer->size() differs from buffer_size()
+    size_t buffers_count();
+    size_t buffer_size();
+
+private:
+    size_t m_buffer_size; // Size every buffer handed to return_to_pool() must have
+    size_t m_buffers_count;
+    std::vector<BufferPtr> m_buffers;
+    SpscQueue<BufferPtr> m_free_buffers_queue;
+    std::mutex m_mutex; // Held while enqueuing in return_to_pool()
+};
+using BasicBufferPoolPtr = std::shared_ptr<BasicBufferPool>;
+
+} /* namespace hailort */
+
+#endif /* _HAILO_BUFFER_POOL_HPP_ */
diff --git a/hailort/common/device_measurements.hpp b/hailort/common/device_measurements.hpp
index a3c266c7..fbf28128 100644
--- a/hailort/common/device_measurements.hpp
+++ b/hailort/common/device_measurements.hpp
@@ -20,6 +20,12 @@
 #include <atomic>
 
 
+enum class ShouldMeasurePower {
+    AUTO_DETECT, // auto detect if should measure power, based on device.get_capabilities()
+    NO,
+    YES
+};
+
 class BaseMeasurement
 {
 public:
diff --git a/hailort/common/env_vars.hpp b/hailort/common/env_vars.hpp
new file mode 100644
index 00000000..0961ace8
--- /dev/null
+++ b/hailort/common/env_vars.hpp
@@ -0,0 +1,33 @@
+/**
+ * Copyright (c) 2020-2024 Hailo Technologies Ltd. All rights reserved.
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT)
+ **/
+/**
+ * @file env_vars.hpp
+ * @brief: defines a set of environment variables used in the HailoRT
+ * **/
+
+#ifndef HAILO_ENV_VARS_HPP_
+#define HAILO_ENV_VARS_HPP_
+
+
+namespace hailort
+{
+
+#define HAILORT_LOGGER_PATH_ENV_VAR ("HAILORT_LOGGER_PATH")
+
+#define HAILORT_CONSOLE_LOGGER_LEVEL_ENV_VAR ("HAILORT_CONSOLE_LOGGER_LEVEL")
+
+#define SCHEDULER_MON_ENV_VAR ("HAILO_MONITOR")
+#define SCHEDULER_MON_ENV_VAR_VALUE ("1")
+
+#define TRACE_ENV_VAR ("HAILO_TRACE")
+#define TRACE_ENV_VAR_VALUE ("scheduler")
+#define TRACE_ENV_VAR_TIME_IN_SECONDS_BOUNDED_DUMP ("HAILO_TRACE_TIME_IN_SECONDS_BOUNDED_DUMP")
+#define TRACE_ENV_VAR_SIZE_IN_KB_BOUNDED_DUMP ("HAILO_TRACE_SIZE_IN_KB_BOUNDED_DUMP")
+
+#define PROFILER_FILE_ENV_VAR ("HAILO_TRACE_PATH")
+
+} /* namespace hailort */
+
+#endif /* HAILO_ENV_VARS_HPP_ */
\ No newline at end of file
diff --git a/hailort/libhailort/src/os/file_descriptor.hpp b/hailort/common/file_descriptor.hpp
similarity index 100%
rename from hailort/libhailort/src/os/file_descriptor.hpp
rename to hailort/common/file_descriptor.hpp
diff --git a/hailort/common/file_utils.cpp b/hailort/common/file_utils.cpp
index 1b95b32b..e1d5e119 100644
--- a/hailort/common/file_utils.cpp
+++ b/hailort/common/file_utils.cpp
@@ -72,7 +72,7 @@ hailo_status FileReader::read(uint8_t *buffer, size_t n)
     return m_fstream->good() ? HAILO_SUCCESS : HAILO_FILE_OPERATION_FAILURE;
 }
 
-hailo_status FileReader::read_from_offset(size_t offset, MemoryView &dst, size_t size)
+hailo_status FileReader::read_from_offset(uint64_t offset, MemoryView dst, size_t size)
 {
     assert(nullptr != m_fstream);
 
@@ -93,10 +93,12 @@ hailo_status FileReader::open()
 {
     if (nullptr == m_fstream) { // The first call to open creates the ifstream object
         m_fstream = std::make_shared<std::ifstream>(m_file_path, std::ios::in | std::ios::binary);
-        return m_fstream->good() ? HAILO_SUCCESS : HAILO_OPEN_FILE_FAILURE;
+    } else {
+        m_fstream->open(m_file_path, std::ios::in | std::ios::binary);
     }
-    m_fstream->open(m_file_path, std::ios::in | std::ios::binary);
-    return m_fstream->good() ? HAILO_SUCCESS : HAILO_OPEN_FILE_FAILURE;
+
+    CHECK(m_fstream->good(), HAILO_OPEN_FILE_FAILURE, "Failed opening file, path: {}", m_file_path);
+    return HAILO_SUCCESS;
 }
 
 bool FileReader::is_open() const
@@ -173,7 +175,7 @@ hailo_status BufferReader::read(uint8_t *buffer, size_t n)
     return HAILO_SUCCESS;
 }
 
-hailo_status BufferReader::read_from_offset(size_t offset, MemoryView &dst, size_t size)
+hailo_status BufferReader::read_from_offset(uint64_t offset, MemoryView dst, size_t size)
 {
     memcpy(dst.data(), m_memview.data() + offset, size);
     return HAILO_SUCCESS;
diff --git a/hailort/common/file_utils.hpp b/hailort/common/file_utils.hpp
index fbcbe31b..430d6aa1 100644
--- a/hailort/common/file_utils.hpp
+++ b/hailort/common/file_utils.hpp
@@ -35,7 +35,7 @@ class SeekableBytesReader
 public:
     virtual ~SeekableBytesReader() = default;
     virtual hailo_status read(uint8_t *buffer, size_t n) = 0;
-    virtual hailo_status read_from_offset(size_t offset, MemoryView &dst, size_t n) = 0;
+    virtual hailo_status read_from_offset(uint64_t offset, MemoryView dst, size_t n) = 0;
     virtual hailo_status open() = 0;
     virtual bool is_open() const = 0;
     virtual hailo_status seek(size_t position) = 0;
@@ -54,7 +54,7 @@ class FileReader : public SeekableBytesReader
     FileReader(const std::string &file_path);
 
     virtual hailo_status read(uint8_t *buffer, size_t n);
-    virtual hailo_status read_from_offset(size_t offset, MemoryView &dst, size_t n);
+    virtual hailo_status read_from_offset(uint64_t offset, MemoryView dst, size_t n);
     virtual hailo_status open();
     virtual bool is_open() const;
     virtual hailo_status seek(size_t position);
@@ -78,7 +78,7 @@ class BufferReader : public SeekableBytesReader
     BufferReader(const MemoryView &memview);
 
     virtual hailo_status read(uint8_t *buffer, size_t n);
-    virtual hailo_status read_from_offset(size_t offset, MemoryView &dst, size_t n);
+    virtual hailo_status read_from_offset(uint64_t offset, MemoryView dst, size_t n);
     virtual hailo_status open();
     virtual bool is_open() const;
     virtual hailo_status seek(size_t position);
diff --git a/hailort/common/internal_env_vars.hpp b/hailort/common/internal_env_vars.hpp
new file mode 100644
index 00000000..2f2a954a
--- /dev/null
+++ b/hailort/common/internal_env_vars.hpp
@@ -0,0 +1,106 @@
+/**
+ * Copyright (c) 2020-2024 Hailo Technologies Ltd. All rights reserved.
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT)
+ **/
+/**
+ * @file internal_env_vars.hpp
+ * @brief: defines a set of internal environment variables used for development
+ * **/
+
+#ifndef HAILO_INTERNAL_ENV_VARS_HPP_
+#define HAILO_INTERNAL_ENV_VARS_HPP_
+
+
+namespace hailort
+{
+
+/* Service, hrpc-server, communication */
+
+/* Changes the default address for grpc communication. used for the service-over-ip feature */
+#define HAILORT_SERVICE_ADDRESS_ENV_VAR ("HAILORT_SERVICE_ADDRESS")
+
+/* Indicates to the HailoRT gRPC Service whether to use shared memory for the tensors data.
+    Note: Cannot be used for service-over-ip */
+#define HAILO_SERVICE_SHARED_MEMORY_ENV_VAR ("HAILO_SERVICE_SHARED_MEMORY_OFF")
+#define HAILO_SERVICE_SHARED_MEMORY_OFF "1"
+
+/* Defines a custom pcie port for raw-connection */
+#define HAILO_CONNECTION_PCIE_PORT_ENV_VAR ("HAILO_CONNECTION_PCIE_PORT")
+
+/* Forces the client to use socket-based communication on a specific address. if not set, socket communication won't be used. */
+#define HAILO_SOCKET_COM_ADDR_CLIENT_ENV_VAR ("HAILO_SOCKET_COM_ADDR_CLIENT")
+
+/* Forces the hrpc-server to use socket-based communication on a specific address. if not set, socket communication won't be used. */
+#define HAILO_SOCKET_COM_ADDR_SERVER_ENV_VAR ("HAILO_SOCKET_COM_ADDR_SERVER")
+
+/* HAILO_SOCKET_COM_ADDR_CLIENT_ENV_VAR and HAILO_SOCKET_COM_ADDR_SERVER_ENV_VAR can be set to either ip:port ("X.X.X.X:P"),
+    or to HAILO_SOCKET_COM_ADDR_UNIX_SOCKET which forces working with unix-socket*/
+#define HAILO_SOCKET_COM_ADDR_UNIX_SOCKET ("localhost")
+
+
+/* General */
+
+/* Defines whether the offset of the kv cache will be updated automatically or not.
+    can be set to either HAILORT_AUTO_UPDATE_CACHE_OFFSET_ENV_VAR_DEFAULT or
+     HAILORT_AUTO_UPDATE_CACHE_OFFSET_ENV_VAR_DISABLED, or to a numeric value defining the offset update value in bytes */
+#define HAILORT_AUTO_UPDATE_CACHE_OFFSET_ENV_VAR ("HAILORT_AUTO_UPDATE_CACHE_OFFSET")
+#define HAILORT_AUTO_UPDATE_CACHE_OFFSET_ENV_VAR_DEFAULT ("default")
+#define HAILORT_AUTO_UPDATE_CACHE_OFFSET_ENV_VAR_DISABLED ("disabled")
+
+/* Used for the internal CLI mode `measure-nnc-performance` */
+#define HAILO_CONFIGURE_FOR_HW_INFER_ENV_VAR ("HAILO_CONFIGURE_FOR_HW_INFER")
+
+/* Disable context switch intermediate buffer reuse (naive plan) */
+#define HAILO_FORCE_NAIVE_PER_BUFFER_TYPE_ALOCATION_ENV_VAR ("HAILO_FORCE_NAIVE_PER_BUFFER_TYPE_ALOCATION")
+
+/* forces the minimum FD used events to be above `HIGH_FD_OFFSET`.
+    useful for systems with limitations on the FD count and values */
+#define HAILO_USE_HIGH_FD_ENV_VAR ("HAILO_USE_HIGH_FD")
+
+/* Force hailo15m partial cluster layout bitmap (which clusters are activated) */
+#define FORCE_LAYOUT_INTERNAL_ENV_VAR ("FORCE_LAYOUT_INTERNAL")
+
+
+/* Logger */
+
+/* Forces flush of the logger to file on every trace, instead of the default (warnings and above) */
+#define HAILORT_LOGGER_FLUSH_EVERY_PRINT_ENV_VAR ("HAILORT_LOGGER_FLUSH_EVERY_PRINT")
+
+/* Force QNX Driver logs to be flushed to specific file - or if left undefined - to stderr */
+#define HAILO_QNX_DRIVER_LOG_STDERR_ENV_VAR ("HAILO_QNX_DRIVER_LOG_STDERR")
+
+
+/* Inference */
+
+/* Disables the hrt-multiplexer */
+#define DISABLE_MULTIPLEXER_ENV_VAR ("HAILO_DISABLE_MULTIPLEXER_INTERNAL")
+
+/* Disable scheduler Idle optimization */
+#define HAILO_DISABLE_IDLE_OPT_ENV_VAR ("HAILO_DISABLE_IDLE_OPT")
+
+
+/* Model configuration */
+
+/* If not set, hailort will try to use default desc-size, and only then fallback to larger desc-sizes */
+#define HAILO_LEGACY_BOUNDARY_CHANNEL_PAGE_SIZE_ENV_VAR ("HAILO_LEGACY_BOUNDARY_CHANNEL_PAGE_SIZE")
+
+/* If set - Action list will be sent to Firmware SRAM over DDR unrelated to the size of the action list
+    (Otherwise - DDR will only be used if infinite action list is needed) */
+#define DDR_ACTION_LIST_ENV_VAR ("HAILO_DDR_ACTION_LIST")
+#define DDR_ACTION_LIST_ENV_VAR_VALUE ("1")
+
+/* Forces using descriptor-lists instead of CCB for config-channels on h1x devices */
+#define HAILO_FORCE_CONF_CHANNEL_OVER_DESC_ENV_VAR ("HAILO_FORCE_CONF_CHANNEL_OVER_DESC")
+
+/* Forces using descriptor-lists instead of CCB for inter-context-channels on h1x devices */
+#define HAILO_FORCE_INFER_CONTEXT_CHANNEL_OVER_DESC_ENV_VAR ("HAILO_FORCE_INFER_CONTEXT_CHANNEL_OVER_DESC")
+
+/* Forces using CCB instead of descriptor-lists for ddr-channels on h1x devices */
+#define HAILO_FORCE_DDR_CHANNEL_OVER_CCB_ENV_VAR ("HAILO_FORCE_DDR_CHANNEL_OVER_CCB")
+
+/* Sets the default power-mode of the ConfiguredNetworkGroups to `HAILO_POWER_MODE_ULTRA_PERFORMANCE` */
+#define FORCE_POWER_MODE_ULTRA_PERFORMANCE_ENV_VAR ("FORCE_POWER_MODE_ULTRA_PERFORMANCE")
+
+} /* namespace hailort */
+
+#endif /* HAILO_INTERNAL_ENV_VARS_HPP_ */
\ No newline at end of file
diff --git a/hailort/common/logger_macros.hpp b/hailort/common/logger_macros.hpp
index 544c619c..fbfe21a0 100644
--- a/hailort/common/logger_macros.hpp
+++ b/hailort/common/logger_macros.hpp
@@ -43,6 +43,8 @@ inline std::ostream& operator<<(std::ostream& os, const hailo_status& status)
     return os << status_str << "(" << static_cast<int>(status) << ")";
 }
 
+template <> struct fmt::formatter<hailo_status> : fmt::ostream_formatter {};
+
 namespace hailort
 {
 
diff --git a/hailort/libhailort/src/os/mmap_buffer.hpp b/hailort/common/mmap_buffer.hpp
similarity index 96%
rename from hailort/libhailort/src/os/mmap_buffer.hpp
rename to hailort/common/mmap_buffer.hpp
index 90c1572a..3d47fa00 100644
--- a/hailort/libhailort/src/os/mmap_buffer.hpp
+++ b/hailort/common/mmap_buffer.hpp
@@ -16,7 +16,7 @@
 #include "hailo/expected.hpp"
 #include "common/logger_macros.hpp"
 #include "common/utils.hpp"
-#include "os/file_descriptor.hpp"
+#include "common/file_descriptor.hpp"
 
 namespace hailort
 {
@@ -85,14 +85,14 @@ class MmapBuffer final
     {
         auto mmap = MmapBufferImpl::create_shared_memory(length);
         CHECK_EXPECTED(mmap);
-        return MmapBuffer<T>(std::move(mmap.release()));
+        return MmapBuffer<T>(mmap.release());
     }
 
     static Expected<MmapBuffer<T>> create_file_map(size_t length, FileDescriptor &file, uintptr_t offset)
     {
         auto mmap = MmapBufferImpl::create_file_map(length, file, offset);
         CHECK_EXPECTED(mmap);
-        return MmapBuffer<T>(std::move(mmap.release()));
+        return MmapBuffer<T>(mmap.release());
     }
 
 #if defined(__QNX__)
diff --git a/hailort/libhailort/src/os/posix/file_descriptor.cpp b/hailort/common/os/posix/file_descriptor.cpp
similarity index 96%
rename from hailort/libhailort/src/os/posix/file_descriptor.cpp
rename to hailort/common/os/posix/file_descriptor.cpp
index a70c6694..0cc62654 100644
--- a/hailort/libhailort/src/os/posix/file_descriptor.cpp
+++ b/hailort/common/os/posix/file_descriptor.cpp
@@ -9,7 +9,7 @@
 
 
 #include "common/logger_macros.hpp"
-#include "os/file_descriptor.hpp"
+#include "common/file_descriptor.hpp"
 #include <errno.h>
 
 namespace hailort
diff --git a/hailort/libhailort/src/os/posix/mmap_buffer.cpp b/hailort/common/os/posix/mmap_buffer.cpp
similarity index 97%
rename from hailort/libhailort/src/os/posix/mmap_buffer.cpp
rename to hailort/common/os/posix/mmap_buffer.cpp
index 6144f05e..b3ee5001 100644
--- a/hailort/libhailort/src/os/posix/mmap_buffer.cpp
+++ b/hailort/common/os/posix/mmap_buffer.cpp
@@ -7,9 +7,8 @@
  * @brief Wrapper around unix memory mapping (mmap)
  **/
 
-#include "os/mmap_buffer.hpp"
+#include "common/mmap_buffer.hpp"
 #include "vdma/driver/hailort_driver.hpp"
-#include "hailo_ioctl_common.h"
 #include <sys/ioctl.h>
 
 #include <sys/mman.h>
diff --git a/hailort/common/os/posix/shared_memory_buffer.cpp b/hailort/common/os/posix/shared_memory_buffer.cpp
new file mode 100644
index 00000000..2dd16b0b
--- /dev/null
+++ b/hailort/common/os/posix/shared_memory_buffer.cpp
@@ -0,0 +1,112 @@
+/**
+ * Copyright (c) 2024 Hailo Technologies Ltd. All rights reserved.
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT)
+**/
+/**
+ * @file shared_memory_buffer.cpp
+ * @brief Posix Shared memory implementation
+ **/
+
+#include "common/shared_memory_buffer.hpp"
+#include "common/utils.hpp"
+
+#include "hailo/hailort.h"
+
+#include <fcntl.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <errno.h>
+
+namespace hailort
+{
+
+#ifndef __ANDROID__
+
+Expected<SharedMemoryBufferPtr> SharedMemoryBuffer::create(size_t size, const std::string &shm_name)
+{
+    auto shm_segment_fd = shm_open(shm_name.c_str(), (O_CREAT | O_RDWR), (S_IRWXU | S_IRWXG | S_IRWXO)); // mode 0777
+    CHECK_AS_EXPECTED((shm_segment_fd != -1), HAILO_INTERNAL_FAILURE, "Failed to create shared memory object, errno = {}", errno);
+    auto shm_fd = FileDescriptor(shm_segment_fd);
+
+    auto res = ftruncate(shm_fd, size);
+    CHECK_AS_EXPECTED(res != -1, HAILO_INTERNAL_FAILURE, "Failed to set size of shared memory object, errno = {}", errno);
+
+    TRY(auto mmapped_buffer, MmapBuffer<void>::create_file_map(size, shm_fd, 0));
+    auto result = make_shared_nothrow<SharedMemoryBuffer>(shm_name, std::move(shm_fd), std::move(mmapped_buffer), true);
+    CHECK_NOT_NULL_AS_EXPECTED(result, HAILO_OUT_OF_HOST_MEMORY);
+
+    return result;
+}
+
+Expected<SharedMemoryBufferPtr> SharedMemoryBuffer::open(size_t size, const std::string &shm_name)
+{
+    auto shm_segment_fd = shm_open(shm_name.c_str(), O_RDWR, (S_IRWXU | S_IRWXG | S_IRWXO)); // mode 0777
+    CHECK_AS_EXPECTED((shm_segment_fd != -1), HAILO_INTERNAL_FAILURE, "Failed to open shared memory object, errno = {}", errno);
+    auto shm_fd = FileDescriptor(shm_segment_fd);
+
+    TRY(auto mmapped_buffer, MmapBuffer<void>::create_file_map(size, shm_fd, 0));
+    auto result = make_shared_nothrow<SharedMemoryBuffer>(shm_name, std::move(shm_fd), std::move(mmapped_buffer), false);
+    CHECK_NOT_NULL_AS_EXPECTED(result, HAILO_OUT_OF_HOST_MEMORY);
+
+    return result;
+}
+
+SharedMemoryBuffer::~SharedMemoryBuffer()
+{
+    if (m_memory_owner) {
+        shm_unlink(m_shm_name.c_str());
+    }
+}
+
+size_t SharedMemoryBuffer::size() const
+{
+    return m_shm_mmap_buffer.size();
+}
+
+void *SharedMemoryBuffer::user_address()
+{
+    return m_shm_mmap_buffer.address();
+}
+
+std::string SharedMemoryBuffer::shm_name()
+{
+    return m_shm_name;
+}
+
+#else
+
+// TODO: HRT-14770 support android shared memory
+Expected<SharedMemoryBufferPtr> SharedMemoryBuffer::create(size_t, const std::string &)
+{
+    LOGGER__ERROR("SharedMemoryBuffer::create is not implemented for Android");
+    return make_unexpected(HAILO_NOT_IMPLEMENTED);
+}
+
+Expected<SharedMemoryBufferPtr> SharedMemoryBuffer::open(size_t, const std::string &)
+{
+    LOGGER__ERROR("SharedMemoryBuffer::open is not implemented for Android");
+    return make_unexpected(HAILO_NOT_IMPLEMENTED);
+}
+
+size_t SharedMemoryBuffer::size() const
+{
+    LOGGER__ERROR("SharedMemoryBuffer::size is not implemented for Android");
+    return 0;
+}
+
+void *SharedMemoryBuffer::user_address()
+{
+    LOGGER__ERROR("SharedMemoryBuffer::user_address is not implemented for Android");
+    return nullptr;
+}
+
+std::string SharedMemoryBuffer::shm_name()
+{
+    LOGGER__ERROR("SharedMemoryBuffer::shm_name is not implemented for Android");
+    return "";
+}
+
+#endif
+
+} /* namespace hailort */
diff --git a/hailort/common/os/posix/socket.cpp b/hailort/common/os/posix/socket.cpp
index 6f2fb610..32ab9474 100644
--- a/hailort/common/os/posix/socket.cpp
+++ b/hailort/common/os/posix/socket.cpp
@@ -35,13 +35,17 @@ hailo_status Socket::SocketModuleWrapper::free_module()
 Expected<Socket> Socket::create(int af, int type, int protocol)
 {
     TRY(auto module_wrapper, SocketModuleWrapper::create());
+
+    auto module_wrapper_ptr = make_shared_nothrow<SocketModuleWrapper>(std::move(module_wrapper));
+    CHECK_NOT_NULL(module_wrapper_ptr, HAILO_OUT_OF_HOST_MEMORY);
+
     TRY(const auto socket_fd, create_socket_fd(af, type, protocol));
 
-    auto obj = Socket(std::move(module_wrapper), socket_fd);
+    auto obj = Socket(module_wrapper_ptr, socket_fd);
     return obj;
 }
 
-Socket::Socket(SocketModuleWrapper &&module_wrapper, const socket_t socket_fd) :
+Socket::Socket(std::shared_ptr<SocketModuleWrapper> module_wrapper, const socket_t socket_fd) :
   m_module_wrapper(std::move(module_wrapper)), m_socket_fd(socket_fd)
 {
 }
@@ -107,6 +111,56 @@ hailo_status Socket::get_sock_name(sockaddr *addr, socklen_t *len)
     return HAILO_SUCCESS;
 }
 
+hailo_status Socket::listen(int backlog)
+{
+    auto res = ::listen(m_socket_fd, backlog);
+    CHECK(0 == res, HAILO_ETH_FAILURE, "Failed to listen on socket. errno={}", errno);
+    return HAILO_SUCCESS;
+}
+
+Expected<Socket> Socket::accept()
+{
+    auto client_socket = ::accept(m_socket_fd, nullptr, nullptr);
+    CHECK(client_socket != INVALID_SOCKET, make_unexpected(HAILO_ETH_FAILURE), "Failed to accept connection {}", errno);
+
+    return Socket(m_module_wrapper, client_socket);
+}
+
+hailo_status Socket::connect(const sockaddr *addr, socklen_t len)
+{
+    int ret = ::connect(m_socket_fd, addr, len);
+    CHECK(0 == ret, HAILO_ETH_FAILURE, "Failed to connect to socket {}", errno);
+    return HAILO_SUCCESS;
+}
+
+Expected<size_t> Socket::recv(uint8_t *buffer, size_t size, int flags)
+{
+    auto read_bytes = ::recv(m_socket_fd, buffer, size, flags);
+    CHECK(read_bytes >= 0, make_unexpected(HAILO_ETH_FAILURE), "Failed to read from socket {}", errno);
+    return Expected<size_t>(read_bytes);
+}
+
+Expected<size_t> Socket::send(const uint8_t *buffer, size_t size, int flags)
+{
+    auto bytes_written = ::send(m_socket_fd, buffer, size, flags);
+    CHECK(bytes_written >= 0, make_unexpected(HAILO_ETH_FAILURE), "Failed to write to socket {}", errno);
+    return Expected<size_t>(bytes_written);
+}
+
+hailo_status Socket::sendall(const uint8_t *buffer, size_t size, int flags)
+{
+    size_t offset = 0;
+    while (offset < size) {
+        const auto size_to_write = size - offset;
+        TRY(auto bytes_written, send(buffer + offset, size_to_write, flags));
+        if (bytes_written == 0) {
+            return HAILO_ETH_SEND_FAILURE;
+        }
+        offset += bytes_written;
+    }
+    return HAILO_SUCCESS;
+}
+
 hailo_status Socket::ntop(int af, const void *src, char *dst, socklen_t size)
 { 
     CHECK_ARG_NOT_NULL(src);
@@ -205,6 +259,16 @@ hailo_status Socket::enable_broadcast()
     return HAILO_SUCCESS;
 }
 
+hailo_status Socket::allow_reuse_address()
+{
+    int allow_reuse = 1;
+    // Turn on SO_REUSEADDR for the underlying socket fd (any non-zero option value enables it)
+    auto socket_rc = setsockopt(m_socket_fd, SOL_SOCKET, SO_REUSEADDR, &allow_reuse, sizeof(allow_reuse));
+    CHECK(0 == socket_rc, HAILO_ETH_FAILURE, "Cannot set socket to allow address reuse. errno={}", errno);
+
+    return HAILO_SUCCESS;
+}
+
 hailo_status Socket::send_to(const uint8_t *src_buffer, size_t src_buffer_size, int flags,
     const sockaddr *dest_addr, socklen_t dest_addr_size, size_t *bytes_sent)
 {
diff --git a/hailort/libhailort/src/os/windows/file_descriptor.cpp b/hailort/common/os/windows/file_descriptor.cpp
similarity index 94%
rename from hailort/libhailort/src/os/windows/file_descriptor.cpp
rename to hailort/common/os/windows/file_descriptor.cpp
index f6b62bde..e26cc4a7 100644
--- a/hailort/libhailort/src/os/windows/file_descriptor.cpp
+++ b/hailort/common/os/windows/file_descriptor.cpp
@@ -9,8 +9,7 @@
 
 
 #include "common/logger_macros.hpp"
-#include "os/file_descriptor.hpp"
-#include "os/windows/osdep.hpp"
+#include "common/file_descriptor.hpp"
 
 namespace hailort
 {
diff --git a/hailort/common/os/windows/mmap_buffer.cpp b/hailort/common/os/windows/mmap_buffer.cpp
new file mode 100644
index 00000000..8fbf3ce2
--- /dev/null
+++ b/hailort/common/os/windows/mmap_buffer.cpp
@@ -0,0 +1,48 @@
+/**
+ * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved.
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT)
+ **/
+/**
+ * @file mmap_buffer.cpp
+ * @brief Wrapper around windows memory mapping (mmap). Creating shared memory is not implemented yet
+ **/
+
+#include "common/mmap_buffer.hpp"
+
+#include <windows.h>
+#include <stdio.h>
+#include <conio.h>
+#include <tchar.h>
+
+namespace hailort
+{
+
+void * const MmapBufferImpl::INVALID_ADDR = NULL;
+
+Expected<MmapBufferImpl> MmapBufferImpl::create_shared_memory(size_t)
+{
+    LOGGER__ERROR("Creating shared memory is not implemented on windows");
+    return make_unexpected(HAILO_NOT_IMPLEMENTED);
+}
+
+Expected<MmapBufferImpl> MmapBufferImpl::create_file_map(size_t size, FileDescriptor &fd, uintptr_t offset)
+{
+    // Widen before shifting: uintptr_t is only 32 bits wide on 32-bit targets, where `offset >> 32` is undefined
+    const auto wide_offset = static_cast<ULONGLONG>(offset);
+    const DWORD offset_high = static_cast<DWORD>(wide_offset >> 32); // High 32 bits
+    const DWORD offset_low = static_cast<DWORD>(wide_offset & 0xFFFFFFFF); // Low 32 bits
+    auto file_view_ptr = MapViewOfFile(fd, FILE_MAP_ALL_ACCESS, offset_high, offset_low, size);
+    CHECK_AS_EXPECTED((file_view_ptr != nullptr), HAILO_INTERNAL_FAILURE, "Failed to map view of file, error = {}", GetLastError());
+    return MmapBufferImpl(file_view_ptr, size);
+}
+
+hailo_status MmapBufferImpl::unmap()
+{
+    if (m_address != nullptr) {
+        UnmapViewOfFile(m_address);
+        m_address = nullptr;
+    }
+    return HAILO_SUCCESS;
+}
+
+} /* namespace hailort */
diff --git a/hailort/common/os/windows/named_mutex_guard.cpp b/hailort/common/os/windows/named_mutex_guard.cpp
new file mode 100644
index 00000000..c7cd773d
--- /dev/null
+++ b/hailort/common/os/windows/named_mutex_guard.cpp
@@ -0,0 +1,45 @@
+/**
+ * Copyright (c) 2024 Hailo Technologies Ltd. All rights reserved.
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT)
+**/
+/**
+ * @file named_mutex_guard.cpp
+ * @brief Named mutex guard implementation
+ **/
+
+#include "named_mutex_guard.hpp"
+#include "hailo/hailort.h"
+#include "common/logger_macros.hpp"
+#include "common/utils.hpp"
+
+namespace hailort
+{
+
+Expected<std::unique_ptr<NamedMutexGuard>> NamedMutexGuard::create(const std::string &named_mutex)
+{
+    // Create a named mutex
+    HANDLE mutex_handle = CreateMutex(NULL, FALSE, named_mutex.c_str());
+    CHECK_AS_EXPECTED(mutex_handle != NULL, HAILO_INTERNAL_FAILURE, "Failed to create named mutex, error = {}", GetLastError());
+
+    // Check if the mutex is already acquired by another instance
+    if (GetLastError() == ERROR_ALREADY_EXISTS) {
+        LOGGER__ERROR("Another instance of {} is already running", named_mutex);
+        CloseHandle(mutex_handle);
+        return make_unexpected(HAILO_INVALID_OPERATION);
+    }
+
+    auto guarded_named_mutex = make_unique_nothrow<NamedMutexGuard>(mutex_handle);
+    CHECK_NOT_NULL_AS_EXPECTED(guarded_named_mutex, HAILO_OUT_OF_HOST_MEMORY);
+
+    return guarded_named_mutex;
+}
+
+NamedMutexGuard::NamedMutexGuard(HANDLE mutex_handle) : m_mutex_handle(mutex_handle)
+{}
+
+NamedMutexGuard::~NamedMutexGuard()
+{
+    CloseHandle(m_mutex_handle);
+}
+
+} /* namespace hailort */
diff --git a/hailort/common/os/windows/named_mutex_guard.hpp b/hailort/common/os/windows/named_mutex_guard.hpp
new file mode 100644
index 00000000..9cea4a6e
--- /dev/null
+++ b/hailort/common/os/windows/named_mutex_guard.hpp
@@ -0,0 +1,39 @@
+/**
+ * Copyright (c) 2024 Hailo Technologies Ltd. All rights reserved.
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT)
+**/
+/**
+ * @file named_mutex_guard.hpp
+ * @brief Named mutex guard
+ **/
+
+#ifndef _HAILO_NAMED_MUTEX_GUARD_HPP_
+#define _HAILO_NAMED_MUTEX_GUARD_HPP_
+
+#include "hailo/hailort.h"
+#include "hailo/expected.hpp"
+#include <string>
+#include <memory>
+
+namespace hailort
+{
+
+class NamedMutexGuard
+{
+public:
+    static Expected<std::unique_ptr<NamedMutexGuard>> create(const std::string &named_mutex);
+
+    NamedMutexGuard(NamedMutexGuard &&) = delete;
+    NamedMutexGuard(const NamedMutexGuard &) = delete;
+    NamedMutexGuard &operator=(NamedMutexGuard &&) = delete;
+    NamedMutexGuard &operator=(const NamedMutexGuard &) = delete;
+    virtual ~NamedMutexGuard();
+
+    NamedMutexGuard(HANDLE mutex_handle);
+private:
+    HANDLE m_mutex_handle;
+};
+
+} /* namespace hailort */
+
+#endif /* _HAILO_NAMED_MUTEX_GUARD_HPP_ */
diff --git a/hailort/common/os/windows/shared_memory_buffer.cpp b/hailort/common/os/windows/shared_memory_buffer.cpp
new file mode 100644
index 00000000..66816ad1
--- /dev/null
+++ b/hailort/common/os/windows/shared_memory_buffer.cpp
@@ -0,0 +1,67 @@
+/**
+ * Copyright (c) 2024 Hailo Technologies Ltd. All rights reserved.
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT)
+**/
+/**
+ * @file shared_memory_buffer.cpp
+ * @brief Shared memory implementation in Windows.
+ * Based on Windows docs: https://learn.microsoft.com/en-us/windows/win32/memory/creating-named-shared-memory
+ **/
+
+#include "common/shared_memory_buffer.hpp"
+#include "common/utils.hpp"
+#include "hailo/hailort.h"
+
+#include <windows.h>
+
+namespace hailort
+{
+
+Expected<SharedMemoryBufferPtr> SharedMemoryBuffer::create(size_t size, const std::string &shm_name)
+{
+    // Creates a named file mapping of `size` bytes (no backing file) and maps it. The size is split into high/low DWORDs
+    // so 4GiB and above is not truncated. NOTE(review): an already existing name is opened, not rejected - confirm intent.
+    HANDLE handle_map_file = CreateFileMappingA(INVALID_HANDLE_VALUE, nullptr, PAGE_READWRITE,
+            static_cast<DWORD>(static_cast<uint64_t>(size) >> 32), static_cast<DWORD>(size & 0xFFFFFFFF), shm_name.c_str());
+    CHECK_AS_EXPECTED((handle_map_file != nullptr), HAILO_INTERNAL_FAILURE, "Failed to create shared memory object, error = {}", GetLastError());
+    auto shm_fd = FileDescriptor(handle_map_file);
+    TRY(auto mmapped_buffer, MmapBuffer<void>::create_file_map(size, shm_fd, 0));
+
+    auto result = make_shared_nothrow<SharedMemoryBuffer>(shm_name, std::move(shm_fd), std::move(mmapped_buffer), true);
+    CHECK_NOT_NULL_AS_EXPECTED(result, HAILO_OUT_OF_HOST_MEMORY);
+    return result;
+}
+
+Expected<SharedMemoryBufferPtr> SharedMemoryBuffer::open(size_t size, const std::string &shm_name)
+{
+    // Opens an existing named file mapping with full access and hands `size` to MmapBuffer<void>::create_file_map.
+    // The ANSI entry point is called explicitly: shm_name is a narrow string, which the generic macro rejects under UNICODE.
+    HANDLE handle_map_file = OpenFileMappingA(FILE_MAP_ALL_ACCESS, FALSE, shm_name.c_str());
+    CHECK_AS_EXPECTED((handle_map_file != nullptr), HAILO_INTERNAL_FAILURE, "Failed to open file mapping object, error = {}", GetLastError());
+    auto shm_fd = FileDescriptor(handle_map_file);
+    TRY(auto mmapped_buffer, MmapBuffer<void>::create_file_map(size, shm_fd, 0));
+
+    auto result = make_shared_nothrow<SharedMemoryBuffer>(shm_name, std::move(shm_fd), std::move(mmapped_buffer), false);
+    CHECK_NOT_NULL_AS_EXPECTED(result, HAILO_OUT_OF_HOST_MEMORY);
+    return result;
+}
+
+SharedMemoryBuffer::~SharedMemoryBuffer()
+{} // Intentionally empty; presumably m_shm_fd and m_shm_mmap_buffer release themselves in their own destructors.
+
+size_t SharedMemoryBuffer::size() const
+{
+    return m_shm_mmap_buffer.size(); // Size as reported by the underlying MmapBuffer.
+}
+
+void *SharedMemoryBuffer::user_address()
+{
+    return m_shm_mmap_buffer.address(); // Address as reported by the underlying MmapBuffer.
+}
+
+std::string SharedMemoryBuffer::shm_name()
+{
+    return m_shm_name; // Returned by value, i.e. the caller gets a copy of the stored name.
+}
+
+} /* namespace hailort */
diff --git a/hailort/common/os/windows/socket.cpp b/hailort/common/os/windows/socket.cpp
index c21160e4..c8f401c8 100644
--- a/hailort/common/os/windows/socket.cpp
+++ b/hailort/common/os/windows/socket.cpp
@@ -40,14 +40,18 @@ hailo_status Socket::SocketModuleWrapper::free_module()
 
 Expected<Socket> Socket::create(int af, int type, int protocol)
 {
-    TRY(auto module_wrapper, SocketModuleWrapper::create());    
+    TRY(auto module_wrapper, SocketModuleWrapper::create());
+
+    auto module_wrapper_ptr = make_shared_nothrow<SocketModuleWrapper>(std::move(module_wrapper));
+    CHECK_NOT_NULL(module_wrapper_ptr, HAILO_OUT_OF_HOST_MEMORY);
+
     TRY(const auto socket_fd, create_socket_fd(af, type, protocol));
 
-    auto obj = Socket(std::move(module_wrapper), socket_fd);
+    auto obj = Socket(std::move(module_wrapper_ptr), socket_fd);
     return std::move(obj);
 }
 
-Socket::Socket(SocketModuleWrapper &&module_wrapper, const socket_t socket_fd) :
+Socket::Socket(std::shared_ptr<SocketModuleWrapper> module_wrapper, const socket_t socket_fd) :
   m_module_wrapper(std::move(module_wrapper)), m_socket_fd(socket_fd)
 {
 }
@@ -110,6 +114,56 @@ hailo_status Socket::get_sock_name(sockaddr *addr, socklen_t *len)
     return HAILO_SUCCESS;
 }
 
+hailo_status Socket::listen(int backlog)
+{
+    auto res = ::listen(m_socket_fd, backlog); // Winsock does not set errno; the failure code comes from WSAGetLastError()
+    CHECK(0 == res, HAILO_ETH_FAILURE, "Failed to listen on socket. WSA error={}", WSAGetLastError());
+    return HAILO_SUCCESS;
+}
+
+Expected<Socket> Socket::accept()
+{
+    auto client_socket = ::accept(m_socket_fd, nullptr, nullptr); // The peer address is not requested.
+    CHECK(client_socket != INVALID_SOCKET, make_unexpected(HAILO_ETH_FAILURE), "Failed to accept connection {}", WSAGetLastError());
+    // Winsock reports errors through WSAGetLastError(), not errno. The new Socket shares this socket's module wrapper.
+    return Socket(m_module_wrapper, client_socket);
+}
+
+hailo_status Socket::connect(const sockaddr *addr, socklen_t len)
+{
+    int ret = ::connect(m_socket_fd, addr, len); // Winsock does not set errno; the failure code comes from WSAGetLastError()
+    CHECK(0 == ret, HAILO_ETH_FAILURE, "Failed to connect to socket {}", WSAGetLastError());
+    return HAILO_SUCCESS;
+}
+
+Expected<size_t> Socket::recv(uint8_t *buffer, size_t size, int flags)
+{
+    auto read_bytes = ::recv(m_socket_fd, reinterpret_cast<char*>(buffer), static_cast<int>(size), flags);
+    CHECK(read_bytes >= 0, make_unexpected(HAILO_ETH_FAILURE), "Failed to read from socket {}", WSAGetLastError()); // not errno on Winsock
+    return Expected<size_t>(read_bytes); // May be fewer bytes than requested.
+}
+
+Expected<size_t> Socket::send(const uint8_t *buffer, size_t size, int flags)
+{
+    auto bytes_written = ::send(m_socket_fd, reinterpret_cast<const char*>(buffer), static_cast<int>(size), flags);
+    CHECK(bytes_written >= 0, make_unexpected(HAILO_ETH_FAILURE), "Failed to write to socket {}", WSAGetLastError()); // not errno on Winsock
+    return Expected<size_t>(bytes_written); // May be fewer bytes than requested; see sendall() for the looping variant.
+}
+
+hailo_status Socket::sendall(const uint8_t *buffer, size_t size, int flags)
+{
+    // Keeps calling send() on the unsent tail until all `size` bytes have been handed to the socket.
+    for (size_t sent_total = 0; sent_total < size; ) {
+        TRY(auto sent_now, send(buffer + sent_total, size - sent_total, flags));
+        if (0 == sent_now) {
+            // No progress was made; report a failure instead of looping forever.
+            return HAILO_ETH_SEND_FAILURE;
+        }
+        sent_total += sent_now;
+    }
+    return HAILO_SUCCESS;
+}
+
 hailo_status Socket::ntop(int af, const void *src, char *dst, socklen_t size)
 {
     CHECK_ARG_NOT_NULL(src);
@@ -190,6 +244,17 @@ hailo_status Socket::enable_broadcast()
     return HAILO_SUCCESS;
 }
 
+hailo_status Socket::allow_reuse_address()
+{
+    int allow_reuse = 1; // Non-zero switches the boolean option on.
+    // Enables SO_REUSEADDR on this socket.
+    auto socket_rc = setsockopt(m_socket_fd, SOL_SOCKET, SO_REUSEADDR,
+        reinterpret_cast<const char*>(&allow_reuse), sizeof(allow_reuse));
+    CHECK(0 == socket_rc, HAILO_ETH_FAILURE, "Cannot set socket to allow address reuse. WSA error={}", WSAGetLastError());
+
+    return HAILO_SUCCESS;
+}
+
 hailo_status Socket::send_to(const uint8_t *src_buffer, size_t src_buffer_size, int flags,
     const sockaddr *dest_addr, socklen_t dest_addr_size, size_t *bytes_sent)
 {
diff --git a/hailort/libhailort/src/os/windows/virtual_alloc_guard.cpp b/hailort/common/os/windows/virtual_alloc_guard.cpp
similarity index 96%
rename from hailort/libhailort/src/os/windows/virtual_alloc_guard.cpp
rename to hailort/common/os/windows/virtual_alloc_guard.cpp
index 425454a5..6d07ba4a 100644
--- a/hailort/libhailort/src/os/windows/virtual_alloc_guard.cpp
+++ b/hailort/common/os/windows/virtual_alloc_guard.cpp
@@ -7,7 +7,7 @@
  * @brief Guard object for VirtualAlloc and VirtualFree
  **/
 
-#include "os/windows/virtual_alloc_guard.hpp"
+#include "virtual_alloc_guard.hpp"
 #include "common/logger_macros.hpp"
 #include "common/utils.hpp"
 
diff --git a/hailort/libhailort/src/os/windows/virtual_alloc_guard.hpp b/hailort/common/os/windows/virtual_alloc_guard.hpp
similarity index 100%
rename from hailort/libhailort/src/os/windows/virtual_alloc_guard.hpp
rename to hailort/common/os/windows/virtual_alloc_guard.hpp
diff --git a/hailort/common/shared_memory_buffer.hpp b/hailort/common/shared_memory_buffer.hpp
new file mode 100644
index 00000000..2460af77
--- /dev/null
+++ b/hailort/common/shared_memory_buffer.hpp
@@ -0,0 +1,81 @@
+/**
+ * Copyright (c) 2024 Hailo Technologies Ltd. All rights reserved.
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT)
+**/
+/**
+ * @file shared_memory_buffer.hpp
+ * @brief Shared memory buffer
+ **/
+
+#ifndef _HAILO_SHARED_MEMORY_BUFFER_HPP_
+#define _HAILO_SHARED_MEMORY_BUFFER_HPP_
+
+#include "common/file_descriptor.hpp"
+#include "common/mmap_buffer.hpp"
+
+#include "hailo/hailort.h"
+#include "hailo/expected.hpp"
+#include "hailo/buffer.hpp"
+
+namespace hailort
+{
+
+#define SHARED_MEMORY_NAME_SEPERATOR '_'
+#define INVALID_SHARED_MEMORY_CHAR '/'
+
+#if defined(_MSC_VER)
+#define SHARED_MEMORY_NAME_PREFIX "Global\\"
+#else
+#define SHARED_MEMORY_NAME_PREFIX '/'
+#endif
+
+class SharedMemoryBuffer;
+using SharedMemoryBufferPtr = std::shared_ptr<SharedMemoryBuffer>;
+
+class SharedMemoryBuffer // Named shared-memory region: keeps the name, its FileDescriptor and the MmapBuffer mapping.
+{
+public:
+    static Expected<SharedMemoryBufferPtr> create(size_t size, const std::string &shm_name);
+    static Expected<SharedMemoryBufferPtr> open(size_t size, const std::string &shm_name);
+    // Copy construction and both assignment operators are disabled; only move construction is available.
+    SharedMemoryBuffer(const SharedMemoryBuffer &) = delete;
+    SharedMemoryBuffer &operator=(SharedMemoryBuffer &&) = delete;
+    SharedMemoryBuffer &operator=(const SharedMemoryBuffer &) = delete;
+    virtual ~SharedMemoryBuffer();
+    // Takes over the descriptor and the mapping by move; shm_name and memory_owner are stored as given.
+    SharedMemoryBuffer(const std::string &shm_name, FileDescriptor &&shm_fd, MmapBuffer<void> &&shm_mmap_buffer, bool memory_owner) :
+        m_shm_name(shm_name),
+        m_shm_fd(std::move(shm_fd)),
+        m_shm_mmap_buffer(std::move(shm_mmap_buffer)),
+        m_memory_owner(memory_owner)
+    {}
+    // Move constructor: `other` is left with an empty name and m_memory_owner == false.
+    SharedMemoryBuffer(SharedMemoryBuffer&& other) noexcept :
+        m_shm_name(std::exchange(other.m_shm_name, "")),
+        m_shm_fd(std::move(other.m_shm_fd)),
+        m_shm_mmap_buffer(std::move(other.m_shm_mmap_buffer)),
+        m_memory_owner(std::exchange(other.m_memory_owner, false))
+    {}
+    // Accessors; declared here only, the definitions live outside this header.
+    virtual size_t size() const;
+    virtual void *user_address();
+    std::string shm_name();
+    // Replaces every INVALID_SHARED_MEMORY_CHAR in `name` with SHARED_MEMORY_NAME_SEPERATOR, then prepends SHARED_MEMORY_NAME_PREFIX.
+    static std::string get_valid_shm_name(const std::string &name)
+    {
+        std::string valid_shm_name = name;
+        std::replace(valid_shm_name.begin(), valid_shm_name.end(), INVALID_SHARED_MEMORY_CHAR, SHARED_MEMORY_NAME_SEPERATOR);
+        valid_shm_name = SHARED_MEMORY_NAME_PREFIX + valid_shm_name;
+        return valid_shm_name;
+    }
+
+private:
+    std::string m_shm_name;
+    FileDescriptor m_shm_fd;
+    MmapBuffer<void> m_shm_mmap_buffer;
+    bool m_memory_owner; // Set to true by create() and false by open() in the Windows implementation.
+};
+
+} /* namespace hailort */
+
+#endif /* _HAILO_SHARED_MEMORY_BUFFER_HPP_ */
diff --git a/hailort/common/socket.hpp b/hailort/common/socket.hpp
index c70df329..25ad081d 100644
--- a/hailort/common/socket.hpp
+++ b/hailort/common/socket.hpp
@@ -50,9 +50,18 @@ class Socket final {
     hailo_status socket_bind(const sockaddr *addr, socklen_t len);
     hailo_status get_sock_name(sockaddr *addr, socklen_t *len);
 
+    hailo_status listen(int backlog);
+    Expected<Socket> accept();
+    hailo_status connect(const sockaddr *addr, socklen_t len);
+
+    Expected<size_t> recv(uint8_t *buffer, size_t size, int flags = 0);
+    Expected<size_t> send(const uint8_t *buffer, size_t size, int flags = 0);
+    hailo_status sendall(const uint8_t *buffer, size_t size, int flags = 0);
+
     hailo_status set_recv_buffer_size_max();
     hailo_status set_timeout(const std::chrono::milliseconds timeout_ms, timeval_t *timeout);
     hailo_status enable_broadcast();
+    hailo_status allow_reuse_address();
     hailo_status abort();
 
     // TODO: Should these be in udp.cpp?
@@ -96,12 +105,12 @@ class Socket final {
         static hailo_status free_module();
     };
 
-    Socket(SocketModuleWrapper &&module_wrapper, const socket_t socket_fd);
+    Socket(std::shared_ptr<SocketModuleWrapper> module_wrapper, const socket_t socket_fd);
     static Expected<socket_t> create_socket_fd(int af, int type, int protocol);
     hailo_status close_socket_fd();
 
     // Itialization dependency
-    SocketModuleWrapper m_module_wrapper;
+    std::shared_ptr<SocketModuleWrapper> m_module_wrapper;
     socket_t m_socket_fd;
 };
 
diff --git a/hailort/common/string_utils.hpp b/hailort/common/string_utils.hpp
index b1f03668..ebd55feb 100644
--- a/hailort/common/string_utils.hpp
+++ b/hailort/common/string_utils.hpp
@@ -12,6 +12,7 @@
 
 #include "hailo/expected.hpp"
 #include <string>
+#include <algorithm>
 
 namespace hailort
 {
@@ -22,6 +23,14 @@ class StringUtils {
     static Expected<uint8_t> to_uint8(const std::string &str, int base);
     static Expected<uint32_t> to_uint32(const std::string &str, int base);
 
+    static std::string to_lower(const std::string &str) // Returns a copy of str with ::tolower applied to every byte.
+    {
+        std::string lower_str = str; // unsigned char below: calling tolower() with a negative char value is undefined behaviour
+        std::transform(lower_str.begin(), lower_str.end(), lower_str.begin(),
+            [](unsigned char ch) { return static_cast<char>(::tolower(ch)); });
+        return lower_str;
+    }
+
     static std::string to_hex_string(const uint8_t *array, size_t size, bool uppercase, const std::string &delimiter="");
 };
 
diff --git a/hailort/common/thread_pool.hpp b/hailort/common/thread_pool.hpp
new file mode 100644
index 00000000..afb720d6
--- /dev/null
+++ b/hailort/common/thread_pool.hpp
@@ -0,0 +1,90 @@
+/**
+ * Copyright (c) 2020-2024 Hailo Technologies Ltd. All rights reserved.
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT)
+ **/
+/**
+ * @file thread_pool.hpp
+ * @brief Implementation of thread pool that uses async threads
+ **/
+
+#ifndef _THREAD_POOL_HPP_
+#define _THREAD_POOL_HPP_
+
+#include "async_thread.hpp"
+
+namespace hailort {
+
+// Fixed-size pool of AsyncThread workers that pop jobs from a shared queue and run them.
+class HailoThreadPool {
+public:
+    // Spawns num_worker_threads workers. Each one waits on m_cv until a job is queued or shutdown is requested.
+    HailoThreadPool(size_t num_worker_threads) : m_num_threads(num_worker_threads), m_kill_threads(false) {
+        for (size_t i = 0; i < num_worker_threads; i++) {
+            m_threads.emplace_back(std::make_unique<AsyncThread<hailo_status>>(
+            [this]() -> hailo_status {
+                while(true) {
+                    std::function<hailo_status()> func;
+                    {
+                        std::unique_lock<std::mutex> lock(m_mutex);
+                        m_cv.wait(lock, [this](){ return (m_kill_threads || !m_queue.empty()); });
+                        // Exit only once the queue is drained, so jobs queued before shutdown still run.
+                        if (m_kill_threads && m_queue.empty()) {
+                            return HAILO_SUCCESS;
+                        }
+                        func = std::move(m_queue.front());
+                        m_queue.pop();
+                    }
+
+                    hailo_status status = func();
+                    if (HAILO_SUCCESS != status) {
+                        LOGGER__ERROR("thread failed with status {}", status);
+                    }
+                }
+            }
+        ));
+        }
+    }
+
+    HailoThreadPool(const HailoThreadPool &) = delete;
+    HailoThreadPool(HailoThreadPool &&other) = delete;
+    HailoThreadPool& operator=(const HailoThreadPool&) = delete;
+    HailoThreadPool& operator=(HailoThreadPool &&) = delete;
+    // Binds func to args and queues the result for one worker. Jobs added after shutdown started are dropped with an error log.
+    template<class F, class... Args>
+    void add_job(F&& func, Args&&... args) {
+        auto job = std::bind(std::forward<F>(func), std::forward<Args>(args)...);
+        {
+            std::unique_lock<std::mutex> lock(m_mutex);
+            if (m_kill_threads) {
+                LOGGER__ERROR("Cannot add jobs after threadpool has been terminated");
+                return;
+            }
+            m_queue.emplace(std::move(job));
+        }
+        m_cv.notify_one();
+    }
+    // Requests shutdown, wakes every worker and calls get() on each (expected to block until that worker has returned).
+    ~HailoThreadPool() {
+        {
+            std::unique_lock<std::mutex> lock(m_mutex);
+            m_kill_threads = true;
+        }
+        m_cv.notify_all();
+        for (size_t i = 0; i < m_num_threads; i++) {
+            AsyncThreadPtr<hailo_status> thread = std::move(m_threads[i]);
+            thread->get();
+        }
+    }
+
+private:
+    size_t m_num_threads;
+    std::vector<AsyncThreadPtr<hailo_status>> m_threads;
+    std::queue<std::function<hailo_status()>> m_queue;
+    std::mutex m_mutex;
+    std::condition_variable m_cv;
+    std::atomic<bool> m_kill_threads;
+};
+
+} /* namespace hailort*/
+
+#endif // _THREAD_POOL_HPP_
\ No newline at end of file
diff --git a/hailort/libhailort/src/utils/thread_safe_queue.hpp b/hailort/common/thread_safe_queue.hpp
similarity index 100%
rename from hailort/libhailort/src/utils/thread_safe_queue.hpp
rename to hailort/common/thread_safe_queue.hpp
diff --git a/hailort/common/utils.hpp b/hailort/common/utils.hpp
index 3eb719e2..e3d922a7 100644
--- a/hailort/common/utils.hpp
+++ b/hailort/common/utils.hpp
@@ -26,6 +26,7 @@
 #include <cstdint>
 #include <cstddef>
 #include <fstream>
+#include <algorithm>
 
 
 namespace hailort
@@ -261,7 +262,7 @@ inline hailo_status get_status(const Expected<T> &exp)
 #define _CHECK_GRPC_STATUS(status, ret_val, warning_msg)                                                                         \
     do {                                                                                                                         \
         if (!status.ok()) {                                                                                                      \
-            LOGGER__ERROR("CHECK_GRPC_STATUS failed with error code: {}.", status.error_code());                                 \
+            LOGGER__ERROR("CHECK_GRPC_STATUS failed with error code: {}.", static_cast<int>(status.error_code()));               \
             LOGGER__WARNING(warning_msg);                                                                                        \
             return ret_val;                                                                                                      \
         }                                                                                                                        \
@@ -367,7 +368,7 @@ static uint32_t get_min_value_of_unordered_map(const std::unordered_map<K, V> &m
     return min_count;
 }
 
-static inline bool is_env_variable_on(const char* env_var_name, const std::string &required_value = "1")
+static inline bool is_env_variable_on(const char *env_var_name, const std::string &required_value = "1")
 {
     auto env_var  = std::getenv(env_var_name);
     return ((nullptr != env_var) && (strncmp(env_var, required_value.c_str(), required_value.size()) == 0));
diff --git a/hailort/drivers/common/hailo_ioctl_common.h b/hailort/drivers/common/hailo_ioctl_common.h
index 5f6cddf5..5e15acb3 100644
--- a/hailort/drivers/common/hailo_ioctl_common.h
+++ b/hailort/drivers/common/hailo_ioctl_common.h
@@ -1,13 +1,13 @@
 // SPDX-License-Identifier: (GPL-2.0 WITH Linux-syscall-note) AND MIT
 /**
- * Copyright (c) 2019-2022 Hailo Technologies Ltd. All rights reserved.
+ * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved.
  **/
 
 #ifndef _HAILO_IOCTL_COMMON_H_
 #define _HAILO_IOCTL_COMMON_H_
 
 #define HAILO_DRV_VER_MAJOR 4
-#define HAILO_DRV_VER_MINOR 18
+#define HAILO_DRV_VER_MINOR 19
 #define HAILO_DRV_VER_REVISION 0
 
 #define _STRINGIFY_EXPANDED( x ) #x
@@ -17,10 +17,11 @@
 
 // This value is not easily changeable.
 // For example: the channel interrupts ioctls assume we have up to 32 channels
-#define MAX_VDMA_CHANNELS_PER_ENGINE    (32)
-#define MAX_VDMA_ENGINES                (3)
-#define SIZE_OF_VDMA_DESCRIPTOR         (16)
-#define VDMA_DEST_CHANNELS_START        (16)
+#define MAX_VDMA_CHANNELS_PER_ENGINE            (32)
+#define VDMA_CHANNELS_PER_ENGINE_PER_DIRECTION  (16)
+#define MAX_VDMA_ENGINES                        (3)
+#define SIZE_OF_VDMA_DESCRIPTOR                 (16)
+#define VDMA_DEST_CHANNELS_START                (16)
 
 #define HAILO_VDMA_MAX_ONGOING_TRANSFERS (128)
 #define HAILO_VDMA_MAX_ONGOING_TRANSFERS_MASK (HAILO_VDMA_MAX_ONGOING_TRANSFERS - 1)
@@ -37,8 +38,8 @@
 #define FW_ACCESS_APP_CPU_CONTROL_MASK      (1 << FW_ACCESS_CONTROL_INTERRUPT_SHIFT)
 #define FW_ACCESS_DRIVER_SHUTDOWN_SHIFT     (2)
 #define FW_ACCESS_DRIVER_SHUTDOWN_MASK      (1 << FW_ACCESS_DRIVER_SHUTDOWN_SHIFT)
-#define FW_ACCESS_SOC_CONNECT_SHIFT         (3)
-#define FW_ACCESS_SOC_CONNECT_MASK          (1 << FW_ACCESS_SOC_CONNECT_SHIFT)
+#define FW_ACCESS_SOC_CONTROL_SHIFT         (3)
+#define FW_ACCESS_SOC_CONTROL_MASK          (1 << FW_ACCESS_SOC_CONTROL_SHIFT)
 
 #define INVALID_VDMA_CHANNEL                (0xff)
 
@@ -245,6 +246,12 @@ struct hailo_desc_list_release_params {
     uintptr_t desc_handle;      // in
 };
 
+struct hailo_write_action_list_params {
+    uint8_t *data;              // in  (presumably the action list bytes to write - confirm against the driver)
+    size_t size;                // in  (presumably the length of data in bytes - confirm against the driver)
+    uint64_t dma_address;       // out NOTE(review): HAILO_WRITE_ACTION_LIST is declared with _IOW_; confirm this field is copied back
+};
+
 /* structure used in ioctl HAILO_DESC_LIST_BIND_VDMA_BUFFER */
 struct hailo_desc_list_program_params {
     size_t buffer_handle;       // in
@@ -508,6 +515,7 @@ struct hailo_vdma_launch_transfer_params {
 
 /* structure used in ioctl HAILO_SOC_CONNECT */
 struct hailo_soc_connect_params {
+    uint16_t port_number;           // in
     uint8_t input_channel_index;    // out
     uint8_t output_channel_index;   // out
     uintptr_t input_desc_handle;    // in
@@ -522,6 +530,7 @@ struct hailo_soc_close_params {
 
 /* structure used in ioctl HAILO_PCI_EP_ACCEPT */
 struct hailo_pci_ep_accept_params {
+    uint16_t port_number;           // in
     uint8_t input_channel_index;    // out
     uint8_t output_channel_index;   // out
     uintptr_t input_desc_handle;    // in
@@ -562,6 +571,7 @@ struct tCompatibleHailoIoctlData
         struct hailo_soc_close_params SocCloseParams;
         struct hailo_pci_ep_accept_params AcceptParams;
         struct hailo_pci_ep_close_params PciEpCloseParams;
+        struct hailo_write_action_list_params WriteActionListParams;
     } Buffer;
 };
 #endif // _MSC_VER
@@ -632,6 +642,7 @@ enum hailo_nnc_ioctl_code {
     HAILO_DISABLE_NOTIFICATION_CODE,
     HAILO_READ_LOG_CODE,
     HAILO_RESET_NN_CORE_CODE,
+    HAILO_WRITE_ACTION_LIST_CODE,
 
     // Must be last
     HAILO_NNC_IOCTL_MAX_NR
@@ -642,6 +653,7 @@ enum hailo_nnc_ioctl_code {
 #define HAILO_DISABLE_NOTIFICATION      _IO_(HAILO_NNC_IOCTL_MAGIC,    HAILO_DISABLE_NOTIFICATION_CODE)
 #define HAILO_READ_LOG                  _IOWR_(HAILO_NNC_IOCTL_MAGIC,  HAILO_READ_LOG_CODE,                   struct hailo_read_log_params)
 #define HAILO_RESET_NN_CORE             _IO_(HAILO_NNC_IOCTL_MAGIC,    HAILO_RESET_NN_CORE_CODE)
+#define HAILO_WRITE_ACTION_LIST         _IOW_(HAILO_NNC_IOCTL_MAGIC,    HAILO_WRITE_ACTION_LIST_CODE,     struct hailo_write_action_list_params)
 
 enum hailo_soc_ioctl_code {
     HAILO_SOC_IOCTL_CONNECT_CODE,
diff --git a/hailort/hailort_server/CMakeLists.txt b/hailort/hailort_server/CMakeLists.txt
index d288066d..add1ba73 100644
--- a/hailort/hailort_server/CMakeLists.txt
+++ b/hailort/hailort_server/CMakeLists.txt
@@ -1,4 +1,4 @@
-cmake_minimum_required(VERSION 3.0.0)
+cmake_minimum_required(VERSION 3.5.0)
 
 include(${HAILO_EXTERNALS_CMAKE_SCRIPTS}/spdlog.cmake)
 include(${HAILO_EXTERNALS_CMAKE_SCRIPTS}/readerwriterqueue.cmake)
@@ -10,32 +10,22 @@ set(HAILORT_SERVER_SOURCES
     hailort_server.cpp
     ${HRPC_CPP_SOURCES}
     ${HRPC_PROTOCOL_CPP_SOURCES}
-    ${HAILORT_COMMON_OS_DIR}/os_utils.cpp
     ${HAILORT_SERVICE_DIR}/cng_buffer_pool.cpp
-    ${HAILORT_COMMON_DIR}/common/event_internal.cpp
     ${HAILO_FULL_OS_DIR}/event.cpp # TODO HRT-10681: move to common
     ${DRIVER_OS_DIR}/driver_os_specific.cpp
-    ${HAILO_OS_DIR}/file_descriptor.cpp
-    ${HAILO_OS_DIR}/mmap_buffer.cpp
     ${HAILORT_SRC_DIR}/vdma/pcie_session.cpp
     ${HAILORT_SRC_DIR}/vdma/memory/descriptor_list.cpp
     ${HAILORT_SRC_DIR}/vdma/memory/mapped_buffer.cpp
     ${HAILORT_SRC_DIR}/vdma/memory/dma_able_buffer.cpp
-    ${HAILORT_SRC_DIR}/vdma/memory/vdma_edge_layer.cpp
     ${HAILORT_SRC_DIR}/vdma/driver/hailort_driver.cpp
     ${HAILORT_SRC_DIR}/vdma/channel/interrupts_dispatcher.cpp
     ${HAILORT_SRC_DIR}/vdma/channel/transfer_launcher.cpp
     ${HAILORT_SRC_DIR}/vdma/channel/boundary_channel.cpp
     ${HAILORT_SRC_DIR}/vdma/channel/channels_group.cpp
-    ${HAILORT_SRC_DIR}/stream_common/transfer_common.cpp
+    ${HAILORT_SRC_DIR}/vdma/channel/transfer_common.cpp
+
+    ${HAILORT_COMMON_CPP_SOURCES}
 )
-if(WIN32)
-    # hailort_driver.cpp in windows depends on string_conversion
-    # dma_able_buffer.cpp in windows depends on virtual_alloc_guard
-    set(HAILORT_SERVER_SOURCES ${HAILORT_SERVER_SOURCES}
-        ${HAILORT_COMMON_OS_DIR}/string_conversion.cpp
-        ${HAILO_FULL_OS_DIR}/virtual_alloc_guard.cpp)
-endif()
 
 add_executable(hailort_server ${HAILORT_SERVER_SOURCES})
 target_include_directories(hailort_server PRIVATE
@@ -52,4 +42,10 @@ target_link_libraries(hailort_server PRIVATE
     rpc_proto
     spdlog::spdlog
     readerwriterqueue
-)
\ No newline at end of file
+)
+if(WIN32)
+    target_link_libraries(hailort_server PRIVATE Ws2_32 Iphlpapi Shlwapi winmm.lib)
+elseif(NOT CMAKE_SYSTEM_NAME STREQUAL Android)
+    # TODO: HRT-14770 fix android build
+    target_link_libraries(hailort_server PRIVATE rt)
+endif()
\ No newline at end of file
diff --git a/hailort/hailort_server/hailort_server.cpp b/hailort/hailort_server/hailort_server.cpp
index d8cc83f0..6bc7e31b 100644
--- a/hailort/hailort_server/hailort_server.cpp
+++ b/hailort/hailort_server/hailort_server.cpp
@@ -41,6 +41,16 @@ using namespace hailort;
             return make_unexpected(HAILO_INTERNAL_FAILURE); \
         } \
     } while (0)
+#define CHECK_AS_HRPC_STATUS(_cond, _status, T) /* if !(_cond): log, then return T::serialize_reply(_status) */ \
+    do { \
+        if (!(_cond)) { \
+            LOGGER__ERROR("CHECK_AS_HRPC_STATUS failed, status: {}", _status); \
+            auto reply = T::serialize_reply(_status); /* note: _status is expanded a second time here */ \
+            if (reply) return reply; \
+            LOGGER__CRITICAL("Failed to create reply with status: {}", reply.status()); /* building the reply itself failed */ \
+            return make_unexpected(HAILO_INTERNAL_FAILURE); \
+        } \
+    } while (0)
 
 #define __HAILO_CONCAT(x, y) x ## y
 #define _HAILO_CONCAT(x, y) __HAILO_CONCAT(x, y)
@@ -115,7 +125,7 @@ hailo_status hrpc::HailoRTServer::cleanup_client_resources(RpcConnection client_
 
 Expected<std::unique_ptr<hrpc::HailoRTServer>> hrpc::HailoRTServer::create_unique()
 {
-    TRY(auto connection_context, ConnectionContext::create_shared(true));
+    TRY(auto connection_context, ConnectionContext::create_server_shared());
     auto res = make_unique_nothrow<HailoRTServer>(connection_context);
     CHECK_NOT_NULL(res, HAILO_OUT_OF_HOST_MEMORY);
     return res;
@@ -158,6 +168,7 @@ int main()
         TRY_AS_HRPC_STATUS(auto tuple, CreateInferModelSerializer::deserialize_request(request), CreateInferModelSerializer);
         auto vdevice_handle = std::get<0>(tuple);
         uint64_t hef_size = std::get<1>(tuple);
+        auto name = std::get<2>(tuple);
 
         assert(hef_size <= SIZE_MAX);
         TRY_AS_HRPC_STATUS(auto hef_buffer, Buffer::create(static_cast<size_t>(hef_size), BufferStorageParams::create_dma()), CreateInferModelSerializer);
@@ -166,8 +177,8 @@ int main()
         CHECK_SUCCESS_AS_HRPC_STATUS(status, CreateInferModelSerializer);
 
         auto &vdevice_manager = ServiceResourceManager<VDevice>::get_instance();
-        auto lambda = [view = MemoryView(hef_buffer)] (std::shared_ptr<VDevice> vdevice) {
-            return vdevice->create_infer_model(view);
+        auto lambda = [view = MemoryView(hef_buffer), &name] (std::shared_ptr<VDevice> vdevice) {
+            return vdevice->create_infer_model(view, name);
         };
         auto infer_model = vdevice_manager.execute<Expected<std::shared_ptr<InferModel>>>(vdevice_handle, lambda);
         CHECK_EXPECTED_AS_HRPC_STATUS(infer_model, CreateInferModelSerializer);
@@ -434,10 +445,10 @@ int main()
         auto bindings_lambda = [] (std::shared_ptr<ConfiguredInferModel> configured_infer_model) {
             return configured_infer_model->create_bindings();
         };
-        TRY_AS_HRPC_STATUS(auto request_tuple, RunAsyncSerializer::deserialize_request(request), RunAsyncSerializer);
-        auto configured_infer_model_handle = std::get<0>(request_tuple);
-        auto infer_model_handle = std::get<1>(request_tuple);
-        auto callback_id = std::get<2>(request_tuple);
+        TRY_AS_HRPC_STATUS(auto request_struct, RunAsyncSerializer::deserialize_request(request), RunAsyncSerializer);
+        auto configured_infer_model_handle = request_struct.configured_infer_model_handle;
+        auto infer_model_handle = request_struct.infer_model_handle;
+        auto callback_id = request_struct.callback_handle;
 
         auto bindings = cim_manager.execute<Expected<ConfiguredInferModel::Bindings>>(configured_infer_model_handle, bindings_lambda);
         CHECK_EXPECTED_AS_HRPC_STATUS(bindings, RunAsyncSerializer);
@@ -452,17 +463,28 @@ int main()
 
         std::vector<BufferPtr> inputs; // TODO: add infer vector pool
         inputs.reserve(infer_model_info->inputs_names.size());
+        uint32_t buffer_size_index = 0;
+
         for (const auto &input_name : infer_model_info->inputs_names) {
             TRY_AS_HRPC_STATUS(auto input, bindings->input(input_name), RunAsyncSerializer);
 
             TRY_AS_HRPC_STATUS(auto buffer_ptr, buffer_pool_per_cim[configured_infer_model_handle]->acquire_buffer(input_name),
                 RunAsyncSerializer);
 
-            auto status = server_context->connection().read_buffer(MemoryView(*buffer_ptr));
-            CHECK_SUCCESS_AS_HRPC_STATUS(status, RunAsyncSerializer);
+            uint32_t read_size = 0;
+            while (read_size < buffer_ptr->size()) {
+                uint32_t current_size = request_struct.input_buffer_sizes[buffer_size_index++];
+                CHECK_AS_HRPC_STATUS(read_size + current_size <= buffer_ptr->size(), HAILO_INTERNAL_FAILURE,
+                    RunAsyncSerializer);
+
+                auto status = server_context->connection().read_buffer(MemoryView(buffer_ptr->data() + read_size, current_size));
+                CHECK_SUCCESS_AS_HRPC_STATUS(status, RunAsyncSerializer);
+
+                read_size += current_size;
+            }
 
             inputs.emplace_back(buffer_ptr);
-            status = input.set_buffer(MemoryView(*buffer_ptr));
+            auto status = input.set_buffer(MemoryView(*buffer_ptr));
             CHECK_SUCCESS_AS_HRPC_STATUS(status, RunAsyncSerializer);
         }
 
@@ -488,7 +510,16 @@ int main()
                 return configured_infer_model->run_async(bindings,
                     [callback_id, server_context, inputs, outputs, &buffer_pool_per_cim, configured_infer_model_handle, infer_model_info]
                         (const AsyncInferCompletionInfo &completion_info) {
-                    auto status = server_context->trigger_callback(callback_id, completion_info.status, [outputs, completion_info] (hrpc::RpcConnection connection) -> hailo_status {
+                    for (uint32_t i = 0; i < inputs.size(); i++) {
+                        auto status = buffer_pool_per_cim[configured_infer_model_handle]->return_to_pool(infer_model_info->inputs_names[i], inputs[i]);
+                        if (status != HAILO_SUCCESS) {
+                            LOGGER__CRITICAL("return_to_pool failed for input {}, status = {}. Server should restart!", infer_model_info->inputs_names[i], status);
+                            return;
+                        }
+                    }
+
+                    auto status = server_context->trigger_callback(callback_id, completion_info.status, configured_infer_model_handle,
+                    [outputs, completion_info] (hrpc::RpcConnection connection) -> hailo_status {
                         if (HAILO_SUCCESS == completion_info.status) {
                             for (auto output : outputs) {
                                 auto status = connection.write_buffer(MemoryView(*output));
@@ -503,13 +534,6 @@ int main()
                         LOGGER__CRITICAL("Error {} returned from connection.write(). Server Should restart!", status);
                     }
 
-                    for (uint32_t i = 0; i < inputs.size(); i++) {
-                        status = buffer_pool_per_cim[configured_infer_model_handle]->return_to_pool(infer_model_info->inputs_names[i], inputs[i]);
-                        if (status != HAILO_SUCCESS) {
-                            LOGGER__CRITICAL("return_to_pool failed for input {}, status = {}. Server should restart!", infer_model_info->inputs_names[i], status);
-                            return;
-                        }
-                    }
                     for (uint32_t i = 0; i < outputs.size(); i++) {
                         status = buffer_pool_per_cim[configured_infer_model_handle]->return_to_pool(infer_model_info->outputs_names[i], outputs[i]);
                         if (status != HAILO_SUCCESS) {
@@ -527,6 +551,52 @@ int main()
         TRY_AS_HRPC_STATUS(auto reply, RunAsyncSerializer::serialize_reply(HAILO_SUCCESS), RunAsyncSerializer);
         return reply;
     });
+    // DEVICE__CREATE: creates a Device, registers it under SINGLE_CLIENT_PID and replies with the new handle.
+    dispatcher.register_action(HailoRpcActionID::DEVICE__CREATE,
+    [] (const MemoryView &request, hrpc::ServerContextPtr /*server_context*/) -> Expected<Buffer> {
+        auto status = CreateDeviceSerializer::deserialize_request(request);
+        CHECK_SUCCESS_AS_HRPC_STATUS(status, CreateDeviceSerializer);
+
+        TRY_AS_HRPC_STATUS(auto device, Device::create(), CreateDeviceSerializer);
+        auto &manager = ServiceResourceManager<Device>::get_instance();
+        auto id = manager.register_resource(SINGLE_CLIENT_PID, std::move(device));
+        auto reply = CreateDeviceSerializer::serialize_reply(HAILO_SUCCESS, id);
+        return reply;
+    });
+    dispatcher.register_action(HailoRpcActionID::DEVICE__DESTROY,
+    [] (const MemoryView &request, hrpc::ServerContextPtr /*server_context*/) -> Expected<Buffer> {
+        auto &manager = ServiceResourceManager<Device>::get_instance();
+        TRY_AS_HRPC_STATUS(auto device_handle, DestroyDeviceSerializer::deserialize_request(request), DestroyDeviceSerializer);
+        (void)manager.release_resource(device_handle, SINGLE_CLIENT_PID); // result is discarded; the reply below reports HAILO_SUCCESS regardless
+        TRY_AS_HRPC_STATUS(auto reply, DestroyDeviceSerializer::serialize_reply(HAILO_SUCCESS), DestroyDeviceSerializer);
+        return reply;
+    });
+    // DEVICE__IDENTIFY: calls identify() on the device behind the requested handle and replies with the result.
+    dispatcher.register_action(HailoRpcActionID::DEVICE__IDENTIFY,
+    [] (const MemoryView &request, hrpc::ServerContextPtr /*server_context*/) -> Expected<Buffer> {
+        TRY_AS_HRPC_STATUS(auto device_handle, IdentifyDeviceSerializer::deserialize_request(request), IdentifyDeviceSerializer);
+        auto &manager = ServiceResourceManager<Device>::get_instance();
+        auto device_lambda = [] (std::shared_ptr<Device> device) {
+            return device->identify();
+        };
+        TRY_AS_HRPC_STATUS(auto identity,
+            manager.execute<Expected<hailo_device_identity_t>>(device_handle, device_lambda), IdentifyDeviceSerializer);
+        TRY_AS_HRPC_STATUS(auto reply, IdentifyDeviceSerializer::serialize_reply(HAILO_SUCCESS, identity), IdentifyDeviceSerializer);
+        return reply;
+    });
+    // DEVICE__EXTENDED_INFO: calls get_extended_device_information() on the requested device and replies with the result.
+    dispatcher.register_action(HailoRpcActionID::DEVICE__EXTENDED_INFO,
+    [] (const MemoryView &request, hrpc::ServerContextPtr /*server_context*/) -> Expected<Buffer> {
+        TRY_AS_HRPC_STATUS(auto device_handle, ExtendedDeviceInfoSerializer::deserialize_request(request), ExtendedDeviceInfoSerializer);
+        auto &manager = ServiceResourceManager<Device>::get_instance();
+        auto device_lambda = [] (std::shared_ptr<Device> device) {
+            return device->get_extended_device_information();
+        };
+        TRY_AS_HRPC_STATUS(auto extended_info,
+            manager.execute<Expected<hailo_extended_device_information_t>>(device_handle, device_lambda), ExtendedDeviceInfoSerializer);
+        TRY_AS_HRPC_STATUS(auto reply, ExtendedDeviceInfoSerializer::serialize_reply(HAILO_SUCCESS, extended_info), ExtendedDeviceInfoSerializer);
+        return reply;
+    });
 
     server->set_dispatcher(dispatcher);
     auto status = server->serve();
diff --git a/hailort/hailort_service/CMakeLists.txt b/hailort/hailort_service/CMakeLists.txt
index a470a3dd..7808d859 100644
--- a/hailort/hailort_service/CMakeLists.txt
+++ b/hailort/hailort_service/CMakeLists.txt
@@ -1,4 +1,4 @@
-cmake_minimum_required(VERSION 3.0.0)
+cmake_minimum_required(VERSION 3.5.0)
 
 include(${HAILO_EXTERNALS_CMAKE_SCRIPTS}/spdlog.cmake)
 
@@ -30,6 +30,8 @@ target_link_libraries(hailort_service
 if(WIN32)
     # Needed in order to compile eth utils (we compile here ${HAILORT_COMMON_CPP_SOURCES}, consider removing)
     target_link_libraries(hailort_service Iphlpapi Shlwapi Kernel32 Advapi32)
+elseif(NOT CMAKE_SYSTEM_NAME STREQUAL Android)
+    target_link_libraries(hailort_service rt)
 endif()
 
 target_include_directories(hailort_service
diff --git a/hailort/hailort_service/cng_buffer_pool.cpp b/hailort/hailort_service/cng_buffer_pool.cpp
index f2a268c5..6d398b89 100644
--- a/hailort/hailort_service/cng_buffer_pool.cpp
+++ b/hailort/hailort_service/cng_buffer_pool.cpp
@@ -14,9 +14,8 @@
 namespace hailort
 {
 
-
-Expected<std::shared_ptr<ServiceStreamBufferPool>> ServiceStreamBufferPool::create(uint32_t vdevice_handle,
-    size_t buffer_size, size_t buffer_count, hailo_dma_buffer_direction_t direction, EventPtr shutdown_event)
+Expected<BasicBufferPoolPtr> ServiceNetworkGroupBufferPool::create_stream_buffer_pool(size_t buffer_size,
+    size_t buffer_count, hailo_dma_buffer_direction_t direction, EventPtr shutdown_event)
 {
     auto map_buffer_lambda = [direction](std::shared_ptr<VDevice> vdevice, BufferPtr buffer) {
         return DmaMappedBuffer::create(*vdevice, buffer->data(), buffer->size(), direction);
@@ -26,59 +25,28 @@ Expected<std::shared_ptr<ServiceStreamBufferPool>> ServiceStreamBufferPool::crea
     TRY(auto free_buffers_queue,
         SpscQueue<BufferPtr>::create(buffer_count, shutdown_event, DEFAULT_TRANSFER_TIMEOUT));
 
-    std::vector<AllocatedMappedBuffer> buffers;
+    std::vector<BufferPtr> buffers;
     buffers.reserve(buffer_count);
     for (size_t i = 0; i < buffer_count; i++) {
         TRY(auto buffer, Buffer::create_shared(buffer_size, BufferStorageParams::create_dma()));
 
         TRY(auto mapped_buffer,
-            vdevice_manager.execute<Expected<DmaMappedBuffer>>(vdevice_handle, map_buffer_lambda, buffer));
+            vdevice_manager.execute<Expected<DmaMappedBuffer>>(m_vdevice_handle, map_buffer_lambda, buffer));
 
         auto status = free_buffers_queue.enqueue(buffer);
         CHECK_SUCCESS(status);
 
-        buffers.emplace_back(AllocatedMappedBuffer{ buffer, std::move(mapped_buffer)});
+        buffers.emplace_back(buffer);
+        m_mapped_buffers.emplace_back(DmaMappedBuffer(std::move(mapped_buffer)));
     }
 
-    auto buffer_pool_ptr = make_shared_nothrow<ServiceStreamBufferPool>(buffer_size, std::move(buffers),
+    auto buffer_pool_ptr = make_shared_nothrow<BasicBufferPool>(buffer_size, std::move(buffers),
         std::move(free_buffers_queue), buffer_count);
     CHECK_NOT_NULL_AS_EXPECTED(buffer_pool_ptr, HAILO_OUT_OF_HOST_MEMORY);
 
     return buffer_pool_ptr;
 }
 
-ServiceStreamBufferPool::ServiceStreamBufferPool(size_t buffer_size, std::vector<AllocatedMappedBuffer> &&buffers,
-        SpscQueue<BufferPtr> &&free_buffers_queue, size_t buffers_count) :
-    m_buffer_size(buffer_size),
-    m_buffers_count(buffers_count),
-    m_buffers(std::move(buffers)),
-    m_free_buffers_queue(std::move(free_buffers_queue))
-{}
-
-Expected<BufferPtr> ServiceStreamBufferPool::acquire_buffer()
-{
-    TRY_WITH_ACCEPTABLE_STATUS(HAILO_SHUTDOWN_EVENT_SIGNALED, auto buffer,
-        m_free_buffers_queue.dequeue(DEFAULT_TRANSFER_TIMEOUT));
-    return buffer;
-}
-
-hailo_status ServiceStreamBufferPool::return_to_pool(BufferPtr buffer)
-{
-    CHECK(buffer->size() == m_buffer_size, HAILO_INTERNAL_FAILURE,
-        "Buffer size is not the same as expected for pool! ({} != {})", buffer->size(), m_buffer_size);
-
-    std::unique_lock<std::mutex> lock(m_mutex);
-    auto status = m_free_buffers_queue.enqueue(buffer);
-    CHECK_SUCCESS(status);
-
-    return HAILO_SUCCESS;
-}
-
-size_t ServiceStreamBufferPool::buffers_count()
-{
-    return m_buffers_count;
-}
-
 Expected<std::shared_ptr<ServiceNetworkGroupBufferPool>> ServiceNetworkGroupBufferPool::create(uint32_t vdevice_handle)
 {
     TRY(auto shutdown_event, Event::create_shared(Event::State::not_signalled));
@@ -90,14 +58,13 @@ Expected<std::shared_ptr<ServiceNetworkGroupBufferPool>> ServiceNetworkGroupBuff
 }
 
 ServiceNetworkGroupBufferPool::ServiceNetworkGroupBufferPool(EventPtr shutdown_event, uint32_t vdevice_handle) :
-    m_stream_name_to_buffer_pool(), m_shutdown_event(shutdown_event), m_vdevice_handle(vdevice_handle)
+    m_stream_name_to_buffer_pool(), m_mapped_buffers(), m_shutdown_event(shutdown_event), m_vdevice_handle(vdevice_handle), m_is_shutdown(false)
 {}
 
 hailo_status ServiceNetworkGroupBufferPool::allocate_pool(const std::string &name,
     hailo_dma_buffer_direction_t direction, size_t frame_size, size_t pool_size)
 {
-    TRY(auto buffer_pool, ServiceStreamBufferPool::create(m_vdevice_handle, frame_size,
-        pool_size, direction, m_shutdown_event));
+    TRY(auto buffer_pool, create_stream_buffer_pool(frame_size, pool_size, direction, m_shutdown_event));
 
     std::unique_lock<std::mutex> lock(m_mutex);
     m_stream_name_to_buffer_pool[name] = buffer_pool;
@@ -111,9 +78,9 @@ hailo_status ServiceNetworkGroupBufferPool::reallocate_pool(const std::string &n
     std::unique_lock<std::mutex> lock(m_mutex);
     auto pool_size = m_stream_name_to_buffer_pool[name]->buffers_count();
     m_stream_name_to_buffer_pool[name].reset();
+    m_mapped_buffers.clear(); // NOTE(review): this vector collects the mappings of every stream's pool, so other streams' mappings are dropped too - confirm intended
 
-    TRY(auto buffer_pool, ServiceStreamBufferPool::create(m_vdevice_handle, frame_size,
-        pool_size, direction, m_shutdown_event));
+    TRY(auto buffer_pool, create_stream_buffer_pool(frame_size, pool_size, direction, m_shutdown_event));
     m_stream_name_to_buffer_pool[name] = buffer_pool;
 
     return HAILO_SUCCESS;
@@ -125,8 +92,15 @@ Expected<BufferPtr> ServiceNetworkGroupBufferPool::acquire_buffer(const std::str
         "acquire_buffer() for stream {} failed, stream name does not exist in buffer pool", stream_name);
 
     std::unique_lock<std::mutex> lock(m_mutex);
-    TRY(auto buffer, m_stream_name_to_buffer_pool.at(stream_name)->acquire_buffer());
+    auto pool = m_stream_name_to_buffer_pool.at(stream_name);
+    m_cv.wait(lock, [this, pool] () {
+        return (pool->current_size() > 0) || m_is_shutdown;
+    });
+    if (m_is_shutdown) {
+        return make_unexpected(HAILO_SHUTDOWN_EVENT_SIGNALED);
+    }
 
+    TRY(auto buffer, pool->acquire_buffer());
     return buffer;
 }
 
@@ -135,15 +109,23 @@ hailo_status ServiceNetworkGroupBufferPool::return_to_pool(const std::string &st
     CHECK(contains(m_stream_name_to_buffer_pool, stream_name), HAILO_INTERNAL_FAILURE,
         "acquire_buffer() for stream {} failed, stream name does not exist in buffer pool", stream_name);
 
-    std::unique_lock<std::mutex> lock(m_mutex);
-    auto status = m_stream_name_to_buffer_pool.at(stream_name)->return_to_pool(buffer);
-    CHECK_SUCCESS(status);
+    {
+        std::unique_lock<std::mutex> lock(m_mutex);
+        auto status = m_stream_name_to_buffer_pool.at(stream_name)->return_to_pool(buffer);
+        CHECK_SUCCESS(status);
+    }
+    m_cv.notify_all();
 
     return HAILO_SUCCESS;
 }
 
 hailo_status ServiceNetworkGroupBufferPool::shutdown()
 {
+    { // m_is_shutdown is written under m_mutex; the lock is dropped before notifying
+        std::unique_lock<std::mutex> lock(m_mutex);
+        m_is_shutdown = true;
+    }
+    m_cv.notify_all(); // wake threads waiting on m_cv so they re-check m_is_shutdown
     return m_shutdown_event->signal();
 }
 
diff --git a/hailort/hailort_service/cng_buffer_pool.hpp b/hailort/hailort_service/cng_buffer_pool.hpp
index 399027a4..35744949 100644
--- a/hailort/hailort_service/cng_buffer_pool.hpp
+++ b/hailort/hailort_service/cng_buffer_pool.hpp
@@ -15,40 +15,12 @@
 #include "hailo/buffer.hpp"
 #include "hailo/vdevice.hpp"
 #include "hailo/dma_mapped_buffer.hpp"
-#include "utils/thread_safe_queue.hpp"
+#include "common/thread_safe_queue.hpp"
+#include "common/buffer_pool.hpp"
 
 namespace hailort
 {
 
-class ServiceStreamBufferPool
-{
-public:
-    static Expected<std::shared_ptr<ServiceStreamBufferPool>> create(uint32_t vdevice_handle, size_t buffer_size,
-        size_t buffer_count, hailo_dma_buffer_direction_t direction, EventPtr shutdown_event);
-
-    struct AllocatedMappedBuffer {
-        BufferPtr buffer;
-        DmaMappedBuffer mapped_buffer;
-    };
-
-    ServiceStreamBufferPool(size_t buffer_size, std::vector<AllocatedMappedBuffer> &&buffers,
-        SpscQueue<BufferPtr> &&m_free_buffers_queue, size_t buffers_count);
-    virtual ~ServiceStreamBufferPool() = default;
-
-    Expected<BufferPtr> acquire_buffer();
-    hailo_status return_to_pool(BufferPtr buffer);
-    size_t buffers_count();
-
-private:
-
-    size_t m_buffer_size;
-    size_t m_buffers_count;
-    std::vector<AllocatedMappedBuffer> m_buffers;
-    SpscQueue<BufferPtr> m_free_buffers_queue;
-    std::mutex m_mutex;
-};
-
-using BufferPoolPtr = std::shared_ptr<ServiceStreamBufferPool>;
 using stream_name_t = std::string;
 
 // This object holds a buffer pool for each stream of the network group.
@@ -77,10 +49,17 @@ class ServiceNetworkGroupBufferPool
     hailo_status shutdown();
 
 private:
-    std::unordered_map<stream_name_t, BufferPoolPtr> m_stream_name_to_buffer_pool;
+    Expected<BasicBufferPoolPtr> create_stream_buffer_pool(size_t buffer_size,
+        size_t buffer_count, hailo_dma_buffer_direction_t direction, EventPtr shutdown_event);
+
+    std::unordered_map<stream_name_t, BasicBufferPoolPtr> m_stream_name_to_buffer_pool;
+    // This is in order to keep the DmaMappedBuffer buffers alive while using the buffers pool.
+    std::vector<DmaMappedBuffer> m_mapped_buffers;
     EventPtr m_shutdown_event;
     uint32_t m_vdevice_handle;
     std::mutex m_mutex;
+    std::condition_variable m_cv;
+    bool m_is_shutdown;
 };
 
 } /* namespace hailort */
diff --git a/hailort/hailort_service/hailort_rpc_service.cpp b/hailort/hailort_service/hailort_rpc_service.cpp
index d40fc1e1..d887cbcd 100644
--- a/hailort/hailort_service/hailort_rpc_service.cpp
+++ b/hailort/hailort_service/hailort_rpc_service.cpp
@@ -94,6 +94,53 @@ void HailoRtRpcService::abort_vstreams_by_pids(std::set<uint32_t> &pids)
     }
 }
 
+// Calls shutdown() on the ConfiguredNetworkGroup registered under network_group_handle; returns the failure status, if any.
+hailo_status HailoRtRpcService::shutdown_configured_network_group(uint32_t network_group_handle)
+{
+    auto shutdown_lambda = [](std::shared_ptr<ConfiguredNetworkGroup> cng) {
+        return cng->shutdown();
+    };
+
+    auto &cng_manager = ServiceResourceManager<ConfiguredNetworkGroup>::get_instance();
+    auto status = cng_manager.execute(network_group_handle, shutdown_lambda);
+    CHECK_SUCCESS(status);
+    return HAILO_SUCCESS;
+}
+
+
+// Shuts down the network group of each handle returned for `pids`; a failure is logged and the loop moves on.
+void HailoRtRpcService::shutdown_configured_network_groups_by_pids(std::set<uint32_t> &pids)
+{
+    auto handles_to_shutdown = ServiceResourceManager<ConfiguredNetworkGroup>::get_instance().resources_handles_by_pids(pids);
+    for (const auto &cng_handle : handles_to_shutdown) {
+        const auto status = shutdown_configured_network_group(cng_handle);
+        if (HAILO_SUCCESS == status) { continue; }
+        LOGGER__ERROR("Failed to shutdown configured network group queue with handle={}, status={}", cng_handle, status);
+    }
+}
+
+// Shuts down the buffer pool of each handle returned for `pids`; a failure is logged and the loop moves on.
+void HailoRtRpcService::shutdown_buffer_pool_by_pids(std::set<uint32_t> &pids)
+{
+    auto pool_handles = ServiceResourceManager<ServiceNetworkGroupBufferPool>::get_instance().resources_handles_by_pids(pids);
+    for (const auto &pool_handle : pool_handles) {
+        const auto status = shutdown_cng_buffer_pool(pool_handle);
+        if (HAILO_SUCCESS == status) { continue; }
+        LOGGER__ERROR("Failed to shutdown cng buffer pool with handle={}, status={}", pool_handle, status);
+    }
+}
+
+// Shuts down the callbacks queue of each handle returned for `pids`; a failure is logged and the loop moves on.
+void HailoRtRpcService::shutdown_vdevice_cb_queue_by_pids(std::set<uint32_t> &pids)
+{
+    auto queue_handles = ServiceResourceManager<VDeviceCallbacksQueue>::get_instance().resources_handles_by_pids(pids);
+    for (const auto &queue_handle : queue_handles) {
+        const auto status = shutdown_vdevice_cb_queue(queue_handle);
+        if (HAILO_SUCCESS == status) { continue; }
+        LOGGER__ERROR("Failed to shutdown vdevice callbacks queue with handle={}, status={}", queue_handle, status);
+    }
+}
+
 void HailoRtRpcService::remove_disconnected_clients()
 {
     std::this_thread::sleep_for(hailort::HAILO_KEEPALIVE_INTERVAL / 2);
@@ -113,10 +160,17 @@ void HailoRtRpcService::remove_disconnected_clients()
         // blocking operation (which will be finished with timeout).
         // To release the vstream the ServiceResourceManager is waiting for the resource_mutex which is also locked in execute.
         abort_vstreams_by_pids(pids_to_remove);
+
+        // It is important to shutdown the cb Queue before the NG shutdown, as ongoing callbacks might continue to try to enqueue
+        shutdown_vdevice_cb_queue_by_pids(pids_to_remove);
+        shutdown_configured_network_groups_by_pids(pids_to_remove);
+        shutdown_buffer_pool_by_pids(pids_to_remove);
         for (auto &client_pid : pids_to_remove) {
             ServiceResourceManager<OutputVStream>::get_instance().release_by_pid(client_pid);
             ServiceResourceManager<InputVStream>::get_instance().release_by_pid(client_pid);
             ServiceResourceManager<ConfiguredNetworkGroup>::get_instance().release_by_pid(client_pid);
+            ServiceResourceManager<VDeviceCallbacksQueue>::get_instance().release_by_pid(client_pid);
+            ServiceResourceManager<ServiceNetworkGroupBufferPool>::get_instance().release_by_pid(client_pid);
             ServiceResourceManager<VDevice>::get_instance().release_by_pid(client_pid);
 
             LOGGER__INFO("Client disconnected, pid: {}", client_pid);
@@ -126,7 +180,6 @@ void HailoRtRpcService::remove_disconnected_clients()
     }
 }
 
-
 void HailoRtRpcService::keep_alive()
 {
     while (true) {
@@ -191,12 +244,17 @@ grpc::Status HailoRtRpcService::VDevice_create(grpc::ServerContext *, const VDev
     update_client_id_timestamp(request->pid());
     std::unique_lock<std::mutex> lock(m_vdevice_mutex);
     auto &vdevice_manager = ServiceResourceManager<VDevice>::get_instance();
+    auto &cb_queue_manager = ServiceResourceManager<VDeviceCallbacksQueue>::get_instance();
+
     auto vdevice_handle = vdevice_manager.register_resource(request->pid(), std::move(vdevice.release()));
 
     auto cb_queue = VDeviceCallbacksQueue::create(MAX_QUEUE_SIZE);
+    if (HAILO_SUCCESS != cb_queue.status()) {
+        // cb_queue_handle and vdevice_handle indexes must be the same
+        cb_queue_manager.advance_current_handle_index();
+    }
     CHECK_EXPECTED_AS_RPC_STATUS(cb_queue, reply);
 
-    auto &cb_queue_manager = ServiceResourceManager<VDeviceCallbacksQueue>::get_instance();
     auto cb_queue_handle = cb_queue_manager.register_resource(request->pid(), std::move(cb_queue.release()));
     if (cb_queue_handle != vdevice_handle) {
         LOGGER__ERROR("cb_queue_handle = {} must be equal to vdevice_handle ={}", cb_queue_handle, vdevice_handle);
@@ -209,17 +267,26 @@ grpc::Status HailoRtRpcService::VDevice_create(grpc::ServerContext *, const VDev
     return grpc::Status::OK;
 }
 
-grpc::Status HailoRtRpcService::VDevice_release(grpc::ServerContext*, const Release_Request *request,
-    Release_Reply *reply)
+hailo_status HailoRtRpcService::shutdown_vdevice_cb_queue(uint32_t vdevice_handle)
 {
     auto lambda = [](std::shared_ptr<VDeviceCallbacksQueue> cb_queue) {
         return cb_queue->shutdown();
     };
 
     auto &cb_queue_manager = ServiceResourceManager<VDeviceCallbacksQueue>::get_instance();
-    auto status = cb_queue_manager.execute(request->vdevice_identifier().vdevice_handle(), lambda);
+    auto status = cb_queue_manager.execute(vdevice_handle, lambda);
+    CHECK_SUCCESS(status);
+
+    return HAILO_SUCCESS;
+}
+
+grpc::Status HailoRtRpcService::VDevice_release(grpc::ServerContext*, const Release_Request *request,
+    Release_Reply *reply)
+{
+    auto status = shutdown_vdevice_cb_queue(request->vdevice_identifier().vdevice_handle());
     CHECK_SUCCESS_AS_RPC_STATUS(status, reply);
 
+    auto &cb_queue_manager = ServiceResourceManager<VDeviceCallbacksQueue>::get_instance();
     cb_queue_manager.release_resource(request->vdevice_identifier().vdevice_handle(), request->pid());
 
     auto &manager = ServiceResourceManager<VDevice>::get_instance();
@@ -307,9 +374,16 @@ grpc::Status HailoRtRpcService::VDevice_configure(grpc::ServerContext*, const VD
 hailo_status HailoRtRpcService::create_buffer_pools_for_ng(uint32_t vdevice_handle, uint32_t ng_handle, uint32_t request_pid,
     bool allocate_for_raw_streams)
 {
-    TRY(auto cng_buffer_pool, ServiceNetworkGroupBufferPool::create(vdevice_handle));
-
     auto &cng_buffer_pool_manager = ServiceResourceManager<ServiceNetworkGroupBufferPool>::get_instance();
+
+    auto cng_buffer_pool_exp = ServiceNetworkGroupBufferPool::create(vdevice_handle);
+    if (HAILO_SUCCESS != cng_buffer_pool_exp.status()) {
+        // cng_buffer_pool_handle and network_group_handle indexes must be the same
+        cng_buffer_pool_manager.advance_current_handle_index();
+        return cng_buffer_pool_exp.status();
+    }
+    auto cng_buffer_pool = cng_buffer_pool_exp.release();
+
     auto cng_buffer_pool_handle = cng_buffer_pool_manager.register_resource(request_pid, cng_buffer_pool);
     CHECK(cng_buffer_pool_handle == ng_handle, HAILO_INTERNAL_FAILURE,
         "cng_buffer_pool_handle = {} must be equal to network_group_handle ={}", cng_buffer_pool_handle, ng_handle);
@@ -431,16 +505,44 @@ ProtoCallbackIdentifier serialize_callback_identifier(uint32_t vdevice_handle, u
     return cb_identifier;
 }
 
-grpc::Status HailoRtRpcService::ConfiguredNetworkGroup_release(grpc::ServerContext*, const Release_Request *request,
-    Release_Reply *reply)
+// Builds a callback identifier whose payload is described by a shared-memory name and size instead of inline data.
+ProtoCallbackIdentifier serialize_callback_identifier_shm(uint32_t vdevice_handle, uint32_t ng_handle, callback_type_t cb_type,
+    const std::string &stream_name, uint32_t cb_idx,  hailo_status status, const ProtoShmBufferIdentifier &shm_buffer_identifier)
+{
+    ProtoCallbackIdentifier identifier;
+    identifier.set_vdevice_handle(vdevice_handle);
+    identifier.set_network_group_handle(ng_handle);
+    identifier.set_cb_type(cb_type);
+    identifier.set_stream_name(stream_name);
+    identifier.set_cb_idx(cb_idx);
+    identifier.set_status(status);
+    // Copy the name and size of the caller's shared-memory descriptor into the identifier.
+    auto *shm_out = identifier.mutable_shared_memory_identifier();
+    shm_out->set_name(shm_buffer_identifier.name());
+    shm_out->set_size(shm_buffer_identifier.size());
+    return identifier;
+}
+
+hailo_status HailoRtRpcService::shutdown_cng_buffer_pool(uint32_t network_group_handle)
 {
     auto buffer_shutdown_lambda = [](std::shared_ptr<ServiceNetworkGroupBufferPool> cng_buffer_pool) {
         return cng_buffer_pool->shutdown();
     };
 
     auto &buffer_pool_manager = ServiceResourceManager<ServiceNetworkGroupBufferPool>::get_instance();
-    auto status = buffer_pool_manager.execute(request->network_group_identifier().network_group_handle(), buffer_shutdown_lambda);
+    auto status = buffer_pool_manager.execute(network_group_handle, buffer_shutdown_lambda);
+    CHECK_SUCCESS(status);
+
+    return HAILO_SUCCESS;
+}
+
+grpc::Status HailoRtRpcService::ConfiguredNetworkGroup_release(grpc::ServerContext*, const Release_Request *request,
+    Release_Reply *reply)
+{
+    auto status = shutdown_cng_buffer_pool(request->network_group_identifier().network_group_handle());
     CHECK_SUCCESS_AS_RPC_STATUS(status, reply);
+
+    auto &buffer_pool_manager = ServiceResourceManager<ServiceNetworkGroupBufferPool>::get_instance();
     buffer_pool_manager.release_resource(request->network_group_identifier().network_group_handle(), request->pid());
 
     auto &manager = ServiceResourceManager<ConfiguredNetworkGroup>::get_instance();
@@ -456,20 +558,30 @@ hailo_status HailoRtRpcService::add_input_named_buffer(const ProtoTransferReques
     // Prepare input buffer
     BufferPtr buffer;
     MemoryView mem_view;
-    auto *data = reinterpret_cast<const uint8_t*>(proto_stream_transfer_request.data().c_str());
-    if (reinterpret_cast<size_t>(data) % HailoRTCommon::HW_DATA_ALIGNMENT == 0) {
-        // Input buffers is aligned to 8
-        mem_view = MemoryView::create_const(data, proto_stream_transfer_request.data().size());
-    } else {
-        // The memory is not aligned to 8, therefore we need to copy the data into a buffer
-        TRY(buffer, Buffer::create_shared(data, proto_stream_transfer_request.data().size(),
-            BufferStorageParams::create_dma()));
+    if (proto_stream_transfer_request.has_shared_memory_identifier()) {
+        TRY(buffer, Buffer::create_shared(proto_stream_transfer_request.shared_memory_identifier().size(),
+            BufferStorageParams::open_shared_memory(proto_stream_transfer_request.shared_memory_identifier().name())));
         mem_view = MemoryView(*buffer);
+    } else {
+        auto *data = reinterpret_cast<const uint8_t*>(proto_stream_transfer_request.data().c_str());
+        if (reinterpret_cast<size_t>(data) % HailoRTCommon::HW_DATA_ALIGNMENT == 0) {
+            // Input buffers is aligned to 8
+            mem_view = MemoryView::create_const(data, proto_stream_transfer_request.data().size());
+        } else {
+            // The memory is not aligned to 8, therefore we need to copy the data into a buffer
+            TRY(buffer, Buffer::create_shared(data, proto_stream_transfer_request.data().size(),
+                BufferStorageParams::create_dma()));
+            mem_view = MemoryView(*buffer);
+        }
     }
 
     // Preparing callback
     auto &stream_name = proto_stream_transfer_request.stream_name();
+    CHECK(stream_name != INVALID_STREAM_NAME, HAILO_INTERNAL_FAILURE, "Got invalid stream name");
+    
     auto cb_idx = proto_stream_transfer_request.cb_idx();
+    CHECK(cb_idx != INVALID_CB_INDEX, HAILO_INTERNAL_FAILURE, "Got invalid callback index");
+
     std::function<void(hailo_status)> transfer_done = [this, vdevice_handle, ng_handle, cb_idx, stream_name, buffer, infer_async_request]
         (hailo_status status)
     {
@@ -493,18 +605,38 @@ hailo_status HailoRtRpcService::add_input_named_buffer(const ProtoTransferReques
 hailo_status HailoRtRpcService::add_output_named_buffer(const ProtoTransferRequest &proto_stream_transfer_request, uint32_t vdevice_handle,
     uint32_t ng_handle, NamedBuffersCallbacks &named_buffers_callbacks)
 {
-    // Prepare output buffer
     auto &stream_name = proto_stream_transfer_request.stream_name();
-    TRY(auto buffer, acquire_buffer_from_cng_pool(ng_handle, stream_name));
+    CHECK(stream_name != INVALID_STREAM_NAME, HAILO_INTERNAL_FAILURE, "Got invalid stream name");
+
+    // Prepare output buffer
+    BufferPtr buffer;
+    bool is_shared_mem = proto_stream_transfer_request.has_shared_memory_identifier();
+    auto shm_identifier = proto_stream_transfer_request.shared_memory_identifier();
+    
+    if (is_shared_mem) {
+        TRY(buffer, Buffer::create_shared(shm_identifier.size(),
+            BufferStorageParams::open_shared_memory(shm_identifier.name())));
+    } else {
+        TRY(buffer, acquire_buffer_from_cng_pool(ng_handle, stream_name));
+    }
 
     // Prepare callback
     auto cb_idx = proto_stream_transfer_request.cb_idx();
-    std::function<void(hailo_status)> transfer_done = [this, vdevice_handle, ng_handle, cb_idx, stream_name, buffer]
+    CHECK(cb_idx != INVALID_CB_INDEX, HAILO_INTERNAL_FAILURE, "Got invalid callback index");
+    
+    std::function<void(hailo_status)> transfer_done = [this, vdevice_handle, ng_handle, cb_idx, stream_name, buffer,
+        is_shared_mem, shm_identifier]
         (hailo_status status)
     {
-        auto cb_identifier = serialize_callback_identifier(vdevice_handle, ng_handle, CALLBACK_TYPE_TRANSFER,
-            stream_name, cb_idx, status, buffer);
-        return_buffer_to_cng_pool(ng_handle, stream_name, buffer);
+        ProtoCallbackIdentifier cb_identifier;
+        if (is_shared_mem) {
+            cb_identifier = serialize_callback_identifier_shm(vdevice_handle, ng_handle, CALLBACK_TYPE_TRANSFER,
+                stream_name, cb_idx, status, shm_identifier);
+        } else {
+            cb_identifier = serialize_callback_identifier(vdevice_handle, ng_handle, CALLBACK_TYPE_TRANSFER,
+                stream_name, cb_idx, status, buffer);
+            return_buffer_to_cng_pool(ng_handle, stream_name, buffer);
+        }
         enqueue_cb_identifier(vdevice_handle, std::move(cb_identifier));
     };
 
@@ -542,6 +674,9 @@ void HailoRtRpcService::enqueue_cb_identifier(uint32_t vdevice_handle, ProtoCall
 
     auto &cb_queue_manager = ServiceResourceManager<VDeviceCallbacksQueue>::get_instance();
     auto status = cb_queue_manager.execute(vdevice_handle, lambda, std::move(cb_identifier));
+    if (HAILO_SHUTDOWN_EVENT_SIGNALED == status) { // only the shutdown status matches the trace message below
+        LOGGER__TRACE("Failed to enqueue callback to VDeviceCallbacksQueue '{}' because it is shutdown", vdevice_handle);
+    }
     if (status != HAILO_SUCCESS) {
         LOGGER__ERROR("Failed to enqueue callback to VDeviceCallbacksQueue with status={}", status);
     }
@@ -1323,6 +1458,7 @@ grpc::Status HailoRtRpcService::InputVStreams_create(grpc::ServerContext *, cons
 
     auto &vstreams_manager = ServiceResourceManager<InputVStream>::get_instance();
     for (size_t i = 0; i < vstreams.size(); i++) {
+        reply->add_names(vstreams[i].name());
         auto handle = vstreams_manager.register_resource(client_pid, make_shared_nothrow<InputVStream>(std::move(vstreams[i])));
         reply->add_handles(handle);
     }
@@ -1388,7 +1524,7 @@ grpc::Status HailoRtRpcService::OutputVStreams_create(grpc::ServerContext *, con
                 vstreams[i].get_frame_size(), output_params.at(vstreams[i].name()).queue_size);
         };
         CHECK_SUCCESS_AS_RPC_STATUS(cng_buffer_pool_manager.execute(network_group_handle, allocate_lambda), reply);
-
+        reply->add_names(vstreams[i].name());
         auto handle = vstream_manager.register_resource(client_pid, make_shared_nothrow<OutputVStream>(std::move(vstreams[i])));
         reply->add_handles(handle);
     }
diff --git a/hailort/hailort_service/hailort_rpc_service.hpp b/hailort/hailort_service/hailort_rpc_service.hpp
index 5e022cc3..8840e5f8 100644
--- a/hailort/hailort_service/hailort_rpc_service.hpp
+++ b/hailort/hailort_service/hailort_rpc_service.hpp
@@ -227,6 +227,7 @@ class HailoRtRpcService final : public ProtoHailoRtRpc::Service {
     hailo_status abort_input_vstream(uint32_t handle);
     hailo_status abort_output_vstream(uint32_t handle);
     void abort_vstreams_by_pids(std::set<uint32_t> &pids);
+    void release_configured_network_groups_by_pid(uint32_t client_pid); // NOTE(review): no definition is visible in this patch - confirm one exists
     void remove_disconnected_clients();
     void update_client_id_timestamp(uint32_t pid);
     Expected<size_t> get_min_buffer_pool_size(uint32_t ng_handle);
@@ -247,6 +248,12 @@ class HailoRtRpcService final : public ProtoHailoRtRpc::Service {
     Expected<BufferPtr> acquire_buffer_from_cng_pool(uint32_t ng_handle, const std::string &output_name);
     Expected<size_t> output_vstream_frame_size(uint32_t vstream_handle);
     hailo_status update_buffer_size_in_pool(uint32_t vstream_handle, uint32_t network_group_handle);
+    void shutdown_configured_network_groups_by_pids(std::set<uint32_t> &pids);
+    void shutdown_buffer_pool_by_pids(std::set<uint32_t> &pids);
+    void shutdown_vdevice_cb_queue_by_pids(std::set<uint32_t> &pids);
+    hailo_status shutdown_cng_buffer_pool(uint32_t network_group_handle);
+    hailo_status shutdown_vdevice_cb_queue(uint32_t vdevice_handle);
+    hailo_status shutdown_configured_network_group(uint32_t network_group_handle); // handle in ServiceResourceManager<ConfiguredNetworkGroup>
 
     std::mutex m_keep_alive_mutex;
     std::map<uint32_t, std::chrono::time_point<std::chrono::high_resolution_clock>> m_clients_pids;
diff --git a/hailort/hailort_service/hailort_service b/hailort/hailort_service/hailort_service
index 32305f52..72b635fa 100644
--- a/hailort/hailort_service/hailort_service
+++ b/hailort/hailort_service/hailort_service
@@ -8,5 +8,8 @@
 
 [Service]
 HAILORT_LOGGER_PATH="/var/log/hailo"
-HAILORT_LOGGER_FLUSH_EVERY_PRINT=0
 HAILO_MONITOR=0
+HAILO_TRACE=0
+HAILO_TRACE_TIME_IN_SECONDS_BOUNDED_DUMP=0
+HAILO_TRACE_SIZE_IN_KB_BOUNDED_DUMP=0
+HAILO_TRACE_PATH=""
\ No newline at end of file
diff --git a/hailort/hailort_service/service_resource_manager.hpp b/hailort/hailort_service/service_resource_manager.hpp
index 82b88d1b..8d8d9622 100644
--- a/hailort/hailort_service/service_resource_manager.hpp
+++ b/hailort/hailort_service/service_resource_manager.hpp
@@ -83,6 +83,13 @@ class ServiceResourceManager
         return index;
     }
 
+    // Increments the current handle index without registering a resource, so indexes stay aligned with resources registered elsewhere
+    void advance_current_handle_index()
+    {
+        std::lock_guard<std::mutex> guard(m_mutex);
+        ++m_current_handle_index;
+    }
+
     Expected<uint32_t> dup_handle(uint32_t handle, uint32_t pid)
     {
         std::unique_lock<std::mutex> lock(m_mutex);
diff --git a/hailort/hailort_service/unix/hailort_service.cpp b/hailort/hailort_service/unix/hailort_service.cpp
index 81343037..d009e786 100644
--- a/hailort/hailort_service/unix/hailort_service.cpp
+++ b/hailort/hailort_service/unix/hailort_service.cpp
@@ -30,28 +30,30 @@
 #include <syslog.h>
 #include <sys/stat.h>
 
-void RunService() {
-    const std::string server_address = hailort::HAILORT_SERVICE_ADDRESS;
-    hailort::HailoRtRpcService service;
+using namespace hailort;
 
+void RunService()
+{
+    const std::string server_address = HAILORT_SERVICE_ADDRESS;
+    HailoRtRpcService service;
     grpc::ServerBuilder builder;
     builder.AddListeningPort(server_address, grpc::InsecureServerCredentials());
     builder.SetMaxReceiveMessageSize(-1);
     builder.RegisterService(&service);
     std::unique_ptr<grpc::Server> server(builder.BuildAndStart());
-    chmod(hailort::HAILO_DEFAULT_SERVICE_ADDR.c_str(), S_IROTH | S_IWOTH | S_IRUSR | S_IWUSR);
+    chmod(HAILO_DEFAULT_SERVICE_ADDR.c_str(), S_IROTH | S_IWOTH | S_IRUSR | S_IWUSR);
     server->Wait();
 }
 
 void write_pid_to_lock_file()
 {
-    auto status = hailort::Filesystem::create_directory(HAILO_DAEMON_PID_DIR);
+    auto status = Filesystem::create_directory(HAILO_DAEMON_PID_DIR);
     if (status != HAILO_SUCCESS) {
         HAILORT_OS_LOG_ERROR("Cannot create directory at path, status={}", status);
         return;
     }
 
-    auto locked_file = hailort::LockedFile::create(HAILO_DAEMON_PID_FILE, "wx");
+    auto locked_file = LockedFile::create(HAILO_DAEMON_PID_FILE, "wx");
     if (HAILO_SUCCESS != locked_file.status()) {
         HAILORT_OS_LOG_ERROR("Failed to lock pid file for hailort service, status={}", locked_file.status());
         return;
diff --git a/hailort/hailort_service/vdevice_callbacks_queue.hpp b/hailort/hailort_service/vdevice_callbacks_queue.hpp
index 821c71af..41c3f21c 100644
--- a/hailort/hailort_service/vdevice_callbacks_queue.hpp
+++ b/hailort/hailort_service/vdevice_callbacks_queue.hpp
@@ -17,7 +17,7 @@
 #include "hailo/hailort.h"
 #include "hailo/network_group.hpp"
 #include "hailo/hailort_common.hpp"
-#include "utils/thread_safe_queue.hpp"
+#include "common/thread_safe_queue.hpp"
 
 namespace hailort
 {
@@ -27,6 +27,11 @@ namespace hailort
 class VDeviceCallbacksQueue final
 {
 public:
+    ~VDeviceCallbacksQueue()
+    {
+        shutdown(); // NOTE(review): presumably releases anyone still blocked on the queue - confirm against shutdown()
+    }
+
     static Expected<std::unique_ptr<VDeviceCallbacksQueue>> create(uint32_t max_queue_size)
     {
         TRY(auto shutdown_event, Event::create_shared(Event::State::not_signalled));
@@ -48,6 +53,9 @@ class VDeviceCallbacksQueue final
     {
         std::unique_lock<std::mutex> lock(m_mutex);
         auto status = m_callbacks_ids_queue.enqueue(std::move(callback_id));
+        if (HAILO_SHUTDOWN_EVENT_SIGNALED == status) {
+            return status;
+        }
         CHECK_SUCCESS(status);
 
         return HAILO_SUCCESS;
diff --git a/hailort/hailort_service/windows/hailort_service.cpp b/hailort/hailort_service/windows/hailort_service.cpp
index 9b416964..ded53879 100644
--- a/hailort/hailort_service/windows/hailort_service.cpp
+++ b/hailort/hailort_service/windows/hailort_service.cpp
@@ -29,12 +29,15 @@
 #include "hailort_rpc_service.hpp"
 #include "rpc/rpc_definitions.hpp"
 #include "common/os_utils.hpp"
+#include "common/os/windows/named_mutex_guard.hpp"
 
 #include <winsvc.h>
 #include <windows.h>
 #include <tchar.h>
 #include <strsafe.h>
 
+using namespace hailort;
+
 #define SERVICE_NAME ("hailort_service")
 static const DWORD HRT_SERVICE_INIT_WAIT_TIME_MS(3000);
 static const DWORD HRT_SERVICE_ZERO_WAIT_TIME_MS(0);
@@ -46,9 +49,16 @@ std::unique_ptr<grpc::Server> g_hailort_rpc_server = nullptr;
 
 void RunService()
 {
-    const std::string server_address = hailort::HAILORT_SERVICE_ADDRESS;
-    hailort::HailoRtRpcService service;
+    // Create the service's named mutex guard; a failure here may mean another service instance is already running
+    auto service_named_mutex = NamedMutexGuard::create(HAILORT_SERVICE_NAMED_MUTEX);
+    if (HAILO_SUCCESS != service_named_mutex.status()) {
+        LOGGER__ERROR("Failed to create service named mutex with status={}. Please check if another instance is already running.",
+            service_named_mutex.status());
+        return;
+    }
 
+    const std::string server_address = HAILORT_SERVICE_ADDRESS;
+    HailoRtRpcService service;
     grpc::ServerBuilder builder;
     builder.AddListeningPort(server_address, grpc::InsecureServerCredentials());
     builder.SetMaxReceiveMessageSize(-1);
diff --git a/hailort/hailort_service/windows/hailort_service_env_vars.bat b/hailort/hailort_service/windows/hailort_service_env_vars.bat
index 2b4e82ad..11978b52 100644
--- a/hailort/hailort_service/windows/hailort_service_env_vars.bat
+++ b/hailort/hailort_service/windows/hailort_service_env_vars.bat
@@ -7,4 +7,9 @@
 @REM Running this script requires Administrator permissions.
 
 reg ADD HKLM\SYSTEM\CurrentControlSet\Services\hailort_service /f /v Environment /t REG_MULTI_SZ /d ^
-HAILORT_LOGGER_PATH="%PROGRAMDATA%\HailoRT_Service\logs"\0^
\ No newline at end of file
+HAILORT_LOGGER_PATH="%PROGRAMDATA%\HailoRT_Service\logs"\0^
+HAILO_TRACE=0\0^
+HAILO_TRACE_TIME_IN_SECONDS_BOUNDED_DUMP=0\0^
+HAILO_TRACE_SIZE_IN_KB_BOUNDED_DUMP=0\0^
+HAILO_TRACE_PATH=""\0
+@REM TODO: HRT-7304 - Add `HAILO_MONITOR`
\ No newline at end of file
diff --git a/hailort/hailortcli/CMakeLists.txt b/hailort/hailortcli/CMakeLists.txt
index 8b180dad..b557db5a 100644
--- a/hailort/hailortcli/CMakeLists.txt
+++ b/hailort/hailortcli/CMakeLists.txt
@@ -1,4 +1,4 @@
-cmake_minimum_required(VERSION 3.0.0)
+cmake_minimum_required(VERSION 3.5.0)
 
 include(GNUInstallDirs)
 include(${HAILO_EXTERNALS_CMAKE_SCRIPTS}/cli11.cmake)
@@ -65,7 +65,11 @@ add_executable(hailortcli
 
 target_compile_options(hailortcli PRIVATE ${HAILORT_COMPILE_OPTIONS})
 set_property(TARGET hailortcli PROPERTY CXX_STANDARD 14)
-set_property(TARGET hailortcli PROPERTY INSTALL_RPATH "$ORIGIN" "../lib/") # Link with a relative libhailort
+
+# Link with a relative libhailort: the install RPATH covers the binary's own directory ($ORIGIN) and its sibling ../lib/
+set_property(TARGET hailortcli PROPERTY INSTALL_RPATH "$ORIGIN")
+set_property(TARGET hailortcli APPEND PROPERTY INSTALL_RPATH "\$ORIGIN/../lib/")
+
 target_link_libraries(hailortcli
     libhailort
     CLI11::CLI11
@@ -82,7 +86,10 @@ if(WIN32)
 elseif(CMAKE_SYSTEM_NAME STREQUAL QNX)
     include(${HAILO_EXTERNALS_CMAKE_SCRIPTS}/pevents.cmake)
     target_link_libraries(hailortcli pevents)
+elseif(NOT CMAKE_SYSTEM_NAME STREQUAL Android)
+    target_link_libraries(hailortcli rt)
 endif()
+
 target_include_directories(hailortcli
     PRIVATE
     ${CMAKE_CURRENT_BINARY_DIR} # CMAKE_CURRENT_BINARY_DIR is necessary for config_definitions_header
@@ -95,4 +102,4 @@ install(TARGETS hailortcli
    RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
    CONFIGURATIONS Release
 )
-cli11_install_completion_file(hailortcli)
\ No newline at end of file
+cli11_install_completion_file(hailortcli)
diff --git a/hailort/hailortcli/benchmark_command.cpp b/hailort/hailortcli/benchmark_command.cpp
index fb0bef2b..5aef961b 100644
--- a/hailort/hailortcli/benchmark_command.cpp
+++ b/hailort/hailortcli/benchmark_command.cpp
@@ -8,6 +8,7 @@
  **/
 
 #include "benchmark_command.hpp"
+#include "CLI/App.hpp"
 #include "hailortcli.hpp"
 #include "infer_stats_printer.hpp"
 
@@ -30,8 +31,8 @@ BenchmarkCommand::BenchmarkCommand(CLI::App &parent_app) :
      m_app->add_option("-t, --time-to-run", m_params.time_to_run, "Measurement time in seconds per hw_only/streaming/latency measurement mode")
         ->check(CLI::PositiveNumber)
         ->default_val(15);
-    m_app->add_option("--no-power", m_not_measure_power, "Skip power measurement, even if the platform supports it. The default value is False")
-        ->default_val("false");
+    auto no_power_opt = m_app->add_option("--no-power", m_not_measure_power, "Skip power measurement, even if the platform supports it. The default value is False");
+    hailo_deprecate_options(m_app, { std::make_shared<OptionDeprecation>(no_power_opt) }, false);
     m_app->add_option("--batch-size", m_params.batch_size, "Inference batch size (default is 1)")
         ->default_val(1);
     m_app->add_option("--power-mode", m_params.power_mode,
@@ -44,7 +45,7 @@ BenchmarkCommand::BenchmarkCommand(CLI::App &parent_app) :
     m_app->add_option("--input-files", m_params.inputs_name_and_file_path, "  The input files need to be in UINT8 before transformations.")
         ->check(InputNameToFileMap);
     m_app->add_option("--csv", m_csv_file_path, "If set print the output as csv to the specified path");
-    
+
     auto measure_power_group = m_app->add_option_group("Measure Power");
     CLI::Option *power_sampling_period = measure_power_group->add_option("--sampling-period",
         m_params.power_measurement.sampling_period, "Sampling Period");
@@ -61,18 +62,18 @@ BenchmarkCommand::BenchmarkCommand(CLI::App &parent_app) :
 }
 
 hailo_status BenchmarkCommand::execute()
-{   
+{
     std::cout << "Starting Measurements..." << std::endl;
-    
-    std::cout << "Measuring FPS in hw_only mode" << std::endl;
-    TRY(auto hw_only_mode_info, hw_only_mode(), "hw_only measuring failed");
-    
-    std::cout << "Measuring FPS " << (!m_not_measure_power ? "and Power " : "") << "in streaming mode" << std::endl; 
-    TRY(auto streaming_mode_info, fps_streaming_mode(), "FPS in streaming mode failed");
-
-    // TODO - HRT-6931 - measure latency only in the case of single device. 
+
+    std::cout << "Measuring FPS in HW-only mode" << std::endl;
+    TRY(auto hw_only_mode_info, hw_only_mode(), "Measuring FPS in HW-only mode failed");
+
+    std::cout << "Measuring FPS (and Power on supported platforms) in streaming mode" << std::endl; 
+    TRY(auto streaming_mode_info, fps_streaming_mode(), "Measuring FPS (and Power on supported platforms) in streaming mode failed");
+
+    // TODO - HRT-6931 - measure latency only in the case of single device.
     std::cout << "Measuring HW Latency" << std::endl;
-    TRY(auto latency_info, latency(), "Latency measuring failed");
+    TRY(auto latency_info, latency(), "Measuring Latency failed");
 
     assert(hw_only_mode_info.network_group_results().size() == streaming_mode_info.network_group_results().size());
     assert(latency_info.network_group_results().size() == streaming_mode_info.network_group_results().size());
@@ -101,7 +102,7 @@ hailo_status BenchmarkCommand::execute()
             std::cout << "        (overall)                 = " << InferStatsPrinter::latency_result_to_ms(overall_latency.value()) << " ms" << std::endl;
         }
     }
-    if (!m_not_measure_power) {
+    if (streaming_mode_info.power_measurements_are_valid) {
         for (const auto &pair : streaming_mode_info.m_power_measurements) {
             std::cout << "Device " << pair.first << ":" << std::endl;
             const auto &data = pair.second->data();
@@ -123,7 +124,7 @@ hailo_status BenchmarkCommand::execute()
 Expected<InferResult> BenchmarkCommand::hw_only_mode()
 {
     m_params.transform.transform = (m_params.inputs_name_and_file_path.size() > 0);
-    m_params.power_measurement.measure_power = false;
+    m_params.power_measurement.measure_power = ShouldMeasurePower::NO;
     m_params.measure_latency = false;
     m_params.mode = InferMode::HW_ONLY;
     return run_command_hef(m_params);
@@ -131,7 +132,7 @@ Expected<InferResult> BenchmarkCommand::hw_only_mode()
 
 Expected<InferResult> BenchmarkCommand::fps_streaming_mode()
 {
-    m_params.power_measurement.measure_power = !m_not_measure_power;
+    m_params.power_measurement.measure_power = ShouldMeasurePower::AUTO_DETECT;
     m_params.mode = InferMode::STREAMING;
     m_params.measure_latency = false;
     m_params.transform.transform = true;
@@ -141,10 +142,10 @@ Expected<InferResult> BenchmarkCommand::fps_streaming_mode()
 
 Expected<InferResult> BenchmarkCommand::latency()
 {
-    m_params.power_measurement.measure_power = false;
+    m_params.power_measurement.measure_power = ShouldMeasurePower::NO;
     m_params.measure_latency = true;
     m_params.mode = InferMode::STREAMING;
     m_params.transform.transform = true;
     m_params.transform.quantized = false;
     return run_command_hef(m_params);
-}
\ No newline at end of file
+}
diff --git a/hailort/hailortcli/benchmark_command.hpp b/hailort/hailortcli/benchmark_command.hpp
index 2b8c3995..350cf12d 100644
--- a/hailort/hailortcli/benchmark_command.hpp
+++ b/hailort/hailortcli/benchmark_command.hpp
@@ -24,10 +24,10 @@ class BenchmarkCommand : public Command {
     Expected<InferResult> hw_only_mode();
     Expected<InferResult> fps_streaming_mode();
     Expected<InferResult> latency();
-    
+
     inference_runner_params m_params;
     bool m_not_measure_power;
     std::string m_csv_file_path;
 };
 
-#endif /*_HAILO_BENCHMARK_COMMAND_HPP_*/
\ No newline at end of file
+#endif /*_HAILO_BENCHMARK_COMMAND_HPP_*/
diff --git a/hailort/hailortcli/board_config_command.cpp b/hailort/hailortcli/board_config_command.cpp
index 11baca0f..ee9fc347 100644
--- a/hailort/hailortcli/board_config_command.cpp
+++ b/hailort/hailortcli/board_config_command.cpp
@@ -15,9 +15,6 @@
 BoardConfigCommand::BoardConfigCommand(CLI::App &parent_app) :
     ContainerCommand(parent_app.add_subcommand("board-config", "Board configuration tool"))
 {
-    // This will make the board-config command to be hidden in the --help print in the command line.
-    m_app->group("");
-
     add_subcommand<BoardConfigReadSubcommand>();
     add_subcommand<BoardConfigWriteSubcommand>();
 }
diff --git a/hailort/hailortcli/command.hpp b/hailort/hailortcli/command.hpp
index eb4e051b..70cfa54f 100644
--- a/hailort/hailortcli/command.hpp
+++ b/hailort/hailortcli/command.hpp
@@ -44,11 +44,11 @@ class ContainerCommand : public Command {
 protected:
 
     template<typename CommandType>
-    CommandType &add_subcommand(bool hidden = false)
+    CommandType &add_subcommand(OptionVisibility visibility = OptionVisibility::VISIBLE)
     {
         // Unnamed "option groups" hide subcommands/options from the help message
         // (see https://github.com/CLIUtils/CLI11/blob/main/README.md)
-        auto *parent = hidden ? m_app->add_option_group("") : m_app;
+        auto *parent = (visibility == OptionVisibility::HIDDEN) ? m_app->add_option_group("") : m_app;
         auto command = std::make_shared<CommandType>(*parent);
         m_subcommands.push_back(command);
         return *command;
diff --git a/hailort/hailortcli/download_action_list_command.cpp b/hailort/hailortcli/download_action_list_command.cpp
index a1256edb..2f09ada7 100644
--- a/hailort/hailortcli/download_action_list_command.cpp
+++ b/hailort/hailortcli/download_action_list_command.cpp
@@ -362,6 +362,14 @@ Expected<ordered_json> DownloadActionListCommand::parse_action_data(uint32_t bas
             data_json = *reinterpret_cast<CONTEXT_SWITCH_DEFS__resume_vdma_channel_action_data_t *>(action);
             action_length_local = sizeof(CONTEXT_SWITCH_DEFS__resume_vdma_channel_action_data_t);
             break;
+        case CONTEXT_SWITCH_DEFS__ACTION_TYPE_SLEEP:
+            data_json = *reinterpret_cast<CONTEXT_SWITCH_DEFS__sleep_action_data_t *>(action);
+            action_length_local = sizeof(CONTEXT_SWITCH_DEFS__sleep_action_data_t);
+            break;
+        case CONTEXT_SWITCH_DEFS__ACTION_TYPE_HALT:
+            data_json = json({});
+            action_length_local = 0;
+            break;
         case CONTEXT_SWITCH_DEFS__ACTION_TYPE_COUNT:
             // Fallthrough
             // Handling CONTEXT_SWITCH_DEFS__ACTION_TYPE_COUNT is needed because we compile this file with -Wswitch-enum
@@ -413,9 +421,10 @@ Expected<ordered_json> DownloadActionListCommand::parse_context(Device &device,
     uint8_t converted_context_type = static_cast<uint8_t>(context_type);
     uint32_t action_list_base_address = 0;
     uint32_t batch_counter = 0;
+    uint32_t idle_time = 0;
 
     TRY(auto action_list, device.download_context_action_list(network_group_id, converted_context_type, context_index,
-        &action_list_base_address, &batch_counter));
+        &action_list_base_address, &batch_counter, &idle_time));
     // Needs to fit in 2 bytes due to firmware limitation of action list size
     CHECK_AS_EXPECTED(IS_FIT_IN_UINT16(action_list.size()), HAILO_INTERNAL_FAILURE,
         "Action list size is expected to fit in 2B. actual size is {}", action_list.size());
@@ -424,6 +433,7 @@ Expected<ordered_json> DownloadActionListCommand::parse_context(Device &device,
         {"action_list_base_address", action_list_base_address},
         {"action_list_size", action_list.size() },
         {"batch_counter", batch_counter},
+        {"idle_time", idle_time},
         {"context_name", context_name},
     };
 
@@ -716,4 +726,4 @@ void to_json(json &j, const CONTEXT_SWITCH_DEFS__resume_vdma_channel_action_data
 void to_json(json &j, const CONTEXT_SWITCH_DEFS__change_boundary_input_batch_t &data)
 {
     j = unpack_vdma_channel_id(data);
-}
+}
\ No newline at end of file
diff --git a/hailort/hailortcli/download_action_list_command.hpp b/hailort/hailortcli/download_action_list_command.hpp
index cc88691d..743331c6 100644
--- a/hailort/hailortcli/download_action_list_command.hpp
+++ b/hailort/hailortcli/download_action_list_command.hpp
@@ -25,6 +25,7 @@ using ordered_json = nlohmann::ordered_json;
 class DownloadActionListCommand : public DeviceCommand
 {
 public:
+    using DeviceCommand::execute;
     explicit DownloadActionListCommand(CLI::App &parent_app);
     // To be used from external commands
     static hailo_status execute(Device &device, const std::string &output_file_path,
@@ -115,6 +116,8 @@ static std::pair<CONTEXT_SWITCH_DEFS__ACTION_TYPE_t, std::string> mapping[] = {
     {CONTEXT_SWITCH_DEFS__ACTION_TYPE_CHANGE_BOUNDARY_INPUT_BATCH, "change boundary input batch"},
     {CONTEXT_SWITCH_DEFS__ACTION_TYPE_PAUSE_VDMA_CHANNEL, "pause vdma channel"},
     {CONTEXT_SWITCH_DEFS__ACTION_TYPE_RESUME_VDMA_CHANNEL, "resume vdma channel"},
+    {CONTEXT_SWITCH_DEFS__ACTION_TYPE_SLEEP, "sleep"},
+    {CONTEXT_SWITCH_DEFS__ACTION_TYPE_HALT, "halt"},
 };
 static_assert(ARRAY_ENTRIES(mapping) == CONTEXT_SWITCH_DEFS__ACTION_TYPE_COUNT,
     "Missing a mapping from a CONTEXT_SWITCH_DEFS__ACTION_TYPE_t to it's string value");
@@ -130,6 +133,7 @@ NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE(CONTEXT_SWITCH_DEFS__module_config_done_inter
 NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE(CONTEXT_SWITCH_DEFS__fetch_ccw_bursts_action_data_t, config_stream_index, ccw_bursts);
 NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE(CONTEXT_SWITCH_DEFS__enable_nms_action_t, nms_unit_index, network_index, number_of_classes, burst_size);
 NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE(CONTEXT_SWITCH_DEFS__write_data_by_type_action_t, address, data_type, data, shift, mask, network_index);
+NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE(CONTEXT_SWITCH_DEFS__sleep_action_data_t, sleep_time);
 
 // Non-default implementations
 void to_json(json &j, const CONTEXT_SWITCH_DEFS__deactivate_vdma_channel_action_data_t &data);
diff --git a/hailort/hailortcli/fw_control_command.cpp b/hailort/hailortcli/fw_control_command.cpp
index 77e0e734..fbaa1150 100644
--- a/hailort/hailortcli/fw_control_command.cpp
+++ b/hailort/hailortcli/fw_control_command.cpp
@@ -197,7 +197,7 @@ FwControlResetCommand::FwControlResetCommand(CLI::App &parent_app) :
     m_app->add_option("--reset-type", m_reset_mode, "Reset type")
         ->required()
         ->transform(HailoCheckedTransformer<hailo_reset_device_mode_t>({
-            { "chip", HAILO_RESET_DEVICE_MODE_CHIP },
+            { "chip", HAILO_RESET_DEVICE_MODE_CHIP  },
             { "nn_core", HAILO_RESET_DEVICE_MODE_NN_CORE },
             { "soft", HAILO_RESET_DEVICE_MODE_SOFT },
             { "forced_soft", HAILO_RESET_DEVICE_MODE_FORCED_SOFT },
@@ -226,11 +226,31 @@ hailo_status FwControlTestMemoriesCommand::execute_on_device(Device &device)
     return HAILO_SUCCESS;
 }
 
+FwControlDebugHaltContinueCommand::FwControlDebugHaltContinueCommand(CLI::App &parent_app) :
+    DeviceCommand(parent_app.add_subcommand("continue", "Continue breakpoint action"))
+{} // Only registers the "continue" subcommand; no additional options are added here
+
+hailo_status FwControlDebugHaltContinueCommand::execute_on_device(Device &device) // Handler of the "continue" debug subcommand
+{
+    auto status = device.continue_context_switch_breakpoint(0); // NOTE(review): 0 is hard-coded - presumably the breakpoint id; confirm
+    CHECK_SUCCESS(status, "Failed to execute debug operation");
+
+    std::cout << "Control Operation Debug Continue completed successfully" << std::endl;
+    return HAILO_SUCCESS;
+}
+
+FwControlDebugCommand::FwControlDebugCommand(CLI::App &parent_app) :
+    ContainerCommand(parent_app.add_subcommand("debug", "Access to useful debug operations"))
+{
+    add_subcommand<FwControlDebugHaltContinueCommand>(); // Registers the "continue" subcommand under "debug"
+}
+
 FwControlCommand::FwControlCommand(CLI::App &parent_app) :
     ContainerCommand(parent_app.add_subcommand("fw-control", "Useful firmware control operations"))
 {
     add_subcommand<FwControlIdentifyCommand>();
-    add_subcommand<FwControlResetCommand>();
-    add_subcommand<FwControlTestMemoriesCommand>();
-    add_subcommand<DownloadActionListCommand>();
+    add_subcommand<FwControlResetCommand>(OptionVisibility::HIDDEN);
+    add_subcommand<FwControlTestMemoriesCommand>(OptionVisibility::HIDDEN);
+    add_subcommand<DownloadActionListCommand>(OptionVisibility::HIDDEN);
+    add_subcommand<FwControlDebugCommand>(OptionVisibility::HIDDEN);
 }
diff --git a/hailort/hailortcli/fw_control_command.hpp b/hailort/hailortcli/fw_control_command.hpp
index 67df9682..e7cdb900 100644
--- a/hailort/hailortcli/fw_control_command.hpp
+++ b/hailort/hailortcli/fw_control_command.hpp
@@ -44,6 +44,19 @@ class FwControlTestMemoriesCommand : public DeviceCommand {
     virtual hailo_status execute_on_device(Device &device) override;
 };
 
+class FwControlDebugHaltContinueCommand : public DeviceCommand { // "continue" subcommand ("Continue breakpoint action")
+public:
+    explicit FwControlDebugHaltContinueCommand(CLI::App &parent_app);
+
+protected:
+    virtual hailo_status execute_on_device(Device &device) override; // Calls Device::continue_context_switch_breakpoint
+};
+
+class FwControlDebugCommand : public ContainerCommand { // "debug" subcommand container; holds the "continue" subcommand
+public:
+    explicit FwControlDebugCommand(CLI::App &parent_app);
+};
+
 class FwControlCommand : public ContainerCommand {
 public:
     explicit FwControlCommand(CLI::App &parent_app);
diff --git a/hailort/hailortcli/hailortcli.cpp b/hailort/hailortcli/hailortcli.cpp
index a0318072..a6463db5 100644
--- a/hailort/hailortcli/hailortcli.cpp
+++ b/hailort/hailortcli/hailortcli.cpp
@@ -190,7 +190,7 @@ class HailoRTCLI : public ContainerCommand {
         add_subcommand<BenchmarkCommand>();
         add_subcommand<PowerMeasurementSubcommand>();
         add_subcommand<SensorConfigCommand>();
-        add_subcommand<BoardConfigCommand>();
+        add_subcommand<BoardConfigCommand>(OptionVisibility::HIDDEN);
         add_subcommand<FwConfigCommand>();
         add_subcommand<FwLoggerCommand>();
         add_subcommand<FwUpdateCommand>();
@@ -198,7 +198,7 @@ class HailoRTCLI : public ContainerCommand {
         add_subcommand<MonCommand>();
 #if defined(__GNUC__)
         add_subcommand<UdpRateLimiterCommand>();
-        add_subcommand<HwInferEstimatorCommand>();
+        add_subcommand<HwInferEstimatorCommand>(OptionVisibility::HIDDEN);
 #endif
         add_subcommand<ParseHefCommand>();
         add_subcommand<FwControlCommand>();
diff --git a/hailort/hailortcli/inference_result.hpp b/hailort/hailortcli/inference_result.hpp
index 0396ca67..97c6c25f 100644
--- a/hailort/hailortcli/inference_result.hpp
+++ b/hailort/hailortcli/inference_result.hpp
@@ -342,9 +342,10 @@ struct InferResult
     std::map<std::string, std::shared_ptr<LongPowerMeasurement>> m_power_measurements;
     std::map<std::string, std::shared_ptr<LongPowerMeasurement>> m_current_measurements;
     std::map<std::string, std::shared_ptr<AccumulatorResults>> m_temp_measurements;
+    bool power_measurements_are_valid = false;
 
 private:
     std::vector<NetworkGroupInferResult> m_network_group_results;
 };
 
-#endif /* _HAILO_INFER_RESULT_ */
\ No newline at end of file
+#endif /* _HAILO_INFER_RESULT_ */
diff --git a/hailort/hailortcli/measure_nnc_performance_command.cpp b/hailort/hailortcli/measure_nnc_performance_command.cpp
index da352bc3..9db4855c 100644
--- a/hailort/hailortcli/measure_nnc_performance_command.cpp
+++ b/hailort/hailortcli/measure_nnc_performance_command.cpp
@@ -16,6 +16,8 @@
 #include "hailo/vstream.hpp"
 #include "hailo/vdevice.hpp"
 
+#include "common/internal_env_vars.hpp"
+
 #include <iostream>
 
 #define BYTES_TO_KILOBYTES (1024)
@@ -25,9 +27,6 @@ HwInferEstimatorCommand::HwInferEstimatorCommand(CLI::App &parent_app) :
         "measure nerual network performance for given network using only the HW components without host SW")),
     m_params({})
 {
-    // This will make the command to be hidden in the --help print in the command line.
-    m_app->group("");
-
     add_vdevice_options(m_app, m_params.vdevice_params);
     m_app->add_option("hef", m_params.hef_path, "Path of the HEF to load")
         ->check(CLI::ExistingFile)
@@ -81,10 +80,10 @@ hailo_status HwInferEstimatorCommand::execute()
     TRY(auto configure_params, get_configure_params(m_params, hef, interface));
 
     /* Use Env var to configure all desc list with max depth */
-    setenv("HAILO_CONFIGURE_FOR_HW_INFER","Y",1);
+    setenv(HAILO_CONFIGURE_FOR_HW_INFER_ENV_VAR,"Y",1);
     TRY(auto network_group_list,
         device->configure(hef, configure_params), "Failed configure device from hef");
-    unsetenv("HAILO_CONFIGURE_FOR_HW_INFER");
+    unsetenv(HAILO_CONFIGURE_FOR_HW_INFER_ENV_VAR);
 
     CHECK(1 == network_group_list.size(), HAILO_INVALID_OPERATION,
         "HW Inference is not supported on HEFs with multiple network groups");
diff --git a/hailort/hailortcli/mon_command.cpp b/hailort/hailortcli/mon_command.cpp
index 7efdb4ee..f9581f6b 100644
--- a/hailort/hailortcli/mon_command.cpp
+++ b/hailort/hailortcli/mon_command.cpp
@@ -10,6 +10,7 @@
 #include "hailo/hailort.h"
 
 #include "common/filesystem.hpp"
+#include "common/env_vars.hpp"
 
 #include "mon_command.hpp"
 #include "common.hpp"
diff --git a/hailort/hailortcli/run2/live_stats.cpp b/hailort/hailortcli/run2/live_stats.cpp
index 30715aff..4d9531b7 100644
--- a/hailort/hailortcli/run2/live_stats.cpp
+++ b/hailort/hailortcli/run2/live_stats.cpp
@@ -88,7 +88,7 @@ void LiveStats::print()
             }
         }
     }
-    CliCommon::reset_cursor(m_prev_count); 
+    CliCommon::reset_cursor(m_prev_count);
     // On the first print m_prev_count = 0, so no lines will be deleted
     std::cout << ss.str() << std::flush;
     m_prev_count = count;
diff --git a/hailort/hailortcli/run2/measurement_live_track.cpp b/hailort/hailortcli/run2/measurement_live_track.cpp
index 9278e39f..28efe2c7 100644
--- a/hailort/hailortcli/run2/measurement_live_track.cpp
+++ b/hailort/hailortcli/run2/measurement_live_track.cpp
@@ -158,4 +158,4 @@ void MeasurementLiveTrack::push_json_impl(nlohmann::ordered_json &json)
         push_json_measurment_val(device_json, m_temp_measurement, "temperature");
     }
     json["devices"].emplace_back(device_json);
-}
\ No newline at end of file
+}
diff --git a/hailort/hailortcli/run2/measurement_live_track.hpp b/hailort/hailortcli/run2/measurement_live_track.hpp
index 782681e0..7d64b6db 100644
--- a/hailort/hailortcli/run2/measurement_live_track.hpp
+++ b/hailort/hailortcli/run2/measurement_live_track.hpp
@@ -36,8 +36,8 @@ class MeasurementLiveTrack : public LiveStats::Track
     std::shared_ptr<PowerMeasurement> m_power_measurement;
     std::shared_ptr<PowerMeasurement> m_current_measurement;
     std::shared_ptr<TemperatureMeasurement> m_temp_measurement;
-    
+
     std::string m_device_id;
 };
 
-#endif /* _HAILO_HAILORTCLI_RUN2_MEASUREMENT_LIVE_TRACK_HPP_ */
\ No newline at end of file
+#endif /* _HAILO_HAILORTCLI_RUN2_MEASUREMENT_LIVE_TRACK_HPP_ */
diff --git a/hailort/hailortcli/run2/network_runner.cpp b/hailort/hailortcli/run2/network_runner.cpp
index 266f26ac..61273786 100644
--- a/hailort/hailortcli/run2/network_runner.cpp
+++ b/hailort/hailortcli/run2/network_runner.cpp
@@ -138,67 +138,78 @@ Expected<std::string> NetworkRunner::get_network_group_name(const NetworkParams
 Expected<std::shared_ptr<FullAsyncNetworkRunner>> FullAsyncNetworkRunner::create_shared(VDevice &vdevice,
     NetworkParams params)
 {
-        TRY(auto infer_model_ptr, vdevice.create_infer_model(params.hef_path));
-        TRY(auto net_group_name, get_network_group_name(params, infer_model_ptr->hef()));
-
-        /* Configure Params */
-        infer_model_ptr->set_batch_size(params.batch_size);
-        if (params.batch_size == HAILO_DEFAULT_BATCH_SIZE) {
-            // Changing batch_size to 1 (after configuring the vdevice) - as we iterate over 'params.batch_size' in latency measurements scenarios
-            params.batch_size = 1;
-        }
-        if (params.measure_hw_latency) {
-            infer_model_ptr->set_hw_latency_measurement_flags(HAILO_LATENCY_MEASURE);
-        }
+    std::string net_group_name = params.net_group_name;
+    if (net_group_name.empty()) {
+        TRY(auto hef, Hef::create(params.hef_path));
+        TRY(net_group_name, get_network_group_name(params, hef));
+    }
+    TRY(auto infer_model_ptr, vdevice.create_infer_model(params.hef_path, net_group_name));
 
-        /* Pipeline Params */
-        for (const auto &input_name : infer_model_ptr->get_input_names()) {
-            auto input_params_it = std::find_if(params.vstream_params.begin(), params.vstream_params.end(),
-                [&input_name](const VStreamParams &params) -> bool {
-                    return params.name == input_name;
-                });
-            auto input_params = (input_params_it == params.vstream_params.end()) ? VStreamParams() : *input_params_it;
-
-            TRY(auto input_config, infer_model_ptr->input(input_name));
-            input_config.set_format_order(input_params.params.user_buffer_format.order);
-            input_config.set_format_type(input_params.params.user_buffer_format.type);
-        }
-        for (const auto &output_name : infer_model_ptr->get_output_names()) {
-            auto output_params_it = std::find_if(params.vstream_params.begin(), params.vstream_params.end(),
-                [&output_name](const VStreamParams &params) -> bool {
-                    return params.name == output_name;
-                });
-            auto output_params = (output_params_it == params.vstream_params.end()) ? VStreamParams() : *output_params_it;
-
-            TRY(auto output_config, infer_model_ptr->output(output_name));
-            output_config.set_format_order(output_params.params.user_buffer_format.order);
-            output_config.set_format_type(output_params.params.user_buffer_format.type);
-        }
+    /* Validate params */
+    for (const auto &vstream_params : params.vstream_params) {
+        CHECK_AS_EXPECTED((contains(infer_model_ptr->get_input_names(), vstream_params.name)) ||
+            (contains(infer_model_ptr->get_output_names(), vstream_params.name)),
+            HAILO_INVALID_ARGUMENT, "The model doesnt have an edge with the given name '{}'", vstream_params.name);
+    }
 
-        TRY(auto configured_model, infer_model_ptr->configure());
-        auto configured_infer_model_ptr = make_shared_nothrow<ConfiguredInferModel>(std::move(configured_model));
-        CHECK_NOT_NULL_AS_EXPECTED(configured_infer_model_ptr, HAILO_OUT_OF_HOST_MEMORY);
+    /* Configure Params */
+    infer_model_ptr->set_batch_size(params.batch_size);
+    if (params.batch_size == HAILO_DEFAULT_BATCH_SIZE) {
+        // Changing batch_size to 1 (after configuring the vdevice) - as we iterate over 'params.batch_size' in latency measurements scenarios
+        params.batch_size = 1;
+    }
+    if (params.measure_hw_latency) {
+        infer_model_ptr->set_hw_latency_measurement_flags(HAILO_LATENCY_MEASURE);
+    }
 
-        auto res = make_shared_nothrow<FullAsyncNetworkRunner>(params, net_group_name, vdevice,
-            infer_model_ptr, configured_infer_model_ptr);
-        CHECK_NOT_NULL_AS_EXPECTED(res, HAILO_OUT_OF_HOST_MEMORY);
+    /* Pipeline Params */
+    for (const auto &input_name : infer_model_ptr->get_input_names()) {
+        auto input_params_it = std::find_if(params.vstream_params.begin(), params.vstream_params.end(),
+            [&input_name](const VStreamParams &params) -> bool {
+                return params.name == input_name;
+            });
+        auto input_params = (input_params_it == params.vstream_params.end()) ? VStreamParams() : *input_params_it;
+
+        TRY(auto input_config, infer_model_ptr->input(input_name));
+        input_config.set_format_order(input_params.params.user_buffer_format.order);
+        input_config.set_format_type(input_params.params.user_buffer_format.type);
+    }
+    for (const auto &output_name : infer_model_ptr->get_output_names()) {
+        auto output_params_it = std::find_if(params.vstream_params.begin(), params.vstream_params.end(),
+            [&output_name](const VStreamParams &params) -> bool {
+                return params.name == output_name;
+            });
+        auto output_params = (output_params_it == params.vstream_params.end()) ? VStreamParams() : *output_params_it;
+
+        TRY(auto output_config, infer_model_ptr->output(output_name));
+        output_config.set_format_order(output_params.params.user_buffer_format.order);
+        output_config.set_format_type(output_params.params.user_buffer_format.type);
+    }
 
-        if (params.measure_overall_latency || params.measure_hw_latency) {
-            CHECK_AS_EXPECTED((1 == res->get_input_names().size()), HAILO_INVALID_OPERATION,
-                "Latency measurement over multiple inputs network is not supported");
+    TRY(auto configured_model, infer_model_ptr->configure());
+    auto configured_infer_model_ptr = make_shared_nothrow<ConfiguredInferModel>(std::move(configured_model));
+    CHECK_NOT_NULL_AS_EXPECTED(configured_infer_model_ptr, HAILO_OUT_OF_HOST_MEMORY);
 
-            if (params.measure_overall_latency) {
-                auto overall_latency_meter = make_shared_nothrow<LatencyMeter>(std::set<std::string>{ "INFERENCE" }, // Since we check 'infer()' with single callback, we only address 1 output
-                    OVERALL_LATENCY_TIMESTAMPS_LIST_LENGTH);
-                CHECK_NOT_NULL_AS_EXPECTED(overall_latency_meter, HAILO_OUT_OF_HOST_MEMORY);
-                res->set_overall_latency_meter(overall_latency_meter);
-            }
+    auto res = make_shared_nothrow<FullAsyncNetworkRunner>(params, net_group_name, vdevice,
+        infer_model_ptr, configured_infer_model_ptr);
+    CHECK_NOT_NULL_AS_EXPECTED(res, HAILO_OUT_OF_HOST_MEMORY);
 
-            // We use a barrier for both hw and overall latency
-            auto latency_barrier = make_shared_nothrow<Barrier>(1); // Only 1 frame at a time
-            CHECK_NOT_NULL_AS_EXPECTED(latency_barrier, HAILO_OUT_OF_HOST_MEMORY);
-            res->set_latency_barrier(latency_barrier);
+    if (params.measure_overall_latency || params.measure_hw_latency) {
+        CHECK_AS_EXPECTED((1 == res->get_input_names().size()), HAILO_INVALID_OPERATION,
+            "Latency measurement over multiple inputs network is not supported");
+
+        if (params.measure_overall_latency) {
+            auto overall_latency_meter = make_shared_nothrow<LatencyMeter>(std::set<std::string>{ "INFERENCE" }, // Since we check 'infer()' with single callback, we only address 1 output
+                OVERALL_LATENCY_TIMESTAMPS_LIST_LENGTH);
+            CHECK_NOT_NULL_AS_EXPECTED(overall_latency_meter, HAILO_OUT_OF_HOST_MEMORY);
+            res->set_overall_latency_meter(overall_latency_meter);
         }
+
+        // We use a barrier for both hw and overall latency
+        auto latency_barrier = make_shared_nothrow<Barrier>(1); // Only 1 frame at a time
+        CHECK_NOT_NULL_AS_EXPECTED(latency_barrier, HAILO_OUT_OF_HOST_MEMORY);
+        res->set_latency_barrier(latency_barrier);
+    }
     return res;
 }
 
@@ -263,6 +274,14 @@ Expected<std::shared_ptr<NetworkRunner>> NetworkRunner::create_shared(VDevice &v
             auto output_streams = cfgr_net_group->get_output_streams();
             CHECK_AS_EXPECTED(output_streams.size() > 0, HAILO_INTERNAL_FAILURE);
 
+            /* Validate params */
+            for (const auto &stream_param : final_net_params.stream_params) {
+                CHECK_AS_EXPECTED(
+                    (std::any_of(input_streams.begin(), input_streams.end(), [name = stream_param.name] (const auto &stream) { return name == stream.get().name(); })) ||
+                        (std::any_of(output_streams.begin(), output_streams.end(), [name = stream_param.name] (const auto &stream) { return name == stream.get().name(); })),
+                    HAILO_INVALID_ARGUMENT, "The model doesnt have an edge with the given name '{}'", stream_param.name);
+            }
+
             auto net_runner = make_shared_nothrow<RawNetworkRunner>(final_net_params, net_group_name, vdevice,
                 std::move(input_streams), std::move(output_streams), cfgr_net_group);
             CHECK_NOT_NULL_AS_EXPECTED(net_runner, HAILO_OUT_OF_HOST_MEMORY);
@@ -370,13 +389,20 @@ double NetworkRunner::get_last_measured_fps()
 Expected<std::pair<std::vector<InputVStream>, std::vector<OutputVStream>>> NetworkRunner::create_vstreams(
     ConfiguredNetworkGroup &net_group, const std::map<std::string, hailo_vstream_params_t> &params)
 {//TODO: support network name
-    size_t match_count = 0;
 
-    std::map<std::string, hailo_vstream_params_t> input_vstreams_params;
+    /* Validate params */
     TRY(auto input_vstreams_info, net_group.get_input_vstream_infos());
+    TRY(auto output_vstreams_info, net_group.get_output_vstream_infos());
+    for (const auto &pair : params) {
+        CHECK_AS_EXPECTED(
+            (std::any_of(input_vstreams_info.begin(), input_vstreams_info.end(), [name = pair.first] (const auto &info) { return name == std::string(info.name); })) ||
+                (std::any_of(output_vstreams_info.begin(), output_vstreams_info.end(), [name = pair.first] (const auto &info) { return name == std::string(info.name); })),
+            HAILO_INVALID_ARGUMENT, "The model doesnt have an edge with the given name '{}'", pair.first);
+    }
+
+    std::map<std::string, hailo_vstream_params_t> input_vstreams_params;
     for (auto &input_vstream_info : input_vstreams_info) {
         if (params.end() != params.find(input_vstream_info.name)) {
-            match_count++;
             input_vstreams_params.emplace(input_vstream_info.name, params.at(input_vstream_info.name));
         } else {
             input_vstreams_params.emplace(input_vstream_info.name, HailoRTDefaults::get_vstreams_params());
@@ -384,18 +410,14 @@ Expected<std::pair<std::vector<InputVStream>, std::vector<OutputVStream>>> Netwo
     }
 
     std::map<std::string, hailo_vstream_params_t> output_vstreams_params;
-    TRY(auto output_vstreams_info, net_group.get_output_vstream_infos());
     for (auto &output_vstream_info : output_vstreams_info) {
         if (params.end() != params.find(output_vstream_info.name)) {
-            match_count++;
             output_vstreams_params.emplace(output_vstream_info.name, params.at(output_vstream_info.name));
         } else {
             output_vstreams_params.emplace(output_vstream_info.name, HailoRTDefaults::get_vstreams_params());
         }
     }
 
-    CHECK(match_count == params.size(), make_unexpected(HAILO_INVALID_ARGUMENT), "One of the params has an invalid vStream name");
-
     TRY(auto input_vstreams, VStreamsBuilder::create_input_vstreams(net_group, input_vstreams_params));
     TRY(auto output_vstreams, VStreamsBuilder::create_output_vstreams(net_group, output_vstreams_params));
 
diff --git a/hailort/hailortcli/run2/run2_command.cpp b/hailort/hailortcli/run2/run2_command.cpp
index 6914e5a5..c5468959 100644
--- a/hailort/hailortcli/run2/run2_command.cpp
+++ b/hailort/hailortcli/run2/run2_command.cpp
@@ -739,18 +739,15 @@ Expected<std::vector<std::shared_ptr<NetworkRunner>>> Run2::init_and_run_net_run
 
     if (get_measure_power() || get_measure_current() || get_measure_temp()) {
         TRY(auto physical_devices, vdevice->get_physical_devices());
-
         for (auto &device : physical_devices) {
-            TRY(const auto identity, device.get().identify());
-            CHECK_AS_EXPECTED(HailoRTCommon::is_power_measurement_supported(identity.device_architecture) || !(get_measure_power()),
-                HAILO_INVALID_OPERATION, "HW arch {} does not support power measurement. Disable the power-measure option", 
-                HailoRTCommon::get_device_arch_str(identity.device_architecture));
-            CHECK_AS_EXPECTED(HailoRTCommon::is_current_measurement_supported(identity.device_architecture) || !(get_measure_current()),
-                HAILO_INVALID_OPERATION, "HW arch {} does not support current measurement. Disable the current-measure option",
-                HailoRTCommon::get_device_arch_str(identity.device_architecture));
-            CHECK_AS_EXPECTED(HailoRTCommon::is_temp_measurement_supported(identity.device_architecture) || !(get_measure_temp()),
-                HAILO_INVALID_OPERATION, "HW arch {} does not support temperature measurement. Disable the temp-measure option",
-                HailoRTCommon::get_device_arch_str(identity.device_architecture));
+            TRY(auto caps, device.get().get_capabilities(), "Failed getting device capabilities");
+            // Fail only when a measurement was requested but the device does not report that capability
+            CHECK_AS_EXPECTED((caps.power_measurements || !(get_measure_power())),
+                HAILO_INVALID_OPERATION, "Power measurement not supported. Disable the power-measure option");
+            CHECK_AS_EXPECTED((caps.current_measurements || !(get_measure_current())),
+                HAILO_INVALID_OPERATION, "Current measurement not supported. Disable the current-measure option");
+            CHECK_AS_EXPECTED((caps.temperature_measurements || !(get_measure_temp())),
+                HAILO_INVALID_OPERATION, "Temperature measurement not supported. Disable the temp-measure option");
 
             TRY(auto measurement_live_track, MeasurementLiveTrack::create_shared(device.get(),
                 get_measure_power(), get_measure_current(), get_measure_temp()));
@@ -836,4 +833,4 @@ hailo_status Run2Command::execute()
         CHECK_SUCCESS(DownloadActionListCommand::write_to_json(action_list_json, runtime_data_output_path));
     }
     return HAILO_SUCCESS;
-}
\ No newline at end of file
+}
diff --git a/hailort/hailortcli/run2/run2_command.hpp b/hailort/hailortcli/run2/run2_command.hpp
index 015fe8c5..741eca4d 100644
--- a/hailort/hailortcli/run2/run2_command.hpp
+++ b/hailort/hailortcli/run2/run2_command.hpp
@@ -90,5 +90,4 @@ class NetworkApp : public CLI::App
     NetworkParams m_params;
 };
 
-
 #endif /* _HAILO_HAILORTCLI_RUN2_RUN2_COMMAND_HPP_ */
diff --git a/hailort/hailortcli/run_command.cpp b/hailort/hailortcli/run_command.cpp
index ef6074ab..433f723c 100644
--- a/hailort/hailortcli/run_command.cpp
+++ b/hailort/hailortcli/run_command.cpp
@@ -107,7 +107,7 @@ static void add_run_command_params(CLI::App *run_subcommand, inference_runner_pa
     // TODO: init values in RunCommand ctor
     params.measure_latency = false;
     params.measure_overall_latency = false;
-    params.power_measurement.measure_power = false;
+    params.power_measurement.measure_power = ShouldMeasurePower::NO;
     params.power_measurement.measure_current = false;
     params.show_progress = true;
     params.time_to_run = 0;
@@ -162,13 +162,13 @@ static void add_run_command_params(CLI::App *run_subcommand, inference_runner_pa
     run_subcommand->add_flag("--measure-overall-latency", params.measure_overall_latency,
         "Include overall latency measurement")
         ->needs("--measure-latency");
-    
+
     static const char *DOT_SUFFIX = ".dot";
     run_subcommand->add_option("--dot", params.dot_output,
         "If set print the pipeline graph as a .dot file at the specified path")
         ->check(FileSuffixValidator(DOT_SUFFIX));
-    CLI::Option *measure_power_opt = run_subcommand->add_flag("--measure-power",
-        params.power_measurement.measure_power, "Measure power consumption");
+    auto measure_power_cb = [&params] (bool measure_power) { params.power_measurement.measure_power = measure_power ? ShouldMeasurePower::YES : ShouldMeasurePower::NO; };
+    CLI::Option *measure_power_opt = run_subcommand->add_flag( "--measure-power", measure_power_cb, "Measure power consumption");
     CLI::Option *measure_current_opt = run_subcommand->add_flag("--measure-current",
         params.power_measurement.measure_current, "Measure current")->excludes(measure_power_opt);
     measure_power_opt->excludes(measure_current_opt);
@@ -271,7 +271,7 @@ static void add_run_command_params(CLI::App *run_subcommand, inference_runner_pa
             "--batch-size should be a divisor of --frames-count if provided");
         // TODO HRT-5363 support multiple devices
         PARSE_CHECK((params.vdevice_params.device_count == 1) || params.csv_output.empty() ||
-            !(params.power_measurement.measure_power || params.power_measurement.measure_current || params.measure_temp),
+            !((ShouldMeasurePower::YES == params.power_measurement.measure_power) || params.power_measurement.measure_current || params.measure_temp),
             "Writing measurements in csv format is not supported for multiple devices");
 
         if ((0 == params.time_to_run) && (0 == params.frames_count)) {
@@ -1036,10 +1036,12 @@ Expected<InferResult> activate_and_run_single_device(
     CHECK_AS_EXPECTED(1 == network_groups.size(), HAILO_INVALID_OPERATION, "Inference is not supported on HEFs with multiple network groups");
     TRY(auto activated_net_group, network_groups[0]->activate(), "Failed activate network_group");
     TRY(auto input_dataset, create_dataset(network_groups, params));
+    TRY(auto caps, device.get_capabilities());
 
     hailo_power_measurement_types_t measurement_type = HAILO_POWER_MEASUREMENT_TYPES__MAX_ENUM;
     bool should_measure_power = false;
-    if (params.power_measurement.measure_power) {
+    if ((ShouldMeasurePower::YES == params.power_measurement.measure_power) ||
+        ((ShouldMeasurePower::AUTO_DETECT == params.power_measurement.measure_power) && caps.power_measurements)) {
         measurement_type = HAILO_POWER_MEASUREMENT_TYPES__POWER;
         should_measure_power = true;
     } else if (params.power_measurement.measure_current) {
@@ -1079,6 +1081,7 @@ Expected<InferResult> activate_and_run_single_device(
             status = inference_result.set_power_measurement(device.get_dev_id(), std::move(long_power_measurement_ptr));
             CHECK_SUCCESS_AS_EXPECTED(status);
         }
+        inference_result.power_measurements_are_valid = true;
     }
 
     if (should_measure_temp) {
@@ -1157,10 +1160,20 @@ Expected<InferResult> activate_and_run_vdevice(
     }
 
     TRY(const auto input_dataset, create_dataset(network_groups, params), "Failed creating input dataset");
+    // we currently support all devices or none for power measurements
+    auto all_phy_devices_support_power_measurements = true;
+    for (const auto &device : physical_devices) {
+        TRY(const auto caps, device.get().get_capabilities(), "Failed getting device capabilities");
+        if (!caps.power_measurements) {
+            all_phy_devices_support_power_measurements = false;
+            break;
+        }
+    }
 
     hailo_power_measurement_types_t measurement_type = HAILO_POWER_MEASUREMENT_TYPES__MAX_ENUM;
     bool should_measure_power = false;
-    if (params.power_measurement.measure_power) {
+    if ((ShouldMeasurePower::YES == params.power_measurement.measure_power) ||
+        ((ShouldMeasurePower::AUTO_DETECT == params.power_measurement.measure_power) && all_phy_devices_support_power_measurements)) {
         measurement_type = HAILO_POWER_MEASUREMENT_TYPES__POWER;
         should_measure_power = true;
     } else if (params.power_measurement.measure_current) {
@@ -1215,6 +1228,7 @@ Expected<InferResult> activate_and_run_vdevice(
             }
         }
         CHECK_SUCCESS_AS_EXPECTED(status);
+        inference_result.power_measurements_are_valid = true;
     }
 
     if (params.measure_temp) {
@@ -1261,23 +1275,18 @@ Expected<InferResult> run_command_hef_vdevice(const inference_runner_params &par
     }
 
     TRY(const auto interface, vdevice->get_default_streams_interface(), "Failed to get default streams interface");
-    TRY(auto configure_params, get_configure_params(params, hef, interface));
+    TRY(auto configure_params, get_configure_params(params, hef, interface), "Failed getting configure params");
     TRY(auto network_group_list, vdevice->configure(hef, configure_params), "Failed configure vdevice from hef");
 
     for (auto &device : physical_devices) {
-        TRY(const auto identity, device.get().identify());
-        CHECK_AS_EXPECTED((HailoRTCommon::is_power_measurement_supported(identity.device_architecture) ||
-            !(params.power_measurement.measure_power)), HAILO_INVALID_OPERATION,
-            "HW arch {} does not support power measurement. Disable the power-measure option",
-            HailoRTCommon::get_device_arch_str(identity.device_architecture));
-        CHECK_AS_EXPECTED((HailoRTCommon::is_current_measurement_supported(identity.device_architecture) ||
-            !(params.power_measurement.measure_current)), HAILO_INVALID_OPERATION,
-            "HW arch {} does not support current measurement. Disable the current-measure option",
-            HailoRTCommon::get_device_arch_str(identity.device_architecture));
-        CHECK_AS_EXPECTED((HailoRTCommon::is_temp_measurement_supported(identity.device_architecture) ||
-            !(params.measure_temp)), HAILO_INVALID_OPERATION,
-            "HW arch {} does not support temperature measurement. Disable the temp-measure option",
-            HailoRTCommon::get_device_arch_str(identity.device_architecture));
+        TRY(auto caps, device.get().get_capabilities(), "Failed getting device capabilities");
+
+        CHECK_AS_EXPECTED((caps.power_measurements || (params.power_measurement.measure_power != ShouldMeasurePower::YES)),
+            HAILO_INVALID_OPERATION, "Power measurement not supported. Disable the power-measure option");
+        CHECK_AS_EXPECTED((caps.current_measurements || !(params.power_measurement.measure_current)),
+            HAILO_INVALID_OPERATION, "Current measurement not supported. Disable the current-measure option");
+        CHECK_AS_EXPECTED((caps.temperature_measurements || !(params.measure_temp)),
+            HAILO_INVALID_OPERATION, "Temperature measurement not supported. Disable the temp-measure option");
 
         if (use_batch_to_measure_opt(params)) {
             status = DownloadActionListCommand::set_batch_to_measure(device.get(), params.runtime_data.batch_to_measure);
diff --git a/hailort/hailortcli/run_command.hpp b/hailort/hailortcli/run_command.hpp
index 502911dc..14dd9515 100644
--- a/hailort/hailortcli/run_command.hpp
+++ b/hailort/hailortcli/run_command.hpp
@@ -30,7 +30,7 @@ struct transformation_params {
 };
 
 struct measure_power_params {
-    bool measure_power;
+    ShouldMeasurePower measure_power;
     bool measure_current;
     uint32_t sampling_period;
     uint32_t averaging_factor;
@@ -177,4 +177,4 @@ class NetworkBatchValidator : public CLI::Validator {
 
 const static NetworkBatchValidator NetworkBatchMap;
 
-#endif /* _HAILO_RUN_COMMAND_HPP_ */
\ No newline at end of file
+#endif /* _HAILO_RUN_COMMAND_HPP_ */
diff --git a/hailort/hrpc/CMakeLists.txt b/hailort/hrpc/CMakeLists.txt
index ac354e8f..59e25e8c 100644
--- a/hailort/hrpc/CMakeLists.txt
+++ b/hailort/hrpc/CMakeLists.txt
@@ -1,4 +1,4 @@
-cmake_minimum_required(VERSION 3.0.0)
+cmake_minimum_required(VERSION 3.5.0)
 
 set(HRPC_IMPL_DIR "${CMAKE_CURRENT_SOURCE_DIR}/os")
 if(WIN32)
diff --git a/hailort/hrpc/client.cpp b/hailort/hrpc/client.cpp
index 5b3bc819..746a9f65 100644
--- a/hailort/hrpc/client.cpp
+++ b/hailort/hrpc/client.cpp
@@ -42,7 +42,7 @@ hailo_status ResultEvent::wait(std::chrono::milliseconds timeout)
 
 Client::~Client()
 {
-    is_running = false;
+    m_is_running = false;
     (void)m_connection.close();
     if (m_thread.joinable()) {
         m_thread.join();
@@ -51,7 +51,7 @@ Client::~Client()
 
 hailo_status Client::connect()
 {
-    TRY(m_conn_context, ConnectionContext::create_shared(false));
+    TRY(m_conn_context, ConnectionContext::create_client_shared(m_device_id));
     TRY(auto conn, RawConnection::create_shared(m_conn_context));
     auto status = conn->connect();
     CHECK_SUCCESS(status);
@@ -68,7 +68,7 @@ hailo_status Client::connect()
 
 hailo_status Client::message_loop()
 {
-    while (is_running) {
+    while (m_is_running) {
         rpc_message_header_t header;
         TRY_WITH_ACCEPTABLE_STATUS(HAILO_COMMUNICATION_CLOSED, auto message, m_connection.read_message(header));
 
@@ -80,9 +80,16 @@ hailo_status Client::message_loop()
             continue;
         }
 
-        std::unique_lock<std::mutex> lock(m_message_mutex);
-        auto event = m_events[header.message_id];
-        lock.unlock();
+        std::shared_ptr<ResultEvent> event = nullptr;
+        {
+            std::unique_lock<std::mutex> lock(m_events_mutex);
+            m_events_cv.wait(lock, [this, &header] () {
+                return contains(m_events, header.message_id);
+            });
+            event = m_events[header.message_id];
+            m_events.erase(header.message_id);
+        }
+
         auto status = event->signal(std::move(message));
         CHECK_SUCCESS(status);
     }
@@ -93,27 +100,34 @@ hailo_status Client::message_loop()
 Expected<Buffer> Client::execute_request(HailoRpcActionID action_id, const MemoryView &request,
     std::function<hailo_status(RpcConnection)> write_buffers_callback)
 {
-    std::unique_lock<std::mutex> lock(m_message_mutex);
+    // Allocate the result event before anything is sent: if the allocation failed after the request was written,
+    // message_loop() would wait forever on m_events_cv for an event that is never registered for the reply.
+    TRY(auto event, ResultEvent::create_shared());
     rpc_message_header_t header;
-    header.size = static_cast<uint32_t>(request.size());
-    header.message_id = m_messages_sent++;
-    header.action_id = static_cast<uint32_t>(action_id);
+    {
+        std::unique_lock<std::mutex> lock(m_write_mutex);
+        header.size = static_cast<uint32_t>(request.size());
+        header.message_id = m_messages_sent++;
+        header.action_id = static_cast<uint32_t>(action_id);
 
-    auto status = m_connection.write_message(header, request);
-    CHECK_SUCCESS_AS_EXPECTED(status);
-    if (write_buffers_callback) {
-        status = write_buffers_callback(m_connection);
+        auto status = m_connection.write_message(header, request);
         CHECK_SUCCESS_AS_EXPECTED(status);
+        if (write_buffers_callback) {
+            status = write_buffers_callback(m_connection);
+            CHECK_SUCCESS_AS_EXPECTED(status);
+        }
     }
 
-    TRY(auto event, ResultEvent::create_shared());
-    m_events[header.message_id] = event;
+    // Publish the event and wake message_loop(), which may already be holding the reply for this message id
+    {
+        std::unique_lock<std::mutex> events_lock(m_events_mutex);
+        m_events[header.message_id] = event;
+    }
+    m_events_cv.notify_all();
 
-    lock.unlock();
-    status = event->wait(REQUEST_TIMEOUT);
+    auto status = event->wait(REQUEST_TIMEOUT);
     CHECK_SUCCESS_AS_EXPECTED(status);
 
-    m_events.erase(header.message_id);
     return event->release();
 }
 
diff --git a/hailort/hrpc/client.hpp b/hailort/hrpc/client.hpp
index ef53132e..488e600b 100644
--- a/hailort/hrpc/client.hpp
+++ b/hailort/hrpc/client.hpp
@@ -42,7 +42,7 @@ class ResultEvent
 class Client
 {
 public:
-    Client() = default;
+    Client(const std::string &device_id) : m_device_id(device_id), m_is_running(true) {}
     ~Client();
 
     hailo_status connect();
@@ -53,16 +53,19 @@ class Client
 protected:
     hailo_status message_loop();
 
-    bool is_running = true;
+    std::string m_device_id;
+    bool m_is_running;
     std::shared_ptr<ConnectionContext> m_conn_context;
     RpcConnection m_connection;
     std::thread m_thread;
     std::unordered_map<uint32_t, std::shared_ptr<ResultEvent>> m_events;
     std::unordered_map<HailoRpcActionID, std::function<hailo_status(const MemoryView&, RpcConnection)>> m_custom_callbacks;
     uint32_t m_messages_sent = 0;
-    std::mutex m_message_mutex;
+    std::mutex m_write_mutex;
+    std::condition_variable m_events_cv;
+    std::mutex m_events_mutex;
 };
 
 } // namespace hrpc
 
-#endif // _CLIENT_HPP_
\ No newline at end of file
+#endif // _CLIENT_HPP_
diff --git a/hailort/hrpc/os/pcie/raw_connection_internal.cpp b/hailort/hrpc/os/pcie/raw_connection_internal.cpp
index d2dd46a1..b545ee47 100644
--- a/hailort/hrpc/os/pcie/raw_connection_internal.cpp
+++ b/hailort/hrpc/os/pcie/raw_connection_internal.cpp
@@ -10,44 +10,76 @@
 #include "hrpc/os/pcie/raw_connection_internal.hpp"
 #include "common/logger_macros.hpp"
 #include "common/utils.hpp"
+#include "common/internal_env_vars.hpp"
 #include "hailo/hailort.h"
 #include "vdma/driver/hailort_driver.hpp"
 
 // TODO: Remove this after we can choose ports in the driver
-#define PCIE_PORT (1213355091)
+#define DEFAULT_PCIE_PORT (12133)
+
+/**
+ * Returns the port used for PCIe sessions: the value of HAILO_CONNECTION_PCIE_PORT_ENV_VAR when it holds a
+ * number in the range 0-65535, DEFAULT_PCIE_PORT otherwise.
+ */
+uint16_t get_pcie_port()
+{
+    auto port_str = get_env_variable(HAILO_CONNECTION_PCIE_PORT_ENV_VAR);
+    if (!port_str) {
+        return DEFAULT_PCIE_PORT;
+    }
+
+    // std::stoi throws on non-numeric or overflowing input, and casting straight to uint16_t silently wraps
+    // values outside 0-65535 - validate explicitly and fall back to the default port instead.
+    try {
+        const auto port = std::stoi(port_str.value());
+        if ((port >= 0) && (port <= 0xFFFF)) {
+            return static_cast<uint16_t>(port);
+        }
+    } catch (const std::exception &) {
+        // Reported below, together with the out-of-range case
+    }
+
+    LOGGER__ERROR("Invalid PCIe port '{}', falling back to the default port {}", port_str.value(), DEFAULT_PCIE_PORT);
+    return DEFAULT_PCIE_PORT;
+}
 
 using namespace hrpc;
 
-Expected<std::shared_ptr<ConnectionContext>> PcieConnectionContext::create_shared(bool is_accepting)
+Expected<std::shared_ptr<ConnectionContext>> PcieConnectionContext::create_client_shared(const std::string &device_id)
 {
     const auto max_size = PcieSession::max_transfer_size();
     TRY(auto write_buffer, Buffer::create(static_cast<size_t>(max_size), BufferStorageParams::create_dma()));
     TRY(auto read_buffer, Buffer::create(static_cast<size_t>(max_size), BufferStorageParams::create_dma()));
 
-    std::shared_ptr<PcieConnectionContext> ptr = nullptr;
-    if (is_accepting) {
-        // Server side
-        TRY(auto driver, HailoRTDriver::create_pcie_ep());
-        ptr = make_shared_nothrow<PcieConnectionContext>(std::move(driver), is_accepting,
+    if (device_id.size() > 0) {
+        TRY(auto driver, HailoRTDriver::create_pcie(device_id));
+        auto ptr = make_shared_nothrow<PcieConnectionContext>(std::move(driver), false,
             std::move(write_buffer), std::move(read_buffer));
         CHECK_NOT_NULL(ptr, HAILO_OUT_OF_HOST_MEMORY);
         return std::dynamic_pointer_cast<ConnectionContext>(ptr);
-    } else {
-        // Client side
-        TRY(auto device_infos, HailoRTDriver::scan_devices());
-        CHECK(device_infos.size() > 0, HAILO_NOT_FOUND, "No devices found");
-        for (auto &device_info : device_infos) {
-            if (HailoRTDriver::AcceleratorType::SOC_ACCELERATOR == device_info.accelerator_type) {
-                TRY(auto driver, HailoRTDriver::create(device_info.device_id, device_info.dev_path));
-                ptr = make_shared_nothrow<PcieConnectionContext>(std::move(driver), is_accepting,
-                    std::move(write_buffer), std::move(read_buffer));
-                CHECK_NOT_NULL(ptr, HAILO_OUT_OF_HOST_MEMORY);
-                return std::dynamic_pointer_cast<ConnectionContext>(ptr);
-            }
-        }
     }
-    LOGGER__ERROR("No suitable device found");
-    return make_unexpected(HAILO_NOT_FOUND);
+
+    TRY(auto device_infos, HailoRTDriver::scan_devices(HailoRTDriver::AcceleratorType::SOC_ACCELERATOR));
+    CHECK(device_infos.size() > 0, HAILO_NOT_FOUND, "No devices found");
+
+    TRY(auto driver, HailoRTDriver::create(device_infos[0].device_id, device_infos[0].dev_path));
+    auto ptr = make_shared_nothrow<PcieConnectionContext>(std::move(driver), false,
+        std::move(write_buffer), std::move(read_buffer));
+    CHECK_NOT_NULL(ptr, HAILO_OUT_OF_HOST_MEMORY);
+    return std::dynamic_pointer_cast<ConnectionContext>(ptr);
+}
+
+Expected<std::shared_ptr<ConnectionContext>> PcieConnectionContext::create_server_shared()
+{
+    const auto max_size = PcieSession::max_transfer_size();
+    TRY(auto write_buffer, Buffer::create(static_cast<size_t>(max_size), BufferStorageParams::create_dma()));
+    TRY(auto read_buffer, Buffer::create(static_cast<size_t>(max_size), BufferStorageParams::create_dma()));
+
+    TRY(auto driver, HailoRTDriver::create_pcie_ep());
+    auto ptr = make_shared_nothrow<PcieConnectionContext>(std::move(driver), true,
+        std::move(write_buffer), std::move(read_buffer));
+    CHECK_NOT_NULL(ptr, HAILO_OUT_OF_HOST_MEMORY);
+    return std::dynamic_pointer_cast<ConnectionContext>(ptr);
 }
 
 hailo_status PcieConnectionContext::wait_for_available_connection()
@@ -86,7 +101,7 @@ Expected<std::shared_ptr<RawConnection>> PcieRawConnection::accept()
     auto new_conn = make_shared_nothrow<PcieRawConnection>(m_context);
     CHECK_NOT_NULL_AS_EXPECTED(new_conn, HAILO_OUT_OF_HOST_MEMORY);
 
-    TRY(auto session, PcieSession::accept(m_context->driver(), PCIE_PORT));
+    TRY(auto session, PcieSession::accept(m_context->driver(), get_pcie_port()));
     status = new_conn->set_session(std::move(session));
     CHECK_SUCCESS(status);
 
@@ -103,14 +118,14 @@ hailo_status PcieRawConnection::set_session(PcieSession &&session)
 
 hailo_status PcieRawConnection::connect()
 {
-    TRY(auto session, PcieSession::connect(m_context->driver(), PCIE_PORT));
+    TRY(auto session, PcieSession::connect(m_context->driver(), get_pcie_port())); // port is now obtained from get_pcie_port(), same as in accept()
     auto status = set_session(std::move(session));
     CHECK_SUCCESS(status);
 
     return HAILO_SUCCESS;
 }
 
-hailo_status PcieRawConnection::write(const uint8_t *buffer, size_t size)
+hailo_status PcieRawConnection::write(const uint8_t *buffer, size_t size, std::chrono::milliseconds timeout)
 {
     if (0 == size) {
         return HAILO_SUCCESS;
@@ -126,7 +141,7 @@ hailo_status PcieRawConnection::write(const uint8_t *buffer, size_t size)
         auto size_left = size - bytes_written;
         if (is_aligned) {
             amount_to_write = std::min(static_cast<size_t>(size_left), static_cast<size_t>(max_size));
-            auto status = m_session->write(buffer + bytes_written, amount_to_write, m_timeout);
+            auto status = m_session->write(buffer + bytes_written, amount_to_write, timeout);
             if (HAILO_STREAM_ABORT == status) {
                 return HAILO_COMMUNICATION_CLOSED;
             }
@@ -134,7 +149,7 @@ hailo_status PcieRawConnection::write(const uint8_t *buffer, size_t size)
         } else {
             amount_to_write = std::min(static_cast<size_t>(size_left), m_context->write_buffer().size());
             memcpy(m_context->write_buffer().data(), buffer + bytes_written, amount_to_write);
-            auto status = m_session->write(m_context->write_buffer().data(), amount_to_write, m_timeout);
+            auto status = m_session->write(m_context->write_buffer().data(), amount_to_write, timeout);
             if (HAILO_STREAM_ABORT == status) {
                 return HAILO_COMMUNICATION_CLOSED;
             }
@@ -147,7 +162,7 @@ hailo_status PcieRawConnection::write(const uint8_t *buffer, size_t size)
     return HAILO_SUCCESS;
 }
 
-hailo_status PcieRawConnection::read(uint8_t *buffer, size_t size)
+hailo_status PcieRawConnection::read(uint8_t *buffer, size_t size, std::chrono::milliseconds timeout)
 {
     if (0 == size) {
         return HAILO_SUCCESS;
@@ -163,14 +178,14 @@ hailo_status PcieRawConnection::read(uint8_t *buffer, size_t size)
         auto size_left = size - bytes_read;
         if (is_aligned) {
             amount_to_read = std::min(static_cast<size_t>(size_left), static_cast<size_t>(max_size));
-            auto status = m_session->read(buffer + bytes_read, amount_to_read, m_timeout);
+            auto status = m_session->read(buffer + bytes_read, amount_to_read, timeout);
             if (HAILO_STREAM_ABORT == status) {
                 return HAILO_COMMUNICATION_CLOSED;
             }
             CHECK_SUCCESS(status);
         } else {
             amount_to_read = std::min(static_cast<size_t>(size_left), m_context->read_buffer().size());
-            auto status = m_session->read(m_context->read_buffer().data(), amount_to_read, m_timeout);
+            auto status = m_session->read(m_context->read_buffer().data(), amount_to_read, timeout);
             if (HAILO_STREAM_ABORT == status) {
                 return HAILO_COMMUNICATION_CLOSED;
             }
diff --git a/hailort/hrpc/os/pcie/raw_connection_internal.hpp b/hailort/hrpc/os/pcie/raw_connection_internal.hpp
index 2753f9ea..85dbceb1 100644
--- a/hailort/hrpc/os/pcie/raw_connection_internal.hpp
+++ b/hailort/hrpc/os/pcie/raw_connection_internal.hpp
@@ -25,7 +25,8 @@ namespace hrpc
 class PcieConnectionContext : public ConnectionContext
 {
 public:
-    static Expected<std::shared_ptr<ConnectionContext>> create_shared(bool is_accepting);
+    static Expected<std::shared_ptr<ConnectionContext>> create_client_shared(const std::string &device_id);
+    static Expected<std::shared_ptr<ConnectionContext>> create_server_shared();
 
     PcieConnectionContext(std::shared_ptr<HailoRTDriver> &&driver, bool is_accepting,
         Buffer &&write_buffer, Buffer &&read_buffer)
@@ -61,8 +62,10 @@ class PcieRawConnection : public RawConnection
 
     virtual Expected<std::shared_ptr<RawConnection>> accept() override;
     virtual hailo_status connect() override;
-    virtual hailo_status write(const uint8_t *buffer, size_t size) override;
-    virtual hailo_status read(uint8_t *buffer, size_t size) override;
+    virtual hailo_status write(const uint8_t *buffer, size_t size,
+        std::chrono::milliseconds timeout = DEFAULT_WRITE_TIMEOUT) override;
+    virtual hailo_status read(uint8_t *buffer, size_t size,
+        std::chrono::milliseconds timeout = DEFAULT_READ_TIMEOUT) override;
     virtual hailo_status close() override;
 
     explicit PcieRawConnection(std::shared_ptr<PcieConnectionContext> context) : m_context(context) {}
diff --git a/hailort/hrpc/os/posix/raw_connection_internal.cpp b/hailort/hrpc/os/posix/raw_connection_internal.cpp
index 752c3af9..ab8ae7d4 100644
--- a/hailort/hrpc/os/posix/raw_connection_internal.cpp
+++ b/hailort/hrpc/os/posix/raw_connection_internal.cpp
@@ -13,9 +13,11 @@
 #include <sys/un.h>
 #include <string>
 #include <unistd.h>
-#include <common/logger_macros.hpp>
-#include <common/utils.hpp>
-#include <hailo/hailort.h>
+
+#include "common/logger_macros.hpp"
+#include "common/utils.hpp"
+#include "common/internal_env_vars.hpp"
+#include "hailo/hailort.h"
 
 using namespace hrpc;
 
@@ -27,38 +29,123 @@ Expected<std::shared_ptr<ConnectionContext>> OsConnectionContext::create_shared(
     return std::dynamic_pointer_cast<ConnectionContext>(ptr);
 }
 
-Expected<std::shared_ptr<RawConnection>> OsRawConnection::create_shared(std::shared_ptr<OsConnectionContext> context)
+Expected<std::shared_ptr<OsRawConnection>> OsRawConnection::create_localhost_server(std::shared_ptr<OsConnectionContext> context)
 {
-    std::shared_ptr<RawConnection> ptr;
-    if (context->is_accepting()) {
-        int fd = ::socket(AF_UNIX, SOCK_STREAM, 0);
-        CHECK_AS_EXPECTED(fd >= 0, HAILO_OPEN_FILE_FAILURE, "Socket creation error, errno = {}", errno);
+    int fd = ::socket(AF_UNIX, SOCK_STREAM, 0); // Unix-domain stream socket that will be bound and put in listening state below
+    CHECK_AS_EXPECTED(fd >= 0, HAILO_OPEN_FILE_FAILURE, "Socket creation error, errno = {}", errno);
 
-        struct sockaddr_un server_addr;
-        memset(&server_addr, 0, sizeof(server_addr));
-        server_addr.sun_family = AF_UNIX;
-        std::string addr = "/tmp/unix_socket";
-        strncpy(server_addr.sun_path, addr.c_str(), addr.size());
+    struct sockaddr_un server_addr;
+    memset(&server_addr, 0, sizeof(server_addr));
+    server_addr.sun_family = AF_UNIX;
+    std::string addr = "/tmp/unix_socket"; // fixed path; connect_localhost() uses the same string
+    strncpy(server_addr.sun_path, addr.c_str(), addr.size()); // copies no terminator; sun_path stays NUL-terminated thanks to the memset above
 
-        unlink(addr.c_str());
-        int result = ::bind(fd, (struct sockaddr*)&server_addr, sizeof(server_addr));
-        CHECK_AS_EXPECTED(result >= 0, HAILO_FILE_OPERATION_FAILURE, "Bind error, errno = {}", errno);
+    unlink(addr.c_str()); // remove any existing file at the socket path before bind(); the result is ignored
+    int result = ::bind(fd, (struct sockaddr*)&server_addr, sizeof(server_addr));
+    CHECK_AS_EXPECTED(result >= 0, HAILO_FILE_OPERATION_FAILURE, "Bind error, errno = {}", errno); // NOTE(review): fd is not closed on this early return
 
-        result = ::listen(fd, 5);
-        CHECK_AS_EXPECTED(result >= 0, HAILO_FILE_OPERATION_FAILURE, "Listen error, errno = {}", errno);
+    result = ::listen(fd, 5); // backlog of 5 pending connections
+    CHECK_AS_EXPECTED(result >= 0, HAILO_FILE_OPERATION_FAILURE, "Listen error, errno = {}", errno); // NOTE(review): fd is not closed on this early return
 
-        ptr = make_shared_nothrow<OsRawConnection>(fd, context);
-    } else {
+    auto ptr = make_shared_nothrow<OsRawConnection>(fd, context); // the connection object now holds the listening fd
+    CHECK_NOT_NULL_AS_EXPECTED(ptr, HAILO_OUT_OF_HOST_MEMORY);
+    return ptr;
+}
 
-        int fd = ::socket(AF_UNIX, SOCK_STREAM, 0);
-        CHECK_AS_EXPECTED(fd >= 0, HAILO_OPEN_FILE_FAILURE, "Socket creation error, errno = {}", errno);
-        ptr = make_shared_nothrow<OsRawConnection>(fd, context);
-    }
+Expected<std::shared_ptr<OsRawConnection>> OsRawConnection::create_localhost_client(std::shared_ptr<OsConnectionContext> context)
+{ // Client side: only creates the Unix-domain socket; no connect is issued in this function.
+    int fd = ::socket(AF_UNIX, SOCK_STREAM, 0);
+    CHECK_AS_EXPECTED(fd >= 0, HAILO_OPEN_FILE_FAILURE, "Socket creation error, errno = {}", errno);
     
+    auto ptr = make_shared_nothrow<OsRawConnection>(fd, context); // wrap the unconnected fd together with its context
     CHECK_NOT_NULL_AS_EXPECTED(ptr, HAILO_OUT_OF_HOST_MEMORY);
     return ptr;
 }
 
+Expected<std::shared_ptr<OsRawConnection>> OsRawConnection::create_by_addr_server(std::shared_ptr<OsConnectionContext> context,
+    const std::string &ip, uint16_t port)
+{ // Creates a TCP/IPv4 socket bound to ip:port and listening with a backlog of 5.
+    sockaddr_in server_addr = {};
+    socklen_t addr_len = sizeof(server_addr);
+
+    memset(&server_addr, 0, sizeof(server_addr));
+    server_addr.sin_family = AF_INET;
+    server_addr.sin_port = htons(port); // port is given in host byte order
+    auto inet_rc = inet_pton(AF_INET, ip.c_str(), &server_addr.sin_addr); // validate the address before any descriptor exists
+    CHECK_AS_EXPECTED(1 == inet_rc, HAILO_ETH_FAILURE,
+        "Failed to run 'inet_pton', errno = {}. make sure 'HAILO_SOCKET_COM_ADDR_SERVER' is set correctly (ip:port)", errno);
+
+    int fd = ::socket(AF_INET, SOCK_STREAM, 0); // opened only after validation, so a malformed ip no longer leaks a descriptor
+    CHECK_AS_EXPECTED(fd >= 0, HAILO_OPEN_FILE_FAILURE, "Socket creation error, errno = {}", errno);
+
+    int result = ::bind(fd, (struct sockaddr*)&server_addr, addr_len);
+    CHECK_AS_EXPECTED(result >= 0, HAILO_FILE_OPERATION_FAILURE, "Bind error, errno = {}", errno); // NOTE(review): fd is still not closed on this early return
+
+    result = ::listen(fd, 5);
+    CHECK_AS_EXPECTED(result >= 0, HAILO_FILE_OPERATION_FAILURE, "Listen error, errno = {}", errno); // NOTE(review): fd is still not closed on this early return
+
+    auto res = make_shared_nothrow<OsRawConnection>(fd, context); // the connection object now holds the listening fd
+    CHECK_NOT_NULL_AS_EXPECTED(res, HAILO_OUT_OF_HOST_MEMORY);
+    return res;
+}
+
+Expected<std::shared_ptr<OsRawConnection>> OsRawConnection::create_by_addr_client(std::shared_ptr<OsConnectionContext> context,
+    const std::string &ip, uint16_t port)
+{ // Creates an unconnected TCP/IPv4 socket after checking that ip parses; connecting is done by connect_by_addr().
+    sockaddr_in server_addr = {}; // used in this function only to validate ip
+    memset(&server_addr, 0, sizeof(server_addr));
+    server_addr.sin_family = AF_INET;
+    server_addr.sin_port = htons(port);
+    auto inet_rc = inet_pton(AF_INET, ip.c_str(), &server_addr.sin_addr); // validate the address before any descriptor exists
+    CHECK_AS_EXPECTED(1 == inet_rc, HAILO_ETH_FAILURE,
+        "Failed to run 'inet_pton', errno = {}. make sure 'HAILO_SOCKET_COM_ADDR_CLIENT' is set correctly (ip:port)", errno);
+
+    int fd = ::socket(AF_INET, SOCK_STREAM, 0); // opened only after validation, so a malformed ip no longer leaks a descriptor
+    CHECK_AS_EXPECTED(fd >= 0, HAILO_OPEN_FILE_FAILURE, "Socket creation error, errno = {}", errno);
+
+    auto res = make_shared_nothrow<OsRawConnection>(fd, context); // wrap the unconnected fd together with its context
+    CHECK_NOT_NULL_AS_EXPECTED(res, HAILO_OUT_OF_HOST_MEMORY);
+    return res;
+}
+
+Expected<std::pair<std::string, uint16_t>> OsRawConnection::parse_ip_port(const std::string &ip_port)
+{ // Splits "ip:port" at the first ':' into the ip text and a numeric port; the ip text itself is not validated here.
+    std::istringstream ss(ip_port);
+    std::string ip;
+    uint16_t port;
+
+    if (std::getline(ss, ip, ':') && (ss >> port)) { // text up to the first ':' is the ip; the remainder is extracted as uint16_t
+        return std::make_pair(ip, port);
+    }
+    CHECK_AS_EXPECTED(false, HAILO_INVALID_ARGUMENT ,"Failed to parse ip and port. Format should be as follows: 'X.X.X.X:PP' (e.g. 127.0.0.1:2000)"); // condition is always false: this is the error return
+}
+
+Expected<std::shared_ptr<RawConnection>> OsRawConnection::create_shared(std::shared_ptr<OsConnectionContext> context)
+{ // Picks the socket flavour (Unix-domain or TCP by ip:port) from the server/client address env var.
+    std::shared_ptr<RawConnection> ptr;
+    if (context->is_accepting()) { // server side: the helpers below return a bound, listening socket
+        auto force_socket_com_value = get_env_variable(HAILO_SOCKET_COM_ADDR_SERVER_ENV_VAR);
+        CHECK_EXPECTED(force_socket_com_value); // We know it's set, otherwise we'll be working with PCIeRawCon
+        if (HAILO_SOCKET_COM_ADDR_UNIX_SOCKET == force_socket_com_value.value()) {
+            TRY(ptr, create_localhost_server(context));
+        } else { // any other value is treated as "ip:port"
+            TRY(auto ip_port_pair, parse_ip_port(force_socket_com_value.value()));
+            TRY(ptr, create_by_addr_server(context, std::get<0>(ip_port_pair), std::get<1>(ip_port_pair)));
+        }
+    } else { // client side: the helpers below return an unconnected socket
+        auto force_socket_com_value = get_env_variable(HAILO_SOCKET_COM_ADDR_CLIENT_ENV_VAR);
+        CHECK_EXPECTED(force_socket_com_value); // We know it's set, otherwise we'll be working with PCIeRawCon
+        if (HAILO_SOCKET_COM_ADDR_UNIX_SOCKET == force_socket_com_value.value()) {
+            TRY(ptr, create_localhost_client(context));
+        } else { // any other value is treated as "ip:port"
+            TRY(auto ip_port_pair, parse_ip_port(force_socket_com_value.value()));
+            TRY(ptr, create_by_addr_client(context, std::get<0>(ip_port_pair), std::get<1>(ip_port_pair)));
+        }
+    }
+
+    return ptr;
+}
+
 Expected<std::shared_ptr<RawConnection>> OsRawConnection::accept()
 {
     int fd = ::accept(m_fd, nullptr, nullptr);
@@ -70,7 +157,7 @@ Expected<std::shared_ptr<RawConnection>> OsRawConnection::accept()
     return ptr;
 }
 
-hailo_status OsRawConnection::connect()
+hailo_status OsRawConnection::connect_localhost()
 {
     struct sockaddr_un server_addr;
     std::string addr = "/tmp/unix_socket";
@@ -85,7 +172,48 @@ hailo_status OsRawConnection::connect()
     return HAILO_SUCCESS;
 }
 
-hailo_status OsRawConnection::write(const uint8_t *buffer, size_t size)
+hailo_status OsRawConnection::connect_by_addr(const std::string &ip, uint16_t port)
+{ // Connects m_fd to the IPv4 address ip:port.
+    sockaddr_in server_addr = {};
+    socklen_t addr_len = sizeof(server_addr);
+
+    memset(&server_addr, 0, sizeof(server_addr));
+    server_addr.sin_family = AF_INET;
+    server_addr.sin_port = htons(port); // port is given in host byte order
+    auto inet_rc = inet_pton(AF_INET, ip.c_str(), &server_addr.sin_addr); // only a return value of 1 is accepted
+    CHECK(1 == inet_rc, HAILO_ETH_FAILURE,
+        "Failed to run 'inet_pton', errno = {}. make sure 'HAILO_SOCKET_COM_ADDR_XX' is set correctly (ip:port)", errno);
+    auto result = ::connect(m_fd, (struct sockaddr*)&server_addr, addr_len);
+    CHECK(result >= 0, HAILO_FILE_OPERATION_FAILURE, "Connect error, errno = {}. "
+        "make sure 'HAILO_SOCKET_COM_ADDR_XX' is set correctly (ip:port)", errno);
+
+    return HAILO_SUCCESS;
+}
+
+hailo_status OsRawConnection::connect()
+{ // Dispatches to connect_localhost() or connect_by_addr() according to the address env var.
+    if (m_context->is_accepting()) { // accepting context: the address is read from the server env var
+        auto force_socket_com_value = get_env_variable(HAILO_SOCKET_COM_ADDR_SERVER_ENV_VAR);
+        CHECK_EXPECTED(force_socket_com_value); // We know it's set, otherwise we'll be working with PCIeRawCon
+        if (HAILO_SOCKET_COM_ADDR_UNIX_SOCKET == force_socket_com_value.value()) {
+            return connect_localhost();
+        } else { // any other value is treated as "ip:port"
+            TRY(auto ip_port_pair, parse_ip_port(force_socket_com_value.value()));
+            return connect_by_addr(std::get<0>(ip_port_pair), std::get<1>(ip_port_pair));
+        }
+    } else { // non-accepting context: the address is read from the client env var
+        auto force_socket_com_value = get_env_variable(HAILO_SOCKET_COM_ADDR_CLIENT_ENV_VAR);
+        CHECK_EXPECTED(force_socket_com_value); // We know it's set, otherwise we'll be working with PCIeRawCon
+        if (HAILO_SOCKET_COM_ADDR_UNIX_SOCKET == force_socket_com_value.value()) {
+            return connect_localhost();
+        } else { // any other value is treated as "ip:port"
+            TRY(auto ip_port_pair, parse_ip_port(force_socket_com_value.value()));
+            return connect_by_addr(std::get<0>(ip_port_pair), std::get<1>(ip_port_pair));
+        }
+    }
+}
+
+hailo_status OsRawConnection::write(const uint8_t *buffer, size_t size, std::chrono::milliseconds /*timeout*/)
 {
     size_t bytes_written = 0;
     while (bytes_written < size) {
@@ -96,7 +224,7 @@ hailo_status OsRawConnection::write(const uint8_t *buffer, size_t size)
     return HAILO_SUCCESS;
 }
 
-hailo_status OsRawConnection::read(uint8_t *buffer, size_t size)
+hailo_status OsRawConnection::read(uint8_t *buffer, size_t size, std::chrono::milliseconds /*timeout*/)
 {
     size_t bytes_read = 0;
     while (bytes_read < size) {
diff --git a/hailort/hrpc/os/posix/raw_connection_internal.hpp b/hailort/hrpc/os/posix/raw_connection_internal.hpp
index cbde53fa..0f0ec178 100644
--- a/hailort/hrpc/os/posix/raw_connection_internal.hpp
+++ b/hailort/hrpc/os/posix/raw_connection_internal.hpp
@@ -39,12 +39,26 @@ class OsRawConnection : public RawConnection
 
     virtual Expected<std::shared_ptr<RawConnection>> accept() override;
     virtual hailo_status connect() override;
-    virtual hailo_status write(const uint8_t *buffer, size_t size) override;
-    virtual hailo_status read(uint8_t *buffer, size_t size) override;
+    virtual hailo_status write(const uint8_t *buffer, size_t size,
+        std::chrono::milliseconds timeout = DEFAULT_WRITE_TIMEOUT) override;
+    virtual hailo_status read(uint8_t *buffer, size_t size,
+        std::chrono::milliseconds timeout = DEFAULT_READ_TIMEOUT) override;
     virtual hailo_status close() override;
 
     OsRawConnection(int fd, std::shared_ptr<OsConnectionContext> context) : m_fd(fd), m_context(context) {}
 private:
+    static Expected<std::shared_ptr<OsRawConnection>> create_by_addr_server(std::shared_ptr<OsConnectionContext> context,
+        const std::string &ip, uint16_t port);
+    static Expected<std::shared_ptr<OsRawConnection>> create_by_addr_client(std::shared_ptr<OsConnectionContext> context,
+        const std::string &ip, uint16_t port);
+    static Expected<std::shared_ptr<OsRawConnection>> create_localhost_server(std::shared_ptr<OsConnectionContext> context);
+    static Expected<std::shared_ptr<OsRawConnection>> create_localhost_client(std::shared_ptr<OsConnectionContext> context);
+
+    hailo_status connect_by_addr(const std::string &ip, uint16_t port);
+    hailo_status connect_localhost();
+
+    static Expected<std::pair<std::string, uint16_t>> parse_ip_port(const std::string &ip_port);
+
     int m_fd;
     std::shared_ptr<OsConnectionContext> m_context;
 };
diff --git a/hailort/hrpc/os/windows/raw_connection_internal.cpp b/hailort/hrpc/os/windows/raw_connection_internal.cpp
index cfb17a50..29cbff92 100644
--- a/hailort/hrpc/os/windows/raw_connection_internal.cpp
+++ b/hailort/hrpc/os/windows/raw_connection_internal.cpp
@@ -37,17 +37,19 @@ hailo_status OsRawConnection::connect()
     return HAILO_NOT_IMPLEMENTED;
 }
 
-hailo_status OsRawConnection::write(const uint8_t *buffer, size_t size)
+hailo_status OsRawConnection::write(const uint8_t *buffer, size_t size, std::chrono::milliseconds timeout)
 {
     (void)buffer;
     (void)size;
+    (void)timeout; // unused: this stub only returns HAILO_NOT_IMPLEMENTED
     return HAILO_NOT_IMPLEMENTED;
 }
 
-hailo_status OsRawConnection::read(uint8_t *buffer, size_t size)
+hailo_status OsRawConnection::read(uint8_t *buffer, size_t size, std::chrono::milliseconds timeout)
 {
     (void)buffer;
     (void)size;
+    (void)timeout; // unused: this stub only returns HAILO_NOT_IMPLEMENTED
     return HAILO_NOT_IMPLEMENTED;
 }
 
diff --git a/hailort/hrpc/os/windows/raw_connection_internal.hpp b/hailort/hrpc/os/windows/raw_connection_internal.hpp
index 3b667174..9efcfff1 100644
--- a/hailort/hrpc/os/windows/raw_connection_internal.hpp
+++ b/hailort/hrpc/os/windows/raw_connection_internal.hpp
@@ -36,8 +36,10 @@ class OsRawConnection : public RawConnection
 
     virtual Expected<std::shared_ptr<RawConnection>> accept() override;
     virtual hailo_status connect() override;
-    virtual hailo_status write(const uint8_t *buffer, size_t size) override;
-    virtual hailo_status read(uint8_t *buffer, size_t size) override;
+    virtual hailo_status write(const uint8_t *buffer, size_t size,
+        std::chrono::milliseconds timeout = DEFAULT_WRITE_TIMEOUT) override;
+    virtual hailo_status read(uint8_t *buffer, size_t size,
+        std::chrono::milliseconds timeout = DEFAULT_READ_TIMEOUT) override;
     virtual hailo_status close() override;
 
     explicit OsRawConnection(std::shared_ptr<OsConnectionContext> /*context*/) {}
diff --git a/hailort/hrpc/raw_connection.cpp b/hailort/hrpc/raw_connection.cpp
index cdc5cafc..4682d524 100644
--- a/hailort/hrpc/raw_connection.cpp
+++ b/hailort/hrpc/raw_connection.cpp
@@ -10,6 +10,7 @@
 #include "hailo/vdevice.hpp"
 #include "hrpc/raw_connection.hpp"
 #include "hrpc/os/pcie/raw_connection_internal.hpp"
+#include "common/internal_env_vars.hpp"
 
 #ifdef _WIN32
 #include "hrpc/os/windows/raw_connection_internal.hpp"
@@ -17,21 +18,31 @@
 #include "hrpc/os/posix/raw_connection_internal.hpp"
 #endif
 
-#define HAILO_FORCE_SOCKET_COM_ENV_VAR "HAILO_FORCE_SOCKET_COM"
 
 using namespace hrpc;
 
 
-Expected<std::shared_ptr<ConnectionContext>> ConnectionContext::create_shared(bool is_accepting)
+Expected<std::shared_ptr<ConnectionContext>> ConnectionContext::create_client_shared(const std::string &device_id)
 {
-    // The env var HAILO_FORCE_HRPC_CLIENT_ENV_VAR is supported for debug purposes
-    char *socket_com = std::getenv(HAILO_FORCE_SOCKET_COM_ENV_VAR); // TODO: Remove duplication
-    auto force_socket_com = (nullptr != socket_com) && ("1" == std::string(socket_com));
+    auto should_force_socket_com = get_env_variable(HAILO_SOCKET_COM_ADDR_CLIENT_ENV_VAR); // only its presence is checked here; the value is parsed by OsRawConnection
 
-    if (force_socket_com || VDevice::force_hrpc_client()) {// If forcing hrpc service, its because we work without EP driver -> use sockets
-        return OsConnectionContext::create_shared(is_accepting);
+    // If forcing hrpc service, it's because we work without EP driver -> use sockets
+    if (should_force_socket_com.has_value() || VDevice::should_force_hrpc_client()) {
+        return OsConnectionContext::create_shared(false); // false: a client context is not accepting
     } else {
-        return PcieConnectionContext::create_shared(is_accepting);
+        return PcieConnectionContext::create_client_shared(device_id); // device_id is only used on the PCIe path
+    }
+}
+
+Expected<std::shared_ptr<ConnectionContext>> ConnectionContext::create_server_shared()
+{
+    auto should_force_socket_com = get_env_variable(HAILO_SOCKET_COM_ADDR_SERVER_ENV_VAR); // only its presence is checked here; the value is parsed by OsRawConnection
+
+    // If forcing hrpc service, it's because we work without EP driver -> use sockets
+    if (should_force_socket_com.has_value() || VDevice::should_force_hrpc_client()) {
+        return OsConnectionContext::create_shared(true); // true: a server context is accepting
+    } else {
+        return PcieConnectionContext::create_server_shared();
     }
 }
 
diff --git a/hailort/hrpc/raw_connection.hpp b/hailort/hrpc/raw_connection.hpp
index d7e12185..55bf201c 100644
--- a/hailort/hrpc/raw_connection.hpp
+++ b/hailort/hrpc/raw_connection.hpp
@@ -15,6 +15,9 @@
 
 #include <memory>
 
+#define DEFAULT_WRITE_TIMEOUT (std::chrono::milliseconds(10000))
+#define DEFAULT_READ_TIMEOUT (std::chrono::milliseconds(HAILO_INFINITE))
+
 using namespace hailort;
 
 namespace hrpc
@@ -23,7 +26,8 @@ namespace hrpc
 class ConnectionContext
 {
 public:
-    static Expected<std::shared_ptr<ConnectionContext>> create_shared(bool is_accepting);
+    static Expected<std::shared_ptr<ConnectionContext>> create_client_shared(const std::string &device_id = "");
+    static Expected<std::shared_ptr<ConnectionContext>> create_server_shared();
 
     bool is_accepting() const { return m_is_accepting; }
 
@@ -45,12 +49,11 @@ class RawConnection
 
     virtual Expected<std::shared_ptr<RawConnection>> accept() = 0;
     virtual hailo_status connect() = 0;
-    virtual hailo_status write(const uint8_t *buffer, size_t size) = 0;
-    virtual hailo_status read(uint8_t *buffer, size_t size) = 0;
+    virtual hailo_status write(const uint8_t *buffer, size_t size,
+        std::chrono::milliseconds timeout = DEFAULT_WRITE_TIMEOUT) = 0;
+    virtual hailo_status read(uint8_t *buffer, size_t size,
+        std::chrono::milliseconds timeout = DEFAULT_READ_TIMEOUT) = 0;
     virtual hailo_status close() = 0;
-
-protected:
-    std::chrono::milliseconds m_timeout = std::chrono::milliseconds(HAILO_INFINITE);
 };
 
 } // namespace hrpc
diff --git a/hailort/hrpc/server.cpp b/hailort/hrpc/server.cpp
index 304c41c6..a55fa60f 100644
--- a/hailort/hrpc/server.cpp
+++ b/hailort/hrpc/server.cpp
@@ -15,9 +15,10 @@ namespace hrpc
 ServerContext::ServerContext(Server &server, RpcConnection connection) :
     m_server(server), m_connection(connection) {}
 
-hailo_status ServerContext::trigger_callback(uint32_t callback_id, hailo_status callback_status, std::function<hailo_status(RpcConnection)> write_buffers_callback)
+hailo_status ServerContext::trigger_callback(uint32_t callback_id, hailo_status callback_status,
+    rpc_object_handle_t callback_owner_handle, std::function<hailo_status(RpcConnection)> write_buffers_callback)
 {
-    return m_server.trigger_callback(callback_id, m_connection, callback_status, write_buffers_callback);
+    return m_server.trigger_callback(callback_id, callback_status, callback_owner_handle, m_connection, write_buffers_callback); // forwards to the server, supplying this context's connection
 }
 
 RpcConnection &ServerContext::connection()
@@ -36,14 +37,15 @@ Expected<Buffer> Dispatcher::call_action(HailoRpcActionID action_id, const Memor
     if (m_actions.find(action_id) != m_actions.end()) {
         return m_actions[action_id](request, server_context);
     }
-    LOGGER__ERROR("Failed to find RPC action {}", action_id);
+    LOGGER__ERROR("Failed to find RPC action {}", static_cast<int>(action_id));
     return make_unexpected(HAILO_RPC_FAILED);
 }
 
 hailo_status Server::serve()
 {
+    TRY(auto server_connection, RawConnection::create_shared(m_connection_context));
     while (true) {
-        TRY(auto client_connection, create_client_connection());
+        TRY(auto client_connection, create_client_connection(server_connection));
         auto th = std::thread([this, client_connection]() { serve_client(client_connection); });
         th.detach();
     }
@@ -55,11 +57,9 @@ void Server::set_dispatcher(Dispatcher dispatcher)
     m_dispatcher = dispatcher;
 }
 
-Expected<RpcConnection> Server::create_client_connection()
+Expected<RpcConnection> Server::create_client_connection(std::shared_ptr<hrpc::RawConnection> server_connection) // the server connection is now supplied by the caller instead of being created on every call
 {
-    TRY(auto server_connection, RawConnection::create_shared(m_connection_context));
     TRY(auto conn, server_connection->accept());
-
     return RpcConnection(conn);
 }
 
@@ -95,10 +95,11 @@ hailo_status Server::serve_client(RpcConnection client_connection)
     return HAILO_SUCCESS;
 }
 
-hailo_status Server::trigger_callback(uint32_t callback_id, RpcConnection connection, hailo_status callback_status,
-    std::function<hailo_status(RpcConnection)> write_buffers_callback)
+hailo_status Server::trigger_callback(uint32_t callback_id, hailo_status callback_status, rpc_object_handle_t callback_owner_handle,
+    RpcConnection connection, std::function<hailo_status(RpcConnection)> write_buffers_callback)
 {
-    TRY(auto reply, CallbackCalledSerializer::serialize_reply(callback_status, callback_id));
+    // TODO: callback handling should be outside of HRPC (HRT-14638)
+    TRY(auto reply, CallbackCalledSerializer::serialize_reply(callback_status, callback_id, callback_owner_handle));
 
     std::unique_lock<std::mutex> lock(m_write_mutex);
     rpc_message_header_t header;
diff --git a/hailort/hrpc/server.hpp b/hailort/hrpc/server.hpp
index 45d52459..55f3d43e 100644
--- a/hailort/hrpc/server.hpp
+++ b/hailort/hrpc/server.hpp
@@ -27,7 +27,7 @@ class ServerContext
 public:
     ServerContext(Server &server, RpcConnection connection);
     hailo_status trigger_callback(uint32_t callback_id, hailo_status callback_status,
-        std::function<hailo_status(RpcConnection)> write_buffers_callback = nullptr);
+        rpc_object_handle_t callback_owner_handle, std::function<hailo_status(RpcConnection)> write_buffers_callback = nullptr);
     RpcConnection &connection();
 
 private:
@@ -63,10 +63,10 @@ class Server
 protected:
     std::shared_ptr<ConnectionContext> m_connection_context;
 private:
-    Expected<RpcConnection> create_client_connection();
+    Expected<RpcConnection> create_client_connection(std::shared_ptr<hrpc::RawConnection> server_connection);
     hailo_status serve_client(RpcConnection client_connection);
-    hailo_status trigger_callback(uint32_t callback_id, RpcConnection connection, hailo_status callback_status,
-        std::function<hailo_status(RpcConnection)> write_buffers_callback = nullptr);
+    hailo_status trigger_callback(uint32_t callback_id, hailo_status callback_status, rpc_object_handle_t callback_owner_handle,
+        RpcConnection connection, std::function<hailo_status(RpcConnection)> write_buffers_callback = nullptr);
     virtual hailo_status cleanup_client_resources(RpcConnection client_connection) = 0;
 
     Dispatcher m_dispatcher;
diff --git a/hailort/hrpc_protocol/CMakeLists.txt b/hailort/hrpc_protocol/CMakeLists.txt
index d6b925ad..fce92db3 100644
--- a/hailort/hrpc_protocol/CMakeLists.txt
+++ b/hailort/hrpc_protocol/CMakeLists.txt
@@ -1,4 +1,4 @@
-cmake_minimum_required(VERSION 3.0.0)
+cmake_minimum_required(VERSION 3.5.0)
 
 protobuf_generate_cpp(PROTO_RPC_SRC PROTO_RPC_HEADER rpc.proto)
 get_filename_component(PROTO_HEADER_DIRECTORY ${PROTO_RPC_HEADER} DIRECTORY)
diff --git a/hailort/hrpc_protocol/rpc.proto b/hailort/hrpc_protocol/rpc.proto
index d99bd5ca..2a8ac722 100644
--- a/hailort/hrpc_protocol/rpc.proto
+++ b/hailort/hrpc_protocol/rpc.proto
@@ -20,6 +20,11 @@ message RpcRequest {
         ConfiguredInferModel_Deactivate_Request deactivate_request = 12;
         ConfiguredInferModel_Shutdown_Request shutdown_request = 13;
         ConfiguredInferModel_AsyncInfer_Request async_infer_request = 14;
+
+        Device_Create_Request create_device_request = 15;
+        Device_Destroy_Request destroy_device_request = 16;
+        Device_Identify_Request identify_device_request = 17;
+        Device_ExtendedInfo_Request extended_device_info_request = 18;
     }
 }
 
@@ -42,7 +47,13 @@ message RpcReply {
         ConfiguredInferModel_Shutdown_Reply shutdown_reply = 13;
         ConfiguredInferModel_AsyncInfer_Reply async_infer_reply = 14;
 
-        CallbackCalled_Reply callback_called_reply = 15;
+        Device_Create_Reply create_device_reply = 15;
+        Device_Destroy_Reply destroy_device_reply = 16;
+        Device_Identify_Reply identify_device_reply = 17;
+        Device_ExtendedInfo_Reply extended_device_info_reply = 18;
+
+        // Here come replies that have no matching requests
+        CallbackCalled_Reply callback_called_reply = 19;
     }
 }
 
@@ -79,6 +90,7 @@ message VDevice_Destroy_Reply {
 message VDevice_CreateInferModel_Request {
     HailoObjectHandle vdevice_handle = 1;
     uint64 hef_size = 2;
+    string name = 3;
     // Protocol note: After this message, server expects to get HEF data (buffer of size 'hef_size')
 }
 
@@ -193,6 +205,7 @@ message ConfiguredInferModel_AsyncInfer_Request {
     HailoObjectHandle configured_infer_model_handle = 1;
     HailoObjectHandle infer_model_handle = 2;
     HailoCallbackHandle callback_handle = 3;
+    repeated uint32 input_buffer_sizes = 4;
     // Protocol note: After this messgae, server expects to get the input buffers, one after the other, in order
 }
 
@@ -203,5 +216,92 @@ message ConfiguredInferModel_AsyncInfer_Reply {
 message CallbackCalled_Reply {
     uint32 status = 1;
     HailoCallbackHandle callback_handle = 2;
+    HailoObjectHandle configured_infer_model_handle = 3;
     // Protocol note: After this messgae, and only if status is HAILO_SUCCESS, server expects to get the output buffers, one after the other, in order
-}
\ No newline at end of file
+}
+
+message Device_Create_Request {
+}
+
+message Device_Create_Reply {
+    uint32 status = 1;
+    HailoObjectHandle device_handle = 2;
+}
+
+message Device_Destroy_Request {
+    HailoObjectHandle device_handle = 1;
+}
+
+message Device_Destroy_Reply {
+    uint32 status = 1;
+}
+
+message Device_Identify_Request {
+    HailoObjectHandle device_handle = 1;
+}
+
+// Added "_value" to the names so that the symbols will not clash with the macros defined in <sys/types.h>
+message FirmwareVersionProto {
+    uint32 major_value = 1;
+    uint32 minor_value = 2;
+    uint32 revision_value = 3;
+}
+
+enum DeviceArchitectureProto {
+    HAILO8_A0 = 0;
+    HAILO8 = 1;
+    HAILO8L = 2;
+    HAILO15H = 3;
+    PLUTO = 4;
+    HAILO15M = 5;
+    HAILO10H = 6;
+}
+
+message DeviceIdentityProto { // Wire form of the identity handled by IdentifyDeviceSerializer (serializer.cpp)
+    uint32 protocol_version = 1;
+    FirmwareVersionProto fw_version = 2;
+    uint32 logger_version = 3;
+    string board_name = 4;
+    bool is_release = 5;
+    bool extended_context_switch_buffer = 6;
+    DeviceArchitectureProto device_architecture = 7; // Written with a plain cast from hailo_device_architecture_t
+    repeated uint32 serial_number = 8; // One entry per element of the identity's serial_number array
+    repeated uint32 part_number = 9; // One entry per element of the identity's part_number array
+    string product_name = 10;
+}
+
+message Device_Identify_Reply {
+    uint32 status = 1;
+    DeviceIdentityProto identity = 2;
+}
+
+message Device_ExtendedInfo_Request {
+    HailoObjectHandle device_handle = 1;
+}
+
+message DeviceSupportedFeaturesProto {
+    bool ethernet = 1;
+    bool mipi = 2;
+    bool pcie = 3;
+    bool current_monitoring = 4;
+    bool mdio = 5;
+}
+
+enum DeviceBootSourceProto {
+    BOOT_SOURCE_INVALID = 0;
+    BOOT_SOURCE_PCIE = 1;
+    BOOT_SOURCE_FLASH = 2;
+    BOOT_SOURCE_MAX = 3;
+}
+
+message Device_ExtendedInfo_Reply { // Reply built/parsed by ExtendedDeviceInfoSerializer (serializer.cpp)
+    uint32 status = 1; // Cast back to hailo_status by the C++ deserializer
+    uint32 neural_network_core_clock_rate = 2;
+    DeviceSupportedFeaturesProto supported_features = 3;
+    DeviceBootSourceProto boot_source = 4; // Written with a plain cast from the C boot-source enum
+    repeated uint32 soc_id = 5; // Serializer writes HAILO_SOC_ID_LENGTH entries, one per array element
+    uint32 lcs = 6; // Narrowed to uint8_t by the C++ deserializer
+    repeated uint32 eth_mac_address = 7; // Serializer writes HAILO_ETH_MAC_LENGTH entries, one per array element
+    repeated uint32 unit_level_tracking_id = 8; // Serializer writes HAILO_UNIT_LEVEL_TRACKING_BYTES_LENGTH entries
+    repeated uint32 soc_pm_values = 9; // Serializer writes HAILO_SOC_PM_VALUES_BYTES_LENGTH entries
+}
diff --git a/hailort/hrpc_protocol/serializer.cpp b/hailort/hrpc_protocol/serializer.cpp
index 5dad7553..3ea842ce 100644
--- a/hailort/hrpc_protocol/serializer.cpp
+++ b/hailort/hrpc_protocol/serializer.cpp
@@ -40,7 +40,7 @@ Expected<Buffer> CreateVDeviceSerializer::serialize_request(const hailo_vdevice_
     proto_params->set_scheduling_algorithm(params.scheduling_algorithm);
     proto_params->set_group_id(params.group_id == nullptr ? "" : std::string(params.group_id));
 
-    // TODO (HRT-13983) - check if we can use GetCachedSize
+    // TODO (HRT-14732) - check if we can use GetCachedSize
     TRY(auto serialized_request, Buffer::create(request.ByteSizeLong(), BufferStorageParams::create_dma()));
 
     CHECK_AS_EXPECTED(request.SerializeToArray(serialized_request.data(), static_cast<int>(serialized_request.size())),
@@ -101,7 +101,7 @@ Expected<Buffer> DestroyVDeviceSerializer::serialize_request(rpc_object_handle_t
     auto proto_vdevice_handle= request.mutable_vdevice_handle();
     proto_vdevice_handle->set_id(vdevice_handle);
 
-    // TODO (HRT-13983) - check if we can use GetCachedSize
+    // TODO (HRT-14732) - check if we can use GetCachedSize
     TRY(auto serialized_request, Buffer::create(request.ByteSizeLong(), BufferStorageParams::create_dma()));
     CHECK_AS_EXPECTED(request.SerializeToArray(serialized_request.data(), static_cast<int>(serialized_request.size())),
         HAILO_RPC_FAILED, "Failed to serialize 'DestroyVDevice'");
@@ -142,16 +142,17 @@ hailo_status DestroyVDeviceSerializer::deserialize_reply(const MemoryView &seria
     return static_cast<hailo_status>(reply.status());
 }
 
-Expected<Buffer> CreateInferModelSerializer::serialize_request(rpc_object_handle_t vdevice_handle, uint64_t hef_size)
+Expected<Buffer> CreateInferModelSerializer::serialize_request(rpc_object_handle_t vdevice_handle, uint64_t hef_size,
+    const std::string &name)
 {
     VDevice_CreateInferModel_Request request;
 
     auto proto_vdevice_handle = request.mutable_vdevice_handle();
     proto_vdevice_handle->set_id(vdevice_handle);
-
     request.set_hef_size(hef_size);
+    request.set_name(name);
 
-    // TODO (HRT-13983) - check if we can use GetCachedSize
+    // TODO (HRT-14732) - check if we can use GetCachedSize
     TRY(auto serialized_request, Buffer::create(request.ByteSizeLong(), BufferStorageParams::create_dma()));
     CHECK_AS_EXPECTED(request.SerializeToArray(serialized_request.data(), static_cast<int>(serialized_request.size())),
         HAILO_RPC_FAILED, "Failed to serialize 'CreateVInferModel'");
@@ -159,14 +160,14 @@ Expected<Buffer> CreateInferModelSerializer::serialize_request(rpc_object_handle
     return serialized_request;
 }
 
-Expected<std::tuple<rpc_object_handle_t, uint64_t>> CreateInferModelSerializer::deserialize_request(const MemoryView &serialized_request)
+Expected<std::tuple<rpc_object_handle_t, uint64_t, std::string>> CreateInferModelSerializer::deserialize_request(const MemoryView &serialized_request)
 {
     VDevice_CreateInferModel_Request request;
 
     CHECK_AS_EXPECTED(request.ParseFromArray(serialized_request.data(), static_cast<int>(serialized_request.size())),
         HAILO_RPC_FAILED, "Failed to de-serialize 'CreateVInferModel'");
 
-    return std::make_tuple(request.vdevice_handle().id(), request.hef_size());
+    return std::make_tuple(request.vdevice_handle().id(), request.hef_size(), request.name());
 }
 
 Expected<Buffer> CreateInferModelSerializer::serialize_reply(hailo_status status, rpc_object_handle_t infer_model_handle)
@@ -202,7 +203,7 @@ Expected<Buffer> DestroyInferModelSerializer::serialize_request(rpc_object_handl
     auto proto_infer_model_handle = request.mutable_infer_model_handle();
     proto_infer_model_handle->set_id(infer_model_handle);
 
-    // TODO (HRT-13983) - check if we can use GetCachedSize
+    // TODO (HRT-14732) - check if we can use GetCachedSize
     TRY(auto serialized_request, Buffer::create(request.ByteSizeLong(), BufferStorageParams::create_dma()));
     CHECK_AS_EXPECTED(request.SerializeToArray(serialized_request.data(), static_cast<int>(serialized_request.size())),
         HAILO_RPC_FAILED, "Failed to serialize 'DestroyInferModel'");
@@ -279,7 +280,7 @@ Expected<Buffer> CreateConfiguredInferModelSerializer::serialize_request(rpc_cre
     request.set_power_mode(static_cast<uint32_t>(params.power_mode));
     request.set_latency_flag(static_cast<uint32_t>(params.latency_flag));
 
-    // TODO (HRT-13983) - check if we can use GetCachedSize
+    // TODO (HRT-14732) - check if we can use GetCachedSize
     TRY(auto serialized_request, Buffer::create(request.ByteSizeLong(), BufferStorageParams::create_dma()));
     CHECK_AS_EXPECTED(request.SerializeToArray(serialized_request.data(), static_cast<int>(serialized_request.size())),
         HAILO_RPC_FAILED, "Failed to serialize 'CreateConfiguredInferModel'");
@@ -363,7 +364,7 @@ Expected<Buffer> DestroyConfiguredInferModelSerializer::serialize_request(rpc_ob
     auto proto_infer_model_handle = request.mutable_configured_infer_model_handle();
     proto_infer_model_handle->set_id(configured_infer_model_handle);
 
-    // TODO (HRT-13983) - check if we can use GetCachedSize
+    // TODO (HRT-14732) - check if we can use GetCachedSize
     TRY(auto serialized_request, Buffer::create(request.ByteSizeLong(), BufferStorageParams::create_dma()));
     CHECK_AS_EXPECTED(request.SerializeToArray(serialized_request.data(), static_cast<int>(serialized_request.size())),
         HAILO_RPC_FAILED, "Failed to serialize 'DestroyConfiguredInferModel'");
@@ -413,7 +414,7 @@ Expected<Buffer> SetSchedulerTimeoutSerializer::serialize_request(rpc_object_han
     proto_configured_infer_model_handle->set_id(configured_infer_model_handle);
     request.set_timeout(static_cast<uint32_t>(timeout.count()));
 
-    // TODO (HRT-13983) - check if we can use GetCachedSize
+    // TODO (HRT-14732) - check if we can use GetCachedSize
     TRY(auto serialized_request, Buffer::create(request.ByteSizeLong(), BufferStorageParams::create_dma()));
     CHECK_AS_EXPECTED(request.SerializeToArray(serialized_request.data(), static_cast<int>(serialized_request.size())),
         HAILO_RPC_FAILED, "Failed to serialize 'SetSchedulerTimeout'");
@@ -463,7 +464,7 @@ Expected<Buffer> SetSchedulerThresholdSerializer::serialize_request(rpc_object_h
     proto_configured_infer_model_handle->set_id(configured_infer_model_handle);
     request.set_threshold(threshold);
 
-    // TODO (HRT-13983) - check if we can use GetCachedSize
+    // TODO (HRT-14732) - check if we can use GetCachedSize
     TRY(auto serialized_request, Buffer::create(request.ByteSizeLong(), BufferStorageParams::create_dma()));
     CHECK_AS_EXPECTED(request.SerializeToArray(serialized_request.data(), static_cast<int>(serialized_request.size())),
         HAILO_RPC_FAILED, "Failed to serialize 'SetSchedulerThreshold'");
@@ -513,7 +514,7 @@ Expected<Buffer> SetSchedulerPrioritySerializer::serialize_request(rpc_object_ha
     proto_configured_infer_model_handle->set_id(configured_infer_model_handle);
     request.set_priority(priority);
 
-    // TODO (HRT-13983) - check if we can use GetCachedSize
+    // TODO (HRT-14732) - check if we can use GetCachedSize
     TRY(auto serialized_request, Buffer::create(request.ByteSizeLong(), BufferStorageParams::create_dma()));
     CHECK_AS_EXPECTED(request.SerializeToArray(serialized_request.data(), static_cast<int>(serialized_request.size())),
         HAILO_RPC_FAILED, "Failed to serialize 'SetSchedulerPriority'");
@@ -562,7 +563,7 @@ Expected<Buffer> GetHwLatencyMeasurementSerializer::serialize_request(rpc_object
     auto proto_configured_infer_model_handle = request.mutable_configured_infer_model_handle();
     proto_configured_infer_model_handle->set_id(configured_infer_model_handle);
 
-    // TODO (HRT-13983) - check if we can use GetCachedSize
+    // TODO (HRT-14732) - check if we can use GetCachedSize
     TRY(auto serialized_request, Buffer::create(request.ByteSizeLong(), BufferStorageParams::create_dma()));
     CHECK_AS_EXPECTED(request.SerializeToArray(serialized_request.data(), static_cast<int>(serialized_request.size())),
         HAILO_RPC_FAILED, "Failed to serialize 'GetHwLatencyMeasurement'");
@@ -611,7 +612,7 @@ Expected<Buffer> ActivateSerializer::serialize_request(rpc_object_handle_t confi
     auto proto_configured_infer_model_handle = request.mutable_configured_infer_model_handle();
     proto_configured_infer_model_handle->set_id(configured_infer_model_handle);
 
-    // TODO (HRT-13983) - check if we can use GetCachedSize
+    // TODO (HRT-14732) - check if we can use GetCachedSize
     TRY(auto serialized_request, Buffer::create(request.ByteSizeLong(), BufferStorageParams::create_dma()));
     CHECK_AS_EXPECTED(request.SerializeToArray(serialized_request.data(), static_cast<int>(serialized_request.size())),
         HAILO_RPC_FAILED, "Failed to serialize 'Activate'");
@@ -659,7 +660,7 @@ Expected<Buffer> DeactivateSerializer::serialize_request(rpc_object_handle_t con
     auto proto_configured_infer_model_handle = request.mutable_configured_infer_model_handle();
     proto_configured_infer_model_handle->set_id(configured_infer_model_handle);
 
-    // TODO (HRT-13983) - check if we can use GetCachedSize
+    // TODO (HRT-14732) - check if we can use GetCachedSize
     TRY(auto serialized_request, Buffer::create(request.ByteSizeLong(), BufferStorageParams::create_dma()));
     CHECK_AS_EXPECTED(request.SerializeToArray(serialized_request.data(), static_cast<int>(serialized_request.size())),
         HAILO_RPC_FAILED, "Failed to serialize 'Deactivate'");
@@ -707,7 +708,7 @@ Expected<Buffer> ShutdownSerializer::serialize_request(rpc_object_handle_t confi
     auto proto_configured_infer_model_handle = request.mutable_configured_infer_model_handle();
     proto_configured_infer_model_handle->set_id(configured_infer_model_handle);
 
-    // TODO (HRT-13983) - check if we can use GetCachedSize
+    // TODO (HRT-14732) - check if we can use GetCachedSize
     TRY(auto serialized_request, Buffer::create(request.ByteSizeLong(), BufferStorageParams::create_dma()));
     CHECK_AS_EXPECTED(request.SerializeToArray(serialized_request.data(), static_cast<int>(serialized_request.size())),
         HAILO_RPC_FAILED, "Failed to serialize 'Shutdown'");
@@ -748,21 +749,22 @@ hailo_status ShutdownSerializer::deserialize_reply(const MemoryView &serialized_
     return static_cast<hailo_status>(reply.status());
 }
 
-Expected<Buffer> RunAsyncSerializer::serialize_request(rpc_object_handle_t configured_infer_model_handle, rpc_object_handle_t infer_model_handle,
-    rpc_object_handle_t callback_handle)
+Expected<Buffer> RunAsyncSerializer::serialize_request(const RunAsyncSerializer::Request &request_struct)
 {
     ConfiguredInferModel_AsyncInfer_Request request;
 
     auto proto_configured_infer_model_handle = request.mutable_configured_infer_model_handle();
-    proto_configured_infer_model_handle->set_id(configured_infer_model_handle);
+    proto_configured_infer_model_handle->set_id(request_struct.configured_infer_model_handle);
 
     auto proto_infer_model_handle = request.mutable_infer_model_handle();
-    proto_infer_model_handle->set_id(infer_model_handle);
+    proto_infer_model_handle->set_id(request_struct.infer_model_handle);
 
     auto proto_cb_handle = request.mutable_callback_handle();
-    proto_cb_handle->set_id(callback_handle);
+    proto_cb_handle->set_id(request_struct.callback_handle);
 
-    // TODO (HRT-13983) - check if we can use GetCachedSize
+    *request.mutable_input_buffer_sizes() = {request_struct.input_buffer_sizes.begin(), request_struct.input_buffer_sizes.end()};
+
+    // TODO (HRT-14732) - check if we can use GetCachedSize
     TRY(auto serialized_request, Buffer::create(request.ByteSizeLong(), BufferStorageParams::create_dma()));
     CHECK_AS_EXPECTED(request.SerializeToArray(serialized_request.data(), static_cast<int>(serialized_request.size())),
         HAILO_RPC_FAILED, "Failed to serialize 'RunAsync'");
@@ -770,7 +772,7 @@ Expected<Buffer> RunAsyncSerializer::serialize_request(rpc_object_handle_t confi
     return serialized_request;
 }
 
-Expected<std::tuple<rpc_object_handle_t, rpc_object_handle_t, rpc_object_handle_t>> RunAsyncSerializer::deserialize_request(
+Expected<RunAsyncSerializer::Request> RunAsyncSerializer::deserialize_request(
     const MemoryView &serialized_request)
 {
     ConfiguredInferModel_AsyncInfer_Request request;
@@ -778,8 +780,14 @@ Expected<std::tuple<rpc_object_handle_t, rpc_object_handle_t, rpc_object_handle_
     CHECK_AS_EXPECTED(request.ParseFromArray(serialized_request.data(), static_cast<int>(serialized_request.size())),
         HAILO_RPC_FAILED, "Failed to de-serialize 'RunAsync'");
 
-    return std::make_tuple(request.configured_infer_model_handle().id(), request.infer_model_handle().id(),
-        request.callback_handle().id());
+    std::vector<uint32_t> input_buffer_sizes(request.input_buffer_sizes().begin(), request.input_buffer_sizes().end());
+
+    RunAsyncSerializer::Request request_struct;
+    request_struct.configured_infer_model_handle = request.configured_infer_model_handle().id();
+    request_struct.infer_model_handle = request.infer_model_handle().id();
+    request_struct.callback_handle = request.callback_handle().id();
+    request_struct.input_buffer_sizes = input_buffer_sizes;
+    return request_struct;
 }
 
 Expected<Buffer> RunAsyncSerializer::serialize_reply(hailo_status status)
@@ -805,7 +813,8 @@ hailo_status RunAsyncSerializer::deserialize_reply(const MemoryView &serialized_
     return static_cast<hailo_status>(reply.status());
 }
 
-Expected<Buffer> CallbackCalledSerializer::serialize_reply(hailo_status status, rpc_object_handle_t callback_handle)
+Expected<Buffer> CallbackCalledSerializer::serialize_reply(hailo_status status, rpc_object_handle_t callback_handle,
+    rpc_object_handle_t configured_infer_model_handle)
 {
     CallbackCalled_Reply reply;
 
@@ -813,6 +822,9 @@ Expected<Buffer> CallbackCalledSerializer::serialize_reply(hailo_status status,
     auto proto_callback_handle = reply.mutable_callback_handle();
     proto_callback_handle->set_id(callback_handle);
 
+    auto proto_cim_handle = reply.mutable_configured_infer_model_handle();
+    proto_cim_handle->set_id(configured_infer_model_handle);
+
     TRY(auto serialized_reply, Buffer::create(reply.ByteSizeLong(), BufferStorageParams::create_dma()));
 
     CHECK_AS_EXPECTED(reply.SerializeToArray(serialized_reply.data(), static_cast<int>(serialized_reply.size())),
@@ -821,14 +833,319 @@ Expected<Buffer> CallbackCalledSerializer::serialize_reply(hailo_status status,
     return serialized_reply;
 }
 
-Expected<std::tuple<hailo_status, rpc_object_handle_t>> CallbackCalledSerializer::deserialize_reply(const MemoryView &serialized_reply)
+Expected<std::tuple<hailo_status, rpc_object_handle_t, rpc_object_handle_t>>
+CallbackCalledSerializer::deserialize_reply(const MemoryView &serialized_reply)
 {
     CallbackCalled_Reply reply;
 
     CHECK_AS_EXPECTED(reply.ParseFromArray(serialized_reply.data(), static_cast<int>(serialized_reply.size())),
         HAILO_RPC_FAILED, "Failed to de-serialize 'CallbackCalled'");
 
-    return std::make_tuple(static_cast<hailo_status>(reply.status()), reply.callback_handle().id());
+    return std::make_tuple(static_cast<hailo_status>(reply.status()), reply.callback_handle().id(),
+        reply.configured_infer_model_handle().id());
+}
+
+Expected<Buffer> CreateDeviceSerializer::serialize_request()
+{
+    Device_Create_Request request;
+
+    // TODO (HRT-14732) - check if we can use GetCachedSize
+    TRY(auto serialized_request, Buffer::create(request.ByteSizeLong(), BufferStorageParams::create_dma()));
+
+    CHECK_AS_EXPECTED(request.SerializeToArray(serialized_request.data(), static_cast<int>(serialized_request.size())),
+        HAILO_RPC_FAILED, "Failed to serialize 'CreateDevice'");
+
+    return serialized_request;
+}
+
+hailo_status CreateDeviceSerializer::deserialize_request(const MemoryView &serialized_request)
+{
+    Device_Create_Request request;
+
+    CHECK_AS_EXPECTED(request.ParseFromArray(serialized_request.data(), static_cast<int>(serialized_request.size())),
+        HAILO_RPC_FAILED, "Failed to de-serialize 'CreateDevice'");
+
+    return HAILO_SUCCESS;
+}
+
+Expected<Buffer> CreateDeviceSerializer::serialize_reply(hailo_status status, rpc_object_handle_t device_handle)
+{
+    Device_Create_Reply reply;
+
+    reply.set_status(status);
+    auto proto_device_handle = reply.mutable_device_handle();
+    proto_device_handle->set_id(device_handle);
+
+    TRY(auto serialized_reply, Buffer::create(reply.ByteSizeLong(), BufferStorageParams::create_dma()));
+
+    CHECK_AS_EXPECTED(reply.SerializeToArray(serialized_reply.data(), static_cast<int>(serialized_reply.size())),
+        HAILO_RPC_FAILED, "Failed to serialize 'CreateDevice'");
+
+    return serialized_reply;
+}
+
+Expected<std::tuple<hailo_status, rpc_object_handle_t>> CreateDeviceSerializer::deserialize_reply(const MemoryView &serialized_reply)
+{
+    Device_Create_Reply reply;
+
+    CHECK_AS_EXPECTED(reply.ParseFromArray(serialized_reply.data(), static_cast<int>(serialized_reply.size())),
+        HAILO_RPC_FAILED, "Failed to de-serialize 'CreateDevice'");
+
+    return std::make_tuple(static_cast<hailo_status>(reply.status()), reply.device_handle().id());
+}
+
+Expected<Buffer> DestroyDeviceSerializer::serialize_request(rpc_object_handle_t device_handle)
+{
+    Device_Destroy_Request request;
+
+    auto proto_device_handle= request.mutable_device_handle();
+    proto_device_handle->set_id(device_handle);
+
+    // TODO (HRT-14732) - check if we can use GetCachedSize
+    TRY(auto serialized_request, Buffer::create(request.ByteSizeLong(), BufferStorageParams::create_dma()));
+    CHECK_AS_EXPECTED(request.SerializeToArray(serialized_request.data(), static_cast<int>(serialized_request.size())),
+        HAILO_RPC_FAILED, "Failed to serialize 'DestroyDevice'");
+
+    return serialized_request;
+}
+
+Expected<rpc_object_handle_t> DestroyDeviceSerializer::deserialize_request(const MemoryView &serialized_request)
+{
+    Device_Destroy_Request request;
+
+    CHECK_AS_EXPECTED(request.ParseFromArray(serialized_request.data(), static_cast<int>(serialized_request.size())),
+        HAILO_RPC_FAILED, "Failed to de-serialize 'DestroyDevice'");
+
+    return request.device_handle().id();
+}
+
+Expected<Buffer> DestroyDeviceSerializer::serialize_reply(hailo_status status)
+{
+    Device_Destroy_Reply reply;
+    reply.set_status(status);
+
+    TRY(auto serialized_reply, Buffer::create(reply.ByteSizeLong(), BufferStorageParams::create_dma()));
+
+    CHECK_AS_EXPECTED(reply.SerializeToArray(serialized_reply.data(), static_cast<int>(serialized_reply.size())),
+        HAILO_RPC_FAILED, "Failed to serialize 'DestroyDevice'");
+
+    return serialized_reply;
+}
+
+hailo_status DestroyDeviceSerializer::deserialize_reply(const MemoryView &serialized_reply)
+{
+    Device_Destroy_Reply reply;
+
+    CHECK_AS_EXPECTED(reply.ParseFromArray(serialized_reply.data(), static_cast<int>(serialized_reply.size())),
+        HAILO_RPC_FAILED, "Failed to de-serialize 'DestroyDevice'");
+
+    return static_cast<hailo_status>(reply.status());
+}
+
+Expected<Buffer> IdentifyDeviceSerializer::serialize_request(rpc_object_handle_t device_handle)
+{
+    Device_Identify_Request request;
+
+    auto proto_device_handle = request.mutable_device_handle();
+    proto_device_handle->set_id(device_handle);
+
+    // TODO (HRT-14732) - check if we can use GetCachedSize
+    TRY(auto serialized_request, Buffer::create(request.ByteSizeLong(), BufferStorageParams::create_dma()));
+    CHECK_AS_EXPECTED(request.SerializeToArray(serialized_request.data(), static_cast<int>(serialized_request.size())),
+        HAILO_RPC_FAILED, "Failed to serialize 'IdentifyDevice'");
+
+    return serialized_request;
+}
+
+Expected<rpc_object_handle_t> IdentifyDeviceSerializer::deserialize_request(const MemoryView &serialized_request)
+{
+    Device_Identify_Request request;
+
+    CHECK_AS_EXPECTED(request.ParseFromArray(serialized_request.data(), static_cast<int>(serialized_request.size())),
+        HAILO_RPC_FAILED, "Failed to de-serialize 'IdentifyDevice'");
+
+    return request.device_handle().id();
+}
+
+Expected<Buffer> IdentifyDeviceSerializer::serialize_reply(hailo_status status, const hailo_device_identity_t &identity)
+{
+    Device_Identify_Reply reply;
+    // Fills a Device_Identify_Reply from 'status' and 'identity', then serializes it into a buffer sized by ByteSizeLong().
+    reply.set_status(status);
+    auto proto_identity = reply.mutable_identity();
+    proto_identity->set_protocol_version(identity.protocol_version);
+    proto_identity->set_logger_version(identity.logger_version);
+    proto_identity->set_board_name(identity.board_name); // NOTE(review): board_name_length is not used here - presumably relies on NUL termination; confirm
+    proto_identity->set_is_release(identity.is_release);
+    proto_identity->set_extended_context_switch_buffer(identity.extended_context_switch_buffer);
+    proto_identity->set_device_architecture(static_cast<DeviceArchitectureProto>(identity.device_architecture)); // NOTE(review): assumes both enums share values - confirm
+    // Serial and part numbers are sent as repeated uint32 fields, one entry per stored element (up to the matching *_length).
+    auto mut_serial_number = proto_identity->mutable_serial_number();
+    for (uint8_t i = 0; i < identity.serial_number_length; i++) {
+        mut_serial_number->Add(identity.serial_number[i]);
+    }
+    auto mut_part_number = proto_identity->mutable_part_number();
+    for (uint8_t i = 0; i < identity.part_number_length; i++) {
+        mut_part_number->Add(identity.part_number[i]);
+    }
+    proto_identity->set_product_name(identity.product_name); // NOTE(review): product_name_length is not used here either - confirm
+
+    auto fw_version = proto_identity->mutable_fw_version();
+    fw_version->set_major_value(identity.fw_version.major);
+    fw_version->set_minor_value(identity.fw_version.minor);
+    fw_version->set_revision_value(identity.fw_version.revision);
+
+    TRY(auto serialized_reply, Buffer::create(reply.ByteSizeLong(), BufferStorageParams::create_dma()));
+
+    CHECK_AS_EXPECTED(reply.SerializeToArray(serialized_reply.data(), static_cast<int>(serialized_reply.size())),
+        HAILO_RPC_FAILED, "Failed to serialize 'IdentifyDevice'");
+
+    return serialized_reply;
+}
+
+Expected<std::tuple<hailo_status, hailo_device_identity_t>> IdentifyDeviceSerializer::deserialize_reply(const MemoryView &serialized_reply)
+{
+    Device_Identify_Reply reply;
+    // Parses a serialized Device_Identify_Reply and returns its status together with the identity rebuilt field by field.
+    CHECK_AS_EXPECTED(reply.ParseFromArray(serialized_reply.data(), static_cast<int>(serialized_reply.size())),
+        HAILO_RPC_FAILED, "Failed to de-serialize 'IdentifyDevice'");
+
+    hailo_device_identity_t identity = {}; // Zero-initialized, so anything not assigned below stays 0
+
+    identity.protocol_version = reply.identity().protocol_version();
+    identity.logger_version = reply.identity().logger_version();
+    identity.is_release = reply.identity().is_release();
+    identity.extended_context_switch_buffer = reply.identity().extended_context_switch_buffer();
+    identity.device_architecture = static_cast<hailo_device_architecture_t>(reply.identity().device_architecture());
+    // NOTE(review): the copies below use the sizes carried by the reply; nothing here bounds them by the destination arrays - confirm
+    std::memcpy(identity.board_name, reply.identity().board_name().c_str(), reply.identity().board_name().size());
+    identity.board_name_length = static_cast<uint8_t>(reply.identity().board_name().size());
+    // Each repeated entry is narrowed back to a byte, and each *_length is taken from its own repeated field.
+    std::transform(reply.identity().serial_number().begin(), reply.identity().serial_number().end(), identity.serial_number, [](uint32_t val) {
+        return static_cast<uint8_t>(val);
+    });
+    identity.serial_number_length = static_cast<uint8_t>(reply.identity().serial_number().size());
+    std::transform(reply.identity().part_number().begin(), reply.identity().part_number().end(), identity.part_number, [](uint32_t val) {
+        return static_cast<uint8_t>(val);
+    });
+    identity.part_number_length = static_cast<uint8_t>(reply.identity().part_number().size());
+
+    std::memcpy(identity.product_name, reply.identity().product_name().c_str(), reply.identity().product_name().size());
+    identity.product_name_length = static_cast<uint8_t>(reply.identity().product_name().size());
+
+    identity.fw_version.major = reply.identity().fw_version().major_value();
+    identity.fw_version.minor = reply.identity().fw_version().minor_value();
+    identity.fw_version.revision = reply.identity().fw_version().revision_value();
+
+    return std::make_tuple(static_cast<hailo_status>(reply.status()), identity);
+}
+
+Expected<Buffer> ExtendedDeviceInfoSerializer::serialize_request(rpc_object_handle_t device_handle)
+{
+    Device_ExtendedInfo_Request request;
+
+    auto proto_device_handle = request.mutable_device_handle();
+    proto_device_handle->set_id(device_handle);
+
+    // TODO (HRT-14732) - check if we can use GetCachedSize
+    TRY(auto serialized_request, Buffer::create(request.ByteSizeLong(), BufferStorageParams::create_dma()));
+    CHECK_AS_EXPECTED(request.SerializeToArray(serialized_request.data(), static_cast<int>(serialized_request.size())),
+        HAILO_RPC_FAILED, "Failed to serialize 'ExtendedDeviceInfo'");
+
+    return serialized_request;
+}
+
+Expected<rpc_object_handle_t> ExtendedDeviceInfoSerializer::deserialize_request(const MemoryView &serialized_request)
+{
+    Device_ExtendedInfo_Request request;
+
+    CHECK_AS_EXPECTED(request.ParseFromArray(serialized_request.data(), static_cast<int>(serialized_request.size())),
+        HAILO_RPC_FAILED, "Failed to de-serialize 'ExtendedDeviceInfo'");
+
+    return request.device_handle().id();
+}
+
+Expected<Buffer> ExtendedDeviceInfoSerializer::serialize_reply(hailo_status status, const hailo_extended_device_information_t &extended_info)
+{
+    Device_ExtendedInfo_Reply reply;
+    // Fills a Device_ExtendedInfo_Reply from 'status' and 'extended_info', then serializes it into a buffer sized by ByteSizeLong().
+    reply.set_status(status);
+    reply.set_neural_network_core_clock_rate(extended_info.neural_network_core_clock_rate);
+
+    auto supported_features = reply.mutable_supported_features();
+    supported_features->set_ethernet(extended_info.supported_features.ethernet);
+    supported_features->set_mipi(extended_info.supported_features.mipi);
+    supported_features->set_pcie(extended_info.supported_features.pcie);
+    supported_features->set_current_monitoring(extended_info.supported_features.current_monitoring);
+    supported_features->set_mdio(extended_info.supported_features.mdio);
+
+    reply.set_boot_source(static_cast<DeviceBootSourceProto>(extended_info.boot_source)); // NOTE(review): assumes both enums share values - confirm
+    // Each fixed-size array below is sent as a repeated uint32 field, one entry per element, always at its full length.
+    auto soc_id = reply.mutable_soc_id();
+    for (auto i = 0; i < HAILO_SOC_ID_LENGTH; i++) {
+        soc_id->Add(extended_info.soc_id[i]);
+    }
+
+    reply.set_lcs(extended_info.lcs);
+
+    auto eth_mac_address = reply.mutable_eth_mac_address();
+    for (auto i = 0; i < HAILO_ETH_MAC_LENGTH; i++) {
+        eth_mac_address->Add(extended_info.eth_mac_address[i]);
+    }
+
+    auto unit_level_tracking_id = reply.mutable_unit_level_tracking_id();
+    for (auto i = 0; i < HAILO_UNIT_LEVEL_TRACKING_BYTES_LENGTH; i++) {
+        unit_level_tracking_id->Add(extended_info.unit_level_tracking_id[i]);
+    }
+
+    auto soc_pm_values = reply.mutable_soc_pm_values();
+    for (auto i = 0; i < HAILO_SOC_PM_VALUES_BYTES_LENGTH; i++) {
+        soc_pm_values->Add(extended_info.soc_pm_values[i]);
+    }
+
+    TRY(auto serialized_reply, Buffer::create(reply.ByteSizeLong(), BufferStorageParams::create_dma()));
+
+    CHECK_AS_EXPECTED(reply.SerializeToArray(serialized_reply.data(), static_cast<int>(serialized_reply.size())),
+        HAILO_RPC_FAILED, "Failed to serialize 'ExtendedDeviceInfo'");
+
+    return serialized_reply;
+}
+
+Expected<std::tuple<hailo_status, hailo_extended_device_information_t>> ExtendedDeviceInfoSerializer::deserialize_reply(const MemoryView &serialized_reply)
+{
+    Device_ExtendedInfo_Reply reply;
+    // Parses a serialized Device_ExtendedInfo_Reply and returns its status together with the rebuilt extended info.
+    CHECK_AS_EXPECTED(reply.ParseFromArray(serialized_reply.data(), static_cast<int>(serialized_reply.size())),
+        HAILO_RPC_FAILED, "Failed to de-serialize 'ExtendedDeviceInfo'");
+
+    hailo_extended_device_information_t extended_info = {};
+    // Repeated fields are size-checked before being copied into the fixed-size arrays: their element counts come off the wire.
+    extended_info.neural_network_core_clock_rate = reply.neural_network_core_clock_rate();
+    extended_info.supported_features.ethernet = reply.supported_features().ethernet();
+    extended_info.supported_features.mipi = reply.supported_features().mipi();
+    extended_info.supported_features.pcie = reply.supported_features().pcie();
+    extended_info.supported_features.current_monitoring = reply.supported_features().current_monitoring();
+    extended_info.supported_features.mdio = reply.supported_features().mdio();
+    extended_info.boot_source = static_cast<hailo_device_boot_source_t>(reply.boot_source());
+    CHECK_AS_EXPECTED(reply.soc_id().size() == HAILO_SOC_ID_LENGTH, HAILO_RPC_FAILED, "Unexpected 'soc_id' size in 'ExtendedDeviceInfo'");
+    std::transform(reply.soc_id().begin(), reply.soc_id().begin() + HAILO_SOC_ID_LENGTH,
+        extended_info.soc_id, [](uint32_t val) { return static_cast<uint8_t>(val); });
+    extended_info.lcs = static_cast<uint8_t>(reply.lcs());
+
+    // assert() is compiled out when NDEBUG is defined, so the size checks below are real runtime checks
+    CHECK_AS_EXPECTED(reply.eth_mac_address().size() == HAILO_ETH_MAC_LENGTH, HAILO_RPC_FAILED, "Unexpected 'eth_mac_address' size in 'ExtendedDeviceInfo'");
+    std::transform(reply.eth_mac_address().begin(), reply.eth_mac_address().begin() + HAILO_ETH_MAC_LENGTH,
+        extended_info.eth_mac_address, [](uint32_t val) { return static_cast<uint8_t>(val); });
+
+    CHECK_AS_EXPECTED(reply.unit_level_tracking_id().size() == HAILO_UNIT_LEVEL_TRACKING_BYTES_LENGTH, HAILO_RPC_FAILED, "Unexpected 'unit_level_tracking_id' size in 'ExtendedDeviceInfo'");
+    std::transform(reply.unit_level_tracking_id().begin(), reply.unit_level_tracking_id().begin() + HAILO_UNIT_LEVEL_TRACKING_BYTES_LENGTH,
+        extended_info.unit_level_tracking_id, [](uint32_t val) { return static_cast<uint8_t>(val); });
+
+    CHECK_AS_EXPECTED(reply.soc_pm_values().size() == HAILO_SOC_PM_VALUES_BYTES_LENGTH, HAILO_RPC_FAILED, "Unexpected 'soc_pm_values' size in 'ExtendedDeviceInfo'");
+    std::transform(reply.soc_pm_values().begin(), reply.soc_pm_values().begin() + HAILO_SOC_PM_VALUES_BYTES_LENGTH,
+        extended_info.soc_pm_values, [](uint32_t val) { return static_cast<uint8_t>(val); });
+
+    return std::make_tuple(static_cast<hailo_status>(reply.status()), extended_info);
 }
 
 } /* namespace hailort */
diff --git a/hailort/hrpc_protocol/serializer.hpp b/hailort/hrpc_protocol/serializer.hpp
index a4d435c2..b4059bff 100644
--- a/hailort/hrpc_protocol/serializer.hpp
+++ b/hailort/hrpc_protocol/serializer.hpp
@@ -16,6 +16,7 @@
 
 #include <chrono>
 #include <unordered_map>
+#include <vector>
 
 namespace hailort
 {
@@ -41,6 +42,11 @@ enum class HailoRpcActionID {
     CONFIGURED_INFER_MODEL__SHUTDOWN,
     CONFIGURED_INFER_MODEL__RUN_ASYNC,
 
+    DEVICE__CREATE,
+    DEVICE__DESTROY,
+    DEVICE__IDENTIFY,
+    DEVICE__EXTENDED_INFO,
+
     CALLBACK_CALLED,
 
     MAX_VALUE,
@@ -97,8 +103,8 @@ class CreateInferModelSerializer
 public:
     CreateInferModelSerializer() = delete;
 
-    static Expected<Buffer> serialize_request(rpc_object_handle_t vdevice_handle, uint64_t hef_size);
-    static Expected<std::tuple<rpc_object_handle_t, uint64_t>> deserialize_request(const MemoryView &serialized_request);
+    static Expected<Buffer> serialize_request(rpc_object_handle_t vdevice_handle, uint64_t hef_size, const std::string &name);
+    static Expected<std::tuple<rpc_object_handle_t, uint64_t, std::string>> deserialize_request(const MemoryView &serialized_request);
 
     static Expected<Buffer> serialize_reply(hailo_status status, rpc_object_handle_t infer_model_handle = INVALID_HANDLE_ID);
     static Expected<std::tuple<hailo_status, rpc_object_handle_t>> deserialize_reply(const MemoryView &serialized_reply);
@@ -230,9 +236,16 @@ class RunAsyncSerializer
 public:
     RunAsyncSerializer() = delete;
 
-    static Expected<Buffer> serialize_request(rpc_object_handle_t configured_infer_model_handle, rpc_object_handle_t infer_model_handle,
-        rpc_object_handle_t callback_handle);
-    static Expected<std::tuple<rpc_object_handle_t, rpc_object_handle_t, rpc_object_handle_t>> deserialize_request(const MemoryView &serialized_request);
+    struct Request
+    {
+        rpc_object_handle_t configured_infer_model_handle;
+        rpc_object_handle_t infer_model_handle;
+        rpc_object_handle_t callback_handle;
+        std::vector<uint32_t> input_buffer_sizes;
+    };
+
+    static Expected<Buffer> serialize_request(const Request &request_struct);
+    static Expected<Request> deserialize_request(const MemoryView &serialized_request);
 
     static Expected<Buffer> serialize_reply(hailo_status status);
     static hailo_status deserialize_reply(const MemoryView &serialized_reply);
@@ -243,10 +256,59 @@ class CallbackCalledSerializer
 public:
     CallbackCalledSerializer() = delete;
 
-    static Expected<Buffer> serialize_reply(hailo_status status, rpc_object_handle_t callback_handle = INVALID_HANDLE_ID);
+    static Expected<Buffer> serialize_reply(hailo_status status, rpc_object_handle_t callback_handle = INVALID_HANDLE_ID,
+        rpc_object_handle_t configured_infer_model_handle = INVALID_HANDLE_ID);
+    static Expected<std::tuple<hailo_status, rpc_object_handle_t, rpc_object_handle_t>> deserialize_reply(const MemoryView &serialized_reply);
+};
+
+class CreateDeviceSerializer
+{
+public:
+    CreateDeviceSerializer() = delete;
+
+    static Expected<Buffer> serialize_request();
+    static hailo_status deserialize_request(const MemoryView &serialized_request);
+
+    static Expected<Buffer> serialize_reply(hailo_status status, rpc_object_handle_t device_handle = INVALID_HANDLE_ID);
     static Expected<std::tuple<hailo_status, rpc_object_handle_t>> deserialize_reply(const MemoryView &serialized_reply);
 };
 
+class DestroyDeviceSerializer
+{
+public:
+    DestroyDeviceSerializer() = delete;
+
+    static Expected<Buffer> serialize_request(rpc_object_handle_t device_handle);
+    static Expected<rpc_object_handle_t> deserialize_request(const MemoryView &serialized_request);
+
+    static Expected<Buffer> serialize_reply(hailo_status status);
+    static hailo_status deserialize_reply(const MemoryView &serialized_reply);
+};
+
+class IdentifyDeviceSerializer
+{
+public:
+    IdentifyDeviceSerializer() = delete;
+
+    static Expected<Buffer> serialize_request(rpc_object_handle_t device_handle);
+    static Expected<rpc_object_handle_t> deserialize_request(const MemoryView &serialized_request);
+
+    static Expected<Buffer> serialize_reply(hailo_status status, const hailo_device_identity_t &identity = {});
+    static Expected<std::tuple<hailo_status, hailo_device_identity_t>> deserialize_reply(const MemoryView &serialized_reply);
+};
+
+class ExtendedDeviceInfoSerializer
+{
+public:
+    ExtendedDeviceInfoSerializer() = delete;
+
+    static Expected<Buffer> serialize_request(rpc_object_handle_t device_handle);
+    static Expected<rpc_object_handle_t> deserialize_request(const MemoryView &serialized_request);
+
+    static Expected<Buffer> serialize_reply(hailo_status status, const hailo_extended_device_information_t &extended_info = {});
+    static Expected<std::tuple<hailo_status, hailo_extended_device_information_t>> deserialize_reply(const MemoryView &serialized_reply);
+};
+
 
 } /* namespace hailort */
 
diff --git a/hailort/libhailort/CMakeLists.txt b/hailort/libhailort/CMakeLists.txt
index 33e183a9..4f07eaa9 100644
--- a/hailort/libhailort/CMakeLists.txt
+++ b/hailort/libhailort/CMakeLists.txt
@@ -1,8 +1,8 @@
-cmake_minimum_required(VERSION 3.0.0)
+cmake_minimum_required(VERSION 3.5.0)
 # set(CMAKE_C_CLANG_TIDY "clang-tidy;-checks=*")
 
 set(HAILORT_MAJOR_VERSION    4)
-set(HAILORT_MINOR_VERSION    18)
+set(HAILORT_MINOR_VERSION    19)
 set(HAILORT_REVISION_VERSION 0)
 
 # Add the cmake folder so the modules there are found
@@ -27,10 +27,6 @@ target_include_directories(hef_proto
     $<BUILD_INTERFACE: ${Protobuf_INCLUDE_DIRS}>
 )
 
-if(HAILO_BUILD_PROFILER)
-    add_definitions( -DHAILO_ENABLE_PROFILER_BUILD )
-endif()
-
 protobuf_generate_cpp(PROTO_SCHEDULER_MON_SRC PROTO_SCHEDULER_MON_HEADR scheduler_mon.proto)
 add_library(scheduler_mon_proto ${PROTO_SCHEDULER_MON_SRC} ${PROTO_SCHEDULER_MON_HEADR})
 target_link_libraries(scheduler_mon_proto libprotobuf-lite)
diff --git a/hailort/libhailort/bindings/CMakeLists.txt b/hailort/libhailort/bindings/CMakeLists.txt
index c8037279..febc69a3 100644
--- a/hailort/libhailort/bindings/CMakeLists.txt
+++ b/hailort/libhailort/bindings/CMakeLists.txt
@@ -1,9 +1,12 @@
-cmake_minimum_required(VERSION 3.0.0)
-if(HAILO_BUILD_PYBIND)
-    add_subdirectory(python)
-endif()
+cmake_minimum_required(VERSION 3.5.0)
 
 # QNX currently doesnt support GStreamer
-if(HAILO_BUILD_GSTREAMER AND CMAKE_HOST_UNIX AND NOT CMAKE_SYSTEM_NAME STREQUAL QNX)
+if(HAILO_BUILD_GSTREAMER AND NOT CMAKE_SYSTEM_NAME STREQUAL QNX)
     add_subdirectory(gstreamer)
 endif()
+
+option(HAILO_BUILD_PYHAILORT_INTERNAL "Add the python/src/internal subdirectory to the build" OFF)
+option(HAILO_BUILD_RAW_CONNECTION "Add the python/src/internal subdirectory to the build (raw connection)" OFF)
+if(HAILO_BUILD_RAW_CONNECTION OR HAILO_BUILD_PYHAILORT_INTERNAL)
+    add_subdirectory(python/src/internal/)
+endif()
diff --git a/hailort/libhailort/bindings/gstreamer/CMakeLists.txt b/hailort/libhailort/bindings/gstreamer/CMakeLists.txt
index 5a601802..2cdfb282 100644
--- a/hailort/libhailort/bindings/gstreamer/CMakeLists.txt
+++ b/hailort/libhailort/bindings/gstreamer/CMakeLists.txt
@@ -1,34 +1,24 @@
-cmake_minimum_required(VERSION 3.0.0)
+cmake_minimum_required(VERSION 3.5.0)
 
 project(gsthailo)
 
 include(GNUInstallDirs)
 
-if(NOT CMAKE_HOST_UNIX)
-    message(FATAL_ERROR "Only unix hosts are supported, stopping build")
-endif()
-
-find_package(HailoRT 4.18.0 EXACT REQUIRED)
+find_package(HailoRT 4.19.0 EXACT REQUIRED)
 
 # GST_PLUGIN_DEFINE needs PACKAGE to be defined
 set(GST_HAILO_PACKAGE_NAME "hailo")
 set(GST_HAILO_VERSION "1.0")
 
-find_package(PkgConfig REQUIRED)
-pkg_search_module(GLIB REQUIRED glib-2.0)
-pkg_search_module(GSTREAMER REQUIRED gstreamer-1.0)
-pkg_search_module(GSTREAMER_BASE REQUIRED gstreamer-base-1.0)
-pkg_search_module(GSTREAMER_VIDEO REQUIRED gstreamer-video-1.0)
-pkg_search_module(GSTREAMER_PLUGINS_BASE REQUIRED gstreamer-plugins-base-1.0)
+include(find_libs_for_gstreamer.cmake)
 
-add_library(gsthailo SHARED
+set(GSTHAILO_SOURCES
     gst-hailo/gsthailoplugin.cpp
     gst-hailo/sync_gsthailonet.cpp
     gst-hailo/sync_gst_hailosend.cpp
     gst-hailo/sync_gst_hailorecv.cpp
     gst-hailo/gsthailonet.cpp
     gst-hailo/gsthailo_allocator.cpp
-    gst-hailo/gsthailo_dmabuf_allocator.cpp
     gst-hailo/gsthailodevicestats.cpp
     gst-hailo/common.cpp
     gst-hailo/network_group_handle.cpp
@@ -36,8 +26,18 @@ add_library(gsthailo SHARED
     gst-hailo/metadata/tensor_meta.cpp
     gst-hailo/hailo_events/hailo_events.cpp)
 
+# dmabuf is supported only on linux
+if (UNIX)
+    list(APPEND GSTHAILO_SOURCES gst-hailo/gsthailo_dmabuf_allocator.cpp gst-hailo/os/linux/dma_buf_allocator_wrapper.cpp)
+elseif (MSVC)
+    list(APPEND GSTHAILO_SOURCES gst-hailo/os/windows/dma_buf_allocator_wrapper.cpp)
+endif ()
+
+add_library(gsthailo SHARED ${GSTHAILO_SOURCES})
+
 set_property(TARGET gsthailo PROPERTY CXX_STANDARD 14)
 
+# TODO HRT-14797: After creating a directory containing all the relevant Hailo GST files (tensor_meta.hpp and hailo_gst.h) - update the PUBLIC_HEADER to be that dir
 set_target_properties(gsthailo PROPERTIES
     PUBLIC_HEADER "gst-hailo/metadata/tensor_meta.hpp"
     CXX_STANDARD              14
@@ -48,17 +48,54 @@ set_target_properties(gsthailo PROPERTIES
     # VISIBILITY_INLINES_HIDDEN YES
 )
 
-target_compile_options(gsthailo PRIVATE
-    -Werror -Wall -Wextra -Wconversion
-    -DVERSION="${GST_HAILO_VERSION}"
-    -DPACKAGE="${GST_HAILO_PACKAGE_NAME}")
+if (UNIX)
+    set(HAILORT_COMPILE_OPTIONS
+        ${HAILORT_COMPILE_OPTIONS}
+        -DVERSION="${GST_HAILO_VERSION}"
+        -DPACKAGE="${GST_HAILO_PACKAGE_NAME}"
+    )
+elseif (WIN32)
+    set(HAILORT_COMPILE_OPTIONS
+        ${HAILORT_COMPILE_OPTIONS}
+        /DWIN32_LEAN_AND_MEAN
+        /DNOMINMAX                  # NOMINMAX is required in order to play nice with std::min/std::max (otherwise Windows.h defines its own)
+        /D_HAILO_EXPORTING
+        /wd4201                     # Anonymous union/struct
+        /wd4251                     # C++ ABI with STL
+        -DVERSION="${GST_HAILO_VERSION}"
+        -DPACKAGE="${GST_HAILO_PACKAGE_NAME}"
+    )
+else()
+    message(FATAL_ERROR "Unexpeced host, stopping build")
+endif()
 
-target_include_directories(gsthailo PRIVATE ${GSTREAMER_VIDEO_INCLUDE_DIRS})
-target_link_libraries(gsthailo HailoRT::libhailort ${GSTREAMER_VIDEO_LDFLAGS} -lgstallocators-1.0)
+target_compile_options(gsthailo PRIVATE ${HAILORT_COMPILE_OPTIONS})
+
+if (UNIX)
+    target_include_directories(gsthailo PRIVATE ${GSTREAMER_VIDEO_INCLUDE_DIRS} ${CMAKE_CURRENT_SOURCE_DIR}/gst-hailo ${CMAKE_CURRENT_SOURCE_DIR}/gst-hailo/os/linux)
+    target_link_libraries(gsthailo HailoRT::libhailort ${GSTREAMER_VIDEO_LDFLAGS} -lgstallocators-1.0)
+else()
+    target_include_directories(gsthailo PRIVATE ${GSTREAMER_INCLUDE_DIRS} ${GSTREAMER_VIDEO_INCLUDE_DIRS} ${GLIB_INCLUDE_DIRS} ${GLIBCONFIG_INCLUDE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/gst-hailo ${CMAKE_CURRENT_SOURCE_DIR}/gst-hailo/os/windows)
+    target_link_libraries(gsthailo HailoRT::libhailort ${GSTREAMER_LIBRARIES} ${GSTREAMER_BASE_LIBRARIES} ${GSTREAMER_VIDEO_LIBRARIES} ${GLIB_LIBRARIES} ${GOBJECT_LIBRARIES} -lgstallocators-1.0)
+endif()
+
+if (UNIX)
+    set(GSTREAMER_LIB_DEST "${CMAKE_INSTALL_LIBDIR}/${CMAKE_SYSTEM_PROCESSOR}-linux-gnu/gstreamer-1.0/")
+    set(PUBLIC_HEADER_DEST "${CMAKE_INSTALL_INCLUDEDIR}/gstreamer-1.0/gst/hailo/")
+elseif (MSVC)
+    if ("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "AMD64" OR "${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "x64")
+        set(GSTREAMER_DEST "C:\\gstreamer\\1.0\\msvc_x86_64")
+    else()
+        message(FATAL_ERROR "HailoRT GStreamer elements installation is not supported on this architecture.")
+    endif()
+    set(GSTREAMER_LIB_DEST "${GSTREAMER_DEST}\\lib\\gstreamer-1.0")
+    set(PUBLIC_HEADER_DEST "${CMAKE_INSTALL_INCLUDEDIR}\\gstreamer-1.0\\gst\\hailo")
+endif()
 
+# Install command using the set variables
 install(TARGETS gsthailo
-    LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
-    ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
-    # TODO: get gstreamer-1.0 in an automate way
-    PUBLIC_HEADER DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/gstreamer-1.0/gst/hailo/"
+    RUNTIME DESTINATION "${GSTREAMER_LIB_DEST}"
+    LIBRARY DESTINATION "${GSTREAMER_LIB_DEST}"
+    ARCHIVE DESTINATION "${GSTREAMER_LIB_DEST}"
+    PUBLIC_HEADER DESTINATION "${PUBLIC_HEADER_DEST}"
     CONFIGURATIONS Release)
diff --git a/hailort/libhailort/bindings/gstreamer/find_libs_for_gstreamer.cmake b/hailort/libhailort/bindings/gstreamer/find_libs_for_gstreamer.cmake
new file mode 100644
index 00000000..915df1bd
--- /dev/null
+++ b/hailort/libhailort/bindings/gstreamer/find_libs_for_gstreamer.cmake
@@ -0,0 +1,9 @@
+cmake_minimum_required(VERSION 3.5.0)
+
+if (UNIX)
+    include(find_libs_for_gstreamer_linux.cmake)
+elseif (MSVC)
+    include(find_libs_for_gstreamer_windows.cmake)
+else()
+    message(FATAL_ERROR "HailoRT GStreamer elements compilation is supported only on UNIX or MSVC.")
+endif()
\ No newline at end of file
diff --git a/hailort/libhailort/bindings/gstreamer/find_libs_for_gstreamer_linux.cmake b/hailort/libhailort/bindings/gstreamer/find_libs_for_gstreamer_linux.cmake
new file mode 100644
index 00000000..f4a6957b
--- /dev/null
+++ b/hailort/libhailort/bindings/gstreamer/find_libs_for_gstreamer_linux.cmake
@@ -0,0 +1,8 @@
+cmake_minimum_required(VERSION 3.5.0)
+
+find_package(PkgConfig REQUIRED)
+pkg_search_module(GLIB REQUIRED glib-2.0)
+pkg_search_module(GSTREAMER REQUIRED gstreamer-1.0)
+pkg_search_module(GSTREAMER_BASE REQUIRED gstreamer-base-1.0)
+pkg_search_module(GSTREAMER_VIDEO REQUIRED gstreamer-video-1.0)
+pkg_search_module(GSTREAMER_PLUGINS_BASE REQUIRED gstreamer-plugins-base-1.0)
\ No newline at end of file
diff --git a/hailort/libhailort/bindings/gstreamer/find_libs_for_gstreamer_windows.cmake b/hailort/libhailort/bindings/gstreamer/find_libs_for_gstreamer_windows.cmake
new file mode 100644
index 00000000..8218fa58
--- /dev/null
+++ b/hailort/libhailort/bindings/gstreamer/find_libs_for_gstreamer_windows.cmake
@@ -0,0 +1,87 @@
+cmake_minimum_required(VERSION 3.5.0)
+
+# CMake variable GSTREAMER_ROOT_DIR defines the location of the gstreamer files.
+# Its default value is C:/gstreamer/1.0/msvc_x86_64.
+if (NOT GSTREAMER_ROOT_DIR)
+    message("Gstreamer Windows compilation - GSTREAMER_ROOT_DIR is not set. default value is C:/gstreamer/1.0/msvc_x86_64")
+    set(GSTREAMER_ROOT_DIR "C:/gstreamer/1.0/msvc_x86_64")
+endif()
+set(GLIB_ROOT_DIR ${GSTREAMER_ROOT_DIR})
+
+# Find the GStreamer library and include directories
+find_path(GSTREAMER_INCLUDE_DIRS
+    NAMES gst/gst.h
+    PATHS ${GSTREAMER_ROOT_DIR}/include/gstreamer-1.0
+    REQUIRED
+)
+
+find_library(GSTREAMER_LIBRARIES
+    NAMES gstreamer-1.0
+    PATHS ${GSTREAMER_ROOT_DIR}/lib
+)
+
+include(FindPackageHandleStandardArgs)
+find_package_handle_standard_args(GStreamer DEFAULT_MSG
+    GSTREAMER_LIBRARIES GSTREAMER_INCLUDE_DIRS
+    REQUIRED
+)
+
+# Find the GStreamer base library and include directories
+find_path(GSTREAMER_BASE_INCLUDE_DIRS
+    NAMES gst/base/base.h
+    PATHS ${GSTREAMER_ROOT_DIR}/include/gstreamer-1.0
+)
+
+find_library(GSTREAMER_BASE_LIBRARIES
+    NAMES gstbase-1.0
+    PATHS ${GSTREAMER_ROOT_DIR}/lib
+)
+
+find_package_handle_standard_args(GStreamerPluginsBase DEFAULT_MSG
+    GSTREAMER_BASE_LIBRARIES GSTREAMER_BASE_INCLUDE_DIRS
+)
+
+# Find the GStreamer video library and include directories
+find_path(GSTREAMER_VIDEO_INCLUDE_DIRS
+    NAMES gst/video/video.h
+    PATHS ${GSTREAMER_ROOT_DIR}/include/gstreamer-1.0
+)
+
+find_library(GSTREAMER_VIDEO_LIBRARIES
+    NAMES gstvideo-1.0
+    PATHS ${GSTREAMER_ROOT_DIR}/lib
+)
+
+find_package_handle_standard_args(GStreamerVideo DEFAULT_MSG
+    GSTREAMER_VIDEO_LIBRARIES GSTREAMER_VIDEO_INCLUDE_DIRS
+)
+
+# Find the GLib library and include directories
+find_path(GLIB_INCLUDE_DIRS
+    NAMES glib.h
+    PATHS ${GLIB_ROOT_DIR}/include/glib-2.0
+        ${GLIB_ROOT_DIR}/lib/glib-2.0/include
+    REQUIRED
+)
+
+find_library(GLIB_LIBRARIES
+    NAMES glib-2.0
+    PATHS ${GLIB_ROOT_DIR}/lib
+)
+
+find_library(GOBJECT_LIBRARIES
+    NAMES gobject-2.0
+    PATHS ${GLIB_ROOT_DIR}/lib
+)
+
+# Add the directory containing glibconfig.h to the include directories
+find_path(GLIBCONFIG_INCLUDE_DIR
+    NAMES glibconfig.h
+    PATHS ${GLIB_ROOT_DIR}/lib/glib-2.0/include
+    REQUIRED
+)
+
+find_package_handle_standard_args(GLib DEFAULT_MSG
+    GLIB_LIBRARIES GLIB_INCLUDE_DIRS GLIBCONFIG_INCLUDE_DIR
+    REQUIRED
+)
\ No newline at end of file
diff --git a/hailort/libhailort/bindings/gstreamer/gst-hailo/common.hpp b/hailort/libhailort/bindings/gstreamer/gst-hailo/common.hpp
index 2a95dd76..ffaf451a 100644
--- a/hailort/libhailort/bindings/gstreamer/gst-hailo/common.hpp
+++ b/hailort/libhailort/bindings/gstreamer/gst-hailo/common.hpp
@@ -23,20 +23,22 @@
 #include "hailo/device.hpp"
 #include "hailo/network_group.hpp"
 #include "hailo/vstream.hpp"
-
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wconversion"
-#include <gst/gst.h>
-#pragma GCC diagnostic pop
+#include "hailo_gst.h"
 
 #include <vector>
 
 using namespace hailort;
 
-#define ERROR(msg, ...) g_print("HailoNet Error: " msg, ##__VA_ARGS__)
+#define GST_HAILO_USE_DMA_BUFFER_ENV_VAR "GST_HAILO_USE_DMA_BUFFER"
+
+#define HAILONET_ERROR(msg, ...) g_print("HailoNet Error: " msg, ##__VA_ARGS__)
 #define PLUGIN_AUTHOR "Hailo Technologies Ltd. (\"Hailo\")"
 
-#define MAX_STRING_SIZE (PATH_MAX)
+#ifdef _MSC_VER
+    #define MAX_STRING_SIZE (MAX_PATH)
+#else
+    #define MAX_STRING_SIZE (PATH_MAX)
+#endif
 
 #define MAX_QUEUED_BUFFERS_IN_INPUT (16)
 #define MAX_QUEUED_BUFFERS_IN_OUTPUT (16)
@@ -115,6 +117,9 @@ using namespace hailort;
 
 #define CHECK_NOT_NULL(arg, status) _CHECK(nullptr != (arg), status, "CHECK_NOT_NULL for %s failed", #arg)
 
+#define CHECK_NOT_NULL_AS_EXPECTED(arg, status) \
+    _CHECK(nullptr != (arg), make_unexpected(status), "CHECK_NOT_NULL_AS_EXPECTED for %s failed", #arg)
+
 #define _CHECK_SUCCESS(status, ...)                                                                            \
     do {                                                                                                                        \
         const auto &__check_success_status = (status);                                                                          \
@@ -214,7 +219,7 @@ class HailoElemProperty final
     HailoElemProperty(T default_val) : m_value(default_val), m_was_changed(false) {}
 
     ~HailoElemProperty() {}
-    
+
     HailoElemProperty<T> &operator=(const T &value)
     {
         m_was_changed = true;
@@ -269,9 +274,6 @@ class HailoElemStringProperty final
     bool m_was_changed;
 };
 
-template<>
-HailoElemProperty<gchar*>::~HailoElemProperty();
-
 #define GST_TYPE_SCHEDULING_ALGORITHM (gst_scheduling_algorithm_get_type ())
 GType gst_scheduling_algorithm_get_type (void);
 
diff --git a/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailo_allocator.cpp b/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailo_allocator.cpp
index 08d455f0..50bb0cd8 100644
--- a/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailo_allocator.cpp
+++ b/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailo_allocator.cpp
@@ -27,33 +27,47 @@ static GstMemory *gst_hailo_allocator_alloc(GstAllocator* allocator, gsize size,
     GstHailoAllocator *hailo_allocator = GST_HAILO_ALLOCATOR(allocator);
     auto buffer = Buffer::create(size, BufferStorageParams::create_dma());
     if (!buffer) {
-        ERROR("Creating buffer for allocator has failed, status = %d\n", buffer.status());
+        HAILONET_ERROR("Creating buffer for allocator has failed, status = %d\n", buffer.status());
         return nullptr;
     }
 
     GstMemory *memory = gst_memory_new_wrapped(static_cast<GstMemoryFlags>(0), buffer->data(),
         buffer->size(), 0, buffer->size(), nullptr, nullptr);
     if (nullptr == memory) {
-        ERROR("Creating new GstMemory for allocator has failed!\n");
+        HAILONET_ERROR("Creating new GstMemory for allocator has failed!\n");
         return nullptr;
     }
 
-    hailo_allocator->buffers[memory] = std::move(buffer.release());
+    assert(nullptr != hailo_allocator->buffers);
+    (*hailo_allocator->buffers)[memory] = std::move(buffer.release());
     return memory;
 }
 
 static void gst_hailo_allocator_free(GstAllocator* allocator, GstMemory *mem) {
     GstHailoAllocator *hailo_allocator = GST_HAILO_ALLOCATOR(allocator);
-    hailo_allocator->buffers.erase(mem);
+    hailo_allocator->buffers->erase(mem);
+}
+
+static void gst_hailo_allocator_dispose(GObject *object) {
+    GstHailoAllocator *allocator = GST_HAILO_ALLOCATOR(object);
+
+    if (allocator->buffers != nullptr) {
+        delete allocator->buffers;
+        allocator->buffers = nullptr;
+    }
+
+    G_OBJECT_CLASS(gst_hailo_allocator_parent_class)->dispose(object);
 }
 
 static void gst_hailo_allocator_class_init(GstHailoAllocatorClass* klass) {
-    GstAllocatorClass* allocator_class = GST_ALLOCATOR_CLASS(klass);
+    GObjectClass *gobject_class = G_OBJECT_CLASS(klass);
+    gobject_class->dispose = gst_hailo_allocator_dispose;
 
+    GstAllocatorClass* allocator_class = GST_ALLOCATOR_CLASS(klass);
     allocator_class->alloc = gst_hailo_allocator_alloc;
     allocator_class->free = gst_hailo_allocator_free;
 }
 
 static void gst_hailo_allocator_init(GstHailoAllocator* allocator) {
-    allocator->buffers = std::unordered_map<GstMemory*, Buffer>();    
+    allocator->buffers = new std::unordered_map<GstMemory*, Buffer>();
 }
diff --git a/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailo_allocator.hpp b/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailo_allocator.hpp
index 7495fbeb..a6ea0038 100644
--- a/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailo_allocator.hpp
+++ b/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailo_allocator.hpp
@@ -35,7 +35,7 @@ G_BEGIN_DECLS
 struct GstHailoAllocator
 {
     GstAllocator parent;
-    std::unordered_map<GstMemory*, Buffer> buffers;
+    std::unordered_map<GstMemory*, Buffer> *buffers;
 };
 
 struct GstHailoAllocatorClass
diff --git a/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailo_dmabuf_allocator.cpp b/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailo_dmabuf_allocator.cpp
index cf9b21e3..3553f3bf 100644
--- a/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailo_dmabuf_allocator.cpp
+++ b/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailo_dmabuf_allocator.cpp
@@ -39,7 +39,7 @@ static GstMemory *gst_hailo_dmabuf_allocator_alloc(GstAllocator* allocator, gsiz
     if (!GstHailoDmaHeapControl::dma_heap_fd_open) {
         GstHailoDmaHeapControl::dma_heap_fd = open(DEVPATH, O_RDWR | O_CLOEXEC);
         if (GstHailoDmaHeapControl::dma_heap_fd < 0) {
-            ERROR("open fd failed!\n");
+            HAILONET_ERROR("open fd failed!\n");
             return nullptr;
         }
         GstHailoDmaHeapControl::dma_heap_fd_open = true;
@@ -55,37 +55,52 @@ static GstMemory *gst_hailo_dmabuf_allocator_alloc(GstAllocator* allocator, gsiz
 
     int ret = ioctl(GstHailoDmaHeapControl::dma_heap_fd, DMA_HEAP_IOCTL_ALLOC, &heap_data);
     if (ret < 0) {
-        ERROR("ioctl DMA_HEAP_IOCTL_ALLOC failed! ret = %d\n", ret);
+        HAILONET_ERROR("ioctl DMA_HEAP_IOCTL_ALLOC failed! ret = %d\n", ret);
         return nullptr;
     }
 
     if (GST_IS_DMABUF_ALLOCATOR(hailo_allocator) == false) {
-        ERROR("hailo_allocator is not dmabuf!\n");
+        HAILONET_ERROR("hailo_allocator is not dmabuf!\n");
         return nullptr;
     }
 
     GstMemory *memory = gst_dmabuf_allocator_alloc(allocator, heap_data.fd, size);
     if (nullptr == memory) {
-        ERROR("Creating new GstMemory for allocator has failed!\n");
+        HAILONET_ERROR("Creating new GstMemory for allocator has failed!\n");
         return nullptr;
     }
 
-    hailo_allocator->dma_buffers[memory] = heap_data;
+    assert(nullptr != hailo_allocator->dma_buffers);
+    (*hailo_allocator->dma_buffers)[memory] = heap_data;
     return memory;
 }
 
 static void gst_hailo_dmabuf_allocator_free(GstAllocator* allocator, GstMemory *mem) {
     GstHailoDmabufAllocator *hailo_allocator = GST_HAILO_DMABUF_ALLOCATOR(allocator);
-    close(hailo_allocator->dma_buffers[mem].fd);
-    hailo_allocator->dma_buffers.erase(mem);
+    assert(nullptr != hailo_allocator->dma_buffers);
+    close((*hailo_allocator->dma_buffers)[mem].fd);
+    hailo_allocator->dma_buffers->erase(mem);
 }
 
+static void gst_hailo_dmabuf_allocator_dispose(GObject *object) {
+    GstHailoDmabufAllocator *allocator = GST_HAILO_DMABUF_ALLOCATOR(object);
+    if (nullptr != allocator->dma_buffers) {
+        delete allocator->dma_buffers;
+        allocator->dma_buffers = nullptr;
+    }
+    G_OBJECT_CLASS(gst_hailo_dmabuf_allocator_parent_class)->dispose(object);
+}
+
+
 static void gst_hailo_dmabuf_allocator_class_init(GstHailoDmabufAllocatorClass* klass) {
+    GObjectClass *gobject_class = G_OBJECT_CLASS(klass);
+    gobject_class->dispose = gst_hailo_dmabuf_allocator_dispose;
+
     GstAllocatorClass* allocator_class = GST_ALLOCATOR_CLASS(klass);
     allocator_class->alloc = gst_hailo_dmabuf_allocator_alloc;
     allocator_class->free = gst_hailo_dmabuf_allocator_free;
 }
 
 static void gst_hailo_dmabuf_allocator_init(GstHailoDmabufAllocator* allocator) {
-    allocator->dma_buffers = std::unordered_map<GstMemory*, dma_heap_allocation_data>();
-}
+    allocator->dma_buffers = new std::unordered_map<GstMemory*, dma_heap_allocation_data>();
+}
\ No newline at end of file
diff --git a/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailo_dmabuf_allocator.hpp b/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailo_dmabuf_allocator.hpp
index e45c7778..bdf9511f 100644
--- a/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailo_dmabuf_allocator.hpp
+++ b/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailo_dmabuf_allocator.hpp
@@ -35,8 +35,6 @@ G_BEGIN_DECLS
 #define GST_IS_HAILO_DMABUF_ALLOCATOR(obj) (G_TYPE_CHECK_INSTANCE_TYPE ((obj), GST_TYPE_HAILO_DMABUF_ALLOCATOR))
 #define GST_IS_HAILO_DMABUF_ALLOCATOR_CLASS(klass) (G_TYPE_CHECK_CLASS_TYPE ((klass), GST_TYPE_HAILO_DMABUF_ALLOCATOR))
 
-#define GST_HAILO_USE_DMA_BUFFER_ENV_VAR "GST_HAILO_USE_DMA_BUFFER"
-
 class GstHailoDmaHeapControl {
 public:
     static bool dma_heap_fd_open;
@@ -46,7 +44,7 @@ class GstHailoDmaHeapControl {
 struct GstHailoDmabufAllocator
 {
     GstDmaBufAllocator parent;
-    std::unordered_map<GstMemory*, dma_heap_allocation_data> dma_buffers;
+    std::unordered_map<GstMemory*, dma_heap_allocation_data> *dma_buffers;
 };
 
 struct GstHailoDmabufAllocatorClass
diff --git a/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailodevicestats.cpp b/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailodevicestats.cpp
index f4a2f31f..e3c87c24 100644
--- a/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailodevicestats.cpp
+++ b/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailodevicestats.cpp
@@ -219,8 +219,8 @@ void HailoDeviceStatsImpl::join_thread()
 hailo_status HailoDeviceStatsImpl::run_measure_loop()
 {
     // Checking temperature sensor before starting thread
-    auto temp_info = m_device->get_chip_temperature();
-    GST_CHECK_EXPECTED_AS_STATUS(temp_info, m_element, RESOURCE, "Getting chip temperature failed, status = %d", temp_info.status());
+    auto initial_temp_info = m_device->get_chip_temperature();
+    GST_CHECK_EXPECTED_AS_STATUS(initial_temp_info, m_element, RESOURCE, "Getting chip temperature failed, status = %d", initial_temp_info.status());
 
     hailo_status status = m_device->stop_power_measurement();
     GST_CHECK_SUCCESS(status, m_element, RESOURCE, "Stopping power measurement failed, status = %d", status);
diff --git a/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailonet.cpp b/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailonet.cpp
index 271440de..fa33c92c 100644
--- a/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailonet.cpp
+++ b/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailonet.cpp
@@ -63,6 +63,16 @@ static GstStaticPadTemplate src_template = GST_STATIC_PAD_TEMPLATE("src", GST_PA
 
 G_DEFINE_TYPE (GstHailoNet, gst_hailonet, GST_TYPE_ELEMENT);
 
+static void gst_hailonet_dispose(GObject *object) {
+    GstHailoNet *self = GST_HAILONET(object);
+
+    assert(nullptr != self->impl);
+    delete self->impl;
+    self->impl = nullptr;
+
+    G_OBJECT_CLASS(gst_hailonet_parent_class)->dispose(object);
+}
+
 static std::atomic_uint32_t hailonet_count(0);
 
 static bool gst_hailo_should_use_dma_buffers()
@@ -74,87 +84,85 @@ static bool gst_hailo_should_use_dma_buffers()
 static hailo_status gst_hailonet_deconfigure(GstHailoNet *self)
 {
     // This will wakeup any blocking calls to deuque
-    for (auto &name_pool_pair : self->output_buffer_pools) {
+    for (auto &name_pool_pair : self->impl->output_buffer_pools) {
         gst_buffer_pool_set_flushing(name_pool_pair.second, TRUE);
     }
 
-    std::unique_lock<std::mutex> lock(self->infer_mutex);
-    self->configured_infer_model.reset();
-    self->is_configured = false;
+    std::unique_lock<std::mutex> lock(self->impl->infer_mutex);
+    self->impl->configured_infer_model.reset();
+    self->impl->is_configured = false;
     return HAILO_SUCCESS;
 }
 
 static void gst_hailonet_unref_input_caps(GstHailoNet *self)
 {
-    if (nullptr != self->input_caps) {
-        gst_caps_unref(self->input_caps);
-        self->input_caps = nullptr;
+    if (nullptr != self->impl->input_caps) {
+        gst_caps_unref(self->impl->input_caps);
+        self->impl->input_caps = nullptr;
     }
 }
 
 static hailo_status gst_hailonet_free(GstHailoNet *self)
 {
-    std::unique_lock<std::mutex> lock(self->infer_mutex);
-    self->configured_infer_model.reset();
-    self->infer_model.reset();
-    self->vdevice.reset();
+    std::unique_lock<std::mutex> lock(self->impl->infer_mutex);
+    self->impl->configured_infer_model.reset();
+    self->impl->infer_model.reset();
+    self->impl->vdevice.reset();
 
     {
-        std::unique_lock<std::mutex> lock(self->thread_queue_mutex);
-        self->is_thread_running = false;
+        std::unique_lock<std::mutex> lock2(self->impl->thread_queue_mutex);
+        self->impl->is_thread_running = false;
     }
-    self->thread_cv.notify_all();
+    self->impl->thread_cv.notify_all();
 
-    if (self->thread.joinable()) {
-        self->thread.join();
+    if (self->impl->thread.joinable()) {
+        self->impl->thread.join();
     }
 
-    if (nullptr != self->input_queue) {
-        gst_queue_array_free(self->input_queue);
+    if (nullptr != self->impl->input_queue) {
+        gst_queue_array_free(self->impl->input_queue);
     }
 
-    if (nullptr != self->thread_queue) {
-        gst_queue_array_free(self->thread_queue);
+    if (nullptr != self->impl->thread_queue) {
+        gst_queue_array_free(self->impl->thread_queue);
     }
 
-    while(!self->curr_event_queue.empty()) {
-        auto event = self->curr_event_queue.front();
+    while(!self->impl->curr_event_queue.empty()) {
+        auto event = self->impl->curr_event_queue.front();
         gst_event_unref(event);
-        self->curr_event_queue.pop();
+        self->impl->curr_event_queue.pop();
     }
 
-    for (auto &buffer_events_queue_pair : self->events_queue_per_buffer) {
+    for (auto &buffer_events_queue_pair : self->impl->events_queue_per_buffer) {
         while(!buffer_events_queue_pair.second.empty()) {
             auto event = buffer_events_queue_pair.second.front();
             gst_event_unref(event);
             buffer_events_queue_pair.second.pop();
         }
     }
-    self->events_queue_per_buffer.clear();
+    self->impl->events_queue_per_buffer.clear();
 
     {
-        std::unique_lock<std::mutex> lock(self->input_caps_mutex);
+        std::unique_lock<std::mutex> lock3(self->impl->input_caps_mutex);
         gst_hailonet_unref_input_caps(self);
     }
 
-    for (auto &name_pool_pair : self->output_buffer_pools) {
+    for (auto &name_pool_pair : self->impl->output_buffer_pools) {
         gboolean result = gst_buffer_pool_set_active(name_pool_pair.second, FALSE);
         CHECK(result, HAILO_INTERNAL_FAILURE, "Could not release buffer pool");
         gst_object_unref(name_pool_pair.second);
     }
-    self->output_buffer_pools.clear();
+    self->impl->output_buffer_pools.clear();
 
     if (gst_hailo_should_use_dma_buffers()) {
-        if (GstHailoDmaHeapControl::dma_heap_fd_open) {
-            close(GstHailoDmaHeapControl::dma_heap_fd);
-            GstHailoDmaHeapControl::dma_heap_fd_open = false;
-        }
+        auto status = self->impl->dmabuf_allocator->close_dma_heap_fd();
+        CHECK_SUCCESS(status);
 
-        if (nullptr != self->dmabuf_allocator) {
-            gst_object_unref(self->dmabuf_allocator);
+        if (nullptr != self->impl->dmabuf_allocator->impl) {
+            gst_object_unref(self->impl->dmabuf_allocator->impl);
         }
-    } else if (nullptr != self->allocator) {
-        gst_object_unref(self->allocator);
+    } else if (nullptr != self->impl->allocator) {
+        gst_object_unref(self->impl->allocator);
     }
 
     return HAILO_SUCCESS;
@@ -162,17 +170,17 @@ static hailo_status gst_hailonet_free(GstHailoNet *self)
 
 static hailo_status gst_hailonet_set_format_types(GstHailoNet *self, std::shared_ptr<InferModel> infer_model)
 {
-    if (self->props.m_input_format_type.was_changed()) {
+    if (self->impl->props.m_input_format_type.was_changed()) {
         for (const auto &input_name : infer_model->get_input_names()) {
             TRY(auto input, infer_model->input(input_name));
-            input.set_format_type(self->props.m_input_format_type.get());
+            input.set_format_type(self->impl->props.m_input_format_type.get());
         }
     }
-    if (self->props.m_output_format_type.was_changed()) {
+    if (self->impl->props.m_output_format_type.was_changed()) {
         for (const auto &output_name : infer_model->get_output_names()) {
             TRY(auto output, infer_model->output(output_name));
 
-            output.set_format_type(self->props.m_output_format_type.get());
+            output.set_format_type(self->impl->props.m_output_format_type.get());
         }
     }
 
@@ -190,22 +198,22 @@ static hailo_status gst_hailonet_set_nms_params(GstHailoNet *self, std::shared_p
     for (const auto &output_name : infer_model->get_output_names()) {
         TRY(auto output, infer_model->output(output_name));
 
-        if (self->props.m_nms_score_threshold.was_changed()) {
+        if (self->impl->props.m_nms_score_threshold.was_changed()) {
             CHECK(has_nms_output, HAILO_INVALID_OPERATION, "NMS score threshold is set, but there is no NMS output in this model.");
             if (output.is_nms()) {
-                output.set_nms_score_threshold(self->props.m_nms_score_threshold.get());
+                output.set_nms_score_threshold(self->impl->props.m_nms_score_threshold.get());
             }
         }
-        if (self->props.m_nms_iou_threshold.was_changed()) {
+        if (self->impl->props.m_nms_iou_threshold.was_changed()) {
             CHECK(has_nms_output, HAILO_INVALID_OPERATION, "NMS IoU threshold is set, but there is no NMS output in this model.");
             if (output.is_nms()) {
-                output.set_nms_iou_threshold(self->props.m_nms_iou_threshold.get());
+                output.set_nms_iou_threshold(self->impl->props.m_nms_iou_threshold.get());
             }
         }
-        if (self->props.m_nms_max_proposals_per_class.was_changed()) {
+        if (self->impl->props.m_nms_max_proposals_per_class.was_changed()) {
             CHECK(has_nms_output, HAILO_INVALID_OPERATION, "NMS max proposals per class is set, but there is no NMS output in this model.");
             if (output.is_nms()) {
-                output.set_nms_max_proposals_per_class(self->props.m_nms_max_proposals_per_class.get());
+                output.set_nms_max_proposals_per_class(self->impl->props.m_nms_max_proposals_per_class.get());
             }
         }
     }
@@ -215,17 +223,17 @@ static hailo_status gst_hailonet_set_nms_params(GstHailoNet *self, std::shared_p
 
 static hailo_status gst_hailonet_set_scheduler_params(GstHailoNet *self, std::shared_ptr<ConfiguredInferModel> configured_infer_model)
 {
-    if (self->props.m_scheduler_timeout_ms.was_changed()) {
-        auto millis = std::chrono::milliseconds(self->props.m_scheduler_timeout_ms.get());
+    if (self->impl->props.m_scheduler_timeout_ms.was_changed()) {
+        auto millis = std::chrono::milliseconds(self->impl->props.m_scheduler_timeout_ms.get());
         auto status = configured_infer_model->set_scheduler_timeout(millis);
         CHECK_SUCCESS(status, "Setting scheduler timeout failed, status = %d", status);
     }
-    if (self->props.m_scheduler_threshold.was_changed()) {
-        auto status = configured_infer_model->set_scheduler_threshold(self->props.m_scheduler_threshold.get());
+    if (self->impl->props.m_scheduler_threshold.was_changed()) {
+        auto status = configured_infer_model->set_scheduler_threshold(self->impl->props.m_scheduler_threshold.get());
         CHECK_SUCCESS(status, "Setting scheduler threshold failed, status = %d", status);
     }
-    if (self->props.m_scheduler_priority.was_changed()) {
-        auto status = configured_infer_model->set_scheduler_priority(self->props.m_scheduler_priority.get());
+    if (self->impl->props.m_scheduler_priority.was_changed()) {
+        auto status = configured_infer_model->set_scheduler_priority(self->impl->props.m_scheduler_priority.get());
         CHECK_SUCCESS(status, "Setting scheduler priority failed, status = %d", status);
     }
 
@@ -237,13 +245,13 @@ static Expected<GstBufferPool*> gst_hailonet_create_buffer_pool(GstHailoNet *sel
     GstBufferPool *pool = gst_buffer_pool_new();
 
     GstStructure *config = gst_buffer_pool_get_config(pool);
-    gst_buffer_pool_config_set_params(config, nullptr, static_cast<guint>(frame_size), self->props.m_outputs_min_pool_size.get(),
-        self->props.m_outputs_max_pool_size.get());
+    gst_buffer_pool_config_set_params(config, nullptr, static_cast<guint>(frame_size), self->impl->props.m_outputs_min_pool_size.get(),
+        self->impl->props.m_outputs_max_pool_size.get());
 
     if (gst_hailo_should_use_dma_buffers()) {
-        gst_buffer_pool_config_set_allocator(config, GST_ALLOCATOR(self->dmabuf_allocator), nullptr);
+        gst_buffer_pool_config_set_allocator(config, GST_ALLOCATOR(self->impl->dmabuf_allocator->impl), nullptr);
     } else {
-        gst_buffer_pool_config_set_allocator(config, GST_ALLOCATOR(self->allocator), nullptr);
+        gst_buffer_pool_config_set_allocator(config, GST_ALLOCATOR(self->impl->allocator), nullptr);
     }
 
     gboolean result = gst_buffer_pool_set_config(pool, config);
@@ -257,8 +265,8 @@ static Expected<GstBufferPool*> gst_hailonet_create_buffer_pool(GstHailoNet *sel
 
 static void gst_hailonet_push_event_to_queue(GstHailoNet *self, GstEvent *event)
 {
-    std::unique_lock<std::mutex> lock(self->input_queue_mutex);
-    self->curr_event_queue.push(event);
+    std::unique_lock<std::mutex> lock(self->impl->input_queue_mutex); // Held until return, so the push below runs under input_queue_mutex.
+    self->impl->curr_event_queue.push(event); // Queues the event pointer as-is; no extra reference is taken in this function.
 }
 
 static gboolean gst_hailonet_handle_queued_event(GstHailoNet *self, GstEvent *event)
@@ -279,66 +287,66 @@ static gboolean gst_hailonet_handle_queued_event(GstHailoNet *self, GstEvent *ev
 
 static void gst_hailonet_handle_buffer_events(GstHailoNet *self, GstBuffer *buffer)
 {
-    if (self->events_queue_per_buffer.find(buffer) == self->events_queue_per_buffer.end()) {
+    if (self->impl->events_queue_per_buffer.find(buffer) == self->impl->events_queue_per_buffer.end()) { // No entry is keyed by this buffer.
         // The buffer does not have any events to send
         return;
     }
 
-    while (!self->events_queue_per_buffer.at(buffer).empty()) {
-        GstEvent* event = self->events_queue_per_buffer.at(buffer).front();
+    while (!self->impl->events_queue_per_buffer.at(buffer).empty()) { // Drain this buffer's queue, front element first.
+        GstEvent* event = self->impl->events_queue_per_buffer.at(buffer).front(); // Peek only; the entry is popped after it was handled (handler result is discarded via the (void) cast).
         (void)gst_hailonet_handle_queued_event(self, event);
-        self->events_queue_per_buffer.at(buffer).pop();
+        self->impl->events_queue_per_buffer.at(buffer).pop();
     }
-    self->events_queue_per_buffer.erase(buffer);
+    self->impl->events_queue_per_buffer.erase(buffer); // Remove the now-empty entry so the map keeps no key for this buffer.
 }
 
 static hailo_status gst_hailonet_configure(GstHailoNet *self)
 {
-    if (self->is_configured) {
+    if (self->impl->is_configured) { // Already configured: nothing to do, report success.
         return HAILO_SUCCESS;
     }
 
-    for (auto &name_pool_pair : self->output_buffer_pools) {
+    for (auto &name_pool_pair : self->impl->output_buffer_pools) { // Take every known output buffer pool out of flushing mode.
         gst_buffer_pool_set_flushing(name_pool_pair.second, FALSE);
     }
 
-    self->infer_model->set_batch_size(self->props.m_batch_size.get());
+    self->impl->infer_model->set_batch_size(self->impl->props.m_batch_size.get()); // Apply the batch-size property to the infer model.
 
-    auto status = gst_hailonet_set_format_types(self, self->infer_model);
+    auto status = gst_hailonet_set_format_types(self, self->impl->infer_model); // Applies input/output format types whose properties were changed.
     CHECK_SUCCESS(status);
 
-    status = gst_hailonet_set_nms_params(self, self->infer_model);
+    status = gst_hailonet_set_nms_params(self, self->impl->infer_model); // Applies NMS score/IoU thresholds and max proposals whose properties were changed.
     CHECK_SUCCESS(status);
 
     // In RGB formats, Gstreamer is padding each row to 4.
-    for (const auto &input_name : self->infer_model->get_input_names()) {
-        if(self->props.m_no_transform.get()) {
+    for (const auto &input_name : self->impl->infer_model->get_input_names()) { // Choose the format order of each input.
+        if(self->impl->props.m_no_transform.get()) {
             // In case transformation is disabled - format order will be the same as we get from the HW (stream info).
-            TRY(const auto input_stream_infos, self->infer_model->hef().get_stream_info_by_name(input_name, HAILO_H2D_STREAM));
-            self->infer_model->input(input_name)->set_format_order(input_stream_infos.format.order);
-        } else if (self->infer_model->input(input_name)->format().order == HAILO_FORMAT_ORDER_NHWC) {
-            self->infer_model->input(input_name)->set_format_order(HAILO_FORMAT_ORDER_RGB4);
+            TRY(const auto input_stream_infos, self->impl->infer_model->hef().get_stream_info_by_name(input_name, HAILO_H2D_STREAM));
+            self->impl->infer_model->input(input_name)->set_format_order(input_stream_infos.format.order); // NOTE(review): input() result is dereferenced without a visible status check (elsewhere it is wrapped in TRY) - confirm it cannot fail here.
+        } else if (self->impl->infer_model->input(input_name)->format().order == HAILO_FORMAT_ORDER_NHWC) { // NHWC inputs are switched to RGB4, matching the row-padding note above.
+            self->impl->infer_model->input(input_name)->set_format_order(HAILO_FORMAT_ORDER_RGB4);
         }
     }
 
-    if (self->props.m_no_transform.get()) {
-        for (const auto &output_name : self->infer_model->get_output_names()) {
+    if (self->impl->props.m_no_transform.get()) { // Outputs are only touched when transformation is disabled.
+        for (const auto &output_name : self->impl->infer_model->get_output_names()) {
             // In case transformation is disabled - format order will be the same as we get from the HW (stream info).
-            TRY(const auto output_stream_infos, self->infer_model->hef().get_stream_info_by_name(output_name, HAILO_D2H_STREAM));
-            self->infer_model->output(output_name)->set_format_order(output_stream_infos.format.order);
+            TRY(const auto output_stream_infos, self->impl->infer_model->hef().get_stream_info_by_name(output_name, HAILO_D2H_STREAM));
+            self->impl->infer_model->output(output_name)->set_format_order(output_stream_infos.format.order); // NOTE(review): same unchecked dereference as for inputs - confirm.
         }
     }
 
-    TRY(auto configured_infer_model, self->infer_model->configure());
+    TRY(auto configured_infer_model, self->impl->infer_model->configure()); // Result is moved into a shared_ptr right below.
 
     auto ptr = make_shared_nothrow<ConfiguredInferModel>(std::move(configured_infer_model));
     CHECK_NOT_NULL(ptr, HAILO_OUT_OF_HOST_MEMORY);
-    self->configured_infer_model = ptr;
+    self->impl->configured_infer_model = ptr; // Keep the configured model on the element's impl for later use.
 
-    status = gst_hailonet_set_scheduler_params(self, self->configured_infer_model);
+    status = gst_hailonet_set_scheduler_params(self, self->impl->configured_infer_model); // Applies scheduler timeout/threshold/priority whose properties were changed.
     CHECK_SUCCESS(status);
 
-    self->is_configured = true;
+    self->impl->is_configured = true; // Set as the final step; the guard at the top of this function checks this flag.
     return HAILO_SUCCESS;
 }
 
@@ -349,11 +357,18 @@ static void gst_hailonet_init_allocator(GstHailoNet *self)
     g_free(parent_name);
 
     if (gst_hailo_should_use_dma_buffers()) {
-        self->dmabuf_allocator = GST_HAILO_DMABUF_ALLOCATOR(g_object_new(GST_TYPE_HAILO_DMABUF_ALLOCATOR, "name", name, NULL));
-        gst_object_ref_sink(self->dmabuf_allocator);
+        auto expected_dmabuf_allocator = HailoDmaBuffAllocator::create(name);
+        if (HAILO_NOT_IMPLEMENTED == expected_dmabuf_allocator.status()) {
+            HAILONET_ERROR("dma buff is not supported on this OS");
+        } else if (HAILO_SUCCESS != expected_dmabuf_allocator.status()) {
+            HAILONET_ERROR("dma buff creation failed with status %d\n", expected_dmabuf_allocator.status());
+        } else {
+            self->impl->dmabuf_allocator = expected_dmabuf_allocator.release();
+            gst_object_ref_sink(self->impl->dmabuf_allocator->impl);
+        }
     } else {
-        self->allocator = GST_HAILO_ALLOCATOR(g_object_new(GST_TYPE_HAILO_ALLOCATOR, "name", name, NULL));
-        gst_object_ref_sink(self->allocator);
+        self->impl->allocator = GST_HAILO_ALLOCATOR(g_object_new(GST_TYPE_HAILO_ALLOCATOR, "name", name, NULL));
+        gst_object_ref_sink(self->impl->allocator);
     }
 
     g_free(name);
@@ -361,35 +376,35 @@ static void gst_hailonet_init_allocator(GstHailoNet *self)
 
 static hailo_status gst_hailonet_allocate_infer_resources(GstHailoNet *self)
 {
-    TRY(self->infer_bindings, self->configured_infer_model->create_bindings());
+    TRY(self->impl->infer_bindings, self->impl->configured_infer_model->create_bindings());
 
-    self->output_buffer_pools = std::unordered_map<std::string, GstBufferPool*>();
-    self->output_vstream_infos = std::unordered_map<std::string, hailo_vstream_info_t>();
+    self->impl->output_buffer_pools = std::unordered_map<std::string, GstBufferPool*>();
+    self->impl->output_vstream_infos = std::unordered_map<std::string, hailo_vstream_info_t>();
 
-    TRY(const auto async_queue_size, self->configured_infer_model->get_async_queue_size());
-    self->input_queue = gst_queue_array_new(static_cast<guint>(async_queue_size));
-    self->thread_queue = gst_queue_array_new(static_cast<guint>(async_queue_size));
-    self->is_thread_running = true;
-    self->thread = std::thread([self] () {
-        while (self->is_thread_running) {
+    TRY(const auto async_queue_size, self->impl->configured_infer_model->get_async_queue_size());
+    self->impl->input_queue = gst_queue_array_new(static_cast<guint>(async_queue_size));
+    self->impl->thread_queue = gst_queue_array_new(static_cast<guint>(async_queue_size));
+    self->impl->is_thread_running = true;
+    self->impl->thread = std::thread([self] () {
+        while (self->impl->is_thread_running) {
             GstBuffer *buffer = nullptr;
             {
-                std::unique_lock<std::mutex> lock(self->thread_queue_mutex);
-                self->thread_cv.wait(lock, [self] () {
-                    return (self->buffers_in_thread_queue > 0) || !self->is_thread_running;
+                std::unique_lock<std::mutex> lock(self->impl->thread_queue_mutex);
+                self->impl->thread_cv.wait(lock, [self] () {
+                    return ((self->impl->buffers_in_thread_queue > 0) || !self->impl->is_thread_running);
                 });
-                if (!self->is_thread_running) {
+                if (!self->impl->is_thread_running) {
                     break;
                 }
 
-                buffer = static_cast<GstBuffer*>(gst_queue_array_pop_head(self->thread_queue));
-                self->buffers_in_thread_queue--;
+                buffer = static_cast<GstBuffer*>(gst_queue_array_pop_head(self->impl->thread_queue));
+                self->impl->buffers_in_thread_queue--;
             }
-            self->thread_cv.notify_all();
+            self->impl->thread_cv.notify_all();
             if (GST_IS_PAD(self->srcpad)) { // Checking because we fail here when exiting the application
                 GstFlowReturn ret = gst_pad_push(self->srcpad, buffer);
-                if ((GST_FLOW_OK != ret) && (GST_FLOW_FLUSHING != ret) && ((GST_FLOW_EOS != ret)) && (!self->has_got_eos)) {
-                    ERROR("gst_pad_push failed with status = %d\n", ret);
+                if ((GST_FLOW_OK != ret) && (GST_FLOW_FLUSHING != ret) && ((GST_FLOW_EOS != ret)) && (!self->impl->has_got_eos)) {
+                    HAILONET_ERROR("gst_pad_push failed with status = %d\n", ret);
                     break;
                 }
             }
@@ -397,13 +412,13 @@ static hailo_status gst_hailonet_allocate_infer_resources(GstHailoNet *self)
     });
 
     gst_hailonet_init_allocator(self);
-    for (auto &output : self->infer_model->outputs()) {
-        TRY(self->output_buffer_pools[output.name()], gst_hailonet_create_buffer_pool(self, output.get_frame_size()));
+    for (auto &output : self->impl->infer_model->outputs()) {
+        TRY(self->impl->output_buffer_pools[output.name()], gst_hailonet_create_buffer_pool(self, output.get_frame_size()));
     }
 
-    TRY(const auto vstream_infos, self->infer_model->hef().get_output_vstream_infos());
+    TRY(const auto vstream_infos, self->impl->infer_model->hef().get_output_vstream_infos());
     for (const auto &vstream_info : vstream_infos) {
-        self->output_vstream_infos[vstream_info.name] = vstream_info;
+        self->impl->output_vstream_infos[vstream_info.name] = vstream_info;
     }
 
     return HAILO_SUCCESS;
@@ -412,9 +427,9 @@ static hailo_status gst_hailonet_allocate_infer_resources(GstHailoNet *self)
 static GstPadProbeReturn gst_hailonet_sink_probe(GstPad */*pad*/, GstPadProbeInfo */*info*/, gpointer user_data)
 {
     GstHailoNet *self = static_cast<GstHailoNet*>(user_data);
-    std::unique_lock<std::mutex> lock(self->sink_probe_change_state_mutex);
+    std::unique_lock<std::mutex> lock(self->impl->sink_probe_change_state_mutex);
 
-    if (self->did_critical_failure_happen) {
+    if (self->impl->did_critical_failure_happen) {
         return GST_PAD_PROBE_REMOVE;
     }
 
@@ -428,23 +443,23 @@ static GstPadProbeReturn gst_hailonet_sink_probe(GstPad */*pad*/, GstPadProbeInf
         return GST_PAD_PROBE_REMOVE;
     }
 
-    if (HAILO_SCHEDULING_ALGORITHM_NONE != self->props.m_scheduling_algorithm.get()) {
-        self->props.m_is_active = true;
+    if (HAILO_SCHEDULING_ALGORITHM_NONE != self->impl->props.m_scheduling_algorithm.get()) {
+        self->impl->props.m_is_active = true;
         return GST_PAD_PROBE_REMOVE;
     }
 
-    if ((1 == hailonet_count) && (!self->props.m_is_active.was_changed())) {
-        self->props.m_is_active = true;
+    if ((1 == hailonet_count) && (!self->impl->props.m_is_active.was_changed())) {
+        self->impl->props.m_is_active = true;
     }
 
-    if (self->props.m_is_active.get()) {
-        status = self->configured_infer_model->activate();
+    if (self->impl->props.m_is_active.get()) {
+        status = self->impl->configured_infer_model->activate();
         if (HAILO_SUCCESS != status) {
             return GST_PAD_PROBE_REMOVE;
         }
     }
 
-    self->has_called_activate = true;
+    self->impl->has_called_activate = true;
     return GST_PAD_PROBE_REMOVE;
 }
 
@@ -456,7 +471,7 @@ static GstStateChangeReturn gst_hailonet_change_state(GstElement *element, GstSt
     }
 
     GstHailoNet *self = GST_HAILONET(element);
-    std::unique_lock<std::mutex> lock(self->sink_probe_change_state_mutex);
+    std::unique_lock<std::mutex> lock(self->impl->sink_probe_change_state_mutex);
 
     switch (transition) {
     case GST_STATE_CHANGE_PAUSED_TO_PLAYING:
@@ -494,30 +509,30 @@ static GstStateChangeReturn gst_hailonet_change_state(GstElement *element, GstSt
 
 static hailo_status gst_hailonet_toggle_activation(GstHailoNet *self, gboolean old_is_active, gboolean new_is_active)
 {
-    std::unique_lock<std::mutex> lock(self->infer_mutex);
+    std::unique_lock<std::mutex> lock(self->impl->infer_mutex); // Hold infer_mutex for the whole toggle.
 
-    if (self->props.m_scheduling_algorithm.was_changed() && (HAILO_SCHEDULING_ALGORITHM_NONE != self->props.m_scheduling_algorithm.get())) {
+    if (self->impl->props.m_scheduling_algorithm.was_changed() && (HAILO_SCHEDULING_ALGORITHM_NONE != self->impl->props.m_scheduling_algorithm.get())) { // NOTE(review): GLib documents g_error() as fatal, so the return below is presumably never reached - confirm.
         g_error("scheduling-algorithm different than HAILO_SCHEDULING_ALGORITHM_NONE in combination with 'is-active' is not supported.");
         return HAILO_INVALID_OPERATION;
     }
 
-    if (self->has_called_activate) {
+    if (self->impl->has_called_activate) { // Only act on the configured model once has_called_activate is set; otherwise just record the new value below.
         // Should we keep this? If the user changes the is-active property when we are not configured, it's his fault.
-        if (!self->is_configured) {
+        if (!self->impl->is_configured) {
             g_warning("Trying to change is-active property when network is not configured!");
             return HAILO_INVALID_OPERATION;
         }
         if (old_is_active && !new_is_active) {
-            self->configured_infer_model->deactivate();
+            self->impl->configured_infer_model->deactivate(); // NOTE(review): any status from deactivate() is not checked, unlike activate() below - confirm this is intended.
         } else if (!old_is_active && new_is_active) {
-            auto status = self->configured_infer_model->activate();
+            auto status = self->impl->configured_infer_model->activate();
             CHECK_SUCCESS(status);
         } else {
             g_warning("Trying to change is-active property from %d to %d", old_is_active, new_is_active);
         }
     }
 
-    self->props.m_is_active = new_is_active;
+    self->impl->props.m_is_active = new_is_active; // Stored on every path that reaches here, including the "no change" warning branch.
     return HAILO_SUCCESS;
 }
 
@@ -526,168 +541,168 @@ static void gst_hailonet_set_property(GObject *object, guint property_id, const
     GstHailoNet *self = GST_HAILONET(object);
     switch (property_id) {
     case PROP_HEF_PATH:
-        if (self->is_configured) {
+        if (self->impl->is_configured) {
             g_warning("The network was already configured so changing the HEF path will not take place!");
             break;
         }
-        self->props.m_hef_path = g_value_get_string(value);
+        self->impl->props.m_hef_path = g_value_get_string(value);
         break;
     case PROP_BATCH_SIZE:
-        if (self->is_configured) {
+        if (self->impl->is_configured) {
             g_warning("The network was already configured so changing the batch size will not take place!");
             break;
         }
-        self->props.m_batch_size = static_cast<guint16>(g_value_get_uint(value));
+        self->impl->props.m_batch_size = static_cast<guint16>(g_value_get_uint(value));
         break;
     case PROP_DEVICE_ID:
-        if (0 != self->props.m_device_count.get()) {
+        if (0 != self->impl->props.m_device_count.get()) {
             g_error("device-id and device-count excludes eachother. received device-id=%s, device-count=%d",
-                g_value_get_string(value), self->props.m_device_count.get());
+                g_value_get_string(value), self->impl->props.m_device_count.get());
             break;
         }
-        if (self->is_configured) {
+        if (self->impl->is_configured) {
             g_warning("The network was already configured so changing the device ID will not take place!");
             break;
         }
-        self->props.m_device_id = g_value_get_string(value);
+        self->impl->props.m_device_id = g_value_get_string(value);
         break;
     case PROP_DEVICE_COUNT:
-        if (!self->props.m_device_id.get().empty()) {
+        if (!self->impl->props.m_device_id.get().empty()) {
             g_error("device-id and device-count excludes eachother. received device-id=%s, device-count=%d",
-                self->props.m_device_id.get().c_str(), g_value_get_uint(value));
+                self->impl->props.m_device_id.get().c_str(), g_value_get_uint(value));
             break;
         }
-        if (self->is_configured) {
+        if (self->impl->is_configured) {
             g_warning("The network was already configured so changing the device count will not take place!");
             break;
         }
-        self->props.m_device_count = static_cast<guint16>(g_value_get_uint(value));
+        self->impl->props.m_device_count = static_cast<guint16>(g_value_get_uint(value));
         break;
     case PROP_VDEVICE_GROUP_ID:
-        if (self->is_configured) {
+        if (self->impl->is_configured) {
             g_warning("The network was already configured so changing the vdevice group ID will not take place!");
             break;
         }
-        self->props.m_vdevice_group_id = g_value_get_string(value);
+        self->impl->props.m_vdevice_group_id = g_value_get_string(value);
         break;
     case PROP_IS_ACTIVE:
-        (void)gst_hailonet_toggle_activation(self, self->props.m_is_active.get(), g_value_get_boolean(value));
+        (void)gst_hailonet_toggle_activation(self, self->impl->props.m_is_active.get(), g_value_get_boolean(value));
         break;
     case PROP_PASS_THROUGH:
-        self->props.m_pass_through = g_value_get_boolean(value);
+        self->impl->props.m_pass_through = g_value_get_boolean(value);
         break;
     case PROP_FORCE_WRITABLE:
-        self->props.m_should_force_writable = g_value_get_boolean(value);
+        self->impl->props.m_should_force_writable = g_value_get_boolean(value);
         break;
     case PROP_OUTPUTS_MIN_POOL_SIZE:
-        if (self->is_configured) {
+        if (self->impl->is_configured) {
             g_warning("The network has already been configured, the output's minimum pool size cannot be changed!");
             break;
         }
-        self->props.m_outputs_min_pool_size = g_value_get_uint(value);
+        self->impl->props.m_outputs_min_pool_size = g_value_get_uint(value);
         break;
     case PROP_OUTPUTS_MAX_POOL_SIZE:
-        if (self->is_configured) {
+        if (self->impl->is_configured) {
             g_warning("The network was already configured so changing the outputs maximum pool size will not take place!");
             break;
         }
-        self->props.m_outputs_max_pool_size = g_value_get_uint(value);
+        self->impl->props.m_outputs_max_pool_size = g_value_get_uint(value);
         break;
     case PROP_SCHEDULING_ALGORITHM:
-        if (self->is_configured) {
+        if (self->impl->is_configured) {
             g_warning("The network was already configured so changing the scheduling algorithm will not take place!");
             break;
         }
-        if (self->props.m_is_active.was_changed() && (g_value_get_enum(value) != HAILO_SCHEDULING_ALGORITHM_NONE)) {
+        if (self->impl->props.m_is_active.was_changed() && (g_value_get_enum(value) != HAILO_SCHEDULING_ALGORITHM_NONE)) {
             g_error("scheduling-algorithm different than HAILO_SCHEDULING_ALGORITHM_NONE in combination with 'is-active' is not supported.");
             break;
         }
-        self->props.m_scheduling_algorithm = static_cast<hailo_scheduling_algorithm_t>(g_value_get_enum(value));
+        self->impl->props.m_scheduling_algorithm = static_cast<hailo_scheduling_algorithm_t>(g_value_get_enum(value));
         break;
     case PROP_SCHEDULER_TIMEOUT_MS:
-        if (self->is_configured) {
+        if (self->impl->is_configured) {
             g_warning("The network was already configured so changing the scheduling timeout will not take place!");
             break;
         }
-        self->props.m_scheduler_timeout_ms = g_value_get_uint(value);
+        self->impl->props.m_scheduler_timeout_ms = g_value_get_uint(value);
         break;
     case PROP_SCHEDULER_THRESHOLD:
-        if (self->is_configured) {
+        if (self->impl->is_configured) {
             g_warning("The network was already configured so changing the scheduling threshold will not take place!");
             break;
         }
-        self->props.m_scheduler_threshold = g_value_get_uint(value);
+        self->impl->props.m_scheduler_threshold = g_value_get_uint(value);
         break;
     case PROP_SCHEDULER_PRIORITY:
-        if (self->is_configured) {
+        if (self->impl->is_configured) {
             g_warning("The network was already configured so changing the scheduling priority will not take place!");
             break;
         }
-        self->props.m_scheduler_priority = static_cast<guint8>(g_value_get_uint(value));
+        self->impl->props.m_scheduler_priority = static_cast<guint8>(g_value_get_uint(value));
         break;
     case PROP_INPUT_FORMAT_TYPE:
-        if (self->is_configured) {
+        if (self->impl->is_configured) {
             g_warning("The network was already configured so changing the format type will not take place!");
             break;
         }
-        self->props.m_input_format_type = static_cast<hailo_format_type_t>(g_value_get_enum(value));
+        self->impl->props.m_input_format_type = static_cast<hailo_format_type_t>(g_value_get_enum(value));
         break;
     case PROP_OUTPUT_FORMAT_TYPE:
-        if (self->is_configured) {
+        if (self->impl->is_configured) {
             g_warning("The network was already configured so changing the format type will not take place!");
             break;
         }
-        self->props.m_output_format_type = static_cast<hailo_format_type_t>(g_value_get_enum(value));
+        self->impl->props.m_output_format_type = static_cast<hailo_format_type_t>(g_value_get_enum(value));
         break;
     case PROP_NMS_SCORE_THRESHOLD:
-        if (self->is_configured) {
+        if (self->impl->is_configured) {
             g_warning("The network was already configured so changing the score threshold will not take place!");
             break;
         }
-        self->props.m_nms_score_threshold = static_cast<gfloat>(g_value_get_float(value));
+        self->impl->props.m_nms_score_threshold = static_cast<gfloat>(g_value_get_float(value));
         break;
     case PROP_NMS_IOU_THRESHOLD:
-        if (self->is_configured) {
+        if (self->impl->is_configured) {
             g_warning("The network was already configured so changing the IoU threshold will not take place!");
             break;
         }
-        self->props.m_nms_iou_threshold = static_cast<gfloat>(g_value_get_float(value));
+        self->impl->props.m_nms_iou_threshold = static_cast<gfloat>(g_value_get_float(value));
         break;
     case PROP_NMS_MAX_PROPOSALS_PER_CLASS:
-        if (self->is_configured) {
+        if (self->impl->is_configured) {
             g_warning("The network was already configured so changing the max proposals per class will not take place!");
             break;
         }
-        self->props.m_nms_max_proposals_per_class = static_cast<guint32>(g_value_get_uint(value));
+        self->impl->props.m_nms_max_proposals_per_class = static_cast<guint32>(g_value_get_uint(value));
         break;
     case PROP_INPUT_FROM_META:
-        if (self->is_configured) {
+        if (self->impl->is_configured) {
             g_warning("The network was already configured so changing the input method will not take place!");
             break;
         }
-        self->props.m_input_from_meta = g_value_get_boolean(value);
+        self->impl->props.m_input_from_meta = g_value_get_boolean(value);
         break;
     case PROP_NO_TRANSFORM:
-        if (self->is_configured) {
+        if (self->impl->is_configured) {
             g_warning("The network was already configured so disabling the transformation will not take place!");
         }
-        self->props.m_no_transform = g_value_get_boolean(value);
+        self->impl->props.m_no_transform = g_value_get_boolean(value);
         break;
     case PROP_MULTI_PROCESS_SERVICE:
-        if (self->is_configured) {
+        if (self->impl->is_configured) {
             g_warning("The network was already configured so changing the multi-process-service property will not take place!");
             break;
         }
-        self->props.m_multi_process_service = g_value_get_boolean(value);
+        self->impl->props.m_multi_process_service = g_value_get_boolean(value);
         break;
     
     // Deprecated
     case PROP_VDEVICE_KEY:
-        if (self->is_configured) {
+        if (self->impl->is_configured) {
             g_warning("The network was already configured so changing the vdevice key will not take place!");
             break;
         }
-        self->props.m_vdevice_key = static_cast<guint32>(g_value_get_uint(value));
+        self->impl->props.m_vdevice_key = static_cast<guint32>(g_value_get_uint(value));
         break;
     default:
         G_OBJECT_WARN_INVALID_PROPERTY_ID(object, property_id, pspec);
@@ -700,75 +715,75 @@ static void gst_hailonet_get_property(GObject *object, guint property_id, GValue
     GstHailoNet *self = GST_HAILONET(object);
     switch (property_id) {
     case PROP_HEF_PATH:
-        g_value_set_string(value, self->props.m_hef_path.get().c_str());
+        g_value_set_string(value, self->impl->props.m_hef_path.get().c_str());
         break;
     case PROP_BATCH_SIZE:
-        g_value_set_uint(value, self->props.m_batch_size.get());
+        g_value_set_uint(value, self->impl->props.m_batch_size.get());
         break;
     case PROP_DEVICE_ID:
-        g_value_set_string(value, self->props.m_device_id.get().c_str());
+        g_value_set_string(value, self->impl->props.m_device_id.get().c_str());
         break;
     case PROP_DEVICE_COUNT:
-        g_value_set_uint(value, self->props.m_device_count.get());
+        g_value_set_uint(value, self->impl->props.m_device_count.get());
         break;
     case PROP_VDEVICE_GROUP_ID:
-        g_value_set_string(value, self->props.m_vdevice_group_id.get().c_str());
+        g_value_set_string(value, self->impl->props.m_vdevice_group_id.get().c_str());
         break;
     case PROP_IS_ACTIVE:
-        g_value_set_boolean(value, self->props.m_is_active.get());
+        g_value_set_boolean(value, self->impl->props.m_is_active.get());
         break;
     case PROP_PASS_THROUGH:
-        g_value_set_boolean(value, self->props.m_pass_through.get());
+        g_value_set_boolean(value, self->impl->props.m_pass_through.get());
         break;
     case PROP_FORCE_WRITABLE:
-        g_value_set_boolean(value, self->props.m_should_force_writable.get());
+        g_value_set_boolean(value, self->impl->props.m_should_force_writable.get());
         break;
     case PROP_OUTPUTS_MIN_POOL_SIZE:
-        g_value_set_uint(value, self->props.m_outputs_min_pool_size.get());
+        g_value_set_uint(value, self->impl->props.m_outputs_min_pool_size.get());
         break;
     case PROP_OUTPUTS_MAX_POOL_SIZE:
-        g_value_set_uint(value, self->props.m_outputs_max_pool_size.get());
+        g_value_set_uint(value, self->impl->props.m_outputs_max_pool_size.get());
         break;
     case PROP_SCHEDULING_ALGORITHM:
-        g_value_set_enum(value, self->props.m_scheduling_algorithm.get());
+        g_value_set_enum(value, self->impl->props.m_scheduling_algorithm.get());
         break;
     case PROP_SCHEDULER_TIMEOUT_MS:
-        g_value_set_uint(value, self->props.m_scheduler_timeout_ms.get());
+        g_value_set_uint(value, self->impl->props.m_scheduler_timeout_ms.get());
         break;
     case PROP_SCHEDULER_THRESHOLD:
-        g_value_set_uint(value, self->props.m_scheduler_threshold.get());
+        g_value_set_uint(value, self->impl->props.m_scheduler_threshold.get());
         break;
     case PROP_SCHEDULER_PRIORITY:
-        g_value_set_uint(value, self->props.m_scheduler_priority.get());
+        g_value_set_uint(value, self->impl->props.m_scheduler_priority.get());
         break;
     case PROP_INPUT_FORMAT_TYPE:
-        g_value_set_enum(value, self->props.m_input_format_type.get());
+        g_value_set_enum(value, self->impl->props.m_input_format_type.get());
         break;
     case PROP_OUTPUT_FORMAT_TYPE:
-        g_value_set_enum(value, self->props.m_output_format_type.get());
+        g_value_set_enum(value, self->impl->props.m_output_format_type.get());
         break;
     case PROP_NMS_SCORE_THRESHOLD:
-        g_value_set_float(value, self->props.m_nms_score_threshold.get());
+        g_value_set_float(value, self->impl->props.m_nms_score_threshold.get());
         break;
     case PROP_NMS_IOU_THRESHOLD:
-        g_value_set_float(value, self->props.m_nms_iou_threshold.get());
+        g_value_set_float(value, self->impl->props.m_nms_iou_threshold.get());
         break;
     case PROP_NMS_MAX_PROPOSALS_PER_CLASS:
-        g_value_set_uint(value, self->props.m_nms_max_proposals_per_class.get());
+        g_value_set_uint(value, self->impl->props.m_nms_max_proposals_per_class.get());
         break; 
     case PROP_INPUT_FROM_META:
-        g_value_set_boolean(value, self->props.m_input_from_meta.get());
+        g_value_set_boolean(value, self->impl->props.m_input_from_meta.get());
         break;
     case PROP_NO_TRANSFORM:
-        g_value_set_boolean(value, self->props.m_no_transform.get());
+        g_value_set_boolean(value, self->impl->props.m_no_transform.get());
         break;
     case PROP_MULTI_PROCESS_SERVICE:
-        g_value_set_boolean(value, self->props.m_multi_process_service.get());
+        g_value_set_boolean(value, self->impl->props.m_multi_process_service.get());
         break;
     
     // Deprecated
     case PROP_VDEVICE_KEY:
-        g_value_set_uint(value, self->props.m_vdevice_key.get());
+        g_value_set_uint(value, self->impl->props.m_vdevice_key.get());
         break;
     default:
         G_OBJECT_WARN_INVALID_PROPERTY_ID(object, property_id, pspec);
@@ -781,6 +796,8 @@ static void gst_hailonet_class_init(GstHailoNetClass *klass)
     GObjectClass *gobject_class = G_OBJECT_CLASS(klass);
     GstElementClass *element_class = GST_ELEMENT_CLASS(klass);
 
+    gobject_class->dispose = gst_hailonet_dispose;
+
     gst_element_class_add_pad_template(element_class, gst_static_pad_template_get(&sink_template));
     gst_element_class_add_pad_template(element_class, gst_static_pad_template_get(&src_template));
     element_class->change_state = gst_hailonet_change_state;
@@ -911,16 +928,16 @@ static void gst_hailonet_class_init(GstHailoNetClass *klass)
 static void gst_hailonet_push_buffer_to_thread(GstHailoNet *self, GstBuffer *buffer)
 {
     {
-        std::unique_lock<std::mutex> lock(self->thread_queue_mutex);
-        self->thread_cv.wait(lock, [self] () {
-            bool is_unlimited_pool_not_empty = (self->props.m_outputs_max_pool_size.get() == 0) && (self->buffers_in_thread_queue < MAX_OUTPUTS_POOL_SIZE);
-            bool is_pool_empty = self->buffers_in_thread_queue < self->props.m_outputs_max_pool_size.get();
+        std::unique_lock<std::mutex> lock(self->impl->thread_queue_mutex);
+        self->impl->thread_cv.wait(lock, [self] () {
+            bool is_unlimited_pool_not_empty = (self->impl->props.m_outputs_max_pool_size.get() == 0) && (self->impl->buffers_in_thread_queue < MAX_OUTPUTS_POOL_SIZE);
+            bool is_pool_empty = self->impl->buffers_in_thread_queue < self->impl->props.m_outputs_max_pool_size.get();
             return is_unlimited_pool_not_empty || is_pool_empty;
         });
-        gst_queue_array_push_tail(self->thread_queue, buffer);
-        self->buffers_in_thread_queue++;
+        gst_queue_array_push_tail(self->impl->thread_queue, buffer);
+        self->impl->buffers_in_thread_queue++;
     }
-    self->thread_cv.notify_all();
+    self->impl->thread_cv.notify_all();
 }
 
 // TODO: This function should be refactored. It does many unrelated things and the user need to know that he should unmap the buffer
@@ -945,18 +962,23 @@ static bool set_infos(GstParentBufferMeta *parent_buffer_meta, hailo_vstream_inf
 static Expected<std::unordered_map<std::string, hailo_dma_buffer_t>> gst_hailonet_read_input_dma_buffers_from_meta(GstHailoNet *self, GstBuffer *buffer)
 {
     std::unordered_map<std::string, hailo_dma_buffer_t> input_buffer_metas;
-    gpointer state = NULL;
-    GstMeta *meta;
+    gpointer state = nullptr;
+    GstMeta *meta = nullptr;
 
-    while ((meta = gst_buffer_iterate_meta_filtered(buffer, &state, GST_PARENT_BUFFER_META_API_TYPE))) {
+    while (true) {
+        meta = gst_buffer_iterate_meta_filtered(buffer, &state, GST_PARENT_BUFFER_META_API_TYPE);
+        if (!meta) {
+            break;
+        }
         GstParentBufferMeta *parent_buffer_meta = reinterpret_cast<GstParentBufferMeta*>(meta);
         GstMapInfo info;
         hailo_vstream_info_t vstream_info;
         bool result = set_infos(parent_buffer_meta, vstream_info, info);
         if (result) {
-            CHECK_AS_EXPECTED(gst_is_dmabuf_memory(info.memory), HAILO_INTERNAL_FAILURE, "GstMemory is not a DMA buf as expected!");
+            TRY(auto is_dma_buf_memory, HailoDmaBuffAllocator::is_dma_buf_memory(info));
+            CHECK_AS_EXPECTED(is_dma_buf_memory, HAILO_INTERNAL_FAILURE, "GstMemory is not a DMA buf as expected!");
 
-            int fd = gst_fd_memory_get_fd(info.memory);
+            TRY(auto fd, HailoDmaBuffAllocator::memory_get_fd(info));
             CHECK_AS_EXPECTED(fd != -1, HAILO_INTERNAL_FAILURE, "Failed to get FD from GstMemory!");
 
             hailo_dma_buffer_t dma_buffer = {fd, info.size};
@@ -966,7 +988,7 @@ static Expected<std::unordered_map<std::string, hailo_dma_buffer_t>> gst_hailone
     }
     CHECK_AS_EXPECTED(!input_buffer_metas.empty(),HAILO_INTERNAL_FAILURE, "No GstHailoTensorMeta was found in buffer!");
 
-    for (auto &input : self->infer_model->inputs()) {
+    for (auto &input : self->impl->infer_model->inputs()) {
         CHECK_AS_EXPECTED(input_buffer_metas.find(input.name()) != input_buffer_metas.end(),
             HAILO_INTERNAL_FAILURE, "No GstHailoTensorMeta was found in buffer for input: %s", input.name().c_str());
     }
@@ -977,8 +999,8 @@ static Expected<std::unordered_map<std::string, hailo_dma_buffer_t>> gst_hailone
 static hailo_status gst_hailonet_fill_multiple_input_bindings_dma_buffers(GstHailoNet *self, GstBuffer *buffer)
 {
     TRY(auto input_buffers, gst_hailonet_read_input_dma_buffers_from_meta(self, buffer));
-    for (const auto &name : self->infer_model->get_input_names()) {
-        auto status = self->infer_bindings.input(name)->set_dma_buffer(input_buffers.at(name));
+    for (const auto &name : self->impl->infer_model->get_input_names()) {
+        auto status = self->impl->infer_bindings.input(name)->set_dma_buffer(input_buffers.at(name));
         CHECK_SUCCESS(status);
     }
 
@@ -988,10 +1010,14 @@ static hailo_status gst_hailonet_fill_multiple_input_bindings_dma_buffers(GstHai
 static Expected<std::unordered_map<std::string, uint8_t*>> gst_hailonet_read_input_buffers_from_meta(GstHailoNet *self, GstBuffer *buffer)
 {
     std::unordered_map<std::string, uint8_t*> input_buffer_metas;
-    gpointer state = NULL;
-    GstMeta *meta;
+    gpointer state = nullptr;
+    GstMeta *meta = nullptr;
 
-    while ((meta = gst_buffer_iterate_meta_filtered(buffer, &state, GST_PARENT_BUFFER_META_API_TYPE))) {
+    while (true) {
+        meta = gst_buffer_iterate_meta_filtered(buffer, &state, GST_PARENT_BUFFER_META_API_TYPE);
+        if (!meta) {
+            break;
+        }
         GstParentBufferMeta *parent_buffer_meta = reinterpret_cast<GstParentBufferMeta*>(meta);
         GstMapInfo info;
         hailo_vstream_info_t vstream_info;
@@ -1003,7 +1029,7 @@ static Expected<std::unordered_map<std::string, uint8_t*>> gst_hailonet_read_inp
     }
     CHECK_AS_EXPECTED(!input_buffer_metas.empty(),HAILO_INTERNAL_FAILURE, "No GstHailoTensorMeta was found in buffer!");
 
-    for (auto &input : self->infer_model->inputs()) {
+    for (auto &input : self->impl->infer_model->inputs()) {
         CHECK_AS_EXPECTED(input_buffer_metas.find(input.name()) != input_buffer_metas.end(),
             HAILO_INTERNAL_FAILURE, "No GstHailoTensorMeta was found in buffer for input: %s", input.name().c_str());
     }
@@ -1014,9 +1040,9 @@ static Expected<std::unordered_map<std::string, uint8_t*>> gst_hailonet_read_inp
 static hailo_status gst_hailonet_fill_multiple_input_bindings(GstHailoNet *self, GstBuffer *buffer)
 {
     TRY(auto input_buffers, gst_hailonet_read_input_buffers_from_meta(self, buffer));
-    for (const auto &name : self->infer_model->get_input_names()) {
-        auto status = self->infer_bindings.input(name)->set_buffer(MemoryView(input_buffers.at(name),
-            self->infer_model->input(name)->get_frame_size()));
+    for (const auto &name : self->impl->infer_model->get_input_names()) {
+        auto status = self->impl->infer_bindings.input(name)->set_buffer(MemoryView(input_buffers.at(name),
+            self->impl->infer_model->input(name)->get_frame_size()));
         CHECK_SUCCESS(status);
     }
 
@@ -1025,19 +1051,19 @@ static hailo_status gst_hailonet_fill_multiple_input_bindings(GstHailoNet *self,
 
 static void store_buffer_events(GstHailoNet *self, GstBuffer *buffer)
 {
-    self->events_queue_per_buffer[buffer] = std::queue<GstEvent*>();
-    while (!self->curr_event_queue.empty()) {
-        GstEvent *event = self->curr_event_queue.front();
-        self->events_queue_per_buffer[buffer].push(event);
-        self->curr_event_queue.pop();
+    self->impl->events_queue_per_buffer[buffer] = std::queue<GstEvent*>();
+    while (!self->impl->curr_event_queue.empty()) {
+        GstEvent *event = self->impl->curr_event_queue.front();
+        self->impl->events_queue_per_buffer[buffer].push(event);
+        self->impl->curr_event_queue.pop();
     }
 }
 
 static hailo_status gst_hailonet_push_buffer_to_input_queue(GstHailoNet *self, GstBuffer *buffer)
 {
-    std::unique_lock<std::mutex> lock(self->input_queue_mutex);
+    std::unique_lock<std::mutex> lock(self->impl->input_queue_mutex);
     store_buffer_events(self, buffer);
-    gst_queue_array_push_tail(self->input_queue, buffer);
+    gst_queue_array_push_tail(self->impl->input_queue, buffer);
 
     return HAILO_SUCCESS;
 }
@@ -1045,9 +1071,9 @@ static hailo_status gst_hailonet_push_buffer_to_input_queue(GstHailoNet *self, G
 Expected<std::unordered_map<std::string, TensorInfo>> gst_hailonet_fill_output_bindings(GstHailoNet *self)
 {
     std::unordered_map<std::string, TensorInfo> tensors;
-    for (auto &output : self->infer_model->outputs()) {
+    for (auto &output : self->impl->infer_model->outputs()) {
         GstBuffer *output_buffer = nullptr;
-        GstFlowReturn flow_result = gst_buffer_pool_acquire_buffer(self->output_buffer_pools[output.name()], &output_buffer, nullptr);
+        GstFlowReturn flow_result = gst_buffer_pool_acquire_buffer(self->impl->output_buffer_pools[output.name()], &output_buffer, nullptr);
         if (GST_FLOW_FLUSHING == flow_result) {
             return make_unexpected(HAILO_STREAM_ABORT);
         } else {
@@ -1059,16 +1085,17 @@ Expected<std::unordered_map<std::string, TensorInfo>> gst_hailonet_fill_output_b
         CHECK_AS_EXPECTED(result, HAILO_INTERNAL_FAILURE, "Failed mapping buffer!");
 
         if (gst_hailo_should_use_dma_buffers()) {
-            CHECK_AS_EXPECTED(gst_is_dmabuf_memory(buffer_info.memory), HAILO_INTERNAL_FAILURE, "GstMemory is not a DMA buf as expected!");
+            TRY(auto is_dma_buf_memory, HailoDmaBuffAllocator::is_dma_buf_memory(buffer_info));
+            CHECK_AS_EXPECTED(is_dma_buf_memory, HAILO_INTERNAL_FAILURE, "GstMemory is not a DMA buf as expected!");
 
-            int fd = gst_fd_memory_get_fd(buffer_info.memory);
+            TRY(auto fd, HailoDmaBuffAllocator::memory_get_fd(buffer_info));
             CHECK_AS_EXPECTED(fd != -1, HAILO_INTERNAL_FAILURE, "Failed to get FD from GstMemory!");
 
             hailo_dma_buffer_t dma_buffer = {fd, buffer_info.size};
-            auto status = self->infer_bindings.output(output.name())->set_dma_buffer(dma_buffer);
+            auto status = self->impl->infer_bindings.output(output.name())->set_dma_buffer(dma_buffer);
             CHECK_SUCCESS_AS_EXPECTED(status);
         } else {
-            auto status = self->infer_bindings.output(output.name())->set_buffer(MemoryView(buffer_info.data, buffer_info.size));
+            auto status = self->impl->infer_bindings.output(output.name())->set_buffer(MemoryView(buffer_info.data, buffer_info.size));
             CHECK_SUCCESS_AS_EXPECTED(status);
         }
 
@@ -1079,7 +1106,7 @@ Expected<std::unordered_map<std::string, TensorInfo>> gst_hailonet_fill_output_b
 
 static hailo_status gst_hailonet_fill_single_input_binding(GstHailoNet *self, hailo_pix_buffer_t pix_buffer)
 {
-    auto status = self->infer_bindings.input()->set_pix_buffer(pix_buffer);
+    auto status = self->impl->infer_bindings.input()->set_pix_buffer(pix_buffer);
     CHECK_SUCCESS(status);
 
     return HAILO_SUCCESS;
@@ -1087,38 +1114,38 @@ static hailo_status gst_hailonet_fill_single_input_binding(GstHailoNet *self, ha
 
 static hailo_status gst_hailonet_call_run_async(GstHailoNet *self, const std::unordered_map<std::string, TensorInfo> &tensors)
 {
-    auto status = self->configured_infer_model->wait_for_async_ready(WAIT_FOR_ASYNC_READY_TIMEOUT);
+    auto status = self->impl->configured_infer_model->wait_for_async_ready(WAIT_FOR_ASYNC_READY_TIMEOUT);
     CHECK_SUCCESS(status);
 
     {
-        std::unique_lock<std::mutex> lock(self->flush_mutex);
-        self->ongoing_frames++;
+        std::unique_lock<std::mutex> lock(self->impl->flush_mutex);
+        self->impl->ongoing_frames++;
     }
 
-    TRY(auto job, self->configured_infer_model->run_async(self->infer_bindings, [self, tensors] (const AsyncInferCompletionInfo &/*completion_info*/) {
+    TRY(auto job, self->impl->configured_infer_model->run_async(self->impl->infer_bindings, [self, tensors] (const AsyncInferCompletionInfo &/*completion_info*/) {
         GstBuffer *buffer = nullptr;
         {
-            std::unique_lock<std::mutex> lock(self->input_queue_mutex);
-            buffer = static_cast<GstBuffer*>(gst_queue_array_pop_head(self->input_queue));
+            std::unique_lock<std::mutex> lock(self->impl->input_queue_mutex);
+            buffer = static_cast<GstBuffer*>(gst_queue_array_pop_head(self->impl->input_queue));
             gst_hailonet_handle_buffer_events(self, buffer);
         }
 
-        for (auto &output : self->infer_model->outputs()) {
+        for (auto &output : self->impl->infer_model->outputs()) {
             auto info = tensors.at(output.name());
             gst_buffer_unmap(info.buffer, &info.buffer_info);
 
             GstHailoTensorMeta *buffer_meta = GST_TENSOR_META_ADD(info.buffer);
-            buffer_meta->info = self->output_vstream_infos[output.name()];
+            buffer_meta->info = self->impl->output_vstream_infos[output.name()];
 
             (void)gst_buffer_add_parent_buffer_meta(buffer, info.buffer);
             gst_buffer_unref(info.buffer);
         }
 
         {
-            std::unique_lock<std::mutex> lock(self->flush_mutex);
-            self->ongoing_frames--;
+            std::unique_lock<std::mutex> lock(self->impl->flush_mutex);
+            self->impl->ongoing_frames--;
         }
-        self->flush_cv.notify_all();
+        self->impl->flush_cv.notify_all();
 
         gst_hailonet_push_buffer_to_thread(self, buffer);
     }));
@@ -1171,10 +1198,24 @@ static hailo_status gst_hailonet_async_infer_single_input(GstHailoNet *self, Gst
     return HAILO_SUCCESS;
 }
 
+uint32_t get_frame_width(const GstVideoFrame *frame, uint32_t plane_index)
+{
+    switch (frame->info.finfo->format) {
+    case GST_VIDEO_FORMAT_NV12:
+    case GST_VIDEO_FORMAT_NV21:
+    case GST_VIDEO_FORMAT_I420:
+        /* GStreamer may pad plane rows on multi-planar formats and the padding size is not known here, so the width set by
+            the caps is used. NOTE(review): plane_index is ignored for these formats - confirm this suits chroma planes */
+        return frame->info.width;
+    default:
+        return GST_VIDEO_INFO_PLANE_STRIDE(&(frame->info), plane_index);
+    }
+}
+
 static Expected<hailo_pix_buffer_t> gst_hailonet_construct_pix_buffer(GstHailoNet *self, GstBuffer *buffer)
 {
     GstVideoFrame frame;
-    auto result = gst_video_frame_map(&frame, &self->input_frame_info, buffer,
+    auto result = gst_video_frame_map(&frame, &self->impl->input_frame_info, buffer,
         static_cast<GstMapFlags>(GST_MAP_READ | GST_VIDEO_FRAME_MAP_FLAG_NO_REF));
     CHECK_AS_EXPECTED(result,HAILO_INTERNAL_FAILURE, "gst_video_frame_map failed!");
 
@@ -1184,8 +1225,8 @@ static Expected<hailo_pix_buffer_t> gst_hailonet_construct_pix_buffer(GstHailoNe
     pix_buffer.memory_type = HAILO_PIX_BUFFER_MEMORY_TYPE_USERPTR;
 
     for (uint32_t plane_index = 0; plane_index < pix_buffer.number_of_planes; plane_index++) {
-        pix_buffer.planes[plane_index].bytes_used = GST_VIDEO_INFO_PLANE_STRIDE(&frame.info, plane_index) * GST_VIDEO_INFO_COMP_HEIGHT(&frame.info, plane_index);
-        pix_buffer.planes[plane_index].plane_size = GST_VIDEO_INFO_PLANE_STRIDE(&frame.info, plane_index) * GST_VIDEO_INFO_COMP_HEIGHT(&frame.info, plane_index);
+        pix_buffer.planes[plane_index].bytes_used = get_frame_width(&frame, plane_index) * GST_VIDEO_INFO_COMP_HEIGHT(&frame.info, plane_index);
+        pix_buffer.planes[plane_index].plane_size = pix_buffer.planes[plane_index].bytes_used;
         pix_buffer.planes[plane_index].user_ptr = GST_VIDEO_FRAME_PLANE_DATA(&frame, plane_index);
     }
 
@@ -1196,31 +1237,31 @@ static Expected<hailo_pix_buffer_t> gst_hailonet_construct_pix_buffer(GstHailoNe
 static GstFlowReturn gst_hailonet_chain(GstPad * /*pad*/, GstObject * parent, GstBuffer * buffer)
 {
     GstHailoNet *self = GST_HAILONET(parent);
-    std::unique_lock<std::mutex> lock(self->infer_mutex);
+    std::unique_lock<std::mutex> lock(self->impl->infer_mutex);
 
-    if (self->did_critical_failure_happen) {
+    if (self->impl->did_critical_failure_happen) {
         return GST_FLOW_ERROR;
     }
 
-    if (self->props.m_pass_through.get() || !self->props.m_is_active.get() || !self->is_configured) {
+    if (self->impl->props.m_pass_through.get() || !self->impl->props.m_is_active.get() || !self->impl->is_configured) {
         gst_hailonet_push_buffer_to_thread(self, buffer);
         return GST_FLOW_OK;
     }
 
     if (!gst_buffer_is_writable(buffer)) {
-        if (self->props.m_should_force_writable.get()) {
+        if (self->impl->props.m_should_force_writable.get()) {
             buffer = gst_buffer_make_writable(buffer);
             if (nullptr == buffer) {
-                ERROR("Failed to make buffer writable!\n");
+                HAILONET_ERROR("Failed to make buffer writable!\n");
                 return GST_FLOW_ERROR;
             }
         } else {
-            ERROR("Input buffer is not writable! Use force-writable property to force the buffer to be writable\n");
+            HAILONET_ERROR("Input buffer is not writable! Use force-writable property to force the buffer to be writable\n");
             return GST_FLOW_ERROR;
         }
     }
 
-    if (self->props.m_input_from_meta.get()) {
+    if (self->impl->props.m_input_from_meta.get()) {
         auto status = gst_hailonet_async_infer_multi_input(self, buffer);
         if (HAILO_SUCCESS != status) {
             return GST_FLOW_ERROR;
@@ -1244,33 +1285,36 @@ static hailo_status gst_hailonet_init_infer_model(GstHailoNet * self)
     auto vdevice_params = HailoRTDefaults::get_vdevice_params();
 
     hailo_device_id_t device_id = {0};
-    if (self->props.m_device_id.was_changed()) {
-        TRY(device_id, HailoRTCommon::to_device_id(self->props.m_device_id.get()));
+    if (self->impl->props.m_device_id.was_changed()) {
+        TRY(device_id, HailoRTCommon::to_device_id(self->impl->props.m_device_id.get()));
         vdevice_params.device_ids = &device_id;
     }
-    if (self->props.m_device_count.was_changed()) {
-        vdevice_params.device_count = self->props.m_device_count.get();
+    if (self->impl->props.m_device_count.was_changed()) {
+        vdevice_params.device_count = self->impl->props.m_device_count.get();
     }
-    if (self->props.m_vdevice_group_id.was_changed()) {
-        vdevice_params.group_id = self->props.m_vdevice_group_id.get().c_str();
-    } else if (self->props.m_vdevice_key.was_changed()) {
-        auto key_str = std::to_string(self->props.m_vdevice_key.get());
+    // key_str lives at function scope because vdevice_params.group_id only stores a pointer into it,
+    // and that pointer is read later by VDevice::create(); a block-local string would dangle by then.
+    std::string key_str;
+    if (self->impl->props.m_vdevice_group_id.was_changed()) {
+        vdevice_params.group_id = self->impl->props.m_vdevice_group_id.get().c_str();
+    } else if (self->impl->props.m_vdevice_key.was_changed()) {
+        key_str = std::to_string(self->impl->props.m_vdevice_key.get());
         vdevice_params.group_id = key_str.c_str();
     }
-    if (self->props.m_scheduling_algorithm.was_changed()) {
-        vdevice_params.scheduling_algorithm = self->props.m_scheduling_algorithm.get();
+    if (self->impl->props.m_scheduling_algorithm.was_changed()) {
+        vdevice_params.scheduling_algorithm = self->impl->props.m_scheduling_algorithm.get();
     }
-    if (self->props.m_multi_process_service.was_changed()) {
-        vdevice_params.multi_process_service = self->props.m_multi_process_service.get();
-        CHECK(self->props.m_scheduling_algorithm.get() != HAILO_SCHEDULING_ALGORITHM_NONE, HAILO_INVALID_OPERATION,
+    if (self->impl->props.m_multi_process_service.was_changed()) {
+        vdevice_params.multi_process_service = self->impl->props.m_multi_process_service.get();
+        CHECK(self->impl->props.m_scheduling_algorithm.get() != HAILO_SCHEDULING_ALGORITHM_NONE, HAILO_INVALID_OPERATION,
             "To use multi-process-service please set scheduling-algorithm to a value other than 'none'");
     }
 
-    TRY(self->vdevice, VDevice::create(vdevice_params));
-    TRY(self->infer_model, self->vdevice->create_infer_model(self->props.m_hef_path.get()));
+    TRY(self->impl->vdevice, VDevice::create(vdevice_params));
+    TRY(self->impl->infer_model, self->impl->vdevice->create_infer_model(self->impl->props.m_hef_path.get()));
 
-    if(!(self->props.m_input_from_meta.get())){
-        CHECK(self->infer_model->inputs().size() == 1, HAILO_INVALID_OPERATION,
+    if(!(self->impl->props.m_input_from_meta.get())){
+        CHECK(self->impl->infer_model->inputs().size() == 1, HAILO_INVALID_OPERATION,
             "In case you want to run a multiple input model, please set the input-from-meta flag.");
     }
 
@@ -1320,7 +1361,7 @@ static const gchar *gst_hailonet_get_format_string(const InferModel::InferStream
             input.shape().features);
         return "I420";
     default:
-        ERROR("Input %s has an unsupported format order! order = %d\n", input.name().c_str(), input.format().order);
+        HAILONET_ERROR("Input %s has an unsupported format order! order = %d\n", input.name().c_str(), input.format().order);
         return nullptr;
     }
 }
@@ -1339,34 +1380,34 @@ static uint32_t get_height_by_order(uint32_t original_height, hailo_format_order
 
 static GstCaps *gst_hailonet_get_caps(GstHailoNet *self)
 {
-    if (self->did_critical_failure_happen) {
+    if (self->impl->did_critical_failure_happen) {
         // Sometimes gst_hailonet_get_caps will get called again even after a critical failure happened and nullptr was returned
         return nullptr;
     }
 
-    if (nullptr != self->input_caps) {
-        return gst_caps_copy(self->input_caps);
+    if (nullptr != self->impl->input_caps) {
+        return gst_caps_copy(self->impl->input_caps);
     }
 
-    if (nullptr == self->vdevice) {
+    if (nullptr == self->impl->vdevice) {
         auto status = gst_hailonet_init_infer_model(self);
         if (HAILO_SUCCESS != status) {
-            self->did_critical_failure_happen = true;
+            self->impl->did_critical_failure_happen = true;
             return nullptr;
         }
     }
 
-    if (self->props.m_input_from_meta.get()) {
+    if (self->impl->props.m_input_from_meta.get()) {
         GstCaps *new_caps = gst_caps_new_any();
-        std::unique_lock<std::mutex> lock(self->input_caps_mutex);
+        std::unique_lock<std::mutex> lock(self->impl->input_caps_mutex);
         gst_hailonet_unref_input_caps(self);
-        self->input_caps = new_caps;
+        self->impl->input_caps = new_caps;
         return gst_caps_copy(new_caps);
     }
 
-    auto input = self->infer_model->input();
+    auto input = self->impl->infer_model->input();
     if (!input) {
-        ERROR("Getting input has failed with status = %d\n", input.status());
+        HAILONET_ERROR("Getting input has failed with status = %d\n", input.status());
         return nullptr;
     }
 
@@ -1381,14 +1422,14 @@ static GstCaps *gst_hailonet_get_caps(GstHailoNet *self)
         "height", G_TYPE_INT, get_height_by_order(input->shape().height, input->format().order),
         nullptr);
 
-    if (!gst_video_info_from_caps(&self->input_frame_info, new_caps)) {
-        ERROR("gst_video_info_from_caps failed\n");
+    if (!gst_video_info_from_caps(&self->impl->input_frame_info, new_caps)) {
+        HAILONET_ERROR("gst_video_info_from_caps failed\n");
         return nullptr;
     }
 
-    std::unique_lock<std::mutex> lock(self->input_caps_mutex);
+    std::unique_lock<std::mutex> lock(self->impl->input_caps_mutex);
     gst_hailonet_unref_input_caps(self);
-    self->input_caps = new_caps;
+    self->impl->input_caps = new_caps;
     return gst_caps_copy(new_caps);
 }
 
@@ -1421,7 +1462,7 @@ static gboolean gst_hailonet_sink_event(GstPad *pad, GstObject *parent, GstEvent
 {
     GstHailoNet *self = GST_HAILONET(parent);
     if (GST_EVENT_TYPE(event) == GST_EVENT_EOS) {
-        self->has_got_eos = true;
+        self->impl->has_got_eos = true;
         return gst_pad_push_event(self->srcpad, event);
     }
     if (GST_EVENT_IS_STICKY(event)) {
@@ -1434,12 +1475,26 @@ static gboolean gst_hailonet_sink_event(GstPad *pad, GstObject *parent, GstEvent
 
 static void gst_hailonet_flush_callback(GstHailoNet *self, gpointer /*data*/)
 {
-    std::unique_lock<std::mutex> lock(self->flush_mutex);
-    self->flush_cv.wait(lock, [self] () {
-        return 0 == self->ongoing_frames;
+    std::unique_lock<std::mutex> lock(self->impl->flush_mutex);
+    self->impl->flush_cv.wait(lock, [self] () {
+        return 0 == self->impl->ongoing_frames;
     });
 }
 
+// allocator and input_frame_info are set explicitly: they used to live in the GObject-zeroed GstHailoNet struct.
+HailoNetImpl::HailoNetImpl() : events_queue_per_buffer(), curr_event_queue(), input_queue(nullptr), thread_queue(nullptr),
+    buffers_in_thread_queue(0), props(), input_caps(nullptr), is_thread_running(false), has_got_eos(false),
+    did_critical_failure_happen(false), vdevice(nullptr), is_configured(false), has_called_activate(false), ongoing_frames(0),
+    input_frame_info(), allocator(nullptr) {}
+
+// Factory: heap-allocates a HailoNetImpl and reports HAILO_OUT_OF_HOST_MEMORY instead of returning a null pointer.
+Expected<std::unique_ptr<HailoNetImpl>> HailoNetImpl::create()
+{
+    auto impl = make_unique_nothrow<HailoNetImpl>();
+    CHECK_NOT_NULL_AS_EXPECTED(impl, HAILO_OUT_OF_HOST_MEMORY);
+    return impl;
+}
+
 static void gst_hailonet_init(GstHailoNet *self)
 {
     if (!do_versions_match(GST_ELEMENT(self))) {
@@ -1456,20 +1511,13 @@ static void gst_hailonet_init(GstHailoNet *self)
     self->srcpad = gst_pad_new_from_static_template(&src_template, "src");
     gst_element_add_pad(GST_ELEMENT (self), self->srcpad);
 
-    self->input_caps = nullptr;
-    self->input_queue = nullptr;
-    self->thread_queue = nullptr;
-    self->is_thread_running = false;
-    self->has_got_eos = false;
-    self->buffers_in_thread_queue = 0;
-    self->props = HailoNetProperties();
-    self->vdevice = nullptr;
-    self->is_configured = false;
-    self->has_called_activate = false;
-    self->ongoing_frames = 0;
-    self->did_critical_failure_happen = false;
-    self->events_queue_per_buffer = std::unordered_map<GstBuffer*, std::queue<GstEvent*>>();
-    self->curr_event_queue = std::queue<GstEvent*>();
+    auto hailonet_impl = HailoNetImpl::create();
+    if (!hailonet_impl) {
+        GST_ELEMENT_ERROR(self, RESOURCE, FAILED, ("Creating hailonet implementation has failed! status = %d", hailonet_impl.status()), (NULL));
+        return;
+    }
+
+    self->impl = hailonet_impl->release();
 
     g_signal_connect(self, "flush", G_CALLBACK(gst_hailonet_flush_callback), nullptr);
 
diff --git a/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailonet.hpp b/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailonet.hpp
index a47cd4c7..b7bc053f 100644
--- a/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailonet.hpp
+++ b/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailonet.hpp
@@ -20,10 +20,7 @@
 #ifndef _GST_HAILONET_HPP_
 #define _GST_HAILONET_HPP_
 
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wconversion"
-#include <gst/gst.h>
-#pragma GCC diagnostic pop
+#include "hailo_gst.h"
 
 #include <gst/base/gstqueuearray.h>
 #include <gst/video/gstvideofilter.h>
@@ -31,7 +28,11 @@
 #include "hailo/infer_model.hpp"
 #include "common.hpp"
 #include "gsthailo_allocator.hpp"
-#include "gsthailo_dmabuf_allocator.hpp"
+#include "dma_buf_allocator_wrapper.hpp"
+
+#if defined(__linux__)
+  #include "gsthailo_dmabuf_allocator.hpp"
+#endif /* __linux__ */
 
 #include <queue>
 #include <condition_variable>
@@ -86,11 +87,21 @@ struct HailoNetProperties final
     HailoElemProperty<guint32> m_vdevice_key;
 };
 
+class HailoNetImpl;
 typedef struct _GstHailoNet {
     GstElement element;
     GstPad *sinkpad;
     GstPad *srcpad;
 
+    HailoNetImpl *impl;
+} GstHailoNet;
+
+class HailoNetImpl final
+{
+public:
+    static Expected<std::unique_ptr<HailoNetImpl>> create();
+    HailoNetImpl();
+
     std::unordered_map<GstBuffer*, std::queue<GstEvent*>> events_queue_per_buffer;
     std::queue<GstEvent*> curr_event_queue;
     GstQueueArray *input_queue;
@@ -121,14 +132,14 @@ typedef struct _GstHailoNet {
     GstVideoInfo input_frame_info;
 
     GstHailoAllocator *allocator;
-    GstHailoDmabufAllocator *dmabuf_allocator;
+    std::shared_ptr<HailoDmaBuffAllocator> dmabuf_allocator;
     std::unordered_map<std::string, GstBufferPool*> output_buffer_pools;
     std::unordered_map<std::string, hailo_vstream_info_t> output_vstream_infos;
 
     std::mutex input_queue_mutex;
     std::mutex thread_queue_mutex;
     std::condition_variable thread_cv;
-} GstHailoNet;
+};
 
 typedef struct _GstHailoNetClass {
   GstElementClass parent_class;
diff --git a/hailort/libhailort/bindings/gstreamer/gst-hailo/hailo_events/hailo_events.hpp b/hailort/libhailort/bindings/gstreamer/gst-hailo/hailo_events/hailo_events.hpp
index 70f5cb41..74398a43 100644
--- a/hailort/libhailort/bindings/gstreamer/gst-hailo/hailo_events/hailo_events.hpp
+++ b/hailort/libhailort/bindings/gstreamer/gst-hailo/hailo_events/hailo_events.hpp
@@ -22,11 +22,7 @@
 
 #include "hailo/hailort.h"
 #include "hailo/expected.hpp"
-
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wconversion"
-#include <gst/gst.h>
-#pragma GCC diagnostic pop
+#include "hailo_gst.h"
 
 #include <vector>
 
diff --git a/hailort/libhailort/bindings/gstreamer/gst-hailo/hailo_gst.h b/hailort/libhailort/bindings/gstreamer/gst-hailo/hailo_gst.h
new file mode 100644
index 00000000..63a3e173
--- /dev/null
+++ b/hailort/libhailort/bindings/gstreamer/gst-hailo/hailo_gst.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2021-2022 Hailo Technologies Ltd. All rights reserved.
+ * Distributed under the LGPL 2.1 license (https://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ */
+/**
+ * @file hailo_gst.h
+ * @brief Includes <gst/gst.h> with conversion warnings suppressed (MSVC C4244 / GCC -Wconversion)
+ **/
+#ifdef _MSC_VER
+    #pragma warning(push)
+    #pragma warning(disable : 4244)  // Disable conversion warnings
+    #include <gst/gst.h>
+    #pragma warning(pop)
+#else
+    #pragma GCC diagnostic push
+    #pragma GCC diagnostic ignored "-Wconversion"
+    #include <gst/gst.h>
+    #pragma GCC diagnostic pop
+#endif
\ No newline at end of file
diff --git a/hailort/libhailort/bindings/gstreamer/gst-hailo/metadata/hailo_buffer_flag_meta.hpp b/hailort/libhailort/bindings/gstreamer/gst-hailo/metadata/hailo_buffer_flag_meta.hpp
index f0e1e6be..cd7d7b1a 100644
--- a/hailort/libhailort/bindings/gstreamer/gst-hailo/metadata/hailo_buffer_flag_meta.hpp
+++ b/hailort/libhailort/bindings/gstreamer/gst-hailo/metadata/hailo_buffer_flag_meta.hpp
@@ -20,10 +20,7 @@
 #ifndef __HAILO_BUFFER_FLAG_META_HPP__
 #define __HAILO_BUFFER_FLAG_META_HPP__
 
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wconversion"
-#include <gst/gst.h>
-#pragma GCC diagnostic pop
+#include "hailo_gst.h"
 
 #define HAILO_BUFFER_FLAG_META_API_NAME "GstHailoBufferFlagMetaAPI"
 #define HAILO_BUFFER_FLAG_META_IMPL_NAME "GstHailoBufferFlagMeta"
diff --git a/hailort/libhailort/bindings/gstreamer/gst-hailo/metadata/tensor_meta.hpp b/hailort/libhailort/bindings/gstreamer/gst-hailo/metadata/tensor_meta.hpp
index f60f7fa6..85e76cee 100644
--- a/hailort/libhailort/bindings/gstreamer/gst-hailo/metadata/tensor_meta.hpp
+++ b/hailort/libhailort/bindings/gstreamer/gst-hailo/metadata/tensor_meta.hpp
@@ -21,11 +21,18 @@
 #define __TENSOR_META_HPP__
 
 #include "hailo/hailort.h"
-
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wconversion"
-#include <gst/gst.h>
-#pragma GCC diagnostic pop
+// TODO HRT-14797: Remove these ifdefs and restore the hailo_gst.h include after fixing deb_packaging.py and gstreamer/cmakelists.txt
+#ifdef _MSC_VER
+    #pragma warning(push)
+    #pragma warning(disable : 4244)  // Disable conversion warnings
+    #include <gst/gst.h>
+    #pragma warning(pop)
+#else
+    #pragma GCC diagnostic push
+    #pragma GCC diagnostic ignored "-Wconversion"
+    #include <gst/gst.h>
+    #pragma GCC diagnostic pop
+#endif
 
 #define TENSOR_META_API_NAME "GstHailoTensorMetaAPI"
 #define TENSOR_META_IMPL_NAME "GstHailoTensorMeta"
diff --git a/hailort/libhailort/bindings/gstreamer/gst-hailo/os/linux/dma_buf_allocator_wrapper.cpp b/hailort/libhailort/bindings/gstreamer/gst-hailo/os/linux/dma_buf_allocator_wrapper.cpp
new file mode 100644
index 00000000..98ba6e5f
--- /dev/null
+++ b/hailort/libhailort/bindings/gstreamer/gst-hailo/os/linux/dma_buf_allocator_wrapper.cpp
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2021-2024 Hailo Technologies Ltd. All rights reserved.
+ * Distributed under the LGPL 2.1 license (https://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#include "dma_buf_allocator_wrapper.hpp"
+
+Expected<std::shared_ptr<HailoDmaBuffAllocator>> HailoDmaBuffAllocator::create(gchar *name) { // Wraps a new GST_TYPE_HAILO_DMABUF_ALLOCATOR object whose "name" property is `name`
+    std::shared_ptr<HailoDmaBuffAllocator> hailo_dma_buff_allocator = std::make_shared<HailoDmaBuffAllocator>();
+    hailo_dma_buff_allocator->impl = GST_HAILO_DMABUF_ALLOCATOR(g_object_new(GST_TYPE_HAILO_DMABUF_ALLOCATOR, "name", name, NULL)); // NOTE(review): stored without a null check - confirm callers handle a null impl
+    return hailo_dma_buff_allocator;
+}
+
+hailo_status HailoDmaBuffAllocator::close_dma_heap_fd() { // Closes the DMA heap fd if it is flagged open; always returns HAILO_SUCCESS
+    if (GstHailoDmaHeapControl::dma_heap_fd_open) {
+        close(GstHailoDmaHeapControl::dma_heap_fd); // the result of close() is not inspected
+        GstHailoDmaHeapControl::dma_heap_fd_open = false; // makes a repeated call skip the close
+    }
+    return HAILO_SUCCESS;
+}
+
+Expected<bool> HailoDmaBuffAllocator::is_dma_buf_memory(GstMapInfo &info) { // Forwards info.memory to gst_is_dmabuf_memory()
+    return gst_is_dmabuf_memory(info.memory);
+}
+
+Expected<int> HailoDmaBuffAllocator::memory_get_fd(GstMapInfo &info) { // Forwards info.memory to gst_fd_memory_get_fd()
+    return gst_fd_memory_get_fd(info.memory);
+}
diff --git a/hailort/libhailort/bindings/gstreamer/gst-hailo/os/linux/dma_buf_allocator_wrapper.hpp b/hailort/libhailort/bindings/gstreamer/gst-hailo/os/linux/dma_buf_allocator_wrapper.hpp
new file mode 100644
index 00000000..4e5d3cf6
--- /dev/null
+++ b/hailort/libhailort/bindings/gstreamer/gst-hailo/os/linux/dma_buf_allocator_wrapper.hpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021-2024 Hailo Technologies Ltd. All rights reserved.
+ * Distributed under the LGPL 2.1 license (https://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ */
+#ifndef _HAILO_DMABUF_ALLOCATOR_HPP_
+#define _HAILO_DMABUF_ALLOCATOR_HPP_
+
+#include "../../common.hpp"
+#include "../../gsthailo_dmabuf_allocator.hpp"
+
+class HailoDmaBuffAllocator{ // C++ wrapper holding a GstHailoDmabufAllocator pointer plus DMA-buf helper functions
+public:
+    static Expected<std::shared_ptr<HailoDmaBuffAllocator>> create(gchar *name);
+    hailo_status close_dma_heap_fd();
+    static Expected<bool> is_dma_buf_memory(GstMapInfo &info);
+    static Expected<int> memory_get_fd(GstMapInfo &info);
+    // Default-initialized (as in the Windows variant) so a default-constructed wrapper never holds an indeterminate pointer.
+    GstHailoDmabufAllocator *impl = nullptr;
+};
+
+#endif /* _HAILO_DMABUF_ALLOCATOR_HPP_ */
+
diff --git a/hailort/libhailort/bindings/gstreamer/gst-hailo/os/windows/dma_buf_allocator_wrapper.cpp b/hailort/libhailort/bindings/gstreamer/gst-hailo/os/windows/dma_buf_allocator_wrapper.cpp
new file mode 100644
index 00000000..7bcf05bf
--- /dev/null
+++ b/hailort/libhailort/bindings/gstreamer/gst-hailo/os/windows/dma_buf_allocator_wrapper.cpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021-2024 Hailo Technologies Ltd. All rights reserved.
+ * Distributed under the LGPL 2.1 license (https://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#include "dma_buf_allocator_wrapper.hpp"
+
+Expected<std::shared_ptr<HailoDmaBuffAllocator>> HailoDmaBuffAllocator::create(gchar */*name*/) { // Stub: always fails with HAILO_NOT_IMPLEMENTED
+    return make_unexpected(HAILO_NOT_IMPLEMENTED);
+}
+
+hailo_status HailoDmaBuffAllocator::close_dma_heap_fd() { // Stub: always returns HAILO_NOT_IMPLEMENTED
+    return HAILO_NOT_IMPLEMENTED;
+}
+
+Expected<bool> HailoDmaBuffAllocator::is_dma_buf_memory(GstMapInfo &/*info*/) { // Stub: always fails with HAILO_NOT_IMPLEMENTED
+    return make_unexpected(HAILO_NOT_IMPLEMENTED);
+}
+
+Expected<int> HailoDmaBuffAllocator::memory_get_fd(GstMapInfo &/*info*/) { // Stub: always fails with HAILO_NOT_IMPLEMENTED
+    return make_unexpected(HAILO_NOT_IMPLEMENTED);
+}
\ No newline at end of file
diff --git a/hailort/libhailort/bindings/gstreamer/gst-hailo/os/windows/dma_buf_allocator_wrapper.hpp b/hailort/libhailort/bindings/gstreamer/gst-hailo/os/windows/dma_buf_allocator_wrapper.hpp
new file mode 100644
index 00000000..b5720efb
--- /dev/null
+++ b/hailort/libhailort/bindings/gstreamer/gst-hailo/os/windows/dma_buf_allocator_wrapper.hpp
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2021-2024 Hailo Technologies Ltd. All rights reserved.
+ * Distributed under the LGPL 2.1 license (https://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ */
+#ifndef _HAILO_DMABUF_ALLOCATOR_HPP_
+#define _HAILO_DMABUF_ALLOCATOR_HPP_
+
+#include "../../common.hpp"
+
+class HailoDmaBuffAllocator{ // Windows declaration of the DMA-buf allocator wrapper; member functions mirror the Linux header
+public:
+    static Expected<std::shared_ptr<HailoDmaBuffAllocator>> create(gchar *name);
+    hailo_status close_dma_heap_fd();
+    static Expected<bool> is_dma_buf_memory(GstMapInfo &info);
+    static Expected<int> memory_get_fd(GstMapInfo &info);
+    // Opaque placeholder for the allocator pointer; defaults to null.
+    void* impl = nullptr;
+};
+
+#endif /* _HAILO_DMABUF_ALLOCATOR_HPP_ */
+
diff --git a/hailort/libhailort/bindings/gstreamer/gst-hailo/sync_gsthailonet.cpp b/hailort/libhailort/bindings/gstreamer/gst-hailo/sync_gsthailonet.cpp
index dbfed03c..a553a6aa 100644
--- a/hailort/libhailort/bindings/gstreamer/gst-hailo/sync_gsthailonet.cpp
+++ b/hailort/libhailort/bindings/gstreamer/gst-hailo/sync_gsthailonet.cpp
@@ -69,11 +69,23 @@ enum
 
 G_DEFINE_TYPE(GstSyncHailoNet, gst_sync_hailonet, GST_TYPE_BIN);
 
+// GObject dispose handler: frees the C++ implementation object, then chains up to the parent class.
+static void gst_sync_hailonet_dispose(GObject *object) {
+    GstSyncHailoNet *self = GST_SYNC_HAILONET(object);
+    // impl can be null here (dispose may run more than once, or init returned before assigning it); delete on nullptr is a no-op.
+    delete self->impl;
+    self->impl = nullptr;
+
+    G_OBJECT_CLASS(gst_sync_hailonet_parent_class)->dispose(object);
+}
+
 static void gst_sync_hailonet_class_init(GstSyncHailoNetClass *klass)
 {
     GObjectClass *gobject_class = G_OBJECT_CLASS(klass);
     GstElementClass *element_class = GST_ELEMENT_CLASS(klass);
 
+    gobject_class->dispose = gst_sync_hailonet_dispose;
+
     GstStaticPadTemplate src_template = GST_STATIC_PAD_TEMPLATE("src", GST_PAD_SRC, GST_PAD_ALWAYS, GST_STATIC_CAPS_ANY);
     gst_element_class_add_pad_template(element_class, gst_static_pad_template_get(&src_template));
 
@@ -898,7 +910,7 @@ static void gst_sync_hailonet_init(GstSyncHailoNet *self)
         return;
     }
 
-    self->impl = sync_hailonet_impl.release();
+    self->impl = sync_hailonet_impl->release();
 }
 
 static void gst_sync_hailonet_set_property(GObject *object, guint property_id, const GValue *value, GParamSpec *pspec)
@@ -961,8 +973,6 @@ static GstStateChangeReturn gst_sync_hailonet_change_state(GstElement *element,
             GST_CHECK(HAILO_SUCCESS == status, GST_STATE_CHANGE_FAILURE, element, RESOURCE, "Deactivating network group failed, status = %d\n", status);
         }
 
-        // Cleanup all of hailonet memory
-        sync_hailonet.reset();
         break;
     }
     default:
diff --git a/hailort/libhailort/bindings/gstreamer/gst-hailo/sync_gsthailonet.hpp b/hailort/libhailort/bindings/gstreamer/gst-hailo/sync_gsthailonet.hpp
index 00b26125..682939d1 100644
--- a/hailort/libhailort/bindings/gstreamer/gst-hailo/sync_gsthailonet.hpp
+++ b/hailort/libhailort/bindings/gstreamer/gst-hailo/sync_gsthailonet.hpp
@@ -40,7 +40,7 @@ class HailoSyncNetImpl;
 struct GstSyncHailoNet
 {
     GstBin parent;
-    std::unique_ptr<HailoSyncNetImpl> impl;
+    HailoSyncNetImpl *impl;
 };
 
 struct GstSyncHailoNetClass
diff --git a/hailort/libhailort/bindings/python/CMakeLists.txt b/hailort/libhailort/bindings/python/CMakeLists.txt
deleted file mode 100644
index 14556745..00000000
--- a/hailort/libhailort/bindings/python/CMakeLists.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-cmake_minimum_required(VERSION 3.11.0)
-
-add_subdirectory(src)
diff --git a/hailort/libhailort/bindings/python/platform/hailo_platform/__init__.py b/hailort/libhailort/bindings/python/platform/hailo_platform/__init__.py
index 26cacf47..3282daa0 100644
--- a/hailort/libhailort/bindings/python/platform/hailo_platform/__init__.py
+++ b/hailort/libhailort/bindings/python/platform/hailo_platform/__init__.py
@@ -42,7 +42,7 @@ def _verify_pyhailort_lib_exists():
 
 _verify_pyhailort_lib_exists()
 
-__version__ = "4.18.0"
+__version__ = "4.19.0"
 if _pyhailort.__version__ != __version__:
     raise ImportError(
         f"_pyhailort version ({_pyhailort.__version__}) does not match pyhailort version ({__version__})"
diff --git a/hailort/libhailort/bindings/python/platform/hailo_platform/pyhailort/pyhailort.py b/hailort/libhailort/bindings/python/platform/hailo_platform/pyhailort/pyhailort.py
index 858dbdef..6f6bcf3a 100644
--- a/hailort/libhailort/bindings/python/platform/hailo_platform/pyhailort/pyhailort.py
+++ b/hailort/libhailort/bindings/python/platform/hailo_platform/pyhailort/pyhailort.py
@@ -1,4 +1,5 @@
 from enum import Enum, IntEnum
+from typing import List
 import signal
 import struct
 
@@ -762,6 +763,14 @@ def get_cache_info(self):
     def update_cache_offset(self, offset_delta_bytes):
         return self._configured_network.update_cache_offset(offset_delta_bytes)
 
+    def get_cache_ids(self) -> List[int]:  # Forwards to the configured network's get_cache_ids().
+        return self._configured_network.get_cache_ids()
+
+    def read_cache_buffer(self, cache_id: int) -> bytes:  # Forwards cache_id to the configured network's read_cache_buffer().
+        return self._configured_network.read_cache_buffer(cache_id)
+
+    def write_cache_buffer(self, cache_id: int, buffer: bytes):  # Forwards cache_id and buffer to the configured network's write_cache_buffer().
+        return self._configured_network.write_cache_buffer(cache_id, buffer)
 
 class EmptyContextManager(object):
     """An empty context manager that returns instead of activated network group when scheduler is enabled`."""
@@ -1513,7 +1522,7 @@ class HailoFormatFlags(_pyhailort.FormatFlags):
 
 SUPPORTED_PROTOCOL_VERSION = 2
 SUPPORTED_FW_MAJOR = 4
-SUPPORTED_FW_MINOR = 18
+SUPPORTED_FW_MINOR = 19
 SUPPORTED_FW_REVISION = 0
 
 MEGA_MULTIPLIER = 1000.0 * 1000.0
@@ -2832,6 +2841,9 @@ def configure(self):
             A :obj:`ConfiguredInferModel` should be used inside a context manager, and should not be passed to a different process.
         """
         with ExceptionWrapper():
+            if len(self.output_names) == 1 and self.output().format.order == FormatOrder.HAILO_NMS_WITH_BYTE_MASK:
+                raise HailoRTException("this model is not supported on async infer model API")
+
             configured_infer_model_cpp_obj = self._infer_model.configure()
             return ConfiguredInferModel(configured_infer_model_cpp_obj, self)
 
@@ -2926,7 +2938,6 @@ class for NMS transformation info.
         max_bboxes_per_class: int
         quant_info: _pyhailort.QuantInfo
         output_dtype: numpy.dtype = numpy.dtype('float32')
-        batch_size: int = 1
 
     @dataclass
     class NmsHailoTransformationInfo(NmsTransformationInfo):
@@ -2976,6 +2987,11 @@ def __init__(self, infer_stream, nms_info=None):
                 else:
                     self._nms_info = None
 
+            @staticmethod
+            def _validate_c_contiguous(buffer):  # Raises HailoRTException unless buffer.flags.c_contiguous is truthy.
+                if not buffer.flags.c_contiguous:
+                    raise HailoRTException("Buffer must be C_CONTIGUOUS")
+
             def set_buffer(self, buffer):
                 """
                 Sets the edge's buffer to a new one.
@@ -2984,6 +3000,7 @@ def set_buffer(self, buffer):
                     buffer (numpy.array): The new buffer to set. The array's shape should match the edge's shape.
                 """
                 with ExceptionWrapper():
+                    self._validate_c_contiguous(buffer)
                     self._infer_stream.set_buffer(buffer)
 
                 self._buffer = buffer
@@ -2996,20 +3013,23 @@ def get_buffer(self, tf_format=False):
                     tf_format (bool, optional): Whether the output format is tf or hailo. Relevant for NMS outputs. The output
                         can be re-formatted into two formats (TF, Hailo) and the user through choosing the True/False function
                         parameter, can decide which format to receive.
+
                         For detection outputs:
                         TF format is an :obj:`numpy.array` with shape [number of classes, bounding box params, max bounding boxes per class]
-                        where the 3rd dimension (bounding box params) is of a fixed length of 5 (y_min, x_min, y_max, x_max, score).
+                        where the 2nd dimension (bounding box params) is of a fixed length of 5 (y_min, x_min, y_max, x_max, score).
 
-                        Hailo format is a list of detections per class: [[class_0 detections], [class_1 detections], ... [class_n-1 detections]]
-                        where each detection is an :obj:`numpy.array` with shape (y_min, x_min, y_max, x_max, score).
+                        Hailo format is a list of :obj:`numpy.array` where each array represents the detections for a specific class:
+                        [cls0_detections, cls1_detections, ...]. The length of the list is the number of classes.
+                        Each :obj:`numpy.array` shape is (number of detections, bounding box params) where the 2nd dimension
+                        (bounding box params) is of a fixed length of 5 (y_min, x_min, y_max, x_max, score).
 
                         For segmentation outputs:
                         TF format is an :obj:`numpy.array` with shape [1, image_size + number_of_params, max bounding boxes per class]
-                        where the 3rd dimension (image_size + number_of_params) is calculated as: mask (image_width - image_height) + (y_min, x_min, y_max, x_max, score, class_id).
+                        where the 2nd dimension (image_size + number_of_params) is calculated as: mask (image_width * image_height) + (y_min, x_min, y_max, x_max, score, class_id).
                         The mask is a binary mask of the segmentation output where the ROI (region of interest) is mapped to 1 and the background is mapped to 0.
 
                         Hailo format is a list of detections per class: [detecion0, detection1, ... detection_m]
-                        where each detection is an :obj:`HailoDetection`
+                        where each detection is an :obj:`HailoDetection`. The detections are sorted decreasingly by score.
 
                 Returns:
                     buffer (numpy.array): the buffer of the edge.
@@ -3029,14 +3049,14 @@ def get_buffer(self, tf_format=False):
                         buffer = HailoRTTransformUtils._output_raw_buffer_to_nms_with_byte_mask_format(
                             [self._buffer],
                             nms_info.number_of_classes,
-                            nms_info.batch_size,
+                            1,
                             nms_info.input_height,
                             nms_info.input_width,
                             nms_info.max_bboxes_per_class,
                             nms_info.output_dtype,
                             nms_info.use_tf_nms_format,
-                        )
-                    else:
+                        )[0]
+                    elif nms_info.format_order == FormatOrder.HAILO_NMS:
                         if nms_info.use_tf_nms_format:
                             nms_shape = [
                                 nms_info.number_of_classes,
@@ -3044,7 +3064,7 @@ def get_buffer(self, tf_format=False):
                                 nms_info.max_bboxes_per_class,
                             ]
 
-                            shape = [nms_info.batch_size, *nms_shape]
+                            shape = [1, *nms_shape]
                             flat_result = self._buffer.reshape(-1)
 
                             buffer = HailoRTTransformUtils.output_raw_buffer_to_nms_tf_format(
@@ -3052,15 +3072,14 @@ def get_buffer(self, tf_format=False):
                                 shape,
                                 nms_info.output_dtype,
                                 self._quantized_empty_bbox,
-                            )
+                            )[0]
                         else:
-                            buffer = HailoRTTransformUtils.output_raw_buffer_to_nms_format(
-                                [self._buffer],
+                            buffer = HailoRTTransformUtils.output_raw_buffer_to_nms_format_single_frame(
+                                self._buffer,
                                 nms_info.number_of_classes,
                             )
-
-                if tf_format:
-                    buffer = buffer[0]
+                    else:
+                        raise HailoRTException(f"Unsupported NMS format order: {nms_info.format_order}.")
 
                 return buffer
 
@@ -3206,12 +3225,17 @@ def create_bindings(self, input_buffers=None, output_buffers=None):
 
     def wait_for_async_ready(self, timeout_ms=1000, frames_count=1):
         """
-        Waits until the model is ready to launch a new asynchronous inference operation.
-        The readiness of the model is determined by the ability to push buffers to the asynchronous inference pipeline.
+        The readiness of the model to launch is determined by the ability to push buffers to the asynchronous inference pipeline.
+        If the model is ready, the method will return immediately.
+        If the model is not ready, the method will wait for the model to be ready.
 
-        args:
-            timeout_ms (int, optional): Amount of time to wait until the model is ready in milliseconds.
-            frames_count (int, optional): The count of buffers you intent to infer in the next request. Useful for batch inference. Default is 1
+        Args:
+            timeout_ms (int, optional): Max amount of time to wait until the model is ready in milliseconds.
+                Defaults to 1000
+            frames_count (int, optional): The number of buffers you intend to infer in the next request.
+                Useful for batch inference. Defaults to 1
+
+        Note: Calling this function with frames_count greater than :func:`ConfiguredInferModel.get_async_queue_size` will time out.
 
         Raises:
             :class:`HailoRTTimeout` in case the model is not ready in the given timeout.
@@ -3254,6 +3278,10 @@ def run_async(self, bindings, callback=None):
             As a standard, callbacks should be executed as quickly as possible.
             In case of an error, the pipeline will be shut down.
 
+        Note:
+            To ensure the inference pipeline can handle new buffers, it is recommended to first call
+                 :func:`ConfiguredInferModel.wait_for_async_ready`.
+
         Returns:
             AsyncInferJob: The async inference job object.
 
@@ -3523,7 +3551,7 @@ def get_physical_devices_ids(self):
         with ExceptionWrapper():
             return self._vdevice.get_physical_devices_ids()
 
-    def create_infer_model(self, hef_source, network_name=""):
+    def create_infer_model(self, hef_source, name=""):
         """
         Creates the infer model from an hef.
 
@@ -3531,7 +3559,7 @@ def create_infer_model(self, hef_source, network_name=""):
             hef_source (str or bytes): The source from which the HEF object will be created. If the
                 source type is `str`, it is treated as a path to an hef file. If the source type is
                 `bytes`, it is treated as a buffer. Any other type will raise a ValueError.
-            network_name (str, optional): The string of the network name.
+            name (str, optional): The string of the model name.
 
         Returns:
             :obj:`InferModel`: The infer model object.
@@ -3551,9 +3579,9 @@ def create_infer_model(self, hef_source, network_name=""):
 
         with ExceptionWrapper():
             if type(hef_source) is bytes:
-                infer_model_cpp_obj = self._vdevice.create_infer_model_from_buffer(hef_source, network_name)
+                infer_model_cpp_obj = self._vdevice.create_infer_model_from_buffer(hef_source, name)
             else:
-                infer_model_cpp_obj = self._vdevice.create_infer_model_from_file(hef_source, network_name)
+                infer_model_cpp_obj = self._vdevice.create_infer_model_from_file(hef_source, name)
 
         infer_model = InferModel(infer_model_cpp_obj, hef_source)
         return infer_model
diff --git a/hailort/libhailort/bindings/python/platform/hailo_tutorials/notebooks/HRT_3_Inference_Single_Model_Tutorial.ipynb b/hailort/libhailort/bindings/python/platform/hailo_tutorials/notebooks/HRT_0_Async_Inference_Tutorial.ipynb
similarity index 76%
rename from hailort/libhailort/bindings/python/platform/hailo_tutorials/notebooks/HRT_3_Inference_Single_Model_Tutorial.ipynb
rename to hailort/libhailort/bindings/python/platform/hailo_tutorials/notebooks/HRT_0_Async_Inference_Tutorial.ipynb
index c6c5267f..806db9b9 100644
--- a/hailort/libhailort/bindings/python/platform/hailo_tutorials/notebooks/HRT_3_Inference_Single_Model_Tutorial.ipynb
+++ b/hailort/libhailort/bindings/python/platform/hailo_tutorials/notebooks/HRT_0_Async_Inference_Tutorial.ipynb
@@ -5,36 +5,36 @@
    "metadata": {},
    "source": [
     "\n",
-    "# Python inference tutorial\n",
+    "# Python Async Inference Tutorial - Single Model\n",
     "\n",
-    "This tutorial will describe how to use the Inference Process.\n",
+    "This tutorial describes how to run an inference process using the `InferModel` (Async) API, which is the recommended option.\n",
     "\n",
     "\n",
     "**Requirements:**\n",
     "\n",
     "* Run the notebook inside the Python virtual environment: ```source hailo_virtualenv/bin/activate```\n",
     "\n",
-    "It is recommended to use the command ``hailo tutorial`` (when inside the virtualenv) to open a Jupyter server that contains the tutorials."
+    "When inside the ```virtualenv```, use the command ``hailo tutorial`` to open a Jupyter server that contains the tutorials."
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "## Standalone hardware deployment\n",
-    "\n",
-    "The standalone flow allows direct access to the HW, developing applications directly on top of Hailo\n",
-    "core HW, using HailoRT. This way the Hailo hardware can be used without Tensorflow, and\n",
-    "even without the Hailo SDK (after the HEF is built).\n",
+    "A few things to note:\n",
     "\n",
+    "### HEF\n",
     "An HEF is Hailo's binary format for neural networks. The HEF files contain:\n",
     "\n",
     "* Target HW configuration\n",
     "* Weights\n",
     "* Metadata for HailoRT (e.g. input/output scaling)\n",
     "\n",
-    "First create the desired target object.\n",
-    "In this example the Hailo-8 PCIe interface is used."
+    "### Model Scheduler \n",
+    "The Model Scheduler is a HailoRT component that enhances and simplifies the usage\n",
+    "of the same Hailo device by multiple networks. The responsibility for activating/deactivating the network\n",
+    "groups is done **automatically** without user application intervention.\n",
+    "Enabling the Model Scheduler is best practice, but it is not necessary for single model inference."
    ]
   },
   {
diff --git a/hailort/libhailort/bindings/python/platform/hailo_tutorials/notebooks/HRT_4_Async_Inference_Multiple_Models_Tutorial.ipynb b/hailort/libhailort/bindings/python/platform/hailo_tutorials/notebooks/HRT_1_Async_Inference_Multiple_Models_Tutorial.ipynb
similarity index 77%
rename from hailort/libhailort/bindings/python/platform/hailo_tutorials/notebooks/HRT_4_Async_Inference_Multiple_Models_Tutorial.ipynb
rename to hailort/libhailort/bindings/python/platform/hailo_tutorials/notebooks/HRT_1_Async_Inference_Multiple_Models_Tutorial.ipynb
index cf6106e3..3922d631 100644
--- a/hailort/libhailort/bindings/python/platform/hailo_tutorials/notebooks/HRT_4_Async_Inference_Multiple_Models_Tutorial.ipynb
+++ b/hailort/libhailort/bindings/python/platform/hailo_tutorials/notebooks/HRT_1_Async_Inference_Multiple_Models_Tutorial.ipynb
@@ -5,36 +5,17 @@
    "metadata": {},
    "source": [
     "\n",
-    "# Python Async Inference Tutorial - Multiple Models with Model Scheduler\n",
+    "# Python Async Inference Tutorial - Multiple Models\n",
     "\n",
-    "This tutorial will describe how to run an inference process.\n",
+    "This tutorial describes how to run an inference process with multiple models using the `InferModel` (Async) API, which is the recommended option.\n",
     "\n",
     "\n",
     "**Requirements:**\n",
     "\n",
     "* Run the notebook inside the Python virtual environment: ```source hailo_virtualenv/bin/activate```\n",
+    "* Enable HailoRT Multi-Process Service before running inference. For instructions, see [Multi Process Service](https://hailo.ai/developer-zone/documentation/hailort/latest/?sp_referrer=inference/inference.html#multi-process-service).\n",
     "\n",
-    "It is recommended to use the command ``hailo tutorial`` (when inside the ```virtualenv```) to open a Jupyter server that contains the tutorials."
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Running Inference using HailoRT\n",
-    "\n",
-    "In this example we will use the Model Scheduler to run inference on multiple models.\n",
-    "Each model is represented by an HEF which is built using the Hailo Dataflow Compiler.\n",
-    "An HEF is Hailo's binary format for neural networks. The HEF files contain:\n",
-    "\n",
-    "* Target HW configuration\n",
-    "* Weights\n",
-    "* Metadata for HailoRT (e.g. input/output scaling)\n",
-    "\n",
-    "The Model Scheduler is an HailoRT component that comes to enhance and simplify the usage\n",
-    "of the same Hailo device by multiple networks. The responsibility for activating/deactivating the network\n",
-    "groups is now under HailoRT, and done **automatically** without user application intervention.\n",
-    "In order to use the Model Scheduler, create the VDevice with scheduler enabled, configure all models to the device, and start inference on all models:\n"
+    "When inside the ```virtualenv```, use the command ``hailo tutorial`` to open a Jupyter server that contains the tutorials."
    ]
   },
   {
@@ -67,12 +48,12 @@
     "number_of_frames = 4\n",
     "timeout_ms = 10000\n",
     "\n",
-    "def infer(multi_process_service):\n",
+    "def infer(should_use_multi_process_service):\n",
     "    # Create a VDevice\n",
     "    params = VDevice.create_params()\n",
     "    params.scheduling_algorithm = HailoSchedulingAlgorithm.ROUND_ROBIN\n",
     "    params.group_id = \"SHARED\" \n",
-    "    if multi_process_service:\n",
-    "        params.multi_process_service = multi_process_service\n",
+    "    if should_use_multi_process_service:\n",
+    "        params.multi_process_service = should_use_multi_process_service\n",
     "    \n",
     "    with VDevice(params) as vdevice:\n",
@@ -112,9 +93,11 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "### Running multiple models concurrently\n",
+    "### Running Multiple Models Concurrently\n",
+    "\n",
+    "The models can be run concurrently using either multiple `Thread` objects or multiple `Process` objects\n",
     "\n",
-    "The models can be run concurrently using either multiple `Thread` objects or multiple `Process` objects"
+    "* Using `Thread`s does not require activating the `hailort_service`"
    ]
   },
   {
diff --git a/hailort/libhailort/bindings/python/platform/hailo_tutorials/notebooks/HRT_0_Inference_Tutorial.ipynb b/hailort/libhailort/bindings/python/platform/hailo_tutorials/notebooks/HRT_2_Infer_Pipeline_Inference_Tutorial.ipynb
similarity index 86%
rename from hailort/libhailort/bindings/python/platform/hailo_tutorials/notebooks/HRT_0_Inference_Tutorial.ipynb
rename to hailort/libhailort/bindings/python/platform/hailo_tutorials/notebooks/HRT_2_Infer_Pipeline_Inference_Tutorial.ipynb
index a6776914..95c607d8 100644
--- a/hailort/libhailort/bindings/python/platform/hailo_tutorials/notebooks/HRT_0_Inference_Tutorial.ipynb
+++ b/hailort/libhailort/bindings/python/platform/hailo_tutorials/notebooks/HRT_2_Infer_Pipeline_Inference_Tutorial.ipynb
@@ -5,36 +5,16 @@
    "metadata": {},
    "source": [
     "\n",
-    "# Python inference tutorial\n",
+    "# Python Inference Tutorial - Single Model\n",
     "\n",
-    "This tutorial will describe how to use the Inference Process.\n",
+    "This tutorial describes how to run an inference process using the `InferPipeline` API (sync API), which is an alternative to the recommended Async API.\n",
     "\n",
     "\n",
     "**Requirements:**\n",
     "\n",
     "* Run the notebook inside the Python virtual environment: ```source hailo_virtualenv/bin/activate```\n",
     "\n",
-    "It is recommended to use the command ``hailo tutorial`` (when inside the virtualenv) to open a Jupyter server that contains the tutorials."
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Standalone hardware deployment\n",
-    "\n",
-    "The standalone flow allows direct access to the HW, developing applications directly on top of Hailo\n",
-    "core HW, using HailoRT. This way the Hailo hardware can be used without Tensorflow, and\n",
-    "even without the Hailo SDK (after the HEF is built).\n",
-    "\n",
-    "An HEF is Hailo's binary format for neural networks. The HEF files contain:\n",
-    "\n",
-    "* Target HW configuration\n",
-    "* Weights\n",
-    "* Metadata for HailoRT (e.g. input/output scaling)\n",
-    "\n",
-    "First create the desired target object.\n",
-    "In this example the Hailo-8 PCIe interface is used."
+    "When inside the ```virtualenv```, use the command ``hailo tutorial`` to open a Jupyter server that contains the tutorials."
    ]
   },
   {
diff --git a/hailort/libhailort/bindings/python/platform/hailo_tutorials/notebooks/HRT_2_Inference_Tutorial_Multi_Process_Service.ipynb b/hailort/libhailort/bindings/python/platform/hailo_tutorials/notebooks/HRT_3_Infer_Pipeline_Inference_Multiple_Models_Tutorial.ipynb
similarity index 78%
rename from hailort/libhailort/bindings/python/platform/hailo_tutorials/notebooks/HRT_2_Inference_Tutorial_Multi_Process_Service.ipynb
rename to hailort/libhailort/bindings/python/platform/hailo_tutorials/notebooks/HRT_3_Infer_Pipeline_Inference_Multiple_Models_Tutorial.ipynb
index 0f450ef8..6f21a89b 100644
--- a/hailort/libhailort/bindings/python/platform/hailo_tutorials/notebooks/HRT_2_Inference_Tutorial_Multi_Process_Service.ipynb
+++ b/hailort/libhailort/bindings/python/platform/hailo_tutorials/notebooks/HRT_3_Infer_Pipeline_Inference_Multiple_Models_Tutorial.ipynb
@@ -7,7 +7,7 @@
     "\n",
     "# Python Inference Tutorial - Multi Process Service and Model Scheduler\n",
     "\n",
-    "This tutorial describes how to run an inference process using the multi-process service.\n",
+    "This tutorial describes how to run an inference process using the `InferPipeline` API (sync API), which is an alternative to the recommended Async API, with the multi-process service and the Model Scheduler.\n",
     "\n",
     "\n",
     "**Requirements:**\n",
@@ -15,27 +15,7 @@
     "* Enable HailoRT Multi-Process Service before running inference. For instructions, see [Multi Process Service](https://hailo.ai/developer-zone/documentation/hailort/latest/?sp_referrer=inference/inference.html#multi-process-service).\n",
     "* Run the notebook inside the Python virtual environment: ```source hailo_virtualenv/bin/activate```\n",
     "\n",
-    "It is recommended to use the command ``hailo tutorial`` (when inside the ```virtualenv```) to open a Jupyter server that contains the tutorials."
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Running Inference using HailoRT\n",
-    "\n",
-    "In this example we will use the Model Scheduler to run inference on multiple models.\n",
-    "Each model is represented by an HEF which is built using the Hailo Dataflow Compiler.\n",
-    "An HEF is Hailo's binary format for neural networks. The HEF files contain:\n",
-    "\n",
-    "* Target HW configuration\n",
-    "* Weights\n",
-    "* Metadata for HailoRT (e.g. input/output scaling)\n",
-    "\n",
-    "The Model Scheduler is an HailoRT component that comes to enhance and simplify the usage\n",
-    "of the same Hailo device by multiple networks. The responsibility for activating/deactivating the network\n",
-    "groups is now under HailoRT, and done **automatically** without user application intervention.\n",
-    "In order to use the Model Scheduler, create the VDevice with scheduler enabled, configure all models to the device, and start inference on all models:"
+    "When inside the ```virtualenv```, use the command ``hailo tutorial`` to open a Jupyter server that contains the tutorials."
    ]
   },
   {
diff --git a/hailort/libhailort/bindings/python/platform/hailo_tutorials/notebooks/HRT_1_Power_Measurement_Tutorial.ipynb b/hailort/libhailort/bindings/python/platform/hailo_tutorials/notebooks/HRT_4_Power_measurement_Tutorial.ipynb
similarity index 82%
rename from hailort/libhailort/bindings/python/platform/hailo_tutorials/notebooks/HRT_1_Power_Measurement_Tutorial.ipynb
rename to hailort/libhailort/bindings/python/platform/hailo_tutorials/notebooks/HRT_4_Power_measurement_Tutorial.ipynb
index 73be8adf..3b3358a1 100644
--- a/hailort/libhailort/bindings/python/platform/hailo_tutorials/notebooks/HRT_1_Power_Measurement_Tutorial.ipynb
+++ b/hailort/libhailort/bindings/python/platform/hailo_tutorials/notebooks/HRT_4_Power_measurement_Tutorial.ipynb
@@ -4,11 +4,11 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "# Python power measurement tutorial\n",
+    "# Python Power Measurement Tutorial\n",
     "\n",
-    "This tutorial will show how to perform a power measurement on the chip.\n",
+    "This tutorial shows how to perform a power measurement on the chip.\n",
     "\n",
-    "The Hailo chip supports power measurement which is done via the control protocol.\n",
+    "The Hailo-8 chip supports power measurement which is done via the control protocol, if an INA231 is assembled on the board.\n",
     "\n",
     "**Requirements:**\n",
     "\n",
@@ -18,14 +18,14 @@
     "\n",
     "* These examples should run in a different process than the one that performs the actual inference.\n",
     "\n",
-    "It is recommended to use the command ``hailo tutorial`` (when inside the virtualenv) to open a Jupyter server that contains the tutorials."
+    "When inside the ```virtualenv```, run ``hailo tutorial`` to open a Jupyter server that contains the tutorials."
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "## Single power measurement"
+    "## Single Power Measurement"
    ]
   },
   {
@@ -60,14 +60,11 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "When using the ``power_measurement()`` function with no parameters, the function tries to detect which board is connected (evaluation board, M.2 or mPCIe) and determine the DVM accordingly (at the moment only the mentioned boards are supported). \n",
+    "When using the ``power_measurement()`` function with no parameters, the function tries to detect which Hailo-8 board is connected (evaluation board, M.2 or mPCIe) and determine the DVM accordingly (at the moment only the boards referred to in this section have an INA231 assembled and are supported). \n",
     "\n",
     "The parameter ``dvm`` (of type ``DvmTypes``) defines which DVM will be measured. The user can choose a specific DVM or choose the default DVM. The meaning of the default DVM changes according to the board or module in use. \n",
     "\n",
-    "The default for the evaluation board is the sum of three DVMs: ``DvmTypes.VDD_CORE``, ``DvmTypes.MIPI_AVDD`` and ``DvmTypes.AVDD_H``. The sum of these three DVMs approximates of the total power consumption of the chip in PCIe setups. Only power can be measured using this default option, as voltage and current can't be summed up this way. \n",
-    "\n",
-    "The default for platforms supporting current monitoring, such as M.2 and mPCIe modules, is ``DvmTypes.OVERCURRENT_PROTECTION``, which measures the power consumption of the whole module. \n",
-    "\n",
+    "The default for the evaluation board is the sum of three DVMs: ``DvmTypes.VDD_CORE``, ``DvmTypes.MIPI_AVDD`` and ``DvmTypes.AVDD_H``. The sum of these three DVMs approximates the total power consumption of the chip in PCIe setups. Only power can be measured using this default option, as voltage and current cannot be totaled (or calculated) this way.\n", "\n", "The default for platforms supporting current monitoring, such as M.2 and mPCIe modules, is ``DvmTypes.OVERCURRENT_PROTECTION``, which measures the power consumption of the whole module. \n", "\n",
     "See the API documentation for further details about the supported DVMs and measurement types.\n"
    ]
   },
@@ -122,7 +119,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "Clear old samples and statistics (min, max, average) each time the measurement is taken from the chip."
+    "Clear the old samples and statistics (min, max, average) each time the measurement is taken from the chip."
    ]
   },
   {
diff --git a/hailort/libhailort/bindings/python/platform/setup.py b/hailort/libhailort/bindings/python/platform/setup.py
index 9cd3ceb3..c567c949 100644
--- a/hailort/libhailort/bindings/python/platform/setup.py
+++ b/hailort/libhailort/bindings/python/platform/setup.py
@@ -1,102 +1,113 @@
 """
-builds hailo_platform python package and its C++ dependencies using cmake
+Builds hailo_platform python package and its C++ dependencies using cmake
 """
-import platform
+
+import re
 import os
 import subprocess
 import sys
+import shutil
+import glob
 
 from pathlib import Path
 from setuptools import setup, Extension, find_packages
 from setuptools.command.build_ext import build_ext as orig_build_ext
+from setuptools.command.install_lib import install_lib as orig_install_lib
 from wheel.bdist_wheel import bdist_wheel as orig_bdist_wheel
 
+_build_type = os.environ.get("CMAKE_BUILD_TYPE", "Release")
+_plat_name = ""
+
+
+class install_lib(orig_install_lib):
+    def install(self):
+        """
+        When cross compiling, the extension is not automatically copied into the install dir, and therefore it needs to be done manually.
+        """
+        outfiles = super().install()
+
+        arch = re.sub("linux[_-]", "", _plat_name)  # remove linux prefix as the extension architecture does not include it
+        extension = "pyd" if os.name == "nt" else "so"
+        py_version = f"{sys.version_info.major}{sys.version_info.minor}"
+        lib_regex = f"_pyhailort*{py_version}*{arch}*.{extension}"
+        dst = os.path.join(self.install_dir, "hailo_platform", "pyhailort")
+        already_copied = len(glob.glob(f"{dst}/{lib_regex}")) > 0
+        if not already_copied:
+            # Windows (nt) adds a <build_type> dir after the build dir, so if DCMAKE_LIBRARY_OUTPUT_DIRECTORY=build,
+            # Windows will put the library in build/<build_type> (e.g. build/Release) while Linux will put it where it was asked
+            build_dir = f"build/{_build_type}" if os.name == "nt" else "build"
+            lib = glob.glob(os.path.join(build_dir, lib_regex))[0]
+            shutil.copy2(lib, dst)
 
-_plat_name = None
-def _fix_plat_name(s):
-    # plat_name does not require the "linux_" prefix
-    return s.replace(platform.processor(), _plat_name.replace("linux_", ""))
+        return outfiles
 
 
 class bdist_wheel(orig_bdist_wheel):
-    """makes the wheel platform-dependant so it can be based on the _pyhailort architecture"""
     def finalize_options(self):
-        # Save the plat_name option and pass it along to build_ext which will use it to change the processor in the
-        # extension name.
-        # All other paths will still use the naive processor, but that's ok, since the only thing that is packed into 
-        # the wheel is the actual shared library, so only its name is relevant. Fixing all paths will require tweaking
-        # build_py, install, install_lib commands or fixing this somehow all accross setuptools
-        global _plat_name
-        _plat_name = self.plat_name
-        orig_bdist_wheel.finalize_options(self)
+        """
+        Force the wheel name to include the platform name based on the extension module.
+        """
+        super().finalize_options()
         self.root_is_pure = False
 
+        global _plat_name
+        _plat_name = self.plat_name  # update plat_name to allow access in install_lib
+
 
 class build_ext(orig_build_ext):
     OPTIONAL_CMAKE_ENV_VARIABLES = [
+        "CMAKE_BUILD_TYPE",
+        "CMAKE_GENERATOR",
         "CMAKE_TOOLCHAIN_FILE",
         "HAILORT_INCLUDE_DIR",
         "LIBHAILORT_PATH",
+        "PYTHON_EXECUTABLE",
         "PYTHON_INCLUDE_DIRS",
-        "CMAKE_GENERATOR",
         "PYTHON_LIBRARY",
+        "PYBIND11_FINDPYTHON",
     ]
 
-    """defines a cmake command that will be called from the python build process"""
     def run(self):
-        cfg = 'Debug' if self.debug else 'Release'
-
-        build_args = f"--config {cfg}"
-        build_directory = os.path.abspath(self.build_temp)
-        cmake_list_dir = Path(__file__).absolute().parents[1] / "src"
+        """
+        Defines a cmake command that will be called from the python build process.
+        The cmake command will build the C++ extension (_pyhailort) and install it.
+        Multiple CMake variables can be passed as environment variables to control the target library.
+        """
+        build_args = f"--config {_build_type} --target install"
         python_version = f"{sys.version_info.major}.{sys.version_info.minor}"
 
+        current_dir = Path(__file__).parent.absolute()
+        cmake_list_dir = current_dir.parent / "src"
+        build_dir = current_dir / "build"
+
         cmake_args = [
-            f'-DCMAKE_BUILD_TYPE={cfg}',
-            f'-DCMAKE_LIBRARY_OUTPUT_DIRECTORY={build_directory}',
-            f'-DPYBIND11_PYTHON_VERSION={python_version}',
-            f'-DPYTHON_EXECUTABLE={sys.executable}',
+            f"-B{build_dir}",
+            f"-DCMAKE_BUILD_TYPE={_build_type}",
+            f"-DCMAKE_LIBRARY_OUTPUT_DIRECTORY={build_dir}",
+            f'-DPYBIND11_PYTHON_VERSION="{python_version}"',
         ]
 
         for env_var in self.OPTIONAL_CMAKE_ENV_VARIABLES:
             if env_var in os.environ:
-                if env_var == "CMAKE_GENERATOR":
-                    cmake_args.append(f'-G "{os.environ[env_var]}"')
-                else:
-                    cmake_args.append(f"-D{env_var}={os.environ[env_var]}")
+                cmake_args.append(f'-D{env_var}="{os.environ[env_var]}"')
 
-        if not os.path.exists(self.build_temp):
-            os.makedirs(self.build_temp)
+        if not build_dir.exists():
+            os.makedirs(build_dir)
 
         subprocess.run(
             f"cmake {cmake_list_dir} {' '.join(cmake_args)}",
-            cwd=self.build_temp,
+            cwd=cmake_list_dir,
             shell=True,
-            check=True
+            check=True,
         )
 
         subprocess.run(
             f"cmake --build . {build_args}",
-            cwd=self.build_temp,
+            cwd=build_dir,
             shell=True,
             check=True,
         )
 
-        for ext in self.extensions:
-            ext_filename = self.get_ext_filename(ext.name)
-            if platform.system() == "Linux" and _plat_name:
-                ext_filename = _fix_plat_name(ext_filename)
-
-            dst = Path(self.get_ext_fullpath(ext.name)).resolve().parent / "hailo_platform/pyhailort/"
-
-            build_temp = Path(self.build_temp).resolve()
-            if os.name == "nt":
-                src = build_temp / cfg / ext_filename
-            else:
-                src = build_temp / ext_filename
-
-            self.copy_file(src, dst)
-
 
 if __name__ == "__main__":
     setup(
@@ -104,7 +115,8 @@ def run(self):
         author_email="contact@hailo.ai",
         cmdclass={
             "bdist_wheel": bdist_wheel,
-            "build_ext": build_ext, # Build the C++ extension (_pyhailort) using cmake
+            "build_ext": build_ext,  # Build the C++ extension (_pyhailort) using cmake
+            "install_lib": install_lib,  # Copy the extension to the install dir
         },
         description="HailoRT",
         entry_points={
@@ -112,8 +124,8 @@ def run(self):
                 "hailo=hailo_platform.tools.hailocli.main:main",
             ]
         },
-        ext_modules= [
-            Extension('_pyhailort', sources=[]),
+        ext_modules=[
+            Extension("_pyhailort", sources=[]),
         ],
         install_requires=[
             "argcomplete",
@@ -122,14 +134,13 @@ def run(self):
             "netaddr",
             "netifaces",
             "verboselogs",
-            # Pinned versions
-            "numpy==1.23.3",
+            "numpy<2",
         ],
         name="hailort",
         package_data={
             "hailo_platform": [
                 "../hailo_tutorials/notebooks/*",
-                "../hailo_tutorials/hefs/*"
+                "../hailo_tutorials/hefs/*",
             ]
         },
         packages=find_packages(),
@@ -138,6 +149,6 @@ def run(self):
             "linux_aarch64",
         ],
         url="https://hailo.ai/",
-        version="4.18.0",
+        version="4.19.0",
         zip_safe=False,
     )
diff --git a/hailort/libhailort/bindings/python/src/CMakeLists.txt b/hailort/libhailort/bindings/python/src/CMakeLists.txt
index 1b5706c9..587b11b1 100644
--- a/hailort/libhailort/bindings/python/src/CMakeLists.txt
+++ b/hailort/libhailort/bindings/python/src/CMakeLists.txt
@@ -1,44 +1,33 @@
-cmake_minimum_required(VERSION 3.0.0)
+cmake_minimum_required(VERSION 3.5.0)
 project(pyhailort)
 
-get_filename_component(HAILORT_PROJECT_SOURCE_DIR "${CMAKE_CURRENT_LIST_DIR}/../../../../../" ABSOLUTE)
-get_filename_component(HAILORT_COMMON_DIR "${HAILORT_PROJECT_SOURCE_DIR}/hailort/" ABSOLUTE)
-get_filename_component(PYHAILORT_DIR "${CMAKE_CURRENT_LIST_DIR}" ABSOLUTE)
+if(CMAKE_VERSION VERSION_GREATER_EQUAL "3.9")
+    set(CMAKE_INTERPROCEDURAL_OPTIMIZATION TRUE)
+endif()
 
-set(HAILO_EXTERNAL_DIR ${HAILORT_COMMON_DIR}/external)
+set(HAILORT_COMMON_DIR "${CMAKE_CURRENT_LIST_DIR}/../../../../../hailort/")
 set(HAILO_EXTERNALS_CMAKE_SCRIPTS ${HAILORT_COMMON_DIR}/cmake/external/)
+set(HAILO_EXTERNAL_DIR ${HAILORT_COMMON_DIR}/external) # used in pybind11.cmake
+
+if(UNIX)
+    # PYTHON_MODULE_EXTENSION does not take the CMAKE_SYSTEM_PROCESSOR into account so it needs to be added manually
+    string(REGEX REPLACE "\\." "" PYBIND11_PYTHON_VERSION_NO_DOT ${PYBIND11_PYTHON_VERSION})
+    set(PYTHON_MODULE_EXTENSION ".cpython-${PYBIND11_PYTHON_VERSION_NO_DOT}-${CMAKE_SYSTEM_PROCESSOR}-linux-gnu.so")
+endif()
+
+get_filename_component(
+    HAILO_PYHAILORT_DIR
+    "${CMAKE_CURRENT_LIST_DIR}/../platform/hailo_platform/pyhailort/"
+    ABSOLUTE
+)
 
 option(LIBHAILORT_PATH "Path to libhailort to link against" "")
 option(HAILORT_INCLUDE_DIR "Path to include dir of libhailort" "")
 
-include(ExternalProject)
 include(GNUInstallDirs)
 include(${HAILO_EXTERNALS_CMAKE_SCRIPTS}/pybind11.cmake)
 include_directories(${HAILORT_COMMON_DIR})
 
-FUNCTION(exclude_archive_libs_symbols target) # should be same as in common_compiler_options.cmake
-    if(WIN32)
-        # TODO: check if there are required actions for Windows
-    elseif(UNIX)
-        get_property(TEMP_LINK_FLAGS TARGET ${target} PROPERTY LINK_FLAGS)
-        set(TEMP_LINK_FLAGS "${TEMP_LINK_FLAGS} -Wl,--exclude-libs=ALL")
-        set_property(TARGET ${target} PROPERTY LINK_FLAGS ${TEMP_LINK_FLAGS})
-    endif()
-ENDFUNCTION()
-
-if(${CMAKE_SYSTEM_NAME} STREQUAL "Linux")
-    if(NOT DEFINED PYBIND11_PYTHON_VERSION)
-        message(FATAL_ERROR "PYBIND11_PYTHON_VERSION is not defined. To build _pyhailort, pass python version")
-    endif()
-    string(REPLACE "." "" dpython ${PYBIND11_PYTHON_VERSION}) # E.g "3.5" -> "35"
-    if(${dpython} LESS "38")
-        set(m_flag "m")
-    else()
-        set(m_flag "")
-    endif()
-    set(PYTHON_MODULE_EXTENSION ".cpython-${dpython}${m_flag}-${CMAKE_SYSTEM_PROCESSOR}-linux-gnu.so")
-endif()
-
 pybind11_add_module(_pyhailort
     pyhailort.cpp
     device_api.cpp
@@ -56,33 +45,25 @@ set_target_properties(_pyhailort PROPERTIES
     CXX_EXTENSIONS            NO
     C_VISIBILITY_PRESET       hidden
     CXX_VISIBILITY_PRESET     hidden
-    # VISIBILITY_INLINES_HIDDEN YES
 )
 
-# allow user to inject a specific libhailort (and headers) to link against.
-# use case: cross compilation
 if(LIBHAILORT_PATH AND HAILORT_INCLUDE_DIR)
-    message(STATUS "LIBHAILORT_PATH is set. Will link against given libhailort: ${LIBHAILORT_PATH}")
-    message(STATUS "HAILORT_INCLUDE_DIR is set. Will include given include dir: ${HAILORT_INCLUDE_DIR}")
+    message(STATUS "Will link against given libhailort: ${LIBHAILORT_PATH}")
+    message(STATUS "Will include given include dir:     ${HAILORT_INCLUDE_DIR}")
 
-    # the library to link against
     target_link_libraries(_pyhailort PRIVATE ${LIBHAILORT_PATH})
-
-    # the include dir
     include_directories(${HAILORT_INCLUDE_DIR})
-
-    # since we are linking against an injected libhailort, we need to define the version
     target_compile_definitions(
         _pyhailort
         PUBLIC
         HAILORT_MAJOR_VERSION=4
-        HAILORT_MINOR_VERSION=18
+        HAILORT_MINOR_VERSION=19
         HAILORT_REVISION_VERSION=0
     )
 elseif(LIBHAILORT_PATH OR HAILORT_INCLUDE_DIR)
-    message(FATAL_ERROR "Both LIBHAILORT_PATH and HAILORT_INCLUDE_DIR must be defined or none of them")
+  message(FATAL_ERROR "Both LIBHAILORT_PATH and HAILORT_INCLUDE_DIR must be defined or none of them. LIBHAILORT_PATH: '${LIBHAILORT_PATH}', HAILORT_INCLUDE_DIR: '${HAILORT_INCLUDE_DIR}'")
 else()
-    find_package(HailoRT 4.18.0 EXACT REQUIRED)
+    find_package(HailoRT 4.19.0 EXACT REQUIRED)
     target_link_libraries(_pyhailort PRIVATE HailoRT::libhailort)
 endif()
 
@@ -91,23 +72,15 @@ if(WIN32)
     target_compile_options(_pyhailort PRIVATE
         /DWIN32_LEAN_AND_MEAN
         /DNOMINMAX                  # NOMINMAX is required in order to play nice with std::min/std::max (otherwise Windows.h defines it's own)
-        /wd4201 /wd4251
+        /wd4201                     # Anonymous union/struct
+        /wd4251                     # C++ ABI with STL
     )
+elseif(NOT CMAKE_SYSTEM_NAME STREQUAL Android)
+    # TODO: HRT-14770 fix android build
+    target_link_libraries(_pyhailort PRIVATE rt)
 endif()
 
-target_compile_options(_pyhailort PRIVATE ${HAILORT_COMPILE_OPTIONS})
-exclude_archive_libs_symbols(_pyhailort)
-
-# TODO (HRT-8637): change this hard-coded path
-set(HAILO_PYHAILORT_TARGET_DIR ${CMAKE_CURRENT_LIST_DIR}/../platform/hailo_platform/pyhailort/)
-
-add_custom_target(pyhailort_venv ALL
-    COMMAND ${CMAKE_COMMAND} -E copy $<TARGET_FILE:_pyhailort> ${HAILO_PYHAILORT_TARGET_DIR}
+install(
+    TARGETS _pyhailort
+    LIBRARY DESTINATION ${HAILO_PYHAILORT_DIR}
 )
-add_dependencies(pyhailort_venv _pyhailort)
-
-install(TARGETS _pyhailort
-    LIBRARY DESTINATION ${HAILO_PYHAILORT_TARGET_DIR}
-    CONFIGURATIONS Release
-)
-
diff --git a/hailort/libhailort/bindings/python/src/__init__.py b/hailort/libhailort/bindings/python/src/__init__.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/hailort/libhailort/bindings/python/src/infer_model_api.cpp b/hailort/libhailort/bindings/python/src/infer_model_api.cpp
index 6719db7f..babd51bc 100644
--- a/hailort/libhailort/bindings/python/src/infer_model_api.cpp
+++ b/hailort/libhailort/bindings/python/src/infer_model_api.cpp
@@ -21,6 +21,7 @@
 #include <pybind11/functional.h>    // handle std::function
 #include <pybind11/chrono.h>        // handle std::chrono::milliseconds
 
+
 using namespace hailort;
 
 void InferModelWrapper::set_batch_size(uint16_t batch_size)
@@ -203,7 +204,7 @@ void ConfiguredInferModelWrapper::run(
     ConfiguredInferModelBindingsWrapper bindings,
     std::chrono::milliseconds timeout)
 {
-    auto status = m_configured_infer_model.run(bindings.get(), timeout);
+    auto status = m_configured_infer_model.run(bindings.release(), timeout);
     VALIDATE_STATUS(status);
 }
 
@@ -259,7 +260,7 @@ AsyncInferJobWrapper ConfiguredInferModelWrapper::run_async(
 
     std::vector<ConfiguredInferModel::Bindings> bindings;
     std::transform(user_bindings.begin(), user_bindings.end(), std::back_inserter(bindings),
-        [](ConfiguredInferModelBindingsWrapper &wrapper) { return wrapper.get(); });
+        [](ConfiguredInferModelBindingsWrapper &wrapper) { return wrapper.release(); });
 
     std::vector<void*> user_output_buffers;
     std::vector<BufferPtr> aligned_output_buffers;
@@ -341,10 +342,9 @@ void ConfiguredInferModelWrapper::execute_callbacks()
     while (true)
     {
         std::unique_lock<std::mutex> lock(m_queue_mutex);
-        m_cv.wait_for(lock, std::chrono::minutes(1), [this](){ return !m_callbacks_queue->empty() || !m_is_alive.load(); });
+        auto ret = m_cv.wait_for(lock, std::chrono::minutes(1), [this](){ return !m_callbacks_queue->empty() || !m_is_alive.load(); });
 
-        if (!m_is_alive.load())
-        {
+        if (!m_is_alive.load()) {
             while (!m_callbacks_queue->empty()) {
                 auto cb_status_pair = m_callbacks_queue->front();
                 auto &cb = cb_status_pair.first;
@@ -355,6 +355,10 @@ void ConfiguredInferModelWrapper::execute_callbacks()
             return;
         }
 
+        if (!ret) {
+            continue;
+        }
+
         auto cb_status_pair = m_callbacks_queue->front();
 
         m_callbacks_queue->pop();
diff --git a/hailort/libhailort/bindings/python/src/infer_model_api.hpp b/hailort/libhailort/bindings/python/src/infer_model_api.hpp
index e444a255..dce57822 100644
--- a/hailort/libhailort/bindings/python/src/infer_model_api.hpp
+++ b/hailort/libhailort/bindings/python/src/infer_model_api.hpp
@@ -124,13 +124,13 @@ class ConfiguredInferModelWrapper final
 class ConfiguredInferModelBindingsWrapper final
 {
 public:
-    ConfiguredInferModelBindingsWrapper(ConfiguredInferModel::Bindings&& bindings, std::vector<std::string> output_names) :
+    ConfiguredInferModelBindingsWrapper(ConfiguredInferModel::Bindings &&bindings, std::vector<std::string> output_names) :
         m_bindings(std::move(bindings)),
         m_output_names(output_names)
     {}
     ConfiguredInferModelBindingsInferStreamWrapper input(const std::string &name);
     ConfiguredInferModelBindingsInferStreamWrapper output(const std::string &name);
-    ConfiguredInferModel::Bindings get() { return m_bindings; }
+    ConfiguredInferModel::Bindings &&release() { return std::move(m_bindings); }
 
     static void bind(py::module &m);
 
diff --git a/hailort/libhailort/bindings/python/src/network_group_api.cpp b/hailort/libhailort/bindings/python/src/network_group_api.cpp
index 1fb66e8b..2de1a7c1 100644
--- a/hailort/libhailort/bindings/python/src/network_group_api.cpp
+++ b/hailort/libhailort/bindings/python/src/network_group_api.cpp
@@ -29,6 +29,9 @@ void ConfiguredNetworkGroupWrapper::bind(py::module &m)
         .def("init_cache", &ConfiguredNetworkGroupWrapper::init_cache)
         .def("get_cache_info", &ConfiguredNetworkGroupWrapper::get_cache_info)
         .def("update_cache_offset", &ConfiguredNetworkGroupWrapper::update_cache_offset)
+        .def("get_cache_ids", &ConfiguredNetworkGroupWrapper::get_cache_ids)
+        .def("read_cache_buffer", &ConfiguredNetworkGroupWrapper::read_cache_buffer)
+        .def("write_cache_buffer", &ConfiguredNetworkGroupWrapper::write_cache_buffer)
         .def("get_networks_names", &ConfiguredNetworkGroupWrapper::get_networks_names)
         .def("get_sorted_output_names", &ConfiguredNetworkGroupWrapper::get_sorted_output_names)
         .def("get_input_vstream_infos", &ConfiguredNetworkGroupWrapper::get_input_vstream_infos)
diff --git a/hailort/libhailort/bindings/python/src/network_group_api.hpp b/hailort/libhailort/bindings/python/src/network_group_api.hpp
index 0d17d544..f98fb802 100644
--- a/hailort/libhailort/bindings/python/src/network_group_api.hpp
+++ b/hailort/libhailort/bindings/python/src/network_group_api.hpp
@@ -156,6 +156,28 @@ class ConfiguredNetworkGroupWrapper final {
         VALIDATE_STATUS(status);
     }
 
+    auto get_cache_ids() // Returns the cache ids reported by the wrapped configured network group.
+    {
+        auto ids = get().get_cache_ids();
+        VALIDATE_EXPECTED(ids); // macro defined elsewhere; presumably reports a failed result to the Python caller - confirm
+        return ids.release(); // return the contained value, not the Expected<> wrapper (same unwrapping as the other bindings' .release() calls)
+    }
+
+    py::bytes read_cache_buffer(uint32_t cache_id) // Reads the cache identified by cache_id and returns its contents as Python bytes.
+    {
+        auto buffer = get().read_cache_buffer(cache_id);
+        VALIDATE_EXPECTED(buffer); // macro defined elsewhere; presumably reports a failed read to the Python caller - confirm
+        return py::bytes(buffer->as_pointer<char>(), buffer->size()); // built from pointer + explicit size, so the length comes from the buffer rather than a NUL terminator
+    }
+
+    void write_cache_buffer(uint32_t cache_id, py::bytes buffer) // Writes the given Python bytes into the cache identified by cache_id.
+    {
+        auto buffer_str = std::string(buffer); // copy the Python bytes into a C++ string
+        auto buffer_view = MemoryView::create_const(buffer_str.data(), buffer_str.size()); // view over buffer_str's storage; presumably non-owning, so buffer_str must outlive it - confirm
+        auto status = get().write_cache_buffer(cache_id, buffer_view);
+        VALIDATE_STATUS(status); // macro defined elsewhere; presumably reports a non-success status to the Python caller - confirm
+    }
+
     auto get_networks_names()
     {
         auto network_infos = get().get_network_infos();
diff --git a/hailort/libhailort/bindings/python/src/pyhailort.cpp b/hailort/libhailort/bindings/python/src/pyhailort.cpp
index 03d0a473..3691b979 100644
--- a/hailort/libhailort/bindings/python/src/pyhailort.cpp
+++ b/hailort/libhailort/bindings/python/src/pyhailort.cpp
@@ -563,7 +563,6 @@ PYBIND11_MODULE(_pyhailort, m) {
     py::enum_<hailo_format_flags_t>(m, "FormatFlags", py::arithmetic())
         .value("NONE", HAILO_FORMAT_FLAGS_NONE)
         .value("TRANSPOSED", HAILO_FORMAT_FLAGS_TRANSPOSED)
-        .value("HOST_ARGMAX", HAILO_FORMAT_FLAGS_HOST_ARGMAX)
         ;
 
     py::enum_<hailo_stream_transform_mode_t>(m, "TransformMode")
diff --git a/hailort/libhailort/bindings/python/src/vdevice_api.cpp b/hailort/libhailort/bindings/python/src/vdevice_api.cpp
index fc93cfa7..6dfb8076 100644
--- a/hailort/libhailort/bindings/python/src/vdevice_api.cpp
+++ b/hailort/libhailort/bindings/python/src/vdevice_api.cpp
@@ -11,15 +11,15 @@
 
 using namespace hailort;
 
-InferModelWrapper VDeviceWrapper::create_infer_model_from_file(const std::string &hef_path, const std::string &network_name)
+InferModelWrapper VDeviceWrapper::create_infer_model_from_file(const std::string &hef_path, const std::string &name)
 {
-    auto infer_model = m_vdevice->create_infer_model(hef_path, network_name);
+    auto infer_model = m_vdevice->create_infer_model(hef_path, name);
     VALIDATE_EXPECTED(infer_model);
 
     return InferModelWrapper(infer_model.release(), m_is_using_service);
 }
 
-InferModelWrapper VDeviceWrapper::create_infer_model_from_buffer(const py::bytes &buffer, const std::string &network_name)
+InferModelWrapper VDeviceWrapper::create_infer_model_from_buffer(const py::bytes &buffer, const std::string &name)
 {
     // there are 3 ways to get the buffer from python and convert it to MemoryView:
     // 1. py::bytes -> std::string -> MemoryView
@@ -29,7 +29,7 @@ InferModelWrapper VDeviceWrapper::create_infer_model_from_buffer(const py::bytes
     // 1+3 are copying the data, while 2 isn't, resulting in 700X faster transfer between python and c++ (tested on yolov5s [~15MB])
     py::buffer_info info(py::buffer(buffer).request());
     MemoryView hef_buffer(MemoryView(info.ptr, static_cast<size_t>(info.size)));
-    auto infer_model = m_vdevice->create_infer_model(hef_buffer, network_name);
+    auto infer_model = m_vdevice->create_infer_model(hef_buffer, name);
     VALIDATE_EXPECTED(infer_model);
 
     return InferModelWrapper(infer_model.release(), m_is_using_service);
diff --git a/hailort/libhailort/bindings/python/src/vdevice_api.hpp b/hailort/libhailort/bindings/python/src/vdevice_api.hpp
index fa7dbbc7..0ab55d94 100644
--- a/hailort/libhailort/bindings/python/src/vdevice_api.hpp
+++ b/hailort/libhailort/bindings/python/src/vdevice_api.hpp
@@ -118,6 +118,10 @@ class VDeviceWrapper {
         py::list results;
         m_net_groups.reserve(m_net_groups.size() + network_groups->size());
         for (const auto &network_group : network_groups.value()) {
+
+            // Since pybind's ConfiguredNetworkGroupWrapper doesn't hold the cng (it only has a weak ptr), we need to keep it alive in the vdevice scope
+            VALIDATE_STATUS(m_vdevice->add_network_group_ref_count(network_group));
+
             auto wrapper = ConfiguredNetworkGroupWrapper::create(network_group);
             results.append(wrapper);
             m_net_groups.emplace_back(wrapper);
diff --git a/hailort/libhailort/cmake/toolchains/linux.armv7l.cmake b/hailort/libhailort/cmake/toolchains/linux.armv7l.cmake
index 2ecb6fcb..574d9f73 100644
--- a/hailort/libhailort/cmake/toolchains/linux.armv7l.cmake
+++ b/hailort/libhailort/cmake/toolchains/linux.armv7l.cmake
@@ -1,12 +1,9 @@
 set(CMAKE_SYSTEM_NAME Linux)
 set(CMAKE_SYSTEM_PROCESSOR arm)
 
-set(CMAKE_C_COMPILER arm-linux-gnueabi-gcc)
-set(CMAKE_CXX_COMPILER arm-linux-gnueabi-g++)
-set(CMAKE_STRIP arm-linux-gnueabi--strip CACHE FILEPATH "Strip")
-set(CMAKE_LINKER arm-linux-gnueabi-ld)
+set(CMAKE_C_COMPILER arm-linux-gnueabihf-gcc)
+set(CMAKE_CXX_COMPILER arm-linux-gnueabihf-g++)
+set(CMAKE_STRIP arm-linux-gnueabihf-strip CACHE FILEPATH "Strip")
+set(CMAKE_LINKER arm-linux-gnueabihf-ld)
 
-add_compile_options(-march=armv7-a)
-
-# pybind is not supported in this platform
-set(HAILO_BUILD_PYBIND "OFF" CACHE STRING "hailo_build_pybind" FORCE)
+add_compile_options(-march=armv7-a -mfloat-abi=hard -mfpu=vfpv3)
diff --git a/hailort/libhailort/cmake/toolchains/linux.armv7lhf.cmake b/hailort/libhailort/cmake/toolchains/linux.armv7lhf.cmake
index a5e5105b..4333a262 100644
--- a/hailort/libhailort/cmake/toolchains/linux.armv7lhf.cmake
+++ b/hailort/libhailort/cmake/toolchains/linux.armv7lhf.cmake
@@ -6,7 +6,4 @@ set(CMAKE_CXX_COMPILER arm-linux-gnueabihf-g++)
 set(CMAKE_STRIP arm-linux-gnueabihf--strip CACHE FILEPATH "Strip")
 set(CMAKE_LINKER arm-linux-gnueabihf-ld)
 
-add_compile_options(-march=armv7-a)
-
-# pybind is not supported in this platform
-set(HAILO_BUILD_PYBIND "OFF" CACHE STRING "hailo_build_pybind" FORCE)
+add_compile_options(-march=armv7-a+fp)
diff --git a/hailort/libhailort/cmake/toolchains/qnx.aarch64.cmake b/hailort/libhailort/cmake/toolchains/qnx.aarch64.cmake
index 75c74217..dffa0782 100644
--- a/hailort/libhailort/cmake/toolchains/qnx.aarch64.cmake
+++ b/hailort/libhailort/cmake/toolchains/qnx.aarch64.cmake
@@ -46,11 +46,9 @@ SET(CMAKE_CXX_FLAGS "-Vgcc_nto${QNX_PROCESSOR} -lang-c++ -Y_cxx" CACHE STRING "q
 set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--build-id=md5 -lang-c++ -lsocket ${EXTRA_CMAKE_LINKER_FLAGS}" CACHE STRING "exe_linker_flags")
 set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,--build-id=md5 -lang-c++ -lsocket ${EXTRA_CMAKE_LINKER_FLAGS}" CACHE STRING "so_linker_flags")
 
-# pybind is not supported in this platform
-set(HAILO_BUILD_PYBIND "OFF" CACHE STRING "hailo_build_pybind" FORCE)
 # GStreamer does not work on QNX currently
 set(HAILO_BUILD_GSTREAMER "OFF" CACHE STRING "hailo_build_gstreamer" FORCE)
 # Hailort service does not work on QNX currently
 set(HAILO_BUILD_SERVICE "OFF" CACHE STRING "hailo_build_service" FORCE)
 # Set little endian flag for protobuf to work correctly on QNX
-add_definitions("-D__LITTLE_ENDIAN__")
\ No newline at end of file
+add_definitions("-D__LITTLE_ENDIAN__")
diff --git a/hailort/libhailort/cmake/toolchains/qnx.x86_64.cmake b/hailort/libhailort/cmake/toolchains/qnx.x86_64.cmake
index 9ba24e5e..530c22f9 100644
--- a/hailort/libhailort/cmake/toolchains/qnx.x86_64.cmake
+++ b/hailort/libhailort/cmake/toolchains/qnx.x86_64.cmake
@@ -46,11 +46,9 @@ SET(CMAKE_CXX_FLAGS "-Vgcc_nto${QNX_PROCESSOR} -lang-c++ -Y_cxx" CACHE STRING "q
 set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--build-id=md5 -lang-c++ -lsocket ${EXTRA_CMAKE_LINKER_FLAGS}" CACHE STRING "exe_linker_flags")
 set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,--build-id=md5 -lang-c++ -lsocket ${EXTRA_CMAKE_LINKER_FLAGS}" CACHE STRING "so_linker_flags")
 
-# pybind is not supported in this platform
-set(HAILO_BUILD_PYBIND "OFF" CACHE STRING "hailo_build_pybind" FORCE)
 # GStreamer does not work on QNX currently
 set(HAILO_BUILD_GSTREAMER "OFF" CACHE STRING "hailo_build_gstreamer" FORCE)
 # Hailort service does not work on QNX currently
 set(HAILO_BUILD_SERVICE "OFF" CACHE STRING "hailo_build_service" FORCE)
 # Set little endian flag for protobuf to work correctly on QNX
-add_definitions("-D__LITTLE_ENDIAN__")
\ No newline at end of file
+add_definitions("-D__LITTLE_ENDIAN__")
diff --git a/hailort/libhailort/doc/CMakeLists.txt b/hailort/libhailort/doc/CMakeLists.txt
index 73d0964c..81a89c65 100644
--- a/hailort/libhailort/doc/CMakeLists.txt
+++ b/hailort/libhailort/doc/CMakeLists.txt
@@ -1,4 +1,4 @@
-cmake_minimum_required(VERSION 3.0.0)
+cmake_minimum_required(VERSION 3.5.0)
 
 find_package(Doxygen)
 if(DOXYGEN_FOUND)
diff --git a/hailort/libhailort/examples/CMakeLists.txt b/hailort/libhailort/examples/CMakeLists.txt
index ae2aacd9..5ac7f2a0 100644
--- a/hailort/libhailort/examples/CMakeLists.txt
+++ b/hailort/libhailort/examples/CMakeLists.txt
@@ -1,4 +1,4 @@
-cmake_minimum_required(VERSION 3.0.0)
+cmake_minimum_required(VERSION 3.5.0)
 
 project(hailort-examples)
 
diff --git a/hailort/libhailort/examples/c/CMakeLists.txt b/hailort/libhailort/examples/c/CMakeLists.txt
index f006e356..b02d95dd 100644
--- a/hailort/libhailort/examples/c/CMakeLists.txt
+++ b/hailort/libhailort/examples/c/CMakeLists.txt
@@ -1,4 +1,4 @@
-cmake_minimum_required(VERSION 3.0.0)
+cmake_minimum_required(VERSION 3.5.0)
 
 add_subdirectory(data_quantization_example)
 add_subdirectory(raw_streams_example)
diff --git a/hailort/libhailort/examples/c/data_quantization_example/CMakeLists.txt b/hailort/libhailort/examples/c/data_quantization_example/CMakeLists.txt
index e2d6a724..eaab24cd 100644
--- a/hailort/libhailort/examples/c/data_quantization_example/CMakeLists.txt
+++ b/hailort/libhailort/examples/c/data_quantization_example/CMakeLists.txt
@@ -1,9 +1,9 @@
-cmake_minimum_required(VERSION 3.0.0)
+cmake_minimum_required(VERSION 3.5.0)
 
 set(THREADS_PREFER_PTHREAD_FLAG ON)
 find_package(Threads REQUIRED)
 
-find_package(HailoRT 4.18.0 EXACT REQUIRED)
+find_package(HailoRT 4.19.0 EXACT REQUIRED)
 
 SET_SOURCE_FILES_PROPERTIES(data_quantization_example.c PROPERTIES LANGUAGE C)
 
diff --git a/hailort/libhailort/examples/c/infer_pipeline_example/CMakeLists.txt b/hailort/libhailort/examples/c/infer_pipeline_example/CMakeLists.txt
index 5379d9b2..c7bd49d3 100644
--- a/hailort/libhailort/examples/c/infer_pipeline_example/CMakeLists.txt
+++ b/hailort/libhailort/examples/c/infer_pipeline_example/CMakeLists.txt
@@ -1,9 +1,9 @@
-cmake_minimum_required(VERSION 3.0.0)
+cmake_minimum_required(VERSION 3.5.0)
 
 set(THREADS_PREFER_PTHREAD_FLAG ON)
 find_package(Threads REQUIRED)
 
-find_package(HailoRT 4.18.0 EXACT REQUIRED)
+find_package(HailoRT 4.19.0 EXACT REQUIRED)
 
 SET_SOURCE_FILES_PROPERTIES(infer_pipeline_example.c PROPERTIES LANGUAGE C)
 
diff --git a/hailort/libhailort/examples/c/infer_pipeline_example/infer_pipeline_example.c b/hailort/libhailort/examples/c/infer_pipeline_example/infer_pipeline_example.c
index 4f17debb..3d591c6d 100644
--- a/hailort/libhailort/examples/c/infer_pipeline_example/infer_pipeline_example.c
+++ b/hailort/libhailort/examples/c/infer_pipeline_example/infer_pipeline_example.c
@@ -1,11 +1,11 @@
 /**
- * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved.
+ * Copyright (c) 2020-2024 Hailo Technologies Ltd. All rights reserved.
  * Distributed under the MIT license (https://opensource.org/licenses/MIT)
  **/
 /**
  * @file infer_pipeline_example.c
  * This example demonstrates the basic data-path on HailoRT using the high level API - Virtual Stream Pipeline.
- * The program scans for Hailo-8 devices connected to a provided Ethernet interface, generates a random dataset,
+ * The program creates a VDevice with default parameters, generates a random dataset,
  * and runs it through the device with virtual streams pipeline.
  **/
 
@@ -13,14 +13,10 @@
 #include "string.h"
 #include "hailo/hailort.h"
 
-#define MAX_NUM_OF_DEVICES (5)
-#define SCAN_TIMEOUT_MILLISECONDS (2000)
 #define INFER_FRAME_COUNT (100)
 #define MAX_EDGE_LAYERS (16)
 #define HEF_FILE ("hefs/shortcut_net.hef")
 
-#define USAGE_ERROR_MSG ("Args parsing error.\nUsage: infer_pipeline_example <interface_name>\n")
-
 hailo_status infer(hailo_configured_network_group configured_network_group,
     hailo_input_vstream_params_by_name_t *input_params, hailo_output_vstream_params_by_name_t *output_params,
     hailo_vstream_info_t *vstreams_infos, size_t vstreams_infos_size)
@@ -73,23 +69,10 @@ hailo_status infer(hailo_configured_network_group configured_network_group,
     return HAILO_SUCCESS;
 }
 
-void parse_arguments(int argc, char **argv, const char **interface_name)
-{
-    if (2 != argc) {
-        printf(USAGE_ERROR_MSG);
-        exit(1);
-    }
-    *interface_name = argv[1];
-}
-
-int main(int argc, char **argv)
+int main()
 {
     hailo_status status = HAILO_UNINITIALIZED;
-    const char *interface_name = NULL;
-    hailo_eth_device_info_t device_infos[MAX_NUM_OF_DEVICES] = {0};
-    size_t num_of_devices = 0;
-    uint32_t timeout = SCAN_TIMEOUT_MILLISECONDS;
-    hailo_device device = NULL;
+    hailo_vdevice vdevice = NULL;
     hailo_hef hef = NULL;
     hailo_configure_params_t config_params = {0};
     hailo_configured_network_group network_group = NULL;
@@ -98,29 +81,22 @@ int main(int argc, char **argv)
     hailo_output_vstream_params_by_name_t output_vstream_params[MAX_EDGE_LAYERS] = {0};
     size_t input_vstreams_size = MAX_EDGE_LAYERS;
     size_t output_vstreams_size = MAX_EDGE_LAYERS;
-    hailo_activated_network_group activated_network_group = NULL;
     size_t vstreams_infos_size = MAX_EDGE_LAYERS;
     hailo_vstream_info_t vstreams_infos[MAX_EDGE_LAYERS] = {0};
     bool unused = {0};
 
-    parse_arguments(argc, argv, &interface_name);
-
-    status = hailo_scan_ethernet_devices(interface_name, device_infos, MAX_NUM_OF_DEVICES, &num_of_devices, timeout);
-    REQUIRE_SUCCESS(status, l_exit, "Failed to scan ethernet devices");
-    REQUIRE_ACTION(num_of_devices > 0, status = HAILO_INVALID_ARGUMENT, l_exit, 
-        "Failed to find ethernet devices");
 
-    status = hailo_create_ethernet_device(&device_infos[0], &device);
-    REQUIRE_SUCCESS(status, l_exit, "Failed to create eth_device");
+    status = hailo_create_vdevice(NULL, &vdevice);
+    REQUIRE_SUCCESS(status, l_exit, "Failed to create vdevice");
 
     status = hailo_create_hef_file(&hef, HEF_FILE);
-    REQUIRE_SUCCESS(status, l_release_device, "Failed reading hef file");
+    REQUIRE_SUCCESS(status, l_release_vdevice, "Failed reading hef file");
 
-    status = hailo_init_configure_params_by_device(hef, device, &config_params);
+    status = hailo_init_configure_params_by_vdevice(hef, vdevice, &config_params);
     REQUIRE_SUCCESS(status, l_release_hef, "Failed initializing configure parameters");
 
-    status = hailo_configure_device(device, hef, &config_params, &network_group, &network_group_size);
-    REQUIRE_SUCCESS(status, l_release_hef, "Failed configure devcie from hef");
+    status = hailo_configure_vdevice(vdevice, hef, &config_params, &network_group, &network_group_size);
+    REQUIRE_SUCCESS(status, l_release_hef, "Failed configure vdevice from hef");
     REQUIRE_ACTION(network_group_size == 1, status = HAILO_INVALID_ARGUMENT, l_release_hef, 
         "Invalid network group size");
 
@@ -142,20 +118,16 @@ int main(int argc, char **argv)
     REQUIRE_ACTION(vstreams_infos_size == 2, status = HAILO_INVALID_ARGUMENT, l_release_hef, 
         "Invalid number of virtual streams size");
 
-    status = hailo_activate_network_group(network_group, NULL, &activated_network_group);
-    REQUIRE_SUCCESS(status, l_release_hef, "Failed activate network group");
-
     status = infer(network_group, input_vstream_params, output_vstream_params, vstreams_infos, vstreams_infos_size);
-    REQUIRE_SUCCESS(status, l_deactivate_network_group, "Failed running inference");
+    REQUIRE_SUCCESS(status, l_release_hef, "Failed running inference");
 
     printf("Inference ran successfully\n");
     status = HAILO_SUCCESS;
-l_deactivate_network_group:
-    (void)hailo_deactivate_network_group(activated_network_group);
+
 l_release_hef:
     (void) hailo_release_hef(hef);
-l_release_device:
-    (void) hailo_release_device(device);
+l_release_vdevice:
+    (void) hailo_release_vdevice(vdevice);
 l_exit:
     return (int)status;
 }
diff --git a/hailort/libhailort/examples/c/multi_device_example/CMakeLists.txt b/hailort/libhailort/examples/c/multi_device_example/CMakeLists.txt
index 7b54ddea..93377ec6 100644
--- a/hailort/libhailort/examples/c/multi_device_example/CMakeLists.txt
+++ b/hailort/libhailort/examples/c/multi_device_example/CMakeLists.txt
@@ -1,9 +1,9 @@
-cmake_minimum_required(VERSION 3.0.0)
+cmake_minimum_required(VERSION 3.5.0)
 
 set(THREADS_PREFER_PTHREAD_FLAG ON)
 find_package(Threads REQUIRED)
 
-find_package(HailoRT 4.18.0 EXACT REQUIRED)
+find_package(HailoRT 4.19.0 EXACT REQUIRED)
 
 SET_SOURCE_FILES_PROPERTIES(multi_device_example.c PROPERTIES LANGUAGE C)
 
diff --git a/hailort/libhailort/examples/c/multi_network_vstream_example/CMakeLists.txt b/hailort/libhailort/examples/c/multi_network_vstream_example/CMakeLists.txt
index d8677f73..2f6ca9f9 100644
--- a/hailort/libhailort/examples/c/multi_network_vstream_example/CMakeLists.txt
+++ b/hailort/libhailort/examples/c/multi_network_vstream_example/CMakeLists.txt
@@ -1,9 +1,9 @@
-cmake_minimum_required(VERSION 3.0.0)
+cmake_minimum_required(VERSION 3.5.0)
 
 find_package(Threads REQUIRED)
 set(THREADS_PREFER_PTHREAD_FLAG ON)
 
-find_package(HailoRT 4.18.0 EXACT REQUIRED)
+find_package(HailoRT 4.19.0 EXACT REQUIRED)
 
 SET_SOURCE_FILES_PROPERTIES(multi_network_vstream_example.c PROPERTIES LANGUAGE C)
 
diff --git a/hailort/libhailort/examples/c/notification_callback_example/CMakeLists.txt b/hailort/libhailort/examples/c/notification_callback_example/CMakeLists.txt
index fd72f994..c835d4e7 100644
--- a/hailort/libhailort/examples/c/notification_callback_example/CMakeLists.txt
+++ b/hailort/libhailort/examples/c/notification_callback_example/CMakeLists.txt
@@ -1,9 +1,9 @@
-cmake_minimum_required(VERSION 3.0.0)
+cmake_minimum_required(VERSION 3.5.0)
 
 set(THREADS_PREFER_PTHREAD_FLAG ON)
 find_package(Threads REQUIRED)
 
-find_package(HailoRT 4.18.0 EXACT REQUIRED)
+find_package(HailoRT 4.19.0 EXACT REQUIRED)
 
 SET_SOURCE_FILES_PROPERTIES(notification_callback_example.c PROPERTIES LANGUAGE C)
 
diff --git a/hailort/libhailort/examples/c/power_measurement_example/CMakeLists.txt b/hailort/libhailort/examples/c/power_measurement_example/CMakeLists.txt
index 85a8e2e6..760f26e1 100644
--- a/hailort/libhailort/examples/c/power_measurement_example/CMakeLists.txt
+++ b/hailort/libhailort/examples/c/power_measurement_example/CMakeLists.txt
@@ -1,6 +1,6 @@
-cmake_minimum_required(VERSION 3.0.0)
+cmake_minimum_required(VERSION 3.5.0)
 
-find_package(HailoRT 4.18.0 EXACT REQUIRED)
+find_package(HailoRT 4.19.0 EXACT REQUIRED)
 
 SET_SOURCE_FILES_PROPERTIES(power_measurement_example.c PROPERTIES LANGUAGE C)
 
diff --git a/hailort/libhailort/examples/c/raw_async_streams_single_thread_example/CMakeLists.txt b/hailort/libhailort/examples/c/raw_async_streams_single_thread_example/CMakeLists.txt
index 0bb957e5..b9c2e506 100644
--- a/hailort/libhailort/examples/c/raw_async_streams_single_thread_example/CMakeLists.txt
+++ b/hailort/libhailort/examples/c/raw_async_streams_single_thread_example/CMakeLists.txt
@@ -1,6 +1,6 @@
-cmake_minimum_required(VERSION 3.0.0)
+cmake_minimum_required(VERSION 3.5.0)
 
-find_package(HailoRT 4.18.0 EXACT REQUIRED)
+find_package(HailoRT 4.19.0 EXACT REQUIRED)
 
 SET_SOURCE_FILES_PROPERTIES(raw_async_streams_single_thread_example.c PROPERTIES LANGUAGE C)
 
diff --git a/hailort/libhailort/examples/c/raw_streams_example/CMakeLists.txt b/hailort/libhailort/examples/c/raw_streams_example/CMakeLists.txt
index 24eb068e..3ac1db57 100644
--- a/hailort/libhailort/examples/c/raw_streams_example/CMakeLists.txt
+++ b/hailort/libhailort/examples/c/raw_streams_example/CMakeLists.txt
@@ -1,9 +1,9 @@
-cmake_minimum_required(VERSION 3.0.0)
+cmake_minimum_required(VERSION 3.5.0)
 
 set(THREADS_PREFER_PTHREAD_FLAG ON)
 find_package(Threads REQUIRED)
 
-find_package(HailoRT 4.18.0 EXACT REQUIRED)
+find_package(HailoRT 4.19.0 EXACT REQUIRED)
 
 SET_SOURCE_FILES_PROPERTIES(raw_streams_example.c PROPERTIES LANGUAGE C)
 
diff --git a/hailort/libhailort/examples/c/switch_network_groups_example/CMakeLists.txt b/hailort/libhailort/examples/c/switch_network_groups_example/CMakeLists.txt
index 66011c4d..f85fed91 100644
--- a/hailort/libhailort/examples/c/switch_network_groups_example/CMakeLists.txt
+++ b/hailort/libhailort/examples/c/switch_network_groups_example/CMakeLists.txt
@@ -1,9 +1,9 @@
-cmake_minimum_required(VERSION 3.0.0)
+cmake_minimum_required(VERSION 3.5.0)
 
 set(THREADS_PREFER_PTHREAD_FLAG ON)
 find_package(Threads REQUIRED)
 
-find_package(HailoRT 4.18.0 EXACT REQUIRED)
+find_package(HailoRT 4.19.0 EXACT REQUIRED)
 
 SET_SOURCE_FILES_PROPERTIES(switch_network_groups_example.c PROPERTIES LANGUAGE C)
 
diff --git a/hailort/libhailort/examples/c/switch_network_groups_manually_example/CMakeLists.txt b/hailort/libhailort/examples/c/switch_network_groups_manually_example/CMakeLists.txt
index ac63eb87..2033554f 100644
--- a/hailort/libhailort/examples/c/switch_network_groups_manually_example/CMakeLists.txt
+++ b/hailort/libhailort/examples/c/switch_network_groups_manually_example/CMakeLists.txt
@@ -1,9 +1,9 @@
-cmake_minimum_required(VERSION 3.0.0)
+cmake_minimum_required(VERSION 3.5.0)
 
 set(THREADS_PREFER_PTHREAD_FLAG ON)
 find_package(Threads REQUIRED)
 
-find_package(HailoRT 4.18.0 EXACT REQUIRED)
+find_package(HailoRT 4.19.0 EXACT REQUIRED)
 
 SET_SOURCE_FILES_PROPERTIES(switch_network_groups_manually_example.c PROPERTIES LANGUAGE C)
 
diff --git a/hailort/libhailort/examples/c/vstreams_example/CMakeLists.txt b/hailort/libhailort/examples/c/vstreams_example/CMakeLists.txt
index 398dd294..431a1d1e 100644
--- a/hailort/libhailort/examples/c/vstreams_example/CMakeLists.txt
+++ b/hailort/libhailort/examples/c/vstreams_example/CMakeLists.txt
@@ -1,9 +1,9 @@
-cmake_minimum_required(VERSION 3.0.0)
+cmake_minimum_required(VERSION 3.5.0)
 
 set(THREADS_PREFER_PTHREAD_FLAG ON)
 find_package(Threads REQUIRED)
 
-find_package(HailoRT 4.18.0 EXACT REQUIRED)
+find_package(HailoRT 4.19.0 EXACT REQUIRED)
 
 SET_SOURCE_FILES_PROPERTIES(vstreams_example.c PROPERTIES LANGUAGE C)
 
diff --git a/hailort/libhailort/examples/c/vstreams_example/vstreams_example.c b/hailort/libhailort/examples/c/vstreams_example/vstreams_example.c
index 30ce1a56..17f32f1f 100644
--- a/hailort/libhailort/examples/c/vstreams_example/vstreams_example.c
+++ b/hailort/libhailort/examples/c/vstreams_example/vstreams_example.c
@@ -5,7 +5,7 @@
 /**
  * @file vstreams_example.c
  * This example demonstrates the basic data-path on HailoRT using the high level API - Virtual Stream Pipeline.
- * The program scans for Hailo-8 devices connected to a provided PCIe interface, generates random dataset,
+ * The program scans for Hailo devices connected to a provided PCIe interface, generates random dataset,
  * and runs it through the VDevice with virtual streams.
  **/
 
@@ -153,7 +153,6 @@ int main()
     REQUIRE_ACTION(network_group_size == 1, status = HAILO_INVALID_ARGUMENT, l_release_hef, 
         "Invalid network group size");
 
-
     // Set input format type to auto - libhailort will not scale the data before writing to the HW
     status = hailo_make_input_vstream_params(network_group, unused, HAILO_FORMAT_TYPE_AUTO,
         input_vstream_params, &input_vstreams_size);
diff --git a/hailort/libhailort/examples/cpp/CMakeLists.txt b/hailort/libhailort/examples/cpp/CMakeLists.txt
index c0b31e4b..8ad305bd 100644
--- a/hailort/libhailort/examples/cpp/CMakeLists.txt
+++ b/hailort/libhailort/examples/cpp/CMakeLists.txt
@@ -1,4 +1,4 @@
-cmake_minimum_required(VERSION 3.0.0)
+cmake_minimum_required(VERSION 3.5.0)
 
 add_subdirectory(vstreams_example)
 add_subdirectory(infer_pipeline_example)
diff --git a/hailort/libhailort/examples/cpp/async_infer_advanced_example/CMakeLists.txt b/hailort/libhailort/examples/cpp/async_infer_advanced_example/CMakeLists.txt
index 185f9041..925ccea1 100644
--- a/hailort/libhailort/examples/cpp/async_infer_advanced_example/CMakeLists.txt
+++ b/hailort/libhailort/examples/cpp/async_infer_advanced_example/CMakeLists.txt
@@ -1,6 +1,6 @@
-cmake_minimum_required(VERSION 3.0.0)
+cmake_minimum_required(VERSION 3.5.0)
 
-find_package(HailoRT 4.18.0 EXACT REQUIRED)
+find_package(HailoRT 4.19.0 EXACT REQUIRED)
 
 add_executable(cpp_async_infer_advanced_example async_infer_advanced_example.cpp)
 target_link_libraries(cpp_async_infer_advanced_example PRIVATE HailoRT::libhailort)
diff --git a/hailort/libhailort/examples/cpp/async_infer_advanced_example/async_infer_advanced_example.cpp b/hailort/libhailort/examples/cpp/async_infer_advanced_example/async_infer_advanced_example.cpp
index 0c8c581e..800ed9ee 100644
--- a/hailort/libhailort/examples/cpp/async_infer_advanced_example/async_infer_advanced_example.cpp
+++ b/hailort/libhailort/examples/cpp/async_infer_advanced_example/async_infer_advanced_example.cpp
@@ -39,138 +39,130 @@ static std::shared_ptr<uint8_t> page_aligned_alloc(size_t size)
 
 int main()
 {
-    auto vdevice = VDevice::create();
-    if (!vdevice) {
-        std::cerr << "Failed create vdevice, status = " << vdevice.status() << std::endl;
-        return vdevice.status();
-    }
-    std::cout << "VDevice created" << std::endl;
-
-    // Create infer model from HEF file.
-    auto infer_model_exp = vdevice.value()->create_infer_model("hefs/shortcut_net_nv12.hef");
-    if (!infer_model_exp) {
-        std::cerr << "Failed to create infer model, status = " << infer_model_exp.status() << std::endl;
-        return infer_model_exp.status();
-    }
-    std::cout << "InferModel created" << std::endl;
-    auto infer_model = infer_model_exp.release();
-
-    infer_model->output()->set_format_type(HAILO_FORMAT_TYPE_FLOAT32);
-    std::cout << "Set output format_type to float32" << std::endl;
-    infer_model->set_batch_size(BATCH_SIZE);
-    std::cout << "Set batch_size to " << BATCH_SIZE << std::endl;
-
-    // Configure the infer model
-    auto configured_infer_model = infer_model->configure();
-    if (!configured_infer_model) {
-        std::cerr << "Failed to create configured infer model, status = " << configured_infer_model.status() << std::endl;
-        return configured_infer_model.status();
-    }
-    std::cout << "ConfiguredInferModel created" << std::endl;
-
-    // The buffers are stored here as a guard for the memory. The buffer will be freed only after
-    // configured_infer_model will be released.
-    std::vector<std::shared_ptr<uint8_t>> buffer_guards;
-
-    // Create input buffers.
-    std::unordered_map<std::string, hailo_pix_buffer_t> input_buffers;
-    for (const auto &input_name : infer_model->get_input_names()) {
-        size_t input_frame_size = infer_model->input(input_name)->get_frame_size();
-
-        // create pix_buffer
-        const auto Y_PLANE_SIZE = static_cast<uint32_t>(input_frame_size * 2 / 3);
-        const auto UV_PLANE_SIZE = static_cast<uint32_t>(input_frame_size * 1 / 3);
-        assert (Y_PLANE_SIZE + UV_PLANE_SIZE == input_frame_size);
-        auto y_plane_buffer = page_aligned_alloc(Y_PLANE_SIZE);
-        auto uv_plane_buffer = page_aligned_alloc(UV_PLANE_SIZE);
-        hailo_pix_buffer_t pix_buffer{};
-        pix_buffer.memory_type = HAILO_PIX_BUFFER_MEMORY_TYPE_USERPTR;
-        pix_buffer.number_of_planes = 2;
-        // Y Plane
-        pix_buffer.planes[0].bytes_used = Y_PLANE_SIZE;
-        pix_buffer.planes[0].plane_size = Y_PLANE_SIZE;
-        pix_buffer.planes[0].user_ptr = reinterpret_cast<void*>(y_plane_buffer.get());
-        // UV Plane
-        pix_buffer.planes[1].bytes_used = UV_PLANE_SIZE;
-        pix_buffer.planes[1].plane_size = UV_PLANE_SIZE;
-        pix_buffer.planes[1].user_ptr = reinterpret_cast<void*>(uv_plane_buffer.get());
-
-        input_buffers[input_name] = pix_buffer;
-        buffer_guards.push_back(y_plane_buffer);
-        buffer_guards.push_back(uv_plane_buffer);
-    }
-
-    // Create output buffers.
-    std::unordered_map<std::string, MemoryView> output_buffers;
-    for (const auto &output_name : infer_model->get_output_names()) {
-        size_t output_frame_size = infer_model->output(output_name)->get_frame_size();
-        auto output_buffer = page_aligned_alloc(output_frame_size);
-
-        output_buffers[output_name] = MemoryView(output_buffer.get(), output_frame_size);
-        buffer_guards.push_back(output_buffer);
-    }
-
-    std::cout << "Running inference..." << std::endl;
-    AsyncInferJob last_infer_job;
-    for (uint32_t i = 0; i < BATCH_COUNT; i++) {
-        // Waiting for available requests in the pipeline
-        auto status = configured_infer_model->wait_for_async_ready(std::chrono::milliseconds(1000), BATCH_SIZE);
-        if (HAILO_SUCCESS != status) {
-            std::cerr << "Failed to wait for async ready, status = " << status << std::endl;
-            return status;
+    try {
+        auto vdevice = VDevice::create().expect("Failed create vdevice");
+        std::cout << "VDevice created" << std::endl;
+
+        // Create infer model from HEF file.
+        auto infer_model = vdevice->create_infer_model("hefs/shortcut_net_nv12.hef").expect("Failed to create infer model");
+        std::cout << "InferModel created" << std::endl;
+
+        infer_model->output()->set_format_type(HAILO_FORMAT_TYPE_FLOAT32);
+        std::cout << "Set output format_type to float32" << std::endl;
+        infer_model->set_batch_size(BATCH_SIZE);
+        std::cout << "Set batch_size to " << BATCH_SIZE << std::endl;
+
+        /* Buffers are stored here to ensure memory safety and are only freed after
+           the configured_infer_model is released, ensuring they remain intact until the model has finished using them. */
+        std::vector<std::shared_ptr<uint8_t>> buffer_guards;
+
+        /* When the same buffers are used multiple times on async-io, it is recommended to pre-map them into the VDevice
+           to improve performance. Each DmaMappedBuffer object manages one mapping, which is unmapped when the object is destroyed.
+           A buffer must outlive its mapping, so this vector is defined after 'buffer_guards' and is therefore destroyed first. */
+        std::vector<DmaMappedBuffer> buffer_map_guards;
+
+        // Configure the infer model
+        auto configured_infer_model = infer_model->configure().expect("Failed to create configured infer model");
+        std::cout << "ConfiguredInferModel created" << std::endl;
+
+        // Create infer bindings
+        auto bindings = configured_infer_model.create_bindings().expect("Failed to create infer bindings");
+
+        // Set the input buffers of the bindings
+        for (const auto &input_name : infer_model->get_input_names()) {
+            size_t input_frame_size = infer_model->input(input_name)->get_frame_size();
+
+            const auto Y_PLANE_SIZE = static_cast<uint32_t>(input_frame_size * 2 / 3);
+            const auto UV_PLANE_SIZE = static_cast<uint32_t>(input_frame_size * 1 / 3);
+            assert (Y_PLANE_SIZE + UV_PLANE_SIZE == input_frame_size);
+
+            // Allocate and map Y-plane buffer
+            auto y_plane_buffer = page_aligned_alloc(Y_PLANE_SIZE);
+            buffer_guards.push_back(y_plane_buffer);
+            auto input_mapping_y = DmaMappedBuffer::create(*vdevice, y_plane_buffer.get(), Y_PLANE_SIZE, HAILO_DMA_BUFFER_DIRECTION_H2D).expect("Failed to map input buffer to VDevice");
+            buffer_map_guards.push_back(std::move(input_mapping_y));
+
+            // Allocate and map UV-plane buffer (the mapped length must equal the UV allocation size)
+            auto uv_plane_buffer = page_aligned_alloc(UV_PLANE_SIZE);
+            buffer_guards.push_back(uv_plane_buffer);
+            auto input_mapping_uv = DmaMappedBuffer::create(*vdevice, uv_plane_buffer.get(), UV_PLANE_SIZE, HAILO_DMA_BUFFER_DIRECTION_H2D).expect("Failed to map input buffer to VDevice");
+            buffer_map_guards.push_back(std::move(input_mapping_uv));
+
+            // create pix_buffer
+            hailo_pix_buffer_t pix_buffer{};
+            pix_buffer.memory_type = HAILO_PIX_BUFFER_MEMORY_TYPE_USERPTR;
+            pix_buffer.number_of_planes = 2;
+            // Y Plane
+            pix_buffer.planes[0].bytes_used = Y_PLANE_SIZE;
+            pix_buffer.planes[0].plane_size = Y_PLANE_SIZE;
+            pix_buffer.planes[0].user_ptr = reinterpret_cast<void*>(y_plane_buffer.get());
+            // UV Plane
+            pix_buffer.planes[1].bytes_used = UV_PLANE_SIZE;
+            pix_buffer.planes[1].plane_size = UV_PLANE_SIZE;
+            pix_buffer.planes[1].user_ptr = reinterpret_cast<void*>(uv_plane_buffer.get());
+
+            auto status = bindings.input(input_name)->set_pix_buffer(pix_buffer);
+            if (HAILO_SUCCESS != status) {
+                throw hailort_error(status, "Failed to set infer input buffer");
+            }
         }
 
-        // In this example we infer the same buffers, so setting 'BATCH_SIZE' identical bindings in the 'multiple_bindings' vector
-        std::vector<ConfiguredInferModel::Bindings> bindings_batch;
-        for (uint32_t b = 0; b < BATCH_SIZE; b++) {
-            auto bindings = configured_infer_model->create_bindings();
-            if (!bindings) {
-                std::cerr << "Failed to create infer bindings, status = " << bindings.status() << std::endl;
-                return bindings.status();
+        // Set the output buffers of the bindings
+        for (const auto &output_name : infer_model->get_output_names()) {
+            size_t output_frame_size = infer_model->output(output_name)->get_frame_size();
+            auto output_buffer = page_aligned_alloc(output_frame_size);
+            buffer_guards.push_back(output_buffer);
+            auto output_mapping = DmaMappedBuffer::create(*vdevice, output_buffer.get(), output_frame_size, HAILO_DMA_BUFFER_DIRECTION_D2H).expect("Failed to map output buffer to VDevice");
+            buffer_map_guards.push_back(std::move(output_mapping));
+
+            auto status = bindings.output(output_name)->set_buffer(MemoryView(output_buffer.get(), output_frame_size));
+            if (HAILO_SUCCESS != status) {
+                throw hailort_error(status, "Failed to set infer output buffer");
             }
+        }
+        std::cout << "ConfiguredInferModel::Bindings created and configured" << std::endl;
 
-            for (auto &input_buffer : input_buffers) {
-                status = bindings->input(input_buffer.first)->set_pix_buffer(input_buffer.second);
-                if (HAILO_SUCCESS != status) {
-                    std::cerr << "Failed to set infer input buffer, status = " << status << std::endl;
-                    return status;
-                }
-            }
-            for (auto &output_buffer : output_buffers) {
-                status = bindings->output(output_buffer.first)->set_buffer(output_buffer.second);
-                if (HAILO_SUCCESS != status) {
-                    std::cerr << "Failed to set infer output buffer, status = " << status << std::endl;
-                    return status;
-                }
+        // In this example we infer the same buffers, so setting 'BATCH_SIZE' identical bindings in the 'multiple_bindings' vector
+        std::vector<ConfiguredInferModel::Bindings> multiple_bindings(BATCH_SIZE, bindings);
+
+        std::cout << "Running inference..." << std::endl;
+        AsyncInferJob last_infer_job;
+        for (uint32_t i = 0; i < BATCH_COUNT; i++) {
+            // Waiting for available requests in the pipeline
+            auto status = configured_infer_model.wait_for_async_ready(std::chrono::milliseconds(1000), BATCH_SIZE);
+            if (HAILO_SUCCESS != status) {
+                throw hailort_error(status, "Failed to wait for async ready");
             }
 
-            bindings_batch.emplace_back(bindings.release());
-        }
+            auto job = configured_infer_model.run_async(multiple_bindings, [multiple_bindings] (const AsyncInferCompletionInfo &completion_info) {
+                // Use completion_info to get the async operation status
+                // Note that this callback must be executed as quickly as possible
+                (void)completion_info.status;
+
+                // If you want to use the bindings in the callback, capture them by value to make a copy
+                // so that they won't be changed in the next infer request
+                (void)multiple_bindings;
+            }).expect("Failed to start async infer job");
+
+            // detach() is called in order for jobs to run in parallel (and not one after the other)
+            job.detach();
 
-        auto job = configured_infer_model->run_async(bindings_batch, [] (const AsyncInferCompletionInfo &completion_info) {
-            // Use completion_info to get the async operation status
-            // Note that this callback must be executed as quickly as possible
-            (void)completion_info.status;
-        });
-        if (!job) {
-            std::cerr << "Failed to start async infer job, status = " << job.status() << std::endl;
-            return job.status();
+            if (i == BATCH_COUNT - 1) {
+                last_infer_job = std::move(job);
+            }
         }
-        // detach() is called in order for jobs to run in parallel (and not one after the other)
-        job->detach();
 
-        if (i == BATCH_COUNT - 1) {
-            last_infer_job = job.release();
+        // Wait for last infer to finish
+        auto status = last_infer_job.wait(std::chrono::milliseconds(1000));
+        if (HAILO_SUCCESS != status) {
+            throw hailort_error(status, "Failed to wait for infer to finish");
         }
-    }
 
-    // Wait for last infer to finish
-    auto status = last_infer_job.wait(std::chrono::milliseconds(1000));
-    if (HAILO_SUCCESS != status) {
-        std::cerr << "Failed to wait for infer to finish, status = " << status << std::endl;
-        return status;
-    }
+        std::cout << "Inference finished successfully on " << BATCH_COUNT * BATCH_SIZE << " frames" << std::endl;
+    } catch (const hailort_error &exception) { // reached from any expect()/throw in the try block above
+        std::cerr << "Failed to run inference. status=" << exception.status() << ", error message: " << exception.what() << std::endl;
+        return exception.status();
+    }
 
-    std::cout << "Inference finished successfully on " << BATCH_COUNT * BATCH_SIZE << " frames" << std::endl;
     return HAILO_SUCCESS;
 }
diff --git a/hailort/libhailort/examples/cpp/async_infer_basic_example/CMakeLists.txt b/hailort/libhailort/examples/cpp/async_infer_basic_example/CMakeLists.txt
index 86886a79..fe436f83 100644
--- a/hailort/libhailort/examples/cpp/async_infer_basic_example/CMakeLists.txt
+++ b/hailort/libhailort/examples/cpp/async_infer_basic_example/CMakeLists.txt
@@ -1,6 +1,6 @@
-cmake_minimum_required(VERSION 3.0.0)
+cmake_minimum_required(VERSION 3.5.0)
 
-find_package(HailoRT 4.18.0 EXACT REQUIRED)
+find_package(HailoRT 4.19.0 EXACT REQUIRED)
 
 add_executable(cpp_async_infer_basic_example async_infer_basic_example.cpp)
 target_link_libraries(cpp_async_infer_basic_example PRIVATE HailoRT::libhailort)
diff --git a/hailort/libhailort/examples/cpp/async_infer_basic_example/async_infer_basic_example.cpp b/hailort/libhailort/examples/cpp/async_infer_basic_example/async_infer_basic_example.cpp
index b498f837..a88c89a1 100644
--- a/hailort/libhailort/examples/cpp/async_infer_basic_example/async_infer_basic_example.cpp
+++ b/hailort/libhailort/examples/cpp/async_infer_basic_example/async_infer_basic_example.cpp
@@ -36,79 +36,60 @@ static std::shared_ptr<uint8_t> page_aligned_alloc(size_t size)
 
 int main()
 {
-    auto vdevice = VDevice::create();
-    if (!vdevice) {
-        std::cerr << "Failed create vdevice, status = " << vdevice.status() << std::endl;
-        return vdevice.status();
-    }
-    std::cout << "VDevice created" << std::endl;
-
-    // Create infer model from HEF file.
-    auto infer_model_exp = vdevice.value()->create_infer_model(HEF_FILE);
-    if (!infer_model_exp) {
-        std::cerr << "Failed to create infer model, status = " << infer_model_exp.status() << std::endl;
-        return infer_model_exp.status();
-    }
-    std::cout << "InferModel created" << std::endl;
-    auto infer_model = infer_model_exp.release();
-
-    // Configure the infer model
-    auto configured_infer_model = infer_model->configure();
-    if (!configured_infer_model) {
-        std::cerr << "Failed to create configured infer model, status = " << configured_infer_model.status() << std::endl;
-        return configured_infer_model.status();
-    }
-    std::cout << "ConfiguredInferModel created" << std::endl;
-
-    // The buffers are stored here as a guard for the memory. The buffer will be freed only after
-    // configured_infer_model will be released.
-    std::vector<std::shared_ptr<uint8_t>> buffer_guards;
-
-    auto bindings = configured_infer_model->create_bindings();
-    if (!bindings) {
-        std::cerr << "Failed to create infer bindings, status = " << bindings.status() << std::endl;
-        return bindings.status();
-    }
-
-    for (const auto &input_name : infer_model->get_input_names()) {
-        size_t input_frame_size = infer_model->input(input_name)->get_frame_size();
-        auto input_buffer = page_aligned_alloc(input_frame_size);
-        auto status = bindings->input(input_name)->set_buffer(MemoryView(input_buffer.get(), input_frame_size));
-        if (HAILO_SUCCESS != status) {
-            std::cerr << "Failed to set infer input buffer, status = " << status << std::endl;
-            return status;
+    try {
+        auto vdevice = VDevice::create().expect("Failed create vdevice");
+        std::cout << "VDevice created" << std::endl;
+
+        // Create infer model from HEF file.
+        auto infer_model = vdevice->create_infer_model(HEF_FILE).expect("Failed to create infer model");
+        std::cout << "InferModel created" << std::endl;
+
+        /* The buffers are stored here to ensure memory safety. They will only be freed once
+           the configured_infer_model is released, guaranteeing that the buffers remain intact 
+           until the configured_infer_model is done using them */
+        std::vector<std::shared_ptr<uint8_t>> buffer_guards;
+
+        // Configure the infer model
+        auto configured_infer_model = infer_model->configure().expect("Failed to create configured infer model");
+        std::cout << "ConfiguredInferModel created" << std::endl;
+
+        auto bindings = configured_infer_model.create_bindings().expect("Failed to create infer bindings");
+        for (const auto &input_name : infer_model->get_input_names()) {
+            size_t input_frame_size = infer_model->input(input_name)->get_frame_size();
+            auto input_buffer = page_aligned_alloc(input_frame_size);
+            auto status = bindings.input(input_name)->set_buffer(MemoryView(input_buffer.get(), input_frame_size));
+            if (HAILO_SUCCESS != status) {
+                throw hailort_error(status, "Failed to set infer input buffer");
+            }
+
+            buffer_guards.push_back(input_buffer);
         }
 
-        buffer_guards.push_back(input_buffer);
-    }
+        for (const auto &output_name : infer_model->get_output_names()) {
+            size_t output_frame_size = infer_model->output(output_name)->get_frame_size();
+            auto output_buffer = page_aligned_alloc(output_frame_size);
+            auto status = bindings.output(output_name)->set_buffer(MemoryView(output_buffer.get(), output_frame_size));
+            if (HAILO_SUCCESS != status) {
+                throw hailort_error(status, "Failed to set infer output buffer");
+            }
+
+            buffer_guards.push_back(output_buffer);
+        }
+        std::cout << "ConfiguredInferModel::Bindings created and configured" << std::endl;
 
-    for (const auto &output_name : infer_model->get_output_names()) {
-        size_t output_frame_size = infer_model->output(output_name)->get_frame_size();
-        auto output_buffer = page_aligned_alloc(output_frame_size);
-        auto status = bindings->output(output_name)->set_buffer(MemoryView(output_buffer.get(), output_frame_size));
+        std::cout << "Running inference..." << std::endl;
+        // Run the async infer job
+        auto job = configured_infer_model.run_async(bindings).expect("Failed to start async infer job");
+        auto status = job.wait(std::chrono::milliseconds(1000));
         if (HAILO_SUCCESS != status) {
-            std::cerr << "Failed to set infer output buffer, status = " << status << std::endl;
-            return status;
+            throw hailort_error(status, "Failed to wait for infer to finish");
         }
 
-        buffer_guards.push_back(output_buffer);
-    }
-    std::cout << "ConfiguredInferModel::Bindings created and configured" << std::endl;
-
-    std::cout << "Running inference..." << std::endl;
-    // Run the async infer job.
-    auto job = configured_infer_model->run_async(bindings.value());
-    if (!job) {
-        std::cerr << "Failed to start async infer job, status = " << job.status() << std::endl;
-        return job.status();
-    }
-
-    auto status = job->wait(std::chrono::milliseconds(1000));
-    if (HAILO_SUCCESS != status) {
-        std::cerr << "Failed to wait for infer to finish, status = " << status << std::endl;
-        return status;
-    }
-
-    std::cout << "Inference finished successfully" << std::endl;
+        std::cout << "Inference finished successfully" << std::endl;
+    } catch (const hailort_error &exception) { // reached from any expect()/throw in the try block above
+        std::cerr << "Failed to run inference. status=" << exception.status() << ", error message: " << exception.what() << std::endl;
+        return exception.status();
+    }
+
     return HAILO_SUCCESS;
 }
diff --git a/hailort/libhailort/examples/cpp/infer_pipeline_example/CMakeLists.txt b/hailort/libhailort/examples/cpp/infer_pipeline_example/CMakeLists.txt
index 5c5265c3..7233f146 100644
--- a/hailort/libhailort/examples/cpp/infer_pipeline_example/CMakeLists.txt
+++ b/hailort/libhailort/examples/cpp/infer_pipeline_example/CMakeLists.txt
@@ -1,6 +1,6 @@
-cmake_minimum_required(VERSION 3.0.0)
+cmake_minimum_required(VERSION 3.5.0)
 
-find_package(HailoRT 4.18.0 EXACT REQUIRED)
+find_package(HailoRT 4.19.0 EXACT REQUIRED)
 
 add_executable(cpp_infer_pipeline_example infer_pipeline_example.cpp)
 target_link_libraries(cpp_infer_pipeline_example PRIVATE HailoRT::libhailort)
diff --git a/hailort/libhailort/examples/cpp/infer_pipeline_example/infer_pipeline_example.cpp b/hailort/libhailort/examples/cpp/infer_pipeline_example/infer_pipeline_example.cpp
index 2700e026..250bfc13 100644
--- a/hailort/libhailort/examples/cpp/infer_pipeline_example/infer_pipeline_example.cpp
+++ b/hailort/libhailort/examples/cpp/infer_pipeline_example/infer_pipeline_example.cpp
@@ -1,12 +1,12 @@
 /**
- * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved.
+ * Copyright (c) 2020-2024 Hailo Technologies Ltd. All rights reserved.
  * Distributed under the MIT license (https://opensource.org/licenses/MIT)
  **/
 /**
  * @file infer_pipeline_example.cpp
  * This example demonstrates the basic data-path on HailoRT using the high level API - Virtual Stream Pipeline.
- * The program creates a device according to the provided IP address, generates a random dataset,
- * and runs it through the device with virtual streams pipeline.
+ * The program creates a virtual device, generates a random dataset,
+ * and runs it through a Hailo device with virtual streams pipeline.
  **/
 
 #include "hailo/hailort.hpp"
@@ -18,23 +18,21 @@
 constexpr size_t FRAMES_COUNT = 100;
 constexpr hailo_format_type_t FORMAT_TYPE = HAILO_FORMAT_TYPE_AUTO;
 
-#define USAGE_ERROR_MSG ("Args parsing error.\nUsage: infer_pipeline_example <ip_address>\n")
-
 using namespace hailort;
 
-Expected<std::shared_ptr<ConfiguredNetworkGroup>> configure_network_group(Device &device)
+Expected<std::shared_ptr<ConfiguredNetworkGroup>> configure_network_group(VDevice &vdevice)
 {
     auto hef = Hef::create(HEF_FILE);
     if (!hef) {
         return make_unexpected(hef.status());
     }
 
-    auto configure_params = device.create_configure_params(hef.value());
+    auto configure_params = vdevice.create_configure_params(hef.value());
     if (!configure_params) {
         return make_unexpected(configure_params.status());
     }
 
-    auto network_groups = device.configure(hef.value(), configure_params.value());
+    auto network_groups = vdevice.configure(hef.value(), configure_params.value());
     if (!network_groups) {
         return make_unexpected(network_groups.status());
     }
@@ -80,30 +78,15 @@ hailo_status infer(InferVStreams &pipeline)
     return status;
 }
 
-Expected<std::string> parse_arguments(int argc, char **argv)
-{
-    if (2 != argc) {
-        std::cerr << USAGE_ERROR_MSG << std::endl;
-        return make_unexpected(HAILO_INVALID_ARGUMENT);
-    }
-    return std::string(argv[1]);
-}
-
-int main(int argc, char **argv)
+int main()
 {
-    auto device_ip = parse_arguments(argc, argv);
-    if (!device_ip) {
-        std::cerr << "Failed parsing arguments " << device_ip.status() << std::endl;
-        return device_ip.status();
+    auto vdevice = VDevice::create();
+    if (!vdevice) {
+        std::cerr << "Failed to create vdevice, status = " << vdevice.status() << std::endl;
+        return vdevice.status();
     }
 
-    auto device = Device::create_eth(device_ip.value());
-    if (!device) {
-        std::cerr << "Failed create_eth " << device.status() << std::endl;
-        return device.status();
-    }
-
-    auto network_group = configure_network_group(*device.value());
+    auto network_group = configure_network_group(*vdevice.value());
     if (!network_group) {
         std::cerr << "Failed to configure network group " << HEF_FILE << std::endl;
         return network_group.status();
@@ -121,12 +104,6 @@ int main(int argc, char **argv)
         return output_params.status();
     }
 
-    auto activated_network_group = network_group.value()->activate();
-    if (!activated_network_group) {
-        std::cerr << "Failed activated network group "  << activated_network_group.status();
-        return activated_network_group.status();
-    }
-
     auto pipeline = InferVStreams::create(*network_group.value(), input_params.value(), output_params.value());
     if (!pipeline) {
         std::cerr << "Failed to create inference pipeline " << pipeline.status() << std::endl;
diff --git a/hailort/libhailort/examples/cpp/multi_device_example/CMakeLists.txt b/hailort/libhailort/examples/cpp/multi_device_example/CMakeLists.txt
index 49681ec0..0dfc21f9 100644
--- a/hailort/libhailort/examples/cpp/multi_device_example/CMakeLists.txt
+++ b/hailort/libhailort/examples/cpp/multi_device_example/CMakeLists.txt
@@ -1,9 +1,9 @@
-cmake_minimum_required(VERSION 3.0.0)
+cmake_minimum_required(VERSION 3.5.0)
 
 set(THREADS_PREFER_PTHREAD_FLAG ON)
 find_package(Threads REQUIRED)
 
-find_package(HailoRT 4.18.0 EXACT REQUIRED)
+find_package(HailoRT 4.19.0 EXACT REQUIRED)
 
 add_executable(cpp_multi_device_example multi_device_example.cpp)
 target_link_libraries(cpp_multi_device_example PRIVATE HailoRT::libhailort Threads::Threads)
diff --git a/hailort/libhailort/examples/cpp/multi_network_vstream_example/CMakeLists.txt b/hailort/libhailort/examples/cpp/multi_network_vstream_example/CMakeLists.txt
index 6bf25bb5..f7508c70 100644
--- a/hailort/libhailort/examples/cpp/multi_network_vstream_example/CMakeLists.txt
+++ b/hailort/libhailort/examples/cpp/multi_network_vstream_example/CMakeLists.txt
@@ -1,9 +1,9 @@
-cmake_minimum_required(VERSION 3.0.0)
+cmake_minimum_required(VERSION 3.5.0)
 
 find_package(Threads REQUIRED)
 set(THREADS_PREFER_PTHREAD_FLAG ON)
 
-find_package(HailoRT 4.18.0 EXACT REQUIRED)
+find_package(HailoRT 4.19.0 EXACT REQUIRED)
 
 add_executable(cpp_multi_network_vstream_example multi_network_vstream_example.cpp)
 target_link_libraries(cpp_multi_network_vstream_example PRIVATE HailoRT::libhailort Threads::Threads)
diff --git a/hailort/libhailort/examples/cpp/multi_process_example/CMakeLists.txt b/hailort/libhailort/examples/cpp/multi_process_example/CMakeLists.txt
index 7ddec347..b0106f1e 100644
--- a/hailort/libhailort/examples/cpp/multi_process_example/CMakeLists.txt
+++ b/hailort/libhailort/examples/cpp/multi_process_example/CMakeLists.txt
@@ -1,9 +1,9 @@
-cmake_minimum_required(VERSION 3.0.0)
+cmake_minimum_required(VERSION 3.5.0)
 
 set(THREADS_PREFER_PTHREAD_FLAG ON)
 find_package(Threads REQUIRED)
 
-find_package(HailoRT 4.18.0 EXACT REQUIRED)
+find_package(HailoRT 4.19.0 EXACT REQUIRED)
 
 add_executable(cpp_multi_process_example multi_process_example.cpp)
 target_link_libraries(cpp_multi_process_example PRIVATE HailoRT::libhailort Threads::Threads)
diff --git a/hailort/libhailort/examples/cpp/notification_callback_example/CMakeLists.txt b/hailort/libhailort/examples/cpp/notification_callback_example/CMakeLists.txt
index c57af5a7..dbda67d4 100644
--- a/hailort/libhailort/examples/cpp/notification_callback_example/CMakeLists.txt
+++ b/hailort/libhailort/examples/cpp/notification_callback_example/CMakeLists.txt
@@ -1,6 +1,6 @@
-cmake_minimum_required(VERSION 3.0.0)
+cmake_minimum_required(VERSION 3.5.0)
 
-find_package(HailoRT 4.18.0 EXACT REQUIRED)
+find_package(HailoRT 4.19.0 EXACT REQUIRED)
 
 add_executable(cpp_notification_callback_example notification_callback_example.cpp)
 target_link_libraries(cpp_notification_callback_example PRIVATE HailoRT::libhailort)
diff --git a/hailort/libhailort/examples/cpp/power_measurement_example/CMakeLists.txt b/hailort/libhailort/examples/cpp/power_measurement_example/CMakeLists.txt
index 939d60cf..ab02e0b1 100644
--- a/hailort/libhailort/examples/cpp/power_measurement_example/CMakeLists.txt
+++ b/hailort/libhailort/examples/cpp/power_measurement_example/CMakeLists.txt
@@ -1,6 +1,6 @@
-cmake_minimum_required(VERSION 3.0.0)
+cmake_minimum_required(VERSION 3.5.0)
 
-find_package(HailoRT 4.18.0 EXACT REQUIRED)
+find_package(HailoRT 4.19.0 EXACT REQUIRED)
 
 add_executable(cpp_power_measurement_example power_measurement_example.cpp)
 target_link_libraries(cpp_power_measurement_example PRIVATE HailoRT::libhailort)
diff --git a/hailort/libhailort/examples/cpp/raw_async_streams_multi_thread_example/CMakeLists.txt b/hailort/libhailort/examples/cpp/raw_async_streams_multi_thread_example/CMakeLists.txt
index a7e276ce..3fd78325 100644
--- a/hailort/libhailort/examples/cpp/raw_async_streams_multi_thread_example/CMakeLists.txt
+++ b/hailort/libhailort/examples/cpp/raw_async_streams_multi_thread_example/CMakeLists.txt
@@ -1,9 +1,9 @@
-cmake_minimum_required(VERSION 3.0.0)
+cmake_minimum_required(VERSION 3.5.0)
 
 set(THREADS_PREFER_PTHREAD_FLAG ON)
 find_package(Threads REQUIRED)
 
-find_package(HailoRT 4.18.0 EXACT REQUIRED)
+find_package(HailoRT 4.19.0 EXACT REQUIRED)
 
 add_executable(cpp_raw_async_streams_multi_thread_example raw_async_streams_multi_thread_example.cpp)
 target_link_libraries(cpp_raw_async_streams_multi_thread_example PRIVATE HailoRT::libhailort Threads::Threads)
diff --git a/hailort/libhailort/examples/cpp/raw_async_streams_single_thread_example/CMakeLists.txt b/hailort/libhailort/examples/cpp/raw_async_streams_single_thread_example/CMakeLists.txt
index 97f42018..353ea05a 100644
--- a/hailort/libhailort/examples/cpp/raw_async_streams_single_thread_example/CMakeLists.txt
+++ b/hailort/libhailort/examples/cpp/raw_async_streams_single_thread_example/CMakeLists.txt
@@ -1,9 +1,9 @@
-cmake_minimum_required(VERSION 3.0.0)
+cmake_minimum_required(VERSION 3.5.0)
 
 set(THREADS_PREFER_PTHREAD_FLAG ON)
 find_package(Threads REQUIRED)
 
-find_package(HailoRT 4.18.0 EXACT REQUIRED)
+find_package(HailoRT 4.19.0 EXACT REQUIRED)
 
 add_executable(cpp_raw_async_streams_single_thread_example raw_async_streams_single_thread_example.cpp)
 target_link_libraries(cpp_raw_async_streams_single_thread_example PRIVATE HailoRT::libhailort Threads::Threads)
diff --git a/hailort/libhailort/examples/cpp/raw_streams_example/CMakeLists.txt b/hailort/libhailort/examples/cpp/raw_streams_example/CMakeLists.txt
index 59920d70..39f68b71 100644
--- a/hailort/libhailort/examples/cpp/raw_streams_example/CMakeLists.txt
+++ b/hailort/libhailort/examples/cpp/raw_streams_example/CMakeLists.txt
@@ -1,9 +1,9 @@
-cmake_minimum_required(VERSION 3.0.0)
+cmake_minimum_required(VERSION 3.5.0)
 
 set(THREADS_PREFER_PTHREAD_FLAG ON)
 find_package(Threads REQUIRED)
 
-find_package(HailoRT 4.18.0 EXACT REQUIRED)
+find_package(HailoRT 4.19.0 EXACT REQUIRED)
 
 add_executable(cpp_raw_streams_example raw_streams_example.cpp)
 target_link_libraries(cpp_raw_streams_example PRIVATE HailoRT::libhailort Threads::Threads)
diff --git a/hailort/libhailort/examples/cpp/switch_network_groups_example/CMakeLists.txt b/hailort/libhailort/examples/cpp/switch_network_groups_example/CMakeLists.txt
index 27f6f1b9..a7adf0d2 100644
--- a/hailort/libhailort/examples/cpp/switch_network_groups_example/CMakeLists.txt
+++ b/hailort/libhailort/examples/cpp/switch_network_groups_example/CMakeLists.txt
@@ -1,9 +1,9 @@
-cmake_minimum_required(VERSION 3.0.0)
+cmake_minimum_required(VERSION 3.5.0)
 
 set(THREADS_PREFER_PTHREAD_FLAG ON)
 find_package(Threads REQUIRED)
 
-find_package(HailoRT 4.18.0 EXACT REQUIRED)
+find_package(HailoRT 4.19.0 EXACT REQUIRED)
 
 add_executable(cpp_switch_network_groups_example switch_network_groups_example.cpp)
 target_link_libraries(cpp_switch_network_groups_example PRIVATE HailoRT::libhailort Threads::Threads)
diff --git a/hailort/libhailort/examples/cpp/switch_network_groups_manually_example/CMakeLists.txt b/hailort/libhailort/examples/cpp/switch_network_groups_manually_example/CMakeLists.txt
index 33b5fe02..5d6f7098 100644
--- a/hailort/libhailort/examples/cpp/switch_network_groups_manually_example/CMakeLists.txt
+++ b/hailort/libhailort/examples/cpp/switch_network_groups_manually_example/CMakeLists.txt
@@ -1,9 +1,9 @@
-cmake_minimum_required(VERSION 3.0.0)
+cmake_minimum_required(VERSION 3.5.0)
 
 find_package(Threads REQUIRED)
 set(THREADS_PREFER_PTHREAD_FLAG ON)
 
-find_package(HailoRT 4.18.0 EXACT REQUIRED)
+find_package(HailoRT 4.19.0 EXACT REQUIRED)
 
 add_executable(cpp_switch_network_groups_manually_example switch_network_groups_manually_example.cpp)
 target_link_libraries(cpp_switch_network_groups_manually_example PRIVATE HailoRT::libhailort Threads::Threads)
diff --git a/hailort/libhailort/examples/cpp/vstreams_example/CMakeLists.txt b/hailort/libhailort/examples/cpp/vstreams_example/CMakeLists.txt
index 601d3180..18c9d3df 100644
--- a/hailort/libhailort/examples/cpp/vstreams_example/CMakeLists.txt
+++ b/hailort/libhailort/examples/cpp/vstreams_example/CMakeLists.txt
@@ -1,9 +1,9 @@
-cmake_minimum_required(VERSION 3.0.0)
+cmake_minimum_required(VERSION 3.5.0)
 
 set(THREADS_PREFER_PTHREAD_FLAG ON)
 find_package(Threads REQUIRED)
 
-find_package(HailoRT 4.18.0 EXACT REQUIRED)
+find_package(HailoRT 4.19.0 EXACT REQUIRED)
 
 add_executable(cpp_vstreams_example vstreams_example.cpp)
 target_link_libraries(cpp_vstreams_example PRIVATE HailoRT::libhailort Threads::Threads)
diff --git a/hailort/libhailort/hef.proto b/hailort/libhailort/hef.proto
index 1b65d2a8..7e7d4cc1 100644
--- a/hailort/libhailort/hef.proto
+++ b/hailort/libhailort/hef.proto
@@ -33,7 +33,7 @@ enum ProtoHEFExtensionType {
     TEST_ONE = 7;
     MULTI_NETWORK_VARIABLE_BATCH_SIZE = 8;
     IS_NMS_MULTI_CONTEXT = 9;
-    OFFLOAD_ARGMAX = 10;
+    OFFLOAD_ARGMAX = 10;    // Deprecated
     HW_PADDING = 11;
     KO_RUN_ASAP = 12;
     HAILO_NET_FLOW = 13;
@@ -577,7 +577,7 @@ message ProtoHEFAction {
         ProtoHEFActionNone none = 9;
         ProtoHEFActionAllowInputDataflow allow_input_dataflow = 10;
         ProtoHEFActionWaitForModuleConfigDone wait_for_module_config_done = 11;
-        ProtoHEFActionDebugSleep debug_sleep = 12;
+        ProtoHEFActionDebug debug = 12;
         ProtoHEFActionEnableNMS enable_nms = 13;
         ProtoHEFActionWriteDataByType write_data_by_type = 14;
         ProtoHEFActionSwitchLcuBatch switch_lcu_batch = 15;
@@ -610,6 +610,15 @@ message ProtoHEFActionWriteDataCcw {
     uint32 cfg_channel_index = 2;
 }
 
+enum ProtoHEFDebugType {
+    SLEEP = 0;
+    HALT = 1;
+};
+
+// Halt action - stops the system
+message ProtoHEFActionDebugHalt {
+}
+
 message ProtoHEFActionWriteCompressedData {
     // The address to write the data to
     uint64 address = 1;
@@ -623,6 +632,16 @@ message ProtoHEFActionDebugSleep {
     uint64 duration_in_usec = 1;
 }
 
+
+message ProtoHEFActionDebug {
+    ProtoHEFDebugType type = 1;
+    oneof action {
+        ProtoHEFActionDebugSleep sleep = 2;
+        ProtoHEFActionDebugHalt halt = 3;
+    }
+}
+
+
 enum ProtoHEFWriteDataType {
     DATA_FROM_ACTION = 0;
     BATCH_SIZE = 1;
@@ -925,7 +944,7 @@ message ProtoHEFEdgeLayerBase {
     uint32 data_bytes = 12;
 
     repeated ProtoHEFResourceIndices buffer_indices = 13;
-    bool host_argmax = 14;
+    bool host_argmax = 14;  // Deprecated
     uint32 max_shmifo_size = 15;
     uint32 engine_id = 16;
 }
diff --git a/hailort/libhailort/include/hailo/buffer.hpp b/hailort/libhailort/include/hailo/buffer.hpp
index 08c6b136..e9be816d 100644
--- a/hailort/libhailort/include/hailo/buffer.hpp
+++ b/hailort/libhailort/include/hailo/buffer.hpp
@@ -36,10 +36,16 @@ struct HAILORTAPI BufferStorageParams
 public:
 
     static BufferStorageParams create_dma();
+    static BufferStorageParams create_shared_memory(const std::string &shm_name, bool memory_owner = true);
+    static BufferStorageParams open_shared_memory(const std::string &shm_name);
     // Defaults to heap params
     BufferStorageParams();
 
     hailo_buffer_flags_t flags;
+
+    // params for shared_memory_buffer
+    std::string shared_memory_name;
+    bool memory_owner;
 };
 
 class HAILORTAPI Buffer final
diff --git a/hailort/libhailort/include/hailo/device.hpp b/hailort/libhailort/include/hailo/device.hpp
index f5b421d1..921c71cd 100644
--- a/hailort/libhailort/include/hailo/device.hpp
+++ b/hailort/libhailort/include/hailo/device.hpp
@@ -48,6 +48,13 @@ class HAILORTAPI Device
         INTEGRATED
     };
 
+    /** The capabilities supported by the device */
+    struct Capabilities {
+        bool power_measurements;
+        bool current_measurements;
+        bool temperature_measurements;
+    };
+
     /**
      * Returns the device_id string on all available devices in the system.
      * The device id is a unique identitier for the device on the system.
@@ -239,7 +246,7 @@ class HAILORTAPI Device
      * @return Upon success, returns Expected of ::hailo_device_identity_t.
      *         Otherwise, returns Unexpected of ::hailo_status error.
      */
-    Expected<hailo_device_identity_t> identify();
+    virtual Expected<hailo_device_identity_t> identify();
 
     /**
      * Receive information about the core cpu.
@@ -255,7 +262,7 @@ class HAILORTAPI Device
      * @return Upon success, returns Expected of ::hailo_extended_device_information_t containing the extended information about the device.
      *         Otherwise, returns Unexpected of ::hailo_status error.
      */
-    Expected<hailo_extended_device_information_t> get_extended_device_information();
+    virtual Expected<hailo_extended_device_information_t> get_extended_device_information();
 
     /**
      * Configure fw logger level and interface of sending.
@@ -768,6 +775,13 @@ class HAILORTAPI Device
      */
     virtual hailo_status dma_unmap_dmabuf(int dmabuf_fd, size_t size, hailo_dma_buffer_direction_t direction);
 
+    /**
+     * Gets a struct specifying the device's capabilities.
+     *
+     * @return Upon success, returns Expected of Capabilities.
+     *         Otherwise, returns Unexpected of ::hailo_status error.
+     */
+    Expected<Capabilities> get_capabilities();
 
     virtual hailo_status direct_write_memory(uint32_t address, const void *buffer, uint32_t size);
     virtual hailo_status direct_read_memory(uint32_t address, void *buffer, uint32_t size);
@@ -778,7 +792,7 @@ class HAILORTAPI Device
     // The sum of the number of contexts will fit in uint8_t
     Expected<std::vector<uint8_t>> get_number_of_dynamic_contexts_per_network_group();
     Expected<Buffer> download_context_action_list(uint32_t network_group_id, uint8_t context_type,
-        uint16_t context_index, uint32_t *base_address, uint32_t *batch_counter, uint16_t max_size = 10000);
+        uint16_t context_index, uint32_t *base_address, uint32_t *batch_counter, uint32_t *idle_time_local, uint16_t max_size = 10000);
     // The batch configured is reset between network groups
     hailo_status set_context_action_list_timestamp_batch(uint16_t batch_index);
     hailo_status set_context_switch_breakpoint(uint8_t breakpoint_id, bool break_at_any_network_group_index,
@@ -819,6 +833,8 @@ class HAILORTAPI Device
 private:
     uint32_t get_control_sequence();
     bool is_control_version_supported();
+    Expected<bool> has_INA231_H8();
+    Expected<bool> has_INA231_H15();
 
     friend class Control;
 };
diff --git a/hailort/libhailort/include/hailo/expected.hpp b/hailort/libhailort/include/hailo/expected.hpp
index 13dcaff2..23ef181f 100644
--- a/hailort/libhailort/include/hailo/expected.hpp
+++ b/hailort/libhailort/include/hailo/expected.hpp
@@ -167,11 +167,52 @@
 #include <utility>
 #include <type_traits>
 
+#ifdef __cpp_exceptions
+#include <stdexcept>
+#include <sstream>
+/** hailort namespace */
+namespace hailort
+{
+
+/*! hailort_error is an Exception object that inherits from std::runtime_error.
+    Using this class requires compilation with exceptions */
+class hailort_error : public std::runtime_error
+{
+public:
+    template<typename... Args>
+    hailort_error(hailo_status status, Args&&... args) :
+        std::runtime_error(std::forward<Args>(args)...), m_status(status)
+    {}
+
+    /**
+     * Returns the error status that caused this exception.
+     */
+    hailo_status status() const {
+        return m_status;
+    }
 
+private:
+    hailo_status m_status;
+};
+#else
 /** hailort namespace */
 namespace hailort
 {
 
+/*! Placeholder hailort_error for builds without exceptions; it does not inherit from std::runtime_error.
+    Its status() always returns HAILO_INVALID_OPERATION */
+class hailort_error {
+public:
+
+    /**
+     * Always returns HAILO_INVALID_OPERATION in this placeholder class.
+     */
+    hailo_status status() const {
+        return HAILO_INVALID_OPERATION;
+    }
+};
+#endif
+
 // TODO(oro): constexpr
 // TODO(oro): noexcept
 // TODO(oro): std::is_default_constructible
@@ -429,6 +470,41 @@ class Expected final
         return tmp;
     }
 
+#ifdef __cpp_exceptions
+
+    /**
+     * If the object contains a value, releases ownership of the stored value by returning its value and making this object Unexpected.
+     * If the object is Unexpected, throws an exception of type hailort_error whose message contains the status and @a msg.
+     * @note Using this method requires compilation with exceptions.
+     */
+    T expect(const std::string &msg) &&
+    {
+        if (!has_value()) {
+            std::stringstream ss;
+            ss << "Expected::expect() failed with status=";
+            ss << status();
+            ss << ". ";
+            ss << msg;
+            throw hailort_error(status(), ss.str());
+        }
+        return release();
+    }
+#else
+    template <typename...>
+    struct always_false { static constexpr bool value = false; };
+    template <typename... Args>
+    /**
+     * Placeholder for Expected::expect() when exceptions are disabled.
+     * Any use of this method fails compilation with a static_assert.
+     * @note Using Expected::expect() requires compilation with exceptions.
+     */
+    T expect(Args &&...)
+    {
+        static_assert(always_false<Args...>::value, "Expected::expect() can't be used since exceptions are disabled.");
+    }
+#endif
+
+
     /**
      * Pointer of the contained value
      */
diff --git a/hailort/libhailort/include/hailo/hailort.h b/hailort/libhailort/include/hailo/hailort.h
index 98500498..b2817652 100644
--- a/hailort/libhailort/include/hailo/hailort.h
+++ b/hailort/libhailort/include/hailo/hailort.h
@@ -421,7 +421,7 @@ typedef enum hailo_device_architecture_e {
     HAILO_ARCH_PLUTO,
     HAILO_ARCH_HAILO15M,
     HAILO_ARCH_HAILO10H,
-    
+
     /** Max enum value to maintain ABI Integrity */
     HAILO_ARCH_MAX_ENUM = HAILO_MAX_ENUM
 } hailo_device_architecture_t;
@@ -771,12 +771,6 @@ typedef enum {
      */
     HAILO_FORMAT_FLAGS_TRANSPOSED          = 1 << 1,
 
-    /**
-     * If set, argmax will be called on the feature dimension.
-     * Only set on device side.
-     */
-    HAILO_FORMAT_FLAGS_HOST_ARGMAX         = 1 << 2,
-
     /** Max enum value to maintain ABI Integrity */
     HAILO_FORMAT_FLAGS_MAX_ENUM             = HAILO_MAX_ENUM
 } hailo_format_flags_t;
@@ -838,9 +832,10 @@ typedef enum {
 // **************************************************************************************** //
 /** Hailo buffer flags */
 typedef enum {
-    HAILO_BUFFER_FLAGS_NONE         = 0,            /*!< No flags - heap allocated buffer */
-    HAILO_BUFFER_FLAGS_DMA          = 1 << 0,       /*!< Buffer is mapped to DMA (will be page aligned implicitly) */
-    HAILO_BUFFER_FLAGS_CONTINUOUS   = 1 << 1,       /*!< Buffer is physically continuous (will be page aligned implicitly) */
+    HAILO_BUFFER_FLAGS_NONE             = 0,        /*!< No flags - heap allocated buffer */
+    HAILO_BUFFER_FLAGS_DMA              = 1 << 0,   /*!< Buffer is mapped to DMA (will be page aligned implicitly) */
+    HAILO_BUFFER_FLAGS_CONTINUOUS       = 1 << 1,   /*!< Buffer is physically continuous (will be page aligned implicitly) */
+    HAILO_BUFFER_FLAGS_SHARED_MEMORY    = 1 << 2,   /*!< Buffer is shared memory (will be page aligned implicitly) */
 
     /** Max enum value to maintain ABI Integrity */
     HAILO_BUFFER_FLAGS_MAX_ENUM     = HAILO_MAX_ENUM
diff --git a/hailort/libhailort/include/hailo/hailort_common.hpp b/hailort/libhailort/include/hailo/hailort_common.hpp
index 2f11231e..474357be 100644
--- a/hailort/libhailort/include/hailo/hailort_common.hpp
+++ b/hailort/libhailort/include/hailo/hailort_common.hpp
@@ -425,10 +425,6 @@ class HAILORTAPI HailoRTCommon final
     static Expected<hailo_device_id_t> to_device_id(const std::string &device_id);
     static Expected<std::vector<hailo_device_id_t>> to_device_ids_vector(const std::vector<std::string> &device_ids_str);
     static Expected<hailo_pix_buffer_t> as_hailo_pix_buffer(MemoryView memory_view, hailo_format_order_t order);
-
-    static bool is_power_measurement_supported(const hailo_device_architecture_t &hw_arch);
-    static bool is_current_measurement_supported(const hailo_device_architecture_t &hw_arch);
-    static bool is_temp_measurement_supported(const hailo_device_architecture_t &hw_arch);
 };
 
 #ifndef HAILO_EMULATOR
diff --git a/hailort/libhailort/include/hailo/hailort_dma-heap.h b/hailort/libhailort/include/hailo/hailort_dma-heap.h
index 1992c766..55298b9e 100644
--- a/hailort/libhailort/include/hailo/hailort_dma-heap.h
+++ b/hailort/libhailort/include/hailo/hailort_dma-heap.h
@@ -10,6 +10,8 @@
 #ifndef _HAILO_HAILORT_DMAHEAP_H
 #define _HAILO_HAILORT_DMAHEAP_H
 
+#if defined(__linux__)
+
 #include <linux/ioctl.h>
 #include <linux/types.h>
 
@@ -52,4 +54,6 @@ struct dma_heap_allocation_data {
 #define DMA_HEAP_IOCTL_ALLOC	_IOWR(DMA_HEAP_IOC_MAGIC, 0x0,\
 				      struct dma_heap_allocation_data)
 
-#endif /* _HAILO_HAILORT_DMAHEAP_H */
+#endif /* __linux__ */
+
+#endif /* _HAILO_HAILORT_DMAHEAP_H */
\ No newline at end of file
diff --git a/hailort/libhailort/include/hailo/infer_model.hpp b/hailort/libhailort/include/hailo/infer_model.hpp
index 589a2fde..fc72d51b 100644
--- a/hailort/libhailort/include/hailo/infer_model.hpp
+++ b/hailort/libhailort/include/hailo/infer_model.hpp
@@ -26,7 +26,7 @@ class AsyncInferRunnerImpl;
 class HAILORTAPI AsyncInferJob
 {
 public:
-    AsyncInferJob() = default;
+    AsyncInferJob() : m_should_wait_in_dtor(false) {};
     virtual ~AsyncInferJob();
 
     AsyncInferJob(const AsyncInferJob &other) = delete;
@@ -72,6 +72,8 @@ class HAILORTAPI ConfiguredInferModel
     {
     public:
         Bindings() = default;
+        Bindings(const Bindings &other);
+        Bindings &operator=(const Bindings &other);
 
         /** Holds the input and output buffers of the Bindings infer request */
         class HAILORTAPI InferStream
@@ -98,7 +100,7 @@ class HAILORTAPI ConfiguredInferModel
             * Otherwise, returns Unexpected of ::hailo_status error.
             * @note If buffer type is not MemoryView, will return ::HAILO_INVALID_OPERATION.
             */
-            Expected<MemoryView> get_buffer();
+            Expected<MemoryView> get_buffer() const;
 
             /**
              * Sets the edge's buffer to a new one, of type hailo_pix_buffer_t.
@@ -117,7 +119,7 @@ class HAILORTAPI ConfiguredInferModel
             * Otherwise, returns Unexpected of ::hailo_status error.
             * @note If buffer type is not ::hailo_pix_buffer_t, will return ::HAILO_INVALID_OPERATION.
             */
-            Expected<hailo_pix_buffer_t> get_pix_buffer();
+            Expected<hailo_pix_buffer_t> get_pix_buffer() const;
 
             /**
              * Sets the edge's buffer from a DMA buffer.
@@ -134,11 +136,14 @@ class HAILORTAPI ConfiguredInferModel
             * @note If buffer type is not ::hailo_dma_buffer_t, will return ::HAILO_INVALID_OPERATION.
             * @note Supported on Linux only.
             */
-            Expected<hailo_dma_buffer_t> get_dma_buffer();
+            Expected<hailo_dma_buffer_t> get_dma_buffer() const;
 
         private:
             friend class ConfiguredInferModelBase;
             friend class AsyncInferRunnerImpl;
+            friend class Bindings;
+
+            Expected<InferStream> inner_copy() const;
 
             class Impl;
             InferStream(std::shared_ptr<Impl> pimpl);
@@ -179,9 +184,45 @@ class HAILORTAPI ConfiguredInferModel
          */
         Expected<InferStream> output(const std::string &name);
 
+        /**
+         * Returns the single input's InferStream object, as readonly.
+         *
+         * @return Upon success, returns Expected of the single input's InferStream object. Otherwise, returns Unexpected of ::hailo_status error.
+         * @note If Bindings has multiple inputs, will return ::HAILO_INVALID_OPERATION.
+         *  In that case - use input(const std::string &name) instead.
+         */
+        Expected<InferStream> input() const;
+
+        /**
+         * Returns the single output's InferStream object, as readonly.
+         *
+         * @return Upon success, returns Expected of the single output's InferStream object. Otherwise, returns Unexpected of ::hailo_status error.
+         * @note If Bindings has multiple outputs, will return ::HAILO_INVALID_OPERATION.
+         *  In that case - use output(const std::string &name) instead.
+         */
+        Expected<InferStream> output() const;
+
+        /**
+         * Gets an input's InferStream object, as readonly.
+         *
+         * @param[in] name                    The name of the input edge.
+         * @return Upon success, returns Expected of the relevant InferStream object. Otherwise, returns a ::hailo_status error.
+         */
+        Expected<InferStream> input(const std::string &name) const;
+
+        /**
+         * Gets an output's InferStream object, as readonly.
+         *
+         * @param[in] name                    The name of the output edge.
+         * @return Upon success, returns Expected of the relevant InferStream object. Otherwise, returns a ::hailo_status error.
+         */
+        Expected<InferStream> output(const std::string &name) const;
+
     private:
         friend class ConfiguredInferModelBase;
 
+        void init_bindings_from(const Bindings &other);
+
         Bindings(std::unordered_map<std::string, InferStream> &&inputs,
             std::unordered_map<std::string, InferStream> &&outputs);
 
@@ -197,8 +238,9 @@ class HAILORTAPI ConfiguredInferModel
     Expected<Bindings> create_bindings();
 
     /**
-     * Waits until the model is ready to launch a new asynchronous inference operation.
-     * The readiness of the model is determined by the ability to push buffers to the asynchronous inference pipeline.
+     * The readiness of the model to launch is determined by the ability to push buffers to the asynchronous inference pipeline.
+     * If the model is ready, the method will return immediately.
+     * If the model is not ready, the method will wait for the model to be ready.
      *
      * @param[in] timeout           Amount of time to wait until the model is ready in milliseconds.
      * @param[in] frames_count      The count of buffers you intent to infer in the next request. Useful for batch inference.
@@ -206,6 +248,8 @@ class HAILORTAPI ConfiguredInferModel
      * @return Upon success, returns ::HAILO_SUCCESS. Otherwise:
      *           - If @a timeout has passed and the model is not ready, returns ::HAILO_TIMEOUT.
      *           - In any other error case, returns ::hailo_status error.
+     *
+     * @note Calling this function with @a frames_count greater than get_async_queue_size() will time out.
      */
     hailo_status wait_for_async_ready(std::chrono::milliseconds timeout, uint32_t frames_count = 1);
 
@@ -233,7 +277,7 @@ class HAILORTAPI ConfiguredInferModel
      * @return Upon success, returns ::HAILO_SUCCESS.
      *  Otherwise, returns Unexpected of ::hailo_status error.
      */
-    hailo_status run(Bindings bindings, std::chrono::milliseconds timeout);
+    hailo_status run(const Bindings &bindings, std::chrono::milliseconds timeout);
 
     /**
      * Launches an asynchronous inference operation with the provided bindings.
@@ -245,9 +289,10 @@ class HAILORTAPI ConfiguredInferModel
      * @return Upon success, returns an instance of Expected<AsyncInferJob> representing the launched job.
      *  Otherwise, returns Unexpected of ::hailo_status error, and the interface shuts down completly.
      * @note @a callback should execute as quickly as possible.
-     * @note The bindings' buffers should be kept intact until the async job is completed
+     * @note The bindings' buffers should be kept intact until the async job is completed.
+     * @note To ensure the inference pipeline can handle new buffers, it is recommended to first call \ref wait_for_async_ready
      */
-    Expected<AsyncInferJob> run_async(Bindings bindings,
+    Expected<AsyncInferJob> run_async(const Bindings &bindings,
         std::function<void(const AsyncInferCompletionInfo &)> callback = ASYNC_INFER_EMPTY_CALLBACK);
 
     /**
@@ -260,7 +305,9 @@ class HAILORTAPI ConfiguredInferModel
      *
      * @return Upon success, returns an instance of Expected<AsyncInferJob> representing the launched job.
      *  Otherwise, returns Unexpected of ::hailo_status error, and the interface shuts down completly.
-     * @note The bindings' buffers should be kept intact until the async job is completed
+     * @note @a callback should execute as quickly as possible.
+     * @note The bindings' buffers should be kept intact until the async job is completed.
+     * @note To ensure the inference pipeline can handle new buffers, it is recommended to first call \ref wait_for_async_ready
      */
     Expected<AsyncInferJob> run_async(const std::vector<Bindings> &bindings,
         std::function<void(const AsyncInferCompletionInfo &)> callback = ASYNC_INFER_EMPTY_CALLBACK);
@@ -332,9 +379,7 @@ class HAILORTAPI ConfiguredInferModel
     std::shared_ptr<ConfiguredInferModelBase> m_pimpl;
 };
 
-/**
- * Context passed to the callback function after the asynchronous inference operation was completed or has failed.
- */
+/** Context passed to the callback function after the asynchronous inference operation was completed or has failed. */
 struct HAILORTAPI AsyncInferCompletionInfo
 {
     /**
diff --git a/hailort/libhailort/include/hailo/network_group.hpp b/hailort/libhailort/include/hailo/network_group.hpp
index cfd7f7c7..26eba3be 100644
--- a/hailort/libhailort/include/hailo/network_group.hpp
+++ b/hailort/libhailort/include/hailo/network_group.hpp
@@ -458,6 +458,10 @@ class HAILORTAPI ConfiguredNetworkGroup
     virtual Expected<hailo_cache_info_t> get_cache_info() const = 0;
     virtual hailo_status update_cache_offset(int32_t offset_delta_bytes) = 0;
 
+    virtual Expected<std::vector<uint32_t>> get_cache_ids() const = 0;
+    virtual Expected<Buffer> read_cache_buffer(uint32_t cache_id) = 0;
+    virtual hailo_status write_cache_buffer(uint32_t cache_id, MemoryView buffer) = 0;
+
 protected:
     ConfiguredNetworkGroup();
 
diff --git a/hailort/libhailort/include/hailo/vdevice.hpp b/hailort/libhailort/include/hailo/vdevice.hpp
index e23ce6f4..7af27e7b 100644
--- a/hailort/libhailort/include/hailo/vdevice.hpp
+++ b/hailort/libhailort/include/hailo/vdevice.hpp
@@ -69,25 +69,25 @@ class HAILORTAPI VDevice
      * Creates the infer model from an hef
      *
      * @param[in] hef_path                    A string of an hef file.
-     * @param[in] network_name                A string of the network name (optional).
+     * @param[in] name                        A string of the model name (optional).
      * @return Upon success, returns Expected of a shared pointer of infer model.
      *         Otherwise, returns Unexpected of ::hailo_status error.
      * @note the Hef file must be maintained until the completion of the configuration phase.
      */
     virtual Expected<std::shared_ptr<InferModel>> create_infer_model(const std::string &hef_path,
-        const std::string &network_name = "");
+        const std::string &name = "");
 
     /**
      * Creates the infer model from an hef buffer
      *
      * @param[in] hef_buffer                  A pointer to a buffer containing the hef file.
-     * @param[in] network_name                A string of the network name (optional).
+     * @param[in] name                        A string of the model name (optional).
      * @return Upon success, returns Expected of a shared pointer of infer model.
      *         Otherwise, returns Unexpected of ::hailo_status error.
      * @note the Hef buffer must be maintained until the completion of the configuration phase.
      */
     virtual Expected<std::shared_ptr<InferModel>> create_infer_model(const MemoryView hef_buffer,
-        const std::string &network_name = "");
+        const std::string &name = "");
 
     /**
      * Gets the underlying physical devices.
@@ -128,7 +128,7 @@ class HAILORTAPI VDevice
      *
      * @param[in] hef                         A reference to an Hef object to create configure params by
      * @param[in] network_group_name          Name of network_group to make configure params for.
-     * @return Upon success, returns Expected of a NetworkGroupsParamsMap (map of string and ConfiguredNetworkParams).
+     * @return Upon success, returns Expected of a ConfigureNetworkParams.
      *         Otherwise, returns Unexpected of ::hailo_status error.
      */
     Expected<ConfigureNetworkParams> create_configure_params(Hef &hef, const std::string &network_group_name) const;
@@ -211,6 +211,8 @@ class HAILORTAPI VDevice
     virtual hailo_status after_fork_in_parent();
     virtual hailo_status after_fork_in_child();
 
+    virtual hailo_status add_network_group_ref_count(std::shared_ptr<ConfiguredNetworkGroup> network_group_ptr);
+
     virtual ~VDevice() = default;
     VDevice(const VDevice &) = delete;
     VDevice &operator=(const VDevice &) = delete;
@@ -218,7 +220,7 @@ class HAILORTAPI VDevice
     VDevice &operator=(VDevice &&other) = delete;
 
     static bool service_over_ip_mode();
-    static bool force_hrpc_client();
+    static bool should_force_hrpc_client();
 
 protected:
     VDevice() = default;
diff --git a/hailort/libhailort/src/CMakeLists.txt b/hailort/libhailort/src/CMakeLists.txt
index 51360244..f82907e8 100644
--- a/hailort/libhailort/src/CMakeLists.txt
+++ b/hailort/libhailort/src/CMakeLists.txt
@@ -1,4 +1,4 @@
-cmake_minimum_required(VERSION 3.0.0)
+cmake_minimum_required(VERSION 3.5.0)
 
 find_package(Threads REQUIRED)
 
@@ -36,6 +36,7 @@ add_subdirectory(hef)
 add_subdirectory(network_group)
 add_subdirectory(core_op)
 add_subdirectory(net_flow)
+add_subdirectory(rpc_callbacks)
 
 if(HAILO_BUILD_SERVICE)
     add_subdirectory(service)
@@ -81,6 +82,13 @@ else()
         m # libmath
         atomic
     )
+
+    if(NOT CMAKE_SYSTEM_NAME STREQUAL Android)
+        # TODO: HRT-14770 fix android build
+        # librt is linked for the common dir. TODO: Make common directory a library
+        target_link_libraries(libhailort PRIVATE rt)
+    endif()
+
     set(THREADS_PREFER_PTHREAD_FLAG ON)
     # Hack to support cross-compilation - https://stackoverflow.com/a/49086560
     set(THREADS_PTHREAD_ARG "0" CACHE STRING "Result from TRY_RUN" FORCE)
diff --git a/hailort/libhailort/src/core_op/CMakeLists.txt b/hailort/libhailort/src/core_op/CMakeLists.txt
index 432b3e8f..22519672 100644
--- a/hailort/libhailort/src/core_op/CMakeLists.txt
+++ b/hailort/libhailort/src/core_op/CMakeLists.txt
@@ -1,4 +1,4 @@
-cmake_minimum_required(VERSION 3.0.0)
+cmake_minimum_required(VERSION 3.5.0)
 
 set(SRC_FILES
     ${CMAKE_CURRENT_SOURCE_DIR}/core_op.cpp
@@ -9,8 +9,7 @@ set(SRC_FILES
     ${CMAKE_CURRENT_SOURCE_DIR}/resource_manager/intermediate_buffer.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/resource_manager/cache_buffer.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/resource_manager/channel_allocator.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/resource_manager/action_list_buffer_builder/control_action_list_buffer_builder.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/resource_manager/action_list_buffer_builder/ddr_action_list_buffer_builder.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/resource_manager/action_list_buffer_builder/action_list_buffer_builder.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/resource_manager/periph_calculator.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/resource_manager/internal_buffer_manager.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/resource_manager/internal_buffer_planner.cpp
diff --git a/hailort/libhailort/src/core_op/core_op.cpp b/hailort/libhailort/src/core_op/core_op.cpp
index 87a91e57..131fce54 100644
--- a/hailort/libhailort/src/core_op/core_op.cpp
+++ b/hailort/libhailort/src/core_op/core_op.cpp
@@ -388,18 +388,6 @@ Expected<Buffer> CoreOp::get_intermediate_buffer(const IntermediateBufferKey &)
     return make_unexpected(HAILO_NOT_SUPPORTED);
 }
 
-Expected<Buffer> CoreOp::get_cache_buffer(uint32_t)
-{
-    LOGGER__ERROR("Getting cache buffer is not supported for this core op");
-    return make_unexpected(HAILO_NOT_SUPPORTED);
-}
-
-Expected<std::map<uint32_t, Buffer>> CoreOp::get_cache_buffers()
-{
-    LOGGER__ERROR("Getting cache buffers is not supported for this core op");
-    return make_unexpected(HAILO_NOT_SUPPORTED);
-}
-
 hailo_status CoreOp::wrap_streams_for_remote_process()
 {
     for (auto &input_stream_pair : m_input_streams) {
@@ -500,7 +488,7 @@ Expected<std::shared_ptr<InputStreamBase>> CoreOp::create_input_stream_from_conf
             }
 
         default:
-            LOGGER__ERROR("{} interface is not supported.", stream_params.stream_interface);
+            LOGGER__ERROR("{} interface is not supported.", static_cast<int>(stream_params.stream_interface));
             return make_unexpected(HAILO_NOT_IMPLEMENTED);
     }
 
@@ -612,7 +600,7 @@ Expected<std::shared_ptr<OutputStreamBase>> CoreOp::create_output_stream_from_co
             }
 
         default:
-            LOGGER__ERROR("{} interface is not supported.", stream_params.stream_interface);
+            LOGGER__ERROR("{} interface is not supported.", static_cast<int>(stream_params.stream_interface));
             return make_unexpected(HAILO_NOT_IMPLEMENTED);
     }
 
diff --git a/hailort/libhailort/src/core_op/core_op.hpp b/hailort/libhailort/src/core_op/core_op.hpp
index 9a8968fe..4189f081 100644
--- a/hailort/libhailort/src/core_op/core_op.hpp
+++ b/hailort/libhailort/src/core_op/core_op.hpp
@@ -101,8 +101,6 @@ class CoreOp
     bool is_default_batch_size() const;
 
     virtual Expected<Buffer> get_intermediate_buffer(const IntermediateBufferKey &key);
-    virtual Expected<Buffer> get_cache_buffer(uint32_t cache_id);
-    virtual Expected<std::map<uint32_t, Buffer>> get_cache_buffers();
 
     hailo_status wrap_streams_for_remote_process();
 
@@ -127,6 +125,9 @@ class CoreOp
     virtual hailo_status init_cache(uint32_t read_offset, int32_t write_offset_delta) = 0;
     virtual Expected<hailo_cache_info_t> get_cache_info() const = 0;
     virtual hailo_status update_cache_offset(int32_t offset_delta_bytes) = 0;
+    virtual Expected<std::vector<uint32_t>> get_cache_ids() const = 0;
+    virtual Expected<Buffer> read_cache_buffer(uint32_t cache_id) = 0;
+    virtual hailo_status write_cache_buffer(uint32_t cache_id, MemoryView buffer) = 0;
 
     std::map<std::string, std::shared_ptr<InputStreamBase>> m_input_streams;
     std::map<std::string, std::shared_ptr<OutputStreamBase>> m_output_streams;
diff --git a/hailort/libhailort/src/core_op/resource_manager/action_list_buffer_builder/action_list_buffer_builder.cpp b/hailort/libhailort/src/core_op/resource_manager/action_list_buffer_builder/action_list_buffer_builder.cpp
new file mode 100644
index 00000000..9eae89d7
--- /dev/null
+++ b/hailort/libhailort/src/core_op/resource_manager/action_list_buffer_builder/action_list_buffer_builder.cpp
@@ -0,0 +1,149 @@
+/**
+ * Copyright (c) 2024 Hailo Technologies Ltd. All rights reserved.
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT)
+**/
+/**
+ * @file action_list_buffer_builder.cpp
+ * @brief Class used to build action list and context buffers, to be sent via controls or written to DDR.
+ **/
+
+#include "action_list_buffer_builder.hpp"
+#include "context_switch_defs.h"
+
+#include <cassert>
+#include <cstring>
+#include <limits>
+
+namespace hailort
+{
+
+// Creates an empty builder (no controls yet).
+// Returns HAILO_OUT_OF_HOST_MEMORY if the allocation fails.
+Expected<std::shared_ptr<ActionListBufferBuilder>> ActionListBufferBuilder::create()
+{
+    auto builder = make_shared_nothrow<ActionListBufferBuilder>();
+    CHECK(nullptr != builder, HAILO_OUT_OF_HOST_MEMORY, "Failed to allocate action list buffer builder");
+    return builder;
+}
+
+// Appends one serialized action to the controls being built.
+//  - action: the serialized action bytes.
+//  - context_type: stored on the control if a new control is started by this call.
+//  - is_new_context: when true, a new control is started and marked as the first chunk of a context.
+// A new control is also started when the current control has no room for the action; the previous control
+// then stops being marked as the last chunk of its context.
+// Returns HAILO_INVALID_ARGUMENT if the action can't fit in a control buffer, and HAILO_INVALID_OPERATION if
+// is_new_context is false before any context was started.
+hailo_status ActionListBufferBuilder::build_context(MemoryView action,
+    CONTROL_PROTOCOL__context_switch_context_type_t context_type, bool is_new_context)
+{
+    assert(action.size() < std::numeric_limits<uint32_t>::max());
+    const uint32_t action_size = static_cast<uint32_t>(action.size());
+
+    // An action bigger than the whole data buffer of a control would overrun it in the memcpy below.
+    CHECK(action_size <= sizeof(CONTROL_PROTOCOL__context_switch_context_info_chunk_t::context_network_data),
+        HAILO_INVALID_ARGUMENT, "Action size {} exceeds the control data buffer size", action_size);
+
+    // Continuing a context requires an existing control (current_control() only asserts this in debug).
+    CHECK(is_new_context || !m_controls.empty(), HAILO_INVALID_OPERATION,
+        "Can't continue a context before any context was started");
+
+    if (is_new_context || !has_space_for_action(action_size)) {
+        start_new_control(context_type, is_new_context);
+    }
+
+    auto &control = current_control();
+    memcpy(&control.context_network_data[control.context_network_data_length], action.data(), action_size);
+    control.context_network_data_length += action_size;
+    return HAILO_SUCCESS;
+}
+
+// Merges the controls of every context into one context buffer, and writes each completed context using
+// driver.write_action_list().
+// Returns the address returned by the first write (0 if there was nothing to write). Fails with
+// HAILO_INVALID_ARGUMENT if the merged data of a context doesn't fit in the context buffer, or with the
+// status of a failed write.
+Expected<uint64_t> ActionListBufferBuilder::write_controls_to_ddr(HailoRTDriver &driver)
+{
+    // Each written buffer holds a whole context, so it is always marked as both first and last chunk.
+    CONTROL_PROTOCOL__context_switch_context_info_chunk_t context_info{};
+    context_info.is_first_chunk_per_context = true;
+    context_info.is_last_chunk_per_context = true;
+    uint64_t dma_address = 0;
+    bool is_first_write = true;
+
+    for (const auto &control : m_controls) {
+        if (control.is_first_chunk_per_context) {
+            context_info.context_type = control.context_type;
+            context_info.context_network_data_length = 0;
+        }
+
+        // All the chunks of a context are accumulated in one fixed size buffer - make sure the next one fits.
+        const auto merged_length = static_cast<size_t>(context_info.context_network_data_length) +
+            control.context_network_data_length;
+        CHECK(merged_length <= sizeof(context_info.context_network_data), HAILO_INVALID_ARGUMENT,
+            "Context exceeds maximum context size {}", sizeof(context_info.context_network_data));
+
+        memcpy(&(context_info.context_network_data[context_info.context_network_data_length]),
+            control.context_network_data, control.context_network_data_length);
+        context_info.context_network_data_length += control.context_network_data_length;
+
+        if (control.is_last_chunk_per_context) {
+            TRY(auto dma_address_ret, driver.write_action_list(reinterpret_cast<uint8_t*>(&context_info),
+                sizeof(CONTROL_PROTOCOL__context_switch_context_info_chunk_t)));
+            // Only the address of the first write is returned to the caller.
+            if (is_first_write) {
+                dma_address = dma_address_ret;
+                is_first_write = false;
+            }
+        }
+    }
+
+    return dma_address;
+}
+
+// Returns the total number of action bytes stored in all the controls (control headers are not counted).
+size_t ActionListBufferBuilder::get_action_list_buffer_size() const {
+    size_t size = 0;
+
+    for (const auto &control : m_controls) {
+        size += control.context_network_data_length;
+    }
+
+    return size;
+}
+
+// Returns the control that is currently being filled. Must not be called before a control was started.
+CONTROL_PROTOCOL__context_switch_context_info_chunk_t &ActionListBufferBuilder::current_control()
+{
+    assert(!m_controls.empty());
+    return m_controls.back();
+}
+
+// Returns true if action_size more bytes fit in the current control without exceeding the single control limit.
+bool ActionListBufferBuilder::has_space_for_action(uint32_t action_size)
+{
+    auto &control = current_control();
+    return (control.context_network_data_length + action_size) <= CONTROL_PROTOCOL__CONTEXT_NETWORK_DATA_SINGLE_CONTROL_MAX_SIZE;
+}
+
+// Starts a new empty control and makes it the current one.
+// When the new control continues the current context (is_new_context is false), the previous control is
+// marked as no longer being the last chunk of that context.
+void ActionListBufferBuilder::start_new_control(CONTROL_PROTOCOL__context_switch_context_type_t context_type,
+    bool is_new_context)
+{
+    if (!is_new_context) {
+        current_control().is_last_chunk_per_context = false;
+    }
+
+    // Creating a new control directly inside the vector to avoid copying the control struct.
+    m_controls.emplace_back();
+    auto &new_control = current_control();
+    new_control.context_network_data_length = 0;
+    new_control.context_type = static_cast<uint8_t>(context_type);
+    new_control.is_first_chunk_per_context = is_new_context;
+    new_control.is_last_chunk_per_context = true;
+}
+
+} /* namespace hailort */
diff --git a/hailort/libhailort/src/core_op/resource_manager/action_list_buffer_builder/action_list_buffer_builder.hpp b/hailort/libhailort/src/core_op/resource_manager/action_list_buffer_builder/action_list_buffer_builder.hpp
index 03b360c4..e0d2b2f9 100644
--- a/hailort/libhailort/src/core_op/resource_manager/action_list_buffer_builder/action_list_buffer_builder.hpp
+++ b/hailort/libhailort/src/core_op/resource_manager/action_list_buffer_builder/action_list_buffer_builder.hpp
@@ -4,11 +4,7 @@
 **/
 /**
  * @file action_list_buffer_builder.hpp
- * @brief Pure virtual class that represents the basic functions and members for building the action list for the FW.
- * Implemented and derived by two different classes:
- * ControlActionListBufferBuilder - uses control messages to send Action list to FW
- * DDRActionListBufferBuilder (only relevant in hailo1x) - Action list is written to M4 mapped memory in DDR - and read
- * from there directly by FW
+ * @brief Class used to build action list and context buffers, to be sent via controls or written to DDR.
  **/
 #ifndef _HAILO_ACTION_LIST_BUFFER_BUILDER_HPP_
 #define _HAILO_ACTION_LIST_BUFFER_BUILDER_HPP_
@@ -16,36 +12,36 @@
 #include "hailo/hailort.h"
 #include "hailo/expected.hpp"
 #include "hailo/buffer.hpp"
+#include "vdma/driver/hailort_driver.hpp"
+#include "control_protocol.h"
+#include "common/internal_env_vars.hpp"
 
 #include <vector>
 
-#include "control_protocol.h"
 
 namespace hailort
 {
 
 class ActionListBufferBuilder {
 public:
-    enum class Type {
-        CONTROL,
-        DDR
-    };
+    static Expected<std::shared_ptr<ActionListBufferBuilder>> create();
 
-    virtual hailo_status write_action(MemoryView action, CONTROL_PROTOCOL__context_switch_context_type_t context_type,
-        bool is_new_context, bool last_action_buffer_in_context) = 0;
+    ActionListBufferBuilder() = default;
+    ~ActionListBufferBuilder() = default;
 
-    virtual uint64_t get_mapped_buffer_dma_address() const = 0;
+    hailo_status build_context(MemoryView action,
+        CONTROL_PROTOCOL__context_switch_context_type_t context_type, bool is_new_context);
+    size_t get_action_list_buffer_size() const;
+    Expected<uint64_t> write_controls_to_ddr(HailoRTDriver &driver);
 
-    ActionListBufferBuilder::Type get_builder_type() const {
-         return m_builder_type;
+    const std::vector<CONTROL_PROTOCOL__context_switch_context_info_chunk_t> &get_controls() const {
+        return m_controls;
     }
-protected:
-    ActionListBufferBuilder(ActionListBufferBuilder::Type builder_type) :
-        m_builder_type(builder_type)
-    {}
-    virtual ~ActionListBufferBuilder() = default;
 private:
-    const ActionListBufferBuilder::Type m_builder_type;
+    void start_new_control(CONTROL_PROTOCOL__context_switch_context_type_t context_type, bool is_new_context);
+    bool has_space_for_action(uint32_t action_size);
+    CONTROL_PROTOCOL__context_switch_context_info_chunk_t &current_control();
+    std::vector<CONTROL_PROTOCOL__context_switch_context_info_chunk_t> m_controls;
 };
 
 } /* namespace hailort */
diff --git a/hailort/libhailort/src/core_op/resource_manager/action_list_buffer_builder/control_action_list_buffer_builder.cpp b/hailort/libhailort/src/core_op/resource_manager/action_list_buffer_builder/control_action_list_buffer_builder.cpp
deleted file mode 100644
index 92ccf303..00000000
--- a/hailort/libhailort/src/core_op/resource_manager/action_list_buffer_builder/control_action_list_buffer_builder.cpp
+++ /dev/null
@@ -1,70 +0,0 @@
-/**
- * Copyright (c) 2024 Hailo Technologies Ltd. All rights reserved.
- * Distributed under the MIT license (https://opensource.org/licenses/MIT)
-**/
-/**
- * @file control_action_list_buffer_builder.cpp
- * @brief Class used to build the vector of controls containing the action list content sent to the firmware.
- **/
-
-#include "control_action_list_buffer_builder.hpp"
-
-namespace hailort
-{
-
-ControlActionListBufferBuilder::ControlActionListBufferBuilder() :
-    ActionListBufferBuilder(ActionListBufferBuilder::Type::CONTROL)
-{}
-
-Expected<std::shared_ptr<ControlActionListBufferBuilder>> ControlActionListBufferBuilder::create()
-{
-    return make_shared_nothrow<ControlActionListBufferBuilder>();
-}
-
-hailo_status ControlActionListBufferBuilder::write_action(MemoryView action,
-    CONTROL_PROTOCOL__context_switch_context_type_t context_type, bool is_new_context, bool last_action_buffer_in_context)
-{
-    (void) last_action_buffer_in_context;
-    assert(action.size() < std::numeric_limits<uint32_t>::max());
-    const uint32_t action_size = static_cast<uint32_t>(action.size());
-    const auto should_start_new_control = (is_new_context || !has_space_for_action(action_size));
-    
-    if (should_start_new_control) {
-        start_new_control(context_type, is_new_context);
-    }
-
-    auto &control = current_control();
-    memcpy(&control.context_network_data[control.context_network_data_length], action.data(), action_size);
-    control.context_network_data_length += action_size;
-    return HAILO_SUCCESS;
-}
-
-CONTROL_PROTOCOL__context_switch_context_info_chunk_t &ControlActionListBufferBuilder::current_control()
-{
-    assert(!m_controls.empty());
-    return m_controls.back();
-}
-
-bool ControlActionListBufferBuilder::has_space_for_action(uint32_t action_size)
-{
-    auto &control = current_control();
-    return (control.context_network_data_length + action_size) <= CONTROL_PROTOCOL__CONTEXT_NETWORK_DATA_SINGLE_CONTROL_MAX_SIZE;
-}
-
-void ControlActionListBufferBuilder::start_new_control(CONTROL_PROTOCOL__context_switch_context_type_t context_type,
-    bool is_new_context)
-{
-    if (!is_new_context) {
-        current_control().is_last_chunk_per_context = false;
-    }
-
-    // Creating a new control directly inside the vector to avoid copying the control struct.
-    m_controls.emplace_back();
-    auto &new_control = current_control();
-    new_control.context_network_data_length = 0;
-    new_control.context_type = static_cast<uint8_t>(context_type);
-    new_control.is_first_chunk_per_context = is_new_context;
-    new_control.is_last_chunk_per_context = true;
-}
-
-} /* namespace hailort */
diff --git a/hailort/libhailort/src/core_op/resource_manager/action_list_buffer_builder/control_action_list_buffer_builder.hpp b/hailort/libhailort/src/core_op/resource_manager/action_list_buffer_builder/control_action_list_buffer_builder.hpp
deleted file mode 100644
index d417df58..00000000
--- a/hailort/libhailort/src/core_op/resource_manager/action_list_buffer_builder/control_action_list_buffer_builder.hpp
+++ /dev/null
@@ -1,55 +0,0 @@
-/**
- * Copyright (c) 2024 Hailo Technologies Ltd. All rights reserved.
- * Distributed under the MIT license (https://opensource.org/licenses/MIT)
-**/
-/**
- * @file control_action_list_buffer_builder.cpp
- * @brief Class used to build the vector of controls containing the action list content sent to the firmware.
- **/
-
-#ifndef _HAILO_CONTROL_ACTION_LIST_BUFFER_BUILDER_HPP_
-#define _HAILO_CONTROL_ACTION_LIST_BUFFER_BUILDER_HPP_
-
-#include "hailo/hailort.h"
-
-#include "context_switch_defs.h"
-#include "core_op/resource_manager/action_list_buffer_builder/action_list_buffer_builder.hpp"
-
-#include "vdma/channel/channel_id.hpp"
-#include "device_common/control_protocol.hpp"
-#include "hef/layer_info.hpp"
-
-
-namespace hailort
-{
-
-// This class manages a vector of CONTROL_PROTOCOL__context_switch_context_info_chunk_t controls to be sent
-// to the firmware. Actions are written to the control buffer, until we reach the maximum control size, then we will
-// start a new control. 
-class ControlActionListBufferBuilder : public ActionListBufferBuilder {
-public:
-    ControlActionListBufferBuilder();
-    static Expected<std::shared_ptr<ControlActionListBufferBuilder>> create();
-    virtual ~ControlActionListBufferBuilder() = default;
-
-    virtual hailo_status write_action(MemoryView action, CONTROL_PROTOCOL__context_switch_context_type_t context_type,
-        bool is_new_context, bool last_action_buffer_in_context) override;
-
-    virtual uint64_t get_mapped_buffer_dma_address() const override {
-        return CONTEXT_SWITCH_DEFS__INVALID_DDR_CONTEXTS_BUFFER_ADDRESS;
-    }
-
-    const std::vector<CONTROL_PROTOCOL__context_switch_context_info_chunk_t> &get_controls() const {
-        return m_controls;
-    }
-private:
-    CONTROL_PROTOCOL__context_switch_context_info_chunk_t &current_control();
-    bool has_space_for_action(uint32_t action_size);
-    void start_new_control(CONTROL_PROTOCOL__context_switch_context_type_t context_type, bool is_new_context);
-
-    std::vector<CONTROL_PROTOCOL__context_switch_context_info_chunk_t> m_controls;
-};
-
-} /* namespace hailort */
-
-#endif /* _HAILO_CONTROL_ACTION_LIST_BUFFER_BUILDER_HPP_ */
diff --git a/hailort/libhailort/src/core_op/resource_manager/action_list_buffer_builder/ddr_action_list_buffer_builder.cpp b/hailort/libhailort/src/core_op/resource_manager/action_list_buffer_builder/ddr_action_list_buffer_builder.cpp
deleted file mode 100644
index 7794152a..00000000
--- a/hailort/libhailort/src/core_op/resource_manager/action_list_buffer_builder/ddr_action_list_buffer_builder.cpp
+++ /dev/null
@@ -1,98 +0,0 @@
-/**
- * Copyright (c) 2024 Hailo Technologies Ltd. All rights reserved.
- * Distributed under the MIT license (https://opensource.org/licenses/MIT)
-**/
-/**
- * @file ddr_action_list_buffer_builder.cpp
- * @brief Class used to build the action list sent to the firmware through DDR.
- **/
-
-#include "ddr_action_list_buffer_builder.hpp"
-#include "common/os_utils.hpp"
-#include "vdma/integrated/integrated_device.hpp"
-
-namespace hailort
-{
-
-// TODO: HRT-12512 : Can remove these variables when / if continuous buffer comes from designated region
-// In hailo15 - the DDR memory range of 0x80000000 - 0x90000000 is mapped to the M4 using a LUT (look up table) to addresses
-// 0x50000000 - 0x60000000, Currently this is the range the CMA allocation should come from seeing as this is one of the first CMA allocations
-// and the linux cma memory pool according to the hailo15 dtsi is - "alloc-ranges = <0 0x80000000 0 0x40000000>"
-// (meaning starts from 0x80000000 and goes for 992 MB) - so anything allocated from 0x90000000 and on ward will be outside the mapped area
-// The solution to this issue is to create a specific range for this allocation inide the mapped area - seeing as this affects other components
-// Like the dsp etc...need to check with them before doing so. For now - this should almost always retirn in the mapped area and we will verify
-// to double check
-
-DDRActionListBufferBuilder::DDRActionListBufferBuilder(void* user_address, uint64_t dma_address) :
-    ActionListBufferBuilder(ActionListBufferBuilder::Type::DDR),
-    m_user_address(user_address),
-    m_dma_address(dma_address),
-    m_write_offset(0),
-    m_current_context_info{}
-{}
-
-bool DDRActionListBufferBuilder::verify_dma_addr(vdma::ContinuousBuffer &buffer)
-{
-    // verify that buffer starts and ends inside mapped range
-    if (buffer.dma_address() < CONTEXT_SWITCH_DEFS__START_M4_MAPPED_DDR_ADDRESS ||
-        (buffer.dma_address() + buffer.size() >= CONTEXT_SWITCH_DEFS__END_M4_MAPPED_DDR_ADDRESS)) {
-        return false;
-    }
-    return true;
-}
-
-Expected<std::shared_ptr<DDRActionListBufferBuilder>> DDRActionListBufferBuilder::create(size_t num_contexts,
-    VdmaDevice &vdma_device)
-{
-    auto integrated_device = dynamic_cast<IntegratedDevice*>(&vdma_device);
-    
-    size_t size_of_contexts = HailoRTCommon::align_to(num_contexts *
-        sizeof(CONTROL_PROTOCOL__context_switch_context_info_chunk_t), OsUtils::get_page_size());
-
-    TRY(auto addr_pair, integrated_device->allocate_infinite_action_list_buffer(size_of_contexts));
-
-    auto ddr_action_list_buiffer_builder = make_shared_nothrow<DDRActionListBufferBuilder>(
-        addr_pair.first, addr_pair.second);
-    CHECK_NOT_NULL_AS_EXPECTED(ddr_action_list_buiffer_builder, HAILO_OUT_OF_HOST_MEMORY);
-    
-    return ddr_action_list_buiffer_builder;
-}
-
-hailo_status DDRActionListBufferBuilder::write_action(MemoryView action,
-    CONTROL_PROTOCOL__context_switch_context_type_t context_type, bool is_new_context, bool is_last_action_in_context)
-{
-    assert(action.size() < std::numeric_limits<uint32_t>::max());
-    const uint32_t action_size = static_cast<uint32_t>(action.size());
-
-    if (is_new_context) {
-        m_current_context_info.is_first_chunk_per_context = true;
-        m_current_context_info.is_last_chunk_per_context = true;
-        m_current_context_info.context_type = static_cast<uint8_t>(context_type);
-        m_current_context_info.context_network_data_length = 0;
-    }
-
-    CHECK(m_current_context_info.context_network_data_length + action_size <=
-        ARRAY_ENTRIES(m_current_context_info.context_network_data), HAILO_INVALID_ARGUMENT,
-        "Context exceeds maximum context size {}", ARRAY_ENTRIES(m_current_context_info.context_network_data));
-
-    // TODO HRT-12788 - make more efficient by writing directly to DDR without using the local context_info_single_control_t
-    memcpy(&(m_current_context_info.context_network_data[m_current_context_info.context_network_data_length]),
-        action.data(), action_size);
-    m_current_context_info.context_network_data_length += action_size;
-
-    if (is_last_action_in_context) {
-        const auto write_size = sizeof(CONTROL_PROTOCOL__context_switch_context_info_chunk_t);
-        memcpy(static_cast<void*>(reinterpret_cast<uint8_t*>(m_user_address) + m_write_offset), &m_current_context_info,
-            write_size);
-        m_write_offset += write_size;
-    }
-
-    return HAILO_SUCCESS;
-}
-
-uint64_t DDRActionListBufferBuilder::get_mapped_buffer_dma_address() const
-{
-    return m_dma_address;
-}
-
-} /* namespace hailort */
\ No newline at end of file
diff --git a/hailort/libhailort/src/core_op/resource_manager/action_list_buffer_builder/ddr_action_list_buffer_builder.hpp b/hailort/libhailort/src/core_op/resource_manager/action_list_buffer_builder/ddr_action_list_buffer_builder.hpp
deleted file mode 100644
index ddb4ee17..00000000
--- a/hailort/libhailort/src/core_op/resource_manager/action_list_buffer_builder/ddr_action_list_buffer_builder.hpp
+++ /dev/null
@@ -1,47 +0,0 @@
-/**
- * Copyright (c) 2024 Hailo Technologies Ltd. All rights reserved.
- * Distributed under the MIT license (https://opensource.org/licenses/MIT)
-**/
-/**
- * @file ddr_action_list_buffer_builder.hpp
- * @brief Class used to build the action list sent to the firmware through DDR.
- **/
-#ifndef _HAILO_DDR_ACTION_LIST_BUFFER_BUILDER_HPP_
-#define _HAILO_DDR_ACTION_LIST_BUFFER_BUILDER_HPP_
-
-#include "hailo/hailort.h"
-#include "context_switch_defs.h"
-#include "core_op/resource_manager/action_list_buffer_builder/action_list_buffer_builder.hpp"
-#include "vdma/memory/continuous_buffer.hpp"
-#include "vdma/vdma_device.hpp"
-
-#define DDR_ACTION_LIST_ENV_VAR         ("HAILO_DDR_ACTION_LIST")
-#define DDR_ACTION_LIST_ENV_VAR_VALUE   ("1")
-
-namespace hailort
-{
-
-class DDRActionListBufferBuilder : public ActionListBufferBuilder {
-public:
-    DDRActionListBufferBuilder(void* user_address, uint64_t dma_address);
-    virtual ~DDRActionListBufferBuilder() = default;
-    static Expected<std::shared_ptr<DDRActionListBufferBuilder>> create(size_t num_contexts, VdmaDevice &vdma_device);
-
-    virtual hailo_status write_action(MemoryView action, CONTROL_PROTOCOL__context_switch_context_type_t context_type,
-        bool is_new_context, bool last_action_buffer_in_context) override;
-
-    virtual uint64_t get_mapped_buffer_dma_address() const override;
-
-    // TODO: HRT-12512 : Can remove this check when / if continuous buffer comes from designated region
-    static bool verify_dma_addr(vdma::ContinuousBuffer &buffer);
-private:    
-    // vdma::ContinuousBuffer m_action_list_buffer;
-    void* m_user_address;
-    uint64_t m_dma_address;
-    size_t m_write_offset;
-    CONTROL_PROTOCOL__context_switch_context_info_chunk_t m_current_context_info;
-};
-
-} /* namespace hailort */
-
-#endif /* _HAILO_DDR_ACTION_LIST_BUFFER_BUILDER_HPP_ */
\ No newline at end of file
diff --git a/hailort/libhailort/src/core_op/resource_manager/cache_buffer.cpp b/hailort/libhailort/src/core_op/resource_manager/cache_buffer.cpp
index d92e2d1b..21dcfb24 100644
--- a/hailort/libhailort/src/core_op/resource_manager/cache_buffer.cpp
+++ b/hailort/libhailort/src/core_op/resource_manager/cache_buffer.cpp
@@ -15,27 +15,29 @@
 namespace hailort
 {
 
-Expected<CacheBuffer> CacheBuffer::create(HailoRTDriver &driver, uint32_t cache_size,
-    uint32_t input_size, uint32_t output_size)
+Expected<CacheBuffer> CacheBuffer::create(std::shared_ptr<vdma::VdmaBuffer> backing_buffer, uint32_t cache_size,
+    uint32_t input_size, uint32_t output_size, uint32_t entry_size)
 {
-    CHECK(cache_size > 0, HAILO_INVALID_ARGUMENT);
+    CHECK_ARG_NOT_NULL(backing_buffer);
+    CHECK((cache_size > 0) && (cache_size == backing_buffer->size()), HAILO_INVALID_ARGUMENT);
     CHECK((input_size > 0) && (input_size < cache_size), HAILO_INVALID_ARGUMENT,
         "Invalid cache input size: {} (cache size: {})", input_size, cache_size);
     CHECK((output_size > 0) && (output_size < cache_size), HAILO_INVALID_ARGUMENT,
         "Invalid cache output size: {} (cache size: {})", output_size, cache_size);
 
-    // Cache buffers are by sg buffers
-    TRY(auto buffer, vdma::SgBuffer::create(driver, cache_size, HailoRTDriver::DmaDirection::BOTH));
-    auto buffer_ptr = make_shared_nothrow<vdma::SgBuffer>(std::move(buffer));
-    CHECK_NOT_NULL(buffer_ptr, HAILO_OUT_OF_HOST_MEMORY);
-    return CacheBuffer(cache_size, input_size, output_size, buffer_ptr);
+    CHECK((entry_size > 0) && (entry_size <= std::numeric_limits<uint16_t>::max()) &&
+        ((cache_size % entry_size) == 0) && ((input_size % entry_size) == 0) && ((output_size % entry_size) == 0),
+        HAILO_INVALID_ARGUMENT, "Invalid cache entry size: {}", entry_size);
+
+    return CacheBuffer(cache_size, input_size, output_size, static_cast<uint16_t>(entry_size), backing_buffer);
 }
 
-CacheBuffer::CacheBuffer(uint32_t cache_size, uint32_t input_size, uint32_t output_size,
+CacheBuffer::CacheBuffer(uint32_t cache_size, uint32_t input_size, uint32_t output_size, uint16_t entry_size,
                          std::shared_ptr<vdma::VdmaBuffer> backing_buffer) :
     m_cache_size(cache_size),
     m_input_size(input_size),
     m_output_size(output_size),
+    m_entry_size(entry_size),
     m_backing_buffer(backing_buffer)
 {}
 
@@ -47,8 +49,12 @@ ExpectedRef<IntermediateBuffer> CacheBuffer::set_input_channel(HailoRTDriver &dr
 
     static const auto SINGLE_BATCH = 1;
     static const auto BUFFER_START = 0;
+    // Passing the entry size as the max desc size, so that the cache can be updated at entry granularity, even if the
+    // entry is smaller than the default desc size. E.g. updating the cache by one 64B entry won't work if the desc size
+    // is 512B, so the desc list should be programmed with 64B. If the entry size is greater than or equal to 512B, the
+    // desc list will be programmed as usual.
     TRY(auto intermediate_buffer, IntermediateBuffer::create_shared(driver, m_input_size, SINGLE_BATCH, channel_id,
-        IntermediateBuffer::StreamingType::BURST, m_backing_buffer, BUFFER_START));
+        IntermediateBuffer::StreamingType::BURST, m_backing_buffer, BUFFER_START, m_entry_size));
     m_cache_input = intermediate_buffer;
     return std::ref(*m_cache_input);
 }
@@ -61,8 +67,12 @@ ExpectedRef<IntermediateBuffer> CacheBuffer::set_output_channel(HailoRTDriver &d
 
     static const auto SINGLE_BATCH = 1;
     static const auto BUFFER_START = 0;
+    // Passing the entry size as the max desc size, so that the cache can be updated at entry granularity, even if the
+    // entry is smaller than the default desc size. E.g. updating the cache by one 64B entry won't work if the desc size
+    // is 512B, so the desc list should be programmed with 64B. If the entry size is greater than or equal to 512B, the
+    // desc list will be programmed as usual.
     TRY(auto intermediate_buffer, IntermediateBuffer::create_shared(driver, m_output_size, SINGLE_BATCH, channel_id,
-        IntermediateBuffer::StreamingType::BURST, m_backing_buffer, BUFFER_START));
+        IntermediateBuffer::StreamingType::BURST, m_backing_buffer, BUFFER_START, m_entry_size));
     m_cache_output = intermediate_buffer;
     return std::ref(*m_cache_output);
 }
@@ -79,11 +89,22 @@ ExpectedRef<IntermediateBuffer> CacheBuffer::get_output()
     return std::ref(*m_cache_output);
 }
 
-Expected<Buffer> CacheBuffer::read_entire_cache()
+Expected<Buffer> CacheBuffer::read_cache()
+{
+    // Read the full contents of the backing buffer into a newly created Buffer of the same size.
+    CHECK(m_backing_buffer, HAILO_INTERNAL_FAILURE, "Backing buffer not set");
+    TRY(auto cache_copy, Buffer::create(m_backing_buffer->size()));
+    CHECK_SUCCESS(m_backing_buffer->read(cache_copy.data(), cache_copy.size(), 0));
+    return cache_copy;
+}
+
+hailo_status CacheBuffer::write_cache(MemoryView buffer)
 {
-    CHECK(m_cache_input && m_cache_output, HAILO_INTERNAL_FAILURE, "Input or output not set");
+    CHECK(m_backing_buffer, HAILO_INTERNAL_FAILURE, "Backing buffer not set");
+    CHECK(buffer.size() == m_backing_buffer->size(), HAILO_INVALID_ARGUMENT,
+        "Buffer size ({}) does not match cache size ({})", buffer.size(), m_backing_buffer->size());
 
-    return m_cache_input->read(m_cache_size);
+    return m_backing_buffer->write(buffer.data(), buffer.size(), 0);
 }
 
 uint32_t CacheBuffer::cache_size() const
diff --git a/hailort/libhailort/src/core_op/resource_manager/cache_buffer.hpp b/hailort/libhailort/src/core_op/resource_manager/cache_buffer.hpp
index 52bf4f01..9c27da72 100644
--- a/hailort/libhailort/src/core_op/resource_manager/cache_buffer.hpp
+++ b/hailort/libhailort/src/core_op/resource_manager/cache_buffer.hpp
@@ -19,8 +19,8 @@ namespace hailort
 class CacheBuffer final
 {
 public:
-    static Expected<CacheBuffer> create(HailoRTDriver &driver, uint32_t cache_size,
-        uint32_t input_size, uint32_t output_size);
+    static Expected<CacheBuffer> create(std::shared_ptr<vdma::VdmaBuffer> backing_buffer, uint32_t cache_size,
+        uint32_t input_size, uint32_t output_size, uint32_t entry_size);
 
     CacheBuffer(CacheBuffer &&) = default;
     CacheBuffer(const CacheBuffer &) = delete;
@@ -34,7 +34,9 @@ class CacheBuffer final
     ExpectedRef<IntermediateBuffer> set_output_channel(HailoRTDriver &driver, vdma::ChannelId channel_id);
     ExpectedRef<IntermediateBuffer> get_input();
     ExpectedRef<IntermediateBuffer> get_output();
-    Expected<Buffer> read_entire_cache();
+    Expected<Buffer> read_cache();
+    hailo_status write_cache(MemoryView buffer);
+
     uint32_t cache_size() const;
     uint32_t input_size() const;
     uint32_t output_size() const;
@@ -42,12 +44,13 @@ class CacheBuffer final
     bool is_configured() const;
 
 private:
-    CacheBuffer(uint32_t cache_size, uint32_t input_size, uint32_t output_size,
+    CacheBuffer(uint32_t cache_size, uint32_t input_size, uint32_t output_size, uint16_t entry_size,
         std::shared_ptr<vdma::VdmaBuffer> backing_buffer);
 
     const uint32_t m_cache_size;
     const uint32_t m_input_size;
     const uint32_t m_output_size;
+    const uint16_t m_entry_size;
     const std::shared_ptr<vdma::VdmaBuffer> m_backing_buffer;
     // Each cache buffer has an input and output IntermediateBuffer -
     // * They both share the same backing buffer.
diff --git a/hailort/libhailort/src/core_op/resource_manager/cache_manager.cpp b/hailort/libhailort/src/core_op/resource_manager/cache_manager.cpp
index 1e1233ec..d23045d4 100644
--- a/hailort/libhailort/src/core_op/resource_manager/cache_manager.cpp
+++ b/hailort/libhailort/src/core_op/resource_manager/cache_manager.cpp
@@ -7,8 +7,10 @@
  * @brief Manges creation and configuration of cache buffers
  **/
 
-#include "cache_manager.hpp"
 #include "hailo/hailort.h"
+#include "cache_manager.hpp"
+
+#include "vdma/memory/sg_buffer.hpp"
 
 namespace hailort
 {
@@ -23,51 +25,40 @@ Expected<CacheManagerPtr> CacheManager::create_shared(HailoRTDriver &driver)
 
 CacheManager::CacheManager(HailoRTDriver &driver) :
     m_driver(driver),
+    m_storage_manager(driver),
+    m_core_op_managers(),
     m_caches_created(false),
-    m_initialized(false),
-    m_cache_input_size(0),
-    m_cache_output_size(0),
     m_cache_size(0),
-    m_read_offset_bytes(0),
-    m_write_offset_bytes_delta(0),
-    m_cache_buffers(),
-    m_uninitialized_caches()
-{
-}
+    m_cache_entry_size(0),
+    m_read_offset_bytes(0)
+{}
 
 hailo_status CacheManager::create_caches_from_core_op(std::shared_ptr<CoreOpMetadata> core_op_metadata)
 {
-    if (m_caches_created) {
-        // Already created caches, nothing to do
-        // In debug, validate that the cache sizes + ids are the same as the ones we already have
-        assert(m_cache_input_size == get_cache_input_size(core_op_metadata));
-        assert(m_cache_output_size == get_cache_output_size(core_op_metadata));
-        assert(validate_cache_ids(core_op_metadata, m_cache_buffers));
-
-        return HAILO_SUCCESS;
-    }
-
     if (!core_op_has_caches(core_op_metadata)) {
         // No cache layers found, nothing to do
         return HAILO_SUCCESS;
     }
 
-    m_cache_input_size = get_cache_input_size(core_op_metadata);
-    m_cache_output_size = get_cache_output_size(core_op_metadata);
-    // TODO: cache size should be a param of the hef (via sdk)
-    //       that way we can also immediately know if caches are used or not
-    //       it should be a param that appears once in the hef, and is used by all caches (HRT-13584)
-    m_cache_size = m_cache_input_size + m_cache_output_size;
-    m_write_offset_bytes_delta = m_cache_input_size;
-
-    assert(validate_cache_edge_layers(core_op_metadata, m_cache_input_size, m_cache_output_size));
-    auto status = allocate_cache_buffers(core_op_metadata);
-    CHECK_SUCCESS(status, "Failed to allocate cache buffers");
+    const auto input_size = core_op_cache_input_size(core_op_metadata);
+    const auto output_size = core_op_cache_output_size(core_op_metadata);
+    const auto entry_size = core_op_cache_entry_size(core_op_metadata);
+    const auto cache_size = input_size + output_size;
+    if (m_caches_created) {
+        CHECK(m_cache_size == cache_size, HAILO_INVALID_OPERATION,
+            "Cache size mismatch: expected {}, got {}", m_cache_size, cache_size);
+        assert(validate_cache_ids(core_op_metadata, m_core_op_managers));
+    }
 
+    const auto core_op_name = core_op_metadata->core_op_name();
+    TRY(auto core_op_manager, CoreOpManager::create(m_driver, m_storage_manager, core_op_metadata));
     m_caches_created = true;
+    m_cache_size = cache_size;
+    m_cache_entry_size = entry_size;
 
-    return HAILO_SUCCESS;
+    m_core_op_managers.emplace(core_op_name, std::move(core_op_manager));
 
+    return HAILO_SUCCESS;
 }
 
 bool CacheManager::core_op_has_caches(std::shared_ptr<CoreOpMetadata> core_op_metadata)
@@ -111,34 +102,56 @@ bool CacheManager::validate_cache_edge_layers(std::shared_ptr<CoreOpMetadata> co
     return true;
 }
 
-uint32_t CacheManager::get_cache_input_size(std::shared_ptr<CoreOpMetadata> core_op_metadata)
+uint32_t CacheManager::core_op_cache_entry_size(std::shared_ptr<CoreOpMetadata> core_op_metadata)
+{
+    // All cache layers have the same entry size (this will be asserted in debug)
+    const auto &dynamic_contexts = core_op_metadata->dynamic_contexts();
+    if (dynamic_contexts.size() == 0) {
+        return 0;
+    }
+
+    const auto &cache_input_layers = dynamic_contexts[0].get_cache_input_layers();
+    if (cache_input_layers.size() == 0) {
+        return 0;
+    }
+
+    return cache_input_layers[0].hw_shape.features;
+}
+
+uint32_t CacheManager::core_op_cache_input_size(std::shared_ptr<CoreOpMetadata> core_op_metadata)
 {
     // All cache layers have the same input size (this will be asserted in debug)
-    for (const auto &context_metadata : core_op_metadata->dynamic_contexts()) {
-        for (const auto &layer_info : context_metadata.get_cache_input_layers()) {
-            return LayerInfoUtils::get_layer_transfer_size(layer_info);
-        }
+    const auto &dynamic_contexts = core_op_metadata->dynamic_contexts();
+    if (dynamic_contexts.size() == 0) {
+        return 0;
     }
 
-    // No cache layers found
-    return 0;
+    const auto &cache_input_layers = dynamic_contexts[0].get_cache_input_layers();
+    if (cache_input_layers.size() == 0) {
+        return 0;
+    }
+
+    return LayerInfoUtils::get_layer_transfer_size(cache_input_layers[0]);
 }
 
-uint32_t CacheManager::get_cache_output_size(std::shared_ptr<CoreOpMetadata> core_op_metadata)
+uint32_t CacheManager::core_op_cache_output_size(std::shared_ptr<CoreOpMetadata> core_op_metadata)
 {
     // All cache layers have the same output size (this will be asserted in debug)
-    for (const auto &context_metadata : core_op_metadata->dynamic_contexts()) {
-        for (const auto &layer_info : context_metadata.get_cache_output_layers()) {
-            return LayerInfoUtils::get_layer_transfer_size(layer_info);
-        }
+    const auto &dynamic_contexts = core_op_metadata->dynamic_contexts();
+    if (dynamic_contexts.size() == 0) {
+        return 0;
+    }
+
+    const auto &cache_output_layers = dynamic_contexts[0].get_cache_output_layers();
+    if (cache_output_layers.size() == 0) {
+        return 0;
     }
 
-    // No cache layers found
-    return 0;
+    return LayerInfoUtils::get_layer_transfer_size(cache_output_layers[0]);
 }
 
 bool CacheManager::validate_cache_ids(std::shared_ptr<CoreOpMetadata> core_op_metadata,
-    const std::unordered_map<uint32_t, CacheBuffer> &current_cache_buffers)
+    const std::unordered_map<std::string, CoreOpManager> &current_core_op_managers)
 {
     std::unordered_set<uint32_t> cache_ids;
     for (const auto &context_metadata : core_op_metadata->dynamic_contexts()) {
@@ -151,71 +164,180 @@ bool CacheManager::validate_cache_ids(std::shared_ptr<CoreOpMetadata> core_op_me
         }
     }
 
-    if (cache_ids.size() != current_cache_buffers.size()) {
-        return false;
-    }
-
-    for (const auto &cache_id : cache_ids) {
-        if (std::end(current_cache_buffers) == current_cache_buffers.find(cache_id)) {
+    for (const auto &core_op_manager : current_core_op_managers) {
+        const auto &cache_buffers = core_op_manager.second.get_cache_buffers();
+        if (cache_ids.size() != cache_buffers.size()) {
             return false;
         }
+
+        for (const auto &cache_id : cache_ids) {
+            if (std::end(cache_buffers) == cache_buffers.find(cache_id)) {
+                return false;
+            }
+        }
     }
 
     return true;
 }
 
-ExpectedRef<CacheBuffer> CacheManager::get_cache_buffer(uint32_t cache_id)
+ExpectedRef<std::unordered_map<uint32_t, CacheBuffer>> CacheManager::get_cache_buffers(const std::string &core_op_name)
 {
-    const auto cache_buffer_it = m_cache_buffers.find(cache_id);
-    if (std::end(m_cache_buffers) != cache_buffer_it) {
-        return std::ref(cache_buffer_it->second);
+    const auto core_op_manager_it = m_core_op_managers.find(core_op_name);
+    if (std::end(m_core_op_managers) == core_op_manager_it) {
+        return make_unexpected(HAILO_NOT_FOUND);
     }
 
-    return make_unexpected(HAILO_NOT_FOUND);
+    return std::ref(core_op_manager_it->second.get_cache_buffers());
 }
 
-hailo_status CacheManager::allocate_cache_buffers(std::shared_ptr<CoreOpMetadata> core_op_metadata)
+ExpectedRef<IntermediateBuffer> CacheManager::set_cache_input_channel(const std::string &core_op_name,
+    uint32_t cache_id, uint16_t batch_size, vdma::ChannelId channel_id)
 {
-    // It's enough to go over cache_output_layers, as each cache has both input and output layers (that share the same buffer)
-    for (const auto &context_metadata : core_op_metadata->dynamic_contexts()) {
-        for (const auto &layer_info : context_metadata.get_cache_output_layers()) {
-            const auto cache_id = layer_info.cache_info.id;
-            TRY(auto cache_buffer, CacheBuffer::create(m_driver, m_cache_size, m_cache_input_size, m_cache_output_size));
-            auto emplace_res = m_cache_buffers.emplace(cache_id, std::move(cache_buffer));
-            CHECK(emplace_res.second, HAILO_INTERNAL_FAILURE);
+    const auto core_op_manager_it = m_core_op_managers.find(core_op_name);
+    if (std::end(m_core_op_managers) == core_op_manager_it) {
+        return make_unexpected(HAILO_NOT_FOUND);
+    }
 
-            // The cache buffer is yet to be initialized (will be initalized when it is configured with input/output channels)
-            m_uninitialized_caches.insert(cache_id);
-        }
+    return core_op_manager_it->second.set_cache_input_channel(cache_id, batch_size, channel_id);
+}
+
+ExpectedRef<IntermediateBuffer> CacheManager::set_cache_output_channel(const std::string &core_op_name,
+    uint32_t cache_id, uint16_t batch_size, vdma::ChannelId channel_id)
+{
+    const auto core_op_manager_it = m_core_op_managers.find(core_op_name);
+    if (std::end(m_core_op_managers) == core_op_manager_it) {
+        return make_unexpected(HAILO_NOT_FOUND);
     }
 
-    return HAILO_SUCCESS;
+    return core_op_manager_it->second.set_cache_output_channel(cache_id, batch_size, channel_id);
 }
 
-hailo_status CacheManager::program_cache_buffers()
+// TODO: Support write_offset_bytes_delta in CacheManager::init_caches (HRT-14397)
+hailo_status CacheManager::init_caches(uint32_t initial_read_offset_bytes, int32_t write_offset_bytes_delta)
 {
-    // Set the cache to the initial configuration (program the descriptors to the initial offset)
+    if (!m_caches_created) {
+        // No cache layers found, nothing to do
+        LOGGER__WARNING("No cache layers found, but init_cache was called");
+        return HAILO_SUCCESS;
+    }
+
+    CHECK(initial_read_offset_bytes < m_cache_size, HAILO_INVALID_ARGUMENT);
+    CHECK(write_offset_bytes_delta != 0, HAILO_INVALID_ARGUMENT);
+
+    m_read_offset_bytes = initial_read_offset_bytes;
+
+    LOGGER__INFO("Initializing caches [read_offset={}, write_offset_delta={}]",
+        initial_read_offset_bytes, write_offset_bytes_delta);
+
     static const auto INITIAL_CONFIGURATION_OFFSET = 0;
     return update_cache_offset(INITIAL_CONFIGURATION_OFFSET);
 }
 
-hailo_status CacheManager::try_complete_cache_initialization()
+hailo_status CacheManager::update_cache_offset(int32_t offset_delta_bytes)
 {
-    // If all caches are now initialized, program their desc list and set the CacheManager as initialized
-    if (m_uninitialized_caches.empty() && !m_initialized) {
-        m_initialized = true;
+    if (!m_caches_created) {
+        // No cache layers found, nothing to do
+        LOGGER__WARNING("No cache layers found, but update_cache_offset was called");
+        return HAILO_SUCCESS;
+    }
 
-        auto status = program_cache_buffers();
-        CHECK_SUCCESS(status, "Failed to program cache buffers");
+    auto new_read_offset = (m_read_offset_bytes + offset_delta_bytes) % m_cache_size;
+
+    for (auto &core : m_core_op_managers) {
+        auto status = core.second.update_cache_offset(new_read_offset);
+        CHECK_SUCCESS(status, "Failed to update cache offset for core_op {}", core.first);
     }
 
+    m_read_offset_bytes = new_read_offset;
+
     return HAILO_SUCCESS;
 }
 
-ExpectedRef<IntermediateBuffer> CacheManager::set_cache_input_channel(uint32_t cache_id, uint16_t batch_size,
-    vdma::ChannelId channel_id)
+uint32_t CacheManager::get_cache_size() const
+{
+    return m_cache_size;
+}
+
+CacheManager::StorageManager::StorageManager(HailoRTDriver &driver) :
+    m_driver(driver),
+    m_backing_buffers()
+{}
+
+Expected<std::shared_ptr<vdma::VdmaBuffer>> CacheManager::StorageManager::get_backing_buffer(uint32_t cache_id, uint32_t cache_size)
+{
+    CHECK(cache_size > 0, HAILO_INVALID_ARGUMENT);
+
+    // Check if the buffer already exists
+    auto buffer_it = m_backing_buffers.find(cache_id);
+    if (std::end(m_backing_buffers) != buffer_it) {
+        CHECK(buffer_it->second->size() == cache_size, HAILO_INTERNAL_FAILURE,
+            "Cache size mismatch for cache_id {}", cache_id);
+        return Expected<std::shared_ptr<vdma::VdmaBuffer>>(buffer_it->second);
+    }
+
+    // Otherwise, create a new buffer (cache buffers are backed by sg buffers)
+    TRY(auto buffer, vdma::SgBuffer::create(m_driver, cache_size, HailoRTDriver::DmaDirection::BOTH));
+    auto buffer_ptr = make_shared_nothrow<vdma::SgBuffer>(std::move(buffer));
+    CHECK_NOT_NULL(buffer_ptr, HAILO_OUT_OF_HOST_MEMORY);
+
+    // Store the buffer in the manager
+    m_backing_buffers.emplace(cache_id, buffer_ptr);
+
+    return std::shared_ptr<vdma::VdmaBuffer>(buffer_ptr);
+}
+
+Expected<CacheManager::CoreOpManager> CacheManager::CoreOpManager::create(HailoRTDriver &driver,
+    StorageManager &storage_manager, std::shared_ptr<CoreOpMetadata> core_op_metadata)
 {
+    hailo_status status = HAILO_SUCCESS;
+    CoreOpManager result(driver, storage_manager, core_op_metadata, status);
+    CHECK_SUCCESS(status);
+
+    return result;
+}
 
+CacheManager::CoreOpManager::CoreOpManager(HailoRTDriver &driver, StorageManager &storage_manager,
+                                           std::shared_ptr<CoreOpMetadata> core_op_metadata, hailo_status &status) :
+    m_driver(driver),
+    m_initialized(false),
+    m_cache_input_size(core_op_cache_input_size(core_op_metadata)),
+    m_cache_output_size(core_op_cache_output_size(core_op_metadata)),
+    m_cache_size(m_cache_input_size + m_cache_output_size),
+    m_cache_entry_size(core_op_cache_entry_size(core_op_metadata)),
+    m_write_offset_bytes_delta(m_cache_input_size),
+    m_cache_buffers(),
+    m_uninitialized_caches()
+{
+    status = allocate_cache_buffers(storage_manager, core_op_metadata);
+    if (HAILO_SUCCESS != status) {
+        return;
+    }
+
+    status = HAILO_SUCCESS;
+}
+
+std::unordered_map<uint32_t, CacheBuffer> &CacheManager::CoreOpManager::get_cache_buffers()
+{
+    return m_cache_buffers;
+}
+const std::unordered_map<uint32_t, CacheBuffer> &CacheManager::CoreOpManager::get_cache_buffers() const
+{
+    return m_cache_buffers;
+}
+
+ExpectedRef<CacheBuffer> CacheManager::CoreOpManager::get_cache_buffer(uint32_t cache_id)
+{
+    const auto cache_buffer_it = m_cache_buffers.find(cache_id);
+    if (std::end(m_cache_buffers) != cache_buffer_it) {
+        return std::ref(cache_buffer_it->second);
+    }
+
+    return make_unexpected(HAILO_NOT_FOUND);
+}
+
+ExpectedRef<IntermediateBuffer> CacheManager::CoreOpManager::set_cache_input_channel(uint32_t cache_id,
+    uint16_t batch_size, vdma::ChannelId channel_id)
+{
     // TODO: Support non-1 batches? (HRT-13628)
     CHECK(1 == batch_size, HAILO_INVALID_ARGUMENT, "Cache input batch size must be 1");
     TRY(auto cache_buffer, get_cache_buffer(cache_id));
@@ -235,8 +357,8 @@ ExpectedRef<IntermediateBuffer> CacheManager::set_cache_input_channel(uint32_t c
     return result;
 }
 
-ExpectedRef<IntermediateBuffer> CacheManager::set_cache_output_channel(uint32_t cache_id, uint16_t batch_size,
-    vdma::ChannelId channel_id)
+ExpectedRef<IntermediateBuffer> CacheManager::CoreOpManager::set_cache_output_channel(uint32_t cache_id,
+    uint16_t batch_size, vdma::ChannelId channel_id)
 {
     // TODO: Support non-1 batches? (HRT-13628)
     CHECK(1 == batch_size, HAILO_INVALID_ARGUMENT, "Cache output batch size must be 1");
@@ -257,44 +379,13 @@ ExpectedRef<IntermediateBuffer> CacheManager::set_cache_output_channel(uint32_t
     return result;
 }
 
-std::unordered_map<uint32_t, CacheBuffer> &CacheManager::get_cache_buffers()
-{
-    return m_cache_buffers;
-}
-
-hailo_status CacheManager::init_caches(uint32_t initial_read_offset_bytes, int32_t write_offset_bytes_delta)
-{
-    if (!m_caches_created) {
-        // No cache layers found, nothing to do
-        LOGGER__WARNING("No cache layers found, but init_cache was called");
-        return HAILO_SUCCESS;
-    }
-
-    CHECK(initial_read_offset_bytes < m_cache_size, HAILO_INVALID_ARGUMENT);
-    CHECK(write_offset_bytes_delta != 0, HAILO_INVALID_ARGUMENT);
-
-    m_read_offset_bytes = initial_read_offset_bytes;
-    m_write_offset_bytes_delta = write_offset_bytes_delta;
-
-    LOGGER__WARNING("Initializing caches [read_offset={}, write_offset_delta={}]",
-        m_read_offset_bytes, m_write_offset_bytes_delta);
-
-    return program_cache_buffers();
-}
-
-hailo_status CacheManager::update_cache_offset(int32_t offset_delta_bytes)
+hailo_status CacheManager::CoreOpManager::update_cache_offset(uint32_t read_offset)
 {
-    if (!m_caches_created) {
-        // No cache layers found, nothing to do
-        LOGGER__WARNING("No cache layers found, but update_cache_offset was called");
-        return HAILO_SUCCESS;
-    }
-
     CHECK(m_initialized, HAILO_INVALID_OPERATION, "CacheManager not initialized");
 
     auto status = HAILO_UNINITIALIZED;
-    auto new_read_offset = (m_read_offset_bytes + offset_delta_bytes) % m_cache_size;
-    auto new_write_offset = (m_read_offset_bytes + offset_delta_bytes + m_write_offset_bytes_delta) % m_cache_size;
+    auto new_read_offset = read_offset % m_cache_size;
+    auto new_write_offset = (read_offset + m_write_offset_bytes_delta) % m_cache_size;
 
     for (auto &cache_buffer : m_cache_buffers) {
         TRY(auto cache_input, cache_buffer.second.get_input());
@@ -308,24 +399,63 @@ hailo_status CacheManager::update_cache_offset(int32_t offset_delta_bytes)
             new_write_offset, cache_buffer.first);
     }
 
-    m_read_offset_bytes = new_read_offset;
-
     return HAILO_SUCCESS;
 }
 
-uint32_t CacheManager::get_cache_size() const
+uint32_t CacheManager::CoreOpManager::get_cache_size() const
 {
     return m_cache_size;
 }
 
-uint32_t CacheManager::get_read_offset_bytes() const
+uint32_t CacheManager::CoreOpManager::get_input_size() const
+{
+    return m_cache_input_size;
+}
+
+uint32_t CacheManager::CoreOpManager::get_output_size() const
 {
-    return m_read_offset_bytes;
+    return m_cache_output_size;
 }
 
-int32_t CacheManager::get_write_offset_bytes_delta() const
+hailo_status CacheManager::CoreOpManager::allocate_cache_buffers(StorageManager &storage_manager,
+    std::shared_ptr<CoreOpMetadata> core_op_metadata)
 {
-    return m_write_offset_bytes_delta;
+    // It's enough to go over cache_output_layers, as each cache has both input and output layers (that share the same buffer)
+    for (const auto &context_metadata : core_op_metadata->dynamic_contexts()) {
+        for (const auto &layer_info : context_metadata.get_cache_output_layers()) {
+            const auto cache_id = layer_info.cache_info.id;
+            TRY(auto backing_buffer, storage_manager.get_backing_buffer(cache_id, m_cache_size));
+            TRY(auto cache_buffer, CacheBuffer::create(backing_buffer, m_cache_size, m_cache_input_size,
+                m_cache_output_size, m_cache_entry_size));
+            auto emplace_res = m_cache_buffers.emplace(cache_id, std::move(cache_buffer));
+            CHECK(emplace_res.second, HAILO_INTERNAL_FAILURE);
+
+            // The cache buffer is yet to be initialized (will be initialized when it is configured with input/output channels)
+            m_uninitialized_caches.insert(cache_id);
+        }
+    }
+
+    return HAILO_SUCCESS;
+}
+
+hailo_status CacheManager::CoreOpManager::try_complete_cache_initialization()
+{
+    // If all caches are now initialized, program their desc list and set the CoreOpManager as initialized
+    if (m_uninitialized_caches.empty() && !m_initialized) {
+        m_initialized = true;
+
+        auto status = program_cache_buffers();
+        CHECK_SUCCESS(status, "Failed to program cache buffers");
+    }
+
+    return HAILO_SUCCESS;
+}
+
+hailo_status CacheManager::CoreOpManager::program_cache_buffers()
+{
+    // Set the cache to the initial configuration (program the descriptors to the initial offset)
+    static const auto INITIAL_CONFIGURATION_OFFSET = 0;
+    return update_cache_offset(INITIAL_CONFIGURATION_OFFSET);
 }
 
 } /* namespace hailort */
diff --git a/hailort/libhailort/src/core_op/resource_manager/cache_manager.hpp b/hailort/libhailort/src/core_op/resource_manager/cache_manager.hpp
index 1c8f9abe..e6aa9521 100644
--- a/hailort/libhailort/src/core_op/resource_manager/cache_manager.hpp
+++ b/hailort/libhailort/src/core_op/resource_manager/cache_manager.hpp
@@ -40,9 +40,11 @@ class CacheManager final
     ~CacheManager() = default;
 
     hailo_status create_caches_from_core_op(std::shared_ptr<CoreOpMetadata> core_op_metadata);
-    ExpectedRef<IntermediateBuffer> set_cache_input_channel(uint32_t cache_id, uint16_t batch_size, vdma::ChannelId channel_id);
-    ExpectedRef<IntermediateBuffer> set_cache_output_channel(uint32_t cache_id, uint16_t batch_size, vdma::ChannelId channel_id);
-    std::unordered_map<uint32_t, CacheBuffer> &get_cache_buffers();
+    ExpectedRef<IntermediateBuffer> set_cache_input_channel(const std::string &core_op_name, uint32_t cache_id,
+        uint16_t batch_size, vdma::ChannelId channel_id);
+    ExpectedRef<IntermediateBuffer> set_cache_output_channel(const std::string &core_op_name, uint32_t cache_id,
+        uint16_t batch_size, vdma::ChannelId channel_id);
+    ExpectedRef<std::unordered_map<uint32_t, CacheBuffer>> get_cache_buffers(const std::string &core_op_name);
 
     // Note: These functions are not thread-safe!
     // Programs the CacheManager instance with the given offsets, overriding the current offsets.
@@ -51,36 +53,91 @@ class CacheManager final
     hailo_status update_cache_offset(int32_t offset_delta_bytes);
 
     uint32_t get_cache_size() const;
-    uint32_t get_read_offset_bytes() const;
-    int32_t get_write_offset_bytes_delta() const;
 
 private:
+    class StorageManager final
+    {
+    public:
+        StorageManager(HailoRTDriver &driver);
+        StorageManager(StorageManager &&) = default;
+        StorageManager(const StorageManager &) = delete;
+        StorageManager &operator=(StorageManager &&) = delete;
+        StorageManager &operator=(const StorageManager &) = delete;
+        ~StorageManager() = default;
+
+        // Creates a new backing buffer of the given size and stores it in the manager, or returns an existing one.
+        Expected<std::shared_ptr<vdma::VdmaBuffer>> get_backing_buffer(uint32_t cache_id, uint32_t cache_size);
+
+    private:
+        HailoRTDriver &m_driver;
+        std::unordered_map<uint32_t, std::shared_ptr<vdma::VdmaBuffer>> m_backing_buffers;
+    };
+
+    class CoreOpManager final
+    {
+    public:
+        static Expected<CoreOpManager> create(HailoRTDriver &driver, StorageManager &storage_manager,
+            std::shared_ptr<CoreOpMetadata> core_op_metadata);
+        CoreOpManager(CoreOpManager &&) = default;
+        CoreOpManager(const CoreOpManager &) = delete;
+        CoreOpManager &operator=(CoreOpManager &&) = delete;
+        CoreOpManager &operator=(const CoreOpManager &) = delete;
+        ~CoreOpManager() = default;
+
+        std::unordered_map<uint32_t, CacheBuffer> &get_cache_buffers();
+        const std::unordered_map<uint32_t, CacheBuffer> &get_cache_buffers() const;
+        ExpectedRef<CacheBuffer> get_cache_buffer(uint32_t cache_id);
+        ExpectedRef<IntermediateBuffer> set_cache_input_channel(uint32_t cache_id, uint16_t batch_size,
+            vdma::ChannelId channel_id);
+        ExpectedRef<IntermediateBuffer> set_cache_output_channel(uint32_t cache_id, uint16_t batch_size,
+            vdma::ChannelId channel_id);
+        // Note: read_offset is absolute, not relative to the current read offset
+        hailo_status update_cache_offset(uint32_t read_offset);
+        uint32_t get_cache_size() const;
+        uint32_t get_input_size() const;
+        uint32_t get_output_size() const;
+
+    private:
+        CoreOpManager(HailoRTDriver &driver, StorageManager &storage_manager,
+            std::shared_ptr<CoreOpMetadata> core_op_metadata, hailo_status &status);
+
+        hailo_status allocate_cache_buffers(StorageManager &storage_manager,
+            std::shared_ptr<CoreOpMetadata> core_op_metadata);
+        hailo_status try_complete_cache_initialization();
+        hailo_status program_cache_buffers();
+
+        HailoRTDriver &m_driver;
+        // This class is initialized (and the member is set to true) when all caches are allocated and configured with
+        // input/output channels. This is done in two steps: (1) cache allocation; (2) channel configuration
+        // Two steps are necessary because this class allocates the buffers, however the input/output channels are assigned
+        // by the resource manager
+        bool m_initialized;
+        const uint32_t m_cache_input_size;
+        const uint32_t m_cache_output_size;
+        const uint32_t m_cache_size;
+        const uint32_t m_cache_entry_size;
+        int32_t m_write_offset_bytes_delta;
+        std::unordered_map<uint32_t, CacheBuffer> m_cache_buffers;
+        std::unordered_set<uint32_t> m_uninitialized_caches;
+    };
+
     static bool core_op_has_caches(std::shared_ptr<CoreOpMetadata> core_op_metadata);
     static bool validate_cache_edge_layers(std::shared_ptr<CoreOpMetadata> core_op_metadata,
         uint32_t cache_input_size, uint32_t cache_output_size);
-    static uint32_t get_cache_input_size(std::shared_ptr<CoreOpMetadata> core_op_metadata);
-    static uint32_t get_cache_output_size(std::shared_ptr<CoreOpMetadata> core_op_metadata);
+    static uint32_t core_op_cache_entry_size(std::shared_ptr<CoreOpMetadata> core_op_metadata);
+    static uint32_t core_op_cache_input_size(std::shared_ptr<CoreOpMetadata> core_op_metadata);
+    static uint32_t core_op_cache_output_size(std::shared_ptr<CoreOpMetadata> core_op_metadata);
     static bool validate_cache_ids(std::shared_ptr<CoreOpMetadata> core_op_metadata,
-        const std::unordered_map<uint32_t, CacheBuffer> &current_cache_buffers);
-    ExpectedRef<CacheBuffer> get_cache_buffer(uint32_t cache_id);
-    hailo_status allocate_cache_buffers(std::shared_ptr<CoreOpMetadata> core_op_metadata);
+        const std::unordered_map<std::string, CoreOpManager> &current_core_op_managers);
     hailo_status program_cache_buffers();
-    hailo_status try_complete_cache_initialization();
 
     HailoRTDriver &m_driver;
+    StorageManager m_storage_manager;
+    std::unordered_map<std::string, CoreOpManager> m_core_op_managers;
     bool m_caches_created;
-    // This class is initialized (and the member is set to true) when all caches are allocated and configured with
-    // input/output channels. This is done in two steps: (1) cache allocation; (2) channel configuration
-    // Two steps are necessary because this class allocates the buffers, however the input/output channels are assigned
-    // by the resource manager
-    bool m_initialized;
-    uint32_t m_cache_input_size;
-    uint32_t m_cache_output_size;
     uint32_t m_cache_size;
+    uint32_t m_cache_entry_size;
     uint32_t m_read_offset_bytes;
-    int32_t m_write_offset_bytes_delta;
-    std::unordered_map<uint32_t, CacheBuffer> m_cache_buffers;
-    std::unordered_set<uint32_t> m_uninitialized_caches;
 };
 
 } /* namespace hailort */
diff --git a/hailort/libhailort/src/core_op/resource_manager/config_buffer.cpp b/hailort/libhailort/src/core_op/resource_manager/config_buffer.cpp
index bf2c1208..7e6356f2 100644
--- a/hailort/libhailort/src/core_op/resource_manager/config_buffer.cpp
+++ b/hailort/libhailort/src/core_op/resource_manager/config_buffer.cpp
@@ -12,6 +12,7 @@
 #include "vdma/memory/sg_edge_layer.hpp"
 #include "vdma/memory/continuous_edge_layer.hpp"
 #include "vdma/memory/buffer_requirements.hpp"
+#include "common/internal_env_vars.hpp"
 
 #include <numeric>
 
@@ -200,7 +201,7 @@ bool ConfigBuffer::should_use_ccb(HailoRTDriver &driver)
         return false; // not supported
     }
 
-    if (nullptr != std::getenv("HAILO_FORCE_CONF_CHANNEL_OVER_DESC")) {
+    if (is_env_variable_on(HAILO_FORCE_CONF_CHANNEL_OVER_DESC_ENV_VAR)) {
         LOGGER__WARNING("Using desc instead of CCB for config channel is not optimal for performance.\n");
         return false;
     }
diff --git a/hailort/libhailort/src/core_op/resource_manager/intermediate_buffer.cpp b/hailort/libhailort/src/core_op/resource_manager/intermediate_buffer.cpp
index 3b2f08b7..a9c619cb 100644
--- a/hailort/libhailort/src/core_op/resource_manager/intermediate_buffer.cpp
+++ b/hailort/libhailort/src/core_op/resource_manager/intermediate_buffer.cpp
@@ -19,26 +19,30 @@ namespace hailort
 {
 Expected<std::unique_ptr<vdma::VdmaEdgeLayer>> IntermediateBuffer::create_edge_layer(
     std::shared_ptr<vdma::VdmaBuffer> buffer, size_t buffer_offset, HailoRTDriver &driver, uint32_t transfer_size,
-    uint16_t max_batch_size, vdma::ChannelId d2h_channel_id, StreamingType streaming_type)
+    uint16_t max_batch_size, vdma::ChannelId d2h_channel_id, StreamingType streaming_type, uint16_t max_desc_size)
 {
     const bool is_circular = (streaming_type == StreamingType::CIRCULAR_CONTINUOS);
     auto buffer_exp = (vdma::VdmaBuffer::Type::CONTINUOUS == buffer->type()) ?
         create_ccb_edge_layer(buffer, buffer_offset, driver, transfer_size, max_batch_size, is_circular) :
-        create_sg_edge_layer(buffer, buffer_offset, driver, transfer_size, max_batch_size, d2h_channel_id, is_circular);
+        create_sg_edge_layer(buffer, buffer_offset, driver, transfer_size, max_batch_size, d2h_channel_id, is_circular,
+            max_desc_size);
 
     return buffer_exp;
 }
 
 Expected<IntermediateBuffer> IntermediateBuffer::create(HailoRTDriver &driver, uint32_t transfer_size,
     uint16_t max_batch_size, vdma::ChannelId d2h_channel_id, StreamingType streaming_type,
-    std::shared_ptr<vdma::VdmaBuffer> buffer, size_t buffer_offset)
+    std::shared_ptr<vdma::VdmaBuffer> buffer, size_t buffer_offset, uint16_t max_desc_size)
 {
+    max_desc_size = std::min(max_desc_size, driver.desc_max_page_size());
+
     LOGGER__TRACE("Creating IntermediateBuffer: transfer_size = {}, max_batch_size = {}, d2h_channel_id = {}, "
-        "streaming_type = {}, buffer = 0x{:X}, buffer_offset = {}",
-        transfer_size, max_batch_size, d2h_channel_id, streaming_type, (uintptr_t)buffer.get(), buffer_offset);
+        "streaming_type = {}, buffer = 0x{:X}, buffer_offset = {}, max_desc_size = {}",
+        transfer_size, max_batch_size, d2h_channel_id, streaming_type, (uintptr_t)buffer.get(), buffer_offset,
+        max_desc_size);
 
     TRY(auto edge_layer_ptr, create_edge_layer(buffer, buffer_offset, driver, transfer_size, max_batch_size,
-        d2h_channel_id, streaming_type));
+        d2h_channel_id, streaming_type, max_desc_size));
 
     if (streaming_type == StreamingType::BURST) {
         // We have max_batch_size transfers, so we program them one by one. The last transfer should report interrupt
@@ -67,10 +71,10 @@ Expected<IntermediateBuffer> IntermediateBuffer::create(HailoRTDriver &driver, u
 
 Expected<std::shared_ptr<IntermediateBuffer>> IntermediateBuffer::create_shared(HailoRTDriver &driver,
     uint32_t transfer_size, uint16_t max_batch_size, vdma::ChannelId d2h_channel_id, StreamingType streaming_type,
-    std::shared_ptr<vdma::VdmaBuffer> buffer, size_t buffer_offset)
+    std::shared_ptr<vdma::VdmaBuffer> buffer, size_t buffer_offset, uint16_t max_desc_size)
 {
     TRY(auto intermediate_buffer, create(driver, transfer_size, max_batch_size, d2h_channel_id, streaming_type,
-        buffer, buffer_offset));
+        buffer, buffer_offset, max_desc_size));
 
     auto intermediate_buffer_ptr = make_shared_nothrow<IntermediateBuffer>(std::move(intermediate_buffer));
     CHECK_NOT_NULL_AS_EXPECTED(intermediate_buffer_ptr, HAILO_OUT_OF_HOST_MEMORY);
@@ -147,13 +151,13 @@ IntermediateBuffer::IntermediateBuffer(std::unique_ptr<vdma::VdmaEdgeLayer> &&ed
 
 Expected<std::unique_ptr<vdma::VdmaEdgeLayer>> IntermediateBuffer::create_sg_edge_layer(
     std::shared_ptr<vdma::VdmaBuffer> buffer, size_t buffer_offset, HailoRTDriver &driver, uint32_t transfer_size,
-    uint16_t batch_size, vdma::ChannelId d2h_channel_id, bool is_circular)
+    uint16_t batch_size, vdma::ChannelId d2h_channel_id, bool is_circular, uint16_t max_desc_size)
 {
     static const auto DONT_FORCE_DEFAULT_PAGE_SIZE = false;
     static const auto FORCE_BATCH_SIZE = true;
     static const auto IS_VDMA_ALIGNED_BUFFER = true;
     TRY(const auto buffer_requirements, vdma::BufferSizesRequirements::get_buffer_requirements_single_transfer(
-        vdma::VdmaBuffer::Type::SCATTER_GATHER, driver.desc_max_page_size(), batch_size, batch_size, transfer_size,
+        vdma::VdmaBuffer::Type::SCATTER_GATHER, max_desc_size, batch_size, batch_size, transfer_size,
         is_circular, DONT_FORCE_DEFAULT_PAGE_SIZE, FORCE_BATCH_SIZE, IS_VDMA_ALIGNED_BUFFER));
     const auto desc_page_size = buffer_requirements.desc_page_size();
     const auto descs_count = buffer_requirements.descs_count();
diff --git a/hailort/libhailort/src/core_op/resource_manager/intermediate_buffer.hpp b/hailort/libhailort/src/core_op/resource_manager/intermediate_buffer.hpp
index f2394cff..9e5d18d4 100644
--- a/hailort/libhailort/src/core_op/resource_manager/intermediate_buffer.hpp
+++ b/hailort/libhailort/src/core_op/resource_manager/intermediate_buffer.hpp
@@ -15,7 +15,7 @@
 
 #include "vdma/driver/hailort_driver.hpp"
 #include "vdma/memory/vdma_edge_layer.hpp"
-
+#include "vdma/memory/descriptor_list.hpp"
 #include "control_protocol.h"
 
 
@@ -33,12 +33,16 @@ class IntermediateBuffer final {
         CIRCULAR_CONTINUOS,
     };
 
+    // The default value of max_desc_size (= vdma::MAX_SG_PAGE_SIZE) corresponds to the maximum descriptor size
+    // supported by the sg dma.
     static Expected<IntermediateBuffer> create(HailoRTDriver &driver, uint32_t transfer_size,
         uint16_t max_batch_size, vdma::ChannelId d2h_channel_id, StreamingType streaming_type,
-        std::shared_ptr<vdma::VdmaBuffer> buffer, size_t buffer_offset);
+        std::shared_ptr<vdma::VdmaBuffer> buffer, size_t buffer_offset,
+        uint16_t max_desc_size = vdma::MAX_SG_PAGE_SIZE);
     static Expected<std::shared_ptr<IntermediateBuffer>> create_shared(HailoRTDriver &driver, uint32_t transfer_size,
         uint16_t max_batch_size, vdma::ChannelId d2h_channel_id, StreamingType streaming_type,
-        std::shared_ptr<vdma::VdmaBuffer> buffer, size_t buffer_offset);
+        std::shared_ptr<vdma::VdmaBuffer> buffer, size_t buffer_offset,
+        uint16_t max_desc_size = vdma::MAX_SG_PAGE_SIZE);
 
     // If size is 0, the entire buffer is read (based on the transfer size passed in the create function)
     Expected<Buffer> read(size_t size=0);
@@ -52,12 +56,12 @@ class IntermediateBuffer final {
 
     static Expected<std::unique_ptr<vdma::VdmaEdgeLayer>> create_sg_edge_layer(std::shared_ptr<vdma::VdmaBuffer> buffer,
         size_t buffer_offset, HailoRTDriver &driver, uint32_t transfer_size, uint16_t batch_size,
-        vdma::ChannelId d2h_channel_id, bool is_circular);
+        vdma::ChannelId d2h_channel_id, bool is_circular, uint16_t max_desc_size);
     static Expected<std::unique_ptr<vdma::VdmaEdgeLayer>> create_ccb_edge_layer(std::shared_ptr<vdma::VdmaBuffer> buffer,
         size_t buffer_offset, HailoRTDriver &driver, uint32_t transfer_size, uint16_t batch_size, bool is_circular);
     static Expected<std::unique_ptr<vdma::VdmaEdgeLayer>> create_edge_layer(std::shared_ptr<vdma::VdmaBuffer> buffer,
         size_t buffer_offset, HailoRTDriver &driver, uint32_t transfer_size, uint16_t max_batch_size,
-        vdma::ChannelId d2h_channel_id, StreamingType streaming_type);
+        vdma::ChannelId d2h_channel_id, StreamingType streaming_type, uint16_t max_desc_size);
 
     std::unique_ptr<vdma::VdmaEdgeLayer> m_edge_layer;
     const uint32_t m_transfer_size;
diff --git a/hailort/libhailort/src/core_op/resource_manager/internal_buffer_planner.cpp b/hailort/libhailort/src/core_op/resource_manager/internal_buffer_planner.cpp
index 51ff94e0..519dbd35 100644
--- a/hailort/libhailort/src/core_op/resource_manager/internal_buffer_planner.cpp
+++ b/hailort/libhailort/src/core_op/resource_manager/internal_buffer_planner.cpp
@@ -12,6 +12,7 @@
 
 #include "vdma/memory/buffer_requirements.hpp"
 #include "internal_buffer_planner.hpp"
+#include "common/internal_env_vars.hpp"
 
 #include <numeric>
 
@@ -39,7 +40,7 @@ bool InternalBufferPlanner::should_edge_layer_use_ccb(const LayerType &layer_typ
     case LayerType::INTER_CONTEXT:
         // On burst (aka inter-context), because the buffers are big (And depends on the max_batch_size), we currently
         // don't want to use CCB by default.
-        if (nullptr != std::getenv("HAILO_FORCE_INFER_CONTEXT_CHANNEL_OVER_DESC")) {
+        if (is_env_variable_on(HAILO_FORCE_INFER_CONTEXT_CHANNEL_OVER_DESC_ENV_VAR)) {
             LOGGER__WARNING("Using desc instead of CCB for inter context channels is not optimal for performance.");
             return false;
         } else {
@@ -49,14 +50,14 @@ bool InternalBufferPlanner::should_edge_layer_use_ccb(const LayerType &layer_typ
         // On circular_continuous (aka ddr), the buffers are relatively small and we want to verify the C2C mechanism,
         // therefore the CCB is the default behaviour.
         // Due to request from the DFC group (Memory issues) - DDR buffers would run over DESC and not CCB buffers.
-        if (nullptr != std::getenv("HAILO_FORCE_DDR_CHANNEL_OVER_CCB")) {
+        if (is_env_variable_on(HAILO_FORCE_DDR_CHANNEL_OVER_CCB_ENV_VAR)) {
             LOGGER__WARNING("Using Non default buffer type (CCB instead of DESC) for ddr channel.");
             return true;
         } else {
             return false;
         }
     case LayerType::CFG:
-        if (nullptr != std::getenv("HAILO_FORCE_CONF_CHANNEL_OVER_DESC")) {
+        if (is_env_variable_on(HAILO_FORCE_CONF_CHANNEL_OVER_DESC_ENV_VAR)) {
             LOGGER__WARNING("Using desc instead of CCB for config channel is not optimal for performance.");
             return false;
         }
@@ -310,7 +311,7 @@ Expected<InternalBufferPlanning> InternalBufferPlanner::create_buffer_planning(
 {
     static const bool FORCE_SG_BUFFER_TYPE = true;
     // Force plan by user flag
-    if (nullptr != std::getenv("HAILO_FORCE_NAIVE_PER_BUFFER_TYPE_ALOCATION")) {
+    if (is_env_variable_on(HAILO_FORCE_NAIVE_PER_BUFFER_TYPE_ALOCATION_ENV_VAR)) {
         LOGGER__INFO("Forced buffer planning of type 'NAIVE_PER_BUFFER_TYPE.");
         plan_type = Type::NAIVE_PER_BUFFER_TYPE;
     }
diff --git a/hailort/libhailort/src/core_op/resource_manager/resource_manager.cpp b/hailort/libhailort/src/core_op/resource_manager/resource_manager.cpp
index c36edaa7..7fc540ad 100644
--- a/hailort/libhailort/src/core_op/resource_manager/resource_manager.cpp
+++ b/hailort/libhailort/src/core_op/resource_manager/resource_manager.cpp
@@ -5,11 +5,15 @@
 #include "vdma/memory/buffer_requirements.hpp"
 #include "device_common/control.hpp"
 #include "core_op/resource_manager/internal_buffer_manager.hpp"
+#include "common/internal_env_vars.hpp"
 
 #include <numeric>
 
 #define HAILO15H_NMS_MAX_CLASSES (1024)
 #define MAX_NUM_CONTEXTS_FOR_CONTROL_BUILDER (64)
+/* The context data buffers are saved to a buffer in the FW, which is limited to 80KB.
+    After taking into consideration the headers and pointers in it, we limit the max to 75KB (instead of 80KB) */
+#define CONTEXT_SWITCH_CONFIG__MAX_BUFFER_SIZE_WITHOUT_HEADERS (1024 * 75)
 
 namespace hailort
 {
@@ -238,8 +242,7 @@ Expected<ResourcesManager> ResourcesManager::create(VdmaDevice &vdma_device, Hai
     }
 
     TRY(auto internal_buffer_manager, InternalBufferManager::create(driver, config_params));
-    TRY_V(auto action_list_buffer_builder, create_action_list_buffer_builder(core_op_metadata->dynamic_contexts().size(),
-        vdma_device));
+    TRY(auto action_list_buffer_builder, ActionListBufferBuilder::create());
     TRY(auto latency_meters, create_latency_meters_from_config_params(config_params, core_op_metadata));
     auto network_index_map = core_op_metadata->get_network_names();
 
@@ -372,7 +375,7 @@ Expected<uint16_t> ResourcesManager::get_batch_size() const
 }
 
 std::pair<size_t, size_t> ResourcesManager::calculate_transfer_queue_sizes(const vdma::DescriptorList &desc_list,
-    uint32_t transfer_size, uint32_t max_active_trans, bool use_latency_meter)
+    uint32_t transfer_size, size_t max_active_trans, bool use_latency_meter)
 {
     // Calculate m_ongoing_transfers capacity - transfers that are already bound to the descriptor list
     // Add desc for boundary channel because might need extra for non aligned async API
@@ -393,11 +396,7 @@ std::pair<size_t, size_t> ResourcesManager::calculate_transfer_queue_sizes(const
     // * Otherwise, we set it to max_active_trans. In this case, we will use m_pending_transfers to queue up
     //   transfers that can't fit in m_ongoing_transfers. We will then launch them as soon as there is room in
     //   m_ongoing_transfers, via the transfer launcher.
-    // TODO: Bring back commented out impl bellow (HRT-13644)
-    //       Setting pending_transfers to zero, s.t. the pending transfer queue won't be used.
-    (void)max_active_trans;
-    const auto pending_transfers = 0;
-    // const auto pending_transfers = (max_active_trans > ongoing_transfers) ? max_active_trans : 0;
+    const auto pending_transfers = (max_active_trans > ongoing_transfers) ? max_active_trans : 0;
 
     return std::make_pair(ongoing_transfers, pending_transfers);
 }
@@ -411,11 +410,10 @@ hailo_status ResourcesManager::create_boundary_vdma_channel(const LayerInfo &lay
         channel_direction, layer_info.dma_engine_index));
     TRY(const auto network_batch_size, get_network_batch_size(layer_info.network_name));
 
-    const auto nms_max_detections_per_frame =
-        layer_info.nms_info.number_of_classes * layer_info.nms_info.max_bboxes_per_class * layer_info.nms_info.chunks_per_frame;
+    const auto transfers_per_frame = (layer_info.format.order == HAILO_FORMAT_ORDER_HAILO_NMS) ?
+        LayerInfoUtils::get_nms_layer_max_transfers_per_frame(layer_info) : 1;
 
-    const auto max_active_transfers_scale = (layer_info.format.order == HAILO_FORMAT_ORDER_HAILO_NMS) ?
-        (nms_max_detections_per_frame * MAX_ACTIVE_TRANSFERS_SCALE) : MAX_ACTIVE_TRANSFERS_SCALE;
+    const auto max_active_transfers_scale = (transfers_per_frame * MAX_ACTIVE_TRANSFERS_SCALE);
 
     TRY(const auto device_arch, m_vdma_device.get_architecture());
     /* Add error in configure phase for invalid NMS parameters */
@@ -428,7 +426,7 @@ hailo_status ResourcesManager::create_boundary_vdma_channel(const LayerInfo &lay
     const auto min_active_trans = MIN_ACTIVE_TRANSFERS_SCALE * network_batch_size;
     const auto max_active_trans = (layer_info.format.order == HAILO_FORMAT_ORDER_HAILO_NMS) ?
         /* NMS Case - Value be be higher than UINT16_MAX. in this case we only limit to UART16_MAX with no error */
-        std::min(static_cast<uint32_t>(UINT16_MAX), max_active_transfers_scale * network_batch_size) :
+        std::min(static_cast<size_t>(UINT16_MAX), max_active_transfers_scale * network_batch_size) :
         max_active_transfers_scale * network_batch_size;
 
     CHECK(IS_FIT_IN_UINT16(min_active_trans), HAILO_INVALID_ARGUMENT,
@@ -436,37 +434,16 @@ hailo_status ResourcesManager::create_boundary_vdma_channel(const LayerInfo &lay
     CHECK(IS_FIT_IN_UINT16(max_active_trans), HAILO_INVALID_ARGUMENT,
         "calculated min_active_trans for vdma descriptor list is out of UINT16 range");
 
-    auto latency_meter = (contains(m_latency_meters, layer_info.network_name)) ? m_latency_meters.at(layer_info.network_name) : nullptr;
-
-    /* TODO - HRT-6829- page_size should be calculated inside the vDMA channel class create function */
-    static const bool IS_CIRCULAR = true;
-    static const bool IS_VDMA_ALIGNED_BUFFER = false;
     const auto transfer_size = LayerInfoUtils::get_layer_transfer_size(layer_info);
-
-    const auto DONT_FORCE_DEFAULT_PAGE_SIZE = false;
-    const auto DONT_FORCE_BATCH_SIZE = false;
-    // Hack to reduce max page size if the driver page size is equal to stream size. 
-    // In this case page size == stream size is invalid solution. 
-    // TODO - remove this WA after HRT-11747
-    const uint16_t max_page_size = (m_driver.desc_max_page_size() == layer_info.max_shmifo_size) ?
-        (m_driver.desc_max_page_size() / 2) : m_driver.desc_max_page_size();
-    auto buffer_sizes_requirements = vdma::BufferSizesRequirements::get_buffer_requirements_single_transfer(
-        vdma::VdmaBuffer::Type::SCATTER_GATHER, max_page_size, static_cast<uint16_t>(min_active_trans),
-        static_cast<uint16_t>(max_active_trans), transfer_size, IS_CIRCULAR, DONT_FORCE_DEFAULT_PAGE_SIZE,
-        DONT_FORCE_BATCH_SIZE, IS_VDMA_ALIGNED_BUFFER);
-    if (HAILO_CANT_MEET_BUFFER_REQUIREMENTS == buffer_sizes_requirements.status()) {
-        LOGGER__ERROR("Network shapes and batch size exceeds driver descriptors capabilities. "
-                "(A common cause for this error could be the batch size - which is {}).", network_batch_size);
-    }
-    CHECK_EXPECTED_AS_STATUS(buffer_sizes_requirements); // TODO (HRT-13278): Figure out how to remove CHECK_EXPECTED here
-
-    const auto page_size = buffer_sizes_requirements->desc_page_size();
-    const auto descs_count = (nullptr != std::getenv("HAILO_CONFIGURE_FOR_HW_INFER")) ?
-        MAX_SG_DESCS_COUNT : buffer_sizes_requirements->descs_count();
+    TRY(auto buffer_requirements, vdma::BufferSizesRequirements::get_buffer_requirements_for_boundary_channels(m_driver,
+        layer_info.max_shmifo_size, static_cast<uint16_t>(min_active_trans), static_cast<uint16_t>(max_active_trans),
+        transfer_size));
 
     const bool CIRCULAR = true;
-    TRY(auto desc_list, vdma::DescriptorList::create(descs_count, page_size, CIRCULAR, m_driver));
+    TRY(auto desc_list, vdma::DescriptorList::create(buffer_requirements.descs_count(), buffer_requirements.desc_page_size(),
+        CIRCULAR, m_driver));
 
+    auto latency_meter = (contains(m_latency_meters, layer_info.network_name)) ? m_latency_meters.at(layer_info.network_name) : nullptr;
     size_t pending_transfers = 0, ongoing_transfers = 0;
     std::tie(ongoing_transfers, pending_transfers) = calculate_transfer_queue_sizes(desc_list, transfer_size,
         max_active_trans, (latency_meter != nullptr));
@@ -522,18 +499,18 @@ ExpectedRef<IntermediateBuffer> ResourcesManager::get_intermediate_buffer(const
 ExpectedRef<IntermediateBuffer> ResourcesManager::set_cache_input_channel(uint32_t cache_id, uint16_t batch_size,
     vdma::ChannelId channel_id)
 {
-    return m_cache_manager->set_cache_input_channel(cache_id, batch_size, channel_id);
+    return m_cache_manager->set_cache_input_channel(m_core_op_metadata->core_op_name(), cache_id, batch_size, channel_id);
 }
 
 ExpectedRef<IntermediateBuffer> ResourcesManager::set_cache_output_channel(uint32_t cache_id, uint16_t batch_size,
     vdma::ChannelId channel_id)
 {
-    return m_cache_manager->set_cache_output_channel(cache_id, batch_size, channel_id);
+    return m_cache_manager->set_cache_output_channel(m_core_op_metadata->core_op_name(), cache_id, batch_size, channel_id);
 }
 
-std::unordered_map<uint32_t, CacheBuffer> &ResourcesManager::get_cache_buffers()
+ExpectedRef<std::unordered_map<uint32_t, CacheBuffer>> ResourcesManager::get_cache_buffers()
 {
-    return m_cache_manager->get_cache_buffers();
+    return m_cache_manager->get_cache_buffers(m_core_op_metadata->core_op_name());
 }
 
 Expected<CONTROL_PROTOCOL__application_header_t> ResourcesManager::get_control_core_op_header()
@@ -550,10 +527,7 @@ Expected<CONTROL_PROTOCOL__application_header_t> ResourcesManager::get_control_c
     status = fill_csm_buffer_size(app_header);
     CHECK_SUCCESS_AS_EXPECTED(status, "Invalid csm buffer size");
 
-    const auto mapped_addr = get_action_list_buffer_builder()->get_mapped_buffer_dma_address();
-    CHECK(IS_FIT_IN_UINT32(mapped_addr), HAILO_INVALID_ARGUMENT, "Invalid Mapped Address {} must fit in uint32",
-        mapped_addr);
-    app_header.external_action_list_address = static_cast<uint32_t>(mapped_addr);
+    app_header.external_action_list_address = CONTEXT_SWITCH_DEFS__INVALID_DDR_CONTEXTS_BUFFER_ADDRESS;
 
     return app_header;
 }
@@ -625,18 +599,19 @@ Expected<Buffer> ResourcesManager::read_intermediate_buffer(const IntermediateBu
 
 Expected<Buffer> ResourcesManager::read_cache_buffer(uint32_t cache_id)
 {
-    auto &cache_buffers_map = m_cache_manager->get_cache_buffers();
-    auto cache_buffer_it = cache_buffers_map.find(cache_id);
-    CHECK_AS_EXPECTED(std::end(cache_buffers_map) != cache_buffer_it, HAILO_NOT_FOUND,
+    TRY(auto cache_buffers, get_cache_buffers());
+    auto cache_buffer_it = cache_buffers.get().find(cache_id);
+    CHECK_AS_EXPECTED(std::end(cache_buffers.get()) != cache_buffer_it, HAILO_NOT_FOUND,
         "Failed to find cache buffer for cache_id {}", cache_id);
-    return cache_buffer_it->second.read_entire_cache();
+    return cache_buffer_it->second.read_cache();
 }
 
 Expected<std::map<uint32_t, Buffer>> ResourcesManager::read_cache_buffers()
 {
     std::map<uint32_t, Buffer> result;
-    for (auto &cache_buffer : m_cache_manager->get_cache_buffers()) {
-        TRY(auto buffer, cache_buffer.second.read_entire_cache());
+    TRY(auto cache_buffers, get_cache_buffers());
+    for (auto &cache_buffer : cache_buffers.get()) {
+        TRY(auto buffer, cache_buffer.second.read_cache());
         result.emplace(cache_buffer.first, std::move(buffer));
     }
 
@@ -645,18 +620,23 @@ Expected<std::map<uint32_t, Buffer>> ResourcesManager::read_cache_buffers()
 
 hailo_status ResourcesManager::configure()
 {
-    CHECK(!m_is_configured, HAILO_INTERNAL_FAILURE, "Can't configure the same core-op twice");
     m_is_configured = true;
 
-    TRY(const auto core_op_header, get_control_core_op_header());
-    auto status = Control::context_switch_set_network_group_header(m_vdma_device, core_op_header);
-    CHECK_SUCCESS(status);
+    TRY(auto core_op_header, get_control_core_op_header());
+    if ((Device::Type::INTEGRATED == m_vdma_device.get_type())
+        && ((CONTEXT_SWITCH_CONFIG__MAX_BUFFER_SIZE_WITHOUT_HEADERS < get_action_list_buffer_builder()->get_action_list_buffer_size())
+        || (is_env_variable_on(DDR_ACTION_LIST_ENV_VAR, DDR_ACTION_LIST_ENV_VAR_VALUE)))) {
+        TRY(auto dma_address ,get_action_list_buffer_builder()->write_controls_to_ddr(m_driver));
+        CHECK(IS_FIT_IN_UINT32(dma_address), HAILO_INVALID_ARGUMENT, "Invalid Mapped Address {} must fit in uint32",
+            dma_address);
+        core_op_header.external_action_list_address = static_cast<uint32_t>(dma_address);
 
-    // Only send controls to FW in case of control action list builder
-    if (ActionListBufferBuilder::Type::CONTROL == get_action_list_buffer_builder()->get_builder_type()) {
-        const auto control_action_list = std::static_pointer_cast<ControlActionListBufferBuilder>(
-            get_action_list_buffer_builder());
-        status = Control::context_switch_set_context_info(m_vdma_device, control_action_list->get_controls());
+        auto status = Control::context_switch_set_network_group_header(m_vdma_device, core_op_header);
+        CHECK_SUCCESS(status);
+    } else {
+        auto status = Control::context_switch_set_network_group_header(m_vdma_device, core_op_header);
+        CHECK_SUCCESS(status);
+        status = Control::context_switch_set_context_info(m_vdma_device, get_action_list_buffer_builder()->get_controls());
         CHECK_SUCCESS(status);
     }
 
@@ -925,30 +905,4 @@ hailo_status ResourcesManager::fill_internal_buffers_info()
     return HAILO_SUCCESS;
 }
 
-bool ResourcesManager::should_use_ddr_action_list(size_t num_contexts, HailoRTDriver::DmaType dma_type)
-{
-    // Only allow env variable to affect in case of DmaType DRAM
-    if ((HailoRTDriver::DmaType::DRAM == dma_type) && ((MAX_NUM_CONTEXTS_FOR_CONTROL_BUILDER < num_contexts)
-        || (is_env_variable_on(DDR_ACTION_LIST_ENV_VAR, DDR_ACTION_LIST_ENV_VAR_VALUE)))) {
-        return true;
-    }
-    return false;
-}
-
-Expected<std::shared_ptr<ActionListBufferBuilder>> ResourcesManager::create_action_list_buffer_builder(
-    size_t num_dynamic_contexts, VdmaDevice &vdma_device)
-{
-    static const auto total_num_contexts = CONTROL_PROTOCOL__CONTEXT_SWITCH_NUMBER_OF_NON_DYNAMIC_CONTEXTS +
-        num_dynamic_contexts;
-
-    if (should_use_ddr_action_list(total_num_contexts, vdma_device.get_driver().dma_type())) {
-        TRY(auto ddr_action_list_buffer_builder, DDRActionListBufferBuilder::create(total_num_contexts, vdma_device));
-        return std::static_pointer_cast<ActionListBufferBuilder>(std::move(ddr_action_list_buffer_builder));
-    } else {
-        TRY(auto control_action_list_buffer_builder, ControlActionListBufferBuilder::create());
-        return std::static_pointer_cast<ActionListBufferBuilder>(std::move(control_action_list_buffer_builder));
-    }
-
-}
-
 } /* namespace hailort */
diff --git a/hailort/libhailort/src/core_op/resource_manager/resource_manager.hpp b/hailort/libhailort/src/core_op/resource_manager/resource_manager.hpp
index fc6204c4..138d410a 100644
--- a/hailort/libhailort/src/core_op/resource_manager/resource_manager.hpp
+++ b/hailort/libhailort/src/core_op/resource_manager/resource_manager.hpp
@@ -33,8 +33,7 @@
 #include "core_op/resource_manager/cache_manager.hpp"
 #include "core_op/resource_manager/config_buffer.hpp"
 #include "core_op/resource_manager/channel_allocator.hpp"
-#include "core_op/resource_manager/action_list_buffer_builder/control_action_list_buffer_builder.hpp"
-#include "core_op/resource_manager/action_list_buffer_builder/ddr_action_list_buffer_builder.hpp"
+#include "core_op/resource_manager/action_list_buffer_builder/action_list_buffer_builder.hpp"
 #include "device_common/control_protocol.hpp"
 #include "vdma/channel/boundary_channel.hpp"
 #include "vdma/pcie/pcie_device.hpp"
@@ -152,11 +151,13 @@ class ResourcesManager final
     ExpectedRef<IntermediateBuffer> get_intermediate_buffer(const IntermediateBufferKey &key);
     ExpectedRef<IntermediateBuffer> set_cache_input_channel(uint32_t cache_id, uint16_t batch_size, vdma::ChannelId channel_id);
     ExpectedRef<IntermediateBuffer> set_cache_output_channel(uint32_t cache_id, uint16_t batch_size, vdma::ChannelId channel_id);
-    std::unordered_map<uint32_t, CacheBuffer> &get_cache_buffers();
+    ExpectedRef<std::unordered_map<uint32_t, CacheBuffer>> get_cache_buffers();
     hailo_status create_boundary_vdma_channel(const LayerInfo &layer_info);
 
     Expected<CONTROL_PROTOCOL__application_header_t> get_control_core_op_header();
 
+    HailoRTDriver &get_driver() { return m_driver; }
+
     Expected<std::reference_wrapper<ContextResources>> add_new_context(
         CONTROL_PROTOCOL__context_switch_context_type_t context_type,
         const uint16_t context_index, const ConfigBufferInfoMap &config_info={});
@@ -223,8 +224,6 @@ class ResourcesManager final
     Expected<HwInferResults> run_hw_only_infer();
     hailo_status fill_internal_buffers_info();
     static bool should_use_ddr_action_list(size_t num_contexts, HailoRTDriver::DmaType dma_type);
-    static Expected<std::shared_ptr<ActionListBufferBuilder>> create_action_list_buffer_builder(
-        size_t num_dynamic_contexts, VdmaDevice &vdma_device);
     bool get_can_fast_batch_switch()
     {
         return m_core_op_metadata->get_can_fast_batch_switch();
@@ -249,7 +248,7 @@ class ResourcesManager final
 
     // <ongoing_transfers, pending_transfers>
     static std::pair<size_t, size_t> calculate_transfer_queue_sizes(const vdma::DescriptorList &desc_list,
-        uint32_t transfer_size, uint32_t max_active_trans, bool use_latency_meter);
+        uint32_t transfer_size, size_t max_active_trans, bool use_latency_meter);
 
     std::vector<ContextResources> m_contexts_resources;
     ChannelAllocator m_channel_allocator;
diff --git a/hailort/libhailort/src/core_op/resource_manager/resource_manager_builder.cpp b/hailort/libhailort/src/core_op/resource_manager/resource_manager_builder.cpp
index 0f8985f0..9d588a3d 100644
--- a/hailort/libhailort/src/core_op/resource_manager/resource_manager_builder.cpp
+++ b/hailort/libhailort/src/core_op/resource_manager/resource_manager_builder.cpp
@@ -12,6 +12,7 @@
 #include "periph_calculator.hpp"
 #include "hef/hef_internal.hpp"
 #include "common/file_utils.hpp"
+#include "vdma/memory/vdma_edge_layer.hpp"
 
 namespace hailort
 {
@@ -101,6 +102,13 @@ static Expected<LayerInfo> update_layer_info(const LayerInfo &original_layer_inf
     return updated_local_layer_info;
 }
 
+static CONTROL_PROTOCOL__host_buffer_info_t get_boundary_buffer_info(vdma::BoundaryChannel &channel, uint32_t transfer_size)
+{
+    auto &desc_list = channel.get_desc_list(); // The channel's descriptor list is reported as a SCATTER_GATHER host buffer
+    return vdma::VdmaEdgeLayer::get_host_buffer_info(vdma::VdmaEdgeLayer::Type::SCATTER_GATHER, desc_list.dma_address(),
+        desc_list.desc_page_size(), desc_list.count(), transfer_size);
+}
+
 static hailo_status fill_boundary_input_layer_impl(ContextResources &context_resources,
     ResourcesManager &resources_manager, const LayerInfo layer_info, const CONTROL_PROTOCOL__hw_consts_t &hw_consts,
     const HEFHwArch &hw_arch, bool should_optimize_credits)
@@ -109,7 +117,7 @@ static hailo_status fill_boundary_input_layer_impl(ContextResources &context_res
 
     TRY(const auto vdma_channel, resources_manager.get_boundary_vdma_channel_by_stream_name(layer_info.name));
 
-    const auto buffer_info = vdma_channel->get_boundary_buffer_info(transfer_size);
+    const auto buffer_info = get_boundary_buffer_info(*vdma_channel, transfer_size);
     const bool is_periph_calculated_in_hailort = resources_manager.get_supported_features().periph_calculation_in_hailort;
     const bool is_core_hw_padding_config_in_dfc = resources_manager.get_supported_features().core_hw_padding_config_in_dfc;
     TRY(auto local_layer_info, update_layer_info(layer_info, buffer_info, hw_consts, hw_arch, should_optimize_credits,
@@ -177,7 +185,7 @@ static hailo_status fill_boundary_output_layer(ContextResources &context_resourc
 
     TRY(const auto vdma_channel, resources_manager.get_boundary_vdma_channel_by_stream_name(layer_info.name));
 
-    const auto buffer_info = vdma_channel->get_boundary_buffer_info(transfer_size);
+    const auto buffer_info = get_boundary_buffer_info(*vdma_channel, transfer_size);
     const bool is_periph_calculated_in_hailort = resources_manager.get_supported_features().periph_calculation_in_hailort;
     const bool is_core_hw_padding_config_in_dfc = resources_manager.get_supported_features().core_hw_padding_config_in_dfc;
     TRY(auto local_layer_info, update_layer_info(layer_info, buffer_info, hw_consts, hw_arch, should_optimize_credits,
@@ -948,10 +956,7 @@ static hailo_status write_action_list(const ContextResources & context_resources
         TRY(auto action_buffers, action->serialize(context_resources));
 
         for (auto &action_buffer : action_buffers) {
-            const bool last_action_buffer_in_context = (action_buffer == *(action_buffers.end() - 1)) &&
-                (action == *(actions.end() - 1));
-            builder->write_action(MemoryView(action_buffer), context_resources.get_context_type(),
-                is_first_action_buffer_of_context, last_action_buffer_in_context);
+            builder->build_context(MemoryView(action_buffer), context_resources.get_context_type(), is_first_action_buffer_of_context);
             is_first_action_buffer_of_context = false;
         }
     }
@@ -963,12 +968,15 @@ static hailo_status add_edge_layer_end_of_context_actions(const ContextResources
     std::vector<ContextSwitchConfigActionPtr> &actions, const bool is_batch_switch_context)
 {
     for (const auto &edge_layer : context_resources.get_edge_layers()) {
-        const bool should_validate = (edge_layer.layer_info.type == LayerType::BOUNDARY);
-        auto action = should_validate ?
-            ValidateChannelAction::create(edge_layer, is_batch_switch_context) :
-            DeactivateChannelAction::create(edge_layer, is_batch_switch_context);
-        CHECK_EXPECTED_AS_STATUS(action); // TODO (HRT-13278): Figure out how to remove CHECK_EXPECTED here
-        actions.emplace_back(action.release());
+#ifndef NDEBUG
+        TRY(auto validate_action, ValidateChannelAction::create(edge_layer, is_batch_switch_context));
+        actions.emplace_back(validate_action);
+#endif
+        const bool should_deactivate = (edge_layer.layer_info.type != LayerType::BOUNDARY);
+        if (should_deactivate) {
+            TRY(auto deactive_action, DeactivateChannelAction::create(edge_layer, is_batch_switch_context));
+            actions.emplace_back(deactive_action);
+        }
     }
 
     /* Pause the boundary input channel */
@@ -1096,7 +1104,7 @@ static Expected<ContextSwitchConfigActionPtr> create_switch_lcu_batch_action(con
     CHECK_AS_EXPECTED((ContextSwitchConfigAction::Type::EnableLcuDefault == action->get_type()) ||
         (ContextSwitchConfigAction::Type::SwitchLcuBatch == action->get_type()) ||
         (ContextSwitchConfigAction::Type::EnableLcuNonDefault == action->get_type()), HAILO_INVALID_ARGUMENT,
-        "Invalid action type - must be enable lcu (default or non default) or switch lcu batch, Received type {}", action->get_type());
+        "Invalid action type - must be enable lcu (default or non default) or switch lcu batch, Received type {}", static_cast<int>(action->get_type()));
 
     TRY(const auto params_buffer, action->serialize_params(context_resources));
 
diff --git a/hailort/libhailort/src/device_common/CMakeLists.txt b/hailort/libhailort/src/device_common/CMakeLists.txt
index af1fd9fd..dbdbecbe 100644
--- a/hailort/libhailort/src/device_common/CMakeLists.txt
+++ b/hailort/libhailort/src/device_common/CMakeLists.txt
@@ -1,4 +1,4 @@
-cmake_minimum_required(VERSION 3.0.0)
+cmake_minimum_required(VERSION 3.5.0)
 
 set(SRC_FILES
     ${CMAKE_CURRENT_SOURCE_DIR}/device.cpp
diff --git a/hailort/libhailort/src/device_common/control.cpp b/hailort/libhailort/src/device_common/control.cpp
index 16bbb115..e39c5d6c 100644
--- a/hailort/libhailort/src/device_common/control.cpp
+++ b/hailort/libhailort/src/device_common/control.cpp
@@ -9,6 +9,8 @@
 
 #include "common/utils.hpp"
 #include "common/logger_macros.hpp"
+#include "common/internal_env_vars.hpp"
+#include "common/process.hpp"
 
 #include "hailo/hailort_common.hpp"
 #include "hef/core_op_metadata.hpp"
@@ -40,8 +42,6 @@ namespace hailort
         "If doing continuous measurement, to enable overcurrent protection again you have to stop the power measurement on this dvm." \
     )
 
-#define FORCE_LAYOUT_INTERNAL_ENV_VAR "FORCE_LAYOUT_INTERNAL"
-
 typedef std::array<std::array<float64_t, CONTROL_PROTOCOL__POWER_MEASUREMENT_TYPES__COUNT>, CONTROL_PROTOCOL__DVM_OPTIONS_COUNT> power_conversion_multiplier_t;
 
 
@@ -96,6 +96,15 @@ Expected<hailo_device_identity_t> control__parse_identify_results(CONTROL_PROTOC
         board_info.device_architecture = dev_arch;
     }
 
+    // Check if we're running on H10 (detected by a hostname containing "hailo10") - relevant only for Linux
+#ifdef __linux__
+    TRY(auto host_name_pair, Process::create_and_wait_for_output("hostname", 20));
+    CHECK_AS_EXPECTED(0 == host_name_pair.first, HAILO_INTERNAL_FAILURE, "Failed to run 'hostname'");
+    if (host_name_pair.second.find("hailo10") != std::string::npos) {
+        board_info.device_architecture = HAILO_ARCH_HAILO10H;
+    }
+#endif
+
     /* Write identify results to log */
     LOGGER__INFO("firmware_version is: {}.{}.{}",
             board_info.fw_version.major,
@@ -104,7 +113,7 @@ Expected<hailo_device_identity_t> control__parse_identify_results(CONTROL_PROTOC
             );
     LOGGER__DEBUG("Protocol version: {}", board_info.protocol_version);
     LOGGER__DEBUG("Logger version: {}", board_info.logger_version);
-    LOGGER__DEBUG("Device architecture code: {}", board_info.device_architecture);
+    LOGGER__DEBUG("Device architecture code: {}", static_cast<int>(board_info.device_architecture));
 
     return board_info;
 }
@@ -118,15 +127,15 @@ Expected<hailo_extended_device_information_t> control__parse_get_extended_device
     local_supported_features = (uint8_t)BYTE_ORDER__ntohl(get_extended_device_information_response.supported_features);
 
     device_info.supported_features.ethernet = (local_supported_features &
-                                               (1 << CONTROL_PROTOCOL__SUPPORTED_FEATURES_ETHERNET_BIT_OFFSET)) != 0;
+                                            (1 << CONTROL_PROTOCOL__SUPPORTED_FEATURES_ETHERNET_BIT_OFFSET)) != 0;
     device_info.supported_features.pcie = (local_supported_features &
-                                           (1 << CONTROL_PROTOCOL__SUPPORTED_FEATURES_PCIE_BIT_OFFSET)) != 0;
+                                        (1 << CONTROL_PROTOCOL__SUPPORTED_FEATURES_PCIE_BIT_OFFSET)) != 0;
     device_info.supported_features.mipi = (local_supported_features &
-                                           (1 << CONTROL_PROTOCOL__SUPPORTED_FEATURES_MIPI_BIT_OFFSET)) != 0;
+                                        (1 << CONTROL_PROTOCOL__SUPPORTED_FEATURES_MIPI_BIT_OFFSET)) != 0;
     device_info.supported_features.current_monitoring = (local_supported_features &
-                                                         (1 << CONTROL_PROTOCOL__SUPPORTED_FEATURES_CURRENT_MONITORING_BIT_OFFSET)) != 0;
+                                                        (1 << CONTROL_PROTOCOL__SUPPORTED_FEATURES_CURRENT_MONITORING_BIT_OFFSET)) != 0;
     device_info.supported_features.mdio = (local_supported_features &
-                                           (1 << CONTROL_PROTOCOL__SUPPORTED_FEATURES_MDIO_BIT_OFFSET)) != 0;
+                                        (1 << CONTROL_PROTOCOL__SUPPORTED_FEATURES_MDIO_BIT_OFFSET)) != 0;
     device_info.neural_network_core_clock_rate = BYTE_ORDER__ntohl(get_extended_device_information_response.neural_network_core_clock_rate);
 
     LOGGER__DEBUG("Max Neural Network Core Clock Rate: {}", device_info.neural_network_core_clock_rate);
@@ -135,8 +144,8 @@ Expected<hailo_extended_device_information_t> control__parse_get_extended_device
         BYTE_ORDER__ntohl(get_extended_device_information_response.boot_source));
 
     (void)memcpy(device_info.soc_id,
-                 get_extended_device_information_response.soc_id,
-                 BYTE_ORDER__ntohl(get_extended_device_information_response.soc_id_length));
+                get_extended_device_information_response.soc_id,
+                BYTE_ORDER__ntohl(get_extended_device_information_response.soc_id_length));
 
     device_info.lcs = get_extended_device_information_response.lcs;
 
@@ -225,14 +234,14 @@ hailo_status log_detailed_fw_error(const Device &device, const CONTROL_PROTOCOL_
         LOGGER__ERROR("Firmware major status: {}", firmware_status_text);
     } else {
         LOGGER__ERROR("Cannot find textual address for firmware status {:#x}, common_status = {}",
-            (FIRMWARE_STATUS_t)fw_status.major_status, common_status);
+            static_cast<int>((FIRMWARE_STATUS_t)fw_status.major_status), static_cast<int>(common_status));
     }
     common_status = FIRMWARE_STATUS__get_textual((FIRMWARE_STATUS_t)fw_status.minor_status, &firmware_status_text);
     if (HAILO_COMMON_STATUS__SUCCESS == common_status) {
         LOGGER__ERROR("Firmware minor status: {}", firmware_status_text);
     } else {
         LOGGER__ERROR("Cannot find textual address for firmware status {:#x}, common_status = {}",
-            (FIRMWARE_STATUS_t)fw_status.minor_status, common_status);
+            static_cast<int>((FIRMWARE_STATUS_t)fw_status.minor_status), static_cast<int>(common_status));
     }
 
     if ((CONTROL_PROTOCOL_STATUS_CONTROL_UNSUPPORTED == fw_status.minor_status) ||
@@ -1181,7 +1190,7 @@ hailo_status Control::set_power_measurement(Device &device, hailo_measurement_bu
     CONTROL_PROTOCOL__set_power_measurement_response_t *response = NULL;
 
     CHECK(CONTROL_PROTOCOL__MAX_NUMBER_OF_POWER_MEASUREMETS > buffer_index,
-        HAILO_INVALID_ARGUMENT, "Invalid power measurement index {}", buffer_index);
+        HAILO_INVALID_ARGUMENT, "Invalid power measurement index {}", static_cast<int>(buffer_index));
 
     common_status = CONTROL_PROTOCOL__pack_set_power_measurement_request(&request, &request_size, device.get_control_sequence(),
             buffer_index, dvm, measurement_type);
@@ -1229,7 +1238,7 @@ hailo_status Control::get_power_measurement(Device &device, hailo_measurement_bu
 
     /* Validate arguments */
     CHECK(CONTROL_PROTOCOL__MAX_NUMBER_OF_POWER_MEASUREMETS > buffer_index,
-        HAILO_INVALID_ARGUMENT, "Invalid power measurement index {}", buffer_index);
+        HAILO_INVALID_ARGUMENT, "Invalid power measurement index {}", static_cast<int>(buffer_index));
     CHECK_ARG_NOT_NULL(measurement_data);
     common_status = CONTROL_PROTOCOL__pack_get_power_measurement_request(&request, &request_size, device.get_control_sequence(),
             buffer_index, should_clear);
@@ -2562,7 +2571,7 @@ hailo_status Control::idle_time_get_measurement(Device &device, uint64_t *measur
     common_status = CONTROL_PROTOCOL__pack_idle_time_get_measuremment_request(&request, &request_size, device.get_control_sequence());
     status = (HAILO_COMMON_STATUS__SUCCESS == common_status) ? HAILO_SUCCESS : HAILO_INTERNAL_FAILURE;
     if (HAILO_SUCCESS != status) {
-        LOGGER__ERROR("failed CONTROL_PROTOCOL__pack_idle_time_get_measuremment_request with status {:#X}", common_status);
+        LOGGER__ERROR("failed CONTROL_PROTOCOL__pack_idle_time_get_measuremment_request with status {:#X}", static_cast<int>(common_status));
         goto exit;
     }
 
@@ -2609,7 +2618,7 @@ hailo_status Control::idle_time_set_measurement(Device &device, uint8_t measurem
     common_status = CONTROL_PROTOCOL__pack_idle_time_set_measuremment_request(&request, &request_size, device.get_control_sequence(), measurement_enable);
     status = (HAILO_COMMON_STATUS__SUCCESS == common_status) ? HAILO_SUCCESS : HAILO_INTERNAL_FAILURE;
     if (HAILO_SUCCESS != status) {
-        LOGGER__ERROR("failed CONTROL_PROTOCOL__pack_idle_time_set_measuremment_request with status {:#X}", common_status);
+        LOGGER__ERROR("failed CONTROL_PROTOCOL__pack_idle_time_set_measuremment_request with status {:#X}", static_cast<int>(common_status));
         goto exit;
     }
 
@@ -2658,7 +2667,7 @@ hailo_status Control::set_pause_frames(Device &device, uint8_t rx_pause_frames_e
 hailo_status Control::download_context_action_list_chunk(Device &device, uint32_t network_group_id,
     CONTROL_PROTOCOL__context_switch_context_type_t context_type, uint16_t context_index,
     uint16_t action_list_offset, size_t action_list_max_size, uint32_t *base_address, uint8_t *action_list,
-    uint16_t *action_list_length, bool *is_action_list_end, uint32_t *batch_counter)
+    uint16_t *action_list_length, bool *is_action_list_end, uint32_t *batch_counter, uint32_t *idle_time )
 {
     hailo_status status = HAILO_UNINITIALIZED;
     HAILO_COMMON_STATUS_t common_status = HAILO_COMMON_STATUS__UNINITIALIZED;
@@ -2720,6 +2729,7 @@ hailo_status Control::download_context_action_list_chunk(Device &device, uint32_
     *base_address = BYTE_ORDER__ntohl(context_action_list_response->base_address);
     *is_action_list_end = context_action_list_response->is_action_list_end;
     *batch_counter = BYTE_ORDER__ntohl(context_action_list_response->batch_counter);
+    *idle_time = BYTE_ORDER__ntohl(context_action_list_response->idle_time);
 
     status = HAILO_SUCCESS;
 exit:
@@ -2728,7 +2738,7 @@ hailo_status Control::download_context_action_list_chunk(Device &device, uint32_
 
 hailo_status Control::download_context_action_list(Device &device, uint32_t network_group_id,
     CONTROL_PROTOCOL__context_switch_context_type_t context_type, uint16_t context_index, size_t action_list_max_size,
-    uint32_t *base_address, uint8_t *action_list, uint16_t *action_list_length, uint32_t *batch_counter)
+    uint32_t *base_address, uint8_t *action_list, uint16_t *action_list_length, uint32_t *batch_counter, uint32_t *idle_time)
 {
     hailo_status status = HAILO_UNINITIALIZED;
     bool is_action_list_end = false;
@@ -2738,6 +2748,7 @@ hailo_status Control::download_context_action_list(Device &device, uint32_t netw
     size_t remaining_action_list_max_size = 0;
     uint32_t chunk_base_address = 0;
     uint32_t batch_counter_local = 0;
+    uint32_t idle_time_local = 0;
 
     /* Validate arguments */
     CHECK_ARG_NOT_NULL(base_address);
@@ -2750,7 +2761,7 @@ hailo_status Control::download_context_action_list(Device &device, uint32_t netw
     do {
         status = download_context_action_list_chunk(device, network_group_id, context_type, context_index,
             accumulated_action_list_length, remaining_action_list_max_size, &chunk_base_address,
-            action_list_current_offset, &chunk_action_list_length, &is_action_list_end, &batch_counter_local);
+            action_list_current_offset, &chunk_action_list_length, &is_action_list_end, &batch_counter_local, &idle_time_local);
         CHECK_SUCCESS(status);
 
         accumulated_action_list_length = (uint16_t)(accumulated_action_list_length + chunk_action_list_length);
@@ -2763,6 +2774,7 @@ hailo_status Control::download_context_action_list(Device &device, uint32_t netw
     *base_address =  chunk_base_address;
     *action_list_length = accumulated_action_list_length;
     *batch_counter = batch_counter_local;
+    *idle_time =  idle_time_local;
 
     return HAILO_SUCCESS;
 }
@@ -2834,7 +2846,7 @@ hailo_status Control::wd_enable(Device &device, uint8_t cpu_id, bool should_enab
     common_status = CONTROL_PROTOCOL__pack_wd_enable(&request, &request_size, device.get_control_sequence(), cpu_id, should_enable);
     status = (HAILO_COMMON_STATUS__SUCCESS == common_status) ? HAILO_SUCCESS : HAILO_INTERNAL_FAILURE;
     if (HAILO_SUCCESS != status) {
-        LOGGER__ERROR("failed CONTROL_PROTOCOL__pack_wd_enable with status {:#X}", common_status);
+        LOGGER__ERROR("failed CONTROL_PROTOCOL__pack_wd_enable with status {:#X}", static_cast<int>(common_status));
         goto exit;
     }
 
@@ -2869,7 +2881,7 @@ hailo_status Control::wd_config(Device &device, uint8_t cpu_id, uint32_t wd_cycl
     common_status = CONTROL_PROTOCOL__pack_wd_config(&request, &request_size, device.get_control_sequence(), cpu_id, wd_cycles, wd_mode);
     status = (HAILO_COMMON_STATUS__SUCCESS == common_status) ? HAILO_SUCCESS : HAILO_INTERNAL_FAILURE;
     if (HAILO_SUCCESS != status) {
-        LOGGER__ERROR("failed CONTROL_PROTOCOL__pack_wd_config with status {:#X}", common_status);
+        LOGGER__ERROR("failed CONTROL_PROTOCOL__pack_wd_config with status {:#X}", static_cast<int>(common_status));
         goto exit;
     }
 
@@ -2908,7 +2920,7 @@ hailo_status Control::previous_system_state(Device &device, uint8_t cpu_id, CONT
     common_status = CONTROL_PROTOCOL__pack_previous_system_state(&request, &request_size, device.get_control_sequence(), cpu_id);
     status = (HAILO_COMMON_STATUS__SUCCESS == common_status) ? HAILO_SUCCESS : HAILO_INTERNAL_FAILURE;
     if (HAILO_SUCCESS != status) {
-        LOGGER__ERROR("failed CONTROL_PROTOCOL__pack_previous_system_state with status {:#X}", common_status);
+        LOGGER__ERROR("failed CONTROL_PROTOCOL__pack_previous_system_state with status {:#X}", static_cast<int>(common_status));
         goto exit;
     }
 
@@ -3059,7 +3071,7 @@ hailo_status Control::clear_configured_apps(Device &device)
     status = (HAILO_COMMON_STATUS__SUCCESS == common_status) ? HAILO_SUCCESS : HAILO_INTERNAL_FAILURE;
     if (HAILO_SUCCESS != status) {
         LOGGER__ERROR("failed CONTROL_PROTOCOL__pack_context_switch_clear_configured_apps_request with status {:#X}",
-            common_status);
+            static_cast<int>(common_status));
         goto exit;
     }
 
@@ -3181,9 +3193,9 @@ Expected<CONTROL_PROTOCOL__get_extended_device_information_response_t> Control::
 
 Expected<uint32_t> Control::get_partial_clusters_layout_bitmap(Device &device)
 {
-    auto force_layout_env = std::getenv(FORCE_LAYOUT_INTERNAL_ENV_VAR);
+    auto force_layout_env = get_env_variable(FORCE_LAYOUT_INTERNAL_ENV_VAR);
     if (force_layout_env) {
-        return std::stoi(std::string(force_layout_env));
+        return std::stoi(force_layout_env.value());
     }
 
     TRY(const auto dev_arch, device.get_architecture());
diff --git a/hailort/libhailort/src/device_common/control.hpp b/hailort/libhailort/src/device_common/control.hpp
index eb4f1ba1..2253bb62 100644
--- a/hailort/libhailort/src/device_common/control.hpp
+++ b/hailort/libhailort/src/device_common/control.hpp
@@ -282,7 +282,7 @@ class Control final
     static hailo_status download_context_action_list(Device &device, uint32_t network_group_id,
         CONTROL_PROTOCOL__context_switch_context_type_t context_type, uint16_t context_index,
         size_t action_list_max_size, uint32_t *base_address, uint8_t *action_list, uint16_t *action_list_length,
-        uint32_t *batch_counter);
+        uint32_t *batch_counter, uint32_t *idle_time_local);
             
     /**
      *  Enable core-op
@@ -404,7 +404,7 @@ class Control final
     static hailo_status download_context_action_list_chunk(Device &device, uint32_t network_group_id,
         CONTROL_PROTOCOL__context_switch_context_type_t context_type, uint16_t context_index, uint16_t action_list_offset,
         size_t action_list_max_size, uint32_t *base_address, uint8_t *action_list, uint16_t *action_list_length,
-        bool *is_action_list_end, uint32_t *batch_counter);
+        bool *is_action_list_end, uint32_t *batch_counter, uint32_t *idle_time_local);
     static hailo_status context_switch_set_context_info_chunk(Device &device,
         const CONTROL_PROTOCOL__context_switch_context_info_chunk_t &context_info);
     static hailo_status change_context_switch_status(Device &device,
diff --git a/hailort/libhailort/src/device_common/d2h_event_queue.hpp b/hailort/libhailort/src/device_common/d2h_event_queue.hpp
index d08244e8..09440acb 100644
--- a/hailort/libhailort/src/device_common/d2h_event_queue.hpp
+++ b/hailort/libhailort/src/device_common/d2h_event_queue.hpp
@@ -10,7 +10,7 @@
 #ifndef HAILO_D2H_EVENT_QUEUE_HPP_
 #define HAILO_D2H_EVENT_QUEUE_HPP_
 
-#include "utils/thread_safe_queue.hpp"
+#include "common/thread_safe_queue.hpp"
 
 #include "d2h_events.h"
 
diff --git a/hailort/libhailort/src/device_common/d2h_events_parser.cpp b/hailort/libhailort/src/device_common/d2h_events_parser.cpp
index dda41214..1e4972b8 100644
--- a/hailort/libhailort/src/device_common/d2h_events_parser.cpp
+++ b/hailort/libhailort/src/device_common/d2h_events_parser.cpp
@@ -436,7 +436,7 @@ static HAILO_COMMON_STATUS_t D2H_EVENTS__parse_context_switch_run_time_error_not
     
     status = FIRMWARE_STATUS__get_textual((FIRMWARE_STATUS_t)run_time_error_status, &run_time_error_status_text);
     CHECK_COMMON_STATUS((HAILO_COMMON_STATUS__SUCCESS == status), status, 
-        "Cannot find textual address for run time status {:#x}, status = {}", (FIRMWARE_STATUS_t)run_time_error_status, status);
+        "Cannot find textual address for run time status {:#x}, status = {}", static_cast<int>((FIRMWARE_STATUS_t)run_time_error_status), static_cast<int>(status));
 
     LOGGER__ERROR("Got Context switch run time error on net_group index {}, batch index {}, context index {}, action index {} with status {}",
         d2h_notification_message->message_parameters.context_switch_run_time_error_event.application_index,
diff --git a/hailort/libhailort/src/device_common/device.cpp b/hailort/libhailort/src/device_common/device.cpp
index 97c92a5f..8ba359d1 100644
--- a/hailort/libhailort/src/device_common/device.cpp
+++ b/hailort/libhailort/src/device_common/device.cpp
@@ -8,6 +8,10 @@
  *
  * TODO: doc
  **/
+#ifdef __unix__
+#include <glob.h>
+#include <fstream>
+#endif
 
 #include "hailo/hailort.h"
 #include "hailo/device.hpp"
@@ -33,8 +37,9 @@ namespace hailort
 
 #define WRITE_CHUNK_SIZE (1024)
 #define DEVICE_WORD_SIZE (4)
+#define SENSOR_NAME_FILE_PATHS "/sys/class/hwmon/hwmon*/name"
 
-Device::Device(Type type) : 
+Device::Device(Type type) :
     m_type(type),
     m_control_sequence(0),
     m_is_control_version_supported(false),
@@ -104,14 +109,11 @@ Expected<std::unique_ptr<Device>> Device::create(const std::string &device_id)
     const bool DONT_LOG_ON_FAILURE = false;
     if (IntegratedDevice::DEVICE_ID == device_id) {
         return create_core();
-    }
-    else if (auto pcie_info = PcieDevice::parse_pcie_device_info(device_id, DONT_LOG_ON_FAILURE)) {
+    } else if (auto pcie_info = PcieDevice::parse_pcie_device_info(device_id, DONT_LOG_ON_FAILURE)) {
         return create_pcie(pcie_info.release());
-    }
-    else if (auto eth_info = EthernetDevice::parse_eth_device_info(device_id, DONT_LOG_ON_FAILURE)) {
+    } else if (auto eth_info = EthernetDevice::parse_eth_device_info(device_id, DONT_LOG_ON_FAILURE)) {
         return create_eth(eth_info.release());
-    }
-    else {
+    } else {
         LOGGER__ERROR("Invalid device id {}", device_id);
         return make_unexpected(HAILO_INVALID_ARGUMENT);
     }
@@ -119,17 +121,13 @@ Expected<std::unique_ptr<Device>> Device::create(const std::string &device_id)
 
 Expected<std::unique_ptr<Device>> Device::create_pcie()
 {
-    TRY(auto pcie_device, PcieDevice::create());
-    // Upcasting to Device unique_ptr (from PcieDevice unique_ptr)
-    auto device = std::unique_ptr<Device>(std::move(pcie_device));
+    TRY(auto device, PcieDevice::create());
     return device;
 }
 
 Expected<std::unique_ptr<Device>> Device::create_pcie(const hailo_pcie_device_info_t &device_info)
 {
-    TRY(auto pcie_device, PcieDevice::create(device_info));
-    // Upcasting to Device unique_ptr (from PcieDevice unique_ptr)
-    auto device = std::unique_ptr<Device>(std::move(pcie_device));
+    TRY(auto device, PcieDevice::create(device_info));
     return device;
 }
 
@@ -539,7 +537,7 @@ Expected<std::vector<uint8_t>> Device::get_number_of_dynamic_contexts_per_networ
 }
 
 Expected<Buffer> Device::download_context_action_list(uint32_t network_group_id, uint8_t context_type,
-    uint16_t context_index, uint32_t *base_address, uint32_t *batch_counter, uint16_t max_size)
+    uint16_t context_index, uint32_t *base_address, uint32_t *batch_counter, uint32_t *idle_time, uint16_t max_size)
 {
     CHECK_ARG_NOT_NULL_AS_EXPECTED(base_address);
     CHECK_ARG_NOT_NULL_AS_EXPECTED(batch_counter);
@@ -549,10 +547,11 @@ Expected<Buffer> Device::download_context_action_list(uint32_t network_group_id,
 
     uint32_t base_address_local = 0;
     uint32_t batch_counter_local = 0;
+    uint32_t idle_time_local = 0;
     uint16_t actual_size = 0;
     const auto status = Control::download_context_action_list(*this, network_group_id,
         (CONTROL_PROTOCOL__context_switch_context_type_t)context_type, context_index, action_list.size(),
-        &base_address_local, action_list.data(), &actual_size, &batch_counter_local);
+        &base_address_local, action_list.data(), &actual_size, &batch_counter_local, &idle_time_local);
     CHECK_SUCCESS_AS_EXPECTED(status);
     CHECK_AS_EXPECTED(actual_size <= max_size, HAILO_INTERNAL_FAILURE);
 
@@ -562,6 +561,7 @@ Expected<Buffer> Device::download_context_action_list(uint32_t network_group_id,
     // Transfer ownership of out params
     *base_address = base_address_local;
     *batch_counter = batch_counter_local;
+    *idle_time = idle_time_local;
 
     return final_action_list;
 }
@@ -669,4 +669,69 @@ Expected<ConfigureNetworkParams> Device::create_configure_params(Hef &hef, const
     return hef.create_configure_params(stream_interface, network_group_name);
 }
 
+Expected<bool> Device::has_INA231_H8()
+{
+    // An INA231 is reported when the current-monitoring feature is set or the product name contains "EVB".
+    TRY(auto info, get_extended_device_information(), "Failed to get extended device information");
+    TRY(auto id, identify(), "Failed to identify device");
+    const bool is_evb = (std::string::npos != std::string(id.product_name).find("EVB"));
+    return is_evb || info.supported_features.current_monitoring;
+}
+
+// Checks whether an H15 board exposes an INA231 through hwmon. Even if true, libhailort can't read
+// power / current / temperature values; the user can read them via the "sensors" CLI command.
+Expected<bool> Device::has_INA231_H15()
+{
+    bool has_INA231 = false;
+#ifdef __unix__
+    glob_t glob_result = {};
+    // Return value ignored on purpose: when nothing matches, the list is empty and the loop is skipped.
+    (void)glob(SENSOR_NAME_FILE_PATHS, GLOB_TILDE, NULL, &glob_result);
+    for (size_t i = 0; i < glob_result.gl_pathc; ++i) {
+        std::ifstream file(glob_result.gl_pathv[i]);
+        if (!file.is_open()) {
+            globfree(&glob_result); // release the path list before bailing out
+            return make_unexpected(HAILO_FILE_OPERATION_FAILURE);
+        }
+        std::string line;
+        std::getline(file, line);
+        if (line == "ina231_precise") {
+            has_INA231 = true;
+            break;
+        }
+    }
+    globfree(&glob_result);
+#endif
+    return has_INA231;
+}
+
+Expected<Device::Capabilities> Device::get_capabilities()
+{
+    // Which measurements are reported depends on m_device_architecture; unknown values are an error.
+    Device::Capabilities result = {};
+    switch (m_device_architecture) {
+    case HAILO_ARCH_HAILO8:
+    case HAILO_ARCH_HAILO8L:
+    {
+        // Current and power share the same INA231 check, so query the device only once.
+        TRY(const auto has_ina231, has_INA231_H8(), "Failed to check INA231_H8");
+        result.current_measurements = has_ina231;
+        result.power_measurements = has_ina231;
+        result.temperature_measurements = true;
+        break;
+    }
+    case HAILO_ARCH_HAILO15H:
+    case HAILO_ARCH_HAILO15M:
+    case HAILO_ARCH_PLUTO:
+        result.current_measurements = false;
+        result.power_measurements = false;
+        result.temperature_measurements = false;
+        break;
+    case HAILO_ARCH_HAILO10H:
+    default:
+        return make_unexpected(HAILO_INVALID_DEVICE_ARCHITECTURE);
+    }
+    return result;
+}
+
 } /* namespace hailort */
diff --git a/hailort/libhailort/src/device_common/device_internal.cpp b/hailort/libhailort/src/device_common/device_internal.cpp
index 525abae7..1e55760c 100644
--- a/hailort/libhailort/src/device_common/device_internal.cpp
+++ b/hailort/libhailort/src/device_common/device_internal.cpp
@@ -172,7 +172,7 @@ Expected<firmware_type_t> DeviceBase::get_fw_type()
         firmware_type = FIRMWARE_TYPE_PLUTO;
     }
     else {
-        LOGGER__ERROR("Invalid device arcitecture. {}", architecture);
+        LOGGER__ERROR("Invalid device arcitecture. {}", static_cast<int>(architecture));
         return make_unexpected(HAILO_INVALID_DEVICE_ARCHITECTURE);
     }
 
@@ -203,7 +203,7 @@ hailo_status DeviceBase::firmware_update(const MemoryView &firmware_binary, bool
         static_cast<uint32_t>(firmware_binary.size()), false, &new_app_firmware_header,
         &new_core_firmware_header, NULL, firmware_type);
     CHECK(HAILO_COMMON_STATUS__SUCCESS == fw_header_status, HAILO_INVALID_FIRMWARE,
-        "FW update validation failed with status {}", fw_header_status);
+        "FW update validation failed with status {}", static_cast<int>(fw_header_status));
 
     // TODO: Are we ok with doing another identify here?
     TRY(auto board_info_before_update, Control::identify(*this));
@@ -281,7 +281,7 @@ hailo_status DeviceBase::firmware_update(const MemoryView &firmware_binary, bool
         LOGGER__INFO("Resetting...");
         status = reset(get_default_reset_mode());
         CHECK(HAILO_COMMON_STATUS__SUCCESS == fw_header_status, HAILO_INVALID_FIRMWARE,
-            "FW update validation failed with status {}", fw_header_status);
+            "FW update validation failed with status {}", static_cast<int>(fw_header_status));
         CHECK((status == HAILO_SUCCESS) || (status == HAILO_UNSUPPORTED_CONTROL_PROTOCOL_VERSION), status);
 
         auto board_info_after_install_expected = Control::identify(*this);
@@ -344,7 +344,7 @@ hailo_status DeviceBase::second_stage_update(uint8_t* second_stage_binary, uint3
     second_stage_header_status = FIRMWARE_HEADER_UTILS__validate_second_stage_headers((uintptr_t)second_stage_binary,
         second_stage_binary_length, &new_second_stage_header, firmware_type);
     CHECK(HAILO_COMMON_STATUS__SUCCESS == second_stage_header_status, HAILO_INVALID_SECOND_STAGE,
-            "Second stage update validation failed with status {}", second_stage_header_status);
+            "Second stage update validation failed with status {}", static_cast<int>(second_stage_header_status));
 
     new_second_stage_version.firmware_major = new_second_stage_header->firmware_major;
     new_second_stage_version.firmware_minor = new_second_stage_header->firmware_minor;
@@ -547,7 +547,7 @@ void DeviceBase::d2h_notification_thread_main(const std::string &device_id)
         /* Parse and print the Event info */
         auto d2h_status = D2H_EVENTS__parse_event(&notification);
         if (HAILO_COMMON_STATUS__SUCCESS != d2h_status) {
-            LOGGER__ERROR("[{}] Fail to Parse firmware notification {} status is {}", device_id, notification.header.event_id, d2h_status);
+            LOGGER__ERROR("[{}] Fail to Parse firmware notification {} status is {}", device_id, notification.header.event_id, static_cast<int>(d2h_status));
             continue;
         }
 
@@ -688,7 +688,7 @@ hailo_status DeviceBase::validate_binary_version_for_platform(firmware_version_t
     HAILO_COMMON_STATUS_t binary_status = FIRMWARE_HEADER_UTILS__validate_binary_version(new_binary_version, min_supported_binary_version,
                                                                                          fw_binary_type);
     CHECK(HAILO_COMMON_STATUS__SUCCESS == binary_status, HAILO_INVALID_FIRMWARE,
-                    "FW binary version validation failed with status {}", binary_status);
+                    "FW binary version validation failed with status {}", static_cast<int>(binary_status));
     return HAILO_SUCCESS;
 }
 
diff --git a/hailort/libhailort/src/eth/CMakeLists.txt b/hailort/libhailort/src/eth/CMakeLists.txt
index 4bf7ebdc..fdc6e617 100644
--- a/hailort/libhailort/src/eth/CMakeLists.txt
+++ b/hailort/libhailort/src/eth/CMakeLists.txt
@@ -1,4 +1,4 @@
-cmake_minimum_required(VERSION 3.0.0)
+cmake_minimum_required(VERSION 3.5.0)
 
 set(SRC_FILES
     ${CMAKE_CURRENT_SOURCE_DIR}/eth_device.cpp
diff --git a/hailort/libhailort/src/eth/eth_device.cpp b/hailort/libhailort/src/eth/eth_device.cpp
index f3f3ecba..641e1241 100644
--- a/hailort/libhailort/src/eth/eth_device.cpp
+++ b/hailort/libhailort/src/eth/eth_device.cpp
@@ -491,7 +491,7 @@ Expected<std::vector<WriteMemoryInfo>> EthernetDevice::create_core_op_metadata(H
 
     /* Update preliminary_config and dynamic_contexts recepies */
     auto &proto_preliminary_config = partial_core_op->preliminary_config;
-    TRY(auto core_op_config, Hef::Impl::create_single_context_core_op_config(proto_preliminary_config));
+    TRY(auto core_op_config, Hef::Impl::create_single_context_core_op_config(proto_preliminary_config, hef));
 
     return core_op_config;
 }
diff --git a/hailort/libhailort/src/eth/hcp_config_core_op.cpp b/hailort/libhailort/src/eth/hcp_config_core_op.cpp
index 4a325a09..068413f3 100644
--- a/hailort/libhailort/src/eth/hcp_config_core_op.cpp
+++ b/hailort/libhailort/src/eth/hcp_config_core_op.cpp
@@ -108,6 +108,27 @@ hailo_status HcpConfigCoreOp::update_cache_offset(int32_t offset_delta_bytes)
     return HAILO_INVALID_OPERATION;
 }
 
+Expected<std::vector<uint32_t>> HcpConfigCoreOp::get_cache_ids() const
+{
+    LOGGER__ERROR("get_cache_ids function is not supported on ETH core-ops");
+    return make_unexpected(HAILO_INVALID_OPERATION); // always fails: unsupported on ETH core-ops
+}
+
+Expected<Buffer> HcpConfigCoreOp::read_cache_buffer(uint32_t /* cache_id */)
+{
+    // Reading a cache buffer is not supported on ETH core-ops: log it and always fail.
+    LOGGER__ERROR("read_cache_buffer function is not supported on ETH core-ops");
+    return make_unexpected(HAILO_INVALID_OPERATION);
+}
+
+hailo_status HcpConfigCoreOp::write_cache_buffer(uint32_t /* cache_id */, MemoryView /* buffer */)
+{
+    // Writing a cache buffer is not supported on ETH core-ops: both arguments are ignored
+    // and the call always fails with HAILO_INVALID_OPERATION.
+    LOGGER__ERROR("write_cache_buffer function is not supported on ETH core-ops");
+    return HAILO_INVALID_OPERATION;
+}
+
 hailo_status HcpConfigCoreOp::activate_impl(uint16_t /* dynamic_batch_size */)
 {
     // Close older dataflows
diff --git a/hailort/libhailort/src/eth/hcp_config_core_op.hpp b/hailort/libhailort/src/eth/hcp_config_core_op.hpp
index 13f98498..c7795d4a 100644
--- a/hailort/libhailort/src/eth/hcp_config_core_op.hpp
+++ b/hailort/libhailort/src/eth/hcp_config_core_op.hpp
@@ -59,6 +59,9 @@ class HcpConfigCoreOp : public CoreOp
     virtual hailo_status init_cache(uint32_t read_offset, int32_t write_offset_delta) override;
     virtual Expected<hailo_cache_info_t> get_cache_info() const override;
     virtual hailo_status update_cache_offset(int32_t offset_delta_bytes) override;
+    virtual Expected<std::vector<uint32_t>> get_cache_ids() const override;
+    virtual Expected<Buffer> read_cache_buffer(uint32_t cache_id) override;
+    virtual hailo_status write_cache_buffer(uint32_t cache_id, MemoryView buffer) override;
 
     virtual ~HcpConfigCoreOp() = default;
     HcpConfigCoreOp(const HcpConfigCoreOp &other) = delete;
diff --git a/hailort/libhailort/src/hailort.cpp b/hailort/libhailort/src/hailort.cpp
index 2874da9b..c3bb98e6 100644
--- a/hailort/libhailort/src/hailort.cpp
+++ b/hailort/libhailort/src/hailort.cpp
@@ -2405,6 +2405,11 @@ hailo_status hailo_configure_vdevice(hailo_vdevice vdevice, hailo_hef hef,
         network_groups[i] = reinterpret_cast<hailo_configured_network_group>(added_net_groups.value()[i].get());
     }
 
+    // Since the C API doesn't let the user hold the cng, we need to keep it alive in the vdevice scope
+    for (auto &cng : added_net_groups.value()) {
+        CHECK_SUCCESS(reinterpret_cast<VDevice*>(vdevice)->add_network_group_ref_count(cng));
+    }
+
     *number_of_network_groups = added_net_groups.value().size();
     return HAILO_SUCCESS;
 }
diff --git a/hailort/libhailort/src/hailort_defaults.cpp b/hailort/libhailort/src/hailort_defaults.cpp
index 5c819a86..e035a428 100644
--- a/hailort/libhailort/src/hailort_defaults.cpp
+++ b/hailort/libhailort/src/hailort_defaults.cpp
@@ -13,7 +13,7 @@
 
 #include "common/logger_macros.hpp"
 #include "common/utils.hpp"
-
+#include "common/internal_env_vars.hpp"
 
 namespace hailort
 {
@@ -42,30 +42,6 @@ static const hailo_format_order_t DEFAULT_FORMAT_ORDER_MAP[] = {
     HAILO_FORMAT_ORDER_I420                 // HAILO_FORMAT_ORDER_HAILO_YYYYUV,
 };
 
-static const hailo_format_order_t DEFAULT_FORMAT_ARGMAX_ORDER_MAP[] = {
-    // Key is device_format_order, value is default user_format_order
-    HAILO_FORMAT_ORDER_AUTO,                // HAILO_FORMAT_ORDER_AUTO, - Should not be used!
-    HAILO_FORMAT_ORDER_NHW,                 // HAILO_FORMAT_ORDER_NHWC,
-    HAILO_FORMAT_ORDER_NHW,                 // HAILO_FORMAT_ORDER_NHCW,
-    HAILO_FORMAT_ORDER_NHW,                 // HAILO_FORMAT_ORDER_FCR,
-    HAILO_FORMAT_ORDER_NHW,                 // HAILO_FORMAT_ORDER_F8CR,
-    HAILO_FORMAT_ORDER_NHW,                 // HAILO_FORMAT_ORDER_NHW,
-    HAILO_FORMAT_ORDER_NC,                  // HAILO_FORMAT_ORDER_NC,
-    HAILO_FORMAT_ORDER_NHW,                 // HAILO_FORMAT_ORDER_BAYER_RGB,
-    HAILO_FORMAT_ORDER_NHW,                 // HAILO_FORMAT_ORDER_12_BIT_BAYER_RGB,
-    HAILO_FORMAT_ORDER_HAILO_NMS,           // HAILO_FORMAT_ORDER_HAILO_NMS,
-    HAILO_FORMAT_ORDER_NHW,                 // HAILO_FORMAT_ORDER_RGB888,
-    HAILO_FORMAT_ORDER_NHW,                 // HAILO_FORMAT_ORDER_NCHW,
-    HAILO_FORMAT_ORDER_YUY2,                // HAILO_FORMAT_ORDER_YUY2,
-    HAILO_FORMAT_ORDER_MAX_ENUM,            // Not used in device side - HAILO_FORMAT_ORDER_NV12,
-    HAILO_FORMAT_ORDER_MAX_ENUM,            // Not used in device side - HAILO_FORMAT_ORDER_NV21,
-    HAILO_FORMAT_ORDER_NV12,                // HAILO_FORMAT_ORDER_HAILO_YYUV,
-    HAILO_FORMAT_ORDER_NV21,                // HAILO_FORMAT_ORDER_HAILO_YYVU,
-    HAILO_FORMAT_ORDER_MAX_ENUM,            // Not used in device side - HAILO_FORMAT_ORDER_RGB4,
-    HAILO_FORMAT_ORDER_MAX_ENUM,            // Not used in device side - HAILO_FORMAT_ORDER_I420,
-    HAILO_FORMAT_ORDER_I420                 // HAILO_FORMAT_ORDER_HAILO_YYYYUV,
-};
-
 // This func must be aligned to SDK!
 Expected<hailo_format_order_t> HailoRTDefaults::get_device_format_order(uint32_t compiler_format_order)
 {
@@ -120,12 +96,7 @@ Expected<hailo_format_order_t> HailoRTDefaults::get_device_format_order(uint32_t
 
 hailo_format_order_t HailoRTDefaults::get_default_host_format_order(const hailo_format_t &device_format)
 {
-    const bool is_argmax = (0 != (device_format.flags & HAILO_FORMAT_FLAGS_HOST_ARGMAX));
-    if (!is_argmax) {
-        return DEFAULT_FORMAT_ORDER_MAP[device_format.order];
-    } else {
-        return DEFAULT_FORMAT_ARGMAX_ORDER_MAP[device_format.order];
-    }
+    return DEFAULT_FORMAT_ORDER_MAP[device_format.order];
 }
 
 struct sockaddr_in HailoRTDefaults::get_sockaddr()
@@ -327,7 +298,7 @@ ConfigureNetworkParams HailoRTDefaults::get_configure_params(uint16_t batch_size
 {
     ConfigureNetworkParams params = {};
     params.batch_size = batch_size;
-    if (std::getenv("FORCE_POWER_MODE_ULTRA_PERFORMANCE") != nullptr) {
+    if (is_env_variable_on(FORCE_POWER_MODE_ULTRA_PERFORMANCE_ENV_VAR)) {
         power_mode = HAILO_POWER_MODE_ULTRA_PERFORMANCE;
     }
     params.power_mode = power_mode;
diff --git a/hailort/libhailort/src/hef/CMakeLists.txt b/hailort/libhailort/src/hef/CMakeLists.txt
index 48cb5092..2483ba6f 100644
--- a/hailort/libhailort/src/hef/CMakeLists.txt
+++ b/hailort/libhailort/src/hef/CMakeLists.txt
@@ -1,4 +1,4 @@
-cmake_minimum_required(VERSION 3.0.0)
+cmake_minimum_required(VERSION 3.5.0)
 
 set(SRC_FILES
     ${CMAKE_CURRENT_SOURCE_DIR}/hef.cpp
diff --git a/hailort/libhailort/src/hef/context_switch_actions.cpp b/hailort/libhailort/src/hef/context_switch_actions.cpp
index 42c85c32..b2bd42e8 100644
--- a/hailort/libhailort/src/hef/context_switch_actions.cpp
+++ b/hailort/libhailort/src/hef/context_switch_actions.cpp
@@ -258,7 +258,7 @@ hailo_status WriteDataCcwAction::write_to_config_buffer(ConfigBuffer& config_buf
         TRY(auto buffer, Buffer::create_shared(ccw_write_ptr.size));
         MemoryView mem_view(buffer->data(), buffer->size());
         assert(ccw_write_ptr.offset <= SIZE_MAX);
-        status = m_hef_reader->read_from_offset(static_cast<size_t>(ccw_write_ptr.offset), mem_view, ccw_write_ptr.size);
+        status = m_hef_reader->read_from_offset(ccw_write_ptr.offset, mem_view, ccw_write_ptr.size);
         CHECK_SUCCESS(status);
         status = config_buffer.write(mem_view);
         CHECK_SUCCESS(status);
@@ -1654,4 +1654,60 @@ Expected<Buffer> SwitchLcuBatchAction::serialize_params(const ContextResources &
     return Buffer::create(reinterpret_cast<uint8_t*>(&params), sizeof(params));
 }
 
+Expected<ContextSwitchConfigActionPtr> SleepAction::create(uint64_t sleep_time)
+{
+    // The action stores a 32-bit sleep time: values above UINT32_MAX are clamped to UINT32_MAX (with a warning), not wrapped.
+    uint32_t sleep_u32 = 0; 
+    
+    if (sleep_time > UINT32_MAX) {
+        LOGGER__WARNING("Sleep time is too large, truncating to UINT32_MAX");
+        sleep_u32 = UINT32_MAX;
+    }
+    else {
+        sleep_u32 = static_cast<uint32_t>(sleep_time);
+    }
+
+    auto result = ContextSwitchConfigActionPtr(new (std::nothrow) SleepAction(sleep_u32));
+    CHECK_NOT_NULL_AS_EXPECTED(result, HAILO_OUT_OF_HOST_MEMORY);
+    return result;
+}
+
+SleepAction::SleepAction(uint32_t sleep_time) :	
+    ContextSwitchConfigAction(Type::Sleep, CONTEXT_SWITCH_DEFS__ACTION_TYPE_SLEEP),
+    m_sleep_time(sleep_time)
+{}
+
+bool SleepAction::supports_repeated_block() const
+{
+    return false;
+}
+
+Expected<Buffer> SleepAction::serialize_params(const ContextResources &) const
+{
+    CONTEXT_SWITCH_DEFS__sleep_action_data_t params{};
+    params.sleep_time = m_sleep_time;
+    return Buffer::create(reinterpret_cast<uint8_t*>(&params), sizeof(params));
+}
+
+Expected<ContextSwitchConfigActionPtr> HaltAction::create()
+{
+    auto result = ContextSwitchConfigActionPtr(new (std::nothrow) HaltAction());
+    CHECK_NOT_NULL_AS_EXPECTED(result, HAILO_OUT_OF_HOST_MEMORY);
+    return result;
+}
+
+HaltAction::HaltAction() :	
+    ContextSwitchConfigAction(Type::Halt, CONTEXT_SWITCH_DEFS__ACTION_TYPE_HALT)
+{}
+
+bool HaltAction::supports_repeated_block() const
+{
+    return false;
+}
+
+Expected<Buffer> HaltAction::serialize_params(const ContextResources &) const
+{
+    return Buffer::create(0);
+}
+
 } /* namespace hailort */
diff --git a/hailort/libhailort/src/hef/context_switch_actions.hpp b/hailort/libhailort/src/hef/context_switch_actions.hpp
index fab2375f..d278ee7d 100644
--- a/hailort/libhailort/src/hef/context_switch_actions.hpp
+++ b/hailort/libhailort/src/hef/context_switch_actions.hpp
@@ -88,6 +88,8 @@ class ContextSwitchConfigAction
         PauseVdmaChannel,
         ResumeVdmaChannel,
         WaitForCacheUpdated,
+        Sleep,
+        Halt,
     };
 
     ContextSwitchConfigAction(ContextSwitchConfigAction &&) = default;
@@ -977,6 +979,39 @@ class SwitchLcuBatchAction : public ContextSwitchConfigAction
     const uint32_t m_kernel_done_count;
 };
 
+class SleepAction : public ContextSwitchConfigAction
+{
+public:
+    static Expected<ContextSwitchConfigActionPtr> create(uint64_t sleep_time);
+    SleepAction(SleepAction &&) = default;
+    SleepAction(const SleepAction &) = delete;
+    SleepAction &operator=(SleepAction &&) = delete;
+    SleepAction &operator=(const SleepAction &) = delete;
+    virtual ~SleepAction() = default;
+    virtual bool supports_repeated_block() const override;
+    virtual Expected<Buffer> serialize_params(const ContextResources &context_resources) const override;
+
+private:
+    SleepAction(uint32_t sleep_time);
+
+    const uint32_t m_sleep_time = 0;
+};
+
+class HaltAction : public ContextSwitchConfigAction
+{
+public:
+    static Expected<ContextSwitchConfigActionPtr> create();
+    HaltAction(HaltAction &&) = default;
+    HaltAction(const HaltAction &) = delete;
+    HaltAction &operator=(HaltAction &&) = delete;
+    HaltAction &operator=(const HaltAction &) = delete;
+    virtual ~HaltAction() = default;
+    virtual bool supports_repeated_block() const override;
+    virtual Expected<Buffer> serialize_params(const ContextResources &context_resources) const override;
+
+private:
+    HaltAction();
+};
 
 } /* namespace hailort */
 
diff --git a/hailort/libhailort/src/hef/hef.cpp b/hailort/libhailort/src/hef/hef.cpp
index c7b63151..728e1823 100644
--- a/hailort/libhailort/src/hef/hef.cpp
+++ b/hailort/libhailort/src/hef/hef.cpp
@@ -62,6 +62,7 @@ namespace hailort
 #define DEFAULT_BATCH_SIZE (1)
 #define SKIP_SPACE_COMMA_CHARACTERS (2)
 #define ALIGNED_TO_4_BYTES (4)
+#define MIN_SLEEP_TIME_USEC (1000)
 constexpr uint8_t DEFAULT_DIVISION_FACTOR = 1;
 
 static const uint8_t ENABLE_LCU_CONTROL_WORD[4] = {1, 0, 0, 0};
@@ -515,6 +516,7 @@ hailo_status Hef::Impl::parse_hef_file(const std::string &hef_path)
         CHECK_SUCCESS(status);
 
         ccws_offset = HEF_HEADER_SIZE_V1 + hef_header.hef_proto_size;
+        m_ccws_offset = ccws_offset;
 
         TRY(auto calculated_residue_size, calc_hef_residue_size(hef_reader, hef_header.version));
         TRY(auto calculated_crc, CRC32::calc_crc_on_stream(*hef_reader->get_fstream(), calculated_residue_size));
@@ -603,6 +605,7 @@ hailo_status Hef::Impl::parse_hef_memview(const MemoryView &hef_memview)
         auto proto_size = hef_memview.size() - HEF_HEADER_SIZE_V1 - hef_header.distinct.v1.ccws_size;
 
         ccws_offset = HEF_HEADER_SIZE_V1 + hef_header.hef_proto_size;
+        m_ccws_offset = ccws_offset;
 
         TRY(auto proto_and_ccws_size, calc_hef_residue_size(hef_reader, hef_header.version));
         auto proto_and_ccws_buffer = MemoryView::create_const(hef_memview.data() + HEF_HEADER_SIZE_V1, proto_and_ccws_size);
@@ -1669,7 +1672,7 @@ bool HefConfigurator::is_core_hw_padding_supported(const LayerInfo &layer_info,
     case HAILO_FORMAT_ORDER_NHCW:
         break;
     default:
-        LOGGER__DEBUG("HW padding is not supported for format {} ", layer_info.format.order);
+        LOGGER__DEBUG("HW padding is not supported for format {} ", static_cast<int>(layer_info.format.order));
         return false;
     }
 
@@ -1873,7 +1876,7 @@ Expected<std::pair<std::string, std::string>> Hef::Impl::get_network_group_and_n
         }
     }
 
-    LOGGER__ERROR("Failed to find network or network_group with the name {}",
+    LOGGER__ERROR("Failed to find network or network_group with the name '{}'",
         name);
     return make_unexpected(HAILO_NOT_FOUND);
 }
@@ -1884,7 +1887,7 @@ Expected<std::shared_ptr<ProtoHEFCoreOpMock>> Hef::Impl::get_core_op_by_net_grou
     if ("" == net_group_name) {
         auto network_group_ptr = m_groups[0];
         auto network_group_name = HefUtils::get_network_group_name(*network_group_ptr, m_supported_features);
-        LOGGER__INFO("No network_group name was given. Addressing default network_group: {}", network_group_name);
+        LOGGER__TRACE("No network_group name was given. Addressing default network_group: {}", network_group_name);
         const auto &core_op = m_core_ops_per_group[network_group_name][0];
         if (is_multi_layout(get_device_arch())) {
             auto partial_core_op = core_op.partial_core_ops[0];
@@ -1926,7 +1929,7 @@ static Expected<LayerType> get_layer_type(const ProtoHEFEdgeConnectionType &edge
     case PROTO__EDGE_CONNECTION_TYPE__DDR:
         return LayerType::DDR;
     default:
-        LOGGER__ERROR("Not supported edge connection type {}", edge_connection_type);
+        LOGGER__ERROR("Not supported edge connection type {}", static_cast<int>(edge_connection_type));
         return make_unexpected(HAILO_INVALID_HEF);
     }
 }
@@ -1973,8 +1976,8 @@ hailo_status HefUtils::fill_layer_info_with_base_info(const ProtoHEFEdgeLayerBas
     }
 
     if (base_info.host_argmax()) {
-        layer_info.format.flags |= HAILO_FORMAT_FLAGS_HOST_ARGMAX;
-        layer_info.shape.features = 1;
+        LOGGER__ERROR("Using legacy implementation of Argmax in host. Please re-compile your model with latest DFC version");
+        return HAILO_INVALID_HEF;
     }
 
     TRY(layer_info.format.type, HailoRTCommon::get_format_type(layer_info.hw_data_bytes));
@@ -2429,7 +2432,7 @@ static Expected<ContextSwitchConfigActionPtr> parse_trigger_action(const ProtoHE
         return NoneAction::create();
     }
     default:
-        LOGGER__ERROR("Unsupported trigger given {}", trigger_proto.trigger_case());
+        LOGGER__ERROR("Unsupported trigger given {}", static_cast<int>(trigger_proto.trigger_case()));
         return make_unexpected(HAILO_INVALID_HEF);
     }
 }
@@ -2609,7 +2612,7 @@ static Expected<ContextSwitchConfigActionPtr> parse_action(const ProtoHEFAction
                 proto_action.write_data_by_type().address());
             CHECK_AS_EXPECTED(proto_action.write_data_by_type().data_type() == ProtoHEFWriteDataType::DATA_FROM_ACTION ||
                 proto_action.write_data_by_type().data_type() == ProtoHEFWriteDataType::BATCH_SIZE, HAILO_INVALID_HEF,
-                "Failed to parse HEF. Invalid write_data_by_type data_type: {} ", proto_action.write_data_by_type().data_type());
+                "Failed to parse HEF. Invalid write_data_by_type data_type: {} ", static_cast<int>(proto_action.write_data_by_type().data_type()));
             CHECK_AS_EXPECTED(proto_action.write_data_by_type().data().length() <= CONTEXT_SWITCH_DEFS__WRITE_ACTION_BY_TYPE_MAX_SIZE, HAILO_INVALID_HEF,
                 "Failed to parse HEF. Invalid write_data_by_type data size: {} ", proto_action.write_data_by_type().data().length());
             CHECK_AS_EXPECTED(IS_FIT_IN_UINT8(proto_action.write_data_by_type().shift()), HAILO_INVALID_HEF,
@@ -2635,8 +2638,22 @@ static Expected<ContextSwitchConfigActionPtr> parse_action(const ProtoHEFAction
 
             return WriteDataByTypeAction::create(address, data_type, data, shift, mask, network_index);
         }
+
+        case ProtoHEFAction::kDebug:
+        {
+            if (proto_action.debug().has_sleep()) {
+                CHECK(proto_action.debug().sleep().duration_in_usec() >= MIN_SLEEP_TIME_USEC, HAILO_INVALID_HEF, "Sleep time must be at least {} & must be in microseconds", MIN_SLEEP_TIME_USEC);
+                return SleepAction::create(proto_action.debug().sleep().duration_in_usec());
+            } else if (proto_action.debug().has_halt()) {
+                return HaltAction::create();
+            } else {
+                LOGGER__ERROR("Debug action must have sleep or halt field - action case: {}, action type: {}", static_cast<int>(proto_action.debug().action_case()),
+                    static_cast<int>(proto_action.debug().type()));
+                return make_unexpected(HAILO_INVALID_HEF);
+            }
+        }
         default:
-            LOGGER__ERROR("Action {} not implemented", proto_action.action_case());
+            LOGGER__ERROR("Action {} not implemented", static_cast<int>(proto_action.action_case()));
             break;
     }
 
@@ -2880,7 +2897,7 @@ Expected<ContextMetadata> HefUtils::parse_single_dynamic_context(const ProtoHEFC
                 supported_features, context_metadata);
             CHECK_SUCCESS_AS_EXPECTED(status);
         } else {
-            LOGGER__ERROR("Unsupported edge connection type given {}", edge_layer.context_switch_info().edge_connection_type());
+            LOGGER__ERROR("Unsupported edge connection type given {}", static_cast<int>(edge_layer.context_switch_info().edge_connection_type()));
             return make_unexpected(HAILO_INVALID_HEF);
         }
     }
@@ -2942,7 +2959,7 @@ static Expected<hailo_nms_burst_type_t> get_nms_burst_mode(const ProtoHEFNmsInfo
         case PROTO__NMS_BURST_TYPE__H8_PER_CLASS:
             return HAILO_BURST_TYPE_H8_PER_CLASS;
         default:
-            LOGGER__ERROR("Unsupported burst type was given {} for arch {}", nms_info.burst_type(), hef_arch);
+            LOGGER__ERROR("Unsupported burst type was given {} for arch {}", static_cast<int>(nms_info.burst_type()), static_cast<int>(hef_arch));
             return make_unexpected(HAILO_INVALID_HEF);
         }
     case PROTO__HW_ARCH__HAILO15H:
@@ -2958,11 +2975,11 @@ static Expected<hailo_nms_burst_type_t> get_nms_burst_mode(const ProtoHEFNmsInfo
         case PROTO__NMS_BURST_TYPE__H15_PER_FRAME:
             return HAILO_BURST_TYPE_H15_PER_FRAME;
         default:
-            LOGGER__ERROR("Unsupported burst type was given {} for arch {}", nms_info.burst_type(), hef_arch);
+            LOGGER__ERROR("Unsupported burst type was given {} for arch {}", static_cast<int>(nms_info.burst_type()), static_cast<int>(hef_arch));
             return make_unexpected(HAILO_INVALID_HEF);
         }
     default:
-        LOGGER__ERROR("Not supported hef arch {}", hef_arch);
+        LOGGER__ERROR("Not supported hef arch {}", static_cast<int>(hef_arch));
         return make_unexpected(HAILO_INTERNAL_FAILURE);
     }
 }
@@ -2972,7 +2989,7 @@ static Expected<hailo_nms_burst_type_t> get_nms_bbox_mode(const ProtoHEFNmsInfo
 {
     CHECK_AS_EXPECTED(0 == nms_info.burst_type(),
         HAILO_INVALID_HEF, "Invalid HEF, nms burst extension is disabled yet burst type {} is not zero",
-        nms_info.burst_type());
+        static_cast<int>(nms_info.burst_type()));
 
     switch (hef_arch) {
     case PROTO__HW_ARCH__HAILO8:
@@ -2990,7 +3007,7 @@ static Expected<hailo_nms_burst_type_t> get_nms_bbox_mode(const ProtoHEFNmsInfo
         return HAILO_BURST_TYPE_H15_BBOX;
 
     default:
-        LOGGER__ERROR("Not supported hef arch {}", hef_arch);
+        LOGGER__ERROR("Not supported hef arch {}", static_cast<int>(hef_arch));
         return make_unexpected(HAILO_INTERNAL_FAILURE);
     }
 }
@@ -3337,9 +3354,27 @@ static Expected<WriteMemoryInfo> parse_ccw_buffer(const std::string &ccw_buffer)
     return write_memory_info;
 }
 
+// Reads `size` bytes at `offset` via hef_reader (opened here, closed after a successful read) and parses them as one CCW buffer.
+static Expected<WriteMemoryInfo> parse_ccw_buffer_from_ptr(const size_t size, const uint64_t offset, std::shared_ptr<SeekableBytesReader> hef_reader)
+{
+    TRY(auto buffer, Buffer::create_shared(size));
+    auto status = hef_reader->open();
+    CHECK_SUCCESS(status);
+
+    status = hef_reader->read_from_offset(offset, MemoryView(*buffer), size);
+    CHECK_SUCCESS(status);
+
+    status = hef_reader->close();
+    CHECK_SUCCESS(status);
+    // The data is binary and not NUL-terminated: pass its length explicitly instead of relying on strlen().
+    const std::string ccw_buffer(reinterpret_cast<const char*>(MemoryView(*buffer).data()), size);
+    TRY(auto write_memory_info, parse_ccw_buffer(ccw_buffer));
+    return write_memory_info;
+}
+
 /* HcpConfigCoreOp funcs */
 
-Expected<std::vector<WriteMemoryInfo>> Hef::Impl::create_single_context_core_op_config(const ProtoHEFPreliminaryConfig& proto_config)
+Expected<std::vector<WriteMemoryInfo>> Hef::Impl::create_single_context_core_op_config(const ProtoHEFPreliminaryConfig& proto_config, const Hef &hef)
 {
     std::vector<WriteMemoryInfo> config_buffers;
 
@@ -3369,7 +3404,17 @@ Expected<std::vector<WriteMemoryInfo>> Hef::Impl::create_single_context_core_op_
                     break;
                 }
                 case ProtoHEFAction::kWriteDataCcw: {
-                    TRY(auto config_buffer, parse_ccw_buffer(action.write_data_ccw().data())); // TODO: make this not supported in sHEF
+                    CHECK(HEADER_VERSION_1 != hef.pimpl->m_hef_version, HAILO_INVALID_HEF, "WriteDataCcw is not supported on V1 HEF");
+                    TRY(auto config_buffer, parse_ccw_buffer(action.write_data_ccw().data()));
+                    config_buffers.emplace_back(std::move(config_buffer));
+                    break;
+                }
+                case ProtoHEFAction::kWriteDataCcwPtr :{
+                    CHECK(HEADER_VERSION_0 != hef.pimpl->m_hef_version, HAILO_INVALID_HEF, "WriteDataCcwPtr is not supported on V0 HEF");
+                    const auto size = action.write_data_ccw_ptr().size();
+                    const auto offset = action.write_data_ccw_ptr().offset() + hef.pimpl->get_ccws_offset();
+                    auto hef_reader = hef.pimpl->get_hef_reader();
+                    TRY(auto config_buffer, parse_ccw_buffer_from_ptr(size, offset, hef_reader));
                     config_buffers.emplace_back(std::move(config_buffer));
                     break;
                 }
@@ -3425,6 +3470,11 @@ std::shared_ptr<SeekableBytesReader> Hef::Impl::get_hef_reader()
     return m_hef_reader;
 }
 
+size_t Hef::Impl::get_ccws_offset()
+{
+    return m_ccws_offset;
+}
+
 Expected<float64_t> Hef::Impl::get_bottleneck_fps(const std::string &net_group_name)
 {
     TRY(const auto core_op, get_core_op_by_net_group_name(net_group_name));
diff --git a/hailort/libhailort/src/hef/hef_internal.hpp b/hailort/libhailort/src/hef/hef_internal.hpp
index ad11dde9..baa7727b 100644
--- a/hailort/libhailort/src/hef/hef_internal.hpp
+++ b/hailort/libhailort/src/hef/hef_internal.hpp
@@ -284,6 +284,7 @@ class Hef::Impl final
     Expected<size_t> get_number_of_output_streams(const std::string &net_group_name="");
     ProtoHEFHwArch get_device_arch();
     std::shared_ptr<SeekableBytesReader> get_hef_reader();
+    size_t get_ccws_offset();
     Expected<float64_t> get_bottleneck_fps(const std::string &net_group_name="");
     static bool contains_ddr_layers(const ProtoHEFCoreOpMock &core_op);
     static hailo_status validate_core_op_unique_layer_names(const ProtoHEFCoreOpMock &core_op);
@@ -308,7 +309,7 @@ class Hef::Impl final
         const hailo_mipi_input_stream_params_t &mipi_params, const std::string &network_group_name);
 
     static Expected<std::vector<WriteMemoryInfo>> create_single_context_core_op_config(
-        const ProtoHEFPreliminaryConfig& proto_config);
+        const ProtoHEFPreliminaryConfig& proto_config, const Hef &hef);
 
     static Expected<std::shared_ptr<ProtoHEFCoreOpMock>> get_core_op_per_arch(const ProtoHEFCoreOpMock &core_op,
         ProtoHEFHwArch hef_arch, hailo_device_architecture_t device_arch, uint32_t partial_clusters_layout_bitmap);
@@ -456,6 +457,7 @@ class Hef::Impl final
     MD5_SUM_t m_md5;
     uint32_t m_crc;
     std::shared_ptr<SeekableBytesReader> m_hef_reader;
+    size_t m_ccws_offset;
 
 #ifdef HAILO_SUPPORT_MULTI_PROCESS
     Buffer m_hef_buffer;
diff --git a/hailort/libhailort/src/hef/layer_info.hpp b/hailort/libhailort/src/hef/layer_info.hpp
index e2d16f03..205082c8 100644
--- a/hailort/libhailort/src/hef/layer_info.hpp
+++ b/hailort/libhailort/src/hef/layer_info.hpp
@@ -257,6 +257,33 @@ class LayerInfoUtils {
         }
     }
 
+    static constexpr size_t get_nms_layer_max_transfers_per_frame(const LayerInfo &layer_info)
+    {
+        const auto &nms_info = layer_info.nms_info;
+        switch (nms_info.burst_type) {
+            // In no-burst (bbox) mode - size of transfer is size of bbox
+            case HAILO_BURST_TYPE_H8_BBOX:
+            case HAILO_BURST_TYPE_H15_BBOX:
+                return nms_info.number_of_classes * nms_info.max_bboxes_per_class * nms_info.chunks_per_frame;
+            // In hailo8 per-class and hailo15 per-class modes - count the bursts needed per class (rounded up to whole bursts)
+            case HAILO_BURST_TYPE_H8_PER_CLASS:
+            case HAILO_BURST_TYPE_H15_PER_CLASS:
+            {
+                // In case of hailo8 - nn-core adds one delimiter per burst - in case of hailo15 nn-core adds delimiter and image delimiter per class
+                const size_t bboxes_needed_for_delimeter = (HAILO_BURST_TYPE_H8_PER_CLASS == nms_info.burst_type) ?
+                    1 : 2;
+                const size_t max_bboxes_per_class = nms_info.max_bboxes_per_class + bboxes_needed_for_delimeter;
+                const size_t bursts_per_class = (max_bboxes_per_class + nms_info.burst_size - 1) / nms_info.burst_size;
+                return bursts_per_class * nms_info.number_of_classes * nms_info.chunks_per_frame;
+            }
+            // Currently HAILO_BURST_TYPE_H15_PER_FRAME mode isn't supported - shouldn't reach here
+            case HAILO_BURST_TYPE_H15_PER_FRAME:
+            default:
+                assert(false);
+                return 0;
+        }
+    }
+
     /**
      * Return if layer is NMS Burst layers.
      *
diff --git a/hailort/libhailort/src/mipi/CMakeLists.txt b/hailort/libhailort/src/mipi/CMakeLists.txt
index bd101a2c..a33d2764 100644
--- a/hailort/libhailort/src/mipi/CMakeLists.txt
+++ b/hailort/libhailort/src/mipi/CMakeLists.txt
@@ -1,4 +1,4 @@
-cmake_minimum_required(VERSION 3.0.0)
+cmake_minimum_required(VERSION 3.5.0)
 
 set(SRC_FILES
     ${CMAKE_CURRENT_SOURCE_DIR}/mipi_stream.cpp
diff --git a/hailort/libhailort/src/net_flow/CMakeLists.txt b/hailort/libhailort/src/net_flow/CMakeLists.txt
index abf393ae..d726b000 100644
--- a/hailort/libhailort/src/net_flow/CMakeLists.txt
+++ b/hailort/libhailort/src/net_flow/CMakeLists.txt
@@ -1,4 +1,4 @@
-cmake_minimum_required(VERSION 3.0.0)
+cmake_minimum_required(VERSION 3.5.0)
 
 set(SRC_FILES
     ${CMAKE_CURRENT_SOURCE_DIR}/ops/nms_post_process.cpp
diff --git a/hailort/libhailort/src/net_flow/ops/nms_post_process.cpp b/hailort/libhailort/src/net_flow/ops/nms_post_process.cpp
index 735f2b20..089c2c0e 100644
--- a/hailort/libhailort/src/net_flow/ops/nms_post_process.cpp
+++ b/hailort/libhailort/src/net_flow/ops/nms_post_process.cpp
@@ -46,8 +46,6 @@ hailo_status NmsOpMetadata::validate_format_info()
 
         CHECK(!(HAILO_FORMAT_FLAGS_TRANSPOSED & output_metadata.second.format.flags), HAILO_INVALID_ARGUMENT, "Output {} is marked as transposed, which is not supported for this model.",
             output_metadata.first);
-        CHECK(!(HAILO_FORMAT_FLAGS_HOST_ARGMAX & output_metadata.second.format.flags), HAILO_INVALID_ARGUMENT, "Output {} is marked as argmax, which is not supported for this model.",
-            output_metadata.first);
     }
     if (m_type == OperationType::IOU) {
         assert(1 == m_inputs_metadata.size());
diff --git a/hailort/libhailort/src/net_flow/ops/softmax_post_process.cpp b/hailort/libhailort/src/net_flow/ops/softmax_post_process.cpp
index 8e07a684..081263bf 100644
--- a/hailort/libhailort/src/net_flow/ops/softmax_post_process.cpp
+++ b/hailort/libhailort/src/net_flow/ops/softmax_post_process.cpp
@@ -12,24 +12,10 @@
 #include "hailo/hailort_common.hpp"
 #include "hailo/hailort_defaults.hpp"
 
-#if defined(_MSC_VER)
-#pragma warning(push)
-#pragma warning(disable: 4244 4267 4127)
-#else
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wconversion"
-#pragma GCC diagnostic ignored "-Wunused-parameter"
-#pragma GCC diagnostic ignored "-Wclass-memaccess"
-#endif
-#include <Eigen/Dense>
-#if defined(_MSC_VER)
-#pragma warning(pop)
-#else
-#pragma GCC diagnostic pop
-#endif
-
 #include "common/utils.hpp"
 
+#include "transform/eigen.hpp"
+
 #include <limits>
 
 namespace hailort
@@ -225,8 +211,6 @@ hailo_status SoftmaxOpMetadata::validate_format_info()
         HAILO_INVALID_OPERATION, "The given output format type {} is not valid, should be {}",
         HailoRTCommon::get_format_type_str(output_metadata.format.type),
         HailoRTCommon::get_format_type_str(HAILO_FORMAT_TYPE_FLOAT32));
-    CHECK(!(HAILO_FORMAT_FLAGS_HOST_ARGMAX & output_metadata.format.flags), HAILO_INVALID_ARGUMENT, "Output {} is marked as argmax, which is not supported for this model.",
-        m_outputs_metadata.begin()->first);
 
     return HAILO_SUCCESS;
 }
diff --git a/hailort/libhailort/src/net_flow/ops/ssd_post_process.cpp b/hailort/libhailort/src/net_flow/ops/ssd_post_process.cpp
index 82bb735f..ae29f8d5 100644
--- a/hailort/libhailort/src/net_flow/ops/ssd_post_process.cpp
+++ b/hailort/libhailort/src/net_flow/ops/ssd_post_process.cpp
@@ -225,7 +225,7 @@ hailo_status SSDPostProcessOp::extract_detections(const std::string &reg_input_n
                     CHECK_SUCCESS(status);
                 } else {
                     CHECK_SUCCESS(HAILO_INVALID_ARGUMENT, "SSD post-process received invalid reg input type: {}",
-                        inputs_metadata.at(reg_input_name).format.type);
+                        static_cast<int>(inputs_metadata.at(reg_input_name).format.type));
                 }
             }
         }
diff --git a/hailort/libhailort/src/net_flow/ops/ssd_post_process.hpp b/hailort/libhailort/src/net_flow/ops/ssd_post_process.hpp
index 25d6077e..b5b2aa84 100644
--- a/hailort/libhailort/src/net_flow/ops/ssd_post_process.hpp
+++ b/hailort/libhailort/src/net_flow/ops/ssd_post_process.hpp
@@ -136,7 +136,7 @@ class SSDPostProcessOp : public NmsPostProcessOp
                 cls_index);
         } else {
             CHECK_SUCCESS(HAILO_INVALID_ARGUMENT, "SSD post-process received invalid cls input type: {}",
-                cls_metadata.format.type);
+                static_cast<int>(cls_metadata.format.type));
         }
         return HAILO_SUCCESS;
     }
diff --git a/hailort/libhailort/src/net_flow/ops/yolov5_bbox_only_post_process.cpp b/hailort/libhailort/src/net_flow/ops/yolov5_bbox_only_post_process.cpp
index c44eece7..a7221032 100644
--- a/hailort/libhailort/src/net_flow/ops/yolov5_bbox_only_post_process.cpp
+++ b/hailort/libhailort/src/net_flow/ops/yolov5_bbox_only_post_process.cpp
@@ -45,8 +45,6 @@ hailo_status Yolov5BboxOnlyOpMetadata::validate_format_info()
 
         CHECK(!(HAILO_FORMAT_FLAGS_TRANSPOSED & output_metadata.second.format.flags), HAILO_INVALID_ARGUMENT, "Output {} is marked as transposed, which is not supported for this model.",
             output_metadata.first);
-        CHECK(!(HAILO_FORMAT_FLAGS_HOST_ARGMAX & output_metadata.second.format.flags), HAILO_INVALID_ARGUMENT, "Output {} is marked as argmax, which is not supported for this model.",
-            output_metadata.first);
     }
 
     assert(1 <= m_inputs_metadata.size());
@@ -117,7 +115,7 @@ hailo_status YOLOv5BboxOnlyPostProcessOp::execute(const std::map<std::string, Me
             status = add_bboxes<float32_t, uint16_t>(dst_ptr, next_bbox_output_offset, name_to_input.second,
                 input_metadata.quant_info, input_metadata.shape, input_metadata.padded_shape, yolo_config.anchors.at(name));
         } else {
-            CHECK_SUCCESS(HAILO_INVALID_ARGUMENT, "YOLO post-process received invalid input type {}", input_metadata.format.type);
+            CHECK_SUCCESS(HAILO_INVALID_ARGUMENT, "YOLO post-process received invalid input type {}", static_cast<int>(input_metadata.format.type));
         }
         CHECK_SUCCESS(status);
     }
diff --git a/hailort/libhailort/src/net_flow/ops/yolov5_post_process.cpp b/hailort/libhailort/src/net_flow/ops/yolov5_post_process.cpp
index 70151993..cb5db95d 100644
--- a/hailort/libhailort/src/net_flow/ops/yolov5_post_process.cpp
+++ b/hailort/libhailort/src/net_flow/ops/yolov5_post_process.cpp
@@ -89,7 +89,7 @@ hailo_status YOLOv5PostProcessOp::execute(const std::map<std::string, MemoryView
             status = extract_detections<float32_t, uint16_t>(name_to_input.second, input_metadata.quant_info, input_metadata.shape,
                 input_metadata.padded_shape, yolo_config.anchors.at(name));
         } else {
-            CHECK_SUCCESS(HAILO_INVALID_ARGUMENT, "YOLO post-process received invalid input type {}", input_metadata.format.type);
+            CHECK_SUCCESS(HAILO_INVALID_ARGUMENT, "YOLO post-process received invalid input type {}", static_cast<int>(input_metadata.format.type));
         }
         CHECK_SUCCESS(status);
     }
diff --git a/hailort/libhailort/src/net_flow/ops/yolov5_seg_post_process.cpp b/hailort/libhailort/src/net_flow/ops/yolov5_seg_post_process.cpp
index a7284df1..af7d4da2 100644
--- a/hailort/libhailort/src/net_flow/ops/yolov5_seg_post_process.cpp
+++ b/hailort/libhailort/src/net_flow/ops/yolov5_seg_post_process.cpp
@@ -10,23 +10,17 @@
 #include "yolov5_seg_post_process.hpp"
 #include "hailo/hailort.h"
 
-#if defined(_MSC_VER)
-#pragma warning(push)
-#pragma warning(disable: 4244 4267 4127)
-#else
+#include "transform/eigen.hpp"
+
+#ifndef _MSC_VER
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wconversion"
-#pragma GCC diagnostic ignored "-Wunused-parameter"
-#pragma GCC diagnostic ignored "-Wclass-memaccess"
-#endif
+#endif // Not MSC
 #define STB_IMAGE_RESIZE_IMPLEMENTATION
 #include "stb_image_resize.h"
-#include <Eigen/Dense>
-#if defined(_MSC_VER)
-#pragma warning(pop)
-#else
+#ifndef _MSC_VER
 #pragma GCC diagnostic pop
-#endif
+#endif // Not MSC
 
 namespace hailort
 {
@@ -71,8 +65,6 @@ hailo_status Yolov5SegOpMetadata::validate_format_info()
 
         CHECK(!(HAILO_FORMAT_FLAGS_TRANSPOSED & output_metadata.second.format.flags), HAILO_INVALID_ARGUMENT,
             "Output {} is marked as transposed, which is not supported for this model.", output_metadata.first);
-        CHECK(!(HAILO_FORMAT_FLAGS_HOST_ARGMAX & output_metadata.second.format.flags), HAILO_INVALID_ARGUMENT,
-            "Output {} is marked as argmax, which is not supported for this model.", output_metadata.first);
     }
 
     assert(1 <= m_inputs_metadata.size());
@@ -151,7 +143,7 @@ hailo_status Yolov5SegPostProcess::execute(const std::map<std::string, MemoryVie
         auto &input_metadata = inputs_metadata.at(name);
 
         CHECK(((input_metadata.format.type == HAILO_FORMAT_TYPE_UINT16) || (input_metadata.format.type == HAILO_FORMAT_TYPE_UINT8)),
-            HAILO_INVALID_ARGUMENT, "YOLO post-process received invalid input type {}", input_metadata.format.type);
+            HAILO_INVALID_ARGUMENT, "YOLO post-process received invalid input type {}", static_cast<int>(input_metadata.format.type));
 
         // Prepare proto layer
         if (name == yolov5seg_config.proto_layer_name) {
diff --git a/hailort/libhailort/src/net_flow/ops/yolov8_bbox_only_post_process.cpp b/hailort/libhailort/src/net_flow/ops/yolov8_bbox_only_post_process.cpp
index 3407e2cc..260968db 100644
--- a/hailort/libhailort/src/net_flow/ops/yolov8_bbox_only_post_process.cpp
+++ b/hailort/libhailort/src/net_flow/ops/yolov8_bbox_only_post_process.cpp
@@ -46,8 +46,6 @@ hailo_status Yolov8BboxOnlyOpMetadata::validate_format_info()
 
         CHECK(!(HAILO_FORMAT_FLAGS_TRANSPOSED & output_metadata.second.format.flags), HAILO_INVALID_ARGUMENT, "Output {} is marked as transposed, which is not supported for this model.",
             output_metadata.first);
-        CHECK(!(HAILO_FORMAT_FLAGS_HOST_ARGMAX & output_metadata.second.format.flags), HAILO_INVALID_ARGUMENT, "Output {} is marked as argmax, which is not supported for this model.",
-            output_metadata.first);
     }
 
     assert(1 <= m_inputs_metadata.size());
@@ -115,7 +113,7 @@ hailo_status YOLOv8BboxOnlyPostProcessOp::execute(const std::map<std::string, Me
             status = add_bboxes<float32_t, uint16_t>(dst_ptr, next_bbox_output_offset, reg_to_cls_name,
                 inputs.at(reg_to_cls_name.reg), inputs.at(reg_to_cls_name.cls), reg_to_cls_name.stride);
         } else {
-            CHECK_SUCCESS(HAILO_INVALID_ARGUMENT, "YOLOV8 bbox only post-process received invalid input type {}", input_metadata.format.type);
+            CHECK_SUCCESS(HAILO_INVALID_ARGUMENT, "YOLOV8 bbox only post-process received invalid input type {}", static_cast<int>(input_metadata.format.type));
         }
 
         CHECK_SUCCESS(status);
diff --git a/hailort/libhailort/src/net_flow/ops/yolov8_post_process.cpp b/hailort/libhailort/src/net_flow/ops/yolov8_post_process.cpp
index c5c8be07..4576a41d 100644
--- a/hailort/libhailort/src/net_flow/ops/yolov8_post_process.cpp
+++ b/hailort/libhailort/src/net_flow/ops/yolov8_post_process.cpp
@@ -111,7 +111,7 @@ hailo_status YOLOV8PostProcessOp::execute(const std::map<std::string, MemoryView
             status = extract_detections<float32_t, uint16_t>(reg_to_cls_name, inputs.at(reg_to_cls_name.reg),
                 inputs.at(reg_to_cls_name.cls), reg_to_cls_name.stride);
         } else {
-            CHECK_SUCCESS(HAILO_INVALID_ARGUMENT, "YOLO post-process received invalid input type {}", input_metadata.format.type);
+            CHECK_SUCCESS(HAILO_INVALID_ARGUMENT, "YOLO post-process received invalid input type {}", static_cast<int>(input_metadata.format.type));
         }
 
         CHECK_SUCCESS(status);
diff --git a/hailort/libhailort/src/net_flow/ops/yolox_post_process.cpp b/hailort/libhailort/src/net_flow/ops/yolox_post_process.cpp
index 98812cd7..8f98d23f 100644
--- a/hailort/libhailort/src/net_flow/ops/yolox_post_process.cpp
+++ b/hailort/libhailort/src/net_flow/ops/yolox_post_process.cpp
@@ -121,7 +121,7 @@ hailo_status YOLOXPostProcessOp::execute(const std::map<std::string, MemoryView>
             status = extract_detections<float32_t, uint16_t>(layers_names_triplet, inputs.at(layers_names_triplet.reg), inputs.at(layers_names_triplet.cls),
                 inputs.at(layers_names_triplet.obj));
         } else {
-            CHECK_SUCCESS(HAILO_INVALID_ARGUMENT, "YOLO post-process received invalid input type {}", input_metadata.format.type);
+            CHECK_SUCCESS(HAILO_INVALID_ARGUMENT, "YOLO post-process received invalid input type {}", static_cast<int>(input_metadata.format.type));
         }
 
         CHECK_SUCCESS(status);
diff --git a/hailort/libhailort/src/net_flow/pipeline/async_infer_runner.cpp b/hailort/libhailort/src/net_flow/pipeline/async_infer_runner.cpp
index 3707d1e2..9d236cc7 100644
--- a/hailort/libhailort/src/net_flow/pipeline/async_infer_runner.cpp
+++ b/hailort/libhailort/src/net_flow/pipeline/async_infer_runner.cpp
@@ -221,23 +221,23 @@ void AsyncInferRunnerImpl::abort()
     return;
 }
 
-Expected<bool> AsyncInferRunnerImpl::can_push_buffers(uint32_t frames_count)
+Expected<std::pair<bool, std::string>> AsyncInferRunnerImpl::can_push_buffers(uint32_t frames_count) // {can push, key of the first element that refused}
 {
     for (auto &last_element : m_async_pipeline->get_last_elements()) {
         TRY(const auto can_push_buffer, last_element.second->can_push_buffer_upstream(frames_count));
         if (!can_push_buffer) {
-            return false;
+            return std::make_pair(false, last_element.first); // stop at the first last-element that refuses; report its map key
         }
     }
 
     for (auto &entry_element : m_async_pipeline->get_entry_elements()) {
         TRY(const auto can_push_buffer, entry_element.second->can_push_buffer_downstream(frames_count));
         if (!can_push_buffer) {
-            return false;
+            return std::make_pair(false, entry_element.first); // same for entry elements, which are queried downstream
         }
     }
 
-    return true;
+    return std::make_pair(true, std::string("")); // nothing refused: the string part is left empty
 }
 
 hailo_status AsyncInferRunnerImpl::set_buffers(std::unordered_map<std::string, PipelineBuffer> &inputs,
@@ -279,14 +279,14 @@ void AsyncInferRunnerImpl::set_pix_buffer_inputs(std::unordered_map<std::string,
     }
 }
 
-hailo_status AsyncInferRunnerImpl::run(ConfiguredInferModel::Bindings &bindings, TransferDoneCallbackAsyncInfer transfer_done)
+hailo_status AsyncInferRunnerImpl::run(const ConfiguredInferModel::Bindings &bindings, TransferDoneCallbackAsyncInfer transfer_done)
 {
     std::unique_lock<std::mutex> lock(m_mutex);
     hailo_status status = m_async_pipeline->get_pipeline_status()->load();
     CHECK_SUCCESS(status, "Can't handle infer request since Pipeline status is {}.", status);
 
-    TRY(auto are_pools_ready, can_push_buffers(1));
-    CHECK(are_pools_ready, HAILO_QUEUE_IS_FULL, "Can't handle infer request since a queue in the pipeline is full.");
+    TRY(auto are_pools_ready_pair, can_push_buffers(1));
+    CHECK(are_pools_ready_pair.first, HAILO_QUEUE_IS_FULL, "Can't handle infer request since a queue in the pipeline is full.");
 
     std::unordered_map<std::string, PipelineBuffer> outputs;
 
@@ -333,7 +333,6 @@ hailo_status AsyncInferRunnerImpl::run(ConfiguredInferModel::Bindings &bindings,
     }
 
     status = set_buffers(inputs, outputs);
-    // TODO: (HRT-14283) If set_buffers fails after a buffer is enqueued, the buffer's CB will be called - and might call user's CB
     CHECK_SUCCESS(status);
     return HAILO_SUCCESS;
 }
diff --git a/hailort/libhailort/src/net_flow/pipeline/async_infer_runner.hpp b/hailort/libhailort/src/net_flow/pipeline/async_infer_runner.hpp
index 4e9a4e8a..e116b451 100644
--- a/hailort/libhailort/src/net_flow/pipeline/async_infer_runner.hpp
+++ b/hailort/libhailort/src/net_flow/pipeline/async_infer_runner.hpp
@@ -67,13 +67,14 @@ class AsyncInferRunnerImpl
     virtual ~AsyncInferRunnerImpl();
     AsyncInferRunnerImpl(std::shared_ptr<AsyncPipeline> async_pipeline, std::shared_ptr<std::atomic<hailo_status>> pipeline_status);
 
-    hailo_status run(ConfiguredInferModel::Bindings &bindings, TransferDoneCallbackAsyncInfer transfer_done);
+    hailo_status run(const ConfiguredInferModel::Bindings &bindings, TransferDoneCallbackAsyncInfer transfer_done);
     hailo_status set_buffers(std::unordered_map<std::string, PipelineBuffer> &inputs,
         std::unordered_map<std::string, PipelineBuffer> &outputs);
 
     void abort();
 
-    Expected<bool> can_push_buffers(uint32_t frames_count);
+    // The returned string is the name of the element that can't accept the buffers; it is empty when the buffers can be pushed.
+    Expected<std::pair<bool, std::string>> can_push_buffers(uint32_t frames_count);
 
     void add_element_to_pipeline(std::shared_ptr<PipelineElement> pipeline_element);
     void add_entry_element(std::shared_ptr<PipelineElement> pipeline_element, const std::string &input_name);
diff --git a/hailort/libhailort/src/net_flow/pipeline/async_pipeline_builder.cpp b/hailort/libhailort/src/net_flow/pipeline/async_pipeline_builder.cpp
index 6f5933ef..63a6b8ca 100644
--- a/hailort/libhailort/src/net_flow/pipeline/async_pipeline_builder.cpp
+++ b/hailort/libhailort/src/net_flow/pipeline/async_pipeline_builder.cpp
@@ -104,29 +104,29 @@ hailo_status AsyncPipelineBuilder::create_pre_async_hw_elements_per_input(std::s
     for (const auto &stream_name : stream_names) {
         CHECK(contains(named_stream_infos, stream_name), HAILO_INTERNAL_FAILURE);
         const auto &input_stream_info = named_stream_infos.at(stream_name);
-
         auto src_format = inputs_formats.at(vstream_name);
+        if (is_multi_planar) {
+            /* In multi-planar case, the format order of each plane (stream) is determined by the ll-stream's order.
+               Type and flags are determined by the vstream params */
+            src_format.order = input_stream_info.format.order;
+        }
+
         TRY(const auto sink_index, async_pipeline->get_async_hw_element()->get_sink_index_from_input_stream_name(stream_name));
+        TRY(const auto should_transform, InputTransformContext::is_transformation_required(input_stream_info.shape,
+            src_format, input_stream_info.hw_shape, input_stream_info.format,
+            std::vector<hailo_quant_info_t>(1, input_stream_info.quant_info))); // Inputs always have single quant_info
 
         if(is_multi_planar) {
-            is_empty = true;
-            interacts_with_hw = false;
+            is_empty = true; // pix buffer splitter doesnt do any copy, so no need to allocate buffers for its following queues
+            interacts_with_hw = !should_transform; // If not doing transformations, this queue interacts with HW elem
             TRY(auto post_split_push_queue, add_push_queue_element(
                 PipelineObject::create_element_name("PostSplitPushQEl", stream_name, sink_index),
                 async_pipeline, 0, is_empty, interacts_with_hw, nullptr));
             CHECK_SUCCESS(PipelinePad::link_pads(multi_plane_splitter, post_split_push_queue, plane_index++));
 
             last_element_connected_to_pipeline = post_split_push_queue;
-
-            /* In multi-planar case, the format order of each plane (stream) is determined by the ll-stream's order.
-               Type and flags are determined by the vstream params */
-            src_format.order = input_stream_info.format.order;
         }
 
-        TRY(const auto should_transform, InputTransformContext::is_transformation_required(input_stream_info.shape,
-            src_format, input_stream_info.hw_shape, input_stream_info.format,
-            std::vector<hailo_quant_info_t>(1, input_stream_info.quant_info))); // Inputs always have single quant_info
-
         if (should_transform) {
             TRY(auto pre_infer_elem, PreInferElement::create(input_stream_info.shape, src_format,
                 input_stream_info.hw_shape, input_stream_info.format, { input_stream_info.quant_info },
@@ -334,7 +334,7 @@ hailo_status AsyncPipelineBuilder::add_output_demux_flow(const std::string &outp
                 post_infer_elem));
         } else {
             TRY(auto last_async_element, add_last_async_element(async_pipeline, output_format.first, edge_info.hw_frame_size,
-                demux_elem));
+                demux_elem, i));
         }
         i++;
     }
diff --git a/hailort/libhailort/src/net_flow/pipeline/configured_infer_model_hrpc_client.cpp b/hailort/libhailort/src/net_flow/pipeline/configured_infer_model_hrpc_client.cpp
index 32ad5d12..8bc85a2a 100644
--- a/hailort/libhailort/src/net_flow/pipeline/configured_infer_model_hrpc_client.cpp
+++ b/hailort/libhailort/src/net_flow/pipeline/configured_infer_model_hrpc_client.cpp
@@ -9,161 +9,20 @@
 
 #include "configured_infer_model_hrpc_client.hpp"
 #include "hailo/hailort.h"
-#include <iostream>
 
 namespace hailort
 {
 
-Expected<MemoryView> InferStreamOnStack::get_buffer()
-{
-    return MemoryView(m_buffer);
-}
-
-Expected<OutputBindingsOnStack> OutputBindingsOnStack::create(ConfiguredInferModel::Bindings bindings,
-    const std::vector<std::string> &outputs_names)
-{
-    std::unordered_map<std::string, InferStreamOnStack> output_streams;
-    for (const auto &output_name : outputs_names) {
-        TRY(auto output, bindings.output(output_name));
-        TRY(auto buffer, output.get_buffer());
-        output_streams.emplace(output_name, InferStreamOnStack(buffer));
-    }
-    return OutputBindingsOnStack(std::move(output_streams));
-}
-
-Expected<InferStreamOnStack> OutputBindingsOnStack::output()
-{
-    CHECK_AS_EXPECTED(1 == m_output_streams.size(), HAILO_INVALID_OPERATION, "Model has more than one output!");
-    auto copy = m_output_streams.begin()->second;
-    return copy;
-}
-
-Expected<InferStreamOnStack> OutputBindingsOnStack::output(const std::string &name)
-{
-    CHECK_AS_EXPECTED(contains(m_output_streams, name), HAILO_NOT_FOUND, "Output {}, not found!", name);
-    auto copy = m_output_streams.at(name);
-    return copy;
-}
-
-AsyncInferJobHrpcClient::AsyncInferJobHrpcClient(EventPtr event) : m_event(event)
-{
-}
-
-hailo_status AsyncInferJobHrpcClient::wait(std::chrono::milliseconds timeout)
-{
-    return m_event->wait(timeout);
-}
-
-CallbacksQueue::CallbacksQueue(std::shared_ptr<hrpc::Client> client, const std::vector<std::string> &outputs_names) :
-    m_outputs_names(outputs_names)
-{
-    client->register_custom_reply(HailoRpcActionID::CALLBACK_CALLED,
-    [this, &outputs_names] (const MemoryView &serialized_reply, hrpc::RpcConnection connection) -> hailo_status {
-        TRY(auto tuple, CallbackCalledSerializer::deserialize_reply(serialized_reply));
-
-        auto callback_status = std::get<0>(tuple);
-        auto callback_handle_id = std::get<1>(tuple);
-
-        {
-            std::unique_lock<std::mutex> lock(m_mutex);
-            CHECK(contains(m_callbacks, callback_handle_id), HAILO_NOT_FOUND, "Callback handle not found!");
-            m_callbacks_status[callback_handle_id] = callback_status;
-
-            if (HAILO_SUCCESS == callback_status) {
-                CHECK(contains(m_bindings, callback_handle_id), HAILO_NOT_FOUND, "Callback handle not found!");
-                for (const auto &output_name : outputs_names) {
-                    TRY(auto buffer, m_bindings.at(callback_handle_id).output(output_name)->get_buffer());
-                    auto status = connection.read_buffer(buffer);
-                    // TODO: Errors here should be unrecoverable (HRT-14275)
-                    CHECK_SUCCESS(status);
-                }
-            }
-            m_callbacks_queue.push(callback_handle_id);
-        }
-
-        m_cv.notify_one();
-        return HAILO_SUCCESS;
-    });
-
-    m_is_running = true;
-    m_callback_thread = std::thread([this] {
-        while (true) {
-            callback_id_t callback_id;
-            hailo_status info_status = HAILO_UNINITIALIZED;
-            std::function<void(const AsyncInferCompletionInfo&)> cb;
-            {
-                std::unique_lock<std::mutex> lock(m_mutex);
-                m_cv.wait(lock, [this] { return !m_is_running || !m_callbacks_queue.empty(); });
-                if (!m_is_running) {
-                    break;
-                }
-
-                callback_id = m_callbacks_queue.front();
-                m_callbacks_queue.pop();
-
-                m_cv.wait(lock, [this, callback_id] { return !m_is_running || (m_callbacks.find(callback_id) != m_callbacks.end()); });
-                if (!m_is_running) {
-                    break;
-                }
-
-                info_status = m_callbacks_status[callback_id];
-                cb = m_callbacks[callback_id];
-                m_callbacks.erase(callback_id);
-                m_callbacks_status.erase(callback_id);
-                m_bindings.erase(callback_id);
-            }
-            AsyncInferCompletionInfo info(info_status);
-            cb(info);
-        }
-    });
-}
-
-CallbacksQueue::~CallbacksQueue()
-{
-    {
-        std::unique_lock<std::mutex> lock(m_mutex);
-        m_is_running = false;
-    }
-    m_cv.notify_one();
-    m_callback_thread.join();
-}
-Expected<std::shared_ptr<AsyncInferJobHrpcClient>> CallbacksQueue::register_callback(callback_id_t id,
-    ConfiguredInferModel::Bindings bindings,
-    std::function<void(const AsyncInferCompletionInfo&)> callback)
-{
-    TRY(auto event_ptr, Event::create_shared(Event::State::not_signalled));
-
-    {
-        std::unique_lock<std::mutex> lock(m_mutex);
-        TRY(auto output_bindings, OutputBindingsOnStack::create(bindings, m_outputs_names));
-        m_bindings.emplace(id, output_bindings);
-        m_callbacks_status[id] = HAILO_SUCCESS;
-        m_callbacks[id] = [callback, event_ptr] (const AsyncInferCompletionInfo &info) {
-            auto status = event_ptr->signal();
-            if (HAILO_SUCCESS != status) {
-                LOGGER__CRITICAL("Could not signal event! status = {}", status);
-            }
-            callback(info);
-        };
-    }
-    m_cv.notify_one();
-
-    auto ptr = make_shared_nothrow<AsyncInferJobHrpcClient>(event_ptr);
-    CHECK_NOT_NULL(ptr, HAILO_OUT_OF_HOST_MEMORY);
-
-    return ptr;
-}
-
 Expected<std::shared_ptr<ConfiguredInferModelHrpcClient>> ConfiguredInferModelHrpcClient::create(std::shared_ptr<hrpc::Client> client,
     rpc_object_handle_t handle_id, std::vector<hailo_vstream_info_t> &&input_vstream_infos,
     std::vector<hailo_vstream_info_t> &&output_vstream_infos, uint32_t max_ongoing_transfers,
-    std::unique_ptr<CallbacksQueue> &&callbacks_queue, rpc_object_handle_t infer_model_id,
+    std::shared_ptr<CallbacksQueue> callbacks_queue, rpc_object_handle_t infer_model_id,
     const std::unordered_map<std::string, size_t> inputs_frame_sizes,
     const std::unordered_map<std::string, size_t> outputs_frame_sizes)
 {
     // TODO: consider create a separate client object here - HRT-13687
     auto ptr = make_shared_nothrow<ConfiguredInferModelHrpcClient>(client, handle_id, std::move(input_vstream_infos),
-        std::move(output_vstream_infos), max_ongoing_transfers, std::move(callbacks_queue), infer_model_id, inputs_frame_sizes,
+        std::move(output_vstream_infos), max_ongoing_transfers, callbacks_queue, infer_model_id, inputs_frame_sizes,
         outputs_frame_sizes);
     CHECK_NOT_NULL(ptr, HAILO_OUT_OF_HOST_MEMORY);
 
@@ -187,10 +46,12 @@ ConfiguredInferModelHrpcClient::~ConfiguredInferModelHrpcClient()
         auto result = client->execute_request(HailoRpcActionID::CONFIGURED_INFER_MODEL__DESTROY, MemoryView(*request));
         if (!result) {
             LOGGER__CRITICAL("Failed to destroy configured infer model! status = {}", result.status());
+            return;
         }
 
-        if (HAILO_SUCCESS != DestroyConfiguredInferModelSerializer::deserialize_reply(MemoryView(*result))) {
-            LOGGER__CRITICAL("Failed to destroy configured infer model! status = {}", result.status());
+        auto status = DestroyConfiguredInferModelSerializer::deserialize_reply(MemoryView(*result));
+        if (HAILO_SUCCESS != status) {
+            LOGGER__CRITICAL("Failed to destroy configured infer model! status = {}", status);
         }
     }
 }
@@ -220,12 +81,12 @@ hailo_status ConfiguredInferModelHrpcClient::wait_for_async_ready(std::chrono::m
     bool done = m_cv.wait_for(lock, timeout, [this, frames_count] () {
         return (m_max_ongoing_transfers - m_ongoing_transfers.load()) >= frames_count;
     });
-    CHECK(done, HAILO_TIMEOUT);
+    CHECK(done, HAILO_TIMEOUT, "Waiting for async pipeline to be ready has timed out!");
 
     return HAILO_SUCCESS;
 }
 
-Expected<AsyncInferJob> ConfiguredInferModelHrpcClient::run_async(ConfiguredInferModel::Bindings bindings,
+Expected<AsyncInferJob> ConfiguredInferModelHrpcClient::run_async(const ConfiguredInferModel::Bindings &bindings,
     std::function<void(const AsyncInferCompletionInfo &)> callback)
 {
     auto async_job = run_async_impl(bindings, callback);
@@ -236,7 +97,7 @@ Expected<AsyncInferJob> ConfiguredInferModelHrpcClient::run_async(ConfiguredInfe
     return async_job.release();
 }
 
-Expected<AsyncInferJob> ConfiguredInferModelHrpcClient::run_async_impl(ConfiguredInferModel::Bindings bindings,
+Expected<AsyncInferJob> ConfiguredInferModelHrpcClient::run_async_impl(const ConfiguredInferModel::Bindings &bindings,
     std::function<void(const AsyncInferCompletionInfo &)> callback)
 {
     CHECK_SUCCESS_AS_EXPECTED(validate_bindings(bindings));
@@ -255,43 +116,16 @@ Expected<AsyncInferJob> ConfiguredInferModelHrpcClient::run_async_impl(Configure
 
     TRY(auto job_ptr, m_callbacks_queue->register_callback(m_callbacks_counter, bindings, callback_wrapper));
 
-    TRY(auto request, RunAsyncSerializer::serialize_request(m_handle_id, m_infer_model_handle_id,
-        m_callbacks_counter));
+    TRY(auto input_buffer_sizes, get_input_buffer_sizes(bindings));
+    TRY(auto request, RunAsyncSerializer::serialize_request({m_handle_id, m_infer_model_handle_id,
+        m_callbacks_counter, input_buffer_sizes}));
 
     auto client = m_client.lock();
     CHECK_AS_EXPECTED(nullptr != client, HAILO_INTERNAL_FAILURE,
         "Lost comunication with the server. This may happen if VDevice is released while the ConfiguredInferModel is in use.");
     TRY(auto serialized_result, client->execute_request(HailoRpcActionID::CONFIGURED_INFER_MODEL__RUN_ASYNC,
         MemoryView(request), [this, &bindings] (hrpc::RpcConnection connection) -> hailo_status {
-        for (const auto &input_vstream : m_input_vstream_infos) {
-            TRY(auto input, bindings.input(input_vstream.name));
-            auto buffer_type = ConfiguredInferModelBase::get_infer_stream_buffer_type(input);
-            switch(buffer_type) {
-            case BufferType::VIEW:
-            {
-                TRY(auto buffer, input.get_buffer());
-                auto status = connection.write_buffer(MemoryView(buffer));
-                CHECK_SUCCESS(status);
-                break;
-            }
-            case BufferType::PIX_BUFFER:
-            {
-                TRY(auto pix_buffer, input.get_pix_buffer());
-                for (uint32_t i = 0; i < pix_buffer.number_of_planes; i++) {
-                    auto status = connection.write_buffer(MemoryView(pix_buffer.planes[i].user_ptr, pix_buffer.planes[i].bytes_used));
-                    CHECK_SUCCESS(status);
-                }
-                break;
-            }
-            case BufferType::DMA_BUFFER:
-                LOGGER__CRITICAL("DMA_BUFFER is not supported in HRPC");
-                return HAILO_NOT_IMPLEMENTED;
-            default:
-                LOGGER__CRITICAL("Unknown buffer type");
-                return HAILO_INTERNAL_FAILURE;
-            }
-        }
-        return HAILO_SUCCESS;
+        return write_async_inputs(bindings, connection);
     }));
     auto status = RunAsyncSerializer::deserialize_reply(MemoryView(serialized_result));
     CHECK_SUCCESS_AS_EXPECTED(status);
@@ -304,6 +138,76 @@ Expected<AsyncInferJob> ConfiguredInferModelHrpcClient::run_async_impl(Configure
     return AsyncInferJobBase::create(job_ptr);
 }
 
+Expected<std::vector<uint32_t>> ConfiguredInferModelHrpcClient::get_input_buffer_sizes(const ConfiguredInferModel::Bindings &bindings)
+{ // Collects the size of every bound input buffer, walking m_input_vstream_infos in order (pix buffers contribute one entry per plane).
+    std::vector<uint32_t> buffer_sizes;
+    for (const auto &input_vstream : m_input_vstream_infos) {
+        TRY(auto input, bindings.input(input_vstream.name)); // a failed lookup of this input is propagated to the caller
+        auto buffer_type = ConfiguredInferModelBase::get_infer_stream_buffer_type(input);
+        switch(buffer_type) {
+        case BufferType::VIEW:
+        {
+            TRY(auto buffer, input.get_buffer());
+            buffer_sizes.push_back(static_cast<uint32_t>(buffer.size())); // narrowed to the uint32_t element type of the result
+            break;
+        }
+        case BufferType::PIX_BUFFER:
+        {
+            TRY(auto pix_buffer, input.get_pix_buffer());
+            CHECK_AS_EXPECTED(HAILO_PIX_BUFFER_MEMORY_TYPE_USERPTR == pix_buffer.memory_type, HAILO_NOT_SUPPORTED,
+                "Currently, only userptr pix buffers are supported in HRPC!"); // TODO: HRT-14391
+            for (uint32_t i = 0; i < pix_buffer.number_of_planes; i++) {
+                buffer_sizes.push_back(pix_buffer.planes[i].bytes_used); // one entry per plane, in plane order
+            }
+            break;
+        }
+        case BufferType::DMA_BUFFER:
+            LOGGER__CRITICAL("DMA_BUFFER is not supported in HRPC");
+            return make_unexpected(HAILO_NOT_IMPLEMENTED);
+        default:
+            LOGGER__CRITICAL("Unknown buffer type");
+            return make_unexpected(HAILO_INTERNAL_FAILURE);
+        }
+    }
+    return buffer_sizes; // empty when m_input_vstream_infos is empty
+}
+
+hailo_status ConfiguredInferModelHrpcClient::write_async_inputs(const ConfiguredInferModel::Bindings &bindings,
+    hrpc::RpcConnection connection)
+{ // Writes every bound input buffer to `connection`, walking m_input_vstream_infos in order (pix buffers are written plane by plane).
+    for (const auto &input_vstream : m_input_vstream_infos) {
+        TRY(auto input, bindings.input(input_vstream.name)); // a failed lookup of this input is propagated to the caller
+        auto buffer_type = ConfiguredInferModelBase::get_infer_stream_buffer_type(input);
+        switch(buffer_type) {
+        case BufferType::VIEW:
+        {
+            TRY(auto buffer, input.get_buffer());
+            auto status = connection.write_buffer(MemoryView(buffer));
+            CHECK_SUCCESS(status); // the first failed write aborts the remaining inputs
+            break;
+        }
+        case BufferType::PIX_BUFFER:
+        {
+            TRY(auto pix_buffer, input.get_pix_buffer());
+            CHECK(HAILO_PIX_BUFFER_MEMORY_TYPE_USERPTR == pix_buffer.memory_type, HAILO_NOT_SUPPORTED,
+                "Currently, only userptr pix buffers are supported in HRPC!"); // TODO: HRT-14391
+            for (uint32_t i = 0; i < pix_buffer.number_of_planes; i++) {
+                auto status = connection.write_buffer(MemoryView(pix_buffer.planes[i].user_ptr, pix_buffer.planes[i].bytes_used)); // the view covers bytes_used bytes starting at user_ptr
+                CHECK_SUCCESS(status);
+            }
+            break;
+        }
+        case BufferType::DMA_BUFFER:
+            LOGGER__CRITICAL("DMA_BUFFER is not supported in HRPC");
+            return HAILO_NOT_IMPLEMENTED;
+        default:
+            LOGGER__CRITICAL("Unknown buffer type");
+            return HAILO_INTERNAL_FAILURE;
+        }
+    }
+    return HAILO_SUCCESS; // also reached when m_input_vstream_infos is empty
+}
+
 hailo_status ConfiguredInferModelHrpcClient::set_scheduler_timeout(const std::chrono::milliseconds &timeout)
 {
     TRY(auto serialized_request, SetSchedulerTimeoutSerializer::serialize_request(m_handle_id, timeout));
@@ -394,7 +298,7 @@ Expected<size_t> ConfiguredInferModelHrpcClient::get_async_queue_size()
     return queue_size;
 }
 
-hailo_status ConfiguredInferModelHrpcClient::validate_bindings(ConfiguredInferModel::Bindings bindings)
+hailo_status ConfiguredInferModelHrpcClient::validate_bindings(const ConfiguredInferModel::Bindings &bindings)
 {
     for (const auto &input_vstream : m_input_vstream_infos) {
         TRY(auto input, bindings.input(input_vstream.name));
diff --git a/hailort/libhailort/src/net_flow/pipeline/configured_infer_model_hrpc_client.hpp b/hailort/libhailort/src/net_flow/pipeline/configured_infer_model_hrpc_client.hpp
index 9cac5206..ed813536 100644
--- a/hailort/libhailort/src/net_flow/pipeline/configured_infer_model_hrpc_client.hpp
+++ b/hailort/libhailort/src/net_flow/pipeline/configured_infer_model_hrpc_client.hpp
@@ -13,87 +13,23 @@
 #include "hailo/infer_model.hpp"
 #include "infer_model_internal.hpp"
 #include "hrpc/client.hpp"
+#include "rpc_callbacks/rpc_callbacks_dispatcher.hpp"
 
 namespace hailort
 {
 
-using callback_id_t = uint32_t;
-
-class InferStreamOnStack final
-{
-public:
-    InferStreamOnStack(MemoryView buffer) : m_buffer(buffer) {}
-    Expected<MemoryView> get_buffer();
-
-private:
-    MemoryView m_buffer;
-};
-
-class OutputBindingsOnStack final
-{
-public:
-    static Expected<OutputBindingsOnStack> create(ConfiguredInferModel::Bindings bindings,
-        const std::vector<std::string> &outputs_names);
-    Expected<InferStreamOnStack> output();
-    Expected<InferStreamOnStack> output(const std::string &name);
-
-private:
-    OutputBindingsOnStack(std::unordered_map<std::string, InferStreamOnStack> &&output_streams) :
-        m_output_streams(std::move(output_streams)) {}
-
-    std::unordered_map<std::string, InferStreamOnStack> m_output_streams;
-};
-
-class AsyncInferJobHrpcClient : public AsyncInferJobBase
-{
-public:
-    AsyncInferJobHrpcClient(EventPtr event);
-
-    virtual hailo_status wait(std::chrono::milliseconds timeout) override;
-
-private:
-    EventPtr m_event;
-};
-
-class CallbacksQueue
-{
-public:
-    CallbacksQueue(std::shared_ptr<hrpc::Client> client, const std::vector<std::string> &outputs_names);
-    ~CallbacksQueue();
-
-    CallbacksQueue(const CallbacksQueue &other) = delete;
-    CallbacksQueue& operator=(const CallbacksQueue &other) = delete;
-    CallbacksQueue(CallbacksQueue &&other) = delete;
-    CallbacksQueue& operator=(CallbacksQueue &&other) = delete;
-
-    Expected<std::shared_ptr<AsyncInferJobHrpcClient>> register_callback(callback_id_t id,
-        ConfiguredInferModel::Bindings bindings,
-        std::function<void(const AsyncInferCompletionInfo&)> callback);
-
-private:
-    const std::vector<std::string> m_outputs_names;
-    std::mutex m_mutex;
-    std::condition_variable m_cv;
-    std::queue<callback_id_t> m_callbacks_queue;
-    std::unordered_map<callback_id_t, std::function<void(const AsyncInferCompletionInfo&)>> m_callbacks;
-    std::atomic_bool m_is_running;
-    std::thread m_callback_thread;
-    std::unordered_map<callback_id_t, OutputBindingsOnStack> m_bindings;
-    std::unordered_map<callback_id_t, hailo_status> m_callbacks_status;
-};
-
 class ConfiguredInferModelHrpcClient : public ConfiguredInferModelBase
 {
 public:
     static Expected<std::shared_ptr<ConfiguredInferModelHrpcClient>> create(std::shared_ptr<hrpc::Client> client,
         rpc_object_handle_t handle_id, std::vector<hailo_vstream_info_t> &&input_vstream_infos,
         std::vector<hailo_vstream_info_t> &&output_vstream_infos, uint32_t max_ongoing_transfers,
-        std::unique_ptr<CallbacksQueue> &&callbacks_queue, rpc_object_handle_t infer_model_handle_id,
+        std::shared_ptr<CallbacksQueue> callbacks_queue, rpc_object_handle_t infer_model_handle_id,
         const std::unordered_map<std::string, size_t> inputs_frame_sizes,
         const std::unordered_map<std::string, size_t> outputs_frame_sizes);
     ConfiguredInferModelHrpcClient(std::shared_ptr<hrpc::Client> client, rpc_object_handle_t handle_id,
         std::vector<hailo_vstream_info_t> &&input_vstream_infos, std::vector<hailo_vstream_info_t> &&output_vstream_infos,
-        uint32_t max_ongoing_transfers, std::unique_ptr<CallbacksQueue> &&callbacks_queue, rpc_object_handle_t infer_model_handle_id,
+        uint32_t max_ongoing_transfers, std::shared_ptr<CallbacksQueue> callbacks_queue, rpc_object_handle_t infer_model_handle_id,
         const std::unordered_map<std::string, size_t> inputs_frame_sizes,
         const std::unordered_map<std::string, size_t> outputs_frame_sizes) :
             ConfiguredInferModelBase(inputs_frame_sizes, outputs_frame_sizes),
@@ -114,7 +50,7 @@ class ConfiguredInferModelHrpcClient : public ConfiguredInferModelBase
     virtual hailo_status activate() override;
     virtual hailo_status deactivate() override;
 
-    virtual Expected<AsyncInferJob> run_async(ConfiguredInferModel::Bindings bindings,
+    virtual Expected<AsyncInferJob> run_async(const ConfiguredInferModel::Bindings &bindings,
         std::function<void(const AsyncInferCompletionInfo &)> callback) override;
 
     virtual Expected<LatencyMeasurementResult> get_hw_latency_measurement() override;
@@ -128,9 +64,12 @@ class ConfiguredInferModelHrpcClient : public ConfiguredInferModelBase
     virtual hailo_status shutdown() override;
 
 private:
-    virtual hailo_status validate_bindings(ConfiguredInferModel::Bindings bindings);
-    Expected<AsyncInferJob> run_async_impl(ConfiguredInferModel::Bindings bindings,
+    virtual hailo_status validate_bindings(const ConfiguredInferModel::Bindings &bindings) override;
+    Expected<AsyncInferJob> run_async_impl(const ConfiguredInferModel::Bindings &bindings,
         std::function<void(const AsyncInferCompletionInfo &)> callback);
+    Expected<std::vector<uint32_t>> get_input_buffer_sizes(const ConfiguredInferModel::Bindings &bindings);
+    hailo_status write_async_inputs(const ConfiguredInferModel::Bindings &bindings,
+        hrpc::RpcConnection connection);
 
     std::weak_ptr<hrpc::Client> m_client;
     rpc_object_handle_t m_handle_id;
@@ -140,7 +79,7 @@ class ConfiguredInferModelHrpcClient : public ConfiguredInferModelBase
     std::mutex m_ongoing_transfers_mutex;
     std::condition_variable m_cv;
     std::atomic_uint32_t m_ongoing_transfers;
-    std::unique_ptr<CallbacksQueue> m_callbacks_queue;
+    std::shared_ptr<CallbacksQueue> m_callbacks_queue;
     rpc_object_handle_t m_infer_model_handle_id;
     std::atomic_uint32_t m_callbacks_counter;
     std::mutex m_infer_mutex;
diff --git a/hailort/libhailort/src/net_flow/pipeline/infer_model.cpp b/hailort/libhailort/src/net_flow/pipeline/infer_model.cpp
index 65f46bd1..b8be3c03 100644
--- a/hailort/libhailort/src/net_flow/pipeline/infer_model.cpp
+++ b/hailort/libhailort/src/net_flow/pipeline/infer_model.cpp
@@ -201,57 +201,65 @@ uint32_t InferModelBase::InferStream::nms_max_accumulated_mask_size() const
     return m_pimpl->nms_max_accumulated_mask_size();
 }
 
-Expected<std::shared_ptr<InferModelBase>> InferModelBase::create(VDevice &vdevice, const std::string &hef_path)
+Expected<std::shared_ptr<InferModelBase>> InferModelBase::create(VDevice &vdevice, const std::string &hef_path, const std::string &network_name)
 {
     TRY(auto hef, Hef::create(hef_path));
-    TRY(auto inputs, create_infer_stream_inputs(hef));
-    TRY(auto outputs, create_infer_stream_outputs(hef));
+    TRY(auto inputs, create_infer_stream_inputs(hef, network_name));
+    TRY(auto outputs, create_infer_stream_outputs(hef, network_name));
+
+    if (!network_name.empty()) {
+        // 'network_name' is not really supported (as partial inference is not really supported).
+        // We do support it as network_group_name for LLM models that uses multiple network-groups in a single HEF (not released)
+        const auto network_group_names = hef.get_network_groups_names();
+        CHECK_AS_EXPECTED(std::any_of(network_group_names.begin(), network_group_names.end(),
+            [&network_name] (const auto &name) { return name == network_name; }), HAILO_NOT_IMPLEMENTED,
+            "Passing network name is not supported yet!");
+    }
 
-    auto ptr = make_shared_nothrow<InferModelBase>(vdevice, std::move(hef), std::move(inputs), std::move(outputs));
+    auto ptr = make_shared_nothrow<InferModelBase>(vdevice, std::move(hef), network_name, std::move(inputs), std::move(outputs));
     CHECK_NOT_NULL_AS_EXPECTED(ptr, HAILO_OUT_OF_HOST_MEMORY);
 
     return ptr;
 }
 
-Expected<std::shared_ptr<InferModelBase>> InferModelBase::create(VDevice &vdevice, const MemoryView hef_buffer)
+Expected<std::shared_ptr<InferModelBase>> InferModelBase::create(VDevice &vdevice, const MemoryView hef_buffer, const std::string &network_name)
 {
     TRY(auto hef, Hef::create(hef_buffer));
-    TRY(auto inputs, create_infer_stream_inputs(hef));
-    TRY(auto outputs, create_infer_stream_outputs(hef));
+    TRY(auto inputs, create_infer_stream_inputs(hef, network_name));
+    TRY(auto outputs, create_infer_stream_outputs(hef, network_name));
 
-    auto ptr = make_shared_nothrow<InferModelBase>(vdevice, std::move(hef), std::move(inputs), std::move(outputs));
+    auto ptr = make_shared_nothrow<InferModelBase>(vdevice, std::move(hef), network_name, std::move(inputs), std::move(outputs));
     CHECK_NOT_NULL_AS_EXPECTED(ptr, HAILO_OUT_OF_HOST_MEMORY);
 
     return ptr;
 }
 
-InferModelBase::InferModelBase(VDevice &vdevice, Hef &&hef, std::unordered_map<std::string, InferModelBase::InferStream> &&inputs,
-        std::unordered_map<std::string, InferModelBase::InferStream> &&outputs)
-    : m_vdevice(vdevice), m_hef(std::move(hef)), m_inputs(std::move(inputs)), m_outputs(std::move(outputs)),
-    m_config_params(HailoRTDefaults::get_configure_params())
+InferModelBase::InferModelBase(VDevice &vdevice, Hef &&hef, const std::string &network_name,
+        std::vector<InferModelBase::InferStream> &&inputs, std::vector<InferModelBase::InferStream> &&outputs)
+    : m_vdevice(vdevice), m_hef(std::move(hef)), m_network_name(network_name), m_inputs_vector(std::move(inputs)),
+    m_outputs_vector(std::move(outputs)), m_config_params(HailoRTDefaults::get_configure_params())
 {
-    m_inputs_vector.reserve(m_inputs.size());
-    m_input_names.reserve(m_inputs.size());
-    for (const auto &pair : m_inputs) {
-        m_inputs_vector.push_back(pair.second);
-        m_input_names.push_back(pair.first);
+    m_input_names.reserve(m_inputs_vector.size());
+    for (const auto &input : m_inputs_vector) {
+        m_inputs.emplace(input.name(), input);
+        m_input_names.push_back(input.name());
     }
 
-    m_outputs_vector.reserve(m_outputs.size());
-    m_output_names.reserve(m_outputs.size());
-    for (const auto &pair : m_outputs) {
-        m_outputs_vector.push_back(pair.second);
-        m_output_names.push_back(pair.first);
+    m_output_names.reserve(m_outputs_vector.size());
+    for (const auto &output : m_outputs_vector) {
+        m_outputs.emplace(output.name(), output);
+        m_output_names.push_back(output.name());
     }
 }
 
 InferModelBase::InferModelBase(InferModelBase &&other) :
     m_vdevice(std::move(other.m_vdevice)),
     m_hef(std::move(other.m_hef)),
-    m_inputs(std::move(other.m_inputs)),
-    m_outputs(std::move(other.m_outputs)),
+    m_network_name(other.m_network_name),
     m_inputs_vector(std::move(other.m_inputs_vector)),
     m_outputs_vector(std::move(other.m_outputs_vector)),
+    m_inputs(std::move(other.m_inputs)),
+    m_outputs(std::move(other.m_outputs)),
     m_input_names(std::move(other.m_input_names)),
     m_output_names(std::move(other.m_output_names)),
     m_config_params(std::move(other.m_config_params))
@@ -280,10 +288,15 @@ void InferModelBase::set_hw_latency_measurement_flags(hailo_latency_measurement_
 
 Expected<ConfiguredInferModel> InferModelBase::configure()
 {
-    auto configure_params = m_vdevice.get().create_configure_params(m_hef);
-    CHECK_EXPECTED(configure_params);
+    NetworkGroupsParamsMap configure_params = {};
+    if (!m_network_name.empty()) {
+        TRY(auto specific_configure_params, m_vdevice.get().create_configure_params(m_hef, m_network_name));
+        configure_params[m_network_name] = specific_configure_params;
+    } else {
+        TRY(configure_params, m_vdevice.get().create_configure_params(m_hef));
+    }
 
-    for (auto &network_group_name_params_pair : *configure_params) {
+    for (auto &network_group_name_params_pair : configure_params) {
         for (auto &stream_params_name_pair : network_group_name_params_pair.second.stream_params_by_name) {
             stream_params_name_pair.second.flags = HAILO_STREAM_FLAGS_ASYNC;
         }
@@ -296,13 +309,20 @@ Expected<ConfiguredInferModel> InferModelBase::configure()
         network_group_name_params_pair.second.latency = m_config_params.latency;
     }
 
-    auto network_groups = m_vdevice.get().configure(m_hef, configure_params.value());
+    auto network_groups = m_vdevice.get().configure(m_hef, configure_params);
     CHECK_EXPECTED(network_groups);
 
-    CHECK_AS_EXPECTED(1 == network_groups->size(), HAILO_INVALID_HEF,
-        "InferModel expects HEF with a single network group. found {}.", network_groups->size());
+    if (network_groups->empty()) {
+        // Given NG name wasnt found in the HEF
+        LOGGER__ERROR("Failed to find model '{}' in the given HEF.", m_network_name);
+        return make_unexpected(HAILO_INVALID_ARGUMENT);
+    } else if (1 != network_groups->size()) {
+        // No name was given, and there are multiple NGs in the HEF
+        LOGGER__ERROR("HEF contains multiple network groups ({}). Please provide a specific model name to infer.", network_groups->size());
+        return make_unexpected(HAILO_INVALID_ARGUMENT);
+    }
 
-    // TODO (HRT-11293) : Remove this check
+    // internal_queue_size should be derived from batch_size, keeping this validation to make sure the logic doesnt change
     TRY(auto internal_queue_size, network_groups.value()[0]->get_min_buffer_pool_size());
     CHECK_AS_EXPECTED(internal_queue_size >= m_config_params.batch_size, HAILO_INVALID_OPERATION,
         "Trying to configure a model with a batch={} bigger than internal_queue_size={}, which is not supported. Try using a smaller batch.",
@@ -412,15 +432,15 @@ Expected<ConfiguredInferModel> InferModelBase::configure_for_ut(std::shared_ptr<
 
 Expected<InferModelBase::InferStream> InferModelBase::input()
 {
-    CHECK_AS_EXPECTED(1 == m_inputs.size(), HAILO_INVALID_OPERATION, "Model has more than one input!");
-    auto copy = m_inputs.begin()->second;
+    CHECK_AS_EXPECTED(1 == m_inputs_vector.size(), HAILO_INVALID_OPERATION, "Model has more than one input!");
+    auto copy = *m_inputs_vector.begin();
     return copy;
 }
 
 Expected<InferModelBase::InferStream> InferModelBase::output()
 {
-    CHECK_AS_EXPECTED(1 == m_outputs.size(), HAILO_INVALID_OPERATION, "Model has more than one output!");
-    auto copy = m_outputs.begin()->second;
+    CHECK_AS_EXPECTED(1 == m_outputs_vector.size(), HAILO_INVALID_OPERATION, "Model has more than one output!");
+    auto copy = *m_outputs_vector.begin();
     return copy;
 }
 
@@ -458,35 +478,35 @@ const std::vector<std::string> &InferModelBase::get_output_names() const
     return m_output_names;
 }
 
-Expected<std::unordered_map<std::string, InferModel::InferStream>> InferModelBase::create_infer_stream_inputs(Hef &hef)
+Expected<std::vector< InferModel::InferStream>> InferModelBase::create_infer_stream_inputs(Hef &hef, const std::string &network_name)
 {
-    auto input_vstream_infos = hef.get_input_vstream_infos();
+    auto input_vstream_infos = hef.get_input_vstream_infos(network_name);
     CHECK_EXPECTED(input_vstream_infos);
 
-    std::unordered_map<std::string, InferModel::InferStream> inputs;
+    std::vector<InferModel::InferStream> inputs;
     for (const auto &vstream_info : input_vstream_infos.value()) {
         auto pimpl = make_shared_nothrow<InferModel::InferStream::Impl>(vstream_info);
         CHECK_NOT_NULL_AS_EXPECTED(pimpl, HAILO_OUT_OF_HOST_MEMORY);
 
         InferModel::InferStream stream(pimpl);
-        inputs.emplace(vstream_info.name, std::move(stream));
+        inputs.emplace_back(std::move(stream));
     }
 
     return inputs;
 }
 
-Expected<std::unordered_map<std::string, InferModel::InferStream>> InferModelBase::create_infer_stream_outputs(Hef &hef)
+Expected<std::vector<InferModel::InferStream>> InferModelBase::create_infer_stream_outputs(Hef &hef, const std::string &network_name)
 {
-    auto output_vstream_infos = hef.get_output_vstream_infos();
+    auto output_vstream_infos = hef.get_output_vstream_infos(network_name);
     CHECK_EXPECTED(output_vstream_infos);
 
-    std::unordered_map<std::string, InferModel::InferStream> outputs;
+    std::vector<InferModel::InferStream> outputs;
     for (const auto &vstream_info : output_vstream_infos.value()) {
         auto pimpl = make_shared_nothrow<InferModel::InferStream::Impl>(vstream_info);
         CHECK_NOT_NULL_AS_EXPECTED(pimpl, HAILO_OUT_OF_HOST_MEMORY);
 
         InferModel::InferStream stream(pimpl);
-        outputs.emplace(vstream_info.name, std::move(stream));
+        outputs.emplace_back(std::move(stream));
     }
 
     return outputs;
@@ -527,12 +547,12 @@ hailo_status ConfiguredInferModel::deactivate()
     return m_pimpl->deactivate();
 }
 
-hailo_status ConfiguredInferModel::run(ConfiguredInferModel::Bindings bindings, std::chrono::milliseconds timeout)
+hailo_status ConfiguredInferModel::run(const ConfiguredInferModel::Bindings &bindings, std::chrono::milliseconds timeout)
 {
     return m_pimpl->run(bindings, timeout);
 }
 
-Expected<AsyncInferJob> ConfiguredInferModel::run_async(ConfiguredInferModel::Bindings bindings,
+Expected<AsyncInferJob> ConfiguredInferModel::run_async(const ConfiguredInferModel::Bindings &bindings,
     std::function<void(const AsyncInferCompletionInfo &)> callback)
 {
     auto async_infer_job = m_pimpl->run_async(bindings, callback);
@@ -592,7 +612,7 @@ Expected<AsyncInferJob> ConfiguredInferModel::run_async(const std::vector<Config
         }
     };
 
-    for (const auto &binding : bindings) {
+    for (auto &binding : bindings) {
         TRY(auto partial_job, run_async(binding, transfer_done));
         partial_job.detach();
     }
@@ -637,7 +657,7 @@ void ConfiguredInferModelBase::mark_callback_done(std::shared_ptr<AsyncInferJobI
     job_pimpl->mark_callback_done();
 }
 
-hailo_status ConfiguredInferModelBase::run(ConfiguredInferModel::Bindings bindings, std::chrono::milliseconds timeout)
+hailo_status ConfiguredInferModelBase::run(const ConfiguredInferModel::Bindings &bindings, std::chrono::milliseconds timeout)
 {
     auto job = run_async(bindings, [] (const AsyncInferCompletionInfo &) {});
     CHECK_EXPECTED_AS_STATUS(job);
@@ -706,10 +726,7 @@ Expected<ConfiguredInferModel::Bindings> ConfiguredInferModelImpl::create_bindin
     std::unordered_map<std::string, ConfiguredInferModel::Bindings::InferStream> inputs;
     std::unordered_map<std::string, ConfiguredInferModel::Bindings::InferStream> outputs;
 
-    auto cng = m_cng.lock();
-    CHECK_NOT_NULL_AS_EXPECTED(cng, HAILO_INTERNAL_FAILURE);
-
-    auto input_vstream_infos = cng->get_input_vstream_infos();
+    auto input_vstream_infos = m_cng->get_input_vstream_infos();
     CHECK_EXPECTED(input_vstream_infos);
 
     for (const auto &vstream_info : input_vstream_infos.value()) {
@@ -717,7 +734,7 @@ Expected<ConfiguredInferModel::Bindings> ConfiguredInferModelImpl::create_bindin
         inputs.emplace(vstream_info.name, std::move(stream));
     }
 
-    auto output_vstream_infos = cng->get_output_vstream_infos();
+    auto output_vstream_infos = m_cng->get_output_vstream_infos();
     CHECK_EXPECTED(output_vstream_infos);
 
     for (const auto &vstream_info : output_vstream_infos.value()) {
@@ -733,17 +750,21 @@ hailo_status ConfiguredInferModelImpl::wait_for_async_ready(std::chrono::millise
 {
     std::unique_lock<std::mutex> lock(m_mutex);
     hailo_status status = HAILO_SUCCESS;
-    bool was_successful = m_cv.wait_for(lock, timeout, [this, frames_count, &status] () -> bool {
-        auto pools_are_ready = m_async_infer_runner->can_push_buffers(frames_count);
-        if (HAILO_SUCCESS != pools_are_ready.status()) {
-            status = pools_are_ready.status();
+    std::string elem_name = "";
+    bool was_successful = m_cv.wait_for(lock, timeout, [this, frames_count, &status, &elem_name] () -> bool {
+        auto pools_are_ready_pair = m_async_infer_runner->can_push_buffers(frames_count);
+        if (HAILO_SUCCESS != pools_are_ready_pair.status()) {
+            status = pools_are_ready_pair.status();
             return true;
         }
-        return pools_are_ready.release();
+        elem_name = pools_are_ready_pair->second;
+        return pools_are_ready_pair->first;
     });
     CHECK_SUCCESS(status);
 
-    CHECK(was_successful, HAILO_TIMEOUT, "Got timeout in `wait_for_async_ready`");
+    CHECK(was_successful, HAILO_TIMEOUT,
+        "Got timeout in `wait_for_async_ready` ({}ms) - the edge '{}' could not receive {} transfer-requests",
+        timeout.count(), elem_name, frames_count);
 
     return HAILO_SUCCESS;
 }
@@ -761,10 +782,7 @@ hailo_status ConfiguredInferModelImpl::shutdown()
 
 hailo_status ConfiguredInferModelImpl::activate()
 {
-    auto cng = m_cng.lock();
-    CHECK_NOT_NULL(cng, HAILO_INTERNAL_FAILURE);
-
-    auto activated_ng = cng->activate();
+    auto activated_ng = m_cng->activate();
     CHECK_EXPECTED_AS_STATUS(activated_ng);
 
     m_ang = activated_ng.release();
@@ -778,7 +796,7 @@ hailo_status ConfiguredInferModelImpl::deactivate()
     return HAILO_SUCCESS;
 }
 
-hailo_status ConfiguredInferModelImpl::validate_bindings(ConfiguredInferModel::Bindings bindings)
+hailo_status ConfiguredInferModelImpl::validate_bindings(const ConfiguredInferModel::Bindings &bindings)
 {
     for (const auto &input_name : m_input_names) {
         TRY(auto input, bindings.input(input_name));
@@ -849,7 +867,7 @@ hailo_status ConfiguredInferModelImpl::validate_bindings(ConfiguredInferModel::B
     return HAILO_SUCCESS;
 }
 
-Expected<AsyncInferJob> ConfiguredInferModelImpl::run_async(ConfiguredInferModel::Bindings bindings,
+Expected<AsyncInferJob> ConfiguredInferModelImpl::run_async(const ConfiguredInferModel::Bindings &bindings,
     std::function<void(const AsyncInferCompletionInfo &)> callback)
 {
     CHECK_SUCCESS_AS_EXPECTED(validate_bindings(bindings));
@@ -887,42 +905,27 @@ Expected<AsyncInferJob> ConfiguredInferModelImpl::run_async(ConfiguredInferModel
 
 Expected<LatencyMeasurementResult> ConfiguredInferModelImpl::get_hw_latency_measurement()
 {
-    auto cng = m_cng.lock();
-    CHECK_NOT_NULL_AS_EXPECTED(cng, HAILO_INTERNAL_FAILURE);
-
-    return cng->get_latency_measurement();
+    return m_cng->get_latency_measurement();
 }
 
 hailo_status ConfiguredInferModelImpl::set_scheduler_timeout(const std::chrono::milliseconds &timeout)
 {
-    auto cng = m_cng.lock();
-    CHECK_NOT_NULL(cng, HAILO_INTERNAL_FAILURE);
-
-    return cng->set_scheduler_timeout(timeout);
+    return m_cng->set_scheduler_timeout(timeout);
 }
 
 hailo_status ConfiguredInferModelImpl::set_scheduler_threshold(uint32_t threshold)
 {
-    auto cng = m_cng.lock();
-    CHECK_NOT_NULL(cng, HAILO_INTERNAL_FAILURE);
-
-    return cng->set_scheduler_threshold(threshold);
+    return m_cng->set_scheduler_threshold(threshold);
 }
 
 hailo_status ConfiguredInferModelImpl::set_scheduler_priority(uint8_t priority)
 {
-    auto cng = m_cng.lock();
-    CHECK_NOT_NULL(cng, HAILO_INTERNAL_FAILURE);
-
-    return cng->set_scheduler_priority(priority);
+    return m_cng->set_scheduler_priority(priority);
 }
 
 Expected<size_t> ConfiguredInferModelImpl::get_async_queue_size()
 {
-    auto cng = m_cng.lock();
-    CHECK_NOT_NULL(cng, HAILO_INTERNAL_FAILURE);
-
-    return cng->get_min_buffer_pool_size();
+    return m_cng->get_min_buffer_pool_size();
 }
 
 AsyncInferJob::AsyncInferJob(std::shared_ptr<AsyncInferJobBase> pimpl) : m_pimpl(pimpl), m_should_wait_in_dtor(true)
@@ -943,12 +946,6 @@ AsyncInferJob &AsyncInferJob::operator=(AsyncInferJob &&other)
 
 AsyncInferJob::~AsyncInferJob()
 {
-    if (m_pimpl == nullptr) {
-        // In case the user defines AsyncInferJob object without initializing it with a real object,
-        // the parameter `m_should_wait_in_dtor` is initialized to true and the d'tor calls for `wait()`,
-        // but `m_pimpl` is not initialized, resulting in seg-fault.
-        return;
-    }
     if (m_should_wait_in_dtor) {
         auto status = wait(WAIT_FOR_ASYNC_IN_DTOR_TIMEOUT);
         if (HAILO_SUCCESS != status) {
@@ -960,6 +957,13 @@ AsyncInferJob::~AsyncInferJob()
 hailo_status AsyncInferJob::wait(std::chrono::milliseconds timeout)
 {
     m_should_wait_in_dtor = false;
+    if (m_pimpl == nullptr) {
+        // In case the user defines AsyncInferJob object without initializing it with a real object,
+        // the parameter `m_should_wait_in_dtor` is initialized to true and the d'tor calls for `wait()`,
+        // but `m_pimpl` is not initialized, resulting in seg-fault.
+        return HAILO_SUCCESS;
+    }
+
     auto status = m_pimpl->wait(timeout);
     CHECK_SUCCESS(status);
 
@@ -1027,6 +1031,49 @@ ConfiguredInferModel::Bindings::Bindings(std::unordered_map<std::string, Binding
 {
 }
 
+// Copy c'tor - the new object gets its own copy of every stream of `other` (see init_bindings_from()).
+ConfiguredInferModel::Bindings::Bindings(const Bindings &other)
+{
+    init_bindings_from(other);
+}
+
+// Copy assignment - replaces the streams of this object with copies of the streams of `other`.
+ConfiguredInferModel::Bindings &ConfiguredInferModel::Bindings::operator=(const Bindings &other)
+{
+    if (this != &other) {
+        // init_bindings_from() inserts with emplace(), which keeps an already-existing key untouched.
+        // Drop the current streams first, otherwise a stream that shares its name with a stream of
+        // `other` would silently keep its old buffer.
+        m_inputs.clear();
+        m_outputs.clear();
+        init_bindings_from(other);
+    }
+    return *this;
+}
+
+// Adds to m_inputs/m_outputs a copy (made with InferStream::inner_copy()) of every stream of `other`.
+// A stream that fails to be copied is logged as critical and skipped, so it will be missing from this object.
+void ConfiguredInferModel::Bindings::init_bindings_from(const Bindings &other)
+{
+    for (const auto &input_pair : other.m_inputs) {
+        auto stream = input_pair.second.inner_copy();
+        if (!stream) {
+            LOGGER__CRITICAL("Failed to copy input stream '{}', status = {}", input_pair.first, stream.status());
+            continue;
+        }
+        m_inputs.emplace(input_pair.first, stream.release());
+    }
+
+    for (const auto &output_pair : other.m_outputs) {
+        auto stream = output_pair.second.inner_copy();
+        if (!stream) {
+            LOGGER__CRITICAL("Failed to copy output stream '{}', status = {}", output_pair.first, stream.status());
+            continue;
+        }
+        m_outputs.emplace(output_pair.first, stream.release());
+    }
+}
+
 Expected<ConfiguredInferModel::Bindings::InferStream> ConfiguredInferModel::Bindings::input()
 {
     CHECK_AS_EXPECTED(1 == m_inputs.size(), HAILO_INVALID_OPERATION, "Model has more than one input!");
@@ -1055,8 +1091,36 @@ Expected<ConfiguredInferModel::Bindings::InferStream> ConfiguredInferModel::Bind
     return copy;
 }
 
+Expected<ConfiguredInferModel::Bindings::InferStream> ConfiguredInferModel::Bindings::input() const // const overload: usable on a const Bindings
+{
+    CHECK_AS_EXPECTED(1 == m_inputs.size(), HAILO_INVALID_OPERATION, "Model has more than one input!"); // condition is false for any input count other than 1 (including 0)
+    auto copy = m_inputs.begin()->second; // the single input stream, returned by value
+    return copy;
+}
+
+Expected<ConfiguredInferModel::Bindings::InferStream> ConfiguredInferModel::Bindings::output() const // const overload: usable on a const Bindings
+{
+    CHECK_AS_EXPECTED(1 == m_outputs.size(), HAILO_INVALID_OPERATION, "Model has more than one output!"); // condition is false for any output count other than 1 (including 0)
+    auto copy = m_outputs.begin()->second; // the single output stream, returned by value
+    return copy;
+}
+
+Expected<ConfiguredInferModel::Bindings::InferStream> ConfiguredInferModel::Bindings::input(const std::string &name) const // const overload of the by-name lookup
+{
+    CHECK_AS_EXPECTED(contains(m_inputs, name), HAILO_NOT_FOUND, "Input {} not found!", name); // guards the at() call below; failure is reported as HAILO_NOT_FOUND
+    auto copy = m_inputs.at(name); // returned by value
+    return copy;
+}
+
+Expected<ConfiguredInferModel::Bindings::InferStream> ConfiguredInferModel::Bindings::output(const std::string &name) const // const overload of the by-name lookup
+{
+    CHECK_AS_EXPECTED(contains(m_outputs, name), HAILO_NOT_FOUND, "Output {}, not found!", name); // guards the at() call below; failure is reported as HAILO_NOT_FOUND
+    auto copy = m_outputs.at(name); // returned by value
+    return copy;
+}
+
 ConfiguredInferModel::Bindings::InferStream::Impl::Impl(const hailo_vstream_info_t &vstream_info) :
-    m_name(vstream_info.name),m_buffer_type(BufferType::UNINITIALIZED)
+    m_name(vstream_info.name), m_buffer_type(BufferType::UNINITIALIZED)
 {
 }
 
@@ -1082,7 +1146,7 @@ hailo_status ConfiguredInferModel::Bindings::InferStream::Impl::set_pix_buffer(c
     return HAILO_SUCCESS;
 }
 
-Expected<hailo_pix_buffer_t> ConfiguredInferModel::Bindings::InferStream::Impl::get_pix_buffer()
+Expected<hailo_pix_buffer_t> ConfiguredInferModel::Bindings::InferStream::Impl::get_pix_buffer() const
 {
     CHECK_AS_EXPECTED(BufferType::PIX_BUFFER == m_buffer_type, HAILO_INVALID_OPERATION,
         "Trying to get buffer as pix_buffer for '{}', while it is not configured as pix_buffer", m_name);
@@ -1094,11 +1158,10 @@ hailo_status ConfiguredInferModel::Bindings::InferStream::Impl::set_dma_buffer(h
 {
     m_buffer_type = BufferType::DMA_BUFFER;
     m_dma_buffer = dma_buffer;
-
     return HAILO_SUCCESS;
 }
 
-Expected<hailo_dma_buffer_t> ConfiguredInferModel::Bindings::InferStream::Impl::get_dma_buffer()
+Expected<hailo_dma_buffer_t> ConfiguredInferModel::Bindings::InferStream::Impl::get_dma_buffer() const
 {
     CHECK_AS_EXPECTED(BufferType::DMA_BUFFER == m_buffer_type, HAILO_INVALID_OPERATION,
         "Trying to get buffer as dma_buffer for '{}', while it is not configured as dma_buffer", m_name);
@@ -1135,19 +1198,27 @@ hailo_status ConfiguredInferModel::Bindings::InferStream::set_dma_buffer(hailo_d
     return m_pimpl->set_dma_buffer(dma_buffer);
 }
 
-Expected<MemoryView> ConfiguredInferModel::Bindings::InferStream::get_buffer()
+Expected<MemoryView> ConfiguredInferModel::Bindings::InferStream::get_buffer() const
 {
     return m_pimpl->get_buffer();
 }
 
-Expected<hailo_pix_buffer_t> ConfiguredInferModel::Bindings::InferStream::get_pix_buffer()
+Expected<hailo_pix_buffer_t> ConfiguredInferModel::Bindings::InferStream::get_pix_buffer() const
 {
     return m_pimpl->get_pix_buffer();
 }
 
-Expected<hailo_dma_buffer_t> ConfiguredInferModel::Bindings::InferStream::get_dma_buffer()
+Expected<hailo_dma_buffer_t> ConfiguredInferModel::Bindings::InferStream::get_dma_buffer() const
 {
     return m_pimpl->get_dma_buffer();
 }
 
+Expected<ConfiguredInferModel::Bindings::InferStream> ConfiguredInferModel::Bindings::InferStream::inner_copy() const
+{
+    auto pimpl = make_shared_nothrow<ConfiguredInferModel::Bindings::InferStream::Impl>(*m_pimpl); // copy-constructs a separate Impl from the current one
+    CHECK_NOT_NULL_AS_EXPECTED(pimpl, HAILO_OUT_OF_HOST_MEMORY); // a null pointer is reported as HAILO_OUT_OF_HOST_MEMORY
+
+    return ConfiguredInferModel::Bindings::InferStream(pimpl); // new stream object built on the copied Impl
+}
+
 } /* namespace hailort */
diff --git a/hailort/libhailort/src/net_flow/pipeline/infer_model_hrpc_client.cpp b/hailort/libhailort/src/net_flow/pipeline/infer_model_hrpc_client.cpp
index ce0032da..2ed0cb5d 100644
--- a/hailort/libhailort/src/net_flow/pipeline/infer_model_hrpc_client.cpp
+++ b/hailort/libhailort/src/net_flow/pipeline/infer_model_hrpc_client.cpp
@@ -13,26 +13,28 @@
 namespace hailort
 {
 
-Expected<std::shared_ptr<InferModelHrpcClient>> InferModelHrpcClient::create(Hef &&hef,
-    std::shared_ptr<hrpc::Client> client, uint32_t infer_model_handle_id, uint32_t vdevice_handle, VDevice &vdevice)
+Expected<std::shared_ptr<InferModelHrpcClient>> InferModelHrpcClient::create(Hef &&hef, const std::string &network_name,
+    std::shared_ptr<hrpc::Client> client, uint32_t infer_model_handle_id, uint32_t vdevice_handle, VDevice &vdevice,
+    std::shared_ptr<CallbacksDispatcher> callbacks_dispatcher)
 {
-    TRY(auto inputs, create_infer_stream_inputs(hef));
-    TRY(auto outputs, create_infer_stream_outputs(hef));
+    TRY(auto inputs, create_infer_stream_inputs(hef, network_name));
+    TRY(auto outputs, create_infer_stream_outputs(hef, network_name));
 
     auto ptr = make_shared_nothrow<InferModelHrpcClient>(client, infer_model_handle_id,
-        vdevice_handle, vdevice, std::move(hef), std::move(inputs), std::move(outputs));
+        vdevice_handle, vdevice, callbacks_dispatcher, std::move(hef), network_name, std::move(inputs), std::move(outputs));
     CHECK_NOT_NULL_AS_EXPECTED(ptr, HAILO_OUT_OF_HOST_MEMORY);
 
     return ptr;
 }
 
 InferModelHrpcClient::InferModelHrpcClient(std::shared_ptr<hrpc::Client> client, uint32_t handle,
-    uint32_t vdevice_handle, VDevice &vdevice, Hef &&hef,
-    std::unordered_map<std::string, InferStream> &&inputs, std::unordered_map<std::string, InferStream> &&outputs) :
-        InferModelBase(vdevice, std::move(hef), std::move(inputs), std::move(outputs)),
+    uint32_t vdevice_handle, VDevice &vdevice, std::shared_ptr<CallbacksDispatcher> callbacks_dispatcher,
+    Hef &&hef, const std::string &network_name, std::vector<InferStream> &&inputs, std::vector<InferStream> &&outputs) :
+        InferModelBase(vdevice, std::move(hef), network_name, std::move(inputs), std::move(outputs)),
         m_client(client),
         m_handle(handle),
-        m_vdevice_handle(vdevice_handle)
+        m_vdevice_handle(vdevice_handle),
+        m_callbacks_dispatcher(callbacks_dispatcher)
 {
 }
 
@@ -105,7 +107,7 @@ Expected<ConfiguredInferModel> InferModelHrpcClient::configure()
         MemoryView(request)));
     TRY(auto tuple, CreateConfiguredInferModelSerializer::deserialize_reply(MemoryView(result)));
     CHECK_SUCCESS_AS_EXPECTED(std::get<0>(tuple));
-    auto configured_infer_handle = std::get<1>(tuple);
+    auto configured_infer_model_handle = std::get<1>(tuple);
     auto async_queue_size = std::get<2>(tuple);
 
     std::unordered_map<std::string, size_t> inputs_frame_sizes;
@@ -117,15 +119,17 @@ Expected<ConfiguredInferModel> InferModelHrpcClient::configure()
         outputs_frame_sizes.emplace(output.second.name(), output.second.get_frame_size());
     }
 
-    auto callbacks_queue = make_unique_nothrow<CallbacksQueue>(client, m_output_names);
+    auto callbacks_queue = make_shared_nothrow<CallbacksQueue>(m_output_names);
     CHECK_NOT_NULL_AS_EXPECTED(callbacks_queue, HAILO_OUT_OF_HOST_MEMORY);
 
+    m_callbacks_dispatcher->add(configured_infer_model_handle, callbacks_queue);
+
     TRY(auto input_vstream_infos, m_hef.get_input_vstream_infos());
     TRY(auto output_vstream_infos, m_hef.get_output_vstream_infos());
     TRY(auto cim_client_ptr, ConfiguredInferModelHrpcClient::create(client,
-        configured_infer_handle,
+        configured_infer_model_handle,
         std::move(input_vstream_infos), std::move(output_vstream_infos),
-        async_queue_size, std::move(callbacks_queue), m_handle,
+        async_queue_size, callbacks_queue, m_handle,
         inputs_frame_sizes, outputs_frame_sizes));
 
     return ConfiguredInferModelBase::create(cim_client_ptr);
diff --git a/hailort/libhailort/src/net_flow/pipeline/infer_model_hrpc_client.hpp b/hailort/libhailort/src/net_flow/pipeline/infer_model_hrpc_client.hpp
index 7595406b..14fc084a 100644
--- a/hailort/libhailort/src/net_flow/pipeline/infer_model_hrpc_client.hpp
+++ b/hailort/libhailort/src/net_flow/pipeline/infer_model_hrpc_client.hpp
@@ -14,6 +14,7 @@
 #include "hailo/infer_model.hpp"
 #include "hrpc/client.hpp"
 #include "net_flow/pipeline/infer_model_internal.hpp"
+#include "rpc_callbacks/rpc_callbacks_dispatcher.hpp"
 
 namespace hailort
 {
@@ -21,13 +22,13 @@ namespace hailort
 class InferModelHrpcClient : public InferModelBase
 {
 public:
-    static Expected<std::shared_ptr<InferModelHrpcClient>> create(Hef &&hef,
-        std::shared_ptr<hrpc::Client> client, uint32_t infer_model_handle_id,
-            uint32_t vdevice_handle, VDevice &vdevice);
+    static Expected<std::shared_ptr<InferModelHrpcClient>> create(Hef &&hef, const std::string &network_name,
+        std::shared_ptr<hrpc::Client> client, uint32_t infer_model_handle_id, uint32_t vdevice_handle, VDevice &vdevice,
+        std::shared_ptr<CallbacksDispatcher> callbacks_dispatcher);
 
     InferModelHrpcClient(std::shared_ptr<hrpc::Client> client, uint32_t id,
-        uint32_t vdevice_handle, VDevice &vdevice, Hef &&hef, std::unordered_map<std::string, InferStream> &&inputs,
-        std::unordered_map<std::string, InferStream> &&outputs);
+        uint32_t vdevice_handle, VDevice &vdevice, std::shared_ptr<CallbacksDispatcher> callbacks_dispatcher,
+        Hef &&hef, const std::string &network_name, std::vector<InferStream> &&inputs, std::vector<InferStream> &&outputs);
     virtual ~InferModelHrpcClient();
 
     InferModelHrpcClient(const InferModelHrpcClient &) = delete;
@@ -47,6 +48,7 @@ class InferModelHrpcClient : public InferModelBase
     std::weak_ptr<hrpc::Client> m_client;
     uint32_t m_handle;
     uint32_t m_vdevice_handle;
+    std::shared_ptr<CallbacksDispatcher> m_callbacks_dispatcher;
 };
 
 } /* namespace hailort */
diff --git a/hailort/libhailort/src/net_flow/pipeline/infer_model_internal.hpp b/hailort/libhailort/src/net_flow/pipeline/infer_model_internal.hpp
index 72b22a29..9a3b3702 100644
--- a/hailort/libhailort/src/net_flow/pipeline/infer_model_internal.hpp
+++ b/hailort/libhailort/src/net_flow/pipeline/infer_model_internal.hpp
@@ -29,9 +29,9 @@ class ConfiguredInferModel::Bindings::InferStream::Impl
     hailo_status set_buffer(MemoryView view);
     Expected<MemoryView> get_buffer() const;
     hailo_status set_pix_buffer(const hailo_pix_buffer_t &pix_buffer);
-    Expected<hailo_pix_buffer_t> get_pix_buffer();
+    Expected<hailo_pix_buffer_t> get_pix_buffer() const;
     hailo_status set_dma_buffer(hailo_dma_buffer_t dma_buffer);
-    Expected<hailo_dma_buffer_t> get_dma_buffer();
+    Expected<hailo_dma_buffer_t> get_dma_buffer() const;
     BufferType get_type();
 
     void set_stream_callback(TransferDoneCallbackAsyncInfer callback);
@@ -50,11 +50,11 @@ class ConfiguredInferModel::Bindings::InferStream::Impl
 class InferModelBase : public InferModel
 {
 public:
-    static Expected<std::shared_ptr<InferModelBase>> create(VDevice &vdevice, const std::string &hef_path);
-    static Expected<std::shared_ptr<InferModelBase>> create(VDevice &vdevice, const MemoryView hef_buffer);
+    static Expected<std::shared_ptr<InferModelBase>> create(VDevice &vdevice, const std::string &hef_path, const std::string &network_name);
+    static Expected<std::shared_ptr<InferModelBase>> create(VDevice &vdevice, const MemoryView hef_buffer, const std::string &network_name);
 
-    InferModelBase(VDevice &vdevice, Hef &&hef, std::unordered_map<std::string, InferStream> &&inputs,
-        std::unordered_map<std::string, InferStream> &&outputs);
+    InferModelBase(VDevice &vdevice, Hef &&hef, const std::string &network_name, std::vector<InferStream> &&inputs,
+        std::vector<InferStream> &&outputs);
     virtual ~InferModelBase() = default;
     InferModelBase(InferModelBase &&);
 
@@ -79,15 +79,16 @@ class InferModelBase : public InferModel
         std::shared_ptr<ConfiguredNetworkGroup> net_group = nullptr) override;
 
 protected:
-    static Expected<std::unordered_map<std::string, InferModel::InferStream>> create_infer_stream_inputs(Hef &hef);
-    static Expected<std::unordered_map<std::string, InferModel::InferStream>> create_infer_stream_outputs(Hef &hef);
+    static Expected<std::vector<InferModel::InferStream>> create_infer_stream_inputs(Hef &hef, const std::string &network_name);
+    static Expected<std::vector<InferModel::InferStream>> create_infer_stream_outputs(Hef &hef, const std::string &network_name);
 
     std::reference_wrapper<VDevice> m_vdevice;
     Hef m_hef;
-    std::unordered_map<std::string, InferStream> m_inputs;
-    std::unordered_map<std::string, InferStream> m_outputs;
+    const std::string m_network_name;
     std::vector<InferStream> m_inputs_vector;
     std::vector<InferStream> m_outputs_vector;
+    std::map<std::string, InferStream> m_inputs;
+    std::map<std::string, InferStream> m_outputs;
     std::vector<std::string> m_input_names;
     std::vector<std::string> m_output_names;
     ConfigureNetworkParams m_config_params;
@@ -184,8 +185,8 @@ class ConfiguredInferModelBase
     virtual hailo_status wait_for_async_ready(std::chrono::milliseconds timeout, uint32_t frames_count = 1) = 0;
     virtual hailo_status activate() = 0;
     virtual hailo_status deactivate() = 0;
-    virtual hailo_status run(ConfiguredInferModel::Bindings bindings, std::chrono::milliseconds timeout);
-    virtual Expected<AsyncInferJob> run_async(ConfiguredInferModel::Bindings bindings,
+    virtual hailo_status run(const ConfiguredInferModel::Bindings &bindings, std::chrono::milliseconds timeout);
+    virtual Expected<AsyncInferJob> run_async(const ConfiguredInferModel::Bindings &bindings,
         std::function<void(const AsyncInferCompletionInfo &)> callback = ASYNC_INFER_EMPTY_CALLBACK) = 0;
     virtual Expected<LatencyMeasurementResult> get_hw_latency_measurement() = 0;
     virtual hailo_status set_scheduler_timeout(const std::chrono::milliseconds &timeout) = 0;
@@ -204,7 +205,7 @@ class ConfiguredInferModelBase
     static void mark_callback_done(std::shared_ptr<AsyncInferJobImpl> job_pimpl);
 
 private:
-    virtual hailo_status validate_bindings(ConfiguredInferModel::Bindings bindings) = 0;
+    virtual hailo_status validate_bindings(const ConfiguredInferModel::Bindings &bindings) = 0;
 
 protected:
     std::unordered_map<std::string, size_t> m_inputs_frame_sizes;
@@ -229,7 +230,7 @@ class ConfiguredInferModelImpl : public ConfiguredInferModelBase
     virtual hailo_status wait_for_async_ready(std::chrono::milliseconds timeout, uint32_t frames_count) override;
     virtual hailo_status activate() override;
     virtual hailo_status deactivate() override;
-    virtual Expected<AsyncInferJob> run_async(ConfiguredInferModel::Bindings bindings,
+    virtual Expected<AsyncInferJob> run_async(const ConfiguredInferModel::Bindings &bindings,
         std::function<void(const AsyncInferCompletionInfo &)> callback) override;
     virtual Expected<LatencyMeasurementResult> get_hw_latency_measurement() override;
     virtual hailo_status set_scheduler_timeout(const std::chrono::milliseconds &timeout) override;
@@ -243,9 +244,9 @@ class ConfiguredInferModelImpl : public ConfiguredInferModelBase
         const std::unordered_map<std::string, size_t> inputs_frame_sizes, const std::unordered_map<std::string, size_t> outputs_frame_sizes);
 
 private:
-    virtual hailo_status validate_bindings(ConfiguredInferModel::Bindings bindings);
+    virtual hailo_status validate_bindings(const ConfiguredInferModel::Bindings &bindings) override;
 
-    std::weak_ptr<ConfiguredNetworkGroup> m_cng;
+    std::shared_ptr<ConfiguredNetworkGroup> m_cng;
     std::unique_ptr<ActivatedNetworkGroup> m_ang;
     std::shared_ptr<AsyncInferRunnerImpl> m_async_infer_runner;
     uint32_t m_ongoing_parallel_transfers;
diff --git a/hailort/libhailort/src/net_flow/pipeline/multi_io_elements.cpp b/hailort/libhailort/src/net_flow/pipeline/multi_io_elements.cpp
index c4b41bf8..8cb08d6b 100644
--- a/hailort/libhailort/src/net_flow/pipeline/multi_io_elements.cpp
+++ b/hailort/libhailort/src/net_flow/pipeline/multi_io_elements.cpp
@@ -113,6 +113,9 @@ void BaseMuxElement::run_push_async(PipelineBuffer &&buffer, const PipelinePad &
 
             m_input_buffers.clear();
         }
+    } else {
+        std::unique_lock<std::mutex> lock(m_mutex);
+        m_input_buffers.clear();
     }
 }
 
@@ -975,12 +978,14 @@ void AsyncHwElement::run_push_async(PipelineBuffer &&buffer, const PipelinePad &
     assert(contains(m_sink_name_to_index, sink.name()));
 
     m_barrier->arrive_and_wait();
+    std::unique_lock<std::mutex> lock(m_mutex);
     if (HAILO_SUCCESS == m_pipeline_status->load()) {
-        std::unique_lock<std::mutex> lock(m_mutex);
         m_input_buffers[sink.name()] = std::move(buffer);
         if (m_input_buffers.size() == m_sink_name_to_index.size()) { // Last sink to set its buffer
             action();
         }
+    } else {
+        m_input_buffers.clear();
     }
 }
 
diff --git a/hailort/libhailort/src/net_flow/pipeline/pipeline.cpp b/hailort/libhailort/src/net_flow/pipeline/pipeline.cpp
index ee309a57..b24bbd5e 100644
--- a/hailort/libhailort/src/net_flow/pipeline/pipeline.cpp
+++ b/hailort/libhailort/src/net_flow/pipeline/pipeline.cpp
@@ -898,7 +898,7 @@ hailo_status PipelineElement::empty_buffer_pool(BufferPoolPtr pool, hailo_status
         auto acquired_buffer = pool->acquire_buffer(timeout, true);
 
         if (HAILO_SUCCESS != acquired_buffer.status()) {
-            LOGGER__CRITICAL("Failed to aquire from pool in {} element!", name());
+            LOGGER__CRITICAL("Failed to acquire from pool in {} element!", name());
             return acquired_buffer.status();
         }
 
diff --git a/hailort/libhailort/src/net_flow/pipeline/pipeline.hpp b/hailort/libhailort/src/net_flow/pipeline/pipeline.hpp
index 1f6e1f19..6f20fc61 100644
--- a/hailort/libhailort/src/net_flow/pipeline/pipeline.hpp
+++ b/hailort/libhailort/src/net_flow/pipeline/pipeline.hpp
@@ -17,7 +17,7 @@
 #include "hailo/dma_mapped_buffer.hpp"
 #include "net_flow/ops/nms_post_process.hpp"
 #include "hailo/network_group.hpp"
-#include "utils/thread_safe_queue.hpp"
+#include "common/thread_safe_queue.hpp"
 
 #include <memory>
 #include <thread>
diff --git a/hailort/libhailort/src/net_flow/pipeline/queue_elements.cpp b/hailort/libhailort/src/net_flow/pipeline/queue_elements.cpp
index 5c89b60a..55519ddc 100644
--- a/hailort/libhailort/src/net_flow/pipeline/queue_elements.cpp
+++ b/hailort/libhailort/src/net_flow/pipeline/queue_elements.cpp
@@ -140,6 +140,12 @@ hailo_status BaseQueueElement::execute_post_deactivate(bool should_clear_abort)
     return PipelineElementInternal::execute_post_deactivate(should_clear_abort);
 }
 
+hailo_status BaseQueueElement::clear_queue()
+{
+    std::unique_lock<std::mutex> lock(m_dequeue_mutex);
+    return m_queue.clear();
+}
+
 hailo_status BaseQueueElement::execute_clear()
 {
     auto status = PipelineElementInternal::execute_clear();
@@ -147,7 +153,7 @@ hailo_status BaseQueueElement::execute_clear()
         LOGGER__ERROR("Failed to clear() in {} with status {}", name(), status);
     }
 
-    auto queue_clear_status = m_queue.clear();
+    auto queue_clear_status = clear_queue();
     if (HAILO_SUCCESS != queue_clear_status) {
         LOGGER__ERROR("Failed to clear() in {} with status {}", name(), queue_clear_status);
         status = queue_clear_status;
@@ -295,6 +301,7 @@ hailo_status PushQueueElement::run_push(PipelineBuffer &&buffer, const PipelineP
     if (nullptr != m_queue_size_accumulator) {
         m_queue_size_accumulator->add_data_point(static_cast<double>(m_queue.size_approx()));
     }
+
     status = m_queue.enqueue(std::move(buffer), m_timeout);
     if (HAILO_SHUTDOWN_EVENT_SIGNALED == status) {
         auto queue_thread_status = pipeline_status();
@@ -485,15 +492,24 @@ hailo_status AsyncPushQueueElement::run_push(PipelineBuffer &&/*buffer*/, const
 
 hailo_status AsyncPushQueueElement::run_in_thread()
 {
-    auto buffer = m_queue.dequeue(INIFINITE_TIMEOUT());
-    auto buffer_status = buffer.status();
+    PipelineBuffer buffer;
+    hailo_status buffer_status = HAILO_UNINITIALIZED;
+    {
+        std::unique_lock<std::mutex> lock(m_dequeue_mutex);
+        auto buffer_exp = m_queue.dequeue(INIFINITE_TIMEOUT());
+        buffer_status = buffer_exp.status();
+        if (HAILO_SUCCESS == buffer_status) {
+            buffer = buffer_exp.release();
+        }
+    }
+
     switch (buffer_status) {
     case HAILO_SHUTDOWN_EVENT_SIGNALED:
         break;
 
     case HAILO_SUCCESS:
         // Return if deactivated
-        if (PipelineBuffer::Type::DEACTIVATE == buffer->get_type()) {
+        if (PipelineBuffer::Type::DEACTIVATE == buffer.get_type()) {
             hailo_status status = m_shutdown_event->signal();
             CHECK_SUCCESS(status);
 
@@ -505,7 +521,7 @@ hailo_status AsyncPushQueueElement::run_in_thread()
             return HAILO_SHUTDOWN_EVENT_SIGNALED;
         }
 
-        next_pad().run_push_async(buffer.release());
+        next_pad().run_push_async(std::move(buffer));
         break;
 
     default:
@@ -561,7 +577,8 @@ hailo_status AsyncPushQueueElement::execute_terminate(hailo_status error_status)
 hailo_status AsyncPushQueueElement::execute_dequeue_user_buffers(hailo_status error_status)
 {
     auto dequeue_status = PipelineElement::execute_dequeue_user_buffers(error_status);
-    auto clear_queues_status = m_queue.clear();
+
+    auto clear_queues_status = clear_queue();
     auto empty_pool_status = empty_buffer_pool(m_pool, error_status, m_timeout);
 
     CHECK_SUCCESS(dequeue_status);
@@ -816,10 +833,12 @@ hailo_status UserBufferQueueElement::run_in_thread()
     auto buffer = next_pad().run_pull(optional.release());
     if (HAILO_SHUTDOWN_EVENT_SIGNALED == buffer.status()) {
         LOGGER__INFO("Shutdown event was signaled in run_pull of {}!", name());
+
         return HAILO_SHUTDOWN_EVENT_SIGNALED;
     }
     if (HAILO_STREAM_ABORT == buffer.status()) {
         LOGGER__INFO("run_pull of {} was aborted!", name());
+
         return HAILO_STREAM_ABORT;
     }
     CHECK_EXPECTED_AS_STATUS(buffer);
@@ -839,4 +858,22 @@ std::vector<AccumulatorPtr> UserBufferQueueElement::get_queue_size_accumulators(
     return std::vector<AccumulatorPtr>(); // Since this element is sync, queue state will always be 0
 }
 
+hailo_status PullQueueElement::execute_abort()
+{
+    m_pipeline_status->store(HAILO_STREAM_ABORT);
+
+    // Signal shutdown-event to make run_in_thread finish execution
+    auto status = m_shutdown_event->signal();
+    CHECK_SUCCESS(status);
+
+    status = PipelineElementInternal::execute_abort();
+    CHECK_SUCCESS(status);
+
+    // Wait to confirm the thread running 'run_in_thread' finished execution and the abort flow finished
+    status = m_deactivation_event.wait(std::chrono::milliseconds(HAILO_DEFAULT_VSTREAM_TIMEOUT_MS));
+    CHECK_SUCCESS(status, "Failed to confirm abortion of {}", name());
+
+    return HAILO_SUCCESS;
+}
+
 } /* namespace hailort */
diff --git a/hailort/libhailort/src/net_flow/pipeline/queue_elements.hpp b/hailort/libhailort/src/net_flow/pipeline/queue_elements.hpp
index 77135879..661362da 100644
--- a/hailort/libhailort/src/net_flow/pipeline/queue_elements.hpp
+++ b/hailort/libhailort/src/net_flow/pipeline/queue_elements.hpp
@@ -58,6 +58,9 @@ class BaseQueueElement : public IntermediateElement
     virtual hailo_status run_in_thread() = 0;
     virtual std::string thread_name() = 0;
 
+    hailo_status clear_queue();
+
+    std::mutex m_dequeue_mutex;
     SpscQueue<PipelineBuffer> m_queue;
     EventPtr m_shutdown_event;
     std::chrono::milliseconds m_timeout;
@@ -147,6 +150,8 @@ class PullQueueElement : public BaseQueueElement
     virtual hailo_status execute_deactivate() override;
     virtual hailo_status run_in_thread() override;
     virtual std::string thread_name() override { return "PULL_QUEUE"; };
+    virtual hailo_status execute_abort() override;
+
 };
 
 class UserBufferQueueElement : public PullQueueElement
diff --git a/hailort/libhailort/src/net_flow/pipeline/vstream.cpp b/hailort/libhailort/src/net_flow/pipeline/vstream.cpp
index 500ed3b4..e6e54bb7 100644
--- a/hailort/libhailort/src/net_flow/pipeline/vstream.cpp
+++ b/hailort/libhailort/src/net_flow/pipeline/vstream.cpp
@@ -163,14 +163,7 @@ hailo_status BaseVStream::stop_vstream()
 
 hailo_status BaseVStream::stop_and_clear()
 {
-    auto status = HAILO_SUCCESS;
-    if (nullptr != m_core_op_activated_event) {
-        status = m_core_op_activated_event->wait(std::chrono::milliseconds(0));
-        CHECK(HAILO_TIMEOUT == status, HAILO_INVALID_OPERATION,
-            "Trying to clear {} vstream before its network group is deactivated", name());
-    }
-
-    status = stop_vstream();
+    auto status = stop_vstream();
     CHECK_SUCCESS(status);
 
     status = m_entry_element->clear();
@@ -490,7 +483,14 @@ Expected<OutputVStream> OutputVStream::create(
 
 hailo_status OutputVStream::read(MemoryView buffer)
 {
-    return m_vstream->read(std::move(buffer));
+    auto status = m_vstream->read(std::move(buffer));
+    if (HAILO_TIMEOUT == status) {
+        auto clear_status = m_vstream->clear();
+        if (HAILO_SUCCESS != clear_status) {
+            LOGGER__ERROR("Failed to clear output pipeline '{}' after a timeout. This pipeline is not usable and should be re-created.", name());
+        }
+    }
+    return status;
 }
 
 hailo_status OutputVStream::clear(std::vector<OutputVStream> &vstreams)
@@ -827,7 +827,7 @@ bool InputVStreamImpl::is_multi_planar() const
 }
 
 #ifdef HAILO_SUPPORT_MULTI_PROCESS
-Expected<std::shared_ptr<InputVStreamClient>> InputVStreamClient::create(VStreamIdentifier &&identifier)
+Expected<std::shared_ptr<InputVStreamClient>> InputVStreamClient::create(VStreamIdentifier &&identifier, const std::chrono::milliseconds &timeout)
 {
     grpc::ChannelArguments ch_args;
     ch_args.SetMaxReceiveMessageSize(-1);
@@ -844,12 +844,12 @@ Expected<std::shared_ptr<InputVStreamClient>> InputVStreamClient::create(VStream
     CHECK_EXPECTED(vstream_info);
 
     return std::shared_ptr<InputVStreamClient>(new InputVStreamClient(std::move(client), std::move(identifier),
-        user_buffer_format.release(), vstream_info.release()));
+        user_buffer_format.release(), vstream_info.release(), timeout));
 }
 
 InputVStreamClient::InputVStreamClient(std::unique_ptr<HailoRtRpcClient> client, VStreamIdentifier &&identifier, hailo_format_t &&user_buffer_format,
-    hailo_vstream_info_t &&info) :
-        m_client(std::move(client)), m_identifier(std::move(identifier)), m_user_buffer_format(user_buffer_format), m_info(info) {}
+    hailo_vstream_info_t &&info, const std::chrono::milliseconds &timeout) :
+        m_client(std::move(client)), m_identifier(std::move(identifier)), m_user_buffer_format(user_buffer_format), m_info(info), m_timeout(timeout) {}
 
 InputVStreamClient::~InputVStreamClient()
 {
@@ -861,12 +861,12 @@ InputVStreamClient::~InputVStreamClient()
 
 hailo_status InputVStreamClient::write(const MemoryView &buffer)
 {
-    return m_client->InputVStream_write(m_identifier, buffer);
+    return m_client->InputVStream_write(m_identifier, buffer, m_timeout);
 }
 
 hailo_status InputVStreamClient::write(const hailo_pix_buffer_t &buffer)
 {
-    return m_client->InputVStream_write(m_identifier, buffer);
+    return m_client->InputVStream_write(m_identifier, buffer, m_timeout);
 }
 
 hailo_status InputVStreamClient::flush()
@@ -1053,6 +1053,13 @@ OutputVStreamInternal::OutputVStreamInternal(const hailo_vstream_info_t &vstream
     std::reverse(m_pipeline.begin(), m_pipeline.end());
 }
 
+hailo_status OutputVStreamInternal::clear()
+{
+    CHECK_SUCCESS(stop_and_clear());
+    CHECK_SUCCESS(start_vstream());
+    return HAILO_SUCCESS;
+}
+
 Expected<std::shared_ptr<OutputVStreamImpl>> OutputVStreamImpl::create(const hailo_vstream_info_t &vstream_info,
     const std::vector<hailo_quant_info_t> &quant_infos, const hailo_vstream_params_t &vstream_params,
     std::shared_ptr<PipelineElement> pipeline_entry, std::vector<std::shared_ptr<PipelineElement>> &&pipeline,
@@ -1215,7 +1222,7 @@ hailo_status OutputVStreamImpl::set_nms_max_accumulated_mask_size(uint32_t max_a
 }
 
 #ifdef HAILO_SUPPORT_MULTI_PROCESS
-Expected<std::shared_ptr<OutputVStreamClient>> OutputVStreamClient::create(const VStreamIdentifier &&identifier)
+Expected<std::shared_ptr<OutputVStreamClient>> OutputVStreamClient::create(const VStreamIdentifier &&identifier, const std::chrono::milliseconds &timeout)
 {
     grpc::ChannelArguments ch_args;
     ch_args.SetMaxReceiveMessageSize(-1);
@@ -1232,12 +1239,12 @@ Expected<std::shared_ptr<OutputVStreamClient>> OutputVStreamClient::create(const
     CHECK_EXPECTED(info);
 
     return std::shared_ptr<OutputVStreamClient>(new OutputVStreamClient(std::move(client), std::move(identifier),
-        user_buffer_format.release(), info.release()));
+        user_buffer_format.release(), info.release(), timeout));
 }
 
 OutputVStreamClient::OutputVStreamClient(std::unique_ptr<HailoRtRpcClient> client, const VStreamIdentifier &&identifier, hailo_format_t &&user_buffer_format,
-    hailo_vstream_info_t &&info) :
-        m_client(std::move(client)), m_identifier(std::move(identifier)), m_user_buffer_format(user_buffer_format), m_info(info) {}
+    hailo_vstream_info_t &&info, const std::chrono::milliseconds &timeout) :
+        m_client(std::move(client)), m_identifier(std::move(identifier)), m_user_buffer_format(user_buffer_format), m_info(info), m_timeout(timeout) {}
 
 OutputVStreamClient::~OutputVStreamClient()
 {
@@ -1249,7 +1256,7 @@ OutputVStreamClient::~OutputVStreamClient()
 
 hailo_status OutputVStreamClient::read(MemoryView buffer)
 {
-    return m_client->OutputVStream_read(m_identifier, buffer);
+    return m_client->OutputVStream_read(m_identifier, buffer, m_timeout);
 }
 
 hailo_status OutputVStreamClient::abort()
diff --git a/hailort/libhailort/src/net_flow/pipeline/vstream_builder.cpp b/hailort/libhailort/src/net_flow/pipeline/vstream_builder.cpp
index e8b4e74e..b8fccaa9 100644
--- a/hailort/libhailort/src/net_flow/pipeline/vstream_builder.cpp
+++ b/hailort/libhailort/src/net_flow/pipeline/vstream_builder.cpp
@@ -120,11 +120,6 @@ Expected<std::vector<OutputVStream>> VStreamsBuilderUtils::create_outputs(std::s
     std::vector<std::shared_ptr<PipelineElement>> elements;
     std::vector<OutputVStream> vstreams;
 
-    if (0 != (HAILO_FORMAT_FLAGS_HOST_ARGMAX & output_stream->get_info().format.flags))
-    {
-        LOGGER__WARNING("Using legacy implementation of Argmax in host. Please re-compile your model with latest DFC version");
-    }
-
     EventPtr core_op_activated_event = nullptr;
     if (!output_stream->is_scheduled()) {
         core_op_activated_event = output_stream->get_core_op_activated_event();
diff --git a/hailort/libhailort/src/net_flow/pipeline/vstream_internal.hpp b/hailort/libhailort/src/net_flow/pipeline/vstream_internal.hpp
index 779ced76..57b33b69 100644
--- a/hailort/libhailort/src/net_flow/pipeline/vstream_internal.hpp
+++ b/hailort/libhailort/src/net_flow/pipeline/vstream_internal.hpp
@@ -147,6 +147,7 @@ class OutputVStreamInternal : public BaseVStream
     OutputVStreamInternal &operator=(OutputVStreamInternal &&other) noexcept = default;
     virtual ~OutputVStreamInternal() = default;
 
+    hailo_status clear();
 
     virtual hailo_status read(MemoryView buffer) = 0;
     virtual std::string get_pipeline_description() const override;
@@ -224,7 +225,7 @@ class OutputVStreamImpl : public OutputVStreamInternal
 class InputVStreamClient : public InputVStreamInternal
 {
 public:
-    static Expected<std::shared_ptr<InputVStreamClient>> create(VStreamIdentifier &&identifier);
+    static Expected<std::shared_ptr<InputVStreamClient>> create(VStreamIdentifier &&identifier, const std::chrono::milliseconds &timeout);
     InputVStreamClient(InputVStreamClient &&) noexcept = default;
     InputVStreamClient(const InputVStreamClient &) = delete;
     InputVStreamClient &operator=(InputVStreamClient &&) noexcept = default;
@@ -257,19 +258,20 @@ class InputVStreamClient : public InputVStreamInternal
 
 private:
     InputVStreamClient(std::unique_ptr<HailoRtRpcClient> client, VStreamIdentifier &&identifier, hailo_format_t &&user_buffer_format,
-        hailo_vstream_info_t &&info);
+        hailo_vstream_info_t &&info, const std::chrono::milliseconds &timeout);
     hailo_status create_client();
 
     std::unique_ptr<HailoRtRpcClient> m_client;
     VStreamIdentifier m_identifier;
     hailo_format_t m_user_buffer_format;
     hailo_vstream_info_t m_info;
+    const std::chrono::milliseconds m_timeout;
 };
 
 class OutputVStreamClient : public OutputVStreamInternal
 {
 public:
-    static Expected<std::shared_ptr<OutputVStreamClient>> create(const VStreamIdentifier &&identifier);
+    static Expected<std::shared_ptr<OutputVStreamClient>> create(const VStreamIdentifier &&identifier, const std::chrono::milliseconds &timeout);
     OutputVStreamClient(OutputVStreamClient &&) noexcept = default;
     OutputVStreamClient(const OutputVStreamClient &) = delete;
     OutputVStreamClient &operator=(OutputVStreamClient &&) noexcept = default;
@@ -304,7 +306,7 @@ class OutputVStreamClient : public OutputVStreamInternal
 
 private:
     OutputVStreamClient(std::unique_ptr<HailoRtRpcClient> client, const VStreamIdentifier &&identifier, hailo_format_t &&user_buffer_format,
-        hailo_vstream_info_t &&info);
+        hailo_vstream_info_t &&info, const std::chrono::milliseconds &timeout);
 
     hailo_status create_client();
 
@@ -312,6 +314,7 @@ class OutputVStreamClient : public OutputVStreamInternal
     VStreamIdentifier m_identifier;
     hailo_format_t m_user_buffer_format;
     hailo_vstream_info_t m_info;
+    const std::chrono::milliseconds m_timeout;
 };
 #endif // HAILO_SUPPORT_MULTI_PROCESS
 
diff --git a/hailort/libhailort/src/network_group/CMakeLists.txt b/hailort/libhailort/src/network_group/CMakeLists.txt
index beec975b..88481fa8 100644
--- a/hailort/libhailort/src/network_group/CMakeLists.txt
+++ b/hailort/libhailort/src/network_group/CMakeLists.txt
@@ -1,4 +1,4 @@
-cmake_minimum_required(VERSION 3.0.0)
+cmake_minimum_required(VERSION 3.5.0)
 
 set(SRC_FILES
     ${CMAKE_CURRENT_SOURCE_DIR}/network_group.cpp
diff --git a/hailort/libhailort/src/network_group/network_group.cpp b/hailort/libhailort/src/network_group/network_group.cpp
index bc3de09b..f260925c 100644
--- a/hailort/libhailort/src/network_group/network_group.cpp
+++ b/hailort/libhailort/src/network_group/network_group.cpp
@@ -27,6 +27,10 @@
 #include "utils/buffer_storage.hpp"
 #include "hef/hef_internal.hpp"
 
+#ifdef HAILO_SUPPORT_MULTI_PROCESS
+#include "service/network_group_client.hpp"
+#endif // HAILO_SUPPORT_MULTI_PROCESS
+
 namespace hailort
 {
 
@@ -819,7 +823,7 @@ hailo_status ConfiguredNetworkGroupBase::infer_async(const NamedBuffersCallbacks
             const auto &dma_buffer = named_buffer_callback.second.first.dma_buffer;
             infer_request.transfers.emplace(name, TransferRequest{dma_buffer, callback});
         } else {
-            LOGGER__ERROR("infer_async does not support buffers with type {}", named_buffer_callback.second.first.buffer_type);
+            LOGGER__ERROR("infer_async does not support buffers with type {}", static_cast<int>(named_buffer_callback.second.first.buffer_type));
             return HAILO_INVALID_ARGUMENT;
         }
     }
@@ -889,4 +893,28 @@ hailo_status ConfiguredNetworkGroupBase::update_cache_offset(int32_t offset_delt
     return m_core_ops[0]->update_cache_offset(offset_delta_bytes);
 }
 
+Expected<std::vector<uint32_t>> ConfiguredNetworkGroupBase::get_cache_ids() const
+{
+    CHECK(m_core_ops.size() == 1, HAILO_INVALID_OPERATION,
+        "get_cache_ids() is not supported for multi core-op network groups");
+
+    return m_core_ops[0]->get_cache_ids();
+}
+
+Expected<Buffer> ConfiguredNetworkGroupBase::read_cache_buffer(uint32_t cache_id)
+{
+    CHECK(m_core_ops.size() == 1, HAILO_INVALID_OPERATION,
+        "read_cache_buffer() is not supported for multi core-op network groups");
+
+    return m_core_ops[0]->read_cache_buffer(cache_id);
+}
+
+hailo_status ConfiguredNetworkGroupBase::write_cache_buffer(uint32_t cache_id, MemoryView buffer)
+{
+    CHECK(m_core_ops.size() == 1, HAILO_INVALID_OPERATION,
+        "write_cache_buffer() is not supported for multi core-op network groups");
+
+    return m_core_ops[0]->write_cache_buffer(cache_id, buffer);
+}
+
 } /* namespace hailort */
diff --git a/hailort/libhailort/src/network_group/network_group_internal.hpp b/hailort/libhailort/src/network_group/network_group_internal.hpp
index e037912a..8507dfd4 100644
--- a/hailort/libhailort/src/network_group/network_group_internal.hpp
+++ b/hailort/libhailort/src/network_group/network_group_internal.hpp
@@ -31,26 +31,18 @@
 #include "hailo/network_group.hpp"
 
 #include "common/latency_meter.hpp"
+#include "common/internal_env_vars.hpp"
 
 #include "core_op/active_core_op_holder.hpp"
 #include "core_op/core_op.hpp"
 
 #include "net_flow/ops_metadata/nms_op_metadata.hpp"
 
-#ifdef HAILO_SUPPORT_MULTI_PROCESS
-#include "service/hailort_rpc_client.hpp"
-#include "rpc/rpc_definitions.hpp"
-#endif // HAILO_SUPPORT_MULTI_PROCESS
-
-
 namespace hailort
 {
 using stream_name_t = std::string;
 using op_name_t = std::string;
 
-#define HAILORT_AUTO_UPDATE_CACHE_OFFSET_ENV_VAR "HAILORT_AUTO_UPDATE_CACHE_OFFSET"
-#define HAILORT_AUTO_UPDATE_CACHE_OFFSET_ENV_VAR_DEFAULT "default"
-#define HAILORT_AUTO_UPDATE_CACHE_OFFSET_ENV_VAR_DISABLED "disabled"
 
 class ConfiguredNetworkGroupBase : public ConfiguredNetworkGroup
 {
@@ -207,6 +199,9 @@ class ConfiguredNetworkGroupBase : public ConfiguredNetworkGroup
     virtual hailo_status init_cache(uint32_t read_offset, int32_t write_offset_delta) override;
     virtual Expected<hailo_cache_info_t> get_cache_info() const override;
     virtual hailo_status update_cache_offset(int32_t offset_delta_bytes) override;
+    virtual Expected<std::vector<uint32_t>> get_cache_ids() const override;
+    virtual Expected<Buffer> read_cache_buffer(uint32_t cache_id) override;
+    virtual hailo_status write_cache_buffer(uint32_t cache_id, MemoryView buffer) override;
 
 private:
     ConfiguredNetworkGroupBase(const ConfigureNetworkParams &config_params,
@@ -233,135 +228,6 @@ class ConfiguredNetworkGroupBase : public ConfiguredNetworkGroup
     friend class AsyncPipelineBuilder;
 };
 
-// Move client ng to different header
-#ifdef HAILO_SUPPORT_MULTI_PROCESS
-using NamedBufferCallbackTuple = std::tuple<std::string, MemoryView, std::function<void(hailo_status)>>;
-using NamedBufferCallbackTuplePtr = std::shared_ptr<std::tuple<std::string, MemoryView, std::function<void(hailo_status)>>>;
-
-class ConfiguredNetworkGroupClient : public ConfiguredNetworkGroup
-{
-public:
-    ConfiguredNetworkGroupClient(std::unique_ptr<HailoRtRpcClient> client, NetworkGroupIdentifier &&identifier);
-
-    virtual ~ConfiguredNetworkGroupClient();
-    ConfiguredNetworkGroupClient(const ConfiguredNetworkGroupClient &other) = delete;
-    ConfiguredNetworkGroupClient &operator=(const ConfiguredNetworkGroupClient &other) = delete;
-    ConfiguredNetworkGroupClient &operator=(ConfiguredNetworkGroupClient &&other) = delete;
-    ConfiguredNetworkGroupClient(ConfiguredNetworkGroupClient &&other) = delete;
-
-    virtual const std::string &get_network_group_name() const override;
-    virtual const std::string &name() const override;
-    virtual Expected<hailo_stream_interface_t> get_default_streams_interface() override;
-    virtual std::vector<std::reference_wrapper<InputStream>> get_input_streams_by_interface(hailo_stream_interface_t stream_interface) override;
-    virtual std::vector<std::reference_wrapper<OutputStream>> get_output_streams_by_interface(hailo_stream_interface_t stream_interface) override;
-    virtual ExpectedRef<InputStream> get_input_stream_by_name(const std::string &name) override;
-    virtual ExpectedRef<OutputStream> get_output_stream_by_name(const std::string &name) override;
-    virtual Expected<InputStreamRefVector> get_input_streams_by_network(const std::string &network_name="") override;
-    virtual Expected<OutputStreamRefVector> get_output_streams_by_network(const std::string &network_name="") override;
-    virtual InputStreamRefVector get_input_streams() override;
-    virtual OutputStreamRefVector get_output_streams() override;
-    virtual Expected<OutputStreamWithParamsVector> get_output_streams_from_vstream_names(
-        const std::map<std::string, hailo_vstream_params_t> &outputs_params) override;
-
-    virtual Expected<LatencyMeasurementResult> get_latency_measurement(const std::string &network_name="") override;
-    virtual Expected<std::unique_ptr<ActivatedNetworkGroup>> activate(const hailo_activate_network_group_params_t &network_group_params) override;
-    virtual hailo_status wait_for_activation(const std::chrono::milliseconds &timeout) override;
-    virtual hailo_status shutdown() override;
-
-    virtual Expected<std::map<std::string, hailo_vstream_params_t>> make_input_vstream_params(
-        bool unused, hailo_format_type_t format_type, uint32_t timeout_ms, uint32_t queue_size,
-        const std::string &network_name="") override;
-    virtual Expected<std::map<std::string, hailo_vstream_params_t>> make_output_vstream_params(
-        bool unused, hailo_format_type_t format_type, uint32_t timeout_ms, uint32_t queue_size,
-        const std::string &network_name="") override;
-    virtual Expected<std::vector<std::map<std::string, hailo_vstream_params_t>>> make_output_vstream_params_groups(
-        bool unused, hailo_format_type_t format_type, uint32_t timeout_ms, uint32_t queue_size) override;
-    virtual Expected<std::vector<std::vector<std::string>>> get_output_vstream_groups() override;
-
-    virtual Expected<std::vector<hailo_stream_info_t>> get_all_stream_infos(const std::string &network_name="") const override;
-    virtual Expected<std::vector<hailo_network_info_t>> get_network_infos() const override;
-    virtual Expected<std::vector<hailo_vstream_info_t>> get_input_vstream_infos(const std::string &network_name="") const override;
-    virtual Expected<std::vector<hailo_vstream_info_t>> get_output_vstream_infos(const std::string &network_name="") const override;
-    virtual Expected<std::vector<hailo_vstream_info_t>> get_all_vstream_infos(const std::string &network_name="") const override;
-
-    virtual bool is_scheduled() const override;
-    virtual hailo_status set_scheduler_timeout(const std::chrono::milliseconds &timeout, const std::string &network_name) override;
-    virtual hailo_status set_scheduler_threshold(uint32_t threshold, const std::string &network_name) override;
-    virtual hailo_status set_scheduler_priority(uint8_t priority, const std::string &network_name) override;
-
-    virtual AccumulatorPtr get_activation_time_accumulator() const override;
-    virtual AccumulatorPtr get_deactivation_time_accumulator() const override;
-
-    virtual bool is_multi_context() const override;
-    virtual const ConfigureNetworkParams get_config_params() const override;
-
-    virtual Expected<std::vector<std::string>> get_sorted_output_names() override;
-    virtual Expected<std::vector<std::string>> get_stream_names_from_vstream_name(const std::string &vstream_name) override;
-    virtual Expected<std::vector<std::string>> get_vstream_names_from_stream_name(const std::string &stream_name) override;
-
-    virtual Expected<HwInferResults> run_hw_infer_estimator() override;
-
-    virtual Expected<std::vector<InputVStream>> create_input_vstreams(const std::map<std::string, hailo_vstream_params_t> &inputs_params);
-    virtual Expected<std::vector<OutputVStream>> create_output_vstreams(const std::map<std::string, hailo_vstream_params_t> &outputs_params);
-    virtual Expected<size_t> get_min_buffer_pool_size() override;
-
-    virtual hailo_status before_fork() override;
-    virtual hailo_status after_fork_in_parent() override;
-    virtual hailo_status after_fork_in_child() override;
-
-    virtual Expected<uint32_t> get_client_handle() const override
-    {
-        auto val = m_identifier.m_network_group_handle;
-        return val;
-    };
-
-    virtual Expected<uint32_t> get_vdevice_client_handle() const override
-    {
-        auto val = m_identifier.m_vdevice_identifier.m_vdevice_handle;
-        return val;
-    };
-
-    static Expected<std::shared_ptr<ConfiguredNetworkGroupClient>> duplicate_network_group_client(uint32_t handle, uint32_t vdevice_handle,
-        const std::string &network_group_name);
-
-    virtual hailo_status infer_async(const NamedBuffersCallbacks &named_buffers_callbacks,
-        const std::function<void(hailo_status)> &infer_request_done_cb) override;
-    hailo_status execute_callback(const ProtoCallbackIdentifier &cb_id);
-
-    void execute_callbacks_on_error(hailo_status error_status);
-
-    virtual Expected<std::unique_ptr<LayerInfo>> get_layer_info(const std::string &stream_name) override;
-    virtual Expected<std::vector<net_flow::PostProcessOpMetadataPtr>> get_ops_metadata() override;
-
-    virtual hailo_status set_nms_score_threshold(const std::string &edge_name, float32_t nms_score_threshold) override;
-    virtual hailo_status set_nms_iou_threshold(const std::string &edge_name, float32_t iou_threshold) override;
-    virtual hailo_status set_nms_max_bboxes_per_class(const std::string &edge_name, uint32_t max_bboxes_per_class) override;
-    virtual hailo_status set_nms_max_accumulated_mask_size(const std::string &edge_name, uint32_t max_accumulated_mask_size) override;
-
-    virtual hailo_status init_cache(uint32_t read_offset, int32_t write_offset_delta) override;
-    virtual Expected<hailo_cache_info_t> get_cache_info() const override;
-    virtual hailo_status update_cache_offset(int32_t offset_delta_bytes) override;
-
-private:
-    ConfiguredNetworkGroupClient(NetworkGroupIdentifier &&identifier, const std::string &network_group_name);
-    hailo_status create_client();
-    hailo_status dup_handle();
-    callback_idx_t get_unique_callback_idx();
-    hailo_status execute_infer_request_callback(const ProtoCallbackIdentifier &cb_id);
-    hailo_status execute_transfer_callback(const ProtoCallbackIdentifier &cb_id);
-
-    std::unique_ptr<HailoRtRpcClient> m_client;
-    NetworkGroupIdentifier m_identifier;
-    std::string m_network_group_name;
-    std::atomic<callback_idx_t> m_current_cb_index;
-    std::unordered_set<std::string> m_input_streams_names;
-    std::unordered_set<std::string> m_output_streams_names;
-    std::mutex m_mutex;
-    std::unordered_map<callback_idx_t, NamedBufferCallbackTuplePtr> m_idx_to_callbacks;
-    std::unordered_map<callback_idx_t, std::function<void(hailo_status)>> m_infer_request_idx_to_callbacks;
-};
-#endif // HAILO_SUPPORT_MULTI_PROCESS
-
 } /* namespace hailort */
 
 #endif /* _HAILO_NETWORK_GROUP_INTERNAL_HPP_ */
diff --git a/hailort/libhailort/src/os/CMakeLists.txt b/hailort/libhailort/src/os/CMakeLists.txt
index 8e8273cd..862dee36 100644
--- a/hailort/libhailort/src/os/CMakeLists.txt
+++ b/hailort/libhailort/src/os/CMakeLists.txt
@@ -1,4 +1,4 @@
-cmake_minimum_required(VERSION 3.0.0)
+cmake_minimum_required(VERSION 3.5.0)
 
 if(WIN32)
     set(HAILO_OS_DIR "${CMAKE_CURRENT_SOURCE_DIR}/windows")
diff --git a/hailort/libhailort/src/os/posix/CMakeLists.txt b/hailort/libhailort/src/os/posix/CMakeLists.txt
index b8fd6eea..feccad3e 100644
--- a/hailort/libhailort/src/os/posix/CMakeLists.txt
+++ b/hailort/libhailort/src/os/posix/CMakeLists.txt
@@ -1,4 +1,4 @@
-cmake_minimum_required(VERSION 3.0.0)
+cmake_minimum_required(VERSION 3.5.0)
 
 if (CMAKE_SYSTEM_NAME STREQUAL QNX)
     add_subdirectory(qnx)
@@ -8,8 +8,6 @@ endif()
 
 set(files
     ${CMAKE_CURRENT_SOURCE_DIR}/microsec_timer.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/file_descriptor.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/mmap_buffer.cpp
 )
 
 set(HAILORT_CPP_SOURCES ${HAILORT_CPP_SOURCES} ${files} PARENT_SCOPE)
diff --git a/hailort/libhailort/src/os/posix/linux/CMakeLists.txt b/hailort/libhailort/src/os/posix/linux/CMakeLists.txt
index 779505b3..aafa9b04 100644
--- a/hailort/libhailort/src/os/posix/linux/CMakeLists.txt
+++ b/hailort/libhailort/src/os/posix/linux/CMakeLists.txt
@@ -1,4 +1,4 @@
-cmake_minimum_required(VERSION 3.0.0)
+cmake_minimum_required(VERSION 3.5.0)
 
 set(files
     ${CMAKE_CURRENT_SOURCE_DIR}/event.cpp
diff --git a/hailort/libhailort/src/os/posix/linux/event.cpp b/hailort/libhailort/src/os/posix/linux/event.cpp
index b12d0d32..eae98be9 100644
--- a/hailort/libhailort/src/os/posix/linux/event.cpp
+++ b/hailort/libhailort/src/os/posix/linux/event.cpp
@@ -12,8 +12,10 @@
 
 #include "common/utils.hpp"
 #include "common/event_internal.hpp"
+#include "common/internal_env_vars.hpp"
 
 #include <sys/eventfd.h>
+#include <fcntl.h>
 #include <poll.h>
 #include <utility>
 
@@ -21,6 +23,23 @@
 namespace hailort
 {
 
+#define HIGH_FD_OFFSET (1024)
+
+bool should_use_high_fd()
+{
+    return is_env_variable_on(HAILO_USE_HIGH_FD_ENV_VAR); // when on, newly opened eventfd handles are moved to fds >= HIGH_FD_OFFSET
+}
+
+int move_fd_to_higher(int handle)
+{
+    const int relocated_fd = fcntl(handle, F_DUPFD, HIGH_FD_OFFSET); // duplicate onto the lowest free fd >= HIGH_FD_OFFSET
+    if (relocated_fd == -1) {
+        LOGGER__ERROR("failed to duplicate event FD. errno={}", errno);
+    }
+    close(handle); // the original descriptor is released whether or not the duplicate was created
+    return relocated_fd; // -1 when the duplication failed
+}
+
 Waitable::~Waitable()
 {
     if (-1 != m_handle) {
@@ -167,7 +186,13 @@ underlying_waitable_handle_t Event::open_event_handle(const State& initial_state
     const auto handle = eventfd(state, NO_FLAGS);
     if (-1 == handle) {
         LOGGER__ERROR("Call to eventfd failed with errno={}", errno);
+        return handle;
+    }
+
+    if (should_use_high_fd()) {
+        return move_fd_to_higher(handle);
     }
+
     return handle;
 }
 
@@ -212,7 +237,13 @@ underlying_waitable_handle_t Semaphore::open_semaphore_handle(uint32_t initial_c
     const auto handle = eventfd(initial_count, SEMAPHORE);
     if (-1 == handle) {
         LOGGER__ERROR("Call to eventfd failed with errno={}", errno);
+        return handle;
     }
+
+    if (should_use_high_fd()) {
+        return move_fd_to_higher(handle);
+    }
+
     return handle;
 }
 
diff --git a/hailort/libhailort/src/os/posix/qnx/CMakeLists.txt b/hailort/libhailort/src/os/posix/qnx/CMakeLists.txt
index 779505b3..aafa9b04 100644
--- a/hailort/libhailort/src/os/posix/qnx/CMakeLists.txt
+++ b/hailort/libhailort/src/os/posix/qnx/CMakeLists.txt
@@ -1,4 +1,4 @@
-cmake_minimum_required(VERSION 3.0.0)
+cmake_minimum_required(VERSION 3.5.0)
 
 set(files
     ${CMAKE_CURRENT_SOURCE_DIR}/event.cpp
diff --git a/hailort/libhailort/src/os/windows/CMakeLists.txt b/hailort/libhailort/src/os/windows/CMakeLists.txt
index c7ad1a20..b5fae217 100644
--- a/hailort/libhailort/src/os/windows/CMakeLists.txt
+++ b/hailort/libhailort/src/os/windows/CMakeLists.txt
@@ -1,12 +1,9 @@
-cmake_minimum_required(VERSION 3.0.0)
+cmake_minimum_required(VERSION 3.5.0)
 
 set(files
     ${CMAKE_CURRENT_SOURCE_DIR}/microsec_timer.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/file_descriptor.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/mmap_buffer.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/event.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/dma_buffer_utils.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/virtual_alloc_guard.cpp
 )
 
 set(HAILORT_CPP_SOURCES ${HAILORT_CPP_SOURCES} ${files} PARENT_SCOPE)
diff --git a/hailort/libhailort/src/os/windows/mmap_buffer.cpp b/hailort/libhailort/src/os/windows/mmap_buffer.cpp
deleted file mode 100644
index 95391e91..00000000
--- a/hailort/libhailort/src/os/windows/mmap_buffer.cpp
+++ /dev/null
@@ -1,35 +0,0 @@
-/**
- * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved.
- * Distributed under the MIT license (https://opensource.org/licenses/MIT)
- **/
-/**
- * @file mmap_buffer.cpp
- * @brief Wrapper around windows memory mapping (mmap). Not implemented yet
- **/
-
-#include "os/mmap_buffer.hpp"
-
-namespace hailort
-{
-
-void * const MmapBufferImpl::INVALID_ADDR = NULL;
-
-Expected<MmapBufferImpl> MmapBufferImpl::create_shared_memory(size_t)
-{
-    LOGGER__ERROR("Creating shared memory is not implemented on windows");
-    return make_unexpected(HAILO_NOT_IMPLEMENTED);
-}
-
-Expected<MmapBufferImpl> MmapBufferImpl::create_file_map(size_t, FileDescriptor &, uintptr_t )
-{
-    LOGGER__ERROR("Creating file mapping is not implemented on windows");
-    return make_unexpected(HAILO_NOT_IMPLEMENTED);
-}
-
-hailo_status MmapBufferImpl::unmap()
-{
-    LOGGER__ERROR("Unmapping is not implemented on windows");
-    return HAILO_NOT_IMPLEMENTED;
-}
-
-} /* namespace hailort */
diff --git a/hailort/libhailort/src/rpc_callbacks/CMakeLists.txt b/hailort/libhailort/src/rpc_callbacks/CMakeLists.txt
new file mode 100644
index 00000000..7301b741
--- /dev/null
+++ b/hailort/libhailort/src/rpc_callbacks/CMakeLists.txt
@@ -0,0 +1,7 @@
+cmake_minimum_required(VERSION 3.5.0)
+
+set(SRC_FILES
+    ${CMAKE_CURRENT_SOURCE_DIR}/rpc_callbacks_dispatcher.cpp
+)
+
+set(HAILORT_CPP_SOURCES ${HAILORT_CPP_SOURCES} ${SRC_FILES} PARENT_SCOPE)
diff --git a/hailort/libhailort/src/rpc_callbacks/rpc_callbacks_dispatcher.cpp b/hailort/libhailort/src/rpc_callbacks/rpc_callbacks_dispatcher.cpp
new file mode 100644
index 00000000..2662e466
--- /dev/null
+++ b/hailort/libhailort/src/rpc_callbacks/rpc_callbacks_dispatcher.cpp
@@ -0,0 +1,120 @@
+/**
+ * Copyright (c) 2024 Hailo Technologies Ltd. All rights reserved.
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT)
+**/
+/**
+ * @file rpc_callbacks_dispatcher.cpp
+ * @brief Implementation of the dispatcher and the callbacks queue
+ **/
+
+#include "rpc_callbacks_dispatcher.hpp"
+
+namespace hailort
+{
+
+AsyncInferJobHrpcClient::AsyncInferJobHrpcClient(EventPtr event) : m_event(event) // keeps the event that wait() blocks on
+{
+}
+
+hailo_status AsyncInferJobHrpcClient::wait(std::chrono::milliseconds timeout)
+{
+    return m_event->wait(timeout); // returns whatever status the event's wait() reports for this timeout
+}
+
+CallbacksQueue::CallbacksQueue(const std::vector<std::string> &outputs_names) : m_outputs_names(outputs_names)
+{
+    m_is_running = true; // set before the worker starts; its wait predicates read this flag
+    m_callback_thread = std::thread([this] { // worker: invokes callbacks one at a time, in the order their ids were queued
+        while (true) {
+            callback_id_t callback_id;
+            hailo_status info_status = HAILO_UNINITIALIZED;
+            std::function<void(const AsyncInferCompletionInfo&)> cb;
+            {
+                std::unique_lock<std::mutex> lock(m_mutex);
+                m_cv.wait(lock, [this] { return !m_is_running || !m_callbacks_queue.empty(); }); // sleep until an id is queued or the destructor stops us
+                if (!m_is_running) {
+                    break;
+                }
+
+                callback_id = m_callbacks_queue.front();
+                m_callbacks_queue.pop();
+                // Block until a callback is stored under this id in m_callbacks (or until we are stopped).
+                m_cv.wait(lock, [this, callback_id] { return !m_is_running || (m_callbacks.find(callback_id) != m_callbacks.end()); });
+                if (!m_is_running) {
+                    break;
+                }
+                // Take what is needed out of the maps and drop every entry kept for this id.
+                info_status = m_callbacks_status[callback_id];
+                cb = m_callbacks[callback_id];
+                m_callbacks.erase(callback_id);
+                m_callbacks_status.erase(callback_id);
+                m_bindings.erase(callback_id);
+            } // m_mutex is released here, so the callback below runs without holding it
+            AsyncInferCompletionInfo info(info_status);
+            cb(info);
+        }
+    });
+}
+
+CallbacksQueue::~CallbacksQueue()
+{
+    {
+        std::unique_lock<std::mutex> lock(m_mutex);
+        m_is_running = false; // makes both wait predicates of the worker loop return true so the loop can exit
+    }
+    m_cv.notify_one(); // wakes the worker thread if it is currently blocked on m_cv
+    m_callback_thread.join(); // ids still sitting in the queue at this point are never dispatched
+}
+
+// Registers `callback` and `bindings` under `id`; the returned job's wait() is released when the callback is dispatched.
+Expected<std::shared_ptr<AsyncInferJobHrpcClient>> CallbacksQueue::register_callback(callback_id_t id,
+    const ConfiguredInferModel::Bindings &bindings, std::function<void(const AsyncInferCompletionInfo&)> callback)
+{
+    TRY(auto event_ptr, Event::create_shared(Event::State::not_signalled));
+    // Build the job handle first: if this allocation fails nothing has been published yet, so no stale
+    // entry for `id` is left behind in m_bindings / m_callbacks_status / m_callbacks.
+    auto ptr = make_shared_nothrow<AsyncInferJobHrpcClient>(event_ptr);
+    CHECK_NOT_NULL(ptr, HAILO_OUT_OF_HOST_MEMORY);
+    {
+        std::unique_lock<std::mutex> lock(m_mutex);
+        m_bindings[id] = bindings;
+        m_callbacks_status[id] = HAILO_SUCCESS; // push_callback() overwrites this with the real completion status
+        m_callbacks[id] = [callback, event_ptr] (const AsyncInferCompletionInfo &info) {
+            auto status = event_ptr->signal(); // releases AsyncInferJobHrpcClient::wait() before the user callback runs
+            if (HAILO_SUCCESS != status) {
+                LOGGER__CRITICAL("Could not signal event! status = {}", status);
+            }
+            callback(info);
+        };
+    }
+    m_cv.notify_one(); // the worker thread may be waiting for this id's callback to appear
+
+    return ptr;
+}
+
+hailo_status CallbacksQueue::push_callback(hailo_status callback_status, rpc_object_handle_t callback_handle_id,
+    hrpc::RpcConnection connection)
+{ // Records the completion status of a registered callback id and queues the id for the worker thread.
+    { // everything in this scope, including the connection reads, runs while holding m_mutex
+        std::unique_lock<std::mutex> lock(m_mutex);
+        CHECK(contains(m_callbacks, callback_handle_id), HAILO_NOT_FOUND, "Callback handle (id={}) not found!", callback_handle_id);
+        m_callbacks_status[callback_handle_id] = callback_status;
+        // On success, read each output's data from the connection into the buffer bound to that output.
+        if (HAILO_SUCCESS == callback_status) {
+            CHECK(contains(m_bindings, callback_handle_id), HAILO_NOT_FOUND, "Callback handle not found!");
+            for (const auto &output_name : m_outputs_names) {
+                TRY(auto buffer, m_bindings[callback_handle_id].output(output_name)->get_buffer());
+                auto status = connection.read_buffer(buffer);
+                // TODO: Errors here should be unrecoverable (HRT-14275)
+                CHECK_SUCCESS(status); // NOTE(review): returning here skips the push below, so this id is never dispatched
+            }
+        }
+        m_callbacks_queue.push(callback_handle_id);
+    }
+
+    m_cv.notify_one(); // wake the worker thread that is waiting for a queued id
+    return HAILO_SUCCESS;
+}
+
+
+} // namespace hailort
diff --git a/hailort/libhailort/src/rpc_callbacks/rpc_callbacks_dispatcher.hpp b/hailort/libhailort/src/rpc_callbacks/rpc_callbacks_dispatcher.hpp
new file mode 100644
index 00000000..03b9ca1b
--- /dev/null
+++ b/hailort/libhailort/src/rpc_callbacks/rpc_callbacks_dispatcher.hpp
@@ -0,0 +1,81 @@
+/**
+ * Copyright (c) 2024 Hailo Technologies Ltd. All rights reserved.
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT)
+ **/
+/**
+ * @file rpc_callbacks_dispatcher.hpp
+ * @brief Dispatches callbacks to its specified destination (for each configured infer model).
+ **/
+
+#ifndef _HAILO_RPC_CALLBACKS_DISPATCHER_HPP_
+#define _HAILO_RPC_CALLBACKS_DISPATCHER_HPP_
+
+#include "hailo/infer_model.hpp"
+#include "net_flow/pipeline/infer_model_internal.hpp"
+#include "hrpc_protocol/serializer.hpp"
+
+namespace hailort
+{
+
+using callback_id_t = uint32_t;
+class CallbacksQueue;
+class CallbacksDispatcher // Lookup table from a handle to the CallbacksQueue registered for it; no locking is done inside.
+{
+public:
+    void add(rpc_object_handle_t cim_handle, std::shared_ptr<CallbacksQueue> callbacks_queue)
+    {
+        m_callbacks_dispatcher[cim_handle] = callbacks_queue; // replaces any queue already stored for this handle
+    }
+
+    std::shared_ptr<CallbacksQueue> at(rpc_object_handle_t cim_handle)
+    {
+        return m_callbacks_dispatcher.at(cim_handle); // std::unordered_map::at throws std::out_of_range for an unknown handle
+    }
+
+private:
+    std::unordered_map<rpc_object_handle_t, std::shared_ptr<CallbacksQueue>> m_callbacks_dispatcher;
+};
+
+class AsyncInferJobHrpcClient : public AsyncInferJobBase // Job handle whose wait() is backed by a single event.
+{
+public:
+    AsyncInferJobHrpcClient(EventPtr event); // `event` is the event that wait() blocks on
+
+    virtual hailo_status wait(std::chrono::milliseconds timeout) override; // waits on the event for up to `timeout`
+
+private:
+    EventPtr m_event;
+};
+
+class CallbacksQueue // Owns a worker thread that runs registered callbacks in the order their ids are pushed.
+{
+public:
+    CallbacksQueue(const std::vector<std::string> &outputs_names); // starts the worker thread
+    ~CallbacksQueue(); // stops and joins the worker thread
+    // Neither copyable nor movable: the worker thread's lambda captures `this`.
+    CallbacksQueue(const CallbacksQueue &other) = delete;
+    CallbacksQueue& operator=(const CallbacksQueue &other) = delete;
+    CallbacksQueue(CallbacksQueue &&other) = delete;
+    CallbacksQueue& operator=(CallbacksQueue &&other) = delete;
+    // Stores `callback` and `bindings` under `id` and returns a job whose wait() is released when the callback is dispatched.
+    Expected<std::shared_ptr<AsyncInferJobHrpcClient>> register_callback(callback_id_t id,
+        const ConfiguredInferModel::Bindings &bindings,
+        std::function<void(const AsyncInferCompletionInfo&)> callback);
+    hailo_status push_callback(hailo_status callback_status, rpc_object_handle_t callback_handle_id,
+        hrpc::RpcConnection connection); // records the status, reads outputs on success, then queues the id
+
+private:
+    const std::vector<std::string> m_outputs_names; // outputs read from the connection in push_callback()
+    std::mutex m_mutex; // guards the queue and the three maps below
+    std::condition_variable m_cv; // signalled on register, on push and on destruction
+    std::queue<callback_id_t> m_callbacks_queue; // ids waiting to be dispatched, in push order
+    std::unordered_map<callback_id_t, std::function<void(const AsyncInferCompletionInfo&)>> m_callbacks;
+    std::atomic_bool m_is_running; // cleared by the destructor to stop the worker loop
+    std::thread m_callback_thread;
+    std::unordered_map<callback_id_t, ConfiguredInferModel::Bindings> m_bindings; // per-id bindings whose output buffers get filled
+    std::unordered_map<callback_id_t, hailo_status> m_callbacks_status; // per-id status handed to the callback
+};
+
+} /* namespace hailort */
+
+#endif /* _HAILO_RPC_CALLBACKS_DISPATCHER_HPP_ */
diff --git a/hailort/libhailort/src/service/CMakeLists.txt b/hailort/libhailort/src/service/CMakeLists.txt
index fd42ad34..b34292f2 100644
--- a/hailort/libhailort/src/service/CMakeLists.txt
+++ b/hailort/libhailort/src/service/CMakeLists.txt
@@ -1,6 +1,7 @@
-cmake_minimum_required(VERSION 3.0.0)
+cmake_minimum_required(VERSION 3.5.0)
 
 set(SRC_FILES
+    ${CMAKE_CURRENT_SOURCE_DIR}/buffer_pool_per_stream.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/hailort_rpc_client.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/network_group_client.cpp
 )
diff --git a/hailort/libhailort/src/service/buffer_pool_per_stream.cpp b/hailort/libhailort/src/service/buffer_pool_per_stream.cpp
new file mode 100644
index 00000000..963e4c03
--- /dev/null
+++ b/hailort/libhailort/src/service/buffer_pool_per_stream.cpp
@@ -0,0 +1,199 @@
+/**
+ * Copyright (c) 2024 Hailo Technologies Ltd. All rights reserved.
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT)
+**/
+/**
+ * @file buffer_pool_per_stream.cpp
+ * @brief Buffer pool per stream implementation
+ **/
+
+#include "service/buffer_pool_per_stream.hpp"
+#include "hailo/hailort.h"
+#include "rpc/rpc_definitions.hpp"
+#include "common/shared_memory_buffer.hpp"
+
+#include <sstream>
+
+namespace hailort
+{
+
+// Composes "<valid stream name><sep><buffer index><sep><vdevice handle><sep><network group handle>".
+std::string create_shm_name(const std::string &stream_name, const NetworkGroupIdentifier &identifier, size_t buffer_index)
+{
+    const auto vdevice_handle_str = std::to_string(identifier.m_vdevice_identifier.m_vdevice_handle);
+    const auto network_group_handle_str = std::to_string(identifier.m_network_group_handle);
+    std::ostringstream name_builder;
+    name_builder << SharedMemoryBuffer::get_valid_shm_name(stream_name) << SHARED_MEMORY_NAME_SEPERATOR << std::to_string(buffer_index)
+        << SHARED_MEMORY_NAME_SEPERATOR << vdevice_handle_str << SHARED_MEMORY_NAME_SEPERATOR << network_group_handle_str;
+    return name_builder.str();
+}
+
+// Factory: creates the not-signalled shutdown event and the BufferPoolPerStream that owns it.
+Expected<std::shared_ptr<BufferPoolPerStream>> BufferPoolPerStream::create()
+{
+    TRY(auto shutdown_event, Event::create_shared(Event::State::not_signalled));
+    auto pool_per_stream = make_shared_nothrow<BufferPoolPerStream>(shutdown_event);
+    CHECK_NOT_NULL_AS_EXPECTED(pool_per_stream, HAILO_OUT_OF_HOST_MEMORY);
+
+    return pool_per_stream;
+}
+
+BufferPoolPerStream::BufferPoolPerStream(EventPtr shutdown_event) :
+    m_stream_name_to_buffer_pool(), m_shutdown_event(shutdown_event), m_mutex(), m_cv(), m_is_shutdown(false)
+{} // starts with no pools and not shut down; shutdown_event is the event that shutdown() later signals
+
+Expected<BasicBufferPoolPtr> BufferPoolPerStream::create_stream_buffer_pool(const std::string &stream_name,
+    NetworkGroupIdentifier &identifier, size_t buffer_size, size_t buffer_count,
+    EventPtr shutdown_event)
+{
+    TRY(auto free_buffers_queue,
+        SpscQueue<BufferPtr>::create(buffer_count, shutdown_event, DEFAULT_TRANSFER_TIMEOUT));
+    // Create buffer_count shared-memory buffers of buffer_size bytes; each one is queued as free and kept in `buffers`.
+    std::vector<BufferPtr> buffers;
+    buffers.reserve(buffer_count);
+    for (size_t i = 0; i < buffer_count; i++) {
+        auto shm_name = create_shm_name(stream_name, identifier, i); // name depends on stream, buffer index and identifier handles
+        m_stream_name_to_shm_name[stream_name] = shm_name; // NOTE(review): overwritten each iteration, only the last buffer's name is kept - confirm intended
+        TRY(auto buffer, Buffer::create_shared(buffer_size, BufferStorageParams::create_shared_memory(shm_name)));
+
+        auto status = free_buffers_queue.enqueue(buffer);
+        CHECK_SUCCESS(status);
+
+        buffers.emplace_back(buffer);
+    }
+
+    auto buffer_pool_ptr = make_shared_nothrow<BasicBufferPool>(buffer_size, std::move(buffers),
+        std::move(free_buffers_queue), buffer_count);
+    CHECK_NOT_NULL_AS_EXPECTED(buffer_pool_ptr, HAILO_OUT_OF_HOST_MEMORY);
+
+    return buffer_pool_ptr;
+}
+
+// Creates a pool of `pool_size` buffers of `frame_size` bytes and stores it as the pool of `stream_name`.
+hailo_status BufferPoolPerStream::allocate_pool(const std::string &stream_name, NetworkGroupIdentifier identifier, size_t frame_size, size_t pool_size)
+{
+    // Take m_mutex before building the pool: create_stream_buffer_pool() also writes m_stream_name_to_shm_name,
+    // and reallocate_pool() performs the same call under this lock.
+    std::unique_lock<std::mutex> lock(m_mutex);
+    TRY(auto buffer_pool, create_stream_buffer_pool(stream_name, identifier, frame_size, pool_size, m_shutdown_event));
+    m_stream_name_to_buffer_pool[stream_name] = buffer_pool; // replaces any pool previously stored for this stream
+    return HAILO_SUCCESS;
+}
+
+// Recreates the pool of `stream_name` with buffers of `frame_size` bytes, keeping the previous buffer count.
+hailo_status BufferPoolPerStream::reallocate_pool(const std::string &stream_name, NetworkGroupIdentifier identifier, size_t frame_size)
+{
+    std::unique_lock<std::mutex> lock(m_mutex);
+    CHECK(contains(m_stream_name_to_buffer_pool, stream_name), HAILO_INTERNAL_FAILURE,
+        "reallocate_pool() for stream {} failed, stream name does not exist in buffer pool", stream_name);
+    auto pool_size = m_stream_name_to_buffer_pool[stream_name]->buffers_count(); // presence checked above, so operator[] cannot insert a null pool
+    m_stream_name_to_buffer_pool[stream_name].reset(); // the old pool is dropped before the new one is created
+    TRY(auto buffer_pool, create_stream_buffer_pool(stream_name, identifier, frame_size, pool_size, m_shutdown_event));
+    m_stream_name_to_buffer_pool[stream_name] = buffer_pool;
+    return HAILO_SUCCESS;
+}
+
+Expected<BufferPtr> BufferPoolPerStream::acquire_buffer(const std::string &stream_name)
+{
+    // Blocks until the stream's pool has a free buffer or shutdown() was called, then takes a buffer from the pool.
+    // The lock is taken before the lookup so it cannot race with allocate_pool()/reallocate_pool() updating the map.
+    std::unique_lock<std::mutex> lock(m_mutex);
+    CHECK_AS_EXPECTED(contains(m_stream_name_to_buffer_pool, stream_name), HAILO_INTERNAL_FAILURE,
+        "acquire_buffer() for stream {} failed, stream name does not exist in buffer pool", stream_name);
+    auto pool = m_stream_name_to_buffer_pool.at(stream_name);
+    m_cv.wait(lock, [this, pool] () {
+        return (pool->current_size() > 0) || m_is_shutdown;
+    });
+    if (m_is_shutdown) {
+        LOGGER__INFO("Got shutdown signal while trying to acquire_buffer() for stream {}", stream_name);
+        return make_unexpected(HAILO_SHUTDOWN_EVENT_SIGNALED);
+    }
+    TRY(auto buffer, pool->acquire_buffer());
+    return buffer;
+}
+
+// Gives `buffer` back to the pool of `stream_name` and wakes every thread blocked in acquire_buffer().
+hailo_status BufferPoolPerStream::return_to_pool(const std::string &stream_name, BufferPtr buffer)
+{
+    {
+        std::unique_lock<std::mutex> lock(m_mutex); // taken before the lookup so it cannot race with pool (re)allocation
+        CHECK(contains(m_stream_name_to_buffer_pool, stream_name), HAILO_INTERNAL_FAILURE,
+            "return_to_pool() for stream {} failed, stream name does not exist in buffer pool", stream_name);
+        auto status = m_stream_name_to_buffer_pool.at(stream_name)->return_to_pool(buffer);
+        if (status == HAILO_SHUTDOWN_EVENT_SIGNALED) {
+            LOGGER__INFO("return_to_pool for buffer {} got status {}", stream_name, status); // shutdown is logged, not treated as an error
+        } else {
+            CHECK_SUCCESS(status);
+        }
+    }
+    m_cv.notify_all();
+
+    return HAILO_SUCCESS;
+}
+
+hailo_status BufferPoolPerStream::shutdown()
+{
+    {
+        std::unique_lock<std::mutex> lock(m_mutex);
+        m_is_shutdown = true; // read by the wait predicate in acquire_buffer()
+    }
+    m_cv.notify_all(); // release every thread blocked in acquire_buffer()
+    return m_shutdown_event->signal(); // the same event was handed to each pool's free-buffers queue at creation
+}
+
+// Returns the pool stored for `stream_name`, or HAILO_INTERNAL_FAILURE when no pool exists for that stream.
+Expected<BasicBufferPoolPtr> BufferPoolPerStream::get_pool(const std::string &stream_name)
+{
+    CHECK_AS_EXPECTED(contains(m_stream_name_to_buffer_pool, stream_name), HAILO_INTERNAL_FAILURE,
+        "get_pool() for stream {} failed, stream name does not exist in buffer pool", stream_name);
+    auto res = m_stream_name_to_buffer_pool.at(stream_name); // NOTE(review): m_mutex is not taken in this accessor
+    return res;
+}
+
+// Reports the buffer size of the pool stored for `stream_name`; HAILO_INTERNAL_FAILURE if the stream is unknown.
+Expected<size_t> BufferPoolPerStream::get_buffer_size(const std::string &stream_name)
+{
+    CHECK(contains(m_stream_name_to_buffer_pool, stream_name), HAILO_INTERNAL_FAILURE,
+        "get_buffer_size() for stream {} failed, stream name does not exist in buffer pool", stream_name);
+    return m_stream_name_to_buffer_pool.at(stream_name)->buffer_size();
+}
+
+// Takes one buffer out of `pool` and wraps it in a guard that gives it back when the guard is destroyed.
+Expected<std::shared_ptr<AcquiredBuffer>> AcquiredBuffer::acquire_from_pool(BasicBufferPoolPtr pool)
+{
+    TRY(auto pooled_buffer, pool->acquire_buffer());
+    auto guard = make_shared_nothrow<AcquiredBuffer>(pool, pooled_buffer);
+    CHECK_NOT_NULL_AS_EXPECTED(guard, HAILO_OUT_OF_HOST_MEMORY);
+
+    return guard;
+}
+
+AcquiredBuffer::AcquiredBuffer(BasicBufferPoolPtr pool, BufferPtr buffer) :
+    m_pool(pool), m_buffer(buffer)
+{} // keeps both the buffer and the pool it is returned to by the destructor
+
+AcquiredBuffer::~AcquiredBuffer()
+{
+    const auto return_status = m_pool->return_to_pool(m_buffer); // give the guarded buffer back to its pool
+    if (return_status != HAILO_SUCCESS) {
+        LOGGER__ERROR("Failed to return buffer to pool"); // failure is only logged
+    }
+}
+
+uint8_t* AcquiredBuffer::data()
+{
+    return m_buffer->data(); // data pointer of the guarded buffer
+}
+
+size_t AcquiredBuffer::size() const
+{
+    return m_buffer->size(); // size reported by the guarded buffer
+}
+
+BufferPtr AcquiredBuffer::buffer()
+{
+    return m_buffer; // hands out another reference to the guarded buffer
+}
+
+
+} /* namespace hailort */
diff --git a/hailort/libhailort/src/service/buffer_pool_per_stream.hpp b/hailort/libhailort/src/service/buffer_pool_per_stream.hpp
new file mode 100644
index 00000000..7f894f7e
--- /dev/null
+++ b/hailort/libhailort/src/service/buffer_pool_per_stream.hpp
@@ -0,0 +1,80 @@
+/**
+ * Copyright (c) 2024 Hailo Technologies Ltd. All rights reserved.
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT)
+**/
+/**
+ * @file buffer_pool_per_stream.hpp
+ * @brief Buffers pool per stream for Network group's streams
+ **/
+
+#ifndef _HAILO_BUFFER_POOL_PER_STREAM_HPP_
+#define _HAILO_BUFFER_POOL_PER_STREAM_HPP_
+
+#include "hailo/hailort.h"
+#include "hailo/hailort_common.hpp"
+#include "hailo/dma_mapped_buffer.hpp"
+#include "hailo/buffer.hpp"
+#include "hailo/vdevice.hpp"
+#include "common/thread_safe_queue.hpp"
+#include "common/buffer_pool.hpp"
+#include "rpc/rpc_definitions.hpp"
+
+namespace hailort
+{
+
+using stream_name_t = std::string;
+
+class BufferPoolPerStream // Keeps one BasicBufferPool of shared-memory buffers per stream name.
+{
+public:
+    static Expected<std::shared_ptr<BufferPoolPerStream>> create(); // creates the shutdown event and the object
+
+    BufferPoolPerStream(BufferPoolPerStream &&) = delete;
+    BufferPoolPerStream(const BufferPoolPerStream &) = delete;
+    BufferPoolPerStream &operator=(BufferPoolPerStream &&) = delete;
+    BufferPoolPerStream &operator=(const BufferPoolPerStream &) = delete;
+    virtual ~BufferPoolPerStream() = default;
+    // Creates pool_size buffers of frame_size bytes and stores them as the pool of stream_name
+    hailo_status allocate_pool(const std::string &stream_name, NetworkGroupIdentifier identifier, size_t frame_size, size_t pool_size);
+    // Used in order to reallocate the pool buffers with different frame_size
+    hailo_status reallocate_pool(const std::string &stream_name, NetworkGroupIdentifier identifier, size_t frame_size);
+    Expected<BufferPtr> acquire_buffer(const std::string &stream_name); // blocks until a buffer is free or shutdown() was called
+    hailo_status return_to_pool(const std::string &stream_name, BufferPtr buffer); // returns the buffer and notifies waiters
+    hailo_status shutdown(); // releases waiters of acquire_buffer() and signals the shutdown event
+    Expected<size_t> get_buffer_size(const std::string &stream_name);
+    Expected<BasicBufferPoolPtr> get_pool(const std::string &stream_name);
+
+    BufferPoolPerStream(EventPtr shutdown_event); // public; create() constructs the object through make_shared_nothrow
+private:
+    Expected<BasicBufferPoolPtr> create_stream_buffer_pool(const std::string &stream_name,
+        NetworkGroupIdentifier &identifier, size_t buffer_size, size_t buffer_count,
+        EventPtr shutdown_event);
+
+    std::unordered_map<stream_name_t, BasicBufferPoolPtr> m_stream_name_to_buffer_pool;
+    std::unordered_map<stream_name_t, std::string> m_stream_name_to_shm_name; // written while creating a stream's buffers
+    EventPtr m_shutdown_event; // passed to every pool's free-buffers queue, signalled by shutdown()
+    std::mutex m_mutex;
+    std::condition_variable m_cv; // notified by return_to_pool() and shutdown()
+    bool m_is_shutdown;
+};
+
+// Guard for a buffer from the pool, to return it to pool once destructed
+class AcquiredBuffer 
+{
+public:
+    static Expected<std::shared_ptr<AcquiredBuffer>> acquire_from_pool(BasicBufferPoolPtr pool); // acquires a buffer from `pool` and wraps it
+    AcquiredBuffer(BasicBufferPoolPtr pool, BufferPtr buffer);
+    virtual ~AcquiredBuffer(); // returns the buffer directly to m_pool (the BasicBufferPool), logging on failure
+    // NOTE(review): copy operations are not deleted; a copied guard would return the same buffer a second time - confirm
+    uint8_t *data();
+    size_t size() const;
+    BufferPtr buffer();
+private:
+    BasicBufferPoolPtr m_pool; // pool the buffer is returned to
+    BufferPtr m_buffer; // the guarded buffer
+};
+using AcquiredBufferPtr = std::shared_ptr<AcquiredBuffer>;
+
+} /* namespace hailort */
+
+#endif /* _HAILO_BUFFER_POOL_PER_STREAM_HPP_ */
diff --git a/hailort/libhailort/src/service/hailort_rpc_client.cpp b/hailort/libhailort/src/service/hailort_rpc_client.cpp
index db30581b..68e516f5 100644
--- a/hailort/libhailort/src/service/hailort_rpc_client.cpp
+++ b/hailort/libhailort/src/service/hailort_rpc_client.cpp
@@ -97,7 +97,7 @@ hailo_status HailoRtRpcClient::VDevice_release(const VDeviceIdentifier &identifi
     return HAILO_SUCCESS;
 }
 
-Expected<std::vector<uint32_t>> HailoRtRpcClient::InputVStreams_create(const NetworkGroupIdentifier &identifier,
+Expected<std::unordered_map<std::string, uint32_t>> HailoRtRpcClient::InputVStreams_create(const NetworkGroupIdentifier &identifier,
     const std::map<std::string, hailo_vstream_params_t> &inputs_params, uint32_t pid)
 {
     VStream_create_Request request;
@@ -133,12 +133,13 @@ Expected<std::vector<uint32_t>> HailoRtRpcClient::InputVStreams_create(const Net
     CHECK_GRPC_STATUS_AS_EXPECTED(status);
     assert(reply.status() < HAILO_STATUS_COUNT);
     CHECK_SUCCESS_AS_EXPECTED(static_cast<hailo_status>(reply.status()));
-    std::vector<uint32_t> input_vstreams_handles;
-    input_vstreams_handles.reserve(reply.handles_size());
-    for (auto &handle : *reply.mutable_handles()) {
-        input_vstreams_handles.push_back(handle);
+    std::unordered_map<std::string, uint32_t> input_vstreams_names_to_handles;
+    assert(reply.handles_size() == reply.names_size());
+    for (int i = 0; i < reply.handles_size(); i++) {
+        input_vstreams_names_to_handles.emplace(reply.names(i), reply.handles(i));
     }
-    return input_vstreams_handles;
+
+    return input_vstreams_names_to_handles;
 }
 
 hailo_status HailoRtRpcClient::InputVStream_release(const VStreamIdentifier &identifier, uint32_t pid)
@@ -157,7 +158,7 @@ hailo_status HailoRtRpcClient::InputVStream_release(const VStreamIdentifier &ide
     return HAILO_SUCCESS;
 }
 
-Expected<std::vector<uint32_t>> HailoRtRpcClient::OutputVStreams_create(const NetworkGroupIdentifier &identifier,
+Expected<std::unordered_map<std::string, uint32_t>> HailoRtRpcClient::OutputVStreams_create(const NetworkGroupIdentifier &identifier,
         const std::map<std::string, hailo_vstream_params_t> &output_params, uint32_t pid)
 {
     VStream_create_Request request;
@@ -193,12 +194,13 @@ Expected<std::vector<uint32_t>> HailoRtRpcClient::OutputVStreams_create(const Ne
     CHECK_GRPC_STATUS_AS_EXPECTED(status);
     assert(reply.status() < HAILO_STATUS_COUNT);
     CHECK_SUCCESS_AS_EXPECTED(static_cast<hailo_status>(reply.status()));
-    std::vector<uint32_t> output_vstreams_handles;
-    output_vstreams_handles.reserve(reply.handles_size());
-    for (auto &handle : *reply.mutable_handles()) {
-        output_vstreams_handles.push_back(handle);
+    std::unordered_map<std::string, uint32_t> output_vstreams_names_to_handles;
+    assert(reply.handles_size() == reply.names_size());
+    for (int i = 0; i < reply.handles_size(); i++) {
+        output_vstreams_names_to_handles.emplace(reply.names(i), reply.handles(i));
     }
-    return output_vstreams_handles;
+
+    return output_vstreams_names_to_handles;
 }
 
 hailo_status HailoRtRpcClient::OutputVStream_release(const VStreamIdentifier &identifier, uint32_t pid)
@@ -1466,7 +1468,7 @@ Expected<std::vector<std::string>> HailoRtRpcClient::ConfiguredNetworkGroup_get_
 }
 
 hailo_status HailoRtRpcClient::ConfiguredNetworkGroup_infer_async(const NetworkGroupIdentifier &identifier,
-   const std::vector<std::tuple<callback_idx_t, std::string, MemoryView>> &cb_idx_to_stream_buffer,
+   const std::vector<StreamCbParamsPtr> &streams_cb_params,
    const callback_idx_t infer_request_done_cb, const std::unordered_set<std::string> &input_streams_names)
 {
     ConfiguredNetworkGroup_infer_async_Request request;
@@ -1474,16 +1476,21 @@ hailo_status HailoRtRpcClient::ConfiguredNetworkGroup_infer_async(const NetworkG
     auto proto_identifier = request.mutable_identifier();
     ConfiguredNetworkGroup_convert_identifier_to_proto(identifier, proto_identifier);
     auto proto_transfer_buffers = request.mutable_transfer_requests();
-    for (const auto &idx_named_buffer : cb_idx_to_stream_buffer) {
+    for (const auto &stream_cb_params : streams_cb_params) {
         ProtoTransferRequest proto_transfer_request;
-        proto_transfer_request.set_cb_idx(std::get<0>(idx_named_buffer));
-        const auto &stream_name = std::get<1>(idx_named_buffer);
-        proto_transfer_request.set_stream_name(stream_name);
-        if (contains(input_streams_names, stream_name)) {
-            proto_transfer_request.set_direction(HAILO_H2D_STREAM);
-            proto_transfer_request.set_data(std::get<2>(idx_named_buffer).data(), std::get<2>(idx_named_buffer).size());
+        proto_transfer_request.set_cb_idx(stream_cb_params->m_cb_idx);
+        proto_transfer_request.set_stream_name(stream_cb_params->m_stream_name);
+        auto direction = contains(input_streams_names, stream_cb_params->m_stream_name) ? HAILO_H2D_STREAM : HAILO_D2H_STREAM;
+        proto_transfer_request.set_direction(direction);
+
+        if (stream_cb_params->m_is_shm) {
+            // Use share memory
+            auto shared_memory_identifier = proto_transfer_request.mutable_shared_memory_identifier();
+            shared_memory_identifier->set_name(stream_cb_params->m_shm_name);
+            shared_memory_identifier->set_size(static_cast<uint32_t>(stream_cb_params->m_size));
         } else {
-            proto_transfer_request.set_direction(HAILO_D2H_STREAM);
+            // copy data
+            proto_transfer_request.set_data(stream_cb_params->m_user_mem_view.data(), stream_cb_params->m_user_mem_view.size());
         }
         proto_transfer_buffers->Add(std::move(proto_transfer_request));
     }
@@ -1516,7 +1523,7 @@ Expected<bool> HailoRtRpcClient::InputVStream_is_multi_planar(const VStreamIdent
     return is_multi_planar;
 }
 
-hailo_status HailoRtRpcClient::InputVStream_write(const VStreamIdentifier &identifier, const hailo_pix_buffer_t &buffer)
+hailo_status HailoRtRpcClient::InputVStream_write(const VStreamIdentifier &identifier, const hailo_pix_buffer_t &buffer, const std::chrono::milliseconds &timeout)
 {
     CHECK(HAILO_PIX_BUFFER_MEMORY_TYPE_USERPTR == buffer.memory_type, HAILO_NOT_SUPPORTED, "Memory type of pix buffer must be of type USERPTR!");
 
@@ -1529,9 +1536,11 @@ hailo_status HailoRtRpcClient::InputVStream_write(const VStreamIdentifier &ident
         request.add_planes_data(buffer.planes[i].user_ptr, buffer.planes[i].bytes_used);
     }
 
-    ClientContextWithTimeout context;
+    ClientContextWithTimeout context(timeout);
     InputVStream_write_pix_Reply reply;
     grpc::Status status = m_stub->InputVStream_write_pix(&context, request, &reply);
+    CHECK(grpc::StatusCode::DEADLINE_EXCEEDED != status.error_code(), HAILO_TIMEOUT,
+        "Interaction between client and service received a timeout ({}ms)", timeout.count());
     CHECK_GRPC_STATUS(status);
     assert(reply.status() < HAILO_STATUS_COUNT);
     if (reply.status() == HAILO_STREAM_ABORT) {
@@ -1541,16 +1550,18 @@ hailo_status HailoRtRpcClient::InputVStream_write(const VStreamIdentifier &ident
     return HAILO_SUCCESS;
 }
 
-hailo_status HailoRtRpcClient::InputVStream_write(const VStreamIdentifier &identifier, const MemoryView &buffer)
+hailo_status HailoRtRpcClient::InputVStream_write(const VStreamIdentifier &identifier, const MemoryView &buffer, const std::chrono::milliseconds &timeout)
 {
     InputVStream_write_Request request;
     auto proto_identifier = request.mutable_identifier();
     VStream_convert_identifier_to_proto(identifier, proto_identifier);
     request.set_data(buffer.data(), buffer.size());
 
-    ClientContextWithTimeout context;
+    ClientContextWithTimeout context(timeout);
     InputVStream_write_Reply reply;
     grpc::Status status = m_stub->InputVStream_write(&context, request, &reply);
+    CHECK(grpc::StatusCode::DEADLINE_EXCEEDED != status.error_code(), HAILO_TIMEOUT,
+        "Interaction between client and service received a timeout ({}ms)", timeout.count());
     CHECK_GRPC_STATUS(status);
     assert(reply.status() < HAILO_STATUS_COUNT);
     if (reply.status() == HAILO_STREAM_ABORT) {
@@ -1560,16 +1571,20 @@ hailo_status HailoRtRpcClient::InputVStream_write(const VStreamIdentifier &ident
     return HAILO_SUCCESS;
 }
 
-hailo_status HailoRtRpcClient::OutputVStream_read(const VStreamIdentifier &identifier, MemoryView buffer)
+hailo_status HailoRtRpcClient::OutputVStream_read(const VStreamIdentifier &identifier, MemoryView buffer, const std::chrono::milliseconds &timeout)
 {
     OutputVStream_read_Request request;
     auto proto_identifier = request.mutable_identifier();
     VStream_convert_identifier_to_proto(identifier, proto_identifier);
     request.set_size(static_cast<uint32_t>(buffer.size()));
 
-    ClientContextWithTimeout context;
+    ClientContextWithTimeout context(timeout);
     OutputVStream_read_Reply reply;
     grpc::Status status = m_stub->OutputVStream_read(&context, request, &reply);
+    if (grpc::StatusCode::DEADLINE_EXCEEDED == status.error_code()) {
+        LOGGER__ERROR("Interaction between client and service received a timeout ({}ms)", timeout.count());
+        return HAILO_TIMEOUT;
+    }
     CHECK_GRPC_STATUS(status);
     assert(reply.status() < HAILO_STATUS_COUNT);
     if (reply.status() == HAILO_STREAM_ABORT) {
diff --git a/hailort/libhailort/src/service/hailort_rpc_client.hpp b/hailort/libhailort/src/service/hailort_rpc_client.hpp
index 2f83319d..ad8cac77 100644
--- a/hailort/libhailort/src/service/hailort_rpc_client.hpp
+++ b/hailort/libhailort/src/service/hailort_rpc_client.hpp
@@ -14,6 +14,7 @@
 #include "hailo/expected.hpp"
 #include "hailo/device.hpp"
 #include "rpc/rpc_definitions.hpp"
+#include "service/buffer_pool_per_stream.hpp"
 
 #if defined(_MSC_VER)
 #pragma warning(push)
@@ -37,6 +38,35 @@ namespace hailort
 // Higher then default-hrt-timeout so we can differentiate errors
 static const std::chrono::milliseconds CONTEXT_TIMEOUT(HAILO_DEFAULT_VSTREAM_TIMEOUT_MS + 500);
 using callback_idx_t = uint32_t;
+using StreamCallback = std::function<void(hailo_status)>;
+
+class StreamCbParams // Per-stream record of one async transfer: callback, user buffer and (optionally) a shared memory buffer
+{
+public:
+    StreamCbParams() : // Placeholder state; every scalar member gets a defined value (m_size included)
+        m_is_shm(false), m_cb_idx(INVALID_CB_INDEX), m_stream_name(INVALID_STREAM_NAME), m_size(0) {}
+
+    StreamCbParams(callback_idx_t cb_idx, std::string stream_name, StreamCallback callback, MemoryView user_mem_view) : // Without shared memory
+        m_is_shm(false), m_cb_idx(cb_idx), m_stream_name(stream_name), m_size(user_mem_view.size()),
+            m_callback(callback), m_user_mem_view(user_mem_view) {}
+
+    StreamCbParams(callback_idx_t cb_idx, std::string stream_name, StreamCallback callback, MemoryView user_mem_view,
+        const std::string &shm_name, AcquiredBufferPtr acquired_shm_buffer) : // With a shared memory buffer
+        m_is_shm(true), m_cb_idx(cb_idx), m_stream_name(stream_name), m_size(user_mem_view.size()), m_callback(callback),
+        m_user_mem_view(user_mem_view), m_shm_name(shm_name), m_acquired_shm_buffer(acquired_shm_buffer) {}
+
+    bool m_is_shm; // True only when built through the constructor that receives a shared memory buffer
+    callback_idx_t m_cb_idx; // Index this entry is registered under
+    std::string m_stream_name;
+    size_t m_size; // Size of the user's memory view (0 for a default-constructed object)
+    StreamCallback m_callback; // Called with the final status of the transfer
+
+    // TODO: HRT-14821 - Try to make it a union
+    MemoryView m_user_mem_view;
+    std::string m_shm_name; // Left empty when m_is_shm is false
+    AcquiredBufferPtr m_acquired_shm_buffer; // Left default-constructed when m_is_shm is false
+};
+using StreamCbParamsPtr = std::shared_ptr<StreamCbParams>;
 
 class ClientContextWithTimeout : public grpc::ClientContext {
 public:
@@ -104,10 +134,13 @@ class HailoRtRpcClient final {
     hailo_status ConfiguredNetworkGroup_infer_async(const NetworkGroupIdentifier &identifier,
         const std::vector<std::tuple<callback_idx_t, std::string, MemoryView>> &cb_idx_to_stream_buffer,
         const callback_idx_t infer_request_done_cb, const std::unordered_set<std::string> &input_streams_names);
+    hailo_status ConfiguredNetworkGroup_infer_async(const NetworkGroupIdentifier &identifier, // Overload taking per-stream StreamCbParams
+        const std::vector<StreamCbParamsPtr> &streams_cb_params, const callback_idx_t infer_request_done_cb,
+        const std::unordered_set<std::string> &input_streams_names);
 
-    Expected<std::vector<uint32_t>> InputVStreams_create(const NetworkGroupIdentifier &identifier,
+    Expected<std::unordered_map<std::string, uint32_t>> InputVStreams_create(const NetworkGroupIdentifier &identifier,
         const std::map<std::string, hailo_vstream_params_t> &inputs_params, uint32_t pid);
-    Expected<std::vector<uint32_t>> OutputVStreams_create(const NetworkGroupIdentifier &identifier,
+    Expected<std::unordered_map<std::string, uint32_t>> OutputVStreams_create(const NetworkGroupIdentifier &identifier,
         const std::map<std::string, hailo_vstream_params_t> &output_params, uint32_t pid);
 
     Expected<uint32_t> InputVStream_dup_handle(const VStreamIdentifier &identifier, uint32_t pid);
@@ -116,9 +149,9 @@ class HailoRtRpcClient final {
 
     hailo_status OutputVStream_release(const VStreamIdentifier &identifier, uint32_t pid);
     Expected<bool> InputVStream_is_multi_planar(const VStreamIdentifier &identifier);
-    hailo_status InputVStream_write(const VStreamIdentifier &identifier, const MemoryView &buffer);
-    hailo_status InputVStream_write(const VStreamIdentifier &identifier, const hailo_pix_buffer_t &buffer);
-    hailo_status OutputVStream_read(const VStreamIdentifier &identifier, MemoryView buffer);
+    hailo_status InputVStream_write(const VStreamIdentifier &identifier, const MemoryView &buffer, const std::chrono::milliseconds &timeout);
+    hailo_status InputVStream_write(const VStreamIdentifier &identifier, const hailo_pix_buffer_t &buffer, const std::chrono::milliseconds &timeout);
+    hailo_status OutputVStream_read(const VStreamIdentifier &identifier, MemoryView buffer, const std::chrono::milliseconds &timeout);
     Expected<size_t> InputVStream_get_frame_size(const VStreamIdentifier &identifier);
     Expected<size_t> OutputVStream_get_frame_size(const VStreamIdentifier &identifier);
 
diff --git a/hailort/libhailort/src/service/network_group_client.cpp b/hailort/libhailort/src/service/network_group_client.cpp
index c27b8ffc..dbab91d8 100644
--- a/hailort/libhailort/src/service/network_group_client.cpp
+++ b/hailort/libhailort/src/service/network_group_client.cpp
@@ -7,48 +7,74 @@
  * @brief: Network group client object
  **/
 
+#include "network_group_client.hpp"
+
 #include "hailo/vstream.hpp"
 #include "hailo/hailort_defaults.hpp"
 
 #include "common/utils.hpp"
 #include "common/os_utils.hpp"
+#include "common/shared_memory_buffer.hpp"
+#include "common/internal_env_vars.hpp"
+#include "utils/buffer_storage.hpp"
 
-#include "network_group/network_group_internal.hpp"
 #include "net_flow/pipeline/vstream_builder.hpp"
 #include "net_flow/ops/nms_post_process.hpp"
 #include "rpc_client_utils.hpp"
 
-
 namespace hailort
 {
 
-ConfiguredNetworkGroupClient::ConfiguredNetworkGroupClient(std::unique_ptr<HailoRtRpcClient> client, NetworkGroupIdentifier &&identifier) :
-    ConfiguredNetworkGroup(),
-    m_client(std::move(client)),
-    m_identifier(identifier),
-    m_current_cb_index(0)
+static bool should_use_shared_memory()
 {
-    auto reply = m_client->ConfiguredNetworkGroup_name(m_identifier);
-    if (!reply) {
-        LOGGER__ERROR("get_network_group_name failed with status {}", reply.status());
-        return;
-    }
-    m_network_group_name = reply.value();
+    // True only when the HAILO_SERVICE_SHARED_MEMORY_ENV_VAR flag is not on and HAILORT_SERVICE_ADDRESS_ENV_VAR cannot be read.
+    return !(is_env_variable_on(HAILO_SERVICE_SHARED_MEMORY_ENV_VAR) || get_env_variable(HAILORT_SERVICE_ADDRESS_ENV_VAR));
+}
+
+Expected<std::shared_ptr<ConfiguredNetworkGroupClient>> ConfiguredNetworkGroupClient::create(
+    std::unique_ptr<HailoRtRpcClient> client, NetworkGroupIdentifier &&identifier)
+{ // Fetches the group's name, stream infos and min pool size through the client, then builds the object
+    TRY(auto ng_name, client->ConfiguredNetworkGroup_name(identifier));
+    TRY(auto streams_infos, client->ConfiguredNetworkGroup_get_all_stream_infos(identifier, ng_name));
+    TRY(auto min_buffer_pool_size, client->ConfiguredNetworkGroup_get_min_buffer_pool_size(identifier));
+
+    std::unordered_set<stream_name_t> input_streams_names;
+    std::unordered_set<stream_name_t> output_streams_names;
+    TRY(auto cng_buffer_pool, BufferPoolPerStream::create());
+    for (auto &stream_info : streams_infos) {
+        if (should_use_shared_memory()) { // One pool per stream, requested with its hw_frame_size and the min pool size
+            CHECK_SUCCESS_AS_EXPECTED(cng_buffer_pool->allocate_pool(stream_info.name, identifier, stream_info.hw_frame_size, min_buffer_pool_size));
+        } // A failed allocation now aborts creation instead of being silently ignored
 
-    auto streams_infos = get_all_stream_infos();
-    if (!streams_infos) {
-        LOGGER__ERROR("get_all_stream_infos failed with status {}", reply.status());
-        return;
-    }
-    for (auto &stream_info : streams_infos.value()) {
         if (stream_info.direction == HAILO_H2D_STREAM) {
-            m_input_streams_names.insert(stream_info.name);
+            input_streams_names.insert(stream_info.name);
         } else {
-            m_output_streams_names.insert(stream_info.name);
+            output_streams_names.insert(stream_info.name);
         }
     }
+
+    auto network_group_ptr = make_shared_nothrow<ConfiguredNetworkGroupClient>(std::move(client),
+        std::move(identifier), ng_name, std::move(input_streams_names), std::move(output_streams_names),
+        cng_buffer_pool);
+    CHECK_NOT_NULL_AS_EXPECTED(network_group_ptr, HAILO_OUT_OF_HOST_MEMORY);
+
+    return network_group_ptr;
 }
 
+ConfiguredNetworkGroupClient::ConfiguredNetworkGroupClient(std::unique_ptr<HailoRtRpcClient> client,
+    NetworkGroupIdentifier &&identifier, const std::string &network_group_name,
+        std::unordered_set<std::string> &&input_streams_names, std::unordered_set<std::string> &&output_streams_names,
+        std::shared_ptr<BufferPoolPerStream> buffer_pool_per_stream) : // Only stores the given values; the body is empty
+    ConfiguredNetworkGroup(),
+    m_client(std::move(client)),
+    m_identifier(identifier), // 'identifier' is a named rvalue reference, so this copies it rather than moving it
+    m_network_group_name(network_group_name),
+    m_current_cb_index(0), // Callback indices are handed out starting from 0
+    m_input_streams_names(std::move(input_streams_names)),
+    m_output_streams_names(std::move(output_streams_names)),
+    m_buffer_pool_per_stream(std::move(buffer_pool_per_stream))
+{}
+
 ConfiguredNetworkGroupClient::ConfiguredNetworkGroupClient(NetworkGroupIdentifier &&identifier, const std::string &network_group_name) :
     ConfiguredNetworkGroup(),
     m_identifier(identifier),
@@ -79,10 +105,16 @@ ConfiguredNetworkGroupClient::~ConfiguredNetworkGroupClient()
         LOGGER__CRITICAL("ConfiguredNetworkGroup_release failed with status: {}", reply);
     }
     execute_callbacks_on_error(HAILO_INTERNAL_FAILURE); // At this point there should'nt be any callbacks left. if there are any, raise HAILO_INTERNAL_FAILURE
+
     auto status = wait_for_ongoing_callbacks_count_under(1);
     if (HAILO_SUCCESS != status) {
         LOGGER__CRITICAL("Failed to wait for callbacks to finish");
     }
+
+    // NOTE(review): only the public constructor is seen setting the pool, so it is not dereferenced here when it is null
+    if ((nullptr != m_buffer_pool_per_stream) && (HAILO_SUCCESS != m_buffer_pool_per_stream->shutdown())) {
+        LOGGER__CRITICAL("Failed to shutdown for network group buffers pool");
+    }
 }
 
 hailo_status ConfiguredNetworkGroupClient::before_fork()
@@ -219,6 +251,7 @@ hailo_status ConfiguredNetworkGroupClient::shutdown()
 {
     auto status = m_client->ConfiguredNetworkGroup_shutdown(m_identifier);
     CHECK_SUCCESS(status, "Failed to shutdown");
+
     status = wait_for_ongoing_callbacks_count_under(1);
     CHECK_SUCCESS(status, "Failed to wait for callbacks to finish");
 
@@ -363,12 +396,13 @@ Expected<std::vector<InputVStream>> ConfiguredNetworkGroupClient::create_input_v
 {
     auto reply = m_client->InputVStreams_create(m_identifier, inputs_params, OsUtils::get_curr_pid());
     CHECK_EXPECTED(reply);
-    auto input_vstreams_handles = reply.release();
+    auto input_vstreams_names_to_handles = reply.release();
     std::vector<InputVStream> vstreams;
-    vstreams.reserve(input_vstreams_handles.size());
+    vstreams.reserve(input_vstreams_names_to_handles.size());
 
-    for (uint32_t handle : input_vstreams_handles) {
-        auto vstream_client = InputVStreamClient::create(VStreamIdentifier(m_identifier, handle));
+    for(const auto &name_handle_pair : input_vstreams_names_to_handles) {
+        auto timeout = std::chrono::milliseconds(inputs_params.at(name_handle_pair.first).timeout_ms);
+        auto vstream_client = InputVStreamClient::create(VStreamIdentifier(m_identifier, name_handle_pair.second), timeout);
         CHECK_EXPECTED(vstream_client);
         auto vstream = VStreamsBuilderUtils::create_input(vstream_client.release());
         vstreams.push_back(std::move(vstream));
@@ -380,12 +414,13 @@ Expected<std::vector<OutputVStream>> ConfiguredNetworkGroupClient::create_output
 {
     auto reply = m_client->OutputVStreams_create(m_identifier, outputs_params, OsUtils::get_curr_pid());
     CHECK_EXPECTED(reply);
-    auto output_vstreams_handles = reply.release();
+    auto output_vstreams_names_to_handles = reply.release();
     std::vector<OutputVStream> vstreams;
-    vstreams.reserve(output_vstreams_handles.size());
+    vstreams.reserve(output_vstreams_names_to_handles.size());
 
-    for(uint32_t handle : output_vstreams_handles) {
-        auto vstream_client = OutputVStreamClient::create(VStreamIdentifier(m_identifier, handle));
+    for(const auto &name_handle_pair : output_vstreams_names_to_handles) {
+        auto timeout = std::chrono::milliseconds(outputs_params.at(name_handle_pair.first).timeout_ms);
+        auto vstream_client = OutputVStreamClient::create(VStreamIdentifier(m_identifier, name_handle_pair.second), timeout);
         CHECK_EXPECTED(vstream_client);
         auto vstream = VStreamsBuilderUtils::create_output(vstream_client.release());
         vstreams.push_back(std::move(vstream));
@@ -444,25 +479,38 @@ hailo_status ConfiguredNetworkGroupClient::update_cache_offset(int32_t /* offset
     return HAILO_NOT_IMPLEMENTED;
 }
 
+Expected<std::vector<uint32_t>> ConfiguredNetworkGroupClient::get_cache_ids() const
+{
+    return make_unexpected(HAILO_NOT_IMPLEMENTED); // This client class provides no implementation; always fails
+}
+
+Expected<Buffer> ConfiguredNetworkGroupClient::read_cache_buffer(uint32_t)
+{
+    return make_unexpected(HAILO_NOT_IMPLEMENTED); // The cache id argument is ignored; always fails
+}
+
+hailo_status ConfiguredNetworkGroupClient::write_cache_buffer(uint32_t, MemoryView)
+{
+    return HAILO_NOT_IMPLEMENTED; // Both arguments are ignored; nothing is written
+}
+
 hailo_status ConfiguredNetworkGroupClient::execute_callback(const ProtoCallbackIdentifier &cb_id)
 {
     if (cb_id.cb_type() == CALLBACK_TYPE_TRANSFER) {
-        execute_transfer_callback(cb_id);
+        return execute_transfer_callback(cb_id); // The handler's status is forwarded to the caller
     } else if (cb_id.cb_type() == CALLBACK_TYPE_INFER_REQUEST) {
-        execute_infer_request_callback(cb_id);
+        return execute_infer_request_callback(cb_id); // The handler's status is forwarded to the caller
     } else {
         LOGGER__ERROR("Got invalid callback type = {}", cb_id.cb_type());
         return HAILO_INTERNAL_FAILURE;
     }
-
-    return HAILO_SUCCESS;
 }
 
 void ConfiguredNetworkGroupClient::execute_callbacks_on_error(hailo_status error_status)
 {
     std::unique_lock<std::mutex> lock(m_mutex);
     for (auto cb_pair : m_idx_to_callbacks) {
-        std::get<2>(*cb_pair.second)(error_status);
+        cb_pair.second->m_callback(error_status);
     }
     m_idx_to_callbacks.clear();
     for (auto cb_pair : m_infer_request_idx_to_callbacks) {
@@ -476,7 +524,7 @@ hailo_status ConfiguredNetworkGroupClient::execute_infer_request_callback(const
     std::function<void(hailo_status)> cb;
     {
         std::unique_lock<std::mutex> lock(m_mutex);
-        CHECK(contains(m_infer_request_idx_to_callbacks, cb_id.cb_idx()), HAILO_NOT_FOUND);
+        CHECK(contains(m_infer_request_idx_to_callbacks, cb_id.cb_idx()), HAILO_NOT_FOUND, "Failed to find cb with index {}", cb_id.cb_idx());
         cb = m_infer_request_idx_to_callbacks.at(cb_id.cb_idx());
         m_infer_request_idx_to_callbacks.erase(cb_id.cb_idx());
     }
@@ -485,22 +533,41 @@ hailo_status ConfiguredNetworkGroupClient::execute_infer_request_callback(const
     return HAILO_SUCCESS;
 }
 
+hailo_status ConfiguredNetworkGroupClient::copy_data_from_shm_buffer(StreamCbParamsPtr stream_callback, const ProtoCallbackIdentifier &cb_id)
+{
+    // Copies the frame from the request's shared memory buffer into the user's buffer, after checking that both sides really use one.
+    CHECK(cb_id.has_shared_memory_identifier(), HAILO_INVALID_OPERATION,
+        "Shared memory env var '{}' is on but callback does not contain shared memory identifier", HAILO_SERVICE_SHARED_MEMORY_ENV_VAR);
+    CHECK(stream_callback->m_is_shm && (nullptr != stream_callback->m_acquired_shm_buffer), HAILO_INTERNAL_FAILURE, "Callback of stream '{}' holds no shared memory buffer", stream_callback->m_stream_name);
+    memcpy(stream_callback->m_user_mem_view.data(), stream_callback->m_acquired_shm_buffer->data(), stream_callback->m_acquired_shm_buffer->size());
+
+    return HAILO_SUCCESS;
+}
+
 hailo_status ConfiguredNetworkGroupClient::execute_transfer_callback(const ProtoCallbackIdentifier &cb_id)
 {
-    NamedBufferCallbackTuplePtr name_buffer_callback_ptr;
+    StreamCbParamsPtr stream_callback; // Keeps the entry usable after it is erased from the map below
     {
         std::unique_lock<std::mutex> lock(m_mutex);
-        CHECK(contains(m_idx_to_callbacks, cb_id.cb_idx()), HAILO_NOT_FOUND);
-        name_buffer_callback_ptr = m_idx_to_callbacks.at(cb_id.cb_idx());
+        CHECK(contains(m_idx_to_callbacks, cb_id.cb_idx()), HAILO_NOT_FOUND, "Failed to find cb with index {}", cb_id.cb_idx());
+        stream_callback = m_idx_to_callbacks.at(cb_id.cb_idx());
         m_idx_to_callbacks.erase(cb_id.cb_idx());
     }
+
     const auto &stream_name = cb_id.stream_name();
-    CHECK((std::get<0>(*name_buffer_callback_ptr.get()) == stream_name), HAILO_INTERNAL_FAILURE,
+    CHECK((stream_callback->m_stream_name == stream_name), HAILO_INTERNAL_FAILURE,
         "Callback identifier does not match stream name {}", stream_name);
-    if (contains(m_output_streams_names, stream_name)) {
-        memcpy(std::get<1>(*name_buffer_callback_ptr.get()).data(), cb_id.data().data(), cb_id.data().size());
+
+    if (contains(m_output_streams_names, stream_callback->m_stream_name)) { // Only output streams copy data into the user's buffer
+        if (should_use_shared_memory()) {
+            auto status = copy_data_from_shm_buffer(stream_callback, cb_id);
+            CHECK_SUCCESS(status); // NOTE(review): returning here skips m_callback although the entry was already erased - confirm the user is notified elsewhere
+        } else {
+            memcpy(stream_callback->m_user_mem_view.data(), cb_id.data().data(), cb_id.data().size()); // NOTE(review): size is not compared with m_user_mem_view.size() - confirm it can never be larger
+        }
     }
-    std::get<2>(*name_buffer_callback_ptr.get())(static_cast<hailo_status>(cb_id.status()));
+
+    stream_callback->m_callback(static_cast<hailo_status>(cb_id.status())); // The user gets the status carried in cb_id
 
     return HAILO_SUCCESS;
 }
@@ -510,24 +577,59 @@ callback_idx_t ConfiguredNetworkGroupClient::get_unique_callback_idx()
     return m_current_cb_index.fetch_add(1);
 }
 
-hailo_status ConfiguredNetworkGroupClient::infer_async(const NamedBuffersCallbacks &named_buffers_callbacks,
-    const std::function<void(hailo_status)> &infer_request_done_cb)
+Expected<std::vector<StreamCbParamsPtr>> ConfiguredNetworkGroupClient::create_streams_callbacks_params(const NamedBuffersCallbacks &named_buffers_callbacks)
 {
-    std::vector<std::tuple<callback_idx_t, std::string, MemoryView>> cb_idx_to_stream_buffer;
-    cb_idx_to_stream_buffer.reserve(named_buffers_callbacks.size());
+    std::vector<StreamCbParamsPtr> streams_cb_params; // One entry per named buffer; nothing is added to m_idx_to_callbacks in this function
+    streams_cb_params.reserve(named_buffers_callbacks.size());
     {
         std::unique_lock<std::mutex> lock(m_mutex);
         for (const auto &name_buffer_cb : named_buffers_callbacks) {
+            StreamCbParams stream_cb_params;
             auto cb_idx = get_unique_callback_idx();
-            CHECK(BufferType::VIEW == name_buffer_cb.second.first.buffer_type, HAILO_INVALID_OPERATION,
+            auto &stream_name = name_buffer_cb.first;
+            CHECK_AS_EXPECTED(BufferType::VIEW == name_buffer_cb.second.first.buffer_type, HAILO_INVALID_OPERATION,
                 "Using dmabuf is not supported when working with hailort_service");
 
-            auto name_buffer_cb_tuple = std::make_tuple(name_buffer_cb.first, name_buffer_cb.second.first.view, name_buffer_cb.second.second);
-            auto tuple_ptr = make_shared_nothrow<NamedBufferCallbackTuple>(name_buffer_cb_tuple);
-            CHECK_NOT_NULL(tuple_ptr, HAILO_OUT_OF_HOST_MEMORY);
+            if (should_use_shared_memory()) {
+                // Take a buffer from this stream's pool; for input streams the user's data is copied into it below
+                TRY(auto stream_pool, m_buffer_pool_per_stream->get_pool(stream_name));
+                TRY(auto acquired_buffer, AcquiredBuffer::acquire_from_pool(stream_pool)); // NOTE(review): runs while m_mutex is held; if this can wait for a free buffer, callbacks needing m_mutex would stall - confirm
+                CHECK_AS_EXPECTED(name_buffer_cb.second.first.view.size() == acquired_buffer->size(), HAILO_INVALID_ARGUMENT,
+                    "For stream '{}', passed buffer size is {} (expected {})", stream_name, name_buffer_cb.second.first.view.size(),
+                    acquired_buffer->size());
+
+                if (contains(m_input_streams_names, stream_name)) { // Sizes were verified equal above, so the copy fits the acquired buffer
+                    memcpy(acquired_buffer->data(), name_buffer_cb.second.first.view.data(), name_buffer_cb.second.first.view.size());
+                }
+
+                TRY(auto shm_name, acquired_buffer->buffer()->storage().shm_name());
+                stream_cb_params = StreamCbParams(cb_idx, stream_name, name_buffer_cb.second.second,
+                    name_buffer_cb.second.first.view, shm_name, acquired_buffer);
+            } else {
+                stream_cb_params = StreamCbParams(cb_idx, stream_name, name_buffer_cb.second.second,
+                    name_buffer_cb.second.first.view);
+            }
+
+            auto cb_params_ptr = make_shared_nothrow<StreamCbParams>(std::move(stream_cb_params));
+            CHECK_NOT_NULL_AS_EXPECTED(cb_params_ptr, HAILO_OUT_OF_HOST_MEMORY);
+
+            streams_cb_params.emplace_back(cb_params_ptr);
+        }
+    }
 
-            m_idx_to_callbacks.emplace(cb_idx, tuple_ptr);
-            cb_idx_to_stream_buffer.emplace_back(std::make_tuple(cb_idx, name_buffer_cb.first, name_buffer_cb.second.first.view));
+    return streams_cb_params;
+}
+
+hailo_status ConfiguredNetworkGroupClient::infer_async(const NamedBuffersCallbacks &named_buffers_callbacks,
+    const std::function<void(hailo_status)> &infer_request_done_cb)
+{
+    auto streams_cb_params = create_streams_callbacks_params(named_buffers_callbacks); // Nothing is registered yet if this fails
+    if (!streams_cb_params) {
+        return streams_cb_params.status(); // This function returns a plain hailo_status, so no make_unexpected wrapper
+    } else {
+        std::unique_lock<std::mutex> lock(m_mutex); // m_idx_to_callbacks is only touched under m_mutex
+        for (auto &stream_cb_params : streams_cb_params.value()) {
+            m_idx_to_callbacks.emplace(stream_cb_params->m_cb_idx, stream_cb_params);
         }
     }
 
@@ -551,15 +653,15 @@ hailo_status ConfiguredNetworkGroupClient::infer_async(const NamedBuffersCallbac
     }
 
     increase_ongoing_callbacks(); // Increase before lunch, as the cb may be called before we got the chance to increase the counter
-    auto status = m_client->ConfiguredNetworkGroup_infer_async(m_identifier, cb_idx_to_stream_buffer,
+    auto status = m_client->ConfiguredNetworkGroup_infer_async(m_identifier, streams_cb_params.value(),
         infer_request_cb_idx, m_input_streams_names);
 
     if (HAILO_SUCCESS != status) {
         // If we got error in `infer_async()`, then the callbacks will not be called in the service domain.
         // remove them from the cb lists so they wont be called in the client domain as well.
         std::unique_lock<std::mutex> lock(m_mutex);
-        for (auto &pair : cb_idx_to_stream_buffer) {
-            m_idx_to_callbacks.erase(std::get<0>(pair));
+        for (auto &stream_cb_params : streams_cb_params.value()) {
+            m_idx_to_callbacks.erase(stream_cb_params->m_cb_idx);
         }
         m_infer_request_idx_to_callbacks.erase(infer_request_cb_idx);
         decrease_ongoing_callbacks();
diff --git a/hailort/libhailort/src/service/network_group_client.hpp b/hailort/libhailort/src/service/network_group_client.hpp
new file mode 100644
index 00000000..45f53763
--- /dev/null
+++ b/hailort/libhailort/src/service/network_group_client.hpp
@@ -0,0 +1,157 @@
+/**
+ * Copyright (c) 2024 Hailo Technologies Ltd. All rights reserved.
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT)
+**/
+/**
+ * @file network_group_client.hpp
+ * @brief Network Group client for HailoRT gRPC Service
+ **/
+
+#ifndef _HAILO_NETWORK_GROUP_CLIENT_HPP_
+#define _HAILO_NETWORK_GROUP_CLIENT_HPP_
+
+#include "hailo/hailort.h"
+#include "service/buffer_pool_per_stream.hpp"
+#include "network_group/network_group_internal.hpp"
+#include "service/hailort_rpc_client.hpp"
+#include "rpc/rpc_definitions.hpp"
+
+namespace hailort
+{
+
+class ConfiguredNetworkGroupClient : public ConfiguredNetworkGroup // ConfiguredNetworkGroup that works through a HailoRtRpcClient
+{
+public:
+    static Expected<std::shared_ptr<ConfiguredNetworkGroupClient>> create(std::unique_ptr<HailoRtRpcClient> client,
+        NetworkGroupIdentifier &&identifier); // Factory: collects what the constructor below needs and reports failures via Expected
+    ConfiguredNetworkGroupClient(std::unique_ptr<HailoRtRpcClient> client, NetworkGroupIdentifier &&identifier,
+        const std::string &network_group_name, std::unordered_set<std::string> &&input_streams_names,
+        std::unordered_set<std::string> &&output_streams_names, std::shared_ptr<BufferPoolPerStream> buffer_pool_per_stream);
+
+    virtual ~ConfiguredNetworkGroupClient();
+    ConfiguredNetworkGroupClient(const ConfiguredNetworkGroupClient &other) = delete;
+    ConfiguredNetworkGroupClient &operator=(const ConfiguredNetworkGroupClient &other) = delete;
+    ConfiguredNetworkGroupClient &operator=(ConfiguredNetworkGroupClient &&other) = delete;
+    ConfiguredNetworkGroupClient(ConfiguredNetworkGroupClient &&other) = delete;
+
+    virtual const std::string &get_network_group_name() const override;
+    virtual const std::string &name() const override;
+    virtual Expected<hailo_stream_interface_t> get_default_streams_interface() override;
+    virtual std::vector<std::reference_wrapper<InputStream>> get_input_streams_by_interface(hailo_stream_interface_t stream_interface) override;
+    virtual std::vector<std::reference_wrapper<OutputStream>> get_output_streams_by_interface(hailo_stream_interface_t stream_interface) override;
+    virtual ExpectedRef<InputStream> get_input_stream_by_name(const std::string &name) override;
+    virtual ExpectedRef<OutputStream> get_output_stream_by_name(const std::string &name) override;
+    virtual Expected<InputStreamRefVector> get_input_streams_by_network(const std::string &network_name="") override;
+    virtual Expected<OutputStreamRefVector> get_output_streams_by_network(const std::string &network_name="") override;
+    virtual InputStreamRefVector get_input_streams() override;
+    virtual OutputStreamRefVector get_output_streams() override;
+    virtual Expected<OutputStreamWithParamsVector> get_output_streams_from_vstream_names(
+        const std::map<std::string, hailo_vstream_params_t> &outputs_params) override;
+
+    virtual Expected<LatencyMeasurementResult> get_latency_measurement(const std::string &network_name="") override;
+    virtual Expected<std::unique_ptr<ActivatedNetworkGroup>> activate(const hailo_activate_network_group_params_t &network_group_params) override;
+    virtual hailo_status wait_for_activation(const std::chrono::milliseconds &timeout) override;
+    virtual hailo_status shutdown() override;
+
+    virtual Expected<std::map<std::string, hailo_vstream_params_t>> make_input_vstream_params(
+        bool unused, hailo_format_type_t format_type, uint32_t timeout_ms, uint32_t queue_size,
+        const std::string &network_name="") override;
+    virtual Expected<std::map<std::string, hailo_vstream_params_t>> make_output_vstream_params(
+        bool unused, hailo_format_type_t format_type, uint32_t timeout_ms, uint32_t queue_size,
+        const std::string &network_name="") override;
+    virtual Expected<std::vector<std::map<std::string, hailo_vstream_params_t>>> make_output_vstream_params_groups(
+        bool unused, hailo_format_type_t format_type, uint32_t timeout_ms, uint32_t queue_size) override;
+    virtual Expected<std::vector<std::vector<std::string>>> get_output_vstream_groups() override;
+
+    virtual Expected<std::vector<hailo_stream_info_t>> get_all_stream_infos(const std::string &network_name="") const override;
+    virtual Expected<std::vector<hailo_network_info_t>> get_network_infos() const override;
+    virtual Expected<std::vector<hailo_vstream_info_t>> get_input_vstream_infos(const std::string &network_name="") const override;
+    virtual Expected<std::vector<hailo_vstream_info_t>> get_output_vstream_infos(const std::string &network_name="") const override;
+    virtual Expected<std::vector<hailo_vstream_info_t>> get_all_vstream_infos(const std::string &network_name="") const override;
+
+    virtual bool is_scheduled() const override;
+    virtual hailo_status set_scheduler_timeout(const std::chrono::milliseconds &timeout, const std::string &network_name) override;
+    virtual hailo_status set_scheduler_threshold(uint32_t threshold, const std::string &network_name) override;
+    virtual hailo_status set_scheduler_priority(uint8_t priority, const std::string &network_name) override;
+
+    virtual AccumulatorPtr get_activation_time_accumulator() const override;
+    virtual AccumulatorPtr get_deactivation_time_accumulator() const override;
+
+    virtual bool is_multi_context() const override;
+    virtual const ConfigureNetworkParams get_config_params() const override;
+
+    virtual Expected<std::vector<std::string>> get_sorted_output_names() override;
+    virtual Expected<std::vector<std::string>> get_stream_names_from_vstream_name(const std::string &vstream_name) override;
+    virtual Expected<std::vector<std::string>> get_vstream_names_from_stream_name(const std::string &stream_name) override;
+
+    virtual Expected<HwInferResults> run_hw_infer_estimator() override;
+
+    // NOTE(review): the next two are declared virtual without 'override', unlike their neighbours - confirm against the base class
+    virtual Expected<std::vector<InputVStream>> create_input_vstreams(const std::map<std::string, hailo_vstream_params_t> &inputs_params);
+    virtual Expected<std::vector<OutputVStream>> create_output_vstreams(const std::map<std::string, hailo_vstream_params_t> &outputs_params);
+    virtual Expected<size_t> get_min_buffer_pool_size() override;
+
+    virtual hailo_status before_fork() override;
+    virtual hailo_status after_fork_in_parent() override;
+    virtual hailo_status after_fork_in_child() override;
+
+    virtual Expected<uint32_t> get_client_handle() const override
+    {
+        auto val = m_identifier.m_network_group_handle;
+        return val;
+    };
+
+    virtual Expected<uint32_t> get_vdevice_client_handle() const override
+    {
+        auto val = m_identifier.m_vdevice_identifier.m_vdevice_handle;
+        return val;
+    };
+
+    static Expected<std::shared_ptr<ConfiguredNetworkGroupClient>> duplicate_network_group_client(uint32_t handle, uint32_t vdevice_handle,
+        const std::string &network_group_name);
+
+    virtual hailo_status infer_async(const NamedBuffersCallbacks &named_buffers_callbacks,
+        const std::function<void(hailo_status)> &infer_request_done_cb) override;
+    hailo_status execute_callback(const ProtoCallbackIdentifier &cb_id); // Dispatches on cb_id.cb_type() to the transfer / infer-request handler
+
+    void execute_callbacks_on_error(hailo_status error_status); // Calls the pending transfer callbacks with error_status and clears that map
+
+    virtual Expected<std::unique_ptr<LayerInfo>> get_layer_info(const std::string &stream_name) override;
+    virtual Expected<std::vector<net_flow::PostProcessOpMetadataPtr>> get_ops_metadata() override;
+
+    virtual hailo_status set_nms_score_threshold(const std::string &edge_name, float32_t nms_score_threshold) override;
+    virtual hailo_status set_nms_iou_threshold(const std::string &edge_name, float32_t iou_threshold) override;
+    virtual hailo_status set_nms_max_bboxes_per_class(const std::string &edge_name, uint32_t max_bboxes_per_class) override;
+    virtual hailo_status set_nms_max_accumulated_mask_size(const std::string &edge_name, uint32_t max_accumulated_mask_size) override;
+
+    virtual hailo_status init_cache(uint32_t read_offset, int32_t write_offset_delta) override;
+    virtual Expected<hailo_cache_info_t> get_cache_info() const override;
+    virtual hailo_status update_cache_offset(int32_t offset_delta_bytes) override;
+    virtual Expected<std::vector<uint32_t>> get_cache_ids() const override;
+    virtual Expected<Buffer> read_cache_buffer(uint32_t cache_id) override;
+    virtual hailo_status write_cache_buffer(uint32_t cache_id, MemoryView buffer) override;
+
+private:
+    ConfiguredNetworkGroupClient(NetworkGroupIdentifier &&identifier, const std::string &network_group_name);
+    hailo_status create_client();
+    hailo_status dup_handle();
+    callback_idx_t get_unique_callback_idx(); // Returns m_current_cb_index.fetch_add(1)
+    hailo_status execute_infer_request_callback(const ProtoCallbackIdentifier &cb_id);
+    hailo_status execute_transfer_callback(const ProtoCallbackIdentifier &cb_id);
+    Expected<std::vector<StreamCbParamsPtr>> create_streams_callbacks_params(const NamedBuffersCallbacks &named_buffers_callbacks);
+    hailo_status copy_data_from_shm_buffer(StreamCbParamsPtr stream_callback, const ProtoCallbackIdentifier &cb_id);
+
+    std::unique_ptr<HailoRtRpcClient> m_client;
+    NetworkGroupIdentifier m_identifier;
+    std::string m_network_group_name;
+    std::atomic<callback_idx_t> m_current_cb_index; // Source of the unique callback indices
+    std::unordered_set<std::string> m_input_streams_names;
+    std::unordered_set<std::string> m_output_streams_names;
+    std::shared_ptr<BufferPoolPerStream> m_buffer_pool_per_stream; // Pools are looked up by stream name
+    std::mutex m_mutex; // Held while the two callback maps below are read or modified
+    std::unordered_map<callback_idx_t, StreamCbParamsPtr> m_idx_to_callbacks; // Pending per-stream transfer callbacks, by callback index
+    std::unordered_map<callback_idx_t, std::function<void(hailo_status)>> m_infer_request_idx_to_callbacks; // Pending infer-request-done callbacks
+};
+
+} /* namespace hailort */
+
+#endif /* _HAILO_NETWORK_GROUP_CLIENT_HPP_ */
diff --git a/hailort/libhailort/src/stream_common/CMakeLists.txt b/hailort/libhailort/src/stream_common/CMakeLists.txt
index 06d4e60f..811663ba 100644
--- a/hailort/libhailort/src/stream_common/CMakeLists.txt
+++ b/hailort/libhailort/src/stream_common/CMakeLists.txt
@@ -1,4 +1,4 @@
-cmake_minimum_required(VERSION 3.0.0)
+cmake_minimum_required(VERSION 3.5.0)
 
 set(SRC_FILES
     ${CMAKE_CURRENT_SOURCE_DIR}/stream.cpp
@@ -6,7 +6,6 @@ set(SRC_FILES
     ${CMAKE_CURRENT_SOURCE_DIR}/async_stream_base.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/nms_stream.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/remote_process_stream.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/transfer_common.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/queued_stream_buffer_pool.cpp
 )
 
diff --git a/hailort/libhailort/src/stream_common/async_stream_base.hpp b/hailort/libhailort/src/stream_common/async_stream_base.hpp
index 739bfef6..10eb42cf 100644
--- a/hailort/libhailort/src/stream_common/async_stream_base.hpp
+++ b/hailort/libhailort/src/stream_common/async_stream_base.hpp
@@ -16,7 +16,7 @@
 #include "stream_common/stream_buffer_pool.hpp"
 #include "queued_stream_buffer_pool.hpp"
 
-#include "utils/thread_safe_queue.hpp"
+#include "common/thread_safe_queue.hpp"
 
 namespace hailort
 {
diff --git a/hailort/libhailort/src/stream_common/nms_stream.cpp b/hailort/libhailort/src/stream_common/nms_stream.cpp
index 79aa29c4..b9b3a4f0 100644
--- a/hailort/libhailort/src/stream_common/nms_stream.cpp
+++ b/hailort/libhailort/src/stream_common/nms_stream.cpp
@@ -77,7 +77,7 @@ hailo_status NMSStreamReader::advance_state_machine(NMSBurstState *burst_state,
             if (HAILO_BURST_TYPE_H8_PER_CLASS == burst_type) {
                 CHECK_IN_DEBUG((NMSBurstState::NMS_BURST_STATE_WAITING_FOR_DELIMETER == (*burst_state)) ||
                     (NMSBurstState::NMS_BURST_STATE_WAITING_FOR_PADDING == (*burst_state)), HAILO_NMS_BURST_INVALID_DATA,
-                    "Invalid state, H8 NMS burst cannot receive delimeter while in state {}", (*burst_state));
+                    "Invalid state, H8 NMS burst cannot receive delimeter while in state {}", static_cast<int>(*burst_state));
                 // To differentiate from H8 padding - where we should not increment amount of delimeters found
                 if ((*burst_state) == NMSBurstState::NMS_BURST_STATE_WAITING_FOR_DELIMETER) {
                     (*num_delimeters_received)++;
@@ -99,12 +99,12 @@ hailo_status NMSStreamReader::advance_state_machine(NMSBurstState *burst_state,
 
             } else if (HAILO_BURST_TYPE_H15_PER_CLASS == burst_type) {
                 CHECK_IN_DEBUG(NMSBurstState::NMS_BURST_STATE_WAITING_FOR_DELIMETER == (*burst_state), HAILO_NMS_BURST_INVALID_DATA,
-                    "Invalid state, H15 Per class NMS burst cannot receive delimeter while in state {}", (*burst_state));
+                    "Invalid state, H15 Per class NMS burst cannot receive delimeter while in state {}", static_cast<int>(*burst_state));
                 (*num_delimeters_received)++;
                 *burst_state = NMSBurstState::NMS_BURST_STATE_WAITING_FOR_IMAGE_DELIMETER;
             } else {
                 CHECK_IN_DEBUG(NMSBurstState::NMS_BURST_STATE_WAITING_FOR_DELIMETER == (*burst_state), HAILO_NMS_BURST_INVALID_DATA,
-                    "Invalid state, H15 Per Frame NMS burst cannot receive delimeter while in state {}", (*burst_state));
+                    "Invalid state, H15 Per Frame NMS burst cannot receive delimeter while in state {}", static_cast<int>(*burst_state));
                 // in hailo15 per frame - if number of delimeter is same as num classes - we expect image delimeter next 
                 // otherwise expect another delimeter
                 (*num_delimeters_received)++;
@@ -121,7 +121,7 @@ hailo_status NMSStreamReader::advance_state_machine(NMSBurstState *burst_state,
                 "Invalid state, H8 NMS burst cannot receive image delimeter");
 
             CHECK_IN_DEBUG(NMSBurstState::NMS_BURST_STATE_WAITING_FOR_IMAGE_DELIMETER == (*burst_state), HAILO_NMS_BURST_INVALID_DATA,
-                "Invalid state, H15 NMS burst cannot receive image delimeter in state {}", (*burst_state));
+                "Invalid state, H15 NMS burst cannot receive image delimeter in state {}", static_cast<int>(*burst_state));
 
             // in both hailo15 per class and per frame - when receiving image delimeter we move to expecting padding
             *burst_state = NMSBurstState::NMS_BURST_STATE_WAITING_FOR_PADDING;
@@ -143,7 +143,7 @@ hailo_status NMSStreamReader::advance_state_machine(NMSBurstState *burst_state,
         {
             if ((HAILO_BURST_TYPE_H15_PER_CLASS == burst_type) || (HAILO_BURST_TYPE_H15_PER_FRAME == burst_type)) {
                 CHECK_IN_DEBUG(NMSBurstState::NMS_BURST_STATE_WAITING_FOR_PADDING == (*burst_state), HAILO_NMS_BURST_INVALID_DATA,
-                    "Invalid state, H15 NMS burst cannot receive padding in state {}", (*burst_state));
+                    "Invalid state, H15 NMS burst cannot receive padding in state {}", static_cast<int>(*burst_state));
             }
             // In case of padding next state is wait for padding unless it is last padding of burst - then next state will be
             // Wait for delimeter - will only get to this stage in debug - in release once image delimeter is read we ignore rest of
@@ -395,6 +395,11 @@ hailo_status NmsOutputStream::cancel_pending_transfers()
     return m_base_stream->cancel_pending_transfers();
 }
 
+// Binding buffer not supported on nms stream, returning success so it won't fail the scheduler
+hailo_status NmsOutputStream::bind_buffer(TransferRequest &&) {
+    return HAILO_SUCCESS;
+}
+
 NmsReaderThread::NmsReaderThread(std::shared_ptr<OutputStreamBase> base_stream, size_t max_queue_size,
     hailo_stream_interface_t stream_interface) :
     m_base_stream(base_stream),
diff --git a/hailort/libhailort/src/stream_common/nms_stream.hpp b/hailort/libhailort/src/stream_common/nms_stream.hpp
index 9b11ef37..3adaca00 100644
--- a/hailort/libhailort/src/stream_common/nms_stream.hpp
+++ b/hailort/libhailort/src/stream_common/nms_stream.hpp
@@ -98,6 +98,7 @@ class NmsOutputStream : public AsyncOutputStreamBase {
     void set_vdevice_core_op_handle(vdevice_core_op_handle_t core_op_handle) override;
 
     virtual hailo_status cancel_pending_transfers() override;
+    virtual hailo_status bind_buffer(TransferRequest &&transfer_request) override;
 
 protected:
     virtual Expected<std::unique_ptr<StreamBufferPool>> allocate_buffer_pool() override;
diff --git a/hailort/libhailort/src/stream_common/stream_buffer_pool.hpp b/hailort/libhailort/src/stream_common/stream_buffer_pool.hpp
index 71c830e1..85a772c7 100644
--- a/hailort/libhailort/src/stream_common/stream_buffer_pool.hpp
+++ b/hailort/libhailort/src/stream_common/stream_buffer_pool.hpp
@@ -11,7 +11,7 @@
 #define _HAILO_STREAM_BUFFER_POOL_HPP_
 
 #include "hailo/expected.hpp"
-#include "stream_common/transfer_common.hpp"
+#include "vdma/channel/transfer_common.hpp"
 
 namespace hailort
 {
diff --git a/hailort/libhailort/src/stream_common/stream_internal.cpp b/hailort/libhailort/src/stream_common/stream_internal.cpp
index 3bb9768f..5ec7b5cb 100644
--- a/hailort/libhailort/src/stream_common/stream_internal.cpp
+++ b/hailort/libhailort/src/stream_common/stream_internal.cpp
@@ -74,6 +74,12 @@ hailo_status InputStreamBase::write_async(TransferRequest &&)
     return HAILO_NOT_IMPLEMENTED;
 }
 
+hailo_status InputStreamBase::bind_buffer(TransferRequest &&)
+{
+    LOGGER__ERROR("bind_buffer not implemented for sync API");
+    return HAILO_NOT_IMPLEMENTED;
+}
+
 hailo_status InputStreamBase::abort()
 {
     LOGGER__ERROR("InputStream::abort is deprecated. One should use ConfiguredNetworkGroup::shutdown()");
@@ -159,6 +165,12 @@ hailo_status OutputStreamBase::read_async(int dmabuf_fd, size_t size, const Tran
     return read_async(TransferRequest(hailo_dma_buffer_t{dmabuf_fd, size}, wrapped_callback));
 }
 
+hailo_status OutputStreamBase::bind_buffer(TransferRequest &&)
+{
+    LOGGER__ERROR("bind_buffer not implemented for sync API");
+    return HAILO_NOT_IMPLEMENTED;
+}
+
 hailo_status OutputStreamBase::read_unaligned_address_async(const MemoryView &, const TransferDoneCallback &)
 {
     LOGGER__ERROR("read_unaligned_address_async not implemented OutputStreamBase");
diff --git a/hailort/libhailort/src/stream_common/stream_internal.hpp b/hailort/libhailort/src/stream_common/stream_internal.hpp
index 4228f2f9..0e8d2f7c 100644
--- a/hailort/libhailort/src/stream_common/stream_internal.hpp
+++ b/hailort/libhailort/src/stream_common/stream_internal.hpp
@@ -37,7 +37,7 @@
 #include "hailo/event.hpp"
 #include "hailo/hailort_common.hpp"
 
-#include "stream_common/transfer_common.hpp"
+#include "vdma/channel/transfer_common.hpp"
 #include "device_common/control_protocol.hpp"
 #include "hef/layer_info.hpp"
 
@@ -99,6 +99,8 @@ class InputStreamBase : public InputStream
     virtual hailo_status write_async(const void *buffer, size_t size, const TransferDoneCallback &user_callback) override final;
     virtual hailo_status write_async(int dmabuf_fd, size_t size, const TransferDoneCallback &user_callback) override final;
 
+    virtual hailo_status bind_buffer(TransferRequest &&transfer_request);
+
     virtual hailo_status write_async(TransferRequest &&transfer_request);
 
     virtual hailo_status abort() override final;
@@ -163,6 +165,8 @@ class OutputStreamBase : public OutputStream
     virtual hailo_status read_async(void *buffer, size_t size, const TransferDoneCallback &user_callback) override final;
     virtual hailo_status read_async(int dmabuf_fd, size_t size, const TransferDoneCallback &user_callback) override final;
 
+    virtual hailo_status bind_buffer(TransferRequest &&transfer_request);
+
     virtual hailo_status read_async(TransferRequest &&transfer_request);
     virtual hailo_status read_unaligned_address_async(const MemoryView &buffer, const TransferDoneCallback &user_callback);
 
diff --git a/hailort/libhailort/src/transform/CMakeLists.txt b/hailort/libhailort/src/transform/CMakeLists.txt
index 7c0f9c73..8db98c07 100644
--- a/hailort/libhailort/src/transform/CMakeLists.txt
+++ b/hailort/libhailort/src/transform/CMakeLists.txt
@@ -1,4 +1,4 @@
-cmake_minimum_required(VERSION 3.0.0)
+cmake_minimum_required(VERSION 3.5.0)
 
 set(SRC_FILES
     ${CMAKE_CURRENT_SOURCE_DIR}/transform.cpp
diff --git a/hailort/libhailort/src/transform/eigen.hpp b/hailort/libhailort/src/transform/eigen.hpp
new file mode 100644
index 00000000..1658115e
--- /dev/null
+++ b/hailort/libhailort/src/transform/eigen.hpp
@@ -0,0 +1,25 @@
+/**
+ * Copyright (c) 2020-2024 Hailo Technologies Ltd. All rights reserved.
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT)
+ **/
+/**
+ * @file eigen.hpp
+ * @brief Includes the Eigen library with the required compiler instructions
+ **/
+
+#if defined(_MSC_VER)
+#pragma warning(push)
+#pragma warning(disable: 4127)
+#else // Not MSC
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wconversion"
+#if defined(__GNUC__) && (__GNUC__ >= 11)
+    #pragma GCC diagnostic ignored "-Wclass-memaccess"
+#endif // GCC version
+#endif // Not MSC
+#include <Eigen/Dense>
+#if defined(_MSC_VER)
+#pragma warning(pop)
+#else
+#pragma GCC diagnostic pop
+#endif
\ No newline at end of file
diff --git a/hailort/libhailort/src/transform/transform.cpp b/hailort/libhailort/src/transform/transform.cpp
index 87fd1f97..7bdfe989 100644
--- a/hailort/libhailort/src/transform/transform.cpp
+++ b/hailort/libhailort/src/transform/transform.cpp
@@ -81,8 +81,8 @@ bool TransformContextUtils::should_reorder(const hailo_3d_image_shape_t &src_ima
            (src_image_shape.features        == 1)                        &&
            (src_image_shape.height          == dst_image_shape.height)   &&
            (src_image_shape.width           == dst_image_shape.width)    &&
-           (((src_format.order == HAILO_FORMAT_ORDER_NHCW) && (dst_format.order == HAILO_FORMAT_ORDER_NHWC)) ||
-           ((src_format.order == HAILO_FORMAT_ORDER_NHWC) && (dst_format.order == HAILO_FORMAT_ORDER_NHCW)))) {
+           (((src_format.order == HAILO_FORMAT_ORDER_NHCW) && ((dst_format.order == HAILO_FORMAT_ORDER_NHWC) || (dst_format.order == HAILO_FORMAT_ORDER_FCR))) ||
+           (((src_format.order == HAILO_FORMAT_ORDER_NHWC) || src_format.order == HAILO_FORMAT_ORDER_FCR) && (dst_format.order == HAILO_FORMAT_ORDER_NHCW)))) {
         return false;
     }
 
@@ -250,7 +250,7 @@ hailo_status transform__transpose_buffer(const void *src_ptr, const hailo_3d_ima
     case HAILO_FORMAT_ORDER_F8CR:
         return transform__transpose_NHWC(src_ptr, shape, HailoRTCommon::get_format_data_bytes(format), dst_ptr);
     default:
-        LOGGER__ERROR("Transpose is not supported for order {}", format.order);
+        LOGGER__ERROR("Transpose is not supported for order {}", static_cast<int>(format.order));
         return HAILO_INVALID_OPERATION;
     }
 }
@@ -769,55 +769,6 @@ hailo_status transform__h2d_NCHW_to_NHCW(
     return HAILO_SUCCESS;
 }
 
-template<typename T>
-hailo_status transform__d2h_argmax_NHCW_to_NHW(const T *src_ptr, const hailo_3d_image_shape_t &src_image_shape,
-    T *dst_ptr, const hailo_3d_image_shape_t &dst_image_shape)
-{
-    assert(nullptr != src_ptr);
-    assert(nullptr != dst_ptr);
-
-    CHECK(src_image_shape.height == dst_image_shape.height, HAILO_INVALID_OPERATION,
-        "NHCW_to_NHW argmax Transform is supported only when src height ({}) is equal to dst height ({})",
-        src_image_shape.height, dst_image_shape.height);
-    CHECK(src_image_shape.width >= dst_image_shape.width, HAILO_INVALID_OPERATION,
-        "NHCW_to_NHW argmax Transform is supported only when src width ({}) is equal/larger than dst width ({})",
-        src_image_shape.width, dst_image_shape.width);
-    CHECK(dst_image_shape.features == 1, HAILO_INVALID_OPERATION,
-        "NHCW_to_NHW argmax Transform is supported only when dst features ({}) is 1",
-        dst_image_shape.features);
-    CHECK(src_image_shape.features <= std::numeric_limits<T>::max(), HAILO_INVALID_OPERATION,
-        "NHCW_to_NHW argmax Transform is supported only when src features ({}) is equal/smaller than {}",
-        src_image_shape.features, std::numeric_limits<T>::max());
-
-    const auto src_row_size = src_image_shape.width * src_image_shape.features;
-    const auto dst_row_size = dst_image_shape.width;
-    for (uint32_t r = 0; r < src_image_shape.height; r++) {
-        // For each row, we iterate on all columns, and find the max feature. It can be implemented better by iteratre
-        // over all features, and on each iteration save the max value for each column.
-        const T *src_row = src_ptr + (r * src_row_size);
-        T *dst_row = dst_ptr + (r * dst_row_size);
-        for (uint32_t w = 0; w < dst_image_shape.width; w++) {
-            const T *offset_in_row = src_row + w;
-            T max_index = 0;
-            T max_value = *offset_in_row;
-
-            for (uint32_t c = 1; c < src_image_shape.features; c++) {
-                offset_in_row += src_image_shape.width;
-                const auto &current_value = *offset_in_row;
-                if (current_value > max_value) {
-                    max_index = static_cast<T>(c);
-                    max_value = current_value;
-                }
-            }
-
-            dst_row[w] = max_index;
-        }
-    }
-
-    return HAILO_SUCCESS;
-}
-
-
 template<typename T>
 hailo_status transform__h2d_YUY2_to_YUY2(const T *src_ptr, T *dst_ptr, uint32_t shape_size)
 {
@@ -944,7 +895,6 @@ hailo_status FrameOutputTransformContext::quantize_stream(const void *dst_ptr)
             }
             break;
         case HAILO_FORMAT_TYPE_FLOAT32:
-            /* if output layer is argmax - do not rescale */
             if (HAILO_FORMAT_ORDER_NHW != m_dst_format.order) {
                 if (HAILO_FORMAT_TYPE_UINT8 == m_src_format.type) {
                     if (m_are_all_qps_the_same) {
@@ -1174,7 +1124,7 @@ hailo_status reorder_input_stream(const void *src_ptr, hailo_3d_image_shape_t sr
                 transform__h2d_NV12_to_NV12<uint16_t>((uint16_t*)src_ptr, &src_image_shape, (uint16_t*)dst_ptr, &dst_image_shape);
                 break;
             default:
-                LOGGER__ERROR("Invalid src-buffer's type format {}", src_format.type);
+                LOGGER__ERROR("Invalid src-buffer's type format {}", static_cast<int>(src_format.type));
                 return HAILO_INVALID_ARGUMENT;
         }
         return HAILO_SUCCESS;
@@ -1190,7 +1140,7 @@ hailo_status reorder_input_stream(const void *src_ptr, hailo_3d_image_shape_t sr
                 transform__h2d_I420_to_YYYYUV<uint16_t>((uint16_t*)src_ptr, &src_image_shape, (uint16_t*)dst_ptr, &dst_image_shape);
                 break;
             default:
-                LOGGER__ERROR("Invalid src-buffer's type format {}", src_format.type);
+                LOGGER__ERROR("Invalid src-buffer's type format {}", static_cast<int>(src_format.type));
                 return HAILO_INVALID_ARGUMENT;
         }
         return HAILO_SUCCESS;
@@ -1352,18 +1302,6 @@ hailo_status reorder_output_stream(const void *src_ptr, hailo_3d_image_shape_t s
                     LOGGER__ERROR("Invalid src-buffer's type format");
                     return HAILO_INVALID_ARGUMENT;
             }
-    } else if ((HAILO_FORMAT_ORDER_NHCW == src_format.order) &&
-               (HAILO_FORMAT_ORDER_NHW == dst_format.order)  &&
-               (0 != (HAILO_FORMAT_FLAGS_HOST_ARGMAX & src_format.flags)))  {
-            switch (src_format.type) {
-            case HAILO_FORMAT_TYPE_UINT8:
-                return transform__d2h_argmax_NHCW_to_NHW<uint8_t>((uint8_t*)src_ptr, src_image_shape, (uint8_t*)dst_ptr, dst_image_shape);
-            case HAILO_FORMAT_TYPE_UINT16:
-                return transform__d2h_argmax_NHCW_to_NHW<uint16_t>((uint16_t*)src_ptr, src_image_shape, (uint16_t*)dst_ptr, dst_image_shape);
-            default:
-                LOGGER__ERROR("Invalid src-buffer's type format");
-                return HAILO_INVALID_ARGUMENT;
-            }
     } else if ((HAILO_FORMAT_ORDER_NHWC == src_format.order) &&
                (HAILO_FORMAT_ORDER_NHWC) == dst_format.order) {
             switch (src_format.type) {
@@ -1374,7 +1312,7 @@ hailo_status reorder_output_stream(const void *src_ptr, hailo_3d_image_shape_t s
                     transform__d2h_NHWC_to_NHWC<uint16_t>((uint16_t*)src_ptr, &src_image_shape, (uint16_t*)dst_ptr, &dst_image_shape);
                     break;
                 default:
-                    LOGGER__ERROR("Invalid src-buffer's type format {}", src_format.type);
+                    LOGGER__ERROR("Invalid src-buffer's type format {}", static_cast<int>(src_format.type));
                     return HAILO_INVALID_ARGUMENT;
             }
     } else {
diff --git a/hailort/libhailort/src/utils/CMakeLists.txt b/hailort/libhailort/src/utils/CMakeLists.txt
index 57d45d50..a2998006 100644
--- a/hailort/libhailort/src/utils/CMakeLists.txt
+++ b/hailort/libhailort/src/utils/CMakeLists.txt
@@ -1,4 +1,4 @@
-cmake_minimum_required(VERSION 3.0.0)
+cmake_minimum_required(VERSION 3.5.0)
 
 set(SRC_FILES
     ${CMAKE_CURRENT_SOURCE_DIR}/hailort_common.cpp
@@ -11,8 +11,6 @@ set(SRC_FILES
     ${CMAKE_CURRENT_SOURCE_DIR}/measurement_utils.cpp
 )
 
-if(HAILO_BUILD_PROFILER)
-    add_subdirectory(profiler)
-endif()
+add_subdirectory(profiler)
 
 set(HAILORT_CPP_SOURCES ${HAILORT_CPP_SOURCES} ${SRC_FILES} PARENT_SCOPE)
diff --git a/hailort/libhailort/src/utils/buffer_storage.cpp b/hailort/libhailort/src/utils/buffer_storage.cpp
index 009357bf..441f7899 100644
--- a/hailort/libhailort/src/utils/buffer_storage.cpp
+++ b/hailort/libhailort/src/utils/buffer_storage.cpp
@@ -34,6 +34,24 @@ BufferStorageParams BufferStorageParams::create_dma()
     return result;
 }
 
+BufferStorageParams BufferStorageParams::create_shared_memory(const std::string &shm_name, bool memory_owner)
+{
+    BufferStorageParams result{};
+    result.flags = HAILO_BUFFER_FLAGS_SHARED_MEMORY;
+    result.shared_memory_name = shm_name;
+    result.memory_owner = memory_owner;
+    return result;
+}
+
+BufferStorageParams BufferStorageParams::open_shared_memory(const std::string &shm_name)
+{
+    // Opening is the non-owning flavour of create_shared_memory(): the flags and the
+    // name are filled in identically, only memory_owner is forced to false.
+    constexpr bool memory_owner = false;
+
+    return create_shared_memory(shm_name, memory_owner);
+}
+
 BufferStorageParams::BufferStorageParams() :
     flags(HAILO_BUFFER_FLAGS_NONE)
 {}
@@ -52,10 +70,14 @@ Expected<BufferStoragePtr> BufferStorage::create(size_t size, const BufferStorag
         auto result = ContinuousStorage::create(size);
         CHECK_EXPECTED(result);
         return std::static_pointer_cast<BufferStorage>(result.release());
+    } else if (0 != (params.flags & HAILO_BUFFER_FLAGS_SHARED_MEMORY)) {
+        auto result = SharedMemoryStorage::create(size, params.shared_memory_name, params.memory_owner);
+        CHECK_EXPECTED(result);
+        return std::static_pointer_cast<BufferStorage>(result.release());
     }
 
     // TODO: HRT-10903
-    LOGGER__ERROR("Buffer storage flags not currently supported {}", params.flags);
+    LOGGER__ERROR("Buffer storage flags not currently supported {}", static_cast<int>(params.flags));
     return make_unexpected(HAILO_NOT_IMPLEMENTED);
 }
 
@@ -69,6 +91,11 @@ Expected<uint64_t> BufferStorage::dma_address()
     return make_unexpected(HAILO_NOT_IMPLEMENTED);
 }
 
+Expected<std::string> BufferStorage::shm_name()
+{
+    return make_unexpected(HAILO_INVALID_OPERATION);
+}
+
 Expected<HeapStoragePtr> HeapStorage::create(size_t size)
 {
     std::unique_ptr<uint8_t[]> data(new (std::nothrow) uint8_t[size]);
@@ -184,4 +211,48 @@ Expected<void *> ContinuousStorage::release() noexcept
     return make_unexpected(HAILO_INVALID_OPERATION);
 }
 
+Expected<SharedMemoryStoragePtr> SharedMemoryStorage::create(size_t size, const std::string &shm_name, bool memory_owner)
+{
+    // memory_owner selects SharedMemoryBuffer::create() (true) or SharedMemoryBuffer::open() (false) for shm_name/size.
+    SharedMemoryBufferPtr shm_buffer;
+    if (memory_owner) {
+        TRY(shm_buffer, SharedMemoryBuffer::create(size, shm_name));
+    } else {
+        TRY(shm_buffer, SharedMemoryBuffer::open(size, shm_name));
+    }
+    // shm_buffer is not used past this point, so hand it over instead of copying the pointer.
+    auto result = make_shared_nothrow<SharedMemoryStorage>(std::move(shm_buffer));
+    CHECK_NOT_NULL_AS_EXPECTED(result, HAILO_OUT_OF_HOST_MEMORY);
+    return result;
+}
+
+SharedMemoryStorage::SharedMemoryStorage(SharedMemoryBufferPtr shm_buffer) :
+    m_shm_buffer(std::move(shm_buffer)) // by-value sink parameter: move it into the member instead of copying it a second time
+{}
+
+SharedMemoryStorage::SharedMemoryStorage(SharedMemoryStorage&& other) noexcept :
+    BufferStorage(std::move(other)), // moves the base sub-object only; other.m_shm_buffer is still intact below
+    m_shm_buffer(std::move(other.m_shm_buffer)) // actually transfer the buffer reference; the original copied it, leaving `other` as a co-owner
+{}
+
+size_t SharedMemoryStorage::size() const
+{
+    return m_shm_buffer->size();
+}
+
+void *SharedMemoryStorage::user_address()
+{
+    return m_shm_buffer->user_address();
+}
+
+Expected<void *> SharedMemoryStorage::release() noexcept
+{
+    return make_unexpected(HAILO_INVALID_OPERATION);
+}
+
+Expected<std::string> SharedMemoryStorage::shm_name()
+{
+    return m_shm_buffer->shm_name();
+}
+
 } /* namespace hailort */
diff --git a/hailort/libhailort/src/utils/buffer_storage.hpp b/hailort/libhailort/src/utils/buffer_storage.hpp
index c074153c..43d7f294 100644
--- a/hailort/libhailort/src/utils/buffer_storage.hpp
+++ b/hailort/libhailort/src/utils/buffer_storage.hpp
@@ -14,6 +14,8 @@
 #include "hailo/expected.hpp"
 #include "hailo/buffer.hpp"
 
+#include "common/shared_memory_buffer.hpp"
+
 #include "utils/exported_resource_manager.hpp"
 #include "vdma/memory/continuous_buffer.hpp"
 
@@ -37,6 +39,7 @@ class BufferStorage;
 class HeapStorage;
 class DmaStorage;
 class ContinuousStorage;
+class SharedMemoryStorage;
 class HailoRTDriver;
 class Buffer;
 
@@ -88,6 +91,7 @@ class BufferStorage
     // Internal functions
     virtual Expected<vdma::DmaAbleBufferPtr> get_dma_able_buffer();
     virtual Expected<uint64_t> dma_address();
+    virtual Expected<std::string> shm_name();
 
     BufferStorage() = default;
 };
@@ -173,6 +177,31 @@ class ContinuousStorage : public BufferStorage
     vdma::ContinuousBuffer m_continuous_buffer;
 };
 
+using SharedMemoryStoragePtr = std::shared_ptr<SharedMemoryStorage>;
+
+/**
+ * BufferStorage implementation that wraps a SharedMemoryBuffer (see common/shared_memory_buffer.hpp).
+ */
+class SharedMemoryStorage : public BufferStorage
+{
+public:
+    static Expected<SharedMemoryStoragePtr> create(size_t size, const std::string &shm_name, bool memory_owner); // memory_owner: true -> SharedMemoryBuffer::create, false -> SharedMemoryBuffer::open
+    SharedMemoryStorage(SharedMemoryBufferPtr shm_buffer);
+    SharedMemoryStorage(SharedMemoryStorage&& other) noexcept;
+    SharedMemoryStorage(const SharedMemoryStorage &) = delete;
+    SharedMemoryStorage &operator=(SharedMemoryStorage &&) = delete;
+    SharedMemoryStorage &operator=(const SharedMemoryStorage &) = delete;
+    virtual ~SharedMemoryStorage() = default;
+    // BufferStorage overrides: size(), user_address() and shm_name() forward to the wrapped SharedMemoryBuffer.
+    virtual size_t size() const override;
+    virtual void *user_address() override;
+    virtual Expected<void *> release() noexcept override; // always fails with HAILO_INVALID_OPERATION
+    virtual Expected<std::string> shm_name() override;
+
+private:
+    SharedMemoryBufferPtr m_shm_buffer;
+};
+
 } /* namespace hailort */
 
 #endif /* _HAILO_BUFFER_STORAGE_HPP_ */
diff --git a/hailort/libhailort/src/utils/hailort_common.cpp b/hailort/libhailort/src/utils/hailort_common.cpp
index be19c895..8ca3401a 100644
--- a/hailort/libhailort/src/utils/hailort_common.cpp
+++ b/hailort/libhailort/src/utils/hailort_common.cpp
@@ -108,30 +108,4 @@ Expected<hailo_pix_buffer_t> HailoRTCommon::as_hailo_pix_buffer(MemoryView memor
     }
 }
 
-bool HailoRTCommon::is_power_measurement_supported(const hailo_device_architecture_t &hw_arch)
-{
-    switch(hw_arch) {
-    case HAILO_ARCH_HAILO8:
-        return true;
-    default:
-        return false;
-    }
-}
-
-bool HailoRTCommon::is_current_measurement_supported(const hailo_device_architecture_t &hw_arch)
-{
-    return is_power_measurement_supported(hw_arch);
-}
-
-bool HailoRTCommon::is_temp_measurement_supported(const hailo_device_architecture_t &hw_arch)
-{
-    switch(hw_arch) {
-    case HAILO_ARCH_HAILO8:
-    case HAILO_ARCH_HAILO8L:
-        return true;
-    default:
-        return false;
-    }
-}
-
 } /* namespace hailort */
diff --git a/hailort/libhailort/src/utils/hailort_logger.cpp b/hailort/libhailort/src/utils/hailort_logger.cpp
index 39ef76f1..b9302f54 100644
--- a/hailort/libhailort/src/utils/hailort_logger.cpp
+++ b/hailort/libhailort/src/utils/hailort_logger.cpp
@@ -9,6 +9,8 @@
 
 #include "common/utils.hpp"
 #include "common/filesystem.hpp"
+#include "common/internal_env_vars.hpp"
+#include "common/env_vars.hpp"
 
 #include "utils/hailort_logger.hpp"
 
@@ -30,6 +32,7 @@
 namespace hailort
 {
 
+
 #define MAX_LOG_FILE_SIZE (1024 * 1024) // 1MB
 
 #define HAILORT_NAME ("HailoRT")
@@ -44,8 +47,6 @@ namespace hailort
 #define HAILORT_LOCAL_FILE_LOGGER_PATTERN ("[%Y-%m-%d %X.%e] [%t] [%n] [%l] [%s:%#] [%!] %v") // File logger will print: [timestamp] [TID] [hailort] [log level] [source file:line number] [function name] msg
 #define HAILORT_ANDROID_LOGGER_PATTERN ("%v")               // Android logger will print only message (additional info are built-in)
 
-#define HAILORT_LOGGER_PATH_ENV_VAR ("HAILORT_LOGGER_PATH")
-#define HAILORT_LOGGER_FLUSH_EVERY_PRINT_ENV_VAR ("HAILORT_LOGGER_FLUSH_EVERY_PRINT")
 #define PERIODIC_FLUSH_INTERVAL_IN_SECONDS (5)
 
 
@@ -65,8 +66,9 @@ std::string HailoRTLogger::parse_log_path(const char *log_path)
 
 std::string HailoRTLogger::get_log_path(const std::string &path_env_var)
 {
-    auto log_path_c_str = std::getenv(path_env_var.c_str());
-    return parse_log_path(log_path_c_str);
+    // When get_env_variable() yields no value, parse_log_path() receives an empty string.
+    auto env_log_path = get_env_variable(path_env_var.c_str());
+    return parse_log_path(env_log_path ? env_log_path.value().c_str() : "");
 }
 
 std::string HailoRTLogger::get_main_log_path()
@@ -202,22 +204,6 @@ HailoRTLogger::HailoRTLogger(spdlog::level::level_enum console_level, spdlog::le
     spdlog::set_default_logger(m_hailort_logger);
 }
 
-bool HailoRTLogger::should_flush_every_print(const std::string &flush_every_print_env_var)
-{
-    auto flush_every_print_c_str = std::getenv(flush_every_print_env_var.c_str());
-    if ((nullptr == flush_every_print_c_str) || (std::strlen(flush_every_print_c_str) == 0)) {
-        return false;
-    }
-    std::string flush_every_print_c_str_lower_case(flush_every_print_c_str);
-    for (char& ch : flush_every_print_c_str_lower_case) {
-        ch = static_cast<char>(std::tolower(ch));
-    }
-    if (strcmp(flush_every_print_c_str_lower_case.c_str(), "1") == 0) {
-        return true;
-    }
-    return false;
-}
-
 void HailoRTLogger::set_levels(spdlog::level::level_enum console_level, spdlog::level::level_enum file_level,
     spdlog::level::level_enum flush_level)
 {
@@ -225,8 +211,7 @@ void HailoRTLogger::set_levels(spdlog::level::level_enum console_level, spdlog::
     m_main_log_file_sink->set_level(file_level);
     m_local_log_file_sink->set_level(file_level);
 
-    bool flush_every_print = should_flush_every_print(HAILORT_LOGGER_FLUSH_EVERY_PRINT_ENV_VAR);
-    if (flush_every_print){
+    if (is_env_variable_on(HAILORT_LOGGER_FLUSH_EVERY_PRINT_ENV_VAR)) {
         m_hailort_logger->flush_on(spdlog::level::trace);
         std::cerr << "HailoRT warning: Flushing log file on every print. May reduce HailoRT performance!" << std::endl;
     } else {
diff --git a/hailort/libhailort/src/utils/hailort_logger.hpp b/hailort/libhailort/src/utils/hailort_logger.hpp
index 4120f10c..626dc849 100644
--- a/hailort/libhailort/src/utils/hailort_logger.hpp
+++ b/hailort/libhailort/src/utils/hailort_logger.hpp
@@ -18,10 +18,10 @@
 #include "hailo/hailort.h"
 #include "common/logger_macros.hpp"
 #include "common/utils.hpp"
+#include "common/env_vars.hpp"
 
 namespace hailort
 {
-#define HAILORT_CONSOLE_LOGGER_LEVEL ("HAILORT_CONSOLE_LOGGER_LEVEL")
 
 #ifdef _WIN32
 #define PATH_SEPARATOR "\\"
@@ -40,14 +40,14 @@ class HailoRTLogger {
 #endif
     {
         static std::unique_ptr<HailoRTLogger> instance = nullptr;
-        auto user_console_logger_level = std::getenv(HAILORT_CONSOLE_LOGGER_LEVEL);
-        if ((nullptr != user_console_logger_level) && (std::strlen(user_console_logger_level) > 0)){
-            auto expected_console_level = get_console_logger_level_from_string(user_console_logger_level);
+        auto user_console_logger_level = get_env_variable(HAILORT_CONSOLE_LOGGER_LEVEL_ENV_VAR);
+        if (user_console_logger_level) {
+            auto expected_console_level = get_console_logger_level_from_string(user_console_logger_level.value());
             if (expected_console_level) {
                 console_level = expected_console_level.release();
             } else {
                 LOGGER__WARNING("Failed to parse console logger level from environment variable: {}, status: {}", 
-                    user_console_logger_level, expected_console_level.status());
+                    user_console_logger_level.value(), expected_console_level.status());
             }
         }
         if (nullptr == instance) {
@@ -62,7 +62,6 @@ class HailoRTLogger {
     void operator=(HailoRTLogger const&) = delete;
 
     static std::string get_log_path(const std::string &path_env_var);
-    static bool should_flush_every_print(const std::string &flush_every_print_env_var);
     static std::string get_main_log_path();
     static std::shared_ptr<spdlog::sinks::sink> create_file_sink(const std::string &dir_path, const std::string &filename, bool rotate);
 
diff --git a/hailort/libhailort/src/utils/measurement_utils.cpp b/hailort/libhailort/src/utils/measurement_utils.cpp
index b958eaaa..1fc3b122 100644
--- a/hailort/libhailort/src/utils/measurement_utils.cpp
+++ b/hailort/libhailort/src/utils/measurement_utils.cpp
@@ -9,6 +9,7 @@
 
 #include "hailo/hailort.h"
 #include "measurement_utils.hpp"
+#include <algorithm>
 
 
 namespace hailort {
diff --git a/hailort/libhailort/src/utils/profiler/CMakeLists.txt b/hailort/libhailort/src/utils/profiler/CMakeLists.txt
index 56ab701f..917cfc1a 100644
--- a/hailort/libhailort/src/utils/profiler/CMakeLists.txt
+++ b/hailort/libhailort/src/utils/profiler/CMakeLists.txt
@@ -1,4 +1,4 @@
-cmake_minimum_required(VERSION 3.0.0)
+cmake_minimum_required(VERSION 3.5.0)
 
 set(SRC_FILES
     ${CMAKE_CURRENT_SOURCE_DIR}/tracer.cpp
diff --git a/hailort/libhailort/src/utils/profiler/handler.hpp b/hailort/libhailort/src/utils/profiler/handler.hpp
index 4b46a422..fff75bf0 100644
--- a/hailort/libhailort/src/utils/profiler/handler.hpp
+++ b/hailort/libhailort/src/utils/profiler/handler.hpp
@@ -271,6 +271,9 @@ class Handler
     virtual void handle_trace(const DumpProfilerStateTrace&) {};
     virtual void handle_trace(const InitProfilerProtoTrace&) {};
     virtual void handle_trace(const HefLoadedTrace&) {};
+    virtual bool should_dump_trace_file() { return false; } // polled by Tracer::trace after each handled trace; default: never dump
+    virtual bool should_stop () { return false; } // returning true makes Tracer::trace erase this handler; default: keep it
+    virtual hailo_status dump_trace_file() { return HAILO_SUCCESS; } // called by Tracer::trace when should_dump_trace_file() is true
 
 };
 
@@ -278,4 +281,4 @@ struct JSON;
 
 }
 
-#endif /* _HAILO_HANDLER_HPP */
\ No newline at end of file
+#endif /* _HAILO_HANDLER_HPP */
diff --git a/hailort/libhailort/src/utils/profiler/monitor_handler.cpp b/hailort/libhailort/src/utils/profiler/monitor_handler.cpp
index 598bb905..1b639327 100644
--- a/hailort/libhailort/src/utils/profiler/monitor_handler.cpp
+++ b/hailort/libhailort/src/utils/profiler/monitor_handler.cpp
@@ -22,8 +22,9 @@ MonitorHandler::~MonitorHandler()
     clear_monitor();
 }
 
-void MonitorHandler::clear_monitor() {
-
+void MonitorHandler::clear_monitor()
+{
+    m_unique_vdevice_hash = {};
     if (m_is_monitor_currently_working) {
         m_is_monitor_currently_working = false;
         m_mon_shutdown_event->signal();
@@ -43,7 +44,7 @@ void MonitorHandler::handle_trace(const MonitorStartTrace &trace)
 void MonitorHandler::handle_trace(const MonitorEndTrace &trace)
 {
     if (m_unique_vdevice_hash == trace.unique_vdevice_hash) {
-        m_unique_vdevice_hash = {};
+        clear_monitor(); // resets m_unique_vdevice_hash and, if the monitor is running, signals its shutdown event
     }
 }
 
@@ -425,4 +426,4 @@ void MonitorHandler::clear_accumulators()
     }
 }
 
-}
\ No newline at end of file
+}
diff --git a/hailort/libhailort/src/utils/profiler/monitor_handler.hpp b/hailort/libhailort/src/utils/profiler/monitor_handler.hpp
index 5ae124de..4509a6b0 100644
--- a/hailort/libhailort/src/utils/profiler/monitor_handler.hpp
+++ b/hailort/libhailort/src/utils/profiler/monitor_handler.hpp
@@ -45,8 +45,6 @@ namespace hailort
 {
 
 #define SCHEDULER_MON_TMP_DIR ("/tmp/hmon_files/")
-#define SCHEDULER_MON_ENV_VAR ("HAILO_MONITOR")
-#define SCHEDULER_MON_ENV_VAR_VALUE ("1")
 #define DEFAULT_SCHEDULER_MON_INTERVAL (std::chrono::seconds(1))
 #define SCHEDULER_MON_NAN_VAL (-1)
 
@@ -210,4 +208,4 @@ class MonitorHandler : public Handler
 };
 }
 
-#endif /* _MONITOR_HANDLER_HPP_ */
\ No newline at end of file
+#endif /* _MONITOR_HANDLER_HPP_ */
diff --git a/hailort/libhailort/src/utils/profiler/profiler_utils.hpp b/hailort/libhailort/src/utils/profiler/profiler_utils.hpp
index daf284ed..6d2038f3 100644
--- a/hailort/libhailort/src/utils/profiler/profiler_utils.hpp
+++ b/hailort/libhailort/src/utils/profiler/profiler_utils.hpp
@@ -196,4 +196,4 @@ std::string get_libhailort_version_representation()
 
 }
 
-#endif // _HAILO_PROFILER_UTILS_HPP_
\ No newline at end of file
+#endif // _HAILO_PROFILER_UTILS_HPP_
diff --git a/hailort/libhailort/src/utils/profiler/scheduler_profiler_handler.cpp b/hailort/libhailort/src/utils/profiler/scheduler_profiler_handler.cpp
index c86f6a93..c0ee8c48 100644
--- a/hailort/libhailort/src/utils/profiler/scheduler_profiler_handler.cpp
+++ b/hailort/libhailort/src/utils/profiler/scheduler_profiler_handler.cpp
@@ -9,30 +9,17 @@
 
 #include "scheduler_profiler_handler.hpp"
 #include "profiler_utils.hpp"
-
 #include "common/logger_macros.hpp"
-
 #include "utils/hailort_logger.hpp"
 
-#include <spdlog/sinks/rotating_file_sink.h>
-#include <spdlog/sinks/stdout_color_sinks.h>
-#include <spdlog/sinks/android_sink.h>
-#include <spdlog/sinks/null_sink.h>
-
 #include <google/protobuf/io/zero_copy_stream.h>
 #include <google/protobuf/io/zero_copy_stream_impl.h>
 
 #include <fstream>
 #include <iomanip>
+#include <mutex>
 
 
-#define SCHEDULER_PROFILER_NAME ("SchedulerProfiler")
-#define PROFILER_FILE_ENV_VAR ("HAILO_TRACE_PATH")
-#define SCHEDULER_PROFILER_LOGGER_FILENAME ("scheduler_profiler.json")
-#define SCHEDULER_PROFILER_LOGGER_PATTERN ("%v")
-
-#define SCHEDULER_PROFILER_LOGGER_PATH ("SCHEDULER_PROFILER_LOGGER_PATH")
-
 static const std::string PROFILER_DEFAULT_FILE_NAME_PREFIX("hailort");
 static const std::string PROFILER_DEFAULT_FILE_NAME_SUFFIX(".hrtt");
 
@@ -47,36 +34,45 @@ std::string get_current_datetime() {
   return ss.str();
 }
 
-SchedulerProfilerHandler::SchedulerProfilerHandler(int64_t &start_time)
-#ifndef __ANDROID__
-    : m_file_sink(HailoRTLogger::create_file_sink(HailoRTLogger::get_log_path(SCHEDULER_PROFILER_LOGGER_PATH), SCHEDULER_PROFILER_LOGGER_FILENAME, false)),
-      m_first_write(true)
-#endif
+SchedulerProfilerHandler::SchedulerProfilerHandler(size_t dump_after_n_seconds, size_t dump_after_n_kb) :
+      m_time_in_seconds_bounded_dump(dump_after_n_seconds), // 0 disables the time-bounded dump
+      m_size_in_kb_bounded_dump(dump_after_n_kb) // 0 disables the size-bounded dump
 {
-#ifndef __ANDROID__
-    spdlog::sinks_init_list sink_list = { m_file_sink };
-    m_profiler_logger = make_shared_nothrow<spdlog::logger>(SCHEDULER_PROFILER_NAME, sink_list.begin(), sink_list.end());
-    m_file_sink->set_level(spdlog::level::level_enum::info);
-    m_file_sink->set_pattern(SCHEDULER_PROFILER_LOGGER_PATTERN);
-    std::stringstream ss;
-    ss << "{\"ns_since_epoch_zero_time\": \"" << start_time << "\",\n\"scheduler_actions\": [\n";
-    m_profiler_logger->info(ss.str());
-#else
-    (void)start_time;
-#endif
+    if (m_time_in_seconds_bounded_dump) { // time-bounded mode: start a one-shot timer thread
+        m_timer_thread = std::thread([this](){
+            std::unique_lock<std::mutex> lock(m_cv_mutex);
+            m_cv.wait_for(lock,
+                std::chrono::seconds(m_time_in_seconds_bounded_dump),
+                [this] { return m_shutting_down; }); // wakes early once the destructor sets m_shutting_down
+            dump_trace_file(); // reached after the timeout and after an early shutdown wake-up alike
+        });
+    }
 }
 
 SchedulerProfilerHandler::~SchedulerProfilerHandler()
 {
-    m_profiler_logger->info("]\n}");
+    {
+        std::lock_guard<std::mutex> lock(m_cv_mutex);
+        m_shutting_down = true; // written under m_cv_mutex, the same mutex the timer thread waits with
+    }
+    m_cv.notify_all(); // wake the timer thread (if any) so it stops waiting
+    if (m_timer_thread.joinable()) { // the thread is started only when a time bound was configured
+        m_timer_thread.join();
+    }
 }
 
-void SchedulerProfilerHandler::serialize_and_dump_proto()
+hailo_status SchedulerProfilerHandler::serialize_and_dump_proto()
 {
-    auto file_env_var = std::getenv(PROFILER_FILE_ENV_VAR);
+    std::lock_guard<std::mutex> lock(m_dump_file_mutex);
+
+    if(m_file_already_dumped) {
+        return HAILO_SUCCESS;
+    }
+
     std::string file_name = PROFILER_DEFAULT_FILE_NAME_PREFIX + "_" + get_current_datetime() + PROFILER_DEFAULT_FILE_NAME_SUFFIX;
-    if (nullptr != file_env_var) {
-        file_name = std::string(file_env_var) + PATH_SEPARATOR + file_name;
+    auto file_env_var = get_env_variable(PROFILER_FILE_ENV_VAR);
+    if (file_env_var) {
+        file_name = file_env_var.value() + PATH_SEPARATOR + file_name;
     }
 
     std::ofstream output_file(std::string(file_name), std::ios::out |std::ios::binary);
@@ -84,66 +80,13 @@ void SchedulerProfilerHandler::serialize_and_dump_proto()
 
     if(!m_profiler_trace_proto.SerializeToZeroCopyStream(&stream)) {
         LOGGER__ERROR("Failed writing profiling data to file {}.", file_name);
+        return HAILO_FILE_OPERATION_FAILURE;
+    } else {
+        m_file_already_dumped = true;
+        return HAILO_SUCCESS;
     }
 }
 
-struct JSON
-{
-    std::unordered_map<std::string, std::string> members;
-    JSON(const std::initializer_list<std::pair<const std::string, std::string>> &dict) : members{dict} {}
-    JSON(const std::unordered_map<std::string, uint32_t> &dict) {
-        for (auto &pair : dict) {
-            members.insert({pair.first, std::to_string(pair.second)});
-        }
-    }
-};
-
-template<class T>
-std::string json_to_string(const T &val) {
-    return std::to_string(val);
-}
-
-template<>
-std::string json_to_string(const std::string &val) {
-    std::ostringstream os;
-    os << std::quoted(val);
-    return os.str();
-}
-
-template<>
-std::string json_to_string(const bool &bool_val) {
-    return bool_val ? "true" : "false";
-}
-
-template<>
-std::string json_to_string(const JSON &json_val) {
-    std::ostringstream os;
-    os << "{\n";
-    size_t i = 0;
-    for (const auto &kv : json_val.members) {
-        ++i;
-        os << std::quoted(kv.first) << " : ";
-        os << kv.second;
-        if (i != json_val.members.size()) {
-            os << ",\n";
-        }
-    }
-    os << "\n}";
-    return os.str();
-}
-
-bool SchedulerProfilerHandler::comma()
-{
-    auto result = !m_first_write;
-    m_first_write = false;
-    return result;
-}
-
-void SchedulerProfilerHandler::log(JSON json)
-{
-    m_profiler_logger->info("{}{}", comma() ? ",\n" : "", json_to_string(json));
-}
-
 void SchedulerProfilerHandler::handle_trace(const InitProfilerProtoTrace &trace)
 {
     ProfilerTime curr_time = get_curr_time();
@@ -186,16 +129,6 @@ void SchedulerProfilerHandler::handle_trace(const HefLoadedTrace &trace)
 
 void SchedulerProfilerHandler::handle_trace(const AddCoreOpTrace &trace)
 {
-    log(JSON({
-        {"action", json_to_string(trace.name)},
-        {"timestamp", json_to_string(trace.timestamp)},
-        {"core_op_name", json_to_string(trace.core_op_name)},
-        {"core_op_handle", json_to_string(trace.core_op_handle)},
-        {"timeout", json_to_string((uint64_t)trace.timeout)},
-        {"threshold", json_to_string((uint64_t)trace.threshold)},
-        {"max_batch_size", json_to_string((uint64_t)trace.batch_size)}
-    }));
-
     std::lock_guard<std::mutex> lock(m_proto_lock);
     auto added_trace = m_profiler_trace_proto.add_added_trace();
     added_trace->mutable_added_core_op()->set_time_stamp(trace.timestamp);
@@ -215,15 +148,6 @@ void SchedulerProfilerHandler::handle_trace(const AddDeviceTrace &trace)
 
 void SchedulerProfilerHandler::handle_trace(const AddStreamH2DTrace &trace)
 {
-    log(JSON({
-        {"action", json_to_string(trace.name)},
-        {"timestamp", json_to_string(trace.timestamp)},
-        {"device_id", json_to_string(trace.device_id)},
-        {"core_op_name", json_to_string(trace.core_op_name)},
-        {"stream_name", json_to_string(trace.stream_name)},
-        {"queue_size", json_to_string(trace.queue_size)}
-    }));
-
     std::lock_guard<std::mutex> lock(m_proto_lock);
     auto added_trace = m_profiler_trace_proto.add_added_trace();
     added_trace->mutable_added_stream()->set_device_id(trace.device_id);
@@ -236,15 +160,6 @@ void SchedulerProfilerHandler::handle_trace(const AddStreamH2DTrace &trace)
 
 void SchedulerProfilerHandler::handle_trace(const AddStreamD2HTrace &trace)
 {
-    log(JSON({
-        {"action", json_to_string(trace.name)},
-        {"timestamp", json_to_string(trace.timestamp)},
-        {"device_id", json_to_string(trace.device_id)},
-        {"core_op_name", json_to_string(trace.core_op_name)},
-        {"stream_name", json_to_string(trace.stream_name)},
-        {"queue_size", json_to_string(trace.queue_size)}
-    }));
-
     std::lock_guard<std::mutex> lock(m_proto_lock);
     auto added_trace = m_profiler_trace_proto.add_added_trace();
     added_trace->mutable_added_stream()->set_device_id(trace.device_id);
@@ -257,13 +172,6 @@ void SchedulerProfilerHandler::handle_trace(const AddStreamD2HTrace &trace)
 
 void SchedulerProfilerHandler::handle_trace(const FrameEnqueueH2DTrace &trace)
 {
-    log(JSON({
-        {"action", json_to_string(trace.name)},
-        {"timestamp", json_to_string(trace.timestamp)},
-        {"core_op_handle", json_to_string(trace.core_op_handle)},
-        {"queue_name", json_to_string(trace.queue_name)}
-    }));
-
     std::lock_guard<std::mutex> lock(m_proto_lock);
     auto added_trace = m_profiler_trace_proto.add_added_trace();
     added_trace->mutable_frame_enqueue()->set_direction(ProtoProfilerStreamDirection::PROTO__STREAM_DIRECTION__H2D);
@@ -274,14 +182,6 @@ void SchedulerProfilerHandler::handle_trace(const FrameEnqueueH2DTrace &trace)
 
 void SchedulerProfilerHandler::handle_trace(const FrameDequeueH2DTrace &trace)
 {
-    log(JSON({
-        {"action", json_to_string(trace.name)},
-        {"timestamp", json_to_string(trace.timestamp)},
-        {"device_id", json_to_string(trace.device_id)},
-        {"core_op_handle", json_to_string(trace.core_op_handle)},
-        {"queue_name", json_to_string(trace.queue_name)}
-    }));
-
     std::lock_guard<std::mutex> lock(m_proto_lock);
     auto added_trace = m_profiler_trace_proto.add_added_trace();
     added_trace->mutable_frame_dequeue()->set_direction(ProtoProfilerStreamDirection::PROTO__STREAM_DIRECTION__H2D);
@@ -293,13 +193,6 @@ void SchedulerProfilerHandler::handle_trace(const FrameDequeueH2DTrace &trace)
 
 void SchedulerProfilerHandler::handle_trace(const FrameDequeueD2HTrace &trace)
 {
-    log(JSON({
-        {"action", json_to_string(trace.name)},
-        {"timestamp", json_to_string(trace.timestamp)},
-        {"core_op_handle", json_to_string(trace.core_op_handle)},
-        {"queue_name", json_to_string(trace.queue_name)}
-    }));
-
     std::lock_guard<std::mutex> lock(m_proto_lock);
     auto added_trace = m_profiler_trace_proto.add_added_trace();
     added_trace->mutable_frame_dequeue()->set_direction(ProtoProfilerStreamDirection::PROTO__STREAM_DIRECTION__D2H);
@@ -310,14 +203,6 @@ void SchedulerProfilerHandler::handle_trace(const FrameDequeueD2HTrace &trace)
 
 void SchedulerProfilerHandler::handle_trace(const FrameEnqueueD2HTrace &trace)
 {
-    log(JSON({
-        {"action", json_to_string(trace.name)},
-        {"timestamp", json_to_string(trace.timestamp)},
-        {"device_id", json_to_string(trace.device_id)},
-        {"core_op_handle", json_to_string(trace.core_op_handle)},
-        {"queue_name", json_to_string(trace.queue_name)}
-    }));
-
     std::lock_guard<std::mutex> lock(m_proto_lock);
     auto added_trace = m_profiler_trace_proto.add_added_trace();
     added_trace->mutable_frame_enqueue()->set_direction(ProtoProfilerStreamDirection::PROTO__STREAM_DIRECTION__D2H);
@@ -329,13 +214,6 @@ void SchedulerProfilerHandler::handle_trace(const FrameEnqueueD2HTrace &trace)
 
 void SchedulerProfilerHandler::handle_trace(const ActivateCoreOpTrace &trace)
 {
-    log(JSON({
-        {"action", json_to_string(trace.name)},
-        {"timestamp", json_to_string(trace.timestamp)},
-        {"device_id", json_to_string(trace.device_id)},
-        {"core_op_handle", json_to_string(trace.core_op_handle)}
-    }));
-
     std::lock_guard<std::mutex> lock(m_proto_lock);
     auto added_trace = m_profiler_trace_proto.add_added_trace();
     added_trace->mutable_activate_core_op()->set_device_id(trace.device_id);
@@ -357,11 +235,6 @@ void SchedulerProfilerHandler::handle_trace(const DeactivateCoreOpTrace &trace)
 
 void SchedulerProfilerHandler::handle_trace(const SetCoreOpTimeoutTrace &trace)
 {
-    log(JSON({
-        {"action", json_to_string(trace.name)},
-        {"core_op_handle", json_to_string(trace.core_op_handle)}
-    }));
-
     std::lock_guard<std::mutex> lock(m_proto_lock);
     auto added_trace = m_profiler_trace_proto.add_added_trace();
     added_trace->mutable_core_op_set_value()->set_timeout((trace.timeout).count());
@@ -371,11 +244,6 @@ void SchedulerProfilerHandler::handle_trace(const SetCoreOpTimeoutTrace &trace)
 
 void SchedulerProfilerHandler::handle_trace(const SetCoreOpThresholdTrace &trace)
 {
-    log(JSON({
-        {"action", json_to_string(trace.name)},
-        {"core_op_handle", json_to_string(trace.core_op_handle)}
-    }));
-
     std::lock_guard<std::mutex> lock(m_proto_lock);
     auto added_trace = m_profiler_trace_proto.add_added_trace();
     added_trace->mutable_core_op_set_value()->set_threshold(trace.threshold);
@@ -385,11 +253,6 @@ void SchedulerProfilerHandler::handle_trace(const SetCoreOpThresholdTrace &trace
 
 void SchedulerProfilerHandler::handle_trace(const SetCoreOpPriorityTrace &trace)
 {
-    log(JSON({
-        {"action", json_to_string(trace.name)},
-        {"core_op_handle", json_to_string(trace.core_op_handle)}
-    }));
-
     std::lock_guard<std::mutex> lock(m_proto_lock);
     auto added_trace = m_profiler_trace_proto.add_added_trace();
     added_trace->mutable_core_op_set_value()->set_priority(trace.priority);
@@ -399,12 +262,6 @@ void SchedulerProfilerHandler::handle_trace(const SetCoreOpPriorityTrace &trace)
 
 void SchedulerProfilerHandler::handle_trace(const OracleDecisionTrace &trace)
 {
-    log(JSON({
-        {"action", json_to_string(trace.name)},
-        {"reason", json_to_string(trace.reason_idle)},
-        {"core_op_handle", json_to_string(trace.core_op_handle)}
-    }));
-
     std::lock_guard<std::mutex> lock(m_proto_lock);
     auto added_trace = m_profiler_trace_proto.add_added_trace();
     added_trace->mutable_switch_core_op_decision()->set_core_op_handle(trace.core_op_handle);
@@ -421,4 +278,14 @@ void SchedulerProfilerHandler::handle_trace(const DumpProfilerStateTrace &trace)
     m_profiler_trace_proto.Clear();
 }
 
-}
\ No newline at end of file
+bool SchedulerProfilerHandler::should_dump_trace_file()
+{
+    // Size-bounded mode only: a time-bounded profiler is dumped by its timer thread, so return false for it.
+    // ByteSizeLong() recomputes the whole proto's size, so skip it unless a size bound is configured.
+    if (0 == m_size_in_kb_bounded_dump) {
+        return false;
+    }
+    return (m_profiler_trace_proto.ByteSizeLong() / 1024) >= m_size_in_kb_bounded_dump;
+}
+
+}
diff --git a/hailort/libhailort/src/utils/profiler/scheduler_profiler_handler.hpp b/hailort/libhailort/src/utils/profiler/scheduler_profiler_handler.hpp
index 81924df9..0b5eeedc 100644
--- a/hailort/libhailort/src/utils/profiler/scheduler_profiler_handler.hpp
+++ b/hailort/libhailort/src/utils/profiler/scheduler_profiler_handler.hpp
@@ -11,6 +11,7 @@
 #define _HAILO_SCHEDULER_PROFILER_HANDLER_HPP_
 
 #include "hailo/hailort.h"
+#include <condition_variable>
 #if defined(_MSC_VER)
 #pragma warning(push)
 #pragma warning(disable: 4244 4267 4127)
@@ -36,7 +37,7 @@ class SchedulerProfilerHandler : public Handler
     SchedulerProfilerHandler(SchedulerProfilerHandler const&) = delete;
     void operator=(SchedulerProfilerHandler const&) = delete;
 
-    SchedulerProfilerHandler(int64_t &start_time);
+    SchedulerProfilerHandler(size_t dump_after_n_seconds=0, size_t dump_after_n_kb=0);
     ~SchedulerProfilerHandler();
 
     virtual void handle_trace(const AddCoreOpTrace&) override;
@@ -56,19 +57,25 @@ class SchedulerProfilerHandler : public Handler
     virtual void handle_trace(const DumpProfilerStateTrace&) override;
     virtual void handle_trace(const InitProfilerProtoTrace&) override;
     virtual void handle_trace(const HefLoadedTrace&) override;
+    virtual bool should_dump_trace_file() override;
+    virtual bool should_stop () override { return m_file_already_dumped; } // true once the trace file was written; Tracer::trace then erases this handler
+    virtual hailo_status dump_trace_file() override { return serialize_and_dump_proto(); } // writes the accumulated trace proto to a file; no-op once a dump has succeeded
 
 private:
-    void log(JSON json);
-    bool comma();
-    void serialize_and_dump_proto();
+    hailo_status serialize_and_dump_proto();
 
-    std::shared_ptr<spdlog::sinks::sink> m_file_sink;
-    std::shared_ptr<spdlog::logger> m_profiler_logger;
-    std::atomic<bool> m_first_write;
     ProtoProfiler m_profiler_trace_proto;
     std::mutex m_proto_lock;
+    std::mutex m_dump_file_mutex;
+    std::mutex m_cv_mutex;
+    std::thread m_timer_thread;
+    std::condition_variable m_cv;
+    size_t m_time_in_seconds_bounded_dump; // if != 0, generate trace file after N seconds
+    size_t m_size_in_kb_bounded_dump; // if != 0, generate trace file after N KB
+    std::atomic<bool> m_file_already_dumped{false}; // atomic: set by whichever thread dumps the file (possibly the timer thread), read by should_stop() from Tracer::trace
+    bool m_shutting_down = false;
 };
 
 }
 
-#endif /* _SCHEDULER_PROFILER_HANDLER_HPP_ */
\ No newline at end of file
+#endif /* _SCHEDULER_PROFILER_HANDLER_HPP_ */
diff --git a/hailort/libhailort/src/utils/profiler/tracer.cpp b/hailort/libhailort/src/utils/profiler/tracer.cpp
index c83e2e48..32772b99 100644
--- a/hailort/libhailort/src/utils/profiler/tracer.cpp
+++ b/hailort/libhailort/src/utils/profiler/tracer.cpp
@@ -8,11 +8,9 @@
  **/
 
 #include "common/utils.hpp"
-
+#include "common/env_vars.hpp"
 #include "utils/profiler/tracer.hpp"
 
-#define PROFILER_ENV_VAR ("HAILO_TRACE")
-#define PROFILER_ENV_VAR_VALUE ("scheduler")
 
 namespace hailort
 {
@@ -25,19 +23,27 @@ Tracer::Tracer()
 
 void Tracer::init_scheduler_profiler_handler()
 {
-    const char* env_var_name = PROFILER_ENV_VAR;
-    m_should_trace = is_env_variable_on(env_var_name, PROFILER_ENV_VAR_VALUE);
+    m_should_trace = is_env_variable_on(TRACE_ENV_VAR, TRACE_ENV_VAR_VALUE);
     if (m_should_trace) {
-        m_start_time = std::chrono::high_resolution_clock::now();
-        int64_t time_since_epoch = std::chrono::duration_cast<std::chrono::nanoseconds>(m_start_time.time_since_epoch()).count();
-        m_handlers.push_back(std::make_unique<SchedulerProfilerHandler>(time_since_epoch));
+        auto profiler_time_bounded = get_env_variable(TRACE_ENV_VAR_TIME_IN_SECONDS_BOUNDED_DUMP); // unset => bound of 0 (disabled)
+        auto profiler_time_bounded_time_in_seconds = (profiler_time_bounded) ? std::stoull(profiler_time_bounded.value()) : 0; // NOTE(review): std::stoull throws on a non-numeric value - confirm this is acceptable here
+        auto profiler_size_bounded = get_env_variable(TRACE_ENV_VAR_SIZE_IN_KB_BOUNDED_DUMP); // unset => bound of 0 (disabled)
+        auto profiler_size_bounded_size_in_kb = (profiler_size_bounded) ? std::stoull(profiler_size_bounded.value()) : 0; // NOTE(review): same std::stoull caveat as above
+
+        if ((0 != profiler_time_bounded_time_in_seconds) && (0 != profiler_size_bounded_size_in_kb)) { // both bounds set: warn and register no profiler handler
+            LOGGER__WARNING("Scheduler profiler cannot be initialized. Both {} and {} are set. Only one can be set at a time",
+                TRACE_ENV_VAR_TIME_IN_SECONDS_BOUNDED_DUMP,
+                TRACE_ENV_VAR_SIZE_IN_KB_BOUNDED_DUMP);
+        } else { // at most one bound is non-zero; a value of 0 leaves that bound disabled
+            m_handlers.push_back(std::make_unique<SchedulerProfilerHandler>(
+                profiler_time_bounded_time_in_seconds, profiler_size_bounded_size_in_kb));
+        }
     }
 }
 
 void Tracer::init_monitor_handler()
 {
-    const char* env_var_name = SCHEDULER_MON_ENV_VAR;
-    m_should_monitor = is_env_variable_on(env_var_name, SCHEDULER_MON_ENV_VAR_VALUE);
+    m_should_monitor = is_env_variable_on(SCHEDULER_MON_ENV_VAR, SCHEDULER_MON_ENV_VAR_VALUE);
     if (m_should_monitor) {
         m_handlers.push_back(std::make_unique<MonitorHandler>());
     }
diff --git a/hailort/libhailort/src/utils/profiler/tracer.hpp b/hailort/libhailort/src/utils/profiler/tracer.hpp
index 6388e2d3..27a38f58 100644
--- a/hailort/libhailort/src/utils/profiler/tracer.hpp
+++ b/hailort/libhailort/src/utils/profiler/tracer.hpp
@@ -49,8 +49,23 @@ class Tracer
         TraceType trace_struct(trace_args...);
         auto curr_time = std::chrono::high_resolution_clock::now();
         trace_struct.timestamp = std::chrono::duration_cast<std::chrono::nanoseconds>(curr_time - this->m_start_time).count();
-        for (auto &handler : this->m_handlers) {
-            handler->handle_trace(trace_struct);
+
+        // m_handlers might be modified by other threads so the loop is protected by a mutex
+        {
+            std::lock_guard<std::mutex> lock(m_mutex);
+            for (auto it = m_handlers.begin(); it != m_handlers.end();) {
+                (*it)->handle_trace(trace_struct);
+
+                if ((*it)->should_dump_trace_file()) {
+                    (*it)->dump_trace_file();
+                }
+
+                if ((*it)->should_stop()) {
+                    it = m_handlers.erase(it);
+                } else {
+                    it++;
+                }
+            }
         }
     }
 
@@ -58,8 +73,9 @@ class Tracer
     bool m_should_monitor = false;
     std::chrono::high_resolution_clock::time_point m_start_time;
     std::vector<std::unique_ptr<Handler>> m_handlers;
+    std::mutex m_mutex;
 };
 
 }
 
-#endif
\ No newline at end of file
+#endif
diff --git a/hailort/libhailort/src/utils/profiler/tracer_macros.hpp b/hailort/libhailort/src/utils/profiler/tracer_macros.hpp
index 757555ff..84242541 100644
--- a/hailort/libhailort/src/utils/profiler/tracer_macros.hpp
+++ b/hailort/libhailort/src/utils/profiler/tracer_macros.hpp
@@ -10,22 +10,12 @@
 #ifndef _HAILO_TRACER_MACROS_HPP_
 #define _HAILO_TRACER_MACROS_HPP_
 
-#if defined HAILO_ENABLE_PROFILER_BUILD
 #include "tracer.hpp"
-#endif
 
 namespace hailort
 {
 
-struct VoidAll {
-    template<typename... Args> VoidAll(Args const& ...) {}
-};
-
-#if defined HAILO_ENABLE_PROFILER_BUILD
 #define TRACE(type, ...) (Tracer::trace<type>(__VA_ARGS__))
-#else
-#define TRACE(type, ...) {VoidAll temporary_name{__VA_ARGS__};}
-#endif
 
 }
 
diff --git a/hailort/libhailort/src/utils/soc_utils/partial_cluster_reader.cpp b/hailort/libhailort/src/utils/soc_utils/partial_cluster_reader.cpp
index fff74272..11f425aa 100644
--- a/hailort/libhailort/src/utils/soc_utils/partial_cluster_reader.cpp
+++ b/hailort/libhailort/src/utils/soc_utils/partial_cluster_reader.cpp
@@ -127,14 +127,14 @@ Expected<uint32_t> PartialClusterReader::get_partial_clusters_layout_bitmap(hail
     switch (dev_arch) {
         case HAILO_ARCH_HAILO15H:
             CHECK_AS_EXPECTED((HAILO15H_SKU_VALUE == sku_value), HAILO_INTERNAL_FAILURE,
-                "Device arch is of type {} but sku is {}", dev_arch, sku_value);
+                "Device arch is of type {} but sku is {}", static_cast<int>(dev_arch), sku_value);
             break;
         case HAILO_ARCH_HAILO15M:
             CHECK_AS_EXPECTED((HAILO15M_SKU_VALUE == sku_value), HAILO_INTERNAL_FAILURE,
-                "Device arch is of type {} but sku is {}", dev_arch, sku_value);
+                "Device arch is of type {} but sku is {}", static_cast<int>(dev_arch), sku_value);
             break;
         default:
-            LOGGER__ERROR("Error, Device architecture {} doesnt support partial cluster layout", dev_arch);
+            LOGGER__ERROR("Error, Device architecture {} doesnt support partial cluster layout", static_cast<int>(dev_arch));
             return make_unexpected(HAILO_INTERNAL_FAILURE);
     }
 
diff --git a/hailort/libhailort/src/utils/thread_safe_map.hpp b/hailort/libhailort/src/utils/thread_safe_map.hpp
index cc1093b1..5bd321e8 100644
--- a/hailort/libhailort/src/utils/thread_safe_map.hpp
+++ b/hailort/libhailort/src/utils/thread_safe_map.hpp
@@ -14,6 +14,7 @@
 #include <mutex>
 #include <unordered_map>
 #include <shared_mutex>
+#include <algorithm>
 
 namespace hailort
 {
diff --git a/hailort/libhailort/src/vdevice/CMakeLists.txt b/hailort/libhailort/src/vdevice/CMakeLists.txt
index 1a0179f4..ad50a90b 100644
--- a/hailort/libhailort/src/vdevice/CMakeLists.txt
+++ b/hailort/libhailort/src/vdevice/CMakeLists.txt
@@ -1,4 +1,4 @@
-cmake_minimum_required(VERSION 3.0.0)
+cmake_minimum_required(VERSION 3.5.0)
 
 set(SRC_FILES
     ${CMAKE_CURRENT_SOURCE_DIR}/vdevice.cpp
diff --git a/hailort/libhailort/src/vdevice/callback_reorder_queue.hpp b/hailort/libhailort/src/vdevice/callback_reorder_queue.hpp
index e6f01089..0532135f 100644
--- a/hailort/libhailort/src/vdevice/callback_reorder_queue.hpp
+++ b/hailort/libhailort/src/vdevice/callback_reorder_queue.hpp
@@ -12,7 +12,7 @@
 #ifndef _HAILO_CALLBACK_REORDER_QUEUE_HPP_
 #define _HAILO_CALLBACK_REORDER_QUEUE_HPP_
 
-#include "stream_common/transfer_common.hpp"
+#include "vdma/channel/transfer_common.hpp"
 
 #include <mutex>
 #include <queue>
diff --git a/hailort/libhailort/src/vdevice/scheduler/infer_request_accumulator.hpp b/hailort/libhailort/src/vdevice/scheduler/infer_request_accumulator.hpp
index 5efe8571..75c54584 100644
--- a/hailort/libhailort/src/vdevice/scheduler/infer_request_accumulator.hpp
+++ b/hailort/libhailort/src/vdevice/scheduler/infer_request_accumulator.hpp
@@ -11,7 +11,7 @@
 #ifndef _HAILO_INFER_REQUEST_ACCUMULATOR_HPP_
 #define _HAILO_INFER_REQUEST_ACCUMULATOR_HPP_
 
-#include "stream_common/transfer_common.hpp"
+#include "vdma/channel/transfer_common.hpp"
 
 #include <mutex>
 #include <condition_variable>
diff --git a/hailort/libhailort/src/vdevice/scheduler/scheduled_core_op_state.cpp b/hailort/libhailort/src/vdevice/scheduler/scheduled_core_op_state.cpp
index 7e157cf0..4d73d4af 100644
--- a/hailort/libhailort/src/vdevice/scheduler/scheduled_core_op_state.cpp
+++ b/hailort/libhailort/src/vdevice/scheduler/scheduled_core_op_state.cpp
@@ -65,7 +65,7 @@ bool ScheduledCoreOp::use_dynamic_batch_flow() const
 hailo_status ScheduledCoreOp::set_timeout(const std::chrono::milliseconds &timeout)
 {
     m_timeout = timeout;
-    LOGGER__INFO("Setting scheduler timeout of {} to {}ms", m_core_op->name(), timeout.count());
+    LOGGER__INFO("Setting scheduler threshold timeout of {} to {}ms", m_core_op->name(), timeout.count()); // m_timeout is the value is_over_threshold_timeout() compares against
     return HAILO_SUCCESS;
 }
 
@@ -94,7 +94,7 @@ bool ScheduledCoreOp::is_over_threshold() const
     return m_requested_infer_requests.load() >= m_min_threshold;
 }
 
-bool ScheduledCoreOp::is_over_timeout() const
+bool ScheduledCoreOp::is_over_threshold_timeout() const // true once at least m_timeout has elapsed since m_last_run_time_stamp
 {
     return m_timeout <= (std::chrono::steady_clock::now() - m_last_run_time_stamp);
 }
diff --git a/hailort/libhailort/src/vdevice/scheduler/scheduled_core_op_state.hpp b/hailort/libhailort/src/vdevice/scheduler/scheduled_core_op_state.hpp
index b9711e54..47277a13 100644
--- a/hailort/libhailort/src/vdevice/scheduler/scheduled_core_op_state.hpp
+++ b/hailort/libhailort/src/vdevice/scheduler/scheduled_core_op_state.hpp
@@ -67,7 +67,7 @@ class ScheduledCoreOp
     void set_priority(core_op_priority_t priority);
 
     bool is_over_threshold() const;
-    bool is_over_timeout() const;
+    bool is_over_threshold_timeout() const;
 
     std::chrono::time_point<std::chrono::steady_clock> get_last_run_timestamp();
     void set_last_run_timestamp(const std::chrono::time_point<std::chrono::steady_clock> &timestamp);
diff --git a/hailort/libhailort/src/vdevice/scheduler/scheduler.cpp b/hailort/libhailort/src/vdevice/scheduler/scheduler.cpp
index ad987ace..030c2349 100644
--- a/hailort/libhailort/src/vdevice/scheduler/scheduler.cpp
+++ b/hailort/libhailort/src/vdevice/scheduler/scheduler.cpp
@@ -26,6 +26,7 @@ namespace hailort
 CoreOpsScheduler::CoreOpsScheduler(hailo_scheduling_algorithm_t algorithm, std::vector<std::string> &devices_ids,
     std::vector<std::string> &devices_arch) :
     SchedulerBase(algorithm, devices_ids, devices_arch),
+    m_closest_threshold_timeout(std::chrono::steady_clock::now() + std::chrono::milliseconds(UINT32_MAX)), // starts UINT32_MAX ms (~49.7 days) in the future
     m_scheduler_thread(*this)
 {}
 
@@ -63,6 +64,7 @@ hailo_status CoreOpsScheduler::add_core_op(scheduler_core_op_handle_t core_op_ha
         // scheduled should limit themself. Since the ctor accept no argument, we init it using operator[].
         // TODO HRT-12136: limit the queue size (based on instances count)
         m_infer_requests[core_op_handle];
+        m_bounded_infer_requests[core_op_handle];
 
         const core_op_priority_t normal_priority = HAILO_SCHEDULER_PRIORITY_NORMAL;
         m_core_op_priority[normal_priority].add(core_op_handle);
@@ -75,7 +77,7 @@ void CoreOpsScheduler::remove_core_op(scheduler_core_op_handle_t core_op_handle)
 {
     std::unique_lock<std::shared_timed_mutex> lock(m_scheduler_mutex);
     m_scheduled_core_ops.at(core_op_handle)->remove_instance();
-    m_scheduler_thread.signal();
+    m_scheduler_thread.signal(true);
 }
 
 void CoreOpsScheduler::shutdown()
@@ -190,7 +192,7 @@ hailo_status CoreOpsScheduler::infer_async(const scheduler_core_op_handle_t &cor
     auto original_callback = infer_request->callback;
     infer_request->callback = [current_device_info, this, original_callback](hailo_status status) {
         current_device_info->ongoing_infer_requests.fetch_sub(1);
-        m_scheduler_thread.signal();
+        m_scheduler_thread.signal(true);
         original_callback(status);
     };
     auto status = vdma_core_op->infer_async(infer_request.release());
@@ -215,7 +217,7 @@ CoreOpsScheduler::ReadyInfo CoreOpsScheduler::is_core_op_ready(const scheduler_c
 
     if (check_threshold) {
         result.over_threshold = scheduled_core_op->is_over_threshold();
-        result.over_timeout = scheduled_core_op->is_over_timeout();
+        result.over_timeout = scheduled_core_op->is_over_threshold_timeout();
 
         if (!result.over_threshold && !result.over_timeout){
             result.is_ready = false;
@@ -236,11 +238,13 @@ hailo_status CoreOpsScheduler::enqueue_infer_request(const scheduler_core_op_han
     auto status = m_infer_requests.at(core_op_handle).enqueue(std::move(infer_request));
     if (HAILO_SUCCESS == status) {
         m_scheduled_core_ops.at(core_op_handle)->requested_infer_requests().fetch_add(1);
-        m_scheduler_thread.signal();
+        m_scheduler_thread.signal(true);
     }
     return status;
 }
 
+// Note: set_timeout semantics - if the timeout passes and fewer than the threshold amount of frames have been sent
+// since the last time frames were sent on this core op, all the frames that are ready to be sent are sent.
 hailo_status CoreOpsScheduler::set_timeout(const scheduler_core_op_handle_t &core_op_handle, const std::chrono::milliseconds &timeout, const std::string &/*network_name*/)
 {
     std::shared_lock<std::shared_timed_mutex> lock(m_scheduler_mutex);
@@ -249,6 +253,12 @@ hailo_status CoreOpsScheduler::set_timeout(const scheduler_core_op_handle_t &cor
     if (HAILO_SUCCESS == status) {
         TRACE(SetCoreOpTimeoutTrace, core_op_handle, timeout);
     }
+
+    // This has to trigger an event to recalculate timeouts and check if any have timed out - but don't execute
+    // the worker thread unless the threshold timeout on a core op has actually expired
+    update_closest_threshold_timeout();
+    m_scheduler_thread.signal(false);
+
     return status;
 }
 
@@ -284,6 +294,31 @@ hailo_status CoreOpsScheduler::set_priority(const scheduler_core_op_handle_t &co
     return HAILO_SUCCESS;
 }
 
+// Binds the buffers of one pending infer request for every non-active core op (single-device only).
+hailo_status CoreOpsScheduler::bind_buffers()
+{
+    // For now, binding buffers takes place only on a single device (this also guards an empty device map)
+    if (1 != m_devices.size()) {
+        return HAILO_SUCCESS;
+    }
+    const auto active_core_op_handle = m_devices.begin()->second->current_core_op_handle;
+    for (auto &core_op_pair : m_scheduled_core_ops) {
+        // Skip core ops that already hold a bound request, have no pending requests, or are currently active
+        if ((m_bounded_infer_requests.at(core_op_pair.first).size() > 0) ||
+            (core_op_pair.second->requested_infer_requests().load() <= 0) ||
+            (core_op_pair.first == active_core_op_handle)) {
+            continue;
+        }
+        // Resolve the core op before dequeuing, so a failed lookup does not drop a queued request
+        TRY(auto vdma_core_op, get_vdma_core_op(core_op_pair.first, m_devices.begin()->second->device_id));
+        TRY(auto infer_request, m_infer_requests.at(core_op_pair.first).dequeue());
+        CHECK_SUCCESS(vdma_core_op->bind_buffers(infer_request.transfers));
+        CHECK_SUCCESS(m_bounded_infer_requests.at(core_op_pair.first).enqueue(std::move(infer_request)));
+    }
+
+    return HAILO_SUCCESS;
+}
+
 hailo_status CoreOpsScheduler::optimize_streaming_if_enabled(const scheduler_core_op_handle_t &core_op_handle)
 {
     auto scheduled_core_op = m_scheduled_core_ops.at(core_op_handle);
@@ -305,11 +340,15 @@ hailo_status CoreOpsScheduler::optimize_streaming_if_enabled(const scheduler_cor
 
 Expected<InferRequest> CoreOpsScheduler::dequeue_infer_request(scheduler_core_op_handle_t core_op_handle)
 {
-    auto infer_request = m_infer_requests.at(core_op_handle).dequeue();
-    CHECK_EXPECTED(infer_request);
+    hailort::InferRequest infer_request;
+    if (m_bounded_infer_requests.at(core_op_handle).size() > 0) {
+        TRY(infer_request, m_bounded_infer_requests.at(core_op_handle).dequeue());
+    } else {
+        TRY(infer_request, m_infer_requests.at(core_op_handle).dequeue());
+    }
 
     m_scheduled_core_ops.at(core_op_handle)->requested_infer_requests().fetch_sub(1);
-    return infer_request.release();
+    return infer_request;
 }
 
 uint16_t CoreOpsScheduler::get_frames_ready_to_transfer(scheduler_core_op_handle_t core_op_handle,
@@ -399,6 +438,38 @@ void CoreOpsScheduler::schedule()
             shutdown_core_op(core_op_pair.first);
         }
     }
+
+    // If possible, bind buffer for all non activated core ops
+    auto status = bind_buffers();
+    if (HAILO_SUCCESS != status) {
+        LOGGER__ERROR("Scheduler thread failed with status={}", status);
+    }
+
+    update_closest_threshold_timeout();
+}
+
+void CoreOpsScheduler::update_closest_threshold_timeout()
+{ // NOTE(review): also called from set_timeout() under a shared_lock while the worker thread reads the member - confirm no race
+    m_closest_threshold_timeout = std::chrono::steady_clock::now() + std::chrono::milliseconds(UINT32_MAX); // far-future default
+    for (const auto &core_op_pair : m_scheduled_core_ops) {
+        auto scheduled_core_op = core_op_pair.second;
+        // Only core ops with at least one instance and a non-zero (non-default) timeout can move the deadline earlier
+        if ((0 < scheduled_core_op->instances_count()) && (std::chrono::milliseconds(0) != scheduled_core_op->get_timeout())) {
+            m_closest_threshold_timeout = std::min(m_closest_threshold_timeout,
+                scheduled_core_op->get_last_run_timestamp() + scheduled_core_op->get_timeout()); // deadline = last run + timeout
+        }
+    }
+}
+
+std::chrono::milliseconds CoreOpsScheduler::get_closest_threshold_timeout() const
+{
+    // Returns the time left until m_closest_threshold_timeout; the worker thread waits this long or until signaled
+    const auto time_now = std::chrono::steady_clock::now();
+    return (m_closest_threshold_timeout > time_now) ?
+        std::chrono::duration_cast<std::chrono::milliseconds>(m_closest_threshold_timeout - time_now) :
+        // time_now is not earlier than m_closest_threshold_timeout - the timeout has already occurred, so return
+        // a zero duration and the worker thread's timed wait expires immediately
+        std::chrono::milliseconds(0);
 }
 
 CoreOpsScheduler::SchedulerThread::SchedulerThread(CoreOpsScheduler &scheduler) :
@@ -413,11 +484,11 @@ CoreOpsScheduler::SchedulerThread::~SchedulerThread()
     stop();
 }
 
-void CoreOpsScheduler::SchedulerThread::signal()
+void CoreOpsScheduler::SchedulerThread::signal(bool execute_worker_thread)
 {
     {
         std::lock_guard<std::mutex> lock(m_mutex);
-        m_execute_worker_thread = true;
+        m_execute_worker_thread = m_execute_worker_thread.load() || execute_worker_thread; // signal(false) must not cancel a pending wake-up
     }
     m_cv.notify_one();
 }
@@ -426,7 +497,7 @@ void CoreOpsScheduler::SchedulerThread::stop()
 {
     if (m_thread.joinable()) {
         m_is_running = false;
-        signal();
+        signal(true);
         m_thread.join();
     }
 }
@@ -438,7 +509,8 @@ void CoreOpsScheduler::SchedulerThread::worker_thread_main()
     while (m_is_running) {
         {
             std::unique_lock<std::mutex> lock(m_mutex);
-            m_cv.wait(lock, [this]() {
+            const auto next_timeout_in_ms = m_scheduler.get_closest_threshold_timeout();
+            m_cv.wait_for(lock, next_timeout_in_ms, [this]() {
                 return m_execute_worker_thread.load();
             });
             m_execute_worker_thread = false;
diff --git a/hailort/libhailort/src/vdevice/scheduler/scheduler.hpp b/hailort/libhailort/src/vdevice/scheduler/scheduler.hpp
index 6d3c5e81..8a53a9d1 100644
--- a/hailort/libhailort/src/vdevice/scheduler/scheduler.hpp
+++ b/hailort/libhailort/src/vdevice/scheduler/scheduler.hpp
@@ -17,7 +17,7 @@
 #include "common/filesystem.hpp"
 
 #include "utils/thread_safe_map.hpp"
-#include "utils/thread_safe_queue.hpp"
+#include "common/thread_safe_queue.hpp"
 
 #include "vdevice/scheduler/scheduled_core_op_state.hpp"
 #include "vdevice/scheduler/scheduler_base.hpp"
@@ -81,12 +81,17 @@ class CoreOpsScheduler : public SchedulerBase
     Expected<InferRequest> dequeue_infer_request(scheduler_core_op_handle_t core_op_handle);
     uint16_t get_frames_ready_to_transfer(scheduler_core_op_handle_t core_op_handle, const device_id_t &device_id) const;
 
+    hailo_status bind_buffers();
+
     Expected<std::shared_ptr<VdmaConfigCoreOp>> get_vdma_core_op(scheduler_core_op_handle_t core_op_handle,
         const device_id_t &device_id);
 
     void shutdown_core_op(scheduler_core_op_handle_t core_op_handle);
     void schedule();
 
+    void update_closest_threshold_timeout();
+    std::chrono::milliseconds get_closest_threshold_timeout() const;
+
     class SchedulerThread final {
     public:
         SchedulerThread(CoreOpsScheduler &scheduler);
@@ -96,7 +101,7 @@ class CoreOpsScheduler : public SchedulerBase
         SchedulerThread(const SchedulerThread &) = delete;
         SchedulerThread &operator=(const SchedulerThread &) = delete;
 
-        void signal();
+        void signal(bool execute_worker_thread);
         void stop();
 
     private:
@@ -114,6 +119,7 @@ class CoreOpsScheduler : public SchedulerBase
 
     using InferRequestQueue = SafeQueue<InferRequest>;
     std::unordered_map<vdevice_core_op_handle_t, InferRequestQueue> m_infer_requests;
+    std::unordered_map<vdevice_core_op_handle_t, InferRequestQueue> m_bounded_infer_requests;
 
     // This shared mutex guards accessing the scheduler data structures including:
     //   - m_scheduled_core_ops
@@ -124,6 +130,8 @@ class CoreOpsScheduler : public SchedulerBase
     // m_scheduled_core_ops.at(core_op_handle) can use shared_lock.
     std::shared_timed_mutex m_scheduler_mutex;
 
+    std::chrono::steady_clock::time_point m_closest_threshold_timeout;
+
     SchedulerThread m_scheduler_thread;
 };
 } /* namespace hailort */
diff --git a/hailort/libhailort/src/vdevice/scheduler/scheduler_base.hpp b/hailort/libhailort/src/vdevice/scheduler/scheduler_base.hpp
index e07d62d9..338fec23 100644
--- a/hailort/libhailort/src/vdevice/scheduler/scheduler_base.hpp
+++ b/hailort/libhailort/src/vdevice/scheduler/scheduler_base.hpp
@@ -19,6 +19,7 @@
 #include "stream_common/stream_internal.hpp"
 
 #include <condition_variable>
+#include <algorithm>
 
 
 namespace hailort
diff --git a/hailort/libhailort/src/vdevice/scheduler/scheduler_oracle.cpp b/hailort/libhailort/src/vdevice/scheduler/scheduler_oracle.cpp
index a4018bd7..b70bb9c3 100644
--- a/hailort/libhailort/src/vdevice/scheduler/scheduler_oracle.cpp
+++ b/hailort/libhailort/src/vdevice/scheduler/scheduler_oracle.cpp
@@ -9,6 +9,7 @@
 
 #include "vdevice/scheduler/scheduler_oracle.hpp"
 #include "utils/profiler/tracer_macros.hpp"
+#include "common/internal_env_vars.hpp"
 
 
 namespace hailort
@@ -99,7 +100,7 @@ std::vector<RunParams> CoreOpsSchedulerOracle::get_oracle_decisions(SchedulerBas
 
             // If there is no suitable model when checking with threshold, and the idle optimization is disabled,
             // try again without threshold.
-            if (!is_env_variable_on("HAILO_DISABLE_IDLE_OPT") && (core_op_handle == INVALID_CORE_OP_HANDLE)) {
+            if (!is_env_variable_on(HAILO_DISABLE_IDLE_OPT_ENV_VAR) && (core_op_handle == INVALID_CORE_OP_HANDLE)) {
                 core_op_handle = choose_next_model(scheduler, active_device_info->device_id, !CHECK_THRESHOLD);
             }
 
diff --git a/hailort/libhailort/src/vdevice/vdevice.cpp b/hailort/libhailort/src/vdevice/vdevice.cpp
index c3d0763d..96978d12 100644
--- a/hailort/libhailort/src/vdevice/vdevice.cpp
+++ b/hailort/libhailort/src/vdevice/vdevice.cpp
@@ -27,14 +27,13 @@
 #include "core_op/core_op.hpp"
 #include "hef/hef_internal.hpp"
 
+#include "common/string_utils.hpp"
+
 #ifdef HAILO_SUPPORT_MULTI_PROCESS
 #include "service/rpc_client_utils.hpp"
 #include "rpc/rpc_definitions.hpp"
 #endif // HAILO_SUPPORT_MULTI_PROCESS
 
-#define HAILO_FORCE_HRPC_CLIENT_ENV_VAR "HAILO_FORCE_HRPC"
-#define HAILO_FORCE_HRPC_CLIENT_ON "1"
-
 
 namespace hailort
 {
@@ -189,13 +188,13 @@ Expected<hailo_stream_interface_t> VDeviceHandle::get_default_streams_interface(
 }
 
 Expected<std::shared_ptr<InferModel>> VDeviceHandle::create_infer_model(const std::string &hef_path,
-    const std::string &network_name)
+    const std::string &name)
 {
     auto &manager = SharedResourceManager<std::string, VDeviceBase>::get_instance();
     auto vdevice = manager.resource_lookup(m_handle);
     CHECK_EXPECTED(vdevice);
 
-    return vdevice.value()->create_infer_model(hef_path, network_name);
+    return vdevice.value()->create_infer_model(hef_path, name);
 }
 
 hailo_status VDeviceHandle::dma_map(void *address, size_t size, hailo_dma_buffer_direction_t direction)
@@ -234,6 +233,15 @@ hailo_status VDeviceHandle::dma_unmap_dmabuf(int dmabuf_fd, size_t size, hailo_d
     return vdevice.value()->dma_unmap_dmabuf(dmabuf_fd, size, direction);
 }
 
+hailo_status VDeviceHandle::add_network_group_ref_count(std::shared_ptr<ConfiguredNetworkGroup> network_group_ptr)
+{
+    auto &manager = SharedResourceManager<std::string, VDeviceBase>::get_instance();
+    auto vdevice = manager.resource_lookup(m_handle);
+    CHECK_EXPECTED_AS_STATUS(vdevice);
+
+    return vdevice.value()->add_network_group_ref_count(network_group_ptr);
+}
+
 bool VDevice::service_over_ip_mode()
 {
 #ifdef HAILO_SUPPORT_MULTI_PROCESS
@@ -243,11 +251,9 @@ bool VDevice::service_over_ip_mode()
     return false; // no service -> no service over ip
 }
 
-bool VDevice::force_hrpc_client()
+bool VDevice::should_force_hrpc_client()
 {
-    // The env var HAILO_FORCE_HRPC_CLIENT_ENV_VAR is supported for debug purposes
-    char *pcie_service_var = std::getenv(HAILO_FORCE_HRPC_CLIENT_ENV_VAR); // TODO: Remove duplication
-    return (nullptr != pcie_service_var) && (HAILO_FORCE_HRPC_CLIENT_ON == std::string(pcie_service_var));
+    return get_env_variable(HAILO_SOCKET_COM_ADDR_CLIENT_ENV_VAR).has_value();
 }
 
 #ifdef HAILO_SUPPORT_MULTI_PROCESS
@@ -370,9 +376,8 @@ Expected<ConfiguredNetworkGroupVector> VDeviceClient::configure(Hef &hef,
         CHECK_EXPECTED(expected_client);
 
         auto client = expected_client.release();
-        auto network_group = make_shared_nothrow<ConfiguredNetworkGroupClient>(std::move(client), NetworkGroupIdentifier(m_identifier, ng_handle));
-        CHECK_NOT_NULL_AS_EXPECTED(network_group, HAILO_OUT_OF_HOST_MEMORY);
-
+        TRY(auto network_group, ConfiguredNetworkGroupClient::create(std::move(client),
+            NetworkGroupIdentifier(m_identifier, ng_handle)));
         networks.emplace_back(network_group);
         {
             std::unique_lock<std::mutex> lock(m_mutex);
@@ -547,11 +552,9 @@ Expected<std::unique_ptr<VDevice>> VDevice::create(const hailo_vdevice_params_t
         } else {
             TRY(acc_type, VDeviceBase::get_accelerator_type(params.device_ids, params.device_count));
         }
-        if ((acc_type == HailoRTDriver::AcceleratorType::SOC_ACCELERATOR) || force_hrpc_client()) {
-            // Creating VDeviceClient
+        if ((acc_type == HailoRTDriver::AcceleratorType::SOC_ACCELERATOR) || should_force_hrpc_client()) {
             TRY(vdevice, VDeviceHrpcClient::create(params));
         } else {
-            // Creating VDeviceHandle
             TRY(vdevice, VDeviceHandle::create(params));
         }
     }
@@ -744,7 +747,6 @@ Expected<ConfiguredNetworkGroupVector> VDeviceBase::configure(Hef &hef,
         auto network_group_ptr = net_group_expected.release();
 
         added_network_groups.push_back(network_group_ptr);
-        m_network_groups.push_back(network_group_ptr);
     }
 
     auto elapsed_time_ms = std::chrono::duration<double, std::milli>(std::chrono::steady_clock::now() - start_time).count();
@@ -753,17 +755,26 @@ Expected<ConfiguredNetworkGroupVector> VDeviceBase::configure(Hef &hef,
     return added_network_groups;
 }
 
-Expected<std::shared_ptr<InferModel>> VDevice::create_infer_model(const std::string &hef_path, const std::string &network_name)
+hailo_status VDeviceBase::add_network_group_ref_count(std::shared_ptr<ConfiguredNetworkGroup> network_group_ptr)
 {
-    CHECK_AS_EXPECTED(network_name.empty(), HAILO_NOT_IMPLEMENTED, "Passing network name is not supported yet!");
-    TRY(auto infer_model_base, InferModelBase::create(*this, hef_path));
+    m_network_groups.push_back(network_group_ptr);
+    return HAILO_SUCCESS;
+}
+
+hailo_status VDevice::add_network_group_ref_count(std::shared_ptr<ConfiguredNetworkGroup> /*network_group_ptr*/)
+{
+    return HAILO_SUCCESS;
+}
+
+Expected<std::shared_ptr<InferModel>> VDevice::create_infer_model(const std::string &hef_path, const std::string &name)
+{
+    TRY(auto infer_model_base, InferModelBase::create(*this, hef_path, name));
     return std::shared_ptr<InferModel>(std::move(infer_model_base));
 }
 
-Expected<std::shared_ptr<InferModel>> VDevice::create_infer_model(const MemoryView hef_buffer, const std::string &network_name)
+Expected<std::shared_ptr<InferModel>> VDevice::create_infer_model(const MemoryView hef_buffer, const std::string &name)
 {
-    CHECK_AS_EXPECTED(network_name.empty(), HAILO_NOT_IMPLEMENTED, "Passing network name is not supported yet!");
-    TRY(auto infer_model_base, InferModelBase::create(*this, hef_buffer));
+    TRY(auto infer_model_base, InferModelBase::create(*this, hef_buffer, name));
     return std::shared_ptr<InferModel>(std::move(infer_model_base));
 }
 
@@ -839,7 +850,7 @@ Expected<std::vector<std::string>> VDeviceBase::get_device_ids(const hailo_vdevi
         device_ids.reserve(params.device_count);
 
         for (size_t i = 0; i < params.device_count; i++) {
-            device_ids.emplace_back(params.device_ids[i].id);
+            device_ids.emplace_back(StringUtils::to_lower(params.device_ids[i].id));
         }
 
         return device_ids;
@@ -943,14 +954,15 @@ vdevice_core_op_handle_t VDeviceBase::allocate_core_op_handle()
 
 bool VDeviceBase::should_use_multiplexer()
 {
-    auto disable_multiplexer_env = std::getenv(DISABLE_MULTIPLEXER_ENV_VAR);
-    bool disabled_by_flag = (nullptr != disable_multiplexer_env) &&
-        (strnlen(disable_multiplexer_env, 2) == 1) &&
-        (strncmp(disable_multiplexer_env, "1", 1) == 0);
-    if (disabled_by_flag) {
+    if (!m_core_ops_scheduler) {
+        return false;
+    }
+
+    auto is_disabled_by_user = is_env_variable_on(DISABLE_MULTIPLEXER_ENV_VAR);
+    if (is_disabled_by_user) {
         LOGGER__WARNING("Usage of '{}' env variable is deprecated.", DISABLE_MULTIPLEXER_ENV_VAR);
     }
-    return (!disabled_by_flag && m_core_ops_scheduler);
+    return !is_disabled_by_user;
 }
 
 Expected<bool> VDeviceBase::device_ids_contains_eth(const hailo_vdevice_params_t &params)
diff --git a/hailort/libhailort/src/vdevice/vdevice_core_op.cpp b/hailort/libhailort/src/vdevice/vdevice_core_op.cpp
index ac8e83d6..fc4f6455 100644
--- a/hailort/libhailort/src/vdevice/vdevice_core_op.cpp
+++ b/hailort/libhailort/src/vdevice/vdevice_core_op.cpp
@@ -11,6 +11,7 @@
 #include "vdevice/scheduler/scheduled_stream.hpp"
 #include "vdevice/vdevice_native_stream.hpp"
 #include "net_flow/pipeline/vstream_internal.hpp"
+#include "common/utils.hpp"
 
 #define INVALID_BATCH_SIZE (-1)
 
@@ -458,20 +459,6 @@ Expected<Buffer> VDeviceCoreOp::get_intermediate_buffer(const IntermediateBuffer
     return m_core_ops.begin()->second->get_intermediate_buffer(key);
 }
 
-Expected<Buffer> VDeviceCoreOp::get_cache_buffer(uint32_t cache_id)
-{
-    CHECK_AS_EXPECTED(1 == m_core_ops.size(), HAILO_INVALID_OPERATION,
-        "get_cache_buffer function is not supported on more than 1 physical device.");
-    return m_core_ops.begin()->second->get_cache_buffer(cache_id);
-}
-
-Expected<std::map<uint32_t, Buffer>> VDeviceCoreOp::get_cache_buffers()
-{
-    CHECK_AS_EXPECTED(1 == m_core_ops.size(), HAILO_INVALID_OPERATION,
-        "get_cache_buffers function is not supported on more than 1 physical device.");
-    return m_core_ops.begin()->second->get_cache_buffers();
-}
-
 Expected<uint32_t> VDeviceCoreOp::get_cache_read_size() const
 {
     CHECK(1 == m_core_ops.size(), HAILO_INVALID_OPERATION,
@@ -518,6 +505,27 @@ hailo_status VDeviceCoreOp::update_cache_offset(int32_t offset_delta_bytes)
     return m_core_ops.begin()->second->update_cache_offset(offset_delta_bytes);
 }
 
+Expected<std::vector<uint32_t>> VDeviceCoreOp::get_cache_ids() const
+{
+    CHECK(1 == m_core_ops.size(), HAILO_INVALID_OPERATION,
+        "get_cache_ids function is not supported on more than 1 physical device.");
+    return m_core_ops.begin()->second->get_cache_ids();
+}
+
+Expected<Buffer> VDeviceCoreOp::read_cache_buffer(uint32_t cache_id)
+{
+    CHECK(1 == m_core_ops.size(), HAILO_INVALID_OPERATION,
+        "read_cache_buffer function is not supported on more than 1 physical device.");
+    return m_core_ops.begin()->second->read_cache_buffer(cache_id);
+}
+
+hailo_status VDeviceCoreOp::write_cache_buffer(uint32_t cache_id, MemoryView buffer)
+{
+    CHECK(1 == m_core_ops.size(), HAILO_INVALID_OPERATION,
+        "write_cache_buffer function is not supported on more than 1 physical device.");
+    return m_core_ops.begin()->second->write_cache_buffer(cache_id, buffer);
+}
+
 hailo_status VDeviceCoreOp::add_to_trace()
 {
     const auto batch_size = get_stream_batch_size(m_config_params.stream_params_by_name.begin()->first);
@@ -546,4 +554,19 @@ hailo_status VDeviceCoreOp::add_to_trace()
     return HAILO_SUCCESS;
 }
 
+// Returns true when lhs and rhs have the same size and every network in lhs is also in rhs with the same batch_size.
+bool VDeviceCoreOp::equal_batch(const std::map<std::string, hailo_network_parameters_t> &lhs, const std::map<std::string, hailo_network_parameters_t> &rhs)
+{
+    if (lhs.size() != rhs.size()) {
+        return false;
+    }
+    for (const auto &lhs_pair : lhs) {
+        if ((!contains(rhs, lhs_pair.first)) || (rhs.at(lhs_pair.first).batch_size != lhs_pair.second.batch_size)) {
+            return false;
+        }
+    }
+
+    return true;
+}
+
 } /* namespace hailort */
diff --git a/hailort/libhailort/src/vdevice/vdevice_core_op.hpp b/hailort/libhailort/src/vdevice/vdevice_core_op.hpp
index f057d47f..d033e8a6 100644
--- a/hailort/libhailort/src/vdevice/vdevice_core_op.hpp
+++ b/hailort/libhailort/src/vdevice/vdevice_core_op.hpp
@@ -51,7 +51,8 @@ class VDeviceCoreOp : public CoreOp
     {
         if ((params_pair.first == name()) && (hef.hash() == m_hef_hash)) {
             if ((params_pair.second.batch_size == m_config_params.batch_size) &&
-                (params_pair.second.power_mode == m_config_params.power_mode)) {
+                (params_pair.second.power_mode == m_config_params.power_mode) &&
+                (equal_batch(params_pair.second.network_params_by_name, m_config_params.network_params_by_name))) {
                     return true;
             }
             LOGGER__INFO("The network group: {} was already configured to the device with different params."
@@ -92,14 +93,15 @@ class VDeviceCoreOp : public CoreOp
 
     virtual Expected<HwInferResults> run_hw_infer_estimator() override;
     virtual Expected<Buffer> get_intermediate_buffer(const IntermediateBufferKey &) override;
-    virtual Expected<Buffer> get_cache_buffer(uint32_t cache_id) override;
-    virtual Expected<std::map<uint32_t, Buffer>> get_cache_buffers() override;
     virtual bool has_caches() const override;
     virtual Expected<uint32_t> get_cache_read_size() const override;
     virtual Expected<uint32_t> get_cache_write_size() const override;
     virtual hailo_status init_cache(uint32_t read_offset, int32_t write_offset_delta) override;
     virtual Expected<hailo_cache_info_t> get_cache_info() const;
     virtual hailo_status update_cache_offset(int32_t offset_delta_bytes) override;
+    virtual Expected<std::vector<uint32_t>> get_cache_ids() const override;
+    virtual Expected<Buffer> read_cache_buffer(uint32_t cache_id) override;
+    virtual hailo_status write_cache_buffer(uint32_t cache_id, MemoryView buffer) override;
 
     VDeviceCoreOp(VDevice &vdevice,
         ActiveCoreOpHolder &active_core_op_holder,
@@ -121,6 +123,8 @@ class VDeviceCoreOp : public CoreOp
 
     hailo_status add_to_trace();
 
+    bool equal_batch(const std::map<std::string, hailo_network_parameters_t> &lhs, const std::map<std::string, hailo_network_parameters_t> &rhs);
+
     VDevice &m_vdevice;
     std::map<device_id_t, std::shared_ptr<CoreOp>> m_core_ops;
     CoreOpsSchedulerWeakPtr m_core_ops_scheduler;
diff --git a/hailort/libhailort/src/vdevice/vdevice_hrpc_client.cpp b/hailort/libhailort/src/vdevice/vdevice_hrpc_client.cpp
index 628a7ad9..1e61019e 100644
--- a/hailort/libhailort/src/vdevice/vdevice_hrpc_client.cpp
+++ b/hailort/libhailort/src/vdevice/vdevice_hrpc_client.cpp
@@ -19,20 +19,56 @@ Expected<std::unique_ptr<VDevice>> VDeviceHrpcClient::create(const hailo_vdevice
 {
     CHECK_AS_EXPECTED(params.device_count == 1, HAILO_OUT_OF_PHYSICAL_DEVICES, "Only single device is supported!");
 
-    auto client = make_shared_nothrow<hrpc::Client>();
+    std::string device_id;
+    if (nullptr != params.device_ids) {
+        device_id = params.device_ids[0].id;
+    } else {
+        auto acc_type = HailoRTDriver::AcceleratorType::SOC_ACCELERATOR;
+
+        // If forcing hrpc service, it's because we work without EP driver -> use sockets
+        if (VDevice::should_force_hrpc_client()) {
+            acc_type = HailoRTDriver::AcceleratorType::NNC_ACCELERATOR;
+        }
+
+        TRY(auto scan_results, HailoRTDriver::scan_devices(acc_type));
+        CHECK_AS_EXPECTED(scan_results.size() > 0, HAILO_OUT_OF_PHYSICAL_DEVICES, "No devices found");
+
+        device_id = scan_results[0].device_id;
+    }
+
+    auto client = make_shared_nothrow<hrpc::Client>(device_id);
     CHECK_NOT_NULL(client, HAILO_INTERNAL_FAILURE);
 
     auto status = client->connect();
     CHECK_SUCCESS_AS_EXPECTED(status, "Failed to connect to server");
 
+    auto callbacks_dispatcher = make_shared_nothrow<CallbacksDispatcher>();
+    CHECK_NOT_NULL_AS_EXPECTED(callbacks_dispatcher, HAILO_OUT_OF_HOST_MEMORY);
+
+    client->register_custom_reply(HailoRpcActionID::CALLBACK_CALLED,
+    [callbacks_dispatcher] (const MemoryView &serialized_reply, hrpc::RpcConnection connection) -> hailo_status {
+        TRY(auto tuple, CallbackCalledSerializer::deserialize_reply(serialized_reply));
+        auto callback_status = std::get<0>(tuple);
+        auto callback_handle_id = std::get<1>(tuple);
+        auto cim_handle = std::get<2>(tuple);
+
+        auto status = callbacks_dispatcher->at(cim_handle)->push_callback(callback_status, callback_handle_id, connection);
+        CHECK_SUCCESS(status);
+
+        return HAILO_SUCCESS;
+    });
+
     TRY(auto request, CreateVDeviceSerializer::serialize_request(params));
     TRY(auto result, client->execute_request(HailoRpcActionID::VDEVICE__CREATE, MemoryView(request)));
     TRY(auto tuple, CreateVDeviceSerializer::deserialize_reply(MemoryView(result)));
     status = std::get<0>(tuple);
     CHECK_SUCCESS_AS_EXPECTED(status);
 
+    TRY(auto device, PcieDeviceHrpcClient::create(device_id, client));
+
     auto vdevice_handle = std::get<1>(tuple);
-    auto vdevice_client = make_unique_nothrow<VDeviceHrpcClient>(std::move(client), vdevice_handle);
+    auto vdevice_client = make_unique_nothrow<VDeviceHrpcClient>(std::move(client), vdevice_handle, callbacks_dispatcher,
+        std::move(device), device_id);
     CHECK_NOT_NULL(vdevice_client, HAILO_OUT_OF_HOST_MEMORY);
 
     return std::unique_ptr<VDevice>(std::move(vdevice_client));
@@ -53,6 +89,7 @@ VDeviceHrpcClient::~VDeviceHrpcClient()
     auto result = m_client->execute_request(HailoRpcActionID::VDEVICE__DESTROY, MemoryView(*request));
     if (!result) {
         LOGGER__CRITICAL("Failed to destroy VDevice! status = {}", result.status());
+        return;
     }
 
     if (HAILO_SUCCESS != DestroyVDeviceSerializer::deserialize_reply(MemoryView(*result))) {
@@ -60,27 +97,13 @@ VDeviceHrpcClient::~VDeviceHrpcClient()
     }
 }
 
-Expected<std::shared_ptr<InferModel>> VDeviceHrpcClient::create_infer_model(const std::string &hef_path, const std::string &network_name)
+Expected<std::shared_ptr<InferModel>> VDeviceHrpcClient::create_infer_model(const MemoryView hef_buffer, const std::string &name)
 {
-    CHECK_AS_EXPECTED(network_name.empty(), HAILO_NOT_IMPLEMENTED, "Passing network name is not supported yet!");
-
-    FileReader hef_reader(hef_path);
-    auto status = hef_reader.open();
-    CHECK_SUCCESS(status);
-
-    TRY(auto hef_size, hef_reader.get_size());
-    TRY(auto hef_buffer, Buffer::create(hef_size));
-    status = hef_reader.read(hef_buffer.data(), hef_size);
-    CHECK_SUCCESS(status);
-
-    status = hef_reader.close();
-    CHECK_SUCCESS(status);
-
-    TRY(auto request, CreateInferModelSerializer::serialize_request(m_handle, hef_size));
+    TRY(auto request, CreateInferModelSerializer::serialize_request(m_handle, hef_buffer.size(), name));
     TRY(auto result, m_client->execute_request(HailoRpcActionID::VDEVICE__CREATE_INFER_MODEL,
         MemoryView(request), [&hef_buffer] (hrpc::RpcConnection connection) -> hailo_status {
         // TODO: change write to accept uint64_t, or accept file stream instead or write in chunks
-        auto status = connection.write_buffer(MemoryView(hef_buffer));
+        auto status = connection.write_buffer(hef_buffer);
         CHECK_SUCCESS(status);
 
         return HAILO_SUCCESS;
@@ -90,12 +113,30 @@ Expected<std::shared_ptr<InferModel>> VDeviceHrpcClient::create_infer_model(cons
     CHECK_SUCCESS_AS_EXPECTED(std::get<0>(tuple));
     auto infer_model_handle = std::get<1>(tuple);
 
-    TRY(auto hef, Hef::create(MemoryView(hef_buffer)));
-    TRY(auto infer_model, InferModelHrpcClient::create(std::move(hef), m_client, infer_model_handle, m_handle, *this));
+    TRY(auto hef, Hef::create(hef_buffer));
+    TRY(auto infer_model, InferModelHrpcClient::create(std::move(hef), name, m_client, infer_model_handle, m_handle,
+        *this, m_callbacks_dispatcher));
 
     return std::shared_ptr<InferModel>(std::move(infer_model));
 }
 
+// Reads the HEF file at hef_path into a buffer and forwards it to the MemoryView overload of create_infer_model.
+Expected<std::shared_ptr<InferModel>> VDeviceHrpcClient::create_infer_model(const std::string &hef_path, const std::string &name)
+{
+    FileReader hef_reader(hef_path);
+    auto status = hef_reader.open();
+    CHECK_SUCCESS(status);
+    // Allocate a buffer of the file's size and read the file contents into it
+    TRY(auto hef_size, hef_reader.get_size());
+    TRY(auto hef_buffer, Buffer::create(hef_size));
+    status = hef_reader.read(hef_buffer.data(), hef_size);
+    CHECK_SUCCESS(status);
+    status = hef_reader.close();
+    CHECK_SUCCESS(status);
+
+    return create_infer_model(MemoryView(hef_buffer), name);
+}
+
 Expected<ConfiguredNetworkGroupVector> VDeviceHrpcClient::configure(Hef &hef, const NetworkGroupsParamsMap &configure_params)
 {
     (void)m_handle;
@@ -106,12 +147,18 @@ Expected<ConfiguredNetworkGroupVector> VDeviceHrpcClient::configure(Hef &hef, co
 
 Expected<std::vector<std::reference_wrapper<Device>>> VDeviceHrpcClient::get_physical_devices() const
 {
-    return make_unexpected(HAILO_NOT_IMPLEMENTED);
+    std::vector<std::reference_wrapper<Device>> result;
+    result.reserve(1);
+    result.push_back(*m_device);
+    return result;
 }
 
 Expected<std::vector<std::string>> VDeviceHrpcClient::get_physical_devices_ids() const
 {
-    return make_unexpected(HAILO_NOT_IMPLEMENTED);
+    std::vector<std::string> result;
+    result.reserve(1);
+    result.push_back(m_device_id);
+    return result;
 }
 
 // Currently only homogeneous vDevice is allow (= all devices are from the same type)
diff --git a/hailort/libhailort/src/vdevice/vdevice_hrpc_client.hpp b/hailort/libhailort/src/vdevice/vdevice_hrpc_client.hpp
index bcccdd6a..e80ec771 100644
--- a/hailort/libhailort/src/vdevice/vdevice_hrpc_client.hpp
+++ b/hailort/libhailort/src/vdevice/vdevice_hrpc_client.hpp
@@ -13,6 +13,8 @@
 #include "hailo/hailort.h"
 #include "hrpc/client.hpp"
 #include "vdevice/vdevice_internal.hpp"
+#include "rpc_callbacks/rpc_callbacks_dispatcher.hpp"
+#include "vdma/pcie/pcie_device_hrpc_client.hpp"
 
 namespace hailort
 {
@@ -22,8 +24,10 @@ class VDeviceHrpcClient : public VDevice
 public:
     static Expected<std::unique_ptr<VDevice>> create(const hailo_vdevice_params_t &params);
 
-    VDeviceHrpcClient(std::shared_ptr<hrpc::Client> client, uint32_t handle)
-        : m_client(client), m_handle(handle) {}
+    VDeviceHrpcClient(std::shared_ptr<hrpc::Client> client, uint32_t handle, std::shared_ptr<CallbacksDispatcher> callbacks_dispatcher,
+        std::unique_ptr<PcieDeviceHrpcClient> &&device, std::string device_id)
+        : m_client(client), m_handle(handle), m_callbacks_dispatcher(callbacks_dispatcher), m_device(std::move(device)),
+        m_device_id(device_id) {}
 
     VDeviceHrpcClient(VDeviceHrpcClient &&) = delete;
     VDeviceHrpcClient(const VDeviceHrpcClient &) = delete;
@@ -32,7 +36,9 @@ class VDeviceHrpcClient : public VDevice
     virtual ~VDeviceHrpcClient();
 
     virtual Expected<std::shared_ptr<InferModel>> create_infer_model(const std::string &hef_path,
-        const std::string &network_name = "") override;
+        const std::string &name = "") override;
+    virtual Expected<std::shared_ptr<InferModel>> create_infer_model(const MemoryView hef_buffer,
+        const std::string &name = "") override;
     virtual Expected<ConfiguredNetworkGroupVector> configure(Hef &hef, const NetworkGroupsParamsMap &configure_params={}) override;
     virtual Expected<std::vector<std::reference_wrapper<Device>>> get_physical_devices() const override;
     virtual Expected<std::vector<std::string>> get_physical_devices_ids() const override;
@@ -45,6 +51,9 @@ class VDeviceHrpcClient : public VDevice
 private:
     std::shared_ptr<hrpc::Client> m_client;
     uint32_t m_handle;
+    std::shared_ptr<CallbacksDispatcher> m_callbacks_dispatcher;
+    std::unique_ptr<PcieDeviceHrpcClient> m_device;
+    std::string m_device_id;
 };
 
 } /* namespace hailort */
diff --git a/hailort/libhailort/src/vdevice/vdevice_internal.hpp b/hailort/libhailort/src/vdevice/vdevice_internal.hpp
index 3c272d69..70419b43 100644
--- a/hailort/libhailort/src/vdevice/vdevice_internal.hpp
+++ b/hailort/libhailort/src/vdevice/vdevice_internal.hpp
@@ -25,6 +25,7 @@
 #include "hailo/vdevice.hpp"
 
 #include "common/async_thread.hpp"
+#include "common/internal_env_vars.hpp"
 #include "vdma/vdma_device.hpp"
 #include "vdma/vdma_config_manager.hpp"
 #include "vdevice/vdevice_core_op.hpp"
@@ -32,13 +33,13 @@
 
 #ifdef HAILO_SUPPORT_MULTI_PROCESS
 #include "service/hailort_rpc_client.hpp"
+#include "service/network_group_client.hpp"
 #endif // HAILO_SUPPORT_MULTI_PROCESS
 
 
 namespace hailort
 {
 
-#define DISABLE_MULTIPLEXER_ENV_VAR "HAILO_DISABLE_MULTIPLEXER_INTERNAL"
 class VDeviceBase : public VDevice
 {
 public:
@@ -142,6 +143,8 @@ class VDeviceBase : public VDevice
     static hailo_status validate_params(const hailo_vdevice_params_t &params);
     static Expected<bool> device_ids_contains_eth(const hailo_vdevice_params_t &params);
 
+    virtual hailo_status add_network_group_ref_count(std::shared_ptr<ConfiguredNetworkGroup> network_group_ptr) override;
+
 private:
     VDeviceBase(std::map<device_id_t, std::unique_ptr<Device>> &&devices, CoreOpsSchedulerPtr core_ops_scheduler,
         const std::string &unique_vdevice_hash="") :
@@ -238,12 +241,14 @@ class VDeviceHandle : public VDevice
     Expected<std::vector<std::string>> get_physical_devices_ids() const override;
     Expected<hailo_stream_interface_t> get_default_streams_interface() const override;
     Expected<std::shared_ptr<InferModel>> create_infer_model(const std::string &hef_path,
-        const std::string &network_name = "") override;
+        const std::string &name = "") override;
     virtual hailo_status dma_map(void *address, size_t size, hailo_dma_buffer_direction_t direction) override;
     virtual hailo_status dma_unmap(void *address, size_t size, hailo_dma_buffer_direction_t direction) override;
     virtual hailo_status dma_map_dmabuf(int dmabuf_fd, size_t size, hailo_dma_buffer_direction_t direction) override;
     virtual hailo_status dma_unmap_dmabuf(int dmabuf_fd, size_t size, hailo_dma_buffer_direction_t direction) override;
 
+    virtual hailo_status add_network_group_ref_count(std::shared_ptr<ConfiguredNetworkGroup> network_group_ptr) override;
+
 private:
     VDeviceHandle(uint32_t handle);
     uint32_t m_handle;
diff --git a/hailort/libhailort/src/vdma/CMakeLists.txt b/hailort/libhailort/src/vdma/CMakeLists.txt
index e2be112c..99042b10 100644
--- a/hailort/libhailort/src/vdma/CMakeLists.txt
+++ b/hailort/libhailort/src/vdma/CMakeLists.txt
@@ -1,4 +1,4 @@
-cmake_minimum_required(VERSION 3.0.0)
+cmake_minimum_required(VERSION 3.5.0)
 
 if(WIN32)
     set(DRIVER_OS_DIR "${CMAKE_CURRENT_SOURCE_DIR}/driver/os/windows")
@@ -31,11 +31,13 @@ set(SRC_FILES
 
     ${CMAKE_CURRENT_SOURCE_DIR}/pcie/pcie_device.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/integrated/integrated_device.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/pcie/pcie_device_hrpc_client.cpp
 
     ${CMAKE_CURRENT_SOURCE_DIR}/channel/boundary_channel.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/channel/channels_group.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/channel/interrupts_dispatcher.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/channel/transfer_launcher.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/channel/transfer_common.cpp
 
     ${CMAKE_CURRENT_SOURCE_DIR}/memory/descriptor_list.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/memory/vdma_edge_layer.cpp
diff --git a/hailort/libhailort/src/vdma/channel/boundary_channel.cpp b/hailort/libhailort/src/vdma/channel/boundary_channel.cpp
index 5cdb82a7..06c0566c 100644
--- a/hailort/libhailort/src/vdma/channel/boundary_channel.cpp
+++ b/hailort/libhailort/src/vdma/channel/boundary_channel.cpp
@@ -12,7 +12,6 @@
 #include "common/os_utils.hpp"
 
 #include "vdma/channel/boundary_channel.hpp"
-#include "vdma/memory/vdma_edge_layer.hpp"
 
 #include <list>
 #include <chrono>
@@ -109,33 +108,52 @@ hailo_status BoundaryChannel::trigger_channel_completion(const ChannelIrqData &i
 
     CHECK(irq_data.transfers_completed <= m_ongoing_transfers.size(), HAILO_INTERNAL_FAILURE,
         "Invalid amount of completed transfers {} max {}", irq_data.transfers_completed, m_ongoing_transfers.size());
-    
+
     auto callback_status = get_callback_status(m_channel_id, irq_data);
+
     // If channel is no longer active - all transfers should be completed
     const size_t num_transfers_to_trigger = (HAILO_SUCCESS == callback_status) ? irq_data.transfers_completed :
         m_ongoing_transfers.size();
-    for (size_t i = 0; i < num_transfers_to_trigger; i++) {
+    size_t i = 0; // counts launched transfers only; failed launches are drained below without being counted
+    while ((i < num_transfers_to_trigger) && !m_ongoing_transfers.empty()) {
         auto transfer = std::move(m_ongoing_transfers.front());
         m_ongoing_transfers.pop_front();
+        if (HAILO_SUCCESS != transfer.launch_status) {
+            // The transfer failed to launch to begin with. We need to call the callback with the failure status.
+            on_request_complete(lock, transfer.request, transfer.launch_status);
+
+            // Continue to the next transfer, without counting it, since num_transfers_to_trigger is the number of
+            // completed transfers that were launched to begin with.
+            continue;
+        }
 
         // We increase desc num_proc (can happen only in this flow). After it is increased -
         //  1. On D2H channels - the output can be read by the user.
         //  2. On H2D channels - new input can be written to the buffer.
         m_descs.set_tail((transfer.last_desc + 1) & m_descs.size_mask());
 
-        // We've freed up room in the descriptor list, so we can launch another transfer
         if (!m_pending_transfers.empty()) {
             m_transfer_launcher.enqueue_transfer([this]() {
                 std::unique_lock<std::mutex> lock(m_channel_mutex);
+                // There can be more transfers deferred to the m_transfer_launcher than need to be launched.
+                // E.g. If num_transfers_to_trigger is 2, but only one transfer is pending in m_pending_transfers.
+                //      (we still need to handle the transfers via the m_transfer_launcher to keep their order).
                 if (m_pending_transfers.empty()) {
                     return;
                 }
+                // If at a given moment there's no room for new m_ongoing_transfers, we'll leave the pending transfers
+                // in the m_pending_transfers queue, and they will be launched when there's room.
+                if (m_ongoing_transfers.full()) {
+                    return;
+                }
                 auto transfer_request = std::move(m_pending_transfers.front());
                 m_pending_transfers.pop_front();
-                const auto status = launch_transfer_impl(std::move(transfer_request));
-                if (status != HAILO_SUCCESS) {
-                    on_request_complete(lock, transfer_request, status);
-                }
+
+                // Note: We don't check the return value of launch_and_enqueue_transfer, since failed transfers will be queued
+                //       to m_ongoing_transfers (due to QUEUE_FAILED_TRANSFER being true). This is needed to keep the
+                //       callback order consistent.
+                static const auto QUEUE_FAILED_TRANSFER = true;
+                (void) launch_and_enqueue_transfer(std::move(transfer_request), QUEUE_FAILED_TRANSFER);
             });
         }
 
@@ -145,18 +163,12 @@ hailo_status BoundaryChannel::trigger_channel_completion(const ChannelIrqData &i
         // Also, we want to make sure that the callbacks are called after the descriptors can be reused (so the user
         // will be able to start new transfer).
         on_request_complete(lock, transfer.request, callback_status);
+        i++;
     }
 
     return HAILO_SUCCESS;
 }
 
-CONTROL_PROTOCOL__host_buffer_info_t BoundaryChannel::get_boundary_buffer_info(uint32_t transfer_size) const
-{
-    // Boundary channels always have scatter gather buffers
-    return VdmaEdgeLayer::get_host_buffer_info(VdmaEdgeLayer::Type::SCATTER_GATHER, m_desc_list.dma_address(),
-        m_desc_list.desc_page_size(), m_desc_list.count(), transfer_size);
-}
-
 hailo_status BoundaryChannel::activate()
 {
     std::lock_guard<std::mutex> lock(m_channel_mutex);
@@ -185,10 +197,10 @@ hailo_status BoundaryChannel::launch_transfer(TransferRequest &&transfer_request
     }
 
     if ((m_ongoing_transfers.size() < m_ongoing_transfers.capacity()) && (m_pending_transfers.size() == 0)) {
-        // There's room in the desc list and there are no pending transfers => execute on user's thread
+        // There's room in the desc list and there are no pending transfers or callbacks => execute on user's thread
         // We can't use the user thread to launch the transfer if there are pending transfers, because we need to
         // preserve the order of the transfers.
-        return launch_transfer_impl(std::move(transfer_request));
+        return launch_and_enqueue_transfer(std::move(transfer_request));
     }
 
     if (m_pending_transfers.size() >= m_pending_transfers.capacity()) {
@@ -200,15 +212,31 @@ hailo_status BoundaryChannel::launch_transfer(TransferRequest &&transfer_request
     return HAILO_SUCCESS;
 }
 
-// Assumes that the m_channel_mutex is locked!
-hailo_status BoundaryChannel::launch_transfer_impl(TransferRequest &&transfer_request)
+// Assumes:
+// * m_channel_mutex is locked
+// * m_ongoing_transfers.size() < m_ongoing_transfers.capacity()
+hailo_status BoundaryChannel::launch_and_enqueue_transfer(TransferRequest &&transfer_request, bool queue_failed_transfer)
 {
-    if (!m_is_channel_activated) {
-        return HAILO_STREAM_NOT_ACTIVATED;
+    auto last_desc = launch_transfer_impl(transfer_request);
+    if (!last_desc) {
+        if (queue_failed_transfer) {
+            m_ongoing_transfers.push_back(OngoingTransfer{std::move(transfer_request), 0, last_desc.status()});
+        }
+        return last_desc.status();
     }
+    m_ongoing_transfers.push_back(OngoingTransfer{std::move(transfer_request), last_desc.value()});
 
-    if (m_ongoing_transfers.size() >= m_ongoing_transfers.capacity()) {
-        return HAILO_QUEUE_IS_FULL;
+    return HAILO_SUCCESS;
+}
+
+// Assumes:
+// * m_channel_mutex is locked
+// * m_ongoing_transfers.size() < m_ongoing_transfers.capacity()
+Expected<uint16_t> BoundaryChannel::launch_transfer_impl(TransferRequest &transfer_request)
+{
+    assert(m_ongoing_transfers.size() < m_ongoing_transfers.capacity());
+    if (!m_is_channel_activated) {
+        return make_unexpected(HAILO_STREAM_NOT_ACTIVATED);
     }
 
     auto num_available = static_cast<uint16_t>(m_descs.head());
@@ -216,8 +244,10 @@ hailo_status BoundaryChannel::launch_transfer_impl(TransferRequest &&transfer_re
     uint16_t last_desc = std::numeric_limits<uint16_t>::max();
     uint16_t total_descs_count = 0;
 
-    const bool should_bind = !m_bounded_buffer;
-    if (!should_bind) {
+    TRY(const bool should_bind, should_bind_buffer(transfer_request));
+    if (should_bind) {
+        m_bounded_buffer = nullptr;
+    } else {
         CHECK_SUCCESS(validate_bound_buffer(transfer_request));
     }
 
@@ -252,7 +282,7 @@ hailo_status BoundaryChannel::launch_transfer_impl(TransferRequest &&transfer_re
     int num_processed = m_descs.tail();
     int num_free = m_descs.avail(num_available, num_processed);
     if (total_descs_count > num_free) {
-        return HAILO_OUT_OF_DESCRIPTORS;
+        return make_unexpected(HAILO_OUT_OF_DESCRIPTORS);
     }
 
     if (m_latency_meter) {
@@ -272,17 +302,17 @@ hailo_status BoundaryChannel::launch_transfer_impl(TransferRequest &&transfer_re
         ));
     CHECK(total_descs_count == desc_programmed, HAILO_INTERNAL_FAILURE,
         "Inconsistent desc programed expecting {} got {}", total_descs_count, desc_programmed);
-    m_ongoing_transfers.push_back(OngoingTransfer{std::move(transfer_request), last_desc});
 
-    return HAILO_SUCCESS;
+    return last_desc;
 }
 
 hailo_status BoundaryChannel::bind_buffer(MappedBufferPtr buffer)
 {
+    std::lock_guard<std::mutex> lock(m_channel_mutex);
     CHECK(m_bounded_buffer == nullptr, HAILO_INTERNAL_FAILURE,
         "Buffer is already bound to channel {}", m_channel_id);
     const auto expected_size = static_cast<size_t>(m_desc_list.desc_page_size()) * m_desc_list.count();
-    CHECK(buffer->size() == expected_size, HAILO_INVALID_ARGUMENT,
+    CHECK(buffer->size() <= expected_size, HAILO_INVALID_ARGUMENT,
         "Buffer size {} does not fit in desc list - descs count {} desc page size {}", buffer->size(),
         m_desc_list.count(), m_desc_list.desc_page_size());
     static const size_t DEFAULT_BUFFER_OFFSET = 0;
@@ -291,9 +321,22 @@ hailo_status BoundaryChannel::bind_buffer(MappedBufferPtr buffer)
     return HAILO_SUCCESS;
 }
 
+hailo_status BoundaryChannel::map_and_bind_buffer(hailort::TransferBuffer &buffer)
+{
+    TRY(auto mapping, buffer.map_buffer(m_driver, m_direction)); // DMA-map the buffer for this channel's direction
+    return bind_buffer(std::move(mapping));
+}
+
+void BoundaryChannel::remove_buffer_binding()
+{
+    std::lock_guard<std::mutex> lock(m_channel_mutex);
+    m_bounded_buffer = nullptr;
+}
+
 void BoundaryChannel::cancel_pending_transfers()
 {
     std::unique_lock<std::mutex> lock(m_channel_mutex);
+
     // Cancel all ongoing transfers
     while (!m_ongoing_transfers.empty()) {
         auto transfer = std::move(m_ongoing_transfers.front());
@@ -401,13 +444,45 @@ hailo_status BoundaryChannel::validate_bound_buffer(TransferRequest &transfer_re
     const auto expected_offset = static_cast<size_t>(m_desc_list.desc_page_size()) * num_available;
     CHECK(transfer_buffer.offset() == expected_offset, HAILO_INTERNAL_FAILURE,
         "Unexpected buffer offset, expected {} actual {}", expected_offset, transfer_buffer.offset());
-    TRY(auto base_buffer, transfer_buffer.base_buffer());
-    CHECK(base_buffer.data() == reinterpret_cast<const uint8_t*>(m_bounded_buffer->user_address()), HAILO_INTERNAL_FAILURE,
-        "Got the wrong buffer");
-    CHECK(base_buffer.size() == m_bounded_buffer->size(), HAILO_INTERNAL_FAILURE,
-        "Got invalid buffer size {}, expected {}", base_buffer.size(), m_bounded_buffer->size());
+    TRY(auto is_same_buffer, is_same_buffer(m_bounded_buffer, transfer_buffer));
+    if (!is_same_buffer) {
+        LOGGER__ERROR("Got diff in buffers");
+        return HAILO_INTERNAL_FAILURE;
+    }
+
     return HAILO_SUCCESS;
 }
 
+Expected<bool> BoundaryChannel::is_same_buffer(MappedBufferPtr mapped_buff, TransferBuffer &transfer_buffer)
+{
+    if (transfer_buffer.type() == TransferBufferType::DMABUF) {
+        TRY(auto buf_fd, transfer_buffer.dmabuf_fd());
+        TRY(auto mapped_buf_fd, mapped_buff->fd());
+        return ((buf_fd == mapped_buf_fd) && (transfer_buffer.size() == mapped_buff->size()));
+    } else {
+        // Propagate a base_buffer() failure to the caller instead of dereferencing an unchecked Expected.
+        TRY(auto base_buffer, transfer_buffer.base_buffer());
+        return ((base_buffer.data() == mapped_buff->user_address()) && (base_buffer.size() == mapped_buff->size()));
+    }
+}
+
+Expected<bool> BoundaryChannel::should_bind_buffer(TransferRequest &transfer_request)
+{
+    // Nothing is bound yet, or the request spans several buffers - a bind is needed.
+    if ((nullptr == m_bounded_buffer) || (1 < transfer_request.transfer_buffers.size())) {
+        return true;
+    }
+    // Widen before multiplying (as bind_buffer does), so the product is computed in size_t.
+    const size_t desc_list_bytes = static_cast<size_t>(m_descs.size()) * m_desc_list.desc_page_size();
+    if (desc_list_bytes == m_bounded_buffer->size()) {
+        // The bound buffer covers the whole descriptor list (cyclic buffer, sync api is used) - no bind needed.
+        return false;
+    }
+    // Otherwise the current binding can be kept only if it refers to the received buffer and the
+    // descriptors are pointing to the beginning of the buffer.
+    TRY(const auto same_buffer, is_same_buffer(m_bounded_buffer, transfer_request.transfer_buffers[0]));
+    return !(same_buffer && (0 == m_descs.head()));
+}
+
 } /* namespace vdma */
 } /* namespace hailort */
diff --git a/hailort/libhailort/src/vdma/channel/boundary_channel.hpp b/hailort/libhailort/src/vdma/channel/boundary_channel.hpp
index 55e5596a..fc0ae5b1 100644
--- a/hailort/libhailort/src/vdma/channel/boundary_channel.hpp
+++ b/hailort/libhailort/src/vdma/channel/boundary_channel.hpp
@@ -12,13 +12,11 @@
 
 #include "vdma/channel/channel_id.hpp"
 #include "vdma/channel/transfer_launcher.hpp"
+#include "vdma/channel/transfer_common.hpp"
 #include "vdma/memory/descriptor_list.hpp"
-#include "stream_common/transfer_common.hpp"
 
 #include "common/latency_meter.hpp"
 
-#include "context_switch_defs.h"
-
 #include <memory>
 
 
@@ -28,6 +26,8 @@ namespace vdma {
 struct OngoingTransfer {
     TransferRequest request;
     uint16_t last_desc;
+    // Will be set to != HAILO_SUCCESS if the transfer failed to be launched in BoundaryChannel::launch_transfer_impl
+    hailo_status launch_status = HAILO_SUCCESS;
 };
 
 class BoundaryChannel;
@@ -77,12 +77,12 @@ class BoundaryChannel final
     // size should be exactly desc_page_size() * descs_count() of current descriptors list.
     hailo_status bind_buffer(MappedBufferPtr buffer);
 
+    hailo_status map_and_bind_buffer(hailort::TransferBuffer &buffer);
+
     // TODO: rename BoundaryChannel::get_max_ongoing_transfers to BoundaryChannel::get_max_parallel_transfers (HRT-13513)
     size_t get_max_ongoing_transfers(size_t transfer_size) const;
     size_t get_max_aligned_transfers_in_desc_list(size_t transfer_size) const;
 
-    CONTROL_PROTOCOL__host_buffer_info_t get_boundary_buffer_info(uint32_t transfer_size) const;
-
     vdma::ChannelId get_channel_id() const
     {
         return m_channel_id;
@@ -100,16 +100,22 @@ class BoundaryChannel final
 
     bool should_measure_timestamp() const { return m_latency_meter != nullptr; }
 
+    void remove_buffer_binding();
+
 private:
     hailo_status update_latency_meter();
 
     void on_request_complete(std::unique_lock<std::mutex> &lock, TransferRequest &request,
         hailo_status complete_status);
-    hailo_status launch_transfer_impl(TransferRequest &&transfer_request);
+    hailo_status launch_and_enqueue_transfer(TransferRequest &&transfer_request, bool queue_failed_transfer = false);
+    Expected<uint16_t> launch_transfer_impl(TransferRequest &transfer_request);
 
     static bool is_desc_between(uint16_t begin, uint16_t end, uint16_t desc);
     hailo_status validate_bound_buffer(TransferRequest &transfer_request);
 
+    Expected<bool> should_bind_buffer(TransferRequest &transfer_request);
+    static Expected<bool> is_same_buffer(MappedBufferPtr mapped_buff, TransferBuffer &transfer_buffer);
+
     const vdma::ChannelId m_channel_id;
     const Direction m_direction;
     HailoRTDriver &m_driver;
diff --git a/hailort/libhailort/src/stream_common/transfer_common.cpp b/hailort/libhailort/src/vdma/channel/transfer_common.cpp
similarity index 73%
rename from hailort/libhailort/src/stream_common/transfer_common.cpp
rename to hailort/libhailort/src/vdma/channel/transfer_common.cpp
index ef8db782..96242c49 100644
--- a/hailort/libhailort/src/stream_common/transfer_common.cpp
+++ b/hailort/libhailort/src/vdma/channel/transfer_common.cpp
@@ -8,7 +8,6 @@
 
 #include "transfer_common.hpp"
 #include "vdma/memory/mapped_buffer.hpp"
-#include "utils/buffer_storage.hpp"
 
 namespace hailort
 {
@@ -50,35 +49,27 @@ Expected<MemoryView> TransferBuffer::base_buffer()
     return Expected<MemoryView>(m_base_buffer);
 }
 
+Expected<int> TransferBuffer::dmabuf_fd()
+{
+    CHECK(TransferBufferType::DMABUF == m_type, HAILO_INTERNAL_FAILURE,
+        "dmabuf_fd is only supported for DMABUF type TransferBuffer");
+
+    return Expected<int>(m_dmabuf.fd);
+}
+
 Expected<vdma::MappedBufferPtr> TransferBuffer::map_buffer(HailoRTDriver &driver, HailoRTDriver::DmaDirection direction)
 {
-    CHECK_AS_EXPECTED(!m_mappings, HAILO_INTERNAL_FAILURE, "Buffer is already mapped");
+    if (m_mappings) {
+        return Expected<vdma::MappedBufferPtr>{m_mappings};
+    }
     if (TransferBufferType::DMABUF == m_type) {
-        auto mapped_buffer = vdma::MappedBuffer::create_shared_from_dmabuf(m_dmabuf.fd, m_dmabuf.size, driver, direction);
-        CHECK_EXPECTED(mapped_buffer);
-
-        m_mappings = mapped_buffer.value();
-        return mapped_buffer;
+        TRY(m_mappings, vdma::MappedBuffer::create_shared_from_dmabuf(m_dmabuf.fd, m_dmabuf.size, driver, direction));
     } else {
-
-        vdma::DmaAbleBufferPtr dma_able_buffer;
-        const auto storage_key = std::make_pair(m_base_buffer.data(), m_base_buffer.size());
-        if (auto storage = BufferStorageResourceManager::get_resource(storage_key)) {
-            auto dma_able_buffer_exp = storage->get()->get_dma_able_buffer();
-            CHECK_EXPECTED(dma_able_buffer_exp);
-            dma_able_buffer = dma_able_buffer_exp.release();
-        } else {
-            auto dma_able_buffer_exp = vdma::DmaAbleBuffer::create_from_user_address(m_base_buffer.data(), m_base_buffer.size());
-            CHECK_EXPECTED(dma_able_buffer_exp);
-            dma_able_buffer = dma_able_buffer_exp.release();
-        }
-
-        auto mapped_buffer = vdma::MappedBuffer::create_shared(std::move(dma_able_buffer), driver, direction);
-        CHECK_EXPECTED(mapped_buffer);
-
-        m_mappings = mapped_buffer.value();
-        return mapped_buffer;
+        TRY(auto dma_able_buffer, vdma::DmaAbleBuffer::create_from_user_address(m_base_buffer.data(), m_base_buffer.size()));
+        TRY(m_mappings, vdma::MappedBuffer::create_shared(std::move(dma_able_buffer), driver, direction));
     }
+
+    return Expected<vdma::MappedBufferPtr>{m_mappings};
 }
 
 hailo_status TransferBuffer::copy_to(MemoryView buffer)
diff --git a/hailort/libhailort/src/stream_common/transfer_common.hpp b/hailort/libhailort/src/vdma/channel/transfer_common.hpp
similarity index 97%
rename from hailort/libhailort/src/stream_common/transfer_common.hpp
rename to hailort/libhailort/src/vdma/channel/transfer_common.hpp
index 572bb534..f44d0966 100644
--- a/hailort/libhailort/src/stream_common/transfer_common.hpp
+++ b/hailort/libhailort/src/vdma/channel/transfer_common.hpp
@@ -36,6 +36,8 @@ class TransferBuffer final {
     TransferBuffer(MemoryView base_buffer, size_t size, size_t offset);
 
     Expected<MemoryView> base_buffer();
+    Expected<int> dmabuf_fd();
+
     size_t offset() const { return m_offset; }
     size_t size() const { return m_size; }
 
@@ -44,7 +46,7 @@ class TransferBuffer final {
     hailo_status copy_to(MemoryView buffer);
     hailo_status copy_from(const MemoryView buffer);
 
-    TransferBufferType type () const {return m_type;}
+    TransferBufferType type () const { return m_type; }
 
 private:
 
diff --git a/hailort/libhailort/src/vdma/circular_stream_buffer_pool.cpp b/hailort/libhailort/src/vdma/circular_stream_buffer_pool.cpp
index ea504bd2..8b658992 100644
--- a/hailort/libhailort/src/vdma/circular_stream_buffer_pool.cpp
+++ b/hailort/libhailort/src/vdma/circular_stream_buffer_pool.cpp
@@ -27,27 +27,26 @@ Expected<std::unique_ptr<CircularStreamBufferPool>> CircularStreamBufferPool::cr
     CHECK(transfer_size < buffer_size, HAILO_INTERNAL_FAILURE, "Transfer size {} must be smaller than buffer size {}",
         transfer_size, buffer_size);
 
-    TRY(auto base_buffer, allocate_buffer(device, buffer_size));
-    TRY(auto mapping, DmaMappedBuffer::create(device, base_buffer.data(), base_buffer.size(), direction));
+    TRY(auto mapped_buffer, vdma::MappedBuffer::create_shared_by_allocation(buffer_size, device.get_driver(),
+        to_hailo_driver_direction(direction)));
 
     auto circular_buffer_pool = make_unique_nothrow<CircularStreamBufferPool>(desc_page_size, descs_count,
-        transfer_size, std::move(base_buffer), std::move(mapping));
+        transfer_size, std::move(mapped_buffer));
     CHECK_NOT_NULL(circular_buffer_pool, HAILO_OUT_OF_HOST_MEMORY);
 
     return circular_buffer_pool;
 }
 
 CircularStreamBufferPool::CircularStreamBufferPool(size_t desc_page_size, size_t descs_count, size_t transfer_size,
-    Buffer &&base_buffer, DmaMappedBuffer &&mappings) :
+    vdma::MappedBufferPtr &&mapped_buffer) :
         m_desc_page_size(desc_page_size),
         m_transfer_size(transfer_size),
-        m_base_buffer(std::move(base_buffer)),
-        m_mappings(std::move(mappings)),
+        m_mapped_buffer(std::move(mapped_buffer)),
         m_queue(static_cast<int>(descs_count)),
         m_next_enqueue_desc_offset(0)
 {
     assert(is_powerof2(descs_count) && (descs_count > 0));
-    assert(m_base_buffer.size() == (m_desc_page_size * descs_count));
+    assert(m_mapped_buffer->size() == (m_desc_page_size * descs_count));
     m_queue.set_head(static_cast<int>(descs_count) - 1);
 }
 
@@ -69,7 +68,7 @@ Expected<TransferBuffer> CircularStreamBufferPool::dequeue()
     const size_t offset_in_buffer = m_queue.tail() * m_desc_page_size;
     m_queue.dequeue(static_cast<int>(descs_in_transfer()));
     return TransferBuffer {
-        MemoryView(m_base_buffer),
+        MemoryView(m_mapped_buffer->user_address(), m_mapped_buffer->size()),
         m_transfer_size,
         offset_in_buffer
     };
@@ -81,7 +80,7 @@ hailo_status CircularStreamBufferPool::enqueue(TransferBuffer &&buffer_info)
     const size_t descs_available = m_queue.avail(m_queue.head(), m_queue.tail());
     CHECK(descs_available >= descs_required, HAILO_INTERNAL_FAILURE, "Can enqueue without previous dequeue");
     TRY(auto base_buffer, buffer_info.base_buffer());
-    CHECK(base_buffer.data() == m_base_buffer.data(), HAILO_INTERNAL_FAILURE, "Got the wrong buffer");
+    CHECK(base_buffer.data() == m_mapped_buffer->user_address(), HAILO_INTERNAL_FAILURE, "Got the wrong buffer");
     CHECK(buffer_info.size() == m_transfer_size, HAILO_INTERNAL_FAILURE, "Got invalid buffer size {}, expected {}",
         buffer_info.size(), m_transfer_size);
 
diff --git a/hailort/libhailort/src/vdma/circular_stream_buffer_pool.hpp b/hailort/libhailort/src/vdma/circular_stream_buffer_pool.hpp
index 17ba1cb9..a7526aa1 100644
--- a/hailort/libhailort/src/vdma/circular_stream_buffer_pool.hpp
+++ b/hailort/libhailort/src/vdma/circular_stream_buffer_pool.hpp
@@ -14,7 +14,6 @@
 #include "common/circular_buffer.hpp"
 #include "stream_common/stream_buffer_pool.hpp"
 #include "vdma/vdma_device.hpp"
-#include "hailo/dma_mapped_buffer.hpp"
 
 #include <condition_variable>
 
@@ -33,7 +32,7 @@ class CircularStreamBufferPool final : public StreamBufferPool {
         hailo_dma_buffer_direction_t direction, size_t desc_page_size, size_t descs_count, size_t transfer_size);
 
     CircularStreamBufferPool(size_t desc_page_size, size_t descs_count, size_t transfer_size,
-        Buffer &&base_buffer, DmaMappedBuffer &&mappings);
+        vdma::MappedBufferPtr &&base_buffer);
 
     virtual size_t max_queue_size() const override;
     size_t buffers_ready_to_dequeue() const;
@@ -42,7 +41,7 @@ class CircularStreamBufferPool final : public StreamBufferPool {
 
     virtual hailo_status enqueue(TransferBuffer &&buffer_info) override;
 
-    Buffer &get_base_buffer() { return m_base_buffer; }
+    vdma::MappedBufferPtr get_base_buffer() { return m_mapped_buffer; }
 
     virtual void reset_pointers() override;
 
@@ -57,8 +56,7 @@ class CircularStreamBufferPool final : public StreamBufferPool {
     const size_t m_transfer_size;
 
     // m_mapped_buffer.size() must be m_queue.size() * m_desc_page_size
-    Buffer m_base_buffer;
-    DmaMappedBuffer m_mappings;
+    vdma::MappedBufferPtr m_mapped_buffer;
 
     // Head/tail based queue that manages the buffer pool.
     // The head and tail are in m_desc_page_size granularity.
diff --git a/hailort/libhailort/src/vdma/driver/hailort_driver.cpp b/hailort/libhailort/src/vdma/driver/hailort_driver.cpp
index 891eb92e..ed86aa92 100755
--- a/hailort/libhailort/src/vdma/driver/hailort_driver.cpp
+++ b/hailort/libhailort/src/vdma/driver/hailort_driver.cpp
@@ -12,6 +12,7 @@
 
 #include "common/logger_macros.hpp"
 #include "common/utils.hpp"
+#include "common/string_utils.hpp"
 #include "hailo_ioctl_common.h"
 
 #if defined(__linux__)
@@ -128,6 +129,28 @@ static hailo_dma_data_direction direction_to_dma_data_direction(HailoRTDriver::D
     return HAILO_DMA_NONE;
 }
 
+static HailoRTDriver::DeviceBoardType board_type_to_device_board_type(enum hailo_board_type board_type) {
+    switch (board_type) {
+    case HAILO_BOARD_TYPE_HAILO8:
+        return HailoRTDriver::DeviceBoardType::DEVICE_BOARD_TYPE_HAILO8;
+    case HAILO_BOARD_TYPE_HAILO15:
+        return HailoRTDriver::DeviceBoardType::DEVICE_BOARD_TYPE_HAILO15;
+    case HAILO_BOARD_TYPE_PLUTO:
+        return HailoRTDriver::DeviceBoardType::DEVICE_BOARD_TYPE_PLUTO;
+    case HAILO_BOARD_TYPE_HAILO10H:
+        return HailoRTDriver::DeviceBoardType::DEVICE_BOARD_TYPE_HAILO10H;
+    case HAILO_BOARD_TYPE_HAILO10H_LEGACY:
+        return HailoRTDriver::DeviceBoardType::DEVICE_BOARD_TYPE_HAILO10H_LEGACY;
+    default:
+        LOGGER__ERROR("Invalid board type from ioctl {}", static_cast<int>(board_type));
+        break;
+    }
+
+    assert(false);
+    // On release builds, return an invalid value; the HailoRTDriver constructor maps it to HAILO_DRIVER_FAIL.
+    return HailoRTDriver::DeviceBoardType::DEVICE_BOARD_TYPE_COUNT;
+}
+
 // TODO: validate wraparounds for buffer/mapping handles in the driver (HRT-9509)
 const uintptr_t HailoRTDriver::INVALID_DRIVER_BUFFER_HANDLE_VALUE = INVALID_DRIVER_HANDLE_VALUE;
 const size_t HailoRTDriver::INVALID_DRIVER_VDMA_MAPPING_HANDLE_VALUE = INVALID_DRIVER_HANDLE_VALUE;
@@ -145,8 +168,10 @@ Expected<std::unique_ptr<HailoRTDriver>> HailoRTDriver::create(const std::string
 {
     TRY(auto fd, open_device_file(dev_path));
 
+    auto device_id_lower = StringUtils::to_lower(device_id);
+
     hailo_status status = HAILO_UNINITIALIZED;
-    std::unique_ptr<HailoRTDriver> driver(new (std::nothrow) HailoRTDriver(device_id, std::move(fd), status));
+    std::unique_ptr<HailoRTDriver> driver(new (std::nothrow) HailoRTDriver(device_id_lower, std::move(fd), status));
     CHECK_NOT_NULL_AS_EXPECTED(driver, HAILO_OUT_OF_HOST_MEMORY);
     CHECK_SUCCESS_AS_EXPECTED(status);
 
@@ -158,7 +183,7 @@ Expected<std::unique_ptr<HailoRTDriver>> HailoRTDriver::create_pcie(const std::s
     TRY(const auto scan_results, scan_devices());
 
     auto device_found = std::find_if(scan_results.cbegin(), scan_results.cend(),
-        [&device_id](const auto &compared_scan_result) {
+        [device_id=StringUtils::to_lower(device_id)](const auto &compared_scan_result) {
             return (device_id == compared_scan_result.device_id);
         });
     CHECK(device_found != scan_results.cend(), HAILO_INVALID_ARGUMENT, "Requested device not found");
@@ -239,6 +264,11 @@ HailoRTDriver::HailoRTDriver(const std::string &device_id, FileDescriptor &&fd,
     m_desc_max_page_size = device_properties.desc_max_page_size;
     m_allocate_driver_buffer = (HAILO_ALLOCATION_MODE_DRIVER == device_properties.allocation_mode);
     m_dma_engines_count = device_properties.dma_engines_count;
+    m_board_type = board_type_to_device_board_type(device_properties.board_type);
+    if (DeviceBoardType::DEVICE_BOARD_TYPE_COUNT == m_board_type) {
+        status = HAILO_DRIVER_FAIL;
+        return;
+    }
 
     switch (device_properties.dma_type) {
     case HAILO_DMA_TYPE_PCIE:
@@ -251,7 +281,7 @@ HailoRTDriver::HailoRTDriver(const std::string &device_id, FileDescriptor &&fd,
         m_dma_type = DmaType::PCIE_EP;
         break;
     default:
-        LOGGER__ERROR("Invalid dma type returned from ioctl {}", device_properties.dma_type);
+        LOGGER__ERROR("Invalid dma type returned from ioctl {}", static_cast<int>(device_properties.dma_type));
         status = HAILO_DRIVER_FAIL;
         return;
     }
@@ -530,12 +560,24 @@ hailo_status HailoRTDriver::reset_nn_core()
     return HAILO_SUCCESS;
 }
 
+Expected<uint64_t> HailoRTDriver::write_action_list(uint8_t *data, size_t size)
+{
+    hailo_write_action_list_params params{};
+    params.size = size;
+    params.data = data;
+    // Hands the caller's buffer (pointer + size) to the driver through the HAILO_WRITE_ACTION_LIST ioctl.
+    CHECK_IOCTL_RESULT(run_ioctl(HAILO_WRITE_ACTION_LIST, &params), "Failed write action list");
+    // dma_address is read back from params after the ioctl; presumably the driver fills it in - TODO confirm.
+    uint64_t dma_address = params.dma_address;
+    return dma_address;
+}
+
 Expected<HailoRTDriver::VdmaBufferHandle> HailoRTDriver::vdma_buffer_map_dmabuf(int dmabuf_fd, size_t required_size, DmaDirection data_direction,
     DmaBufferType buffer_type)
 {
     CHECK_AS_EXPECTED (DmaBufferType::DMABUF_BUFFER == buffer_type, HAILO_INVALID_ARGUMENT,
-        "Error, Invalid buffer type given, buffer type {}", buffer_type);
-    
+        "Error, Invalid buffer type given, buffer type {}", static_cast<int>(buffer_type));
+
     return vdma_buffer_map(dmabuf_fd, required_size, data_direction, INVALID_MAPPED_BUFFER_DRIVER_IDENTIFIER,
         buffer_type);
 }
@@ -554,8 +596,10 @@ Expected<HailoRTDriver::VdmaBufferHandle> HailoRTDriver::vdma_buffer_map(uintptr
     if (mapped_buffer != m_mapped_buffer.end()) {
         // Buffer already mapped, increase ref count and use it.
         assert(mapped_buffer->mapped_count > 0);
-        CHECK_AS_EXPECTED(mapped_buffer->driver_buff_handle == driver_buff_handle, HAILO_INVALID_ARGUMENT,
-            "Mapped buffer driver handle {} is different than required handle {}", mapped_buffer->driver_buff_handle,
+        const bool mismatched_driver_handle = (driver_buff_handle != INVALID_MAPPED_BUFFER_DRIVER_IDENTIFIER) &&
+            (mapped_buffer->driver_buff_handle != driver_buff_handle);
+        CHECK(!mismatched_driver_handle, HAILO_INVALID_ARGUMENT,
+            "Mapped buffer driver handle 0x{:x} is different than required handle 0x{:x}", mapped_buffer->driver_buff_handle,
             driver_buff_handle);
 
         mapped_buffer->mapped_count++;
@@ -803,10 +847,11 @@ hailo_status HailoRTDriver::mark_as_used()
     return params.in_use ? HAILO_DEVICE_IN_USE : HAILO_SUCCESS;
 }
 
-Expected<std::pair<vdma::ChannelId, vdma::ChannelId>> HailoRTDriver::soc_connect(uintptr_t input_buffer_desc_handle,
-    uintptr_t output_buffer_desc_handle)
+Expected<std::pair<vdma::ChannelId, vdma::ChannelId>> HailoRTDriver::soc_connect(uint16_t port_number,
+    uintptr_t input_buffer_desc_handle, uintptr_t output_buffer_desc_handle)
 {
     hailo_soc_connect_params params{};
+    params.port_number = port_number;
     params.input_desc_handle = input_buffer_desc_handle;
     params.output_desc_handle = output_buffer_desc_handle;
     CHECK_IOCTL_RESULT(run_ioctl(HAILO_SOC_CONNECT, &params), "Failed soc_connect");
@@ -815,9 +860,11 @@ Expected<std::pair<vdma::ChannelId, vdma::ChannelId>> HailoRTDriver::soc_connect
     return std::make_pair(input_channel, output_channel);
 }
 
-Expected<std::pair<vdma::ChannelId, vdma::ChannelId>> HailoRTDriver::pci_ep_accept(uintptr_t input_buffer_desc_handle, uintptr_t output_buffer_desc_handle)
+Expected<std::pair<vdma::ChannelId, vdma::ChannelId>> HailoRTDriver::pci_ep_accept(uint16_t port_number,
+    uintptr_t input_buffer_desc_handle, uintptr_t output_buffer_desc_handle)
 {
     hailo_pci_ep_accept_params params{};
+    params.port_number = port_number;
     params.input_desc_handle = input_buffer_desc_handle;
     params.output_desc_handle = output_buffer_desc_handle;
     CHECK_IOCTL_RESULT(run_ioctl(HAILO_PCI_EP_ACCEPT, &params), "Failed pci_ep accept");
@@ -842,7 +889,7 @@ hailo_status HailoRTDriver::close_connection(vdma::ChannelId input_channel, vdma
         CHECK_IOCTL_RESULT(run_ioctl(HAILO_SOC_CLOSE, &params), "Failed soc_close");
         return HAILO_SUCCESS;
     } else {
-        LOGGER__ERROR("close_connection not supported with session type {}", session_type);
+        LOGGER__ERROR("close_connection not supported with session type {}", static_cast<int>(session_type));
         return HAILO_NOT_SUPPORTED;
     }
 }
@@ -854,6 +901,7 @@ static bool is_blocking_ioctl(unsigned long request)
     case HAILO_VDMA_INTERRUPTS_WAIT:
     case HAILO_FW_CONTROL:
     case HAILO_READ_NOTIFICATION:
+    case HAILO_PCI_EP_ACCEPT:
         return true;
     default:
         return false;
@@ -1012,7 +1060,17 @@ Expected<DescriptorsListInfo> HailoRTDriver::descriptors_list_create(size_t desc
     create_desc_info.desc_page_size = desc_page_size;
     create_desc_info.is_circular = is_circular;
 
-    CHECK_IOCTL_RESULT(run_ioctl(HAILO_DESC_LIST_CREATE, &create_desc_info), "Failed create desc list");
+    int err = run_ioctl(HAILO_DESC_LIST_CREATE, &create_desc_info);
+    if (err != 0) {
+    #if defined(__linux__) // Only on Linux is ENOMEM mapped to a dedicated status; otherwise it falls through below
+        if (ENOMEM == err) {
+            LOGGER__ERROR("Failed to create desc list due to insufficient amount of CMA memory");
+            return make_unexpected(HAILO_OUT_OF_HOST_CMA_MEMORY);
+        }
+    #endif
+        LOGGER__ERROR("Failed create desc list with errno:{}", err);
+        return make_unexpected(HAILO_DRIVER_FAIL);
+    }
 
     return DescriptorsListInfo{create_desc_info.desc_handle, create_desc_info.dma_address};
 }
diff --git a/hailort/libhailort/src/vdma/driver/hailort_driver.hpp b/hailort/libhailort/src/vdma/driver/hailort_driver.hpp
index 75d47461..2b8f0e51 100755
--- a/hailort/libhailort/src/vdma/driver/hailort_driver.hpp
+++ b/hailort/libhailort/src/vdma/driver/hailort_driver.hpp
@@ -6,7 +6,7 @@
  * @file hailort_driver.hpp
  * @brief Low level interface to PCI driver
  *
- * 
+ *
  **/
 #ifndef _HAILORT_DRIVER_HPP_
 #define _HAILORT_DRIVER_HPP_
@@ -16,7 +16,7 @@
 
 #include "common/utils.hpp"
 
-#include "os/file_descriptor.hpp"
+#include "common/file_descriptor.hpp"
 #include "vdma/channel/channel_id.hpp"
 
 #include <mutex>
@@ -165,6 +165,16 @@ class HailoRTDriver final
         PCIE_EP
     };
 
+    // Board type reported by the driver in its device properties. Should match enum hailo_board_type
+    enum class DeviceBoardType {
+        DEVICE_BOARD_TYPE_HAILO8 = 0,
+        DEVICE_BOARD_TYPE_HAILO15,
+        DEVICE_BOARD_TYPE_PLUTO,
+        DEVICE_BOARD_TYPE_HAILO10H,
+        DEVICE_BOARD_TYPE_HAILO10H_LEGACY,
+        DEVICE_BOARD_TYPE_COUNT, // Not a board; also returned by board_type_to_device_board_type() for an unknown type
+    };
+
     enum class MemoryType {
         DIRECT_MEMORY,
 
@@ -238,6 +248,8 @@ class HailoRTDriver final
 
     hailo_status reset_nn_core();
 
+    Expected<uint64_t> write_action_list(uint8_t *data, size_t size);
+
     /**
      * Maps a dmabuf to physical memory.
      *
@@ -245,7 +257,7 @@ class HailoRTDriver final
      * @param[in] required_size - size of dmabug we are mapping.
      * @param[in] data_direction - direction is used for optimization.
      * @param[in] buffer_type - buffer type must be DMABUF
-     */ 
+     */
     Expected<VdmaBufferHandle> vdma_buffer_map_dmabuf(int dmabuf_fd, size_t required_size, DmaDirection data_direction,
         DmaBufferType buffer_type);
 
@@ -326,11 +338,11 @@ class HailoRTDriver final
      */
     hailo_status mark_as_used();
 
-    Expected<std::pair<vdma::ChannelId, vdma::ChannelId>> soc_connect(uintptr_t input_buffer_desc_handle,
-        uintptr_t output_buffer_desc_handle);
+    Expected<std::pair<vdma::ChannelId, vdma::ChannelId>> soc_connect(uint16_t port_number,
+        uintptr_t input_buffer_desc_handle, uintptr_t output_buffer_desc_handle);
 
-    Expected<std::pair<vdma::ChannelId, vdma::ChannelId>> pci_ep_accept(uintptr_t input_buffer_desc_handle,
-        uintptr_t output_buffer_desc_handle);
+    Expected<std::pair<vdma::ChannelId, vdma::ChannelId>> pci_ep_accept(uint16_t port_number,
+        uintptr_t input_buffer_desc_handle, uintptr_t output_buffer_desc_handle);
 
     hailo_status close_connection(vdma::ChannelId input_channel, vdma::ChannelId output_channel,
         PcieSessionType session_type);
@@ -345,6 +357,11 @@ class HailoRTDriver final
         return m_dma_type;
     }
 
+    inline DeviceBoardType board_type() const
+    {
+        return m_board_type; // Assigned in the constructor from the driver's device_properties.board_type
+    }
+
     FileDescriptor& fd() {return m_fd;}
 
     inline bool allocate_driver_buffer() const
@@ -419,6 +436,7 @@ class HailoRTDriver final
     DmaType m_dma_type;
     bool m_allocate_driver_buffer;
     size_t m_dma_engines_count;
+    DeviceBoardType m_board_type;
     bool m_is_fw_loaded;
 #ifdef __QNX__
     pid_t m_resource_manager_pid;
diff --git a/hailort/libhailort/src/vdma/driver/os/driver_os_specific.hpp b/hailort/libhailort/src/vdma/driver/os/driver_os_specific.hpp
index 610749c4..e849ddcb 100644
--- a/hailort/libhailort/src/vdma/driver/os/driver_os_specific.hpp
+++ b/hailort/libhailort/src/vdma/driver/os/driver_os_specific.hpp
@@ -11,7 +11,7 @@
 #define _HAILO_DRIVER_OS_SPECIFIC_HPP_
 
 #include "hailo/expected.hpp"
-#include "os/file_descriptor.hpp"
+#include "common/file_descriptor.hpp"
 #include "vdma/driver/hailort_driver.hpp"
 
 #ifdef _WIN32
diff --git a/hailort/libhailort/src/vdma/driver/os/windows/driver_os_specific.cpp b/hailort/libhailort/src/vdma/driver/os/windows/driver_os_specific.cpp
index cc0f30d2..9506b5d6 100644
--- a/hailort/libhailort/src/vdma/driver/os/windows/driver_os_specific.cpp
+++ b/hailort/libhailort/src/vdma/driver/os/windows/driver_os_specific.cpp
@@ -246,7 +246,7 @@ Expected<HailoRTDriver::DeviceInfo> query_device_info(const std::string &device_
     CHECK_EXPECTED(device_func);
 
     HailoRTDriver::DeviceInfo device_info{};
-    device_info.device_id = fmt::format("{:04X}:{:02X}:{:02X}.{}", 0, *bus, DEVICE_ADDRESS_GET_DEV(*device_func),
+    device_info.device_id = fmt::format("{:04x}:{:02x}:{:02x}.{}", 0, *bus, DEVICE_ADDRESS_GET_DEV(*device_func),
         DEVICE_ADDRESS_GET_FUNC(*device_func));
     device_info.dev_path = device_name;
     return device_info;
@@ -292,6 +292,7 @@ COMPATIBLE_PARAM_CAST(hailo_soc_connect_params, ConnectParams)
 COMPATIBLE_PARAM_CAST(hailo_soc_close_params, SocCloseParams)
 COMPATIBLE_PARAM_CAST(hailo_pci_ep_accept_params, AcceptParams)
 COMPATIBLE_PARAM_CAST(hailo_pci_ep_close_params, PciEpCloseParams)
+COMPATIBLE_PARAM_CAST(hailo_write_action_list_params, WriteActionListParams)
 
 // Special handle for nullptr_t. This case occurs when there is no parameters passed.
 tCompatibleHailoIoctlData WindowsIoctlParamCast<nullptr_t>::to_compatible(nullptr_t data)
diff --git a/hailort/libhailort/src/vdma/integrated/integrated_device.cpp b/hailort/libhailort/src/vdma/integrated/integrated_device.cpp
index 922f6619..d427c052 100644
--- a/hailort/libhailort/src/vdma/integrated/integrated_device.cpp
+++ b/hailort/libhailort/src/vdma/integrated/integrated_device.cpp
@@ -12,59 +12,26 @@
 namespace hailort
 {
 
-// 16 MB 
-#define INTEGRATED_DEVICE_INFINITE_ACTION_LIST_POOL_SIZE (16777216)
-
 bool IntegratedDevice::is_loaded()
 {
     return HailoRTDriver::is_integrated_nnc_loaded();
 }
 
-Expected<std::pair<void*, uint64_t>> IntegratedDevice::allocate_infinite_action_list_buffer(size_t size)
-{
-    CHECK_AS_EXPECTED(0 == (size % OsUtils::get_page_size()), HAILO_INVALID_ARGUMENT,
-        "Infinte action list buffer size must be a multiple of page size");
-    CHECK_AS_EXPECTED(m_device_infinite_action_list_pool_allocation_offset + size <= m_device_infinite_action_list_pool.size(),
-        HAILO_INVALID_ARGUMENT, "Buffer pool size is too small for requested infinte action list buffer");
-
-    auto user_addres = static_cast<void*>(reinterpret_cast<uint8_t*>(m_device_infinite_action_list_pool.user_address()) +
-        m_device_infinite_action_list_pool_allocation_offset);
-    auto dma_address = m_device_infinite_action_list_pool.dma_address() + m_device_infinite_action_list_pool_allocation_offset;
-
-    m_device_infinite_action_list_pool_allocation_offset += size;
-
-    return std::make_pair(user_addres, dma_address);
-}
-
 Expected<std::unique_ptr<IntegratedDevice>> IntegratedDevice::create()
 {
     hailo_status status = HAILO_UNINITIALIZED;
 
     TRY(auto driver, HailoRTDriver::create_integrated_nnc());
 
-    // Create pool of memory for infinite action list so can all be in the LUT memory area
-    // TODO: remove this when infinite action list allocates from its own pool of CMA memory
-    TRY(auto infinite_action_list_pool, vdma::ContinuousBuffer::create(INTEGRATED_DEVICE_INFINITE_ACTION_LIST_POOL_SIZE,
-        *driver));
-    
-    // Verify pool is in mapped range
-    CHECK_AS_EXPECTED(DDRActionListBufferBuilder::verify_dma_addr(infinite_action_list_pool), HAILO_INTERNAL_FAILURE,
-        "Failed to allocate continous buffer pool M4 mapped memory region");
-
-
-    auto device = std::unique_ptr<IntegratedDevice>(new (std::nothrow) IntegratedDevice(std::move(driver),
-        std::move(infinite_action_list_pool), status));
+    auto device = std::unique_ptr<IntegratedDevice>(new (std::nothrow) IntegratedDevice(std::move(driver), status));
     CHECK_AS_EXPECTED((nullptr != device), HAILO_OUT_OF_HOST_MEMORY);
     CHECK_SUCCESS_AS_EXPECTED(status, "Failed creating IntegratedDevice");
 
     return device;
 }
 
-IntegratedDevice::IntegratedDevice(std::unique_ptr<HailoRTDriver> &&driver, vdma::ContinuousBuffer &&pool,
-                                   hailo_status &status) :
-    VdmaDevice::VdmaDevice(std::move(driver), Device::Type::INTEGRATED, status),
-    m_device_infinite_action_list_pool(std::move(pool)),
-    m_device_infinite_action_list_pool_allocation_offset(0)
+IntegratedDevice::IntegratedDevice(std::unique_ptr<HailoRTDriver> &&driver, hailo_status &status) :
+    VdmaDevice::VdmaDevice(std::move(driver), Device::Type::INTEGRATED, status)
 {
     if (status != HAILO_SUCCESS) {
         LOGGER__ERROR("Failed to create VdmaDevice");
diff --git a/hailort/libhailort/src/vdma/integrated/integrated_device.hpp b/hailort/libhailort/src/vdma/integrated/integrated_device.hpp
index 85882107..766d1f56 100644
--- a/hailort/libhailort/src/vdma/integrated/integrated_device.hpp
+++ b/hailort/libhailort/src/vdma/integrated/integrated_device.hpp
@@ -49,16 +49,11 @@ class IntegratedDevice : public VdmaDevice {
 
     static constexpr const char *DEVICE_ID = HailoRTDriver::INTEGRATED_NNC_DEVICE_ID;
 
-    Expected<std::pair<void*, uint64_t>> allocate_infinite_action_list_buffer(size_t size);
-
 protected:
     virtual hailo_status reset_impl(CONTROL_PROTOCOL__reset_type_t reset_type) override;
 
 private:
-    IntegratedDevice(std::unique_ptr<HailoRTDriver> &&driver, vdma::ContinuousBuffer &&pool, hailo_status &status);
-
-    vdma::ContinuousBuffer m_device_infinite_action_list_pool;
-    size_t m_device_infinite_action_list_pool_allocation_offset;
+    IntegratedDevice(std::unique_ptr<HailoRTDriver> &&driver, hailo_status &status);
 };
 
 
diff --git a/hailort/libhailort/src/vdma/memory/buffer_requirements.cpp b/hailort/libhailort/src/vdma/memory/buffer_requirements.cpp
index ec27e22b..b4fa8c32 100644
--- a/hailort/libhailort/src/vdma/memory/buffer_requirements.cpp
+++ b/hailort/libhailort/src/vdma/memory/buffer_requirements.cpp
@@ -10,12 +10,60 @@
 #include "vdma/memory/descriptor_list.hpp"
 #include "vdma/memory/continuous_edge_layer.hpp"
 #include "utils.h"
+#include "common/internal_env_vars.hpp"
 
 #include <numeric>
 
 namespace hailort {
 namespace vdma {
 
+Expected<BufferSizesRequirements> BufferSizesRequirements::get_buffer_requirements_for_boundary_channels(
+    HailoRTDriver &driver, uint32_t max_shmifo_size, uint16_t min_active_trans, uint16_t max_active_trans,
+    uint32_t transfer_size)
+{
+    // Chooses the descriptor page size and count for a circular scatter-gather boundary channel buffer.
+    // Starts from the default page size capped by the driver's max (or from the driver's max when the legacy env
+    // var is on) and doubles it until the requirements are met. TODO: simplify + drop the loop (?) (HRT-14822)
+    // max_page_size is uint32_t to avoid overflow when it is doubled below.
+    uint32_t max_page_size = is_env_variable_on(HAILO_LEGACY_BOUNDARY_CHANNEL_PAGE_SIZE_ENV_VAR) ?
+        driver.desc_max_page_size() : std::min(DEFAULT_SG_PAGE_SIZE, driver.desc_max_page_size());
+    while (true) {
+        if (max_page_size > driver.desc_max_page_size()) {
+            // Doubling went past the driver's max page size (the starting value never exceeds it)
+            return make_unexpected(HAILO_CANT_MEET_BUFFER_REQUIREMENTS);
+        }
+
+        if (max_page_size == max_shmifo_size) {
+            // WA (TODO - remove after HRT-11747): page size == stream size is an invalid solution, so halve it.
+            // NOTE(review): if the halved size can't meet the requirements, the doubling below lands back on
+            // max_shmifo_size and is halved again - this looks like it can loop forever; please confirm.
+            max_page_size /= 2;
+        }
+
+        const auto DONT_FORCE_DEFAULT_PAGE_SIZE = false;
+        const auto DONT_FORCE_BATCH_SIZE = false;
+        static const bool IS_CIRCULAR = true;
+        static const bool IS_VDMA_ALIGNED_BUFFER = false;
+        auto buffer_sizes_requirements_exp = vdma::BufferSizesRequirements::get_buffer_requirements_single_transfer(
+            vdma::VdmaBuffer::Type::SCATTER_GATHER, static_cast<uint16_t>(max_page_size), min_active_trans,
+            max_active_trans, transfer_size, IS_CIRCULAR, DONT_FORCE_DEFAULT_PAGE_SIZE, DONT_FORCE_BATCH_SIZE,
+            IS_VDMA_ALIGNED_BUFFER);
+        if (HAILO_SUCCESS == buffer_sizes_requirements_exp.status()) {
+            // Valid page size found; when the HW-infer env var is on, the descriptor count is forced to the max
+            const auto desc_page_size = buffer_sizes_requirements_exp->desc_page_size();
+            const auto descs_count = (is_env_variable_on(HAILO_CONFIGURE_FOR_HW_INFER_ENV_VAR)) ?
+                MAX_SG_DESCS_COUNT : buffer_sizes_requirements_exp->descs_count();
+            return BufferSizesRequirements(descs_count, desc_page_size);
+        } else if (HAILO_CANT_MEET_BUFFER_REQUIREMENTS == buffer_sizes_requirements_exp.status()) {
+            // If we can't meet the requirements, try to double the page size and try again
+            max_page_size <<= static_cast<uint32_t>(1);
+        } else {
+            // Any other failure is unexpected and is propagated to the caller as-is
+            return buffer_sizes_requirements_exp;
+        }
+    }
+}
+
 Expected<BufferSizesRequirements> BufferSizesRequirements::get_buffer_requirements_multiple_transfers(
     vdma::VdmaBuffer::Type buffer_type, uint16_t max_desc_page_size, uint16_t batch_size,
     const std::vector<uint32_t> &transfer_sizes, bool is_circular, bool force_default_page_size,
@@ -46,8 +94,9 @@ Expected<BufferSizesRequirements> BufferSizesRequirements::get_buffer_requiremen
     CHECK_AS_EXPECTED(initial_desc_page_size >= MIN_PAGE_SIZE, HAILO_INTERNAL_FAILURE,
         "Initial descriptor page size ({}) is smaller than minimum descriptor page size ({})",
         initial_desc_page_size, MIN_PAGE_SIZE);
-    CHECK_AS_EXPECTED(MAX_DESCS_COUNT >= get_required_descriptor_count(transfer_sizes, max_desc_page_size),
-        HAILO_CANT_MEET_BUFFER_REQUIREMENTS);
+    if (get_required_descriptor_count(transfer_sizes, max_desc_page_size) > MAX_DESCS_COUNT) {
+        return make_unexpected(HAILO_CANT_MEET_BUFFER_REQUIREMENTS);
+    }
 
     // Defined as uint32_t to prevent overflow (as we multiply it by two in each iteration of the while loop bellow)
     auto local_desc_page_size = static_cast<uint32_t>(initial_desc_page_size);
diff --git a/hailort/libhailort/src/vdma/memory/buffer_requirements.hpp b/hailort/libhailort/src/vdma/memory/buffer_requirements.hpp
index 7d1126a8..37d68bd6 100644
--- a/hailort/libhailort/src/vdma/memory/buffer_requirements.hpp
+++ b/hailort/libhailort/src/vdma/memory/buffer_requirements.hpp
@@ -36,6 +36,9 @@ class BufferSizesRequirements final {
     uint16_t desc_page_size() const { return m_desc_page_size; }
     uint32_t buffer_size() const { return m_descs_count * m_desc_page_size; }
 
+    static Expected<BufferSizesRequirements> get_buffer_requirements_for_boundary_channels(HailoRTDriver &driver,
+        uint32_t max_shmifo_size, uint16_t min_active_trans, uint16_t max_active_trans, uint32_t transfer_size);
+
     static Expected<BufferSizesRequirements> get_buffer_requirements_multiple_transfers(
         vdma::VdmaBuffer::Type buffer_type, uint16_t max_desc_page_size,
         uint16_t batch_size, const std::vector<uint32_t> &transfer_sizes, bool is_circular,
diff --git a/hailort/libhailort/src/vdma/memory/continuous_buffer.hpp b/hailort/libhailort/src/vdma/memory/continuous_buffer.hpp
index 62aa56a1..55c38ff9 100644
--- a/hailort/libhailort/src/vdma/memory/continuous_buffer.hpp
+++ b/hailort/libhailort/src/vdma/memory/continuous_buffer.hpp
@@ -11,7 +11,6 @@
 #define _HAILO_VDMA_CONTINUOUS_BUFFER_HPP_
 
 #include "vdma/driver/hailort_driver.hpp"
-#include "os/mmap_buffer.hpp"
 #include "vdma/memory/vdma_buffer.hpp"
 
 #define MAX_CCB_DESCS_COUNT (0x00040000)
diff --git a/hailort/libhailort/src/vdma/memory/continuous_edge_layer.hpp b/hailort/libhailort/src/vdma/memory/continuous_edge_layer.hpp
index 3206030c..755b0d77 100644
--- a/hailort/libhailort/src/vdma/memory/continuous_edge_layer.hpp
+++ b/hailort/libhailort/src/vdma/memory/continuous_edge_layer.hpp
@@ -11,7 +11,6 @@
 #define _HAILO_VDMA_CONTINUOUS_EDGE_LAYER_HPP_
 
 #include "vdma/driver/hailort_driver.hpp"
-#include "os/mmap_buffer.hpp"
 #include "vdma/memory/vdma_edge_layer.hpp"
 #include "vdma/memory/continuous_buffer.hpp"
 
diff --git a/hailort/libhailort/src/vdma/memory/dma_able_buffer.cpp b/hailort/libhailort/src/vdma/memory/dma_able_buffer.cpp
index 5a9aaf67..5971dd10 100644
--- a/hailort/libhailort/src/vdma/memory/dma_able_buffer.cpp
+++ b/hailort/libhailort/src/vdma/memory/dma_able_buffer.cpp
@@ -11,9 +11,10 @@
 #include "hailo/hailort_common.hpp"
 #include "dma_able_buffer.hpp"
 #include "common/os_utils.hpp"
+#include "common/mmap_buffer.hpp"
 
 #if defined(_MSC_VER)
-#include "os/windows/virtual_alloc_guard.hpp"
+#include "common/os/windows/virtual_alloc_guard.hpp"
 #else
 #include <sys/mman.h>
 #endif /* defined(_MSC_VER) */
diff --git a/hailort/libhailort/src/vdma/memory/dma_able_buffer.hpp b/hailort/libhailort/src/vdma/memory/dma_able_buffer.hpp
index 79f56aa6..fd674ab1 100644
--- a/hailort/libhailort/src/vdma/memory/dma_able_buffer.hpp
+++ b/hailort/libhailort/src/vdma/memory/dma_able_buffer.hpp
@@ -20,7 +20,6 @@
 
 #include "hailo/expected.hpp"
 #include "vdma/driver/hailort_driver.hpp"
-#include "os/mmap_buffer.hpp"
 
 namespace hailort {
 namespace vdma {
diff --git a/hailort/libhailort/src/vdma/memory/mapped_buffer.cpp b/hailort/libhailort/src/vdma/memory/mapped_buffer.cpp
index 3fcf9cb5..306942cb 100644
--- a/hailort/libhailort/src/vdma/memory/mapped_buffer.cpp
+++ b/hailort/libhailort/src/vdma/memory/mapped_buffer.cpp
@@ -19,7 +19,7 @@ Expected<MappedBufferPtr> MappedBuffer::create_shared(DmaAbleBufferPtr buffer, H
     TRY(auto buffer_handle, driver.vdma_buffer_map(reinterpret_cast<uintptr_t>(buffer->user_address()), buffer->size(), data_direction,
         buffer->buffer_identifier(), HailoRTDriver::DmaBufferType::USER_PTR_BUFFER));
 
-    auto result = make_shared_nothrow<MappedBuffer>(driver, buffer, data_direction, buffer_handle);
+    auto result = make_shared_nothrow<MappedBuffer>(driver, buffer, data_direction, buffer_handle, buffer->size());
     CHECK_NOT_NULL_AS_EXPECTED(result, HAILO_OUT_OF_HOST_MEMORY);
 
     return result;
@@ -41,18 +41,20 @@ Expected<MappedBufferPtr> MappedBuffer::create_shared_from_dmabuf(int dmabuf_fd,
         HailoRTDriver::DmaBufferType::DMABUF_BUFFER));
 
     // TODO: if need user address for dmabuf use DmaBufDmaAbleBuffer
-    auto result = make_shared_nothrow<MappedBuffer>(driver, nullptr, data_direction, buffer_handle);
+    auto result = make_shared_nothrow<MappedBuffer>(driver, nullptr, data_direction, buffer_handle, size, dmabuf_fd);
     CHECK_NOT_NULL_AS_EXPECTED(result, HAILO_OUT_OF_HOST_MEMORY);
 
     return result;
 }
 
 MappedBuffer::MappedBuffer(HailoRTDriver &driver, DmaAbleBufferPtr buffer, HailoRTDriver::DmaDirection data_direction,
-    HailoRTDriver::VdmaBufferHandle vdma_buffer_handle) :
+    HailoRTDriver::VdmaBufferHandle vdma_buffer_handle, size_t size, int fd) :
     m_driver(driver),
     m_buffer(buffer),
     m_mapping_handle(vdma_buffer_handle),
-    m_data_direction(data_direction)
+    m_data_direction(data_direction),
+    m_size(size),
+    m_fd(fd)
 {}
 
 MappedBuffer::~MappedBuffer()
@@ -67,7 +69,8 @@ MappedBuffer::MappedBuffer(MappedBuffer &&other) noexcept :
     m_driver(other.m_driver),
     m_buffer(std::move(other.m_buffer)),
     m_mapping_handle(std::exchange(other.m_mapping_handle, HailoRTDriver::INVALID_DRIVER_VDMA_MAPPING_HANDLE_VALUE)),
-    m_data_direction(other.m_data_direction)
+    m_data_direction(other.m_data_direction),
+    m_size(other.m_size), m_fd(other.m_fd) // m_fd must be carried over too, otherwise fd() reads an uninitialized int
 {}
 
 void* MappedBuffer::user_address()
@@ -77,7 +80,14 @@ void* MappedBuffer::user_address()
 
 size_t MappedBuffer::size() const
 {
-    return m_buffer->size();
+    return m_size;
+}
+
+Expected<int> MappedBuffer::fd()
+{
+    // m_fd defaults to INVALID_FD: create_shared() passes no fd, create_shared_from_dmabuf() passes dmabuf_fd.
+    CHECK(INVALID_FD != m_fd, HAILO_INTERNAL_FAILURE, "fd is only supported for DMABUF type MappedBuffer");
+    return Expected<int>(m_fd);
+}
 
 HailoRTDriver::VdmaBufferHandle MappedBuffer::handle()
diff --git a/hailort/libhailort/src/vdma/memory/mapped_buffer.hpp b/hailort/libhailort/src/vdma/memory/mapped_buffer.hpp
index 7578118b..1558e224 100644
--- a/hailort/libhailort/src/vdma/memory/mapped_buffer.hpp
+++ b/hailort/libhailort/src/vdma/memory/mapped_buffer.hpp
@@ -31,6 +31,7 @@
 namespace hailort {
 namespace vdma {
 
+#define INVALID_FD (-1)
 
 class MappedBuffer;
 using MappedBufferPtr = std::shared_ptr<MappedBuffer>;
@@ -51,7 +52,7 @@ class MappedBuffer final
         HailoRTDriver::DmaDirection data_direction);
 
     MappedBuffer(HailoRTDriver &driver, DmaAbleBufferPtr buffer, HailoRTDriver::DmaDirection data_direction,
-        HailoRTDriver::VdmaBufferHandle vdma_buffer_handle);
+        HailoRTDriver::VdmaBufferHandle vdma_buffer_handle, size_t size, int fd = INVALID_FD);
     MappedBuffer(MappedBuffer &&other) noexcept;
     MappedBuffer(const MappedBuffer &other) = delete;
     MappedBuffer &operator=(const MappedBuffer &other) = delete;
@@ -64,6 +65,7 @@ class MappedBuffer final
     hailo_status synchronize(HailoRTDriver::DmaSyncDirection sync_direction);
     // TODO: validate that offset is cache aligned (HRT-9811)
     hailo_status synchronize(size_t offset, size_t count, HailoRTDriver::DmaSyncDirection sync_direction);
+    Expected<int> fd();
 
     /**
      * Copy data from buf_src parameter to this buffer.
@@ -97,6 +99,8 @@ class MappedBuffer final
     DmaAbleBufferPtr m_buffer;
     HailoRTDriver::VdmaBufferHandle m_mapping_handle;
     const HailoRTDriver::DmaDirection m_data_direction;
+    size_t m_size;
+    int m_fd;
 };
 
 } /* namespace vdma */
diff --git a/hailort/libhailort/src/vdma/pcie/pcie_device.cpp b/hailort/libhailort/src/vdma/pcie/pcie_device.cpp
index 483c44f5..e95861ca 100644
--- a/hailort/libhailort/src/vdma/pcie/pcie_device.cpp
+++ b/hailort/libhailort/src/vdma/pcie/pcie_device.cpp
@@ -21,6 +21,7 @@
 #include "vdma/driver/hailort_driver.hpp"
 #include "core_op/resource_manager/resource_manager.hpp"
 #include "vdma/vdma_config_manager.hpp"
+#include "vdma/pcie/pcie_device_hrpc_client.hpp"
 
 #include <new>
 #include <algorithm>
@@ -34,9 +35,14 @@ Expected<std::vector<hailo_pcie_device_info_t>> PcieDevice::scan()
     auto scan_results = HailoRTDriver::scan_devices();
     CHECK_EXPECTED(scan_results);
 
+    return get_pcie_devices_infos(scan_results.value());
+}
+
+Expected<std::vector<hailo_pcie_device_info_t>> PcieDevice::get_pcie_devices_infos(const std::vector<HailoRTDriver::DeviceInfo> &scan_results)
+{
     std::vector<hailo_pcie_device_info_t> out_results;
-    out_results.reserve(scan_results->size());
-    for (const auto &scan_result : scan_results.value()) {
+    out_results.reserve(scan_results.size());
+    for (const auto &scan_result : scan_results) {
         const bool DONT_LOG_ON_FAILURE = true;
         auto device_info = parse_pcie_device_info(scan_result.device_id, DONT_LOG_ON_FAILURE);
         if (device_info) {
@@ -47,30 +53,50 @@ Expected<std::vector<hailo_pcie_device_info_t>> PcieDevice::scan()
     return out_results;
 }
 
-Expected<std::unique_ptr<PcieDevice>> PcieDevice::create()
+Expected<std::unique_ptr<Device>> PcieDevice::create()
 {
-    // Take the first device
-    auto scan_result = scan();
-    CHECK_EXPECTED(scan_result, "Failed scanning pcie devices");
-    CHECK_AS_EXPECTED(scan_result->size() >= 1, HAILO_INVALID_OPERATION,
+    auto scan_results = HailoRTDriver::scan_devices();
+    CHECK_EXPECTED(scan_results);
+    CHECK_AS_EXPECTED(scan_results->size() >= 1, HAILO_INVALID_OPERATION,
         "There are no PCIe devices on the system");
+    if (scan_results->size() > 1) {
+        auto first_acc_type = scan_results->at(0).accelerator_type;
+        for (const auto &scan_result : scan_results.value()) {
+            CHECK_AS_EXPECTED(first_acc_type == scan_result.accelerator_type, HAILO_INVALID_OPERATION,
+                "Multiple accelerator types detected (Hailo8, Hailo10). Please specify the device to use.");
+        }
+    }
+    // get_pcie_devices_infos() skips ids that fail parsing, so the list may be empty even if the scan was not.
+    auto pcie_infos = get_pcie_devices_infos(scan_results.value());
+    CHECK_EXPECTED(pcie_infos, "Failed getting pcie devices infos");
+    CHECK_AS_EXPECTED(!pcie_infos->empty(), HAILO_INVALID_OPERATION, "There are no PCIe devices on the system");
 
     // choose first device
-    return create(scan_result->at(0));
+    return create(pcie_infos->at(0));
 }
 
-Expected<std::unique_ptr<PcieDevice>> PcieDevice::create(const hailo_pcie_device_info_t &pcie_device_info)
+Expected<std::unique_ptr<Device>> PcieDevice::create(const hailo_pcie_device_info_t &pcie_device_info)
 {
     auto device_info = find_device_info(pcie_device_info);
     CHECK_EXPECTED(device_info);
 
+    if ((get_env_variable(HAILO_SOCKET_COM_ADDR_CLIENT_ENV_VAR).has_value()) || (HailoRTDriver::AcceleratorType::SOC_ACCELERATOR == device_info->accelerator_type)) {
+        TRY(auto pcie_device, PcieDeviceHrpcClient::create(device_info->device_id));
+        // Upcasting to Device unique_ptr (from PcieDeviceHrpcClient unique_ptr)
+        auto device = std::unique_ptr<Device>(std::move(pcie_device));
+        return device;
+    }
+
     auto driver = HailoRTDriver::create(device_info->device_id, device_info->dev_path);
     CHECK_EXPECTED(driver);
 
     hailo_status status = HAILO_UNINITIALIZED;
-    auto device = std::unique_ptr<PcieDevice>(new (std::nothrow) PcieDevice(driver.release(), status));
-    CHECK_AS_EXPECTED((nullptr != device), HAILO_OUT_OF_HOST_MEMORY);
+    auto pcie_device = std::unique_ptr<PcieDevice>(new (std::nothrow) PcieDevice(driver.release(), status));
+    CHECK_NOT_NULL_AS_EXPECTED(pcie_device, HAILO_OUT_OF_HOST_MEMORY);
     CHECK_SUCCESS_AS_EXPECTED(status, "Failed creating PcieDevice");
+
+    // Upcasting to Device unique_ptr (from PcieDevice unique_ptr)
+    auto device = std::unique_ptr<Device>(std::move(pcie_device));
     return device;
 }
 
diff --git a/hailort/libhailort/src/vdma/pcie/pcie_device.hpp b/hailort/libhailort/src/vdma/pcie/pcie_device.hpp
index 271de77f..647d71dc 100644
--- a/hailort/libhailort/src/vdma/pcie/pcie_device.hpp
+++ b/hailort/libhailort/src/vdma/pcie/pcie_device.hpp
@@ -24,8 +24,8 @@ namespace hailort
 class PcieDevice : public VdmaDevice {
 public:
     static Expected<std::vector<hailo_pcie_device_info_t>> scan();
-    static Expected<std::unique_ptr<PcieDevice>> create();
-    static Expected<std::unique_ptr<PcieDevice>> create(const hailo_pcie_device_info_t &device_info);
+    static Expected<std::unique_ptr<Device>> create();
+    static Expected<std::unique_ptr<Device>> create(const hailo_pcie_device_info_t &device_info);
     static Expected<hailo_pcie_device_info_t> parse_pcie_device_info(const std::string &device_info_str,
         bool log_on_failure);
     static Expected<std::string> pcie_device_info_to_string(const hailo_pcie_device_info_t &device_info);
@@ -56,6 +56,8 @@ class PcieDevice : public VdmaDevice {
     virtual Expected<hailo_device_architecture_t> get_architecture() const override;
 
 private:
+    static Expected<std::vector<hailo_pcie_device_info_t>> get_pcie_devices_infos(const std::vector<HailoRTDriver::DeviceInfo> &scan_results);
+
     PcieDevice(std::unique_ptr<HailoRTDriver> &&driver, hailo_status &status);
 
     static Expected<HailoRTDriver::DeviceInfo> find_device_info(const hailo_pcie_device_info_t &pcie_device_info);
diff --git a/hailort/libhailort/src/vdma/pcie/pcie_device_hrpc_client.cpp b/hailort/libhailort/src/vdma/pcie/pcie_device_hrpc_client.cpp
new file mode 100644
index 00000000..563714b3
--- /dev/null
+++ b/hailort/libhailort/src/vdma/pcie/pcie_device_hrpc_client.cpp
@@ -0,0 +1,92 @@
+/**
+ * Copyright (c) 2024 Hailo Technologies Ltd. All rights reserved.
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT)
+ **/
+/**
+ * @file pcie_device_hrpc_client.cpp
+ * @brief Pcie Device HRPC client implementation
+ **/
+
+#include "pcie_device_hrpc_client.hpp"
+#include "vdma/driver/hailort_driver.hpp"
+
+
+namespace hailort
+{
+
+Expected<std::unique_ptr<PcieDeviceHrpcClient>> PcieDeviceHrpcClient::create(const std::string &device_id)
+{
+    auto client = make_shared_nothrow<hrpc::Client>(device_id);
+    CHECK_NOT_NULL(client, HAILO_INTERNAL_FAILURE);
+    // Connect before delegating: the (device_id, client) overload sends DEVICE__CREATE through this client.
+    auto status = client->connect();
+    CHECK_SUCCESS_AS_EXPECTED(status, "Failed to connect to server");
+
+    return PcieDeviceHrpcClient::create(device_id, client);
+}
+
+Expected<std::unique_ptr<PcieDeviceHrpcClient>> PcieDeviceHrpcClient::create(const std::string &device_id,
+    std::shared_ptr<hrpc::Client> client)
+{
+    TRY(auto request, CreateDeviceSerializer::serialize_request());
+    TRY(auto result, client->execute_request(HailoRpcActionID::DEVICE__CREATE, MemoryView(request)));
+    TRY(auto tuple, CreateDeviceSerializer::deserialize_reply(MemoryView(result)));
+    auto status = std::get<0>(tuple);
+    CHECK_SUCCESS_AS_EXPECTED(status);
+    // The reply is a (status, device handle) tuple; the handle is kept in the new object together with 'client'.
+    auto device_handle = std::get<1>(tuple);
+    auto device = make_unique_nothrow<PcieDeviceHrpcClient>(device_id, client, device_handle);
+    CHECK_NOT_NULL(device, HAILO_OUT_OF_HOST_MEMORY);
+
+    return std::unique_ptr<PcieDeviceHrpcClient>(std::move(device));
+}
+
+PcieDeviceHrpcClient::~PcieDeviceHrpcClient()
+{
+    // Best-effort DEVICE__DESTROY for m_handle; a destructor cannot return a status, so failures are only logged.
+    if (INVALID_HANDLE_ID == m_handle) {
+        return;
+    }
+    auto request = DestroyDeviceSerializer::serialize_request(m_handle);
+    if (!request) {
+        LOGGER__CRITICAL("Failed to serialize Device_release request");
+        return;
+    }
+    auto result = m_client->execute_request(HailoRpcActionID::DEVICE__DESTROY, MemoryView(*request));
+    if (!result) {
+        LOGGER__CRITICAL("Failed to destroy Device! status = {}", result.status());
+        return;
+    }
+    // execute_request succeeded at this point, so the status to log is the one carried by the reply.
+    const auto destroy_status = DestroyDeviceSerializer::deserialize_reply(MemoryView(*result));
+    if (HAILO_SUCCESS != destroy_status) {
+        LOGGER__CRITICAL("Failed to destroy Device! status = {}", destroy_status);
+    }
+}
+
+Expected<hailo_device_identity_t> PcieDeviceHrpcClient::identify()
+{
+    TRY(auto request, IdentifyDeviceSerializer::serialize_request(m_handle));
+    TRY(auto result, m_client->execute_request(HailoRpcActionID::DEVICE__IDENTIFY, MemoryView(request)));
+    TRY(auto tuple, IdentifyDeviceSerializer::deserialize_reply(MemoryView(result)));
+    // The reply is a (status, identity) tuple: propagate a failed status, otherwise return the identity.
+    CHECK_SUCCESS_AS_EXPECTED(std::get<0>(tuple));
+    auto identity = std::get<1>(tuple);
+
+    return identity;
+}
+
+Expected<hailo_extended_device_information_t> PcieDeviceHrpcClient::get_extended_device_information()
+{
+    TRY(auto request, ExtendedDeviceInfoSerializer::serialize_request(m_handle));
+    TRY(auto result, m_client->execute_request(HailoRpcActionID::DEVICE__EXTENDED_INFO, MemoryView(request)));
+    TRY(auto tuple, ExtendedDeviceInfoSerializer::deserialize_reply(MemoryView(result)));
+    // The reply is a (status, extended info) tuple: propagate a failed status, otherwise return the info.
+    CHECK_SUCCESS_AS_EXPECTED(std::get<0>(tuple));
+    auto extended_info = std::get<1>(tuple);
+
+    return extended_info;
+}
+
+
+} /* namespace hailort */
diff --git a/hailort/libhailort/src/vdma/pcie/pcie_device_hrpc_client.hpp b/hailort/libhailort/src/vdma/pcie/pcie_device_hrpc_client.hpp
new file mode 100644
index 00000000..023252c7
--- /dev/null
+++ b/hailort/libhailort/src/vdma/pcie/pcie_device_hrpc_client.hpp
@@ -0,0 +1,77 @@
+/**
+ * Copyright (c) 2024 Hailo Technologies Ltd. All rights reserved.
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT)
+ **/
+/**
+ * @file pcie_device_hrpc_client.hpp
+ * @brief Pcie Device HRPC client, represents the user's handle to the Device object (held in the hailort server)
+ **/
+
+#ifndef HAILO_PCIE_DEVICE_HRPC_CLIENT_HPP_
+#define HAILO_PCIE_DEVICE_HRPC_CLIENT_HPP_
+
+#include "hailo/device.hpp"
+#include "hrpc/client.hpp"
+
+
+namespace hailort
+{
+
+class PcieDeviceHrpcClient : public Device {
+public:
+    static Expected<std::unique_ptr<PcieDeviceHrpcClient>> create(const std::string &device_id);
+    static Expected<std::unique_ptr<PcieDeviceHrpcClient>> create(const std::string &device_id,
+        std::shared_ptr<hrpc::Client> client);
+    // 'handle' is the id of the device object held by the server; requests sent through 'client' carry it.
+    PcieDeviceHrpcClient(const std::string &device_id, std::shared_ptr<hrpc::Client> client, uint32_t handle) :
+        Device(Device::Type::PCIE), m_device_id(device_id), m_client(client), m_handle(handle) {}
+    virtual ~PcieDeviceHrpcClient();
+    // Not forwarded over HRPC: the overrides below report HAILO_NOT_IMPLEMENTED, except get_dev_id() and is_stream_interface_supported().
+    virtual Expected<ConfiguredNetworkGroupVector> configure(Hef &/*hef*/,
+        const NetworkGroupsParamsMap &configure_params={}) override { (void)configure_params; return make_unexpected(HAILO_NOT_IMPLEMENTED); }
+    virtual Expected<size_t> read_log(MemoryView &/*buffer*/, hailo_cpu_id_t /*cpu_id*/) override { return make_unexpected(HAILO_NOT_IMPLEMENTED); }
+    virtual hailo_status reset(hailo_reset_device_mode_t /*mode*/) override { return HAILO_NOT_IMPLEMENTED; }
+    virtual hailo_status set_notification_callback(const NotificationCallback &/*func*/, hailo_notification_id_t /*notification_id*/,
+        void */*opaque*/) override { return HAILO_NOT_IMPLEMENTED; }
+    virtual hailo_status remove_notification_callback(hailo_notification_id_t /*notification_id*/) override { return HAILO_NOT_IMPLEMENTED; }
+    virtual hailo_status firmware_update(const MemoryView &/*firmware_binary*/, bool /*should_reset*/) override { return HAILO_NOT_IMPLEMENTED; }
+    virtual hailo_status second_stage_update(uint8_t */*second_stage_binary*/, uint32_t /*second_stage_binary_length*/) override { return HAILO_NOT_IMPLEMENTED; }
+    virtual hailo_status store_sensor_config(uint32_t /*section_index*/, hailo_sensor_types_t /*sensor_type*/,
+        uint32_t /*reset_config_size*/, uint16_t /*config_height*/, uint16_t /*config_width*/, uint16_t /*config_fps*/,
+        const std::string &/*config_file_path*/, const std::string &/*config_name*/) override { return HAILO_NOT_IMPLEMENTED; }
+    virtual hailo_status store_isp_config(uint32_t /*reset_config_size*/, uint16_t /*config_height*/, uint16_t /*config_width*/, uint16_t /*config_fps*/,
+        const std::string &/*isp_static_config_file_path*/, const std::string &/*isp_runtime_config_file_path*/, const std::string &/*config_name*/) override { return HAILO_NOT_IMPLEMENTED; }
+    virtual Expected<Buffer> sensor_get_sections_info() override { return make_unexpected(HAILO_NOT_IMPLEMENTED); }
+    virtual hailo_status sensor_dump_config(uint32_t /*section_index*/, const std::string &/*config_file_path*/) override { return HAILO_NOT_IMPLEMENTED; }
+    virtual hailo_status sensor_set_i2c_bus_index(hailo_sensor_types_t /*sensor_type*/, uint32_t /*bus_index*/) override { return HAILO_NOT_IMPLEMENTED; }
+    virtual hailo_status sensor_load_and_start_config(uint32_t /*section_index*/) override { return HAILO_NOT_IMPLEMENTED; }
+    virtual hailo_status sensor_reset(uint32_t /*section_index*/) override { return HAILO_NOT_IMPLEMENTED; }
+    virtual hailo_status sensor_set_generic_i2c_slave(uint16_t /*slave_address*/, uint8_t /*offset_size*/, uint8_t /*bus_index*/,
+        uint8_t /*should_hold_bus*/, uint8_t /*slave_endianness*/) override { return HAILO_NOT_IMPLEMENTED; }
+    virtual Expected<Buffer> read_board_config() override { return make_unexpected(HAILO_NOT_IMPLEMENTED); }
+    virtual hailo_status write_board_config(const MemoryView &/*buffer*/) override { return HAILO_NOT_IMPLEMENTED; }
+    virtual Expected<hailo_fw_user_config_information_t> examine_user_config() override { return make_unexpected(HAILO_NOT_IMPLEMENTED); }
+    virtual Expected<Buffer> read_user_config() override { return make_unexpected(HAILO_NOT_IMPLEMENTED); }
+    virtual hailo_status write_user_config(const MemoryView &/*buffer*/) override { return HAILO_NOT_IMPLEMENTED; }
+    virtual hailo_status erase_user_config() override { return HAILO_NOT_IMPLEMENTED; }
+    virtual Expected<hailo_device_architecture_t> get_architecture() const override { return make_unexpected(HAILO_NOT_IMPLEMENTED); }
+    virtual const char* get_dev_id() const override { return m_device_id.c_str(); }
+    virtual bool is_stream_interface_supported(const hailo_stream_interface_t &/*stream_interface*/) const override { return false; }
+
+    virtual hailo_status wait_for_wakeup() override { return HAILO_NOT_IMPLEMENTED; }
+    virtual void increment_control_sequence() override {}
+    virtual hailo_status fw_interact_impl(uint8_t */*request_buffer*/, size_t /*request_size*/, uint8_t */*response_buffer*/,
+                                          size_t */*response_size*/, hailo_cpu_id_t /*cpu_id*/) override { return HAILO_NOT_IMPLEMENTED; }
+    // Defined out of line; each sends a request carrying m_handle through m_client.
+    virtual Expected<hailo_device_identity_t> identify() override;
+    virtual Expected<hailo_extended_device_information_t> get_extended_device_information() override;
+
+private:
+    std::string m_device_id;
+    std::shared_ptr<hrpc::Client> m_client;
+    uint32_t m_handle;
+};
+
+} /* namespace hailort */
+
+#endif /* HAILO_PCIE_DEVICE_HRPC_CLIENT_HPP_ */
diff --git a/hailort/libhailort/src/vdma/pcie_session.cpp b/hailort/libhailort/src/vdma/pcie_session.cpp
index 318ab98a..90939394 100644
--- a/hailort/libhailort/src/vdma/pcie_session.cpp
+++ b/hailort/libhailort/src/vdma/pcie_session.cpp
@@ -19,9 +19,7 @@ Expected<PcieSession> PcieSession::connect(std::shared_ptr<HailoRTDriver> driver
     TRY(auto input_desc_list, create_desc_list(*driver));
     TRY(auto output_desc_list, create_desc_list(*driver));
 
-    TRY(auto channel_pair, driver->soc_connect(input_desc_list.handle(), output_desc_list.handle()));
-
-    (void)port;
+    TRY(auto channel_pair, driver->soc_connect(port, input_desc_list.handle(), output_desc_list.handle()));
 
     return PcieSession::create(driver, channel_pair.first, channel_pair.second, std::move(input_desc_list),
         std::move(output_desc_list), PcieSessionType::CLIENT);
@@ -32,9 +30,7 @@ Expected<PcieSession> PcieSession::accept(std::shared_ptr<HailoRTDriver> driver,
     TRY(auto input_desc_list, create_desc_list(*driver));
     TRY(auto output_desc_list, create_desc_list(*driver));
 
-    TRY(auto channel_pair, driver->pci_ep_accept(input_desc_list.handle(), output_desc_list.handle()));
-
-    (void)port;
+    TRY(auto channel_pair, driver->pci_ep_accept(port, input_desc_list.handle(), output_desc_list.handle()));
 
     return PcieSession::create(driver, channel_pair.first, channel_pair.second, std::move(input_desc_list),
         std::move(output_desc_list), PcieSessionType::SERVER);
@@ -44,9 +40,6 @@ Expected<PcieSession> PcieSession::create(std::shared_ptr<HailoRTDriver> driver,
     vdma::ChannelId output_channel_id, vdma::DescriptorList &&input_desc_list, vdma::DescriptorList &&output_desc_list,
     PcieSessionType session_type)
 {
-    // TODO: HRT-14038 - remove this to support multiple connections. Until then, mark as used to allow ctrl+c handle
-    CHECK_SUCCESS(driver->mark_as_used());
-
     TRY(auto interrupts_dispatcher, vdma::InterruptsDispatcher::create(*driver));
     TRY(auto transfer_launcher, vdma::TransferLauncher::create());
 
@@ -91,6 +84,9 @@ hailo_status PcieSession::read_async(void *buffer, size_t size, std::function<vo
 hailo_status PcieSession::close()
 {
     hailo_status status = HAILO_SUCCESS; // Success orietnted
+    if (!m_should_close) {
+        return status;
+    }
 
     // First, close all host resources, disallow new transfers
     m_input->deactivate();
@@ -119,6 +115,7 @@ hailo_status PcieSession::close()
         status = stop_status;
     }
 
+    m_should_close = false;
     return status;
 }
 
@@ -136,6 +133,7 @@ hailo_status PcieSession::launch_transfer_sync(vdma::BoundaryChannel &channel,
     std::condition_variable cv;
     hailo_status transfer_status = HAILO_UNINITIALIZED;
     auto callback = [&](hailo_status status) mutable {
+        // TODO: HRT-14965 - when the wait_for returns on timeout, this reference capture will be invalid and cause a SEGFAULT
         {
             std::unique_lock<std::mutex> lock(mutex);
             assert(status != HAILO_UNINITIALIZED);
diff --git a/hailort/libhailort/src/vdma/pcie_session.hpp b/hailort/libhailort/src/vdma/pcie_session.hpp
index 3b089c96..93e66cf9 100644
--- a/hailort/libhailort/src/vdma/pcie_session.hpp
+++ b/hailort/libhailort/src/vdma/pcie_session.hpp
@@ -21,7 +21,7 @@ namespace hailort
 
 // A special magic number used to match each accept() with the corresponding connect().
 // By using this magic, multiple servers can be implemented and run simultaneously on the same device.
-using pcie_connection_port_t = uint32_t;
+using pcie_connection_port_t = uint16_t;
 using PcieSessionType = HailoRTDriver::PcieSessionType;
 
 /**
@@ -57,6 +57,8 @@ class PcieSession final {
     static Expected<PcieSession> connect(std::shared_ptr<HailoRTDriver> driver, pcie_connection_port_t port);
     static Expected<PcieSession> accept(std::shared_ptr<HailoRTDriver> driver, pcie_connection_port_t port);
 
+    ~PcieSession() { close(); } // close() returns early once m_should_close is false (already closed, or moved-from)
+
     hailo_status write(const void *buffer, size_t size, std::chrono::milliseconds timeout);
     hailo_status read(void *buffer, size_t size, std::chrono::milliseconds timeout);
 
@@ -72,6 +74,16 @@ class PcieSession final {
 
     static uint64_t max_transfer_size();
 
+    PcieSession(PcieSession &&other) :
+        m_should_close(std::exchange(other.m_should_close, false)), // 'other' gives up closing: its close() now returns early
+        m_driver(std::move(other.m_driver)),
+        m_interrupts_dispatcher(std::move(other.m_interrupts_dispatcher)),
+        m_transfer_launcher(std::move(other.m_transfer_launcher)),
+        m_input(std::move(other.m_input)),
+        m_output(std::move(other.m_output)),
+        m_session_type(other.m_session_type)
+    {} // NOTE(review): not declared noexcept - confirm whether a non-throwing move is expected by users of this type
+
 private:
 
     using ChannelIdsPair = std::pair<vdma::ChannelId, vdma::ChannelId>;
@@ -99,6 +111,7 @@ class PcieSession final {
         void *buffer, size_t size, std::function<void(hailo_status)> &&callback);
     static Expected<vdma::DescriptorList> create_desc_list(HailoRTDriver &driver);
 
+    bool m_should_close = true;
     std::shared_ptr<HailoRTDriver> m_driver;
 
     std::unique_ptr<vdma::InterruptsDispatcher> m_interrupts_dispatcher;
diff --git a/hailort/libhailort/src/vdma/vdma_config_core_op.cpp b/hailort/libhailort/src/vdma/vdma_config_core_op.cpp
index bd22024b..58709d71 100644
--- a/hailort/libhailort/src/vdma/vdma_config_core_op.cpp
+++ b/hailort/libhailort/src/vdma/vdma_config_core_op.cpp
@@ -230,6 +230,29 @@ hailo_status VdmaConfigCoreOp::set_scheduler_priority(uint8_t /*priority*/, cons
     return HAILO_INVALID_OPERATION;
 }
 
+hailo_status VdmaConfigCoreOp::bind_buffers(std::unordered_map<std::string, TransferRequest> &transfers)
+{ // Calls bind_buffer() on each input and output stream with a copy of its entry in 'transfers' (keyed by stream name)
+    for (auto &input : m_input_streams) {
+        auto transfer = transfers.find(input.second->name());
+        CHECK(transfer != transfers.end(), HAILO_INTERNAL_FAILURE, "Invalid stream {}", input.second->name());
+        if (transfer->second.transfer_buffers.size() > 1) {
+            break; // NOTE(review): leaves the loop, so the remaining input streams are not bound either - confirm 'continue' was not intended
+        }
+        CHECK_SUCCESS(input.second->bind_buffer(TransferRequest{transfer->second}));
+    }
+    // Output streams get the same treatment: a missing entry is an error, a multi-buffer request stops the loop.
+    for (auto &output : m_output_streams) {
+        auto transfer = transfers.find(output.second->name());
+        CHECK(transfer != transfers.end(), HAILO_INTERNAL_FAILURE, "Invalid stream {}", output.second->name());
+        if (transfer->second.transfer_buffers.size() > 1) {
+            break; // NOTE(review): same question as in the input loop - remaining output streams are skipped
+        }
+        CHECK_SUCCESS(output.second->bind_buffer(TransferRequest{transfer->second}));
+    }
+
+    return HAILO_SUCCESS;
+}
+
 Expected<std::shared_ptr<LatencyMetersMap>> VdmaConfigCoreOp::get_latency_meters()
 {
     auto latency_meters = m_resources_manager->get_latency_meters();
@@ -254,26 +277,18 @@ Expected<Buffer> VdmaConfigCoreOp::get_intermediate_buffer(const IntermediateBuf
     return m_resources_manager->read_intermediate_buffer(key);
 }
 
-Expected<Buffer> VdmaConfigCoreOp::get_cache_buffer(uint32_t cache_id)
-{
-    return m_resources_manager->read_cache_buffer(cache_id);
-}
-
-Expected<std::map<uint32_t, Buffer>> VdmaConfigCoreOp::get_cache_buffers()
-{
-    return m_resources_manager->read_cache_buffers();
-}
-
 bool VdmaConfigCoreOp::has_caches() const
 {
-    return m_resources_manager->get_cache_buffers().size() > 0;
+    const auto cache_buffers = m_cache_manager->get_cache_buffers(name());
+    return cache_buffers && !(cache_buffers->get()).empty(); // a failed lookup is reported as "no caches", not as an error
 }
 
 Expected<uint32_t> VdmaConfigCoreOp::get_cache_read_size() const
 {
     // Input to the core == cache read
     size_t input_size = 0;
-    for (auto &cache_buffer : m_resources_manager->get_cache_buffers()) {
+    TRY(const auto cache_buffers, m_cache_manager->get_cache_buffers(name()));
+    for (auto &cache_buffer : cache_buffers.get()) {
         const auto curr_input_size = cache_buffer.second.input_size();
         if (input_size == 0) {
             input_size = curr_input_size;
@@ -289,7 +304,8 @@ Expected<uint32_t> VdmaConfigCoreOp::get_cache_write_size() const
 {
     // Output from the core == cache write
     size_t output_size = 0;
-    for (auto &cache_buffer : m_resources_manager->get_cache_buffers()) {
+    TRY(const auto cache_buffers, m_cache_manager->get_cache_buffers(name()));
+    for (auto &cache_buffer : cache_buffers.get()) {
         const auto curr_output_size = cache_buffer.second.output_size();
         if (output_size == 0) {
             output_size = curr_output_size;
@@ -308,15 +324,10 @@ hailo_status VdmaConfigCoreOp::init_cache(uint32_t read_offset, int32_t write_of
     return m_cache_manager->init_caches(read_offset, write_offset_delta);
 }
 
+// TODO: remove get_cache_info (HRT-14396). Until then it is a stub that always fails with HAILO_NOT_IMPLEMENTED.
 Expected<hailo_cache_info_t> VdmaConfigCoreOp::get_cache_info() const
 {
-    CHECK(has_caches(), HAILO_INVALID_OPERATION, "No caches in core-op");
-
-    return hailo_cache_info_t{
-        m_cache_manager->get_cache_size(),
-        m_cache_manager->get_read_offset_bytes(),
-        m_cache_manager->get_write_offset_bytes_delta()
-    };
+    return make_unexpected(HAILO_NOT_IMPLEMENTED);
 }
 
 hailo_status VdmaConfigCoreOp::update_cache_offset(int32_t offset_delta_bytes)
@@ -338,4 +349,33 @@ hailo_status VdmaConfigCoreOp::update_cache_offset(int32_t offset_delta_bytes)
     return HAILO_SUCCESS;
 }
 
+Expected<std::vector<uint32_t>> VdmaConfigCoreOp::get_cache_ids() const
+{
+    // Returns the key of every entry in the cache-buffer container that the cache manager yields for name().
+    TRY(const auto cache_buffers, m_cache_manager->get_cache_buffers(name()));
+    const auto &buffers_by_id = cache_buffers.get();
+    std::vector<uint32_t> cache_ids;
+    cache_ids.reserve(buffers_by_id.size());
+    for (auto entry = buffers_by_id.begin(); entry != buffers_by_id.end(); ++entry) {
+        cache_ids.emplace_back(entry->first);
+    }
+    return cache_ids;
+}
+
+Expected<Buffer> VdmaConfigCoreOp::read_cache_buffer(uint32_t cache_id)
+{ // Returns read_cache() of the cache buffer stored under 'cache_id'; an unknown id fails with HAILO_INVALID_ARGUMENT
+    TRY(const auto cache_buffers, m_cache_manager->get_cache_buffers(name()));
+    auto cache_buffer_it = cache_buffers.get().find(cache_id);
+    CHECK(cache_buffer_it != cache_buffers.get().end(), HAILO_INVALID_ARGUMENT, "Cache buffer with id {} not found", cache_id);
+    return cache_buffer_it->second.read_cache();
+}
+
+hailo_status VdmaConfigCoreOp::write_cache_buffer(uint32_t cache_id, MemoryView buffer)
+{ // Passes 'buffer' to write_cache() of the cache buffer stored under 'cache_id'; an unknown id fails with HAILO_INVALID_ARGUMENT
+    TRY(const auto cache_buffers, m_cache_manager->get_cache_buffers(name()));
+    auto cache_buffer_it = cache_buffers.get().find(cache_id);
+    CHECK(cache_buffer_it != cache_buffers.get().end(), HAILO_INVALID_ARGUMENT, "Cache buffer with id {} not found", cache_id);
+    return cache_buffer_it->second.write_cache(buffer);
+}
+
 } /* namespace hailort */
diff --git a/hailort/libhailort/src/vdma/vdma_config_core_op.hpp b/hailort/libhailort/src/vdma/vdma_config_core_op.hpp
index 5a86248b..e69cec46 100644
--- a/hailort/libhailort/src/vdma/vdma_config_core_op.hpp
+++ b/hailort/libhailort/src/vdma/vdma_config_core_op.hpp
@@ -69,6 +69,8 @@ class VdmaConfigCoreOp : public CoreOp
     hailo_status register_cache_update_callback();
     hailo_status unregister_cache_update_callback();
 
+    hailo_status bind_buffers(std::unordered_map<std::string, TransferRequest> &transfers);
+
     virtual Expected<hailo_stream_interface_t> get_default_streams_interface() override;
 
     virtual Expected<std::shared_ptr<LatencyMetersMap>> get_latency_meters() override;
@@ -81,14 +83,15 @@ class VdmaConfigCoreOp : public CoreOp
     virtual hailo_status set_scheduler_priority(uint8_t priority, const std::string &network_name) override;
     virtual Expected<HwInferResults> run_hw_infer_estimator() override;
     virtual Expected<Buffer> get_intermediate_buffer(const IntermediateBufferKey &) override;
-    virtual Expected<Buffer> get_cache_buffer(uint32_t cache_id) override;
-    virtual Expected<std::map<uint32_t, Buffer>> get_cache_buffers() override;
     virtual bool has_caches() const override;
     virtual Expected<uint32_t> get_cache_read_size() const override;
     virtual Expected<uint32_t> get_cache_write_size() const override;
     virtual hailo_status init_cache(uint32_t read_offset, int32_t write_offset_delta) override;
     virtual Expected<hailo_cache_info_t> get_cache_info() const;
     virtual hailo_status update_cache_offset(int32_t offset_delta_bytes) override;
+    virtual Expected<std::vector<uint32_t>> get_cache_ids() const override;
+    virtual Expected<Buffer> read_cache_buffer(uint32_t cache_id) override;
+    virtual hailo_status write_cache_buffer(uint32_t cache_id, MemoryView buffer) override;
 
     virtual ~VdmaConfigCoreOp() = default;
     VdmaConfigCoreOp(const VdmaConfigCoreOp &other) = delete;
diff --git a/hailort/libhailort/src/vdma/vdma_config_manager.cpp b/hailort/libhailort/src/vdma/vdma_config_manager.cpp
index 71fbcb38..630665f0 100644
--- a/hailort/libhailort/src/vdma/vdma_config_manager.cpp
+++ b/hailort/libhailort/src/vdma/vdma_config_manager.cpp
@@ -44,6 +44,7 @@ hailo_status VdmaConfigManager::deactivate_core_op(std::shared_ptr<VdmaConfigCor
     return switch_core_op(current_active_core_op, DEACTIVATE_NEXT_CORE_OP, DEACTIVATE_BATCH_SIZE);
 }
 
+// TODO: fix callback registration and deregistration in the case of switch NG (HRT-14287)
 hailo_status VdmaConfigManager::set_state_machine(std::shared_ptr<VdmaConfigCoreOp> current,
     std::shared_ptr<VdmaConfigCoreOp> next, uint16_t batch_size)
 {
@@ -70,6 +71,7 @@ hailo_status VdmaConfigManager::switch_core_op(std::shared_ptr<VdmaConfigCoreOp>
     assert((nullptr != current) || (nullptr != next));
 
     if (current != nullptr) {
+        CHECK_SUCCESS(current->unregister_cache_update_callback(), "Failed unregistering cache updates from previous core-op");
         CHECK_SUCCESS(current->deactivate_host_resources(), "Failed deactivating host resources for current core-op");
 
         // TODO: In mercury we need to reset after deactivate. This will be fixed in MSW-762 and the "if" will be removed
@@ -88,7 +90,6 @@ hailo_status VdmaConfigManager::switch_core_op(std::shared_ptr<VdmaConfigCoreOp>
 
     if (current != nullptr) {
         CHECK_SUCCESS(current->cancel_pending_transfers(), "Failed canceling pending transfers from previous core-op");
-        CHECK_SUCCESS(current->unregister_cache_update_callback(), "Failed unregistering cache updates from previous core-op");
     }
 
     return HAILO_SUCCESS;
diff --git a/hailort/libhailort/src/vdma/vdma_device.cpp b/hailort/libhailort/src/vdma/vdma_device.cpp
index 93d4353f..17522c52 100644
--- a/hailort/libhailort/src/vdma/vdma_device.cpp
+++ b/hailort/libhailort/src/vdma/vdma_device.cpp
@@ -45,25 +45,6 @@ VdmaDevice::VdmaDevice(std::unique_ptr<HailoRTDriver> &&driver, Device::Type typ
     status = HAILO_SUCCESS;
 }
 
-Expected<std::unique_ptr<VdmaDevice>> VdmaDevice::create(const std::string &device_id)
-{
-    const bool DONT_LOG_ON_FAILURE = false;
-    if (IntegratedDevice::DEVICE_ID == device_id) {
-        auto device = IntegratedDevice::create();
-        CHECK_EXPECTED(device);;
-        return std::unique_ptr<VdmaDevice>(device.release());
-    }
-    else if (auto pcie_info = PcieDevice::parse_pcie_device_info(device_id, DONT_LOG_ON_FAILURE)) {
-        auto device = PcieDevice::create(pcie_info.release());
-        CHECK_EXPECTED(device);
-        return std::unique_ptr<VdmaDevice>(device.release());
-    }
-    else {
-        LOGGER__ERROR("Invalid device id {}", device_id);
-        return make_unexpected(HAILO_INVALID_ARGUMENT);
-    }
-}
-
 hailo_status VdmaDevice::wait_for_wakeup()
 {
     return HAILO_SUCCESS;
diff --git a/hailort/libhailort/src/vdma/vdma_device.hpp b/hailort/libhailort/src/vdma/vdma_device.hpp
index f7bcc909..ac2db512 100644
--- a/hailort/libhailort/src/vdma/vdma_device.hpp
+++ b/hailort/libhailort/src/vdma/vdma_device.hpp
@@ -26,8 +26,6 @@ namespace hailort
 
 class VdmaDevice : public DeviceBase {
 public:
-    static Expected<std::unique_ptr<VdmaDevice>> create(const std::string &device_id);
-
     virtual ~VdmaDevice();
 
     virtual hailo_status wait_for_wakeup() override;
diff --git a/hailort/libhailort/src/vdma/vdma_stream.cpp b/hailort/libhailort/src/vdma/vdma_stream.cpp
index 51f856f3..4368d8b0 100644
--- a/hailort/libhailort/src/vdma/vdma_stream.cpp
+++ b/hailort/libhailort/src/vdma/vdma_stream.cpp
@@ -48,7 +48,7 @@ Expected<BounceBufferQueuePtr> VdmaInputStream::init_dma_bounce_buffer_pool(
     CHECK_NOT_NULL(bounce_buffers_pool, HAILO_OUT_OF_HOST_MEMORY);
 
     for (size_t i = 0; i < dma_bounce_buffer_pool_size; i++) {
-        TRY(auto dma_able_buffer, vdma::DmaAbleBuffer::create_by_allocation(bounce_buffer_size, device.get_driver()));
+        TRY(auto dma_able_buffer, vdma::DmaAbleBuffer::create_by_allocation(bounce_buffer_size));
 
         auto dma_storage = make_shared_nothrow<DmaStorage>(std::move(dma_able_buffer));
         CHECK_NOT_NULL(dma_storage, HAILO_OUT_OF_HOST_MEMORY);
@@ -116,10 +116,7 @@ Expected<std::unique_ptr<StreamBufferPool>> VdmaInputStream::allocate_buffer_poo
             m_channel->get_desc_list().desc_page_size(), m_channel->get_desc_list().count(), frame_size));
 
         // Bind the buffer to the channel to avoid the need to do it on every transfer.
-        TRY(auto pool_dma_able_buffer, circular_pool->get_base_buffer().storage().get_dma_able_buffer());
-        TRY(auto mapped_buffer, vdma::MappedBuffer::create_shared(pool_dma_able_buffer, m_device.get_driver(),
-            HailoRTDriver::DmaDirection::H2D));
-        CHECK_SUCCESS(m_channel->bind_buffer(mapped_buffer));
+        CHECK_SUCCESS(m_channel->bind_buffer(circular_pool->get_base_buffer()));
 
         return std::unique_ptr<StreamBufferPool>(std::move(circular_pool));
     }
@@ -171,6 +168,20 @@ Expected<TransferRequest> VdmaInputStream::align_transfer_request(TransferReques
     return TransferRequest(std::move(transfer_buffers), wrapped_callback);
 }
 
+hailo_status VdmaInputStream::bind_buffer(TransferRequest &&transfer_request)
+{
+    // Binds the request's first buffer to the channel; an empty request is rejected because transfer_buffers[0] is read below.
+    CHECK(!transfer_request.transfer_buffers.empty(), HAILO_INVALID_ARGUMENT, "Transfer request has no buffers to bind");
+    m_channel->remove_buffer_binding();
+    if (TransferBufferType::MEMORYVIEW == transfer_request.transfer_buffers[0].type()) {
+        TRY(auto is_request_aligned, transfer_request.is_request_aligned());
+        if (!is_request_aligned) {
+            return HAILO_SUCCESS; // Best effort, if buffer is not aligned - will program descriptors later
+        }
+    }
+    return m_channel->map_and_bind_buffer(transfer_request.transfer_buffers[0]);
+}
+
 hailo_status VdmaInputStream::write_async_impl(TransferRequest &&transfer_request)
 {
     TRACE(FrameDequeueH2DTrace, m_device.get_dev_id(), m_core_op_handle, name());
@@ -270,10 +281,7 @@ Expected<std::unique_ptr<StreamBufferPool>> VdmaOutputStream::allocate_buffer_po
             m_channel->get_desc_list().desc_page_size(), m_channel->get_desc_list().count(), m_transfer_size));
 
         // Bind the buffer to the channel to avoid the need to do it on every transfer.
-        TRY(auto pool_dma_able_buffer, circular_pool->get_base_buffer().storage().get_dma_able_buffer());
-        TRY(auto mapped_buffer, vdma::MappedBuffer::create_shared(pool_dma_able_buffer, m_device.get_driver(),
-            HailoRTDriver::DmaDirection::D2H));
-        CHECK_SUCCESS(m_channel->bind_buffer(mapped_buffer));
+        CHECK_SUCCESS(m_channel->bind_buffer(circular_pool->get_base_buffer()));
 
         return std::unique_ptr<StreamBufferPool>(std::move(circular_pool));
     }
@@ -332,6 +340,20 @@ hailo_status VdmaOutputStream::read_async_impl(TransferRequest &&transfer_reques
     }
 }
 
+hailo_status VdmaOutputStream::bind_buffer(TransferRequest &&transfer_request)
+{
+    // Binds the request's first buffer to the channel; an empty request is rejected because transfer_buffers[0] is read below.
+    CHECK(!transfer_request.transfer_buffers.empty(), HAILO_INVALID_ARGUMENT, "Transfer request has no buffers to bind");
+    m_channel->remove_buffer_binding();
+    if (TransferBufferType::MEMORYVIEW == transfer_request.transfer_buffers[0].type()) {
+        TRY(auto is_request_aligned, transfer_request.is_request_aligned());
+        if (!is_request_aligned) {
+            return HAILO_SUCCESS; // Best effort, if buffer is not aligned - will program descriptors later
+        }
+    }
+    return m_channel->map_and_bind_buffer(transfer_request.transfer_buffers[0]);
+}
+
 hailo_status VdmaOutputStream::activate_stream_impl()
 {
     return m_channel->activate();
diff --git a/hailort/libhailort/src/vdma/vdma_stream.hpp b/hailort/libhailort/src/vdma/vdma_stream.hpp
index 836472af..f82ec82d 100644
--- a/hailort/libhailort/src/vdma/vdma_stream.hpp
+++ b/hailort/libhailort/src/vdma/vdma_stream.hpp
@@ -44,6 +44,7 @@ class VdmaInputStream : public AsyncInputStreamBase {
     virtual hailo_stream_interface_t get_interface() const override;
     virtual void set_vdevice_core_op_handle(vdevice_core_op_handle_t core_op_handle) override;
     virtual hailo_status cancel_pending_transfers() override;
+    virtual hailo_status bind_buffer(TransferRequest &&transfer_request) override final;
 
 private:
     Expected<std::unique_ptr<StreamBufferPool>> allocate_buffer_pool() override;
@@ -91,6 +92,7 @@ class VdmaOutputStream : public AsyncOutputStreamBase
     virtual inline vdevice_core_op_handle_t get_vdevice_core_op_handle() override { return m_core_op_handle; };
     virtual hailo_status cancel_pending_transfers() override;
     void set_d2h_callback(std::function<void(hailo_status)> callback);
+    virtual hailo_status bind_buffer(TransferRequest &&transfer_request) override final;
 
 private:
     static void default_d2h_callback(hailo_status) {};
diff --git a/hailort/prepare_externals.cmake b/hailort/prepare_externals.cmake
index 7d3fdbb7..a3fba2e1 100644
--- a/hailort/prepare_externals.cmake
+++ b/hailort/prepare_externals.cmake
@@ -1,4 +1,4 @@
-cmake_minimum_required(VERSION 3.0.0)
+cmake_minimum_required(VERSION 3.5.0)
 
 # TODO: remove execute_cmake. support script mode?
 execute_process(COMMAND
diff --git a/hailort/prepare_externals/CMakeLists.txt b/hailort/prepare_externals/CMakeLists.txt
index 6166c4ce..8505c8c2 100644
--- a/hailort/prepare_externals/CMakeLists.txt
+++ b/hailort/prepare_externals/CMakeLists.txt
@@ -1,4 +1,4 @@
-cmake_minimum_required(VERSION 3.0.0)
+cmake_minimum_required(VERSION 3.5.0)
 project(hailort_prebuild)
 
 set(HAILO_EXTERNALS_EXCLUDE_TARGETS ON)
diff --git a/hailort/rpc/hailort_rpc.proto b/hailort/rpc/hailort_rpc.proto
index f53b4f07..5634b02b 100644
--- a/hailort/rpc/hailort_rpc.proto
+++ b/hailort/rpc/hailort_rpc.proto
@@ -111,14 +111,25 @@ message ProtoCallbackIdentifier {
     uint32 cb_idx = 4;
     string stream_name = 5;
     uint32 direction = 6;
-    bytes data = 7;
-    uint32 status = 8;
+    oneof transfer_data {
+        bytes data = 7;
+        ProtoShmBufferIdentifier shared_memory_identifier = 8;
+    }
+    uint32 status = 9;
+}
+
+message ProtoShmBufferIdentifier { // Refers to a shared-memory buffer; used as the alternative to inline `bytes data` in the transfer_data oneofs
+    string name = 1; // presumably the name under which the shared-memory segment is opened - TODO confirm
+    uint32 size = 2; // presumably the buffer size in bytes (uint32 would cap it below 4 GiB) - TODO confirm
 }
 
 message ProtoTransferRequest {
     string stream_name = 1;
     uint32 direction = 2;
-    bytes data = 3;
+    oneof transfer_data { // At most one payload form is set: inline bytes or a shared-memory reference
+        bytes data = 3; // keeps field number 3 of the top-level `data` field it replaces
+        ProtoShmBufferIdentifier shared_memory_identifier = 4;
+    }
     uint32 cb_idx = 5;
 }
 
@@ -194,6 +205,7 @@ message Release_Reply {
 message VStreams_create_Reply {
     uint32 status = 1;
     repeated uint32 handles = 2;
+    repeated string names = 3;
 }
 
 message VStream_create_Request {
diff --git a/hailort/rpc/rpc_definitions.hpp b/hailort/rpc/rpc_definitions.hpp
index 024d5c3b..b0a6c81e 100644
--- a/hailort/rpc/rpc_definitions.hpp
+++ b/hailort/rpc/rpc_definitions.hpp
@@ -10,11 +10,14 @@
 #ifndef _HAILO_RPC_DEFINITIONS_HPP_
 #define _HAILO_RPC_DEFINITIONS_HPP_
 
+#include "common/internal_env_vars.hpp"
+
 namespace hailort
 {
 
 #ifdef _WIN32
 static const std::string HAILORT_SERVICE_DEFAULT_ADDR = "127.0.0.1:50051";
+static const std::string HAILORT_SERVICE_NAMED_MUTEX = "Global\\HailoRTServiceMutex";
 #else
 static const std::string HAILO_UDS_PREFIX = "unix://";
 static const std::string HAILO_DEFAULT_SERVICE_ADDR = "/tmp/hailort_uds.sock";
@@ -22,11 +25,13 @@ static const std::string HAILORT_SERVICE_DEFAULT_ADDR = HAILO_UDS_PREFIX + HAILO
 #endif
 static const std::chrono::seconds HAILO_KEEPALIVE_INTERVAL(2);
 
-#define HAILORT_SERVICE_ADDRESS_ENV_VAR ("HAILORT_SERVICE_ADDRESS")
+#define INVALID_CB_INDEX (UINT32_MAX)
+#define INVALID_STREAM_NAME ("INVALID_STREAM_NAME")
+
 static const std::string HAILORT_SERVICE_ADDRESS = []() {
-    const char* env_var = std::getenv(HAILORT_SERVICE_ADDRESS_ENV_VAR);
-    if (env_var) {
-        return std::string(env_var);
+    auto addr = get_env_variable(HAILORT_SERVICE_ADDRESS_ENV_VAR);
+    if (addr) {
+        return addr.value();
     } else {
         return HAILORT_SERVICE_DEFAULT_ADDR; // Default value if environment variable is not set
     }
diff --git a/hailort/scripts/download_firmware_eth.cmd b/hailort/scripts/download_firmware_eth.cmd
index e2502e91..9bb6cc94 100644
--- a/hailort/scripts/download_firmware_eth.cmd
+++ b/hailort/scripts/download_firmware_eth.cmd
@@ -2,7 +2,7 @@
 @ECHO OFF
 
 set BASE_URI=https://hailo-hailort.s3.eu-west-2.amazonaws.com
-set HRT_VERSION=4.18.0
+set HRT_VERSION=4.19.0
 set FW_DIR=Hailo8/%HRT_VERSION%/FW
 set FW=hailo8_fw.%HRT_VERSION%_eth.bin
 
diff --git a/hailort/scripts/download_firmware_eth.sh b/hailort/scripts/download_firmware_eth.sh
index 72da7a1d..6b18f54f 100755
--- a/hailort/scripts/download_firmware_eth.sh
+++ b/hailort/scripts/download_firmware_eth.sh
@@ -2,7 +2,7 @@
 set -e
 
 readonly BASE_URI="https://hailo-hailort.s3.eu-west-2.amazonaws.com"
-readonly HRT_VERSION=4.18.0
+readonly HRT_VERSION=4.19.0
 readonly FW_AWS_DIR="Hailo8/${HRT_VERSION}/FW"
 readonly FW="hailo8_fw.${HRT_VERSION}_eth.bin"
 
diff --git a/hailort/scripts/download_hefs.cmd b/hailort/scripts/download_hefs.cmd
index ac442c58..9aa54fe8 100644
--- a/hailort/scripts/download_hefs.cmd
+++ b/hailort/scripts/download_hefs.cmd
@@ -1,7 +1,7 @@
 :: cmd
 @ECHO OFF
 set BASE_URI=https://hailo-hailort.s3.eu-west-2.amazonaws.com
-set HRT_VERSION=4.18.0
+set HRT_VERSION=4.19.0
 set REMOTE_HEF_DIR=Hailo8/%HRT_VERSION%/HEFS
 set LOCAL_EXAMPLES_HEF_DIR=..\libhailort\examples\hefs
 set LOCAL_TUTORIALS_HEF_DIR=..\libhailort\bindings\python\platform\hailo_tutorials\hefs
diff --git a/hailort/scripts/download_hefs.sh b/hailort/scripts/download_hefs.sh
index a6baf2cb..fe0fec3e 100755
--- a/hailort/scripts/download_hefs.sh
+++ b/hailort/scripts/download_hefs.sh
@@ -2,7 +2,7 @@
 set -e
 
 readonly BASE_URI="https://hailo-hailort.s3.eu-west-2.amazonaws.com"
-readonly HRT_VERSION=4.18.0
+readonly HRT_VERSION=4.19.0
 readonly REMOTE_HEF_DIR="Hailo8/${HRT_VERSION}/HEFS"
 readonly LOCAL_EXAMPLES_HEF_DIR="../libhailort/examples/hefs"
 readonly LOCAL_TUTORIALS_HEF_DIR="../libhailort/bindings/python/platform/hailo_tutorials/hefs"