From 36fb9a987d31e813398c39e2383988ef4597adbb Mon Sep 17 00:00:00 2001
From: Zhenzhong1 <109137058+Zhenzhong1@users.noreply.github.com>
Date: Tue, 10 Sep 2024 11:07:48 +0800
Subject: [PATCH] [ChatQnA] Update benchmarking manifests (#766)

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
---
 .../four_gaudi/chatqna_config_map.yaml        |  23 -
 .../four_gaudi/chatqna_mega_service_run.yaml  |  55 --
 .../four_gaudi/dataprep-microservice_run.yaml |  75 --
 .../four_gaudi/embedding-dependency_run.yaml  |  62 --
 .../embedding-microservice_run.yaml           |  54 --
 .../four_gaudi/llm-dependency_run.yaml        |  88 --
 .../four_gaudi/llm-microservice_run.yaml      |  54 --
 .../oob_four_gaudi_with_rerank.yaml           | 734 +++++++++++++++++
 .../four_gaudi/reranking-dependency_run.yaml  |  85 --
 .../reranking-microservice_run.yaml           |  54 --
 .../retrieval-microservice_run.yaml           |  72 --
 .../with_rerank/four_gaudi/vector-db_run.yaml |  48 --
 .../single_gaudi/chatqna_config_map.yaml      |  23 -
 .../chatqna_mega_service_run.yaml             |  55 --
 .../dataprep-microservice_run.yaml            |  75 --
 .../embedding-dependency_run.yaml             |  62 --
 .../embedding-microservice_run.yaml           |  54 --
 .../single_gaudi/llm-dependency_run.yaml      |  88 --
 .../single_gaudi/llm-microservice_run.yaml    |  54 --
 .../oob_single_gaudi_with_rerank.yaml         | 734 +++++++++++++++++
 .../reranking-dependency_run.yaml             |  85 --
 .../reranking-microservice_run.yaml           |  54 --
 .../retrieval-microservice_run.yaml           |  72 --
 .../single_gaudi/vector-db_run.yaml           |  48 --
 .../two_gaudi/chatqna_config_map.yaml         |  23 -
 .../two_gaudi/chatqna_mega_service_run.yaml   |  55 --
 .../two_gaudi/dataprep-microservice_run.yaml  |  75 --
 .../two_gaudi/embedding-dependency_run.yaml   |  62 --
 .../two_gaudi/embedding-microservice_run.yaml |  54 --
 .../two_gaudi/llm-dependency_run.yaml         |  88 --
 .../two_gaudi/llm-microservice_run.yaml       |  54 --
 .../two_gaudi/oob_two_gaudi_with_rerank.yaml  | 734 +++++++++++++++++
 .../two_gaudi/reranking-dependency_run.yaml   |  85 --
 .../two_gaudi/reranking-microservice_run.yaml |  54 --
 .../two_gaudi/retrieval-microservice_run.yaml |  72 --
 .../with_rerank/two_gaudi/vector-db_run.yaml  |  48 --
 .../four_gaudi/chatqna_config_map.yaml        |  23 -
 .../four_gaudi/chatqna_mega_service_run.yaml  |  55 --
 .../four_gaudi/dataprep-microservice_run.yaml |  75 --
 .../four_gaudi/embedding-dependency_run.yaml  |  62 --
 .../embedding-microservice_run.yaml           |  54 --
 .../four_gaudi/llm-dependency_run.yaml        |  88 --
 .../four_gaudi/llm-microservice_run.yaml      |  54 --
 .../oob_four_gaudi_without_rerank.yaml        | 734 +++++++++++++++++
 .../four_gaudi/reranking-dependency_run.yaml  |  85 --
 .../reranking-microservice_run.yaml           |  54 --
 .../retrieval-microservice_run.yaml           |  72 --
 .../four_gaudi/vector-db_run.yaml             |  48 --
 .../single_gaudi/chatqna_config_map.yaml      |  23 -
 .../chatqna_mega_service_run.yaml             |  55 --
 .../dataprep-microservice_run.yaml            |  75 --
 .../embedding-dependency_run.yaml             |  62 --
 .../embedding-microservice_run.yaml           |  54 --
 .../single_gaudi/llm-dependency_run.yaml      |  88 --
 .../single_gaudi/llm-microservice_run.yaml    |  54 --
 .../oob_single_gaudi_without_rerank.yaml      | 583 +++++++++++++
 .../retrieval-microservice_run.yaml           |  72 --
 .../single_gaudi/vector-db_run.yaml           |  48 --
 .../two_gaudi/chatqna_config_map.yaml         |  23 -
 .../two_gaudi/chatqna_mega_service_run.yaml   |  55 --
 .../two_gaudi/dataprep-microservice_run.yaml  |  75 --
 .../two_gaudi/embedding-dependency_run.yaml   |  62 --
 .../two_gaudi/embedding-microservice_run.yaml |  54 --
 .../two_gaudi/llm-dependency_run.yaml         |  88 --
 .../two_gaudi/llm-microservice_run.yaml       |  54 --
 .../oob_two_gaudi_without_rerank.yaml         | 583 +++++++++++++
 .../two_gaudi/retrieval-microservice_run.yaml |  72 --
 .../two_gaudi/vector-db_run.yaml              |  48 --
 .../four_gaudi/chatqna_config_map.yaml        |  23 -
 .../four_gaudi/chatqna_mega_service_run.yaml  |  62 --
 .../four_gaudi/dataprep-microservice_run.yaml |  75 --
 .../four_gaudi/embedding-dependency_run.yaml  |  69 --
 .../embedding-microservice_run.yaml           |  59 --
 .../four_gaudi/llm-dependency_run.yaml        |  88 --
 .../four_gaudi/llm-microservice_run.yaml      |  59 --
 .../four_gaudi/reranking-dependency_run.yaml  |  85 --
 .../reranking-microservice_run.yaml           |  59 --
 .../retrieval-microservice_run.yaml           |  79 --
 .../tuned_four_gaudi_with_rerank.yaml         | 770 ++++++++++++++++++
 .../with_rerank/four_gaudi/vector-db_run.yaml |  48 --
 .../single_gaudi/chatqna_config_map.yaml      |  23 -
 .../chatqna_mega_service_run.yaml             |  62 --
 .../dataprep-microservice_run.yaml            |  75 --
 .../embedding-dependency_run.yaml             |  69 --
 .../embedding-microservice_run.yaml           |  59 --
 .../single_gaudi/llm-dependency_run.yaml      |  88 --
 .../single_gaudi/llm-microservice_run.yaml    |  59 --
 .../reranking-dependency_run.yaml             |  85 --
 .../reranking-microservice_run.yaml           |  59 --
 .../retrieval-microservice_run.yaml           |  79 --
 .../tuned_single_gaudi_with_rerank.yaml       | 770 ++++++++++++++++++
 .../single_gaudi/vector-db_run.yaml           |  48 --
 .../two_gaudi/chatqna_config_map.yaml         |  23 -
 .../two_gaudi/chatqna_mega_service_run.yaml   |  62 --
 .../two_gaudi/dataprep-microservice_run.yaml  |  75 --
 .../two_gaudi/embedding-dependency_run.yaml   |  69 --
 .../two_gaudi/embedding-microservice_run.yaml |  59 --
 .../two_gaudi/llm-dependency_run.yaml         |  88 --
 .../two_gaudi/llm-microservice_run.yaml       |  59 --
 .../two_gaudi/reranking-dependency_run.yaml   |  85 --
 .../two_gaudi/reranking-microservice_run.yaml |  59 --
 .../two_gaudi/retrieval-microservice_run.yaml |  79 --
 .../tuned_two_gaudi_with_rerank.yaml          | 770 ++++++++++++++++++
 .../with_rerank/two_gaudi/vector-db_run.yaml  |  48 --
 .../four_gaudi/chatqna_config_map.yaml        |  23 -
 .../four_gaudi/chatqna_mega_service_run.yaml  |  62 --
 .../four_gaudi/dataprep-microservice_run.yaml |  75 --
 .../four_gaudi/embedding-dependency_run.yaml  |  69 --
 .../embedding-microservice_run.yaml           |  59 --
 .../four_gaudi/llm-dependency_run.yaml        |  88 --
 .../four_gaudi/llm-microservice_run.yaml      |  59 --
 .../retrieval-microservice_run.yaml           |  79 --
 .../tuned_four_gaudi_without_rerank.yaml      | 614 ++++++++++++++
 .../four_gaudi/vector-db_run.yaml             |  48 --
 .../single_gaudi/chatqna_config_map.yaml      |  23 -
 .../chatqna_mega_service_run.yaml             |  62 --
 .../dataprep-microservice_run.yaml            |  75 --
 .../embedding-dependency_run.yaml             |  69 --
 .../embedding-microservice_run.yaml           |  59 --
 .../single_gaudi/llm-dependency_run.yaml      |  88 --
 .../single_gaudi/llm-microservice_run.yaml    |  59 --
 .../retrieval-microservice_run.yaml           |  79 --
 .../tuned_single_gaudi_without_rerank.yaml    | 614 ++++++++++++++
 .../single_gaudi/vector-db_run.yaml           |  48 --
 .../two_gaudi/chatqna_config_map.yaml         |  23 -
 .../two_gaudi/chatqna_mega_service_run.yaml   |  62 --
 .../two_gaudi/dataprep-microservice_run.yaml  |  75 --
 .../two_gaudi/embedding-dependency_run.yaml   |  69 --
 .../two_gaudi/embedding-microservice_run.yaml |  59 --
 .../two_gaudi/llm-dependency_run.yaml         |  88 --
 .../two_gaudi/llm-microservice_run.yaml       |  59 --
 .../two_gaudi/retrieval-microservice_run.yaml |  79 --
 .../tuned_two_gaudi_without_rerank.yaml       | 614 ++++++++++++++
 .../two_gaudi/vector-db_run.yaml              |  48 --
 134 files changed, 8254 insertions(+), 7546 deletions(-)
 delete mode 100644 ChatQnA/benchmark/oob/with_rerank/four_gaudi/chatqna_config_map.yaml
 delete mode 100644 ChatQnA/benchmark/oob/with_rerank/four_gaudi/chatqna_mega_service_run.yaml
 delete mode 100644 ChatQnA/benchmark/oob/with_rerank/four_gaudi/dataprep-microservice_run.yaml
 delete mode 100644 ChatQnA/benchmark/oob/with_rerank/four_gaudi/embedding-dependency_run.yaml
 delete mode 100644 ChatQnA/benchmark/oob/with_rerank/four_gaudi/embedding-microservice_run.yaml
 delete mode 100644 ChatQnA/benchmark/oob/with_rerank/four_gaudi/llm-dependency_run.yaml
 delete mode 100644 ChatQnA/benchmark/oob/with_rerank/four_gaudi/llm-microservice_run.yaml
 create mode 100644 ChatQnA/benchmark/oob/with_rerank/four_gaudi/oob_four_gaudi_with_rerank.yaml
 delete mode 100644 ChatQnA/benchmark/oob/with_rerank/four_gaudi/reranking-dependency_run.yaml
 delete mode 100644 ChatQnA/benchmark/oob/with_rerank/four_gaudi/reranking-microservice_run.yaml
 delete mode 100644 ChatQnA/benchmark/oob/with_rerank/four_gaudi/retrieval-microservice_run.yaml
 delete mode 100644 ChatQnA/benchmark/oob/with_rerank/four_gaudi/vector-db_run.yaml
 delete mode 100644 ChatQnA/benchmark/oob/with_rerank/single_gaudi/chatqna_config_map.yaml
 delete mode 100644 ChatQnA/benchmark/oob/with_rerank/single_gaudi/chatqna_mega_service_run.yaml
 delete mode 100644 ChatQnA/benchmark/oob/with_rerank/single_gaudi/dataprep-microservice_run.yaml
 delete mode 100644 ChatQnA/benchmark/oob/with_rerank/single_gaudi/embedding-dependency_run.yaml
 delete mode 100644 ChatQnA/benchmark/oob/with_rerank/single_gaudi/embedding-microservice_run.yaml
 delete mode 100644 ChatQnA/benchmark/oob/with_rerank/single_gaudi/llm-dependency_run.yaml
 delete mode 100644 ChatQnA/benchmark/oob/with_rerank/single_gaudi/llm-microservice_run.yaml
 create mode 100644 ChatQnA/benchmark/oob/with_rerank/single_gaudi/oob_single_gaudi_with_rerank.yaml
 delete mode 100644 ChatQnA/benchmark/oob/with_rerank/single_gaudi/reranking-dependency_run.yaml
 delete mode 100644 ChatQnA/benchmark/oob/with_rerank/single_gaudi/reranking-microservice_run.yaml
 delete mode 100644 ChatQnA/benchmark/oob/with_rerank/single_gaudi/retrieval-microservice_run.yaml
 delete mode 100644 ChatQnA/benchmark/oob/with_rerank/single_gaudi/vector-db_run.yaml
 delete mode 100644 ChatQnA/benchmark/oob/with_rerank/two_gaudi/chatqna_config_map.yaml
 delete mode 100644 ChatQnA/benchmark/oob/with_rerank/two_gaudi/chatqna_mega_service_run.yaml
 delete mode 100644 ChatQnA/benchmark/oob/with_rerank/two_gaudi/dataprep-microservice_run.yaml
 delete mode 100644 ChatQnA/benchmark/oob/with_rerank/two_gaudi/embedding-dependency_run.yaml
 delete mode 100644 ChatQnA/benchmark/oob/with_rerank/two_gaudi/embedding-microservice_run.yaml
 delete mode 100644 ChatQnA/benchmark/oob/with_rerank/two_gaudi/llm-dependency_run.yaml
 delete mode 100644 ChatQnA/benchmark/oob/with_rerank/two_gaudi/llm-microservice_run.yaml
 create mode 100644 ChatQnA/benchmark/oob/with_rerank/two_gaudi/oob_two_gaudi_with_rerank.yaml
 delete mode 100644 ChatQnA/benchmark/oob/with_rerank/two_gaudi/reranking-dependency_run.yaml
 delete mode 100644 ChatQnA/benchmark/oob/with_rerank/two_gaudi/reranking-microservice_run.yaml
 delete mode 100644 ChatQnA/benchmark/oob/with_rerank/two_gaudi/retrieval-microservice_run.yaml
 delete mode 100644 ChatQnA/benchmark/oob/with_rerank/two_gaudi/vector-db_run.yaml
 delete mode 100644 ChatQnA/benchmark/oob/without_rerank/four_gaudi/chatqna_config_map.yaml
 delete mode 100644 ChatQnA/benchmark/oob/without_rerank/four_gaudi/chatqna_mega_service_run.yaml
 delete mode 100644 ChatQnA/benchmark/oob/without_rerank/four_gaudi/dataprep-microservice_run.yaml
 delete mode 100644 ChatQnA/benchmark/oob/without_rerank/four_gaudi/embedding-dependency_run.yaml
 delete mode 100644 ChatQnA/benchmark/oob/without_rerank/four_gaudi/embedding-microservice_run.yaml
 delete mode 100644 ChatQnA/benchmark/oob/without_rerank/four_gaudi/llm-dependency_run.yaml
 delete mode 100644 ChatQnA/benchmark/oob/without_rerank/four_gaudi/llm-microservice_run.yaml
 create mode 100644 ChatQnA/benchmark/oob/without_rerank/four_gaudi/oob_four_gaudi_without_rerank.yaml
 delete mode 100644 ChatQnA/benchmark/oob/without_rerank/four_gaudi/reranking-dependency_run.yaml
 delete mode 100644 ChatQnA/benchmark/oob/without_rerank/four_gaudi/reranking-microservice_run.yaml
 delete mode 100644 ChatQnA/benchmark/oob/without_rerank/four_gaudi/retrieval-microservice_run.yaml
 delete mode 100644 ChatQnA/benchmark/oob/without_rerank/four_gaudi/vector-db_run.yaml
 delete mode 100644 ChatQnA/benchmark/oob/without_rerank/single_gaudi/chatqna_config_map.yaml
 delete mode 100644 ChatQnA/benchmark/oob/without_rerank/single_gaudi/chatqna_mega_service_run.yaml
 delete mode 100644 ChatQnA/benchmark/oob/without_rerank/single_gaudi/dataprep-microservice_run.yaml
 delete mode 100644 ChatQnA/benchmark/oob/without_rerank/single_gaudi/embedding-dependency_run.yaml
 delete mode 100644 ChatQnA/benchmark/oob/without_rerank/single_gaudi/embedding-microservice_run.yaml
 delete mode 100644 ChatQnA/benchmark/oob/without_rerank/single_gaudi/llm-dependency_run.yaml
 delete mode 100644 ChatQnA/benchmark/oob/without_rerank/single_gaudi/llm-microservice_run.yaml
 create mode 100644 ChatQnA/benchmark/oob/without_rerank/single_gaudi/oob_single_gaudi_without_rerank.yaml
 delete mode 100644 ChatQnA/benchmark/oob/without_rerank/single_gaudi/retrieval-microservice_run.yaml
 delete mode 100644 ChatQnA/benchmark/oob/without_rerank/single_gaudi/vector-db_run.yaml
 delete mode 100644 ChatQnA/benchmark/oob/without_rerank/two_gaudi/chatqna_config_map.yaml
 delete mode 100644 ChatQnA/benchmark/oob/without_rerank/two_gaudi/chatqna_mega_service_run.yaml
 delete mode 100644 ChatQnA/benchmark/oob/without_rerank/two_gaudi/dataprep-microservice_run.yaml
 delete mode 100644 ChatQnA/benchmark/oob/without_rerank/two_gaudi/embedding-dependency_run.yaml
 delete mode 100644 ChatQnA/benchmark/oob/without_rerank/two_gaudi/embedding-microservice_run.yaml
 delete mode 100644 ChatQnA/benchmark/oob/without_rerank/two_gaudi/llm-dependency_run.yaml
 delete mode 100644 ChatQnA/benchmark/oob/without_rerank/two_gaudi/llm-microservice_run.yaml
 create mode 100644 ChatQnA/benchmark/oob/without_rerank/two_gaudi/oob_two_gaudi_without_rerank.yaml
 delete mode 100644 ChatQnA/benchmark/oob/without_rerank/two_gaudi/retrieval-microservice_run.yaml
 delete mode 100644 ChatQnA/benchmark/oob/without_rerank/two_gaudi/vector-db_run.yaml
 delete mode 100644 ChatQnA/benchmark/tuned/with_rerank/four_gaudi/chatqna_config_map.yaml
 delete mode 100644 ChatQnA/benchmark/tuned/with_rerank/four_gaudi/chatqna_mega_service_run.yaml
 delete mode 100644 ChatQnA/benchmark/tuned/with_rerank/four_gaudi/dataprep-microservice_run.yaml
 delete mode 100644 ChatQnA/benchmark/tuned/with_rerank/four_gaudi/embedding-dependency_run.yaml
 delete mode 100644 ChatQnA/benchmark/tuned/with_rerank/four_gaudi/embedding-microservice_run.yaml
 delete mode 100644 ChatQnA/benchmark/tuned/with_rerank/four_gaudi/llm-dependency_run.yaml
 delete mode 100644 ChatQnA/benchmark/tuned/with_rerank/four_gaudi/llm-microservice_run.yaml
 delete mode 100644 ChatQnA/benchmark/tuned/with_rerank/four_gaudi/reranking-dependency_run.yaml
 delete mode 100644 ChatQnA/benchmark/tuned/with_rerank/four_gaudi/reranking-microservice_run.yaml
 delete mode 100644 ChatQnA/benchmark/tuned/with_rerank/four_gaudi/retrieval-microservice_run.yaml
 create mode 100644 ChatQnA/benchmark/tuned/with_rerank/four_gaudi/tuned_four_gaudi_with_rerank.yaml
 delete mode 100644 ChatQnA/benchmark/tuned/with_rerank/four_gaudi/vector-db_run.yaml
 delete mode 100644 ChatQnA/benchmark/tuned/with_rerank/single_gaudi/chatqna_config_map.yaml
 delete mode 100644 ChatQnA/benchmark/tuned/with_rerank/single_gaudi/chatqna_mega_service_run.yaml
 delete mode 100644 ChatQnA/benchmark/tuned/with_rerank/single_gaudi/dataprep-microservice_run.yaml
 delete mode 100644 ChatQnA/benchmark/tuned/with_rerank/single_gaudi/embedding-dependency_run.yaml
 delete mode 100644 ChatQnA/benchmark/tuned/with_rerank/single_gaudi/embedding-microservice_run.yaml
 delete mode 100644 ChatQnA/benchmark/tuned/with_rerank/single_gaudi/llm-dependency_run.yaml
 delete mode 100644 ChatQnA/benchmark/tuned/with_rerank/single_gaudi/llm-microservice_run.yaml
 delete mode 100644 ChatQnA/benchmark/tuned/with_rerank/single_gaudi/reranking-dependency_run.yaml
 delete mode 100644 ChatQnA/benchmark/tuned/with_rerank/single_gaudi/reranking-microservice_run.yaml
 delete mode 100644 ChatQnA/benchmark/tuned/with_rerank/single_gaudi/retrieval-microservice_run.yaml
 create mode 100644 ChatQnA/benchmark/tuned/with_rerank/single_gaudi/tuned_single_gaudi_with_rerank.yaml
 delete mode 100644 ChatQnA/benchmark/tuned/with_rerank/single_gaudi/vector-db_run.yaml
 delete mode 100644 ChatQnA/benchmark/tuned/with_rerank/two_gaudi/chatqna_config_map.yaml
 delete mode 100644 ChatQnA/benchmark/tuned/with_rerank/two_gaudi/chatqna_mega_service_run.yaml
 delete mode 100644 ChatQnA/benchmark/tuned/with_rerank/two_gaudi/dataprep-microservice_run.yaml
 delete mode 100644 ChatQnA/benchmark/tuned/with_rerank/two_gaudi/embedding-dependency_run.yaml
 delete mode 100644 ChatQnA/benchmark/tuned/with_rerank/two_gaudi/embedding-microservice_run.yaml
 delete mode 100644 ChatQnA/benchmark/tuned/with_rerank/two_gaudi/llm-dependency_run.yaml
 delete mode 100644 ChatQnA/benchmark/tuned/with_rerank/two_gaudi/llm-microservice_run.yaml
 delete mode 100644 ChatQnA/benchmark/tuned/with_rerank/two_gaudi/reranking-dependency_run.yaml
 delete mode 100644 ChatQnA/benchmark/tuned/with_rerank/two_gaudi/reranking-microservice_run.yaml
 delete mode 100644 ChatQnA/benchmark/tuned/with_rerank/two_gaudi/retrieval-microservice_run.yaml
 create mode 100644 ChatQnA/benchmark/tuned/with_rerank/two_gaudi/tuned_two_gaudi_with_rerank.yaml
 delete mode 100644 ChatQnA/benchmark/tuned/with_rerank/two_gaudi/vector-db_run.yaml
 delete mode 100644 ChatQnA/benchmark/tuned/without_rerank/four_gaudi/chatqna_config_map.yaml
 delete mode 100644 ChatQnA/benchmark/tuned/without_rerank/four_gaudi/chatqna_mega_service_run.yaml
 delete mode 100644 ChatQnA/benchmark/tuned/without_rerank/four_gaudi/dataprep-microservice_run.yaml
 delete mode 100644 ChatQnA/benchmark/tuned/without_rerank/four_gaudi/embedding-dependency_run.yaml
 delete mode 100644 ChatQnA/benchmark/tuned/without_rerank/four_gaudi/embedding-microservice_run.yaml
 delete mode 100644 ChatQnA/benchmark/tuned/without_rerank/four_gaudi/llm-dependency_run.yaml
 delete mode 100644 ChatQnA/benchmark/tuned/without_rerank/four_gaudi/llm-microservice_run.yaml
 delete mode 100644 ChatQnA/benchmark/tuned/without_rerank/four_gaudi/retrieval-microservice_run.yaml
 create mode 100644 ChatQnA/benchmark/tuned/without_rerank/four_gaudi/tuned_four_gaudi_without_rerank.yaml
 delete mode 100644 ChatQnA/benchmark/tuned/without_rerank/four_gaudi/vector-db_run.yaml
 delete mode 100644 ChatQnA/benchmark/tuned/without_rerank/single_gaudi/chatqna_config_map.yaml
 delete mode 100644 ChatQnA/benchmark/tuned/without_rerank/single_gaudi/chatqna_mega_service_run.yaml
 delete mode 100644 ChatQnA/benchmark/tuned/without_rerank/single_gaudi/dataprep-microservice_run.yaml
 delete mode 100644 ChatQnA/benchmark/tuned/without_rerank/single_gaudi/embedding-dependency_run.yaml
 delete mode 100644 ChatQnA/benchmark/tuned/without_rerank/single_gaudi/embedding-microservice_run.yaml
 delete mode 100644 ChatQnA/benchmark/tuned/without_rerank/single_gaudi/llm-dependency_run.yaml
 delete mode 100644 ChatQnA/benchmark/tuned/without_rerank/single_gaudi/llm-microservice_run.yaml
 delete mode 100644 ChatQnA/benchmark/tuned/without_rerank/single_gaudi/retrieval-microservice_run.yaml
 create mode 100644 ChatQnA/benchmark/tuned/without_rerank/single_gaudi/tuned_single_gaudi_without_rerank.yaml
 delete mode 100644 ChatQnA/benchmark/tuned/without_rerank/single_gaudi/vector-db_run.yaml
 delete mode 100644 ChatQnA/benchmark/tuned/without_rerank/two_gaudi/chatqna_config_map.yaml
 delete mode 100644 ChatQnA/benchmark/tuned/without_rerank/two_gaudi/chatqna_mega_service_run.yaml
 delete mode 100644 ChatQnA/benchmark/tuned/without_rerank/two_gaudi/dataprep-microservice_run.yaml
 delete mode 100644 ChatQnA/benchmark/tuned/without_rerank/two_gaudi/embedding-dependency_run.yaml
 delete mode 100644 ChatQnA/benchmark/tuned/without_rerank/two_gaudi/embedding-microservice_run.yaml
 delete mode 100644 ChatQnA/benchmark/tuned/without_rerank/two_gaudi/llm-dependency_run.yaml
 delete mode 100644 ChatQnA/benchmark/tuned/without_rerank/two_gaudi/llm-microservice_run.yaml
 delete mode 100644 ChatQnA/benchmark/tuned/without_rerank/two_gaudi/retrieval-microservice_run.yaml
 create mode 100644 ChatQnA/benchmark/tuned/without_rerank/two_gaudi/tuned_two_gaudi_without_rerank.yaml
 delete mode 100644 ChatQnA/benchmark/tuned/without_rerank/two_gaudi/vector-db_run.yaml

diff --git a/ChatQnA/benchmark/oob/with_rerank/four_gaudi/chatqna_config_map.yaml b/ChatQnA/benchmark/oob/with_rerank/four_gaudi/chatqna_config_map.yaml
deleted file mode 100644
index 368c800e4..000000000
--- a/ChatQnA/benchmark/oob/with_rerank/four_gaudi/chatqna_config_map.yaml
+++ /dev/null
@@ -1,23 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: v1
-kind: ConfigMap
-metadata:
-  name: qna-config
-  namespace: default
-data:
-  EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
-  RERANK_MODEL_ID: BAAI/bge-reranker-base
-  LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
-  TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
-  TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
-  TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
-  REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
-  INDEX_NAME: rag-redis
-  HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
-  EMBEDDING_SERVICE_HOST_IP: embedding-svc
-  RETRIEVER_SERVICE_HOST_IP: retriever-svc
-  RERANK_SERVICE_HOST_IP: reranking-svc
-  NODE_SELECTOR: chatqna-opea
-  LLM_SERVICE_HOST_IP: llm-svc
diff --git a/ChatQnA/benchmark/oob/with_rerank/four_gaudi/chatqna_mega_service_run.yaml b/ChatQnA/benchmark/oob/with_rerank/four_gaudi/chatqna_mega_service_run.yaml
deleted file mode 100644
index 98422525f..000000000
--- a/ChatQnA/benchmark/oob/with_rerank/four_gaudi/chatqna_mega_service_run.yaml
+++ /dev/null
@@ -1,55 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: chatqna-backend-server-deploy
-  namespace: default
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: chatqna-backend-server-deploy
-  template:
-    metadata:
-      annotations:
-        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
-      labels:
-        app: chatqna-backend-server-deploy
-    spec:
-      nodeSelector:
-        node-type: chatqna-opea
-      topologySpreadConstraints:
-      - maxSkew: 1
-        topologyKey: kubernetes.io/hostname
-        whenUnsatisfiable: ScheduleAnyway
-        labelSelector:
-          matchLabels:
-            app: chatqna-backend-server-deploy
-      hostIPC: true
-      containers:
-      - envFrom:
-        - configMapRef:
-            name: qna-config
-        image: opea/chatqna:latest
-        imagePullPolicy: IfNotPresent
-        name: chatqna-backend-server-deploy
-        args: null
-        ports:
-        - containerPort: 8888
-      serviceAccountName: default
----
-kind: Service
-apiVersion: v1
-metadata:
-  name: chatqna-backend-server-svc
-spec:
-  type: NodePort
-  selector:
-    app: chatqna-backend-server-deploy
-  ports:
-  - name: service
-    port: 8888
-    targetPort: 8888
-    nodePort: 30888
diff --git a/ChatQnA/benchmark/oob/with_rerank/four_gaudi/dataprep-microservice_run.yaml b/ChatQnA/benchmark/oob/with_rerank/four_gaudi/dataprep-microservice_run.yaml
deleted file mode 100644
index 4c71df7ce..000000000
--- a/ChatQnA/benchmark/oob/with_rerank/four_gaudi/dataprep-microservice_run.yaml
+++ /dev/null
@@ -1,75 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: dataprep-deploy
-  namespace: default
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: dataprep-deploy
-  template:
-    metadata:
-      annotations:
-        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
-      labels:
-        app: dataprep-deploy
-    spec:
-      nodeSelector:
-        node-type: chatqna-opea
-      topologySpreadConstraints:
-      - maxSkew: 1
-        topologyKey: kubernetes.io/hostname
-        whenUnsatisfiable: ScheduleAnyway
-        labelSelector:
-          matchLabels:
-            app: dataprep-deploy
-      hostIPC: true
-      containers:
-      - env:
-        - name: REDIS_URL
-          valueFrom:
-            configMapKeyRef:
-              name: qna-config
-              key: REDIS_URL
-        - name: TEI_ENDPOINT
-          valueFrom:
-            configMapKeyRef:
-              name: qna-config
-              key: TEI_EMBEDDING_ENDPOINT
-        - name: INDEX_NAME
-          valueFrom:
-            configMapKeyRef:
-              name: qna-config
-              key: INDEX_NAME
-        image: opea/dataprep-redis:latest
-        imagePullPolicy: IfNotPresent
-        name: dataprep-deploy
-        args: null
-        ports:
-        - containerPort: 6007
-        - containerPort: 6008
-        - containerPort: 6009
-      serviceAccountName: default
----
-kind: Service
-apiVersion: v1
-metadata:
-  name: dataprep-svc
-spec:
-  type: ClusterIP
-  selector:
-    app: dataprep-deploy
-  ports:
-  - name: port1
-    port: 6007
-    targetPort: 6007
-  - name: port2
-    port: 6008
-    targetPort: 6008
-  - name: port3
-    port: 6009
-    targetPort: 6009
diff --git a/ChatQnA/benchmark/oob/with_rerank/four_gaudi/embedding-dependency_run.yaml b/ChatQnA/benchmark/oob/with_rerank/four_gaudi/embedding-dependency_run.yaml
deleted file mode 100644
index 42a20871d..000000000
--- a/ChatQnA/benchmark/oob/with_rerank/four_gaudi/embedding-dependency_run.yaml
+++ /dev/null
@@ -1,62 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: embedding-dependency-deploy
-  namespace: default
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: embedding-dependency-deploy
-  template:
-    metadata:
-      annotations:
-        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
-      labels:
-        app: embedding-dependency-deploy
-    spec:
-      nodeSelector:
-        node-type: chatqna-opea
-      containers:
-      - envFrom:
-        - configMapRef:
-            name: qna-config
-        image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
-        name: embedding-dependency-deploy
-        args:
-        - --model-id
-        - $(EMBEDDING_MODEL_ID)
-        - --auto-truncate
-        volumeMounts:
-        - mountPath: /data
-          name: model-volume
-        - mountPath: /dev/shm
-          name: shm
-        ports:
-        - containerPort: 80
-      serviceAccountName: default
-      volumes:
-      - name: model-volume
-        hostPath:
-          path: /mnt/models
-          type: Directory
-      - name: shm
-        emptyDir:
-          medium: Memory
-          sizeLimit: 1Gi
----
-kind: Service
-apiVersion: v1
-metadata:
-  name: embedding-dependency-svc
-spec:
-  type: ClusterIP
-  selector:
-    app: embedding-dependency-deploy
-  ports:
-  - name: service
-    port: 6006
-    targetPort: 80
diff --git a/ChatQnA/benchmark/oob/with_rerank/four_gaudi/embedding-microservice_run.yaml b/ChatQnA/benchmark/oob/with_rerank/four_gaudi/embedding-microservice_run.yaml
deleted file mode 100644
index 3af5b9859..000000000
--- a/ChatQnA/benchmark/oob/with_rerank/four_gaudi/embedding-microservice_run.yaml
+++ /dev/null
@@ -1,54 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: embedding-deploy
-  namespace: default
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: embedding-deploy
-  template:
-    metadata:
-      annotations:
-        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
-      labels:
-        app: embedding-deploy
-    spec:
-      nodeSelector:
-        node-type: chatqna-opea
-      topologySpreadConstraints:
-      - maxSkew: 1
-        topologyKey: kubernetes.io/hostname
-        whenUnsatisfiable: ScheduleAnyway
-        labelSelector:
-          matchLabels:
-            app: embedding-deploy
-      hostIPC: true
-      containers:
-      - envFrom:
-        - configMapRef:
-            name: qna-config
-        image: opea/embedding-tei:latest
-        imagePullPolicy: IfNotPresent
-        name: embedding-deploy
-        args: null
-        ports:
-        - containerPort: 6000
-      serviceAccountName: default
----
-kind: Service
-apiVersion: v1
-metadata:
-  name: embedding-svc
-spec:
-  type: ClusterIP
-  selector:
-    app: embedding-deploy
-  ports:
-  - name: service
-    port: 6000
-    targetPort: 6000
diff --git a/ChatQnA/benchmark/oob/with_rerank/four_gaudi/llm-dependency_run.yaml b/ChatQnA/benchmark/oob/with_rerank/four_gaudi/llm-dependency_run.yaml
deleted file mode 100644
index 130089f87..000000000
--- a/ChatQnA/benchmark/oob/with_rerank/four_gaudi/llm-dependency_run.yaml
+++ /dev/null
@@ -1,88 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: llm-dependency-deploy
-  namespace: default
-spec:
-  replicas: 31
-  selector:
-    matchLabels:
-      app: llm-dependency-deploy
-  template:
-    metadata:
-      annotations:
-        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
-      labels:
-        app: llm-dependency-deploy
-    spec:
-      nodeSelector:
-        node-type: chatqna-opea
-      hostIPC: true
-      containers:
-      - envFrom:
-        - configMapRef:
-            name: qna-config
-        image: ghcr.io/huggingface/tgi-gaudi:2.0.1
-        name: llm-dependency-deploy-demo
-        securityContext:
-          capabilities:
-            add:
-            - SYS_NICE
-        args:
-        - --model-id
-        - $(LLM_MODEL_ID)
-        - --max-input-length
-        - '2048'
-        - --max-total-tokens
-        - '4096'
-        - --max-batch-total-tokens
-        - '65536'
-        - --max-batch-prefill-tokens
-        - '4096'
-        volumeMounts:
-        - mountPath: /data
-          name: model-volume
-        - mountPath: /dev/shm
-          name: shm
-        ports:
-        - containerPort: 80
-        resources:
-          limits:
-            habana.ai/gaudi: 1
-        env:
-        - name: OMPI_MCA_btl_vader_single_copy_mechanism
-          value: none
-        - name: PT_HPU_ENABLE_LAZY_COLLECTIVES
-          value: 'true'
-        - name: runtime
-          value: habana
-        - name: HABANA_VISIBLE_DEVICES
-          value: all
-        - name: HF_TOKEN
-          value: ${HF_TOKEN}
-      serviceAccountName: default
-      volumes:
-      - name: model-volume
-        hostPath:
-          path: /mnt/models
-          type: Directory
-      - name: shm
-        emptyDir:
-          medium: Memory
-          sizeLimit: 1Gi
----
-kind: Service
-apiVersion: v1
-metadata:
-  name: llm-dependency-svc
-spec:
-  type: ClusterIP
-  selector:
-    app: llm-dependency-deploy
-  ports:
-  - name: service
-    port: 9009
-    targetPort: 80
diff --git a/ChatQnA/benchmark/oob/with_rerank/four_gaudi/llm-microservice_run.yaml b/ChatQnA/benchmark/oob/with_rerank/four_gaudi/llm-microservice_run.yaml
deleted file mode 100644
index 3056dbc1d..000000000
--- a/ChatQnA/benchmark/oob/with_rerank/four_gaudi/llm-microservice_run.yaml
+++ /dev/null
@@ -1,54 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: llm-deploy
-  namespace: default
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: llm-deploy
-  template:
-    metadata:
-      annotations:
-        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
-      labels:
-        app: llm-deploy
-    spec:
-      nodeSelector:
-        node-type: chatqna-opea
-      topologySpreadConstraints:
-      - maxSkew: 1
-        topologyKey: kubernetes.io/hostname
-        whenUnsatisfiable: ScheduleAnyway
-        labelSelector:
-          matchLabels:
-            app: llm-deploy
-      hostIPC: true
-      containers:
-      - envFrom:
-        - configMapRef:
-            name: qna-config
-        image: opea/llm-tgi:latest
-        imagePullPolicy: IfNotPresent
-        name: llm-deploy
-        args: null
-        ports:
-        - containerPort: 9000
-      serviceAccountName: default
----
-kind: Service
-apiVersion: v1
-metadata:
-  name: llm-svc
-spec:
-  type: ClusterIP
-  selector:
-    app: llm-deploy
-  ports:
-  - name: service
-    port: 9000
-    targetPort: 9000
diff --git a/ChatQnA/benchmark/oob/with_rerank/four_gaudi/oob_four_gaudi_with_rerank.yaml b/ChatQnA/benchmark/oob/with_rerank/four_gaudi/oob_four_gaudi_with_rerank.yaml
new file mode 100644
index 000000000..f8684c239
--- /dev/null
+++ b/ChatQnA/benchmark/oob/with_rerank/four_gaudi/oob_four_gaudi_with_rerank.yaml
@@ -0,0 +1,734 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+kind: ConfigMap  # shared settings read by the ChatQnA Deployments in this file
+metadata:
+  name: qna-config
+  namespace: default
+data:
+  EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5  # used as --model-id by embedding-dependency-deploy
+  RERANK_MODEL_ID: BAAI/bge-reranker-base  # used as --model-id by reranking-dependency-deploy
+  LLM_MODEL_ID: Intel/neural-chat-7b-v3-3  # used as --model-id by llm-dependency-deploy
+  TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
+  TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
+  TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
+  REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
+  INDEX_NAME: rag-redis
+  HUGGINGFACEHUB_API_TOKEN: '${HF_TOKEN}'  # quoted: an empty substitution must stay '' and not become null
+  EMBEDDING_SERVICE_HOST_IP: embedding-svc  # the *_HOST_IP values are Service names in this namespace
+  RETRIEVER_SERVICE_HOST_IP: retriever-svc
+  RERANK_SERVICE_HOST_IP: reranking-svc
+  NODE_SELECTOR: chatqna-opea  # same value as the node-type nodeSelector used by the Deployments
+  LLM_SERVICE_HOST_IP: llm-svc
+
+
+---
+
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment  # ChatQnA backend server (image opea/chatqna)
+metadata:
+  name: chatqna-backend-server-deploy
+  namespace: default
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: chatqna-backend-server-deploy
+  template:
+    metadata:
+      labels:
+        app: chatqna-backend-server-deploy  # matched by spec.selector above
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+    spec:
+      serviceAccountName: default
+      hostIPC: true
+      nodeSelector:
+        node-type: chatqna-opea  # only nodes carrying this label are eligible
+      topologySpreadConstraints:
+      - labelSelector:
+          matchLabels:
+            app: chatqna-backend-server-deploy
+        topologyKey: kubernetes.io/hostname
+        maxSkew: 1
+        whenUnsatisfiable: ScheduleAnyway  # spreading is a preference, not a hard rule
+      containers:
+      - name: chatqna-backend-server-deploy
+        image: opea/chatqna:latest
+        imagePullPolicy: IfNotPresent
+        args: null
+        envFrom:
+        - configMapRef:
+            name: qna-config  # all qna-config keys are exposed as env vars
+        ports:
+        - containerPort: 8888
+---
+apiVersion: v1
+kind: Service  # NodePort entry point for the ChatQnA backend server
+metadata:
+  name: chatqna-backend-server-svc
+  namespace: default
+spec:
+  selector:
+    app: chatqna-backend-server-deploy  # pods labelled by the matching Deployment
+  type: NodePort
+  ports:
+  - name: service
+    nodePort: 30888  # fixed node-level port
+    port: 8888
+    targetPort: 8888  # containerPort of the backend pod
+
+
+---
+
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment  # dataprep microservice (image opea/dataprep-redis)
+metadata:
+  name: dataprep-deploy
+  namespace: default
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: dataprep-deploy
+  template:
+    metadata:
+      labels:
+        app: dataprep-deploy  # matched by spec.selector above
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+    spec:
+      serviceAccountName: default
+      hostIPC: true
+      nodeSelector:
+        node-type: chatqna-opea
+      topologySpreadConstraints:
+      - labelSelector:
+          matchLabels:
+            app: dataprep-deploy
+        topologyKey: kubernetes.io/hostname
+        maxSkew: 1
+        whenUnsatisfiable: ScheduleAnyway  # spreading is a preference, not a hard rule
+      containers:
+      - name: dataprep-deploy
+        image: opea/dataprep-redis:latest
+        imagePullPolicy: IfNotPresent
+        args: null
+        env:  # only these three qna-config keys are injected, one by one
+        - name: REDIS_URL
+          valueFrom:
+            configMapKeyRef:
+              key: REDIS_URL
+              name: qna-config
+        - name: TEI_ENDPOINT  # env name differs from the ConfigMap key it reads
+          valueFrom:
+            configMapKeyRef:
+              key: TEI_EMBEDDING_ENDPOINT
+              name: qna-config
+        - name: INDEX_NAME
+          valueFrom:
+            configMapKeyRef:
+              key: INDEX_NAME
+              name: qna-config
+        ports:
+        - containerPort: 6007
+        - containerPort: 6008
+        - containerPort: 6009
+---
+apiVersion: v1
+kind: Service  # in-cluster access to the dataprep pods
+metadata:
+  name: dataprep-svc
+  namespace: default
+spec:
+  selector:
+    app: dataprep-deploy
+  type: ClusterIP
+  ports:  # one entry per containerPort declared by dataprep-deploy
+  - name: port1
+    targetPort: 6007
+    port: 6007
+  - name: port2
+    targetPort: 6008
+    port: 6008
+  - name: port3
+    targetPort: 6009
+    port: 6009
+
+
+---
+
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment  # text-embeddings-inference server for the embedding model (CPU image)
+metadata:
+  name: embedding-dependency-deploy
+  namespace: default
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: embedding-dependency-deploy
+  template:
+    metadata:
+      labels:
+        app: embedding-dependency-deploy  # matched by spec.selector above
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+    spec:
+      serviceAccountName: default
+      nodeSelector:
+        node-type: chatqna-opea
+      volumes:
+      - name: model-volume
+        hostPath:
+          type: Directory  # the host path has to exist already
+          path: /mnt/models
+      - name: shm
+        emptyDir:
+          sizeLimit: 1Gi
+          medium: Memory
+      containers:
+      - name: embedding-dependency-deploy
+        image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
+        args:
+        - --model-id
+        - $(EMBEDDING_MODEL_ID)  # resolved from the container env (qna-config via envFrom)
+        - --auto-truncate
+        envFrom:
+        - configMapRef:
+            name: qna-config
+        ports:
+        - containerPort: 80
+        volumeMounts:
+        - name: model-volume
+          mountPath: /data
+        - name: shm
+          mountPath: /dev/shm
+---
+apiVersion: v1
+kind: Service  # the address used by TEI_EMBEDDING_ENDPOINT in qna-config
+metadata:
+  name: embedding-dependency-svc
+  namespace: default
+spec:
+  selector:
+    app: embedding-dependency-deploy
+  type: ClusterIP
+  ports:
+  - name: service
+    targetPort: 80  # containerPort of the embedding server pod
+    port: 6006
+
+
+---
+
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment  # embedding microservice (image opea/embedding-tei)
+metadata:
+  name: embedding-deploy
+  namespace: default
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: embedding-deploy
+  template:
+    metadata:
+      labels:
+        app: embedding-deploy  # matched by spec.selector above
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+    spec:
+      serviceAccountName: default
+      hostIPC: true
+      nodeSelector:
+        node-type: chatqna-opea
+      topologySpreadConstraints:
+      - labelSelector:
+          matchLabels:
+            app: embedding-deploy
+        topologyKey: kubernetes.io/hostname
+        maxSkew: 1
+        whenUnsatisfiable: ScheduleAnyway  # spreading is a preference, not a hard rule
+      containers:
+      - name: embedding-deploy
+        image: opea/embedding-tei:latest
+        imagePullPolicy: IfNotPresent
+        args: null
+        envFrom:
+        - configMapRef:
+            name: qna-config  # all qna-config keys are exposed as env vars
+        ports:
+        - containerPort: 6000
+---
+apiVersion: v1
+kind: Service  # name referenced by EMBEDDING_SERVICE_HOST_IP in qna-config
+metadata:
+  name: embedding-svc
+  namespace: default
+spec:
+  selector:
+    app: embedding-deploy
+  type: ClusterIP
+  ports:
+  - name: service
+    targetPort: 6000
+    port: 6000
+
+
+---
+
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment  # tgi-gaudi LLM servers; each pod requests one habana.ai/gaudi device
+metadata:
+  name: llm-dependency-deploy
+  namespace: default
+spec:
+  replicas: 31  # NOTE(review): looks like 32 Gaudi cards minus 1 for reranking - confirm
+  selector:
+    matchLabels:
+      app: llm-dependency-deploy
+  template:
+    metadata:
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+      labels:
+        app: llm-dependency-deploy  # matched by spec.selector above
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea
+      hostIPC: true
+      containers:
+      - envFrom:
+        - configMapRef:
+            name: qna-config  # supplies LLM_MODEL_ID used in args below
+        image: ghcr.io/huggingface/tgi-gaudi:2.0.1
+        name: llm-dependency-deploy-demo
+        securityContext:
+          capabilities:
+            add:
+            - SYS_NICE
+        args:  # numeric flag values are quoted because args entries must be strings
+        - --model-id
+        - $(LLM_MODEL_ID)
+        - --max-input-length
+        - '2048'
+        - --max-total-tokens
+        - '4096'
+        - --max-batch-total-tokens
+        - '65536'
+        - --max-batch-prefill-tokens
+        - '4096'
+        volumeMounts:
+        - mountPath: /data
+          name: model-volume
+        - mountPath: /dev/shm
+          name: shm
+        ports:
+        - containerPort: 80
+        resources:
+          limits:
+            habana.ai/gaudi: 1
+        env:
+        - name: OMPI_MCA_btl_vader_single_copy_mechanism
+          value: none
+        - name: PT_HPU_ENABLE_LAZY_COLLECTIVES
+          value: 'true'
+        - name: runtime
+          value: habana
+        - name: HABANA_VISIBLE_DEVICES
+          value: all
+        - name: HF_TOKEN
+          value: '${HF_TOKEN}'  # quoted: an empty substitution must stay '' and not become null
+      serviceAccountName: default
+      volumes:
+      - name: model-volume
+        hostPath:
+          path: /mnt/models
+          type: Directory  # the host path has to exist already
+      - name: shm
+        emptyDir:
+          medium: Memory
+          sizeLimit: 1Gi
+---
+apiVersion: v1
+kind: Service  # the address used by TGI_LLM_ENDPOINT in qna-config
+metadata:
+  name: llm-dependency-svc
+  namespace: default
+spec:
+  selector:
+    app: llm-dependency-deploy
+  type: ClusterIP
+  ports:
+  - name: service
+    targetPort: 80  # containerPort of the LLM server pods
+    port: 9009
+
+
+---
+
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment  # LLM microservice (image opea/llm-tgi)
+metadata:
+  name: llm-deploy
+  namespace: default
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: llm-deploy
+  template:
+    metadata:
+      labels:
+        app: llm-deploy  # matched by spec.selector above
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+    spec:
+      serviceAccountName: default
+      hostIPC: true
+      nodeSelector:
+        node-type: chatqna-opea
+      topologySpreadConstraints:
+      - labelSelector:
+          matchLabels:
+            app: llm-deploy
+        topologyKey: kubernetes.io/hostname
+        maxSkew: 1
+        whenUnsatisfiable: ScheduleAnyway  # spreading is a preference, not a hard rule
+      containers:
+      - name: llm-deploy
+        image: opea/llm-tgi:latest
+        imagePullPolicy: IfNotPresent
+        args: null
+        envFrom:
+        - configMapRef:
+            name: qna-config  # all qna-config keys are exposed as env vars
+        ports:
+        - containerPort: 9000
+---
+apiVersion: v1
+kind: Service  # name referenced by LLM_SERVICE_HOST_IP in qna-config
+metadata:
+  name: llm-svc
+  namespace: default
+spec:
+  selector:
+    app: llm-deploy
+  type: ClusterIP
+  ports:
+  - name: service
+    targetPort: 9000
+    port: 9000
+
+
+---
+
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment  # tei-gaudi server for the rerank model; requests one habana.ai/gaudi device
+metadata:
+  name: reranking-dependency-deploy
+  namespace: default
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: reranking-dependency-deploy
+  template:
+    metadata:
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+      labels:
+        app: reranking-dependency-deploy  # matched by spec.selector above
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea
+      topologySpreadConstraints:
+      - maxSkew: 1
+        topologyKey: kubernetes.io/hostname
+        whenUnsatisfiable: ScheduleAnyway  # spreading is a preference, not a hard rule
+        labelSelector:
+          matchLabels:
+            app: reranking-dependency-deploy
+      containers:
+      - envFrom:
+        - configMapRef:
+            name: qna-config  # supplies RERANK_MODEL_ID used in args below
+        image: opea/tei-gaudi:latest
+        name: reranking-dependency-deploy
+        args:
+        - --model-id
+        - $(RERANK_MODEL_ID)
+        - --auto-truncate
+        volumeMounts:
+        - mountPath: /data
+          name: model-volume
+        - mountPath: /dev/shm
+          name: shm
+        ports:
+        - containerPort: 80
+        resources:
+          limits:
+            habana.ai/gaudi: 1
+        env:
+        - name: OMPI_MCA_btl_vader_single_copy_mechanism
+          value: none
+        - name: PT_HPU_ENABLE_LAZY_COLLECTIVES
+          value: 'true'
+        - name: runtime
+          value: habana
+        - name: HABANA_VISIBLE_DEVICES
+          value: all
+        - name: HF_TOKEN
+          value: '${HF_TOKEN}'  # quoted: an empty substitution must stay '' and not become null
+        - name: MAX_WARMUP_SEQUENCE_LENGTH
+          value: '512'  # env values must be strings, hence the quotes
+      serviceAccountName: default
+      volumes:
+      - name: model-volume
+        hostPath:
+          path: /mnt/models
+          type: Directory  # the host path has to exist already
+      - name: shm
+        emptyDir:
+          medium: Memory
+          sizeLimit: 1Gi
+---
+apiVersion: v1
+kind: Service  # the address used by TEI_RERANKING_ENDPOINT in qna-config
+metadata:
+  name: reranking-dependency-svc
+  namespace: default
+spec:
+  selector:
+    app: reranking-dependency-deploy
+  type: ClusterIP
+  ports:
+  - name: service
+    targetPort: 80  # containerPort of the reranking server pod
+    port: 8808
+
+
+---
+
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment  # reranking microservice (image opea/reranking-tei)
+metadata:
+  name: reranking-deploy
+  namespace: default
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: reranking-deploy
+  template:
+    metadata:
+      labels:
+        app: reranking-deploy  # matched by spec.selector above
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+    spec:
+      serviceAccountName: default
+      hostIPC: true
+      nodeSelector:
+        node-type: chatqna-opea
+      topologySpreadConstraints:
+      - labelSelector:
+          matchLabels:
+            app: reranking-deploy
+        topologyKey: kubernetes.io/hostname
+        maxSkew: 1
+        whenUnsatisfiable: ScheduleAnyway  # spreading is a preference, not a hard rule
+      containers:
+      - name: reranking-deploy
+        image: opea/reranking-tei:latest
+        imagePullPolicy: IfNotPresent
+        args: null
+        envFrom:
+        - configMapRef:
+            name: qna-config  # all qna-config keys are exposed as env vars
+        ports:
+        - containerPort: 8000
+---
+apiVersion: v1
+kind: Service  # name referenced by RERANK_SERVICE_HOST_IP in qna-config
+metadata:
+  name: reranking-svc
+  namespace: default
+spec:
+  selector:
+    app: reranking-deploy
+  type: ClusterIP
+  ports:
+  - name: service
+    targetPort: 8000
+    port: 8000
+
+
+---
+
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment  # retriever microservice (image opea/retriever-redis)
+metadata:
+  name: retriever-deploy
+  namespace: default
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: retriever-deploy
+  template:
+    metadata:
+      labels:
+        app: retriever-deploy  # matched by spec.selector above
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+    spec:
+      serviceAccountName: default
+      hostIPC: true
+      nodeSelector:
+        node-type: chatqna-opea
+      topologySpreadConstraints:
+      - labelSelector:
+          matchLabels:
+            app: retriever-deploy
+        topologyKey: kubernetes.io/hostname
+        maxSkew: 1
+        whenUnsatisfiable: ScheduleAnyway  # spreading is a preference, not a hard rule
+      containers:
+      - name: retriever-deploy
+        image: opea/retriever-redis:latest
+        imagePullPolicy: IfNotPresent
+        args: null
+        env:  # only these four qna-config keys are injected, one by one
+        - name: REDIS_URL
+          valueFrom:
+            configMapKeyRef:
+              key: REDIS_URL
+              name: qna-config
+        - name: TEI_EMBEDDING_ENDPOINT
+          valueFrom:
+            configMapKeyRef:
+              key: TEI_EMBEDDING_ENDPOINT
+              name: qna-config
+        - name: HUGGINGFACEHUB_API_TOKEN
+          valueFrom:
+            configMapKeyRef:
+              key: HUGGINGFACEHUB_API_TOKEN
+              name: qna-config
+        - name: INDEX_NAME
+          valueFrom:
+            configMapKeyRef:
+              key: INDEX_NAME
+              name: qna-config
+        ports:
+        - containerPort: 7000
+---
+apiVersion: v1
+kind: Service  # name referenced by RETRIEVER_SERVICE_HOST_IP in qna-config
+metadata:
+  name: retriever-svc
+  namespace: default
+spec:
+  selector:
+    app: retriever-deploy
+  type: ClusterIP
+  ports:
+  - name: service
+    targetPort: 7000
+    port: 7000
+
+
+---
+
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment  # single redis-stack pod; REDIS_URL in qna-config targets its Service
+metadata:
+  name: vector-db
+  namespace: default
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: vector-db
+  template:
+    metadata:
+      labels:
+        app: vector-db  # matched by spec.selector above
+    spec:
+      topologySpreadConstraints:
+      - labelSelector:
+          matchLabels:
+            app: vector-db
+        topologyKey: kubernetes.io/hostname
+        maxSkew: 1
+        whenUnsatisfiable: ScheduleAnyway  # spreading is a preference, not a hard rule
+      nodeSelector:
+        node-type: chatqna-opea
+      containers:
+      - name: vector-db
+        image: redis/redis-stack:7.2.0-v9
+        ports:
+        - containerPort: 6379  # exposed by the Service as vector-db-service
+        - containerPort: 8001  # exposed by the Service as vector-db-insight
+---
+apiVersion: v1
+kind: Service  # in-cluster access to the vector-db pod
+metadata:
+  name: vector-db
+  namespace: default
+spec:
+  selector:
+    app: vector-db
+  type: ClusterIP
+  ports:
+  - name: vector-db-service
+    targetPort: 6379
+    port: 6379  # port used by REDIS_URL in qna-config
+  - name: vector-db-insight
+    targetPort: 8001
+    port: 8001
+
+
+---
diff --git a/ChatQnA/benchmark/oob/with_rerank/four_gaudi/reranking-dependency_run.yaml b/ChatQnA/benchmark/oob/with_rerank/four_gaudi/reranking-dependency_run.yaml
deleted file mode 100644
index af908ecd1..000000000
--- a/ChatQnA/benchmark/oob/with_rerank/four_gaudi/reranking-dependency_run.yaml
+++ /dev/null
@@ -1,85 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: reranking-dependency-deploy
-  namespace: default
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: reranking-dependency-deploy
-  template:
-    metadata:
-      annotations:
-        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
-      labels:
-        app: reranking-dependency-deploy
-    spec:
-      nodeSelector:
-        node-type: chatqna-opea
-      topologySpreadConstraints:
-      - maxSkew: 1
-        topologyKey: kubernetes.io/hostname
-        whenUnsatisfiable: ScheduleAnyway
-        labelSelector:
-          matchLabels:
-            app: reranking-dependency-deploy
-      containers:
-      - envFrom:
-        - configMapRef:
-            name: qna-config
-        image: opea/tei-gaudi:latest
-        name: reranking-dependency-deploy
-        args:
-        - --model-id
-        - $(RERANK_MODEL_ID)
-        - --auto-truncate
-        volumeMounts:
-        - mountPath: /data
-          name: model-volume
-        - mountPath: /dev/shm
-          name: shm
-        ports:
-        - containerPort: 80
-        resources:
-          limits:
-            habana.ai/gaudi: 1
-        env:
-        - name: OMPI_MCA_btl_vader_single_copy_mechanism
-          value: none
-        - name: PT_HPU_ENABLE_LAZY_COLLECTIVES
-          value: 'true'
-        - name: runtime
-          value: habana
-        - name: HABANA_VISIBLE_DEVICES
-          value: all
-        - name: HF_TOKEN
-          value: ${HF_TOKEN}
-        - name: MAX_WARMUP_SEQUENCE_LENGTH
-          value: '512'
-      serviceAccountName: default
-      volumes:
-      - name: model-volume
-        hostPath:
-          path: /mnt/models
-          type: Directory
-      - name: shm
-        emptyDir:
-          medium: Memory
-          sizeLimit: 1Gi
----
-kind: Service
-apiVersion: v1
-metadata:
-  name: reranking-dependency-svc
-spec:
-  type: ClusterIP
-  selector:
-    app: reranking-dependency-deploy
-  ports:
-  - name: service
-    port: 8808
-    targetPort: 80
diff --git a/ChatQnA/benchmark/oob/with_rerank/four_gaudi/reranking-microservice_run.yaml b/ChatQnA/benchmark/oob/with_rerank/four_gaudi/reranking-microservice_run.yaml
deleted file mode 100644
index 0723d46a8..000000000
--- a/ChatQnA/benchmark/oob/with_rerank/four_gaudi/reranking-microservice_run.yaml
+++ /dev/null
@@ -1,54 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: reranking-deploy
-  namespace: default
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: reranking-deploy
-  template:
-    metadata:
-      annotations:
-        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
-      labels:
-        app: reranking-deploy
-    spec:
-      nodeSelector:
-        node-type: chatqna-opea
-      topologySpreadConstraints:
-      - maxSkew: 1
-        topologyKey: kubernetes.io/hostname
-        whenUnsatisfiable: ScheduleAnyway
-        labelSelector:
-          matchLabels:
-            app: reranking-deploy
-      hostIPC: true
-      containers:
-      - envFrom:
-        - configMapRef:
-            name: qna-config
-        image: opea/reranking-tei:latest
-        imagePullPolicy: IfNotPresent
-        name: reranking-deploy
-        args: null
-        ports:
-        - containerPort: 8000
-      serviceAccountName: default
----
-kind: Service
-apiVersion: v1
-metadata:
-  name: reranking-svc
-spec:
-  type: ClusterIP
-  selector:
-    app: reranking-deploy
-  ports:
-  - name: service
-    port: 8000
-    targetPort: 8000
diff --git a/ChatQnA/benchmark/oob/with_rerank/four_gaudi/retrieval-microservice_run.yaml b/ChatQnA/benchmark/oob/with_rerank/four_gaudi/retrieval-microservice_run.yaml
deleted file mode 100644
index ac6c12fdc..000000000
--- a/ChatQnA/benchmark/oob/with_rerank/four_gaudi/retrieval-microservice_run.yaml
+++ /dev/null
@@ -1,72 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: retriever-deploy
-  namespace: default
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: retriever-deploy
-  template:
-    metadata:
-      annotations:
-        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
-      labels:
-        app: retriever-deploy
-    spec:
-      nodeSelector:
-        node-type: chatqna-opea
-      topologySpreadConstraints:
-      - maxSkew: 1
-        topologyKey: kubernetes.io/hostname
-        whenUnsatisfiable: ScheduleAnyway
-        labelSelector:
-          matchLabels:
-            app: retriever-deploy
-      hostIPC: true
-      containers:
-      - env:
-        - name: REDIS_URL
-          valueFrom:
-            configMapKeyRef:
-              name: qna-config
-              key: REDIS_URL
-        - name: TEI_EMBEDDING_ENDPOINT
-          valueFrom:
-            configMapKeyRef:
-              name: qna-config
-              key: TEI_EMBEDDING_ENDPOINT
-        - name: HUGGINGFACEHUB_API_TOKEN
-          valueFrom:
-            configMapKeyRef:
-              name: qna-config
-              key: HUGGINGFACEHUB_API_TOKEN
-        - name: INDEX_NAME
-          valueFrom:
-            configMapKeyRef:
-              name: qna-config
-              key: INDEX_NAME
-        image: opea/retriever-redis:latest
-        imagePullPolicy: IfNotPresent
-        name: retriever-deploy
-        args: null
-        ports:
-        - containerPort: 7000
-      serviceAccountName: default
----
-kind: Service
-apiVersion: v1
-metadata:
-  name: retriever-svc
-spec:
-  type: ClusterIP
-  selector:
-    app: retriever-deploy
-  ports:
-  - name: service
-    port: 7000
-    targetPort: 7000
diff --git a/ChatQnA/benchmark/oob/with_rerank/four_gaudi/vector-db_run.yaml b/ChatQnA/benchmark/oob/with_rerank/four_gaudi/vector-db_run.yaml
deleted file mode 100644
index e04e8c5fe..000000000
--- a/ChatQnA/benchmark/oob/with_rerank/four_gaudi/vector-db_run.yaml
+++ /dev/null
@@ -1,48 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: vector-db
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: vector-db
-  template:
-    metadata:
-      labels:
-        app: vector-db
-    spec:
-      nodeSelector:
-        node-type: chatqna-opea
-      topologySpreadConstraints:
-      - maxSkew: 1
-        topologyKey: kubernetes.io/hostname
-        whenUnsatisfiable: ScheduleAnyway
-        labelSelector:
-          matchLabels:
-            app: vector-db
-      containers:
-      - name: vector-db
-        image: redis/redis-stack:7.2.0-v9
-        ports:
-        - containerPort: 6379
-        - containerPort: 8001
----
-apiVersion: v1
-kind: Service
-metadata:
-  name: vector-db
-spec:
-  type: ClusterIP
-  selector:
-    app: vector-db
-  ports:
-  - name: vector-db-service
-    port: 6379
-    targetPort: 6379
-  - name: vector-db-insight
-    port: 8001
-    targetPort: 8001
diff --git a/ChatQnA/benchmark/oob/with_rerank/single_gaudi/chatqna_config_map.yaml b/ChatQnA/benchmark/oob/with_rerank/single_gaudi/chatqna_config_map.yaml
deleted file mode 100644
index 368c800e4..000000000
--- a/ChatQnA/benchmark/oob/with_rerank/single_gaudi/chatqna_config_map.yaml
+++ /dev/null
@@ -1,23 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: v1
-kind: ConfigMap
-metadata:
-  name: qna-config
-  namespace: default
-data:
-  EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
-  RERANK_MODEL_ID: BAAI/bge-reranker-base
-  LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
-  TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
-  TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
-  TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
-  REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
-  INDEX_NAME: rag-redis
-  HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
-  EMBEDDING_SERVICE_HOST_IP: embedding-svc
-  RETRIEVER_SERVICE_HOST_IP: retriever-svc
-  RERANK_SERVICE_HOST_IP: reranking-svc
-  NODE_SELECTOR: chatqna-opea
-  LLM_SERVICE_HOST_IP: llm-svc
diff --git a/ChatQnA/benchmark/oob/with_rerank/single_gaudi/chatqna_mega_service_run.yaml b/ChatQnA/benchmark/oob/with_rerank/single_gaudi/chatqna_mega_service_run.yaml
deleted file mode 100644
index 98422525f..000000000
--- a/ChatQnA/benchmark/oob/with_rerank/single_gaudi/chatqna_mega_service_run.yaml
+++ /dev/null
@@ -1,55 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: chatqna-backend-server-deploy
-  namespace: default
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: chatqna-backend-server-deploy
-  template:
-    metadata:
-      annotations:
-        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
-      labels:
-        app: chatqna-backend-server-deploy
-    spec:
-      nodeSelector:
-        node-type: chatqna-opea
-      topologySpreadConstraints:
-      - maxSkew: 1
-        topologyKey: kubernetes.io/hostname
-        whenUnsatisfiable: ScheduleAnyway
-        labelSelector:
-          matchLabels:
-            app: chatqna-backend-server-deploy
-      hostIPC: true
-      containers:
-      - envFrom:
-        - configMapRef:
-            name: qna-config
-        image: opea/chatqna:latest
-        imagePullPolicy: IfNotPresent
-        name: chatqna-backend-server-deploy
-        args: null
-        ports:
-        - containerPort: 8888
-      serviceAccountName: default
----
-kind: Service
-apiVersion: v1
-metadata:
-  name: chatqna-backend-server-svc
-spec:
-  type: NodePort
-  selector:
-    app: chatqna-backend-server-deploy
-  ports:
-  - name: service
-    port: 8888
-    targetPort: 8888
-    nodePort: 30888
diff --git a/ChatQnA/benchmark/oob/with_rerank/single_gaudi/dataprep-microservice_run.yaml b/ChatQnA/benchmark/oob/with_rerank/single_gaudi/dataprep-microservice_run.yaml
deleted file mode 100644
index 4c71df7ce..000000000
--- a/ChatQnA/benchmark/oob/with_rerank/single_gaudi/dataprep-microservice_run.yaml
+++ /dev/null
@@ -1,75 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: dataprep-deploy
-  namespace: default
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: dataprep-deploy
-  template:
-    metadata:
-      annotations:
-        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
-      labels:
-        app: dataprep-deploy
-    spec:
-      nodeSelector:
-        node-type: chatqna-opea
-      topologySpreadConstraints:
-      - maxSkew: 1
-        topologyKey: kubernetes.io/hostname
-        whenUnsatisfiable: ScheduleAnyway
-        labelSelector:
-          matchLabels:
-            app: dataprep-deploy
-      hostIPC: true
-      containers:
-      - env:
-        - name: REDIS_URL
-          valueFrom:
-            configMapKeyRef:
-              name: qna-config
-              key: REDIS_URL
-        - name: TEI_ENDPOINT
-          valueFrom:
-            configMapKeyRef:
-              name: qna-config
-              key: TEI_EMBEDDING_ENDPOINT
-        - name: INDEX_NAME
-          valueFrom:
-            configMapKeyRef:
-              name: qna-config
-              key: INDEX_NAME
-        image: opea/dataprep-redis:latest
-        imagePullPolicy: IfNotPresent
-        name: dataprep-deploy
-        args: null
-        ports:
-        - containerPort: 6007
-        - containerPort: 6008
-        - containerPort: 6009
-      serviceAccountName: default
----
-kind: Service
-apiVersion: v1
-metadata:
-  name: dataprep-svc
-spec:
-  type: ClusterIP
-  selector:
-    app: dataprep-deploy
-  ports:
-  - name: port1
-    port: 6007
-    targetPort: 6007
-  - name: port2
-    port: 6008
-    targetPort: 6008
-  - name: port3
-    port: 6009
-    targetPort: 6009
diff --git a/ChatQnA/benchmark/oob/with_rerank/single_gaudi/embedding-dependency_run.yaml b/ChatQnA/benchmark/oob/with_rerank/single_gaudi/embedding-dependency_run.yaml
deleted file mode 100644
index 42a20871d..000000000
--- a/ChatQnA/benchmark/oob/with_rerank/single_gaudi/embedding-dependency_run.yaml
+++ /dev/null
@@ -1,62 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: embedding-dependency-deploy
-  namespace: default
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: embedding-dependency-deploy
-  template:
-    metadata:
-      annotations:
-        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
-      labels:
-        app: embedding-dependency-deploy
-    spec:
-      nodeSelector:
-        node-type: chatqna-opea
-      containers:
-      - envFrom:
-        - configMapRef:
-            name: qna-config
-        image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
-        name: embedding-dependency-deploy
-        args:
-        - --model-id
-        - $(EMBEDDING_MODEL_ID)
-        - --auto-truncate
-        volumeMounts:
-        - mountPath: /data
-          name: model-volume
-        - mountPath: /dev/shm
-          name: shm
-        ports:
-        - containerPort: 80
-      serviceAccountName: default
-      volumes:
-      - name: model-volume
-        hostPath:
-          path: /mnt/models
-          type: Directory
-      - name: shm
-        emptyDir:
-          medium: Memory
-          sizeLimit: 1Gi
----
-kind: Service
-apiVersion: v1
-metadata:
-  name: embedding-dependency-svc
-spec:
-  type: ClusterIP
-  selector:
-    app: embedding-dependency-deploy
-  ports:
-  - name: service
-    port: 6006
-    targetPort: 80
diff --git a/ChatQnA/benchmark/oob/with_rerank/single_gaudi/embedding-microservice_run.yaml b/ChatQnA/benchmark/oob/with_rerank/single_gaudi/embedding-microservice_run.yaml
deleted file mode 100644
index 3af5b9859..000000000
--- a/ChatQnA/benchmark/oob/with_rerank/single_gaudi/embedding-microservice_run.yaml
+++ /dev/null
@@ -1,54 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: embedding-deploy
-  namespace: default
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: embedding-deploy
-  template:
-    metadata:
-      annotations:
-        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
-      labels:
-        app: embedding-deploy
-    spec:
-      nodeSelector:
-        node-type: chatqna-opea
-      topologySpreadConstraints:
-      - maxSkew: 1
-        topologyKey: kubernetes.io/hostname
-        whenUnsatisfiable: ScheduleAnyway
-        labelSelector:
-          matchLabels:
-            app: embedding-deploy
-      hostIPC: true
-      containers:
-      - envFrom:
-        - configMapRef:
-            name: qna-config
-        image: opea/embedding-tei:latest
-        imagePullPolicy: IfNotPresent
-        name: embedding-deploy
-        args: null
-        ports:
-        - containerPort: 6000
-      serviceAccountName: default
----
-kind: Service
-apiVersion: v1
-metadata:
-  name: embedding-svc
-spec:
-  type: ClusterIP
-  selector:
-    app: embedding-deploy
-  ports:
-  - name: service
-    port: 6000
-    targetPort: 6000
diff --git a/ChatQnA/benchmark/oob/with_rerank/single_gaudi/llm-dependency_run.yaml b/ChatQnA/benchmark/oob/with_rerank/single_gaudi/llm-dependency_run.yaml
deleted file mode 100644
index 093d2264b..000000000
--- a/ChatQnA/benchmark/oob/with_rerank/single_gaudi/llm-dependency_run.yaml
+++ /dev/null
@@ -1,88 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: llm-dependency-deploy
-  namespace: default
-spec:
-  replicas: 7
-  selector:
-    matchLabels:
-      app: llm-dependency-deploy
-  template:
-    metadata:
-      annotations:
-        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
-      labels:
-        app: llm-dependency-deploy
-    spec:
-      nodeSelector:
-        node-type: chatqna-opea
-      hostIPC: true
-      containers:
-      - envFrom:
-        - configMapRef:
-            name: qna-config
-        image: ghcr.io/huggingface/tgi-gaudi:2.0.1
-        name: llm-dependency-deploy-demo
-        securityContext:
-          capabilities:
-            add:
-            - SYS_NICE
-        args:
-        - --model-id
-        - $(LLM_MODEL_ID)
-        - --max-input-length
-        - '2048'
-        - --max-total-tokens
-        - '4096'
-        - --max-batch-total-tokens
-        - '65536'
-        - --max-batch-prefill-tokens
-        - '4096'
-        volumeMounts:
-        - mountPath: /data
-          name: model-volume
-        - mountPath: /dev/shm
-          name: shm
-        ports:
-        - containerPort: 80
-        resources:
-          limits:
-            habana.ai/gaudi: 1
-        env:
-        - name: OMPI_MCA_btl_vader_single_copy_mechanism
-          value: none
-        - name: PT_HPU_ENABLE_LAZY_COLLECTIVES
-          value: 'true'
-        - name: runtime
-          value: habana
-        - name: HABANA_VISIBLE_DEVICES
-          value: all
-        - name: HF_TOKEN
-          value: ${HF_TOKEN}
-      serviceAccountName: default
-      volumes:
-      - name: model-volume
-        hostPath:
-          path: /mnt/models
-          type: Directory
-      - name: shm
-        emptyDir:
-          medium: Memory
-          sizeLimit: 1Gi
----
-kind: Service
-apiVersion: v1
-metadata:
-  name: llm-dependency-svc
-spec:
-  type: ClusterIP
-  selector:
-    app: llm-dependency-deploy
-  ports:
-  - name: service
-    port: 9009
-    targetPort: 80
diff --git a/ChatQnA/benchmark/oob/with_rerank/single_gaudi/llm-microservice_run.yaml b/ChatQnA/benchmark/oob/with_rerank/single_gaudi/llm-microservice_run.yaml
deleted file mode 100644
index 3056dbc1d..000000000
--- a/ChatQnA/benchmark/oob/with_rerank/single_gaudi/llm-microservice_run.yaml
+++ /dev/null
@@ -1,54 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: llm-deploy
-  namespace: default
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: llm-deploy
-  template:
-    metadata:
-      annotations:
-        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
-      labels:
-        app: llm-deploy
-    spec:
-      nodeSelector:
-        node-type: chatqna-opea
-      topologySpreadConstraints:
-      - maxSkew: 1
-        topologyKey: kubernetes.io/hostname
-        whenUnsatisfiable: ScheduleAnyway
-        labelSelector:
-          matchLabels:
-            app: llm-deploy
-      hostIPC: true
-      containers:
-      - envFrom:
-        - configMapRef:
-            name: qna-config
-        image: opea/llm-tgi:latest
-        imagePullPolicy: IfNotPresent
-        name: llm-deploy
-        args: null
-        ports:
-        - containerPort: 9000
-      serviceAccountName: default
----
-kind: Service
-apiVersion: v1
-metadata:
-  name: llm-svc
-spec:
-  type: ClusterIP
-  selector:
-    app: llm-deploy
-  ports:
-  - name: service
-    port: 9000
-    targetPort: 9000
diff --git a/ChatQnA/benchmark/oob/with_rerank/single_gaudi/oob_single_gaudi_with_rerank.yaml b/ChatQnA/benchmark/oob/with_rerank/single_gaudi/oob_single_gaudi_with_rerank.yaml
new file mode 100644
index 000000000..b05326a30
--- /dev/null
+++ b/ChatQnA/benchmark/oob/with_rerank/single_gaudi/oob_single_gaudi_with_rerank.yaml
@@ -0,0 +1,734 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+kind: ConfigMap  # shared settings; read by the Deployments in this file via envFrom / configMapKeyRef
+metadata:
+  name: qna-config
+  namespace: default
+data:
+  EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5  # expanded as $(EMBEDDING_MODEL_ID) in the embedding-dependency-deploy args
+  RERANK_MODEL_ID: BAAI/bge-reranker-base  # expanded as $(RERANK_MODEL_ID) in the reranking-dependency-deploy args
+  LLM_MODEL_ID: Intel/neural-chat-7b-v3-3  # expanded as $(LLM_MODEL_ID) in the llm-dependency-deploy args
+  TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006  # Service embedding-dependency-svc
+  TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808  # Service reranking-dependency-svc
+  TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009  # Service llm-dependency-svc
+  REDIS_URL: redis://vector-db.default.svc.cluster.local:6379  # Service vector-db
+  INDEX_NAME: rag-redis
+  HUGGINGFACEHUB_API_TOKEN: '${HF_TOKEN}'  # placeholder, not expanded by Kubernetes: substitute before applying. Quoted so an empty token stays a string. NOTE(review): a Secret would suit a credential better
+  EMBEDDING_SERVICE_HOST_IP: embedding-svc  # Service embedding-svc (port 6000)
+  RETRIEVER_SERVICE_HOST_IP: retriever-svc  # Service retriever-svc (port 7000)
+  RERANK_SERVICE_HOST_IP: reranking-svc  # Service reranking-svc (port 8000)
+  NODE_SELECTOR: chatqna-opea  # same value as the hard-coded node-type nodeSelector of the Deployments; they do not read this key
+  LLM_SERVICE_HOST_IP: llm-svc  # Service llm-svc (port 9000)
+
+
+---
+
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment  # ChatQnA backend (image opea/chatqna); exposed by chatqna-backend-server-svc
+metadata:
+  name: chatqna-backend-server-deploy
+  namespace: default
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: chatqna-backend-server-deploy  # must equal the pod template label below
+  template:
+    metadata:
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'  # Istio sidecar annotation; quoted because annotation values must be strings
+      labels:
+        app: chatqna-backend-server-deploy
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea  # schedule only on nodes labelled node-type=chatqna-opea
+      topologySpreadConstraints:
+      - maxSkew: 1
+        topologyKey: kubernetes.io/hostname  # spread this app's pods across nodes
+        whenUnsatisfiable: ScheduleAnyway  # soft constraint: never blocks scheduling
+        labelSelector:
+          matchLabels:
+            app: chatqna-backend-server-deploy
+      hostIPC: true  # pod shares the host IPC namespace
+      containers:
+      - envFrom:
+        - configMapRef:
+            name: qna-config  # every key of the ConfigMap becomes an environment variable
+        image: opea/chatqna:latest
+        imagePullPolicy: IfNotPresent  # with the mutable :latest tag, an image already cached on the node is reused as-is
+        name: chatqna-backend-server-deploy
+        args: null  # explicit null: no override, the image's default arguments apply
+        ports:
+        - containerPort: 8888  # targetPort of chatqna-backend-server-svc
+      serviceAccountName: default
+---
+kind: Service  # the only NodePort Service in this file; all others are ClusterIP
+apiVersion: v1
+metadata:
+  name: chatqna-backend-server-svc
+  namespace: default
+spec:
+  type: NodePort
+  selector:
+    app: chatqna-backend-server-deploy  # pods of the chatqna-backend-server-deploy Deployment
+  ports:
+  - name: service
+    port: 8888
+    targetPort: 8888  # containerPort of the backend container
+    nodePort: 30888  # fixed port opened on every node
+
+
+---
+
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment  # data-preparation microservice (image opea/dataprep-redis); exposed by dataprep-svc
+metadata:
+  name: dataprep-deploy
+  namespace: default
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: dataprep-deploy  # must equal the pod template label below
+  template:
+    metadata:
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'  # Istio sidecar annotation; quoted because annotation values must be strings
+      labels:
+        app: dataprep-deploy
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea  # schedule only on nodes labelled node-type=chatqna-opea
+      topologySpreadConstraints:
+      - maxSkew: 1
+        topologyKey: kubernetes.io/hostname  # spread this app's pods across nodes
+        whenUnsatisfiable: ScheduleAnyway  # soft constraint: never blocks scheduling
+        labelSelector:
+          matchLabels:
+            app: dataprep-deploy
+      hostIPC: true  # pod shares the host IPC namespace
+      containers:
+      - env:  # only the three keys below are injected (no envFrom here)
+        - name: REDIS_URL
+          valueFrom:
+            configMapKeyRef:
+              name: qna-config
+              key: REDIS_URL
+        - name: TEI_ENDPOINT  # env name differs from the ConfigMap key it reads
+          valueFrom:
+            configMapKeyRef:
+              name: qna-config
+              key: TEI_EMBEDDING_ENDPOINT
+        - name: INDEX_NAME
+          valueFrom:
+            configMapKeyRef:
+              name: qna-config
+              key: INDEX_NAME
+        image: opea/dataprep-redis:latest
+        imagePullPolicy: IfNotPresent  # with the mutable :latest tag, an image already cached on the node is reused as-is
+        name: dataprep-deploy
+        args: null  # explicit null: no override, the image's default arguments apply
+        ports:
+        - containerPort: 6007  # port1 of dataprep-svc
+        - containerPort: 6008  # port2 of dataprep-svc
+        - containerPort: 6009  # port3 of dataprep-svc
+      serviceAccountName: default
+---
+kind: Service  # cluster-internal access to the three dataprep ports
+apiVersion: v1
+metadata:
+  name: dataprep-svc
+  namespace: default
+spec:
+  type: ClusterIP
+  selector:
+    app: dataprep-deploy  # pods of the dataprep-deploy Deployment
+  ports:
+  - name: port1  # names are required because the Service has more than one port
+    port: 6007
+    targetPort: 6007
+  - name: port2
+    port: 6008
+    targetPort: 6008
+  - name: port3
+    port: 6009
+    targetPort: 6009
+
+
+---
+
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment  # text-embeddings-inference server behind TEI_EMBEDDING_ENDPOINT (embedding-dependency-svc)
+metadata:
+  name: embedding-dependency-deploy
+  namespace: default
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: embedding-dependency-deploy  # must equal the pod template label below
+  template:
+    metadata:
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'  # Istio sidecar annotation; quoted because annotation values must be strings
+      labels:
+        app: embedding-dependency-deploy
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea  # schedule only on nodes labelled node-type=chatqna-opea
+      containers:
+      - envFrom:
+        - configMapRef:
+            name: qna-config  # supplies EMBEDDING_MODEL_ID used in args below
+        image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5  # cpu-tagged image; no habana.ai/gaudi resource is requested here
+        name: embedding-dependency-deploy
+        args:
+        - --model-id
+        - $(EMBEDDING_MODEL_ID)  # $(VAR) is expanded by Kubernetes from the container environment
+        - --auto-truncate
+        volumeMounts:
+        - mountPath: /data  # NOTE(review): presumably the model cache directory of the image - confirm
+          name: model-volume
+        - mountPath: /dev/shm
+          name: shm
+        ports:
+        - containerPort: 80  # targetPort of embedding-dependency-svc
+      serviceAccountName: default
+      volumes:
+      - name: model-volume
+        hostPath:
+          path: /mnt/models
+          type: Directory  # the directory must already exist on the node
+      - name: shm
+        emptyDir:
+          medium: Memory  # tmpfs-backed /dev/shm
+          sizeLimit: 1Gi
+---
+kind: Service  # host name used in TEI_EMBEDDING_ENDPOINT of qna-config
+apiVersion: v1
+metadata:
+  name: embedding-dependency-svc
+  namespace: default
+spec:
+  type: ClusterIP
+  selector:
+    app: embedding-dependency-deploy  # pods of the embedding-dependency-deploy Deployment
+  ports:
+  - name: service
+    port: 6006  # port used in TEI_EMBEDDING_ENDPOINT
+    targetPort: 80  # containerPort of the embedding server
+
+
+---
+
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment  # embedding microservice (image opea/embedding-tei); exposed by embedding-svc
+metadata:
+  name: embedding-deploy
+  namespace: default
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: embedding-deploy  # must equal the pod template label below
+  template:
+    metadata:
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'  # Istio sidecar annotation; quoted because annotation values must be strings
+      labels:
+        app: embedding-deploy
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea  # schedule only on nodes labelled node-type=chatqna-opea
+      topologySpreadConstraints:
+      - maxSkew: 1
+        topologyKey: kubernetes.io/hostname  # spread this app's pods across nodes
+        whenUnsatisfiable: ScheduleAnyway  # soft constraint: never blocks scheduling
+        labelSelector:
+          matchLabels:
+            app: embedding-deploy
+      hostIPC: true  # pod shares the host IPC namespace
+      containers:
+      - envFrom:
+        - configMapRef:
+            name: qna-config  # every key of the ConfigMap becomes an environment variable
+        image: opea/embedding-tei:latest
+        imagePullPolicy: IfNotPresent  # with the mutable :latest tag, an image already cached on the node is reused as-is
+        name: embedding-deploy
+        args: null  # explicit null: no override, the image's default arguments apply
+        ports:
+        - containerPort: 6000  # targetPort of embedding-svc
+      serviceAccountName: default
+---
+kind: Service  # name matches EMBEDDING_SERVICE_HOST_IP in qna-config
+apiVersion: v1
+metadata:
+  name: embedding-svc
+  namespace: default
+spec:
+  type: ClusterIP
+  selector:
+    app: embedding-deploy  # pods of the embedding-deploy Deployment
+  ports:
+  - name: service
+    port: 6000
+    targetPort: 6000  # containerPort of the embedding microservice
+
+
+---
+
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment  # tgi-gaudi inference server behind TGI_LLM_ENDPOINT (llm-dependency-svc)
+metadata:
+  name: llm-dependency-deploy
+  namespace: default
+spec:
+  replicas: 7  # one Gaudi device per replica (see limits). NOTE(review): 7 here + 1 for reranking-dependency-deploy presumably fills one 8-card node - confirm
+  selector:
+    matchLabels:
+      app: llm-dependency-deploy  # must equal the pod template label below
+  template:
+    metadata:
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'  # Istio sidecar annotation; quoted because annotation values must be strings
+      labels:
+        app: llm-dependency-deploy
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea  # schedule only on nodes labelled node-type=chatqna-opea
+      hostIPC: true  # pod shares the host IPC namespace
+      containers:
+      - envFrom:
+        - configMapRef:
+            name: qna-config  # supplies LLM_MODEL_ID used in args below
+        image: ghcr.io/huggingface/tgi-gaudi:2.0.1
+        name: llm-dependency-deploy-demo
+        securityContext:
+          capabilities:
+            add:
+            - SYS_NICE  # Linux capability for changing scheduling priority/affinity
+        args:
+        - --model-id
+        - $(LLM_MODEL_ID)  # $(VAR) is expanded by Kubernetes from the container environment
+        - --max-input-length
+        - '2048'  # numeric arguments are quoted: args entries must be strings
+        - --max-total-tokens
+        - '4096'
+        - --max-batch-total-tokens
+        - '65536'
+        - --max-batch-prefill-tokens
+        - '4096'
+        volumeMounts:
+        - mountPath: /data  # NOTE(review): presumably the model cache directory of the image - confirm
+          name: model-volume
+        - mountPath: /dev/shm
+          name: shm
+        ports:
+        - containerPort: 80  # targetPort of llm-dependency-svc
+        resources:
+          limits:
+            habana.ai/gaudi: 1  # extended resource; with only a limit set, the request defaults to the same value
+        env:
+        - name: OMPI_MCA_btl_vader_single_copy_mechanism
+          value: none
+        - name: PT_HPU_ENABLE_LAZY_COLLECTIVES
+          value: 'true'  # quoted: env values must be strings
+        - name: runtime  # NOTE(review): looks like a carry-over of docker's --runtime=habana; may have no effect as an env var - confirm
+          value: habana
+        - name: HABANA_VISIBLE_DEVICES
+          value: all
+        - name: HF_TOKEN
+          value: '${HF_TOKEN}'  # placeholder, not expanded by Kubernetes: substitute before applying. Quoted so an empty token stays a string
+      serviceAccountName: default
+      volumes:
+      - name: model-volume
+        hostPath:
+          path: /mnt/models
+          type: Directory  # the directory must already exist on the node
+      - name: shm
+        emptyDir:
+          medium: Memory  # tmpfs-backed /dev/shm
+          sizeLimit: 1Gi
+---
+kind: Service  # host name used in TGI_LLM_ENDPOINT of qna-config
+apiVersion: v1
+metadata:
+  name: llm-dependency-svc
+  namespace: default
+spec:
+  type: ClusterIP
+  selector:
+    app: llm-dependency-deploy  # pods of the llm-dependency-deploy Deployment
+  ports:
+  - name: service
+    port: 9009  # port used in TGI_LLM_ENDPOINT
+    targetPort: 80  # containerPort of the inference server
+
+
+---
+
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment  # LLM microservice (image opea/llm-tgi); exposed by llm-svc
+metadata:
+  name: llm-deploy
+  namespace: default
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: llm-deploy  # must equal the pod template label below
+  template:
+    metadata:
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'  # Istio sidecar annotation; quoted because annotation values must be strings
+      labels:
+        app: llm-deploy
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea  # schedule only on nodes labelled node-type=chatqna-opea
+      topologySpreadConstraints:
+      - maxSkew: 1
+        topologyKey: kubernetes.io/hostname  # spread this app's pods across nodes
+        whenUnsatisfiable: ScheduleAnyway  # soft constraint: never blocks scheduling
+        labelSelector:
+          matchLabels:
+            app: llm-deploy
+      hostIPC: true  # pod shares the host IPC namespace
+      containers:
+      - envFrom:
+        - configMapRef:
+            name: qna-config  # every key of the ConfigMap becomes an environment variable
+        image: opea/llm-tgi:latest
+        imagePullPolicy: IfNotPresent  # with the mutable :latest tag, an image already cached on the node is reused as-is
+        name: llm-deploy
+        args: null  # explicit null: no override, the image's default arguments apply
+        ports:
+        - containerPort: 9000  # targetPort of llm-svc
+      serviceAccountName: default
+---
+kind: Service  # name matches LLM_SERVICE_HOST_IP in qna-config
+apiVersion: v1
+metadata:
+  name: llm-svc
+  namespace: default
+spec:
+  type: ClusterIP
+  selector:
+    app: llm-deploy  # pods of the llm-deploy Deployment
+  ports:
+  - name: service
+    port: 9000
+    targetPort: 9000  # containerPort of the LLM microservice
+
+
+---
+
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment  # tei-gaudi reranking server behind TEI_RERANKING_ENDPOINT (reranking-dependency-svc)
+metadata:
+  name: reranking-dependency-deploy
+  namespace: default
+spec:
+  replicas: 1  # the single replica requests one Gaudi device (see limits)
+  selector:
+    matchLabels:
+      app: reranking-dependency-deploy  # must equal the pod template label below
+  template:
+    metadata:
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'  # Istio sidecar annotation; quoted because annotation values must be strings
+      labels:
+        app: reranking-dependency-deploy
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea  # schedule only on nodes labelled node-type=chatqna-opea
+      topologySpreadConstraints:
+      - maxSkew: 1
+        topologyKey: kubernetes.io/hostname  # spread this app's pods across nodes
+        whenUnsatisfiable: ScheduleAnyway  # soft constraint: never blocks scheduling
+        labelSelector:
+          matchLabels:
+            app: reranking-dependency-deploy
+      containers:
+      - envFrom:
+        - configMapRef:
+            name: qna-config  # supplies RERANK_MODEL_ID used in args below
+        image: opea/tei-gaudi:latest  # no imagePullPolicy set: Kubernetes defaults to Always for a :latest tag
+        name: reranking-dependency-deploy
+        args:
+        - --model-id
+        - $(RERANK_MODEL_ID)  # $(VAR) is expanded by Kubernetes from the container environment
+        - --auto-truncate
+        volumeMounts:
+        - mountPath: /data  # NOTE(review): presumably the model cache directory of the image - confirm
+          name: model-volume
+        - mountPath: /dev/shm
+          name: shm
+        ports:
+        - containerPort: 80  # targetPort of reranking-dependency-svc
+        resources:
+          limits:
+            habana.ai/gaudi: 1  # extended resource; with only a limit set, the request defaults to the same value
+        env:
+        - name: OMPI_MCA_btl_vader_single_copy_mechanism
+          value: none
+        - name: PT_HPU_ENABLE_LAZY_COLLECTIVES
+          value: 'true'  # quoted: env values must be strings
+        - name: runtime  # NOTE(review): looks like a carry-over of docker's --runtime=habana; may have no effect as an env var - confirm
+          value: habana
+        - name: HABANA_VISIBLE_DEVICES
+          value: all
+        - name: HF_TOKEN
+          value: '${HF_TOKEN}'  # placeholder, not expanded by Kubernetes: substitute before applying. Quoted so an empty token stays a string
+        - name: MAX_WARMUP_SEQUENCE_LENGTH
+          value: '512'  # quoted: env values must be strings
+      serviceAccountName: default
+      volumes:
+      - name: model-volume
+        hostPath:
+          path: /mnt/models
+          type: Directory  # the directory must already exist on the node
+      - name: shm
+        emptyDir:
+          medium: Memory  # tmpfs-backed /dev/shm
+          sizeLimit: 1Gi
+---
+kind: Service  # host name used in TEI_RERANKING_ENDPOINT of qna-config
+apiVersion: v1
+metadata:
+  name: reranking-dependency-svc
+  namespace: default
+spec:
+  type: ClusterIP
+  selector:
+    app: reranking-dependency-deploy  # pods of the reranking-dependency-deploy Deployment
+  ports:
+  - name: service
+    port: 8808  # port used in TEI_RERANKING_ENDPOINT
+    targetPort: 80  # containerPort of the reranking server
+
+
+---
+
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment  # reranking microservice (image opea/reranking-tei); exposed by reranking-svc
+metadata:
+  name: reranking-deploy
+  namespace: default
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: reranking-deploy  # must equal the pod template label below
+  template:
+    metadata:
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'  # Istio sidecar annotation; quoted because annotation values must be strings
+      labels:
+        app: reranking-deploy
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea  # schedule only on nodes labelled node-type=chatqna-opea
+      topologySpreadConstraints:
+      - maxSkew: 1
+        topologyKey: kubernetes.io/hostname  # spread this app's pods across nodes
+        whenUnsatisfiable: ScheduleAnyway  # soft constraint: never blocks scheduling
+        labelSelector:
+          matchLabels:
+            app: reranking-deploy
+      hostIPC: true  # pod shares the host IPC namespace
+      containers:
+      - envFrom:
+        - configMapRef:
+            name: qna-config  # every key of the ConfigMap becomes an environment variable
+        image: opea/reranking-tei:latest
+        imagePullPolicy: IfNotPresent  # with the mutable :latest tag, an image already cached on the node is reused as-is
+        name: reranking-deploy
+        args: null  # explicit null: no override, the image's default arguments apply
+        ports:
+        - containerPort: 8000  # targetPort of reranking-svc
+      serviceAccountName: default
+---
+kind: Service  # name matches RERANK_SERVICE_HOST_IP in qna-config
+apiVersion: v1
+metadata:
+  name: reranking-svc
+  namespace: default
+spec:
+  type: ClusterIP
+  selector:
+    app: reranking-deploy  # pods of the reranking-deploy Deployment
+  ports:
+  - name: service
+    port: 8000
+    targetPort: 8000  # containerPort of the reranking microservice
+
+
+---
+
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment  # retriever microservice (image opea/retriever-redis); exposed by retriever-svc
+metadata:
+  name: retriever-deploy
+  namespace: default
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: retriever-deploy  # must equal the pod template label below
+  template:
+    metadata:
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'  # Istio sidecar annotation; quoted because annotation values must be strings
+      labels:
+        app: retriever-deploy
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea  # schedule only on nodes labelled node-type=chatqna-opea
+      topologySpreadConstraints:
+      - maxSkew: 1
+        topologyKey: kubernetes.io/hostname  # spread this app's pods across nodes
+        whenUnsatisfiable: ScheduleAnyway  # soft constraint: never blocks scheduling
+        labelSelector:
+          matchLabels:
+            app: retriever-deploy
+      hostIPC: true  # pod shares the host IPC namespace
+      containers:
+      - env:  # only the four keys below are injected (no envFrom here)
+        - name: REDIS_URL
+          valueFrom:
+            configMapKeyRef:
+              name: qna-config
+              key: REDIS_URL
+        - name: TEI_EMBEDDING_ENDPOINT
+          valueFrom:
+            configMapKeyRef:
+              name: qna-config
+              key: TEI_EMBEDDING_ENDPOINT
+        - name: HUGGINGFACEHUB_API_TOKEN  # resolves to the literal placeholder unless ${HF_TOKEN} was substituted in qna-config
+          valueFrom:
+            configMapKeyRef:
+              name: qna-config
+              key: HUGGINGFACEHUB_API_TOKEN
+        - name: INDEX_NAME
+          valueFrom:
+            configMapKeyRef:
+              name: qna-config
+              key: INDEX_NAME
+        image: opea/retriever-redis:latest
+        imagePullPolicy: IfNotPresent  # with the mutable :latest tag, an image already cached on the node is reused as-is
+        name: retriever-deploy
+        args: null  # explicit null: no override, the image's default arguments apply
+        ports:
+        - containerPort: 7000  # targetPort of retriever-svc
+      serviceAccountName: default
+---
+kind: Service  # name matches RETRIEVER_SERVICE_HOST_IP in qna-config
+apiVersion: v1
+metadata:
+  name: retriever-svc
+  namespace: default
+spec:
+  type: ClusterIP
+  selector:
+    app: retriever-deploy  # pods of the retriever-deploy Deployment
+  ports:
+  - name: service
+    port: 7000
+    targetPort: 7000  # containerPort of the retriever microservice
+
+
+---
+
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment  # Redis Stack instance addressed by REDIS_URL in qna-config
+metadata:
+  name: vector-db
+  namespace: default
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: vector-db  # must equal the pod template label below
+  template:
+    metadata:
+      labels:
+        app: vector-db
+    spec:  # no volumes are defined: stored data does not outlive the pod
+      nodeSelector:
+        node-type: chatqna-opea  # schedule only on nodes labelled node-type=chatqna-opea
+      topologySpreadConstraints:
+      - maxSkew: 1
+        topologyKey: kubernetes.io/hostname  # spread this app's pods across nodes
+        whenUnsatisfiable: ScheduleAnyway  # soft constraint: never blocks scheduling
+        labelSelector:
+          matchLabels:
+            app: vector-db
+      containers:
+      - name: vector-db
+        image: redis/redis-stack:7.2.0-v9
+        ports:
+        - containerPort: 6379  # Service port vector-db-service; the port in REDIS_URL
+        - containerPort: 8001  # Service port vector-db-insight
+---
+apiVersion: v1
+kind: Service  # host name used in REDIS_URL of qna-config
+metadata:
+  name: vector-db
+  namespace: default
+spec:
+  type: ClusterIP
+  selector:
+    app: vector-db  # pods of the vector-db Deployment
+  ports:
+  - name: vector-db-service  # names are required because the Service has more than one port
+    port: 6379  # port used in REDIS_URL
+    targetPort: 6379
+  - name: vector-db-insight
+    port: 8001
+    targetPort: 8001
+
+
+---
diff --git a/ChatQnA/benchmark/oob/with_rerank/single_gaudi/reranking-dependency_run.yaml b/ChatQnA/benchmark/oob/with_rerank/single_gaudi/reranking-dependency_run.yaml
deleted file mode 100644
index af908ecd1..000000000
--- a/ChatQnA/benchmark/oob/with_rerank/single_gaudi/reranking-dependency_run.yaml
+++ /dev/null
@@ -1,85 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: reranking-dependency-deploy
-  namespace: default
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: reranking-dependency-deploy
-  template:
-    metadata:
-      annotations:
-        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
-      labels:
-        app: reranking-dependency-deploy
-    spec:
-      nodeSelector:
-        node-type: chatqna-opea
-      topologySpreadConstraints:
-      - maxSkew: 1
-        topologyKey: kubernetes.io/hostname
-        whenUnsatisfiable: ScheduleAnyway
-        labelSelector:
-          matchLabels:
-            app: reranking-dependency-deploy
-      containers:
-      - envFrom:
-        - configMapRef:
-            name: qna-config
-        image: opea/tei-gaudi:latest
-        name: reranking-dependency-deploy
-        args:
-        - --model-id
-        - $(RERANK_MODEL_ID)
-        - --auto-truncate
-        volumeMounts:
-        - mountPath: /data
-          name: model-volume
-        - mountPath: /dev/shm
-          name: shm
-        ports:
-        - containerPort: 80
-        resources:
-          limits:
-            habana.ai/gaudi: 1
-        env:
-        - name: OMPI_MCA_btl_vader_single_copy_mechanism
-          value: none
-        - name: PT_HPU_ENABLE_LAZY_COLLECTIVES
-          value: 'true'
-        - name: runtime
-          value: habana
-        - name: HABANA_VISIBLE_DEVICES
-          value: all
-        - name: HF_TOKEN
-          value: ${HF_TOKEN}
-        - name: MAX_WARMUP_SEQUENCE_LENGTH
-          value: '512'
-      serviceAccountName: default
-      volumes:
-      - name: model-volume
-        hostPath:
-          path: /mnt/models
-          type: Directory
-      - name: shm
-        emptyDir:
-          medium: Memory
-          sizeLimit: 1Gi
----
-kind: Service
-apiVersion: v1
-metadata:
-  name: reranking-dependency-svc
-spec:
-  type: ClusterIP
-  selector:
-    app: reranking-dependency-deploy
-  ports:
-  - name: service
-    port: 8808
-    targetPort: 80
diff --git a/ChatQnA/benchmark/oob/with_rerank/single_gaudi/reranking-microservice_run.yaml b/ChatQnA/benchmark/oob/with_rerank/single_gaudi/reranking-microservice_run.yaml
deleted file mode 100644
index 0723d46a8..000000000
--- a/ChatQnA/benchmark/oob/with_rerank/single_gaudi/reranking-microservice_run.yaml
+++ /dev/null
@@ -1,54 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: reranking-deploy
-  namespace: default
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: reranking-deploy
-  template:
-    metadata:
-      annotations:
-        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
-      labels:
-        app: reranking-deploy
-    spec:
-      nodeSelector:
-        node-type: chatqna-opea
-      topologySpreadConstraints:
-      - maxSkew: 1
-        topologyKey: kubernetes.io/hostname
-        whenUnsatisfiable: ScheduleAnyway
-        labelSelector:
-          matchLabels:
-            app: reranking-deploy
-      hostIPC: true
-      containers:
-      - envFrom:
-        - configMapRef:
-            name: qna-config
-        image: opea/reranking-tei:latest
-        imagePullPolicy: IfNotPresent
-        name: reranking-deploy
-        args: null
-        ports:
-        - containerPort: 8000
-      serviceAccountName: default
----
-kind: Service
-apiVersion: v1
-metadata:
-  name: reranking-svc
-spec:
-  type: ClusterIP
-  selector:
-    app: reranking-deploy
-  ports:
-  - name: service
-    port: 8000
-    targetPort: 8000
diff --git a/ChatQnA/benchmark/oob/with_rerank/single_gaudi/retrieval-microservice_run.yaml b/ChatQnA/benchmark/oob/with_rerank/single_gaudi/retrieval-microservice_run.yaml
deleted file mode 100644
index ac6c12fdc..000000000
--- a/ChatQnA/benchmark/oob/with_rerank/single_gaudi/retrieval-microservice_run.yaml
+++ /dev/null
@@ -1,72 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: retriever-deploy
-  namespace: default
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: retriever-deploy
-  template:
-    metadata:
-      annotations:
-        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
-      labels:
-        app: retriever-deploy
-    spec:
-      nodeSelector:
-        node-type: chatqna-opea
-      topologySpreadConstraints:
-      - maxSkew: 1
-        topologyKey: kubernetes.io/hostname
-        whenUnsatisfiable: ScheduleAnyway
-        labelSelector:
-          matchLabels:
-            app: retriever-deploy
-      hostIPC: true
-      containers:
-      - env:
-        - name: REDIS_URL
-          valueFrom:
-            configMapKeyRef:
-              name: qna-config
-              key: REDIS_URL
-        - name: TEI_EMBEDDING_ENDPOINT
-          valueFrom:
-            configMapKeyRef:
-              name: qna-config
-              key: TEI_EMBEDDING_ENDPOINT
-        - name: HUGGINGFACEHUB_API_TOKEN
-          valueFrom:
-            configMapKeyRef:
-              name: qna-config
-              key: HUGGINGFACEHUB_API_TOKEN
-        - name: INDEX_NAME
-          valueFrom:
-            configMapKeyRef:
-              name: qna-config
-              key: INDEX_NAME
-        image: opea/retriever-redis:latest
-        imagePullPolicy: IfNotPresent
-        name: retriever-deploy
-        args: null
-        ports:
-        - containerPort: 7000
-      serviceAccountName: default
----
-kind: Service
-apiVersion: v1
-metadata:
-  name: retriever-svc
-spec:
-  type: ClusterIP
-  selector:
-    app: retriever-deploy
-  ports:
-  - name: service
-    port: 7000
-    targetPort: 7000
diff --git a/ChatQnA/benchmark/oob/with_rerank/single_gaudi/vector-db_run.yaml b/ChatQnA/benchmark/oob/with_rerank/single_gaudi/vector-db_run.yaml
deleted file mode 100644
index e04e8c5fe..000000000
--- a/ChatQnA/benchmark/oob/with_rerank/single_gaudi/vector-db_run.yaml
+++ /dev/null
@@ -1,48 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: vector-db
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: vector-db
-  template:
-    metadata:
-      labels:
-        app: vector-db
-    spec:
-      nodeSelector:
-        node-type: chatqna-opea
-      topologySpreadConstraints:
-      - maxSkew: 1
-        topologyKey: kubernetes.io/hostname
-        whenUnsatisfiable: ScheduleAnyway
-        labelSelector:
-          matchLabels:
-            app: vector-db
-      containers:
-      - name: vector-db
-        image: redis/redis-stack:7.2.0-v9
-        ports:
-        - containerPort: 6379
-        - containerPort: 8001
----
-apiVersion: v1
-kind: Service
-metadata:
-  name: vector-db
-spec:
-  type: ClusterIP
-  selector:
-    app: vector-db
-  ports:
-  - name: vector-db-service
-    port: 6379
-    targetPort: 6379
-  - name: vector-db-insight
-    port: 8001
-    targetPort: 8001
diff --git a/ChatQnA/benchmark/oob/with_rerank/two_gaudi/chatqna_config_map.yaml b/ChatQnA/benchmark/oob/with_rerank/two_gaudi/chatqna_config_map.yaml
deleted file mode 100644
index 368c800e4..000000000
--- a/ChatQnA/benchmark/oob/with_rerank/two_gaudi/chatqna_config_map.yaml
+++ /dev/null
@@ -1,23 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: v1
-kind: ConfigMap
-metadata:
-  name: qna-config
-  namespace: default
-data:
-  EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
-  RERANK_MODEL_ID: BAAI/bge-reranker-base
-  LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
-  TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
-  TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
-  TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
-  REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
-  INDEX_NAME: rag-redis
-  HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
-  EMBEDDING_SERVICE_HOST_IP: embedding-svc
-  RETRIEVER_SERVICE_HOST_IP: retriever-svc
-  RERANK_SERVICE_HOST_IP: reranking-svc
-  NODE_SELECTOR: chatqna-opea
-  LLM_SERVICE_HOST_IP: llm-svc
diff --git a/ChatQnA/benchmark/oob/with_rerank/two_gaudi/chatqna_mega_service_run.yaml b/ChatQnA/benchmark/oob/with_rerank/two_gaudi/chatqna_mega_service_run.yaml
deleted file mode 100644
index 98422525f..000000000
--- a/ChatQnA/benchmark/oob/with_rerank/two_gaudi/chatqna_mega_service_run.yaml
+++ /dev/null
@@ -1,55 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: chatqna-backend-server-deploy
-  namespace: default
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: chatqna-backend-server-deploy
-  template:
-    metadata:
-      annotations:
-        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
-      labels:
-        app: chatqna-backend-server-deploy
-    spec:
-      nodeSelector:
-        node-type: chatqna-opea
-      topologySpreadConstraints:
-      - maxSkew: 1
-        topologyKey: kubernetes.io/hostname
-        whenUnsatisfiable: ScheduleAnyway
-        labelSelector:
-          matchLabels:
-            app: chatqna-backend-server-deploy
-      hostIPC: true
-      containers:
-      - envFrom:
-        - configMapRef:
-            name: qna-config
-        image: opea/chatqna:latest
-        imagePullPolicy: IfNotPresent
-        name: chatqna-backend-server-deploy
-        args: null
-        ports:
-        - containerPort: 8888
-      serviceAccountName: default
----
-kind: Service
-apiVersion: v1
-metadata:
-  name: chatqna-backend-server-svc
-spec:
-  type: NodePort
-  selector:
-    app: chatqna-backend-server-deploy
-  ports:
-  - name: service
-    port: 8888
-    targetPort: 8888
-    nodePort: 30888
diff --git a/ChatQnA/benchmark/oob/with_rerank/two_gaudi/dataprep-microservice_run.yaml b/ChatQnA/benchmark/oob/with_rerank/two_gaudi/dataprep-microservice_run.yaml
deleted file mode 100644
index 4c71df7ce..000000000
--- a/ChatQnA/benchmark/oob/with_rerank/two_gaudi/dataprep-microservice_run.yaml
+++ /dev/null
@@ -1,75 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: dataprep-deploy
-  namespace: default
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: dataprep-deploy
-  template:
-    metadata:
-      annotations:
-        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
-      labels:
-        app: dataprep-deploy
-    spec:
-      nodeSelector:
-        node-type: chatqna-opea
-      topologySpreadConstraints:
-      - maxSkew: 1
-        topologyKey: kubernetes.io/hostname
-        whenUnsatisfiable: ScheduleAnyway
-        labelSelector:
-          matchLabels:
-            app: dataprep-deploy
-      hostIPC: true
-      containers:
-      - env:
-        - name: REDIS_URL
-          valueFrom:
-            configMapKeyRef:
-              name: qna-config
-              key: REDIS_URL
-        - name: TEI_ENDPOINT
-          valueFrom:
-            configMapKeyRef:
-              name: qna-config
-              key: TEI_EMBEDDING_ENDPOINT
-        - name: INDEX_NAME
-          valueFrom:
-            configMapKeyRef:
-              name: qna-config
-              key: INDEX_NAME
-        image: opea/dataprep-redis:latest
-        imagePullPolicy: IfNotPresent
-        name: dataprep-deploy
-        args: null
-        ports:
-        - containerPort: 6007
-        - containerPort: 6008
-        - containerPort: 6009
-      serviceAccountName: default
----
-kind: Service
-apiVersion: v1
-metadata:
-  name: dataprep-svc
-spec:
-  type: ClusterIP
-  selector:
-    app: dataprep-deploy
-  ports:
-  - name: port1
-    port: 6007
-    targetPort: 6007
-  - name: port2
-    port: 6008
-    targetPort: 6008
-  - name: port3
-    port: 6009
-    targetPort: 6009
diff --git a/ChatQnA/benchmark/oob/with_rerank/two_gaudi/embedding-dependency_run.yaml b/ChatQnA/benchmark/oob/with_rerank/two_gaudi/embedding-dependency_run.yaml
deleted file mode 100644
index 42a20871d..000000000
--- a/ChatQnA/benchmark/oob/with_rerank/two_gaudi/embedding-dependency_run.yaml
+++ /dev/null
@@ -1,62 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: embedding-dependency-deploy
-  namespace: default
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: embedding-dependency-deploy
-  template:
-    metadata:
-      annotations:
-        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
-      labels:
-        app: embedding-dependency-deploy
-    spec:
-      nodeSelector:
-        node-type: chatqna-opea
-      containers:
-      - envFrom:
-        - configMapRef:
-            name: qna-config
-        image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
-        name: embedding-dependency-deploy
-        args:
-        - --model-id
-        - $(EMBEDDING_MODEL_ID)
-        - --auto-truncate
-        volumeMounts:
-        - mountPath: /data
-          name: model-volume
-        - mountPath: /dev/shm
-          name: shm
-        ports:
-        - containerPort: 80
-      serviceAccountName: default
-      volumes:
-      - name: model-volume
-        hostPath:
-          path: /mnt/models
-          type: Directory
-      - name: shm
-        emptyDir:
-          medium: Memory
-          sizeLimit: 1Gi
----
-kind: Service
-apiVersion: v1
-metadata:
-  name: embedding-dependency-svc
-spec:
-  type: ClusterIP
-  selector:
-    app: embedding-dependency-deploy
-  ports:
-  - name: service
-    port: 6006
-    targetPort: 80
diff --git a/ChatQnA/benchmark/oob/with_rerank/two_gaudi/embedding-microservice_run.yaml b/ChatQnA/benchmark/oob/with_rerank/two_gaudi/embedding-microservice_run.yaml
deleted file mode 100644
index 3af5b9859..000000000
--- a/ChatQnA/benchmark/oob/with_rerank/two_gaudi/embedding-microservice_run.yaml
+++ /dev/null
@@ -1,54 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: embedding-deploy
-  namespace: default
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: embedding-deploy
-  template:
-    metadata:
-      annotations:
-        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
-      labels:
-        app: embedding-deploy
-    spec:
-      nodeSelector:
-        node-type: chatqna-opea
-      topologySpreadConstraints:
-      - maxSkew: 1
-        topologyKey: kubernetes.io/hostname
-        whenUnsatisfiable: ScheduleAnyway
-        labelSelector:
-          matchLabels:
-            app: embedding-deploy
-      hostIPC: true
-      containers:
-      - envFrom:
-        - configMapRef:
-            name: qna-config
-        image: opea/embedding-tei:latest
-        imagePullPolicy: IfNotPresent
-        name: embedding-deploy
-        args: null
-        ports:
-        - containerPort: 6000
-      serviceAccountName: default
----
-kind: Service
-apiVersion: v1
-metadata:
-  name: embedding-svc
-spec:
-  type: ClusterIP
-  selector:
-    app: embedding-deploy
-  ports:
-  - name: service
-    port: 6000
-    targetPort: 6000
diff --git a/ChatQnA/benchmark/oob/with_rerank/two_gaudi/llm-dependency_run.yaml b/ChatQnA/benchmark/oob/with_rerank/two_gaudi/llm-dependency_run.yaml
deleted file mode 100644
index 9499f04ed..000000000
--- a/ChatQnA/benchmark/oob/with_rerank/two_gaudi/llm-dependency_run.yaml
+++ /dev/null
@@ -1,88 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: llm-dependency-deploy
-  namespace: default
-spec:
-  replicas: 15
-  selector:
-    matchLabels:
-      app: llm-dependency-deploy
-  template:
-    metadata:
-      annotations:
-        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
-      labels:
-        app: llm-dependency-deploy
-    spec:
-      nodeSelector:
-        node-type: chatqna-opea
-      hostIPC: true
-      containers:
-      - envFrom:
-        - configMapRef:
-            name: qna-config
-        image: ghcr.io/huggingface/tgi-gaudi:2.0.1
-        name: llm-dependency-deploy-demo
-        securityContext:
-          capabilities:
-            add:
-            - SYS_NICE
-        args:
-        - --model-id
-        - $(LLM_MODEL_ID)
-        - --max-input-length
-        - '2048'
-        - --max-total-tokens
-        - '4096'
-        - --max-batch-total-tokens
-        - '65536'
-        - --max-batch-prefill-tokens
-        - '4096'
-        volumeMounts:
-        - mountPath: /data
-          name: model-volume
-        - mountPath: /dev/shm
-          name: shm
-        ports:
-        - containerPort: 80
-        resources:
-          limits:
-            habana.ai/gaudi: 1
-        env:
-        - name: OMPI_MCA_btl_vader_single_copy_mechanism
-          value: none
-        - name: PT_HPU_ENABLE_LAZY_COLLECTIVES
-          value: 'true'
-        - name: runtime
-          value: habana
-        - name: HABANA_VISIBLE_DEVICES
-          value: all
-        - name: HF_TOKEN
-          value: ${HF_TOKEN}
-      serviceAccountName: default
-      volumes:
-      - name: model-volume
-        hostPath:
-          path: /mnt/models
-          type: Directory
-      - name: shm
-        emptyDir:
-          medium: Memory
-          sizeLimit: 1Gi
----
-kind: Service
-apiVersion: v1
-metadata:
-  name: llm-dependency-svc
-spec:
-  type: ClusterIP
-  selector:
-    app: llm-dependency-deploy
-  ports:
-  - name: service
-    port: 9009
-    targetPort: 80
diff --git a/ChatQnA/benchmark/oob/with_rerank/two_gaudi/llm-microservice_run.yaml b/ChatQnA/benchmark/oob/with_rerank/two_gaudi/llm-microservice_run.yaml
deleted file mode 100644
index 3056dbc1d..000000000
--- a/ChatQnA/benchmark/oob/with_rerank/two_gaudi/llm-microservice_run.yaml
+++ /dev/null
@@ -1,54 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: llm-deploy
-  namespace: default
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: llm-deploy
-  template:
-    metadata:
-      annotations:
-        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
-      labels:
-        app: llm-deploy
-    spec:
-      nodeSelector:
-        node-type: chatqna-opea
-      topologySpreadConstraints:
-      - maxSkew: 1
-        topologyKey: kubernetes.io/hostname
-        whenUnsatisfiable: ScheduleAnyway
-        labelSelector:
-          matchLabels:
-            app: llm-deploy
-      hostIPC: true
-      containers:
-      - envFrom:
-        - configMapRef:
-            name: qna-config
-        image: opea/llm-tgi:latest
-        imagePullPolicy: IfNotPresent
-        name: llm-deploy
-        args: null
-        ports:
-        - containerPort: 9000
-      serviceAccountName: default
----
-kind: Service
-apiVersion: v1
-metadata:
-  name: llm-svc
-spec:
-  type: ClusterIP
-  selector:
-    app: llm-deploy
-  ports:
-  - name: service
-    port: 9000
-    targetPort: 9000
diff --git a/ChatQnA/benchmark/oob/with_rerank/two_gaudi/oob_two_gaudi_with_rerank.yaml b/ChatQnA/benchmark/oob/with_rerank/two_gaudi/oob_two_gaudi_with_rerank.yaml
new file mode 100644
index 000000000..13d834512
--- /dev/null
+++ b/ChatQnA/benchmark/oob/with_rerank/two_gaudi/oob_two_gaudi_with_rerank.yaml
@@ -0,0 +1,734 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: qna-config  # read by the Deployments in this file via envFrom / configMapKeyRef
+  namespace: default
+data:
+  EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5  # expanded as $(EMBEDDING_MODEL_ID) in container args
+  RERANK_MODEL_ID: BAAI/bge-reranker-base  # expanded as $(RERANK_MODEL_ID) in container args
+  LLM_MODEL_ID: Intel/neural-chat-7b-v3-3  # expanded as $(LLM_MODEL_ID) in container args
+  TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
+  TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
+  TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
+  REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
+  INDEX_NAME: rag-redis
+  HUGGINGFACEHUB_API_TOKEN: '${HF_TOKEN}'  # placeholder; quoted so an empty value stays a string (TODO: move to a Secret)
+  EMBEDDING_SERVICE_HOST_IP: embedding-svc  # Service names below, not literal IPs
+  RETRIEVER_SERVICE_HOST_IP: retriever-svc
+  RERANK_SERVICE_HOST_IP: reranking-svc
+  NODE_SELECTOR: chatqna-opea  # same value as the node-type nodeSelector used by every Deployment
+  LLM_SERVICE_HOST_IP: llm-svc
+
+
+---
+
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  namespace: default
+  name: chatqna-backend-server-deploy
+spec:
+  selector:
+    matchLabels:
+      app: chatqna-backend-server-deploy
+  replicas: 1
+  template:
+    metadata:
+      labels:
+        app: chatqna-backend-server-deploy  # must match spec.selector above
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+    spec:
+      serviceAccountName: default
+      hostIPC: true  # pod uses the node's IPC namespace
+      nodeSelector:
+        node-type: chatqna-opea
+      topologySpreadConstraints:
+      - topologyKey: kubernetes.io/hostname
+        maxSkew: 1
+        whenUnsatisfiable: ScheduleAnyway  # soft constraint: spread over hosts when possible
+        labelSelector:
+          matchLabels:
+            app: chatqna-backend-server-deploy
+      containers:
+      - name: chatqna-backend-server-deploy
+        image: opea/chatqna:latest
+        imagePullPolicy: IfNotPresent  # an already-cached :latest image is reused
+        args: null  # no override of the image's default arguments
+        envFrom:
+        - configMapRef:
+            name: qna-config  # every key of the ConfigMap becomes an env var
+        ports:
+        - containerPort: 8888
+---
+apiVersion: v1
+kind: Service
+metadata:
+  namespace: default
+  name: chatqna-backend-server-svc
+spec:
+  selector:
+    app: chatqna-backend-server-deploy  # targets the mega-service pods
+  type: NodePort  # the only Service in this file exposed on a node port
+  ports:
+  - name: service
+    nodePort: 30888
+    port: 8888
+    targetPort: 8888
+
+
+---
+
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  namespace: default
+  name: dataprep-deploy
+spec:
+  selector:
+    matchLabels:
+      app: dataprep-deploy
+  replicas: 1
+  template:
+    metadata:
+      labels:
+        app: dataprep-deploy  # must match spec.selector above
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+    spec:
+      serviceAccountName: default
+      hostIPC: true  # pod uses the node's IPC namespace
+      nodeSelector:
+        node-type: chatqna-opea
+      topologySpreadConstraints:
+      - topologyKey: kubernetes.io/hostname
+        maxSkew: 1
+        whenUnsatisfiable: ScheduleAnyway  # soft constraint: spread over hosts when possible
+        labelSelector:
+          matchLabels:
+            app: dataprep-deploy
+      containers:
+      - name: dataprep-deploy
+        image: opea/dataprep-redis:latest
+        imagePullPolicy: IfNotPresent  # an already-cached :latest image is reused
+        args: null  # no override of the image's default arguments
+        env:  # only three keys of qna-config are injected, one of them renamed
+        - name: REDIS_URL
+          valueFrom:
+            configMapKeyRef:
+              key: REDIS_URL
+              name: qna-config
+        - name: TEI_ENDPOINT  # env name differs from the ConfigMap key
+          valueFrom:
+            configMapKeyRef:
+              key: TEI_EMBEDDING_ENDPOINT
+              name: qna-config
+        - name: INDEX_NAME
+          valueFrom:
+            configMapKeyRef:
+              key: INDEX_NAME
+              name: qna-config
+        ports:
+        - containerPort: 6007
+        - containerPort: 6008
+        - containerPort: 6009
+---
+apiVersion: v1
+kind: Service
+metadata:
+  namespace: default
+  name: dataprep-svc
+spec:
+  selector:
+    app: dataprep-deploy  # targets the dataprep pods
+  type: ClusterIP  # in-cluster access only
+  ports:  # one entry per containerPort declared by dataprep-deploy
+  - name: port1
+    targetPort: 6007
+    port: 6007
+  - name: port2
+    targetPort: 6008
+    port: 6008
+  - name: port3
+    targetPort: 6009
+    port: 6009
+
+
+---
+
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  namespace: default
+  name: embedding-dependency-deploy
+spec:
+  selector:
+    matchLabels:
+      app: embedding-dependency-deploy
+  replicas: 1
+  template:
+    metadata:
+      labels:
+        app: embedding-dependency-deploy  # must match spec.selector above
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+    spec:
+      serviceAccountName: default
+      nodeSelector:
+        node-type: chatqna-opea
+      volumes:
+      - name: model-volume
+        hostPath:
+          type: Directory  # the directory must already exist on the node
+          path: /mnt/models
+      - name: shm
+        emptyDir:
+          sizeLimit: 1Gi
+          medium: Memory  # RAM-backed volume mounted at /dev/shm
+      containers:
+      - name: embedding-dependency-deploy
+        image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
+        args:
+        - --model-id
+        - $(EMBEDDING_MODEL_ID)  # expanded from the env populated by qna-config
+        - --auto-truncate
+        envFrom:
+        - configMapRef:
+            name: qna-config  # every key of the ConfigMap becomes an env var
+        ports:
+        - containerPort: 80
+        volumeMounts:
+        - name: model-volume
+          mountPath: /data
+        - name: shm
+          mountPath: /dev/shm
+---
+apiVersion: v1
+kind: Service
+metadata:
+  namespace: default
+  name: embedding-dependency-svc
+spec:
+  selector:
+    app: embedding-dependency-deploy  # targets the embedding-dependency pods
+  type: ClusterIP  # in-cluster access only
+  ports:
+  - name: service
+    targetPort: 80  # containerPort of embedding-dependency-deploy
+    port: 6006  # port used by TEI_EMBEDDING_ENDPOINT in qna-config
+
+
+---
+
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  namespace: default
+  name: embedding-deploy
+spec:
+  selector:
+    matchLabels:
+      app: embedding-deploy
+  replicas: 1
+  template:
+    metadata:
+      labels:
+        app: embedding-deploy  # must match spec.selector above
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+    spec:
+      serviceAccountName: default
+      hostIPC: true  # pod uses the node's IPC namespace
+      nodeSelector:
+        node-type: chatqna-opea
+      topologySpreadConstraints:
+      - topologyKey: kubernetes.io/hostname
+        maxSkew: 1
+        whenUnsatisfiable: ScheduleAnyway  # soft constraint: spread over hosts when possible
+        labelSelector:
+          matchLabels:
+            app: embedding-deploy
+      containers:
+      - name: embedding-deploy
+        image: opea/embedding-tei:latest
+        imagePullPolicy: IfNotPresent  # an already-cached :latest image is reused
+        args: null  # no override of the image's default arguments
+        envFrom:
+        - configMapRef:
+            name: qna-config  # every key of the ConfigMap becomes an env var
+        ports:
+        - containerPort: 6000
+---
+apiVersion: v1
+kind: Service
+metadata:
+  namespace: default
+  name: embedding-svc  # value of EMBEDDING_SERVICE_HOST_IP in qna-config
+spec:
+  selector:
+    app: embedding-deploy  # targets the embedding microservice pods
+  type: ClusterIP  # in-cluster access only
+  ports:
+  - name: service
+    targetPort: 6000
+    port: 6000
+
+
+---
+
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: llm-dependency-deploy
+  namespace: default
+spec:
+  replicas: 15  # each replica requests one habana.ai/gaudi device (see resources.limits)
+  selector:
+    matchLabels:
+      app: llm-dependency-deploy
+  template:
+    metadata:
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+      labels:
+        app: llm-dependency-deploy  # must match spec.selector above
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea
+      hostIPC: true  # pod uses the node's IPC namespace
+      containers:
+      - envFrom:
+        - configMapRef:
+            name: qna-config  # every key of the ConfigMap becomes an env var
+        image: ghcr.io/huggingface/tgi-gaudi:2.0.1
+        name: llm-dependency-deploy-demo
+        securityContext:
+          capabilities:
+            add:
+            - SYS_NICE
+        args:  # flag/value pairs; numeric values are quoted because args must be strings
+        - --model-id
+        - $(LLM_MODEL_ID)  # expanded from the env populated by qna-config
+        - --max-input-length
+        - '2048'
+        - --max-total-tokens
+        - '4096'
+        - --max-batch-total-tokens
+        - '65536'
+        - --max-batch-prefill-tokens
+        - '4096'
+        volumeMounts:
+        - mountPath: /data
+          name: model-volume
+        - mountPath: /dev/shm
+          name: shm
+        ports:
+        - containerPort: 80
+        resources:
+          limits:
+            habana.ai/gaudi: 1
+        env:
+        - name: OMPI_MCA_btl_vader_single_copy_mechanism
+          value: none
+        - name: PT_HPU_ENABLE_LAZY_COLLECTIVES
+          value: 'true'
+        - name: runtime  # NOTE(review): plain env var here; confirm the image actually reads it
+          value: habana
+        - name: HABANA_VISIBLE_DEVICES
+          value: all
+        - name: HF_TOKEN
+          value: '${HF_TOKEN}'  # placeholder; quoted so an empty value stays a string, not null
+      serviceAccountName: default
+      volumes:
+      - name: model-volume
+        hostPath:
+          path: /mnt/models
+          type: Directory  # the directory must already exist on the node
+      - name: shm
+        emptyDir:
+          medium: Memory  # RAM-backed volume mounted at /dev/shm
+          sizeLimit: 1Gi
+---
+apiVersion: v1
+kind: Service
+metadata:
+  namespace: default
+  name: llm-dependency-svc
+spec:
+  selector:
+    app: llm-dependency-deploy  # targets all llm-dependency replicas
+  type: ClusterIP  # in-cluster access only
+  ports:
+  - name: service
+    targetPort: 80  # containerPort of llm-dependency-deploy
+    port: 9009  # port used by TGI_LLM_ENDPOINT in qna-config
+
+
+---
+
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  namespace: default
+  name: llm-deploy
+spec:
+  selector:
+    matchLabels:
+      app: llm-deploy
+  replicas: 1
+  template:
+    metadata:
+      labels:
+        app: llm-deploy  # must match spec.selector above
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+    spec:
+      serviceAccountName: default
+      hostIPC: true  # pod uses the node's IPC namespace
+      nodeSelector:
+        node-type: chatqna-opea
+      topologySpreadConstraints:
+      - topologyKey: kubernetes.io/hostname
+        maxSkew: 1
+        whenUnsatisfiable: ScheduleAnyway  # soft constraint: spread over hosts when possible
+        labelSelector:
+          matchLabels:
+            app: llm-deploy
+      containers:
+      - name: llm-deploy
+        image: opea/llm-tgi:latest
+        imagePullPolicy: IfNotPresent  # an already-cached :latest image is reused
+        args: null  # no override of the image's default arguments
+        envFrom:
+        - configMapRef:
+            name: qna-config  # every key of the ConfigMap becomes an env var
+        ports:
+        - containerPort: 9000
+---
+apiVersion: v1
+kind: Service
+metadata:
+  namespace: default
+  name: llm-svc  # value of LLM_SERVICE_HOST_IP in qna-config
+spec:
+  selector:
+    app: llm-deploy  # targets the llm microservice pods
+  type: ClusterIP  # in-cluster access only
+  ports:
+  - name: service
+    targetPort: 9000
+    port: 9000
+
+
+---
+
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: reranking-dependency-deploy
+  namespace: default
+spec:
+  replicas: 1  # the single replica requests one habana.ai/gaudi device (see resources.limits)
+  selector:
+    matchLabels:
+      app: reranking-dependency-deploy
+  template:
+    metadata:
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+      labels:
+        app: reranking-dependency-deploy  # must match spec.selector above
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea
+      topologySpreadConstraints:
+      - maxSkew: 1
+        topologyKey: kubernetes.io/hostname
+        whenUnsatisfiable: ScheduleAnyway  # soft constraint: spread over hosts when possible
+        labelSelector:
+          matchLabels:
+            app: reranking-dependency-deploy
+      containers:
+      - envFrom:
+        - configMapRef:
+            name: qna-config  # every key of the ConfigMap becomes an env var
+        image: opea/tei-gaudi:latest
+        name: reranking-dependency-deploy
+        args:
+        - --model-id
+        - $(RERANK_MODEL_ID)  # expanded from the env populated by qna-config
+        - --auto-truncate
+        volumeMounts:
+        - mountPath: /data
+          name: model-volume
+        - mountPath: /dev/shm
+          name: shm
+        ports:
+        - containerPort: 80
+        resources:
+          limits:
+            habana.ai/gaudi: 1
+        env:
+        - name: OMPI_MCA_btl_vader_single_copy_mechanism
+          value: none
+        - name: PT_HPU_ENABLE_LAZY_COLLECTIVES
+          value: 'true'
+        - name: runtime  # NOTE(review): plain env var here; confirm the image actually reads it
+          value: habana
+        - name: HABANA_VISIBLE_DEVICES
+          value: all
+        - name: HF_TOKEN
+          value: '${HF_TOKEN}'  # placeholder; quoted so an empty value stays a string, not null
+        - name: MAX_WARMUP_SEQUENCE_LENGTH
+          value: '512'  # quoted: env values must be strings
+      serviceAccountName: default
+      volumes:
+      - name: model-volume
+        hostPath:
+          path: /mnt/models
+          type: Directory  # the directory must already exist on the node
+      - name: shm
+        emptyDir:
+          medium: Memory  # RAM-backed volume mounted at /dev/shm
+          sizeLimit: 1Gi
+---
+apiVersion: v1
+kind: Service
+metadata:
+  namespace: default
+  name: reranking-dependency-svc
+spec:
+  selector:
+    app: reranking-dependency-deploy  # targets the reranking-dependency pods
+  type: ClusterIP  # in-cluster access only
+  ports:
+  - name: service
+    targetPort: 80  # containerPort of reranking-dependency-deploy
+    port: 8808  # port used by TEI_RERANKING_ENDPOINT in qna-config
+
+
+---
+
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  namespace: default
+  name: reranking-deploy
+spec:
+  selector:
+    matchLabels:
+      app: reranking-deploy
+  replicas: 1
+  template:
+    metadata:
+      labels:
+        app: reranking-deploy  # must match spec.selector above
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+    spec:
+      serviceAccountName: default
+      hostIPC: true  # pod uses the node's IPC namespace
+      nodeSelector:
+        node-type: chatqna-opea
+      topologySpreadConstraints:
+      - topologyKey: kubernetes.io/hostname
+        maxSkew: 1
+        whenUnsatisfiable: ScheduleAnyway  # soft constraint: spread over hosts when possible
+        labelSelector:
+          matchLabels:
+            app: reranking-deploy
+      containers:
+      - name: reranking-deploy
+        image: opea/reranking-tei:latest
+        imagePullPolicy: IfNotPresent  # an already-cached :latest image is reused
+        args: null  # no override of the image's default arguments
+        envFrom:
+        - configMapRef:
+            name: qna-config  # every key of the ConfigMap becomes an env var
+        ports:
+        - containerPort: 8000
+---
+apiVersion: v1
+kind: Service
+metadata:
+  namespace: default
+  name: reranking-svc  # value of RERANK_SERVICE_HOST_IP in qna-config
+spec:
+  selector:
+    app: reranking-deploy  # targets the reranking microservice pods
+  type: ClusterIP  # in-cluster access only
+  ports:
+  - name: service
+    targetPort: 8000
+    port: 8000
+
+
+---
+
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  namespace: default
+  name: retriever-deploy
+spec:
+  selector:
+    matchLabels:
+      app: retriever-deploy
+  replicas: 1
+  template:
+    metadata:
+      labels:
+        app: retriever-deploy  # must match spec.selector above
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+    spec:
+      serviceAccountName: default
+      hostIPC: true  # pod uses the node's IPC namespace
+      nodeSelector:
+        node-type: chatqna-opea
+      topologySpreadConstraints:
+      - topologyKey: kubernetes.io/hostname
+        maxSkew: 1
+        whenUnsatisfiable: ScheduleAnyway  # soft constraint: spread over hosts when possible
+        labelSelector:
+          matchLabels:
+            app: retriever-deploy
+      containers:
+      - name: retriever-deploy
+        image: opea/retriever-redis:latest
+        imagePullPolicy: IfNotPresent  # an already-cached :latest image is reused
+        args: null  # no override of the image's default arguments
+        env:  # only four keys of qna-config are injected, names unchanged
+        - name: REDIS_URL
+          valueFrom:
+            configMapKeyRef:
+              key: REDIS_URL
+              name: qna-config
+        - name: TEI_EMBEDDING_ENDPOINT
+          valueFrom:
+            configMapKeyRef:
+              key: TEI_EMBEDDING_ENDPOINT
+              name: qna-config
+        - name: HUGGINGFACEHUB_API_TOKEN
+          valueFrom:
+            configMapKeyRef:
+              key: HUGGINGFACEHUB_API_TOKEN
+              name: qna-config
+        - name: INDEX_NAME
+          valueFrom:
+            configMapKeyRef:
+              key: INDEX_NAME
+              name: qna-config
+        ports:
+        - containerPort: 7000
+---
+apiVersion: v1
+kind: Service
+metadata:
+  namespace: default
+  name: retriever-svc  # value of RETRIEVER_SERVICE_HOST_IP in qna-config
+spec:
+  selector:
+    app: retriever-deploy  # targets the retriever microservice pods
+  type: ClusterIP  # in-cluster access only
+  ports:
+  - name: service
+    targetPort: 7000
+    port: 7000
+
+
+---
+
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  namespace: default
+  name: vector-db
+spec:
+  selector:
+    matchLabels:
+      app: vector-db
+  replicas: 1
+  template:
+    metadata:
+      labels:
+        app: vector-db  # must match spec.selector above
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea
+      topologySpreadConstraints:
+      - topologyKey: kubernetes.io/hostname
+        maxSkew: 1
+        whenUnsatisfiable: ScheduleAnyway  # soft constraint: spread over hosts when possible
+        labelSelector:
+          matchLabels:
+            app: vector-db
+      containers:  # no volumes are declared for this pod
+      - image: redis/redis-stack:7.2.0-v9
+        name: vector-db
+        ports:
+        - containerPort: 6379  # exposed as vector-db-service by the Service
+        - containerPort: 8001  # exposed as vector-db-insight by the Service
+---
+apiVersion: v1
+kind: Service
+metadata:
+  namespace: default
+  name: vector-db  # host name used by REDIS_URL in qna-config
+spec:
+  selector:
+    app: vector-db  # targets the vector-db pod
+  type: ClusterIP  # in-cluster access only
+  ports:
+  - name: vector-db-service
+    targetPort: 6379
+    port: 6379  # port used by REDIS_URL in qna-config
+  - name: vector-db-insight
+    targetPort: 8001
+    port: 8001
+
+
+---
diff --git a/ChatQnA/benchmark/oob/with_rerank/two_gaudi/reranking-dependency_run.yaml b/ChatQnA/benchmark/oob/with_rerank/two_gaudi/reranking-dependency_run.yaml
deleted file mode 100644
index af908ecd1..000000000
--- a/ChatQnA/benchmark/oob/with_rerank/two_gaudi/reranking-dependency_run.yaml
+++ /dev/null
@@ -1,85 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: reranking-dependency-deploy
-  namespace: default
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: reranking-dependency-deploy
-  template:
-    metadata:
-      annotations:
-        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
-      labels:
-        app: reranking-dependency-deploy
-    spec:
-      nodeSelector:
-        node-type: chatqna-opea
-      topologySpreadConstraints:
-      - maxSkew: 1
-        topologyKey: kubernetes.io/hostname
-        whenUnsatisfiable: ScheduleAnyway
-        labelSelector:
-          matchLabels:
-            app: reranking-dependency-deploy
-      containers:
-      - envFrom:
-        - configMapRef:
-            name: qna-config
-        image: opea/tei-gaudi:latest
-        name: reranking-dependency-deploy
-        args:
-        - --model-id
-        - $(RERANK_MODEL_ID)
-        - --auto-truncate
-        volumeMounts:
-        - mountPath: /data
-          name: model-volume
-        - mountPath: /dev/shm
-          name: shm
-        ports:
-        - containerPort: 80
-        resources:
-          limits:
-            habana.ai/gaudi: 1
-        env:
-        - name: OMPI_MCA_btl_vader_single_copy_mechanism
-          value: none
-        - name: PT_HPU_ENABLE_LAZY_COLLECTIVES
-          value: 'true'
-        - name: runtime
-          value: habana
-        - name: HABANA_VISIBLE_DEVICES
-          value: all
-        - name: HF_TOKEN
-          value: ${HF_TOKEN}
-        - name: MAX_WARMUP_SEQUENCE_LENGTH
-          value: '512'
-      serviceAccountName: default
-      volumes:
-      - name: model-volume
-        hostPath:
-          path: /mnt/models
-          type: Directory
-      - name: shm
-        emptyDir:
-          medium: Memory
-          sizeLimit: 1Gi
----
-kind: Service
-apiVersion: v1
-metadata:
-  name: reranking-dependency-svc
-spec:
-  type: ClusterIP
-  selector:
-    app: reranking-dependency-deploy
-  ports:
-  - name: service
-    port: 8808
-    targetPort: 80
diff --git a/ChatQnA/benchmark/oob/with_rerank/two_gaudi/reranking-microservice_run.yaml b/ChatQnA/benchmark/oob/with_rerank/two_gaudi/reranking-microservice_run.yaml
deleted file mode 100644
index 0723d46a8..000000000
--- a/ChatQnA/benchmark/oob/with_rerank/two_gaudi/reranking-microservice_run.yaml
+++ /dev/null
@@ -1,54 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: reranking-deploy
-  namespace: default
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: reranking-deploy
-  template:
-    metadata:
-      annotations:
-        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
-      labels:
-        app: reranking-deploy
-    spec:
-      nodeSelector:
-        node-type: chatqna-opea
-      topologySpreadConstraints:
-      - maxSkew: 1
-        topologyKey: kubernetes.io/hostname
-        whenUnsatisfiable: ScheduleAnyway
-        labelSelector:
-          matchLabels:
-            app: reranking-deploy
-      hostIPC: true
-      containers:
-      - envFrom:
-        - configMapRef:
-            name: qna-config
-        image: opea/reranking-tei:latest
-        imagePullPolicy: IfNotPresent
-        name: reranking-deploy
-        args: null
-        ports:
-        - containerPort: 8000
-      serviceAccountName: default
----
-kind: Service
-apiVersion: v1
-metadata:
-  name: reranking-svc
-spec:
-  type: ClusterIP
-  selector:
-    app: reranking-deploy
-  ports:
-  - name: service
-    port: 8000
-    targetPort: 8000
diff --git a/ChatQnA/benchmark/oob/with_rerank/two_gaudi/retrieval-microservice_run.yaml b/ChatQnA/benchmark/oob/with_rerank/two_gaudi/retrieval-microservice_run.yaml
deleted file mode 100644
index ac6c12fdc..000000000
--- a/ChatQnA/benchmark/oob/with_rerank/two_gaudi/retrieval-microservice_run.yaml
+++ /dev/null
@@ -1,72 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: retriever-deploy
-  namespace: default
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: retriever-deploy
-  template:
-    metadata:
-      annotations:
-        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
-      labels:
-        app: retriever-deploy
-    spec:
-      nodeSelector:
-        node-type: chatqna-opea
-      topologySpreadConstraints:
-      - maxSkew: 1
-        topologyKey: kubernetes.io/hostname
-        whenUnsatisfiable: ScheduleAnyway
-        labelSelector:
-          matchLabels:
-            app: retriever-deploy
-      hostIPC: true
-      containers:
-      - env:
-        - name: REDIS_URL
-          valueFrom:
-            configMapKeyRef:
-              name: qna-config
-              key: REDIS_URL
-        - name: TEI_EMBEDDING_ENDPOINT
-          valueFrom:
-            configMapKeyRef:
-              name: qna-config
-              key: TEI_EMBEDDING_ENDPOINT
-        - name: HUGGINGFACEHUB_API_TOKEN
-          valueFrom:
-            configMapKeyRef:
-              name: qna-config
-              key: HUGGINGFACEHUB_API_TOKEN
-        - name: INDEX_NAME
-          valueFrom:
-            configMapKeyRef:
-              name: qna-config
-              key: INDEX_NAME
-        image: opea/retriever-redis:latest
-        imagePullPolicy: IfNotPresent
-        name: retriever-deploy
-        args: null
-        ports:
-        - containerPort: 7000
-      serviceAccountName: default
----
-kind: Service
-apiVersion: v1
-metadata:
-  name: retriever-svc
-spec:
-  type: ClusterIP
-  selector:
-    app: retriever-deploy
-  ports:
-  - name: service
-    port: 7000
-    targetPort: 7000
diff --git a/ChatQnA/benchmark/oob/with_rerank/two_gaudi/vector-db_run.yaml b/ChatQnA/benchmark/oob/with_rerank/two_gaudi/vector-db_run.yaml
deleted file mode 100644
index e04e8c5fe..000000000
--- a/ChatQnA/benchmark/oob/with_rerank/two_gaudi/vector-db_run.yaml
+++ /dev/null
@@ -1,48 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: vector-db
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: vector-db
-  template:
-    metadata:
-      labels:
-        app: vector-db
-    spec:
-      nodeSelector:
-        node-type: chatqna-opea
-      topologySpreadConstraints:
-      - maxSkew: 1
-        topologyKey: kubernetes.io/hostname
-        whenUnsatisfiable: ScheduleAnyway
-        labelSelector:
-          matchLabels:
-            app: vector-db
-      containers:
-      - name: vector-db
-        image: redis/redis-stack:7.2.0-v9
-        ports:
-        - containerPort: 6379
-        - containerPort: 8001
----
-apiVersion: v1
-kind: Service
-metadata:
-  name: vector-db
-spec:
-  type: ClusterIP
-  selector:
-    app: vector-db
-  ports:
-  - name: vector-db-service
-    port: 6379
-    targetPort: 6379
-  - name: vector-db-insight
-    port: 8001
-    targetPort: 8001
diff --git a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/chatqna_config_map.yaml b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/chatqna_config_map.yaml
deleted file mode 100644
index 368c800e4..000000000
--- a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/chatqna_config_map.yaml
+++ /dev/null
@@ -1,23 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: v1
-kind: ConfigMap
-metadata:
-  name: qna-config
-  namespace: default
-data:
-  EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
-  RERANK_MODEL_ID: BAAI/bge-reranker-base
-  LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
-  TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
-  TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
-  TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
-  REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
-  INDEX_NAME: rag-redis
-  HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
-  EMBEDDING_SERVICE_HOST_IP: embedding-svc
-  RETRIEVER_SERVICE_HOST_IP: retriever-svc
-  RERANK_SERVICE_HOST_IP: reranking-svc
-  NODE_SELECTOR: chatqna-opea
-  LLM_SERVICE_HOST_IP: llm-svc
diff --git a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/chatqna_mega_service_run.yaml b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/chatqna_mega_service_run.yaml
deleted file mode 100644
index 687fdc51e..000000000
--- a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/chatqna_mega_service_run.yaml
+++ /dev/null
@@ -1,55 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: chatqna-backend-server-deploy
-  namespace: default
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: chatqna-backend-server-deploy
-  template:
-    metadata:
-      annotations:
-        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
-      labels:
-        app: chatqna-backend-server-deploy
-    spec:
-      nodeSelector:
-        node-type: chatqna-opea
-      topologySpreadConstraints:
-      - maxSkew: 1
-        topologyKey: kubernetes.io/hostname
-        whenUnsatisfiable: ScheduleAnyway
-        labelSelector:
-          matchLabels:
-            app: chatqna-backend-server-deploy
-      hostIPC: true
-      containers:
-      - envFrom:
-        - configMapRef:
-            name: qna-config
-        image: opea/chatqna-without-rerank:latest
-        imagePullPolicy: IfNotPresent
-        name: chatqna-backend-server-deploy
-        args: null
-        ports:
-        - containerPort: 8888
-      serviceAccountName: default
----
-kind: Service
-apiVersion: v1
-metadata:
-  name: chatqna-backend-server-svc
-spec:
-  type: NodePort
-  selector:
-    app: chatqna-backend-server-deploy
-  ports:
-  - name: service
-    port: 8888
-    targetPort: 8888
-    nodePort: 30888
diff --git a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/dataprep-microservice_run.yaml b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/dataprep-microservice_run.yaml
deleted file mode 100644
index 4c71df7ce..000000000
--- a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/dataprep-microservice_run.yaml
+++ /dev/null
@@ -1,75 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: dataprep-deploy
-  namespace: default
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: dataprep-deploy
-  template:
-    metadata:
-      annotations:
-        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
-      labels:
-        app: dataprep-deploy
-    spec:
-      nodeSelector:
-        node-type: chatqna-opea
-      topologySpreadConstraints:
-      - maxSkew: 1
-        topologyKey: kubernetes.io/hostname
-        whenUnsatisfiable: ScheduleAnyway
-        labelSelector:
-          matchLabels:
-            app: dataprep-deploy
-      hostIPC: true
-      containers:
-      - env:
-        - name: REDIS_URL
-          valueFrom:
-            configMapKeyRef:
-              name: qna-config
-              key: REDIS_URL
-        - name: TEI_ENDPOINT
-          valueFrom:
-            configMapKeyRef:
-              name: qna-config
-              key: TEI_EMBEDDING_ENDPOINT
-        - name: INDEX_NAME
-          valueFrom:
-            configMapKeyRef:
-              name: qna-config
-              key: INDEX_NAME
-        image: opea/dataprep-redis:latest
-        imagePullPolicy: IfNotPresent
-        name: dataprep-deploy
-        args: null
-        ports:
-        - containerPort: 6007
-        - containerPort: 6008
-        - containerPort: 6009
-      serviceAccountName: default
----
-kind: Service
-apiVersion: v1
-metadata:
-  name: dataprep-svc
-spec:
-  type: ClusterIP
-  selector:
-    app: dataprep-deploy
-  ports:
-  - name: port1
-    port: 6007
-    targetPort: 6007
-  - name: port2
-    port: 6008
-    targetPort: 6008
-  - name: port3
-    port: 6009
-    targetPort: 6009
diff --git a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/embedding-dependency_run.yaml b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/embedding-dependency_run.yaml
deleted file mode 100644
index 42a20871d..000000000
--- a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/embedding-dependency_run.yaml
+++ /dev/null
@@ -1,62 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: embedding-dependency-deploy
-  namespace: default
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: embedding-dependency-deploy
-  template:
-    metadata:
-      annotations:
-        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
-      labels:
-        app: embedding-dependency-deploy
-    spec:
-      nodeSelector:
-        node-type: chatqna-opea
-      containers:
-      - envFrom:
-        - configMapRef:
-            name: qna-config
-        image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
-        name: embedding-dependency-deploy
-        args:
-        - --model-id
-        - $(EMBEDDING_MODEL_ID)
-        - --auto-truncate
-        volumeMounts:
-        - mountPath: /data
-          name: model-volume
-        - mountPath: /dev/shm
-          name: shm
-        ports:
-        - containerPort: 80
-      serviceAccountName: default
-      volumes:
-      - name: model-volume
-        hostPath:
-          path: /mnt/models
-          type: Directory
-      - name: shm
-        emptyDir:
-          medium: Memory
-          sizeLimit: 1Gi
----
-kind: Service
-apiVersion: v1
-metadata:
-  name: embedding-dependency-svc
-spec:
-  type: ClusterIP
-  selector:
-    app: embedding-dependency-deploy
-  ports:
-  - name: service
-    port: 6006
-    targetPort: 80
diff --git a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/embedding-microservice_run.yaml b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/embedding-microservice_run.yaml
deleted file mode 100644
index 3af5b9859..000000000
--- a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/embedding-microservice_run.yaml
+++ /dev/null
@@ -1,54 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: embedding-deploy
-  namespace: default
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: embedding-deploy
-  template:
-    metadata:
-      annotations:
-        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
-      labels:
-        app: embedding-deploy
-    spec:
-      nodeSelector:
-        node-type: chatqna-opea
-      topologySpreadConstraints:
-      - maxSkew: 1
-        topologyKey: kubernetes.io/hostname
-        whenUnsatisfiable: ScheduleAnyway
-        labelSelector:
-          matchLabels:
-            app: embedding-deploy
-      hostIPC: true
-      containers:
-      - envFrom:
-        - configMapRef:
-            name: qna-config
-        image: opea/embedding-tei:latest
-        imagePullPolicy: IfNotPresent
-        name: embedding-deploy
-        args: null
-        ports:
-        - containerPort: 6000
-      serviceAccountName: default
----
-kind: Service
-apiVersion: v1
-metadata:
-  name: embedding-svc
-spec:
-  type: ClusterIP
-  selector:
-    app: embedding-deploy
-  ports:
-  - name: service
-    port: 6000
-    targetPort: 6000
diff --git a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/llm-dependency_run.yaml b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/llm-dependency_run.yaml
deleted file mode 100644
index 64b4197db..000000000
--- a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/llm-dependency_run.yaml
+++ /dev/null
@@ -1,88 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: llm-dependency-deploy
-  namespace: default
-spec:
-  replicas: 32
-  selector:
-    matchLabels:
-      app: llm-dependency-deploy
-  template:
-    metadata:
-      annotations:
-        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
-      labels:
-        app: llm-dependency-deploy
-    spec:
-      nodeSelector:
-        node-type: chatqna-opea
-      hostIPC: true
-      containers:
-      - envFrom:
-        - configMapRef:
-            name: qna-config
-        image: ghcr.io/huggingface/tgi-gaudi:2.0.1
-        name: llm-dependency-deploy-demo
-        securityContext:
-          capabilities:
-            add:
-            - SYS_NICE
-        args:
-        - --model-id
-        - $(LLM_MODEL_ID)
-        - --max-input-length
-        - '2048'
-        - --max-total-tokens
-        - '4096'
-        - --max-batch-total-tokens
-        - '65536'
-        - --max-batch-prefill-tokens
-        - '4096'
-        volumeMounts:
-        - mountPath: /data
-          name: model-volume
-        - mountPath: /dev/shm
-          name: shm
-        ports:
-        - containerPort: 80
-        resources:
-          limits:
-            habana.ai/gaudi: 1
-        env:
-        - name: OMPI_MCA_btl_vader_single_copy_mechanism
-          value: none
-        - name: PT_HPU_ENABLE_LAZY_COLLECTIVES
-          value: 'true'
-        - name: runtime
-          value: habana
-        - name: HABANA_VISIBLE_DEVICES
-          value: all
-        - name: HF_TOKEN
-          value: ${HF_TOKEN}
-      serviceAccountName: default
-      volumes:
-      - name: model-volume
-        hostPath:
-          path: /mnt/models
-          type: Directory
-      - name: shm
-        emptyDir:
-          medium: Memory
-          sizeLimit: 1Gi
----
-kind: Service
-apiVersion: v1
-metadata:
-  name: llm-dependency-svc
-spec:
-  type: ClusterIP
-  selector:
-    app: llm-dependency-deploy
-  ports:
-  - name: service
-    port: 9009
-    targetPort: 80
diff --git a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/llm-microservice_run.yaml b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/llm-microservice_run.yaml
deleted file mode 100644
index 3056dbc1d..000000000
--- a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/llm-microservice_run.yaml
+++ /dev/null
@@ -1,54 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: llm-deploy
-  namespace: default
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: llm-deploy
-  template:
-    metadata:
-      annotations:
-        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
-      labels:
-        app: llm-deploy
-    spec:
-      nodeSelector:
-        node-type: chatqna-opea
-      topologySpreadConstraints:
-      - maxSkew: 1
-        topologyKey: kubernetes.io/hostname
-        whenUnsatisfiable: ScheduleAnyway
-        labelSelector:
-          matchLabels:
-            app: llm-deploy
-      hostIPC: true
-      containers:
-      - envFrom:
-        - configMapRef:
-            name: qna-config
-        image: opea/llm-tgi:latest
-        imagePullPolicy: IfNotPresent
-        name: llm-deploy
-        args: null
-        ports:
-        - containerPort: 9000
-      serviceAccountName: default
----
-kind: Service
-apiVersion: v1
-metadata:
-  name: llm-svc
-spec:
-  type: ClusterIP
-  selector:
-    app: llm-deploy
-  ports:
-  - name: service
-    port: 9000
-    targetPort: 9000
diff --git a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/oob_four_gaudi_without_rerank.yaml b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/oob_four_gaudi_without_rerank.yaml
new file mode 100644
index 000000000..e010496b8
--- /dev/null
+++ b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/oob_four_gaudi_without_rerank.yaml
@@ -0,0 +1,734 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+kind: ConfigMap  # shared settings read by the Deployments below (envFrom / configMapKeyRef)
+metadata:
+  name: qna-config
+  namespace: default
+data:
+  EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
+  RERANK_MODEL_ID: BAAI/bge-reranker-base
+  LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
+  TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
+  TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
+  TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
+  REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
+  INDEX_NAME: rag-redis
+  HUGGINGFACEHUB_API_TOKEN: '${HF_TOKEN}'  # placeholder; quoted so an empty expansion stays a string, not null
+  EMBEDDING_SERVICE_HOST_IP: embedding-svc
+  RETRIEVER_SERVICE_HOST_IP: retriever-svc
+  RERANK_SERVICE_HOST_IP: reranking-svc
+  NODE_SELECTOR: chatqna-opea
+  LLM_SERVICE_HOST_IP: llm-svc
+
+
+---
+
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment  # ChatQnA mega-service backend
+metadata:
+  namespace: default
+  name: chatqna-backend-server-deploy
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: chatqna-backend-server-deploy
+  template:
+    metadata:
+      labels:
+        app: chatqna-backend-server-deploy
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+    spec:
+      serviceAccountName: default
+      hostIPC: true
+      nodeSelector:
+        node-type: chatqna-opea
+      topologySpreadConstraints:
+      - topologyKey: kubernetes.io/hostname
+        maxSkew: 1
+        whenUnsatisfiable: ScheduleAnyway
+        labelSelector:
+          matchLabels:
+            app: chatqna-backend-server-deploy
+      containers:
+      - name: chatqna-backend-server-deploy
+        image: opea/chatqna-without-rerank:latest
+        imagePullPolicy: IfNotPresent
+        args: null
+        envFrom:  # every key of qna-config becomes an environment variable
+        - configMapRef:
+            name: qna-config
+        ports:
+        - containerPort: 8888
+---
+apiVersion: v1
+kind: Service
+metadata:
+  namespace: default
+  name: chatqna-backend-server-svc
+spec:
+  selector:
+    app: chatqna-backend-server-deploy
+  type: NodePort  # the only Service in this file that is not ClusterIP
+  ports:
+  - name: service
+    nodePort: 30888
+    port: 8888
+    targetPort: 8888
+
+
+---
+
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment  # data-preparation microservice backed by Redis
+metadata:
+  namespace: default
+  name: dataprep-deploy
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: dataprep-deploy
+  template:
+    metadata:
+      labels:
+        app: dataprep-deploy
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+    spec:
+      serviceAccountName: default
+      hostIPC: true
+      nodeSelector:
+        node-type: chatqna-opea
+      topologySpreadConstraints:
+      - topologyKey: kubernetes.io/hostname
+        maxSkew: 1
+        whenUnsatisfiable: ScheduleAnyway
+        labelSelector:
+          matchLabels:
+            app: dataprep-deploy
+      containers:
+      - name: dataprep-deploy
+        image: opea/dataprep-redis:latest
+        imagePullPolicy: IfNotPresent
+        args: null
+        ports:
+        - containerPort: 6007
+        - containerPort: 6008
+        - containerPort: 6009
+        env:  # only the keys this service needs are pulled from qna-config
+        - name: REDIS_URL
+          valueFrom:
+            configMapKeyRef:
+              key: REDIS_URL
+              name: qna-config
+        - name: TEI_ENDPOINT  # exposed under a different name than the ConfigMap key
+          valueFrom:
+            configMapKeyRef:
+              key: TEI_EMBEDDING_ENDPOINT
+              name: qna-config
+        - name: INDEX_NAME
+          valueFrom:
+            configMapKeyRef:
+              key: INDEX_NAME
+              name: qna-config
+---
+apiVersion: v1
+kind: Service
+metadata:
+  namespace: default
+  name: dataprep-svc
+spec:
+  selector:
+    app: dataprep-deploy
+  type: ClusterIP
+  ports:  # one entry per container port declared by dataprep-deploy
+  - name: port1
+    targetPort: 6007
+    port: 6007
+  - name: port2
+    targetPort: 6008
+    port: 6008
+  - name: port3
+    targetPort: 6009
+    port: 6009
+
+
+---
+
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment  # text-embeddings-inference server (CPU image)
+metadata:
+  namespace: default
+  name: embedding-dependency-deploy
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: embedding-dependency-deploy
+  template:
+    metadata:
+      labels:
+        app: embedding-dependency-deploy
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+    spec:
+      serviceAccountName: default
+      nodeSelector:
+        node-type: chatqna-opea
+      volumes:
+      - name: model-volume
+        hostPath:
+          type: Directory
+          path: /mnt/models
+      - name: shm
+        emptyDir:
+          sizeLimit: 1Gi
+          medium: Memory
+      containers:
+      - name: embedding-dependency-deploy
+        image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
+        args:
+        - --model-id
+        - $(EMBEDDING_MODEL_ID)
+        - --auto-truncate
+        envFrom:  # supplies EMBEDDING_MODEL_ID referenced in args
+        - configMapRef:
+            name: qna-config
+        ports:
+        - containerPort: 80
+        volumeMounts:
+        - name: model-volume
+          mountPath: /data
+        - name: shm
+          mountPath: /dev/shm
+---
+apiVersion: v1
+kind: Service
+metadata:
+  namespace: default
+  name: embedding-dependency-svc
+spec:
+  selector:
+    app: embedding-dependency-deploy
+  type: ClusterIP
+  ports:
+  - name: service
+    targetPort: 80  # container port of embedding-dependency-deploy
+    port: 6006
+
+
+---
+
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment  # embedding microservice (opea/embedding-tei)
+metadata:
+  namespace: default
+  name: embedding-deploy
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: embedding-deploy
+  template:
+    metadata:
+      labels:
+        app: embedding-deploy
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+    spec:
+      serviceAccountName: default
+      hostIPC: true
+      nodeSelector:
+        node-type: chatqna-opea
+      topologySpreadConstraints:
+      - topologyKey: kubernetes.io/hostname
+        maxSkew: 1
+        whenUnsatisfiable: ScheduleAnyway
+        labelSelector:
+          matchLabels:
+            app: embedding-deploy
+      containers:
+      - name: embedding-deploy
+        image: opea/embedding-tei:latest
+        imagePullPolicy: IfNotPresent
+        args: null
+        envFrom:  # every key of qna-config becomes an environment variable
+        - configMapRef:
+            name: qna-config
+        ports:
+        - containerPort: 6000
+---
+apiVersion: v1
+kind: Service
+metadata:
+  namespace: default
+  name: embedding-svc  # matches EMBEDDING_SERVICE_HOST_IP in qna-config
+spec:
+  selector:
+    app: embedding-deploy
+  type: ClusterIP
+  ports:
+  - name: service
+    targetPort: 6000
+    port: 6000
+
+
+---
+
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment  # TGI (tgi-gaudi) inference server for LLM_MODEL_ID
+metadata:
+  name: llm-dependency-deploy
+  namespace: default
+spec:
+  replicas: 32  # each replica is limited to one habana.ai/gaudi device (see resources)
+  selector:
+    matchLabels:
+      app: llm-dependency-deploy
+  template:
+    metadata:
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+      labels:
+        app: llm-dependency-deploy
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea
+      hostIPC: true
+      containers:
+      - envFrom:  # supplies LLM_MODEL_ID referenced in args
+        - configMapRef:
+            name: qna-config
+        image: ghcr.io/huggingface/tgi-gaudi:2.0.1
+        name: llm-dependency-deploy-demo
+        securityContext:
+          capabilities:
+            add:
+            - SYS_NICE
+        args:
+        - --model-id
+        - $(LLM_MODEL_ID)
+        - --max-input-length
+        - '2048'
+        - --max-total-tokens
+        - '4096'
+        - --max-batch-total-tokens
+        - '65536'
+        - --max-batch-prefill-tokens
+        - '4096'
+        volumeMounts:
+        - mountPath: /data
+          name: model-volume
+        - mountPath: /dev/shm
+          name: shm
+        ports:
+        - containerPort: 80
+        resources:
+          limits:
+            habana.ai/gaudi: 1
+        env:
+        - name: OMPI_MCA_btl_vader_single_copy_mechanism
+          value: none
+        - name: PT_HPU_ENABLE_LAZY_COLLECTIVES
+          value: 'true'
+        - name: runtime
+          value: habana
+        - name: HABANA_VISIBLE_DEVICES
+          value: all
+        - name: HF_TOKEN
+          value: '${HF_TOKEN}'  # placeholder; quoted so the substituted value is always a string
+      serviceAccountName: default
+      volumes:
+      - name: model-volume
+        hostPath:
+          path: /mnt/models
+          type: Directory  # the host directory must already exist
+      - name: shm
+        emptyDir:
+          medium: Memory
+          sizeLimit: 1Gi
+---
+apiVersion: v1
+kind: Service
+metadata:
+  namespace: default
+  name: llm-dependency-svc
+spec:
+  selector:
+    app: llm-dependency-deploy
+  type: ClusterIP
+  ports:
+  - name: service
+    targetPort: 80  # container port of llm-dependency-deploy
+    port: 9009
+
+
+---
+
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment  # LLM microservice (opea/llm-tgi)
+metadata:
+  namespace: default
+  name: llm-deploy
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: llm-deploy
+  template:
+    metadata:
+      labels:
+        app: llm-deploy
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+    spec:
+      serviceAccountName: default
+      hostIPC: true
+      nodeSelector:
+        node-type: chatqna-opea
+      topologySpreadConstraints:
+      - topologyKey: kubernetes.io/hostname
+        maxSkew: 1
+        whenUnsatisfiable: ScheduleAnyway
+        labelSelector:
+          matchLabels:
+            app: llm-deploy
+      containers:
+      - name: llm-deploy
+        image: opea/llm-tgi:latest
+        imagePullPolicy: IfNotPresent
+        args: null
+        envFrom:  # every key of qna-config becomes an environment variable
+        - configMapRef:
+            name: qna-config
+        ports:
+        - containerPort: 9000
+---
+apiVersion: v1
+kind: Service
+metadata:
+  namespace: default
+  name: llm-svc  # matches LLM_SERVICE_HOST_IP in qna-config
+spec:
+  selector:
+    app: llm-deploy
+  type: ClusterIP
+  ports:
+  - name: service
+    targetPort: 9000
+    port: 9000
+
+
+---
+
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment  # TEI (tei-gaudi) server for RERANK_MODEL_ID
+metadata:
+  name: reranking-dependency-deploy
+  namespace: default
+spec:
+  replicas: 1  # NOTE(review): backend image is chatqna-without-rerank; confirm this is needed
+  selector:
+    matchLabels:
+      app: reranking-dependency-deploy
+  template:
+    metadata:
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+      labels:
+        app: reranking-dependency-deploy
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea
+      topologySpreadConstraints:
+      - maxSkew: 1
+        topologyKey: kubernetes.io/hostname
+        whenUnsatisfiable: ScheduleAnyway
+        labelSelector:
+          matchLabels:
+            app: reranking-dependency-deploy
+      containers:
+      - envFrom:  # supplies RERANK_MODEL_ID referenced in args
+        - configMapRef:
+            name: qna-config
+        image: opea/tei-gaudi:latest
+        name: reranking-dependency-deploy
+        args:
+        - --model-id
+        - $(RERANK_MODEL_ID)
+        - --auto-truncate
+        volumeMounts:
+        - mountPath: /data
+          name: model-volume
+        - mountPath: /dev/shm
+          name: shm
+        ports:
+        - containerPort: 80
+        resources:
+          limits:
+            habana.ai/gaudi: 1  # reserves one Gaudi device for this pod
+        env:
+        - name: OMPI_MCA_btl_vader_single_copy_mechanism
+          value: none
+        - name: PT_HPU_ENABLE_LAZY_COLLECTIVES
+          value: 'true'
+        - name: runtime
+          value: habana
+        - name: HABANA_VISIBLE_DEVICES
+          value: all
+        - name: HF_TOKEN
+          value: '${HF_TOKEN}'  # placeholder; quoted so the substituted value is always a string
+        - name: MAX_WARMUP_SEQUENCE_LENGTH
+          value: '512'
+      serviceAccountName: default
+      volumes:
+      - name: model-volume
+        hostPath:
+          path: /mnt/models
+          type: Directory  # the host directory must already exist
+      - name: shm
+        emptyDir:
+          medium: Memory
+          sizeLimit: 1Gi
+---
+apiVersion: v1
+kind: Service
+metadata:
+  namespace: default
+  name: reranking-dependency-svc
+spec:
+  selector:
+    app: reranking-dependency-deploy
+  type: ClusterIP
+  ports:
+  - name: service
+    targetPort: 80  # container port of reranking-dependency-deploy
+    port: 8808
+
+
+---
+
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment  # reranking microservice (opea/reranking-tei)
+metadata:
+  namespace: default
+  name: reranking-deploy
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: reranking-deploy
+  template:
+    metadata:
+      labels:
+        app: reranking-deploy
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+    spec:
+      serviceAccountName: default
+      hostIPC: true
+      nodeSelector:
+        node-type: chatqna-opea
+      topologySpreadConstraints:
+      - topologyKey: kubernetes.io/hostname
+        maxSkew: 1
+        whenUnsatisfiable: ScheduleAnyway
+        labelSelector:
+          matchLabels:
+            app: reranking-deploy
+      containers:
+      - name: reranking-deploy
+        image: opea/reranking-tei:latest
+        imagePullPolicy: IfNotPresent
+        args: null
+        envFrom:  # every key of qna-config becomes an environment variable
+        - configMapRef:
+            name: qna-config
+        ports:
+        - containerPort: 8000
+---
+apiVersion: v1
+kind: Service
+metadata:
+  namespace: default
+  name: reranking-svc  # matches RERANK_SERVICE_HOST_IP in qna-config
+spec:
+  selector:
+    app: reranking-deploy
+  type: ClusterIP
+  ports:
+  - name: service
+    targetPort: 8000
+    port: 8000
+
+
+---
+
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment  # retriever microservice backed by Redis
+metadata:
+  namespace: default
+  name: retriever-deploy
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: retriever-deploy
+  template:
+    metadata:
+      labels:
+        app: retriever-deploy
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+    spec:
+      serviceAccountName: default
+      hostIPC: true
+      nodeSelector:
+        node-type: chatqna-opea
+      topologySpreadConstraints:
+      - topologyKey: kubernetes.io/hostname
+        maxSkew: 1
+        whenUnsatisfiable: ScheduleAnyway
+        labelSelector:
+          matchLabels:
+            app: retriever-deploy
+      containers:
+      - name: retriever-deploy
+        image: opea/retriever-redis:latest
+        imagePullPolicy: IfNotPresent
+        args: null
+        ports:
+        - containerPort: 7000
+        env:  # only the keys this service needs are pulled from qna-config
+        - name: REDIS_URL
+          valueFrom:
+            configMapKeyRef:
+              key: REDIS_URL
+              name: qna-config
+        - name: TEI_EMBEDDING_ENDPOINT
+          valueFrom:
+            configMapKeyRef:
+              key: TEI_EMBEDDING_ENDPOINT
+              name: qna-config
+        - name: HUGGINGFACEHUB_API_TOKEN
+          valueFrom:
+            configMapKeyRef:
+              key: HUGGINGFACEHUB_API_TOKEN
+              name: qna-config
+        - name: INDEX_NAME
+          valueFrom:
+            configMapKeyRef:
+              key: INDEX_NAME
+              name: qna-config
+---
+apiVersion: v1
+kind: Service
+metadata:
+  namespace: default
+  name: retriever-svc  # matches RETRIEVER_SERVICE_HOST_IP in qna-config
+spec:
+  selector:
+    app: retriever-deploy
+  type: ClusterIP
+  ports:
+  - name: service
+    targetPort: 7000
+    port: 7000
+
+
+---
+
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment  # Redis Stack instance referenced by REDIS_URL in qna-config
+metadata:
+  namespace: default
+  name: vector-db
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: vector-db
+  template:
+    metadata:
+      labels:
+        app: vector-db
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea
+      topologySpreadConstraints:
+      - topologyKey: kubernetes.io/hostname
+        maxSkew: 1
+        whenUnsatisfiable: ScheduleAnyway
+        labelSelector:
+          matchLabels:
+            app: vector-db
+      containers:
+      - image: redis/redis-stack:7.2.0-v9
+        name: vector-db
+        ports:
+        - containerPort: 6379
+        - containerPort: 8001
+---
+kind: Service
+apiVersion: v1
+metadata:
+  namespace: default
+  name: vector-db  # host name used in REDIS_URL (vector-db.default.svc.cluster.local)
+spec:
+  selector:
+    app: vector-db
+  type: ClusterIP
+  ports:
+  - name: vector-db-service
+    targetPort: 6379
+    port: 6379
+  - name: vector-db-insight
+    targetPort: 8001
+    port: 8001
+
+
+---
diff --git a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/reranking-dependency_run.yaml b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/reranking-dependency_run.yaml
deleted file mode 100644
index af908ecd1..000000000
--- a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/reranking-dependency_run.yaml
+++ /dev/null
@@ -1,85 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: reranking-dependency-deploy
-  namespace: default
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: reranking-dependency-deploy
-  template:
-    metadata:
-      annotations:
-        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
-      labels:
-        app: reranking-dependency-deploy
-    spec:
-      nodeSelector:
-        node-type: chatqna-opea
-      topologySpreadConstraints:
-      - maxSkew: 1
-        topologyKey: kubernetes.io/hostname
-        whenUnsatisfiable: ScheduleAnyway
-        labelSelector:
-          matchLabels:
-            app: reranking-dependency-deploy
-      containers:
-      - envFrom:
-        - configMapRef:
-            name: qna-config
-        image: opea/tei-gaudi:latest
-        name: reranking-dependency-deploy
-        args:
-        - --model-id
-        - $(RERANK_MODEL_ID)
-        - --auto-truncate
-        volumeMounts:
-        - mountPath: /data
-          name: model-volume
-        - mountPath: /dev/shm
-          name: shm
-        ports:
-        - containerPort: 80
-        resources:
-          limits:
-            habana.ai/gaudi: 1
-        env:
-        - name: OMPI_MCA_btl_vader_single_copy_mechanism
-          value: none
-        - name: PT_HPU_ENABLE_LAZY_COLLECTIVES
-          value: 'true'
-        - name: runtime
-          value: habana
-        - name: HABANA_VISIBLE_DEVICES
-          value: all
-        - name: HF_TOKEN
-          value: ${HF_TOKEN}
-        - name: MAX_WARMUP_SEQUENCE_LENGTH
-          value: '512'
-      serviceAccountName: default
-      volumes:
-      - name: model-volume
-        hostPath:
-          path: /mnt/models
-          type: Directory
-      - name: shm
-        emptyDir:
-          medium: Memory
-          sizeLimit: 1Gi
----
-kind: Service
-apiVersion: v1
-metadata:
-  name: reranking-dependency-svc
-spec:
-  type: ClusterIP
-  selector:
-    app: reranking-dependency-deploy
-  ports:
-  - name: service
-    port: 8808
-    targetPort: 80
diff --git a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/reranking-microservice_run.yaml b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/reranking-microservice_run.yaml
deleted file mode 100644
index 0723d46a8..000000000
--- a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/reranking-microservice_run.yaml
+++ /dev/null
@@ -1,54 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: reranking-deploy
-  namespace: default
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: reranking-deploy
-  template:
-    metadata:
-      annotations:
-        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
-      labels:
-        app: reranking-deploy
-    spec:
-      nodeSelector:
-        node-type: chatqna-opea
-      topologySpreadConstraints:
-      - maxSkew: 1
-        topologyKey: kubernetes.io/hostname
-        whenUnsatisfiable: ScheduleAnyway
-        labelSelector:
-          matchLabels:
-            app: reranking-deploy
-      hostIPC: true
-      containers:
-      - envFrom:
-        - configMapRef:
-            name: qna-config
-        image: opea/reranking-tei:latest
-        imagePullPolicy: IfNotPresent
-        name: reranking-deploy
-        args: null
-        ports:
-        - containerPort: 8000
-      serviceAccountName: default
----
-kind: Service
-apiVersion: v1
-metadata:
-  name: reranking-svc
-spec:
-  type: ClusterIP
-  selector:
-    app: reranking-deploy
-  ports:
-  - name: service
-    port: 8000
-    targetPort: 8000
diff --git a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/retrieval-microservice_run.yaml b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/retrieval-microservice_run.yaml
deleted file mode 100644
index ac6c12fdc..000000000
--- a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/retrieval-microservice_run.yaml
+++ /dev/null
@@ -1,72 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: retriever-deploy
-  namespace: default
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: retriever-deploy
-  template:
-    metadata:
-      annotations:
-        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
-      labels:
-        app: retriever-deploy
-    spec:
-      nodeSelector:
-        node-type: chatqna-opea
-      topologySpreadConstraints:
-      - maxSkew: 1
-        topologyKey: kubernetes.io/hostname
-        whenUnsatisfiable: ScheduleAnyway
-        labelSelector:
-          matchLabels:
-            app: retriever-deploy
-      hostIPC: true
-      containers:
-      - env:
-        - name: REDIS_URL
-          valueFrom:
-            configMapKeyRef:
-              name: qna-config
-              key: REDIS_URL
-        - name: TEI_EMBEDDING_ENDPOINT
-          valueFrom:
-            configMapKeyRef:
-              name: qna-config
-              key: TEI_EMBEDDING_ENDPOINT
-        - name: HUGGINGFACEHUB_API_TOKEN
-          valueFrom:
-            configMapKeyRef:
-              name: qna-config
-              key: HUGGINGFACEHUB_API_TOKEN
-        - name: INDEX_NAME
-          valueFrom:
-            configMapKeyRef:
-              name: qna-config
-              key: INDEX_NAME
-        image: opea/retriever-redis:latest
-        imagePullPolicy: IfNotPresent
-        name: retriever-deploy
-        args: null
-        ports:
-        - containerPort: 7000
-      serviceAccountName: default
----
-kind: Service
-apiVersion: v1
-metadata:
-  name: retriever-svc
-spec:
-  type: ClusterIP
-  selector:
-    app: retriever-deploy
-  ports:
-  - name: service
-    port: 7000
-    targetPort: 7000
diff --git a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/vector-db_run.yaml b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/vector-db_run.yaml
deleted file mode 100644
index e04e8c5fe..000000000
--- a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/vector-db_run.yaml
+++ /dev/null
@@ -1,48 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: vector-db
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: vector-db
-  template:
-    metadata:
-      labels:
-        app: vector-db
-    spec:
-      nodeSelector:
-        node-type: chatqna-opea
-      topologySpreadConstraints:
-      - maxSkew: 1
-        topologyKey: kubernetes.io/hostname
-        whenUnsatisfiable: ScheduleAnyway
-        labelSelector:
-          matchLabels:
-            app: vector-db
-      containers:
-      - name: vector-db
-        image: redis/redis-stack:7.2.0-v9
-        ports:
-        - containerPort: 6379
-        - containerPort: 8001
----
-apiVersion: v1
-kind: Service
-metadata:
-  name: vector-db
-spec:
-  type: ClusterIP
-  selector:
-    app: vector-db
-  ports:
-  - name: vector-db-service
-    port: 6379
-    targetPort: 6379
-  - name: vector-db-insight
-    port: 8001
-    targetPort: 8001
diff --git a/ChatQnA/benchmark/oob/without_rerank/single_gaudi/chatqna_config_map.yaml b/ChatQnA/benchmark/oob/without_rerank/single_gaudi/chatqna_config_map.yaml
deleted file mode 100644
index 368c800e4..000000000
--- a/ChatQnA/benchmark/oob/without_rerank/single_gaudi/chatqna_config_map.yaml
+++ /dev/null
@@ -1,23 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: v1
-kind: ConfigMap
-metadata:
-  name: qna-config
-  namespace: default
-data:
-  EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
-  RERANK_MODEL_ID: BAAI/bge-reranker-base
-  LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
-  TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
-  TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
-  TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
-  REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
-  INDEX_NAME: rag-redis
-  HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
-  EMBEDDING_SERVICE_HOST_IP: embedding-svc
-  RETRIEVER_SERVICE_HOST_IP: retriever-svc
-  RERANK_SERVICE_HOST_IP: reranking-svc
-  NODE_SELECTOR: chatqna-opea
-  LLM_SERVICE_HOST_IP: llm-svc
diff --git a/ChatQnA/benchmark/oob/without_rerank/single_gaudi/chatqna_mega_service_run.yaml b/ChatQnA/benchmark/oob/without_rerank/single_gaudi/chatqna_mega_service_run.yaml
deleted file mode 100644
index 687fdc51e..000000000
--- a/ChatQnA/benchmark/oob/without_rerank/single_gaudi/chatqna_mega_service_run.yaml
+++ /dev/null
@@ -1,55 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: chatqna-backend-server-deploy
-  namespace: default
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: chatqna-backend-server-deploy
-  template:
-    metadata:
-      annotations:
-        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
-      labels:
-        app: chatqna-backend-server-deploy
-    spec:
-      nodeSelector:
-        node-type: chatqna-opea
-      topologySpreadConstraints:
-      - maxSkew: 1
-        topologyKey: kubernetes.io/hostname
-        whenUnsatisfiable: ScheduleAnyway
-        labelSelector:
-          matchLabels:
-            app: chatqna-backend-server-deploy
-      hostIPC: true
-      containers:
-      - envFrom:
-        - configMapRef:
-            name: qna-config
-        image: opea/chatqna-without-rerank:latest
-        imagePullPolicy: IfNotPresent
-        name: chatqna-backend-server-deploy
-        args: null
-        ports:
-        - containerPort: 8888
-      serviceAccountName: default
----
-kind: Service
-apiVersion: v1
-metadata:
-  name: chatqna-backend-server-svc
-spec:
-  type: NodePort
-  selector:
-    app: chatqna-backend-server-deploy
-  ports:
-  - name: service
-    port: 8888
-    targetPort: 8888
-    nodePort: 30888
diff --git a/ChatQnA/benchmark/oob/without_rerank/single_gaudi/dataprep-microservice_run.yaml b/ChatQnA/benchmark/oob/without_rerank/single_gaudi/dataprep-microservice_run.yaml
deleted file mode 100644
index 4c71df7ce..000000000
--- a/ChatQnA/benchmark/oob/without_rerank/single_gaudi/dataprep-microservice_run.yaml
+++ /dev/null
@@ -1,75 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: dataprep-deploy
-  namespace: default
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: dataprep-deploy
-  template:
-    metadata:
-      annotations:
-        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
-      labels:
-        app: dataprep-deploy
-    spec:
-      nodeSelector:
-        node-type: chatqna-opea
-      topologySpreadConstraints:
-      - maxSkew: 1
-        topologyKey: kubernetes.io/hostname
-        whenUnsatisfiable: ScheduleAnyway
-        labelSelector:
-          matchLabels:
-            app: dataprep-deploy
-      hostIPC: true
-      containers:
-      - env:
-        - name: REDIS_URL
-          valueFrom:
-            configMapKeyRef:
-              name: qna-config
-              key: REDIS_URL
-        - name: TEI_ENDPOINT
-          valueFrom:
-            configMapKeyRef:
-              name: qna-config
-              key: TEI_EMBEDDING_ENDPOINT
-        - name: INDEX_NAME
-          valueFrom:
-            configMapKeyRef:
-              name: qna-config
-              key: INDEX_NAME
-        image: opea/dataprep-redis:latest
-        imagePullPolicy: IfNotPresent
-        name: dataprep-deploy
-        args: null
-        ports:
-        - containerPort: 6007
-        - containerPort: 6008
-        - containerPort: 6009
-      serviceAccountName: default
----
-kind: Service
-apiVersion: v1
-metadata:
-  name: dataprep-svc
-spec:
-  type: ClusterIP
-  selector:
-    app: dataprep-deploy
-  ports:
-  - name: port1
-    port: 6007
-    targetPort: 6007
-  - name: port2
-    port: 6008
-    targetPort: 6008
-  - name: port3
-    port: 6009
-    targetPort: 6009
diff --git a/ChatQnA/benchmark/oob/without_rerank/single_gaudi/embedding-dependency_run.yaml b/ChatQnA/benchmark/oob/without_rerank/single_gaudi/embedding-dependency_run.yaml
deleted file mode 100644
index 42a20871d..000000000
--- a/ChatQnA/benchmark/oob/without_rerank/single_gaudi/embedding-dependency_run.yaml
+++ /dev/null
@@ -1,62 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: embedding-dependency-deploy
-  namespace: default
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: embedding-dependency-deploy
-  template:
-    metadata:
-      annotations:
-        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
-      labels:
-        app: embedding-dependency-deploy
-    spec:
-      nodeSelector:
-        node-type: chatqna-opea
-      containers:
-      - envFrom:
-        - configMapRef:
-            name: qna-config
-        image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
-        name: embedding-dependency-deploy
-        args:
-        - --model-id
-        - $(EMBEDDING_MODEL_ID)
-        - --auto-truncate
-        volumeMounts:
-        - mountPath: /data
-          name: model-volume
-        - mountPath: /dev/shm
-          name: shm
-        ports:
-        - containerPort: 80
-      serviceAccountName: default
-      volumes:
-      - name: model-volume
-        hostPath:
-          path: /mnt/models
-          type: Directory
-      - name: shm
-        emptyDir:
-          medium: Memory
-          sizeLimit: 1Gi
----
-kind: Service
-apiVersion: v1
-metadata:
-  name: embedding-dependency-svc
-spec:
-  type: ClusterIP
-  selector:
-    app: embedding-dependency-deploy
-  ports:
-  - name: service
-    port: 6006
-    targetPort: 80
diff --git a/ChatQnA/benchmark/oob/without_rerank/single_gaudi/embedding-microservice_run.yaml b/ChatQnA/benchmark/oob/without_rerank/single_gaudi/embedding-microservice_run.yaml
deleted file mode 100644
index 3af5b9859..000000000
--- a/ChatQnA/benchmark/oob/without_rerank/single_gaudi/embedding-microservice_run.yaml
+++ /dev/null
@@ -1,54 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: embedding-deploy
-  namespace: default
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: embedding-deploy
-  template:
-    metadata:
-      annotations:
-        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
-      labels:
-        app: embedding-deploy
-    spec:
-      nodeSelector:
-        node-type: chatqna-opea
-      topologySpreadConstraints:
-      - maxSkew: 1
-        topologyKey: kubernetes.io/hostname
-        whenUnsatisfiable: ScheduleAnyway
-        labelSelector:
-          matchLabels:
-            app: embedding-deploy
-      hostIPC: true
-      containers:
-      - envFrom:
-        - configMapRef:
-            name: qna-config
-        image: opea/embedding-tei:latest
-        imagePullPolicy: IfNotPresent
-        name: embedding-deploy
-        args: null
-        ports:
-        - containerPort: 6000
-      serviceAccountName: default
----
-kind: Service
-apiVersion: v1
-metadata:
-  name: embedding-svc
-spec:
-  type: ClusterIP
-  selector:
-    app: embedding-deploy
-  ports:
-  - name: service
-    port: 6000
-    targetPort: 6000
diff --git a/ChatQnA/benchmark/oob/without_rerank/single_gaudi/llm-dependency_run.yaml b/ChatQnA/benchmark/oob/without_rerank/single_gaudi/llm-dependency_run.yaml
deleted file mode 100644
index bbf9d6aeb..000000000
--- a/ChatQnA/benchmark/oob/without_rerank/single_gaudi/llm-dependency_run.yaml
+++ /dev/null
@@ -1,88 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: llm-dependency-deploy
-  namespace: default
-spec:
-  replicas: 8
-  selector:
-    matchLabels:
-      app: llm-dependency-deploy
-  template:
-    metadata:
-      annotations:
-        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
-      labels:
-        app: llm-dependency-deploy
-    spec:
-      nodeSelector:
-        node-type: chatqna-opea
-      hostIPC: true
-      containers:
-      - envFrom:
-        - configMapRef:
-            name: qna-config
-        image: ghcr.io/huggingface/tgi-gaudi:2.0.1
-        name: llm-dependency-deploy-demo
-        securityContext:
-          capabilities:
-            add:
-            - SYS_NICE
-        args:
-        - --model-id
-        - $(LLM_MODEL_ID)
-        - --max-input-length
-        - '2048'
-        - --max-total-tokens
-        - '4096'
-        - --max-batch-total-tokens
-        - '65536'
-        - --max-batch-prefill-tokens
-        - '4096'
-        volumeMounts:
-        - mountPath: /data
-          name: model-volume
-        - mountPath: /dev/shm
-          name: shm
-        ports:
-        - containerPort: 80
-        resources:
-          limits:
-            habana.ai/gaudi: 1
-        env:
-        - name: OMPI_MCA_btl_vader_single_copy_mechanism
-          value: none
-        - name: PT_HPU_ENABLE_LAZY_COLLECTIVES
-          value: 'true'
-        - name: runtime
-          value: habana
-        - name: HABANA_VISIBLE_DEVICES
-          value: all
-        - name: HF_TOKEN
-          value: ${HF_TOKEN}
-      serviceAccountName: default
-      volumes:
-      - name: model-volume
-        hostPath:
-          path: /mnt/models
-          type: Directory
-      - name: shm
-        emptyDir:
-          medium: Memory
-          sizeLimit: 1Gi
----
-kind: Service
-apiVersion: v1
-metadata:
-  name: llm-dependency-svc
-spec:
-  type: ClusterIP
-  selector:
-    app: llm-dependency-deploy
-  ports:
-  - name: service
-    port: 9009
-    targetPort: 80
diff --git a/ChatQnA/benchmark/oob/without_rerank/single_gaudi/llm-microservice_run.yaml b/ChatQnA/benchmark/oob/without_rerank/single_gaudi/llm-microservice_run.yaml
deleted file mode 100644
index 3056dbc1d..000000000
--- a/ChatQnA/benchmark/oob/without_rerank/single_gaudi/llm-microservice_run.yaml
+++ /dev/null
@@ -1,54 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: llm-deploy
-  namespace: default
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: llm-deploy
-  template:
-    metadata:
-      annotations:
-        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
-      labels:
-        app: llm-deploy
-    spec:
-      nodeSelector:
-        node-type: chatqna-opea
-      topologySpreadConstraints:
-      - maxSkew: 1
-        topologyKey: kubernetes.io/hostname
-        whenUnsatisfiable: ScheduleAnyway
-        labelSelector:
-          matchLabels:
-            app: llm-deploy
-      hostIPC: true
-      containers:
-      - envFrom:
-        - configMapRef:
-            name: qna-config
-        image: opea/llm-tgi:latest
-        imagePullPolicy: IfNotPresent
-        name: llm-deploy
-        args: null
-        ports:
-        - containerPort: 9000
-      serviceAccountName: default
----
-kind: Service
-apiVersion: v1
-metadata:
-  name: llm-svc
-spec:
-  type: ClusterIP
-  selector:
-    app: llm-deploy
-  ports:
-  - name: service
-    port: 9000
-    targetPort: 9000
diff --git a/ChatQnA/benchmark/oob/without_rerank/single_gaudi/oob_single_gaudi_without_rerank.yaml b/ChatQnA/benchmark/oob/without_rerank/single_gaudi/oob_single_gaudi_without_rerank.yaml
new file mode 100644
index 000000000..b75e8f291
--- /dev/null
+++ b/ChatQnA/benchmark/oob/without_rerank/single_gaudi/oob_single_gaudi_without_rerank.yaml
@@ -0,0 +1,583 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+kind: ConfigMap  # shared settings read by the Deployments in this manifest (envFrom / configMapKeyRef)
+metadata:
+  name: qna-config
+  namespace: default
+data:
+  EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5  # used as --model-id by embedding-dependency-deploy
+  RERANK_MODEL_ID: BAAI/bge-reranker-base  # NOTE(review): no reranking workload is defined in this manifest - confirm it is still needed
+  LLM_MODEL_ID: Intel/neural-chat-7b-v3-3  # used as --model-id by llm-dependency-deploy
+  TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006  # port 6006 is the embedding-dependency-svc service port
+  TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808  # NOTE(review): reranking-dependency-svc is not defined in this manifest
+  TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009  # port 9009 is the llm-dependency-svc service port
+  REDIS_URL: redis://vector-db.default.svc.cluster.local:6379  # vector-db Service, port 6379
+  INDEX_NAME: rag-redis
+  HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}  # placeholder; presumably replaced with a real token before applying - TODO confirm
+  EMBEDDING_SERVICE_HOST_IP: embedding-svc
+  RETRIEVER_SERVICE_HOST_IP: retriever-svc
+  RERANK_SERVICE_HOST_IP: reranking-svc  # NOTE(review): reranking-svc is not defined in this manifest
+  NODE_SELECTOR: chatqna-opea
+  LLM_SERVICE_HOST_IP: llm-svc
+
+
+---
+
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment  # runs the opea/chatqna-without-rerank image as a single replica
+metadata:
+  name: chatqna-backend-server-deploy
+  namespace: default
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: chatqna-backend-server-deploy
+  template:
+    metadata:
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+      labels:
+        app: chatqna-backend-server-deploy
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea  # only schedule onto nodes labelled node-type=chatqna-opea
+      topologySpreadConstraints:
+      - maxSkew: 1
+        topologyKey: kubernetes.io/hostname  # spread pods of this app across hosts
+        whenUnsatisfiable: ScheduleAnyway  # spreading is a preference, not a hard requirement
+        labelSelector:
+          matchLabels:
+            app: chatqna-backend-server-deploy
+      hostIPC: true  # pod shares the host IPC namespace
+      containers:
+      - envFrom:
+        - configMapRef:
+            name: qna-config  # every qna-config key becomes an environment variable
+        image: opea/chatqna-without-rerank:latest
+        imagePullPolicy: IfNotPresent  # NOTE(review): with a :latest tag a node-cached image is reused as-is
+        name: chatqna-backend-server-deploy
+        args: null  # no args override; the image defaults apply
+        ports:
+        - containerPort: 8888  # targeted by chatqna-backend-server-svc
+      serviceAccountName: default
+---
+kind: Service  # NodePort entry point for the chatqna-backend-server-deploy pods
+apiVersion: v1
+metadata:
+  name: chatqna-backend-server-svc
+  namespace: default
+spec:
+  type: NodePort
+  selector:
+    app: chatqna-backend-server-deploy
+  ports:
+  - name: service
+    port: 8888
+    targetPort: 8888  # containerPort of the backend container
+    nodePort: 30888  # port opened on every node for this service
+
+
+---
+
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment  # runs the opea/dataprep-redis image as a single replica
+metadata:
+  name: dataprep-deploy
+  namespace: default
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: dataprep-deploy
+  template:
+    metadata:
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+      labels:
+        app: dataprep-deploy
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea  # only schedule onto nodes labelled node-type=chatqna-opea
+      topologySpreadConstraints:
+      - maxSkew: 1
+        topologyKey: kubernetes.io/hostname  # spread pods of this app across hosts
+        whenUnsatisfiable: ScheduleAnyway  # spreading is a preference, not a hard requirement
+        labelSelector:
+          matchLabels:
+            app: dataprep-deploy
+      hostIPC: true  # pod shares the host IPC namespace
+      containers:
+      - env:  # only these three qna-config keys are exposed to the container
+        - name: REDIS_URL
+          valueFrom:
+            configMapKeyRef:
+              name: qna-config
+              key: REDIS_URL
+        - name: TEI_ENDPOINT  # env var name differs from the ConfigMap key it reads
+          valueFrom:
+            configMapKeyRef:
+              name: qna-config
+              key: TEI_EMBEDDING_ENDPOINT
+        - name: INDEX_NAME
+          valueFrom:
+            configMapKeyRef:
+              name: qna-config
+              key: INDEX_NAME
+        image: opea/dataprep-redis:latest
+        imagePullPolicy: IfNotPresent  # NOTE(review): with a :latest tag a node-cached image is reused as-is
+        name: dataprep-deploy
+        args: null  # no args override; the image defaults apply
+        ports:  # each port below is exposed one-to-one by dataprep-svc
+        - containerPort: 6007
+        - containerPort: 6008
+        - containerPort: 6009
+      serviceAccountName: default
+---
+kind: Service  # cluster-internal access to the three dataprep-deploy container ports
+apiVersion: v1
+metadata:
+  name: dataprep-svc
+  namespace: default
+spec:
+  type: ClusterIP
+  selector:
+    app: dataprep-deploy
+  ports:  # service port and targetPort are identical for all three entries
+  - name: port1
+    port: 6007
+    targetPort: 6007
+  - name: port2
+    port: 6008
+    targetPort: 6008
+  - name: port3
+    port: 6009
+    targetPort: 6009
+
+
+---
+
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment  # runs the text-embeddings-inference (cpu-1.5) image as a single replica
+metadata:
+  name: embedding-dependency-deploy
+  namespace: default
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: embedding-dependency-deploy
+  template:
+    metadata:
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+      labels:
+        app: embedding-dependency-deploy
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea  # only schedule onto nodes labelled node-type=chatqna-opea
+      containers:
+      - envFrom:
+        - configMapRef:
+            name: qna-config  # every qna-config key becomes an environment variable
+        image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
+        name: embedding-dependency-deploy
+        args:
+        - --model-id
+        - $(EMBEDDING_MODEL_ID)  # expanded by Kubernetes from the container env (supplied via envFrom)
+        - --auto-truncate
+        volumeMounts:
+        - mountPath: /data  # backed by the node directory /mnt/models (model-volume)
+          name: model-volume
+        - mountPath: /dev/shm  # backed by the in-memory emptyDir (shm)
+          name: shm
+        ports:
+        - containerPort: 80  # targeted by embedding-dependency-svc
+      serviceAccountName: default
+      volumes:
+      - name: model-volume
+        hostPath:
+          path: /mnt/models
+          type: Directory  # the path must already exist on the node
+      - name: shm
+        emptyDir:
+          medium: Memory
+          sizeLimit: 1Gi
+---
+kind: Service  # cluster-internal access to the embedding-dependency-deploy pods
+apiVersion: v1
+metadata:
+  name: embedding-dependency-svc
+  namespace: default
+spec:
+  type: ClusterIP
+  selector:
+    app: embedding-dependency-deploy
+  ports:
+  - name: service
+    port: 6006  # port used by TEI_EMBEDDING_ENDPOINT in qna-config
+    targetPort: 80  # containerPort of the embedding-dependency container
+
+
+---
+
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment  # runs the opea/embedding-tei image as a single replica
+metadata:
+  name: embedding-deploy
+  namespace: default
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: embedding-deploy
+  template:
+    metadata:
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+      labels:
+        app: embedding-deploy
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea  # only schedule onto nodes labelled node-type=chatqna-opea
+      topologySpreadConstraints:
+      - maxSkew: 1
+        topologyKey: kubernetes.io/hostname  # spread pods of this app across hosts
+        whenUnsatisfiable: ScheduleAnyway  # spreading is a preference, not a hard requirement
+        labelSelector:
+          matchLabels:
+            app: embedding-deploy
+      hostIPC: true  # pod shares the host IPC namespace
+      containers:
+      - envFrom:
+        - configMapRef:
+            name: qna-config  # every qna-config key becomes an environment variable
+        image: opea/embedding-tei:latest
+        imagePullPolicy: IfNotPresent  # NOTE(review): with a :latest tag a node-cached image is reused as-is
+        name: embedding-deploy
+        args: null  # no args override; the image defaults apply
+        ports:
+        - containerPort: 6000  # targeted by embedding-svc
+      serviceAccountName: default
+---
+kind: Service  # cluster-internal access to the embedding-deploy pods
+apiVersion: v1
+metadata:
+  name: embedding-svc  # same name as EMBEDDING_SERVICE_HOST_IP in qna-config
+  namespace: default
+spec:
+  type: ClusterIP
+  selector:
+    app: embedding-deploy
+  ports:
+  - name: service
+    port: 6000
+    targetPort: 6000  # containerPort of the embedding container
+
+
+---
+
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment  # runs the tgi-gaudi 2.0.1 image; each replica is limited to one habana.ai/gaudi device
+metadata:
+  name: llm-dependency-deploy
+  namespace: default
+spec:
+  replicas: 8  # 8 pods x 1 habana.ai/gaudi each (see resources.limits below)
+  selector:
+    matchLabels:
+      app: llm-dependency-deploy
+  template:
+    metadata:
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+      labels:
+        app: llm-dependency-deploy
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea  # only schedule onto nodes labelled node-type=chatqna-opea
+      hostIPC: true  # pod shares the host IPC namespace
+      containers:
+      - envFrom:
+        - configMapRef:
+            name: qna-config  # every qna-config key becomes an environment variable
+        image: ghcr.io/huggingface/tgi-gaudi:2.0.1
+        name: llm-dependency-deploy-demo  # container name differs from the Deployment name and app label
+        securityContext:
+          capabilities:
+            add:
+            - SYS_NICE
+        args:
+        - --model-id
+        - $(LLM_MODEL_ID)  # expanded by Kubernetes from the container env (supplied via envFrom)
+        - --max-input-length
+        - '2048'
+        - --max-total-tokens
+        - '4096'
+        - --max-batch-total-tokens
+        - '65536'
+        - --max-batch-prefill-tokens
+        - '4096'
+        volumeMounts:
+        - mountPath: /data  # backed by the node directory /mnt/models (model-volume)
+          name: model-volume
+        - mountPath: /dev/shm  # backed by the in-memory emptyDir (shm)
+          name: shm
+        ports:
+        - containerPort: 80  # targeted by llm-dependency-svc
+        resources:
+          limits:
+            habana.ai/gaudi: 1
+        env:
+        - name: OMPI_MCA_btl_vader_single_copy_mechanism
+          value: none
+        - name: PT_HPU_ENABLE_LAZY_COLLECTIVES
+          value: 'true'
+        - name: runtime
+          value: habana
+        - name: HABANA_VISIBLE_DEVICES
+          value: all  # NOTE(review): 'all' here while the limit requests a single device - confirm intended visibility
+        - name: HF_TOKEN
+          value: ${HF_TOKEN}  # placeholder; presumably replaced with a real token before applying - TODO confirm
+      serviceAccountName: default
+      volumes:
+      - name: model-volume
+        hostPath:
+          path: /mnt/models
+          type: Directory  # the path must already exist on the node
+      - name: shm
+        emptyDir:
+          medium: Memory
+          sizeLimit: 1Gi
+---
+kind: Service  # cluster-internal access to the llm-dependency-deploy pods
+apiVersion: v1
+metadata:
+  name: llm-dependency-svc
+  namespace: default
+spec:
+  type: ClusterIP
+  selector:
+    app: llm-dependency-deploy
+  ports:
+  - name: service
+    port: 9009  # port used by TGI_LLM_ENDPOINT in qna-config
+    targetPort: 80  # containerPort of the llm-dependency container
+
+
+---
+
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment  # runs the opea/llm-tgi image as a single replica
+metadata:
+  name: llm-deploy
+  namespace: default
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: llm-deploy
+  template:
+    metadata:
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+      labels:
+        app: llm-deploy
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea  # only schedule onto nodes labelled node-type=chatqna-opea
+      topologySpreadConstraints:
+      - maxSkew: 1
+        topologyKey: kubernetes.io/hostname  # spread pods of this app across hosts
+        whenUnsatisfiable: ScheduleAnyway  # spreading is a preference, not a hard requirement
+        labelSelector:
+          matchLabels:
+            app: llm-deploy
+      hostIPC: true  # pod shares the host IPC namespace
+      containers:
+      - envFrom:
+        - configMapRef:
+            name: qna-config  # every qna-config key becomes an environment variable
+        image: opea/llm-tgi:latest
+        imagePullPolicy: IfNotPresent  # NOTE(review): with a :latest tag a node-cached image is reused as-is
+        name: llm-deploy
+        args: null  # no args override; the image defaults apply
+        ports:
+        - containerPort: 9000  # targeted by llm-svc
+      serviceAccountName: default
+---
+kind: Service  # cluster-internal access to the llm-deploy pods
+apiVersion: v1
+metadata:
+  name: llm-svc  # same name as LLM_SERVICE_HOST_IP in qna-config
+  namespace: default
+spec:
+  type: ClusterIP
+  selector:
+    app: llm-deploy
+  ports:
+  - name: service
+    port: 9000
+    targetPort: 9000  # containerPort of the llm container
+
+
+---
+
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment  # runs the opea/retriever-redis image as a single replica
+metadata:
+  name: retriever-deploy
+  namespace: default
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: retriever-deploy
+  template:
+    metadata:
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+      labels:
+        app: retriever-deploy
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea  # only schedule onto nodes labelled node-type=chatqna-opea
+      topologySpreadConstraints:
+      - maxSkew: 1
+        topologyKey: kubernetes.io/hostname  # spread pods of this app across hosts
+        whenUnsatisfiable: ScheduleAnyway  # spreading is a preference, not a hard requirement
+        labelSelector:
+          matchLabels:
+            app: retriever-deploy
+      hostIPC: true  # pod shares the host IPC namespace
+      containers:
+      - env:  # only these four qna-config keys are exposed to the container
+        - name: REDIS_URL
+          valueFrom:
+            configMapKeyRef:
+              name: qna-config
+              key: REDIS_URL
+        - name: TEI_EMBEDDING_ENDPOINT
+          valueFrom:
+            configMapKeyRef:
+              name: qna-config
+              key: TEI_EMBEDDING_ENDPOINT
+        - name: HUGGINGFACEHUB_API_TOKEN
+          valueFrom:
+            configMapKeyRef:
+              name: qna-config
+              key: HUGGINGFACEHUB_API_TOKEN
+        - name: INDEX_NAME
+          valueFrom:
+            configMapKeyRef:
+              name: qna-config
+              key: INDEX_NAME
+        image: opea/retriever-redis:latest
+        imagePullPolicy: IfNotPresent  # NOTE(review): with a :latest tag a node-cached image is reused as-is
+        name: retriever-deploy
+        args: null  # no args override; the image defaults apply
+        ports:
+        - containerPort: 7000  # targeted by retriever-svc
+      serviceAccountName: default
+---
+kind: Service  # cluster-internal access to the retriever-deploy pods
+apiVersion: v1
+metadata:
+  name: retriever-svc  # same name as RETRIEVER_SERVICE_HOST_IP in qna-config
+  namespace: default
+spec:
+  type: ClusterIP
+  selector:
+    app: retriever-deploy
+  ports:
+  - name: service
+    port: 7000
+    targetPort: 7000  # containerPort of the retriever container
+
+
+---
+
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment  # runs the redis/redis-stack 7.2.0-v9 image as a single replica
+metadata:
+  name: vector-db
+  namespace: default
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: vector-db
+  template:
+    metadata:
+      labels:
+        app: vector-db
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea  # only schedule onto nodes labelled node-type=chatqna-opea
+      topologySpreadConstraints:
+      - maxSkew: 1
+        topologyKey: kubernetes.io/hostname  # spread pods of this app across hosts
+        whenUnsatisfiable: ScheduleAnyway  # spreading is a preference, not a hard requirement
+        labelSelector:
+          matchLabels:
+            app: vector-db
+      containers:
+      - name: vector-db
+        image: redis/redis-stack:7.2.0-v9
+        ports:
+        - containerPort: 6379  # exposed by the vector-db Service as vector-db-service
+        - containerPort: 8001  # exposed by the vector-db Service as vector-db-insight
+---
+apiVersion: v1
+kind: Service  # cluster-internal access to both ports of the vector-db pod
+metadata:
+  name: vector-db  # host name used by REDIS_URL in qna-config
+  namespace: default
+spec:
+  type: ClusterIP
+  selector:
+    app: vector-db
+  ports:
+  - name: vector-db-service
+    port: 6379  # port used by REDIS_URL in qna-config
+    targetPort: 6379
+  - name: vector-db-insight
+    port: 8001
+    targetPort: 8001
+
+
+---
diff --git a/ChatQnA/benchmark/oob/without_rerank/single_gaudi/retrieval-microservice_run.yaml b/ChatQnA/benchmark/oob/without_rerank/single_gaudi/retrieval-microservice_run.yaml
deleted file mode 100644
index ac6c12fdc..000000000
--- a/ChatQnA/benchmark/oob/without_rerank/single_gaudi/retrieval-microservice_run.yaml
+++ /dev/null
@@ -1,72 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: retriever-deploy
-  namespace: default
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: retriever-deploy
-  template:
-    metadata:
-      annotations:
-        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
-      labels:
-        app: retriever-deploy
-    spec:
-      nodeSelector:
-        node-type: chatqna-opea
-      topologySpreadConstraints:
-      - maxSkew: 1
-        topologyKey: kubernetes.io/hostname
-        whenUnsatisfiable: ScheduleAnyway
-        labelSelector:
-          matchLabels:
-            app: retriever-deploy
-      hostIPC: true
-      containers:
-      - env:
-        - name: REDIS_URL
-          valueFrom:
-            configMapKeyRef:
-              name: qna-config
-              key: REDIS_URL
-        - name: TEI_EMBEDDING_ENDPOINT
-          valueFrom:
-            configMapKeyRef:
-              name: qna-config
-              key: TEI_EMBEDDING_ENDPOINT
-        - name: HUGGINGFACEHUB_API_TOKEN
-          valueFrom:
-            configMapKeyRef:
-              name: qna-config
-              key: HUGGINGFACEHUB_API_TOKEN
-        - name: INDEX_NAME
-          valueFrom:
-            configMapKeyRef:
-              name: qna-config
-              key: INDEX_NAME
-        image: opea/retriever-redis:latest
-        imagePullPolicy: IfNotPresent
-        name: retriever-deploy
-        args: null
-        ports:
-        - containerPort: 7000
-      serviceAccountName: default
----
-kind: Service
-apiVersion: v1
-metadata:
-  name: retriever-svc
-spec:
-  type: ClusterIP
-  selector:
-    app: retriever-deploy
-  ports:
-  - name: service
-    port: 7000
-    targetPort: 7000
diff --git a/ChatQnA/benchmark/oob/without_rerank/single_gaudi/vector-db_run.yaml b/ChatQnA/benchmark/oob/without_rerank/single_gaudi/vector-db_run.yaml
deleted file mode 100644
index e04e8c5fe..000000000
--- a/ChatQnA/benchmark/oob/without_rerank/single_gaudi/vector-db_run.yaml
+++ /dev/null
@@ -1,48 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: vector-db
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: vector-db
-  template:
-    metadata:
-      labels:
-        app: vector-db
-    spec:
-      nodeSelector:
-        node-type: chatqna-opea
-      topologySpreadConstraints:
-      - maxSkew: 1
-        topologyKey: kubernetes.io/hostname
-        whenUnsatisfiable: ScheduleAnyway
-        labelSelector:
-          matchLabels:
-            app: vector-db
-      containers:
-      - name: vector-db
-        image: redis/redis-stack:7.2.0-v9
-        ports:
-        - containerPort: 6379
-        - containerPort: 8001
----
-apiVersion: v1
-kind: Service
-metadata:
-  name: vector-db
-spec:
-  type: ClusterIP
-  selector:
-    app: vector-db
-  ports:
-  - name: vector-db-service
-    port: 6379
-    targetPort: 6379
-  - name: vector-db-insight
-    port: 8001
-    targetPort: 8001
diff --git a/ChatQnA/benchmark/oob/without_rerank/two_gaudi/chatqna_config_map.yaml b/ChatQnA/benchmark/oob/without_rerank/two_gaudi/chatqna_config_map.yaml
deleted file mode 100644
index 368c800e4..000000000
--- a/ChatQnA/benchmark/oob/without_rerank/two_gaudi/chatqna_config_map.yaml
+++ /dev/null
@@ -1,23 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: v1
-kind: ConfigMap
-metadata:
-  name: qna-config
-  namespace: default
-data:
-  EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
-  RERANK_MODEL_ID: BAAI/bge-reranker-base
-  LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
-  TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
-  TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
-  TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
-  REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
-  INDEX_NAME: rag-redis
-  HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
-  EMBEDDING_SERVICE_HOST_IP: embedding-svc
-  RETRIEVER_SERVICE_HOST_IP: retriever-svc
-  RERANK_SERVICE_HOST_IP: reranking-svc
-  NODE_SELECTOR: chatqna-opea
-  LLM_SERVICE_HOST_IP: llm-svc
diff --git a/ChatQnA/benchmark/oob/without_rerank/two_gaudi/chatqna_mega_service_run.yaml b/ChatQnA/benchmark/oob/without_rerank/two_gaudi/chatqna_mega_service_run.yaml
deleted file mode 100644
index 687fdc51e..000000000
--- a/ChatQnA/benchmark/oob/without_rerank/two_gaudi/chatqna_mega_service_run.yaml
+++ /dev/null
@@ -1,55 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: chatqna-backend-server-deploy
-  namespace: default
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: chatqna-backend-server-deploy
-  template:
-    metadata:
-      annotations:
-        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
-      labels:
-        app: chatqna-backend-server-deploy
-    spec:
-      nodeSelector:
-        node-type: chatqna-opea
-      topologySpreadConstraints:
-      - maxSkew: 1
-        topologyKey: kubernetes.io/hostname
-        whenUnsatisfiable: ScheduleAnyway
-        labelSelector:
-          matchLabels:
-            app: chatqna-backend-server-deploy
-      hostIPC: true
-      containers:
-      - envFrom:
-        - configMapRef:
-            name: qna-config
-        image: opea/chatqna-without-rerank:latest
-        imagePullPolicy: IfNotPresent
-        name: chatqna-backend-server-deploy
-        args: null
-        ports:
-        - containerPort: 8888
-      serviceAccountName: default
----
-kind: Service
-apiVersion: v1
-metadata:
-  name: chatqna-backend-server-svc
-spec:
-  type: NodePort
-  selector:
-    app: chatqna-backend-server-deploy
-  ports:
-  - name: service
-    port: 8888
-    targetPort: 8888
-    nodePort: 30888
diff --git a/ChatQnA/benchmark/oob/without_rerank/two_gaudi/dataprep-microservice_run.yaml b/ChatQnA/benchmark/oob/without_rerank/two_gaudi/dataprep-microservice_run.yaml
deleted file mode 100644
index 4c71df7ce..000000000
--- a/ChatQnA/benchmark/oob/without_rerank/two_gaudi/dataprep-microservice_run.yaml
+++ /dev/null
@@ -1,75 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: dataprep-deploy
-  namespace: default
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: dataprep-deploy
-  template:
-    metadata:
-      annotations:
-        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
-      labels:
-        app: dataprep-deploy
-    spec:
-      nodeSelector:
-        node-type: chatqna-opea
-      topologySpreadConstraints:
-      - maxSkew: 1
-        topologyKey: kubernetes.io/hostname
-        whenUnsatisfiable: ScheduleAnyway
-        labelSelector:
-          matchLabels:
-            app: dataprep-deploy
-      hostIPC: true
-      containers:
-      - env:
-        - name: REDIS_URL
-          valueFrom:
-            configMapKeyRef:
-              name: qna-config
-              key: REDIS_URL
-        - name: TEI_ENDPOINT
-          valueFrom:
-            configMapKeyRef:
-              name: qna-config
-              key: TEI_EMBEDDING_ENDPOINT
-        - name: INDEX_NAME
-          valueFrom:
-            configMapKeyRef:
-              name: qna-config
-              key: INDEX_NAME
-        image: opea/dataprep-redis:latest
-        imagePullPolicy: IfNotPresent
-        name: dataprep-deploy
-        args: null
-        ports:
-        - containerPort: 6007
-        - containerPort: 6008
-        - containerPort: 6009
-      serviceAccountName: default
----
-kind: Service
-apiVersion: v1
-metadata:
-  name: dataprep-svc
-spec:
-  type: ClusterIP
-  selector:
-    app: dataprep-deploy
-  ports:
-  - name: port1
-    port: 6007
-    targetPort: 6007
-  - name: port2
-    port: 6008
-    targetPort: 6008
-  - name: port3
-    port: 6009
-    targetPort: 6009
diff --git a/ChatQnA/benchmark/oob/without_rerank/two_gaudi/embedding-dependency_run.yaml b/ChatQnA/benchmark/oob/without_rerank/two_gaudi/embedding-dependency_run.yaml
deleted file mode 100644
index 42a20871d..000000000
--- a/ChatQnA/benchmark/oob/without_rerank/two_gaudi/embedding-dependency_run.yaml
+++ /dev/null
@@ -1,62 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: embedding-dependency-deploy
-  namespace: default
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: embedding-dependency-deploy
-  template:
-    metadata:
-      annotations:
-        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
-      labels:
-        app: embedding-dependency-deploy
-    spec:
-      nodeSelector:
-        node-type: chatqna-opea
-      containers:
-      - envFrom:
-        - configMapRef:
-            name: qna-config
-        image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
-        name: embedding-dependency-deploy
-        args:
-        - --model-id
-        - $(EMBEDDING_MODEL_ID)
-        - --auto-truncate
-        volumeMounts:
-        - mountPath: /data
-          name: model-volume
-        - mountPath: /dev/shm
-          name: shm
-        ports:
-        - containerPort: 80
-      serviceAccountName: default
-      volumes:
-      - name: model-volume
-        hostPath:
-          path: /mnt/models
-          type: Directory
-      - name: shm
-        emptyDir:
-          medium: Memory
-          sizeLimit: 1Gi
----
-kind: Service
-apiVersion: v1
-metadata:
-  name: embedding-dependency-svc
-spec:
-  type: ClusterIP
-  selector:
-    app: embedding-dependency-deploy
-  ports:
-  - name: service
-    port: 6006
-    targetPort: 80
diff --git a/ChatQnA/benchmark/oob/without_rerank/two_gaudi/embedding-microservice_run.yaml b/ChatQnA/benchmark/oob/without_rerank/two_gaudi/embedding-microservice_run.yaml
deleted file mode 100644
index 3af5b9859..000000000
--- a/ChatQnA/benchmark/oob/without_rerank/two_gaudi/embedding-microservice_run.yaml
+++ /dev/null
@@ -1,54 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: embedding-deploy
-  namespace: default
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: embedding-deploy
-  template:
-    metadata:
-      annotations:
-        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
-      labels:
-        app: embedding-deploy
-    spec:
-      nodeSelector:
-        node-type: chatqna-opea
-      topologySpreadConstraints:
-      - maxSkew: 1
-        topologyKey: kubernetes.io/hostname
-        whenUnsatisfiable: ScheduleAnyway
-        labelSelector:
-          matchLabels:
-            app: embedding-deploy
-      hostIPC: true
-      containers:
-      - envFrom:
-        - configMapRef:
-            name: qna-config
-        image: opea/embedding-tei:latest
-        imagePullPolicy: IfNotPresent
-        name: embedding-deploy
-        args: null
-        ports:
-        - containerPort: 6000
-      serviceAccountName: default
----
-kind: Service
-apiVersion: v1
-metadata:
-  name: embedding-svc
-spec:
-  type: ClusterIP
-  selector:
-    app: embedding-deploy
-  ports:
-  - name: service
-    port: 6000
-    targetPort: 6000
diff --git a/ChatQnA/benchmark/oob/without_rerank/two_gaudi/llm-dependency_run.yaml b/ChatQnA/benchmark/oob/without_rerank/two_gaudi/llm-dependency_run.yaml
deleted file mode 100644
index e78da3e38..000000000
--- a/ChatQnA/benchmark/oob/without_rerank/two_gaudi/llm-dependency_run.yaml
+++ /dev/null
@@ -1,88 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: llm-dependency-deploy
-  namespace: default
-spec:
-  replicas: 16
-  selector:
-    matchLabels:
-      app: llm-dependency-deploy
-  template:
-    metadata:
-      annotations:
-        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
-      labels:
-        app: llm-dependency-deploy
-    spec:
-      nodeSelector:
-        node-type: chatqna-opea
-      hostIPC: true
-      containers:
-      - envFrom:
-        - configMapRef:
-            name: qna-config
-        image: ghcr.io/huggingface/tgi-gaudi:2.0.1
-        name: llm-dependency-deploy-demo
-        securityContext:
-          capabilities:
-            add:
-            - SYS_NICE
-        args:
-        - --model-id
-        - $(LLM_MODEL_ID)
-        - --max-input-length
-        - '2048'
-        - --max-total-tokens
-        - '4096'
-        - --max-batch-total-tokens
-        - '65536'
-        - --max-batch-prefill-tokens
-        - '4096'
-        volumeMounts:
-        - mountPath: /data
-          name: model-volume
-        - mountPath: /dev/shm
-          name: shm
-        ports:
-        - containerPort: 80
-        resources:
-          limits:
-            habana.ai/gaudi: 1
-        env:
-        - name: OMPI_MCA_btl_vader_single_copy_mechanism
-          value: none
-        - name: PT_HPU_ENABLE_LAZY_COLLECTIVES
-          value: 'true'
-        - name: runtime
-          value: habana
-        - name: HABANA_VISIBLE_DEVICES
-          value: all
-        - name: HF_TOKEN
-          value: ${HF_TOKEN}
-      serviceAccountName: default
-      volumes:
-      - name: model-volume
-        hostPath:
-          path: /mnt/models
-          type: Directory
-      - name: shm
-        emptyDir:
-          medium: Memory
-          sizeLimit: 1Gi
----
-kind: Service
-apiVersion: v1
-metadata:
-  name: llm-dependency-svc
-spec:
-  type: ClusterIP
-  selector:
-    app: llm-dependency-deploy
-  ports:
-  - name: service
-    port: 9009
-    targetPort: 80
diff --git a/ChatQnA/benchmark/oob/without_rerank/two_gaudi/llm-microservice_run.yaml b/ChatQnA/benchmark/oob/without_rerank/two_gaudi/llm-microservice_run.yaml
deleted file mode 100644
index 3056dbc1d..000000000
--- a/ChatQnA/benchmark/oob/without_rerank/two_gaudi/llm-microservice_run.yaml
+++ /dev/null
@@ -1,54 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: llm-deploy
-  namespace: default
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: llm-deploy
-  template:
-    metadata:
-      annotations:
-        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
-      labels:
-        app: llm-deploy
-    spec:
-      nodeSelector:
-        node-type: chatqna-opea
-      topologySpreadConstraints:
-      - maxSkew: 1
-        topologyKey: kubernetes.io/hostname
-        whenUnsatisfiable: ScheduleAnyway
-        labelSelector:
-          matchLabels:
-            app: llm-deploy
-      hostIPC: true
-      containers:
-      - envFrom:
-        - configMapRef:
-            name: qna-config
-        image: opea/llm-tgi:latest
-        imagePullPolicy: IfNotPresent
-        name: llm-deploy
-        args: null
-        ports:
-        - containerPort: 9000
-      serviceAccountName: default
----
-kind: Service
-apiVersion: v1
-metadata:
-  name: llm-svc
-spec:
-  type: ClusterIP
-  selector:
-    app: llm-deploy
-  ports:
-  - name: service
-    port: 9000
-    targetPort: 9000
diff --git a/ChatQnA/benchmark/oob/without_rerank/two_gaudi/oob_two_gaudi_without_rerank.yaml b/ChatQnA/benchmark/oob/without_rerank/two_gaudi/oob_two_gaudi_without_rerank.yaml
new file mode 100644
index 000000000..fa62ef84d
--- /dev/null
+++ b/ChatQnA/benchmark/oob/without_rerank/two_gaudi/oob_two_gaudi_without_rerank.yaml
@@ -0,0 +1,583 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+kind: ConfigMap  # shared settings read via envFrom / configMapKeyRef by the workloads in this manifest
+metadata:
+  name: qna-config
+  namespace: default
+data:  # ConfigMap data values must all be strings
+  EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
+  RERANK_MODEL_ID: BAAI/bge-reranker-base
+  LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
+  TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
+  TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
+  TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
+  REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
+  INDEX_NAME: rag-redis
+  HUGGINGFACEHUB_API_TOKEN: '${HF_TOKEN}'  # quoted: an empty or number-like substitution must stay a string; NOTE(review): a Secret would suit a token better
+  EMBEDDING_SERVICE_HOST_IP: embedding-svc
+  RETRIEVER_SERVICE_HOST_IP: retriever-svc
+  RERANK_SERVICE_HOST_IP: reranking-svc  # NOTE(review): no reranking Service is defined in this manifest; rerank keys look unused here - confirm
+  NODE_SELECTOR: chatqna-opea
+  LLM_SERVICE_HOST_IP: llm-svc
+
+
+---
+
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment  # ChatQnA backend (opea/chatqna-without-rerank), selected by chatqna-backend-server-svc
+metadata:
+  name: chatqna-backend-server-deploy
+  namespace: default
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: chatqna-backend-server-deploy
+  template:
+    metadata:
+      labels:
+        app: chatqna-backend-server-deploy
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+    spec:
+      serviceAccountName: default
+      hostIPC: true  # pod uses the host's IPC namespace
+      nodeSelector:
+        node-type: chatqna-opea
+      topologySpreadConstraints:
+      - labelSelector:
+          matchLabels:
+            app: chatqna-backend-server-deploy
+        maxSkew: 1
+        topologyKey: kubernetes.io/hostname
+        whenUnsatisfiable: ScheduleAnyway  # soft constraint: spread over hostnames when possible
+      containers:
+      - name: chatqna-backend-server-deploy
+        image: opea/chatqna-without-rerank:latest
+        imagePullPolicy: IfNotPresent
+        args: null  # explicit null: this manifest passes no arguments
+        envFrom:  # every qna-config key becomes an environment variable
+        - configMapRef:
+            name: qna-config
+        ports:
+        - containerPort: 8888
+---
+apiVersion: v1
+kind: Service  # NodePort entry point for pods labelled app=chatqna-backend-server-deploy
+metadata:
+  name: chatqna-backend-server-svc
+  namespace: default
+spec:
+  type: NodePort
+  selector:
+    app: chatqna-backend-server-deploy
+  ports:
+  - name: service
+    nodePort: 30888  # port opened on each node
+    port: 8888
+    targetPort: 8888
+
+
+---
+
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment  # data-preparation microservice (opea/dataprep-redis), selected by dataprep-svc
+metadata:
+  name: dataprep-deploy
+  namespace: default
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: dataprep-deploy
+  template:
+    metadata:
+      labels:
+        app: dataprep-deploy
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+    spec:
+      serviceAccountName: default
+      hostIPC: true  # pod uses the host's IPC namespace
+      nodeSelector:
+        node-type: chatqna-opea
+      topologySpreadConstraints:
+      - labelSelector:
+          matchLabels:
+            app: dataprep-deploy
+        maxSkew: 1
+        topologyKey: kubernetes.io/hostname
+        whenUnsatisfiable: ScheduleAnyway  # soft constraint: spread over hostnames when possible
+      containers:
+      - name: dataprep-deploy
+        image: opea/dataprep-redis:latest
+        imagePullPolicy: IfNotPresent
+        args: null  # explicit null: this manifest passes no arguments
+        env:  # only these three qna-config keys are injected
+        - name: REDIS_URL
+          valueFrom:
+            configMapKeyRef:
+              key: REDIS_URL
+              name: qna-config
+        - name: TEI_ENDPOINT  # variable name differs from the ConfigMap key it reads
+          valueFrom:
+            configMapKeyRef:
+              key: TEI_EMBEDDING_ENDPOINT
+              name: qna-config
+        - name: INDEX_NAME
+          valueFrom:
+            configMapKeyRef:
+              key: INDEX_NAME
+              name: qna-config
+        ports:
+        - containerPort: 6007
+        - containerPort: 6008
+        - containerPort: 6009
+---
+apiVersion: v1
+kind: Service  # ClusterIP front for pods labelled app=dataprep-deploy
+metadata:
+  name: dataprep-svc
+  namespace: default
+spec:
+  type: ClusterIP
+  selector:
+    app: dataprep-deploy
+  ports:  # one Service port per container port, same numbers on both sides
+  - name: port1
+    targetPort: 6007
+    port: 6007
+  - name: port2
+    targetPort: 6008
+    port: 6008
+  - name: port3
+    targetPort: 6009
+    port: 6009
+
+
+---
+
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment  # text-embeddings-inference (CPU image) server, selected by embedding-dependency-svc
+metadata:
+  name: embedding-dependency-deploy
+  namespace: default
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: embedding-dependency-deploy
+  template:
+    metadata:
+      labels:
+        app: embedding-dependency-deploy
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+    spec:
+      serviceAccountName: default
+      nodeSelector:
+        node-type: chatqna-opea
+      volumes:
+      - name: model-volume
+        hostPath:
+          path: /mnt/models
+          type: Directory  # the directory must already exist on the node
+      - name: shm
+        emptyDir:
+          medium: Memory
+          sizeLimit: 1Gi
+      containers:
+      - name: embedding-dependency-deploy
+        image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
+        args:
+        - --model-id
+        - $(EMBEDDING_MODEL_ID)  # expanded by Kubernetes from the container environment (qna-config)
+        - --auto-truncate
+        envFrom:  # every qna-config key becomes an environment variable
+        - configMapRef:
+            name: qna-config
+        ports:
+        - containerPort: 80
+        volumeMounts:
+        - name: model-volume
+          mountPath: /data
+        - name: shm
+          mountPath: /dev/shm
+---
+apiVersion: v1
+kind: Service  # ClusterIP front for pods labelled app=embedding-dependency-deploy
+metadata:
+  name: embedding-dependency-svc
+  namespace: default
+spec:
+  type: ClusterIP
+  selector:
+    app: embedding-dependency-deploy
+  ports:
+  - name: service
+    targetPort: 80  # container port of the embedding server
+    port: 6006  # port used in qna-config TEI_EMBEDDING_ENDPOINT
+
+
+---
+
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment  # embedding microservice (opea/embedding-tei), selected by embedding-svc
+metadata:
+  name: embedding-deploy
+  namespace: default
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: embedding-deploy
+  template:
+    metadata:
+      labels:
+        app: embedding-deploy
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+    spec:
+      serviceAccountName: default
+      hostIPC: true  # pod uses the host's IPC namespace
+      nodeSelector:
+        node-type: chatqna-opea
+      topologySpreadConstraints:
+      - labelSelector:
+          matchLabels:
+            app: embedding-deploy
+        maxSkew: 1
+        topologyKey: kubernetes.io/hostname
+        whenUnsatisfiable: ScheduleAnyway  # soft constraint: spread over hostnames when possible
+      containers:
+      - name: embedding-deploy
+        image: opea/embedding-tei:latest
+        imagePullPolicy: IfNotPresent
+        args: null  # explicit null: this manifest passes no arguments
+        envFrom:  # every qna-config key becomes an environment variable
+        - configMapRef:
+            name: qna-config
+        ports:
+        - containerPort: 6000
+---
+apiVersion: v1
+kind: Service  # ClusterIP front for pods labelled app=embedding-deploy
+metadata:
+  name: embedding-svc  # same value as qna-config EMBEDDING_SERVICE_HOST_IP
+  namespace: default
+spec:
+  type: ClusterIP
+  selector:
+    app: embedding-deploy
+  ports:
+  - name: service
+    targetPort: 6000
+    port: 6000
+
+
+---
+
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment  # TGI (Gaudi image) model server, selected by llm-dependency-svc
+metadata:
+  name: llm-dependency-deploy
+  namespace: default
+spec:
+  replicas: 16  # each replica requests one habana.ai/gaudi device (see resources.limits)
+  selector:
+    matchLabels:
+      app: llm-dependency-deploy
+  template:
+    metadata:
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+      labels:
+        app: llm-dependency-deploy
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea
+      hostIPC: true  # pod uses the host's IPC namespace
+      containers:
+      - name: llm-dependency-deploy-demo
+        image: ghcr.io/huggingface/tgi-gaudi:2.0.1
+        envFrom:  # every qna-config key becomes an environment variable
+        - configMapRef:
+            name: qna-config
+        securityContext:
+          capabilities:
+            add:
+            - SYS_NICE
+        args:
+        - --model-id
+        - $(LLM_MODEL_ID)  # expanded by Kubernetes from the container environment (qna-config)
+        - --max-input-length
+        - '2048'
+        - --max-total-tokens
+        - '4096'
+        - --max-batch-total-tokens
+        - '65536'
+        - --max-batch-prefill-tokens
+        - '4096'
+        volumeMounts:
+        - mountPath: /data
+          name: model-volume
+        - mountPath: /dev/shm
+          name: shm
+        ports:
+        - containerPort: 80
+        resources:
+          limits:
+            habana.ai/gaudi: 1
+        env:
+        - name: OMPI_MCA_btl_vader_single_copy_mechanism
+          value: none
+        - name: PT_HPU_ENABLE_LAZY_COLLECTIVES
+          value: 'true'
+        - name: runtime
+          value: habana
+        - name: HABANA_VISIBLE_DEVICES
+          value: all
+        - name: HF_TOKEN
+          value: '${HF_TOKEN}'  # quoted so the substituted value is always a string; Kubernetes does not expand ${...} itself
+      serviceAccountName: default
+      volumes:
+      - name: model-volume
+        hostPath:
+          path: /mnt/models
+          type: Directory  # the directory must already exist on the node
+      - name: shm
+        emptyDir:
+          medium: Memory
+          sizeLimit: 1Gi
+---
+apiVersion: v1
+kind: Service  # ClusterIP front for pods labelled app=llm-dependency-deploy
+metadata:
+  name: llm-dependency-svc
+  namespace: default
+spec:
+  type: ClusterIP
+  selector:
+    app: llm-dependency-deploy
+  ports:
+  - name: service
+    targetPort: 80  # container port of the model server
+    port: 9009  # port used in qna-config TGI_LLM_ENDPOINT
+
+
+---
+
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment  # LLM microservice (opea/llm-tgi), selected by llm-svc
+metadata:
+  name: llm-deploy
+  namespace: default
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: llm-deploy
+  template:
+    metadata:
+      labels:
+        app: llm-deploy
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+    spec:
+      serviceAccountName: default
+      hostIPC: true  # pod uses the host's IPC namespace
+      nodeSelector:
+        node-type: chatqna-opea
+      topologySpreadConstraints:
+      - labelSelector:
+          matchLabels:
+            app: llm-deploy
+        maxSkew: 1
+        topologyKey: kubernetes.io/hostname
+        whenUnsatisfiable: ScheduleAnyway  # soft constraint: spread over hostnames when possible
+      containers:
+      - name: llm-deploy
+        image: opea/llm-tgi:latest
+        imagePullPolicy: IfNotPresent
+        args: null  # explicit null: this manifest passes no arguments
+        envFrom:  # every qna-config key becomes an environment variable
+        - configMapRef:
+            name: qna-config
+        ports:
+        - containerPort: 9000
+---
+apiVersion: v1
+kind: Service  # ClusterIP front for pods labelled app=llm-deploy
+metadata:
+  name: llm-svc  # same value as qna-config LLM_SERVICE_HOST_IP
+  namespace: default
+spec:
+  type: ClusterIP
+  selector:
+    app: llm-deploy
+  ports:
+  - name: service
+    targetPort: 9000
+    port: 9000
+
+
+---
+
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment  # retriever microservice (opea/retriever-redis), selected by retriever-svc
+metadata:
+  name: retriever-deploy
+  namespace: default
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: retriever-deploy
+  template:
+    metadata:
+      labels:
+        app: retriever-deploy
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+    spec:
+      serviceAccountName: default
+      hostIPC: true  # pod uses the host's IPC namespace
+      nodeSelector:
+        node-type: chatqna-opea
+      topologySpreadConstraints:
+      - labelSelector:
+          matchLabels:
+            app: retriever-deploy
+        maxSkew: 1
+        topologyKey: kubernetes.io/hostname
+        whenUnsatisfiable: ScheduleAnyway  # soft constraint: spread over hostnames when possible
+      containers:
+      - name: retriever-deploy
+        image: opea/retriever-redis:latest
+        imagePullPolicy: IfNotPresent
+        args: null  # explicit null: this manifest passes no arguments
+        env:  # only these four qna-config keys are injected
+        - name: REDIS_URL
+          valueFrom:
+            configMapKeyRef:
+              key: REDIS_URL
+              name: qna-config
+        - name: TEI_EMBEDDING_ENDPOINT
+          valueFrom:
+            configMapKeyRef:
+              key: TEI_EMBEDDING_ENDPOINT
+              name: qna-config
+        - name: HUGGINGFACEHUB_API_TOKEN
+          valueFrom:
+            configMapKeyRef:
+              key: HUGGINGFACEHUB_API_TOKEN
+              name: qna-config
+        - name: INDEX_NAME
+          valueFrom:
+            configMapKeyRef:
+              key: INDEX_NAME
+              name: qna-config
+        ports:
+        - containerPort: 7000
+---
+apiVersion: v1
+kind: Service  # ClusterIP front for pods labelled app=retriever-deploy
+metadata:
+  name: retriever-svc  # same value as qna-config RETRIEVER_SERVICE_HOST_IP
+  namespace: default
+spec:
+  type: ClusterIP
+  selector:
+    app: retriever-deploy
+  ports:
+  - name: service
+    targetPort: 7000
+    port: 7000
+
+
+---
+
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment  # redis-stack instance, selected by the vector-db Service
+metadata:
+  name: vector-db
+  namespace: default
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: vector-db
+  template:
+    metadata:
+      labels:
+        app: vector-db
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea
+      topologySpreadConstraints:
+      - labelSelector:
+          matchLabels:
+            app: vector-db
+        maxSkew: 1
+        topologyKey: kubernetes.io/hostname
+        whenUnsatisfiable: ScheduleAnyway  # soft constraint: spread over hostnames when possible
+      containers:
+      - name: vector-db
+        image: redis/redis-stack:7.2.0-v9
+        ports:
+        - containerPort: 6379  # exposed by the Service as vector-db-service
+        - containerPort: 8001  # exposed by the Service as vector-db-insight
+---
+apiVersion: v1
+kind: Service  # ClusterIP front for pods labelled app=vector-db
+metadata:
+  name: vector-db  # host name used in qna-config REDIS_URL
+  namespace: default
+spec:
+  selector:
+    app: vector-db
+  type: ClusterIP
+  ports:
+  - name: vector-db-service
+    targetPort: 6379
+    port: 6379
+  - name: vector-db-insight
+    targetPort: 8001
+    port: 8001
+
+
+---
diff --git a/ChatQnA/benchmark/oob/without_rerank/two_gaudi/retrieval-microservice_run.yaml b/ChatQnA/benchmark/oob/without_rerank/two_gaudi/retrieval-microservice_run.yaml
deleted file mode 100644
index ac6c12fdc..000000000
--- a/ChatQnA/benchmark/oob/without_rerank/two_gaudi/retrieval-microservice_run.yaml
+++ /dev/null
@@ -1,72 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: retriever-deploy
-  namespace: default
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: retriever-deploy
-  template:
-    metadata:
-      annotations:
-        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
-      labels:
-        app: retriever-deploy
-    spec:
-      nodeSelector:
-        node-type: chatqna-opea
-      topologySpreadConstraints:
-      - maxSkew: 1
-        topologyKey: kubernetes.io/hostname
-        whenUnsatisfiable: ScheduleAnyway
-        labelSelector:
-          matchLabels:
-            app: retriever-deploy
-      hostIPC: true
-      containers:
-      - env:
-        - name: REDIS_URL
-          valueFrom:
-            configMapKeyRef:
-              name: qna-config
-              key: REDIS_URL
-        - name: TEI_EMBEDDING_ENDPOINT
-          valueFrom:
-            configMapKeyRef:
-              name: qna-config
-              key: TEI_EMBEDDING_ENDPOINT
-        - name: HUGGINGFACEHUB_API_TOKEN
-          valueFrom:
-            configMapKeyRef:
-              name: qna-config
-              key: HUGGINGFACEHUB_API_TOKEN
-        - name: INDEX_NAME
-          valueFrom:
-            configMapKeyRef:
-              name: qna-config
-              key: INDEX_NAME
-        image: opea/retriever-redis:latest
-        imagePullPolicy: IfNotPresent
-        name: retriever-deploy
-        args: null
-        ports:
-        - containerPort: 7000
-      serviceAccountName: default
----
-kind: Service
-apiVersion: v1
-metadata:
-  name: retriever-svc
-spec:
-  type: ClusterIP
-  selector:
-    app: retriever-deploy
-  ports:
-  - name: service
-    port: 7000
-    targetPort: 7000
diff --git a/ChatQnA/benchmark/oob/without_rerank/two_gaudi/vector-db_run.yaml b/ChatQnA/benchmark/oob/without_rerank/two_gaudi/vector-db_run.yaml
deleted file mode 100644
index e04e8c5fe..000000000
--- a/ChatQnA/benchmark/oob/without_rerank/two_gaudi/vector-db_run.yaml
+++ /dev/null
@@ -1,48 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: vector-db
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: vector-db
-  template:
-    metadata:
-      labels:
-        app: vector-db
-    spec:
-      nodeSelector:
-        node-type: chatqna-opea
-      topologySpreadConstraints:
-      - maxSkew: 1
-        topologyKey: kubernetes.io/hostname
-        whenUnsatisfiable: ScheduleAnyway
-        labelSelector:
-          matchLabels:
-            app: vector-db
-      containers:
-      - name: vector-db
-        image: redis/redis-stack:7.2.0-v9
-        ports:
-        - containerPort: 6379
-        - containerPort: 8001
----
-apiVersion: v1
-kind: Service
-metadata:
-  name: vector-db
-spec:
-  type: ClusterIP
-  selector:
-    app: vector-db
-  ports:
-  - name: vector-db-service
-    port: 6379
-    targetPort: 6379
-  - name: vector-db-insight
-    port: 8001
-    targetPort: 8001
diff --git a/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/chatqna_config_map.yaml b/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/chatqna_config_map.yaml
deleted file mode 100644
index 368c800e4..000000000
--- a/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/chatqna_config_map.yaml
+++ /dev/null
@@ -1,23 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: v1
-kind: ConfigMap
-metadata:
-  name: qna-config
-  namespace: default
-data:
-  EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
-  RERANK_MODEL_ID: BAAI/bge-reranker-base
-  LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
-  TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
-  TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
-  TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
-  REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
-  INDEX_NAME: rag-redis
-  HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
-  EMBEDDING_SERVICE_HOST_IP: embedding-svc
-  RETRIEVER_SERVICE_HOST_IP: retriever-svc
-  RERANK_SERVICE_HOST_IP: reranking-svc
-  NODE_SELECTOR: chatqna-opea
-  LLM_SERVICE_HOST_IP: llm-svc
diff --git a/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/chatqna_mega_service_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/chatqna_mega_service_run.yaml
deleted file mode 100644
index 30cacdffe..000000000
--- a/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/chatqna_mega_service_run.yaml
+++ /dev/null
@@ -1,62 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: chatqna-backend-server-deploy
-  namespace: default
-spec:
-  replicas: 4
-  selector:
-    matchLabels:
-      app: chatqna-backend-server-deploy
-  template:
-    metadata:
-      annotations:
-        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
-      labels:
-        app: chatqna-backend-server-deploy
-    spec:
-      nodeSelector:
-        node-type: chatqna-opea
-      topologySpreadConstraints:
-      - maxSkew: 1
-        topologyKey: kubernetes.io/hostname
-        whenUnsatisfiable: ScheduleAnyway
-        labelSelector:
-          matchLabels:
-            app: chatqna-backend-server-deploy
-      hostIPC: true
-      containers:
-      - envFrom:
-        - configMapRef:
-            name: qna-config
-        image: opea/chatqna:latest
-        imagePullPolicy: IfNotPresent
-        name: chatqna-backend-server-deploy
-        args: null
-        ports:
-        - containerPort: 8888
-        resources:
-          limits:
-            cpu: 8
-            memory: 4000Mi
-          requests:
-            cpu: 8
-            memory: 4000Mi
-      serviceAccountName: default
----
-kind: Service
-apiVersion: v1
-metadata:
-  name: chatqna-backend-server-svc
-spec:
-  type: NodePort
-  selector:
-    app: chatqna-backend-server-deploy
-  ports:
-  - name: service
-    port: 8888
-    targetPort: 8888
-    nodePort: 30888
diff --git a/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/dataprep-microservice_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/dataprep-microservice_run.yaml
deleted file mode 100644
index 4c71df7ce..000000000
--- a/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/dataprep-microservice_run.yaml
+++ /dev/null
@@ -1,75 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: dataprep-deploy
-  namespace: default
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: dataprep-deploy
-  template:
-    metadata:
-      annotations:
-        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
-      labels:
-        app: dataprep-deploy
-    spec:
-      nodeSelector:
-        node-type: chatqna-opea
-      topologySpreadConstraints:
-      - maxSkew: 1
-        topologyKey: kubernetes.io/hostname
-        whenUnsatisfiable: ScheduleAnyway
-        labelSelector:
-          matchLabels:
-            app: dataprep-deploy
-      hostIPC: true
-      containers:
-      - env:
-        - name: REDIS_URL
-          valueFrom:
-            configMapKeyRef:
-              name: qna-config
-              key: REDIS_URL
-        - name: TEI_ENDPOINT
-          valueFrom:
-            configMapKeyRef:
-              name: qna-config
-              key: TEI_EMBEDDING_ENDPOINT
-        - name: INDEX_NAME
-          valueFrom:
-            configMapKeyRef:
-              name: qna-config
-              key: INDEX_NAME
-        image: opea/dataprep-redis:latest
-        imagePullPolicy: IfNotPresent
-        name: dataprep-deploy
-        args: null
-        ports:
-        - containerPort: 6007
-        - containerPort: 6008
-        - containerPort: 6009
-      serviceAccountName: default
----
-kind: Service
-apiVersion: v1
-metadata:
-  name: dataprep-svc
-spec:
-  type: ClusterIP
-  selector:
-    app: dataprep-deploy
-  ports:
-  - name: port1
-    port: 6007
-    targetPort: 6007
-  - name: port2
-    port: 6008
-    targetPort: 6008
-  - name: port3
-    port: 6009
-    targetPort: 6009
diff --git a/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/embedding-dependency_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/embedding-dependency_run.yaml
deleted file mode 100644
index 69dbd7af9..000000000
--- a/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/embedding-dependency_run.yaml
+++ /dev/null
@@ -1,69 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: embedding-dependency-deploy
-  namespace: default
-spec:
-  replicas: 4
-  selector:
-    matchLabels:
-      app: embedding-dependency-deploy
-  template:
-    metadata:
-      annotations:
-        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
-      labels:
-        app: embedding-dependency-deploy
-    spec:
-      nodeSelector:
-        node-type: chatqna-opea
-      containers:
-      - envFrom:
-        - configMapRef:
-            name: qna-config
-        image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
-        name: embedding-dependency-deploy
-        args:
-        - --model-id
-        - $(EMBEDDING_MODEL_ID)
-        - --auto-truncate
-        volumeMounts:
-        - mountPath: /data
-          name: model-volume
-        - mountPath: /dev/shm
-          name: shm
-        ports:
-        - containerPort: 80
-        resources:
-          limits:
-            cpu: 80
-            memory: 20000Mi
-          requests:
-            cpu: 80
-            memory: 20000Mi
-      serviceAccountName: default
-      volumes:
-      - name: model-volume
-        hostPath:
-          path: /mnt/models
-          type: Directory
-      - name: shm
-        emptyDir:
-          medium: Memory
-          sizeLimit: 1Gi
----
-kind: Service
-apiVersion: v1
-metadata:
-  name: embedding-dependency-svc
-spec:
-  type: ClusterIP
-  selector:
-    app: embedding-dependency-deploy
-  ports:
-  - name: service
-    port: 6006
-    targetPort: 80
diff --git a/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/embedding-microservice_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/embedding-microservice_run.yaml
deleted file mode 100644
index 348aa7a23..000000000
--- a/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/embedding-microservice_run.yaml
+++ /dev/null
@@ -1,59 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: embedding-deploy
-  namespace: default
-spec:
-  replicas: 4
-  selector:
-    matchLabels:
-      app: embedding-deploy
-  template:
-    metadata:
-      annotations:
-        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
-      labels:
-        app: embedding-deploy
-    spec:
-      nodeSelector:
-        node-type: chatqna-opea
-      topologySpreadConstraints:
-      - maxSkew: 1
-        topologyKey: kubernetes.io/hostname
-        whenUnsatisfiable: ScheduleAnyway
-        labelSelector:
-          matchLabels:
-            app: embedding-deploy
-      hostIPC: true
-      containers:
-      - envFrom:
-        - configMapRef:
-            name: qna-config
-        image: opea/embedding-tei:latest
-        imagePullPolicy: IfNotPresent
-        name: embedding-deploy
-        args: null
-        ports:
-        - containerPort: 6000
-        resources:
-          limits:
-            cpu: 4
-          requests:
-            cpu: 4
-      serviceAccountName: default
----
-kind: Service
-apiVersion: v1
-metadata:
-  name: embedding-svc
-spec:
-  type: ClusterIP
-  selector:
-    app: embedding-deploy
-  ports:
-  - name: service
-    port: 6000
-    targetPort: 6000
diff --git a/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/llm-dependency_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/llm-dependency_run.yaml
deleted file mode 100644
index 6903ee506..000000000
--- a/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/llm-dependency_run.yaml
+++ /dev/null
@@ -1,88 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: llm-dependency-deploy
-  namespace: default
-spec:
-  replicas: 31
-  selector:
-    matchLabels:
-      app: llm-dependency-deploy
-  template:
-    metadata:
-      annotations:
-        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
-      labels:
-        app: llm-dependency-deploy
-    spec:
-      nodeSelector:
-        node-type: chatqna-opea
-      hostIPC: true
-      containers:
-      - envFrom:
-        - configMapRef:
-            name: qna-config
-        image: ghcr.io/huggingface/tgi-gaudi:2.0.4
-        name: llm-dependency-deploy-demo
-        securityContext:
-          capabilities:
-            add:
-            - SYS_NICE
-        args:
-        - --model-id
-        - $(LLM_MODEL_ID)
-        - --max-input-length
-        - '1024'
-        - --max-total-tokens
-        - '2048'
-        - --max-batch-total-tokens
-        - '65536'
-        - --max-batch-prefill-tokens
-        - '4096'
-        volumeMounts:
-        - mountPath: /data
-          name: model-volume
-        - mountPath: /dev/shm
-          name: shm
-        ports:
-        - containerPort: 80
-        resources:
-          limits:
-            habana.ai/gaudi: 1
-        env:
-        - name: OMPI_MCA_btl_vader_single_copy_mechanism
-          value: none
-        - name: PT_HPU_ENABLE_LAZY_COLLECTIVES
-          value: 'true'
-        - name: runtime
-          value: habana
-        - name: HABANA_VISIBLE_DEVICES
-          value: all
-        - name: HF_TOKEN
-          value: ${HF_TOKEN}
-      serviceAccountName: default
-      volumes:
-      - name: model-volume
-        hostPath:
-          path: /mnt/models
-          type: Directory
-      - name: shm
-        emptyDir:
-          medium: Memory
-          sizeLimit: 1Gi
----
-kind: Service
-apiVersion: v1
-metadata:
-  name: llm-dependency-svc
-spec:
-  type: ClusterIP
-  selector:
-    app: llm-dependency-deploy
-  ports:
-  - name: service
-    port: 9009
-    targetPort: 80
diff --git a/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/llm-microservice_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/llm-microservice_run.yaml
deleted file mode 100644
index 7cc6ad123..000000000
--- a/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/llm-microservice_run.yaml
+++ /dev/null
@@ -1,59 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: llm-deploy
-  namespace: default
-spec:
-  replicas: 4
-  selector:
-    matchLabels:
-      app: llm-deploy
-  template:
-    metadata:
-      annotations:
-        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
-      labels:
-        app: llm-deploy
-    spec:
-      nodeSelector:
-        node-type: chatqna-opea
-      topologySpreadConstraints:
-      - maxSkew: 1
-        topologyKey: kubernetes.io/hostname
-        whenUnsatisfiable: ScheduleAnyway
-        labelSelector:
-          matchLabels:
-            app: llm-deploy
-      hostIPC: true
-      containers:
-      - envFrom:
-        - configMapRef:
-            name: qna-config
-        image: opea/llm-tgi:latest
-        imagePullPolicy: IfNotPresent
-        name: llm-deploy
-        args: null
-        ports:
-        - containerPort: 9000
-        resources:
-          limits:
-            cpu: 4
-          requests:
-            cpu: 4
-      serviceAccountName: default
----
-kind: Service
-apiVersion: v1
-metadata:
-  name: llm-svc
-spec:
-  type: ClusterIP
-  selector:
-    app: llm-deploy
-  ports:
-  - name: service
-    port: 9000
-    targetPort: 9000
diff --git a/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/reranking-dependency_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/reranking-dependency_run.yaml
deleted file mode 100644
index af908ecd1..000000000
--- a/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/reranking-dependency_run.yaml
+++ /dev/null
@@ -1,85 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: reranking-dependency-deploy
-  namespace: default
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: reranking-dependency-deploy
-  template:
-    metadata:
-      annotations:
-        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
-      labels:
-        app: reranking-dependency-deploy
-    spec:
-      nodeSelector:
-        node-type: chatqna-opea
-      topologySpreadConstraints:
-      - maxSkew: 1
-        topologyKey: kubernetes.io/hostname
-        whenUnsatisfiable: ScheduleAnyway
-        labelSelector:
-          matchLabels:
-            app: reranking-dependency-deploy
-      containers:
-      - envFrom:
-        - configMapRef:
-            name: qna-config
-        image: opea/tei-gaudi:latest
-        name: reranking-dependency-deploy
-        args:
-        - --model-id
-        - $(RERANK_MODEL_ID)
-        - --auto-truncate
-        volumeMounts:
-        - mountPath: /data
-          name: model-volume
-        - mountPath: /dev/shm
-          name: shm
-        ports:
-        - containerPort: 80
-        resources:
-          limits:
-            habana.ai/gaudi: 1
-        env:
-        - name: OMPI_MCA_btl_vader_single_copy_mechanism
-          value: none
-        - name: PT_HPU_ENABLE_LAZY_COLLECTIVES
-          value: 'true'
-        - name: runtime
-          value: habana
-        - name: HABANA_VISIBLE_DEVICES
-          value: all
-        - name: HF_TOKEN
-          value: ${HF_TOKEN}
-        - name: MAX_WARMUP_SEQUENCE_LENGTH
-          value: '512'
-      serviceAccountName: default
-      volumes:
-      - name: model-volume
-        hostPath:
-          path: /mnt/models
-          type: Directory
-      - name: shm
-        emptyDir:
-          medium: Memory
-          sizeLimit: 1Gi
----
-kind: Service
-apiVersion: v1
-metadata:
-  name: reranking-dependency-svc
-spec:
-  type: ClusterIP
-  selector:
-    app: reranking-dependency-deploy
-  ports:
-  - name: service
-    port: 8808
-    targetPort: 80
diff --git a/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/reranking-microservice_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/reranking-microservice_run.yaml
deleted file mode 100644
index 193350b0d..000000000
--- a/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/reranking-microservice_run.yaml
+++ /dev/null
@@ -1,59 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: reranking-deploy
-  namespace: default
-spec:
-  replicas: 4
-  selector:
-    matchLabels:
-      app: reranking-deploy
-  template:
-    metadata:
-      annotations:
-        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
-      labels:
-        app: reranking-deploy
-    spec:
-      nodeSelector:
-        node-type: chatqna-opea
-      topologySpreadConstraints:
-      - maxSkew: 1
-        topologyKey: kubernetes.io/hostname
-        whenUnsatisfiable: ScheduleAnyway
-        labelSelector:
-          matchLabels:
-            app: reranking-deploy
-      hostIPC: true
-      containers:
-      - envFrom:
-        - configMapRef:
-            name: qna-config
-        image: opea/reranking-tei:latest
-        imagePullPolicy: IfNotPresent
-        name: reranking-deploy
-        args: null
-        ports:
-        - containerPort: 8000
-        resources:
-          limits:
-            cpu: 4
-          requests:
-            cpu: 4
-      serviceAccountName: default
----
-kind: Service
-apiVersion: v1
-metadata:
-  name: reranking-svc
-spec:
-  type: ClusterIP
-  selector:
-    app: reranking-deploy
-  ports:
-  - name: service
-    port: 8000
-    targetPort: 8000
diff --git a/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/retrieval-microservice_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/retrieval-microservice_run.yaml
deleted file mode 100644
index 25314a782..000000000
--- a/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/retrieval-microservice_run.yaml
+++ /dev/null
@@ -1,79 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: retriever-deploy
-  namespace: default
-spec:
-  replicas: 4
-  selector:
-    matchLabels:
-      app: retriever-deploy
-  template:
-    metadata:
-      annotations:
-        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
-      labels:
-        app: retriever-deploy
-    spec:
-      nodeSelector:
-        node-type: chatqna-opea
-      topologySpreadConstraints:
-      - maxSkew: 1
-        topologyKey: kubernetes.io/hostname
-        whenUnsatisfiable: ScheduleAnyway
-        labelSelector:
-          matchLabels:
-            app: retriever-deploy
-      hostIPC: true
-      containers:
-      - env:
-        - name: REDIS_URL
-          valueFrom:
-            configMapKeyRef:
-              name: qna-config
-              key: REDIS_URL
-        - name: TEI_EMBEDDING_ENDPOINT
-          valueFrom:
-            configMapKeyRef:
-              name: qna-config
-              key: TEI_EMBEDDING_ENDPOINT
-        - name: HUGGINGFACEHUB_API_TOKEN
-          valueFrom:
-            configMapKeyRef:
-              name: qna-config
-              key: HUGGINGFACEHUB_API_TOKEN
-        - name: INDEX_NAME
-          valueFrom:
-            configMapKeyRef:
-              name: qna-config
-              key: INDEX_NAME
-        image: opea/retriever-redis:latest
-        imagePullPolicy: IfNotPresent
-        name: retriever-deploy
-        args: null
-        ports:
-        - containerPort: 7000
-        resources:
-          limits:
-            cpu: 8
-            memory: 2500Mi
-          requests:
-            cpu: 8
-            memory: 2500Mi
-      serviceAccountName: default
----
-kind: Service
-apiVersion: v1
-metadata:
-  name: retriever-svc
-spec:
-  type: ClusterIP
-  selector:
-    app: retriever-deploy
-  ports:
-  - name: service
-    port: 7000
-    targetPort: 7000
diff --git a/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/tuned_four_gaudi_with_rerank.yaml b/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/tuned_four_gaudi_with_rerank.yaml
new file mode 100644
index 000000000..373b46c8a
--- /dev/null
+++ b/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/tuned_four_gaudi_with_rerank.yaml
@@ -0,0 +1,770 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: qna-config
+  namespace: default
+data:
+  EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
+  RERANK_MODEL_ID: BAAI/bge-reranker-base
+  LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
+  TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
+  TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
+  TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
+  REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
+  INDEX_NAME: rag-redis
+  HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
+  EMBEDDING_SERVICE_HOST_IP: embedding-svc
+  RETRIEVER_SERVICE_HOST_IP: retriever-svc
+  RERANK_SERVICE_HOST_IP: reranking-svc
+  NODE_SELECTOR: chatqna-opea
+  LLM_SERVICE_HOST_IP: llm-svc
+
+
+---
+
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: chatqna-backend-server-deploy
+  namespace: default
+spec:
+  replicas: 4
+  selector:
+    matchLabels:
+      app: chatqna-backend-server-deploy
+  template:
+    metadata:
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+      labels:
+        app: chatqna-backend-server-deploy
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea
+      topologySpreadConstraints:
+      - maxSkew: 1
+        topologyKey: kubernetes.io/hostname
+        whenUnsatisfiable: ScheduleAnyway
+        labelSelector:
+          matchLabels:
+            app: chatqna-backend-server-deploy
+      hostIPC: true
+      containers:
+      - envFrom:
+        - configMapRef:
+            name: qna-config
+        image: opea/chatqna:latest
+        imagePullPolicy: IfNotPresent
+        name: chatqna-backend-server-deploy
+        args: null
+        ports:
+        - containerPort: 8888
+        resources:
+          limits:
+            cpu: 8
+            memory: 4000Mi
+          requests:
+            cpu: 8
+            memory: 4000Mi
+      serviceAccountName: default
+---
+kind: Service
+apiVersion: v1
+metadata:
+  name: chatqna-backend-server-svc
+  namespace: default
+spec:
+  type: NodePort
+  selector:
+    app: chatqna-backend-server-deploy
+  ports:
+  - name: service
+    port: 8888
+    targetPort: 8888
+    nodePort: 30888
+
+
+---
+
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: dataprep-deploy
+  namespace: default
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: dataprep-deploy
+  template:
+    metadata:
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+      labels:
+        app: dataprep-deploy
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea
+      topologySpreadConstraints:
+      - maxSkew: 1
+        topologyKey: kubernetes.io/hostname
+        whenUnsatisfiable: ScheduleAnyway
+        labelSelector:
+          matchLabels:
+            app: dataprep-deploy
+      hostIPC: true
+      containers:
+      - env:
+        - name: REDIS_URL
+          valueFrom:
+            configMapKeyRef:
+              name: qna-config
+              key: REDIS_URL
+        - name: TEI_ENDPOINT
+          valueFrom:
+            configMapKeyRef:
+              name: qna-config
+              key: TEI_EMBEDDING_ENDPOINT
+        - name: INDEX_NAME
+          valueFrom:
+            configMapKeyRef:
+              name: qna-config
+              key: INDEX_NAME
+        image: opea/dataprep-redis:latest
+        imagePullPolicy: IfNotPresent
+        name: dataprep-deploy
+        args: null
+        ports:
+        - containerPort: 6007
+        - containerPort: 6008
+        - containerPort: 6009
+      serviceAccountName: default
+---
+kind: Service
+apiVersion: v1
+metadata:
+  name: dataprep-svc
+  namespace: default
+spec:
+  type: ClusterIP
+  selector:
+    app: dataprep-deploy
+  ports:
+  - name: port1
+    port: 6007
+    targetPort: 6007
+  - name: port2
+    port: 6008
+    targetPort: 6008
+  - name: port3
+    port: 6009
+    targetPort: 6009
+
+
+---
+
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: embedding-dependency-deploy
+  namespace: default
+spec:
+  replicas: 4
+  selector:
+    matchLabels:
+      app: embedding-dependency-deploy
+  template:
+    metadata:
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+      labels:
+        app: embedding-dependency-deploy
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea
+      containers:
+      - envFrom:
+        - configMapRef:
+            name: qna-config
+        image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
+        name: embedding-dependency-deploy
+        args:
+        - --model-id
+        - $(EMBEDDING_MODEL_ID)
+        - --auto-truncate
+        volumeMounts:
+        - mountPath: /data
+          name: model-volume
+        - mountPath: /dev/shm
+          name: shm
+        ports:
+        - containerPort: 80
+        resources:
+          limits:
+            cpu: 80
+            memory: 20000Mi
+          requests:
+            cpu: 80
+            memory: 20000Mi
+      serviceAccountName: default
+      volumes:
+      - name: model-volume
+        hostPath:
+          path: /mnt/models
+          type: Directory
+      - name: shm
+        emptyDir:
+          medium: Memory
+          sizeLimit: 1Gi
+---
+kind: Service
+apiVersion: v1
+metadata:
+  name: embedding-dependency-svc
+  namespace: default
+spec:
+  type: ClusterIP
+  selector:
+    app: embedding-dependency-deploy
+  ports:
+  - name: service
+    port: 6006
+    targetPort: 80
+
+
+---
+
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: embedding-deploy
+  namespace: default
+spec:
+  replicas: 4
+  selector:
+    matchLabels:
+      app: embedding-deploy
+  template:
+    metadata:
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+      labels:
+        app: embedding-deploy
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea
+      topologySpreadConstraints:
+      - maxSkew: 1
+        topologyKey: kubernetes.io/hostname
+        whenUnsatisfiable: ScheduleAnyway
+        labelSelector:
+          matchLabels:
+            app: embedding-deploy
+      hostIPC: true
+      containers:
+      - envFrom:
+        - configMapRef:
+            name: qna-config
+        image: opea/embedding-tei:latest
+        imagePullPolicy: IfNotPresent
+        name: embedding-deploy
+        args: null
+        ports:
+        - containerPort: 6000
+        resources:
+          limits:
+            cpu: 4
+          requests:
+            cpu: 4
+      serviceAccountName: default
+---
+kind: Service
+apiVersion: v1
+metadata:
+  name: embedding-svc
+  namespace: default
+spec:
+  type: ClusterIP
+  selector:
+    app: embedding-deploy
+  ports:
+  - name: service
+    port: 6000
+    targetPort: 6000
+
+
+---
+
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: llm-dependency-deploy
+  namespace: default
+spec:
+  replicas: 31
+  selector:
+    matchLabels:
+      app: llm-dependency-deploy
+  template:
+    metadata:
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+      labels:
+        app: llm-dependency-deploy
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea
+      hostIPC: true
+      containers:
+      - envFrom:
+        - configMapRef:
+            name: qna-config
+        image: ghcr.io/huggingface/tgi-gaudi:2.0.4
+        name: llm-dependency-deploy-demo
+        securityContext:
+          capabilities:
+            add:
+            - SYS_NICE
+        args:
+        - --model-id
+        - $(LLM_MODEL_ID)
+        - --max-input-length
+        - '1024'
+        - --max-total-tokens
+        - '2048'
+        - --max-batch-total-tokens
+        - '65536'
+        - --max-batch-prefill-tokens
+        - '4096'
+        volumeMounts:
+        - mountPath: /data
+          name: model-volume
+        - mountPath: /dev/shm
+          name: shm
+        ports:
+        - containerPort: 80
+        resources:
+          limits:
+            habana.ai/gaudi: 1
+        env:
+        - name: OMPI_MCA_btl_vader_single_copy_mechanism
+          value: none
+        - name: PT_HPU_ENABLE_LAZY_COLLECTIVES
+          value: 'true'
+        - name: runtime
+          value: habana
+        - name: HABANA_VISIBLE_DEVICES
+          value: all
+        - name: HF_TOKEN
+          value: ${HF_TOKEN}
+      serviceAccountName: default
+      volumes:
+      - name: model-volume
+        hostPath:
+          path: /mnt/models
+          type: Directory
+      - name: shm
+        emptyDir:
+          medium: Memory
+          sizeLimit: 1Gi
+---
+kind: Service
+apiVersion: v1
+metadata:
+  name: llm-dependency-svc
+  namespace: default
+spec:
+  type: ClusterIP
+  selector:
+    app: llm-dependency-deploy
+  ports:
+  - name: service
+    port: 9009
+    targetPort: 80
+
+
+---
+
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: llm-deploy
+  namespace: default
+spec:
+  replicas: 4
+  selector:
+    matchLabels:
+      app: llm-deploy
+  template:
+    metadata:
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+      labels:
+        app: llm-deploy
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea
+      topologySpreadConstraints:
+      - maxSkew: 1
+        topologyKey: kubernetes.io/hostname
+        whenUnsatisfiable: ScheduleAnyway
+        labelSelector:
+          matchLabels:
+            app: llm-deploy
+      hostIPC: true
+      containers:
+      - envFrom:
+        - configMapRef:
+            name: qna-config
+        image: opea/llm-tgi:latest
+        imagePullPolicy: IfNotPresent
+        name: llm-deploy
+        args: null
+        ports:
+        - containerPort: 9000
+        resources:
+          limits:
+            cpu: 4
+          requests:
+            cpu: 4
+      serviceAccountName: default
+---
+kind: Service
+apiVersion: v1
+metadata:
+  name: llm-svc
+  namespace: default
+spec:
+  type: ClusterIP
+  selector:
+    app: llm-deploy
+  ports:
+  - name: service
+    port: 9000
+    targetPort: 9000
+
+
+---
+
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: reranking-dependency-deploy
+  namespace: default
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: reranking-dependency-deploy
+  template:
+    metadata:
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+      labels:
+        app: reranking-dependency-deploy
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea
+      topologySpreadConstraints:
+      - maxSkew: 1
+        topologyKey: kubernetes.io/hostname
+        whenUnsatisfiable: ScheduleAnyway
+        labelSelector:
+          matchLabels:
+            app: reranking-dependency-deploy
+      containers:
+      - envFrom:
+        - configMapRef:
+            name: qna-config
+        image: opea/tei-gaudi:latest
+        name: reranking-dependency-deploy
+        args:
+        - --model-id
+        - $(RERANK_MODEL_ID)
+        - --auto-truncate
+        volumeMounts:
+        - mountPath: /data
+          name: model-volume
+        - mountPath: /dev/shm
+          name: shm
+        ports:
+        - containerPort: 80
+        resources:
+          limits:
+            habana.ai/gaudi: 1
+        env:
+        - name: OMPI_MCA_btl_vader_single_copy_mechanism
+          value: none
+        - name: PT_HPU_ENABLE_LAZY_COLLECTIVES
+          value: 'true'
+        - name: runtime
+          value: habana
+        - name: HABANA_VISIBLE_DEVICES
+          value: all
+        - name: HF_TOKEN
+          value: ${HF_TOKEN}
+        - name: MAX_WARMUP_SEQUENCE_LENGTH
+          value: '512'
+      serviceAccountName: default
+      volumes:
+      - name: model-volume
+        hostPath:
+          path: /mnt/models
+          type: Directory
+      - name: shm
+        emptyDir:
+          medium: Memory
+          sizeLimit: 1Gi
+---
+kind: Service
+apiVersion: v1
+metadata:
+  name: reranking-dependency-svc
+  namespace: default
+spec:
+  type: ClusterIP
+  selector:
+    app: reranking-dependency-deploy
+  ports:
+  - name: service
+    port: 8808
+    targetPort: 80
+
+
+---
+
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: reranking-deploy
+  namespace: default
+spec:
+  replicas: 4
+  selector:
+    matchLabels:
+      app: reranking-deploy
+  template:
+    metadata:
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+      labels:
+        app: reranking-deploy
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea
+      topologySpreadConstraints:
+      - maxSkew: 1
+        topologyKey: kubernetes.io/hostname
+        whenUnsatisfiable: ScheduleAnyway
+        labelSelector:
+          matchLabels:
+            app: reranking-deploy
+      hostIPC: true
+      containers:
+      - envFrom:
+        - configMapRef:
+            name: qna-config
+        image: opea/reranking-tei:latest
+        imagePullPolicy: IfNotPresent
+        name: reranking-deploy
+        args: null
+        ports:
+        - containerPort: 8000
+        resources:
+          limits:
+            cpu: 4
+          requests:
+            cpu: 4
+      serviceAccountName: default
+---
+kind: Service
+apiVersion: v1
+metadata:
+  name: reranking-svc
+  namespace: default
+spec:
+  type: ClusterIP
+  selector:
+    app: reranking-deploy
+  ports:
+  - name: service
+    port: 8000
+    targetPort: 8000
+
+
+---
+
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: retriever-deploy
+  namespace: default
+spec:
+  replicas: 4
+  selector:
+    matchLabels:
+      app: retriever-deploy
+  template:
+    metadata:
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+      labels:
+        app: retriever-deploy
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea
+      topologySpreadConstraints:
+      - maxSkew: 1
+        topologyKey: kubernetes.io/hostname
+        whenUnsatisfiable: ScheduleAnyway
+        labelSelector:
+          matchLabels:
+            app: retriever-deploy
+      hostIPC: true
+      containers:
+      - env:
+        - name: REDIS_URL
+          valueFrom:
+            configMapKeyRef:
+              name: qna-config
+              key: REDIS_URL
+        - name: TEI_EMBEDDING_ENDPOINT
+          valueFrom:
+            configMapKeyRef:
+              name: qna-config
+              key: TEI_EMBEDDING_ENDPOINT
+        - name: HUGGINGFACEHUB_API_TOKEN
+          valueFrom:
+            configMapKeyRef:
+              name: qna-config
+              key: HUGGINGFACEHUB_API_TOKEN
+        - name: INDEX_NAME
+          valueFrom:
+            configMapKeyRef:
+              name: qna-config
+              key: INDEX_NAME
+        image: opea/retriever-redis:latest
+        imagePullPolicy: IfNotPresent
+        name: retriever-deploy
+        args: null
+        ports:
+        - containerPort: 7000
+        resources:
+          limits:
+            cpu: 8
+            memory: 2500Mi
+          requests:
+            cpu: 8
+            memory: 2500Mi
+      serviceAccountName: default
+---
+kind: Service
+apiVersion: v1
+metadata:
+  name: retriever-svc
+  namespace: default
+spec:
+  type: ClusterIP
+  selector:
+    app: retriever-deploy
+  ports:
+  - name: service
+    port: 7000
+    targetPort: 7000
+
+
+---
+
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: vector-db
+  namespace: default
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: vector-db
+  template:
+    metadata:
+      labels:
+        app: vector-db
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea
+      topologySpreadConstraints:
+      - maxSkew: 1
+        topologyKey: kubernetes.io/hostname
+        whenUnsatisfiable: ScheduleAnyway
+        labelSelector:
+          matchLabels:
+            app: vector-db
+      containers:
+      - name: vector-db
+        image: redis/redis-stack:7.2.0-v9
+        ports:
+        - containerPort: 6379
+        - containerPort: 8001
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: vector-db
+  namespace: default
+spec:
+  type: ClusterIP
+  selector:
+    app: vector-db
+  ports:
+  - name: vector-db-service
+    port: 6379
+    targetPort: 6379
+  - name: vector-db-insight
+    port: 8001
+    targetPort: 8001
+
+
+---
diff --git a/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/vector-db_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/vector-db_run.yaml
deleted file mode 100644
index e04e8c5fe..000000000
--- a/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/vector-db_run.yaml
+++ /dev/null
@@ -1,48 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: vector-db
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: vector-db
-  template:
-    metadata:
-      labels:
-        app: vector-db
-    spec:
-      nodeSelector:
-        node-type: chatqna-opea
-      topologySpreadConstraints:
-      - maxSkew: 1
-        topologyKey: kubernetes.io/hostname
-        whenUnsatisfiable: ScheduleAnyway
-        labelSelector:
-          matchLabels:
-            app: vector-db
-      containers:
-      - name: vector-db
-        image: redis/redis-stack:7.2.0-v9
-        ports:
-        - containerPort: 6379
-        - containerPort: 8001
----
-apiVersion: v1
-kind: Service
-metadata:
-  name: vector-db
-spec:
-  type: ClusterIP
-  selector:
-    app: vector-db
-  ports:
-  - name: vector-db-service
-    port: 6379
-    targetPort: 6379
-  - name: vector-db-insight
-    port: 8001
-    targetPort: 8001
diff --git a/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/chatqna_config_map.yaml b/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/chatqna_config_map.yaml
deleted file mode 100644
index 368c800e4..000000000
--- a/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/chatqna_config_map.yaml
+++ /dev/null
@@ -1,23 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: v1
-kind: ConfigMap
-metadata:
-  name: qna-config
-  namespace: default
-data:
-  EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
-  RERANK_MODEL_ID: BAAI/bge-reranker-base
-  LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
-  TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
-  TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
-  TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
-  REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
-  INDEX_NAME: rag-redis
-  HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
-  EMBEDDING_SERVICE_HOST_IP: embedding-svc
-  RETRIEVER_SERVICE_HOST_IP: retriever-svc
-  RERANK_SERVICE_HOST_IP: reranking-svc
-  NODE_SELECTOR: chatqna-opea
-  LLM_SERVICE_HOST_IP: llm-svc
diff --git a/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/chatqna_mega_service_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/chatqna_mega_service_run.yaml
deleted file mode 100644
index 4a3d3f563..000000000
--- a/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/chatqna_mega_service_run.yaml
+++ /dev/null
@@ -1,62 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: chatqna-backend-server-deploy
-  namespace: default
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: chatqna-backend-server-deploy
-  template:
-    metadata:
-      annotations:
-        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
-      labels:
-        app: chatqna-backend-server-deploy
-    spec:
-      nodeSelector:
-        node-type: chatqna-opea
-      topologySpreadConstraints:
-      - maxSkew: 1
-        topologyKey: kubernetes.io/hostname
-        whenUnsatisfiable: ScheduleAnyway
-        labelSelector:
-          matchLabels:
-            app: chatqna-backend-server-deploy
-      hostIPC: true
-      containers:
-      - envFrom:
-        - configMapRef:
-            name: qna-config
-        image: opea/chatqna:latest
-        imagePullPolicy: IfNotPresent
-        name: chatqna-backend-server-deploy
-        args: null
-        ports:
-        - containerPort: 8888
-        resources:
-          limits:
-            cpu: 8
-            memory: 4000Mi
-          requests:
-            cpu: 8
-            memory: 4000Mi
-      serviceAccountName: default
----
-kind: Service
-apiVersion: v1
-metadata:
-  name: chatqna-backend-server-svc
-spec:
-  type: NodePort
-  selector:
-    app: chatqna-backend-server-deploy
-  ports:
-  - name: service
-    port: 8888
-    targetPort: 8888
-    nodePort: 30888
diff --git a/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/dataprep-microservice_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/dataprep-microservice_run.yaml
deleted file mode 100644
index 4c71df7ce..000000000
--- a/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/dataprep-microservice_run.yaml
+++ /dev/null
@@ -1,75 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: dataprep-deploy
-  namespace: default
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: dataprep-deploy
-  template:
-    metadata:
-      annotations:
-        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
-      labels:
-        app: dataprep-deploy
-    spec:
-      nodeSelector:
-        node-type: chatqna-opea
-      topologySpreadConstraints:
-      - maxSkew: 1
-        topologyKey: kubernetes.io/hostname
-        whenUnsatisfiable: ScheduleAnyway
-        labelSelector:
-          matchLabels:
-            app: dataprep-deploy
-      hostIPC: true
-      containers:
-      - env:
-        - name: REDIS_URL
-          valueFrom:
-            configMapKeyRef:
-              name: qna-config
-              key: REDIS_URL
-        - name: TEI_ENDPOINT
-          valueFrom:
-            configMapKeyRef:
-              name: qna-config
-              key: TEI_EMBEDDING_ENDPOINT
-        - name: INDEX_NAME
-          valueFrom:
-            configMapKeyRef:
-              name: qna-config
-              key: INDEX_NAME
-        image: opea/dataprep-redis:latest
-        imagePullPolicy: IfNotPresent
-        name: dataprep-deploy
-        args: null
-        ports:
-        - containerPort: 6007
-        - containerPort: 6008
-        - containerPort: 6009
-      serviceAccountName: default
----
-kind: Service
-apiVersion: v1
-metadata:
-  name: dataprep-svc
-spec:
-  type: ClusterIP
-  selector:
-    app: dataprep-deploy
-  ports:
-  - name: port1
-    port: 6007
-    targetPort: 6007
-  - name: port2
-    port: 6008
-    targetPort: 6008
-  - name: port3
-    port: 6009
-    targetPort: 6009
diff --git a/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/embedding-dependency_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/embedding-dependency_run.yaml
deleted file mode 100644
index f27ffcad0..000000000
--- a/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/embedding-dependency_run.yaml
+++ /dev/null
@@ -1,69 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: embedding-dependency-deploy
-  namespace: default
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: embedding-dependency-deploy
-  template:
-    metadata:
-      annotations:
-        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
-      labels:
-        app: embedding-dependency-deploy
-    spec:
-      nodeSelector:
-        node-type: chatqna-opea
-      containers:
-      - envFrom:
-        - configMapRef:
-            name: qna-config
-        image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
-        name: embedding-dependency-deploy
-        args:
-        - --model-id
-        - $(EMBEDDING_MODEL_ID)
-        - --auto-truncate
-        volumeMounts:
-        - mountPath: /data
-          name: model-volume
-        - mountPath: /dev/shm
-          name: shm
-        ports:
-        - containerPort: 80
-        resources:
-          limits:
-            cpu: 80
-            memory: 20000Mi
-          requests:
-            cpu: 80
-            memory: 20000Mi
-      serviceAccountName: default
-      volumes:
-      - name: model-volume
-        hostPath:
-          path: /mnt/models
-          type: Directory
-      - name: shm
-        emptyDir:
-          medium: Memory
-          sizeLimit: 1Gi
----
-kind: Service
-apiVersion: v1
-metadata:
-  name: embedding-dependency-svc
-spec:
-  type: ClusterIP
-  selector:
-    app: embedding-dependency-deploy
-  ports:
-  - name: service
-    port: 6006
-    targetPort: 80
diff --git a/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/embedding-microservice_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/embedding-microservice_run.yaml
deleted file mode 100644
index f23ba0b4f..000000000
--- a/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/embedding-microservice_run.yaml
+++ /dev/null
@@ -1,59 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: embedding-deploy
-  namespace: default
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: embedding-deploy
-  template:
-    metadata:
-      annotations:
-        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
-      labels:
-        app: embedding-deploy
-    spec:
-      nodeSelector:
-        node-type: chatqna-opea
-      topologySpreadConstraints:
-      - maxSkew: 1
-        topologyKey: kubernetes.io/hostname
-        whenUnsatisfiable: ScheduleAnyway
-        labelSelector:
-          matchLabels:
-            app: embedding-deploy
-      hostIPC: true
-      containers:
-      - envFrom:
-        - configMapRef:
-            name: qna-config
-        image: opea/embedding-tei:latest
-        imagePullPolicy: IfNotPresent
-        name: embedding-deploy
-        args: null
-        ports:
-        - containerPort: 6000
-        resources:
-          limits:
-            cpu: 4
-          requests:
-            cpu: 4
-      serviceAccountName: default
----
-kind: Service
-apiVersion: v1
-metadata:
-  name: embedding-svc
-spec:
-  type: ClusterIP
-  selector:
-    app: embedding-deploy
-  ports:
-  - name: service
-    port: 6000
-    targetPort: 6000
diff --git a/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/llm-dependency_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/llm-dependency_run.yaml
deleted file mode 100644
index 6de12aa6d..000000000
--- a/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/llm-dependency_run.yaml
+++ /dev/null
@@ -1,88 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: llm-dependency-deploy
-  namespace: default
-spec:
-  replicas: 7
-  selector:
-    matchLabels:
-      app: llm-dependency-deploy
-  template:
-    metadata:
-      annotations:
-        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
-      labels:
-        app: llm-dependency-deploy
-    spec:
-      nodeSelector:
-        node-type: chatqna-opea
-      hostIPC: true
-      containers:
-      - envFrom:
-        - configMapRef:
-            name: qna-config
-        image: ghcr.io/huggingface/tgi-gaudi:2.0.4
-        name: llm-dependency-deploy-demo
-        securityContext:
-          capabilities:
-            add:
-            - SYS_NICE
-        args:
-        - --model-id
-        - $(LLM_MODEL_ID)
-        - --max-input-length
-        - '1024'
-        - --max-total-tokens
-        - '2048'
-        - --max-batch-total-tokens
-        - '65536'
-        - --max-batch-prefill-tokens
-        - '4096'
-        volumeMounts:
-        - mountPath: /data
-          name: model-volume
-        - mountPath: /dev/shm
-          name: shm
-        ports:
-        - containerPort: 80
-        resources:
-          limits:
-            habana.ai/gaudi: 1
-        env:
-        - name: OMPI_MCA_btl_vader_single_copy_mechanism
-          value: none
-        - name: PT_HPU_ENABLE_LAZY_COLLECTIVES
-          value: 'true'
-        - name: runtime
-          value: habana
-        - name: HABANA_VISIBLE_DEVICES
-          value: all
-        - name: HF_TOKEN
-          value: ${HF_TOKEN}
-      serviceAccountName: default
-      volumes:
-      - name: model-volume
-        hostPath:
-          path: /mnt/models
-          type: Directory
-      - name: shm
-        emptyDir:
-          medium: Memory
-          sizeLimit: 1Gi
----
-kind: Service
-apiVersion: v1
-metadata:
-  name: llm-dependency-svc
-spec:
-  type: ClusterIP
-  selector:
-    app: llm-dependency-deploy
-  ports:
-  - name: service
-    port: 9009
-    targetPort: 80
diff --git a/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/llm-microservice_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/llm-microservice_run.yaml
deleted file mode 100644
index 1d9e29112..000000000
--- a/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/llm-microservice_run.yaml
+++ /dev/null
@@ -1,59 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: llm-deploy
-  namespace: default
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: llm-deploy
-  template:
-    metadata:
-      annotations:
-        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
-      labels:
-        app: llm-deploy
-    spec:
-      nodeSelector:
-        node-type: chatqna-opea
-      topologySpreadConstraints:
-      - maxSkew: 1
-        topologyKey: kubernetes.io/hostname
-        whenUnsatisfiable: ScheduleAnyway
-        labelSelector:
-          matchLabels:
-            app: llm-deploy
-      hostIPC: true
-      containers:
-      - envFrom:
-        - configMapRef:
-            name: qna-config
-        image: opea/llm-tgi:latest
-        imagePullPolicy: IfNotPresent
-        name: llm-deploy
-        args: null
-        ports:
-        - containerPort: 9000
-        resources:
-          limits:
-            cpu: 4
-          requests:
-            cpu: 4
-      serviceAccountName: default
----
-kind: Service
-apiVersion: v1
-metadata:
-  name: llm-svc
-spec:
-  type: ClusterIP
-  selector:
-    app: llm-deploy
-  ports:
-  - name: service
-    port: 9000
-    targetPort: 9000
diff --git a/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/reranking-dependency_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/reranking-dependency_run.yaml
deleted file mode 100644
index af908ecd1..000000000
--- a/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/reranking-dependency_run.yaml
+++ /dev/null
@@ -1,85 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: reranking-dependency-deploy
-  namespace: default
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: reranking-dependency-deploy
-  template:
-    metadata:
-      annotations:
-        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
-      labels:
-        app: reranking-dependency-deploy
-    spec:
-      nodeSelector:
-        node-type: chatqna-opea
-      topologySpreadConstraints:
-      - maxSkew: 1
-        topologyKey: kubernetes.io/hostname
-        whenUnsatisfiable: ScheduleAnyway
-        labelSelector:
-          matchLabels:
-            app: reranking-dependency-deploy
-      containers:
-      - envFrom:
-        - configMapRef:
-            name: qna-config
-        image: opea/tei-gaudi:latest
-        name: reranking-dependency-deploy
-        args:
-        - --model-id
-        - $(RERANK_MODEL_ID)
-        - --auto-truncate
-        volumeMounts:
-        - mountPath: /data
-          name: model-volume
-        - mountPath: /dev/shm
-          name: shm
-        ports:
-        - containerPort: 80
-        resources:
-          limits:
-            habana.ai/gaudi: 1
-        env:
-        - name: OMPI_MCA_btl_vader_single_copy_mechanism
-          value: none
-        - name: PT_HPU_ENABLE_LAZY_COLLECTIVES
-          value: 'true'
-        - name: runtime
-          value: habana
-        - name: HABANA_VISIBLE_DEVICES
-          value: all
-        - name: HF_TOKEN
-          value: ${HF_TOKEN}
-        - name: MAX_WARMUP_SEQUENCE_LENGTH
-          value: '512'
-      serviceAccountName: default
-      volumes:
-      - name: model-volume
-        hostPath:
-          path: /mnt/models
-          type: Directory
-      - name: shm
-        emptyDir:
-          medium: Memory
-          sizeLimit: 1Gi
----
-kind: Service
-apiVersion: v1
-metadata:
-  name: reranking-dependency-svc
-spec:
-  type: ClusterIP
-  selector:
-    app: reranking-dependency-deploy
-  ports:
-  - name: service
-    port: 8808
-    targetPort: 80
diff --git a/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/reranking-microservice_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/reranking-microservice_run.yaml
deleted file mode 100644
index 25f6a00b3..000000000
--- a/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/reranking-microservice_run.yaml
+++ /dev/null
@@ -1,59 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: reranking-deploy
-  namespace: default
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: reranking-deploy
-  template:
-    metadata:
-      annotations:
-        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
-      labels:
-        app: reranking-deploy
-    spec:
-      nodeSelector:
-        node-type: chatqna-opea
-      topologySpreadConstraints:
-      - maxSkew: 1
-        topologyKey: kubernetes.io/hostname
-        whenUnsatisfiable: ScheduleAnyway
-        labelSelector:
-          matchLabels:
-            app: reranking-deploy
-      hostIPC: true
-      containers:
-      - envFrom:
-        - configMapRef:
-            name: qna-config
-        image: opea/reranking-tei:latest
-        imagePullPolicy: IfNotPresent
-        name: reranking-deploy
-        args: null
-        ports:
-        - containerPort: 8000
-        resources:
-          limits:
-            cpu: 4
-          requests:
-            cpu: 4
-      serviceAccountName: default
----
-kind: Service
-apiVersion: v1
-metadata:
-  name: reranking-svc
-spec:
-  type: ClusterIP
-  selector:
-    app: reranking-deploy
-  ports:
-  - name: service
-    port: 8000
-    targetPort: 8000
diff --git a/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/retrieval-microservice_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/retrieval-microservice_run.yaml
deleted file mode 100644
index 298abd73a..000000000
--- a/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/retrieval-microservice_run.yaml
+++ /dev/null
@@ -1,79 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: retriever-deploy
-  namespace: default
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: retriever-deploy
-  template:
-    metadata:
-      annotations:
-        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
-      labels:
-        app: retriever-deploy
-    spec:
-      nodeSelector:
-        node-type: chatqna-opea
-      topologySpreadConstraints:
-      - maxSkew: 1
-        topologyKey: kubernetes.io/hostname
-        whenUnsatisfiable: ScheduleAnyway
-        labelSelector:
-          matchLabels:
-            app: retriever-deploy
-      hostIPC: true
-      containers:
-      - env:
-        - name: REDIS_URL
-          valueFrom:
-            configMapKeyRef:
-              name: qna-config
-              key: REDIS_URL
-        - name: TEI_EMBEDDING_ENDPOINT
-          valueFrom:
-            configMapKeyRef:
-              name: qna-config
-              key: TEI_EMBEDDING_ENDPOINT
-        - name: HUGGINGFACEHUB_API_TOKEN
-          valueFrom:
-            configMapKeyRef:
-              name: qna-config
-              key: HUGGINGFACEHUB_API_TOKEN
-        - name: INDEX_NAME
-          valueFrom:
-            configMapKeyRef:
-              name: qna-config
-              key: INDEX_NAME
-        image: opea/retriever-redis:latest
-        imagePullPolicy: IfNotPresent
-        name: retriever-deploy
-        args: null
-        ports:
-        - containerPort: 7000
-        resources:
-          limits:
-            cpu: 8
-            memory: 2500Mi
-          requests:
-            cpu: 8
-            memory: 2500Mi
-      serviceAccountName: default
----
-kind: Service
-apiVersion: v1
-metadata:
-  name: retriever-svc
-spec:
-  type: ClusterIP
-  selector:
-    app: retriever-deploy
-  ports:
-  - name: service
-    port: 7000
-    targetPort: 7000
diff --git a/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/tuned_single_gaudi_with_rerank.yaml b/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/tuned_single_gaudi_with_rerank.yaml
new file mode 100644
index 000000000..9d2f0ee96
--- /dev/null
+++ b/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/tuned_single_gaudi_with_rerank.yaml
@@ -0,0 +1,770 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+kind: ConfigMap  # shared settings read by the ChatQnA workloads via configMapRef / configMapKeyRef
+metadata:
+  name: qna-config
+  namespace: default
+data:
+  EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
+  RERANK_MODEL_ID: BAAI/bge-reranker-base
+  LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
+  TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
+  TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
+  TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
+  REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
+  INDEX_NAME: rag-redis
+  HUGGINGFACEHUB_API_TOKEN: '${HF_TOKEN}'  # placeholder to substitute before applying; quoted so an empty value stays a string
+  EMBEDDING_SERVICE_HOST_IP: embedding-svc
+  RETRIEVER_SERVICE_HOST_IP: retriever-svc
+  RERANK_SERVICE_HOST_IP: reranking-svc
+  NODE_SELECTOR: chatqna-opea
+  LLM_SERVICE_HOST_IP: llm-svc
+
+
+---
+
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment  # ChatQnA backend server pod template
+metadata:
+  namespace: default
+  name: chatqna-backend-server-deploy
+spec:
+  selector:
+    matchLabels:
+      app: chatqna-backend-server-deploy
+  replicas: 1
+  template:
+    metadata:
+      labels:
+        app: chatqna-backend-server-deploy
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: "true"
+    spec:
+      serviceAccountName: default
+      hostIPC: true
+      nodeSelector:
+        node-type: chatqna-opea
+      topologySpreadConstraints:
+      - labelSelector:
+          matchLabels:
+            app: chatqna-backend-server-deploy
+        topologyKey: kubernetes.io/hostname
+        maxSkew: 1
+        whenUnsatisfiable: ScheduleAnyway  # soft constraint; scheduling is never blocked
+      containers:
+      - name: chatqna-backend-server-deploy
+        image: opea/chatqna:latest
+        imagePullPolicy: IfNotPresent
+        args: null
+        envFrom:  # every qna-config key becomes an environment variable
+        - configMapRef:
+            name: qna-config
+        ports:
+        - containerPort: 8888
+        resources:  # requests equal limits for both cpu and memory
+          requests:
+            cpu: 8
+            memory: 4000Mi
+          limits:
+            cpu: 8
+            memory: 4000Mi
+---
+apiVersion: v1
+kind: Service  # exposes the ChatQnA backend on a fixed node port
+metadata:
+  namespace: default
+  name: chatqna-backend-server-svc
+spec:
+  selector:
+    app: chatqna-backend-server-deploy
+  type: NodePort
+  ports:
+  - port: 8888
+    targetPort: 8888
+    nodePort: 30888  # externally reachable port on each node
+    name: service
+
+
+---
+
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment  # dataprep microservice (opea/dataprep-redis image)
+metadata:
+  namespace: default
+  name: dataprep-deploy
+spec:
+  selector:
+    matchLabels:
+      app: dataprep-deploy
+  replicas: 1
+  template:
+    metadata:
+      labels:
+        app: dataprep-deploy
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: "true"
+    spec:
+      serviceAccountName: default
+      hostIPC: true
+      nodeSelector:
+        node-type: chatqna-opea
+      topologySpreadConstraints:
+      - labelSelector:
+          matchLabels:
+            app: dataprep-deploy
+        topologyKey: kubernetes.io/hostname
+        maxSkew: 1
+        whenUnsatisfiable: ScheduleAnyway  # soft constraint; scheduling is never blocked
+      containers:
+      - name: dataprep-deploy
+        image: opea/dataprep-redis:latest
+        imagePullPolicy: IfNotPresent
+        args: null
+        ports:
+        - containerPort: 6007
+        - containerPort: 6008
+        - containerPort: 6009
+        env:  # only these three qna-config keys are injected
+        - name: REDIS_URL
+          valueFrom:
+            configMapKeyRef:
+              key: REDIS_URL
+              name: qna-config
+        - name: TEI_ENDPOINT  # variable name differs from the ConfigMap key it reads
+          valueFrom:
+            configMapKeyRef:
+              key: TEI_EMBEDDING_ENDPOINT
+              name: qna-config
+        - name: INDEX_NAME
+          valueFrom:
+            configMapKeyRef:
+              key: INDEX_NAME
+              name: qna-config
+---
+apiVersion: v1
+kind: Service  # cluster-internal endpoint for the three dataprep ports
+metadata:
+  namespace: default
+  name: dataprep-svc
+spec:
+  selector:
+    app: dataprep-deploy
+  type: ClusterIP
+  ports:
+  - port: 6007
+    targetPort: 6007
+    name: port1
+  - port: 6008
+    targetPort: 6008
+    name: port2
+  - port: 6009
+    targetPort: 6009
+    name: port3
+
+
+---
+
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment  # text-embeddings-inference server (CPU image) for the embedding model
+metadata:
+  namespace: default
+  name: embedding-dependency-deploy
+spec:
+  selector:
+    matchLabels:
+      app: embedding-dependency-deploy
+  replicas: 1
+  template:
+    metadata:
+      labels:
+        app: embedding-dependency-deploy
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: "true"
+    spec:
+      serviceAccountName: default
+      nodeSelector:
+        node-type: chatqna-opea
+      volumes:
+      - hostPath:
+          type: Directory  # the directory must already exist on the node
+          path: /mnt/models
+        name: model-volume
+      - emptyDir:
+          sizeLimit: 1Gi
+          medium: Memory
+        name: shm
+      containers:
+      - name: embedding-dependency-deploy
+        image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
+        envFrom:
+        - configMapRef:
+            name: qna-config
+        args:
+        - --model-id
+        - $(EMBEDDING_MODEL_ID)  # expanded from the container environment (qna-config)
+        - --auto-truncate
+        ports:
+        - containerPort: 80
+        volumeMounts:
+        - name: model-volume
+          mountPath: /data
+        - name: shm
+          mountPath: /dev/shm
+        resources:  # requests equal limits for both cpu and memory
+          requests:
+            cpu: 80
+            memory: 20000Mi
+          limits:
+            cpu: 80
+            memory: 20000Mi
+---
+apiVersion: v1
+kind: Service  # cluster-internal endpoint for the embedding model server
+metadata:
+  namespace: default
+  name: embedding-dependency-svc
+spec:
+  selector:
+    app: embedding-dependency-deploy
+  type: ClusterIP
+  ports:
+  - port: 6006  # service port; forwarded to container port 80
+    targetPort: 80
+    name: service
+
+
+---
+
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment  # embedding microservice (opea/embedding-tei image)
+metadata:
+  namespace: default
+  name: embedding-deploy
+spec:
+  selector:
+    matchLabels:
+      app: embedding-deploy
+  replicas: 1
+  template:
+    metadata:
+      labels:
+        app: embedding-deploy
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: "true"
+    spec:
+      serviceAccountName: default
+      hostIPC: true
+      nodeSelector:
+        node-type: chatqna-opea
+      topologySpreadConstraints:
+      - labelSelector:
+          matchLabels:
+            app: embedding-deploy
+        topologyKey: kubernetes.io/hostname
+        maxSkew: 1
+        whenUnsatisfiable: ScheduleAnyway  # soft constraint; scheduling is never blocked
+      containers:
+      - name: embedding-deploy
+        image: opea/embedding-tei:latest
+        imagePullPolicy: IfNotPresent
+        args: null
+        envFrom:  # every qna-config key becomes an environment variable
+        - configMapRef:
+            name: qna-config
+        ports:
+        - containerPort: 6000
+        resources:  # cpu only; no memory request or limit is set
+          requests:
+            cpu: 4
+          limits:
+            cpu: 4
+---
+apiVersion: v1
+kind: Service  # cluster-internal endpoint for the embedding microservice
+metadata:
+  namespace: default
+  name: embedding-svc
+spec:
+  selector:
+    app: embedding-deploy
+  type: ClusterIP
+  ports:
+  - port: 6000
+    targetPort: 6000
+    name: service
+
+
+---
+
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment  # TGI (tgi-gaudi) model server for the LLM
+metadata:
+  name: llm-dependency-deploy
+  namespace: default
+spec:
+  replicas: 7  # each replica requests one habana.ai/gaudi device (see resources below)
+  selector:
+    matchLabels:
+      app: llm-dependency-deploy
+  template:
+    metadata:
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+      labels:
+        app: llm-dependency-deploy
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea
+      hostIPC: true
+      containers:
+      - envFrom:  # every qna-config key becomes an environment variable
+        - configMapRef:
+            name: qna-config
+        image: ghcr.io/huggingface/tgi-gaudi:2.0.4
+        name: llm-dependency-deploy-demo
+        securityContext:
+          capabilities:
+            add:
+            - SYS_NICE
+        args:
+        - --model-id
+        - $(LLM_MODEL_ID)  # expanded from the container environment (qna-config)
+        - --max-input-length
+        - '1024'
+        - --max-total-tokens
+        - '2048'
+        - --max-batch-total-tokens
+        - '65536'
+        - --max-batch-prefill-tokens
+        - '4096'
+        volumeMounts:
+        - mountPath: /data
+          name: model-volume
+        - mountPath: /dev/shm
+          name: shm
+        ports:
+        - containerPort: 80
+        resources:
+          limits:
+            habana.ai/gaudi: 1
+        env:
+        - name: OMPI_MCA_btl_vader_single_copy_mechanism
+          value: none
+        - name: PT_HPU_ENABLE_LAZY_COLLECTIVES
+          value: 'true'
+        - name: runtime
+          value: habana
+        - name: HABANA_VISIBLE_DEVICES
+          value: all
+        - name: HF_TOKEN
+          value: '${HF_TOKEN}'  # placeholder to substitute before applying; quoted so an empty value stays a string
+      serviceAccountName: default
+      volumes:
+      - name: model-volume
+        hostPath:
+          path: /mnt/models
+          type: Directory  # the directory must already exist on the node
+      - name: shm
+        emptyDir:
+          medium: Memory
+          sizeLimit: 1Gi
+---
+apiVersion: v1
+kind: Service  # cluster-internal endpoint for the LLM model server
+metadata:
+  namespace: default
+  name: llm-dependency-svc
+spec:
+  selector:
+    app: llm-dependency-deploy
+  type: ClusterIP
+  ports:
+  - port: 9009  # service port; forwarded to container port 80
+    targetPort: 80
+    name: service
+
+
+---
+
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment  # LLM microservice (opea/llm-tgi image)
+metadata:
+  namespace: default
+  name: llm-deploy
+spec:
+  selector:
+    matchLabels:
+      app: llm-deploy
+  replicas: 1
+  template:
+    metadata:
+      labels:
+        app: llm-deploy
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: "true"
+    spec:
+      serviceAccountName: default
+      hostIPC: true
+      nodeSelector:
+        node-type: chatqna-opea
+      topologySpreadConstraints:
+      - labelSelector:
+          matchLabels:
+            app: llm-deploy
+        topologyKey: kubernetes.io/hostname
+        maxSkew: 1
+        whenUnsatisfiable: ScheduleAnyway  # soft constraint; scheduling is never blocked
+      containers:
+      - name: llm-deploy
+        image: opea/llm-tgi:latest
+        imagePullPolicy: IfNotPresent
+        args: null
+        envFrom:  # every qna-config key becomes an environment variable
+        - configMapRef:
+            name: qna-config
+        ports:
+        - containerPort: 9000
+        resources:  # cpu only; no memory request or limit is set
+          requests:
+            cpu: 4
+          limits:
+            cpu: 4
+---
+apiVersion: v1
+kind: Service  # cluster-internal endpoint for the LLM microservice
+metadata:
+  namespace: default
+  name: llm-svc
+spec:
+  selector:
+    app: llm-deploy
+  type: ClusterIP
+  ports:
+  - port: 9000
+    targetPort: 9000
+    name: service
+
+
+---
+
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment  # TEI (opea/tei-gaudi) model server for the reranking model
+metadata:
+  name: reranking-dependency-deploy
+  namespace: default
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: reranking-dependency-deploy
+  template:
+    metadata:
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+      labels:
+        app: reranking-dependency-deploy
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea
+      topologySpreadConstraints:
+      - maxSkew: 1
+        topologyKey: kubernetes.io/hostname
+        whenUnsatisfiable: ScheduleAnyway  # soft constraint; scheduling is never blocked
+        labelSelector:
+          matchLabels:
+            app: reranking-dependency-deploy
+      containers:
+      - envFrom:  # every qna-config key becomes an environment variable
+        - configMapRef:
+            name: qna-config
+        image: opea/tei-gaudi:latest
+        name: reranking-dependency-deploy
+        args:
+        - --model-id
+        - $(RERANK_MODEL_ID)  # expanded from the container environment (qna-config)
+        - --auto-truncate
+        volumeMounts:
+        - mountPath: /data
+          name: model-volume
+        - mountPath: /dev/shm
+          name: shm
+        ports:
+        - containerPort: 80
+        resources:
+          limits:
+            habana.ai/gaudi: 1
+        env:
+        - name: OMPI_MCA_btl_vader_single_copy_mechanism
+          value: none
+        - name: PT_HPU_ENABLE_LAZY_COLLECTIVES
+          value: 'true'
+        - name: runtime
+          value: habana
+        - name: HABANA_VISIBLE_DEVICES
+          value: all
+        - name: HF_TOKEN
+          value: '${HF_TOKEN}'  # placeholder to substitute before applying; quoted so an empty value stays a string
+        - name: MAX_WARMUP_SEQUENCE_LENGTH
+          value: '512'
+      serviceAccountName: default
+      volumes:
+      - name: model-volume
+        hostPath:
+          path: /mnt/models
+          type: Directory  # the directory must already exist on the node
+      - name: shm
+        emptyDir:
+          medium: Memory
+          sizeLimit: 1Gi
+---
+apiVersion: v1
+kind: Service
+metadata:
+  namespace: default
+  name: reranking-dependency-svc
+spec:
+  type: ClusterIP  # reachable only from inside the cluster
+  ports:
+  - name: service
+    port: 8808  # port exposed by the Service
+    targetPort: 80  # container port on the selected pods
+  selector:
+    app: reranking-dependency-deploy  # routes to pods carrying this label
+
+
+---
+
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  namespace: default
+  name: reranking-deploy
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: reranking-deploy  # must match the pod template label below
+  template:
+    metadata:
+      labels:
+        app: reranking-deploy
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'  # quoted: annotation values are strings
+    spec:
+      serviceAccountName: default
+      hostIPC: true  # pod shares the node's IPC namespace
+      nodeSelector:
+        node-type: chatqna-opea  # only nodes carrying this label are eligible
+      topologySpreadConstraints:
+      - labelSelector:
+          matchLabels:
+            app: reranking-deploy
+        maxSkew: 1
+        topologyKey: kubernetes.io/hostname  # spread is evaluated per node
+        whenUnsatisfiable: ScheduleAnyway  # soft constraint: never blocks scheduling
+      containers:
+      - name: reranking-deploy
+        image: opea/reranking-tei:latest
+        imagePullPolicy: IfNotPresent
+        args: null  # explicit null: no argument override
+        envFrom:
+        - configMapRef:
+            name: qna-config  # ConfigMap keys are imported as env vars
+        ports:
+        - containerPort: 8000
+        resources:
+          requests:
+            cpu: 4
+          limits:
+            cpu: 4  # same value as the request
+---
+apiVersion: v1
+kind: Service
+metadata:
+  namespace: default
+  name: reranking-svc
+spec:
+  type: ClusterIP  # reachable only from inside the cluster
+  ports:
+  - name: service
+    port: 8000
+    targetPort: 8000  # container port on the selected pods
+  selector:
+    app: reranking-deploy  # routes to pods carrying this label
+
+
+---
+
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  namespace: default
+  name: retriever-deploy
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: retriever-deploy  # must match the pod template label below
+  template:
+    metadata:
+      labels:
+        app: retriever-deploy
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'  # quoted: annotation values are strings
+    spec:
+      serviceAccountName: default
+      hostIPC: true  # pod shares the node's IPC namespace
+      nodeSelector:
+        node-type: chatqna-opea  # only nodes carrying this label are eligible
+      topologySpreadConstraints:
+      - labelSelector:
+          matchLabels:
+            app: retriever-deploy
+        maxSkew: 1
+        topologyKey: kubernetes.io/hostname  # spread is evaluated per node
+        whenUnsatisfiable: ScheduleAnyway  # soft constraint: never blocks scheduling
+      containers:
+      - name: retriever-deploy
+        image: opea/retriever-redis:latest
+        imagePullPolicy: IfNotPresent
+        args: null  # explicit null: no argument override
+        env:  # only these four qna-config keys are injected (no envFrom here)
+        - name: REDIS_URL
+          valueFrom:
+            configMapKeyRef:
+              key: REDIS_URL
+              name: qna-config
+        - name: TEI_EMBEDDING_ENDPOINT
+          valueFrom:
+            configMapKeyRef:
+              key: TEI_EMBEDDING_ENDPOINT
+              name: qna-config
+        - name: HUGGINGFACEHUB_API_TOKEN
+          valueFrom:
+            configMapKeyRef:
+              key: HUGGINGFACEHUB_API_TOKEN
+              name: qna-config
+        - name: INDEX_NAME
+          valueFrom:
+            configMapKeyRef:
+              key: INDEX_NAME
+              name: qna-config
+        ports:
+        - containerPort: 7000
+        resources:
+          requests:
+            cpu: 8
+            memory: 2500Mi
+          limits:
+            cpu: 8  # cpu and memory limits equal the requests
+            memory: 2500Mi
+---
+apiVersion: v1
+kind: Service
+metadata:
+  namespace: default
+  name: retriever-svc
+spec:
+  type: ClusterIP  # reachable only from inside the cluster
+  ports:
+  - name: service
+    port: 7000
+    targetPort: 7000  # container port on the selected pods
+  selector:
+    app: retriever-deploy  # routes to pods carrying this label
+
+
+---
+
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  namespace: default
+  name: vector-db
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: vector-db  # must match the pod template label below
+  template:
+    metadata:
+      labels:
+        app: vector-db
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea  # only nodes carrying this label are eligible
+      topologySpreadConstraints:
+      - labelSelector:
+          matchLabels:
+            app: vector-db
+        maxSkew: 1
+        topologyKey: kubernetes.io/hostname  # spread is evaluated per node
+        whenUnsatisfiable: ScheduleAnyway  # soft constraint: never blocks scheduling
+      containers:
+      - image: redis/redis-stack:7.2.0-v9  # pinned tag, unlike the :latest images elsewhere in this file
+        name: vector-db
+        ports:
+        - containerPort: 6379  # exposed as vector-db-service
+        - containerPort: 8001  # exposed as vector-db-insight
+---
+apiVersion: v1
+kind: Service
+metadata:
+  namespace: default
+  name: vector-db
+spec:
+  type: ClusterIP  # reachable only from inside the cluster
+  ports:
+  - name: vector-db-service
+    port: 6379
+    targetPort: 6379  # first container port of the vector-db pod
+  - name: vector-db-insight
+    port: 8001
+    targetPort: 8001  # second container port; presumably the RedisInsight UI, confirm
+  selector:
+    app: vector-db  # routes to pods carrying this label
+
+
+---
diff --git a/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/vector-db_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/vector-db_run.yaml
deleted file mode 100644
index e04e8c5fe..000000000
--- a/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/vector-db_run.yaml
+++ /dev/null
@@ -1,48 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: vector-db
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: vector-db
-  template:
-    metadata:
-      labels:
-        app: vector-db
-    spec:
-      nodeSelector:
-        node-type: chatqna-opea
-      topologySpreadConstraints:
-      - maxSkew: 1
-        topologyKey: kubernetes.io/hostname
-        whenUnsatisfiable: ScheduleAnyway
-        labelSelector:
-          matchLabels:
-            app: vector-db
-      containers:
-      - name: vector-db
-        image: redis/redis-stack:7.2.0-v9
-        ports:
-        - containerPort: 6379
-        - containerPort: 8001
----
-apiVersion: v1
-kind: Service
-metadata:
-  name: vector-db
-spec:
-  type: ClusterIP
-  selector:
-    app: vector-db
-  ports:
-  - name: vector-db-service
-    port: 6379
-    targetPort: 6379
-  - name: vector-db-insight
-    port: 8001
-    targetPort: 8001
diff --git a/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/chatqna_config_map.yaml b/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/chatqna_config_map.yaml
deleted file mode 100644
index 368c800e4..000000000
--- a/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/chatqna_config_map.yaml
+++ /dev/null
@@ -1,23 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: v1
-kind: ConfigMap
-metadata:
-  name: qna-config
-  namespace: default
-data:
-  EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
-  RERANK_MODEL_ID: BAAI/bge-reranker-base
-  LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
-  TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
-  TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
-  TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
-  REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
-  INDEX_NAME: rag-redis
-  HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
-  EMBEDDING_SERVICE_HOST_IP: embedding-svc
-  RETRIEVER_SERVICE_HOST_IP: retriever-svc
-  RERANK_SERVICE_HOST_IP: reranking-svc
-  NODE_SELECTOR: chatqna-opea
-  LLM_SERVICE_HOST_IP: llm-svc
diff --git a/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/chatqna_mega_service_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/chatqna_mega_service_run.yaml
deleted file mode 100644
index da8d9e8f1..000000000
--- a/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/chatqna_mega_service_run.yaml
+++ /dev/null
@@ -1,62 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: chatqna-backend-server-deploy
-  namespace: default
-spec:
-  replicas: 2
-  selector:
-    matchLabels:
-      app: chatqna-backend-server-deploy
-  template:
-    metadata:
-      annotations:
-        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
-      labels:
-        app: chatqna-backend-server-deploy
-    spec:
-      nodeSelector:
-        node-type: chatqna-opea
-      topologySpreadConstraints:
-      - maxSkew: 1
-        topologyKey: kubernetes.io/hostname
-        whenUnsatisfiable: ScheduleAnyway
-        labelSelector:
-          matchLabels:
-            app: chatqna-backend-server-deploy
-      hostIPC: true
-      containers:
-      - envFrom:
-        - configMapRef:
-            name: qna-config
-        image: opea/chatqna:latest
-        imagePullPolicy: IfNotPresent
-        name: chatqna-backend-server-deploy
-        args: null
-        ports:
-        - containerPort: 8888
-        resources:
-          limits:
-            cpu: 8
-            memory: 4000Mi
-          requests:
-            cpu: 8
-            memory: 4000Mi
-      serviceAccountName: default
----
-kind: Service
-apiVersion: v1
-metadata:
-  name: chatqna-backend-server-svc
-spec:
-  type: NodePort
-  selector:
-    app: chatqna-backend-server-deploy
-  ports:
-  - name: service
-    port: 8888
-    targetPort: 8888
-    nodePort: 30888
diff --git a/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/dataprep-microservice_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/dataprep-microservice_run.yaml
deleted file mode 100644
index 4c71df7ce..000000000
--- a/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/dataprep-microservice_run.yaml
+++ /dev/null
@@ -1,75 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: dataprep-deploy
-  namespace: default
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: dataprep-deploy
-  template:
-    metadata:
-      annotations:
-        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
-      labels:
-        app: dataprep-deploy
-    spec:
-      nodeSelector:
-        node-type: chatqna-opea
-      topologySpreadConstraints:
-      - maxSkew: 1
-        topologyKey: kubernetes.io/hostname
-        whenUnsatisfiable: ScheduleAnyway
-        labelSelector:
-          matchLabels:
-            app: dataprep-deploy
-      hostIPC: true
-      containers:
-      - env:
-        - name: REDIS_URL
-          valueFrom:
-            configMapKeyRef:
-              name: qna-config
-              key: REDIS_URL
-        - name: TEI_ENDPOINT
-          valueFrom:
-            configMapKeyRef:
-              name: qna-config
-              key: TEI_EMBEDDING_ENDPOINT
-        - name: INDEX_NAME
-          valueFrom:
-            configMapKeyRef:
-              name: qna-config
-              key: INDEX_NAME
-        image: opea/dataprep-redis:latest
-        imagePullPolicy: IfNotPresent
-        name: dataprep-deploy
-        args: null
-        ports:
-        - containerPort: 6007
-        - containerPort: 6008
-        - containerPort: 6009
-      serviceAccountName: default
----
-kind: Service
-apiVersion: v1
-metadata:
-  name: dataprep-svc
-spec:
-  type: ClusterIP
-  selector:
-    app: dataprep-deploy
-  ports:
-  - name: port1
-    port: 6007
-    targetPort: 6007
-  - name: port2
-    port: 6008
-    targetPort: 6008
-  - name: port3
-    port: 6009
-    targetPort: 6009
diff --git a/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/embedding-dependency_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/embedding-dependency_run.yaml
deleted file mode 100644
index 485d73402..000000000
--- a/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/embedding-dependency_run.yaml
+++ /dev/null
@@ -1,69 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: embedding-dependency-deploy
-  namespace: default
-spec:
-  replicas: 2
-  selector:
-    matchLabels:
-      app: embedding-dependency-deploy
-  template:
-    metadata:
-      annotations:
-        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
-      labels:
-        app: embedding-dependency-deploy
-    spec:
-      nodeSelector:
-        node-type: chatqna-opea
-      containers:
-      - envFrom:
-        - configMapRef:
-            name: qna-config
-        image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
-        name: embedding-dependency-deploy
-        args:
-        - --model-id
-        - $(EMBEDDING_MODEL_ID)
-        - --auto-truncate
-        volumeMounts:
-        - mountPath: /data
-          name: model-volume
-        - mountPath: /dev/shm
-          name: shm
-        ports:
-        - containerPort: 80
-        resources:
-          limits:
-            cpu: 80
-            memory: 20000Mi
-          requests:
-            cpu: 80
-            memory: 20000Mi
-      serviceAccountName: default
-      volumes:
-      - name: model-volume
-        hostPath:
-          path: /mnt/models
-          type: Directory
-      - name: shm
-        emptyDir:
-          medium: Memory
-          sizeLimit: 1Gi
----
-kind: Service
-apiVersion: v1
-metadata:
-  name: embedding-dependency-svc
-spec:
-  type: ClusterIP
-  selector:
-    app: embedding-dependency-deploy
-  ports:
-  - name: service
-    port: 6006
-    targetPort: 80
diff --git a/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/embedding-microservice_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/embedding-microservice_run.yaml
deleted file mode 100644
index 3822537c4..000000000
--- a/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/embedding-microservice_run.yaml
+++ /dev/null
@@ -1,59 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: embedding-deploy
-  namespace: default
-spec:
-  replicas: 2
-  selector:
-    matchLabels:
-      app: embedding-deploy
-  template:
-    metadata:
-      annotations:
-        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
-      labels:
-        app: embedding-deploy
-    spec:
-      nodeSelector:
-        node-type: chatqna-opea
-      topologySpreadConstraints:
-      - maxSkew: 1
-        topologyKey: kubernetes.io/hostname
-        whenUnsatisfiable: ScheduleAnyway
-        labelSelector:
-          matchLabels:
-            app: embedding-deploy
-      hostIPC: true
-      containers:
-      - envFrom:
-        - configMapRef:
-            name: qna-config
-        image: opea/embedding-tei:latest
-        imagePullPolicy: IfNotPresent
-        name: embedding-deploy
-        args: null
-        ports:
-        - containerPort: 6000
-        resources:
-          limits:
-            cpu: 4
-          requests:
-            cpu: 4
-      serviceAccountName: default
----
-kind: Service
-apiVersion: v1
-metadata:
-  name: embedding-svc
-spec:
-  type: ClusterIP
-  selector:
-    app: embedding-deploy
-  ports:
-  - name: service
-    port: 6000
-    targetPort: 6000
diff --git a/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/llm-dependency_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/llm-dependency_run.yaml
deleted file mode 100644
index 550e338e1..000000000
--- a/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/llm-dependency_run.yaml
+++ /dev/null
@@ -1,88 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: llm-dependency-deploy
-  namespace: default
-spec:
-  replicas: 15
-  selector:
-    matchLabels:
-      app: llm-dependency-deploy
-  template:
-    metadata:
-      annotations:
-        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
-      labels:
-        app: llm-dependency-deploy
-    spec:
-      nodeSelector:
-        node-type: chatqna-opea
-      hostIPC: true
-      containers:
-      - envFrom:
-        - configMapRef:
-            name: qna-config
-        image: ghcr.io/huggingface/tgi-gaudi:2.0.4
-        name: llm-dependency-deploy-demo
-        securityContext:
-          capabilities:
-            add:
-            - SYS_NICE
-        args:
-        - --model-id
-        - $(LLM_MODEL_ID)
-        - --max-input-length
-        - '1024'
-        - --max-total-tokens
-        - '2048'
-        - --max-batch-total-tokens
-        - '65536'
-        - --max-batch-prefill-tokens
-        - '4096'
-        volumeMounts:
-        - mountPath: /data
-          name: model-volume
-        - mountPath: /dev/shm
-          name: shm
-        ports:
-        - containerPort: 80
-        resources:
-          limits:
-            habana.ai/gaudi: 1
-        env:
-        - name: OMPI_MCA_btl_vader_single_copy_mechanism
-          value: none
-        - name: PT_HPU_ENABLE_LAZY_COLLECTIVES
-          value: 'true'
-        - name: runtime
-          value: habana
-        - name: HABANA_VISIBLE_DEVICES
-          value: all
-        - name: HF_TOKEN
-          value: ${HF_TOKEN}
-      serviceAccountName: default
-      volumes:
-      - name: model-volume
-        hostPath:
-          path: /mnt/models
-          type: Directory
-      - name: shm
-        emptyDir:
-          medium: Memory
-          sizeLimit: 1Gi
----
-kind: Service
-apiVersion: v1
-metadata:
-  name: llm-dependency-svc
-spec:
-  type: ClusterIP
-  selector:
-    app: llm-dependency-deploy
-  ports:
-  - name: service
-    port: 9009
-    targetPort: 80
diff --git a/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/llm-microservice_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/llm-microservice_run.yaml
deleted file mode 100644
index 49a67fd2e..000000000
--- a/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/llm-microservice_run.yaml
+++ /dev/null
@@ -1,59 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: llm-deploy
-  namespace: default
-spec:
-  replicas: 2
-  selector:
-    matchLabels:
-      app: llm-deploy
-  template:
-    metadata:
-      annotations:
-        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
-      labels:
-        app: llm-deploy
-    spec:
-      nodeSelector:
-        node-type: chatqna-opea
-      topologySpreadConstraints:
-      - maxSkew: 1
-        topologyKey: kubernetes.io/hostname
-        whenUnsatisfiable: ScheduleAnyway
-        labelSelector:
-          matchLabels:
-            app: llm-deploy
-      hostIPC: true
-      containers:
-      - envFrom:
-        - configMapRef:
-            name: qna-config
-        image: opea/llm-tgi:latest
-        imagePullPolicy: IfNotPresent
-        name: llm-deploy
-        args: null
-        ports:
-        - containerPort: 9000
-        resources:
-          limits:
-            cpu: 4
-          requests:
-            cpu: 4
-      serviceAccountName: default
----
-kind: Service
-apiVersion: v1
-metadata:
-  name: llm-svc
-spec:
-  type: ClusterIP
-  selector:
-    app: llm-deploy
-  ports:
-  - name: service
-    port: 9000
-    targetPort: 9000
diff --git a/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/reranking-dependency_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/reranking-dependency_run.yaml
deleted file mode 100644
index af908ecd1..000000000
--- a/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/reranking-dependency_run.yaml
+++ /dev/null
@@ -1,85 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: reranking-dependency-deploy
-  namespace: default
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: reranking-dependency-deploy
-  template:
-    metadata:
-      annotations:
-        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
-      labels:
-        app: reranking-dependency-deploy
-    spec:
-      nodeSelector:
-        node-type: chatqna-opea
-      topologySpreadConstraints:
-      - maxSkew: 1
-        topologyKey: kubernetes.io/hostname
-        whenUnsatisfiable: ScheduleAnyway
-        labelSelector:
-          matchLabels:
-            app: reranking-dependency-deploy
-      containers:
-      - envFrom:
-        - configMapRef:
-            name: qna-config
-        image: opea/tei-gaudi:latest
-        name: reranking-dependency-deploy
-        args:
-        - --model-id
-        - $(RERANK_MODEL_ID)
-        - --auto-truncate
-        volumeMounts:
-        - mountPath: /data
-          name: model-volume
-        - mountPath: /dev/shm
-          name: shm
-        ports:
-        - containerPort: 80
-        resources:
-          limits:
-            habana.ai/gaudi: 1
-        env:
-        - name: OMPI_MCA_btl_vader_single_copy_mechanism
-          value: none
-        - name: PT_HPU_ENABLE_LAZY_COLLECTIVES
-          value: 'true'
-        - name: runtime
-          value: habana
-        - name: HABANA_VISIBLE_DEVICES
-          value: all
-        - name: HF_TOKEN
-          value: ${HF_TOKEN}
-        - name: MAX_WARMUP_SEQUENCE_LENGTH
-          value: '512'
-      serviceAccountName: default
-      volumes:
-      - name: model-volume
-        hostPath:
-          path: /mnt/models
-          type: Directory
-      - name: shm
-        emptyDir:
-          medium: Memory
-          sizeLimit: 1Gi
----
-kind: Service
-apiVersion: v1
-metadata:
-  name: reranking-dependency-svc
-spec:
-  type: ClusterIP
-  selector:
-    app: reranking-dependency-deploy
-  ports:
-  - name: service
-    port: 8808
-    targetPort: 80
diff --git a/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/reranking-microservice_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/reranking-microservice_run.yaml
deleted file mode 100644
index bec1c8b2c..000000000
--- a/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/reranking-microservice_run.yaml
+++ /dev/null
@@ -1,59 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: reranking-deploy
-  namespace: default
-spec:
-  replicas: 2
-  selector:
-    matchLabels:
-      app: reranking-deploy
-  template:
-    metadata:
-      annotations:
-        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
-      labels:
-        app: reranking-deploy
-    spec:
-      nodeSelector:
-        node-type: chatqna-opea
-      topologySpreadConstraints:
-      - maxSkew: 1
-        topologyKey: kubernetes.io/hostname
-        whenUnsatisfiable: ScheduleAnyway
-        labelSelector:
-          matchLabels:
-            app: reranking-deploy
-      hostIPC: true
-      containers:
-      - envFrom:
-        - configMapRef:
-            name: qna-config
-        image: opea/reranking-tei:latest
-        imagePullPolicy: IfNotPresent
-        name: reranking-deploy
-        args: null
-        ports:
-        - containerPort: 8000
-        resources:
-          limits:
-            cpu: 4
-          requests:
-            cpu: 4
-      serviceAccountName: default
----
-kind: Service
-apiVersion: v1
-metadata:
-  name: reranking-svc
-spec:
-  type: ClusterIP
-  selector:
-    app: reranking-deploy
-  ports:
-  - name: service
-    port: 8000
-    targetPort: 8000
diff --git a/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/retrieval-microservice_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/retrieval-microservice_run.yaml
deleted file mode 100644
index b6799fc60..000000000
--- a/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/retrieval-microservice_run.yaml
+++ /dev/null
@@ -1,79 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: retriever-deploy
-  namespace: default
-spec:
-  replicas: 2
-  selector:
-    matchLabels:
-      app: retriever-deploy
-  template:
-    metadata:
-      annotations:
-        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
-      labels:
-        app: retriever-deploy
-    spec:
-      nodeSelector:
-        node-type: chatqna-opea
-      topologySpreadConstraints:
-      - maxSkew: 1
-        topologyKey: kubernetes.io/hostname
-        whenUnsatisfiable: ScheduleAnyway
-        labelSelector:
-          matchLabels:
-            app: retriever-deploy
-      hostIPC: true
-      containers:
-      - env:
-        - name: REDIS_URL
-          valueFrom:
-            configMapKeyRef:
-              name: qna-config
-              key: REDIS_URL
-        - name: TEI_EMBEDDING_ENDPOINT
-          valueFrom:
-            configMapKeyRef:
-              name: qna-config
-              key: TEI_EMBEDDING_ENDPOINT
-        - name: HUGGINGFACEHUB_API_TOKEN
-          valueFrom:
-            configMapKeyRef:
-              name: qna-config
-              key: HUGGINGFACEHUB_API_TOKEN
-        - name: INDEX_NAME
-          valueFrom:
-            configMapKeyRef:
-              name: qna-config
-              key: INDEX_NAME
-        image: opea/retriever-redis:latest
-        imagePullPolicy: IfNotPresent
-        name: retriever-deploy
-        args: null
-        ports:
-        - containerPort: 7000
-        resources:
-          limits:
-            cpu: 8
-            memory: 2500Mi
-          requests:
-            cpu: 8
-            memory: 2500Mi
-      serviceAccountName: default
----
-kind: Service
-apiVersion: v1
-metadata:
-  name: retriever-svc
-spec:
-  type: ClusterIP
-  selector:
-    app: retriever-deploy
-  ports:
-  - name: service
-    port: 7000
-    targetPort: 7000
diff --git a/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/tuned_two_gaudi_with_rerank.yaml b/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/tuned_two_gaudi_with_rerank.yaml
new file mode 100644
index 000000000..4ed98c347
--- /dev/null
+++ b/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/tuned_two_gaudi_with_rerank.yaml
@@ -0,0 +1,770 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+kind: ConfigMap  # shared settings read by the Deployments in this manifest
+metadata:
+  name: qna-config
+  namespace: default
+data:
+  EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
+  RERANK_MODEL_ID: BAAI/bge-reranker-base
+  LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
+  TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
+  TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
+  TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
+  REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
+  INDEX_NAME: rag-redis
+  HUGGINGFACEHUB_API_TOKEN: '${HF_TOKEN}'  # placeholder; quoted so an empty substitution stays a string, not null
+  EMBEDDING_SERVICE_HOST_IP: embedding-svc  # the *_SERVICE_HOST_IP values are Service names from this file
+  RETRIEVER_SERVICE_HOST_IP: retriever-svc
+  RERANK_SERVICE_HOST_IP: reranking-svc
+  NODE_SELECTOR: chatqna-opea
+  LLM_SERVICE_HOST_IP: llm-svc
+
+
+---
+
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment  # ChatQnA backend (opea/chatqna); declares container port 8888
+metadata:
+  namespace: default
+  name: chatqna-backend-server-deploy
+spec:
+  replicas: 2
+  selector:
+    matchLabels:
+      app: chatqna-backend-server-deploy
+  template:
+    metadata:
+      labels:
+        app: chatqna-backend-server-deploy
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+    spec:
+      serviceAccountName: default
+      hostIPC: true
+      nodeSelector:
+        node-type: chatqna-opea  # only nodes carrying this label are eligible
+      topologySpreadConstraints:
+      - labelSelector:
+          matchLabels:
+            app: chatqna-backend-server-deploy
+        topologyKey: kubernetes.io/hostname
+        maxSkew: 1
+        whenUnsatisfiable: ScheduleAnyway  # per-host spreading is a preference, not a hard rule
+      containers:
+      - name: chatqna-backend-server-deploy
+        image: opea/chatqna:latest
+        imagePullPolicy: IfNotPresent
+        args: null
+        envFrom:  # every qna-config key becomes an environment variable
+        - configMapRef:
+            name: qna-config
+        ports:
+        - containerPort: 8888
+        resources:  # requests and limits carry identical values
+          requests:
+            cpu: 8
+            memory: 4000Mi
+          limits:
+            cpu: 8
+            memory: 4000Mi
+---
+apiVersion: v1
+kind: Service  # publishes the ChatQnA backend pods on fixed node port 30888
+metadata:
+  namespace: default
+  name: chatqna-backend-server-svc
+spec:
+  selector:
+    app: chatqna-backend-server-deploy
+  type: NodePort
+  ports:
+  - port: 8888
+    targetPort: 8888
+    nodePort: 30888
+    name: service
+
+
+---
+
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment  # data-preparation service (opea/dataprep-redis), single replica
+metadata:
+  namespace: default
+  name: dataprep-deploy
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: dataprep-deploy
+  template:
+    metadata:
+      labels:
+        app: dataprep-deploy
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+    spec:
+      serviceAccountName: default
+      hostIPC: true
+      nodeSelector:
+        node-type: chatqna-opea
+      topologySpreadConstraints:
+      - labelSelector:
+          matchLabels:
+            app: dataprep-deploy
+        topologyKey: kubernetes.io/hostname
+        maxSkew: 1
+        whenUnsatisfiable: ScheduleAnyway
+      containers:
+      - name: dataprep-deploy
+        image: opea/dataprep-redis:latest
+        imagePullPolicy: IfNotPresent
+        args: null
+        ports:  # no resource requests or limits are declared for this container
+        - containerPort: 6007
+        - containerPort: 6008
+        - containerPort: 6009
+        env:  # only these three values are taken from qna-config
+        - name: REDIS_URL
+          valueFrom:
+            configMapKeyRef:
+              key: REDIS_URL
+              name: qna-config
+        - name: TEI_ENDPOINT  # variable name differs from its source key TEI_EMBEDDING_ENDPOINT
+          valueFrom:
+            configMapKeyRef:
+              key: TEI_EMBEDDING_ENDPOINT
+              name: qna-config
+        - name: INDEX_NAME
+          valueFrom:
+            configMapKeyRef:
+              key: INDEX_NAME
+              name: qna-config
+---
+apiVersion: v1
+kind: Service  # cluster-internal access to the three dataprep container ports
+metadata:
+  namespace: default
+  name: dataprep-svc
+spec:
+  selector:
+    app: dataprep-deploy
+  type: ClusterIP
+  ports:
+  - port: 6007
+    targetPort: 6007
+    name: port1
+  - port: 6008
+    targetPort: 6008
+    name: port2
+  - port: 6009
+    targetPort: 6009
+    name: port3
+
+
+---
+
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment  # CPU text-embeddings-inference server started with EMBEDDING_MODEL_ID
+metadata:
+  namespace: default
+  name: embedding-dependency-deploy
+spec:
+  replicas: 2
+  selector:
+    matchLabels:
+      app: embedding-dependency-deploy
+  template:
+    metadata:
+      labels:
+        app: embedding-dependency-deploy
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+    spec:
+      serviceAccountName: default
+      nodeSelector:
+        node-type: chatqna-opea
+      volumes:
+      - name: model-volume
+        hostPath:
+          type: Directory  # the host path must already exist as a directory
+          path: /mnt/models
+      - name: shm
+        emptyDir:
+          sizeLimit: 1Gi
+          medium: Memory
+      containers:
+      - name: embedding-dependency-deploy
+        image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
+        envFrom:
+        - configMapRef:
+            name: qna-config
+        args:
+        - --model-id
+        - $(EMBEDDING_MODEL_ID)  # Kubernetes expands this from the container environment
+        - --auto-truncate
+        ports:
+        - containerPort: 80
+        volumeMounts:
+        - name: model-volume
+          mountPath: /data
+        - name: shm
+          mountPath: /dev/shm
+        resources:  # requests and limits carry identical values
+          requests:
+            cpu: 80
+            memory: 20000Mi
+          limits:
+            cpu: 80
+            memory: 20000Mi
+---
+apiVersion: v1
+kind: Service  # maps service port 6006 onto container port 80 of the embedding server
+metadata:
+  namespace: default
+  name: embedding-dependency-svc
+spec:
+  selector:
+    app: embedding-dependency-deploy
+  type: ClusterIP
+  ports:
+  - port: 6006
+    targetPort: 80
+    name: service
+
+
+---
+
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment  # embedding microservice (opea/embedding-tei); declares container port 6000
+metadata:
+  namespace: default
+  name: embedding-deploy
+spec:
+  replicas: 2
+  selector:
+    matchLabels:
+      app: embedding-deploy
+  template:
+    metadata:
+      labels:
+        app: embedding-deploy
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+    spec:
+      serviceAccountName: default
+      hostIPC: true
+      nodeSelector:
+        node-type: chatqna-opea
+      topologySpreadConstraints:
+      - labelSelector:
+          matchLabels:
+            app: embedding-deploy
+        topologyKey: kubernetes.io/hostname
+        maxSkew: 1
+        whenUnsatisfiable: ScheduleAnyway
+      containers:
+      - name: embedding-deploy
+        image: opea/embedding-tei:latest
+        imagePullPolicy: IfNotPresent
+        args: null
+        envFrom:  # every qna-config key becomes an environment variable
+        - configMapRef:
+            name: qna-config
+        ports:
+        - containerPort: 6000
+        resources:  # CPU only; no memory request or limit is declared
+          requests:
+            cpu: 4
+          limits:
+            cpu: 4
+---
+apiVersion: v1
+kind: Service  # cluster-internal endpoint for the embedding-deploy pods on port 6000
+metadata:
+  namespace: default
+  name: embedding-svc
+spec:
+  selector:
+    app: embedding-deploy
+  type: ClusterIP
+  ports:
+  - port: 6000
+    targetPort: 6000
+    name: service
+
+
+---
+
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment  # TGI-Gaudi server started with LLM_MODEL_ID; one habana.ai/gaudi device per replica
+metadata:
+  name: llm-dependency-deploy
+  namespace: default
+spec:
+  replicas: 15
+  selector:
+    matchLabels:
+      app: llm-dependency-deploy
+  template:
+    metadata:
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+      labels:
+        app: llm-dependency-deploy
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea  # only nodes carrying this label are eligible
+      hostIPC: true
+      containers:
+      - envFrom:  # every qna-config key becomes an environment variable
+        - configMapRef:
+            name: qna-config
+        image: ghcr.io/huggingface/tgi-gaudi:2.0.4
+        name: llm-dependency-deploy-demo
+        securityContext:
+          capabilities:
+            add:
+            - SYS_NICE
+        args:
+        - --model-id
+        - $(LLM_MODEL_ID)  # Kubernetes expands this from the container environment
+        - --max-input-length
+        - '1024'
+        - --max-total-tokens
+        - '2048'
+        - --max-batch-total-tokens
+        - '65536'
+        - --max-batch-prefill-tokens
+        - '4096'
+        volumeMounts:
+        - mountPath: /data
+          name: model-volume
+        - mountPath: /dev/shm
+          name: shm
+        ports:
+        - containerPort: 80
+        resources:
+          limits:
+            habana.ai/gaudi: 1
+        env:
+        - name: OMPI_MCA_btl_vader_single_copy_mechanism
+          value: none
+        - name: PT_HPU_ENABLE_LAZY_COLLECTIVES
+          value: 'true'
+        - name: runtime
+          value: habana
+        - name: HABANA_VISIBLE_DEVICES
+          value: all
+        - name: HF_TOKEN
+          value: '${HF_TOKEN}'  # placeholder, presumably replaced before apply; quoted so the result is always a string
+      serviceAccountName: default
+      volumes:
+      - name: model-volume
+        hostPath:
+          path: /mnt/models
+          type: Directory  # the host path must already exist as a directory
+      - name: shm
+        emptyDir:
+          medium: Memory
+          sizeLimit: 1Gi
+---
+apiVersion: v1
+kind: Service  # maps service port 9009 onto container port 80 of the LLM server
+metadata:
+  namespace: default
+  name: llm-dependency-svc
+spec:
+  selector:
+    app: llm-dependency-deploy
+  type: ClusterIP
+  ports:
+  - port: 9009
+    targetPort: 80
+    name: service
+
+
+---
+
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment  # LLM microservice (opea/llm-tgi); declares container port 9000
+metadata:
+  namespace: default
+  name: llm-deploy
+spec:
+  replicas: 2
+  selector:
+    matchLabels:
+      app: llm-deploy
+  template:
+    metadata:
+      labels:
+        app: llm-deploy
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+    spec:
+      serviceAccountName: default
+      hostIPC: true
+      nodeSelector:
+        node-type: chatqna-opea
+      topologySpreadConstraints:
+      - labelSelector:
+          matchLabels:
+            app: llm-deploy
+        topologyKey: kubernetes.io/hostname
+        maxSkew: 1
+        whenUnsatisfiable: ScheduleAnyway
+      containers:
+      - name: llm-deploy
+        image: opea/llm-tgi:latest
+        imagePullPolicy: IfNotPresent
+        args: null
+        envFrom:  # every qna-config key becomes an environment variable
+        - configMapRef:
+            name: qna-config
+        ports:
+        - containerPort: 9000
+        resources:  # CPU only; no memory request or limit is declared
+          requests:
+            cpu: 4
+          limits:
+            cpu: 4
+---
+apiVersion: v1
+kind: Service  # cluster-internal endpoint for the llm-deploy pods on port 9000
+metadata:
+  namespace: default
+  name: llm-svc
+spec:
+  selector:
+    app: llm-deploy
+  type: ClusterIP
+  ports:
+  - port: 9000
+    targetPort: 9000
+    name: service
+
+
+---
+
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment  # TEI-Gaudi server started with RERANK_MODEL_ID; limited to one habana.ai/gaudi device
+metadata:
+  name: reranking-dependency-deploy
+  namespace: default
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: reranking-dependency-deploy
+  template:
+    metadata:
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+      labels:
+        app: reranking-dependency-deploy
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea  # only nodes carrying this label are eligible
+      topologySpreadConstraints:
+      - maxSkew: 1
+        topologyKey: kubernetes.io/hostname
+        whenUnsatisfiable: ScheduleAnyway  # per-host spreading is a preference, not a hard rule
+        labelSelector:
+          matchLabels:
+            app: reranking-dependency-deploy
+      containers:
+      - envFrom:  # every qna-config key becomes an environment variable
+        - configMapRef:
+            name: qna-config
+        image: opea/tei-gaudi:latest
+        name: reranking-dependency-deploy
+        args:
+        - --model-id
+        - $(RERANK_MODEL_ID)  # Kubernetes expands this from the container environment
+        - --auto-truncate
+        volumeMounts:
+        - mountPath: /data
+          name: model-volume
+        - mountPath: /dev/shm
+          name: shm
+        ports:
+        - containerPort: 80
+        resources:
+          limits:
+            habana.ai/gaudi: 1
+        env:
+        - name: OMPI_MCA_btl_vader_single_copy_mechanism
+          value: none
+        - name: PT_HPU_ENABLE_LAZY_COLLECTIVES
+          value: 'true'
+        - name: runtime
+          value: habana
+        - name: HABANA_VISIBLE_DEVICES
+          value: all
+        - name: HF_TOKEN
+          value: '${HF_TOKEN}'  # placeholder, presumably replaced before apply; quoted so the result is always a string
+        - name: MAX_WARMUP_SEQUENCE_LENGTH
+          value: '512'
+      serviceAccountName: default
+      volumes:
+      - name: model-volume
+        hostPath:
+          path: /mnt/models
+          type: Directory  # the host path must already exist as a directory
+      - name: shm
+        emptyDir:
+          medium: Memory
+          sizeLimit: 1Gi
+---
+apiVersion: v1
+kind: Service  # maps service port 8808 onto container port 80 of the reranking server
+metadata:
+  namespace: default
+  name: reranking-dependency-svc
+spec:
+  selector:
+    app: reranking-dependency-deploy
+  type: ClusterIP
+  ports:
+  - port: 8808
+    targetPort: 80
+    name: service
+
+
+---
+
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment  # reranking microservice (opea/reranking-tei); declares container port 8000
+metadata:
+  namespace: default
+  name: reranking-deploy
+spec:
+  replicas: 2
+  selector:
+    matchLabels:
+      app: reranking-deploy
+  template:
+    metadata:
+      labels:
+        app: reranking-deploy
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+    spec:
+      serviceAccountName: default
+      hostIPC: true
+      nodeSelector:
+        node-type: chatqna-opea
+      topologySpreadConstraints:
+      - labelSelector:
+          matchLabels:
+            app: reranking-deploy
+        topologyKey: kubernetes.io/hostname
+        maxSkew: 1
+        whenUnsatisfiable: ScheduleAnyway
+      containers:
+      - name: reranking-deploy
+        image: opea/reranking-tei:latest
+        imagePullPolicy: IfNotPresent
+        args: null
+        envFrom:  # every qna-config key becomes an environment variable
+        - configMapRef:
+            name: qna-config
+        ports:
+        - containerPort: 8000
+        resources:  # CPU only; no memory request or limit is declared
+          requests:
+            cpu: 4
+          limits:
+            cpu: 4
+---
+apiVersion: v1
+kind: Service  # cluster-internal endpoint for the reranking-deploy pods on port 8000
+metadata:
+  namespace: default
+  name: reranking-svc
+spec:
+  selector:
+    app: reranking-deploy
+  type: ClusterIP
+  ports:
+  - port: 8000
+    targetPort: 8000
+    name: service
+
+
+---
+
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment  # retriever microservice (opea/retriever-redis); declares container port 7000
+metadata:
+  namespace: default
+  name: retriever-deploy
+spec:
+  replicas: 2
+  selector:
+    matchLabels:
+      app: retriever-deploy
+  template:
+    metadata:
+      labels:
+        app: retriever-deploy
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+    spec:
+      serviceAccountName: default
+      hostIPC: true
+      nodeSelector:
+        node-type: chatqna-opea
+      topologySpreadConstraints:
+      - labelSelector:
+          matchLabels:
+            app: retriever-deploy
+        topologyKey: kubernetes.io/hostname
+        maxSkew: 1
+        whenUnsatisfiable: ScheduleAnyway
+      containers:
+      - name: retriever-deploy
+        image: opea/retriever-redis:latest
+        imagePullPolicy: IfNotPresent
+        args: null
+        ports:
+        - containerPort: 7000
+        resources:  # requests and limits carry identical values
+          requests:
+            cpu: 8
+            memory: 2500Mi
+          limits:
+            cpu: 8
+            memory: 2500Mi
+        env:  # only these four values are taken from qna-config
+        - name: REDIS_URL
+          valueFrom:
+            configMapKeyRef:
+              key: REDIS_URL
+              name: qna-config
+        - name: TEI_EMBEDDING_ENDPOINT
+          valueFrom:
+            configMapKeyRef:
+              key: TEI_EMBEDDING_ENDPOINT
+              name: qna-config
+        - name: HUGGINGFACEHUB_API_TOKEN
+          valueFrom:
+            configMapKeyRef:
+              key: HUGGINGFACEHUB_API_TOKEN
+              name: qna-config
+        - name: INDEX_NAME
+          valueFrom:
+            configMapKeyRef:
+              key: INDEX_NAME
+              name: qna-config
+---
+apiVersion: v1
+kind: Service  # cluster-internal endpoint for the retriever-deploy pods on port 7000
+metadata:
+  namespace: default
+  name: retriever-svc
+spec:
+  selector:
+    app: retriever-deploy
+  type: ClusterIP
+  ports:
+  - port: 7000
+    targetPort: 7000
+    name: service
+
+
+---
+
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment  # single-replica redis-stack container; declares ports 6379 and 8001
+metadata:
+  namespace: default
+  name: vector-db
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: vector-db
+  template:
+    metadata:
+      labels:  # unlike the other Deployments here, no Istio annotation is set
+        app: vector-db
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea
+      topologySpreadConstraints:
+      - labelSelector:
+          matchLabels:
+            app: vector-db
+        topologyKey: kubernetes.io/hostname
+        maxSkew: 1
+        whenUnsatisfiable: ScheduleAnyway
+      containers:
+      - image: redis/redis-stack:7.2.0-v9
+        name: vector-db
+        ports:
+        - containerPort: 6379
+        - containerPort: 8001
+---
+apiVersion: v1
+kind: Service  # host name used by REDIS_URL in qna-config (port 6379)
+metadata:
+  namespace: default
+  name: vector-db
+spec:
+  selector:
+    app: vector-db
+  type: ClusterIP
+  ports:
+  - port: 6379
+    targetPort: 6379
+    name: vector-db-service
+  - port: 8001
+    targetPort: 8001
+    name: vector-db-insight
+
+
+---
diff --git a/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/vector-db_run.yaml b/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/vector-db_run.yaml
deleted file mode 100644
index e04e8c5fe..000000000
--- a/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/vector-db_run.yaml
+++ /dev/null
@@ -1,48 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: vector-db
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: vector-db
-  template:
-    metadata:
-      labels:
-        app: vector-db
-    spec:
-      nodeSelector:
-        node-type: chatqna-opea
-      topologySpreadConstraints:
-      - maxSkew: 1
-        topologyKey: kubernetes.io/hostname
-        whenUnsatisfiable: ScheduleAnyway
-        labelSelector:
-          matchLabels:
-            app: vector-db
-      containers:
-      - name: vector-db
-        image: redis/redis-stack:7.2.0-v9
-        ports:
-        - containerPort: 6379
-        - containerPort: 8001
----
-apiVersion: v1
-kind: Service
-metadata:
-  name: vector-db
-spec:
-  type: ClusterIP
-  selector:
-    app: vector-db
-  ports:
-  - name: vector-db-service
-    port: 6379
-    targetPort: 6379
-  - name: vector-db-insight
-    port: 8001
-    targetPort: 8001
diff --git a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/chatqna_config_map.yaml b/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/chatqna_config_map.yaml
deleted file mode 100644
index 368c800e4..000000000
--- a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/chatqna_config_map.yaml
+++ /dev/null
@@ -1,23 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: v1
-kind: ConfigMap
-metadata:
-  name: qna-config
-  namespace: default
-data:
-  EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
-  RERANK_MODEL_ID: BAAI/bge-reranker-base
-  LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
-  TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
-  TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
-  TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
-  REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
-  INDEX_NAME: rag-redis
-  HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
-  EMBEDDING_SERVICE_HOST_IP: embedding-svc
-  RETRIEVER_SERVICE_HOST_IP: retriever-svc
-  RERANK_SERVICE_HOST_IP: reranking-svc
-  NODE_SELECTOR: chatqna-opea
-  LLM_SERVICE_HOST_IP: llm-svc
diff --git a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/chatqna_mega_service_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/chatqna_mega_service_run.yaml
deleted file mode 100644
index 22c8c4d46..000000000
--- a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/chatqna_mega_service_run.yaml
+++ /dev/null
@@ -1,62 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: chatqna-backend-server-deploy
-  namespace: default
-spec:
-  replicas: 4
-  selector:
-    matchLabels:
-      app: chatqna-backend-server-deploy
-  template:
-    metadata:
-      annotations:
-        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
-      labels:
-        app: chatqna-backend-server-deploy
-    spec:
-      nodeSelector:
-        node-type: chatqna-opea
-      topologySpreadConstraints:
-      - maxSkew: 1
-        topologyKey: kubernetes.io/hostname
-        whenUnsatisfiable: ScheduleAnyway
-        labelSelector:
-          matchLabels:
-            app: chatqna-backend-server-deploy
-      hostIPC: true
-      containers:
-      - envFrom:
-        - configMapRef:
-            name: qna-config
-        image: opea/chatqna-without-rerank:latest
-        imagePullPolicy: IfNotPresent
-        name: chatqna-backend-server-deploy
-        args: null
-        ports:
-        - containerPort: 8888
-        resources:
-          limits:
-            cpu: 8
-            memory: 4000Mi
-          requests:
-            cpu: 8
-            memory: 4000Mi
-      serviceAccountName: default
----
-kind: Service
-apiVersion: v1
-metadata:
-  name: chatqna-backend-server-svc
-spec:
-  type: NodePort
-  selector:
-    app: chatqna-backend-server-deploy
-  ports:
-  - name: service
-    port: 8888
-    targetPort: 8888
-    nodePort: 30888
diff --git a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/dataprep-microservice_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/dataprep-microservice_run.yaml
deleted file mode 100644
index 4c71df7ce..000000000
--- a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/dataprep-microservice_run.yaml
+++ /dev/null
@@ -1,75 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: dataprep-deploy
-  namespace: default
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: dataprep-deploy
-  template:
-    metadata:
-      annotations:
-        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
-      labels:
-        app: dataprep-deploy
-    spec:
-      nodeSelector:
-        node-type: chatqna-opea
-      topologySpreadConstraints:
-      - maxSkew: 1
-        topologyKey: kubernetes.io/hostname
-        whenUnsatisfiable: ScheduleAnyway
-        labelSelector:
-          matchLabels:
-            app: dataprep-deploy
-      hostIPC: true
-      containers:
-      - env:
-        - name: REDIS_URL
-          valueFrom:
-            configMapKeyRef:
-              name: qna-config
-              key: REDIS_URL
-        - name: TEI_ENDPOINT
-          valueFrom:
-            configMapKeyRef:
-              name: qna-config
-              key: TEI_EMBEDDING_ENDPOINT
-        - name: INDEX_NAME
-          valueFrom:
-            configMapKeyRef:
-              name: qna-config
-              key: INDEX_NAME
-        image: opea/dataprep-redis:latest
-        imagePullPolicy: IfNotPresent
-        name: dataprep-deploy
-        args: null
-        ports:
-        - containerPort: 6007
-        - containerPort: 6008
-        - containerPort: 6009
-      serviceAccountName: default
----
-kind: Service
-apiVersion: v1
-metadata:
-  name: dataprep-svc
-spec:
-  type: ClusterIP
-  selector:
-    app: dataprep-deploy
-  ports:
-  - name: port1
-    port: 6007
-    targetPort: 6007
-  - name: port2
-    port: 6008
-    targetPort: 6008
-  - name: port3
-    port: 6009
-    targetPort: 6009
diff --git a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/embedding-dependency_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/embedding-dependency_run.yaml
deleted file mode 100644
index 69dbd7af9..000000000
--- a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/embedding-dependency_run.yaml
+++ /dev/null
@@ -1,69 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: embedding-dependency-deploy
-  namespace: default
-spec:
-  replicas: 4
-  selector:
-    matchLabels:
-      app: embedding-dependency-deploy
-  template:
-    metadata:
-      annotations:
-        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
-      labels:
-        app: embedding-dependency-deploy
-    spec:
-      nodeSelector:
-        node-type: chatqna-opea
-      containers:
-      - envFrom:
-        - configMapRef:
-            name: qna-config
-        image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
-        name: embedding-dependency-deploy
-        args:
-        - --model-id
-        - $(EMBEDDING_MODEL_ID)
-        - --auto-truncate
-        volumeMounts:
-        - mountPath: /data
-          name: model-volume
-        - mountPath: /dev/shm
-          name: shm
-        ports:
-        - containerPort: 80
-        resources:
-          limits:
-            cpu: 80
-            memory: 20000Mi
-          requests:
-            cpu: 80
-            memory: 20000Mi
-      serviceAccountName: default
-      volumes:
-      - name: model-volume
-        hostPath:
-          path: /mnt/models
-          type: Directory
-      - name: shm
-        emptyDir:
-          medium: Memory
-          sizeLimit: 1Gi
----
-kind: Service
-apiVersion: v1
-metadata:
-  name: embedding-dependency-svc
-spec:
-  type: ClusterIP
-  selector:
-    app: embedding-dependency-deploy
-  ports:
-  - name: service
-    port: 6006
-    targetPort: 80
diff --git a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/embedding-microservice_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/embedding-microservice_run.yaml
deleted file mode 100644
index 348aa7a23..000000000
--- a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/embedding-microservice_run.yaml
+++ /dev/null
@@ -1,59 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: embedding-deploy
-  namespace: default
-spec:
-  replicas: 4
-  selector:
-    matchLabels:
-      app: embedding-deploy
-  template:
-    metadata:
-      annotations:
-        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
-      labels:
-        app: embedding-deploy
-    spec:
-      nodeSelector:
-        node-type: chatqna-opea
-      topologySpreadConstraints:
-      - maxSkew: 1
-        topologyKey: kubernetes.io/hostname
-        whenUnsatisfiable: ScheduleAnyway
-        labelSelector:
-          matchLabels:
-            app: embedding-deploy
-      hostIPC: true
-      containers:
-      - envFrom:
-        - configMapRef:
-            name: qna-config
-        image: opea/embedding-tei:latest
-        imagePullPolicy: IfNotPresent
-        name: embedding-deploy
-        args: null
-        ports:
-        - containerPort: 6000
-        resources:
-          limits:
-            cpu: 4
-          requests:
-            cpu: 4
-      serviceAccountName: default
----
-kind: Service
-apiVersion: v1
-metadata:
-  name: embedding-svc
-spec:
-  type: ClusterIP
-  selector:
-    app: embedding-deploy
-  ports:
-  - name: service
-    port: 6000
-    targetPort: 6000
diff --git a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/llm-dependency_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/llm-dependency_run.yaml
deleted file mode 100644
index ebee24319..000000000
--- a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/llm-dependency_run.yaml
+++ /dev/null
@@ -1,88 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: llm-dependency-deploy
-  namespace: default
-spec:
-  replicas: 32
-  selector:
-    matchLabels:
-      app: llm-dependency-deploy
-  template:
-    metadata:
-      annotations:
-        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
-      labels:
-        app: llm-dependency-deploy
-    spec:
-      nodeSelector:
-        node-type: chatqna-opea
-      hostIPC: true
-      containers:
-      - envFrom:
-        - configMapRef:
-            name: qna-config
-        image: ghcr.io/huggingface/tgi-gaudi:2.0.4
-        name: llm-dependency-deploy-demo
-        securityContext:
-          capabilities:
-            add:
-            - SYS_NICE
-        args:
-        - --model-id
-        - $(LLM_MODEL_ID)
-        - --max-input-length
-        - '1024'
-        - --max-total-tokens
-        - '2048'
-        - --max-batch-total-tokens
-        - '65536'
-        - --max-batch-prefill-tokens
-        - '4096'
-        volumeMounts:
-        - mountPath: /data
-          name: model-volume
-        - mountPath: /dev/shm
-          name: shm
-        ports:
-        - containerPort: 80
-        resources:
-          limits:
-            habana.ai/gaudi: 1
-        env:
-        - name: OMPI_MCA_btl_vader_single_copy_mechanism
-          value: none
-        - name: PT_HPU_ENABLE_LAZY_COLLECTIVES
-          value: 'true'
-        - name: runtime
-          value: habana
-        - name: HABANA_VISIBLE_DEVICES
-          value: all
-        - name: HF_TOKEN
-          value: ${HF_TOKEN}
-      serviceAccountName: default
-      volumes:
-      - name: model-volume
-        hostPath:
-          path: /mnt/models
-          type: Directory
-      - name: shm
-        emptyDir:
-          medium: Memory
-          sizeLimit: 1Gi
----
-kind: Service
-apiVersion: v1
-metadata:
-  name: llm-dependency-svc
-spec:
-  type: ClusterIP
-  selector:
-    app: llm-dependency-deploy
-  ports:
-  - name: service
-    port: 9009
-    targetPort: 80
diff --git a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/llm-microservice_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/llm-microservice_run.yaml
deleted file mode 100644
index 7cc6ad123..000000000
--- a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/llm-microservice_run.yaml
+++ /dev/null
@@ -1,59 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: llm-deploy
-  namespace: default
-spec:
-  replicas: 4
-  selector:
-    matchLabels:
-      app: llm-deploy
-  template:
-    metadata:
-      annotations:
-        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
-      labels:
-        app: llm-deploy
-    spec:
-      nodeSelector:
-        node-type: chatqna-opea
-      topologySpreadConstraints:
-      - maxSkew: 1
-        topologyKey: kubernetes.io/hostname
-        whenUnsatisfiable: ScheduleAnyway
-        labelSelector:
-          matchLabels:
-            app: llm-deploy
-      hostIPC: true
-      containers:
-      - envFrom:
-        - configMapRef:
-            name: qna-config
-        image: opea/llm-tgi:latest
-        imagePullPolicy: IfNotPresent
-        name: llm-deploy
-        args: null
-        ports:
-        - containerPort: 9000
-        resources:
-          limits:
-            cpu: 4
-          requests:
-            cpu: 4
-      serviceAccountName: default
----
-kind: Service
-apiVersion: v1
-metadata:
-  name: llm-svc
-spec:
-  type: ClusterIP
-  selector:
-    app: llm-deploy
-  ports:
-  - name: service
-    port: 9000
-    targetPort: 9000
diff --git a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/retrieval-microservice_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/retrieval-microservice_run.yaml
deleted file mode 100644
index 25314a782..000000000
--- a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/retrieval-microservice_run.yaml
+++ /dev/null
@@ -1,79 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: retriever-deploy
-  namespace: default
-spec:
-  replicas: 4
-  selector:
-    matchLabels:
-      app: retriever-deploy
-  template:
-    metadata:
-      annotations:
-        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
-      labels:
-        app: retriever-deploy
-    spec:
-      nodeSelector:
-        node-type: chatqna-opea
-      topologySpreadConstraints:
-      - maxSkew: 1
-        topologyKey: kubernetes.io/hostname
-        whenUnsatisfiable: ScheduleAnyway
-        labelSelector:
-          matchLabels:
-            app: retriever-deploy
-      hostIPC: true
-      containers:
-      - env:
-        - name: REDIS_URL
-          valueFrom:
-            configMapKeyRef:
-              name: qna-config
-              key: REDIS_URL
-        - name: TEI_EMBEDDING_ENDPOINT
-          valueFrom:
-            configMapKeyRef:
-              name: qna-config
-              key: TEI_EMBEDDING_ENDPOINT
-        - name: HUGGINGFACEHUB_API_TOKEN
-          valueFrom:
-            configMapKeyRef:
-              name: qna-config
-              key: HUGGINGFACEHUB_API_TOKEN
-        - name: INDEX_NAME
-          valueFrom:
-            configMapKeyRef:
-              name: qna-config
-              key: INDEX_NAME
-        image: opea/retriever-redis:latest
-        imagePullPolicy: IfNotPresent
-        name: retriever-deploy
-        args: null
-        ports:
-        - containerPort: 7000
-        resources:
-          limits:
-            cpu: 8
-            memory: 2500Mi
-          requests:
-            cpu: 8
-            memory: 2500Mi
-      serviceAccountName: default
----
-kind: Service
-apiVersion: v1
-metadata:
-  name: retriever-svc
-spec:
-  type: ClusterIP
-  selector:
-    app: retriever-deploy
-  ports:
-  - name: service
-    port: 7000
-    targetPort: 7000
diff --git a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/tuned_four_gaudi_without_rerank.yaml b/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/tuned_four_gaudi_without_rerank.yaml
new file mode 100644
index 000000000..10c0963a7
--- /dev/null
+++ b/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/tuned_four_gaudi_without_rerank.yaml
@@ -0,0 +1,614 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: qna-config
+  namespace: default
+data:
+  EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
+  RERANK_MODEL_ID: BAAI/bge-reranker-base
+  LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
+  TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
+  TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
+  TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
+  REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
+  INDEX_NAME: rag-redis
+  HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
+  EMBEDDING_SERVICE_HOST_IP: embedding-svc
+  RETRIEVER_SERVICE_HOST_IP: retriever-svc
+  RERANK_SERVICE_HOST_IP: reranking-svc
+  NODE_SELECTOR: chatqna-opea
+  LLM_SERVICE_HOST_IP: llm-svc
+
+
+---
+
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: chatqna-backend-server-deploy
+  namespace: default
+spec:
+  replicas: 4
+  selector:
+    matchLabels:
+      app: chatqna-backend-server-deploy
+  template:
+    metadata:
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+      labels:
+        app: chatqna-backend-server-deploy
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea
+      topologySpreadConstraints:
+      - maxSkew: 1
+        topologyKey: kubernetes.io/hostname
+        whenUnsatisfiable: ScheduleAnyway
+        labelSelector:
+          matchLabels:
+            app: chatqna-backend-server-deploy
+      hostIPC: true
+      containers:
+      - envFrom:
+        - configMapRef:
+            name: qna-config
+        image: opea/chatqna-without-rerank:latest
+        imagePullPolicy: IfNotPresent
+        name: chatqna-backend-server-deploy
+        args: null
+        ports:
+        - containerPort: 8888
+        resources:
+          limits:
+            cpu: 8
+            memory: 4000Mi
+          requests:
+            cpu: 8
+            memory: 4000Mi
+      serviceAccountName: default
+---
+kind: Service
+apiVersion: v1
+metadata:
+  name: chatqna-backend-server-svc
+  namespace: default
+spec:
+  type: NodePort
+  selector:
+    app: chatqna-backend-server-deploy
+  ports:
+  - name: service
+    port: 8888
+    targetPort: 8888
+    nodePort: 30888
+
+
+---
+
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: dataprep-deploy
+  namespace: default
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: dataprep-deploy
+  template:
+    metadata:
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+      labels:
+        app: dataprep-deploy
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea
+      topologySpreadConstraints:
+      - maxSkew: 1
+        topologyKey: kubernetes.io/hostname
+        whenUnsatisfiable: ScheduleAnyway
+        labelSelector:
+          matchLabels:
+            app: dataprep-deploy
+      hostIPC: true
+      containers:
+      - env:
+        - name: REDIS_URL
+          valueFrom:
+            configMapKeyRef:
+              name: qna-config
+              key: REDIS_URL
+        - name: TEI_ENDPOINT
+          valueFrom:
+            configMapKeyRef:
+              name: qna-config
+              key: TEI_EMBEDDING_ENDPOINT
+        - name: INDEX_NAME
+          valueFrom:
+            configMapKeyRef:
+              name: qna-config
+              key: INDEX_NAME
+        image: opea/dataprep-redis:latest
+        imagePullPolicy: IfNotPresent
+        name: dataprep-deploy
+        args: null
+        ports:
+        - containerPort: 6007
+        - containerPort: 6008
+        - containerPort: 6009
+      serviceAccountName: default
+---
+kind: Service
+apiVersion: v1
+metadata:
+  name: dataprep-svc
+  namespace: default
+spec:
+  type: ClusterIP
+  selector:
+    app: dataprep-deploy
+  ports:
+  - name: port1
+    port: 6007
+    targetPort: 6007
+  - name: port2
+    port: 6008
+    targetPort: 6008
+  - name: port3
+    port: 6009
+    targetPort: 6009
+
+
+---
+
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: embedding-dependency-deploy
+  namespace: default
+spec:
+  replicas: 4
+  selector:
+    matchLabels:
+      app: embedding-dependency-deploy
+  template:
+    metadata:
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+      labels:
+        app: embedding-dependency-deploy
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea
+      containers:
+      - envFrom:
+        - configMapRef:
+            name: qna-config
+        image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
+        name: embedding-dependency-deploy
+        args:
+        - --model-id
+        - $(EMBEDDING_MODEL_ID)
+        - --auto-truncate
+        volumeMounts:
+        - mountPath: /data
+          name: model-volume
+        - mountPath: /dev/shm
+          name: shm
+        ports:
+        - containerPort: 80
+        resources:
+          limits:
+            cpu: 80
+            memory: 20000Mi
+          requests:
+            cpu: 80
+            memory: 20000Mi
+      serviceAccountName: default
+      volumes:
+      - name: model-volume
+        hostPath:
+          path: /mnt/models
+          type: Directory
+      - name: shm
+        emptyDir:
+          medium: Memory
+          sizeLimit: 1Gi
+---
+kind: Service
+apiVersion: v1
+metadata:
+  name: embedding-dependency-svc
+  namespace: default
+spec:
+  type: ClusterIP
+  selector:
+    app: embedding-dependency-deploy
+  ports:
+  - name: service
+    port: 6006
+    targetPort: 80
+
+
+---
+
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: embedding-deploy
+  namespace: default
+spec:
+  replicas: 4
+  selector:
+    matchLabels:
+      app: embedding-deploy
+  template:
+    metadata:
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+      labels:
+        app: embedding-deploy
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea
+      topologySpreadConstraints:
+      - maxSkew: 1
+        topologyKey: kubernetes.io/hostname
+        whenUnsatisfiable: ScheduleAnyway
+        labelSelector:
+          matchLabels:
+            app: embedding-deploy
+      hostIPC: true
+      containers:
+      - envFrom:
+        - configMapRef:
+            name: qna-config
+        image: opea/embedding-tei:latest
+        imagePullPolicy: IfNotPresent
+        name: embedding-deploy
+        args: null
+        ports:
+        - containerPort: 6000
+        resources:
+          limits:
+            cpu: 4
+          requests:
+            cpu: 4
+      serviceAccountName: default
+---
+kind: Service
+apiVersion: v1
+metadata:
+  name: embedding-svc
+  namespace: default
+spec:
+  type: ClusterIP
+  selector:
+    app: embedding-deploy
+  ports:
+  - name: service
+    port: 6000
+    targetPort: 6000
+
+
+---
+
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: llm-dependency-deploy
+  namespace: default
+spec:
+  replicas: 32
+  selector:
+    matchLabels:
+      app: llm-dependency-deploy
+  template:
+    metadata:
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+      labels:
+        app: llm-dependency-deploy
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea
+      hostIPC: true
+      containers:
+      - envFrom:
+        - configMapRef:
+            name: qna-config
+        image: ghcr.io/huggingface/tgi-gaudi:2.0.4
+        name: llm-dependency-deploy-demo
+        securityContext:
+          capabilities:
+            add:
+            - SYS_NICE
+        args:
+        - --model-id
+        - $(LLM_MODEL_ID)
+        - --max-input-length
+        - '1024'
+        - --max-total-tokens
+        - '2048'
+        - --max-batch-total-tokens
+        - '65536'
+        - --max-batch-prefill-tokens
+        - '4096'
+        volumeMounts:
+        - mountPath: /data
+          name: model-volume
+        - mountPath: /dev/shm
+          name: shm
+        ports:
+        - containerPort: 80
+        resources:
+          limits:
+            habana.ai/gaudi: 1
+        env:
+        - name: OMPI_MCA_btl_vader_single_copy_mechanism
+          value: none
+        - name: PT_HPU_ENABLE_LAZY_COLLECTIVES
+          value: 'true'
+        - name: runtime
+          value: habana
+        - name: HABANA_VISIBLE_DEVICES
+          value: all
+        - name: HF_TOKEN
+          value: ${HF_TOKEN}
+      serviceAccountName: default
+      volumes:
+      - name: model-volume
+        hostPath:
+          path: /mnt/models
+          type: Directory
+      - name: shm
+        emptyDir:
+          medium: Memory
+          sizeLimit: 1Gi
+---
+kind: Service
+apiVersion: v1
+metadata:
+  name: llm-dependency-svc
+  namespace: default
+spec:
+  type: ClusterIP
+  selector:
+    app: llm-dependency-deploy
+  ports:
+  - name: service
+    port: 9009
+    targetPort: 80
+
+
+---
+
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: llm-deploy
+  namespace: default
+spec:
+  replicas: 4
+  selector:
+    matchLabels:
+      app: llm-deploy
+  template:
+    metadata:
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+      labels:
+        app: llm-deploy
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea
+      topologySpreadConstraints:
+      - maxSkew: 1
+        topologyKey: kubernetes.io/hostname
+        whenUnsatisfiable: ScheduleAnyway
+        labelSelector:
+          matchLabels:
+            app: llm-deploy
+      hostIPC: true
+      containers:
+      - envFrom:
+        - configMapRef:
+            name: qna-config
+        image: opea/llm-tgi:latest
+        imagePullPolicy: IfNotPresent
+        name: llm-deploy
+        args: null
+        ports:
+        - containerPort: 9000
+        resources:
+          limits:
+            cpu: 4
+          requests:
+            cpu: 4
+      serviceAccountName: default
+---
+kind: Service
+apiVersion: v1
+metadata:
+  name: llm-svc
+  namespace: default
+spec:
+  type: ClusterIP
+  selector:
+    app: llm-deploy
+  ports:
+  - name: service
+    port: 9000
+    targetPort: 9000
+
+
+---
+
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: retriever-deploy
+  namespace: default
+spec:
+  replicas: 4
+  selector:
+    matchLabels:
+      app: retriever-deploy
+  template:
+    metadata:
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+      labels:
+        app: retriever-deploy
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea
+      topologySpreadConstraints:
+      - maxSkew: 1
+        topologyKey: kubernetes.io/hostname
+        whenUnsatisfiable: ScheduleAnyway
+        labelSelector:
+          matchLabels:
+            app: retriever-deploy
+      hostIPC: true
+      containers:
+      - env:
+        - name: REDIS_URL
+          valueFrom:
+            configMapKeyRef:
+              name: qna-config
+              key: REDIS_URL
+        - name: TEI_EMBEDDING_ENDPOINT
+          valueFrom:
+            configMapKeyRef:
+              name: qna-config
+              key: TEI_EMBEDDING_ENDPOINT
+        - name: HUGGINGFACEHUB_API_TOKEN
+          valueFrom:
+            configMapKeyRef:
+              name: qna-config
+              key: HUGGINGFACEHUB_API_TOKEN
+        - name: INDEX_NAME
+          valueFrom:
+            configMapKeyRef:
+              name: qna-config
+              key: INDEX_NAME
+        image: opea/retriever-redis:latest
+        imagePullPolicy: IfNotPresent
+        name: retriever-deploy
+        args: null
+        ports:
+        - containerPort: 7000
+        resources:
+          limits:
+            cpu: 8
+            memory: 2500Mi
+          requests:
+            cpu: 8
+            memory: 2500Mi
+      serviceAccountName: default
+---
+kind: Service
+apiVersion: v1
+metadata:
+  name: retriever-svc
+  namespace: default
+spec:
+  type: ClusterIP
+  selector:
+    app: retriever-deploy
+  ports:
+  - name: service
+    port: 7000
+    targetPort: 7000
+
+
+---
+
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: vector-db
+  namespace: default
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: vector-db
+  template:
+    metadata:
+      labels:
+        app: vector-db
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea
+      topologySpreadConstraints:
+      - maxSkew: 1
+        topologyKey: kubernetes.io/hostname
+        whenUnsatisfiable: ScheduleAnyway
+        labelSelector:
+          matchLabels:
+            app: vector-db
+      containers:
+      - name: vector-db
+        image: redis/redis-stack:7.2.0-v9
+        ports:
+        - containerPort: 6379
+        - containerPort: 8001
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: vector-db
+  namespace: default
+spec:
+  type: ClusterIP
+  selector:
+    app: vector-db
+  ports:
+  - name: vector-db-service
+    port: 6379
+    targetPort: 6379
+  - name: vector-db-insight
+    port: 8001
+    targetPort: 8001
+
+
+---
diff --git a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/vector-db_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/vector-db_run.yaml
deleted file mode 100644
index e04e8c5fe..000000000
--- a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/vector-db_run.yaml
+++ /dev/null
@@ -1,48 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: vector-db
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: vector-db
-  template:
-    metadata:
-      labels:
-        app: vector-db
-    spec:
-      nodeSelector:
-        node-type: chatqna-opea
-      topologySpreadConstraints:
-      - maxSkew: 1
-        topologyKey: kubernetes.io/hostname
-        whenUnsatisfiable: ScheduleAnyway
-        labelSelector:
-          matchLabels:
-            app: vector-db
-      containers:
-      - name: vector-db
-        image: redis/redis-stack:7.2.0-v9
-        ports:
-        - containerPort: 6379
-        - containerPort: 8001
----
-apiVersion: v1
-kind: Service
-metadata:
-  name: vector-db
-spec:
-  type: ClusterIP
-  selector:
-    app: vector-db
-  ports:
-  - name: vector-db-service
-    port: 6379
-    targetPort: 6379
-  - name: vector-db-insight
-    port: 8001
-    targetPort: 8001
diff --git a/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/chatqna_config_map.yaml b/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/chatqna_config_map.yaml
deleted file mode 100644
index 368c800e4..000000000
--- a/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/chatqna_config_map.yaml
+++ /dev/null
@@ -1,23 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: v1
-kind: ConfigMap
-metadata:
-  name: qna-config
-  namespace: default
-data:
-  EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
-  RERANK_MODEL_ID: BAAI/bge-reranker-base
-  LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
-  TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
-  TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
-  TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
-  REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
-  INDEX_NAME: rag-redis
-  HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
-  EMBEDDING_SERVICE_HOST_IP: embedding-svc
-  RETRIEVER_SERVICE_HOST_IP: retriever-svc
-  RERANK_SERVICE_HOST_IP: reranking-svc
-  NODE_SELECTOR: chatqna-opea
-  LLM_SERVICE_HOST_IP: llm-svc
diff --git a/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/chatqna_mega_service_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/chatqna_mega_service_run.yaml
deleted file mode 100644
index cfe155580..000000000
--- a/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/chatqna_mega_service_run.yaml
+++ /dev/null
@@ -1,62 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: chatqna-backend-server-deploy
-  namespace: default
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: chatqna-backend-server-deploy
-  template:
-    metadata:
-      annotations:
-        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
-      labels:
-        app: chatqna-backend-server-deploy
-    spec:
-      nodeSelector:
-        node-type: chatqna-opea
-      topologySpreadConstraints:
-      - maxSkew: 1
-        topologyKey: kubernetes.io/hostname
-        whenUnsatisfiable: ScheduleAnyway
-        labelSelector:
-          matchLabels:
-            app: chatqna-backend-server-deploy
-      hostIPC: true
-      containers:
-      - envFrom:
-        - configMapRef:
-            name: qna-config
-        image: opea/chatqna-without-rerank:latest
-        imagePullPolicy: IfNotPresent
-        name: chatqna-backend-server-deploy
-        args: null
-        ports:
-        - containerPort: 8888
-        resources:
-          limits:
-            cpu: 8
-            memory: 4000Mi
-          requests:
-            cpu: 8
-            memory: 4000Mi
-      serviceAccountName: default
----
-kind: Service
-apiVersion: v1
-metadata:
-  name: chatqna-backend-server-svc
-spec:
-  type: NodePort
-  selector:
-    app: chatqna-backend-server-deploy
-  ports:
-  - name: service
-    port: 8888
-    targetPort: 8888
-    nodePort: 30888
diff --git a/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/dataprep-microservice_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/dataprep-microservice_run.yaml
deleted file mode 100644
index 4c71df7ce..000000000
--- a/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/dataprep-microservice_run.yaml
+++ /dev/null
@@ -1,75 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: dataprep-deploy
-  namespace: default
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: dataprep-deploy
-  template:
-    metadata:
-      annotations:
-        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
-      labels:
-        app: dataprep-deploy
-    spec:
-      nodeSelector:
-        node-type: chatqna-opea
-      topologySpreadConstraints:
-      - maxSkew: 1
-        topologyKey: kubernetes.io/hostname
-        whenUnsatisfiable: ScheduleAnyway
-        labelSelector:
-          matchLabels:
-            app: dataprep-deploy
-      hostIPC: true
-      containers:
-      - env:
-        - name: REDIS_URL
-          valueFrom:
-            configMapKeyRef:
-              name: qna-config
-              key: REDIS_URL
-        - name: TEI_ENDPOINT
-          valueFrom:
-            configMapKeyRef:
-              name: qna-config
-              key: TEI_EMBEDDING_ENDPOINT
-        - name: INDEX_NAME
-          valueFrom:
-            configMapKeyRef:
-              name: qna-config
-              key: INDEX_NAME
-        image: opea/dataprep-redis:latest
-        imagePullPolicy: IfNotPresent
-        name: dataprep-deploy
-        args: null
-        ports:
-        - containerPort: 6007
-        - containerPort: 6008
-        - containerPort: 6009
-      serviceAccountName: default
----
-kind: Service
-apiVersion: v1
-metadata:
-  name: dataprep-svc
-spec:
-  type: ClusterIP
-  selector:
-    app: dataprep-deploy
-  ports:
-  - name: port1
-    port: 6007
-    targetPort: 6007
-  - name: port2
-    port: 6008
-    targetPort: 6008
-  - name: port3
-    port: 6009
-    targetPort: 6009
diff --git a/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/embedding-dependency_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/embedding-dependency_run.yaml
deleted file mode 100644
index f27ffcad0..000000000
--- a/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/embedding-dependency_run.yaml
+++ /dev/null
@@ -1,69 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: embedding-dependency-deploy
-  namespace: default
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: embedding-dependency-deploy
-  template:
-    metadata:
-      annotations:
-        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
-      labels:
-        app: embedding-dependency-deploy
-    spec:
-      nodeSelector:
-        node-type: chatqna-opea
-      containers:
-      - envFrom:
-        - configMapRef:
-            name: qna-config
-        image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
-        name: embedding-dependency-deploy
-        args:
-        - --model-id
-        - $(EMBEDDING_MODEL_ID)
-        - --auto-truncate
-        volumeMounts:
-        - mountPath: /data
-          name: model-volume
-        - mountPath: /dev/shm
-          name: shm
-        ports:
-        - containerPort: 80
-        resources:
-          limits:
-            cpu: 80
-            memory: 20000Mi
-          requests:
-            cpu: 80
-            memory: 20000Mi
-      serviceAccountName: default
-      volumes:
-      - name: model-volume
-        hostPath:
-          path: /mnt/models
-          type: Directory
-      - name: shm
-        emptyDir:
-          medium: Memory
-          sizeLimit: 1Gi
----
-kind: Service
-apiVersion: v1
-metadata:
-  name: embedding-dependency-svc
-spec:
-  type: ClusterIP
-  selector:
-    app: embedding-dependency-deploy
-  ports:
-  - name: service
-    port: 6006
-    targetPort: 80
diff --git a/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/embedding-microservice_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/embedding-microservice_run.yaml
deleted file mode 100644
index f23ba0b4f..000000000
--- a/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/embedding-microservice_run.yaml
+++ /dev/null
@@ -1,59 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: embedding-deploy
-  namespace: default
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: embedding-deploy
-  template:
-    metadata:
-      annotations:
-        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
-      labels:
-        app: embedding-deploy
-    spec:
-      nodeSelector:
-        node-type: chatqna-opea
-      topologySpreadConstraints:
-      - maxSkew: 1
-        topologyKey: kubernetes.io/hostname
-        whenUnsatisfiable: ScheduleAnyway
-        labelSelector:
-          matchLabels:
-            app: embedding-deploy
-      hostIPC: true
-      containers:
-      - envFrom:
-        - configMapRef:
-            name: qna-config
-        image: opea/embedding-tei:latest
-        imagePullPolicy: IfNotPresent
-        name: embedding-deploy
-        args: null
-        ports:
-        - containerPort: 6000
-        resources:
-          limits:
-            cpu: 4
-          requests:
-            cpu: 4
-      serviceAccountName: default
----
-kind: Service
-apiVersion: v1
-metadata:
-  name: embedding-svc
-spec:
-  type: ClusterIP
-  selector:
-    app: embedding-deploy
-  ports:
-  - name: service
-    port: 6000
-    targetPort: 6000
diff --git a/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/llm-dependency_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/llm-dependency_run.yaml
deleted file mode 100644
index 6fd539c95..000000000
--- a/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/llm-dependency_run.yaml
+++ /dev/null
@@ -1,88 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: llm-dependency-deploy
-  namespace: default
-spec:
-  replicas: 8
-  selector:
-    matchLabels:
-      app: llm-dependency-deploy
-  template:
-    metadata:
-      annotations:
-        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
-      labels:
-        app: llm-dependency-deploy
-    spec:
-      nodeSelector:
-        node-type: chatqna-opea
-      hostIPC: true
-      containers:
-      - envFrom:
-        - configMapRef:
-            name: qna-config
-        image: ghcr.io/huggingface/tgi-gaudi:2.0.4
-        name: llm-dependency-deploy-demo
-        securityContext:
-          capabilities:
-            add:
-            - SYS_NICE
-        args:
-        - --model-id
-        - $(LLM_MODEL_ID)
-        - --max-input-length
-        - '1024'
-        - --max-total-tokens
-        - '2048'
-        - --max-batch-total-tokens
-        - '65536'
-        - --max-batch-prefill-tokens
-        - '4096'
-        volumeMounts:
-        - mountPath: /data
-          name: model-volume
-        - mountPath: /dev/shm
-          name: shm
-        ports:
-        - containerPort: 80
-        resources:
-          limits:
-            habana.ai/gaudi: 1
-        env:
-        - name: OMPI_MCA_btl_vader_single_copy_mechanism
-          value: none
-        - name: PT_HPU_ENABLE_LAZY_COLLECTIVES
-          value: 'true'
-        - name: runtime
-          value: habana
-        - name: HABANA_VISIBLE_DEVICES
-          value: all
-        - name: HF_TOKEN
-          value: ${HF_TOKEN}
-      serviceAccountName: default
-      volumes:
-      - name: model-volume
-        hostPath:
-          path: /mnt/models
-          type: Directory
-      - name: shm
-        emptyDir:
-          medium: Memory
-          sizeLimit: 1Gi
----
-kind: Service
-apiVersion: v1
-metadata:
-  name: llm-dependency-svc
-spec:
-  type: ClusterIP
-  selector:
-    app: llm-dependency-deploy
-  ports:
-  - name: service
-    port: 9009
-    targetPort: 80
diff --git a/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/llm-microservice_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/llm-microservice_run.yaml
deleted file mode 100644
index 1d9e29112..000000000
--- a/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/llm-microservice_run.yaml
+++ /dev/null
@@ -1,59 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: llm-deploy
-  namespace: default
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: llm-deploy
-  template:
-    metadata:
-      annotations:
-        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
-      labels:
-        app: llm-deploy
-    spec:
-      nodeSelector:
-        node-type: chatqna-opea
-      topologySpreadConstraints:
-      - maxSkew: 1
-        topologyKey: kubernetes.io/hostname
-        whenUnsatisfiable: ScheduleAnyway
-        labelSelector:
-          matchLabels:
-            app: llm-deploy
-      hostIPC: true
-      containers:
-      - envFrom:
-        - configMapRef:
-            name: qna-config
-        image: opea/llm-tgi:latest
-        imagePullPolicy: IfNotPresent
-        name: llm-deploy
-        args: null
-        ports:
-        - containerPort: 9000
-        resources:
-          limits:
-            cpu: 4
-          requests:
-            cpu: 4
-      serviceAccountName: default
----
-kind: Service
-apiVersion: v1
-metadata:
-  name: llm-svc
-spec:
-  type: ClusterIP
-  selector:
-    app: llm-deploy
-  ports:
-  - name: service
-    port: 9000
-    targetPort: 9000
diff --git a/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/retrieval-microservice_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/retrieval-microservice_run.yaml
deleted file mode 100644
index 298abd73a..000000000
--- a/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/retrieval-microservice_run.yaml
+++ /dev/null
@@ -1,79 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: retriever-deploy
-  namespace: default
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: retriever-deploy
-  template:
-    metadata:
-      annotations:
-        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
-      labels:
-        app: retriever-deploy
-    spec:
-      nodeSelector:
-        node-type: chatqna-opea
-      topologySpreadConstraints:
-      - maxSkew: 1
-        topologyKey: kubernetes.io/hostname
-        whenUnsatisfiable: ScheduleAnyway
-        labelSelector:
-          matchLabels:
-            app: retriever-deploy
-      hostIPC: true
-      containers:
-      - env:
-        - name: REDIS_URL
-          valueFrom:
-            configMapKeyRef:
-              name: qna-config
-              key: REDIS_URL
-        - name: TEI_EMBEDDING_ENDPOINT
-          valueFrom:
-            configMapKeyRef:
-              name: qna-config
-              key: TEI_EMBEDDING_ENDPOINT
-        - name: HUGGINGFACEHUB_API_TOKEN
-          valueFrom:
-            configMapKeyRef:
-              name: qna-config
-              key: HUGGINGFACEHUB_API_TOKEN
-        - name: INDEX_NAME
-          valueFrom:
-            configMapKeyRef:
-              name: qna-config
-              key: INDEX_NAME
-        image: opea/retriever-redis:latest
-        imagePullPolicy: IfNotPresent
-        name: retriever-deploy
-        args: null
-        ports:
-        - containerPort: 7000
-        resources:
-          limits:
-            cpu: 8
-            memory: 2500Mi
-          requests:
-            cpu: 8
-            memory: 2500Mi
-      serviceAccountName: default
----
-kind: Service
-apiVersion: v1
-metadata:
-  name: retriever-svc
-spec:
-  type: ClusterIP
-  selector:
-    app: retriever-deploy
-  ports:
-  - name: service
-    port: 7000
-    targetPort: 7000
diff --git a/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/tuned_single_gaudi_without_rerank.yaml b/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/tuned_single_gaudi_without_rerank.yaml
new file mode 100644
index 000000000..1388453a2
--- /dev/null
+++ b/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/tuned_single_gaudi_without_rerank.yaml
@@ -0,0 +1,614 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: qna-config  # read by the Deployments in this file through envFrom / configMapKeyRef
+  namespace: default
+data:
+  EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
+  RERANK_MODEL_ID: BAAI/bge-reranker-base
+  LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
+  TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006  # service port of embedding-dependency-svc
+  TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808  # NOTE(review): this manifest defines no reranking Service
+  TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009  # service port of llm-dependency-svc
+  REDIS_URL: redis://vector-db.default.svc.cluster.local:6379  # vector-db Service, port 6379
+  INDEX_NAME: rag-redis
+  HUGGINGFACEHUB_API_TOKEN: '${HF_TOKEN}'  # placeholder, fill in before applying; quoted so an empty value stays a string instead of null
+  EMBEDDING_SERVICE_HOST_IP: embedding-svc
+  RETRIEVER_SERVICE_HOST_IP: retriever-svc
+  RERANK_SERVICE_HOST_IP: reranking-svc
+  NODE_SELECTOR: chatqna-opea  # same value as the node-type nodeSelector used by the Deployments
+  LLM_SERVICE_HOST_IP: llm-svc
+
+
+---
+
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  namespace: default
+  name: chatqna-backend-server-deploy
+spec:
+  selector:
+    matchLabels:
+      app: chatqna-backend-server-deploy
+  replicas: 1
+  template:
+    metadata:
+      labels:
+        app: chatqna-backend-server-deploy
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+    spec:
+      serviceAccountName: default
+      hostIPC: true
+      nodeSelector:
+        node-type: chatqna-opea  # only nodes carrying this label are eligible
+      topologySpreadConstraints:
+      - topologyKey: kubernetes.io/hostname
+        maxSkew: 1
+        whenUnsatisfiable: ScheduleAnyway  # spreading across hosts is a preference, not a hard rule
+        labelSelector:
+          matchLabels:
+            app: chatqna-backend-server-deploy
+      containers:
+      - name: chatqna-backend-server-deploy
+        image: opea/chatqna-without-rerank:latest
+        imagePullPolicy: IfNotPresent
+        args: null
+        envFrom:  # every key of qna-config is injected as an environment variable
+        - configMapRef:
+            name: qna-config
+        ports:
+        - containerPort: 8888
+        resources:  # requests are set equal to limits
+          requests:
+            cpu: 8
+            memory: 4000Mi
+          limits:
+            cpu: 8
+            memory: 4000Mi
+---
+apiVersion: v1
+kind: Service
+metadata:
+  namespace: default
+  name: chatqna-backend-server-svc
+spec:
+  selector:
+    app: chatqna-backend-server-deploy  # pod label set by the backend Deployment template
+  type: NodePort
+  ports:
+  - name: service
+    nodePort: 30888  # fixed port opened on the nodes
+    port: 8888
+    targetPort: 8888
+
+
+---
+
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  namespace: default
+  name: dataprep-deploy
+spec:
+  selector:
+    matchLabels:
+      app: dataprep-deploy
+  replicas: 1
+  template:
+    metadata:
+      labels:
+        app: dataprep-deploy
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+    spec:
+      serviceAccountName: default
+      hostIPC: true
+      nodeSelector:
+        node-type: chatqna-opea
+      topologySpreadConstraints:
+      - topologyKey: kubernetes.io/hostname
+        maxSkew: 1
+        whenUnsatisfiable: ScheduleAnyway
+        labelSelector:
+          matchLabels:
+            app: dataprep-deploy
+      containers:
+      - name: dataprep-deploy
+        image: opea/dataprep-redis:latest
+        imagePullPolicy: IfNotPresent
+        args: null
+        env:  # individual keys picked from qna-config (no envFrom here)
+        - name: REDIS_URL
+          valueFrom:
+            configMapKeyRef:
+              key: REDIS_URL
+              name: qna-config
+        - name: TEI_ENDPOINT  # env name differs from the ConfigMap key it reads
+          valueFrom:
+            configMapKeyRef:
+              key: TEI_EMBEDDING_ENDPOINT
+              name: qna-config
+        - name: INDEX_NAME
+          valueFrom:
+            configMapKeyRef:
+              key: INDEX_NAME
+              name: qna-config
+        ports:  # no resources stanza is set for this container
+        - containerPort: 6007
+        - containerPort: 6008
+        - containerPort: 6009
+---
+apiVersion: v1
+kind: Service
+metadata:
+  namespace: default
+  name: dataprep-svc
+spec:
+  selector:
+    app: dataprep-deploy
+  type: ClusterIP
+  ports:  # one entry per containerPort declared by dataprep-deploy
+  - name: port1
+    targetPort: 6007
+    port: 6007
+  - name: port2
+    targetPort: 6008
+    port: 6008
+  - name: port3
+    targetPort: 6009
+    port: 6009
+
+
+---
+
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  namespace: default
+  name: embedding-dependency-deploy
+spec:
+  selector:
+    matchLabels:
+      app: embedding-dependency-deploy
+  replicas: 1
+  template:
+    metadata:
+      labels:
+        app: embedding-dependency-deploy
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+    spec:
+      serviceAccountName: default
+      nodeSelector:
+        node-type: chatqna-opea
+      volumes:
+      - name: model-volume
+        hostPath:
+          type: Directory  # the path must already exist on the node
+          path: /mnt/models
+      - name: shm
+        emptyDir:
+          medium: Memory  # memory-backed volume mounted at /dev/shm
+          sizeLimit: 1Gi
+      containers:
+      - name: embedding-dependency-deploy
+        image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
+        envFrom:
+        - configMapRef:
+            name: qna-config
+        args:
+        - --model-id
+        - $(EMBEDDING_MODEL_ID)  # expanded by Kubernetes from the container env (supplied via envFrom)
+        - --auto-truncate
+        ports:
+        - containerPort: 80
+        volumeMounts:
+        - name: model-volume
+          mountPath: /data
+        - name: shm
+          mountPath: /dev/shm
+        resources:  # requests are set equal to limits
+          requests:
+            cpu: 80
+            memory: 20000Mi
+          limits:
+            cpu: 80
+            memory: 20000Mi
+---
+apiVersion: v1
+kind: Service
+metadata:
+  namespace: default
+  name: embedding-dependency-svc
+spec:
+  selector:
+    app: embedding-dependency-deploy
+  type: ClusterIP
+  ports:
+  - name: service
+    targetPort: 80  # container port of the embedding-dependency pods
+    port: 6006  # port used in TEI_EMBEDDING_ENDPOINT of qna-config
+
+
+---
+
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  namespace: default
+  name: embedding-deploy
+spec:
+  selector:
+    matchLabels:
+      app: embedding-deploy
+  replicas: 1
+  template:
+    metadata:
+      labels:
+        app: embedding-deploy
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+    spec:
+      serviceAccountName: default
+      hostIPC: true
+      nodeSelector:
+        node-type: chatqna-opea
+      topologySpreadConstraints:
+      - topologyKey: kubernetes.io/hostname
+        maxSkew: 1
+        whenUnsatisfiable: ScheduleAnyway
+        labelSelector:
+          matchLabels:
+            app: embedding-deploy
+      containers:
+      - name: embedding-deploy
+        image: opea/embedding-tei:latest
+        imagePullPolicy: IfNotPresent
+        args: null
+        envFrom:  # every key of qna-config is injected as an environment variable
+        - configMapRef:
+            name: qna-config
+        ports:
+        - containerPort: 6000
+        resources:  # CPU only; no memory request or limit is set
+          requests:
+            cpu: 4
+          limits:
+            cpu: 4
+---
+apiVersion: v1
+kind: Service
+metadata:
+  namespace: default
+  name: embedding-svc  # name referenced by EMBEDDING_SERVICE_HOST_IP in qna-config
+spec:
+  selector:
+    app: embedding-deploy
+  type: ClusterIP
+  ports:
+  - name: service
+    targetPort: 6000
+    port: 6000
+
+
+---
+
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: llm-dependency-deploy
+  namespace: default
+spec:
+  replicas: 8  # each replica requests one habana.ai/gaudi device (see resources.limits)
+  selector:
+    matchLabels:
+      app: llm-dependency-deploy
+  template:
+    metadata:
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+      labels:
+        app: llm-dependency-deploy
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea
+      hostIPC: true
+      containers:
+      - envFrom:  # every key of qna-config is injected as an environment variable
+        - configMapRef:
+            name: qna-config
+        image: ghcr.io/huggingface/tgi-gaudi:2.0.4
+        name: llm-dependency-deploy-demo
+        securityContext:
+          capabilities:
+            add:
+            - SYS_NICE
+        args:
+        - --model-id
+        - $(LLM_MODEL_ID)  # expanded by Kubernetes from the container env (supplied via envFrom)
+        - --max-input-length
+        - '1024'
+        - --max-total-tokens
+        - '2048'
+        - --max-batch-total-tokens
+        - '65536'
+        - --max-batch-prefill-tokens
+        - '4096'
+        volumeMounts:
+        - mountPath: /data
+          name: model-volume
+        - mountPath: /dev/shm
+          name: shm
+        ports:
+        - containerPort: 80
+        resources:
+          limits:
+            habana.ai/gaudi: 1
+        env:
+        - name: OMPI_MCA_btl_vader_single_copy_mechanism
+          value: none
+        - name: PT_HPU_ENABLE_LAZY_COLLECTIVES
+          value: 'true'
+        - name: runtime
+          value: habana
+        - name: HABANA_VISIBLE_DEVICES
+          value: all  # NOTE(review): 'all' alongside a one-device limit; confirm each pod is still confined to its allocated card
+        - name: HF_TOKEN
+          value: '${HF_TOKEN}'  # placeholder, fill in before applying; quoted so an empty value stays a string instead of null
+      serviceAccountName: default
+      volumes:
+      - name: model-volume
+        hostPath:
+          path: /mnt/models
+          type: Directory  # the path must already exist on the node
+      - name: shm
+        emptyDir:
+          medium: Memory  # memory-backed volume mounted at /dev/shm
+          sizeLimit: 1Gi
+---
+apiVersion: v1
+kind: Service
+metadata:
+  namespace: default
+  name: llm-dependency-svc
+spec:
+  selector:
+    app: llm-dependency-deploy
+  type: ClusterIP
+  ports:
+  - name: service
+    targetPort: 80  # container port of the llm-dependency pods
+    port: 9009  # port used in TGI_LLM_ENDPOINT of qna-config
+
+
+---
+
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  namespace: default
+  name: llm-deploy
+spec:
+  selector:
+    matchLabels:
+      app: llm-deploy
+  replicas: 1
+  template:
+    metadata:
+      labels:
+        app: llm-deploy
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+    spec:
+      serviceAccountName: default
+      hostIPC: true
+      nodeSelector:
+        node-type: chatqna-opea
+      topologySpreadConstraints:
+      - topologyKey: kubernetes.io/hostname
+        maxSkew: 1
+        whenUnsatisfiable: ScheduleAnyway
+        labelSelector:
+          matchLabels:
+            app: llm-deploy
+      containers:
+      - name: llm-deploy
+        image: opea/llm-tgi:latest
+        imagePullPolicy: IfNotPresent
+        args: null
+        envFrom:  # every key of qna-config is injected as an environment variable
+        - configMapRef:
+            name: qna-config
+        ports:
+        - containerPort: 9000
+        resources:  # CPU only; no memory request or limit is set
+          requests:
+            cpu: 4
+          limits:
+            cpu: 4
+---
+apiVersion: v1
+kind: Service
+metadata:
+  namespace: default
+  name: llm-svc  # name referenced by LLM_SERVICE_HOST_IP in qna-config
+spec:
+  selector:
+    app: llm-deploy
+  type: ClusterIP
+  ports:
+  - name: service
+    targetPort: 9000
+    port: 9000
+
+
+---
+
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  namespace: default
+  name: retriever-deploy
+spec:
+  selector:
+    matchLabels:
+      app: retriever-deploy
+  replicas: 1
+  template:
+    metadata:
+      labels:
+        app: retriever-deploy
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+    spec:
+      serviceAccountName: default
+      hostIPC: true
+      nodeSelector:
+        node-type: chatqna-opea
+      topologySpreadConstraints:
+      - topologyKey: kubernetes.io/hostname
+        maxSkew: 1
+        whenUnsatisfiable: ScheduleAnyway
+        labelSelector:
+          matchLabels:
+            app: retriever-deploy
+      containers:
+      - name: retriever-deploy
+        image: opea/retriever-redis:latest
+        imagePullPolicy: IfNotPresent
+        args: null
+        env:  # individual keys picked from qna-config (no envFrom here)
+        - name: REDIS_URL
+          valueFrom:
+            configMapKeyRef:
+              key: REDIS_URL
+              name: qna-config
+        - name: TEI_EMBEDDING_ENDPOINT
+          valueFrom:
+            configMapKeyRef:
+              key: TEI_EMBEDDING_ENDPOINT
+              name: qna-config
+        - name: HUGGINGFACEHUB_API_TOKEN
+          valueFrom:
+            configMapKeyRef:
+              key: HUGGINGFACEHUB_API_TOKEN
+              name: qna-config
+        - name: INDEX_NAME
+          valueFrom:
+            configMapKeyRef:
+              key: INDEX_NAME
+              name: qna-config
+        ports:
+        - containerPort: 7000
+        resources:  # requests are set equal to limits
+          requests:
+            cpu: 8
+            memory: 2500Mi
+          limits:
+            cpu: 8
+            memory: 2500Mi
+---
+apiVersion: v1
+kind: Service
+metadata:
+  namespace: default
+  name: retriever-svc  # name referenced by RETRIEVER_SERVICE_HOST_IP in qna-config
+spec:
+  selector:
+    app: retriever-deploy
+  type: ClusterIP
+  ports:
+  - name: service
+    targetPort: 7000
+    port: 7000
+
+
+---
+
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  namespace: default
+  name: vector-db
+spec:
+  selector:
+    matchLabels:
+      app: vector-db
+  replicas: 1
+  template:
+    metadata:
+      labels:
+        app: vector-db
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea
+      topologySpreadConstraints:
+      - topologyKey: kubernetes.io/hostname
+        maxSkew: 1
+        whenUnsatisfiable: ScheduleAnyway
+        labelSelector:
+          matchLabels:
+            app: vector-db
+      containers:
+      - image: redis/redis-stack:7.2.0-v9
+        name: vector-db
+        ports:  # both ports are published by the vector-db Service
+        - containerPort: 6379
+        - containerPort: 8001
+---
+apiVersion: v1
+kind: Service
+metadata:
+  namespace: default
+  name: vector-db  # host name used in REDIS_URL of qna-config
+spec:
+  selector:
+    app: vector-db
+  type: ClusterIP
+  ports:
+  - name: vector-db-service
+    targetPort: 6379
+    port: 6379
+  - name: vector-db-insight
+    targetPort: 8001
+    port: 8001
+
+
+---
diff --git a/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/vector-db_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/vector-db_run.yaml
deleted file mode 100644
index e04e8c5fe..000000000
--- a/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/vector-db_run.yaml
+++ /dev/null
@@ -1,48 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: vector-db
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: vector-db
-  template:
-    metadata:
-      labels:
-        app: vector-db
-    spec:
-      nodeSelector:
-        node-type: chatqna-opea
-      topologySpreadConstraints:
-      - maxSkew: 1
-        topologyKey: kubernetes.io/hostname
-        whenUnsatisfiable: ScheduleAnyway
-        labelSelector:
-          matchLabels:
-            app: vector-db
-      containers:
-      - name: vector-db
-        image: redis/redis-stack:7.2.0-v9
-        ports:
-        - containerPort: 6379
-        - containerPort: 8001
----
-apiVersion: v1
-kind: Service
-metadata:
-  name: vector-db
-spec:
-  type: ClusterIP
-  selector:
-    app: vector-db
-  ports:
-  - name: vector-db-service
-    port: 6379
-    targetPort: 6379
-  - name: vector-db-insight
-    port: 8001
-    targetPort: 8001
diff --git a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/chatqna_config_map.yaml b/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/chatqna_config_map.yaml
deleted file mode 100644
index 368c800e4..000000000
--- a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/chatqna_config_map.yaml
+++ /dev/null
@@ -1,23 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: v1
-kind: ConfigMap
-metadata:
-  name: qna-config
-  namespace: default
-data:
-  EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
-  RERANK_MODEL_ID: BAAI/bge-reranker-base
-  LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
-  TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
-  TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
-  TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
-  REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
-  INDEX_NAME: rag-redis
-  HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
-  EMBEDDING_SERVICE_HOST_IP: embedding-svc
-  RETRIEVER_SERVICE_HOST_IP: retriever-svc
-  RERANK_SERVICE_HOST_IP: reranking-svc
-  NODE_SELECTOR: chatqna-opea
-  LLM_SERVICE_HOST_IP: llm-svc
diff --git a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/chatqna_mega_service_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/chatqna_mega_service_run.yaml
deleted file mode 100644
index b95d4edec..000000000
--- a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/chatqna_mega_service_run.yaml
+++ /dev/null
@@ -1,62 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: chatqna-backend-server-deploy
-  namespace: default
-spec:
-  replicas: 2
-  selector:
-    matchLabels:
-      app: chatqna-backend-server-deploy
-  template:
-    metadata:
-      annotations:
-        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
-      labels:
-        app: chatqna-backend-server-deploy
-    spec:
-      nodeSelector:
-        node-type: chatqna-opea
-      topologySpreadConstraints:
-      - maxSkew: 1
-        topologyKey: kubernetes.io/hostname
-        whenUnsatisfiable: ScheduleAnyway
-        labelSelector:
-          matchLabels:
-            app: chatqna-backend-server-deploy
-      hostIPC: true
-      containers:
-      - envFrom:
-        - configMapRef:
-            name: qna-config
-        image: opea/chatqna-without-rerank:latest
-        imagePullPolicy: IfNotPresent
-        name: chatqna-backend-server-deploy
-        args: null
-        ports:
-        - containerPort: 8888
-        resources:
-          limits:
-            cpu: 8
-            memory: 4000Mi
-          requests:
-            cpu: 8
-            memory: 4000Mi
-      serviceAccountName: default
----
-kind: Service
-apiVersion: v1
-metadata:
-  name: chatqna-backend-server-svc
-spec:
-  type: NodePort
-  selector:
-    app: chatqna-backend-server-deploy
-  ports:
-  - name: service
-    port: 8888
-    targetPort: 8888
-    nodePort: 30888
diff --git a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/dataprep-microservice_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/dataprep-microservice_run.yaml
deleted file mode 100644
index 4c71df7ce..000000000
--- a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/dataprep-microservice_run.yaml
+++ /dev/null
@@ -1,75 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: dataprep-deploy
-  namespace: default
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: dataprep-deploy
-  template:
-    metadata:
-      annotations:
-        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
-      labels:
-        app: dataprep-deploy
-    spec:
-      nodeSelector:
-        node-type: chatqna-opea
-      topologySpreadConstraints:
-      - maxSkew: 1
-        topologyKey: kubernetes.io/hostname
-        whenUnsatisfiable: ScheduleAnyway
-        labelSelector:
-          matchLabels:
-            app: dataprep-deploy
-      hostIPC: true
-      containers:
-      - env:
-        - name: REDIS_URL
-          valueFrom:
-            configMapKeyRef:
-              name: qna-config
-              key: REDIS_URL
-        - name: TEI_ENDPOINT
-          valueFrom:
-            configMapKeyRef:
-              name: qna-config
-              key: TEI_EMBEDDING_ENDPOINT
-        - name: INDEX_NAME
-          valueFrom:
-            configMapKeyRef:
-              name: qna-config
-              key: INDEX_NAME
-        image: opea/dataprep-redis:latest
-        imagePullPolicy: IfNotPresent
-        name: dataprep-deploy
-        args: null
-        ports:
-        - containerPort: 6007
-        - containerPort: 6008
-        - containerPort: 6009
-      serviceAccountName: default
----
-kind: Service
-apiVersion: v1
-metadata:
-  name: dataprep-svc
-spec:
-  type: ClusterIP
-  selector:
-    app: dataprep-deploy
-  ports:
-  - name: port1
-    port: 6007
-    targetPort: 6007
-  - name: port2
-    port: 6008
-    targetPort: 6008
-  - name: port3
-    port: 6009
-    targetPort: 6009
diff --git a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/embedding-dependency_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/embedding-dependency_run.yaml
deleted file mode 100644
index 485d73402..000000000
--- a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/embedding-dependency_run.yaml
+++ /dev/null
@@ -1,69 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: embedding-dependency-deploy
-  namespace: default
-spec:
-  replicas: 2
-  selector:
-    matchLabels:
-      app: embedding-dependency-deploy
-  template:
-    metadata:
-      annotations:
-        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
-      labels:
-        app: embedding-dependency-deploy
-    spec:
-      nodeSelector:
-        node-type: chatqna-opea
-      containers:
-      - envFrom:
-        - configMapRef:
-            name: qna-config
-        image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
-        name: embedding-dependency-deploy
-        args:
-        - --model-id
-        - $(EMBEDDING_MODEL_ID)
-        - --auto-truncate
-        volumeMounts:
-        - mountPath: /data
-          name: model-volume
-        - mountPath: /dev/shm
-          name: shm
-        ports:
-        - containerPort: 80
-        resources:
-          limits:
-            cpu: 80
-            memory: 20000Mi
-          requests:
-            cpu: 80
-            memory: 20000Mi
-      serviceAccountName: default
-      volumes:
-      - name: model-volume
-        hostPath:
-          path: /mnt/models
-          type: Directory
-      - name: shm
-        emptyDir:
-          medium: Memory
-          sizeLimit: 1Gi
----
-kind: Service
-apiVersion: v1
-metadata:
-  name: embedding-dependency-svc
-spec:
-  type: ClusterIP
-  selector:
-    app: embedding-dependency-deploy
-  ports:
-  - name: service
-    port: 6006
-    targetPort: 80
diff --git a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/embedding-microservice_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/embedding-microservice_run.yaml
deleted file mode 100644
index 3822537c4..000000000
--- a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/embedding-microservice_run.yaml
+++ /dev/null
@@ -1,59 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: embedding-deploy
-  namespace: default
-spec:
-  replicas: 2
-  selector:
-    matchLabels:
-      app: embedding-deploy
-  template:
-    metadata:
-      annotations:
-        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
-      labels:
-        app: embedding-deploy
-    spec:
-      nodeSelector:
-        node-type: chatqna-opea
-      topologySpreadConstraints:
-      - maxSkew: 1
-        topologyKey: kubernetes.io/hostname
-        whenUnsatisfiable: ScheduleAnyway
-        labelSelector:
-          matchLabels:
-            app: embedding-deploy
-      hostIPC: true
-      containers:
-      - envFrom:
-        - configMapRef:
-            name: qna-config
-        image: opea/embedding-tei:latest
-        imagePullPolicy: IfNotPresent
-        name: embedding-deploy
-        args: null
-        ports:
-        - containerPort: 6000
-        resources:
-          limits:
-            cpu: 4
-          requests:
-            cpu: 4
-      serviceAccountName: default
----
-kind: Service
-apiVersion: v1
-metadata:
-  name: embedding-svc
-spec:
-  type: ClusterIP
-  selector:
-    app: embedding-deploy
-  ports:
-  - name: service
-    port: 6000
-    targetPort: 6000
diff --git a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/llm-dependency_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/llm-dependency_run.yaml
deleted file mode 100644
index 466008735..000000000
--- a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/llm-dependency_run.yaml
+++ /dev/null
@@ -1,88 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: llm-dependency-deploy
-  namespace: default
-spec:
-  replicas: 16
-  selector:
-    matchLabels:
-      app: llm-dependency-deploy
-  template:
-    metadata:
-      annotations:
-        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
-      labels:
-        app: llm-dependency-deploy
-    spec:
-      nodeSelector:
-        node-type: chatqna-opea
-      hostIPC: true
-      containers:
-      - envFrom:
-        - configMapRef:
-            name: qna-config
-        image: ghcr.io/huggingface/tgi-gaudi:2.0.4
-        name: llm-dependency-deploy-demo
-        securityContext:
-          capabilities:
-            add:
-            - SYS_NICE
-        args:
-        - --model-id
-        - $(LLM_MODEL_ID)
-        - --max-input-length
-        - '1024'
-        - --max-total-tokens
-        - '2048'
-        - --max-batch-total-tokens
-        - '65536'
-        - --max-batch-prefill-tokens
-        - '4096'
-        volumeMounts:
-        - mountPath: /data
-          name: model-volume
-        - mountPath: /dev/shm
-          name: shm
-        ports:
-        - containerPort: 80
-        resources:
-          limits:
-            habana.ai/gaudi: 1
-        env:
-        - name: OMPI_MCA_btl_vader_single_copy_mechanism
-          value: none
-        - name: PT_HPU_ENABLE_LAZY_COLLECTIVES
-          value: 'true'
-        - name: runtime
-          value: habana
-        - name: HABANA_VISIBLE_DEVICES
-          value: all
-        - name: HF_TOKEN
-          value: ${HF_TOKEN}
-      serviceAccountName: default
-      volumes:
-      - name: model-volume
-        hostPath:
-          path: /mnt/models
-          type: Directory
-      - name: shm
-        emptyDir:
-          medium: Memory
-          sizeLimit: 1Gi
----
-kind: Service
-apiVersion: v1
-metadata:
-  name: llm-dependency-svc
-spec:
-  type: ClusterIP
-  selector:
-    app: llm-dependency-deploy
-  ports:
-  - name: service
-    port: 9009
-    targetPort: 80
diff --git a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/llm-microservice_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/llm-microservice_run.yaml
deleted file mode 100644
index 49a67fd2e..000000000
--- a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/llm-microservice_run.yaml
+++ /dev/null
@@ -1,59 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: llm-deploy
-  namespace: default
-spec:
-  replicas: 2
-  selector:
-    matchLabels:
-      app: llm-deploy
-  template:
-    metadata:
-      annotations:
-        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
-      labels:
-        app: llm-deploy
-    spec:
-      nodeSelector:
-        node-type: chatqna-opea
-      topologySpreadConstraints:
-      - maxSkew: 1
-        topologyKey: kubernetes.io/hostname
-        whenUnsatisfiable: ScheduleAnyway
-        labelSelector:
-          matchLabels:
-            app: llm-deploy
-      hostIPC: true
-      containers:
-      - envFrom:
-        - configMapRef:
-            name: qna-config
-        image: opea/llm-tgi:latest
-        imagePullPolicy: IfNotPresent
-        name: llm-deploy
-        args: null
-        ports:
-        - containerPort: 9000
-        resources:
-          limits:
-            cpu: 4
-          requests:
-            cpu: 4
-      serviceAccountName: default
----
-kind: Service
-apiVersion: v1
-metadata:
-  name: llm-svc
-spec:
-  type: ClusterIP
-  selector:
-    app: llm-deploy
-  ports:
-  - name: service
-    port: 9000
-    targetPort: 9000
diff --git a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/retrieval-microservice_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/retrieval-microservice_run.yaml
deleted file mode 100644
index b6799fc60..000000000
--- a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/retrieval-microservice_run.yaml
+++ /dev/null
@@ -1,79 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: retriever-deploy
-  namespace: default
-spec:
-  replicas: 2
-  selector:
-    matchLabels:
-      app: retriever-deploy
-  template:
-    metadata:
-      annotations:
-        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
-      labels:
-        app: retriever-deploy
-    spec:
-      nodeSelector:
-        node-type: chatqna-opea
-      topologySpreadConstraints:
-      - maxSkew: 1
-        topologyKey: kubernetes.io/hostname
-        whenUnsatisfiable: ScheduleAnyway
-        labelSelector:
-          matchLabels:
-            app: retriever-deploy
-      hostIPC: true
-      containers:
-      - env:
-        - name: REDIS_URL
-          valueFrom:
-            configMapKeyRef:
-              name: qna-config
-              key: REDIS_URL
-        - name: TEI_EMBEDDING_ENDPOINT
-          valueFrom:
-            configMapKeyRef:
-              name: qna-config
-              key: TEI_EMBEDDING_ENDPOINT
-        - name: HUGGINGFACEHUB_API_TOKEN
-          valueFrom:
-            configMapKeyRef:
-              name: qna-config
-              key: HUGGINGFACEHUB_API_TOKEN
-        - name: INDEX_NAME
-          valueFrom:
-            configMapKeyRef:
-              name: qna-config
-              key: INDEX_NAME
-        image: opea/retriever-redis:latest
-        imagePullPolicy: IfNotPresent
-        name: retriever-deploy
-        args: null
-        ports:
-        - containerPort: 7000
-        resources:
-          limits:
-            cpu: 8
-            memory: 2500Mi
-          requests:
-            cpu: 8
-            memory: 2500Mi
-      serviceAccountName: default
----
-kind: Service
-apiVersion: v1
-metadata:
-  name: retriever-svc
-spec:
-  type: ClusterIP
-  selector:
-    app: retriever-deploy
-  ports:
-  - name: service
-    port: 7000
-    targetPort: 7000
diff --git a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/tuned_two_gaudi_without_rerank.yaml b/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/tuned_two_gaudi_without_rerank.yaml
new file mode 100644
index 000000000..b38a50253
--- /dev/null
+++ b/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/tuned_two_gaudi_without_rerank.yaml
@@ -0,0 +1,614 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+kind: ConfigMap  # Settings shared by the workloads in this manifest (envFrom / configMapKeyRef)
+metadata:
+  name: qna-config
+  namespace: default
+data:
+  EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
+  RERANK_MODEL_ID: BAAI/bge-reranker-base
+  LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
+  TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006  # embedding-dependency-svc port
+  TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808  # no such Service is defined in this manifest
+  TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009  # llm-dependency-svc port
+  REDIS_URL: redis://vector-db.default.svc.cluster.local:6379  # vector-db Service port
+  INDEX_NAME: rag-redis
+  HUGGINGFACEHUB_API_TOKEN: '${HF_TOKEN}'  # placeholder (presumably substituted before apply); quoted so it always stays a string
+  EMBEDDING_SERVICE_HOST_IP: embedding-svc
+  RETRIEVER_SERVICE_HOST_IP: retriever-svc
+  RERANK_SERVICE_HOST_IP: reranking-svc  # no reranking-svc Service is defined in this manifest
+  NODE_SELECTOR: chatqna-opea  # same value as the node-type nodeSelector used by the Deployments
+  LLM_SERVICE_HOST_IP: llm-svc
+
+
+---
+
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment  # ChatQnA (without rerank) backend server; container port 8888
+metadata:
+  namespace: default
+  name: chatqna-backend-server-deploy
+spec:
+  replicas: 2
+  selector:
+    matchLabels:
+      app: chatqna-backend-server-deploy
+  template:
+    metadata:
+      labels:
+        app: chatqna-backend-server-deploy  # same label as spec.selector and the Service selector
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+    spec:
+      serviceAccountName: default
+      hostIPC: true
+      nodeSelector:
+        node-type: chatqna-opea
+      topologySpreadConstraints:
+      - topologyKey: kubernetes.io/hostname  # spread this app's pods per node
+        maxSkew: 1
+        whenUnsatisfiable: ScheduleAnyway  # soft constraint: does not block scheduling
+        labelSelector:
+          matchLabels:
+            app: chatqna-backend-server-deploy
+      containers:
+      - name: chatqna-backend-server-deploy
+        image: opea/chatqna-without-rerank:latest
+        imagePullPolicy: IfNotPresent
+        args: null
+        envFrom:
+        - configMapRef:
+            name: qna-config  # every qna-config key is imported as an env var
+        ports:
+        - containerPort: 8888
+        resources:
+          requests:
+            cpu: 8
+            memory: 4000Mi
+          limits:  # identical to requests
+            cpu: 8
+            memory: 4000Mi
+---
+apiVersion: v1
+kind: Service  # NodePort entry point for the chatqna-backend-server-deploy pods
+metadata:
+  namespace: default
+  name: chatqna-backend-server-svc
+spec:
+  selector:
+    app: chatqna-backend-server-deploy
+  type: NodePort
+  ports:
+  - name: service
+    nodePort: 30888  # port opened on the cluster nodes
+    port: 8888
+    targetPort: 8888
+
+
+---
+
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment  # Data-preparation service (opea/dataprep-redis image)
+metadata:
+  namespace: default
+  name: dataprep-deploy
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: dataprep-deploy
+  template:
+    metadata:
+      labels:
+        app: dataprep-deploy
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+    spec:
+      serviceAccountName: default
+      hostIPC: true
+      nodeSelector:
+        node-type: chatqna-opea
+      topologySpreadConstraints:
+      - topologyKey: kubernetes.io/hostname
+        maxSkew: 1
+        whenUnsatisfiable: ScheduleAnyway  # soft constraint: does not block scheduling
+        labelSelector:
+          matchLabels:
+            app: dataprep-deploy
+      containers:
+      - name: dataprep-deploy
+        image: opea/dataprep-redis:latest
+        imagePullPolicy: IfNotPresent
+        args: null
+        ports:  # all three are published by dataprep-svc
+        - containerPort: 6007
+        - containerPort: 6008
+        - containerPort: 6009
+        env:  # only these three qna-config keys are injected (no envFrom here)
+        - name: REDIS_URL
+          valueFrom:
+            configMapKeyRef:
+              key: REDIS_URL
+              name: qna-config
+        - name: TEI_ENDPOINT  # filled from the TEI_EMBEDDING_ENDPOINT key
+          valueFrom:
+            configMapKeyRef:
+              key: TEI_EMBEDDING_ENDPOINT
+              name: qna-config
+        - name: INDEX_NAME
+          valueFrom:
+            configMapKeyRef:
+              key: INDEX_NAME
+              name: qna-config
+---
+apiVersion: v1
+kind: Service  # Cluster-internal access to the three dataprep-deploy container ports
+metadata:
+  namespace: default
+  name: dataprep-svc
+spec:
+  selector:
+    app: dataprep-deploy
+  type: ClusterIP
+  ports:
+  - name: port1
+    targetPort: 6007
+    port: 6007
+  - name: port2
+    targetPort: 6008
+    port: 6008
+  - name: port3
+    targetPort: 6009
+    port: 6009
+
+
+---
+
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment  # text-embeddings-inference (CPU image) server for EMBEDDING_MODEL_ID
+metadata:
+  namespace: default
+  name: embedding-dependency-deploy
+spec:
+  replicas: 2
+  selector:
+    matchLabels:
+      app: embedding-dependency-deploy
+  template:
+    metadata:
+      labels:
+        app: embedding-dependency-deploy
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+    spec:
+      serviceAccountName: default
+      nodeSelector:
+        node-type: chatqna-opea
+      volumes:
+      - name: model-volume
+        hostPath:
+          type: Directory  # the path must already exist on the node
+          path: /mnt/models
+      - name: shm
+        emptyDir:
+          sizeLimit: 1Gi
+          medium: Memory  # memory-backed volume mounted at /dev/shm
+      containers:
+      - name: embedding-dependency-deploy
+        image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
+        args:
+        - --model-id
+        - $(EMBEDDING_MODEL_ID)  # expanded from the env var imported via envFrom
+        - --auto-truncate
+        envFrom:
+        - configMapRef:
+            name: qna-config
+        ports:
+        - containerPort: 80
+        volumeMounts:
+        - name: model-volume
+          mountPath: /data
+        - name: shm
+          mountPath: /dev/shm
+        resources:
+          requests:
+            cpu: 80
+            memory: 20000Mi
+          limits:  # identical to requests
+            cpu: 80
+            memory: 20000Mi
+---
+apiVersion: v1
+kind: Service  # Cluster-internal entry point for the embedding-dependency-deploy pods
+metadata:
+  namespace: default
+  name: embedding-dependency-svc
+spec:
+  selector:
+    app: embedding-dependency-deploy
+  type: ClusterIP
+  ports:
+  - name: service
+    targetPort: 80  # container port of the selected pods
+    port: 6006  # port used by TEI_EMBEDDING_ENDPOINT in qna-config
+
+
+---
+
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment  # Embedding microservice (opea/embedding-tei image); container port 6000
+metadata:
+  namespace: default
+  name: embedding-deploy
+spec:
+  replicas: 2
+  selector:
+    matchLabels:
+      app: embedding-deploy
+  template:
+    metadata:
+      labels:
+        app: embedding-deploy
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+    spec:
+      serviceAccountName: default
+      hostIPC: true
+      nodeSelector:
+        node-type: chatqna-opea
+      topologySpreadConstraints:
+      - topologyKey: kubernetes.io/hostname
+        maxSkew: 1
+        whenUnsatisfiable: ScheduleAnyway  # soft constraint: does not block scheduling
+        labelSelector:
+          matchLabels:
+            app: embedding-deploy
+      containers:
+      - name: embedding-deploy
+        image: opea/embedding-tei:latest
+        imagePullPolicy: IfNotPresent
+        args: null
+        envFrom:
+        - configMapRef:
+            name: qna-config  # every qna-config key is imported as an env var
+        ports:
+        - containerPort: 6000
+        resources:  # CPU only; no memory request or limit is declared
+          requests:
+            cpu: 4
+          limits:
+            cpu: 4
+---
+apiVersion: v1
+kind: Service  # Cluster-internal entry point for the embedding-deploy pods
+metadata:
+  namespace: default
+  name: embedding-svc  # same name as EMBEDDING_SERVICE_HOST_IP in qna-config
+spec:
+  selector:
+    app: embedding-deploy
+  type: ClusterIP
+  ports:
+  - name: service
+    targetPort: 6000
+    port: 6000
+
+
+---
+
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment  # tgi-gaudi inference server started with --model-id $(LLM_MODEL_ID)
+metadata:
+  name: llm-dependency-deploy
+  namespace: default
+spec:
+  replicas: 16  # each replica is limited to one habana.ai/gaudi device (see resources)
+  selector:
+    matchLabels:
+      app: llm-dependency-deploy
+  template:
+    metadata:
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+      labels:
+        app: llm-dependency-deploy
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea
+      hostIPC: true
+      containers:
+      - envFrom:  # imports every qna-config key as an env var
+        - configMapRef:
+            name: qna-config
+        image: ghcr.io/huggingface/tgi-gaudi:2.0.4
+        name: llm-dependency-deploy-demo
+        securityContext:
+          capabilities:
+            add:
+            - SYS_NICE
+        args:  # numeric flag values are quoted so they stay strings
+        - --model-id
+        - $(LLM_MODEL_ID)  # expanded from the env var imported via envFrom
+        - --max-input-length
+        - '1024'
+        - --max-total-tokens
+        - '2048'
+        - --max-batch-total-tokens
+        - '65536'
+        - --max-batch-prefill-tokens
+        - '4096'
+        volumeMounts:
+        - mountPath: /data
+          name: model-volume
+        - mountPath: /dev/shm
+          name: shm
+        ports:
+        - containerPort: 80
+        resources:
+          limits:
+            habana.ai/gaudi: 1
+        env:  # explicit variables, set in addition to the envFrom import
+        - name: OMPI_MCA_btl_vader_single_copy_mechanism
+          value: none
+        - name: PT_HPU_ENABLE_LAZY_COLLECTIVES
+          value: 'true'
+        - name: runtime
+          value: habana
+        - name: HABANA_VISIBLE_DEVICES
+          value: all
+        - name: HF_TOKEN
+          value: '${HF_TOKEN}'  # placeholder (presumably substituted before apply); quoted so it is always a string
+      serviceAccountName: default
+      volumes:
+      - name: model-volume
+        hostPath:
+          path: /mnt/models
+          type: Directory  # the path must already exist on the node
+      - name: shm
+        emptyDir:
+          medium: Memory  # memory-backed volume mounted at /dev/shm
+          sizeLimit: 1Gi
+---
+apiVersion: v1
+kind: Service  # Cluster-internal entry point for the llm-dependency-deploy pods
+metadata:
+  namespace: default
+  name: llm-dependency-svc
+spec:
+  selector:
+    app: llm-dependency-deploy
+  type: ClusterIP
+  ports:
+  - name: service
+    targetPort: 80  # container port of the selected pods
+    port: 9009  # port used by TGI_LLM_ENDPOINT in qna-config
+
+
+---
+
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment  # LLM microservice (opea/llm-tgi image); container port 9000
+metadata:
+  namespace: default
+  name: llm-deploy
+spec:
+  replicas: 2
+  selector:
+    matchLabels:
+      app: llm-deploy
+  template:
+    metadata:
+      labels:
+        app: llm-deploy
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+    spec:
+      serviceAccountName: default
+      hostIPC: true
+      nodeSelector:
+        node-type: chatqna-opea
+      topologySpreadConstraints:
+      - topologyKey: kubernetes.io/hostname
+        maxSkew: 1
+        whenUnsatisfiable: ScheduleAnyway  # soft constraint: does not block scheduling
+        labelSelector:
+          matchLabels:
+            app: llm-deploy
+      containers:
+      - name: llm-deploy
+        image: opea/llm-tgi:latest
+        imagePullPolicy: IfNotPresent
+        args: null
+        envFrom:
+        - configMapRef:
+            name: qna-config  # every qna-config key is imported as an env var
+        ports:
+        - containerPort: 9000
+        resources:  # CPU only; no memory request or limit is declared
+          requests:
+            cpu: 4
+          limits:
+            cpu: 4
+---
+apiVersion: v1
+kind: Service  # Cluster-internal entry point for the llm-deploy pods
+metadata:
+  namespace: default
+  name: llm-svc  # same name as LLM_SERVICE_HOST_IP in qna-config
+spec:
+  selector:
+    app: llm-deploy
+  type: ClusterIP
+  ports:
+  - name: service
+    targetPort: 9000
+    port: 9000
+
+
+---
+
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment  # Retriever microservice (opea/retriever-redis image); container port 7000
+metadata:
+  namespace: default
+  name: retriever-deploy
+spec:
+  replicas: 2
+  selector:
+    matchLabels:
+      app: retriever-deploy
+  template:
+    metadata:
+      labels:
+        app: retriever-deploy
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+    spec:
+      serviceAccountName: default
+      hostIPC: true
+      nodeSelector:
+        node-type: chatqna-opea
+      topologySpreadConstraints:
+      - topologyKey: kubernetes.io/hostname
+        maxSkew: 1
+        whenUnsatisfiable: ScheduleAnyway  # soft constraint: does not block scheduling
+        labelSelector:
+          matchLabels:
+            app: retriever-deploy
+      containers:
+      - name: retriever-deploy
+        image: opea/retriever-redis:latest
+        imagePullPolicy: IfNotPresent
+        args: null
+        ports:
+        - containerPort: 7000
+        resources:
+          requests:
+            cpu: 8
+            memory: 2500Mi
+          limits:  # identical to requests
+            cpu: 8
+            memory: 2500Mi
+        env:  # only these four qna-config keys are injected (no envFrom here)
+        - name: REDIS_URL
+          valueFrom:
+            configMapKeyRef:
+              key: REDIS_URL
+              name: qna-config
+        - name: TEI_EMBEDDING_ENDPOINT
+          valueFrom:
+            configMapKeyRef:
+              key: TEI_EMBEDDING_ENDPOINT
+              name: qna-config
+        - name: HUGGINGFACEHUB_API_TOKEN
+          valueFrom:
+            configMapKeyRef:
+              key: HUGGINGFACEHUB_API_TOKEN
+              name: qna-config
+        - name: INDEX_NAME
+          valueFrom:
+            configMapKeyRef:
+              key: INDEX_NAME
+              name: qna-config
+---
+apiVersion: v1
+kind: Service  # Cluster-internal entry point for the retriever-deploy pods
+metadata:
+  namespace: default
+  name: retriever-svc  # same name as RETRIEVER_SERVICE_HOST_IP in qna-config
+spec:
+  selector:
+    app: retriever-deploy
+  type: ClusterIP
+  ports:
+  - name: service
+    targetPort: 7000
+    port: 7000
+
+
+---
+
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment  # Single-replica redis-stack instance reached through REDIS_URL
+metadata:
+  namespace: default
+  name: vector-db
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: vector-db
+  template:
+    metadata:
+      labels:
+        app: vector-db
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea
+      topologySpreadConstraints:
+      - topologyKey: kubernetes.io/hostname
+        maxSkew: 1
+        whenUnsatisfiable: ScheduleAnyway  # soft constraint: does not block scheduling
+        labelSelector:
+          matchLabels:
+            app: vector-db
+      containers:
+      - image: redis/redis-stack:7.2.0-v9
+        name: vector-db
+        ports:  # both are published by the vector-db Service
+        - containerPort: 6379
+        - containerPort: 8001
+---
+apiVersion: v1
+kind: Service  # Cluster-internal entry point for the vector-db pods
+metadata:
+  namespace: default
+  name: vector-db
+spec:
+  selector:
+    app: vector-db
+  type: ClusterIP
+  ports:
+  - name: vector-db-service
+    targetPort: 6379
+    port: 6379  # port used by REDIS_URL in qna-config
+  - name: vector-db-insight
+    targetPort: 8001
+    port: 8001
+
+
+---
diff --git a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/vector-db_run.yaml b/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/vector-db_run.yaml
deleted file mode 100644
index e04e8c5fe..000000000
--- a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/vector-db_run.yaml
+++ /dev/null
@@ -1,48 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: vector-db
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: vector-db
-  template:
-    metadata:
-      labels:
-        app: vector-db
-    spec:
-      nodeSelector:
-        node-type: chatqna-opea
-      topologySpreadConstraints:
-      - maxSkew: 1
-        topologyKey: kubernetes.io/hostname
-        whenUnsatisfiable: ScheduleAnyway
-        labelSelector:
-          matchLabels:
-            app: vector-db
-      containers:
-      - name: vector-db
-        image: redis/redis-stack:7.2.0-v9
-        ports:
-        - containerPort: 6379
-        - containerPort: 8001
----
-apiVersion: v1
-kind: Service
-metadata:
-  name: vector-db
-spec:
-  type: ClusterIP
-  selector:
-    app: vector-db
-  ports:
-  - name: vector-db-service
-    port: 6379
-    targetPort: 6379
-  - name: vector-db-insight
-    port: 8001
-    targetPort: 8001