From 62e06a0aff5b78224ab7aa36693bbc4dd6ba8725 Mon Sep 17 00:00:00 2001 From: minmin-intel Date: Thu, 26 Sep 2024 19:21:54 -0700 Subject: [PATCH] Update DocIndexRetriever Example to allow user passing in retriever/reranker params (#880) Signed-off-by: minmin-intel Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- DocIndexRetriever/README.md | 16 ++++- .../docker_compose/intel/cpu/xeon/README.md | 15 +++- .../intel/cpu/xeon/compose.yaml | 19 ++++- .../docker_compose/intel/hpu/gaudi/README.md | 15 +++- .../intel/hpu/gaudi/compose.yaml | 19 ++++- DocIndexRetriever/tests/test.py | 71 +++++++++++++++++++ .../tests/test_compose_on_gaudi.sh | 19 ++++- .../tests/test_compose_on_xeon.sh | 26 +++++-- 8 files changed, 188 insertions(+), 12 deletions(-) create mode 100644 DocIndexRetriever/tests/test.py diff --git a/DocIndexRetriever/README.md b/DocIndexRetriever/README.md index bfd09a830..0301c6205 100644 --- a/DocIndexRetriever/README.md +++ b/DocIndexRetriever/README.md @@ -1,8 +1,22 @@ # DocRetriever Application -DocRetriever are the most widely adopted use case for leveraging the different methodologies to match user query against a set of free-text records. DocRetriever is essential to RAG system, which bridges the knowledge gap by dynamically fetching relevant information from external sources, ensuring that responses generated remain factual and current. The core of this architecture are vector databases, which are instrumental in enabling efficient and semantic retrieval of information. These databases store data as vectors, allowing RAG to swiftly access the most pertinent documents or data points based on semantic similarity. +DocRetriever is the most widely adopted use case for leveraging the different methodologies to match user query against a set of free-text records. 
DocRetriever is essential to a RAG system, which bridges the knowledge gap by dynamically fetching relevant information from external sources, ensuring that responses generated remain factual and current. At the core of this architecture are vector databases, which are instrumental in enabling efficient and semantic retrieval of information. These databases store data as vectors, allowing RAG to swiftly access the most pertinent documents or data points based on semantic similarity. ## We provided DocRetriever with different deployment infra - [docker xeon version](docker_compose/intel/cpu/xeon/README.md) => minimum endpoints, easy to setup - [docker gaudi version](docker_compose/intel/hpu/gaudi/README.md) => with extra tei_gaudi endpoint, faster + +## We allow users to set retriever/reranker hyperparams via requests + +Example usage: + +```python +url = "http://{host_ip}:{port}/v1/retrievaltool".format(host_ip=host_ip, port=port) +payload = { + "messages": query, + "k": 5, # retriever top k + "top_n": 2, # reranker top n +} +response = requests.post(url, json=payload) +``` diff --git a/DocIndexRetriever/docker_compose/intel/cpu/xeon/README.md b/DocIndexRetriever/docker_compose/intel/cpu/xeon/README.md index b921cc126..58354babf 100644 --- a/DocIndexRetriever/docker_compose/intel/cpu/xeon/README.md +++ b/DocIndexRetriever/docker_compose/intel/cpu/xeon/README.md @@ -79,13 +79,26 @@ Retrieval from KnowledgeBase ```bash curl http://${host_ip}:8889/v1/retrievaltool -X POST -H "Content-Type: application/json" -d '{ - "text": "Explain the OPEA project?" + "messages": "Explain the OPEA project?" 
}' # expected output {"id":"354e62c703caac8c547b3061433ec5e8","reranked_docs":[{"id":"06d5a5cefc06cf9a9e0b5fa74a9f233c","text":"Close SearchsearchMenu WikiNewsCommunity Daysx-twitter linkedin github searchStreamlining implementation of enterprise-grade Generative AIEfficiently integrate secure, performant, and cost-effective Generative AI workflows into business value.TODAYOPEA..."}],"initial_query":"Explain the OPEA project?"} ``` +**Note**: `messages` is the required field. You can also pass in parameters for the retriever and reranker in the request. The parameters that can be changed are listed below. + + 1. retriever + * search_type: str = "similarity" + * k: int = 4 + * distance_threshold: Optional[float] = None + * fetch_k: int = 20 + * lambda_mult: float = 0.5 + * score_threshold: float = 0.2 + + 2. reranker + * top_n: int = 1 + ## 5. Trouble shooting 1. check all containers are alive diff --git a/DocIndexRetriever/docker_compose/intel/cpu/xeon/compose.yaml b/DocIndexRetriever/docker_compose/intel/cpu/xeon/compose.yaml index 23399f9d0..9fe1ed962 100644 --- a/DocIndexRetriever/docker_compose/intel/cpu/xeon/compose.yaml +++ b/DocIndexRetriever/docker_compose/intel/cpu/xeon/compose.yaml @@ -74,13 +74,30 @@ services: HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} restart: unless-stopped + tei-reranking-service: + image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + container_name: tei-reranking-server + ports: + - "8808:80" + volumes: + - "./data:/data" + shm_size: 1g + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_HUB_DISABLE_PROGRESS_BARS: 1 + HF_HUB_ENABLE_HF_TRANSFER: 0 + command: --model-id ${RERANK_MODEL_ID} --auto-truncate reranking: image: ${REGISTRY:-opea}/reranking-tei:${TAG:-latest} container_name: reranking-tei-xeon-server + depends_on: + - tei-reranking-service ports: - 
"8000:8000" ipc: host - entrypoint: python local_reranking.py environment: no_proxy: ${no_proxy} http_proxy: ${http_proxy} diff --git a/DocIndexRetriever/docker_compose/intel/hpu/gaudi/README.md b/DocIndexRetriever/docker_compose/intel/hpu/gaudi/README.md index 2cac81c8f..6799aa500 100644 --- a/DocIndexRetriever/docker_compose/intel/hpu/gaudi/README.md +++ b/DocIndexRetriever/docker_compose/intel/hpu/gaudi/README.md @@ -80,13 +80,26 @@ Retrieval from KnowledgeBase ```bash curl http://${host_ip}:8889/v1/retrievaltool -X POST -H "Content-Type: application/json" -d '{ - "text": "Explain the OPEA project?" + "messages": "Explain the OPEA project?" }' # expected output {"id":"354e62c703caac8c547b3061433ec5e8","reranked_docs":[{"id":"06d5a5cefc06cf9a9e0b5fa74a9f233c","text":"Close SearchsearchMenu WikiNewsCommunity Daysx-twitter linkedin github searchStreamlining implementation of enterprise-grade Generative AIEfficiently integrate secure, performant, and cost-effective Generative AI workflows into business value.TODAYOPEA..."}],"initial_query":"Explain the OPEA project?"} ``` +**Note**: `messages` is the required field. You can also pass in parameters for the retriever and reranker in the request. The parameters that can be changed are listed below. + + 1. retriever + * search_type: str = "similarity" + * k: int = 4 + * distance_threshold: Optional[float] = None + * fetch_k: int = 20 + * lambda_mult: float = 0.5 + * score_threshold: float = 0.2 + + 2. reranker + * top_n: int = 1 + ## 5. Trouble shooting 1. 
check all containers are alive diff --git a/DocIndexRetriever/docker_compose/intel/hpu/gaudi/compose.yaml b/DocIndexRetriever/docker_compose/intel/hpu/gaudi/compose.yaml index 831659dca..1d0a44505 100644 --- a/DocIndexRetriever/docker_compose/intel/hpu/gaudi/compose.yaml +++ b/DocIndexRetriever/docker_compose/intel/hpu/gaudi/compose.yaml @@ -77,13 +77,30 @@ services: REDIS_URL: ${REDIS_URL} INDEX_NAME: ${INDEX_NAME} restart: unless-stopped + tei-reranking-service: + image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + container_name: tei-reranking-gaudi-server + ports: + - "8808:80" + volumes: + - "./data:/data" + shm_size: 1g + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_HUB_DISABLE_PROGRESS_BARS: 1 + HF_HUB_ENABLE_HF_TRANSFER: 0 + command: --model-id ${RERANK_MODEL_ID} --auto-truncate reranking: image: ${REGISTRY:-opea}/reranking-tei:${TAG:-latest} container_name: reranking-tei-gaudi-server + depends_on: + - tei-reranking-service ports: - "8000:8000" ipc: host - entrypoint: python local_reranking.py environment: no_proxy: ${no_proxy} http_proxy: ${http_proxy} diff --git a/DocIndexRetriever/tests/test.py b/DocIndexRetriever/tests/test.py new file mode 100644 index 000000000..698f40da3 --- /dev/null +++ b/DocIndexRetriever/tests/test.py @@ -0,0 +1,71 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import argparse + +import requests + + +def search_knowledge_base(query: str, url: str, request_type="chat_completion") -> str: + """Search the knowledge base for a specific query.""" + print(url) + proxies = {"http": ""} + if request_type == "chat_completion": + print("Sending chat completion request") + payload = { + "messages": query, + "k": 5, + "top_n": 2, + } + else: + print("Sending text request") + payload = { + "text": query, + } + response = requests.post(url, json=payload, proxies=proxies) + 
print(response) + if "documents" in response.json(): + docs = response.json()["documents"] + context = "" + for i, doc in enumerate(docs): + if i == 0: + context = str(i) + ": " + doc + else: + context += "\n" + str(i) + ": " + doc + # print(context) + return context + elif "text" in response.json(): + return response.json()["text"] + elif "reranked_docs" in response.json(): + docs = response.json()["reranked_docs"] + context = "" + for i, doc in enumerate(docs): + if i == 0: + context = doc["text"] + else: + context += "\n" + doc["text"] + # print(context) + return context + else: + return "Error parsing response from the knowledge base." + + +def main(): + parser = argparse.ArgumentParser(description="Index data") + parser.add_argument("--host_ip", type=str, default="localhost", help="Host IP") + parser.add_argument("--port", type=int, default=8889, help="Port") + parser.add_argument("--request_type", type=str, default="chat_completion", help="Test type") + args = parser.parse_args() + print(args) + + host_ip = args.host_ip + port = args.port + url = "http://{host_ip}:{port}/v1/retrievaltool".format(host_ip=host_ip, port=port) + + response = search_knowledge_base("OPEA", url, request_type=args.request_type) + + print(response) + + +if __name__ == "__main__": + main() diff --git a/DocIndexRetriever/tests/test_compose_on_gaudi.sh b/DocIndexRetriever/tests/test_compose_on_gaudi.sh index 04f32a7b5..8779944be 100644 --- a/DocIndexRetriever/tests/test_compose_on_gaudi.sh +++ b/DocIndexRetriever/tests/test_compose_on_gaudi.sh @@ -64,7 +64,7 @@ function validate() { } function validate_megaservice() { - echo "Testing DataPrep Service" + echo "=========Ingest data==================" local CONTENT=$(curl -X POST "http://${ip_address}:6007/v1/dataprep" \ -H "Content-Type: multipart/form-data" \ -F 'link_list=["https://opea.dev"]') @@ -78,7 +78,7 @@ function validate_megaservice() { fi # Curl the Mega Service - echo "Testing retriever service" + echo "==============Testing 
retriever service: Text Request=================" local CONTENT=$(curl http://${ip_address}:8889/v1/retrievaltool -X POST -H "Content-Type: application/json" -d '{ "text": "Explain the OPEA project?" }') @@ -93,6 +93,21 @@ function validate_megaservice() { docker logs doc-index-retriever-server | tee -a ${LOG_PATH}/doc-index-retriever-service-gaudi.log exit 1 fi + + echo "==============Testing retriever service: ChatCompletion Request================" + cd $WORKPATH/tests + local CONTENT=$(python test.py --host_ip ${ip_address} --request_type chat_completion) + local EXIT_CODE=$(validate "$CONTENT" "OPEA" "doc-index-retriever-service-gaudi") + echo "$EXIT_CODE" + local EXIT_CODE="${EXIT_CODE:0-1}" + echo "return value is $EXIT_CODE" + if [ "$EXIT_CODE" == "1" ]; then + docker logs tei-embedding-gaudi-server | tee -a ${LOG_PATH}/doc-index-retriever-service-gaudi.log + docker logs retriever-redis-server | tee -a ${LOG_PATH}/doc-index-retriever-service-gaudi.log + docker logs reranking-tei-server | tee -a ${LOG_PATH}/doc-index-retriever-service-gaudi.log + docker logs doc-index-retriever-server | tee -a ${LOG_PATH}/doc-index-retriever-service-gaudi.log + exit 1 + fi } function stop_docker() { diff --git a/DocIndexRetriever/tests/test_compose_on_xeon.sh b/DocIndexRetriever/tests/test_compose_on_xeon.sh index 4a04030d4..c6ff29e29 100644 --- a/DocIndexRetriever/tests/test_compose_on_xeon.sh +++ b/DocIndexRetriever/tests/test_compose_on_xeon.sh @@ -63,8 +63,8 @@ function validate() { } function validate_megaservice() { - echo "Testing DataPrep Service" - local CONTENT=$(curl -X POST "http://${ip_address}:6007/v1/dataprep" \ + echo "===========Ingest data==================" + local CONTENT=$(http_proxy="" curl -X POST "http://${ip_address}:6007/v1/dataprep" \ -H "Content-Type: multipart/form-data" \ -F 'link_list=["https://opea.dev"]') local EXIT_CODE=$(validate "$CONTENT" "Data preparation succeeded" "dataprep-redis-service-xeon") @@ -77,16 +77,32 @@ function 
validate_megaservice() { fi # Curl the Mega Service - echo "Testing retriever service" + echo "================Testing retriever service: Default params================" + local CONTENT=$(curl http://${ip_address}:8889/v1/retrievaltool -X POST -H "Content-Type: application/json" -d '{ - "text": "Explain the OPEA project?" + "messages": "Explain the OPEA project?" }') local EXIT_CODE=$(validate "$CONTENT" "OPEA" "doc-index-retriever-service-xeon") echo "$EXIT_CODE" local EXIT_CODE="${EXIT_CODE:0-1}" echo "return value is $EXIT_CODE" if [ "$EXIT_CODE" == "1" ]; then - docker logs tei-embedding-xeon-server | tee -a ${LOG_PATH}/doc-index-retriever-service-xeon.log + docker logs tei-embedding-server | tee -a ${LOG_PATH}/doc-index-retriever-service-xeon.log + docker logs retriever-redis-server | tee -a ${LOG_PATH}/doc-index-retriever-service-xeon.log + docker logs reranking-tei-server | tee -a ${LOG_PATH}/doc-index-retriever-service-xeon.log + docker logs doc-index-retriever-server | tee -a ${LOG_PATH}/doc-index-retriever-service-xeon.log + exit 1 + fi + + echo "================Testing retriever service: ChatCompletion Request================" + cd $WORKPATH/tests + local CONTENT=$(python test.py --host_ip ${ip_address} --request_type chat_completion) + local EXIT_CODE=$(validate "$CONTENT" "OPEA" "doc-index-retriever-service-xeon") + echo "$EXIT_CODE" + local EXIT_CODE="${EXIT_CODE:0-1}" + echo "return value is $EXIT_CODE" + if [ "$EXIT_CODE" == "1" ]; then + docker logs tei-embedding-server | tee -a ${LOG_PATH}/doc-index-retriever-service-xeon.log docker logs retriever-redis-server | tee -a ${LOG_PATH}/doc-index-retriever-service-xeon.log docker logs reranking-tei-server | tee -a ${LOG_PATH}/doc-index-retriever-service-xeon.log docker logs doc-index-retriever-server | tee -a ${LOG_PATH}/doc-index-retriever-service-xeon.log