Merge branch 'main' into faqgen
zhlsunshine authored Sep 20, 2024
2 parents 66c6634 + d55ded4 commit c11330b
Showing 15 changed files with 335 additions and 22 deletions.
123 changes: 123 additions & 0 deletions .github/workflows/pr-path-detection.yml
@@ -0,0 +1,123 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

name: Check Paths and Hyperlinks

on:
pull_request:
branches: [main]
types: [opened, reopened, ready_for_review, synchronize]

jobs:
check-the-validity-of-hyperlinks-in-README:
runs-on: ubuntu-latest
steps:
- name: Clean Up Working Directory
run: sudo rm -rf ${{github.workspace}}/*

- name: Checkout Repo GenAIInfra
uses: actions/checkout@v4

- name: Check the Validity of Hyperlinks
run: |
cd ${{github.workspace}}
fail="FALSE"
url_lines=$(grep -Eo '\]\(http[s]?://[^)]+\)' --include='*.md' -r .|grep -Ev 'GenAIEval/blob/main')
if [ -n "$url_lines" ]; then
for url_line in $url_lines; do
url=$(echo "$url_line"|cut -d '(' -f2 | cut -d ')' -f1|sed 's/\.git$//')
path=$(echo "$url_line"|cut -d':' -f1 | cut -d'/' -f2-)
response=$(curl -L -s -o /dev/null -w "%{http_code}" "$url")
if [ "$response" -ne 200 ]; then
echo "**********Validation failed, try again**********"
response_retry=$(curl -s -o /dev/null -w "%{http_code}" "$url")
if [ "$response_retry" -eq 200 ]; then
echo "*****Retry successfully*****"
else
echo "Invalid link from ${{github.workspace}}/$path: $url"
fail="TRUE"
fi
fi
done
fi
if [[ "$fail" == "TRUE" ]]; then
exit 1
else
echo "All hyperlinks are valid."
fi
shell: bash

check-the-validity-of-relative-path:
runs-on: ubuntu-latest
steps:
- name: Clean up Working Directory
run: sudo rm -rf ${{github.workspace}}/*

- name: Checkout Repo GenAIInfra
uses: actions/checkout@v4

- name: Checking Relative Path Validity
run: |
cd ${{github.workspace}}
fail="FALSE"
repo_name=${{ github.event.pull_request.head.repo.full_name }}
if [ "$(echo "$repo_name"|cut -d'/' -f1)" != "opea-project" ]; then
owner=$(echo "${{ github.event.pull_request.head.repo.full_name }}" |cut -d'/' -f1)
branch="https://github.com/$owner/GenAIInfra/tree/${{ github.event.pull_request.head.ref }}"
else
branch="https://github.com/opea-project/GenAIInfra/blob/${{ github.event.pull_request.head.ref }}"
fi
link_head="https://github.com/opea-project/GenAIInfra/blob/main"
png_lines=$(grep -Eo '\]\([^)]+\)' --include='*.md' -r .|grep -Ev 'http')
if [ -n "$png_lines" ]; then
for png_line in $png_lines; do
refer_path=$(echo "$png_line"|cut -d':' -f1 | cut -d'/' -f2-)
png_path=$(echo "$png_line"|cut -d '(' -f2 | cut -d ')' -f1)
if [[ "${png_path:0:1}" == "/" ]]; then
check_path=${{github.workspace}}$png_path
elif [[ "${png_path:0:1}" == "#" ]]; then
check_path=${{github.workspace}}/$refer_path$png_path
else
check_path=${{github.workspace}}/$(dirname "$refer_path")/$png_path
fi
real_path=$(realpath $check_path)
if [ $? -ne 0 ]; then
echo "Path $png_path in file ${{github.workspace}}/$refer_path does not exist"
fail="TRUE"
else
url=$link_head$(echo "$real_path" | sed 's|.*/GenAIInfra||')
response=$(curl -I -L -s -o /dev/null -w "%{http_code}" "$url")
if [ "$response" -ne 200 ]; then
echo "**********Validation failed, try again**********"
response_retry=$(curl -s -o /dev/null -w "%{http_code}" "$url")
if [ "$response_retry" -eq 200 ]; then
echo "*****Retry successfully*****"
else
echo "Retry failed. Check branch ${{ github.event.pull_request.head.ref }}"
url_dev=$branch$(echo "$real_path" | sed 's|.*/GenAIInfra||')
response=$(curl -I -L -s -o /dev/null -w "%{http_code}" "$url_dev")
if [ "$response" -ne 200 ]; then
echo "**********Validation failed, try again**********"
response_retry=$(curl -s -o /dev/null -w "%{http_code}" "$url_dev")
if [ "$response_retry" -eq 200 ]; then
echo "*****Retry successfully*****"
else
echo "Invalid path from ${{github.workspace}}/$refer_path: $png_path"
fail="TRUE"
fi
else
echo "Check branch ${{ github.event.pull_request.head.ref }} successfully."
fi
fi
fi
fi
done
fi
if [[ "$fail" == "TRUE" ]]; then
exit 1
else
echo "All hyperlinks are valid."
fi
shell: bash
2 changes: 1 addition & 1 deletion README.md
@@ -49,7 +49,7 @@ The following steps are optional. They're only required if you want to run the w

Follow [GMC README](https://github.com/opea-project/GenAIInfra/blob/main/microservices-connector/README.md)
to install GMC into your kubernetes cluster. [GenAIExamples](https://github.com/opea-project/GenAIExamples) contains several sample GenAI example use case pipelines such as ChatQnA, DocSum, etc.
Once you have deployed GMC in your Kubernetes cluster, you can deploy any of the example pipelines by following its Readme file (e.g. [Docsum](https://github.com/opea-project/GenAIExamples/blob/main/DocSum/kubernetes/README.md)).
Once you have deployed GMC in your Kubernetes cluster, you can deploy any of the example pipelines by following its Readme file (e.g. [Docsum](https://github.com/opea-project/GenAIExamples/blob/main/DocSum/kubernetes/intel/README_gmc.md)).

### Use helm charts to deploy

34 changes: 27 additions & 7 deletions helm-charts/README.md
@@ -7,11 +7,12 @@ This directory contains helm charts for [GenAIComps](https://github.com/opea-pro
- [Helm Charts](#helm-charts)
- [Examples](#examples)
- [Components](#components)
- [How to deploy with helm charts](#deploy-with-helm-charts)
- [Deploy with helm charts](#deploy-with-helm-charts)
- [From Source Code](#from-source-code)
- [Using Helm Charts repository](#using-helm-charts-repository)
- [Helm Charts Options](#helm-charts-options)
- [Using Persistent Volume](#using-persistent-volume)
- [Using Private Docker Hub](#using-private-docker-hub)
- [Helm Charts repository](#helm-chart-repository)
- [Generate manifests from Helm Charts](#generate-manifests-from-helm-charts)

## Helm Charts
@@ -35,7 +36,9 @@ Components which are building blocks for AI application.
All components helm charts are put in the ./common directory, and the support list is growing.
Refer to [GenAIComps](https://github.com/opea-project/GenAIComps) for details of each component.

## How to deploy with helm charts
## Deploy with helm charts

### From Source Code

These helm charts are designed to be easy to start with, which means you can deploy a workload without further options.
However, `HUGGINGFACEHUB_API_TOKEN` should be set in most cases for a workload to start up correctly.
@@ -50,6 +53,27 @@ helm install $myrelease $chartname --set global.HUGGINGFACEHUB_API_TOKEN="insert

Depending on your environment, you might want to customize some of the options; see [Helm Charts Options](#helm-charts-options) for further information.

### Using Helm Charts repository

The helm charts are released to this [helm chart repository](https://opea-project.github.io/GenAIInfra), so you can use it directly.

Add the repo:

`helm repo add opea https://opea-project.github.io/GenAIInfra`

Show all helm charts available in the repo:

```
helm repo update
helm search repo opea
```

Install the chart:

`helm install tgi opea/tgi`

Use `helm --help` to see all commands for helm, and check [Helm Charts Options](#helm-charts-options) for more installation options.

## Helm Charts Options

Here we list a few important options that users might want to change. For more options, read each helm chart's README file and check its values.yaml and gaudi-values.yaml (if applicable).
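
For instance, options can be overridden at install time on the command line or collected in a custom values file. A minimal sketch (the release name and values file below are illustrative; `tgi.LLM_MODEL_ID` is an option from the chatqna chart):

```bash
# Override a single option on the command line
helm install myrelease ./chatqna --set tgi.LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"

# Or collect multiple overrides in a custom values file
helm install myrelease ./chatqna -f my-values.yaml
```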
@@ -143,10 +167,6 @@ export OPEA_IMAGE_REPO=192.168.0.100:5000/
find . -name '*values.yaml' -type f -exec sed -i "s#repository: opea/*#repository: ${OPEA_IMAGE_REPO}opea/#g" {} \;
```

## Helm Charts repository (Experimental)

https://opea-project.github.io/GenAIInfra

## Generate manifests from Helm Charts

Some users may want to use kubernetes manifests (yaml files) for workload deployment. We do not maintain the manifests themselves; they are generated using `helm template`.
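
A minimal sketch of this flow (the release name, chart path, and output file below are illustrative):

```bash
# Render the manifests locally instead of installing a release
helm template myrelease ./common/tgi \
  --set global.HUGGINGFACEHUB_API_TOKEN="insert-your-huggingface-token-here" > tgi-manifest.yaml

# Apply the generated manifests with kubectl
kubectl apply -f tgi-manifest.yaml
```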
4 changes: 4 additions & 0 deletions helm-charts/chatqna/README.md
@@ -77,3 +77,7 @@ Open a browser to access `http://<k8s-node-ip-address>:${port}` to play with the
| service.port | string | `"8888"` | |
| tgi.LLM_MODEL_ID | string | `"Intel/neural-chat-7b-v3-3"` | Model id from https://huggingface.co/, or a pre-downloaded model directory |
| global.horizontalPodAutoscaler.enabled | bool | false | HPA autoscaling for the TGI and TEI service deployments based on metrics they provide. See HPA section in ../README.md before enabling! |

## Troubleshooting

If you encounter any issues, please refer to [ChatQnA Troubleshooting](troubleshooting.md).
153 changes: 153 additions & 0 deletions helm-charts/chatqna/troubleshooting.md
@@ -0,0 +1,153 @@
## ChatQnA Troubleshooting

After deploying ChatQnA with the helm chart, you can use the following commands to check whether each service is working properly.
These commands walk through the steps of how RAG works with the LLM.

### A function to get the endpoint of a service

This is a base helper used to get the endpoint of each ChatQnA component service.

```bash
svc_endpoint() {
endpoint=$(kubectl -n ${2:-default} get svc -l ${1} -o jsonpath='{.items[0].spec.clusterIP}:{.items[0].spec.ports[0].port}')
echo "${endpoint}"
}
```
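
For example, to resolve the TGI service endpoint (the label and namespace below are illustrative):

```bash
endpoint=$(svc_endpoint 'app.kubernetes.io/name=tgi' opea-chatqna)
echo "${endpoint}"   # prints something like 10.96.12.34:80
```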

### Define the namespace of the service

Please specify the namespace of ChatQnA; it falls back to **default** if not defined.

```bash
# define your namespace
ns=opea-chatqna
```

Check the available namespace by:

```console
kubectl get ns
NAME STATUS AGE
calico-system Active 21d
cert-manager Active 21d
default Active 21d
kube-public Active 21d
kube-system Active 21d
nfd Active 21d
observability Active 21d
opea-chatqna Active 21d
openebs Active 21d
orchestrator-system Active 21d
tigera-operator Active 21d
```

### Upload a file to the database

This step uploads a PDF with Nike revenue information to the vector database.

```bash
# data-prep
label='app.kubernetes.io/name=data-prep'

wget https://raw.githubusercontent.com/opea-project/GenAIComps/refs/heads/main/comps/retrievers/redis/data/nke-10k-2023.pdf

endpoint=$(svc_endpoint ${label} ${ns})
echo $endpoint
curl -x "" -X POST "http://${endpoint}/v1/dataprep" \
-H "Content-Type: multipart/form-data" \
-F "files=@./nke-10k-2023.pdf"
```

> **_NOTE:_** Get the service label by:
>
> ```bash
> kubectl get -n ${ns} svc -o json | jq .items[].metadata.labels
> ```
>
> You can use **grep** to filter the labels by key.
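>
> For example, to filter by a label key (the key below is illustrative):
>
> ```bash
> kubectl get -n ${ns} svc -o json | jq '.items[].metadata.labels' | grep 'app.kubernetes.io/name'
> ```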

### Get the embedding of the input

This step gets the embedding of your input/question.

```bash
label='app.kubernetes.io/name=tei'
input="What is the revenue of Nike in 2023?"
endpoint=$(svc_endpoint ${label} ${ns})
echo $endpoint
your_embedding=$(curl -x "" http://${endpoint}/embed \
-X POST \
-d '{"inputs":"'"$input"'"}' \
-H 'Content-Type: application/json' |jq .[0] -c)
```
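
To sanity-check the result, verify that you received a non-empty vector (the exact dimension depends on the embedding model):

```bash
# the embedding should be a JSON array of floats, e.g. 768 elements
echo "${your_embedding}" | jq 'length'
```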
### Get the retrieved docs

This step retrieves the docs related to your input/question.

```bash
label='app.kubernetes.io/name=retriever-usvc'
text=$input

endpoint=$(svc_endpoint ${label} ${ns})
echo $endpoint

retrieved_docs=$(curl -x "" http://${endpoint}/v1/retrieval \
-X POST \
-d "{\"text\":\"${text}\",\"embedding\":${your_embedding}}" \
-H 'Content-Type: application/json' | jq -c .retrieved_docs)
```
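
To check whether anything was actually retrieved:

```bash
# count the retrieved documents; an empty array suggests the upload step failed
echo "${retrieved_docs}" | jq 'length'
```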

### Rerank the docs

This step reranks the retrieved docs and selects the one most relevant to your input/question.

```bash
label='app.kubernetes.io/name=reranking-usvc'
query=$input

endpoint=$(svc_endpoint ${label} ${ns})
echo $endpoint

reranking_docs=$(curl -x "" http://${endpoint}/v1/reranking \
-X POST \
-d '{"initial_query":"'"$query"'", "retrieved_docs": '"$retrieved_docs"'}' \
-H 'Content-Type: application/json' | jq -c .documents[0])

# strip escaped and literal double quotes from the doc text
reranking_docs=$(sed 's/\\"/ /g' <<< "${reranking_docs}")
reranking_docs=$(tr -d '"' <<< "${reranking_docs}")
```
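
You can preview the cleaned text before passing it to the LLM:

```bash
# show the first part of the reranked document text
echo "${reranking_docs}" | head -c 200
```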

### TGI Q and A

This step generates the answer to your question.

```bash
label='app.kubernetes.io/name=tgi'

endpoint=$(svc_endpoint ${label} ${ns})
echo $endpoint

# your question
query=${input}
# inputs template.
inputs="### You are a helpful, respectful and honest assistant to help the user with questions. Please refer to the search results obtained from the local knowledge base. But be careful to not incorporate the information that you think is not relevant to the question. If you don't know the answer to a question, please don't share false information. ### Search results: ${reranking_docs} ### Question: ${query} \n\n### Answer:"

curl -x "" http://${endpoint}/generate \
-X POST \
-d '{"inputs":"'"${inputs}"'","parameters":{"max_new_tokens":1024, "do_sample": true}}' \
-H 'Content-Type: application/json'
```

The output:

```console
{"generated_text":" In fiscal 2023, NIKE, Inc. achieved record Revenues of $51.2 billion."}
```

### Reference

[Build Mega Service of ChatQnA on Xeon](https://github.com/opea-project/GenAIExamples/tree/main/ChatQnA/docker_compose/intel/cpu/xeon)
2 changes: 1 addition & 1 deletion helm-charts/common/tei/README.md
@@ -41,4 +41,4 @@ curl http://localhost:2081/embed -X POST -d '{"inputs":"What is Deep Learning?"}
| global.modelUseHostPath | string | `"/mnt/opea-models"` | Cached models directory, tei will not download if the model is cached here. The host path "modelUseHostPath" will be mounted to container as /data directory. Set this to null/empty will force it to download model. |
| image.repository | string | `"ghcr.io/huggingface/text-embeddings-inference"` | |
| image.tag | string | `"cpu-1.5"` | |
| horizontalPodAutoscaler.enabled | bool | false | Enable HPA autoscaling for the service deployment based on metrics it provides. See HPA section in ../../README.md before enabling! |
| horizontalPodAutoscaler.enabled | bool | false | Enable HPA autoscaling for the service deployment based on metrics it provides. See [HPA section](../../HPA.md) before enabling! |
2 changes: 1 addition & 1 deletion helm-charts/common/teirerank/README.md
@@ -44,4 +44,4 @@ curl http://localhost:2082/rerank \
| global.modelUseHostPath | string | `"/mnt/opea-models"` | Cached models directory, teirerank will not download if the model is cached here. The host path "modelUseHostPath" will be mounted to container as /data directory. Set this to null/empty will force it to download model. |
| image.repository | string | `"ghcr.io/huggingface/text-embeddings-inference"` | |
| image.tag | string | `"cpu-1.5"` | |
| horizontalPodAutoscaler.enabled | bool | false | Enable HPA autoscaling for the service deployment based on metrics it provides. See HPA section in ../../README.md before enabling! |
| horizontalPodAutoscaler.enabled | bool | false | Enable HPA autoscaling for the service deployment based on metrics it provides. See [HPA section](../../HPA.md) before enabling! |
2 changes: 1 addition & 1 deletion helm-charts/common/tgi/README.md
@@ -48,4 +48,4 @@ curl http://localhost:2080/generate \
| global.modelUseHostPath | string | `"/mnt/opea-models"` | Cached models directory, tgi will not download if the model is cached here. The host path "modelUseHostPath" will be mounted to container as /data directory. Set this to null/empty will force it to download model. |
| image.repository | string | `"ghcr.io/huggingface/text-generation-inference"` | |
| image.tag | string | `"1.4"` | |
| horizontalPodAutoscaler.enabled | bool | false | Enable HPA autoscaling for the service deployment based on metrics it provides. See HPA section in ../../README.md before enabling! |
| horizontalPodAutoscaler.enabled | bool | false | Enable HPA autoscaling for the service deployment based on metrics it provides. See [HPA section](../../HPA.md) before enabling! |
3 changes: 2 additions & 1 deletion helm-charts/common/tgi/values.yaml
@@ -26,7 +26,8 @@ image:
repository: ghcr.io/huggingface/text-generation-inference
pullPolicy: IfNotPresent
# Overrides the image tag whose default is the chart appVersion.
tag: "2.2.0"
# `sha-e4201f4-intel-cpu` is the image tag for the Intel CPU-optimized TGI image
tag: "sha-e4201f4-intel-cpu"

# empty for CPU
accelDevice: ""
2 changes: 1 addition & 1 deletion helm-charts/common/vllm/README.md
@@ -2,7 +2,7 @@

Helm chart for deploying vLLM Inference service.

Refer to [Deploy with Helm Charts](../README.md) for global guides.
Refer to [Deploy with Helm Charts](../../README.md) for global guides.

## Installing the Chart

