diff --git a/workflows/chatbot/fine_tuning/README.md b/workflows/chatbot/fine_tuning/README.md
index 74b386466cc..8b5a412302c 100644
--- a/workflows/chatbot/fine_tuning/README.md
+++ b/workflows/chatbot/fine_tuning/README.md
@@ -18,7 +18,7 @@ Recommend python 3.9 or higher version.
 ```shell
 pip install -r requirements.txt
 # To use ccl as the distributed backend in distributed training on CPU requires to install below requirement.
-python -m pip install oneccl_bind_pt==1.13 -f https://developer.intel.com/ipex-whl-stable-cpu
+python -m pip install oneccl_bind_pt -f https://developer.intel.com/ipex-whl-stable-cpu
 ```
 
 ## 2. Prepare the Model
@@ -156,11 +156,11 @@ python finetune_clm.py \
 # the script also support other models, like mpt.
 ```
 
-- use the below command line for code tuning with `meta-llama/Llama-2-7b` on [theblackcat102/evol-codealpaca-v1](https://huggingface.co/datasets/theblackcat102/evol-codealpaca-v1).
+- use the command line below for code tuning with `meta-llama/Llama-2-7b-hf` on [theblackcat102/evol-codealpaca-v1](https://huggingface.co/datasets/theblackcat102/evol-codealpaca-v1).
 
 ```bash
 python finetune_clm.py \
-        --model_name_or_path "meta-llama/Llama-2-7b" \
+        --model_name_or_path "meta-llama/Llama-2-7b-hf" \
         --bf16 True \
         --dataset_name "theblackcat102/evol-codealpaca-v1" \
         --per_device_train_batch_size 8 \
diff --git a/workflows/chatbot/fine_tuning/docker/Dockerfile b/workflows/chatbot/fine_tuning/docker/Dockerfile
index a114474acd4..883e2d01caa 100644
--- a/workflows/chatbot/fine_tuning/docker/Dockerfile
+++ b/workflows/chatbot/fine_tuning/docker/Dockerfile
@@ -62,8 +62,7 @@ RUN conda init bash && \
     echo "conda activate chatbot-finetuning" >> ~/.bashrc && \
     source ~/.bashrc
 
-SHELL ["/bin/bash", "--login", "-c", "conda", "run", "-n", "chatbot-finetuning"]
-RUN wget https://intel-extension-for-pytorch.s3.amazonaws.com/torch_ccl/cpu/oneccl_bind_pt-1.13.0%2Bcpu-cp39-cp39-linux_x86_64.whl && \
+RUN source activate && conda activate chatbot-finetuning && pip install oneccl_bind_pt -f https://developer.intel.com/ipex-whl-stable-cpu && \
     pip install datasets torch accelerate SentencePiece evaluate nltk rouge_score protobuf==3.20.1 tokenizers einops && \
     git clone https://github.com/huggingface/peft.git && cd peft && python setup.py install && \
     cd /itrex && pip install -v . && \
@@ -104,4 +103,4 @@ RUN git clone --single-branch --branch=${ITREX_VER} ${REPO} itrex && \
 RUN cd /itrex && pip install -v . && \
     pip install transformers==4.28.1
 
-WORKDIR /itrex/workflows/chatbot/fine_tuning
\ No newline at end of file
+WORKDIR /itrex/workflows/chatbot/fine_tuning
diff --git a/workflows/chatbot/inference/README.md b/workflows/chatbot/inference/README.md
index 2474113ef5e..459a06c1401 100644
--- a/workflows/chatbot/inference/README.md
+++ b/workflows/chatbot/inference/README.md
@@ -88,6 +88,7 @@ python generate.py \
         --base_model_path "decapoda-research/llama-7b-hf" \
         --peft_model_path "./llama_peft_finetuned_model" \
         --use_slow_tokenizer \
+        --use_kv_cache \
         --instructions "Transform the following sentence into one that shows contrast. The tree is rotten."
 ```
 
@@ -100,6 +101,7 @@ python generate.py \
         --repetition_penalty 1.2 \
         --base_model_path "decapoda-research/llama-7b-hf" \
         --use_slow_tokenizer \
+        --use_kv_cache \
         --instructions "Tell me about China."
 ```
 
diff --git a/workflows/chatbot/inference/docker/Dockerfile b/workflows/chatbot/inference/docker/Dockerfile
index e0432f3ca1d..bbf553a0b2d 100644
--- a/workflows/chatbot/inference/docker/Dockerfile
+++ b/workflows/chatbot/inference/docker/Dockerfile
@@ -61,9 +61,8 @@ RUN conda init bash && \
     conda create -yn chatbot-demo python=3.9 && \
     echo "conda activate chatbot-demo" >> ~/.bashrc
 
-SHELL ["/bin/bash", "--login", "-c", "conda", "run", "-n", "chatbot-demo"]
-
-RUN conda install astunparse ninja pyyaml mkl mkl-include setuptools cmake cffi typing_extensions future six requests dataclasses -y && \
+RUN source activate && conda activate chatbot-demo && \
+    conda install astunparse ninja pyyaml mkl mkl-include setuptools cmake cffi typing_extensions future six requests dataclasses -y && \
     conda install jemalloc gperftools -c conda-forge -y && \
     conda install pytorch torchvision torchaudio cpuonly -c pytorch && \
     pip install farm-haystack==1.14.0 && \
@@ -72,11 +71,8 @@ RUN conda install astunparse ninja pyyaml mkl mkl-include setuptools cmake cffi
     pip install transformers diffusers accelerate SentencePiece peft evaluate nltk datasets && \
     pip install fastapi uvicorn sse_starlette bottle gevent pymysql && \
     pip install schema && \
-    pip install -i https://test.pypi.org/simple/ intel-extension-for-transformers==1.0.0.dev20230602 && \
-    pip install datasets torch transformers sentencepiece peft evaluate nltk rouge_score
-
-
-RUN cd /root/chatbot && git clone https://github.com/intel/intel-extension-for-transformers.git \
+    pip install datasets torch transformers sentencepiece peft evaluate nltk rouge_score && \
+    cd /root/chatbot && git clone https://github.com/intel/intel-extension-for-transformers.git \
     && cd ./intel-extension-for-transformers/workflows/chatbot/inference/ && pip install -r requirements.txt
 
 WORKDIR /root/chatbot/intel-extension-for-transformers/workflows/chatbot/inference/