Add serving with SkyPilot #20

Merged
merged 6 commits on Mar 21, 2023
22 changes: 20 additions & 2 deletions README.md
@@ -25,8 +25,8 @@ python3 -m chatserver.server.gradio_web_server
# You can open your browser and chat with a model now.
```

-## Training
-## Train Alpaca with SkyPilot
+## Deploy Chatbot on Any Cloud with SkyPilot
+### Training Alpaca with SkyPilot
1. Install SkyPilot and set up the cloud credentials locally by following the instructions [here](https://skypilot.readthedocs.io/en/latest/getting-started/installation.html).
2. Launch the training job with the following command (it runs on a single node with 4 A100-80GB GPUs):
```
@@ -44,3 +44,21 @@ python3 -m chatserver.server.gradio_web_server
sky launch -c alpaca-2 -s --num-nodes 2 --gpus A100-80GB:8 scripts/train.yaml --env WANDB_API_KEY
```
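Note that `--env WANDB_API_KEY` forwards the value of your local `WANDB_API_KEY` environment variable into the job, so it presumably must be set in the launching shell first, e.g.:
```
export WANDB_API_KEY=<your-wandb-key>
sky launch -c alpaca-2 -s --num-nodes 2 --gpus A100-80GB:8 scripts/train.yaml --env WANDB_API_KEY
```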
Managed spot version TO BE ADDED.
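A managed spot version of the same training job would presumably mirror the `sky spot launch` command used for serving below (a sketch only; this PR leaves it as future work):
```
# Hypothetical managed spot training launch; not part of this PR.
sky spot launch --gpus A100-80GB:8 scripts/train.yaml --env WANDB_API_KEY
```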

### Serving Alpaca with SkyPilot
1. We assume SkyPilot is installed and the model checkpoint is stored on some cloud storage (e.g., GCS).
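For example, a 13B checkpoint could be staged into the GCS bucket that `scripts/serving/model_worker.yaml` mounts at `/artifacts` (a sketch; `./alpaca-13b-ckpt` is a placeholder for your local checkpoint directory):
```
# Bucket name and path match the file_mounts entry in model_worker.yaml.
gsutil -m rsync -r ./alpaca-13b-ckpt gs://skypilot-chatbot/chatbot/13b/ckpt
```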
2. Launch the controller server (defaults to a cheap CPU VM):
```
sky launch -c controller scripts/serving/controller.yaml
```
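The Gradio share link appears in the job output; if it scrolls past, it can be recovered by tailing the logs of the cluster launched above:
```
sky logs controller
```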
3. Find the IP address of the controller server on the cloud console. Make sure the ports are open (by default, port 21001 for the controller and 21002 for model workers); an example rule is shown below.
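On GCP, one way to open both ports is a firewall rule on the project's network (the rule name below is illustrative; adjust to your own network setup):
```
gcloud compute firewall-rules create chatserver-ports --allow tcp:21001,tcp:21002
```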
4. Launch a model worker (defaults to a single A100):
```
sky launch -c model-worker scripts/serving/model_worker.yaml --env CONTROLLER_IP=<controller-ip>
```
You can use spot instances to cut costs by roughly 3x. SkyPilot will automatically recover the spot instance if it is preempted ([more details](https://skypilot.readthedocs.io/en/latest/examples/spot-jobs.html)):
```
sky spot launch scripts/serving/model_worker.yaml --env CONTROLLER_IP=<controller-ip>
```
5. Click the link generated in step 2 and chat with the model :)
![screenshot](./assets/screenshot.png)
Binary file added assets/screenshot.png
29 changes: 29 additions & 0 deletions scripts/serving/controller.yaml
@@ -0,0 +1,29 @@
resources:
  cloud: gcp
  region: us-central1

num_nodes: 1
Member: What is the default vCPU we got?
Member Author: 8 by default

workdir: .

file_mounts:
  ~/chatlogs:
    name: skypilot-chatbot-logs
    store: gcs
    mode: MOUNT

setup: |
  conda activate chatbot
  if [ $? -eq 0 ]; then
    echo 'conda env exists'
  else
    # Set up the environment
    conda create -n chatbot python=3.10 -y
    conda activate chatbot
    pip3 install -e .
  fi

run: |
  conda activate chatbot
  python3 -m chatserver.server.controller --host 0.0.0.0 --port 21001 &
  python3 -m chatserver.server.gradio_web_server --share
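This one YAML brings up both processes on the same VM: the controller in the background and the Gradio web server in the foreground. To sanity-check that the controller is reachable from outside (a sketch; assumes port 21001 was opened as in step 3 and `<controller-ip>` is the address found there):
```
# Any HTTP response, even a 404, shows the port is open and the
# controller process is listening.
curl http://<controller-ip>:21001
```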
57 changes: 57 additions & 0 deletions scripts/serving/model_worker.yaml
@@ -0,0 +1,57 @@
resources:
  accelerators: A100:1
  cloud: gcp
  region: us-central1

num_nodes: 1

workdir: .

file_mounts:
  /artifacts:
    name: skypilot-chatbot
    store: gcs
    mode: MOUNT

  ~/chatlogs:
    name: skypilot-chatbot-logs
    store: gcs
    mode: MOUNT

setup: |
  conda activate chatbot
  if [ $? -eq 0 ]; then
    echo 'conda env exists'
  else
    # Set up the environment
    conda create -n chatbot python=3.10 -y
    conda activate chatbot

    pip3 install -e .

    # Install PyTorch (CUDA 11.6 build)
    pip install torch==1.13.1+cu116 --extra-index-url https://download.pytorch.org/whl/cu116

    # Install Hugging Face Transformers from git (for the LLaMA commit)
    pip install git+https://github.com/huggingface/transformers

    # Install the Stanford Alpaca repo
    git clone https://github.com/tatsu-lab/stanford_alpaca.git
Member: Why is this training repo required?
Member Author: Ah should be removed!

    cd stanford_alpaca
    pip install -r requirements.txt
    cd -
  fi

  ln -s /artifacts/chatbot/13b/ckpt/ ~/alpaca-13b

run: |
  conda activate chatbot
  WORKER_IP=$(hostname -I | cut -d' ' -f1)
  CONTROLLER_PORT=21001
  WORKER_PORT=21002
  python3 -m chatserver.server.model_worker \
    --model ~/alpaca-13b \
    --controller-address http://${CONTROLLER_IP}:${CONTROLLER_PORT} \
    --worker-address http://${WORKER_IP}:${WORKER_PORT} \
    --host 0.0.0.0 \
    --port ${WORKER_PORT}
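Since each worker registers itself with the controller via the `CONTROLLER_IP` environment variable, additional serving capacity can presumably be added by repeating the worker launch under a new cluster name (the name below is hypothetical):
```
sky launch -c model-worker-2 scripts/serving/model_worker.yaml --env CONTROLLER_IP=<controller-ip>
```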