[Lambda][k8s] k8s deployment guide on lambda cloud #3929

Merged · 21 commits · Sep 10, 2024
93 changes: 93 additions & 0 deletions examples/k8s_cloud_deploy/README.md
@@ -0,0 +1,93 @@
# Deploying a Kubernetes cluster on the cloud in 1-click with SkyPilot

This example demonstrates how to deploy a Kubernetes cluster on the cloud with SkyPilot. For the purposes of this guide, we will use Lambda Cloud as the cloud provider, but you can change cloud providers by editing `cloud_k8s.yaml`.

## Prerequisites
1. Latest SkyPilot nightly release:
```bash
pip install "skypilot-nightly[lambda,kubernetes]"
```

2. On your Lambda Cloud dashboard, configure the firewall to allow inbound connections on ports `443` and `6443` (required to expose the k8s API server). A quick way to verify the rule is sketched after the screenshot below.

<p align="center">
<img src="https://i.imgur.com/uSA7BMH.png" alt="firewall" width="500"/>
</p>
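
Once the cluster is deployed (see Instructions below), you can sanity-check that the firewall rule took effect with a quick port probe. This is an optional check and a minimal sketch: it assumes `nc` (netcat) is available locally, and `<HEAD_VM_IP>` is a placeholder for the head VM's public IP (visible in the Lambda Cloud dashboard).

```bash
# Probe the k8s API server port from your local machine; a successful
# connection means the firewall allows inbound traffic on 6443.
nc -zv <HEAD_VM_IP> 6443
```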

## Instructions

1. Edit `cloud_k8s.yaml` to set the desired number of workers and GPUs per node.
```yaml
resources:
  cloud: lambda
  accelerators: A10:1

num_nodes: 2
```

2. Use the convenience script to launch the cluster:
```bash
./launch_k8s.sh
```

SkyPilot will do all the heavy lifting for you: provision Lambda VMs, deploy the k8s cluster, fetch the kubeconfig, and set up your local kubectl to connect to the cluster.
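
For reference, a simplified sketch of what `launch_k8s.sh` does under the hood is shown below; the actual script (included later in this PR) adds error handling, an SSH-config fallback for the endpoint, and kubeconfig rewriting.

```bash
# Provision Lambda VMs and bootstrap k3s on them, as defined in cloud_k8s.yaml.
sky launch -y -c k8s cloud_k8s.yaml

# Fetch the public endpoint of the k8s API server (port 6443).
ENDPOINT=$(SKYPILOT_DEBUG=0 sky status --endpoint 6443 k8s)

# Copy the kubeconfig generated on the head node to the local machine.
mkdir -p ~/.kube
rsync -av k8s:'~/.kube/config' ~/.kube/config
# (The real script also points the kubeconfig's server field at ${ENDPOINT}.)

# Verify that SkyPilot can now use the Kubernetes cluster.
sky check kubernetes
```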

3. You should now be able to run `kubectl` and `sky` commands to interact with the cluster:
```console
$ kubectl get nodes
NAME              STATUS   ROLES                  AGE   VERSION
129-80-133-44     Ready    <none>                 14m   v1.30.4+k3s1
150-230-191-161   Ready    control-plane,master   14m   v1.30.4+k3s1

$ sky show-gpus --cloud kubernetes
Kubernetes GPUs
GPU  QTY_PER_NODE  TOTAL_GPUS  TOTAL_FREE_GPUS
A10  1             2           2

Kubernetes per node GPU availability
NODE_NAME        GPU_NAME  TOTAL_GPUS  FREE_GPUS
129-80-133-44    A10       1           1
150-230-191-161  A10       1           1
```

## Run AI workloads on your Kubernetes cluster with SkyPilot

### Development clusters
To launch a [GPU-enabled development cluster](https://skypilot.readthedocs.io/en/latest/examples/interactive-development.html), run `sky launch -c mycluster --cloud kubernetes --gpus A10:1`.

SkyPilot will set up the SSH config for you; a combined example follows the list below.
* [SSH access](https://skypilot.readthedocs.io/en/latest/examples/interactive-development.html#ssh): `ssh mycluster`
* [VSCode remote development](https://skypilot.readthedocs.io/en/latest/examples/interactive-development.html#vscode): `code --remote ssh-remote+mycluster "/"`
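
Putting the pieces together, a typical interactive development flow might look like the following (the cluster name `mycluster` is just an example):

```bash
# Launch a one-GPU development cluster on the new Kubernetes cluster.
sky launch -c mycluster --cloud kubernetes --gpus A10:1

# SSH in; SkyPilot has already added a `mycluster` entry to your SSH config.
ssh mycluster

# Or attach VSCode to the cluster for remote development.
code --remote ssh-remote+mycluster "/"
```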


### Jobs
To run jobs, use `sky jobs launch --gpus A10:1 --cloud kubernetes -- 'nvidia-smi; sleep 600'`.
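
For example, submitting several jobs back to back could look like the following sketch; the job names and commands here are illustrative, chosen to match the sample queue output further below.

```bash
# Each job requests one A10. With only two GPUs in the cluster, the third job
# waits in the queue until a GPU frees up.
sky jobs launch -y -n sky-cmd  --cloud kubernetes --gpus A10:1 -- 'nvidia-smi; sleep 600'
sky jobs launch -y -n qlora    --cloud kubernetes --gpus A10:1 -- 'python qlora.py'
sky jobs launch -y -n finetune --cloud kubernetes --gpus A10:1 -- 'python finetune.py'
```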

You can submit multiple jobs and let SkyPilot handle queuing if the cluster runs out of resources:
```console
$ sky jobs queue
Fetching managed job statuses...
Managed jobs
In progress tasks: 2 RUNNING, 1 STARTING
ID  TASK  NAME      RESOURCES  SUBMITTED    TOT. DURATION  JOB DURATION  #RECOVERIES  STATUS
3   -     finetune  1x[A10:1]  24 secs ago  24s            -             0            STARTING
2   -     qlora     1x[A10:1]  2 min ago    2m 18s         12s           0            RUNNING
1   -     sky-cmd   1x[A10:1]  4 mins ago   4m 27s         3m 12s        0            RUNNING
```

You can also observe the pods created by SkyPilot with `kubectl get pods`:
```console
$ kubectl get pods
NAME                                     READY   STATUS    RESTARTS   AGE
qlora-2-2ea4-head                        1/1     Running   0          5m31s
sky-cmd-1-2ea4-head                      1/1     Running   0          8m36s
sky-jobs-controller-2ea485ea-2ea4-head   1/1     Running   0          10m
```

Refer to [SkyPilot docs](https://skypilot.readthedocs.io/) for more.

## Teardown
To tear down the Kubernetes cluster, run:
```bash
sky down k8s
```
96 changes: 96 additions & 0 deletions examples/k8s_cloud_deploy/cloud_k8s.yaml
@@ -0,0 +1,96 @@
resources:
  cloud: lambda
  accelerators: A10:1
  # Uncomment the following line to expose ports on a different cloud
  # ports: 6443

num_nodes: 2

envs:
  SKY_K3S_TOKEN: mytoken # Can be any string, used to join worker nodes to the cluster

run: |
  wait_for_gpu_operator_installation() {
    echo "Starting wait for GPU operator installation..."

    SECONDS=0
    TIMEOUT=600 # 10 minutes in seconds

    while true; do
      if kubectl describe nodes --kubeconfig ~/.kube/config | grep -q 'nvidia.com/gpu:'; then
        echo "GPU operator installed."
        break
      elif [ $SECONDS -ge $TIMEOUT ]; then
        echo "Timed out waiting for GPU operator installation."
        exit 1
      else
        echo "Waiting for GPU operator installation..."
        echo "To check status, see Nvidia GPU operator pods:"
        echo "kubectl get pods -n gpu-operator --kubeconfig ~/.kube/config"
        sleep 5
      fi
    done
  }

  if [ ${SKYPILOT_NODE_RANK} -ne 0 ]; then
    # Worker nodes
    MASTER_ADDR=`echo "$SKYPILOT_NODE_IPS" | head -n1`
    echo "Worker joining k3s cluster @ ${MASTER_ADDR}"
    curl -sfL https://get.k3s.io | K3S_URL=https://${MASTER_ADDR}:6443 K3S_TOKEN=${SKY_K3S_TOKEN} sh -
    exit 0
  fi

  # Head node
  curl -sfL https://get.k3s.io | K3S_TOKEN=${SKY_K3S_TOKEN} sh -

  # Copy over kubeconfig file
  echo "Copying kubeconfig file"
  mkdir -p $HOME/.kube
  sudo cp /etc/rancher/k3s/k3s.yaml $HOME/.kube/config
  sudo chown $(id -u):$(id -g) $HOME/.kube/config

  # Wait for k3s to be ready
  echo "Waiting for k3s to be ready"
  sleep 5
  kubectl wait --for=condition=ready node --all --timeout=5m --kubeconfig ~/.kube/config

  # =========== GPU support ===========
  # Install helm
  echo "Installing helm"
  curl -fsSL -o get_helm.sh https://raw.githubusercontent.com/helm/helm/master/scripts/get-helm-3
  chmod 700 get_helm.sh
  ./get_helm.sh

  helm repo add nvidia https://helm.ngc.nvidia.com/nvidia && helm repo update

  # Create namespace if it doesn't exist
  echo "Creating namespace gpu-operator"
  kubectl create namespace gpu-operator --kubeconfig ~/.kube/config || true

  # Patch ldconfig
  echo "Patching ldconfig"
  sudo ln -s /sbin/ldconfig /sbin/ldconfig.real

  # Install GPU operator
  echo "Installing GPU operator"
  helm install gpu-operator -n gpu-operator --create-namespace \
    nvidia/gpu-operator $HELM_OPTIONS \
    --set 'toolkit.env[0].name=CONTAINERD_CONFIG' \
    --set 'toolkit.env[0].value=/var/lib/rancher/k3s/agent/etc/containerd/config.toml' \
    --set 'toolkit.env[1].name=CONTAINERD_SOCKET' \
    --set 'toolkit.env[1].value=/run/k3s/containerd/containerd.sock' \
    --set 'toolkit.env[2].name=CONTAINERD_RUNTIME_CLASS' \
    --set 'toolkit.env[2].value=nvidia'

  wait_for_gpu_operator_installation

  # Create RuntimeClass
  sleep 5
  echo "Creating RuntimeClass"
  kubectl apply --kubeconfig ~/.kube/config -f - <<EOF
  apiVersion: node.k8s.io/v1
  kind: RuntimeClass
  metadata:
    name: nvidia
  handler: nvidia
  EOF
87 changes: 87 additions & 0 deletions examples/k8s_cloud_deploy/launch_k8s.sh
@@ -0,0 +1,87 @@
#!/bin/bash
echo -e "\033[1m===== SkyPilot Kubernetes cluster deployment script =====\033[0m"
echo -e "This script will deploy a Kubernetes cluster on the cloud and GPUs specified in cloud_k8s.yaml.\n"

set -ex

# Read cluster name from environment variable if it exists, else use default value
CLUSTER_NAME=${CLUSTER_NAME:-k8s}

# Deploy the k8s cluster
sky launch -y -c ${CLUSTER_NAME} cloud_k8s.yaml

# Get the endpoint of the k8s cluster
# Attempt to get the primary endpoint and handle any errors
PRIMARY_ENDPOINT=""
SKY_STATUS_OUTPUT=$(SKYPILOT_DEBUG=0 sky status --endpoint 6443 ${CLUSTER_NAME} 2>&1) || true

# Check if the command was successful and if the output contains a valid IP address
if [[ "$SKY_STATUS_OUTPUT" != *"ValueError"* ]]; then
  PRIMARY_ENDPOINT="$SKY_STATUS_OUTPUT"
else
  echo "Primary endpoint retrieval failed or unsupported. Falling back to alternate method..."
fi

# If primary endpoint is empty or invalid, try to fetch from SSH config
if [[ -z "$PRIMARY_ENDPOINT" ]]; then
  echo "Using alternate method to fetch endpoint..."

  # Parse the HostName from the SSH config file
  SSH_CONFIG_FILE="$HOME/.sky/generated/ssh/${CLUSTER_NAME}"
  if [[ -f "$SSH_CONFIG_FILE" ]]; then
    ENDPOINT=$(awk '/^ *HostName / { print $2; exit}' "$SSH_CONFIG_FILE")
    ENDPOINT="${ENDPOINT}:6443"
  fi

  if [[ -z "$ENDPOINT" ]]; then
    echo "Failed to retrieve a valid endpoint. Exiting."
    exit 1
  fi
else
  ENDPOINT="$PRIMARY_ENDPOINT"
  echo "Using primary endpoint: $ENDPOINT"
fi

# Rsync the remote kubeconfig to the local machine
mkdir -p ~/.kube
rsync -av ${CLUSTER_NAME}:'~/.kube/config' ~/.kube/config

KUBECONFIG_FILE="$HOME/.kube/config"

# Back up the original kubeconfig file if it exists
if [[ -f "$KUBECONFIG_FILE" ]]; then
  echo "Backing up kubeconfig file to ${KUBECONFIG_FILE}.bak"
  cp "$KUBECONFIG_FILE" "${KUBECONFIG_FILE}.bak"
fi

# Temporary file to hold the modified kubeconfig
TEMP_FILE=$(mktemp)

# Remove the certificate-authority-data field and point the server address at the cluster's public endpoint
awk '
BEGIN { in_cluster = 0 }
/^clusters:/ { in_cluster = 1 }
/^users:/ { in_cluster = 0 }
in_cluster && /^ *certificate-authority-data:/ { next }
in_cluster && /^ *server:/ {
  print "    server: https://'${ENDPOINT}'"
  print "    insecure-skip-tls-verify: true"
  next
}
{ print }
' "$KUBECONFIG_FILE" > "$TEMP_FILE"

# Replace the original kubeconfig with the modified one
mv "$TEMP_FILE" "$KUBECONFIG_FILE"

echo "Updated kubeconfig file successfully."

sleep 5 # Wait for the cluster to be ready
sky check kubernetes

set +x
echo -e "\033[1m===== Kubernetes cluster deployment complete =====\033[0m"
echo -e "You can now access your k8s cluster with kubectl and skypilot.\n"
echo -e "• View the list of available GPUs on Kubernetes: \033[1msky show-gpus --cloud kubernetes\033[0m"
echo -e "• To launch a SkyPilot job running nvidia-smi on this cluster: \033[1msky launch --cloud kubernetes --gpus <GPU> -- nvidia-smi\033[0m"

4 changes: 2 additions & 2 deletions sky/templates/lambda-ray.yml.j2
@@ -89,13 +89,13 @@ setup_commands:
# Increment the following for catching performance bugs easier:
# current num items (num SSH connections): 2
head_start_ray_commands:
- {{ sky_activate_python_env }}; {{ sky_ray_cmd }} stop; RAY_SCHEDULER_EVENTS=0 RAY_DEDUP_LOGS=0 {{ sky_ray_cmd }} start --disable-usage-stats --head --port={{ray_port}} --dashboard-port={{ray_dashboard_port}} --object-manager-port=8076 --autoscaling-config=~/ray_bootstrap_config.yaml {{"--resources='%s'" % custom_resources if custom_resources}} --temp-dir {{ray_temp_dir}} || exit 1;
- {{ sky_activate_python_env }}; {{ sky_ray_cmd }} stop; RAY_SCHEDULER_EVENTS=0 RAY_DEDUP_LOGS=0 {{ sky_ray_cmd }} start --disable-usage-stats --head --port={{ray_port}} --min-worker-port 11002 --dashboard-port={{ray_dashboard_port}} --object-manager-port=8076 --autoscaling-config=~/ray_bootstrap_config.yaml {{"--resources='%s'" % custom_resources if custom_resources}} --temp-dir {{ray_temp_dir}} || exit 1;
which prlimit && for id in $(pgrep -f raylet/raylet); do sudo prlimit --nofile=1048576:1048576 --pid=$id || true; done;
{{dump_port_command}}; {{ray_head_wait_initialized_command}}

{%- if num_nodes > 1 %}
worker_start_ray_commands:
- {{ sky_activate_python_env }}; {{ sky_ray_cmd }} stop; RAY_SCHEDULER_EVENTS=0 RAY_DEDUP_LOGS=0 {{ sky_ray_cmd }} start --disable-usage-stats --address=$RAY_HEAD_IP:{{ray_port}} --object-manager-port=8076 {{"--resources='%s'" % custom_resources if custom_resources}} --temp-dir {{ray_temp_dir}} || exit 1;
- {{ sky_activate_python_env }}; {{ sky_ray_cmd }} stop; RAY_SCHEDULER_EVENTS=0 RAY_DEDUP_LOGS=0 {{ sky_ray_cmd }} start --disable-usage-stats --address=$RAY_HEAD_IP:{{ray_port}} --min-worker-port 11002 --object-manager-port=8076 {{"--resources='%s'" % custom_resources if custom_resources}} --temp-dir {{ray_temp_dir}} || exit 1;
which prlimit && for id in $(pgrep -f raylet/raylet); do sudo prlimit --nofile=1048576:1048576 --pid=$id || true; done;
{%- else %}
worker_start_ray_commands: []