diff --git a/.buildkite/lm-eval-harness/run-lm-eval-gsm-hf-baseline.sh b/.buildkite/lm-eval-harness/run-lm-eval-gsm-hf-baseline.sh
index b2e910e1ba8a7..a67fc89d54e60 100644
--- a/.buildkite/lm-eval-harness/run-lm-eval-gsm-hf-baseline.sh
+++ b/.buildkite/lm-eval-harness/run-lm-eval-gsm-hf-baseline.sh
@@ -41,6 +41,6 @@ while getopts "m:b:l:f:" OPT; do
 done
 
 lm_eval --model hf \
-  --model_args pretrained=$MODEL,parallelize=True \
-  --tasks gsm8k --num_fewshot $FEWSHOT --limit $LIMIT \
-  --batch_size $BATCH_SIZE
+  --model_args "pretrained=$MODEL,parallelize=True" \
+  --tasks gsm8k --num_fewshot "$FEWSHOT" --limit "$LIMIT" \
+  --batch_size "$BATCH_SIZE"
diff --git a/.buildkite/lm-eval-harness/run-lm-eval-gsm-vllm-baseline.sh b/.buildkite/lm-eval-harness/run-lm-eval-gsm-vllm-baseline.sh
index 4d32b49a4fac3..65be3c5d93b20 100644
--- a/.buildkite/lm-eval-harness/run-lm-eval-gsm-vllm-baseline.sh
+++ b/.buildkite/lm-eval-harness/run-lm-eval-gsm-vllm-baseline.sh
@@ -46,6 +46,6 @@ while getopts "m:b:l:f:t:" OPT; do
 done
 
 lm_eval --model vllm \
-  --model_args pretrained=$MODEL,tensor_parallel_size=$TP_SIZE,distributed_executor_backend="ray",trust_remote_code=true,max_model_len=4096 \
-  --tasks gsm8k --num_fewshot $FEWSHOT --limit $LIMIT \
-  --batch_size $BATCH_SIZE
+  --model_args "pretrained=$MODEL,tensor_parallel_size=$TP_SIZE,distributed_executor_backend=ray,trust_remote_code=true,max_model_len=4096" \
+  --tasks gsm8k --num_fewshot "$FEWSHOT" --limit "$LIMIT" \
+  --batch_size "$BATCH_SIZE"
diff --git a/.buildkite/lm-eval-harness/run-tests.sh b/.buildkite/lm-eval-harness/run-tests.sh
index b4fdde6dab425..26f33b744289a 100644
--- a/.buildkite/lm-eval-harness/run-tests.sh
+++ b/.buildkite/lm-eval-harness/run-tests.sh
@@ -30,7 +30,7 @@ while getopts "c:t:" OPT; do
 done
 
 # Parse list of configs.
-IFS=$'\n' read -d '' -r -a MODEL_CONFIGS < $CONFIG
+IFS=$'\n' read -d '' -r -a MODEL_CONFIGS < "$CONFIG"
 
 for MODEL_CONFIG in "${MODEL_CONFIGS[@]}"
 do
diff --git a/.buildkite/nightly-benchmarks/scripts/launch-server.sh b/.buildkite/nightly-benchmarks/scripts/launch-server.sh
index e9d7d6a8d760a..fb5063db86942 100644
--- a/.buildkite/nightly-benchmarks/scripts/launch-server.sh
+++ b/.buildkite/nightly-benchmarks/scripts/launch-server.sh
@@ -50,31 +50,30 @@ launch_trt_server() {
   git clone https://github.com/triton-inference-server/tensorrtllm_backend.git
   git lfs install
   cd tensorrtllm_backend
-  git checkout $trt_llm_version
-  tensorrtllm_backend_dir=$(pwd)
+  git checkout "$trt_llm_version"
   git submodule update --init --recursive
 
   # build trtllm engine
   cd /tensorrtllm_backend
-  cd ./tensorrt_llm/examples/${model_type}
+  cd "./tensorrt_llm/examples/${model_type}"
   python3 convert_checkpoint.py \
-    --model_dir ${model_path} \
-    --dtype ${model_dtype} \
-    --tp_size ${model_tp_size} \
-    --output_dir ${trt_model_path}
+    --model_dir "${model_path}" \
+    --dtype "${model_dtype}" \
+    --tp_size "${model_tp_size}" \
+    --output_dir "${trt_model_path}"
   trtllm-build \
-    --checkpoint_dir ${trt_model_path} \
+    --checkpoint_dir "${trt_model_path}" \
     --use_fused_mlp \
     --reduce_fusion disable \
     --workers 8 \
-    --gpt_attention_plugin ${model_dtype} \
-    --gemm_plugin ${model_dtype} \
-    --tp_size ${model_tp_size} \
-    --max_batch_size ${max_batch_size} \
-    --max_input_len ${max_input_len} \
-    --max_seq_len ${max_seq_len} \
-    --max_num_tokens ${max_num_tokens} \
-    --output_dir ${trt_engine_path}
+    --gpt_attention_plugin "${model_dtype}" \
+    --gemm_plugin "${model_dtype}" \
+    --tp_size "${model_tp_size}" \
+    --max_batch_size "${max_batch_size}" \
+    --max_input_len "${max_input_len}" \
+    --max_seq_len "${max_seq_len}" \
+    --max_num_tokens "${max_num_tokens}" \
+    --output_dir "${trt_engine_path}"
 
   # handle triton protobuf files and launch triton server
   cd /tensorrtllm_backend
@@ -82,15 +81,15 @@ launch_trt_server() {
   cp -r all_models/inflight_batcher_llm/* triton_model_repo/
   cd triton_model_repo
   rm -rf ./tensorrt_llm/1/*
-  cp -r ${trt_engine_path}/* ./tensorrt_llm/1
+  cp -r "${trt_engine_path}"/* ./tensorrt_llm/1
   python3 ../tools/fill_template.py -i tensorrt_llm/config.pbtxt triton_backend:tensorrtllm,engine_dir:/tensorrtllm_backend/triton_model_repo/tensorrt_llm/1,decoupled_mode:true,batching_strategy:inflight_fused_batching,batch_scheduler_policy:guaranteed_no_evict,exclude_input_in_output:true,triton_max_batch_size:2048,max_queue_delay_microseconds:0,max_beam_width:1,max_queue_size:2048,enable_kv_cache_reuse:false
-  python3 ../tools/fill_template.py -i preprocessing/config.pbtxt triton_max_batch_size:2048,tokenizer_dir:$model_path,preprocessing_instance_count:5
-  python3 ../tools/fill_template.py -i postprocessing/config.pbtxt triton_max_batch_size:2048,tokenizer_dir:$model_path,postprocessing_instance_count:5,skip_special_tokens:false
-  python3 ../tools/fill_template.py -i ensemble/config.pbtxt triton_max_batch_size:$max_batch_size
-  python3 ../tools/fill_template.py -i tensorrt_llm_bls/config.pbtxt triton_max_batch_size:$max_batch_size,decoupled_mode:true,accumulate_tokens:"False",bls_instance_count:1
+  python3 ../tools/fill_template.py -i preprocessing/config.pbtxt "triton_max_batch_size:2048,tokenizer_dir:$model_path,preprocessing_instance_count:5"
+  python3 ../tools/fill_template.py -i postprocessing/config.pbtxt "triton_max_batch_size:2048,tokenizer_dir:$model_path,postprocessing_instance_count:5,skip_special_tokens:false"
+  python3 ../tools/fill_template.py -i ensemble/config.pbtxt triton_max_batch_size:"$max_batch_size"
+  python3 ../tools/fill_template.py -i tensorrt_llm_bls/config.pbtxt "triton_max_batch_size:$max_batch_size,decoupled_mode:true,accumulate_tokens:False,bls_instance_count:1"
   cd /tensorrtllm_backend
   python3 scripts/launch_triton_server.py \
-    --world_size=${model_tp_size} \
+    --world_size="${model_tp_size}" \
     --model_repo=/tensorrtllm_backend/triton_model_repo &
 
 }
@@ -98,10 +97,7 @@ launch_trt_server() {
 launch_tgi_server() {
   model=$(echo "$common_params" | jq -r '.model')
   tp=$(echo "$common_params" | jq -r '.tp')
-  dataset_name=$(echo "$common_params" | jq -r '.dataset_name')
-  dataset_path=$(echo "$common_params" | jq -r '.dataset_path')
   port=$(echo "$common_params" | jq -r '.port')
-  num_prompts=$(echo "$common_params" | jq -r '.num_prompts')
   server_args=$(json2args "$server_params")
 
   if echo "$common_params" | jq -e 'has("fp8")' >/dev/null; then
@@ -129,10 +125,7 @@ launch_tgi_server() {
 launch_lmdeploy_server() {
   model=$(echo "$common_params" | jq -r '.model')
   tp=$(echo "$common_params" | jq -r '.tp')
-  dataset_name=$(echo "$common_params" | jq -r '.dataset_name')
-  dataset_path=$(echo "$common_params" | jq -r '.dataset_path')
   port=$(echo "$common_params" | jq -r '.port')
-  num_prompts=$(echo "$common_params" | jq -r '.num_prompts')
   server_args=$(json2args "$server_params")
 
   server_command="lmdeploy serve api_server $model \
@@ -149,10 +142,7 @@ launch_sglang_server() {
 
   model=$(echo "$common_params" | jq -r '.model')
   tp=$(echo "$common_params" | jq -r '.tp')
-  dataset_name=$(echo "$common_params" | jq -r '.dataset_name')
-  dataset_path=$(echo "$common_params" | jq -r '.dataset_path')
   port=$(echo "$common_params" | jq -r '.port')
-  num_prompts=$(echo "$common_params" | jq -r '.num_prompts')
   server_args=$(json2args "$server_params")
 
   if echo "$common_params" | jq -e 'has("fp8")' >/dev/null; then
@@ -185,10 +175,7 @@ launch_vllm_server() {
 
   model=$(echo "$common_params" | jq -r '.model')
   tp=$(echo "$common_params" | jq -r '.tp')
-  dataset_name=$(echo "$common_params" | jq -r '.dataset_name')
-  dataset_path=$(echo "$common_params" | jq -r '.dataset_path')
   port=$(echo "$common_params" | jq -r '.port')
-  num_prompts=$(echo "$common_params" | jq -r '.num_prompts')
   server_args=$(json2args "$server_params")
 
   if echo "$common_params" | jq -e 'has("fp8")' >/dev/null; then
@@ -217,19 +204,19 @@ launch_vllm_server() {
 
 main() {
 
-  if [[ $CURRENT_LLM_SERVING_ENGINE == "trt" ]]; then
+  if [[ "$CURRENT_LLM_SERVING_ENGINE" == "trt" ]]; then
     launch_trt_server
   fi
 
-  if [[ $CURRENT_LLM_SERVING_ENGINE == "tgi" ]]; then
+  if [[ "$CURRENT_LLM_SERVING_ENGINE" == "tgi" ]]; then
     launch_tgi_server
   fi
 
-  if [[ $CURRENT_LLM_SERVING_ENGINE == "lmdeploy" ]]; then
+  if [[ "$CURRENT_LLM_SERVING_ENGINE" == "lmdeploy" ]]; then
     launch_lmdeploy_server
   fi
 
-  if [[ $CURRENT_LLM_SERVING_ENGINE == "sglang" ]]; then
+  if [[ "$CURRENT_LLM_SERVING_ENGINE" == "sglang" ]]; then
     launch_sglang_server
   fi
 
diff --git a/.buildkite/nightly-benchmarks/scripts/nightly-annotate.sh b/.buildkite/nightly-benchmarks/scripts/nightly-annotate.sh
index c6a1bbdeb7d48..686f70dbece6c 100644
--- a/.buildkite/nightly-benchmarks/scripts/nightly-annotate.sh
+++ b/.buildkite/nightly-benchmarks/scripts/nightly-annotate.sh
@@ -16,10 +16,10 @@ main() {
     fi
 
     # initial annotation
-    description="$VLLM_SOURCE_CODE_LOC/.buildkite/nightly-benchmarks/nightly-descriptions.md"
+    #description="$VLLM_SOURCE_CODE_LOC/.buildkite/nightly-benchmarks/nightly-descriptions.md"
 
     # download results
-    cd $VLLM_SOURCE_CODE_LOC/benchmarks
+    cd "$VLLM_SOURCE_CODE_LOC/benchmarks"
     mkdir -p results/
     /workspace/buildkite-agent artifact download 'results/*nightly_results.json' results/
     ls
@@ -30,15 +30,15 @@ main() {
     /workspace/buildkite-agent artifact upload "results.zip"
 
     # upload benchmarking scripts
-    cd $VLLM_SOURCE_CODE_LOC/
+    cd "$VLLM_SOURCE_CODE_LOC/"
     zip -r nightly-benchmarks.zip .buildkite/ benchmarks/
     /workspace/buildkite-agent artifact upload "nightly-benchmarks.zip"
 
-    cd $VLLM_SOURCE_CODE_LOC/.buildkite/nightly-benchmarks/
+    cd "$VLLM_SOURCE_CODE_LOC/.buildkite/nightly-benchmarks/"
     # upload benchmarking pipeline
     /workspace/buildkite-agent artifact upload "nightly-pipeline.yaml"
 
-    cd $VLLM_SOURCE_CODE_LOC/.buildkite/nightly-benchmarks/
+    cd "$VLLM_SOURCE_CODE_LOC/.buildkite/nightly-benchmarks/"
     /workspace/buildkite-agent annotate --style "success" --context "nightly-benchmarks-results" --append < nightly-annotation.md
     
 
@@ -75,4 +75,4 @@ main() {
     # /workspace/buildkite-agent annotate --style "success" --context "nightly-benchmarks-results" --append < nightly_results.md
 }
 
-main "$@"
\ No newline at end of file
+main "$@"
diff --git a/.buildkite/nightly-benchmarks/scripts/run-nightly-benchmarks.sh b/.buildkite/nightly-benchmarks/scripts/run-nightly-benchmarks.sh
index dd8c15e0700eb..3f38cf5137535 100644
--- a/.buildkite/nightly-benchmarks/scripts/run-nightly-benchmarks.sh
+++ b/.buildkite/nightly-benchmarks/scripts/run-nightly-benchmarks.sh
@@ -12,7 +12,7 @@ check_gpus() {
     echo "Need at least 1 GPU to run benchmarking."
     exit 1
   fi
-  declare -g gpu_type=$(echo $(nvidia-smi --query-gpu=name --format=csv,noheader) | awk '{print $2}')
+  declare -g gpu_type="$(nvidia-smi --query-gpu=name --format=csv,noheader | awk '{print $2}')"
   echo "GPU type is $gpu_type"
 }
 
@@ -102,7 +102,7 @@ kill_gpu_processes() {
   pkill -f text-generation
   pkill -f lmdeploy
 
-  while [ $(nvidia-smi --query-gpu=memory.used --format=csv,noheader,nounits | head -n 1) -ge 1000 ]; do
+  while [ "$(nvidia-smi --query-gpu=memory.used --format=csv,noheader,nounits | head -n 1)" -ge 1000 ]; do
     sleep 1
   done
 }
@@ -119,8 +119,8 @@ wait_for_server() {
 ensure_installed() {
   # Ensure that the given command is installed by apt-get
   local cmd=$1
-  if ! which $cmd >/dev/null; then
-    apt-get update && apt-get install -y $cmd
+  if ! which "$cmd" >/dev/null; then
+    apt-get update && apt-get install -y "$cmd"
   fi
 }
 
@@ -173,13 +173,11 @@ run_serving_tests() {
       echo "Reuse previous server for test case $test_name"
     else
       kill_gpu_processes
-      bash $VLLM_SOURCE_CODE_LOC/.buildkite/nightly-benchmarks/scripts/launch-server.sh \
+      bash "$VLLM_SOURCE_CODE_LOC/.buildkite/nightly-benchmarks/scripts/launch-server.sh" \
         "$server_params" "$common_params"
     fi
 
-    wait_for_server
-
-    if [ $? -eq 0 ]; then
+    if wait_for_server; then
       echo ""
       echo "$CURRENT_LLM_SERVING_ENGINE server is up and running."
     else
@@ -190,13 +188,13 @@ run_serving_tests() {
 
     # prepare tokenizer
     # this is required for lmdeploy.
-    cd $VLLM_SOURCE_CODE_LOC/benchmarks
+    cd "$VLLM_SOURCE_CODE_LOC/benchmarks"
     rm -rf /tokenizer_cache
     mkdir /tokenizer_cache
     python3 ../.buildkite/nightly-benchmarks/scripts/download-tokenizer.py \
       --model "$model" \
       --cachedir /tokenizer_cache
-    cd $VLLM_SOURCE_CODE_LOC/benchmarks
+    cd "$VLLM_SOURCE_CODE_LOC/benchmarks"
 
 
     # change model name for lmdeploy (it will not follow standard hf name)
@@ -307,11 +305,11 @@ run_serving_tests() {
 prepare_dataset() {
 
   # download sharegpt dataset
-  cd $VLLM_SOURCE_CODE_LOC/benchmarks
+  cd "$VLLM_SOURCE_CODE_LOC/benchmarks"
   wget https://huggingface.co/datasets/anon8231489123/ShareGPT_Vicuna_unfiltered/resolve/main/ShareGPT_V3_unfiltered_cleaned_split.json
 
   # duplicate sonnet by 4x, to allow benchmarking with input length 2048
-  cd $VLLM_SOURCE_CODE_LOC/benchmarks
+  cd "$VLLM_SOURCE_CODE_LOC/benchmarks"
   echo "" > sonnet_4x.txt
   for _ in {1..4}
   do
@@ -339,17 +337,17 @@ main() {
 
   prepare_dataset
 
-  cd $VLLM_SOURCE_CODE_LOC/benchmarks
+  cd "$VLLM_SOURCE_CODE_LOC/benchmarks"
   declare -g RESULTS_FOLDER=results/
   mkdir -p $RESULTS_FOLDER
-  BENCHMARK_ROOT=$VLLM_SOURCE_CODE_LOC/.buildkite/nightly-benchmarks/
+  BENCHMARK_ROOT="$VLLM_SOURCE_CODE_LOC/.buildkite/nightly-benchmarks/"
 
   # run the test
-  run_serving_tests $BENCHMARK_ROOT/tests/nightly-tests.json
+  run_serving_tests "$BENCHMARK_ROOT/tests/nightly-tests.json"
 
   # upload benchmark results to buildkite
   python3 -m pip install tabulate pandas
-  python3 $BENCHMARK_ROOT/scripts/summary-nightly-results.py
+  python3 "$BENCHMARK_ROOT/scripts/summary-nightly-results.py"
   upload_to_buildkite
 
 }
diff --git a/.buildkite/nightly-benchmarks/scripts/run-performance-benchmarks.sh b/.buildkite/nightly-benchmarks/scripts/run-performance-benchmarks.sh
index a0b9a409b758d..d397b05cdff23 100644
--- a/.buildkite/nightly-benchmarks/scripts/run-performance-benchmarks.sh
+++ b/.buildkite/nightly-benchmarks/scripts/run-performance-benchmarks.sh
@@ -17,7 +17,7 @@ check_gpus() {
     echo "Need at least 1 GPU to run benchmarking."
     exit 1
   fi
-  declare -g gpu_type=$(echo $(nvidia-smi --query-gpu=name --format=csv,noheader) | awk '{print $2}')
+  declare -g gpu_type=$(nvidia-smi --query-gpu=name --format=csv,noheader | awk '{print $2}')
   echo "GPU type is $gpu_type"
 }
 
@@ -93,7 +93,7 @@ kill_gpu_processes() {
 
 
   # wait until GPU memory usage smaller than 1GB
-  while [ $(nvidia-smi --query-gpu=memory.used --format=csv,noheader,nounits | head -n 1) -ge 1000 ]; do
+  while [ "$(nvidia-smi --query-gpu=memory.used --format=csv,noheader,nounits | head -n 1)" -ge 1000 ]; do
     sleep 1
   done
 
@@ -117,7 +117,7 @@ upload_to_buildkite() {
   fi
 
   # Use the determined command to annotate and upload artifacts
-  $BUILDKITE_AGENT_COMMAND annotate --style "info" --context "$BUILDKITE_LABEL-benchmark-results" <$RESULTS_FOLDER/benchmark_results.md
+  $BUILDKITE_AGENT_COMMAND annotate --style "info" --context "$BUILDKITE_LABEL-benchmark-results" < "$RESULTS_FOLDER/benchmark_results.md"
   $BUILDKITE_AGENT_COMMAND artifact upload "$RESULTS_FOLDER/*"
 }
 
@@ -150,7 +150,7 @@ run_latency_tests() {
     # check if there is enough GPU to run the test
     tp=$(echo "$latency_params" | jq -r '.tensor_parallel_size')
     if [[ $gpu_count -lt $tp ]]; then
-      echo "Required tensor-parallel-size $tp but only $gpu_count GPU found. Skip testcase $testname."
+      echo "Required tensor-parallel-size $tp but only $gpu_count GPU found. Skip testcase $test_name."
       continue
     fi
 
@@ -206,9 +206,9 @@ run_throughput_tests() {
     throughput_args=$(json2args "$throughput_params")
 
     # check if there is enough GPU to run the test
-    tp=$(echo $throughput_params | jq -r '.tensor_parallel_size')
+    tp=$(echo "$throughput_params" | jq -r '.tensor_parallel_size')
     if [[ $gpu_count -lt $tp ]]; then
-      echo "Required tensor-parallel-size $tp but only $gpu_count GPU found. Skip testcase $testname."
+      echo "Required tensor-parallel-size $tp but only $gpu_count GPU found. Skip testcase $test_name."
       continue
     fi
 
@@ -270,7 +270,7 @@ run_serving_tests() {
     # check if there is enough GPU to run the test
     tp=$(echo "$server_params" | jq -r '.tensor_parallel_size')
     if [[ $gpu_count -lt $tp ]]; then
-      echo "Required tensor-parallel-size $tp but only $gpu_count GPU found. Skip testcase $testname."
+      echo "Required tensor-parallel-size $tp but only $gpu_count GPU found. Skip testcase $test_name."
       continue
     fi
 
@@ -278,7 +278,7 @@ run_serving_tests() {
     server_model=$(echo "$server_params" | jq -r '.model')
     client_model=$(echo "$client_params" | jq -r '.model')
     if [[ $server_model != "$client_model" ]]; then
-      echo "Server model and client model must be the same. Skip testcase $testname."
+      echo "Server model and client model must be the same. Skip testcase $test_name."
       continue
     fi
 
@@ -293,8 +293,7 @@ run_serving_tests() {
     server_pid=$!
 
     # wait until the server is alive
-    wait_for_server
-    if [ $? -eq 0 ]; then
+    if wait_for_server; then
       echo ""
       echo "vllm server is up and running."
     else
diff --git a/.buildkite/nightly-benchmarks/scripts/wait-for-image.sh b/.buildkite/nightly-benchmarks/scripts/wait-for-image.sh
index f16862907def1..19f7160e68a4d 100644
--- a/.buildkite/nightly-benchmarks/scripts/wait-for-image.sh
+++ b/.buildkite/nightly-benchmarks/scripts/wait-for-image.sh
@@ -6,7 +6,7 @@ TIMEOUT_SECONDS=10
 
 retries=0
 while [ $retries -lt 1000 ]; do
-    if [ $(curl -s --max-time $TIMEOUT_SECONDS -L -H "Authorization: Bearer $TOKEN" -o /dev/null -w "%{http_code}" $URL) -eq 200 ]; then
+    if [ "$(curl -s --max-time "$TIMEOUT_SECONDS" -L -H "Authorization: Bearer $TOKEN" -o /dev/null -w "%{http_code}" "$URL")" -eq 200 ]; then
         exit 0
     fi
 
@@ -16,4 +16,4 @@ while [ $retries -lt 1000 ]; do
     sleep 5
 done
 
-exit 1
\ No newline at end of file
+exit 1
diff --git a/.buildkite/run-amd-test.sh b/.buildkite/run-amd-test.sh
index 860272e71fd84..902e162720b89 100755
--- a/.buildkite/run-amd-test.sh
+++ b/.buildkite/run-amd-test.sh
@@ -1,3 +1,5 @@
+#!/bin/bash
+
 # This script runs test inside the corresponding ROCm docker container.
 set -o pipefail
 
@@ -57,17 +59,17 @@ done
 echo "--- Pulling container" 
 image_name="rocm/vllm-ci:${BUILDKITE_COMMIT}"
 container_name="rocm_${BUILDKITE_COMMIT}_$(tr -dc A-Za-z0-9 < /dev/urandom | head -c 10; echo)"
-docker pull ${image_name}
+docker pull "${image_name}"
 
 remove_docker_container() {
-   docker rm -f ${container_name} || docker image rm -f ${image_name} || true
+   docker rm -f "${container_name}" || docker image rm -f "${image_name}" || true
 }
 trap remove_docker_container EXIT
 
 echo "--- Running container"
 
 HF_CACHE="$(realpath ~)/huggingface"
-mkdir -p ${HF_CACHE}
+mkdir -p "${HF_CACHE}"
 HF_MOUNT="/root/.cache/huggingface"
 
 commands=$@
@@ -118,25 +120,25 @@ if [[ $commands == *"--shard-id="* ]]; then
         --network host \
         --shm-size=16gb \
         --rm \
-        -e HIP_VISIBLE_DEVICES=${GPU} \
+        -e HIP_VISIBLE_DEVICES="${GPU}" \
         -e HF_TOKEN \
-        -v ${HF_CACHE}:${HF_MOUNT} \
-        -e HF_HOME=${HF_MOUNT} \
-        --name ${container_name}_${GPU}  \
-        ${image_name} \
+        -v "${HF_CACHE}:${HF_MOUNT}" \
+        -e "HF_HOME=${HF_MOUNT}" \
+        --name "${container_name}_${GPU}" \
+        "${image_name}" \
         /bin/bash -c "${commands_gpu}" \
         |& while read -r line; do echo ">>Shard $GPU: $line"; done &
     PIDS+=($!)
   done
   #wait for all processes to finish and collect exit codes
-  for pid in ${PIDS[@]}; do
-    wait ${pid}
+  for pid in "${PIDS[@]}"; do
+    wait "${pid}"
     STATUS+=($?)
   done
-  for st in ${STATUS[@]}; do
+  for st in "${STATUS[@]}"; do
     if [[ ${st} -ne 0 ]]; then
       echo "One of the processes failed with $st"
-      exit ${st}
+      exit "${st}"
     fi
   done
 else
@@ -147,9 +149,9 @@ else
           --rm \
           -e HIP_VISIBLE_DEVICES=0 \
           -e HF_TOKEN \
-          -v ${HF_CACHE}:${HF_MOUNT} \
-          -e HF_HOME=${HF_MOUNT} \
-          --name ${container_name} \
-          ${image_name} \
+          -v "${HF_CACHE}:${HF_MOUNT}" \
+          -e "HF_HOME=${HF_MOUNT}" \
+          --name "${container_name}" \
+          "${image_name}" \
           /bin/bash -c "${commands}"
 fi
diff --git a/.buildkite/run-benchmarks.sh b/.buildkite/run-benchmarks.sh
index cbf6dda677c53..1641c1faa9d6a 100644
--- a/.buildkite/run-benchmarks.sh
+++ b/.buildkite/run-benchmarks.sh
@@ -1,3 +1,5 @@
+#!/bin/bash
+
 # This script is run by buildkite to run the benchmarks and upload the results to buildkite
 
 set -ex
diff --git a/.buildkite/run-cpu-test-ppc64le.sh b/.buildkite/run-cpu-test-ppc64le.sh
index 06c48f8ed5613..9fbacaab0ec47 100755
--- a/.buildkite/run-cpu-test-ppc64le.sh
+++ b/.buildkite/run-cpu-test-ppc64le.sh
@@ -1,3 +1,5 @@
+#!/bin/bash
+
 # This script build the CPU docker image and run the offline inference inside the container.
 # It serves a sanity check for compilation and basic model usage.
 set -ex
@@ -13,7 +15,7 @@ docker build -t cpu-test -f Dockerfile.ppc64le .
 # Run the image, setting --shm-size=4g for tensor parallel.
 source /etc/environment
 #docker run -itd --entrypoint /bin/bash -v ~/.cache/huggingface:/root/.cache/huggingface --privileged=true --network host -e HF_TOKEN --env VLLM_CPU_KVCACHE_SPACE=4 --shm-size=4g --name cpu-test cpu-test
-docker run -itd --entrypoint /bin/bash -v ~/.cache/huggingface:/root/.cache/huggingface --privileged=true --network host -e HF_TOKEN=$HF_TOKEN --name cpu-test cpu-test
+docker run -itd --entrypoint /bin/bash -v ~/.cache/huggingface:/root/.cache/huggingface --privileged=true --network host -e HF_TOKEN="$HF_TOKEN" --name cpu-test cpu-test
 
 # Run basic model test
 docker exec cpu-test bash -c "
diff --git a/.buildkite/run-cpu-test.sh b/.buildkite/run-cpu-test.sh
index 2dbeee8562971..064d7c77ab570 100644
--- a/.buildkite/run-cpu-test.sh
+++ b/.buildkite/run-cpu-test.sh
@@ -1,3 +1,5 @@
+#!/bin/bash
+
 # This script build the CPU docker image and run the offline inference inside the container.
 # It serves a sanity check for compilation and basic model usage.
 set -ex
diff --git a/.buildkite/run-multi-node-test.sh b/.buildkite/run-multi-node-test.sh
index 7ac4dcc4c786d..530bf90a855fe 100755
--- a/.buildkite/run-multi-node-test.sh
+++ b/.buildkite/run-multi-node-test.sh
@@ -14,7 +14,7 @@ DOCKER_IMAGE=$4
 
 shift 4
 COMMANDS=("$@")
-if [ ${#COMMANDS[@]} -ne $NUM_NODES ]; then
+if [ ${#COMMANDS[@]} -ne "$NUM_NODES" ]; then
     echo "The number of commands must be equal to the number of nodes."
     echo "Number of nodes: $NUM_NODES"
     echo "Number of commands: ${#COMMANDS[@]}"
@@ -23,7 +23,7 @@ fi
 
 echo "List of commands"
 for command in "${COMMANDS[@]}"; do
-    echo $command
+    echo "$command"
 done
 
 start_network() {
@@ -36,7 +36,7 @@ start_nodes() {
         for node_gpu in $(seq 0 $(($NUM_GPUS - 1))); do
             DEVICE_NUM=$(($node * $NUM_GPUS + $node_gpu))
             GPU_DEVICES+=$(($DEVICE_NUM))
-            if [ $node_gpu -lt $(($NUM_GPUS - 1)) ]; then
+            if [ "$node_gpu" -lt $(($NUM_GPUS - 1)) ]; then
                 GPU_DEVICES+=','
             fi
         done
@@ -49,17 +49,20 @@ start_nodes() {
         # 3. map the huggingface cache directory to the container
         # 3. assign ip addresses to the containers (head node: 192.168.10.10, worker nodes:
         #    starting from 192.168.10.11)
-        docker run -d --gpus "$GPU_DEVICES" --shm-size=10.24gb -e HF_TOKEN -v ~/.cache/huggingface:/root/.cache/huggingface --name node$node --network docker-net --ip 192.168.10.$((10 + $node)) --rm $DOCKER_IMAGE /bin/bash -c "tail -f /dev/null"
+        docker run -d --gpus "$GPU_DEVICES" --shm-size=10.24gb -e HF_TOKEN \
+            -v ~/.cache/huggingface:/root/.cache/huggingface --name "node$node" \
+            --network docker-net --ip 192.168.10.$((10 + $node)) --rm "$DOCKER_IMAGE" \
+            /bin/bash -c "tail -f /dev/null"
 
         # organize containers into a ray cluster
-        if [ $node -eq 0 ]; then
+        if [ "$node" -eq 0 ]; then
             # start the ray head node
-            docker exec -d node$node /bin/bash -c "ray start --head --port=6379 --block"
+            docker exec -d "node$node" /bin/bash -c "ray start --head --port=6379 --block"
             # wait for the head node to be ready
             sleep 10
         else
             # start the ray worker nodes, and connect them to the head node
-            docker exec -d node$node /bin/bash -c "ray start --address=192.168.10.10:6379 --block"
+            docker exec -d "node$node" /bin/bash -c "ray start --address=192.168.10.10:6379 --block"
         fi
     done
 
@@ -79,22 +82,22 @@ run_nodes() {
         for node_gpu in $(seq 0 $(($NUM_GPUS - 1))); do
             DEVICE_NUM=$(($node * $NUM_GPUS + $node_gpu))
             GPU_DEVICES+=$(($DEVICE_NUM))
-            if [ $node_gpu -lt $(($NUM_GPUS - 1)) ]; then
+            if [ "$node_gpu" -lt $(($NUM_GPUS - 1)) ]; then
                 GPU_DEVICES+=','
             fi
         done
         GPU_DEVICES+='"'
         echo "Running node$node with GPU devices: $GPU_DEVICES"
-        if [ $node -ne 0 ]; then
-            docker exec -d node$node /bin/bash -c "cd $WORKING_DIR ; ${COMMANDS[$node]}"
+        if [ "$node" -ne 0 ]; then
+            docker exec -d "node$node" /bin/bash -c "cd $WORKING_DIR ; ${COMMANDS[$node]}"
         else
-            docker exec node$node /bin/bash -c "cd $WORKING_DIR ; ${COMMANDS[$node]}"
+            docker exec "node$node" /bin/bash -c "cd $WORKING_DIR ; ${COMMANDS[$node]}"
         fi
     done
 }
 cleanup() {
     for node in $(seq 0 $(($NUM_NODES-1))); do
-        docker stop node$node
+        docker stop "node$node"
     done
     docker network rm docker-net
 }
diff --git a/.buildkite/run-neuron-test.sh b/.buildkite/run-neuron-test.sh
index 252c0f7fecd12..9259391aaed49 100644
--- a/.buildkite/run-neuron-test.sh
+++ b/.buildkite/run-neuron-test.sh
@@ -1,3 +1,5 @@
+#!/bin/bash
+
 # This script build the Neuron docker image and run the API server inside the container.
 # It serves a sanity check for compilation and basic model usage.
 set -e
@@ -12,10 +14,10 @@ if [ -f /tmp/neuron-docker-build-timestamp ]; then
     current_time=$(date +%s)
     if [ $((current_time - last_build)) -gt 86400 ]; then
         docker system prune -f
-        echo $current_time > /tmp/neuron-docker-build-timestamp
+        echo "$current_time" > /tmp/neuron-docker-build-timestamp
     fi
 else
-    echo $(date +%s) > /tmp/neuron-docker-build-timestamp
+    date "+%s" > /tmp/neuron-docker-build-timestamp
 fi
 
 docker build -t neuron -f Dockerfile.neuron .
@@ -34,7 +36,7 @@ wait_for_server_to_start() {
     timeout=300
     counter=0
 
-    while [ "$(curl -s -o /dev/null -w ''%{http_code}'' localhost:8000/health)" != "200" ]; do
+    while [ "$(curl -s -o /dev/null -w '%{http_code}' localhost:8000/health)" != "200" ]; do
         sleep 1
         counter=$((counter + 1))
         if [ $counter -ge $timeout ]; then
diff --git a/.buildkite/run-openvino-test.sh b/.buildkite/run-openvino-test.sh
index 35ad5c0ddde77..6b12f424fd828 100755
--- a/.buildkite/run-openvino-test.sh
+++ b/.buildkite/run-openvino-test.sh
@@ -1,3 +1,5 @@
+#!/bin/bash
+
 # This script build the OpenVINO docker image and run the offline inference inside the container.
 # It serves a sanity check for compilation and basic model usage.
 set -ex
diff --git a/.buildkite/run-tpu-test.sh b/.buildkite/run-tpu-test.sh
index 988d5aef5fb8c..770dad6ffa3a1 100644
--- a/.buildkite/run-tpu-test.sh
+++ b/.buildkite/run-tpu-test.sh
@@ -1,3 +1,5 @@
+#!/bin/bash
+
 set -e
 
 # Build the docker image.
@@ -12,4 +14,4 @@ remove_docker_container
 # For HF_TOKEN.
 source /etc/environment
 # Run a simple end-to-end example.
-docker run --privileged --net host --shm-size=16G -it -e HF_TOKEN=$HF_TOKEN --name tpu-test vllm-tpu /bin/bash -c "python3 -m pip install git+https://github.com/thuml/depyf.git && python3 -m pip install pytest && python3 -m pip install lm_eval[api]==0.4.4 && pytest -v -s /workspace/vllm/tests/entrypoints/openai/test_accuracy.py && pytest -v -s /workspace/vllm/tests/tpu/test_custom_dispatcher.py && python3 /workspace/vllm/tests/tpu/test_compilation.py && python3 /workspace/vllm/examples/offline_inference_tpu.py"
+docker run --privileged --net host --shm-size=16G -it -e "HF_TOKEN=$HF_TOKEN" --name tpu-test vllm-tpu /bin/bash -c "python3 -m pip install git+https://github.com/thuml/depyf.git && python3 -m pip install pytest && python3 -m pip install lm_eval[api]==0.4.4 && pytest -v -s /workspace/vllm/tests/entrypoints/openai/test_accuracy.py && pytest -v -s /workspace/vllm/tests/tpu/test_custom_dispatcher.py && python3 /workspace/vllm/tests/tpu/test_compilation.py && python3 /workspace/vllm/examples/offline_inference_tpu.py"
diff --git a/.buildkite/run-xpu-test.sh b/.buildkite/run-xpu-test.sh
index 6ffa66d5ef3d6..faeac8e2ded36 100644
--- a/.buildkite/run-xpu-test.sh
+++ b/.buildkite/run-xpu-test.sh
@@ -1,3 +1,5 @@
+#!/bin/bash
+
 # This script build the CPU docker image and run the offline inference inside the container.
 # It serves a sanity check for compilation and basic model usage.
 set -ex
diff --git a/.github/workflows/scripts/cuda-install.sh b/.github/workflows/scripts/cuda-install.sh
index 312c6e82f33a3..3d0b7a1fe0402 100644
--- a/.github/workflows/scripts/cuda-install.sh
+++ b/.github/workflows/scripts/cuda-install.sh
@@ -1,16 +1,16 @@
 #!/bin/bash
 
 # Replace '.' with '-' ex: 11.8 -> 11-8
-cuda_version=$(echo $1 | tr "." "-")
+cuda_version=$(echo "$1" | tr "." "-")
 # Removes '-' and '.' ex: ubuntu-20.04 -> ubuntu2004
-OS=$(echo $2 | tr -d ".\-")
+OS=$(echo "$2" | tr -d ".\-")
 
 # Installs CUDA
-wget -nv https://developer.download.nvidia.com/compute/cuda/repos/${OS}/x86_64/cuda-keyring_1.1-1_all.deb
+wget -nv "https://developer.download.nvidia.com/compute/cuda/repos/${OS}/x86_64/cuda-keyring_1.1-1_all.deb"
 sudo dpkg -i cuda-keyring_1.1-1_all.deb
 rm cuda-keyring_1.1-1_all.deb
 sudo apt -qq update
-sudo apt -y install cuda-${cuda_version} cuda-nvcc-${cuda_version} cuda-libraries-dev-${cuda_version}
+sudo apt -y install "cuda-${cuda_version}" "cuda-nvcc-${cuda_version}" "cuda-libraries-dev-${cuda_version}"
 sudo apt clean
 
 # Test nvcc
diff --git a/.github/workflows/scripts/pytorch-install.sh b/.github/workflows/scripts/pytorch-install.sh
index dfc1851d7692c..e3cda7dad2d17 100644
--- a/.github/workflows/scripts/pytorch-install.sh
+++ b/.github/workflows/scripts/pytorch-install.sh
@@ -6,7 +6,7 @@ cuda_version=$3
 
 # Install torch
 $python_executable -m pip install numpy pyyaml scipy ipython mkl mkl-include ninja cython typing pandas typing-extensions dataclasses setuptools && conda clean -ya
-$python_executable -m pip install torch==${pytorch_version}+cu${cuda_version//./} --extra-index-url https://download.pytorch.org/whl/cu${cuda_version//./}
+$python_executable -m pip install torch=="${pytorch_version}+cu${cuda_version//./}" --extra-index-url "https://download.pytorch.org/whl/cu${cuda_version//./}"
 
 # Print version information
 $python_executable --version
diff --git a/.github/workflows/shellcheck.yml b/.github/workflows/shellcheck.yml
new file mode 100644
index 0000000000000..ac43b20c31390
--- /dev/null
+++ b/.github/workflows/shellcheck.yml
@@ -0,0 +1,37 @@
+name: Lint shell scripts
+on:
+  push:
+    branches:
+      - "main"
+    paths:
+      - '**/*.sh'
+      - '.github/workflows/shellcheck.yml'
+  pull_request:
+    branches:
+      - "main"
+    paths:
+      - '**/*.sh'
+      - '.github/workflows/shellcheck.yml'
+
+env:
+  LC_ALL: en_US.UTF-8
+
+defaults:
+  run:
+    shell: bash
+
+permissions:
+  contents: read
+
+jobs:
+  shellcheck:
+    runs-on: ubuntu-latest
+    steps:
+      - name: "Checkout"
+        uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
+        with:
+          fetch-depth: 0
+
+      - name: "Check shell scripts"
+        run: |
+          tools/shellcheck.sh
diff --git a/.gitignore b/.gitignore
index 1ea6e3419db2a..ceef6a5fba456 100644
--- a/.gitignore
+++ b/.gitignore
@@ -202,3 +202,4 @@ benchmarks/*.json
 
 # Linting
 actionlint
+shellcheck*/
diff --git a/.shellcheckrc b/.shellcheckrc
new file mode 100644
index 0000000000000..f3b6eedf8d907
--- /dev/null
+++ b/.shellcheckrc
@@ -0,0 +1,9 @@
+# rules currently disabled:
+#
+#   SC1091 (info): Not following: <sourced file> was not specified as input (see shellcheck -x)
+#   SC2004 (style): $/${} is unnecessary on arithmetic variables.
+#   SC2129 (style): Consider using { cmd1; cmd2; } >> file instead of individual redirects.
+#   SC2155 (warning): Declare and assign separately to avoid masking return values.
+#   SC2164 (warning): Use 'cd ... || exit' or 'cd ... || return' in case cd fails.
+#
+disable=SC1091,SC2004,SC2129,SC2155,SC2164
diff --git a/benchmarks/launch_tgi_server.sh b/benchmarks/launch_tgi_server.sh
index 8c5cd454fbbee..ba7383d88dc49 100755
--- a/benchmarks/launch_tgi_server.sh
+++ b/benchmarks/launch_tgi_server.sh
@@ -4,13 +4,13 @@ PORT=8000
 MODEL=$1
 TOKENS=$2
 
-docker run -e HF_TOKEN=$HF_TOKEN --gpus all --shm-size 1g -p $PORT:80 \
-           -v $PWD/data:/data \
+docker run -e "HF_TOKEN=$HF_TOKEN" --gpus all --shm-size 1g -p $PORT:80 \
+           -v "$PWD/data:/data" \
            ghcr.io/huggingface/text-generation-inference:2.2.0 \
-           --model-id $MODEL \
+           --model-id "$MODEL" \
            --sharded false  \
            --max-input-length 1024 \
            --max-total-tokens 2048 \
            --max-best-of 5 \
            --max-concurrent-requests 5000 \
-           --max-batch-total-tokens $TOKENS
+           --max-batch-total-tokens "$TOKENS"
diff --git a/examples/run_cluster.sh b/examples/run_cluster.sh
index 8e4aa59e1766d..7b4b40b4b7e23 100644
--- a/examples/run_cluster.sh
+++ b/examples/run_cluster.sh
@@ -14,7 +14,7 @@ PATH_TO_HF_HOME="$4"
 shift 4
 
 # Additional arguments are passed directly to the Docker command
-ADDITIONAL_ARGS="$@"
+ADDITIONAL_ARGS=("$@")
 
 # Validate node type
 if [ "${NODE_TYPE}" != "--head" ] && [ "${NODE_TYPE}" != "--worker" ]; then
@@ -45,5 +45,5 @@ docker run \
     --shm-size 10.24g \
     --gpus all \
     -v "${PATH_TO_HF_HOME}:/root/.cache/huggingface" \
-    ${ADDITIONAL_ARGS} \
+    "${ADDITIONAL_ARGS[@]}" \
     "${DOCKER_IMAGE}" -c "${RAY_START_CMD}"
diff --git a/format.sh b/format.sh
index be6ee0ce46dcb..d06ee62351a21 100755
--- a/format.sh
+++ b/format.sh
@@ -44,14 +44,14 @@ CLANGFORMAT_VERSION=$(clang-format --version | awk '{print $3}')
 
 # # params: tool name, tool version, required version
 tool_version_check() {
-    if [[ $2 != $3 ]]; then
+    if [[ "$2" != "$3" ]]; then
         echo "❓❓Wrong $1 version installed: $3 is required, not $2."
         exit 1
     fi
 }
 
-tool_version_check "yapf" $YAPF_VERSION "$(grep yapf requirements-lint.txt | cut -d'=' -f3)"
-tool_version_check "ruff" $RUFF_VERSION "$(grep "ruff==" requirements-lint.txt | cut -d'=' -f3)"
+tool_version_check "yapf" "$YAPF_VERSION" "$(grep yapf requirements-lint.txt | cut -d'=' -f3)"
+tool_version_check "ruff" "$RUFF_VERSION" "$(grep "ruff==" requirements-lint.txt | cut -d'=' -f3)"
 tool_version_check "mypy" "$MYPY_VERSION" "$(grep mypy requirements-lint.txt | cut -d'=' -f3)"
 tool_version_check "isort" "$ISORT_VERSION" "$(grep isort requirements-lint.txt | cut -d'=' -f3)"
 tool_version_check "codespell" "$CODESPELL_VERSION" "$(grep codespell requirements-lint.txt | cut -d'=' -f3)"
@@ -294,6 +294,10 @@ echo 'vLLM actionlint:'
 tools/actionlint.sh -color
 echo 'vLLM actionlint: Done'
 
+echo 'vLLM shellcheck:'
+tools/shellcheck.sh
+echo 'vLLM shellcheck: Done'
+
 if ! git diff --quiet &>/dev/null; then
     echo 
     echo "🔍🔍There are files changed by the format checker or by you that are not added and committed:"
diff --git a/tests/weight_loading/run_model_weight_loading_test.sh b/tests/weight_loading/run_model_weight_loading_test.sh
index e80c1d6c5849c..a4d0c44c22b51 100755
--- a/tests/weight_loading/run_model_weight_loading_test.sh
+++ b/tests/weight_loading/run_model_weight_loading_test.sh
@@ -14,7 +14,7 @@ while getopts "c:" OPT; do
 done
 
 
-IFS=$'\n' read -d '' -r -a MODEL_CONFIGS < $CONFIG
+IFS=$'\n' read -d '' -r -a MODEL_CONFIGS < "$CONFIG"
 
 for MODEL_CONFIG in "${MODEL_CONFIGS[@]}"
 do
diff --git a/tools/mypy.sh b/tools/mypy.sh
index 7e8f7d402cdd5..e984e739d70cf 100755
--- a/tools/mypy.sh
+++ b/tools/mypy.sh
@@ -3,13 +3,13 @@
 CI=${1:-0}
 PYTHON_VERSION=${2:-3.9}
 
-if [ $CI -eq 1 ]; then
+if [ "$CI" -eq 1 ]; then
     set -e
 fi
 
 run_mypy() {
     echo "Running mypy on $1"
-    if [ $CI -eq 1 ] && [ -z "$1" ]; then
+    if [ "$CI" -eq 1 ] && [ -z "$1" ]; then
         mypy --python-version "${PYTHON_VERSION}" "$@"
         return
     fi
diff --git a/tools/shellcheck.sh b/tools/shellcheck.sh
new file mode 100755
index 0000000000000..e850742a07900
--- /dev/null
+++ b/tools/shellcheck.sh
@@ -0,0 +1,21 @@
+#!/bin/bash
+
+scversion="stable"
+
+if [ -d "shellcheck-${scversion}" ]; then
+    export PATH="$PATH:$(pwd)/shellcheck-${scversion}"
+fi
+
+if ! [ -x "$(command -v shellcheck)" ]; then
+    if [ "$(uname -s)" != "Linux" ] || [ "$(uname -m)" != "x86_64" ]; then
+        echo "Please install shellcheck: https://github.com/koalaman/shellcheck?tab=readme-ov-file#installing"
+        exit 1
+    fi
+
+    # automatic local install if linux x86_64
+    wget -qO- "https://github.com/koalaman/shellcheck/releases/download/${scversion?}/shellcheck-${scversion?}.linux.x86_64.tar.xz" | tar -xJv
+    export PATH="$PATH:$(pwd)/shellcheck-${scversion}"
+fi
+
+# TODO - fix warnings in .buildkite/run-amd-test.sh
+find . -name "*.sh" -not -path "./.deps/*" -not -path "./.buildkite/run-amd-test.sh" -exec shellcheck {} +