-
Notifications
You must be signed in to change notification settings - Fork 588
/
Copy pathdeepseek-r1-671B.yaml
39 lines (33 loc) · 1.14 KB
/
deepseek-r1-671B.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
# SkyPilot task: serve deepseek-ai/DeepSeek-R1 with SGLang across multiple
# nodes. Every node runs the same `setup` and `run` scripts; the launcher
# arguments in `run` tell each process its rank and where to rendezvous.
name: deepseek-r1

resources:
  # Candidate set: any one of these 8-GPU configurations is acceptable.
  # The `NAME:COUNT` spelling (no space after the colon) is kept verbatim.
  accelerators: {H200:8, H100:8, A100-80GB:8}
  disk_size: 1024  # Large disk for model weights
  disk_tier: best
  # Must match the --port passed to sglang.launch_server in `run`.
  ports: 30000
  # Accept either spot or on-demand capacity.
  any_of:
    - use_spot: true
    - use_spot: false

num_nodes: 2  # Number of nodes to launch

setup: |
  # Install sglang with all dependencies using uv
  uv pip install "sglang[all]==0.4.2.post4" \
    --find-links https://flashinfer.ai/whl/cu124/torch2.5/flashinfer

  # Raise the per-process limit on memory-map areas. Append the setting only
  # if it is not already present so that re-running setup does not keep
  # adding duplicate lines to /etc/sysctl.conf.
  if ! grep -qsxF 'vm.max_map_count=655300' /etc/sysctl.conf; then
    sudo bash -c "echo 'vm.max_map_count=655300' >> /etc/sysctl.conf"
  fi
  sudo sysctl -p

run: |
  # Use the first entry of SKYPILOT_NODE_IPS as the rendezvous address that
  # every node passes to --dist-init-addr.
  MASTER_ADDR=$(echo "$SKYPILOT_NODE_IPS" | head -n1)
  # TP should be number of GPUs per node times number of nodes
  TP=$(($SKYPILOT_NUM_GPUS_PER_NODE * $SKYPILOT_NUM_NODES))

  # Launch the server; --port must match `resources.ports` above.
  python -m sglang.launch_server \
    --model deepseek-ai/DeepSeek-R1 \
    --tp "$TP" \
    --dist-init-addr "${MASTER_ADDR}:5000" \
    --nnodes "${SKYPILOT_NUM_NODES}" \
    --node-rank "${SKYPILOT_NODE_RANK}" \
    --trust-remote-code \
    --enable-dp-attention \
    --enable-torch-compile \
    --torch-compile-max-bs 8 \
    --host 0.0.0.0 \
    --port 30000