forked from SNU-ARC/any-precision-llm
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpmpd_bench_graph.sh
83 lines (36 loc) · 2.37 KB
/
pmpd_bench_graph.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
#!/usr/bin/env bash
# Benchmark CUDA-graph decoding for each model: one fp16 baseline run,
# then one quantized run per bit-width (2/3/4) on the packed
# any-precision checkpoint, all via bench_graph.py on a single GPU.
set -euo pipefail

readonly GPU=0          # CUDA device used for every run
readonly WBITS=(2 3 4)  # quantized bit-widths to benchmark

#######################################
# Run the full benchmark suite for one model.
# Arguments:
#   $1 - path to the full-precision (fp16) checkpoint
#   $2 - path to the packed any-precision quantized model
# Outputs: bench_graph.py results to stdout
#######################################
bench_model() {
  local fp_path=$1
  local quant_path=$2
  local wbit

  CUDA_VISIBLE_DEVICES="$GPU" python bench_graph.py \
    --model_path "$fp_path" --mode fp --use_cuda_graph

  for wbit in "${WBITS[@]}"; do
    CUDA_VISIBLE_DEVICES="$GPU" python bench_graph.py \
      --model_path "$quant_path" --mode quant --wbit "$wbit" --use_cuda_graph
  done
}

bench_model checkpoints/mtgv/MobileLLaMA-1.4B-Chat \
  cache/packed/anyprec-MobileLLaMA-1.4B-Chat-w4_orig2-gc1-c4_s100_blk512
bench_model checkpoints/microsoft/phi-1_5 \
  cache/packed/anyprec-phi-1_5-w4_orig2-gc1-c4_s100_blk512
bench_model checkpoints/stabilityai/stablelm-zephyr-3b \
  cache/packed/anyprec-stablelm-zephyr-3b-w4_orig2-gc1-c4_s100_blk512
bench_model checkpoints/lmsys/vicuna-7b-v1.5 \
  cache/packed/anyprec-vicuna-7b-v1.5-w4_orig2-gc1-c4_s100_blk512