# valohai.yaml
# Valohai steps (data-preprocess, finetune, inference) and the
# training-pipeline that chains them.
---
# Step "data-preprocess": installs the GPU requirements, then runs
# data-preprocess.py on the ViGGO test/train/validation CSV files.
- step:
    name: data-preprocess
    image: valohai/llm-toolkit:0.2-gpu
    environment: trial2023-g4dn-xlarge
    command:
      - pip install -r requirements-gpu.txt
      # {parameters} is the Valohai placeholder for the parameters declared below.
      - python data-preprocess.py {parameters}
    parameters:
      - name: model_id
        type: string
        default: "mistralai/Mistral-7B-v0.1"
      - name: max_tokens
        type: integer
        default: 512
    inputs:
      # A single input that carries all three CSV splits.
      - name: dataset
        default:
          - s3://dd-sample-bucket/mistral/gem-viggo-dataset/test.csv
          - s3://dd-sample-bucket/mistral/gem-viggo-dataset/train.csv
          - s3://dd-sample-bucket/mistral/gem-viggo-dataset/validation.csv
# Step "finetune": installs the GPU requirements, then runs
# finetune-mistral.py with the training parameters declared below.
- step:
    name: finetune
    image: valohai/llm-toolkit:0.2-gpu
    environment: trial2023-g4dn-xlarge
    command:
      - pip install -r requirements-gpu.txt
      # {parameters} is the Valohai placeholder for the parameters declared below.
      - python finetune-mistral.py {parameters}
    parameters:
      - name: model_id
        type: string
        default: "mistralai/Mistral-7B-v0.1"
      - name: max_tokens
        type: integer
        default: 512
      - name: output_dir
        type: string
        default: "finetuned_mistral"
      - name: warmup_steps
        type: integer
        default: 5
      - name: max_steps
        type: integer
        default: 30
      - name: learning_rate
        type: float
        default: 2.5e-5
      - name: do_eval
        type: flag
        # Canonical lowercase boolean; `False` is parsed differently across
        # YAML 1.1 / 1.2 loaders.
        default: false
    inputs:
      # Defaults point at dataset versions; the training-pipeline edges in this
      # file also wire these three inputs to the preprocess node's outputs.
      - name: train_data
        default: dataset://viggo/dev_train
      - name: test_data
        default: dataset://viggo/dev_test
      - name: val_data
        default: dataset://viggo/dev_val
# Step "inference": installs the GPU requirements, then runs
# inference-mistral.py on a sample sentence using a fine-tuned checkpoint.
- step:
    name: inference
    image: valohai/llm-toolkit:0.2-gpu
    environment: trial2023-g4dn-xlarge
    command:
      - pip install -r requirements-gpu.txt
      # {parameters} is the Valohai placeholder for the parameters declared below.
      - python inference-mistral.py {parameters}
    parameters:
      - name: model_id
        type: string
        default: "mistralai/Mistral-7B-v0.1"
      - name: max_tokens
        type: integer
        default: 512
      - name: sentence
        type: string
        # Folded scalar (>-): the two lines are joined with a single space and
        # no trailing newline, giving the same string as the one-line form.
        default: >-
          Satisfactory is a 2024 game developed by Coffee Stain Studios.
          It is a great game about building factories on an alien planet.
    inputs:
      - name: finetuned-checkpoint
        default: dataset://mistral-models/best_mistral_checkpoint
# Pipeline "training-pipeline": preprocess -> train -> inference.
- pipeline:
    name: training-pipeline
    nodes:
      - name: preprocess
        type: execution
        step: data-preprocess
      - name: train
        type: execution
        step: finetune
      - name: inference
        type: execution
        step: inference
    edges:
      # File edges: encoded splits from preprocess feed the matching train inputs.
      - [preprocess.output.encoded_val/*, train.input.val_data]
      - [preprocess.output.encoded_train/*, train.input.train_data]
      - [preprocess.output.encoded_test/*, train.input.test_data]
      # File edge: train's best_model output feeds the inference checkpoint input.
      - [train.output.finetuned_mistral.best_model/*, inference.input.finetuned-checkpoint]
      # Parameter edges: max_tokens and model_id are chained from preprocess to
      # train and from train to inference.
      - [preprocess.parameter.max_tokens, train.parameter.max_tokens]
      - [train.parameter.max_tokens, inference.parameter.max_tokens]
      - [preprocess.parameter.model_id, train.parameter.model_id]
      - [train.parameter.model_id, inference.parameter.model_id]