-
Notifications
You must be signed in to change notification settings - Fork 333
/
Copy pathdiscrete_iql.yaml
66 lines (56 loc) · 1007 Bytes
/
discrete_iql.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
# Task and environment settings.
env:
  # Environment id; referenced elsewhere in this file as ${env.name}.
  name: CartPole-v1
  # Empty string: no task is set for this environment.
  task: ''
  n_samples_stats: 1000
  seed: 0
  # Number of environments used for training and for evaluation.
  train_num_envs: 1
  eval_num_envs: 1
  backend: gymnasium
# Data collector settings.
collector:
  frames_per_batch: 200
  total_frames: 20000
  init_random_frames: 1000
  env_per_collector: 1
  # Explicit null instead of a bare `device:` so the "unset" intent is
  # unambiguous (a bare key also parses as null, but reads like a typo).
  # NOTE(review): presumably the consumer picks a default device when this
  # is null -- confirm against the training script.
  device: null
  max_frames_per_traj: 200
# Logger settings.
logger:
  backend: wandb
  project_name: torchrl_example_discrete_iql
  # ${env.name} is an interpolation that resolves to the `name` key nested
  # under `env`.
  exp_name: iql_${env.name}
  group_name: null
  log_interval: 5000  # record interval in frames
  eval_steps: 200
  mode: online
  eval_iter: 1000
  # Canonical lowercase boolean (was `False`, which only YAML 1.1 parsers
  # treat as a boolean).
  video: false
# Replay buffer settings.
replay_buffer:
  # NOTE(review): 0 looks like a disabled flag (prioritized replay buffer?)
  # -- confirm against the consumer before changing its type.
  prb: 0
  buffer_prefetch: 64
  # One million entries. Written without digit separators because
  # `1_000_000` is an integer only under YAML 1.1 rules; YAML 1.2 core-schema
  # parsers load it as a string.
  size: 1000000
# Optimization settings.
optim:
  utd_ratio: 1
  device: null
  # Written with an explicit decimal point: YAML 1.1 loaders such as stock
  # PyYAML read a bare `3e-4` as the string "3e-4", not as a float.
  lr: 3.0e-4
  weight_decay: 0.0
  batch_size: 256
# Network settings.
model:
  # Two hidden sizes of 256 each.
  hidden_sizes:
    - 256
    - 256
  activation: relu
# Loss settings.
loss:
  loss_function: l2
  gamma: 0.99
  hard_update_interval: 10
  # IQL-specific hyperparameters.
  temperature: 100
  expectile: 0.8
# Compilation settings.
compile:
  # The inner `compile` flag shares its name with the section, so it must be
  # nested: at the same level the key `compile` would be defined twice.
  # Booleans use canonical lowercase `false` (was `False`).
  compile: false
  compile_mode: default
  cudagraphs: false