From 57f38fa773a8aeedbd4ba8d9350043ed68c4df18 Mon Sep 17 00:00:00 2001
From: Wanchao Liang
Date: Fri, 8 Mar 2024 14:52:45 -0800
Subject: [PATCH] set betas and weight decay for optimizers according to
 suggestions in
 https://github.com/pytorch/torchtrain/issues/118#issuecomment-1986470746

ghstack-source-id: d03375a1eedd660d116b25609db906eaf9cc2c50
Pull Request resolved: https://github.com/pytorch/torchtrain/pull/123
---
 train.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/train.py b/train.py
index 15f0e493a..c8101bbcc 100644
--- a/train.py
+++ b/train.py
@@ -65,9 +65,10 @@ def build_optimizer(model, job_config: JobConfig):
     name = job_config.optimizer.name
     lr = job_config.optimizer.lr
     if name == "Adam":
-        optimizer = torch.optim.Adam(model.parameters(), lr=lr)
+        # TODO: make the optimizer options configurable by toml/cmd args
+        optimizer = torch.optim.Adam(model.parameters(), lr=lr, betas=(0.9, 0.95), weight_decay=0.1)
     elif name == "AdamW":
-        optimizer = torch.optim.AdamW(model.parameters(), lr=lr)
+        optimizer = torch.optim.AdamW(model.parameters(), lr=lr, betas=(0.9, 0.95), weight_decay=0.1)
     else:
         raise NotImplementedError(f"optimizer {name} not added")
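
A possible follow-up for the TODO added in this patch (making the optimizer
options configurable by toml/cmd args) is sketched below. This is only an
illustration, not part of the patch: the `betas` and `weight_decay` fields on
`job_config.optimizer` are hypothetical and do not exist in the current
JobConfig; `torch` and `JobConfig` are assumed to be imported as in train.py.

    def build_optimizer(model, job_config: JobConfig):
        name = job_config.optimizer.name
        lr = job_config.optimizer.lr
        # Hypothetical config fields; fall back to the values hard-coded in this patch.
        betas = getattr(job_config.optimizer, "betas", (0.9, 0.95))
        weight_decay = getattr(job_config.optimizer, "weight_decay", 0.1)
        if name == "Adam":
            optimizer = torch.optim.Adam(
                model.parameters(), lr=lr, betas=betas, weight_decay=weight_decay
            )
        elif name == "AdamW":
            optimizer = torch.optim.AdamW(
                model.parameters(), lr=lr, betas=betas, weight_decay=weight_decay
            )
        else:
            raise NotImplementedError(f"optimizer {name} not added")
        return optimizer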