From 88bf3dbb71f8a8165172cf0fcead0317d5a6882d Mon Sep 17 00:00:00 2001 From: Sourab Mangrulkar <13534540+pacman100@users.noreply.github.com> Date: Mon, 13 Feb 2023 14:41:50 +0530 Subject: [PATCH] deepspeed `hidden_size` auto value default fixes --- src/accelerate/accelerator.py | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/src/accelerate/accelerator.py b/src/accelerate/accelerator.py index c8eb9952161..bb095e264cc 100644 --- a/src/accelerate/accelerator.py +++ b/src/accelerate/accelerator.py @@ -1108,15 +1108,20 @@ def _prepare_deepspeed(self, *args): ) if model is not None: - if hasattr(model, "config") and hasattr(model.config, "hidden_size"): - hidden_size = model.config.hidden_size - config_kwargs.update( - { - "zero_optimization.reduce_bucket_size": hidden_size * hidden_size, - "zero_optimization.stage3_prefetch_bucket_size": 0.9 * hidden_size * hidden_size, - "zero_optimization.stage3_param_persistence_threshold": 10 * hidden_size, - } + if hasattr(model, "config"): + hidden_size = ( + max(model.config.hidden_sizes) + if getattr(model.config, "hidden_sizes", None) + else getattr(model.config, "hidden_size", None) ) + if hidden_size is not None: + config_kwargs.update( + { + "zero_optimization.reduce_bucket_size": hidden_size * hidden_size, + "zero_optimization.stage3_prefetch_bucket_size": 0.9 * hidden_size * hidden_size, + "zero_optimization.stage3_param_persistence_threshold": 10 * hidden_size, + } + ) if isinstance(optimizer, (DummyOptim)): config_kwargs.update(