Remove mixed precision hook as part of the unwrap_model #860

Merged · 4 commits · Nov 16, 2022
Changes from all commits
src/accelerate/accelerator.py (8 changes: 5 additions & 3 deletions)

@@ -1464,16 +1464,18 @@ def pad_across_processes(self, tensor, dim=0, pad_index=0, pad_first=False):
        """
        return pad_across_processes(tensor, dim=dim, pad_index=pad_index, pad_first=pad_first)

-    def unwrap_model(self, model):
+    def unwrap_model(self, model, keep_fp32_wrapper: bool = False):
        """
        Unwraps the `model` from the additional layer possible added by [`~Accelerator.prepare`]. Useful before saving
        the model.

        Args:
            model (`torch.nn.Module`):
                The model to unwrap.
+            keep_fp32_wrapper (`bool`, *optional*, defaults to `False`):
+                Whether to not remove the mixed precision hook if it was added.
        """
-        return extract_model_from_parallel(model)
+        return extract_model_from_parallel(model, keep_fp32_wrapper)

    def wait_for_everyone(self):
        """

@@ -1760,7 +1762,7 @@ def get_state_dict(self, model, unwrap=True):
        Args:
            model (`torch.nn.Module`):
                A PyTorch model sent through [`Accelerator.prepare`]
-            unwrap (`bool`, *optional*, defaults to True):
+            unwrap (`bool`, *optional*, defaults to `True`):
                Whether to return the original underlying state_dict of `model` or to return the wrapped state_dict
        """
        is_zero_3 = False
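With this change, `Accelerator.unwrap_model` strips the fp32 output hook by default, and `keep_fp32_wrapper=True` opts back into the previous behaviour. A minimal usage sketch, assuming a CUDA device so that `mixed_precision="fp16"` actually installs the hook; the tiny model and file name are placeholders:

import torch
from accelerate import Accelerator

accelerator = Accelerator(mixed_precision="fp16")
# prepare() wraps the model's forward so outputs come back as fp32
model = accelerator.prepare(torch.nn.Linear(8, 2))

# New default: the mixed precision hook is removed, so the returned module has a
# plain forward again, which is convenient before saving or exporting.
clean_model = accelerator.unwrap_model(model)
accelerator.save(clean_model.state_dict(), "model.bin")

# Opt out: keep the ConvertOutputsToFp32 wrapper on forward.
still_wrapped = accelerator.unwrap_model(model, keep_fp32_wrapper=True)

The flag exists for users who keep running inference on the unwrapped model and still want fp32 outputs under autocast rather than fp16.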
src/accelerate/utils/other.py (13 changes: 11 additions & 2 deletions)

@@ -21,6 +21,7 @@
from ..state import AcceleratorState
from .dataclasses import DistributedType
from .imports import is_deepspeed_available, is_tpu_available
+from .operations import ConvertOutputsToFp32


if is_deepspeed_available():

@@ -30,12 +31,15 @@
    import torch_xla.core.xla_model as xm


-def extract_model_from_parallel(model):
+def extract_model_from_parallel(model, keep_fp32_wrapper: bool = False):
    """
    Extract a model from its distributed containers.

    Args:
-        model (`torch.nn.Module`): The model to extract.
+        model (`torch.nn.Module`):
+            The model to extract.
+        keep_fp32_wrapper (`bool`, *optional*):
+            Whether to remove mixed precision hooks from the model.

    Returns:
        `torch.nn.Module`: The extracted model.

@@ -46,6 +50,11 @@ def extract_model_from_parallel(model):

    while isinstance(model, options):
        model = model.module
+
+    if not keep_fp32_wrapper:
+        forward = getattr(model, "forward")
+        if isinstance(forward, ConvertOutputsToFp32):
+            setattr(model, "forward", forward.model_forward)
    return model
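Mechanically, the new block in `extract_model_from_parallel` is just an attribute swap: mixed precision preparation replaces `model.forward` with a `ConvertOutputsToFp32` callable that keeps the original around as `model_forward`, and unwrapping puts that original back. A standalone sketch of the same idea, where `_Fp32Wrapper` is a hypothetical stand-in for the real hook class (only the `model_forward` attribute and the isinstance check are taken from the diff above):

import torch


class _Fp32Wrapper:
    """Stand-in for accelerate's ConvertOutputsToFp32: wraps a forward and keeps the original."""

    def __init__(self, model_forward):
        self.model_forward = model_forward

    def __call__(self, *args, **kwargs):
        # The real hook runs the wrapped forward and casts floating point outputs to fp32.
        return self.model_forward(*args, **kwargs).float()


model = torch.nn.Linear(4, 4)
# Roughly what prepare() does under mixed precision: shadow forward with the wrapper.
model.forward = _Fp32Wrapper(model.forward)

# What extract_model_from_parallel(..., keep_fp32_wrapper=False) now does:
forward = getattr(model, "forward")
if isinstance(forward, _Fp32Wrapper):
    setattr(model, "forward", forward.model_forward)

assert not isinstance(model.forward, _Fp32Wrapper)  # the plain bound forward is back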