Commit cbf0112

fix: use bf16 to avoid overflow
1 parent 6257d0b commit cbf0112

File tree

3 files changed: +6 -6 lines changed

applications/Chat/examples/train_prompts.py

+4 -4

@@ -65,8 +65,8 @@ def main(args):
     if args.rm_path is not None:
         reward_model.load_state_dict(state_dict, strict=False)

-    initial_model.to(torch.float16).to(torch.cuda.current_device())
-    reward_model.to(torch.float16).to(torch.cuda.current_device())
+    initial_model.to(torch.bfloat16).to(torch.cuda.current_device())
+    reward_model.to(torch.bfloat16).to(torch.cuda.current_device())

     if args.model == 'gpt2':
         actor = GPTActor(pretrained=args.pretrain, lora_rank=args.lora_rank)
@@ -95,8 +95,8 @@ def main(args):
     del state_dict

     if args.strategy != 'colossalai_gemini':
-        critic.to(torch.float16).to(torch.cuda.current_device())
-        actor.to(torch.float16).to(torch.cuda.current_device())
+        critic.to(torch.bfloat16).to(torch.cuda.current_device())
+        actor.to(torch.bfloat16).to(torch.cuda.current_device())

     # configure optimizer
     if args.strategy.startswith('colossalai'):

applications/Chat/examples/train_reward_model.py

+1 -1

@@ -45,7 +45,7 @@ def train(args):
     else:
         raise ValueError(f'Unsupported model "{args.model}"')

-    model.to(torch.float16).to(torch.cuda.current_device())
+    model.to(torch.bfloat16).to(torch.cuda.current_device())

     if args.model_path is not None:
         state_dict = torch.load(args.model_path)

applications/Chat/examples/train_sft.py

+1 -1

@@ -60,7 +60,7 @@ def train(args):
     else:
        raise ValueError(f'Unsupported model "{args.model}"')

-    model.to(torch.float16).to(torch.cuda.current_device())
+    model.to(torch.bfloat16).to(torch.cuda.current_device())

     # configure tokenizer
     if args.model == 'gpt2':
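Why the change helps (illustrative note, not part of the commit): all three scripts cast models to half precision before moving them to the GPU, and torch.float16 saturates to inf for magnitudes above roughly 65504, whereas torch.bfloat16 keeps float32's 8-bit exponent (range up to ~3.4e38) at the cost of mantissa precision. A minimal PyTorch sketch of the difference:

import torch

x = torch.tensor([100000.0])   # exceeds the float16 maximum (~65504)
print(x.to(torch.float16))     # overflows to inf
print(x.to(torch.bfloat16))    # stays finite, only rounded: bf16 trades precision for fp32's exponent range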
