
adding deepspeedplugin configs
Ubuntu committed Mar 18, 2022
1 parent 6126ec2 commit 6f42b70
Showing 4 changed files with 18 additions and 14 deletions.
configs/training/diff_lora.json (8 changes: 4 additions & 4 deletions)
@@ -2,8 +2,8 @@
"input_length" : 512,
"output_length" : 512,
"num_train_epochs" : 1,
"output_dir" : "outputs/GPT2_large_10_1e-4_diff_lora",
"dataset" : "wikipedia_0910_gpt2",
"output_dir" : "outputs/GPT2_large_12_1e-4_diff_lora",
"dataset" : "wikipedia_1112_gpt2",
"dataset_version" : "full",
"train_batch_size" : 8,
"learning_rate" : 1e-4,
@@ -18,9 +18,9 @@
"CUDA_VISIBLE_DEVICES" : "0,1,2,3,4,5,6,7",
"wandb_log": true,
"wandb_project": "ever_changing",
"wandb_run_name" : "GPT2_large_10_1e-4_diff_lora",
"wandb_run_name" : "GPT2_large_12_1e-4_diff_lora",
"mode" : "pretrain",
"use_lr_scheduling" : true,
"check_validation" : false,
"checkpoint_path" : "outputs/GPT2_large_09_1e-4_diff_lora_/epoch=0"
"checkpoint_path" : "outputs/GPT2_11_lora"
}
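The values edited above (output_dir, dataset, wandb_run_name, checkpoint_path) live in a flat JSON training config. As a rough illustration of how such a file is typically consumed, here is a minimal sketch that loads it into an attribute-style namespace; the actual loading code in run.py is not part of this diff, so the --config flag and variable names below are assumptions for illustration only.

import argparse
import json

# Hypothetical loader for a training config such as configs/training/diff_lora.json.
parser = argparse.ArgumentParser()
parser.add_argument('--config', default='configs/training/diff_lora.json')
cli_args = parser.parse_args()

with open(cli_args.config) as f:
    hparam = argparse.Namespace(**json.load(f))  # keys become attributes, e.g. hparam.output_dir

print(hparam.dataset, hparam.wandb_run_name, hparam.checkpoint_path)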
configs/training/diff_recadam.json (8 changes: 4 additions & 4 deletions)
@@ -2,8 +2,8 @@
"input_length" : 512,
"output_length" : 512,
"num_train_epochs" : 1,
"output_dir" : "outputs/GPT2_large_09_1e-4_diff_lora",
"dataset" : "wikipedia_0809_gpt2",
"output_dir" : "outputs/GPT2_large_12_1e-4_diff_recadam",
"dataset" : "wikipedia_1112_gpt2",
"dataset_version" : "full",
"train_batch_size" : 8,
"learning_rate" : 1e-4,
@@ -18,9 +18,9 @@
"CUDA_VISIBLE_DEVICES" : "0,1,2,3,4,5,6,7",
"wandb_log": true,
"wandb_project": "ever_changing",
"wandb_run_name" : "GPT2_large_09_1e-4_diff_lora",
"wandb_run_name" : "GPT2_large_12_1e-4_diff_recadam",
"mode" : "pretrain",
"use_lr_scheduling" : true,
"check_validation" : false,
"checkpoint_path" : "outputs/GPT2_large_08_full.ckpt"
"checkpoint_path" : "outputs/GPT2_large_11_1e-4_diff_recadam_/epoch=0"
}
convert_to_fp32.py (4 changes: 2 additions & 2 deletions)
@@ -3,8 +3,8 @@
  # lightning deepspeed has saved a directory instead of a file

  ##### Please modify this part #####
- checkpoint_dir = 'outputs/GPT2_large_09_1e-4_diff_lora'
- output_path = 'outputs/GPT2_large_09_1e-4_diff_lora_/'
+ checkpoint_dir = 'outputs/GPT2_large_12_1e-4_diff_lora'
+ output_path = 'outputs/GPT2_large_12_1e-4_diff_lora_/'

  if not os.path.isdir(output_path):
      os.mkdir(output_path)
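For context on the two paths edited above: as the comment in the file notes, Lightning's DeepSpeed integration saves a checkpoint directory of sharded ZeRO states rather than a single file, and convert_to_fp32.py merges that directory back into one fp32 checkpoint. Below is a minimal sketch of such a conversion using PyTorch Lightning's convert_zero_checkpoint_to_fp32_state_dict utility; the rest of convert_to_fp32.py is not visible in this diff, so the use of this helper and the output filename are assumptions.

import os
from pytorch_lightning.utilities.deepspeed import convert_zero_checkpoint_to_fp32_state_dict

# Paths taken from the diff above; the 'epoch=0' output filename is an assumption
# based on the checkpoint_path values used in the training configs.
checkpoint_dir = 'outputs/GPT2_large_12_1e-4_diff_lora'    # DeepSpeed checkpoint directory
output_path = 'outputs/GPT2_large_12_1e-4_diff_lora_/'     # destination for the merged fp32 file

if not os.path.isdir(output_path):
    os.mkdir(output_path)

# Collect the sharded ZeRO states and write a single fp32 state dict to disk.
convert_zero_checkpoint_to_fp32_state_dict(checkpoint_dir, os.path.join(output_path, 'epoch=0'))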
run.py (12 changes: 8 additions & 4 deletions)
@@ -9,12 +9,11 @@
  import torch
  import pytorch_lightning as pl
  from pytorch_lightning.loggers import WandbLogger
- # from T5_Model import T5
- # from GPT2_Model import GPT2
  from transformers import T5Tokenizer, GPT2Tokenizer
  from models import load_model

  from pytorch_lightning.callbacks.model_checkpoint import ModelCheckpoint
+ from pytorch_lightning.plugins import DeepSpeedPlugin

  def set_seed(seed):
      random.seed(seed)
@@ -78,6 +77,10 @@ def set_seed(seed):
      wandb_logger = WandbLogger(project=hparam.wandb_project, name=hparam.wandb_run_name, entity="lklab_kaist")
  else:
      wandb_logger = None

+ #Change accelerator to DeepSpeedPlugin
+ if hparam.accelerator=='deepspeed_stage_2':
+     hparam.accelerator = DeepSpeedPlugin(stage=2, load_full_weights=True)
+
  #Setting configurations
  args_dict = dict(
@@ -116,8 +119,9 @@ def set_seed(seed):
  args = argparse.Namespace(**args_dict)

  #Setting different val & checkpoint saving config for mode
- if args.mode=='pretrain_brute':
-     saving_epoch = (args.num_files//2)
+ if args.mode=='pretrain_chunks':
+     #saving_epoch = int(args.num_files // 3)
+     saving_epoch = 1
  else:
      saving_epoch = 1

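The hunk above swaps the accelerator string 'deepspeed_stage_2' for a DeepSpeedPlugin instance before the Trainer is built. The Trainer construction itself is outside this diff, so the sketch below only shows one way such a plugin is commonly handed to pytorch_lightning around version 1.5; the argument name and the other Trainer settings are assumptions, not the repository's actual call.

import pytorch_lightning as pl
from pytorch_lightning.plugins import DeepSpeedPlugin

# ZeRO stage 2 with full (non-sharded) checkpoint loading, as in the diff above.
deepspeed = DeepSpeedPlugin(stage=2, load_full_weights=True)

trainer = pl.Trainer(
    gpus=8,             # assumption: matches CUDA_VISIBLE_DEVICES "0,...,7" in the configs
    precision=16,       # assumption: DeepSpeed is usually run in fp16
    max_epochs=1,       # matches num_train_epochs in the configs
    plugins=deepspeed,  # the DeepSpeedPlugin instance takes the place of the accelerator string
)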

