From 1b65756d82aba5626569ef4fa2342fe7793d9514 Mon Sep 17 00:00:00 2001 From: zhouyu5 Date: Tue, 10 Oct 2023 21:15:18 +0000 Subject: [PATCH] update readme for merge model --- .../deltatuner/finetune/merge_model/readme.md | 19 ++++++------------- .../finetune/merge_model/ssf-merge-test.sh | 1 + 2 files changed, 7 insertions(+), 13 deletions(-) diff --git a/tests/deltatuner/finetune/merge_model/readme.md b/tests/deltatuner/finetune/merge_model/readme.md index dba41f0c9..517c8a45e 100644 --- a/tests/deltatuner/finetune/merge_model/readme.md +++ b/tests/deltatuner/finetune/merge_model/readme.md @@ -27,7 +27,6 @@ python instruction_tuning_pipeline/finetune_clm.py \ --output_dir "$DATA_PATH/llama2-7b-ssf-denas-bf16" \ --delta ssf \ --denas True \ - --bf16 True \ | tee llama2-7b-ssf-denas-bf16-1epoch.log ``` @@ -39,13 +38,10 @@ python instruction_tuning_pipeline/finetune_clm.py \ --model_name_or_path "$DATA_PATH/Llama-2-7b-hf" \ --train_file "$DATA_PATH/alpaca_data.json" \ --dataset_concatenation \ - --per_device_train_batch_size 8 \ --per_device_eval_batch_size 8 \ --gradient_accumulation_steps 1 \ --validation_split_percentage 30 \ --do_eval \ - --learning_rate 1e-4 \ - --num_train_epochs 1 \ --logging_steps 100 \ --save_total_limit 1 \ --log_level info \ @@ -54,22 +50,18 @@ python instruction_tuning_pipeline/finetune_clm.py \ --no_cuda \ --output_dir "$DATA_PATH/llama2-7b-ssf-denas-bf16-merge" \ --delta ssf \ - --bf16 True \ --resume_peft "$DATA_PATH/llama2-7b-ssf-denas-bf16" \ --save_merged_model True \ - --denas "$DATA_PATH/llama2-7b-ssf-denas-bf16/best_model_structure.txt" \ + --merge_model_code_dir "instruction_tuning_pipeline/models/llama2-ssf" \ --debugs ``` -### 3. Evaluate merged model As ssf will enable bias, while the default Llama2 disable all the bias, to enable the full parameters of the adapter, we need to change the model definition. -First copy the updated model codes with the merged weights. 
-```shell -cp instruction_tuning_pipeline/models/llama2-ssf/* $DATA_PATH/llama2-7b-ssf-denas-bf16-merge/merged_model -``` +First, specify the `merge_model_code_dir` argument; it will copy the updated model code along with the merged weights. -Then update the "best_model_structure" and "target_modules" setting in config.json. if not enable denas and not change "target_modules" default settings, can skip correpsonding setting. +Then it will automatically update the "best_model_structure" and "target_modules" settings in `config.json`. If "denas" is not enabled or "target_modules" is left at its default, the corresponding setting is skipped. +The changed `config.json` looks like this: ```shell ... "best_model_structure": {"num_hidden_layers": [1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1]}, # change to your best structure, skip to keep default @@ -77,6 +69,8 @@ Then update the "best_model_structure" and "target_modules" setting in config.js ... ``` +### 3. Evaluate merged model + Finally we can directly evalute the merged model. ```shell python instruction_tuning_pipeline/finetune_clm.py \ --model_name_or_path "$DATA_PATH/llama2-7b-ssf-denas-bf16-merge/merged_model" \ --train_file "$DATA_PATH/alpaca_data.json" \ --dataset_concatenation \ --per_device_eval_batch_size 8 \ --validation_split_percentage 30 \ --do_eval \ --logging_steps 100 \ --save_total_limit 1 \ --log_level info \ --save_strategy epoch \ --trust_remote_code True \ --no_cuda \ --output_dir "$DATA_PATH/llama2-7b-ssf-denas-bf16-merge/eval_merge" \ - --bf16 True ``` \ No newline at end of file diff --git a/tests/deltatuner/finetune/merge_model/ssf-merge-test.sh b/tests/deltatuner/finetune/merge_model/ssf-merge-test.sh index 1acd03e39..4b19fb5d7 100755 --- a/tests/deltatuner/finetune/merge_model/ssf-merge-test.sh +++ b/tests/deltatuner/finetune/merge_model/ssf-merge-test.sh @@ -19,6 +19,7 @@ python instruction_tuning_pipeline/finetune_clm.py \ --delta ssf \ --resume_peft "$DATA_PATH/mpt-7b-ssf-allmodules-denas-bf16" \ --save_merged_model True \ + --merge_model_code_dir instruction_tuning_pipeline/models/llama2-ssf \ --debugs