Skip to content

Commit

Permalink
Prepare for public use / Test new docker image
Browse files Browse the repository at this point in the history
  • Loading branch information
sofiacharnota committed Nov 2, 2023
1 parent f59fd03 commit a4b5451
Show file tree
Hide file tree
Showing 6 changed files with 27 additions and 33 deletions.
7 changes: 7 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -151,3 +151,10 @@ vh pipeline run training-pipeline --adhoc
The completed pipeline view:

![alt text](https://github.com/valohai/mistral-example/blob/main/screenshots/completed_pipeline.jpeg)


The response generated by the model looks like this:

![alt text](https://github.com/valohai/mistral-example/blob/main/screenshots/inference_result.jpeg)

Keep in mind that the model was fine-tuned for only a limited number of steps, so achieving satisfactory results may require further experimentation with the model parameters (for example, a larger `max_steps` value).
2 changes: 1 addition & 1 deletion data-preprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ def __init__(self, args):

@staticmethod
def load_datasets():
data_path = '/valohai/inputs/dataset/viggo.py'
data_path = valohai.inputs('dataset').path()
train_dataset = load_dataset(data_path, split='train')
eval_dataset = load_dataset(data_path, split='validation')
test_dataset = load_dataset(data_path, split='test')
Expand Down
4 changes: 2 additions & 2 deletions finetune-mistral.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ def setup_datasets(self):
self.tokenized_eval_dataset = datasets.load_from_disk(os.path.dirname(val_path))

def setup_model(self):
base_model_id = '/valohai/inputs/model/'
base_model_id = self.args.base_mistral_model
bnb_config = BitsAndBytesConfig(
load_in_4bit=True,
bnb_4bit_use_double_quant=True,
Expand All @@ -48,7 +48,6 @@ def setup_model(self):
self.model = AutoModelForCausalLM.from_pretrained(
base_model_id,
quantization_config=bnb_config,
local_files_only=True,
)
self.tokenizer = AutoTokenizer.from_pretrained(
base_model_id,
Expand Down Expand Up @@ -171,6 +170,7 @@ def on_log(self, args, state, control, logs=None, **kwargs):

# Add arguments based on your script's needs
# fmt: off
parser.add_argument("--base_mistral_model", type=str, default="mistralai/Mistral-7B-v0.1", help="Base mistral from hugging face")
parser.add_argument("--output_dir", type=str, default="finetuned_mistral", help="Output directory for checkpoints")
parser.add_argument("--model_max_length", type=int, default=512, help="Maximum length for the model")
parser.add_argument("--warmup_steps", type=int, default=5, help="Warmup steps")
Expand Down
6 changes: 3 additions & 3 deletions inference-mistral.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ def load_checkpoint(self, model_path, checkpoint_path):
bnb_4bit_quant_type='nf4',
bnb_4bit_compute_dtype=torch.bfloat16,
)
model = AutoModelForCausalLM.from_pretrained(model_path, local_files_only=True, quantization_config=bnb_config)
model = AutoModelForCausalLM.from_pretrained(model_path, quantization_config=bnb_config)
ft_model = PeftModel.from_pretrained(model, checkpoint_path)
return ft_model.eval()

Expand All @@ -47,17 +47,17 @@ def generate_response(self, prompt, max_tokens=50):


def main(args):
model_path = '/valohai/inputs/model-base/'
checkpoint_path = '/valohai/inputs/finetuned-checkpoint/'

inference = ModelInference(model_path, checkpoint_path, args.prompt)
inference = ModelInference(args.base_mistral_model, checkpoint_path, args.prompt)
response = inference.generate_response(args.prompt, args.max_tokens)
print('Generated Response:')
print(response)


if __name__ == '__main__':
parser = argparse.ArgumentParser(description='Fine-tuned Model Inference')
parser.add_argument("--base_mistral_model", type=str, default="mistralai/Mistral-7B-v0.1", help="Base mistral from hugging face")
parser.add_argument('--prompt', type=str, help='Input prompt for text generation')
parser.add_argument('--max_tokens', type=int, default=50, help='Maximum number of tokens in the generated response')

Expand Down
Binary file added screenshots/inference_result.jpeg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
41 changes: 14 additions & 27 deletions valohai.yaml
Original file line number Diff line number Diff line change
@@ -1,14 +1,8 @@
- step:
name: data-preprocess
image: sofiiavalohai/valohai-llms:v1.5
image: sofiiavalohai/llm-toolkit:v1.0
environment: staging-aws-eu-west-1-p3-2xlarge
command:
- python -m pip install --upgrade pip
- pip install valohai-utils peft accelerate bitsandbytes torch>=1.10
- pip install -U transformers
- pip install -q -U bitsandbytes
- pip install -q trl xformers wandb datasets einops gradio sentencepiece
- pip install -q -U datasets scipy ipywidgets
- python data-preprocess.py {parameters}
parameters:
- name: tokenizer
Expand All @@ -19,21 +13,21 @@
default: 512
inputs:
- name: dataset
default: s3://dd-sample-bucket/mistral/gem-viggo-dataset/*
default:
- s3://dd-sample-bucket/mistral/gem-viggo-dataset/viggo.py
- s3://dd-sample-bucket/mistral/gem-viggo-dataset/test.csv
- s3://dd-sample-bucket/mistral/gem-viggo-dataset/train.csv
- s3://dd-sample-bucket/mistral/gem-viggo-dataset/validation.csv

- step:
name: finetune
image: sofiiavalohai/valohai-llms:v1.5
image: sofiiavalohai/llm-toolkit:v1.0
environment: staging-aws-eu-west-1-p3-2xlarge
command:
- python -m pip install --upgrade pip
- pip install valohai-utils peft accelerate bitsandbytes torch>=1.10
- pip install -U transformers
- pip install -q -U bitsandbytes
- pip install -q trl xformers wandb datasets einops gradio sentencepiece
- pip install -q -U datasets scipy ipywidgets
- python finetune-mistral.py {parameters}
parameters:
- name: base_mistral_model
default: "mistralai/Mistral-7B-v0.1"
- name: output_dir
type: string
default: "finetuned_mistral"
Expand All @@ -45,7 +39,7 @@
default: 5
- name: max_steps
type: integer
default: 150
default: 15
- name: learning_rate
type: float
default: 2.5e-5
Expand All @@ -59,31 +53,23 @@
default: dataset://viggo/dev_test
- name: val_data
default: dataset://viggo/dev_val
- name: model
default: s3://dd-sample-bucket/mistral/model-checkpoint/*

- step:
name: inference
image: sofiiavalohai/valohai-llms:v1.5
image: sofiiavalohai/llm-toolkit:v1.0
environment: staging-aws-eu-west-1-p3-2xlarge
command:
- python -m pip install --upgrade pip
- pip install valohai-utils peft accelerate bitsandbytes torch>=1.10
- pip install -U transformers
- pip install -q -U bitsandbytes
- pip install -q trl xformers wandb datasets einops gradio sentencepiece
- pip install -q -U datasets scipy ipywidgets
- python inference-mistral.py {parameters}
parameters:
- name: base_mistral_model
default: "mistralai/Mistral-7B-v0.1"
- name: prompt
type: string
default: "give_opinion(name[SpellForce 3], rating[poor], genres[real-time strategy, role-playing], player_perspective[bird view])"
- name: max_tokens
type: integer
default: 305
inputs:
- name: model-base
default: s3://dd-sample-bucket/mistral/model-checkpoint/*
- name: finetuned-checkpoint
default: dataset://mistral-models/best_mistral_checkpoint
- name: test_data
Expand All @@ -106,5 +92,6 @@
- [preprocess.output.encoded_train/*, train.input.train_data]
- [preprocess.output.encoded_test/*, train.input.test_data]
- [train.output.finetuned_mistral.best_model/*, inference.input.finetuned-checkpoint]
- [train.parameter.base_mistral_model, inference.parameter.base_mistral_model]


0 comments on commit a4b5451

Please sign in to comment.