Commit
adding training image creation code
deepanker13 committed Dec 12, 2023
1 parent ca9e7e3 commit d7b4ca4
Showing 4 changed files with 129 additions and 0 deletions.
23 changes: 23 additions & 0 deletions .github/workflows/publish-sdk-images.yaml
@@ -0,0 +1,23 @@
name: Publish Training Operator SDK Images

on:
  - pull_request

jobs:
  core:
    name: Publish Image
    uses: ./.github/workflows/build-and-publish-images.yaml
    with:
      component-name: ${{ matrix.component-name }}
      platforms: linux/amd64,linux/arm64,linux/ppc64le
      dockerfile: ${{ matrix.dockerfile }}
    secrets:
      DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }}
      DOCKERHUB_TOKEN: ${{ secrets.DOCKERHUB_TOKEN }}

    strategy:
      fail-fast: false
      matrix:
        include:
          - component-name: train-api-training-image
            dockerfile: sdk/python/kubeflow/training/training_container/Dockerfile
17 changes: 17 additions & 0 deletions sdk/python/kubeflow/training/training_container/Dockerfile
@@ -0,0 +1,17 @@
# Use an official PyTorch runtime as a parent image
FROM pytorch/pytorch:2.0.1-cuda11.7-cudnn8-runtime

# Set the working directory in the container
WORKDIR /app

# Copy the requirements.txt file into the container first so the
# dependency layer is cached across source-only rebuilds
COPY requirements.txt /app/requirements.txt

# Install any needed packages specified in requirements.txt
RUN pip install --no-cache-dir -r requirements.txt

# Copy the training script and the rest of the source code into the container
COPY . /app

# Run hf_llm_training.py when the container launches
ENTRYPOINT ["python", "hf_llm_training.py"]
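
Since the base image pins PyTorch 2.0.1 built against CUDA 11.7, a quick smoke test inside the container can confirm the runtime matches the tag. A minimal sketch, not part of this commit (the check itself is an assumption about how the image might be verified):

import torch

print(torch.__version__)          # expect 2.0.1, matching the base image tag
print(torch.version.cuda)         # expect 11.7
print(torch.cuda.is_available())  # True only when a GPU is attached to the container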
86 changes: 86 additions & 0 deletions sdk/python/kubeflow/training/training_container/hf_llm_training.py
@@ -0,0 +1,86 @@
import argparse
import json

from datasets import load_dataset
from peft import LoraConfig, get_peft_model
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    DataCollatorForLanguageModeling,
    Trainer,
    TrainingArguments,
)


def setup_model_and_tokenizer(token_dir, model_dir):
    # Set up the tokenizer and reuse the EOS token for padding
    tokenizer = AutoTokenizer.from_pretrained(
        token_dir, use_fast=False, trust_remote_code=True
    )
    tokenizer.pad_token = tokenizer.eos_token

    model = AutoModelForCausalLM.from_pretrained(
        model_dir,
        device_map="auto",
        trust_remote_code=True,
    )

    # Freeze the base model; only the LoRA adapters added later are trained
    for param in model.parameters():
        param.requires_grad = False

    return model, tokenizer

def load_and_preprocess_data(dataset_dir, tokenizer):
    # Load and tokenize the training split
    train_data = load_dataset(dataset_dir, split="train").map(
        lambda x: tokenizer(x["text"]), batched=True
    )

    # Use a dedicated eval split if the dataset ships one; otherwise
    # carve 200 examples out of the training data
    try:
        eval_data = load_dataset(dataset_dir, split="eval").map(
            lambda x: tokenizer(x["text"]), batched=True
        )
    except Exception:
        split = train_data.train_test_split(shuffle=True, test_size=200)
        train_data, eval_data = split["train"], split["test"]

    return train_data, eval_data

def setup_peft_model(model, lora_config):
    # Wrap the frozen base model with trainable LoRA adapters
    model = get_peft_model(model, LoraConfig(**lora_config))
    return model

def train_model(model, train_data, eval_data, tokenizer, train_params):
    # The data collator is an argument of Trainer, not of TrainingArguments
    trainer = Trainer(
        model=model,
        train_dataset=train_data,
        eval_dataset=eval_data,
        tokenizer=tokenizer,
        args=TrainingArguments(**train_params),
        data_collator=DataCollatorForLanguageModeling(
            tokenizer,
            pad_to_multiple_of=8,
            return_tensors="pt",
            mlm=False,
        ),
    )

    trainer.train()

def parse_arguments():
    parser = argparse.ArgumentParser(
        description="Script for training a model with PEFT configuration."
    )
    parser.add_argument("--model_dir", help="directory containing the model")
    parser.add_argument("--token_dir", help="directory containing the tokenizer")
    parser.add_argument("--dataset_dir", help="directory containing the dataset")
    parser.add_argument("--peft_config", help="JSON-encoded LoraConfig keyword arguments")
    parser.add_argument("--train_params", help="JSON-encoded Hugging Face TrainingArguments keyword arguments")

    return parser.parse_args()

if __name__ == "__main__":
    args = parse_arguments()
    model, tokenizer = setup_model_and_tokenizer(args.token_dir, args.model_dir)
    train_data, eval_data = load_and_preprocess_data(args.dataset_dir, tokenizer)
    model = setup_peft_model(model, json.loads(args.peft_config))
    train_model(model, train_data, eval_data, tokenizer, json.loads(args.train_params))
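
Because --peft_config and --train_params arrive as JSON strings on the command line, a caller has to serialize dictionaries whose keys match LoraConfig and TrainingArguments keyword arguments. A sketch of plausible payloads (the keys exist in peft 0.7.0 and transformers 4.35.2, but these particular values are illustrative assumptions, not defaults from this commit):

import json

# Illustrative LoraConfig kwargs -- values are assumptions, not from the commit
peft_config = {
    "r": 8,
    "lora_alpha": 16,
    "lora_dropout": 0.1,
    "task_type": "CAUSAL_LM",
}

# Illustrative TrainingArguments kwargs; output_dir is required by transformers
train_params = {
    "output_dir": "/app/output",
    "num_train_epochs": 1,
    "per_device_train_batch_size": 4,
}

print(json.dumps(peft_config))   # pass as --peft_config '<printed string>'
print(json.dumps(train_params))  # pass as --train_params '<printed string>'

The container entrypoint would then be invoked along the lines of: python hf_llm_training.py --model_dir ... --token_dir ... --dataset_dir ... --peft_config '<json>' --train_params '<json>'.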
3 changes: 3 additions & 0 deletions sdk/python/kubeflow/training/training_container/requirements.txt
@@ -0,0 +1,3 @@
peft==0.7.0
datasets==2.15.0
transformers==4.35.2
