Skip to content

Commit

Permalink
Fix import for HuggingFace Dataset Provider (#2085)
Browse files Browse the repository at this point in the history
Signed-off-by: Andrey Velichkevich <andrey.velichkevich@gmail.com>
  • Loading branch information
andreyvelich authored Apr 26, 2024
1 parent dd1226c commit f8f7363
Show file tree
Hide file tree
Showing 3 changed files with 11 additions and 13 deletions.
4 changes: 2 additions & 2 deletions examples/pytorch/language-modeling/train_api_hf_dataset.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
"from kubeflow.storage_initializer.hugging_face import (\n",
" HuggingFaceModelParams,\n",
" HuggingFaceTrainerParams,\n",
-" HfDatasetParams,\n",
+" HuggingFaceDatasetParams,\n",
")\n",
"from kubeflow.storage_initializer.constants import INIT_CONTAINER_MOUNT_PATH\n",
"from peft import LoraConfig\n",
Expand Down Expand Up @@ -70,7 +70,7 @@
" ),\n",
" # it is assumed for text related tasks, you have 'text' column in the dataset.\n",
" # for more info on how dataset is loaded check load_and_preprocess_data function in sdk/python/kubeflow/trainer/hf_llm_training.py\n",
-" dataset_provider_parameters=HfDatasetParams(repo_id=\"imdatta0/ultrachat_1k\"),\n",
+" dataset_provider_parameters=HuggingFaceDatasetParams(repo_id=\"imdatta0/ultrachat_1k\"),\n",
" trainer_parameters=HuggingFaceTrainerParams(\n",
" lora_config=LoraConfig(\n",
" r=8,\n",
Expand Down
16 changes: 7 additions & 9 deletions examples/pytorch/language-modeling/train_api_s3_dataset.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@
"from kubeflow.storage_initializer.hugging_face import (\n",
" HuggingFaceModelParams,\n",
" HuggingFaceTrainerParams,\n",
-" HfDatasetParams,\n",
")\n",
+"from kubeflow.storage_initializer.s3 import S3DatasetParams\n",
"from kubeflow.storage_initializer.constants import INIT_CONTAINER_MOUNT_PATH\n",
"from peft import LoraConfig\n",
"import transformers\n",
Expand Down Expand Up @@ -81,14 +81,12 @@
" # it is assumed for text related tasks, you have 'text' column in the dataset.\n",
" # for more info on how dataset is loaded check load_and_preprocess_data function in sdk/python/kubeflow/trainer/hf_llm_training.py\n",
" dataset_provider_parameters=S3DatasetParams(\n",
-" {\n",
-" \"endpoint_url\": \"http://10.117.63.3\",\n",
-" \"bucket_name\": \"test\",\n",
-" \"file_key\": \"imdatta0___ultrachat_1k\",\n",
-" \"region_name\": \"us-east-1\",\n",
-" \"access_key\": s3_access_key,\n",
-" \"secret_key\": s3_secret_key,\n",
-" }\n",
+" endpoint_url=\"http://10.117.63.3\",\n",
+" bucket_name=\"test\",\n",
+" file_key=\"imdatta0___ultrachat_1k\",\n",
+" region_name=\"us-east-1\",\n",
+" access_key=s3_access_key,\n",
+" secret_key=s3_secret_key,\n",
" ),\n",
" trainer_parameters=HuggingFaceTrainerParams(\n",
" lora_config=LoraConfig(\n",
Expand Down
4 changes: 2 additions & 2 deletions examples/pytorch/text-classification/Fine-Tune-BERT-LLM.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -614,7 +614,7 @@
"from kubeflow.storage_initializer.hugging_face import (\n",
" HuggingFaceModelParams,\n",
" HuggingFaceTrainerParams,\n",
-" HfDatasetParams,\n",
+" HuggingFaceDatasetParams,\n",
")\n",
"\n",
"import transformers\n",
Expand Down Expand Up @@ -646,7 +646,7 @@
" \"access_modes\": [\"ReadWriteOnce\"] # Since we use 1 Worker, PVC access mode is ReadWriteOnce.\n",
" },\n",
" # Use 3000 samples from Yelp dataset.\n",
-" dataset_provider_parameters=HfDatasetParams(\n",
+" dataset_provider_parameters=HuggingFaceDatasetParams(\n",
" repo_id=\"yelp_review_full\",\n",
" split=\"train[:3000]\",\n",
" ),\n",
Expand Down

0 comments on commit f8f7363

Please sign in to comment.