zenml-io · stefannica · Sep 9, 2024 · Aug 29, 2024 · Aug 29, 2024 · Aug 29, 2024
diff --git a/examples/quickstart/README.md b/examples/quickstart/README.md
@@ -80,11 +80,11 @@ the individual steps in the [`steps`](steps) directory. The pipeline that connec
 the [`pipeline`](pipelines) directory.
 
 And here is how to run it. When you run the pipeline with the following command, you will be using the configuration
-[here](configs/training_local.yaml)
+[here](configs/training_default.yaml)
 
 ```bash
 # Run the pipeline locally
-python run.py --model_type=t5-small --orchestration_environment local
+python run.py --model_type=t5-small
 ```
 
 <img src=".assets/DAG.png" width="50%" alt="Dashboard view">
@@ -119,7 +119,7 @@ need an AWS stack registered in ZenML.
 zenml integration install aws s3 -y
 
 zenml stack set <INSERT_YOUR_STACK_NAME_HERE>
-python run.py --model_type=t5-small --orchestration_environment aws
+python run.py --model_type=t5-small
 ```
 
 You can edit `configs/training_aws.yaml` to adjust the settings for running your pipeline on AWS.
@@ -133,7 +133,7 @@ need an AWS stack registered in ZenML.
 zenml integration install gcp
 
 zenml stack set <INSERT_YOUR_STACK_NAME_HERE>
-python run.py --model_type=t5-small --orchestration_environment gcp
+python run.py --model_type=t5-small
 ```
 
 You can edit `configs/training_gcp.yaml` to adjust the settings.
@@ -144,7 +144,7 @@ You can edit `configs/training_gcp.yaml` to adjust the settings.
 zenml integration install azure
 
 zenml stack set <INSERT_YOUR_STACK_NAME_HERE>
-python run.py --model_type=t5-small --orchestration_environment azure
+python run.py --model_type=t5-small
 ```
 
 You can edit `configs/training_azure.yaml` to adjust the settings.

diff --git a/examples/quickstart/configs/training_aws.yaml b/examples/quickstart/configs/training_aws.yaml
@@ -2,12 +2,10 @@ enable_cache: True
 
 # Environment configuration
 settings:
-  resources:
-    memory: 64GB
   docker:
-    parent_image: "zenmldocker/zenml-public-pipelines:quickstart-0.65.0-py3.11-aws"
-    skip_build: False
-# Uncomment the following two lines to specify the accelerator for your aws orchestrator
+    parent_image: "339712793861.dkr.ecr.eu-central-1.amazonaws.com/zenml-public-pipelines:quickstart-0.65.0-py3.11-aws"
+    skip_build: True  # If you switch this to False, remove the parent_image
+    # requirements: requirements.txt  # Uncomment this to add your own dependencies
   orchestrator.sagemaker:
     instance_type: ml.m5.4xlarge
 
@@ -19,6 +17,7 @@ model:
 
 # Configure the pipeline
 parameters:
+  data_url: 'https://storage.googleapis.com/zenml-public-bucket/quickstart-files/translations.txt'
   # model_type: "t5-small"  # Choose between t5-small and t5-large
   num_train_epochs: 2
   per_device_train_batch_size: 4
@@ -35,6 +34,3 @@ steps:
       test_size: 0.1
       eval_size: 0.2
       random_state: 42
-  load_data:
-    parameters:
-      data_url: 'https://storage.googleapis.com/zenml-public-bucket/quickstart-files/translations.txt'
diff --git a/examples/quickstart/configs/training_azure.yaml b/examples/quickstart/configs/training_azure.yaml
@@ -1,8 +1,12 @@
+enable_cache: True
+
 # Environment configuration
 settings:
   docker:
     parent_image: "zenmldocker/zenml-public-pipelines:quickstart-0.65.0-py3.11-azure"
     skip_build: True
+    # requirements: requirements.txt  # Uncomment this to add your own dependencies
+
 # Uncomment the following two lines to specify the accelerator for your skypilot vm orchestrator
 #  orchestrator.vm_azure:
 #    instance_type: 'Standard_NC6'
@@ -15,6 +19,7 @@ model:
 
 # Configure the pipeline
 parameters:
+  data_url: 'https://storage.googleapis.com/zenml-public-bucket/quickstart-files/translations.txt'
   # model_type: "t5-small"  # Choose between t5-small and t5-large
   num_train_epochs: 2
   per_device_train_batch_size: 16
@@ -31,6 +36,3 @@ steps:
       test_size: 0.1
       eval_size: 0.2
       random_state: 42
-  load_data:
-    parameters:
-      data_url: 'https://storage.googleapis.com/zenml-public-bucket/quickstart-files/translations.txt'
diff --git a/examples/quickstart/configs/training_local.yaml b/examples/quickstart/configs/training_default.yaml
@@ -1,3 +1,8 @@
+# Environment configuration
+settings:
+  docker:
+    requirements: requirements.txt
+
 # Model Control Plane configuration
 model:
   name: YeOldeEnglishTranslator
@@ -6,6 +11,7 @@ model:
 
 # Configure the pipeline
 parameters:
+  data_url: 'https://storage.googleapis.com/zenml-public-bucket/quickstart-files/translations.txt'
   # model_type: "t5-small"  # Choose between t5-small and t5-large
   num_train_epochs: 1
   per_device_train_batch_size: 1
@@ -21,6 +27,3 @@ steps:
       test_size: 0.1
       eval_size: 0.2
       random_state: 42
-  load_data:
-    parameters:
-      data_url: 'https://storage.googleapis.com/zenml-public-bucket/quickstart-files/translations.txt'
diff --git a/examples/quickstart/configs/training_gcp.yaml b/examples/quickstart/configs/training_gcp.yaml
@@ -1,10 +1,11 @@
+enable_cache: True
+
 # Environment configuration
 settings:
-  resources:
-    memory: 64GB
   docker:
     parent_image: "zenmldocker/zenml-public-pipelines:quickstart-0.65.0-py3.11-gcp"
     skip_build: True
+    # requirements: requirements.txt  # Uncomment this to add your own dependencies
 # Uncomment the following two lines to specify the accelerator for your vertex orchestrator
 #  orchestrator.vertex:
 #    node_selector_constraint: ["cloud.google.com/gke-accelerator", "NVIDIA_TESLA_P4"]
@@ -17,6 +18,7 @@ model:
 
 # Configure the pipeline
 parameters:
+  data_url: 'https://storage.googleapis.com/zenml-public-bucket/quickstart-files/translations.txt'
   # model_type: "t5-small"  # Choose between t5-small and t5-large
   num_train_epochs: 2
   per_device_train_batch_size: 16
@@ -32,6 +34,3 @@ steps:
       test_size: 0.1
       eval_size: 0.2
       random_state: 42
-  load_data:
-    parameters:
-      data_url: 'https://storage.googleapis.com/zenml-public-bucket/quickstart-files/translations.txt'
diff --git a/examples/quickstart/pipelines/training.py b/examples/quickstart/pipelines/training.py
@@ -35,14 +35,15 @@
 
 @pipeline
 def english_translation_pipeline(
+    data_url: str,
     model_type: T5_Model,
     per_device_train_batch_size: int,
     gradient_accumulation_steps: int,
     dataloader_num_workers: int,
     num_train_epochs: int = 5,
 ):
     """Define a pipeline that connects the steps."""
-    full_dataset = load_data()
+    full_dataset = load_data(data_url)
     tokenized_dataset, tokenizer = tokenize_data(
         dataset=full_dataset, model_type=model_type
     )