zenml-io · htahir1 · Oct 18, 2024 · Oct 14, 2024 · Oct 14, 2024 · Oct 14, 2024
diff --git a/docs/book/component-guide/orchestrators/sagemaker.md b/docs/book/component-guide/orchestrators/sagemaker.md
@@ -84,7 +84,7 @@ zenml orchestrator register <ORCHESTRATOR_NAME> \
 zenml stack register <STACK_NAME> -o <ORCHESTRATOR_NAME> ... --set
 ```
 
-See the [`SagemakerOrchestratorConfig` SDK Docs](https://sdkdocs.zenml.io/latest/integration\_code\_docs/integrations-aws/#zenml.integrations.aws.flavors.sagemaker\_orchestrator\_flavor) for more information on available configuration options.
+See the [`SagemakerOrchestratorConfig` SDK Docs](https://sdkdocs.zenml.io/latest/integration_code_docs/integrations-aws/#zenml.integrations.aws.flavors.sagemaker_orchestrator_flavor.SagemakerOrchestratorConfig) for more information on available configuration options.
 {% endtab %}
 
 {% tab title="Implicit Authentication" %}
@@ -189,13 +189,13 @@ For example, if your ZenML component is configured to use `ml.c5.xlarge` with 40
 
 Check out [this docs page](../../how-to/use-configuration-files/runtime-configuration.md) for more information on how to specify settings in general.
 
-For more information and a full list of configurable attributes of the Sagemaker orchestrator, check out the [SDK Docs](https://sdkdocs.zenml.io/latest/integration\_code\_docs/integrations-aws/#zenml.integrations.aws.orchestrators.sagemaker\_orchestrator.SagemakerOrchestrator) .
+For more information and a full list of configurable attributes of the SageMaker orchestrator, check out the [SDK Docs](https://sdkdocs.zenml.io/latest/integration_code_docs/integrations-aws/#zenml.integrations.aws.flavors.sagemaker_orchestrator_flavor.SagemakerOrchestratorSettings).
 
 ### Using Warm Pools for your pipelines
 
 [Warm Pools in SageMaker](https://docs.aws.amazon.com/sagemaker/latest/dg/train-warm-pools.html) can significantly reduce the startup time of your pipeline steps, leading to faster iterations and improved development efficiency. This feature keeps compute instances in a "warm" state, ready to quickly start new jobs.
 
-To enable Warm Pools, use the `SagemakerOrchestratorSettings` class:
+To enable Warm Pools, use the [`SagemakerOrchestratorSettings`](https://sdkdocs.zenml.io/latest/integration_code_docs/integrations-aws/#zenml.integrations.aws.flavors.sagemaker_orchestrator_flavor.SagemakerOrchestratorSettings) class:
 
 ```python
 sagemaker_orchestrator_settings = SagemakerOrchestratorSettings(
@@ -290,6 +290,51 @@ sagemaker_orchestrator_settings = SagemakerOrchestratorSettings(
 Using multichannel output or an output mode other than `EndOfJob` will make it impossible to use TrainingStep and Warm Pools. See the corresponding section of this document for details.
 {% endhint %}
 
+### Tagging SageMaker Pipeline Executions and Jobs
+
+The SageMaker orchestrator allows you to add tags to your pipeline executions and individual jobs. Here's how you can apply tags at both the pipeline and step levels:
+
+```python
+from zenml import pipeline, step
+from zenml.integrations.aws.flavors.sagemaker_orchestrator_flavor import SagemakerOrchestratorSettings
+
+# Define settings for the pipeline
+pipeline_settings = SagemakerOrchestratorSettings(
+    pipeline_tags={
+        "project": "my-ml-project",
+        "environment": "production",
+    }
+)
+
+# Define settings for a specific step
+step_settings = SagemakerOrchestratorSettings(
+    tags={
+        "step": "data-preprocessing",
+        "owner": "data-team"
+    }
+)
+
+@step(settings={"orchestrator": step_settings})
+def preprocess_data():
+    # Your preprocessing code here
+    pass
+
+@pipeline(settings={"orchestrator": pipeline_settings})
+def my_training_pipeline():
+    preprocess_data()
+    # Other steps...
+
+# Run the pipeline
+my_training_pipeline()
+```
+
+In this example:
+
+- The `pipeline_tags` are applied to the entire SageMaker pipeline execution. They are also propagated automatically by AWS down to the child SageMaker jobs.
+- The `tags` in `step_settings` are applied to the specific SageMaker job for the `preprocess_data` step.
+
+This approach allows for more granular tagging, giving you flexibility in how you categorize and manage your SageMaker resources. You can view and manage these tags through the AWS Management Console, the AWS CLI, or the API calls related to your SageMaker resources.
+
 ### Enabling CUDA for GPU-backed hardware
 
 Note that if you wish to use this orchestrator to run steps on a GPU, you will need to follow [the instructions on this page](../../how-to/training-with-gpus/training-with-gpus.md) to ensure that it works. This requires some additional settings customization and is essential for enabling CUDA so that the GPU can deliver its full acceleration.

diff --git a/src/zenml/integrations/aws/flavors/sagemaker_orchestrator_flavor.py b/src/zenml/integrations/aws/flavors/sagemaker_orchestrator_flavor.py
@@ -52,6 +52,8 @@ class SagemakerOrchestratorSettings(BaseSettings):
         max_runtime_in_seconds: The maximum runtime in seconds for the
             processing job.
         tags: Tags to apply to the Processor/Estimator assigned to the step.
+        pipeline_tags: Tags to apply to the pipeline via the
+            sagemaker.workflow.pipeline.Pipeline.create method.
         processor_tags: DEPRECATED: use `tags` instead.
         keep_alive_period_in_seconds: The time in seconds after which the
             provisioned instance will be terminated if not used. This is only
@@ -107,6 +109,7 @@ class SagemakerOrchestratorSettings(BaseSettings):
     volume_size_in_gb: int = 30
     max_runtime_in_seconds: int = 86400
     tags: Dict[str, str] = {}
+    pipeline_tags: Dict[str, str] = {}
     keep_alive_period_in_seconds: Optional[int] = 300  # 5 minutes
     use_training_step: Optional[bool] = None
 

diff --git a/src/zenml/integrations/aws/orchestrators/sagemaker_orchestrator.py b/src/zenml/integrations/aws/orchestrators/sagemaker_orchestrator.py
@@ -332,6 +332,7 @@ def prepare_or_run_pipeline(
                     else None
                 ),
             )
+
             args_for_step_executor.setdefault(
                 "instance_type", step_settings.instance_type
             )
@@ -457,7 +458,13 @@ def prepare_or_run_pipeline(
             sagemaker_session=session,
         )
 
-        pipeline.create(role_arn=self.config.execution_role)
+        settings = cast(
+            SagemakerOrchestratorSettings, self.get_settings(deployment)
+        )
+
+        pipeline.create(
+            role_arn=self.config.execution_role, tags=settings.pipeline_tags
+        )
         execution = pipeline.start()
         logger.warning(
             "Steps can take 5-15 minutes to start running "
@@ -467,9 +474,6 @@ def prepare_or_run_pipeline(
         # Yield metadata based on the generated execution object
         yield from self.compute_metadata(execution=execution)
 
-        settings = cast(
-            SagemakerOrchestratorSettings, self.get_settings(deployment)
-        )
         # mainly for testing purposes, we wait for the pipeline to finish
         if settings.synchronous:
             logger.info(