From 47014d223593d07a0bb77303f89dc02e539e4ab6 Mon Sep 17 00:00:00 2001 From: Breda McColgan Date: Thu, 4 Jul 2024 11:45:15 +0100 Subject: [PATCH] ENG-8742: Updates pipeline example to remove workaround --- ...a-science-workloads-from-ds-pipelines.adoc | 51 +++++++++---------- 1 file changed, 24 insertions(+), 27 deletions(-) diff --git a/modules/running-distributed-data-science-workloads-from-ds-pipelines.adoc b/modules/running-distributed-data-science-workloads-from-ds-pipelines.adoc index 2ad5bd6f..99bd5205 100644 --- a/modules/running-distributed-data-science-workloads-from-ds-pipelines.adoc +++ b/modules/running-distributed-data-science-workloads-from-ds-pipelines.adoc @@ -110,29 +110,27 @@ from kfp import dsl def ray_fn(): import ray <1> - import time <2> - from codeflare_sdk import Cluster, ClusterConfiguration, generate_cert <3> + from codeflare_sdk import Cluster, ClusterConfiguration, generate_cert <2> - cluster = Cluster( <4> + cluster = Cluster( <3> ClusterConfiguration( - namespace="my_project", <5> + namespace="my_project", <4> name="raytest", num_workers=1, head_cpus="500m", min_memory=1, max_memory=1, num_gpus=0, - image="quay.io/rhoai/ray:2.23.0-py39-cu121", <6> - local_queue="local_queue_name", <7> + image="quay.io/rhoai/ray:2.23.0-py39-cu121", <5> + local_queue="local_queue_name", <6> ) ) print(cluster.status()) - cluster.up() <8> - // cluster.wait_ready() - time.sleep(180) <9> + cluster.up() <7> + cluster.wait_ready() <8> print(cluster.status()) print(cluster.details()) @@ -142,7 +140,7 @@ def ray_fn(): print(ray_dashboard_uri, ray_cluster_uri) # Enable Ray client to connect to secure Ray cluster that has mTLS enabled - generate_cert.generate_tls_cert(cluster.config.name, cluster.config.namespace) <10> + generate_cert.generate_tls_cert(cluster.config.name, cluster.config.namespace) <9> generate_cert.export_env(cluster.config.name, cluster.config.namespace) @@ -151,7 +149,7 @@ def ray_fn(): @ray.remote - def train_fn(): <11> + def train_fn(): <10> # complex training function return 100 @@ -159,12 +157,12 @@ def ray_fn(): result = ray.get(train_fn.remote()) assert 100 == result ray.shutdown() - cluster.down() <12> + cluster.down() <11> auth.logout() return result -@dsl.pipeline( <13> +@dsl.pipeline( <12> name="Ray Simple Example", description="Ray Simple Example", ) @@ -174,25 +172,24 @@ def ray_integration(): ray_fn() -if __name__ == '__main__': <14> +if __name__ == '__main__': <13> from kfp.compiler import Compiler Compiler().compile(ray_integration, 'compiled-example.yaml') ---- <1> Imports Ray. -<2> Imports the `time` package so that you can use the `sleep` function to wait during code execution, as a workaround for link:https://issues.redhat.com/browse/RHOAIENG-7346[RHOAIENG-7346]. -<3> Imports packages from the CodeFlare SDK to define the cluster functions. -<4> Specifies the Ray cluster configuration: replace these example values with the values for your Ray cluster. -<5> Optional: Specifies the project where the Ray cluster is created. Replace the example value with the name of your project. If you omit this line, the Ray cluster is created in the current project. -<6> Specifies the location of the Ray cluster image. If you are running this code in a disconnected environment, replace the default value with the location for your environment. -<7> Specifies the local queue to which the Ray cluster will be submitted. If a default local queue is configured, you can omit this line. -<8> Creates a Ray cluster by using the specified image and configuration. -<9> Waits until the Ray cluster is ready before proceeding. As a workaround for link:https://issues.redhat.com/browse/RHOAIENG-7346[RHOAIENG-7346], use `time.sleep(180)` instead of `cluster.wait_ready()`. -<10> Enables the Ray client to connect to a secure Ray cluster that has mutual Transport Layer Security (mTLS) enabled. mTLS is enabled by default in the CodeFlare component in {productname-short}. -<11> Replace the example details in this section with the details for your workload. -<12> Removes the Ray cluster when your workload is finished. -<13> Replace the example name and description with the values for your workload. -<14> Compiles the Python code and saves the output in a YAML file. +<2> Imports packages from the CodeFlare SDK to define the cluster functions. +<3> Specifies the Ray cluster configuration: replace these example values with the values for your Ray cluster. +<4> Optional: Specifies the project where the Ray cluster is created. Replace the example value with the name of your project. If you omit this line, the Ray cluster is created in the current project. +<5> Specifies the location of the Ray cluster image. If you are running this code in a disconnected environment, replace the default value with the location for your environment. +<6> Specifies the local queue to which the Ray cluster will be submitted. If a default local queue is configured, you can omit this line. +<7> Creates a Ray cluster by using the specified image and configuration. +<8> Waits until the Ray cluster is ready before proceeding. +<9> Enables the Ray client to connect to a secure Ray cluster that has mutual Transport Layer Security (mTLS) enabled. mTLS is enabled by default in the CodeFlare component in {productname-short}. +<10> Replace the example details in this section with the details for your workload. +<11> Removes the Ray cluster when your workload is finished. +<12> Replace the example name and description with the values for your workload. +<13> Compiles the Python code and saves the output in a YAML file. .. Compile the Python file (in this example, the `compile_example.py` file): +