Skip to content

Commit

Permalink
feat: Support NFS for Ray cluster creation
Browse files: browse the repository at this point in the history
PiperOrigin-RevId: 698972917
  • Loading branch information
yinghsienwu authored and copybara-github committed Nov 22, 2024
1 parent 653ba88 commit 1ca9a05
Show file tree
Hide file tree
Showing 4 changed files with 45 additions and 1 deletion.
15 changes: 14 additions & 1 deletion google/cloud/aiplatform/vertex_ray/cluster_init.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
from google.cloud.aiplatform import utils
from google.cloud.aiplatform.utils import resource_manager_utils
from google.cloud.aiplatform_v1beta1.types import persistent_resource_service

from google.cloud.aiplatform_v1beta1.types.machine_resources import NfsMount
from google.cloud.aiplatform_v1beta1.types.persistent_resource import (
PersistentResource,
RayLogsSpec,
Expand Down Expand Up @@ -64,6 +64,7 @@ def create_ray_cluster(
enable_logging: Optional[bool] = True,
psc_interface_config: Optional[resources.PscIConfig] = None,
reserved_ip_ranges: Optional[List[str]] = None,
nfs_mounts: Optional[List[resources.NfsMount]] = None,
labels: Optional[Dict[str, str]] = None,
) -> str:
"""Create a ray cluster on the Vertex AI.
Expand Down Expand Up @@ -312,6 +313,17 @@ def create_ray_cluster(
ray_metric_spec=ray_metric_spec,
ray_logs_spec=ray_logs_spec,
)
if nfs_mounts:
gapic_nfs_mounts = []
for nfs_mount in nfs_mounts:
gapic_nfs_mounts.append(
NfsMount(
server=nfs_mount.server,
path=nfs_mount.path,
mount_point=nfs_mount.mount_point,
)
)
ray_spec.nfs_mounts = gapic_nfs_mounts
if service_account:
service_account_spec = ServiceAccountSpec(
enable_custom_service_account=True,
Expand All @@ -329,6 +341,7 @@ def create_ray_cluster(
)
else:
gapic_psc_interface_config = None

persistent_resource = PersistentResource(
resource_pools=resource_pools,
network=network,
Expand Down
18 changes: 18 additions & 0 deletions google/cloud/aiplatform/vertex_ray/util/resources.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,24 @@ class PscIConfig:
network_attachment: str = None


@dataclasses.dataclass
class NfsMount:
    """Description of a single NFS share to mount.

    Every field is declared with a ``None`` default, so the dataclass itself
    does not enforce that the values documented as required are supplied.

    Attributes:
        server: Required. IP address of the NFS server.
        path: Required. Path exported by the NFS server. It has to begin
            with '/'; together with the server address it identifies the
            mount source in the form ``server:path``.
        mount_point: Required. Destination of the mount. The share is made
            available to the user under /mnt/nfs/<mount_point>.
    """

    # Field order is significant: it defines the positional order accepted by
    # the generated ``__init__``.
    server: str = None
    path: str = None
    mount_point: str = None


@dataclasses.dataclass
class Cluster:
"""Ray cluster (output only).
Expand Down
1 change: 1 addition & 0 deletions tests/unit/vertex_ray/test_cluster_init.py
Original file line number Diff line number Diff line change
Expand Up @@ -351,6 +351,7 @@ def test_create_ray_cluster_1_pool_custom_image_success(
network=tc.ProjectConstants.TEST_VPC_NETWORK,
cluster_name=tc.ClusterConstants.TEST_VERTEX_RAY_PR_ID,
custom_images=custom_images,
nfs_mounts=[tc.ClusterConstants.TEST_NFS_MOUNT],
)

assert tc.ClusterConstants.TEST_VERTEX_RAY_PR_ADDRESS == cluster_name
Expand Down
12 changes: 12 additions & 0 deletions tests/unit/vertex_ray/test_constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,14 @@
from google.cloud.aiplatform.vertex_ray.util.resources import Cluster
from google.cloud.aiplatform.vertex_ray.util.resources import (
AutoscalingSpec,
NfsMount,
PscIConfig,
Resources,
)
from google.cloud.aiplatform_v1beta1.types.machine_resources import DiskSpec
from google.cloud.aiplatform_v1beta1.types.machine_resources import (
NfsMount as GapicNfsMount,
)
from google.cloud.aiplatform_v1beta1.types.machine_resources import (
MachineSpec,
)
Expand Down Expand Up @@ -105,6 +109,12 @@ class ClusterConstants:
TEST_VERTEX_RAY_PR_ADDRESS = (
f"{ProjectConstants.TEST_PARENT}/persistentResources/" + TEST_VERTEX_RAY_PR_ID
)
TEST_NFS_MOUNT = NfsMount(
server="10.10.10.10", path="nfs_path", mount_point="nfs_mount_point"
)
TEST_GAPIC_NFS_MOUNT = GapicNfsMount(
server="10.10.10.10", path="nfs_path", mount_point="nfs_mount_point"
)
TEST_CPU_IMAGE_2_9 = "us-docker.pkg.dev/vertex-ai/training/ray-cpu.2-9.py310:latest"
TEST_GPU_IMAGE_2_9 = "us-docker.pkg.dev/vertex-ai/training/ray-gpu.2-9.py310:latest"
TEST_CPU_IMAGE_2_33 = (
Expand Down Expand Up @@ -177,6 +187,7 @@ class ClusterConstants:
resource_pool_images={"head-node": TEST_CUSTOM_IMAGE},
ray_metric_spec=RayMetricSpec(disabled=False),
ray_logs_spec=RayLogsSpec(disabled=False),
nfs_mounts=[TEST_GAPIC_NFS_MOUNT],
),
),
psc_interface_config=None,
Expand Down Expand Up @@ -227,6 +238,7 @@ class ClusterConstants:
ray_spec=RaySpec(
resource_pool_images={"head-node": TEST_CUSTOM_IMAGE},
ray_metric_spec=RayMetricSpec(disabled=False),
nfs_mounts=[TEST_GAPIC_NFS_MOUNT],
),
),
psc_interface_config=None,
Expand Down

0 comments on commit 1ca9a05

Please sign in to comment.