Skip to content

Commit

Permalink
[ML] Singularity JobSchema: Support JobResourceConfiguration.locations (
Browse files Browse the repository at this point in the history
#29200)

* [ML] Singularity JobSchema: Support JobResourceConfiguration.locations
  • Loading branch information
TonyJ1 authored Mar 9, 2023
1 parent f94e35d commit 6ec3c2b
Show file tree
Hide file tree
Showing 9 changed files with 427 additions and 9 deletions.
5 changes: 3 additions & 2 deletions sdk/ml/azure-ai-ml/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,9 @@
- Removed Experimental Tag from Image Metadata on Compute Instances.
- Added support for feature-set and feature-store-entity operations.
- Added support for data binding on outputs inside dynamic arguments for dsl pipeline
- Added support for serverless compute in pipeline job
- Added support for serverless compute in command, automl and sweep job
- Added support for serverless compute in pipeline, command, automl and sweep job
- Added support for `job_tier` and `priority` in job
- Added support for passing `locations` via the command function, setting it on `JobResourceConfiguration.locations`

### Bugs Fixed

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@


class JobResourceConfigurationSchema(ResourceConfigurationSchema):
locations = fields.List(fields.Str())
shm_size = fields.Str(
metadata={
"description": (
Expand Down
3 changes: 3 additions & 0 deletions sdk/ml/azure-ai-ml/azure/ai/ml/entities/_builders/command.py
Original file line number Diff line number Diff line change
Expand Up @@ -357,6 +357,7 @@ def set_resources(
*,
instance_type: Optional[Union[str, List[str]]] = None,
instance_count: Optional[int] = None,
locations: Optional[List[str]] = None,
properties: Optional[Dict] = None,
docker_args: Optional[str] = None,
shm_size: Optional[str] = None,
Expand All @@ -366,6 +367,8 @@ def set_resources(
if self.resources is None:
self.resources = JobResourceConfiguration()

if locations is not None:
self.resources.locations = locations
if instance_type is not None:
self.resources.instance_type = instance_type
if instance_count is not None:
Expand Down
19 changes: 16 additions & 3 deletions sdk/ml/azure-ai-ml/azure/ai/ml/entities/_builders/command_func.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
# pylint: disable=protected-access

import os
from typing import Callable, Dict, Optional, Tuple, Union
from typing import Callable, Dict, List, Optional, Tuple, Union

from azure.ai.ml.constants._common import AssetTypes, LegacyAssetTypes
from azure.ai.ml.constants._component import ComponentSource
Expand Down Expand Up @@ -124,6 +124,7 @@ def command(
outputs: Optional[Dict] = None,
instance_count: Optional[int] = None,
instance_type: Optional[str] = None,
locations: Optional[List[str]] = None,
docker_args: Optional[str] = None,
shm_size: Optional[str] = None,
timeout: Optional[int] = None,
Expand Down Expand Up @@ -173,6 +174,8 @@ def command(
:vartype instance_count: int
:param instance_type: Optional type of VM used as supported by the compute target.
:vartype instance_type: str
:param locations: Optional list of locations where the job can run.
:vartype locations: List[str]
:param docker_args: Extra arguments to pass to the Docker run command. This would override any
parameters that have already been set by the system, or in this section. This parameter is only
supported for Azure ML compute types.
Expand Down Expand Up @@ -244,9 +247,19 @@ def command(
**kwargs,
)

if instance_count is not None or instance_type is not None or docker_args is not None or shm_size is not None:
if (
locations is not None
or instance_count is not None
or instance_type is not None
or docker_args is not None
or shm_size is not None
):
command_obj.set_resources(
instance_count=instance_count, instance_type=instance_type, docker_args=docker_args, shm_size=shm_size
locations=locations,
instance_count=instance_count,
instance_type=instance_type,
docker_args=docker_args,
shm_size=shm_size,
)

if timeout is not None:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

import json
import logging
from typing import Any, Dict, Optional
from typing import Any, Dict, Optional, List

from azure.ai.ml._restclient.v2023_02_01_preview.models import JobResourceConfiguration as RestJobResourceConfiguration
from azure.ai.ml.constants._job.job import JobComputePropertyFields
Expand Down Expand Up @@ -88,6 +88,8 @@ class JobResourceConfiguration(RestTranslatableMixin, DictMixin):
:param instance_count: Optional number of instances or nodes used by the compute target.
:type instance_count: int
:param locations: Optional list of locations where the job can run.
:vartype locations: List[str]
:param instance_type: Optional type of VM used as supported by the compute target.
:type instance_type: str
:param properties: Additional properties bag.
Expand All @@ -105,13 +107,15 @@ class JobResourceConfiguration(RestTranslatableMixin, DictMixin):
def __init__(
self,
*,
locations: Optional[List[str]] = None,
instance_count: Optional[int] = None,
instance_type: Optional[str] = None,
properties: Optional[Dict[str, Any]] = None,
docker_args: Optional[str] = None,
shm_size: Optional[str] = None,
**kwargs
): # pylint: disable=unused-argument
self.locations = locations
self.instance_count = instance_count
self.instance_type = instance_type
self.shm_size = shm_size
Expand All @@ -134,6 +138,7 @@ def properties(self, properties: Dict[str, Any]):

def _to_rest_object(self) -> RestJobResourceConfiguration:
return RestJobResourceConfiguration(
locations=self.locations,
instance_count=self.instance_count,
instance_type=self.instance_type,
properties=self.properties.as_dict(),
Expand All @@ -146,6 +151,7 @@ def _from_rest_object(cls, obj: Optional[RestJobResourceConfiguration]) -> Optio
if obj is None:
return None
return JobResourceConfiguration(
locations=obj.locations,
instance_count=obj.instance_count,
instance_type=obj.instance_type,
properties=obj.properties,
Expand All @@ -158,7 +164,8 @@ def __eq__(self, other: object) -> bool:
if not isinstance(other, JobResourceConfiguration):
return NotImplemented
return (
self.instance_count == other.instance_count
self.locations == other.locations
and self.instance_count == other.instance_count
and self.instance_type == other.instance_type
and self.docker_args == other.docker_args
and self.shm_size == other.shm_size
Expand All @@ -171,6 +178,8 @@ def __ne__(self, other: object) -> bool:

def _merge_with(self, other: "JobResourceConfiguration") -> None:
if other:
if other.locations:
self.locations = other.locations
if other.instance_count:
self.instance_count = other.instance_count
if other.instance_type:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from azure.ai.ml._restclient.v2023_02_01_preview.models import ListViewType
from azure.ai.ml._utils._arm_id_utils import AMLVersionedArmId
from azure.ai.ml.constants._common import COMMON_RUNTIME_ENV_VAR, LOCAL_COMPUTE_TARGET, TID_FMT, AssetTypes
from azure.ai.ml.entities import AmlTokenConfiguration
from azure.ai.ml.entities import AmlTokenConfiguration, QueueSettings
from azure.ai.ml.entities._assets._artifacts.data import Data
from azure.ai.ml.entities._job.command_job import CommandJob
from azure.ai.ml.entities._job.distribution import MpiDistribution
Expand Down Expand Up @@ -79,6 +79,37 @@ def test_command_job(self, randstr: Callable[[], str], client: MLClient) -> None
assert command_job_2.compute == "cpu-cluster"
check_tid_in_url(client, command_job_2)

@pytest.mark.e2etest
def test_command_job_with_singularity(self, randstr: Callable[[], str], client: MLClient) -> None:
    """Submit a command job to a Singularity virtual cluster and verify that
    queue settings and resource locations round-trip through create and get."""
    # Expected values pinned by the test config YAML.
    expected_env = "azureml:AzureML-sklearn-1.0-ubuntu20.04-py38-cpu:33"
    expected_compute = (
        "/subscriptions/79a1ba0c-35bb-436b-bff2-3074d5ff1f89/resourceGroups/Runtime"
        "/providers/Microsoft.MachineLearningServices/virtualclusters/centeuapvc"
    )

    job_name = randstr("job_name")
    loaded = load_job(
        source="./tests/test_configs/command_job/command_job_singularity_test.yml",
        params_override=[{"name": job_name}],
    )

    # Submit and check the service echoed back the Singularity-specific fields.
    created: CommandJob = client.jobs.create_or_update(job=loaded)
    assert created.queue_settings == QueueSettings(job_tier="premium", priority="medium")
    assert created.resources.locations == ["westus", "eastus"]
    assert created.name == job_name
    assert created.status in RunHistoryConstants.IN_PROGRESS_STATUSES
    assert created.environment == expected_env
    assert created.compute == expected_compute
    check_tid_in_url(client, created)

    # Re-fetch by name and confirm the stored job matches what was submitted.
    fetched = client.jobs.get(job_name)
    assert fetched.name == created.name
    assert fetched.environment == expected_env
    assert fetched.compute == expected_compute
    assert fetched.queue_settings == created.queue_settings
    assert fetched.resources.locations == created.resources.locations
    check_tid_in_url(client, fetched)

@pytest.mark.e2etest
def test_command_job_with_dataset(self, randstr: Callable[[], str], client: MLClient) -> None:
# TODO: need to create a workspace under a e2e-testing-only subscription and resource group
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,7 @@ def test_command_job_builder_serialization(self) -> None:
outputs={"best_model": {}},
instance_count=2,
instance_type="STANDARD_BLA",
locations=["westus"],
timeout=300,
code="./",
queue_settings=QueueSettings(job_tier="standard", priorty="medium"),
Expand All @@ -154,7 +155,7 @@ def test_command_job_builder_serialization(self) -> None:
environment_variables={"EVN1": "VAR1"},
outputs={"best_model": {}},
limits=CommandJobLimits(timeout=300),
resources=JobResourceConfiguration(instance_count=2, instance_type="STANDARD_BLA"),
resources=JobResourceConfiguration(instance_count=2, instance_type="STANDARD_BLA", locations=["westus"]),
queue_settings=QueueSettings(job_tier="standard", priorty="medium"),
code="./",
)
Expand Down
Loading

0 comments on commit 6ec3c2b

Please sign in to comment.