Skip to content

Commit ec6bf4a

Browse files
authored
[CLI][SDK] Add support for V3 deployment types (#109)
1 parent a60db1d commit ec6bf4a

File tree

5 files changed

+134
-82
lines changed

5 files changed

+134
-82
lines changed

centml/cli/cluster.py

Lines changed: 64 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -7,21 +7,26 @@
77
from centml.sdk.api import get_centml_client
88

99

10+
# convert deployment type enum to a user friendly name
1011
depl_type_to_name_map = {
11-
DeploymentType.INFERENCE: 'inference',
12-
DeploymentType.COMPUTE: 'compute',
13-
DeploymentType.COMPILATION: 'compilation',
14-
DeploymentType.INFERENCE_V2: 'inference',
15-
DeploymentType.COMPUTE_V2: 'compute',
16-
DeploymentType.CSERVE: 'cserve',
17-
DeploymentType.CSERVE_V2: 'cserve',
18-
DeploymentType.RAG: 'rag',
12+
DeploymentType.INFERENCE: "inference",
13+
DeploymentType.COMPUTE: "compute",
14+
DeploymentType.COMPILATION: "compilation",
15+
DeploymentType.INFERENCE_V2: "inference",
16+
DeploymentType.INFERENCE_V3: "inference",
17+
DeploymentType.COMPUTE_V2: "compute",
18+
# For user, they are all cserve.
19+
DeploymentType.CSERVE: "cserve",
20+
DeploymentType.CSERVE_V2: "cserve",
21+
DeploymentType.CSERVE_V3: "cserve",
22+
DeploymentType.RAG: "rag",
1923
}
24+
# use latest type for user requests
2025
depl_name_to_type_map = {
21-
'inference': DeploymentType.INFERENCE_V2,
22-
'cserve': DeploymentType.CSERVE_V2,
23-
'compute': DeploymentType.COMPUTE_V2,
24-
'rag': DeploymentType.RAG,
26+
"inference": DeploymentType.INFERENCE_V3,
27+
"cserve": DeploymentType.CSERVE_V3,
28+
"compute": DeploymentType.COMPUTE_V2,
29+
"rag": DeploymentType.RAG,
2530
}
2631

2732

@@ -56,6 +61,21 @@ def _format_ssh_key(ssh_key):
5661
return ssh_key[:32] + "..."
5762

5863

64+
def _get_replica_info(deployment):
65+
"""Extract replica information handling V2/V3 field differences"""
66+
# Check actual deployment object fields rather than depl_type
67+
# since unified get_cserve() can return either V2 or V3 objects
68+
if hasattr(deployment, 'min_replicas'):
69+
# V3 deployment response object
70+
return {"min": deployment.min_replicas, "max": deployment.max_replicas}
71+
elif hasattr(deployment, 'min_scale'):
72+
# V2 deployment response object
73+
return {"min": deployment.min_scale, "max": deployment.max_scale}
74+
else:
75+
# Fallback - shouldn't happen
76+
return {"min": "N/A", "max": "N/A"}
77+
78+
5979
def _get_ready_status(cclient, deployment):
6080
api_status = deployment.status
6181
service_status = (
@@ -121,12 +141,12 @@ def get(type, id):
121141
with get_centml_client() as cclient:
122142
depl_type = depl_name_to_type_map[type]
123143

124-
if depl_type == DeploymentType.INFERENCE_V2:
125-
deployment = cclient.get_inference(id)
144+
if depl_type in [DeploymentType.INFERENCE_V2, DeploymentType.INFERENCE_V3]:
145+
deployment = cclient.get_inference(id) # handles both V2 and V3
126146
elif depl_type == DeploymentType.COMPUTE_V2:
127147
deployment = cclient.get_compute(id)
128-
elif depl_type == DeploymentType.CSERVE_V2:
129-
deployment = cclient.get_cserve(id)
148+
elif depl_type in [DeploymentType.CSERVE_V2, DeploymentType.CSERVE_V3]:
149+
deployment = cclient.get_cserve(id) # handles both V2 and V3
130150
else:
131151
sys.exit("Please enter correct deployment type")
132152

@@ -150,21 +170,18 @@ def get(type, id):
150170
)
151171

152172
click.echo("Additional deployment configurations:")
153-
if depl_type == DeploymentType.INFERENCE_V2:
154-
click.echo(
155-
tabulate(
156-
[
157-
("Image", deployment.image_url),
158-
("Container port", deployment.container_port),
159-
("Healthcheck", deployment.healthcheck or "/"),
160-
("Replicas", {"min": deployment.min_scale, "max": deployment.max_scale}),
161-
("Environment variables", deployment.env_vars or "None"),
162-
("Max concurrency", deployment.concurrency or "None"),
163-
],
164-
tablefmt="rounded_outline",
165-
disable_numparse=True,
166-
)
167-
)
173+
if depl_type in [DeploymentType.INFERENCE_V2, DeploymentType.INFERENCE_V3]:
174+
replica_info = _get_replica_info(deployment)
175+
display_rows = [
176+
("Image", deployment.image_url),
177+
("Container port", deployment.container_port),
178+
("Healthcheck", deployment.healthcheck or "/"),
179+
("Replicas", replica_info),
180+
("Environment variables", deployment.env_vars or "None"),
181+
("Max concurrency", deployment.concurrency or "None"),
182+
]
183+
184+
click.echo(tabulate(display_rows, tablefmt="rounded_outline", disable_numparse=True))
168185
elif depl_type == DeploymentType.COMPUTE_V2:
169186
click.echo(
170187
tabulate(
@@ -173,25 +190,22 @@ def get(type, id):
173190
disable_numparse=True,
174191
)
175192
)
176-
elif depl_type == DeploymentType.CSERVE_V2:
177-
click.echo(
178-
tabulate(
179-
[
180-
("Hugging face model", deployment.recipe.model),
181-
(
182-
"Parallelism",
183-
{
184-
"tensor": deployment.recipe.additional_properties['tensor_parallel_size'],
185-
"pipeline": deployment.recipe.additional_properties['pipeline_parallel_size'],
186-
},
187-
),
188-
("Replicas", {"min": deployment.min_scale, "max": deployment.max_scale}),
189-
("Max concurrency", deployment.concurrency or "None"),
190-
],
191-
tablefmt="rounded_outline",
192-
disable_numparse=True,
193-
)
194-
)
193+
elif depl_type in [DeploymentType.CSERVE_V2, DeploymentType.CSERVE_V3]:
194+
replica_info = _get_replica_info(deployment)
195+
display_rows = [
196+
("Hugging face model", deployment.recipe.model),
197+
(
198+
"Parallelism",
199+
{
200+
"tensor": deployment.recipe.additional_properties.get("tensor_parallel_size", "N/A"),
201+
"pipeline": deployment.recipe.additional_properties.get("pipeline_parallel_size", "N/A"),
202+
},
203+
),
204+
("Replicas", replica_info),
205+
("Max concurrency", deployment.concurrency or "None"),
206+
]
207+
208+
click.echo(tabulate(display_rows, tablefmt="rounded_outline", disable_numparse=True))
195209

196210

197211
@click.command(help="Delete a deployment")

centml/sdk/api.py

Lines changed: 42 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,10 @@
44
from platform_api_python_client import (
55
DeploymentType,
66
DeploymentStatus,
7-
CreateInferenceDeploymentRequest,
7+
CreateInferenceV3DeploymentRequest,
88
CreateComputeDeploymentRequest,
9-
CreateCServeV2DeploymentRequest,
9+
CreateCServeV3DeploymentRequest,
10+
ApiException,
1011
Metric,
1112
)
1213

@@ -27,31 +28,59 @@ def get_status(self, id):
2728
return self._api.get_deployment_status_deployments_status_deployment_id_get(id)
2829

2930
def get_inference(self, id):
30-
return self._api.get_inference_deployment_deployments_inference_deployment_id_get(id)
31+
"""Get Inference deployment details - automatically handles both V2 and V3 deployments"""
32+
# Try V3 first (recommended), fallback to V2 if deployment is V2
33+
try:
34+
return self._api.get_inference_v3_deployment_deployments_inference_v3_deployment_id_get(id)
35+
except ApiException as e:
36+
# If V3 fails with 404 or similar, try V2
37+
if e.status in [404, 400]: # Deployment might be V2 or endpoint not found
38+
try:
39+
return self._api.get_inference_deployment_deployments_inference_deployment_id_get(id)
40+
except ApiException as v2_error:
41+
# If both fail, raise the original V3 error as it's more likely to be the real issue
42+
raise e from v2_error
43+
else:
44+
# For other errors (auth, network, etc.), raise immediately
45+
raise
3146

3247
def get_compute(self, id):
3348
return self._api.get_compute_deployment_deployments_compute_deployment_id_get(id)
3449

3550
def get_cserve(self, id):
36-
return self._api.get_cserve_v2_deployment_deployments_cserve_v2_deployment_id_get(id)
37-
38-
def create_inference(self, request: CreateInferenceDeploymentRequest):
39-
return self._api.create_inference_deployment_deployments_inference_post(request)
51+
"""Get CServe deployment details - automatically handles both V2 and V3 deployments"""
52+
# Try V3 first (recommended), fallback to V2 if deployment is V2
53+
try:
54+
return self._api.get_cserve_v3_deployment_deployments_cserve_v3_deployment_id_get(id)
55+
except ApiException as e:
56+
# If V3 fails with 404 or similar, try V2
57+
if e.status in [404, 400]: # Deployment might be V2 or endpoint not found
58+
try:
59+
return self._api.get_cserve_v2_deployment_deployments_cserve_v2_deployment_id_get(id)
60+
except ApiException as v2_error:
61+
# If both fail, raise the original V3 error as it's more likely to be the real issue
62+
raise e from v2_error
63+
else:
64+
# For other errors (auth, network, etc.), raise immediately
65+
raise
66+
67+
def create_inference(self, request: CreateInferenceV3DeploymentRequest):
68+
return self._api.create_inference_v3_deployment_deployments_inference_v3_post(request)
4069

4170
def create_compute(self, request: CreateComputeDeploymentRequest):
4271
return self._api.create_compute_deployment_deployments_compute_post(request)
4372

44-
def create_cserve(self, request: CreateCServeV2DeploymentRequest):
45-
return self._api.create_cserve_v2_deployment_deployments_cserve_v2_post(request)
73+
def create_cserve(self, request: CreateCServeV3DeploymentRequest):
74+
return self._api.create_cserve_v3_deployment_deployments_cserve_v3_post(request)
4675

47-
def update_inference(self, deployment_id: int, request: CreateInferenceDeploymentRequest):
48-
return self._api.update_inference_deployment_deployments_inference_put(deployment_id, request)
76+
def update_inference(self, deployment_id: int, request: CreateInferenceV3DeploymentRequest):
77+
return self._api.update_inference_v3_deployment_deployments_inference_v3_put(deployment_id, request)
4978

5079
def update_compute(self, deployment_id: int, request: CreateComputeDeploymentRequest):
5180
return self._api.update_compute_deployment_deployments_compute_put(deployment_id, request)
5281

53-
def update_cserve(self, deployment_id: int, request: CreateCServeV2DeploymentRequest):
54-
return self._api.update_cserve_v2_deployment_deployments_cserve_v2_put(deployment_id, request)
82+
def update_cserve(self, deployment_id: int, request: CreateCServeV3DeploymentRequest):
83+
return self._api.update_cserve_v3_deployment_deployments_cserve_v3_put(deployment_id, request)
5584

5685
def _update_status(self, id, new_status):
5786
status_req = platform_api_python_client.DeploymentStatusRequest(status=new_status)

examples/sdk/create_cserve.py

Lines changed: 16 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,18 @@
11
import centml
22
from centml.sdk.api import get_centml_client
3-
from centml.sdk import DeploymentType, CreateCServeV2DeploymentRequest, CServeV2Recipe
3+
from centml.sdk import DeploymentType, CreateCServeV3DeploymentRequest, CServeV2Recipe
44

55

66
def get_fastest_cserve_config(cclient, name, model):
77
fastest = cclient.get_cserve_recipe(model=model)[0].fastest
88

9-
return CreateCServeV2DeploymentRequest(
9+
return CreateCServeV3DeploymentRequest(
1010
name=name,
1111
cluster_id=cclient.get_cluster_id(fastest.hardware_instance_id),
1212
hardware_instance_id=fastest.hardware_instance_id,
1313
recipe=fastest.recipe,
14-
min_scale=1,
15-
max_scale=1,
14+
min_replicas=1,
15+
max_replicas=1,
1616
env_vars={},
1717
)
1818

@@ -22,41 +22,44 @@ def get_default_cserve_config(cclient, name, model):
2222

2323
hardware_instance = cclient.get_hardware_instances(cluster_id=1001)[0]
2424

25-
return CreateCServeV2DeploymentRequest(
25+
return CreateCServeV3DeploymentRequest(
2626
name=name,
2727
cluster_id=hardware_instance.cluster_id,
2828
hardware_instance_id=hardware_instance.id,
2929
recipe=default_recipe,
30-
min_scale=1,
31-
max_scale=1,
30+
min_replicas=1,
31+
max_replicas=1,
3232
env_vars={},
3333
)
3434

3535

3636
def main():
3737
with get_centml_client() as cclient:
3838
### Get the configurations for the Qwen model
39-
qwen_config = get_fastest_cserve_config(cclient, name="qwen-fastest", model="Qwen/Qwen2-VL-7B-Instruct")
40-
#qwen_config = get_default_cserve_config(cclient, name="qwen-default", model="Qwen/Qwen2-VL-7B-Instruct")
39+
qwen_config = get_fastest_cserve_config(
40+
cclient, name="qwen-fastest", model="Qwen/Qwen2-VL-7B-Instruct"
41+
)
42+
# qwen_config = get_default_cserve_config(cclient, name="qwen-default", model="Qwen/Qwen2-VL-7B-Instruct")
4143

4244
### Modify the recipe if necessary
4345
qwen_config.recipe.additional_properties["max_num_seqs"] = 512
4446

45-
# Create CServeV2 deployment
47+
# Create CServeV3 deployment
4648
response = cclient.create_cserve(qwen_config)
4749
print("Create deployment response: ", response)
4850

4951
### Get deployment details
50-
deployment = cclient.get_cserve(response.id)
52+
deployment = cclient.get_cserve(response.id)  # Automatically detects V2 or V3
5153
print("Deployment details: ", deployment)
5254

53-
'''
55+
"""
5456
### Pause the deployment
5557
cclient.pause(deployment.id)
5658
5759
### Delete the deployment
5860
cclient.delete(deployment.id)
59-
'''
61+
"""
62+
6063

6164
if __name__ == "__main__":
6265
main()

examples/sdk/create_inference.py

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,26 +1,32 @@
11
import centml
22
from centml.sdk.api import get_centml_client
3-
from centml.sdk import DeploymentType, CreateInferenceDeploymentRequest, UserVaultType
3+
from centml.sdk import DeploymentType, CreateInferenceV3DeploymentRequest, UserVaultType
44

55

66
def main():
77
with get_centml_client() as cclient:
88
certs = cclient.get_user_vault(UserVaultType.CERTIFICATES)
99

10-
request = CreateInferenceDeploymentRequest(
10+
request = CreateInferenceV3DeploymentRequest(
1111
name="nginx",
1212
cluster_id=1000,
1313
hardware_instance_id=1000,
1414
image_url="nginxinc/nginx-unprivileged",
1515
port=8080,
16-
min_scale=1,
17-
max_scale=1,
16+
min_replicas=1, # V3 uses min_replicas instead of min_scale
17+
max_replicas=3, # V3 uses max_replicas instead of max_scale
18+
initial_replicas=1, # Optional in V3 - initial number of replicas
1819
endpoint_certificate_authority=certs["my_cert"],
20+
# V3 rollout strategy parameters
21+
max_surge=1, # Allow 1 extra pod during updates
22+
max_unavailable=0, # Keep all pods available during updates
23+
healthcheck="/",
24+
concurrency=10,
1925
)
2026
response = cclient.create_inference(request)
2127
print("Create deployment response: ", response)
2228

23-
### Get deployment details
29+
### Get deployment details (automatically detects V2 or V3)
2430
deployment = cclient.get_inference(response.id)
2531
print("Deployment details: ", deployment)
2632

requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,4 +9,4 @@ cryptography==44.0.1
99
prometheus-client>=0.20.0
1010
scipy>=1.6.0
1111
scikit-learn>=1.5.1
12-
platform-api-python-client==4.0.12
12+
platform-api-python-client==4.1.9

0 commit comments

Comments
 (0)