7
7
from centml .sdk .api import get_centml_client
8
8
9
9
10
# Convert a deployment type enum member to the user-friendly name shown in
# the CLI. Several API generations of the same product share one name.
depl_type_to_name_map = {
    DeploymentType.INFERENCE: "inference",
    DeploymentType.COMPUTE: "compute",
    DeploymentType.COMPILATION: "compilation",
    DeploymentType.INFERENCE_V2: "inference",
    DeploymentType.INFERENCE_V3: "inference",
    DeploymentType.COMPUTE_V2: "compute",
    # From the user's point of view, every CServe generation is just "cserve".
    DeploymentType.CSERVE: "cserve",
    DeploymentType.CSERVE_V2: "cserve",
    DeploymentType.CSERVE_V3: "cserve",
    DeploymentType.RAG: "rag",
}
24
# Map a user-supplied deployment name to the latest deployment type for that
# product. Keys must match the CLI argument exactly (no surrounding
# whitespace), because callers index this dict directly with the user's input.
depl_name_to_type_map = {
    "inference": DeploymentType.INFERENCE_V3,
    "cserve": DeploymentType.CSERVE_V3,
    "compute": DeploymentType.COMPUTE_V2,
    "rag": DeploymentType.RAG,
}
26
31
27
32
@@ -56,6 +61,21 @@ def _format_ssh_key(ssh_key):
56
61
return ssh_key [:32 ] + "..."
57
62
58
63
64
+ def _get_replica_info (deployment ):
65
+ """Extract replica information handling V2/V3 field differences"""
66
+ # Check actual deployment object fields rather than depl_type
67
+ # since unified get_cserve() can return either V2 or V3 objects
68
+ if hasattr (deployment , 'min_replicas' ):
69
+ # V3 deployment response object
70
+ return {"min" : deployment .min_replicas , "max" : deployment .max_replicas }
71
+ elif hasattr (deployment , 'min_scale' ):
72
+ # V2 deployment response object
73
+ return {"min" : deployment .min_scale , "max" : deployment .max_scale }
74
+ else :
75
+ # Fallback - shouldn't happen
76
+ return {"min" : "N/A" , "max" : "N/A" }
77
+
78
+
59
79
def _get_ready_status (cclient , deployment ):
60
80
api_status = deployment .status
61
81
service_status = (
@@ -121,12 +141,12 @@ def get(type, id):
121
141
with get_centml_client () as cclient :
122
142
depl_type = depl_name_to_type_map [type ]
123
143
124
- if depl_type == DeploymentType .INFERENCE_V2 :
125
- deployment = cclient .get_inference (id )
144
+ if depl_type in [ DeploymentType .INFERENCE_V2 , DeploymentType . INFERENCE_V3 ] :
145
+ deployment = cclient .get_inference (id ) # handles both V2 and V3
126
146
elif depl_type == DeploymentType .COMPUTE_V2 :
127
147
deployment = cclient .get_compute (id )
128
- elif depl_type == DeploymentType .CSERVE_V2 :
129
- deployment = cclient .get_cserve (id )
148
+ elif depl_type in [ DeploymentType .CSERVE_V2 , DeploymentType . CSERVE_V3 ] :
149
+ deployment = cclient .get_cserve (id ) # handles both V2 and V3
130
150
else :
131
151
sys .exit ("Please enter correct deployment type" )
132
152
@@ -150,21 +170,18 @@ def get(type, id):
150
170
)
151
171
152
172
click .echo ("Additional deployment configurations:" )
153
- if depl_type == DeploymentType .INFERENCE_V2 :
154
- click .echo (
155
- tabulate (
156
- [
157
- ("Image" , deployment .image_url ),
158
- ("Container port" , deployment .container_port ),
159
- ("Healthcheck" , deployment .healthcheck or "/" ),
160
- ("Replicas" , {"min" : deployment .min_scale , "max" : deployment .max_scale }),
161
- ("Environment variables" , deployment .env_vars or "None" ),
162
- ("Max concurrency" , deployment .concurrency or "None" ),
163
- ],
164
- tablefmt = "rounded_outline" ,
165
- disable_numparse = True ,
166
- )
167
- )
173
+ if depl_type in [DeploymentType .INFERENCE_V2 , DeploymentType .INFERENCE_V3 ]:
174
+ replica_info = _get_replica_info (deployment )
175
+ display_rows = [
176
+ ("Image" , deployment .image_url ),
177
+ ("Container port" , deployment .container_port ),
178
+ ("Healthcheck" , deployment .healthcheck or "/" ),
179
+ ("Replicas" , replica_info ),
180
+ ("Environment variables" , deployment .env_vars or "None" ),
181
+ ("Max concurrency" , deployment .concurrency or "None" ),
182
+ ]
183
+
184
+ click .echo (tabulate (display_rows , tablefmt = "rounded_outline" , disable_numparse = True ))
168
185
elif depl_type == DeploymentType .COMPUTE_V2 :
169
186
click .echo (
170
187
tabulate (
@@ -173,25 +190,22 @@ def get(type, id):
173
190
disable_numparse = True ,
174
191
)
175
192
)
176
- elif depl_type == DeploymentType .CSERVE_V2 :
177
- click .echo (
178
- tabulate (
179
- [
180
- ("Hugging face model" , deployment .recipe .model ),
181
- (
182
- "Parallelism" ,
183
- {
184
- "tensor" : deployment .recipe .additional_properties ['tensor_parallel_size' ],
185
- "pipeline" : deployment .recipe .additional_properties ['pipeline_parallel_size' ],
186
- },
187
- ),
188
- ("Replicas" , {"min" : deployment .min_scale , "max" : deployment .max_scale }),
189
- ("Max concurrency" , deployment .concurrency or "None" ),
190
- ],
191
- tablefmt = "rounded_outline" ,
192
- disable_numparse = True ,
193
- )
194
- )
193
+ elif depl_type in [DeploymentType .CSERVE_V2 , DeploymentType .CSERVE_V3 ]:
194
+ replica_info = _get_replica_info (deployment )
195
+ display_rows = [
196
+ ("Hugging face model" , deployment .recipe .model ),
197
+ (
198
+ "Parallelism" ,
199
+ {
200
+ "tensor" : deployment .recipe .additional_properties .get ("tensor_parallel_size" , "N/A" ),
201
+ "pipeline" : deployment .recipe .additional_properties .get ("pipeline_parallel_size" , "N/A" ),
202
+ },
203
+ ),
204
+ ("Replicas" , replica_info ),
205
+ ("Max concurrency" , deployment .concurrency or "None" ),
206
+ ]
207
+
208
+ click .echo (tabulate (display_rows , tablefmt = "rounded_outline" , disable_numparse = True ))
195
209
196
210
197
211
@click .command (help = "Delete a deployment" )
0 commit comments