Skip to content

Commit 724e97e

Browse files
authored
[GCP] Add H100 mega (#4099)
* Add H100 mega support on GCP
* fix for some other regions
* format
* fix resource type
* fix catalog fetching
1 parent 53380e2 commit 724e97e

File tree

3 files changed

+21
-7
lines changed

3 files changed

+21
-7
lines changed

sky/clouds/gcp.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -483,7 +483,7 @@ def _failover_disk_tier() -> Optional[resources_utils.DiskTier]:
483483
if acc in ('A100-80GB', 'L4'):
484484
# A100-80GB and L4 have a different name pattern.
485485
resources_vars['gpu'] = f'nvidia-{acc.lower()}'
486-
elif acc == 'H100':
486+
elif acc in ('H100', 'H100-MEGA'):
487487
resources_vars['gpu'] = f'nvidia-{acc.lower()}-80gb'
488488
else:
489489
resources_vars['gpu'] = 'nvidia-tesla-{}'.format(

sky/clouds/service_catalog/data_fetchers/fetch_gcp.py

+17-6
Original file line number | Diff line number | Diff line change
@@ -419,6 +419,11 @@ def _get_gpus_for_zone(zone: str) -> 'pd.DataFrame':
419419
if count != 8:
420420
# H100 only has 8 cards.
421421
continue
422+
if 'H100-MEGA-80GB' in gpu_name:
423+
gpu_name = 'H100-MEGA'
424+
if count != 8:
425+
# H100-MEGA only has 8 cards.
426+
continue
422427
if 'VWS' in gpu_name:
423428
continue
424429
if gpu_name.startswith('TPU-'):
@@ -447,6 +452,7 @@ def _gpu_info_from_name(name: str) -> Optional[Dict[str, List[Dict[str, Any]]]]:
447452
'A100-80GB': 80 * 1024,
448453
'A100': 40 * 1024,
449454
'H100': 80 * 1024,
455+
'H100-MEGA': 80 * 1024,
450456
'P4': 8 * 1024,
451457
'T4': 16 * 1024,
452458
'V100': 16 * 1024,
@@ -491,12 +497,17 @@ def get_gpu_price(row: pd.Series, spot: bool) -> Optional[float]:
491497
if sku['category']['usageType'] != ondemand_or_spot:
492498
continue
493499

494-
gpu_name = row['AcceleratorName']
495-
if gpu_name == 'A100-80GB':
496-
gpu_name = 'A100 80GB'
497-
if gpu_name == 'H100':
498-
gpu_name = 'H100 80GB'
499-
if f'{gpu_name} GPU' not in sku['description']:
500+
gpu_names = [row['AcceleratorName']]
501+
if gpu_names[0] == 'A100-80GB':
502+
gpu_names = ['A100 80GB']
503+
if gpu_names[0] == 'H100':
504+
gpu_names = ['H100 80GB']
505+
if gpu_names[0] == 'H100-MEGA':
506+
# Seems that H100-MEGA has two different descriptions in SKUs in
507+
# different regions: 'H100 80GB Mega' and 'H100 80GB Plus'.
508+
gpu_names = ['H100 80GB Mega', 'H100 80GB Plus']
509+
if not any(f'{gpu_name} GPU' in sku['description']
510+
for gpu_name in gpu_names):
500511
continue
501512

502513
unit_price = _get_unit_price(sku)

sky/clouds/service_catalog/gcp_catalog.py

+3
Original file line number | Diff line number | Diff line change
@@ -98,6 +98,9 @@
9898
},
9999
'H100': {
100100
8: ['a3-highgpu-8g'],
101+
},
102+
'H100-MEGA': {
103+
8: ['a3-megagpu-8g'],
101104
}
102105
}
103106

0 commit comments

Comments
 (0)