From 7245d3c8825dd05b9e35a5137feaa85140987abd Mon Sep 17 00:00:00 2001 From: Tuomas Koskela Date: Thu, 20 Jun 2024 10:02:39 +0100 Subject: [PATCH 1/3] WIP: develop config for dawn pvc nodes --- benchmarks/reframe_config.py | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/benchmarks/reframe_config.py b/benchmarks/reframe_config.py index 1e97e4c6..01adca10 100644 --- a/benchmarks/reframe_config.py +++ b/benchmarks/reframe_config.py @@ -120,6 +120,35 @@ def spack_root_to_path(): 'num_sockets': 2, 'num_cpus_per_socket': 56, }, + { + 'name': 'pvc', + 'descr': 'Ponte Vecchio (Dawn) compute nodes', + 'scheduler': 'slurm', + 'launcher': 'mpirun', + 'env_vars': [ + ['I_MPI_PMI_LIBRARY', '/usr/local/software/slurm/current-rhel8/lib/libpmi2.so'], + ['I_MPI_OFI_PROVIDER', 'mlx'], + ['UCX_NET_DEVICES', 'mlx5_0:1'], + ], + 'access': ['--partition=pvc', '--exclusive'], + 'sched_options': { + 'job_submit_timeout': 120, + }, + 'environs': ['default'], + 'max_jobs': 64, + 'features': ['gpu'], + 'processor': { + 'num_cpus': 96, + 'num_cpus_per_core': 1, + 'num_sockets': 2, + 'num_cpus_per_socket': 48, + }, + 'resources': [ + { + 'name': 'gpu', + 'options': ['--gres=gpu:{num_gpus_per_node}'], + }, + }, ] }, # end CSD3 Rocky 8 From 5d6f6bac566086d660fd3c5ee65b103037ff17de Mon Sep 17 00:00:00 2001 From: Tuomas Koskela Date: Thu, 20 Jun 2024 15:15:36 +0100 Subject: [PATCH 2/3] Fix typo --- benchmarks/reframe_config.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/benchmarks/reframe_config.py b/benchmarks/reframe_config.py index 01adca10..f935a552 100644 --- a/benchmarks/reframe_config.py +++ b/benchmarks/reframe_config.py @@ -120,6 +120,7 @@ def spack_root_to_path(): 'num_sockets': 2, 'num_cpus_per_socket': 56, }, + }, { 'name': 'pvc', 'descr': 'Ponte Vecchio (Dawn) compute nodes', @@ -148,6 +149,7 @@ def spack_root_to_path(): 'name': 'gpu', 'options': ['--gres=gpu:{num_gpus_per_node}'], }, + ], }, ] From e49af104b59b45c340f2aca0dcbba82f251d2ca8 Mon Sep 17 00:00:00 2001 From: Tuomas Koskela Date: Fri, 21 Jun 2024 15:05:57 +0100 Subject: [PATCH 3/3] Fix MPI launcher --- benchmarks/reframe_config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmarks/reframe_config.py b/benchmarks/reframe_config.py index f935a552..7818900c 100644 --- a/benchmarks/reframe_config.py +++ b/benchmarks/reframe_config.py @@ -125,7 +125,7 @@ def spack_root_to_path(): 'name': 'pvc', 'descr': 'Ponte Vecchio (Dawn) compute nodes', 'scheduler': 'slurm', - 'launcher': 'mpirun', + 'launcher': 'srun', 'env_vars': [ ['I_MPI_PMI_LIBRARY', '/usr/local/software/slurm/current-rhel8/lib/libpmi2.so'], ['I_MPI_OFI_PROVIDER', 'mlx'],