Skip to content

Commit

Permalink
Merge branch 'main' into qe_mixin
Browse files Browse the repository at this point in the history
  • Loading branch information
Samuel Moors committed Dec 12, 2024
2 parents b216813 + 061bfa1 commit 0568d1e
Show file tree
Hide file tree
Showing 3 changed files with 94 additions and 44 deletions.
6 changes: 6 additions & 0 deletions config/azure_mc.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,12 @@
'access': ['--partition=x86-64-amd-zen4-node', '--export=NONE'],
'descr': 'Zen4, 16 cores, 30 GB',
'prepare_cmds': [
# Work around the known issue documented at
# https://www.eessi.io/docs/known_issues/eessi-2023.06/#eessi-production-repository-v202306
# NOTE: each command must be its own list element. Without the trailing commas Python
# silently concatenates adjacent string literals into a single (invalid) shell command.
'export OMPI_MCA_btl=^uct,ofi',
'export OMPI_MCA_pml=ucx',
'export OMPI_MCA_mtl=^ofi',
# Use override to avoid fallback to zen3
'export EESSI_SOFTWARE_SUBDIR_OVERRIDE=x86_64/amd/zen4',
common_eessi_init(),
# Required when using srun as launcher with --export=NONE in partition access,
Expand Down
89 changes: 45 additions & 44 deletions config/izum_vega.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,50 +68,51 @@
},
'descr': 'CPU partition Standard, see https://en-doc.vega.izum.si/architecture/'
},
# {
# 'name': 'gpu',
# 'scheduler': 'slurm',
# 'prepare_cmds': [
# common_eessi_init(),
# # Pass job environment variables like $PATH, etc., into job steps
# 'export SLURM_EXPORT_ENV=ALL',
# # Needed when using srun launcher
# # 'export SLURM_MPI_TYPE=pmix', # WARNING: this broke the GROMACS on Vega
# # Avoid https://github.com/EESSI/software-layer/issues/136
# # Can be taken out once we don't care about old OpenMPI versions anymore (pre-4.1.1)
# 'export OMPI_MCA_pml=ucx',
# ],
# 'launcher': 'mpirun',
# # Use --export=None to avoid that login environment is passed down to submitted jobs
# 'access': ['-p gpu', '--export=None'],
# 'environs': ['default'],
# 'max_jobs': 60,
# 'devices': [
# {
# 'type': DEVICE_TYPES[GPU],
# 'num_devices': 4,
# }
# ],
# 'resources': [
# {
# 'name': '_rfm_gpu',
# 'options': ['--gpus-per-node={num_gpus_per_node}'],
# },
# {
# 'name': 'memory',
# 'options': ['--mem={size}'],
# }
# ],
# 'features': [
# FEATURES[GPU],
# ] + list(SCALES.keys()),
# 'extras': {
# # Make sure to round down, otherwise a job might ask for more mem than is available
# # per node
# 'mem_per_node': 476.837 * 1024 # in MiB (should be checked, its unclear from slurm.conf)
# },
# 'descr': 'GPU partition, see https://en-doc.vega.izum.si/architecture/'
# },
# GPU partition: Slurm-scheduled, jobs launched with mpirun, 4 NVIDIA GPUs declared per node
{
'name': 'gpu',
'scheduler': 'slurm',
'prepare_cmds': [
common_eessi_init(),
# Pass job environment variables like $PATH, etc., into job steps
'export SLURM_EXPORT_ENV=ALL',
# Needed when using the srun launcher, but left disabled here:
# 'export SLURM_MPI_TYPE=pmix', # WARNING: this broke the GROMACS on Vega
# Avoid https://github.com/EESSI/software-layer/issues/136
# Can be taken out once we don't care about old OpenMPI versions anymore (pre-4.1.1)
'export OMPI_MCA_pml=ucx',
],
'launcher': 'mpirun',
# Use --export=None so that the login environment is not passed down to submitted jobs
'access': ['-p gpu', '--export=None'],
'environs': ['default'],
'max_jobs': 60,
'devices': [
{
'type': DEVICE_TYPES[GPU],
'num_devices': 4,
}
],
# Scheduler options templates; the {placeholders} are filled in when a test requests the resource
'resources': [
{
'name': '_rfm_gpu',
'options': ['--gpus-per-node={num_gpus_per_node}'],
},
{
'name': 'memory',
'options': ['--mem={size}'],
}
],
# Advertise GPU support plus every scale defined in SCALES
'features': [
FEATURES[GPU],
] + list(SCALES.keys()),
'extras': {
GPU_VENDOR: GPU_VENDORS[NVIDIA],
# Make sure to round down, otherwise a job might ask for more mem than is available
# per node
'mem_per_node': 476.837 * 1024 # in MiB (should be checked, it's unclear from slurm.conf)
},
'descr': 'GPU partition, see https://en-doc.vega.izum.si/architecture/'
},
]
},
],
Expand Down
43 changes: 43 additions & 0 deletions eessi/testsuite/eessi_mixin.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,12 @@
from reframe.core.exceptions import ReframeFatalError
from reframe.core.pipeline import RegressionMixin
from reframe.utility.sanity import make_performance_function
import reframe.utility.sanity as sn

from eessi.testsuite import hooks
from eessi.testsuite.constants import DEVICE_TYPES, SCALES, COMPUTE_UNIT, TAGS
from eessi.testsuite.utils import log
from eessi.testsuite import __version__ as testsuite_version


# Hooks from the Mixin class seem to be executed _before_ those of the child class
Expand Down Expand Up @@ -41,6 +43,14 @@ class EESSI_Mixin(RegressionMixin):
# Test parameter that takes its values from the keys of SCALES
scale = parameter(SCALES.keys())
bench_name_ci = None

# Create ReFrame variables for logging runtime environment information.
# Each one defaults to the literal string 'None' (not the None object) and is overwritten by
# extract_runtime_info_from_log() when the corresponding line is found in the job's stdout.
cvmfs_repo_name = variable(str, value='None')
cvmfs_software_subdir = variable(str, value='None')
full_modulepath = variable(str, value='None')

# Make sure the version of the EESSI test suite gets logged in the ReFrame report
eessi_testsuite_version = variable(str, value=testsuite_version)

# Note that the error for an empty parameter is a bit unclear for ReFrame 4.6.2, but that will hopefully improve
# see https://github.com/reframe-hpc/reframe/issues/3254
# If that improves: uncomment the following to force the user to set module_name
Expand Down Expand Up @@ -166,3 +176,36 @@ def assign_tasks_per_compute_unit(self):
def request_mem(self):
    """Ask for the per-node memory this test requires by delegating to the memory-request hook."""
    # The test class reports its own memory requirement; the hook turns it into a scheduler request
    mem_needed = self.required_mem_per_node()
    hooks.req_memory_per_node(self, app_mem_req=mem_needed)

@run_after('setup')
def log_runtime_info(self):
    """Queue post-run commands that echo runtime information into the job output.

    The echoed lines report the value of ``$EESSI_CVMFS_REPO``, the value of
    ``$EESSI_SOFTWARE_SUBDIR`` and, if a module name is set, the output of
    ``module --location show`` for that module. Each line carries a fixed prefix
    so that it can be picked up from the job log afterwards.
    """
    self.postrun_cmds.extend((
        'echo "EESSI_CVMFS_REPO: $EESSI_CVMFS_REPO"',
        'echo "EESSI_SOFTWARE_SUBDIR: $EESSI_SOFTWARE_SUBDIR"',
    ))

    # Without a module name there is no module location to report
    if not self.module_name:
        return

    # Get full modulepath
    self.postrun_cmds.append(f'echo "FULL_MODULEPATH: $(module --location show {self.module_name})"')

@run_after('run')
def extract_runtime_info_from_log(self):
    """Read the runtime info echoed into the job's stdout and store it in ReFrame variables.

    Does nothing in a dry run. For every known marker line (CVMFS repository, EESSI software
    subdir, full module path) the captured text is stored in the matching variable; a variable
    keeps its current value when no match is found.

    NOTE(review): the stored value is the string form of whatever ``sn.extractall`` yields
    (presumably the list of all matches rather than a single match) -- confirm this is intended.
    """
    if self.is_dry_run():
        return

    stdout_path = f'{self.stagedir}/{self.stdout}'

    # (regex to search for, named group to capture, variable that receives the result)
    runtime_fields = (
        # Only set if the EESSI_CVMFS_REPO environment variable pointed below /cvmfs/
        (r'EESSI_CVMFS_REPO: /cvmfs/(?P<repo>.*)$', 'repo', 'cvmfs_repo_name'),
        (r'EESSI_SOFTWARE_SUBDIR: (?P<subdir>.*)$', 'subdir', 'cvmfs_software_subdir'),
        (r'FULL_MODULEPATH: (?P<modpath>.*)$', 'modpath', 'full_modulepath'),
    )
    for pattern, group, attr_name in runtime_fields:
        found = sn.extractall(pattern, stdout_path, group, str)
        if found:
            setattr(self, attr_name, f'{found}')

0 comments on commit 0568d1e

Please sign in to comment.