From a9e77c4cc16830c561868a6a880deb20cc41d571 Mon Sep 17 00:00:00 2001 From: Samuel Moors Date: Sat, 30 Nov 2024 13:40:15 +0100 Subject: [PATCH 1/8] use reframe warnings in hooks --- eessi/testsuite/hooks.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/eessi/testsuite/hooks.py b/eessi/testsuite/hooks.py index ca09d999..a444f471 100644 --- a/eessi/testsuite/hooks.py +++ b/eessi/testsuite/hooks.py @@ -3,7 +3,6 @@ """ import math import shlex -import warnings import reframe as rfm import reframe.core.logging as rflog @@ -435,7 +434,7 @@ def _set_or_append_valid_systems(test: rfm.RegressionTest, valid_systems: str): warn_msg = f"valid_systems has multiple ({len(test.valid_systems)}) items," warn_msg += " which is not supported by this hook." warn_msg += " Make sure to handle filtering yourself." - warnings.warn(warn_msg) + rflog.getlogger().warning(warn_msg) return @@ -529,7 +528,6 @@ def req_memory_per_node(test: rfm.RegressionTest, app_mem_req: float): # and return from this hook (as setting test.extra_resources will be ignored in that case according to # https://reframe-hpc.readthedocs.io/en/stable/regression_test_api.html#reframe.core.pipeline.RegressionTest.extra_resources if 'memory' not in test.current_partition.resources: - logger = rflog.getlogger() msg = "Your ReFrame configuration file does not specify any resource called 'memory' for this partition " msg += f" ({test.current_partition.name})." msg += " Without this, an explicit memory request cannot be made from the scheduler. This test will run," @@ -538,7 +536,7 @@ def req_memory_per_node(test: rfm.RegressionTest, app_mem_req: float): msg += " 'memory' in your ReFrame configuration file for this partition." msg += " For a SLURM system, one would e.g. define:" msg += " 'resources': [{'name': 'memory', 'options': ['--mem={size}']}]" - logger.warning(msg) + rflog.getlogger().warning(msg) # We return, as setting a test.extra_resources is pointless - it would be ignored anyway # This way, we also don't add any lines to the log that a specific amount of memory was requested return @@ -580,14 +578,13 @@ def req_memory_per_node(test: rfm.RegressionTest, app_mem_req: float): log(f"Requested {req_mem_per_task} MiB per task from the torque batch scheduler") else: - logger = rflog.getlogger() msg = "hooks.req_memory_per_node does not support the scheduler you configured" msg += f" ({test.current_partition.scheduler.registered_name})." msg += " The test will run, but since it doesn't request the required amount of memory explicitely," msg += " it may result in an out-of-memory error." msg += " Please expand the functionality of hooks.req_memory_per_node for your scheduler." # Warnings will, at default loglevel, be printed on stdout when executing the ReFrame command - logger.warning(msg) + rflog.getlogger().warning(msg) def set_modules(test: rfm.RegressionTest): @@ -671,14 +668,13 @@ def set_compact_process_binding(test: rfm.RegressionTest): log(f'Set environment variable SLURM_DISTRIBUTION to {test.env_vars["SLURM_DISTRIBUTION"]}') log(f'Set environment variable SLURM_CPU_BIND to {test.env_vars["SLURM_CPU_BIND"]}') else: - logger = rflog.getlogger() msg = "hooks.set_compact_process_binding does not support the current launcher" msg += f" ({test.current_partition.launcher_type().registered_name})." msg += " The test will run, but using the default binding strategy of your parallel launcher." msg += " This may lead to suboptimal performance." msg += " Please expand the functionality of hooks.set_compact_process_binding for your parallel launcher." # Warnings will, at default loglevel, be printed on stdout when executing the ReFrame command - logger.warning(msg) + rflog.getlogger().warning(msg) def set_compact_thread_binding(test: rfm.RegressionTest): From 04e8cbea391c686e89721cca1ba625632f9f5842 Mon Sep 17 00:00:00 2001 From: Samuel Moors Date: Sat, 30 Nov 2024 13:40:47 +0100 Subject: [PATCH 2/8] don't error if bench_name is not defined --- eessi/testsuite/eessi_mixin.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/eessi/testsuite/eessi_mixin.py b/eessi/testsuite/eessi_mixin.py index f5011c58..00bb6b44 100644 --- a/eessi/testsuite/eessi_mixin.py +++ b/eessi/testsuite/eessi_mixin.py @@ -120,9 +120,6 @@ def measure_mem_usage(self): def set_tag_ci(self): "Set CI tag if bench_name_ci and bench_name are set and are equal" if self.bench_name_ci: - if not self.bench_name: - msg = "Attribute bench_name_ci is set, but bench_name is not set" - raise ReframeFatalError(msg) if self.bench_name == self.bench_name_ci: self.tags.add(TAGS['CI']) log(f'tags set to {self.tags}') From 15949f8c1fa6586629b36cb65a9fd63b1aa5ba94 Mon Sep 17 00:00:00 2001 From: Samuel Moors Date: Sat, 30 Nov 2024 13:42:09 +0100 Subject: [PATCH 3/8] use mixin class for QuantumESPRESSO --- eessi/testsuite/tests/apps/QuantumESPRESSO.py | 77 +++++-------------- 1 file changed, 21 insertions(+), 56 deletions(-) diff --git a/eessi/testsuite/tests/apps/QuantumESPRESSO.py b/eessi/testsuite/tests/apps/QuantumESPRESSO.py index 288354b2..3597b953 100644 --- a/eessi/testsuite/tests/apps/QuantumESPRESSO.py +++ b/eessi/testsuite/tests/apps/QuantumESPRESSO.py @@ -30,52 +30,32 @@ import reframe as rfm from hpctestlib.sciapps.qespresso.benchmarks import QEspressoPWCheck -from reframe.core.builtins import ( # added only to make the linter happy - parameter, run_after) +from reframe.core.builtins import parameter, run_after -from eessi.testsuite import hooks -from eessi.testsuite.constants import (COMPUTE_UNIT, CPU, DEVICE_TYPES, GPU, - SCALES, TAGS) -from eessi.testsuite.utils import find_modules, log +from eessi.testsuite.constants import COMPUTE_UNIT, CPU, DEVICE_TYPES, GPU, SCALES +from eessi.testsuite.eessi_mixin import EESSI_Mixin +from eessi.testsuite.utils import find_modules @rfm.simple_test -class EESSI_QuantumESPRESSO_PW(QEspressoPWCheck): +class EESSI_QuantumESPRESSO_PW(QEspressoPWCheck, EESSI_Mixin): scale = parameter(SCALES.keys()) - valid_prog_environs = ['default'] - valid_systems = ['*'] time_limit = '30m' module_name = parameter(find_modules('QuantumESPRESSO')) - # For now, QE is being build for CPU targets only - # compute_device = parameter([DEVICE_TYPES[CPU], DEVICE_TYPES[GPU]]) - compute_device = parameter([DEVICE_TYPES[CPU], ]) + # For now, QE is built for CPU targets only + device_type = parameter([DEVICE_TYPES[CPU]]) + bench_name_ci = 'bench_ci' - @run_after('init') - def run_after_init(self): - """Hooks to run after the init phase""" - - # Filter on which scales are supported by the partitions defined in the ReFrame configuration - hooks.filter_supported_scales(self) - - # Make sure that GPU tests run in partitions that support running on a GPU, - # and that CPU-only tests run in partitions that support running CPU-only. - # Also support setting valid_systems on the cmd line. - hooks.filter_valid_systems_by_device_type(self, required_device_type=self.compute_device) - - # Support selecting modules on the cmd line. - hooks.set_modules(self) - - # Support selecting scales on the cmd line via tags. - hooks.set_tag_scale(self) + def required_mem_per_node(self): + return (self.num_tasks_per_node * 0.9 + 4) * 1024 @run_after('init') - def set_tag_ci(self): + def set_ci(self): """Set tag CI on smallest benchmark, so it can be selected on the cmd line via --tag CI""" min_ecut = min(QEspressoPWCheck.ecut.values) min_nbnd = min(QEspressoPWCheck.nbnd.values) if self.ecut == min_ecut and self.nbnd == min_nbnd: - self.tags.add(TAGS['CI']) - log(f'tags set to {self.tags}') + self.bench_name = self.bench_name_ci @run_after('init') def set_increased_walltime(self): @@ -85,29 +65,14 @@ def set_increased_walltime(self): if self.ecut == max_ecut and self.nbnd == max_nbnd: self.time_limit = '60m' - @run_after('setup') - def run_after_setup(self): - """Hooks to run after the setup phase""" - - # Calculate default requested resources based on the scale: - # 1 task per CPU for CPU-only tests, 1 task per GPU for GPU tests. - # Also support setting the resources on the cmd line. - if self.compute_device == DEVICE_TYPES[GPU]: - hooks.assign_tasks_per_compute_unit(test=self, compute_unit=COMPUTE_UNIT[GPU]) - else: - hooks.assign_tasks_per_compute_unit(test=self, compute_unit=COMPUTE_UNIT[CPU]) - - @run_after('setup') - def request_mem(self): - memory_required = self.num_tasks_per_node * 0.9 + 4 - hooks.req_memory_per_node(test=self, app_mem_req=memory_required * 1024) - - @run_after('setup') - def set_omp_num_threads(self): + @run_after('init') + def set_compute_unit(self): """ - Set number of OpenMP threads via OMP_NUM_THREADS. - Set default number of OpenMP threads equal to number of CPUs per task. + Set the compute unit to which tasks will be assigned: + one task per CPU core for CPU runs, and one task per GPU for GPU runs. """ - - self.env_vars['OMP_NUM_THREADS'] = self.num_cpus_per_task - log(f'env_vars set to {self.env_vars}') + device_to_compute_unit = { + DEVICE_TYPES[CPU]: COMPUTE_UNIT[CPU], + DEVICE_TYPES[GPU]: COMPUTE_UNIT[GPU], + } + self.compute_unit = device_to_compute_unit.get(self.device_type) From e40ed006859fab904b2b4cebc542d4f7de93c5ed Mon Sep 17 00:00:00 2001 From: Samuel Moors Date: Sat, 30 Nov 2024 19:39:27 +0100 Subject: [PATCH 4/8] remove scale --- eessi/testsuite/tests/apps/QuantumESPRESSO.py | 1 - 1 file changed, 1 deletion(-) diff --git a/eessi/testsuite/tests/apps/QuantumESPRESSO.py b/eessi/testsuite/tests/apps/QuantumESPRESSO.py index 3597b953..536d4465 100644 --- a/eessi/testsuite/tests/apps/QuantumESPRESSO.py +++ b/eessi/testsuite/tests/apps/QuantumESPRESSO.py @@ -39,7 +39,6 @@ @rfm.simple_test class EESSI_QuantumESPRESSO_PW(QEspressoPWCheck, EESSI_Mixin): - scale = parameter(SCALES.keys()) time_limit = '30m' module_name = parameter(find_modules('QuantumESPRESSO')) # For now, QE is built for CPU targets only From f12f07ce756ad867d78999658b2c0264ec0e69b0 Mon Sep 17 00:00:00 2001 From: Samuel Moors Date: Sat, 30 Nov 2024 19:43:15 +0100 Subject: [PATCH 5/8] remove scales import --- eessi/testsuite/tests/apps/QuantumESPRESSO.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eessi/testsuite/tests/apps/QuantumESPRESSO.py b/eessi/testsuite/tests/apps/QuantumESPRESSO.py index 536d4465..d59c7826 100644 --- a/eessi/testsuite/tests/apps/QuantumESPRESSO.py +++ b/eessi/testsuite/tests/apps/QuantumESPRESSO.py @@ -32,7 +32,7 @@ from hpctestlib.sciapps.qespresso.benchmarks import QEspressoPWCheck from reframe.core.builtins import parameter, run_after -from eessi.testsuite.constants import COMPUTE_UNIT, CPU, DEVICE_TYPES, GPU, SCALES +from eessi.testsuite.constants import COMPUTE_UNIT, CPU, DEVICE_TYPES, GPU from eessi.testsuite.eessi_mixin import EESSI_Mixin from eessi.testsuite.utils import find_modules From d85b51594c852007210479203c51a0bdb8734994 Mon Sep 17 00:00:00 2001 From: Samuel Moors Date: Sat, 30 Nov 2024 20:54:54 +0100 Subject: [PATCH 6/8] don't initialize bench_name in the class body --- eessi/testsuite/eessi_mixin.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/eessi/testsuite/eessi_mixin.py b/eessi/testsuite/eessi_mixin.py index 00bb6b44..0b95e258 100644 --- a/eessi/testsuite/eessi_mixin.py +++ b/eessi/testsuite/eessi_mixin.py @@ -39,7 +39,6 @@ class EESSI_Mixin(RegressionMixin): # Set defaults for these class variables, can be overwritten by child class if desired measure_memory_usage = variable(bool, value=False) scale = parameter(SCALES.keys()) - bench_name = None bench_name_ci = None # Note that the error for an empty parameter is a bit unclear for ReFrame 4.6.2, but that will hopefully improve @@ -54,6 +53,8 @@ def __init_subclass__(cls, **kwargs): cls.valid_systems = ['*'] if not cls.time_limit: cls.time_limit = '1h' + if 'bench_name' not in dir(): + cls.bench_name = None # Helper function to validate if an attribute is present it item_dict. # If not, print it's current name, value, and the valid_values From b2168136ea787b367cfc06a827063fb4d9f3911b Mon Sep 17 00:00:00 2001 From: Samuel Moors Date: Thu, 12 Dec 2024 11:17:10 +0100 Subject: [PATCH 7/8] readd bench_name check --- eessi/testsuite/eessi_mixin.py | 3 +++ eessi/testsuite/tests/apps/QuantumESPRESSO.py | 1 + 2 files changed, 4 insertions(+) diff --git a/eessi/testsuite/eessi_mixin.py b/eessi/testsuite/eessi_mixin.py index 0b95e258..4018b600 100644 --- a/eessi/testsuite/eessi_mixin.py +++ b/eessi/testsuite/eessi_mixin.py @@ -121,6 +121,9 @@ def measure_mem_usage(self): def set_tag_ci(self): "Set CI tag if bench_name_ci and bench_name are set and are equal" if self.bench_name_ci: + if not self.bench_name: + msg = "Attribute bench_name_ci is set, but bench_name is not set" + raise ReframeFatalError(msg) if self.bench_name == self.bench_name_ci: self.tags.add(TAGS['CI']) log(f'tags set to {self.tags}') diff --git a/eessi/testsuite/tests/apps/QuantumESPRESSO.py b/eessi/testsuite/tests/apps/QuantumESPRESSO.py index d59c7826..1990e0be 100644 --- a/eessi/testsuite/tests/apps/QuantumESPRESSO.py +++ b/eessi/testsuite/tests/apps/QuantumESPRESSO.py @@ -44,6 +44,7 @@ class EESSI_QuantumESPRESSO_PW(QEspressoPWCheck, EESSI_Mixin): # For now, QE is built for CPU targets only device_type = parameter([DEVICE_TYPES[CPU]]) bench_name_ci = 'bench_ci' + bench_name = None def required_mem_per_node(self): return (self.num_tasks_per_node * 0.9 + 4) * 1024 From a38870a34b8081ef83207414e16bf3b9665db36c Mon Sep 17 00:00:00 2001 From: Samuel Moors Date: Thu, 12 Dec 2024 11:48:47 +0100 Subject: [PATCH 8/8] fix setting ci with bench_name check --- eessi/testsuite/eessi_mixin.py | 3 +-- eessi/testsuite/tests/apps/QuantumESPRESSO.py | 4 +--- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/eessi/testsuite/eessi_mixin.py b/eessi/testsuite/eessi_mixin.py index a794fe6c..d03d65e2 100644 --- a/eessi/testsuite/eessi_mixin.py +++ b/eessi/testsuite/eessi_mixin.py @@ -41,6 +41,7 @@ class EESSI_Mixin(RegressionMixin): # Set defaults for these class variables, can be overwritten by child class if desired measure_memory_usage = variable(bool, value=False) scale = parameter(SCALES.keys()) + bench_name = None bench_name_ci = None # Create ReFrame variables for logging runtime environment information @@ -63,8 +64,6 @@ def __init_subclass__(cls, **kwargs): cls.valid_systems = ['*'] if not cls.time_limit: cls.time_limit = '1h' - if 'bench_name' not in dir(): - cls.bench_name = None # Helper function to validate if an attribute is present it item_dict. # If not, print it's current name, value, and the valid_values diff --git a/eessi/testsuite/tests/apps/QuantumESPRESSO.py b/eessi/testsuite/tests/apps/QuantumESPRESSO.py index 1990e0be..1f1f6270 100644 --- a/eessi/testsuite/tests/apps/QuantumESPRESSO.py +++ b/eessi/testsuite/tests/apps/QuantumESPRESSO.py @@ -43,8 +43,6 @@ class EESSI_QuantumESPRESSO_PW(QEspressoPWCheck, EESSI_Mixin): module_name = parameter(find_modules('QuantumESPRESSO')) # For now, QE is built for CPU targets only device_type = parameter([DEVICE_TYPES[CPU]]) - bench_name_ci = 'bench_ci' - bench_name = None def required_mem_per_node(self): return (self.num_tasks_per_node * 0.9 + 4) * 1024 @@ -55,7 +53,7 @@ def set_ci(self): min_ecut = min(QEspressoPWCheck.ecut.values) min_nbnd = min(QEspressoPWCheck.nbnd.values) if self.ecut == min_ecut and self.nbnd == min_nbnd: - self.bench_name = self.bench_name_ci + self.bench_name = self.bench_name_ci = 'bench_ci' @run_after('init') def set_increased_walltime(self):