From 1fc583f1ef0aea1ba5392a76df24bad280d1ac05 Mon Sep 17 00:00:00 2001
From: Jo Basevi
Date: Fri, 22 Mar 2024 13:40:34 +1100
Subject: [PATCH] Add restart reproducibility test to checksum pytests

---
 test/models/accessom2.py         | 29 +++++++++++++++++++++++++++--
 test/models/model.py             | 13 ++++++++++++-
 test/test_bit_reproducibility.py | 18 ++++++++----------
 3 files changed, 47 insertions(+), 13 deletions(-)

diff --git a/test/models/accessom2.py b/test/models/accessom2.py
index 61e24dd3..15235241 100644
--- a/test/models/accessom2.py
+++ b/test/models/accessom2.py
@@ -6,6 +6,7 @@
 import f90nml
 import re
 from pathlib import Path
+from typing import Dict, Any
 
 from models.model import Model
 
@@ -36,13 +37,13 @@ def set_model_runtime(self,
         nml['date_manager_nml']['restart_period'] = [years, months, seconds]
         nml.write(self.accessom2_config, force=True)
 
-    def output_exists(self):
+    def output_exists(self) -> bool:
         """Check for existing output file"""
         return self.output_file.exists()
 
     def extract_checksums(self,
                           output_directory: Path = None,
-                          schema_version: str = None):
+                          schema_version: str = None) -> Dict[str, Any]:
         """Parse output file and create checksum using defined schema"""
         if output_directory:
             output_filename = output_directory / 'access-om2.out'
@@ -89,3 +90,27 @@ def extract_checksums(self,
                 f"Unsupported checksum schema version: {schema_version}")
 
         return checksums
+
+    def check_checksums_over_restarts(self,
+                                      long_run_checksum: Dict[str, Any],
+                                      short_run_checksum_0: Dict[str, Any],
+                                      short_run_checksum_1: Dict[str, Any]
+                                      ) -> bool:
+        """Return True if every checksum from a long run (e.g. 2 days) is
+        also found in the combined checksums of 2 short runs (e.g. 1 day)"""
+        # Merge both short runs into fresh lists so that the callers'
+        # checksum dicts are left unmodified
+        short_run_checksums = {}
+        for run in (short_run_checksum_0, short_run_checksum_1):
+            for field, checksums in run['output'].items():
+                short_run_checksums.setdefault(field, []).extend(checksums)
+
+        matching_checksums = True
+        for field, checksums in long_run_checksum['output'].items():
+            for checksum in checksums:
+                if (field not in short_run_checksums or
+                        checksum not in short_run_checksums[field]):
+                    print(f"Unequal checksum: {field}: {checksum}")
+                    matching_checksums = False
+
+        return matching_checksums
diff --git a/test/models/model.py b/test/models/model.py
index 2139cb77..2ae126f1 100644
--- a/test/models/model.py
+++ b/test/models/model.py
@@ -1,12 +1,15 @@
 """Generic Model class"""
 from pathlib import Path
+from typing import Dict, Any
 
 
 class Model(object):
     def __init__(self, experiment):
         self.experiment = experiment
 
-    def extract_checksums(self, output_directory: Path = None):
+    def extract_checksums(self,
+                          output_directory: Path,
+                          schema_version: str):
         """Extract checksums from output directory"""
         raise NotImplementedError
 
@@ -20,3 +23,11 @@ def set_model_runtime(self,
     def output_exists(self):
         """Check for existing output files"""
         raise NotImplementedError
+
+    def check_checksums_over_restarts(self,
+                                      long_run_checksum,
+                                      short_run_checksum_0,
+                                      short_run_checksum_1) -> bool:
+        """Compare checksums from a long run (e.g. 2 days) against
+        checksums from 2 short runs (e.g. 1 day)"""
+        raise NotImplementedError
diff --git a/test/test_bit_reproducibility.py b/test/test_bit_reproducibility.py
index 303b81a5..dbcc8cdf 100644
--- a/test/test_bit_reproducibility.py
+++ b/test/test_bit_reproducibility.py
@@ -74,8 +74,7 @@ def test_bit_repro_repeat(self, output_path: Path, control_path: Path):
 
         assert produced == expected
 
-    @pytest.mark.slow
-    @pytest.mark.skip(reason="TODO:Check checksum comparision across restarts")
+    @pytest.mark.checksum
     def test_restart_repro(self, output_path: Path, control_path: Path):
         """
         Test that a run reproduces across restarts.
@@ -104,16 +103,15 @@ def test_restart_repro(self, output_path: Path, control_path: Path):
         checksums_1d_0 = exp_2x1day.extract_checksums()
         checksums_1d_1 = exp_2x1day.extract_checksums(exp_2x1day.output001)
 
-        # Adding checksums over two outputs might need to be model specific?
-        checksums_2x1d = checksums_1d_0['output'] + checksums_1d_1['output']
-
         checksums_2d = exp_2day.extract_checksums()
 
-        matching_checksums = True
-        for item in checksums_2d['output']:
-            if item not in checksums_2x1d:
-                print("Unequal checksum:", item)
-                matching_checksums = False
+        # Use model specific comparison method for checksums
+        model = exp_2day.model
+        matching_checksums = model.check_checksums_over_restarts(
+            long_run_checksum=checksums_2d,
+            short_run_checksum_0=checksums_1d_0,
+            short_run_checksum_1=checksums_1d_1
+        )
 
         if not matching_checksums:
             # Write checksums out to file