Refactor Result Saving (#841)
This PR changes the way results are saved and enables loading and validating results.

Change summary
- Added custom dataclass serialization
- Models are now serializable
- Results can be loaded and validated
- Added save/load_result_file (see the usage sketch after this list)
- Added parameter history to the result
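
A minimal round-trip sketch of the behaviour described above. The top-level helper names (`save_result`, `load_result`), the `folder`/`yml` format names, and the result file name are assumptions drawn from this summary and the folder plugin diff further down, not confirmed API:

```python
# Hedged sketch, not confirmed API: the helper names, format names and the
# result file name are assumptions based on this summary and the diffs below.
from glotaran.io import load_result, save_result
from glotaran.project import Result


def round_trip(result: Result, folder: str) -> Result:
    """Save a finished optimization result, then load it back for validation."""
    paths = save_result(result, folder, format_name="folder")
    print(paths)  # e.g. result.md, model.yml, scheme.yml, optimized_parameters.csv, ...
    # The name of the result spec file written into the folder is assumed here.
    return load_result(f"{folder}/result.yml", format_name="yml")
```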

Commits:
* Added project to darglint, mypy and pydocstyle pre-commit checks
* Added Model.as_dict and Model.get_parameters
* Added project/dataclass, changed scheme, adapted yml io
* Changed result use project/dataclass
* Added test for save model yml
* Added load and save result file and implemented it with yml
* Refactored folder plugin, moved SavingOptions.
* Update glotaran/project/result.py
* Made variable names in model more consistent.
* Added parameter history class and added it to result.

* 🔧 Configure darglint to ignore protocol methods

* Fixed Parameter doc.

* ♻️ Refactored by Sourcery

* 🧹 Partial revert of eb430c

* 🧹 Partial revert of 761d4b

* 🧹 Restored original behavior of save_result folder plugin + added files
As a side effect it sets the file paths of saved files in the Result object.

* 🧹 Removed unused variable name in yml save_model looping over dict

* 🩹 Fixed incorrect use of the `any` builtin as a type annotation

* 🔧👌 Activated mypy for parameter subpackage and fixed typing issues

* 🔧👌🩹 Activated mypy for project subpackage and fixed typing issues

* 🩹 Fix bug in _create_result when optimizer fails
* ♻️ Rename ParameterHistory.{number_records,number_of_records}

* 🔧 Raise interrogate threshold from 55% to 59% (current 60.7%)

* 🧹 Removed obsolete darglint ignore comments
* 🧹 Fix typos
* 👌 Add annotation to __str__ method
* ♻️ Rename project.dataclasses to project.dataclass_helpers

* Update glotaran/analysis/optimize.py
* Update glotaran/parameter/parameter_group.py
* Update glotaran/parameter/parameter_group.py

* ♻️ Refactor glotaran/builtin/io/yml/test/test_save_model.py to use tmp_path instead of tmpdir

* 🚇 🔧 Skip interrogate in pre-commit CI

* 🧹 Renamed 'test_dataclasses.py' to 'test_dataclass_helpers.py'

Co-authored-by: Sebastian Weigand <s.weigand.phy@gmail.com>
Co-authored-by: Sourcery AI <>
Co-authored-by: Joris Snellenburg <jsnel@users.noreply.github.com>
3 people authored Oct 12, 2021
Parent: 7797bc9 · Commit: d1e36a9
Showing 39 changed files with 1,729 additions and 617 deletions.
12 changes: 8 additions & 4 deletions .pre-commit-config.yaml
@@ -1,3 +1,6 @@
ci:
skip: [interrogate]

repos:
# Formatters
- repo: https://github.com/pre-commit/pre-commit-hooks
@@ -77,7 +80,7 @@ repos:
rev: 6.1.1
hooks:
- id: pydocstyle
files: "^glotaran/(plugin_system|utils|deprecation|testing)"
files: "^glotaran/(plugin_system|utils|deprecation|testing|parameter|project)"
exclude: "docs|tests?/"
# this is needed due to the following issue:
# https://github.com/PyCQA/pydocstyle/issues/368
@@ -87,14 +90,14 @@
rev: v1.8.0
hooks:
- id: darglint
files: "^glotaran/(plugin_system|utils|deprecation|testing)"
files: "^glotaran/(plugin_system|utils|deprecation|testing|parameter|project)"
exclude: "docs|tests?/"

- repo: https://github.com/pre-commit/mirrors-mypy
rev: v0.910
hooks:
- id: mypy
files: "^glotaran/(plugin_system|utils|deprecation|testing)"
files: "^glotaran/(plugin_system|utils|deprecation|testing|parameter|project)"
exclude: "docs"
additional_dependencies: [types-all]

@@ -112,6 +115,7 @@
types: [file]
types_or: [python, pyi]
additional_dependencies: [flake8-docstrings, flake8-print]
exclude: "parameter.py"

- repo: https://github.com/PyCQA/flake8
rev: 3.9.2
@@ -133,7 +137,7 @@
hooks:
- id: rst-backticks
- id: python-check-blanket-noqa
exclude: "docs|tests?"
exclude: "parameter.py|docs|tests?"
- id: python-check-blanket-type-ignore
exclude: "docs|tests?"
- id: python-use-type-annotations
15 changes: 8 additions & 7 deletions glotaran/analysis/optimize.py
@@ -6,6 +6,7 @@
from scipy.optimize import OptimizeResult
from scipy.optimize import least_squares

from glotaran import __version__ as glotaran_version
from glotaran.analysis.problem import Problem
from glotaran.analysis.problem_grouped import GroupedProblem
from glotaran.analysis.problem_ungrouped import UngroupedProblem
@@ -91,23 +92,21 @@ def _create_result(
success = ls_result is not None

number_of_function_evaluation = (
ls_result.nfev if ls_result is not None else len(problem.parameter_history)
ls_result.nfev if success else problem.parameter_history.number_of_records
)
number_of_jacobian_evaluation = ls_result.njev if success else None
optimality = ls_result.optimality if success else None
optimality = float(ls_result.optimality) if success else None
number_of_data_points = ls_result.fun.size if success else None
number_of_variables = ls_result.x.size if success else None
degrees_of_freedom = number_of_data_points - number_of_variables if success else None
chi_square = np.sum(ls_result.fun ** 2) if success else None
chi_square = float(np.sum(ls_result.fun ** 2)) if success else None
reduced_chi_square = chi_square / degrees_of_freedom if success else None
root_mean_square_error = np.sqrt(reduced_chi_square) if success else None
root_mean_square_error = float(np.sqrt(reduced_chi_square)) if success else None
jacobian = ls_result.jac if success else None

if success:
problem.parameters.set_from_label_and_value_arrays(free_parameter_labels, ls_result.x)
problem.reset()
history_index = None if success else -2
data = problem.create_result_data(history_index=history_index)
data = problem.create_result_data(success)
# the optimized parameters are those of the last run if the optimization has crashed
parameters = problem.parameters
covariance_matrix = None
@@ -125,10 +124,12 @@
additional_penalty=problem.additional_penalty,
cost=problem.cost,
data=data,
glotaran_version=glotaran_version,
free_parameter_labels=free_parameter_labels,
number_of_function_evaluations=number_of_function_evaluation,
initial_parameters=problem.scheme.parameters,
optimized_parameters=parameters,
parameter_history=problem.parameter_history,
scheme=problem.scheme,
success=success,
termination_reason=termination_reason,
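
The fit statistics assembled in `_create_result` follow directly from the SciPy `least_squares` output. A small self-contained sketch of the same formulas, with an illustrative toy residual in place of the real problem:

```python
import numpy as np
from scipy.optimize import least_squares


def residual(p: np.ndarray) -> np.ndarray:
    # Toy residual standing in for the real optimization problem.
    return np.array([p[0] - 1.0, p[1] - 2.0, p[0] + p[1] - 3.5])


ls_result = least_squares(residual, x0=np.array([0.0, 0.0]))

# Same quantities as in _create_result above.
number_of_data_points = ls_result.fun.size  # length of the residual vector
number_of_variables = ls_result.x.size      # number of free parameters
degrees_of_freedom = number_of_data_points - number_of_variables
chi_square = float(np.sum(ls_result.fun**2))
reduced_chi_square = chi_square / degrees_of_freedom
root_mean_square_error = float(np.sqrt(reduced_chi_square))
print(chi_square, reduced_chi_square, root_mean_square_error)
```
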
24 changes: 16 additions & 8 deletions glotaran/analysis/problem.py
@@ -16,13 +16,19 @@
from glotaran.model import DatasetModel
from glotaran.model import Model
from glotaran.parameter import ParameterGroup
from glotaran.parameter import ParameterHistory
from glotaran.project import Scheme

if TYPE_CHECKING:
from typing import Hashable


class ParameterError(ValueError):
class InitialParameterError(ValueError):
def __init__(self):
super().__init__("Initial parameters can not be evaluated.")


class ParameterNotInitializedError(ValueError):
def __init__(self):
super().__init__("Parameter not initialized")

@@ -83,7 +89,7 @@ def __init__(self, scheme: Scheme):

self._overwrite_index_dependent = self.model.need_index_dependent()
self._parameters = scheme.parameters.copy()
self._parameter_history = []
self._parameter_history = ParameterHistory()

self._model.validate(raise_exception=True)

@@ -140,7 +146,7 @@ def parameters(self, parameters: ParameterGroup):
self.reset()

@property
def parameter_history(self) -> list[ParameterGroup]:
def parameter_history(self) -> ParameterHistory:
return self._parameter_history

@property
@@ -318,13 +324,15 @@ def _add_weight(self, label, dataset):
)
dataset.weight[idx] *= weight.value

def create_result_data(
self, copy: bool = True, history_index: int | None = None
) -> dict[str, xr.Dataset]:
def create_result_data(self, copy: bool = True, success: bool = True) -> dict[str, xr.Dataset]:

if history_index is not None and history_index != -1:
self.parameters = self.parameter_history[history_index]
if not success:
if self.parameter_history.number_of_records > 1:
self.parameters.set_from_history(self.parameter_history, -2)
else:
raise InitialParameterError()

self.reset()
self.prepare_result_creation()
result_data = {}
for label, dataset_model in self.dataset_models.items():
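
The failure handling added to `create_result_data` above boils down to: if the optimizer crashed, fall back to the second-to-last parameter record (the last record belongs to the crashed evaluation); with fewer than two records nothing usable exists and `InitialParameterError` is raised. A simplified, self-contained sketch of that rule, using a plain list in place of the real `ParameterHistory`:

```python
from __future__ import annotations


class InitialParameterError(ValueError):
    def __init__(self) -> None:
        super().__init__("Initial parameters can not be evaluated.")


def pick_parameters(history: list[list[float]], success: bool) -> list[float]:
    """Pick the parameter record to report, mirroring create_result_data above."""
    if success:
        return history[-1]  # optimizer finished; the real code uses ls_result.x instead
    if len(history) > 1:
        return history[-2]  # crashed: the last record belongs to the failed evaluation
    raise InitialParameterError()  # only the initial parameters were ever recorded


print(pick_parameters([[1.0, 2.0], [1.1, 1.9], [float("nan"), 1.8]], success=False))
# -> [1.1, 1.9]
```
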
9 changes: 5 additions & 4 deletions glotaran/analysis/problem_grouped.py
@@ -2,13 +2,14 @@

import collections
import itertools
from typing import Any
from typing import Deque

import numpy as np
import xarray as xr

from glotaran.analysis.problem import GroupedProblemDescriptor
from glotaran.analysis.problem import ParameterError
from glotaran.analysis.problem import ParameterNotInitializedError
from glotaran.analysis.problem import Problem
from glotaran.analysis.problem import ProblemGroup
from glotaran.analysis.util import CalculatedMatrix
@@ -192,7 +193,7 @@ def groups(self) -> dict[str, list[str]]:

def calculate_matrices(self):
if self._parameters is None:
raise ParameterError
raise ParameterNotInitializedError
if self._index_dependent:
self.calculate_index_dependent_matrices()
else:
@@ -308,7 +309,7 @@ def _index_dependent_residual(
problem: ProblemGroup,
matrix: CalculatedMatrix,
clp_labels: str,
index: any,
index: Any,
) -> tuple[np.ndarray, np.ndarray, np.ndarray]:

reduced_clp_labels = matrix.clp_labels
@@ -338,7 +339,7 @@ def _index_dependent_residual(
)
return clp_labels, clps, weighted_residual, residual

def _index_independent_residual(self, problem: ProblemGroup, index: any):
def _index_independent_residual(self, problem: ProblemGroup, index: Any):
matrix = self.reduced_matrices[problem.group]
reduced_clp_labels = matrix.clp_labels
matrix = matrix.matrix.copy()
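
For context on the typing fix in this file: lowercase `any` is the builtin function `any(iterable) -> bool`, not a type, so annotating `index: any` attaches a function object and mypy rejects it. The unconstrained type comes from `typing.Any`, as the minimal illustration below shows.

```python
from typing import Any

# Before: `any` is the builtin function, not a type, so mypy flags the annotation:
#     def _index_independent_residual(self, problem, index: any): ...

# After: typing.Any is the intended unconstrained annotation.
def index_independent_residual(problem: Any, index: Any) -> None:
    ...
```
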
4 changes: 2 additions & 2 deletions glotaran/analysis/problem_ungrouped.py
@@ -3,7 +3,7 @@
import numpy as np
import xarray as xr

from glotaran.analysis.problem import ParameterError
from glotaran.analysis.problem import ParameterNotInitializedError
from glotaran.analysis.problem import Problem
from glotaran.analysis.util import CalculatedMatrix
from glotaran.analysis.util import apply_weight
@@ -51,7 +51,7 @@ def calculate_matrices(
]:
"""Calculates the model matrices."""
if self._parameters is None:
raise ParameterError
raise ParameterNotInitializedError

self._matrices = {}
self._global_matrices = {}
105 changes: 84 additions & 21 deletions glotaran/builtin/io/folder/folder_plugin.py
@@ -6,13 +6,19 @@

from __future__ import annotations

import os
from pathlib import Path
from typing import TYPE_CHECKING

from glotaran.io import save_dataset
from glotaran.io import save_model
from glotaran.io import save_parameters
from glotaran.io import save_scheme
from glotaran.io.interface import ProjectIoInterface
from glotaran.plugin_system.project_io_registration import SAVING_OPTIONS_DEFAULT
from glotaran.plugin_system.project_io_registration import register_project_io

if TYPE_CHECKING:
from glotaran.plugin_system.project_io_registration import SavingOptions
from glotaran.project import Result


@@ -24,21 +30,39 @@ class FolderProjectIo(ProjectIoInterface):
a markdown summary output and the important data saved to files.
"""

def save_result(self, result: Result, result_path: str) -> list[str]:
def save_result(
self,
result: Result,
result_path: str,
*,
saving_options: SavingOptions = SAVING_OPTIONS_DEFAULT,
) -> list[str]:
"""Save the result to a given folder.
Returns a list with paths of all saved items.
The following files are saved:
The following files are saved if not configured otherwise:
* `result.md`: The result with the model formatted as markdown text.
* `model.yml`: Model spec file.
* `scheme.yml`: Scheme spec file.
* `initial_parameters.csv`: Initially used parameters.
* `optimized_parameters.csv`: The optimized parameters as csv file.
* `parameter_history.csv`: Parameter changes over the optimization.
* `{dataset_label}.nc`: The result data for each dataset as NetCDF file.
Note
----
As a side effect it populates the file path properties of ``result`` which can be
used in other plugins (e.g. the ``yml`` save_result).
Parameters
----------
result : Result
Result instance to be saved.
result_path : str
The path to the folder in which to save the result.
saving_options : SavingOptions
Options for saving the result.
Returns
-------
@@ -50,25 +74,64 @@ def save_result(self, result: Result, result_path: str) -> list[str]:
ValueError
If ``result_path`` is a file.
"""
if not os.path.exists(result_path):
os.makedirs(result_path)
if not os.path.isdir(result_path):
raise ValueError(f"The path '{result_path}' is not a directory.")
result_folder = Path(result_path)
if result_folder.is_file():
raise ValueError(f"The path '{result_folder}' is not a directory.")
result_folder.mkdir(parents=True, exist_ok=True)

paths = []

md_path = os.path.join(result_path, "result.md")
with open(md_path, "w") as f:
f.write(str(result.markdown()))
paths.append(md_path)

csv_path = os.path.join(result_path, "optimized_parameters.csv")
result.optimized_parameters.to_csv(csv_path)
paths.append(csv_path)

for label, data in result.data.items():
nc_path = os.path.join(result_path, f"{label}.nc")
data.to_netcdf(nc_path, engine="netcdf4")
paths.append(nc_path)
if saving_options.report:
report_file = result_folder / "result.md"
report_file.write_text(str(result.markdown()))
paths.append(report_file.as_posix())

result.scheme.model_file = "model.yml"
save_model(
result.scheme.model, result_folder / result.scheme.model_file, allow_overwrite=True
)
paths.append((result_folder / result.scheme.model_file).as_posix())

result.initial_parameters_file = (
result.scheme.parameters_file
) = f"initial_parameters.{saving_options.parameter_format}"
save_parameters(
result.scheme.parameters,
result_folder / result.scheme.parameters_file,
format_name=saving_options.parameter_format,
allow_overwrite=True,
)
paths.append((result_folder / result.scheme.parameters_file).as_posix())

result.optimized_parameters_file = (
f"optimized_parameters.{saving_options.parameter_format}"
)
save_parameters(
result.optimized_parameters,
result_folder / result.optimized_parameters_file,
format_name=saving_options.parameter_format,
allow_overwrite=True,
)
paths.append((result_folder / result.optimized_parameters_file).as_posix())

result.scheme_file = "scheme.yml"
save_scheme(result.scheme, result_folder / result.scheme_file, allow_overwrite=True)
paths.append((result_folder / result.scheme_file).as_posix())

result.parameter_history_file = "parameter_history.csv"
result.parameter_history.to_csv(result_folder / result.parameter_history_file)
paths.append((result_folder / result.parameter_history_file).as_posix())

result.data_files = {
label: f"{label}.{saving_options.data_format}" for label in result.data
}

for label, data_file in result.data_files.items():
save_dataset(
result.data[label],
result_folder / data_file,
format_name=saving_options.data_format,
allow_overwrite=True,
)
paths.append((result_folder / data_file).as_posix())

return paths
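
For completeness, a hedged sketch of how the rewritten plugin is typically driven. The `SavingOptions` field names (`report`, `parameter_format`, `data_format`) are taken from the attribute accesses in the diff above; the constructor call, the registered format name, and the `saving_options` keyword of the top-level `save_result` are assumptions:

```python
# Hedged sketch; the SavingOptions constructor call, the registered format name
# and the saving_options keyword of save_result are assumptions.
from __future__ import annotations

from glotaran.io import save_result
from glotaran.plugin_system.project_io_registration import SavingOptions
from glotaran.project import Result


def save_with_options(result: Result, folder: str) -> list[str] | None:
    """Save `result` into `folder` via the folder plugin shown above."""
    options = SavingOptions(report=True, parameter_format="csv", data_format="nc")
    return save_result(
        result,
        folder,
        format_name="folder",    # assumed registered name of this plugin
        saving_options=options,  # forwarded to FolderProjectIo.save_result
    )
```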