From e80d397c49f2aebf7501e5cdb6648b0e7356f8b1 Mon Sep 17 00:00:00 2001 From: YoelPH Date: Mon, 11 Dec 2023 12:35:59 +0100 Subject: [PATCH 01/75] Add: First version of midline module added --- lymph/models/__init__.py | 3 +- lymph/models/bilateral.py | 4 +- lymph/models/midline.py | 411 +++++++++++++++++++++++++++++++ tests/binary_bilateral_test.py | 2 +- tests/binary_unilateral_test.py | 2 +- tests/trinary_unilateral_test.py | 2 +- 6 files changed, 418 insertions(+), 6 deletions(-) create mode 100644 lymph/models/midline.py diff --git a/lymph/models/__init__.py b/lymph/models/__init__.py index b129da0..5333065 100644 --- a/lymph/models/__init__.py +++ b/lymph/models/__init__.py @@ -4,5 +4,6 @@ from .bilateral import Bilateral from .unilateral import Unilateral +from .midline import Midline -__all__ = ["Unilateral", "Bilateral"] +__all__ = ["Unilateral", "Bilateral", "Midline"] diff --git a/lymph/models/bilateral.py b/lymph/models/bilateral.py index 61f737f..ba8d8c4 100644 --- a/lymph/models/bilateral.py +++ b/lymph/models/bilateral.py @@ -341,7 +341,7 @@ def assign_params( *new_params_args, **ipsi_kwargs, **general_kwargs ) remaining_args, remainings_kwargs = self.contra.assign_params( - *remaining_args, **contra_kwargs, **remainings_kwargs + *remaining_args, **contra_kwargs, **general_kwargs ) return remaining_args, remainings_kwargs @@ -602,7 +602,7 @@ def comp_posterior_joint_state_dist( joint_state_dist = self.comp_joint_state_dist(t_stage=t_stage, mode=mode) # matrix with P(Zi=zi,Zc=zc|Xi,Xc) * P(Xi,Xc) for all states Xi,Xc. 
joint_diagnose_and_state = ( - diagnose_given_state["ipsi"].T + diagnose_given_state["ipsi"][:, np.newaxis] * joint_state_dist * diagnose_given_state["contra"] ) diff --git a/lymph/models/midline.py b/lymph/models/midline.py new file mode 100644 index 0000000..4448ef0 --- /dev/null +++ b/lymph/models/midline.py @@ -0,0 +1,411 @@ +from __future__ import annotations +from argparse import OPTIONAL + +import logging +import warnings +from typing import Any, Iterable, Iterator + +import numpy as np +import pandas as pd + +from lymph import graph, matrix, modalities, models +from lymph.helper import ( + AbstractLookupDict, + DelegatorMixin, + DiagnoseType, + PatternType, + early_late_mapping, +) + +warnings.filterwarnings("ignore", category=pd.errors.PerformanceWarning) +logger = logging.getLogger(__name__) + + + +def create_property_sync_callback( + names: list[str], + this: graph.Edge, + other: graph.Edge, +) -> callable: + """Return func to sync property values whose name is in ``names`` btw two edges. + + The returned function is meant to be added to the list of callbacks of the + :py:class:`Edge` class, such that two edges in a mirrored pair of graphs are kept + in sync. + """ + def sync(): + # We must set the value of `this` property via the private name, otherwise + # we would trigger the setter's callbacks and may end up in an infinite loop. + for name in names: + private_name = f"_{name}" + setattr(other, private_name, getattr(this, name)) + + logger.debug(f"Created sync callback for properties {names} of {this.name} edge.") + return sync + + +def init_edge_sync( + property_names: list[str], + this_edge_list: list[graph.Edge], + other_edge_list: list[graph.Edge], +) -> None: + """Initialize the callbacks to sync properties btw. Edges. + + Implementing this as a separate method allows a user in theory to initialize + an arbitrary kind of symmetry between the two sides of the neck. 
+ """ + this_edge_names = [e.name for e in this_edge_list] + other_edge_names = [e.name for e in other_edge_list] + + for edge_name in set(this_edge_names).intersection(other_edge_names): + this_edge = this_edge_list[this_edge_names.index(edge_name)] + other_edge = other_edge_list[other_edge_names.index(edge_name)] + + this_edge.trigger_callbacks.append( + create_property_sync_callback( + names=property_names, + this=this_edge, + other=other_edge, + ) + ) + other_edge.trigger_callbacks.append( + create_property_sync_callback( + names=property_names, + this=other_edge, + other=this_edge, + ) + ) + + +def init_dict_sync( + this: AbstractLookupDict, + other: AbstractLookupDict, +) -> None: + """Add callback to ``this`` to sync with ``other``.""" + def sync(): + other.clear() + other.update(this) + + this.trigger_callbacks.append(sync) + + +class Midline(DelegatorMixin): + """Model a bilateral lymphatic system where an additional risk factor can + be provided in the data: Whether or not the primary tumor extended over the + mid-sagittal line. + + It is reasonable to assume (and supported by data) that such an extension + significantly increases the risk for metastatic spread to the contralateral + side of the neck. This class attempts to capture this using a simple + assumption: We assume that the probability of spread to the contralateral + side for patients *with* midline extension is larger than for patients + *without* it, but smaller than the probability of spread to the ipsilateral + side. Formally: + + .. math:: + b_c^{\\in} = \\alpha \\cdot b_i + (1 - \\alpha) \\cdot b_c^{\\not\\in} + + where :math:`b_c^{\\in}` is the probability of spread from the primary tumor + to the contralateral side for patients with midline extension, and + :math:`b_c^{\\not\\in}` for patients without. :math:`\\alpha` is the linear + mixing parameter. 
+ """ + def __init__( + self, + graph_dict: dict[tuple[str], list[str]], + use_mixing: bool = True, + trans_symmetric: bool = True, + **_kwargs + ): + """The class is constructed in a similar fashion to the + :class:`Bilateral`: That class contains one :class:`Unilateral` for + each side of the neck, while this class will contain two instances of + :class:`Bilateral`, one for the case of a midline extension and one for + the case of no midline extension. + + Args: + graph: Dictionary of the same kind as for initialization of + :class:`System`. This graph will be passed to the constructors of + two :class:`System` attributes of this class. + use_mixing: Describe the contralateral base spread probabilities for the + case of a midline extension as a linear combination between the base + spread probs of the ipsilateral side and the ones of the contralateral + side when no midline extension is present. + trans_symmetric: If ``True``, the spread probabilities among the + LNLs will be set symmetrically. + + See Also: + :class:`Bilateral`: Two of these are held as attributes by this + class. One for the case of a mid-sagittal extension of the primary + tumor and one for the case of no such extension. + """ + self.ext = models.Bilateral( + graph_dict=graph, tumor_spread_symmetric=False, lnl_spread_symmetric = trans_symmetric, modalities_symmetric = True, + ) + self.noext = models.Bilateral( + graph_dict=graph, tumor_spread_symmetric=False, lnl_spread_symmetric = trans_symmetric, modalities_symmetric = True, + ) + self.use_mixing = use_mixing + if self.use_mixing: + self.alpha_mix = 0. + + self.noext.diag_time_dists = self.ext.diag_time_dists + + def get_params( + self): + """Return the parameters of the model. 
+ + Should be optimized ti fut tge actual code design + """ + + if self.use_mixing: + return np.concatenate([ + self.ext.ipsi.base_probs, + self.noext.contra.base_probs, + [self.alpha_mix],]) + else: + return np.concatenate([ + self.ext.ipsi.base_probs, + self.ext.contra.base_probs, + self.noext.contra.base_probs,]) + + + def assign_params( + self, + *new_params_args, + **new_params_kwargs, + ) -> tuple[Iterator[float, dict[str, float]]]: + """Assign new parameters to the model. + + This works almost exactly as the unilateral model's + :py:meth:`~lymph.models.Unilateral.assign_params` method. However, this one + allows the user to set the parameters of individual sides of the neck by + prefixing the parameter name with ``"ipsi_"`` or ``"contra_"``. This is + necessary for parameters that are not symmetric between the two sides of the + neck. For symmetric parameters, the prefix is not needed as they are directly + sent to the ipsilateral side, which then triggers a sync callback. + + Note: + When setting the parameters via positional arguments, the order is + important. The first ``len(self.ipsi.get_params(as_dict=True))`` arguments + are passed to the ipsilateral side, the remaining ones to the contralateral + side. 
+ """ + if self.use_mixing: + ipsi_kwargs, contra_kwargs, general_kwargs = {}, {}, {} + for key, value in new_params_kwargs.items(): + if "ipsi_" in key: + ipsi_kwargs[key.replace("ipsi_", "")] = value + elif "contra_" in key: + contra_kwargs[key.replace("contra_", "")] = value + elif 'mixing' in key: + self.alpha_mix = value + else: + general_kwargs[key] = value + + remaining_args, remainings_kwargs = self.ext.ipsi.assign_params( + *new_params_args, **ipsi_kwargs, **general_kwargs + ) + remaining_args, remainings_kwargs = self.noext.contra.assign_params( + *remaining_args, **contra_kwargs, **remainings_kwargs + ) + else: + ipsi_kwargs, noext_contra_kwargs, ext_contra_kwargs, general_kwargs = {}, {}, {}, {} + + for key, value in new_params_kwargs.items(): + if "ipsi_" in key: + ipsi_kwargs[key.replace("ipsi_", "")] = value + elif "contra_noext" in key: + noext_contra_kwargs[key.replace("contra_noext", "")] = value + elif 'contra_ext' in key: + ext_contra_kwargs[key.replace("contra_ext", "")] = value + + else: + general_kwargs[key] = value + + remaining_args, remainings_kwargs = self.ext.ipsi.assign_params( + *new_params_args, **ipsi_kwargs, **general_kwargs + ) + remaining_args, remainings_kwargs = self.noext.contra.assign_params( + *remaining_args, **noext_contra_kwargs, **remainings_kwargs + ) + remaining_args, remainings_kwargs = self.ext.contra.assign_params( + *remaining_args, **ext_contra_kwargs, **remainings_kwargs + ) + return remaining_args, remainings_kwargs + + + @property + def modalities(self) -> modalities.ModalitiesUserDict: + """Return the set diagnostic modalities of the model. + + See Also: + :py:attr:`lymph.models.Unilateral.modalities` + The corresponding unilateral attribute. + :py:class:`~lymph.descriptors.ModalitiesUserDict` + The implementation of the descriptor class. + """ + if not self.modalities_symmetric: + raise AttributeError( + "The modalities are not symmetric. Please access them via the " + "`ipsi` or `contra` attributes." 
+ ) + return self.ext.modalities + + @modalities.setter + def modalities(self, new_modalities) -> None: + """Set the diagnostic modalities of the model.""" + if not self.modalities_symmetric: + raise AttributeError( + "The modalities are not symmetric. Please set them via the " + "`ipsi` or `contra` attributes." + ) + self.ext.modalities = new_modalities + self.noext.modalities = new_modalities + + + def load_patient_data( + self, + patient_data: pd.DataFrame, + mapping: callable = early_late_mapping, + ) -> None: + """Load patient data into the model. + + This amounts to calling the :py:meth:`~lymph.models.Unilateral.load_patient_data` + method on both models. + """ + + ext_data = patient_data.loc[patient_data[("info", "tumor", "midline_extension")]] + noext_data = patient_data.loc[~patient_data[("info", "tumor", "midline_extension")]] + + + self.ext.load_patient_data( + ext_data) + self.noext.load_patient_data( + noext_data,) + self.ext.load_patient_data(ext_data, mapping) + self.noext.load_patient_data(noext_data, mapping) + + + def likelihood( + self, + data: OPTIONAL[pd.DataFrame] = None, + given_params: OPTIONAL[np.ndarray] = None, + log: bool = True, + ) -> float: + """Compute log-likelihood of (already stored) data, given the spread + probabilities and either a discrete diagnose time or a distribution to + use for marginalization over diagnose times. + + Args: + data: Table with rows of patients and columns of per-LNL involvment. See + :meth:`load_data` for more details on how this should look like. + + given_params: The likelihood is a function of these parameters. They mainly + consist of the :attr:`spread_probs` of the model. Any excess parameters + will be used to update the parametrized distributions used for + marginalizing over the diagnose times (see :attr:`diag_time_dists`). + + log: When ``True``, the log-likelihood is returned. 
+ + Returns: + The log-likelihood :math:`\\log{p(D \\mid \\theta)}` where :math:`D` + is the data and :math:`\\theta` is the tuple of spread probabilities + and diagnose times or distributions over diagnose times. + + See Also: + :attr:`spread_probs`: Property for getting and setting the spread + probabilities, of which a lymphatic network has as many as it has + :class:`Edge` instances (in case no symmetries apply). + + :meth:`Unilateral.likelihood`: The log-likelihood function of + the unilateral system. + + :meth:`Bilateral.likelihood`: The (log-)likelihood function of the + bilateral system. + """ + if data is not None: + self.patient_data = data + + try: + self.assign_params(given_params) + except ValueError: + return -np.inf if log else 0. + + llh = 0. if log else 1. + + llh += self.ext._hmm_likelihood(log=log) + llh += self.noext._hmm_likelihood(log=log) + + + return llh + + + def risk( + self, + involvement: PatternType | None = None, + given_param_args: Iterable[float] | None = None, + given_param_kwargs: dict[str, float] | None = None, + given_diagnoses: dict[str, DiagnoseType] | None = None, + t_stage: str = "early", + midline_extension: bool = False, + mode: str = "HMM", + ) -> float: + """Compute the risk of nodal involvement given a specific diagnose. + + Args: + spread_probs: Set ot new spread parameters. This also contains the + mixing parameter alpha in the last position. + midline_extension: Whether or not the patient's tumor extends over + the mid-sagittal line. + + See Also: + :meth:`Bilateral.risk`: Depending on whether or not the patient's + tumor does extend over the midline, the risk function of the + respective :class:`Bilateral` instance gets called. 
+ """ + if given_param_args is not None: + self.assign_params(*given_param_args) + if given_param_kwargs is not None: + self.assign_params(**given_param_kwargs) + + if midline_extension: + return self.ext.risk(given_diagnoses,t_stage = t_stage, involvement = involvement) + else: + return self.noext.risk(given_diagnoses,t_stage = t_stage, involvement = involvement) + + + + # def generate_dataset( + # self, + # num_patients: int, + # stage_dist: dict[str, float], + # ) -> pd.DataFrame: + # """Generate/sample a pandas :class:`DataFrame` from the defined network. + + # Args: + # num_patients: Number of patients to generate. + # stage_dist: Probability to find a patient in a certain T-stage. + # """ + # # TODO: check if this still works + # drawn_t_stages, drawn_diag_times = self.diag_time_dists.draw( + # dist=stage_dist, size=num_patients + # ) + + # drawn_obs_ipsi = self.ipsi._draw_patient_diagnoses(drawn_diag_times) + # drawn_obs_contra = self.contra._draw_patient_diagnoses(drawn_diag_times) + # drawn_obs = np.concatenate([drawn_obs_ipsi, drawn_obs_contra], axis=1) + + # # construct MultiIndex for dataset from stored modalities + # sides = ["ipsi", "contra"] + # modalities = list(self.modalities.keys()) + # lnl_names = [lnl.name for lnl in self.ipsi.graph._lnls] + # multi_cols = pd.MultiIndex.from_product([sides, modalities, lnl_names]) + + # # create DataFrame + # dataset = pd.DataFrame(drawn_obs, columns=multi_cols) + # dataset = dataset.reorder_levels(order=[1, 0, 2], axis="columns") + # dataset = dataset.sort_index(axis="columns", level=0) + # dataset[('info', 'tumor', 't_stage')] = drawn_t_stages + + # return dataset \ No newline at end of file diff --git a/tests/binary_bilateral_test.py b/tests/binary_bilateral_test.py index d3ba017..53bc4ea 100644 --- a/tests/binary_bilateral_test.py +++ b/tests/binary_bilateral_test.py @@ -3,7 +3,7 @@ """ import unittest -import fixtures +from tests import fixtures import numpy as np from lymph import models diff --git 
a/tests/binary_unilateral_test.py b/tests/binary_unilateral_test.py index cf80dc4..b9e8168 100644 --- a/tests/binary_unilateral_test.py +++ b/tests/binary_unilateral_test.py @@ -1,7 +1,7 @@ """Test the binary unilateral system.""" import unittest -import fixtures +from tests import fixtures import numpy as np from lymph.graph import LymphNodeLevel, Tumor diff --git a/tests/trinary_unilateral_test.py b/tests/trinary_unilateral_test.py index d6870f4..acedf83 100644 --- a/tests/trinary_unilateral_test.py +++ b/tests/trinary_unilateral_test.py @@ -1,7 +1,7 @@ """Test the trinary unilateral system.""" import unittest -import fixtures +from tests import fixtures import numpy as np import pandas as pd From de2bbeda83907ed667f7b0ee9ea0a20bd1d2064d Mon Sep 17 00:00:00 2001 From: YoelPH Date: Mon, 11 Dec 2023 15:31:02 +0100 Subject: [PATCH 02/75] change! adaptations to make package functional --- lymph/models/midline.py | 98 ++++++++++++++++++++++++----------------- 1 file changed, 58 insertions(+), 40 deletions(-) diff --git a/lymph/models/midline.py b/lymph/models/midline.py index 4448ef0..0bec926 100644 --- a/lymph/models/midline.py +++ b/lymph/models/midline.py @@ -114,7 +114,9 @@ def __init__( self, graph_dict: dict[tuple[str], list[str]], use_mixing: bool = True, + modalities_symmetric: bool = True, trans_symmetric: bool = True, + unilateral_kwargs: dict[str, Any] | None = None, **_kwargs ): """The class is constructed in a similar fashion to the @@ -139,17 +141,39 @@ def __init__( class. One for the case of a mid-sagittal extension of the primary tumor and one for the case of no such extension. 
""" + super().__init__() self.ext = models.Bilateral( - graph_dict=graph, tumor_spread_symmetric=False, lnl_spread_symmetric = trans_symmetric, modalities_symmetric = True, - ) + graph_dict=graph_dict, tumor_spread_symmetric=False, lnl_spread_symmetric = trans_symmetric, modalities_symmetric = True, unilateral_kwargs=unilateral_kwargs) self.noext = models.Bilateral( - graph_dict=graph, tumor_spread_symmetric=False, lnl_spread_symmetric = trans_symmetric, modalities_symmetric = True, - ) + graph_dict=graph_dict, tumor_spread_symmetric=False, lnl_spread_symmetric = trans_symmetric, modalities_symmetric = True, unilateral_kwargs=unilateral_kwargs) self.use_mixing = use_mixing + self.diag_time_dists = {} if self.use_mixing: self.alpha_mix = 0. - self.noext.diag_time_dists = self.ext.diag_time_dists + self.modalities_symmetric = modalities_symmetric + property_names = ["spread_prob"] + if self.ext.ipsi.graph.is_trinary: + property_names.append("micro_mod") + delegated_attrs = [ + "max_time", "t_stages", + "is_binary", "is_trinary", + ] + + init_dict_sync( + this=self.ext.ipsi.diag_time_dists, + other=self.noext.ipsi.diag_time_dists, + ) + + + if self.modalities_symmetric: + delegated_attrs.append("modalities") + init_dict_sync( + this=self.ext.modalities, + other=self.noext.modalities, + ) + + self.init_delegation(ext=delegated_attrs) def get_params( self): @@ -159,15 +183,14 @@ def get_params( """ if self.use_mixing: - return np.concatenate([ - self.ext.ipsi.base_probs, - self.noext.contra.base_probs, - [self.alpha_mix],]) + return {'ipsi': self.ext.ipsi.get_params(as_dict=True), + 'no extension contra':self.noext.contra.get_params(as_dict=True), + 'mixing':self.alpha_mix} else: - return np.concatenate([ - self.ext.ipsi.base_probs, - self.ext.contra.base_probs, - self.noext.contra.base_probs,]) + return { + 'ipsi':self.ext.ipsi.get_params(as_dict=True), + 'extension contra':self.ext.contra.get_params(as_dict=True), + 'no extension 
contra':self.noext.contra.get_params(as_dict=True)} def assign_params( @@ -192,23 +215,21 @@ def assign_params( side. """ if self.use_mixing: - ipsi_kwargs, contra_kwargs, general_kwargs = {}, {}, {} + extension_kwargs = {} + no_extension_kwargs = {} for key, value in new_params_kwargs.items(): - if "ipsi_" in key: - ipsi_kwargs[key.replace("ipsi_", "")] = value - elif "contra_" in key: - contra_kwargs[key.replace("contra_", "")] = value - elif 'mixing' in key: + if 'mixing' in key: self.alpha_mix = value else: - general_kwargs[key] = value + no_extension_kwargs[key] = value + remaining_args, remainings_kwargs = self.noext.assign_params(*new_params_args, **no_extension_kwargs) + for key in no_extension_kwargs.keys(): + if 'contra_primary' in key: + extension_kwargs[key] = self.alpha_mix * extension_kwargs[(key.replace("contra", "ipsi"))] + (1. - self.alpha_mix) * no_extension_kwargs[key] + else: + extension_kwargs[key] = no_extension_kwargs[key] + remaining_args, remainings_kwargs = self.ext.assign_params(*remaining_args, **extension_kwargs) - remaining_args, remainings_kwargs = self.ext.ipsi.assign_params( - *new_params_args, **ipsi_kwargs, **general_kwargs - ) - remaining_args, remainings_kwargs = self.noext.contra.assign_params( - *remaining_args, **contra_kwargs, **remainings_kwargs - ) else: ipsi_kwargs, noext_contra_kwargs, ext_contra_kwargs, general_kwargs = {}, {}, {}, {} @@ -275,14 +296,9 @@ def load_patient_data( method on both models. 
""" - ext_data = patient_data.loc[patient_data[("info", "tumor", "midline_extension")]] - noext_data = patient_data.loc[~patient_data[("info", "tumor", "midline_extension")]] + ext_data = patient_data.loc[patient_data[("tumor", "1", "extension")]] + noext_data = patient_data.loc[~patient_data[("tumor", "1", "extension")]] - - self.ext.load_patient_data( - ext_data) - self.noext.load_patient_data( - noext_data,) self.ext.load_patient_data(ext_data, mapping) self.noext.load_patient_data(noext_data, mapping) @@ -290,7 +306,7 @@ def load_patient_data( def likelihood( self, data: OPTIONAL[pd.DataFrame] = None, - given_params: OPTIONAL[np.ndarray] = None, + given_param_kwargs: dict[str, float] | None = None, log: bool = True, ) -> float: """Compute log-likelihood of (already stored) data, given the spread @@ -327,15 +343,18 @@ def likelihood( if data is not None: self.patient_data = data + if given_param_kwargs is None: + given_param_kwargs = {} + try: - self.assign_params(given_params) + self.assign_params(**given_param_kwargs) except ValueError: return -np.inf if log else 0. llh = 0. if log else 1. + llh += self.ext.likelihood(log = log) + llh += self.noext.likelihood(log=log) - llh += self.ext._hmm_likelihood(log=log) - llh += self.noext._hmm_likelihood(log=log) return llh @@ -368,11 +387,10 @@ def risk( self.assign_params(*given_param_args) if given_param_kwargs is not None: self.assign_params(**given_param_kwargs) - if midline_extension: - return self.ext.risk(given_diagnoses,t_stage = t_stage, involvement = involvement) + return self.ext.risk(given_diagnoses = given_diagnoses,t_stage = t_stage, involvement = involvement) else: - return self.noext.risk(given_diagnoses,t_stage = t_stage, involvement = involvement) + return self.noext.risk(given_diagnoses = given_diagnoses,t_stage = t_stage, involvement = involvement) From 28282da5b17eb424d57751e542ad3919f875ccdc Mon Sep 17 00:00:00 2001 From: YoelPH Date: Wed, 13 Dec 2023 11:24:55 +0100 Subject: [PATCH 03/75] change! 
first adaptations to implement a central model --- lymph/models/midline.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/lymph/models/midline.py b/lymph/models/midline.py index 0bec926..1ae501b 100644 --- a/lymph/models/midline.py +++ b/lymph/models/midline.py @@ -143,9 +143,11 @@ def __init__( """ super().__init__() self.ext = models.Bilateral( - graph_dict=graph_dict, tumor_spread_symmetric=False, lnl_spread_symmetric = trans_symmetric, modalities_symmetric = True, unilateral_kwargs=unilateral_kwargs) + graph_dict=graph_dict, tumor_spread_symmetric=False, lnl_spread_symmetric = trans_symmetric, modalities_symmetric = modalities_symmetric, unilateral_kwargs=unilateral_kwargs) self.noext = models.Bilateral( - graph_dict=graph_dict, tumor_spread_symmetric=False, lnl_spread_symmetric = trans_symmetric, modalities_symmetric = True, unilateral_kwargs=unilateral_kwargs) + graph_dict=graph_dict, tumor_spread_symmetric=False, lnl_spread_symmetric = trans_symmetric, modalities_symmetric = modalities_symmetric, unilateral_kwargs=unilateral_kwargs) + self.central = models.Bilateral( + graph_dict=graph_dict, tumor_spread_symmetric=True, lnl_spread_symmetric = trans_symmetric, modalities_symmetric = modalities_symmetric, unilateral_kwargs=unilateral_kwargs) self.use_mixing = use_mixing self.diag_time_dists = {} if self.use_mixing: @@ -164,7 +166,10 @@ def __init__( this=self.ext.ipsi.diag_time_dists, other=self.noext.ipsi.diag_time_dists, ) - + init_dict_sync( + this=self.ext.ipsi.diag_time_dists, + other=self.central.ipsi.diag_time_dists + ) if self.modalities_symmetric: delegated_attrs.append("modalities") @@ -172,6 +177,11 @@ def __init__( this=self.ext.modalities, other=self.noext.modalities, ) + delegated_attrs.append("modalities") + init_dict_sync( + this=self.ext.modalities, + other=self.central.modalities, + ) self.init_delegation(ext=delegated_attrs) From 741e83914ec17cc23147db3363d6e97c7626aae0 Mon Sep 17 00:00:00 2001 From: 
YoelPH Date: Fri, 15 Dec 2023 16:42:06 +0100 Subject: [PATCH 04/75] change: Added central tumor location A first version has been produced with central tumor locations that seems to work out. Further refinement of the module comming in next push --- lymph/models/midline.py | 35 +++++++++++++++++++++++------------ 1 file changed, 23 insertions(+), 12 deletions(-) diff --git a/lymph/models/midline.py b/lymph/models/midline.py index 1ae501b..ae91d5d 100644 --- a/lymph/models/midline.py +++ b/lymph/models/midline.py @@ -142,12 +142,10 @@ def __init__( tumor and one for the case of no such extension. """ super().__init__() - self.ext = models.Bilateral( - graph_dict=graph_dict, tumor_spread_symmetric=False, lnl_spread_symmetric = trans_symmetric, modalities_symmetric = modalities_symmetric, unilateral_kwargs=unilateral_kwargs) - self.noext = models.Bilateral( - graph_dict=graph_dict, tumor_spread_symmetric=False, lnl_spread_symmetric = trans_symmetric, modalities_symmetric = modalities_symmetric, unilateral_kwargs=unilateral_kwargs) - self.central = models.Bilateral( - graph_dict=graph_dict, tumor_spread_symmetric=True, lnl_spread_symmetric = trans_symmetric, modalities_symmetric = modalities_symmetric, unilateral_kwargs=unilateral_kwargs) + self.ext = models.Bilateral(graph_dict= graph_dict,unilateral_kwargs=unilateral_kwargs, is_symmetric={'tumor_spread':True, "modalities": modalities_symmetric, "lnl_spread":trans_symmetric}) + self.noext = models.Bilateral(graph_dict= graph_dict,unilateral_kwargs=unilateral_kwargs, is_symmetric={'tumor_spread':True, "modalities": modalities_symmetric, "lnl_spread":trans_symmetric}) + self.central = models.Bilateral(graph_dict= graph_dict,unilateral_kwargs=unilateral_kwargs, is_symmetric={'tumor_spread':True, "modalities": modalities_symmetric, "lnl_spread":trans_symmetric}) + self.use_mixing = use_mixing self.diag_time_dists = {} if self.use_mixing: @@ -167,7 +165,7 @@ def __init__( other=self.noext.ipsi.diag_time_dists, ) 
init_dict_sync( - this=self.ext.ipsi.diag_time_dists, + this=self.noext.ipsi.diag_time_dists, other=self.central.ipsi.diag_time_dists ) @@ -179,7 +177,7 @@ def __init__( ) delegated_attrs.append("modalities") init_dict_sync( - this=self.ext.modalities, + this=self.noext.modalities, other=self.central.modalities, ) @@ -227,6 +225,7 @@ def assign_params( if self.use_mixing: extension_kwargs = {} no_extension_kwargs = {} + central_kwargs = {} for key, value in new_params_kwargs.items(): if 'mixing' in key: self.alpha_mix = value @@ -239,7 +238,12 @@ def assign_params( else: extension_kwargs[key] = no_extension_kwargs[key] remaining_args, remainings_kwargs = self.ext.assign_params(*remaining_args, **extension_kwargs) + for key in no_extension_kwargs.keys(): + if 'contra' not in key: + central_kwargs[key] = no_extension_kwargs[key] + remaining_args, remainings_kwargs = self.central.assign_params(*new_params_args, **central_kwargs) +#this part is not tested yet or properly implemented else: ipsi_kwargs, noext_contra_kwargs, ext_contra_kwargs, general_kwargs = {}, {}, {}, {} @@ -251,6 +255,7 @@ def assign_params( elif 'contra_ext' in key: ext_contra_kwargs[key.replace("contra_ext", "")] = value + else: general_kwargs[key] = value @@ -293,6 +298,7 @@ def modalities(self, new_modalities) -> None: ) self.ext.modalities = new_modalities self.noext.modalities = new_modalities + self.central.modalities = new_modalities def load_patient_data( @@ -306,11 +312,13 @@ def load_patient_data( method on both models. 
""" - ext_data = patient_data.loc[patient_data[("tumor", "1", "extension")]] + ext_data = patient_data.loc[(patient_data[("tumor", "1", "extension")] == True) & (patient_data[("tumor", "1", "central")] != True)] noext_data = patient_data.loc[~patient_data[("tumor", "1", "extension")]] + central = patient_data[patient_data[("tumor", "1", "central")].notna() & patient_data[("tumor", "1", "central")]] self.ext.load_patient_data(ext_data, mapping) self.noext.load_patient_data(noext_data, mapping) + self.central.load_patient_data(central, mapping) def likelihood( @@ -363,7 +371,8 @@ def likelihood( llh = 0. if log else 1. llh += self.ext.likelihood(log = log) - llh += self.noext.likelihood(log=log) + llh += self.noext.likelihood(log = log) + llh += self.central.likelihood(log = log) @@ -378,6 +387,7 @@ def risk( given_diagnoses: dict[str, DiagnoseType] | None = None, t_stage: str = "early", midline_extension: bool = False, + central: bool = False, mode: str = "HMM", ) -> float: """Compute the risk of nodal involvement given a specific diagnose. @@ -397,10 +407,11 @@ def risk( self.assign_params(*given_param_args) if given_param_kwargs is not None: self.assign_params(**given_param_kwargs) + if central: + return self.central.risk(given_diagnoses = given_diagnoses,t_stage = t_stage, involvement = involvement) if midline_extension: return self.ext.risk(given_diagnoses = given_diagnoses,t_stage = t_stage, involvement = involvement) - else: - return self.noext.risk(given_diagnoses = given_diagnoses,t_stage = t_stage, involvement = involvement) + return self.noext.risk(given_diagnoses = given_diagnoses,t_stage = t_stage, involvement = involvement) From 112e677612aa0de13b5a562c7ccb1ffde0334db7 Mon Sep 17 00:00:00 2001 From: YoelPH Date: Tue, 19 Dec 2023 10:37:46 +0100 Subject: [PATCH 05/75] change! further expand functionality of midline The midline model was further expanded to have similar funcitonalities as the old model. 
Additionally, the code style was made more conformal with the bilateral module. Right now the code is not functional. --- lymph/graph.py | 2 +- lymph/models/midline.py | 94 +++++++++++++++++++++++++++-------------- 2 files changed, 64 insertions(+), 32 deletions(-) diff --git a/lymph/graph.py b/lymph/graph.py index 609e45c..d510c3f 100644 --- a/lymph/graph.py +++ b/lymph/graph.py @@ -661,7 +661,7 @@ def get_mermaid(self) -> str: mermaid_graph = "flowchart TD\n" for idx, node in enumerate(self.nodes): - for edge in self.nodes[idx].out: + for edge in self.nodes[node].out: mermaid_graph += f"\t{node.name}-->|{edge.spread_prob:.0%}| {edge.child.name}\n" return mermaid_graph diff --git a/lymph/models/midline.py b/lymph/models/midline.py index ae91d5d..c0d79e0 100644 --- a/lymph/models/midline.py +++ b/lymph/models/midline.py @@ -43,7 +43,7 @@ def sync(): logger.debug(f"Created sync callback for properties {names} of {this.name} edge.") return sync - +# this here could probably be used to sync the edges for the different bilateral classes if we want to keep on using it def init_edge_sync( property_names: list[str], this_edge_list: list[graph.Edge], @@ -117,6 +117,7 @@ def __init__( modalities_symmetric: bool = True, trans_symmetric: bool = True, unilateral_kwargs: dict[str, Any] | None = None, + central_enabled: bool = True, **_kwargs ): """The class is constructed in a similar fashion to the @@ -142,9 +143,11 @@ def __init__( tumor and one for the case of no such extension. 
""" super().__init__() + self.central_enabled = central_enabled self.ext = models.Bilateral(graph_dict= graph_dict,unilateral_kwargs=unilateral_kwargs, is_symmetric={'tumor_spread':True, "modalities": modalities_symmetric, "lnl_spread":trans_symmetric}) self.noext = models.Bilateral(graph_dict= graph_dict,unilateral_kwargs=unilateral_kwargs, is_symmetric={'tumor_spread':True, "modalities": modalities_symmetric, "lnl_spread":trans_symmetric}) - self.central = models.Bilateral(graph_dict= graph_dict,unilateral_kwargs=unilateral_kwargs, is_symmetric={'tumor_spread':True, "modalities": modalities_symmetric, "lnl_spread":trans_symmetric}) + if self.central_enabled: + self.central = models.Bilateral(graph_dict= graph_dict,unilateral_kwargs=unilateral_kwargs, is_symmetric={'tumor_spread':True, "modalities": modalities_symmetric, "lnl_spread":trans_symmetric}) self.use_mixing = use_mixing self.diag_time_dists = {} @@ -164,10 +167,11 @@ def __init__( this=self.ext.ipsi.diag_time_dists, other=self.noext.ipsi.diag_time_dists, ) - init_dict_sync( - this=self.noext.ipsi.diag_time_dists, - other=self.central.ipsi.diag_time_dists - ) + if central_enabled: + init_dict_sync( + this=self.noext.ipsi.diag_time_dists, + other=self.central.ipsi.diag_time_dists + ) if self.modalities_symmetric: delegated_attrs.append("modalities") @@ -175,14 +179,51 @@ def __init__( this=self.ext.modalities, other=self.noext.modalities, ) - delegated_attrs.append("modalities") - init_dict_sync( - this=self.noext.modalities, - other=self.central.modalities, - ) - + if central_enabled: + delegated_attrs.append("modalities") + init_dict_sync( + this=self.noext.modalities, + other=self.central.modalities, + ) + self.init_synchronization() self.init_delegation(ext=delegated_attrs) + def init_synchronization(self) -> None: + """Initialize the synchronization of edges, modalities, and diagnose times.""" + # Sync spread probabilities + property_names = ["spread_prob", "micro_mod"] if self.noext.ipsi.is_trinary 
else ["spread_prob"] + noext_ipsi_tumor_edges = list(self.noext.ipsi.graph.tumor_edges.values()) + noext_ipsi_lnl_edges = list(self.noext.ipsi.graph.lnl_edges.values()) + noext_ipsi_edges = ( + noext_ipsi_tumor_edges + noext_ipsi_lnl_edges + ) + ext_ipsi_tumor_edges = list(self.ext.ipsi.graph.tumor_edges.values()) + ext_ipsi_lnl_edges = list(self.ext.ipsi.graph.lnl_edges.values()) + ext_ipsi_edges = ( + ext_ipsi_tumor_edges + + ext_ipsi_lnl_edges + ) + + + init_edge_sync( + property_names=property_names, + this_edges=noext_ipsi_edges, + other_edges=ext_ipsi_edges, + ) + + if self.central_enabled: + central_ipsi_tumor_edges = list(self.central.ipsi.graph.tumor_edges.values()) + central_ipsi_lnl_edges = list(self.central.ipsi.graph.lnl_edges.values()) + central_ipsi_edges = ( + central_ipsi_tumor_edges + + central_ipsi_lnl_edges + ) + init_edge_sync( + property_names=property_names, + this_edges=noext_ipsi_edges, + other_edges=central_ipsi_edges, + ) + def get_params( self): """Return the parameters of the model. @@ -208,19 +249,11 @@ def assign_params( ) -> tuple[Iterator[float, dict[str, float]]]: """Assign new parameters to the model. - This works almost exactly as the unilateral model's - :py:meth:`~lymph.models.Unilateral.assign_params` method. However, this one - allows the user to set the parameters of individual sides of the neck by - prefixing the parameter name with ``"ipsi_"`` or ``"contra_"``. This is - necessary for parameters that are not symmetric between the two sides of the - neck. For symmetric parameters, the prefix is not needed as they are directly - sent to the ipsilateral side, which then triggers a sync callback. - - Note: - When setting the parameters via positional arguments, the order is - important. The first ``len(self.ipsi.get_params(as_dict=True))`` arguments - are passed to the ipsilateral side, the remaining ones to the contralateral - side. 
+ This works almost exactly as the bilateral model's + :py:meth:`~lymph.models.Bilateral.assign_params` method. However the assignment of parametrs + with an array is disabled as it gets to messy with such a large parameter space. + For universal parameters, the prefix is not needed as they are directly + sent to the noextension ipsilateral side, which then triggers a sync callback. """ if self.use_mixing: extension_kwargs = {} @@ -238,10 +271,11 @@ def assign_params( else: extension_kwargs[key] = no_extension_kwargs[key] remaining_args, remainings_kwargs = self.ext.assign_params(*remaining_args, **extension_kwargs) - for key in no_extension_kwargs.keys(): - if 'contra' not in key: - central_kwargs[key] = no_extension_kwargs[key] - remaining_args, remainings_kwargs = self.central.assign_params(*new_params_args, **central_kwargs) + if self.central_enabled: + for key in no_extension_kwargs.keys(): + if 'contra' not in key: + central_kwargs[key] = no_extension_kwargs[key] + remaining_args, remainings_kwargs = self.central.assign_params(*new_params_args, **central_kwargs) #this part is not tested yet or properly implemented else: @@ -254,8 +288,6 @@ def assign_params( noext_contra_kwargs[key.replace("contra_noext", "")] = value elif 'contra_ext' in key: ext_contra_kwargs[key.replace("contra_ext", "")] = value - - else: general_kwargs[key] = value From 2dfb966316f436a2a8f3bfaaf26bcd3d27aee126 Mon Sep 17 00:00:00 2001 From: YoelPH Date: Fri, 5 Jan 2024 17:10:48 +0100 Subject: [PATCH 06/75] fix fixed a typo in the midline model --- lymph/models/midline.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/lymph/models/midline.py b/lymph/models/midline.py index c0d79e0..745fa6a 100644 --- a/lymph/models/midline.py +++ b/lymph/models/midline.py @@ -144,8 +144,8 @@ def __init__( """ super().__init__() self.central_enabled = central_enabled - self.ext = models.Bilateral(graph_dict= graph_dict,unilateral_kwargs=unilateral_kwargs, 
is_symmetric={'tumor_spread':True, "modalities": modalities_symmetric, "lnl_spread":trans_symmetric}) - self.noext = models.Bilateral(graph_dict= graph_dict,unilateral_kwargs=unilateral_kwargs, is_symmetric={'tumor_spread':True, "modalities": modalities_symmetric, "lnl_spread":trans_symmetric}) + self.ext = models.Bilateral(graph_dict= graph_dict,unilateral_kwargs=unilateral_kwargs, is_symmetric={'tumor_spread':False, "modalities": modalities_symmetric, "lnl_spread":trans_symmetric}) + self.noext = models.Bilateral(graph_dict= graph_dict,unilateral_kwargs=unilateral_kwargs, is_symmetric={'tumor_spread':False, "modalities": modalities_symmetric, "lnl_spread":trans_symmetric}) if self.central_enabled: self.central = models.Bilateral(graph_dict= graph_dict,unilateral_kwargs=unilateral_kwargs, is_symmetric={'tumor_spread':True, "modalities": modalities_symmetric, "lnl_spread":trans_symmetric}) @@ -207,8 +207,8 @@ def init_synchronization(self) -> None: init_edge_sync( property_names=property_names, - this_edges=noext_ipsi_edges, - other_edges=ext_ipsi_edges, + this_edge_list=noext_ipsi_edges, + other_edge_list=ext_ipsi_edges, ) if self.central_enabled: @@ -220,8 +220,8 @@ def init_synchronization(self) -> None: ) init_edge_sync( property_names=property_names, - this_edges=noext_ipsi_edges, - other_edges=central_ipsi_edges, + this_edge_list=noext_ipsi_edges, + other_edge_list=central_ipsi_edges, ) def get_params( @@ -232,7 +232,7 @@ def get_params( """ if self.use_mixing: - return {'ipsi': self.ext.ipsi.get_params(as_dict=True), + return {'ipsi': self.noext.ipsi.get_params(as_dict=True), 'no extension contra':self.noext.contra.get_params(as_dict=True), 'mixing':self.alpha_mix} else: From fb738bc2d1b31764caa23b55b8047aac0ce32fba Mon Sep 17 00:00:00 2001 From: YoelPH Date: Tue, 16 Jan 2024 09:29:47 +0100 Subject: [PATCH 07/75] fix(bil): added new modality sync The new modality sync makes sure that both models stay updated --- lymph/models/bilateral.py | 21 
+++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/lymph/models/bilateral.py b/lymph/models/bilateral.py index bb49cf6..7cd0af5 100644 --- a/lymph/models/bilateral.py +++ b/lymph/models/bilateral.py @@ -112,6 +112,16 @@ def init_dict_sync( create_lookupdict_sync_callback(this=other, other=this) ) +def init_dict_sync2( + this: AbstractLookupDict, + other: AbstractLookupDict, +) -> None: + """Add callback to ``this`` to sync with ``other``.""" + def sync(): + other.clear() + other.update(this) + + this.trigger_callbacks.append(sync) class Bilateral(DelegatorMixin): @@ -234,16 +244,15 @@ def init_synchronization(self) -> None: ipsi_tumor_edges = list(self.ipsi.graph.tumor_edges.values()) ipsi_lnl_edges = list(self.ipsi.graph.lnl_edges.values()) ipsi_edges = ( - ipsi_tumor_edges if self.is_symmetric["tumor_spread"] else [] - + ipsi_lnl_edges if self.is_symmetric["lnl_spread"] else [] + (ipsi_tumor_edges if self.is_symmetric["tumor_spread"] else []) + + (ipsi_lnl_edges if self.is_symmetric["lnl_spread"] else []) ) contra_tumor_edges = list(self.contra.graph.tumor_edges.values()) contra_lnl_edges = list(self.contra.graph.lnl_edges.values()) contra_edges = ( - contra_tumor_edges if self.is_symmetric["tumor_spread"] else [] - + contra_lnl_edges if self.is_symmetric["lnl_spread"] else [] + (contra_tumor_edges if self.is_symmetric["tumor_spread"] else []) + + (contra_lnl_edges if self.is_symmetric["lnl_spread"] else []) ) - init_edge_sync( property_names=property_names, this_edges=ipsi_edges, @@ -252,7 +261,7 @@ def init_synchronization(self) -> None: # Sync modalities if self.is_symmetric["modalities"]: - init_dict_sync( + init_dict_sync2( this=self.ipsi.modalities, other=self.contra.modalities, ) From c745675439f768c45398c4ca49389f89b265b1ad Mon Sep 17 00:00:00 2001 From: YoelPH Date: Tue, 16 Jan 2024 09:30:49 +0100 Subject: [PATCH 08/75] fix(mid): changed key assignment New key assignment for midline model where 'ipsi' is removed to ensure 
correct assignment --- lymph/models/midline.py | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/lymph/models/midline.py b/lymph/models/midline.py index 745fa6a..306f599 100644 --- a/lymph/models/midline.py +++ b/lymph/models/midline.py @@ -180,7 +180,7 @@ def __init__( other=self.noext.modalities, ) if central_enabled: - delegated_attrs.append("modalities") + # delegated_attrs.append("modalities") init_dict_sync( this=self.noext.modalities, other=self.central.modalities, @@ -211,18 +211,18 @@ def init_synchronization(self) -> None: other_edge_list=ext_ipsi_edges, ) - if self.central_enabled: - central_ipsi_tumor_edges = list(self.central.ipsi.graph.tumor_edges.values()) - central_ipsi_lnl_edges = list(self.central.ipsi.graph.lnl_edges.values()) - central_ipsi_edges = ( - central_ipsi_tumor_edges - + central_ipsi_lnl_edges - ) - init_edge_sync( - property_names=property_names, - this_edge_list=noext_ipsi_edges, - other_edge_list=central_ipsi_edges, - ) + # if self.central_enabled: + # central_ipsi_tumor_edges = list(self.central.ipsi.graph.tumor_edges.values()) + # central_ipsi_lnl_edges = list(self.central.ipsi.graph.lnl_edges.values()) + # central_ipsi_edges = ( + # central_ipsi_tumor_edges + # + central_ipsi_lnl_edges + # ) + # init_edge_sync( + # property_names=property_names, + # this_edge_list=noext_ipsi_edges, + # other_edge_list=central_ipsi_edges, + # ) def get_params( self): @@ -274,7 +274,7 @@ def assign_params( if self.central_enabled: for key in no_extension_kwargs.keys(): if 'contra' not in key: - central_kwargs[key] = no_extension_kwargs[key] + central_kwargs[(key.replace("ipsi_", ""))] = no_extension_kwargs[key] remaining_args, remainings_kwargs = self.central.assign_params(*new_params_args, **central_kwargs) #this part is not tested yet or properly implemented From b8aa199f57516e6709938aace4cb7f23ac261b35 Mon Sep 17 00:00:00 2001 From: YoelPH Date: Tue, 23 Jan 2024 08:54:43 +0100 Subject: [PATCH 09/75] 
change: Non-mixture midline implemented fixed the non mixture midline extension model and added documentation --- lymph/models/midline.py | 65 ++++++++++++++++++++++++++--------------- 1 file changed, 42 insertions(+), 23 deletions(-) diff --git a/lymph/models/midline.py b/lymph/models/midline.py index 306f599..b4354e9 100644 --- a/lymph/models/midline.py +++ b/lymph/models/midline.py @@ -92,7 +92,7 @@ def sync(): class Midline(DelegatorMixin): """Model a bilateral lymphatic system where an additional risk factor can be provided in the data: Whether or not the primary tumor extended over the - mid-sagittal line. + mid-sagittal line, or is located on the mid-saggital line. It is reasonable to assume (and supported by data) that such an extension significantly increases the risk for metastatic spread to the contralateral @@ -122,9 +122,9 @@ def __init__( ): """The class is constructed in a similar fashion to the :class:`Bilateral`: That class contains one :class:`Unilateral` for - each side of the neck, while this class will contain two instances of - :class:`Bilateral`, one for the case of a midline extension and one for - the case of no midline extension. + each side of the neck, while this class will contain two/three instances of + :class:`Bilateral`, one for the case of a midline extension, one for + the case of no midline extension and one for the tumors on the midline. Args: graph: Dictionary of the same kind as for initialization of @@ -180,7 +180,6 @@ def __init__( other=self.noext.modalities, ) if central_enabled: - # delegated_attrs.append("modalities") init_dict_sync( this=self.noext.modalities, other=self.central.modalities, @@ -211,6 +210,8 @@ def init_synchronization(self) -> None: other_edge_list=ext_ipsi_edges, ) + #The syncing below does not work properly. The ipsilateral central side is synced, but the contralateral central side is not synced. 
It seems like no callback is initiated when syncing in this manner + # if self.central_enabled: # central_ipsi_tumor_edges = list(self.central.ipsi.graph.tumor_edges.values()) # central_ipsi_lnl_edges = list(self.central.ipsi.graph.lnl_edges.values()) @@ -227,8 +228,7 @@ def init_synchronization(self) -> None: def get_params( self): """Return the parameters of the model. - - Should be optimized ti fut tge actual code design + Parameters are only returned as dictionary. """ if self.use_mixing: @@ -271,15 +271,16 @@ def assign_params( else: extension_kwargs[key] = no_extension_kwargs[key] remaining_args, remainings_kwargs = self.ext.assign_params(*remaining_args, **extension_kwargs) + # If the syncing of the edges works properly, this below can be deleted. if self.central_enabled: for key in no_extension_kwargs.keys(): if 'contra' not in key: central_kwargs[(key.replace("ipsi_", ""))] = no_extension_kwargs[key] remaining_args, remainings_kwargs = self.central.assign_params(*new_params_args, **central_kwargs) -#this part is not tested yet or properly implemented +#this part is not fully tested yet else: - ipsi_kwargs, noext_contra_kwargs, ext_contra_kwargs, general_kwargs = {}, {}, {}, {} + ipsi_kwargs, noext_contra_kwargs, ext_contra_kwargs, general_kwargs, central_kwargs = {}, {}, {}, {}, {} for key, value in new_params_kwargs.items(): if "ipsi_" in key: @@ -289,7 +290,12 @@ def assign_params( elif 'contra_ext' in key: ext_contra_kwargs[key.replace("contra_ext", "")] = value else: - general_kwargs[key] = value + if 'contra' in key: + warnings.warn( + "'contra' keys were assigned without 'ext' or 'noext' defined. 
For a non-mixture model" + "For a non mixture model these values have no meaning.") + else: + general_kwargs[key] = value remaining_args, remainings_kwargs = self.ext.ipsi.assign_params( *new_params_args, **ipsi_kwargs, **general_kwargs @@ -300,6 +306,13 @@ def assign_params( remaining_args, remainings_kwargs = self.ext.contra.assign_params( *remaining_args, **ext_contra_kwargs, **remainings_kwargs ) + if self.central_enabled: + for key in ipsi_kwargs.keys(): + central_kwargs[(key.replace("ipsi_", ""))] = ipsi_kwargs[key] + print(ipsi_kwargs) + print(general_kwargs) + remaining_args, remainings_kwargs = self.central.assign_params(*new_params_args, **central_kwargs, **general_kwargs) + return remaining_args, remainings_kwargs @@ -329,8 +342,6 @@ def modalities(self, new_modalities) -> None: "`ipsi` or `contra` attributes." ) self.ext.modalities = new_modalities - self.noext.modalities = new_modalities - self.central.modalities = new_modalities def load_patient_data( @@ -343,14 +354,16 @@ def load_patient_data( This amounts to calling the :py:meth:`~lymph.models.Unilateral.load_patient_data` method on both models. 
""" - - ext_data = patient_data.loc[(patient_data[("tumor", "1", "extension")] == True) & (patient_data[("tumor", "1", "central")] != True)] - noext_data = patient_data.loc[~patient_data[("tumor", "1", "extension")]] - central = patient_data[patient_data[("tumor", "1", "central")].notna() & patient_data[("tumor", "1", "central")]] - + if self.central_enabled: + ext_data = patient_data.loc[(patient_data[("tumor", "1", "extension")] == True) & (patient_data[("tumor", "1", "central")] != True)] + noext_data = patient_data.loc[~patient_data[("tumor", "1", "extension")]] + central = patient_data[patient_data[("tumor", "1", "central")].notna() & patient_data[("tumor", "1", "central")]] + self.central.load_patient_data(central, mapping) + else: + ext_data = patient_data.loc[(patient_data[("tumor", "1", "extension")] == True)] + noext_data = patient_data.loc[~patient_data[("tumor", "1", "extension")]] self.ext.load_patient_data(ext_data, mapping) self.noext.load_patient_data(noext_data, mapping) - self.central.load_patient_data(central, mapping) def likelihood( @@ -358,6 +371,7 @@ def likelihood( data: OPTIONAL[pd.DataFrame] = None, given_param_kwargs: dict[str, float] | None = None, log: bool = True, + mode: str = 'HMM' ) -> float: """Compute log-likelihood of (already stored) data, given the spread probabilities and either a discrete diagnose time or a distribution to @@ -402,11 +416,16 @@ def likelihood( return -np.inf if log else 0. llh = 0. if log else 1. 
- llh += self.ext.likelihood(log = log) - llh += self.noext.likelihood(log = log) - llh += self.central.likelihood(log = log) - - + if log: + llh += self.ext.likelihood(log = log, mode = mode) + llh += self.noext.likelihood(log = log, mode = mode) + if self.central_enabled: + llh += self.central.likelihood(log = log, mode = mode) + else: + llh *= self.ext.likelihood(log = log, mode = mode) + llh *= self.noext.likelihood(log = log, mode = mode) + if self.central_enabled: + llh *= self.central.likelihood(log = log, mode = mode) return llh From d4d5f42239494762bce8544a3285acdf84b37554 Mon Sep 17 00:00:00 2001 From: YoelPH Date: Tue, 23 Jan 2024 09:10:27 +0100 Subject: [PATCH 10/75] fix: Fixed assignment problem in midline without mix --- lymph/models/midline.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/lymph/models/midline.py b/lymph/models/midline.py index b4354e9..d171c77 100644 --- a/lymph/models/midline.py +++ b/lymph/models/midline.py @@ -285,10 +285,10 @@ def assign_params( for key, value in new_params_kwargs.items(): if "ipsi_" in key: ipsi_kwargs[key.replace("ipsi_", "")] = value - elif "contra_noext" in key: - noext_contra_kwargs[key.replace("contra_noext", "")] = value - elif 'contra_ext' in key: - ext_contra_kwargs[key.replace("contra_ext", "")] = value + elif "noext" in key: + noext_contra_kwargs[key.replace("contra_noext_", "")] = value + elif 'ext' in key: + ext_contra_kwargs[key.replace("contra_ext_", "")] = value else: if 'contra' in key: warnings.warn( @@ -301,10 +301,10 @@ def assign_params( *new_params_args, **ipsi_kwargs, **general_kwargs ) remaining_args, remainings_kwargs = self.noext.contra.assign_params( - *remaining_args, **noext_contra_kwargs, **remainings_kwargs + *remaining_args, **noext_contra_kwargs, **remainings_kwargs, **general_kwargs ) remaining_args, remainings_kwargs = self.ext.contra.assign_params( - *remaining_args, **ext_contra_kwargs, **remainings_kwargs + *remaining_args, 
**ext_contra_kwargs, **remainings_kwargs, **general_kwargs ) if self.central_enabled: for key in ipsi_kwargs.keys(): From 6b0099f5362bc4c54b57e2042f16fd8ad21f858e Mon Sep 17 00:00:00 2001 From: YoelPH Date: Mon, 12 Feb 2024 11:07:37 +0100 Subject: [PATCH 11/75] add: added doc strings --- lymph/models/midline.py | 15 +++++++++------ lymph/models/unilateral.py | 2 +- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/lymph/models/midline.py b/lymph/models/midline.py index d171c77..72c05f4 100644 --- a/lymph/models/midline.py +++ b/lymph/models/midline.py @@ -92,9 +92,11 @@ def sync(): class Midline(DelegatorMixin): """Model a bilateral lymphatic system where an additional risk factor can be provided in the data: Whether or not the primary tumor extended over the - mid-sagittal line, or is located on the mid-saggital line. + mid-sagittal line, or is located on the mid-saggital line. Further, some + tumors may be centralized. In this case this class offers the `midline` + option which assigns the same base spread probabilities to both sides. - It is reasonable to assume (and supported by data) that such an extension + It is reasonable to assume (and supported by data) that an extension significantly increases the risk for metastatic spread to the contralateral side of the neck. This class attempts to capture this using a simple assumption: We assume that the probability of spread to the contralateral @@ -136,7 +138,10 @@ def __init__( side when no midline extension is present. trans_symmetric: If ``True``, the spread probabilities among the LNLs will be set symmetrically. - + central_enabled: If ``True``, a third bilateral class is produced + which holds a model for patients with central tumor locations. + + The ``unilateral_kwargs`` are passed to both all bilateral models. See Also: :class:`Bilateral`: Two of these are held as attributes by this class. 
One for the case of a mid-sagittal extension of the primary @@ -220,7 +225,7 @@ def init_synchronization(self) -> None: # + central_ipsi_lnl_edges # ) # init_edge_sync( - # property_names=property_names, + # property_names=property_names,W # this_edge_list=noext_ipsi_edges, # other_edge_list=central_ipsi_edges, # ) @@ -277,8 +282,6 @@ def assign_params( if 'contra' not in key: central_kwargs[(key.replace("ipsi_", ""))] = no_extension_kwargs[key] remaining_args, remainings_kwargs = self.central.assign_params(*new_params_args, **central_kwargs) - -#this part is not fully tested yet else: ipsi_kwargs, noext_contra_kwargs, ext_contra_kwargs, general_kwargs, central_kwargs = {}, {}, {}, {}, {} diff --git a/lymph/models/unilateral.py b/lymph/models/unilateral.py index f038534..556de8e 100644 --- a/lymph/models/unilateral.py +++ b/lymph/models/unilateral.py @@ -476,7 +476,7 @@ def modalities(self) -> modalities.ModalitiesUserDict: return modalities.ModalitiesUserDict(is_trinary=self.is_trinary) - @cached_property + @property def observation_matrix(self) -> np.ndarray: """The matrix encoding the probabilities to observe a certain diagnosis. 
From b6a44fdb0de9b23cfb1a6bebd63263267086227f Mon Sep 17 00:00:00 2001 From: Roman Ludwig <48687784+rmnldwg@users.noreply.github.com> Date: Thu, 15 Feb 2024 10:34:48 +0100 Subject: [PATCH 12/75] docs(mid): improve midline docstrings slightly --- lymph/models/midline.py | 60 ++++++++++++++++++++++------------------- 1 file changed, 32 insertions(+), 28 deletions(-) diff --git a/lymph/models/midline.py b/lymph/models/midline.py index 72c05f4..28c8896 100644 --- a/lymph/models/midline.py +++ b/lymph/models/midline.py @@ -1,14 +1,14 @@ from __future__ import annotations -from argparse import OPTIONAL import logging import warnings +from argparse import OPTIONAL from typing import Any, Iterable, Iterator import numpy as np import pandas as pd -from lymph import graph, matrix, modalities, models +from lymph import graph, modalities, models from lymph.helper import ( AbstractLookupDict, DelegatorMixin, @@ -90,14 +90,14 @@ def sync(): class Midline(DelegatorMixin): - """Model a bilateral lymphatic system where an additional risk factor can + """Models metastatic progression bilaterally with tumor lateralization. + + Model a bilateral lymphatic system where an additional risk factor can be provided in the data: Whether or not the primary tumor extended over the - mid-sagittal line, or is located on the mid-saggital line. Further, some - tumors may be centralized. In this case this class offers the `midline` - option which assigns the same base spread probabilities to both sides. + mid-sagittal line, or is located on the mid-saggital line. - It is reasonable to assume (and supported by data) that an extension - significantly increases the risk for metastatic spread to the contralateral + It is reasonable to assume (and supported by data) that an extension of the primary + tumor significantly increases the risk for metastatic spread to the contralateral side of the neck. 
This class attempts to capture this using a simple assumption: We assume that the probability of spread to the contralateral side for patients *with* midline extension is larger than for patients @@ -122,11 +122,15 @@ def __init__( central_enabled: bool = True, **_kwargs ): - """The class is constructed in a similar fashion to the - :class:`Bilateral`: That class contains one :class:`Unilateral` for - each side of the neck, while this class will contain two/three instances of - :class:`Bilateral`, one for the case of a midline extension, one for - the case of no midline extension and one for the tumors on the midline. + """Initialize the model. + + The class is constructed in a similar fashion to the + :py:class:`~lymph.models.Bilateral`: That class contains one + :py:class:`~lymph.models.Unilateral` for each side of the neck, while this + class will contain several instances of :py:class:`~lymph.models.Bilateral`, + one for the ipsilateral side and two to three for the the contralateral side + covering the cases a) no midline extension, b) midline extension, and c) + central tumor location. Args: graph: Dictionary of the same kind as for initialization of @@ -140,8 +144,8 @@ def __init__( LNLs will be set symmetrically. central_enabled: If ``True``, a third bilateral class is produced which holds a model for patients with central tumor locations. - - The ``unilateral_kwargs`` are passed to both all bilateral models. + + The ``unilateral_kwargs`` are passed to all bilateral models. See Also: :class:`Bilateral`: Two of these are held as attributes by this class. 
One for the case of a mid-sagittal extension of the primary @@ -199,13 +203,13 @@ def init_synchronization(self) -> None: noext_ipsi_tumor_edges = list(self.noext.ipsi.graph.tumor_edges.values()) noext_ipsi_lnl_edges = list(self.noext.ipsi.graph.lnl_edges.values()) noext_ipsi_edges = ( - noext_ipsi_tumor_edges + noext_ipsi_lnl_edges + noext_ipsi_tumor_edges + noext_ipsi_lnl_edges ) ext_ipsi_tumor_edges = list(self.ext.ipsi.graph.tumor_edges.values()) ext_ipsi_lnl_edges = list(self.ext.ipsi.graph.lnl_edges.values()) ext_ipsi_edges = ( - ext_ipsi_tumor_edges - + ext_ipsi_lnl_edges + ext_ipsi_tumor_edges + + ext_ipsi_lnl_edges ) @@ -214,15 +218,15 @@ def init_synchronization(self) -> None: this_edge_list=noext_ipsi_edges, other_edge_list=ext_ipsi_edges, ) - + #The syncing below does not work properly. The ipsilateral central side is synced, but the contralateral central side is not synced. It seems like no callback is initiated when syncing in this manner # if self.central_enabled: # central_ipsi_tumor_edges = list(self.central.ipsi.graph.tumor_edges.values()) # central_ipsi_lnl_edges = list(self.central.ipsi.graph.lnl_edges.values()) # central_ipsi_edges = ( - # central_ipsi_tumor_edges - # + central_ipsi_lnl_edges + # central_ipsi_tumor_edges + # + central_ipsi_lnl_edges # ) # init_edge_sync( # property_names=property_names,W @@ -295,8 +299,8 @@ def assign_params( else: if 'contra' in key: warnings.warn( - "'contra' keys were assigned without 'ext' or 'noext' defined. For a non-mixture model" - "For a non mixture model these values have no meaning.") + "'contra' keys were assigned without 'ext' or 'noext' defined. 
For a non-mixture model" + "For a non mixture model these values have no meaning.") else: general_kwargs[key] = value @@ -461,13 +465,13 @@ def risk( self.assign_params(*given_param_args) if given_param_kwargs is not None: self.assign_params(**given_param_kwargs) - if central: + if central: return self.central.risk(given_diagnoses = given_diagnoses,t_stage = t_stage, involvement = involvement) if midline_extension: return self.ext.risk(given_diagnoses = given_diagnoses,t_stage = t_stage, involvement = involvement) - return self.noext.risk(given_diagnoses = given_diagnoses,t_stage = t_stage, involvement = involvement) - - + return self.noext.risk(given_diagnoses = given_diagnoses,t_stage = t_stage, involvement = involvement) + + # def generate_dataset( # self, @@ -501,4 +505,4 @@ def risk( # dataset = dataset.sort_index(axis="columns", level=0) # dataset[('info', 'tumor', 't_stage')] = drawn_t_stages - # return dataset \ No newline at end of file + # return dataset From 9676dd91ba53f15d13104599b056b8fea3c03278 Mon Sep 17 00:00:00 2001 From: Roman Ludwig <48687784+rmnldwg@users.noreply.github.com> Date: Thu, 15 Feb 2024 11:26:48 +0100 Subject: [PATCH 13/75] feat: add delegator/synchronizer mixin Related: #74 --- lymph/helper.py | 69 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) diff --git a/lymph/helper.py b/lymph/helper.py index 3ba6175..7dbbda4 100644 --- a/lymph/helper.py +++ b/lymph/helper.py @@ -109,6 +109,75 @@ def wrapper(*args, **kwargs): return super().__getattribute__(name) +class DelegationSyncMixin: + """Mixin to delegate and synchronize an attribute of multiple instances. + + If a container class holds several (i.e. one ore more) instances of a class, this + mixin can be used with the container class to delegate and synchronize an attribute + from the instances. + + See the explanation in the :py:class:`DelegatorMixin.init_delegation_sync` method. 
+ """ + def __init__(self) -> None: + self._delegated_and_synced = {} + + + def init_delegation_sync(self, **attrs_from_instances) -> None: + """Initialize the delegation and synchronization of attributes. + + Each keyword argument is the name of an attribute to synchronize. The value + should be a list of instances for which that attribute should be synchronized. + + Example: + + >>> class Hand: + ... def __init__(self, num_fingers): + ... self.num_fingers = num_fingers + >>> class Person(DelegationSyncMixin): + ... def __init__(self): + ... super().__init__() + ... self.left = Hand(6) + ... self.right = Hand(4) + ... self.init_delegation_sync(num_fingers=[self.left, self.right]) + >>> person = Person() + >>> person.left.num_fingers + 6 + >>> person.right.num_fingers + 4 + >>> person.num_fingers # note that this will also issue a warning + 4 + >>> person.num_fingers = 5 + >>> person.left.num_fingers + 5 + >>> person.right.num_fingers + 5 + >>> person.num_fingers + 5 + """ + self._delegated_and_synced = attrs_from_instances + + + def __getattr__(self, name): + if name == "_delegated_and_synced" or name not in self._delegated_and_synced: + return super().__getattr__(name) + + values = {getattr(inst, name) for inst in self._delegated_and_synced[name]} + if len(values) > 1: + warnings.warn( + f"Attribute '{name}' not synchronized: {values}. Set this " + "attribute on each instance to synchronize it." 
+ ) + return values.pop() + + + def __setattr__(self, name, value): + if name != "_delegated_and_synced" and name in self._delegated_and_synced: + for inst in self._delegated_and_synced[name]: + setattr(inst, name, value) + else: + super().__setattr__(name, value) + + def check_unique_names(graph: dict): """Check all nodes in ``graph`` have unique names and no duplicate connections.""" node_name_set = set() From b2ce7e49d99e46517381b37fea9603b3f52de6c0 Mon Sep 17 00:00:00 2001 From: Roman Ludwig <48687784+rmnldwg@users.noreply.github.com> Date: Fri, 16 Feb 2024 10:14:02 +0100 Subject: [PATCH 14/75] feat: add delegation/sync mixin We have added a mixin class that allows the delegation and synchronization of attributes. This may replace the convoluted and unreadable callback functions we used to sync some properties Related: #74 --- lymph/helper.py | 282 ++++++++++++++++++++++---------------- lymph/models/bilateral.py | 24 ++-- 2 files changed, 177 insertions(+), 129 deletions(-) diff --git a/lymph/helper.py b/lymph/helper.py index 7dbbda4..e9b2ffc 100644 --- a/lymph/helper.py +++ b/lymph/helper.py @@ -1,6 +1,7 @@ """ Module containing supporting classes and functions used accross the project. """ +import logging import warnings from collections import UserDict from functools import cached_property, lru_cache, wraps @@ -16,97 +17,9 @@ DiagnoseType = dict[str, PatternType] """Type alias for a diagnose, which is a involvement pattern per diagnostic modality.""" +logger = logging.getLogger(__name__) -class DelegatorMixin: - """Mixin class that allows the delegation of attributes from another object.""" - def __init__(self): - self._delegated = {} - - - def init_delegation(self, **from_to) -> None: - """Initialize the delegation of attributes. - - For each keyword argument that is an attribute of ``self``, the value is a - list of attributes to delegate to ``self``. - - Inspiration from this came from the `delegation pattern`_. - - .. 
_delegation pattern: https://github.com/faif/python-patterns/blob/master/patterns/fundamental/delegation_pattern.py - - Example: - - >>> class Delegate: - ... def __init__(self): - ... self.fancy_attr = "foo" - ... @property - ... def property_attr(self): - ... return "bar" - ... @cached_property - ... def cached_attr(self): - ... return "baz" - >>> class A(DelegatorMixin): - ... def __init__(self): - ... super().__init__() - ... self.delegated = "hello world" - ... self.also_delegated = Delegate() - ... self.normal_attr = 42 - ... self.init_delegation( - ... delegated=["count"], - ... also_delegated=["fancy_attr", "property_attr", "cached_attr"], - ... ) - >>> a = A() - >>> a.delegated.count("l") - 3 - >>> a.count("l") - 3 - >>> a.also_delegated.fancy_attr - 'foo' - >>> a.fancy_attr - 'foo' - >>> a.also_delegated.property_attr - 'bar' - >>> a.property_attr - 'bar' - >>> a.also_delegated.cached_attr - 'baz' - >>> a.cached_attr - 'baz' - >>> a.normal_attr - 42 - >>> a.non_existent - Traceback (most recent call last): - ... - AttributeError: 'A' object has no attribute 'non_existent' - """ - for attr, sub_attrs in from_to.items(): - attr_obj = getattr(self, attr) - - for sub_attr in sub_attrs: - if not hasattr(attr_obj, sub_attr): - raise AttributeError( - f"Attribute '{sub_attr}' not found in '{attr_obj}'" - ) - - if sub_attr in self._delegated: - warnings.warn( - f"Attribute '{sub_attr}' already delegated. Overwriting." - ) - self._delegated[sub_attr] = (attr_obj, sub_attr) - - def __getattr__(self, name): - if name in self._delegated: - attr = getattr(*self._delegated[name]) - - if not callable(attr): - return attr - - @wraps(attr) - def wrapper(*args, **kwargs): - return attr(*args, **kwargs) - - return wrapper - - return super().__getattribute__(name) +BASIC_TYPES = (int, float, str, bool, bytes, type(None)) class DelegationSyncMixin: @@ -117,6 +30,9 @@ class DelegationSyncMixin: from the instances. 
See the explanation in the :py:class:`DelegatorMixin.init_delegation_sync` method. + + This also works for attributes that are not hashable, such as lists or dictionaries. + See more details about that in the :py:class:`AccessPassthrough` class docs. """ def __init__(self) -> None: self._delegated_and_synced = {} @@ -130,54 +46,147 @@ def init_delegation_sync(self, **attrs_from_instances) -> None: Example: - >>> class Hand: - ... def __init__(self, num_fingers): - ... self.num_fingers = num_fingers + >>> class Eye: + ... def __init__(self, color="blue"): + ... self.eye_color = color >>> class Person(DelegationSyncMixin): ... def __init__(self): ... super().__init__() - ... self.left = Hand(6) - ... self.right = Hand(4) - ... self.init_delegation_sync(num_fingers=[self.left, self.right]) + ... self.left = Eye("green") + ... self.right = Eye("brown") + ... self.init_delegation_sync(eye_color=[self.left, self.right]) >>> person = Person() - >>> person.left.num_fingers - 6 - >>> person.right.num_fingers - 4 - >>> person.num_fingers # note that this will also issue a warning - 4 - >>> person.num_fingers = 5 - >>> person.left.num_fingers - 5 - >>> person.right.num_fingers - 5 - >>> person.num_fingers - 5 + >>> person.eye_color # pop element of sorted set and warn that not synced + 'green' + >>> person.eye_color = 'red' + >>> person.left.eye_color == person.right.eye_color == 'red' + True """ self._delegated_and_synced = attrs_from_instances def __getattr__(self, name): - if name == "_delegated_and_synced" or name not in self._delegated_and_synced: - return super().__getattr__(name) + try: + values_set = {getattr(inst, name) for inst in self._delegated_and_synced[name]} + if len(values_set) > 1: + warnings.warn( + f"Attribute '{name}' not synchronized: {values_set}. Set this " + "attribute on each instance to synchronize it." 
+ ) + return sorted(values_set).pop() + + # Not all attributes might be hashable, which is necessary for a set + except TypeError: + values_list = [getattr(inst, name) for inst in self._delegated_and_synced[name]] + return AccessPassthrough(values_list) + + + def __setattr__(self, name, value): + if name != "_delegated_and_synced" and name in self._delegated_and_synced: + for inst in self._delegated_and_synced[name]: + setattr(inst, name, value) + else: + super().__setattr__(name, value) + - values = {getattr(inst, name) for inst in self._delegated_and_synced[name]} +class AccessPassthrough: + """Allows delegated access to an attribute's methods. + + This class is constructed from a list of objects. It allows access to the + methods and items of the objects in the list. Setting items is also supported, but + only one level deep. + + It is used by the :py:class:`DelegationSyncMixin` to handle unhashable attributes. + For example, a delegated and synched attribute might be a dictionary. In this case, + a call like ``container.attribute["key"]`` would retrieve the right value, but + setting it via ``container.attribute["key"] = value`` would at best set the value + on one of the synched instances, but not on all of them. This class handles passing + the set value to all instances. + + Note: + This class is not meant to be used directly, but only by the + :py:class:`DelegationSyncMixin`. + + Below is an example that demonstrates how calls to ``__setitem__``, ``__setattr__``, + and ``__call__`` are passed through to both instances for which the delegation and + synchronization is invoked: + + >>> class Param: + ... def __init__(self, value): + ... self.value = value + >>> class Model: + ... def __init__(self, **kwargs): + ... self.params_dict = kwargs + ... self.param = Param(sum(kwargs.values())) + ... def set_value(self, key, value): + ... self.params_dict[key] = value + >>> class Mixture(DelegationSyncMixin): + ... def __init__(self): + ... super().__init__() + ... 
self.c1 = Model(a=1, b=2) + ... self.c2 = Model(a=3, b=4, c=5) + ... self.init_delegation_sync( + ... params_dict=[self.c1, self.c2], + ... param=[self.c1, self.c2], + ... set_value=[self.c1, self.c2], + ... ) + >>> mixture = Mixture() + >>> mixture.params_dict["a"] # pop element of sorted set and warn that not synced + 1 + >>> mixture.params_dict["a"] = 99 + >>> mixture.c1.params_dict["a"] == mixture.c2.params_dict["a"] == 99 + True + >>> mixture.param.value + 12 + >>> mixture.param.value = 42 + >>> mixture.c1.param.value == mixture.c2.param.value == 42 + True + >>> mixture.set_value("c", 100) + >>> mixture.c1.params_dict["c"] == mixture.c2.params_dict["c"] == 100 + True + """ + def __init__(self, attr_values: list[object]) -> None: + self._attr_objects = attr_values + + + def __getattr__(self, name): + values = {getattr(obj, name) for obj in self._attr_objects} if len(values) > 1: warnings.warn( f"Attribute '{name}' not synchronized: {values}. Set this " "attribute on each instance to synchronize it." ) + return sorted(values).pop() + + + def __getitem__(self, key): + values = {obj[key] for obj in self._attr_objects} + if len(values) > 1: + warnings.warn( + f"Value for key '{key}' not synchronized: {values}. Set this " + "value on each item to synchronize it." 
+ ) return values.pop() def __setattr__(self, name, value): - if name != "_delegated_and_synced" and name in self._delegated_and_synced: - for inst in self._delegated_and_synced[name]: - setattr(inst, name, value) + if name != "_attr_objects": + for obj in self._attr_objects: + setattr(obj, name, value) else: super().__setattr__(name, value) + def __setitem__(self, key, value): + for obj in self._attr_objects: + obj[key] = value + + + def __call__(self, *args: Any, **kwds: Any) -> Any: + for obj in self._attr_objects: + obj(*args, **kwds) + + def check_unique_names(graph: dict): """Check all nodes in ``graph`` have unique names and no duplicate connections.""" node_name_set = set() @@ -494,11 +503,6 @@ def wrapper(self, *args, **kwargs): return wrapper -if __name__ == "__main__": - import doctest - doctest.testmod() - - class AbstractLookupDict(UserDict): """Abstract ``UserDict`` subclass that can lazily and dynamically return values. @@ -602,3 +606,41 @@ def callable_mapping(key): return mapping[key] return callable_mapping + + + +if __name__ == "__main__": + + class Number: + __hash__ = None + def __init__(self, value): + self.value = value + + class Param: + def __init__(self, value, mapping): + self.value = value + self.mapping = mapping + self.number = Number(10 * value) + + class Container(DelegationSyncMixin): + def __init__(self): + super().__init__() + self.one = Param(1, {"key": 1}) + self.two = Param(2, {"key": 2}) + self.init_delegation_sync( + value=[self.one, self.two], + mapping=[self.one, self.two], + number=[self.one, self.two], + ) + + container = Container() + print(container.value) + print(container.mapping) + print(container.mapping["key"]) + container.mapping["key"] = 4 + print(container.one.mapping["key"]) + print(container.two.mapping["key"]) + print(container.number.value) + container.number.value = 99 + print(container.one.number.value) + print(container.two.number.value) diff --git a/lymph/models/bilateral.py b/lymph/models/bilateral.py 
index e31a8e7..ab0076a 100644 --- a/lymph/models/bilateral.py +++ b/lymph/models/bilateral.py @@ -10,7 +10,7 @@ from lymph import graph, matrix, modalities, models from lymph.helper import ( AbstractLookupDict, - DelegatorMixin, + DelegationSyncMixin, DiagnoseType, PatternType, early_late_mapping, @@ -124,7 +124,7 @@ def sync(): this.trigger_callbacks.append(sync) -class Bilateral(DelegatorMixin): +class Bilateral(DelegationSyncMixin): """Class that models metastatic progression in a bilateral lymphatic system. This is achieved by creating two instances of the @@ -185,13 +185,19 @@ def __init__( contralateral_kwargs=contralateral_kwargs, ) - self.init_synchronization() - - delegated_attrs = [ - "max_time", "t_stages", "diag_time_dists", - "is_binary", "is_trinary", - ] + ["modalities"] if self.is_symmetric["modalities"] else [] - self.init_delegation(ipsi=delegated_attrs) + if self.is_symmetric["modalities"]: + delegation_sync_kwargs = {"modalities": [self.ipsi, self.contra]} + else: + delegation_sync_kwargs = {} + + self.init_delegation_sync( + max_time=[self.ipsi, self.contra], + t_stages=[self.ipsi, self.contra], + diag_time_dists=[self.ipsi, self.contra], + is_binary=[self.ipsi, self.contra], + is_trinary=[self.ipsi, self.contra], + **delegation_sync_kwargs, + ) def init_models( From 77607bf4a334d66e711295f486627800665a5e1a Mon Sep 17 00:00:00 2001 From: Roman Ludwig <48687784+rmnldwg@users.noreply.github.com> Date: Fri, 16 Feb 2024 13:32:16 +0100 Subject: [PATCH 15/75] feat(helper): add `popfirst()` and `flatten()` Two new helper function in relation to getting and setting params. 
--- lymph/helper.py | 70 ++++++++++++++++++++++--------------------------- 1 file changed, 32 insertions(+), 38 deletions(-) diff --git a/lymph/helper.py b/lymph/helper.py index e9b2ffc..2fcca56 100644 --- a/lymph/helper.py +++ b/lymph/helper.py @@ -5,7 +5,7 @@ import warnings from collections import UserDict from functools import cached_property, lru_cache, wraps -from typing import Any, Callable +from typing import Any, Callable, Sequence import numpy as np from cachetools import LRUCache @@ -38,7 +38,7 @@ def __init__(self) -> None: self._delegated_and_synced = {} - def init_delegation_sync(self, **attrs_from_instances) -> None: + def _init_delegation_sync(self, **attrs_from_instances) -> None: """Initialize the delegation and synchronization of attributes. Each keyword argument is the name of an attribute to synchronize. The value @@ -54,7 +54,7 @@ def init_delegation_sync(self, **attrs_from_instances) -> None: ... super().__init__() ... self.left = Eye("green") ... self.right = Eye("brown") - ... self.init_delegation_sync(eye_color=[self.left, self.right]) + ... self._init_delegation_sync(eye_color=[self.left, self.right]) >>> person = Person() >>> person.eye_color # pop element of sorted set and warn that not synced 'green' @@ -125,14 +125,14 @@ class AccessPassthrough: ... super().__init__() ... self.c1 = Model(a=1, b=2) ... self.c2 = Model(a=3, b=4, c=5) - ... self.init_delegation_sync( + ... self._init_delegation_sync( ... params_dict=[self.c1, self.c2], ... param=[self.c1, self.c2], ... set_value=[self.c1, self.c2], ... ) >>> mixture = Mixture() >>> mixture.params_dict["a"] # pop element of sorted set and warn that not synced - 1 + 3 >>> mixture.params_dict["a"] = 99 >>> mixture.c1.params_dict["a"] == mixture.c2.params_dict["a"] == 99 True @@ -166,7 +166,7 @@ def __getitem__(self, key): f"Value for key '{key}' not synchronized: {values}. Set this " "value on each item to synchronize it." 
) - return values.pop() + return sorted(values).pop() def __setattr__(self, name, value): @@ -493,7 +493,7 @@ def early_late_mapping(t_stage: int | str) -> str: def trigger(func: callable) -> callable: - """Method decorator that runs instance's ``trigger()`` when the method is called.""" + """Decorator that runs instance's ``trigger_callbacks`` when called.""" @wraps(func) def wrapper(self, *args, **kwargs): result = func(self, *args, **kwargs) @@ -608,39 +608,33 @@ def callable_mapping(key): return callable_mapping +def popfirst(seq: Sequence[Any]) -> tuple[Any, Sequence[Any]]: + """Return the first element of a sequence and the sequence without it. + + Example: -if __name__ == "__main__": + >>> popfirst([1, 2, 3]) + (1, [2, 3]) + """ + return seq[0], seq[1:] - class Number: - __hash__ = None - def __init__(self, value): - self.value = value - class Param: - def __init__(self, value, mapping): - self.value = value - self.mapping = mapping - self.number = Number(10 * value) +def flatten(mapping: dict) -> dict: + """Flatten a nested dictionary. 
- class Container(DelegationSyncMixin): - def __init__(self): - super().__init__() - self.one = Param(1, {"key": 1}) - self.two = Param(2, {"key": 2}) - self.init_delegation_sync( - value=[self.one, self.two], - mapping=[self.one, self.two], - number=[self.one, self.two], - ) + Example: - container = Container() - print(container.value) - print(container.mapping) - print(container.mapping["key"]) - container.mapping["key"] = 4 - print(container.one.mapping["key"]) - print(container.two.mapping["key"]) - print(container.number.value) - container.number.value = 99 - print(container.one.number.value) - print(container.two.number.value) + >>> flatten({"a": {"b": 1, "c": 2}, "d": 3}) + {'a_b': 1, 'a_c': 2, 'd': 3} + """ + def _flatten(mapping, parent_key='', sep='_'): + items = [] + for k, v in mapping.items(): + new_key = f"{parent_key}{sep}{k}" if parent_key else k + if isinstance(v, dict): + items.extend(_flatten(v, new_key, sep=sep).items()) + else: + items.append((new_key, v)) + return dict(items) + + return _flatten(mapping) From 7976997b5688714d69552cef603e192b90bcfe80 Mon Sep 17 00:00:00 2001 From: Roman Ludwig <48687784+rmnldwg@users.noreply.github.com> Date: Fri, 16 Feb 2024 13:34:26 +0100 Subject: [PATCH 16/75] change!: make `get_params()` uniform and chainable The API of all `get_params()` methods is now nice and uniform, allowing arbitrary chaining of these methods. BREAKING CHANGE: The `get_params()` methods don't accept the `param` argument anymore. 
--- lymph/diagnose_times.py | 45 ++---- lymph/graph.py | 42 +++-- lymph/modalities.py | 4 +- lymph/models/bilateral.py | 264 ++++--------------------------- lymph/models/midline.py | 4 +- lymph/models/unilateral.py | 90 ++++++----- tests/trinary_unilateral_test.py | 25 ++- 7 files changed, 152 insertions(+), 322 deletions(-) diff --git a/lymph/diagnose_times.py b/lymph/diagnose_times.py index 7a4b572..6c3b264 100644 --- a/lymph/diagnose_times.py +++ b/lymph/diagnose_times.py @@ -20,7 +20,7 @@ import numpy as np -from lymph.helper import AbstractLookupDict, trigger +from lymph.helper import AbstractLookupDict, flatten logger = logging.getLogger(__name__) @@ -170,8 +170,8 @@ def is_updateable(self) -> bool: def get_params( self, - param: str | None = None, - as_dict: bool = False, + as_dict: bool = True, + **_kwargs, ) -> float | Iterable[float] | dict[str, float]: """If updateable, return the dist's ``param`` value or all params in a dict. @@ -185,9 +185,6 @@ def get_params( warnings.warn("Distribution is not updateable, returning empty dict") return {} if as_dict else None - if param is not None: - return self._kwargs[param] - return self._kwargs if as_dict else self._kwargs.values() @@ -252,7 +249,6 @@ class DistributionsUserDict(AbstractLookupDict): """Dictionary with added methods for storing distributions over diagnose times.""" max_time: int - @trigger def __setitem__( self, t_stage: str, @@ -266,7 +262,7 @@ def __setitem__( super().__setitem__(t_stage, distribution) - @trigger + def __delitem__(self, t_stage: str) -> None: """Delete the distribution for a T-stage.""" super().__delitem__(t_stage) @@ -280,24 +276,19 @@ def num_parametric(self) -> int: def get_params( self, - param: str | None = None, - as_dict: bool = False, + as_dict: bool = True, + as_flat: bool = True, ) -> float | Iterable[float] | dict[str, float]: - """Return the parameter(s) of parametrized distributions. + """Return the parameters of parametrized distributions. 
- If ``param`` is provided, return the value of that particular parameter. Note - that the parameter name must be of the form ``{t_stage}_{param}``, where - ``t_stage`` is the T-stage and ``param`` is the name of the parameter. + If ``as_dict`` is ``False``, return an iterable of all parameter values. If + ``as_dict`` is ``True``, return a nested dictionary with the T-stages as keys + and the distributions' parameter dicts as values (essentially what is returned + by :py:meth:`~lymph.diagnose_times.Distribution.get_params`). - If ``param`` is ``None`` and ``as_dict`` is ``False``, return an iterable of - all parameter values. If ``as_dict`` is ``True``, return a dictionary with the - parameter names as keys and the parameter values as values. - - See Also: - :py:meth:`lymph.diagnose_times.Distribution.get_params` - :py:meth:`lymph.graph.Edge.get_params` - :py:meth:`lymph.models.Unilateral.get_params` - :py:meth:`lymph.models.Bilateral.get_params` + If ``as_flat`` is ``True``, return a flat dictionary with the T-stages and + parameters as keys and values, respectively. This is the result of passing the + nested dictionary to :py:meth:`~lymph.helper.flatten`. """ params = {} @@ -305,16 +296,14 @@ def get_params( if not distribution.is_updateable: continue - for name, value in distribution.get_params(as_dict=True).items(): - params[f"{t_stage}_{name}"] = value + params[t_stage] = distribution.get_params(as_flat=as_flat) - if param is not None: - return params[param] + if as_flat or not as_dict: + params = flatten(params) return params if as_dict else params.values() - @trigger def set_params(self, **kwargs) -> None: """Update all parametrized distributions via keyword arguments. 
diff --git a/lymph/graph.py b/lymph/graph.py index cdbd3b0..5f0d038 100644 --- a/lymph/graph.py +++ b/lymph/graph.py @@ -18,7 +18,7 @@ import numpy as np -from lymph.helper import check_unique_names, comp_transition_tensor, trigger +from lymph.helper import check_unique_names, comp_transition_tensor, flatten class AbstractNode: @@ -260,7 +260,6 @@ def parent(self) -> Tumor | LymphNodeLevel: return self._parent @parent.setter - @trigger def parent(self, new_parent: Tumor | LymphNodeLevel) -> None: """Set the parent node of the edge.""" if hasattr(self, '_parent'): @@ -279,7 +278,6 @@ def child(self) -> LymphNodeLevel: return self._child @child.setter - @trigger def child(self, new_child: LymphNodeLevel) -> None: """Set the end (child) node of the edge.""" if hasattr(self, '_child'): @@ -323,7 +321,6 @@ def get_micro_mod(self) -> float: self._micro_mod = 1. return self._micro_mod - @trigger def set_micro_mod(self, new_micro_mod: float) -> None: """Set the spread modifier for LNLs with microscopic involvement.""" if self.child.is_binary: @@ -347,7 +344,6 @@ def get_spread_prob(self) -> float: self._spread_prob = 0. return self._spread_prob - @trigger def set_spread_prob(self, new_spread_prob): """Set the spread probability of the edge.""" if not 0. <= new_spread_prob <= 1.: @@ -363,9 +359,9 @@ def set_spread_prob(self, new_spread_prob): def get_params( self, - param: str | None = None, - as_dict: bool = False, - ) -> float | Iterable[float] | dict[str, float]: + as_dict: bool = True, + **_kwargs, + ) -> Iterable[float] | dict[str, float]: """Return the value of the parameter ``param`` or all params in a dict. 
See Also: @@ -376,15 +372,12 @@ def get_params( """ if self.is_growth: params = {"growth": self.get_spread_prob()} - return params if as_dict else params[param] + return params if as_dict else params.values() params = {"spread": self.get_spread_prob()} if self.child.is_trinary and not self.is_tumor_spread: params["micro"] = self.get_micro_mod() - if param is not None: - return params[param] - return params if as_dict else params.values() @@ -763,3 +756,28 @@ def state_list(self): except AttributeError: self._gen_state_list() return self._state_list + + + def get_params( + self, + as_dict: bool = True, + as_flat: bool = True, + ) -> Iterable[float] | dict[str, float]: + """Return the parameters of the edges in the graph. + + If ``as_dict`` is ``False``, return an iterable of all parameter values. If + ``as_dict`` is ``True``, return a nested dictionary with the edges' names as + keys and the edges' parameter dicts as values. + + If ``as_flat`` is ``True``, return a flat dictionary with the T-stages and + parameters as keys and values, respectively. This is the result of passing the + nested dictionary to :py:meth:`~lymph.helper.flatten`. 
+ """ + params = {} + for edge in self.edges.values(): + params[edge.name] = edge.get_params(as_flat=as_flat) + + if as_flat or not as_dict: + params = flatten(params) + + return params if as_dict else params.values() diff --git a/lymph/modalities.py b/lymph/modalities.py index 2490787..7398205 100644 --- a/lymph/modalities.py +++ b/lymph/modalities.py @@ -13,7 +13,7 @@ import numpy as np -from lymph.helper import AbstractLookupDict, trigger +from lymph.helper import AbstractLookupDict class Modality: @@ -151,7 +151,6 @@ class ModalitiesUserDict(AbstractLookupDict): [0., 1.], [0., 1.]]) """ - @trigger def __setitem__(self, name: str, value: ModalityDef, / ) -> None: """Set the modality of the lymph model.""" # pylint: disable=unidiomatic-typecheck @@ -209,7 +208,6 @@ def __setitem__(self, name: str, value: ModalityDef, / ) -> None: super().__setitem__(name, value) - @trigger def __delitem__(self, key: str) -> None: return super().__delitem__(key) diff --git a/lymph/models/bilateral.py b/lymph/models/bilateral.py index ab0076a..594c502 100644 --- a/lymph/models/bilateral.py +++ b/lymph/models/bilateral.py @@ -7,13 +7,13 @@ import numpy as np import pandas as pd -from lymph import graph, matrix, modalities, models +from lymph import matrix, models from lymph.helper import ( - AbstractLookupDict, DelegationSyncMixin, DiagnoseType, PatternType, early_late_mapping, + flatten, ) warnings.filterwarnings("ignore", category=pd.errors.PerformanceWarning) @@ -21,109 +21,6 @@ -def create_property_sync_callback( - names: list[str], - this: graph.Edge, - other: graph.Edge, -) -> callable: - """Return func to sync property values whose name is in ``names`` btw two edges. - - The returned function is meant to be added to the list of callbacks of the - :py:class:`Edge` class, such that two edges in a mirrored pair of graphs are kept - in sync. 
- """ - def sync(): - # We must set the value of `this` property via the private name, otherwise - # we would trigger the setter's callbacks and may end up in an infinite loop. - for name in names: - private_name = f"_{name}" - setattr(other, private_name, getattr(this, name)) - - logger.debug(f"Created sync callback for properties {names} of {this.name} edge.") - return sync - - -def init_edge_sync( - property_names: list[str], - this_edges: list[graph.Edge], - other_edges: list[graph.Edge], -) -> None: - """Initialize the callbacks to sync properties btw. Edges. - - This is a two-way sync, i.e. the properties of both ``this_edges`` and - ``other_edges`` are kept in sync. The ``property_names`` is a list of property - names that should be synced. - - Implementing this as a separate method allows a user in theory to initialize - an arbitrary kind of symmetry between the two sides of the neck. - """ - this_edge_names = [e.name for e in this_edges] - other_edge_names = [e.name for e in other_edges] - - for edge_name in set(this_edge_names).intersection(other_edge_names): - this_edge = this_edges[this_edge_names.index(edge_name)] - other_edge = other_edges[other_edge_names.index(edge_name)] - - this_edge.trigger_callbacks.append( - create_property_sync_callback( - names=property_names, - this=this_edge, - other=other_edge, - ) - ) - other_edge.trigger_callbacks.append( - create_property_sync_callback( - names=property_names, - this=other_edge, - other=this_edge, - ) - ) - - -def create_lookupdict_sync_callback( - this: AbstractLookupDict, - other: AbstractLookupDict, -) -> callable: - """Return func to sync content of ``this`` lookup dict to ``other``. - - The returned function is meant to be added to the list of callbacks of the lookup - dict class, such that two dicts in a mirrored pair of graphs are kept in sync. 
- """ - def sync(): - other.clear_without_trigger() - other.update_without_trigger(this) - - logger.debug(f"Created sync callback from {this} lookup dict to {other}.") - return sync - - -def init_dict_sync( - this: AbstractLookupDict, - other: AbstractLookupDict, -) -> None: - """Initialize the callbacks to sync two lookup dicts. - - This is a two-way sync, i.e. the dicts are kept in sync in both directions. - """ - this.trigger_callbacks.append( - create_lookupdict_sync_callback(this=this, other=other) - ) - other.trigger_callbacks.append( - create_lookupdict_sync_callback(this=other, other=this) - ) - -def init_dict_sync2( - this: AbstractLookupDict, - other: AbstractLookupDict, -) -> None: - """Add callback to ``this`` to sync with ``other``.""" - def sync(): - other.clear() - other.update(this) - - this.trigger_callbacks.append(sync) - - class Bilateral(DelegationSyncMixin): """Class that models metastatic progression in a bilateral lymphatic system. @@ -141,7 +38,7 @@ class Bilateral(DelegationSyncMixin): def __init__( self, graph_dict: dict[tuple[str], list[str]], - is_symmetric: dict[str, bool] | None = None, + modalities_symmetric: bool = True, unilateral_kwargs: dict[str, Any] | None = None, ipsilateral_kwargs: dict[str, Any] | None = None, contralateral_kwargs: dict[str, Any] | None = None, @@ -154,20 +51,11 @@ def __init__( which in turn pass it to the :py:class:`~lymph.graph.Representation` class that stores the graph. - The ``is_symmetric`` dictionary defines which characteristics of the bilateral - model should be symmetric. Valid keys are: - - - ``"modalities"``: - Whether the diagnostic modalities of the two neck sides are symmetric - (default: ``True``). - - ``"tumor_spread"``: - Whether the spread probabilities from the tumor(s) to the LNLs are - symmetric (default: ``False``). If this is set to ``True`` but the graphs - are asymmetric, a warning is issued. 
- - ``"lnl_spread"``: - Whether the spread probabilities between the LNLs are symmetric - (default: ``True`` if the graphs are symmetric, otherwise ``False``). If - this is set to ``True`` but the graphs are asymmetric, a warning is issued. + With the boolean ``modalities_symmetric`` the user can specify whether the + diagnostic modalities of the ``ipsi`` and ``contra`` side are symmetric. If + they are, instances of this class will have a ``modalities`` attribute that + will synchronize the diagnostic modalities of the two sides of the neck when + setting it or its keys. The ``unilateral_kwargs`` are passed to both instances of the unilateral model, while the ``ipsilateral_kwargs`` and ``contralateral_kwargs`` are passed to the @@ -177,20 +65,19 @@ def __init__( """ super().__init__() - self.init_models( + self._init_models( graph_dict=graph_dict, - is_symmetric=is_symmetric, unilateral_kwargs=unilateral_kwargs, ipsilateral_kwargs=ipsilateral_kwargs, contralateral_kwargs=contralateral_kwargs, ) - if self.is_symmetric["modalities"]: + if modalities_symmetric: delegation_sync_kwargs = {"modalities": [self.ipsi, self.contra]} else: delegation_sync_kwargs = {} - self.init_delegation_sync( + self._init_delegation_sync( max_time=[self.ipsi, self.contra], t_stages=[self.ipsi, self.contra], diag_time_dists=[self.ipsi, self.contra], @@ -200,10 +87,9 @@ def __init__( ) - def init_models( + def _init_models( self, graph_dict: dict[tuple[str], list[str]], - is_symmetric: dict[str, bool] | None = None, unilateral_kwargs: dict[str, Any] | None = None, ipsilateral_kwargs: dict[str, Any] | None = None, contralateral_kwargs: dict[str, Any] | None = None, @@ -223,64 +109,6 @@ def init_models( self.ipsi = models.Unilateral(**ipsi_kwargs) self.contra = models.Unilateral(**contra_kwargs) - self.is_symmetric = { - "modalities": True, - "tumor_spread": False, - "lnl_spread": ipsi_kwargs == contra_kwargs, - } - try: - self.is_symmetric.update(is_symmetric or {}) - except TypeError as 
type_err: - raise TypeError( - "The `is_symmetric` argument must be a dictionary with possible keys " - f"{list(self.is_symmetric.keys())} and boolean values." - ) from type_err - - if ( - (self.is_symmetric["tumor_spread"] or self.is_symmetric["lnl_spread"]) - and ipsi_kwargs != contra_kwargs - ): - warnings.warn( - "The graphs are asymmetric. Syncing spread probabilities " - "may not have intended effect." - ) - - - def init_synchronization(self) -> None: - """Initialize the synchronization of edges, modalities, and diagnose times.""" - # Sync spread probabilities - property_names = ["spread_prob", "micro_mod"] if self.ipsi.is_trinary else ["spread_prob"] - ipsi_tumor_edges = list(self.ipsi.graph.tumor_edges.values()) - ipsi_lnl_edges = list(self.ipsi.graph.lnl_edges.values()) - ipsi_edges = ( - (ipsi_tumor_edges if self.is_symmetric["tumor_spread"] else []) - + (ipsi_lnl_edges if self.is_symmetric["lnl_spread"] else []) - ) - contra_tumor_edges = list(self.contra.graph.tumor_edges.values()) - contra_lnl_edges = list(self.contra.graph.lnl_edges.values()) - contra_edges = ( - (contra_tumor_edges if self.is_symmetric["tumor_spread"] else []) - + (contra_lnl_edges if self.is_symmetric["lnl_spread"] else []) - ) - init_edge_sync( - property_names=property_names, - this_edges=ipsi_edges, - other_edges=contra_edges, - ) - - # Sync modalities - if self.is_symmetric["modalities"]: - init_dict_sync2( - this=self.ipsi.modalities, - other=self.contra.modalities, - ) - - # Sync diagnose time distributions - init_dict_sync( - this=self.ipsi.diag_time_dists, - other=self.contra.diag_time_dists, - ) - @classmethod def binary(cls, *args, **kwargs) -> Bilateral: @@ -299,13 +127,12 @@ def trinary(cls, *args, **kwargs) -> Bilateral: def get_params( self, - param: str | None = None, - as_dict: bool = False, - nested: bool = False, - ) -> float | Iterable[float] | dict[str, float] | dict[str, dict[str, float]]: + as_dict: bool = True, + as_flat: bool = True, + ) -> Iterable[float] | 
dict[str, float]: """Return the parameters of the model. - If ``nested`` is ``True``, the parameters of the two sides of the neck are + If ``as_flat`` is ``False``, the parameters of the two sides of the neck are returned as a nested dictionary in addition to one dictionary storing the parameters of the parametric distributions for marginalizing over diagnose times. Otherwise, the parameters are returned as a flat dictionary, with the @@ -327,23 +154,18 @@ def get_params( :py:meth:`lymph.graph.Edge.get_params` :py:meth:`lymph.models.Unilateral.get_params` """ - ipsi_params = self.ipsi.get_params(as_dict=True, with_dists=False) - contra_params = self.contra.get_params(as_dict=True, with_dists=False) - dist_params = self.ipsi.get_params(as_dict=True, with_edges=False) - - if nested and as_dict and param is None: - return { - "ipsi": ipsi_params, - "contra": contra_params, - "diag_time_dists": dist_params, - } - - params = {f"ipsi_{k}": v for k, v in ipsi_params.items()} - params.update({f"contra_{k}": v for k, v in contra_params.items()}) - params.update(dist_params) + ipsi_params = self.ipsi.graph.get_params(as_flat=as_flat) + contra_params = self.contra.graph.get_params(as_flat=as_flat) + dist_params = self.diag_time_dists.get_params(as_flat=as_flat) + + params = { + "ipsi": ipsi_params, + "contra": contra_params, + **dist_params, + } - if param is not None: - return params[param] + if as_flat or not as_dict: + params = flatten(params) return params if as_dict else params.values() @@ -358,9 +180,7 @@ def assign_params( This works almost exactly as the unilateral model's :py:meth:`~lymph.models.Unilateral.assign_params` method. However, this one allows the user to set the parameters of individual sides of the neck by - prefixing the keyword arguments' names with ``"ipsi_"`` or ``"contra_"``. This - is necessary for parameters that are not symmetric between the two sides of the - neck. + prefixing the keyword arguments' names with ``"ipsi_"`` or ``"contra_"``. 
Anything not prefixed by ``"ipsi_"`` or ``"contra_"`` is passed to both sides of the neck. @@ -395,34 +215,6 @@ def assign_params( return remaining_args, {"ipsi": rem_ipsi_kwargs, "contra": rem_contra_kwargs} - @property - def modalities(self) -> modalities.ModalitiesUserDict: - """Return the set diagnostic modalities of the model. - - See Also: - :py:attr:`lymph.models.Unilateral.modalities` - The corresponding unilateral attribute. - :py:class:`~lymph.modalities.ModalitiesUserDict` - The implementation of the descriptor class. - """ - if not self.is_symmetric["modalities"]: - raise AttributeError( - "The modalities are not symmetric. Please access them via the " - "`ipsi` or `contra` attributes." - ) - return self.ipsi.modalities - - @modalities.setter - def modalities(self, new_modalities) -> None: - """Set the diagnostic modalities of the model.""" - if not self.is_symmetric["modalities"]: - raise AttributeError( - "The modalities are not symmetric. Please set them via the " - "`ipsi` or `contra` attributes." - ) - self.ipsi.modalities = new_modalities - - def load_patient_data( self, patient_data: pd.DataFrame, diff --git a/lymph/models/midline.py b/lymph/models/midline.py index 28c8896..f4a5758 100644 --- a/lymph/models/midline.py +++ b/lymph/models/midline.py @@ -11,7 +11,7 @@ from lymph import graph, modalities, models from lymph.helper import ( AbstractLookupDict, - DelegatorMixin, + DelegationSyncMixin, DiagnoseType, PatternType, early_late_mapping, @@ -89,7 +89,7 @@ def sync(): this.trigger_callbacks.append(sync) -class Midline(DelegatorMixin): +class Midline(DelegationSyncMixin): """Models metastatic progression bilaterally with tumor lateralization. 
Model a bilateral lymphatic system where an additional risk factor can diff --git a/lymph/models/unilateral.py b/lymph/models/unilateral.py index 5c2089f..8fb6f0e 100644 --- a/lymph/models/unilateral.py +++ b/lymph/models/unilateral.py @@ -4,25 +4,26 @@ import warnings from functools import cached_property from itertools import product -from typing import Any, Callable, Generator, Iterable, Iterator +from typing import Any, Callable, Generator, Iterable, Iterator, Literal import numpy as np import pandas as pd from lymph import diagnose_times, graph, matrix, modalities from lymph.helper import ( - DelegatorMixin, + DelegationSyncMixin, DiagnoseType, PatternType, dict_to_func, early_late_mapping, + flatten, smart_updating_dict_cached_property, ) warnings.filterwarnings("ignore", category=pd.errors.PerformanceWarning) -class Unilateral(DelegatorMixin): +class Unilateral(DelegationSyncMixin): """Class that models metastatic progression in a unilateral lymphatic system. It does this by representing it as a directed graph (DAG), which is stored in and @@ -101,12 +102,12 @@ def __init__( self._max_time = max_time - self.init_delegation( - graph=[ - "is_binary", "is_trinary", - "get_state", "set_state", "state_list", - "lnls", - ], + self._init_delegation_sync( + is_binary=[self.graph], + is_trinary=[self.graph], + get_state=[self.graph], + set_state=[self.graph], + state_list=[self.graph], ) @@ -153,46 +154,55 @@ def print_info(self): def get_params( self, - param: str | None = None, - as_dict: bool = False, - with_edges: bool = True, - with_dists: bool = True, + as_dict: bool = True, + as_flat: bool = True, ) -> float | Iterable[float] | dict[str, float]: """Get the parameters of the model. - If ``as_dict`` is ``True``, return a dictionary with the parameters as values. - Otherwise, return the value of the parameter ``param``. + If ``as_dict`` is ``True``, the parameters are returned as a dictionary. 
If + ``as_flat`` is ``True``, the dictionary is flattened, i.e., all nested + dictionaries are merged into one, using :py:func:`~lymph.helper.flatten`. + """ + params = self.graph.get_params(as_flat=as_flat) + params.update(self.diag_time_dists.get_params(as_flat=as_flat)) - Using the keyword arguments ``with_edges`` and ``with_dists``, one can control - whether the parameters of the edges and the distributions over diagnose times - should be included in the returned parameters. By default, both are included. + if as_flat or not as_dict: + params = flatten(params) - See Also: - :py:meth:`lymph.diagnose_times.Distribution.get_params` - :py:meth:`lymph.diagnose_times.DistributionsUserDict.get_params` - :py:meth:`lymph.graph.Edge.get_params` - :py:meth:`lymph.models.Bilateral.get_params` + return params if as_dict else params.values() + + + def assign_edge_params( + self, + *args: float, + kind: Literal["growth", "tumor", "lnl"] | None = None, + **kwargs: float, + ) -> tuple[Iterator[float], dict[str, float]]: + """Assign the spread probabilities of the tumor edges. + + If the params are provided via positional arguments, they are used in the order + of the edges as they are stored in the graph. Keyword arguments override the + positional arguments. + + Via the ``kind`` parameter, one can specify whether the spread probabilities + should be set for the tumor edges (``kind="tumor"``), the LNL edges + (``kind="lnl"``), the growth edges (``kind="growth"``), or all (``kind=None``). 
""" - iterator = [] - params = {} + args = iter(args) + kind = "" if kind is None else f"{kind}_" + edges = getattr(self.graph, f"{kind}edges") - if with_edges: - iterator = itertools.chain(iterator, self.graph.edges.items()) + for (edge_name, edge), param_arg in zip(edges.items(), args): + edge.set_spread_prob(param_arg) + if (param_name := f"{edge_name}_spread") in kwargs: + edge.set_spread_prob(kwargs.pop(param_name)) + elif (param_name := f"{edge_name}_growth") in kwargs: + edge.set_spread_prob(kwargs.pop(param_name)) - if with_dists: - iterator = itertools.chain(iterator, self.diag_time_dists.items()) + return args, kwargs - for edge_name_or_tstage, edge_or_dist in iterator: - with warnings.catch_warnings(): - warnings.simplefilter("ignore", category=UserWarning) - edge_or_dist_params = edge_or_dist.get_params(as_dict=True) - for name, value in edge_or_dist_params.items(): - params[f"{edge_name_or_tstage}_{name}"] = value - if param is not None: - return params[param] - return params if as_dict else params.values() def _assign_via_args(self, new_params_args: Iterator[float]) -> Iterator[float]: @@ -241,8 +251,8 @@ def _assign_via_kwargs( edge_name_or_tstage, type_ = key.rsplit("_", maxsplit=1) except ValueError as val_err: raise KeyError( - "Keyword arguments must be of the form '_' " - "or '_' for the distributions over diagnose " + "Keyword arguments must be of the form `_` " + "or `_` for the distributions over diagnose " "times." 
) from val_err if edge_name_or_tstage in edges_and_dists: diff --git a/tests/trinary_unilateral_test.py b/tests/trinary_unilateral_test.py index 5f0257c..2e6aa61 100644 --- a/tests/trinary_unilateral_test.py +++ b/tests/trinary_unilateral_test.py @@ -1,11 +1,11 @@ """Test the trinary unilateral system.""" import unittest +import fixtures import numpy as np import pandas as pd from lymph.graph import LymphNodeLevel -from tests import fixtures class TrinaryInitTestCase(fixtures.TrinaryFixtureMixin, unittest.TestCase): @@ -100,6 +100,29 @@ def test_diagnose_matrices_shape(self) -> None: self.assertEqual(diagnose_matrix.shape, (3 ** num_lnls, num_patients)) +class TrinaryParamAssignmentTestCase(fixtures.TrinaryFixtureMixin, unittest.TestCase): + """Test the assignment of parameters in a trinary model.""" + + def setUp(self): + """Load patient data.""" + super().setUp() + self.init_diag_time_dists(early="frozen", late="parametric") + self.model.assign_params(**self.create_random_params()) + + def test_edge_params(self): + """Test the assignment of edge parameters.""" + params_to_set = {} + for edge_name, edge in self.model.graph.edges.items(): + params = edge.get_params(as_dict=True) + for param in params: + params_to_set[f"{edge_name}_{param}"] = self.rng.random() + + self.model.assign_edge_params(**params_to_set) + retrieved_params = self.model.get_params(as_dict=True) + for param in params_to_set: + self.assertEqual(params_to_set[param], retrieved_params[param]) + + class TrinaryLikelihoodTestCase(fixtures.TrinaryFixtureMixin, unittest.TestCase): """Test the likelihood of a trinary model.""" From edd603c248deff079552e7f47bef4b3e11172e2a Mon Sep 17 00:00:00 2001 From: Roman Ludwig <48687784+rmnldwg@users.noreply.github.com> Date: Fri, 16 Feb 2024 16:12:46 +0100 Subject: [PATCH 17/75] change!: make `set_params()` uniform and chainable The API of all `set_params()` methods is now nice and uniform, allowing arbitrary chaining of these methods. 
BREAKING CHANGE: The `assign_params()` method has been removed and some `set_params()` methods work differently than before. --- lymph/diagnose_times.py | 93 ++++++++++--------------- lymph/graph.py | 108 +++++++++++++++++++++++------- lymph/helper.py | 93 +++++++++++++++++++------ lymph/models/bilateral.py | 13 ++-- lymph/models/midline.py | 9 ++- lymph/models/unilateral.py | 106 ++++++++--------------------- lymph/types.py | 22 ++++++ tests/bayesian_unilateral_test.py | 2 +- tests/binary_bilateral_test.py | 12 ++-- tests/binary_unilateral_test.py | 12 ++-- tests/fixtures.py | 4 +- tests/trinary_unilateral_test.py | 6 +- 12 files changed, 266 insertions(+), 214 deletions(-) create mode 100644 lymph/types.py diff --git a/lymph/diagnose_times.py b/lymph/diagnose_times.py index 6c3b264..da78420 100644 --- a/lymph/diagnose_times.py +++ b/lymph/diagnose_times.py @@ -20,7 +20,8 @@ import numpy as np -from lymph.helper import AbstractLookupDict, flatten +from lymph.helper import AbstractLookupDict, flatten, popfirst, set_params_for +from lymph.types import SetParamsReturnType logger = logging.getLogger(__name__) @@ -156,9 +157,7 @@ def normalize(distribution: np.ndarray) -> np.ndarray: def distribution(self) -> np.ndarray: """Return the probability mass function of the distribution if it is frozen.""" if not hasattr(self, "_frozen") or self._frozen is None: - self._frozen = self.normalize( - self._func(self.support, **self._kwargs) - ) + self._frozen = self.normalize(self._func(self.support, **self._kwargs)) return self._frozen @@ -188,44 +187,35 @@ def get_params( return self._kwargs if as_dict else self._kwargs.values() - def set_params(self, **kwargs) -> None: + def set_params(self, *args: float, **kwargs: float) -> SetParamsReturnType: """Update distribution by setting its parameters and storing the frozen PMF. - To work during inference using e.g. MCMC sampling, it needs to throw a - ``ValueError`` if the parameters are invalid. 
To this end, it expects the - underlying function to raise a ``ValueError`` if one of the parameters is - invalid. If the parameters are valid, the frozen PMF is stored and can be - retrieved via the :py:meth:`distribution` property. + Parameters can be set via positional arguments or keyword arguments. Keyword + arguments override positional arguments. If the distribution is not updateable, + a warning is issued and all args and kwargs are returned. - Note: - Parameters whose values are ``None`` are ignored. + If any of the parameters is invalid, a ``ValueError`` is raised and the original + parameters are restored. - See Also: - :py:meth:`lymph.diagnose_times.DistributionsUserDict.set_params` - :py:meth:`lymph.graph.Edge.set_params` + Unused args and kwargs are returned as well. """ - params_to_set = {} - for name, value in kwargs.items(): - if name not in self._kwargs or value is None: - continue - params_to_set[name] = value - - if self.is_updateable: - new_kwargs = self._kwargs.copy() - new_kwargs.update(params_to_set) - - try: - self._frozen = self.normalize( - self._func(self.support, **new_kwargs) - ) - except ValueError as val_err: - raise ValueError( - "Invalid parameter(s) provided to distribution over diagnose times" - ) from val_err - - self._kwargs = new_kwargs - else: - warnings.warn("Distribution is not updateable, skipping...") + if not self.is_updateable: + warnings.warn("Distribution is not updateable, ignoring parameters") + return args, kwargs + + old_kwargs = self._kwargs.copy() + + for name, value in self._kwargs.items(): + first, args = popfirst(args) + self._kwargs[name] = first or kwargs.pop(name, value) + + try: + _ = self.distribution + except ValueError as val_err: + self._kwargs = old_kwargs + raise ValueError("Invalid params provided to distribution") from val_err + + return args, kwargs def draw_diag_times( @@ -304,30 +294,17 @@ def get_params( return params if as_dict else params.values() - def set_params(self, **kwargs) -> None: - 
"""Update all parametrized distributions via keyword arguments. + def set_params(self, *args: float, **kwargs: float) -> SetParamsReturnType: + """Update all parametrized distributions. - The keys must be of the form ``{t_stage}_{param}``, where ``t_stage`` is the - T-stage and ``param`` is the name of the parameter to update. The values are - the new parameter values. + When the new parameters are provided as positional arguments, they are used up + in the order of the T-stages. - See Also: - :py:meth:`lymph.diagnose_times.Distribution.set_params` - :py:meth:`lymph.graph.Edge.set_params` + If the params are provided as keyword arguments, the keys must be of the form + ``{t_stage}_{param}``, where ``t_stage`` is the T-stage and ``param`` is the + name of the parameter to update. Keyword arguments override positional ones. """ - nested_params = { - t_stage: {} for t_stage, dist in self.items() - if dist.is_updateable - } - for key, value in kwargs.items(): - t_stage, param = key.split("_", maxsplit=1) - if t_stage not in nested_params: - logger.debug( - f"Skipping parameter {param} for T-stage {t_stage} " - "because it doesn't have a parametrized distribution" - ) - continue - nested_params[t_stage][param] = value + return set_params_for(self, *args, **kwargs) def draw( diff --git a/lymph/graph.py b/lymph/graph.py index 5f0d038..36772f0 100644 --- a/lymph/graph.py +++ b/lymph/graph.py @@ -18,7 +18,15 @@ import numpy as np -from lymph.helper import check_unique_names, comp_transition_tensor, flatten +from lymph.helper import ( + check_unique_names, + comp_transition_tensor, + flatten, + popfirst, + set_params_for, + trigger, +) +from lymph.types import SetParamsReturnType class AbstractNode: @@ -300,7 +308,7 @@ def name(self) -> str: if self.is_growth: return self.parent.name - return self.parent.name + '_to_' + self.child.name + return self.parent.name + 'to' + self.child.name @property @@ -317,13 +325,21 @@ def is_tumor_spread(self) -> bool: def get_micro_mod(self) 
-> float: """Return the spread probability.""" - if not hasattr(self, "_micro_mod") or self.child.is_binary: + if ( + not hasattr(self, "_micro_mod") + or isinstance(self.parent, Tumor) + or self.parent.is_binary + ): self._micro_mod = 1. return self._micro_mod - def set_micro_mod(self, new_micro_mod: float) -> None: + @trigger + def set_micro_mod(self, new_micro_mod: float | None) -> None: """Set the spread modifier for LNLs with microscopic involvement.""" - if self.child.is_binary: + if new_micro_mod is None: + return + + if isinstance(self.parent, Tumor) or self.parent.is_binary: warnings.warn("Microscopic spread modifier is not used for binary nodes!") if not 0. <= new_micro_mod <= 1.: @@ -344,10 +360,15 @@ def get_spread_prob(self) -> float: self._spread_prob = 0. return self._spread_prob - def set_spread_prob(self, new_spread_prob): + @trigger + def set_spread_prob(self, new_spread_prob: float | None) -> None: """Set the spread probability of the edge.""" + if new_spread_prob is None: + return + if not 0. <= new_spread_prob <= 1.: raise ValueError("Spread probability must be between 0 and 1!") + self._spread_prob = new_spread_prob spread_prob = property( @@ -381,25 +402,49 @@ def get_params( return params if as_dict else params.values() - def set_params( - self, - growth: float | None = None, - spread: float | None = None, - micro: float | None = None, - ) -> None: + def set_params(self, *args, **kwargs) -> SetParamsReturnType: """Set the values of the edge's parameters. - See Also: - :py:meth:`lymph.diagnose_times.Distribution.set_params` - :py:meth:`lymph.diagnose_times.DistributionsUserDict.set_params` + If provided as positional arguments, the edge connects to a trinary node, and + is not a growth node, the first argument is the spread probability and the + second argument is the microscopic spread modifier. Otherwise it only consumes + one argument, which is the growth or spread probability. 
+ + Keyword arguments (i.e., ``"grwoth"``, ``"spread"``, and ``"micro"``) override + positional arguments. Unused args and kwargs are returned. + + Examples: + + >>> edge = Edge(Tumor("T"), LymphNodeLevel("II", allowed_states=[0, 1, 2])) + >>> _ = edge.set_params(0.1, 0.2) + >>> edge.spread_prob + 0.1 + >>> edge.micro_mod + 0.2 + >>> _ = edge.set_params(spread=0.3, micro=0.4) + >>> edge.spread_prob + 0.3 + >>> edge.micro_mod + 0.4 """ + first, args = popfirst(args) + value = first or self.get_spread_prob() + if self.is_growth: - return self.set_spread_prob(growth) if growth is not None else None + self.set_spread_prob(kwargs.pop("growth", value)) + else: + self.set_spread_prob(kwargs.pop("spread", value)) - if spread is not None: - self.set_spread_prob(spread) - if self.child.is_trinary and not self.is_tumor_spread and micro is not None: - self.set_micro_mod(micro) + if ( + not isinstance(self.parent, Tumor) + and self.parent.is_trinary + and not self.is_growth + ): + first, args = popfirst(args) + value = first or self.get_micro_mod() + self.set_micro_mod(kwargs.pop("micro", value)) + + return args, kwargs @property @@ -610,8 +655,8 @@ def parameter_hash(self) -> int: ... ): ... params_dict = one_edge.get_params(as_dict=True) ... params_to_set = {k: rng.uniform() for k in params_dict} - ... one_edge.set_params(**params_to_set) - ... another_edge.set_params(**params_to_set) + ... _ = one_edge.set_params(**params_to_set) + ... _ = another_edge.set_params(**params_to_set) >>> one_graph.parameter_hash() == another_graph.parameter_hash() True """ @@ -654,9 +699,9 @@ def get_mermaid(self) -> str: ... ('lnl', 'III'): [], ... 
} >>> graph = Representation(graph_dict) - >>> graph.edges["T_to_II"].spread_prob = 0.1 - >>> graph.edges["T_to_III"].spread_prob = 0.2 - >>> graph.edges["II_to_III"].spread_prob = 0.3 + >>> graph.edges["TtoII"].spread_prob = 0.1 + >>> graph.edges["TtoIII"].spread_prob = 0.2 + >>> graph.edges["IItoIII"].spread_prob = 0.3 >>> print(graph.get_mermaid()) # doctest: +NORMALIZE_WHITESPACE flowchart TD T-->|10%| II @@ -781,3 +826,16 @@ def get_params( params = flatten(params) return params if as_dict else params.values() + + + def set_params(self, *args, **kwargs) -> SetParamsReturnType: + """Set the parameters of the edges in the graph. + + The arguments are passed to the :py:meth:`~lymph.graph.Edge.set_params` method + of the edges. Global keyword arguments (e.g. ``"spread"``) are passed to each + edge's ``set_params`` method. Unused args and kwargs are returned. + + Specific keyword arguments take precedence over global ones which in turn take + precedence over positional arguments. + """ + return set_params_for(self.edges, *args, **kwargs) diff --git a/lymph/helper.py b/lymph/helper.py index 2fcca56..40de86e 100644 --- a/lymph/helper.py +++ b/lymph/helper.py @@ -9,18 +9,11 @@ import numpy as np from cachetools import LRUCache -from pandas._libs.missing import NAType -PatternType = dict[str, bool | NAType | None] -"""Type alias for an involvement pattern.""" - -DiagnoseType = dict[str, PatternType] -"""Type alias for a diagnose, which is a involvement pattern per diagnostic modality.""" +from lymph.types import HasSetParams, SetParamsReturnType logger = logging.getLogger(__name__) -BASIC_TYPES = (int, float, str, bool, bytes, type(None)) - class DelegationSyncMixin: """Mixin to delegate and synchronize an attribute of multiple instances. @@ -611,15 +604,21 @@ def callable_mapping(key): def popfirst(seq: Sequence[Any]) -> tuple[Any, Sequence[Any]]: """Return the first element of a sequence and the sequence without it. 
- Example: + If the sequence is empty, the first element will be ``None`` and the second just + the empty sequence. Example: >>> popfirst([1, 2, 3]) (1, [2, 3]) + >>> popfirst([]) + (None, []) """ - return seq[0], seq[1:] + try: + return seq[0], seq[1:] + except IndexError: + return None, seq -def flatten(mapping: dict) -> dict: +def flatten(mapping, parent_key='', sep='_') -> dict: """Flatten a nested dictionary. Example: @@ -627,14 +626,64 @@ def flatten(mapping: dict) -> dict: >>> flatten({"a": {"b": 1, "c": 2}, "d": 3}) {'a_b': 1, 'a_c': 2, 'd': 3} """ - def _flatten(mapping, parent_key='', sep='_'): - items = [] - for k, v in mapping.items(): - new_key = f"{parent_key}{sep}{k}" if parent_key else k - if isinstance(v, dict): - items.extend(_flatten(v, new_key, sep=sep).items()) - else: - items.append((new_key, v)) - return dict(items) - - return _flatten(mapping) + items = [] + for k, v in mapping.items(): + new_key = f"{parent_key}{sep}{k}" if parent_key else k + if isinstance(v, dict): + items.extend(flatten(v, new_key, sep=sep).items()) + else: + items.append((new_key, v)) + return dict(items) + + +def unflatten_and_split( + mapping: dict, + expected_keys: list[str], + sep: str = "_", +) -> tuple[dict, dict]: + """Unflatten the part of a dict containing ``expected_keys`` and return the rest. 
+ + Example: + + >>> unflatten_and_split({'a_b': 1, 'a_c_x': 2, 'd_y': 3}, expected_keys=['a']) + ({'a': {'b': 1, 'c_x': 2}}, {'d_y': 3}) + """ + split_kwargs, global_kwargs = {}, {} + for key, value in mapping.items(): + left, _, right = key.partition(sep) + if left not in expected_keys: + global_kwargs[key] = value + continue + + tmp = split_kwargs + if left not in tmp: + tmp[left] = {} + + tmp = tmp[left] + tmp[right] = value + + return split_kwargs, global_kwargs + + +def set_params_for( + objects: dict[str, HasSetParams], + *args: float, + **kwargs: float, +) -> SetParamsReturnType: + """Pass arguments to each ``set_params()`` method of the ``objects``.""" + kwargs, global_kwargs = unflatten_and_split(kwargs, expected_keys=objects.keys()) + rem_global_keys = global_kwargs.copy().keys() + + for key, obj in objects.items(): + obj_kwargs = kwargs.get(key, global_kwargs.copy()) + args, obj_kwargs = obj.set_params(*args, **obj_kwargs) + + rem_global_keys &= obj_kwargs.keys() + for global_key in global_kwargs: + if global_key in obj_kwargs: + del obj_kwargs[global_key] + + kwargs[key] = obj_kwargs + + kwargs.update({key: global_kwargs[key] for key in rem_global_keys}) + return args, flatten(kwargs) diff --git a/lymph/models/bilateral.py b/lymph/models/bilateral.py index 594c502..d4d2b28 100644 --- a/lymph/models/bilateral.py +++ b/lymph/models/bilateral.py @@ -8,13 +8,8 @@ import pandas as pd from lymph import matrix, models -from lymph.helper import ( - DelegationSyncMixin, - DiagnoseType, - PatternType, - early_late_mapping, - flatten, -) +from lymph.helper import DelegationSyncMixin, early_late_mapping, flatten +from lymph.types import DiagnoseType, PatternType warnings.filterwarnings("ignore", category=pd.errors.PerformanceWarning) logger = logging.getLogger(__name__) @@ -206,10 +201,10 @@ def assign_params( else: general_kwargs[key] = value - remaining_args, rem_ipsi_kwargs = self.ipsi.assign_params( + remaining_args, rem_ipsi_kwargs = self.ipsi.set_params( 
*new_params_args, **ipsi_kwargs, **general_kwargs ) - remaining_args, rem_contra_kwargs = self.contra.assign_params( + remaining_args, rem_contra_kwargs = self.contra.set_params( *remaining_args, **contra_kwargs, **general_kwargs ) return remaining_args, {"ipsi": rem_ipsi_kwargs, "contra": rem_contra_kwargs} diff --git a/lymph/models/midline.py b/lymph/models/midline.py index f4a5758..b21ef7a 100644 --- a/lymph/models/midline.py +++ b/lymph/models/midline.py @@ -12,10 +12,9 @@ from lymph.helper import ( AbstractLookupDict, DelegationSyncMixin, - DiagnoseType, - PatternType, early_late_mapping, ) +from lymph.types import DiagnoseType, PatternType warnings.filterwarnings("ignore", category=pd.errors.PerformanceWarning) logger = logging.getLogger(__name__) @@ -304,13 +303,13 @@ def assign_params( else: general_kwargs[key] = value - remaining_args, remainings_kwargs = self.ext.ipsi.assign_params( + remaining_args, remainings_kwargs = self.ext.ipsi.set_params( *new_params_args, **ipsi_kwargs, **general_kwargs ) - remaining_args, remainings_kwargs = self.noext.contra.assign_params( + remaining_args, remainings_kwargs = self.noext.contra.set_params( *remaining_args, **noext_contra_kwargs, **remainings_kwargs, **general_kwargs ) - remaining_args, remainings_kwargs = self.ext.contra.assign_params( + remaining_args, remainings_kwargs = self.ext.contra.set_params( *remaining_args, **ext_contra_kwargs, **remainings_kwargs, **general_kwargs ) if self.central_enabled: diff --git a/lymph/models/unilateral.py b/lymph/models/unilateral.py index 8fb6f0e..cd22277 100644 --- a/lymph/models/unilateral.py +++ b/lymph/models/unilateral.py @@ -12,13 +12,12 @@ from lymph import diagnose_times, graph, matrix, modalities from lymph.helper import ( DelegationSyncMixin, - DiagnoseType, - PatternType, dict_to_func, early_late_mapping, flatten, smart_updating_dict_cached_property, ) +from lymph.types import DiagnoseType, PatternType, SetParamsReturnType warnings.filterwarnings("ignore", 
category=pd.errors.PerformanceWarning) @@ -263,57 +262,19 @@ def _assign_via_kwargs( return new_params_kwargs - def assign_params( - self, - *new_params_args, - **new_params_kwargs, - ) -> tuple[Iterator[float], dict[str, float]]: + def set_params(self, *args, **kwargs) -> SetParamsReturnType: """Assign new parameters to the model. - The parameters can either be provided with positional arguments or as - keyword arguments. The positional arguments must be in the following order: - - 1. All spread probs from tumor to the LNLs - 2. The parameters of arcs from LNL to LNL. For each arc, the parameters are set - in the following order: - - 1. The spread probability (or growth probability, if it's a growth edge) - 2. The microscopic involvement probability, if the model is trinary - - 3. The parameters for the marginalizing distributions over diagnose times. Note - that a distribution may take more than one parameter. So, if there are e.g. - two T-stages with distributions over diagnose times that take two parameters - each, this step requires and consumes four arguments. - - If the arguments are not used up, the remaining ones are given back as the first - element of the returned tuple. - - When providing keyword arguments, the order of the keyword arguments obviously - does not matter. If one wants to set the microscopic or growth parameters - globally for all LNLs, the keyword arguments ``micro`` and ``growth`` can - be used for that. - - As with the positional arguments, the dictionary of unused keyword arguments is - returned as the second element of the tuple. + The parameters can be provided either via positional arguments or via keyword + arguments. The positional arguments are used up one by one first by the + :py:meth:`lymph.graph.set_params` method and then by the + :py:meth:`lymph.diag_time_dists.set_params` method. - Note: - Providing positional arguments does not allow using the global - parameters ``micro`` and ``growth``. 
- - However, when assigning them via keyword arguments, the global parameters - are set first, while still allowing to override them for individual edges. - - Since the distributions over diagnose times may take more than one parameter, - they can be provided as keyword arguments by appending their name to the - corresponding T-stage, separated by an underscore. For example, a parameter - ``foo`` for the T-stage ``early`` is set via the keyword argument ``early_foo``. - - Note: - When using keyword arguments to set the parameters of the distributions - over diagnose times, it is not possible to just use the name of the - T-stage, even when the distribution only takes one parameter. - - The keyword arguments override the positional arguments, when both are provided. + The keyword arguments can be of the format ``"_"`` or + ``"_"`` for the distributions over diagnose times. If only + a ``""`` is provided, it is assumed to be a global parameter and is + sent to all edges or distributions. But the more specific keyword arguments + override the global ones, which in turn override the positional arguments. Example: @@ -327,35 +288,26 @@ def assign_params( ... is_micro_mod_shared=True, ... is_growth_shared=True, ... ) - >>> args, kwargs = model.assign_params( - ... 0.7, 0.5, 0.3, 0.2, 0.1, 0.4, 0.99, A_to_B_param="not_used" - ... 
) - >>> next(args) - 0.99 - >>> kwargs - {'A_to_B_param': 'not_used'} + >>> model.set_params(0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.99, AtoB_param="not_used") + ((0.99,), {'AtoB_param': 'not_used'}) >>> model.get_params(as_dict=True) # doctest: +NORMALIZE_WHITESPACE - {'T_to_II_spread': 0.7, - 'T_to_III_spread': 0.5, + {'TtoII_spread': 0.1, + 'TtoIII_spread': 0.2, 'II_growth': 0.3, - 'II_to_III_spread': 0.2, - 'II_to_III_micro': 0.1, - 'III_growth': 0.4} - >>> _ = model.assign_params(growth=0.123) + 'IItoIII_spread': 0.4, + 'IItoIII_micro': 0.5, + 'III_growth': 0.6} + >>> _ = model.set_params(growth=0.123) >>> model.get_params(as_dict=True) # doctest: +NORMALIZE_WHITESPACE - {'T_to_II_spread': 0.7, - 'T_to_III_spread': 0.5, + {'TtoII_spread': 0.1, + 'TtoIII_spread': 0.2, 'II_growth': 0.123, - 'II_to_III_spread': 0.2, - 'II_to_III_micro': 0.1, + 'IItoIII_spread': 0.4, + 'IItoIII_micro': 0.5, 'III_growth': 0.123} """ - with warnings.catch_warnings(): - warnings.simplefilter("ignore", category=UserWarning) - remaining_args = self._assign_via_args(iter(new_params_args)) - remainig_kwargs = self._assign_via_kwargs(new_params_kwargs) - - return remaining_args, remainig_kwargs + args, kwargs = self.graph.set_params(*args, **kwargs) + return self.diag_time_dists.set_params(*args, **kwargs) def comp_transition_prob( @@ -473,7 +425,7 @@ def transition_matrix(self) -> np.ndarray: ... ("lnl", "II"): ["III"], ... ("lnl", "III"): [], ... }) - >>> model.assign_params(0.7, 0.3, 0.2) # doctest: +ELLIPSIS + >>> model.set_params(0.7, 0.3, 0.2) # doctest: +ELLIPSIS (..., {}) >>> model.transition_matrix() array([[0.21, 0.09, 0.49, 0.21], @@ -795,7 +747,7 @@ def likelihood( The parameters of the model can be set via ``given_param_args`` and ``given_param_kwargs``. Both arguments are used to call the - :py:meth:`~assign_params` method. If the parameters are not provided, the + :py:meth:`Unilateral.set_params` method. 
If the parameters are not provided, the previously assigned parameters are used. Returns the log-likelihood if ``log`` is set to ``True``. The ``mode`` parameter @@ -811,7 +763,7 @@ def likelihood( try: # all functions and methods called here should raise a ValueError if the # given parameters are invalid... - self.assign_params(*given_param_args, **given_param_kwargs) + _ = self.set_params(*given_param_args, **given_param_kwargs) except ValueError: return -np.inf if log else 0. @@ -883,7 +835,7 @@ def comp_posterior_state_dist( # here if the parameters are invalid, since we want to know if the user # provided invalid parameters. In the likelihood, we rather return a zero # likelihood to tell the inference algorithm that the parameters are invalid. - self.assign_params(*given_param_args, **given_param_kwargs) + self.set_params(*given_param_args, **given_param_kwargs) if given_diagnoses is None: given_diagnoses = {} diff --git a/lymph/types.py b/lymph/types.py new file mode 100644 index 0000000..a32f7c9 --- /dev/null +++ b/lymph/types.py @@ -0,0 +1,22 @@ +""" +Type aliases and protocols used in the lymph package. +""" +from typing import Protocol + +from pandas._libs.missing import NAType + + +class HasSetParams(Protocol): + """Protocol for classes that have a ``set_params`` method.""" + def set_params(self, *args: float, **kwargs: float) -> None: + ... 
+ + +PatternType = dict[str, bool | NAType | None] +"""Type alias for an involvement pattern.""" + +DiagnoseType = dict[str, PatternType] +"""Type alias for a diagnose, which is a involvement pattern per diagnostic modality.""" + +SetParamsReturnType = tuple[tuple[float], dict[str, float]] +"""Type returned by all ``set_params()`` methods.""" diff --git a/tests/bayesian_unilateral_test.py b/tests/bayesian_unilateral_test.py index 26c64a4..779275f 100644 --- a/tests/bayesian_unilateral_test.py +++ b/tests/bayesian_unilateral_test.py @@ -12,7 +12,7 @@ class BayesianUnilateralModelTestCase(fixtures.BinaryUnilateralModelMixin, unitt def setUp(self): super().setUp() - self.model.assign_params(**self.create_random_params()) + self.model.set_params(**self.create_random_params()) self.model.modalities = fixtures.MODALITIES self.load_patient_data(filename="2021-usz-oropharynx.csv") diff --git a/tests/binary_bilateral_test.py b/tests/binary_bilateral_test.py index 78de850..2c7377d 100644 --- a/tests/binary_bilateral_test.py +++ b/tests/binary_bilateral_test.py @@ -55,7 +55,7 @@ def test_transition_matrix_sync(self): rand_ipsi_param = self.rng.choice(list( self.model.ipsi.get_params(as_dict=True).keys() )) - self.model.assign_params(**{f"ipsi_{rand_ipsi_param}": self.rng.random()}) + self.model.set_params(**{f"ipsi_{rand_ipsi_param}": self.rng.random()}) self.assertFalse(np.all( ipsi_trans_mat == self.model.ipsi.transition_matrix() )) @@ -200,14 +200,14 @@ def test_get_params_as_dict(self): contra_dict = self.model.contra.get_params(as_dict=True) self.assertEqual(ipsi_dict.keys(), contra_dict.keys()) - def test_assign_params_as_args(self): + def test_set_params_as_args(self): """Test that the parameters can be assigned.""" ipsi_args = self.rng.uniform(size=len(self.model.ipsi.get_params())) contra_args = self.rng.uniform(size=len(self.model.contra.get_params())) none_args = [None] * len(ipsi_args) # Assigning only the ipsi side - self.model.assign_params(*ipsi_args, 
*none_args) + self.model.set_params(*ipsi_args, *none_args) self.assertTrue(np.allclose(ipsi_args, list(self.model.ipsi.get_params()))) self.assertEqual( list(self.model.ipsi.diag_time_dists["late"].get_params())[0], @@ -215,7 +215,7 @@ def test_assign_params_as_args(self): ) # Assigning only the contra side - self.model.assign_params(*none_args, *contra_args) + self.model.set_params(*none_args, *contra_args) self.assertTrue(np.allclose(contra_args, list(self.model.contra.get_params()))) self.assertEqual( list(self.model.ipsi.diag_time_dists["late"].get_params())[0], @@ -223,7 +223,7 @@ def test_assign_params_as_args(self): ) # Assigning both sides - self.model.assign_params(*ipsi_args, *contra_args) + self.model.set_params(*ipsi_args, *contra_args) self.assertTrue(np.allclose(ipsi_args[:-1], list(self.model.ipsi.get_params())[:-1])) self.assertTrue(np.allclose(contra_args, list(self.model.contra.get_params()))) self.assertEqual( @@ -308,7 +308,7 @@ def setUp(self): super().setUp() self.model.modalities = fixtures.MODALITIES self.init_diag_time_dists(early="frozen", late="parametric") - self.model.assign_params(**self.create_random_params()) + self.model.set_params(**self.create_random_params()) def test_generate_data(self): """Check bilateral data generation.""" diff --git a/tests/binary_unilateral_test.py b/tests/binary_unilateral_test.py index 299c565..a5de392 100644 --- a/tests/binary_unilateral_test.py +++ b/tests/binary_unilateral_test.py @@ -142,7 +142,7 @@ def test_params_assignment_via_lookup(self): def test_params_assignment_via_method(self): """Make sure the spread parameters are assigned correctly.""" params_to_set = self.create_random_params() - self.model.assign_params(**params_to_set) + self.model.set_params(**params_to_set) edges_and_dists = self.model.graph.edges.copy() edges_and_dists.update(self.model.diag_time_dists) @@ -170,7 +170,7 @@ class TransitionMatrixTestCase(fixtures.BinaryUnilateralModelMixin, unittest.Tes def setUp(self): 
"""Initialize a simple binary model.""" super().setUp() - self.model.assign_params(**self.create_random_params()) + self.model.set_params(**self.create_random_params()) def test_shape(self): """Make sure the transition matrix has the correct shape.""" @@ -232,7 +232,7 @@ def setUp(self): super().setUp() self.model.modalities = fixtures.MODALITIES self.init_diag_time_dists(early="frozen", late="parametric", foo="frozen") - self.model.assign_params(**self.create_random_params()) + self.model.set_params(**self.create_random_params()) self.load_patient_data(filename="2021-usz-oropharynx.csv") def test_load_patient_data(self): @@ -311,7 +311,7 @@ def setUp(self): super().setUp() self.model.modalities = fixtures.MODALITIES self.init_diag_time_dists(early="frozen", late="parametric") - self.model.assign_params(**self.create_random_params()) + self.model.set_params(**self.create_random_params()) self.load_patient_data(filename="2021-usz-oropharynx.csv") def test_log_likelihood_smaller_zero(self): @@ -340,7 +340,7 @@ def setUp(self): super().setUp() self.model.modalities = fixtures.MODALITIES self.init_diag_time_dists(early="frozen", late="parametric") - self.model.assign_params(**self.create_random_params()) + self.model.set_params(**self.create_random_params()) def create_random_diagnoses(self): """Create a random diagnosis for each modality and LNL.""" @@ -399,7 +399,7 @@ def setUp(self): super().setUp() self.model.modalities = fixtures.MODALITIES self.init_diag_time_dists(early="frozen", late="parametric") - self.model.assign_params(**self.create_random_params()) + self.model.set_params(**self.create_random_params()) def test_generate_early_patients(self): """Check that generating only early T-stage patients works.""" diff --git a/tests/fixtures.py b/tests/fixtures.py index 51ff3fa..f4662f9 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -12,9 +12,9 @@ import lymph from lymph import diagnose_times -from lymph.helper import PatternType from lymph.modalities 
import Clinical, Modality, Pathological from lymph.models import Unilateral +from lymph.types import PatternType MODALITIES = { "CT": Clinical(specificity=0.81, sensitivity=0.86), @@ -171,7 +171,7 @@ def setUp(self): self.graph_dict = get_graph("large") self.model = lymph.models.Bilateral(graph_dict=self.graph_dict, **self.model_kwargs) self.init_diag_time_dists(early="frozen", late="parametric") - self.model.assign_params(**self.create_random_params()) + self.model.set_params(**self.create_random_params()) self.logger = get_logger(level=logging.INFO) diff --git a/tests/trinary_unilateral_test.py b/tests/trinary_unilateral_test.py index 2e6aa61..0bda40b 100644 --- a/tests/trinary_unilateral_test.py +++ b/tests/trinary_unilateral_test.py @@ -32,7 +32,7 @@ class TrinaryTransitionMatrixTestCase(fixtures.TrinaryFixtureMixin, unittest.Tes def setUp(self): super().setUp() params_to_set = self.create_random_params() - self.model.assign_params(**params_to_set) + self.model.set_params(**params_to_set) def test_edge_transition_tensors(self) -> None: """Test the tensors associated with each edge. 
@@ -107,7 +107,7 @@ def setUp(self): """Load patient data.""" super().setUp() self.init_diag_time_dists(early="frozen", late="parametric") - self.model.assign_params(**self.create_random_params()) + self.model.set_params(**self.create_random_params()) def test_edge_params(self): """Test the assignment of edge parameters.""" @@ -131,7 +131,7 @@ def setUp(self): super().setUp() self.model.modalities = fixtures.MODALITIES self.init_diag_time_dists(early="frozen", late="parametric") - self.model.assign_params(**self.create_random_params()) + self.model.set_params(**self.create_random_params()) self.load_patient_data(filename="2021-usz-oropharynx.csv") def test_log_likelihood_smaller_zero(self): From ef33d8b69e52021ce4413f7f8338ea29bf03d9ae Mon Sep 17 00:00:00 2001 From: rmnldwg <48687784+rmnldwg@users.noreply.github.com> Date: Mon, 19 Feb 2024 11:41:28 +0100 Subject: [PATCH 18/75] change!: make `set_params()` not return kwargs It does make sense to "use up" the positional arguments one by one in the `set_params()` methods, but doing the same thing with keyword arguments is pointless, difficult and error prone. BREAKING CHANGE: `set_params()` only returns one tuple of floats. 
--- lymph/diagnose_times.py | 11 ++-- lymph/graph.py | 13 +++-- lymph/helper.py | 49 ++++++++++++----- lymph/models/bilateral.py | 70 +++++++++++++------------ lymph/models/midline.py | 8 +-- lymph/models/unilateral.py | 104 +++---------------------------------- lymph/types.py | 15 ++++-- 7 files changed, 105 insertions(+), 165 deletions(-) diff --git a/lymph/diagnose_times.py b/lymph/diagnose_times.py index da78420..7dd1abb 100644 --- a/lymph/diagnose_times.py +++ b/lymph/diagnose_times.py @@ -21,7 +21,6 @@ import numpy as np from lymph.helper import AbstractLookupDict, flatten, popfirst, set_params_for -from lymph.types import SetParamsReturnType logger = logging.getLogger(__name__) @@ -187,7 +186,7 @@ def get_params( return self._kwargs if as_dict else self._kwargs.values() - def set_params(self, *args: float, **kwargs: float) -> SetParamsReturnType: + def set_params(self, *args: float, **kwargs: float) -> tuple[float]: """Update distribution by setting its parameters and storing the frozen PMF. Parameters can be set via positional arguments or keyword arguments. 
Keyword @@ -201,13 +200,13 @@ def set_params(self, *args: float, **kwargs: float) -> SetParamsReturnType: """ if not self.is_updateable: warnings.warn("Distribution is not updateable, ignoring parameters") - return args, kwargs + return args old_kwargs = self._kwargs.copy() for name, value in self._kwargs.items(): first, args = popfirst(args) - self._kwargs[name] = first or kwargs.pop(name, value) + self._kwargs[name] = first or kwargs.get(name, value) try: _ = self.distribution @@ -215,7 +214,7 @@ def set_params(self, *args: float, **kwargs: float) -> SetParamsReturnType: self._kwargs = old_kwargs raise ValueError("Invalid params provided to distribution") from val_err - return args, kwargs + return args def draw_diag_times( @@ -294,7 +293,7 @@ def get_params( return params if as_dict else params.values() - def set_params(self, *args: float, **kwargs: float) -> SetParamsReturnType: + def set_params(self, *args: float, **kwargs: float) -> tuple[float]: """Update all parametrized distributions. When the new parameters are provided as positional arguments, they are used up diff --git a/lymph/graph.py b/lymph/graph.py index 36772f0..b224057 100644 --- a/lymph/graph.py +++ b/lymph/graph.py @@ -26,7 +26,6 @@ set_params_for, trigger, ) -from lymph.types import SetParamsReturnType class AbstractNode: @@ -402,7 +401,7 @@ def get_params( return params if as_dict else params.values() - def set_params(self, *args, **kwargs) -> SetParamsReturnType: + def set_params(self, *args, **kwargs) -> tuple[float]: """Set the values of the edge's parameters. 
If provided as positional arguments, the edge connects to a trinary node, and @@ -431,9 +430,9 @@ def set_params(self, *args, **kwargs) -> SetParamsReturnType: value = first or self.get_spread_prob() if self.is_growth: - self.set_spread_prob(kwargs.pop("growth", value)) + self.set_spread_prob(kwargs.get("growth", value)) else: - self.set_spread_prob(kwargs.pop("spread", value)) + self.set_spread_prob(kwargs.get("spread", value)) if ( not isinstance(self.parent, Tumor) @@ -442,9 +441,9 @@ def set_params(self, *args, **kwargs) -> SetParamsReturnType: ): first, args = popfirst(args) value = first or self.get_micro_mod() - self.set_micro_mod(kwargs.pop("micro", value)) + self.set_micro_mod(kwargs.get("micro", value)) - return args, kwargs + return args @property @@ -828,7 +827,7 @@ def get_params( return params if as_dict else params.values() - def set_params(self, *args, **kwargs) -> SetParamsReturnType: + def set_params(self, *args, **kwargs) -> tuple[float]: """Set the parameters of the edges in the graph. 
The arguments are passed to the :py:meth:`~lymph.graph.Edge.set_params` method diff --git a/lymph/helper.py b/lymph/helper.py index 40de86e..3d8c8cd 100644 --- a/lymph/helper.py +++ b/lymph/helper.py @@ -10,7 +10,7 @@ import numpy as np from cachetools import LRUCache -from lymph.types import HasSetParams, SetParamsReturnType +from lymph.types import HasGetParams, HasSetParams logger = logging.getLogger(__name__) @@ -669,21 +669,46 @@ def set_params_for( objects: dict[str, HasSetParams], *args: float, **kwargs: float, -) -> SetParamsReturnType: +) -> tuple[float]: """Pass arguments to each ``set_params()`` method of the ``objects``.""" kwargs, global_kwargs = unflatten_and_split(kwargs, expected_keys=objects.keys()) - rem_global_keys = global_kwargs.copy().keys() for key, obj in objects.items(): - obj_kwargs = kwargs.get(key, global_kwargs.copy()) - args, obj_kwargs = obj.set_params(*args, **obj_kwargs) + obj_kwargs = global_kwargs.copy() + obj_kwargs.update(kwargs.get(key, {})) + args = obj.set_params(*args, **obj_kwargs) - rem_global_keys &= obj_kwargs.keys() - for global_key in global_kwargs: - if global_key in obj_kwargs: - del obj_kwargs[global_key] + return args - kwargs[key] = obj_kwargs - kwargs.update({key: global_kwargs[key] for key in rem_global_keys}) - return args, flatten(kwargs) +def synchronize_params( + get_from: dict[str, HasGetParams], + set_to: dict[str, HasSetParams], +) -> None: + """Get the parameters from one object and set them to another.""" + for key, obj in set_to.items(): + obj.set_params(**get_from[key].get_params(as_dict=True)) + + +def set_bilateral_params_for( + ipsi_objects: dict[str, HasSetParams], + contra_objects: dict[str, HasSetParams], + *args: float, + is_symmetric: bool = False, + **kwargs: float, +) -> tuple[float]: + """Pass arguments to each ``set_params()`` method of the ``objects``.""" + kwargs, global_kwargs = unflatten_and_split(kwargs, expected_keys=["ipsi", "contra"]) + + ipsi_kwargs = global_kwargs.copy() + 
ipsi_kwargs.update(kwargs.get("ipsi", {})) + args = set_params_for(ipsi_objects, *args, **ipsi_kwargs) + + if is_symmetric: + synchronize_params(ipsi_objects, contra_objects) + else: + contra_kwargs = global_kwargs.copy() + contra_kwargs.update(kwargs.get("contra", {})) + args = set_params_for(contra_objects, *args, **contra_kwargs) + + return args diff --git a/lymph/models/bilateral.py b/lymph/models/bilateral.py index d4d2b28..d599b8c 100644 --- a/lymph/models/bilateral.py +++ b/lymph/models/bilateral.py @@ -2,13 +2,18 @@ import logging import warnings -from typing import Any, Iterable, Iterator +from typing import Any, Iterable import numpy as np import pandas as pd from lymph import matrix, models -from lymph.helper import DelegationSyncMixin, early_late_mapping, flatten +from lymph.helper import ( + DelegationSyncMixin, + early_late_mapping, + flatten, + set_bilateral_params_for, +) from lymph.types import DiagnoseType, PatternType warnings.filterwarnings("ignore", category=pd.errors.PerformanceWarning) @@ -33,7 +38,7 @@ class Bilateral(DelegationSyncMixin): def __init__( self, graph_dict: dict[tuple[str], list[str]], - modalities_symmetric: bool = True, + is_symmetric: dict[str, bool] | None = None, unilateral_kwargs: dict[str, Any] | None = None, ipsilateral_kwargs: dict[str, Any] | None = None, contralateral_kwargs: dict[str, Any] | None = None, @@ -46,11 +51,17 @@ def __init__( which in turn pass it to the :py:class:`~lymph.graph.Representation` class that stores the graph. - With the boolean ``modalities_symmetric`` the user can specify whether the - diagnostic modalities of the ``ipsi`` and ``contra`` side are symmetric. If - they are, instances of this class will have a ``modalities`` attribute that - will synchronize the diagnostic modalities of the two sides of the neck when - setting it or its keys. + With the dictionary ``is_symmetric`` the user can specify which aspects of the + model are symmetric. 
Valid keys are ``"modalities"``, ``"tumor_spread"``, + and ``"lnl_spread"``. The values are booleans, with ``True`` meaning that the + aspect is symmetric. + + Note: + The symmetries of tumor and LNL spread are only guaranteed if the + respective parameters are set via the :py:meth:`~set_params()` method of + this bilateral model. It is still possible to set different parameters for + the ipsi- and contralateral side by using their respective + :py:meth:`~lymph.models.Unilateral.set_params()` method. The ``unilateral_kwargs`` are passed to both instances of the unilateral model, while the ``ipsilateral_kwargs`` and ``contralateral_kwargs`` are passed to the @@ -67,7 +78,7 @@ def __init__( contralateral_kwargs=contralateral_kwargs, ) - if modalities_symmetric: + if is_symmetric["modalities"]: delegation_sync_kwargs = {"modalities": [self.ipsi, self.contra]} else: delegation_sync_kwargs = {} @@ -139,10 +150,6 @@ def get_params( ``param`` is not ``None``, only the value of the parameter with that name is returned. Otherwise, all parameters are returned as a dictionary or a list. - Note: - The arguments ``as_dict`` and ``nested`` are ignored if ``param`` is not - ``None``. Also, ``nested`` is ignored if ``as_dict`` is ``False``. - See Also: :py:meth:`lymph.diagnose_times.Distribution.get_params` :py:meth:`lymph.diagnose_times.DistributionsUserDict.get_params` @@ -165,11 +172,7 @@ def get_params( return params if as_dict else params.values() - def assign_params( - self, - *new_params_args, - **new_params_kwargs, - ) -> tuple[Iterator[float, dict[str, dict[str, float]]]]: + def set_params(self, *args: float, **kwargs: float) -> tuple[float]: """Assign new parameters to the model. This works almost exactly as the unilateral model's @@ -192,22 +195,21 @@ def assign_params( Similar to the unilateral method, this returns a tuple of the remaining args and a dictionary with the remaining `"ipsi"` and `"contra"` kwargs. 
""" - ipsi_kwargs, contra_kwargs, general_kwargs = {}, {}, {} - for key, value in new_params_kwargs.items(): - if "ipsi_" in key: - ipsi_kwargs[key.replace("ipsi_", "")] = value - elif "contra_" in key: - contra_kwargs[key.replace("contra_", "")] = value - else: - general_kwargs[key] = value - - remaining_args, rem_ipsi_kwargs = self.ipsi.set_params( - *new_params_args, **ipsi_kwargs, **general_kwargs + args = set_bilateral_params_for( + ipsi_objects=self.ipsi.graph.tumor_edges, + contra_objects=self.contra.graph.tumor_edges, + *args, + is_symmetric=self.is_symmetric["tumor_spread"], + **kwargs, ) - remaining_args, rem_contra_kwargs = self.contra.set_params( - *remaining_args, **contra_kwargs, **general_kwargs + args = set_bilateral_params_for( + ipsi_objects=self.ipsi.graph.lnl_edges, + contra_objects=self.contra.graph.lnl_edges, + *args, + is_symmetric=self.is_symmetric["lnl_spread"], + **kwargs, ) - return remaining_args, {"ipsi": rem_ipsi_kwargs, "contra": rem_contra_kwargs} + return self.diag_time_dists.set_params(*args, **kwargs) def load_patient_data( @@ -380,7 +382,7 @@ def likelihood( try: # all functions and methods called here should raise a ValueError if the # given parameters are invalid... - self.assign_params(*given_param_args, **given_param_kwargs) + self.set_params(*given_param_args, **given_param_kwargs) except ValueError: return -np.inf if log else 0. 
@@ -422,7 +424,7 @@ def comp_posterior_joint_state_dist( if given_param_kwargs is None: given_param_kwargs = {} - self.assign_params(*given_param_args, **given_param_kwargs) + self.set_params(*given_param_args, **given_param_kwargs) if given_diagnoses is None: given_diagnoses = {} diff --git a/lymph/models/midline.py b/lymph/models/midline.py index b21ef7a..6a7e7c6 100644 --- a/lymph/models/midline.py +++ b/lymph/models/midline.py @@ -272,19 +272,19 @@ def assign_params( self.alpha_mix = value else: no_extension_kwargs[key] = value - remaining_args, remainings_kwargs = self.noext.assign_params(*new_params_args, **no_extension_kwargs) + remaining_args, remainings_kwargs = self.noext.set_params(*new_params_args, **no_extension_kwargs) for key in no_extension_kwargs.keys(): if 'contra_primary' in key: extension_kwargs[key] = self.alpha_mix * extension_kwargs[(key.replace("contra", "ipsi"))] + (1. - self.alpha_mix) * no_extension_kwargs[key] else: extension_kwargs[key] = no_extension_kwargs[key] - remaining_args, remainings_kwargs = self.ext.assign_params(*remaining_args, **extension_kwargs) + remaining_args, remainings_kwargs = self.ext.set_params(*remaining_args, **extension_kwargs) # If the syncing of the edges works properly, this below can be deleted. 
if self.central_enabled: for key in no_extension_kwargs.keys(): if 'contra' not in key: central_kwargs[(key.replace("ipsi_", ""))] = no_extension_kwargs[key] - remaining_args, remainings_kwargs = self.central.assign_params(*new_params_args, **central_kwargs) + remaining_args, remainings_kwargs = self.central.set_params(*new_params_args, **central_kwargs) else: ipsi_kwargs, noext_contra_kwargs, ext_contra_kwargs, general_kwargs, central_kwargs = {}, {}, {}, {}, {} @@ -317,7 +317,7 @@ def assign_params( central_kwargs[(key.replace("ipsi_", ""))] = ipsi_kwargs[key] print(ipsi_kwargs) print(general_kwargs) - remaining_args, remainings_kwargs = self.central.assign_params(*new_params_args, **central_kwargs, **general_kwargs) + remaining_args, remainings_kwargs = self.central.set_params(*new_params_args, **central_kwargs, **general_kwargs) return remaining_args, remainings_kwargs diff --git a/lymph/models/unilateral.py b/lymph/models/unilateral.py index cd22277..f83bcd0 100644 --- a/lymph/models/unilateral.py +++ b/lymph/models/unilateral.py @@ -1,10 +1,9 @@ from __future__ import annotations -import itertools import warnings from functools import cached_property from itertools import product -from typing import Any, Callable, Generator, Iterable, Iterator, Literal +from typing import Any, Callable, Generator, Iterable import numpy as np import pandas as pd @@ -17,7 +16,7 @@ flatten, smart_updating_dict_cached_property, ) -from lymph.types import DiagnoseType, PatternType, SetParamsReturnType +from lymph.types import DiagnoseType, PatternType warnings.filterwarnings("ignore", category=pd.errors.PerformanceWarning) @@ -171,98 +170,7 @@ def get_params( return params if as_dict else params.values() - def assign_edge_params( - self, - *args: float, - kind: Literal["growth", "tumor", "lnl"] | None = None, - **kwargs: float, - ) -> tuple[Iterator[float], dict[str, float]]: - """Assign the spread probabilities of the tumor edges. 
- - If the params are provided via positional arguments, they are used in the order - of the edges as they are stored in the graph. Keyword arguments override the - positional arguments. - - Via the ``kind`` parameter, one can specify whether the spread probabilities - should be set for the tumor edges (``kind="tumor"``), the LNL edges - (``kind="lnl"``), the growth edges (``kind="growth"``), or all (``kind=None``). - """ - args = iter(args) - kind = "" if kind is None else f"{kind}_" - edges = getattr(self.graph, f"{kind}edges") - - for (edge_name, edge), param_arg in zip(edges.items(), args): - edge.set_spread_prob(param_arg) - if (param_name := f"{edge_name}_spread") in kwargs: - edge.set_spread_prob(kwargs.pop(param_name)) - elif (param_name := f"{edge_name}_growth") in kwargs: - edge.set_spread_prob(kwargs.pop(param_name)) - - return args, kwargs - - - - - - def _assign_via_args(self, new_params_args: Iterator[float]) -> Iterator[float]: - """Assign parameters to egdes and to distributions via positional arguments.""" - for edge_or_dist in itertools.chain( - self.graph.edges.values(), - self.diag_time_dists.values(), - ): - with warnings.catch_warnings(): - warnings.simplefilter("ignore", category=UserWarning) - params = edge_or_dist.get_params(as_dict=True) - - new_params = {} - for name in params: - try: - new_params[name] = next(new_params_args) - except StopIteration: - return new_params_args - finally: - edge_or_dist.set_params(**new_params) - - return new_params_args - - - def _assign_via_kwargs( - self, - new_params_kwargs: dict[str, float], - ) -> dict[str, float]: - """Assign parameters to egdes and to distributions via keyword arguments.""" - global_growth_param = new_params_kwargs.pop("growth", None) - global_micro_mod = new_params_kwargs.pop("micro", None) - - if global_growth_param is not None: - for growth_edge in self.graph.growth_edges.values(): - growth_edge.set_spread_prob(global_growth_param) - - if global_micro_mod is not None: - for 
lnl_edge in self.graph.lnl_edges.values(): - lnl_edge.set_micro_mod(global_micro_mod) - - edges_and_dists = self.graph.edges.copy() - edges_and_dists.update(self.diag_time_dists) - new_params_keys = list(new_params_kwargs.keys()) - for key in new_params_keys: - try: - edge_name_or_tstage, type_ = key.rsplit("_", maxsplit=1) - except ValueError as val_err: - raise KeyError( - "Keyword arguments must be of the form `_` " - "or `_` for the distributions over diagnose " - "times." - ) from val_err - if edge_name_or_tstage in edges_and_dists: - value = new_params_kwargs.pop(key) - edge_or_dist = edges_and_dists[edge_name_or_tstage] - edge_or_dist.set_params(**{type_: value}) - - return new_params_kwargs - - - def set_params(self, *args, **kwargs) -> SetParamsReturnType: + def set_params(self, *args: float, **kwargs: float) -> tuple[float]: """Assign new parameters to the model. The parameters can be provided either via positional arguments or via keyword @@ -289,7 +197,7 @@ def set_params(self, *args, **kwargs) -> SetParamsReturnType: ... is_growth_shared=True, ... ) >>> model.set_params(0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.99, AtoB_param="not_used") - ((0.99,), {'AtoB_param': 'not_used'}) + (0.99,) >>> model.get_params(as_dict=True) # doctest: +NORMALIZE_WHITESPACE {'TtoII_spread': 0.1, 'TtoIII_spread': 0.2, @@ -306,7 +214,7 @@ def set_params(self, *args, **kwargs) -> SetParamsReturnType: 'IItoIII_micro': 0.5, 'III_growth': 0.123} """ - args, kwargs = self.graph.set_params(*args, **kwargs) + args = self.graph.set_params(*args, **kwargs) return self.diag_time_dists.set_params(*args, **kwargs) @@ -426,7 +334,7 @@ def transition_matrix(self) -> np.ndarray: ... ("lnl", "III"): [], ... }) >>> model.set_params(0.7, 0.3, 0.2) # doctest: +ELLIPSIS - (..., {}) + () >>> model.transition_matrix() array([[0.21, 0.09, 0.49, 0.21], [0. , 0.3 , 0. 
, 0.7 ], diff --git a/lymph/types.py b/lymph/types.py index a32f7c9..bc43910 100644 --- a/lymph/types.py +++ b/lymph/types.py @@ -8,7 +8,17 @@ class HasSetParams(Protocol): """Protocol for classes that have a ``set_params`` method.""" - def set_params(self, *args: float, **kwargs: float) -> None: + def set_params(self, *args: float, **kwargs: float) -> tuple[float]: + ... + + +class HasGetParams(Protocol): + """Protocol for classes that have a ``get_params`` method.""" + def get_params( + self, + as_dict: bool = True, + as_flat: bool = True, + ) -> tuple[float] | dict[str, float]: ... @@ -17,6 +27,3 @@ def set_params(self, *args: float, **kwargs: float) -> None: DiagnoseType = dict[str, PatternType] """Type alias for a diagnose, which is a involvement pattern per diagnostic modality.""" - -SetParamsReturnType = tuple[tuple[float], dict[str, float]] -"""Type returned by all ``set_params()`` methods.""" From 779196066cb1a461884c16bc3e83923131ed3df7 Mon Sep 17 00:00:00 2001 From: rmnldwg <48687784+rmnldwg@users.noreply.github.com> Date: Mon, 19 Feb 2024 11:57:44 +0100 Subject: [PATCH 19/75] docs: go over `set_params()` docstrings --- lymph/diagnose_times.py | 11 +++++------ lymph/graph.py | 21 +++++++++++++++++---- lymph/models/bilateral.py | 32 ++++++++++++++++++++------------ 3 files changed, 42 insertions(+), 22 deletions(-) diff --git a/lymph/diagnose_times.py b/lymph/diagnose_times.py index 7dd1abb..0a5a50a 100644 --- a/lymph/diagnose_times.py +++ b/lymph/diagnose_times.py @@ -189,14 +189,13 @@ def get_params( def set_params(self, *args: float, **kwargs: float) -> tuple[float]: """Update distribution by setting its parameters and storing the frozen PMF. - Parameters can be set via positional arguments or keyword arguments. Keyword - arguments override positional arguments. If the distribution is not updateable, - a warning is issued and all args and kwargs are returned. 
+ Parameters can be set via positional arguments - which are used up one by one + in the order they are provided and are then returned - or keyword arguments. + Keyword arguments override positional arguments. If the distribution is not + updateable, a warning is issued and all args and kwargs are returned. If any of the parameters is invalid, a ``ValueError`` is raised and the original parameters are restored. - - Unused args and kwargs are returned as well. """ if not self.is_updateable: warnings.warn("Distribution is not updateable, ignoring parameters") @@ -297,7 +296,7 @@ def set_params(self, *args: float, **kwargs: float) -> tuple[float]: """Update all parametrized distributions. When the new parameters are provided as positional arguments, they are used up - in the order of the T-stages. + in the order of the T-stages and remaining args are returned. If the params are provided as keyword arguments, the keys must be of the form ``{t_stage}_{param}``, where ``t_stage`` is the T-stage and ``param`` is the diff --git a/lymph/graph.py b/lymph/graph.py index b224057..21c0e39 100644 --- a/lymph/graph.py +++ b/lymph/graph.py @@ -409,12 +409,12 @@ def set_params(self, *args, **kwargs) -> tuple[float]: second argument is the microscopic spread modifier. Otherwise it only consumes one argument, which is the growth or spread probability. - Keyword arguments (i.e., ``"grwoth"``, ``"spread"``, and ``"micro"``) override - positional arguments. Unused args and kwargs are returned. + Keyword arguments (i.e., ``"growth"``, ``"spread"``, and ``"micro"``) override + positional arguments. Unused args are returned. 
Examples: - >>> edge = Edge(Tumor("T"), LymphNodeLevel("II", allowed_states=[0, 1, 2])) + >>> edge = Edge(LymphNodeLevel("II", allowed_states=[0, 1, 2]), LymphNodeLevel("III")) >>> _ = edge.set_params(0.1, 0.2) >>> edge.spread_prob 0.1 @@ -832,9 +832,22 @@ def set_params(self, *args, **kwargs) -> tuple[float]: The arguments are passed to the :py:meth:`~lymph.graph.Edge.set_params` method of the edges. Global keyword arguments (e.g. ``"spread"``) are passed to each - edge's ``set_params`` method. Unused args and kwargs are returned. + edge's ``set_params`` method. Unused args are returned. Specific keyword arguments take precedence over global ones which in turn take precedence over positional arguments. + + Example: + + >>> graph = Representation(graph_dict={ + ... ("tumor", "T"): ["II" , "III"], + ... ("lnl", "II"): ["III"], + ... ("lnl", "III"): [], + ... }) + >>> _ = graph.set_params(0.1, 0.2, 0.3, spread=0.4, TtoII_spread=0.5) + >>> graph.get_params(as_dict=True) # doctest: +NORMALIZE_WHITESPACE + {'TtoII_spread': 0.5, + 'TtoIII_spread': 0.4, + 'IItoIII_spread': 0.4} """ return set_params_for(self.edges, *args, **kwargs) diff --git a/lymph/models/bilateral.py b/lymph/models/bilateral.py index d599b8c..7f7fea4 100644 --- a/lymph/models/bilateral.py +++ b/lymph/models/bilateral.py @@ -173,27 +173,35 @@ def get_params( def set_params(self, *args: float, **kwargs: float) -> tuple[float]: - """Assign new parameters to the model. + """Set new parameters to the model. This works almost exactly as the unilateral model's - :py:meth:`~lymph.models.Unilateral.assign_params` method. However, this one + :py:meth:`~lymph.models.Unilateral.set_params` method. However, this one allows the user to set the parameters of individual sides of the neck by prefixing the keyword arguments' names with ``"ipsi_"`` or ``"contra_"``. Anything not prefixed by ``"ipsi_"`` or ``"contra_"`` is passed to both sides - of the neck. + of the neck. 
This does obviously not work with positional arguments. Note: When setting the parameters via positional arguments, the order is - important. The first ``len(self.ipsi.get_params(as_dict=True))`` arguments - are passed to the ipsilateral side, the remaining ones to the contralateral - side. - - When still some remain after that, they are returned as the first element - of the returned tuple. - - Similar to the unilateral method, this returns a tuple of the remaining args - and a dictionary with the remaining `"ipsi"` and `"contra"` kwargs. + important: + + 1. The parameters of the edges from tumor to LNLs: + 1. first the ipsilateral parameters, + 2. if ``is_symmetric["tumor_spread"]`` is ``False``, the contralateral + parameters. Otherwise, the ipsilateral parameters are used for both + sides. + 2. The parameters of the edges from LNLs to tumor: + 1. again, first the ipsilateral parameters, + 2. if ``is_symmetric["lnl_spread"]`` is ``False``, the contralateral + parameters. Otherwise, the ipsilateral parameters are used for both + sides. + 3. The parameters of the parametric distributions for marginalizing over + diagnose times. + + When still some positional arguments remain after that, they are returned + in a tuple. 
""" args = set_bilateral_params_for( ipsi_objects=self.ipsi.graph.tumor_edges, From a6f4d0355eba0da1a4627af6043541918de31d4e Mon Sep 17 00:00:00 2001 From: rmnldwg <48687784+rmnldwg@users.noreply.github.com> Date: Mon, 19 Feb 2024 18:05:50 +0100 Subject: [PATCH 20/75] fix(diag): delete frozen dist when params change --- lymph/diagnose_times.py | 1 + 1 file changed, 1 insertion(+) diff --git a/lymph/diagnose_times.py b/lymph/diagnose_times.py index 0a5a50a..16a0df2 100644 --- a/lymph/diagnose_times.py +++ b/lymph/diagnose_times.py @@ -206,6 +206,7 @@ def set_params(self, *args: float, **kwargs: float) -> tuple[float]: for name, value in self._kwargs.items(): first, args = popfirst(args) self._kwargs[name] = first or kwargs.get(name, value) + del self._frozen try: _ = self.distribution From 48bbfe0537b272be5c615073e680f1762a1a838f Mon Sep 17 00:00:00 2001 From: rmnldwg <48687784+rmnldwg@users.noreply.github.com> Date: Mon, 19 Feb 2024 18:09:06 +0100 Subject: [PATCH 21/75] change(graph)!: replace `name` with `get_name()` In the `Edge` class, the `name` property is replaced by a function `get_name()` that is more flexible and allows us to have edge names without underscores when we need it. 
BREAKING CHANGE: `Edge.name` is replaced by `Edge.get_name()` --- lymph/graph.py | 17 ++++++++--------- lymph/models/midline.py | 6 +++--- tests/binary_bilateral_test.py | 6 +++--- tests/binary_unilateral_test.py | 12 ++++-------- tests/edge_test.py | 2 +- tests/trinary_unilateral_test.py | 3 ++- 6 files changed, 21 insertions(+), 25 deletions(-) diff --git a/lymph/graph.py b/lymph/graph.py index 21c0e39..d488f35 100644 --- a/lymph/graph.py +++ b/lymph/graph.py @@ -247,7 +247,7 @@ def __init__( def __str__(self) -> str: """Print basic info.""" - return f"Edge {self.name.replace('_', ' ')}" + return f"Edge {self.get_name(middle=' to ')}" def __repr__(self) -> str: """Print basic info.""" @@ -297,17 +297,16 @@ def child(self, new_child: LymphNodeLevel) -> None: self.child.inc.append(self) - @property - def name(self) -> str: + def get_name(self, middle='to') -> str: """Return the name of the edge. - This is used to identify and assign spread probabilities to it in the - :py:class:`~models.Unilateral` class. + This is used to identify and assign spread probabilities to it e.g. in the + :py:class:`~models.Unilateral.set_params()` method and elsewhere. 
""" if self.is_growth: return self.parent.name - return self.parent.name + 'to' + self.child.name + return f"{self.parent.name}{middle}{self.child.name}" @property @@ -584,12 +583,12 @@ def _init_edges( start = self.nodes[start_name] if isinstance(start, LymphNodeLevel) and start.is_trinary: growth_edge = Edge(parent=start, child=start, callbacks=on_edge_change) - self._edges[growth_edge.name] = growth_edge + self._edges[growth_edge.get_name()] = growth_edge for end_name in end_names: end = self.nodes[end_name] new_edge = Edge(parent=start, child=end, callbacks=on_edge_change) - self._edges[new_edge.name] = new_edge + self._edges[new_edge.get_name()] = new_edge @property @@ -819,7 +818,7 @@ def get_params( """ params = {} for edge in self.edges.values(): - params[edge.name] = edge.get_params(as_flat=as_flat) + params[edge.get_name()] = edge.get_params(as_flat=as_flat) if as_flat or not as_dict: params = flatten(params) diff --git a/lymph/models/midline.py b/lymph/models/midline.py index 6a7e7c6..5a895a3 100644 --- a/lymph/models/midline.py +++ b/lymph/models/midline.py @@ -39,7 +39,7 @@ def sync(): private_name = f"_{name}" setattr(other, private_name, getattr(this, name)) - logger.debug(f"Created sync callback for properties {names} of {this.name} edge.") + logger.debug(f"Created sync callback for properties {names} of {this.get_name} edge.") return sync # this here could probably be used to sync the edges for the different bilateral classes if we want to keep on using it @@ -53,8 +53,8 @@ def init_edge_sync( Implementing this as a separate method allows a user in theory to initialize an arbitrary kind of symmetry between the two sides of the neck. 
""" - this_edge_names = [e.name for e in this_edge_list] - other_edge_names = [e.name for e in other_edge_list] + this_edge_names = [e.get_name for e in this_edge_list] + other_edge_names = [e.get_name for e in other_edge_list] for edge_name in set(this_edge_names).intersection(other_edge_names): this_edge = this_edge_list[this_edge_names.index(edge_name)] diff --git a/tests/binary_bilateral_test.py b/tests/binary_bilateral_test.py index 2c7377d..5e4a9b4 100644 --- a/tests/binary_bilateral_test.py +++ b/tests/binary_bilateral_test.py @@ -3,11 +3,11 @@ """ import unittest +import fixtures import numpy as np from lymph import models from lymph.modalities import Clinical -from tests import fixtures class BilateralInitTest(fixtures.BilateralModelMixin, unittest.TestCase): @@ -32,7 +32,7 @@ def test_delegation(self): def test_edge_sync(self): """Check if synced edges update their respective parameters.""" for ipsi_edge in self.model.ipsi.graph.edges.values(): - contra_edge = self.model.contra.graph.edges[ipsi_edge.name] + contra_edge = self.model.contra.graph.edges[ipsi_edge.get_name] ipsi_edge.set_params(spread=self.rng.random()) self.assertEqual( ipsi_edge.get_params("spread"), @@ -43,7 +43,7 @@ def test_tensor_sync(self): """Check the transition tensors of the edges get deleted and updated properly.""" for ipsi_edge in self.model.ipsi.graph.edges.values(): ipsi_edge.set_params(spread=self.rng.random()) - contra_edge = self.model.contra.graph.edges[ipsi_edge.name] + contra_edge = self.model.contra.graph.edges[ipsi_edge.get_name] self.assertTrue(np.all( ipsi_edge.transition_tensor == contra_edge.transition_tensor )) diff --git a/tests/binary_unilateral_test.py b/tests/binary_unilateral_test.py index a5de392..ed8704c 100644 --- a/tests/binary_unilateral_test.py +++ b/tests/binary_unilateral_test.py @@ -79,7 +79,7 @@ def test_tumor_to_lnl_edges(self): self.assertEqual(edge.parent.name, "T") self.assertIn(edge.child.name, receiving_lnls) 
self.assertTrue(edge.is_tumor_spread) - self.assertIn(edge.name, connecting_edge_names) + self.assertIn(edge.get_name(middle="_to_"), connecting_edge_names) class DelegationTestCase(fixtures.BinaryUnilateralModelMixin, unittest.TestCase): @@ -99,10 +99,6 @@ def test_delegation(self): self.model.graph.get_state(), self.model.get_state(), ) - self.assertEqual( - self.model.graph.lnls, - self.model.lnls, - ) def test_set_state_delegation(self): """Check that the ``set_state`` method is also correctly delegated.""" @@ -135,7 +131,7 @@ def test_params_assignment_via_lookup(self): name, type_ = param_name.rsplit("_", maxsplit=1) edges_and_dists[name].set_params(**{type_: value}) self.assertEqual( - edges_and_dists[name].get_params(type_), + edges_and_dists[name].get_params()[type_], value, ) @@ -150,7 +146,7 @@ def test_params_assignment_via_method(self): for param_name, value in params_to_set.items(): name, type_ = param_name.rsplit("_", maxsplit=1) self.assertEqual( - edges_and_dists[name].get_params(type_), + edges_and_dists[name].get_params()[type_], value, ) @@ -158,7 +154,7 @@ def test_transition_matrix_deletion(self): """Check if the transition matrix gets deleted when a parameter is set.""" first_lnl_name = list(self.model.graph.lnls.values())[0].name trans_mat = self.model.transition_matrix() - self.model.graph.edges[f"T_to_{first_lnl_name}"].set_spread_prob(0.5) + self.model.graph.edges[f"Tto{first_lnl_name}"].set_spread_prob(0.5) self.assertFalse(np.all( trans_mat == self.model.transition_matrix() )) diff --git a/tests/edge_test.py b/tests/edge_test.py index 623bb28..3c98d1b 100644 --- a/tests/edge_test.py +++ b/tests/edge_test.py @@ -35,7 +35,7 @@ def test_repr(self) -> None: "Tumor": graph.Tumor, }, ) - self.assertEqual(self.edge.name, recreated_edge.name) + self.assertEqual(self.edge.get_name(), recreated_edge.get_name()) self.assertEqual(self.edge.parent.name, recreated_edge.parent.name) self.assertEqual(self.edge.child.name, recreated_edge.child.name) 
self.assertEqual(self.edge.spread_prob, recreated_edge.spread_prob) diff --git a/tests/trinary_unilateral_test.py b/tests/trinary_unilateral_test.py index 0bda40b..7059036 100644 --- a/tests/trinary_unilateral_test.py +++ b/tests/trinary_unilateral_test.py @@ -6,6 +6,7 @@ import pandas as pd from lymph.graph import LymphNodeLevel +from lymph.helper import set_params_for class TrinaryInitTestCase(fixtures.TrinaryFixtureMixin, unittest.TestCase): @@ -117,7 +118,7 @@ def test_edge_params(self): for param in params: params_to_set[f"{edge_name}_{param}"] = self.rng.random() - self.model.assign_edge_params(**params_to_set) + set_params_for(self.model.graph.edges, **params_to_set) retrieved_params = self.model.get_params(as_dict=True) for param in params_to_set: self.assertEqual(params_to_set[param], retrieved_params[param]) From ccbe6c07481fdafc2872203c6ec4d26752a8dbc3 Mon Sep 17 00:00:00 2001 From: rmnldwg <48687784+rmnldwg@users.noreply.github.com> Date: Mon, 19 Feb 2024 18:10:38 +0100 Subject: [PATCH 22/75] change(bi)!: reintroduce `is_symmetric` attribute This will once again manage the symmetry of the `Bilateral` class's different ipsi- and contralateral attributes. --- lymph/models/bilateral.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/lymph/models/bilateral.py b/lymph/models/bilateral.py index 7f7fea4..47b5bdf 100644 --- a/lymph/models/bilateral.py +++ b/lymph/models/bilateral.py @@ -78,7 +78,15 @@ def __init__( contralateral_kwargs=contralateral_kwargs, ) - if is_symmetric["modalities"]: + self.is_symmetric = is_symmetric + if self.is_symmetric is None: + self.is_symmetric = { + "modalities": True, + "tumor_spread": False, + "lnl_spread": True, + } + + if self.is_symmetric["modalities"]: delegation_sync_kwargs = {"modalities": [self.ipsi, self.contra]} else: delegation_sync_kwargs = {} @@ -204,16 +212,16 @@ def set_params(self, *args: float, **kwargs: float) -> tuple[float]: in a tuple. 
""" args = set_bilateral_params_for( + *args, ipsi_objects=self.ipsi.graph.tumor_edges, contra_objects=self.contra.graph.tumor_edges, - *args, is_symmetric=self.is_symmetric["tumor_spread"], **kwargs, ) args = set_bilateral_params_for( + *args, ipsi_objects=self.ipsi.graph.lnl_edges, contra_objects=self.contra.graph.lnl_edges, - *args, is_symmetric=self.is_symmetric["lnl_spread"], **kwargs, ) @@ -366,7 +374,7 @@ def likelihood( The parameters of the model can be set via ``given_param_args`` and ``given_param_kwargs``. Both arguments are used to call the - :py:meth:`~assign_params` method. If the parameters are not provided, the + :py:meth:`~set_params` method. If the parameters are not provided, the previously assigned parameters are used. Returns the log-likelihood if ``log`` is set to ``True``. The ``mode`` parameter @@ -473,7 +481,7 @@ def risk( The parameters can be set via the ``given_param_args`` and ``given_param_kwargs``, both of which are passed to the - :py:meth:`~assign_params` method. The ``given_diagnoses`` must be a dictionary + :py:meth:`~set_params` method. The ``given_diagnoses`` must be a dictionary mapping the side of the neck to a :py:class:`DiagnoseType`. 
Note: From c60860eff1043fd32f42199dfe212028712f368f Mon Sep 17 00:00:00 2001 From: rmnldwg <48687784+rmnldwg@users.noreply.github.com> Date: Mon, 19 Feb 2024 18:11:07 +0100 Subject: [PATCH 23/75] change(helper): work on `AccessPassthrough` (WIP) --- lymph/helper.py | 144 +++++++++++++++++++++++++++---------- lymph/models/unilateral.py | 2 +- 2 files changed, 107 insertions(+), 39 deletions(-) diff --git a/lymph/helper.py b/lymph/helper.py index 3d8c8cd..0acb4fe 100644 --- a/lymph/helper.py +++ b/lymph/helper.py @@ -5,7 +5,7 @@ import warnings from collections import UserDict from functools import cached_property, lru_cache, wraps -from typing import Any, Callable, Sequence +from typing import Any, Callable, Iterable, Sequence import numpy as np from cachetools import LRUCache @@ -28,10 +28,10 @@ class DelegationSyncMixin: See more details about that in the :py:class:`AccessPassthrough` class docs. """ def __init__(self) -> None: - self._delegated_and_synced = {} + self._attrs_to_objects = {} - def _init_delegation_sync(self, **attrs_from_instances) -> None: + def _init_delegation_sync(self, **attrs_to_objects: list[object]) -> None: """Initialize the delegation and synchronization of attributes. Each keyword argument is the name of an attribute to synchronize. The value @@ -55,28 +55,29 @@ def _init_delegation_sync(self, **attrs_from_instances) -> None: >>> person.left.eye_color == person.right.eye_color == 'red' True """ - self._delegated_and_synced = attrs_from_instances + for name, objects in attrs_to_objects.items(): + types = {type(obj) for obj in objects} + if len(types) > 1: + raise ValueError( + f"Instances of delegated attribute {name} must be of same type" + ) + + self._attrs_to_objects = attrs_to_objects def __getattr__(self, name): - try: - values_set = {getattr(inst, name) for inst in self._delegated_and_synced[name]} - if len(values_set) > 1: - warnings.warn( - f"Attribute '{name}' not synchronized: {values_set}. 
Set this " - "attribute on each instance to synchronize it." - ) - return sorted(values_set).pop() + objects = self._attrs_to_objects[name] + attr_list = [getattr(obj, name) for obj in objects] + + if len(attr_list) == 1: + return attr_list[0] - # Not all attributes might be hashable, which is necessary for a set - except TypeError: - values_list = [getattr(inst, name) for inst in self._delegated_and_synced[name]] - return AccessPassthrough(values_list) + return fuse(attr_list) def __setattr__(self, name, value): - if name != "_delegated_and_synced" and name in self._delegated_and_synced: - for inst in self._delegated_and_synced[name]: + if name != "_attrs_to_objects" and name in self._attrs_to_objects: + for inst in self._attrs_to_objects[name]: setattr(inst, name, value) else: super().__setattr__(name, value) @@ -124,8 +125,8 @@ class AccessPassthrough: ... set_value=[self.c1, self.c2], ... ) >>> mixture = Mixture() - >>> mixture.params_dict["a"] # pop element of sorted set and warn that not synced - 3 + >>> mixture.params_dict["b"] # get first element and warn that not synced + 4 >>> mixture.params_dict["a"] = 99 >>> mixture.c1.params_dict["a"] == mixture.c2.params_dict["a"] == 99 True @@ -138,28 +139,22 @@ class AccessPassthrough: >>> mixture.c1.params_dict["c"] == mixture.c2.params_dict["c"] == 100 True """ - def __init__(self, attr_values: list[object]) -> None: - self._attr_objects = attr_values + def __init__(self, attr_objects: list[object]) -> None: + self._attr_objects = attr_objects def __getattr__(self, name): - values = {getattr(obj, name) for obj in self._attr_objects} - if len(values) > 1: - warnings.warn( - f"Attribute '{name}' not synchronized: {values}. Set this " - "attribute on each instance to synchronize it." 
- ) - return sorted(values).pop() + if len(self._attr_objects) == 1: + return getattr(self._attr_objects[0], name) + + return fuse([getattr(obj, name) for obj in self._attr_objects]) def __getitem__(self, key): - values = {obj[key] for obj in self._attr_objects} - if len(values) > 1: - warnings.warn( - f"Value for key '{key}' not synchronized: {values}. Set this " - "value on each item to synchronize it." - ) - return sorted(values).pop() + if len(self._attr_objects) == 1: + return self._attr_objects[0][key] + + return fuse([obj[key] for obj in self._attr_objects]) def __setattr__(self, name, value): @@ -176,8 +171,37 @@ def __setitem__(self, key, value): def __call__(self, *args: Any, **kwds: Any) -> Any: + return_values = [] for obj in self._attr_objects: - obj(*args, **kwds) + return_values.append(obj(*args, **kwds)) + + return fuse(return_values) + + + def __len__(self) -> int: + if len(self._attr_objects) == 1: + return len(self._attr_objects[0]) + + return fuse([len(obj) for obj in self._attr_objects]) + + +def fuse(objects: list[Any]) -> Any: + """Try to fuse ``objects`` and return one result. + + TODO: This should not immediately return an ``AccessPassthrough`` just because the + ``objects`` are not all equal. It should do so, when the ``objects`` may be dict- + like or be callables... I need to think of a proper criterion. + + What about return an ``AccessPassthrough`` when the type is one of those defined + in this package? 
+ """ + if all(objects[0] == obj for obj in objects[1:]): + return objects[0] + + try: + return sorted(set(objects)).pop() + except TypeError: + return AccessPassthrough(objects) def check_unique_names(graph: dict): @@ -691,9 +715,9 @@ def synchronize_params( def set_bilateral_params_for( + *args: float, ipsi_objects: dict[str, HasSetParams], contra_objects: dict[str, HasSetParams], - *args: float, is_symmetric: bool = False, **kwargs: float, ) -> tuple[float]: @@ -712,3 +736,47 @@ def set_bilateral_params_for( args = set_params_for(contra_objects, *args, **contra_kwargs) return args + + +def has_any_dunder_method(obj: Any, *methods: str) -> bool: + """Check whether a class has any of the given dunder methods.""" + return any(hasattr(obj, method) for method in methods) + + +def check_unique_and_get_first(objects: Iterable, attr: str = "") -> Any: + """Check if ``objects`` are unique via a set and return of them. + + This function is meant to be used with the ``AccessPassthrough`` class. It is + used to retrieve the last element of a set of values that are not synchronized. + """ + object_set = set(objects) + if len(object_set) > 1: + warnings.warn(f"{attr} not synced: {object_set}. 
Setting should sync.") + return sorted(object_set).pop() + + +if __name__ == "__main__": + class Param: + def __init__(self, value): + self.value = value + class Model: + def __init__(self, **kwargs): + self.params_dict = kwargs + self.param = Param(sum(kwargs.values())) + def set_value(self, key, value): + self.params_dict[key] = value + class Mixture(DelegationSyncMixin): + def __init__(self): + super().__init__() + self.c1 = Model(a=1, b=2) + self.c2 = Model(a=3, b=4, c=5) + self._init_delegation_sync( + params_dict=[self.c1, self.c2], + param=[self.c1, self.c2], + set_value=[self.c1, self.c2], + ) + mixture = Mixture() + mixture.params_dict["b"] + mixture.params_dict["a"] = 99 + mixture.param.value = 42 + mixture.set_value("c", 100) diff --git a/lymph/models/unilateral.py b/lymph/models/unilateral.py index f83bcd0..d6bc2e9 100644 --- a/lymph/models/unilateral.py +++ b/lymph/models/unilateral.py @@ -227,7 +227,7 @@ def comp_transition_prob( The probability is computed as the product of the transition probabilities of the individual LNLs. If ``assign`` is ``True``, the new state is assigned to - the model using the method :py:meth:`~Unilateral.assign_states`. + the model using the method :py:meth:`~Unilateral.set_states`. """ trans_prob = 1 for i, lnl in enumerate(self.graph.lnls): From e3f32bb5853052f7442499605ee142490b36aab3 Mon Sep 17 00:00:00 2001 From: Roman Ludwig <48687784+rmnldwg@users.noreply.github.com> Date: Tue, 20 Feb 2024 15:47:02 +0100 Subject: [PATCH 24/75] change(diag)!: use composite for distributions Instead of a dict that holds the T-stages and corresponding distributions over diagnose times, this implements them as a composite pattern. This replaces the dict-like API entirely with methods. This has several advantages: 1. It is more explicit and thus more readable 2. The composite pattern is designed to work naturally with tree-like structures, which we have here when dealing with bilateral models. 3. 
This separates a lot of the logic into separate abstract classes, further separating concerns. BREAKING CHANGES: The old API of setting and getting distributions will not work anymore. --- lymph/diagnose_times.py | 326 ++++++++++++++++++++++++---------------- 1 file changed, 195 insertions(+), 131 deletions(-) diff --git a/lymph/diagnose_times.py b/lymph/diagnose_times.py index 16a0df2..1b1a02c 100644 --- a/lymph/diagnose_times.py +++ b/lymph/diagnose_times.py @@ -16,11 +16,12 @@ import inspect import logging import warnings -from typing import Iterable +from abc import ABC +from typing import Any, Iterable, TypeVar import numpy as np -from lymph.helper import AbstractLookupDict, flatten, popfirst, set_params_for +from lymph.helper import flatten, popfirst, unflatten_and_split logger = logging.getLogger(__name__) @@ -30,10 +31,10 @@ class SupportError(Exception): class Distribution: - """Class that provides a way of storeing distributions over diagnose times.""" + """Class that provides a way of storing distributions over diagnose times.""" def __init__( self, - distribution: list[float] | np.ndarray | callable, + distribution: Iterable[float] | callable, max_time: int | None = None, ) -> None: """Initialize a distribution over diagnose times. @@ -51,62 +52,48 @@ def __init__( parameters have bounds (like the binomial distribution's ``p``), the function must raise a ``ValueError`` if the parameter is invalid. - Since ``max_time`` specifies the support of the distribution (rangin from 0 to + Since ``max_time`` specifies the support of the distribution (ranging from 0 to ``max_time``), it must be provided if a parametrized function is passed. If a list of probabilities is passed, ``max_time`` is inferred from the length of the list and can be omitted. But an error is raised if the length of the list and ``max_time`` + 1 don't match, in case it is accidentally provided. 
""" - self._kwargs = {} - if callable(distribution): - if max_time is None: - raise ValueError("max_time must be provided if a function is passed") - if max_time < 0: - raise ValueError("max_time must be a positive integer") - - self.check_callable(distribution) - self.support = np.arange(max_time + 1) - self._func = distribution - self._frozen = self.distribution - + self._init_from_callable(distribution, max_time) else: - max_time = self.check_frozen(distribution, max_time) - self.support = np.arange(max_time + 1) - self._func = None - self._frozen = self.normalize(distribution) + self._init_from_frozen(distribution, max_time) - def copy(self) -> Distribution: - """Return a copy of the distribution. + def _init_from_frozen(self, distribution: Iterable[float], max_time: int): + """Initialize the distribution from a frozen distribution.""" + if max_time is None: + max_time = len(distribution) - 1 - Note: - This will return a frozen distribution, even if the original distribution - was parametrized. - """ - return type(self)( - distribution=self.distribution, - max_time=self.support[-1], - ) + if max_time != len(distribution) - 1: + raise ValueError("max_time and the length of the distribution don't match") + self.support = np.arange(max_time + 1) + self._kwargs = {} + self._func = None + self._frozen = self.normalize(distribution) - @staticmethod - def check_frozen(distribution: list[float] | np.ndarray, max_time: int) -> int: - """Check if the frozen distribution is valid. - The frozen distribution must be a list or array of probabilities for each - diagnose time. The length of the list must be ``max_time`` + 1. 
- """ + def _init_from_callable(self, distribution: callable, max_time: int | None = None): + """Initialize the distribution from a callable distribution.""" if max_time is None: - max_time = len(distribution) - 1 - elif max_time != len(distribution) - 1: - raise ValueError("max_time and the length of the distribution don't match") + raise ValueError("max_time must be provided if a function is passed") + if max_time < 0: + raise ValueError("max_time must be a positive integer") - return max_time + self.support = np.arange(max_time + 1) + self._kwargs = self.extract_kwargs(distribution) + self._func = distribution + self._frozen = self.pmf - def check_callable(self, distribution: callable) -> None: - """Check if the callable's signature is valid. + @staticmethod + def extract_kwargs(distribution: callable) -> dict[str, Any]: + """Extract the keyword arguments from the provided parametric distribution. The signature of the provided parametric distribution must be ``func(support, **kwargs)``. The first argument is the support of the @@ -114,6 +101,7 @@ def check_callable(self, distribution: callable) -> None: The ``**kwargs`` are keyword parameters that are passed to the function to update it. """ + kwargs = {} # skip the first parameter, which is the support skip_first = True for name, param in inspect.signature(distribution).parameters.items(): @@ -124,25 +112,24 @@ def check_callable(self, distribution: callable) -> None: if param.default is inspect.Parameter.empty: raise ValueError("All params of the function must be keyword arguments") - self._kwargs[name] = param.default + kwargs[name] = param.default + return kwargs - @classmethod - def from_instance(cls, other: Distribution, max_time: int) -> Distribution: - """Create a new distribution from an existing one.""" - if other.support[-1] != max_time: - warnings.warn( - "max_time of the new distribution is different from the old one. " - "Support will be truncated/expanded." 
- ) - if other.is_updateable: - new_instance = cls(other._func, max_time=max_time) - new_instance._kwargs = other._kwargs - else: - new_instance = cls(other.distribution[:max_time + 1], max_time=max_time) + def __repr__(self) -> str: + return f"Distribution({self.pmf})" + + + def __eq__(self, __value) -> bool: + if not isinstance(__value, Distribution): + return False - return new_instance + return np.all(self.pmf == __value.pmf) + + + def __hash__(self) -> int: + return hash(self.pmf.tobytes()) @staticmethod @@ -153,7 +140,7 @@ def normalize(distribution: np.ndarray) -> np.ndarray: @property - def distribution(self) -> np.ndarray: + def pmf(self) -> np.ndarray: """Return the probability mass function of the distribution if it is frozen.""" if not hasattr(self, "_frozen") or self._frozen is None: self._frozen = self.normalize(self._func(self.support, **self._kwargs)) @@ -209,7 +196,7 @@ def set_params(self, *args: float, **kwargs: float) -> tuple[float]: del self._frozen try: - _ = self.distribution + _ = self.pmf except ValueError as val_err: self._kwargs = old_kwargs raise ValueError("Invalid params provided to distribution") from val_err @@ -231,102 +218,179 @@ def draw_diag_times( if rng is None: rng = np.random.default_rng(seed) - return rng.choice(a=self.support, p=self.distribution, size=num) + return rng.choice(a=self.support, p=self.pmf, size=num) -class DistributionsUserDict(AbstractLookupDict): - """Dictionary with added methods for storing distributions over diagnose times.""" - max_time: int - def __setitem__( - self, - t_stage: str, - distribution: list[float] | np.ndarray | Distribution, +DC = TypeVar("DC", bound="Composite") + +class Composite(ABC): + """Abstract base class implementing the composite pattern for distributions. + + Any class inheriting from this class should be able to handle the definition of + distributions over diagnosis times. + + Example: + + >>> class MyComposite(Composite): + ... 
pass + >>> leaf1 = MyComposite(is_distribution_leaf=True) + >>> leaf2 = MyComposite(is_distribution_leaf=True) + >>> leaf3 = MyComposite(is_distribution_leaf=True) + >>> branch1 = MyComposite(distribution_children={"L1": leaf1, "L2": leaf2}) + >>> branch2 = MyComposite(distribution_children={"L3": leaf3}) + >>> root = MyComposite(distribution_children={"B1": branch1, "B2": branch2}) + >>> root.set_distribution("T1", Distribution([0.1, 0.9])) + >>> root.get_distribution("T1") + Distribution([0.1 0.9]) + >>> leaf1.get_distribution("T1") + Distribution([0.1 0.9]) + """ + _max_time: int + _distributions: dict[str, Distribution] # only for leaf nodes + _distribution_children: dict[str, Composite] + + def __init__( + self: DC, + max_time: int = 10, + distribution_children: dict[str, Composite] | None = None, + is_distribution_leaf: bool = False, ) -> None: - """Set the distribution to marginalize over diagnose times for a T-stage.""" - if isinstance(distribution, Distribution): - distribution = Distribution.from_instance(distribution, max_time=self.max_time) - else: - distribution = Distribution(distribution, max_time=self.max_time) + """Initialize the distribution composite.""" + self.max_time = max_time - super().__setitem__(t_stage, distribution) + if distribution_children is None: + distribution_children = {} + if is_distribution_leaf: + self._distributions = {} + distribution_children = {} # ignore any provided children - def __delitem__(self, t_stage: str) -> None: - """Delete the distribution for a T-stage.""" - super().__delitem__(t_stage) + self._distribution_children = distribution_children + super().__init__() @property - def num_parametric(self) -> int: - """Return the number of parametrized distributions.""" - return sum(distribution.is_updateable for distribution in self.values()) + def _is_distribution_leaf(self: DC) -> bool: + """Return whether the object is a leaf node w.r.t. 
distributions.""" + if len(self._distribution_children) > 0: + return False + if not hasattr(self, "_distributions"): + raise AttributeError(f"{self} has no children and no distributions.") - def get_params( - self, - as_dict: bool = True, - as_flat: bool = True, - ) -> float | Iterable[float] | dict[str, float]: - """Return the parameters of parametrized distributions. + return True - If ``as_dict`` is ``False``, return an iterable of all parameter values. If - ``as_dict`` is ``True``, return a nested dictionary with the T-stages as keys - and the distributions' parameter dicts as values (essentially what is returned - by :py:meth:`~lymph.diagnose_times.Distribution.get_params`). - If ``as_flat`` is ``True``, return a flat dictionary with the T-stages and - parameters as keys and values, respectively. This is the result of passing the - nested dictionary to :py:meth:`~lymph.helper.flatten`. - """ - params = {} + @property + def max_time(self: DC) -> int: + """Return the maximum time for the distributions.""" + return self._max_time - for t_stage, distribution in self.items(): - if not distribution.is_updateable: - continue + @max_time.setter + def max_time(self: DC, value: int) -> None: + """Set the maximum time for the distributions.""" + if value < 0: + raise ValueError("max_time must be a positive integer") - params[t_stage] = distribution.get_params(as_flat=as_flat) + self._max_time = value - if as_flat or not as_dict: - params = flatten(params) - return params if as_dict else params.values() + @property + def t_stages(self: DC) -> list[str]: + """Return the T-stages for which distributions are defined.""" + return list(self.get_all_distributions().keys()) - def set_params(self, *args: float, **kwargs: float) -> tuple[float]: - """Update all parametrized distributions. 
+ def get_distribution(self: DC, t_stage: str) -> Distribution: + """Return the distribution for the given ``t_stage``.""" + return self.get_all_distributions()[t_stage] - When the new parameters are provided as positional arguments, they are used up - in the order of the T-stages and remaining args are returned. - If the params are provided as keyword arguments, the keys must be of the form - ``{t_stage}_{param}``, where ``t_stage`` is the T-stage and ``param`` is the - name of the parameter to update. Keyword arguments override positional ones. - """ - return set_params_for(self, *args, **kwargs) + def get_all_distributions(self: DC) -> dict[str, Distribution]: + """Return all distributions.""" + if self._is_distribution_leaf: + return self._distributions + child_keys = list(self._distribution_children.keys()) + first_child = self._distribution_children[child_keys[0]] + first_distributions = first_child.get_all_distributions() + are_all_equal = True + for key in child_keys[1:]: + other_child = self._distribution_children[key] + are_all_equal &= first_distributions == other_child.get_all_distributions() - def draw( - self, - prob_of_t_stage: dict[str, float], - size: int = 1, - ) -> tuple[list[str], list[int]]: - """ - Draw first a T-stage and then from that distribution a diagnose time. + if not are_all_equal: + warnings.warn("Not all distributions are equal. Returning the first one.") - Args: - dist: Distribution over T-stages. For each key, this defines the - probability for seeing the respective T-stage. Will be normalized if - it isn't already. 
- """ - stage_dist = np.zeros(shape=len(self)) - t_stages = list(self.keys()) + return first_distributions - for i, t_stage in enumerate(t_stages): - stage_dist[i] = prob_of_t_stage[t_stage] - stage_dist = stage_dist / np.sum(stage_dist) - drawn_t_stages = np.random.choice(a=t_stages, p=stage_dist, size=size).tolist() - drawn_diag_times = [self[t].draw() for t in drawn_t_stages] + def set_distribution( + self: DC, + t_stage: str, + distribution: Distribution | Iterable[float] | callable, + ) -> None: + """Set/update the distribution for the given ``t_stage``.""" + if self._is_distribution_leaf: + self._distributions[t_stage] = Distribution(distribution, self.max_time) + + else: + for child in self._distribution_children.values(): + child.set_distribution(t_stage, distribution) + + + def replace_all_distributions(self: DC, distributions: dict[str, Distribution]) -> None: + """Replace all distributions with the given ones.""" + if self._is_distribution_leaf: + self._distributions = {} + for t_stage, distribution in distributions.items(): + self.set_distribution(t_stage, distribution) + + else: + for child in self._distribution_children.values(): + child.replace_all_distributions(distributions) + - return drawn_t_stages, drawn_diag_times + def get_distribution_params( + self: DC, + as_dict: bool = True, + as_flat: bool = True, + ) -> Iterable[float] | dict[str, float]: + """Return the parameters of all distributions.""" + params = {} + + if self._is_distribution_leaf: + for t_stage, distribution in self._distributions.items(): + params[t_stage] = distribution.get_params(as_flat=as_flat) + else: + for key, child in self._distribution_children.items(): + params[key] = child.get_distribution_params(as_flat=as_flat) + + if as_flat or not as_dict: + params = flatten(params) + + return params if as_dict else params.values() + + + def set_distribution_params(self: DC, *args: float, **kwargs: float) -> tuple[float]: + """Set the parameters of all distributions.""" + if 
self._is_distribution_leaf: + kwargs, global_kwargs = unflatten_and_split( + kwargs, expected_keys=self._distributions.keys() + ) + for t_stage, distribution in self._distributions.items(): + t_stage_kwargs = global_kwargs.copy() + t_stage_kwargs.update(kwargs.get(t_stage, {})) + args = distribution.set_params(*args, **t_stage_kwargs) + + else: + kwargs, global_kwargs = unflatten_and_split( + kwargs, expected_keys=self._distribution_children.keys() + ) + for key, child in self._distribution_children.items(): + child_kwargs = global_kwargs.copy() + child_kwargs.update(kwargs.get(key, {})) + args = child.set_distribution_params(*args, **child_kwargs) + + return args From 1a28909757824f4e5c258c16ed58c808027811f3 Mon Sep 17 00:00:00 2001 From: Roman Ludwig <48687784+rmnldwg@users.noreply.github.com> Date: Tue, 20 Feb 2024 15:48:16 +0100 Subject: [PATCH 25/75] change(mod)!: use composite for modalities Instead of a dict that holds the names and corresponding sens/spec for diagnostic modalities, this implements them as a composite pattern. This replaces the dict-like API entirely with methods. This has several advantages: 1. It is more explicit and thus more readable 2. The composite pattern is designed to work naturally with tree-like structures, which we have here when dealing with bilateral models. 3. This separates a lot of the logic into separate abstract classes, further separating concerns. BREAKING CHANGES: The old API of setting and getting modalities will not work anymore. 
--- lymph/modalities.py | 241 ++++++++++++++++++++++---------------------- 1 file changed, 118 insertions(+), 123 deletions(-) diff --git a/lymph/modalities.py b/lymph/modalities.py index 7398205..8daf24f 100644 --- a/lymph/modalities.py +++ b/lymph/modalities.py @@ -9,12 +9,11 @@ from __future__ import annotations import warnings -from typing import List, Tuple, Union +from abc import ABC +from typing import Literal, TypeVar import numpy as np -from lymph.helper import AbstractLookupDict - class Modality: """Stores the confusion matrix of a diagnostic modality.""" @@ -32,6 +31,10 @@ def __init__( self.is_trinary = is_trinary + def __hash__(self) -> int: + return hash((self.specificity, self.sensitivity, self.is_trinary)) + + def __repr__(self) -> str: return ( f"{type(self).__name__}(" @@ -107,126 +110,118 @@ def compute_confusion_matrix(self) -> np.ndarray: -ModalityDef = Union[Modality, np.ndarray, Tuple[float, float], List[float]] - -class ModalitiesUserDict(AbstractLookupDict): - """Dictionary storing instances of :py:class:`Modality` for a lymph model. - - This class allows the user to specify the diagnostic modalities of a lymph model - in a convenient way. The user may pass an instance of :py:class:`Modality` - or one - of its subclasses - directly. Especially for trinary models, it is recommended to - use the subclasses :py:class:`Clinical` and :py:class:`Pathological` to avoid - ambiguities. - - Alternatively, a simple tuple or list of floats may be passed, from which the first - two entries are interpreted as the specificity and sensitivity, respectively. For - trinary models, we assume the modality to be :py:class:`Clinical`. - - For completely custom confusion matrices, the user may pass a numpy array directly. - In the binary case, a valid :py:class:`Modality` instance is constructed from the - array. 
For trinary models, the array must have three rows, and is not possible - anymore to infer the type of the modality or unambiguouse values for sensitivity and - specificity. This may lead to unexpected results when the confusion matrix is - recomputed accidentally at some point. - - Examples: - - >>> binary_modalities = ModalitiesUserDict(is_trinary=False) - >>> binary_modalities["test"] = Modality(0.9, 0.8) - >>> binary_modalities["test"].confusion_matrix - array([[0.9, 0.1], - [0.2, 0.8]]) - >>> modalities = ModalitiesUserDict(is_trinary=True) - >>> modalities["CT"] = Clinical(specificity=0.9, sensitivity=0.8) - >>> modalities["CT"].confusion_matrix - array([[0.9, 0.1], - [0.9, 0.1], - [0.2, 0.8]]) - >>> modalities["PET"] = (0.85, 0.82) - >>> modalities["PET"] - Clinical(specificity=0.85, sensitivity=0.82, is_trinary=True) - >>> modalities["pathology"] = Pathological(specificity=1.0, sensitivity=1.0) - >>> modalities["pathology"].confusion_matrix - array([[1., 0.], - [0., 1.], - [0., 1.]]) +MC = TypeVar("MC", bound="Composite") + +class Composite(ABC): + """Abstract base class implementing the composite pattern for diagnostic modalities. + + Any class inheriting from this class should be able to handle the definition of + diagnostic modalities and their sensitivity/specificity values, """ - def __setitem__(self, name: str, value: ModalityDef, / ) -> None: - """Set the modality of the lymph model.""" - # pylint: disable=unidiomatic-typecheck - # pylint: disable=no-member - cls = Clinical - - if type(value) is Modality: - # we assume the modality to be clinical here, because for a binary model - # it does not matter, but for a trinary model the base `Modalitiy` class - # would not work. 
- if self.is_trinary: - warnings.warn(f"Assuming modality to be `{cls.__name__}`.") - value = cls(value.specificity, value.sensitivity, self.is_trinary) - - elif isinstance(value, Modality): - # in this case, the user has provided a `Clinical` or `Pathological` - # modality, so we can just use it after passing the model's type (binary - # or trinary). - value.is_trinary = self.is_trinary - - elif isinstance(value, np.ndarray): - # this should allow users to pass some custom confusion matrix directly. - # we do check if the matrix is valid, but the `Modalitiy` class may - # misbehave, e.g. when a recomputation of the confusion matrix is triggered. - specificity = value[0, 0] - sensitivity = value[-1, -1] - modality = Modality(specificity, sensitivity, self.is_trinary) - modality.confusion_matrix = value - - if self.is_trinary: - warnings.warn( - "Provided transition matrix will be used as is. The sensitivity " - "and specificity extracted from it may be nonsensical. Recomputing " - "the confusion matrix from them may not work." 
- ) - - value = modality + _is_trinary: bool + _modalities: dict[str, Modality] # only for leaf nodes + _modality_children: dict[str, Composite] + + def __init__( + self: MC, + is_trinary: bool = False, + modality_children: dict[str, Composite] = None, + is_modality_leaf: bool = False, + ) -> None: + """Initialize the modality composite.""" + self._is_trinary = is_trinary + + if modality_children is None: + modality_children = {} + + if is_modality_leaf: + self._modalities = {} + self._modality_children = {} # ignore any provided children + + self._modality_children = modality_children + super().__init__() + + + @property + def _is_modality_leaf(self: MC) -> bool: + """Return whether the composite is a leaf node.""" + if len(self._modality_children) > 0: + return False + + if not hasattr(self, "_modalities"): + raise AttributeError(f"{self} has no children and no modalities.") + + return True + + + @property + def is_trinary(self: MC) -> bool: + """Return whether the modality is trinary.""" + return self._is_trinary + + + def get_modality(self: MC, name: str) -> Modality: + """Return the modality with the given name.""" + return self.get_all_modalities()[name] + + + def get_all_modalities(self: MC) -> dict[str, Modality]: + """Return all modalities of the composite.""" + if self._is_modality_leaf: + return self._modalities + + child_keys = list(self._modality_children.keys()) + first_child = self._modality_children[child_keys[0]] + firs_modalities = first_child.get_all_modalities() + are_all_equal = True + for key in child_keys[1:]: + other_child = self._modality_children[key] + are_all_equal &= firs_modalities == other_child.get_all_modalities() + + if not are_all_equal: + warnings.warn("Not all modalities are equal. 
Returning first one.") + + return firs_modalities + + + def set_modality( + self, + name: str, + specificity: float, + sensitivity: float, + kind: Literal["clinical", "pathological"] = "clinical", + ) -> None: + """Set the modality with the given name.""" + if self._is_modality_leaf: + cls = Pathological if kind == "pathological" else Clinical + self._modalities[name] = cls(specificity, sensitivity, self.is_trinary) + + else: + for child in self._modality_children.values(): + child.set_modality(name, specificity, sensitivity, kind) + + + def replace_all_modalities(self: MC, modalities: dict[str, Modality]) -> None: + """Replace all modalities of the composite.""" + if self._is_modality_leaf: + self._modalities = {} + for name, modality in modalities.items(): + kind = "pathological" if isinstance(modality, Pathological) else "clinical" + self.set_modality(name, modality.specificity, modality.sensitivity, kind) else: - # lastly, the user may have provided a list or tuple with the specificity - # and sensitivity and we're trying to interpret it that way. As before, we - # assume the modality to be clinical here. - try: - specificity, sensitivity = value - if self.is_trinary: - warnings.warn(f"Assuming modality to be `{cls.__name__}`.") - value = cls(specificity, sensitivity, self.is_trinary) - except (ValueError, TypeError) as err: - raise ValueError( - "Value must be a `Clinical` or `Pathological` modality, a " - "confusion matrix or a list/tuple containing specificity and " - "sensitivity." - ) from err - - super().__setitem__(name, value) - - - def __delitem__(self, key: str) -> None: - return super().__delitem__(key) - - - def confusion_matrices_hash(self) -> int: - """Compute a kind of hash from all confusion matrices. - - Note: - This is used to check if some modalities have changed and the observation - matrix needs to be recomputed. It should not be used as a replacement for - the ``__hash__`` method, for two reasons: - - 1. 
It may change over the lifetime of the object, whereas ``__hash__`` - should be constant. - 2. It only takes into account the ``confusion_matrix`` of the modality, - nothing else. - """ - confusion_mat_bytes = b"" - for modality in self.values(): - confusion_mat_bytes += modality.confusion_matrix.tobytes() - - return hash(confusion_mat_bytes) + for child in self._modality_children.values(): + child.replace_all_modalities(modalities) + + + def compute_modalities_hash(self: MC) -> int: + """Compute a hash from all modalities.""" + hash_res = 0 + if self._is_modality_leaf: + for name, modality in self._modalities.items(): + hash_res = hash((hash_res, name, hash(modality))) + + for child in self._modality_children.values(): + hash_res = hash((hash_res, hash(child))) + + return hash_res From 8bbcbceef245b876cd402774b0094c87c1ab6173 Mon Sep 17 00:00:00 2001 From: Roman Ludwig <48687784+rmnldwg@users.noreply.github.com> Date: Tue, 20 Feb 2024 15:51:12 +0100 Subject: [PATCH 26/75] feat(type): add model ABC to inherit from I added an abstract base class from which all model-like classes should inherit. It defines all the methods that need to be present in a model. The idea behind this is that any subclass of this can be part of a composite that correctly delegates getting/setting parameters, diagnose time distributions, and modalities. --- lymph/types.py | 61 +++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 60 insertions(+), 1 deletion(-) diff --git a/lymph/types.py b/lymph/types.py index bc43910..aae7494 100644 --- a/lymph/types.py +++ b/lymph/types.py @@ -1,8 +1,10 @@ """ Type aliases and protocols used in the lymph package. 
""" -from typing import Protocol +from abc import ABC, abstractmethod +from typing import Iterable, Protocol, TypeVar +import pandas as pd from pandas._libs.missing import NAType @@ -27,3 +29,60 @@ def get_params( DiagnoseType = dict[str, PatternType] """Type alias for a diagnose, which is a involvement pattern per diagnostic modality.""" + + +M = TypeVar("M", bound="Model") + +class Model(ABC): + """Abstract base class for models. + + This class provides a scaffold for the methods that any model for lymphatic + tumor progression should implement. + """ + @abstractmethod + def get_params( + self: M, + as_dict: bool = True, + as_flat: bool = True, + ) -> Iterable[float] | dict[str, float]: + """Return the parameters of the model. + + The parameters are returned as a dictionary if ``as_dict`` is True, and as + an iterable of floats otherwise. The argument ``as_flat`` determines whether + the returned dict is flat or nested. This is helpful, because a model may call + the ``get_params`` method of other instances, which can be fused to get a + flat dictionary. + """ + + @abstractmethod + def set_params(self: M, *args: float, **kwargs: float) -> tuple[float]: + """Set the parameters of the model. + + The parameters may be passed as positional or keyword arguments. The positional + arguments are used up one by one by the ``set_params`` methods the model calls. + Keyword arguments override the positional arguments. + """ + + @abstractmethod + def load_patient_data( + self: M, + patient_data: pd.DataFrame, + ) -> None: + """Load patient data in `LyProX`_ format into the model. + + .. _LyProX: https://lyprox.org/ + """ + + @abstractmethod + def likelihood( + self: M, + given_param_args: Iterable[float], + given_param_kwargs: dict[str, float], + log: bool = True, + ) -> float: + """Return the likelihood of the model given the parameters. + + The likelihood is returned in log space if ``log`` is True, and in linear space + otherwise. 
The parameters may be passed as positional or keyword arguments. + They are then passed to the :py:meth:`set_params` method first. + """ From 679b86a5e90ed889215c80fa5ccb9745b91ba124 Mon Sep 17 00:00:00 2001 From: Roman Ludwig <48687784+rmnldwg@users.noreply.github.com> Date: Tue, 20 Feb 2024 15:52:45 +0100 Subject: [PATCH 27/75] change(uni)!: transform to composite pattern Use the new composite pattern for the distribution over diagnose times and modalities. BREAKING CHANGE: `Unilateral` has no attributes `modalities` or `diag_time_dists` anymore. --- lymph/models/unilateral.py | 120 ++++++++++--------------------------- 1 file changed, 31 insertions(+), 89 deletions(-) diff --git a/lymph/models/unilateral.py b/lymph/models/unilateral.py index d6bc2e9..3e39b9a 100644 --- a/lymph/models/unilateral.py +++ b/lymph/models/unilateral.py @@ -3,25 +3,27 @@ import warnings from functools import cached_property from itertools import product -from typing import Any, Callable, Generator, Iterable +from typing import Any, Callable, Iterable import numpy as np import pandas as pd -from lymph import diagnose_times, graph, matrix, modalities +from lymph import diagnose_times, graph, matrix, modalities, types from lymph.helper import ( - DelegationSyncMixin, dict_to_func, early_late_mapping, flatten, smart_updating_dict_cached_property, ) -from lymph.types import DiagnoseType, PatternType warnings.filterwarnings("ignore", category=pd.errors.PerformanceWarning) -class Unilateral(DelegationSyncMixin): +class Unilateral( + diagnose_times.Composite, + modalities.Composite, + types.Model, +): """Class that models metastatic progression in a unilateral lymphatic system. 
It does this by representing it as a directed graph (DAG), which is stored in and @@ -95,17 +97,11 @@ def __init__( allowed_states=allowed_states, ) - if 0 >= max_time: - raise ValueError("Latest diagnosis time `max_time` must be positive int") - - self._max_time = max_time - - self._init_delegation_sync( - is_binary=[self.graph], - is_trinary=[self.graph], - get_state=[self.graph], - set_state=[self.graph], - state_list=[self.graph], + diagnose_times.Composite.__init__( + self, max_time=max_time, is_distribution_leaf=True, + ) + modalities.Composite.__init__( + self, is_trinary=self.is_trinary, is_modality_leaf=True, ) @@ -126,17 +122,6 @@ def __str__(self) -> str: return f"Unilateral with {len(self.graph.tumors)} tumors and {len(self.graph.lnls)} LNLs" - @property - def max_time(self) -> int: - """The latest time(-step) to include in the model's evolution. - - This attribute cannot be changed (easily). Thus, we recommend creating a new - instance of the model when you feel like needing to change the initially set - value. - """ - return self._max_time - - def print_info(self): """Print detailed information about the instance.""" num_tumors = len(self.graph.tumors) @@ -154,7 +139,7 @@ def get_params( self, as_dict: bool = True, as_flat: bool = True, - ) -> float | Iterable[float] | dict[str, float]: + ) -> Iterable[float] | dict[str, float]: """Get the parameters of the model. If ``as_dict`` is ``True``, the parameters are returned as a dictionary. If @@ -162,7 +147,7 @@ def get_params( dictionaries are merged into one, using :py:func:`~lymph.helper.flatten`. 
""" params = self.graph.get_params(as_flat=as_flat) - params.update(self.diag_time_dists.get_params(as_flat=as_flat)) + params.update(self.get_distribution_params(as_flat=as_flat)) if as_flat or not as_dict: params = flatten(params) @@ -215,7 +200,7 @@ def set_params(self, *args: float, **kwargs: float) -> tuple[float]: 'III_growth': 0.123} """ args = self.graph.set_params(*args, **kwargs) - return self.diag_time_dists.set_params(*args, **kwargs) + return self.set_distribution_params(*args, **kwargs) def comp_transition_prob( @@ -256,7 +241,7 @@ def comp_diagnose_prob( diagnoses, given the current state of the system. """ prob = 1. - for name, modality in self.modalities.items(): + for name, modality in self.get_all_modalities().items(): if name in diagnoses: mod_diagnose = diagnoses[name] for lnl in self.graph.lnls: @@ -304,7 +289,7 @@ def obs_list(self): pathology modality. """ possible_obs_list = [] - for modality in self.modalities.values(): + for modality in self.get_all_modalities().values(): possible_obs = np.arange(modality.confusion_matrix.shape[1]) for _ in self.graph.lnls: possible_obs_list.append(possible_obs.copy()) @@ -344,27 +329,6 @@ def transition_matrix(self) -> np.ndarray: return matrix.cached_generate_transition(self.graph.parameter_hash(), self) - @smart_updating_dict_cached_property - def modalities(self) -> modalities.ModalitiesUserDict: - """Dictionary of diagnostic modalities and their confusion matrices. - - This must be set by the user. For example, if one wanted to add the modality - "CT" with a sensitivity of 80% and a specificity of 90%, one would do: - - >>> model = Unilateral(graph_dict={ - ... ("tumor", "T"): ["II", "III"], - ... ("lnl", "II"): ["III"], - ... ("lnl", "III"): [], - ... 
}) - >>> model.modalities["CT"] = (0.8, 0.9) - - See Also: - :py:class:`~lymph.descriptors.modalities.ModalitiesUserDict` - :py:class:`~lymph.descriptors.modalities.Modality` - """ - return modalities.ModalitiesUserDict(is_trinary=self.is_trinary) - - def observation_matrix(self) -> np.ndarray: """The matrix encoding the probabilities to observe a certain diagnosis. @@ -379,7 +343,7 @@ def observation_matrix(self) -> np.ndarray: The function actually computing the observation matrix. """ return matrix.cached_generate_observation( - self.modalities.confusion_matrices_hash(), self + self.compute_modalities_hash(), self ) @@ -407,28 +371,6 @@ def diagnose_matrices(self) -> matrix.DiagnoseUserDict: return matrix.DiagnoseUserDict(model=self) - @smart_updating_dict_cached_property - def diag_time_dists(self) -> diagnose_times.DistributionsUserDict: - """Dictionary of distributions over diagnose times for each T-stage.""" - return diagnose_times.DistributionsUserDict(max_time=self.max_time) - - - @property - def t_stages(self) -> Generator[str, None, None]: - """Generator of all valid T-stages in the model. - - This is the intersection of the unique T-stages found in the (mapped) data - and the T-stages defined in the distributions over diagnose times. - """ - for t_stage in self.diag_time_dists.keys(): - # This implementation is a little special, because the diagnose matrix - # of a particular T-stage is only computed when either __contains__ or - # __getitem__ is called on it. Therefore, we cannot directly loop over - # the diagnose matrices' keys or something like that. 
- if t_stage in self.diagnose_matrices: - yield t_stage - - def load_patient_data( self, patient_data: pd.DataFrame, @@ -458,7 +400,7 @@ def load_patient_data( if isinstance(mapping, dict): mapping = dict_to_func(mapping) - for modality_name in self.modalities.keys(): + for modality_name in self.get_all_modalities().keys(): if modality_name not in patient_data: raise ValueError(f"Modality '{modality_name}' not found in data.") @@ -479,7 +421,7 @@ def load_patient_data( lambda row: mapping(row["tumor", "1", "t_stage"]), axis=1 ) - for t_stage in self.diag_time_dists.keys(): + for t_stage in self.t_stages: if t_stage not in patient_data["_model", "#", "t_stage"].values: warnings.warn(f"No data for T-stage {t_stage} found.") @@ -550,7 +492,7 @@ def comp_dist_evolution(self) -> np.ndarray: distribution over diagnose times that are stored and managed for each T-stage in the dictionary :py:attr:`~diag_time_dists`. """ - state_dists = np.zeros(shape=(self.max_time + 1, len(self.state_list))) + state_dists = np.zeros(shape=(self.max_time + 1, len(self.graph.state_list))) state_dists[0, 0] = 1. 
for t in range(1, self.max_time + 1): @@ -573,7 +515,7 @@ def comp_state_dist(self, t_stage: str = "early", mode: str = "HMM") -> np.ndarr """ if mode == "HMM": state_dists = self.comp_dist_evolution() - diag_time_dist = self.diag_time_dists[t_stage].distribution + diag_time_dist = self.get_distribution(t_stage).pmf return diag_time_dist @ state_dists @@ -631,7 +573,7 @@ def _hmm_likelihood(self, log: bool = True, t_stage: str | None = None) -> float for t_stage in t_stages: patient_likelihoods = ( - self.diag_time_dists[t_stage].distribution + self.get_distribution(t_stage).pmf @ evolved_model @ self.diagnose_matrices[t_stage] ) @@ -686,12 +628,12 @@ def likelihood( def comp_diagnose_encoding( self, - given_diagnoses: DiagnoseType | None = None, + given_diagnoses: types.DiagnoseType | None = None, ) -> np.ndarray: """Compute one-hot vector encoding of a given diagnosis.""" diagnose_encoding = np.array([True], dtype=bool) - for modality in self.modalities.keys(): + for modality in self.get_all_modalities().keys(): diagnose_encoding = np.kron( diagnose_encoding, matrix.compute_encoding( @@ -708,7 +650,7 @@ def comp_posterior_state_dist( self, given_param_args: Iterable[float] | None = None, given_param_kwargs: dict[str, float] | None = None, - given_diagnoses: DiagnoseType | None = None, + given_diagnoses: types.DiagnoseType | None = None, t_stage: str | int = "early", mode: str = "HMM", ) -> np.ndarray: @@ -765,10 +707,10 @@ def comp_posterior_state_dist( def risk( self, - involvement: PatternType | None = None, + involvement: types.PatternType | None = None, given_param_args: Iterable[float] | None = None, given_param_kwargs: dict[str, float] | None = None, - given_diagnoses: dict[str, PatternType] | None = None, + given_diagnoses: dict[str, types.PatternType] | None = None, t_stage: str = "early", mode: str = "HMM", **_kwargs, @@ -863,18 +805,18 @@ def draw_patients( stage_dist = np.array(stage_dist) / sum(stage_dist) drawn_t_stages = rng.choice( - 
a=list(self.diag_time_dists.keys()), + a=self.t_stages, p=stage_dist, size=num, ) drawn_diag_times = [ - self.diag_time_dists[t_stage].draw_diag_times(rng=rng) + self.get_distribution(t_stage).draw_diag_times(rng=rng) for t_stage in drawn_t_stages ] drawn_obs = self.draw_diagnoses(drawn_diag_times, rng=rng) - modality_names = list(self.modalities.keys()) + modality_names = list(self.get_all_modalities().keys()) lnl_names = list(self.graph.lnls.keys()) multi_cols = pd.MultiIndex.from_product([modality_names, ["ipsi"], lnl_names]) From 0b4e9620e7770ddacd5de97fe8a283844d465bdc Mon Sep 17 00:00:00 2001 From: Roman Ludwig <48687784+rmnldwg@users.noreply.github.com> Date: Tue, 20 Feb 2024 15:53:23 +0100 Subject: [PATCH 28/75] change: update other modules to new API --- lymph/helper.py | 36 ------------------------------------ lymph/matrix.py | 6 +++--- lymph/models/__init__.py | 7 +++---- tests/distribution_test.py | 12 ++++++------ 4 files changed, 12 insertions(+), 49 deletions(-) diff --git a/lymph/helper.py b/lymph/helper.py index 0acb4fe..b152c99 100644 --- a/lymph/helper.py +++ b/lymph/helper.py @@ -565,15 +565,6 @@ def __contains__(self, key: object) -> bool: return False - def clear_without_trigger(self) -> None: - """Clear the dictionary without triggering the callbacks.""" - self.__dict__["data"].clear() - - def update_without_trigger(self, other=(), /, **kwargs): - """Update the dictionary without triggering the callbacks.""" - self.__dict__["data"].update(other, **kwargs) - - class smart_updating_dict_cached_property(cached_property): """Allows setting/deleting dict-like attrs by updating/clearing them.""" def __set__(self, instance: object, value: Any) -> None: @@ -753,30 +744,3 @@ def check_unique_and_get_first(objects: Iterable, attr: str = "") -> Any: if len(object_set) > 1: warnings.warn(f"{attr} not synced: {object_set}. 
Setting should sync.") return sorted(object_set).pop() - - -if __name__ == "__main__": - class Param: - def __init__(self, value): - self.value = value - class Model: - def __init__(self, **kwargs): - self.params_dict = kwargs - self.param = Param(sum(kwargs.values())) - def set_value(self, key, value): - self.params_dict[key] = value - class Mixture(DelegationSyncMixin): - def __init__(self): - super().__init__() - self.c1 = Model(a=1, b=2) - self.c2 = Model(a=3, b=4, c=5) - self._init_delegation_sync( - params_dict=[self.c1, self.c2], - param=[self.c1, self.c2], - set_value=[self.c1, self.c2], - ) - mixture = Mixture() - mixture.params_dict["b"] - mixture.params_dict["a"] = 99 - mixture.param.value = 42 - mixture.set_value("c", 100) diff --git a/lymph/matrix.py b/lymph/matrix.py index 4f4d3ea..5d3af72 100644 --- a/lymph/matrix.py +++ b/lymph/matrix.py @@ -90,7 +90,7 @@ def generate_observation(instance: models.Unilateral) -> np.ndarray: shape = (base ** num_lnls, 1) observation_matrix = np.ones(shape=shape) - for modality in instance.modalities.values(): + for modality in instance.get_all_modalities().values(): mod_obs_matrix = np.ones(shape=(1,1)) for _ in instance.graph.lnls: mod_obs_matrix = np.kron(mod_obs_matrix, modality.confusion_matrix) @@ -237,7 +237,7 @@ def generate_data_encoding( for i, (_, patient_row) in enumerate(selected_patients["_model"].iterrows()): patient_encoding = np.ones(shape=1, dtype=bool) - for modality_name in model.modalities.keys(): + for modality_name in model.get_all_modalities().keys(): if modality_name not in patient_row: continue diagnose_encoding = compute_encoding( @@ -320,7 +320,7 @@ def __setitem__(self, __key, __value) -> None: warnings.warn("Setting the diagnose matrices is not supported.") def __getitem__(self, key: Any) -> Any: - modalities_hash = self.model.modalities.confusion_matrices_hash() + modalities_hash = self.model.compute_modalities_hash() patient_data_hash = self.model.patient_data_hash joint_hash = 
hash((modalities_hash, patient_data_hash, key)) return cached_generate_diagnose(joint_hash, self.model, key) diff --git a/lymph/models/__init__.py b/lymph/models/__init__.py index 5333065..fde9311 100644 --- a/lymph/models/__init__.py +++ b/lymph/models/__init__.py @@ -1,9 +1,8 @@ """ This module implements the core classes to model lymphatic tumor progression. """ - -from .bilateral import Bilateral -from .unilateral import Unilateral -from .midline import Midline +from lymph.models.bilateral import Bilateral +from lymph.models.midline import Midline +from lymph.models.unilateral import Unilateral __all__ = ["Unilateral", "Bilateral", "Midline"] diff --git a/tests/distribution_test.py b/tests/distribution_test.py index 7c8a7dc..06afeeb 100644 --- a/tests/distribution_test.py +++ b/tests/distribution_test.py @@ -45,8 +45,8 @@ def test_frozen_distribution_without_max_time(self): warnings.simplefilter("ignore", category=UserWarning) self.assertEqual({}, dist.get_params(as_dict=True)) self.assertTrue(len(dist.support) == self.max_time + 1) - self.assertTrue(len(dist.distribution) == self.max_time + 1) - self.assertTrue(np.allclose(sum(dist.distribution), 1.)) + self.assertTrue(len(dist.pmf) == self.max_time + 1) + self.assertTrue(np.allclose(sum(dist.pmf), 1.)) def test_frozen_distribution_with_max_time(self): """Test the creation of a frozen distribution where we provide the max_time.""" @@ -56,8 +56,8 @@ def test_frozen_distribution_with_max_time(self): warnings.simplefilter("ignore", category=UserWarning) self.assertEqual({}, dist.get_params(as_dict=True)) self.assertTrue(len(dist.support) == self.max_time + 1) - self.assertTrue(len(dist.distribution) == self.max_time + 1) - self.assertTrue(np.allclose(sum(dist.distribution), 1.)) + self.assertTrue(len(dist.pmf) == self.max_time + 1) + self.assertTrue(np.allclose(sum(dist.pmf), 1.)) self.assertRaises(ValueError, Distribution, self.array_arg, max_time=5) @@ -72,8 +72,8 @@ def 
test_updateable_distribution_with_max_time(self): dist.set_params(p=0.5) self.assertTrue(len(dist.support) == self.max_time + 1) - self.assertTrue(len(dist.distribution) == self.max_time + 1) - self.assertTrue(np.allclose(sum(dist.distribution), 1.)) + self.assertTrue(len(dist.pmf) == self.max_time + 1) + self.assertTrue(np.allclose(sum(dist.pmf), 1.)) def test_updateable_distribution_raises_value_error(self): """Check that an invalid parameter raises a ValueError.""" From 4a6d7ca33662de747f03e3d4463e75a14a967f1d Mon Sep 17 00:00:00 2001 From: Roman Ludwig <48687784+rmnldwg@users.noreply.github.com> Date: Tue, 20 Feb 2024 15:53:41 +0100 Subject: [PATCH 29/75] docs: update quickstart guide to new API --- docs/source/quickstart_unilateral.ipynb | 86 ++++++++++--------------- 1 file changed, 34 insertions(+), 52 deletions(-) diff --git a/docs/source/quickstart_unilateral.ipynb b/docs/source/quickstart_unilateral.ipynb index 6779d9b..aa9a14d 100644 --- a/docs/source/quickstart_unilateral.ipynb +++ b/docs/source/quickstart_unilateral.ipynb @@ -45,10 +45,10 @@ "source": [ "graph_dict = {\n", " ('tumor', 'T') : ['I', 'II', 'III', 'IV'], \n", - " ('lnl' , 'I') : ['II'], \n", - " ('lnl' , 'II') : ['III'], \n", - " ('lnl' , 'III'): ['IV'], \n", - " ('lnl' , 'IV') : []\n", + " ('lnl' , 'I') : ['II'], \n", + " ('lnl' , 'II') : ['III'], \n", + " ('lnl' , 'III'): ['IV'], \n", + " ('lnl' , 'IV') : []\n", "}" ] }, @@ -86,8 +86,8 @@ "metadata": {}, "outputs": [], "source": [ - "model.graph.edges[\"T_to_II\"].set_params(spread=0.5)\n", - "model.graph.edges[\"T_to_II\"].get_params(\"spread\")" + "model.graph.edges[\"TtoII\"].set_params(spread=0.5)\n", + "model.graph.edges[\"TtoII\"].get_params(\"spread\")" ] }, { @@ -96,7 +96,7 @@ "metadata": {}, "outputs": [], "source": [ - "model.assign_params(II_to_III_spread=0.25)\n", + "model.set_params(IItoIII_spread=0.25)\n", "model.get_params(as_dict=True)" ] }, @@ -106,7 +106,7 @@ "source": [ "## Diagnostic Modalities\n", "\n", - "To 
ultimately compute the likelihoods of observations, we need to fix the sensitivities and specificities of the obtained diagnoses. And since we might have multiple diagnostic modalities available, we need to tell the system which of them comes with which specificity and sensitivity. We do this by creating a dictionary of specificity/sensitivity pairs:" + "To ultimately compute the likelihoods of observations, we need to fix the sensitivities and specificities of the obtained diagnoses. And since we might have multiple diagnostic modalities available, we need to tell the system which of them comes with which specificity and sensitivity. We do this by adding specificity/sensitivity pairs to our model:" ] }, { @@ -115,52 +115,20 @@ "metadata": {}, "outputs": [], "source": [ - "mri_and_pet_spsn = {\"MRI\": [0.63, 0.81], \n", - " \"PET\": [0.86, 0.79]}\n", - "# ^ ^\n", - "# specificty sensitivity" + "model.set_modality(\"MRI\", specificity=0.63, sensitivity=0.81)\n", + "model.set_modality(\"PET\", specificity=0.86, sensitivity=0.79, kind=\"clinical\")\n", + "# ^^^^^^^^^^^^^^^\n", + "# No effect in binary model,\n", + "# but important for trinary.\n", + "model.get_all_modalities()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Now we can pass this to the system by setting the `modalities` attribute, which expects a dictionary containing the diagnostic modalities and as a key to it the numbers for specificity & sensitivity." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "model.modalities = mri_and_pet_spsn\n", - "model.modalities" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "One can also specify the values of the modality dictionary with the helper classes `Clincal` and `Pathological`. In the binary model case that case no advantage over the method above, aside from being maybe a bit more readable. But when we get to trinary models, it becomes very important." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from lymph.modalities import Clinical\n", + "One can also specify if the modality is a `Clincal` or `Pathological` one. In the binary model case that has no advantage and makes no difference, aside from being maybe a bit more explicit. But when we get to trinary models, it becomes very important.\n", "\n", - "model.modalities[\"CT\"] = Clinical(specificity=0.86, sensitivity=0.81)\n", - "model.modalities" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ "Now it is also possible to access the confusion matrix of the specified diagnostic modalities:" ] }, @@ -170,7 +138,7 @@ "metadata": {}, "outputs": [], "source": [ - "print(model.modalities[\"PET\"].confusion_matrix)" + "print(model.get_modality(\"PET\").confusion_matrix)" ] }, { @@ -231,7 +199,11 @@ "metadata": {}, "outputs": [], "source": [ - "model.modalities = {\"PET\": [0.86, 0.79]}" + "from lymph.modalities import Clinical\n", + "\n", + "\n", + "model.replace_all_modalities({\"PET\": Clinical(specificity=0.86, sensitivity=0.79),})\n", + "model.get_all_modalities()" ] }, { @@ -304,7 +276,8 @@ "metadata": {}, "outputs": [], "source": [ - "model.diag_time_dists[\"early\"] = early_prior" + "model.set_distribution(\"early\", early_prior)\n", + "model.get_all_distributions()" ] }, { @@ -367,8 +340,17 @@ "metadata": {}, "outputs": [], "source": [ - "model.diag_time_dists[\"late\"] = late_binomial\n", - "model.get_params(as_dict=True)" + "model.set_distribution(\"late\", late_binomial)\n", + "model.get_all_distributions()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "model.get_distribution_params(as_flat=False)" ] }, { From 571b42417f6bb0a9dc9f47d665bb3430a4328183 Mon Sep 17 00:00:00 2001 From: Roman Ludwig <48687784+rmnldwg@users.noreply.github.com> Date: Tue, 20 Feb 2024 16:31:10 +0100 Subject: [PATCH 30/75] 
change(bi): update for new composite API --- lymph/diagnose_times.py | 20 +++++++-- lymph/modalities.py | 9 +++- lymph/models/bilateral.py | 85 +++++++++++++++----------------------- lymph/models/unilateral.py | 5 +-- 4 files changed, 60 insertions(+), 59 deletions(-) diff --git a/lymph/diagnose_times.py b/lymph/diagnose_times.py index 1b1a02c..75d19dd 100644 --- a/lymph/diagnose_times.py +++ b/lymph/diagnose_times.py @@ -285,15 +285,27 @@ def _is_distribution_leaf(self: DC) -> bool: @property def max_time(self: DC) -> int: """Return the maximum time for the distributions.""" - return self._max_time + if self._is_distribution_leaf: + return self._max_time + + max_times = {child.max_time for child in self._distribution_children.values()} + if len(max_times) > 1: + warnings.warn("Not all max_times are equal. Returning the first one.") + + return self._distribution_children.values()[0].max_time @max_time.setter def max_time(self: DC, value: int) -> None: """Set the maximum time for the distributions.""" - if value < 0: - raise ValueError("max_time must be a positive integer") + if self._is_distribution_leaf: + if value < 0: + raise ValueError("max_time must be a positive integer") + + self._max_time = value - self._max_time = value + else: + for child in self._distribution_children.values(): + child.max_time = value @property diff --git a/lymph/modalities.py b/lymph/modalities.py index 8daf24f..0d2fa27 100644 --- a/lymph/modalities.py +++ b/lymph/modalities.py @@ -157,7 +157,14 @@ def _is_modality_leaf(self: MC) -> bool: @property def is_trinary(self: MC) -> bool: """Return whether the modality is trinary.""" - return self._is_trinary + if self._is_modality_leaf: + return self._is_trinary + + values = {child.is_trinary for child in self._modality_children.values()} + if len(values) > 1: + warnings.warn("Not all children have same 'narity'. 
Returning first one.") + + return self._modality_children.values()[0].is_trinary def get_modality(self: MC, name: str) -> Modality: diff --git a/lymph/models/bilateral.py b/lymph/models/bilateral.py index 47b5bdf..308ed46 100644 --- a/lymph/models/bilateral.py +++ b/lymph/models/bilateral.py @@ -7,21 +7,18 @@ import numpy as np import pandas as pd -from lymph import matrix, models -from lymph.helper import ( - DelegationSyncMixin, - early_late_mapping, - flatten, - set_bilateral_params_for, -) -from lymph.types import DiagnoseType, PatternType +from lymph import diagnose_times, matrix, modalities, models, types +from lymph.helper import early_late_mapping, flatten, set_bilateral_params_for warnings.filterwarnings("ignore", category=pd.errors.PerformanceWarning) logger = logging.getLogger(__name__) - -class Bilateral(DelegationSyncMixin): +class Bilateral( + diagnose_times.Composite, + modalities.Composite, + types.Model, +): """Class that models metastatic progression in a bilateral lymphatic system. 
This is achieved by creating two instances of the @@ -86,18 +83,17 @@ def __init__( "lnl_spread": True, } - if self.is_symmetric["modalities"]: - delegation_sync_kwargs = {"modalities": [self.ipsi, self.contra]} - else: - delegation_sync_kwargs = {} - - self._init_delegation_sync( - max_time=[self.ipsi, self.contra], - t_stages=[self.ipsi, self.contra], - diag_time_dists=[self.ipsi, self.contra], - is_binary=[self.ipsi, self.contra], - is_trinary=[self.ipsi, self.contra], - **delegation_sync_kwargs, + diagnose_times.Composite.__init__( + self, + max_time=self.max_time, # `max_time` already accessible from ipsi/contra + distribution_children={"ipsi": self.ipsi, "contra": self.contra}, + is_distribution_leaf=False, + ) + modalities.Composite.__init__( + self, + is_trinary=self.is_trinary, # `is_trinary` already accessible from ipsi/contra + modality_children={"ipsi": self.ipsi, "contra": self.contra}, + is_modality_leaf=False, ) @@ -146,27 +142,14 @@ def get_params( ) -> Iterable[float] | dict[str, float]: """Return the parameters of the model. - If ``as_flat`` is ``False``, the parameters of the two sides of the neck are - returned as a nested dictionary in addition to one dictionary storing the - parameters of the parametric distributions for marginalizing over diagnose - times. Otherwise, the parameters are returned as a flat dictionary, with the - keys prefixed by ``"ipsi_"`` or ``"contra_"``. The parameters of the parametric - distributions are only prefixed by their corresponding T-stage, e.g. - ``"early_p"``. - - If ``as_dict`` is ``True``, the parameters are returned as a dictionary. If - ``param`` is not ``None``, only the value of the parameter with that name is - returned. Otherwise, all parameters are returned as a dictionary or a list. 
- - See Also: - :py:meth:`lymph.diagnose_times.Distribution.get_params` - :py:meth:`lymph.diagnose_times.DistributionsUserDict.get_params` - :py:meth:`lymph.graph.Edge.get_params` - :py:meth:`lymph.models.Unilateral.get_params` + It returns the combination of the call to the + :py:meth:`lymph.models.Unilateral.get_params` of the ipsi- and contralateral + side. For the use of the ``as_dict`` and ``as_flat`` arguments, see the + documentation of the :py:meth:`lymph.types.Model.get_params` method. """ ipsi_params = self.ipsi.graph.get_params(as_flat=as_flat) contra_params = self.contra.graph.get_params(as_flat=as_flat) - dist_params = self.diag_time_dists.get_params(as_flat=as_flat) + dist_params = self.get_distribution_params(as_flat=as_flat) params = { "ipsi": ipsi_params, @@ -225,7 +208,7 @@ def set_params(self, *args: float, **kwargs: float) -> tuple[float]: is_symmetric=self.is_symmetric["lnl_spread"], **kwargs, ) - return self.diag_time_dists.set_params(*args, **kwargs) + return self.set_distribution_params(*args, **kwargs) def load_patient_data( @@ -261,7 +244,7 @@ def comp_joint_state_dist( if mode == "HMM": ipsi_state_evo = self.ipsi.comp_dist_evolution() contra_state_evo = self.contra.comp_dist_evolution() - time_marg_matrix = np.diag(self.diag_time_dists[t_stage].distribution) + time_marg_matrix = np.diag(self.get_distribution(t_stage).pmf) result = ( ipsi_state_evo.T @@ -334,7 +317,7 @@ def _hmm_likelihood(self, log: bool = True, t_stage: str | None = None) -> float t_stages = [t_stage] for stage in t_stages: - diag_time_matrix = np.diag(self.diag_time_dists[stage].distribution) + diag_time_matrix = np.diag(self.get_distribution(stage).pmf) # Note that I am not using the `comp_joint_state_dist` method here, since # that would recompute the state dist evolution for each T-stage. 
@@ -415,13 +398,13 @@ def comp_posterior_joint_state_dist( self, given_param_args: Iterable[float] | None = None, given_param_kwargs: dict[str, float] | None = None, - given_diagnoses: dict[str, DiagnoseType] | None = None, + given_diagnoses: dict[str, types.DiagnoseType] | None = None, t_stage: str | int = "early", mode: str = "HMM", ) -> np.ndarray: """Compute joint post. dist. over ipsi & contra states, ``given_diagnoses``. - The ``given_diagnoses`` is a dictionary storing a :py:class:`DiagnoseType` for + The ``given_diagnoses`` is a dictionary storing a :py:class:`types.DiagnoseType` for the ``"ipsi"`` and ``"contra"`` side of the neck. Essentially, this is the risk for any possible combination of ipsi- and @@ -470,10 +453,10 @@ def comp_posterior_joint_state_dist( def risk( self, - involvement: PatternType | None = None, + involvement: types.PatternType | None = None, given_param_args: Iterable[float] | None = None, given_param_kwargs: dict[str, float] | None = None, - given_diagnoses: dict[str, DiagnoseType] | None = None, + given_diagnoses: dict[str, types.DiagnoseType] | None = None, t_stage: str = "early", mode: str = "HMM", ) -> float: @@ -482,7 +465,7 @@ def risk( The parameters can be set via the ``given_param_args`` and ``given_param_kwargs``, both of which are passed to the :py:meth:`~set_params` method. The ``given_diagnoses`` must be a dictionary - mapping the side of the neck to a :py:class:`DiagnoseType`. + mapping the side of the neck to a :py:class:`types.DiagnoseType`. 
Note: The computation is much faster if no parameters are given, since then the @@ -549,12 +532,12 @@ def draw_patients( stage_dist = np.array(stage_dist) / sum(stage_dist) drawn_t_stages = rng.choice( - a=list(self.diag_time_dists.keys()), + a=self.t_stages, p=stage_dist, size=num, ) drawn_diag_times = [ - self.diag_time_dists[t_stage].draw_diag_times(rng=rng) + self.get_distribution(t_stage).draw_diag_times(rng=rng) for t_stage in drawn_t_stages ] @@ -565,7 +548,7 @@ def draw_patients( # construct MultiIndex with "ipsi" and "contra" at top level to allow # concatenation of the two separate drawn diagnoses sides = ["ipsi", "contra"] - modality_names = list(self.modalities.keys()) + modality_names = list(self.get_all_modalities().keys()) lnl_names = [lnl for lnl in self.ipsi.graph.lnls.keys()] multi_cols = pd.MultiIndex.from_product([sides, modality_names, lnl_names]) diff --git a/lymph/models/unilateral.py b/lymph/models/unilateral.py index 3e39b9a..dd090e4 100644 --- a/lymph/models/unilateral.py +++ b/lymph/models/unilateral.py @@ -36,7 +36,6 @@ class Unilateral( is_trinary: bool get_state: Callable set_state: Callable - state_list: list[int] lnls: dict[str, graph.LymphNodeLevel] def __init__( @@ -520,9 +519,9 @@ def comp_state_dist(self, t_stage: str = "early", mode: str = "HMM") -> np.ndarr return diag_time_dist @ state_dists if mode == "BN": - state_dist = np.ones(shape=(len(self.state_list),), dtype=float) + state_dist = np.ones(shape=(len(self.graph.state_list),), dtype=float) - for i, state in enumerate(self.state_list): + for i, state in enumerate(self.graph.state_list): self.set_state(*state) for node in self.graph.lnls.values(): state_dist[i] *= node.comp_bayes_net_prob() From 1fe7994a7bcab3ecf415e2e6c4857701b3fb5783 Mon Sep 17 00:00:00 2001 From: Roman Ludwig <48687784+rmnldwg@users.noreply.github.com> Date: Tue, 20 Feb 2024 17:12:20 +0100 Subject: [PATCH 31/75] fix: deal with first obvious bugs --- docs/source/quickstart_bilateral.ipynb | 46 
+++++++------------------- lymph/diagnose_times.py | 18 ++++++---- lymph/modalities.py | 27 +++++++-------- lymph/models/bilateral.py | 23 ++++++++++--- lymph/models/unilateral.py | 21 +++++++----- 5 files changed, 69 insertions(+), 66 deletions(-) diff --git a/docs/source/quickstart_bilateral.ipynb b/docs/source/quickstart_bilateral.ipynb index e4f2f9b..58b0853 100644 --- a/docs/source/quickstart_bilateral.ipynb +++ b/docs/source/quickstart_bilateral.ipynb @@ -40,8 +40,8 @@ "metadata": {}, "outputs": [], "source": [ - "model.ipsi.graph.edges[\"I_to_II\"].set_params(spread=0.123)\n", - "model.contra.graph.edges[\"I_to_II\"].get_params(\"spread\")" + "model.set_params(spread=0.123)\n", + "model.contra.graph.edges[\"ItoII\"].get_params(\"spread\")" ] }, { @@ -50,7 +50,7 @@ "metadata": {}, "outputs": [], "source": [ - "model.assign_params(ipsi_T_to_III_spread=0.234)\n", + "model.set_params(ipsi_TtoIII_spread=0.234)\n", "model.get_params(as_dict=True)" ] }, @@ -60,10 +60,9 @@ "metadata": {}, "outputs": [], "source": [ - "mri_and_pet_spsn = {\"MRI\": [0.63, 0.81], \n", - " \"PET\": [0.86, 0.79]}\n", - "# ^ ^\n", - "# specificty sensitivity" + "model.set_modality(\"MRI\", specificity=0.63, sensitivity=0.81)\n", + "model.set_modality(\"PET\", specificity=0.86, sensitivity=0.79)\n", + "model.get_all_modalities()" ] }, { @@ -72,29 +71,7 @@ "metadata": {}, "outputs": [], "source": [ - "model.modalities = mri_and_pet_spsn\n", - "model.modalities" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from lymph.modalities import Clinical\n", - "\n", - "model.modalities[\"CT\"] = Clinical(specificity=0.86, sensitivity=0.81)\n", - "model.modalities" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print(model.modalities[\"PET\"].confusion_matrix)" + "print(model.get_modality(\"PET\").confusion_matrix)" ] }, { @@ -130,7 +107,8 @@ "metadata": {}, 
"outputs": [], "source": [ - "model.modalities = {\"PET\": [0.86, 0.79]}" + "model.replace_all_modalities({})\n", + "model.set_modality(\"PET\", specificity=0.86, sensitivity=0.79)" ] }, { @@ -167,7 +145,7 @@ "p = 0.4\n", "\n", "early_prior = sp.stats.binom.pmf(time_steps, max_time, p)\n", - "model.diag_time_dists[\"early\"] = early_prior" + "model.set_distribution(\"early\", early_prior)" ] }, { @@ -187,8 +165,8 @@ "metadata": {}, "outputs": [], "source": [ - "model.diag_time_dists[\"late\"] = late_binomial\n", - "params_dict = model.get_params(as_dict=True)\n", + "model.set_distribution(\"late\", late_binomial)\n", + "params_dict = model.get_params(as_dict=True, as_flat=True)\n", "params_dict" ] }, diff --git a/lymph/diagnose_times.py b/lymph/diagnose_times.py index 75d19dd..e6c6eba 100644 --- a/lymph/diagnose_times.py +++ b/lymph/diagnose_times.py @@ -257,16 +257,16 @@ def __init__( is_distribution_leaf: bool = False, ) -> None: """Initialize the distribution composite.""" - self.max_time = max_time - if distribution_children is None: distribution_children = {} if is_distribution_leaf: self._distributions = {} - distribution_children = {} # ignore any provided children + self._distribution_children = {} # ignore any provided children + self.max_time = max_time # only set max_time in leaf self._distribution_children = distribution_children + super().__init__() @@ -292,7 +292,7 @@ def max_time(self: DC) -> int: if len(max_times) > 1: warnings.warn("Not all max_times are equal. 
Returning the first one.") - return self._distribution_children.values()[0].max_time + return list(self._distribution_children.values())[0].max_time @max_time.setter def max_time(self: DC, value: int) -> None: @@ -376,8 +376,14 @@ def get_distribution_params( for t_stage, distribution in self._distributions.items(): params[t_stage] = distribution.get_params(as_flat=as_flat) else: - for key, child in self._distribution_children.items(): - params[key] = child.get_distribution_params(as_flat=as_flat) + child_keys = list(self._distribution_children.keys()) + first_child = self._distribution_children[child_keys[0]] + params = first_child.get_distribution_params(as_flat=as_flat) + are_all_equal = True + for key in child_keys[1:]: + other_child = self._distribution_children[key] + other_params = other_child.get_distribution_params(as_flat=as_flat) + are_all_equal &= params == other_params if as_flat or not as_dict: params = flatten(params) diff --git a/lymph/modalities.py b/lymph/modalities.py index 0d2fa27..3f4bd2c 100644 --- a/lymph/modalities.py +++ b/lymph/modalities.py @@ -9,7 +9,7 @@ from __future__ import annotations import warnings -from abc import ABC +from abc import ABC, abstractmethod from typing import Literal, TypeVar import numpy as np @@ -35,6 +35,17 @@ def __hash__(self) -> int: return hash((self.specificity, self.sensitivity, self.is_trinary)) + def __eq__(self, other: object) -> bool: + if not isinstance(other, Modality): + return False + + return ( + self.specificity == other.specificity + and self.sensitivity == other.sensitivity + and self.is_trinary == other.is_trinary + ) + + def __repr__(self) -> str: return ( f"{type(self).__name__}(" @@ -124,19 +135,16 @@ class Composite(ABC): def __init__( self: MC, - is_trinary: bool = False, modality_children: dict[str, Composite] = None, is_modality_leaf: bool = False, ) -> None: """Initialize the modality composite.""" - self._is_trinary = is_trinary - if modality_children is None: modality_children = {} if 
is_modality_leaf: self._modalities = {} - self._modality_children = {} # ignore any provided children + modality_children = {} # ignore any provided children self._modality_children = modality_children super().__init__() @@ -155,16 +163,9 @@ def _is_modality_leaf(self: MC) -> bool: @property + @abstractmethod def is_trinary(self: MC) -> bool: """Return whether the modality is trinary.""" - if self._is_modality_leaf: - return self._is_trinary - - values = {child.is_trinary for child in self._modality_children.values()} - if len(values) > 1: - warnings.warn("Not all children have same 'narity'. Returning first one.") - - return self._modality_children.values()[0].is_trinary def get_modality(self: MC, name: str) -> Modality: diff --git a/lymph/models/bilateral.py b/lymph/models/bilateral.py index 308ed46..7d32181 100644 --- a/lymph/models/bilateral.py +++ b/lymph/models/bilateral.py @@ -66,8 +66,6 @@ def __init__( override the unilateral kwargs and may also override the ``graph_dict``. This allows the user to specify different graphs for the two sides of the neck. 
""" - super().__init__() - self._init_models( graph_dict=graph_dict, unilateral_kwargs=unilateral_kwargs, @@ -85,13 +83,11 @@ def __init__( diagnose_times.Composite.__init__( self, - max_time=self.max_time, # `max_time` already accessible from ipsi/contra distribution_children={"ipsi": self.ipsi, "contra": self.contra}, is_distribution_leaf=False, ) modalities.Composite.__init__( self, - is_trinary=self.is_trinary, # `is_trinary` already accessible from ipsi/contra modality_children={"ipsi": self.ipsi, "contra": self.contra}, is_modality_leaf=False, ) @@ -135,6 +131,23 @@ def trinary(cls, *args, **kwargs) -> Bilateral: return cls(*args, unilateral_kwargs=unilateral_kwargs, **kwargs) + @property + def is_trinary(self) -> bool: + """Return whether the model is trinary.""" + if self.ipsi.is_trinary != self.contra.is_trinary: + raise ValueError("Both sides must be of the same 'naryity'.") + + return self.ipsi.is_trinary + + @property + def is_binary(self) -> bool: + """Return whether the model is binary.""" + if self.ipsi.is_binary != self.contra.is_binary: + raise ValueError("Both sides must be of the same 'naryity'.") + + return self.ipsi.is_binary + + def get_params( self, as_dict: bool = True, @@ -154,8 +167,8 @@ def get_params( params = { "ipsi": ipsi_params, "contra": contra_params, - **dist_params, } + params.update(dist_params) if as_flat or not as_dict: params = flatten(params) diff --git a/lymph/models/unilateral.py b/lymph/models/unilateral.py index dd090e4..403bc4b 100644 --- a/lymph/models/unilateral.py +++ b/lymph/models/unilateral.py @@ -88,20 +88,14 @@ def __init__( LNLs. If they are set to ``True``, the parameters are set globally for all LNLs. If they are set to ``False``, the parameters are set individually for each LNL. 
""" - super().__init__() - self.graph = graph.Representation( graph_dict=graph_dict, tumor_state=tumor_state, allowed_states=allowed_states, ) - diagnose_times.Composite.__init__( - self, max_time=max_time, is_distribution_leaf=True, - ) - modalities.Composite.__init__( - self, is_trinary=self.is_trinary, is_modality_leaf=True, - ) + diagnose_times.Composite.__init__(self, max_time=max_time, is_distribution_leaf=True) + modalities.Composite.__init__(self, is_modality_leaf=True) @classmethod @@ -134,6 +128,17 @@ def print_info(self): print(string) + @property + def is_trinary(self) -> bool: + """Return whether the model is trinary.""" + return self.graph.is_trinary + + @property + def is_binary(self) -> bool: + """Return whether the model is binary.""" + return self.graph.is_binary + + def get_params( self, as_dict: bool = True, From ace24fdb90d49b2ec9294ee9ed6f8c790f22c609 Mon Sep 17 00:00:00 2001 From: rmnldwg <48687784+rmnldwg@users.noreply.github.com> Date: Wed, 21 Feb 2024 10:15:25 +0100 Subject: [PATCH 32/75] change(mod)!: shorten to sens/spec Also, add a `clear_modalities()` and a `clear_distributions()` method to the respective composites. 
--- lymph/diagnose_times.py | 48 +++++++++++++++++--- lymph/modalities.py | 97 ++++++++++++++++++++++++++++++----------- 2 files changed, 114 insertions(+), 31 deletions(-) diff --git a/lymph/diagnose_times.py b/lymph/diagnose_times.py index e6c6eba..c840f70 100644 --- a/lymph/diagnose_times.py +++ b/lymph/diagnose_times.py @@ -118,18 +118,23 @@ def extract_kwargs(distribution: callable) -> dict[str, Any]: def __repr__(self) -> str: - return f"Distribution({self.pmf})" + return f"Distribution({repr(self.pmf.tolist())})" - def __eq__(self, __value) -> bool: - if not isinstance(__value, Distribution): + def __eq__(self, other) -> bool: + if not isinstance(other, Distribution): return False - return np.all(self.pmf == __value.pmf) + return ( + self.is_updateable == other.is_updateable + and self._kwargs == other._kwargs + and np.all(self.pmf == other.pmf) + ) def __hash__(self) -> int: - return hash(self.pmf.tobytes()) + kwarg_tpl = tuple(self._kwargs.items()) + return hash((self.is_updateable, kwarg_tpl, self.pmf.tobytes())) @staticmethod @@ -320,7 +325,14 @@ def get_distribution(self: DC, t_stage: str) -> Distribution: def get_all_distributions(self: DC) -> dict[str, Distribution]: - """Return all distributions.""" + """Return all distributions. + + This will issue a warning if it finds that not all distributions of the + composite are equal. Note that it will always return the distributions of the + first child. This means one should NOT try to set the distributions via the + returned dictionary of this method. Instead, use the :py:meth:`set_modality` + method. 
+ """ if self._is_distribution_leaf: return self._distributions @@ -364,6 +376,30 @@ def replace_all_distributions(self: DC, distributions: dict[str, Distribution]) child.replace_all_distributions(distributions) + def clear_distributions(self: DC) -> None: + """Remove all distributions.""" + if self._is_distribution_leaf: + self._distributions.clear() + + else: + for child in self._distribution_children.values(): + child.clear_distributions() + + + def distributions_hash(self: DC) -> int: + """Return a hash of all distributions.""" + hash_res = 0 + if self._is_distribution_leaf: + for t_stage, distribution in self._distributions.items(): + hash_res = hash((hash_res, t_stage, hash(distribution))) + + else: + for child in self._distribution_children.values(): + hash_res = hash((hash_res, child.distributions_hash())) + + return hash_res + + def get_distribution_params( self: DC, as_dict: bool = True, diff --git a/lymph/modalities.py b/lymph/modalities.py index 3f4bd2c..9e573b5 100644 --- a/lymph/modalities.py +++ b/lymph/modalities.py @@ -19,47 +19,77 @@ class Modality: """Stores the confusion matrix of a diagnostic modality.""" def __init__( self, - specificity: float, - sensitivity: float, + spec: float, + sens: float, is_trinary: bool = False, ) -> None: - if not (0. <= sensitivity <= 1. and 0. <= specificity <= 1.): + if not (0. <= sens <= 1. and 0. 
<= spec <= 1.): raise ValueError("Senstivity and specificity must be between 0 and 1.") - self.specificity = specificity - self.sensitivity = sensitivity + self.spec = spec + self.sens = sens self.is_trinary = is_trinary def __hash__(self) -> int: - return hash((self.specificity, self.sensitivity, self.is_trinary)) + return hash(self.confusion_matrix.tobytes()) def __eq__(self, other: object) -> bool: if not isinstance(other, Modality): return False - return ( - self.specificity == other.specificity - and self.sensitivity == other.sensitivity - and self.is_trinary == other.is_trinary - ) + return np.all(self.confusion_matrix == other.confusion_matrix) def __repr__(self) -> str: return ( f"{type(self).__name__}(" - f"specificity={self.specificity!r}, " - f"sensitivity={self.sensitivity!r}, " + f"spec={self.spec!r}, " + f"sens={self.sens!r}, " f"is_trinary={self.is_trinary!r})" ) + @property + def spec(self) -> float: + """Return the specificity of the modality.""" + return self._spec + + @spec.setter + def spec(self, value: float) -> None: + """Set the specificity of the modality.""" + if not 0. <= value <= 1.: + raise ValueError("Specificity must be between 0 and 1.") + + if hasattr(self, "_confusion_matrix"): + del self._confusion_matrix + + self._spec = value + + + @property + def sens(self) -> float: + """Return the sensitivity of the modality.""" + return self._sens + + @sens.setter + def sens(self, value: float) -> None: + """Set the sensitivity of the modality.""" + if not 0. <= value <= 1.: + raise ValueError("Sensitivity must be between 0 and 1.") + + if hasattr(self, "_confusion_matrix"): + del self._confusion_matrix + + self._sens = value + + def compute_confusion_matrix(self) -> np.ndarray: """Compute the confusion matrix of the modality.""" return np.array([ - [self.specificity, 1. - self.specificity], - [1. - self.sensitivity, self.sensitivity], + [self.spec, 1. - self.spec], + [1. 
- self.sens, self.sens], ]) @property @@ -174,7 +204,13 @@ def get_modality(self: MC, name: str) -> Modality: def get_all_modalities(self: MC) -> dict[str, Modality]: - """Return all modalities of the composite.""" + """Return all modalities of the composite. + + This will issue a warning if it finds that not all modalities of the composite + are equal. Note that it will always return the modalities of the first child. + This means one should NOT try to set the modalities via the returned dictionary + of this method. Instead, use the :py:meth:`set_modality` method. + """ if self._is_modality_leaf: return self._modalities @@ -195,41 +231,52 @@ def get_all_modalities(self: MC) -> dict[str, Modality]: def set_modality( self, name: str, - specificity: float, - sensitivity: float, + spec: float, + sens: float, kind: Literal["clinical", "pathological"] = "clinical", ) -> None: """Set the modality with the given name.""" if self._is_modality_leaf: cls = Pathological if kind == "pathological" else Clinical - self._modalities[name] = cls(specificity, sensitivity, self.is_trinary) + self._modalities[name] = cls(spec, sens, self.is_trinary) else: for child in self._modality_children.values(): - child.set_modality(name, specificity, sensitivity, kind) + child.set_modality(name, spec, sens, kind) def replace_all_modalities(self: MC, modalities: dict[str, Modality]) -> None: """Replace all modalities of the composite.""" if self._is_modality_leaf: - self._modalities = {} + self.clear_modalities() for name, modality in modalities.items(): kind = "pathological" if isinstance(modality, Pathological) else "clinical" - self.set_modality(name, modality.specificity, modality.sensitivity, kind) + self.set_modality(name, modality.spec, modality.sens, kind) else: for child in self._modality_children.values(): child.replace_all_modalities(modalities) - def compute_modalities_hash(self: MC) -> int: + def modalities_hash(self: MC) -> int: """Compute a hash from all modalities.""" hash_res = 
0 if self._is_modality_leaf: for name, modality in self._modalities.items(): hash_res = hash((hash_res, name, hash(modality))) - for child in self._modality_children.values(): - hash_res = hash((hash_res, hash(child))) + else: + for child in self._modality_children.values(): + hash_res = hash((hash_res, child.modalities_hash())) return hash_res + + + def clear_modalities(self: MC) -> None: + """Clear all modalities of the composite.""" + if self._is_modality_leaf: + self._modalities.clear() + + else: + for child in self._modality_children.values(): + child.clear_modalities() From 6376ecf29eff338680dd5764d321d5ea879c2356 Mon Sep 17 00:00:00 2001 From: rmnldwg <48687784+rmnldwg@users.noreply.github.com> Date: Wed, 21 Feb 2024 10:16:58 +0100 Subject: [PATCH 33/75] feat(graph)!: add `__hash__` to edge, node, graph This removed the dedicated `parameter_hash()` method. --- lymph/graph.py | 49 +++++++++++++------------------------------------ 1 file changed, 13 insertions(+), 36 deletions(-) diff --git a/lymph/graph.py b/lymph/graph.py index d488f35..a8c09fc 100644 --- a/lymph/graph.py +++ b/lymph/graph.py @@ -76,6 +76,10 @@ def __repr__(self) -> str: f"allowed_states={self.allowed_states!r})" ) + def __hash__(self) -> int: + """Return a hash of the node's name and state.""" + return hash((self.name, self.state, tuple(self.allowed_states))) + @property def name(self) -> str: @@ -260,6 +264,10 @@ def __repr__(self) -> str: f"micro_mod={self.micro_mod!r})" ) + def __hash__(self) -> int: + """Return a hash of the edge's transition tensor.""" + return hash((self.get_name(), self.transition_tensor.tobytes())) + @property def parent(self) -> Tumor | LymphNodeLevel: @@ -625,44 +633,13 @@ def growth_edges(self) -> dict[str, Edge]: return {n: e for n, e in self.edges.items() if e.is_growth} - def parameter_hash(self) -> int: - """Compute a hash of the graph. - - Note: - This is used to check if the graph has changed and the transition matrix - needs to be recomputed. 
It should not be used as a replacement for the - ``__hash__`` method, for two reasons: - - 1. It may change over the lifetime of the object, whereas ``__hash__`` - should be constant. - 2. It only takes into account the ``transition_tensor`` of the edges, - nothing else. - - Example: - - >>> graph_dict = { - ... ('tumor', 'T'): ['II', 'III'], - ... ('lnl', 'II'): ['III'], - ... ('lnl', 'III'): [], - ... } - >>> one_graph = Representation(graph_dict) - >>> another_graph = Representation(graph_dict) - >>> rng = np.random.default_rng(42) - >>> for one_edge, another_edge in zip( - ... one_graph.edges.values(), another_graph.edges.values() - ... ): - ... params_dict = one_edge.get_params(as_dict=True) - ... params_to_set = {k: rng.uniform() for k in params_dict} - ... _ = one_edge.set_params(**params_to_set) - ... _ = another_edge.set_params(**params_to_set) - >>> one_graph.parameter_hash() == another_graph.parameter_hash() - True - """ - tensor_bytes = b"" + def __hash__(self) -> int: + """Return a hash of the graph.""" + hash_res = 0 for edge in self.edges.values(): - tensor_bytes += edge.transition_tensor.tobytes() + hash_res = hash((hash_res, hash(edge))) - return hash(tensor_bytes) + return hash_res def to_dict(self) -> dict[tuple[str, str], set[str]]: From 9630ba49e8b5668328bcc4151c675ed7f220f73b Mon Sep 17 00:00:00 2001 From: rmnldwg <48687784+rmnldwg@users.noreply.github.com> Date: Wed, 21 Feb 2024 10:19:22 +0100 Subject: [PATCH 34/75] change(matrix): use hashables over arg0 cache Instead of using this weird `arg0_cache` for the observation and transition matrix, I use the necessary arguments only, which are all hashable now. 
--- lymph/matrix.py | 55 +++++++++++++++----------------------- lymph/models/unilateral.py | 11 +++++--- 2 files changed, 30 insertions(+), 36 deletions(-) diff --git a/lymph/matrix.py b/lymph/matrix.py index 5d3af72..c298aa3 100644 --- a/lymph/matrix.py +++ b/lymph/matrix.py @@ -5,13 +5,14 @@ from __future__ import annotations import warnings -from typing import Any +from functools import lru_cache +from typing import Any, Iterable import numpy as np import pandas as pd from cachetools import LRUCache -from lymph import models +from lymph import graph, models from lymph.helper import ( AbstractLookupDict, arg0_cache, @@ -19,16 +20,20 @@ row_wise_kron, tile_and_repeat, ) +from lymph.modalities import Modality -def generate_transition(instance: models.Unilateral) -> np.ndarray: +@lru_cache(maxsize=128) +def generate_transition( + lnls: Iterable[graph.LymphNodeLevel], + num_states: int, +) -> np.ndarray: """Compute the transition matrix of the lymph model.""" - lnls_list = list(instance.graph.lnls.values()) - num_lnls = len(lnls_list) - num_states = 3 if instance.graph.is_trinary else 2 + lnls = list(lnls) # necessary for `index()` call + num_lnls = len(lnls) transition_matrix = np.ones(shape=(num_states**num_lnls, num_states**num_lnls)) - for i, lnl in enumerate(lnls_list): + for i, lnl in enumerate(lnls): current_state_idx = get_state_idx_matrix( lnl_idx=i, num_lnls=num_lnls, @@ -47,7 +52,7 @@ def generate_transition(instance: models.Unilateral) -> np.ndarray: 0, current_state_idx, new_state_idx ] else: - parent_node_i = lnls_list.index(edge.parent) + parent_node_i = lnls.index(edge.parent) parent_state_idx = get_state_idx_matrix( lnl_idx=parent_node_i, num_lnls=num_lnls, @@ -74,25 +79,19 @@ def generate_transition(instance: models.Unilateral) -> np.ndarray: return transition_matrix -cached_generate_transition = arg0_cache(maxsize=128, cache_class=LRUCache)(generate_transition) -"""Cached version of :py:func:`generate_transition`. 
- -This expects the first argument to be a hashable object that is used instrad of the -``instance`` argument of :py:func:`generate_transition`. It is intended to be used with -the :py:meth:`~lymph.graph.Representation.parameter_hash` method of the graph. -""" - - -def generate_observation(instance: models.Unilateral) -> np.ndarray: +@lru_cache(maxsize=128) +def generate_observation( + modalities: Iterable[Modality], + num_lnls: int, + base: int = 2, +) -> np.ndarray: """Generate the observation matrix of the lymph model.""" - num_lnls = len(instance.graph.lnls) - base = 2 if instance.graph.is_binary else 3 shape = (base ** num_lnls, 1) observation_matrix = np.ones(shape=shape) - for modality in instance.get_all_modalities().values(): + for modality in modalities: mod_obs_matrix = np.ones(shape=(1,1)) - for _ in instance.graph.lnls: + for _ in range(num_lnls): mod_obs_matrix = np.kron(mod_obs_matrix, modality.confusion_matrix) observation_matrix = row_wise_kron(observation_matrix, mod_obs_matrix) @@ -100,16 +99,6 @@ def generate_observation(instance: models.Unilateral) -> np.ndarray: return observation_matrix -cached_generate_observation = arg0_cache(maxsize=128, cache_class=LRUCache)(generate_observation) -"""Cached version of :py:func:`generate_observation`. - -This expects the first argument to be a hashable object that is used instrad of the -``instance`` argument of :py:func:`generate_observation`. It is intended to be used -with the hash of all confusion matrices of the model's modalities, which is returned -by the method :py:meth:`~lymph.modalities.ModalitiesUserDict.confusion_matrices_hash`. 
-""" - - def compute_encoding( lnls: list[str], pattern: pd.Series | dict[str, bool | int | str], @@ -320,7 +309,7 @@ def __setitem__(self, __key, __value) -> None: warnings.warn("Setting the diagnose matrices is not supported.") def __getitem__(self, key: Any) -> Any: - modalities_hash = self.model.compute_modalities_hash() + modalities_hash = self.model.modalities_hash() patient_data_hash = self.model.patient_data_hash joint_hash = hash((modalities_hash, patient_data_hash, key)) return cached_generate_diagnose(joint_hash, self.model, key) diff --git a/lymph/models/unilateral.py b/lymph/models/unilateral.py index 403bc4b..795cb96 100644 --- a/lymph/models/unilateral.py +++ b/lymph/models/unilateral.py @@ -330,7 +330,10 @@ def transition_matrix(self) -> np.ndarray: [0. , 0. , 0.56, 0.44], [0. , 0. , 0. , 1. ]]) """ - return matrix.cached_generate_transition(self.graph.parameter_hash(), self) + return matrix.generate_transition( + lnls=self.graph.lnls.values(), + num_states=3 if self.is_trinary else 2, + ) def observation_matrix(self) -> np.ndarray: @@ -346,8 +349,10 @@ def observation_matrix(self) -> np.ndarray: :py:func:`~lymph.descriptors.matrix.generate_observation` The function actually computing the observation matrix. 
""" - return matrix.cached_generate_observation( - self.compute_modalities_hash(), self + return matrix.generate_observation( + modalities=self.get_all_modalities().values(), + num_lnls=len(self.graph.lnls), + base=3 if self.is_trinary else 2, ) From 781c3ff3f1a94b956e7ce412e27fef4bb755a749 Mon Sep 17 00:00:00 2001 From: rmnldwg <48687784+rmnldwg@users.noreply.github.com> Date: Wed, 21 Feb 2024 10:19:52 +0100 Subject: [PATCH 35/75] docs: update quickstart guides to API changes --- docs/source/quickstart_bilateral.ipynb | 6 +++--- docs/source/quickstart_unilateral.ipynb | 12 ++++++------ 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/docs/source/quickstart_bilateral.ipynb b/docs/source/quickstart_bilateral.ipynb index 58b0853..930b084 100644 --- a/docs/source/quickstart_bilateral.ipynb +++ b/docs/source/quickstart_bilateral.ipynb @@ -60,8 +60,8 @@ "metadata": {}, "outputs": [], "source": [ - "model.set_modality(\"MRI\", specificity=0.63, sensitivity=0.81)\n", - "model.set_modality(\"PET\", specificity=0.86, sensitivity=0.79)\n", + "model.set_modality(\"MRI\", spec=0.63, sens=0.81)\n", + "model.set_modality(\"PET\", spec=0.86, sens=0.79)\n", "model.get_all_modalities()" ] }, @@ -108,7 +108,7 @@ "outputs": [], "source": [ "model.replace_all_modalities({})\n", - "model.set_modality(\"PET\", specificity=0.86, sensitivity=0.79)" + "model.set_modality(\"PET\", spec=0.86, sens=0.79)" ] }, { diff --git a/docs/source/quickstart_unilateral.ipynb b/docs/source/quickstart_unilateral.ipynb index aa9a14d..ad731aa 100644 --- a/docs/source/quickstart_unilateral.ipynb +++ b/docs/source/quickstart_unilateral.ipynb @@ -115,11 +115,11 @@ "metadata": {}, "outputs": [], "source": [ - "model.set_modality(\"MRI\", specificity=0.63, sensitivity=0.81)\n", - "model.set_modality(\"PET\", specificity=0.86, sensitivity=0.79, kind=\"clinical\")\n", - "# ^^^^^^^^^^^^^^^\n", - "# No effect in binary model,\n", - "# but important for trinary.\n", + "model.set_modality(\"MRI\", 
spec=0.63, sens=0.81)\n", + "model.set_modality(\"PET\", spec=0.86, sens=0.79, kind=\"clinical\")\n", + "# ^^^^^^^^^^^^^^^\n", + "# No effect in binary model,\n", + "# but important for trinary.\n", "model.get_all_modalities()" ] }, @@ -202,7 +202,7 @@ "from lymph.modalities import Clinical\n", "\n", "\n", - "model.replace_all_modalities({\"PET\": Clinical(specificity=0.86, sensitivity=0.79),})\n", + "model.replace_all_modalities({\"PET\": Clinical(spec=0.86, sens=0.79),})\n", "model.get_all_modalities()" ] }, From 4522528988cdae6795b4e6e6c3322b99eb3230d1 Mon Sep 17 00:00:00 2001 From: rmnldwg <48687784+rmnldwg@users.noreply.github.com> Date: Wed, 21 Feb 2024 13:43:57 +0100 Subject: [PATCH 36/75] fix(diag): correct max time & params The `max_time` is now correctly accessed and set. Also, the distribution params are not used up by synched distributions, but only by the distributions in composite leafs. --- lymph/diagnose_times.py | 132 +++++++++++++++++++++++++++++----------- 1 file changed, 97 insertions(+), 35 deletions(-) diff --git a/lymph/diagnose_times.py b/lymph/diagnose_times.py index c840f70..c260995 100644 --- a/lymph/diagnose_times.py +++ b/lymph/diagnose_times.py @@ -60,24 +60,12 @@ def __init__( """ if callable(distribution): self._init_from_callable(distribution, max_time) + elif isinstance(distribution, Distribution): + self._init_from_instance(distribution) else: self._init_from_frozen(distribution, max_time) - def _init_from_frozen(self, distribution: Iterable[float], max_time: int): - """Initialize the distribution from a frozen distribution.""" - if max_time is None: - max_time = len(distribution) - 1 - - if max_time != len(distribution) - 1: - raise ValueError("max_time and the length of the distribution don't match") - - self.support = np.arange(max_time + 1) - self._kwargs = {} - self._func = None - self._frozen = self.normalize(distribution) - - def _init_from_callable(self, distribution: callable, max_time: int | None = None): """Initialize 
the distribution from a callable distribution.""" if max_time is None: @@ -85,12 +73,40 @@ def _init_from_callable(self, distribution: callable, max_time: int | None = Non if max_time < 0: raise ValueError("max_time must be a positive integer") - self.support = np.arange(max_time + 1) + self.max_time = max_time self._kwargs = self.extract_kwargs(distribution) self._func = distribution self._frozen = self.pmf + def _init_from_instance(self, instance: Distribution): + """Initialize the distribution from another instance.""" + if not instance.is_updateable: + self._init_from_frozen(instance.pmf, instance.max_time) + else: + self.max_time = instance.max_time + self._kwargs = instance._kwargs.copy() + self._func = instance._func + self._frozen = self.pmf + + + def _init_from_frozen(self, distribution: Iterable[float], max_time: int | None = None): + """Initialize the distribution from a frozen distribution.""" + if max_time is None: + max_time = len(distribution) - 1 + + if max_time != len(distribution) - 1: + raise ValueError( + f"max_time {max_time} and len of distribution {len(distribution)} " + "don't match" + ) + + self.max_time = max_time + self._kwargs = {} + self._func = None + self._frozen = self.normalize(distribution) + + @staticmethod def extract_kwargs(distribution: callable) -> dict[str, Any]: """Extract the keyword arguments from the provided parametric distribution. 
@@ -120,7 +136,6 @@ def extract_kwargs(distribution: callable) -> dict[str, Any]: def __repr__(self) -> str: return f"Distribution({repr(self.pmf.tolist())})" - def __eq__(self, other) -> bool: if not isinstance(other, Distribution): return False @@ -131,12 +146,29 @@ def __eq__(self, other) -> bool: and np.all(self.pmf == other.pmf) ) + def __len__(self) -> int: + return len(self.support) def __hash__(self) -> int: kwarg_tpl = tuple(self._kwargs.items()) return hash((self.is_updateable, kwarg_tpl, self.pmf.tobytes())) + @property + def max_time(self) -> int: + """Return the maximum time for the distribution.""" + return self.support[-1] + + @max_time.setter + def max_time(self, value: int) -> None: + """Set the maximum time for the distribution.""" + if value < 0: + raise ValueError("max_time must be a positive integer") + + self.support = np.arange(value + 1) + self._frozen = None + + @staticmethod def normalize(distribution: np.ndarray) -> np.ndarray: """Normalize a distribution.""" @@ -239,17 +271,17 @@ class Composite(ABC): >>> class MyComposite(Composite): ... 
pass - >>> leaf1 = MyComposite(is_distribution_leaf=True) - >>> leaf2 = MyComposite(is_distribution_leaf=True) - >>> leaf3 = MyComposite(is_distribution_leaf=True) + >>> leaf1 = MyComposite(is_distribution_leaf=True, max_time=1) + >>> leaf2 = MyComposite(is_distribution_leaf=True, max_time=1) + >>> leaf3 = MyComposite(is_distribution_leaf=True, max_time=1) >>> branch1 = MyComposite(distribution_children={"L1": leaf1, "L2": leaf2}) >>> branch2 = MyComposite(distribution_children={"L3": leaf3}) >>> root = MyComposite(distribution_children={"B1": branch1, "B2": branch2}) >>> root.set_distribution("T1", Distribution([0.1, 0.9])) >>> root.get_distribution("T1") - Distribution([0.1 0.9]) + Distribution([0.1, 0.9]) >>> leaf1.get_distribution("T1") - Distribution([0.1 0.9]) + Distribution([0.1, 0.9]) """ _max_time: int _distributions: dict[str, Distribution] # only for leaf nodes @@ -257,7 +289,7 @@ class Composite(ABC): def __init__( self: DC, - max_time: int = 10, + max_time: int | None = None, distribution_children: dict[str, Composite] | None = None, is_distribution_leaf: bool = False, ) -> None: @@ -291,22 +323,37 @@ def _is_distribution_leaf(self: DC) -> bool: def max_time(self: DC) -> int: """Return the maximum time for the distributions.""" if self._is_distribution_leaf: + are_all_equal = True + for dist in self._distributions.values(): + are_equal = dist.max_time == self._max_time + if not are_equal: + dist.max_time = self._max_time + are_all_equal &= are_equal + + if not are_all_equal: + warnings.warn(f"Not all max_times were equal. Set all to {self._max_time}") + return self._max_time - max_times = {child.max_time for child in self._distribution_children.values()} - if len(max_times) > 1: + max_times = [child.max_time for child in self._distribution_children.values()] + if len(set(max_times)) > 1: warnings.warn("Not all max_times are equal. 
Returning the first one.") - return list(self._distribution_children.values())[0].max_time + return max_times[0] @max_time.setter def max_time(self: DC, value: int) -> None: """Set the maximum time for the distributions.""" if self._is_distribution_leaf: + if value is None: + raise ValueError("max_time must be provided if the composite is a leaf") + if value < 0: raise ValueError("max_time must be a positive integer") self._max_time = value + for dist in self._distributions.values(): + dist.max_time = value else: for child in self._distribution_children.values(): @@ -364,6 +411,16 @@ def set_distribution( child.set_distribution(t_stage, distribution) + def del_distribution(self: DC, t_stage: str) -> None: + """Delete the distribution for the given ``t_stage``.""" + if self._is_distribution_leaf: + del self._distributions[t_stage] + + else: + for child in self._distribution_children.values(): + child.del_distribution(t_stage) + + def replace_all_distributions(self: DC, distributions: dict[str, Distribution]) -> None: """Replace all distributions with the given ones.""" if self._is_distribution_leaf: @@ -410,6 +467,8 @@ def get_distribution_params( if self._is_distribution_leaf: for t_stage, distribution in self._distributions.items(): + if not distribution.is_updateable: + continue params[t_stage] = distribution.get_params(as_flat=as_flat) else: child_keys = list(self._distribution_children.keys()) @@ -434,17 +493,20 @@ def set_distribution_params(self: DC, *args: float, **kwargs: float) -> tuple[fl kwargs, expected_keys=self._distributions.keys() ) for t_stage, distribution in self._distributions.items(): + if not distribution.is_updateable: + continue t_stage_kwargs = global_kwargs.copy() t_stage_kwargs.update(kwargs.get(t_stage, {})) args = distribution.set_params(*args, **t_stage_kwargs) + # in leafs, use up args one by one + return args - else: - kwargs, global_kwargs = unflatten_and_split( - kwargs, expected_keys=self._distribution_children.keys() - ) - for 
key, child in self._distribution_children.items(): - child_kwargs = global_kwargs.copy() - child_kwargs.update(kwargs.get(key, {})) - args = child.set_distribution_params(*args, **child_kwargs) - - return args + kwargs, global_kwargs = unflatten_and_split( + kwargs, expected_keys=self._distribution_children.keys() + ) + for key, child in self._distribution_children.items(): + child_kwargs = global_kwargs.copy() + child_kwargs.update(kwargs.get(key, {})) + rem_args = child.set_distribution_params(*args, **child_kwargs) + # in branches, distribute all args to children + return rem_args From 060bd0c61e84b6cab0e4664abaddb527acb26c86 Mon Sep 17 00:00:00 2001 From: rmnldwg <48687784+rmnldwg@users.noreply.github.com> Date: Wed, 21 Feb 2024 13:44:29 +0100 Subject: [PATCH 37/75] feat(mod): add method to delete modality --- lymph/modalities.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/lymph/modalities.py b/lymph/modalities.py index 9e573b5..cc3f0f5 100644 --- a/lymph/modalities.py +++ b/lymph/modalities.py @@ -245,6 +245,16 @@ def set_modality( child.set_modality(name, spec, sens, kind) + def del_modality(self: MC, name: str) -> None: + """Delete the modality with the given name.""" + if self._is_modality_leaf: + del self._modalities[name] + + else: + for child in self._modality_children.values(): + child.del_modality(name) + + def replace_all_modalities(self: MC, modalities: dict[str, Modality]) -> None: """Replace all modalities of the composite.""" if self._is_modality_leaf: From bf3852a5fb72433845e86db3046ceffd6effd699 Mon Sep 17 00:00:00 2001 From: rmnldwg <48687784+rmnldwg@users.noreply.github.com> Date: Wed, 21 Feb 2024 13:45:07 +0100 Subject: [PATCH 38/75] fix(graph): avoid warning for micro mod setting --- lymph/graph.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/lymph/graph.py b/lymph/graph.py index a8c09fc..dbdd9cb 100644 --- a/lymph/graph.py +++ b/lymph/graph.py @@ -243,7 +243,11 @@ def __init__( self.parent: Tumor
| LymphNodeLevel = parent self.child: LymphNodeLevel = child - if self.child.is_trinary: + if ( + not isinstance(self.parent, Tumor) + and self.parent.is_trinary + and not self.is_growth + ): self.micro_mod = micro_mod self.spread_prob = spread_prob From 11cb0c3ebd13b000be3fca3be6170d17d3561dfb Mon Sep 17 00:00:00 2001 From: rmnldwg <48687784+rmnldwg@users.noreply.github.com> Date: Wed, 21 Feb 2024 13:46:04 +0100 Subject: [PATCH 39/75] docs: adapt tests to new API (now passing) --- lymph/matrix.py | 2 +- lymph/models/midline.py | 2 +- lymph/models/unilateral.py | 23 +++-- tests/bayesian_unilateral_test.py | 2 +- tests/binary_bilateral_test.py | 157 +++++++++++++----------------- tests/binary_unilateral_test.py | 64 +++--------- tests/distribution_test.py | 51 +--------- tests/fixtures.py | 39 ++++---- tests/integration_test.py | 6 +- tests/trinary_unilateral_test.py | 14 +-- 10 files changed, 130 insertions(+), 230 deletions(-) diff --git a/lymph/matrix.py b/lymph/matrix.py index c298aa3..ea172a4 100644 --- a/lymph/matrix.py +++ b/lymph/matrix.py @@ -285,7 +285,7 @@ def generate_diagnose(model: models.Unilateral, t_stage: str) -> np.ndarray: The decorated function expects an additional first argument that should be unique for the combination of modalities and patient data. It is intended to be used with the joint hash of the modalities -(:py:meth:`~lymph.modalities.ModalitiesUserDict.confusion_matrices_hash`) and the +(:py:meth:`~modalities.Composite.modalities_hash`) and the patient data hash that is always precomputed when a new dataset is loaded into the model (:py:meth:`~lymph.models.Unilateral.patient_data_hash`). """ diff --git a/lymph/models/midline.py b/lymph/models/midline.py index 5a895a3..abdca2d 100644 --- a/lymph/models/midline.py +++ b/lymph/models/midline.py @@ -347,7 +347,7 @@ def modalities(self, new_modalities) -> None: "The modalities are not symmetric. Please set them via the " "`ipsi` or `contra` attributes." 
) - self.ext.modalities = new_modalities + self.ext.replace_all_modalities(new_modalities) def load_patient_data( diff --git a/lymph/models/unilateral.py b/lymph/models/unilateral.py index 795cb96..d9db808 100644 --- a/lymph/models/unilateral.py +++ b/lymph/models/unilateral.py @@ -139,6 +139,17 @@ def is_binary(self) -> bool: return self.graph.is_binary + @property + def t_stages(self) -> list[str]: + """Return the T-stages of the model.""" + distribution_t_stages = super().t_stages + try: + data_t_stages = self.patient_data[("_model", "#", "t_stage")].unique() + except AttributeError: + return distribution_t_stages + return sorted(set(distribution_t_stages) & set(data_t_stages)) + + def get_params( self, as_dict: bool = True, @@ -165,7 +176,7 @@ def set_params(self, *args: float, **kwargs: float) -> tuple[float]: The parameters can be provided either via positional arguments or via keyword arguments. The positional arguments are used up one by one first by the :py:meth:`lymph.graph.set_params` method and then by the - :py:meth:`lymph.diag_time_dists.set_params` method. + :py:meth:`lymph.models.Unilateral.set_distribution_params` method. The keyword arguments can be of the format ``"_"`` or ``"_"`` for the distributions over diagnose times. If only @@ -216,7 +227,7 @@ def comp_transition_prob( The probability is computed as the product of the transition probabilities of the individual LNLs. If ``assign`` is ``True``, the new state is assigned to - the model using the method :py:meth:`~Unilateral.set_states`. + the model using the method :py:meth:`lymph.graph.Representation.set_state`. """ trans_prob = 1 for i, lnl in enumerate(self.graph.lnls): @@ -274,10 +285,8 @@ def obs_list(self): ... ("lnl", "II"): ["III"], ... ("lnl", "III"): [], ... }) - >>> model.modalities = { - ... "CT": (0.8, 0.8), - ... "pathology": (1.0, 1.0), - ... 
} + >>> model.set_modality("CT", spec=0.8, sens=0.8) + >>> model.set_modality("pathology", spec=1.0, sens=1.0) >>> model.obs_list # doctest: +ELLIPSIS array([[0, 0, 0, 0], [0, 0, 0, 1], @@ -532,7 +541,7 @@ def comp_state_dist(self, t_stage: str = "early", mode: str = "HMM") -> np.ndarr state_dist = np.ones(shape=(len(self.graph.state_list),), dtype=float) for i, state in enumerate(self.graph.state_list): - self.set_state(*state) + self.graph.set_state(*state) for node in self.graph.lnls.values(): state_dist[i] *= node.comp_bayes_net_prob() diff --git a/tests/bayesian_unilateral_test.py b/tests/bayesian_unilateral_test.py index 779275f..93fcab2 100644 --- a/tests/bayesian_unilateral_test.py +++ b/tests/bayesian_unilateral_test.py @@ -13,7 +13,7 @@ class BayesianUnilateralModelTestCase(fixtures.BinaryUnilateralModelMixin, unitt def setUp(self): super().setUp() self.model.set_params(**self.create_random_params()) - self.model.modalities = fixtures.MODALITIES + self.model.replace_all_modalities(fixtures.MODALITIES) self.load_patient_data(filename="2021-usz-oropharynx.csv") def test_state_dist(self): diff --git a/tests/binary_bilateral_test.py b/tests/binary_bilateral_test.py index 5e4a9b4..5932281 100644 --- a/tests/binary_bilateral_test.py +++ b/tests/binary_bilateral_test.py @@ -7,7 +7,6 @@ import numpy as np from lymph import models -from lymph.modalities import Clinical class BilateralInitTest(fixtures.BilateralModelMixin, unittest.TestCase): @@ -29,25 +28,6 @@ def test_delegation(self): self.assertEqual(self.model.max_time, self.model.ipsi.max_time) self.assertEqual(list(self.model.t_stages), list(self.model.ipsi.t_stages)) - def test_edge_sync(self): - """Check if synced edges update their respective parameters.""" - for ipsi_edge in self.model.ipsi.graph.edges.values(): - contra_edge = self.model.contra.graph.edges[ipsi_edge.get_name] - ipsi_edge.set_params(spread=self.rng.random()) - self.assertEqual( - ipsi_edge.get_params("spread"), - 
contra_edge.get_params("spread"), - ) - - def test_tensor_sync(self): - """Check the transition tensors of the edges get deleted and updated properly.""" - for ipsi_edge in self.model.ipsi.graph.edges.values(): - ipsi_edge.set_params(spread=self.rng.random()) - contra_edge = self.model.contra.graph.edges[ipsi_edge.get_name] - self.assertTrue(np.all( - ipsi_edge.transition_tensor == contra_edge.transition_tensor - )) - def test_transition_matrix_sync(self): """Make sure contra transition matrix gets recomputed when ipsi param is set.""" ipsi_trans_mat = self.model.ipsi.transition_matrix() @@ -65,17 +45,14 @@ def test_transition_matrix_sync(self): def test_modality_sync(self): """Make sure the modalities are synced between the two sides.""" - self.model.ipsi.modalities = {"foo": Clinical( - specificity=self.rng.uniform(), - sensitivity=self.rng.uniform(), - )} + self.model.set_modality("foo", spec=self.rng.uniform(), sens=self.rng.uniform()) self.assertEqual( - self.model.ipsi.modalities["foo"].sensitivity, - self.model.contra.modalities["foo"].sensitivity, + self.model.ipsi.get_modality("foo").sens, + self.model.contra.get_modality("foo").sens, ) self.assertEqual( - self.model.ipsi.modalities["foo"].specificity, - self.model.contra.modalities["foo"].specificity, + self.model.ipsi.get_modality("foo").spec, + self.model.contra.get_modality("foo").spec, ) def test_asymmetric_model(self): @@ -111,68 +88,69 @@ class ModalityDelegationTestCase(fixtures.BilateralModelMixin, unittest.TestCase def setUp(self): super().setUp() - self.model.modalities = fixtures.MODALITIES + self.model.replace_all_modalities(fixtures.MODALITIES) def test_modality_access(self): """Test that the modality can be accessed.""" self.assertEqual( - self.model.modalities["CT"].sensitivity, - self.model.ipsi.modalities["CT"].sensitivity, + self.model.get_modality("CT").sens, + self.model.ipsi.get_modality("CT").sens, ) self.assertEqual( - self.model.modalities["FNA"].specificity, - 
self.model.ipsi.modalities["FNA"].specificity, + self.model.get_modality("FNA").spec, + self.model.ipsi.get_modality("FNA").spec, ) def test_modality_delete(self): """Test that the modality can be deleted.""" - del self.model.modalities["CT"] - self.assertNotIn("CT", self.model.modalities) - self.assertNotIn("CT", self.model.ipsi.modalities) - self.assertNotIn("CT", self.model.contra.modalities) + self.model.del_modality("CT") + self.assertNotIn("CT", self.model.get_all_modalities()) + self.assertNotIn("CT", self.model.ipsi.get_all_modalities()) + self.assertNotIn("CT", self.model.contra.get_all_modalities()) def test_modality_update(self): """Test that the modality can be updated.""" - self.model.modalities["CT"].sensitivity = 0.8 + old_mod = self.model.get_modality("CT") + self.model.set_modality("CT", spec=old_mod.spec, sens=0.8) self.assertEqual( - self.model.modalities["CT"].sensitivity, - self.model.ipsi.modalities["CT"].sensitivity, + self.model.get_modality("CT").sens, + self.model.ipsi.get_modality("CT").sens, ) self.assertEqual( - self.model.modalities["CT"].sensitivity, - self.model.contra.modalities["CT"].sensitivity, + self.model.get_modality("CT").sens, + self.model.contra.get_modality("CT").sens, ) def test_modality_reset(self): """Test resetting the modalities also works.""" - self.model.modalities = {"foo": Clinical(0.8, 0.9)} + self.model.set_modality("foo", spec=0.8, sens=0.9) self.assertEqual( - self.model.modalities["foo"].sensitivity, - self.model.ipsi.modalities["foo"].sensitivity, + self.model.get_modality("foo").sens, + self.model.ipsi.get_modality("foo").sens, ) self.assertEqual( - self.model.modalities["foo"].specificity, - self.model.contra.modalities["foo"].specificity, + self.model.get_modality("foo").spec, + self.model.contra.get_modality("foo").spec, ) def test_diag_time_dists_delegation(self): """Test that the diagnose time distributions are delegated.""" - self.assertTrue(np.allclose( - 
list(self.model.diag_time_dists["early"].distribution), - list(self.model.ipsi.diag_time_dists["early"].distribution), - )) - self.assertTrue(np.allclose( - list(self.model.diag_time_dists["late"].get_params()), - list(self.model.ipsi.diag_time_dists["late"].get_params()), - )) - self.assertTrue(np.allclose( - list(self.model.diag_time_dists["early"].distribution), - list(self.model.contra.diag_time_dists["early"].distribution), - )) - self.assertTrue(np.allclose( - list(self.model.diag_time_dists["late"].get_params()), - list(self.model.contra.diag_time_dists["late"].get_params()), - )) + self.assertEqual( + list(self.model.get_distribution("early").pmf), + list(self.model.ipsi.get_distribution("early").pmf), + ) + self.assertEqual( + list(self.model.get_distribution("late").get_params()), + list(self.model.ipsi.get_distribution("late").get_params()), + ) + self.assertEqual( + list(self.model.get_distribution("early").pmf), + list(self.model.contra.get_distribution("early").pmf), + ) + self.assertEqual( + list(self.model.get_distribution("late").get_params()), + list(self.model.contra.get_distribution("late").get_params()), + ) class ParameterAssignmentTestCase(fixtures.BilateralModelMixin, unittest.TestCase): @@ -202,33 +180,30 @@ def test_get_params_as_dict(self): def test_set_params_as_args(self): """Test that the parameters can be assigned.""" - ipsi_args = self.rng.uniform(size=len(self.model.ipsi.get_params())) - contra_args = self.rng.uniform(size=len(self.model.contra.get_params())) - none_args = [None] * len(ipsi_args) - - # Assigning only the ipsi side - self.model.set_params(*ipsi_args, *none_args) - self.assertTrue(np.allclose(ipsi_args, list(self.model.ipsi.get_params()))) + ipsi_tumor_spread_args = self.rng.uniform(size=len(self.model.ipsi.graph.tumor_edges)) + ipsi_lnl_spread_args = self.rng.uniform(size=len(self.model.ipsi.graph.lnl_edges)) + contra_tumor_spread_args = self.rng.uniform(size=len(self.model.contra.graph.tumor_edges)) + 
contra_lnl_spread_args = self.rng.uniform(size=len(self.model.contra.graph.lnl_edges)) + dist_params = self.rng.uniform(size=len(self.model.get_distribution_params())) + + self.model.set_params( + *ipsi_tumor_spread_args, + *contra_tumor_spread_args, + *ipsi_lnl_spread_args, + *contra_lnl_spread_args, + *dist_params, + ) self.assertEqual( - list(self.model.ipsi.diag_time_dists["late"].get_params())[0], - list(self.model.contra.diag_time_dists["late"].get_params())[0], + [*ipsi_tumor_spread_args, *ipsi_lnl_spread_args, *dist_params], + list(self.model.ipsi.get_params(as_dict=False)), ) - - # Assigning only the contra side - self.model.set_params(*none_args, *contra_args) - self.assertTrue(np.allclose(contra_args, list(self.model.contra.get_params()))) self.assertEqual( - list(self.model.ipsi.diag_time_dists["late"].get_params())[0], - list(self.model.contra.diag_time_dists["late"].get_params())[0], + [*contra_tumor_spread_args, *contra_lnl_spread_args, *dist_params], + list(self.model.contra.get_params(as_dict=False)), ) - - # Assigning both sides - self.model.set_params(*ipsi_args, *contra_args) - self.assertTrue(np.allclose(ipsi_args[:-1], list(self.model.ipsi.get_params())[:-1])) - self.assertTrue(np.allclose(contra_args, list(self.model.contra.get_params()))) self.assertEqual( - list(self.model.ipsi.diag_time_dists["late"].get_params())[0], - list(self.model.contra.diag_time_dists["late"].get_params())[0], + list(self.model.ipsi.get_distribution("late").get_params())[0], + list(self.model.contra.get_distribution("late").get_params())[0], ) @@ -237,7 +212,7 @@ class LikelihoodTestCase(fixtures.BilateralModelMixin, unittest.TestCase): def setUp(self): super().setUp() - self.model.modalities = fixtures.MODALITIES + self.model.replace_all_modalities(fixtures.MODALITIES) self.load_patient_data() def test_compute_likelihood_twice(self): @@ -252,7 +227,7 @@ class RiskTestCase(fixtures.BilateralModelMixin, unittest.TestCase): def setUp(self): super().setUp() - 
self.model.modalities = fixtures.MODALITIES + self.model.replace_all_modalities(fixtures.MODALITIES) def create_random_diagnoses(self): """Create a random diagnosis for each modality and LNL.""" @@ -262,14 +237,14 @@ def create_random_diagnoses(self): diagnoses[side] = {} side_model = getattr(self.model, side) lnl_names = side_model.graph.lnls.keys() - for modality in side_model.modalities: + for modality in side_model.get_all_modalities(): diagnoses[side][modality] = fixtures.create_random_pattern(lnl_names) return diagnoses def test_posterior_state_dist(self): """Test that the posterior state distribution is computed correctly.""" - num_states = len(self.model.ipsi.state_list) + num_states = len(self.model.ipsi.graph.state_list) random_parameters = self.create_random_params() random_diagnoses = self.create_random_diagnoses() @@ -306,7 +281,7 @@ class DataGenerationTestCase(fixtures.BilateralModelMixin, unittest.TestCase): def setUp(self): super().setUp() - self.model.modalities = fixtures.MODALITIES + self.model.replace_all_modalities(fixtures.MODALITIES) self.init_diag_time_dists(early="frozen", late="parametric") self.model.set_params(**self.create_random_params()) @@ -318,7 +293,7 @@ def test_generate_data(self): rng=self.rng, ) - for mod in self.model.modalities.keys(): + for mod in self.model.get_all_modalities(): self.assertIn(mod, dataset) for side in ["ipsi", "contra"]: self.assertIn(side, dataset[mod]) diff --git a/tests/binary_unilateral_test.py b/tests/binary_unilateral_test.py index ed8704c..b72a43f 100644 --- a/tests/binary_unilateral_test.py +++ b/tests/binary_unilateral_test.py @@ -4,7 +4,6 @@ import numpy as np from lymph.graph import LymphNodeLevel, Tumor -from lymph.modalities import Pathological from tests import fixtures @@ -82,42 +81,6 @@ def test_tumor_to_lnl_edges(self): self.assertIn(edge.get_name(middle="_to_"), connecting_edge_names) -class DelegationTestCase(fixtures.BinaryUnilateralModelMixin, unittest.TestCase): - """Test the 
delegation of parameters via the `DelegatorMixing`.""" - - def test_delegation(self): - """Make sure the specified attributes from graph are delegated upwards.""" - self.assertEqual( - self.model.graph.is_binary, - self.model.is_binary, - ) - self.assertEqual( - self.model.graph.is_trinary, - self.model.is_trinary, - ) - self.assertEqual( - self.model.graph.get_state(), - self.model.get_state(), - ) - - def test_set_state_delegation(self): - """Check that the ``set_state`` method is also correctly delegated.""" - old_state = self.model.get_state() - choice = [0,1] - if self.model.is_trinary: - choice.append(2) - - new_state = self.rng.choice(a=choice, size=len(old_state)) - self.model.set_state(*new_state) - self.assertTrue(np.all(self.model.get_state() == new_state)) - self.assertTrue(np.all(self.model.graph.get_state() == new_state)) - - new_state = self.rng.choice(a=choice, size=len(old_state)) - self.model.graph.set_state(*new_state) - self.assertTrue(np.all(self.model.get_state() == new_state)) - self.assertTrue(np.all(self.model.graph.get_state() == new_state)) - - class ParameterAssignmentTestCase(fixtures.BinaryUnilateralModelMixin, unittest.TestCase): """Test the assignment of parameters in a binary model.""" @@ -125,7 +88,7 @@ def test_params_assignment_via_lookup(self): """Make sure the spread parameters are assigned correctly.""" params_to_set = self.create_random_params() edges_and_dists = self.model.graph.edges.copy() - edges_and_dists.update(self.model.diag_time_dists) + edges_and_dists.update(self.model.get_all_distributions()) for param_name, value in params_to_set.items(): name, type_ = param_name.rsplit("_", maxsplit=1) @@ -141,7 +104,7 @@ def test_params_assignment_via_method(self): self.model.set_params(**params_to_set) edges_and_dists = self.model.graph.edges.copy() - edges_and_dists.update(self.model.diag_time_dists) + edges_and_dists.update(self.model.get_all_distributions()) for param_name, value in params_to_set.items(): name, type_ = 
param_name.rsplit("_", maxsplit=1) @@ -205,12 +168,12 @@ class ObservationMatrixTestCase(fixtures.BinaryUnilateralModelMixin, unittest.Te def setUp(self): """Initialize a simple binary model.""" super().setUp() - self.model.modalities = fixtures.MODALITIES + self.model.replace_all_modalities(fixtures.MODALITIES) def test_shape(self): """Make sure the observation matrix has the correct shape.""" num_lnls = len(self.model.graph.lnls) - num_modalities = len(self.model.modalities) + num_modalities = len(self.model.get_all_modalities()) expected_shape = (2**num_lnls, 2**(num_lnls * num_modalities)) self.assertEqual(self.model.observation_matrix().shape, expected_shape) @@ -226,7 +189,7 @@ class PatientDataTestCase(fixtures.BinaryUnilateralModelMixin, unittest.TestCase def setUp(self): """Load patient data.""" super().setUp() - self.model.modalities = fixtures.MODALITIES + self.model.replace_all_modalities(fixtures.MODALITIES) self.init_diag_time_dists(early="frozen", late="parametric", foo="frozen") self.model.set_params(**self.create_random_params()) self.load_patient_data(filename="2021-usz-oropharynx.csv") @@ -241,7 +204,7 @@ def test_load_patient_data(self): def test_t_stages(self): """Make sure all T-stages are present.""" t_stages_in_data = self.model.patient_data["_model", "#" ,"t_stage"].unique() - t_stages_in_diag_time_dists = self.model.diag_time_dists.keys() + t_stages_in_diag_time_dists = super(type(self.model), self.model).t_stages t_stages_in_model = list(self.model.t_stages) t_stages_intersection = set(t_stages_in_data).intersection(t_stages_in_diag_time_dists) @@ -305,7 +268,7 @@ class LikelihoodTestCase(fixtures.BinaryUnilateralModelMixin, unittest.TestCase) def setUp(self): """Load patient data.""" super().setUp() - self.model.modalities = fixtures.MODALITIES + self.model.replace_all_modalities(fixtures.MODALITIES) self.init_diag_time_dists(early="frozen", late="parametric") self.model.set_params(**self.create_random_params()) 
self.load_patient_data(filename="2021-usz-oropharynx.csv") @@ -334,7 +297,7 @@ class RiskTestCase(fixtures.BinaryUnilateralModelMixin, unittest.TestCase): def setUp(self): """Load params.""" super().setUp() - self.model.modalities = fixtures.MODALITIES + self.model.replace_all_modalities(fixtures.MODALITIES) self.init_diag_time_dists(early="frozen", late="parametric") self.model.set_params(**self.create_random_params()) @@ -343,7 +306,7 @@ def create_random_diagnoses(self): lnl_names = list(self.model.graph.lnls.keys()) diagnoses = {} - for modality in self.model.modalities: + for modality in self.model.get_all_modalities(): diagnoses[modality] = fixtures.create_random_pattern(lnl_names) return diagnoses @@ -351,7 +314,8 @@ def create_random_diagnoses(self): def test_comp_diagnose_encoding(self): """Check computation of one-hot encoding of diagnoses.""" random_diagnoses = self.create_random_diagnoses() - num_lnls, num_mods = len(self.model.graph.lnls), len(self.model.modalities) + num_lnls = len(self.model.graph.lnls) + num_mods = len(self.model.get_all_modalities()) num_posible_diagnoses = 2**(num_lnls * num_mods) diagnose_encoding = self.model.comp_diagnose_encoding(random_diagnoses) @@ -393,7 +357,7 @@ class DataGenerationTestCase(fixtures.BinaryUnilateralModelMixin, unittest.TestC def setUp(self): """Load params.""" super().setUp() - self.model.modalities = fixtures.MODALITIES + self.model.replace_all_modalities(fixtures.MODALITIES) self.init_diag_time_dists(early="frozen", late="parametric") self.model.set_params(**self.create_random_params()) @@ -428,10 +392,10 @@ def test_distribution_of_patients(self): lnl_edge.set_spread_prob(0.) 
# make all patients diagnosed after exactly one time-step - self.model.diag_time_dists["early"] = [0,1,0,0,0,0,0,0,0,0,0] + self.model.set_distribution("early", [0,1,0,0,0,0,0,0,0,0,0]) # assign only one pathology modality - self.model.modalities = {"tmp": Pathological(specificity=1., sensitivity=1.)} + self.model.set_modality("tmp", spec=1., sens=1.) # extract the tumor spread parameters params = self.model.get_params(as_dict=True) diff --git a/tests/distribution_test.py b/tests/distribution_test.py index 06afeeb..7a5fcee 100644 --- a/tests/distribution_test.py +++ b/tests/distribution_test.py @@ -5,7 +5,7 @@ import numpy as np import scipy as sp -from lymph.diagnose_times import Distribution, DistributionsUserDict +from lymph.diagnose_times import Distribution class FixtureMixin: @@ -80,52 +80,3 @@ def test_updateable_distribution_raises_value_error(self): dist = Distribution(self.func_arg, max_time=self.max_time) self.assertTrue(dist.is_updateable) self.assertRaises(ValueError, dist.set_params, p=1.5) - - -class DistributionDictTestCase(FixtureMixin, unittest.TestCase): - """Test the distribution dictionary.""" - - def setUp(self): - super().setUp() - self.rng = np.random.default_rng(42) - self.dist_dict = DistributionsUserDict( - max_time=self.max_time, - trigger_callbacks=[], - ) - - def test_setitem_distribution_from_array(self): - """Test setting a distribution created from an array.""" - self.dist_dict['test'] = Distribution(self.array_arg) - self.assertTrue('test' in self.dist_dict) - self.assertTrue(self.dist_dict.max_time == self.max_time) - - def test_setitem_distribution_from_func(self): - """Test setting a distribution created from a function.""" - self.assertRaises(ValueError, Distribution, self.func_arg) - self.dist_dict['test'] = Distribution(self.func_arg, max_time=self.max_time) - self.assertTrue('test' in self.dist_dict) - - def test_setitem_from_array(self): - """Test setting an item via an array distribution.""" - self.dist_dict['test'] = 
self.array_arg - self.assertTrue('test' in self.dist_dict) - - def test_setitem_from_func(self): - """Test setting an item via a parametrized distribution.""" - self.dist_dict['test'] = self.func_arg - self.assertTrue('test' in self.dist_dict) - - def test_multiple_setitem(self): - """Test setting multiple distributions.""" - for i in range(5): - func = lambda support, p=0.2: sp.stats.binom.pmf(support, self.max_time, p) - self.dist_dict[f"test_{i}"] = func - - self.assertTrue(len(self.dist_dict) == 5) - for i in range(5): - self.assertTrue(f"test_{i}" in self.dist_dict) - self.assertTrue(self.dist_dict[f"test_{i}"].is_updateable) - param = self.rng.uniform() - self.dist_dict[f"test_{i}"].set_params(p=param) - returned_param = self.dist_dict[f"test_{i}"].get_params(as_dict=True) - self.assertTrue(np.allclose(param, returned_param["p"])) diff --git a/tests/fixtures.py b/tests/fixtures.py index f4662f9..adafe83 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -17,8 +17,8 @@ from lymph.types import PatternType MODALITIES = { - "CT": Clinical(specificity=0.81, sensitivity=0.86), - "FNA": Pathological(specificity=0.95, sensitivity=0.81), + "CT": Clinical(spec=0.81, sens=0.86), + "FNA": Pathological(spec=0.95, sens=0.81), } RNG = np.random.default_rng(42) @@ -74,7 +74,6 @@ def _create_random_frozen_dist( unnormalized = rng.random(size=max_time + 1) return unnormalized / np.sum(unnormalized) - def _create_random_parametric_dist( max_time: int, rng: np.random.Generator = RNG, @@ -88,7 +87,6 @@ def _pmf(support: np.ndarray, p: float = rng.random()) -> np.ndarray: max_time=max_time, ) - def create_random_dist( type_: str, max_time: int, @@ -134,7 +132,7 @@ def create_random_params(self) -> dict[str, float]: warnings.simplefilter("ignore", category=UserWarning) params.update({ f"{t_stage}_{type_}": self.rng.random() - for t_stage, dist in self.model.diag_time_dists.items() + for t_stage, dist in self.model.get_all_distributions().items() for type_ in 
dist.get_params(as_dict=True).keys() }) return params @@ -143,8 +141,9 @@ def create_random_params(self) -> dict[str, float]: def init_diag_time_dists(self, **dists) -> None: """Init the diagnose time distributions.""" for t_stage, type_ in dists.items(): - self.model.diag_time_dists[t_stage] = create_random_dist( - type_, self.model.max_time, self.rng + self.model.set_distribution( + t_stage, + create_random_dist(type_, self.model.max_time, self.rng), ) @@ -178,8 +177,9 @@ def setUp(self): def init_diag_time_dists(self, **dists) -> None: """Init the diagnose time distributions.""" for t_stage, type_ in dists.items(): - self.model.diag_time_dists[t_stage] = create_random_dist( - type_, self.model.max_time, self.rng + self.model.set_distribution( + t_stage, + create_random_dist(type_, self.model.max_time, self.rng), ) @@ -226,7 +226,7 @@ def create_random_params(self) -> dict[str, float]: warnings.simplefilter("ignore", category=UserWarning) params.update({ f"{t_stage}_{type_}": self.rng.random() - for t_stage, dist in self.model.diag_time_dists.items() + for t_stage, dist in self.model.get_all_distributions().items() for type_ in dist.get_params(as_dict=True).keys() }) @@ -236,21 +236,22 @@ def create_random_params(self) -> dict[str, float]: def init_diag_time_dists(self, **dists) -> None: """Init the diagnose time distributions.""" for t_stage, type_ in dists.items(): - self.model.diag_time_dists[t_stage] = create_random_dist( - type_, self.model.max_time, self.rng + self.model.set_distribution( + t_stage, + create_random_dist(type_, self.model.max_time, self.rng), ) def get_modalities_subset(self, names: list[str]) -> dict[str, Modality]: """Create a dictionary of modalities.""" modalities_in_data = { - "CT": Clinical(specificity=0.76, sensitivity=0.81), - "MRI": Clinical(specificity=0.63, sensitivity=0.81), - "PET": Clinical(specificity=0.86, sensitivity=0.79), - "FNA": Pathological(specificity=0.98, sensitivity=0.80), - "diagnostic_consensus": 
Clinical(specificity=0.86, sensitivity=0.81), - "pathology": Pathological(specificity=1.0, sensitivity=1.0), - "pCT": Clinical(specificity=0.86, sensitivity=0.81), + "CT": Clinical(spec=0.76, sens=0.81), + "MRI": Clinical(spec=0.63, sens=0.81), + "PET": Clinical(spec=0.86, sens=0.79), + "FNA": Pathological(spec=0.98, sens=0.80), + "diagnostic_consensus": Clinical(spec=0.86, sens=0.81), + "pathology": Pathological(spec=1.0, sens=1.0), + "pCT": Clinical(spec=0.86, sens=0.81), } return {name: modalities_in_data[name] for name in names} diff --git a/tests/integration_test.py b/tests/integration_test.py index 8ba9c01..697e830 100644 --- a/tests/integration_test.py +++ b/tests/integration_test.py @@ -22,7 +22,7 @@ class IntegrationTestCase(fixtures.BinaryUnilateralModelMixin, unittest.TestCase def setUp(self): self.graph_dict = fixtures.get_graph(size="medium") self.model = lymph.models.Unilateral.binary(graph_dict=self.graph_dict) - self.model.modalities = {"PET": [0.86, 0.79]} + self.model.set_modality("PET", spec=0.86, sens=0.79) self.load_patient_data("2021-usz-oropharynx.csv") early_fixed = sp.stats.binom.pmf( @@ -30,8 +30,8 @@ def setUp(self): self.model.max_time, 0.4, ) - self.model.diag_time_dists["early"] = early_fixed - self.model.diag_time_dists["late"] = late_binomial + self.model.set_distribution("early", early_fixed) + self.model.set_distribution("late", late_binomial) def test_likelihood_value(self): """Check that the computed likelihood is correct.""" diff --git a/tests/trinary_unilateral_test.py b/tests/trinary_unilateral_test.py index 7059036..95ef2fe 100644 --- a/tests/trinary_unilateral_test.py +++ b/tests/trinary_unilateral_test.py @@ -65,14 +65,14 @@ class TrinaryObservationMatrixTestCase(fixtures.TrinaryFixtureMixin, unittest.Te def setUp(self): super().setUp() - self.model.modalities = self.get_modalities_subset( - names=["diagnostic_consensus", "pathology"], + self.model.replace_all_modalities( + 
self.get_modalities_subset(names=["diagnostic_consensus", "pathology"]), ) def test_observation_matrix(self) -> None: """Test the observation matrix of the model.""" num_lnls = len(self.model.graph.lnls) - num = num_lnls * len(self.model.modalities) + num = num_lnls * len(self.model.get_all_modalities()) observation_matrix = self.model.observation_matrix() self.assertEqual(observation_matrix.shape, (3 ** num_lnls, 2 ** num)) @@ -85,7 +85,7 @@ class TrinaryDiagnoseMatricesTestCase(fixtures.TrinaryFixtureMixin, unittest.Tes def setUp(self): super().setUp() - self.model.modalities = fixtures.MODALITIES + self.model.replace_all_modalities(fixtures.MODALITIES) self.load_patient_data(filename="2021-usz-oropharynx.csv") def get_patient_data(self) -> pd.DataFrame: @@ -130,7 +130,7 @@ class TrinaryLikelihoodTestCase(fixtures.TrinaryFixtureMixin, unittest.TestCase) def setUp(self): """Load patient data.""" super().setUp() - self.model.modalities = fixtures.MODALITIES + self.model.replace_all_modalities(fixtures.MODALITIES) self.init_diag_time_dists(early="frozen", late="parametric") self.model.set_params(**self.create_random_params()) self.load_patient_data(filename="2021-usz-oropharynx.csv") @@ -159,7 +159,7 @@ class TrinaryRiskTestCase(fixtures.TrinaryFixtureMixin, unittest.TestCase): def setUp(self): """Load patient data.""" super().setUp() - self.model.modalities = fixtures.MODALITIES + self.model.replace_all_modalities(fixtures.MODALITIES) self.init_diag_time_dists(early="frozen", late="parametric") self.load_patient_data(filename="2021-usz-oropharynx.csv") @@ -168,7 +168,7 @@ def create_random_diagnoses(self): lnl_names = list(self.model.graph.lnls.keys()) diagnoses = {} - for modality in self.model.modalities: + for modality in self.model.get_all_modalities(): diagnoses[modality] = fixtures.create_random_pattern(lnl_names) return diagnoses From d3e1950c646c10b31a89b29e4234e2f787dc4680 Mon Sep 17 00:00:00 2001 From: rmnldwg <48687784+rmnldwg@users.noreply.github.com> 
Date: Wed, 21 Feb 2024 17:34:28 +0100 Subject: [PATCH 40/75] change: try to make tests fast again --- lymph/models/unilateral.py | 3 ++- tests/binary_unilateral_test.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/lymph/models/unilateral.py b/lymph/models/unilateral.py index d9db808..0cd2941 100644 --- a/lymph/models/unilateral.py +++ b/lymph/models/unilateral.py @@ -827,8 +827,9 @@ def draw_patients( p=stage_dist, size=num, ) + distributions = self.get_all_distributions() drawn_diag_times = [ - self.get_distribution(t_stage).draw_diag_times(rng=rng) + distributions[t_stage].draw_diag_times(rng=rng) for t_stage in drawn_t_stages ] diff --git a/tests/binary_unilateral_test.py b/tests/binary_unilateral_test.py index b72a43f..e3d3646 100644 --- a/tests/binary_unilateral_test.py +++ b/tests/binary_unilateral_test.py @@ -1,10 +1,10 @@ """Test the binary unilateral system.""" import unittest +import fixtures import numpy as np from lymph.graph import LymphNodeLevel, Tumor -from tests import fixtures class InitTestCase(fixtures.BinaryUnilateralModelMixin, unittest.TestCase): From 1fc8fbfd8cc434d2e8dfde8d002993a307b1cbc5 Mon Sep 17 00:00:00 2001 From: Roman Ludwig <48687784+rmnldwg@users.noreply.github.com> Date: Thu, 22 Feb 2024 09:20:31 +0100 Subject: [PATCH 41/75] feat: add more get/set params methods --- lymph/helper.py | 40 +++++++++--------- lymph/models/bilateral.py | 83 ++++++++++++++++++++++---------------- lymph/models/unilateral.py | 56 +++++++++++++++++++++---- 3 files changed, 120 insertions(+), 59 deletions(-) diff --git a/lymph/helper.py b/lymph/helper.py index b152c99..5d3aebf 100644 --- a/lymph/helper.py +++ b/lymph/helper.py @@ -680,6 +680,22 @@ def unflatten_and_split( return split_kwargs, global_kwargs +def get_params_from( + objects: dict[str, HasGetParams], + as_dict: bool = True, + as_flat: bool = True, +) -> Iterable[float] | dict[str, float]: + """Get the parameters from each ``get_params()`` method of the ``objects``.""" + 
params = {} + for key, obj in objects.items(): + params[key] = obj.get_params(as_flat=as_flat) + + if as_flat or not as_dict: + params = flatten(params) + + return params if as_dict else params.values() + + def set_params_for( objects: dict[str, HasSetParams], *args: float, @@ -712,7 +728,12 @@ def set_bilateral_params_for( is_symmetric: bool = False, **kwargs: float, ) -> tuple[float]: - """Pass arguments to each ``set_params()`` method of the ``objects``.""" + """Pass arguments to ``set_params()`` of ``ipsi_objects`` and ``contra_objects``. + + If ``is_symmetric`` is ``True``, the parameters of the ``contra_objects`` will be + set to the parameters of the ``ipsi_objects``. Otherwise, the parameters of the + ``contra_objects`` will be set independently. + """ kwargs, global_kwargs = unflatten_and_split(kwargs, expected_keys=["ipsi", "contra"]) ipsi_kwargs = global_kwargs.copy() @@ -727,20 +748,3 @@ def set_bilateral_params_for( args = set_params_for(contra_objects, *args, **contra_kwargs) return args - - -def has_any_dunder_method(obj: Any, *methods: str) -> bool: - """Check whether a class has any of the given dunder methods.""" - return any(hasattr(obj, method) for method in methods) - - -def check_unique_and_get_first(objects: Iterable, attr: str = "") -> Any: - """Check if ``objects`` are unique via a set and return of them. - - This function is meant to be used with the ``AccessPassthrough`` class. It is - used to retrieve the last element of a set of values that are not synchronized. - """ - object_set = set(objects) - if len(object_set) > 1: - warnings.warn(f"{attr} not synced: {object_set}. 
Setting should sync.") - return sorted(object_set).pop() diff --git a/lymph/models/bilateral.py b/lymph/models/bilateral.py index 7d32181..f1bae84 100644 --- a/lymph/models/bilateral.py +++ b/lymph/models/bilateral.py @@ -8,7 +8,12 @@ import pandas as pd from lymph import diagnose_times, matrix, modalities, models, types -from lymph.helper import early_late_mapping, flatten, set_bilateral_params_for +from lymph.helper import ( + early_late_mapping, + flatten, + synchronize_params, + unflatten_and_split, +) warnings.filterwarnings("ignore", category=pd.errors.PerformanceWarning) logger = logging.getLogger(__name__) @@ -187,40 +192,50 @@ def set_params(self, *args: float, **kwargs: float) -> tuple[float]: Anything not prefixed by ``"ipsi_"`` or ``"contra_"`` is passed to both sides of the neck. This does obviously not work with positional arguments. - Note: - When setting the parameters via positional arguments, the order is - important: - - 1. The parameters of the edges from tumor to LNLs: - 1. first the ipsilateral parameters, - 2. if ``is_symmetric["tumor_spread"]`` is ``False``, the contralateral - parameters. Otherwise, the ipsilateral parameters are used for both - sides. - 2. The parameters of the edges from LNLs to tumor: - 1. again, first the ipsilateral parameters, - 2. if ``is_symmetric["lnl_spread"]`` is ``False``, the contralateral - parameters. Otherwise, the ipsilateral parameters are used for both - sides. - 3. The parameters of the parametric distributions for marginalizing over - diagnose times. - - When still some positional arguments remain after that, they are returned - in a tuple. + When setting the parameters via positional arguments, the order is + important: + + 1. The parameters of the edges from tumor to LNLs: + 1. first the ipsilateral parameters, + 2. if ``is_symmetric["tumor_spread"]`` is ``False``, the contralateral + parameters. Otherwise, the ipsilateral parameters are used for both + sides. + 2. 
The parameters of the edges from LNLs to tumor: + 1. again, first the ipsilateral parameters, + 2. if ``is_symmetric["lnl_spread"]`` is ``False``, the contralateral + parameters. Otherwise, the ipsilateral parameters are used for both + sides. + 3. The parameters of the parametric distributions for marginalizing over + diagnose times. + + When still some positional arguments remain after that, they are returned + in a tuple. """ - args = set_bilateral_params_for( - *args, - ipsi_objects=self.ipsi.graph.tumor_edges, - contra_objects=self.contra.graph.tumor_edges, - is_symmetric=self.is_symmetric["tumor_spread"], - **kwargs, - ) - args = set_bilateral_params_for( - *args, - ipsi_objects=self.ipsi.graph.lnl_edges, - contra_objects=self.contra.graph.lnl_edges, - is_symmetric=self.is_symmetric["lnl_spread"], - **kwargs, - ) + kwargs, global_kwargs = unflatten_and_split(kwargs, expected_keys=["ipsi", "contra"]) + + ipsi_kwargs = global_kwargs.copy() + ipsi_kwargs.update(kwargs.get("ipsi", {})) + contra_kwargs = global_kwargs.copy() + contra_kwargs.update(kwargs.get("contra", {})) + + args = self.ipsi.set_tumor_spread_params(*args, **ipsi_kwargs) + if self.is_symmetric["tumor_spread"]: + synchronize_params( + get_from=self.ipsi.graph.tumor_edges, + set_to=self.contra.graph.tumor_edges, + ) + else: + args = self.contra.set_tumor_spread_params(*args, **contra_kwargs) + + args = self.ipsi.set_lnl_spread_params(*args, **ipsi_kwargs) + if self.is_symmetric["lnl_spread"]: + synchronize_params( + get_from=self.ipsi.graph.lnl_edges, + set_to=self.contra.graph.lnl_edges, + ) + else: + args = self.contra.set_lnl_spread_params(*args, **contra_kwargs) + return self.set_distribution_params(*args, **kwargs) diff --git a/lymph/models/unilateral.py b/lymph/models/unilateral.py index 0cd2941..fab0c5d 100644 --- a/lymph/models/unilateral.py +++ b/lymph/models/unilateral.py @@ -3,7 +3,7 @@ import warnings from functools import cached_property from itertools import product -from typing 
import Any, Callable, Iterable +from typing import Any, Callable, Iterable, Literal import numpy as np import pandas as pd @@ -13,6 +13,8 @@ dict_to_func, early_late_mapping, flatten, + get_params_from, + set_params_for, smart_updating_dict_cached_property, ) @@ -139,15 +141,29 @@ def is_binary(self) -> bool: return self.graph.is_binary - @property - def t_stages(self) -> list[str]: + def get_t_stages( + self, + which: Literal["valid", "distributions", "data"] = "valid", + ) -> list[str]: """Return the T-stages of the model.""" distribution_t_stages = super().t_stages + try: data_t_stages = self.patient_data[("_model", "#", "t_stage")].unique() except AttributeError: + data_t_stages = [] + + if which == "valid": + return sorted(set(distribution_t_stages) & set(data_t_stages)) + if which == "distributions": return distribution_t_stages - return sorted(set(distribution_t_stages) & set(data_t_stages)) + if which == "data": + return data_t_stages + + raise ValueError( + f"Invalid value for 'which': {which}. Must be either 'valid', " + "'distributions', or 'data'." + ) def get_params( @@ -170,6 +186,23 @@ def get_params( return params if as_dict else params.values() + def get_tumor_spread_params( + self, + as_dict: bool = True, + as_flat: bool = True, + ) -> Iterable[float] | dict[str, float]: + """Get the parameters of the tumor spread edges.""" + return get_params_from(self.graph.tumor_edges, as_dict, as_flat) + + def get_lnl_spread_params( + self, + as_dict: bool = True, + as_flat: bool = True, + ) -> Iterable[float] | dict[str, float]: + """Get the parameters of the LNL spread edges.""" + return get_params_from(self.graph.lnl_edges, as_dict, as_flat) + + def set_params(self, *args: float, **kwargs: float) -> tuple[float]: """Assign new parameters to the model. 
@@ -218,6 +251,15 @@ def set_params(self, *args: float, **kwargs: float) -> tuple[float]: return self.set_distribution_params(*args, **kwargs) + def set_tumor_spread_params(self, *args: float, **kwargs: float) -> tuple[float]: + """Assign new parameters to the tumor spread edges.""" + return set_params_for(self.graph.tumor_edges, *args, **kwargs) + + def set_lnl_spread_params(self, *args: float, **kwargs: float) -> tuple[float]: + """Assign new parameters to the LNL spread edges.""" + return set_params_for(self.graph.lnl_edges, *args, **kwargs) + + def comp_transition_prob( self, newstate: list[int], @@ -439,7 +481,7 @@ def load_patient_data( lambda row: mapping(row["tumor", "1", "t_stage"]), axis=1 ) - for t_stage in self.t_stages: + for t_stage in self.get_t_stages("distributions"): if t_stage not in patient_data["_model", "#", "t_stage"].values: warnings.warn(f"No data for T-stage {t_stage} found.") @@ -585,7 +627,7 @@ def _hmm_likelihood(self, log: bool = True, t_stage: str | None = None) -> float llh = 0. if log else 1. 
if t_stage is None: - t_stages = self.t_stages + t_stages = self.get_t_stages("valid") else: t_stages = [t_stage] @@ -823,7 +865,7 @@ def draw_patients( stage_dist = np.array(stage_dist) / sum(stage_dist) drawn_t_stages = rng.choice( - a=self.t_stages, + a=self.get_t_stages("distributions"), p=stage_dist, size=num, ) From 40fed350b7a487f8bc82f3f0deccbf426f25d4a3 Mon Sep 17 00:00:00 2001 From: Roman Ludwig <48687784+rmnldwg@users.noreply.github.com> Date: Thu, 22 Feb 2024 09:20:55 +0100 Subject: [PATCH 42/75] test: fix long-running test --- tests/binary_unilateral_test.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/tests/binary_unilateral_test.py b/tests/binary_unilateral_test.py index e3d3646..84a2048 100644 --- a/tests/binary_unilateral_test.py +++ b/tests/binary_unilateral_test.py @@ -203,9 +203,9 @@ def test_load_patient_data(self): def test_t_stages(self): """Make sure all T-stages are present.""" - t_stages_in_data = self.model.patient_data["_model", "#" ,"t_stage"].unique() - t_stages_in_diag_time_dists = super(type(self.model), self.model).t_stages - t_stages_in_model = list(self.model.t_stages) + t_stages_in_data = self.model.get_t_stages("data") + t_stages_in_diag_time_dists = self.model.get_t_stages("distributions") + t_stages_in_model = self.model.get_t_stages("valid") t_stages_intersection = set(t_stages_in_data).intersection(t_stages_in_diag_time_dists) self.assertNotIn("foo", t_stages_in_model) @@ -395,14 +395,15 @@ def test_distribution_of_patients(self): self.model.set_distribution("early", [0,1,0,0,0,0,0,0,0,0,0]) # assign only one pathology modality + self.model.clear_modalities() self.model.set_modality("tmp", spec=1., sens=1.) 
# extract the tumor spread parameters params = self.model.get_params(as_dict=True) params = { - key.replace("T_to_", "").replace("_spread", ""): value + key.replace("Tto", "").replace("_spread", ""): value for key, value in params.items() - if "T_to_" in key + if "Tto" in key } # draw large enough amount of patients From 1d37abcc17990fb97faf23e5a5bfab7befbec272 Mon Sep 17 00:00:00 2001 From: Roman Ludwig <48687784+rmnldwg@users.noreply.github.com> Date: Thu, 22 Feb 2024 15:51:57 +0100 Subject: [PATCH 43/75] chore: move timing data --- tests/{ => data}/trans_mat_timings_897b495.csv | 0 tests/{ => data}/trans_mat_timings_v0.4.3.csv | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename tests/{ => data}/trans_mat_timings_897b495.csv (100%) rename tests/{ => data}/trans_mat_timings_v0.4.3.csv (100%) diff --git a/tests/trans_mat_timings_897b495.csv b/tests/data/trans_mat_timings_897b495.csv similarity index 100% rename from tests/trans_mat_timings_897b495.csv rename to tests/data/trans_mat_timings_897b495.csv diff --git a/tests/trans_mat_timings_v0.4.3.csv b/tests/data/trans_mat_timings_v0.4.3.csv similarity index 100% rename from tests/trans_mat_timings_v0.4.3.csv rename to tests/data/trans_mat_timings_v0.4.3.csv From 59b02243809d08ee5aac4ee92529060aa1121fc9 Mon Sep 17 00:00:00 2001 From: Roman Ludwig <48687784+rmnldwg@users.noreply.github.com> Date: Thu, 22 Feb 2024 15:53:23 +0100 Subject: [PATCH 44/75] fix!: make likelihood work with emcee again The way the likelihood was defined, it did not actually play nicely with how the emcee package works. This is now fixed. --- lymph/models/bilateral.py | 147 +++++++++++++++++++++++-------------- lymph/models/unilateral.py | 31 +++----- lymph/types.py | 3 +- 3 files changed, 102 insertions(+), 79 deletions(-) diff --git a/lymph/models/bilateral.py b/lymph/models/bilateral.py index f1bae84..0c02b90 100644 --- a/lymph/models/bilateral.py +++ b/lymph/models/bilateral.py @@ -54,9 +54,8 @@ def __init__( stores the graph. 
With the dictionary ``is_symmetric`` the user can specify which aspects of the - model are symmetric. Valid keys are ``"modalities"``, ``"tumor_spread"``, - and ``"lnl_spread"``. The values are booleans, with ``True`` meaning that the - aspect is symmetric. + model are symmetric. Valid keys are ``"tumor_spread"`` and ``"lnl_spread"``. + The values are booleans, with ``True`` meaning that the aspect is symmetric. Note: The symmetries of tumor and LNL spread are only guaranteed if the @@ -78,13 +77,12 @@ def __init__( contralateral_kwargs=contralateral_kwargs, ) - self.is_symmetric = is_symmetric - if self.is_symmetric is None: - self.is_symmetric = { - "modalities": True, + if is_symmetric is None: + is_symmetric = { "tumor_spread": False, "lnl_spread": True, } + self.is_symmetric = is_symmetric diagnose_times.Composite.__init__( self, @@ -153,6 +151,46 @@ def is_binary(self) -> bool: return self.ipsi.is_binary + def get_spread_params( + self, + as_dict: bool = True, + as_flat: bool = True, + ) -> Iterable[float] | dict[str, float]: + """Return the parameters of the model's spread edges. + + Depending on the symmetries (i.e. the ``is_symmetric`` attribute), this returns + different results: + + If ``is_symmetric["tumor_spread"] = False``, the flattened (``as_flat=True``) + dictionary (``as_dict=True``) will contain keys of the form + ``ipsi_Tto_spread`` and ``contra_Tto_spread``, where ```` is the + name of the lymph node level. However, if the tumor spread is set to be + symmetric, the leading ``ipsi_`` or ``contra_`` is omitted, since it's valid + for both sides. + + This is consistent with how the :py:meth:`~lymph.models.Bilteral.set_params` + method expects the keyword arguments in case of the symmetry configurations. 
+ """ + params = {} + + if self.is_symmetric["tumor_spread"]: + params.update(self.ipsi.get_tumor_spread_params(as_flat=as_flat)) + else: + params["ipsi"] = self.ipsi.get_tumor_spread_params(as_flat=as_flat) + params["contra"] = self.contra.get_tumor_spread_params(as_flat=as_flat) + + if self.is_symmetric["lnl_spread"]: + params.update(self.ipsi.get_lnl_spread_params(as_flat=as_flat)) + else: + params["ipsi"].update(self.ipsi.get_lnl_spread_params(as_flat=as_flat)) + params["contra"].update(self.contra.get_lnl_spread_params(as_flat=as_flat)) + + if as_flat or not as_dict: + params = flatten(params) + + return params if as_dict else params.values() + + def get_params( self, as_dict: bool = True, @@ -164,16 +202,12 @@ def get_params( :py:meth:`lymph.models.Unilateral.get_params` of the ipsi- and contralateral side. For the use of the ``as_dict`` and ``as_flat`` arguments, see the documentation of the :py:meth:`lymph.types.Model.get_params` method. - """ - ipsi_params = self.ipsi.graph.get_params(as_flat=as_flat) - contra_params = self.contra.graph.get_params(as_flat=as_flat) - dist_params = self.get_distribution_params(as_flat=as_flat) - params = { - "ipsi": ipsi_params, - "contra": contra_params, - } - params.update(dist_params) + Also see the :py:meth:`lymph.models.Bilateral.get_spread_params` method to + understand how the symmetry settings affect the return value. 
+ """ + params = self.get_spread_params(as_flat=as_flat) + params.update(self.get_distribution_params(as_flat=as_flat)) if as_flat or not as_dict: params = flatten(params) @@ -181,6 +215,36 @@ def get_params( return params if as_dict else params.values() + def set_spread_params(self, *args: float, **kwargs: float) -> tuple[float]: + """Set the parameters of the model's spread edges.""" + kwargs, global_kwargs = unflatten_and_split(kwargs, expected_keys=["ipsi", "contra"]) + + ipsi_kwargs = global_kwargs.copy() + ipsi_kwargs.update(kwargs.get("ipsi", {})) + contra_kwargs = global_kwargs.copy() + contra_kwargs.update(kwargs.get("contra", {})) + + args = self.ipsi.set_tumor_spread_params(*args, **ipsi_kwargs) + if self.is_symmetric["tumor_spread"]: + synchronize_params( + get_from=self.ipsi.graph.tumor_edges, + set_to=self.contra.graph.tumor_edges, + ) + else: + args = self.contra.set_tumor_spread_params(*args, **contra_kwargs) + + args = self.ipsi.set_lnl_spread_params(*args, **ipsi_kwargs) + if self.is_symmetric["lnl_spread"]: + synchronize_params( + get_from=self.ipsi.graph.lnl_edges, + set_to=self.contra.graph.lnl_edges, + ) + else: + args = self.contra.set_lnl_spread_params(*args, **contra_kwargs) + + return args + + def set_params(self, *args: float, **kwargs: float) -> tuple[float]: """Set new parameters to the model. @@ -211,31 +275,7 @@ def set_params(self, *args: float, **kwargs: float) -> tuple[float]: When still some positional arguments remain after that, they are returned in a tuple. 
""" - kwargs, global_kwargs = unflatten_and_split(kwargs, expected_keys=["ipsi", "contra"]) - - ipsi_kwargs = global_kwargs.copy() - ipsi_kwargs.update(kwargs.get("ipsi", {})) - contra_kwargs = global_kwargs.copy() - contra_kwargs.update(kwargs.get("contra", {})) - - args = self.ipsi.set_tumor_spread_params(*args, **ipsi_kwargs) - if self.is_symmetric["tumor_spread"]: - synchronize_params( - get_from=self.ipsi.graph.tumor_edges, - set_to=self.contra.graph.tumor_edges, - ) - else: - args = self.contra.set_tumor_spread_params(*args, **contra_kwargs) - - args = self.ipsi.set_lnl_spread_params(*args, **ipsi_kwargs) - if self.is_symmetric["lnl_spread"]: - synchronize_params( - get_from=self.ipsi.graph.lnl_edges, - set_to=self.contra.graph.lnl_edges, - ) - else: - args = self.contra.set_lnl_spread_params(*args, **contra_kwargs) - + args = self.set_spread_params(*args, **kwargs) return self.set_distribution_params(*args, **kwargs) @@ -375,18 +415,15 @@ def _hmm_likelihood(self, log: bool = True, t_stage: str | None = None) -> float def likelihood( self, - given_param_args: Iterable[float] | None = None, - given_param_kwargs: dict[str, float] | None = None, + given_params: Iterable[float] | dict[str, float] | None = None, log: bool = True, mode: str = "HMM", for_t_stage: str | None = None, ): - """Compute the (log-)likelihood of the ``data`` given the model (and params). + """Compute the (log-)likelihood of the stored data given the model (and params). - The parameters of the model can be set via ``given_param_args`` and - ``given_param_kwargs``. Both arguments are used to call the - :py:meth:`~set_params` method. If the parameters are not provided, the - previously assigned parameters are used. + See the documentation of :py:meth:`lymph.types.Model.likelihood` for more + information on how to use the ``given_params`` parameter. Returns the log-likelihood if ``log`` is set to ``True``. 
The ``mode`` parameter determines whether the likelihood is computed for the hidden Markov model @@ -400,22 +437,20 @@ def likelihood( :py:meth:`lymph.models.Unilateral.likelihood` The corresponding unilateral function. """ - if given_param_args is None: - given_param_args = [] - - if given_param_kwargs is None: - given_param_kwargs = {} - try: # all functions and methods called here should raise a ValueError if the # given parameters are invalid... - self.set_params(*given_param_args, **given_param_kwargs) + if given_params is None: + pass + elif isinstance(given_params, dict): + self.set_params(**given_params) + else: + self.set_params(*given_params) except ValueError: return -np.inf if log else 0. if mode == "HMM": return self._hmm_likelihood(log, for_t_stage) - if mode == "BN": return self._bn_likelihood(log, for_t_stage) diff --git a/lymph/models/unilateral.py b/lymph/models/unilateral.py index fab0c5d..f0ab965 100644 --- a/lymph/models/unilateral.py +++ b/lymph/models/unilateral.py @@ -3,7 +3,7 @@ import warnings from functools import cached_property from itertools import product -from typing import Any, Callable, Iterable, Literal +from typing import Any, Iterable, Literal import numpy as np import pandas as pd @@ -34,12 +34,6 @@ class Unilateral( of this class allow to calculate the probability of a certain hidden pattern of involvement, given an individual diagnosis of a patient. 
""" - is_binary: bool - is_trinary: bool - get_state: Callable - set_state: Callable - lnls: dict[str, graph.LymphNodeLevel] - def __init__( self, graph_dict: dict[tuple[str], list[str]], @@ -647,39 +641,34 @@ def _hmm_likelihood(self, log: bool = True, t_stage: str | None = None) -> float def likelihood( self, - given_param_args: Iterable[float] | None = None, - given_param_kwargs: dict[str, float] | None = None, + given_params: Iterable[float] | dict[str, float] | None = None, log: bool = True, mode: str = "HMM", for_t_stage: str | None = None, ) -> float: """Compute the (log-)likelihood of the stored data given the model (and params). - The parameters of the model can be set via ``given_param_args`` and - ``given_param_kwargs``. Both arguments are used to call the - :py:meth:`Unilateral.set_params` method. If the parameters are not provided, the - previously assigned parameters are used. + See the documentation of :py:meth:`lymph.types.Model.likelihood` for more + information on how to use the ``given_params`` parameter. Returns the log-likelihood if ``log`` is set to ``True``. The ``mode`` parameter determines whether the likelihood is computed for the hidden Markov model (``"HMM"``) or the Bayesian network (``"BN"``). """ - if given_param_args is None: - given_param_args = [] - - if given_param_kwargs is None: - given_param_kwargs = {} - try: # all functions and methods called here should raise a ValueError if the # given parameters are invalid... - _ = self.set_params(*given_param_args, **given_param_kwargs) + if given_params is None: + pass + elif isinstance(given_params, dict): + self.set_params(**given_params) + else: + self.set_params(*given_params) except ValueError: return -np.inf if log else 0. 
if mode == "HMM": return self._hmm_likelihood(log, for_t_stage) - if mode == "BN": return self._bn_likelihood(log, for_t_stage) diff --git a/lymph/types.py b/lymph/types.py index aae7494..f871a78 100644 --- a/lymph/types.py +++ b/lymph/types.py @@ -76,8 +76,7 @@ def load_patient_data( @abstractmethod def likelihood( self: M, - given_param_args: Iterable[float], - given_param_kwargs: dict[str, float], + given_params: Iterable[float] | dict[str, float] | None = None, log: bool = True, ) -> float: """Return the likelihood of the model given the parameters. From 8afd716c4c1b5b15082d687b9b382abe079908a2 Mon Sep 17 00:00:00 2001 From: Roman Ludwig <48687784+rmnldwg@users.noreply.github.com> Date: Thu, 22 Feb 2024 15:53:52 +0100 Subject: [PATCH 45/75] test: add integration tests with emcee --- docs/source/quickstart_unilateral.ipynb | 66 +++++++++++++++++++++---- tests/emcee_intergration_test.py | 39 +++++++++++++++ 2 files changed, 96 insertions(+), 9 deletions(-) create mode 100644 tests/emcee_intergration_test.py diff --git a/docs/source/quickstart_unilateral.ipynb b/docs/source/quickstart_unilateral.ipynb index ad731aa..cb4f5d9 100644 --- a/docs/source/quickstart_unilateral.ipynb +++ b/docs/source/quickstart_unilateral.ipynb @@ -377,7 +377,7 @@ "source": [ "test_probabilities = [0.02, 0.24, 0.03, 0.2, 0.23, 0.18, 0.18, 0.5]\n", "\n", - "llh = model.likelihood(given_param_args=test_probabilities, log=True)\n", + "llh = model.likelihood(test_probabilities, log=True, mode=\"HMM\")\n", "\n", "print(f\"log-likelihood is {llh}\")" ] @@ -386,18 +386,66 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "From here it is up to the user what to do with this quantity. Most *likely* though, one would want to perform MCMC sampling with this.\n", + "## Sampling using `emcee`\n", "\n", - "## Summary\n", + "Now we'll show how one could do inference using MCMC sampling. Note that this is by far not the only way to learn the model parameters from the data. 
But it is a quick and useful one.\n", "\n", - "To set up a model for lymphatic metastatic spread, you need to do the following things:\n", + "First we define a couple of parameters for the sampling. Have a look at the documentation of [emcee] to understand them in more detail.\n", "\n", - "1. Define a graph via a dictionary that connects the tumor and lymph node levels\n", - "2. Provide the specificity & sensitivity of the diagnostic modalities to the `modalities` attribute\n", - "3. Assign your correctly formatted pandas `DataFrame` to the method `load_patient_data()` of the model\n", - "4. For each T-stage in the data, define a distribution over possible diagnose times and provide it to the `diag_time_dist` dictionary\n", + "[emcee]: https://emcee.readthedocs.io" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import emcee\n", "\n", - "Then, you can use the `likelihood` method to compute the (log-)likelihood of the stored data given an array of parameters - the spread probabilities." + "nwalkers, ndim = 100, len(model.get_params())\n", + "nsteps = 200\n", + "initial = np.random.uniform(size=(nwalkers, ndim))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The we create a sampler with these parameters and finally start sampling." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sampler = emcee.EnsembleSampler(\n", + " nwalkers=nwalkers,\n", + " ndim=ndim,\n", + " log_prob_fn=model.likelihood,\n", + " moves=[(emcee.moves.DEMove(), 0.8), (emcee.moves.DESnookerMove(), 0.2)],\n", + " parameter_names=list(model.get_params().keys()),\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sampler.run_mcmc(initial, nsteps, progress=True);" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sampler.get_chain(discard=int(0.9*nsteps), flat=True).mean(axis=0)" ] } ], diff --git a/tests/emcee_intergration_test.py b/tests/emcee_intergration_test.py new file mode 100644 index 0000000..ba1022d --- /dev/null +++ b/tests/emcee_intergration_test.py @@ -0,0 +1,39 @@ +""" +Make sure the models work with the emcee package. +""" +import unittest + +import emcee +import numpy as np + +from . 
import fixtures + + +class UnilateralEmceeTestCase(fixtures.BinaryUnilateralModelMixin, unittest.TestCase): + """Test the emcee package with the Unilateral model.""" + + def setUp(self): + super().setUp(graph_size="small") + self.model.set_modality("PET", spec=0.86, sens=0.79) + self.load_patient_data(filename="2021-usz-oropharynx.csv") + + + def test_emcee(self): + """Test the emcee package with the Unilateral model.""" + nwalkers, ndim = 50, len(self.model.get_params()) + nsteps = 100 + initial = self.rng.uniform(size=(nwalkers, ndim)) + + sampler = emcee.EnsembleSampler( + nwalkers=nwalkers, + ndim=ndim, + log_prob_fn=self.model.likelihood, + moves=[(emcee.moves.DEMove(), 0.8), (emcee.moves.DESnookerMove(), 0.2)], + parameter_names=list(self.model.get_params().keys()), + ) + sampler.run_mcmc(initial, nsteps, progress=True) + samples = sampler.get_chain(discard=int(0.9*nsteps), flat=True) + self.assertGreater(sampler.acceptance_fraction.mean(), 0.2) + self.assertLess(sampler.acceptance_fraction.mean(), 0.5) + self.assertTrue(np.all(samples.mean(axis=0) >= 0.0)) + self.assertTrue(np.all(samples.mean(axis=0) <= 1.0)) From 6c4c2079072889dcad1efd5d674f9d67c82d9bef Mon Sep 17 00:00:00 2001 From: Roman Ludwig <48687784+rmnldwg@users.noreply.github.com> Date: Thu, 22 Feb 2024 15:54:50 +0100 Subject: [PATCH 46/75] test: add checks for bilateral symmetries --- tests/bayesian_unilateral_test.py | 3 +- tests/binary_bilateral_test.py | 82 +++++++++++++++++++++++++++---- tests/binary_unilateral_test.py | 3 +- tests/fixtures.py | 4 +- tests/integration_test.py | 3 +- tests/trinary_unilateral_test.py | 3 +- 6 files changed, 82 insertions(+), 16 deletions(-) diff --git a/tests/bayesian_unilateral_test.py b/tests/bayesian_unilateral_test.py index 93fcab2..dc2dd13 100644 --- a/tests/bayesian_unilateral_test.py +++ b/tests/bayesian_unilateral_test.py @@ -3,9 +3,10 @@ """ import unittest -import fixtures import numpy as np +from . 
import fixtures + class BayesianUnilateralModelTestCase(fixtures.BinaryUnilateralModelMixin, unittest.TestCase): """Test the Bayesian Unilateral Model.""" diff --git a/tests/binary_bilateral_test.py b/tests/binary_bilateral_test.py index 5932281..476ae31 100644 --- a/tests/binary_bilateral_test.py +++ b/tests/binary_bilateral_test.py @@ -3,10 +3,12 @@ """ import unittest -import fixtures import numpy as np from lymph import models +from lymph.helper import flatten + +from . import fixtures class BilateralInitTest(fixtures.BilateralModelMixin, unittest.TestCase): @@ -16,7 +18,6 @@ def setUp(self): self.model_kwargs = {"is_symmetric": { "tumor_spread": True, "lnl_spread": True, - "modalities": True, }} super().setUp() self.load_patient_data() @@ -153,33 +154,47 @@ def test_diag_time_dists_delegation(self): ) -class ParameterAssignmentTestCase(fixtures.BilateralModelMixin, unittest.TestCase): - """Test the parameter assignment.""" +class NoSymmetryParamsTestCase(fixtures.BilateralModelMixin, unittest.TestCase): + """Test the parameter assignment when the model is not symmetric""" def setUp(self): self.model_kwargs = { "is_symmetric": { "tumor_spread": False, "lnl_spread": False, - "modalities": True, } } super().setUp() def test_get_params_as_args(self): """Test that the parameters can be retrieved.""" - ipsi_args = self.model.ipsi.get_params() - contra_args = self.model.contra.get_params() - self.assertEqual(len(ipsi_args), len(contra_args)) + ipsi_args = list(self.model.ipsi.get_params(as_dict=False)) + contra_args = list(self.model.contra.get_params(as_dict=False)) + both_args = list(self.model.get_params(as_dict=False)) + num_param_dists = len(self.model.get_distribution_params()) + # need plus one, because distribution's parameter is accounted for twice + self.assertEqual(len(ipsi_args) + len(contra_args), len(both_args) + 1) + self.assertEqual( + [*ipsi_args[:-num_param_dists], *contra_args[:-num_param_dists]], + both_args[:-num_param_dists], + ) def 
test_get_params_as_dict(self): """Test that the parameters can be retrieved.""" ipsi_dict = self.model.ipsi.get_params(as_dict=True) contra_dict = self.model.contra.get_params(as_dict=True) - self.assertEqual(ipsi_dict.keys(), contra_dict.keys()) + both_dict = self.model.get_params(as_dict=True, as_flat=False) + dist_param_keys = self.model.get_distribution_params().keys() + + for key in dist_param_keys: + ipsi_dict.pop(key) + contra_dict.pop(key) + + self.assertEqual(ipsi_dict, flatten(both_dict["ipsi"])) + self.assertEqual(contra_dict, flatten(both_dict["contra"])) def test_set_params_as_args(self): - """Test that the parameters can be assigned.""" + """Test that the parameters can be set.""" ipsi_tumor_spread_args = self.rng.uniform(size=len(self.model.ipsi.graph.tumor_edges)) ipsi_lnl_spread_args = self.rng.uniform(size=len(self.model.ipsi.graph.lnl_edges)) contra_tumor_spread_args = self.rng.uniform(size=len(self.model.contra.graph.tumor_edges)) @@ -206,6 +221,53 @@ def test_set_params_as_args(self): list(self.model.contra.get_distribution("late").get_params())[0], ) + def test_set_params_as_dict(self): + """Test that the parameters can be set via keyword arguments.""" + params_to_set = {k: self.rng.uniform() for k in self.model.get_params().keys()} + self.model.set_params(**params_to_set) + self.assertEqual(params_to_set, self.model.get_params()) + + +class SymmetryParamsTestCase(fixtures.BilateralModelMixin, unittest.TestCase): + """Test the parameter assignment when the model is symmetric.""" + + def setUp(self): + self.model_kwargs = { + "is_symmetric": { + "tumor_spread": True, + "lnl_spread": True, + } + } + super().setUp() + + def test_get_params_as_args(self): + """Test that the parameters can be retrieved.""" + ipsi_args = list(self.model.ipsi.get_params(as_dict=False)) + contra_args = list(self.model.contra.get_params(as_dict=False)) + both_args = list(self.model.get_params(as_dict=False)) + self.assertEqual(ipsi_args, both_args) + 
self.assertEqual(contra_args, both_args) + + def test_get_params_as_dict(self): + """Test that the parameters can be retrieved.""" + ipsi_dict = self.model.ipsi.get_params() + contra_dict = self.model.contra.get_params() + both_dict = self.model.get_params() + self.assertEqual(ipsi_dict, both_dict) + self.assertEqual(contra_dict, both_dict) + + def test_set_params_as_args(self): + """Test that the parameters can be set.""" + args_to_set = [self.rng.uniform() for _ in self.model.ipsi.get_params(as_dict=False)] + self.model.set_params(*args_to_set) + self.assertEqual(args_to_set, list(self.model.contra.get_params().values())) + + def test_set_params_as_dict(self): + """Test that the parameters can be set via keyword arguments.""" + params_to_set = {k: self.rng.uniform() for k in self.model.contra.get_params()} + self.model.set_params(**params_to_set) + self.assertEqual(params_to_set, self.model.ipsi.get_params()) + class LikelihoodTestCase(fixtures.BilateralModelMixin, unittest.TestCase): """Check that the (log-)likelihood is computed correctly.""" diff --git a/tests/binary_unilateral_test.py b/tests/binary_unilateral_test.py index 84a2048..9f8ae8b 100644 --- a/tests/binary_unilateral_test.py +++ b/tests/binary_unilateral_test.py @@ -1,11 +1,12 @@ """Test the binary unilateral system.""" import unittest -import fixtures import numpy as np from lymph.graph import LymphNodeLevel, Tumor +from . 
import fixtures + class InitTestCase(fixtures.BinaryUnilateralModelMixin, unittest.TestCase): """Test the initialization of a binary model.""" diff --git a/tests/fixtures.py b/tests/fixtures.py index adafe83..057331a 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -113,10 +113,10 @@ def create_random_pattern(lnls: list[str]) -> PatternType: class BinaryUnilateralModelMixin: """Mixin class for simple binary model fixture creation.""" - def setUp(self): + def setUp(self, graph_size: str = "large"): """Initialize a simple binary model.""" self.rng = np.random.default_rng(42) - self.graph_dict = get_graph(size="large") + self.graph_dict = get_graph(size=graph_size) self.model = Unilateral.binary(graph_dict=self.graph_dict) self.logger = get_logger(level=logging.INFO) diff --git a/tests/integration_test.py b/tests/integration_test.py index 697e830..c181d36 100644 --- a/tests/integration_test.py +++ b/tests/integration_test.py @@ -4,12 +4,13 @@ """ import unittest -import fixtures import numpy as np import scipy as sp import lymph +from . import fixtures + def late_binomial(support: np.ndarray, p: float = 0.5) -> np.ndarray: """Parametrized binomial distribution.""" diff --git a/tests/trinary_unilateral_test.py b/tests/trinary_unilateral_test.py index 95ef2fe..61f9216 100644 --- a/tests/trinary_unilateral_test.py +++ b/tests/trinary_unilateral_test.py @@ -1,13 +1,14 @@ """Test the trinary unilateral system.""" import unittest -import fixtures import numpy as np import pandas as pd from lymph.graph import LymphNodeLevel from lymph.helper import set_params_for +from . 
import fixtures + class TrinaryInitTestCase(fixtures.TrinaryFixtureMixin, unittest.TestCase): """Testing the basic initialization of a trinary model.""" From bdc2bfb1542fe41eaf2d1a5b5eeb22b3e36c0842 Mon Sep 17 00:00:00 2001 From: Roman Ludwig <48687784+rmnldwg@users.noreply.github.com> Date: Thu, 22 Feb 2024 16:51:36 +0100 Subject: [PATCH 47/75] feat(mid): implement `set_params` --- lymph/diagnose_times.py | 3 +- lymph/models/midline.py | 544 ++++++++++++++++++---------------------- lymph/types.py | 9 +- 3 files changed, 258 insertions(+), 298 deletions(-) diff --git a/lymph/diagnose_times.py b/lymph/diagnose_times.py index c260995..1648e48 100644 --- a/lymph/diagnose_times.py +++ b/lymph/diagnose_times.py @@ -230,7 +230,8 @@ def set_params(self, *args: float, **kwargs: float) -> tuple[float]: for name, value in self._kwargs.items(): first, args = popfirst(args) self._kwargs[name] = first or kwargs.get(name, value) - del self._frozen + if hasattr(self, "_frozen"): + del self._frozen try: _ = self.pmf diff --git a/lymph/models/midline.py b/lymph/models/midline.py index abdca2d..e03f80b 100644 --- a/lymph/models/midline.py +++ b/lymph/models/midline.py @@ -2,17 +2,17 @@ import logging import warnings -from argparse import OPTIONAL -from typing import Any, Iterable, Iterator +from typing import Any, Iterable import numpy as np import pandas as pd -from lymph import graph, modalities, models +from lymph import diagnose_times, modalities, models, types from lymph.helper import ( - AbstractLookupDict, - DelegationSyncMixin, early_late_mapping, + flatten, + popfirst, + unflatten_and_split, ) from lymph.types import DiagnoseType, PatternType @@ -21,74 +21,11 @@ -def create_property_sync_callback( - names: list[str], - this: graph.Edge, - other: graph.Edge, -) -> callable: - """Return func to sync property values whose name is in ``names`` btw two edges. 
- - The returned function is meant to be added to the list of callbacks of the - :py:class:`Edge` class, such that two edges in a mirrored pair of graphs are kept - in sync. - """ - def sync(): - # We must set the value of `this` property via the private name, otherwise - # we would trigger the setter's callbacks and may end up in an infinite loop. - for name in names: - private_name = f"_{name}" - setattr(other, private_name, getattr(this, name)) - - logger.debug(f"Created sync callback for properties {names} of {this.get_name} edge.") - return sync - -# this here could probably be used to sync the edges for the different bilateral classes if we want to keep on using it -def init_edge_sync( - property_names: list[str], - this_edge_list: list[graph.Edge], - other_edge_list: list[graph.Edge], -) -> None: - """Initialize the callbacks to sync properties btw. Edges. - - Implementing this as a separate method allows a user in theory to initialize - an arbitrary kind of symmetry between the two sides of the neck. 
- """ - this_edge_names = [e.get_name for e in this_edge_list] - other_edge_names = [e.get_name for e in other_edge_list] - - for edge_name in set(this_edge_names).intersection(other_edge_names): - this_edge = this_edge_list[this_edge_names.index(edge_name)] - other_edge = other_edge_list[other_edge_names.index(edge_name)] - - this_edge.trigger_callbacks.append( - create_property_sync_callback( - names=property_names, - this=this_edge, - other=other_edge, - ) - ) - other_edge.trigger_callbacks.append( - create_property_sync_callback( - names=property_names, - this=other_edge, - other=this_edge, - ) - ) - - -def init_dict_sync( - this: AbstractLookupDict, - other: AbstractLookupDict, -) -> None: - """Add callback to ``this`` to sync with ``other``.""" - def sync(): - other.clear() - other.update(this) - - this.trigger_callbacks.append(sync) - - -class Midline(DelegationSyncMixin): +class Midline( + diagnose_times.Composite, + modalities.Composite, + types.Model, +): """Models metastatic progression bilaterally with tumor lateralization. Model a bilateral lymphatic system where an additional risk factor can @@ -114,11 +51,10 @@ class Midline(DelegationSyncMixin): def __init__( self, graph_dict: dict[tuple[str], list[str]], + is_symmetric: dict[str, bool] | None = None, use_mixing: bool = True, - modalities_symmetric: bool = True, - trans_symmetric: bool = True, + use_central: bool = True, unilateral_kwargs: dict[str, Any] | None = None, - central_enabled: bool = True, **_kwargs ): """Initialize the model. @@ -150,204 +86,232 @@ class will contain several instances of :py:class:`~lymph.models.Bilateral`, class. One for the case of a mid-sagittal extension of the primary tumor and one for the case of no such extension. 
""" - super().__init__() - self.central_enabled = central_enabled - self.ext = models.Bilateral(graph_dict= graph_dict,unilateral_kwargs=unilateral_kwargs, is_symmetric={'tumor_spread':False, "modalities": modalities_symmetric, "lnl_spread":trans_symmetric}) - self.noext = models.Bilateral(graph_dict= graph_dict,unilateral_kwargs=unilateral_kwargs, is_symmetric={'tumor_spread':False, "modalities": modalities_symmetric, "lnl_spread":trans_symmetric}) - if self.central_enabled: - self.central = models.Bilateral(graph_dict= graph_dict,unilateral_kwargs=unilateral_kwargs, is_symmetric={'tumor_spread':True, "modalities": modalities_symmetric, "lnl_spread":trans_symmetric}) - - self.use_mixing = use_mixing - self.diag_time_dists = {} - if self.use_mixing: - self.alpha_mix = 0. - - self.modalities_symmetric = modalities_symmetric - property_names = ["spread_prob"] - if self.ext.ipsi.graph.is_trinary: - property_names.append("micro_mod") - delegated_attrs = [ - "max_time", "t_stages", - "is_binary", "is_trinary", - ] - - init_dict_sync( - this=self.ext.ipsi.diag_time_dists, - other=self.noext.ipsi.diag_time_dists, - ) - if central_enabled: - init_dict_sync( - this=self.noext.ipsi.diag_time_dists, - other=self.central.ipsi.diag_time_dists - ) - - if self.modalities_symmetric: - delegated_attrs.append("modalities") - init_dict_sync( - this=self.ext.modalities, - other=self.noext.modalities, + if is_symmetric is None: + is_symmetric = { + "tumor_spread": False, + "lnl_spread": True, + } + if is_symmetric["tumor_spread"]: + raise ValueError( + "If you want the tumor spread to be symmetric, consider using the " + "Bilateral class." 
) - if central_enabled: - init_dict_sync( - this=self.noext.modalities, - other=self.central.modalities, - ) - self.init_synchronization() - self.init_delegation(ext=delegated_attrs) - - def init_synchronization(self) -> None: - """Initialize the synchronization of edges, modalities, and diagnose times.""" - # Sync spread probabilities - property_names = ["spread_prob", "micro_mod"] if self.noext.ipsi.is_trinary else ["spread_prob"] - noext_ipsi_tumor_edges = list(self.noext.ipsi.graph.tumor_edges.values()) - noext_ipsi_lnl_edges = list(self.noext.ipsi.graph.lnl_edges.values()) - noext_ipsi_edges = ( - noext_ipsi_tumor_edges + noext_ipsi_lnl_edges + self.is_symmetric = is_symmetric + + self.ext = models.Bilateral( + graph_dict=graph_dict, + unilateral_kwargs=unilateral_kwargs, + is_symmetric=self.is_symmetric, ) - ext_ipsi_tumor_edges = list(self.ext.ipsi.graph.tumor_edges.values()) - ext_ipsi_lnl_edges = list(self.ext.ipsi.graph.lnl_edges.values()) - ext_ipsi_edges = ( - ext_ipsi_tumor_edges - + ext_ipsi_lnl_edges + self.noext = models.Bilateral( + graph_dict=graph_dict, + unilateral_kwargs=unilateral_kwargs, + is_symmetric=self.is_symmetric, ) + central_child = {} + if use_central: + self.central = models.Bilateral( + graph_dict=graph_dict, + unilateral_kwargs=unilateral_kwargs, + is_symmetric={ + "tumor_spread": True, + "lnl_spread": self.is_symmetric["lnl_spread"], + }, + ) + central_child = {"central": self.central} + if use_mixing: + self.mixing_param = 0. - init_edge_sync( - property_names=property_names, - this_edge_list=noext_ipsi_edges, - other_edge_list=ext_ipsi_edges, + diagnose_times.Composite.__init__( + self, + distribution_children={"ext": self.ext, "noext": self.noext, **central_child}, + is_distribution_leaf=False, + ) + modalities.Composite.__init__( + self, + modality_children={"ext": self.ext, "noext": self.noext, **central_child}, + is_modality_leaf=False, ) - #The syncing below does not work properly. 
The ipsilateral central side is synced, but the contralateral central side is not synced. It seems like no callback is initiated when syncing in this manner - - # if self.central_enabled: - # central_ipsi_tumor_edges = list(self.central.ipsi.graph.tumor_edges.values()) - # central_ipsi_lnl_edges = list(self.central.ipsi.graph.lnl_edges.values()) - # central_ipsi_edges = ( - # central_ipsi_tumor_edges - # + central_ipsi_lnl_edges - # ) - # init_edge_sync( - # property_names=property_names,W - # this_edge_list=noext_ipsi_edges, - # other_edge_list=central_ipsi_edges, - # ) - def get_params( - self): - """Return the parameters of the model. - Parameters are only returned as dictionary. + @property + def is_trinary(self) -> bool: + """Return whether the model is trinary.""" + if self.ext.is_trinary != self.noext.is_trinary: + raise ValueError("The bilateral models must have the same trinary status.") + + if self.use_central and self.central.is_trinary != self.ext.is_trinary: + raise ValueError("The bilateral models must have the same trinary status.") + + return self.ext.is_trinary + + + @property + def mixing_param(self) -> float | None: + """Return the mixing parameter.""" + if hasattr(self, "_mixing_param"): + return self._mixing_param + + return None + + @mixing_param.setter + def mixing_param(self, value: float) -> None: + """Set the mixing parameter.""" + if value is not None and not 0. <= value <= 1.: + raise ValueError("The mixing parameter must be in the range [0, 1].") + + self._mixing_param = value + + @property + def use_mixing(self) -> bool: + """Return whether the model uses a mixing parameter.""" + return hasattr(self, "_mixing_param") + + @property + def use_central(self) -> bool: + """Return whether the model uses a central model.""" + return hasattr(self, "central") + + + def get_spread_params( + self, + as_dict: bool = True, + as_flat: bool = True, + ) -> dict[str, float] | Iterable[float]: + """Return the spread parameters of the model. 
+ + TODO: enrich docstring """ + params = {} + params["ipsi"] = self.ext.ipsi.get_tumor_spread_params(as_flat=as_flat) if self.use_mixing: - return {'ipsi': self.noext.ipsi.get_params(as_dict=True), - 'no extension contra':self.noext.contra.get_params(as_dict=True), - 'mixing':self.alpha_mix} + params["contra"] = self.noext.contra.get_tumor_spread_params(as_flat=as_flat) + params["mixing"] = self.mixing_param + else: + params["noext"] = { + "contra": self.noext.contra.get_tumor_spread_params(as_flat=as_flat) + } + params["ext"] = { + "contra": self.ext.contra.get_tumor_spread_params(as_flat=as_flat) + } + + if self.is_symmetric["lnl_spread"]: + params.update(self.ext.ipsi.get_lnl_spread_params(as_flat=as_flat)) else: - return { - 'ipsi':self.ext.ipsi.get_params(as_dict=True), - 'extension contra':self.ext.contra.get_params(as_dict=True), - 'no extension contra':self.noext.contra.get_params(as_dict=True)} + if "contra" not in params: + params["contra"] = {} + params["ipsi"].update(self.ext.ipsi.get_lnl_spread_params(as_flat=as_flat)) + params["contra"].update(self.noext.contra.get_lnl_spread_params(as_flat=as_flat)) + + if as_flat or not as_dict: + params = flatten(params) + return params if as_dict else params.values() - def assign_params( + + def get_params( self, - *new_params_args, - **new_params_kwargs, - ) -> tuple[Iterator[float, dict[str, float]]]: - """Assign new parameters to the model. + as_dict: bool = True, + as_flat: bool = True, + ) -> Iterable[float] | dict[str, float]: + """Return the parameters of the model. - This works almost exactly as the bilateral model's - :py:meth:`~lymph.models.Bilateral.assign_params` method. However the assignment of parametrs - with an array is disabled as it gets to messy with such a large parameter space. - For universal parameters, the prefix is not needed as they are directly - sent to the noextension ipsilateral side, which then triggers a sync callback. 
+ TODO: enrich docstring """ + params = self.get_spread_params(as_flat=as_flat) + params.update(self.get_distribution_params(as_flat=as_flat)) + + if as_flat or not as_dict: + params = flatten(params) + + return params if as_dict else params.values() + + + def set_spread_params( + self, *args: float, **kwargs: float, + ) -> Iterable[float] | dict[str, float]: + """Set the spread parameters of the midline model. + + TODO: enrich docstring + """ + kwargs, global_kwargs = unflatten_and_split( + kwargs, expected_keys=["ipsi", "noext", "ext", "contra"], + ) + + # first, take care of ipsilateral tumor spread (same for all models) + ipsi_kwargs = global_kwargs.copy() + ipsi_kwargs.update(kwargs.get("ipsi", {})) + if self.use_central: + self.central.set_spread_params(*args, **ipsi_kwargs) + self.ext.ipsi.set_tumor_spread_params(*args, **ipsi_kwargs) + args = self.noext.ipsi.set_tumor_spread_params(*args, **ipsi_kwargs) + + # then, take care of contralateral tumor spread if self.use_mixing: - extension_kwargs = {} - no_extension_kwargs = {} - central_kwargs = {} - for key, value in new_params_kwargs.items(): - if 'mixing' in key: - self.alpha_mix = value - else: - no_extension_kwargs[key] = value - remaining_args, remainings_kwargs = self.noext.set_params(*new_params_args, **no_extension_kwargs) - for key in no_extension_kwargs.keys(): - if 'contra_primary' in key: - extension_kwargs[key] = self.alpha_mix * extension_kwargs[(key.replace("contra", "ipsi"))] + (1. - self.alpha_mix) * no_extension_kwargs[key] - else: - extension_kwargs[key] = no_extension_kwargs[key] - remaining_args, remainings_kwargs = self.ext.set_params(*remaining_args, **extension_kwargs) - # If the syncing of the edges works properly, this below can be deleted. 
- if self.central_enabled: - for key in no_extension_kwargs.keys(): - if 'contra' not in key: - central_kwargs[(key.replace("ipsi_", ""))] = no_extension_kwargs[key] - remaining_args, remainings_kwargs = self.central.set_params(*new_params_args, **central_kwargs) + contra_kwargs = global_kwargs.copy() + contra_kwargs.update(kwargs.get("contra", {})) + args = self.noext.contra.set_tumor_spread_params(*args, **contra_kwargs) + mixing_param, args = popfirst(args) + mixing_param = global_kwargs.get("mixing", mixing_param) or self.mixing_param + self.mixing_param = global_kwargs.get("mixing", mixing_param) + + ext_contra_kwargs = {} + for (key, ipsi_param), noext_contra_param in zip( + self.ext.ipsi.get_tumor_spread_params().items(), + self.noext.contra.get_tumor_spread_params().values(), + ): + ext_contra_kwargs[key] = ( + self.mixing_param * ipsi_param + + (1. - self.mixing_param) * noext_contra_param + ) + self.ext.contra.set_tumor_spread_params(**ext_contra_kwargs) + else: - ipsi_kwargs, noext_contra_kwargs, ext_contra_kwargs, general_kwargs, central_kwargs = {}, {}, {}, {}, {} - - for key, value in new_params_kwargs.items(): - if "ipsi_" in key: - ipsi_kwargs[key.replace("ipsi_", "")] = value - elif "noext" in key: - noext_contra_kwargs[key.replace("contra_noext_", "")] = value - elif 'ext' in key: - ext_contra_kwargs[key.replace("contra_ext_", "")] = value - else: - if 'contra' in key: - warnings.warn( - "'contra' keys were assigned without 'ext' or 'noext' defined. 
For a non-mixture model" - "For a non mixture model these values have no meaning.") - else: - general_kwargs[key] = value - - remaining_args, remainings_kwargs = self.ext.ipsi.set_params( - *new_params_args, **ipsi_kwargs, **general_kwargs - ) - remaining_args, remainings_kwargs = self.noext.contra.set_params( - *remaining_args, **noext_contra_kwargs, **remainings_kwargs, **general_kwargs - ) - remaining_args, remainings_kwargs = self.ext.contra.set_params( - *remaining_args, **ext_contra_kwargs, **remainings_kwargs, **general_kwargs - ) - if self.central_enabled: - for key in ipsi_kwargs.keys(): - central_kwargs[(key.replace("ipsi_", ""))] = ipsi_kwargs[key] - print(ipsi_kwargs) - print(general_kwargs) - remaining_args, remainings_kwargs = self.central.set_params(*new_params_args, **central_kwargs, **general_kwargs) + noext_contra_kwargs = global_kwargs.copy() + noext_contra_kwargs.update(kwargs.get("noext", {}).get("contra", {})) + args = self.noext.contra.set_tumor_spread_params(*args, **noext_contra_kwargs) + + ext_contra_kwargs = global_kwargs.copy() + ext_contra_kwargs.update(kwargs.get("ext", {}).get("contra", {})) + args = self.ext.contra.set_tumor_spread_params(*args, **ext_contra_kwargs) + + # finally, take care of LNL spread + if self.is_symmetric["lnl_spread"]: + if self.use_central: + self.central.ipsi.set_lnl_spread_params(*args, **global_kwargs) + self.central.contra.set_lnl_spread_params(*args, **global_kwargs) + self.ext.ipsi.set_lnl_spread_params(*args, **global_kwargs) + self.ext.contra.set_lnl_spread_params(*args, **global_kwargs) + self.noext.ipsi.set_lnl_spread_params(*args, **global_kwargs) + args = self.noext.contra.set_lnl_spread_params(*args, **global_kwargs) - return remaining_args, remainings_kwargs + else: + if self.use_central: + self.central.ipsi.set_lnl_spread_params(*args, **ipsi_kwargs) + self.ext.ipsi.set_lnl_spread_params(*args, **ipsi_kwargs) + args = self.noext.ipsi.set_lnl_spread_params(*args, **ipsi_kwargs) + contra_kwargs = 
global_kwargs.copy() + contra_kwargs.update(kwargs.get("contra", {})) + if self.use_central: + self.central.contra.set_lnl_spread_params(*args, **contra_kwargs) + self.ext.contra.set_lnl_spread_params(*args, **contra_kwargs) + args = self.noext.contra.set_lnl_spread_params(*args, **contra_kwargs) - @property - def modalities(self) -> modalities.ModalitiesUserDict: - """Return the set diagnostic modalities of the model. + return args - See Also: - :py:attr:`lymph.models.Unilateral.modalities` - The corresponding unilateral attribute. - :py:class:`~lymph.descriptors.ModalitiesUserDict` - The implementation of the descriptor class. + + def set_params( + self, *args: float, **kwargs: float, + ) -> Iterable[float] | dict[str, float]: + """Assign new parameters to the model. + + TODO: enrich docstring """ - if not self.modalities_symmetric: - raise AttributeError( - "The modalities are not symmetric. Please access them via the " - "`ipsi` or `contra` attributes." - ) - return self.ext.modalities - - @modalities.setter - def modalities(self, new_modalities) -> None: - """Set the diagnostic modalities of the model.""" - if not self.modalities_symmetric: - raise AttributeError( - "The modalities are not symmetric. Please set them via the " - "`ipsi` or `contra` attributes." - ) - self.ext.replace_all_modalities(new_modalities) + args = self.set_spread_params(*args, **kwargs) + return self.set_distribution_params(*args, **kwargs) def load_patient_data( @@ -360,7 +324,7 @@ def load_patient_data( This amounts to calling the :py:meth:`~lymph.models.Unilateral.load_patient_data` method on both models. 
""" - if self.central_enabled: + if self.use_central: ext_data = patient_data.loc[(patient_data[("tumor", "1", "extension")] == True) & (patient_data[("tumor", "1", "central")] != True)] noext_data = patient_data.loc[~patient_data[("tumor", "1", "extension")]] central = patient_data[patient_data[("tumor", "1", "central")].notna() & patient_data[("tumor", "1", "central")]] @@ -374,64 +338,52 @@ def load_patient_data( def likelihood( self, - data: OPTIONAL[pd.DataFrame] = None, - given_param_kwargs: dict[str, float] | None = None, + given_params: Iterable[float] | dict[str, float] | None = None, log: bool = True, - mode: str = 'HMM' + mode: str = "HMM", + for_t_stage: str | None = None, ) -> float: - """Compute log-likelihood of (already stored) data, given the spread - probabilities and either a discrete diagnose time or a distribution to - use for marginalization over diagnose times. - - Args: - data: Table with rows of patients and columns of per-LNL involvment. See - :meth:`load_data` for more details on how this should look like. + """Compute the (log-)likelihood of the stored data given the model (and params). - given_params: The likelihood is a function of these parameters. They mainly - consist of the :attr:`spread_probs` of the model. Any excess parameters - will be used to update the parametrized distributions used for - marginalizing over the diagnose times (see :attr:`diag_time_dists`). + See the documentation of :py:meth:`lymph.types.Model.likelihood` for more + information on how to use the ``given_params`` parameter. - log: When ``True``, the log-likelihood is returned. + Returns the log-likelihood if ``log`` is set to ``True``. The ``mode`` parameter + determines whether the likelihood is computed for the hidden Markov model + (``"HMM"``) or the Bayesian network (``"BN"``). 
- Returns: - The log-likelihood :math:`\\log{p(D \\mid \\theta)}` where :math:`D` - is the data and :math:`\\theta` is the tuple of spread probabilities - and diagnose times or distributions over diagnose times. + Note: + The computation is much faster if no parameters are given, since then the + transition matrix does not need to be recomputed. See Also: - :attr:`spread_probs`: Property for getting and setting the spread - probabilities, of which a lymphatic network has as many as it has - :class:`Edge` instances (in case no symmetries apply). - - :meth:`Unilateral.likelihood`: The log-likelihood function of - the unilateral system. - - :meth:`Bilateral.likelihood`: The (log-)likelihood function of the - bilateral system. + :py:meth:`lymph.models.Unilateral.likelihood` + The corresponding unilateral function. """ - if data is not None: - self.patient_data = data - - if given_param_kwargs is None: - given_param_kwargs = {} - try: - self.assign_params(**given_param_kwargs) + # all functions and methods called here should raise a ValueError if the + # given parameters are invalid... + if given_params is None: + pass + elif isinstance(given_params, dict): + self.set_params(**given_params) + else: + self.set_params(*given_params) except ValueError: return -np.inf if log else 0. + kwargs = {"log": log, "mode": mode, "for_t_stage": for_t_stage} llh = 0. if log else 1. 
if log: - llh += self.ext.likelihood(log = log, mode = mode) - llh += self.noext.likelihood(log = log, mode = mode) - if self.central_enabled: - llh += self.central.likelihood(log = log, mode = mode) + llh += self.ext.likelihood(**kwargs) + llh += self.noext.likelihood(**kwargs) + if self.use_central: + llh += self.central.likelihood(**kwargs) else: - llh *= self.ext.likelihood(log = log, mode = mode) - llh *= self.noext.likelihood(log = log, mode = mode) - if self.central_enabled: - llh *= self.central.likelihood(log = log, mode = mode) + llh *= self.ext.likelihood(**kwargs) + llh *= self.noext.likelihood(**kwargs) + if self.use_central: + llh *= self.central.likelihood(**kwargs) return llh @@ -461,9 +413,9 @@ def risk( respective :class:`Bilateral` instance gets called. """ if given_param_args is not None: - self.assign_params(*given_param_args) + self.set_params(*given_param_args) if given_param_kwargs is not None: - self.assign_params(**given_param_kwargs) + self.set_params(**given_param_kwargs) if central: return self.central.risk(given_diagnoses = given_diagnoses,t_stage = t_stage, involvement = involvement) if midline_extension: diff --git a/lymph/types.py b/lymph/types.py index f871a78..0ef44fe 100644 --- a/lymph/types.py +++ b/lymph/types.py @@ -2,7 +2,7 @@ Type aliases and protocols used in the lymph package. """ from abc import ABC, abstractmethod -from typing import Iterable, Protocol, TypeVar +from typing import Iterable, Literal, Protocol, TypeVar import pandas as pd from pandas._libs.missing import NAType @@ -54,6 +54,13 @@ def get_params( flat dictionary. """ + def get_num_dims(self: M, mode: Literal["HMM", "BN"] = "HMM") -> int: + """Return the number of dimensions of the parameter space.""" + num = len(self.get_params()) + if mode == "BN": + num -= len(self.get_distribution_params()) + return num + @abstractmethod def set_params(self: M, *args: float, **kwargs: float) -> tuple[float]: """Set the parameters of the model. 
From fb47b3347f5bc5c772438d60abc539c094ec0718 Mon Sep 17 00:00:00 2001 From: Roman Ludwig <48687784+rmnldwg@users.noreply.github.com> Date: Thu, 22 Feb 2024 16:52:42 +0100 Subject: [PATCH 48/75] test(mid): add first check of `set_params()` meth --- tests/binary_midline_test.py | 63 ++++++++++++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) create mode 100644 tests/binary_midline_test.py diff --git a/tests/binary_midline_test.py b/tests/binary_midline_test.py new file mode 100644 index 0000000..bbe6076 --- /dev/null +++ b/tests/binary_midline_test.py @@ -0,0 +1,63 @@ +""" +Test the midline model for the binary case. +""" +import unittest +from typing import Literal + +import numpy as np + +from lymph import models + +from . import fixtures + + +class MidlineSetParamsTestCase(unittest.TestCase): + """Check that the complex parameter assignment works correctly.""" + + def setUp( + self, + seed: int = 42, + graph_size: Literal["small", "medium", "large"] = "small", + use_mixing: bool = True, + use_central: bool = True, + is_symmetric: dict[str, bool] | None = None, + ) -> None: + super().setUp() + self.rng = np.random.default_rng(seed) + graph_dict = fixtures.get_graph(graph_size) + if is_symmetric is None: + is_symmetric = {"tumor_spread": False, "lnl_spread": True} + + self.model = models.Midline( + graph_dict=graph_dict, + is_symmetric=is_symmetric, + use_mixing=use_mixing, + use_central=use_central, + ) + + + def test_set_spread_params(self) -> None: + """Check that the complex parameter assignment works correctly.""" + params_to_set = {k: self.rng.uniform() for k in self.model.get_params().keys()} + self.model.set_params(**params_to_set) + + self.assertEqual( + self.model.central.ipsi.get_tumor_spread_params(), + self.model.central.contra.get_tumor_spread_params(), + ) + self.assertEqual( + self.model.central.ipsi.get_lnl_spread_params(), + self.model.central.contra.get_lnl_spread_params(), + ) + self.assertEqual( + 
self.model.central.contra.get_lnl_spread_params(), + self.model.ext.ipsi.get_lnl_spread_params(), + ) + self.assertEqual( + self.model.ext.ipsi.get_lnl_spread_params(), + self.model.noext.ipsi.get_lnl_spread_params(), + ) + self.assertEqual( + self.model.ext.ipsi.get_tumor_spread_params(), + self.model.noext.ipsi.get_tumor_spread_params(), + ) From 6a2f9d1b709ce3766921f8211935d088a8c456a3 Mon Sep 17 00:00:00 2001 From: Roman Ludwig <48687784+rmnldwg@users.noreply.github.com> Date: Thu, 22 Feb 2024 17:08:21 +0100 Subject: [PATCH 49/75] feat(mid): implement the `load_patient_data` meth --- lymph/models/midline.py | 37 +++++++++++++++++++++++++++---------- 1 file changed, 27 insertions(+), 10 deletions(-) diff --git a/lymph/models/midline.py b/lymph/models/midline.py index e03f80b..f6ac717 100644 --- a/lymph/models/midline.py +++ b/lymph/models/midline.py @@ -20,6 +20,10 @@ logger = logging.getLogger(__name__) +EXT_COL = ("tumor", "1", "extension") +CENTRAL_COL = ("tumor", "1", "central") + + class Midline( diagnose_times.Composite, @@ -321,19 +325,32 @@ def load_patient_data( ) -> None: """Load patient data into the model. - This amounts to calling the :py:meth:`~lymph.models.Unilateral.load_patient_data` - method on both models. + This amounts to sorting the patients into three bins: + 1. Patients whose tumor is clearly laterlaized, meaning the column + ``("tumor", "1", "extension")`` reports ``False``. These get assigned to + the :py:attr:`noext` attribute. + 2. Those with a central tumor, indicated by ``True`` in the column + ``("tumor", "1", "central")``. If the :py:attr:`use_central` attribute is + set to ``True``, these patients are assigned to the :py:attr:`central` + model. Otherwise, they are assigned to the :py:attr:`ext` model. + 3. The rest, which amounts to patients whose tumor extends over the mid-sagittal + line but is not central, i.e., symmetric w.r.t to the mid-sagittal line. + These are assigned to the :py:attr:`ext` model. 
+ + The split data is sent to the :py:meth:`lymph.models.Bilateral.load_patient_data` + method of the respective models. """ + # pylint: disable=singleton-comparison + is_lateralized = patient_data[EXT_COL] == False + self.noext.load_patient_data(patient_data[is_lateralized], mapping) + if self.use_central: - ext_data = patient_data.loc[(patient_data[("tumor", "1", "extension")] == True) & (patient_data[("tumor", "1", "central")] != True)] - noext_data = patient_data.loc[~patient_data[("tumor", "1", "extension")]] - central = patient_data[patient_data[("tumor", "1", "central")].notna() & patient_data[("tumor", "1", "central")]] - self.central.load_patient_data(central, mapping) + is_central = patient_data[CENTRAL_COL] == True + self.central.load_patient_data(patient_data[is_central], mapping) + self.ext.load_patient_data(patient_data[~is_lateralized & ~is_central], mapping) + else: - ext_data = patient_data.loc[(patient_data[("tumor", "1", "extension")] == True)] - noext_data = patient_data.loc[~patient_data[("tumor", "1", "extension")]] - self.ext.load_patient_data(ext_data, mapping) - self.noext.load_patient_data(noext_data, mapping) + self.ext.load_patient_data(patient_data[~is_lateralized], mapping) def likelihood( From 9b7da334278a8b332618f4a1ff6e93f8952b0d26 Mon Sep 17 00:00:00 2001 From: Roman Ludwig <48687784+rmnldwg@users.noreply.github.com> Date: Thu, 22 Feb 2024 17:15:26 +0100 Subject: [PATCH 50/75] feat(mid): finish midline (feature complete) --- lymph/models/midline.py | 82 +++++++++++++---------------------------- 1 file changed, 26 insertions(+), 56 deletions(-) diff --git a/lymph/models/midline.py b/lymph/models/midline.py index f6ac717..bb07269 100644 --- a/lymph/models/midline.py +++ b/lymph/models/midline.py @@ -408,69 +408,39 @@ def likelihood( def risk( self, involvement: PatternType | None = None, - given_param_args: Iterable[float] | None = None, - given_param_kwargs: dict[str, float] | None = None, + given_params: Iterable[float] | dict[str, 
float] | None = None, given_diagnoses: dict[str, DiagnoseType] | None = None, t_stage: str = "early", midline_extension: bool = False, central: bool = False, mode: str = "HMM", ) -> float: - """Compute the risk of nodal involvement given a specific diagnose. - - Args: - spread_probs: Set ot new spread parameters. This also contains the - mixing parameter alpha in the last position. - midline_extension: Whether or not the patient's tumor extends over - the mid-sagittal line. + """Compute the risk of nodal involvement ``given_diagnoses``. - See Also: - :meth:`Bilateral.risk`: Depending on whether or not the patient's - tumor does extend over the midline, the risk function of the - respective :class:`Bilateral` instance gets called. + TODO: finish docstring """ - if given_param_args is not None: - self.set_params(*given_param_args) - if given_param_kwargs is not None: - self.set_params(**given_param_kwargs) + if isinstance(given_params, dict): + self.set_params(**given_params) + else: + self.set_params(*given_params) + if central: - return self.central.risk(given_diagnoses = given_diagnoses,t_stage = t_stage, involvement = involvement) + return self.central.risk( + given_diagnoses=given_diagnoses, + t_stage=t_stage, + involvement=involvement, + mode=mode, + ) if midline_extension: - return self.ext.risk(given_diagnoses = given_diagnoses,t_stage = t_stage, involvement = involvement) - return self.noext.risk(given_diagnoses = given_diagnoses,t_stage = t_stage, involvement = involvement) - - - - # def generate_dataset( - # self, - # num_patients: int, - # stage_dist: dict[str, float], - # ) -> pd.DataFrame: - # """Generate/sample a pandas :class:`DataFrame` from the defined network. - - # Args: - # num_patients: Number of patients to generate. - # stage_dist: Probability to find a patient in a certain T-stage. 
- # """ - # # TODO: check if this still works - # drawn_t_stages, drawn_diag_times = self.diag_time_dists.draw( - # dist=stage_dist, size=num_patients - # ) - - # drawn_obs_ipsi = self.ipsi._draw_patient_diagnoses(drawn_diag_times) - # drawn_obs_contra = self.contra._draw_patient_diagnoses(drawn_diag_times) - # drawn_obs = np.concatenate([drawn_obs_ipsi, drawn_obs_contra], axis=1) - - # # construct MultiIndex for dataset from stored modalities - # sides = ["ipsi", "contra"] - # modalities = list(self.modalities.keys()) - # lnl_names = [lnl.name for lnl in self.ipsi.graph._lnls] - # multi_cols = pd.MultiIndex.from_product([sides, modalities, lnl_names]) - - # # create DataFrame - # dataset = pd.DataFrame(drawn_obs, columns=multi_cols) - # dataset = dataset.reorder_levels(order=[1, 0, 2], axis="columns") - # dataset = dataset.sort_index(axis="columns", level=0) - # dataset[('info', 'tumor', 't_stage')] = drawn_t_stages - - # return dataset + return self.ext.risk( + given_diagnoses=given_diagnoses, + t_stage=t_stage, + involvement=involvement, + mode=mode, + ) + return self.noext.risk( + given_diagnoses=given_diagnoses, + t_stage=t_stage, + involvement=involvement, + mode=mode, + ) From 5205a3f0902e0c3226b88d25a989079de85be8b3 Mon Sep 17 00:00:00 2001 From: Roman Ludwig <48687784+rmnldwg@users.noreply.github.com> Date: Thu, 22 Feb 2024 17:19:54 +0100 Subject: [PATCH 51/75] change!: adapt risk to likelihood call signature --- lymph/models/bilateral.py | 20 +++++++------------- lymph/models/unilateral.py | 19 +++++++------------ 2 files changed, 14 insertions(+), 25 deletions(-) diff --git a/lymph/models/bilateral.py b/lymph/models/bilateral.py index 0c02b90..ea5a2a8 100644 --- a/lymph/models/bilateral.py +++ b/lymph/models/bilateral.py @@ -459,8 +459,7 @@ def likelihood( def comp_posterior_joint_state_dist( self, - given_param_args: Iterable[float] | None = None, - given_param_kwargs: dict[str, float] | None = None, + given_params: Iterable[float] | dict[str, float] | 
None = None, given_diagnoses: dict[str, types.DiagnoseType] | None = None, t_stage: str | int = "early", mode: str = "HMM", @@ -480,13 +479,10 @@ def comp_posterior_joint_state_dist( See Also: :py:meth:`lymph.models.Unilateral.comp_posterior_state_dist` """ - if given_param_args is None: - given_param_args = [] - - if given_param_kwargs is None: - given_param_kwargs = {} - - self.set_params(*given_param_args, **given_param_kwargs) + if isinstance(given_params, dict): + self.set_params(**given_params) + else: + self.set_params(*given_params) if given_diagnoses is None: given_diagnoses = {} @@ -517,8 +513,7 @@ def comp_posterior_joint_state_dist( def risk( self, involvement: types.PatternType | None = None, - given_param_args: Iterable[float] | None = None, - given_param_kwargs: dict[str, float] | None = None, + given_params: Iterable[float] | dict[str, float] | None = None, given_diagnoses: dict[str, types.DiagnoseType] | None = None, t_stage: str = "early", mode: str = "HMM", @@ -544,8 +539,7 @@ def risk( """ # TODO: test this method posterior_state_probs = self.comp_posterior_joint_state_dist( - given_param_args=given_param_args, - given_param_kwargs=given_param_kwargs, + given_params=given_params, given_diagnoses=given_diagnoses, t_stage=t_stage, mode=mode, diff --git a/lymph/models/unilateral.py b/lymph/models/unilateral.py index f0ab965..b8f08f2 100644 --- a/lymph/models/unilateral.py +++ b/lymph/models/unilateral.py @@ -697,8 +697,7 @@ def comp_diagnose_encoding( def comp_posterior_state_dist( self, - given_param_args: Iterable[float] | None = None, - given_param_kwargs: dict[str, float] | None = None, + given_params: Iterable[float] | dict[str, float] | None = None, given_diagnoses: types.DiagnoseType | None = None, t_stage: str | int = "early", mode: str = "HMM", @@ -724,17 +723,14 @@ def comp_posterior_state_dist( The computation is much faster if no parameters are given, since then the transition matrix does not need to be recomputed. 
""" - if given_param_args is None: - given_param_args = [] - - if given_param_kwargs is None: - given_param_kwargs = {} - # in contrast to when computing the likelihood, we do want to raise an error # here if the parameters are invalid, since we want to know if the user # provided invalid parameters. In the likelihood, we rather return a zero # likelihood to tell the inference algorithm that the parameters are invalid. - self.set_params(*given_param_args, **given_param_kwargs) + if isinstance(given_params, dict): + self.set_params(**given_params) + else: + self.set_params(*given_params) if given_diagnoses is None: given_diagnoses = {} @@ -757,8 +753,7 @@ def comp_posterior_state_dist( def risk( self, involvement: types.PatternType | None = None, - given_param_args: Iterable[float] | None = None, - given_param_kwargs: dict[str, float] | None = None, + given_params: Iterable[float] | dict[str, float] | None = None, given_diagnoses: dict[str, types.PatternType] | None = None, t_stage: str = "early", mode: str = "HMM", @@ -782,7 +777,7 @@ def risk( :py:meth:`comp_posterior_state_dist` """ posterior_state_dist = self.comp_posterior_state_dist( - given_param_args, given_param_kwargs, given_diagnoses, t_stage, mode, + given_params, given_diagnoses, t_stage, mode, ) if involvement is None: From 937d47770cc9c006ee030bf9bb84b7b9fca1a78b Mon Sep 17 00:00:00 2001 From: Roman Ludwig <48687784+rmnldwg@users.noreply.github.com> Date: Thu, 22 Feb 2024 17:24:46 +0100 Subject: [PATCH 52/75] change(type): add risk to abstract methods --- lymph/types.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/lymph/types.py b/lymph/types.py index 0ef44fe..fd94b8e 100644 --- a/lymph/types.py +++ b/lymph/types.py @@ -4,6 +4,7 @@ from abc import ABC, abstractmethod from typing import Iterable, Literal, Protocol, TypeVar +import numpy as np import pandas as pd from pandas._libs.missing import NAType @@ -56,6 +57,7 @@ def get_params( def get_num_dims(self: M, mode: Literal["HMM", 
"BN"] = "HMM") -> int: """Return the number of dimensions of the parameter space.""" + # pylint: disable=no-member num = len(self.get_params()) if mode == "BN": num -= len(self.get_distribution_params()) @@ -92,3 +94,12 @@ def likelihood( otherwise. The parameters may be passed as positional or keyword arguments. They are then passed to the :py:meth:`set_params` method first. """ + + @abstractmethod + def risk( + self, + involvement: PatternType | None = None, + given_params: Iterable[float] | dict[str, float] | None = None, + given_diagnoses: dict[str, PatternType] | None = None, + ) -> float | np.ndarray: + """Return the risk of ``involvement``, given the parameters and diagnoses.""" From 05d20219d405003f4329cf40c0af35e50171a571 Mon Sep 17 00:00:00 2001 From: Roman Ludwig <48687784+rmnldwg@users.noreply.github.com> Date: Fri, 23 Feb 2024 14:45:15 +0100 Subject: [PATCH 53/75] feat: complete set/get methods on model classes The `Unilateral, `Bilateral`, and `Midline` model now all have the six methods `set_tumor_spread_params`, `set_lnl_spread_params`, `set_spread_params`, `set_params`, `get_tumor_spread_params`, `get_lnl_spread_params`, `get_spread_params`, and `get_params`. 
--- lymph/graph.py | 4 +- lymph/models/bilateral.py | 94 +++++++++++++++++++++++++----- lymph/models/midline.py | 96 +++++++++++++++++++++++++++---- lymph/models/unilateral.py | 78 ++++++++++++++++--------- lymph/types.py | 8 ++- tests/bayesian_unilateral_test.py | 2 +- tests/binary_bilateral_test.py | 4 +- tests/binary_midline_test.py | 7 +++ tests/binary_unilateral_test.py | 6 +- tests/integration_test.py | 2 +- tests/trinary_midline_test.py | 70 ++++++++++++++++++++++ tests/trinary_unilateral_test.py | 4 +- 12 files changed, 310 insertions(+), 65 deletions(-) create mode 100644 tests/trinary_midline_test.py diff --git a/lymph/graph.py b/lymph/graph.py index dbdd9cb..bc74f6a 100644 --- a/lymph/graph.py +++ b/lymph/graph.py @@ -622,9 +622,9 @@ def lnl_edges(self) -> dict[str, Edge]: """List of all LNL :py:class:`~Edge` instances in the graph. This contains all edges who's parents and children are instances of - :py:class:`~LymphNodeLevel` and that are not growth edges. + :py:class:`~LymphNodeLevel`, including growth edges (if the graph is trinary). 
""" - return {n: e for n, e in self.edges.items() if not (e.is_tumor_spread or e.is_growth)} + return {n: e for n, e in self.edges.items() if not e.is_tumor_spread} @property def growth_edges(self) -> dict[str, Edge]: diff --git a/lymph/models/bilateral.py b/lymph/models/bilateral.py index ea5a2a8..a6563fd 100644 --- a/lymph/models/bilateral.py +++ b/lymph/models/bilateral.py @@ -151,6 +151,58 @@ def is_binary(self) -> bool: return self.ipsi.is_binary + def get_tumor_spread_params( + self, + as_dict: bool = True, + as_flat: bool = True, + ) -> Iterable[float] | dict[str, float]: + """Return the parameters of the model's spread from tumor to LNLs.""" + params = { + "ipsi": self.ipsi.get_tumor_spread_params(as_flat=as_flat), + "contra": self.contra.get_tumor_spread_params(as_flat=as_flat), + } + + if self.is_symmetric["tumor_spread"]: + if params["ipsi"] != params["contra"]: + warnings.warn( + "The tumor spread parameters are not symmetric. " + "Returning the ipsilateral parameters." + ) + + params = params["ipsi"] + + if as_flat or not as_dict: + params = flatten(params) + + return params if as_dict else params.values() + + + def get_lnl_spread_params( + self, + as_dict: bool = True, + as_flat: bool = True, + ) -> Iterable[float] | dict[str, float]: + """Return the parameters of the model's spread from LNLs to tumor.""" + params = { + "ipsi": self.ipsi.get_lnl_spread_params(as_flat=as_flat), + "contra": self.contra.get_lnl_spread_params(as_flat=as_flat), + } + + if self.is_symmetric["lnl_spread"]: + if params["ipsi"] != params["contra"]: + warnings.warn( + "The LNL spread parameters are not symmetric. " + "Returning the ipsilateral parameters." 
+ ) + + params = params["ipsi"] + + if as_flat or not as_dict: + params = flatten(params) + + return params if as_dict else params.values() + + def get_spread_params( self, as_dict: bool = True, @@ -171,19 +223,13 @@ def get_spread_params( This is consistent with how the :py:meth:`~lymph.models.Bilteral.set_params` method expects the keyword arguments in case of the symmetry configurations. """ - params = {} - - if self.is_symmetric["tumor_spread"]: - params.update(self.ipsi.get_tumor_spread_params(as_flat=as_flat)) - else: - params["ipsi"] = self.ipsi.get_tumor_spread_params(as_flat=as_flat) - params["contra"] = self.contra.get_tumor_spread_params(as_flat=as_flat) + params = self.get_tumor_spread_params(as_flat=False) - if self.is_symmetric["lnl_spread"]: - params.update(self.ipsi.get_lnl_spread_params(as_flat=as_flat)) + if not self.is_symmetric["tumor_spread"] and not self.is_symmetric["lnl_spread"]: + params["ipsi"].update(self.get_lnl_spread_params(as_flat=False)["ipsi"]) + params["contra"].update(self.get_lnl_spread_params(as_flat=False)["contra"]) else: - params["ipsi"].update(self.ipsi.get_lnl_spread_params(as_flat=as_flat)) - params["contra"].update(self.contra.get_lnl_spread_params(as_flat=as_flat)) + params.update(self.get_lnl_spread_params(as_flat=as_flat)) if as_flat or not as_dict: params = flatten(params) @@ -215,8 +261,8 @@ def get_params( return params if as_dict else params.values() - def set_spread_params(self, *args: float, **kwargs: float) -> tuple[float]: - """Set the parameters of the model's spread edges.""" + def set_tumor_spread_params(self, *args: float, **kwargs: float) -> tuple[float]: + """Set the parameters of the model's spread from tumor to LNLs.""" kwargs, global_kwargs = unflatten_and_split(kwargs, expected_keys=["ipsi", "contra"]) ipsi_kwargs = global_kwargs.copy() @@ -233,6 +279,18 @@ def set_spread_params(self, *args: float, **kwargs: float) -> tuple[float]: else: args = self.contra.set_tumor_spread_params(*args, 
**contra_kwargs) + return args + + + def set_lnl_spread_params(self, *args: float, **kwargs: float) -> tuple[float]: + """Set the parameters of the model's spread from LNLs to tumor.""" + kwargs, global_kwargs = unflatten_and_split(kwargs, expected_keys=["ipsi", "contra"]) + + ipsi_kwargs = global_kwargs.copy() + ipsi_kwargs.update(kwargs.get("ipsi", {})) + contra_kwargs = global_kwargs.copy() + contra_kwargs.update(kwargs.get("contra", {})) + args = self.ipsi.set_lnl_spread_params(*args, **ipsi_kwargs) if self.is_symmetric["lnl_spread"]: synchronize_params( @@ -245,6 +303,12 @@ def set_spread_params(self, *args: float, **kwargs: float) -> tuple[float]: return args + def set_spread_params(self, *args: float, **kwargs: float) -> tuple[float]: + """Set the parameters of the model's spread edges.""" + args = self.set_tumor_spread_params(*args, **kwargs) + return self.set_lnl_spread_params(*args, **kwargs) + + def set_params(self, *args: float, **kwargs: float) -> tuple[float]: """Set new parameters to the model. @@ -520,8 +584,8 @@ def risk( ) -> float: """Compute risk of an ``involvement`` pattern, given parameters and diagnoses. - The parameters can be set via the ``given_param_args`` and - ``given_param_kwargs``, both of which are passed to the + The parameters can be set via the ``given_params`` and + ``given_params``, both of which are passed to the :py:meth:`~set_params` method. The ``given_diagnoses`` must be a dictionary mapping the side of the neck to a :py:class:`types.DiagnoseType`. 
diff --git a/lymph/models/midline.py b/lymph/models/midline.py index bb07269..24bdb3b 100644 --- a/lymph/models/midline.py +++ b/lymph/models/midline.py @@ -114,7 +114,7 @@ class will contain several instances of :py:class:`~lymph.models.Bilateral`, ) central_child = {} if use_central: - self.central = models.Bilateral( + self._central = models.Bilateral( graph_dict=graph_dict, unilateral_kwargs=unilateral_kwargs, is_symmetric={ @@ -139,6 +139,14 @@ class will contain several instances of :py:class:`~lymph.models.Bilateral`, ) + @classmethod + def trinary(cls, *args, **kwargs) -> Midline: + """Create a trinary model.""" + unilateral_kwargs = kwargs.pop("unilateral_kwargs", {}) + unilateral_kwargs["allowed_states"] = [0, 1, 2] + return cls(*args, unilateral_kwargs=unilateral_kwargs, **kwargs) + + @property def is_trinary(self) -> bool: """Return whether the model is trinary.""" @@ -175,18 +183,20 @@ def use_mixing(self) -> bool: @property def use_central(self) -> bool: """Return whether the model uses a central model.""" - return hasattr(self, "central") + return hasattr(self, "_central") + @property + def central(self) -> models.Bilateral: + """Return the central model.""" + return self._central - def get_spread_params( + + def get_tumor_spread_params( self, as_dict: bool = True, as_flat: bool = True, ) -> dict[str, float] | Iterable[float]: - """Return the spread parameters of the model. 
- - TODO: enrich docstring - """ + """Return the tumor spread parameters of the model.""" params = {} params["ipsi"] = self.ext.ipsi.get_tumor_spread_params(as_flat=as_flat) @@ -201,13 +211,59 @@ def get_spread_params( "contra": self.ext.contra.get_tumor_spread_params(as_flat=as_flat) } + if as_flat or not as_dict: + params = flatten(params) + + return params if as_dict else params.values() + + + def get_lnl_spread_params( + self, + as_dict: bool = True, + as_flat: bool = True, + ) -> dict[str, float] | Iterable[float]: + """Return the LNL spread parameters of the model.""" + ext_lnl_params = self.ext.get_lnl_spread_params(as_flat=False) + noext_lnl_params = self.noext.get_lnl_spread_params(as_flat=False) + + if ext_lnl_params != noext_lnl_params: + raise ValueError( + "LNL spread params not synched between ext and noext models. " + "Returning the ext params." + ) + + if self.use_central: + central_lnl_params = self.central.get_lnl_spread_params(as_flat=False) + if central_lnl_params != ext_lnl_params: + warnings.warn( + "LNL spread params not synched between central and ext models. " + "Returning the ext params." + ) + + if as_flat or not as_dict: + ext_lnl_params = flatten(ext_lnl_params) + + return ext_lnl_params if as_dict else ext_lnl_params.values() + + + def get_spread_params( + self, + as_dict: bool = True, + as_flat: bool = True, + ) -> dict[str, float] | Iterable[float]: + """Return the spread parameters of the model. 
+ + TODO: enrich docstring + """ + params = self.get_tumor_spread_params(as_flat=False) + if self.is_symmetric["lnl_spread"]: - params.update(self.ext.ipsi.get_lnl_spread_params(as_flat=as_flat)) + params.update(self.ext.ipsi.get_lnl_spread_params(as_flat=False)) else: if "contra" not in params: params["contra"] = {} - params["ipsi"].update(self.ext.ipsi.get_lnl_spread_params(as_flat=as_flat)) - params["contra"].update(self.noext.contra.get_lnl_spread_params(as_flat=as_flat)) + params["ipsi"].update(self.ext.ipsi.get_lnl_spread_params(as_flat=False)) + params["contra"].update(self.noext.contra.get_lnl_spread_params(as_flat=False)) if as_flat or not as_dict: params = flatten(params) @@ -233,7 +289,7 @@ def get_params( return params if as_dict else params.values() - def set_spread_params( + def set_tumor_spread_params( self, *args: float, **kwargs: float, ) -> Iterable[float] | dict[str, float]: """Set the spread parameters of the midline model. @@ -281,7 +337,17 @@ def set_spread_params( ext_contra_kwargs.update(kwargs.get("ext", {}).get("contra", {})) args = self.ext.contra.set_tumor_spread_params(*args, **ext_contra_kwargs) - # finally, take care of LNL spread + return args + + + def set_lnl_spread_params(self, *args: float, **kwargs: float) -> Iterable[float]: + """Set the LNL spread parameters of the midline model.""" + kwargs, global_kwargs = unflatten_and_split( + kwargs, expected_keys=["ipsi", "noext", "ext", "contra"], + ) + ipsi_kwargs = global_kwargs.copy() + ipsi_kwargs.update(kwargs.get("ipsi", {})) + if self.is_symmetric["lnl_spread"]: if self.use_central: self.central.ipsi.set_lnl_spread_params(*args, **global_kwargs) @@ -307,6 +373,12 @@ def set_spread_params( return args + def set_spread_params(self, *args: float, **kwargs: float) -> Iterable[float]: + """Set the spread parameters of the midline model.""" + args = self.set_tumor_spread_params(*args, **kwargs) + return self.set_lnl_spread_params(*args, **kwargs) + + def set_params( self, *args: float, 
**kwargs: float, ) -> Iterable[float] | dict[str, float]: diff --git a/lymph/models/unilateral.py b/lymph/models/unilateral.py index b8f08f2..9a47aaa 100644 --- a/lymph/models/unilateral.py +++ b/lymph/models/unilateral.py @@ -160,6 +160,40 @@ def get_t_stages( ) + def get_tumor_spread_params( + self, + as_dict: bool = True, + as_flat: bool = True, + ) -> Iterable[float] | dict[str, float]: + """Get the parameters of the tumor spread edges.""" + return get_params_from(self.graph.tumor_edges, as_dict, as_flat) + + + def get_lnl_spread_params( + self, + as_dict: bool = True, + as_flat: bool = True, + ) -> Iterable[float] | dict[str, float]: + """Get the parameters of the LNL spread edges. + + In the trinary case, this includes the growth parameters as well as the + microscopic modification parameters. + """ + return get_params_from(self.graph.lnl_edges, as_dict, as_flat) + + + def get_spread_params( + self, + as_dict: bool = True, + as_flat: bool = True, + ) -> Iterable[float] | dict[str, float]: + """Get the parameters of the spread edges.""" + return { + **self.get_tumor_spread_params(as_dict, as_flat), + **self.get_lnl_spread_params(as_dict, as_flat), + } + + def get_params( self, as_dict: bool = True, @@ -171,7 +205,7 @@ def get_params( ``as_flat`` is ``True``, the dictionary is flattened, i.e., all nested dictionaries are merged into one, using :py:func:`~lymph.helper.flatten`. 
""" - params = self.graph.get_params(as_flat=as_flat) + params = self.get_spread_params(as_flat=as_flat) params.update(self.get_distribution_params(as_flat=as_flat)) if as_flat or not as_dict: @@ -180,21 +214,20 @@ def get_params( return params if as_dict else params.values() - def get_tumor_spread_params( - self, - as_dict: bool = True, - as_flat: bool = True, - ) -> Iterable[float] | dict[str, float]: - """Get the parameters of the tumor spread edges.""" - return get_params_from(self.graph.tumor_edges, as_dict, as_flat) + def set_tumor_spread_params(self, *args: float, **kwargs: float) -> tuple[float]: + """Assign new parameters to the tumor spread edges.""" + return set_params_for(self.graph.tumor_edges, *args, **kwargs) - def get_lnl_spread_params( - self, - as_dict: bool = True, - as_flat: bool = True, - ) -> Iterable[float] | dict[str, float]: - """Get the parameters of the LNL spread edges.""" - return get_params_from(self.graph.lnl_edges, as_dict, as_flat) + + def set_lnl_spread_params(self, *args: float, **kwargs: float) -> tuple[float]: + """Assign new parameters to the LNL spread edges.""" + return set_params_for(self.graph.lnl_edges, *args, **kwargs) + + + def set_spread_params(self, *args: float, **kwargs: float) -> tuple[float]: + """Assign new parameters to the spread edges.""" + args = self.set_tumor_spread_params(*args, **kwargs) + return self.set_lnl_spread_params(*args, **kwargs) def set_params(self, *args: float, **kwargs: float) -> tuple[float]: @@ -241,19 +274,10 @@ def set_params(self, *args: float, **kwargs: float) -> tuple[float]: 'IItoIII_micro': 0.5, 'III_growth': 0.123} """ - args = self.graph.set_params(*args, **kwargs) + args = self.set_spread_params(*args, **kwargs) return self.set_distribution_params(*args, **kwargs) - def set_tumor_spread_params(self, *args: float, **kwargs: float) -> tuple[float]: - """Assign new parameters to the tumor spread edges.""" - return set_params_for(self.graph.tumor_edges, *args, **kwargs) - - def 
set_lnl_spread_params(self, *args: float, **kwargs: float) -> tuple[float]: - """Assign new parameters to the LNL spread edges.""" - return set_params_for(self.graph.lnl_edges, *args, **kwargs) - - def comp_transition_prob( self, newstate: list[int], @@ -727,7 +751,9 @@ def comp_posterior_state_dist( # here if the parameters are invalid, since we want to know if the user # provided invalid parameters. In the likelihood, we rather return a zero # likelihood to tell the inference algorithm that the parameters are invalid. - if isinstance(given_params, dict): + if given_params is None: + pass + elif isinstance(given_params, dict): self.set_params(**given_params) else: self.set_params(*given_params) diff --git a/lymph/types.py b/lymph/types.py index fd94b8e..1995ff3 100644 --- a/lymph/types.py +++ b/lymph/types.py @@ -56,7 +56,13 @@ def get_params( """ def get_num_dims(self: M, mode: Literal["HMM", "BN"] = "HMM") -> int: - """Return the number of dimensions of the parameter space.""" + """Return the number of dimensions of the parameter space. + + A hidden Markov model (``mode="HMM"``) typically has more parameters than a + Bayesian network (``mode="BN"``), because we need parameters for the + distributions over diagnosis times. You can read more about that in the + :py:mod:`lymph.diagnose_times` module. + """ # pylint: disable=no-member num = len(self.get_params()) if mode == "BN": diff --git a/tests/bayesian_unilateral_test.py b/tests/bayesian_unilateral_test.py index dc2dd13..7fe99fe 100644 --- a/tests/bayesian_unilateral_test.py +++ b/tests/bayesian_unilateral_test.py @@ -38,7 +38,7 @@ def test_likelihood_invalid_params_isinf(self): for name in random_params: random_params[name] += 1. 
likelihood = self.model.likelihood( - given_param_kwargs=random_params, + given_params=random_params, log=True, mode="BN", ) diff --git a/tests/binary_bilateral_test.py b/tests/binary_bilateral_test.py index 476ae31..f2820e0 100644 --- a/tests/binary_bilateral_test.py +++ b/tests/binary_bilateral_test.py @@ -311,7 +311,7 @@ def test_posterior_state_dist(self): random_diagnoses = self.create_random_diagnoses() posterior = self.model.comp_posterior_joint_state_dist( - given_param_kwargs=random_parameters, + given_params=random_parameters, given_diagnoses=random_diagnoses, ) self.assertEqual(posterior.shape, (num_states, num_states)) @@ -330,7 +330,7 @@ def test_risk(self): risk = self.model.risk( involvement=random_pattern, - given_param_kwargs=random_parameters, + given_params=random_parameters, given_diagnoses=random_diagnoses, t_stage=random_t_stage, ) diff --git a/tests/binary_midline_test.py b/tests/binary_midline_test.py index bbe6076..5a5aeda 100644 --- a/tests/binary_midline_test.py +++ b/tests/binary_midline_test.py @@ -36,6 +36,13 @@ def setUp( ) + def test_init(self) -> None: + """Check some basic attributes.""" + self.assertTrue(self.model.use_central) + self.assertTrue(self.model.use_mixing) + self.assertFalse(self.model.is_trinary) + + def test_set_spread_params(self) -> None: """Check that the complex parameter assignment works correctly.""" params_to_set = {k: self.rng.uniform() for k in self.model.get_params().keys()} diff --git a/tests/binary_unilateral_test.py b/tests/binary_unilateral_test.py index 9f8ae8b..4a3427a 100644 --- a/tests/binary_unilateral_test.py +++ b/tests/binary_unilateral_test.py @@ -285,7 +285,7 @@ def test_likelihood_invalid_params_isinf(self): for name in random_params: random_params[name] += 1. 
likelihood = self.model.likelihood( - given_param_kwargs=random_params, + given_params=random_params, log=True, mode="HMM", ) @@ -326,7 +326,7 @@ def test_comp_diagnose_encoding(self): def test_posterior_state_dist(self): """Make sure the posterior state dist is correctly computed.""" posterior_state_dist = self.model.comp_posterior_state_dist( - given_param_kwargs=self.create_random_params(), + given_params=self.create_random_params(), given_diagnoses=self.create_random_diagnoses(), t_stage=self.rng.choice(["early", "late"]), ) @@ -343,7 +343,7 @@ def test_risk(self): risk = self.model.risk( involvement=random_pattern, - given_param_kwargs=random_params, + given_params=random_params, given_diagnoses=random_diagnoses, t_stage=random_t_stage, ) diff --git a/tests/integration_test.py b/tests/integration_test.py index c181d36..46217b4 100644 --- a/tests/integration_test.py +++ b/tests/integration_test.py @@ -37,5 +37,5 @@ def setUp(self): def test_likelihood_value(self): """Check that the computed likelihood is correct.""" test_probabilities = [0.02, 0.24, 0.03, 0.2, 0.23, 0.18, 0.18, 0.5] - llh = self.model.likelihood(given_param_args=test_probabilities, log=True) + llh = self.model.likelihood(given_params=test_probabilities, log=True) self.assertAlmostEqual(llh, -586.8723971388224, places=10) diff --git a/tests/trinary_midline_test.py b/tests/trinary_midline_test.py new file mode 100644 index 0000000..4625d89 --- /dev/null +++ b/tests/trinary_midline_test.py @@ -0,0 +1,70 @@ +""" +Test the midline model for the trinary case. +""" +import unittest +from typing import Literal + +import numpy as np + +from lymph import models + +from . 
import fixtures + + +class MidlineSetParamsTestCase(unittest.TestCase): + """Check that the complex parameter assignment works correctly.""" + + def setUp( + self, + seed: int = 42, + graph_size: Literal["small", "medium", "large"] = "small", + use_mixing: bool = True, + use_central: bool = True, + is_symmetric: dict[str, bool] | None = None, + ) -> None: + super().setUp() + self.rng = np.random.default_rng(seed) + graph_dict = fixtures.get_graph(graph_size) + if is_symmetric is None: + is_symmetric = {"tumor_spread": False, "lnl_spread": True} + + self.model = models.Midline.trinary( + graph_dict=graph_dict, + is_symmetric=is_symmetric, + use_mixing=use_mixing, + use_central=use_central, + ) + + + def test_init(self) -> None: + """Check some basic attributes.""" + self.assertTrue(self.model.use_central) + self.assertTrue(self.model.use_mixing) + self.assertTrue(self.model.is_trinary) + + + def test_set_spread_params(self) -> None: + """Check that the complex parameter assignment works correctly.""" + params_to_set = {k: self.rng.uniform() for k in self.model.get_params().keys()} + self.model.set_params(**params_to_set) + + self.assertEqual( + self.model.central.ipsi.get_tumor_spread_params(), + self.model.central.contra.get_tumor_spread_params(), + ) + self.assertEqual( + self.model.central.ipsi.get_lnl_spread_params(), + self.model.central.contra.get_lnl_spread_params(), + ) + self.assertEqual( + self.model.central.contra.get_lnl_spread_params(), + self.model.ext.ipsi.get_lnl_spread_params(), + ) + self.assertEqual( + self.model.ext.ipsi.get_lnl_spread_params(), + self.model.noext.ipsi.get_lnl_spread_params(), + ) + self.assertEqual( + self.model.ext.ipsi.get_tumor_spread_params(), + self.model.noext.ipsi.get_tumor_spread_params(), + ) diff --git a/tests/trinary_unilateral_test.py b/tests/trinary_unilateral_test.py index 61f9216..2f64b62 100644 --- a/tests/trinary_unilateral_test.py +++ b/tests/trinary_unilateral_test.py @@ -147,7 +147,7 @@ def 
test_likelihood_invalid_params_isinf(self): for name in random_params: random_params[name] += 1. likelihood = self.model.likelihood( - given_param_kwargs=random_params, + given_params=random_params, log=True, mode="HMM", ) @@ -179,7 +179,7 @@ def test_risk_is_probability(self): risk = self.model.risk( involvement=fixtures.create_random_pattern(lnls=list(self.model.graph.lnls.keys())), given_diagnoses=self.create_random_diagnoses(), - given_param_kwargs=self.create_random_params(), + given_params=self.create_random_params(), t_stage=self.rng.choice(["early", "late"]), ) self.assertGreaterEqual(risk, 0.) From 7207ced2f9992c12ac658d922c3f058c4c55e944 Mon Sep 17 00:00:00 2001 From: Roman Ludwig <48687784+rmnldwg@users.noreply.github.com> Date: Fri, 23 Feb 2024 14:55:02 +0100 Subject: [PATCH 54/75] remove: unused helper functions --- lymph/helper.py | 295 ------------------------------------------------ 1 file changed, 295 deletions(-) diff --git a/lymph/helper.py b/lymph/helper.py index 5d3aebf..2f25f3c 100644 --- a/lymph/helper.py +++ b/lymph/helper.py @@ -2,7 +2,6 @@ Module containing supporting classes and functions used accross the project. """ import logging -import warnings from collections import UserDict from functools import cached_property, lru_cache, wraps from typing import Any, Callable, Iterable, Sequence @@ -15,194 +14,6 @@ logger = logging.getLogger(__name__) -class DelegationSyncMixin: - """Mixin to delegate and synchronize an attribute of multiple instances. - - If a container class holds several (i.e. one ore more) instances of a class, this - mixin can be used with the container class to delegate and synchronize an attribute - from the instances. - - See the explanation in the :py:class:`DelegatorMixin.init_delegation_sync` method. - - This also works for attributes that are not hashable, such as lists or dictionaries. - See more details about that in the :py:class:`AccessPassthrough` class docs. 
- """ - def __init__(self) -> None: - self._attrs_to_objects = {} - - - def _init_delegation_sync(self, **attrs_to_objects: list[object]) -> None: - """Initialize the delegation and synchronization of attributes. - - Each keyword argument is the name of an attribute to synchronize. The value - should be a list of instances for which that attribute should be synchronized. - - Example: - - >>> class Eye: - ... def __init__(self, color="blue"): - ... self.eye_color = color - >>> class Person(DelegationSyncMixin): - ... def __init__(self): - ... super().__init__() - ... self.left = Eye("green") - ... self.right = Eye("brown") - ... self._init_delegation_sync(eye_color=[self.left, self.right]) - >>> person = Person() - >>> person.eye_color # pop element of sorted set and warn that not synced - 'green' - >>> person.eye_color = 'red' - >>> person.left.eye_color == person.right.eye_color == 'red' - True - """ - for name, objects in attrs_to_objects.items(): - types = {type(obj) for obj in objects} - if len(types) > 1: - raise ValueError( - f"Instances of delegated attribute {name} must be of same type" - ) - - self._attrs_to_objects = attrs_to_objects - - - def __getattr__(self, name): - objects = self._attrs_to_objects[name] - attr_list = [getattr(obj, name) for obj in objects] - - if len(attr_list) == 1: - return attr_list[0] - - return fuse(attr_list) - - - def __setattr__(self, name, value): - if name != "_attrs_to_objects" and name in self._attrs_to_objects: - for inst in self._attrs_to_objects[name]: - setattr(inst, name, value) - else: - super().__setattr__(name, value) - - -class AccessPassthrough: - """Allows delegated access to an attribute's methods. - - This class is constructed from a list of objects. It allows access to the - methods and items of the objects in the list. Setting items is also supported, but - only one level deep. - - It is used by the :py:class:`DelegationSyncMixin` to handle unhashable attributes. 
- For example, a delegated and synched attribute might be a dictionary. In this case, - a call like ``container.attribute["key"]`` would retrieve the right value, but - setting it via ``container.attribute["key"] = value`` would at best set the value - on one of the synched instances, but not on all of them. This class handles passing - the set value to all instances. - - Note: - This class is not meant to be used directly, but only by the - :py:class:`DelegationSyncMixin`. - - Below is an example that demonstrates how calls to ``__setitem__``, ``__setattr__``, - and ``__call__`` are passed through to both instances for which the delegation and - synchronization is invoked: - - >>> class Param: - ... def __init__(self, value): - ... self.value = value - >>> class Model: - ... def __init__(self, **kwargs): - ... self.params_dict = kwargs - ... self.param = Param(sum(kwargs.values())) - ... def set_value(self, key, value): - ... self.params_dict[key] = value - >>> class Mixture(DelegationSyncMixin): - ... def __init__(self): - ... super().__init__() - ... self.c1 = Model(a=1, b=2) - ... self.c2 = Model(a=3, b=4, c=5) - ... self._init_delegation_sync( - ... params_dict=[self.c1, self.c2], - ... param=[self.c1, self.c2], - ... set_value=[self.c1, self.c2], - ... 
) - >>> mixture = Mixture() - >>> mixture.params_dict["b"] # get first element and warn that not synced - 4 - >>> mixture.params_dict["a"] = 99 - >>> mixture.c1.params_dict["a"] == mixture.c2.params_dict["a"] == 99 - True - >>> mixture.param.value - 12 - >>> mixture.param.value = 42 - >>> mixture.c1.param.value == mixture.c2.param.value == 42 - True - >>> mixture.set_value("c", 100) - >>> mixture.c1.params_dict["c"] == mixture.c2.params_dict["c"] == 100 - True - """ - def __init__(self, attr_objects: list[object]) -> None: - self._attr_objects = attr_objects - - - def __getattr__(self, name): - if len(self._attr_objects) == 1: - return getattr(self._attr_objects[0], name) - - return fuse([getattr(obj, name) for obj in self._attr_objects]) - - - def __getitem__(self, key): - if len(self._attr_objects) == 1: - return self._attr_objects[0][key] - - return fuse([obj[key] for obj in self._attr_objects]) - - - def __setattr__(self, name, value): - if name != "_attr_objects": - for obj in self._attr_objects: - setattr(obj, name, value) - else: - super().__setattr__(name, value) - - - def __setitem__(self, key, value): - for obj in self._attr_objects: - obj[key] = value - - - def __call__(self, *args: Any, **kwds: Any) -> Any: - return_values = [] - for obj in self._attr_objects: - return_values.append(obj(*args, **kwds)) - - return fuse(return_values) - - - def __len__(self) -> int: - if len(self._attr_objects) == 1: - return len(self._attr_objects[0]) - - return fuse([len(obj) for obj in self._attr_objects]) - - -def fuse(objects: list[Any]) -> Any: - """Try to fuse ``objects`` and return one result. - - TODO: This should not immediately return an ``AccessPassthrough`` just because the - ``objects`` are not all equal. It should do so, when the ``objects`` may be dict- - like or be callables... I need to think of a proper criterion. - - What about return an ``AccessPassthrough`` when the type is one of those defined - in this package? 
- """ - if all(objects[0] == obj for obj in objects[1:]): - return objects[0] - - try: - return sorted(set(objects)).pop() - except TypeError: - return AccessPassthrough(objects) - def check_unique_names(graph: dict): """Check all nodes in ``graph`` have unique names and no duplicate connections.""" @@ -241,57 +52,6 @@ def check_spsn(spsn: list[float]): raise ValueError(msg) -def change_base( - number: int, - base: int, - reverse: bool = False, - length: int | None = None -) -> str: - """Convert an integer into another base. - - Args: - number: Number to convert - base: Base of the resulting converted number - reverse: If true, the converted number will be printed in reverse order. - length: Length of the returned string. If longer than would be - necessary, the output will be padded. - - Returns: - The (padded) string of the converted number. - """ - if number < 0: - raise ValueError("Cannot convert negative numbers") - if base > 16: - raise ValueError("Base must be 16 or smaller!") - elif base < 2: - raise ValueError("There is no unary number system, base must be > 2") - - convert_string = "0123456789ABCDEF" - result = '' - - if number == 0: - result += '0' - else: - while number >= base: - result += convert_string[number % base] - number = number//base - if number > 0: - result += convert_string[number] - - if length is None: - length = len(result) - elif length < len(result): - length = len(result) - warnings.warn("Length cannot be shorter than converted number.") - - pad = '0' * (length - len(result)) - - if reverse: - return result + pad - else: - return pad + result[::-1] - - @lru_cache def comp_transition_tensor( num_parent: int, @@ -344,32 +104,6 @@ def comp_transition_tensor( return tensor -def check_modality(modality: str, spsn: list): - """Private method that checks whether all inserted values - are valid for a confusion matrix. 
- - Args: - modality (str): name of the modality - spsn (list): list with specificity and sensiticity - - Raises: - TypeError: returns a type error if the modality is not a string - ValueError: raises a value error if the spec or sens is not a number btw. 0.5 and 1.0 - """ - if not isinstance(modality, str): - raise TypeError("Modality names must be strings.") - - has_len_2 = len(spsn) == 2 - is_above_lb = np.all(np.greater_equal(spsn, 0.5)) - is_below_ub = np.all(np.less_equal(spsn, 1.)) - - if not has_len_2 or not is_above_lb or not is_below_ub: - raise ValueError( - "For each modality provide a list of two decimals between 0.5 and 1.0 " - "as specificity & sensitivity respectively." - ) - - def clinical(spsn: list) -> np.ndarray: """produces the confusion matrix of a clinical modality, i.e. a modality that can not detect microscopic metastases @@ -719,32 +453,3 @@ def synchronize_params( """Get the parameters from one object and set them to another.""" for key, obj in set_to.items(): obj.set_params(**get_from[key].get_params(as_dict=True)) - - -def set_bilateral_params_for( - *args: float, - ipsi_objects: dict[str, HasSetParams], - contra_objects: dict[str, HasSetParams], - is_symmetric: bool = False, - **kwargs: float, -) -> tuple[float]: - """Pass arguments to ``set_params()`` of ``ipsi_objects`` and ``contra_objects``. - - If ``is_symmetric`` is ``True``, the parameters of the ``contra_objects`` will be - set to the parameters of the ``ipsi_objects``. Otherwise, the parameters of the - ``contra_objects`` will be set independently. 
- """ - kwargs, global_kwargs = unflatten_and_split(kwargs, expected_keys=["ipsi", "contra"]) - - ipsi_kwargs = global_kwargs.copy() - ipsi_kwargs.update(kwargs.get("ipsi", {})) - args = set_params_for(ipsi_objects, *args, **ipsi_kwargs) - - if is_symmetric: - synchronize_params(ipsi_objects, contra_objects) - else: - contra_kwargs = global_kwargs.copy() - contra_kwargs.update(kwargs.get("contra", {})) - args = set_params_for(contra_objects, *args, **contra_kwargs) - - return args From 82f323fd36dd9b963cebd235feb94657f90f642a Mon Sep 17 00:00:00 2001 From: Roman Ludwig <48687784+rmnldwg@users.noreply.github.com> Date: Fri, 23 Feb 2024 16:24:53 +0100 Subject: [PATCH 55/75] docs: update index & fix some docstrings --- docs/source/api.rst | 3 +- docs/source/index.rst | 3 +- docs/source/models.rst | 1 + docs/source/types.rst | 11 +++++ lymph/diagnose_times.py | 2 - lymph/graph.py | 10 +---- lymph/helper.py | 10 ----- lymph/matrix.py | 1 - lymph/models/bilateral.py | 21 +++++++++ lymph/models/midline.py | 88 ++++++++++++++++++++++++-------------- lymph/models/unilateral.py | 4 -- 11 files changed, 94 insertions(+), 60 deletions(-) create mode 100644 docs/source/types.rst diff --git a/docs/source/api.rst b/docs/source/api.rst index cee3275..0cf6e63 100644 --- a/docs/source/api.rst +++ b/docs/source/api.rst @@ -6,12 +6,13 @@ Detailed API ============ .. toctree:: - :maxdepth: 2 + :maxdepth: 3 :caption: Content graph models components + types helper diff --git a/docs/source/index.rst b/docs/source/index.rst index 7cafe22..8198175 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -13,12 +13,11 @@ Documentation ============= .. toctree:: - :maxdepth: 2 + :maxdepth: 3 :caption: Content install quickstart_unilateral - sampling api license diff --git a/docs/source/models.rst b/docs/source/models.rst index 59b8109..5e3af1a 100644 --- a/docs/source/models.rst +++ b/docs/source/models.rst @@ -9,3 +9,4 @@ Lymphatic Progression Models .. 
automodule:: lymph.models :members: :special-members: __init__ + :show-inheritance: diff --git a/docs/source/types.rst b/docs/source/types.rst new file mode 100644 index 0000000..ae11573 --- /dev/null +++ b/docs/source/types.rst @@ -0,0 +1,11 @@ +.. module: types + +.. _types: + + +Types +===== + +.. automodule:: lymph.types + :members: + :special-members: __init__ diff --git a/lymph/diagnose_times.py b/lymph/diagnose_times.py index 1648e48..bd127a8 100644 --- a/lymph/diagnose_times.py +++ b/lymph/diagnose_times.py @@ -268,8 +268,6 @@ class Composite(ABC): Any class inheriting from this class should be able to handle the definition of distributions over diagnosis times. - Example: - >>> class MyComposite(Composite): ... pass >>> leaf1 = MyComposite(is_distribution_leaf=True, max_time=1) diff --git a/lymph/graph.py b/lymph/graph.py index bc74f6a..5fa61a3 100644 --- a/lymph/graph.py +++ b/lymph/graph.py @@ -423,8 +423,6 @@ def set_params(self, *args, **kwargs) -> tuple[float]: Keyword arguments (i.e., ``"growth"``, ``"spread"``, and ``"micro"``) override positional arguments. Unused args are returned. - Examples: - >>> edge = Edge(LymphNodeLevel("II", allowed_states=[0, 1, 2]), LymphNodeLevel("III")) >>> _ = edge.set_params(0.1, 0.2) >>> edge.spread_prob @@ -462,7 +460,7 @@ def transition_tensor(self) -> np.ndarray: """Return the transition tensor of the edge. See Also: - :py:function:`lymph.helper.comp_transition_tensor` + :py:func:`lymph.helper.comp_transition_tensor` """ return comp_transition_tensor( num_parent=len(self.parent.allowed_states), @@ -649,8 +647,6 @@ def __hash__(self) -> int: def to_dict(self) -> dict[tuple[str, str], set[str]]: """Returns graph representing this instance's nodes and egdes as dictionary. - Example: - >>> graph_dict = { ... ('tumor', 'T'): ['II', 'III'], ... ('lnl', 'II'): ['III'], @@ -670,8 +666,6 @@ def to_dict(self) -> dict[tuple[str, str], set[str]]: def get_mermaid(self) -> str: """Prints the graph in mermaid format. 
- Example: - >>> graph_dict = { ... ('tumor', 'T'): ['II', 'III'], ... ('lnl', 'II'): ['III'], @@ -817,8 +811,6 @@ def set_params(self, *args, **kwargs) -> tuple[float]: Specific keyword arguments take precedence over global ones which in turn take precedence over positional arguments. - Example: - >>> graph = Representation(graph_dict={ ... ("tumor", "T"): ["II" , "III"], ... ("lnl", "II"): ["III"], diff --git a/lymph/helper.py b/lymph/helper.py index 2f25f3c..925f089 100644 --- a/lymph/helper.py +++ b/lymph/helper.py @@ -154,8 +154,6 @@ def tile_and_repeat( .. _tile: https://numpy.org/doc/stable/reference/generated/numpy.tile.html .. _repeat: https://numpy.org/doc/stable/reference/generated/numpy.repeat.html - Example: - >>> mat = np.array([[1, 2], [3, 4]]) >>> tile_and_repeat(mat, (2, 2), (2, 2)) array([[1, 1, 2, 2, 1, 1, 2, 2], @@ -183,8 +181,6 @@ def tile_and_repeat( def get_state_idx_matrix(lnl_idx: int, num_lnls: int, num_states: int) -> np.ndarray: """Return the indices for the transition tensor correpsonding to ``lnl_idx``. - Example: - >>> get_state_idx_matrix(1, 3, 2) array([[0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0], @@ -215,8 +211,6 @@ def row_wise_kron(a: np.ndarray, b: np.ndarray) -> np.ndarray: .. _kronecker product: https://en.wikipedia.org/wiki/Kronecker_product - Example: - >>> a = np.array([[1, 2], [3, 4]]) >>> b = np.array([[5, 6], [7, 8]]) >>> row_wise_kron(a, b) @@ -370,8 +364,6 @@ def popfirst(seq: Sequence[Any]) -> tuple[Any, Sequence[Any]]: def flatten(mapping, parent_key='', sep='_') -> dict: """Flatten a nested dictionary. - Example: - >>> flatten({"a": {"b": 1, "c": 2}, "d": 3}) {'a_b': 1, 'a_c': 2, 'd': 3} """ @@ -392,8 +384,6 @@ def unflatten_and_split( ) -> tuple[dict, dict]: """Unflatten the part of a dict containing ``expected_keys`` and return the rest. 
- Example: - >>> unflatten_and_split({'a_b': 1, 'a_c_x': 2, 'd_y': 3}, expected_keys=['a']) ({'a': {'b': 1, 'c_x': 2}}, {'d_y': 3}) """ diff --git a/lymph/matrix.py b/lymph/matrix.py index ea172a4..8ad5168 100644 --- a/lymph/matrix.py +++ b/lymph/matrix.py @@ -130,7 +130,6 @@ def compute_encoding( Missing values are treated as unknown involvement. - Examples: >>> compute_encoding(["II", "III"], {"II": True, "III": False}) array([False, False, True, False]) >>> compute_encoding(["II", "III"], {"II": "involved"}) diff --git a/lymph/models/bilateral.py b/lymph/models/bilateral.py index a6563fd..8abc017 100644 --- a/lymph/models/bilateral.py +++ b/lymph/models/bilateral.py @@ -222,6 +222,27 @@ def get_spread_params( This is consistent with how the :py:meth:`~lymph.models.Bilteral.set_params` method expects the keyword arguments in case of the symmetry configurations. + + >>> model = Bilateral(graph_dict={ + ... ("tumor", "T"): ["II", "III"], + ... ("lnl", "II"): ["III"], + ... ("lnl", "III"): [], + ... }) + >>> num_dims = model.get_num_dims() + >>> model.set_spread_params(*np.round(np.linspace(0., 1., num_dims+1), 2)) + (1.0,) + >>> model.get_spread_params(as_flat=False) # doctest: +NORMALIZE_WHITESPACE + {'ipsi': {'TtoII': {'spread': 0.0}, + 'TtoIII': {'spread': 0.2}}, + 'contra': {'TtoII': {'spread': 0.4}, + 'TtoIII': {'spread': 0.6}}, + 'IItoIII': {'spread': 0.8}} + >>> model.get_spread_params(as_flat=True) # doctest: +NORMALIZE_WHITESPACE + {'ipsi_TtoII_spread': 0.0, + 'ipsi_TtoIII_spread': 0.2, + 'contra_TtoII_spread': 0.4, + 'contra_TtoIII_spread': 0.6, + 'IItoIII_spread': 0.8} """ params = self.get_tumor_spread_params(as_flat=False) diff --git a/lymph/models/midline.py b/lymph/models/midline.py index 24bdb3b..6e60f05 100644 --- a/lymph/models/midline.py +++ b/lymph/models/midline.py @@ -63,30 +63,22 @@ def __init__( ): """Initialize the model. 
- The class is constructed in a similar fashion to the - :py:class:`~lymph.models.Bilateral`: That class contains one - :py:class:`~lymph.models.Unilateral` for each side of the neck, while this - class will contain several instances of :py:class:`~lymph.models.Bilateral`, + The class is constructed in a similar fashion to the :py:class:`~.Bilateral`: + That class contains one :py:class:`~.Unilateral` for each side of the neck, + while this class will contain several instances of :py:class:`~.Bilateral`, one for the ipsilateral side and two to three for the the contralateral side covering the cases a) no midline extension, b) midline extension, and c) central tumor location. - Args: - graph: Dictionary of the same kind as for initialization of - :class:`System`. This graph will be passed to the constructors of - two :class:`System` attributes of this class. - use_mixing: Describe the contralateral base spread probabilities for the - case of a midline extension as a linear combination between the base - spread probs of the ipsilateral side and the ones of the contralateral - side when no midline extension is present. - trans_symmetric: If ``True``, the spread probabilities among the - LNLs will be set symmetrically. - central_enabled: If ``True``, a third bilateral class is produced - which holds a model for patients with central tumor locations. + Added keyword arguments in this constructor are ``use_mixing``, which controls + whether to use the above described mixture of spread parameters from tumor to + the LNLs. And ``use_central``, which controls whether to use a third + :py:class:`~.Bilateral` model for the case of a central tumor location. The ``unilateral_kwargs`` are passed to all bilateral models. + See Also: - :class:`Bilateral`: Two of these are held as attributes by this + :py:class:`Bilateral`: Two of these are held as attributes by this class. One for the case of a mid-sagittal extension of the primary tumor and one for the case of no such extension. 
""" @@ -196,7 +188,13 @@ def get_tumor_spread_params( as_dict: bool = True, as_flat: bool = True, ) -> dict[str, float] | Iterable[float]: - """Return the tumor spread parameters of the model.""" + """Return the tumor spread parameters of the model. + + If the model uses the mixing parameter, the returned params will contain the + ipsilateral spread from tumor to LNLs, the contralateral ones for the case of + no midline extension, and the mixing parameter. Otherwise, it will contain the + contralateral params for the cases of present and absent midline extension. + """ params = {} params["ipsi"] = self.ext.ipsi.get_tumor_spread_params(as_flat=as_flat) @@ -222,7 +220,12 @@ def get_lnl_spread_params( as_dict: bool = True, as_flat: bool = True, ) -> dict[str, float] | Iterable[float]: - """Return the LNL spread parameters of the model.""" + """Return the LNL spread parameters of the model. + + Depending on the value of ``is_symmetric["lnl_spread"]``, the returned params + may contain only one set of spread parameters (if ``True``) or one for the ipsi- + and one for the contralateral side (if ``False``). + """ ext_lnl_params = self.ext.get_lnl_spread_params(as_flat=False) noext_lnl_params = self.noext.get_lnl_spread_params(as_flat=False) @@ -253,17 +256,19 @@ def get_spread_params( ) -> dict[str, float] | Iterable[float]: """Return the spread parameters of the model. - TODO: enrich docstring + This combines the returned values from the calls to + :py:meth:`get_tumor_spread_params` and :py:meth:`get_lnl_spread_params`. 
""" params = self.get_tumor_spread_params(as_flat=False) + lnl_spread_params = self.get_lnl_spread_params(as_flat=False) if self.is_symmetric["lnl_spread"]: - params.update(self.ext.ipsi.get_lnl_spread_params(as_flat=False)) + params.update(lnl_spread_params) else: if "contra" not in params: params["contra"] = {} - params["ipsi"].update(self.ext.ipsi.get_lnl_spread_params(as_flat=False)) - params["contra"].update(self.noext.contra.get_lnl_spread_params(as_flat=False)) + params["ipsi"].update(lnl_spread_params["ipsi"]) + params["contra"].update(lnl_spread_params["contra"]) if as_flat or not as_dict: params = flatten(params) @@ -276,9 +281,10 @@ def get_params( as_dict: bool = True, as_flat: bool = True, ) -> Iterable[float] | dict[str, float]: - """Return the parameters of the model. + """Return all the parameters of the model. - TODO: enrich docstring + This includes the spread parameters from the call to :py:meth:`get_spread_params` + and the distribution parameters from the call to :py:meth:`get_distribution_params`. """ params = self.get_spread_params(as_flat=as_flat) params.update(self.get_distribution_params(as_flat=as_flat)) @@ -294,7 +300,12 @@ def set_tumor_spread_params( ) -> Iterable[float] | dict[str, float]: """Set the spread parameters of the midline model. - TODO: enrich docstring + In analogy to the :py:meth:`get_tumor_spread_params` method, this method sets + the parameters describing how the tumor spreads to the LNLs. How many params + to provide to this model depends on the value of the ``use_mixing`` and the + ``use_central`` attributes. Have a look at what the + :py:meth:`get_tumor_spread_params` method returns for an insight in what you + can provide. 
""" kwargs, global_kwargs = unflatten_and_split( kwargs, expected_keys=["ipsi", "noext", "ext", "contra"], @@ -341,7 +352,13 @@ def set_tumor_spread_params( def set_lnl_spread_params(self, *args: float, **kwargs: float) -> Iterable[float]: - """Set the LNL spread parameters of the midline model.""" + """Set the LNL spread parameters of the midline model. + + This works exactly like the :py:meth:`.Bilateral.set_lnl_spread_params` for the + user, but under the hood, the parameters also need to be distributed to two or + three instances of :py:class:`~.Bilateral` depending on the value of the + ``use_central`` attribute. + """ kwargs, global_kwargs = unflatten_and_split( kwargs, expected_keys=["ipsi", "noext", "ext", "contra"], ) @@ -382,9 +399,10 @@ def set_spread_params(self, *args: float, **kwargs: float) -> Iterable[float]: def set_params( self, *args: float, **kwargs: float, ) -> Iterable[float] | dict[str, float]: - """Assign new parameters to the model. + """Set all parameters of the model. - TODO: enrich docstring + Combines the calls to :py:meth:`set_spread_params` and + :py:meth:`set_distribution_params`. """ args = self.set_spread_params(*args, **kwargs) return self.set_distribution_params(*args, **kwargs) @@ -489,9 +507,17 @@ def risk( ) -> float: """Compute the risk of nodal involvement ``given_diagnoses``. - TODO: finish docstring + In addition to the arguments of the :py:meth:`.Bilateral.risk` method, this + also allows specifying if the patient's tumor extended over the mid-sagittal + line (``midline_extension=True``) or if it was even located right on that line + (``central=True``). + + For logical reasons, ``midline_extension=False`` makes no sense if + ``central=True`` and is thus ignored. 
""" - if isinstance(given_params, dict): + if given_params is None: + pass + elif isinstance(given_params, dict): self.set_params(**given_params) else: self.set_params(*given_params) diff --git a/lymph/models/unilateral.py b/lymph/models/unilateral.py index 9a47aaa..ead6616 100644 --- a/lymph/models/unilateral.py +++ b/lymph/models/unilateral.py @@ -244,8 +244,6 @@ def set_params(self, *args: float, **kwargs: float) -> tuple[float]: sent to all edges or distributions. But the more specific keyword arguments override the global ones, which in turn override the positional arguments. - Example: - >>> graph = { ... ("tumor", "T"): ["II", "III"], ... ("lnl", "II"): ["III"], @@ -384,8 +382,6 @@ def transition_matrix(self) -> np.ndarray: :py:func:`~lymph.descriptors.matrix.generate_transition` The function actually computing the transition matrix. - Example: - >>> model = Unilateral(graph_dict={ ... ("tumor", "T"): ["II", "III"], ... ("lnl", "II"): ["III"], From 0e763b284181fda377486cb076220dfa5afeaa75 Mon Sep 17 00:00:00 2001 From: Roman Ludwig <48687784+rmnldwg@users.noreply.github.com> Date: Fri, 23 Feb 2024 17:06:01 +0100 Subject: [PATCH 56/75] chore: make changelog super detailed --- pyproject.toml | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 7a9e2b3..27c8fd9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -105,8 +105,14 @@ body = """ {% endif %}\ {% for group, commits in commits | group_by(attribute="group") %} ### {{ group | upper_first }} - {% for commit in commits %} - - {% if commit.breaking %}⚠ **BREAKING** {% endif %}{% if commit.scope %}(**{{ commit.scope }}**) {% endif %}{{ commit.message | upper_first }}\ + {% for c in commits %} + - {% if c.breaking %}⚠ **BREAKING** {% endif -%} + {% if c.scope %}(**{{ c.scope }}**) {% endif -%} + {{ c.message | upper_first }}. 
+ {%- if c.footers %}{% for f in c.footers %}{% if not f.breaking %} {{ f.token }} [{{ f.value }}].{% endif %}{% endfor %}{% endif %} + {%- if c.body %}\\ + {{ c.body | indent(prefix=" ", first=True) }} + {% endif -%} {% endfor %} {% endfor %}\n """ From 081123dad17ccfc808978de370d23e7d7e3fb2a1 Mon Sep 17 00:00:00 2001 From: rmnldwg <48687784+rmnldwg@users.noreply.github.com> Date: Sat, 24 Feb 2024 15:36:21 +0100 Subject: [PATCH 57/75] feat(mid): reimplement the midline evolution The midline evolution that Lars Widmer worked on is now reimplemented. However, although this implementation is analogous to the one used in previous versions of the code and should thus work, it is still untested at this point. --- lymph/matrix.py | 18 ++++++ lymph/models/midline.py | 130 ++++++++++++++++++++++++++++++++++------ 2 files changed, 130 insertions(+), 18 deletions(-) diff --git a/lymph/matrix.py b/lymph/matrix.py index 8ad5168..eb5ed18 100644 --- a/lymph/matrix.py +++ b/lymph/matrix.py @@ -316,3 +316,21 @@ def __getitem__(self, key: Any) -> Any: def __missing__(self, t_stage: str): """Create the diagnose matrix for a specific T-stage if necessary.""" return self[t_stage] + + +@lru_cache +def evolve_midext(max_time: int, midext_prob: int) -> np.ndarray: + """Compute the evolution over the state of a tumor's midline extension.""" + midext_states = np.zeros(shape=(max_time + 1, 2), dtype=float) + midext_states[0,0] = 1. + + midext_transition_matrix = np.array([ + [1 - midext_prob, midext_prob], + [0. , 1. 
], + ]) + + # compute midext prob for all time steps + for i in range(len(midext_states) - 1): + midext_states[i+1,:] = midext_states[i,:] @ midext_transition_matrix + + return midext_states diff --git a/lymph/models/midline.py b/lymph/models/midline.py index 6e60f05..017d7cc 100644 --- a/lymph/models/midline.py +++ b/lymph/models/midline.py @@ -58,6 +58,7 @@ def __init__( is_symmetric: dict[str, bool] | None = None, use_mixing: bool = True, use_central: bool = True, + use_midext_evo: bool = True, unilateral_kwargs: dict[str, Any] | None = None, **_kwargs ): @@ -75,6 +76,10 @@ def __init__( the LNLs. And ``use_central``, which controls whether to use a third :py:class:`~.Bilateral` model for the case of a central tumor location. + The parameter ``use_midext_evo`` decides whether the tumor's midline extions + should be considered a random variable, in which case it is evolved like the + state of the LNLs, or not. + The ``unilateral_kwargs`` are passed to all bilateral models. See Also: @@ -104,7 +109,15 @@ def __init__( unilateral_kwargs=unilateral_kwargs, is_symmetric=self.is_symmetric, ) - central_child = {} + + self.use_midext_evo = use_midext_evo + if self.use_midext_evo and use_central: + raise ValueError( + "Evolution to central tumor not yet implemented. Choose to use either " + "the central model or the midline extension evolution." + # Actually, this shouldn't be too hard, but we still need to think + # about it for a bit. + ) if use_central: self._central = models.Bilateral( graph_dict=graph_dict, @@ -119,6 +132,8 @@ def __init__( if use_mixing: self.mixing_param = 0. + self.midext_prob = 0. 
+ diagnose_times.Composite.__init__( self, distribution_children={"ext": self.ext, "noext": self.noext, **central_child}, @@ -151,12 +166,26 @@ def is_trinary(self) -> bool: return self.ext.is_trinary + @property + def midext_prob(self) -> float: + """Return the probability of midline extension.""" + if hasattr(self, "_midext_prob"): + return self._midext_prob + return 0. + + @midext_prob.setter + def midext_prob(self, value: float) -> None: + """Set the probability of midline extension.""" + if value is not None and not 0. <= value <= 1.: + raise ValueError("The midline extension prob must be in the range [0, 1].") + self._midext_prob = value + + @property def mixing_param(self) -> float | None: """Return the mixing parameter.""" if hasattr(self, "_mixing_param"): return self._mixing_param - return None @mixing_param.setter @@ -164,7 +193,6 @@ def mixing_param(self, value: float) -> None: """Set the mixing parameter.""" if value is not None and not 0. <= value <= 1.: raise ValueError("The mixing parameter must be in the range [0, 1].") - self._mixing_param = value @property @@ -288,6 +316,8 @@ def get_params( """ params = self.get_spread_params(as_flat=as_flat) params.update(self.get_distribution_params(as_flat=as_flat)) + params["mixing"] = self.mixing_param + params["midext_prob"] = self.midext_prob if as_flat or not as_dict: params = flatten(params) @@ -405,6 +435,10 @@ def set_params( :py:meth:`set_distribution_params`. 
""" args = self.set_spread_params(*args, **kwargs) + first, args = popfirst(args) + self.mixing_param = kwargs.get("mixing", first) or self.mixing_param + first, args = popfirst(args) + self.midext_prob = kwargs.get("midext_prob", first) or self.midext_prob return self.set_distribution_params(*args, **kwargs) @@ -443,11 +477,50 @@ def load_patient_data( self.ext.load_patient_data(patient_data[~is_lateralized], mapping) + def comp_contra_dist_evolution(self) -> tuple[np.ndarray, np.ndarray]: + """Evolve contra side as mixture of with & without midline extension.""" + noext_contra_dist_evo = np.zeros( + shape=(self.max_time + 1, len(self.noext.contra.state_list)) + ) + noext_contra_dist_evo[0,0] = 1. + + ext_contra_dist_evo = np.zeros( + shape=(self.max_time + 1, len(self.ext.contra.state_list)) + ) + if not self.use_midext_evo: + noext_contra_dist_evo[0,0] = (1. - self.midext_prob) + ext_contra_dist_evo[0,0] = self.midext_prob + + for t in range(self.max_time): + # When evolving over the midline extension state, there's a chance at any + # time step that the tumor grows over the midline and starts spreading to + # the contralateral side more aggressively. + if self.use_midext_evo: + noext_contra_dist_evo[t+1] = ( + (1. - self.midext_prob) * noext_contra_dist_evo[t] + ) @ self.noext.contra.transition_matrix + ext_contra_dist_evo[t+1] = ( + self.midext_prob * noext_contra_dist_evo[t] + + ext_contra_dist_evo[t] + ) @ self.ext.contra.transition_matrix + + # When we do not evolve, the tumor is considered lateralized or extending + # over the midline from the start. 
+ else: + noext_contra_dist_evo[t+1] = ( + noext_contra_dist_evo[t] @ self.noext.contra.transition_matrix + ) + ext_contra_dist_evo[t+1] = ( + ext_contra_dist_evo[t] @ self.ext.contra.transition_matrix + ) + + return noext_contra_dist_evo, ext_contra_dist_evo + + def likelihood( self, given_params: Iterable[float] | dict[str, float] | None = None, log: bool = True, - mode: str = "HMM", for_t_stage: str | None = None, ) -> float: """Compute the (log-)likelihood of the stored data given the model (and params). @@ -455,9 +528,9 @@ def likelihood( See the documentation of :py:meth:`lymph.types.Model.likelihood` for more information on how to use the ``given_params`` parameter. - Returns the log-likelihood if ``log`` is set to ``True``. The ``mode`` parameter - determines whether the likelihood is computed for the hidden Markov model - (``"HMM"``) or the Bayesian network (``"BN"``). + Returns the log-likelihood if ``log`` is set to ``True``. Note that in contrast + to the :py:class:`~.Bilateral` model, the midline model does not support the + Bayesian network mode. Note: The computation is much faster if no parameters are given, since then the @@ -479,18 +552,39 @@ def likelihood( except ValueError: return -np.inf if log else 0. - kwargs = {"log": log, "mode": mode, "for_t_stage": for_t_stage} llh = 0. if log else 1. 
- if log: - llh += self.ext.likelihood(**kwargs) - llh += self.noext.likelihood(**kwargs) - if self.use_central: - llh += self.central.likelihood(**kwargs) - else: - llh *= self.ext.likelihood(**kwargs) - llh *= self.noext.likelihood(**kwargs) - if self.use_central: - llh *= self.central.likelihood(**kwargs) + + ipsi_dist_evo = self.ext.ipsi.comp_dist_evolution() + contra_dist_evo = {} + contra_dist_evo["ext"], contra_dist_evo["noext"] = self.comp_contra_dist_evolution() + + t_stages = self.t_stages if for_t_stage is None else [for_t_stage] + for stage in t_stages: + diag_time_matrix = np.diag(self.get_distribution(stage).pmf) + # see the `Bilateral` model for why this is done in this way. + for case in ["ext", "noext"]: + joint_state_dist = ( + ipsi_dist_evo.T + @ diag_time_matrix + @ contra_dist_evo[case] + ) + joint_diagnose_dist = np.sum( + getattr(self, case).ipsi.diagnose_matrices[stage] + * ( + joint_state_dist + @ getattr(self, case).contra.diagnose_matrices[stage] + ) + ) + if log: + llh += np.sum(np.log(joint_diagnose_dist)) + else: + llh *= np.prod(joint_diagnose_dist) + + if self.use_central: + if log: + llh += self.central.likelihood(log=log, for_t_stage=for_t_stage) + else: + llh *= self.central.likelihood(log=log, for_t_stage=for_t_stage) return llh From d01e213d0d6d85506df4609d5180e8bd507af3de Mon Sep 17 00:00:00 2001 From: rmnldwg <48687784+rmnldwg@users.noreply.github.com> Date: Sat, 24 Feb 2024 16:18:23 +0100 Subject: [PATCH 58/75] fix(bi): fix uninitialized `is_symmetric` dict --- lymph/models/bilateral.py | 9 +++++---- lymph/models/midline.py | 9 +++++---- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/lymph/models/bilateral.py b/lymph/models/bilateral.py index 8abc017..e0adfaa 100644 --- a/lymph/models/bilateral.py +++ b/lymph/models/bilateral.py @@ -78,10 +78,11 @@ def __init__( ) if is_symmetric is None: - is_symmetric = { - "tumor_spread": False, - "lnl_spread": True, - } + is_symmetric = {} + + is_symmetric["tumor_spread"] 
= is_symmetric.get("tumor_spread", False) + is_symmetric["lnl_spread"] = is_symmetric.get("lnl_spread", True) + self.is_symmetric = is_symmetric diagnose_times.Composite.__init__( diff --git a/lymph/models/midline.py b/lymph/models/midline.py index 017d7cc..516eb76 100644 --- a/lymph/models/midline.py +++ b/lymph/models/midline.py @@ -88,10 +88,11 @@ def __init__( tumor and one for the case of no such extension. """ if is_symmetric is None: - is_symmetric = { - "tumor_spread": False, - "lnl_spread": True, - } + is_symmetric = {} + + is_symmetric["tumor_spread"] = is_symmetric.get("tumor_spread", False) + is_symmetric["lnl_spread"] = is_symmetric.get("lnl_spread", True) + if is_symmetric["tumor_spread"]: raise ValueError( "If you want the tumor spread to be symmetric, consider using the " From 991f2acbd769607eabfa39b365ba92822b28ebe7 Mon Sep 17 00:00:00 2001 From: rmnldwg <48687784+rmnldwg@users.noreply.github.com> Date: Sat, 24 Feb 2024 16:20:09 +0100 Subject: [PATCH 59/75] fix(mid): add missing dict in init --- lymph/models/midline.py | 1 + 1 file changed, 1 insertion(+) diff --git a/lymph/models/midline.py b/lymph/models/midline.py index 516eb76..45768e1 100644 --- a/lymph/models/midline.py +++ b/lymph/models/midline.py @@ -119,6 +119,7 @@ def __init__( # Actually, this shouldn't be too hard, but we still need to think # about it for a bit. 
) + central_child = {} if use_central: self._central = models.Bilateral( graph_dict=graph_dict, From 0c635b40904fe22ffb4e0d01c1108a5a27920ab3 Mon Sep 17 00:00:00 2001 From: rmnldwg <48687784+rmnldwg@users.noreply.github.com> Date: Sun, 25 Feb 2024 18:46:21 +0100 Subject: [PATCH 60/75] fix(mid): update call to trans mat & state_list --- lymph/models/midline.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/lymph/models/midline.py b/lymph/models/midline.py index 45768e1..36bfc2c 100644 --- a/lymph/models/midline.py +++ b/lymph/models/midline.py @@ -482,12 +482,12 @@ def load_patient_data( def comp_contra_dist_evolution(self) -> tuple[np.ndarray, np.ndarray]: """Evolve contra side as mixture of with & without midline extension.""" noext_contra_dist_evo = np.zeros( - shape=(self.max_time + 1, len(self.noext.contra.state_list)) + shape=(self.max_time + 1, len(self.noext.contra.graph.state_list)) ) noext_contra_dist_evo[0,0] = 1. ext_contra_dist_evo = np.zeros( - shape=(self.max_time + 1, len(self.ext.contra.state_list)) + shape=(self.max_time + 1, len(self.ext.contra.graph.state_list)) ) if not self.use_midext_evo: noext_contra_dist_evo[0,0] = (1. - self.midext_prob) @@ -500,20 +500,20 @@ def comp_contra_dist_evolution(self) -> tuple[np.ndarray, np.ndarray]: if self.use_midext_evo: noext_contra_dist_evo[t+1] = ( (1. - self.midext_prob) * noext_contra_dist_evo[t] - ) @ self.noext.contra.transition_matrix + ) @ self.noext.contra.transition_matrix() ext_contra_dist_evo[t+1] = ( self.midext_prob * noext_contra_dist_evo[t] + ext_contra_dist_evo[t] - ) @ self.ext.contra.transition_matrix + ) @ self.ext.contra.transition_matrix() # When we do not evolve, the tumor is considered lateralized or extending # over the midline from the start. 
else: noext_contra_dist_evo[t+1] = ( - noext_contra_dist_evo[t] @ self.noext.contra.transition_matrix + noext_contra_dist_evo[t] @ self.noext.contra.transition_matrix() ) ext_contra_dist_evo[t+1] = ( - ext_contra_dist_evo[t] @ self.ext.contra.transition_matrix + ext_contra_dist_evo[t] @ self.ext.contra.transition_matrix() ) return noext_contra_dist_evo, ext_contra_dist_evo From 2ddacec568d76f25126c1c5ed57550e155c62265 Mon Sep 17 00:00:00 2001 From: rmnldwg <48687784+rmnldwg@users.noreply.github.com> Date: Mon, 26 Feb 2024 11:24:45 +0100 Subject: [PATCH 61/75] fix(mid): correct llh function --- lymph/models/midline.py | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/lymph/models/midline.py b/lymph/models/midline.py index 36bfc2c..34c4d1b 100644 --- a/lymph/models/midline.py +++ b/lymph/models/midline.py @@ -479,6 +479,22 @@ def load_patient_data( self.ext.load_patient_data(patient_data[~is_lateralized], mapping) + def comp_midext_evolution(self) -> np.ndarray: + """Evolve only the state of the midline extension.""" + midext_states = np.zeros(shape=(self.max_time + 1, 2), dtype=float) + midext_states[0,0] = 1. + + midextransition_matrix = np.array([ + [1 - self.midext_prob, self.midext_prob], + [0. , 1. 
], + ]) + + # compute involvement for all time steps + for i in range(len(midext_states)-1): + midext_states[i+1,:] = midext_states[i,:] @ midextransition_matrix + return midext_states + + def comp_contra_dist_evolution(self) -> tuple[np.ndarray, np.ndarray]: """Evolve contra side as mixture of with & without midline extension.""" noext_contra_dist_evo = np.zeros( @@ -558,7 +574,7 @@ def likelihood( ipsi_dist_evo = self.ext.ipsi.comp_dist_evolution() contra_dist_evo = {} - contra_dist_evo["ext"], contra_dist_evo["noext"] = self.comp_contra_dist_evolution() + contra_dist_evo["noext"], contra_dist_evo["ext"] = self.comp_contra_dist_evolution() t_stages = self.t_stages if for_t_stage is None else [for_t_stage] for stage in t_stages: @@ -575,7 +591,8 @@ def likelihood( * ( joint_state_dist @ getattr(self, case).contra.diagnose_matrices[stage] - ) + ), + axis=1, ) if log: llh += np.sum(np.log(joint_diagnose_dist)) From 4ea3bd867fd07aae54475b1130dc3ad46d1a4000 Mon Sep 17 00:00:00 2001 From: rmnldwg <48687784+rmnldwg@users.noreply.github.com> Date: Mon, 26 Feb 2024 11:24:55 +0100 Subject: [PATCH 62/75] test(mid): check llh function --- tests/binary_midline_test.py | 61 ++++++++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) diff --git a/tests/binary_midline_test.py b/tests/binary_midline_test.py index 5a5aeda..8e793c9 100644 --- a/tests/binary_midline_test.py +++ b/tests/binary_midline_test.py @@ -5,6 +5,7 @@ from typing import Literal import numpy as np +import pandas as pd from lymph import models @@ -68,3 +69,63 @@ def test_set_spread_params(self) -> None: self.model.ext.ipsi.get_tumor_spread_params(), self.model.noext.ipsi.get_tumor_spread_params(), ) + + +class MidlineLikelihoodTestCase(unittest.TestCase): + """Check that the likelihood function works correctly.""" + + def setUp( + self, + seed: int = 42, + graph_size: Literal["small", "medium", "large"] = "small", + use_mixing: bool = True, + use_central: bool = False, + use_midext_evo: bool = True, + 
is_symmetric: dict[str, bool] | None = None, + ) -> None: + super().setUp() + self.rng = np.random.default_rng(seed) + graph_dict = fixtures.get_graph(graph_size) + if is_symmetric is None: + is_symmetric = {"tumor_spread": False, "lnl_spread": True} + + self.model = models.Midline( + graph_dict=graph_dict, + is_symmetric=is_symmetric, + use_mixing=use_mixing, + use_central=use_central, + use_midext_evo=use_midext_evo, + ) + self.model.set_distribution( + "early", + fixtures.create_random_dist( + type_="frozen", + max_time=self.model.max_time, + rng=self.rng, + ), + ) + self.model.set_distribution( + "late", + fixtures.create_random_dist( + type_="parametric", + max_time=self.model.max_time, + rng=self.rng, + ), + ) + self.model.set_modality("pathology", spec=1., sens=1., kind="pathological") + self.model.load_patient_data(pd.read_csv("./tests/data/2021-clb-oropharynx.csv", header=[0,1,2])) + + + def test_likelihood(self) -> None: + """Check that the likelihood function works correctly.""" + params_to_set = {k: self.rng.uniform() for k in self.model.get_params().keys()} + self.model.set_params(**params_to_set) + + # Check that the likelihood is a number + self.assertTrue(np.isscalar(self.model.likelihood())) + + # Check that the likelihood is not NaN + self.assertFalse(np.isnan(self.model.likelihood())) + + # Check that the log-likelihood is smaller than 0 + self.assertLessEqual(self.model.likelihood(), 0) From 4ae8d9c50fe05ebf4d52c1ede3d649e6872f35be Mon Sep 17 00:00:00 2001 From: Roman Ludwig <48687784+rmnldwg@users.noreply.github.com> Date: Mon, 26 Feb 2024 14:51:11 +0100 Subject: [PATCH 63/75] fix(mid): sum over correct axis --- lymph/models/midline.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lymph/models/midline.py b/lymph/models/midline.py index 34c4d1b..184be29 100644 --- a/lymph/models/midline.py +++ b/lymph/models/midline.py @@ -592,7 +592,7 @@ def likelihood( joint_state_dist @ getattr(self, case).contra.diagnose_matrices[stage] 
), - axis=1, + axis=0, ) if log: llh += np.sum(np.log(joint_diagnose_dist)) From 4af6beff93171bf99f8020a21e5ef8f008e1ccd7 Mon Sep 17 00:00:00 2001 From: Roman Ludwig <48687784+rmnldwg@users.noreply.github.com> Date: Tue, 27 Feb 2024 16:36:25 +0100 Subject: [PATCH 64/75] feat: add helper to draw diagnoses The new helper function `draw_diagnoses` is a re-implementation of the `Unilateral` class's method with the same name for easier reuse. --- lymph/helper.py | 22 ++++++++++++++++++++++ lymph/models/unilateral.py | 29 +++++++++++++++++++++++++++-- 2 files changed, 49 insertions(+), 2 deletions(-) diff --git a/lymph/helper.py b/lymph/helper.py index 925f089..b29cd96 100644 --- a/lymph/helper.py +++ b/lymph/helper.py @@ -443,3 +443,25 @@ def synchronize_params( """Get the parameters from one object and set them to another.""" for key, obj in set_to.items(): obj.set_params(**get_from[key].get_params(as_dict=True)) + + +def draw_diagnoses( + diagnose_times: list[int], + state_evolution: np.ndarray, + observation_matrix: np.ndarray, + possible_diagnoses: np.ndarray, + rng: np.random.Generator | None = None, + seed: int = 42, +) -> np.ndarray: + """Given the ``diagnose_times`` and a hidden ``state_evolution``, draw diagnoses.""" + if rng is None: + rng = np.random.default_rng(seed) + + state_dists_given_time = state_evolution[diagnose_times] + observation_dists_given_time = state_dists_given_time @ observation_matrix + + drawn_observation_idxs = [ + rng.choice(np.arange(len(possible_diagnoses)), p=dist) + for dist in observation_dists_given_time + ] + return possible_diagnoses[drawn_observation_idxs].astype(bool) diff --git a/lymph/models/unilateral.py b/lymph/models/unilateral.py index ead6616..959c5d9 100644 --- a/lymph/models/unilateral.py +++ b/lymph/models/unilateral.py @@ -822,7 +822,32 @@ def draw_diagnoses( rng: np.random.Generator | None = None, seed: int = 42, ) -> np.ndarray: - """Given some ``diag_times``, draw diagnoses for each LNL.""" + """Given some 
``diag_times``, draw diagnoses for each LNL. + + >>> model = Unilateral(graph_dict={ + ... ("tumor", "T"): ["II" , "III"], + ... ("lnl", "II"): ["III"], + ... ("lnl", "III"): [], + ... }) + >>> model.set_modality("CT", spec=0.8, sens=0.8) + >>> model.draw_diagnoses([0, 1, 2, 3, 4]) # doctest: +NORMALIZE_WHITESPACE + array([[False, True], + [False, False], + [ True, False], + [False, True], + [False, False]]) + >>> draw_diagnoses( # this is the same as the previous example + ... diagnose_times=[0, 1, 2, 3, 4], + ... state_evolution=model.comp_dist_evolution(), + ... observation_matrix=model.observation_matrix(), + ... possible_diagnoses=model.obs_list, + ... ) + array([[False, True], + [False, False], + [ True, False], + [False, True], + [False, False]]) + """ if rng is None: rng = np.random.default_rng(seed) @@ -831,7 +856,7 @@ def draw_diagnoses( obs_indices = np.arange(len(self.obs_list)) drawn_obs_idx = [ - np.random.choice(obs_indices, p=obs_prob) + rng.choice(obs_indices, p=obs_prob) for obs_prob in obs_probs_given_time ] From 5b356bd03e90983e468ac232e417af48354dcbd1 Mon Sep 17 00:00:00 2001 From: Roman Ludwig <48687784+rmnldwg@users.noreply.github.com> Date: Tue, 27 Feb 2024 16:38:49 +0100 Subject: [PATCH 65/75] feat(mid): add `draw_patients` method (WIP) --- lymph/models/midline.py | 93 +++++++++++++++++++++++++++++++++++++++-- 1 file changed, 90 insertions(+), 3 deletions(-) diff --git a/lymph/models/midline.py b/lymph/models/midline.py index 184be29..891dafd 100644 --- a/lymph/models/midline.py +++ b/lymph/models/midline.py @@ -9,6 +9,7 @@ from lymph import diagnose_times, modalities, models, types from lymph.helper import ( + draw_diagnoses, early_late_mapping, flatten, popfirst, @@ -317,9 +318,9 @@ def get_params( and the distribution parameters from the call to :py:meth:`get_distribution_params`. 
""" params = self.get_spread_params(as_flat=as_flat) - params.update(self.get_distribution_params(as_flat=as_flat)) params["mixing"] = self.mixing_param params["midext_prob"] = self.midext_prob + params.update(self.get_distribution_params(as_flat=as_flat)) if as_flat or not as_dict: params = flatten(params) @@ -438,8 +439,6 @@ def set_params( """ args = self.set_spread_params(*args, **kwargs) first, args = popfirst(args) - self.mixing_param = kwargs.get("mixing", first) or self.mixing_param - first, args = popfirst(args) self.midext_prob = kwargs.get("midext_prob", first) or self.midext_prob return self.set_distribution_params(*args, **kwargs) @@ -655,3 +654,91 @@ def risk( involvement=involvement, mode=mode, ) + + + def draw_patients( + self, + num: int, + stage_dist: Iterable[float], + rng: np.random.Generator | None = None, + seed: int = 42, + ) -> pd.DataFrame: + """Draw ``num`` patients from the parameterized model.""" + if rng is None: + rng = np.random.default_rng(seed) + + if sum(stage_dist) != 1.: + warnings.warn("Sum of stage distribution is not 1. Renormalizing.") + stage_dist = np.array(stage_dist) / sum(stage_dist) + + if self.use_central: + raise NotImplementedError( + "Drawing patients from the central model not yet supported." + ) + + drawn_t_stages = rng.choice( + a=self.t_stages, + p=stage_dist, + size=num, + ) + distributions = self.get_all_distributions() + drawn_diag_times = [ + distributions[t_stage].draw_diag_times(rng=rng) + for t_stage in drawn_t_stages + ] + + ipsi_evo = self.ext.ipsi.comp_dist_evolution() + contra_evo = {} + contra_evo["noext"], contra_evo["ext"] = self.comp_contra_dist_evolution() + + if self.use_midext_evo: + midext_evo = self.comp_midext_evolution() + drawn_midexts = [ + rng.choice(a=[False, True], p=midext_evo[t]) + for t in drawn_diag_times + ] + else: + drawn_midexts = rng.choice( + a=[False, True], + p=[1. 
- self.midext_prob, self.midext_prob], + size=num, + ) + + drawn_diags = np.empty(shape=(num, len(self.ext.ipsi.obs_list))) + for case in ["ext", "noext"]: + drawn_ipsi_diags = draw_diagnoses( + diagnose_times=drawn_diag_times[drawn_midexts == (case == "ext")], + state_evolution=ipsi_evo, + observation_matrix=getattr(self, case).ipsi.observation_matrix, + possible_diagnoses=getattr(self, case).ipsi.obs_list, + rng=rng, + seed=seed, + ) + drawn_contra_diags = draw_diagnoses( + diagnose_times=drawn_diag_times[drawn_midexts == (case == "ext")], + state_evolution=contra_evo[case], + observation_matrix=getattr(self, case).contra.observation_matrix, + possible_diagnoses=getattr(self, case).contra.obs_list, + rng=rng, + seed=seed, + ) + drawn_case_diags = np.concatenate([drawn_ipsi_diags, drawn_contra_diags], axis=1) + drawn_diags[drawn_midexts == (case == "ext")] = drawn_case_diags + + # construct MultiIndex with "ipsi" and "contra" at top level to allow + # concatenation of the two separate drawn diagnoses + sides = ["ipsi", "contra"] + modality_names = list(self.get_all_modalities().keys()) + lnl_names = [lnl for lnl in self.ext.ipsi.graph.lnls.keys()] + multi_cols = pd.MultiIndex.from_product([sides, modality_names, lnl_names]) + + # reorder the column levels and thus also the individual columns to match the + # LyProX format without mixing up the data + dataset = pd.DataFrame(drawn_diags, columns=multi_cols) + dataset = dataset.reorder_levels(order=[1, 0, 2], axis="columns") + dataset = dataset.sort_index(axis="columns", level=0) + dataset["tumor", "1", "t_stage"] = drawn_t_stages + dataset["tumor", "1", "extension"] = drawn_midexts + dataset["patient", "#", "diagnose_time"] = drawn_diag_times + + return dataset From b6a0d3024c1149457032380f50047c9b2512a378 Mon Sep 17 00:00:00 2001 From: rmnldwg <48687784+rmnldwg@users.noreply.github.com> Date: Wed, 28 Feb 2024 16:54:31 +0100 Subject: [PATCH 66/75] feat(mid): allow marg over unknown midext This is implemented 
differently than before: If data with unknown midline extension is added, it gets loaded into an attribute named `unknown`, which is a `Bilateral` model only used to store that data and generate diagnose matrices. --- lymph/helper.py | 7 +++ lymph/matrix.py | 14 +++++ lymph/models/bilateral.py | 74 +++++++++++++------------- lymph/models/midline.py | 98 +++++++++++++++++++++++++++-------- lymph/models/unilateral.py | 29 +++++------ tests/binary_midline_test.py | 1 + tests/trinary_midline_test.py | 1 + 7 files changed, 148 insertions(+), 76 deletions(-) diff --git a/lymph/helper.py b/lymph/helper.py index b29cd96..7a5640c 100644 --- a/lymph/helper.py +++ b/lymph/helper.py @@ -465,3 +465,10 @@ def draw_diagnoses( for dist in observation_dists_given_time ] return possible_diagnoses[drawn_observation_idxs].astype(bool) + + +def add_or_mult(llh: float, arr: np.ndarray, log: bool = True) -> float: + """Add or multiply the log-likelihood with the given array.""" + if log: + return llh + np.sum(np.log(arr)) + return llh * np.prod(arr) diff --git a/lymph/matrix.py b/lymph/matrix.py index eb5ed18..f4a591e 100644 --- a/lymph/matrix.py +++ b/lymph/matrix.py @@ -334,3 +334,17 @@ def evolve_midext(max_time: int, midext_prob: int) -> np.ndarray: midext_states[i+1,:] = midext_states[i,:] @ midext_transition_matrix return midext_states + + +def fast_trace( + left: np.ndarray, + right: np.ndarray, +) -> np.ndarray: + """Compute the trace of a product of two matrices (``left`` and ``right``). + + This is based on the observation that the trace of a product of two matrices is + equal to the sum of the element-wise products of the two matrices. See + `Wikipedia `_ and + `StackOverflow `_ for more information. 
+ """ + return np.sum(left.T * right, axis=0) diff --git a/lymph/models/bilateral.py b/lymph/models/bilateral.py index e0adfaa..d11a04c 100644 --- a/lymph/models/bilateral.py +++ b/lymph/models/bilateral.py @@ -2,13 +2,14 @@ import logging import warnings -from typing import Any, Iterable +from typing import Any, Iterable, Literal import numpy as np import pandas as pd from lymph import diagnose_times, matrix, modalities, models, types from lymph.helper import ( + add_or_mult, early_late_mapping, flatten, synchronize_params, @@ -379,10 +380,10 @@ def load_patient_data( self.contra.load_patient_data(patient_data, "contra", mapping) - def comp_joint_state_dist( + def comp_state_dist( self, t_stage: str = "early", - mode: str = "HMM", + mode: Literal["HMM", "BN"] = "HMM", ) -> np.ndarray: """Compute the joint distribution over the ipsi- & contralateral hidden states. @@ -393,7 +394,9 @@ def comp_joint_state_dist( See Also: :py:meth:`lymph.models.Unilateral.comp_state_dist` - The corresponding unilateral function. + The corresponding unilateral function. Note that this method returns + a 2D array, because it computes the probability of any possible + combination of ipsi- and contralateral states. """ if mode == "HMM": ipsi_state_evo = self.ipsi.comp_dist_evolution() @@ -405,31 +408,31 @@ def comp_joint_state_dist( @ time_marg_matrix @ contra_state_evo ) - elif mode == "BN": ipsi_state_dist = self.ipsi.comp_state_dist(mode=mode) contra_state_dist = self.contra.comp_state_dist(mode=mode) result = np.outer(ipsi_state_dist, contra_state_dist) - else: raise ValueError(f"Unknown mode '{mode}'.") return result - def comp_joint_obs_dist( + def comp_obs_dist( self, t_stage: str = "early", - mode: str = "HMM", + mode: Literal["HMM", "BN"] = "HMM", ) -> np.ndarray: """Compute the joint distribution over the ipsi- & contralateral observations. See Also: :py:meth:`lymph.models.Unilateral.comp_obs_dist` - The corresponding unilateral function. 
+ The corresponding unilateral function. Note that this method returns + a 2D array, because it computes the probability of any possible + combination of ipsi- and contralateral observations. """ - joint_state_dist = self.comp_joint_state_dist(t_stage=t_stage, mode=mode) + joint_state_dist = self.comp_state_dist(t_stage=t_stage, mode=mode) return ( self.ipsi.observation_matrix().T @ joint_state_dist @@ -437,25 +440,32 @@ def comp_joint_obs_dist( ) + def comp_patient_llhs( + self, + t_stage: str = "early", + mode: Literal["HMM", "BN"] = "HMM", + ) -> np.ndarray: + """Compute the likelihood of each patient individually.""" + joint_state_dist = self.comp_state_dist(t_stage=t_stage, mode=mode) + return matrix.fast_trace( + self.ipsi.diagnose_matrices[t_stage].T, + joint_state_dist @ self.contra.diagnose_matrices[t_stage], + ) + + def _bn_likelihood(self, log: bool = True, t_stage: str | None = None) -> float: """Compute the BN likelihood of data, using the stored params.""" - llh = 0. if log else 1. - if t_stage is None: t_stage = "_BN" - joint_state_dist = self.comp_joint_state_dist(mode="BN") - joint_diagnose_dist = np.sum( + joint_state_dist = self.comp_state_dist(mode="BN") + patient_llhs = np.sum( self.ipsi.diagnose_matrices[t_stage] * (joint_state_dist @ self.contra.diagnose_matrices[t_stage]), axis=0, ) - if log: - llh += np.sum(np.log(joint_diagnose_dist)) - else: - llh *= np.prod(joint_diagnose_dist) - return llh + return np.sum(np.log(patient_llhs)) if log else np.prod(patient_llhs) def _hmm_likelihood(self, log: bool = True, t_stage: str | None = None) -> float: @@ -480,21 +490,11 @@ def _hmm_likelihood(self, log: bool = True, t_stage: str | None = None) -> float @ diag_time_matrix @ contra_dist_evo ) - # the computation below is a trick to make the computation fatser: - # What we want to compute is the sum over the diagonal of the matrix - # product of the ipsi diagnose matrix with the joint state distribution - # and the contra diagnose matrix. 
- # Source: https://stackoverflow.com/a/18854776 - joint_diagnose_dist = np.sum( - self.ipsi.diagnose_matrices[stage] - * (joint_state_dist @ self.contra.diagnose_matrices[stage]), - axis=0, + patient_llhs = matrix.fast_trace( + self.ipsi.diagnose_matrices[stage].T, + joint_state_dist @ self.contra.diagnose_matrices[stage], ) - - if log: - llh += np.sum(np.log(joint_diagnose_dist)) - else: - llh *= np.prod(joint_diagnose_dist) + llh = add_or_mult(llh, patient_llhs, log) return llh @@ -503,7 +503,7 @@ def likelihood( self, given_params: Iterable[float] | dict[str, float] | None = None, log: bool = True, - mode: str = "HMM", + mode: Literal["HMM", "BN"] = "HMM", for_t_stage: str | None = None, ): """Compute the (log-)likelihood of the stored data given the model (and params). @@ -548,7 +548,7 @@ def comp_posterior_joint_state_dist( given_params: Iterable[float] | dict[str, float] | None = None, given_diagnoses: dict[str, types.DiagnoseType] | None = None, t_stage: str | int = "early", - mode: str = "HMM", + mode: Literal["HMM", "BN"] = "HMM", ) -> np.ndarray: """Compute joint post. dist. over ipsi & contra states, ``given_diagnoses``. @@ -585,7 +585,7 @@ def comp_posterior_joint_state_dist( # vector with P(Z=z|X) for each state X. A data matrix for one "patient" diagnose_given_state[side] = diagnose_encoding @ observation_matrix.T - joint_state_dist = self.comp_joint_state_dist(t_stage=t_stage, mode=mode) + joint_state_dist = self.comp_state_dist(t_stage=t_stage, mode=mode) # matrix with P(Zi=zi,Zc=zc|Xi,Xc) * P(Xi,Xc) for all states Xi,Xc. joint_diagnose_and_state = np.outer( diagnose_given_state["ipsi"], @@ -602,7 +602,7 @@ def risk( given_params: Iterable[float] | dict[str, float] | None = None, given_diagnoses: dict[str, types.DiagnoseType] | None = None, t_stage: str = "early", - mode: str = "HMM", + mode: Literal["HMM", "BN"] = "HMM", ) -> float: """Compute risk of an ``involvement`` pattern, given parameters and diagnoses. 
diff --git a/lymph/models/midline.py b/lymph/models/midline.py index 891dafd..01936dd 100644 --- a/lymph/models/midline.py +++ b/lymph/models/midline.py @@ -7,8 +7,9 @@ import numpy as np import pandas as pd -from lymph import diagnose_times, modalities, models, types +from lymph import diagnose_times, matrix, modalities, models, types from lymph.helper import ( + add_or_mult, draw_diagnoses, early_late_mapping, flatten, @@ -60,6 +61,7 @@ def __init__( use_mixing: bool = True, use_central: bool = True, use_midext_evo: bool = True, + marginalize_unknown: bool = True, unilateral_kwargs: dict[str, Any] | None = None, **_kwargs ): @@ -81,12 +83,22 @@ def __init__( should be considered a random variable, in which case it is evolved like the state of the LNLs, or not. + With ``marginalize_unknown`` (default: ``True``), the model will also load + patients with unknown midline extension status into the model and marginalize + over their state of midline extension when computing the likelihood. This extra + data is stored in a :py:class:`~.Bilateral` instance accessible via the + attribute ``"unknown"``. Note that this bilateral instance does not get updated + parameters or any other kind of attention. It is solely used to store the data + and generate diagnose matrices for those data. + The ``unilateral_kwargs`` are passed to all bilateral models. See Also: - :py:class:`Bilateral`: Two of these are held as attributes by this + :py:class:`Bilateral`: Two to four of these are held as attributes by this class. One for the case of a mid-sagittal extension of the primary - tumor and one for the case of no such extension. + tumor, one for the case of no such extension, (possibly) one for the case of + a central/symmetric tumor, and (possibly) one for the case of unknown + midline extension status. """ if is_symmetric is None: is_symmetric = {} @@ -120,7 +132,7 @@ def __init__( # Actually, this shouldn't be too hard, but we still need to think # about it for a bit. 
) - central_child = {} + other_children = {} if use_central: self._central = models.Bilateral( graph_dict=graph_dict, @@ -130,7 +142,15 @@ def __init__( "lnl_spread": self.is_symmetric["lnl_spread"], }, ) - central_child = {"central": self.central} + other_children["central"] = self.central + + if marginalize_unknown: + self._unknown = models.Bilateral( + graph_dict=graph_dict, + unilateral_kwargs=unilateral_kwargs, + is_symmetric=self.is_symmetric, + ) + other_children["unknown"] = self._unknown if use_mixing: self.mixing_param = 0. @@ -139,12 +159,12 @@ def __init__( diagnose_times.Composite.__init__( self, - distribution_children={"ext": self.ext, "noext": self.noext, **central_child}, + distribution_children={"ext": self.ext, "noext": self.noext, **other_children}, is_distribution_leaf=False, ) modalities.Composite.__init__( self, - modality_children={"ext": self.ext, "noext": self.noext, **central_child}, + modality_children={"ext": self.ext, "noext": self.noext, **other_children}, is_modality_leaf=False, ) @@ -211,7 +231,23 @@ def use_central(self) -> bool: @property def central(self) -> models.Bilateral: """Return the central model.""" - return self._central + if self.use_central: + return self._central + raise AttributeError("This instance does not account for central tumors.") + + @property + def marginalize_unknown(self) -> bool: + """Return whether the model marginalizes over unknown midline extension.""" + return hasattr(self, "_unknown") + + @property + def unknown(self) -> models.Bilateral: + """Return the model storing the patients with unknown midline extension.""" + if self.marginalize_unknown: + return self._unknown + raise AttributeError( + "This instance does not marginalize over unknown midline extension." 
+ ) def get_tumor_spread_params( @@ -467,15 +503,25 @@ def load_patient_data( """ # pylint: disable=singleton-comparison is_lateralized = patient_data[EXT_COL] == False + has_extension = patient_data[EXT_COL] == True + is_unknown = patient_data[EXT_COL].isna() self.noext.load_patient_data(patient_data[is_lateralized], mapping) if self.use_central: is_central = patient_data[CENTRAL_COL] == True + has_extension = has_extension & ~is_central self.central.load_patient_data(patient_data[is_central], mapping) - self.ext.load_patient_data(patient_data[~is_lateralized & ~is_central], mapping) + self.ext.load_patient_data(patient_data[has_extension], mapping) + + if self.marginalize_unknown and is_unknown.sum() > 0: + self.unknown.load_patient_data(patient_data[is_unknown], mapping) else: - self.ext.load_patient_data(patient_data[~is_lateralized], mapping) + warnings.warn( + f"Discarding {is_unknown.sum()} patients where midline extension " + "is unknown." + ) + def comp_midext_evolution(self) -> np.ndarray: @@ -578,6 +624,8 @@ def likelihood( t_stages = self.t_stages if for_t_stage is None else [for_t_stage] for stage in t_stages: diag_time_matrix = np.diag(self.get_distribution(stage).pmf) + num_states = ipsi_dist_evo.shape[1] + marg_joint_state_dist = np.zeros(shape=(num_states, num_states)) # see the `Bilateral` model for why this is done in this way. 
for case in ["ext", "noext"]: joint_state_dist = ( @@ -585,18 +633,24 @@ def likelihood( @ diag_time_matrix @ contra_dist_evo[case] ) - joint_diagnose_dist = np.sum( - getattr(self, case).ipsi.diagnose_matrices[stage] - * ( - joint_state_dist - @ getattr(self, case).contra.diagnose_matrices[stage] - ), - axis=0, + marg_joint_state_dist += joint_state_dist + _model = getattr(self, case) + patient_llhs = matrix.fast_trace( + _model.ipsi.diagnose_matrices[stage].T, + joint_state_dist @ _model.contra.diagnose_matrices[stage] + ) + llh = add_or_mult(llh, patient_llhs, log=log) + + try: + marg_patient_llhs = matrix.fast_trace( + self.unknown.ipsi.diagnose_matrices[stage].T, + marg_joint_state_dist @ self.unknown.contra.diagnose_matrices[stage] ) - if log: - llh += np.sum(np.log(joint_diagnose_dist)) - else: - llh *= np.prod(joint_diagnose_dist) + llh = add_or_mult(llh, marg_patient_llhs, log=log) + except AttributeError: + # an AttributeError is raised both when the model has no `unknown` + # attribute and when no data is loaded in the `unknown` model. + pass if self.use_central: if log: @@ -615,7 +669,7 @@ def risk( t_stage: str = "early", midline_extension: bool = False, central: bool = False, - mode: str = "HMM", + mode: Literal["HMM", "BN"] = "HMM", ) -> float: """Compute the risk of nodal involvement ``given_diagnoses``. 
diff --git a/lymph/models/unilateral.py b/lymph/models/unilateral.py index 959c5d9..006bc7c 100644 --- a/lymph/models/unilateral.py +++ b/lymph/models/unilateral.py @@ -9,8 +9,10 @@ import pandas as pd from lymph import diagnose_times, graph, matrix, modalities, types -from lymph.helper import ( +from lymph.helper import ( # nopycln: import + add_or_mult, dict_to_func, + draw_diagnoses, early_late_mapping, flatten, get_params_from, @@ -575,7 +577,7 @@ def comp_dist_evolution(self) -> np.ndarray: return state_dists - def comp_state_dist(self, t_stage: str = "early", mode: str = "HMM") -> np.ndarray: + def comp_state_dist(self, t_stage: str = "early", mode: Literal["HMM", "BN"] = "HMM") -> np.ndarray: """Compute the distribution over possible states. Do this either for a given ``t_stage``, when ``mode`` is set to ``"HMM"``, @@ -604,7 +606,7 @@ def comp_state_dist(self, t_stage: str = "early", mode: str = "HMM") -> np.ndarr return state_dist - def comp_obs_dist(self, t_stage: str = "early", mode: str = "HMM") -> np.ndarray: + def comp_obs_dist(self, t_stage: str = "early", mode: Literal["HMM", "BN"] = "HMM") -> np.ndarray: """Compute the distribution over all possible observations for a given T-stage. Returns an array of probabilities for each possible complete observation. 
This @@ -626,13 +628,9 @@ def _bn_likelihood(self, log: bool = True, t_stage: str | None = None) -> float: t_stage = "_BN" state_dist = self.comp_state_dist(mode="BN") - patient_likelihoods = state_dist @ self.diagnose_matrices[t_stage] + patient_llhs = state_dist @ self.diagnose_matrices[t_stage] - if log: - llh = np.sum(np.log(patient_likelihoods)) - else: - llh = np.prod(patient_likelihoods) - return llh + return np.sum(np.log(patient_llhs)) if log else np.prod(patient_llhs) def _hmm_likelihood(self, log: bool = True, t_stage: str | None = None) -> float: @@ -646,15 +644,12 @@ def _hmm_likelihood(self, log: bool = True, t_stage: str | None = None) -> float t_stages = [t_stage] for t_stage in t_stages: - patient_likelihoods = ( + patient_llhs = ( self.get_distribution(t_stage).pmf @ evolved_model @ self.diagnose_matrices[t_stage] ) - if log: - llh += np.sum(np.log(patient_likelihoods)) - else: - llh *= np.prod(patient_likelihoods) + llh = add_or_mult(llh, patient_llhs, log) return llh @@ -663,7 +658,7 @@ def likelihood( self, given_params: Iterable[float] | dict[str, float] | None = None, log: bool = True, - mode: str = "HMM", + mode: Literal["HMM", "BN"] = "HMM", for_t_stage: str | None = None, ) -> float: """Compute the (log-)likelihood of the stored data given the model (and params). @@ -720,7 +715,7 @@ def comp_posterior_state_dist( given_params: Iterable[float] | dict[str, float] | None = None, given_diagnoses: types.DiagnoseType | None = None, t_stage: str | int = "early", - mode: str = "HMM", + mode: Literal["HMM", "BN"] = "HMM", ) -> np.ndarray: """Compute the posterior distribution over hidden states given a diagnosis. 
@@ -778,7 +773,7 @@ def risk( given_params: Iterable[float] | dict[str, float] | None = None, given_diagnoses: dict[str, types.PatternType] | None = None, t_stage: str = "early", - mode: str = "HMM", + mode: Literal["HMM", "BN"] = "HMM", **_kwargs, ) -> float | np.ndarray: """Compute risk of a certain involvement, given a patient's diagnosis. diff --git a/tests/binary_midline_test.py b/tests/binary_midline_test.py index 8e793c9..61336bb 100644 --- a/tests/binary_midline_test.py +++ b/tests/binary_midline_test.py @@ -34,6 +34,7 @@ def setUp( is_symmetric=is_symmetric, use_mixing=use_mixing, use_central=use_central, + use_midext_evo=False, ) diff --git a/tests/trinary_midline_test.py b/tests/trinary_midline_test.py index 4625d89..72a6061 100644 --- a/tests/trinary_midline_test.py +++ b/tests/trinary_midline_test.py @@ -33,6 +33,7 @@ def setUp( is_symmetric=is_symmetric, use_mixing=use_mixing, use_central=use_central, + use_midext_evo=False, ) From e9fc2ad10e8a0658cef26c4a1a14be4ccb02f0c9 Mon Sep 17 00:00:00 2001 From: rmnldwg <48687784+rmnldwg@users.noreply.github.com> Date: Wed, 28 Feb 2024 18:11:02 +0100 Subject: [PATCH 67/75] fix(mid): finish `draw_patients` method Some bugs in the method for drawing synthetic patients from the `Midline` were fixed. This seems to be working now. 
--- lymph/helper.py | 2 +- lymph/models/midline.py | 24 ++++++++++----------- tests/binary_midline_test.py | 41 ++++++++++++++++++++++++++++++++++++ 3 files changed, 53 insertions(+), 14 deletions(-) diff --git a/lymph/helper.py b/lymph/helper.py index 7a5640c..c444f83 100644 --- a/lymph/helper.py +++ b/lymph/helper.py @@ -461,7 +461,7 @@ def draw_diagnoses( observation_dists_given_time = state_dists_given_time @ observation_matrix drawn_observation_idxs = [ - rng.choice(np.arange(len(possible_diagnoses)), p=dist) + rng.choice(a=np.arange(len(possible_diagnoses)), p=dist) for dist in observation_dists_given_time ] return possible_diagnoses[drawn_observation_idxs].astype(bool) diff --git a/lymph/models/midline.py b/lymph/models/midline.py index 01936dd..b429f5f 100644 --- a/lymph/models/midline.py +++ b/lymph/models/midline.py @@ -736,21 +736,17 @@ def draw_patients( size=num, ) distributions = self.get_all_distributions() - drawn_diag_times = [ + drawn_diag_times = np.array([ distributions[t_stage].draw_diag_times(rng=rng) for t_stage in drawn_t_stages - ] - - ipsi_evo = self.ext.ipsi.comp_dist_evolution() - contra_evo = {} - contra_evo["noext"], contra_evo["ext"] = self.comp_contra_dist_evolution() + ]) if self.use_midext_evo: midext_evo = self.comp_midext_evolution() - drawn_midexts = [ + drawn_midexts = np.array([ rng.choice(a=[False, True], p=midext_evo[t]) for t in drawn_diag_times - ] + ]) else: drawn_midexts = rng.choice( a=[False, True], @@ -758,21 +754,23 @@ def draw_patients( size=num, ) + ipsi_evo = self.ext.ipsi.comp_dist_evolution() drawn_diags = np.empty(shape=(num, len(self.ext.ipsi.obs_list))) for case in ["ext", "noext"]: + case_model = getattr(self, case) drawn_ipsi_diags = draw_diagnoses( diagnose_times=drawn_diag_times[drawn_midexts == (case == "ext")], state_evolution=ipsi_evo, - observation_matrix=getattr(self, case).ipsi.observation_matrix, - possible_diagnoses=getattr(self, case).ipsi.obs_list, + 
observation_matrix=case_model.ipsi.observation_matrix(), + possible_diagnoses=case_model.ipsi.obs_list, rng=rng, seed=seed, ) drawn_contra_diags = draw_diagnoses( diagnose_times=drawn_diag_times[drawn_midexts == (case == "ext")], - state_evolution=contra_evo[case], - observation_matrix=getattr(self, case).contra.observation_matrix, - possible_diagnoses=getattr(self, case).contra.obs_list, + state_evolution=case_model.contra.comp_dist_evolution(), + observation_matrix=case_model.contra.observation_matrix(), + possible_diagnoses=case_model.contra.obs_list, rng=rng, seed=seed, ) diff --git a/tests/binary_midline_test.py b/tests/binary_midline_test.py index 61336bb..0e54097 100644 --- a/tests/binary_midline_test.py +++ b/tests/binary_midline_test.py @@ -130,3 +130,44 @@ def test_likelihood(self) -> None: # Check that the log-likelihood is smaller than 0 self.assertLessEqual(self.model.likelihood(), 0) + + +class MidlineDrawPatientsTestCase(unittest.TestCase): + """Check the data generation.""" + + def setUp(self) -> None: + super().setUp() + self.rng = np.random.default_rng(42) + graph_dict = { + ("tumor", "T"): ["A"], + ("lnl", "A"): ["B"], + ("lnl", "B"): [], + } + self.model = models.Midline( + graph_dict=graph_dict, + use_mixing=True, + use_central=False, + use_midext_evo=True, + marginalize_unknown=False, + unilateral_kwargs={"max_time": 2}, + ) + self.model.set_distribution("early", [0., 1., 0.]) + self.model.set_distribution("late", [0., 0., 1.]) + self.model.set_modality("pathology", spec=1., sens=1., kind="pathological") + + + def test_draw_patients(self) -> None: + """Check that the data generation works correctly.""" + self.model.set_params( + ipsi_TtoA_spread=1.0, + contra_TtoA_spread=0.0, + AtoB_spread=1.0, + mixing=0.5, + midext_prob=0.5, + ) + drawn_data = self.model.draw_patients( + num=100, + stage_dist=[0.5, 0.5], + rng=self.rng, + ) + self.assertEqual(len(drawn_data), 100) From 9d3a8db16e57f8ccc4c8921c7c71d1d9245a3d2d Mon Sep 17 00:00:00 2001 From: 
rmnldwg <48687784+rmnldwg@users.noreply.github.com> Date: Thu, 29 Feb 2024 13:27:34 +0100 Subject: [PATCH 68/75] refactor(mid): split llh method --- lymph/models/midline.py | 81 +++++++++++++++++++++++------------------ 1 file changed, 45 insertions(+), 36 deletions(-) diff --git a/lymph/models/midline.py b/lymph/models/midline.py index b429f5f..e9fbae0 100644 --- a/lymph/models/midline.py +++ b/lymph/models/midline.py @@ -2,7 +2,7 @@ import logging import warnings -from typing import Any, Iterable +from typing import Any, Iterable, Literal import numpy as np import pandas as pd @@ -580,41 +580,8 @@ def comp_contra_dist_evolution(self) -> tuple[np.ndarray, np.ndarray]: return noext_contra_dist_evo, ext_contra_dist_evo - def likelihood( - self, - given_params: Iterable[float] | dict[str, float] | None = None, - log: bool = True, - for_t_stage: str | None = None, - ) -> float: - """Compute the (log-)likelihood of the stored data given the model (and params). - - See the documentation of :py:meth:`lymph.types.Model.likelihood` for more - information on how to use the ``given_params`` parameter. - - Returns the log-likelihood if ``log`` is set to ``True``. Note that in contrast - to the :py:class:`~.Bilateral` model, the midline model does not support the - Bayesian network mode. - - Note: - The computation is much faster if no parameters are given, since then the - transition matrix does not need to be recomputed. - - See Also: - :py:meth:`lymph.models.Unilateral.likelihood` - The corresponding unilateral function. - """ - try: - # all functions and methods called here should raise a ValueError if the - # given parameters are invalid... - if given_params is None: - pass - elif isinstance(given_params, dict): - self.set_params(**given_params) - else: - self.set_params(*given_params) - except ValueError: - return -np.inf if log else 0. 
- + def _hmm_likelihood(self, log: bool = True, for_t_stage: str | None = None) -> float: + """Compute the likelihood of the stored data under the hidden Markov model.""" llh = 0. if log else 1. ipsi_dist_evo = self.ext.ipsi.comp_dist_evolution() @@ -661,6 +628,48 @@ def likelihood( return llh + def likelihood( + self, + given_params: Iterable[float] | dict[str, float] | None = None, + log: bool = True, + mode: Literal["HMM", "BN"] = "HMM", + for_t_stage: str | None = None, + ) -> float: + """Compute the (log-)likelihood of the stored data given the model (and params). + + See the documentation of :py:meth:`lymph.types.Model.likelihood` for more + information on how to use the ``given_params`` parameter. + + Returns the log-likelihood if ``log`` is set to ``True``. Note that in contrast + to the :py:class:`~.Bilateral` model, the midline model does not support the + Bayesian network mode. + + Note: + The computation is faster if no parameters are given, since then the + transition matrix does not need to be recomputed. + + See Also: + :py:meth:`lymph.models.Unilateral.likelihood` + The corresponding unilateral function. + """ + try: + # all functions and methods called here should raise a ValueError if the + # given parameters are invalid... + if given_params is None: + pass + elif isinstance(given_params, dict): + self.set_params(**given_params) + else: + self.set_params(*given_params) + except ValueError: + return -np.inf if log else 0. 
+ + if mode == "HMM": + return self._hmm_likelihood(log, for_t_stage) + + raise NotImplementedError("Only HMM mode is supported as of now.") + + def risk( self, involvement: PatternType | None = None, From febb8e05b2761af4031370b6e5af3938cc8d0a1e Mon Sep 17 00:00:00 2001 From: rmnldwg <48687784+rmnldwg@users.noreply.github.com> Date: Thu, 29 Feb 2024 14:17:18 +0100 Subject: [PATCH 69/75] docs: fix some typos and cross-references --- docs/source/explanation.rst | 2 +- docs/source/index.rst | 2 + docs/source/install.rst | 10 +- docs/source/quickstart_bilateral.ipynb | 10 +- docs/source/quickstart_unilateral.ipynb | 6 +- docs/source/refs.rst | 2 +- lymph/graph.py | 11 +++ lymph/models/bilateral.py | 117 ++++++++++++++---------- lymph/models/midline.py | 30 +++--- lymph/models/unilateral.py | 23 ++--- 10 files changed, 129 insertions(+), 84 deletions(-) diff --git a/docs/source/explanation.rst b/docs/source/explanation.rst index a279538..ebcf51b 100644 --- a/docs/source/explanation.rst +++ b/docs/source/explanation.rst @@ -1,3 +1,3 @@ .. include:: ../../README.rst :start-line: 18 - :end-line: 100 + :end-line: 80 diff --git a/docs/source/index.rst b/docs/source/index.rst index 8198175..b71784b 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -9,6 +9,7 @@ ---- + Documentation ============= @@ -21,6 +22,7 @@ Documentation api license + Index & search -------------- diff --git a/docs/source/install.rst b/docs/source/install.rst index da5b7f7..46721f5 100644 --- a/docs/source/install.rst +++ b/docs/source/install.rst @@ -1,12 +1,16 @@ Installation ============ -The easiest way to install it is via pip. Note that due to a name clash, on the python packaging index the package isn't called ``lymph``, but ``lymph-model``: +From PyPI +--------- + +The easiest way to install it is via pip from `PyPI `_. Note that due to a name clash, on the python packaging index the package isn't called ``lymph``, but ``lymph-model``: .. 
code-block:: bash pip install lymph-model + From Source ----------- @@ -14,10 +18,10 @@ To install the package from the source repository, start by cloning it. .. code-block:: bash - git clone https://github.com/rmnldwg/lymph.git + git clone https://github.com/rmnldwg/lymph cd lymph -From here you can either use `pip `_ +From here you can again use `pip `_ to install the local copy of the package. .. code-block:: bash diff --git a/docs/source/quickstart_bilateral.ipynb b/docs/source/quickstart_bilateral.ipynb index 930b084..ed0082a 100644 --- a/docs/source/quickstart_bilateral.ipynb +++ b/docs/source/quickstart_bilateral.ipynb @@ -17,10 +17,10 @@ "source": [ "graph_dict = {\n", " ('tumor', 'T') : ['I', 'II', 'III', 'IV'], \n", - " ('lnl' , 'I') : ['II'], \n", - " ('lnl' , 'II') : ['III'], \n", - " ('lnl' , 'III'): ['IV'], \n", - " ('lnl' , 'IV') : []\n", + " ('lnl' , 'I') : ['II'], \n", + " ('lnl' , 'II') : ['III'], \n", + " ('lnl' , 'III'): ['IV'], \n", + " ('lnl' , 'IV') : []\n", "}" ] }, @@ -179,7 +179,7 @@ "rng = np.random.default_rng(42)\n", "test_probabilities = {p: rng.random() for p in params_dict}\n", "\n", - "llh = model.likelihood(given_param_kwargs=test_probabilities, log=True)\n", + "llh = model.likelihood(given_params=test_probabilities, log=True)\n", "ipsi_llh = model.ipsi.likelihood(log=True)\n", "contra_llh = model.contra.likelihood(log=True)\n", "\n", diff --git a/docs/source/quickstart_unilateral.ipynb b/docs/source/quickstart_unilateral.ipynb index cb4f5d9..359b98f 100644 --- a/docs/source/quickstart_unilateral.ipynb +++ b/docs/source/quickstart_unilateral.ipynb @@ -328,9 +328,11 @@ "source": [ ":::{note}\n", "\n", - "Surprisingly, this is much slower than just implementing it \"by hand\" using numpy array functions. So, if you want to do sampling with a model, maybe don't use `scipy.stats`.\n", + "Surprisingly, this is much faster than just using the implementation from [scipy] using numpy array functions. 
So, if you want to do sampling with a model, maybe don't use `scipy.stats`.\n", ":::\n", "\n", + "[scipy]: https://scipy.org\n", + "\n", "And now we assign this parametric distribution to the model:" ] }, @@ -436,7 +438,7 @@ "metadata": {}, "outputs": [], "source": [ - "sampler.run_mcmc(initial, nsteps, progress=True);" + "sampler.run_mcmc(initial, nsteps, progress=False);" ] }, { diff --git a/docs/source/refs.rst b/docs/source/refs.rst index d4b18ba..5bab100 100644 --- a/docs/source/refs.rst +++ b/docs/source/refs.rst @@ -1,2 +1,2 @@ .. include:: ../../README.rst - :start-line: 100 + :start-line: 80 diff --git a/lymph/graph.py b/lymph/graph.py index 5fa61a3..f23cb43 100644 --- a/lymph/graph.py +++ b/lymph/graph.py @@ -312,8 +312,19 @@ def child(self, new_child: LymphNodeLevel) -> None: def get_name(self, middle='to') -> str: """Return the name of the edge. + An edge's name is simply the name of the parent node and the child node, + connected by the string provided via the ``middle`` argument. + This is used to identify and assign spread probabilities to it e.g. in the :py:class:`~models.Unilateral.set_params()` method and elsewhere. + + >>> lnl_II = LymphNodeLevel("II") + >>> lnl_III = LymphNodeLevel("III") + >>> edge = Edge(lnl_II, lnl_III) + >>> edge.get_name() + 'IItoIII' + >>> edge.get_name(middle='->') + 'II->III' """ if self.is_growth: return self.parent.name diff --git a/lymph/models/bilateral.py b/lymph/models/bilateral.py index d11a04c..32f111e 100644 --- a/lymph/models/bilateral.py +++ b/lymph/models/bilateral.py @@ -34,9 +34,8 @@ class Bilateral( See Also: :py:class:`~lymph.models.Unilateral` - Two instances of this class are created as attributes. - :py:class:`~lymph.descriptors.Distribution` - A class to store fixed and parametric distributions over diagnose times. + Two instances of this class are created as attributes. One for the ipsi- and + one for the contralateral side of the neck. 
""" def __init__( self, @@ -47,11 +46,11 @@ def __init__( contralateral_kwargs: dict[str, Any] | None = None, **_kwargs, ) -> None: - """Initialize both sides of the neck as :py:class:`~lymph.models.Unilateral`. + """Initialize both sides of the neck as :py:class:`.models.Unilateral`. The ``graph_dict`` is a dictionary of tuples as keys and lists of strings as - values. It is passed to both :py:class:`~lymph.models.Unilateral` instances, - which in turn pass it to the :py:class:`~lymph.graph.Representation` class that + values. It is passed to both :py:class:`.models.Unilateral` instances, + which in turn pass it to the :py:class:`.graph.Representation` class that stores the graph. With the dictionary ``is_symmetric`` the user can specify which aspects of the @@ -60,10 +59,10 @@ def __init__( Note: The symmetries of tumor and LNL spread are only guaranteed if the - respective parameters are set via the :py:meth:`~set_params()` method of + respective parameters are set via the :py:meth:`.set_params()` method of this bilateral model. It is still possible to set different parameters for the ipsi- and contralateral side by using their respective - :py:meth:`~lymph.models.Unilateral.set_params()` method. + :py:meth:`.Unilateral.set_params()` method. The ``unilateral_kwargs`` are passed to both instances of the unilateral model, while the ``ipsilateral_kwargs`` and ``contralateral_kwargs`` are passed to the @@ -123,14 +122,24 @@ def _init_models( @classmethod def binary(cls, *args, **kwargs) -> Bilateral: - """Initialize a binary bilateral model.""" + """Initialize a binary bilateral model. + + This is a convenience method that sets the ``allowed_states`` of the + ``unilateral_kwargs`` to ``[0, 1]``. All other ``args`` and ``kwargs`` are + passed to the :py:meth:`.__init__` method. 
+ """ unilateral_kwargs = kwargs.pop("unilateral_kwargs", {}) unilateral_kwargs["allowed_states"] = [0, 1] return cls(*args, unilateral_kwargs=unilateral_kwargs, **kwargs) @classmethod def trinary(cls, *args, **kwargs) -> Bilateral: - """Initialize a trinary bilateral model.""" + """Initialize a trinary bilateral model. + + This is a convenience method that sets the ``allowed_states`` of the + ``unilateral_kwargs`` to ``[0, 1, 2]``. All other ``args`` and ``kwargs`` are + passed to the :py:meth:`.__init__` method. + """ unilateral_kwargs = kwargs.pop("unilateral_kwargs", {}) unilateral_kwargs["allowed_states"] = [0, 1, 2] return cls(*args, unilateral_kwargs=unilateral_kwargs, **kwargs) @@ -158,7 +167,13 @@ def get_tumor_spread_params( as_dict: bool = True, as_flat: bool = True, ) -> Iterable[float] | dict[str, float]: - """Return the parameters of the model's spread from tumor to LNLs.""" + """Return the parameters of the model's spread from tumor to LNLs. + + If the attribute dictionary :py:attr:`.is_symmetric` stores the key-value pair + ``"tumor_spread": True``, the parameters are returned as a single dictionary, + since they are the same ipsi- and contralaterally. Otherwise, the parameters + are returned as a dictionary with two keys, ``"ipsi"`` and ``"contra"``. + """ params = { "ipsi": self.ipsi.get_tumor_spread_params(as_flat=as_flat), "contra": self.contra.get_tumor_spread_params(as_flat=as_flat), @@ -184,7 +199,13 @@ def get_lnl_spread_params( as_dict: bool = True, as_flat: bool = True, ) -> Iterable[float] | dict[str, float]: - """Return the parameters of the model's spread from LNLs to tumor.""" + """Return the parameters of the model's spread from LNLs to tumor. + + Similarily to the :py:meth:`.get_tumor_spread_params` method, this returns only + one dictionary if the attribute dictionary :py:attr:`.is_symmetric` stores the + key-value pair ``"lnl_spread": True``. 
Otherwise, the parameters are returned + as a dictionary with two keys, ``"ipsi"`` and ``"contra"``. + """ params = { "ipsi": self.ipsi.get_lnl_spread_params(as_flat=as_flat), "contra": self.contra.get_lnl_spread_params(as_flat=as_flat), @@ -267,13 +288,13 @@ def get_params( ) -> Iterable[float] | dict[str, float]: """Return the parameters of the model. - It returns the combination of the call to the - :py:meth:`lymph.models.Unilateral.get_params` of the ipsi- and contralateral - side. For the use of the ``as_dict`` and ``as_flat`` arguments, see the - documentation of the :py:meth:`lymph.types.Model.get_params` method. + It returns the combination of the call to the :py:meth:`.Unilateral.get_params` + of the ipsi- and contralateral side. For the use of the ``as_dict`` and + ``as_flat`` arguments, see the documentation of the + :py:meth:`.types.Model.get_params` method. - Also see the :py:meth:`lymph.models.Bilateral.get_spread_params` method to - understand how the symmetry settings affect the return value. + Also see the :py:meth:`.get_spread_params` method to understand how the + symmetry settings affect the return value. """ params = self.get_spread_params(as_flat=as_flat) params.update(self.get_distribution_params(as_flat=as_flat)) @@ -336,9 +357,9 @@ def set_params(self, *args: float, **kwargs: float) -> tuple[float]: """Set new parameters to the model. This works almost exactly as the unilateral model's - :py:meth:`~lymph.models.Unilateral.set_params` method. However, this one - allows the user to set the parameters of individual sides of the neck by - prefixing the keyword arguments' names with ``"ipsi_"`` or ``"contra_"``. + :py:meth:`.Unilateral.set_params` method. However, this one allows the user to + set the parameters of individual sides of the neck by prefixing the keyword + arguments' names with ``"ipsi_"`` or ``"contra_"``. Anything not prefixed by ``"ipsi_"`` or ``"contra_"`` is passed to both sides of the neck. 
This does obviously not work with positional arguments. @@ -347,17 +368,21 @@ def set_params(self, *args: float, **kwargs: float) -> tuple[float]: important: 1. The parameters of the edges from tumor to LNLs: - 1. first the ipsilateral parameters, - 2. if ``is_symmetric["tumor_spread"]`` is ``False``, the contralateral - parameters. Otherwise, the ipsilateral parameters are used for both - sides. + + 1. first the ipsilateral parameters, + 2. if ``is_symmetric["tumor_spread"]`` is ``False``, the contralateral + parameters. Otherwise, the ipsilateral parameters are used for both + sides. + 2. The parameters of the edges from LNLs to tumor: - 1. again, first the ipsilateral parameters, - 2. if ``is_symmetric["lnl_spread"]`` is ``False``, the contralateral - parameters. Otherwise, the ipsilateral parameters are used for both - sides. + + 1. again, first the ipsilateral parameters, + 2. if ``is_symmetric["lnl_spread"]`` is ``False``, the contralateral + parameters. Otherwise, the ipsilateral parameters are used for both + sides. + 3. The parameters of the parametric distributions for marginalizing over - diagnose times. + diagnose times. When still some positional arguments remain after that, they are returned in a tuple. @@ -393,7 +418,7 @@ def comp_state_dist( ``t_stage``. See Also: - :py:meth:`lymph.models.Unilateral.comp_state_dist` + :py:meth:`.Unilateral.comp_state_dist` The corresponding unilateral function. Note that this method returns a 2D array, because it computes the probability of any possible combination of ipsi- and contralateral states. @@ -427,7 +452,7 @@ def comp_obs_dist( """Compute the joint distribution over the ipsi- & contralateral observations. See Also: - :py:meth:`lymph.models.Unilateral.comp_obs_dist` + :py:meth:`.Unilateral.comp_obs_dist` The corresponding unilateral function. Note that this method returns a 2D array, because it computes the probability of any possible combination of ipsi- and contralateral observations. 
@@ -508,7 +533,7 @@ def likelihood( ): """Compute the (log-)likelihood of the stored data given the model (and params). - See the documentation of :py:meth:`lymph.types.Model.likelihood` for more + See the documentation of :py:meth:`.types.Model.likelihood` for more information on how to use the ``given_params`` parameter. Returns the log-likelihood if ``log`` is set to ``True``. The ``mode`` parameter @@ -520,7 +545,7 @@ def likelihood( transition matrix does not need to be recomputed. See Also: - :py:meth:`lymph.models.Unilateral.likelihood` + :py:meth:`.Unilateral.likelihood` The corresponding unilateral function. """ try: @@ -552,8 +577,8 @@ def comp_posterior_joint_state_dist( ) -> np.ndarray: """Compute joint post. dist. over ipsi & contra states, ``given_diagnoses``. - The ``given_diagnoses`` is a dictionary storing a :py:class:`types.DiagnoseType` for - the ``"ipsi"`` and ``"contra"`` side of the neck. + The ``given_diagnoses`` is a dictionary storing a :py:class:`types.DiagnoseType` + for the ``"ipsi"`` and ``"contra"`` side of the neck. Essentially, this is the risk for any possible combination of ipsi- and contralateral involvement, given the provided diagnoses. @@ -563,7 +588,7 @@ def comp_posterior_joint_state_dist( transition matrix does not need to be recomputed. See Also: - :py:meth:`lymph.models.Unilateral.comp_posterior_state_dist` + :py:meth:`.Unilateral.comp_posterior_state_dist` """ if isinstance(given_params, dict): self.set_params(**given_params) @@ -606,19 +631,19 @@ def risk( ) -> float: """Compute risk of an ``involvement`` pattern, given parameters and diagnoses. - The parameters can be set via the ``given_params`` and - ``given_params``, both of which are passed to the - :py:meth:`~set_params` method. The ``given_diagnoses`` must be a dictionary - mapping the side of the neck to a :py:class:`types.DiagnoseType`. 
+ The parameters can be set via the ``given_params`` argument, which is + passed on to the :py:meth:`.set_params` method. The + ``given_diagnoses`` must be a dictionary mapping the side of the neck to a + :py:class:`.types.DiagnoseType`. Note: The computation is much faster if no parameters are given, since then the transition matrix does not need to be recomputed. See Also: - :py:meth:`lymph.models.Unilateral.risk` + :py:meth:`.Unilateral.risk` The unilateral method for computing the risk of an involvment pattern. - :py:meth:`lymph.models.Bilateral.comp_posterior_joint_state_dist` + :py:meth:`.Bilateral.comp_posterior_joint_state_dist` This method computes the joint distribution over ipsi- and contralateral states, given the parameters and diagnoses. The risk then only marginalizes over the states that match the involvement pattern. @@ -660,11 +685,11 @@ def draw_patients( """Draw ``num`` random patients from the parametrized model. See Also: - :py:meth:`lymph.diagnose_times.Distribution.draw_diag_times` + :py:meth:`.diagnose_times.Distribution.draw_diag_times` Method to draw diagnose times from a distribution. - :py:meth:`lymph.models.Unilateral.draw_diagnoses` + :py:meth:`.Unilateral.draw_diagnoses` Method to draw individual diagnoses from a unilateral model. - :py:meth:`lymph.models.Unilateral.draw_patients` + :py:meth:`.Unilateral.draw_patients` The unilateral method to draw a synthetic dataset. """ if rng is None: diff --git a/lymph/models/midline.py b/lymph/models/midline.py index e9fbae0..93d476c 100644 --- a/lymph/models/midline.py +++ b/lymph/models/midline.py @@ -470,8 +470,8 @@ def set_params( ) -> Iterable[float] | dict[str, float]: """Set all parameters of the model. - Combines the calls to :py:meth:`set_spread_params` and - :py:meth:`set_distribution_params`. + Combines the calls to :py:meth:`.set_spread_params` and + :py:meth:`.set_distribution_params`. 
""" args = self.set_spread_params(*args, **kwargs) first, args = popfirst(args) @@ -487,19 +487,20 @@ def load_patient_data( """Load patient data into the model. This amounts to sorting the patients into three bins: + 1. Patients whose tumor is clearly laterlaized, meaning the column - ``("tumor", "1", "extension")`` reports ``False``. These get assigned to - the :py:attr:`noext` attribute. + ``("tumor", "1", "extension")`` reports ``False``. These get assigned to + the :py:attr:`.noext` attribute. 2. Those with a central tumor, indicated by ``True`` in the column - ``("tumor", "1", "central")``. If the :py:attr:`use_central` attribute is - set to ``True``, these patients are assigned to the :py:attr:`central` - model. Otherwise, they are assigned to the :py:attr:`ext` model. + ``("tumor", "1", "central")``. If the :py:attr:`.use_central` attribute is + set to ``True``, these patients are assigned to the :py:attr:`.central` + model. Otherwise, they are assigned to the :py:attr:`.ext` model. 3. The rest, which amounts to patients whose tumor extends over the mid-sagittal - line but is not central, i.e., symmetric w.r.t to the mid-sagittal line. - These are assigned to the :py:attr:`ext` model. + line but is not central, i.e., symmetric w.r.t to the mid-sagittal line. + These are assigned to the :py:attr:`.ext` model. - The split data is sent to the :py:meth:`lymph.models.Bilateral.load_patient_data` - method of the respective models. + The split data is sent to the :py:meth:`.Bilateral.load_patient_data` method of + the respective models. 
""" # pylint: disable=singleton-comparison is_lateralized = patient_data[EXT_COL] == False @@ -523,7 +524,6 @@ def load_patient_data( ) - def comp_midext_evolution(self) -> np.ndarray: """Evolve only the state of the midline extension.""" midext_states = np.zeros(shape=(self.max_time + 1, 2), dtype=float) @@ -637,11 +637,11 @@ def likelihood( ) -> float: """Compute the (log-)likelihood of the stored data given the model (and params). - See the documentation of :py:meth:`lymph.types.Model.likelihood` for more + See the documentation of :py:meth:`.types.Model.likelihood` for more information on how to use the ``given_params`` parameter. Returns the log-likelihood if ``log`` is set to ``True``. Note that in contrast - to the :py:class:`~.Bilateral` model, the midline model does not support the + to the :py:class:`.Bilateral` model, the midline model does not support the Bayesian network mode. Note: @@ -649,7 +649,7 @@ def likelihood( transition matrix does not need to be recomputed. See Also: - :py:meth:`lymph.models.Unilateral.likelihood` + :py:meth:`.Unilateral.likelihood` The corresponding unilateral function. """ try: diff --git a/lymph/models/unilateral.py b/lymph/models/unilateral.py index 006bc7c..247bcce 100644 --- a/lymph/models/unilateral.py +++ b/lymph/models/unilateral.py @@ -336,8 +336,8 @@ def comp_diagnose_prob( def obs_list(self): """Return the list of all possible observations. - They are ordered the same way as the :py:attr:`~Unilateral.state_list`, but - additionally by modality. E.g., for two LNLs II, III and two modalities CT, + They are ordered the same way as the :py:attr:`.graph.Representation.state_list`, + but additionally by modality. E.g., for two LNLs II, III and two modalities CT, pathology, the list would look like this: >>> model = Unilateral(graph_dict={ @@ -378,7 +378,7 @@ def transition_matrix(self) -> np.ndarray: :math:`2^N \\times 2^N` where :math:`N` is the number of nodes in the graph. 
The :math:`i`-th row and :math:`j`-th column encodes the probability to transition from the :math:`i`-th state to the :math:`j`-th state. The states - are ordered as in the :py:attr:`lymph.graph.state_list`. + are ordered as in the :py:attr:`.graph.Representation.state_list`. See Also: :py:func:`~lymph.descriptors.matrix.generate_transition` @@ -528,11 +528,11 @@ def patient_data_hash(self) -> int: def patient_data(self) -> pd.DataFrame: """Return the patient data loaded into the model. - After succesfully loading the data with the method :py:meth:`load_patient_data`, + After succesfully loading the data with the method :py:meth:`.load_patient_data`, the copied patient data now contains the additional top-level header ``"_model"``. Under it, the observed per LNL involvement is listed for every - diagnostic modality in the dictionary :py:attr:`~modalities` and for each of - the LNLs in the list :py:attr:`~lnls`. + diagnostic modality in the dictionary returned by :py:meth:`.get_all_modalities` + and for each of the LNLs in the list :py:attr:`.graph.Representation.lnls`. It also contains information on the patient's T-stage under the header ``("_model", "#", "t_stage")``. @@ -566,7 +566,7 @@ def comp_dist_evolution(self) -> np.ndarray: Note that at this point, the distributions are not weighted with the distribution over diagnose times that are stored and managed for each T-stage - in the dictionary :py:attr:`~diag_time_dists`. + in the dictionary returned by :py:meth:`.get_all_distributions`. """ state_dists = np.zeros(shape=(self.max_time + 1, len(self.graph.state_list))) state_dists[0, 0] = 1. 
@@ -582,9 +582,9 @@ def comp_state_dist(self, t_stage: str = "early", mode: Literal["HMM", "BN"] = " Do this either for a given ``t_stage``, when ``mode`` is set to ``"HMM"``, which is essentially a marginalization of the evolution over the possible - states as computed by :py:meth:`~comp_dist_evolution` with the distribution - over diagnose times for the given T-stage from the dictionary - :py:attr:`~diag_time_dists`. + states as computed by :py:meth:`.comp_dist_evolution` with the distribution + over diagnose times for the given T-stage from the dictionary returned by + :py:meth:`.get_all_distributions`. Or, when ``mode`` is set to ``"BN"``, compute the distribution over states for the Bayesian network. In that case, the ``t_stage`` parameter is ignored. @@ -870,7 +870,8 @@ def draw_patients( For this, a ``stage_dist``, i.e., a distribution over the T-stages, needs to be defined. This must be an iterable of probabilities with as many elements as - there are defined T-stages in the model's :py:attr:`diag_time_dists` attribute. + there are defined T-stages in the model (accessible via + :py:meth:`.get_all_distributions`). A random number generator can be provided as ``rng``. If ``None``, a new one is initialized with the given ``seed`` (or ``42``, by default). From f541de5fe705764a83551902bc7bea748c8fbc52 Mon Sep 17 00:00:00 2001 From: rmnldwg <48687784+rmnldwg@users.noreply.github.com> Date: Thu, 29 Feb 2024 14:45:19 +0100 Subject: [PATCH 70/75] docs: more fixes & improvements of docstrings --- docs/source/components.rst | 4 ++-- lymph/diagnose_times.py | 10 ++++++++-- lymph/helper.py | 37 ++++++++++--------------------------- lymph/matrix.py | 9 ++++----- lymph/modalities.py | 19 +++++++++++++------ 5 files changed, 37 insertions(+), 42 deletions(-) diff --git a/docs/source/components.rst b/docs/source/components.rst index f63e2a8..c392ab5 100644 --- a/docs/source/components.rst +++ b/docs/source/components.rst @@ -14,7 +14,7 @@ Diagnostic Modalities .. 
automodule:: lymph.modalities :members: - :special-members: __init__ + :special-members: __init__, __hash__ :show-inheritance: @@ -23,7 +23,7 @@ Marginalization over Diagnose Times .. automodule:: lymph.diagnose_times :members: - :special-members: __init__ + :special-members: __init__, __hash__ :show-inheritance: Matrices diff --git a/lymph/diagnose_times.py b/lymph/diagnose_times.py index bd127a8..67096c6 100644 --- a/lymph/diagnose_times.py +++ b/lymph/diagnose_times.py @@ -150,6 +150,12 @@ def __len__(self) -> int: return len(self.support) def __hash__(self) -> int: + """Return a hash of the distribution. + + This is computed from the stored frozen distribution and -- if + :py:meth:`.is_updateable` returns ``True`` -- the stored keyword arguments of + the parametric distribution. + """ kwarg_tpl = tuple(self._kwargs.items()) return hash((self.is_updateable, kwarg_tpl, self.pmf.tobytes())) @@ -376,8 +382,8 @@ def get_all_distributions(self: DC) -> dict[str, Distribution]: This will issue a warning if it finds that not all distributions of the composite are equal. Note that it will always return the distributions of the first child. This means one should NOT try to set the distributions via the - returned dictionary of this method. Instead, use the :py:meth:`set_modality` - method. + returned dictionary of this method. Instead, use the + :py:meth:`.set_distribution` method. """ if self._is_distribution_leaf: return self._distributions diff --git a/lymph/helper.py b/lymph/helper.py index c444f83..8096ea0 100644 --- a/lymph/helper.py +++ b/lymph/helper.py @@ -33,23 +33,15 @@ def check_unique_names(graph: dict): def check_spsn(spsn: list[float]): - """Private method that checks whether specificity and sensitvity - are valid. - - Args: - spsn (list): list with specificity and sensiticity - - Raises: - ValueError: raises a value error if the spec or sens is not a number btw. 
0.5 and 1.0 - """ + """Check whether specificity and sensitivity are valid.""" has_len_2 = len(spsn) == 2 is_above_lb = np.all(np.greater_equal(spsn, 0.5)) is_below_ub = np.all(np.less_equal(spsn, 1.)) if not has_len_2 or not is_above_lb or not is_below_ub: - msg = ("For each modality provide a list of two decimals " - "between 0.5 and 1.0 as specificity & sensitivity " - "respectively.") - raise ValueError(msg) + raise ValueError( + "For each modality provide a list of two decimals between 0.5 and 1.0 as " + "specificity & sensitivity respectively." + ) @lru_cache @@ -105,14 +97,9 @@ def comp_transition_tensor( def clinical(spsn: list) -> np.ndarray: - """produces the confusion matrix of a clinical modality, i.e. a modality - that can not detect microscopic metastases + """Produce the confusion matrix of a clinical modality. - Args: - spsn (list): list with specificity and sensitivity of modality - - Returns: - np.ndarray: confusion matrix of modality + A clinical modality can by definition *not* detect microscopic metastases. """ check_spsn(spsn) sp, sn = spsn @@ -125,14 +112,10 @@ def clinical(spsn: list) -> np.ndarray: def pathological(spsn: list) -> np.ndarray: - """produces the confusion matrix of a pathological modality, i.e. a modality - that can detect microscopic metastases - - Args: - spsn (list): list with specificity and sensitivity of modality + """Produce the confusion matrix of a pathological modality. - Returns: - np.ndarray: confusion matrix of modality + A pathological modality can detect microscopic disease, but is unable to + differentiate between micro- and macroscopic involvement. 
""" check_spsn(spsn) sp, sn = spsn diff --git a/lymph/matrix.py b/lymph/matrix.py index f4a591e..d2d066d 100644 --- a/lymph/matrix.py +++ b/lymph/matrix.py @@ -283,10 +283,9 @@ def generate_diagnose(model: models.Unilateral, t_stage: str) -> np.ndarray: The decorated function expects an additional first argument that should be unique for the combination of modalities and patient data. It is intended to be used with the -joint hash of the modalities -(:py:meth:`~modalities.Composite.modalities_hash`) and the -patient data hash that is always precomputed when a new dataset is loaded into the -model (:py:meth:`~lymph.models.Unilateral.patient_data_hash`). +joint hash of the modalities (:py:meth:`.modalities_hash`) and the patient data hash +that is always precomputed when a new dataset is loaded into the model +(:py:meth:`~lymph.models.Unilateral.patient_data_hash`). """ @@ -300,7 +299,7 @@ class DiagnoseUserDict(AbstractLookupDict): the patient data (meaning the data matrix needs to be updated) change. See Also: - :py:attr:`~lymph.models.Unilateral.diagnose_matrices` + :py:attr:`.Unilateral.diagnose_matrices` """ model: models.Unilateral diff --git a/lymph/modalities.py b/lymph/modalities.py index cc3f0f5..0d628ae 100644 --- a/lymph/modalities.py +++ b/lymph/modalities.py @@ -32,6 +32,10 @@ def __init__( def __hash__(self) -> int: + """Return a hash of the modality. + + This is computed from the confusion matrix of the modality. + """ return hash(self.confusion_matrix.tobytes()) @@ -199,7 +203,7 @@ def is_trinary(self: MC) -> bool: def get_modality(self: MC, name: str) -> Modality: - """Return the modality with the given name.""" + """Return the modality with the given ``name``.""" return self.get_all_modalities()[name] @@ -209,7 +213,7 @@ def get_all_modalities(self: MC) -> dict[str, Modality]: This will issue a warning if it finds that not all modalities of the composite are equal. Note that it will always return the modalities of the first child. 
This means one should NOT try to set the modalities via the returned dictionary - of this method. Instead, use the :py:meth:`set_modality` method. + of this method. Instead, use the :py:meth:`.set_modality` method. """ if self._is_modality_leaf: return self._modalities @@ -235,7 +239,7 @@ def set_modality( sens: float, kind: Literal["clinical", "pathological"] = "clinical", ) -> None: - """Set the modality with the given name.""" + """Set the modality with the given ``name``.""" if self._is_modality_leaf: cls = Pathological if kind == "pathological" else Clinical self._modalities[name] = cls(spec, sens, self.is_trinary) @@ -246,7 +250,7 @@ def set_modality( def del_modality(self: MC, name: str) -> None: - """Delete the modality with the given name.""" + """Delete the modality with the given ``name``.""" if self._is_modality_leaf: del self._modalities[name] @@ -256,7 +260,7 @@ def del_modality(self: MC, name: str) -> None: def replace_all_modalities(self: MC, modalities: dict[str, Modality]) -> None: - """Replace all modalities of the composite.""" + """Replace all modalities of the composite with new ``modalities``.""" if self._is_modality_leaf: self.clear_modalities() for name, modality in modalities.items(): @@ -269,7 +273,10 @@ def replace_all_modalities(self: MC, modalities: dict[str, Modality]) -> None: def modalities_hash(self: MC) -> int: - """Compute a hash from all modalities.""" + """Compute a hash from all stored modalities. + + See the :py:meth:`.Modality.__hash__` method for more information. 
+ """ hash_res = 0 if self._is_modality_leaf: for name, modality in self._modalities.items(): From 87d638c67f1730d8293e04792fd422c7d38a25e7 Mon Sep 17 00:00:00 2001 From: Roman Ludwig <48687784+rmnldwg@users.noreply.github.com> Date: Mon, 4 Mar 2024 11:58:42 +0100 Subject: [PATCH 71/75] change(types): abstract methods raise error --- lymph/models/unilateral.py | 1 - lymph/types.py | 22 ++++++++++++++++++++-- 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/lymph/models/unilateral.py b/lymph/models/unilateral.py index 247bcce..95d2d31 100644 --- a/lymph/models/unilateral.py +++ b/lymph/models/unilateral.py @@ -774,7 +774,6 @@ def risk( given_diagnoses: dict[str, types.PatternType] | None = None, t_stage: str = "early", mode: Literal["HMM", "BN"] = "HMM", - **_kwargs, ) -> float | np.ndarray: """Compute risk of a certain involvement, given a patient's diagnosis. diff --git a/lymph/types.py b/lymph/types.py index 1995ff3..b7f0f54 100644 --- a/lymph/types.py +++ b/lymph/types.py @@ -26,10 +26,23 @@ def get_params( PatternType = dict[str, bool | NAType | None] -"""Type alias for an involvement pattern.""" +"""Type alias for an involvement pattern. + +An involvement pattern is a dictionary with keys for the lymph node levels and values +for the involvement of the respective lymph nodes. The values are either True, False, +or None, which means that the involvement is unknown. + +>>> pattern = {"I": True, "II": False, "III": None} +""" DiagnoseType = dict[str, PatternType] -"""Type alias for a diagnose, which is a involvement pattern per diagnostic modality.""" +"""Type alias for a diagnose, which is an involvement pattern per diagnostic modality. + +>>> diagnose = { +... "CT": {"I": True, "II": False, "III": None}, +... "MRI": {"I": True, "II": True, "III": None}, +... } +""" M = TypeVar("M", bound="Model") @@ -54,6 +67,7 @@ def get_params( the ``get_params`` method of other instances, which can be fused to get a flat dictionary. 
""" + raise NotImplementedError def get_num_dims(self: M, mode: Literal["HMM", "BN"] = "HMM") -> int: """Return the number of dimensions of the parameter space. @@ -77,6 +91,7 @@ def set_params(self: M, *args: float, **kwargs: float) -> tuple[float]: arguments are used up one by one by the ``set_params`` methods the model calls. Keyword arguments override the positional arguments. """ + raise NotImplementedError @abstractmethod def load_patient_data( @@ -87,6 +102,7 @@ def load_patient_data( .. _LyProX: https://lyprox.org/ """ + raise NotImplementedError @abstractmethod def likelihood( @@ -100,6 +116,7 @@ def likelihood( otherwise. The parameters may be passed as positional or keyword arguments. They are then passed to the :py:meth:`set_params` method first. """ + raise NotImplementedError @abstractmethod def risk( @@ -109,3 +126,4 @@ def risk( given_diagnoses: dict[str, PatternType] | None = None, ) -> float | np.ndarray: """Return the risk of ``involvement``, given the parameters and diagnoses.""" + raise NotImplementedError From 6a60059f96e41a214859fcf8c58221cfedbbe509 Mon Sep 17 00:00:00 2001 From: Roman Ludwig <48687784+rmnldwg@users.noreply.github.com> Date: Mon, 4 Mar 2024 12:16:18 +0100 Subject: [PATCH 72/75] chore: update changelog --- CHANGELOG.md | 168 ++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 167 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7737935..c1707ea 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,170 @@ All notable changes to this project will be documented in this file. + +## [1.0.0.rc1] - 2024-03-04 + +This release hopefully represents the last major change before releasing version 1.0.0. It was necessary because during the implementation of the midline model, managing the symmetries in a transparent and user-friendly way became impossible in the old implementation. + +Now, a [composite pattern] is used for both the modalities and the distributions over diagnose times. 
This further separates the logic and will allow more hierarchical models based on the ones provided here to work seamlessly almost out of the box. This may become relevant with the mixture model. + +[composite pattern]: https://refactoring.guru/design-patterns/composite + + +### Add + +- First version of midline module added. + +### Bug Fixes + +- (**diag**) Delete frozen distributions when params change. +- (**diag**) Correct max time & params.\ + The `max_time` is now correctly accessed and set. Also, the distribution + params are not used up by synched distributions, but only by the + distributions in composite leafs. +- (**graph**) Avoid warning for micro mod setting. +- ⚠ **BREAKING** Make likelihood work with emcee again.\ + The way the likelihood was defined, it did not actually play nicely with + how the emcee package works. This is now fixed. +- (**bi**) Fix uninitialized `is_symmetric` dict. +- (**mid**) Add missing dict in init. +- (**mid**) Update call to `transition_matrix()` & `state_list`. +- (**mid**) Finish `draw_patients` method.\ + Some bugs in the method for drawing synthetic patients from the + `Midline` were fixed. This seems to be working now. + + +### Documentation + +- (**mid**) Improve midline docstrings slightly. +- Go over `set_params()` docstrings. +- Update quickstart guide to new API. +- Adapt tests to new API (now passing). +- Update index & fix some docstrings. +- Fix some typos and cross-references. + +### Features + +- (**helper**) Add `popfirst()` and `flatten()`.\ + Two new helper functions in relation to getting and setting params. +- (**type**) Add model ABC to inherit from.\ + I added an abstract base class from which all model-like classes should + inherit. It defines all the methods that need to be present in a model.\ + The idea behind this is that any subclass of this can be part of a + composite that correctly delegates getting/setting parameters, + diagnose time distributions, and modalities. 
+- ⚠ **BREAKING** (**graph**) Add `__hash__` to edge, node, graph.\ + This replaces the dedicated `parameter_hash()` method. +- (**mod**) Add method to delete modality `del_modality()`. +- Add more get/set params methods. +- (**mid**) Implement `set_params`. +- (**mid**) Implement the `load_patient_data` method. +- (**mid**) Finish midline (feature complete). +- Complete set/get methods on model classes.\ + The `Unilateral`, `Bilateral`, and `Midline` models now all have the eight + methods `set_tumor_spread_params`, `set_lnl_spread_params`, + `set_spread_params`, `set_params`, `get_tumor_spread_params`, + `get_lnl_spread_params`, `get_spread_params`, and `get_params`. +- (**mid**) Reimplement the midline evolution.\ + The midline evolution that Lars Widmer worked on is now reimplemented. + However, although this implementation is analogous to the one used in + previous versions of the code and should thus work, it is still untested + at this point. +- Add helper to draw diagnoses.\ + The new helper function `draw_diagnoses` is a re-implementation of the + `Unilateral` class's method with the same name for easier reusing. +- (**mid**) Allow marginalization over unknown midline extension.\ + This is implemented differently than before: If data with unknown + midline extension is added, it gets loaded into an attribute named + `unknown`, which is a `Bilateral` model only used to store that data and + generate diagnose matrices. + + +### Miscellaneous Tasks + +- Move timing data. +- Make changelog super detailed. + +### Refactor + +- (**mid**) Split likelihood method. + +### Testing + +- Fix long-running test. +- Add integration tests with emcee. +- Add checks for bilateral symmetries. +- (**mid**) Add first check of `set_params()` method. +- (**mid**) Check likelihood function. + +### Add + +- Added doc strings. 
+ +### Change + +- Non-mixture midline implemented.\ + Fixed the non-mixture midline extension model and added documentation. +- ⚠ **BREAKING** Make `get_params()` uniform and chainable.\ + The API of all `get_params()` methods is now nice and uniform, allowing + arbitrary chaining of these methods. +- ⚠ **BREAKING** Make `set_params()` uniform and chainable.\ + The API of all `set_params()` methods is now nice and uniform, + allowing arbitrary chaining of these methods. +- ⚠ **BREAKING** Make `set_params()` not return kwargs.\ + It does make sense to "use up" the positional arguments one by one in + the `set_params()` methods, but doing the same thing with keyword + arguments is pointless, difficult and error-prone. +- ⚠ **BREAKING** (**graph**) Replace `name` with `get_name()`.\ + In the `Edge` class, the `name` property is replaced by a function + `get_name()` that is more flexible and allows us to have edge names + without underscores when we need it. +- ⚠ **BREAKING** (**bi**) Reintroduce `is_symmetric` attribute.\ + This will once again manage the symmetry of the `Bilateral` class's + different ipsi- and contralateral attributes. +- ⚠ **BREAKING** (**diag**) Use composite for distributions.\ + Instead of a dict that holds the T-stages and corresponding + distributions over diagnose times, this implements them as a composite + pattern. This replaces the dict-like API entirely with methods. This has + several advantages: + 1. It is more explicit and thus more readable + 2. The composite pattern is designed to work naturally with tree-like + structures, which we have here when dealing with bilateral models. +- ⚠ **BREAKING** (**mod**) Use composite for modalities.\ + Instead of a dict that holds the names and corresponding + sens/spec for diagnostic modalities, this implements them as a composite + pattern. This replaces the dict-like API entirely with methods. This has + several advantages: + 1. It is more explicit and thus more readable + 2. 
The composite pattern is designed to work naturally with tree-like + structures, which we have here when dealing with bilateral models. +- ⚠ **BREAKING** (**uni**) Transform to composite pattern.\ + Use the new composite pattern for the distribution over diagnose times + and modalities. +- (**bi**) Update for new composite API. +- ⚠ **BREAKING** (**mod**) Shorten to sens/spec.\ + Also, add a `clear_modalities()` and a `clear_distributions()` method to + the respective composites. +- (**matrix**) Use hashables over arg0 cache.\ + Instead of using this weird `arg0_cache` for the observation and + transition matrix, I use the necessary arguments only, which are all + hashable now. +- ⚠ **BREAKING** Adapt risk to likelihood call signature. +- (**type**) Add risk to abstract methods. +- (**type**) Abstract methods raise error. + +### Merge + +- Branch 'yoel-dev' into 'dev'. +- Branch '74-synchronization-is-unreadable-and-error-prone' into 'dev'. Fixes [#74]. +- Branch 'main' into 'dev'. +- Branch 'add-midext-evolution' into 'dev'. + +### Remove + +- Unused helper functions. + + ## [1.0.0.a6] - 2024-02-15 @@ -326,7 +490,8 @@ Almost the entire API has changed. I'd therefore recommend to have a look at the - add pre-commit hook to check commit msg -[Unreleased]: https://github.com/rmnldwg/lymph/compare/1.0.0.a6...HEAD +[Unreleased]: https://github.com/rmnldwg/lymph/compare/1.0.0.rc1...HEAD +[1.0.0.rc1]: https://github.com/rmnldwg/lymph/compare/1.0.0.a6...1.0.0.rc1 [1.0.0.a6]: https://github.com/rmnldwg/lymph/compare/1.0.0.a5...1.0.0.a6 [1.0.0.a5]: https://github.com/rmnldwg/lymph/compare/1.0.0.a4...1.0.0.a5 [1.0.0.a4]: https://github.com/rmnldwg/lymph/compare/1.0.0.a3...1.0.0.a4 @@ -339,6 +504,7 @@ Almost the entire API has changed. 
I'd therefore recommend to have a look at the [0.4.1]: https://github.com/rmnldwg/lymph/compare/0.4.0...0.4.1 [0.4.0]: https://github.com/rmnldwg/lymph/compare/0.3.10...0.4.0 +[#74]: https://github.com/rmnldwg/lymph/issues/74 [#72]: https://github.com/rmnldwg/lymph/issues/72 [#69]: https://github.com/rmnldwg/lymph/issues/69 [#68]: https://github.com/rmnldwg/lymph/issues/68 From c0206609258b7602eb7f3c6eeabc785b6bd483bc Mon Sep 17 00:00:00 2001 From: Roman Ludwig <48687784+rmnldwg@users.noreply.github.com> Date: Mon, 4 Mar 2024 14:23:50 +0100 Subject: [PATCH 73/75] docs: add CSS for nicer signatures The parameters of call signatures now each get their own line, making it much more readable. --- docs/source/_static/css/custom.css | 11 +++++++++++ docs/source/conf.py | 5 ++++- docs/source/types.rst | 2 +- 3 files changed, 16 insertions(+), 2 deletions(-) create mode 100644 docs/source/_static/css/custom.css diff --git a/docs/source/_static/css/custom.css b/docs/source/_static/css/custom.css new file mode 100644 index 0000000..4e87d9c --- /dev/null +++ b/docs/source/_static/css/custom.css @@ -0,0 +1,11 @@ +/* Indent by four spaces */ +.sig-param::before { + content: "\a\20\20\20\20"; + white-space: pre; +} + +/* Don't indent closing bracket */ +dt em.sig-param:last-of-type::after { + content: "\a"; + white-space: pre; +} diff --git a/docs/source/conf.py b/docs/source/conf.py index f2d3b89..c32e97f 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -93,4 +93,7 @@ # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". 
-# html_static_path = ['./_static'] +html_static_path = ['./_static'] +html_css_files = [ + "css/custom.css", +] diff --git a/docs/source/types.rst b/docs/source/types.rst index ae11573..073910a 100644 --- a/docs/source/types.rst +++ b/docs/source/types.rst @@ -8,4 +8,4 @@ Types .. automodule:: lymph.types :members: - :special-members: __init__ + :show-inheritance: From 81dc493a1751267b5a8eb6acf870e594056b6839 Mon Sep 17 00:00:00 2001 From: Roman Ludwig <48687784+rmnldwg@users.noreply.github.com> Date: Mon, 4 Mar 2024 14:30:28 +0100 Subject: [PATCH 74/75] ci: change CWD of unittest run --- .github/workflows/tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index d958416..46bccc3 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -35,7 +35,7 @@ jobs: python3 -m pip install .[test] - name: Run tests run: | - coverage run --omit=tests/*,*/_*.py -m unittest discover -v -p *_test.py ./tests + coverage run --omit=tests/*,*/_*.py -m unittest discover -v -p *_test.py . coverage xml - name: Upload to codecov uses: codecov/codecov-action@v3 From 0fee476bed08a816e80c258834fecb7d1a990dae Mon Sep 17 00:00:00 2001 From: Roman Ludwig <48687784+rmnldwg@users.noreply.github.com> Date: Mon, 4 Mar 2024 14:33:41 +0100 Subject: [PATCH 75/75] chore: add emcee to test dependencies --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 27c8fd9..6d8e8dd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -37,6 +37,7 @@ dynamic = ["version"] test = [ "scipy < 2", "coverage < 8", + "emcee < 4", ] dev = [ "pre-commit",