From 64b86a0bb6dde92027da70d8be13ac7c4b5258d4 Mon Sep 17 00:00:00 2001
From: Margaret Duff <margaret.duff@stfc.ac.uk>
Date: Thu, 22 Aug 2024 15:09:49 +0000
Subject: [PATCH] Documentation updates and unit test fix

---
 .../cil/optimisation/algorithms/SPDHG.py      | 115 +++++++-----------
 Wrappers/Python/test/test_algorithms.py       |  62 +++++-----
 2 files changed, 80 insertions(+), 97 deletions(-)

diff --git a/Wrappers/Python/cil/optimisation/algorithms/SPDHG.py b/Wrappers/Python/cil/optimisation/algorithms/SPDHG.py
index 62710eac9e..9bd9258f86 100644
--- a/Wrappers/Python/cil/optimisation/algorithms/SPDHG.py
+++ b/Wrappers/Python/cil/optimisation/algorithms/SPDHG.py
@@ -24,15 +24,18 @@
 from cil.optimisation.utilities import Sampler
 from numbers import Number
 import warnings
+from cil.framework import BlockDataContainer
 
 log = logging.getLogger(__name__)
 
 
 class SPDHG(Algorithm):
     r'''Stochastic Primal Dual Hybrid Gradient (SPDHG) solves separable optimisation problems of the type: 
+    .. math::
 
-    Problem:
-    .. math:: \min_{x} f(Kx) + g(x) = \min_{x} \sum f_i(K_i x) + g(x)
+      \min_{x} f(Kx) + g(x) = \min_{x} \sum f_i(K_i x) + g(x)
+
+    where :math:`f_i` and the regulariser :math:`g` need to be proper, convex and lower semi-continuous.
 
     Parameters
     ----------
@@ -42,11 +45,11 @@ class SPDHG(Algorithm):
         A convex function with a "simple" proximal
     operator : BlockOperator
         BlockOperator must contain Linear Operators
-    tau : positive float, optional, default=None
-        Step size parameter for Primal problem
-    sigma : list of positive float, optional, default=None
-        List of Step size parameters for Dual problem
-    initial : DataContainer, optional, default=None
+    tau : positive float, optional, default= see note
+        Step size parameter for primal problem
+    sigma : list of positive float, optional, default= see note
+        List of Step size parameters for dual problem
+    initial : DataContainer, optional, default to a zero DataContainer in the range of the `operator`.
         Initial point for the SPDHG algorithm
     gamma : float, optional
             Parameter controlling the trade-off between the primal and dual step sizes
@@ -56,13 +59,6 @@ class SPDHG(Algorithm):
             Consider that the sampler is called a large number of times this argument holds the expected number of times each index would be called,  normalised to 1. Note that this should not be passed if the provided sampler has it as an attribute. 
 
 
-    **kwargs
-    ---------
-    prob : list of floats, optional, default=None
-        List of probabilities. If None each subset will have probability = 1/number of subsets. To be deprecated.
-    norms : list of floats
-        Precalculated list of norms of the operators. To be deprecated and placed by the `set_norms` functionalist in a BlockOperator.
-
 
     Example
     -------
@@ -95,17 +91,17 @@ class SPDHG(Algorithm):
 
     - Case 1: If neither `sigma` or `tau` are provided then `sigma` is set using the formula:
 
-    .. math:: \sigma_i=0.99 / (\|K_i\|**2)
+    .. math:: \sigma_i= \frac{0.99}{\|K_i\|^2}
 
     and `tau` is set as per case 2
 
     - Case 2: If `sigma` is provided but not `tau` then `tau` is calculated using the formula 
 
-    .. math:: \tau = 0.99\min_i([p_i / (\sigma_i * \|K_i\|**2) ])
+    .. math:: \tau = 0.99\min_i( \frac{p_i}{ (\sigma_i  \|K_i\|^2) })
 
     - Case 3: If `tau` is provided but not `sigma` then `sigma` is calculated using the formula
 
-    .. math:: \sigma_i=0.99 p_i / (\tau*\|K_i\|**2)
+    .. math:: \sigma_i= \frac{0.99 p_i}{\tau\|K_i\|^2}
 
     - Case 4: Both `sigma` and `tau` are provided.
 
@@ -115,7 +111,7 @@ class SPDHG(Algorithm):
 
     Convergence is guaranteed provided that [2, eq. (12)]:
 
-    .. math:: \|\sigma[i]^{1/2} * K[i] * tau^{1/2} \|^2  < p_i for all i
+    .. math:: \|\sigma[i]^{1/2}  K[i]  \tau^{1/2} \|^2  < p_i \text{ for all } i
 
     References
     ----------
@@ -133,42 +129,19 @@ class SPDHG(Algorithm):
     def __init__(self, f=None, g=None, operator=None, tau=None, sigma=None,
                  initial=None, sampler=None, prob_weights=None,   **kwargs):
 
-
         update_objective_interval = kwargs.pop('update_objective_interval', 1)
-        super(SPDHG, self).__init__(update_objective_interval=update_objective_interval)
+        super(SPDHG, self).__init__(
+            update_objective_interval=update_objective_interval)
 
         self.set_up(f=f, g=g, operator=operator, sigma=sigma, tau=tau,
                     initial=initial,  sampler=sampler, prob_weights=prob_weights,  **kwargs)
 
     def set_up(self, f, g, operator, sigma=None, tau=None,
                initial=None,   sampler=None, prob_weights=None, **deprecated_kwargs):
-
         '''set-up of the algorithm
-        
-        Parameters
-        ----------
-        f : BlockFunction
-            Each must be a convex function with a "simple" proximal method of its conjugate
-        g : Function
-            A convex function with a "simple" proximal
-        operator : BlockOperator
-            BlockOperator must contain Linear Operators
-        tau : positive float, optional, default=None
-            Step size parameter for Primal problem
-        sigma : list of positive float, optional, default=None
-            List of Step size parameters for Dual problem
-        initial : DataContainer, optional, default=None
-            Initial point for the SPDHG algorithm
-        gamma : float, optional
-            parameter controlling the trade-off between the primal and dual step sizes
-        sampler: optional, an instance of a `cil.optimisation.utilities.Sampler` class or another class with the function __next__(self) implemented outputting a sample from {1,...,len(operator)}. 
-             Method of selecting the next index for the SPDHG update. If None, a sampler will be created for random sampling  with replacement and each index will have probability = 1/len(operator)
-        prob_weights: optional, list of floats of length num_indices that sum to 1. Defaults to [1/len(operator)]*len(operator)
-            Consider that the sampler is called a large number of times this argument holds the expected number of times each index would be called,  normalised to 1. Note that this should not be passed if the provided sampler has it as an attribute. 
-
         '''
         log.info("%s setting up", self.__class__.__name__)
-    
+
         # algorithmic parameters
         self.f = f
         self.g = g
@@ -211,13 +184,15 @@ def set_up(self, f, g, operator, sigma=None, tau=None,
 
         # initialize dual variable to 0
         self._y_old = operator.range_geometry().allocate(0)
+        if not isinstance(self._y_old, BlockDataContainer): #This can be removed once #1863 is fixed
+            self._y_old =BlockDataContainer(self._y_old)
 
         # initialize variable z corresponding to back-projected dual variable
         self._z = operator.domain_geometry().allocate(0)
         self._zbar = operator.domain_geometry().allocate(0)
         # relaxation parameter
         self._theta = 1
-        
+
         self.configured = True
         logging.info("{} configured".format(self.__class__.__name__, ))
 
@@ -239,7 +214,7 @@ def _deprecated_kwargs(self, deprecated_kwargs):
 
         if prob is not None:
             if self._prob_weights is None:
-                warnings.warn('`prob` is being deprecated to be replaced with a sampler class and `prob_weights`. To randomly sample with replacement use "sampler=Sampler.randomWithReplacement(number_of_subsets,  prob=prob). To pass probabilites to the calculation for `sigma` and `tau` please use `prob_weights`. ')
+                warnings.warn('`prob` is being deprecated to be replaced with a sampler class and `prob_weights`. To randomly sample with replacement use "sampler=Sampler.randomWithReplacement(number_of_subsets,  prob=prob). To pass probabilities to the calculation for `sigma` and `tau` please use `prob_weights`. ', DeprecationWarning, stacklevel=2)
                 self._prob_weights = prob
             else:
 
@@ -249,11 +224,10 @@ def _deprecated_kwargs(self, deprecated_kwargs):
         if norms is not None:
             self.operator.set_norms(norms)
             warnings.warn(
-                ' `norms` is being deprecated, use instead the `BlockOperator` function `set_norms`')
+                ' `norms` is being deprecated, use instead the `BlockOperator` function `set_norms`', DeprecationWarning, stacklevel=2)
 
         if deprecated_kwargs:
-            raise ValueError("Additional keyword arguments passed but not used: {}".format(
-                deprecated_kwargs))
+            raise ValueError("Additional keyword arguments passed but not used: {}".format(deprecated_kwargs))
 
     @property
     def sigma(self):
@@ -267,9 +241,10 @@ def set_step_sizes_from_ratio(self, gamma=1.0, rho=0.99):
         r""" Sets gamma, the step-size ratio for the SPDHG algorithm. Currently gamma takes a scalar value.
 
         The step sizes `sigma` and `tau` are set using the equations:
-        .. math:: \sigma_i=\gamma\rho / (\|K_i\|**2)\\
-            
-        .. math::  \tau = \rho\min_i([p_i / (\sigma_i * \|K_i\|**2) ])
+
+        .. math:: \sigma_i= \frac{\gamma\rho }{\|K_i\|^2}
+
+        .. math::  \tau = \rho\min_i([ \frac{p_i }{\sigma_i  \|K_i\|^2})
 
 
         Parameters
@@ -279,8 +254,8 @@ def set_step_sizes_from_ratio(self, gamma=1.0, rho=0.99):
             rho : Positive float
                  parameter controlling the size of the product :math: \sigma\tau :math:
 
-        
-        
+
+
         """
         if isinstance(gamma, Number):
             if gamma <= 0:
@@ -311,28 +286,27 @@ def set_step_sizes(self, sigma=None, tau=None):
 
         - Case 1: If neither `sigma` or `tau` are provided then `sigma` is set using the formula:
 
-        .. math:: \sigma_i=0.99 / (\|K_i\|**2)`
-
+        .. math:: \sigma_i= \frac{0.99}{\|K_i\|^2}
 
         and `tau` is set as per case 2
 
         - Case 2: If `sigma` is provided but not `tau` then `tau` is calculated using the formula 
 
-        .. math:: \tau = 0.99\min_i([p_i / (\sigma_i * \|K_i\|**2) ])
+        .. math:: \tau = 0.99\min_i( \frac{p_i}{ (\sigma_i  \|K_i\|^2) })
 
         - Case 3: If `tau` is provided but not `sigma` then `sigma` is calculated using the formula
 
-        .. math:: \sigma_i=0.99 p_i / (\tau*\|K_i\|**2)
+        .. math:: \sigma_i= \frac{0.99 p_i}{\tau\|K_i\|^2}
 
         - Case 4: Both `sigma` and `tau` are provided.
-        
-        
+
+
         Parameters
         ----------
-            sigma : list of positive float, optional, default=None
-                List of Step size parameters for Dual problem
-            tau : positive float, optional, default=None
-                Step size parameter for Primal problem
+            sigma : list of positive float, optional, default= see docstring
+                List of Step size parameters for dual problem
+            tau : positive float, optional, default= see docstring
+                Step size parameter for primal problem
 
         """
         gamma = 1.
@@ -351,7 +325,7 @@ def set_step_sizes(self, sigma=None, tau=None):
             self._sigma = sigma
 
         elif tau is None:
-            self._sigma = [gamma* rho / ni for ni in self._norms]
+            self._sigma = [gamma * rho / ni for ni in self._norms]
         else:
             self._sigma = [
                 rho*pi / (tau*ni**2) for ni, pi in zip(self._norms, self._prob_weights)]
@@ -360,7 +334,7 @@ def set_step_sizes(self, sigma=None, tau=None):
             values = [rho*pi / (si * ni**2) for pi, ni,
                       si in zip(self._prob_weights, self._norms, self._sigma)]
             self._tau = min([value for value in values if value > 1e-8])
-            
+
         else:
             if isinstance(tau, Number) and tau > 0:
                 pass
@@ -377,11 +351,11 @@ def check_convergence(self):
         -------
         Boolean
             True if convergence criterion is satisfied. False if not satisfied or convergence is unknown. 
-            
+
         Note
         -----
         Convergence criterion currently can only be checked for scalar values of tau.
-        
+
         Note
         ----
         This checks the convergence criterion. Numerical errors may mean some sigma and tau values that satisfy the convergence criterion may not converge. 
@@ -433,7 +407,7 @@ def update(self):
         # zbar = z + (theta/p[i]) * x_tmp
 
         self._z.sapyb(1., self._x_tmp, self._theta /
-                     self._prob_weights[i], out=self._zbar)
+                      self._prob_weights[i], out=self._zbar)
 
         # save previous iteration
         self._save_previous_iteration(i, y_k)
@@ -455,6 +429,7 @@ def update_objective(self):
     @property
     def objective(self):
         '''The saved primal objectives. 
+
         Returns
         -------
         list
@@ -465,6 +440,7 @@ def objective(self):
     @property
     def dual_objective(self):
         '''The saved dual objectives. 
+
         Returns
         -------
         list
@@ -475,6 +451,7 @@ def dual_objective(self):
     @property
     def primal_dual_gap(self):
         '''The saved primal-dual gap. 
+
         Returns
         -------
         list
diff --git a/Wrappers/Python/test/test_algorithms.py b/Wrappers/Python/test/test_algorithms.py
index a10176dff1..310266f88f 100644
--- a/Wrappers/Python/test/test_algorithms.py
+++ b/Wrappers/Python/test/test_algorithms.py
@@ -1120,7 +1120,7 @@ def setUp(self):
         data = dataexample.SIMPLE_PHANTOM_2D.get(size=(20, 20))
         self.subsets = 10
         
-        data = dataexample.SIMPLE_PHANTOM_2D.get(size=(128, 128))
+        data = dataexample.SIMPLE_PHANTOM_2D.get(size=(16, 16))
 
         ig = data.geometry
         ig.voxel_size_x = 0.1
@@ -1277,42 +1277,48 @@ def test_spdhg_check_convergence(self):
         spdhg.set_step_sizes(sigma=None, tau=100)
         self.assertTrue(spdhg.check_convergence())
 
-    # def test_SPDHG_num_subsets_1(self): TODO: fix this!
-    #     data = dataexample.SIMPLE_PHANTOM_2D.get(size=(10, 10))
+    def test_SPDHG_num_subsets_1(self): 
+        data = dataexample.SIMPLE_PHANTOM_2D.get(size=(10, 10))
 
-    #     subsets = 1
+        subsets = 1
 
-    #     ig = data.geometry
-    #     ig.voxel_size_x = 0.1
-    #     ig.voxel_size_y = 0.1
+        ig = data.geometry
+        ig.voxel_size_x = 0.1
+        ig.voxel_size_y = 0.1
 
-    #     detectors = ig.shape[0]
-    #     angles = np.linspace(0, np.pi, 90)
-    #     ag = AcquisitionGeometry.create_Parallel2D().set_angles(
-    #         angles, angle_unit='radian').set_panel(detectors, 0.1)
-    #     # Select device
-    #     dev = 'cpu'
+        detectors = ig.shape[0]
+        angles = np.linspace(0, np.pi, 90)
+        ag = AcquisitionGeometry.create_Parallel2D().set_angles(
+            angles, angle_unit='radian').set_panel(detectors, 0.1)
+        # Select device
+        dev = 'cpu'
 
-    #     Aop = ProjectionOperator(ig, ag, dev)
+        Aop = ProjectionOperator(ig, ag, dev)
 
-    #     sin = Aop.direct(data)
-    #     partitioned_data = sin.partition(subsets, 'sequential')
-    #     A = BlockOperator(
-    #         *[IdentityOperator(partitioned_data[i].geometry) for i in range(subsets)])
+        sin = Aop.direct(data)
+        partitioned_data = sin.partition(subsets, 'sequential')
+        A = BlockOperator(
+            *[IdentityOperator(partitioned_data[i].geometry) for i in range(subsets)])
        
-    #     # block function
-    #     F = BlockFunction(*[L2NormSquared(b=partitioned_data[i])
-    #                              for i in range(subsets)])
-    #     alpha = 0.025
-    #     G = alpha * FGP_TV()
+        # block function
+        F = BlockFunction(*[L2NormSquared(b=partitioned_data[i])
+                                 for i in range(subsets)])
+        
+        F_phdhg=L2NormSquared(b=partitioned_data[0])
+        A_pdhg = IdentityOperator(partitioned_data[0].geometry) 
+                            
+        alpha = 0.025
+        G = alpha * FGP_TV()
+        
+        spdhg = SPDHG(f=F, g=G, operator=A,  update_objective_interval=10)
+        
+        spdhg.run(7)   
         
-    #     spdhg = SPDHG(f=F, g=G, operator=A,  update_objective_interval=10)
+        pdhg = PDHG(f=F_phdhg, g=G, operator=A_pdhg, update_objective_interval=10)
         
-    #     spdhg.run(7)   
-    #     pdhg = PDHG(f=F, g=G, operator=A, update_objective_interval=10)
+        pdhg.run(7) 
         
-    #     pdhg.run(7)       
-    #     self.assertNumpyArrayAlmostEqual(pdhg.solution.as_array(), spdhg.solution.as_array(), decimal=3)
+        self.assertNumpyArrayAlmostEqual(pdhg.solution.as_array(), spdhg.solution.as_array(), decimal=3)
 
     @unittest.skipUnless(has_astra, "cil-astra not available")
     def test_SPDHG_vs_PDHG_implicit(self):