diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 0fbceadd9e..4915cdb127 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -122,7 +122,7 @@ jobs: working-directory: recipe run: | conda install boa - conda mambabuild . -c conda-forge -c intel -c ccpi --python=${{ matrix.python-version }} --numpy=${{ matrix.numpy-version }} --output-folder . + conda mambabuild . -c conda-forge -c https://software.repos.intel.com/python/conda -c ccpi --python=${{ matrix.python-version }} --numpy=${{ matrix.numpy-version }} --output-folder . - name: Upload artifact of the conda package uses: actions/upload-artifact@v4 with: diff --git a/CHANGELOG.md b/CHANGELOG.md index 904ddf15f9..f123a5cdf0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,9 +1,14 @@ - -* XX.X.X - +* 24.x.x + - New Features: + - Added SAG and SAGA stochastic functions (#1624) + - Enhancements: + - New unit tests for operators and functions to check for in place errors and the behaviour of `out` (#1805) + +* 24.1.0 - New Features: - Added method to plot filter in `GenericFilteredBackProjection` (#1667) - - Added wavelet operator, wrapping PyWavelets operator as a CIL operator (#1618) + - Added wavelet operator, wrapping PyWavelets operator as a CIL operator (#1615) + - Added PaganinProcessor processor, to perform phase retrieval from phase contrast images (#1737) - Added L1Sparsity function, allowing calculations of `|Ax-b|_1` and its proximal, in the case of orthogonal operators, `A` (#1618) - Options in algorithms GD, ISTA and FISTA to pass a `cil.optimisation.utilities.StepSizeRule` or a `cil.optimisation.utilities.Preconditioner` (#1768) - An implementation of the Armijo Rule as a child class of `cil.optimisation.utilities.StepSizeRule` (#1768) @@ -13,8 +18,9 @@ - Raises error in `BlockDataContainer.pnorm` if the shape of the containers is not the same (#1799) - Operators and functions now also return when out is specified (#1742) - The CIL function class now has a `__neg__` function, so you can write `-YourFunction(x)` rather than `-1*YourFunction(x)` (#1808) + - Added documentation for the Partitioner to `framework.rst` (#1828) - Added CIL vs SIRF tests comparing preconditioned ISTA in CIL and MLEM in SIRF (#1823) - - New unit tests for operators and functions to check for in place errors and the behaviour of `out` (#1805) + - Update to CCPi-Regularisation toolkit v24.0.1 (#1868) - Bug fixes: - Gradient descent `update_objective` called twice on the initial point (#1789) - ProjectionMap operator bug fix in adjoint and added documentation (#1743) diff --git a/Dockerfile b/Dockerfile index 1a9321c574..5dc5f25bca 100644 --- a/Dockerfile +++ b/Dockerfile @@ -18,7 +18,7 @@ COPY --chown="${NB_USER}" scripts/requirements-test.yml environment.yml RUN sed -ri '/tigre|astra-toolbox| python /d' environment.yml \ && for pkg in 'jupyter-server-proxy>4.1.0' $CIL_EXTRA_PACKAGES; do echo " - $pkg" >> environment.yml; done \ && conda config --env --set channel_priority strict \ - && for ch in defaults nvidia ccpi intel conda-forge; do conda config --env --add channels $ch; done \ + && for ch in defaults nvidia ccpi https://software.repos.intel.com/python/conda conda-forge; do conda config --env --add channels $ch; done \ && mamba env update -n base \ && mamba clean -a -y -f \ && rm environment.yml \ diff --git a/README.md b/README.md index f371adf843..315cdff911 100644 --- a/README.md +++ b/README.md @@ -23,13 +23,13 @@ We recommend using either [`miniconda`](https://docs.conda.io/projects/miniconda
Install a new environment using: ```sh -conda create --name cil -c conda-forge -c intel -c ccpi cil=24.0.0 +conda create --name cil -c conda-forge -c https://software.repos.intel.com/python/conda -c ccpi cil=24.1.0 ``` To install CIL and the additional packages and plugins needed to run the [CIL demos](https://github.com/TomographicImaging/CIL-Demos) install the environment with: ```sh -conda create --name cil -c conda-forge -c intel -c ccpi cil=24.0.0 astra-toolbox=*=cuda* tigre ccpi-regulariser tomophantom ipywidgets +conda create --name cil -c conda-forge -c https://software.repos.intel.com/python/conda -c ccpi cil=24.1.0 astra-toolbox=*=cuda* tigre ccpi-regulariser tomophantom ipywidgets ``` where: @@ -44,7 +44,7 @@ where: CIL's [optimised FDK/FBP](https://github.com/TomographicImaging/CIL/discussions/1070) `recon` module requires: -1. the Intel [Integrated Performance Primitives](https://www.intel.com/content/www/us/en/developer/tools/oneapi/ipp.html#gs.gxwq5p) Library ([license](https://www.intel.com/content/dam/develop/external/us/en/documents/pdf/intel-simplified-software-license-version-august-2021.pdf)) which can be installed via conda from the `intel` [channel](https://anaconda.org/intel/ipp). +1. the Intel [Integrated Performance Primitives](https://www.intel.com/content/www/us/en/developer/tools/oneapi/ipp.html#gs.gxwq5p) Library ([license](https://www.intel.com/content/dam/develop/external/us/en/documents/pdf/intel-simplified-software-license-version-august-2021.pdf)) which can be installed via conda from the `https://software.repos.intel.com/python/conda` channel. 2. [TIGRE](https://github.com/CERN/TIGRE), which can be installed via conda from the `ccpi` channel. ### Docker diff --git a/Wrappers/Python/cil/framework/framework.py b/Wrappers/Python/cil/framework/framework.py index 283b5ad2a2..8b7e370375 100644 --- a/Wrappers/Python/cil/framework/framework.py +++ b/Wrappers/Python/cil/framework/framework.py @@ -967,7 +967,22 @@ def get_centre_slice(self): return self def calculate_magnification(self): - return [None, None, 1.0] + '''Method to calculate magnification and distance from the sample to + the detector using the detector positions and the rotation axis. + For parallel beam geometry the magnification is 1. + + Returns + ------- + list + A list containing the [0] distance from the source to the rotation + axis, [1] distance from the rotation axis to the detector, + [2] magnification of the system + + ''' + ab = (self.rotation_axis.position - self.detector.position) + dist_center_detector = float(numpy.sqrt(ab.dot(ab))) + + return [None, dist_center_detector, 1.0] class Parallel3D(SystemConfiguration): r'''This class creates the SystemConfiguration of a parallel beam 3D tomographic system @@ -1116,7 +1131,22 @@ def __eq__(self, other): return False def calculate_magnification(self): - return [None, None, 1.0] + '''Method to calculate magnification and distance from the sample to + the detector using the detector positions and the rotation axis.
+ For parallel beam geometry the magnification is 1. + + Returns + ------- + list + A list containing the [0] distance from the source to the rotation + axis, [1] distance from the rotation axis to the detector, + [2] magnification of the system + + ''' + ab = (self.rotation_axis.position - self.detector.position) + dist_center_detector = float(numpy.sqrt(ab.dot(ab))) + + return [None, dist_center_detector, 1.0] def get_centre_slice(self): """Returns the 2D system configuration corresponding to the centre slice diff --git a/Wrappers/Python/cil/optimisation/algorithms/Algorithm.py b/Wrappers/Python/cil/optimisation/algorithms/Algorithm.py index 6deb5d90e7..ad78b83d31 100644 --- a/Wrappers/Python/cil/optimisation/algorithms/Algorithm.py +++ b/Wrappers/Python/cil/optimisation/algorithms/Algorithm.py @@ -15,6 +15,7 @@ # # Authors: # CIL Developers, listed at: https://github.com/TomographicImaging/CIL/blob/master/NOTICE.txt +from itertools import count from numbers import Integral from typing import List, Optional from warnings import warn @@ -237,7 +238,9 @@ def run(self, iterations=None, callbacks: Optional[List[Callback]]=None, verbose # call `__next__` up to `iterations` times or until `StopIteration` is raised self.max_iteration = self.iteration + iterations - for _ in zip(range(self.iteration, self.iteration + iterations), self): + iters = (count(self.iteration) if np.isposinf(self.max_iteration) + else range(self.iteration, self.max_iteration)) + for _ in zip(iters, self): try: for callback in callbacks: callback(self) diff --git a/Wrappers/Python/cil/optimisation/functions/ApproximateGradientSumFunction.py b/Wrappers/Python/cil/optimisation/functions/ApproximateGradientSumFunction.py index bc2ce1fda2..7616417593 100644 --- a/Wrappers/Python/cil/optimisation/functions/ApproximateGradientSumFunction.py +++ b/Wrappers/Python/cil/optimisation/functions/ApproximateGradientSumFunction.py @@ -229,7 +229,7 @@ def data_passes_indices(self): def data_passes(self): """ The property :code:`data_passes` is a list of floats that holds the amount of data that has been processed up until each call of `gradient`. This list is updated each time `gradient` is called by appending the proportion of the data used when calculating the approximate gradient since the class was initialised (a full gradient calculation would be 1 full data pass). Warning: if your functions do not contain an equal `amount` of data, for example your data was not partitioned into equal batches, then you must first use the `set_data_partition_weights` function for this to be accurate. """ data_passes = [] - for el in self._data_passes_indices: + for el in self.data_passes_indices: try: data_passes.append(data_passes[-1]) except IndexError: diff --git a/Wrappers/Python/cil/optimisation/functions/SAGFunction.py b/Wrappers/Python/cil/optimisation/functions/SAGFunction.py new file mode 100644 index 0000000000..31e0a0ca30 --- /dev/null +++ b/Wrappers/Python/cil/optimisation/functions/SAGFunction.py @@ -0,0 +1,220 @@ +# Copyright 2024 United Kingdom Research and Innovation +# Copyright 2024 The University of Manchester +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# Authors: +# - CIL Developers, listed at: https://github.com/TomographicImaging/CIL/blob/master/NOTICE.txt +# - Daniel Deidda (National Physical Laboratory, UK) +# - Claire Delplancke (Electricite de France, Research and Development) +# - Ashley Gillman (Australian e-Health Res. Ctr., CSIRO, Brisbane, Queensland, Australia) +# - Zeljko Kereta (Department of Computer Science, University College London, UK) +# - Evgueni Ovtchinnikov (STFC - UKRI) +# - Georg Schramm (Department of Imaging and Pathology, Division of Nuclear Medicine, KU Leuven, Leuven, Belgium) + +from .ApproximateGradientSumFunction import ApproximateGradientSumFunction +import numpy as np + + +class SAGFunction(ApproximateGradientSumFunction): + + r""" + The stochastic average gradient (SAG) function takes an index :math:`i_k` and calculates the approximate gradient of :math:`\sum_{i=0}^{n-1}f_i` at iteration :math:`x_k` as + + .. math :: + \sum_{i=0}^{n-1} g_i^k \qquad \text{where} \qquad g_i^k= \begin{cases} + \nabla f_i(x_k), \text{ if } i=i_k\\ + g_i^{k-1},\text{ otherwise } + \end{cases} + + + + + The idea is that by incorporating a memory of previous gradient values the SAG method can achieve a faster convergence rate than black-box stochastic gradient methods. + + Note + ----- + Compared with the literature, we do not divide by :math:`n`, the number of functions, so that we return an approximate gradient of the whole sum function and not an average gradient. + + Reference + ---------- + Schmidt, M., Le Roux, N. and Bach, F., 2017. Minimizing finite sums with the stochastic average gradient. Mathematical Programming, 162, pp.83-112. https://doi.org/10.1007/s10107-016-1030-6. + + Parameters: + ----------- + functions : `list` of functions + A list of functions: :math:`[f_{0}, f_{1}, ..., f_{n-1}]`. Each function is assumed to be smooth with an implemented :func:`~Function.gradient` method. All functions must have the same domain. The number of functions (equivalently the length of the list `n`) must be strictly greater than 1. + sampler: An instance of a CIL Sampler class (:meth:`~optimisation.utilities.sampler`) or of another class which has a `next` function implemented to output integers in :math:`{0,...,n-1}`. + This sampler is called each time `gradient` is called and sets the internal `function_num` passed to the `approximate_gradient` function. Default is `Sampler.random_with_replacement(len(functions))`. + + Note + ------ + + The user has the option of calling the method `warm_start_approximate_gradients` after initialising this class. This will compute and store the gradient for each function at an initial point, equivalently setting :math:`g_i^0=\nabla f_i(x_0)` for initial point :math:`x_0`. If this method is not called, the gradients are initialised with zeros. + + Note + ------ + + This function's memory requirements are `n + 3` times the image space; for example, with 100 subsets the memory requirement is 103 images.
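+ + Example + ------- + A minimal usage sketch, where `fs` and `initial` are placeholders for a list of smooth CIL functions on a common domain and a point in that domain, with `GD` imported from `cil.optimisation.algorithms` and `Sampler` from `cil.optimisation.utilities`: + + >>> sampler = Sampler.random_with_replacement(len(fs)) + >>> F = SAGFunction(fs, sampler) + >>> F.warm_start_approximate_gradients(initial) + >>> algorithm = GD(initial=initial, objective_function=F, step_size=0.05) + >>> algorithm.run(100)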
+ + + """ + + def __init__(self, functions, sampler=None): + self._list_stored_gradients = None + self._full_gradient_at_iterate = None + self._warm_start_just_done = False + self._sampled_grad = None + + super(SAGFunction, self).__init__(functions, sampler) + + + + + def approximate_gradient(self, x, function_num, out=None): + """ SAG approximate gradient, calculated at the point :math:`x` and updated using the function index given by `function_num`. + + Parameters + ---------- + x : DataContainer (e.g. ImageData object) + Element in the domain of the `functions` + function_num: `int` + Between 0 and the number of functions in the list + + """ + + + if self._list_stored_gradients is None: # Initialise the stored gradients on the first call of gradient unless using warm start. + self._list_stored_gradients = [ + 0*x for fi in self.functions] + self._full_gradient_at_iterate = 0*x + self._sampled_grad = x.copy() + self._stochastic_grad_difference = x.copy() + + if self.function_num >= self.num_functions or self.function_num<0 : # check the sampler and raise an error if needed + raise IndexError(f"The sampler has produced the index {self.function_num} which does not match the expected range of available functions to sample from. Please ensure your sampler only selects from [0,1,...,len(functions)-1] ") + + + # Calculate the gradient of the sampled function at the current iterate + self.functions[function_num].gradient(x, out=self._sampled_grad) + + + # Calculate the difference between the new gradient of the sampled function and the stored one + self._sampled_grad.sapyb( + 1., self._list_stored_gradients[function_num], -1., out=self._stochastic_grad_difference) + + # Calculate the approximate gradient + out = self._update_approx_gradient(out) + + # Update the stored gradients + self._list_stored_gradients[function_num].fill( + self._sampled_grad) + + # Calculate the stored full gradient + self._full_gradient_at_iterate.sapyb( + 1., self._stochastic_grad_difference, 1., out=self._full_gradient_at_iterate) + + return out + + def _update_approx_gradient(self, out): + """Internal function used to differentiate between the SAG and SAGA calculations. This is the SAG approximation: """ + out = self._stochastic_grad_difference.sapyb( + 1., self._full_gradient_at_iterate, 1., out=out) + + return out + + def warm_start_approximate_gradients(self, initial): + """A function to warm start SAG or SAGA algorithms by initialising all the gradients at an initial point. Equivalently setting :math:`g_i^0=\nabla f_i(x_0)` for initial point :math:`x_0`. + + Parameters + ---------- + initial: DataContainer, + The initial point to warmstart the calculation + + Note + ---- + When using SAG or SAGA with a deterministic algorithm, you should warm start the SAG-SAGA Function with the same initial point that you initialise the algorithm + + """ + self._list_stored_gradients = [ + fi.gradient(initial) for fi in self.functions] + self._full_gradient_at_iterate = np.sum(self._list_stored_gradients) + self._update_data_passes_indices(list(range(self.num_functions))) + self._sampled_grad = initial.copy() + self._stochastic_grad_difference = initial.copy() + + @property + def data_passes_indices(self): + """ The property :code:`data_passes_indices` is a list of lists holding the indices of the functions that are processed in each call of `gradient`. This list is updated each time `gradient` is called by appending a list of the indices of the functions used to calculate the gradient. 
+ This is overridden from the base class to first check whether the approximate gradient was warm started and, if it was, ensure that the first element of `data_passes_indices` contains each index used to warm start and the index used in the first call to `gradient`. Thus the length of `data_passes_indices` is always equal to the number of calls to `gradient`. + """ + ret = self._data_passes_indices[:] + if len(ret[0]) == self.num_functions: + a = ret.pop(1) + ret[0] += a + return ret + +class SAGAFunction(SAGFunction): + + r""" + SAGA (SAG-Amélioré) is an accelerated version of the stochastic average gradient (SAG) function which takes an index :math:`i_k` and calculates the approximate gradient of :math:`\sum_{i=0}^{n-1}f_i` at iteration :math:`x_k` as + + .. math :: + n\left(g_{i_k}^{k}-g_{i_k}^{k-1}\right)+\sum_{i=0}^{n-1} g_i^{k-1} \qquad \text{where} \qquad g_i^k= \begin{cases} + \nabla f_i(x_k), \text{ if } i=i_k\\ + g_i^{k-1},\text{ otherwise} + \end{cases} + + + SAGA improves on the theory behind SAG and SVRG, with better theoretical convergence rates. Compared to SAG, it is an unbiased estimator of the full gradient. + + Note + ------ + Compared with the literature, we do not divide by :math:`n`, the number of functions, so that we return an approximate gradient of the whole sum function and not an average gradient. + + Note + ------ + + This function's memory requirements are `n + 3` times the image space; for example, with 100 subsets the memory requirement is 103 images. + + Reference + ---------- + Defazio, A., Bach, F. and Lacoste-Julien, S., 2014. SAGA: A fast incremental gradient method with support for non-strongly convex composite objectives. Advances in neural information processing systems, 27. https://proceedings.neurips.cc/paper_files/paper/2014/file/ede7e2b6d13a41ddf9f4bdef84fdc737-Paper.pdf + + + Parameters: + ----------- + functions : `list` of functions + A list of functions: :code:`[f_{0}, f_{1}, ..., f_{n-1}]`. Each function is assumed to be a smooth function with an implemented :func:`~Function.gradient` method. Each function must have the same domain. The number of functions must be strictly greater than 1. + sampler: An instance of one of the :meth:`~optimisation.utilities.sampler` classes which has a `next` function implemented and a `num_indices` property. + This sampler is called each time gradient is called and sets the internal `function_num` passed to the `approximate_gradient` function. The `num_indices` must match the number of functions provided. Default is `Sampler.random_with_replacement(len(functions))`. + + Note + ---- + The user has the option of calling the method `warm_start_approximate_gradients` after initialising this class. This will compute and store the gradient for each function at an initial point, equivalently setting :math:`g_i^0=\nabla f_i(x_0)` for initial point :math:`x_0`. If this method is not called, the gradients are initialised with zeros. + + + """ + + def __init__(self, functions, sampler=None): + super(SAGAFunction, self).__init__(functions, sampler) + + + def _update_approx_gradient(self, out): + """Internal function used to differentiate between the SAG and SAGA calculations.
This is the SAGA approximation and differs in the constants multiplying the gradients: """ + + # Due to the convention that we follow: without the 1/n factor + out= self._stochastic_grad_difference.sapyb( + self.num_functions, self._full_gradient_at_iterate, 1., out) + + return out \ No newline at end of file diff --git a/Wrappers/Python/cil/optimisation/functions/__init__.py b/Wrappers/Python/cil/optimisation/functions/__init__.py index 7f7c989123..96bc3de02f 100644 --- a/Wrappers/Python/cil/optimisation/functions/__init__.py +++ b/Wrappers/Python/cil/optimisation/functions/__init__.py @@ -37,4 +37,5 @@ from .L1Sparsity import L1Sparsity from .ApproximateGradientSumFunction import ApproximateGradientSumFunction from .SGFunction import SGFunction +from .SAGFunction import SAGFunction, SAGAFunction diff --git a/Wrappers/Python/cil/plugins/ccpi_regularisation/functions/regularisers.py b/Wrappers/Python/cil/plugins/ccpi_regularisation/functions/regularisers.py index 46aabc94d4..c863982808 100644 --- a/Wrappers/Python/cil/plugins/ccpi_regularisation/functions/regularisers.py +++ b/Wrappers/Python/cil/plugins/ccpi_regularisation/functions/regularisers.py @@ -20,7 +20,7 @@ from ccpi.filters import regularisers from ccpi.filters.TV import TV_ENERGY except ImportError as exc: - raise ImportError('Please `conda install "ccpi::ccpi-regulariser>=24"`') from exc + raise ImportError('Please `conda install "ccpi::ccpi-regulariser>=24.0.1"`') from exc from cil.framework import DataOrder diff --git a/Wrappers/Python/cil/processors/PaganinProcessor.py b/Wrappers/Python/cil/processors/PaganinProcessor.py new file mode 100644 index 0000000000..33ad87841b --- /dev/null +++ b/Wrappers/Python/cil/processors/PaganinProcessor.py @@ -0,0 +1,574 @@ +# Copyright 2024 United Kingdom Research and Innovation +# Copyright 2024 The University of Manchester +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +# Authors: +# CIL Developers, listed at: +# https://github.com/TomographicImaging/CIL/blob/master/NOTICE.txt + +from cil.framework import Processor, AcquisitionData, DataOrder + +import numpy as np +from scipy.fft import fft2 +from scipy.fft import ifft2 +from scipy.fft import ifftshift +from scipy import constants +from tqdm import tqdm +import logging + +log = logging.getLogger(__name__) + +class PaganinProcessor(Processor): + + r""" + Processor to retrieve quantitative information from phase contrast images + using the Paganin phase retrieval algorithm described in [1] + + Parameters + ---------- + delta: float (optional) + Real part of the deviation of the material refractive index from 1, + where the refractive index is :math:`n = (1 - \delta) + i \beta`. + Energy-dependent refractive index information for x-ray wavelengths + can be found at [2]. Default is 1 + + beta: float (optional) + Imaginary part of the material refractive index, where the refractive + index is :math:`n = (1 - \delta) + i \beta`. Energy-dependent + refractive index information for x-ray wavelengths can be found at + [2]. Default is 1e-2 + + energy: float (optional) + Energy of the incident photon, default is 40000 + + energy_units: string (optional) + Energy units, default is 'eV' + + full_retrieval : bool, optional + If True, perform the full phase retrieval and return the thickness. If + False, return a filtered image, default is True + + filter_type: string (optional) + The form of the Paganin filter to use, either 'paganin_method' + (default) or 'generalised_paganin_method' as described in [3] + + pad: int (optional) + Number of pixels to pad the image in Fourier space to reduce aliasing, + default is 0 + + return_units: string (optional) + The distance units to return the sample thickness in, must be one of + 'm', 'cm', 'mm' or 'um'. Only applies if full_retrieval=True (default + is 'cm') + + Returns + ------- + AcquisitionData + AcquisitionData corrected for phase effects, retrieved sample thickness + or (if :code:`full_retrieval=False`) filtered data + + Example + ------- + >>> processor = PaganinProcessor(delta=5, beta=0.05, energy=18000) + >>> processor.set_input(data) + >>> thickness = processor.get_output() + + Example + ------- + >>> processor = PaganinProcessor(delta=1, beta=10e2, full_retrieval=False) + >>> processor.set_input(data) + >>> filtered_image = processor.get_output() + + Example + ------- + >>> processor = PaganinProcessor() + >>> processor.set_input(data) + >>> thickness = processor.get_output(override_filter={'alpha':10}) + >>> phase_retrieved_image = thickness*processor.mu + + Notes + ----- + This processor will work most efficiently using the CIL data order with + `data.reorder('cil')` + + Notes + ----- + This processor uses the phase retrieval algorithm described by Paganin et + al. [1] to retrieve the sample thickness + + ..
math:: T(x,y) = - \frac{1}{\mu}\ln\left (\mathcal{F}^{-1}\left + (\frac{\mathcal{F}\left ( M^2I_{norm}(x, y,z = \Delta) \right )}{1 + + \alpha\left ( k_x^2 + k_y^2 \right )} \right )\right ), + + where + + - :math:`T` is the sample thickness, + - :math:`\mu = \frac{4\pi\beta}{\lambda}` is the material linear + attenuation coefficient where :math:`\beta` is the imaginary part of the + material refractive index and :math:`\lambda=\frac{hc}{E}` is the probe + wavelength, + - :math:`M` is the magnification at the detector, + - :math:`I_{norm}` is the input image which is expected to be the + normalised transmission data, + - :math:`\Delta` is the propagation distance, + - :math:`\alpha = \frac{\Delta\delta}{\mu}` is a parameter determining + the strength of the filter to be applied in Fourier space where + :math:`\delta` is the real part of the deviation of the material + refractive index from 1 + - :math:`k_x, k_y = \left ( \frac{2\pi p}{N_xW}, \frac{2\pi q}{N_yW} + \right )` where :math:`p` and :math:`q` are co-ordinates in a Fourier + mesh in the range :math:`-N_x/2` to :math:`N_x/2` for an image with + size :math:`N_x, N_y` and pixel size :math:`W`. + + A generalised form of the Paganin phase retrieval method can be called + using :code:`filter_type='generalised_paganin_method'`, which uses the + form of the algorithm described in [3] + + .. math:: T(x,y) = -\frac{1}{\mu}\ln\left (\mathcal{F}^{-1}\left (\frac{ + \mathcal{F}\left ( M^2I_{norm}(x, y,z = \Delta) \right )}{1 - \frac{2 + \alpha}{W^2}\left ( \cos(Wk_x) + \cos(Wk_y) -2 \right )} \right ) + \right ) + + The phase retrieval is valid under the following assumptions + + - used with paraxial propagation-induced phase contrast images which + can be assumed to be single-material locally + - using intensity data which has been flat field corrected + - and under the assumption that the Fresnel number + :math:`F_N = W^2/(\lambda\Delta) \gg 1` + + To apply a filter to images using the Paganin method, call + :code:`full_retrieval=False`. In this case the pre-scaling and conversion + to absorption are not applied so the requirement to supply flat field + corrected intensity data is relaxed, + + ..
math:: I_{filt} = \mathcal{F}^{-1}\left (\frac{\mathcal{F}\left ( + I(x, y,z = \Delta) \right )} + {1 + \alpha\left ( k_x^2 + k_y^2 \right )} \right ) + + References + --------- + - [1] https://doi.org/10.1046/j.1365-2818.2002.01010.x + - [2] https://henke.lbl.gov/optical_constants/getdb2.html + - [3] https://iopscience.iop.org/article/10.1088/2040-8986/abbab9 + With thanks to colleagues at DTU for help with the initial implementation + of the phase retrieval algorithm + + """ + + def __init__(self, delta=1, beta=1e-2, energy=40000, + energy_units='eV', full_retrieval=True, + filter_type='paganin_method', pad=0, + return_units='cm'): + + kwargs = { + 'energy' : energy, + 'wavelength' : self._energy_to_wavelength(energy, energy_units, + return_units), + 'delta': delta, + 'beta': beta, + '_delta_user' : delta, + '_beta_user' : beta, + 'filter_Nx' : None, + 'filter_Ny' : None, + 'filter_type' : filter_type, + 'mu' : None, + 'alpha' : None, + 'pixel_size' : None, + 'propagation_distance' : None, + 'magnification' : None, + 'filter' : None, + 'full_retrieval' : full_retrieval, + 'pad' : pad, + 'override_geometry' : None, + 'override_filter' : None, + 'return_units' : return_units + } + + super(PaganinProcessor, self).__init__(**kwargs) + + def check_input(self, data): + if not isinstance(data, AcquisitionData): + raise TypeError('Processor only supports AcquisitionData') + + return True + + def process(self, out=None): + + data = self.get_input() + cil_order = tuple(DataOrder.get_order_for_engine('cil', data.geometry)) + if data.dimension_labels != cil_order: + log.warning(msg="This processor will work most efficiently using\ \nCIL data order, consider using `data.reorder('cil')`") + + # set the geometry parameters to use from data.geometry unless the + # geometry is overridden with an override_geometry + self._set_geometry(data.geometry, self.override_geometry) + + if out is None: + out = data.geometry.allocate(None) + + # make slice indices to get the projection + slice_proj = [slice(None)]*len(data.shape) + angle_axis = data.get_dimension_axis('angle') + slice_proj[angle_axis] = 0 + + if data.geometry.channels>1: + channel_axis = data.get_dimension_axis('channel') + slice_proj[channel_axis] = 0 + else: + channel_axis = None + + data_proj = data.as_array()[tuple(slice_proj)] + + # create an empty axis if the data is 2D + if len(data_proj.shape) == 1: + data.array = np.expand_dims(data.array, len(data.shape)) + slice_proj.append(slice(None)) + data_proj = data.as_array()[tuple(slice_proj)] + + elif len(data_proj.shape) == 2: + pass + else: + raise ValueError('Data must be 2D or 3D per channel') + + # create a filter based on the shape of the data + filter_shape = np.shape(data_proj) + self.filter_Nx = filter_shape[0]+self.pad*2 + self.filter_Ny = filter_shape[1]+self.pad*2 + self._create_filter(self.override_filter) + + # pre-calculate the scaling factor + scaling_factor = -(1/self.mu) + + # allocate padded buffer + padded_buffer = np.zeros(tuple(x+self.pad*2 for x in data_proj.shape)) + + # make slice indices to unpad the data + if self.pad>0: + slice_pad = tuple([slice(self.pad,-self.pad)] + *len(padded_buffer.shape)) + else: + slice_pad = tuple([slice(None)]*len(padded_buffer.shape)) + # loop over the channels + for j in range(data.geometry.channels): + if channel_axis is not None: + slice_proj[channel_axis] = j + # loop over the projections + for i in tqdm(range(len(out.geometry.angles))): + + slice_proj[angle_axis] = i + padded_buffer[slice_pad] = data.array[(tuple(slice_proj))] + 
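# filter the projection in Fourier space; with full retrieval, the filtered transmission image is also converted to a sample thickness via -log()/mu, following the formulas in the class docstring +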
if self.full_retrieval: + # apply the filter in fourier space, apply log and scale + # by magnification + fI = fft2(self.magnification**2*padded_buffer) + iffI = ifft2(fI*self.filter) + # apply scaling factor + padded_buffer = scaling_factor*np.log(iffI) + else: + # apply the filter in fourier space + fI = fft2(padded_buffer) + padded_buffer = ifft2(fI*self.filter) + if data.geometry.channels>1: + out.fill(np.squeeze(padded_buffer[slice_pad]), angle=i, + channel=j) + else: + out.fill(np.squeeze(padded_buffer[slice_pad]), angle=i) + data.array = np.squeeze(data.array) + return out + + def set_input(self, dataset): + """ + Set the input data to the processor + + Parameters + ---------- + dataset : AcquisitionData + The input AcquisitionData + """ + return super().set_input(dataset) + + def get_output(self, out=None, override_geometry=None, + override_filter=None): + r''' + Function to get output from the PaganinProcessor + + Parameters + ---------- + out : DataContainer, optional + Fills the referenced DataContainer with the processed data + + override_geometry: dict, optional + Geometry parameters to use in the phase retrieval if you want to + over-ride values found in `data.geometry`. Specify parameters as a + dictionary :code:`{'parameter':value}` where parameter is + :code:`'magnification', 'propagation_distance'` or + :code:`'pixel_size'` and value is the new value to use. Specify + distance parameters in the same units as :code:`return_units` + (default is cm). + + override_filter: dict, optional + Over-ride the filter parameters to use in the phase retrieval. + Specify parameters as :code:`{'parameter':value}` where parameter + is :code:`'delta', 'beta'` or :code:`'alpha'` and value is the new + value to use. + + Returns + ------- + AcquisitionData + AcquisitionData corrected for phase effects, retrieved sample + thickness or (if :code:`full_retrieval=False`) filtered data + + Example + ------- + >>> processor = PaganinProcessor(delta=5, beta=0.05, energy=18000) + >>> processor.set_input(data) + >>> thickness = processor.get_output() + + Example + ------- + >>> processor = PaganinProcessor(delta=1, beta=10e2, + full_retrieval=False) + >>> processor.set_input(data) + >>> filtered_image = processor.get_output() + + Example + ------- + >>> processor = PaganinProcessor() + >>> processor.set_input(data) + >>> thickness = processor.get_output(override_filter={'alpha':10}) + >>> phase_retrieved_image = thickness*processor.mu + + Notes + ----- + If :code:`'alpha'` is specified in override_filter the new value will + be used and delta will be ignored but beta will still be used to + calculate :math:`\mu = \frac{4\pi\beta}{\lambda}` which is used for + scaling the thickness, therefore it is only recommended to specify + alpha when also using :code:`full_retrieval=False`, or + re-scaling the result by :math:`\mu` e.g.
:code:`thickness*processor.mu`. If :code:`alpha` is not specified, + it will be calculated as :math:`\frac{\Delta\delta\lambda}{4\pi\beta}` + + ''' + self.override_geometry = override_geometry + self.override_filter = override_filter + + return super().get_output(out) + + def __call__(self, x, out=None, override_geometry=None, + override_filter=None): + self.set_input(x) + + if out is None: + out = self.get_output(override_geometry=override_geometry, + override_filter=override_filter) + else: + self.get_output(out=out, override_geometry=override_geometry, + override_filter=override_filter) + + return out + + def _set_geometry(self, geometry, override_geometry=None): + ''' + Function to set the geometry parameters for the processor. Values are + from the data geometry unless the geometry is overridden with an + override_geometry dictionary. + ''' + + parameters = ['magnification', 'propagation_distance', 'pixel_size'] + # specify parameter names as defined in geometry + geometry_parameters = ['magnification', 'dist_center_detector', + ('pixel_size_h', 'pixel_size_v')] + # specify if parameter requires unit conversion + convert_units = [False, True, True] + + if override_geometry is None: + override_geometry = {} + + # get and check parameters from over-ride geometry dictionary + for parameter in override_geometry.keys(): + if parameter not in parameters: + raise ValueError('Parameter {} not recognised, expected one of\ + {}.'.format(parameter, parameters)) + elif (override_geometry[parameter] is None) \ + or (override_geometry[parameter] == 0): + raise ValueError("Parameter {} cannot be {}, please update \ + data.geometry.{} or over-ride with \ + processor.get_output(override_geometry= \ + {{ '{}' : value }} )"\ + .format(parameter, str(getattr(self, parameter)), + geometry_parameters[parameters.index(parameter)], parameter)) + else: + self.__setattr__(parameter, override_geometry[parameter]) + + + # get and check parameters from geometry if they are not in the + # over-ride geometry dictionary + for i, parameter in enumerate(parameters): + if parameter not in override_geometry: + if type(geometry_parameters[i])==tuple: + param1 = getattr(geometry, geometry_parameters[i][0]) + param2 = getattr(geometry, geometry_parameters[i][1]) + if abs(param1 - param2) / (param1 + param2) >= 1e-5: + raise ValueError("Parameter {} is not homogeneous up \ + to 1e-5: got {} and {}, please update\ + geometry using data.geometry.{} and \ + data.geometry.{} or over-ride with \ + processor.get_output(\ + override_geometry={{ '{}' : value }})" + .format(parameter, str(param1), + str(param2), + geometry_parameters[i][0], + geometry_parameters[i][1], + parameter)) + else: + param1 = getattr(geometry, geometry_parameters[i]) + + if (param1 is None) or (param1 == 0): + raise ValueError("Parameter {} cannot be {}, please update\ + data.geometry.{} or over-ride with \ + processor.get_output(override_geometry\ + ={{ '{}' : value }} )" + .format(parameter, str(param1), + str(geometry_parameters[i]), + parameter)) + else: + if convert_units[i]: + param1 = self._convert_units(param1, 'distance', + geometry.config.units, + self.return_units) + self.__setattr__(parameter, param1) + + + def _create_filter(self, override_filter=None): + ''' + Function to create the Paganin filter, either using the paganin [1] or + generalised paganin [2] method + The filter is created on a mesh in Fourier space kx, ky + [1] https://doi.org/10.1046/j.1365-2818.2002.01010.x + [2] https://iopscience.iop.org/article/10.1088/2040-8986/abbab9 + ''' + if override_filter is None: +
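# nothing was over-ridden by the caller, so fall back to the delta, beta and alpha values stored at construction +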
override_filter = {} + + # update any parameter which has been over-ridden with override_filter + if ('alpha' in override_filter) and ('delta' in override_filter): + log.warning(msg="Because you specified alpha, it will not be \ + calculated and therefore delta will be ignored") + + if ('delta' in override_filter): + self.delta = override_filter['delta'] + else: + self.delta = self._delta_user + + if ('beta' in override_filter): + self.beta = override_filter['beta'] + else: + self.beta = self._beta_user + + self._calculate_mu() + + if ('alpha' in override_filter): + self.alpha = override_filter['alpha'] + else: + self._calculate_alpha() + + # create the Fourier mesh + kx,ky = np.meshgrid( + np.arange(-self.filter_Nx/2, self.filter_Nx/2, 1, dtype=np.float64) + * (2*np.pi)/(self.filter_Nx*self.pixel_size), + np.arange(-self.filter_Ny/2, self.filter_Ny/2, 1, dtype=np.float64) + * (2*np.pi)/(self.filter_Ny*self.pixel_size), + sparse=False, + indexing='ij' + ) + + # create the filter using either paganin or generalised paganin method + if self.filter_type == 'paganin_method': + self.filter = ifftshift(1/(1. + self.alpha*(kx**2 + ky**2))) + elif self.filter_type == 'generalised_paganin_method': + self.filter = ifftshift(1/(1. - (2*self.alpha/self.pixel_size**2) + *(np.cos(self.pixel_size*kx) + + np.cos(self.pixel_size*ky) -2))) + else: + raise ValueError("filter_type not recognised: got {0} expected one\ + of 'paganin_method' or \ + 'generalised_paganin_method'" + .format(self.filter_type)) + + def _calculate_mu(self): + ''' + Function to calculate the linear attenuation coefficient mu + ''' + self.mu = 4.0*np.pi*self.beta/self.wavelength + + def _calculate_alpha(self): + ''' + Function to calculate alpha, a constant defining the Paganin filter + strength + ''' + self.alpha = self.propagation_distance*self.delta/self.mu + + def _energy_to_wavelength(self, energy, energy_units, return_units): + ''' + Function to convert photon energy from energy_units to wavelength in + return_units + + Parameters + ---------- + energy: float + Photon energy + + energy_units + Energy units + + return_units + Distance units in which to return the wavelength + + Returns + ------- + float + Photon wavelength in return_units + ''' + top = self._convert_units(constants.h*constants.speed_of_light, + 'distance', 'm', return_units) + bottom = self._convert_units(energy, 'energy', energy_units, 'J') + + return top/bottom + + def _convert_units(self, value, unit_type, input_unit, output_unit): + unit_types = ['distance','energy','angle'] + + if unit_type == unit_types[0]: + unit_list = ['m','cm','mm','um'] + unit_multipliers = [1.0, 1e-2, 1e-3, 1e-6] + elif unit_type == unit_types[1]: + unit_list = ['meV', 'eV', 'keV', 'MeV', 'J'] + unit_multipliers = [1e-3, 1, 1e3, 1e6, 1/constants.eV] + elif unit_type == unit_types[2]: + unit_list = ['deg', 'rad'] + unit_multipliers = [1, np.rad2deg(1)] + else: + raise ValueError("Unit type '{}' not recognised, must be one of {}" + .format(unit_type, unit_types)) + + for x in [input_unit, output_unit]: + if x not in unit_list: + raise ValueError("Unit '{}' not recognised, must be one of {}.\ + \nGeometry units can be updated using geometry.config.units" + .format(x, unit_list)) + + return value*unit_multipliers[unit_list.index(input_unit)]\ + /unit_multipliers[unit_list.index(output_unit)] \ No newline at end of file diff --git a/Wrappers/Python/cil/processors/__init__.py b/Wrappers/Python/cil/processors/__init__.py index baae130b25..15ba249e74 100644 --- a/Wrappers/Python/cil/processors/__init__.py
+++ b/Wrappers/Python/cil/processors/__init__.py @@ -26,3 +26,4 @@ from .TransmissionAbsorptionConverter import TransmissionAbsorptionConverter from .Masker import Masker from .Padder import Padder +from .PaganinProcessor import PaganinProcessor \ No newline at end of file diff --git a/Wrappers/Python/test/test_AcquisitionGeometry.py b/Wrappers/Python/test/test_AcquisitionGeometry.py index c4fd162081..30a8853dc7 100644 --- a/Wrappers/Python/test/test_AcquisitionGeometry.py +++ b/Wrappers/Python/test/test_AcquisitionGeometry.py @@ -704,7 +704,8 @@ def test_get_centre_slice(self): def test_calculate_magnification(self): AG = AcquisitionGeometry.create_Parallel2D() out = AG.config.system.calculate_magnification() - self.assertEqual(out, [None, None, 1]) + detector_position = np.array(AG.config.system.detector.position) + self.assertEqual(out, [None, float(np.sqrt(detector_position.dot(detector_position))), 1]) def test_calculate_centre_of_rotation(self): AG = AcquisitionGeometry.create_Parallel2D() @@ -858,7 +859,8 @@ def test_get_centre_slice(self): def test_calculate_magnification(self): AG = AcquisitionGeometry.create_Parallel3D() out = AG.config.system.calculate_magnification() - self.assertEqual(out, [None, None, 1]) + detector_position = np.array(AG.config.system.detector.position) + self.assertEqual(out, [None, float(np.sqrt(detector_position.dot(detector_position))), 1]) def test_calculate_centre_of_rotation(self): diff --git a/Wrappers/Python/test/test_DataProcessor.py b/Wrappers/Python/test/test_DataProcessor.py index a7516d8a65..a821bba105 100644 --- a/Wrappers/Python/test/test_DataProcessor.py +++ b/Wrappers/Python/test/test_DataProcessor.py @@ -22,6 +22,7 @@ from cil.framework import ImageGeometry, VectorGeometry, AcquisitionGeometry from cil.framework import ImageData, AcquisitionData from cil.utilities import dataexample +from cil.utilities import quality_measures from cil.framework import AX, CastDataContainer, PixelByPixelDataProcessor from cil.recon import FBP @@ -29,9 +30,12 @@ from cil.processors import CentreOfRotationCorrector from cil.processors.CofR_xcorrelation import CofR_xcorrelation from cil.processors import TransmissionAbsorptionConverter, AbsorptionTransmissionConverter -from cil.processors import Slicer, Binner, MaskGenerator, Masker, Padder +from cil.processors import Slicer, Binner, MaskGenerator, Masker, Padder, PaganinProcessor import gc +from scipy import constants +from scipy.fft import ifftshift + from utils import has_astra, has_tigre, has_nvidia, has_tomophantom, initialise_tests, has_ipp initialise_tests() @@ -2756,11 +2760,319 @@ def Masker_check(self, mask, data, data_init): data_test = data.copy().as_array() data_test[2,3] = (data_test[1,3] + data_test[3,3]) / 2 data_test[4,5] = (data_test[3,5] + data_test[5,5]) / 2 + + numpy.testing.assert_allclose(res.as_array(), data_test, rtol=1E-6) - numpy.testing.assert_allclose(res.as_array(), data_test, rtol=1E-6) + +class TestPaganinProcessor(unittest.TestCase): + + def setUp(self): + self.data_parallel = dataexample.SIMULATED_PARALLEL_BEAM_DATA.get() + self.data_cone = dataexample.SIMULATED_CONE_BEAM_DATA.get() + ag = AcquisitionGeometry.create_Parallel3D()\ + .set_angles(numpy.linspace(0,360,360,endpoint=False))\ + .set_panel([128,128],0.1)\ + .set_channels(4) + + self.data_multichannel = ag.allocate('random') + + def error_message(self,processor, test_parameter): + return "Failed with processor " + str(processor) + " on test parameter " + test_parameter + + def test_PaganinProcessor_init(self): + # test 
default values are initialised + processor = PaganinProcessor() + test_parameter = ['energy', 'wavelength', 'delta', 'beta', 'full_retrieval', + 'filter_type', 'pad', 'return_units'] + test_value = [40000, 1e2*(constants.h*constants.speed_of_light)/(40000*constants.electron_volt), + 1, 1e-2, True, 'paganin_method', 0, 'cm'] + + for i in numpy.arange(len(test_value)): + self.assertEqual(getattr(processor,test_parameter[i]), test_value[i], msg=self.error_message(processor, test_parameter[i])) + + # test non-default values are initialised + processor = PaganinProcessor(1, 2, 3, 'keV', False, 'string', 19, 'mm') + test_value = [3, 1e3*(constants.h*constants.speed_of_light)/(3000*constants.electron_volt), 1, 2, False, 'string', 19, 'mm'] + for i in numpy.arange(len(test_value)): + self.assertEqual(getattr(processor,test_parameter[i]), test_value[i], msg=self.error_message(processor, test_parameter[i])) + + with self.assertRaises(ValueError): + processor = PaganinProcessor(return_units='string') + + def test_PaganinProcessor_energy_to_wavelength(self): + processor = PaganinProcessor() + wavelength = processor._energy_to_wavelength(10, 'meV', 'mm') + self.assertAlmostEqual(wavelength, 0.12398419) + def test_PaganinProcessor_check_input(self): + processor = PaganinProcessor() + for data in [self.data_cone, self.data_parallel, self.data_multichannel]: + processor.set_input(data) + data2 = processor.get_input() + numpy.testing.assert_allclose(data2.as_array(), data.as_array()) + # check there is an error when the wrong data type is input + with self.assertRaises(TypeError): + processor.set_input(data.geometry) + + with self.assertRaises(TypeError): + processor.set_input(data.as_array()) + + dc = DataContainer(data.as_array()) + with self.assertRaises(TypeError): + processor.set_input(dc) + + + def test_PaganinProcessor_set_geometry(self): + processor = PaganinProcessor() + data = self.data_cone + # check there is an error when the data geometry does not have units + processor.set_input(data) + with self.assertRaises(ValueError): + processor._set_geometry(data.geometry, None) + + # check there is no error when the geometry unit is provided + data.geometry.config.units = 'um' + processor._set_geometry(data.geometry, None) + multiplier = 1e-4 # convert um to return units cm + + # check the processor finds the correct geometry values, scaled by the units + self.assertAlmostEqual(processor.propagation_distance, data.geometry.dist_center_detector*multiplier, + msg=self.error_message(processor, 'propagation_distance')) + self.assertEqual(processor.magnification, data.geometry.magnification, + msg=self.error_message(processor, 'magnification')) + self.assertAlmostEqual(processor.pixel_size, data.geometry.pixel_size_h*multiplier, + msg=self.error_message(processor, 'pixel_size')) + + # check there is an error when the data geometry does not have propagation distance, and it is not provided in override geometry + processor.set_input(self.data_parallel) + with self.assertRaises(ValueError): + processor._set_geometry(self.data_parallel.geometry, None) + + # check override_geometry + for data in [self.data_parallel, self.data_cone, self.data_multichannel]: + processor.set_input(data) + processor._set_geometry(self.data_cone.geometry, override_geometry={'propagation_distance':1,'magnification':2, 'pixel_size':3}) + + self.assertEqual(processor.propagation_distance, 1, + msg=self.error_message(processor, 'propagation_distance')) + self.assertEqual(processor.magnification, 2, + msg=self.error_message(processor, 
'magnification')) + self.assertEqual(processor.pixel_size, 3, + msg=self.error_message(processor, 'pixel_size')) + + # check the processor goes back to values from geometry if the geometry over-ride is not passed + processor.set_input(self.data_cone) + processor._set_geometry(self.data_cone.geometry) + self.assertAlmostEqual(processor.propagation_distance, self.data_cone.geometry.dist_center_detector*multiplier, + msg=self.error_message(processor, 'propagation_distance')) + self.assertEqual(processor.magnification, self.data_cone.geometry.magnification, + msg=self.error_message(processor, 'magnification')) + self.assertAlmostEqual(processor.pixel_size, self.data_cone.geometry.pixel_size_h*multiplier, + msg=self.error_message(processor, 'pixel_size')) + + processor.set_input(self.data_parallel) + with self.assertRaises(ValueError): + processor._set_geometry(self.data_parallel.geometry) + + # check there is an error when the pixel_size_h and pixel_size_v are different + self.data_parallel.geometry.pixel_size_h = 9 + self.data_parallel.geometry.pixel_size_v = 10 + with self.assertRaises(ValueError): + processor._set_geometry(self.data_parallel.geometry, override_geometry={'propagation_distance':1}) + + def test_PaganinProcessor_create_filter(self): + image = self.data_cone.get_slice(angle=0).as_array() + Nx, Ny = image.shape + + delta = 1 + beta = 2 + energy = 3 + processor = PaganinProcessor(delta=delta, beta=beta, energy=energy, return_units='m') + + # check alpha and mu are calculated correctly + wavelength = (constants.h*constants.speed_of_light)/(energy*constants.electron_volt) + mu = 4.0*numpy.pi*beta/(wavelength) + alpha = 60000*delta/mu + + self.data_cone.geometry.config.units='m' + processor.set_input(self.data_cone) + processor._set_geometry(self.data_cone.geometry) + processor.filter_Nx = Nx + processor.filter_Ny = Ny + processor._create_filter() + + self.assertEqual(processor.alpha, alpha, msg=self.error_message(processor, 'alpha')) + self.assertEqual(processor.mu, mu, msg=self.error_message(processor, 'mu')) + + kx,ky = numpy.meshgrid( + numpy.arange(-Nx/2, Nx/2, 1, dtype=numpy.float64) * (2*numpy.pi)/(Nx*self.data_cone.geometry.pixel_size_h), + numpy.arange(-Ny/2, Ny/2, 1, dtype=numpy.float64) * (2*numpy.pi)/(Ny*self.data_cone.geometry.pixel_size_h), + sparse=False, + indexing='ij' + ) + + # check default filter is created with paganin_method + filter = ifftshift(1/(1. + alpha*(kx**2 + ky**2))) + numpy.testing.assert_allclose(processor.filter, filter) + + # check generalised_paganin_method + processor = PaganinProcessor(delta=delta, beta=beta, energy=energy, filter_type='generalised_paganin_method', return_units='m') + processor.set_input(self.data_cone) + processor._set_geometry(self.data_cone.geometry) + processor.filter_Nx = Nx + processor.filter_Ny = Ny + processor._create_filter() + filter = ifftshift(1/(1.
- (2*alpha/self.data_cone.geometry.pixel_size_h**2)*(numpy.cos(self.data_cone.geometry.pixel_size_h*kx) + numpy.cos(self.data_cone.geometry.pixel_size_h*ky) -2))) + numpy.testing.assert_allclose(processor.filter, filter) + + # check unknown method raises error + processor = PaganinProcessor(delta=delta, beta=beta, energy=energy, filter_type='unknown_method', return_units='m') + processor.set_input(self.data_cone) + processor._set_geometry(self.data_cone.geometry) + processor.filter_Nx = Nx + processor.filter_Ny = Ny + with self.assertRaises(ValueError): + processor._create_filter() + + # check parameter override + processor = PaganinProcessor(delta=delta, beta=beta, energy=energy, return_units='m') + processor.set_input(self.data_cone) + processor._set_geometry(self.data_cone.geometry) + delta = 100 + beta=200 + processor.filter_Nx = Nx + processor.filter_Ny = Ny + processor._create_filter(override_filter={'delta':delta, 'beta':beta}) + + # check alpha and mu are calculated correctly + wavelength = (constants.h*constants.speed_of_light)/(energy*constants.electron_volt) + mu = 4.0*numpy.pi*beta/(wavelength) + alpha = 60000*delta/mu + self.assertEqual(processor.delta, delta, msg=self.error_message(processor, 'delta')) + self.assertEqual(processor.beta, beta, msg=self.error_message(processor, 'beta')) + self.assertEqual(processor.alpha, alpha, msg=self.error_message(processor, 'alpha')) + self.assertEqual(processor.mu, mu, msg=self.error_message(processor, 'mu')) + filter = ifftshift(1/(1. + alpha*(kx**2 + ky**2))) + numpy.testing.assert_allclose(processor.filter, filter) + + # test specifying alpha, delta and beta + delta = 12 + beta = 13 + alpha = 14 + processor.filter_Nx = Nx + processor.filter_Ny = Ny + with self.assertLogs(level='WARN') as log: + processor._create_filter(override_filter = {'delta':delta, 'beta':beta, 'alpha':alpha}) + wavelength = (constants.h*constants.speed_of_light)/(energy*constants.electron_volt) + mu = 4.0*numpy.pi*beta/(wavelength) + + self.assertEqual(processor.delta, delta, msg=self.error_message(processor, 'delta')) + self.assertEqual(processor.beta, beta, msg=self.error_message(processor, 'beta')) + self.assertEqual(processor.alpha, alpha, msg=self.error_message(processor, 'alpha')) + self.assertEqual(processor.mu, mu, msg=self.error_message(processor, 'mu')) + filter = ifftshift(1/(1. 
+ alpha*(kx**2 + ky**2))) + numpy.testing.assert_allclose(processor.filter, filter) + + def test_PaganinProcessor(self): + + wavelength = (constants.h*constants.speed_of_light)/(40000*constants.electron_volt) + mu = 4.0*numpy.pi*1e-2/(wavelength) + + data_array = [self.data_cone, self.data_parallel, self.data_multichannel] + for data in data_array: + data.geometry.config.units = 'm' + data_abs = -(1/mu)*numpy.log(data) + processor = PaganinProcessor(full_retrieval=True) + processor.set_input(data) + thickness = processor.get_output(override_geometry={'propagation_distance':1}) + self.assertLessEqual(quality_measures.mse(thickness, data_abs), 1e-5) + processor = PaganinProcessor(full_retrieval=False) + processor.set_input(data) + filtered_image = processor.get_output(override_geometry={'propagation_distance':1}) + self.assertLessEqual(quality_measures.mse(filtered_image, data), 1e-5) + + # test with GPM + processor = PaganinProcessor(full_retrieval=True, filter_type='generalised_paganin_method') + processor.set_input(data) + thickness = processor.get_output(override_geometry={'propagation_distance':1}) + self.assertLessEqual(quality_measures.mse(thickness, data_abs), 1e-5) + processor = PaganinProcessor(full_retrieval=False, filter_type='generalised_paganin_method') + processor.set_input(data) + filtered_image = processor.get_output(override_geometry={'propagation_distance':1}) + self.assertLessEqual(quality_measures.mse(filtered_image, data), 1e-5) + + # test with padding + processor = PaganinProcessor(full_retrieval=True, pad=10) + processor.set_input(data) + thickness = processor.get_output(override_geometry={'propagation_distance':1}) + self.assertLessEqual(quality_measures.mse(thickness, data_abs), 1e-5) + processor = PaganinProcessor(full_retrieval=False, pad=10) + processor.set_input(data) + filtered_image = processor.get_output(override_geometry={'propagation_distance':1}) + self.assertLessEqual(quality_measures.mse(filtered_image, data), 1e-5) + + # test in-line + thickness_inline = PaganinProcessor(full_retrieval=True, pad=10)(data, override_geometry={'propagation_distance':1}) + numpy.testing.assert_allclose(thickness.as_array(), thickness_inline.as_array()) + filtered_image_inline = PaganinProcessor(full_retrieval=False, pad=10)(data, override_geometry={'propagation_distance':1}) + numpy.testing.assert_allclose(filtered_image.as_array(), filtered_image_inline.as_array()) + + # check with different data order + data.reorder('astra') + data_abs = -(1/mu)*numpy.log(data) + processor = PaganinProcessor(full_retrieval=True, pad=10) + processor.set_input(data) + with self.assertLogs(level='WARN') as log: + thickness = processor.get_output(override_geometry={'propagation_distance':1}) + self.assertLessEqual(quality_measures.mse(thickness, data_abs), 1e-5) + processor = PaganinProcessor(full_retrieval=False, pad=10) + processor.set_input(data) + with self.assertLogs(level='WARN') as log: + filtered_image = processor.get_output(override_geometry={'propagation_distance':1}) + self.assertLessEqual(quality_measures.mse(filtered_image, data), 1e-5) + + # check with different channel data order + if data.geometry.channels>1: + data.reorder(('vertical','channel','horizontal','angle')) + data_abs = -(1/mu)*numpy.log(data) + processor = PaganinProcessor(full_retrieval=True, pad=10) + processor.set_input(data) + with self.assertLogs(level='WARN') as log: + thickness = processor.get_output(override_geometry={'propagation_distance':1}) + self.assertLessEqual(quality_measures.mse(thickness, 
data_abs), 1e-5) + processor = PaganinProcessor(full_retrieval=False, pad=10) + processor.set_input(data) + with self.assertLogs(level='WARN') as log: + filtered_image = processor.get_output(override_geometry={'propagation_distance':1}) + self.assertLessEqual(quality_measures.mse(filtered_image, data), 1e-5) + + def test_PaganinProcessor_2D(self): + self.data_parallel.geometry.config.units = 'm' + data_slice = self.data_parallel.get_slice(vertical=10) + wavelength = (constants.h*constants.speed_of_light)/(40000*constants.electron_volt) + mu = 4.0*numpy.pi*1e-2/(wavelength) + thickness = -(1/mu)*numpy.log(data_slice) + + processor = PaganinProcessor(pad=10) + processor.set_input(data_slice) + output = processor.get_output(override_geometry={'propagation_distance':1}) + self.assertLessEqual(quality_measures.mse(output, thickness), 0.05) + + # check with different data order + data_slice.reorder(('horizontal','angle')) + wavelength = (constants.h*constants.speed_of_light)/(40000*constants.electron_volt) + mu = 4.0*numpy.pi*1e-2/(wavelength) + thickness = -(1/mu)*numpy.log(data_slice) + + processor = PaganinProcessor(pad=10) + processor.set_input(data_slice) + output = processor.get_output(override_geometry={'propagation_distance':1}) + self.assertLessEqual(quality_measures.mse(output, thickness), 0.05) + + # 'horizontal, vertical, angles + if __name__ == "__main__": d = TestDataProcessor() diff --git a/Wrappers/Python/test/test_PluginsRegularisation.py b/Wrappers/Python/test/test_PluginsRegularisation.py index 05370c82f2..c2e64f6843 100644 --- a/Wrappers/Python/test/test_PluginsRegularisation.py +++ b/Wrappers/Python/test/test_PluginsRegularisation.py @@ -35,7 +35,7 @@ class TestPlugin(unittest.TestCase): def setUp(self): #Default test image - self.data = dataexample.SIMPLE_PHANTOM_2D.get(size=(64,64)) + self.data = dataexample.SIMPLE_PHANTOM_2D.get(size=(64,30)) self.alpha = 2.0 self.iterations = 1000 @@ -46,7 +46,7 @@ def tearDown(self): @unittest.skipUnless(has_ccpi_regularisation, "Skipping as CCPi Regularisation Toolkit is not installed") def test_FGP_TV_complex(self): - data = dataexample.CAMERA.get(size=(256,256)) + data = dataexample.CAMERA.get(size=(256,100)) datarr = data.as_array() cmpx = np.zeros(data.shape, dtype=np.complex64) cmpx.real = datarr[:] @@ -90,7 +90,7 @@ def test_FGP_TGV_rmul(self): @unittest.skipUnless(has_ccpi_regularisation, "Skipping as CCPi Regularisation Toolkit is not installed") def test_FGP_dTV_rmul(self): - data = dataexample.CAMERA.get(size=(256,256)) + data = dataexample.CAMERA.get(size=(256,100)) f = FGP_dTV(data) self.rmul_test(f) @@ -98,7 +98,7 @@ def test_FGP_dTV_rmul(self): @unittest.skipUnless(has_ccpi_regularisation, "Skipping as CCPi Regularisation Toolkit is not installed") def test_functionality_FGP_TV(self): - data = dataexample.CAMERA.get(size=(256,256)) + data = dataexample.CAMERA.get(size=(256,100)) datarr = data.as_array() tau = 1. @@ -111,7 +111,7 @@ def test_functionality_FGP_TV(self): @unittest.skipUnless(has_ccpi_regularisation, "Skipping as CCPi Regularisation Toolkit is not installed") def test_functionality_TGV(self): - data = dataexample.CAMERA.get(size=(256,256)) + data = dataexample.CAMERA.get(size=(256,100)) datarr = data.as_array() tau = 1. 
@@ -125,7 +125,7 @@ def test_functionality_TGV(self): @unittest.skipUnless(has_ccpi_regularisation, "Skipping as CCPi Regularisation Toolkit is not installed") def test_functionality_FGP_dTV(self): - data = dataexample.CAMERA.get(size=(256,256)) + data = dataexample.CAMERA.get(size=(256,100)) datarr = data.as_array() ref = data*0.3 @@ -160,7 +160,7 @@ def test_functionality_TNV(self): @unittest.skipUnless(has_ccpi_regularisation, "Skipping as CCPi Regularisation Toolkit is not installed") def test_TNV_raise_on_2D(self): # data = dataexample.SYNCHROTRON_PARALLEL_BEAM_DATA.get() - data = dataexample.CAMERA.get(size=(256,256)) + data = dataexample.CAMERA.get(size=(256,100)) datarr = data.as_array() tau = 1. @@ -173,7 +173,7 @@ def test_TNV_raise_on_2D(self): @unittest.skipUnless(has_ccpi_regularisation, "Skipping as CCPi Regularisation Toolkit is not installed") def test_TNV_raise_on_3D_nochannel(self): # data = dataexample.SYNCHROTRON_PARALLEL_BEAM_DATA.get() - data = dataexample.CAMERA.get(size=(256,256)) + data = dataexample.CAMERA.get(size=(256,100)) datarr = data.as_array() tau = 1. diff --git a/Wrappers/Python/test/test_approximate_gradient.py b/Wrappers/Python/test/test_approximate_gradient.py index 774cdd9fa2..764a2c5067 100644 --- a/Wrappers/Python/test/test_approximate_gradient.py +++ b/Wrappers/Python/test/test_approximate_gradient.py @@ -25,10 +25,9 @@ from cil.framework import VectorData -from cil.utilities import dataexample from cil.optimisation.functions import LeastSquares from cil.optimisation.functions import ApproximateGradientSumFunction -from cil.optimisation.functions import SGFunction +from cil.optimisation.functions import SGFunction, SAGFunction, SAGAFunction from cil.optimisation.operators import MatrixOperator from cil.optimisation.algorithms import GD from cil.framework import VectorData @@ -71,36 +70,60 @@ def test_ABC(self): self.functions, self.sampler) -class TestSGD(CCPiTestClass): +class TestApproximateGradientSumFunction(CCPiTestClass): + def setUp(self): + self.sampler = Sampler.random_with_replacement(5) + self.initial = VectorData(np.zeros(10)) + self.b = VectorData(np.random.normal(0, 1, 10)) + self.functions = [] + for i in range(5): + diagonal = np.zeros(10) + diagonal[2*i:2*(i+1)] = 1 + A = MatrixOperator(np.diag(diagonal)) + self.functions.append(LeastSquares(A, A.direct(self.b))) + if i == 0: + self.objective = LeastSquares(A, A.direct(self.b)) + else: + self.objective += LeastSquares(A, A.direct(self.b)) + + def test_ABC(self): + with self.assertRaises(TypeError): + self.stochastic_objective = ApproximateGradientSumFunction( + self.functions, self.sampler) + + +class approx_gradient_child_class_testing(): + + - - - self.sampler = Sampler.random_with_replacement(6) + def set_up(self): + np.random.seed(10) + self.sampler = Sampler.random_with_replacement(self.n_subsets) self.initial = VectorData(np.zeros(30)) b = VectorData(np.array(range(30))/50) - self.n_subsets = 6 + self.f_subsets = [] - for i in range(6): + for i in range(self.n_subsets): diagonal = np.zeros(30) - diagonal[5*i:5*(i+1)] = 1 + diagonal[(30//self.n_subsets)*i:(30//self.n_subsets)*(i+1)] = 1 Ai = MatrixOperator(np.diag(diagonal)) self.f_subsets.append(LeastSquares(Ai, Ai.direct(b))) self.A=MatrixOperator(np.diag(np.ones(30))) self.f = LeastSquares(self.A, b) - self.f_stochastic = SGFunction(self.f_subsets, self.sampler) - - + + self.f_stochastic=self.stochastic_estimator(self.f_subsets, self.sampler) def test_approximate_gradient_not_equal_full(self): + 
self.f_stochastic.gradient(self.initial) self.assertFalse((self.f_stochastic.full_gradient( - self.initial) == self.f_stochastic.gradient(self.initial).array).all()) + self.initial+1) == self.f_stochastic.gradient(self.initial+1).array).all()) def test_sampler(self): self.assertTrue(isinstance(self.f_stochastic.sampler, SamplerRandom)) - f = SGFunction(self.f_subsets) + f = self.stochastic_estimator(self.f_subsets) self.assertTrue(isinstance(f.sampler, SamplerRandom)) self.assertEqual(f.sampler._type, 'random_with_replacement') @@ -117,13 +140,13 @@ def test_full_gradient(self): def test_value_error_with_only_one_function(self): with self.assertRaises(ValueError): - SGFunction([self.f], self.sampler) + self.stochastic_estimator([self.f], self.sampler) pass def test_type_error_if_functions_not_a_list(self): with self.assertRaises(TypeError): - SGFunction(self.f, self.sampler) + self.stochastic_estimator(self.f, self.sampler) def test_sampler_without_next(self): @@ -132,16 +155,60 @@ def init(self): pass bad_sampler = bad_Sampler() with self.assertRaises(ValueError): - SGFunction([self.f, self.f], bad_sampler) + self.stochastic_estimator([self.f, self.f], bad_sampler) def test_sampler_out_of_range(self): - bad_sampler = Sampler.sequential(10) - f = SGFunction([self.f, self.f], bad_sampler) + def g(index): + return -2 + bad_sampler = Sampler.from_function(12,g) + f = self.stochastic_estimator([self.f]*10, bad_sampler) with self.assertRaises(IndexError): f.gradient(self.initial) f.gradient(self.initial) - f.gradient(self.initial) + + @unittest.skipUnless(has_cvxpy, "CVXpy not installed") + def test_toy_example(self): + sampler = Sampler.random_with_replacement(5) + initial = VectorData(np.zeros(25)) + b = VectorData(np.array(range(25))) + functions = [] + for i in range(5): + diagonal = np.zeros(25) + diagonal[5*i:5*(i+1)] = 1 + A = MatrixOperator(np.diag(diagonal)) + functions.append(0.5*LeastSquares(A, A.direct(b))) + + Aop=MatrixOperator(np.diag(np.ones(25))) + + u_cvxpy = cvxpy.Variable(b.shape[0]) + objective = cvxpy.Minimize( 0.5*cvxpy.sum_squares(Aop.A @ u_cvxpy - Aop.direct(b).array)) + p = cvxpy.Problem(objective) + p.solve(verbose=True, solver=cvxpy.SCS, eps=1e-4) + + + + stochastic_objective = self.stochastic_estimator(functions, sampler) + + alg_stochastic = GD(initial=initial, + objective_function=stochastic_objective, update_objective_interval=1000, + step_size=0.05) + alg_stochastic.run(600, verbose=0) + + np.testing.assert_allclose(p.value ,stochastic_objective(alg_stochastic.x) , atol=1e-1) + self.assertNumpyArrayAlmostEqual( + alg_stochastic.x.as_array(), u_cvxpy.value, 3) + self.assertNumpyArrayAlmostEqual( + alg_stochastic.x.as_array(), b.as_array(), 3) + + +class TestSGD(CCPiTestClass, approx_gradient_child_class_testing): + + def setUp(self): + self.stochastic_estimator=SGFunction + self.n_subsets=6 + self.set_up() + def test_partition_weights(self): f_stochastic=SGFunction(self.f_subsets, Sampler.sequential(self.n_subsets)) @@ -160,39 +227,67 @@ def test_partition_weights(self): f_stochastic.gradient(self.initial) self.assertEqual(f_stochastic.data_passes[i], f_stochastic.data_passes[i-1]+a[i%self.n_subsets]) - + + + +class TestSAG(CCPiTestClass, approx_gradient_child_class_testing): - + def setUp(self): + self.stochastic_estimator=SAGFunction + self.n_subsets=6 + self.set_up() - @unittest.skipUnless(has_cvxpy, "CVXpy not installed") - def test_SGD_toy_example(self): - sampler = Sampler.random_with_replacement(5) - initial = VectorData(np.zeros(25)) - b = 
VectorData(np.array(range(25))) - functions = [] - for i in range(5): - diagonal = np.zeros(25) - diagonal[5*i:5*(i+1)] = 1 - A = MatrixOperator(np.diag(diagonal)) - functions.append(0.5*LeastSquares(A, A.direct(b))) - - Aop=MatrixOperator(np.diag(np.ones(25))) + + + + def test_warm_start_and_data_passes(self): + + f1=SAGFunction(self.f_subsets,Sampler.sequential(self.n_subsets)) + f=SAGFunction(self.f_subsets,Sampler.sequential(self.n_subsets)) + f.warm_start_approximate_gradients(self.initial) + f1.gradient(self.initial) + f.gradient(self.initial) + self.assertEqual(f.function_num, 0) + self.assertEqual(f1.function_num, 0) + self.assertNumpyArrayAlmostEqual(np.array(f1.data_passes), np.array([1./f1.num_functions])) + self.assertNumpyArrayAlmostEqual(np.array(f.data_passes), np.array([ 1+1./f1.num_functions])) + self.assertNumpyArrayAlmostEqual(np.array(f.data_passes_indices[0]), np.array(list(range(f1.num_functions))+ [0])) + self.assertNumpyArrayAlmostEqual(np.array(f1.data_passes_indices[0]), np.array([0])) + self.assertNumpyArrayAlmostEqual(f._list_stored_gradients[0].array, f1._list_stored_gradients[0].array) + self.assertNumpyArrayAlmostEqual(f._list_stored_gradients[0].array, self.f_subsets[0].gradient(self.initial).array) + self.assertNumpyArrayAlmostEqual(f._list_stored_gradients[1].array, self.f_subsets[1].gradient(self.initial).array) + + self.assertFalse((f._list_stored_gradients[3].array== f1._list_stored_gradients[3].array).all()) + + + @unittest.skipUnless(has_cvxpy, "CVXpy not installed") + def test_SAG_toy_example_warm_start(self): + sampler=Sampler.random_with_replacement(3,seed=1) + initial = VectorData(np.zeros(21)) + np.random.seed(4) + b = VectorData(np.random.normal(0,4,21)) + functions=[] + for i in range(3): + diagonal=np.zeros(21) + diagonal[7*i:7*(i+1)]=1 + A=MatrixOperator(np.diag(diagonal)) + functions.append( LeastSquares(A, A.direct(b))) + + Aop=MatrixOperator(np.diag(np.ones(21))) + u_cvxpy = cvxpy.Variable(b.shape[0]) objective = cvxpy.Minimize( 0.5*cvxpy.sum_squares(Aop.A @ u_cvxpy - Aop.direct(b).array)) p = cvxpy.Problem(objective) p.solve(verbose=True, solver=cvxpy.SCS, eps=1e-4) - - stochastic_objective = SGFunction(functions, sampler) - - alg_stochastic = GD(initial=initial, - objective_function=stochastic_objective, update_objective_interval=1000, - step_size=1/stochastic_objective.L) - alg_stochastic.run(600, verbose=0) - self.assertAlmostEqual(stochastic_objective.data_passes[-1], 600/5) - self.assertListEqual(stochastic_objective.data_passes_indices[-1], [stochastic_objective.function_num]) + stochastic_objective=SAGFunction(functions, sampler) + stochastic_objective.warm_start_approximate_gradients(initial) + alg_stochastic = GD(initial=initial, + objective_function=stochastic_objective, update_objective_interval=1000, + step_size=0.05, max_iteration =5000) + alg_stochastic.run( 80, verbose=0) np.testing.assert_allclose(p.value ,stochastic_objective(alg_stochastic.x) , atol=1e-1) self.assertNumpyArrayAlmostEqual( alg_stochastic.x.as_array(), u_cvxpy.value, 3) @@ -200,4 +295,69 @@ def test_SGD_toy_example(self): alg_stochastic.x.as_array(), b.as_array(), 3) - \ No newline at end of file + + +class TestSAGA(CCPiTestClass,approx_gradient_child_class_testing): + + def setUp(self): + self.stochastic_estimator=SAGAFunction + self.n_subsets=6 + self.set_up() + + + + def test_warm_start_and_data_passes(self): + + f1=SAGAFunction(self.f_subsets,Sampler.sequential(self.n_subsets)) + f=SAGAFunction(self.f_subsets,Sampler.sequential(self.n_subsets)) + 
f.warm_start_approximate_gradients(self.initial) + f1.gradient(self.initial) + f.gradient(self.initial) + + self.assertEqual(f.function_num, 0) + self.assertEqual(f1.function_num, 0) + self.assertNumpyArrayAlmostEqual(np.array(f1.data_passes), np.array([1./f1.num_functions])) + self.assertNumpyArrayAlmostEqual(np.array(f.data_passes), np.array([1+1./f1.num_functions])) + self.assertNumpyArrayAlmostEqual(np.array(f.data_passes_indices[0]),np.array( list(range(self.n_subsets))+[0])) + self.assertNumpyArrayAlmostEqual(np.array(f1.data_passes_indices[0]), np.array([0])) + self.assertNumpyArrayAlmostEqual(f._list_stored_gradients[0].array, f1._list_stored_gradients[0].array) + self.assertNumpyArrayAlmostEqual(f._list_stored_gradients[0].array, self.f_subsets[0].gradient(self.initial).array) + self.assertNumpyArrayAlmostEqual(f._list_stored_gradients[1].array, self.f_subsets[1].gradient(self.initial).array) + + self.assertFalse((f._list_stored_gradients[1].array== f1._list_stored_gradients[1].array).all()) + self.assertNumpyArrayAlmostEqual(f1._list_stored_gradients[1].array, self.initial.array) + + + + @unittest.skipUnless(has_cvxpy, "CVXpy not installed") + def test_SAGA_toy_example_warm_start(self): + sampler=Sampler.random_with_replacement(3,seed=1) + initial = VectorData(np.zeros(21)) + np.random.seed(4) + b = VectorData(np.random.normal(0,4,21)) + functions=[] + for i in range(3): + diagonal=np.zeros(21) + diagonal[7*i:7*(i+1)]=1 + A=MatrixOperator(np.diag(diagonal)) + functions.append( LeastSquares(A, A.direct(b))) + + Aop=MatrixOperator(np.diag(np.ones(21))) + + u_cvxpy = cvxpy.Variable(b.shape[0]) + objective = cvxpy.Minimize( 0.5*cvxpy.sum_squares(Aop.A @ u_cvxpy - Aop.direct(b).array)) + p = cvxpy.Problem(objective) + p.solve(verbose=True, solver=cvxpy.SCS, eps=1e-4) + + stochastic_objective=SAGAFunction(functions, sampler) + stochastic_objective.warm_start_approximate_gradients(initial) + + alg_stochastic = GD(initial=initial, + objective_function=stochastic_objective, update_objective_interval=1000, + step_size=0.05, max_iteration =5000) + alg_stochastic.run( 100, verbose=0) + np.testing.assert_allclose(p.value ,stochastic_objective(alg_stochastic.x) , atol=1e-1) + self.assertNumpyArrayAlmostEqual( + alg_stochastic.x.as_array(), u_cvxpy.value, 3) + self.assertNumpyArrayAlmostEqual( + alg_stochastic.x.as_array(), b.as_array(), 3) diff --git a/Wrappers/Python/test/test_sampler.py b/Wrappers/Python/test/test_sampler.py index 69c9dd1e1e..8d1e9ba127 100644 --- a/Wrappers/Python/test/test_sampler.py +++ b/Wrappers/Python/test/test_sampler.py @@ -128,7 +128,6 @@ def test_from_function(self): self.assertListEqual(sampler.prob_weights, [1]+[0]*39) self.assertEqual(sampler.num_indices, 40) self.assertEqual(sampler._type, 'from_function') - def test_sequential_iterator_and_get_samples(self): sampler = Sampler.sequential(10) diff --git a/docs/docs_environment.yml b/docs/docs_environment.yml index ea238cc225..df4c5aebf2 100644 --- a/docs/docs_environment.yml +++ b/docs/docs_environment.yml @@ -14,7 +14,7 @@ name: cil_dev channels: - conda-forge - - intel + - https://software.repos.intel.com/python/conda - ccpi - defaults dependencies: diff --git a/docs/source/framework.rst b/docs/source/framework.rst index 9fdb4a9a76..27f2a45e31 100644 --- a/docs/source/framework.rst +++ b/docs/source/framework.rst @@ -174,6 +174,64 @@ This provide a base class that will behave as normal :code:`DataContainer`. 
:members: :inherited-members: +Partitioner +=========== + +This method partitions an instance of tomography :code:`AcquisitionData` into a number of batches, for example, for use with a stochastic optimisation method. + +The partitioning is done by taking batches of angles and the corresponding data collected by taking projections along these angles. The partitioner chooses which angles go in which batch depending on the `mode`. It takes in an `AcquisitionData` object and outputs a `BlockDataContainer`, where each element of the block is an `AcquisitionData` object holding one batch of data and its corresponding geometry. +We consider a **batch** to be a subset of the :code:`AcquisitionData`, and the verb **to partition** to be the act of splitting it into batches. + + +For example: + +.. code-block:: python + + from cil.utilities import dataexample + from cil.plugins.astra.operators import ProjectionOperator + + # get the data + data = dataexample.SIMULATED_PARALLEL_BEAM_DATA.get() + data.reorder('astra') + data = data.get_slice(vertical='centre') + + # create the geometries + ag = data.geometry + ig = ag.get_ImageGeometry() + + # partition the data into batches contained in the elements of a BlockDataContainer + data_partitioned = data.partition(num_batches=10, mode='staggered') # Choose mode from `sequential`, `staggered` or `random_permutation` + # From the partitioned data build a BlockOperator containing the projectors for each batch + A_partitioned = ProjectionOperator(ig, data_partitioned.geometry, device = "cpu") + + print('The total number of angles is ', len(data.geometry.angles)) + print('The first 30 angles are ', data.geometry.angles[:30]) + + print('In batch zero the number of angles is ', len(data_partitioned[0].geometry.angles)) + print('The angles in batch zero are ', data_partitioned[0].geometry.angles) + print('The angles in batch one are ', data_partitioned[1].geometry.angles) + +.. code-block:: RST + + The total number of angles is 300 + The first 30 angles are [ 0. 1.2 2.4 3.6 4.8 6. 7.2 8.4 9.6 10.8 12. 13.2 14.4 15.6 + 16.8 18. 19.2 20.4 21.6 22.8 24. 25.2 26.4 27.6 28.8 30. 31.2 32.4 + 33.6 34.8] + In batch zero the number of angles is 30 + The angles in batch zero are [ 0. 12. 24. 36. 48. 60. 72. 84. 96. 108. 120. 132. 144. 156. + 168. 180. 192. 204. 216. 228. 240. 252. 264. 276. 288. 300. 312. 324. + 336. 348.] + The angles in batch one are [ 1.2 13.2 25.2 37.2 49.2 61.2 73.2 85.2 97.2 109.2 121.2 133.2 + 145.2 157.2 169.2 181.2 193.2 205.2 217.2 229.2 241.2 253.2 265.2 277.2 + 289.2 301.2 313.2 325.2 337.2 349.2] + + +The :code:`partition` method is defined as part of: + +.. autoclass:: cil.framework.Partitioner + :members: + + DataOrder ========= .. autoclass:: cil.framework.DataOrder diff --git a/docs/source/optimisation.rst b/docs/source/optimisation.rst index 275ccb0b7f..ebda404029 100644 --- a/docs/source/optimisation.rst +++ b/docs/source/optimisation.rst @@ -151,8 +151,6 @@ Each iteration considers just one index of the sum, potentially reducing computa :inherited-members: run, update_objective_interval, max_iteration - - Approximate gradient methods ---------------------------------- @@ -161,6 +159,8 @@ For example, when :math:`g(x)=0`, the standard Gradient Descent algorithm utilis .. math:: x_{k+1}=x_k-\alpha \nabla f(x_k) =x_k-\alpha \sum_{i=0}^{n-1}\nabla f_i(x_k).
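The Partitioner example above stops once the batches and projectors are built, although the section opens by promising use with a stochastic optimisation method. A hedged sketch of that last step, reusing `ig` and `data_partitioned` from the example together with the `SGFunction` and `GD` APIs documented below; the batch count, step size and iteration count are illustrative, not recommendations:

```python
# Sketch only: plug the partitioned data from the example above into a
# stochastic objective. Assumes the astra plugin and the cil.optimisation API;
# `ig` and `data_partitioned` come from the Partitioner example.
from cil.optimisation.algorithms import GD
from cil.optimisation.functions import LeastSquares, SGFunction
from cil.plugins.astra.operators import ProjectionOperator

# one least-squares term per batch of angles
f_i = [LeastSquares(ProjectionOperator(ig, d.geometry, device="cpu"), d)
       for d in data_partitioned.containers]
f = SGFunction(f_i)   # default sampler: random with replacement

alg = GD(initial=ig.allocate(0), objective_function=f,
         update_objective_interval=1000, step_size=0.05)  # illustrative step size
alg.run(300, verbose=0)
```

The sketch builds one `ProjectionOperator` per batch rather than indexing `A_partitioned`, purely to stay self-contained; the deliberately small step size follows the spirit of the step-size note added to optimisation.rst below.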
+ Replacing, :math:`\nabla f(x_k)=\sum_{i=0}^{n-1}\nabla f_i(x_k)` with :math:`n \nabla f_i(x_k)`, for an index :math:`i` which changes each iteration, leads to the well known stochastic gradient descent algorithm. @@ -178,9 +178,9 @@ In a similar way, plugging approximate gradient calculations into deterministic +----------------+-------+------------+----------------+ | SGFunction | SGD | Prox-SGD | Acc-Prox-SGD | +----------------+-------+------------+----------------+ -| SAGFunction\* | SAG | Prox-SAG | Acc-Prox-SAG | +| SAGFunction\ | SAG | Prox-SAG | Acc-Prox-SAG | +----------------+-------+------------+----------------+ -| SAGAFunction\* | SAGA | Prox-SAGA | Acc-Prox-SAGA | +| SAGAFunction\ | SAGA | Prox-SAGA | Acc-Prox-SAGA | +----------------+-------+------------+----------------+ | SVRGFunction\* | SVRG | Prox-SVRG | Acc-Prox-SVRG | +----------------+-------+------------+----------------+ @@ -228,10 +228,21 @@ The below is an example of Stochastic Gradient Descent built of the SGFunction a alg = GD(initial=ig.allocate(0), objective_function=f, step_size=1/f.L) alg.run(300) - - +Note +---- + All the approximate gradients implemented in CIL are scaled to a similar order of magnitude to the full gradient calculation. For example, in the :code:`SGFunction` we approximate the full gradient by :math:`n\nabla f_i` for an index :math:`i` given by the sampler. + The multiplication by :math:`n` is a choice made to allow easier comparisons between stochastic and non-stochastic methods, and between stochastic methods with varying numbers of subsets. + The multiplication also ensures that the SGD, SAGA, SVRG and LSVRG approximate gradients are unbiased estimators of the full gradient, i.e. :math:`\mathbb{E}\left[\tilde\nabla f(x)\right] =\nabla f(x)` (the SAG gradient estimate, by contrast, is biased). + This has an implication when choosing step sizes. For example, a suitable step size for GD with an SGFunction could be + :math:`\propto 1/(L_{max}*n)`, where :math:`L_{max}` is the largest Lipschitz constant of the list of functions in the SGFunction, and the additional factor of :math:`n` reflects the multiplication by :math:`n` in the approximate gradient. + +Memory requirements +------------------- +Note that the approximate gradient methods have different memory requirements: ++ The `SGFunction` has the same requirements as a `SumFunction`, so no increased memory usage. ++ `SAGFunction` and `SAGAFunction` both hold `n+3` times the image size in memory, to store the last calculated gradient for each function in the sum as well as intermediate calculations. Operators @@ -504,6 +515,21 @@ Stochastic Gradient function :members: :inherited-members: +SAG function +------------- + +.. autoclass:: cil.optimisation.functions.SAGFunction + :members: + :inherited-members: + +SAGA function +-------------- + +.. autoclass:: cil.optimisation.functions.SAGAFunction + :members: + :inherited-members: + + Utilities ========= diff --git a/docs/source/processors.rst b/docs/source/processors.rst index 052589a594..1381a065fc 100644 --- a/docs/source/processors.rst +++ b/docs/source/processors.rst @@ -140,4 +140,12 @@ Ring Remover :inherited-members: set_input, get_output +Paganin Processor +----------------- + +..
autoclass:: cil.processors.PaganinProcessor + :exclude-members: check_input, get_input + :members: + :inherited-members: + :ref:`Return Home ` diff --git a/recipe/meta.yaml b/recipe/meta.yaml index 7094ff9660..1f35a41a9c 100644 --- a/recipe/meta.yaml +++ b/recipe/meta.yaml @@ -38,7 +38,7 @@ test: - tomophantom=2.0.0 # [ linux ] - tigre=2.6 - packaging - - ccpi-regulariser=24.0.0 # [ not osx ] + - ccpi-regulariser=24.0.1 # [ not osx ] - astra-toolbox=2.1=cuda* # [ not osx ] source_files: @@ -93,7 +93,7 @@ requirements: - tomophantom=2.0.0 - astra-toolbox>=1.9.9.dev5,<=2.1 - tigre>=2.4,<=2.6 - - ccpi-regulariser=24.0.0 + - ccpi-regulariser=24.0.1 - ipywidgets about: diff --git a/scripts/create_local_env_for_cil_development.sh b/scripts/create_local_env_for_cil_development.sh index b193b37bb2..8e65ca665f 100755 --- a/scripts/create_local_env_for_cil_development.sh +++ b/scripts/create_local_env_for_cil_development.sh @@ -71,11 +71,11 @@ if test -n "$cil_ver"; then fi if test $test_deps = 0; then - conda_args+=(-c conda-forge -c intel -c defaults --override-channels) + conda_args+=(-c conda-forge -c https://software.repos.intel.com/python/conda -c defaults --override-channels) else conda_args+=( astra-toolbox=2.1=cuda* - ccpi-regulariser=24.0.0 + ccpi-regulariser=24.0.1 cil-data cvxpy ipywidgets @@ -86,7 +86,7 @@ else tigre=2.6 tomophantom=2.0.0 -c conda-forge - -c intel + -c https://software.repos.intel.com/python/conda -c ccpi/label/dev -c ccpi --override-channels diff --git a/scripts/requirements-test.yml b/scripts/requirements-test.yml index b34b5240aa..73e121d81d 100644 --- a/scripts/requirements-test.yml +++ b/scripts/requirements-test.yml @@ -15,6 +15,7 @@ name: cil_dev channels: - conda-forge - nvidia + - https://software.repos.intel.com/python/conda - defaults dependencies: # base (vis. recipe/conda_build_config.yaml) @@ -22,7 +23,7 @@ dependencies: - numpy >=1.23 - ccpi::cil-data - ccpi::tigre=2.6 - - ccpi::ccpi-regulariser=24.0.0 + - ccpi::ccpi-regulariser=24.0.1 - ccpi::tomophantom=2.0.0 - astra-toolbox=2.1=cuda* - cvxpy @@ -30,10 +31,10 @@ dependencies: - scikit-image - packaging - cmake >=3.16 - - setuptools - - intel::ipp-include >=2021.10 - - intel::ipp-devel >=2021.10 - - intel::ipp >=2021.10 + - setuptools <71 + - ipp-include >=2021.10 + - ipp-devel >=2021.10 + - ipp >=2021.10 - ipywidgets - scipy - matplotlib
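Finally, the SAG/SAGA functions are the headline addition of this changeset. Pulling the pieces of the new tests together gives the following toy sketch of `SAGFunction` and its warm start; it assumes `Sampler` is importable from `cil.optimisation.utilities`, and all numbers are illustrative. Swapping in `SAGAFunction` gives the SAGA variant.

```python
# Toy sketch mirroring test_SAG_toy_example_warm_start above.
# Assumes the cil.optimisation API exercised by the new tests.
import numpy as np
from cil.framework import VectorData
from cil.optimisation.algorithms import GD
from cil.optimisation.functions import LeastSquares, SAGFunction
from cil.optimisation.operators import MatrixOperator
from cil.optimisation.utilities import Sampler  # assumed import path

np.random.seed(4)
b = VectorData(np.random.normal(0, 4, 21))
functions = []
for i in range(3):
    diagonal = np.zeros(21)
    diagonal[7 * i:7 * (i + 1)] = 1          # each term sees 7 of the 21 entries
    A = MatrixOperator(np.diag(diagonal))
    functions.append(LeastSquares(A, A.direct(b)))

objective = SAGFunction(functions, Sampler.random_with_replacement(3, seed=1))

initial = VectorData(np.zeros(21))
# optionally pre-fill the stored per-function gradients before iterating
objective.warm_start_approximate_gradients(initial)

alg = GD(initial=initial, objective_function=objective,
         update_objective_interval=1000, step_size=0.05)
alg.run(80, verbose=0)
```

Without the warm start the stored per-function gradients start from zero (the SAGA test above asserts exactly this), so `warm_start_approximate_gradients` trades one extra pass through the data for better early iterates; the `data_passes` bookkeeping checked by the tests accounts for that pass.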