Merge pull request #146 from yucongalicechen/mudcalc

finalize `mud_calculator.py`
diffpy · Dec 27, 2024 · e8572fa · e8572fa
2 parents 7f264d2 + bd1dd40
commit e8572fa
Show file tree

Hide file tree

Showing 3 changed files with 81 additions and 49 deletions.
diff --git a/news/muD.rst b/news/muD.rst
@@ -0,0 +1,23 @@
+**Added:**
+
+* no news added - minor edits in mud_calculator.py
+
+**Changed:**
+
+* <news item>
+
+**Deprecated:**
+
+* <news item>
+
+**Removed:**
+
+* <news item>
+
+**Fixed:**
+
+* <news item>
+
+**Security:**
+
+* <news item>
diff --git a/src/diffpy/labpdfproc/mud_calculator.py b/src/diffpy/labpdfproc/mud_calculator.py
@@ -5,96 +5,105 @@
 from diffpy.utils.parsers.loaddata import loadData
 
 
-def _top_hat(x, slit_width):
+def _top_hat(z, half_slit_width):
     """
-    create a top-hat function, return 1.0 for values within the specified slit width and 0 otherwise
+    Create a top-hat function, return 1.0 for values within the specified slit width and 0 otherwise
     """
-    return np.where((x >= -slit_width) & (x <= slit_width), 1.0, 0)
+    return np.where((z >= -half_slit_width) & (z <= half_slit_width), 1.0, 0.0)
 
 
-def _model_function(x, diameter, x0, I0, mud, slope):
+def _model_function(z, diameter, z0, I0, mud, slope):
     """
-    compute the model function with the following steps:
-    1. Recenter x to h by subtracting x0 (so that the circle is centered at 0 and it is easier to compute length l)
+    Compute the model function with the following steps:
+    1. Recenter z to x by subtracting z0 (so that the circle is centered at 0 and it is easier to compute length l)
     2. Compute length l that is the effective length for computing intensity I = I0 * e^{-mu * l}:
-    - For h within the diameter range, l is the chord length of the circle at position h
-    - For h outside this range, l = 0
-    3. Apply a linear adjustment to I0 by taking I0 as I0 - slope * x
+    - For x within the diameter range, l is the chord length of the circle at position x
+    - For x outside this range, l = 0
+    3. Apply a linear adjustment to I0 by taking I0 as I0 - slope * z
     """
     min_radius = -diameter / 2
     max_radius = diameter / 2
-    h = x - x0
+    x = z - z0
     length = np.piecewise(
-        h,
-        [h < min_radius, (min_radius <= h) & (h <= max_radius), h > max_radius],
-        [0, lambda h: 2 * np.sqrt((diameter / 2) ** 2 - h**2), 0],
+        x,
+        [x < min_radius, (min_radius <= x) & (x <= max_radius), x > max_radius],
+        [0, lambda x: 2 * np.sqrt((diameter / 2) ** 2 - x**2), 0],
     )
-    return (I0 - slope * x) * np.exp(-mud / diameter * length)
+    return (I0 - slope * z) * np.exp(-mud / diameter * length)
 
 
-def _extend_x_and_convolve(x, diameter, slit_width, x0, I0, mud, slope):
+def _extend_z_and_convolve(z, diameter, half_slit_width, z0, I0, mud, slope):
     """
-    extend x values and I values for padding (so that we don't have tails in convolution), then perform convolution
+    extend z values and I values for padding (so that we don't have tails in convolution), then perform convolution
     (note that the convolved I values are the same as modeled I values if slit width is close to 0)
     """
-    n_points = len(x)
-    x_left_pad = np.linspace(x.min() - n_points * (x[1] - x[0]), x.min(), n_points)
-    x_right_pad = np.linspace(x.max(), x.max() + n_points * (x[1] - x[0]), n_points)
-    x_extended = np.concatenate([x_left_pad, x, x_right_pad])
-    I_extended = _model_function(x_extended, diameter, x0, I0, mud, slope)
-    kernel = _top_hat(x_extended - x_extended.mean(), slit_width)
+    n_points = len(z)
+    z_left_pad = np.linspace(z.min() - n_points * (z[1] - z[0]), z.min(), n_points)
+    z_right_pad = np.linspace(z.max(), z.max() + n_points * (z[1] - z[0]), n_points)
+    z_extended = np.concatenate([z_left_pad, z, z_right_pad])
+    I_extended = _model_function(z_extended, diameter, z0, I0, mud, slope)
+    kernel = _top_hat(z_extended - z_extended.mean(), half_slit_width)
     I_convolved = I_extended  # this takes care of the case where slit width is close to 0
     if kernel.sum() != 0:
         kernel /= kernel.sum()
         I_convolved = convolve(I_extended, kernel, mode="same")
-    padding_length = len(x_left_pad)
+    padding_length = len(z_left_pad)
     return I_convolved[padding_length:-padding_length]
 
 
-def _objective_function(params, x, observed_data):
+def _objective_function(params, z, observed_data):
     """
-    compute the objective function for fitting a model to the observed/experimental data
+    Compute the objective function for fitting a model to the observed/experimental data
     by minimizing the sum of squared residuals between the observed data and the convolved model data
     """
-    diameter, slit_width, x0, I0, mud, slope = params
-    convolved_model_data = _extend_x_and_convolve(x, diameter, slit_width, x0, I0, mud, slope)
+    diameter, half_slit_width, z0, I0, mud, slope = params
+    convolved_model_data = _extend_z_and_convolve(z, diameter, half_slit_width, z0, I0, mud, slope)
     residuals = observed_data - convolved_model_data
     return np.sum(residuals**2)
 
 
-def _compute_single_mud(x_data, I_data):
+def _compute_single_mud(z_data, I_data):
     """
-    perform dual annealing optimization and extract the parameters
+    Perform dual annealing optimization and extract the parameters
     """
     bounds = [
-        (1e-5, x_data.max() - x_data.min()),  # diameter: [small positive value, upper bound]
-        (0, (x_data.max() - x_data.min()) / 2),  # slit width: [0, upper bound]
-        (x_data.min(), x_data.max()),  # x0: [min x, max x]
+        (1e-5, z_data.max() - z_data.min()),  # diameter: [small positive value, upper bound]
+        (0, (z_data.max() - z_data.min()) / 2),  # half slit width: [0, upper bound]
+        (z_data.min(), z_data.max()),  # z0: [min z, max z]
         (1e-5, I_data.max()),  # I0: [small positive value, max observed intensity]
         (1e-5, 20),  # muD: [small positive value, upper bound]
-        (-10000, 10000),  # slope: [lower bound, upper bound]
+        (-100000, 100000),  # slope: [lower bound, upper bound]
     ]
-    result = dual_annealing(_objective_function, bounds, args=(x_data, I_data))
-    diameter, slit_width, x0, I0, mud, slope = result.x
-    convolved_fitted_signal = _extend_x_and_convolve(x_data, diameter, slit_width, x0, I0, mud, slope)
+    result = dual_annealing(_objective_function, bounds, args=(z_data, I_data))
+    diameter, half_slit_width, z0, I0, mud, slope = result.x
+    convolved_fitted_signal = _extend_z_and_convolve(z_data, diameter, half_slit_width, z0, I0, mud, slope)
     residuals = I_data - convolved_fitted_signal
     rmse = np.sqrt(np.mean(residuals**2))
     return mud, rmse
 
 
 def compute_mud(filepath):
-    """
-    compute the best-fit mu*D value from a z-scan file
+    """Compute the best-fit mu*D value from a z-scan file, removing the sample holder effect.
+
+    This function loads z-scan data and fits it to a model
+    that convolves a top-hat function with I = I0 * e^{-mu * l}.
+    The fitting procedure is run multiple times, and we return the best-fit parameters based on the lowest rmse.
+
+    The full mathematical details are described in the paper:
+    An ad hoc Absorption Correction for Reliable Pair-Distribution Functions from Low Energy x-ray Sources,
+    Yucong Chen, Till Schertenleib, Andrew Yang, Pascal Schouwink, Wendy L. Queen and Simon J. L. Billinge,
+    in preparation.
 
     Parameters
     ----------
-    filepath str
-        the path to the z-scan file
+    filepath : str
+        The path to the z-scan file.
 
     Returns
     -------
-    a float contains the best-fit mu*D value
+    mu*D : float
+        The best-fit mu*D value.
     """
-    x_data, I_data = loadData(filepath, unpack=True)
-    best_mud, _ = min((_compute_single_mud(x_data, I_data) for _ in range(10)), key=lambda pair: pair[1])
+    z_data, I_data = loadData(filepath, unpack=True)
+    best_mud, _ = min((_compute_single_mud(z_data, I_data) for _ in range(20)), key=lambda pair: pair[1])
     return best_mud
diff --git a/tests/test_mud_calculator.py b/tests/test_mud_calculator.py
@@ -3,20 +3,20 @@
 import numpy as np
 import pytest
 
-from diffpy.labpdfproc.mud_calculator import _extend_x_and_convolve, compute_mud
+from diffpy.labpdfproc.mud_calculator import _extend_z_and_convolve, compute_mud
 
 
 def test_compute_mud(tmp_path):
-    diameter, slit_width, x0, I0, mud, slope = 1, 0.1, 0, 1e5, 3, 0
-    x_data = np.linspace(-1, 1, 50)
-    convolved_I_data = _extend_x_and_convolve(x_data, diameter, slit_width, x0, I0, mud, slope)
+    diameter, slit_width, z0, I0, mud, slope = 1, 0.1, 0, 1e5, 3, 0
+    z_data = np.linspace(-1, 1, 50)
+    convolved_I_data = _extend_z_and_convolve(z_data, diameter, slit_width, z0, I0, mud, slope)
 
     directory = Path(tmp_path)
     file = directory / "testfile"
     with open(file, "w") as f:
-        for x, I in zip(x_data, convolved_I_data):
+        for x, I in zip(z_data, convolved_I_data):
             f.write(f"{x}\t{I}\n")
 
     expected_mud = 3
     actual_mud = compute_mud(file)
-    assert actual_mud == pytest.approx(expected_mud, rel=1e-4, abs=0.1)
+    assert actual_mud == pytest.approx(expected_mud, rel=0.01, abs=0.1)