From 4514296a597c2c191da04f1fba86b978596cefd2 Mon Sep 17 00:00:00 2001 From: yuzie007 Date: Tue, 10 Sep 2024 10:51:17 +0200 Subject: [PATCH 01/63] BUG: fix `minimize` with fixed bounds and callback --- scipy/optimize/_minimize.py | 5 ++++- scipy/optimize/tests/test_optimize.py | 20 ++++++++++++++++++++ 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/scipy/optimize/_minimize.py b/scipy/optimize/_minimize.py index 606a223c586e..9dd237b388c6 100644 --- a/scipy/optimize/_minimize.py +++ b/scipy/optimize/_minimize.py @@ -10,6 +10,7 @@ __all__ = ['minimize', 'minimize_scalar'] +import inspect from warnings import warn import numpy as np @@ -696,7 +697,9 @@ def minimize(fun, x0, args=(), method=None, jac=None, hess=None, bounds = _remove_from_bounds(bounds, i_fixed) fun = _remove_from_func(fun, i_fixed, x_fixed) if callable(callback): - callback = _remove_from_func(callback, i_fixed, x_fixed) + sig = inspect.signature(callback) + if set(sig.parameters) != {'intermediate_result'}: + callback = _remove_from_func(callback, i_fixed, x_fixed) if callable(jac): jac = _remove_from_func(jac, i_fixed, x_fixed, remove=1) diff --git a/scipy/optimize/tests/test_optimize.py b/scipy/optimize/tests/test_optimize.py index c6b4cfa41226..fc3fc0e61d52 100644 --- a/scipy/optimize/tests/test_optimize.py +++ b/scipy/optimize/tests/test_optimize.py @@ -1259,6 +1259,26 @@ def dfunc(z): assert func(sol1.x) < func(sol2.x), \ f"{method}: {func(sol1.x)} vs. {func(sol2.x)}" + @pytest.mark.parametrize( + 'bounds', [None, [[0.0, 0.0], [-np.inf, +np.inf], [-np.inf, +np.inf]]], + ) + @pytest.mark.parametrize('method', ['l-bfgs-b']) + def test_minimize_callback_result(self, method, bounds): + """Check if `OptimizeResult` is passed to the callback function. + + The issue related to fixed bounds (gh-21537) is also checked. + """ + def callback(intermediate_result): + assert isinstance(intermediate_result, optimize.OptimizeResult) + + res = optimize.minimize(self.func, np.zeros(3), method=method, + bounds=bounds, callback=callback) + + if bounds is not None: + for i in range(3): + assert bounds[i][0] <= res.x[i] # check lower bounds + assert bounds[i][1] >= res.x[i] # check upper bounds + @pytest.mark.fail_slow(10) @pytest.mark.filterwarnings('ignore::UserWarning') @pytest.mark.filterwarnings('ignore::RuntimeWarning') # See gh-18547 From cc6131d5dfc3c6369f1ba0c1e8c51c626174b536 Mon Sep 17 00:00:00 2001 From: Jamie Townsend Date: Thu, 10 Oct 2024 10:11:35 +0200 Subject: [PATCH 02/63] MAINT: stats.dirichlet_multinomial relax n to >= 0 Also update docstring and tests accordingly. Some discussion and motivation for this change: https://github.com/scipy/scipy/issues/21531. --- scipy/stats/_multivariate.py | 6 +++--- scipy/stats/tests/test_multivariate.py | 19 +++++++++++++++++-- 2 files changed, 20 insertions(+), 5 deletions(-) diff --git a/scipy/stats/_multivariate.py b/scipy/stats/_multivariate.py index 8dbace857514..31187f9667c6 100644 --- a/scipy/stats/_multivariate.py +++ b/scipy/stats/_multivariate.py @@ -5965,7 +5965,7 @@ def _sample_uniform_direction(dim, size, random_state): determines the dimensionality of the distribution. Each entry must be strictly positive. n : int or array_like - The number of trials. Each element must be a strictly positive integer. + The number of trials. Each element must be a non-negative integer. 
""" _dirichlet_mn_doc_frozen_callparams = "" @@ -6007,8 +6007,8 @@ def _dirichlet_multinomial_check_parameters(alpha, n, x=None): raise ValueError("`alpha` must contain only positive values.") n_int = np.floor(n) - if np.any(n <= 0) or np.any(n != n_int): - raise ValueError("`n` must be a positive integer.") + if np.any(n < 0) or np.any(n != n_int): + raise ValueError("`n` must be a non-negative integer.") n = n_int sum_alpha = np.sum(alpha, axis=-1) diff --git a/scipy/stats/tests/test_multivariate.py b/scipy/stats/tests/test_multivariate.py index 6f49f0141b96..be3b42b72898 100644 --- a/scipy/stats/tests/test_multivariate.py +++ b/scipy/stats/tests/test_multivariate.py @@ -3751,6 +3751,21 @@ def test_dimensionality_one(self): assert_equal(dist.var(), 0) assert_equal(dist.cov(), 0) + def test_n_is_zero(self): + # similarly, only one possible outcome if n is zero + n = 0 + alpha = np.asarray([1., 1.]) + x = np.asarray([0, 0]) + dist = dirichlet_multinomial(alpha, n) + + assert_equal(dist.pmf(x), 1) + assert_equal(dist.pmf(x+1), 0) + assert_equal(dist.logpmf(x), 0) + assert_equal(dist.logpmf(x+1), -np.inf) + assert_equal(dist.mean(), [0, 0]) + assert_equal(dist.var(), [0, 0]) + assert_equal(dist.cov(), [[0, 0], [0, 0]]) + @pytest.mark.parametrize('method_name', ['pmf', 'logpmf']) def test_against_betabinom_pmf(self, method_name): rng, m, alpha, n, x = self.get_params(100) @@ -3813,11 +3828,11 @@ def test_input_validation(self): with assert_raises(ValueError, match=text): dirichlet_multinomial.logpmf(x0, [3, -1, 4], n0) - text = "`n` must be a positive integer." + text = "`n` must be a non-negative integer." with assert_raises(ValueError, match=text): dirichlet_multinomial.logpmf(x0, alpha0, 49.1) with assert_raises(ValueError, match=text): - dirichlet_multinomial.logpmf(x0, alpha0, 0) + dirichlet_multinomial.logpmf(x0, alpha0, -1) x = np.array([1, 2, 3, 4]) alpha = np.array([3, 4, 5]) From 6e7767b7a41038b5bebe4f9a628360edfa8b4578 Mon Sep 17 00:00:00 2001 From: Andrew Nelson Date: Fri, 20 Dec 2024 14:26:08 +1100 Subject: [PATCH 03/63] ENH: use workers for approx_derivative --- scipy/optimize/_numdiff.py | 63 ++++++++++++++++++++++++++++++++++---- 1 file changed, 57 insertions(+), 6 deletions(-) diff --git a/scipy/optimize/_numdiff.py b/scipy/optimize/_numdiff.py index 1351824e1597..98bccb91874c 100644 --- a/scipy/optimize/_numdiff.py +++ b/scipy/optimize/_numdiff.py @@ -276,7 +276,8 @@ def group_columns(A, order=0): def approx_derivative(fun, x0, method='3-point', rel_step=None, abs_step=None, f0=None, bounds=(-np.inf, np.inf), sparsity=None, - as_linear_operator=False, args=(), kwargs=None): + as_linear_operator=False, args=(), kwargs=None, + workers=map): """Compute finite difference approximation of the derivatives of a vector-valued function. @@ -355,6 +356,8 @@ def approx_derivative(fun, x0, method='3-point', rel_step=None, abs_step=None, args, kwargs : tuple and dict, optional Additional arguments passed to `fun`. Both empty by default. The calling signature is ``fun(x, *args, **kwargs)``. + workers : map-like callable + map-like used to call user function with different steps. 
Returns ------- @@ -522,7 +525,8 @@ def fun_wrapped(x): if sparsity is None: return _dense_difference(fun_wrapped, x0, f0, h, - use_one_sided, method) + use_one_sided, method, + workers) else: if not issparse(sparsity) and len(sparsity) == 2: structure, groups = sparsity @@ -582,7 +586,7 @@ def matvec(p): return LinearOperator((m, n), matvec) -def _dense_difference(fun, x0, f0, h, use_one_sided, method): +def _dense_difference(fun, x0, f0, h, use_one_sided, method, workers): m = f0.size n = x0.size J_transposed = np.empty((n, m)) @@ -590,12 +594,58 @@ def _dense_difference(fun, x0, f0, h, use_one_sided, method): x2 = x0.copy() xc = x0.astype(complex, copy=True) + if method == '2-point': + def x_generator(x0, h): + for i in range(n): + x1[i] = x0[i] + h[i] + yield x1 + x1[i] = x0[i] + + f_evals = workers(fun, x_generator(x0, h)) + dx = [(x0[i] + h[i]) - x0[i] for i in range(n)] + df = [f_eval - f0 for f_eval in f_evals] + df_dx = [delf / delx for delf, delx in zip(df, dx)] + for i, v in enumerate(df_dx): + J_transposed[i] = v + + elif method == '3-point': + def x_generator(x0, h, use_one_sided): + for i, one_sided in enumerate(use_one_sided): + if one_sided: + x1[i] = x0[i] + h[i] + x2[i] = x0[i] + 2*h[i] + else: + x1[i] = x0[i] - h[i] + x2[i] = x0[i] + h[i] + yield x1 + yield x2 + x1[i] = x2[i] = x0[i] + + f_evals = workers(fun, x_generator(x0, h, use_one_sided)) + gen = x_generator(x0, h, use_one_sided) + dx = list() + df = list() + for i, one_sided in enumerate(use_one_sided): + l = next(gen) + u = next(gen) + + f1 = next(f_evals) + f2 = next(f_evals) + if one_sided: + dx.append(u[i] - x0[i]) + df.append(-3.0 * f0 + 4 * f1 - f2) + else: + dx.append(u[i] - l[i]) + df.append(f2 - f1) + df_dx = [delf / delx for delf, delx in zip(df, dx)] + for i, v in enumerate(df_dx): + J_transposed[i] = v + for i in range(h.size): if method == '2-point': - x1[i] += h[i] - dx = x1[i] - x0[i] # Recompute dx as exactly representable number. - df = fun(x1) - f0 + continue elif method == '3-point' and use_one_sided[i]: + continue x1[i] += h[i] x2[i] += 2 * h[i] dx = x2[i] - x0[i] @@ -603,6 +653,7 @@ def _dense_difference(fun, x0, f0, h, use_one_sided, method): f2 = fun(x2) df = -3.0 * f0 + 4 * f1 - f2 elif method == '3-point' and not use_one_sided[i]: + continue x1[i] -= h[i] x2[i] += h[i] dx = x2[i] - x1[i] From 241773817f617c0824f0f155d17bfb2d2d0df00a Mon Sep 17 00:00:00 2001 From: Andrew Nelson Date: Fri, 20 Dec 2024 15:21:50 +1100 Subject: [PATCH 04/63] ENH: approx_derivative workers --- scipy/optimize/_numdiff.py | 103 ++++++++++++++++++++----------------- 1 file changed, 57 insertions(+), 46 deletions(-) diff --git a/scipy/optimize/_numdiff.py b/scipy/optimize/_numdiff.py index 98bccb91874c..68f6e5c8a164 100644 --- a/scipy/optimize/_numdiff.py +++ b/scipy/optimize/_numdiff.py @@ -469,17 +469,18 @@ def approx_derivative(fun, x0, method='3-point', rel_step=None, abs_step=None, if kwargs is None: kwargs = {} - def fun_wrapped(x): - # send user function same fp type as x0. (but only if cs is not being - # used - if xp.isdtype(x.dtype, "real floating"): - x = xp.astype(x, x0.dtype) - - f = np.atleast_1d(fun(x, *args, **kwargs)) - if f.ndim > 1: - raise RuntimeError("`fun` return value has " - "more than 1 dimension.") - return f + fun_wrapped = _Fun_Wrapper(fun, x0, args, kwargs) + # def fun_wrapped(x): + # # send user function same fp type as x0. 
(but only if cs is not being + # # used + # if xp.isdtype(x.dtype, "real floating"): + # x = xp.astype(x, x0.dtype) + # + # f = np.atleast_1d(fun(x, *args, **kwargs)) + # if f.ndim > 1: + # raise RuntimeError("`fun` return value has " + # "more than 1 dimension.") + # return f if f0 is None: f0 = fun_wrapped(x0) @@ -590,27 +591,28 @@ def _dense_difference(fun, x0, f0, h, use_one_sided, method, workers): m = f0.size n = x0.size J_transposed = np.empty((n, m)) - x1 = x0.copy() - x2 = x0.copy() xc = x0.astype(complex, copy=True) if method == '2-point': def x_generator(x0, h): for i in range(n): + # if copying isn't done then it's possible for different workers + # to see the same values of x1. (at least that's what happened + # when I used `multiprocessing.dummy.Pool`). + x1 = np.copy(x0) x1[i] = x0[i] + h[i] yield x1 - x1[i] = x0[i] f_evals = workers(fun, x_generator(x0, h)) dx = [(x0[i] + h[i]) - x0[i] for i in range(n)] df = [f_eval - f0 for f_eval in f_evals] df_dx = [delf / delx for delf, delx in zip(df, dx)] - for i, v in enumerate(df_dx): - J_transposed[i] = v elif method == '3-point': def x_generator(x0, h, use_one_sided): for i, one_sided in enumerate(use_one_sided): + x1 = np.copy(x0) + x2 = np.copy(x0) if one_sided: x1[i] = x0[i] + h[i] x2[i] = x0[i] + 2*h[i] @@ -619,9 +621,10 @@ def x_generator(x0, h, use_one_sided): x2[i] = x0[i] + h[i] yield x1 yield x2 - x1[i] = x2[i] = x0[i] - f_evals = workers(fun, x_generator(x0, h, use_one_sided)) + # workers may return something like a list that needs to be turned + # into an iterable (can't call `next` on a list) + f_evals = iter(workers(fun, x_generator(x0, h, use_one_sided))) gen = x_generator(x0, h, use_one_sided) dx = list() df = list() @@ -641,35 +644,20 @@ def x_generator(x0, h, use_one_sided): for i, v in enumerate(df_dx): J_transposed[i] = v - for i in range(h.size): - if method == '2-point': - continue - elif method == '3-point' and use_one_sided[i]: - continue - x1[i] += h[i] - x2[i] += 2 * h[i] - dx = x2[i] - x0[i] - f1 = fun(x1) - f2 = fun(x2) - df = -3.0 * f0 + 4 * f1 - f2 - elif method == '3-point' and not use_one_sided[i]: - continue - x1[i] -= h[i] - x2[i] += h[i] - dx = x2[i] - x1[i] - f1 = fun(x1) - f2 = fun(x2) - df = f2 - f1 - elif method == 'cs': - xc[i] += h[i] * 1.j - f1 = fun(xc) - df = f1.imag - dx = h[i] - else: - raise RuntimeError("Never be here.") + elif method == 'cs': + def x_generator(x0, h): + for i in range(n): + xc = x0.astype(complex, copy=True) + xc[i] += h[j] * 1.j + yield xc - J_transposed[i] = df / dx - x1[i] = x2[i] = xc[i] = x0[i] + f_evals = iter(workers(fun, x_generator(x0, h))) + df_dx = [f1.imag / hi for f1, hi in zip(f_evals, h)] + else: + raise RuntimeError("Never be here.") + + for i, v in enumerate(df_dx): + J_transposed[i] = v if m == 1: J_transposed = np.ravel(J_transposed) @@ -758,6 +746,29 @@ def _sparse_difference(fun, x0, f0, h, use_one_sided, return csr_array((fractions, (row_indices, col_indices)), shape=(m, n)) +class _Fun_Wrapper: + # Permits pickling of a wrapped function + def __init__(self, fun, x0, args, kwargs): + self.fun = fun + self.x0 = x0 + self.args = args + self.kwargs = kwargs + + def __call__(self, x): + # send user function same fp type as x0. 
(but only if cs is not being + # used + xp = array_namespace(self.x0) + + if xp.isdtype(x.dtype, "real floating"): + x = xp.astype(x, self.x0.dtype) + + f = np.atleast_1d(self.fun(x, *self.args, **self.kwargs)) + if f.ndim > 1: + raise RuntimeError("`fun` return value has " + "more than 1 dimension.") + return f + + def check_derivative(fun, jac, x0, bounds=(-np.inf, np.inf), args=(), kwargs=None): """Check correctness of a function computing derivatives (Jacobian or From de51865baa9851b21583b7720cfad6b0d8a1338b Mon Sep 17 00:00:00 2001 From: Andrew Nelson Date: Fri, 20 Dec 2024 15:35:28 +1100 Subject: [PATCH 05/63] ENH: approx_derivative workers --- scipy/optimize/_numdiff.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/scipy/optimize/_numdiff.py b/scipy/optimize/_numdiff.py index 68f6e5c8a164..e37cb5ceda09 100644 --- a/scipy/optimize/_numdiff.py +++ b/scipy/optimize/_numdiff.py @@ -596,9 +596,12 @@ def _dense_difference(fun, x0, f0, h, use_one_sided, method, workers): if method == '2-point': def x_generator(x0, h): for i in range(n): - # if copying isn't done then it's possible for different workers - # to see the same values of x1. (at least that's what happened + # If copying isn't done then it's possible for different workers + # to see the same values of x1. (At least that's what happened # when I used `multiprocessing.dummy.Pool`). + # I also considered creating all the vectors at once, but that + # means assembling a very large N x N array. It's therefore a + # trade-off between N array copies or creating an NxN array. x1 = np.copy(x0) x1[i] = x0[i] + h[i] yield x1 From ca0cfb7df16dfb75670f88f24c98a4bfd44355ed Mon Sep 17 00:00:00 2001 From: Andrew Nelson Date: Fri, 20 Dec 2024 18:35:26 +1100 Subject: [PATCH 06/63] MAINT: cruft cleanup --- scipy/optimize/_numdiff.py | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/scipy/optimize/_numdiff.py b/scipy/optimize/_numdiff.py index e37cb5ceda09..ebddeb07bd33 100644 --- a/scipy/optimize/_numdiff.py +++ b/scipy/optimize/_numdiff.py @@ -470,17 +470,6 @@ def approx_derivative(fun, x0, method='3-point', rel_step=None, abs_step=None, kwargs = {} fun_wrapped = _Fun_Wrapper(fun, x0, args, kwargs) - # def fun_wrapped(x): - # # send user function same fp type as x0. 
(but only if cs is not being - # # used - # if xp.isdtype(x.dtype, "real floating"): - # x = xp.astype(x, x0.dtype) - # - # f = np.atleast_1d(fun(x, *args, **kwargs)) - # if f.ndim > 1: - # raise RuntimeError("`fun` return value has " - # "more than 1 dimension.") - # return f if f0 is None: f0 = fun_wrapped(x0) @@ -644,8 +633,6 @@ def x_generator(x0, h, use_one_sided): dx.append(u[i] - l[i]) df.append(f2 - f1) df_dx = [delf / delx for delf, delx in zip(df, dx)] - for i, v in enumerate(df_dx): - J_transposed[i] = v elif method == 'cs': def x_generator(x0, h): From f819e92a804788d823ade734aaef935d1b7f4cd5 Mon Sep 17 00:00:00 2001 From: Andrew Nelson Date: Fri, 20 Dec 2024 20:16:20 +1100 Subject: [PATCH 07/63] MAINT: rename stray variable --- scipy/optimize/_numdiff.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scipy/optimize/_numdiff.py b/scipy/optimize/_numdiff.py index ebddeb07bd33..abf89d6c9cb5 100644 --- a/scipy/optimize/_numdiff.py +++ b/scipy/optimize/_numdiff.py @@ -638,7 +638,7 @@ def x_generator(x0, h, use_one_sided): def x_generator(x0, h): for i in range(n): xc = x0.astype(complex, copy=True) - xc[i] += h[j] * 1.j + xc[i] += h[i] * 1.j yield xc f_evals = iter(workers(fun, x_generator(x0, h))) From c7799f9b9623eab1460f450d7294406c2e555d24 Mon Sep 17 00:00:00 2001 From: Andrew Nelson Date: Fri, 20 Dec 2024 20:39:31 +1100 Subject: [PATCH 08/63] MAINT: mypy gremlins --- scipy/optimize/_numdiff.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/scipy/optimize/_numdiff.py b/scipy/optimize/_numdiff.py index abf89d6c9cb5..67fbb29e51af 100644 --- a/scipy/optimize/_numdiff.py +++ b/scipy/optimize/_numdiff.py @@ -580,10 +580,9 @@ def _dense_difference(fun, x0, f0, h, use_one_sided, method, workers): m = f0.size n = x0.size J_transposed = np.empty((n, m)) - xc = x0.astype(complex, copy=True) if method == '2-point': - def x_generator(x0, h): + def x_generator2(x0, h): for i in range(n): # If copying isn't done then it's possible for different workers # to see the same values of x1. 
(At least that's what happened @@ -595,13 +594,13 @@ def x_generator(x0, h): x1[i] = x0[i] + h[i] yield x1 - f_evals = workers(fun, x_generator(x0, h)) + f_evals = workers(fun, x_generator2(x0, h)) dx = [(x0[i] + h[i]) - x0[i] for i in range(n)] df = [f_eval - f0 for f_eval in f_evals] df_dx = [delf / delx for delf, delx in zip(df, dx)] elif method == '3-point': - def x_generator(x0, h, use_one_sided): + def x_generator3(x0, h, use_one_sided): for i, one_sided in enumerate(use_one_sided): x1 = np.copy(x0) x2 = np.copy(x0) @@ -616,8 +615,8 @@ def x_generator(x0, h, use_one_sided): # workers may return something like a list that needs to be turned # into an iterable (can't call `next` on a list) - f_evals = iter(workers(fun, x_generator(x0, h, use_one_sided))) - gen = x_generator(x0, h, use_one_sided) + f_evals = iter(workers(fun, x_generator3(x0, h, use_one_sided))) + gen = x_generator3(x0, h, use_one_sided) dx = list() df = list() for i, one_sided in enumerate(use_one_sided): @@ -635,13 +634,13 @@ def x_generator(x0, h, use_one_sided): df_dx = [delf / delx for delf, delx in zip(df, dx)] elif method == 'cs': - def x_generator(x0, h): + def x_generator_cs(x0, h): for i in range(n): xc = x0.astype(complex, copy=True) xc[i] += h[i] * 1.j yield xc - f_evals = iter(workers(fun, x_generator(x0, h))) + f_evals = iter(workers(fun, x_generator_cs(x0, h))) df_dx = [f1.imag / hi for f1, hi in zip(f_evals, h)] else: raise RuntimeError("Never be here.") @@ -672,7 +671,7 @@ def _sparse_difference(fun, x0, f0, h, use_one_sided, x = x0 + h_vec dx = x - x0 df = fun(x) - f0 - # The result is written to columns which correspond to perturbed + # The result is written to columns which correspond to perturbed # variables. cols, = np.nonzero(e) # Find all non-zero elements in selected columns of Jacobian. From a0394f0b39df5542124cec183a0e3323abd16029 Mon Sep 17 00:00:00 2001 From: Andrew Nelson Date: Sat, 21 Dec 2024 08:27:34 +1100 Subject: [PATCH 09/63] MAINT: parallelise sparse difference --- scipy/optimize/_numdiff.py | 89 +++++++++++++++++++++++++------------- 1 file changed, 60 insertions(+), 29 deletions(-) diff --git a/scipy/optimize/_numdiff.py b/scipy/optimize/_numdiff.py index 67fbb29e51af..632d5960bb1f 100644 --- a/scipy/optimize/_numdiff.py +++ b/scipy/optimize/_numdiff.py @@ -532,7 +532,7 @@ def approx_derivative(fun, x0, method='3-point', rel_step=None, abs_step=None, groups = np.atleast_1d(groups) return _sparse_difference(fun_wrapped, x0, f0, h, use_one_sided, structure, - groups, method) + groups, method, workers) def _linear_operator_difference(fun, x0, f0, h, method): @@ -655,7 +655,7 @@ def x_generator_cs(x0, h): def _sparse_difference(fun, x0, f0, h, use_one_sided, - structure, groups, method): + structure, groups, method, workers): m = f0.size n = x0.size row_indices = [] @@ -663,24 +663,23 @@ def _sparse_difference(fun, x0, f0, h, use_one_sided, fractions = [] n_groups = np.max(groups) + 1 - for group in range(n_groups): + + def e_generator(): # Perturb variables which are in the same group simultaneously. - e = np.equal(group, groups) - h_vec = h * e - if method == '2-point': + for group in range(n_groups): + yield np.equal(group, groups) + + def x_generator2(): + e_gen = e_generator() + for e in e_gen: + h_vec = h * e x = x0 + h_vec - dx = x - x0 - df = fun(x) - f0 - # The result is written to columns which correspond to perturbed - # variables. - cols, = np.nonzero(e) - # Find all non-zero elements in selected columns of Jacobian. 
- i, j, _ = find(structure[:, cols]) - # Restore column indices in the full array. - j = cols[j] - elif method == '3-point': - # Here we do conceptually the same but separate one-sided - # and two-sided schemes. + yield x + + def x_generator3(): + e_gen = e_generator() + for e in e_gen: + h_vec = h * e x1 = x0.copy() x2 = x0.copy() @@ -691,17 +690,52 @@ def _sparse_difference(fun, x0, f0, h, use_one_sided, mask_2 = ~use_one_sided & e x1[mask_2] -= h_vec[mask_2] x2[mask_2] += h_vec[mask_2] + yield x1 + yield x2 + + def x_generator_cs(): + e_gen = e_generator() + for e in e_gen: + h_vec = h * e + yield x0 + h_vec * 1.j + + # evaluate the function for each of the groups + if method == '2-point': + f_evals = iter(workers(fun, x_generator2())) + xs = x_generator2() + elif method == '3-point': + f_evals = iter(workers(fun, x_generator3())) + xs = x_generator3() + elif method == 'cs': + f_evals = iter(workers(fun, x_generator_cs())) + + for e in e_generator(): + # The result is written to columns which correspond to perturbed + # variables. + cols, = np.nonzero(e) + # Find all non-zero elements in selected columns of Jacobian. + i, j, _ = find(structure[:, cols]) + # Restore column indices in the full array. + j = cols[j] + + if method == '2-point': + dx = next(xs) - x0 + df = next(f_evals) - f0 + elif method == '3-point': + # Here we do conceptually the same but separate one-sided + # and two-sided schemes. + x1 = next(xs) + x2 = next(xs) + + mask_1 = use_one_sided & e + mask_2 = ~use_one_sided & e dx = np.zeros(n) dx[mask_1] = x2[mask_1] - x0[mask_1] dx[mask_2] = x2[mask_2] - x1[mask_2] - f1 = fun(x1) - f2 = fun(x2) - - cols, = np.nonzero(e) - i, j, _ = find(structure[:, cols]) - j = cols[j] + f1 = next(f_evals) + f2 = next(f_evals) mask = use_one_sided[j] df = np.empty(m) @@ -712,12 +746,9 @@ def _sparse_difference(fun, x0, f0, h, use_one_sided, rows = i[~mask] df[rows] = f2[rows] - f1[rows] elif method == 'cs': - f1 = fun(x0 + h_vec*1.j) + f1 = next(f_evals) df = f1.imag - dx = h_vec - cols, = np.nonzero(e) - i, j, _ = find(structure[:, cols]) - j = cols[j] + dx = h * e else: raise ValueError("Never be here.") From 4b3197efc4ee1ec8939ced705dd9f86f639aaf77 Mon Sep 17 00:00:00 2001 From: Andrew Nelson Date: Sat, 21 Dec 2024 08:40:22 +1100 Subject: [PATCH 10/63] TST: add tests for workers in approx_derivative --- scipy/optimize/_numdiff.py | 5 ++- scipy/optimize/tests/test__numdiff.py | 46 +++++++++++++++------------ 2 files changed, 30 insertions(+), 21 deletions(-) diff --git a/scipy/optimize/_numdiff.py b/scipy/optimize/_numdiff.py index 632d5960bb1f..441534e7e55e 100644 --- a/scipy/optimize/_numdiff.py +++ b/scipy/optimize/_numdiff.py @@ -277,7 +277,7 @@ def group_columns(A, order=0): def approx_derivative(fun, x0, method='3-point', rel_step=None, abs_step=None, f0=None, bounds=(-np.inf, np.inf), sparsity=None, as_linear_operator=False, args=(), kwargs=None, - workers=map): + workers=None): """Compute finite difference approximation of the derivatives of a vector-valued function. 
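
For illustration, a minimal sketch of how the `workers` keyword added by this
series can be driven with a process pool (not part of the patch; the objective
must live at module top level so that `multiprocessing` can pickle it):

    from multiprocessing import Pool

    import numpy as np
    from scipy.optimize._numdiff import approx_derivative

    def fun(x):
        # vector-valued objective, R^2 -> R^2
        return np.array([x[0]**2, np.sin(x[1])])

    if __name__ == "__main__":
        x0 = np.array([1.0, 0.5])
        with Pool(2) as p:
            # each perturbed point is evaluated by a pool worker
            J = approx_derivative(fun, x0, method='3-point', workers=p.map)
        print(J)  # 2x2 Jacobian estimate
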
@@ -481,6 +481,9 @@ def approx_derivative(fun, x0, method='3-point', rel_step=None, abs_step=None, if np.any((x0 < lb) | (x0 > ub)): raise ValueError("`x0` violates bound constraints.") + # check the map function for parallelisation + workers = workers or map + if as_linear_operator: if rel_step is None: rel_step = _eps_for_method(x0.dtype, f0.dtype, method) diff --git a/scipy/optimize/tests/test__numdiff.py b/scipy/optimize/tests/test__numdiff.py index 853a73e56d6b..3c544631dce4 100644 --- a/scipy/optimize/tests/test__numdiff.py +++ b/scipy/optimize/tests/test__numdiff.py @@ -5,6 +5,7 @@ from numpy.testing import assert_allclose, assert_equal, assert_ from pytest import raises as assert_raises +from scipy._lib._util import MapWrapper from scipy.sparse import csr_array, csc_array, lil_array from scipy.optimize._numdiff import ( @@ -267,11 +268,12 @@ def test_scalar_scalar_abs_step(self): def test_scalar_vector(self): x0 = 0.5 - jac_diff_2 = approx_derivative(self.fun_scalar_vector, x0, - method='2-point') - jac_diff_3 = approx_derivative(self.fun_scalar_vector, x0) - jac_diff_4 = approx_derivative(self.fun_scalar_vector, x0, - method='cs') + with MapWrapper(2) as mapper: + jac_diff_2 = approx_derivative(self.fun_scalar_vector, x0, + method='2-point', workers=mapper) + jac_diff_3 = approx_derivative(self.fun_scalar_vector, x0, workers=mapper) + jac_diff_4 = approx_derivative(self.fun_scalar_vector, x0, + method='cs', workers=mapper) jac_true = self.jac_scalar_vector(np.atleast_1d(x0)) assert_allclose(jac_diff_2, jac_true, rtol=1e-6) assert_allclose(jac_diff_3, jac_true, rtol=1e-9) @@ -308,8 +310,9 @@ def test_vector_vector(self): jac_diff_2 = approx_derivative(self.fun_vector_vector, x0, method='2-point') jac_diff_3 = approx_derivative(self.fun_vector_vector, x0) - jac_diff_4 = approx_derivative(self.fun_vector_vector, x0, - method='cs') + with MapWrapper(2) as mapper: + jac_diff_4 = approx_derivative(self.fun_vector_vector, x0, + method='cs', workers=mapper) jac_true = self.jac_vector_vector(x0) assert_allclose(jac_diff_2, jac_true, rtol=1e-5) assert_allclose(jac_diff_3, jac_true, rtol=1e-6) @@ -576,19 +579,22 @@ def test_all(self): np.random.shuffle(order) groups_2 = group_columns(A, order) - for method, groups, l, u in product( - ['2-point', '3-point', 'cs'], [groups_1, groups_2], - [-np.inf, self.lb], [np.inf, self.ub]): - J = approx_derivative(self.fun, self.x0, method=method, - bounds=(l, u), sparsity=(A, groups)) - assert_(isinstance(J, csr_array)) - assert_allclose(J.toarray(), self.J_true, rtol=1e-6) - - rel_step = np.full_like(self.x0, 1e-8) - rel_step[::2] *= -1 - J = approx_derivative(self.fun, self.x0, method=method, - rel_step=rel_step, sparsity=(A, groups)) - assert_allclose(J.toarray(), self.J_true, rtol=1e-5) + with MapWrapper(2) as mapper: + for method, groups, l, u, mf in product( + ['2-point', '3-point', 'cs'], [groups_1, groups_2], + [-np.inf, self.lb], [np.inf, self.ub], [None, map, mapper]): + J = approx_derivative(self.fun, self.x0, method=method, + bounds=(l, u), sparsity=(A, groups), + workers=mf) + assert_(isinstance(J, csr_array)) + assert_allclose(J.toarray(), self.J_true, rtol=1e-6) + + rel_step = np.full_like(self.x0, 1e-8) + rel_step[::2] *= -1 + J = approx_derivative(self.fun, self.x0, method=method, + rel_step=rel_step, sparsity=(A, groups), + workers=mf) + assert_allclose(J.toarray(), self.J_true, rtol=1e-5) def test_no_precomputed_groups(self): A = self.structure(self.n) From 35f3ec1a1e1cf6c48afbc1e4488653a94b6c5ee3 Mon Sep 17 00:00:00 2001 From: Andrew 
Nelson Date: Sat, 21 Dec 2024 18:04:14 +1100 Subject: [PATCH 11/63] ENH: modify ScalarFunction for workers --- scipy/_lib/_util.py | 27 +++++++ scipy/optimize/_differentiable_functions.py | 81 ++++++++++----------- 2 files changed, 65 insertions(+), 43 deletions(-) diff --git a/scipy/_lib/_util.py b/scipy/_lib/_util.py index a70317c6400f..263fbca1d423 100644 --- a/scipy/_lib/_util.py +++ b/scipy/_lib/_util.py @@ -657,6 +657,33 @@ def __call__(self, x): return self.f(x, *self.args) +class _ScalarFunctionWrapper: + """ + Object to wrap scalar user function, allowing picklability + """ + def __init__(self, f, args): + self.f = f + self.args = [] if args is None else args + self.nfev = 0 + + def __call__(self, x): + # Send a copy because the user may overwrite it. + # Overwriting results in undefined behaviour because + # fun(self.x) will change self.x, with the two no longer linked. + fx = self.f(np.copy(x), *self.args) + self.nfev += 1 + # Make sure the function returns a true scalar + if not np.isscalar(fx): + try: + fx = np.asarray(fx).item() + except (TypeError, ValueError) as e: + raise ValueError( + "The user-provided objective function " + "must return a scalar value." + ) from e + return fx + + class MapWrapper: """ Parallelisation wrapper for working with map-like callables, such as diff --git a/scipy/optimize/_differentiable_functions.py b/scipy/optimize/_differentiable_functions.py index cf7d1e0cb5c3..7d2c5a758023 100644 --- a/scipy/optimize/_differentiable_functions.py +++ b/scipy/optimize/_differentiable_functions.py @@ -5,51 +5,43 @@ from scipy.sparse.linalg import LinearOperator from scipy._lib._array_api import array_namespace from scipy._lib import array_api_extra as xpx +from scipy._lib._util import _ScalarFunctionWrapper FD_METHODS = ('2-point', '3-point', 'cs') -def _wrapper_fun(fun, args=()): - ncalls = [0] - - def wrapped(x): - ncalls[0] += 1 +class _GradWrapper: + """ + Wrapper class for gradient calculation + """ + def __init__( + self, + grad, + fun=None, + args=None, + finite_diff_options=None, + workers=None + ): + self.fun = fun + self.grad = grad + self.args = [] if args is None else args + self.finite_diff_options = finite_diff_options + self.workers = workers or map + self.ngev = 0 + + def __call__(self, x, f0=None, **kwds): # Send a copy because the user may overwrite it. # Overwriting results in undefined behaviour because # fun(self.x) will change self.x, with the two no longer linked. - fx = fun(np.copy(x), *args) - # Make sure the function returns a true scalar - if not np.isscalar(fx): - try: - fx = np.asarray(fx).item() - except (TypeError, ValueError) as e: - raise ValueError( - "The user-provided objective function " - "must return a scalar value." 
- ) from e - return fx - return wrapped, ncalls - - -def _wrapper_grad(grad, fun=None, args=(), finite_diff_options=None): - ncalls = [0] - - if callable(grad): - def wrapped(x, **kwds): - # kwds present to give function same signature as numdiff variant - ncalls[0] += 1 - return np.atleast_1d(grad(np.copy(x), *args)) - return wrapped, ncalls - - elif grad in FD_METHODS: - def wrapped1(x, f0=None): - ncalls[0] += 1 - return approx_derivative( - fun, x, f0=f0, **finite_diff_options + if callable(self.grad): + g = np.atleast_1d(self.grad(np.copy(x), *self.args)) + elif self.grad in FD_METHODS: + g = approx_derivative( + self.fun, x, f0=f0, **self.finite_diff_options, workers=self.workers ) - - return wrapped1, ncalls + self.ngev += 1 + return g def _wrapper_hess(hess, grad=None, x0=None, args=(), finite_diff_options=None): @@ -151,6 +143,8 @@ class ScalarFunction: For ``method='3-point'`` the sign of `epsilon` is ignored. By default relative steps are used, only if ``epsilon is not None`` are absolute steps used. + workers : map-like callable + map-like used to call user function with different steps. Notes ----- @@ -164,7 +158,8 @@ class ScalarFunction: of *any* of the methods may overwrite the attribute. """ def __init__(self, fun, x0, args, grad, hess, finite_diff_rel_step, - finite_diff_bounds, epsilon=None): + finite_diff_bounds, epsilon=None, workers=None): + if not callable(grad) and grad not in FD_METHODS: raise ValueError( f"`grad` must be either callable or one of {FD_METHODS}." @@ -182,7 +177,6 @@ def __init__(self, fun, x0, args, grad, hess, finite_diff_rel_step, "finite-differences, we require the Hessian " "to be estimated using one of the " "quasi-Newton strategies.") - self.xp = xp = array_namespace(x0) _x = xpx.atleast_nd(xp.asarray(x0), ndim=1, xp=xp) _dtype = xp.float64 @@ -190,7 +184,7 @@ def __init__(self, fun, x0, args, grad, hess, finite_diff_rel_step, _dtype = _x.dtype # original arguments - self._wrapped_fun, self._nfev = _wrapper_fun(fun, args=args) + self._wrapped_fun = _ScalarFunctionWrapper(fun, args) self._orig_fun = fun self._orig_grad = grad self._orig_hess = hess @@ -223,11 +217,12 @@ def __init__(self, fun, x0, args, grad, hess, finite_diff_rel_step, self._update_fun() # Initial gradient evaluation - self._wrapped_grad, self._ngev = _wrapper_grad( + self._wrapped_grad = _GradWrapper( grad, fun=self._wrapped_fun, args=args, - finite_diff_options=finite_diff_options + finite_diff_options=finite_diff_options, + workers=workers ) self._update_grad() @@ -257,11 +252,11 @@ def __init__(self, fun, x0, args, grad, hess, finite_diff_rel_step, @property def nfev(self): - return self._nfev[0] + return self._wrapped_fun.nfev @property def ngev(self): - return self._ngev[0] + return self._wrapped_grad.ngev @property def nhev(self): From a80643bb3af8970729ce8f753e4344f36ee32ed0 Mon Sep 17 00:00:00 2001 From: Andrew Nelson Date: Sun, 22 Dec 2024 13:48:23 +1100 Subject: [PATCH 12/63] TST: multiprocess worker example --- scipy/_lib/_util.py | 2 +- .../tests/test_differentiable_functions.py | 14 ++++++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/scipy/_lib/_util.py b/scipy/_lib/_util.py index 263fbca1d423..67d6fa0259a7 100644 --- a/scipy/_lib/_util.py +++ b/scipy/_lib/_util.py @@ -672,6 +672,7 @@ def __call__(self, x): # fun(self.x) will change self.x, with the two no longer linked. 
fx = self.f(np.copy(x), *self.args) self.nfev += 1 + # Make sure the function returns a true scalar if not np.isscalar(fx): try: @@ -683,7 +684,6 @@ def __call__(self, x): ) from e return fx - class MapWrapper: """ Parallelisation wrapper for working with map-like callables, such as diff --git a/scipy/optimize/tests/test_differentiable_functions.py b/scipy/optimize/tests/test_differentiable_functions.py index 79cc39349f90..469a078bbbd3 100644 --- a/scipy/optimize/tests/test_differentiable_functions.py +++ b/scipy/optimize/tests/test_differentiable_functions.py @@ -5,6 +5,7 @@ assert_array_equal, assert_, assert_allclose, assert_equal) from scipy._lib._gcutils import assert_deallocated +from scipy._lib._util import MapWrapper from scipy.sparse import csr_array from scipy.sparse.linalg import LinearOperator from scipy.optimize._differentiable_functions import (ScalarFunction, @@ -130,6 +131,19 @@ def test_finite_difference_grad(self): assert_array_almost_equal(f_analit, f_approx) assert_array_almost_equal(g_analit, g_approx) + def test_workers(self): + ex = ExScalarFunction() + x0 = np.array([2.0, 0.3]) + with MapWrapper(2) as mapper: + approx = ScalarFunction(ex.fun, x0, (), '2-point', + ex.hess, None, (-np.inf, np.inf), + workers=mapper) + approx_series = ScalarFunction(ex.fun, x0, (), '2-point', + ex.hess, None, (-np.inf, np.inf), + ) + assert_allclose(approx.grad(x0), approx_series.grad(x0)) + assert_equal(approx.nfev, approx_series.nfev) + def test_fun_and_grad(self): ex = ExScalarFunction() From c3b5746436ef76bffa63165170774c08b0c88aa7 Mon Sep 17 00:00:00 2001 From: Andrew Nelson Date: Sun, 22 Dec 2024 15:11:09 +1100 Subject: [PATCH 13/63] MAINT: track nfev for parallelised approx_derivative --- scipy/_lib/_util.py | 2 +- scipy/optimize/_differentiable_functions.py | 17 ++++- scipy/optimize/_numdiff.py | 64 +++++++++++++++---- scipy/optimize/tests/test__numdiff.py | 30 ++++++++- .../tests/test_differentiable_functions.py | 10 +++ 5 files changed, 105 insertions(+), 18 deletions(-) diff --git a/scipy/_lib/_util.py b/scipy/_lib/_util.py index 67d6fa0259a7..e7f3745bdbc8 100644 --- a/scipy/_lib/_util.py +++ b/scipy/_lib/_util.py @@ -661,7 +661,7 @@ class _ScalarFunctionWrapper: """ Object to wrap scalar user function, allowing picklability """ - def __init__(self, f, args): + def __init__(self, f, args=None): self.f = f self.args = [] if args is None else args self.nfev = 0 diff --git a/scipy/optimize/_differentiable_functions.py b/scipy/optimize/_differentiable_functions.py index 7d2c5a758023..48b714128640 100644 --- a/scipy/optimize/_differentiable_functions.py +++ b/scipy/optimize/_differentiable_functions.py @@ -29,6 +29,8 @@ def __init__( self.finite_diff_options = finite_diff_options self.workers = workers or map self.ngev = 0 + # number of function evaluations consumed by finite difference + self.nfev = 0 def __call__(self, x, f0=None, **kwds): # Send a copy because the user may overwrite it. 
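
For illustration, a small sketch (not part of the patch) of the
``full_output=True`` contract that `_GradWrapper` now relies on to track
function evaluations:

    import numpy as np
    from scipy.optimize._numdiff import approx_derivative

    def f(x):
        return x[0]**2 + x[1]**2

    x0 = np.array([1.0, 2.0])
    g, info = approx_derivative(f, x0, method='2-point',
                                f0=f(x0), full_output=True)
    # f0 was supplied, so only the two perturbed points were evaluated
    print(info)  # {'nfev': 2}
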
@@ -37,9 +39,16 @@ def __call__(self, x, f0=None, **kwds): if callable(self.grad): g = np.atleast_1d(self.grad(np.copy(x), *self.args)) elif self.grad in FD_METHODS: - g = approx_derivative( - self.fun, x, f0=f0, **self.finite_diff_options, workers=self.workers + g, dct = approx_derivative( + self.fun, + x, + f0=f0, + **self.finite_diff_options, + workers=self.workers, + full_output=True ) + self.nfev += dct['nfev'] + self.ngev += 1 return g @@ -214,6 +223,7 @@ def __init__(self, fun, x0, args, grad, hess, finite_diff_rel_step, finite_diff_options["as_linear_operator"] = True # Initial function evaluation + self._nfev = 0 self._update_fun() # Initial gradient evaluation @@ -252,7 +262,7 @@ def __init__(self, fun, x0, args, grad, hess, finite_diff_rel_step, @property def nfev(self): - return self._wrapped_fun.nfev + return self._nfev + self._wrapped_grad.nfev @property def ngev(self): @@ -288,6 +298,7 @@ def _update_x(self, x): def _update_fun(self): if not self.f_updated: fx = self._wrapped_fun(self.x) + self._nfev += 1 if fx < self._lowest_f: self._lowest_x = self.x self._lowest_f = fx diff --git a/scipy/optimize/_numdiff.py b/scipy/optimize/_numdiff.py index 441534e7e55e..866cd8583c40 100644 --- a/scipy/optimize/_numdiff.py +++ b/scipy/optimize/_numdiff.py @@ -277,7 +277,7 @@ def group_columns(A, order=0): def approx_derivative(fun, x0, method='3-point', rel_step=None, abs_step=None, f0=None, bounds=(-np.inf, np.inf), sparsity=None, as_linear_operator=False, args=(), kwargs=None, - workers=None): + full_output=False, workers=None): """Compute finite difference approximation of the derivatives of a vector-valued function. @@ -356,7 +356,10 @@ def approx_derivative(fun, x0, method='3-point', rel_step=None, abs_step=None, args, kwargs : tuple and dict, optional Additional arguments passed to `fun`. Both empty by default. The calling signature is ``fun(x, *args, **kwargs)``. - workers : map-like callable + full_output : bool, optional + If True then the function also returns a dictionary with extra information + about the calculation. + workers : map-like callable, optional map-like used to call user function with different steps. Returns @@ -373,6 +376,12 @@ def approx_derivative(fun, x0, method='3-point', rel_step=None, abs_step=None, a 2-D structure. For ndarrays, if m=1 it is returned as a 1-D gradient array with shape (n,). + info_dict : dict + Dictionary containing extra information about the calculation. The + keys include: + + - `nfev`, number of function evaluations. + See Also -------- check_derivative : Check correctness of a function computing derivatives. @@ -444,6 +453,8 @@ def approx_derivative(fun, x0, method='3-point', rel_step=None, abs_step=None, if method not in ['2-point', '3-point', 'cs']: raise ValueError(f"Unknown method '{method}'. ") + info_dict = {'nfev': None} + xp = array_namespace(x0) _x = xpx.atleast_nd(xp.asarray(x0), ndim=1, xp=xp) _dtype = xp.float64 @@ -471,8 +482,17 @@ def approx_derivative(fun, x0, method='3-point', rel_step=None, abs_step=None, fun_wrapped = _Fun_Wrapper(fun, x0, args, kwargs) + # How many function evaluations are consumed by `approx_derivative`. + # Historically this was done by a wrapper around `fun`. However, with + # parallelization via workers it was going to be impossible to keep that + # counter updated across Processes. Counter synchronisation can be achieved + # via multiprocessing.Value and a Pool. However, workers can be any map-like, + # not necessarily a Pool. 
+ nfev = _nfev = 0 + if f0 is None: f0 = fun_wrapped(x0) + nfev = 1 else: f0 = np.atleast_1d(f0) if f0.ndim > 1: @@ -488,8 +508,8 @@ def approx_derivative(fun, x0, method='3-point', rel_step=None, abs_step=None, if rel_step is None: rel_step = _eps_for_method(x0.dtype, f0.dtype, method) - return _linear_operator_difference(fun_wrapped, x0, - f0, rel_step, method) + J, _nfev = _linear_operator_difference(fun_wrapped, x0, + f0, rel_step, method) else: # by default we use rel_step if abs_step is None: @@ -517,7 +537,7 @@ def approx_derivative(fun, x0, method='3-point', rel_step=None, abs_step=None, use_one_sided = False if sparsity is None: - return _dense_difference(fun_wrapped, x0, f0, h, + J, _nfev = _dense_difference(fun_wrapped, x0, f0, h, use_one_sided, method, workers) else: @@ -533,9 +553,16 @@ def approx_derivative(fun, x0, method='3-point', rel_step=None, abs_step=None, structure = np.atleast_2d(structure) groups = np.atleast_1d(groups) - return _sparse_difference(fun_wrapped, x0, f0, h, - use_one_sided, structure, - groups, method, workers) + J, _nfev = _sparse_difference(fun_wrapped, x0, f0, h, + use_one_sided, structure, + groups, method, workers) + + if full_output: + nfev += _nfev + info_dict["nfev"] = nfev + return J, info_dict + else: + return J def _linear_operator_difference(fun, x0, f0, h, method): @@ -543,6 +570,7 @@ def _linear_operator_difference(fun, x0, f0, h, method): n = x0.size if method == '2-point': + nfev = 1 def matvec(p): if np.array_equal(p, np.zeros_like(p)): return np.zeros(m) @@ -552,6 +580,7 @@ def matvec(p): return df / dx elif method == '3-point': + nfev = 2 def matvec(p): if np.array_equal(p, np.zeros_like(p)): return np.zeros(m) @@ -564,6 +593,7 @@ def matvec(p): return df / dx elif method == 'cs': + nfev = 1 def matvec(p): if np.array_equal(p, np.zeros_like(p)): return np.zeros(m) @@ -576,13 +606,14 @@ def matvec(p): else: raise RuntimeError("Never be here.") - return LinearOperator((m, n), matvec) + return LinearOperator((m, n), matvec), nfev def _dense_difference(fun, x0, f0, h, use_one_sided, method, workers): m = f0.size n = x0.size J_transposed = np.empty((n, m)) + nfev = 0 if method == '2-point': def x_generator2(x0, h): @@ -601,6 +632,7 @@ def x_generator2(x0, h): dx = [(x0[i] + h[i]) - x0[i] for i in range(n)] df = [f_eval - f0 for f_eval in f_evals] df_dx = [delf / delx for delf, delx in zip(df, dx)] + nfev += len(df_dx) elif method == '3-point': def x_generator3(x0, h, use_one_sided): @@ -635,7 +667,7 @@ def x_generator3(x0, h, use_one_sided): dx.append(u[i] - l[i]) df.append(f2 - f1) df_dx = [delf / delx for delf, delx in zip(df, dx)] - + nfev += 2 * len(df_dx) elif method == 'cs': def x_generator_cs(x0, h): for i in range(n): @@ -645,6 +677,7 @@ def x_generator_cs(x0, h): f_evals = iter(workers(fun, x_generator_cs(x0, h))) df_dx = [f1.imag / hi for f1, hi in zip(f_evals, h)] + nfev += len(df_dx) else: raise RuntimeError("Never be here.") @@ -654,7 +687,7 @@ def x_generator_cs(x0, h): if m == 1: J_transposed = np.ravel(J_transposed) - return J_transposed.T + return J_transposed.T, nfev def _sparse_difference(fun, x0, f0, h, use_one_sided, @@ -666,6 +699,7 @@ def _sparse_difference(fun, x0, f0, h, use_one_sided, fractions = [] n_groups = np.max(groups) + 1 + nfev = 0 def e_generator(): # Perturb variables which are in the same group simultaneously. 
@@ -724,6 +758,7 @@ def x_generator_cs(): if method == '2-point': dx = next(xs) - x0 df = next(f_evals) - f0 + nfev += 1 elif method == '3-point': # Here we do conceptually the same but separate one-sided # and two-sided schemes. @@ -739,6 +774,7 @@ def x_generator_cs(): f1 = next(f_evals) f2 = next(f_evals) + nfev += 2 mask = use_one_sided[j] df = np.empty(m) @@ -750,6 +786,7 @@ def x_generator_cs(): df[rows] = f2[rows] - f1[rows] elif method == 'cs': f1 = next(f_evals) + nfev += 1 df = f1.imag dx = h * e else: @@ -764,9 +801,10 @@ def x_generator_cs(): row_indices = np.hstack(row_indices) col_indices = np.hstack(col_indices) fractions = np.hstack(fractions) + if isspmatrix(structure): - return csr_matrix((fractions, (row_indices, col_indices)), shape=(m, n)) - return csr_array((fractions, (row_indices, col_indices)), shape=(m, n)) + return csr_matrix((fractions, (row_indices, col_indices)), shape=(m, n)), nfev + return csr_array((fractions, (row_indices, col_indices)), shape=(m, n)), nfev class _Fun_Wrapper: diff --git a/scipy/optimize/tests/test__numdiff.py b/scipy/optimize/tests/test__numdiff.py index 3c544631dce4..14b596ed9761 100644 --- a/scipy/optimize/tests/test__numdiff.py +++ b/scipy/optimize/tests/test__numdiff.py @@ -5,12 +5,13 @@ from numpy.testing import assert_allclose, assert_equal, assert_ from pytest import raises as assert_raises -from scipy._lib._util import MapWrapper +from scipy._lib._util import MapWrapper, _ScalarFunctionWrapper from scipy.sparse import csr_array, csc_array, lil_array from scipy.optimize._numdiff import ( _adjust_scheme_to_bounds, approx_derivative, check_derivative, group_columns, _eps_for_method, _compute_absolute_step) +from scipy.optimize import rosen def test_group_columns(): @@ -279,6 +280,33 @@ def test_scalar_vector(self): assert_allclose(jac_diff_3, jac_true, rtol=1e-9) assert_allclose(jac_diff_4, jac_true, rtol=1e-12) + def test_nfev(self): + # check that nfev consumed by approx_derivative is tracked properly + x0 = [0.5, 1.5, 2.0] + with MapWrapper(2) as mapper: + _, mdct2 = approx_derivative(rosen, x0, + method='2-point', workers=mapper, + full_output=True) + _, mdct3 = approx_derivative(rosen, x0, + workers=mapper, full_output=True) + _, mdct4 = approx_derivative(rosen, x0, + method='cs', workers=mapper, + full_output=True) + + sfr = _ScalarFunctionWrapper(rosen) + _, dct2 = approx_derivative(sfr, x0, method='2-point', full_output=True) + assert_equal(dct2['nfev'], sfr.nfev) + sfr.nfev = 0 + _, dct3 = approx_derivative(sfr, x0, full_output=True) + assert_equal(dct3['nfev'], sfr.nfev) + sfr.nfev = 0 + _, dct4 = approx_derivative(sfr, x0, method='cs', full_output=True) + assert_equal(dct4['nfev'], sfr.nfev) + + assert_equal(mdct2['nfev'], dct2['nfev']) + assert_equal(mdct3['nfev'], dct3['nfev']) + assert_equal(mdct4['nfev'], dct4['nfev']) + def test_vector_scalar(self): x0 = np.array([100.0, -0.5]) jac_diff_2 = approx_derivative(self.fun_vector_scalar, x0, diff --git a/scipy/optimize/tests/test_differentiable_functions.py b/scipy/optimize/tests/test_differentiable_functions.py index 469a078bbbd3..2aaa8421872c 100644 --- a/scipy/optimize/tests/test_differentiable_functions.py +++ b/scipy/optimize/tests/test_differentiable_functions.py @@ -144,6 +144,16 @@ def test_workers(self): assert_allclose(approx.grad(x0), approx_series.grad(x0)) assert_equal(approx.nfev, approx_series.nfev) + with MapWrapper(2) as mapper: + approx = ScalarFunction(ex.fun, x0, (), '3-point', + ex.hess, None, (-np.inf, np.inf), + workers=mapper) + approx_series = 
ScalarFunction(ex.fun, x0, (), '3-point', + ex.hess, None, (-np.inf, np.inf), + ) + assert_allclose(approx.grad(x0), approx_series.grad(x0)) + assert_equal(approx.nfev, approx_series.nfev) + def test_fun_and_grad(self): ex = ExScalarFunction() From 6c96052ba4b7d5c7e6f328d37be998bc015b503e Mon Sep 17 00:00:00 2001 From: Andrew Nelson Date: Sun, 22 Dec 2024 21:07:56 +1100 Subject: [PATCH 14/63] MAINT: HessWrapper with approx_derivative --- scipy/optimize/_differentiable_functions.py | 139 +++++++++++------- scipy/optimize/_numdiff.py | 13 +- .../tests/test_differentiable_functions.py | 54 ++++++- 3 files changed, 141 insertions(+), 65 deletions(-) diff --git a/scipy/optimize/_differentiable_functions.py b/scipy/optimize/_differentiable_functions.py index 48b714128640..57ccf38a2529 100644 --- a/scipy/optimize/_differentiable_functions.py +++ b/scipy/optimize/_differentiable_functions.py @@ -1,3 +1,4 @@ +from collections import namedtuple import numpy as np import scipy.sparse as sps from ._numdiff import approx_derivative, group_columns @@ -21,13 +22,11 @@ def __init__( fun=None, args=None, finite_diff_options=None, - workers=None ): self.fun = fun self.grad = grad self.args = [] if args is None else args self.finite_diff_options = finite_diff_options - self.workers = workers or map self.ngev = 0 # number of function evaluations consumed by finite difference self.nfev = 0 @@ -44,8 +43,6 @@ def __call__(self, x, f0=None, **kwds): x, f0=f0, **self.finite_diff_options, - workers=self.workers, - full_output=True ) self.nfev += dct['nfev'] @@ -53,40 +50,70 @@ def __call__(self, x, f0=None, **kwds): return g -def _wrapper_hess(hess, grad=None, x0=None, args=(), finite_diff_options=None): - if callable(hess): - H = hess(np.copy(x0), *args) - ncalls = [1] - - if sps.issparse(H): - def wrapped(x, **kwds): - ncalls[0] += 1 - return sps.csr_array(hess(np.copy(x), *args)) +class _HessWrapper: + """ + Wrapper class for hess calculation via finite differences + """ + def __init__( + self, + hess, + x0=None, + grad=None, + args=None, + finite_diff_options=None, + ): + self.hess = hess + self.grad = grad + self.args = [] if args is None else args + self.finite_diff_options = finite_diff_options + # keep track of any finite difference function evaluations for grad + self.ngev = 0 + self.nhev = 0 + self.H = None + self._hess_func = None - H = sps.csr_array(H) + if callable(hess): + self.H = hess(np.copy(x0), *args) + self.nhev += 1 - elif isinstance(H, LinearOperator): - def wrapped(x, **kwds): - ncalls[0] += 1 - return hess(np.copy(x), *args) + if sps.issparse(self.H): + self._hess_func = self._sparse_callable + self.H = sps.csr_array(self.H) + elif isinstance(self.H, LinearOperator): + self._hess_func = self._linearoperator_callable + else: + # dense + self._hess_func = self._dense_callable + self.H = np.atleast_2d(np.asarray(self.H)) + elif hess in FD_METHODS: + self._hess_func = self._fd_hess - else: # dense - def wrapped(x, **kwds): - ncalls[0] += 1 - return np.atleast_2d(np.asarray(hess(np.copy(x), *args))) + def __call__(self, x, f0=None, **kwds): + return self._hess_func(np.copy(x), f0=f0) - H = np.atleast_2d(np.asarray(H)) + def _fd_hess(self, x, f0=None, **kwds): + self.H, dct = approx_derivative( + self.grad, x, f0=f0, **self.finite_diff_options + ) + self.ngev += dct["nfev"] + return self.H - return wrapped, ncalls, H - elif hess in FD_METHODS: - ncalls = [0] + def _sparse_callable(self, x, **kwds): + self.nhev += 1 + self.H = sps.csr_array(self.hess(x, *self.args)) + return self.H - def 
wrapped1(x, f0=None): - return approx_derivative( - grad, x, f0=f0, **finite_diff_options - ) + def _dense_callable(self, x, **kwds): + self.nhev += 1 + self.H = np.atleast_2d( + np.asarray(self.hess(x, *self.args)) + ) + return self.H - return wrapped1, ncalls, None + def _linearoperator_callable(self, x, **kwds): + self.nhev += 1 + self.H = self.hess(x, *self.args) + return self.H class ScalarFunction: @@ -216,11 +243,15 @@ def __init__(self, fun, x0, args, grad, hess, finite_diff_rel_step, finite_diff_options["rel_step"] = finite_diff_rel_step finite_diff_options["abs_step"] = epsilon finite_diff_options["bounds"] = finite_diff_bounds + finite_diff_options["workers"] = workers + finite_diff_options["full_output"] = True if hess in FD_METHODS: finite_diff_options["method"] = hess finite_diff_options["rel_step"] = finite_diff_rel_step finite_diff_options["abs_step"] = epsilon finite_diff_options["as_linear_operator"] = True + finite_diff_options["workers"] = workers + finite_diff_options["full_output"] = True # Initial function evaluation self._nfev = 0 @@ -232,33 +263,39 @@ def __init__(self, fun, x0, args, grad, hess, finite_diff_rel_step, fun=self._wrapped_fun, args=args, finite_diff_options=finite_diff_options, - workers=workers ) self._update_grad() # Hessian evaluation - if callable(hess): - self._wrapped_hess, self._nhev, self.H = _wrapper_hess( - hess, x0=x0, args=args - ) - self.H_updated = True - elif hess in FD_METHODS: - self._wrapped_hess, self._nhev, self.H = _wrapper_hess( - hess, - grad=self._wrapped_grad, - x0=x0, - finite_diff_options=finite_diff_options - ) - self._update_grad() - self.H = self._wrapped_hess(self.x, f0=self.g) - self.H_updated = True - elif isinstance(hess, HessianUpdateStrategy): + if isinstance(hess, HessianUpdateStrategy): self.H = hess self.H.initialize(self.n, 'hess') self.H_updated = True self.x_prev = None self.g_prev = None - self._nhev = [0] + _FakeCounter = namedtuple('_FakeCounter', ['ngev', 'nhev']) + self._wrapped_hess = _FakeCounter(ngev=0, nhev=0) + else: + if callable(hess): + self._wrapped_hess = _HessWrapper( + hess, + x0=x0, + args=args, + finite_diff_options=finite_diff_options + ) + self.H = self._wrapped_hess.H + self.H_updated = True + elif hess in FD_METHODS: + self._wrapped_hess = _HessWrapper( + hess, + x0=x0, + args=args, + grad=self._wrapped_grad, + finite_diff_options=finite_diff_options + ) + self._update_grad() + self.H = self._wrapped_hess(self.x, f0=self.g) + self.H_updated = True @property def nfev(self): @@ -266,11 +303,11 @@ def nfev(self): @property def ngev(self): - return self._wrapped_grad.ngev + return self._wrapped_grad.ngev #+ self._wrapped_hess.ngev @property def nhev(self): - return self._nhev[0] + return self._wrapped_hess.nhev def _update_x(self, x): if isinstance(self._orig_hess, HessianUpdateStrategy): diff --git a/scipy/optimize/_numdiff.py b/scipy/optimize/_numdiff.py index 866cd8583c40..375236ff197c 100644 --- a/scipy/optimize/_numdiff.py +++ b/scipy/optimize/_numdiff.py @@ -482,12 +482,13 @@ def approx_derivative(fun, x0, method='3-point', rel_step=None, abs_step=None, fun_wrapped = _Fun_Wrapper(fun, x0, args, kwargs) - # How many function evaluations are consumed by `approx_derivative`. - # Historically this was done by a wrapper around `fun`. However, with - # parallelization via workers it was going to be impossible to keep that - # counter updated across Processes. Counter synchronisation can be achieved - # via multiprocessing.Value and a Pool. 
However, workers can be any map-like, - # not necessarily a Pool. + # Record how function evaluations are consumed by `approx_derivative`. + # Historically this was done by upstream functions wrapping `fun`. + # However, with parallelization via workers it was going to be impossible to + # keep that counter updated across Processes. Counter synchronisation can + # be achieved via multiprocessing.Value and a Pool. However, workers can be + # any map-like, not necessarily a Pool, so initialization of the Value would + # be difficult. nfev = _nfev = 0 if f0 is None: diff --git a/scipy/optimize/tests/test_differentiable_functions.py b/scipy/optimize/tests/test_differentiable_functions.py index 2aaa8421872c..48bec70cc9eb 100644 --- a/scipy/optimize/tests/test_differentiable_functions.py +++ b/scipy/optimize/tests/test_differentiable_functions.py @@ -132,27 +132,65 @@ def test_finite_difference_grad(self): assert_array_almost_equal(g_analit, g_approx) def test_workers(self): - ex = ExScalarFunction() x0 = np.array([2.0, 0.3]) + ex = ExScalarFunction() + ex2 = ExScalarFunction() with MapWrapper(2) as mapper: approx = ScalarFunction(ex.fun, x0, (), '2-point', ex.hess, None, (-np.inf, np.inf), workers=mapper) - approx_series = ScalarFunction(ex.fun, x0, (), '2-point', - ex.hess, None, (-np.inf, np.inf), - ) - assert_allclose(approx.grad(x0), approx_series.grad(x0)) + approx_series = ScalarFunction(ex2.fun, x0, (), '2-point', + ex2.hess, None, (-np.inf, np.inf), + ) + assert_allclose(approx.grad(x0), ex.grad(x0)) + assert_allclose(approx_series.grad(x0), ex.grad(x0)) + assert_allclose(approx_series.hess(x0), ex.hess(x0)) + assert_allclose(approx.hess(x0), ex.hess(x0)) assert_equal(approx.nfev, approx_series.nfev) + assert_equal(approx_series.nfev, ex2.nfev) + assert_equal(approx.ngev, approx_series.ngev) + assert_equal(approx.nhev, approx_series.nhev) + assert_equal(approx_series.nhev, ex2.nhev) + ex = ExScalarFunction() + ex2 = ExScalarFunction() with MapWrapper(2) as mapper: approx = ScalarFunction(ex.fun, x0, (), '3-point', ex.hess, None, (-np.inf, np.inf), workers=mapper) - approx_series = ScalarFunction(ex.fun, x0, (), '3-point', - ex.hess, None, (-np.inf, np.inf), + approx_series = ScalarFunction(ex2.fun, x0, (), '3-point', + ex2.hess, None, (-np.inf, np.inf), ) - assert_allclose(approx.grad(x0), approx_series.grad(x0)) + assert_allclose(approx.grad(x0), ex.grad(x0)) + assert_allclose(approx_series.grad(x0), ex.grad(x0)) + assert_allclose(approx_series.hess(x0), ex.hess(x0)) + assert_allclose(approx.hess(x0), ex.hess(x0)) + assert_equal(approx.nfev, approx_series.nfev) + assert_equal(approx_series.nfev, ex2.nfev) + assert_equal(approx.ngev, approx_series.ngev) + assert_equal(approx.nhev, approx_series.nhev) + assert_equal(approx_series.nhev, ex2.nhev) + + ex = ExScalarFunction() + ex2 = ExScalarFunction() + x1 = np.array([3.0, 4.0]) + with MapWrapper(2) as mapper: + approx = ScalarFunction(ex.fun, x0, (), ex.grad, + '3-point', None, (-np.inf, np.inf), + workers=mapper) + approx_series = ScalarFunction(ex2.fun, x0, (), ex2.grad, + '3-point', None, (-np.inf, np.inf), + ) + assert_allclose(approx.grad(x1), ex.grad(x1)) + assert_allclose(approx_series.grad(x1), ex.grad(x1)) + approx_series.hess(x1) + approx.hess(x1) assert_equal(approx.nfev, approx_series.nfev) + assert_equal(approx_series.nfev, ex2.nfev) + assert_equal(approx.ngev, approx_series.ngev) + assert_equal(approx_series.ngev, ex2.ngev) + assert_equal(approx.nhev, approx_series.nhev) + assert_equal(approx_series.nhev, ex2.nhev) def 
test_fun_and_grad(self): ex = ExScalarFunction() From 32b49c2c51eb4ed60a17843f783b0f46f81db6db Mon Sep 17 00:00:00 2001 From: Andrew Nelson Date: Tue, 24 Dec 2024 20:34:39 +1100 Subject: [PATCH 15/63] MAINT: use MapWrapper --- scipy/optimize/_numdiff.py | 49 +++++++++++++++++++++----------------- 1 file changed, 27 insertions(+), 22 deletions(-) diff --git a/scipy/optimize/_numdiff.py b/scipy/optimize/_numdiff.py index 375236ff197c..3c85ac7d1f61 100644 --- a/scipy/optimize/_numdiff.py +++ b/scipy/optimize/_numdiff.py @@ -7,6 +7,7 @@ from ..sparse import issparse, isspmatrix, find, csc_array, csr_array, csr_matrix from ._group_columns import group_dense, group_sparse from scipy._lib._array_api import array_namespace +from scipy._lib._util import MapWrapper from scipy._lib import array_api_extra as xpx @@ -359,8 +360,14 @@ def approx_derivative(fun, x0, method='3-point', rel_step=None, abs_step=None, full_output : bool, optional If True then the function also returns a dictionary with extra information about the calculation. - workers : map-like callable, optional - map-like used to call user function with different steps. + workers : int or map-like callable, optional + If `workers` is an int the task is subdivided into `workers` + sections and the functevaluated in parallel + (uses `multiprocessing.Pool `). + Supply -1 to use all available CPU cores. + Alternatively supply a map-like callable, such as + `multiprocessing.Pool.map` for evaluating the population in parallel. + This evaluation is carried out as ``workers(fun, iterable)``. Returns ------- @@ -502,9 +509,6 @@ def approx_derivative(fun, x0, method='3-point', rel_step=None, abs_step=None, if np.any((x0 < lb) | (x0 > ub)): raise ValueError("`x0` violates bound constraints.") - # check the map function for parallelisation - workers = workers or map - if as_linear_operator: if rel_step is None: rel_step = _eps_for_method(x0.dtype, f0.dtype, method) @@ -537,26 +541,27 @@ def approx_derivative(fun, x0, method='3-point', rel_step=None, abs_step=None, elif method == 'cs': use_one_sided = False - if sparsity is None: - J, _nfev = _dense_difference(fun_wrapped, x0, f0, h, - use_one_sided, method, - workers) - else: - if not issparse(sparsity) and len(sparsity) == 2: - structure, groups = sparsity + with MapWrapper(workers) as mf: + if sparsity is None: + J, _nfev = _dense_difference(fun_wrapped, x0, f0, h, + use_one_sided, method, + mf) else: - structure = sparsity - groups = group_columns(sparsity) + if not issparse(sparsity) and len(sparsity) == 2: + structure, groups = sparsity + else: + structure = sparsity + groups = group_columns(sparsity) - if issparse(structure): - structure = structure.tocsc() - else: - structure = np.atleast_2d(structure) + if issparse(structure): + structure = structure.tocsc() + else: + structure = np.atleast_2d(structure) - groups = np.atleast_1d(groups) - J, _nfev = _sparse_difference(fun_wrapped, x0, f0, h, - use_one_sided, structure, - groups, method, workers) + groups = np.atleast_1d(groups) + J, _nfev = _sparse_difference(fun_wrapped, x0, f0, h, + use_one_sided, structure, + groups, method, mf) if full_output: nfev += _nfev From 90f1ea3a71d0a6c0242ce4809b2e12c23358b751 Mon Sep 17 00:00:00 2001 From: Andrew Nelson Date: Tue, 24 Dec 2024 20:44:03 +1100 Subject: [PATCH 16/63] DOC: give example of parallel calc approx_derivative --- scipy/optimize/_numdiff.py | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/scipy/optimize/_numdiff.py b/scipy/optimize/_numdiff.py index 
3c85ac7d1f61..168641681658 100644 --- a/scipy/optimize/_numdiff.py +++ b/scipy/optimize/_numdiff.py @@ -456,6 +456,42 @@ def approx_derivative(fun, x0, method='3-point', rel_step=None, abs_step=None, array([ 1.]) >>> approx_derivative(g, x0, bounds=(1.0, np.inf)) array([ 2.]) + + We can also parallelize the derivative calculation using the workers + keyword. + + >>> from multiprocessing import Pool + >>> import time + >>> def fun2(x): # import from an external file for use with multiprocessing + ... time.sleep(0.002) + ... return rosen(x) + + >>> rng = np.random.default_rng() + >>> x0 = rng.uniform(high=10, size=(2000,)) + >>> f0 = rosen(x0) + + >>> %timeit approx_derivative(fun2, x0, f0=f0) # may vary + 10.5 s ± 5.91 ms per loop (mean ± std. dev. of 7 runs, 1 loop each) + + >>> elapsed = [] + >>> with Pool() as workers: + ... for i in range(10): + ... t = time.perf_counter() + ... approx_derivative(fun2, x0, workers=workers.map, f0=f0) + ... et = time.perf_counter() + ... elapsed.append(et - t) + >>> np.mean(elapsed) # may vary + np.float64(1.442545195999901) + + Create a map-like vectorised version. Note that the first argument to + approx_derivative is effectively ignored in the way I've written it. + + >>> def fun(f, x, *args, **kwds): + ... xx = np.r_[[xs for xs in x]] + ... return f(xx.T) + >>> %timeit approx_derivative(fun2, x0, workers=fun, f0=f0) # may vary + 91.8 ms ± 755 μs per loop (mean ± std. dev. of 7 runs, 10 loops each) + """ if method not in ['2-point', '3-point', 'cs']: raise ValueError(f"Unknown method '{method}'. ") From c81131ed1effd389ca2ed7c690125a1b44bbcceb Mon Sep 17 00:00:00 2001 From: Andrew Nelson Date: Wed, 25 Dec 2024 01:20:56 +1100 Subject: [PATCH 17/63] MAINT: fix default workers argument --- scipy/optimize/_differentiable_functions.py | 12 +++++++++--- scipy/optimize/_numdiff.py | 4 ++-- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/scipy/optimize/_differentiable_functions.py b/scipy/optimize/_differentiable_functions.py index 57ccf38a2529..398e797eba29 100644 --- a/scipy/optimize/_differentiable_functions.py +++ b/scipy/optimize/_differentiable_functions.py @@ -179,8 +179,14 @@ class ScalarFunction: For ``method='3-point'`` the sign of `epsilon` is ignored. By default relative steps are used, only if ``epsilon is not None`` are absolute steps used. - workers : map-like callable - map-like used to call user function with different steps. + workers : int or map-like callable, optional + If `workers` is an int any numerical differentiation task is subdivided + into `workers` sections and the fun evaluated in parallel + (uses `multiprocessing.Pool `). + Supply -1 to use all available CPU cores. + Alternatively supply a map-like callable, such as + `multiprocessing.Pool.map` for evaluating the population in parallel. + This evaluation is carried out as ``workers(fun, iterable)``. Notes ----- @@ -194,7 +200,7 @@ class ScalarFunction: of *any* of the methods may overwrite the attribute. 
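+
+    A minimal construction sketch with a map-like `workers` (illustrative
+    only; `fun`, `x0` and `hess` are assumed to be supplied by the caller,
+    mirroring the pattern exercised in the accompanying tests)::
+
+        import numpy as np
+        from multiprocessing import Pool
+
+        with Pool(2) as p:
+            sf = ScalarFunction(fun, x0, (), '2-point', hess, None,
+                                (-np.inf, np.inf), workers=p.map)
+            g = sf.grad(x0)  # finite-difference steps evaluated via p.map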
""" def __init__(self, fun, x0, args, grad, hess, finite_diff_rel_step, - finite_diff_bounds, epsilon=None, workers=None): + finite_diff_bounds, epsilon=None, workers=map): if not callable(grad) and grad not in FD_METHODS: raise ValueError( diff --git a/scipy/optimize/_numdiff.py b/scipy/optimize/_numdiff.py index 168641681658..b2853918a887 100644 --- a/scipy/optimize/_numdiff.py +++ b/scipy/optimize/_numdiff.py @@ -278,7 +278,7 @@ def group_columns(A, order=0): def approx_derivative(fun, x0, method='3-point', rel_step=None, abs_step=None, f0=None, bounds=(-np.inf, np.inf), sparsity=None, as_linear_operator=False, args=(), kwargs=None, - full_output=False, workers=None): + full_output=False, workers=map): """Compute finite difference approximation of the derivatives of a vector-valued function. @@ -362,7 +362,7 @@ def approx_derivative(fun, x0, method='3-point', rel_step=None, abs_step=None, about the calculation. workers : int or map-like callable, optional If `workers` is an int the task is subdivided into `workers` - sections and the functevaluated in parallel + sections and the fun evaluated in parallel (uses `multiprocessing.Pool `). Supply -1 to use all available CPU cores. Alternatively supply a map-like callable, such as From e91448cb0763ce7ce98206e6aa38e26c78ee5640 Mon Sep 17 00:00:00 2001 From: Andrew Nelson Date: Wed, 25 Dec 2024 02:00:09 +1100 Subject: [PATCH 18/63] ENH: apply workers kwd to _minimize_lbfgsb --- scipy/optimize/_lbfgsb_py.py | 14 ++++++++++++-- scipy/optimize/_optimize.py | 13 +++++++++++-- 2 files changed, 23 insertions(+), 4 deletions(-) diff --git a/scipy/optimize/_lbfgsb_py.py b/scipy/optimize/_lbfgsb_py.py index d0e206feaa93..f980164597c3 100644 --- a/scipy/optimize/_lbfgsb_py.py +++ b/scipy/optimize/_lbfgsb_py.py @@ -291,7 +291,8 @@ def _minimize_lbfgsb(fun, x0, args=(), jac=None, bounds=None, disp=None, maxcor=10, ftol=2.2204460492503131e-09, gtol=1e-5, eps=1e-8, maxfun=15000, maxiter=15000, iprint=-1, callback=None, maxls=20, - finite_diff_rel_step=None, **unknown_options): + finite_diff_rel_step=None, workers=map, + **unknown_options): """ Minimize a scalar function of one or more variables using the L-BFGS-B algorithm. @@ -344,6 +345,14 @@ def _minimize_lbfgsb(fun, x0, args=(), jac=None, bounds=None, possibly adjusted to fit into the bounds. For ``method='3-point'`` the sign of `h` is ignored. If None (default) then step is selected automatically. + workers : int or map-like callable, optional + If `workers` is an int any numerical differentiation task is subdivided + into `workers` sections and the fun evaluated in parallel + (uses `multiprocessing.Pool `). + Supply -1 to use all available CPU cores. + Alternatively supply a map-like callable, such as + `multiprocessing.Pool.map` for evaluating the population in parallel. + This evaluation is carried out as ``workers(fun, iterable)``. 
Notes ----- @@ -385,7 +394,8 @@ def _minimize_lbfgsb(fun, x0, args=(), jac=None, bounds=None, # _prepare_scalar_function can use bounds=None to represent no bounds sf = _prepare_scalar_function(fun, x0, jac=jac, args=args, epsilon=eps, bounds=bounds, - finite_diff_rel_step=finite_diff_rel_step) + finite_diff_rel_step=finite_diff_rel_step, + workers=workers) func_and_grad = sf.fun_and_grad diff --git a/scipy/optimize/_optimize.py b/scipy/optimize/_optimize.py index 2ea8ea87e824..2ff252191c0c 100644 --- a/scipy/optimize/_optimize.py +++ b/scipy/optimize/_optimize.py @@ -202,7 +202,7 @@ def vecnorm(x, ord=2): def _prepare_scalar_function(fun, x0, jac=None, args=(), bounds=None, epsilon=None, finite_diff_rel_step=None, - hess=None): + hess=None, workers=map): """ Creates a ScalarFunction object for use with scalar minimizers (BFGS/LBFGSB/SLSQP/TNC/CG/etc). @@ -255,6 +255,14 @@ def _prepare_scalar_function(fun, x0, jac=None, args=(), bounds=None, Whenever the gradient is estimated via finite-differences, the Hessian cannot be estimated with options {'2-point', '3-point', 'cs'} and needs to be estimated using one of the quasi-Newton strategies. + workers : int or map-like callable, optional + If `workers` is an int any numerical differentiation task is subdivided + into `workers` sections and the fun evaluated in parallel + (uses `multiprocessing.Pool `). + Supply -1 to use all available CPU cores. + Alternatively supply a map-like callable, such as + `multiprocessing.Pool.map` for evaluating the population in parallel. + This evaluation is carried out as ``workers(fun, iterable)``. Returns ------- @@ -289,7 +297,8 @@ def hess(x, *args): # ScalarFunction caches. Reuse of fun(x) during grad # calculation reduces overall function evaluations. sf = ScalarFunction(fun, x0, args, grad, hess, - finite_diff_rel_step, bounds, epsilon=epsilon) + finite_diff_rel_step, bounds, epsilon=epsilon, + workers=workers) return sf From 8d7afb41cc87e32377313cad95e26e5207ac1033 Mon Sep 17 00:00:00 2001 From: Andrew Nelson Date: Wed, 25 Dec 2024 02:06:54 +1100 Subject: [PATCH 19/63] TST: fix a test --- scipy/optimize/tests/test__numdiff.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scipy/optimize/tests/test__numdiff.py b/scipy/optimize/tests/test__numdiff.py index 14b596ed9761..00ac45d380c0 100644 --- a/scipy/optimize/tests/test__numdiff.py +++ b/scipy/optimize/tests/test__numdiff.py @@ -610,7 +610,7 @@ def test_all(self): with MapWrapper(2) as mapper: for method, groups, l, u, mf in product( ['2-point', '3-point', 'cs'], [groups_1, groups_2], - [-np.inf, self.lb], [np.inf, self.ub], [None, map, mapper]): + [-np.inf, self.lb], [np.inf, self.ub], [2, map, mapper]): J = approx_derivative(self.fun, self.x0, method=method, bounds=(l, u), sparsity=(A, groups), workers=mf) From 4175488c0e933e39e9980bd9c5ad659b07ff0ad9 Mon Sep 17 00:00:00 2001 From: Andrew Nelson Date: Wed, 25 Dec 2024 13:30:45 +1100 Subject: [PATCH 20/63] TST: speed up some tests --- scipy/optimize/_differentiable_functions.py | 5 +++- scipy/optimize/_lbfgsb_py.py | 2 +- scipy/optimize/_numdiff.py | 6 +++-- scipy/optimize/_optimize.py | 5 +++- scipy/optimize/tests/test__numdiff.py | 27 ++++++++++++--------- 5 files changed, 29 insertions(+), 16 deletions(-) diff --git a/scipy/optimize/_differentiable_functions.py b/scipy/optimize/_differentiable_functions.py index 398e797eba29..1d61c33fb7d3 100644 --- a/scipy/optimize/_differentiable_functions.py +++ b/scipy/optimize/_differentiable_functions.py @@ -200,7 +200,7 @@ class 
ScalarFunction: of *any* of the methods may overwrite the attribute. """ def __init__(self, fun, x0, args, grad, hess, finite_diff_rel_step, - finite_diff_bounds, epsilon=None, workers=map): + finite_diff_bounds, epsilon=None, workers=None): if not callable(grad) and grad not in FD_METHODS: raise ValueError( @@ -243,6 +243,9 @@ def __init__(self, fun, x0, args, grad, hess, finite_diff_rel_step, self._lowest_x = None self._lowest_f = np.inf + # normalize workers + workers = workers or map + finite_diff_options = {} if grad in FD_METHODS: finite_diff_options["method"] = grad diff --git a/scipy/optimize/_lbfgsb_py.py b/scipy/optimize/_lbfgsb_py.py index f980164597c3..c8a6fe8bb843 100644 --- a/scipy/optimize/_lbfgsb_py.py +++ b/scipy/optimize/_lbfgsb_py.py @@ -291,7 +291,7 @@ def _minimize_lbfgsb(fun, x0, args=(), jac=None, bounds=None, disp=None, maxcor=10, ftol=2.2204460492503131e-09, gtol=1e-5, eps=1e-8, maxfun=15000, maxiter=15000, iprint=-1, callback=None, maxls=20, - finite_diff_rel_step=None, workers=map, + finite_diff_rel_step=None, workers=None, **unknown_options): """ Minimize a scalar function of one or more variables using the L-BFGS-B diff --git a/scipy/optimize/_numdiff.py b/scipy/optimize/_numdiff.py index b2853918a887..add9f6349d0e 100644 --- a/scipy/optimize/_numdiff.py +++ b/scipy/optimize/_numdiff.py @@ -278,7 +278,7 @@ def group_columns(A, order=0): def approx_derivative(fun, x0, method='3-point', rel_step=None, abs_step=None, f0=None, bounds=(-np.inf, np.inf), sparsity=None, as_linear_operator=False, args=(), kwargs=None, - full_output=False, workers=map): + full_output=False, workers=None): """Compute finite difference approximation of the derivatives of a vector-valued function. @@ -577,6 +577,8 @@ def approx_derivative(fun, x0, method='3-point', rel_step=None, abs_step=None, elif method == 'cs': use_one_sided = False + # normalize workers + workers = workers or map with MapWrapper(workers) as mf: if sparsity is None: J, _nfev = _dense_difference(fun_wrapped, x0, f0, h, @@ -593,7 +595,7 @@ def approx_derivative(fun, x0, method='3-point', rel_step=None, abs_step=None, structure = structure.tocsc() else: structure = np.atleast_2d(structure) - + print(groups) groups = np.atleast_1d(groups) J, _nfev = _sparse_difference(fun_wrapped, x0, f0, h, use_one_sided, structure, diff --git a/scipy/optimize/_optimize.py b/scipy/optimize/_optimize.py index 2ff252191c0c..c3e0d876026c 100644 --- a/scipy/optimize/_optimize.py +++ b/scipy/optimize/_optimize.py @@ -202,7 +202,7 @@ def vecnorm(x, ord=2): def _prepare_scalar_function(fun, x0, jac=None, args=(), bounds=None, epsilon=None, finite_diff_rel_step=None, - hess=None, workers=map): + hess=None, workers=None): """ Creates a ScalarFunction object for use with scalar minimizers (BFGS/LBFGSB/SLSQP/TNC/CG/etc). @@ -294,6 +294,9 @@ def hess(x, *args): if bounds is None: bounds = (-np.inf, np.inf) + # normalize workers + workers = workers or map + # ScalarFunction caches. Reuse of fun(x) during grad # calculation reduces overall function evaluations. 
sf = ScalarFunction(fun, x0, args, grad, hess, diff --git a/scipy/optimize/tests/test__numdiff.py b/scipy/optimize/tests/test__numdiff.py index 00ac45d380c0..6a5473143a09 100644 --- a/scipy/optimize/tests/test__numdiff.py +++ b/scipy/optimize/tests/test__numdiff.py @@ -272,40 +272,45 @@ def test_scalar_vector(self): with MapWrapper(2) as mapper: jac_diff_2 = approx_derivative(self.fun_scalar_vector, x0, method='2-point', workers=mapper) - jac_diff_3 = approx_derivative(self.fun_scalar_vector, x0, workers=mapper) - jac_diff_4 = approx_derivative(self.fun_scalar_vector, x0, - method='cs', workers=mapper) + jac_diff_3 = approx_derivative(self.fun_scalar_vector, x0, workers=map) + jac_diff_4 = approx_derivative(self.fun_scalar_vector, x0, + method='cs', workers=None) jac_true = self.jac_scalar_vector(np.atleast_1d(x0)) assert_allclose(jac_diff_2, jac_true, rtol=1e-6) assert_allclose(jac_diff_3, jac_true, rtol=1e-9) assert_allclose(jac_diff_4, jac_true, rtol=1e-12) - def test_nfev(self): + def test_workers(self): # check that nfev consumed by approx_derivative is tracked properly + # and that parallel evaluation is same as series x0 = [0.5, 1.5, 2.0] with MapWrapper(2) as mapper: - _, mdct2 = approx_derivative(rosen, x0, + md2, mdct2 = approx_derivative(rosen, x0, method='2-point', workers=mapper, full_output=True) - _, mdct3 = approx_derivative(rosen, x0, + md3, mdct3 = approx_derivative(rosen, x0, workers=mapper, full_output=True) - _, mdct4 = approx_derivative(rosen, x0, + md4, mdct4 = approx_derivative(rosen, x0, method='cs', workers=mapper, full_output=True) sfr = _ScalarFunctionWrapper(rosen) - _, dct2 = approx_derivative(sfr, x0, method='2-point', full_output=True) + d2, dct2 = approx_derivative(sfr, x0, method='2-point', full_output=True) assert_equal(dct2['nfev'], sfr.nfev) sfr.nfev = 0 - _, dct3 = approx_derivative(sfr, x0, full_output=True) + d3, dct3 = approx_derivative(sfr, x0, full_output=True) assert_equal(dct3['nfev'], sfr.nfev) sfr.nfev = 0 - _, dct4 = approx_derivative(sfr, x0, method='cs', full_output=True) + d4, dct4 = approx_derivative(sfr, x0, method='cs', full_output=True) assert_equal(dct4['nfev'], sfr.nfev) assert_equal(mdct2['nfev'], dct2['nfev']) assert_equal(mdct3['nfev'], dct3['nfev']) assert_equal(mdct4['nfev'], dct4['nfev']) + # also check that gradients are equivalent + assert_equal(md2, d2) + assert_equal(md3, d3) + assert_equal(md4, d4) def test_vector_scalar(self): x0 = np.array([100.0, -0.5]) @@ -610,7 +615,7 @@ def test_all(self): with MapWrapper(2) as mapper: for method, groups, l, u, mf in product( ['2-point', '3-point', 'cs'], [groups_1, groups_2], - [-np.inf, self.lb], [np.inf, self.ub], [2, map, mapper]): + [-np.inf, self.lb], [np.inf, self.ub], [map, mapper]): J = approx_derivative(self.fun, self.x0, method=method, bounds=(l, u), sparsity=(A, groups), workers=mf) From 8caf388069a394ee7c3fb9e1aa20e8d7a55f1c95 Mon Sep 17 00:00:00 2001 From: Andrew Nelson Date: Wed, 25 Dec 2024 15:10:19 +1100 Subject: [PATCH 21/63] TST equal bounds --- scipy/optimize/_differentiable_functions.py | 14 +++---- scipy/optimize/_lbfgsb_py.py | 10 ++--- scipy/optimize/_minimize.py | 42 ++++++++++++--------- scipy/optimize/_numdiff.py | 11 ++++-- scipy/optimize/tests/test__numdiff.py | 8 ++-- scipy/optimize/tests/test_optimize.py | 29 ++++++++++++++ 6 files changed, 74 insertions(+), 40 deletions(-) diff --git a/scipy/optimize/_differentiable_functions.py b/scipy/optimize/_differentiable_functions.py index 1d61c33fb7d3..3a052637ab67 100644 --- 
a/scipy/optimize/_differentiable_functions.py +++ b/scipy/optimize/_differentiable_functions.py @@ -179,14 +179,12 @@ class ScalarFunction: For ``method='3-point'`` the sign of `epsilon` is ignored. By default relative steps are used, only if ``epsilon is not None`` are absolute steps used. - workers : int or map-like callable, optional - If `workers` is an int any numerical differentiation task is subdivided - into `workers` sections and the fun evaluated in parallel - (uses `multiprocessing.Pool `). - Supply -1 to use all available CPU cores. - Alternatively supply a map-like callable, such as - `multiprocessing.Pool.map` for evaluating the population in parallel. - This evaluation is carried out as ``workers(fun, iterable)``. + workers : map-like callable, optional + A map-like callable, such as `multiprocessing.Pool.map` for evaluating + any numerical differentiation in parallel. + This evaluation is carried out as ``workers(fun, iterable)``, or + ``workers(grad, iterable)``, depending on what is being numerically + differentiated. Notes ----- diff --git a/scipy/optimize/_lbfgsb_py.py b/scipy/optimize/_lbfgsb_py.py index c8a6fe8bb843..c90bc609270f 100644 --- a/scipy/optimize/_lbfgsb_py.py +++ b/scipy/optimize/_lbfgsb_py.py @@ -345,13 +345,9 @@ def _minimize_lbfgsb(fun, x0, args=(), jac=None, bounds=None, possibly adjusted to fit into the bounds. For ``method='3-point'`` the sign of `h` is ignored. If None (default) then step is selected automatically. - workers : int or map-like callable, optional - If `workers` is an int any numerical differentiation task is subdivided - into `workers` sections and the fun evaluated in parallel - (uses `multiprocessing.Pool `). - Supply -1 to use all available CPU cores. - Alternatively supply a map-like callable, such as - `multiprocessing.Pool.map` for evaluating the population in parallel. + workers : map-like callable, optional + A map-like callable, such as `multiprocessing.Pool.map` for evaluating + any numerical differentiation in parallel. This evaluation is carried out as ``workers(fun, iterable)``. Notes diff --git a/scipy/optimize/_minimize.py b/scipy/optimize/_minimize.py index 0b47c57cb3a1..d4d66b2ceef9 100644 --- a/scipy/optimize/_minimize.py +++ b/scipy/optimize/_minimize.py @@ -701,21 +701,21 @@ def minimize(fun, x0, args=(), method=None, jac=None, hess=None, x_fixed = (bounds.lb)[i_fixed] x0 = x0[~i_fixed] bounds = _remove_from_bounds(bounds, i_fixed) - fun = _remove_from_func(fun, i_fixed, x_fixed) + fun = _Remove_From_Func(fun, i_fixed, x_fixed) if callable(callback): - callback = _remove_from_func(callback, i_fixed, x_fixed) + callback = _Remove_From_Func(callback, i_fixed, x_fixed) if callable(jac): - jac = _remove_from_func(jac, i_fixed, x_fixed, remove=1) + jac = _Remove_From_Func(jac, i_fixed, x_fixed, remove=1) # make a copy of the constraints so the user's version doesn't # get changed. 
(Shallow copy is ok) constraints = [con.copy() for con in constraints] for con in constraints: # yes, guaranteed to be a list - con['fun'] = _remove_from_func(con['fun'], i_fixed, + con['fun'] = _Remove_From_Func(con['fun'], i_fixed, x_fixed, min_dim=1, remove=0) if callable(con.get('jac', None)): - con['jac'] = _remove_from_func(con['jac'], i_fixed, + con['jac'] = _Remove_From_Func(con['jac'], i_fixed, x_fixed, min_dim=2, remove=1) bounds = standardize_bounds(bounds, x0, meth) @@ -1000,27 +1000,33 @@ def _remove_from_bounds(bounds, i_fixed): return Bounds(lb, ub) # don't mutate original Bounds object -def _remove_from_func(fun_in, i_fixed, x_fixed, min_dim=None, remove=0): +class _Remove_From_Func: """Wraps a function such that fixed variables need not be passed in""" - def fun_out(x_in, *args, **kwargs): - x_out = np.zeros_like(i_fixed, dtype=x_in.dtype) - x_out[i_fixed] = x_fixed - x_out[~i_fixed] = x_in - y_out = fun_in(x_out, *args, **kwargs) + def __init__(self, fun_in, i_fixed, x_fixed, min_dim=None, remove=0): + self.fun_in = fun_in + self.i_fixed = i_fixed + self.x_fixed = x_fixed + self.min_dim = min_dim + self.remove = remove + + def __call__(self, x_in, *args, **kwargs): + x_out = np.zeros_like(self.i_fixed, dtype=x_in.dtype) + x_out[self.i_fixed] = self.x_fixed + x_out[~self.i_fixed] = x_in + y_out = self.fun_in(x_out, *args, **kwargs) y_out = np.array(y_out) - if min_dim == 1: + if self.min_dim == 1: y_out = np.atleast_1d(y_out) - elif min_dim == 2: + elif self.min_dim == 2: y_out = np.atleast_2d(y_out) - if remove == 1: - y_out = y_out[..., ~i_fixed] - elif remove == 2: - y_out = y_out[~i_fixed, ~i_fixed] + if self.remove == 1: + y_out = y_out[..., ~self.i_fixed] + elif self.remove == 2: + y_out = y_out[~self.i_fixed, ~self.i_fixed] return y_out - return fun_out def _add_to_array(x_in, i_fixed, x_fixed): diff --git a/scipy/optimize/_numdiff.py b/scipy/optimize/_numdiff.py index add9f6349d0e..4f044f630281 100644 --- a/scipy/optimize/_numdiff.py +++ b/scipy/optimize/_numdiff.py @@ -361,13 +361,16 @@ def approx_derivative(fun, x0, method='3-point', rel_step=None, abs_step=None, If True then the function also returns a dictionary with extra information about the calculation. workers : int or map-like callable, optional - If `workers` is an int the task is subdivided into `workers` + Supply a map-like callable, such as + `multiprocessing.Pool.map` for evaluating the population in parallel. + This evaluation is carried out as ``workers(fun, iterable)``. + Alternatively, if `workers` is an int the task is subdivided into `workers` sections and the fun evaluated in parallel (uses `multiprocessing.Pool `). Supply -1 to use all available CPU cores. - Alternatively supply a map-like callable, such as - `multiprocessing.Pool.map` for evaluating the population in parallel. - This evaluation is carried out as ``workers(fun, iterable)``. + It is recommended that a map-like be used instead of int, as repeated + calls to `approx_derivative` will incur large overhead from setting up + new processes. Returns ------- diff --git a/scipy/optimize/tests/test__numdiff.py b/scipy/optimize/tests/test__numdiff.py index 6a5473143a09..c91d214ca06e 100644 --- a/scipy/optimize/tests/test__numdiff.py +++ b/scipy/optimize/tests/test__numdiff.py @@ -290,9 +290,11 @@ def test_workers(self): full_output=True) md3, mdct3 = approx_derivative(rosen, x0, workers=mapper, full_output=True) - md4, mdct4 = approx_derivative(rosen, x0, - method='cs', workers=mapper, - full_output=True) + # supply a number for workers. 
This is not normally recommended + # for upstream workers as setting up processes incurs a large overhead + md4, mdct4 = approx_derivative(rosen, x0, + method='cs', workers=2, + full_output=True) sfr = _ScalarFunctionWrapper(rosen) d2, dct2 = approx_derivative(sfr, x0, method='2-point', full_output=True) diff --git a/scipy/optimize/tests/test_optimize.py b/scipy/optimize/tests/test_optimize.py index 5277af224861..d827c6525e95 100644 --- a/scipy/optimize/tests/test_optimize.py +++ b/scipy/optimize/tests/test_optimize.py @@ -35,6 +35,7 @@ csr_array, csc_array) from scipy.conftest import array_api_compatible from scipy._lib._array_api_no_0d import xp_assert_equal, array_namespace +from scipy._lib._util import MapWrapper skip_xp_backends = pytest.mark.skip_xp_backends @@ -3255,3 +3256,31 @@ def sparse_rosen_hess(x): assert res_dense.nfev == res_sparse.nfev assert res_dense.njev == res_sparse.njev assert res_dense.nhev == res_sparse.nhev + + +@pytest.mark.parametrize('workers', [None, 2]) +@pytest.mark.parametrize('method', ['l-bfgs-b']) +class TestWorkers: + + def setup_method(self): + self.x0 = np.array([1.0, 2.0, 3.0]) + + def test_smoke(self, workers, method): + workers = workers or map + with MapWrapper(workers) as mf: + optimize.minimize( + rosen, self.x0, options={"workers":mf}, method=method + ) + + def test_equal_bounds(self, workers, method): + workers = workers or map + if method not in ['l-bfgs-b']: + pytest.skip(f"{method} cannot use bounds") + + bounds = Bounds([0, 2.0, 0.], [10., 2.0, 10.]) + with MapWrapper(workers) as mf: + res = optimize.minimize( + rosen, self.x0, bounds=bounds, options={"workers": mf}, method=method + ) + assert res.success + assert_equal(res.x[1], 2.0) From 3dd2aea0b54c6a1ec6811494f266e613a534c74d Mon Sep 17 00:00:00 2001 From: Andrew Nelson Date: Wed, 25 Dec 2024 15:23:32 +1100 Subject: [PATCH 22/63] DOC: amend docstrings --- scipy/optimize/_differentiable_functions.py | 7 +++++++ scipy/optimize/_optimize.py | 17 +++++++++++------ 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/scipy/optimize/_differentiable_functions.py b/scipy/optimize/_differentiable_functions.py index 3a052637ab67..c570feefe2f1 100644 --- a/scipy/optimize/_differentiable_functions.py +++ b/scipy/optimize/_differentiable_functions.py @@ -185,6 +185,13 @@ class ScalarFunction: This evaluation is carried out as ``workers(fun, iterable)``, or ``workers(grad, iterable)``, depending on what is being numerically differentiated. + Alternatively, if `workers` is an int the task is subdivided into `workers` + sections and the function evaluated in parallel + (uses `multiprocessing.Pool `). + Supply -1 to use all available CPU cores. + It is recommended that a map-like be used instead of int, as repeated + calls to `approx_derivative` will incur large overhead from setting up + new processes. Notes ----- diff --git a/scipy/optimize/_optimize.py b/scipy/optimize/_optimize.py index c3e0d876026c..8157b8257f46 100644 --- a/scipy/optimize/_optimize.py +++ b/scipy/optimize/_optimize.py @@ -255,14 +255,19 @@ def _prepare_scalar_function(fun, x0, jac=None, args=(), bounds=None, Whenever the gradient is estimated via finite-differences, the Hessian cannot be estimated with options {'2-point', '3-point', 'cs'} and needs to be estimated using one of the quasi-Newton strategies. 
- workers : int or map-like callable, optional - If `workers` is an int any numerical differentiation task is subdivided - into `workers` sections and the fun evaluated in parallel + workers : int or map-like callable, optional + A map-like callable, such as `multiprocessing.Pool.map` for evaluating + any numerical differentiation in parallel. + This evaluation is carried out as ``workers(fun, iterable)``, or + ``workers(grad, iterable)``, depending on what is being numerically + differentiated. + Alternatively, if `workers` is an int the task is subdivided into `workers` + sections and the function evaluated in parallel (uses `multiprocessing.Pool `). Supply -1 to use all available CPU cores. - Alternatively supply a map-like callable, such as - `multiprocessing.Pool.map` for evaluating the population in parallel. - This evaluation is carried out as ``workers(fun, iterable)``. + It is recommended that a map-like be used instead of int, as repeated + calls to `approx_derivative` will incur large overhead from setting up + new processes. Returns ------- From 5246a7f47d05e9bbd334cca60b989aa2cc3d4757 Mon Sep 17 00:00:00 2001 From: Andrew Nelson Date: Wed, 25 Dec 2024 18:14:34 +1100 Subject: [PATCH 23/63] TST: mark workers test as slow --- scipy/optimize/tests/test__numdiff.py | 1 + 1 file changed, 1 insertion(+) diff --git a/scipy/optimize/tests/test__numdiff.py b/scipy/optimize/tests/test__numdiff.py index c91d214ca06e..b561a68c5c8d 100644 --- a/scipy/optimize/tests/test__numdiff.py +++ b/scipy/optimize/tests/test__numdiff.py @@ -280,6 +280,7 @@ def test_scalar_vector(self): assert_allclose(jac_diff_3, jac_true, rtol=1e-9) assert_allclose(jac_diff_4, jac_true, rtol=1e-12) + @pytest.mark.fail_slow(3) def test_workers(self): # check that nfev consumed by approx_derivative is tracked properly # and that parallel evaluation is same as series From cd49c26528fe7943c43e0af44cb5f925efe7eca7 Mon Sep 17 00:00:00 2001 From: Andrew Nelson Date: Wed, 25 Dec 2024 08:56:47 +0000 Subject: [PATCH 24/63] TST fix import --- scipy/optimize/tests/test__numdiff.py | 1 + 1 file changed, 1 insertion(+) diff --git a/scipy/optimize/tests/test__numdiff.py b/scipy/optimize/tests/test__numdiff.py index b561a68c5c8d..3b627bfa7d80 100644 --- a/scipy/optimize/tests/test__numdiff.py +++ b/scipy/optimize/tests/test__numdiff.py @@ -3,6 +3,7 @@ import numpy as np from numpy.testing import assert_allclose, assert_equal, assert_ +import pytest from pytest import raises as assert_raises from scipy._lib._util import MapWrapper, _ScalarFunctionWrapper From fd88228546c62718aa241fe5174119e39de4b82c Mon Sep 17 00:00:00 2001 From: Andrew Nelson Date: Wed, 25 Dec 2024 21:48:29 +1100 Subject: [PATCH 25/63] MAINT: remove print, lengthen time --- scipy/optimize/_numdiff.py | 1 - scipy/optimize/tests/test__numdiff.py | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/scipy/optimize/_numdiff.py b/scipy/optimize/_numdiff.py index 4f044f630281..f9e7ccd63361 100644 --- a/scipy/optimize/_numdiff.py +++ b/scipy/optimize/_numdiff.py @@ -598,7 +598,6 @@ def approx_derivative(fun, x0, method='3-point', rel_step=None, abs_step=None, structure = structure.tocsc() else: structure = np.atleast_2d(structure) - print(groups) groups = np.atleast_1d(groups) J, _nfev = _sparse_difference(fun_wrapped, x0, f0, h, use_one_sided, structure, diff --git a/scipy/optimize/tests/test__numdiff.py b/scipy/optimize/tests/test__numdiff.py index 3b627bfa7d80..dd5fbbe7d94f 100644 --- a/scipy/optimize/tests/test__numdiff.py +++ 
b/scipy/optimize/tests/test__numdiff.py @@ -281,7 +281,7 @@ def test_scalar_vector(self): assert_allclose(jac_diff_3, jac_true, rtol=1e-9) assert_allclose(jac_diff_4, jac_true, rtol=1e-12) - @pytest.mark.fail_slow(3) + @pytest.mark.fail_slow(10.0) def test_workers(self): # check that nfev consumed by approx_derivative is tracked properly # and that parallel evaluation is same as series From d374d4fb6d51ad6abbc9f9f1b32adb4c431652c3 Mon Sep 17 00:00:00 2001 From: Andrew Nelson Date: Wed, 25 Dec 2024 22:50:29 +1100 Subject: [PATCH 26/63] MAINT: lengthen time for correct test this time --- scipy/optimize/tests/test__numdiff.py | 2 +- .../tests/test_differentiable_functions.py | 90 +++++++++---------- 2 files changed, 46 insertions(+), 46 deletions(-) diff --git a/scipy/optimize/tests/test__numdiff.py b/scipy/optimize/tests/test__numdiff.py index dd5fbbe7d94f..6242aa703638 100644 --- a/scipy/optimize/tests/test__numdiff.py +++ b/scipy/optimize/tests/test__numdiff.py @@ -281,7 +281,7 @@ def test_scalar_vector(self): assert_allclose(jac_diff_3, jac_true, rtol=1e-9) assert_allclose(jac_diff_4, jac_true, rtol=1e-12) - @pytest.mark.fail_slow(10.0) + @pytest.mark.fail_slow(5.0) def test_workers(self): # check that nfev consumed by approx_derivative is tracked properly # and that parallel evaluation is same as series diff --git a/scipy/optimize/tests/test_differentiable_functions.py b/scipy/optimize/tests/test_differentiable_functions.py index 48bec70cc9eb..96bcd5f530f3 100644 --- a/scipy/optimize/tests/test_differentiable_functions.py +++ b/scipy/optimize/tests/test_differentiable_functions.py @@ -131,6 +131,7 @@ def test_finite_difference_grad(self): assert_array_almost_equal(f_analit, f_approx) assert_array_almost_equal(g_analit, g_approx) + @pytest.mark.fail_slow(5.0) def test_workers(self): x0 = np.array([2.0, 0.3]) ex = ExScalarFunction() @@ -139,58 +140,57 @@ def test_workers(self): approx = ScalarFunction(ex.fun, x0, (), '2-point', ex.hess, None, (-np.inf, np.inf), workers=mapper) - approx_series = ScalarFunction(ex2.fun, x0, (), '2-point', - ex2.hess, None, (-np.inf, np.inf), - ) - assert_allclose(approx.grad(x0), ex.grad(x0)) - assert_allclose(approx_series.grad(x0), ex.grad(x0)) - assert_allclose(approx_series.hess(x0), ex.hess(x0)) - assert_allclose(approx.hess(x0), ex.hess(x0)) - assert_equal(approx.nfev, approx_series.nfev) - assert_equal(approx_series.nfev, ex2.nfev) - assert_equal(approx.ngev, approx_series.ngev) - assert_equal(approx.nhev, approx_series.nhev) - assert_equal(approx_series.nhev, ex2.nhev) - - ex = ExScalarFunction() - ex2 = ExScalarFunction() - with MapWrapper(2) as mapper: + approx_series = ScalarFunction(ex2.fun, x0, (), '2-point', + ex2.hess, None, (-np.inf, np.inf), + ) + assert_allclose(approx.grad(x0), ex.grad(x0)) + assert_allclose(approx_series.grad(x0), ex.grad(x0)) + assert_allclose(approx_series.hess(x0), ex.hess(x0)) + assert_allclose(approx.hess(x0), ex.hess(x0)) + assert_equal(approx.nfev, approx_series.nfev) + assert_equal(approx_series.nfev, ex2.nfev) + assert_equal(approx.ngev, approx_series.ngev) + assert_equal(approx.nhev, approx_series.nhev) + assert_equal(approx_series.nhev, ex2.nhev) + + ex = ExScalarFunction() + ex2 = ExScalarFunction() approx = ScalarFunction(ex.fun, x0, (), '3-point', ex.hess, None, (-np.inf, np.inf), workers=mapper) - approx_series = ScalarFunction(ex2.fun, x0, (), '3-point', - ex2.hess, None, (-np.inf, np.inf), - ) - assert_allclose(approx.grad(x0), ex.grad(x0)) - assert_allclose(approx_series.grad(x0), ex.grad(x0)) - 
assert_allclose(approx_series.hess(x0), ex.hess(x0)) - assert_allclose(approx.hess(x0), ex.hess(x0)) - assert_equal(approx.nfev, approx_series.nfev) - assert_equal(approx_series.nfev, ex2.nfev) - assert_equal(approx.ngev, approx_series.ngev) - assert_equal(approx.nhev, approx_series.nhev) - assert_equal(approx_series.nhev, ex2.nhev) + approx_series = ScalarFunction(ex2.fun, x0, (), '3-point', + ex2.hess, None, (-np.inf, np.inf), + ) + assert_allclose(approx.grad(x0), ex.grad(x0)) + assert_allclose(approx_series.grad(x0), ex.grad(x0)) + assert_allclose(approx_series.hess(x0), ex.hess(x0)) + assert_allclose(approx.hess(x0), ex.hess(x0)) + assert_equal(approx.nfev, approx_series.nfev) + assert_equal(approx_series.nfev, ex2.nfev) + assert_equal(approx.ngev, approx_series.ngev) + assert_equal(approx.nhev, approx_series.nhev) + assert_equal(approx_series.nhev, ex2.nhev) + + ex = ExScalarFunction() + ex2 = ExScalarFunction() + x1 = np.array([3.0, 4.0]) - ex = ExScalarFunction() - ex2 = ExScalarFunction() - x1 = np.array([3.0, 4.0]) - with MapWrapper(2) as mapper: approx = ScalarFunction(ex.fun, x0, (), ex.grad, '3-point', None, (-np.inf, np.inf), workers=mapper) - approx_series = ScalarFunction(ex2.fun, x0, (), ex2.grad, - '3-point', None, (-np.inf, np.inf), - ) - assert_allclose(approx.grad(x1), ex.grad(x1)) - assert_allclose(approx_series.grad(x1), ex.grad(x1)) - approx_series.hess(x1) - approx.hess(x1) - assert_equal(approx.nfev, approx_series.nfev) - assert_equal(approx_series.nfev, ex2.nfev) - assert_equal(approx.ngev, approx_series.ngev) - assert_equal(approx_series.ngev, ex2.ngev) - assert_equal(approx.nhev, approx_series.nhev) - assert_equal(approx_series.nhev, ex2.nhev) + approx_series = ScalarFunction(ex2.fun, x0, (), ex2.grad, + '3-point', None, (-np.inf, np.inf), + ) + assert_allclose(approx.grad(x1), ex.grad(x1)) + assert_allclose(approx_series.grad(x1), ex.grad(x1)) + approx_series.hess(x1) + approx.hess(x1) + assert_equal(approx.nfev, approx_series.nfev) + assert_equal(approx_series.nfev, ex2.nfev) + assert_equal(approx.ngev, approx_series.ngev) + assert_equal(approx_series.ngev, ex2.ngev) + assert_equal(approx.nhev, approx_series.nhev) + assert_equal(approx_series.nhev, ex2.nhev) def test_fun_and_grad(self): ex = ExScalarFunction() From 81a63d30bf0972aa4918aa560d969d1cece961c5 Mon Sep 17 00:00:00 2001 From: Andrew Nelson Date: Tue, 14 Jan 2025 02:32:19 +1100 Subject: [PATCH 27/63] TST: make test name more explicit --- scipy/optimize/tests/test__numdiff.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/scipy/optimize/tests/test__numdiff.py b/scipy/optimize/tests/test__numdiff.py index 6242aa703638..0ac1898a81fd 100644 --- a/scipy/optimize/tests/test__numdiff.py +++ b/scipy/optimize/tests/test__numdiff.py @@ -282,21 +282,21 @@ def test_scalar_vector(self): assert_allclose(jac_diff_4, jac_true, rtol=1e-12) @pytest.mark.fail_slow(5.0) - def test_workers(self): + def test_workers_evaluations_and_nfev(self): # check that nfev consumed by approx_derivative is tracked properly # and that parallel evaluation is same as series x0 = [0.5, 1.5, 2.0] with MapWrapper(2) as mapper: md2, mdct2 = approx_derivative(rosen, x0, - method='2-point', workers=mapper, - full_output=True) + method='2-point', workers=mapper, + full_output=True) md3, mdct3 = approx_derivative(rosen, x0, - workers=mapper, full_output=True) + workers=mapper, full_output=True) # supply a number for workers. 
This is not normally recommended # for upstream workers as setting up processes incurs a large overhead md4, mdct4 = approx_derivative(rosen, x0, - method='cs', workers=2, - full_output=True) + method='cs', workers=2, + full_output=True) sfr = _ScalarFunctionWrapper(rosen) d2, dct2 = approx_derivative(sfr, x0, method='2-point', full_output=True) From ce60d5afab3bfab175fda343122f170a10bdf1be Mon Sep 17 00:00:00 2001 From: Andrew Nelson Date: Tue, 14 Jan 2025 02:50:24 +1100 Subject: [PATCH 28/63] TST: compare parallel to serial minimize --- scipy/optimize/tests/test_optimize.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/scipy/optimize/tests/test_optimize.py b/scipy/optimize/tests/test_optimize.py index d827c6525e95..bcc3c4bf92ef 100644 --- a/scipy/optimize/tests/test_optimize.py +++ b/scipy/optimize/tests/test_optimize.py @@ -3266,11 +3266,17 @@ def setup_method(self): self.x0 = np.array([1.0, 2.0, 3.0]) def test_smoke(self, workers, method): + # checks parallelised optimization output is same as serial workers = workers or map with MapWrapper(workers) as mf: - optimize.minimize( + res = optimize.minimize( rosen, self.x0, options={"workers":mf}, method=method ) + res_default = optimize.minimize( + rosen, self.x0, method=method + ) + assert_equal(res.x, res_default.x) + assert_equal(res.nfev, res_default.nfev) def test_equal_bounds(self, workers, method): workers = workers or map From 5ab8d91fb03f4051f73d9c8f8d3fed469acb014e Mon Sep 17 00:00:00 2001 From: Andrew Nelson Date: Thu, 16 Jan 2025 06:57:57 +1100 Subject: [PATCH 29/63] MAINT: address review comments --- scipy/_lib/_util.py | 3 +-- scipy/optimize/_differentiable_functions.py | 3 +-- scipy/optimize/_numdiff.py | 9 +++++++-- scipy/optimize/tests/test__numdiff.py | 1 + 4 files changed, 10 insertions(+), 6 deletions(-) diff --git a/scipy/_lib/_util.py b/scipy/_lib/_util.py index 438f84a96c79..e88a5ecaefef 100644 --- a/scipy/_lib/_util.py +++ b/scipy/_lib/_util.py @@ -670,8 +670,7 @@ def __init__(self, f, args=None): def __call__(self, x): # Send a copy because the user may overwrite it. - # Overwriting results in undefined behaviour because - # fun(self.x) will change self.x, with the two no longer linked. + # The user of this class might want `x` to remain unchanged. fx = self.f(np.copy(x), *self.args) self.nfev += 1 diff --git a/scipy/optimize/_differentiable_functions.py b/scipy/optimize/_differentiable_functions.py index c570feefe2f1..17c0e7257cb1 100644 --- a/scipy/optimize/_differentiable_functions.py +++ b/scipy/optimize/_differentiable_functions.py @@ -33,8 +33,7 @@ def __init__( def __call__(self, x, f0=None, **kwds): # Send a copy because the user may overwrite it. - # Overwriting results in undefined behaviour because - # fun(self.x) will change self.x, with the two no longer linked. + # The user of this class might want `x` to remain unchanged. if callable(self.grad): g = np.atleast_1d(self.grad(np.copy(x), *self.args)) elif self.grad in FD_METHODS: diff --git a/scipy/optimize/_numdiff.py b/scipy/optimize/_numdiff.py index f9e7ccd63361..fe28b9a83800 100644 --- a/scipy/optimize/_numdiff.py +++ b/scipy/optimize/_numdiff.py @@ -486,8 +486,11 @@ def approx_derivative(fun, x0, method='3-point', rel_step=None, abs_step=None, >>> np.mean(elapsed) # may vary np.float64(1.442545195999901) - Create a map-like vectorised version. Note that the first argument to - approx_derivative is effectively ignored in the way I've written it. + Create a map-like vectorized version. 
`x` is a generator, so first of all + a 2-D array, `xx`, is reconstituted. Here `xx` has shape `(Y, N)` where `Y` + is the number of function evaluations to perform and `N` is the dimensionality + of the objective function. The underlying objective function is `rosen`, which + requires `xx` to have shape `(N, Y)`, so a transpose is required. >>> def fun(f, x, *args, **kwds): ... xx = np.r_[[xs for xs in x]] @@ -674,6 +677,8 @@ def x_generator2(x0, h): x1[i] = x0[i] + h[i] yield x1 + # only f_evals (numerator) needs parallelization, the denominator + # (the step size) is fast to calculate. f_evals = workers(fun, x_generator2(x0, h)) dx = [(x0[i] + h[i]) - x0[i] for i in range(n)] df = [f_eval - f0 for f_eval in f_evals] diff --git a/scipy/optimize/tests/test__numdiff.py b/scipy/optimize/tests/test__numdiff.py index 0ac1898a81fd..0439aebfc61c 100644 --- a/scipy/optimize/tests/test__numdiff.py +++ b/scipy/optimize/tests/test__numdiff.py @@ -609,6 +609,7 @@ def structure(self, n): return A + @pytest.mark.fail_slow(2.0) def test_all(self): A = self.structure(self.n) order = np.arange(self.n) From 800c663d57276b711efcfd4754de068bab069ef2 Mon Sep 17 00:00:00 2001 From: Andrew Nelson Date: Thu, 16 Jan 2025 08:34:33 +1100 Subject: [PATCH 30/63] TST: slow test --- scipy/optimize/tests/test__numdiff.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scipy/optimize/tests/test__numdiff.py b/scipy/optimize/tests/test__numdiff.py index 0439aebfc61c..c1079f008728 100644 --- a/scipy/optimize/tests/test__numdiff.py +++ b/scipy/optimize/tests/test__numdiff.py @@ -609,7 +609,7 @@ def structure(self, n): return A - @pytest.mark.fail_slow(2.0) + @pytest.mark.fail_slow(5) def test_all(self): A = self.structure(self.n) order = np.arange(self.n) From afd730e41fd8eccbd3210ce721d958596aed2880 Mon Sep 17 00:00:00 2001 From: "Tomer.Sery" Date: Tue, 31 Dec 2024 16:16:44 +0000 Subject: [PATCH 31/63] BENCH:Yen: Introduce `yen` algorithm benchmark Towards introducing algorithm improvements, add a benchmark for yen. --- benchmarks/benchmarks/sparse_csgraph_yen.py | 38 +++++++++++++++++++++ 1 file changed, 38 insertions(+) create mode 100644 benchmarks/benchmarks/sparse_csgraph_yen.py diff --git a/benchmarks/benchmarks/sparse_csgraph_yen.py b/benchmarks/benchmarks/sparse_csgraph_yen.py new file mode 100644 index 000000000000..6adad05901d0 --- /dev/null +++ b/benchmarks/benchmarks/sparse_csgraph_yen.py @@ -0,0 +1,38 @@ +"""benchmarks for the scipy.sparse.csgraph module""" +import numpy as np +import scipy.sparse + +from .common import Benchmark, safe_import + +with safe_import(): + from scipy.sparse.csgraph import yen + + +class Yen(Benchmark): + params = [ + [30, 300, 3000], + [10, 100, 300], + ] + param_names = ['n', 'K'] + + def setup(self, n, K): + # make a random connectivity matrix + data = scipy.sparse.rand( + n, n, density=0.4, format='lil', random_state=42, dtype=np.bool_ + ) + data.setdiag(np.zeros(n, dtype=np.bool_)) + self.data = data + self.source = np.random.randint(n) + sink = np.random.randint(n) + while self.source == sink: + sink = np.random.randint(n) + self.sink = sink + + def time_yen(self, n, K): + yen( + csgraph=self.data, + source=self.source, + sink=self.sink, + K=K, + directed=False, + ) From 5cfda31f2d76b915e0cd24087e160cec949f85a5 Mon Sep 17 00:00:00 2001 From: "Tomer.Sery" Date: Tue, 31 Dec 2024 11:32:12 +0000 Subject: [PATCH 32/63] ENH:Yen: Store found paths in a sorted vector. 
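In outline, the candidate set behaves like a length-capped list of
(distance, path) pairs kept sorted by distance, so the current best and
worst candidates sit at the two ends and trimming back to K entries is
cheap. A rough pure-Python sketch of that behaviour (illustrative only;
the real implementation is the C++-vector-backed cdef class in the diff
below, and `bisect` here just stands in for `lower_bound`):

    import bisect

    class _CandidatesSketch:
        # Toy model of the sorted candidate container, not the actual code.
        def __init__(self, k):
            self.k = k
            self.entries = []              # [(distance, path)], kept sorted

        def insert(self, distance, path):
            if len(self.entries) >= self.k and distance >= self.entries[-1][0]:
                return                     # worse than the current K-th best
            bisect.insort(self.entries, (distance, path))
            del self.entries[self.k:]      # keep only the K best

        def pop_shortest(self):
            return self.entries.pop(0)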
Instead of the current approach of storing found paths in two arrays, use
a cdef class to handle path management for insertion and popping.
Use a sorted vector for the following reasons:
1. Fast access to shortest path and longest path.
2. Fast popping from both ends of the vector.
3. Insertion in the middle of the sorted vector is not catastrophic
in terms of performance.
---
 scipy/sparse/csgraph/_shortest_path.pyx | 203 +++++++++++++-----------
 1 file changed, 110 insertions(+), 93 deletions(-)

diff --git a/scipy/sparse/csgraph/_shortest_path.pyx b/scipy/sparse/csgraph/_shortest_path.pyx
index c6fdd4aad0e3..2d170a1eb554 100644
--- a/scipy/sparse/csgraph/_shortest_path.pyx
+++ b/scipy/sparse/csgraph/_shortest_path.pyx
@@ -26,8 +26,10 @@ cimport cython

 from libc.math cimport INFINITY
+from libcpp.algorithm cimport lower_bound
 from libcpp.queue cimport priority_queue
 from libcpp.pair cimport pair
+from libcpp.vector cimport vector

 np.import_array()

@@ -1561,6 +1563,100 @@ def yen(
     return dist_array[:num_paths_found].reshape((num_paths_found,))


+ctypedef vector[int] yen_path_t
+ctypedef pair[double, yen_path_t] distance_path_pair_t
+
+
+cdef class _YenCandidatePaths:
+    cdef:
+        vector[distance_path_pair_t] _distances_and_paths
+        int _required_paths
+
+    def __init__(self, K: int):
+        self._distances_and_paths = vector[distance_path_pair_t]()
+        self._required_paths = K
+
+    @cython.boundscheck(False)
+    cdef void insert_path(
+        self,
+        const double distance,
+        const int[:] source_to_spur_path,
+        const int[:] spur_to_sink_path,
+        const int spur_node,
+        const int sink,
+    ):
+        cdef:
+            yen_path_t path_to_insert
+            int idx = sink
+            distance_path_pair_t new_element, tmp_element
+            vector[distance_path_pair_t].iterator it
+
+        if self._distances_and_paths.size() >= self._required_paths and distance >= self.max_distance():
+            # The new path is longer than the longest path in the vector - return
+            return
+
+        # Store the path in reverse order, from sink to source
+        # path_to_insert[0] = sink -> path_to_insert[1] -> ... 
-> path_to_insert[N-1] = source + while idx != spur_node: + path_to_insert.push_back(idx) + idx = spur_to_sink_path[idx] + while idx != NULL_IDX: + path_to_insert.push_back(idx) + idx = source_to_spur_path[idx] + + new_element = distance_path_pair_t(distance, path_to_insert) + it = lower_bound(self._distances_and_paths.begin(), self._distances_and_paths.end(), new_element) + # Check if the path exists already + while it != self._distances_and_paths.end(): + tmp_element = cython.operator.dereference(it) + if tmp_element.first != distance: + break + if tmp_element.second == path_to_insert: + # Path already exists - return + return + it += 1 + + self._distances_and_paths.insert(it, new_element) + + # Reduce the number of paths to amount required + while self._distances_and_paths.size() > self._required_paths: + self._distances_and_paths.pop_back() + + cdef double min_distance(self): + if self.empty(): + return INFINITY + return self._distances_and_paths[0].first + + cdef double max_distance(self): + if self.empty(): + return -INFINITY + return self._distances_and_paths[-1].first + + @cython.boundscheck(False) + cdef void pop_path_to_memory_view( + self, + int[:] target, + ): + cdef: + yen_path_t shortest_path + int idx + + if self.empty(): + raise RuntimeError("No paths to pop") + + shortest_path = self._distances_and_paths[0].second + self._distances_and_paths.erase(self._distances_and_paths.begin()) + + # Restore the path in the correct order + for idx in range(shortest_path.size() - 1): + target[shortest_path[idx]] = shortest_path[idx + 1] + + self._required_paths -= 1 + + cdef bint empty(self): + return self._distances_and_paths.empty() + + @cython.boundscheck(False) cdef void _yen( const int source, @@ -1580,6 +1676,8 @@ cdef void _yen( int[:] predecessor_matrix = np.full((N), NULL_IDX, dtype=ITYPE) double[:] dist_matrix = np.full((N), np.inf, dtype=DTYPE) int[:] dummy_source_matrix = np.empty((0), dtype=ITYPE) # unused + _YenCandidatePaths candidate_paths = _YenCandidatePaths(K) + dist_matrix[source] = 0 # --------------------------------------------------- @@ -1598,11 +1696,6 @@ cdef void _yen( return cdef: - # initialize candidate arrays - # for index 'i', candidate_distances[i] stores the distance - # of the path stored in candidate_predecessors[i. 
:] - double[:] candidate_distances = np.full(K, INFINITY, dtype=DTYPE) - int[:, :] candidate_predecessors = np.full((K, N), NULL_IDX, dtype=ITYPE) # Store the original graph weights for restoring the graph double[:] csr_weights = original_weights.copy() double[:] csrT_weights @@ -1725,45 +1818,14 @@ cdef void _yen( # --------------------------------------------------- # Add the found path to arrays of candidates - if ( - total_distance != INFINITY - and _yen_is_path_in_candidates(candidate_predecessors, - shortest_paths_predecessors[k-1], - predecessor_matrix, - spur_node, sink) == 0 - ): - # Find the index to insert the new path - short_path_idx = tmp_i = NULL_IDX - tmp_d = -INFINITY # maximal distance in potential distances array - for i in range(candidate_distances.shape[0]): - if candidate_distances[i] == INFINITY: - short_path_idx = i - break - elif candidate_distances[i] > tmp_d: - tmp_d = candidate_distances[i] - tmp_i = i - if short_path_idx == NULL_IDX and total_distance < tmp_d: - short_path_idx = tmp_i - - if short_path_idx != NULL_IDX: - candidate_distances[short_path_idx] = total_distance - # Reset candidate_predecessors[short_path_idx] - candidate_predecessors[short_path_idx, :] = NULL_IDX - # Fill original path - node = spur_node - while node != NULL_IDX: - candidate_predecessors[short_path_idx, node] = ( - shortest_paths_predecessors[k-1, node] - ) - node = shortest_paths_predecessors[k-1, node] - - # Fill spur path - node = sink - while node != spur_node: - candidate_predecessors[short_path_idx, node] = ( - predecessor_matrix[node] - ) - node = predecessor_matrix[node] + if total_distance != INFINITY: + candidate_paths.insert_path( + total_distance, + shortest_paths_predecessors[k-1], + predecessor_matrix, + spur_node, + sink, + ) # --------------------------------------------------- # Restore graph weights @@ -1781,55 +1843,10 @@ cdef void _yen( # --------------------------------------------------- # Find shortest path in candidates and add to result arrays - tmp_d = INFINITY # Minimal distance in potential distances array - short_path_idx = NULL_IDX - for i in range(candidate_distances.shape[0]): - if candidate_distances[i] < tmp_d: - tmp_d = candidate_distances[i] - short_path_idx = i - if short_path_idx == NULL_IDX: + total_distance = candidate_paths.min_distance() + if total_distance == INFINITY: # There are no more paths break else: - shortest_distances[k] = candidate_distances[short_path_idx] - # Remove path from candidates and add to shortest_paths_predecessors - candidate_distances[short_path_idx] = INFINITY - shortest_paths_predecessors[k] = candidate_predecessors[short_path_idx] - - -@cython.boundscheck(False) -cdef bint _yen_is_path_in_candidates( - const int[:, :] candidate_predecessors, - const int[:] orig_path, const int[:] spur_path, - const int spur_node, const int sink -): - """ - Return 1 if the path, formed by merging orig_path and spur_path, - exists in candidate_predecessors. If it doesn't, return 0. 
-    """
-    cdef int i
-    cdef int node
-    cdef bint break_flag = 0
-    for i in range(candidate_predecessors.shape[0]):
-        node = sink
-        break_flag = 0
-        while node != spur_node:
-            # Check path moving backwards from sink to spur node
-            if candidate_predecessors[i, node] != spur_path[node]:
-                break_flag = 1
-                break
-            node = candidate_predecessors[i, node]
-        if break_flag:
-            # No match
-            continue
-        while node != NULL_IDX:
-            # Check path from spur node to source
-            if candidate_predecessors[i, node] != orig_path[node]:
-                # No match
-                break_flag = 1
-                break
-            node = candidate_predecessors[i, node]
-        if break_flag == 0:
-            # Paths are equal
-            return 1
-    return 0
+            shortest_distances[k] = total_distance
+            candidate_paths.pop_path_to_memory_view(shortest_paths_predecessors[k])

From 7c3628d4ea45698cf4e554b631f553152850ca32 Mon Sep 17 00:00:00 2001
From: "Tomer.Sery"
Date: Mon, 6 Jan 2025 08:12:18 +0000
Subject: [PATCH 33/63] ENH:Yen: Introduce Lawler's modification.

Although it does not affect complexity, Lawler's modification has a
significant effect on run-time, as it reduces the number of calls to the
Dijkstra algorithm.
As a nice side bonus, there is no need to worry about hitting the same
path multiple times, so this check can be removed.

Closes gh-20366
---
 scipy/sparse/csgraph/_shortest_path.pyx | 59 ++++++++++++++-----------
 1 file changed, 33 insertions(+), 26 deletions(-)

diff --git a/scipy/sparse/csgraph/_shortest_path.pyx b/scipy/sparse/csgraph/_shortest_path.pyx
index 2d170a1eb554..ef9d6af4f215 100644
--- a/scipy/sparse/csgraph/_shortest_path.pyx
+++ b/scipy/sparse/csgraph/_shortest_path.pyx
@@ -1564,16 +1564,24 @@ def yen(


 ctypedef vector[int] yen_path_t
-ctypedef pair[double, yen_path_t] distance_path_pair_t
+
+cdef struct YenDistancePathStruct:
+    double distance
+    yen_path_t path
+    int spur_node
+
+
+cdef inline bint _yen_compare_distance(YenDistancePathStruct a, YenDistancePathStruct b):
+    return a.distance < b.distance


 cdef class _YenCandidatePaths:
     cdef:
-        vector[distance_path_pair_t] _distances_and_paths
-        int _required_paths
+        vector[YenDistancePathStruct] _distances_and_paths
+        size_t _required_paths

-    def __init__(self, K: int):
-        self._distances_and_paths = vector[distance_path_pair_t]()
+    def __cinit__(self, K: int):
+        self._distances_and_paths = vector[YenDistancePathStruct]()
         self._required_paths = K

     @cython.boundscheck(False)
@@ -1588,8 +1596,8 @@ cdef class _YenCandidatePaths:
         cdef:
             yen_path_t path_to_insert
             int idx = sink
-            distance_path_pair_t new_element, tmp_element
-            vector[distance_path_pair_t].iterator it
+            YenDistancePathStruct new_element
+            vector[YenDistancePathStruct].iterator it

         if self._distances_and_paths.size() >= self._required_paths and distance >= self.max_distance():
             # The new path is longer than the longest path in the vector - return
@@ -1604,18 +1612,11 @@ cdef class _YenCandidatePaths:
             path_to_insert.push_back(idx)
             idx = source_to_spur_path[idx]

-        new_element = distance_path_pair_t(distance, path_to_insert)
-        it = lower_bound(self._distances_and_paths.begin(), self._distances_and_paths.end(), new_element)
-        # Check if the path exists already
-        while it != self._distances_and_paths.end():
-            tmp_element = cython.operator.dereference(it)
-            if tmp_element.first != distance:
-                break
-            if tmp_element.second == path_to_insert:
-                # Path already exists - return
-                return
-            it += 1
-
+        new_element.distance = distance
+        new_element.path = path_to_insert
+        new_element.spur_node = spur_node
+        it = lower_bound(self._distances_and_paths.begin(), self._distances_and_paths.end(), 
new_element, + _yen_compare_distance) self._distances_and_paths.insert(it, new_element) # Reduce the number of paths to amount required @@ -1625,26 +1626,28 @@ cdef class _YenCandidatePaths: cdef double min_distance(self): if self.empty(): return INFINITY - return self._distances_and_paths[0].first + return self._distances_and_paths[0].distance cdef double max_distance(self): if self.empty(): return -INFINITY - return self._distances_and_paths[-1].first + return self._distances_and_paths.back().distance @cython.boundscheck(False) - cdef void pop_path_to_memory_view( + cdef int pop_path_to_memory_view( self, int[:] target, ): cdef: yen_path_t shortest_path - int idx + size_t idx + int spur_node if self.empty(): raise RuntimeError("No paths to pop") - shortest_path = self._distances_and_paths[0].second + shortest_path = self._distances_and_paths[0].path + spur_node = self._distances_and_paths[0].spur_node self._distances_and_paths.erase(self._distances_and_paths.begin()) # Restore the path in the correct order @@ -1653,6 +1656,8 @@ cdef class _YenCandidatePaths: self._required_paths -= 1 + return spur_node + cdef bint empty(self): return self._distances_and_paths.empty() @@ -1701,6 +1706,7 @@ cdef void _yen( double[:] csrT_weights int k, i, spur_node, node, short_path_idx, tmp_i + int spur_node_k_minus_1 double root_path_distance, total_distance, tmp_d # Avoid copying a size 0 memory view @@ -1718,6 +1724,7 @@ cdef void _yen( # --------------------------------------------------- # Compute and store the K-1 shortest paths + spur_node_k_minus_1 = source for k in range(1, K): # Set spur node as sink spur_node = sink @@ -1727,7 +1734,7 @@ cdef void _yen( # --------------------------------------------------- # For each spur_node in the previous k-shortest path # Search for a new short path from it to the sink - while spur_node != source: + while spur_node != spur_node_k_minus_1: # Decrease the root path distance by the distance of it's final edge and # set the source of the final edge as the new spur node tmp_i = shortest_paths_predecessors[k-1][spur_node] # previous node @@ -1849,4 +1856,4 @@ cdef void _yen( break else: shortest_distances[k] = total_distance - candidate_paths.pop_path_to_memory_view(shortest_paths_predecessors[k]) + spur_node_k_minus_1 = candidate_paths.pop_path_to_memory_view(shortest_paths_predecessors[k]) From 38febf8d654ec9a097754a114c055d88057ab10f Mon Sep 17 00:00:00 2001 From: "Tomer.Sery" Date: Mon, 20 Jan 2025 11:06:30 +0000 Subject: [PATCH 34/63] TST: YEN: add dense test --- .../sparse/csgraph/tests/test_shortest_path.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/scipy/sparse/csgraph/tests/test_shortest_path.py b/scipy/sparse/csgraph/tests/test_shortest_path.py index 45d811486846..3914e082bf1c 100644 --- a/scipy/sparse/csgraph/tests/test_shortest_path.py +++ b/scipy/sparse/csgraph/tests/test_shortest_path.py @@ -445,6 +445,23 @@ def test_yen_directed(): assert_allclose(predecessors, directed_2SP_0_to_3) +def test_yen_dense(): + dense_undirected_G = np.array([ + [0, 3, 3, 1, 2], + [3, 0, 7, 6, 5], + [3, 7, 0, 4, 0], + [1, 6, 4, 0, 2], + [2, 5, 0, 2, 0]], dtype=float) + distances = yen( + dense_undirected_G, + source=0, + sink=4, + K=5, + directed=False, + ) + assert_allclose(distances, [2., 3., 8., 9., 11.]) + + def test_yen_undirected(): distances = yen( undirected_G, From e3c6a053cdfb13932cd6405f01a98821dce41e80 Mon Sep 17 00:00:00 2001 From: Jake Bowhay <60778417+j-bowhay@users.noreply.github.com> Date: Fri, 31 Jan 2025 04:12:21 -0600 
Subject: [PATCH 35/63] ENH: linalg: wrap ?stevd (#22426) --- scipy/linalg/flapack_gen_tri.pyf.src | 25 +++++++++++++++++++++++-- scipy/linalg/lapack.py | 3 +++ scipy/linalg/tests/test_lapack.py | 20 ++++++++++++++++++++ 3 files changed, 46 insertions(+), 2 deletions(-) diff --git a/scipy/linalg/flapack_gen_tri.pyf.src b/scipy/linalg/flapack_gen_tri.pyf.src index 0c416fbed095..ed94563f3434 100644 --- a/scipy/linalg/flapack_gen_tri.pyf.src +++ b/scipy/linalg/flapack_gen_tri.pyf.src @@ -316,7 +316,7 @@ end subroutine stemr subroutine stemr_lwork(d,e,range,vl,vu,il,iu,compute_v,n,m,w,z,ldz,nzc,isuppz,tryrac,work,lwork,iwork,liwork,info) ! LWORK=-1, LIWORK=-1 call for STEMR - fortranname stemr + fortranname stemr callstatement (*f2py_func)((compute_v?"V":"N"),(range>0?(range==1?"V":"I"):"A"),&n,d,e,&vl,&vu,&il,&iu,&m,w,z,&ldz,&nzc,isuppz,&tryrac,&work,&lwork,&iwork,&liwork,&info) callprotoargument char*,char*,F_INT*,*,*,*,*,F_INT*,F_INT*,F_INT*,*,*,F_INT*,F_INT*,F_INT*,F_INT*,*,F_INT*,F_INT*,F_INT*,F_INT* @@ -336,9 +336,30 @@ subroutine stemr_lwork(d,e,range,vl,vu,il,iu,compute_v,n,m,w,z,ldz,nzc, integer depend(n),intent(hide) :: nzc = n ! can also be passed as -1 to do a query integer dimension((compute_v?2*n:1)),depend(n),intent(hide) :: isuppz integer intent(hide) :: tryrac = 1 - integer depend(n),intent(hide) :: lwork = -1 + integer intent(hide) :: lwork = -1 intent(out) :: work integer intent(hide) :: liwork = -1 integer intent(out) :: iwork integer intent(out) :: info end subroutine stemr_lwork + + +subroutine stevd(d,e,compute_v,n,z,ldz,work,lwork,iwork,liwork,info) + ! ?STEVD computes all eigenvalues and, optionally, eigenvectors of a + ! real symmetric tridiagonal matrix A. If eigenvectors are desired, it + ! uses a divide and conquer algorithm. 
+ callstatement (*f2py_func)((compute_v?"V":"N"),&n,d,e,z,&ldz,work,&lwork,iwork,&liwork,&info) + callprotoargument char*,F_INT*,*,*,*,F_INT*,*,F_INT*,F_INT*,F_INT*,F_INT* + + integer optional,intent(in):: compute_v = 1 + integer depend(d),intent(hide),check(n>0) :: n = shape(d,0) + dimension(n),intent(in,out,copy,out=vals) :: d + depend(n),dimension(MAX(n-1,1)),intent(in,copy) :: e + dimension(ldz,(compute_v?n:1)),intent(out),depend(n,ldz) :: z + integer intent(hide),depend(n) :: ldz=(compute_v?n:1) + dimension(lwork),depend(lwork),intent(hide) :: work + integer depend(n),optional,intent(in),check(lwork>=(compute_v?1+4*n+n*n:1)) :: lwork = (compute_v?1+4*n+n*n:1) + integer dimension(liwork),depend(liwork),intent(hide) :: iwork + integer depend(n),optional,intent(in),check(liwork>=(compute_v?3+5*n:1)) :: liwork = (compute_v?3+5*n:1) + integer intent(out) :: info +end subroutine stevd diff --git a/scipy/linalg/lapack.py b/scipy/linalg/lapack.py index 6281fded6238..1fc948a5f4e7 100644 --- a/scipy/linalg/lapack.py +++ b/scipy/linalg/lapack.py @@ -583,6 +583,9 @@ sstev dstev + sstevd + dstevd + ssycon dsycon csycon diff --git a/scipy/linalg/tests/test_lapack.py b/scipy/linalg/tests/test_lapack.py index 3e29fe45d0b7..141f69cdd278 100644 --- a/scipy/linalg/tests/test_lapack.py +++ b/scipy/linalg/tests/test_lapack.py @@ -3594,3 +3594,23 @@ def test_langb(dtype, norm): ref = lange(norm, A) res = langb(norm, kl, ku, ab) assert_allclose(res, ref, rtol=2e-6) + + +@pytest.mark.parametrize('dtype', REAL_DTYPES) +@pytest.mark.parametrize('compute_v', (0, 1)) +def test_stevd(dtype, compute_v): + rng = np.random.default_rng(266474747488348746) + n = 10 + d = rng.random(n, dtype=dtype) + e = rng.random(n - 1, dtype=dtype) + A = np.diag(e, -1) + np.diag(d) + np.diag(e, 1) + ref = np.linalg.eigvalsh(A) + + stevd = get_lapack_funcs('stevd') + U, V, info = stevd(d, e, compute_v=compute_v) + assert info == 0 + assert_allclose(np.sort(U), np.sort(ref)) + if compute_v: + eps = np.finfo(dtype).eps + assert_allclose(V @ np.diag(U) @ V.T, A, atol=eps**0.8) + From fe49d2725cd50f42e61d5f9bf2bd9b42c818b485 Mon Sep 17 00:00:00 2001 From: Matt Haberland Date: Fri, 31 Jan 2025 10:07:32 -0800 Subject: [PATCH 36/63] DOC: optimize.elementwise.find_minimum: fix documented termination condition (#22448) [docs only] --- scipy/optimize/_elementwise.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/scipy/optimize/_elementwise.py b/scipy/optimize/_elementwise.py index 883c644dbcbb..6091735e12ef 100644 --- a/scipy/optimize/_elementwise.py +++ b/scipy/optimize/_elementwise.py @@ -342,12 +342,9 @@ def find_minimum(f, init, /, *, args=(), tolerances=None, maxiter=100, callback= (where one of the inequalities is strict) are the values of `f` evaluated at those points, then the algorithm is considered to have converged when: - - ``xr - xl <= abs(xm)*xrtol + xatol`` or + - ``abs(xr - xm)/2 <= abs(xm)*xrtol + xatol`` or - ``(fl - 2*fm + fr)/2 <= abs(fm)*frtol + fatol``. - Note that first of these differs from the termination conditions described - in [1]_. - The default value of `xrtol` is the square root of the precision of the appropriate dtype, and ``xatol = fatol = frtol`` is the smallest normal number of the appropriate dtype. 
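To make the corrected termination conditions concrete, here is a minimal
sketch of calling `find_minimum` (the objective, bracket, and tolerance
values below are purely illustrative, not SciPy defaults)::

    >>> from scipy.optimize import elementwise
    >>> def f(x):
    ...     return (x - 1)**2   # minimizer at x = 1
    >>> res = elementwise.find_minimum(f, (-4, 0, 4),
    ...                                tolerances=dict(xatol=1e-10, xrtol=1e-10))
    >>> x_min = res.x   # ~1.0

Iteration stops as soon as either the abscissa condition or the
function-value condition quoted above is satisfied.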
From e16c6bba4a5a955d627fd2d886e90ec11836c32e Mon Sep 17 00:00:00 2001 From: Evgeni Burovski Date: Sat, 1 Feb 2025 05:52:11 +0100 Subject: [PATCH 37/63] ENH: signal.vectorstrength: add array API standard support (#22008) * ENH: signal: TestVectorstrength array api + its tests * MAINT: signal: simplify vectorstrength array api * TST: Apply suggestions from code review --- scipy/signal/_signaltools.py | 52 ++++++++++++--- scipy/signal/tests/test_signaltools.py | 87 ++++++++++++++------------ 2 files changed, 90 insertions(+), 49 deletions(-) diff --git a/scipy/signal/_signaltools.py b/scipy/signal/_signaltools.py index ef21c0c557e7..3e0065fc78a1 100644 --- a/scipy/signal/_signaltools.py +++ b/scipy/signal/_signaltools.py @@ -3910,6 +3910,31 @@ def resample_poly(x, up, down, axis=0, window=('kaiser', 5.0), return y_keep +def _angle(z, xp): + """np.angle replacement + """ + # XXX: https://github.com/data-apis/array-api/issues/595 + if xp.isdtype(z.dtype, 'complex floating'): + zreal, zimag = xp.real(z), xp.imag(z) + else: + zreal, zimag = z, 0 + + a = xp.atan2(zimag, zreal) + return a + + +def _mean(x, *args, xp, **kwds): + # https://github.com/data-apis/array-api/pull/850 + if xp.isdtype(x.dtype, 'complex floating'): + I = xp.asarray(1j, dtype=xp.complex64 + if x.dtype == xp.float32 + else xp.complex128) + return (xp.mean(xp.real(x), *args, **kwds) + + I * xp.mean(xp.imag(x), *args, **kwds)) + else: + return xp.mean(x, *args, **kwds) + + def vectorstrength(events, period): ''' Determine the vector strength of the events corresponding to the given @@ -3957,8 +3982,13 @@ def vectorstrength(events, period): fixed. Biol Cybern. 2013 Aug;107(4):491-94. :doi:`10.1007/s00422-013-0560-8`. ''' - events = np.asarray(events) - period = np.asarray(period) + xp = array_namespace(events, period) + + events = xp.asarray(events) + period = xp.asarray(period) + if xp.isdtype(period.dtype, 'integral'): + period = xp.astype(period, xp.float64) + if events.ndim > 1: raise ValueError('events cannot have dimensions more than 1') if period.ndim > 1: @@ -3967,19 +3997,23 @@ def vectorstrength(events, period): # we need to know later if period was originally a scalar scalarperiod = not period.ndim - events = np.atleast_2d(events) - period = np.atleast_2d(period) - if (period <= 0).any(): + events = xpx.atleast_nd(events, ndim=2, xp=xp) + period = xpx.atleast_nd(period, ndim=2, xp=xp) + if xp.any(period <= 0): raise ValueError('periods must be positive') # this converts the times to vectors - vectors = np.exp(np.dot(2j*np.pi/period.T, events)) + I2pi = xp.asarray(2j*xp.pi, dtype=xp.complex64 + if period.dtype == xp.float32 + else xp.complex128) + events_ = xp.astype(events, I2pi.dtype) if is_torch(xp) else events + vectors = xp.exp(I2pi/period.T @ events_) # the vector strength is just the magnitude of the mean of the vectors # the vector phase is the angle of the mean of the vectors - vectormean = np.mean(vectors, axis=1) - strength = abs(vectormean) - phase = np.angle(vectormean) + vectormean = _mean(vectors, axis=1, xp=xp) + strength = xp.abs(vectormean) + phase = _angle(vectormean, xp) # if the original period was a scalar, return scalars if scalarperiod: diff --git a/scipy/signal/tests/test_signaltools.py b/scipy/signal/tests/test_signaltools.py index 97f8a873db21..fad3e57a6ffd 100644 --- a/scipy/signal/tests/test_signaltools.py +++ b/scipy/signal/tests/test_signaltools.py @@ -3836,11 +3836,10 @@ def test_inverse_scalar_arguments(self, xp): assert_almost_equal(a, [1, -1]) -@skip_xp_backends(np_only=True) class 
TestVectorstrength: def test_single_1dperiod(self, xp): - events = np.array([.5]) + events = xp.asarray([.5]) period = 5. targ_strength = 1. targ_phase = .1 @@ -3849,24 +3848,26 @@ def test_single_1dperiod(self, xp): assert strength.ndim == 0 assert phase.ndim == 0 - assert_almost_equal(strength, targ_strength) - assert_almost_equal(phase, 2 * np.pi * targ_phase) + assert math.isclose(strength, targ_strength, abs_tol=1.5e-7) + assert math.isclose(phase, 2 * math.pi * targ_phase, abs_tol=1.5e-7) + + @xfail_xp_backends('torch', reason="phase modulo 2*pi") def test_single_2dperiod(self, xp): - events = np.array([.5]) - period = [1, 2, 5.] - targ_strength = [1.] * 3 - targ_phase = np.array([.5, .25, .1]) + events = xp.asarray([.5]) + period = xp.asarray([1, 2, 5.]) + targ_strength = xp.asarray([1.] * 3) + targ_phase = xp.asarray([.5, .25, .1]) strength, phase = vectorstrength(events, period) assert strength.ndim == 1 assert phase.ndim == 1 assert_array_almost_equal(strength, targ_strength) - assert_almost_equal(phase, 2 * np.pi * targ_phase) + assert_almost_equal(phase, 2 * xp.pi * targ_phase) def test_equal_1dperiod(self, xp): - events = np.array([.25, .25, .25, .25, .25, .25]) + events = xp.asarray([.25, .25, .25, .25, .25, .25]) period = 2 targ_strength = 1. targ_phase = .125 @@ -3875,24 +3876,25 @@ def test_equal_1dperiod(self, xp): assert strength.ndim == 0 assert phase.ndim == 0 - assert_almost_equal(strength, targ_strength) - assert_almost_equal(phase, 2 * np.pi * targ_phase) + + assert math.isclose(strength, targ_strength, abs_tol=1.5e-7) + assert math.isclose(phase, 2 * math.pi * targ_phase, abs_tol=1.5e-7) def test_equal_2dperiod(self, xp): - events = np.array([.25, .25, .25, .25, .25, .25]) - period = [1, 2, ] - targ_strength = [1.] * 2 - targ_phase = np.array([.25, .125]) + events = xp.asarray([.25, .25, .25, .25, .25, .25]) + period = xp.asarray([1, 2, ]) + targ_strength = xp.asarray([1.] * 2) + targ_phase = xp.asarray([.25, .125]) strength, phase = vectorstrength(events, period) assert strength.ndim == 1 assert phase.ndim == 1 assert_almost_equal(strength, targ_strength) - assert_almost_equal(phase, 2 * np.pi * targ_phase) + assert_almost_equal(phase, 2 * xp.pi * targ_phase) def test_spaced_1dperiod(self, xp): - events = np.array([.1, 1.1, 2.1, 4.1, 10.1]) + events = xp.asarray([.1, 1.1, 2.1, 4.1, 10.1]) period = 1 targ_strength = 1. targ_phase = .1 @@ -3901,24 +3903,26 @@ def test_spaced_1dperiod(self, xp): assert strength.ndim == 0 assert phase.ndim == 0 - assert_almost_equal(strength, targ_strength) - assert_almost_equal(phase, 2 * np.pi * targ_phase) + + assert math.isclose(strength, targ_strength, abs_tol=1.5e-7) + assert math.isclose(phase, 2 * math.pi * targ_phase, abs_tol=1.5e-6) def test_spaced_2dperiod(self, xp): - events = np.array([.1, 1.1, 2.1, 4.1, 10.1]) - period = [1, .5] - targ_strength = [1.] * 2 - targ_phase = np.array([.1, .2]) + events = xp.asarray([.1, 1.1, 2.1, 4.1, 10.1]) + period = xp.asarray([1, .5]) + targ_strength = xp.asarray([1.] * 2) + targ_phase = xp.asarray([.1, .2]) strength, phase = vectorstrength(events, period) assert strength.ndim == 1 assert phase.ndim == 1 assert_almost_equal(strength, targ_strength) - assert_almost_equal(phase, 2 * np.pi * targ_phase) + rtol_kw = {'rtol': 2e-6} if xp_default_dtype(xp) == xp.float32 else {} + xp_assert_close(phase, 2 * xp.pi * targ_phase, **rtol_kw) def test_partial_1dperiod(self, xp): - events = np.array([.25, .5, .75]) + events = xp.asarray([.25, .5, .75]) period = 1 targ_strength = 1. / 3. 
targ_phase = .5
@@ -3927,24 +3931,27 @@
         assert strength.ndim == 0
         assert phase.ndim == 0
-        assert_almost_equal(strength, targ_strength)
-        assert_almost_equal(phase, 2 * np.pi * targ_phase)
+        assert math.isclose(strength, targ_strength)
+        assert math.isclose(phase, 2 * math.pi * targ_phase)
+
+
+    @xfail_xp_backends("torch", reason="phase modulo 2*pi")
     def test_partial_2dperiod(self, xp):
-        events = np.array([.25, .5, .75])
-        period = [1., 1., 1., 1.]
-        targ_strength = [1. / 3.] * 4
-        targ_phase = np.array([.5, .5, .5, .5])
+        events = xp.asarray([.25, .5, .75])
+        period = xp.asarray([1., 1., 1., 1.])
+        targ_strength = xp.asarray([1. / 3.] * 4)
+        targ_phase = xp.asarray([.5, .5, .5, .5])

         strength, phase = vectorstrength(events, period)

         assert strength.ndim == 1
         assert phase.ndim == 1
         assert_almost_equal(strength, targ_strength)
-        assert_almost_equal(phase, 2 * np.pi * targ_phase)
+        assert_almost_equal(phase, 2 * xp.pi * targ_phase)

     def test_opposite_1dperiod(self, xp):
-        events = np.array([0, .25, .5, .75])
+        events = xp.asarray([0, .25, .5, .75])
         period = 1.

         targ_strength = 0
@@ -3952,12 +3959,12 @@
         assert strength.ndim == 0
         assert phase.ndim == 0

-        assert_almost_equal(strength, targ_strength)
+        assert math.isclose(strength, targ_strength, abs_tol=1.5e-7)

     def test_opposite_2dperiod(self, xp):
-        events = np.array([0, .25, .5, .75])
-        period = [1.] * 10
-        targ_strength = [0.] * 10
+        events = xp.asarray([0, .25, .5, .75])
+        period = xp.asarray([1.] * 10)
+        targ_strength = xp.asarray([0.] * 10)

         strength, phase = vectorstrength(events, period)

@@ -3966,13 +3973,13 @@
         assert_almost_equal(strength, targ_strength)

     def test_2d_events_ValueError(self, xp):
-        events = np.array([[1, 2]])
+        events = xp.asarray([[1, 2]])
         period = 1.
         assert_raises(ValueError, vectorstrength, events, period)

     def test_2d_period_ValueError(self, xp):
         events = 1.
-        period = np.array([[1]])
+        period = xp.asarray([[1]])
         assert_raises(ValueError, vectorstrength, events, period)

     def test_zero_period_ValueError(self, xp):

From 36291dcfd82165242792e7f28560d6a6f56ee6ea Mon Sep 17 00:00:00 2001
From: Dietrich Brunn <12721170+DietBru@users.noreply.github.com>
Date: Sat, 1 Feb 2025 16:17:51 +0100
Subject: [PATCH 38/63] DOC: Improve docstrs of `dlsim`, `dimpulse`, `dstep`,
 `dfreqresp` and `dbode` in file `_ltisys.py`. (#22453)

* DOC: Improve docstrs of `dlsim`, `dimpulse`, `dstep`, `dfreqresp` and
  `dbode` in file `_ltisys.py`.
* Made the description of the parameter `system` more consistent.
* Improved plots a little bit.
* DOC: Fixed test position in plot of `signal.dfreqresp`.
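For illustration, the `system` forms documented here are interchangeable;
a minimal sketch (the filter coefficients and sampling interval are
arbitrary)::

    >>> from scipy import signal
    >>> bb, aa, dt = [1.0], [1.0, -0.5], 0.1
    >>> t1, y1 = signal.dstep((bb, aa, dt), n=5)                # (num, den, dt)
    >>> t2, y2 = signal.dstep(signal.dlti(bb, aa, dt=dt), n=5)  # `dlti` instance

Both calls describe the same first-order system, so ``y1`` and ``y2`` agree.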
--- scipy/conftest.py | 3 + scipy/signal/_ltisys.py | 189 +++++++++++++++++++++++----------------- 2 files changed, 110 insertions(+), 82 deletions(-) diff --git a/scipy/conftest.py b/scipy/conftest.py index d7f737a2eb7a..a833fe5fdfd6 100644 --- a/scipy/conftest.py +++ b/scipy/conftest.py @@ -599,4 +599,7 @@ def warnings_errors_and_rng(test=None): } dt_config.strict_check = True + + # ignore Matplotlib's `ax.text`: + dt_config.stopwords.add('.text(') ############################################################################ diff --git a/scipy/signal/_ltisys.py b/scipy/signal/_ltisys.py index e9878a4585e8..2350840d94e0 100644 --- a/scipy/signal/_ltisys.py +++ b/scipy/signal/_ltisys.py @@ -3034,20 +3034,24 @@ def place_poles(A, B, poles, method="YT", rtol=1e-3, maxiter=30): def dlsim(system, u, t=None, x0=None): - """ - Simulate output of a discrete-time linear system. + r"""Simulate output of a discrete-time linear system. Parameters ---------- - system : tuple of array_like or instance of `dlti` - A tuple describing the system. - The following gives the number of elements in the tuple and - the interpretation: + system : dlti | tuple + An instance of the LTI class `dlti` or a tuple describing the system. + The number of elements in the tuple determine the interpretation. I.e.: + + * ``system``: Instance of LTI class `dlti`. Note that derived instances, such + as instances of `TransferFunction`, `ZerosPolesGain`, or `StateSpace`, are + allowed as well. + * ``(num, den, dt)``: Rational polynomial as described in `TransferFunction`. + The coefficients of the polynomials should be specified in descending + exponent order, e.g., z² + 3z + 5 would be represented as ``[1, 3, 5]``. + * ``(zeros, poles, gain, dt)``: Zeros, poles, gain form as described + in `ZerosPolesGain`. + * ``(A, B, C, D, dt)``: State-space form as described in `StateSpace`. - * 1: (instance of `dlti`) - * 3: (num, den, dt) - * 4: (zeros, poles, gain, dt) - * 5: (A, B, C, D, dt) u : array_like An input array describing the input at each time `t` (interpolation is @@ -3150,20 +3154,23 @@ def dlsim(system, u, t=None, x0=None): def dimpulse(system, x0=None, t=None, n=None): - """ - Impulse response of discrete-time system. + r"""Impulse response of discrete-time system. Parameters ---------- - system : tuple of array_like or instance of `dlti` - A tuple describing the system. - The following gives the number of elements in the tuple and - the interpretation: - - * 1: (instance of `dlti`) - * 3: (num, den, dt) - * 4: (zeros, poles, gain, dt) - * 5: (A, B, C, D, dt) + system : dlti | tuple + An instance of the LTI class `dlti` or a tuple describing the system. + The number of elements in the tuple determine the interpretation. I.e.: + + * ``system``: Instance of LTI class `dlti`. Note that derived instances, such + as instances of `TransferFunction`, `ZerosPolesGain`, or `StateSpace`, are + allowed as well. + * ``(num, den, dt)``: Rational polynomial as described in `TransferFunction`. + The coefficients of the polynomials should be specified in descending + exponent order, e.g., z² + 3z + 5 would be represented as ``[1, 3, 5]``. + * ``(zeros, poles, gain, dt)``: Zeros, poles, gain form as described + in `ZerosPolesGain`. + * ``(A, B, C, D, dt)``: State-space form as described in `StateSpace`. x0 : array_like, optional Initial state-vector. Defaults to zero. 
@@ -3189,14 +3196,17 @@ def dimpulse(system, x0=None, t=None, n=None): >>> import numpy as np >>> from scipy import signal >>> import matplotlib.pyplot as plt - - >>> butter = signal.dlti(*signal.butter(3, 0.5)) - >>> t, y = signal.dimpulse(butter, n=25) - >>> plt.step(t, np.squeeze(y), where='post') - >>> plt.grid() - >>> plt.xlabel('n [samples]') - >>> plt.ylabel('Amplitude') - + ... + >>> dt = 1 # sampling interval is one => time unit is sample number + >>> bb, aa = signal.butter(3, 0.25, fs=1/dt) + >>> t, y = signal.dimpulse((bb, aa, dt), n=25) + ... + >>> fig0, ax0 = plt.subplots() + >>> ax0.step(t, np.squeeze(y), '.-', where='post') + >>> ax0.set_title(r"Impulse Response of a $3^\text{rd}$ Order Butterworth Filter") + >>> ax0.set(xlabel='Sample number', ylabel='Amplitude') + >>> ax0.grid() + >>> plt.show() """ # Convert system to dlti-StateSpace if isinstance(system, dlti): @@ -3237,20 +3247,23 @@ def dimpulse(system, x0=None, t=None, n=None): def dstep(system, x0=None, t=None, n=None): - """ - Step response of discrete-time system. + r"""Step response of discrete-time system. Parameters ---------- - system : tuple of array_like - A tuple describing the system. - The following gives the number of elements in the tuple and - the interpretation: - - * 1: (instance of `dlti`) - * 3: (num, den, dt) - * 4: (zeros, poles, gain, dt) - * 5: (A, B, C, D, dt) + system : dlti | tuple + An instance of the LTI class `dlti` or a tuple describing the system. + The number of elements in the tuple determine the interpretation. I.e.: + + * ``system``: Instance of LTI class `dlti`. Note that derived instances, such + as instances of `TransferFunction`, `ZerosPolesGain`, or `StateSpace`, are + allowed as well. + * ``(num, den, dt)``: Rational polynomial as described in `TransferFunction`. + The coefficients of the polynomials should be specified in descending + exponent order, e.g., z² + 3z + 5 would be represented as ``[1, 3, 5]``. + * ``(zeros, poles, gain, dt)``: Zeros, poles, gain form as described + in `ZerosPolesGain`. + * ``(A, B, C, D, dt)``: State-space form as described in `StateSpace`. x0 : array_like, optional Initial state-vector. Defaults to zero. @@ -3273,16 +3286,23 @@ def dstep(system, x0=None, t=None, n=None): Examples -------- + The following example illustrates how to create a digital Butterworth filer and + plot its step response: + >>> import numpy as np >>> from scipy import signal >>> import matplotlib.pyplot as plt - - >>> butter = signal.dlti(*signal.butter(3, 0.5)) - >>> t, y = signal.dstep(butter, n=25) - >>> plt.step(t, np.squeeze(y), where='post') - >>> plt.grid() - >>> plt.xlabel('n [samples]') - >>> plt.ylabel('Amplitude') + ... + >>> dt = 1 # sampling interval is one => time unit is sample number + >>> bb, aa = signal.butter(3, 0.25, fs=1/dt) + >>> t, y = signal.dstep((bb, aa, dt), n=25) + ... + >>> fig0, ax0 = plt.subplots() + >>> ax0.step(t, np.squeeze(y), '.-', where='post') + >>> ax0.set_title(r"Step Response of a $3^\text{rd}$ Order Butterworth Filter") + >>> ax0.set(xlabel='Sample number', ylabel='Amplitude', ylim=(0, 1.1*np.max(y))) + >>> ax0.grid() + >>> plt.show() """ # Convert system to dlti-StateSpace if isinstance(system, dlti): @@ -3328,14 +3348,19 @@ def dfreqresp(system, w=None, n=10000, whole=False): Parameters ---------- - system : an instance of the `dlti` class or a tuple describing the system. 
- The following gives the number of elements in the tuple and - the interpretation: - - * 1 (instance of `dlti`) - * 2 (numerator, denominator, dt) - * 3 (zeros, poles, gain, dt) - * 4 (A, B, C, D, dt) + system : dlti | tuple + An instance of the LTI class `dlti` or a tuple describing the system. + The number of elements in the tuple determine the interpretation. I.e.: + + * ``system``: Instance of LTI class `dlti`. Note that derived instances, such + as instances of `TransferFunction`, `ZerosPolesGain`, or `StateSpace`, are + allowed as well. + * ``(num, den, dt)``: Rational polynomial as described in `TransferFunction`. + The coefficients of the polynomials should be specified in descending + exponent order, e.g., z² + 3z + 5 would be represented as ``[1, 3, 5]``. + * ``(zeros, poles, gain, dt)``: Zeros, poles, gain form as described + in `ZerosPolesGain`. + * ``(A, B, C, D, dt)``: State-space form as described in `StateSpace`. w : array_like, optional Array of frequencies (in radians/sample). Magnitude and phase data is @@ -3367,24 +3392,25 @@ def dfreqresp(system, w=None, n=10000, whole=False): Examples -------- - Generating the Nyquist plot of a transfer function + The following example generates the Nyquist plot of the transfer function + :math:`H(z) = \frac{1}{z^2 + 2z + 3}` with a sampling time of 0.05 seconds: >>> from scipy import signal >>> import matplotlib.pyplot as plt - - Construct the transfer function - :math:`H(z) = \frac{1}{z^2 + 2z + 3}` with a sampling time of 0.05 - seconds: - - >>> sys = signal.TransferFunction([1], [1, 2, 3], dt=0.05) - + >>> sys = signal.TransferFunction([1], [1, 2, 3], dt=0.05) # construct H(z) >>> w, H = signal.dfreqresp(sys) - - >>> plt.figure() - >>> plt.plot(H.real, H.imag, "b") - >>> plt.plot(H.real, -H.imag, "r") + ... + >>> fig0, ax0 = plt.subplots() + >>> ax0.plot(H.real, H.imag, label=r"$H(z=e^{+j\omega})$") + >>> ax0.plot(H.real, -H.imag, label=r"$H(z=e^{-j\omega})$") + >>> ax0.set_title(r"Nyquist Plot of $H(z) = 1 / (z^2 + 2z + 3)$") + >>> ax0.set(xlabel=r"$\text{Re}\{z\}$", ylabel=r"$\text{Im}\{z\}$", + ... xlim=(-0.2, 0.65), aspect='equal') + >>> ax0.plot(H[0].real, H[0].imag, 'k.') # mark H(exp(1j*w[0])) + >>> ax0.text(0.2, 0, r"$H(e^{j0})$") + >>> ax0.grid(True) + >>> ax0.legend() >>> plt.show() - """ if not isinstance(system, dlti): if isinstance(system, lti): @@ -3423,24 +3449,23 @@ def dfreqresp(system, w=None, n=10000, whole=False): def dbode(system, w=None, n=100): - r""" - Calculate Bode magnitude and phase data of a discrete-time system. + r"""Calculate Bode magnitude and phase data of a discrete-time system. Parameters ---------- - system : + system : dlti | tuple An instance of the LTI class `dlti` or a tuple describing the system. - The number of elements in the tuple determine the interpretation, i.e.: - - 1. ``(sys_dlti)``: Instance of LTI class `dlti`. Note that derived instances, - such as instances of `TransferFunction`, `ZerosPolesGain`, or `StateSpace`, - are allowed as well. - 2. ``(num, den, dt)``: Rational polynomial as described in `TransferFunction`. - The coefficients of the polynomials should be specified in descending - exponent order, e.g., z² + 3z + 5 would be represented as ``[1, 3, 5]``. - 3. ``(zeros, poles, gain, dt)``: Zeros, poles, gain form as described - in `ZerosPolesGain`. - 4. ``(A, B, C, D, dt)``: State-space form as described in `StateSpace`. + The number of elements in the tuple determine the interpretation. I.e.: + + * ``system``: Instance of LTI class `dlti`. 
Note that derived instances, such + as instances of `TransferFunction`, `ZerosPolesGain`, or `StateSpace`, are + allowed as well. + * ``(num, den, dt)``: Rational polynomial as described in `TransferFunction`. + The coefficients of the polynomials should be specified in descending + exponent order, e.g., z² + 3z + 5 would be represented as ``[1, 3, 5]``. + * ``(zeros, poles, gain, dt)``: Zeros, poles, gain form as described + in `ZerosPolesGain`. + * ``(A, B, C, D, dt)``: State-space form as described in `StateSpace`. w : array_like, optional Array of frequencies normalized to the Nyquist frequency being π, i.e., From 0196ab098decd4d321449e46b8b8d3b90144d6aa Mon Sep 17 00:00:00 2001 From: Matt Haberland Date: Sun, 2 Feb 2025 11:30:24 -0800 Subject: [PATCH 39/63] ENH: stats.gstd: add array API support (#22455) * ENH: stats.gstd: add array api support * MAINT: stats.gstd: add integer support for array_api_strict * ENH: stats.gstd: add keepdims, nan_policy support * STY: stats.gstd: linting * TST: stats.gstd: skip dask.array because lazywhere doesn't work yet --- scipy/stats/_stats_py.py | 37 +++++---- scipy/stats/tests/test_axis_nan_policy.py | 6 ++ scipy/stats/tests/test_stats.py | 93 ++++++++++++----------- 3 files changed, 74 insertions(+), 62 deletions(-) diff --git a/scipy/stats/_stats_py.py b/scipy/stats/_stats_py.py index 2d36a7049522..ff52ff380ff0 100644 --- a/scipy/stats/_stats_py.py +++ b/scipy/stats/_stats_py.py @@ -2904,7 +2904,7 @@ def zmap(scores, compare, axis=0, ddof=0, nan_policy='propagate'): return z -def gstd(a, axis=0, ddof=1): +def gstd(a, axis=0, ddof=1, *, keepdims=False, nan_policy='propagate'): r""" Calculate the geometric standard deviation of an array. @@ -2919,17 +2919,27 @@ def gstd(a, axis=0, ddof=1): ---------- a : array_like An array containing finite, strictly positive, real numbers. - - .. deprecated:: 1.14.0 - Support for masked array input was deprecated in - SciPy 1.14.0 and will be removed in version 1.16.0. - axis : int, tuple or None, optional Axis along which to operate. Default is 0. If None, compute over the whole array `a`. ddof : int, optional Degree of freedom correction in the calculation of the geometric standard deviation. Default is 1. + keepdims : boolean, optional + If this is set to ``True``, the axes which are reduced are left + in the result as dimensions with length one. With this option, + the result will broadcast correctly against the input array. + nan_policy : {'propagate', 'omit', 'raise'}, default: 'propagate' + Defines how to handle input NaNs. + + - ``propagate``: if a NaN is present in the axis slice (e.g. row) along + which the statistic is computed, the corresponding entry of the output + will be NaN. + - ``omit``: NaNs will be omitted when performing the calculation. + If insufficient data remains in the axis slice along which the + statistic is computed, the corresponding entry of the output will be + NaN. + - ``raise``: if a NaN is present, a ``ValueError`` will be raised. 
Returns ------- @@ -2999,19 +3009,14 @@ def gstd(a, axis=0, ddof=1): array([2.12939215, 1.22120169]) """ - a = np.asanyarray(a) - if isinstance(a, ma.MaskedArray): - message = ("`gstd` support for masked array input was deprecated in " - "SciPy 1.14.0 and will be removed in version 1.16.0.") - warnings.warn(message, DeprecationWarning, stacklevel=2) - log = ma.log - else: - log = np.log + xp = array_namespace(a) + a = xp_broadcast_promote(a, force_floating=True)[0] # just promote to correct float + kwargs = dict(axis=axis, correction=ddof, keepdims=keepdims, nan_policy=nan_policy) with np.errstate(invalid='ignore', divide='ignore'): - res = np.exp(np.std(log(a), axis=axis, ddof=ddof)) + res = xp.exp(_xp_var(xp.log(a), **kwargs)**0.5) - if (a <= 0).any(): + if xp.any(a <= 0): message = ("The geometric standard deviation is only defined if all elements " "are greater than or equal to zero; otherwise, the result is NaN.") warnings.warn(message, RuntimeWarning, stacklevel=2) diff --git a/scipy/stats/tests/test_axis_nan_policy.py b/scipy/stats/tests/test_axis_nan_policy.py index ecaabf6b9248..be8c04d821e1 100644 --- a/scipy/stats/tests/test_axis_nan_policy.py +++ b/scipy/stats/tests/test_axis_nan_policy.py @@ -56,6 +56,11 @@ def xp_var(*args, **kwargs): return stats._stats_py._xp_var(*args, **kwargs) +def gstd(*args, **kwargs): + kwargs.pop('_no_deco', None) + return stats.gstd(*args, **kwargs) + + def combine_pvalues_weighted(*args, **kwargs): return stats.combine_pvalues(args[0], *args[2:], weights=args[1], method='stouffer', **kwargs) @@ -164,6 +169,7 @@ def weightedtau_weighted(x, y, rank, **kwargs): (stats.theilslopes, tuple(), dict(), 1, 4, True, tuple), (stats.siegelslopes, tuple(), dict(), 2, 2, True, tuple), (stats.siegelslopes, tuple(), dict(), 1, 2, True, tuple), + (gstd, tuple(), dict(), 1, 1, False, lambda x: (x,)), ] # If the message is one of those expected, put nans in diff --git a/scipy/stats/tests/test_stats.py b/scipy/stats/tests/test_stats.py index 97c43504254a..ebb6a294901e 100644 --- a/scipy/stats/tests/test_stats.py +++ b/scipy/stats/tests/test_stats.py @@ -7293,87 +7293,88 @@ def fun(a, axis, weights): check_equal_pmean(a, p, desired, axis=axis, weights=weights, rtol=1e-5, xp=xp) +@skip_xp_backends("dask.array", reason="lazywhere doesn't work for dask.array") class TestGSTD: # must add 1 as `gstd` is only defined for positive values - array_1d = np.arange(2 * 3 * 4) + 1 + array_1d = (np.arange(2 * 3 * 4) + 1).tolist() gstd_array_1d = 2.294407613602 - array_3d = array_1d.reshape(2, 3, 4) + array_3d = np.reshape(array_1d, (2, 3, 4)).tolist() - def test_1d_array(self): - gstd_actual = stats.gstd(self.array_1d) - assert_allclose(gstd_actual, self.gstd_array_1d) + def test_1d_array(self, xp): + gstd_actual = stats.gstd(xp.asarray(self.array_1d)) + xp_assert_close(gstd_actual, xp.asarray(self.gstd_array_1d)) - def test_1d_numeric_array_like_input(self): + @skip_xp_backends(np_only=True, reason="Only NumPy supports array-like input") + def test_1d_numeric_array_like_input(self, xp): gstd_actual = stats.gstd(tuple(self.array_1d)) assert_allclose(gstd_actual, self.gstd_array_1d) - def test_raises_value_error_non_numeric_input(self): + @skip_xp_invalid_arg + def test_raises_value_error_non_numeric_input(self, xp): # this is raised by NumPy, but it's quite interpretable with pytest.raises(TypeError, match="ufunc 'log' not supported"): stats.gstd('You cannot take the logarithm of a string.') @pytest.mark.parametrize('bad_value', (0, -1, np.inf, np.nan)) - def 
test_returns_nan_invalid_value(self, bad_value): - x = np.append(self.array_1d, [bad_value]) + def test_returns_nan_invalid_value(self, bad_value, xp): + x = xp.asarray(self.array_1d + [bad_value]) if np.isfinite(bad_value): message = "The geometric standard deviation is only defined..." with pytest.warns(RuntimeWarning, match=message): res = stats.gstd(x) else: res = stats.gstd(x) - assert_equal(res, np.nan) + xp_assert_equal(res, xp.asarray(np.nan)) - def test_propagates_nan_values(self): - a = array([[1, 1, 1, 16], [np.nan, 1, 2, 3]]) + def test_propagates_nan_values(self, xp): + a = xp.asarray([[1, 1, 1, 16], [xp.nan, 1, 2, 3]]) gstd_actual = stats.gstd(a, axis=1) - assert_allclose(gstd_actual, np.array([4, np.nan])) - - def test_ddof_equal_to_number_of_observations(self): - with pytest.warns(RuntimeWarning, match='Degrees of freedom <= 0'): - assert_equal(stats.gstd(self.array_1d, ddof=self.array_1d.size), np.inf) + xp_assert_close(gstd_actual, xp.asarray([4, np.nan])) - def test_3d_array(self): - gstd_actual = stats.gstd(self.array_3d, axis=None) - assert_allclose(gstd_actual, self.gstd_array_1d) - - def test_3d_array_axis_type_tuple(self): - gstd_actual = stats.gstd(self.array_3d, axis=(1,2)) - assert_allclose(gstd_actual, [2.12939215, 1.22120169]) + def test_ddof_equal_to_number_of_observations(self, xp): + x = xp.asarray(self.array_1d) + res = stats.gstd(x, ddof=x.shape[0]) + xp_assert_equal(res, xp.asarray(xp.nan)) - def test_3d_array_axis_0(self): - gstd_actual = stats.gstd(self.array_3d, axis=0) - gstd_desired = np.array([ + def test_3d_array(self, xp): + x = xp.asarray(self.array_3d) + gstd_actual = stats.gstd(x, axis=None) + ref = xp.asarray(self.gstd_array_1d) + xp_assert_close(gstd_actual, ref) + + def test_3d_array_axis_type_tuple(self, xp): + x = xp.asarray(self.array_3d) + gstd_actual = stats.gstd(x, axis=(1, 2)) + ref = xp.asarray([2.12939215, 1.22120169]) + xp_assert_close(gstd_actual, ref) + + def test_3d_array_axis_0(self, xp): + x = xp.asarray(self.array_3d) + gstd_actual = stats.gstd(x, axis=0) + gstd_desired = xp.asarray([ [6.1330555493918, 3.958900210120, 3.1206598248344, 2.6651441426902], [2.3758135028411, 2.174581428192, 2.0260062829505, 1.9115518327308], [1.8205343606803, 1.746342404566, 1.6846557065742, 1.6325269194382] ]) - assert_allclose(gstd_actual, gstd_desired) + xp_assert_close(gstd_actual, gstd_desired) - def test_3d_array_axis_1(self): - gstd_actual = stats.gstd(self.array_3d, axis=1) - gstd_desired = np.array([ + def test_3d_array_axis_1(self, xp): + x = xp.asarray(self.array_3d) + gstd_actual = stats.gstd(x, axis=1) + gstd_desired = xp.asarray([ [3.118993630946, 2.275985934063, 1.933995977619, 1.742896469724], [1.271693593916, 1.254158641801, 1.238774141609, 1.225164057869] ]) - assert_allclose(gstd_actual, gstd_desired) + xp_assert_close(gstd_actual, gstd_desired) - def test_3d_array_axis_2(self): - gstd_actual = stats.gstd(self.array_3d, axis=2) - gstd_desired = np.array([ + def test_3d_array_axis_2(self, xp): + x = xp.asarray(self.array_3d) + gstd_actual = stats.gstd(x, axis=2) + gstd_desired = xp.asarray([ [1.8242475707664, 1.2243686572447, 1.1318311657788], [1.0934830582351, 1.0724479791887, 1.0591498540749] ]) - assert_allclose(gstd_actual, gstd_desired) - - def test_masked_3d_array(self): - ma = np.ma.masked_where(self.array_3d > 16, self.array_3d) - message = "`gstd` support for masked array input was deprecated in..." 
-        with pytest.warns(DeprecationWarning, match=message):
-            gstd_actual = stats.gstd(ma, axis=2)
-        gstd_desired = stats.gstd(self.array_3d, axis=2)
-        mask = [[0, 0, 0], [0, 1, 1]]
-        assert_allclose(gstd_actual, gstd_desired)
-        assert_equal(gstd_actual.mask, mask)
+        xp_assert_close(gstd_actual, gstd_desired)

 def test_binomtest():

From 7174af21527bb2b80dc8c4bdd50e2e7da7982ada Mon Sep 17 00:00:00 2001
From: zitongzhoueric 
Date: Sun, 2 Feb 2025 14:41:48 -0800
Subject: [PATCH 40/63] TST: sparse: add tests for subscriptable types

---
 scipy/sparse/tests/test_base.py | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/scipy/sparse/tests/test_base.py b/scipy/sparse/tests/test_base.py
index e277ba47fc4a..26a073b7b779 100644
--- a/scipy/sparse/tests/test_base.py
+++ b/scipy/sparse/tests/test_base.py
@@ -29,6 +29,8 @@ class for generic tests" section.
     assert_array_almost_equal, assert_almost_equal, assert_,
     assert_allclose, suppress_warnings)

+from types import GenericAlias
+
 import scipy.linalg

 import scipy.sparse as sparse
@@ -4307,6 +4309,12 @@ def spcreator(cls, *args, **kwargs):
         with suppress_warnings() as sup:
             sup.filter(SparseEfficiencyWarning, "Changing the sparsity structure")
             return csr_matrix(*args, **kwargs)
+
+    def test_spmatrix_subscriptable(self):
+        result = csr_matrix[np.int8]
+        assert isinstance(result, GenericAlias)
+        assert result.__origin__ is csr_matrix
+        assert result.__args__ == (np.int8,)

 TestCSRMatrix.init_class()
@@ -4842,6 +4850,12 @@ def test_large_dimensions_reshape(self):
         # Using __ne__ and nnz instead
         assert_((mat1.reshape((1001, 3000001), order='C') != mat2).nnz == 0)
         assert_((mat2.reshape((3000001, 1001), order='F') != mat1).nnz == 0)
+
+    def test_sparray_subscriptable(self):
+        result = coo_array[np.int8, tuple[int]]
+        assert isinstance(result, GenericAlias)
+        assert result.__origin__ is coo_array
+        assert result.__args__ == (np.int8, tuple[int])

 class TestCOOMatrix(_MatrixMixin, TestCOO):

From 87241476bd8db6e88bd73584446532abd0e9f234 Mon Sep 17 00:00:00 2001
From: Dietrich Brunn <12721170+DietBru@users.noreply.github.com>
Date: Mon, 3 Feb 2025 00:15:19 +0100
Subject: [PATCH 41/63] BUG: signal.ShortTimeFFT: make attributes `win` and
 `dual_win` immutable (#22454)

* BUG: signal.ShortTimeFFT: make attributes win and dual_win immutable

Also, some minor docstr issues were fixed.
---
 scipy/signal/_short_time_fft.py | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/scipy/signal/_short_time_fft.py b/scipy/signal/_short_time_fft.py
index f50f58a08812..d43370a18307 100644
--- a/scipy/signal/_short_time_fft.py
+++ b/scipy/signal/_short_time_fft.py
@@ -301,7 +301,7 @@ class ShortTimeFFT:
     A typical STFT application is the creation of various types of time-frequency
     plots, often subsumed under the term "spectrogram". Note that this term is also
     used to explicitly refer to the absolute square of a STFT [11]_, as done in
-    `~ShortTimeFFT.spectrogram`.
+    :meth:`spectrogram`.

     The STFT can also be used for filtering and filter banks as discussed in [12]_.
@@ -432,8 +432,9 @@ def __init__(self, win: np.ndarray, hop: int, fs: float, *, raise ValueError("Parameter win must have finite entries!") if not (hop >= 1 and isinstance(hop, int | np.integer)): raise ValueError(f"Parameter {hop=} is not an integer >= 1!") - self._win, self._hop, self.fs = win, hop, fs + self._win, self._hop, self.fs = win, hop, fs + self.win.setflags(write=False) self.mfft = len(win) if mfft is None else mfft if dual_win is not None: @@ -441,6 +442,7 @@ def __init__(self, win: np.ndarray, hop: int, fs: float, *, raise ValueError(f"{dual_win.shape=} must equal {win.shape=}!") if not all(np.isfinite(dual_win)): raise ValueError("Parameter dual_win must be a finite array!") + dual_win.setflags(write=False) self._dual_win = dual_win # needs to be set before scaling if scale_to is not None: # needs to be set before fft_mode @@ -658,7 +660,7 @@ def from_win_equals_dual( Notes ----- The set of all possible windows with identical dual is defined by the set of - linear constraints of Eq. :math:numref:`eq_STFT_EqualWindDualCond` in the + linear constraints of Eq. :math:numref:`eq_STFT_AllDualWinsCond` in the :ref:`tutorial_stft` section of the :ref:`user_guide`. There it is also derived that ``ShortTimeFFT.dual_win == ShortTimeFFT.m_pts * ShortTimeFFT.win`` needs to hold for an STFT to be a unitary mapping. @@ -779,6 +781,7 @@ def win(self) -> np.ndarray: """Window function as real- or complex-valued 1d array. This attribute is read-only, since `dual_win` depends on it. + To make this array immutable, its WRITEABLE flag is set to ``FALSE``. See Also -------- @@ -788,6 +791,7 @@ def win(self) -> np.ndarray: mfft: Length of input for the FFT used - may be larger than `m_num`. hop: ime increment in signal samples for sliding window. win: Window function as real- or complex-valued 1d array. + numpy.ndarray.setflags: Modify array flags. ShortTimeFFT: Class this property belongs to. """ return self._win @@ -1012,8 +1016,10 @@ def scale_to(self, scaling: Literal['magnitude', 'psd']): s_fac = self.fac_psd if scaling == 'psd' else self.fac_magnitude self._win = self._win * s_fac + self.win.setflags(write=False) if self._dual_win is not None: self._dual_win = self._dual_win / s_fac + self.dual_win.setflags(write=False) self._fac_mag, self._fac_psd = None, None # reset scaling factors self._scaling = scaling @@ -1398,6 +1404,7 @@ def dual_win(self) -> np.ndarray: If the dual window cannot be calculated a ``ValueError`` is raised. This attribute is read only and calculated lazily. + To make this array immutable, its WRITEABLE flag is set to ``FALSE``. See Also -------- @@ -1405,10 +1412,12 @@ def dual_win(self) -> np.ndarray: win: Window function as real- or complex-valued 1d array. from_win_equals_dual: Create instance where `win` and `dual_win` are equal. closest_STFT_dual_window: Calculate dual window closest to a desired window. + numpy.ndarray.setflags: Modify array flags. ShortTimeFFT: Class this property belongs to. """ if self._dual_win is None: self._dual_win = _calc_dual_canonical_window(self.win, self.hop) + self.dual_win.setflags(write=False) return self._dual_win @property @@ -2081,7 +2090,7 @@ def extent(self, n: int, axes_seq: Literal['tf', 'ft'] = 'tf', n : int Number of samples in input signal. axes_seq : {'tf', 'ft'} - Return time extent first and then frequency extent or vice-versa. + Return time extent first and then frequency extent or vice versa. 
center_bins: bool If set (default ``False``), the values of the time slots and frequency bins are moved from the side the middle. This is useful, From 088174d100c88b2efe5c8dafeacf6d9977cb8bd8 Mon Sep 17 00:00:00 2001 From: Andrew Nelson Date: Mon, 3 Feb 2025 13:01:17 +1100 Subject: [PATCH 42/63] DOC: outline workers in optimize tutorial --- doc/source/tutorial/optimize.rst | 82 +++++++++++++++++++++ scipy/optimize/_differentiable_functions.py | 2 + scipy/optimize/_lbfgsb_py.py | 2 + scipy/optimize/_optimize.py | 4 +- 4 files changed, 89 insertions(+), 1 deletion(-) diff --git a/doc/source/tutorial/optimize.rst b/doc/source/tutorial/optimize.rst index ba25914d9d79..b9e32d155345 100644 --- a/doc/source/tutorial/optimize.rst +++ b/doc/source/tutorial/optimize.rst @@ -2002,3 +2002,85 @@ For more MILP tutorials, see the Jupyter notebooks on SciPy Cookbooks: - `Compressed Sensing l1 program `_ - `Compressed Sensing l0 program `_ + + +Parallel execution support +--------------------------- + +Some SciPy optimization methods, such as :func:`differential_evolution` offer +parallelization through the use of a ``workers`` keyword. Each optimizer is slightly +different in how parallelization is employed, but there are common characteristics +in what can be provided to ``workers``. These commonalities are described below. + +If an int is supplied then a :class:`multiprocessing.Pool` is created, with the +object's :func:`map` method being used to evaluate solutions in parallel. With this +approach it is mandatory that the objective function is pickleable. Lambda functions +do not meet that requirement. + +:: + + >>> import numpy as np + >>> from scipy.optimize import rosen, differential_evolution, Bounds + >>> bnds = Bounds([0., 0., 0.], [10., 10., 10.]) + >>> res = differential_evolution(rosen, bnds, workers=2) + +Alternatively map-like callables can be supplied as a worker. Here the map-like function +iterates through the solution vectors, evaluating each one against the objective function. +In the following example we use :class:`multiprocessing.Pool` again, the objective +function still needs to be pickleable. + +:: + + >>> from multiprocessing import Pool + >>> with Pool(2) as pwl: + ... res = differential_evolution(rosen, bnds, workers=pwl.map) + +It can be an advantage to use this pattern because the :obj:`~multiprocessing.Pool` +can be re-used for further calculations - there is a significant amount of overhead in +creating those objects. Alternatives to :class:`multiprocessing.Pool` include the +`mpi4py `_ package, which enables parallel +processing on clusters. + +In Scipy 1.16.0 the ``workers`` keyword was introduced to selected :func:`minimize` +methods. Here parallelization is typically applied during numerical differentiation. +Either of the two approaches outlined above can be used, although it's strongly +advised to supply the map-like callable due to the overhead of creating new processes. +Performance gains will only be made if the objective function is expensive to +calculate. + +:: + + >>> x0 = np.array([2.0, 3.0, 4.0, 5.0]) + >>> with Pool(2) as pwl: + ... res = minimize(rosen, x0, method='L-BFGS-B', options={'workers':pwl.map}) + >>> res.x + array([0.99999903, 0.99999808, 0.99999614, 0.99999228]) # may vary + +If the objective function can be vectorized, then a map-like can be used to take +advantage of vectorization during function evaluation. + +:: + + >>> def vectorized_maplike(fun, iterable): + ... arr = np.array([i for i in iter(iterable)]) + ... arr_t = arr.T + ... 
r = rosen(arr_t) + ... return r + >>> + >>> res = minimize(rosen, x0, method='L-BFGS-B', options={'workers':vectorized_maplike}) + +There are several important points to note about this example: + +* The iterable is first converted to an iterator, before being made into an array via + a list comprehension. This allows the iterable to be a generator, list, array, etc. +* The calculation is done using ``rosen`` instead of using ``fun``. The map-like is + actually supplied with a wrapped version of the objective function. The wrapping + is used to detect various types of error, including checking whether the objective + function returns a scalar. If ``fun`` is used then a :class:`RuntimeError` will + result, because ``fun(arr_t)`` will be a 1-D array and not a scalar. We therefore use + ``rosen`` directly. +* ``arr.T`` is sent to the objective function. This is because `arr.shape` will be + `(S, N)`, where `S` is the number of solution vectors to evaluate and `N` is the + number of variables. For ``rosen`` vectorization occurs on `(N, S)` shaped arrays. +* This approach is not needed for :func:`differential_evolution` as that minimizer + already has a keyword for vectorization. diff --git a/scipy/optimize/_differentiable_functions.py b/scipy/optimize/_differentiable_functions.py index 17c0e7257cb1..46d38254a0d1 100644 --- a/scipy/optimize/_differentiable_functions.py +++ b/scipy/optimize/_differentiable_functions.py @@ -192,6 +192,8 @@ class ScalarFunction: calls to `approx_derivative` will incur large overhead from setting up new processes. + .. versionadded:: 1.16.0 + Notes ----- This class implements a memoization logic. There are methods `fun`, diff --git a/scipy/optimize/_lbfgsb_py.py b/scipy/optimize/_lbfgsb_py.py index c90bc609270f..e48cb266379b 100644 --- a/scipy/optimize/_lbfgsb_py.py +++ b/scipy/optimize/_lbfgsb_py.py @@ -350,6 +350,8 @@ def _minimize_lbfgsb(fun, x0, args=(), jac=None, bounds=None, any numerical differentiation in parallel. This evaluation is carried out as ``workers(fun, iterable)``. + .. versionadded:: 1.16.0 + Notes ----- The option `ftol` is exposed via the `scipy.optimize.minimize` interface, diff --git a/scipy/optimize/_optimize.py b/scipy/optimize/_optimize.py index 8157b8257f46..0b45d26438ea 100644 --- a/scipy/optimize/_optimize.py +++ b/scipy/optimize/_optimize.py @@ -255,7 +255,7 @@ def _prepare_scalar_function(fun, x0, jac=None, args=(), bounds=None, Whenever the gradient is estimated via finite-differences, the Hessian cannot be estimated with options {'2-point', '3-point', 'cs'} and needs to be estimated using one of the quasi-Newton strategies. - workers : int or map-like callable, optional + workers : int or map-like callable, optional A map-like callable, such as `multiprocessing.Pool.map` for evaluating any numerical differentiation in parallel. This evaluation is carried out as ``workers(fun, iterable)``, or @@ -269,6 +269,8 @@ def _prepare_scalar_function(fun, x0, jac=None, args=(), bounds=None, calls to `approx_derivative` will incur large overhead from setting up new processes. + .. 
versionadded:: 1.16.0 + Returns ------- sf : ScalarFunction From 52c44732445d2dd308fe0c2e67d094989f11bc32 Mon Sep 17 00:00:00 2001 From: Andrew Nelson Date: Mon, 3 Feb 2025 15:21:41 +1100 Subject: [PATCH 43/63] DOC: workers --- doc/source/tutorial/optimize.rst | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/doc/source/tutorial/optimize.rst b/doc/source/tutorial/optimize.rst index b9e32d155345..d3858405da28 100644 --- a/doc/source/tutorial/optimize.rst +++ b/doc/source/tutorial/optimize.rst @@ -2012,10 +2012,10 @@ parallelization through the use of a ``workers`` keyword. Each optimizer is slig different in how parallelization is employed, but there are common characteristics in what can be provided to ``workers``. These commonalities are described below. -If an int is supplied then a :class:`multiprocessing.Pool` is created, with the -object's :func:`map` method being used to evaluate solutions in parallel. With this -approach it is mandatory that the objective function is pickleable. Lambda functions -do not meet that requirement. +If an int is supplied then a :class:`multiprocessing.Pool ` is +created, with the object's :func:`map` method being used to evaluate solutions in +parallel. With this approach it is mandatory that the objective function is pickleable. +Lambda functions do not meet that requirement. :: @@ -2026,8 +2026,8 @@ do not meet that requirement. Alternatively map-like callables can be supplied as a worker. Here the map-like function iterates through the solution vectors, evaluating each one against the objective function. -In the following example we use :class:`multiprocessing.Pool` again, the objective -function still needs to be pickleable. +In the following example we use :class:`multiprocessing.Pool ` +again, the objective function still needs to be pickleable. :: @@ -2035,9 +2035,9 @@ function still needs to be pickleable. >>> with Pool(2) as pwl: ... res = differential_evolution(rosen, bnds, workers=pwl.map) -It can be an advantage to use this pattern because the :obj:`~multiprocessing.Pool` -can be re-used for further calculations - there is a significant amount of overhead in -creating those objects. Alternatives to :class:`multiprocessing.Pool` include the +It can be an advantage to use this pattern because the Pool can be re-used for further +calculations - there is a significant amount of overhead in creating those objects. +Alternatives to :class:`multiprocessing.Pool ` include the `mpi4py `_ package, which enables parallel processing on clusters. @@ -2079,8 +2079,8 @@ There are several important points to note about this example: function returns a scalar. If ``fun`` is used then a :class:`RuntimeError` will result, because ``fun(arr_t)`` will be a 1-D array and not a scalar. We therefore use ``rosen`` directly. -* ``arr.T`` is sent to the objective function. This is because `arr.shape` will be - `(S, N)`, where `S` is the number of solution vectors to evaluate and `N` is the - number of variables. For ``rosen`` vectorization occurs on `(N, S)` shaped arrays. +* ``arr.T`` is sent to the objective function. This is because ``arr.shape == (S, N)``, + where ``S`` is the number of solution vectors to evaluate and ``N`` is the number of + variables. For ``rosen`` vectorization occurs on ``(N, S)`` shaped arrays. * This approach is not needed for :func:`differential_evolution` as that minimizer already has a keyword for vectorization. 
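As a self-contained sketch of the map-like pattern described above (the
objective must be defined at module level so that it is picklable; the pool
size is arbitrary)::

    >>> import numpy as np
    >>> from multiprocessing import Pool
    >>> from scipy.optimize import minimize, rosen
    >>> x0 = np.array([2.0, 3.0, 4.0, 5.0])
    >>> if __name__ == "__main__":
    ...     with Pool(2) as pwl:   # one pool, re-used for every gradient evaluation
    ...         res = minimize(rosen, x0, method='L-BFGS-B',
    ...                        options={'workers': pwl.map})

The pool's ``map`` only parallelizes the finite-difference derivative
evaluations, so a speed-up should be expected only for objectives that are
substantially more expensive than ``rosen``.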
From 244fa938353b24e90a61b6ab25c7e4607ee67054 Mon Sep 17 00:00:00 2001 From: Andrew Nelson Date: Mon, 3 Feb 2025 16:23:43 +1100 Subject: [PATCH 44/63] DOC: workers --- doc/source/tutorial/optimize.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/tutorial/optimize.rst b/doc/source/tutorial/optimize.rst index d3858405da28..f2747f5ab1c9 100644 --- a/doc/source/tutorial/optimize.rst +++ b/doc/source/tutorial/optimize.rst @@ -2033,7 +2033,7 @@ again, the objective function still needs to be pickleable. >>> from multiprocessing import Pool >>> with Pool(2) as pwl: - ... res = differential_evolution(rosen, bnds, workers=pwl.map) + ... res = differential_evolution(rosen, bnds, workers=pwl.map, updating='deferred') It can be an advantage to use this pattern because the Pool can be re-used for further calculations - there is a significant amount of overhead in creating those objects. From a6cb7a7e3cef43d0ca74bb0a69378edb43c20497 Mon Sep 17 00:00:00 2001 From: zitongzhoueric Date: Mon, 3 Feb 2025 15:33:03 -0800 Subject: [PATCH 45/63] add single type parameter tests for sparray --- scipy/sparse/tests/test_base.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/scipy/sparse/tests/test_base.py b/scipy/sparse/tests/test_base.py index 26a073b7b779..99c0fb8c75c7 100644 --- a/scipy/sparse/tests/test_base.py +++ b/scipy/sparse/tests/test_base.py @@ -4857,6 +4857,11 @@ def test_sparray_subscriptable(self): assert result.__origin__ is coo_array assert result.__args__ == (np.int8, tuple[int]) + result = coo_array[np.int8] + assert isinstance(result, GenericAlias) + assert result.__origin__ is coo_array + assert result.__args__ == (np.int8,) + class TestCOOMatrix(_MatrixMixin, TestCOO): spcreator = coo_matrix From 232e5f11fcbed600ba2e0fda4bce0366a4d31113 Mon Sep 17 00:00:00 2001 From: Andrew Nelson Date: Tue, 4 Feb 2025 14:20:56 +1100 Subject: [PATCH 46/63] DOC: workers --- doc/source/tutorial/optimize.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/tutorial/optimize.rst b/doc/source/tutorial/optimize.rst index f2747f5ab1c9..88525270ace3 100644 --- a/doc/source/tutorial/optimize.rst +++ b/doc/source/tutorial/optimize.rst @@ -2022,7 +2022,7 @@ Lambda functions do not meet that requirement. >>> import numpy as np >>> from scipy.optimize import rosen, differential_evolution, Bounds >>> bnds = Bounds([0., 0., 0.], [10., 10., 10.]) - >>> res = differential_evolution(rosen, bnds, workers=2) + >>> res = differential_evolution(rosen, bnds, workers=2, updating='deferred') Alternatively map-like callables can be supplied as a worker. Here the map-like function iterates through the solution vectors, evaluating each one against the objective function. 
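For reference, the map-like pattern sketched above looks like this in full
(pool size and bounds are arbitrary)::

    >>> from multiprocessing import Pool
    >>> from scipy.optimize import rosen, differential_evolution, Bounds
    >>> bnds = Bounds([0., 0., 0.], [10., 10., 10.])
    >>> if __name__ == "__main__":
    ...     with Pool(2) as pwl:
    ...         res = differential_evolution(rosen, bnds, workers=pwl.map,
    ...                                      updating='deferred')

Passing ``updating='deferred'`` explicitly matches the examples above and
avoids the warning that is otherwise emitted when parallel workers force
deferred updating.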
From eecc6dac424deb06c7c979a4160b9fde34856a91 Mon Sep 17 00:00:00 2001 From: Matt Haberland Date: Tue, 4 Feb 2025 04:48:10 -0800 Subject: [PATCH 47/63] ENH: stats.pearsonr: two simple (but substantial) efficiency improvements (#22462) * ENH: stats.pearsonr: two simple but substantial efficiency improvements * MAINT: stats.pearsonr: adjustments per review --- scipy/stats/_stats_py.py | 22 +++++++++++++++------- scipy/stats/tests/test_stats.py | 26 +++++++++++++++++++++++++- 2 files changed, 40 insertions(+), 8 deletions(-) diff --git a/scipy/stats/_stats_py.py b/scipy/stats/_stats_py.py index ff52ff380ff0..43989c823c4e 100644 --- a/scipy/stats/_stats_py.py +++ b/scipy/stats/_stats_py.py @@ -4556,6 +4556,20 @@ def pearsonr(x, y, *, alternative='two-sided', method=None, axis=0): raise ValueError('`axis` must be an integer.') axis = axis_int + try: + np.broadcast_shapes(x.shape, y.shape) + # For consistency with other `stats` functions, we need to + # match the dimensionalities before looking at `axis`. + # (Note: this is not the NEP 5 / gufunc order of operations; + # see TestPearsonr::test_different_dimensionality for more information.) + ndim = max(x.ndim, y.ndim) + x = xp.reshape(x, (1,) * (ndim - x.ndim) + x.shape) + y = xp.reshape(y, (1,) * (ndim - y.ndim) + y.shape) + + except (ValueError, RuntimeError) as e: + message = '`x` and `y` must be broadcastable.' + raise ValueError(message) from e + n = x.shape[axis] if n != y.shape[axis]: raise ValueError('`x` and `y` must have the same length along `axis`.') @@ -4563,12 +4577,6 @@ def pearsonr(x, y, *, alternative='two-sided', method=None, axis=0): if n < 2: raise ValueError('`x` and `y` must have length at least 2.') - try: - x, y = xp.broadcast_arrays(x, y) - except (ValueError, RuntimeError) as e: - message = '`x` and `y` must be broadcastable.' - raise ValueError(message) from e - # `moveaxis` only recently added to array API, so it's not yey available in # array_api_strict. Replace with e.g. `xp.moveaxis(x, axis, -1)` when available. x = xp_moveaxis_to_end(x, axis, xp=xp) @@ -4658,7 +4666,7 @@ def statistic(x, y, axis): warnings.warn(stats.NearConstantInputWarning(msg), stacklevel=2) with np.errstate(invalid='ignore', divide='ignore'): - r = xp.sum(xm/normxm * ym/normym, axis=axis) + r = xp.vecdot(xm / normxm, ym / normym, axis=axis) # Presumably, if abs(r) > 1, then it is only some small artifact of # floating point arithmetic. diff --git a/scipy/stats/tests/test_stats.py b/scipy/stats/tests/test_stats.py index ebb6a294901e..61a360ba947c 100644 --- a/scipy/stats/tests/test_stats.py +++ b/scipy/stats/tests/test_stats.py @@ -597,11 +597,17 @@ def test_length3_r_exactly_negative_one(self, xp): @pytest.mark.skip_xp_backends(np_only=True) def test_input_validation(self, xp): x = [1, 2, 3] - y = [4, 5] + y = [4] message = '`x` and `y` must have the same length along `axis`.' with pytest.raises(ValueError, match=message): stats.pearsonr(x, y) + x = [1, 2, 3] + y = [4, 5] + message = '`x` and `y` must be broadcastable.' + with pytest.raises(ValueError, match=message): + stats.pearsonr(x, y) + x = [1] y = [2] message = '`x` and `y` must have length at least 2.' 
@@ -764,6 +770,24 @@ def test_nd_special_cases(self, xp): xp_assert_close(ci.low, -ones) xp_assert_close(ci.high, ones) + def test_different_dimensionality(self, xp): + # For better or for worse, there is one difference between the broadcasting + # behavior of most stats functions and NumPy gufuncs / NEP 5: gufuncs `axis` + # refers to the core dimension *before* prepending `1`s to the array shapes + # to match dimensionality; SciPy's prepends `1`s first. For instance, in + # SciPy, `vecdot` would work just like `xp.sum(x * y, axis=axis)`, but this + # is NOT true of NumPy. The discrepancy only arises when there are multiple + # arguments with different dimensionality and positive indices are used, + # which is probably why it hasn't been a problem. There are pros and cons of + # each convention, and we might want to consider changing our behavior in + # SciPy 2.0. For now, preserve consistency / backward compatibility. + rng = np.random.default_rng(45834598265019344) + x = rng.random((3, 10)) + y = rng.random(10) + res = stats.pearsonr(x, y, axis=1) + ref = stats.pearsonr(x, y, axis=-1) + assert_equal(res.statistic, ref.statistic) + @skip_xp_backends('jax.numpy', reason='JAX arrays do not support item assignment') @pytest.mark.parametrize('axis', [0, 1, None]) From 59157e374b31142b1cda1ac4264da9535cbdd1dd Mon Sep 17 00:00:00 2001 From: Pamphile Roy <23188539+tupui@users.noreply.github.com> Date: Tue, 4 Feb 2025 20:40:04 +0100 Subject: [PATCH 48/63] DOC: stats: update Halton docs (#22463) --- scipy/stats/_qmc.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/scipy/stats/_qmc.py b/scipy/stats/_qmc.py index 3287dceaca83..674a48342888 100644 --- a/scipy/stats/_qmc.py +++ b/scipy/stats/_qmc.py @@ -1117,14 +1117,16 @@ class Halton(QMCEngine): Pseudo-random number generator that generalize the Van der Corput sequence for multiple dimensions. The Halton sequence uses the base-two Van der Corput sequence for the first dimension, base-three for its second and - base-:math:`n` for its n-dimension. + base-:math:`p` for its :math:`n`-dimension, with :math:`p` the + :math:`n`'th prime. Parameters ---------- d : int Dimension of the parameter space. scramble : bool, optional - If True, use Owen scrambling. Otherwise no scrambling is done. + If True, use random scrambling from [2]_. Otherwise no scrambling + is done. Default is True. optimization : {None, "random-cd", "lloyd"}, optional Whether to use an optimization scheme to improve the quality after From a3d299fc1546ac151b0f488e9aad16af3ddb331b Mon Sep 17 00:00:00 2001 From: Lucas Colley Date: Tue, 4 Feb 2025 21:53:49 +0000 Subject: [PATCH 49/63] MAINT/TST: address nits from Dask PR (#22467) --- scipy/integrate/tests/test_cubature.py | 31 ++++++-------------------- scipy/ndimage/_measurements.py | 6 ----- scipy/signal/_filter_design.py | 6 ----- scipy/signal/_signaltools.py | 2 -- scipy/stats/tests/test_stats.py | 13 ++++++----- 5 files changed, 14 insertions(+), 44 deletions(-) diff --git a/scipy/integrate/tests/test_cubature.py b/scipy/integrate/tests/test_cubature.py index c6732f781192..691eb213c591 100644 --- a/scipy/integrate/tests/test_cubature.py +++ b/scipy/integrate/tests/test_cubature.py @@ -21,6 +21,7 @@ ) skip_xp_backends = pytest.mark.skip_xp_backends +boolean_index_skip_reason = 'JAX/Dask arrays do not support boolean assignment.' # The integrands ``genz_malik_1980_*`` come from the paper: # A.C. Genz, A.A. 
Malik, Remarks on algorithm 006: An adaptive algorithm for @@ -967,14 +968,8 @@ def test_break_points(self, problem, rule, rtol, atol, xp): f"true_error={xp.abs(res.estimate - exact)}") assert res.status == "converged", err_msg - @skip_xp_backends( - "jax.numpy", - reason="transforms make use of indexing assignment", - ) - @skip_xp_backends( - "dask.array", - reason="transforms make use of boolean index assignment" - ) + @pytest.mark.skip_xp_backends('jax.numpy', reason=boolean_index_skip_reason) + @pytest.mark.skip_xp_backends('dask.array', reason=boolean_index_skip_reason) @pytest.mark.parametrize("problem", [ ( # Function to integrate @@ -1121,14 +1116,8 @@ def test_infinite_limits(self, problem, rule, rtol, atol, xp): check_0d=False, ) - @skip_xp_backends( - "jax.numpy", - reason="transforms make use of indexing assignment", - ) - @skip_xp_backends( - "dask.array", - reason="transforms make use of boolean index assignment" - ) + @pytest.mark.skip_xp_backends('jax.numpy', reason=boolean_index_skip_reason) + @pytest.mark.skip_xp_backends('dask.array', reason=boolean_index_skip_reason) @pytest.mark.parametrize("problem", [ ( # Function to integrate @@ -1332,14 +1321,8 @@ def test_genz_malik_1d_raises_error(self, xp): GenzMalikCubature(1, xp=xp) -@skip_xp_backends( - "jax.numpy", - reason="transforms make use of indexing assignment", -) -@skip_xp_backends( - "dask.array", - reason="transforms make use of boolean index assignment" -) +@pytest.mark.skip_xp_backends('jax.numpy', reason=boolean_index_skip_reason) +@pytest.mark.skip_xp_backends('dask.array', reason=boolean_index_skip_reason) class TestTransformations: @pytest.mark.parametrize(("a", "b", "points"), [ ( diff --git a/scipy/ndimage/_measurements.py b/scipy/ndimage/_measurements.py index 28629171f9b7..c52f7f8d1a49 100644 --- a/scipy/ndimage/_measurements.py +++ b/scipy/ndimage/_measurements.py @@ -630,9 +630,6 @@ def single_group(vals): if labels is None: return single_group(input) - # manually cast to numpy - # since libaries (e.g. dask) that implement __array_function__ - # will not return a numpy array from broadcast_arrays labels = np.asarray(labels) # ensure input and labels match sizes input, labels = np.broadcast_arrays(input, labels) @@ -948,9 +945,6 @@ def single_group(vals, positions): if labels is None: return single_group(input, positions) - # manually cast to numpy - # since libaries (e.g. 
dask) that implement __array_function__ - # will not return a numpy array from broadcast_arrays labels = np.asarray(labels) # ensure input and labels match sizes input, labels = np.broadcast_arrays(input, labels) diff --git a/scipy/signal/_filter_design.py b/scipy/signal/_filter_design.py index ff0bf93dfa78..39a7a039c5d7 100644 --- a/scipy/signal/_filter_design.py +++ b/scipy/signal/_filter_design.py @@ -727,10 +727,6 @@ def group_delay(system, w=512, whole=False, fs=2*pi): def _validate_sos(sos): """Helper to validate a SOS input""" - # manually cast to numpy array - # since libs like dask implement __array_function__ - # (and will return a dask array instead of casting to - # ndarray in atleast_2d) sos = np.asarray(sos) sos = np.atleast_2d(sos) if sos.ndim != 2: @@ -1779,8 +1775,6 @@ def normalize(b, a): """ num, den = b, a - # cast to numpy by hand to avoid libraries like dask - # trying to dispatch this function via NEP 18 den = np.asarray(den) den = np.atleast_1d(den) num = np.atleast_2d(_align_nums(num)) diff --git a/scipy/signal/_signaltools.py b/scipy/signal/_signaltools.py index 3e0065fc78a1..411093ac6887 100644 --- a/scipy/signal/_signaltools.py +++ b/scipy/signal/_signaltools.py @@ -4121,8 +4121,6 @@ def detrend(data: np.ndarray, axis: int = -1, else: dshape = data.shape N = dshape[axis] - # Manually cast to numpy to prevent - # NEP18 dispatching for libraries like dask bp = np.asarray(bp) bp = np.sort(np.unique(np.concatenate(np.atleast_1d(0, bp, N)))) if np.any(bp > N): diff --git a/scipy/stats/tests/test_stats.py b/scipy/stats/tests/test_stats.py index 61a360ba947c..1c20ee39121b 100644 --- a/scipy/stats/tests/test_stats.py +++ b/scipy/stats/tests/test_stats.py @@ -45,6 +45,7 @@ from scipy._lib._array_api_no_0d import xp_assert_close, xp_assert_equal skip_xp_backends = pytest.mark.skip_xp_backends +boolean_index_skip_reason = 'JAX/Dask arrays do not support boolean assignment.' 
""" Numbers in docstrings beginning with 'W' refer to the section numbers @@ -3749,8 +3750,8 @@ def test_precision_loss_gh15554(self, xp): a[:, 0] = 1.01 stats.skew(a) - @skip_xp_backends('jax.numpy', reason="JAX arrays do not support item assignment") - @skip_xp_backends('dask.array', reason='boolean index assignment') + @pytest.mark.skip_xp_backends('jax.numpy', reason=boolean_index_skip_reason) + @pytest.mark.skip_xp_backends('dask.array', reason=boolean_index_skip_reason) @pytest.mark.parametrize('axis', [-1, 0, 2, None]) @pytest.mark.parametrize('bias', [False, True]) def test_vectorization(self, xp, axis, bias): @@ -3844,8 +3845,8 @@ def test_kurtosis_constant_value(self, xp): assert xp.isnan(stats.kurtosis(a / float(2**50), fisher=False)) assert xp.isnan(stats.kurtosis(a, fisher=False, bias=False)) - @skip_xp_backends('jax.numpy', reason='JAX arrays do not support item assignment') - @skip_xp_backends('dask.array', reason='boolean index assignment') + @pytest.mark.skip_xp_backends('jax.numpy', reason=boolean_index_skip_reason) + @pytest.mark.skip_xp_backends('dask.array', reason=boolean_index_skip_reason) @pytest.mark.parametrize('axis', [-1, 0, 2, None]) @pytest.mark.parametrize('bias', [False, True]) @pytest.mark.parametrize('fisher', [False, True]) @@ -9572,8 +9573,8 @@ def test_complex_gh22404(self, xp): xp_assert_close(res, xp.asarray(ref)) -@skip_xp_backends('jax.numpy', reason='JAX arrays do not support item assignment') -@skip_xp_backends('dask.array', reason='boolean index assignment') +@pytest.mark.skip_xp_backends('jax.numpy', reason=boolean_index_skip_reason) +@pytest.mark.skip_xp_backends('dask.array', reason=boolean_index_skip_reason) class TestXP_Var: @pytest.mark.parametrize('axis', [None, 1, -1, (-2, 2)]) @pytest.mark.parametrize('keepdims', [False, True]) From 76acada4eae8490667ad94052d7f4fbbc27b96b3 Mon Sep 17 00:00:00 2001 From: "Christine P. Chai" Date: Tue, 4 Feb 2025 17:09:16 -0800 Subject: [PATCH 50/63] DOC: Prevent A@x=b from becoming a URL (#22464) * DOC: Prevent A@x=b from becoming a URL * Update _interface.py [docs only] Co-authored-by: Jake Bowhay <60778417+j-bowhay@users.noreply.github.com> * Update _interface.py [docs only] Co-authored-by: Jake Bowhay <60778417+j-bowhay@users.noreply.github.com> * Update _interface.py [docs only] --------- Co-authored-by: Jake Bowhay <60778417+j-bowhay@users.noreply.github.com> --- scipy/sparse/linalg/_interface.py | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/scipy/sparse/linalg/_interface.py b/scipy/sparse/linalg/_interface.py index 54918b5a5439..7e8a765e2b6c 100644 --- a/scipy/sparse/linalg/_interface.py +++ b/scipy/sparse/linalg/_interface.py @@ -55,14 +55,14 @@ class LinearOperator: """Common interface for performing matrix vector products - Many iterative methods (e.g. cg, gmres) do not need to know the - individual entries of a matrix to solve a linear system A@x=b. + Many iterative methods (e.g. `cg`, `gmres`) do not need to know the + individual entries of a matrix to solve a linear system ``A@x = b``. Such solvers only require the computation of matrix vector - products, A@v where v is a dense vector. This class serves as + products, ``A@v`` where ``v`` is a dense vector. This class serves as an abstract interface between iterative solvers and matrix-like objects. - To construct a concrete LinearOperator, either pass appropriate + To construct a concrete `LinearOperator`, either pass appropriate callables to the constructor of this class, or subclass it. 
A subclass must implement either one of the methods ``_matvec``
@@ -82,17 +82,17 @@ class LinearOperator:
     Parameters
     ----------
     shape : tuple
-        Matrix dimensions (M, N).
+        Matrix dimensions ``(M, N)``.
     matvec : callable f(v)
-        Returns returns A @ v.
+        Returns ``A @ v``.
     rmatvec : callable f(v)
-        Returns A^H @ v, where A^H is the conjugate transpose of A.
+        Returns ``A^H @ v``, where ``A^H`` is the conjugate transpose of ``A``.
     matmat : callable f(V)
-        Returns A @ V, where V is a dense matrix with dimensions (N, K).
+        Returns ``A @ V``, where ``V`` is a dense matrix with dimensions ``(N, K)``.
     dtype : dtype
         Data type of the matrix.
     rmatmat : callable f(V)
-        Returns A^H @ V, where V is a dense matrix with dimensions (M, K).
+        Returns ``A^H @ V``, where ``V`` is a dense matrix with dimensions ``(M, K)``.

     Attributes
     ----------
@@ -108,17 +108,17 @@ class LinearOperator:

     Notes
     -----
-    The user-defined matvec() function must properly handle the case
-    where v has shape (N,) as well as the (N,1) case.  The shape of
-    the return type is handled internally by LinearOperator.
+    The user-defined `matvec` function must properly handle the case
+    where ``v`` has shape ``(N,)`` as well as the ``(N,1)`` case. The shape of
+    the return type is handled internally by `LinearOperator`.

     It is highly recommended to explicitly specify the `dtype`, otherwise
     it is determined automatically at the cost of a single matvec application
-    on `int8` zero vector using the promoted `dtype` of the output.
-    Python `int` could be difficult to automatically cast to numpy integers
+    on ``int8`` zero vector using the promoted `dtype` of the output.
+    Python ``int`` could be difficult to automatically cast to numpy integers
     in the definition of the `matvec` so the determination may be inaccurate.
     It is assumed that `matmat`, `rmatvec`, and `rmatmat` would result in
-    the same dtype of the output given an `int8` input as `matvec`.
+    the same dtype of the output given an ``int8`` input as `matvec`.

     LinearOperator instances can also be multiplied, added with each
     other and exponentiated, all lazily: the result of these operations

From 4ca431de22985756fe5f31342d0f1d7139f30a1e Mon Sep 17 00:00:00 2001
From: Andrew Nelson 
Date: Wed, 5 Feb 2025 17:06:06 +1100
Subject: [PATCH 51/63] DOC: address vectorization feedback

---
 doc/source/tutorial/optimize.rst | 55 +++++++++++++++++++++++---------
 1 file changed, 40 insertions(+), 15 deletions(-)

diff --git a/doc/source/tutorial/optimize.rst b/doc/source/tutorial/optimize.rst
index 88525270ace3..315ad8069a2c 100644
--- a/doc/source/tutorial/optimize.rst
+++ b/doc/source/tutorial/optimize.rst
@@ -2008,9 +2008,28 @@ Parallel execution support
 ---------------------------

 Some SciPy optimization methods, such as :func:`differential_evolution` offer
-parallelization through the use of a ``workers`` keyword. Each optimizer is slightly
-different in how parallelization is employed, but there are common characteristics
-in what can be provided to ``workers``. These commonalities are described below.
+parallelization through the use of a ``workers`` keyword.
+
+For :func:`differential_evolution` there are two loop (iteration) levels in the
+algorithm. The outer loop represents successive generations of a population. This
+loop can't be parallelized. For a given generation, candidate solutions are generated
+that have to be compared against existing population members. The fitness of each
+candidate solution can be evaluated in a loop, but it's also possible to parallelize
+the calculation.
+
+Parallelization is also possible in other optimization algorithms. For example, in
+various :func:`minimize` methods numerical differentiation is used to estimate
+derivatives. For a simple gradient calculation using two-point forward differences a
+total of ``N + 1`` objective function calculations have to be done, where ``N`` is the
+number of parameters. These are just small perturbations around a given location
+(the +1). Those ``N + 1`` calculations are also parallelizable. The numerical
+derivatives are used by the minimization algorithm to generate new steps.
+
+Each optimization algorithm is quite different in how it works, but there are often
+locations where multiple objective function calculations are required before the
+algorithm does something else. Those locations are what can be parallelized.
+There are therefore common characteristics in how ``workers`` is used. These
+commonalities are described below.

 If an int is supplied then a :class:`multiprocessing.Pool <multiprocessing.pool.Pool>` is
 created, with the object's :func:`map` method being used to evaluate solutions in
@@ -2024,10 +2043,12 @@ Lambda functions do not meet that requirement.

     >>> bnds = Bounds([0., 0., 0.], [10., 10., 10.])
     >>> res = differential_evolution(rosen, bnds, workers=2, updating='deferred')

-Alternatively map-like callables can be supplied as a worker. Here the map-like function
-iterates through the solution vectors, evaluating each one against the objective function.
+It is also possible to use a map-like callable as a worker. Here the map-like function
+is provided with a series of vectors that the optimization algorithm provides.
+The map-like function needs to evaluate each vector against the objective function.
 In the following example we use :class:`multiprocessing.Pool <multiprocessing.pool.Pool>`
-again, the objective function still needs to be pickleable.
+as the map-like. As before, the objective function still needs to be pickleable.
+This example is semantically identical to the previous example.

 ::

@@ -2057,30 +2078,34 @@ calculate.

     array([0.99999903, 0.99999808, 0.99999614, 0.99999228])  # may vary

 If the objective function can be vectorized, then a map-like can be used to take
-advantage of vectorization during function evaluation.
+advantage of vectorization during function evaluation. Vectorization means that the
+objective function can carry out the required calculations in a single (rather than
+multiple) call, which is typically very efficient.

 ::

     >>> def vectorized_maplike(fun, iterable):
-    ...     arr = np.array([i for i in iter(iterable)])
-    ...     arr_t = arr.T
-    ...     r = rosen(arr_t)
+    ...     arr = np.array([i for i in iter(iterable)])      # arr.shape = (S, N)
+    ...     arr_t = arr.T                                    # arr_t.shape = (N, S)
+    ...     r = rosen(arr_t)                                 # calculation vectorized over S
     ...     return r
     >>>
     >>> res = minimize(rosen, x0, method='L-BFGS-B', options={'workers':vectorized_maplike})

 There are several important points to note about this example:

+* The iterable represents the series of parameter vectors that the algorithm wishes
+  to be evaluated.
 * The iterable is first converted to an iterator, before being made into an array
   via a list comprehension. This allows the iterable to be a generator, list, array, etc.
-* The calculation is done using ``rosen`` instead of using ``fun``. The map-like is
-  actually supplied with a wrapped version of the objective function. The wrapping
-  is used to detect various types of error, including checking whether the objective
-  function returns a scalar.
If ``fun`` is used then a :class:`RuntimeError` will
-  result, because ``fun(arr_t)`` will be a 1-D array and not a scalar. We therefore use
-  ``rosen`` directly.
+  is used to detect various types of common user errors, including checking whether
+  the objective function returns a scalar. If ``fun`` is used then a
+  :class:`RuntimeError` will result, because ``fun(arr_t)`` will be a 1-D array and not
+  a scalar. We therefore use ``rosen`` directly.
 * ``arr.T`` is sent to the objective function. This is because ``arr.shape == (S, N)``,
   where ``S`` is the number of parameter vectors to evaluate and ``N`` is the number of
   variables. For ``rosen`` vectorization occurs on ``(N, S)`` shaped arrays.
 * This approach is not needed for :func:`differential_evolution` as that minimizer
   already has a keyword for vectorization.

From 4ae4626ba51548c8c9ca56b806116e53ea85a552 Mon Sep 17 00:00:00 2001
From: Andrew Nelson 
Date: Thu, 6 Feb 2025 14:06:48 +1100
Subject: [PATCH 52/63] MAINT: optimize callback with fixed variables

---
 scipy/optimize/_minimize.py           | 27 +++++++++++++++++++++++++-
 scipy/optimize/tests/test_optimize.py | 25 ++++++++++++------------
 2 files changed, 39 insertions(+), 13 deletions(-)

diff --git a/scipy/optimize/_minimize.py b/scipy/optimize/_minimize.py
index 7652ceca76c6..e0a8f77cf2ff 100644
--- a/scipy/optimize/_minimize.py
+++ b/scipy/optimize/_minimize.py
@@ -703,10 +703,18 @@ def minimize(fun, x0, args=(), method=None, jac=None, hess=None,
         x0 = x0[~i_fixed]
         bounds = _remove_from_bounds(bounds, i_fixed)
         fun = _Remove_From_Func(fun, i_fixed, x_fixed)
+
         if callable(callback):
             sig = inspect.signature(callback)
-            if set(sig.parameters) != {'intermediate_result'}:
+            if set(sig.parameters) == {'intermediate_result'}:
+                # callback(intermediate_result)
+                callback = _Patch_Callback_Equal_Variables(
+                    callback, i_fixed, x_fixed
+                )
+            else:
+                # callback(x)
                 callback = _Remove_From_Func(callback, i_fixed, x_fixed)
+
         if callable(jac):
             jac = _Remove_From_Func(jac, i_fixed, x_fixed, remove=1)

@@ -723,6 +731,7 @@ def minimize(fun, x0, args=(), method=None, jac=None, hess=None,
                                           remove=1)

     bounds = standardize_bounds(bounds, x0, meth)
+    # selects whether to use callback(x) or callback(intermediate_result)
     callback = _wrap_callback(callback, meth)

     if meth == 'nelder-mead':
@@ -1003,6 +1012,22 @@ def _remove_from_bounds(bounds, i_fixed):
     return Bounds(lb, ub)  # don't mutate original Bounds object


+class _Patch_Callback_Equal_Variables:
+    # Patches a callback that accepts an intermediate_result
+    def __init__(self, callback, i_fixed, x_fixed, ):
+        self.callback = callback
+        self.i_fixed = i_fixed
+        self.x_fixed = x_fixed
+
+    def __call__(self, intermediate_result):
+        x_in = intermediate_result.x
+        x_out = np.zeros_like(self.i_fixed, dtype=x_in.dtype)
+        x_out[self.i_fixed] = self.x_fixed
+        x_out[~self.i_fixed] = x_in
+        intermediate_result.x = x_out
+        return self.callback(intermediate_result)
+
+
 class _Remove_From_Func:
     """Wraps a function such that fixed variables need not be passed in"""
     def __init__(self, fun_in, i_fixed, x_fixed, min_dim=None, remove=0):
diff --git a/scipy/optimize/tests/test_optimize.py b/scipy/optimize/tests/test_optimize.py
index 21ef2eee7c37..0b3144da8e53 100644
--- a/scipy/optimize/tests/test_optimize.py
+++ b/scipy/optimize/tests/test_optimize.py
@@ -1279,24 +1279,25 @@ def dfunc(z):
             f"{method}: {func(sol1.x)} vs. 
{func(sol2.x)}" @pytest.mark.parametrize( - 'bounds', [None, [[0.0, 0.0], [-np.inf, +np.inf], [-np.inf, +np.inf]]], + 'bounds', + [None, + Bounds([0.0, -np.inf, -np.inf], [0.0, np.inf, np.inf]) + ], ) @pytest.mark.parametrize('method', ['l-bfgs-b']) - def test_minimize_callback_result(self, method, bounds): - """Check if `OptimizeResult` is passed to the callback function. - - The issue related to fixed bounds (gh-21537) is also checked. - """ + def test_minimize_callback_fixed_variables(self, method, bounds): + # gh-21537 def callback(intermediate_result): assert isinstance(intermediate_result, optimize.OptimizeResult) + assert len(intermediate_result.x) == 3 - res = optimize.minimize(self.func, np.zeros(3), method=method, - bounds=bounds, callback=callback) + def callback2(x): + assert len(x) == 3 - if bounds is not None: - for i in range(3): - assert bounds[i][0] <= res.x[i] # check lower bounds - assert bounds[i][1] >= res.x[i] # check upper bounds + optimize.minimize(self.func, np.zeros(3), method=method, + bounds=bounds, callback=callback) + optimize.minimize(self.func, np.zeros(3), method=method, + bounds=bounds, callback=callback2) @pytest.mark.fail_slow(10) @pytest.mark.filterwarnings('ignore::UserWarning') From 24f31a71f585eb1e962b0fadc2f61c91c88a0c3c Mon Sep 17 00:00:00 2001 From: Andrew Nelson Date: Thu, 6 Feb 2025 14:07:56 +1100 Subject: [PATCH 53/63] Lint --- scipy/optimize/_minimize.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scipy/optimize/_minimize.py b/scipy/optimize/_minimize.py index e0a8f77cf2ff..e18bed6dd957 100644 --- a/scipy/optimize/_minimize.py +++ b/scipy/optimize/_minimize.py @@ -1015,7 +1015,7 @@ def _remove_from_bounds(bounds, i_fixed): class _Patch_Callback_Equal_Variables: # Patches a callback that accepts an intermediate_result - def __init__(self, callback, i_fixed, x_fixed, ): + def __init__(self, callback, i_fixed, x_fixed): self.callback = callback self.i_fixed = i_fixed self.x_fixed = x_fixed From e93b70fa0351702f67ad16433d95baaff70136a7 Mon Sep 17 00:00:00 2001 From: Andrew Nelson Date: Thu, 6 Feb 2025 15:25:38 +1100 Subject: [PATCH 54/63] TST: move equal bounds callback test --- scipy/optimize/tests/test_optimize.py | 27 ++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/scipy/optimize/tests/test_optimize.py b/scipy/optimize/tests/test_optimize.py index 0b3144da8e53..58c5866c383b 100644 --- a/scipy/optimize/tests/test_optimize.py +++ b/scipy/optimize/tests/test_optimize.py @@ -7,6 +7,7 @@ """ import itertools +import inspect import platform import threading import numpy as np @@ -1284,20 +1285,6 @@ def dfunc(z): Bounds([0.0, -np.inf, -np.inf], [0.0, np.inf, np.inf]) ], ) - @pytest.mark.parametrize('method', ['l-bfgs-b']) - def test_minimize_callback_fixed_variables(self, method, bounds): - # gh-21537 - def callback(intermediate_result): - assert isinstance(intermediate_result, optimize.OptimizeResult) - assert len(intermediate_result.x) == 3 - - def callback2(x): - assert len(x) == 3 - - optimize.minimize(self.func, np.zeros(3), method=method, - bounds=bounds, callback=callback) - optimize.minimize(self.func, np.zeros(3), method=method, - bounds=bounds, callback=callback2) @pytest.mark.fail_slow(10) @pytest.mark.filterwarnings('ignore::UserWarning') @@ -2925,6 +2912,10 @@ def grad(x): def callback(x, *args): check_x(x) + def callback2(intermediate_result): + assert isinstance(intermediate_result, OptimizeResult) + check_x(intermediate_result.x) + def constraint1(x): check_x(x, 
check_values=False) return x[0:1] - 1 @@ -2975,7 +2966,7 @@ def jacobian2(x): ([c1b, c2b], [c1b, c2b])) # test with and without callback function - callbacks = (None, callback) + callbacks = (None, callback, callback2) data = {"methods": methods, "kwds": kwds, "bound_types": bound_types, "constraints": constraints, "callbacks": callbacks, @@ -3014,6 +3005,12 @@ def test_equal_bounds(method, kwds, bound_type, constraints, callback): test_constraints, reference_constraints = constraints if test_constraints and not method == 'SLSQP': pytest.skip('Only SLSQP supports nonlinear constraints') + + if method in ['SLSQP', 'TNC'] and callable(callback): + sig = inspect.signature(callback) + if 'intermediate_result' in set(sig.parameters): + pytest.skip("SLSQP, TNC don't support intermediate_result") + # reference constraints always have analytical jacobian # if test constraints are not the same, we'll need finite differences fd_needed = (test_constraints != reference_constraints) From 1e8e87a16c30e7776b3ca5d8f20c86b0b9ac8157 Mon Sep 17 00:00:00 2001 From: Andrew Nelson Date: Thu, 6 Feb 2025 15:27:27 +1100 Subject: [PATCH 55/63] TST: move equal bounds callback test --- scipy/optimize/tests/test_optimize.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/scipy/optimize/tests/test_optimize.py b/scipy/optimize/tests/test_optimize.py index 58c5866c383b..d5b48d0d173a 100644 --- a/scipy/optimize/tests/test_optimize.py +++ b/scipy/optimize/tests/test_optimize.py @@ -1279,13 +1279,6 @@ def dfunc(z): assert func(sol1.x) < func(sol2.x), \ f"{method}: {func(sol1.x)} vs. {func(sol2.x)}" - @pytest.mark.parametrize( - 'bounds', - [None, - Bounds([0.0, -np.inf, -np.inf], [0.0, np.inf, np.inf]) - ], - ) - @pytest.mark.fail_slow(10) @pytest.mark.filterwarnings('ignore::UserWarning') @pytest.mark.filterwarnings('ignore::RuntimeWarning') # See gh-18547 From e7a789f0ecc036e90cae7c4938884ecafc54315b Mon Sep 17 00:00:00 2001 From: Matt Haberland Date: Thu, 6 Feb 2025 00:06:30 -0800 Subject: [PATCH 56/63] CI: use mpmath pre-release again (#22480) [skip circle] [skip cirrus] --- .github/workflows/linux.yml | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml index c933fafa6bb8..fd73e5a33f64 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml @@ -321,10 +321,7 @@ jobs: - name: Install Python packages run: | python -m pip install cython pythran ninja meson-python pybind11 click rich_click pydevtool - python -m pip install --pre --upgrade pytest pytest-cov pytest-xdist gmpy2 threadpoolctl pooch hypothesis matplotlib - # Move mpmath back into `install --pre` above once mpmath 1.4.0 with - # the fix is out (xref gh-22395) - python -m pip install mpmath + python -m pip install --pre --upgrade pytest pytest-cov pytest-xdist mpmath gmpy2 threadpoolctl pooch hypothesis matplotlib python -m pip install -r requirements/openblas.txt # Install numpy last, to ensure we get nightly (avoid possible <2.0 constraints). python -m pip install --pre --upgrade --timeout=60 -i https://pypi.anaconda.org/scientific-python-nightly-wheels/simple numpy From da0972c3860913aa8ca96a41e2aee0baf2af5fd9 Mon Sep 17 00:00:00 2001 From: Tyler Reddy Date: Thu, 6 Feb 2025 11:15:47 -0700 Subject: [PATCH 57/63] MAINT: pearsonr SIMD-related shim * Fixes #22479. 
* This small patch allows `test_stats.py::TestRegression::test_regressZEROX` to pass on x86_64 Linux instead of failing due to an extra division by zero warning over a fairly narrow range of NumPy versions near `1.25.2` where SIMD implementation details appear to have been slightly different. [skip circle] --- scipy/stats/_stats_py.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scipy/stats/_stats_py.py b/scipy/stats/_stats_py.py index 43989c823c4e..3d8542f2dc75 100644 --- a/scipy/stats/_stats_py.py +++ b/scipy/stats/_stats_py.py @@ -4650,7 +4650,7 @@ def statistic(x, y, axis): # use np.linalg.norm. xmax = xp.max(xp.abs(xm), axis=axis, keepdims=True) ymax = xp.max(xp.abs(ym), axis=axis, keepdims=True) - with np.errstate(invalid='ignore'): + with np.errstate(invalid='ignore', divide='ignore'): normxm = xmax * xp_vector_norm(xm/xmax, axis=axis, keepdims=True) normym = ymax * xp_vector_norm(ym/ymax, axis=axis, keepdims=True) From 771a1eceda328e89462b0681e34b57b03fd9a825 Mon Sep 17 00:00:00 2001 From: Tyler Reddy Date: Thu, 6 Feb 2025 11:28:23 -0700 Subject: [PATCH 58/63] MAINT: missing Cython type in build * This small patch prevents the latest stable release of Cython from complaining about a failure to type a loop variable, which otherwise generates some annoying build spam on x86_64 Linux for example: ``` [1422/1463] Generating 'scipy/spatial/transform/_rotation.cpython-311-x86_64-linux-gnu.so.p/_rotation.c' performance hint: /home/treddy/github_projects/scipy/scipy/spatial/transform/_rotation.pyx:1162:34: Index should be typed for more efficient access performance hint: /home/treddy/github_projects/scipy/scipy/spatial/transform/_rotation.pyx:1163:34: Index should be typed for more efficient access performance hint: /home/treddy/github_projects/scipy/scipy/spatial/transform/_rotation.pyx:1164:34: Index should be typed for more efficient access performance hint: /home/treddy/github_projects/scipy/scipy/spatial/transform/_rotation.pyx:1165:34: Index should be typed for more efficient access performance hint: /home/treddy/github_projects/scipy/scipy/spatial/transform/_rotation.pyx:1165:55: Index should be typed for more efficient access performance hint: /home/treddy/github_projects/scipy/scipy/spatial/transform/_rotation.pyx:1166:34: Index should be typed for more efficient access performance hint: /home/treddy/github_projects/scipy/scipy/spatial/transform/_rotation.pyx:1174:61: Index should be typed for more efficient access performance hint: /home/treddy/github_projects/scipy/scipy/spatial/transform/_rotation.pyx:1174:21: Index should be typed for more efficient access performance hint: /home/treddy/github_projects/scipy/scipy/spatial/transform/_rotation.pyx:1175:39: Index should be typed for more efficient access performance hint: /home/treddy/github_projects/scipy/scipy/spatial/transform/_rotation.pyx:1175:60: Index should be typed for more efficient access performance hint: /home/treddy/github_projects/scipy/scipy/spatial/transform/_rotation.pyx:1175:21: Index should be typed for more efficient access performance hint: /home/treddy/github_projects/scipy/scipy/spatial/transform/_rotation.pyx:1176:39: Index should be typed for more efficient access performance hint: /home/treddy/github_projects/scipy/scipy/spatial/transform/_rotation.pyx:1176:60: Index should be typed for more efficient access performance hint: /home/treddy/github_projects/scipy/scipy/spatial/transform/_rotation.pyx:1176:21: Index should be typed for more efficient access performance hint: 
/home/treddy/github_projects/scipy/scipy/spatial/transform/_rotation.pyx:1177:39: Index should be typed for more efficient access performance hint: /home/treddy/github_projects/scipy/scipy/spatial/transform/_rotation.pyx:1177:60: Index should be typed for more efficient access performance hint: /home/treddy/github_projects/scipy/scipy/spatial/transform/_rotation.pyx:1177:21: Index should be typed for more efficient access performance hint: /home/treddy/github_projects/scipy/scipy/spatial/transform/_rotation.pyx:1179:39: Index should be typed for more efficient access performance hint: /home/treddy/github_projects/scipy/scipy/spatial/transform/_rotation.pyx:1179:60: Index should be typed for more efficient access performance hint: /home/treddy/github_projects/scipy/scipy/spatial/transform/_rotation.pyx:1179:21: Index should be typed for more efficient access performance hint: /home/treddy/github_projects/scipy/scipy/spatial/transform/_rotation.pyx:1180:39: Index should be typed for more efficient access performance hint: /home/treddy/github_projects/scipy/scipy/spatial/transform/_rotation.pyx:1180:60: Index should be typed for more efficient access performance hint: /home/treddy/github_projects/scipy/scipy/spatial/transform/_rotation.pyx:1180:21: Index should be typed for more efficient access performance hint: /home/treddy/github_projects/scipy/scipy/spatial/transform/_rotation.pyx:1181:39: Index should be typed for more efficient access performance hint: /home/treddy/github_projects/scipy/scipy/spatial/transform/_rotation.pyx:1181:60: Index should be typed for more efficient access performance hint: /home/treddy/github_projects/scipy/scipy/spatial/transform/_rotation.pyx:1181:21: Index should be typed for more efficient access performance hint: /home/treddy/github_projects/scipy/scipy/spatial/transform/_rotation.pyx:1182:21: Index should be typed for more efficient access performance hint: /home/treddy/github_projects/scipy/scipy/spatial/transform/_rotation.pyx:1185:29: Index should be typed for more efficient access ``` [skip circle] --- scipy/spatial/transform/_rotation.pyx | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scipy/spatial/transform/_rotation.pyx b/scipy/spatial/transform/_rotation.pyx index 31814ac7084d..6bfa3e2a835b 100644 --- a/scipy/spatial/transform/_rotation.pyx +++ b/scipy/spatial/transform/_rotation.pyx @@ -1106,6 +1106,8 @@ cdef class Rotation: .. 
versionadded:: 1.4.0 """ + cdef int ind + is_single = False matrix = np.array(matrix, dtype=float) From 26a09b305430c96410b7776e73bb115ccb1e0406 Mon Sep 17 00:00:00 2001 From: Evgeni Burovski Date: Thu, 6 Feb 2025 21:08:29 +0100 Subject: [PATCH 59/63] BUG: fix `make_lsq_spline` with a non-default axis (#22481) --- scipy/interpolate/_bsplines.py | 3 ++ scipy/interpolate/tests/test_bsplines.py | 42 ++++++++++++++++++++++++ 2 files changed, 45 insertions(+) diff --git a/scipy/interpolate/_bsplines.py b/scipy/interpolate/_bsplines.py index a7caf104796f..382d383ba9a5 100644 --- a/scipy/interpolate/_bsplines.py +++ b/scipy/interpolate/_bsplines.py @@ -1765,6 +1765,9 @@ def make_lsq_spline(x, y, t, k=3, w=None, axis=0, check_finite=True, *, method=" axis = normalize_axis_index(axis, y.ndim) y = np.moveaxis(y, axis, 0) # now internally interp axis is zero + if not y.flags.c_contiguous: + # C routines in _dierckx currently require C contiguity + y = y.copy(order='C') if x.ndim != 1: raise ValueError("Expect x to be a 1-D sequence.") diff --git a/scipy/interpolate/tests/test_bsplines.py b/scipy/interpolate/tests/test_bsplines.py index e8b1b2b58afc..a56806560a7b 100644 --- a/scipy/interpolate/tests/test_bsplines.py +++ b/scipy/interpolate/tests/test_bsplines.py @@ -1525,6 +1525,26 @@ def test_shapes(self): b = make_interp_spline(x, y, k, bc_type=(d_l, d_r)) assert b.c.shape == (n + k - 1, 5, 6, 7) + @pytest.mark.parametrize("axis", range(1, 4)) + def test_shapes_axis(self, axis): + rng = np.random.RandomState(1234) + n = 11 + shp_extra = (5, 6, 7) + x = np.arange(n) + y = rng.random(size=(n,) + shp_extra) + spl = make_interp_spline(x, y) + + y1 = np.moveaxis(y.copy(), 0, axis) + spl1 = make_interp_spline(x, y1, axis=axis) + + assert spl(3).shape == shp_extra + assert spl([3]).shape == (1,) + shp_extra + assert spl([2, 3]).shape == (2,) + shp_extra + + assert spl1(3).shape == shp_extra + assert spl1([3]).shape == shp_extra[:axis] + (1,) + shp_extra[axis:] + assert spl1([2, 3]).shape == shp_extra[:axis] + (2,) + shp_extra[axis:] + def test_string_aliases(self): yy = np.sin(self.xx) @@ -1755,6 +1775,28 @@ def test_multiple_rhs_3(self): b_neq = make_lsq_spline(x, y, t, k, method="norm-eq") xp_assert_close(b_qr.c, b_neq.c, atol=1e-15) + @parametrize_lsq_methods + @pytest.mark.parametrize("axis", range(1, 4)) + def test_shapes_axis(self, axis, method): + rng = np.random.RandomState(1234) + k, n = 3, 11 + shp_extra = (5, 6, 7) + x = np.arange(n) + t = (x[0],) * (k+1) + (x[-1],)*(k+1) + y = rng.random(size=(n,) + shp_extra) + spl = make_lsq_spline(x, y, t=t, method=method) + + y1 = np.moveaxis(y.copy(), 0, axis) + spl1 = make_lsq_spline(x, y1, t=t, axis=axis, method=method) + + assert spl(3).shape == shp_extra + assert spl([3]).shape == (1,) + shp_extra + assert spl([2, 3]).shape == (2,) + shp_extra + + assert spl1(3).shape == shp_extra + assert spl1([3]).shape == shp_extra[:axis] + (1,) + shp_extra[axis:] + assert spl1([2, 3]).shape == shp_extra[:axis] + (2,) + shp_extra[axis:] + @parametrize_lsq_methods def test_complex(self, method): # cmplx-valued `y` From f33dbe05c37e0a9b85e6d016f1c3f0e83c494faf Mon Sep 17 00:00:00 2001 From: Andrew Nelson Date: Fri, 7 Feb 2025 11:09:47 +1100 Subject: [PATCH 60/63] DOC: optimize.rst, improve workers examples --- doc/source/tutorial/optimize.rst | 59 ++++++++++++++++++++------------ 1 file changed, 37 insertions(+), 22 deletions(-) diff --git a/doc/source/tutorial/optimize.rst b/doc/source/tutorial/optimize.rst index 315ad8069a2c..742a0682e436 100644 --- 
a/doc/source/tutorial/optimize.rst +++ b/doc/source/tutorial/optimize.rst @@ -2007,7 +2007,7 @@ For more MILP tutorials, see the Jupyter notebooks on SciPy Cookbooks: Parallel execution support --------------------------- -Some SciPy optimization methods, such as :func:`differential_evolution` offer +Some SciPy optimization methods, such as :func:`differential_evolution`, offer parallelization through the use of a ``workers`` keyword. For :func:`differential_evolution` there are two loops (iteration) levels in the @@ -2068,29 +2068,44 @@ Either of the two approaches outlined above can be used, although it's strongly advised to supply the map-like callable due to the overhead of creating new processes. Performance gains will only be made if the objective function is expensive to calculate. +Let's compare how much parallelization can help compared to the serial version. To +simulate a slow function we use the ``time`` package. :: - >>> x0 = np.array([2.0, 3.0, 4.0, 5.0]) - >>> with Pool(2) as pwl: - ... res = minimize(rosen, x0, method='L-BFGS-B', options={'workers':pwl.map}) - >>> res.x - array([0.99999903, 0.99999808, 0.99999614, 0.99999228]) # may vary + >>> import time + >>> def slow_func(x): + ... time.sleep(0.0002) + ... return rosen(x) + +Examine the serial minimization first:: + + In [1]: rng = np.random.default_rng() + + In [2]: x0 = rng.uniform(low=0.0, high=10.0, size=(20,)) + + In [3]: %timeit minimize(slow_func, x0, method='L-BFGS-B') # serial approach + 365 ms ± 6.17 ms per loop (mean ± std. dev. of 7 runs, 1 loop each) # may vary + +Now the parallel version:: + + In [4]: with Pool() as pwl: # parallel approach + ... %timeit minimize(slow_func, x0, method='L-BFGS-B', options={'workers':pwl.map}) + 70.5 ms ± 146 μs per loop (mean ± std. dev. of 7 runs, 1 loop each) # may vary If the objective function can be vectorized, then a map-like can be used to take advantage of vectorization during function evaluation. Vectorization means that the objective function can carry out the required calculations in a single (rather than -multiple) call, which is typically very efficient. +multiple) call, which is typically very efficient:: -:: + In [5]: def vectorized_maplike(fun, iterable): + ... arr = np.array([i for i in iter(iterable)]) # arr.shape = (S, N) + ... arr_t = arr.T # arr_t.shape = (N, S) + ... r = slow_func(arr_t) # calculation vectorized over S + ... return r - >>> def vectorized_maplike(fun, iterable): - ... arr = np.array([i for i in iter(iterable)]) # arr.shape = (S, N) - ... arr_t = arr.T # arr_t.shape = (N, S) - ... r = rosen(arr_t) # calculation vectorized over S - ... return r - >>> - >>> res = minimize(rosen, x0, method='L-BFGS-B', options={'workers':vectorized_maplike}) + In [6]: %timeit minimize(slow_func, x0, method='L-BFGS-B', options={'workers':vectorized_maplike}) + 38.9 ms ± 734 μs per loop (mean ± std. dev. of 7 runs, 10 loops each) # may vary There are several important points to note about this example: @@ -2098,14 +2113,14 @@ There are several important points to note about this example: to be evaluated. * The iterable is first converted to an iterator, before being made into an array via a list comprehension. This allows the iterable to be a generator, list, array, etc. -* The calculation is done using ``rosen`` instead of using ``fun``. The map-like is - actually supplied with a wrapped version of the objective function. The wrapping - is used to detect various types of common user errors, including checking whether - the objective function returns a scalar. 
If ``fun`` is used then a - :class:`RuntimeError` will result, because ``fun(arr_t)`` will be a 1-D array and not - a scalar. We therefore use ``rosen`` directly. +* Within the map-like the calculation is done using ``slow_func`` instead of using + ``fun``. The map-like is actually supplied with a wrapped version of the objective + function. The wrapping is used to detect various types of common user errors, + including checking whether the objective function returns a scalar. If ``fun`` is + used then a :class:`RuntimeError` will result, because ``fun(arr_t)`` will be a 1-D + array and not a scalar. We therefore use ``slow_func`` directly. * ``arr.T`` is sent to the objective function. This is because ``arr.shape == (S, N)``, where ``S`` is the number of parameter vectors to evaluate and ``N`` is the number of - variables. For ``rosen`` vectorization occurs on ``(N, S)`` shaped arrays. + variables. For ``slow_func`` vectorization occurs on ``(N, S)`` shaped arrays. * This approach is not needed for :func:`differential_evolution` as that minimizer already has a keyword for vectorization. From ab84560b96cf5816be0015b0ee3a41cef708f675 Mon Sep 17 00:00:00 2001 From: Tyler Reddy Date: Fri, 7 Feb 2025 04:08:44 -0700 Subject: [PATCH 61/63] MAINT: bump min NumPy to 1.25.2, min Python to 3.11 (#22012) For the Linux `distro_multiple_pythons` CI job, also bump the base Ubuntu version so that we use Python 3.11 (non-default) and Python 3.12 (default). This required using the deadsnakes PPA, because the newer Ubuntu doesn't have multiple Python versions in its main package channel. Co-authored-by: Ralf Gommers --- .github/workflows/linux.yml | 50 +++++++++++++++++---------------- .github/workflows/macos.yml | 2 +- .github/workflows/musllinux.yml | 4 +-- .github/workflows/wheels.yml | 4 +-- .github/workflows/windows.yml | 9 +++--- dev.py | 7 ++--- meson.build | 2 +- pyproject.toml | 5 ++-- requirements/build.txt | 2 +- requirements/default.txt | 2 +- scipy/__init__.py | 2 +- tools/lint.toml | 4 +-- 12 files changed, 46 insertions(+), 47 deletions(-) diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml index fd73e5a33f64..aa45854dd736 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml @@ -26,7 +26,7 @@ jobs: uses: ./.github/workflows/commit_message.yml test_meson: - name: mypy (py3.10) & dev deps (py3.13), fast, dev.py + name: mypy (py3.11) & dev deps (py3.13), fast, dev.py needs: get_commit_message # If using act to run CI locally the github object does not exist and # the usual skipping should not be enforced @@ -36,7 +36,7 @@ jobs: runs-on: ubuntu-22.04 strategy: matrix: - python-version: ['3.10', '3.13-dev'] # this run will use python dev versions when available + python-version: ['3.11', '3.13-dev'] # this run will use python dev versions when available maintenance-branch: - ${{ contains(github.ref, 'maintenance/') || contains(github.base_ref, 'maintenance/') }} exclude: @@ -63,7 +63,7 @@ jobs: sudo apt-get install -y libopenblas-dev libatlas-base-dev liblapack-dev gfortran libgmp-dev libmpfr-dev libsuitesparse-dev ccache libmpc-dev - name: Install Python packages - if: matrix.python-version == '3.10' + if: matrix.python-version == '3.11' run: | python -m pip install numpy cython pytest pytest-xdist pytest-timeout pybind11 mpmath gmpy2 pythran ninja meson click rich-click doit pydevtool pooch hypothesis @@ -95,7 +95,7 @@ jobs: path: ${{ steps.prep-ccache.outputs.dir }} # Restores ccache from either a previous build on this branch or on main key: ${{ 
github.workflow }}-${{ matrix.python-version }}-ccache-linux-${{ steps.prep-ccache.outputs.timestamp }} - # This evaluates to `Linux Tests-3.10-ccache-linux-` which is not unique. As the CI matrix is expanded, this will + # This evaluates to `Linux Tests-3.11-ccache-linux-` which is not unique. As the CI matrix is expanded, this will # need to be updated to be unique so that the cache is not restored from a different job altogether. restore-keys: | ${{ github.workflow }}-${{ matrix.python-version }}-ccache-linux- @@ -128,7 +128,7 @@ jobs: run: ninja -C build -t missingdeps - name: Mypy - if: matrix.python-version == '3.10' + if: matrix.python-version == '3.11' run: | # Packages that are only needed for their annotations python -m pip install mypy==1.10.0 types-psutil typing_extensions @@ -148,7 +148,7 @@ jobs: if: > needs.get_commit_message.outputs.message == 1 && (github.repository == 'scipy/scipy' || github.repository == '') - runs-on: ubuntu-22.04 + runs-on: ubuntu-24.04 steps: - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: @@ -202,12 +202,12 @@ jobs: ################################################################################# python_debug: # also uses the vcs->sdist->wheel route. - name: Python-debug & ATLAS & sdist+wheel, fast, py3.10/npMin, pip+pytest + name: Python-debug & ATLAS & sdist+wheel, fast, py3.12/npMin, pip+pytest needs: get_commit_message if: > needs.get_commit_message.outputs.message == 1 && (github.repository == 'scipy/scipy' || github.repository == '') - runs-on: ubuntu-22.04 # provides python3.10-dbg + runs-on: ubuntu-24.04 # provides python3.12-dbg steps: - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: @@ -232,7 +232,7 @@ jobs: ################################################################################# gcc9: # Purpose is to examine builds with oldest-supported gcc and test with pydata/sparse. - name: Oldest GCC & pydata/sparse, fast, py3.10/npMin, pip+pytest + name: Oldest GCC & pydata/sparse, fast, py3.11/npMin, pip+pytest needs: get_commit_message if: > needs.get_commit_message.outputs.message == 1 @@ -246,7 +246,7 @@ jobs: - name: Setup Python uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0 with: - python-version: "3.10" + python-version: "3.11" - name: Setup system dependencies run: | @@ -271,7 +271,7 @@ jobs: - name: Install test dependencies run: | # Downgrade numpy to oldest supported version - pip install gmpy2 threadpoolctl mpmath pooch pytest pytest-xdist==2.5.0 pytest-timeout hypothesis sparse "numpy==1.23.5" + pip install gmpy2 threadpoolctl mpmath pooch pytest pytest-xdist==2.5.0 pytest-timeout hypothesis sparse "numpy==1.25.2" - name: Run tests run: | @@ -284,7 +284,7 @@ jobs: ################################################################################# prerelease_deps_coverage_64bit_blas: # TODO: re-enable ILP64 build. - name: Prerelease deps & coverage report, full, py3.10/npMin & py3.11/npPre, dev.py + name: Prerelease deps & coverage report, full, py3.11/npMin & py3.11/npPre, dev.py needs: get_commit_message if: > needs.get_commit_message.outputs.message == 1 @@ -292,9 +292,9 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - # Both use numpy 2.x-dev at build time; 3.10 job then downgrades to + # Both use numpy 2.x-dev at build time; 3.11 job then downgrades to # lowest supported NumPy version in order to test ABI compatibility. 
- python-version: ['3.10', '3.11'] + python-version: ['3.11', '3.12'] steps: - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 @@ -354,9 +354,9 @@ jobs: run: ccache -s - name: Downgrade NumPy from 2.0-dev to lowest supported - if: matrix.python-version == '3.10' + if: matrix.python-version == '3.11' run: | - python -m pip install "numpy==1.23.5" + python -m pip install "numpy==1.25.2" - name: Test SciPy run: | @@ -365,7 +365,7 @@ jobs: ################################################################################# linux_32bit: - name: 32-bit, fast, py3.10/npMin, dev.py + name: 32-bit, fast, py3.11/npMin, dev.py needs: get_commit_message if: > needs.get_commit_message.outputs.message == 1 @@ -385,7 +385,7 @@ jobs: docker pull quay.io/pypa/manylinux2014_i686 docker run -v $(pwd):/scipy --platform=linux/i386 quay.io/pypa/manylinux2014_i686 /bin/bash -c "cd /scipy && \ uname -a && \ - python3.10 -m venv test && \ + python3.11 -m venv test && \ source test/bin/activate && \ python -m pip install doit click rich_click pydevtool meson ninja && \ python -m pip install -r requirements/openblas.txt && \ @@ -398,7 +398,7 @@ jobs: runtime_library_dirs = \$(python -c 'import scipy_openblas32; print(scipy_openblas32.get_lib_dir())') symbol_prefix = scipy_ EOL - python -m pip install numpy==1.23.5 cython pybind11 pytest pytest-timeout pytest-xdist pytest-env 'Pillow<10.0.0' mpmath pythran pooch meson hypothesis && \ + python -m pip install numpy==1.25.2 cython pybind11 pytest pytest-timeout pytest-xdist pytest-env 'Pillow<10.0.0' mpmath pythran pooch meson hypothesis && \ python -c 'import numpy as np; np.show_config()' && \ python dev.py build --with-scipy-openblas && \ python dev.py --no-build test" @@ -408,12 +408,12 @@ jobs: # Purpose is to build for a non-default Python interpreter in a Linux distro # For such a build config, `python`/`python3` executables may not have # build dependencies like Cython or NumPy installed. 
- name: non-default Python interpreter, fast, py3.10/npMin, pip+pytest + name: non-default Python interpreter, fast, py3.11/npMin, pip+pytest needs: get_commit_message if: > needs.get_commit_message.outputs.message == 1 && (github.repository == 'scipy/scipy' || github.repository == '') - runs-on: ubuntu-22.04 + runs-on: ubuntu-24.04 steps: - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: @@ -422,8 +422,10 @@ jobs: - name: Setup system dependencies run: | sudo apt-get -y update - # `python3-dev` yields Python 3.10 on Ubuntu 22.04 - sudo apt install -y python3-dev python3.11-dev ninja-build pkg-config libatlas-base-dev liblapack-dev + sudo apt install software-properties-common + sudo add-apt-repository ppa:deadsnakes/ppa + sudo apt update -y + sudo apt install -y python3.11-dev ninja-build pkg-config libatlas-base-dev liblapack-dev - name: Setup Python build deps run: | @@ -437,7 +439,7 @@ jobs: - name: Install test dependencies run: | python3.11 -m pip install pytest hypothesis - python3.10 -m pip install meson # ensure compile test work with this + python3.12 -m pip install meson # ensure compile test work with this - name: Run tests run: | diff --git a/.github/workflows/macos.yml b/.github/workflows/macos.yml index aafa549369fd..a6832dce47b4 100644 --- a/.github/workflows/macos.yml +++ b/.github/workflows/macos.yml @@ -68,7 +68,7 @@ jobs: with: path: ${{ steps.prep-ccache.outputs.dir }} key: ${{ github.workflow }}-${{ matrix.python-version }}-ccache-macos-${{ steps.prep-ccache.outputs.timestamp }} - # This evaluates to `macOS Tests-3.10-ccache-macos-` which is not + # This evaluates to `macOS Tests-3.11-ccache-macos-` which is not # unique. As the CI matrix is expanded, this will need to be updated to # be unique so that the cache is not restored from a different job altogether. 
restore-keys: | diff --git a/.github/workflows/musllinux.yml b/.github/workflows/musllinux.yml index 4158d74d79eb..f2f309c7c819 100644 --- a/.github/workflows/musllinux.yml +++ b/.github/workflows/musllinux.yml @@ -24,7 +24,7 @@ jobs: uses: ./.github/workflows/commit_message.yml musllinux_x86_64: - name: musl Ubuntu-latest, fast, py3.10/npAny, dev.py + name: musl Ubuntu-latest, fast, py3.11/npAny, dev.py needs: get_commit_message runs-on: ubuntu-latest # If using act to run CI locally the github object does not exist and @@ -57,7 +57,7 @@ jobs: git -c user.email="you@example.com" merge --no-commit my_ref_name fi - ln -s /usr/local/bin/python3.10 /usr/local/bin/python + ln -s /usr/local/bin/python3.11 /usr/local/bin/python git submodule update --init diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index 3508d0b0ae83..e12866b0577a 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -88,7 +88,7 @@ jobs: - [macos-14, macosx, arm64, openblas, "12.3"] - [macos-14, macosx, arm64, accelerate, "14.0"] - [windows-2019, win, AMD64, "", ""] - python: [["cp310", "3.10"], ["cp311", "3.11"], ["cp312", "3.12"], ["cp313", "3.13"], ["cp313t", "3.13"]] + python: [["cp311", "3.11"], ["cp312", "3.12"], ["cp313", "3.13"], ["cp313t", "3.13"]] # python[0] is used to specify the python versions made by cibuildwheel env: @@ -243,7 +243,7 @@ jobs: # Note that this step is *after* specific pythons have been used to # build and test the wheel auto-update-conda: true - python-version: "3.10" + python-version: "3.11" miniforge-version: latest conda-remove-defaults: "true" diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index bd83d3452cf7..3605df736497 100644 --- a/.github/workflows/windows.yml +++ b/.github/workflows/windows.yml @@ -65,7 +65,7 @@ jobs: ############################################################################# full_dev_py_min_numpy_fail_slow: - name: fail slow, full, py3.10/npMin, dev.py + name: fail slow, full, py3.11/npMin, dev.py needs: get_commit_message if: > needs.get_commit_message.outputs.message == 1 @@ -79,7 +79,7 @@ jobs: - name: Setup Python uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0 with: - python-version: '3.10' + python-version: '3.11' cache: 'pip' cache-dependency-path: 'environment.yml' @@ -90,9 +90,8 @@ jobs: - name: pip-packages run: | - # 1.23.5 is currently the oldest numpy usable on cp3.10 according - # to pyproject.toml - python -m pip install numpy==1.23.5 cython pybind11 pythran meson-python meson ninja pytest pytest-xdist pytest-timeout pytest-fail-slow pooch rich_click click doit pydevtool hypothesis + # 1.25.2 is currently our oldest supported NumPy version + python -m pip install numpy==1.25.2 cython pybind11 pythran meson-python meson ninja pytest pytest-xdist pytest-timeout pytest-fail-slow pooch rich_click click doit pydevtool hypothesis python -m pip install -r requirements/openblas.txt - name: Build diff --git a/dev.py b/dev.py index c4bc096e2137..675179bc22d0 100644 --- a/dev.py +++ b/dev.py @@ -310,7 +310,7 @@ def __init__(self, args=None): self.installed = self.build.parent / (self.build.stem + "-install") # relative path for site-package with py version - # i.e. 'lib/python3.10/site-packages' + # i.e. 'lib/python3.11/site-packages' self.site = self.get_site_packages() def add_sys_path(self): @@ -1449,7 +1449,7 @@ class ShowDirs(Python): PYTHONPATH sets the default search path for module files for the interpreter. 
Here, it includes the path to the local SciPy build - (typically `.../build-install/lib/python3.10/site-packages`). + (typically `.../build-install/lib/python3.11/site-packages`). Use the global option `-n` to skip the building step, e.g.: `python dev.py -n show_PYTHONPATH` @@ -1544,8 +1544,7 @@ def cpu_count(only_physical_cores=False): or the LOKY_MAX_CPU_COUNT environment variable. If the number of physical cores is not found, return the number of logical cores. - Note that on Windows, the returned number of CPUs cannot exceed 61 (or 60 for - Python < 3.10), see: + Note that on Windows, the returned number of CPUs cannot exceed 61, see: https://bugs.python.org/issue26903. It is also always larger or equal to 1. diff --git a/meson.build b/meson.build index 94ffaeb68cfe..c2f11aba1cff 100644 --- a/meson.build +++ b/meson.build @@ -17,7 +17,7 @@ project( py3 = import('python').find_installation(pure: false) py3_dep = py3.dependency() -min_numpy_version = '1.23.5' # keep in sync with pyproject.toml +min_numpy_version = '1.25.2' # keep in sync with pyproject.toml # Emit a warning for 32-bit Python installs on Windows; users are getting # unexpected from-source builds there because we no longer provide wheels. diff --git a/pyproject.toml b/pyproject.toml index 13ae3d7d015b..4f66d4c126fc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -44,9 +44,9 @@ maintainers = [ # Note: Python and NumPy upper version bounds should be set correctly in # release branches, see: # https://scipy.github.io/devdocs/dev/core-dev/index.html#version-ranges-for-numpy-and-other-dependencies -requires-python = ">=3.10" +requires-python = ">=3.11" dependencies = [ - "numpy>=1.23.5", + "numpy>=1.25.2", ] # keep in sync with `min_numpy_version` in meson.build readme = "README.rst" classifiers = [ @@ -57,7 +57,6 @@ classifiers = [ "Programming Language :: C", "Programming Language :: Python", "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", "Programming Language :: Python :: 3.13", diff --git a/requirements/build.txt b/requirements/build.txt index d5eee6315c0b..e963f32adda4 100644 --- a/requirements/build.txt +++ b/requirements/build.txt @@ -5,4 +5,4 @@ Cython>=3.0.8 pybind11>=2.13.2 pythran>=0.14.0 ninja -numpy>=1.23.5 +numpy>=1.25.2 diff --git a/requirements/default.txt b/requirements/default.txt index 81507967821e..2c7a185bc5e4 100644 --- a/requirements/default.txt +++ b/requirements/default.txt @@ -1,3 +1,3 @@ # Generated via tools/generate_requirements.py. # Do not edit this file; modify `pyproject.toml` instead and run `python tools/generate_requirements.py`. 
-numpy>=1.23.5
+numpy>=1.25.2
diff --git a/scipy/__init__.py b/scipy/__init__.py
index ca6bb93fcc73..da5d3c21e122 100644
--- a/scipy/__init__.py
+++ b/scipy/__init__.py
@@ -63,7 +63,7 @@
 from scipy._lib import _pep440

 # In maintenance branch, change to np_maxversion N+3 if numpy is at N
-np_minversion = '1.23.5'
+np_minversion = '1.25.2'
 np_maxversion = '9.9.99'
 if (_pep440.parse(__numpy_version__) < _pep440.Version(np_minversion) or
     _pep440.parse(__numpy_version__) >= _pep440.Version(np_maxversion)):
diff --git a/tools/lint.toml b/tools/lint.toml
index f775d534b36c..e880e1c2d66e 100644
--- a/tools/lint.toml
+++ b/tools/lint.toml
@@ -8,8 +8,8 @@ force-exclude = true

 line-length = 88

-# Assume Python 3.10
-target-version = "py310"
+# Assume Python 3.11
+target-version = "py311"

 [lint]
 # Enable Pyflakes `E` and `F` and PyUpgrade `UP` codes by default.

From 3921d97b59b3936acb2ee1f8a0da3e8547912c97 Mon Sep 17 00:00:00 2001
From: Matt Haberland
Date: Fri, 7 Feb 2025 10:45:28 -0800
Subject: [PATCH 62/63] ENH: stats: support for array API compatible masked
 arrays (#22393)

---
 .github/workflows/array_api.yml  |   4 +
 scipy/_lib/_array_api.py         |   7 +-
 scipy/_lib/_util.py              |   9 ++-
 scipy/stats/_morestats.py        |   6 +-
 scipy/stats/_stats_py.py         |  49 +++++++-----
 scipy/stats/tests/meson.build    |   1 +
 scipy/stats/tests/test_marray.py | 127 +++++++++++++++++++++++++++++++
 7 files changed, 177 insertions(+), 26 deletions(-)
 create mode 100644 scipy/stats/tests/test_marray.py

diff --git a/.github/workflows/array_api.yml b/.github/workflows/array_api.yml
index c66f14070a8c..af327fd5322f 100644
--- a/.github/workflows/array_api.yml
+++ b/.github/workflows/array_api.yml
@@ -74,6 +74,10 @@ jobs:
         run: |
           python -m pip install dask[array]

+      - name: Install MArray
+        run: |
+          python -m pip install marray
+
       - name: Prepare compiler cache
         id: prep-ccache
         shell: bash
diff --git a/scipy/_lib/_array_api.py b/scipy/_lib/_array_api.py
index d940c730e852..d8397da6b22a 100644
--- a/scipy/_lib/_array_api.py
+++ b/scipy/_lib/_array_api.py
@@ -35,7 +35,7 @@
 __all__ = [
     '_asarray', 'array_namespace', 'assert_almost_equal',
     'assert_array_almost_equal',
-    'get_xp_devices', 'default_xp', 'is_lazy_array',
+    'get_xp_devices', 'default_xp', 'is_lazy_array', 'is_marray',
     'is_array_api_strict', 'is_complex', 'is_cupy', 'is_jax', 'is_numpy',
     'is_torch', 'SCIPY_ARRAY_API', 'SCIPY_DEVICE', 'scipy_namespace_for',
     'xp_assert_close', 'xp_assert_equal', 'xp_assert_less',
@@ -647,3 +647,8 @@ def xp_default_dtype(xp):
     else:
         # we default to float64
         return xp.float64
+
+
+def is_marray(xp):
+    """Returns True if `xp` is an MArray namespace; False otherwise."""
+    return "marray" in xp.__name__
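
For orientation, a minimal sketch (illustrative only, not part of the patch)
of the MArray objects this series targets; it assumes the `marray` package and
the `_get_namespace` helper used by the new test file below, and that MArray
reductions skip masked elements:

    import numpy as np
    import marray

    mxp = marray._get_namespace(np)   # array API namespace with mask support
    x = mxp.asarray(np.asarray([1., 2., 8.]),
                    mask=np.asarray([False, True, False]))
    print(x.data, x.mask)             # the data and the mask travel together
    print(mxp.sum(x))                 # masked reduction: 1. + 8. = 9.

`is_marray` above keys off the namespace name, so a namespace created this way
reports True while plain NumPy, torch, etc. are unaffected.

diff --git a/scipy/_lib/_util.py b/scipy/_lib/_util.py
index 1c7b572582e5..2d7bfa839801 100644
--- a/scipy/_lib/_util.py
+++ b/scipy/_lib/_util.py
@@ -12,7 +12,7 @@
 import numpy as np

 from scipy._lib._array_api import (Array, array_namespace, is_lazy_array,
-                                   is_numpy, xp_size)
+                                   is_numpy, is_marray, xp_size)
 from scipy._lib._docscrape import FunctionDoc, Parameter
 from scipy._lib._sparse import issparse
 import scipy._lib.array_api_extra as xpx
@@ -150,8 +150,11 @@ def _lazywhere(cond, arrays, f, fillvalue=None, f2=None):
             dtype = (temp1 * fillvalue).dtype
         else:
             dtype = xp.result_type(temp1.dtype, fillvalue)
-        out = xp.full(cond.shape, dtype=dtype,
-                      fill_value=xp.asarray(fillvalue, dtype=dtype))
+        # whenever mdhaber/marray#89 is resolved, `data` won't need to be extracted here
+        # whenever the 2024.12 array API standard is released, no need for fill_value to be an array
+        fill_value = 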
xp.asarray(fillvalue, dtype=dtype) + fill_value = fill_value.data if is_marray(xp) else fill_value + out = xp.full(cond.shape, dtype=dtype, fill_value=fill_value) else: ncond = ~cond temp2 = xp.asarray(f2(*(arr[ncond] for arr in arrays))) diff --git a/scipy/stats/_morestats.py b/scipy/stats/_morestats.py index e832157f24c3..74d0d65532ca 100644 --- a/scipy/stats/_morestats.py +++ b/scipy/stats/_morestats.py @@ -23,7 +23,7 @@ from . import _stats_py, _wilcoxon from ._fit import FitResult from ._stats_py import (find_repeats, _get_pvalue, SignificanceResult, # noqa:F401 - _SimpleNormal, _SimpleChi2) + _SimpleNormal, _SimpleChi2, _length_nonmasked) from .contingency import chi2_contingency from . import distributions from ._distn_infrastructure import rv_generic @@ -309,7 +309,7 @@ def kstat(data, n=2, *, axis=None): data = xp.reshape(data, (-1,)) axis = 0 - N = data.shape[axis] + N = _length_nonmasked(data, axis, xp=xp) S = [None] + [xp.sum(data**k, axis=axis) for k in range(1, n + 1)] if n == 1: @@ -375,7 +375,7 @@ def kstatvar(data, n=2, *, axis=None): if axis is None: data = xp.reshape(data, (-1,)) axis = 0 - N = data.shape[axis] + N = _length_nonmasked(data, axis, xp=xp) if n == 1: return kstat(data, n=2, axis=axis, _no_deco=True) * 1.0/N diff --git a/scipy/stats/_stats_py.py b/scipy/stats/_stats_py.py index 3d8542f2dc75..cf9c4f3d4a68 100644 --- a/scipy/stats/_stats_py.py +++ b/scipy/stats/_stats_py.py @@ -75,6 +75,7 @@ _asarray, array_namespace, is_numpy, + is_marray, xp_size, xp_moveaxis_to_end, xp_sign, @@ -599,7 +600,7 @@ def _put_val_to_limits(a, limits, inclusive, val=np.nan, xp=None): """ xp = array_namespace(a) if xp is None else xp - mask = xp.zeros(a.shape, dtype=xp.bool) + mask = xp.zeros_like(a, dtype=xp.bool) if limits is None: return a, mask lower_limit, upper_limit = limits @@ -967,7 +968,8 @@ def tsem(a, limits=None, inclusive=(True, True), axis=0, ddof=1): # by the axis_nan_policy decorator shortly. sd = _xp_var(a, correction=ddof, axis=axis, nan_policy='omit', xp=xp)**0.5 - n_obs = xp.sum(~xp.isnan(a), axis=axis, dtype=sd.dtype) + not_nan = xp.astype(~xp.isnan(a), a.dtype) + n_obs = xp.sum(not_nan, axis=axis, dtype=sd.dtype) return sd / n_obs**0.5 @@ -1146,13 +1148,12 @@ def _demean(a, mean, axis, *, xp, precision_warning=True): with np.errstate(divide='ignore', invalid='ignore'): rel_diff = xp.max(xp.abs(a_zero_mean), axis=axis, keepdims=True) / xp.abs(mean) + + n = _length_nonmasked(a, axis, xp=xp) with np.errstate(invalid='ignore'): - precision_loss = xp.any(rel_diff < eps) - n = (xp_size(a) if axis is None - # compact way to deal with axis tuples or ints - else np.prod(np.asarray(a.shape)[np.asarray(axis)])) + precision_loss = xp.any(xp.asarray(rel_diff < eps) & xp.asarray(n > 1)) - if precision_loss and n > 1 and precision_warning: + if precision_loss and precision_warning: message = ("Precision loss occurred in moment calculation due to " "catastrophic cancellation. This occurs when the data " "are nearly identical. 
Results may be unreliable.") @@ -1222,11 +1223,23 @@ def _var(x, axis=0, ddof=0, mean=None, xp=None): xp = array_namespace(x) if xp is None else xp var = _moment(x, 2, axis, mean=mean, xp=xp) if ddof != 0: - n = x.shape[axis] if axis is not None else xp_size(x) + n = _length_nonmasked(x, axis, xp=xp) var *= np.divide(n, n-ddof) # to avoid error on division by zero return var +def _length_nonmasked(x, axis, keepdims=False, xp=None): + xp = array_namespace(x) if xp is None else xp + if is_marray(xp): + if np.iterable(axis): + message = '`axis` must be an integer or None for use with `MArray`.' + raise NotImplementedError(message) + return xp.astype(xp.count(x, axis=axis, keepdims=keepdims), x.dtype) + return (xp_size(x) if axis is None else + # compact way to deal with axis tuples or ints + int(np.prod(np.asarray(x.shape)[np.asarray(axis)]))) + + @_axis_nan_policy_factory( lambda x: x, result_to_tuple=lambda x: (x,), n_outputs=1 ) @@ -1308,7 +1321,7 @@ def skew(a, axis=0, bias=True, nan_policy='propagate'): """ xp = array_namespace(a) a, axis = _chk_asarray(a, axis, xp=xp) - n = a.shape[axis] + n = _length_nonmasked(a, axis, xp=xp) mean = xp.mean(a, axis=axis, keepdims=True) mean_reduced = xp.squeeze(mean, axis=axis) # needed later @@ -1416,7 +1429,7 @@ def kurtosis(a, axis=0, fisher=True, bias=True, nan_policy='propagate'): xp = array_namespace(a) a, axis = _chk_asarray(a, axis, xp=xp) - n = a.shape[axis] + n = _length_nonmasked(a, axis, xp=xp) mean = xp.mean(a, axis=axis, keepdims=True) mean_reduced = xp.squeeze(mean, axis=axis) # needed later m2 = _moment(a, 2, axis, mean=mean, xp=xp) @@ -1526,7 +1539,9 @@ def describe(a, axis=0, ddof=1, bias=True, nan_policy='propagate'): if xp_size(a) == 0: raise ValueError("The input must not be empty.") - n = a.shape[axis] + # use xp.astype when data-apis/array-api-compat#226 is resolved + n = xp.asarray(_length_nonmasked(a, axis, xp=xp), dtype=xp.int64) + n = n[()] if n.ndim == 0 else n mm = (xp.min(a, axis=axis), xp.max(a, axis=axis)) m = xp.mean(a, axis=axis) v = _var(a, axis=axis, ddof=ddof, xp=xp) @@ -2624,7 +2639,7 @@ def sem(a, axis=0, ddof=1, nan_policy='propagate'): a = xp.reshape(a, (-1,)) axis = 0 a = xpx.atleast_nd(xp.asarray(a), ndim=1, xp=xp) - n = a.shape[axis] + n = _length_nonmasked(a, axis, xp=xp) s = xp.std(a, axis=axis, correction=ddof) / n**0.5 return s @@ -10820,7 +10835,8 @@ def _xp_mean(x, /, *, axis=None, weights=None, keepdims=False, nan_policy='propa if weights is None: return xp.mean(x, axis=axis, keepdims=keepdims) - norm = xp.sum(weights, axis=axis) + # ones_like ensures that the mask of `x` is considered + norm = xp.sum(xp.ones_like(x) * weights, axis=axis) wsum = xp.sum(x * weights, axis=axis) with np.errstate(divide='ignore', invalid='ignore'): res = wsum/norm @@ -10864,12 +10880,7 @@ def _xp_var(x, /, *, axis=None, correction=0, keepdims=False, nan_policy='propag var = _xp_mean(x_mean * x_mean_conj, keepdims=keepdims, **kwargs) if correction != 0: - if axis is None: - n = xp_size(x) - elif np.iterable(axis): # note: using NumPy on `axis` is OK - n = math.prod(x.shape[i] for i in axis) - else: - n = x.shape[axis] + n = _length_nonmasked(x, axis, xp=xp) # Or two lines with ternaries : ) # axis = range(x.ndim) if axis is None else axis # n = math.prod(x.shape[i] for i in axis) if iterable(axis) else x.shape[axis] diff --git a/scipy/stats/tests/meson.build b/scipy/stats/tests/meson.build index 427158211058..132953f31841 100644 --- a/scipy/stats/tests/meson.build +++ b/scipy/stats/tests/meson.build @@ -19,6 +19,7 @@ 
py3.install_sources([ 'test_fit.py', 'test_hypotests.py', 'test_kdeoth.py', + 'test_marray.py', 'test_mgc.py', 'test_morestats.py', 'test_mstats_basic.py', diff --git a/scipy/stats/tests/test_marray.py b/scipy/stats/tests/test_marray.py new file mode 100644 index 000000000000..0bc99bcd95f9 --- /dev/null +++ b/scipy/stats/tests/test_marray.py @@ -0,0 +1,127 @@ +import pytest +import numpy as np +from scipy import stats + +from scipy._lib._array_api import xp_assert_close, xp_assert_equal +from scipy.stats._stats_py import _xp_mean, _xp_var + +marray = pytest.importorskip('marray') +skip_backend = pytest.mark.skip_xp_backends + + +def get_arrays(n_arrays, *, dtype='float64', xp=np, shape=(7, 8), seed=84912165484321): + mxp = marray._get_namespace(xp) + rng = np.random.default_rng(seed) + + datas, masks = [], [] + for i in range(n_arrays): + data = rng.random(size=shape) + if dtype.startswith('complex'): + data = 10*data * 10j*rng.standard_normal(size=shape) + data = data.astype(dtype) + datas.append(data) + mask = rng.random(size=shape) > 0.75 + masks.append(mask) + + marrays = [] + nan_arrays = [] + for array, mask in zip(datas, masks): + marrays.append(mxp.asarray(array, mask=mask)) + nan_array = array.copy() + nan_array[mask] = xp.nan + nan_arrays.append(nan_array) + + return mxp, marrays, nan_arrays + + +@skip_backend('dask.array', reason='Arrays need `device` attribute: dask/dask#11711') +@skip_backend('jax.numpy', reason="JAX doesn't allow item assignment.") +@pytest.mark.parametrize('fun, kwargs', [(stats.gmean, {}), + (stats.hmean, {}), + (stats.pmean, {'p': 2})]) +@pytest.mark.parametrize('axis', [0, 1]) +def test_xmean(fun, kwargs, axis, xp): + mxp, marrays, narrays = get_arrays(2, xp=xp) + res = fun(marrays[0], weights=marrays[1], axis=axis, **kwargs) + ref = fun(narrays[0], weights=narrays[1], nan_policy='omit', axis=axis, **kwargs) + xp_assert_close(res.data, xp.asarray(ref)) + + +@skip_backend('dask.array', reason='Arrays need `device` attribute: dask/dask#11711') +@skip_backend('jax.numpy', reason="JAX doesn't allow item assignment.") +@pytest.mark.parametrize('axis', [0, 1, None]) +@pytest.mark.parametrize('keepdims', [False, True]) +def test_xp_mean(axis, keepdims, xp): + mxp, marrays, narrays = get_arrays(2, xp=xp) + kwargs = dict(axis=axis, keepdims=keepdims) + res = _xp_mean(marrays[0], weights=marrays[1], **kwargs) + ref = _xp_mean(narrays[0], weights=narrays[1], nan_policy='omit', **kwargs) + xp_assert_close(res.data, xp.asarray(ref)) + + +@skip_backend('dask.array', reason='Arrays need `device` attribute: dask/dask#11711') +@skip_backend('jax.numpy', reason="JAX doesn't allow item assignment.") +@skip_backend('torch', reason="array-api-compat#242") +@pytest.mark.parametrize('fun, kwargs', + [(stats.moment, {'order': 2}), + (stats.skew, {}), + (stats.skew, {'bias': False}), + (stats.kurtosis, {}), + (stats.kurtosis, {'bias': False}), + (stats.sem, {}), + (stats.kstat, {'n': 1}), + (stats.kstat, {'n': 2}), + (stats.kstat, {'n': 3}), + (stats.kstat, {'n': 4}), + (stats.kstatvar, {'n': 1}), + (stats.kstatvar, {'n': 2}), + (stats.circmean, {}), + (stats.circvar, {}), + (stats.circstd, {}), + (_xp_var, {}), + (stats.tmean, {'limits': (0.1, 0.9)}), + (stats.tvar, {'limits': (0.1, 0.9)}), + (stats.tmin, {'lowerlimit': 0.5}), + (stats.tmax, {'upperlimit': 0.5}), + (stats.tstd, {'limits': (0.1, 0.9)}), + (stats.tsem, {'limits': (0.1, 0.9)}), + ]) +@pytest.mark.parametrize('axis', [0, 1, None]) +def test_several(fun, kwargs, axis, xp): + mxp, marrays, narrays = get_arrays(1, 
xp=xp)
+    kwargs = dict(axis=axis) | kwargs
+    res = fun(marrays[0], **kwargs)
+    ref = fun(narrays[0], nan_policy='omit', **kwargs)
+    xp_assert_close(res.data, xp.asarray(ref))
+
+
+@skip_backend('dask.array', reason='Arrays need `device` attribute: dask/dask#11711')
+@skip_backend('jax.numpy', reason="JAX doesn't allow item assignment.")
+@skip_backend('torch', reason="array-api-compat#242")
+@pytest.mark.parametrize('axis', [0, 1])
+@pytest.mark.parametrize('kwargs', [{}])
+def test_describe(axis, kwargs, xp):
+    mxp, marrays, narrays = get_arrays(1, xp=xp)
+    kwargs = dict(axis=axis) | kwargs
+    res = stats.describe(marrays[0], **kwargs)
+    ref = stats.describe(narrays[0], nan_policy='omit', **kwargs)
+    xp_assert_close(res.nobs.data, xp.asarray(ref.nobs))
+    xp_assert_close(res.minmax[0].data, xp.asarray(ref.minmax[0].data))
+    xp_assert_close(res.minmax[1].data, xp.asarray(ref.minmax[1].data))
+    xp_assert_close(res.variance.data, xp.asarray(ref.variance.data))
+    xp_assert_close(res.skewness.data, xp.asarray(ref.skewness.data))
+    xp_assert_close(res.kurtosis.data, xp.asarray(ref.kurtosis.data))
+
+
+@skip_backend('dask.array', reason='Arrays need `device` attribute: dask/dask#11711')
+@skip_backend('jax.numpy', reason="JAX doesn't allow item assignment.")
+@skip_backend('torch', reason="array-api-compat#242")
+@pytest.mark.parametrize('fun', [stats.zscore, stats.gzscore, stats.zmap])
+@pytest.mark.parametrize('axis', [0, 1, None])
+def test_zscore(fun, axis, xp):
+    mxp, marrays, narrays = (get_arrays(2, xp=xp) if fun == stats.zmap
+                             else get_arrays(1, xp=xp))
+    res = fun(*marrays, axis=axis)
+    ref = xp.asarray(fun(*narrays, nan_policy='omit', axis=axis))
+    xp_assert_close(res.data[~res.mask], ref[~xp.isnan(ref)])
+    xp_assert_equal(res.mask, marrays[0].mask)

From 3daa871a213c3a5fa118d34a8cf4f31632b8b6ce Mon Sep 17 00:00:00 2001
From: Matt Haberland
Date: Fri, 7 Feb 2025 17:51:42 -0800
Subject: [PATCH 63/63] ENH: stats: add marray support to most remaining
 array API functions
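
The general pattern, sketched for illustration (this snippet is not part of
the diffs below; it assumes the `marray` package used by the tests, and that
masked elements behave like NaNs under `nan_policy='omit'`):

    import numpy as np
    import marray
    from scipy import stats

    mxp = marray._get_namespace(np)
    x = mxp.asarray(np.asarray([[1., 2., 3.], [4., 5., 9.]]),
                    mask=np.asarray([[False, True, False],
                                     [False, False, False]]))
    res = stats.skew(x, axis=1)   # mask-aware result, itself an MArray
    ref = stats.skew(np.where(x.mask, np.nan, x.data), axis=1,
                     nan_policy='omit')
    np.testing.assert_allclose(res.data, ref)

---
 scipy/_lib/_array_api.py                     |   3 +-
 scipy/_lib/_util.py                          |   5 +-
 .../special/_support_alternative_backends.py |  13 +-
 scipy/stats/_stats_py.py                     | 131 ++++++++--------
 scipy/stats/tests/test_axis_nan_policy.py    |   4 +-
 scipy/stats/tests/test_marray.py             | 147 ++++++++++++++++++
 scipy/stats/tests/test_stats.py              |  88 +++++------
 7 files changed, 268 insertions(+), 123 deletions(-)

diff --git a/scipy/_lib/_array_api.py b/scipy/_lib/_array_api.py
index d8397da6b22a..f9ac49bb2e04 100644
--- a/scipy/_lib/_array_api.py
+++ b/scipy/_lib/_array_api.py
@@ -532,7 +532,8 @@ def xp_vector_norm(x: Array, /, *,
             )
         # return (x @ x)**0.5
         # or to get the right behavior with nd, complex arrays
-        return xp.sum(xp.conj(x) * x, axis=axis, keepdims=keepdims)**0.5
+        x_conj = xp.conj(x) if xp.isdtype(x.dtype, 'complex floating') else x
+        return xp.sum(x_conj * x, axis=axis, keepdims=keepdims)**0.5
     else:
         # to maintain backwards compatibility
         return np.linalg.norm(x, ord=ord, axis=axis, keepdims=keepdims)
diff --git a/scipy/_lib/_util.py b/scipy/_lib/_util.py
index 2d7bfa839801..96367dab3f3f 100644
--- a/scipy/_lib/_util.py
+++ b/scipy/_lib/_util.py
@@ -1092,7 +1092,10 @@ def _get_nan(*data, xp=None):
     except DTypePromotionError:
         # fallback to float64
         dtype = xp.float64
-    return xp.asarray(xp.nan, dtype=dtype)[()]
+    res = xp.asarray(xp.nan, dtype=dtype)[()]
+    # whenever mdhaber/marray#89 is resolved, could just return `res`
+    # whenever the 2024.12 array API standard is released, no need to return an array
+    return res.data if hasattr(res, 'mask') else res


 def 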
normalize_axis_index(axis, ndim):
diff --git a/scipy/special/_support_alternative_backends.py b/scipy/special/_support_alternative_backends.py
index fb92e7e95707..46ff427e3bca 100644
--- a/scipy/special/_support_alternative_backends.py
+++ b/scipy/special/_support_alternative_backends.py
@@ -39,9 +39,16 @@ def get_array_special_func(f_name, xp, n_array_args):
     def __f(*args, _f=_f, _xp=xp, **kwargs):
         array_args = args[:n_array_args]
         other_args = args[n_array_args:]
-        array_args = [np.asarray(arg) for arg in array_args]
-        out = _f(*array_args, *other_args, **kwargs)
-        return _xp.asarray(out)
+        if hasattr(array_args[0], 'mask') and not isinstance(array_args[0], np.ndarray):
+            # masked input (e.g. MArray): evaluate the special function on the
+            # raw data, then OR the argument masks together for the result
+            data_args = [np.asarray(arg.data) for arg in array_args]
+            out = _f(*data_args, *other_args, **kwargs)
+            mask = functools.reduce(lambda x, y: x | y,
+                                    (arg.mask for arg in array_args))
+            return _xp.asarray(out, mask=mask)
+        else:
+            array_args = [np.asarray(arg) for arg in array_args]
+            out = _f(*array_args, *other_args, **kwargs)
+            return _xp.asarray(out)

     return __f

diff --git a/scipy/stats/_stats_py.py b/scipy/stats/_stats_py.py
index cf9c4f3d4a68..02e9409265df 100644
--- a/scipy/stats/_stats_py.py
+++ b/scipy/stats/_stats_py.py
@@ -28,6 +28,7 @@
 """
 import warnings
 import math
+import functools
 from math import gcd
 from collections import namedtuple
 from collections.abc import Sequence
@@ -39,7 +40,7 @@
 from scipy.spatial import distance_matrix
 from scipy.optimize import milp, LinearConstraint

-from scipy._lib._array_api import is_lazy_array
+from scipy._lib._array_api import is_lazy_array, xp_ravel
 from scipy._lib._util import (check_random_state, _get_nan,
                               _rename_parameter, _contains_nan,
                               AxisError, _lazywhere)
@@ -1240,6 +1241,13 @@ def _length_nonmasked(x, axis, keepdims=False, xp=None):
             int(np.prod(np.asarray(x.shape)[np.asarray(axis)])))


+def _share_masks(*args, xp):
+    if hasattr(args[0], 'mask') and not isinstance(args[0], np.ndarray):
+        mask = functools.reduce(lambda x, y: x | y, (arg.mask for arg in args))
+        args = [xp.asarray(arg.data, mask=mask) for arg in args]
+    return args[0] if len(args) == 1 else args
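+# Usage sketch (illustrative): given MArrays `x` and `w` with different masks,
+# `x, w = _share_masks(x, w, xp=xp)` rebuilds both with the union mask
+# `x.mask | w.mask`, so that paired statistics drop exactly the same
+# elements from each operand; non-masked inputs pass through unchanged.
+
+
 @_axis_nan_policy_factory(
     lambda x: x, result_to_tuple=lambda x: (x,), n_outputs=1
 )
@@ -1330,7 +1338,7 @@ def skew(a, axis=0, bias=True, nan_policy='propagate'):
     with np.errstate(all='ignore'):
         eps = xp.finfo(m2.dtype).eps
         zero = m2 <= (eps * mean_reduced)**2
-        vals = xp.where(zero, xp.asarray(xp.nan), m3 / m2**1.5)
+        vals = xp.where(zero, xp.asarray(xp.nan, dtype=m2.dtype), m3 / m2**1.5)
     if not bias:
         can_correct = ~zero & (n > 2)
         if is_lazy_array(can_correct) or xp.any(can_correct):
@@ -1659,22 +1667,24 @@ def skewtest(a, axis=0, nan_policy='propagate', alternative='two-sided'):
     a, axis = _chk_asarray(a, axis, xp=xp)

     b2 = skew(a, axis, _no_deco=True)
-    n = a.shape[axis]
-    if n < 8:
-        message = ("`skewtest` requires at least 8 observations; "
-                   f"only {n=} observations were given.")
-        raise ValueError(message)
-    y = b2 * math.sqrt(((n + 1) * (n + 3)) / (6.0 * (n - 2)))
-    beta2 = (3.0 * (n**2 + 27*n - 70) * (n+1) * (n+3) /
-             ((n-2.0) * (n+5) * (n+7) * (n+9)))
-    W2 = -1 + math.sqrt(2 * (beta2 - 1))
-    delta = 1 / math.sqrt(0.5 * math.log(W2))
-    alpha = math.sqrt(2.0 / (W2 - 1))
-    y = xp.where(y == 0, xp.asarray(1, dtype=y.dtype), y)
-    Z = delta * xp.log(y / alpha + xp.sqrt((y / alpha)**2 + 1))
+    n = xp.asarray(_length_nonmasked(a, axis), dtype=b2.dtype)
+    n = xpx.at(n, n < 8).set(xp.nan)
+    if xp.any(xp.isnan(n)):
+        message = ("`skewtest` requires at least 8 valid observations; "
+                   "slices with fewer 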
observations will produce NaNs.") + warnings.warn(message, SmallSampleWarning, stacklevel=2) - pvalue = _get_pvalue(Z, _SimpleNormal(), alternative, xp=xp) + with np.errstate(divide='ignore', invalid='ignore'): + y = b2 * xp.sqrt(((n + 1) * (n + 3)) / (6.0 * (n - 2))) + beta2 = (3.0 * (n**2 + 27*n - 70) * (n+1) * (n+3) / + ((n-2.0) * (n+5) * (n+7) * (n+9))) + W2 = -1 + xp.sqrt(2 * (beta2 - 1)) + delta = 1 / xp.sqrt(0.5 * xp.log(W2)) + alpha = xp.sqrt(2.0 / (W2 - 1)) + y = xp.where(y == 0, xp.asarray(1, dtype=y.dtype), y) + Z = delta * xp.log(y / alpha + xp.sqrt((y / alpha)**2 + 1)) + pvalue = _get_pvalue(Z, _SimpleNormal(), alternative, xp=xp) Z = Z[()] if Z.ndim == 0 else Z pvalue = pvalue[()] if pvalue.ndim == 0 else pvalue @@ -1760,18 +1770,15 @@ def kurtosistest(a, axis=0, nan_policy='propagate', alternative='two-sided'): xp = array_namespace(a) a, axis = _chk_asarray(a, axis, xp=xp) - n = a.shape[axis] - - if n < 5: - message = ("`kurtosistest` requires at least 5 observations; " - f"only {n=} observations were given.") - raise ValueError(message) - if n < 20: - message = ("`kurtosistest` p-value may be inaccurate with fewer than 20 " - f"observations; only {n=} observations were given.") - warnings.warn(message, stacklevel=2) b2 = kurtosis(a, axis, fisher=False, _no_deco=True) + n = xp.asarray(_length_nonmasked(a, axis), dtype=b2.dtype) + n = xpx.at(n, n < 5).set(xp.nan) + if xp.any(xp.isnan(n)): + message = ("`kurtosistest` requires at least 5 valid observations; " + "slices with fewer observations will produce NaNs.") + warnings.warn(message, SmallSampleWarning, stacklevel=2) + E = 3.0*(n-1) / (n+1) varb2 = 24.0*n*(n-2)*(n-3) / ((n+1)*(n+1.)*(n+3)*(n+5)) # [1]_ Eq. 1 x = (b2-E) / varb2**0.5 # [1]_ Eq. 4 @@ -1791,7 +1798,6 @@ def kurtosistest(a, axis=0, nan_policy='propagate', alternative='two-sided'): warnings.warn(msg, RuntimeWarning, stacklevel=2) Z = (term1 - term2) / (2/(9.0*A))**0.5 # [1]_ Eq. 5 - pvalue = _get_pvalue(Z, _SimpleNormal(), alternative, xp=xp) Z = Z[()] if Z.ndim == 0 else Z @@ -1870,7 +1876,7 @@ def normaltest(a, axis=0, nan_policy='propagate'): k, _ = kurtosistest(a, axis, _no_deco=True) statistic = s*s + k*k - chi2 = _SimpleChi2(xp.asarray(2.)) + chi2 = _SimpleChi2(xp.asarray(2., dtype=statistic.dtype)) pvalue = _get_pvalue(statistic, chi2, alternative='greater', symmetric=False, xp=xp) statistic = statistic[()] if statistic.ndim == 0 else statistic @@ -1938,22 +1944,17 @@ def jarque_bera(x, *, axis=None): For a more detailed example, see :ref:`hypothesis_jarque_bera`. 
""" xp = array_namespace(x) - x = xp.asarray(x) - if axis is None: - x = xp.reshape(x, (-1,)) - axis = 0 - - n = x.shape[axis] - if n == 0: - raise ValueError('At least one observation is required.') + x, axis = _chk_asarray(x, axis, xp=xp) - mu = xp.mean(x, axis=axis, keepdims=True) + mu = _xp_mean(x, axis=axis, keepdims=True) diffx = x - mu s = skew(diffx, axis=axis, _no_deco=True) k = kurtosis(diffx, axis=axis, _no_deco=True) + + n = xp.asarray(_length_nonmasked(x, axis), dtype=mu.dtype) statistic = n / 6 * (s**2 + k**2 / 4) - chi2 = _SimpleChi2(xp.asarray(2.)) + chi2 = _SimpleChi2(xp.asarray(2., dtype=mu.dtype)) pvalue = _get_pvalue(statistic, chi2, alternative='greater', symmetric=False, xp=xp) statistic = statistic[()] if statistic.ndim == 0 else statistic @@ -6102,10 +6103,10 @@ def ttest_1samp(a, popmean, axis=0, nan_policy="propagate", alternative="two-sid xp = array_namespace(a) a, axis = _chk_asarray(a, axis, xp=xp) - n = a.shape[axis] + n = _length_nonmasked(a, axis) df = n - 1 - if n == 0: + if a.shape[axis] == 0: # This is really only needed for *testing* _axis_nan_policy decorator # It won't happen when the decorator is used. NaN = _get_nan(a) @@ -6161,7 +6162,7 @@ def _t_confidence_interval(df, t, confidence_level, alternative, dtype=None, xp= low, high = xp.broadcast_arrays(special.stdtrit(df, p), inf) elif alternative == 0: # 'two-sided' tail_probability = (1 - confidence_level)/2 - p = xp.asarray([tail_probability, 1-tail_probability]) + p = xp.stack((tail_probability, 1-tail_probability)) # axis of p must be the zeroth and orthogonal to all the rest p = xp.reshape(p, tuple([2] + [1]*xp.asarray(df).ndim)) ci = special.stdtrit(df, p) @@ -6223,6 +6224,7 @@ def _equal_var_ttest_denom(v1, n1, v2, n2, xp=None): df = n1 + n2 - 2.0 svar = ((n1 - 1) * v1 + (n2 - 1) * v2) / df denom = xp.sqrt(svar * (1.0 / n1 + 1.0 / n2)) + df = xp.asarray(df, dtype=denom.dtype) return df, denom @@ -6658,6 +6660,9 @@ def ttest_ind(a, b, *, axis=0, equal_var=True, nan_policy='propagate', if xp.isdtype(b.dtype, 'integral'): b = xp.astype(b, default_float) + if axis is None: + a, b, axis = xp_ravel(a), xp_ravel(b), 0 + if not (0 <= trim < .5): raise ValueError("Trimming percentage should be 0 <= `trim` < .5.") @@ -6700,8 +6705,8 @@ def ttest_ind(a, b, *, axis=0, equal_var=True, nan_policy='propagate', return TtestResult(t, prob, df=df, alternative=alternative_nums[alternative], standard_error=denom, estimate=estimate) - n1 = xp.asarray(a.shape[axis], dtype=a.dtype) - n2 = xp.asarray(b.shape[axis], dtype=b.dtype) + n1 = _length_nonmasked(a, axis) + n2 = _length_nonmasked(b, axis) if trim == 0: with np.errstate(divide='ignore', invalid='ignore'): @@ -7063,7 +7068,7 @@ def ttest_rel(a, b, axis=0, nan_policy='propagate', alternative="two-sided"): TtestResult(statistic=-5.879467544540889, pvalue=7.540777129099917e-09, df=499) """ - return ttest_1samp(a - b, popmean=0, axis=axis, alternative=alternative, + return ttest_1samp(a - b, popmean=0., axis=axis, alternative=alternative, _no_deco=True) @@ -7270,6 +7275,7 @@ def _power_divergence(f_obs, f_exp, ddof, axis, lambda_, sum_check=True): bshape = _broadcast_shapes((f_obs_float.shape, f_exp.shape)) f_obs_float = xp.broadcast_to(f_obs_float, bshape) f_exp = xp.broadcast_to(f_exp, bshape) + f_obs_float, f_exp = _share_masks(f_obs_float, f_exp, xp=xp) if sum_check: dtype_res = xp.result_type(f_obs.dtype, f_exp.dtype) @@ -7313,8 +7319,7 @@ def _power_divergence(f_obs, f_exp, ddof, axis, lambda_, sum_check=True): stat = xp.sum(terms, axis=axis) - num_obs = 
xp_size(terms) if axis is None else terms.shape[axis] - ddof = xp.asarray(ddof) + num_obs = _length_nonmasked(terms, axis) df = xp.asarray(num_obs - 1 - ddof) chi2 = _SimpleChi2(df) @@ -8931,46 +8936,45 @@ def combine_pvalues(pvalues, method='fisher', weights=None, *, axis=0): .. [8] https://en.wikipedia.org/wiki/Extensions_of_Fisher%27s_method """ - xp = array_namespace(pvalues) - pvalues = xp.asarray(pvalues) + xp = array_namespace(pvalues, weights) + pvalues, weights = xp_broadcast_promote(pvalues, weights, + force_floating=True, xp=xp) + if xp_size(pvalues) == 0: # This is really only needed for *testing* _axis_nan_policy decorator # It won't happen when the decorator is used. NaN = _get_nan(pvalues) return SignificanceResult(NaN, NaN) - n = pvalues.shape[axis] - # used to convert Python scalar to the right dtype - one = xp.asarray(1, dtype=pvalues.dtype) + n = _length_nonmasked(pvalues, axis) + n = xp.asarray(n, dtype=pvalues.dtype) if method == 'fisher': statistic = -2 * xp.sum(xp.log(pvalues), axis=axis) - chi2 = _SimpleChi2(2*n*one) + chi2 = _SimpleChi2(2*n) pval = _get_pvalue(statistic, chi2, alternative='greater', symmetric=False, xp=xp) elif method == 'pearson': statistic = 2 * xp.sum(xp.log1p(-pvalues), axis=axis) - chi2 = _SimpleChi2(2*n*one) + chi2 = _SimpleChi2(2*n) pval = _get_pvalue(-statistic, chi2, alternative='less', symmetric=False, xp=xp) elif method == 'mudholkar_george': - normalizing_factor = math.sqrt(3/n)/xp.pi + normalizing_factor = xp.sqrt(3/n)/xp.pi statistic = (-xp.sum(xp.log(pvalues), axis=axis) + xp.sum(xp.log1p(-pvalues), axis=axis)) - nu = 5*n + 4 - approx_factor = math.sqrt(nu / (nu - 2)) - t = _SimpleStudentT(nu*one) + nu = 5*n + 4 + approx_factor = xp.sqrt(nu / (nu - 2)) + t = _SimpleStudentT(nu) pval = _get_pvalue(statistic * normalizing_factor * approx_factor, t, alternative="greater", xp=xp) elif method == 'tippett': statistic = xp.min(pvalues, axis=axis) - beta = _SimpleBeta(one, n*one) + beta = _SimpleBeta(xp.ones_like(n), n) pval = _get_pvalue(statistic, beta, alternative='less', symmetric=False, xp=xp) elif method == 'stouffer': if weights is None: weights = xp.ones_like(pvalues, dtype=pvalues.dtype) - elif weights.shape[axis] != n: - raise ValueError("pvalues and weights must be of the same " - "length along `axis`.") + pvalues, weights = _share_masks(pvalues, weights, xp=xp) norm = _SimpleNormal() Zi = norm.isf(pvalues) @@ -10798,6 +10802,8 @@ def _xp_mean(x, /, *, axis=None, weights=None, keepdims=False, nan_policy='propa return gmean(x, weights=weights, axis=axis, keepdims=keepdims) x, weights = xp_broadcast_promote(x, weights, force_floating=True) + if weights is not None: + x, weights = _share_masks(x, weights, xp=xp) # handle the special case of zero-sized arrays message = (too_small_1d_not_omit if (x.ndim == 1 or axis is None) @@ -10835,8 +10841,7 @@ def _xp_mean(x, /, *, axis=None, weights=None, keepdims=False, nan_policy='propa if weights is None: return xp.mean(x, axis=axis, keepdims=keepdims) - # ones_like ensures that the mask of `x` is considered - norm = xp.sum(xp.ones_like(x) * weights, axis=axis) + norm = xp.sum(weights, axis=axis) wsum = xp.sum(x * weights, axis=axis) with np.errstate(divide='ignore', invalid='ignore'): res = wsum/norm diff --git a/scipy/stats/tests/test_axis_nan_policy.py b/scipy/stats/tests/test_axis_nan_policy.py index be8c04d821e1..44e402ce2f85 100644 --- a/scipy/stats/tests/test_axis_nan_policy.py +++ b/scipy/stats/tests/test_axis_nan_policy.py @@ -443,7 +443,7 @@ def unpacker(res): n_outputs=n_outputs, 
nan_policy=nan_policy, paired=paired, _no_deco=True, **kwds) - except (ValueError, RuntimeWarning, ZeroDivisionError) as ea: + except (ValueError, RuntimeWarning, ZeroDivisionError, UserWarning) as ea: ea_str = str(ea) if any([str(ea_str).startswith(msg) for msg in too_small_messages]): res_1da = np.full(n_outputs, np.nan) @@ -554,7 +554,7 @@ def unpacker(res): res1da = nan_policy_1d(hypotest, data_raveled, unpacker, *args, n_outputs=n_outputs, nan_policy=nan_policy, paired=paired, _no_deco=True, **kwds) - except (RuntimeWarning, ValueError, ZeroDivisionError) as ea: + except (RuntimeWarning, ValueError, ZeroDivisionError, UserWarning) as ea: res1da = None ea_str = str(ea) diff --git a/scipy/stats/tests/test_marray.py b/scipy/stats/tests/test_marray.py index 0bc99bcd95f9..df6644ab1ff2 100644 --- a/scipy/stats/tests/test_marray.py +++ b/scipy/stats/tests/test_marray.py @@ -4,6 +4,8 @@ from scipy._lib._array_api import xp_assert_close, xp_assert_equal from scipy.stats._stats_py import _xp_mean, _xp_var +from scipy.stats._axis_nan_policy import _axis_nan_policy_factory + marray = pytest.importorskip('marray') skip_backend = pytest.mark.skip_xp_backends @@ -125,3 +127,148 @@ def test_zscore(fun, axis, xp): ref = xp.asarray(fun(*narrays, nan_policy='omit', axis=axis)) xp_assert_close(res.data[~res.mask], ref[~xp.isnan(ref)]) xp_assert_equal(res.mask, marrays[0].mask) + + +@skip_backend('dask.array', reason='Arrays need `device` attribute: dask/dask#11711') +@skip_backend('jax.numpy', reason="JAX doesn't allow item assignment.") +@skip_backend('torch', reason="array-api-compat#242") +@skip_backend('cupy', reason="special functions won't work") +@pytest.mark.parametrize('f_name', ['ttest_1samp', 'ttest_rel', 'ttest_ind']) +@pytest.mark.parametrize('axis', [0, 1, None]) +def test_ttest(f_name, axis, xp): + f = getattr(stats, f_name) + mxp, marrays, narrays = get_arrays(2, xp=xp) + if f_name == 'ttest_1samp': + marrays[1] = mxp.mean(marrays[1], axis=axis, keepdims=axis is not None) + narrays[1] = np.nanmean(narrays[1], axis=axis, keepdims=axis is not None) + res = f(*marrays, axis=axis) + ref = f(*narrays, nan_policy='omit', axis=axis) + xp_assert_close(res.statistic.data, xp.asarray(ref.statistic)) + xp_assert_close(res.pvalue.data, xp.asarray(ref.pvalue)) + res_ci = res.confidence_interval() + ref_ci = ref.confidence_interval() + xp_assert_close(res_ci.low.data, xp.asarray(ref_ci.low)) + xp_assert_close(res_ci.high.data, xp.asarray(ref_ci.high)) + + +@skip_backend('dask.array', reason='Arrays need `device` attribute: dask/dask#11711') +@skip_backend('jax.numpy', reason="JAX doesn't allow item assignment.") +@skip_backend('torch', reason="array-api-compat#242") +@skip_backend('cupy', reason="special functions won't work") +@pytest.mark.filterwarnings("ignore::scipy.stats._axis_nan_policy.SmallSampleWarning") +@pytest.mark.parametrize('f_name', ['skewtest', 'kurtosistest', + 'normaltest', 'jarque_bera']) +@pytest.mark.parametrize('axis', [0, 1, None]) +def test_normality_tests(f_name, axis, xp): + f = getattr(stats, f_name) + mxp, marrays, narrays = get_arrays(1, xp=xp, shape=(10, 11)) + + res = f(*marrays, axis=axis) + ref = f(*narrays, nan_policy='omit', axis=axis) + + xp_assert_close(res.statistic.data, xp.asarray(ref.statistic)) + xp_assert_close(res.pvalue.data, xp.asarray(ref.pvalue)) + + +def pd_nsamples(kwargs): + return 2 if kwargs.get('f_exp', None) is not None else 1 + + +@_axis_nan_policy_factory(lambda *args: tuple(args), paired=True, n_samples=pd_nsamples) +def 
power_divergence_ref(f_obs, f_exp=None, *, ddof, lambda_, axis=0): + return stats.power_divergence(f_obs, f_exp, axis=axis, ddof=ddof, lambda_=lambda_) + + +@skip_backend('dask.array', reason='Arrays need `device` attribute: dask/dask#11711') +@skip_backend('jax.numpy', reason="JAX doesn't allow item assignment.") +@skip_backend('torch', reason="array-api-compat#242") +@skip_backend('cupy', reason="special functions won't work") +@pytest.mark.parametrize('lambda_', ['pearson', 'log-likelihood', 'freeman-tukey', + 'mod-log-likelihood', 'neyman', 'cressie-read', + 'chisquare']) +@pytest.mark.parametrize('ddof', [0, 1]) +@pytest.mark.parametrize('axis', [0, 1, None]) +def test_power_divergence_chisquare(lambda_, ddof, axis, xp): + mxp, marrays, narrays = get_arrays(2, xp=xp, shape=(5, 6)) + + kwargs = dict(axis=axis, ddof=ddof) + if lambda_ == 'chisquare': + lambda_ = "pearson" + def f(*args, **kwargs): + return stats.chisquare(*args, **kwargs) + else: + def f(*args, **kwargs): + return stats.power_divergence(*args, lambda_=lambda_, **kwargs) + + # test 1-arg + res = f(marrays[0], **kwargs) + ref = power_divergence_ref(narrays[0], nan_policy='omit', lambda_=lambda_, **kwargs) + + xp_assert_close(res.statistic.data, xp.asarray(ref[0])) + xp_assert_close(res.pvalue.data, xp.asarray(ref[1])) + + # test 2-arg + common_mask = np.isnan(narrays[0]) | np.isnan(narrays[1]) + normalize = (np.nansum(narrays[1] * ~common_mask, axis=axis, keepdims=True) + / np.nansum(narrays[0] * ~common_mask, axis=axis, keepdims=True)) + marrays[0] *= xp.asarray(normalize) + narrays[0] *= normalize + + res = f(*marrays, **kwargs) + ref = power_divergence_ref(*narrays, nan_policy='omit', lambda_=lambda_, **kwargs) + + xp_assert_close(res.statistic.data, xp.asarray(ref[0])) + xp_assert_close(res.pvalue.data, xp.asarray(ref[1])) + + +@skip_backend('dask.array', reason='Arrays need `device` attribute: dask/dask#11711') +@skip_backend('jax.numpy', reason="JAX doesn't allow item assignment.") +@skip_backend('torch', reason="array-api-compat#242") +@skip_backend('cupy', reason="special functions won't work") +@pytest.mark.parametrize('method', ['fisher', 'pearson', 'mudholkar_george', + 'tippett', 'stouffer']) +@pytest.mark.parametrize('axis', [0, 1, None]) +def test_combine_pvalues(method, axis, xp): + mxp, marrays, narrays = get_arrays(2, xp=xp, shape=(10, 11)) + + kwargs = dict(method=method, axis=axis) + res = stats.combine_pvalues(marrays[0], **kwargs) + ref = stats.combine_pvalues(narrays[0], nan_policy='omit', **kwargs) + + xp_assert_close(res.statistic.data, xp.asarray(ref.statistic)) + xp_assert_close(res.pvalue.data, xp.asarray(ref.pvalue)) + + if method != 'stouffer': + return + + res = stats.combine_pvalues(marrays[0], weights=marrays[1], **kwargs) + ref = stats.combine_pvalues(narrays[0], weights=narrays[1], + nan_policy='omit', **kwargs) + + xp_assert_close(res.statistic.data, xp.asarray(ref.statistic)) + xp_assert_close(res.pvalue.data, xp.asarray(ref.pvalue)) + + +@skip_backend('dask.array', reason='Arrays need `device` attribute: dask/dask#11711') +@skip_backend('jax.numpy', reason="JAX doesn't allow item assignment.") +@skip_backend('torch', reason="array-api-compat#242") +@skip_backend('cupy', reason="special functions won't work") +def test_ttest_ind_from_stats(xp): + shape = (10, 11) + mxp, marrays, narrays = get_arrays(6, xp=xp, shape=shape) + mask = np.astype(np.sum(np.stack([np.isnan(arg) for arg in narrays]), axis=0), bool) + narrays = [arg[~mask] for arg in narrays] + marrays[2], marrays[5] = 
marrays[2] * 100, marrays[5] * 100 + narrays[2], narrays[5] = narrays[2] * 100, narrays[5] * 100 + + res = stats.ttest_ind_from_stats(*marrays) + ref = stats.ttest_ind_from_stats(*narrays) + + mask = xp.asarray(mask) + assert xp.any(mask) and xp.any(~mask) + xp_assert_close(res.statistic.data[~mask], xp.asarray(ref.statistic)) + xp_assert_close(res.pvalue.data[~mask], xp.asarray(ref.pvalue)) + xp_assert_close(res.statistic.mask, mask) + xp_assert_close(res.pvalue.mask, mask) + assert res.statistic.shape == shape + assert res.pvalue.shape == shape diff --git a/scipy/stats/tests/test_stats.py b/scipy/stats/tests/test_stats.py index 1c20ee39121b..3bac92f5d03b 100644 --- a/scipy/stats/tests/test_stats.py +++ b/scipy/stats/tests/test_stats.py @@ -6526,20 +6526,6 @@ def test_describe_empty(self, xp): class NormalityTests: - def test_too_small(self, xp): - # 1D sample has too few observations -> warning/error - test_fun = getattr(stats, self.test_name) - x = xp.asarray(4.) - if is_numpy(xp): - with pytest.warns(SmallSampleWarning, match=too_small_1d_not_omit): - res = test_fun(x) - NaN = xp.asarray(xp.nan) - xp_assert_equal(res.statistic, NaN) - xp_assert_equal(res.pvalue, NaN) - else: - message = "...requires at least..." - with pytest.raises(ValueError, match=message): - test_fun(x) @skip_xp_backends('cupy', reason='cupy/cupy#8391') @pytest.mark.parametrize("alternative", ['two-sided', 'less', 'greater']) @@ -6585,6 +6571,7 @@ def test_nan(self, xp): xp_assert_equal(res.statistic, NaN) xp_assert_equal(res.pvalue, NaN) + class TestSkewTest(NormalityTests): test_name = 'skewtest' case_ref = (1.98078826090875881, 0.04761502382843208) # statistic, pvalue @@ -6598,20 +6585,18 @@ def test_intuitive(self, xp): def test_skewtest_too_few_observations(self, xp): # Regression test for ticket #1492. - # skewtest requires at least 8 observations; 7 should raise a ValueError. + # skewtest requires at least 8 observations; 7 should warn and return NaN. stats.skewtest(xp.arange(8.0)) x = xp.arange(7.0) - if is_numpy(xp): - with pytest.warns(SmallSampleWarning, match=too_small_1d_not_omit): - res = stats.skewtest(x) - NaN = xp.asarray(xp.nan) - xp_assert_equal(res.statistic, NaN) - xp_assert_equal(res.pvalue, NaN) - else: - message = "`skewtest` requires at least 8 observations" - with pytest.raises(ValueError, match=message): - stats.skewtest(x) + + message = (too_small_1d_not_omit if is_numpy(xp) + else "`skewtest` requires at least 8 valid observations") + with pytest.warns(SmallSampleWarning, match=message): + res = stats.skewtest(x) + NaN = xp.asarray(xp.nan) + xp_assert_equal(res.statistic, NaN) + xp_assert_equal(res.pvalue, NaN) class TestKurtosisTest(NormalityTests): @@ -6637,34 +6622,34 @@ def test_gh9033_regression(self, xp): @skip_xp_backends('cupy', reason='cupy/cupy#8391') def test_kurtosistest_too_few_observations(self, xp): - # kurtosistest requires at least 5 observations; 4 should raise a ValueError. - # At least 20 are needed to avoid warning + # kurtosistest requires at least 5 observations; 4 should warn and return NaN. # Regression test for ticket #1425. - stats.kurtosistest(xp.arange(20.0)) - - message = "`kurtosistest` p-value may be inaccurate..." 
- with pytest.warns(UserWarning, match=message): - stats.kurtosistest(xp.arange(5.0)) - with pytest.warns(UserWarning, match=message): - stats.kurtosistest(xp.arange(19.0)) + stats.kurtosistest(xp.arange(5.0)) - x = xp.arange(4.0) - if is_numpy(xp): - with pytest.warns(SmallSampleWarning, match=too_small_1d_not_omit): - res = stats.skewtest(x) - NaN = xp.asarray(xp.nan) - xp_assert_equal(res.statistic, NaN) - xp_assert_equal(res.pvalue, NaN) - else: - message = "`kurtosistest` requires at least 5 observations" - with pytest.raises(ValueError, match=message): - stats.kurtosistest(x) + message = (too_small_1d_not_omit if is_numpy(xp) + else "`kurtosistest` requires at least 5 valid") + with pytest.warns(SmallSampleWarning, match=message): + res = stats.kurtosistest(xp.arange(4.)) + NaN = xp.asarray(xp.nan) + xp_assert_equal(res.statistic, NaN) + xp_assert_equal(res.pvalue, NaN) class TestNormalTest(NormalityTests): test_name = 'normaltest' case_ref = (3.92371918158185551, 0.14059672529747502) # statistic, pvalue + def test_too_few_observations(self, xp): + stats.normaltest(xp.arange(8.)) + + # 1D sample has too few observations -> warning / NaN output + # specific warning messages tested for `skewtest`/`kurtosistest` + with pytest.warns(SmallSampleWarning): + res = stats.normaltest(xp.arange(7.)) + NaN = xp.asarray(xp.nan) + xp_assert_equal(res.statistic, NaN) + xp_assert_equal(res.pvalue, NaN) + class TestRankSums: @@ -6716,18 +6701,15 @@ def test_jarque_bera_array_like(self, xp): assert JB1 == JB2 == JB3 == jb_test1.statistic == jb_test2.statistic == jb_test3.statistic # noqa: E501 assert p1 == p2 == p3 == jb_test1.pvalue == jb_test2.pvalue == jb_test3.pvalue - def test_jarque_bera_size(self, xp): + @skip_xp_backends('array_api_strict', reason='Noisy; see TestSkew') + def test_jarque_bera_too_few_observations(self, xp): x = xp.asarray([]) - if is_numpy(xp): - with pytest.warns(SmallSampleWarning, match=too_small_1d_not_omit): - res = stats.jarque_bera(x) + + with pytest.warns(SmallSampleWarning, match=too_small_1d_not_omit): + res = stats.jarque_bera(x) NaN = xp.asarray(xp.nan) xp_assert_equal(res.statistic, NaN) xp_assert_equal(res.pvalue, NaN) - else: - message = "At least one observation is required." - with pytest.raises(ValueError, match=message): - res = stats.jarque_bera(x) def test_axis(self, xp): rng = np.random.RandomState(seed=122398129)