ott-jax
diff --git a/‎docs/_templates/autosummary/class.rst
+1-1 b/‎docs/_templates/autosummary/class.rst
+1-1
diff --git a/‎docs/conf.py
+2-2 b/‎docs/conf.py
+2-2
diff --git a/‎pyproject.toml
+3 b/‎pyproject.toml
+3
diff --git a/‎src/ott/geometry/costs.py
+3-7 b/‎src/ott/geometry/costs.py
+3-7
diff --git a/‎src/ott/geometry/geodesic.py
+2-2 b/‎src/ott/geometry/geodesic.py
+2-2
diff --git a/‎src/ott/geometry/geometry.py
+9-10 b/‎src/ott/geometry/geometry.py
+9-10
diff --git a/‎src/ott/geometry/graph.py
+5-5 b/‎src/ott/geometry/graph.py
+5-5
diff --git a/‎src/ott/geometry/grid.py
+1-1 b/‎src/ott/geometry/grid.py
+1-1
diff --git a/‎src/ott/geometry/low_rank.py
+2-2 b/‎src/ott/geometry/low_rank.py
+2-2
diff --git a/‎src/ott/geometry/pointcloud.py
+5-5 b/‎src/ott/geometry/pointcloud.py
+5-5
diff --git a/‎src/ott/initializers/linear/initializers.py
+4-4 b/‎src/ott/initializers/linear/initializers.py
+4-4
diff --git a/‎src/ott/initializers/linear/initializers_lr.py
+5-5 b/‎src/ott/initializers/linear/initializers_lr.py
+5-5
diff --git a/‎src/ott/math/matrix_square_root.py
+5-7 b/‎src/ott/math/matrix_square_root.py
+5-7
diff --git a/‎src/ott/math/unbalanced_functions.py
+2-2 b/‎src/ott/math/unbalanced_functions.py
+2-2
@@ -10,7 +10,7 @@
     .. autosummary::
         :toctree: .
     {% for item in methods %}
-    {%- if item not in ['__init__', 'tree_flatten', 'tree_unflatten', 'bind', 'tabulate'] %}
+    {%- if item not in ['__init__', 'tree_flatten', 'tree_unflatten', 'bind', 'tabulate', 'module_paths'] %}
         ~{{ name }}.{{ item }}
     {%- endif %}
     {%- endfor %}
 
@@ -104,8 +104,8 @@
 spelling_warning = True
 spelling_word_list_filename = ["spelling/technical.txt", "spelling/misc.txt"]
 spelling_add_pypi_package_names = True
-# flax misspelled words; `flax.linen.Module.bind` is ignored in `class.rst`
-# because of indentation error that cannot be suppressed
+# flax misspelled words; `flax.linen.Module.{bind,module_paths}` is ignored in
+# the `class.rst` because of indentation error that cannot be suppressed
 spelling_exclude_patterns = [
     "bibliography.rst",
     "**setup.rst",
 
@@ -120,7 +120,10 @@ markers = [
     "fast: Mark tests as fast.",
 ]
 filterwarnings = [
+    "ignore:\\n*.*scipy.sparse array",
     "ignore:jax.random.KeyArray is deprecated:DeprecationWarning",
+    "ignore:.*jax.config:DeprecationWarning",
+    "ignore:jax.core.Shape is deprecated:DeprecationWarning:chex",
 ]
 
 [tool.coverage.run]
 
@@ -284,7 +284,7 @@ def norm(self, x: jnp.ndarray) -> Union[float, jnp.ndarray]:
 
   def pairwise(self, x: jnp.ndarray, y: jnp.ndarray) -> float:
     """Compute minus twice the dot-product between vectors."""
-    return -2. * jnp.vdot(x, y)
+    return -2.0 * jnp.vdot(x, y)
 
   def h(self, z: jnp.ndarray) -> float:  # noqa: D102
     return jnp.sum(z ** 2)
@@ -806,7 +806,6 @@ def covariance_fixpoint_iter(
     min_iterations = kwargs.pop("min_iterations", 1)
     max_iterations = kwargs.pop("max_iterations", 100)
     inner_iterations = kwargs.pop("inner_iterations", 5)
-    dtype = covs.dtype
 
     @functools.partial(jax.vmap, in_axes=[None, 0, 0])
     def scale_covariances(
@@ -838,10 +837,7 @@ def body_fn(
 
     def init_state() -> Tuple[jnp.ndarray, float]:
       cov_init = jnp.eye(self._dimension)
-      diffs = -jnp.ones(
-          (np.ceil(max_iterations / inner_iterations).astype(int),),
-          dtype=dtype
-      )
+      diffs = -jnp.ones(math.ceil(max_iterations / inner_iterations))
       return cov_init, diffs
 
     cov, diffs = fixed_point_loop.fixpoint_iter(
@@ -990,7 +986,7 @@ def pairwise(self, x: jnp.ndarray, y: jnp.ndarray) -> float:
     diff_means = mean_x - mean_y
 
     # Identity matrix of suitable size
-    iden = jnp.eye(self._dimension, dtype=x.dtype)
+    iden = jnp.eye(self._dimension)
 
     # Creates matrices needed in the computation
     tilde_a = 0.5 * gam * (iden - lam * jnp.linalg.inv(cov_x + lam * iden))
 
@@ -56,7 +56,7 @@ def __init__(
       t: float = 1e-3,
       **kwargs: Any
   ):
-    super().__init__(epsilon=1., **kwargs)
+    super().__init__(epsilon=1.0, **kwargs)
     self.scaled_laplacian = scaled_laplacian
     self.eigval = eigval
     self.chebyshev_coeffs = chebyshev_coeffs
@@ -104,7 +104,7 @@ def from_graph(
     if directed:
       G = G + G.T
     if t is None:
-      t = (jnp.sum(G) / jnp.sum(G > 0.0)) ** 2.0
+      t = (jnp.sum(G) / jnp.sum(G > 0.0)) ** 2
 
     degree = jnp.sum(G, axis=1)
     laplacian = jnp.diag(degree) - G
 
@@ -626,7 +626,7 @@ def to_LRCGeometry(
       rank: int = 0,
       tol: float = 1e-2,
       rng: Optional[jax.Array] = None,
-      scale: float = 1.
+      scale: float = 1.0
   ) -> "low_rank.LRCGeometry":
     r"""Factorize the cost matrix using either SVD (full) or :cite:`indyk:19`.
 
@@ -673,8 +673,8 @@ def to_LRCGeometry(
       i_star = jax.random.randint(rng1, shape=(), minval=0, maxval=n)
       j_star = jax.random.randint(rng2, shape=(), minval=0, maxval=m)
 
-      ci_star = self.subset(i_star, None).cost_matrix.ravel() ** 2  # (m,)
-      cj_star = self.subset(None, j_star).cost_matrix.ravel() ** 2  # (n,)
+      ci_star = self.subset([i_star], None).cost_matrix.ravel() ** 2  # (m,)
+      cj_star = self.subset(None, [j_star]).cost_matrix.ravel() ** 2  # (n,)
 
       p_row = cj_star + ci_star[j_star] + jnp.mean(ci_star)  # (n,)
       p_row /= jnp.sum(p_row)
@@ -697,7 +697,7 @@ def to_LRCGeometry(
       _, d, v = jnp.linalg.svd(U.T @ U)  # (k,), (k, k)
       v = v.T / jnp.sqrt(d)[None, :]
 
-      inv_scale = (1. / jnp.sqrt(n_subset))
+      inv_scale = (1.0 / jnp.sqrt(n_subset))
       col_ixs = jax.random.choice(rng5, m, shape=(n_subset,))  # (n_subset,)
 
       # (n, n_subset)
@@ -740,9 +740,9 @@ def subset_fn(
       if arr is None:
         return None
       if src_ixs is not None:
-        arr = arr[jnp.atleast_1d(src_ixs)]
+        arr = arr[src_ixs, ...]
       if tgt_ixs is not None:
-        arr = arr[:, jnp.atleast_1d(tgt_ixs)]
+        arr = arr[:, tgt_ixs]
       return arr  # noqa: RET504
 
     return self._mask_subset_helper(
@@ -757,7 +757,7 @@ def mask(
       self,
       src_mask: Optional[jnp.ndarray],
       tgt_mask: Optional[jnp.ndarray],
-      mask_value: float = 0.,
+      mask_value: float = 0.0,
   ) -> "Geometry":
     """Mask rows or columns of a geometry.
 
@@ -855,7 +855,7 @@ def dtype(self) -> jnp.dtype:
         self._kernel_matrix if self._cost_matrix is None else self._cost_matrix
     ).dtype
 
-  def _masked_geom(self, mask_value: float = 0.) -> "Geometry":
+  def _masked_geom(self, mask_value: float = 0.0) -> "Geometry":
     """Mask geometry based on :attr:`src_mask` and :attr:`tgt_mask`."""
     src_mask, tgt_mask = self.src_mask, self.tgt_mask
     if src_mask is None and tgt_mask is None:
@@ -877,12 +877,11 @@ def _m_normed_ones(self) -> jnp.ndarray:
     return arr / jnp.sum(arr)
 
   @staticmethod
-  def _normalize_mask(mask: Optional[Union[int, jnp.ndarray]],
+  def _normalize_mask(mask: Optional[jnp.ndarray],
                       size: int) -> Optional[jnp.ndarray]:
     """Convert array of indices to a boolean mask."""
     if mask is None:
       return None
-    mask = jnp.atleast_1d(mask)
     if not jnp.issubdtype(mask, (bool, jnp.bool_)):
       mask = jnp.isin(jnp.arange(size), mask)
     assert mask.shape == (size,)
 
@@ -56,7 +56,7 @@ def __init__(
       tol: float = -1.0,
       **kwargs: Any
   ):
-    super().__init__(epsilon=1., **kwargs)
+    super().__init__(epsilon=1.0, **kwargs)
     self.laplacian = laplacian
     self.t = t
     self.n_steps = n_steps
@@ -107,7 +107,7 @@ def from_graph(
       laplacian = inv_sqrt_deg @ laplacian @ inv_sqrt_deg
 
     if t is None:
-      t = (jnp.sum(G) / jnp.sum(G > 0.)) ** 2
+      t = (jnp.sum(G) / jnp.sum(G > 0.0)) ** 2
 
     return cls(laplacian, t=t, **kwargs)
 
@@ -162,7 +162,7 @@ def body_fn(
     # axis we can ignore since the matrix is symmetric
     del eps, axis
 
-    force_scan = self.tol < 0.
+    force_scan = self.tol < 0.0
     fixpoint_fn = (
         fixed_point_loop.fixpoint_iter
         if force_scan else fixed_point_loop.fixpoint_iter_backprop
@@ -204,9 +204,9 @@ def cost_matrix(self) -> jnp.ndarray:  # noqa: D102
   def _scale(self) -> float:
     """Constant used to scale the Laplacian."""
     if self.numerical_scheme == "backward_euler":
-      return self.t / (4. * self.n_steps)
+      return self.t / (4.0 * self.n_steps)
     if self.numerical_scheme == "crank_nicolson":
-      return self.t / (2. * self.n_steps)
+      return self.t / (2.0 * self.n_steps)
     raise NotImplementedError(
         f"Numerical scheme `{self.numerical_scheme}` is not implemented."
     )
 
@@ -320,7 +320,7 @@ def mask(
       self,
       src_mask: Optional[jnp.ndarray],
       tgt_mask: Optional[jnp.ndarray],
-      mask_value: float = 0.,
+      mask_value: float = 0.0,
   ) -> NoReturn:
     """Not implemented."""
     raise NotImplementedError("Masking is not implemented for grids.")
 
@@ -251,7 +251,7 @@ def subset_fn(
         arr: Optional[jnp.ndarray],
         ixs: Optional[jnp.ndarray],
     ) -> jnp.ndarray:
-      return arr if arr is None or ixs is None else arr[jnp.atleast_1d(ixs)]
+      return arr if arr is None or ixs is None else arr[ixs, ...]
 
     return self._mask_subset_helper(
         src_ixs, tgt_ixs, fn=subset_fn, propagate_mask=True, **kwargs
@@ -261,7 +261,7 @@ def mask(  # noqa: D102
       self,
       src_mask: Optional[jnp.ndarray],
       tgt_mask: Optional[jnp.ndarray],
-      mask_value: float = 0.,
+      mask_value: float = 0.0,
   ) -> "LRCGeometry":
 
     def mask_fn(
 
@@ -80,13 +80,13 @@ def __init__(
   def _norm_x(self) -> Union[float, jnp.ndarray]:
     if self._axis_norm == 0:
       return self.cost_fn.norm(self.x)
-    return 0.
+    return 0.0
 
   @property
   def _norm_y(self) -> Union[float, jnp.ndarray]:
     if self._axis_norm == 0:
       return self.cost_fn.norm(self.y)
-    return 0.
+    return 0.0
 
   @property
   def can_LRC(self):  # noqa: D102
@@ -583,7 +583,7 @@ def _cosine_to_sqeucl(self) -> "PointCloud":
     x = x / jnp.linalg.norm(x, axis=-1, keepdims=True)
     y = y / jnp.linalg.norm(y, axis=-1, keepdims=True)
     # TODO(michalk8): find a better way
-    aux_data["scale_cost"] = 2. / self.inv_scale_cost
+    aux_data["scale_cost"] = 2.0 / self.inv_scale_cost
     cost_fn = costs.SqEuclidean()
     return type(self).tree_unflatten(aux_data, [x, y] + args + [cost_fn])
 
@@ -648,7 +648,7 @@ def subset_fn(
         arr: Optional[jnp.ndarray],
         ixs: Optional[jnp.ndarray],
     ) -> jnp.ndarray:
-      return arr if arr is None or ixs is None else arr[jnp.atleast_1d(ixs)]
+      return arr if arr is None or ixs is None else arr[ixs, ...]
 
     return self._mask_subset_helper(
         src_ixs, tgt_ixs, fn=subset_fn, propagate_mask=True, **kwargs
@@ -658,7 +658,7 @@ def mask(  # noqa: D102
       self,
       src_mask: Optional[jnp.ndarray],
       tgt_mask: Optional[jnp.ndarray],
-      mask_value: float = 0.,
+      mask_value: float = 0.0,
   ) -> "PointCloud":
 
     def mask_fn(
 
@@ -105,8 +105,8 @@ def __call__(
     ), f"Expected `g_v` to have shape `{m,}`, found `{b.shape}`."
 
     # cancel dual variables for zero weights
-    a = jnp.where(ot_prob.a > 0., a, -jnp.inf if lse_mode else 0.)
-    b = jnp.where(ot_prob.b > 0., b, -jnp.inf if lse_mode else 0.)
+    a = jnp.where(ot_prob.a > 0.0, a, -jnp.inf if lse_mode else 0.0)
+    b = jnp.where(ot_prob.b > 0.0, b, -jnp.inf if lse_mode else 0.0)
 
     return a, b
 
@@ -339,10 +339,10 @@ def init_dual_a(  # noqa: D102
 
     # subsample
     sub_x = jax.random.choice(
-        key=rng_x, a=x, shape=(self.subsample_n_x,), replace=True, p=a, axis=0
+        rng_x, a=x, shape=(self.subsample_n_x,), replace=True, p=a, axis=0
     )
     sub_y = jax.random.choice(
-        key=rng_y, a=y, shape=(self.subsample_n_y,), replace=True, p=b, axis=0
+        rng_y, a=y, shape=(self.subsample_n_y,), replace=True, p=b, axis=0
     )
 
     # create subsampled point cloud geometry
 
@@ -262,7 +262,7 @@ def init_g(  # noqa: D102
       **kwargs: Any,
   ) -> jnp.ndarray:
     del kwargs
-    init_g = jnp.abs(jax.random.uniform(rng, (self.rank,))) + 1.
+    init_g = jnp.abs(jax.random.uniform(rng, (self.rank,))) + 1.0
     return init_g / jnp.sum(init_g)
 
 
@@ -300,7 +300,7 @@ def _compute_factor(
     y = (marginal - lambda_1 * x) / (1.0 - lambda_1)
 
     return ((lambda_1 * x[:, None] @ g1.reshape(1, -1)) +
-            ((1 - lambda_1) * y[:, None] @ g2.reshape(1, -1)))
+            ((1.0 - lambda_1) * y[:, None] @ g2.reshape(1, -1)))
 
   def init_q(  # noqa: D102
       self,
@@ -477,7 +477,7 @@ class GeneralizedKMeansInitializer(KMeansInitializer):
   def __init__(
       self,
       rank: int,
-      gamma: float = 10.,
+      gamma: float = 10.0,
       min_iterations: int = 0,
       max_iterations: int = 100,
       inner_iterations: int = 10,
@@ -523,7 +523,7 @@ def _compute_factor(
 
     def init_fn() -> GeneralizedKMeansInitializer.State:
       n = geom.shape[0]
-      factor = jnp.abs(jax.random.normal(rng, (n, self.rank))) + 1.  # (n, r)
+      factor = jnp.abs(jax.random.normal(rng, (n, self.rank))) + 1.0  # (n, r)
       factor *= consts.marginal[:, None] / jnp.sum(
           factor, axis=1, keepdims=True
       )
@@ -586,7 +586,7 @@ def body_fn(
 
       norm = jnp.max(jnp.abs(grad)) ** 2
       gamma = consts.gamma / norm
-      eps = 1. / gamma
+      eps = 1.0 / gamma
 
       cost = grad - eps * mu.safe_log(state.factor)  # (n, r)
       cost = geometry.Geometry(
 
@@ -12,11 +12,11 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import functools
+import math
 from typing import Tuple
 
 import jax
 import jax.numpy as jnp
-import numpy as np
 
 from ott.math import fixed_point_loop
 
@@ -87,7 +87,7 @@ def body_fn(iteration, const, state, compute_error):
     y = 1.5 * y - jnp.matmul(y, w)
     z = 1.5 * z - jnp.matmul(w, z)
 
-    err = jnp.where(compute_error, new_err(x, norm_x, y), np.inf)
+    err = jnp.where(compute_error, new_err(x, norm_x, y), jnp.inf)
 
     errors = errors.at[iteration // inner_iterations].set(err)
 
@@ -98,13 +98,11 @@ def new_err(x, norm_x, y):
     norm_fn = functools.partial(jnp.linalg.norm, axis=(-2, -1))
     return jnp.max(norm_fn(res) / norm_fn(x))
 
-  dtype = x.dtype
   y = x / norm_x
-  z = jnp.eye(dimension, dtype=dtype)
+  z = jnp.eye(dimension)
   if jnp.ndim(x) > 2:
     z = jnp.tile(z, list(x.shape[:-2]) + [1, 1])
-  errors = -jnp.ones((np.ceil(max_iterations / inner_iterations).astype(int),),
-                     dtype=dtype)
+  errors = -jnp.ones(math.ceil(max_iterations / inner_iterations))
   state = (errors, y, z)
   const = (x, threshold)
   errors, y, z = fixed_point_loop.fixpoint_iter_backprop(
@@ -139,7 +137,7 @@ def solve_sylvester_bartels_stewart(
   )
   # The solution in the transformed space will in general be complex, too.
   y = jnp.zeros(a.shape[:-2] + (m, n)) + 0j
-  idx = jnp.arange(m, dtype=jnp.int32)
+  idx = jnp.arange(m)
   for j in range(n):
     lhs = r.at[..., idx, idx].add(-s[..., j:j + 1, j])
     rhs = d[..., j] + jnp.matmul(y[..., :j], s[..., :j, j:j + 1])[..., 0]
 
@@ -75,7 +75,7 @@ def diag_jacobian_of_marginal_fit(
     a vector of the same size as c or h.
   """
   if tau == 1.0:
-    return 0.
+    return 0.0
 
   r = rho(epsilon, tau)
   # here no minus sign because we are taking derivative w.r.t -h
@@ -87,4 +87,4 @@ def diag_jacobian_of_marginal_fit(
 
 
 def rho(epsilon: float, tau: float) -> float:  # noqa: D103
-  return (epsilon * tau) / (1. - tau)
+  return (epsilon * tau) / (1.0 - tau)
Original file line number	Diff line number	Diff line change
`@@ -120,7 +120,10 @@ markers = [`
`120`	`120`	`"fast: Mark tests as fast.",`
`121`	`121`	`]`
`122`	`122`	`filterwarnings = [`
	`123`	`+ "ignore:\\n*.*scipy.sparse array",`
`123`	`124`	`"ignore:jax.random.KeyArray is deprecated:DeprecationWarning",`
	`125`	`+ "ignore:.*jax.config:DeprecationWarning",`
	`126`	`+ "ignore:jax.core.Shape is deprecated:DeprecationWarning:chex",`
`124`	`127`	`]`
`125`	`128`
`126`	`129`	`[tool.coverage.run]`