From 6288e7be10ac551e41e396122185fe019ad431e6 Mon Sep 17 00:00:00 2001 From: Dale Tovar Date: Thu, 24 Sep 2020 11:08:07 -0700 Subject: [PATCH] refactor elemwise ops --- sparse/_common.py | 2 +- sparse/_compressed/compressed.py | 47 +- sparse/_coo/__init__.py | 2 - sparse/_coo/common.py | 4 +- sparse/_coo/core.py | 307 +----------- sparse/_sparse_array.py | 318 ++++++++++++- sparse/{_coo/umath.py => _umath.py} | 30 +- sparse/tests/test_compressed.py | 8 +- sparse/tests/test_coo.py | 677 -------------------------- sparse/tests/test_elemwise.py | 707 ++++++++++++++++++++++++++++ 10 files changed, 1068 insertions(+), 1034 deletions(-) rename sparse/{_coo/umath.py => _umath.py} (97%) create mode 100644 sparse/tests/test_elemwise.py diff --git a/sparse/_common.py b/sparse/_common.py index 690429ee..914ea28f 100644 --- a/sparse/_common.py +++ b/sparse/_common.py @@ -8,7 +8,7 @@ from ._sparse_array import SparseArray from ._utils import check_compressed_axes, normalize_axis, check_zero_fill_value -from ._coo.umath import elemwise +from ._umath import elemwise from ._coo.common import ( clip, triu, diff --git a/sparse/_compressed/compressed.py b/sparse/_compressed/compressed.py index 2a867212..b323e59c 100644 --- a/sparse/_compressed/compressed.py +++ b/sparse/_compressed/compressed.py @@ -1,3 +1,4 @@ +import copy as _copy import numpy as np import operator from numpy.lib.mixins import NDArrayOperatorsMixin @@ -141,6 +142,24 @@ def __init__( if prune: self._prune() + def copy(self, deep=True): + """Return a copy of the array. + + Parameters + ---------- + deep : boolean, optional + If True (default), the internal coords and data arrays are also + copied. Set to ``False`` to only make a shallow copy. 
+ """ + return _copy.deepcopy(self) if deep else _copy.copy(self) + + def _make_shallow_copy_of(self, other): + self.data = other.data + self.indices = other.indices + self.indptr = other.indptr + self.compressed_axes = other.compressed_axes + super().__init__(other.shape, fill_value=other.fill_value) + @classmethod def from_numpy(cls, x, compressed_axes=None, fill_value=0): coo = COO(x, fill_value=fill_value) @@ -262,8 +281,7 @@ def __str__(self): __getitem__ = getitem def _reduce_calc(self, method, axis, keepdims=False, **kwargs): - - if axis[0] is None: + if axis[0] is None or np.array_equal(axis, np.arange(self.ndim, dtype=np.intp)): x = self.flatten().tocoo() out = x.reduce(method, axis=None, keepdims=keepdims, **kwargs) if keepdims: @@ -744,31 +762,6 @@ def __rmatmul__(self, other): except NotImplementedError: return NotImplemented - def astype(self, dtype, casting="unsafe", copy=True): - """ - Copy of the array, cast to a specified type. - - See also - -------- - scipy.sparse.coo_matrix.astype : SciPy sparse equivalent function - numpy.ndarray.astype : NumPy equivalent ufunc. - :obj:`COO.elemwise`: Apply an arbitrary element-wise function to one or two - arguments. 
- """ - if self.dtype == dtype and not copy: - return self - # temporary solution - return GCXS( - ( - np.array(self.data, copy=copy).astype(dtype), - np.array(self.indices, copy=copy), - np.array(self.indptr, copy=copy), - ), - shape=self.shape, - compressed_axes=self.compressed_axes, - fill_value=self.fill_value, - ) - def _prune(self): """ Prunes data so that if any fill-values are present, they are removed diff --git a/sparse/_coo/__init__.py b/sparse/_coo/__init__.py index 48385774..4a3c910b 100644 --- a/sparse/_coo/__init__.py +++ b/sparse/_coo/__init__.py @@ -1,5 +1,4 @@ from .core import COO, as_coo -from .umath import elemwise from .common import ( concatenate, clip, @@ -26,7 +25,6 @@ __all__ = [ "COO", "as_coo", - "elemwise", "concatenate", "clip", "stack", diff --git a/sparse/_coo/common.py b/sparse/_coo/common.py index ac7db1bd..b9162051 100644 --- a/sparse/_coo/common.py +++ b/sparse/_coo/common.py @@ -93,7 +93,7 @@ def kron(a, b): [0, 0, 0, 0, 0, 0, 1, 2, 3]], dtype=int64) """ from .core import COO - from .umath import _cartesian_product + from .._umath import _cartesian_product check_zero_fill_value(a, b) @@ -556,7 +556,7 @@ def where(condition, x=None, y=None): -------- numpy.where : Equivalent Numpy function. 
""" - from .umath import elemwise + from .._umath import elemwise x_given = x is not None y_given = y is not None diff --git a/sparse/_coo/core.py b/sparse/_coo/core.py index df3bf09f..51a54b3b 100644 --- a/sparse/_coo/core.py +++ b/sparse/_coo/core.py @@ -12,7 +12,7 @@ from .._common import dot, matmul from .indexing import getitem -from .umath import elemwise, broadcast_to +from .._umath import elemwise, broadcast_to from .._sparse_array import SparseArray, _reduce_super_ufunc from .._utils import normalize_axis, equivalent, check_zero_fill_value, _zero_of_dtype @@ -692,255 +692,6 @@ def _reduce_return(self, data, arr_attrs, result_fill_value): return out.reshape(tuple(self.shape[d] for d in neg_axis)) - def mean(self, axis=None, keepdims=False, dtype=None, out=None): - """ - Compute the mean along the given axes. Uses all axes by default. - - Parameters - ---------- - axis : Union[int, Iterable[int]], optional - The axes along which to compute the mean. Uses all axes by default. - keepdims : bool, optional - Whether or not to keep the dimensions of the original array. - dtype: numpy.dtype - The data type of the output array. - - Returns - ------- - COO - The reduced output sparse array. - - See Also - -------- - numpy.ndarray.mean : Equivalent numpy method. - scipy.sparse.coo_matrix.mean : Equivalent Scipy method. - - Notes - ----- - * This function internally calls :obj:`COO.sum_duplicates` to bring the - array into canonical form. - * The :code:`out` parameter is provided just for compatibility with - Numpy and isn't actually supported. - - Examples - -------- - You can use :obj:`COO.mean` to compute the mean of an array across any - dimension. - - >>> x = np.array([[1, 2, 0, 0], - ... [0, 1, 0, 0]], dtype='i8') - >>> s = COO.from_numpy(x) - >>> s2 = s.mean(axis=1) - >>> s2.todense() # doctest: +SKIP - array([0.5, 1.5, 0., 0.]) - - You can also use the :code:`keepdims` argument to keep the dimensions - after the mean. 
- - >>> s3 = s.mean(axis=0, keepdims=True) - >>> s3.shape - (1, 4) - - You can pass in an output datatype, if needed. - - >>> s4 = s.mean(axis=0, dtype=np.float16) - >>> s4.dtype - dtype('float16') - - By default, this reduces the array down to one number, computing the - mean along all axes. - - >>> s.mean() - 0.5 - """ - if axis is None: - axis = tuple(range(self.ndim)) - elif not isinstance(axis, tuple): - axis = (axis,) - den = reduce(operator.mul, (self.shape[i] for i in axis), 1) - - if dtype is None: - if issubclass(self.dtype.type, (np.integer, np.bool_)): - dtype = inter_dtype = np.dtype("f8") - else: - dtype = self.dtype - inter_dtype = ( - np.dtype("f4") if issubclass(dtype.type, np.float16) else dtype - ) - else: - inter_dtype = dtype - - num = self.sum(axis=axis, keepdims=keepdims, dtype=inter_dtype) - - if num.ndim: - out = np.true_divide(num, den, casting="unsafe") - return out.astype(dtype) if out.dtype != dtype else out - return np.divide(num, den, dtype=dtype, out=out) - - def var(self, axis=None, dtype=None, out=None, ddof=0, keepdims=False): - """ - Compute the variance along the gi66ven axes. Uses all axes by default. - - Parameters - ---------- - axis : Union[int, Iterable[int]], optional - The axes along which to compute the variance. Uses all axes by default. - dtype : numpy.dtype, optional - The output datatype. - out: COO, optional - The array to write the output to. - ddof: int - The degrees of freedom. - keepdims : bool, optional - Whether or not to keep the dimensions of the original array. - - Returns - ------- - COO - The reduced output sparse array. - - See Also - -------- - numpy.ndarray.var : Equivalent numpy method. - - Notes - ----- - * This function internally calls :obj:`COO.sum_duplicates` to bring the - array into canonical form. - - Examples - -------- - You can use :obj:`COO.var` to compute the variance of an array across any - dimension. - - >>> x = np.array([[1, 2, 0, 0], - ... 
[0, 1, 0, 0]], dtype='i8') - >>> s = COO.from_numpy(x) - >>> s2 = s.var(axis=1) - >>> s2.todense() # doctest: +SKIP - array([0.6875, 0.1875]) - - You can also use the :code:`keepdims` argument to keep the dimensions - after the variance. - - >>> s3 = s.var(axis=0, keepdims=True) - >>> s3.shape - (1, 4) - - You can pass in an output datatype, if needed. - - >>> s4 = s.var(axis=0, dtype=np.float16) - >>> s4.dtype - dtype('float16') - - By default, this reduces the array down to one number, computing the - variance along all axes. - - >>> s.var() - 0.5 - """ - axis = normalize_axis(axis, self.ndim) - - if axis is None: - axis = tuple(range(self.ndim)) - - if not isinstance(axis, tuple): - axis = (axis,) - - rcount = reduce(operator.mul, (self.shape[a] for a in axis), 1) - # Make this warning show up on top. - if ddof >= rcount: - warnings.warn("Degrees of freedom <= 0 for slice", RuntimeWarning) - - # Cast bool, unsigned int, and int to float64 by default - if dtype is None and issubclass(self.dtype.type, (np.integer, np.bool_)): - dtype = np.dtype("f8") - - arrmean = self.sum(axis, dtype=dtype, keepdims=True) - np.divide(arrmean, rcount, out=arrmean) - x = self - arrmean - if issubclass(self.dtype.type, np.complexfloating): - x = x.real * x.real + x.imag * x.imag - else: - x = np.multiply(x, x, out=x) - - ret = x.sum(axis=axis, dtype=dtype, out=out, keepdims=keepdims) - - # Compute degrees of freedom and make sure it is not negative. - rcount = max([rcount - ddof, 0]) - - ret = ret[...] - np.divide(ret, rcount, out=ret, casting="unsafe") - return ret[()] - - def std(self, axis=None, dtype=None, out=None, ddof=0, keepdims=False): - """ - Compute the standard deviation along the given axes. Uses all axes by default. - - Parameters - ---------- - axis : Union[int, Iterable[int]], optional - The axes along which to compute the standard deviation. Uses - all axes by default. - dtype : numpy.dtype, optional - The output datatype. 
- out: COO, optional - The array to write the output to. - ddof: int - The degrees of freedom. - keepdims : bool, optional - Whether or not to keep the dimensions of the original array. - - Returns - ------- - COO - The reduced output sparse array. - - See Also - -------- - numpy.ndarray.std : Equivalent numpy method. - - Notes - ----- - * This function internally calls :obj:`COO.sum_duplicates` to bring the - array into canonical form. - - Examples - -------- - You can use :obj:`COO.std` to compute the standard deviation of an array - across any dimension. - - >>> x = np.array([[1, 2, 0, 0], - ... [0, 1, 0, 0]], dtype='i8') - >>> s = COO.from_numpy(x) - >>> s2 = s.std(axis=1) - >>> s2.todense() # doctest: +SKIP - array([0.8291562, 0.4330127]) - - You can also use the :code:`keepdims` argument to keep the dimensions - after the standard deviation. - - >>> s3 = s.std(axis=0, keepdims=True) - >>> s3.shape - (1, 4) - - You can pass in an output datatype, if needed. - - >>> s4 = s.std(axis=0, dtype=np.float16) - >>> s4.dtype - dtype('float16') - - By default, this reduces the array down to one number, computing the - standard deviation along all axes. - - >>> s.std() # doctest: +SKIP - 0.7071067811865476 - """ - ret = self.var(axis=axis, dtype=dtype, out=out, ddof=ddof, keepdims=keepdims) - - ret = np.sqrt(ret) - return ret - def transpose(self, axes=None): """ Returns a new array which has the order of the axes switched. @@ -1642,62 +1393,6 @@ def broadcast_to(self, shape): """ return broadcast_to(self, shape) - def round(self, decimals=0, out=None): - """ - Evenly round to the given number of decimals. - - See also - -------- - :obj:`numpy.round` : NumPy equivalent ufunc. - :obj:`COO.elemwise`: Apply an arbitrary element-wise function to one or two - arguments. 
- """ - if out is not None and not isinstance(out, tuple): - out = (out,) - return self.__array_ufunc__( - np.round, "__call__", self, decimals=decimals, out=out - ) - - round_ = round - - def clip(self, min=None, max=None, out=None): - """ - Clip (limit) the values in the array. - - Return an array whose values are limited to ``[min, max]``. One of min - or max must be given. - - See Also - -------- - sparse.clip : For full documentation and more details. - numpy.clip : Equivalent NumPy function. - """ - if min is None and max is None: - raise ValueError("One of max or min must be given.") - if out is not None and not isinstance(out, tuple): - out = (out,) - return self.__array_ufunc__( - np.clip, "__call__", self, a_min=min, a_max=max, out=out - ) - - def astype(self, dtype, casting="unsafe", copy=True): - """ - Copy of the array, cast to a specified type. - - See also - -------- - scipy.sparse.coo_matrix.astype : SciPy sparse equivalent function - numpy.ndarray.astype : NumPy equivalent ufunc. - :obj:`COO.elemwise`: Apply an arbitrary element-wise function to one or two - arguments. 
- """ - # this matches numpy's behavior - if self.dtype == dtype and not copy: - return self - return self.__array_ufunc__( - np.ndarray.astype, "__call__", self, dtype=dtype, copy=copy, casting=casting - ) - def maybe_densify(self, max_size=1000, min_density=0.25): """ Converts this :obj:`COO` array to a :obj:`numpy.ndarray` if not too diff --git a/sparse/_sparse_array.py b/sparse/_sparse_array.py index 37d6ae22..63577dee 100644 --- a/sparse/_sparse_array.py +++ b/sparse/_sparse_array.py @@ -3,13 +3,12 @@ from numbers import Integral from typing import Callable import operator -import functools +from functools import reduce -import numba import numpy as np import scipy.sparse as ss -from ._coo.umath import elemwise +from ._umath import elemwise from ._utils import _zero_of_dtype, html_table, equivalent, normalize_axis _reduce_super_ufunc = {np.add: np.multiply, np.multiply: np.power} @@ -136,7 +135,7 @@ def size(self): """ # We use this instead of np.prod because np.prod # returns a float64 for an empty shape. - return functools.reduce(operator.mul, self.shape, 1) + return reduce(operator.mul, self.shape, 1) @property def density(self): @@ -355,9 +354,7 @@ def reduce(self, method, axis=(0,), keepdims=False, **kwargs): if not isinstance(axis, tuple): axis = (axis,) - out = self._reduce_calc(method, axis, keepdims, **kwargs) - if len(out) == 1: return out[0] data, counts, axis, n_cols, arr_attrs = out @@ -541,3 +538,312 @@ def prod(self, axis=None, keepdims=False, dtype=None, out=None): return np.multiply.reduce( self, out=out, axis=axis, keepdims=keepdims, dtype=dtype ) + + def round(self, decimals=0, out=None): + """ + Evenly round to the given number of decimals. + + See also + -------- + :obj:`numpy.round` : NumPy equivalent ufunc. + :obj:`COO.elemwise`: Apply an arbitrary element-wise function to one or two + arguments. 
+ """ + if out is not None and not isinstance(out, tuple): + out = (out,) + return self.__array_ufunc__( + np.round, "__call__", self, decimals=decimals, out=out + ) + + round_ = round + + def clip(self, min=None, max=None, out=None): + """ + Clip (limit) the values in the array. + + Return an array whose values are limited to ``[min, max]``. One of min + or max must be given. + + See Also + -------- + sparse.clip : For full documentation and more details. + numpy.clip : Equivalent NumPy function. + """ + if min is None and max is None: + raise ValueError("One of max or min must be given.") + if out is not None and not isinstance(out, tuple): + out = (out,) + return self.__array_ufunc__( + np.clip, "__call__", self, a_min=min, a_max=max, out=out + ) + + def astype(self, dtype, casting="unsafe", copy=True): + """ + Copy of the array, cast to a specified type. + + See also + -------- + scipy.sparse.coo_matrix.astype : SciPy sparse equivalent function + numpy.ndarray.astype : NumPy equivalent ufunc. + :obj:`COO.elemwise`: Apply an arbitrary element-wise function to one or two + arguments. + """ + # this matches numpy's behavior + if self.dtype == dtype and not copy: + return self + return self.__array_ufunc__( + np.ndarray.astype, "__call__", self, dtype=dtype, copy=copy, casting=casting + ) + + def mean(self, axis=None, keepdims=False, dtype=None, out=None): + """ + Compute the mean along the given axes. Uses all axes by default. + + Parameters + ---------- + axis : Union[int, Iterable[int]], optional + The axes along which to compute the mean. Uses all axes by default. + keepdims : bool, optional + Whether or not to keep the dimensions of the original array. + dtype: numpy.dtype + The data type of the output array. + + Returns + ------- + SparseArray + The reduced output sparse array. + + See Also + -------- + numpy.ndarray.mean : Equivalent numpy method. + scipy.sparse.coo_matrix.mean : Equivalent Scipy method. 
+ + Notes + ----- + * This function internally calls :obj:`COO.sum_duplicates` to bring the + array into canonical form. + * The :code:`out` parameter is provided just for compatibility with + Numpy and isn't actually supported. + + Examples + -------- + You can use :obj:`COO.mean` to compute the mean of an array across any + dimension. + + >>> from sparse import COO + >>> x = np.array([[1, 2, 0, 0], + ... [0, 1, 0, 0]], dtype='i8') + >>> s = COO.from_numpy(x) + >>> s2 = s.mean(axis=1) + >>> s2.todense() # doctest: +SKIP + array([0.75, 0.25]) + + You can also use the :code:`keepdims` argument to keep the dimensions + after the mean. + + >>> s3 = s.mean(axis=0, keepdims=True) + >>> s3.shape + (1, 4) + + You can pass in an output datatype, if needed. + + >>> s4 = s.mean(axis=0, dtype=np.float16) + >>> s4.dtype + dtype('float16') + + By default, this reduces the array down to one number, computing the + mean along all axes. + + >>> s.mean() + 0.5 + """ + + if axis is None: + axis = tuple(range(self.ndim)) + elif not isinstance(axis, tuple): + axis = (axis,) + den = reduce(operator.mul, (self.shape[i] for i in axis), 1) + + if dtype is None: + if issubclass(self.dtype.type, (np.integer, np.bool_)): + dtype = inter_dtype = np.dtype("f8") + else: + dtype = self.dtype + inter_dtype = ( + np.dtype("f4") if issubclass(dtype.type, np.float16) else dtype + ) + else: + inter_dtype = dtype + + num = self.sum(axis=axis, keepdims=keepdims, dtype=inter_dtype) + + if num.ndim: + out = np.true_divide(num, den, casting="unsafe") + return out.astype(dtype) if out.dtype != dtype else out + return np.divide(num, den, dtype=dtype, out=out) + + def var(self, axis=None, dtype=None, out=None, ddof=0, keepdims=False): + """ + Compute the variance along the given axes. Uses all axes by default. + + Parameters + ---------- + axis : Union[int, Iterable[int]], optional + The axes along which to compute the variance. Uses all axes by default. 
+ dtype : numpy.dtype, optional + The output datatype. + out: SparseArray, optional + The array to write the output to. + ddof: int, optional + Delta degrees of freedom; the divisor used is ``N - ddof``. + keepdims : bool, optional + Whether or not to keep the dimensions of the original array. + + Returns + ------- + SparseArray + The reduced output sparse array. + + See Also + -------- + numpy.ndarray.var : Equivalent numpy method. + + Notes + ----- + * This function internally calls :obj:`COO.sum_duplicates` to bring the + array into canonical form. + + Examples + -------- + You can use :obj:`COO.var` to compute the variance of an array across any + dimension. + + >>> from sparse import COO + >>> x = np.array([[1, 2, 0, 0], + ... [0, 1, 0, 0]], dtype='i8') + >>> s = COO.from_numpy(x) + >>> s2 = s.var(axis=1) + >>> s2.todense() # doctest: +SKIP + array([0.6875, 0.1875]) + + You can also use the :code:`keepdims` argument to keep the dimensions + after the variance. + + >>> s3 = s.var(axis=0, keepdims=True) + >>> s3.shape + (1, 4) + + You can pass in an output datatype, if needed. + + >>> s4 = s.var(axis=0, dtype=np.float16) + >>> s4.dtype + dtype('float16') + + By default, this reduces the array down to one number, computing the + variance along all axes. + + >>> s.var() + 0.5 + """ + axis = normalize_axis(axis, self.ndim) + + if axis is None: + axis = tuple(range(self.ndim)) + + if not isinstance(axis, tuple): + axis = (axis,) + + rcount = reduce(operator.mul, (self.shape[a] for a in axis), 1) + # Make this warning show up on top. 
+ if ddof >= rcount: + warnings.warn("Degrees of freedom <= 0 for slice", RuntimeWarning) + + # Cast bool, unsigned int, and int to float64 by default + if dtype is None and issubclass(self.dtype.type, (np.integer, np.bool_)): + dtype = np.dtype("f8") + + arrmean = self.sum(axis, dtype=dtype, keepdims=True) + np.divide(arrmean, rcount, out=arrmean) + x = self - arrmean + if issubclass(self.dtype.type, np.complexfloating): + x = x.real * x.real + x.imag * x.imag + else: + x = np.multiply(x, x, out=x) + + ret = x.sum(axis=axis, dtype=dtype, out=out, keepdims=keepdims) + + # Compute degrees of freedom and make sure it is not negative. + rcount = max([rcount - ddof, 0]) + + ret = ret[...] + np.divide(ret, rcount, out=ret, casting="unsafe") + return ret[()] + + def std(self, axis=None, dtype=None, out=None, ddof=0, keepdims=False): + """ + Compute the standard deviation along the given axes. Uses all axes by default. + + Parameters + ---------- + axis : Union[int, Iterable[int]], optional + The axes along which to compute the standard deviation. Uses + all axes by default. + dtype : numpy.dtype, optional + The output datatype. + out: SparseArray, optional + The array to write the output to. + ddof: int, optional + Delta degrees of freedom; the divisor used is ``N - ddof``. + keepdims : bool, optional + Whether or not to keep the dimensions of the original array. + + Returns + ------- + SparseArray + The reduced output sparse array. + + See Also + -------- + numpy.ndarray.std : Equivalent numpy method. + + Notes + ----- + * This function internally calls :obj:`COO.sum_duplicates` to bring the + array into canonical form. + + Examples + -------- + You can use :obj:`COO.std` to compute the standard deviation of an array + across any dimension. + + >>> from sparse import COO + >>> x = np.array([[1, 2, 0, 0], + ... 
[0, 1, 0, 0]], dtype='i8') + >>> s = COO.from_numpy(x) + >>> s2 = s.std(axis=1) + >>> s2.todense() # doctest: +SKIP + array([0.8291562, 0.4330127]) + + You can also use the :code:`keepdims` argument to keep the dimensions + after the standard deviation. + + >>> s3 = s.std(axis=0, keepdims=True) + >>> s3.shape + (1, 4) + + You can pass in an output datatype, if needed. + + >>> s4 = s.std(axis=0, dtype=np.float16) + >>> s4.dtype + dtype('float16') + + By default, this reduces the array down to one number, computing the + standard deviation along all axes. + + >>> s.std() # doctest: +SKIP + 0.7071067811865476 + """ + ret = self.var(axis=axis, dtype=dtype, out=out, ddof=ddof, keepdims=keepdims) + + ret = np.sqrt(ret) + return ret diff --git a/sparse/_coo/umath.py b/sparse/_umath.py similarity index 97% rename from sparse/_coo/umath.py rename to sparse/_umath.py index bb0bc1a7..6854d930 100644 --- a/sparse/_coo/umath.py +++ b/sparse/_umath.py @@ -6,7 +6,7 @@ from itertools import zip_longest -from .._utils import isscalar, equivalent, _zero_of_dtype +from ._utils import isscalar, equivalent, _zero_of_dtype def elemwise(func, *args, **kwargs): @@ -25,7 +25,7 @@ def elemwise(func, *args, **kwargs): Returns ------- - COO + SparseArray The result of applying the function. Raises @@ -377,7 +377,7 @@ def broadcast_to(x, shape): -------- :obj:`numpy.broadcast_to` : NumPy equivalent function """ - from .core import COO + from ._coo import COO if shape == x.shape: return x @@ -417,12 +417,17 @@ def __init__(self, func, *args, **kwargs): kwargs : dict Extra arguments to pass to the function. 
""" - from .core import COO - from .._sparse_array import SparseArray + from ._coo import COO + from ._sparse_array import SparseArray + from ._compressed import GCXS + from ._dok import DOK processed_args = [] + out_type = GCXS for arg in args: + if isinstance(arg, COO) or isinstance(arg, DOK): + out_type = COO if isinstance(arg, scipy.sparse.spmatrix): processed_args.append(COO.from_scipy_sparse(arg)) elif isscalar(arg) or isinstance(arg, np.ndarray): @@ -436,6 +441,7 @@ def __init__(self, func, *args, **kwargs): else: processed_args.append(arg) + self.out_type = out_type self.args = tuple(processed_args) self.func = func self.dtype = kwargs.pop("dtype", None) @@ -447,7 +453,7 @@ def __init__(self, func, *args, **kwargs): self._get_fill_value() def get_result(self): - from .core import COO + from ._coo import COO if self.args is None: return NotImplemented @@ -500,7 +506,7 @@ def get_result(self): shape=self.shape, has_duplicates=False, fill_value=self.fill_value, - ) + ).asformat(self.out_type) def _get_fill_value(self): """ @@ -511,7 +517,7 @@ def _get_fill_value(self): ValueError If the fill-value is inconsistent. """ - from .core import COO + from ._coo import COO zero_args = tuple( arg.fill_value[...] if isinstance(arg, COO) else arg for arg in self.args @@ -561,7 +567,7 @@ def _check_broadcast(self): ValueError If the check fails. """ - from .core import COO + from ._coo import COO full_shape = _get_nary_broadcast_shape(*tuple(arg.shape for arg in self.args)) non_ndarray_shape = _get_nary_broadcast_shape( @@ -589,7 +595,7 @@ def _get_func_coords_data(self, mask): None or tuple The coords/data tuple for the given mask. """ - from .core import COO + from ._coo import COO matched_args = [arg for arg, m in zip(self.args, mask) if m is not None and m] unmatched_args = [ @@ -692,8 +698,8 @@ def _match_coo(*args, **kwargs): The expanded, matched :obj:`COO` objects. Only returned if ``return_midx`` is ``False``. 
""" - from .core import COO - from .common import linear_loc + from ._coo import COO + from ._coo.common import linear_loc cache = kwargs.pop("cache", None) return_midx = kwargs.pop("return_midx", False) diff --git a/sparse/tests/test_compressed.py b/sparse/tests/test_compressed.py index a10d45e7..eae6fc17 100644 --- a/sparse/tests/test_compressed.py +++ b/sparse/tests/test_compressed.py @@ -42,9 +42,13 @@ def data_rvs(n): [ ("sum", {}), ("sum", {"dtype": np.float32}), + ("mean", {}), + ("mean", {"dtype": np.float32}), ("prod", {}), ("max", {}), ("min", {}), + ("std", {}), + ("var", {}), ], ) @pytest.mark.parametrize("axis", [None, 0, 1, 2, (0, 2), -3, (1, -1)]) @@ -63,7 +67,7 @@ def test_reductions(reduction, random_sparse, axis, keepdims, kwargs): @pytest.mark.filterwarnings("ignore:overflow") @pytest.mark.parametrize( "reduction, kwargs", - [("sum", {"dtype": np.float16})], + [("sum", {"dtype": np.float16}), ("mean", {"dtype": np.float16})], ) @pytest.mark.parametrize("axis", [None, 0, 1, 2, (0, 2)]) def test_reductions_float16(random_sparse, reduction, kwargs, axis): @@ -93,6 +97,8 @@ def test_reductions_bool(random_sparse, reduction, kwargs, axis, keepdims): (np.max, {}), (np.sum, {}), (np.sum, {"dtype": np.float32}), + (np.mean, {}), + (np.mean, {"dtype": np.float32}), (np.prod, {}), (np.min, {}), ], diff --git a/sparse/tests/test_coo.py b/sparse/tests/test_coo.py index 9f488569..f213e4a6 100644 --- a/sparse/tests/test_coo.py +++ b/sparse/tests/test_coo.py @@ -438,683 +438,6 @@ def test_kron_scalar(ndim): assert_eq(sparse.kron(scalar, sa), sol) -@pytest.mark.parametrize( - "func", - [ - np.expm1, - np.log1p, - np.sin, - np.tan, - np.sinh, - np.tanh, - np.floor, - np.ceil, - np.sqrt, - np.conj, - np.round, - np.rint, - lambda x: x.astype("int32"), - np.conjugate, - np.conj, - lambda x: x.round(decimals=2), - abs, - ], -) -def test_elemwise(func): - s = sparse.random((2, 3, 4), density=0.5) - x = s.todense() - - fs = func(s) - assert isinstance(fs, COO) - 
assert fs.nnz <= s.nnz - - assert_eq(func(x), fs) - - -@pytest.mark.parametrize( - "func", - [ - np.expm1, - np.log1p, - np.sin, - np.tan, - np.sinh, - np.tanh, - np.floor, - np.ceil, - np.sqrt, - np.conj, - np.round, - np.rint, - np.conjugate, - np.conj, - lambda x, out: x.round(decimals=2, out=out), - ], -) -def test_elemwise_inplace(func): - s = sparse.random((2, 3, 4), density=0.5) - x = s.todense() - - func(s, out=s) - func(x, out=x) - assert isinstance(s, COO) - - assert_eq(x, s) - - -@pytest.mark.parametrize( - "shape1, shape2", - [ - ((2, 3, 4), (3, 4)), - ((3, 4), (2, 3, 4)), - ((3, 1, 4), (3, 2, 4)), - ((1, 3, 4), (3, 4)), - ((3, 4, 1), (3, 4, 2)), - ((1, 5), (5, 1)), - ((3, 1), (3, 4)), - ((3, 1), (1, 4)), - ((1, 4), (3, 4)), - ((2, 2, 2), (1, 1, 1)), - ], -) -def test_elemwise_mixed(shape1, shape2): - s1 = sparse.random(shape1, density=0.5) - x2 = np.random.rand(*shape2) - - x1 = s1.todense() - - assert_eq(s1 * x2, x1 * x2) - - -def test_elemwise_mixed_empty(): - s1 = sparse.random((2, 0, 4), density=0.5) - x2 = np.random.rand(2, 0, 4) - - x1 = s1.todense() - - assert_eq(s1 * x2, x1 * x2) - - -def test_elemwise_unsupported(): - class A: - pass - - s1 = sparse.random((2, 3, 4), density=0.5) - x2 = A() - - with pytest.raises(TypeError): - s1 + x2 - - assert sparse.elemwise(operator.add, s1, x2) is NotImplemented - - -def test_elemwise_mixed_broadcast(): - s1 = sparse.random((2, 3, 4), density=0.5) - s2 = sparse.random(4, density=0.5) - x3 = np.random.rand(3, 4) - - x1 = s1.todense() - x2 = s2.todense() - - def func(x1, x2, x3): - return x1 * x2 * x3 - - assert_eq(sparse.elemwise(func, s1, s2, x3), func(x1, x2, x3)) - - -@pytest.mark.parametrize( - "func", - [operator.mul, operator.add, operator.sub, operator.gt, operator.lt, operator.ne], -) -@pytest.mark.parametrize("shape", [(2,), (2, 3), (2, 3, 4), (2, 3, 4, 5)]) -def test_elemwise_binary(func, shape): - xs = sparse.random(shape, density=0.5) - ys = sparse.random(shape, density=0.5) - - x = 
xs.todense() - y = ys.todense() - - assert_eq(func(xs, ys), func(x, y)) - - -@pytest.mark.parametrize("func", [operator.imul, operator.iadd, operator.isub]) -@pytest.mark.parametrize("shape", [(2,), (2, 3), (2, 3, 4), (2, 3, 4, 5)]) -def test_elemwise_binary_inplace(func, shape): - xs = sparse.random(shape, density=0.5) - ys = sparse.random(shape, density=0.5) - - x = xs.todense() - y = ys.todense() - - xs = func(xs, ys) - x = func(x, y) - - assert_eq(xs, x) - - -@pytest.mark.parametrize( - "func", - [ - lambda x, y, z: x + y + z, - lambda x, y, z: x * y * z, - lambda x, y, z: x + y * z, - lambda x, y, z: (x + y) * z, - ], -) -@pytest.mark.parametrize("shape", [(2,), (2, 3), (2, 3, 4), (2, 3, 4, 5)]) -def test_elemwise_trinary(func, shape): - xs = sparse.random(shape, density=0.5) - ys = sparse.random(shape, density=0.5) - zs = sparse.random(shape, density=0.5) - - x = xs.todense() - y = ys.todense() - z = zs.todense() - - fs = sparse.elemwise(func, xs, ys, zs) - assert isinstance(fs, COO) - - assert_eq(fs, func(x, y, z)) - - -@pytest.mark.parametrize("func", [operator.add, operator.mul]) -@pytest.mark.parametrize( - "shape1,shape2", - [ - ((2, 3, 4), (3, 4)), - ((3, 4), (2, 3, 4)), - ((3, 1, 4), (3, 2, 4)), - ((1, 3, 4), (3, 4)), - ((3, 4, 1), (3, 4, 2)), - ((1, 5), (5, 1)), - ((3, 1), (3, 4)), - ((3, 1), (1, 4)), - ((1, 4), (3, 4)), - ((2, 2, 2), (1, 1, 1)), - ], -) -def test_binary_broadcasting(func, shape1, shape2): - density1 = 1 if np.prod(shape1) == 1 else 0.5 - density2 = 1 if np.prod(shape2) == 1 else 0.5 - - xs = sparse.random(shape1, density=density1) - x = xs.todense() - - ys = sparse.random(shape2, density=density2) - y = ys.todense() - - expected = func(x, y) - actual = func(xs, ys) - - assert isinstance(actual, COO) - assert_eq(expected, actual) - - assert np.count_nonzero(expected) == actual.nnz - - -@pytest.mark.parametrize( - "shape1,shape2", - [((3, 4), (2, 3, 4)), ((3, 1, 4), (3, 2, 4)), ((3, 4, 1), (3, 4, 2))], -) -def test_broadcast_to(shape1, 
shape2): - a = sparse.random(shape1, density=0.5) - x = a.todense() - - assert_eq(np.broadcast_to(x, shape2), a.broadcast_to(shape2)) - - -@pytest.mark.parametrize( - "shapes", - [ - [(2,), (3, 2), (4, 3, 2)], - [(3,), (2, 3), (2, 2, 3)], - [(2,), (2, 2), (2, 2, 2)], - [(4,), (4, 4), (4, 4, 4)], - [(4,), (4, 4), (4, 4, 4)], - [(4,), (4, 4), (4, 4, 4)], - [(1, 1, 2), (1, 3, 1), (4, 1, 1)], - [(2,), (2, 1), (2, 1, 1)], - ], -) -@pytest.mark.parametrize( - "func", - [ - lambda x, y, z: (x + y) * z, - lambda x, y, z: x * (y + z), - lambda x, y, z: x * y * z, - lambda x, y, z: x + y + z, - lambda x, y, z: x + y - z, - lambda x, y, z: x - y + z, - ], -) -def test_trinary_broadcasting(shapes, func): - args = [sparse.random(s, density=0.5) for s in shapes] - dense_args = [arg.todense() for arg in args] - - fs = sparse.elemwise(func, *args) - assert isinstance(fs, COO) - - assert_eq(fs, func(*dense_args)) - - -@pytest.mark.parametrize( - "shapes, func", - [ - ([(2,), (3, 2), (4, 3, 2)], lambda x, y, z: (x + y) * z), - ([(3,), (2, 3), (2, 2, 3)], lambda x, y, z: x * (y + z)), - ([(2,), (2, 2), (2, 2, 2)], lambda x, y, z: x * y * z), - ([(4,), (4, 4), (4, 4, 4)], lambda x, y, z: x + y + z), - ], -) -@pytest.mark.parametrize("value", [np.nan, np.inf, -np.inf]) -@pytest.mark.parametrize("fraction", [0.25, 0.5, 0.75, 1.0]) -@pytest.mark.filterwarnings("ignore:invalid value") -def test_trinary_broadcasting_pathological(shapes, func, value, fraction): - args = [ - sparse.random(s, density=0.5, data_rvs=random_value_array(value, fraction)) - for s in shapes - ] - dense_args = [arg.todense() for arg in args] - - fs = sparse.elemwise(func, *args) - assert isinstance(fs, COO) - - assert_eq(fs, func(*dense_args)) - - -def test_sparse_broadcasting(monkeypatch): - orig_unmatch_coo = sparse._coo.umath._Elemwise._get_func_coords_data - - state = {"num_matches": 0} - - xs = sparse.random((3, 4), density=0.5) - ys = sparse.random((3, 4), density=0.5) - - def mock_unmatch_coo(*args, 
**kwargs): - result = orig_unmatch_coo(*args, **kwargs) - if result is not None: - state["num_matches"] += 1 - return result - - monkeypatch.setattr( - sparse._coo.umath._Elemwise, "_get_func_coords_data", mock_unmatch_coo - ) - - xs * ys - - # Less than in case there's absolutely no overlap in some cases. - assert state["num_matches"] <= 1 - - -def test_dense_broadcasting(monkeypatch): - orig_unmatch_coo = sparse._coo.umath._Elemwise._get_func_coords_data - - state = {"num_matches": 0} - - xs = sparse.random((3, 4), density=0.5) - ys = sparse.random((3, 4), density=0.5) - - def mock_unmatch_coo(*args, **kwargs): - result = orig_unmatch_coo(*args, **kwargs) - if result is not None: - state["num_matches"] += 1 - return result - - monkeypatch.setattr( - sparse._coo.umath._Elemwise, "_get_func_coords_data", mock_unmatch_coo - ) - - xs + ys - - # Less than in case there's absolutely no overlap in some cases. - assert state["num_matches"] <= 3 - - -@pytest.mark.parametrize("format", ["coo", "dok"]) -def test_sparsearray_elemwise(format): - xs = sparse.random((3, 4), density=0.5, format=format) - ys = sparse.random((3, 4), density=0.5, format=format) - - x = xs.todense() - y = ys.todense() - - fs = sparse.elemwise(operator.add, xs, ys) - assert isinstance(fs, COO) - - assert_eq(fs, x + y) - - -def test_ndarray_densification_fails(): - xs = sparse.random((2, 3, 4), density=0.5) - y = np.random.rand(3, 4) - - with pytest.raises(ValueError): - xs + y - - -def test_elemwise_noargs(): - def func(): - return np.float_(5.0) - - assert_eq(sparse.elemwise(func), func()) - - -@pytest.mark.parametrize( - "func", - [ - operator.pow, - operator.truediv, - operator.floordiv, - operator.ge, - operator.le, - operator.eq, - operator.mod, - ], -) -@pytest.mark.filterwarnings("ignore:divide by zero") -@pytest.mark.filterwarnings("ignore:invalid value") -def test_nonzero_outout_fv_ufunc(func): - xs = sparse.random((2, 3, 4), density=0.5) - ys = sparse.random((2, 3, 4), density=0.5) - - x = 
xs.todense() - y = ys.todense() - - f = func(x, y) - fs = func(xs, ys) - assert isinstance(fs, COO) - - assert_eq(f, fs) - - -@pytest.mark.parametrize( - "func, scalar", - [ - (operator.mul, 5), - (operator.add, 0), - (operator.sub, 0), - (operator.pow, 5), - (operator.truediv, 3), - (operator.floordiv, 4), - (operator.gt, 5), - (operator.lt, -5), - (operator.ne, 0), - (operator.ge, 5), - (operator.le, -3), - (operator.eq, 1), - (operator.mod, 5), - ], -) -@pytest.mark.parametrize("convert_to_np_number", [True, False]) -def test_elemwise_scalar(func, scalar, convert_to_np_number): - xs = sparse.random((2, 3, 4), density=0.5) - if convert_to_np_number: - scalar = np.float32(scalar) - y = scalar - - x = xs.todense() - fs = func(xs, y) - - assert isinstance(fs, COO) - assert xs.nnz >= fs.nnz - - assert_eq(fs, func(x, y)) - - -@pytest.mark.parametrize( - "func, scalar", - [ - (operator.mul, 5), - (operator.add, 0), - (operator.sub, 0), - (operator.gt, -5), - (operator.lt, 5), - (operator.ne, 0), - (operator.ge, -5), - (operator.le, 3), - (operator.eq, 1), - ], -) -@pytest.mark.parametrize("convert_to_np_number", [True, False]) -def test_leftside_elemwise_scalar(func, scalar, convert_to_np_number): - xs = sparse.random((2, 3, 4), density=0.5) - if convert_to_np_number: - scalar = np.float32(scalar) - y = scalar - - x = xs.todense() - fs = func(y, xs) - - assert isinstance(fs, COO) - assert xs.nnz >= fs.nnz - - assert_eq(fs, func(y, x)) - - -@pytest.mark.parametrize( - "func, scalar", - [ - (operator.add, 5), - (operator.sub, -5), - (operator.pow, -3), - (operator.truediv, 0), - (operator.floordiv, 0), - (operator.gt, -5), - (operator.lt, 5), - (operator.ne, 1), - (operator.ge, -3), - (operator.le, 3), - (operator.eq, 0), - ], -) -@pytest.mark.filterwarnings("ignore:divide by zero") -@pytest.mark.filterwarnings("ignore:invalid value") -def test_scalar_output_nonzero_fv(func, scalar): - xs = sparse.random((2, 3, 4), density=0.5) - y = scalar - - x = xs.todense() - - f = 
func(x, y) - fs = func(xs, y) - - assert isinstance(fs, COO) - assert fs.nnz <= xs.nnz - - assert_eq(f, fs) - - -@pytest.mark.parametrize("func", [operator.and_, operator.or_, operator.xor]) -@pytest.mark.parametrize("shape", [(2,), (2, 3), (2, 3, 4), (2, 3, 4, 5)]) -def test_bitwise_binary(func, shape): - # Small arrays need high density to have nnz entries - # Casting floats to int will result in all zeros, hence the * 100 - xs = (sparse.random(shape, density=0.5) * 100).astype(np.int_) - ys = (sparse.random(shape, density=0.5) * 100).astype(np.int_) - - x = xs.todense() - y = ys.todense() - - assert_eq(func(xs, ys), func(x, y)) - - -@pytest.mark.parametrize("func", [operator.iand, operator.ior, operator.ixor]) -@pytest.mark.parametrize("shape", [(2,), (2, 3), (2, 3, 4), (2, 3, 4, 5)]) -def test_bitwise_binary_inplace(func, shape): - # Small arrays need high density to have nnz entries - # Casting floats to int will result in all zeros, hence the * 100 - xs = (sparse.random(shape, density=0.5) * 100).astype(np.int_) - ys = (sparse.random(shape, density=0.5) * 100).astype(np.int_) - - x = xs.todense() - y = ys.todense() - - xs = func(xs, ys) - x = func(x, y) - - assert_eq(xs, x) - - -@pytest.mark.parametrize("func", [operator.lshift, operator.rshift]) -@pytest.mark.parametrize("shape", [(2,), (2, 3), (2, 3, 4), (2, 3, 4, 5)]) -def test_bitshift_binary(func, shape): - # Small arrays need high density to have nnz entries - # Casting floats to int will result in all zeros, hence the * 100 - xs = (sparse.random(shape, density=0.5) * 100).astype(np.int_) - - # Can't merge into test_bitwise_binary because left/right shifting - # with something >= 64 isn't defined. 
- ys = (sparse.random(shape, density=0.5) * 64).astype(np.int_) - - x = xs.todense() - y = ys.todense() - - assert_eq(func(xs, ys), func(x, y)) - - -@pytest.mark.parametrize("func", [operator.ilshift, operator.irshift]) -@pytest.mark.parametrize("shape", [(2,), (2, 3), (2, 3, 4), (2, 3, 4, 5)]) -def test_bitshift_binary_inplace(func, shape): - # Small arrays need high density to have nnz entries - # Casting floats to int will result in all zeros, hence the * 100 - xs = (sparse.random(shape, density=0.5) * 100).astype(np.int_) - - # Can't merge into test_bitwise_binary because left/right shifting - # with something >= 64 isn't defined. - ys = (sparse.random(shape, density=0.5) * 64).astype(np.int_) - - x = xs.todense() - y = ys.todense() - - xs = func(xs, ys) - x = func(x, y) - - assert_eq(xs, x) - - -@pytest.mark.parametrize("func", [operator.and_]) -@pytest.mark.parametrize("shape", [(2,), (2, 3), (2, 3, 4), (2, 3, 4, 5)]) -def test_bitwise_scalar(func, shape): - # Small arrays need high density to have nnz entries - # Casting floats to int will result in all zeros, hence the * 100 - xs = (sparse.random(shape, density=0.5) * 100).astype(np.int_) - y = np.random.randint(100) - - x = xs.todense() - - assert_eq(func(xs, y), func(x, y)) - assert_eq(func(y, xs), func(y, x)) - - -@pytest.mark.parametrize("func", [operator.lshift, operator.rshift]) -@pytest.mark.parametrize("shape", [(2,), (2, 3), (2, 3, 4), (2, 3, 4, 5)]) -def test_bitshift_scalar(func, shape): - # Small arrays need high density to have nnz entries - # Casting floats to int will result in all zeros, hence the * 100 - xs = (sparse.random(shape, density=0.5) * 100).astype(np.int_) - - # Can't merge into test_bitwise_binary because left/right shifting - # with something >= 64 isn't defined. 
- y = np.random.randint(64) - - x = xs.todense() - - assert_eq(func(xs, y), func(x, y)) - - -@pytest.mark.parametrize("func", [operator.invert]) -@pytest.mark.parametrize("shape", [(2,), (2, 3), (2, 3, 4), (2, 3, 4, 5)]) -def test_unary_bitwise_nonzero_output_fv(func, shape): - # Small arrays need high density to have nnz entries - # Casting floats to int will result in all zeros, hence the * 100 - xs = (sparse.random(shape, density=0.5) * 100).astype(np.int_) - x = xs.todense() - - f = func(x) - fs = func(xs) - - assert isinstance(fs, COO) - assert fs.nnz <= xs.nnz - - assert_eq(f, fs) - - -@pytest.mark.parametrize("func", [operator.or_, operator.xor]) -@pytest.mark.parametrize("shape", [(2,), (2, 3), (2, 3, 4), (2, 3, 4, 5)]) -def test_binary_bitwise_nonzero_output_fv(func, shape): - # Small arrays need high density to have nnz entries - # Casting floats to int will result in all zeros, hence the * 100 - xs = (sparse.random(shape, density=0.5) * 100).astype(np.int_) - y = np.random.randint(1, 100) - - x = xs.todense() - - f = func(x, y) - fs = func(xs, y) - - assert isinstance(fs, COO) - assert fs.nnz <= xs.nnz - - assert_eq(f, fs) - - -@pytest.mark.parametrize( - "func", - [operator.mul, operator.add, operator.sub, operator.gt, operator.lt, operator.ne], -) -@pytest.mark.parametrize("shape", [(2,), (2, 3), (2, 3, 4), (2, 3, 4, 5)]) -def test_elemwise_nonzero_input_fv(func, shape): - xs = sparse.random(shape, density=0.5, fill_value=np.random.rand()) - ys = sparse.random(shape, density=0.5, fill_value=np.random.rand()) - - x = xs.todense() - y = ys.todense() - - assert_eq(func(xs, ys), func(x, y)) - - -@pytest.mark.parametrize("func", [operator.lshift, operator.rshift]) -@pytest.mark.parametrize("shape", [(2,), (2, 3), (2, 3, 4), (2, 3, 4, 5)]) -def test_binary_bitshift_densification_fails(func, shape): - # Small arrays need high density to have nnz entries - # Casting floats to int will result in all zeros, hence the * 100 - x = np.random.randint(1, 100) - ys = 
(sparse.random(shape, density=0.5) * 64).astype(np.int_) - - y = ys.todense() - - f = func(x, y) - fs = func(x, ys) - - assert isinstance(fs, COO) - assert fs.nnz <= ys.nnz - - assert_eq(f, fs) - - -@pytest.mark.parametrize("func", [operator.and_, operator.or_, operator.xor]) -@pytest.mark.parametrize("shape", [(2,), (2, 3), (2, 3, 4), (2, 3, 4, 5)]) -def test_bitwise_binary_bool(func, shape): - # Small arrays need high density to have nnz entries - xs = sparse.random(shape, density=0.5).astype(bool) - ys = sparse.random(shape, density=0.5).astype(bool) - - x = xs.todense() - y = ys.todense() - - assert_eq(func(xs, ys), func(x, y)) - - -def test_elemwise_binary_empty(): - x = COO({}, shape=(10, 10)) - y = sparse.random((10, 10), density=0.5) - - for z in [x * y, y * x]: - assert z.nnz == 0 - assert z.coords.shape == (2, 0) - assert z.data.shape == (0,) - - def test_gt(): s = sparse.random((2, 3, 4), density=0.5) x = s.todense() diff --git a/sparse/tests/test_elemwise.py b/sparse/tests/test_elemwise.py new file mode 100644 index 00000000..a911f28c --- /dev/null +++ b/sparse/tests/test_elemwise.py @@ -0,0 +1,707 @@ +import numpy as np +import sparse +import pytest +import operator +from sparse import COO +from sparse._compressed import GCXS +from sparse._utils import assert_eq, random_value_array + + +@pytest.mark.parametrize( + "func", + [ + np.expm1, + np.log1p, + np.sin, + np.tan, + np.sinh, + np.tanh, + np.floor, + np.ceil, + np.sqrt, + np.conj, + np.round, + np.rint, + lambda x: x.astype("int32"), + np.conjugate, + np.conj, + lambda x: x.round(decimals=2), + abs, + ], +) +@pytest.mark.parametrize("format", [COO, GCXS]) +def test_elemwise(func, format): + s = sparse.random((2, 3, 4), density=0.5, format=format) + x = s.todense() + + fs = func(s) + assert isinstance(fs, format) + assert fs.nnz <= s.nnz + + assert_eq(func(x), fs) + + +@pytest.mark.parametrize( + "func", + [ + np.expm1, + np.log1p, + np.sin, + np.tan, + np.sinh, + np.tanh, + np.floor, + np.ceil, + 
np.sqrt, + np.conj, + np.round, + np.rint, + np.conjugate, + np.conj, + lambda x, out: x.round(decimals=2, out=out), + ], +) +@pytest.mark.parametrize("format", [COO, GCXS]) +def test_elemwise_inplace(func, format): + s = sparse.random((2, 3, 4), density=0.5, format=format) + x = s.todense() + + func(s, out=s) + func(x, out=x) + assert isinstance(s, format) + + assert_eq(x, s) + + +@pytest.mark.parametrize( + "shape1, shape2", + [ + ((2, 3, 4), (3, 4)), + ((3, 4), (2, 3, 4)), + ((3, 1, 4), (3, 2, 4)), + ((1, 3, 4), (3, 4)), + ((3, 4, 1), (3, 4, 2)), + ((1, 5), (5, 1)), + ((3, 1), (3, 4)), + ((3, 1), (1, 4)), + ((1, 4), (3, 4)), + ((2, 2, 2), (1, 1, 1)), + ], +) +@pytest.mark.parametrize("format", [COO, GCXS]) +def test_elemwise_mixed(shape1, shape2, format): + s1 = sparse.random(shape1, density=0.5, format=format) + x2 = np.random.rand(*shape2) + + x1 = s1.todense() + + assert_eq(s1 * x2, x1 * x2) + + +def test_elemwise_mixed_empty(): + s1 = sparse.random((2, 0, 4), density=0.5) + x2 = np.random.rand(2, 0, 4) + + x1 = s1.todense() + + assert_eq(s1 * x2, x1 * x2) + + +def test_elemwise_unsupported(): + class A: + pass + + s1 = sparse.random((2, 3, 4), density=0.5) + x2 = A() + + with pytest.raises(TypeError): + s1 + x2 + + assert sparse.elemwise(operator.add, s1, x2) is NotImplemented + + +def test_elemwise_mixed_broadcast(): + s1 = sparse.random((2, 3, 4), density=0.5) + s2 = sparse.random(4, density=0.5) + x3 = np.random.rand(3, 4) + + x1 = s1.todense() + x2 = s2.todense() + + def func(x1, x2, x3): + return x1 * x2 * x3 + + assert_eq(sparse.elemwise(func, s1, s2, x3), func(x1, x2, x3)) + + +@pytest.mark.parametrize( + "func", + [operator.mul, operator.add, operator.sub, operator.gt, operator.lt, operator.ne], +) +@pytest.mark.parametrize("shape", [(2,), (2, 3), (2, 3, 4), (2, 3, 4, 5)]) +@pytest.mark.parametrize("format", [COO, GCXS]) +def test_elemwise_binary(func, shape, format): + xs = sparse.random(shape, density=0.5, format=format) + ys = sparse.random(shape, 
density=0.5, format=format) + + x = xs.todense() + y = ys.todense() + + assert_eq(func(xs, ys), func(x, y)) + + +@pytest.mark.parametrize("func", [operator.imul, operator.iadd, operator.isub]) +@pytest.mark.parametrize("shape", [(2,), (2, 3), (2, 3, 4), (2, 3, 4, 5)]) +@pytest.mark.parametrize("format", [COO, GCXS]) +def test_elemwise_binary_inplace(func, shape, format): + xs = sparse.random(shape, density=0.5, format=format) + ys = sparse.random(shape, density=0.5, format=format) + + x = xs.todense() + y = ys.todense() + + xs = func(xs, ys) + x = func(x, y) + + assert_eq(xs, x) + + +@pytest.mark.parametrize( + "func", + [ + lambda x, y, z: x + y + z, + lambda x, y, z: x * y * z, + lambda x, y, z: x + y * z, + lambda x, y, z: (x + y) * z, + ], +) +@pytest.mark.parametrize("shape", [(2,), (2, 3), (2, 3, 4), (2, 3, 4, 5)]) +@pytest.mark.parametrize( + "formats", + [ + [COO, COO, COO], + [GCXS, GCXS, GCXS], + [COO, GCXS, GCXS], + ], +) +def test_elemwise_trinary(func, shape, formats): + xs = sparse.random(shape, density=0.5, format=formats[0]) + ys = sparse.random(shape, density=0.5, format=formats[1]) + zs = sparse.random(shape, density=0.5, format=formats[2]) + + x = xs.todense() + y = ys.todense() + z = zs.todense() + + fs = sparse.elemwise(func, xs, ys, zs) + if COO in formats: + assert isinstance(fs, COO) + else: + assert isinstance(fs, GCXS) + + assert_eq(fs, func(x, y, z)) + + +@pytest.mark.parametrize("func", [operator.add, operator.mul]) +@pytest.mark.parametrize( + "shape1,shape2", + [ + ((2, 3, 4), (3, 4)), + ((3, 4), (2, 3, 4)), + ((3, 1, 4), (3, 2, 4)), + ((1, 3, 4), (3, 4)), + ((3, 4, 1), (3, 4, 2)), + ((1, 5), (5, 1)), + ((3, 1), (3, 4)), + ((3, 1), (1, 4)), + ((1, 4), (3, 4)), + ((2, 2, 2), (1, 1, 1)), + ], +) +def test_binary_broadcasting(func, shape1, shape2): + density1 = 1 if np.prod(shape1) == 1 else 0.5 + density2 = 1 if np.prod(shape2) == 1 else 0.5 + + xs = sparse.random(shape1, density=density1) + x = xs.todense() + + ys = 
sparse.random(shape2, density=density2) + y = ys.todense() + + expected = func(x, y) + actual = func(xs, ys) + + assert isinstance(actual, COO) + assert_eq(expected, actual) + + assert np.count_nonzero(expected) == actual.nnz + + +@pytest.mark.parametrize( + "shape1,shape2", + [((3, 4), (2, 3, 4)), ((3, 1, 4), (3, 2, 4)), ((3, 4, 1), (3, 4, 2))], +) +def test_broadcast_to(shape1, shape2): + a = sparse.random(shape1, density=0.5) + x = a.todense() + + assert_eq(np.broadcast_to(x, shape2), a.broadcast_to(shape2)) + + +@pytest.mark.parametrize( + "shapes", + [ + [(2,), (3, 2), (4, 3, 2)], + [(3,), (2, 3), (2, 2, 3)], + [(2,), (2, 2), (2, 2, 2)], + [(4,), (4, 4), (4, 4, 4)], + [(4,), (4, 4), (4, 4, 4)], + [(4,), (4, 4), (4, 4, 4)], + [(1, 1, 2), (1, 3, 1), (4, 1, 1)], + [(2,), (2, 1), (2, 1, 1)], + ], +) +@pytest.mark.parametrize( + "func", + [ + lambda x, y, z: (x + y) * z, + lambda x, y, z: x * (y + z), + lambda x, y, z: x * y * z, + lambda x, y, z: x + y + z, + lambda x, y, z: x + y - z, + lambda x, y, z: x - y + z, + ], +) +def test_trinary_broadcasting(shapes, func): + args = [sparse.random(s, density=0.5) for s in shapes] + dense_args = [arg.todense() for arg in args] + + fs = sparse.elemwise(func, *args) + assert isinstance(fs, COO) + + assert_eq(fs, func(*dense_args)) + + +@pytest.mark.parametrize( + "shapes, func", + [ + ([(2,), (3, 2), (4, 3, 2)], lambda x, y, z: (x + y) * z), + ([(3,), (2, 3), (2, 2, 3)], lambda x, y, z: x * (y + z)), + ([(2,), (2, 2), (2, 2, 2)], lambda x, y, z: x * y * z), + ([(4,), (4, 4), (4, 4, 4)], lambda x, y, z: x + y + z), + ], +) +@pytest.mark.parametrize("value", [np.nan, np.inf, -np.inf]) +@pytest.mark.parametrize("fraction", [0.25, 0.5, 0.75, 1.0]) +@pytest.mark.filterwarnings("ignore:invalid value") +def test_trinary_broadcasting_pathological(shapes, func, value, fraction): + args = [ + sparse.random(s, density=0.5, data_rvs=random_value_array(value, fraction)) + for s in shapes + ] + dense_args = [arg.todense() for arg in 
args] + + fs = sparse.elemwise(func, *args) + assert isinstance(fs, COO) + + assert_eq(fs, func(*dense_args)) + + +def test_sparse_broadcasting(monkeypatch): + orig_unmatch_coo = sparse._umath._Elemwise._get_func_coords_data + + state = {"num_matches": 0} + + xs = sparse.random((3, 4), density=0.5) + ys = sparse.random((3, 4), density=0.5) + + def mock_unmatch_coo(*args, **kwargs): + result = orig_unmatch_coo(*args, **kwargs) + if result is not None: + state["num_matches"] += 1 + return result + + monkeypatch.setattr( + sparse._umath._Elemwise, "_get_func_coords_data", mock_unmatch_coo + ) + + xs * ys + + # Less than in case there's absolutely no overlap in some cases. + assert state["num_matches"] <= 1 + + +def test_dense_broadcasting(monkeypatch): + orig_unmatch_coo = sparse._umath._Elemwise._get_func_coords_data + + state = {"num_matches": 0} + + xs = sparse.random((3, 4), density=0.5) + ys = sparse.random((3, 4), density=0.5) + + def mock_unmatch_coo(*args, **kwargs): + result = orig_unmatch_coo(*args, **kwargs) + if result is not None: + state["num_matches"] += 1 + return result + + monkeypatch.setattr( + sparse._umath._Elemwise, "_get_func_coords_data", mock_unmatch_coo + ) + + xs + ys + + # Less than in case there's absolutely no overlap in some cases. 
+ assert state["num_matches"] <= 3 + + +@pytest.mark.parametrize("format", ["coo", "dok", "gcxs"]) +def test_sparsearray_elemwise(format): + xs = sparse.random((3, 4), density=0.5, format=format) + ys = sparse.random((3, 4), density=0.5, format=format) + + x = xs.todense() + y = ys.todense() + + fs = sparse.elemwise(operator.add, xs, ys) + if format == "gcxs": + assert isinstance(fs, GCXS) + else: + assert isinstance(fs, COO) + + assert_eq(fs, x + y) + + +def test_ndarray_densification_fails(): + xs = sparse.random((2, 3, 4), density=0.5) + y = np.random.rand(3, 4) + + with pytest.raises(ValueError): + xs + y + + +def test_elemwise_noargs(): + def func(): + return np.float_(5.0) + + assert_eq(sparse.elemwise(func), func()) + + +@pytest.mark.parametrize( + "func", + [ + operator.pow, + operator.truediv, + operator.floordiv, + operator.ge, + operator.le, + operator.eq, + operator.mod, + ], +) +@pytest.mark.filterwarnings("ignore:divide by zero") +@pytest.mark.filterwarnings("ignore:invalid value") +@pytest.mark.parametrize("format", [COO, GCXS]) +def test_nonzero_outout_fv_ufunc(func, format): + xs = sparse.random((2, 3, 4), density=0.5, format=format) + ys = sparse.random((2, 3, 4), density=0.5, format=format) + + x = xs.todense() + y = ys.todense() + + f = func(x, y) + fs = func(xs, ys) + assert isinstance(fs, format) + + assert_eq(f, fs) + + +@pytest.mark.parametrize( + "func, scalar", + [ + (operator.mul, 5), + (operator.add, 0), + (operator.sub, 0), + (operator.pow, 5), + (operator.truediv, 3), + (operator.floordiv, 4), + (operator.gt, 5), + (operator.lt, -5), + (operator.ne, 0), + (operator.ge, 5), + (operator.le, -3), + (operator.eq, 1), + (operator.mod, 5), + ], +) +@pytest.mark.parametrize("convert_to_np_number", [True, False]) +@pytest.mark.parametrize("format", [COO, GCXS]) +def test_elemwise_scalar(func, scalar, convert_to_np_number, format): + xs = sparse.random((2, 3, 4), density=0.5, format=format) + if convert_to_np_number: + scalar = 
np.float32(scalar) + y = scalar + + x = xs.todense() + fs = func(xs, y) + + assert isinstance(fs, format) + assert xs.nnz >= fs.nnz + + assert_eq(fs, func(x, y)) + + +@pytest.mark.parametrize( + "func, scalar", + [ + (operator.mul, 5), + (operator.add, 0), + (operator.sub, 0), + (operator.gt, -5), + (operator.lt, 5), + (operator.ne, 0), + (operator.ge, -5), + (operator.le, 3), + (operator.eq, 1), + ], +) +@pytest.mark.parametrize("convert_to_np_number", [True, False]) +def test_leftside_elemwise_scalar(func, scalar, convert_to_np_number): + xs = sparse.random((2, 3, 4), density=0.5) + if convert_to_np_number: + scalar = np.float32(scalar) + y = scalar + + x = xs.todense() + fs = func(y, xs) + + assert isinstance(fs, COO) + assert xs.nnz >= fs.nnz + + assert_eq(fs, func(y, x)) + + +@pytest.mark.parametrize( + "func, scalar", + [ + (operator.add, 5), + (operator.sub, -5), + (operator.pow, -3), + (operator.truediv, 0), + (operator.floordiv, 0), + (operator.gt, -5), + (operator.lt, 5), + (operator.ne, 1), + (operator.ge, -3), + (operator.le, 3), + (operator.eq, 0), + ], +) +@pytest.mark.filterwarnings("ignore:divide by zero") +@pytest.mark.filterwarnings("ignore:invalid value") +def test_scalar_output_nonzero_fv(func, scalar): + xs = sparse.random((2, 3, 4), density=0.5) + y = scalar + + x = xs.todense() + + f = func(x, y) + fs = func(xs, y) + + assert isinstance(fs, COO) + assert fs.nnz <= xs.nnz + + assert_eq(f, fs) + + +@pytest.mark.parametrize("func", [operator.and_, operator.or_, operator.xor]) +@pytest.mark.parametrize("shape", [(2,), (2, 3), (2, 3, 4), (2, 3, 4, 5)]) +@pytest.mark.parametrize("format", [COO, GCXS]) +def test_bitwise_binary(func, shape, format): + # Small arrays need high density to have nnz entries + # Casting floats to int will result in all zeros, hence the * 100 + xs = (sparse.random(shape, density=0.5, format=format) * 100).astype(np.int_) + ys = (sparse.random(shape, density=0.5, format=format) * 100).astype(np.int_) + + x = xs.todense() + 
y = ys.todense() + + assert_eq(func(xs, ys), func(x, y)) + + +@pytest.mark.parametrize("func", [operator.iand, operator.ior, operator.ixor]) +@pytest.mark.parametrize("shape", [(2,), (2, 3), (2, 3, 4), (2, 3, 4, 5)]) +@pytest.mark.parametrize("format", [COO, GCXS]) +def test_bitwise_binary_inplace(func, shape, format): + # Small arrays need high density to have nnz entries + # Casting floats to int will result in all zeros, hence the * 100 + xs = (sparse.random(shape, density=0.5, format=format) * 100).astype(np.int_) + ys = (sparse.random(shape, density=0.5, format=format) * 100).astype(np.int_) + + x = xs.todense() + y = ys.todense() + + xs = func(xs, ys) + x = func(x, y) + + assert_eq(xs, x) + + +@pytest.mark.parametrize("func", [operator.lshift, operator.rshift]) +@pytest.mark.parametrize("shape", [(2,), (2, 3), (2, 3, 4), (2, 3, 4, 5)]) +def test_bitshift_binary(func, shape): + # Small arrays need high density to have nnz entries + # Casting floats to int will result in all zeros, hence the * 100 + xs = (sparse.random(shape, density=0.5) * 100).astype(np.int_) + + # Can't merge into test_bitwise_binary because left/right shifting + # with something >= 64 isn't defined. + ys = (sparse.random(shape, density=0.5) * 64).astype(np.int_) + + x = xs.todense() + y = ys.todense() + + assert_eq(func(xs, ys), func(x, y)) + + +@pytest.mark.parametrize("func", [operator.ilshift, operator.irshift]) +@pytest.mark.parametrize("shape", [(2,), (2, 3), (2, 3, 4), (2, 3, 4, 5)]) +def test_bitshift_binary_inplace(func, shape): + # Small arrays need high density to have nnz entries + # Casting floats to int will result in all zeros, hence the * 100 + xs = (sparse.random(shape, density=0.5) * 100).astype(np.int_) + + # Can't merge into test_bitwise_binary because left/right shifting + # with something >= 64 isn't defined. 
+ ys = (sparse.random(shape, density=0.5) * 64).astype(np.int_) + + x = xs.todense() + y = ys.todense() + + xs = func(xs, ys) + x = func(x, y) + + assert_eq(xs, x) + + +@pytest.mark.parametrize("func", [operator.and_]) +@pytest.mark.parametrize("shape", [(2,), (2, 3), (2, 3, 4), (2, 3, 4, 5)]) +def test_bitwise_scalar(func, shape): + # Small arrays need high density to have nnz entries + # Casting floats to int will result in all zeros, hence the * 100 + xs = (sparse.random(shape, density=0.5) * 100).astype(np.int_) + y = np.random.randint(100) + + x = xs.todense() + + assert_eq(func(xs, y), func(x, y)) + assert_eq(func(y, xs), func(y, x)) + + +@pytest.mark.parametrize("func", [operator.lshift, operator.rshift]) +@pytest.mark.parametrize("shape", [(2,), (2, 3), (2, 3, 4), (2, 3, 4, 5)]) +def test_bitshift_scalar(func, shape): + # Small arrays need high density to have nnz entries + # Casting floats to int will result in all zeros, hence the * 100 + xs = (sparse.random(shape, density=0.5) * 100).astype(np.int_) + + # Can't merge into test_bitwise_binary because left/right shifting + # with something >= 64 isn't defined. 
+ y = np.random.randint(64) + + x = xs.todense() + + assert_eq(func(xs, y), func(x, y)) + + +@pytest.mark.parametrize("func", [operator.invert]) +@pytest.mark.parametrize("shape", [(2,), (2, 3), (2, 3, 4), (2, 3, 4, 5)]) +def test_unary_bitwise_nonzero_output_fv(func, shape): + # Small arrays need high density to have nnz entries + # Casting floats to int will result in all zeros, hence the * 100 + xs = (sparse.random(shape, density=0.5) * 100).astype(np.int_) + x = xs.todense() + + f = func(x) + fs = func(xs) + + assert isinstance(fs, COO) + assert fs.nnz <= xs.nnz + + assert_eq(f, fs) + + +@pytest.mark.parametrize("func", [operator.or_, operator.xor]) +@pytest.mark.parametrize("shape", [(2,), (2, 3), (2, 3, 4), (2, 3, 4, 5)]) +def test_binary_bitwise_nonzero_output_fv(func, shape): + # Small arrays need high density to have nnz entries + # Casting floats to int will result in all zeros, hence the * 100 + xs = (sparse.random(shape, density=0.5) * 100).astype(np.int_) + y = np.random.randint(1, 100) + + x = xs.todense() + + f = func(x, y) + fs = func(xs, y) + + assert isinstance(fs, COO) + assert fs.nnz <= xs.nnz + + assert_eq(f, fs) + + +@pytest.mark.parametrize( + "func", + [operator.mul, operator.add, operator.sub, operator.gt, operator.lt, operator.ne], +) +@pytest.mark.parametrize("shape", [(2,), (2, 3), (2, 3, 4), (2, 3, 4, 5)]) +def test_elemwise_nonzero_input_fv(func, shape): + xs = sparse.random(shape, density=0.5, fill_value=np.random.rand()) + ys = sparse.random(shape, density=0.5, fill_value=np.random.rand()) + + x = xs.todense() + y = ys.todense() + + assert_eq(func(xs, ys), func(x, y)) + + +@pytest.mark.parametrize("func", [operator.lshift, operator.rshift]) +@pytest.mark.parametrize("shape", [(2,), (2, 3), (2, 3, 4), (2, 3, 4, 5)]) +def test_binary_bitshift_densification_fails(func, shape): + # Small arrays need high density to have nnz entries + # Casting floats to int will result in all zeros, hence the * 100 + x = np.random.randint(1, 100) + ys = 
(sparse.random(shape, density=0.5) * 64).astype(np.int_) + + y = ys.todense() + + f = func(x, y) + fs = func(x, ys) + + assert isinstance(fs, COO) + assert fs.nnz <= ys.nnz + + assert_eq(f, fs) + + +@pytest.mark.parametrize("func", [operator.and_, operator.or_, operator.xor]) +@pytest.mark.parametrize("shape", [(2,), (2, 3), (2, 3, 4), (2, 3, 4, 5)]) +def test_bitwise_binary_bool(func, shape): + # Small arrays need high density to have nnz entries + xs = sparse.random(shape, density=0.5).astype(bool) + ys = sparse.random(shape, density=0.5).astype(bool) + + x = xs.todense() + y = ys.todense() + + assert_eq(func(xs, ys), func(x, y)) + + +def test_elemwise_binary_empty(): + x = COO({}, shape=(10, 10)) + y = sparse.random((10, 10), density=0.5) + + for z in [x * y, y * x]: + assert z.nnz == 0 + assert z.coords.shape == (2, 0) + assert z.data.shape == (0,)