From 6288e7be10ac551e41e396122185fe019ad431e6 Mon Sep 17 00:00:00 2001
From: Dale Tovar <daletovar@gmail.com>
Date: Thu, 24 Sep 2020 11:08:07 -0700
Subject: [PATCH] refactor elemwise ops

---
 sparse/_common.py                   |   2 +-
 sparse/_compressed/compressed.py    |  47 +-
 sparse/_coo/__init__.py             |   2 -
 sparse/_coo/common.py               |   4 +-
 sparse/_coo/core.py                 | 307 +-----------
 sparse/_sparse_array.py             | 318 ++++++++++++-
 sparse/{_coo/umath.py => _umath.py} |  30 +-
 sparse/tests/test_compressed.py     |   8 +-
 sparse/tests/test_coo.py            | 677 --------------------------
 sparse/tests/test_elemwise.py       | 707 ++++++++++++++++++++++++++++
 10 files changed, 1068 insertions(+), 1034 deletions(-)
 rename sparse/{_coo/umath.py => _umath.py} (97%)
 create mode 100644 sparse/tests/test_elemwise.py

diff --git a/sparse/_common.py b/sparse/_common.py
index 690429ee..914ea28f 100644
--- a/sparse/_common.py
+++ b/sparse/_common.py
@@ -8,7 +8,7 @@
 from ._sparse_array import SparseArray
 from ._utils import check_compressed_axes, normalize_axis, check_zero_fill_value
 
-from ._coo.umath import elemwise
+from ._umath import elemwise
 from ._coo.common import (
     clip,
     triu,
diff --git a/sparse/_compressed/compressed.py b/sparse/_compressed/compressed.py
index 2a867212..b323e59c 100644
--- a/sparse/_compressed/compressed.py
+++ b/sparse/_compressed/compressed.py
@@ -1,3 +1,4 @@
+import copy as _copy
 import numpy as np
 import operator
 from numpy.lib.mixins import NDArrayOperatorsMixin
@@ -141,6 +142,24 @@ def __init__(
         if prune:
             self._prune()
 
+    def copy(self, deep=True):
+        """Return a copy of the array.
+
+        Parameters
+        ----------
+        deep : boolean, optional
+            If True (default), the internal data, indices and indptr arrays are
+            also copied. Set to ``False`` to only make a shallow copy.
+        """
+        return _copy.deepcopy(self) if deep else _copy.copy(self)
+
+    def _make_shallow_copy_of(self, other):
+        self.data = other.data
+        self.indices = other.indices
+        self.indptr = other.indptr
+        self.compressed_axes = other.compressed_axes
+        super().__init__(other.shape, fill_value=other.fill_value)
+
     @classmethod
     def from_numpy(cls, x, compressed_axes=None, fill_value=0):
         coo = COO(x, fill_value=fill_value)
@@ -262,8 +281,7 @@ def __str__(self):
     __getitem__ = getitem
 
     def _reduce_calc(self, method, axis, keepdims=False, **kwargs):
-
-        if axis[0] is None:
+        if axis[0] is None or np.array_equal(axis, np.arange(self.ndim, dtype=np.intp)):
             x = self.flatten().tocoo()
             out = x.reduce(method, axis=None, keepdims=keepdims, **kwargs)
             if keepdims:
@@ -744,31 +762,6 @@ def __rmatmul__(self, other):
         except NotImplementedError:
             return NotImplemented
 
-    def astype(self, dtype, casting="unsafe", copy=True):
-        """
-        Copy of the array, cast to a specified type.
-
-        See also
-        --------
-        scipy.sparse.coo_matrix.astype : SciPy sparse equivalent function
-        numpy.ndarray.astype : NumPy equivalent ufunc.
-        :obj:`COO.elemwise`: Apply an arbitrary element-wise function to one or two
-            arguments.
-        """
-        if self.dtype == dtype and not copy:
-            return self
-        # temporary solution
-        return GCXS(
-            (
-                np.array(self.data, copy=copy).astype(dtype),
-                np.array(self.indices, copy=copy),
-                np.array(self.indptr, copy=copy),
-            ),
-            shape=self.shape,
-            compressed_axes=self.compressed_axes,
-            fill_value=self.fill_value,
-        )
-
     def _prune(self):
         """
         Prunes data so that if any fill-values are present, they are removed
diff --git a/sparse/_coo/__init__.py b/sparse/_coo/__init__.py
index 48385774..4a3c910b 100644
--- a/sparse/_coo/__init__.py
+++ b/sparse/_coo/__init__.py
@@ -1,5 +1,4 @@
 from .core import COO, as_coo
-from .umath import elemwise
 from .common import (
     concatenate,
     clip,
@@ -26,7 +25,6 @@
 __all__ = [
     "COO",
     "as_coo",
-    "elemwise",
     "concatenate",
     "clip",
     "stack",
diff --git a/sparse/_coo/common.py b/sparse/_coo/common.py
index ac7db1bd..b9162051 100644
--- a/sparse/_coo/common.py
+++ b/sparse/_coo/common.py
@@ -93,7 +93,7 @@ def kron(a, b):
            [0, 0, 0, 0, 0, 0, 1, 2, 3]], dtype=int64)
     """
     from .core import COO
-    from .umath import _cartesian_product
+    from .._umath import _cartesian_product
 
     check_zero_fill_value(a, b)
 
@@ -556,7 +556,7 @@ def where(condition, x=None, y=None):
     --------
     numpy.where : Equivalent Numpy function.
     """
-    from .umath import elemwise
+    from .._umath import elemwise
 
     x_given = x is not None
     y_given = y is not None
diff --git a/sparse/_coo/core.py b/sparse/_coo/core.py
index df3bf09f..51a54b3b 100644
--- a/sparse/_coo/core.py
+++ b/sparse/_coo/core.py
@@ -12,7 +12,7 @@
 
 from .._common import dot, matmul
 from .indexing import getitem
-from .umath import elemwise, broadcast_to
+from .._umath import elemwise, broadcast_to
 from .._sparse_array import SparseArray, _reduce_super_ufunc
 from .._utils import normalize_axis, equivalent, check_zero_fill_value, _zero_of_dtype
 
@@ -692,255 +692,6 @@ def _reduce_return(self, data, arr_attrs, result_fill_value):
 
         return out.reshape(tuple(self.shape[d] for d in neg_axis))
 
-    def mean(self, axis=None, keepdims=False, dtype=None, out=None):
-        """
-        Compute the mean along the given axes. Uses all axes by default.
-
-        Parameters
-        ----------
-        axis : Union[int, Iterable[int]], optional
-            The axes along which to compute the mean. Uses all axes by default.
-        keepdims : bool, optional
-            Whether or not to keep the dimensions of the original array.
-        dtype: numpy.dtype
-            The data type of the output array.
-
-        Returns
-        -------
-        COO
-            The reduced output sparse array.
-
-        See Also
-        --------
-        numpy.ndarray.mean : Equivalent numpy method.
-        scipy.sparse.coo_matrix.mean : Equivalent Scipy method.
-
-        Notes
-        -----
-        * This function internally calls :obj:`COO.sum_duplicates` to bring the
-          array into canonical form.
-        * The :code:`out` parameter is provided just for compatibility with
-          Numpy and isn't actually supported.
-
-        Examples
-        --------
-        You can use :obj:`COO.mean` to compute the mean of an array across any
-        dimension.
-
-        >>> x = np.array([[1, 2, 0, 0],
-        ...               [0, 1, 0, 0]], dtype='i8')
-        >>> s = COO.from_numpy(x)
-        >>> s2 = s.mean(axis=1)
-        >>> s2.todense()  # doctest: +SKIP
-        array([0.5, 1.5, 0., 0.])
-
-        You can also use the :code:`keepdims` argument to keep the dimensions
-        after the mean.
-
-        >>> s3 = s.mean(axis=0, keepdims=True)
-        >>> s3.shape
-        (1, 4)
-
-        You can pass in an output datatype, if needed.
-
-        >>> s4 = s.mean(axis=0, dtype=np.float16)
-        >>> s4.dtype
-        dtype('float16')
-
-        By default, this reduces the array down to one number, computing the
-        mean along all axes.
-
-        >>> s.mean()
-        0.5
-        """
-        if axis is None:
-            axis = tuple(range(self.ndim))
-        elif not isinstance(axis, tuple):
-            axis = (axis,)
-        den = reduce(operator.mul, (self.shape[i] for i in axis), 1)
-
-        if dtype is None:
-            if issubclass(self.dtype.type, (np.integer, np.bool_)):
-                dtype = inter_dtype = np.dtype("f8")
-            else:
-                dtype = self.dtype
-                inter_dtype = (
-                    np.dtype("f4") if issubclass(dtype.type, np.float16) else dtype
-                )
-        else:
-            inter_dtype = dtype
-
-        num = self.sum(axis=axis, keepdims=keepdims, dtype=inter_dtype)
-
-        if num.ndim:
-            out = np.true_divide(num, den, casting="unsafe")
-            return out.astype(dtype) if out.dtype != dtype else out
-        return np.divide(num, den, dtype=dtype, out=out)
-
-    def var(self, axis=None, dtype=None, out=None, ddof=0, keepdims=False):
-        """
-        Compute the variance along the gi66ven axes. Uses all axes by default.
-
-        Parameters
-        ----------
-        axis : Union[int, Iterable[int]], optional
-            The axes along which to compute the variance. Uses all axes by default.
-        dtype : numpy.dtype, optional
-            The output datatype.
-        out: COO, optional
-            The array to write the output to.
-        ddof: int
-            The degrees of freedom.
-        keepdims : bool, optional
-            Whether or not to keep the dimensions of the original array.
-
-        Returns
-        -------
-        COO
-            The reduced output sparse array.
-
-        See Also
-        --------
-        numpy.ndarray.var : Equivalent numpy method.
-
-        Notes
-        -----
-        * This function internally calls :obj:`COO.sum_duplicates` to bring the
-          array into canonical form.
-
-        Examples
-        --------
-        You can use :obj:`COO.var` to compute the variance of an array across any
-        dimension.
-
-        >>> x = np.array([[1, 2, 0, 0],
-        ...               [0, 1, 0, 0]], dtype='i8')
-        >>> s = COO.from_numpy(x)
-        >>> s2 = s.var(axis=1)
-        >>> s2.todense()  # doctest: +SKIP
-        array([0.6875, 0.1875])
-
-        You can also use the :code:`keepdims` argument to keep the dimensions
-        after the variance.
-
-        >>> s3 = s.var(axis=0, keepdims=True)
-        >>> s3.shape
-        (1, 4)
-
-        You can pass in an output datatype, if needed.
-
-        >>> s4 = s.var(axis=0, dtype=np.float16)
-        >>> s4.dtype
-        dtype('float16')
-
-        By default, this reduces the array down to one number, computing the
-        variance along all axes.
-
-        >>> s.var()
-        0.5
-        """
-        axis = normalize_axis(axis, self.ndim)
-
-        if axis is None:
-            axis = tuple(range(self.ndim))
-
-        if not isinstance(axis, tuple):
-            axis = (axis,)
-
-        rcount = reduce(operator.mul, (self.shape[a] for a in axis), 1)
-        # Make this warning show up on top.
-        if ddof >= rcount:
-            warnings.warn("Degrees of freedom <= 0 for slice", RuntimeWarning)
-
-        # Cast bool, unsigned int, and int to float64 by default
-        if dtype is None and issubclass(self.dtype.type, (np.integer, np.bool_)):
-            dtype = np.dtype("f8")
-
-        arrmean = self.sum(axis, dtype=dtype, keepdims=True)
-        np.divide(arrmean, rcount, out=arrmean)
-        x = self - arrmean
-        if issubclass(self.dtype.type, np.complexfloating):
-            x = x.real * x.real + x.imag * x.imag
-        else:
-            x = np.multiply(x, x, out=x)
-
-        ret = x.sum(axis=axis, dtype=dtype, out=out, keepdims=keepdims)
-
-        # Compute degrees of freedom and make sure it is not negative.
-        rcount = max([rcount - ddof, 0])
-
-        ret = ret[...]
-        np.divide(ret, rcount, out=ret, casting="unsafe")
-        return ret[()]
-
-    def std(self, axis=None, dtype=None, out=None, ddof=0, keepdims=False):
-        """
-        Compute the standard deviation along the given axes. Uses all axes by default.
-
-        Parameters
-        ----------
-        axis : Union[int, Iterable[int]], optional
-            The axes along which to compute the standard deviation. Uses
-            all axes by default.
-        dtype : numpy.dtype, optional
-            The output datatype.
-        out: COO, optional
-            The array to write the output to.
-        ddof: int
-            The degrees of freedom.
-        keepdims : bool, optional
-            Whether or not to keep the dimensions of the original array.
-
-        Returns
-        -------
-        COO
-            The reduced output sparse array.
-
-        See Also
-        --------
-        numpy.ndarray.std : Equivalent numpy method.
-
-        Notes
-        -----
-        * This function internally calls :obj:`COO.sum_duplicates` to bring the
-          array into canonical form.
-
-        Examples
-        --------
-        You can use :obj:`COO.std` to compute the standard deviation of an array
-        across any dimension.
-
-        >>> x = np.array([[1, 2, 0, 0],
-        ...               [0, 1, 0, 0]], dtype='i8')
-        >>> s = COO.from_numpy(x)
-        >>> s2 = s.std(axis=1)
-        >>> s2.todense()  # doctest: +SKIP
-        array([0.8291562, 0.4330127])
-
-        You can also use the :code:`keepdims` argument to keep the dimensions
-        after the standard deviation.
-
-        >>> s3 = s.std(axis=0, keepdims=True)
-        >>> s3.shape
-        (1, 4)
-
-        You can pass in an output datatype, if needed.
-
-        >>> s4 = s.std(axis=0, dtype=np.float16)
-        >>> s4.dtype
-        dtype('float16')
-
-        By default, this reduces the array down to one number, computing the
-        standard deviation along all axes.
-
-        >>> s.std()  # doctest: +SKIP
-        0.7071067811865476
-        """
-        ret = self.var(axis=axis, dtype=dtype, out=out, ddof=ddof, keepdims=keepdims)
-
-        ret = np.sqrt(ret)
-        return ret
-
     def transpose(self, axes=None):
         """
         Returns a new array which has the order of the axes switched.
@@ -1642,62 +1393,6 @@ def broadcast_to(self, shape):
         """
         return broadcast_to(self, shape)
 
-    def round(self, decimals=0, out=None):
-        """
-        Evenly round to the given number of decimals.
-
-        See also
-        --------
-        :obj:`numpy.round` : NumPy equivalent ufunc.
-        :obj:`COO.elemwise`: Apply an arbitrary element-wise function to one or two
-            arguments.
-        """
-        if out is not None and not isinstance(out, tuple):
-            out = (out,)
-        return self.__array_ufunc__(
-            np.round, "__call__", self, decimals=decimals, out=out
-        )
-
-    round_ = round
-
-    def clip(self, min=None, max=None, out=None):
-        """
-        Clip (limit) the values in the array.
-
-        Return an array whose values are limited to ``[min, max]``. One of min
-        or max must be given.
-
-        See Also
-        --------
-        sparse.clip : For full documentation and more details.
-        numpy.clip : Equivalent NumPy function.
-        """
-        if min is None and max is None:
-            raise ValueError("One of max or min must be given.")
-        if out is not None and not isinstance(out, tuple):
-            out = (out,)
-        return self.__array_ufunc__(
-            np.clip, "__call__", self, a_min=min, a_max=max, out=out
-        )
-
-    def astype(self, dtype, casting="unsafe", copy=True):
-        """
-        Copy of the array, cast to a specified type.
-
-        See also
-        --------
-        scipy.sparse.coo_matrix.astype : SciPy sparse equivalent function
-        numpy.ndarray.astype : NumPy equivalent ufunc.
-        :obj:`COO.elemwise`: Apply an arbitrary element-wise function to one or two
-            arguments.
-        """
-        # this matches numpy's behavior
-        if self.dtype == dtype and not copy:
-            return self
-        return self.__array_ufunc__(
-            np.ndarray.astype, "__call__", self, dtype=dtype, copy=copy, casting=casting
-        )
-
     def maybe_densify(self, max_size=1000, min_density=0.25):
         """
         Converts this :obj:`COO` array to a :obj:`numpy.ndarray` if not too
diff --git a/sparse/_sparse_array.py b/sparse/_sparse_array.py
index 37d6ae22..63577dee 100644
--- a/sparse/_sparse_array.py
+++ b/sparse/_sparse_array.py
@@ -3,13 +3,13 @@
 from numbers import Integral
 from typing import Callable
 import operator
-import functools
+from functools import reduce
+import warnings
 
-import numba
 import numpy as np
 import scipy.sparse as ss
 
-from ._coo.umath import elemwise
+from ._umath import elemwise
 from ._utils import _zero_of_dtype, html_table, equivalent, normalize_axis
 
 _reduce_super_ufunc = {np.add: np.multiply, np.multiply: np.power}
@@ -136,7 +135,7 @@ def size(self):
         """
         # We use this instead of np.prod because np.prod
         # returns a float64 for an empty shape.
-        return functools.reduce(operator.mul, self.shape, 1)
+        return reduce(operator.mul, self.shape, 1)
 
     @property
     def density(self):
@@ -355,9 +354,7 @@ def reduce(self, method, axis=(0,), keepdims=False, **kwargs):
 
         if not isinstance(axis, tuple):
             axis = (axis,)
-
         out = self._reduce_calc(method, axis, keepdims, **kwargs)
-
         if len(out) == 1:
             return out[0]
         data, counts, axis, n_cols, arr_attrs = out
@@ -541,3 +538,312 @@ def prod(self, axis=None, keepdims=False, dtype=None, out=None):
         return np.multiply.reduce(
             self, out=out, axis=axis, keepdims=keepdims, dtype=dtype
         )
+
+    def round(self, decimals=0, out=None):
+        """
+        Evenly round to the given number of decimals.
+
+        See Also
+        --------
+        :obj:`numpy.round` : NumPy equivalent ufunc.
+        :obj:`COO.elemwise`: Apply an arbitrary element-wise function to one or two
+            arguments.
+        """
+        if out is not None and not isinstance(out, tuple):
+            out = (out,)
+        return self.__array_ufunc__(
+            np.round, "__call__", self, decimals=decimals, out=out
+        )
+
+    round_ = round
+
+    def clip(self, min=None, max=None, out=None):
+        """
+        Clip (limit) the values in the array.
+
+        Return an array whose values are limited to ``[min, max]``. One of min
+        or max must be given.
+
+        See Also
+        --------
+        sparse.clip : For full documentation and more details.
+        numpy.clip : Equivalent NumPy function.
+        """
+        if min is None and max is None:
+            raise ValueError("One of max or min must be given.")
+        if out is not None and not isinstance(out, tuple):
+            out = (out,)
+        return self.__array_ufunc__(
+            np.clip, "__call__", self, a_min=min, a_max=max, out=out
+        )
+
+    def astype(self, dtype, casting="unsafe", copy=True):
+        """
+        Copy of the array, cast to a specified type.
+
+        See Also
+        --------
+        scipy.sparse.coo_matrix.astype : SciPy sparse equivalent function
+        numpy.ndarray.astype : NumPy equivalent ufunc.
+        :obj:`COO.elemwise`: Apply an arbitrary element-wise function to one or two
+            arguments.
+        """
+        # this matches numpy's behavior
+        if self.dtype == dtype and not copy:
+            return self
+        return self.__array_ufunc__(
+            np.ndarray.astype, "__call__", self, dtype=dtype, copy=copy, casting=casting
+        )
+
+    def mean(self, axis=None, keepdims=False, dtype=None, out=None):
+        """
+        Compute the mean along the given axes. Uses all axes by default.
+
+        Parameters
+        ----------
+        axis : Union[int, Iterable[int]], optional
+            The axes along which to compute the mean. Uses all axes by default.
+        keepdims : bool, optional
+            Whether or not to keep the dimensions of the original array.
+        dtype: numpy.dtype
+            The data type of the output array.
+
+        Returns
+        -------
+        SparseArray
+            The reduced output sparse array.
+
+        See Also
+        --------
+        numpy.ndarray.mean : Equivalent numpy method.
+        scipy.sparse.coo_matrix.mean : Equivalent Scipy method.
+
+        Notes
+        -----
+        * This function internally calls :obj:`COO.sum_duplicates` to bring the
+          array into canonical form.
+        * The :code:`out` parameter is provided just for compatibility with
+          Numpy and isn't actually supported.
+
+        Examples
+        --------
+        You can use :obj:`COO.mean` to compute the mean of an array across any
+        dimension.
+
+        >>> from sparse import COO
+        >>> x = np.array([[1, 2, 0, 0],
+        ...               [0, 1, 0, 0]], dtype='i8')
+        >>> s = COO.from_numpy(x)
+        >>> s2 = s.mean(axis=1)
+        >>> s2.todense()  # doctest: +SKIP
+        array([0.75, 0.25])
+
+        You can also use the :code:`keepdims` argument to keep the dimensions
+        after the mean.
+
+        >>> s3 = s.mean(axis=0, keepdims=True)
+        >>> s3.shape
+        (1, 4)
+
+        You can pass in an output datatype, if needed.
+
+        >>> s4 = s.mean(axis=0, dtype=np.float16)
+        >>> s4.dtype
+        dtype('float16')
+
+        By default, this reduces the array down to one number, computing the
+        mean along all axes.
+
+        >>> s.mean()
+        0.5
+        """
+
+        if axis is None:
+            axis = tuple(range(self.ndim))
+        elif not isinstance(axis, tuple):
+            axis = (axis,)
+        den = reduce(operator.mul, (self.shape[i] for i in axis), 1)
+
+        if dtype is None:
+            if issubclass(self.dtype.type, (np.integer, np.bool_)):
+                dtype = inter_dtype = np.dtype("f8")
+            else:
+                dtype = self.dtype
+                inter_dtype = (
+                    np.dtype("f4") if issubclass(dtype.type, np.float16) else dtype
+                )
+        else:
+            inter_dtype = dtype
+
+        num = self.sum(axis=axis, keepdims=keepdims, dtype=inter_dtype)
+
+        if num.ndim:
+            out = np.true_divide(num, den, casting="unsafe")
+            return out.astype(dtype) if out.dtype != dtype else out
+        return np.divide(num, den, dtype=dtype, out=out)
+
+    def var(self, axis=None, dtype=None, out=None, ddof=0, keepdims=False):
+        """
+        Compute the variance along the given axes. Uses all axes by default.
+
+        Parameters
+        ----------
+        axis : Union[int, Iterable[int]], optional
+            The axes along which to compute the variance. Uses all axes by default.
+        dtype : numpy.dtype, optional
+            The output datatype.
+        out: SparseArray, optional
+            The array to write the output to.
+        ddof: int
+            The degrees of freedom.
+        keepdims : bool, optional
+            Whether or not to keep the dimensions of the original array.
+
+        Returns
+        -------
+        SparseArray
+            The reduced output sparse array.
+
+        See Also
+        --------
+        numpy.ndarray.var : Equivalent numpy method.
+
+        Notes
+        -----
+        * This function internally calls :obj:`COO.sum_duplicates` to bring the
+          array into canonical form.
+
+        Examples
+        --------
+        You can use :obj:`COO.var` to compute the variance of an array across any
+        dimension.
+
+        >>> from sparse import COO
+        >>> x = np.array([[1, 2, 0, 0],
+        ...               [0, 1, 0, 0]], dtype='i8')
+        >>> s = COO.from_numpy(x)
+        >>> s2 = s.var(axis=1)
+        >>> s2.todense()  # doctest: +SKIP
+        array([0.6875, 0.1875])
+
+        You can also use the :code:`keepdims` argument to keep the dimensions
+        after the variance.
+
+        >>> s3 = s.var(axis=0, keepdims=True)
+        >>> s3.shape
+        (1, 4)
+
+        You can pass in an output datatype, if needed.
+
+        >>> s4 = s.var(axis=0, dtype=np.float16)
+        >>> s4.dtype
+        dtype('float16')
+
+        By default, this reduces the array down to one number, computing the
+        variance along all axes.
+
+        >>> s.var()
+        0.5
+        """
+        axis = normalize_axis(axis, self.ndim)
+
+        if axis is None:
+            axis = tuple(range(self.ndim))
+
+        if not isinstance(axis, tuple):
+            axis = (axis,)
+
+        rcount = reduce(operator.mul, (self.shape[a] for a in axis), 1)
+        # Make this warning show up on top.
+        if ddof >= rcount:
+            warnings.warn("Degrees of freedom <= 0 for slice", RuntimeWarning)
+
+        # Cast bool, unsigned int, and int to float64 by default
+        if dtype is None and issubclass(self.dtype.type, (np.integer, np.bool_)):
+            dtype = np.dtype("f8")
+
+        arrmean = self.sum(axis, dtype=dtype, keepdims=True)
+        np.divide(arrmean, rcount, out=arrmean)
+        x = self - arrmean
+        if issubclass(self.dtype.type, np.complexfloating):
+            x = x.real * x.real + x.imag * x.imag
+        else:
+            x = np.multiply(x, x, out=x)
+
+        ret = x.sum(axis=axis, dtype=dtype, out=out, keepdims=keepdims)
+
+        # Compute degrees of freedom and make sure it is not negative.
+        rcount = max([rcount - ddof, 0])
+
+        ret = ret[...]
+        np.divide(ret, rcount, out=ret, casting="unsafe")
+        return ret[()]
+
+    def std(self, axis=None, dtype=None, out=None, ddof=0, keepdims=False):
+        """
+        Compute the standard deviation along the given axes. Uses all axes by default.
+
+        Parameters
+        ----------
+        axis : Union[int, Iterable[int]], optional
+            The axes along which to compute the standard deviation. Uses
+            all axes by default.
+        dtype : numpy.dtype, optional
+            The output datatype.
+        out: SparseArray, optional
+            The array to write the output to.
+        ddof: int
+            The degrees of freedom.
+        keepdims : bool, optional
+            Whether or not to keep the dimensions of the original array.
+
+        Returns
+        -------
+        SparseArray
+            The reduced output sparse array.
+
+        See Also
+        --------
+        numpy.ndarray.std : Equivalent numpy method.
+
+        Notes
+        -----
+        * This function internally calls :obj:`COO.sum_duplicates` to bring the
+          array into canonical form.
+
+        Examples
+        --------
+        You can use :obj:`COO.std` to compute the standard deviation of an array
+        across any dimension.
+
+        >>> from sparse import COO
+        >>> x = np.array([[1, 2, 0, 0],
+        ...               [0, 1, 0, 0]], dtype='i8')
+        >>> s = COO.from_numpy(x)
+        >>> s2 = s.std(axis=1)
+        >>> s2.todense()  # doctest: +SKIP
+        array([0.8291562, 0.4330127])
+
+        You can also use the :code:`keepdims` argument to keep the dimensions
+        after the standard deviation.
+
+        >>> s3 = s.std(axis=0, keepdims=True)
+        >>> s3.shape
+        (1, 4)
+
+        You can pass in an output datatype, if needed.
+
+        >>> s4 = s.std(axis=0, dtype=np.float16)
+        >>> s4.dtype
+        dtype('float16')
+
+        By default, this reduces the array down to one number, computing the
+        standard deviation along all axes.
+
+        >>> s.std()  # doctest: +SKIP
+        0.7071067811865476
+        """
+        ret = self.var(axis=axis, dtype=dtype, out=out, ddof=ddof, keepdims=keepdims)
+
+        ret = np.sqrt(ret)
+        return ret
diff --git a/sparse/_coo/umath.py b/sparse/_umath.py
similarity index 97%
rename from sparse/_coo/umath.py
rename to sparse/_umath.py
index bb0bc1a7..6854d930 100644
--- a/sparse/_coo/umath.py
+++ b/sparse/_umath.py
@@ -6,7 +6,7 @@
 
 from itertools import zip_longest
 
-from .._utils import isscalar, equivalent, _zero_of_dtype
+from ._utils import isscalar, equivalent, _zero_of_dtype
 
 
 def elemwise(func, *args, **kwargs):
@@ -25,7 +25,7 @@ def elemwise(func, *args, **kwargs):
 
     Returns
     -------
-    COO
+    SparseArray
         The result of applying the function.
 
     Raises
@@ -377,7 +377,7 @@ def broadcast_to(x, shape):
     --------
     :obj:`numpy.broadcast_to` : NumPy equivalent function
     """
-    from .core import COO
+    from ._coo import COO
 
     if shape == x.shape:
         return x
@@ -417,12 +417,17 @@ def __init__(self, func, *args, **kwargs):
         kwargs : dict
             Extra arguments to pass to the function.
         """
-        from .core import COO
-        from .._sparse_array import SparseArray
+        from ._coo import COO
+        from ._sparse_array import SparseArray
+        from ._compressed import GCXS
+        from ._dok import DOK
 
         processed_args = []
+        out_type = GCXS
 
         for arg in args:
+            if isinstance(arg, COO) or isinstance(arg, DOK):
+                out_type = COO
             if isinstance(arg, scipy.sparse.spmatrix):
                 processed_args.append(COO.from_scipy_sparse(arg))
             elif isscalar(arg) or isinstance(arg, np.ndarray):
@@ -436,6 +441,7 @@ def __init__(self, func, *args, **kwargs):
             else:
                 processed_args.append(arg)
 
+        self.out_type = out_type
         self.args = tuple(processed_args)
         self.func = func
         self.dtype = kwargs.pop("dtype", None)
@@ -447,7 +453,7 @@ def __init__(self, func, *args, **kwargs):
         self._get_fill_value()
 
     def get_result(self):
-        from .core import COO
+        from ._coo import COO
 
         if self.args is None:
             return NotImplemented
@@ -500,7 +506,7 @@ def get_result(self):
             shape=self.shape,
             has_duplicates=False,
             fill_value=self.fill_value,
-        )
+        ).asformat(self.out_type)
 
     def _get_fill_value(self):
         """
@@ -511,7 +517,7 @@ def _get_fill_value(self):
         ValueError
             If the fill-value is inconsistent.
         """
-        from .core import COO
+        from ._coo import COO
 
         zero_args = tuple(
             arg.fill_value[...] if isinstance(arg, COO) else arg for arg in self.args
@@ -561,7 +567,7 @@ def _check_broadcast(self):
         ValueError
             If the check fails.
         """
-        from .core import COO
+        from ._coo import COO
 
         full_shape = _get_nary_broadcast_shape(*tuple(arg.shape for arg in self.args))
         non_ndarray_shape = _get_nary_broadcast_shape(
@@ -589,7 +595,7 @@ def _get_func_coords_data(self, mask):
         None or tuple
             The coords/data tuple for the given mask.
         """
-        from .core import COO
+        from ._coo import COO
 
         matched_args = [arg for arg, m in zip(self.args, mask) if m is not None and m]
         unmatched_args = [
@@ -692,8 +698,8 @@ def _match_coo(*args, **kwargs):
             The expanded, matched :obj:`COO` objects. Only returned if
             ``return_midx`` is ``False``.
         """
-        from .core import COO
-        from .common import linear_loc
+        from ._coo import COO
+        from ._coo.common import linear_loc
 
         cache = kwargs.pop("cache", None)
         return_midx = kwargs.pop("return_midx", False)
diff --git a/sparse/tests/test_compressed.py b/sparse/tests/test_compressed.py
index a10d45e7..eae6fc17 100644
--- a/sparse/tests/test_compressed.py
+++ b/sparse/tests/test_compressed.py
@@ -42,9 +42,13 @@ def data_rvs(n):
     [
         ("sum", {}),
         ("sum", {"dtype": np.float32}),
+        ("mean", {}),
+        ("mean", {"dtype": np.float32}),
         ("prod", {}),
         ("max", {}),
         ("min", {}),
+        ("std", {}),
+        ("var", {}),
     ],
 )
 @pytest.mark.parametrize("axis", [None, 0, 1, 2, (0, 2), -3, (1, -1)])
@@ -63,7 +67,7 @@ def test_reductions(reduction, random_sparse, axis, keepdims, kwargs):
 @pytest.mark.filterwarnings("ignore:overflow")
 @pytest.mark.parametrize(
     "reduction, kwargs",
-    [("sum", {"dtype": np.float16})],
+    [("sum", {"dtype": np.float16}), ("mean", {"dtype": np.float16})],
 )
 @pytest.mark.parametrize("axis", [None, 0, 1, 2, (0, 2)])
 def test_reductions_float16(random_sparse, reduction, kwargs, axis):
@@ -93,6 +97,8 @@ def test_reductions_bool(random_sparse, reduction, kwargs, axis, keepdims):
         (np.max, {}),
         (np.sum, {}),
         (np.sum, {"dtype": np.float32}),
+        (np.mean, {}),
+        (np.mean, {"dtype": np.float32}),
         (np.prod, {}),
         (np.min, {}),
     ],
diff --git a/sparse/tests/test_coo.py b/sparse/tests/test_coo.py
index 9f488569..f213e4a6 100644
--- a/sparse/tests/test_coo.py
+++ b/sparse/tests/test_coo.py
@@ -438,683 +438,6 @@ def test_kron_scalar(ndim):
     assert_eq(sparse.kron(scalar, sa), sol)
 
 
-@pytest.mark.parametrize(
-    "func",
-    [
-        np.expm1,
-        np.log1p,
-        np.sin,
-        np.tan,
-        np.sinh,
-        np.tanh,
-        np.floor,
-        np.ceil,
-        np.sqrt,
-        np.conj,
-        np.round,
-        np.rint,
-        lambda x: x.astype("int32"),
-        np.conjugate,
-        np.conj,
-        lambda x: x.round(decimals=2),
-        abs,
-    ],
-)
-def test_elemwise(func):
-    s = sparse.random((2, 3, 4), density=0.5)
-    x = s.todense()
-
-    fs = func(s)
-    assert isinstance(fs, COO)
-    assert fs.nnz <= s.nnz
-
-    assert_eq(func(x), fs)
-
-
-@pytest.mark.parametrize(
-    "func",
-    [
-        np.expm1,
-        np.log1p,
-        np.sin,
-        np.tan,
-        np.sinh,
-        np.tanh,
-        np.floor,
-        np.ceil,
-        np.sqrt,
-        np.conj,
-        np.round,
-        np.rint,
-        np.conjugate,
-        np.conj,
-        lambda x, out: x.round(decimals=2, out=out),
-    ],
-)
-def test_elemwise_inplace(func):
-    s = sparse.random((2, 3, 4), density=0.5)
-    x = s.todense()
-
-    func(s, out=s)
-    func(x, out=x)
-    assert isinstance(s, COO)
-
-    assert_eq(x, s)
-
-
-@pytest.mark.parametrize(
-    "shape1, shape2",
-    [
-        ((2, 3, 4), (3, 4)),
-        ((3, 4), (2, 3, 4)),
-        ((3, 1, 4), (3, 2, 4)),
-        ((1, 3, 4), (3, 4)),
-        ((3, 4, 1), (3, 4, 2)),
-        ((1, 5), (5, 1)),
-        ((3, 1), (3, 4)),
-        ((3, 1), (1, 4)),
-        ((1, 4), (3, 4)),
-        ((2, 2, 2), (1, 1, 1)),
-    ],
-)
-def test_elemwise_mixed(shape1, shape2):
-    s1 = sparse.random(shape1, density=0.5)
-    x2 = np.random.rand(*shape2)
-
-    x1 = s1.todense()
-
-    assert_eq(s1 * x2, x1 * x2)
-
-
-def test_elemwise_mixed_empty():
-    s1 = sparse.random((2, 0, 4), density=0.5)
-    x2 = np.random.rand(2, 0, 4)
-
-    x1 = s1.todense()
-
-    assert_eq(s1 * x2, x1 * x2)
-
-
-def test_elemwise_unsupported():
-    class A:
-        pass
-
-    s1 = sparse.random((2, 3, 4), density=0.5)
-    x2 = A()
-
-    with pytest.raises(TypeError):
-        s1 + x2
-
-    assert sparse.elemwise(operator.add, s1, x2) is NotImplemented
-
-
-def test_elemwise_mixed_broadcast():
-    s1 = sparse.random((2, 3, 4), density=0.5)
-    s2 = sparse.random(4, density=0.5)
-    x3 = np.random.rand(3, 4)
-
-    x1 = s1.todense()
-    x2 = s2.todense()
-
-    def func(x1, x2, x3):
-        return x1 * x2 * x3
-
-    assert_eq(sparse.elemwise(func, s1, s2, x3), func(x1, x2, x3))
-
-
-@pytest.mark.parametrize(
-    "func",
-    [operator.mul, operator.add, operator.sub, operator.gt, operator.lt, operator.ne],
-)
-@pytest.mark.parametrize("shape", [(2,), (2, 3), (2, 3, 4), (2, 3, 4, 5)])
-def test_elemwise_binary(func, shape):
-    xs = sparse.random(shape, density=0.5)
-    ys = sparse.random(shape, density=0.5)
-
-    x = xs.todense()
-    y = ys.todense()
-
-    assert_eq(func(xs, ys), func(x, y))
-
-
-@pytest.mark.parametrize("func", [operator.imul, operator.iadd, operator.isub])
-@pytest.mark.parametrize("shape", [(2,), (2, 3), (2, 3, 4), (2, 3, 4, 5)])
-def test_elemwise_binary_inplace(func, shape):
-    xs = sparse.random(shape, density=0.5)
-    ys = sparse.random(shape, density=0.5)
-
-    x = xs.todense()
-    y = ys.todense()
-
-    xs = func(xs, ys)
-    x = func(x, y)
-
-    assert_eq(xs, x)
-
-
-@pytest.mark.parametrize(
-    "func",
-    [
-        lambda x, y, z: x + y + z,
-        lambda x, y, z: x * y * z,
-        lambda x, y, z: x + y * z,
-        lambda x, y, z: (x + y) * z,
-    ],
-)
-@pytest.mark.parametrize("shape", [(2,), (2, 3), (2, 3, 4), (2, 3, 4, 5)])
-def test_elemwise_trinary(func, shape):
-    xs = sparse.random(shape, density=0.5)
-    ys = sparse.random(shape, density=0.5)
-    zs = sparse.random(shape, density=0.5)
-
-    x = xs.todense()
-    y = ys.todense()
-    z = zs.todense()
-
-    fs = sparse.elemwise(func, xs, ys, zs)
-    assert isinstance(fs, COO)
-
-    assert_eq(fs, func(x, y, z))
-
-
-@pytest.mark.parametrize("func", [operator.add, operator.mul])
-@pytest.mark.parametrize(
-    "shape1,shape2",
-    [
-        ((2, 3, 4), (3, 4)),
-        ((3, 4), (2, 3, 4)),
-        ((3, 1, 4), (3, 2, 4)),
-        ((1, 3, 4), (3, 4)),
-        ((3, 4, 1), (3, 4, 2)),
-        ((1, 5), (5, 1)),
-        ((3, 1), (3, 4)),
-        ((3, 1), (1, 4)),
-        ((1, 4), (3, 4)),
-        ((2, 2, 2), (1, 1, 1)),
-    ],
-)
-def test_binary_broadcasting(func, shape1, shape2):
-    density1 = 1 if np.prod(shape1) == 1 else 0.5
-    density2 = 1 if np.prod(shape2) == 1 else 0.5
-
-    xs = sparse.random(shape1, density=density1)
-    x = xs.todense()
-
-    ys = sparse.random(shape2, density=density2)
-    y = ys.todense()
-
-    expected = func(x, y)
-    actual = func(xs, ys)
-
-    assert isinstance(actual, COO)
-    assert_eq(expected, actual)
-
-    assert np.count_nonzero(expected) == actual.nnz
-
-
-@pytest.mark.parametrize(
-    "shape1,shape2",
-    [((3, 4), (2, 3, 4)), ((3, 1, 4), (3, 2, 4)), ((3, 4, 1), (3, 4, 2))],
-)
-def test_broadcast_to(shape1, shape2):
-    a = sparse.random(shape1, density=0.5)
-    x = a.todense()
-
-    assert_eq(np.broadcast_to(x, shape2), a.broadcast_to(shape2))
-
-
-@pytest.mark.parametrize(
-    "shapes",
-    [
-        [(2,), (3, 2), (4, 3, 2)],
-        [(3,), (2, 3), (2, 2, 3)],
-        [(2,), (2, 2), (2, 2, 2)],
-        [(4,), (4, 4), (4, 4, 4)],
-        [(4,), (4, 4), (4, 4, 4)],
-        [(4,), (4, 4), (4, 4, 4)],
-        [(1, 1, 2), (1, 3, 1), (4, 1, 1)],
-        [(2,), (2, 1), (2, 1, 1)],
-    ],
-)
-@pytest.mark.parametrize(
-    "func",
-    [
-        lambda x, y, z: (x + y) * z,
-        lambda x, y, z: x * (y + z),
-        lambda x, y, z: x * y * z,
-        lambda x, y, z: x + y + z,
-        lambda x, y, z: x + y - z,
-        lambda x, y, z: x - y + z,
-    ],
-)
-def test_trinary_broadcasting(shapes, func):
-    args = [sparse.random(s, density=0.5) for s in shapes]
-    dense_args = [arg.todense() for arg in args]
-
-    fs = sparse.elemwise(func, *args)
-    assert isinstance(fs, COO)
-
-    assert_eq(fs, func(*dense_args))
-
-
-@pytest.mark.parametrize(
-    "shapes, func",
-    [
-        ([(2,), (3, 2), (4, 3, 2)], lambda x, y, z: (x + y) * z),
-        ([(3,), (2, 3), (2, 2, 3)], lambda x, y, z: x * (y + z)),
-        ([(2,), (2, 2), (2, 2, 2)], lambda x, y, z: x * y * z),
-        ([(4,), (4, 4), (4, 4, 4)], lambda x, y, z: x + y + z),
-    ],
-)
-@pytest.mark.parametrize("value", [np.nan, np.inf, -np.inf])
-@pytest.mark.parametrize("fraction", [0.25, 0.5, 0.75, 1.0])
-@pytest.mark.filterwarnings("ignore:invalid value")
-def test_trinary_broadcasting_pathological(shapes, func, value, fraction):
-    args = [
-        sparse.random(s, density=0.5, data_rvs=random_value_array(value, fraction))
-        for s in shapes
-    ]
-    dense_args = [arg.todense() for arg in args]
-
-    fs = sparse.elemwise(func, *args)
-    assert isinstance(fs, COO)
-
-    assert_eq(fs, func(*dense_args))
-
-
-def test_sparse_broadcasting(monkeypatch):
-    orig_unmatch_coo = sparse._coo.umath._Elemwise._get_func_coords_data
-
-    state = {"num_matches": 0}
-
-    xs = sparse.random((3, 4), density=0.5)
-    ys = sparse.random((3, 4), density=0.5)
-
-    def mock_unmatch_coo(*args, **kwargs):
-        result = orig_unmatch_coo(*args, **kwargs)
-        if result is not None:
-            state["num_matches"] += 1
-        return result
-
-    monkeypatch.setattr(
-        sparse._coo.umath._Elemwise, "_get_func_coords_data", mock_unmatch_coo
-    )
-
-    xs * ys
-
-    # Less than in case there's absolutely no overlap in some cases.
-    assert state["num_matches"] <= 1
-
-
-def test_dense_broadcasting(monkeypatch):
-    orig_unmatch_coo = sparse._coo.umath._Elemwise._get_func_coords_data
-
-    state = {"num_matches": 0}
-
-    xs = sparse.random((3, 4), density=0.5)
-    ys = sparse.random((3, 4), density=0.5)
-
-    def mock_unmatch_coo(*args, **kwargs):
-        result = orig_unmatch_coo(*args, **kwargs)
-        if result is not None:
-            state["num_matches"] += 1
-        return result
-
-    monkeypatch.setattr(
-        sparse._coo.umath._Elemwise, "_get_func_coords_data", mock_unmatch_coo
-    )
-
-    xs + ys
-
-    # Less than in case there's absolutely no overlap in some cases.
-    assert state["num_matches"] <= 3
-
-
-@pytest.mark.parametrize("format", ["coo", "dok"])
-def test_sparsearray_elemwise(format):
-    xs = sparse.random((3, 4), density=0.5, format=format)
-    ys = sparse.random((3, 4), density=0.5, format=format)
-
-    x = xs.todense()
-    y = ys.todense()
-
-    fs = sparse.elemwise(operator.add, xs, ys)
-    assert isinstance(fs, COO)
-
-    assert_eq(fs, x + y)
-
-
-def test_ndarray_densification_fails():
-    xs = sparse.random((2, 3, 4), density=0.5)
-    y = np.random.rand(3, 4)
-
-    with pytest.raises(ValueError):
-        xs + y
-
-
-def test_elemwise_noargs():
-    def func():
-        return np.float_(5.0)
-
-    assert_eq(sparse.elemwise(func), func())
-
-
-@pytest.mark.parametrize(
-    "func",
-    [
-        operator.pow,
-        operator.truediv,
-        operator.floordiv,
-        operator.ge,
-        operator.le,
-        operator.eq,
-        operator.mod,
-    ],
-)
-@pytest.mark.filterwarnings("ignore:divide by zero")
-@pytest.mark.filterwarnings("ignore:invalid value")
-def test_nonzero_outout_fv_ufunc(func):
-    xs = sparse.random((2, 3, 4), density=0.5)
-    ys = sparse.random((2, 3, 4), density=0.5)
-
-    x = xs.todense()
-    y = ys.todense()
-
-    f = func(x, y)
-    fs = func(xs, ys)
-    assert isinstance(fs, COO)
-
-    assert_eq(f, fs)
-
-
-@pytest.mark.parametrize(
-    "func, scalar",
-    [
-        (operator.mul, 5),
-        (operator.add, 0),
-        (operator.sub, 0),
-        (operator.pow, 5),
-        (operator.truediv, 3),
-        (operator.floordiv, 4),
-        (operator.gt, 5),
-        (operator.lt, -5),
-        (operator.ne, 0),
-        (operator.ge, 5),
-        (operator.le, -3),
-        (operator.eq, 1),
-        (operator.mod, 5),
-    ],
-)
-@pytest.mark.parametrize("convert_to_np_number", [True, False])
-def test_elemwise_scalar(func, scalar, convert_to_np_number):
-    xs = sparse.random((2, 3, 4), density=0.5)
-    if convert_to_np_number:
-        scalar = np.float32(scalar)
-    y = scalar
-
-    x = xs.todense()
-    fs = func(xs, y)
-
-    assert isinstance(fs, COO)
-    assert xs.nnz >= fs.nnz
-
-    assert_eq(fs, func(x, y))
-
-
-@pytest.mark.parametrize(
-    "func, scalar",
-    [
-        (operator.mul, 5),
-        (operator.add, 0),
-        (operator.sub, 0),
-        (operator.gt, -5),
-        (operator.lt, 5),
-        (operator.ne, 0),
-        (operator.ge, -5),
-        (operator.le, 3),
-        (operator.eq, 1),
-    ],
-)
-@pytest.mark.parametrize("convert_to_np_number", [True, False])
-def test_leftside_elemwise_scalar(func, scalar, convert_to_np_number):
-    xs = sparse.random((2, 3, 4), density=0.5)
-    if convert_to_np_number:
-        scalar = np.float32(scalar)
-    y = scalar
-
-    x = xs.todense()
-    fs = func(y, xs)
-
-    assert isinstance(fs, COO)
-    assert xs.nnz >= fs.nnz
-
-    assert_eq(fs, func(y, x))
-
-
-@pytest.mark.parametrize(
-    "func, scalar",
-    [
-        (operator.add, 5),
-        (operator.sub, -5),
-        (operator.pow, -3),
-        (operator.truediv, 0),
-        (operator.floordiv, 0),
-        (operator.gt, -5),
-        (operator.lt, 5),
-        (operator.ne, 1),
-        (operator.ge, -3),
-        (operator.le, 3),
-        (operator.eq, 0),
-    ],
-)
-@pytest.mark.filterwarnings("ignore:divide by zero")
-@pytest.mark.filterwarnings("ignore:invalid value")
-def test_scalar_output_nonzero_fv(func, scalar):
-    xs = sparse.random((2, 3, 4), density=0.5)
-    y = scalar
-
-    x = xs.todense()
-
-    f = func(x, y)
-    fs = func(xs, y)
-
-    assert isinstance(fs, COO)
-    assert fs.nnz <= xs.nnz
-
-    assert_eq(f, fs)
-
-
-@pytest.mark.parametrize("func", [operator.and_, operator.or_, operator.xor])
-@pytest.mark.parametrize("shape", [(2,), (2, 3), (2, 3, 4), (2, 3, 4, 5)])
-def test_bitwise_binary(func, shape):
-    # Small arrays need high density to have nnz entries
-    # Casting floats to int will result in all zeros, hence the * 100
-    xs = (sparse.random(shape, density=0.5) * 100).astype(np.int_)
-    ys = (sparse.random(shape, density=0.5) * 100).astype(np.int_)
-
-    x = xs.todense()
-    y = ys.todense()
-
-    assert_eq(func(xs, ys), func(x, y))
-
-
-@pytest.mark.parametrize("func", [operator.iand, operator.ior, operator.ixor])
-@pytest.mark.parametrize("shape", [(2,), (2, 3), (2, 3, 4), (2, 3, 4, 5)])
-def test_bitwise_binary_inplace(func, shape):
-    # Small arrays need high density to have nnz entries
-    # Casting floats to int will result in all zeros, hence the * 100
-    xs = (sparse.random(shape, density=0.5) * 100).astype(np.int_)
-    ys = (sparse.random(shape, density=0.5) * 100).astype(np.int_)
-
-    x = xs.todense()
-    y = ys.todense()
-
-    xs = func(xs, ys)
-    x = func(x, y)
-
-    assert_eq(xs, x)
-
-
-@pytest.mark.parametrize("func", [operator.lshift, operator.rshift])
-@pytest.mark.parametrize("shape", [(2,), (2, 3), (2, 3, 4), (2, 3, 4, 5)])
-def test_bitshift_binary(func, shape):
-    # Small arrays need high density to have nnz entries
-    # Casting floats to int will result in all zeros, hence the * 100
-    xs = (sparse.random(shape, density=0.5) * 100).astype(np.int_)
-
-    # Can't merge into test_bitwise_binary because left/right shifting
-    # with something >= 64 isn't defined.
-    ys = (sparse.random(shape, density=0.5) * 64).astype(np.int_)
-
-    x = xs.todense()
-    y = ys.todense()
-
-    assert_eq(func(xs, ys), func(x, y))
-
-
-@pytest.mark.parametrize("func", [operator.ilshift, operator.irshift])
-@pytest.mark.parametrize("shape", [(2,), (2, 3), (2, 3, 4), (2, 3, 4, 5)])
-def test_bitshift_binary_inplace(func, shape):
-    # Small arrays need high density to have nnz entries
-    # Casting floats to int will result in all zeros, hence the * 100
-    xs = (sparse.random(shape, density=0.5) * 100).astype(np.int_)
-
-    # Can't merge into test_bitwise_binary because left/right shifting
-    # with something >= 64 isn't defined.
-    ys = (sparse.random(shape, density=0.5) * 64).astype(np.int_)
-
-    x = xs.todense()
-    y = ys.todense()
-
-    xs = func(xs, ys)
-    x = func(x, y)
-
-    assert_eq(xs, x)
-
-
-@pytest.mark.parametrize("func", [operator.and_])
-@pytest.mark.parametrize("shape", [(2,), (2, 3), (2, 3, 4), (2, 3, 4, 5)])
-def test_bitwise_scalar(func, shape):
-    # Small arrays need high density to have nnz entries
-    # Casting floats to int will result in all zeros, hence the * 100
-    xs = (sparse.random(shape, density=0.5) * 100).astype(np.int_)
-    y = np.random.randint(100)
-
-    x = xs.todense()
-
-    assert_eq(func(xs, y), func(x, y))
-    assert_eq(func(y, xs), func(y, x))
-
-
-@pytest.mark.parametrize("func", [operator.lshift, operator.rshift])
-@pytest.mark.parametrize("shape", [(2,), (2, 3), (2, 3, 4), (2, 3, 4, 5)])
-def test_bitshift_scalar(func, shape):
-    # Small arrays need high density to have nnz entries
-    # Casting floats to int will result in all zeros, hence the * 100
-    xs = (sparse.random(shape, density=0.5) * 100).astype(np.int_)
-
-    # Can't merge into test_bitwise_binary because left/right shifting
-    # with something >= 64 isn't defined.
-    y = np.random.randint(64)
-
-    x = xs.todense()
-
-    assert_eq(func(xs, y), func(x, y))
-
-
-@pytest.mark.parametrize("func", [operator.invert])
-@pytest.mark.parametrize("shape", [(2,), (2, 3), (2, 3, 4), (2, 3, 4, 5)])
-def test_unary_bitwise_nonzero_output_fv(func, shape):
-    # Small arrays need high density to have nnz entries
-    # Casting floats to int will result in all zeros, hence the * 100
-    xs = (sparse.random(shape, density=0.5) * 100).astype(np.int_)
-    x = xs.todense()
-
-    f = func(x)
-    fs = func(xs)
-
-    assert isinstance(fs, COO)
-    assert fs.nnz <= xs.nnz
-
-    assert_eq(f, fs)
-
-
-@pytest.mark.parametrize("func", [operator.or_, operator.xor])
-@pytest.mark.parametrize("shape", [(2,), (2, 3), (2, 3, 4), (2, 3, 4, 5)])
-def test_binary_bitwise_nonzero_output_fv(func, shape):
-    # Small arrays need high density to have nnz entries
-    # Casting floats to int will result in all zeros, hence the * 100
-    xs = (sparse.random(shape, density=0.5) * 100).astype(np.int_)
-    y = np.random.randint(1, 100)
-
-    x = xs.todense()
-
-    f = func(x, y)
-    fs = func(xs, y)
-
-    assert isinstance(fs, COO)
-    assert fs.nnz <= xs.nnz
-
-    assert_eq(f, fs)
-
-
-@pytest.mark.parametrize(
-    "func",
-    [operator.mul, operator.add, operator.sub, operator.gt, operator.lt, operator.ne],
-)
-@pytest.mark.parametrize("shape", [(2,), (2, 3), (2, 3, 4), (2, 3, 4, 5)])
-def test_elemwise_nonzero_input_fv(func, shape):
-    xs = sparse.random(shape, density=0.5, fill_value=np.random.rand())
-    ys = sparse.random(shape, density=0.5, fill_value=np.random.rand())
-
-    x = xs.todense()
-    y = ys.todense()
-
-    assert_eq(func(xs, ys), func(x, y))
-
-
-@pytest.mark.parametrize("func", [operator.lshift, operator.rshift])
-@pytest.mark.parametrize("shape", [(2,), (2, 3), (2, 3, 4), (2, 3, 4, 5)])
-def test_binary_bitshift_densification_fails(func, shape):
-    # Small arrays need high density to have nnz entries
-    # Casting floats to int will result in all zeros, hence the * 100
-    x = np.random.randint(1, 100)
-    ys = (sparse.random(shape, density=0.5) * 64).astype(np.int_)
-
-    y = ys.todense()
-
-    f = func(x, y)
-    fs = func(x, ys)
-
-    assert isinstance(fs, COO)
-    assert fs.nnz <= ys.nnz
-
-    assert_eq(f, fs)
-
-
-@pytest.mark.parametrize("func", [operator.and_, operator.or_, operator.xor])
-@pytest.mark.parametrize("shape", [(2,), (2, 3), (2, 3, 4), (2, 3, 4, 5)])
-def test_bitwise_binary_bool(func, shape):
-    # Small arrays need high density to have nnz entries
-    xs = sparse.random(shape, density=0.5).astype(bool)
-    ys = sparse.random(shape, density=0.5).astype(bool)
-
-    x = xs.todense()
-    y = ys.todense()
-
-    assert_eq(func(xs, ys), func(x, y))
-
-
-def test_elemwise_binary_empty():
-    x = COO({}, shape=(10, 10))
-    y = sparse.random((10, 10), density=0.5)
-
-    for z in [x * y, y * x]:
-        assert z.nnz == 0
-        assert z.coords.shape == (2, 0)
-        assert z.data.shape == (0,)
-
-
 def test_gt():
     s = sparse.random((2, 3, 4), density=0.5)
     x = s.todense()
diff --git a/sparse/tests/test_elemwise.py b/sparse/tests/test_elemwise.py
new file mode 100644
index 00000000..a911f28c
--- /dev/null
+++ b/sparse/tests/test_elemwise.py
@@ -0,0 +1,707 @@
+import numpy as np
+import sparse
+import pytest
+import operator
+from sparse import COO
+from sparse._compressed import GCXS
+from sparse._utils import assert_eq, random_value_array
+
+
+@pytest.mark.parametrize(
+    "func",
+    [
+        np.expm1,
+        np.log1p,
+        np.sin,
+        np.tan,
+        np.sinh,
+        np.tanh,
+        np.floor,
+        np.ceil,
+        np.sqrt,
+        np.conj,
+        np.round,
+        np.rint,
+        lambda x: x.astype("int32"),
+        np.conjugate,
+        np.conj,
+        lambda x: x.round(decimals=2),
+        abs,
+    ],
+)
+@pytest.mark.parametrize("format", [COO, GCXS])
+def test_elemwise(func, format):
+    s = sparse.random((2, 3, 4), density=0.5, format=format)
+    x = s.todense()
+
+    fs = func(s)
+    assert isinstance(fs, format)
+    assert fs.nnz <= s.nnz
+
+    assert_eq(func(x), fs)
+
+
+@pytest.mark.parametrize(
+    "func",
+    [
+        np.expm1,
+        np.log1p,
+        np.sin,
+        np.tan,
+        np.sinh,
+        np.tanh,
+        np.floor,
+        np.ceil,
+        np.sqrt,
+        np.conj,
+        np.round,
+        np.rint,
+        np.conjugate,
+        np.conj,
+        lambda x, out: x.round(decimals=2, out=out),
+    ],
+)
+@pytest.mark.parametrize("format", [COO, GCXS])
+def test_elemwise_inplace(func, format):
+    s = sparse.random((2, 3, 4), density=0.5, format=format)
+    x = s.todense()
+
+    func(s, out=s)
+    func(x, out=x)
+    assert isinstance(s, format)
+
+    assert_eq(x, s)
+
+
+@pytest.mark.parametrize(
+    "shape1, shape2",
+    [
+        ((2, 3, 4), (3, 4)),
+        ((3, 4), (2, 3, 4)),
+        ((3, 1, 4), (3, 2, 4)),
+        ((1, 3, 4), (3, 4)),
+        ((3, 4, 1), (3, 4, 2)),
+        ((1, 5), (5, 1)),
+        ((3, 1), (3, 4)),
+        ((3, 1), (1, 4)),
+        ((1, 4), (3, 4)),
+        ((2, 2, 2), (1, 1, 1)),
+    ],
+)
+@pytest.mark.parametrize("format", [COO, GCXS])
+def test_elemwise_mixed(shape1, shape2, format):
+    s1 = sparse.random(shape1, density=0.5, format=format)
+    x2 = np.random.rand(*shape2)
+
+    x1 = s1.todense()
+
+    assert_eq(s1 * x2, x1 * x2)
+
+
+def test_elemwise_mixed_empty():
+    s1 = sparse.random((2, 0, 4), density=0.5)
+    x2 = np.random.rand(2, 0, 4)
+
+    x1 = s1.todense()
+
+    assert_eq(s1 * x2, x1 * x2)
+
+
+def test_elemwise_unsupported():
+    class A:
+        pass
+
+    s1 = sparse.random((2, 3, 4), density=0.5)
+    x2 = A()
+
+    with pytest.raises(TypeError):
+        s1 + x2
+
+    assert sparse.elemwise(operator.add, s1, x2) is NotImplemented
+
+
+def test_elemwise_mixed_broadcast():
+    s1 = sparse.random((2, 3, 4), density=0.5)
+    s2 = sparse.random(4, density=0.5)
+    x3 = np.random.rand(3, 4)
+
+    x1 = s1.todense()
+    x2 = s2.todense()
+
+    def func(x1, x2, x3):
+        return x1 * x2 * x3
+
+    assert_eq(sparse.elemwise(func, s1, s2, x3), func(x1, x2, x3))
+
+
+@pytest.mark.parametrize(
+    "func",
+    [operator.mul, operator.add, operator.sub, operator.gt, operator.lt, operator.ne],
+)
+@pytest.mark.parametrize("shape", [(2,), (2, 3), (2, 3, 4), (2, 3, 4, 5)])
+@pytest.mark.parametrize("format", [COO, GCXS])
+def test_elemwise_binary(func, shape, format):
+    xs = sparse.random(shape, density=0.5, format=format)
+    ys = sparse.random(shape, density=0.5, format=format)
+
+    x = xs.todense()
+    y = ys.todense()
+
+    assert_eq(func(xs, ys), func(x, y))
+
+
+@pytest.mark.parametrize("func", [operator.imul, operator.iadd, operator.isub])
+@pytest.mark.parametrize("shape", [(2,), (2, 3), (2, 3, 4), (2, 3, 4, 5)])
+@pytest.mark.parametrize("format", [COO, GCXS])
+def test_elemwise_binary_inplace(func, shape, format):
+    xs = sparse.random(shape, density=0.5, format=format)
+    ys = sparse.random(shape, density=0.5, format=format)
+
+    x = xs.todense()
+    y = ys.todense()
+
+    xs = func(xs, ys)
+    x = func(x, y)
+
+    assert_eq(xs, x)
+
+
+@pytest.mark.parametrize(
+    "func",
+    [
+        lambda x, y, z: x + y + z,
+        lambda x, y, z: x * y * z,
+        lambda x, y, z: x + y * z,
+        lambda x, y, z: (x + y) * z,
+    ],
+)
+@pytest.mark.parametrize("shape", [(2,), (2, 3), (2, 3, 4), (2, 3, 4, 5)])
+@pytest.mark.parametrize(
+    "formats",
+    [
+        [COO, COO, COO],
+        [GCXS, GCXS, GCXS],
+        [COO, GCXS, GCXS],
+    ],
+)
+def test_elemwise_trinary(func, shape, formats):
+    xs = sparse.random(shape, density=0.5, format=formats[0])
+    ys = sparse.random(shape, density=0.5, format=formats[1])
+    zs = sparse.random(shape, density=0.5, format=formats[2])
+
+    x = xs.todense()
+    y = ys.todense()
+    z = zs.todense()
+
+    fs = sparse.elemwise(func, xs, ys, zs)
+    if COO in formats:
+        assert isinstance(fs, COO)
+    else:
+        assert isinstance(fs, GCXS)
+
+    assert_eq(fs, func(x, y, z))
+
+
+@pytest.mark.parametrize("func", [operator.add, operator.mul])
+@pytest.mark.parametrize(
+    "shape1,shape2",
+    [
+        ((2, 3, 4), (3, 4)),
+        ((3, 4), (2, 3, 4)),
+        ((3, 1, 4), (3, 2, 4)),
+        ((1, 3, 4), (3, 4)),
+        ((3, 4, 1), (3, 4, 2)),
+        ((1, 5), (5, 1)),
+        ((3, 1), (3, 4)),
+        ((3, 1), (1, 4)),
+        ((1, 4), (3, 4)),
+        ((2, 2, 2), (1, 1, 1)),
+    ],
+)
+def test_binary_broadcasting(func, shape1, shape2):
+    density1 = 1 if np.prod(shape1) == 1 else 0.5
+    density2 = 1 if np.prod(shape2) == 1 else 0.5
+
+    xs = sparse.random(shape1, density=density1)
+    x = xs.todense()
+
+    ys = sparse.random(shape2, density=density2)
+    y = ys.todense()
+
+    expected = func(x, y)
+    actual = func(xs, ys)
+
+    assert isinstance(actual, COO)
+    assert_eq(expected, actual)
+
+    assert np.count_nonzero(expected) == actual.nnz
+
+
+@pytest.mark.parametrize(
+    "shape1,shape2",
+    [((3, 4), (2, 3, 4)), ((3, 1, 4), (3, 2, 4)), ((3, 4, 1), (3, 4, 2))],
+)
+def test_broadcast_to(shape1, shape2):
+    a = sparse.random(shape1, density=0.5)
+    x = a.todense()
+
+    assert_eq(np.broadcast_to(x, shape2), a.broadcast_to(shape2))
+
+
+@pytest.mark.parametrize(
+    "shapes",
+    [
+        [(2,), (3, 2), (4, 3, 2)],
+        [(3,), (2, 3), (2, 2, 3)],
+        [(2,), (2, 2), (2, 2, 2)],
+        [(4,), (4, 4), (4, 4, 4)],
+        [(4,), (4, 4), (4, 4, 4)],
+        [(4,), (4, 4), (4, 4, 4)],
+        [(1, 1, 2), (1, 3, 1), (4, 1, 1)],
+        [(2,), (2, 1), (2, 1, 1)],
+    ],
+)
+@pytest.mark.parametrize(
+    "func",
+    [
+        lambda x, y, z: (x + y) * z,
+        lambda x, y, z: x * (y + z),
+        lambda x, y, z: x * y * z,
+        lambda x, y, z: x + y + z,
+        lambda x, y, z: x + y - z,
+        lambda x, y, z: x - y + z,
+    ],
+)
+def test_trinary_broadcasting(shapes, func):
+    args = [sparse.random(s, density=0.5) for s in shapes]
+    dense_args = [arg.todense() for arg in args]
+
+    fs = sparse.elemwise(func, *args)
+    assert isinstance(fs, COO)
+
+    assert_eq(fs, func(*dense_args))
+
+
+@pytest.mark.parametrize(
+    "shapes, func",
+    [
+        ([(2,), (3, 2), (4, 3, 2)], lambda x, y, z: (x + y) * z),
+        ([(3,), (2, 3), (2, 2, 3)], lambda x, y, z: x * (y + z)),
+        ([(2,), (2, 2), (2, 2, 2)], lambda x, y, z: x * y * z),
+        ([(4,), (4, 4), (4, 4, 4)], lambda x, y, z: x + y + z),
+    ],
+)
+@pytest.mark.parametrize("value", [np.nan, np.inf, -np.inf])
+@pytest.mark.parametrize("fraction", [0.25, 0.5, 0.75, 1.0])
+@pytest.mark.filterwarnings("ignore:invalid value")
+def test_trinary_broadcasting_pathological(shapes, func, value, fraction):
+    args = [
+        sparse.random(s, density=0.5, data_rvs=random_value_array(value, fraction))
+        for s in shapes
+    ]
+    dense_args = [arg.todense() for arg in args]
+
+    fs = sparse.elemwise(func, *args)
+    assert isinstance(fs, COO)
+
+    assert_eq(fs, func(*dense_args))
+
+
+def test_sparse_broadcasting(monkeypatch):
+    orig_unmatch_coo = sparse._umath._Elemwise._get_func_coords_data
+
+    state = {"num_matches": 0}
+
+    xs = sparse.random((3, 4), density=0.5)
+    ys = sparse.random((3, 4), density=0.5)
+
+    def mock_unmatch_coo(*args, **kwargs):
+        result = orig_unmatch_coo(*args, **kwargs)
+        if result is not None:
+            state["num_matches"] += 1
+        return result
+
+    monkeypatch.setattr(
+        sparse._umath._Elemwise, "_get_func_coords_data", mock_unmatch_coo
+    )
+
+    xs * ys
+
+    # Use <= rather than ==, since in some cases there is no overlap at all.
+    assert state["num_matches"] <= 1
+
+
+def test_dense_broadcasting(monkeypatch):
+    orig_unmatch_coo = sparse._umath._Elemwise._get_func_coords_data
+
+    state = {"num_matches": 0}
+
+    xs = sparse.random((3, 4), density=0.5)
+    ys = sparse.random((3, 4), density=0.5)
+
+    def mock_unmatch_coo(*args, **kwargs):
+        result = orig_unmatch_coo(*args, **kwargs)
+        if result is not None:
+            state["num_matches"] += 1
+        return result
+
+    monkeypatch.setattr(
+        sparse._umath._Elemwise, "_get_func_coords_data", mock_unmatch_coo
+    )
+
+    xs + ys
+
+    # Use <= rather than ==, since in some cases there is no overlap at all.
+    assert state["num_matches"] <= 3
+
+
+@pytest.mark.parametrize("format", ["coo", "dok", "gcxs"])
+def test_sparsearray_elemwise(format):
+    xs = sparse.random((3, 4), density=0.5, format=format)
+    ys = sparse.random((3, 4), density=0.5, format=format)
+
+    x = xs.todense()
+    y = ys.todense()
+
+    fs = sparse.elemwise(operator.add, xs, ys)
+    if format == "gcxs":
+        assert isinstance(fs, GCXS)
+    else:
+        assert isinstance(fs, COO)
+
+    assert_eq(fs, x + y)
+
+
+def test_ndarray_densification_fails():
+    xs = sparse.random((2, 3, 4), density=0.5)
+    y = np.random.rand(3, 4)
+
+    with pytest.raises(ValueError):
+        xs + y
+
+
+def test_elemwise_noargs():
+    def func():
+        return np.float_(5.0)
+
+    assert_eq(sparse.elemwise(func), func())
+
+
+@pytest.mark.parametrize(
+    "func",
+    [
+        operator.pow,
+        operator.truediv,
+        operator.floordiv,
+        operator.ge,
+        operator.le,
+        operator.eq,
+        operator.mod,
+    ],
+)
+@pytest.mark.filterwarnings("ignore:divide by zero")
+@pytest.mark.filterwarnings("ignore:invalid value")
+@pytest.mark.parametrize("format", [COO, GCXS])
+def test_nonzero_outout_fv_ufunc(func, format):
+    xs = sparse.random((2, 3, 4), density=0.5, format=format)
+    ys = sparse.random((2, 3, 4), density=0.5, format=format)
+
+    x = xs.todense()
+    y = ys.todense()
+
+    f = func(x, y)
+    fs = func(xs, ys)
+    assert isinstance(fs, format)
+
+    assert_eq(f, fs)
+
+
+@pytest.mark.parametrize(
+    "func, scalar",
+    [
+        (operator.mul, 5),
+        (operator.add, 0),
+        (operator.sub, 0),
+        (operator.pow, 5),
+        (operator.truediv, 3),
+        (operator.floordiv, 4),
+        (operator.gt, 5),
+        (operator.lt, -5),
+        (operator.ne, 0),
+        (operator.ge, 5),
+        (operator.le, -3),
+        (operator.eq, 1),
+        (operator.mod, 5),
+    ],
+)
+@pytest.mark.parametrize("convert_to_np_number", [True, False])
+@pytest.mark.parametrize("format", [COO, GCXS])
+def test_elemwise_scalar(func, scalar, convert_to_np_number, format):
+    xs = sparse.random((2, 3, 4), density=0.5, format=format)
+    if convert_to_np_number:
+        scalar = np.float32(scalar)
+    y = scalar
+
+    x = xs.todense()
+    fs = func(xs, y)
+
+    assert isinstance(fs, format)
+    assert xs.nnz >= fs.nnz
+
+    assert_eq(fs, func(x, y))
+
+
+@pytest.mark.parametrize(
+    "func, scalar",
+    [
+        (operator.mul, 5),
+        (operator.add, 0),
+        (operator.sub, 0),
+        (operator.gt, -5),
+        (operator.lt, 5),
+        (operator.ne, 0),
+        (operator.ge, -5),
+        (operator.le, 3),
+        (operator.eq, 1),
+    ],
+)
+@pytest.mark.parametrize("convert_to_np_number", [True, False])
+def test_leftside_elemwise_scalar(func, scalar, convert_to_np_number):
+    """Check ``func(scalar, array)`` against NumPy with the scalar on the left."""
+    sparse_x = sparse.random((2, 3, 4), density=0.5)
+    # Exercise NumPy scalar types as well as plain Python numbers.
+    y = np.float32(scalar) if convert_to_np_number else scalar
+
+    dense_x = sparse_x.todense()
+    result = func(y, sparse_x)
+
+    assert isinstance(result, COO)
+    assert result.nnz <= sparse_x.nnz
+
+    assert_eq(result, func(y, dense_x))
+
+
+@pytest.mark.parametrize(
+    "func, scalar",
+    [
+        (operator.add, 5),
+        (operator.sub, -5),
+        (operator.pow, -3),
+        (operator.truediv, 0),
+        (operator.floordiv, 0),
+        (operator.gt, -5),
+        (operator.lt, 5),
+        (operator.ne, 1),
+        (operator.ge, -3),
+        (operator.le, 3),
+        (operator.eq, 0),
+    ],
+)
+@pytest.mark.filterwarnings("ignore:divide by zero")
+@pytest.mark.filterwarnings("ignore:invalid value")
+def test_scalar_output_nonzero_fv(func, scalar):
+    """Check ``func(array, scalar)`` against NumPy; the result must stay a COO."""
+    sparse_x = sparse.random((2, 3, 4), density=0.5)
+    dense_x = sparse_x.todense()
+
+    expected = func(dense_x, scalar)
+    actual = func(sparse_x, scalar)
+
+    assert isinstance(actual, COO)
+    # The operation must not add stored entries.
+    assert actual.nnz <= sparse_x.nnz
+
+    assert_eq(expected, actual)
+
+
+@pytest.mark.parametrize("func", [operator.and_, operator.or_, operator.xor])
+@pytest.mark.parametrize("shape", [(2,), (2, 3), (2, 3, 4), (2, 3, 4, 5)])
+@pytest.mark.parametrize("format", [COO, GCXS])
+def test_bitwise_binary(func, shape, format):
+    """Bitwise ``func`` on two integer sparse arrays matches the dense result."""
+    # Density is kept high so that even the smallest shapes store some entries;
+    # scaling by 100 stops the int cast from truncating every value to zero.
+    lhs = (sparse.random(shape, density=0.5, format=format) * 100).astype(np.int_)
+    rhs = (sparse.random(shape, density=0.5, format=format) * 100).astype(np.int_)
+
+    expected = func(lhs.todense(), rhs.todense())
+
+    assert_eq(func(lhs, rhs), expected)
+
+
+@pytest.mark.parametrize("func", [operator.iand, operator.ior, operator.ixor])
+@pytest.mark.parametrize("shape", [(2,), (2, 3), (2, 3, 4), (2, 3, 4, 5)])
+@pytest.mark.parametrize("format", [COO, GCXS])
+def test_bitwise_binary_inplace(func, shape, format):
+    """In-place bitwise ``func`` on sparse arrays matches the dense result."""
+    # Density is kept high so that even the smallest shapes store some entries;
+    # scaling by 100 stops the int cast from truncating every value to zero.
+    lhs = (sparse.random(shape, density=0.5, format=format) * 100).astype(np.int_)
+    rhs = (sparse.random(shape, density=0.5, format=format) * 100).astype(np.int_)
+
+    # Densify before the in-place operators run on either representation.
+    dense_lhs, dense_rhs = lhs.todense(), rhs.todense()
+
+    lhs = func(lhs, rhs)
+    dense_lhs = func(dense_lhs, dense_rhs)
+    assert_eq(lhs, dense_lhs)
+
+
+@pytest.mark.parametrize("func", [operator.lshift, operator.rshift])
+@pytest.mark.parametrize("shape", [(2,), (2, 3), (2, 3, 4), (2, 3, 4, 5)])
+def test_bitshift_binary(func, shape):
+    """Sparse-by-sparse bit shifts match the dense result."""
+    # Density is kept high so that even the smallest shapes store some entries;
+    # scaling by 100 stops the int cast from truncating every value to zero.
+    values = (sparse.random(shape, density=0.5) * 100).astype(np.int_)
+
+    # Kept separate from test_bitwise_binary: shifting by 64 or more bits is
+    # not defined, so the shift counts are scaled to stay below 64.
+    shifts = (sparse.random(shape, density=0.5) * 64).astype(np.int_)
+
+    expected = func(values.todense(), shifts.todense())
+
+    assert_eq(func(values, shifts), expected)
+
+
+@pytest.mark.parametrize("func", [operator.ilshift, operator.irshift])
+@pytest.mark.parametrize("shape", [(2,), (2, 3), (2, 3, 4), (2, 3, 4, 5)])
+def test_bitshift_binary_inplace(func, shape):
+    """In-place sparse-by-sparse bit shifts match the dense result."""
+    # Density is kept high so that even the smallest shapes store some entries;
+    # scaling by 100 stops the int cast from truncating every value to zero.
+    values = (sparse.random(shape, density=0.5) * 100).astype(np.int_)
+
+    # Kept separate from the bitwise tests: shifting by 64 or more bits is
+    # not defined, so the shift counts are scaled to stay below 64.
+    shifts = (sparse.random(shape, density=0.5) * 64).astype(np.int_)
+
+    # Densify before the in-place operators run on either representation.
+    dense_values, dense_shifts = values.todense(), shifts.todense()
+
+    values = func(values, shifts)
+    dense_values = func(dense_values, dense_shifts)
+    assert_eq(values, dense_values)
+
+
+@pytest.mark.parametrize("func", [operator.and_])
+@pytest.mark.parametrize("shape", [(2,), (2, 3), (2, 3, 4), (2, 3, 4, 5)])
+def test_bitwise_scalar(func, shape):
+    """Bitwise ``func`` with an integer scalar works on either operand side."""
+    # Density is kept high so that even the smallest shapes store some entries;
+    # scaling by 100 stops the int cast from truncating every value to zero.
+    sparse_x = (sparse.random(shape, density=0.5) * 100).astype(np.int_)
+    scalar = np.random.randint(100)
+    dense_x = sparse_x.todense()
+
+    assert_eq(func(sparse_x, scalar), func(dense_x, scalar))
+    assert_eq(func(scalar, sparse_x), func(scalar, dense_x))
+
+
+@pytest.mark.parametrize("func", [operator.lshift, operator.rshift])
+@pytest.mark.parametrize("shape", [(2,), (2, 3), (2, 3, 4), (2, 3, 4, 5)])
+def test_bitshift_scalar(func, shape):
+    """Shifting a sparse array by an integer scalar matches the dense result."""
+    # Density is kept high so that even the smallest shapes store some entries;
+    # scaling by 100 stops the int cast from truncating every value to zero.
+    sparse_x = (sparse.random(shape, density=0.5) * 100).astype(np.int_)
+
+    # Shifting by 64 or more bits is not defined, so draw the count below 64.
+    shift = np.random.randint(64)
+    dense_x = sparse_x.todense()
+
+    expected = func(dense_x, shift)
+    assert_eq(func(sparse_x, shift), expected)
+
+
+@pytest.mark.parametrize("func", [operator.invert])
+@pytest.mark.parametrize("shape", [(2,), (2, 3), (2, 3, 4), (2, 3, 4, 5)])
+def test_unary_bitwise_nonzero_output_fv(func, shape):
+    """Check unary ``func(array)`` against NumPy; the result must stay a COO."""
+    # Density is kept high so that even the smallest shapes store some entries;
+    # scaling by 100 stops the int cast from truncating every value to zero.
+    sparse_x = (sparse.random(shape, density=0.5) * 100).astype(np.int_)
+    expected = func(sparse_x.todense())
+
+    actual = func(sparse_x)
+    assert isinstance(actual, COO)
+    # The operation must not add stored entries.
+    assert actual.nnz <= sparse_x.nnz
+
+    assert_eq(expected, actual)
+
+
+@pytest.mark.parametrize("func", [operator.or_, operator.xor])
+@pytest.mark.parametrize("shape", [(2,), (2, 3), (2, 3, 4), (2, 3, 4, 5)])
+def test_binary_bitwise_nonzero_output_fv(func, shape):
+    """Check ``func(array, int)`` against NumPy; the result must stay a COO."""
+    # Density is kept high so that even the smallest shapes store some entries;
+    # scaling by 100 stops the int cast from truncating every value to zero.
+    sparse_x = (sparse.random(shape, density=0.5) * 100).astype(np.int_)
+    # Drawn from [1, 100), so the scalar operand is never zero.
+    scalar = np.random.randint(1, 100)
+    dense_x = sparse_x.todense()
+
+    expected = func(dense_x, scalar)
+    actual = func(sparse_x, scalar)
+
+    assert isinstance(actual, COO)
+    assert actual.nnz <= sparse_x.nnz
+    assert_eq(expected, actual)
+
+
+@pytest.mark.parametrize(
+    "func",
+    [operator.mul, operator.add, operator.sub, operator.gt, operator.lt, operator.ne],
+)
+@pytest.mark.parametrize("shape", [(2,), (2, 3), (2, 3, 4), (2, 3, 4, 5)])
+def test_elemwise_nonzero_input_fv(func, shape):
+    """Element-wise ``func`` on inputs with random fill values matches NumPy."""
+    # Each operand gets its own randomly drawn fill value.
+    lhs = sparse.random(shape, density=0.5, fill_value=np.random.rand())
+    rhs = sparse.random(shape, density=0.5, fill_value=np.random.rand())
+
+    expected = func(lhs.todense(), rhs.todense())
+    assert_eq(func(lhs, rhs), expected)
+
+
+@pytest.mark.parametrize("func", [operator.lshift, operator.rshift])
+@pytest.mark.parametrize("shape", [(2,), (2, 3), (2, 3, 4), (2, 3, 4, 5)])
+def test_binary_bitshift_densification_fails(func, shape):
+    """Shifting a non-zero int scalar by a sparse array of counts matches NumPy."""
+    # Density is kept high so that even the smallest shapes store some entries;
+    # scaling by 64 before the int cast keeps the counts below 64 but not all zero.
+    scalar = np.random.randint(1, 100)
+    shifts = (sparse.random(shape, density=0.5) * 64).astype(np.int_)
+    dense_shifts = shifts.todense()
+
+    expected = func(scalar, dense_shifts)
+    actual = func(scalar, shifts)
+
+    assert isinstance(actual, COO)
+    # The operation must not add stored entries.
+    assert actual.nnz <= shifts.nnz
+    assert_eq(expected, actual)
+
+
+@pytest.mark.parametrize("func", [operator.and_, operator.or_, operator.xor])
+@pytest.mark.parametrize("shape", [(2,), (2, 3), (2, 3, 4), (2, 3, 4, 5)])
+def test_bitwise_binary_bool(func, shape):
+    """Bitwise ``func`` on two boolean sparse arrays matches the dense result."""
+    # Density is kept high so that even the smallest shapes store some entries.
+    lhs = sparse.random(shape, density=0.5).astype(bool)
+    rhs = sparse.random(shape, density=0.5).astype(bool)
+
+    expected = func(lhs.todense(), rhs.todense())
+
+    assert_eq(func(lhs, rhs), expected)
+
+
+def test_elemwise_binary_empty():
+    x = COO({}, shape=(10, 10))
+    y = sparse.random((10, 10), density=0.5)
+    # An empty operand should give an empty product, whichever side it is on.
+    for z in [x * y, y * x]:
+        assert z.nnz == 0
+        assert z.coords.shape == (2, 0)
+        assert z.data.shape == (0,)