diff --git a/sparse/__init__.py b/sparse/__init__.py
index ab2613d1..34cbe168 100644
--- a/sparse/__init__.py
+++ b/sparse/__init__.py
@@ -1,4 +1,4 @@
-from ._coo import COO
+from ._coo import COO, as_coo
 from ._dok import DOK
 from ._sparse_array import SparseArray
 from ._utils import random
diff --git a/sparse/_common.py b/sparse/_common.py
index 59471fd0..21b519ee 100644
--- a/sparse/_common.py
+++ b/sparse/_common.py
@@ -1,13 +1,16 @@
 import numpy as np
+import numba
+import scipy.sparse
+from functools import wraps
+from itertools import chain
 from collections.abc import Iterable
-from sparse import COO
 
-from ._utils import check_compressed_axes, normalize_axis
-from ._coo import (
+from ._sparse_array import SparseArray
+from ._utils import check_compressed_axes, normalize_axis, check_zero_fill_value
+
+from ._coo.umath import elemwise
+from ._coo.common import (
     clip,
-    tensordot,
-    dot,
-    matmul,
     triu,
     tril,
     where,
@@ -25,11 +28,1143 @@
     result_type,
     diagonal,
     diagonalize,
-    elemwise,
-    as_coo,
+    asCOO,
+    linear_loc,
 )
 
 
+def tensordot(a, b, axes=2, *, return_type=None):
+    """
+    Perform the equivalent of :obj:`numpy.tensordot`.
+
+    Parameters
+    ----------
+    a, b : Union[COO, np.ndarray, scipy.sparse.spmatrix]
+        The arrays to perform the :code:`tensordot` operation on.
+    axes : Union[int, tuple[Union[int, tuple[int]], Union[int, tuple[int]]]], optional
+        An integer ``N`` sums over the last ``N`` axes of ``a`` and the first
+        ``N`` axes of ``b``; a pair ``(axes_a, axes_b)`` names the axes explicitly.
+    return_type : {None, COO, np.ndarray}, optional
+        Type of returned array.
+
+    Returns
+    -------
+    Union[COO, numpy.ndarray]
+        The result of the operation.
+
+    Raises
+    ------
+    ValueError
+        If all arguments don't have zero fill-values, an operand is 0-d, or summed axes mismatch.
+
+    See Also
+    --------
+    numpy.tensordot : NumPy equivalent function
+    """
+    from ._compressed import GCXS
+
+    # Much of this is stolen from numpy/core/numeric.py::tensordot
+    # Please see license at https://github.com/numpy/numpy/blob/master/LICENSE.txt
+    check_zero_fill_value(a, b)
+
+    if scipy.sparse.issparse(a):
+        a = GCXS.from_scipy_sparse(a)
+    if scipy.sparse.issparse(b):
+        b = GCXS.from_scipy_sparse(b)
+
+    try:
+        iter(axes)
+    except TypeError:
+        axes_a = list(range(-axes, 0))
+        axes_b = list(range(0, axes))
+    else:
+        axes_a, axes_b = axes
+    try:
+        na = len(axes_a)
+        axes_a = list(axes_a)
+    except TypeError:
+        axes_a = [axes_a]
+        na = 1
+    try:
+        nb = len(axes_b)
+        axes_b = list(axes_b)
+    except TypeError:
+        axes_b = [axes_b]
+        nb = 1
+
+    # a, b = asarray(a), asarray(b)  # <--- modified
+    as_ = a.shape
+    nda = a.ndim
+    bs = b.shape
+    ndb = b.ndim
+    equal = True
+    if nda == 0 or ndb == 0:
+        pos = int(nda != 0)
+        raise ValueError("Input {} operand does not have enough dimensions".format(pos))
+    if na != nb:
+        equal = False
+    else:
+        for k in range(na):
+            if as_[axes_a[k]] != bs[axes_b[k]]:
+                equal = False
+                break
+            if axes_a[k] < 0:
+                axes_a[k] += nda
+            if axes_b[k] < 0:
+                axes_b[k] += ndb
+    if not equal:
+        raise ValueError("shape-mismatch for sum")
+
+    # Move the axes to sum over to the end of "a"
+    # and to the front of "b"
+    notin = [k for k in range(nda) if k not in axes_a]
+    newaxes_a = notin + axes_a
+    N2 = 1
+    for axis in axes_a:
+        N2 *= as_[axis]
+    newshape_a = (-1, N2)
+    olda = [as_[axis] for axis in notin]
+
+    notin = [k for k in range(ndb) if k not in axes_b]
+    newaxes_b = axes_b + notin
+    N2 = 1
+    for axis in axes_b:
+        N2 *= bs[axis]
+    newshape_b = (N2, -1)
+    oldb = [bs[axis] for axis in notin]
+
+    if any(dim == 0 for dim in chain(newshape_a, newshape_b)):
+        # An empty contraction sums nothing: the result is all zeros, not np.empty garbage.
+        res = asCOO(np.zeros(olda + oldb), check=False)
+        if isinstance(a, np.ndarray) or isinstance(b, np.ndarray):
+            res = res.todense()
+        return res
+
+    at = a.transpose(newaxes_a).reshape(newshape_a)
+    bt = b.transpose(newaxes_b).reshape(newshape_b)
+    res = _dot(at, bt, return_type)
+    return res.reshape(olda + oldb)
+
+
+def matmul(a, b):
+    """Perform the equivalent of :obj:`numpy.matmul` on two arrays.
+
+    Parameters
+    ----------
+    a, b : Union[COO, np.ndarray, scipy.sparse.spmatrix]
+        The arrays to perform the :code:`matmul` operation on.
+
+    Returns
+    -------
+    Union[COO, numpy.ndarray]
+        The result of the operation.
+
+    Raises
+    ------
+    ValueError
+        If all arguments don't have zero fill-values, or the shape of the two arrays is not broadcastable.
+
+    See Also
+    --------
+    numpy.matmul : NumPy equivalent function.
+    COO.__matmul__ : Equivalent function for COO objects.
+    """
+    check_zero_fill_value(a, b)
+    if not hasattr(a, "ndim") or not hasattr(b, "ndim"):
+        raise TypeError(
+            "Cannot perform dot product on types %s, %s" % (type(a), type(b))
+        )
+
+    # When b is 2-d, it is equivalent to dot
+    if b.ndim <= 2:
+        return dot(a, b)
+
+    # a is 2-d: dot puts a's row axis first, so move it in front of the last axis
+    if a.ndim <= 2:
+        res = dot(a, b)
+        if a.ndim < 2:  # a is 1-d: dot already has matmul's output layout
+            return res
+        return res.transpose([*range(1, res.ndim - 1), 0, res.ndim - 1])
+
+    # If a can be squeeze to a vector, use dot will be faster
+    if a.ndim <= b.ndim and np.prod(a.shape[:-1]) == 1:
+        res = dot(a.reshape(-1), b)
+        shape = list(res.shape)
+        shape.insert(-1, 1)
+        return res.reshape(shape)
+
+    # If b can be squeeze to a matrix, use dot will be faster
+    if b.ndim <= a.ndim and np.prod(b.shape[:-2]) == 1:
+        return dot(a, b.reshape(b.shape[-2:]))
+
+    # Left-pad the lower-rank operand with length-1 axes so batch axes line up
+    if a.ndim < b.ndim:
+        a = a[(None,) * (b.ndim - a.ndim)]
+    if a.ndim > b.ndim:
+        b = b[(None,) * (a.ndim - b.ndim)]
+    for i, j in zip(a.shape[:-2], b.shape[:-2]):
+        if i != 1 and j != 1 and i != j:
+            raise ValueError("shapes of a and b are not broadcastable")
+
+    def _matmul_recurser(a, b):
+        if a.ndim == 2:
+            return dot(a, b)
+        res = []
+        for i in range(max(a.shape[0], b.shape[0])):
+            a_i = a[0] if a.shape[0] == 1 else a[i]
+            b_i = b[0] if b.shape[0] == 1 else b[i]
+            res.append(_matmul_recurser(a_i, b_i))
+        mask = [isinstance(x, SparseArray) for x in res]
+        if all(mask):
+            return stack(res)
+        else:
+            res = [x.todense() if isinstance(x, SparseArray) else x for x in res]
+            return np.stack(res)
+
+    return _matmul_recurser(a, b)
+
+
+def dot(a, b):
+    """Perform the equivalent of :obj:`numpy.dot` on two arrays.
+
+    Parameters
+    ----------
+    a, b : Union[COO, np.ndarray, scipy.sparse.spmatrix]
+        The arrays to perform the :code:`dot` operation on.
+
+    Returns
+    -------
+    Union[COO, numpy.ndarray]
+        The result of the operation.
+
+    Raises
+    ------
+    ValueError
+        If all arguments don't have zero fill-values.
+    TypeError
+        If either argument has no ``ndim`` attribute.
+
+    See Also
+    --------
+    numpy.dot : NumPy equivalent function.
+    COO.dot : Equivalent function for COO objects.
+    """
+    check_zero_fill_value(a, b)
+    if not hasattr(a, "ndim") or not hasattr(b, "ndim"):
+        raise TypeError(
+            "Cannot perform dot product on types %s, %s" % (type(a), type(b))
+        )
+
+    if a.ndim == 1 and b.ndim == 1:  # vector . vector: elementwise product, summed
+        if isinstance(a, SparseArray):
+            a = asCOO(a)
+        if isinstance(b, SparseArray):
+            b = asCOO(b)
+        return (a * b).sum()
+
+    a_axis = -1  # contract over the last axis of a ...
+    b_axis = -2  # ... and the second-to-last axis of b
+    if b.ndim == 1:
+        b_axis = -1  # a 1-d b only has one axis to contract over
+    return tensordot(a, b, axes=(a_axis, b_axis))
+
+
+def _dot(a, b, return_type=None):
+    """
+    Multiply two 2-D operands (GCXS, COO or ``np.ndarray``), dispatching on
+    their types; ``return_type`` selects the output class where supported.
+    """
+    from ._coo import COO
+    from ._compressed import GCXS
+    from ._compressed.convert import uncompress_dimension
+    from ._sparse_array import SparseArray
+
+    def sort_indices(data, indices, indptr, shape):
+        """
+        Several of the dot algorithms produce indices that are out of
+        order within a compressed row/column, so reorder ``indices`` and
+        ``data`` in place (the callers ignore the return value).
+        """
+        coords = np.empty((2, indices.shape[0]), dtype=np.intp)
+        coords[0, :] = uncompress_dimension(indptr)
+        coords[1, :] = indices
+        linear = linear_loc(coords, shape)
+        order = np.argsort(linear, kind="mergesort")
+        indices[:] = indices[order]
+        data[:] = data[order]
+
+    out_shape = (a.shape[0], b.shape[1])
+    if all(isinstance(arr, SparseArray) for arr in [a, b]) and any(
+        isinstance(arr, GCXS) for arr in [a, b]
+    ):
+        a = a.asformat("gcxs")
+        b = b.asformat("gcxs", compressed_axes=a.compressed_axes)
+
+    if isinstance(a, GCXS) and isinstance(b, GCXS):
+        if a.nbytes > b.nbytes:
+            b = b.change_compressed_axes(a.compressed_axes)
+        else:
+            a = a.change_compressed_axes(b.compressed_axes)
+
+        if a.compressed_axes == (0,):  # csr @ csr
+            compressed_axes = (0,)
+            data, indices, indptr = _dot_csr_csr_type(a.dtype, b.dtype)(
+                out_shape, a.data, b.data, a.indices, b.indices, a.indptr, b.indptr
+            )
+            sort_indices(data, indices, indptr, out_shape)
+        elif a.compressed_axes == (1,):  # csc @ csc
+            # a @ b = (b.T @ a.T).T
+            compressed_axes = (1,)
+            data, indices, indptr = _dot_csr_csr_type(b.dtype, a.dtype)(
+                out_shape[::-1],
+                b.data,
+                a.data,
+                b.indices,
+                a.indices,
+                b.indptr,
+                a.indptr,
+            )
+            sort_indices(data, indices, indptr, out_shape[::-1])
+        out = GCXS(
+            (data, indices, indptr), shape=out_shape, compressed_axes=compressed_axes
+        )
+        if return_type == np.ndarray:
+            return out.todense()
+        elif return_type == COO:
+            return out.tocoo()
+        return out
+
+    if isinstance(a, GCXS) and isinstance(b, np.ndarray):
+        if a.compressed_axes == (0,):  # csr @ ndarray
+            if return_type is None or return_type == np.ndarray:
+                return _dot_csr_ndarray_type(a.dtype, b.dtype)(
+                    out_shape, a.data, a.indices, a.indptr, b
+                )
+            data, indices, indptr = _dot_csr_ndarray_type_sparse(a.dtype, b.dtype)(
+                out_shape, a.data, a.indices, a.indptr, b
+            )
+            out = GCXS((data, indices, indptr), shape=out_shape, compressed_axes=(0,))
+            if return_type == COO:
+                return out.tocoo()
+            return out
+        if return_type is None or return_type == np.ndarray:  # csc @ ndarray
+            return _dot_csc_ndarray_type(a.dtype, b.dtype)(
+                a.shape, b.shape, a.data, a.indices, a.indptr, b
+            )
+        data, indices, indptr = _dot_csc_ndarray_type_sparse(a.dtype, b.dtype)(
+            a.shape, b.shape, a.data, a.indices, a.indptr, b
+        )
+        sort_indices(data, indices, indptr, out_shape[::-1])
+        compressed_axes = (1,)
+        out = GCXS(
+            (data, indices, indptr), shape=out_shape, compressed_axes=compressed_axes
+        )
+        if return_type == COO:
+            return out.tocoo()
+        return out
+
+    if isinstance(a, np.ndarray) and isinstance(b, GCXS):
+        at = a.view(type=np.ndarray).T
+        bt = b.T  # constant-time transpose
+        if b.compressed_axes == (0,):
+            if return_type is None or return_type == np.ndarray:
+                out = _dot_csc_ndarray_type(bt.dtype, at.dtype)(
+                    bt.shape, at.shape, bt.data, bt.indices, bt.indptr, at
+                )
+                return out.T
+            data, indices, indptr = _dot_csc_ndarray_type_sparse(bt.dtype, at.dtype)(
+                bt.shape, at.shape, bt.data, bt.indices, bt.indptr, at
+            )
+            out = GCXS((data, indices, indptr), shape=out_shape, compressed_axes=(0,))
+            if return_type == COO:
+                return out.tocoo()
+            return out
+
+        # compressed_axes == (1,)
+        if return_type is None or return_type == np.ndarray:
+            return _dot_ndarray_csc_type(a.dtype, b.dtype)(
+                out_shape, b.data, b.indices, b.indptr, a
+            )
+        data, indices, indptr = _dot_csr_ndarray_type_sparse(bt.dtype, at.dtype)(
+            out_shape[::-1], bt.data, bt.indices, bt.indptr, at
+        )
+        out = GCXS((data, indices, indptr), shape=out_shape, compressed_axes=(1,))
+        if return_type == COO:
+            return out.tocoo()
+        return out
+
+    if isinstance(a, COO) and isinstance(b, COO):
+        # convert to csr
+        a_indptr = np.empty(a.shape[0] + 1, dtype=np.intp)
+        a_indptr[0] = 0
+        np.cumsum(np.bincount(a.coords[0], minlength=a.shape[0]), out=a_indptr[1:])
+        b_indptr = np.empty(b.shape[0] + 1, dtype=np.intp)
+        b_indptr[0] = 0
+        np.cumsum(np.bincount(b.coords[0], minlength=b.shape[0]), out=b_indptr[1:])
+        coords, data = _dot_coo_coo_type(a.dtype, b.dtype)(
+            out_shape, a.coords, b.coords, a.data, b.data, a_indptr, b_indptr
+        )
+        out = COO(coords, data, shape=out_shape, has_duplicates=False, sorted=False)
+        if return_type == np.ndarray:
+            return out.todense()
+        elif return_type == GCXS:
+            return out.asformat("gcxs")
+        return out
+
+    if isinstance(a, COO) and isinstance(b, np.ndarray):
+        b = b.view(type=np.ndarray).T
+        if return_type is None or return_type == np.ndarray:
+            return _dot_coo_ndarray_type(a.dtype, b.dtype)(
+                a.coords, a.data, b, out_shape
+            )
+        coords, data = _dot_coo_ndarray_type_sparse(a.dtype, b.dtype)(
+            a.coords, a.data, b, out_shape
+        )
+        out = COO(coords, data, shape=out_shape, has_duplicates=False, sorted=True)
+        if return_type == GCXS:
+            return out.asformat("gcxs")
+        return out
+
+    if isinstance(a, np.ndarray) and isinstance(b, COO):
+        b = b.T
+        a = a.view(type=np.ndarray)
+        if return_type is None or return_type == np.ndarray:
+            return _dot_ndarray_coo_type(a.dtype, b.dtype)(
+                a, b.coords, b.data, out_shape
+            )
+        coords, data = _dot_ndarray_coo_type_sparse(a.dtype, b.dtype)(
+            a, b.coords, b.data, out_shape
+        )
+        out = COO(coords, data, shape=out_shape, has_duplicates=False, sorted=True)
+        if return_type == GCXS:
+            return out.asformat("gcxs")
+        return out
+
+
+def _memoize_dtype(f):
+    """
+    Cache the results of ``f``, keyed on the ``name`` of each dtype argument.
+
+    Parameters
+    ----------
+    f : Callable
+        Function whose positional arguments all expose a ``name`` attribute.
+
+    Returns
+    -------
+    wrapped : Callable
+
+    Examples
+    --------
+    >>> def func(dt1):
+    ...     return object()
+    >>> func = _memoize_dtype(func)
+    >>> func(np.dtype('i8')) is func(np.dtype('int64'))
+    True
+    >>> func(np.dtype('i8')) is func(np.dtype('i4'))
+    False
+    """
+    memo = {}
+
+    @wraps(f)
+    def wrapped(*args):
+        names = tuple(dt.name for dt in args)
+        try:
+            return memo[names]
+        except KeyError:
+            memo[names] = produced = f(*args)
+            return produced
+
+    return wrapped
+
+
+@numba.jit(nopython=True, nogil=True)
+def _csr_csr_count_nnz(
+    out_shape, a_indices, b_indices, a_indptr, b_indptr
+):  # pragma: no cover
+    """
+    Count the structural nonzeros of ``a @ b`` for two row-compressed arrays.
+
+    Parameters
+    ----------
+    out_shape : tuple
+        The shape of the output array.
+
+    a_indices, a_indptr : np.ndarray
+        The indices and index pointer array of ``a``.
+
+    b_indices, b_indptr : np.ndarray
+        The indices and index pointer array of ``b``.
+
+    Returns
+    -------
+    nnz : int
+        Number of output positions hit by a product term; values are not read.
+    """
+    n_row, n_col = out_shape
+    nnz = 0
+    mask = np.full(n_col, -1)  # mask[k] == i once column k is counted for row i
+    for i in range(n_row):
+        row_nnz = 0
+        for j in a_indices[a_indptr[i] : a_indptr[i + 1]]:
+            for k in b_indices[b_indptr[j] : b_indptr[j + 1]]:
+                if mask[k] != i:
+                    mask[k] = i
+                    row_nnz += 1
+        nnz += row_nnz
+    return nnz
+
+
+@numba.jit(nopython=True, nogil=True)
+def _csr_ndarray_count_nnz(
+    out_shape, indptr, a_indices, a_indptr, b
+):  # pragma: no cover
+    """
+    Count the output entries of ``a @ b`` for ``a`` with compressed rows and
+    a dense ``b``: (i, j) counts when some index ``k`` stored in row ``i`` of
+    ``a`` has ``b[k, j] != 0``. Returns the total and fills ``indptr``.
+
+    Parameters
+    ----------
+    out_shape : tuple
+        The shape of the output array.
+
+    indptr : ndarray
+        Output index pointer array; ``indptr[i + 1]`` is written for every row.
+
+    a_indices, a_indptr : np.ndarray
+        The indices and index pointer array of ``a``.
+
+    b : np.ndarray
+        The second input array ``b``.
+    """
+    nnz = 0
+    for i in range(out_shape[0]):
+        cur_row = a_indices[a_indptr[i] : a_indptr[i + 1]]
+        for j in range(out_shape[1]):
+            for k in cur_row:
+                if b[k, j] != 0:
+                    nnz += 1
+                    break  # one hit is enough to count output entry (i, j)
+        indptr[i + 1] = nnz
+    return nnz
+
+
+@numba.jit(nopython=True, nogil=True)
+def _csc_ndarray_count_nnz(
+    a_shape, b_shape, indptr, a_indices, a_indptr, b
+):  # pragma: no cover
+    """
+    Count the output entries of ``a @ b`` for ``a`` with compressed columns
+    and a dense ``b``, one output column at a time. Returns the total and
+    fills ``indptr[1:]``; ``indptr[0]`` is not written here.
+
+    Parameters
+    ----------
+    a_shape, b_shape : tuple
+        The shapes of the input arrays.
+
+    indptr : ndarray
+        Output index pointer array; ``indptr[i + 1]`` is written per column.
+
+    a_indices, a_indptr : np.ndarray
+        The indices and index pointer array of ``a``.
+
+    b : np.ndarray
+        The second input array ``b``.
+    """
+    nnz = 0
+    mask = np.full(a_shape[0], -1)  # mask[k] == i once row k is counted for column i
+    for i in range(b_shape[1]):
+        col_nnz = 0
+        for j in range(b_shape[0]):
+            for k in a_indices[a_indptr[j] : a_indptr[j + 1]]:
+                if b[j, i] != 0 and mask[k] != i:
+                    mask[k] = i
+                    col_nnz += 1
+        nnz += col_nnz
+        indptr[i + 1] = nnz
+    return nnz
+
+
+@_memoize_dtype
+def _dot_csr_csr_type(dt1, dt2):
+    dtr = np.result_type(dt1, dt2)
+
+    @numba.jit(
+        nopython=True,
+        nogil=True,
+        locals={"data_curr": numba.np.numpy_support.from_dtype(dtr)},
+    )
+    def _dot_csr_csr(
+        out_shape, a_data, b_data, a_indices, b_indices, a_indptr, b_indptr
+    ):  # pragma: no cover
+        """
+        Compute a @ b for a and b with compressed rows; returns the
+        ``(data, indices, indptr)`` triple of the product (indices unsorted).
+
+        Parameters
+        ----------
+        out_shape : tuple
+            The shape of the output array.
+
+        a_data, a_indices, a_indptr : np.ndarray
+            The data, indices, and index pointer arrays of ``a``.
+
+        b_data, b_indices, b_indptr : np.ndarray
+            The data, indices, and index pointer arrays of ``b``.
+        """
+
+        # much of this is borrowed from:
+        # https://github.com/scipy/scipy/blob/master/scipy/sparse/sparsetools/csr.h
+
+        # calculate nnz before multiplying so we can use static arrays
+        nnz = _csr_csr_count_nnz(out_shape, a_indices, b_indices, a_indptr, b_indptr)
+        n_row, n_col = out_shape
+        indptr = np.empty(n_row + 1, dtype=np.intp)
+        indptr[0] = 0
+        indices = np.empty(nnz, dtype=np.intp)
+        data = np.empty(nnz, dtype=dtr)
+        next_ = np.full(n_col, -1)  # linked list of columns touched in this row
+        sums = np.zeros(n_col, dtype=dtr)  # accumulate in the result dtype
+        nnz = 0
+
+        for i in range(n_row):
+            head = -2
+            length = 0
+            for j, av in zip(
+                a_indices[a_indptr[i] : a_indptr[i + 1]],
+                a_data[a_indptr[i] : a_indptr[i + 1]],
+            ):
+                for k, bv in zip(
+                    b_indices[b_indptr[j] : b_indptr[j + 1]],
+                    b_data[b_indptr[j] : b_indptr[j + 1]],
+                ):
+                    sums[k] += av * bv
+                    if next_[k] == -1:
+                        next_[k] = head
+                        head = k
+                        length += 1
+
+            for _ in range(length):
+                if sums[head] != 0:
+                    indices[nnz] = head
+                    data[nnz] = sums[head]
+                    nnz += 1
+
+                temp = head
+                head = next_[head]
+
+                next_[temp] = -1
+                sums[temp] = 0
+
+            indptr[i + 1] = nnz
+        return data[:nnz], indices[:nnz], indptr  # drop slots of cancelled sums
+
+    return _dot_csr_csr
+
+
+@_memoize_dtype
+def _dot_csr_ndarray_type(dt1, dt2):
+    dtr = np.result_type(dt1, dt2)
+
+    @numba.jit(
+        nopython=True,
+        nogil=True,
+        locals={"data_curr": numba.np.numpy_support.from_dtype(dtr)},
+    )
+    def _dot_csr_ndarray(out_shape, a_data, a_indices, a_indptr, b):  # pragma: no cover
+        """
+        Utility function taking in one ``GCXS`` and one ``ndarray`` and
+        calculating their dot product: a @ b for a with compressed rows.
+        Returns a dense result.
+
+        Parameters
+        ----------
+        out_shape : Tuple[int]
+            The shape of the output array.
+
+        a_data, a_indices, a_indptr : np.ndarray
+            The data, indices, and index pointers of ``a``.
+
+        b : np.ndarray
+            The second input array ``b``.
+        """
+        out = np.empty(out_shape, dtype=dtr)  # every element is assigned below
+        for i in range(out_shape[0]):
+            for j in range(out_shape[1]):
+                val = 0
+                for k in range(a_indptr[i], a_indptr[i + 1]):
+                    ind = a_indices[k]
+                    v = a_data[k]
+                    val += v * b[ind, j]
+                out[i, j] = val
+        return out
+
+    return _dot_csr_ndarray
+
+
+@_memoize_dtype
+def _dot_csr_ndarray_type_sparse(dt1, dt2):
+    dtr = np.result_type(dt1, dt2)
+
+    @numba.jit(
+        nopython=True,
+        nogil=True,
+        locals={"data_curr": numba.np.numpy_support.from_dtype(dtr)},
+    )
+    def _dot_csr_ndarray_sparse(
+        out_shape, a_data, a_indices, a_indptr, b
+    ):  # pragma: no cover
+        """
+        Utility function taking in one ``GCXS`` and one ``ndarray`` and
+        calculating their dot product: a @ b for a with compressed rows.
+        Returns a sparse result as ``(data, indices, indptr)``.
+
+        Parameters
+        ----------
+        out_shape : Tuple[int]
+            The shape of the output array.
+
+        a_data, a_indices, a_indptr : np.ndarray
+            The data, indices, and index pointers of ``a``.
+
+        b : np.ndarray
+            The second input array ``b``.
+        """
+        indptr = np.empty(out_shape[0] + 1, dtype=np.intp)
+        indptr[0] = 0
+        nnz = _csr_ndarray_count_nnz(out_shape, indptr, a_indices, a_indptr, b)
+        indices = np.empty(nnz, dtype=np.intp)
+        data = np.empty(nnz, dtype=dtr)
+        current = 0
+        for i in range(out_shape[0]):
+            for j in range(out_shape[1]):
+                val = 0
+                nonzero = False
+                for k in range(a_indptr[i], a_indptr[i + 1]):
+                    ind = a_indices[k]
+                    v = a_data[k]
+                    val += v * b[ind, j]
+                    if b[ind, j] != 0:
+                        nonzero = True  # same criterion as _csr_ndarray_count_nnz
+                if nonzero:
+                    data[current] = val
+                    indices[current] = j
+                    current += 1
+        return data, indices, indptr
+
+    return _dot_csr_ndarray_sparse
+
+
+@_memoize_dtype
+def _dot_csc_ndarray_type_sparse(dt1, dt2):
+    dtr = np.result_type(dt1, dt2)
+
+    @numba.jit(
+        nopython=True,
+        nogil=True,
+        locals={"data_curr": numba.np.numpy_support.from_dtype(dtr)},
+    )
+    def _dot_csc_ndarray_sparse(
+        a_shape, b_shape, a_data, a_indices, a_indptr, b
+    ):  # pragma: no cover
+        """
+        Utility function taking in one ``GCXS`` and one ``ndarray`` and
+        calculating their dot product: a @ b for a with compressed columns.
+        Returns a column-compressed ``(data, indices, indptr)`` (indices unsorted).
+
+        Parameters
+        ----------
+        a_shape, b_shape : Tuple[int]
+            The shapes of the input arrays.
+
+        a_data, a_indices, a_indptr : np.ndarray
+            The data, indices, and index pointers of ``a``.
+
+        b : np.ndarray
+            The second input array ``b``.
+        """
+        indptr = np.zeros(b_shape[1] + 1, dtype=np.intp)  # indptr[0] must be 0
+        nnz = _csc_ndarray_count_nnz(a_shape, b_shape, indptr, a_indices, a_indptr, b)
+        indices = np.empty(nnz, dtype=np.intp)
+        data = np.empty(nnz, dtype=dtr)
+        sums = np.zeros(a_shape[0], dtype=dtr)  # accumulate in the result dtype
+        mask = np.full(a_shape[0], -1)  # linked list of rows touched in this column
+        nnz = 0
+        for i in range(b_shape[1]):
+            head = -2
+            length = 0
+            for j in range(b_shape[0]):
+                u = b[j, i]
+                if u != 0:
+                    for k in range(a_indptr[j], a_indptr[j + 1]):
+                        ind = a_indices[k]
+                        v = a_data[k]
+                        sums[ind] += u * v
+                        if mask[ind] == -1:
+                            mask[ind] = head
+                            head = ind
+                            length += 1
+            for _ in range(length):
+                if sums[head] != 0:
+                    indices[nnz] = head
+                    data[nnz] = sums[head]
+                    nnz += 1
+
+                temp = head
+                head = mask[head]
+
+                mask[temp] = -1
+                sums[temp] = 0
+            indptr[i + 1] = nnz  # exact count: cancelled sums were not stored
+        return data[:nnz], indices[:nnz], indptr
+
+    return _dot_csc_ndarray_sparse
+
+
+@_memoize_dtype
+def _dot_csc_ndarray_type(dt1, dt2):
+    dtr = np.result_type(dt1, dt2)
+
+    @numba.jit(
+        nopython=True,
+        nogil=True,
+        locals={"data_curr": numba.np.numpy_support.from_dtype(dtr)},
+    )
+    def _dot_csc_ndarray(
+        a_shape, b_shape, a_data, a_indices, a_indptr, b
+    ):  # pragma: no cover
+        """
+        Utility function taking in one ``GCXS`` and one ``ndarray`` and
+        calculating their dot product: a @ b for a with compressed columns.
+        Returns a dense result of shape ``(a_shape[0], b_shape[1])``.
+
+        Parameters
+        ----------
+        a_shape, b_shape : Tuple[int]
+            The shapes of the input arrays.
+
+        a_data, a_indices, a_indptr : np.ndarray
+            The data, indices, and index pointers of ``a``.
+
+        b : np.ndarray
+            The second input array ``b``.
+        """
+        out = np.zeros((a_shape[0], b_shape[1]), dtype=dtr)
+        for j in range(b_shape[1]):
+            for i in range(b_shape[0]):
+                for k in range(a_indptr[i], a_indptr[i + 1]):
+                    out[a_indices[k], j] += a_data[k] * b[i, j]  # scatter column i of a
+        return out
+
+    return _dot_csc_ndarray
+
+
+@_memoize_dtype
+def _dot_ndarray_csc_type(dt1, dt2):
+    dtr = np.result_type(dt1, dt2)
+
+    @numba.jit(
+        nopython=True,
+        nogil=True,
+        locals={"data_curr": numba.np.numpy_support.from_dtype(dtr)},
+    )
+    def _dot_ndarray_csc(out_shape, b_data, b_indices, b_indptr, a):  # pragma: no cover
+        """
+        Utility function taking in one ``ndarray`` and one ``GCXS`` and calculating
+        their dot product: a @ b for b with compressed columns. Dense result.
+
+        Parameters
+        ----------
+        out_shape : Tuple[int]
+            The shape of the output array.
+
+        b_data, b_indices, b_indptr : np.ndarray
+            The data, indices, and index pointers of ``b``.
+
+        a : np.ndarray
+            The input array ``a``.
+        """
+        out = np.empty(out_shape, dtype=dtr)  # every element is assigned below
+        for i in range(out_shape[0]):
+            for j in range(out_shape[1]):
+                total = 0
+                for k in range(b_indptr[j], b_indptr[j + 1]):
+                    total += a[i, b_indices[k]] * b_data[k]
+                out[i, j] = total
+        return out
+
+    return _dot_ndarray_csc
+
+
+@_memoize_dtype
+def _dot_coo_coo_type(dt1, dt2):
+    dtr = np.result_type(dt1, dt2)
+
+    @numba.jit(
+        nopython=True,
+        nogil=True,
+        locals={"data_curr": numba.np.numpy_support.from_dtype(dtr)},
+    )
+    def _dot_coo_coo(
+        out_shape, a_coords, b_coords, a_data, b_data, a_indptr, b_indptr
+    ):  # pragma: no cover
+        """
+        Utility function taking in two ``COO`` objects and calculating
+        their dot product: a @ b. Returns ``(coords, data)`` of the product.
+
+        Parameters
+        ----------
+        out_shape : tuple
+            The shape of the output array.
+
+        a_coords, a_data, a_indptr : np.ndarray
+            The coordinates, data and row pointers of ``a`` (sliced per row).
+
+        b_coords, b_data, b_indptr : np.ndarray
+            The coordinates, data and row pointers of ``b`` (sliced per row).
+        """
+
+        # much of this is borrowed from:
+        # https://github.com/scipy/scipy/blob/master/scipy/sparse/sparsetools/csr.h
+
+        n_row, n_col = out_shape
+        # calculate nnz before multiplying so we can use static arrays
+        nnz = _csr_csr_count_nnz(
+            out_shape, a_coords[1], b_coords[1], a_indptr, b_indptr
+        )
+        coords = np.empty((2, nnz), dtype=np.intp)
+        data = np.empty(nnz, dtype=dtr)
+        next_ = np.full(n_col, -1)  # linked list of columns touched in this row
+        sums = np.zeros(n_col, dtype=dtr)  # accumulate in the result dtype
+        nnz = 0
+
+        for i in range(n_row):
+            head = -2
+            length = 0
+            for j, av in zip(
+                a_coords[1, a_indptr[i] : a_indptr[i + 1]],
+                a_data[a_indptr[i] : a_indptr[i + 1]],
+            ):
+                for k, bv in zip(
+                    b_coords[1, b_indptr[j] : b_indptr[j + 1]],
+                    b_data[b_indptr[j] : b_indptr[j + 1]],
+                ):
+                    sums[k] += av * bv
+                    if next_[k] == -1:
+                        next_[k] = head
+                        head = k
+                        length += 1
+
+            # emit this row's accumulated sums, skipping those that cancelled to 0
+            for _ in range(length):
+                if sums[head] != 0:
+                    coords[0, nnz] = i
+                    coords[1, nnz] = head
+                    data[nnz] = sums[head]
+                    nnz += 1
+
+                temp = head
+                head = next_[head]
+
+                next_[temp] = -1
+                sums[temp] = 0
+
+        return coords[:, :nnz], data[:nnz]  # drop slots of cancelled sums
+
+    return _dot_coo_coo
+
+
+@_memoize_dtype
+def _dot_coo_ndarray_type(dt1, dt2):
+    dtr = np.result_type(dt1, dt2)
+
+    @numba.jit(nopython=True, nogil=True)
+    def _dot_coo_ndarray(coords1, data1, array2, out_shape):  # pragma: no cover
+        """
+        Utility function taking in one ``COO`` and one ``ndarray`` and
+        calculating a "sense" of their dot product. Actually computes
+        ``s1 @ x2.T`` and returns it as a dense array.
+
+        Parameters
+        ----------
+        data1, coords1 : np.ndarray
+            The data and coordinates of ``s1``.
+
+        array2 : np.ndarray
+            The second input array ``x2``.
+
+        out_shape : Tuple[int]
+            The output shape.
+        """
+        out = np.zeros(out_shape, dtype=dtr)
+        didx1 = 0
+
+        while didx1 < len(data1):
+            oidx1 = coords1[0, didx1]  # row shared by the current run of entries
+            didx1_curr = didx1
+
+            for oidx2 in range(out_shape[1]):
+                didx1 = didx1_curr  # rewind to the start of the run for each column
+                while didx1 < len(data1) and coords1[0, didx1] == oidx1:
+                    out[oidx1, oidx2] += data1[didx1] * array2[oidx2, coords1[1, didx1]]
+                    didx1 += 1
+
+        return out
+
+    return _dot_coo_ndarray
+
+
+@_memoize_dtype
+def _dot_coo_ndarray_type_sparse(dt1, dt2):
+    dtr = np.result_type(dt1, dt2)
+
+    @numba.jit(
+        nopython=True,
+        nogil=True,
+        locals={"data_curr": numba.np.numpy_support.from_dtype(dtr)},
+    )
+    def _dot_coo_ndarray(coords1, data1, array2, out_shape):  # pragma: no cover
+        """
+        Utility function taking in one ``COO`` and one ``ndarray`` and
+        calculating a "sense" of their dot product. Actually computes
+        ``s1 @ x2.T`` and returns ``(coords, data)`` of its nonzero entries.
+
+        Parameters
+        ----------
+        data1, coords1 : np.ndarray
+            The data and coordinates of ``s1``.
+        array2 : np.ndarray
+            The second input array ``x2``.
+        out_shape : Tuple[int]
+            The output shape.
+        """
+        if out_shape[1] == 0:
+            # No output columns: the row scan below could never advance.
+            return np.empty((2, 0), dtype=np.intp), np.empty((0,), dtype=dtr)
+
+        out_data = []
+        out_coords = []
+
+        # coords1.shape = (2, len(data1))
+        # coords1[0, :] = rows, sorted
+        # coords1[1, :] = columns
+
+        didx1 = 0
+        while didx1 < len(data1):
+            current_row = coords1[0, didx1]
+            cur_didx1 = didx1
+            oidx2 = 0
+            while oidx2 < out_shape[1]:
+                cur_didx1 = didx1  # rewind to the start of this row's entries
+                data_curr = 0
+                while cur_didx1 < len(data1) and coords1[0, cur_didx1] == current_row:
+                    data_curr += data1[cur_didx1] * array2[oidx2, coords1[1, cur_didx1]]
+                    cur_didx1 += 1
+                if data_curr != 0:
+                    out_data.append(data_curr)
+                    out_coords.append((current_row, oidx2))
+                oidx2 += 1
+            didx1 = cur_didx1  # continue with the first entry of the next row
+
+        if len(out_data) == 0:
+            return np.empty((2, 0), dtype=np.intp), np.empty((0,), dtype=dtr)
+
+        return np.array(out_coords).T, np.array(out_data)
+
+    return _dot_coo_ndarray
+
+
+@_memoize_dtype
+def _dot_ndarray_coo_type(dt1, dt2):
+    dtr = np.result_type(dt1, dt2)
+
+    @numba.jit(nopython=True, nogil=True)
+    def _dot_ndarray_coo(array1, coords2, data2, out_shape):  # pragma: no cover
+        """
+        Utility function taking in one ``ndarray`` and one ``COO`` and
+        calculating a "sense" of their dot product. Actually computes ``x1 @ s2.T``.
+
+        Parameters
+        ----------
+        array1 : np.ndarray
+            The input array ``x1``.
+
+        data2, coords2 : np.ndarray
+            The data and coordinates of ``s2``.
+
+        out_shape : Tuple[int]
+            The output shape.
+        """
+        out = np.zeros(out_shape, dtype=dtr)
+
+        for oidx1 in range(out_shape[0]):
+            for didx2 in range(len(data2)):
+                oidx2 = coords2[0, didx2]  # first coordinate of s2 picks the output column
+                out[oidx1, oidx2] += array1[oidx1, coords2[1, didx2]] * data2[didx2]
+
+        return out
+
+    return _dot_ndarray_coo
+
+
+@_memoize_dtype
+def _dot_ndarray_coo_type_sparse(dt1, dt2):
+    dtr = np.result_type(dt1, dt2)
+
+    @numba.jit(
+        nopython=True,
+        nogil=True,
+        locals={"data_curr": numba.np.numpy_support.from_dtype(dtr)},
+    )
+    def _dot_ndarray_coo(array1, coords2, data2, out_shape):  # pragma: no cover
+        """
+        Utility function taking in one ``ndarray`` and one ``COO`` and
+        calculating a "sense" of their dot product. Actually computes ``x1 @ s2.T``.
+
+        Parameters
+        ----------
+        array1 : np.ndarray
+            The input array ``x1``.
+
+        data2, coords2 : np.ndarray
+            The data and coordinates of ``s2``.
+
+        out_shape : Tuple[int]
+            The output shape.
+        """
+        out_data = []
+        out_coords = []
+
+        # coords2.shape = (2, len(data2))
+        # coords2[0, :] = columns, sorted
+        # coords2[1, :] = rows
+
+        for oidx1 in range(out_shape[0]):
+            data_curr = 0  # running sum for the (oidx1, current_col) output cell
+            current_col = 0
+            for didx2 in range(len(data2)):
+                if coords2[0, didx2] != current_col:  # a new run of entries begins
+                    if data_curr != 0:
+                        out_data.append(data_curr)
+                        out_coords.append([oidx1, current_col])
+                        data_curr = 0
+                    current_col = coords2[0, didx2]
+
+                data_curr += array1[oidx1, coords2[1, didx2]] * data2[didx2]
+
+            if data_curr != 0:  # flush the last run of this output row
+                out_data.append(data_curr)
+                out_coords.append([oidx1, current_col])
+
+        if len(out_data) == 0:
+            return np.empty((2, 0), dtype=np.intp), np.empty((0,), dtype=dtr)
+
+        return np.array(out_coords).T, np.array(out_data)
+
+    return _dot_ndarray_coo
+
+
 def stack(arrays, axis=0, compressed_axes=None):
     """
     Stack the input arrays along the given dimension.
diff --git a/sparse/_compressed/compressed.py b/sparse/_compressed/compressed.py
index 77bceeb2..faec1822 100644
--- a/sparse/_compressed/compressed.py
+++ b/sparse/_compressed/compressed.py
@@ -7,6 +7,7 @@
 
 from .._sparse_array import SparseArray
 from .._coo.common import linear_loc
+from .._common import dot, matmul
 from .._utils import normalize_axis, check_zero_fill_value, check_compressed_axes
 from .._coo.core import COO
 from .convert import uncompress_dimension, _transpose, _1d_reshape
@@ -42,11 +43,15 @@ def _from_coo(x, compressed_axes=None):
     compressed_shape = (row_size, col_size)
     shape = x.shape
 
-    x = x.transpose(axis_order)
-    linear = linear_loc(x.coords, reordered_shape)
+    # transpose axes, linearize, reshape, and compress
+    linear = linear_loc(x.coords[axis_order], reordered_shape)
     order = np.argsort(linear)
-    # linearizing twice is unnecessary, fix needed
-    coords = x.reshape((compressed_shape)).coords
+    linear = linear[order]
+    coords = np.empty((2, x.nnz), dtype=np.intp)
+    strides = 1
+    for i, d in enumerate(compressed_shape[::-1]):
+        coords[-(i + 1), :] = (linear // strides) % d
+        strides *= d
     indptr = np.empty(row_size + 1, dtype=np.intp)
     indptr[0] = 0
     np.cumsum(np.bincount(coords[0], minlength=row_size), out=indptr[1:])
@@ -56,6 +61,44 @@ def _from_coo(x, compressed_axes=None):
 
 
 class GCXS(SparseArray, NDArrayOperatorsMixin):
+    """
+    A sparse multidimensional array.
+
+    This is stored in GCXS format, a generalization of the GCRS/GCCS formats
+    from 'Efficient storage scheme for n-dimensional sparse array: GCRS/GCCS':
+    https://ieeexplore.ieee.org/document/7237032. GCXS generalizes the csr/csc
+    sparse matrix formats. For arrays with ndim == 2, GCXS is the same as csr/csc.
+    For arrays with ndim > 2, any combination of axes can be compressed,
+    significantly reducing storage.
+
+
+    Parameters
+    ----------
+    arg : tuple (data, indices, indptr)
+        A tuple of arrays holding the data, indices, and
+        index pointers for the nonzero values of the array.
+    shape : tuple[int] (ndim,)
+        The shape of the array.
+    compressed_axes : Iterable[int]
+        The axes to compress.
+    fill_value: scalar, optional
+        The fill value for this array.
+
+    Attributes
+    ----------
+    data : numpy.ndarray (nnz,)
+        An array holding the nonzero values corresponding to :obj:`GCXS.indices`.
+    indices : numpy.ndarray (nnz,)
+        An array holding the coordinates of every nonzero element along uncompressed dimensions.
+    indptr : numpy.ndarray
+        An array holding the cumulative sums of the nonzeros along the compressed dimensions.
+    shape : tuple[int] (ndim,)
+        The dimensions of this array.
+
+    See Also
+    --------
+    DOK : A mostly write-only sparse array.
+    """
 
     __array_priority__ = 12
 
@@ -78,8 +121,14 @@ def __init__(self, arg, shape=None, compressed_axes=None, fill_value=0):
             compressed_axes = None
 
         self.data, self.indices, self.indptr = arg
+
+        if self.data.ndim != 1:
+            raise ValueError("data must be a scalar or 1-dimensional.")
+
         self.shape = shape
-        self.compressed_axes = compressed_axes
+        self.compressed_axes = (
+            tuple(compressed_axes) if isinstance(compressed_axes, Iterable) else None
+        )
         self.fill_value = fill_value
 
     @classmethod
@@ -193,7 +242,7 @@ def _reordered_shape(self):
 
     @property
     def T(self):
-        return self.tranpose()
+        return self.transpose()
 
     def __str__(self):
         return "<GCXS: shape={}, dtype={}, nnz={}, fill_value={}, compressed_axes={}>".format(
@@ -222,6 +271,9 @@ def change_compressed_axes(self, new_compressed_axes):
             for i in range(len(new_compressed_axes))
         )
 
+        if new_compressed_axes == self.compressed_axes:
+            return self
+
         if len(new_compressed_axes) >= len(self.shape):
             raise ValueError("cannot compress all axes")
         if len(set(new_compressed_axes)) != len(new_compressed_axes):
@@ -292,11 +344,11 @@ def todense(self):
         """
         if self.compressed_axes is None:
             out = np.full(self.shape, self.fill_value, self.dtype)
-            if self.indices != ():
+            if len(self.indices) != 0:
                 out[self.indices] = self.data
             else:
                 if len(self.data) != 0:
-                    out[self.indices] = self.data
+                    out[()] = self.data[0]
             return out
         return self.tocoo().todense()
 
@@ -360,7 +412,7 @@ def asformat(self, format, compressed_axes=None):
             return self.todok()
         elif format == "gcxs":
             if compressed_axes is None:
-                compressed_axes = self.compressed_axess
+                compressed_axes = self.compressed_axes
             return self.change_compressed_axes(compressed_axes)
 
         raise NotImplementedError("The given format is not supported.")
@@ -396,6 +448,25 @@ def maybe_densify(self, max_size=1000, min_density=0.25):
                 "Operation would require converting " "large sparse array to dense"
             )
 
+    def flatten(self, order="C"):
+        """
+        Returns a new :obj:`GCXS` array that is a flattened version of this array.
+
+        Parameters
+        ----------
+        order : {"C", None}, optional
+            Accepted only for Numpy compatibility; any other value is rejected.
+
+        Returns
+        -------
+        GCXS
+            The flattened output array, i.e. ``self.reshape(-1)``.
+        """
+        if order not in {"C", None}:
+            # One literal: adjacent string pieces used to fuse into "notsupported."
+            raise NotImplementedError("The `order` parameter is not supported.")
+        return self.reshape(-1)
+
     def reshape(self, shape, order="C", compressed_axes=None):
         """
         Returns a new :obj:`GCXS` array that is a reshaped version of this array.
@@ -420,7 +491,10 @@ def reshape(self, shape, order="C", compressed_axes=None):
         Numpy and isn't actually supported.
 
         """
-
+        if isinstance(shape, Iterable):
+            shape = tuple(shape)
+        else:
+            shape = (shape,)
         if order not in {"C", None}:
             raise NotImplementedError("The 'order' parameter is not supported")
         if any(d == -1 for d in shape):
@@ -430,6 +504,13 @@ def reshape(self, shape, order="C", compressed_axes=None):
         if self.shape == shape:
             return self
 
+        if self.size != reduce(mul, shape, 1):
+            raise ValueError(
+                "cannot reshape array of size {} into shape {}".format(self.size, shape)
+            )
+        if len(shape) == 0:
+            return self.tocoo().reshape(shape).asformat("gcxs")
+
         if compressed_axes is None:
             if len(shape) == self.ndim:
                 compressed_axes = self.compressed_axes
@@ -438,18 +519,13 @@ def reshape(self, shape, order="C", compressed_axes=None):
             else:
                 compressed_axes = (np.argmin(shape),)
 
-        if self.size != reduce(mul, shape, 1):
-            raise ValueError(
-                "cannot reshape array of size {} into shape {}".format(self.size, shape)
-            )
-
         if self.ndim == 1:
             arg = _1d_reshape(self, shape, compressed_axes)
         else:
             arg = _transpose(self, shape, np.arange(self.ndim), compressed_axes)
         return GCXS(
             arg,
-            shape=shape,
+            shape=tuple(shape),
             compressed_axes=compressed_axes,
             fill_value=self.fill_value,
         )
@@ -555,6 +631,20 @@ def transpose(self, axes=None, compressed_axes=None):
         )
 
     def _2d_transpose(self):
+        """
+        A function for performing constant-time transposes on 2d GCXS arrays.
+
+        Returns
+        -------
+        GCXS
+            The new transposed array with the opposite compressed axes as the input.
+
+        See Also
+        --------
+        scipy.sparse.csr_matrix.tocsc : Scipy equivalent function.
+        scipy.sparse.csc_matrix.tocsr : Scipy equivalent function.
+        numpy.ndarray.transpose : Numpy equivalent function.
+        """
         if self.ndim != 2:
             raise ValueError(
                 "cannot perform 2d transpose on array with dimension {}".format(
@@ -570,3 +660,43 @@ def _2d_transpose(self):
             compressed_axes=compressed_axes,
             fill_value=self.fill_value,
         )
+
+    def dot(self, other):
+        """
+        Performs the equivalent of :code:`x.dot(y)` for :obj:`GCXS`.
+
+        Parameters
+        ----------
+        other : Union[GCXS, COO, numpy.ndarray, scipy.sparse.spmatrix]
+            The second operand of the dot product operation.
+
+        Returns
+        -------
+        {GCXS, numpy.ndarray}
+            The result of the dot product. If the result turns out to be dense,
+            then a dense array is returned, otherwise, a sparse array.
+
+        Raises
+        ------
+        ValueError
+            If all arguments don't have zero fill-values.
+
+        See Also
+        --------
+        dot : Equivalent function for two arguments.
+        numpy.dot : Numpy equivalent function.
+        scipy.sparse.csr_matrix.dot : Scipy equivalent function.
+        """
+        return dot(self, other)
+
+    def __matmul__(self, other):
+        try:
+            return matmul(self, other)
+        except NotImplementedError:
+            return NotImplemented  # lets Python try the other operand's reflected method
+
+    def __rmatmul__(self, other):
+        try:
+            return matmul(other, self)  # reflected form: ``other @ self``
+        except NotImplementedError:
+            return NotImplemented  # tells Python this operand pair is unsupported
diff --git a/sparse/_coo/__init__.py b/sparse/_coo/__init__.py
index 4255e05a..48385774 100644
--- a/sparse/_coo/__init__.py
+++ b/sparse/_coo/__init__.py
@@ -1,9 +1,6 @@
 from .core import COO, as_coo
 from .umath import elemwise
 from .common import (
-    tensordot,
-    dot,
-    matmul,
     concatenate,
     clip,
     stack,
@@ -30,9 +27,6 @@
     "COO",
     "as_coo",
     "elemwise",
-    "tensordot",
-    "dot",
-    "matmul",
     "concatenate",
     "clip",
     "stack",
diff --git a/sparse/_coo/common.py b/sparse/_coo/common.py
index 4d323f74..ecf0bcf3 100644
--- a/sparse/_coo/common.py
+++ b/sparse/_coo/common.py
@@ -1,5 +1,4 @@
-from functools import reduce, wraps
-from itertools import chain
+from functools import reduce
 import operator
 import warnings
 from collections.abc import Iterable
@@ -64,276 +63,6 @@ def linear_loc(coords, shape):
         return np.ravel_multi_index(coords, shape)
 
 
-def tensordot(a, b, axes=2, *, return_type=None):
-    """
-    Perform the equivalent of :obj:`numpy.tensordot`.
-
-    Parameters
-    ----------
-    a, b : Union[COO, np.ndarray, scipy.sparse.spmatrix]
-        The arrays to perform the :code:`tensordot` operation on.
-    axes : tuple[Union[int, tuple[int], Union[int, tuple[int]], optional
-        The axes to match when performing the sum.
-    return_type : {None, COO, np.ndarray}, optional
-        Type of returned array.
-
-
-    Returns
-    -------
-    Union[COO, numpy.ndarray]
-        The result of the operation.
-
-    Raises
-    ------
-    ValueError
-        If all arguments don't have zero fill-values.
-
-    See Also
-    --------
-    numpy.tensordot : NumPy equivalent function
-    """
-    # Much of this is stolen from numpy/core/numeric.py::tensordot
-    # Please see license at https://github.com/numpy/numpy/blob/master/LICENSE.txt
-    check_zero_fill_value(a, b)
-
-    if scipy.sparse.issparse(a):
-        a = asCOO(a)
-    if scipy.sparse.issparse(b):
-        b = asCOO(b)
-
-    try:
-        iter(axes)
-    except TypeError:
-        axes_a = list(range(-axes, 0))
-        axes_b = list(range(0, axes))
-    else:
-        axes_a, axes_b = axes
-    try:
-        na = len(axes_a)
-        axes_a = list(axes_a)
-    except TypeError:
-        axes_a = [axes_a]
-        na = 1
-    try:
-        nb = len(axes_b)
-        axes_b = list(axes_b)
-    except TypeError:
-        axes_b = [axes_b]
-        nb = 1
-
-    # a, b = asarray(a), asarray(b)  # <--- modified
-    as_ = a.shape
-    nda = a.ndim
-    bs = b.shape
-    ndb = b.ndim
-    equal = True
-    if nda == 0 or ndb == 0:
-        pos = int(nda != 0)
-        raise ValueError("Input {} operand does not have enough dimensions".format(pos))
-    if na != nb:
-        equal = False
-    else:
-        for k in range(na):
-            if as_[axes_a[k]] != bs[axes_b[k]]:
-                equal = False
-                break
-            if axes_a[k] < 0:
-                axes_a[k] += nda
-            if axes_b[k] < 0:
-                axes_b[k] += ndb
-    if not equal:
-        raise ValueError("shape-mismatch for sum")
-
-    # Move the axes to sum over to the end of "a"
-    # and to the front of "b"
-    notin = [k for k in range(nda) if k not in axes_a]
-    newaxes_a = notin + axes_a
-    N2 = 1
-    for axis in axes_a:
-        N2 *= as_[axis]
-    newshape_a = (-1, N2)
-    olda = [as_[axis] for axis in notin]
-
-    notin = [k for k in range(ndb) if k not in axes_b]
-    newaxes_b = axes_b + notin
-    N2 = 1
-    for axis in axes_b:
-        N2 *= bs[axis]
-    newshape_b = (N2, -1)
-    oldb = [bs[axis] for axis in notin]
-
-    if any(dim == 0 for dim in chain(newshape_a, newshape_b)):
-        res = asCOO(np.empty(olda + oldb), check=False)
-        if isinstance(a, np.ndarray) or isinstance(b, np.ndarray):
-            res = res.todense()
-
-        return res
-
-    at = a.transpose(newaxes_a).reshape(newshape_a)
-    bt = b.transpose(newaxes_b).reshape(newshape_b)
-    res = _dot(at, bt, return_type)
-    return res.reshape(olda + oldb)
-
-
-def matmul(a, b):
-    """Perform the equivalent of :obj:`numpy.matmul` on two arrays.
-
-    Parameters
-    ----------
-    a, b : Union[COO, np.ndarray, scipy.sparse.spmatrix]
-        The arrays to perform the :code:`matmul` operation on.
-
-    Returns
-    -------
-    Union[COO, numpy.ndarray]
-        The result of the operation.
-
-    Raises
-    ------
-    ValueError
-        If all arguments don't have zero fill-values, or the shape of the two arrays is not broadcastable.
-
-    See Also
-    --------
-    numpy.matmul : NumPy equivalent function.
-    COO.__matmul__ : Equivalent function for COO objects.
-    """
-    check_zero_fill_value(a, b)
-    if not hasattr(a, "ndim") or not hasattr(b, "ndim"):
-        raise TypeError(
-            "Cannot perform dot product on types %s, %s" % (type(a), type(b))
-        )
-
-    # When b is 2-d, it is equivalent to dot
-    if b.ndim <= 2:
-        return dot(a, b)
-
-    # when a is 2-d, we need to transpose result after dot
-    if a.ndim <= 2:
-        res = dot(a, b)
-        axes = list(range(res.ndim))
-        axes.insert(-1, axes.pop(0))
-        return res.transpose(axes)
-
-    # If a can be squeeze to a vector, use dot will be faster
-    if a.ndim <= b.ndim and np.prod(a.shape[:-1]) == 1:
-        res = dot(a.reshape(-1), b)
-        shape = list(res.shape)
-        shape.insert(-1, 1)
-        return res.reshape(shape)
-
-    # If b can be squeeze to a matrix, use dot will be faster
-    if b.ndim <= a.ndim and np.prod(b.shape[:-2]) == 1:
-        return dot(a, b.reshape(b.shape[-2:]))
-
-    if a.ndim < b.ndim:
-        a = a[(None,) * (b.ndim - a.ndim)]
-    if a.ndim > b.ndim:
-        b = b[(None,) * (a.ndim - b.ndim)]
-    for i, j in zip(a.shape[:-2], b.shape[:-2]):
-        if i != 1 and j != 1 and i != j:
-            raise ValueError("shapes of a and b are not broadcastable")
-
-    def _matmul_recurser(a, b):
-        if a.ndim == 2:
-            return dot(a, b)
-        res = []
-        for i in range(max(a.shape[0], b.shape[0])):
-            a_i = a[0] if a.shape[0] == 1 else a[i]
-            b_i = b[0] if b.shape[0] == 1 else b[i]
-            res.append(_matmul_recurser(a_i, b_i))
-        mask = [isinstance(x, SparseArray) for x in res]
-        if all(mask):
-            return stack(res)
-        else:
-            res = [x.todense() if isinstance(x, SparseArray) else x for x in res]
-            return np.stack(res)
-
-    return _matmul_recurser(a, b)
-
-
-def dot(a, b):
-    """
-    Perform the equivalent of :obj:`numpy.dot` on two arrays.
-
-    Parameters
-    ----------
-    a, b : Union[COO, np.ndarray, scipy.sparse.spmatrix]
-        The arrays to perform the :code:`dot` operation on.
-
-    Returns
-    -------
-    Union[COO, numpy.ndarray]
-        The result of the operation.
-
-    Raises
-    ------
-    ValueError
-        If all arguments don't have zero fill-values.
-
-    See Also
-    --------
-    numpy.dot : NumPy equivalent function.
-    COO.dot : Equivalent function for COO objects.
-    """
-    check_zero_fill_value(a, b)
-    if not hasattr(a, "ndim") or not hasattr(b, "ndim"):
-        raise TypeError(
-            "Cannot perform dot product on types %s, %s" % (type(a), type(b))
-        )
-
-    if a.ndim == 1 and b.ndim == 1:
-        return (a * b).sum()
-
-    a_axis = -1
-    b_axis = -2
-
-    if b.ndim == 1:
-        b_axis = -1
-    return tensordot(a, b, axes=(a_axis, b_axis))
-
-
-def _dot(a, b, return_type=None):
-    from .core import COO
-
-    out_shape = (a.shape[0], b.shape[1])
-    if isinstance(a, COO) and isinstance(b, COO):
-        b = b.T
-        coords, data = _dot_coo_coo_type(a.dtype, b.dtype)(
-            a.coords, a.data, b.coords, b.data
-        )
-
-        if return_type == np.ndarray:
-            return COO(
-                coords, data, shape=out_shape, has_duplicates=False, sorted=True
-            ).todense()
-
-        return COO(coords, data, shape=out_shape, has_duplicates=False, sorted=True)
-
-    if isinstance(a, COO) and isinstance(b, np.ndarray):
-        b = b.view(type=np.ndarray).T
-
-        if return_type == COO:
-            coords, data = _dot_coo_ndarray_type_sparse(a.dtype, b.dtype)(
-                a.coords, a.data, b, out_shape
-            )
-            return COO(coords, data, shape=out_shape, has_duplicates=False, sorted=True)
-
-        return _dot_coo_ndarray_type(a.dtype, b.dtype)(a.coords, a.data, b, out_shape)
-
-    if isinstance(a, np.ndarray) and isinstance(b, COO):
-        b = b.T
-        a = a.view(type=np.ndarray)
-
-        if return_type == COO:
-            coords, data = _dot_ndarray_coo_type_sparse(a.dtype, b.dtype)(
-                a, b.coords, b.data, out_shape
-            )
-            return COO(coords, data, shape=out_shape, has_duplicates=False, sorted=True)
-
-        return _dot_ndarray_coo_type(a.dtype, b.dtype)(a, b.coords, b.data, out_shape)
-
-
 def kron(a, b):
     """Kronecker product of 2 sparse arrays.
 
@@ -1126,309 +855,6 @@ def diagonalize(a, axis=0):
     return COO(diag_coords, a.data, diag_shape)
 
 
-def _memoize_dtype(f):
-    """
-    Memoizes a function taking in NumPy dtypes.
-
-    Parameters
-    ----------
-    f : Callable
-
-    Returns
-    -------
-    wrapped : Callable
-
-    Examples
-    --------
-    >>> def func(dt1):
-    ...     return object()
-    >>> func = _memoize_dtype(func)
-    >>> func(np.dtype('i8')) is func(np.dtype('int64'))
-    True
-    >>> func(np.dtype('i8')) is func(np.dtype('i4'))
-    False
-    """
-    cache = {}
-
-    @wraps(f)
-    def wrapped(*args):
-        key = tuple(arg.name for arg in args)
-        if key in cache:
-            return cache[key]
-
-        result = f(*args)
-        cache[key] = result
-        return result
-
-    return wrapped
-
-
-@_memoize_dtype
-def _dot_coo_coo_type(dt1, dt2):
-    dtr = np.result_type(dt1, dt2)
-
-    @numba.jit(
-        nopython=True,
-        nogil=True,
-        locals={"data_curr": numba.np.numpy_support.from_dtype(dtr)},
-    )
-    def _dot_coo_coo(coords1, data1, coords2, data2):  # pragma: no cover
-        """
-        Utility function taking in two ``COO`` objects and calculating a "sense"
-        of their dot product. Acually computes ``s1 @ s2.T``.
-
-        Parameters
-        ----------
-        data1, coords1 : np.ndarray
-            The data and coordinates of ``s1``.
-
-        data2, coords2 : np.ndarray
-            The data and coordinates of ``s2``.
-        """
-        coords_out = []
-        data_out = []
-        didx1 = 0
-        data1_end = len(data1)
-        data2_end = len(data2)
-
-        while didx1 < data1_end:
-            oidx1 = coords1[0, didx1]
-            didx2 = 0
-            didx1_curr = didx1
-
-            while (
-                didx2 < data2_end and didx1 < data1_end and coords1[0, didx1] == oidx1
-            ):
-                oidx2 = coords2[0, didx2]
-                data_curr = 0
-
-                while (
-                    didx2 < data2_end
-                    and didx1 < data1_end
-                    and coords2[0, didx2] == oidx2
-                    and coords1[0, didx1] == oidx1
-                ):
-                    c1 = coords1[1, didx1]
-                    c2 = coords2[1, didx2]
-                    k = min(c1, c2)
-                    if c1 == k and c2 == k:
-                        data_curr += data1[didx1] * data2[didx2]
-                    didx1 += c1 == k
-                    didx2 += c2 == k
-
-                while didx2 < data2_end and coords2[0, didx2] == oidx2:
-                    didx2 += 1
-
-                if didx2 < data2_end:
-                    didx1 = didx1_curr
-
-                if data_curr != 0:
-                    coords_out.append((oidx1, oidx2))
-                    data_out.append(data_curr)
-
-            while didx1 < data1_end and coords1[0, didx1] == oidx1:
-                didx1 += 1
-
-        if len(data_out) == 0:
-            return np.empty((2, 0), dtype=np.intp), np.empty((0,), dtype=dtr)
-
-        return np.array(coords_out).T, np.array(data_out)
-
-    return _dot_coo_coo
-
-
-@_memoize_dtype
-def _dot_coo_ndarray_type(dt1, dt2):
-    dtr = np.result_type(dt1, dt2)
-
-    @numba.jit(nopython=True, nogil=True)
-    def _dot_coo_ndarray(coords1, data1, array2, out_shape):  # pragma: no cover
-        """
-        Utility function taking in one `COO` and one ``ndarray`` and
-        calculating a "sense" of their dot product. Acually computes
-        ``s1 @ x2.T``.
-
-        Parameters
-        ----------
-        data1, coords1 : np.ndarray
-            The data and coordinates of ``s1``.
-
-        array2 : np.ndarray
-            The second input array ``x2``.
-
-        out_shape : Tuple[int]
-            The output shape.
-        """
-        out = np.zeros(out_shape, dtype=dtr)
-        didx1 = 0
-
-        while didx1 < len(data1):
-            oidx1 = coords1[0, didx1]
-            didx1_curr = didx1
-
-            for oidx2 in range(out_shape[1]):
-                didx1 = didx1_curr
-                while didx1 < len(data1) and coords1[0, didx1] == oidx1:
-                    out[oidx1, oidx2] += data1[didx1] * array2[oidx2, coords1[1, didx1]]
-                    didx1 += 1
-
-        return out
-
-    return _dot_coo_ndarray
-
-
-@_memoize_dtype
-def _dot_coo_ndarray_type_sparse(dt1, dt2):
-    dtr = np.result_type(dt1, dt2)
-
-    @numba.jit(
-        nopython=True,
-        nogil=True,
-        locals={"data_curr": numba.np.numpy_support.from_dtype(dtr)},
-    )
-    def _dot_coo_ndarray(coords1, data1, array2, out_shape):  # pragma: no cover
-        """
-        Utility function taking in one `COO` and one ``ndarray`` and
-        calculating a "sense" of their dot product. Acually computes
-        ``s1 @ x2.T``.
-
-        Parameters
-        ----------
-        data1, coords1 : np.ndarray
-            The data and coordinates of ``s1``.
-
-        array2 : np.ndarray
-            The second input array ``x2``.
-
-        out_shape : Tuple[int]
-            The output shape.
-        """
-
-        out_data = []
-        out_coords = []
-
-        # coords1.shape = (2, len(data1))
-        # coords1[0, :] = rows, sorted
-        # coords1[1, :] = columns
-
-        didx1 = 0
-        while didx1 < len(data1):
-            current_row = coords1[0, didx1]
-
-            cur_didx1 = didx1
-            oidx2 = 0
-            while oidx2 < out_shape[1]:
-                cur_didx1 = didx1
-                data_curr = 0
-                while cur_didx1 < len(data1) and coords1[0, cur_didx1] == current_row:
-                    data_curr += data1[cur_didx1] * array2[oidx2, coords1[1, cur_didx1]]
-                    cur_didx1 += 1
-                if data_curr != 0:
-                    out_data.append(data_curr)
-                    out_coords.append((current_row, oidx2))
-                oidx2 += 1
-            didx1 = cur_didx1
-
-        if len(out_data) == 0:
-            return np.empty((2, 0), dtype=np.intp), np.empty((0,), dtype=dtr)
-
-        return np.array(out_coords).T, np.array(out_data)
-
-    return _dot_coo_ndarray
-
-
-@_memoize_dtype
-def _dot_ndarray_coo_type(dt1, dt2):
-    dtr = np.result_type(dt1, dt2)
-
-    @numba.jit(
-        nopython=True, nogil=True,
-    )
-    def _dot_ndarray_coo(array1, coords2, data2, out_shape):  # pragma: no cover
-        """
-        Utility function taking in two one ``ndarray`` and one ``COO`` and
-        calculating a "sense" of their dot product. Acually computes ``x1 @ s2.T``.
-
-        Parameters
-        ----------
-        array1 : np.ndarray
-            The input array ``x1``.
-
-        data2, coords2 : np.ndarray
-            The data and coordinates of ``s2``.
-
-        out_shape : Tuple[int]
-            The output shape.
-        """
-        out = np.zeros(out_shape, dtype=dtr)
-
-        for oidx1 in range(out_shape[0]):
-            for didx2 in range(len(data2)):
-                oidx2 = coords2[0, didx2]
-                out[oidx1, oidx2] += array1[oidx1, coords2[1, didx2]] * data2[didx2]
-
-        return out
-
-    return _dot_ndarray_coo
-
-
-@_memoize_dtype
-def _dot_ndarray_coo_type_sparse(dt1, dt2):
-    dtr = np.result_type(dt1, dt2)
-
-    @numba.jit(
-        nopython=True,
-        nogil=True,
-        locals={"data_curr": numba.np.numpy_support.from_dtype(dtr)},
-    )
-    def _dot_ndarray_coo(array1, coords2, data2, out_shape):  # pragma: no cover
-        """
-        Utility function taking in two one ``ndarray`` and one ``COO`` and
-        calculating a "sense" of their dot product. Acually computes ``x1 @ s2.T``.
-
-        Parameters
-        ----------
-        array1 : np.ndarray
-            The input array ``x1``.
-
-        data2, coords2 : np.ndarray
-            The data and coordinates of ``s2``.
-
-        out_shape : Tuple[int]
-            The output shape.
-        """
-        out_data = []
-        out_coords = []
-
-        # coords2.shape = (2, len(data2))
-        # coords2[0, :] = columns, sorted
-        # coords2[1, :] = rows
-
-        for oidx1 in range(out_shape[0]):
-            data_curr = 0
-            current_col = 0
-            for didx2 in range(len(data2)):
-                if coords2[0, didx2] != current_col:
-                    if data_curr != 0:
-                        out_data.append(data_curr)
-                        out_coords.append([oidx1, current_col])
-                        data_curr = 0
-                    current_col = coords2[0, didx2]
-
-                data_curr += array1[oidx1, coords2[1, didx2]] * data2[didx2]
-
-            if data_curr != 0:
-                out_data.append(data_curr)
-                out_coords.append([oidx1, current_col])
-
-        if len(out_data) == 0:
-            return np.empty((2, 0), dtype=np.intp), np.empty((0,), dtype=dtr)
-
-        return np.array(out_coords).T, np.array(out_data)
-
-    return _dot_ndarray_coo
-
-
 def isposinf(x, out=None):
     """
     Test element-wise for positive infinity, return result as sparse ``bool`` array.
diff --git a/sparse/_coo/core.py b/sparse/_coo/core.py
index 0f6c661e..1a872502 100644
--- a/sparse/_coo/core.py
+++ b/sparse/_coo/core.py
@@ -10,7 +10,7 @@
 from numpy.lib.mixins import NDArrayOperatorsMixin
 import numba
 
-from .common import dot, matmul
+from .._common import dot, matmul
 from .indexing import getitem
 from .umath import elemwise, broadcast_to
 from .._sparse_array import SparseArray
diff --git a/sparse/_dok.py b/sparse/_dok.py
index 5e43df5b..b82c4fb1 100644
--- a/sparse/_dok.py
+++ b/sparse/_dok.py
@@ -242,6 +242,55 @@ def nnz(self):
         """
         return len(self.data)
 
+    @property
+    def format(self):
+        """
+        The storage format of this array.
+        
+        Returns
+        -------
+        str
+            The storage format of this array.
+        
+        See Also
+        --------
+        COO.format : Equivalent :obj:`COO` array property.
+        GCXS.format : Equivalent :obj:`GCXS` array property.
+        scipy.sparse.dok_matrix.format : The Scipy equivalent property.
+        
+        Examples
+        --------
+        >>> import sparse
+        >>> s = sparse.random((5,5), density=0.2, format='dok')
+        >>> s.format
+        'dok'
+        """
+        return "dok"
+
+    @property
+    def nbytes(self):
+        """
+        The number of bytes taken up by this object. Note that for small arrays,
+        this may undercount the number of bytes due to the large constant overhead.
+
+        Returns
+        -------
+        int
+            The approximate bytes of memory taken by this object.
+
+        See Also
+        --------
+        numpy.ndarray.nbytes : The equivalent Numpy property.
+
+        Examples
+        --------
+        >>> import sparse
+        >>> x = sparse.random((100,100),density=.1,format='dok')
+        >>> x.nbytes
+        8000
+        """
+        return self.nnz * self.dtype.itemsize
+
     def __getitem__(self, key):
         key = normalize_index(key, self.shape)
 
diff --git a/sparse/_io.py b/sparse/_io.py
index 1bedcc7a..c29f1a45 100644
--- a/sparse/_io.py
+++ b/sparse/_io.py
@@ -1,12 +1,14 @@
 import numpy as np
 
 from ._coo.core import COO
+from ._compressed import GCXS
 
 
 def save_npz(filename, matrix, compressed=True):
     """ Save a sparse matrix to disk in numpy's ``.npz`` format.
     Note: This is not binary compatible with scipy's ``save_npz()``.
-    Will save a file that can only be opend with this package's ``load_npz()``.
+    This binary format is not currently stable. Will save a file
+    that can only be opened with this package's ``load_npz()``.
 
     Parameters
     ----------
@@ -15,7 +17,7 @@ def save_npz(filename, matrix, compressed=True):
         where the data will be saved. If file is a string or a Path, the
         ``.npz`` extension will be appended to the file name if it is not
         already there
-    matrix : COO
+    matrix : SparseArray
         The matrix to save to disk
     compressed : bool
         Whether to save in compressed or uncompressed mode
@@ -49,11 +51,17 @@ def save_npz(filename, matrix, compressed=True):
 
     nodes = {
         "data": matrix.data,
-        "coords": matrix.coords,
         "shape": matrix.shape,
         "fill_value": matrix.fill_value,
     }
 
+    if type(matrix) == COO:
+        nodes["coords"] = matrix.coords
+    elif type(matrix) == GCXS:
+        nodes["indices"] = matrix.indices
+        nodes["indptr"] = matrix.indptr
+        nodes["compressed_axes"] = matrix.compressed_axes
+
     if compressed:
         np.savez_compressed(filename, **nodes)
     else:
@@ -63,7 +71,8 @@ def save_npz(filename, matrix, compressed=True):
 def load_npz(filename):
     """ Load a sparse matrix in numpy's ``.npz`` format from disk.
     Note: This is not binary compatible with scipy's ``save_npz()``
-    output. Will only load files saved by this package.
+    output. This binary format is not currently stable.
+    Will only load files saved by this package.
 
     Parameters
     ----------
@@ -73,8 +82,8 @@ def load_npz(filename):
 
     Returns
     -------
-    COO
-        The sparse matrix at path ``filename``
+    SparseArray
+        The sparse matrix at path ``filename``.
 
     Example
     --------
@@ -104,6 +113,21 @@ def load_npz(filename):
                 has_duplicates=False,
                 fill_value=fill_value,
             )
+        except KeyError:
+            pass
+        try:
+            data = fp["data"]
+            indices = fp["indices"]
+            indptr = fp["indptr"]
+            comp_axes = fp["compressed_axes"]
+            shape = tuple(fp["shape"])
+            fill_value = fp["fill_value"][()]
+            return GCXS(
+                (data, indices, indptr),
+                shape=shape,
+                fill_value=fill_value,
+                compressed_axes=comp_axes,
+            )
         except KeyError:
             raise RuntimeError(
                 "The file {!s} does not contain a valid sparse matrix".format(filename)
diff --git a/sparse/tests/test_coo.py b/sparse/tests/test_coo.py
index 3acc3a46..0734b1a7 100644
--- a/sparse/tests/test_coo.py
+++ b/sparse/tests/test_coo.py
@@ -214,6 +214,34 @@ def test_transpose_error(axis):
         x.transpose(axis)
 
 
+@pytest.mark.parametrize(
+    "a,b",
+    [
+        [(3, 4), (5, 5)],
+        [(12,), (3, 4)],
+        [(12,), (3, 6)],
+        [(5, 5, 5), (6, 6, 6)],
+        [(3, 4), (9, 4)],
+        [(5,), (4,)],
+        [(2, 3, 4, 5), (2, 3, 4, 5, 6)],
+        [(100,), (5, 5)],
+        [(2, 3, 4, 5), (20, 6)],
+        [(), ()],
+    ],
+)
+def test_resize(a, b):
+    s = sparse.random(a, density=0.5)
+    orig_size = s.size
+    x = s.todense()
+
+    x.resize(b)
+    s.resize(b)
+    temp = x.reshape(x.size)
+    temp[orig_size:] = s.fill_value
+    assert isinstance(s, sparse.SparseArray)
+    assert_eq(x, s)
+
+
 @pytest.mark.parametrize("axis1", [-3, -2, -1, 0, 1, 2])
 @pytest.mark.parametrize("axis2", [-3, -2, -1, 0, 1, 2])
 def test_swapaxes(axis1, axis2):
@@ -351,193 +379,6 @@ def test_to_scipy_sparse():
     assert_eq(a, b)
 
 
-@pytest.mark.parametrize(
-    "a_shape,b_shape,axes",
-    [
-        [(3, 4), (4, 3), (1, 0)],
-        [(3, 4), (4, 3), (0, 1)],
-        [(3, 4, 5), (4, 3), (1, 0)],
-        [(3, 4), (5, 4, 3), (1, 1)],
-        [(3, 4), (5, 4, 3), ((0, 1), (2, 1))],
-        [(3, 4), (5, 4, 3), ((1, 0), (1, 2))],
-        [(3, 4, 5), (4,), (1, 0)],
-        [(4,), (3, 4, 5), (0, 1)],
-        [(4,), (4,), (0, 0)],
-        [(4,), (4,), 0],
-    ],
-)
-def test_tensordot(a_shape, b_shape, axes):
-    sa = sparse.random(a_shape, density=0.5)
-    sb = sparse.random(b_shape, density=0.5)
-
-    a = sa.todense()
-    b = sb.todense()
-
-    a_b = np.tensordot(a, b, axes)
-
-    # tests for return_type=None
-    sa_sb = sparse.tensordot(sa, sb, axes)
-    sa_b = sparse.tensordot(sa, b, axes)
-    a_sb = sparse.tensordot(a, sb, axes)
-
-    assert_eq(a_b, sa_sb)
-    assert_eq(a_b, sa_b)
-    assert_eq(a_b, a_sb)
-    assert isinstance(sa_sb, COO)
-    assert isinstance(sa_b, np.ndarray)
-    assert isinstance(a_sb, np.ndarray)
-
-    # tests for return_type=COO
-    sa_b = sparse.tensordot(sa, b, axes, return_type=COO)
-    a_sb = sparse.tensordot(a, sb, axes, return_type=COO)
-
-    assert_eq(a_b, sa_b)
-    assert_eq(a_b, a_sb)
-    assert isinstance(sa_b, COO)
-    assert isinstance(a_sb, COO)
-
-    # tests for return_type=np.ndarray
-    sa_sb = sparse.tensordot(sa, sb, axes, return_type=np.ndarray)
-
-    assert_eq(a_b, sa_sb)
-    assert isinstance(sa_sb, np.ndarray)
-
-
-def test_tensordot_empty():
-    x1 = np.empty((0, 0, 0))
-    x2 = np.empty((0, 0, 0))
-    s1 = sparse.COO.from_numpy(x1)
-    s2 = sparse.COO.from_numpy(x2)
-
-    assert_eq(np.tensordot(x1, x2), sparse.tensordot(s1, s2))
-
-
-def test_tensordot_valueerror():
-    x1 = sparse.COO(np.array(1))
-    x2 = sparse.COO(np.array(1))
-
-    with pytest.raises(ValueError):
-        x1 @ x2
-
-
-@pytest.mark.parametrize(
-    "a_shape, b_shape",
-    [
-        ((3, 1, 6, 5), (2, 1, 4, 5, 6)),
-        ((2, 1, 4, 5, 6), (3, 1, 6, 5)),
-        ((1, 1, 5), (3, 5, 6)),
-        ((3, 4, 5), (1, 5, 6)),
-        ((3, 4, 5), (3, 5, 6)),
-        ((3, 4, 5), (5, 6)),
-        ((4, 5), (5, 6)),
-        ((5,), (5, 6)),
-        ((4, 5), (5,)),
-        ((5,), (5,)),
-        ((3, 4), (1, 2, 4, 3)),
-    ],
-)
-def test_matmul(a_shape, b_shape):
-    sa = sparse.random(a_shape, density=0.5)
-    sb = sparse.random(b_shape, density=0.5)
-
-    a = sa.todense()
-    b = sb.todense()
-
-    assert_eq(np.matmul(a, b), sparse.matmul(sa, sb))
-    assert_eq(sparse.matmul(sa, b), sparse.matmul(a, sb))
-    assert_eq(np.matmul(a, b), sparse.matmul(sa, sb))
-
-    if a.ndim == 2 or b.ndim == 2:
-        assert_eq(
-            np.matmul(a, b),
-            sparse.matmul(
-                scipy.sparse.coo_matrix(a) if a.ndim == 2 else sa,
-                scipy.sparse.coo_matrix(b) if b.ndim == 2 else sb,
-            ),
-        )
-
-    if hasattr(operator, "matmul"):
-        assert_eq(operator.matmul(a, b), operator.matmul(sa, sb))
-
-
-def test_matmul_errors():
-    with pytest.raises(ValueError):
-        sa = sparse.random((3, 4, 5, 6), 0.5)
-        sb = sparse.random((3, 6, 5, 6), 0.5)
-        sparse.matmul(sa, sb)
-
-
-@pytest.mark.parametrize(
-    "a_shape, b_shape",
-    [
-        ((1, 4, 5), (3, 5, 6)),
-        ((3, 4, 5), (1, 5, 6)),
-        ((3, 4, 5), (3, 5, 6)),
-        ((3, 4, 5), (5, 6)),
-        ((4, 5), (5, 6)),
-        ((5,), (5, 6)),
-        ((4, 5), (5,)),
-        ((5,), (5,)),
-    ],
-)
-def test_dot(a_shape, b_shape):
-    sa = sparse.random(a_shape, density=0.5)
-    sb = sparse.random(b_shape, density=0.5)
-
-    a = sa.todense()
-    b = sb.todense()
-
-    assert_eq(a.dot(b), sa.dot(sb))
-    assert_eq(np.dot(a, b), sparse.dot(sa, sb))
-    assert_eq(sparse.dot(sa, b), sparse.dot(a, sb))
-    assert_eq(np.dot(a, b), sparse.dot(sa, sb))
-
-    if hasattr(operator, "matmul"):
-        # Basic equivalences
-        assert_eq(operator.matmul(a, b), operator.matmul(sa, sb))
-        # Test that SOO's and np.array's combine correctly
-        # Not possible due to https://github.com/numpy/numpy/issues/9028
-        # assert_eq(eval("a @ sb"), eval("sa @ b"))
-
-
-@pytest.mark.parametrize(
-    "a_dense, b_dense, o_type",
-    [
-        (False, False, sparse.SparseArray),
-        (False, True, np.ndarray),
-        (True, False, np.ndarray),
-    ],
-)
-def test_dot_type(a_dense, b_dense, o_type):
-    a = sparse.random((3, 4), density=0.8)
-    b = sparse.random((4, 5), density=0.8)
-
-    if a_dense:
-        a = a.todense()
-
-    if b_dense:
-        b = b.todense()
-
-    assert isinstance(sparse.dot(a, b), o_type)
-
-
-@pytest.mark.xfail
-def test_dot_nocoercion():
-    sa = sparse.random((3, 4, 5), density=0.5)
-    sb = sparse.random((5, 6), density=0.5)
-
-    a = sa.todense()
-    b = sb.todense()
-
-    la = a.tolist()
-    lb = b.tolist()
-
-    if hasattr(operator, "matmul"):
-        # Operations with naive collection (list)
-        assert_eq(operator.matmul(la, b), operator.matmul(la, sb))
-        assert_eq(operator.matmul(a, lb), operator.matmul(sa, lb))
-
-
 @pytest.mark.parametrize("a_ndim", [1, 2, 3])
 @pytest.mark.parametrize("b_ndim", [1, 2, 3])
 def test_kron(a_ndim, b_ndim):
diff --git a/sparse/tests/test_dot.py b/sparse/tests/test_dot.py
new file mode 100644
index 00000000..439f5ae5
--- /dev/null
+++ b/sparse/tests/test_dot.py
@@ -0,0 +1,242 @@
+import numpy as np
+import pytest
+import scipy.sparse
+import scipy.stats
+
+import operator
+import sparse
+from sparse._compressed import GCXS
+from sparse import COO
+from sparse._utils import assert_eq
+
+
+@pytest.mark.parametrize(
+    "a_shape,b_shape,axes",
+    [
+        [(3, 4), (4, 3), (1, 0)],
+        [(3, 4), (4, 3), (0, 1)],
+        [(3, 4, 5), (4, 3), (1, 0)],
+        [(3, 4), (5, 4, 3), (1, 1)],
+        [(3, 4), (5, 4, 3), ((0, 1), (2, 1))],
+        [(3, 4), (5, 4, 3), ((1, 0), (1, 2))],
+        [(3, 4, 5), (4,), (1, 0)],
+        [(4,), (3, 4, 5), (0, 1)],
+        [(4,), (4,), (0, 0)],
+        [(4,), (4,), 0],
+    ],
+)
+@pytest.mark.parametrize(
+    "a_format, b_format",
+    [("coo", "coo"), ("coo", "gcxs"), ("gcxs", "coo"), ("gcxs", "gcxs")],
+)
+def test_tensordot(a_shape, b_shape, axes, a_format, b_format):
+    sa = sparse.random(a_shape, density=0.5, format=a_format)
+    sb = sparse.random(b_shape, density=0.5, format=b_format)
+
+    a = sa.todense()
+    b = sb.todense()
+
+    a_b = np.tensordot(a, b, axes)
+
+    # tests for return_type=None
+    sa_sb = sparse.tensordot(sa, sb, axes)
+    sa_b = sparse.tensordot(sa, b, axes)
+    a_sb = sparse.tensordot(a, sb, axes)
+
+    assert_eq(a_b, sa_sb)
+    assert_eq(a_b, sa_b)
+    assert_eq(a_b, a_sb)
+    if all(isinstance(arr, COO) for arr in [sa, sb]):
+        assert isinstance(sa_sb, COO)
+    else:
+        assert isinstance(sa_sb, GCXS)
+    assert isinstance(sa_b, np.ndarray)
+    assert isinstance(a_sb, np.ndarray)
+
+    # tests for return_type=COO
+    sa_b = sparse.tensordot(sa, b, axes, return_type=COO)
+    a_sb = sparse.tensordot(a, sb, axes, return_type=COO)
+
+    assert_eq(a_b, sa_b)
+    assert_eq(a_b, a_sb)
+    assert isinstance(sa_b, COO)
+    assert isinstance(a_sb, COO)
+
+    # tests for return_type=GCXS
+    sa_b = sparse.tensordot(sa, b, axes, return_type=GCXS)
+    a_sb = sparse.tensordot(a, sb, axes, return_type=GCXS)
+
+    assert_eq(a_b, sa_b)
+    assert_eq(a_b, a_sb)
+    assert isinstance(sa_b, GCXS)
+    assert isinstance(a_sb, GCXS)
+
+    # tests for return_type=np.ndarray
+    sa_sb = sparse.tensordot(sa, sb, axes, return_type=np.ndarray)
+
+    assert_eq(a_b, sa_sb)
+    assert isinstance(sa_sb, np.ndarray)
+
+
+def test_tensordot_empty():
+    x1 = np.empty((0, 0, 0))
+    x2 = np.empty((0, 0, 0))
+    s1 = sparse.COO.from_numpy(x1)
+    s2 = sparse.COO.from_numpy(x2)
+
+    assert_eq(np.tensordot(x1, x2), sparse.tensordot(s1, s2))
+
+
+def test_tensordot_valueerror():
+    x1 = sparse.COO(np.array(1))
+    x2 = sparse.COO(np.array(1))
+
+    with pytest.raises(ValueError):
+        x1 @ x2
+
+
+@pytest.mark.parametrize(
+    "a_shape, b_shape",
+    [
+        ((3, 1, 6, 5), (2, 1, 4, 5, 6)),
+        ((2, 1, 4, 5, 6), (3, 1, 6, 5)),
+        ((1, 1, 5), (3, 5, 6)),
+        ((3, 4, 5), (1, 5, 6)),
+        ((3, 4, 5), (3, 5, 6)),
+        ((3, 4, 5), (5, 6)),
+        ((4, 5), (5, 6)),
+        ((5,), (5, 6)),
+        ((4, 5), (5,)),
+        ((5,), (5,)),
+        ((3, 4), (1, 2, 4, 3)),
+    ],
+)
+@pytest.mark.parametrize(
+    "a_format, b_format",
+    [("coo", "coo"), ("coo", "gcxs"), ("gcxs", "coo"), ("gcxs", "gcxs")],
+)
+@pytest.mark.parametrize(
+    "a_comp_axes, b_comp_axes", [([0], [0]), ([0], [1]), ([1], [0]), ([1], [1])]
+)
+def test_matmul(a_shape, b_shape, a_format, b_format, a_comp_axes, b_comp_axes):
+    if a_format == "coo" or len(a_shape) == 1:
+        a_comp_axes = None
+    if b_format == "coo" or len(b_shape) == 1:
+        b_comp_axes = None
+    sa = sparse.random(
+        a_shape, density=0.5, format=a_format, compressed_axes=a_comp_axes
+    )
+    sb = sparse.random(
+        b_shape, density=0.5, format=b_format, compressed_axes=b_comp_axes
+    )
+
+    a = sa.todense()
+    b = sb.todense()
+
+    assert_eq(np.matmul(a, b), sparse.matmul(sa, sb))
+    assert_eq(sparse.matmul(sa, b), sparse.matmul(a, sb))
+    assert_eq(np.matmul(a, b), sparse.matmul(sa, sb))
+
+    if a.ndim == 2 or b.ndim == 2:
+        assert_eq(
+            np.matmul(a, b),
+            sparse.matmul(
+                scipy.sparse.coo_matrix(a) if a.ndim == 2 else sa,
+                scipy.sparse.coo_matrix(b) if b.ndim == 2 else sb,
+            ),
+        )
+
+    if hasattr(operator, "matmul"):
+        assert_eq(operator.matmul(a, b), operator.matmul(sa, sb))
+
+
+def test_matmul_errors():
+    with pytest.raises(ValueError):
+        sa = sparse.random((3, 4, 5, 6), 0.5)
+        sb = sparse.random((3, 6, 5, 6), 0.5)
+        sparse.matmul(sa, sb)
+
+
+@pytest.mark.parametrize(
+    "a_shape, b_shape",
+    [
+        ((1, 4, 5), (3, 5, 6)),
+        ((3, 4, 5), (1, 5, 6)),
+        ((3, 4, 5), (3, 5, 6)),
+        ((3, 4, 5), (5, 6)),
+        ((4, 5), (5, 6)),
+        ((5,), (5, 6)),
+        ((4, 5), (5,)),
+        ((5,), (5,)),
+    ],
+)
+@pytest.mark.parametrize(
+    "a_format, b_format",
+    [("coo", "coo"), ("coo", "gcxs"), ("gcxs", "coo"), ("gcxs", "gcxs")],
+)
+@pytest.mark.parametrize(
+    "a_comp_axes, b_comp_axes", [([0], [0]), ([0], [1]), ([1], [0]), ([1], [1])]
+)
+def test_dot(a_shape, b_shape, a_format, b_format, a_comp_axes, b_comp_axes):
+    if a_format == "coo" or len(a_shape) == 1:
+        a_comp_axes = None
+    if b_format == "coo" or len(b_shape) == 1:
+        b_comp_axes = None
+    sa = sparse.random(
+        a_shape, density=0.5, format=a_format, compressed_axes=a_comp_axes
+    )
+    sb = sparse.random(
+        b_shape, density=0.5, format=b_format, compressed_axes=b_comp_axes
+    )
+
+    a = sa.todense()
+    b = sb.todense()
+
+    assert_eq(a.dot(b), sa.dot(sb))
+    assert_eq(np.dot(a, b), sparse.dot(sa, sb))
+    assert_eq(sparse.dot(sa, b), sparse.dot(a, sb))
+    assert_eq(np.dot(a, b), sparse.dot(sa, sb))
+
+    # Basic equivalences
+    assert_eq(operator.matmul(a, b), operator.matmul(sa, sb))
+    # Test that COO's and np.array's combine correctly
+    # Not possible due to https://github.com/numpy/numpy/issues/9028
+    # assert_eq(eval("a @ sb"), eval("sa @ b"))
+
+
+@pytest.mark.parametrize(
+    "a_dense, b_dense, o_type",
+    [
+        (False, False, sparse.SparseArray),
+        (False, True, np.ndarray),
+        (True, False, np.ndarray),
+    ],
+)
+def test_dot_type(a_dense, b_dense, o_type):
+    a = sparse.random((3, 4), density=0.8)
+    b = sparse.random((4, 5), density=0.8)
+
+    if a_dense:
+        a = a.todense()
+
+    if b_dense:
+        b = b.todense()
+
+    assert isinstance(sparse.dot(a, b), o_type)
+
+
+@pytest.mark.xfail
+def test_dot_nocoercion():
+    sa = sparse.random((3, 4, 5), density=0.5)
+    sb = sparse.random((5, 6), density=0.5)
+
+    a = sa.todense()
+    b = sb.todense()
+
+    la = a.tolist()
+    lb = b.tolist()
+
+    if hasattr(operator, "matmul"):
+        # Operations with naive collection (list)
+        assert_eq(operator.matmul(la, b), operator.matmul(la, sb))
+        assert_eq(operator.matmul(a, lb), operator.matmul(sa, lb))
diff --git a/sparse/tests/test_io.py b/sparse/tests/test_io.py
index b808eba7..e82815fc 100644
--- a/sparse/tests/test_io.py
+++ b/sparse/tests/test_io.py
@@ -1,6 +1,3 @@
-import os
-import tempfile
-import shutil
 import pytest
 import numpy as np
 
@@ -11,29 +8,23 @@
 
 
 @pytest.mark.parametrize("compression", [True, False])
-def test_save_load_npz_file(compression):
-    x = sparse.random((2, 3, 4, 5), density=0.25)
+@pytest.mark.parametrize("format", ["coo", "gcxs"])
+def test_save_load_npz_file(tmp_path, compression, format):
+    x = sparse.random((2, 3, 4, 5), density=0.25, format=format)
     y = x.todense()
 
-    dir_name = tempfile.mkdtemp()
-    filename = os.path.join(dir_name, "mat.npz")
-
+    filename = tmp_path / "mat.npz"
     save_npz(filename, x, compressed=compression)
     z = load_npz(filename)
     assert_eq(x, z)
     assert_eq(y, z.todense())
 
-    shutil.rmtree(dir_name)
-
 
-def test_load_wrong_format_exception():
+def test_load_wrong_format_exception(tmp_path):
     x = np.array([1, 2, 3])
 
-    dir_name = tempfile.mkdtemp()
-    filename = os.path.join(dir_name, "mat.npz")
+    filename = tmp_path / "mat.npz"
 
     np.savez(filename, x)
     with pytest.raises(RuntimeError):
         load_npz(filename)
-
-    shutil.rmtree(dir_name)