From 9ea4053f906ffd49b3adb97fc961745d32c2befc Mon Sep 17 00:00:00 2001 From: Dale Tovar <37719258+daletovar@users.noreply.github.com> Date: Thu, 18 Apr 2019 22:23:58 -0700 Subject: [PATCH 01/72] Update core.py --- sparse/coo/core.py | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/sparse/coo/core.py b/sparse/coo/core.py index f7cdbe1a..da8209c5 100644 --- a/sparse/coo/core.py +++ b/sparse/coo/core.py @@ -1704,6 +1704,9 @@ def reshape(self, shape, order='C'): if self.shape == shape: return self + + if np.prod(self.shape) != np.prod(shape): + raise ValueError('cannot reshape array of size {} into shape {}'.format(np.prod(self.shape),shape)) if self._cache is not None: for sh, value in self._cache['reshape']: @@ -1728,6 +1731,45 @@ def reshape(self, shape, order='C'): self._cache['reshape'].append((shape, result)) return result + + def resize(self,*args,refcheck=False): + """ + This method changes the shape and size of an array in-place. + + Parameters + ---------- + args : tuple, or series of integers + The desired shape of the output array. + + See Also + -------- + numpy.ndarray.resize : The equivalent Numpy function. + + """ + if len(args)==1 and isinstance(args[0],tuple): + shape = args[0] + elif all(isinstance(arg,int) for arg in args): + shape = tuple(args) + else: + raise ValueError('Invalid input') + + if any(d == -1 for d in shape): + extra = int(self.size / + np.prod([d for d in shape if d != -1])) + shape = tuple([d if d != -1 else extra for d in shape]) + + # TODO: this self.size enforces a 2**64 limit to array size + linear_loc = self.linear_loc() + + coords = np.empty((len(shape), self.nnz), dtype=np.intp) + strides = 1 + for i, d in enumerate(shape[::-1]): + coords[-(i + 1), :] = (linear_loc // strides) % d + strides *= d + + self.shape = shape + self.coords = coords + def to_scipy_sparse(self): """ Converts this :obj:`COO` object into a :obj:`scipy.sparse.coo_matrix`. 
From fa9ef222496a65b70afad41dced8efbc989b1f2c Mon Sep 17 00:00:00 2001 From: Dale Tovar <37719258+daletovar@users.noreply.github.com> Date: Mon, 29 Apr 2019 19:15:42 -0700 Subject: [PATCH 02/72] Update test_coo.py --- sparse/tests/test_coo.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/sparse/tests/test_coo.py b/sparse/tests/test_coo.py index 0f53841f..c98a508f 100644 --- a/sparse/tests/test_coo.py +++ b/sparse/tests/test_coo.py @@ -211,7 +211,24 @@ def test_transpose_error(axis): with pytest.raises(ValueError): x.transpose(axis) +@pytest.mark.parametrize('a,b', [ + [(3, 4), (5, 5)], + [(12,), (3, 4)], + [(12,), (3, 6)], + [(5,5,5), (6,6,6)], + [(3, 4), (9, 4)], + [(5,), (4,)], + [(2, 3, 4, 5), (2, 3, 4, 5, 6)], + [(100,), (5,5)], + [(2, 3, 4, 5), (20, 6)], + [(), ()], +]) +def test_resize(a,b): + s = sparse.random(a, density=0.5) + x = s.todense() + assert_eq(x.resize(b), s.resize(b)) + @pytest.mark.parametrize('a,b', [ [(3, 4), (3, 4)], [(12,), (3, 4)], From 1683e00427e71e85856024a5c3f3f16990101bcd Mon Sep 17 00:00:00 2001 From: Dale Tovar <37719258+daletovar@users.noreply.github.com> Date: Mon, 29 Apr 2019 19:17:38 -0700 Subject: [PATCH 03/72] Update core.py Raises an error for negative dimensions. 
--- sparse/coo/core.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/sparse/coo/core.py b/sparse/coo/core.py index da8209c5..6cd08ae8 100644 --- a/sparse/coo/core.py +++ b/sparse/coo/core.py @@ -1753,10 +1753,8 @@ def resize(self,*args,refcheck=False): else: raise ValueError('Invalid input') - if any(d == -1 for d in shape): - extra = int(self.size / - np.prod([d for d in shape if d != -1])) - shape = tuple([d if d != -1 else extra for d in shape]) + if any(d < 0 for d in shape): + raise ValueError('negative dimensions not allowed') # TODO: this self.size enforces a 2**64 limit to array size linear_loc = self.linear_loc() From c70d39758f8d0a0c026cbb8543c9e2650be59f2e Mon Sep 17 00:00:00 2001 From: Dale Tovar <37719258+daletovar@users.noreply.github.com> Date: Mon, 29 Apr 2019 19:33:24 -0700 Subject: [PATCH 04/72] Update test_coo.py --- sparse/tests/test_coo.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sparse/tests/test_coo.py b/sparse/tests/test_coo.py index c98a508f..5a29a29f 100644 --- a/sparse/tests/test_coo.py +++ b/sparse/tests/test_coo.py @@ -226,8 +226,9 @@ def test_transpose_error(axis): def test_resize(a,b): s = sparse.random(a, density=0.5) x = s.todense() - - assert_eq(x.resize(b), s.resize(b)) + x.resize(b) + s.resize(b) + assert_eq(x, s) @pytest.mark.parametrize('a,b', [ [(3, 4), (3, 4)], From de14873d5cb480525afbd0ab557200450117a1b7 Mon Sep 17 00:00:00 2001 From: Hameer Abbasi Date: Tue, 30 Apr 2019 10:36:03 +0200 Subject: [PATCH 05/72] Add JUnit directory to gitignore. --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 2c48ede0..c498256f 100644 --- a/.gitignore +++ b/.gitignore @@ -47,6 +47,7 @@ coverage.xml *,cover .pytest_cache/ test_results/ +junit/ # Airspeed velocity .asv/ From fa80a624af820d7910d7428df98ca87e7c7c6c75 Mon Sep 17 00:00:00 2001 From: Hameer Abbasi Date: Tue, 30 Apr 2019 10:36:17 +0200 Subject: [PATCH 06/72] Fix Flake8 issues. 
--- sparse/coo/core.py | 25 ++++++++++++------------- sparse/tests/test_coo.py | 10 ++++++---- 2 files changed, 18 insertions(+), 17 deletions(-) diff --git a/sparse/coo/core.py b/sparse/coo/core.py index 6cd08ae8..7bc50dd8 100644 --- a/sparse/coo/core.py +++ b/sparse/coo/core.py @@ -1704,9 +1704,9 @@ def reshape(self, shape, order='C'): if self.shape == shape: return self - + if np.prod(self.shape) != np.prod(shape): - raise ValueError('cannot reshape array of size {} into shape {}'.format(np.prod(self.shape),shape)) + raise ValueError('cannot reshape array of size {} into shape {}'.format(np.prod(self.shape), shape)) if self._cache is not None: for sh, value in self._cache['reshape']: @@ -1731,31 +1731,30 @@ def reshape(self, shape, order='C'): self._cache['reshape'].append((shape, result)) return result - - def resize(self,*args,refcheck=False): + def resize(self, *args, refcheck=False): """ This method changes the shape and size of an array in-place. - + Parameters ---------- args : tuple, or series of integers The desired shape of the output array. - + See Also -------- numpy.ndarray.resize : The equivalent Numpy function. - + """ - if len(args)==1 and isinstance(args[0],tuple): + if len(args) == 1 and isinstance(args[0], tuple): shape = args[0] - elif all(isinstance(arg,int) for arg in args): + elif all(isinstance(arg, int) for arg in args): shape = tuple(args) else: raise ValueError('Invalid input') - + if any(d < 0 for d in shape): raise ValueError('negative dimensions not allowed') - + # TODO: this self.size enforces a 2**64 limit to array size linear_loc = self.linear_loc() @@ -1764,10 +1763,10 @@ def resize(self,*args,refcheck=False): for i, d in enumerate(shape[::-1]): coords[-(i + 1), :] = (linear_loc // strides) % d strides *= d - + self.shape = shape self.coords = coords - + def to_scipy_sparse(self): """ Converts this :obj:`COO` object into a :obj:`scipy.sparse.coo_matrix`. 
diff --git a/sparse/tests/test_coo.py b/sparse/tests/test_coo.py index 5a29a29f..bcf47d92 100644 --- a/sparse/tests/test_coo.py +++ b/sparse/tests/test_coo.py @@ -211,25 +211,27 @@ def test_transpose_error(axis): with pytest.raises(ValueError): x.transpose(axis) + @pytest.mark.parametrize('a,b', [ [(3, 4), (5, 5)], [(12,), (3, 4)], [(12,), (3, 6)], - [(5,5,5), (6,6,6)], + [(5, 5, 5), (6, 6, 6)], [(3, 4), (9, 4)], [(5,), (4,)], [(2, 3, 4, 5), (2, 3, 4, 5, 6)], - [(100,), (5,5)], + [(100,), (5, 5)], [(2, 3, 4, 5), (20, 6)], [(), ()], ]) -def test_resize(a,b): +def test_resize(a, b): s = sparse.random(a, density=0.5) x = s.todense() x.resize(b) s.resize(b) assert_eq(x, s) - + + @pytest.mark.parametrize('a,b', [ [(3, 4), (3, 4)], [(12,), (3, 4)], From 91e0367335933135b710d8a023605b88b850ce1e Mon Sep 17 00:00:00 2001 From: Hameer Abbasi Date: Tue, 30 Apr 2019 11:21:23 +0200 Subject: [PATCH 07/72] Fix up tests and code. --- sparse/coo/core.py | 15 +++++++++++---- sparse/tests/test_coo.py | 12 ++++++++---- 2 files changed, 19 insertions(+), 8 deletions(-) diff --git a/sparse/coo/core.py b/sparse/coo/core.py index 7bc50dd8..730e04e7 100644 --- a/sparse/coo/core.py +++ b/sparse/coo/core.py @@ -1705,8 +1705,8 @@ def reshape(self, shape, order='C'): if self.shape == shape: return self - if np.prod(self.shape) != np.prod(shape): - raise ValueError('cannot reshape array of size {} into shape {}'.format(np.prod(self.shape), shape)) + if self.size != reduce(operator.mul, shape, 1): + raise ValueError('cannot reshape array of size {} into shape {}'.format(self.size, shape)) if self._cache is not None: for sh, value in self._cache['reshape']: @@ -1731,7 +1731,7 @@ def reshape(self, shape, order='C'): self._cache['reshape'].append((shape, result)) return result - def resize(self, *args, refcheck=False): + def resize(self, *args, refcheck=True): """ This method changes the shape and size of an array in-place. 
@@ -1755,10 +1755,14 @@ def resize(self, *args, refcheck=False): if any(d < 0 for d in shape): raise ValueError('negative dimensions not allowed') + new_size = reduce(operator.mul, shape, 1) + # TODO: this self.size enforces a 2**64 limit to array size linear_loc = self.linear_loc() + end_idx = np.searchsorted(linear_loc, new_size, side='left') + linear_loc = linear_loc[:end_idx] - coords = np.empty((len(shape), self.nnz), dtype=np.intp) + coords = np.empty((len(shape), len(linear_loc)), dtype=np.intp) strides = 1 for i, d in enumerate(shape[::-1]): coords[-(i + 1), :] = (linear_loc // strides) % d @@ -1767,6 +1771,9 @@ def resize(self, *args, refcheck=False): self.shape = shape self.coords = coords + if len(self.data) != len(linear_loc): + self.data = self.data[:end_idx].copy() + def to_scipy_sparse(self): """ Converts this :obj:`COO` object into a :obj:`scipy.sparse.coo_matrix`. diff --git a/sparse/tests/test_coo.py b/sparse/tests/test_coo.py index bcf47d92..c9992d4c 100644 --- a/sparse/tests/test_coo.py +++ b/sparse/tests/test_coo.py @@ -226,9 +226,13 @@ def test_transpose_error(axis): ]) def test_resize(a, b): s = sparse.random(a, density=0.5) + orig_size = s.size x = s.todense() - x.resize(b) + x = np.resize(x, b) s.resize(b) + temp = x.reshape(x.size) + temp[orig_size:] = s.fill_value + assert isinstance(s, sparse.SparseArray) assert_eq(x, s) @@ -1554,16 +1558,16 @@ def test_add_many_sparse_arrays(): def test_caching(): - x = COO({(10, 10, 10): 1}) + x = COO({(9, 9, 9): 1}) assert x[:].reshape((100, 10)).transpose().tocsr() is not x[:].reshape((100, 10)).transpose().tocsr() - x = COO({(10, 10, 10): 1}, cache=True) + x = COO({(9, 9, 9): 1}, cache=True) assert x[:].reshape((100, 10)).transpose().tocsr() is x[:].reshape((100, 10)).transpose().tocsr() x = COO({(1, 1, 1, 1, 1, 1, 1, 2): 1}, cache=True) for i in range(x.ndim): - x.reshape((1,) * i + (2,) + (1,) * (x.ndim - i - 1)) + x.reshape(x.size) assert len(x._cache['reshape']) < 5 From 
e980993b2ddfdb0d03deacb468d337630ac202ec Mon Sep 17 00:00:00 2001 From: Hameer Abbasi Date: Tue, 30 Apr 2019 11:28:01 +0200 Subject: [PATCH 08/72] Add docs. --- docs/generated/sparse.COO.resize.rst | 6 ++++++ docs/generated/sparse.COO.rst | 1 + 2 files changed, 7 insertions(+) create mode 100644 docs/generated/sparse.COO.resize.rst diff --git a/docs/generated/sparse.COO.resize.rst b/docs/generated/sparse.COO.resize.rst new file mode 100644 index 00000000..7c0155d8 --- /dev/null +++ b/docs/generated/sparse.COO.resize.rst @@ -0,0 +1,6 @@ +COO.resize +========== + +.. currentmodule:: sparse + +.. automethod:: COO.resize \ No newline at end of file diff --git a/docs/generated/sparse.COO.rst b/docs/generated/sparse.COO.rst index 689d9222..e33fc070 100644 --- a/docs/generated/sparse.COO.rst +++ b/docs/generated/sparse.COO.rst @@ -73,6 +73,7 @@ COO COO.copy COO.dot COO.reshape + COO.resize COO.transpose COO.nonzero From a1f948c8152973f315ccd13d7a2dd98d3aad7c43 Mon Sep 17 00:00:00 2001 From: daletovar Date: Wed, 28 Aug 2019 15:07:22 -0700 Subject: [PATCH 09/72] change gxcs with gcxs --- sparse/__init__.py | 1 + sparse/_compressed/__init__.py | 2 +- sparse/_compressed/compressed.py | 6 +++--- sparse/_compressed/indexing.py | 6 +++--- sparse/_coo/core.py | 6 +++--- sparse/_utils.py | 2 +- sparse/tests/test_compressed.py | 22 +++++++++++----------- 7 files changed, 23 insertions(+), 22 deletions(-) diff --git a/sparse/__init__.py b/sparse/__init__.py index 961c36b6..2f84ad99 100644 --- a/sparse/__init__.py +++ b/sparse/__init__.py @@ -1,4 +1,5 @@ from ._coo import * +from ._compressed import GCXS from ._dok import DOK from ._sparse_array import SparseArray from ._utils import random diff --git a/sparse/_compressed/__init__.py b/sparse/_compressed/__init__.py index e25afe3d..87ba5637 100644 --- a/sparse/_compressed/__init__.py +++ b/sparse/_compressed/__init__.py @@ -1 +1 @@ -from .compressed import GXCS +from .compressed import GCXS diff --git 
a/sparse/_compressed/compressed.py b/sparse/_compressed/compressed.py index fd9d0192..bceceb3e 100644 --- a/sparse/_compressed/compressed.py +++ b/sparse/_compressed/compressed.py @@ -65,7 +65,7 @@ def _from_coo(x, compressed_axes=None): new_shape, axisptr, x.fill_value) -class GXCS(SparseArray, NDArrayOperatorsMixin): +class GCXS(SparseArray, NDArrayOperatorsMixin): __array_priority__ = 12 @@ -172,7 +172,7 @@ def ndim(self): return len(self.shape) def __str__(self): - return ''.format( + return ''.format( self.shape, self.dtype, self.nnz, self.fill_value, self.compressed_axes) __repr__ = __str__ @@ -345,7 +345,7 @@ def reshape(self, shape, order='C', compressed_axes=None): # there's likely a way to do this without decompressing to COO coo = self.tocoo().reshape(shape) - return GXCS.from_coo(coo, compressed_axes) + return GCXS.from_coo(coo, compressed_axes) def resize(self, *args, refcheck=True, compressed_axes=None): """ diff --git a/sparse/_compressed/indexing.py b/sparse/_compressed/indexing.py index 25d5f0f4..a619b82a 100644 --- a/sparse/_compressed/indexing.py +++ b/sparse/_compressed/indexing.py @@ -12,11 +12,11 @@ def getitem(x, key): """ - from .compressed import GXCS + from .compressed import GCXS if x.ndim == 1: coo = x.tocoo()[key] - return GXCS.from_coo(coo) + return GCXS.from_coo(coo) key = list(normalize_index(key, x.shape)) @@ -150,7 +150,7 @@ def getitem(x, key): if len(shape) == 1: compressed_axes = None - return GXCS( + return GCXS( arg, shape=shape, compressed_axes=compressed_axes, diff --git a/sparse/_coo/core.py b/sparse/_coo/core.py index 40851c18..c4f5be6c 100644 --- a/sparse/_coo/core.py +++ b/sparse/_coo/core.py @@ -2176,9 +2176,9 @@ def asformat(self, format, compressed_axes=None): NotImplementedError If the format isn't supported. 
""" - from .._compressed import GXCS - if format == 'gxcs' or format is GXCS: - return GXCS.from_coo(self, compressed_axes=compressed_axes) + from .._compressed import GCXS + if format == 'gcxs' or format is GCXS: + return GCXS.from_coo(self, compressed_axes=compressed_axes) elif compressed_axes is not None: raise ValueError( 'compressed_axes is not supported for {} format'.format(format)) diff --git a/sparse/_utils.py b/sparse/_utils.py index aed8acb9..9865264d 100644 --- a/sparse/_utils.py +++ b/sparse/_utils.py @@ -137,7 +137,7 @@ def random( nnz = int(elements * density) - if format != 'gxcs' and compressed_axes is not None: + if format != 'gcxs' and compressed_axes is not None: raise ValueError( 'compressed_axes is not supported for {} format'.format(format)) diff --git a/sparse/tests/test_compressed.py b/sparse/tests/test_compressed.py index 83513805..4c4be5d8 100644 --- a/sparse/tests/test_compressed.py +++ b/sparse/tests/test_compressed.py @@ -3,7 +3,7 @@ import numpy as np import scipy -from sparse._compressed import GXCS +from sparse._compressed import GCXS from sparse._utils import assert_eq @@ -20,7 +20,7 @@ [(), ()], ]) def test_resize(a, b): - s = sparse.random(a, density=0.5, format='gxcs') + s = sparse.random(a, density=0.5, format='gcxs') orig_size = s.size x = s.todense() x = np.resize(x, b) @@ -44,20 +44,20 @@ def test_resize(a, b): [(), ()], ]) def test_reshape(a, b): - s = sparse.random(a, density=0.5, format='gxcs') + s = sparse.random(a, density=0.5, format='gcxs') x = s.todense() assert_eq(x.reshape(b), s.reshape(b)) def test_reshape_same(): - s = sparse.random((3, 5), density=0.5, format='gxcs') + s = sparse.random((3, 5), density=0.5, format='gcxs') assert s.reshape(s.shape) is s def test_to_scipy_sparse(): - s = sparse.random((3, 5), density=0.5, format='gxcs', compressed_axes=(0,)) + s = sparse.random((3, 5), density=0.5, format='gcxs', compressed_axes=(0,)) a = s.to_scipy_sparse() b = scipy.sparse.csr_matrix(s.todense()) @@ -66,7 +66,7 
@@ def test_to_scipy_sparse(): def test_tocoo(): coo = sparse.random((5, 6), density=.5) - b = GXCS.from_coo(coo) + b = GCXS.from_coo(coo) assert_eq(b.tocoo(), coo) @@ -126,7 +126,7 @@ def test_tocoo(): (slice(0, 5, -1),), ]) def test_slicing(index): - s = sparse.random((2, 3, 4), density=0.5, format='gxcs') + s = sparse.random((2, 3, 4), density=0.5, format='gcxs') x = s.todense() assert_eq(x[index], s[index]) @@ -145,7 +145,7 @@ def test_slicing(index): (1, [2, 0, 1],), ]) def test_advanced_indexing(index): - s = sparse.random((2, 3, 4), density=0.5, format='gxcs') + s = sparse.random((2, 3, 4), density=0.5, format='gcxs') x = s.todense() assert_eq(x[index], s[index]) @@ -165,7 +165,7 @@ def test_advanced_indexing(index): ([[0, 1]],), ]) def test_slicing_errors(index): - s = sparse.random((2, 3, 4), density=0.5, format='gxcs') + s = sparse.random((2, 3, 4), density=0.5, format='gcxs') with pytest.raises(IndexError): s[index] @@ -173,8 +173,8 @@ def test_slicing_errors(index): def test_change_compressed_axes(): coo = sparse.random((3, 4, 5), density=.5) - s = GXCS.from_coo(coo, compressed_axes=(0, 1)) - b = GXCS.from_coo(coo, compressed_axes=(1, 2)) + s = GCXS.from_coo(coo, compressed_axes=(0, 1)) + b = GCXS.from_coo(coo, compressed_axes=(1, 2)) s.change_compressed_axes((1, 2)) From 81b1b03d1e440f660a067652a3d3ce6fddd52091 Mon Sep 17 00:00:00 2001 From: Dale Tovar <37719258+daletovar@users.noreply.github.com> Date: Fri, 30 Aug 2019 19:48:11 -0700 Subject: [PATCH 10/72] Update __init__.py --- sparse/__init__.py | 1 - 1 file changed, 1 deletion(-) diff --git a/sparse/__init__.py b/sparse/__init__.py index 2f84ad99..961c36b6 100644 --- a/sparse/__init__.py +++ b/sparse/__init__.py @@ -1,5 +1,4 @@ from ._coo import * -from ._compressed import GCXS from ._dok import DOK from ._sparse_array import SparseArray from ._utils import random From cf085fba27a4ff371f631acf7b24eaf0569acd32 Mon Sep 17 00:00:00 2001 From: Dale Tovar <37719258+daletovar@users.noreply.github.com> 
Date: Fri, 30 Aug 2019 20:06:16 -0700 Subject: [PATCH 11/72] add html_table --- sparse/_utils.py | 62 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) diff --git a/sparse/_utils.py b/sparse/_utils.py index 9865264d..cd60039e 100644 --- a/sparse/_utils.py +++ b/sparse/_utils.py @@ -266,6 +266,67 @@ def equivalent(x, y): # lgtm [py/comparison-of-identical-expressions] return (x == y) | ((x != x) & (y != y)) +# copied from zarr +# See https://github.com/zarr-developers/zarr-python/blob/master/zarr/util.py +def human_readable_size(size): + if size < 2**10: + return '%s' % size + elif size < 2**20: + return '%.1fK' % (size / float(2**10)) + elif size < 2**30: + return '%.1fM' % (size / float(2**20)) + elif size < 2**40: + return '%.1fG' % (size / float(2**30)) + elif size < 2**50: + return '%.1fT' % (size / float(2**40)) + else: + return '%.1fP' % (size / float(2**50)) + +def html_table(arr): + table = '' + table += '' + headings = ['Format','Data Type', 'Shape', + 'nnz', 'Density','Read-only', 'No. Bytes', + 'No. Bytes as dense'] + info = [arr.format, str(arr.dtype), str(arr.shape), + str(arr.nnz), str(arr.nnz/np.prod(arr.shape))] + + # read-only + if arr.format == 'dok': + info.append(str(False)) + else: + info.append(str(True)) + + if arr.nbytes > 2**10: + info.append('%s (%s)' % (arr.nbytes, human_readable_size(arr.nbytes))) + else: + info.append(str(arr.nbytes)) + + dense_bytes = np.prod(arr.shape) * arr.dtype.itemsize + if dense_bytes > 2**10: + info.append('%s (%s)' % (dense_bytes, human_readable_size(dense_bytes))) + else: + info.append(dense_bytes) + + headings.append('Storage ratio') + info.append('%.1f' % (arr.nbytes / dense_bytes)) + + # compressed_axes + if arr.format == 'gcxs': + headings.append('Compressed Axes') + info.append(str(arr.compressed_axes)) + + for h, i in zip(headings,info): + table += '' \ + '' \ + '' \ + '' \ + % (h, i) + table += '' + table += '
%s%s
' + return table + + def check_zero_fill_value(*args): """ @@ -343,3 +404,4 @@ def check_consistent_fill_value(arrays): 'but argument {:d} had a fill value of {!s}, which ' 'is different from a fill_value of {!s} in the first ' 'argument.'.format(i, arg.fill_value, fv)) + From 551629b16ba6dfe0a4e47909fdb2d860e68ed6a4 Mon Sep 17 00:00:00 2001 From: Dale Tovar <37719258+daletovar@users.noreply.github.com> Date: Fri, 30 Aug 2019 20:17:41 -0700 Subject: [PATCH 12/72] add _repr_html_ --- sparse/_sparse_array.py | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/sparse/_sparse_array.py b/sparse/_sparse_array.py index 35e1e0ac..03a7e07b 100644 --- a/sparse/_sparse_array.py +++ b/sparse/_sparse_array.py @@ -7,7 +7,7 @@ import numpy as np -from ._utils import _zero_of_dtype +from ._utils import _zero_of_dtype, html_table class SparseArray: @@ -158,6 +158,29 @@ def density(self): """ return self.nnz / self.size + + def _repr_html_(self): + """ + Diagnostic report about this array. + Renders in Jupyter. + + Examples + -------- + >>> import sparse + >>> x = sparse.random((100,100,100),density=.1) + >>> x + Format : coo + Data type : float64 + Shape : (100, 100, 100) + nnz : 100000 + Density : 0.1 + Read-only : True + No. bytes : 3200000 (3.1M) + No. 
Bytes as dense : 8000000 (7.6M) + Storage ratio : 0.4 + """ + return html_table(self) + @abstractmethod def asformat(self, format): """ From de82d5bc9ad7f7e2d1e8a38f99c3bd0dae46a5d0 Mon Sep 17 00:00:00 2001 From: Dale Tovar <37719258+daletovar@users.noreply.github.com> Date: Fri, 30 Aug 2019 20:24:10 -0700 Subject: [PATCH 13/72] add self.nbytes property and self.format attribute --- sparse/_dok.py | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/sparse/_dok.py b/sparse/_dok.py index 53e67ffe..29618f13 100644 --- a/sparse/_dok.py +++ b/sparse/_dok.py @@ -91,6 +91,7 @@ class DOK(SparseArray): def __init__(self, shape, data=None, dtype=None, fill_value=None): from ._coo import COO self.data = dict() + self.format = 'dok' if isinstance(shape, COO): ar = DOK.from_coo(shape) @@ -238,6 +239,30 @@ def nnz(self): """ return len(self.data) + @property + def nbytes(self): + """ + The number of bytes taken up by this object. Note that for small arrays, + this may undercount the number of bytes due to the large constant overhead. + + Returns + ------- + int + The approximate bytes of memory taken by this object. + + See Also + -------- + numpy.ndarray.nbytes : The equivalent Numpy property. 
+ + Examples + -------- + >>> import sparse + >>> x = sparse.random((100,100),density=.1,format='dok') + >>> x.nbytes + 8000 + """ + return self.nnz * self.dtype.itemsize + def __getitem__(self, key): key = normalize_index(key, self.shape) From 2a0fdba23473de279310e0738149d7e976022dd2 Mon Sep 17 00:00:00 2001 From: Dale Tovar <37719258+daletovar@users.noreply.github.com> Date: Fri, 30 Aug 2019 20:26:27 -0700 Subject: [PATCH 14/72] fix empty indptr for 1d and add self.format --- sparse/_compressed/compressed.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sparse/_compressed/compressed.py b/sparse/_compressed/compressed.py index bceceb3e..d3396a8e 100644 --- a/sparse/_compressed/compressed.py +++ b/sparse/_compressed/compressed.py @@ -25,7 +25,7 @@ def _from_coo(x, compressed_axes=None): if compressed_axes is not None: raise ValueError('no axes to compress for 1d array') return ( - x.data, x.coords[0], []), x.shape, None, None, None, None, None, x.fill_value + x.data, x.coords[0], np.array([])), x.shape, None, None, None, None, None, x.fill_value compressed_axes = normalize_axis(compressed_axes, x.ndim) if compressed_axes is None: @@ -110,7 +110,8 @@ def __init__(self, arg, shape=None, compressed_axes=None, fill_value=0): compressed_shape = (row_size, col_size) else: compressed_axes = compressed_shape = axis_order = reordered_shape = axisptr = None - + + self.format = 'gcxs' self.data, self.indices, self.indptr = arg self.shape = shape self.compressed_shape = compressed_shape From 16c707e4d5e769a743cc5cc328bd5b96c772de8b Mon Sep 17 00:00:00 2001 From: Dale Tovar <37719258+daletovar@users.noreply.github.com> Date: Fri, 30 Aug 2019 20:27:49 -0700 Subject: [PATCH 15/72] add self.format --- sparse/_coo/core.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sparse/_coo/core.py b/sparse/_coo/core.py index c4f5be6c..ac48f4df 100644 --- a/sparse/_coo/core.py +++ b/sparse/_coo/core.py @@ -213,6 +213,7 @@ def __init__(self, coords, data=None, 
shape=None, has_duplicates=True, self.data = np.asarray(data) self.coords = np.asarray(coords) + self.format = 'coo' if self.coords.ndim == 1: self.coords = self.coords[None, :] From 937e8198a09cd57ba4785463f325639eab2a7e6e Mon Sep 17 00:00:00 2001 From: Dale Tovar <37719258+daletovar@users.noreply.github.com> Date: Fri, 30 Aug 2019 23:37:26 -0700 Subject: [PATCH 16/72] remove whitespace --- sparse/_sparse_array.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sparse/_sparse_array.py b/sparse/_sparse_array.py index 03a7e07b..a06f9745 100644 --- a/sparse/_sparse_array.py +++ b/sparse/_sparse_array.py @@ -158,7 +158,6 @@ def density(self): """ return self.nnz / self.size - def _repr_html_(self): """ Diagnostic report about this array. @@ -177,7 +176,7 @@ def _repr_html_(self): Read-only : True No. bytes : 3200000 (3.1M) No. Bytes as dense : 8000000 (7.6M) - Storage ratio : 0.4 + Storage ratio : 0.4 """ return html_table(self) From 80a958ba250536f1e03010c375b87ede1984c6c4 Mon Sep 17 00:00:00 2001 From: Dale Tovar <37719258+daletovar@users.noreply.github.com> Date: Sat, 31 Aug 2019 13:46:26 -0700 Subject: [PATCH 17/72] remove failed example --- sparse/_sparse_array.py | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/sparse/_sparse_array.py b/sparse/_sparse_array.py index a06f9745..8a0f6542 100644 --- a/sparse/_sparse_array.py +++ b/sparse/_sparse_array.py @@ -162,21 +162,6 @@ def _repr_html_(self): """ Diagnostic report about this array. Renders in Jupyter. - - Examples - -------- - >>> import sparse - >>> x = sparse.random((100,100,100),density=.1) - >>> x - Format : coo - Data type : float64 - Shape : (100, 100, 100) - nnz : 100000 - Density : 0.1 - Read-only : True - No. bytes : 3200000 (3.1M) - No. 
Bytes as dense : 8000000 (7.6M) - Storage ratio : 0.4 """ return html_table(self) From fedd68a7b368d8b1a962ded0b414d632a527d6c6 Mon Sep 17 00:00:00 2001 From: Dale Tovar <37719258+daletovar@users.noreply.github.com> Date: Sun, 1 Sep 2019 10:27:58 -0700 Subject: [PATCH 18/72] Update _utils.py --- sparse/_utils.py | 36 +++++++++++++++++++----------------- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/sparse/_utils.py b/sparse/_utils.py index cd60039e..1100ba12 100644 --- a/sparse/_utils.py +++ b/sparse/_utils.py @@ -266,8 +266,10 @@ def equivalent(x, y): # lgtm [py/comparison-of-identical-expressions] return (x == y) | ((x != x) & (y != y)) -# copied from zarr +# copied from zarr # See https://github.com/zarr-developers/zarr-python/blob/master/zarr/util.py + + def human_readable_size(size): if size < 2**10: return '%s' % size @@ -282,32 +284,34 @@ def human_readable_size(size): else: return '%.1fP' % (size / float(2**50)) + def html_table(arr): table = '' table += '' - headings = ['Format','Data Type', 'Shape', - 'nnz', 'Density','Read-only', 'No. Bytes', - 'No. Bytes as dense'] + headings = ['Format', 'Data Type', 'Shape', + 'nnz', 'Density', 'Read-only', 'No. Bytes', + 'No. 
Bytes as dense'] info = [arr.format, str(arr.dtype), str(arr.shape), - str(arr.nnz), str(arr.nnz/np.prod(arr.shape))] - + str(arr.nnz), str(arr.nnz/np.prod(arr.shape))] + # read-only if arr.format == 'dok': info.append(str(False)) else: info.append(str(True)) - + if arr.nbytes > 2**10: info.append('%s (%s)' % (arr.nbytes, human_readable_size(arr.nbytes))) else: info.append(str(arr.nbytes)) - + dense_bytes = np.prod(arr.shape) * arr.dtype.itemsize if dense_bytes > 2**10: - info.append('%s (%s)' % (dense_bytes, human_readable_size(dense_bytes))) + info.append('%s (%s)' % + (dense_bytes, human_readable_size(dense_bytes))) else: info.append(dense_bytes) - + headings.append('Storage ratio') info.append('%.1f' % (arr.nbytes / dense_bytes)) @@ -316,18 +320,17 @@ def html_table(arr): headings.append('Compressed Axes') info.append(str(arr.compressed_axes)) - for h, i in zip(headings,info): + for h, i in zip(headings, info): table += '' \ - '' \ - '' \ - '' \ - % (h, i) + '' \ + '' \ + '' \ + % (h, i) table += '' table += '
%s%s
%s%s
' return table - def check_zero_fill_value(*args): """ Checks if all the arguments have zero fill-values. @@ -404,4 +407,3 @@ def check_consistent_fill_value(arrays): 'but argument {:d} had a fill value of {!s}, which ' 'is different from a fill_value of {!s} in the first ' 'argument.'.format(i, arg.fill_value, fv)) - From d7516d4abf0a677883b87902dacb51de4138a9ad Mon Sep 17 00:00:00 2001 From: Dale Tovar <37719258+daletovar@users.noreply.github.com> Date: Sun, 1 Sep 2019 10:36:26 -0700 Subject: [PATCH 19/72] Update compressed.py --- sparse/_compressed/compressed.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sparse/_compressed/compressed.py b/sparse/_compressed/compressed.py index d3396a8e..50060df7 100644 --- a/sparse/_compressed/compressed.py +++ b/sparse/_compressed/compressed.py @@ -110,7 +110,7 @@ def __init__(self, arg, shape=None, compressed_axes=None, fill_value=0): compressed_shape = (row_size, col_size) else: compressed_axes = compressed_shape = axis_order = reordered_shape = axisptr = None - + self.format = 'gcxs' self.data, self.indices, self.indptr = arg self.shape = shape From 8827ac4b7502079a3f531063b09f85e53c3d9d16 Mon Sep 17 00:00:00 2001 From: Dale Tovar <37719258+daletovar@users.noreply.github.com> Date: Fri, 6 Sep 2019 11:13:48 -0700 Subject: [PATCH 20/72] add format property --- sparse/_sparse_array.py | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/sparse/_sparse_array.py b/sparse/_sparse_array.py index 8a0f6542..f1a30905 100644 --- a/sparse/_sparse_array.py +++ b/sparse/_sparse_array.py @@ -75,7 +75,26 @@ def nnz(self): >>> np.count_nonzero(x) == s.nnz True """ - + + @property + @abstractmethod + def format(self): + """ + The storage format of this array. + + Returns + ------- + str + The storage format of this array. 
+ + Examples + ------- + >>> import sparse + >>> s = sparse.random((5,5), density=0.2) + >>> s.format + 'coo' + """ + @property def ndim(self): """ From 9a4fd74455ec90a4f5d917b61019a8b2f08edf18 Mon Sep 17 00:00:00 2001 From: Dale Tovar <37719258+daletovar@users.noreply.github.com> Date: Fri, 6 Sep 2019 11:19:45 -0700 Subject: [PATCH 21/72] add format property --- sparse/_coo/core.py | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/sparse/_coo/core.py b/sparse/_coo/core.py index ac48f4df..c098034a 100644 --- a/sparse/_coo/core.py +++ b/sparse/_coo/core.py @@ -213,7 +213,6 @@ def __init__(self, coords, data=None, shape=None, has_duplicates=True, self.data = np.asarray(data) self.coords = np.asarray(coords) - self.format = 'coo' if self.coords.ndim == 1: self.coords = self.coords[None, :] @@ -527,6 +526,31 @@ def dtype(self): True """ return self.data.dtype + + @property + def format(self): + """ + The storage format of this array. + + Returns + ------- + str + The storage format of this array. + + See Also + ------- + DOK.format : Equivalent :obj:`DOK` array property. + GCXS.format : Equivalent :obj:`GCXS` array property. + scipy.sparse.coo_matrix.format : The Scipy equivalent property. 
+ + Examples + ------- + >>> import sparse + >>> s = sparse.random((5,5), density=0.2) + >>> s.format + 'coo' + """ + return 'coo' @property def nnz(self): From 667eb572c25b4378fe6511f697f7bfa61aac1e57 Mon Sep 17 00:00:00 2001 From: Dale Tovar <37719258+daletovar@users.noreply.github.com> Date: Fri, 6 Sep 2019 11:22:06 -0700 Subject: [PATCH 22/72] add format --- sparse/_dok.py | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/sparse/_dok.py b/sparse/_dok.py index 29618f13..17f4a30d 100644 --- a/sparse/_dok.py +++ b/sparse/_dok.py @@ -91,7 +91,6 @@ class DOK(SparseArray): def __init__(self, shape, data=None, dtype=None, fill_value=None): from ._coo import COO self.data = dict() - self.format = 'dok' if isinstance(shape, COO): ar = DOK.from_coo(shape) @@ -238,7 +237,32 @@ def nnz(self): 1 """ return len(self.data) - + + @property + def format(self): + """ + The storage format of this array. + + Returns + ------- + str + The storage format of this array. + + See Also + ------- + COO.format : Equivalent :obj:`COO` array property. + GCXS.format : Equivalent :obj:`GCXS` array property. + scipy.sparse.dok_matrix.format : The Scipy equivalent property. 
+ + Examples + ------- + >>> import sparse + >>> s = sparse.random((5,5), density=0.2, format='dok') + >>> s.format + 'dok' + """ + return 'dok' + @property def nbytes(self): """ From 53ccd0f01c9f7f4ab317fc0e5d026d5b0d9af79c Mon Sep 17 00:00:00 2001 From: Dale Tovar <37719258+daletovar@users.noreply.github.com> Date: Fri, 6 Sep 2019 11:39:37 -0700 Subject: [PATCH 23/72] remove spurious properties, add docs --- sparse/_compressed/compressed.py | 86 ++++++++++++++++++++++++++++---- 1 file changed, 75 insertions(+), 11 deletions(-) diff --git a/sparse/_compressed/compressed.py b/sparse/_compressed/compressed.py index 50060df7..e911ef0f 100644 --- a/sparse/_compressed/compressed.py +++ b/sparse/_compressed/compressed.py @@ -25,7 +25,7 @@ def _from_coo(x, compressed_axes=None): if compressed_axes is not None: raise ValueError('no axes to compress for 1d array') return ( - x.data, x.coords[0], np.array([])), x.shape, None, None, None, None, None, x.fill_value + x.data, x.coords[0], ()), x.shape, None, None, None, None, None, x.fill_value compressed_axes = normalize_axis(compressed_axes, x.ndim) if compressed_axes is None: @@ -111,7 +111,6 @@ def __init__(self, arg, shape=None, compressed_axes=None, fill_value=0): else: compressed_axes = compressed_shape = axis_order = reordered_shape = axisptr = None - self.format = 'gcxs' self.data, self.indices, self.indptr = arg self.shape = shape self.compressed_shape = compressed_shape @@ -156,21 +155,86 @@ def from_iter(cls, x, shape=None, compressed_axes=None, fill_value=None): fill_value), compressed_axes=compressed_axes) + @property + def dtype(self): + """ + The datatype of this array. + + Returns + ------- + numpy.dtype + The datatype of this array. + + See Also + -------- + numpy.ndarray.dtype : Numpy equivalent property. + scipy.sparse.csr_matrix.dtype : Scipy equivalent property. + """ + return self.data.dtype + @property def nnz(self): + """ + The number of nonzero elements in this array. 
+ + Returns + ------- + int + The number of nonzero elements in this array. + + See Also + -------- + COO.nnz : Equivalent :obj:`COO` array property. + DOK.nnz : Equivalent :obj:`DOK` array property. + numpy.count_nonzero : A similar Numpy function. + scipy.sparse.csr_matrix.nnz : The Scipy equivalent property. + """ return self.data.shape[0] @property - def nbytes(self): - return self.data.nbytes + self.indices.nbytes + self.indptr.nbytes - - @property - def density(self): - return self.nnz / reduce(mul, self.shape, 1) - + def format(self): + """ + The storage format of this array. + + Returns + ------- + str + The storage format of this array. + + See Also + ------- + COO.format : Equivalent :obj:`COO` array property. + DOK.format : Equivalent :obj:`DOK` array property. + scipy.sparse.coo_matrix.format : The Scipy equivalent property. + + Examples + ------- + >>> import sparse + >>> s = sparse.random((5,5), density=0.2, format='gcxs') + >>> s.format + 'gcxs' + """ + return 'gcxs' + @property - def ndim(self): - return len(self.shape) + def nbytes(self): + """ + The number of bytes taken up by this object. Note that for small arrays, + this may undercount the number of bytes due to the large constant overhead. + + Returns + ------- + int + The approximate bytes of memory taken by this object. + + See Also + -------- + numpy.ndarray.nbytes : The equivalent Numpy property. 
+ """ + nbytes = self.data.nbytes + self.indices.nbytes + if self.indptr != (): + nbytes += self.indptr.nbytes + return nbytes def __str__(self): return ''.format( From a41a054fa4bcc1d53686b6cbf5f042eb1a110795 Mon Sep 17 00:00:00 2001 From: Dale Tovar <37719258+daletovar@users.noreply.github.com> Date: Fri, 6 Sep 2019 11:43:16 -0700 Subject: [PATCH 24/72] Update sparse/_utils.py Co-Authored-By: Hameer Abbasi --- sparse/_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sparse/_utils.py b/sparse/_utils.py index 1100ba12..734b1676 100644 --- a/sparse/_utils.py +++ b/sparse/_utils.py @@ -292,7 +292,7 @@ def html_table(arr): 'nnz', 'Density', 'Read-only', 'No. Bytes', 'No. Bytes as dense'] info = [arr.format, str(arr.dtype), str(arr.shape), - str(arr.nnz), str(arr.nnz/np.prod(arr.shape))] + str(arr.nnz), str(arr.nnz/arr.size)] # read-only if arr.format == 'dok': From 0a441ee202521bc82f5a1bc510db89d2dccbb07e Mon Sep 17 00:00:00 2001 From: Dale Tovar <37719258+daletovar@users.noreply.github.com> Date: Fri, 6 Sep 2019 11:53:08 -0700 Subject: [PATCH 25/72] update html table --- sparse/_utils.py | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/sparse/_utils.py b/sparse/_utils.py index 734b1676..8f80c835 100644 --- a/sparse/_utils.py +++ b/sparse/_utils.py @@ -1,7 +1,9 @@ import functools from collections.abc import Iterable from numbers import Integral +from functools import reduce +import operator import numpy as np @@ -268,8 +270,6 @@ def equivalent(x, y): # copied from zarr # See https://github.com/zarr-developers/zarr-python/blob/master/zarr/util.py - - def human_readable_size(size): if size < 2**10: return '%s' % size @@ -289,8 +289,7 @@ def html_table(arr): table = '' table += '' headings = ['Format', 'Data Type', 'Shape', - 'nnz', 'Density', 'Read-only', 'No. Bytes', - 'No. 
Bytes as dense'] + 'nnz', 'Density', 'Read-only', 'size'] info = [arr.format, str(arr.dtype), str(arr.shape), str(arr.nnz), str(arr.nnz/arr.size)] @@ -305,15 +304,9 @@ def html_table(arr): else: info.append(str(arr.nbytes)) - dense_bytes = np.prod(arr.shape) * arr.dtype.itemsize - if dense_bytes > 2**10: - info.append('%s (%s)' % - (dense_bytes, human_readable_size(dense_bytes))) - else: - info.append(dense_bytes) - headings.append('Storage ratio') - info.append('%.1f' % (arr.nbytes / dense_bytes)) + info.append('%.1f' % (arr.nbytes / (reduce(operator.mul, + arr.shape,1) * arr.dtype.itemsize))) # compressed_axes if arr.format == 'gcxs': From 9cfdb090c3c8a23a272b37280db4463b989b71dd Mon Sep 17 00:00:00 2001 From: Dale Tovar <37719258+daletovar@users.noreply.github.com> Date: Fri, 6 Sep 2019 11:54:30 -0700 Subject: [PATCH 26/72] Update test_compressed.py --- sparse/tests/test_compressed.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sparse/tests/test_compressed.py b/sparse/tests/test_compressed.py index 4c4be5d8..4a568fb8 100644 --- a/sparse/tests/test_compressed.py +++ b/sparse/tests/test_compressed.py @@ -175,7 +175,6 @@ def test_change_compressed_axes(): coo = sparse.random((3, 4, 5), density=.5) s = GCXS.from_coo(coo, compressed_axes=(0, 1)) b = GCXS.from_coo(coo, compressed_axes=(1, 2)) - + assert_eq(s, b) s.change_compressed_axes((1, 2)) - assert_eq(s, b) From 77eabe5cd209e7b7f4ebbdd657658526c20f6414 Mon Sep 17 00:00:00 2001 From: Dale Tovar <37719258+daletovar@users.noreply.github.com> Date: Fri, 6 Sep 2019 12:21:50 -0700 Subject: [PATCH 27/72] Update compressed.py --- sparse/_compressed/compressed.py | 1 - 1 file changed, 1 deletion(-) diff --git a/sparse/_compressed/compressed.py b/sparse/_compressed/compressed.py index e2c59380..30fc93c8 100644 --- a/sparse/_compressed/compressed.py +++ b/sparse/_compressed/compressed.py @@ -134,7 +134,6 @@ def __init__(self, arg, shape=None, compressed_axes=None, fill_value=0): self.axisptr = 
axisptr self.reordered_shape = reordered_shape self.fill_value = fill_value - self.dtype = self.data.dtype @classmethod def from_numpy(cls, x, compressed_axes=None, fill_value=0): From 702ca4575b4aa06534d526dcfb5a60f0030c5d72 Mon Sep 17 00:00:00 2001 From: Dale Tovar <37719258+daletovar@users.noreply.github.com> Date: Fri, 6 Sep 2019 13:16:00 -0700 Subject: [PATCH 28/72] formatting --- sparse/_compressed/compressed.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/sparse/_compressed/compressed.py b/sparse/_compressed/compressed.py index 30fc93c8..02d15cad 100644 --- a/sparse/_compressed/compressed.py +++ b/sparse/_compressed/compressed.py @@ -183,7 +183,7 @@ def dtype(self): scipy.sparse.csr_matrix.dtype : Scipy equivalent property. """ return self.data.dtype - + @property def nnz(self): """ @@ -225,9 +225,9 @@ def format(self): >>> s = sparse.random((5,5), density=0.2, format='gcxs') >>> s.format 'gcxs' - """ - return 'gcxs' - + """ + return "gcxs" + @property def nbytes(self): """ @@ -249,8 +249,9 @@ def nbytes(self): return nbytes def __str__(self): - return ''.format( - self.shape, self.dtype, self.nnz, self.fill_value, self.compressed_axes) + return "".format( + self.shape, self.dtype, self.nnz, self.fill_value, self.compressed_axes + ) __repr__ = __str__ From 3cdeabdd42074b6af93a846bf36a4807afd716f3 Mon Sep 17 00:00:00 2001 From: Dale Tovar <37719258+daletovar@users.noreply.github.com> Date: Fri, 6 Sep 2019 13:17:04 -0700 Subject: [PATCH 29/72] formatting --- sparse/_compressed/indexing.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/sparse/_compressed/indexing.py b/sparse/_compressed/indexing.py index 3e487cd9..475daf23 100644 --- a/sparse/_compressed/indexing.py +++ b/sparse/_compressed/indexing.py @@ -152,11 +152,8 @@ def getitem(x, key): compressed_axes = None return GCXS( - arg, - shape=shape, - compressed_axes=compressed_axes, - fill_value=x.fill_value) - + arg, shape=shape, 
compressed_axes=compressed_axes, fill_value=x.fill_value + ) @numba.jit(nopython=True, nogil=True) From e7a5e1d53407215ae91c345e2d5e82e1a8779efe Mon Sep 17 00:00:00 2001 From: Dale Tovar <37719258+daletovar@users.noreply.github.com> Date: Fri, 6 Sep 2019 13:18:06 -0700 Subject: [PATCH 30/72] formatting --- sparse/_dok.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sparse/_dok.py b/sparse/_dok.py index b325aaa2..d539a5f1 100644 --- a/sparse/_dok.py +++ b/sparse/_dok.py @@ -241,7 +241,7 @@ def nnz(self): 1 """ return len(self.data) - + @property def format(self): """ @@ -264,9 +264,9 @@ def format(self): >>> s = sparse.random((5,5), density=0.2, format='dok') >>> s.format 'dok' - """ - return 'dok' - + """ + return "dok" + @property def nbytes(self): """ From 341ddbab2c50b3aba4cb93be6135d0cca803b2e7 Mon Sep 17 00:00:00 2001 From: Dale Tovar <37719258+daletovar@users.noreply.github.com> Date: Fri, 6 Sep 2019 13:19:11 -0700 Subject: [PATCH 31/72] Update test_compressed.py --- sparse/tests/test_compressed.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/sparse/tests/test_compressed.py b/sparse/tests/test_compressed.py index 1f389b0e..fce5ae13 100644 --- a/sparse/tests/test_compressed.py +++ b/sparse/tests/test_compressed.py @@ -23,7 +23,7 @@ ], ) def test_resize(a, b): - s = sparse.random(a, density=0.5, format='gcxs') + s = sparse.random(a, density=0.5, format="gcxs") orig_size = s.size x = s.todense() x = np.resize(x, b) @@ -50,7 +50,7 @@ def test_resize(a, b): ], ) def test_reshape(a, b): - s = sparse.random(a, density=0.5, format='gcxs') + s = sparse.random(a, density=0.5, format="gcxs") x = s.todense() assert_eq(x.reshape(b), s.reshape(b)) @@ -58,12 +58,12 @@ def test_reshape(a, b): def test_reshape_same(): - s = sparse.random((3, 5), density=0.5, format='gcxs') + s = sparse.random((3, 5), density=0.5, format="gcxs") assert s.reshape(s.shape) is s def test_to_scipy_sparse(): - s = 
sparse.random((3, 5), density=0.5, format='gcxs', compressed_axes=(0,)) + s = sparse.random((3, 5), density=0.5, format="gcxs", compressed_axes=(0,)) a = s.to_scipy_sparse() b = scipy.sparse.csr_matrix(s.todense()) @@ -71,7 +71,7 @@ def test_to_scipy_sparse(): def test_tocoo(): - coo = sparse.random((5, 6), density=.5) + coo = sparse.random((5, 6), density=0.5) b = GCXS.from_coo(coo) assert_eq(b.tocoo(), coo) @@ -134,7 +134,7 @@ def test_tocoo(): ], ) def test_slicing(index): - s = sparse.random((2, 3, 4), density=0.5, format='gcxs') + s = sparse.random((2, 3, 4), density=0.5, format="gcxs") x = s.todense() assert_eq(x[index], s[index]) @@ -155,7 +155,7 @@ def test_slicing(index): ], ) def test_advanced_indexing(index): - s = sparse.random((2, 3, 4), density=0.5, format='gcxs') + s = sparse.random((2, 3, 4), density=0.5, format="gcxs") x = s.todense() assert_eq(x[index], s[index]) @@ -178,14 +178,14 @@ def test_advanced_indexing(index): ], ) def test_slicing_errors(index): - s = sparse.random((2, 3, 4), density=0.5, format='gcxs') + s = sparse.random((2, 3, 4), density=0.5, format="gcxs") with pytest.raises(IndexError): s[index] def test_change_compressed_axes(): - coo = sparse.random((3, 4, 5), density=.5) + coo = sparse.random((3, 4, 5), density=0.5) s = GCXS.from_coo(coo, compressed_axes=(0, 1)) b = GCXS.from_coo(coo, compressed_axes=(1, 2)) assert_eq(s, b) From 3cff9720488f7a3d54a7d035fdde6f6b19407896 Mon Sep 17 00:00:00 2001 From: Dale Tovar <37719258+daletovar@users.noreply.github.com> Date: Fri, 6 Sep 2019 13:20:51 -0700 Subject: [PATCH 32/72] formatting --- sparse/_utils.py | 74 +++++++++++++++++++++++++++--------------------- 1 file changed, 41 insertions(+), 33 deletions(-) diff --git a/sparse/_utils.py b/sparse/_utils.py index 073a85ca..79d4425a 100644 --- a/sparse/_utils.py +++ b/sparse/_utils.py @@ -144,7 +144,7 @@ def random( nnz = int(elements * density) - if format != 'gcxs' and compressed_axes is not None: + if format != "gcxs" and compressed_axes 
is not None: raise ValueError( "compressed_axes is not supported for {} format".format(format) ) @@ -272,59 +272,67 @@ def equivalent(x, y): # lgtm [py/comparison-of-identical-expressions] return (x == y) | ((x != x) & (y != y)) + # copied from zarr # See https://github.com/zarr-developers/zarr-python/blob/master/zarr/util.py def human_readable_size(size): - if size < 2**10: - return '%s' % size - elif size < 2**20: - return '%.1fK' % (size / float(2**10)) - elif size < 2**30: - return '%.1fM' % (size / float(2**20)) - elif size < 2**40: - return '%.1fG' % (size / float(2**30)) - elif size < 2**50: - return '%.1fT' % (size / float(2**40)) + if size < 2 ** 10: + return "%s" % size + elif size < 2 ** 20: + return "%.1fK" % (size / float(2 ** 10)) + elif size < 2 ** 30: + return "%.1fM" % (size / float(2 ** 20)) + elif size < 2 ** 40: + return "%.1fG" % (size / float(2 ** 30)) + elif size < 2 ** 50: + return "%.1fT" % (size / float(2 ** 40)) else: - return '%.1fP' % (size / float(2**50)) + return "%.1fP" % (size / float(2 ** 50)) def html_table(arr): - table = '
' - table += '' - headings = ['Format', 'Data Type', 'Shape', - 'nnz', 'Density', 'Read-only', 'size'] - info = [arr.format, str(arr.dtype), str(arr.shape), - str(arr.nnz), str(arr.nnz/arr.size)] + table = "
" + table += "" + headings = ["Format", "Data Type", "Shape", "nnz", "Density", "Read-only", "Size"] + info = [ + arr.format, + str(arr.dtype), + str(arr.shape), + str(arr.nnz), + str(arr.nnz / arr.size), + ] # read-only - if arr.format == 'dok': + if arr.format == "dok": info.append(str(False)) else: info.append(str(True)) - if arr.nbytes > 2**10: - info.append('%s (%s)' % (arr.nbytes, human_readable_size(arr.nbytes))) + if arr.nbytes > 2 ** 10: + info.append("%s (%s)" % (arr.nbytes, human_readable_size(arr.nbytes))) else: info.append(str(arr.nbytes)) - headings.append('Storage ratio') - info.append('%.1f' % (arr.nbytes / (reduce(operator.mul, - arr.shape,1) * arr.dtype.itemsize))) + headings.append("Storage ratio") + info.append( + "%.1f" + % (arr.nbytes / (reduce(operator.mul, arr.shape, 1) * arr.dtype.itemsize)) + ) # compressed_axes - if arr.format == 'gcxs': - headings.append('Compressed Axes') + if arr.format == "gcxs": + headings.append("Compressed Axes") info.append(str(arr.compressed_axes)) for h, i in zip(headings, info): - table += '' \ - '' \ - '' \ - '' \ - % (h, i) - table += '' - table += '
%s%s
' + table += ( + "" + '%s' + '%s' + "" % (h, i) + ) + table += "" + table += "" return table From fb0757b922238591e15ddebb96ade1988fdbe6e4 Mon Sep 17 00:00:00 2001 From: Dale Tovar <37719258+daletovar@users.noreply.github.com> Date: Fri, 6 Sep 2019 13:28:19 -0700 Subject: [PATCH 33/72] Update core.py --- sparse/_coo/core.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/sparse/_coo/core.py b/sparse/_coo/core.py index 0ca45c99..4c739018 100644 --- a/sparse/_coo/core.py +++ b/sparse/_coo/core.py @@ -556,7 +556,7 @@ def dtype(self): True """ return self.data.dtype - + @property def format(self): """ @@ -579,8 +579,8 @@ def format(self): >>> s = sparse.random((5,5), density=0.2) >>> s.format 'coo' - """ - return 'coo' + """ + return "coo" @property def nnz(self): @@ -2265,9 +2265,10 @@ def asformat(self, format, compressed_axes=None): If the format isn't supported. """ from .._compressed import GCXS - if format == 'gcxs' or format is GCXS: + + if format == "gcxs" or format is GCXS: return GCXS.from_coo(self, compressed_axes=compressed_axes) - + elif compressed_axes is not None: raise ValueError( "compressed_axes is not supported for {} format".format(format) From 33df589406e1d1f2c5de27ede9d9922b34b58212 Mon Sep 17 00:00:00 2001 From: Dale Tovar <37719258+daletovar@users.noreply.github.com> Date: Fri, 6 Sep 2019 13:30:48 -0700 Subject: [PATCH 34/72] formatting --- sparse/_sparse_array.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sparse/_sparse_array.py b/sparse/_sparse_array.py index 59157a43..fa8dba54 100644 --- a/sparse/_sparse_array.py +++ b/sparse/_sparse_array.py @@ -77,7 +77,7 @@ def nnz(self): >>> np.count_nonzero(x) == s.nnz True """ - + @property @abstractmethod def format(self): @@ -96,7 +96,7 @@ def format(self): >>> s.format 'coo' """ - + @property def ndim(self): """ From e00cccc6ccb72a4067467b56c74be8accfc9ccdb Mon Sep 17 00:00:00 2001 From: unknown Date: Mon, 7 Oct 2019 23:37:11 -0700 Subject: 
[PATCH 35/72] add common --- sparse/_compressed/common.py | 377 +++++++++++++++++++++++++++++++++++ sparse/_compressed/umath.py | 377 +++++++++++++++++++++++++++++++++++ 2 files changed, 754 insertions(+) create mode 100644 sparse/_compressed/common.py create mode 100644 sparse/_compressed/umath.py diff --git a/sparse/_compressed/common.py b/sparse/_compressed/common.py new file mode 100644 index 00000000..1f921c7f --- /dev/null +++ b/sparse/_compressed/common.py @@ -0,0 +1,377 @@ +from itertools import zip_longest +import numpy as np +import numba + + + + + +def diagonal(a, offset=0, axis1=0, axis2=1): + + if a.shape[axis1] != a.shape[axis2]: + raise ValueError("a.shape[axis1] != a.shape[axis2]") + + diag_axes = [ + axis for axis in range(len(a.shape)) if axis != axis1 and axis != axis2 + ] + [axis1] + diag_shape = [a.shape[axis] for axis in diag_axes] + diag_shape[-1] -= abs(offset) + + # convert to linearized coordinates + rows, cols = [], [] + operations = np.prod(diag_shape) + current_idx = np.zeros(a.shape) + current_idx[axis1] = offset + a1 = offset + axes = list(reversed(diag_axes[:-1])) + first_axis = axes[0] + for _ in range(operations): + if a1 == a.shape[axis1]: + a1 = offset + current_idx[axis1] = offset + current_idx[axis2] = 0 + current_idx[first_axis] +=1 + for i in range(len(axes-1)): + if current_idx[axes[i]] == a.shape[axes[i]]: + current_idx[axes[i]] = 0 + current_idx[axes[i+1]] += 1 + + ind = np.ravel_multi_index(current_idx, a.reordered_shape) + row, col = np.unravel_index(ind, a.compressed_shape) + rows.append(row) + cols.append(col) + a1 += 1 + current_idx[axis1] = a1 + current_idx[axis2] += 1 + + # search the diagonals + coords = [] + mask = [] + count = 0 + for r in rows: + current_row = a.indices[a.indptr[r:r+1]] + for c in cols: + s = np.searchsorted(current_row, c) + if not (s >= current_row.size or current_row[s] != col[c]): + s += a.indptr[r] + mask.append(s) + coords.append(count) + count += 1 + coords = np.array(coords) + return 
GCXS.from_coo(COO(coords[None,:],a.data[mask], fill_value=a.fill_value).reshape(diag_shape)) + + + +@numba.jit(nopython=True,nogil=True) +def _diagonal_idx(indices, indptr, axis1, axis2, offset): + + # convert from nd + linearized = np.ravel_multi_index() + +def matmul(a, b): + pass + +def dot(a, b): + pass + +def tensordot(a, b, axes=2): + pass + +def kron(a, b): + from .._coo.umath import _cartesian_product + + check_zero_fill_value(a, b) + + a_sparse = isinstance(a, (SparseArray, scipy.sparse.spmatrix)) + b_sparse = isinstance(b, (SparseArray, scipy.sparse.spmatrix)) + a_ndim = np.ndim(a) + b_ndim = np.ndim(b) + + if not (a_sparse or b_sparse): + raise ValueError( + "Performing this operation would produce a dense " "result: kron" + ) + + if a_ndim == 0 or b_ndim == 0: + return a * b + + a = asCOO(a, check=False) + b = asCOO(b, check=False) + + # Match dimensions + max_dim = max(a.ndim, b.ndim) + a = a.reshape((1,) * (max_dim - a.ndim) + a.shape) + b = b.reshape((1,) * (max_dim - b.ndim) + b.shape) + + a_idx, b_idx = _cartesian_product(np.arange(a.nnz), np.arange(b.nnz)) + + a_expanded_coords = a.coords[:, a_idx] + b_expanded_coords = b.coords[:, b_idx] + o_coords = a_expanded_coords * np.asarray(b.shape)[:, None] + b_expanded_coords + o_data = a.data[a_idx] * b.data[b_idx] + o_shape = tuple(i * j for i, j in zip(a.shape, b.shape)) + + return COO(o_coords, o_data, shape=o_shape, has_duplicates=False) + +def concatenate(arrays, axis=0, compressed_axes=(0,)): + + check_consistent_fill_value(arrays) + arrays = [arr if isinstance(arr, GCXS) else GCXS(arr) for arr in arrays] + axis = normalize_axis(axis, arrays[0].ndim) + nnz = 0 + dim = sum(x.shape[axis] for x in arrays) + shape = list(arrays[0].shape) + shape[axis] = dim + assert all( + x.shape[ax] == arrays[0].shape[ax] + for x in arrays + for ax in set(range(arrays[0].ndim)) - {axis} + ) + # arrays may have different compressed_axes + # flatten to have a better coordinate system + arrays = [arr.flatten() for arr 
in arrays] + indices = np.concatenate([arr.indices for arr in arrays]) + data = np.concatenate([arr.data for arr in arrays]) + + dim = 0 + for x in arrays: + if dim: + indices[nnz : x.nnz + nnz] += dim + dim += x.shape[axis] + nnz += x.nnz + + if axis != 0: + order = np.argsort(indices, kind='mergesort') + indices = indices[order] + data = data[order] + + return GCXS((data, indices, ()), + fill_value=arrays[0].fill_value).reshape(shape, + compressed_axes=compressed_axes) + +def stack(arrays, axis=0): + + from .compressed import GCXS + check_consistent_fill_value(arrays) + arrays = [arr if isinstance(arr, GCXS) else GCXS(arr) for arr in arrays] + axis = normalize_axis(axis, arrays[0].ndim) + nnz = 0 + shape = list(arrays[0].shape) + shape.insert(len(arrays), axis) + assert all( + x.shape[ax] == arrays[0].shape[ax] + for x in arrays + for ax in set(range(arrays[0].ndim)) - {axis} + ) + # arrays may have different compressed_axes + # flatten to have a better coordinate system + arrays = [arr.flatten() for arr in arrays] + indices = np.concatenate([arr.indices for arr in arrays]) + data = np.concatenate([arr.data for arr in arrays]) + + dim = 0 + for x in arrays: + if dim: + indices[nnz : x.nnz + nnz] += dim + dim += x.shape[axis] + nnz += x.nnz + + if axis != 0: + order = np.argsort(indices, kind='mergesort') + indices = indices[order] + data = data[order] + + return GCXS((data, indices, ()), + fill_value=arrays[0].fill_value).reshape(shape, + compressed_axes=compressed_axes) + +def where(condition, x=None, y=None): + pass + +def eye(N, M=None, k=0, dtype=float, compressed_axis=0): + + if M is None: + M = N + + N = int(N) + M = int(M) + k = int(k) + + data_length = min(N, M) + if k > 0: + data_length = max(min(data_length, M - k), 0) + n_coords = np.arange(data_length, dtype=np.intp) + m_coords = n_coords + k + elif k < 0: + data_length = max(min(data_length, N + k), 0) + m_coords = np.arange(data_length, dtype=np.intp) + n_coords = m_coords - k + else: + n_coords = 
m_coords = np.arange(data_length, dtype=np.intp) + + if compressed_axis==0: + indptr = np.empty(N, dtype=np.intp) + indptr[0] = 0 + np.cumsum(np.bincount(n_coords, minlength=N), out=indptr[1:]) + indices = m_coords + else: + indptr = np.empty(M, dtype=np.intp) + indptr[0] = 0 + np.cumsum(np.bincount(m_coords, minlength=M), out=indptr[1:]) + indices = n_coords + + data = np.array(1, dtype=dtype) + return GCXS((data,indices,indptr), + compressed_axes=(compressed_axis,), + dtype=dtype, + fill_value=0) + +def full(shape, fill_value, dtype=None): + """Return a GCXS array of given shape and type, filled with `fill_value`. + Parameters + ---------- + shape : int or tuple of ints + Shape of the new array, e.g., ``(2, 3)`` or ``2``. + fill_value : scalar + Fill value. + dtype : data-type, optional + The desired data-type for the array. The default, `None`, means + `np.array(fill_value).dtype`. + Returns + ------- + out : COO + Array of `fill_value` with the given shape and dtype. + Examples + -------- + >>> full(5, 9).todense() # doctest: +NORMALIZE_WHITESPACE + array([9, 9, 9, 9, 9]) + >>> full((2, 2), 9, dtype=float).todense() # doctest: +SKIP + array([[9., 9.], + [9., 9.]]) + """ + + if dtype is None: + dtype = np.array(fill_value).dtype + if not isinstance(shape, tuple): + shape = (shape,) + data = np.empty(0, dtype=dtype) + indices = np.empty((0, 0), dtype=np.intp) + indptr = np.empty((0, 0), dtype=np.intp) + return GCXS( + (data, + indices, + indptr), + shape=shape, + fill_value=fill_value, + ) + +def full_like(a, fill_value, dtype=None): + """Return a full array with the same shape and type as a given array. + Parameters + ---------- + a : array_like + The shape and data-type of the result will match those of `a`. + dtype : data-type, optional + Overrides the data type of the result. + Returns + ------- + out : COO + Array of `fill_value` with the same shape and type as `a`. 
+ Examples + -------- + >>> x = np.ones((2, 3), dtype='i8') + >>> full_like(x, 9.0).todense() # doctest: +NORMALIZE_WHITESPACE + array([[9, 9, 9], + [9, 9, 9]]) + """ + return full(a.shape, fill_value, dtype=(a.dtype if dtype is None else dtype)) + +def zeros(shape, dtype=float): + """Return a COO array of given shape and type, filled with zeros. + Parameters + ---------- + shape : int or tuple of ints + Shape of the new array, e.g., ``(2, 3)`` or ``2``. + dtype : data-type, optional + The desired data-type for the array, e.g., `numpy.int8`. Default is + `numpy.float64`. + Returns + ------- + out : COO + Array of zeros with the given shape and dtype. + Examples + -------- + >>> zeros(5).todense() # doctest: +SKIP + array([0., 0., 0., 0., 0.]) + >>> zeros((2, 2), dtype=int).todense() # doctest: +NORMALIZE_WHITESPACE + array([[0, 0], + [0, 0]]) + """ + return full(shape, 0, np.dtype(dtype)) + +def zeros_like(a, dtype=float): + """Return a COO array of zeros with the same shape and type as ``a``. + Parameters + ---------- + a : array_like + The shape and data-type of the result will match those of `a`. + dtype : data-type, optional + Overrides the data type of the result. + Returns + ------- + out : COO + Array of zeros with the same shape and type as `a`. + Examples + -------- + >>> x = np.ones((2, 3), dtype='i8') + >>> zeros_like(x).todense() # doctest: +NORMALIZE_WHITESPACE + array([[0, 0, 0], + [0, 0, 0]]) + """ + return zeros(a.shape, dtype=(a.dtype if dtype is None else dtype)) + + +def ones(shape, dtype=float): + """Return a COO array of given shape and type, filled with ones. + Parameters + ---------- + shape : int or tuple of ints + Shape of the new array, e.g., ``(2, 3)`` or ``2``. + dtype : data-type, optional + The desired data-type for the array, e.g., `numpy.int8`. Default is + `numpy.float64`. + Returns + ------- + out : COO + Array of ones with the given shape and dtype. 
+ Examples + -------- + >>> ones(5).todense() # doctest: +SKIP + array([1., 1., 1., 1., 1.]) + >>> ones((2, 2), dtype=int).todense() # doctest: +NORMALIZE_WHITESPACE + array([[1, 1], + [1, 1]]) + """ + return full(shape, 1, np.dtype(dtype)) + +def ones_like(a, dtype=None): + """Return a COO array of ones with the same shape and type as ``a``. + Parameters + ---------- + a : array_like + The shape and data-type of the result will match those of `a`. + dtype : data-type, optional + Overrides the data type of the result. + Returns + ------- + out : COO + Array of ones with the same shape and type as `a`. + Examples + -------- + >>> x = np.ones((2, 3), dtype='i8') + >>> ones_like(x).todense() # doctest: +NORMALIZE_WHITESPACE + array([[1, 1, 1], + [1, 1, 1]]) + """ + return ones(a.shape, dtype=(a.dtype if dtype is None else dtype)) + \ No newline at end of file diff --git a/sparse/_compressed/umath.py b/sparse/_compressed/umath.py new file mode 100644 index 00000000..1f921c7f --- /dev/null +++ b/sparse/_compressed/umath.py @@ -0,0 +1,377 @@ +from itertools import zip_longest +import numpy as np +import numba + + + + + +def diagonal(a, offset=0, axis1=0, axis2=1): + + if a.shape[axis1] != a.shape[axis2]: + raise ValueError("a.shape[axis1] != a.shape[axis2]") + + diag_axes = [ + axis for axis in range(len(a.shape)) if axis != axis1 and axis != axis2 + ] + [axis1] + diag_shape = [a.shape[axis] for axis in diag_axes] + diag_shape[-1] -= abs(offset) + + # convert to linearized coordinates + rows, cols = [], [] + operations = np.prod(diag_shape) + current_idx = np.zeros(a.shape) + current_idx[axis1] = offset + a1 = offset + axes = list(reversed(diag_axes[:-1])) + first_axis = axes[0] + for _ in range(operations): + if a1 == a.shape[axis1]: + a1 = offset + current_idx[axis1] = offset + current_idx[axis2] = 0 + current_idx[first_axis] +=1 + for i in range(len(axes-1)): + if current_idx[axes[i]] == a.shape[axes[i]]: + current_idx[axes[i]] = 0 + current_idx[axes[i+1]] += 1 + + 
ind = np.ravel_multi_index(current_idx, a.reordered_shape) + row, col = np.unravel_index(ind, a.compressed_shape) + rows.append(row) + cols.append(col) + a1 += 1 + current_idx[axis1] = a1 + current_idx[axis2] += 1 + + # search the diagonals + coords = [] + mask = [] + count = 0 + for r in rows: + current_row = a.indices[a.indptr[r:r+1]] + for c in cols: + s = np.searchsorted(current_row, c) + if not (s >= current_row.size or current_row[s] != col[c]): + s += a.indptr[r] + mask.append(s) + coords.append(count) + count += 1 + coords = np.array(coords) + return GCXS.from_coo(COO(coords[None,:],a.data[mask], fill_value=a.fill_value).reshape(diag_shape)) + + + +@numba.jit(nopython=True,nogil=True) +def _diagonal_idx(indices, indptr, axis1, axis2, offset): + + # convert from nd + linearized = np.ravel_multi_index() + +def matmul(a, b): + pass + +def dot(a, b): + pass + +def tensordot(a, b, axes=2): + pass + +def kron(a, b): + from .._coo.umath import _cartesian_product + + check_zero_fill_value(a, b) + + a_sparse = isinstance(a, (SparseArray, scipy.sparse.spmatrix)) + b_sparse = isinstance(b, (SparseArray, scipy.sparse.spmatrix)) + a_ndim = np.ndim(a) + b_ndim = np.ndim(b) + + if not (a_sparse or b_sparse): + raise ValueError( + "Performing this operation would produce a dense " "result: kron" + ) + + if a_ndim == 0 or b_ndim == 0: + return a * b + + a = asCOO(a, check=False) + b = asCOO(b, check=False) + + # Match dimensions + max_dim = max(a.ndim, b.ndim) + a = a.reshape((1,) * (max_dim - a.ndim) + a.shape) + b = b.reshape((1,) * (max_dim - b.ndim) + b.shape) + + a_idx, b_idx = _cartesian_product(np.arange(a.nnz), np.arange(b.nnz)) + + a_expanded_coords = a.coords[:, a_idx] + b_expanded_coords = b.coords[:, b_idx] + o_coords = a_expanded_coords * np.asarray(b.shape)[:, None] + b_expanded_coords + o_data = a.data[a_idx] * b.data[b_idx] + o_shape = tuple(i * j for i, j in zip(a.shape, b.shape)) + + return COO(o_coords, o_data, shape=o_shape, has_duplicates=False) + +def 
concatenate(arrays, axis=0, compressed_axes=(0,)): + + check_consistent_fill_value(arrays) + arrays = [arr if isinstance(arr, GCXS) else GCXS(arr) for arr in arrays] + axis = normalize_axis(axis, arrays[0].ndim) + nnz = 0 + dim = sum(x.shape[axis] for x in arrays) + shape = list(arrays[0].shape) + shape[axis] = dim + assert all( + x.shape[ax] == arrays[0].shape[ax] + for x in arrays + for ax in set(range(arrays[0].ndim)) - {axis} + ) + # arrays may have different compressed_axes + # flatten to have a better coordinate system + arrays = [arr.flatten() for arr in arrays] + indices = np.concatenate([arr.indices for arr in arrays]) + data = np.concatenate([arr.data for arr in arrays]) + + dim = 0 + for x in arrays: + if dim: + indices[nnz : x.nnz + nnz] += dim + dim += x.shape[axis] + nnz += x.nnz + + if axis != 0: + order = np.argsort(indices, kind='mergesort') + indices = indices[order] + data = data[order] + + return GCXS((data, indices, ()), + fill_value=arrays[0].fill_value).reshape(shape, + compressed_axes=compressed_axes) + +def stack(arrays, axis=0): + + from .compressed import GCXS + check_consistent_fill_value(arrays) + arrays = [arr if isinstance(arr, GCXS) else GCXS(arr) for arr in arrays] + axis = normalize_axis(axis, arrays[0].ndim) + nnz = 0 + shape = list(arrays[0].shape) + shape.insert(len(arrays), axis) + assert all( + x.shape[ax] == arrays[0].shape[ax] + for x in arrays + for ax in set(range(arrays[0].ndim)) - {axis} + ) + # arrays may have different compressed_axes + # flatten to have a better coordinate system + arrays = [arr.flatten() for arr in arrays] + indices = np.concatenate([arr.indices for arr in arrays]) + data = np.concatenate([arr.data for arr in arrays]) + + dim = 0 + for x in arrays: + if dim: + indices[nnz : x.nnz + nnz] += dim + dim += x.shape[axis] + nnz += x.nnz + + if axis != 0: + order = np.argsort(indices, kind='mergesort') + indices = indices[order] + data = data[order] + + return GCXS((data, indices, ()), + 
fill_value=arrays[0].fill_value).reshape(shape, + compressed_axes=compressed_axes) + +def where(condition, x=None, y=None): + pass + +def eye(N, M=None, k=0, dtype=float, compressed_axis=0): + + if M is None: + M = N + + N = int(N) + M = int(M) + k = int(k) + + data_length = min(N, M) + if k > 0: + data_length = max(min(data_length, M - k), 0) + n_coords = np.arange(data_length, dtype=np.intp) + m_coords = n_coords + k + elif k < 0: + data_length = max(min(data_length, N + k), 0) + m_coords = np.arange(data_length, dtype=np.intp) + n_coords = m_coords - k + else: + n_coords = m_coords = np.arange(data_length, dtype=np.intp) + + if compressed_axis==0: + indptr = np.empty(N, dtype=np.intp) + indptr[0] = 0 + np.cumsum(np.bincount(n_coords, minlength=N), out=indptr[1:]) + indices = m_coords + else: + indptr = np.empty(M, dtype=np.intp) + indptr[0] = 0 + np.cumsum(np.bincount(m_coords, minlength=M), out=indptr[1:]) + indices = n_coords + + data = np.array(1, dtype=dtype) + return GCXS((data,indices,indptr), + compressed_axes=(compressed_axis,), + dtype=dtype, + fill_value=0) + +def full(shape, fill_value, dtype=None): + """Return a GCXS array of given shape and type, filled with `fill_value`. + Parameters + ---------- + shape : int or tuple of ints + Shape of the new array, e.g., ``(2, 3)`` or ``2``. + fill_value : scalar + Fill value. + dtype : data-type, optional + The desired data-type for the array. The default, `None`, means + `np.array(fill_value).dtype`. + Returns + ------- + out : COO + Array of `fill_value` with the given shape and dtype. 
+ Examples + -------- + >>> full(5, 9).todense() # doctest: +NORMALIZE_WHITESPACE + array([9, 9, 9, 9, 9]) + >>> full((2, 2), 9, dtype=float).todense() # doctest: +SKIP + array([[9., 9.], + [9., 9.]]) + """ + + if dtype is None: + dtype = np.array(fill_value).dtype + if not isinstance(shape, tuple): + shape = (shape,) + data = np.empty(0, dtype=dtype) + indices = np.empty((0, 0), dtype=np.intp) + indptr = np.empty((0, 0), dtype=np.intp) + return GCXS( + (data, + indices, + indptr), + shape=shape, + fill_value=fill_value, + ) + +def full_like(a, fill_value, dtype=None): + """Return a full array with the same shape and type as a given array. + Parameters + ---------- + a : array_like + The shape and data-type of the result will match those of `a`. + dtype : data-type, optional + Overrides the data type of the result. + Returns + ------- + out : COO + Array of `fill_value` with the same shape and type as `a`. + Examples + -------- + >>> x = np.ones((2, 3), dtype='i8') + >>> full_like(x, 9.0).todense() # doctest: +NORMALIZE_WHITESPACE + array([[9, 9, 9], + [9, 9, 9]]) + """ + return full(a.shape, fill_value, dtype=(a.dtype if dtype is None else dtype)) + +def zeros(shape, dtype=float): + """Return a COO array of given shape and type, filled with zeros. + Parameters + ---------- + shape : int or tuple of ints + Shape of the new array, e.g., ``(2, 3)`` or ``2``. + dtype : data-type, optional + The desired data-type for the array, e.g., `numpy.int8`. Default is + `numpy.float64`. + Returns + ------- + out : COO + Array of zeros with the given shape and dtype. + Examples + -------- + >>> zeros(5).todense() # doctest: +SKIP + array([0., 0., 0., 0., 0.]) + >>> zeros((2, 2), dtype=int).todense() # doctest: +NORMALIZE_WHITESPACE + array([[0, 0], + [0, 0]]) + """ + return full(shape, 0, np.dtype(dtype)) + +def zeros_like(a, dtype=float): + """Return a COO array of zeros with the same shape and type as ``a``. 
+ Parameters + ---------- + a : array_like + The shape and data-type of the result will match those of `a`. + dtype : data-type, optional + Overrides the data type of the result. + Returns + ------- + out : COO + Array of zeros with the same shape and type as `a`. + Examples + -------- + >>> x = np.ones((2, 3), dtype='i8') + >>> zeros_like(x).todense() # doctest: +NORMALIZE_WHITESPACE + array([[0, 0, 0], + [0, 0, 0]]) + """ + return zeros(a.shape, dtype=(a.dtype if dtype is None else dtype)) + + +def ones(shape, dtype=float): + """Return a COO array of given shape and type, filled with ones. + Parameters + ---------- + shape : int or tuple of ints + Shape of the new array, e.g., ``(2, 3)`` or ``2``. + dtype : data-type, optional + The desired data-type for the array, e.g., `numpy.int8`. Default is + `numpy.float64`. + Returns + ------- + out : COO + Array of ones with the given shape and dtype. + Examples + -------- + >>> ones(5).todense() # doctest: +SKIP + array([1., 1., 1., 1., 1.]) + >>> ones((2, 2), dtype=int).todense() # doctest: +NORMALIZE_WHITESPACE + array([[1, 1], + [1, 1]]) + """ + return full(shape, 1, np.dtype(dtype)) + +def ones_like(a, dtype=None): + """Return a COO array of ones with the same shape and type as ``a``. + Parameters + ---------- + a : array_like + The shape and data-type of the result will match those of `a`. + dtype : data-type, optional + Overrides the data type of the result. + Returns + ------- + out : COO + Array of ones with the same shape and type as `a`. 
+ Examples + -------- + >>> x = np.ones((2, 3), dtype='i8') + >>> ones_like(x).todense() # doctest: +NORMALIZE_WHITESPACE + array([[1, 1, 1], + [1, 1, 1]]) + """ + return ones(a.shape, dtype=(a.dtype if dtype is None else dtype)) + \ No newline at end of file From 3d347fae4e23ba76f68860e0007b8c9a12d4348e Mon Sep 17 00:00:00 2001 From: unknown Date: Mon, 25 Nov 2019 00:00:53 -0800 Subject: [PATCH 36/72] clean up indexing, fix bug for multiple compressed axes --- sparse/_compressed/convert.py | 11 +++++------ sparse/_compressed/indexing.py | 27 +++++++++++---------------- 2 files changed, 16 insertions(+), 22 deletions(-) diff --git a/sparse/_compressed/convert.py b/sparse/_compressed/convert.py index 912ffabc..f68e4c6a 100644 --- a/sparse/_compressed/convert.py +++ b/sparse/_compressed/convert.py @@ -3,16 +3,15 @@ from numba.typed import List -def convert_to_flat(inds, shape, axisptr): +def convert_to_flat(inds, shape): inds = [np.array(ind) for ind in inds] if any(ind.ndim > 1 for ind in inds): raise IndexError("Only one-dimensional iterable indices supported.") - uncompressed_inds = inds[axisptr:] - cols = np.empty(np.prod([ind.size for ind in uncompressed_inds]), dtype=np.intp) - shape_bins = transform_shape(shape[axisptr:]) + cols = np.empty(np.prod([ind.size for ind in inds]), dtype=np.intp) + shape_bins = transform_shape(shape) increments = List() - for i in range(len(uncompressed_inds)): - increments.append((uncompressed_inds[i] * shape_bins[i]).astype(np.int32)) + for i in range(len(inds)): + increments.append((inds[i] * shape_bins[i]).astype(np.int32)) operations = np.prod([ind.shape[0] for ind in increments[:-1]]) return compute_flat(increments, cols, operations) diff --git a/sparse/_compressed/indexing.py b/sparse/_compressed/indexing.py index 475daf23..7f9d1190 100644 --- a/sparse/_compressed/indexing.py +++ b/sparse/_compressed/indexing.py @@ -69,27 +69,22 @@ def getitem(x, key): count += 1 reordered_key = [Nones_removed[i] for i in x.axis_order] - - # 
prepare for converting to flat indices - for i, ind in enumerate(reordered_key[: x.axisptr]): - if isinstance(ind, slice): - reordered_key[i] = range(ind.start, ind.stop, ind.step) - for i, ind in enumerate(reordered_key[x.axisptr :]): + + for i, ind in enumerate(reordered_key): if isinstance(ind, Integral): - reordered_key[i + x.axisptr] = [ind] - elif isinstance(ind, slice): - reordered_key[i + x.axisptr] = np.arange(ind.start, ind.stop, ind.step) - - # find starts and ends of rows - a = x.indptr[:-1].reshape(x.reordered_shape[: x.axisptr]) - b = x.indptr[1:].reshape(x.reordered_shape[: x.axisptr]) - starts = a[tuple(reordered_key[: x.axisptr])].flatten() - ends = b[tuple(reordered_key[: x.axisptr])].flatten() + reordered_key[i] = [ind] + elif isinstance(ind,slice): + reordered_key[i] = np.arange(ind.start, ind.stop, ind.step) + shape = np.array(shape) - cols = convert_to_flat(reordered_key, x.reordered_shape, x.axisptr) + rows = convert_to_flat(reordered_key[:x.axisptr], x.reordered_shape[:x.axisptr]) + cols = convert_to_flat(reordered_key[x.axisptr:], x.reordered_shape[x.axisptr:]) + + starts = x.indptr[:-1][rows] + ends = x.indptr[1:][rows] if np.any(compressed_inds): compressed_axes = shape_key[compressed_inds] From 132ca3c4fb9fe3f43ef3c0c1b57ff3f54660d71b Mon Sep 17 00:00:00 2001 From: Dale Tovar <37719258+daletovar@users.noreply.github.com> Date: Thu, 18 Apr 2019 22:23:58 -0700 Subject: [PATCH 37/72] Update core.py --- sparse/_coo/core.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sparse/_coo/core.py b/sparse/_coo/core.py index 0f6c661e..478685ff 100644 --- a/sparse/_coo/core.py +++ b/sparse/_coo/core.py @@ -1834,6 +1834,9 @@ def reshape(self, shape, order="C"): if self.shape == shape: return self + + if np.prod(self.shape) != np.prod(shape): + raise ValueError('cannot reshape array of size {} into shape {}'.format(np.prod(self.shape),shape)) if self.size != reduce(operator.mul, shape, 1): raise ValueError( From 
77fae532549a05ed9f6545eac009efa41b086c40 Mon Sep 17 00:00:00 2001 From: Dale Tovar <37719258+daletovar@users.noreply.github.com> Date: Mon, 29 Apr 2019 19:15:42 -0700 Subject: [PATCH 38/72] Update test_coo.py --- sparse/tests/test_coo.py | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/sparse/tests/test_coo.py b/sparse/tests/test_coo.py index 3acc3a46..15479ae5 100644 --- a/sparse/tests/test_coo.py +++ b/sparse/tests/test_coo.py @@ -213,7 +213,23 @@ def test_transpose_error(axis): with pytest.raises(ValueError): x.transpose(axis) +@pytest.mark.parametrize('a,b', [ + [(3, 4), (5, 5)], + [(12,), (3, 4)], + [(12,), (3, 6)], + [(5,5,5), (6,6,6)], + [(3, 4), (9, 4)], + [(5,), (4,)], + [(2, 3, 4, 5), (2, 3, 4, 5, 6)], + [(100,), (5,5)], + [(2, 3, 4, 5), (20, 6)], + [(), ()], +]) +def test_resize(a,b): + s = sparse.random(a, density=0.5) + x = s.todense() +<<<<<<< HEAD @pytest.mark.parametrize("axis1", [-3, -2, -1, 0, 1, 2]) @pytest.mark.parametrize("axis2", [-3, -2, -1, 0, 1, 2]) def test_swapaxes(axis1, axis2): @@ -306,6 +322,22 @@ def test_resize(a, b): [(), ()], ], ) +======= + assert_eq(x.resize(b), s.resize(b)) + +@pytest.mark.parametrize('a,b', [ + [(3, 4), (3, 4)], + [(12,), (3, 4)], + [(12,), (3, -1)], + [(3, 4), (12,)], + [(3, 4), (-1, 4)], + [(3, 4), (3, -1)], + [(2, 3, 4, 5), (8, 15)], + [(2, 3, 4, 5), (24, 5)], + [(2, 3, 4, 5), (20, 6)], + [(), ()], +]) +>>>>>>> Update test_coo.py def test_reshape(a, b): s = sparse.random(a, density=0.5) x = s.todense() From 9b1f720a6b112a69d03e49a03df6bb92c138eeb0 Mon Sep 17 00:00:00 2001 From: Dale Tovar <37719258+daletovar@users.noreply.github.com> Date: Mon, 29 Apr 2019 19:17:38 -0700 Subject: [PATCH 39/72] Update core.py Raises an error for negative dimensions. 
--- sparse/tests/test_coo.py | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/sparse/tests/test_coo.py b/sparse/tests/test_coo.py index 15479ae5..75700f0a 100644 --- a/sparse/tests/test_coo.py +++ b/sparse/tests/test_coo.py @@ -229,7 +229,6 @@ def test_resize(a,b): s = sparse.random(a, density=0.5) x = s.todense() -<<<<<<< HEAD @pytest.mark.parametrize("axis1", [-3, -2, -1, 0, 1, 2]) @pytest.mark.parametrize("axis2", [-3, -2, -1, 0, 1, 2]) def test_swapaxes(axis1, axis2): @@ -322,22 +321,6 @@ def test_resize(a, b): [(), ()], ], ) -======= - assert_eq(x.resize(b), s.resize(b)) - -@pytest.mark.parametrize('a,b', [ - [(3, 4), (3, 4)], - [(12,), (3, 4)], - [(12,), (3, -1)], - [(3, 4), (12,)], - [(3, 4), (-1, 4)], - [(3, 4), (3, -1)], - [(2, 3, 4, 5), (8, 15)], - [(2, 3, 4, 5), (24, 5)], - [(2, 3, 4, 5), (20, 6)], - [(), ()], -]) ->>>>>>> Update test_coo.py def test_reshape(a, b): s = sparse.random(a, density=0.5) x = s.todense() From 9288e4732bdf5da05e81f5b1cdc696ed7547e176 Mon Sep 17 00:00:00 2001 From: Dale Tovar <37719258+daletovar@users.noreply.github.com> Date: Mon, 29 Apr 2019 19:33:24 -0700 Subject: [PATCH 40/72] Update test_coo.py --- sparse/tests/test_coo.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sparse/tests/test_coo.py b/sparse/tests/test_coo.py index 75700f0a..d4efe5b4 100644 --- a/sparse/tests/test_coo.py +++ b/sparse/tests/test_coo.py @@ -229,6 +229,10 @@ def test_resize(a,b): s = sparse.random(a, density=0.5) x = s.todense() + x.resize(b) + s.resize(b) + assert_eq(x, s) + @pytest.mark.parametrize("axis1", [-3, -2, -1, 0, 1, 2]) @pytest.mark.parametrize("axis2", [-3, -2, -1, 0, 1, 2]) def test_swapaxes(axis1, axis2): From bf38b4da5c4ba5d2dcc6de71fa4e44644de733df Mon Sep 17 00:00:00 2001 From: Hameer Abbasi Date: Tue, 30 Apr 2019 10:36:17 +0200 Subject: [PATCH 41/72] Fix Flake8 issues. 
--- sparse/_coo/core.py | 4 ++-- sparse/tests/test_coo.py | 7 ++++--- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/sparse/_coo/core.py b/sparse/_coo/core.py index 478685ff..2ae595ca 100644 --- a/sparse/_coo/core.py +++ b/sparse/_coo/core.py @@ -1834,9 +1834,9 @@ def reshape(self, shape, order="C"): if self.shape == shape: return self - + if np.prod(self.shape) != np.prod(shape): - raise ValueError('cannot reshape array of size {} into shape {}'.format(np.prod(self.shape),shape)) + raise ValueError('cannot reshape array of size {} into shape {}'.format(np.prod(self.shape), shape)) if self.size != reduce(operator.mul, shape, 1): raise ValueError( diff --git a/sparse/tests/test_coo.py b/sparse/tests/test_coo.py index d4efe5b4..3c477080 100644 --- a/sparse/tests/test_coo.py +++ b/sparse/tests/test_coo.py @@ -213,19 +213,20 @@ def test_transpose_error(axis): with pytest.raises(ValueError): x.transpose(axis) + @pytest.mark.parametrize('a,b', [ [(3, 4), (5, 5)], [(12,), (3, 4)], [(12,), (3, 6)], - [(5,5,5), (6,6,6)], + [(5, 5, 5), (6, 6, 6)], [(3, 4), (9, 4)], [(5,), (4,)], [(2, 3, 4, 5), (2, 3, 4, 5, 6)], - [(100,), (5,5)], + [(100,), (5, 5)], [(2, 3, 4, 5), (20, 6)], [(), ()], ]) -def test_resize(a,b): +def test_resize(a, b): s = sparse.random(a, density=0.5) x = s.todense() From 0956a251b36117d32e7a2330e8be9ce632b92b2a Mon Sep 17 00:00:00 2001 From: Hameer Abbasi Date: Tue, 30 Apr 2019 11:21:23 +0200 Subject: [PATCH 42/72] Fix up tests and code. 
--- sparse/_coo/core.py | 6 ++++-- sparse/tests/test_coo.py | 11 +++++++++++ 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/sparse/_coo/core.py b/sparse/_coo/core.py index 2ae595ca..de2f5ad2 100644 --- a/sparse/_coo/core.py +++ b/sparse/_coo/core.py @@ -1835,8 +1835,8 @@ def reshape(self, shape, order="C"): if self.shape == shape: return self - if np.prod(self.shape) != np.prod(shape): - raise ValueError('cannot reshape array of size {} into shape {}'.format(np.prod(self.shape), shape)) + if self.size != reduce(operator.mul, shape, 1): + raise ValueError('cannot reshape array of size {} into shape {}'.format(self.size, shape)) if self.size != reduce(operator.mul, shape, 1): raise ValueError( @@ -1896,6 +1896,8 @@ def resize(self, *args, refcheck=True): new_size = reduce(operator.mul, shape, 1) + new_size = reduce(operator.mul, shape, 1) + # TODO: this self.size enforces a 2**64 limit to array size linear_loc = self.linear_loc() end_idx = np.searchsorted(linear_loc, new_size, side="left") diff --git a/sparse/tests/test_coo.py b/sparse/tests/test_coo.py index 3c477080..52f187d2 100644 --- a/sparse/tests/test_coo.py +++ b/sparse/tests/test_coo.py @@ -228,10 +228,14 @@ def test_transpose_error(axis): ]) def test_resize(a, b): s = sparse.random(a, density=0.5) + orig_size = s.size x = s.todense() x.resize(b) s.resize(b) + temp = x.reshape(x.size) + temp[orig_size:] = s.fill_value + assert isinstance(s, sparse.SparseArray) assert_eq(x, s) @pytest.mark.parametrize("axis1", [-3, -2, -1, 0, 1, 2]) @@ -1689,6 +1693,7 @@ def test_add_many_sparse_arrays(): def test_caching(): x = COO({(9, 9, 9): 1}) +<<<<<<< HEAD assert ( x[:].reshape((100, 10)).transpose().tocsr() is not x[:].reshape((100, 10)).transpose().tocsr() @@ -1699,6 +1704,12 @@ def test_caching(): x[:].reshape((100, 10)).transpose().tocsr() is x[:].reshape((100, 10)).transpose().tocsr() ) +======= + assert x[:].reshape((100, 10)).transpose().tocsr() is not x[:].reshape((100, 10)).transpose().tocsr() + 
+ x = COO({(9, 9, 9): 1}, cache=True) + assert x[:].reshape((100, 10)).transpose().tocsr() is x[:].reshape((100, 10)).transpose().tocsr() +>>>>>>> Fix up tests and code. x = COO({(1, 1, 1, 1, 1, 1, 1, 2): 1}, cache=True) From ec0c2725ac0d98b57e3f7405385d4ba730431655 Mon Sep 17 00:00:00 2001 From: daletovar Date: Wed, 28 Aug 2019 15:07:22 -0700 Subject: [PATCH 43/72] change gxcs with gcxs --- sparse/_compressed/indexing.py | 7 +++++++ sparse/_coo/core.py | 5 +++++ 2 files changed, 12 insertions(+) diff --git a/sparse/_compressed/indexing.py b/sparse/_compressed/indexing.py index 02db7f4c..cdd1eeed 100644 --- a/sparse/_compressed/indexing.py +++ b/sparse/_compressed/indexing.py @@ -165,6 +165,7 @@ def getitem(x, key): compressed_axes = None return GCXS( +<<<<<<< HEAD arg, shape=shape, compressed_axes=compressed_axes, fill_value=x.fill_value ) @@ -232,6 +233,12 @@ def get_slicing_selection( indices = np.array(indices) data = arr_data[ind_list] return (data, indices, indptr) +======= + arg, + shape=shape, + compressed_axes=compressed_axes, + fill_value=x.fill_value) +>>>>>>> change gxcs with gcxs @numba.jit(nopython=True, nogil=True) diff --git a/sparse/_coo/core.py b/sparse/_coo/core.py index de2f5ad2..02aa788a 100644 --- a/sparse/_coo/core.py +++ b/sparse/_coo/core.py @@ -2304,10 +2304,15 @@ def asformat(self, format, compressed_axes=None): If the format isn't supported. 
""" from .._compressed import GCXS +<<<<<<< HEAD if format == "gcxs" or format is GCXS: return GCXS.from_coo(self, compressed_axes=compressed_axes) +======= + if format == 'gcxs' or format is GCXS: + return GCXS.from_coo(self, compressed_axes=compressed_axes) +>>>>>>> change gxcs with gcxs elif compressed_axes is not None: raise ValueError( "compressed_axes is not supported for {} format".format(format) From b212d96ed9e47e2306f8b5313550a8598aa92ad3 Mon Sep 17 00:00:00 2001 From: Dale Tovar <37719258+daletovar@users.noreply.github.com> Date: Fri, 30 Aug 2019 20:24:10 -0700 Subject: [PATCH 44/72] add self.nbytes property and self.format attribute --- sparse/_dok.py | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/sparse/_dok.py b/sparse/_dok.py index 5e43df5b..71af3e82 100644 --- a/sparse/_dok.py +++ b/sparse/_dok.py @@ -92,6 +92,7 @@ def __init__(self, shape, data=None, dtype=None, fill_value=None): from ._coo import COO self.data = dict() + self.format = 'dok' if isinstance(shape, COO): ar = DOK.from_coo(shape) @@ -242,6 +243,30 @@ def nnz(self): """ return len(self.data) + @property + def nbytes(self): + """ + The number of bytes taken up by this object. Note that for small arrays, + this may undercount the number of bytes due to the large constant overhead. + + Returns + ------- + int + The approximate bytes of memory taken by this object. + + See Also + -------- + numpy.ndarray.nbytes : The equivalent Numpy property. 
+ + Examples + -------- + >>> import sparse + >>> x = sparse.random((100,100),density=.1,format='dok') + >>> x.nbytes + 8000 + """ + return self.nnz * self.dtype.itemsize + def __getitem__(self, key): key = normalize_index(key, self.shape) From b1959a17b452a2c7fa8ce682b578f26241a628f0 Mon Sep 17 00:00:00 2001 From: Dale Tovar <37719258+daletovar@users.noreply.github.com> Date: Fri, 30 Aug 2019 20:27:49 -0700 Subject: [PATCH 45/72] add self.format --- sparse/_coo/core.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sparse/_coo/core.py b/sparse/_coo/core.py index 02aa788a..cac4a2b6 100644 --- a/sparse/_coo/core.py +++ b/sparse/_coo/core.py @@ -220,6 +220,7 @@ def __init__( self.data = np.asarray(data) self.coords = np.asarray(coords) + self.format = 'coo' if self.coords.ndim == 1: if self.coords.size == 0 and shape is not None: From 196d24fc62c4d0fa60cc0d6989173a40e5527a79 Mon Sep 17 00:00:00 2001 From: Dale Tovar <37719258+daletovar@users.noreply.github.com> Date: Fri, 6 Sep 2019 11:13:48 -0700 Subject: [PATCH 46/72] add format property --- sparse/_sparse_array.py | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/sparse/_sparse_array.py b/sparse/_sparse_array.py index 2d4b093f..1320d812 100644 --- a/sparse/_sparse_array.py +++ b/sparse/_sparse_array.py @@ -77,7 +77,26 @@ def nnz(self): >>> np.count_nonzero(x) == s.nnz True """ - + + @property + @abstractmethod + def format(self): + """ + The storage format of this array. + + Returns + ------- + str + The storage format of this array. 
+ + Examples + ------- + >>> import sparse + >>> s = sparse.random((5,5), density=0.2) + >>> s.format + 'coo' + """ + @property def ndim(self): """ From 8017082cd0d1d1e298b6d96a51a401b32f2f1a1d Mon Sep 17 00:00:00 2001 From: Dale Tovar <37719258+daletovar@users.noreply.github.com> Date: Fri, 6 Sep 2019 11:19:45 -0700 Subject: [PATCH 47/72] add format property --- sparse/_coo/core.py | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/sparse/_coo/core.py b/sparse/_coo/core.py index cac4a2b6..ae9cd974 100644 --- a/sparse/_coo/core.py +++ b/sparse/_coo/core.py @@ -220,7 +220,6 @@ def __init__( self.data = np.asarray(data) self.coords = np.asarray(coords) - self.format = 'coo' if self.coords.ndim == 1: if self.coords.size == 0 and shape is not None: @@ -570,6 +569,31 @@ def dtype(self): True """ return self.data.dtype + + @property + def format(self): + """ + The storage format of this array. + + Returns + ------- + str + The storage format of this array. + + See Also + ------- + DOK.format : Equivalent :obj:`DOK` array property. + GCXS.format : Equivalent :obj:`GCXS` array property. + scipy.sparse.coo_matrix.format : The Scipy equivalent property. 
+ + Examples + ------- + >>> import sparse + >>> s = sparse.random((5,5), density=0.2) + >>> s.format + 'coo' + """ + return 'coo' @property def nnz(self): From 3b8b20f355aef450180503005bd05c639b2dfbe0 Mon Sep 17 00:00:00 2001 From: Dale Tovar <37719258+daletovar@users.noreply.github.com> Date: Fri, 6 Sep 2019 11:22:06 -0700 Subject: [PATCH 48/72] add format --- sparse/_dok.py | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/sparse/_dok.py b/sparse/_dok.py index 71af3e82..ab3eebb5 100644 --- a/sparse/_dok.py +++ b/sparse/_dok.py @@ -92,7 +92,6 @@ def __init__(self, shape, data=None, dtype=None, fill_value=None): from ._coo import COO self.data = dict() - self.format = 'dok' if isinstance(shape, COO): ar = DOK.from_coo(shape) @@ -242,7 +241,32 @@ def nnz(self): 1 """ return len(self.data) - + + @property + def format(self): + """ + The storage format of this array. + + Returns + ------- + str + The storage format of this array. + + See Also + ------- + COO.format : Equivalent :obj:`COO` array property. + GCXS.format : Equivalent :obj:`GCXS` array property. + scipy.sparse.dok_matrix.format : The Scipy equivalent property. 
+ + Examples + ------- + >>> import sparse + >>> s = sparse.random((5,5), density=0.2, format='dok') + >>> s.format + 'dok' + """ + return 'dok' + @property def nbytes(self): """ From b8866804bc21867063bee7d8a0994c097d4c6e36 Mon Sep 17 00:00:00 2001 From: Dale Tovar <37719258+daletovar@users.noreply.github.com> Date: Fri, 6 Sep 2019 13:18:06 -0700 Subject: [PATCH 49/72] formatting --- sparse/_dok.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sparse/_dok.py b/sparse/_dok.py index ab3eebb5..b82c4fb1 100644 --- a/sparse/_dok.py +++ b/sparse/_dok.py @@ -241,7 +241,7 @@ def nnz(self): 1 """ return len(self.data) - + @property def format(self): """ @@ -264,9 +264,9 @@ def format(self): >>> s = sparse.random((5,5), density=0.2, format='dok') >>> s.format 'dok' - """ - return 'dok' - + """ + return "dok" + @property def nbytes(self): """ From 8e46dc5247c00234df01ce002ae6ac40671d1d1d Mon Sep 17 00:00:00 2001 From: Dale Tovar <37719258+daletovar@users.noreply.github.com> Date: Fri, 6 Sep 2019 13:30:48 -0700 Subject: [PATCH 50/72] formatting --- sparse/_sparse_array.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sparse/_sparse_array.py b/sparse/_sparse_array.py index 1320d812..28f9e459 100644 --- a/sparse/_sparse_array.py +++ b/sparse/_sparse_array.py @@ -77,7 +77,7 @@ def nnz(self): >>> np.count_nonzero(x) == s.nnz True """ - + @property @abstractmethod def format(self): @@ -96,7 +96,7 @@ def format(self): >>> s.format 'coo' """ - + @property def ndim(self): """ From afdcb9016b6174fa1eeab82ec2b32b3980faf108 Mon Sep 17 00:00:00 2001 From: daletovar Date: Thu, 30 Jul 2020 15:18:55 -0700 Subject: [PATCH 51/72] refactor dot code --- sparse/__init__.py | 2 +- sparse/_common.py | 1266 +++++++++++++++++++++++++++++++++++++- sparse/_coo/common.py | 576 +---------------- sparse/_coo/core.py | 61 +- sparse/tests/test_coo.py | 224 +------ sparse/tests/test_dot.py | 240 ++++++++ 6 files changed, 1521 insertions(+), 848 
deletions(-) create mode 100644 sparse/tests/test_dot.py diff --git a/sparse/__init__.py b/sparse/__init__.py index ab2613d1..34cbe168 100644 --- a/sparse/__init__.py +++ b/sparse/__init__.py @@ -1,4 +1,4 @@ -from ._coo import COO +from ._coo import COO, as_coo from ._dok import DOK from ._sparse_array import SparseArray from ._utils import random diff --git a/sparse/_common.py b/sparse/_common.py index 59471fd0..a879a185 100644 --- a/sparse/_common.py +++ b/sparse/_common.py @@ -1,13 +1,16 @@ import numpy as np +import numba +import scipy.sparse +from functools import wraps +from itertools import chain from collections.abc import Iterable -from sparse import COO -from ._utils import check_compressed_axes, normalize_axis -from ._coo import ( +from ._sparse_array import SparseArray +from ._utils import check_compressed_axes, normalize_axis, check_zero_fill_value + +from ._coo.umath import elemwise +from ._coo.common import ( clip, - tensordot, - dot, - matmul, triu, tril, where, @@ -25,11 +28,1258 @@ result_type, diagonal, diagonalize, - elemwise, - as_coo, + asCOO, ) +def tensordot(a, b, axes=2, *, return_type=None): + """ + Perform the equivalent of :obj:`numpy.tensordot`. + + Parameters + ---------- + a, b : Union[COO, np.ndarray, scipy.sparse.spmatrix] + The arrays to perform the :code:`tensordot` operation on. + axes : tuple[Union[int, tuple[int], Union[int, tuple[int]], optional + The axes to match when performing the sum. + return_type : {None, COO, np.ndarray}, optional + Type of returned array. + + + Returns + ------- + Union[COO, numpy.ndarray] + The result of the operation. + + Raises + ------ + ValueError + If all arguments don't have zero fill-values. 
+ + See Also + -------- + numpy.tensordot : NumPy equivalent function + """ + from ._compressed import GCXS + + # Much of this is stolen from numpy/core/numeric.py::tensordot + # Please see license at https://github.com/numpy/numpy/blob/master/LICENSE.txt + check_zero_fill_value(a, b) + + if scipy.sparse.issparse(a): + a = GCXS.from_scipy_sparse(a) + if scipy.sparse.issparse(b): + b = GCXS.from_scipy_sparse(b) + + try: + iter(axes) + except TypeError: + axes_a = list(range(-axes, 0)) + axes_b = list(range(0, axes)) + else: + axes_a, axes_b = axes + try: + na = len(axes_a) + axes_a = list(axes_a) + except TypeError: + axes_a = [axes_a] + na = 1 + try: + nb = len(axes_b) + axes_b = list(axes_b) + except TypeError: + axes_b = [axes_b] + nb = 1 + + # a, b = asarray(a), asarray(b) # <--- modified + as_ = a.shape + nda = a.ndim + bs = b.shape + ndb = b.ndim + equal = True + if nda == 0 or ndb == 0: + pos = int(nda != 0) + raise ValueError("Input {} operand does not have enough dimensions".format(pos)) + if na != nb: + equal = False + else: + for k in range(na): + if as_[axes_a[k]] != bs[axes_b[k]]: + equal = False + break + if axes_a[k] < 0: + axes_a[k] += nda + if axes_b[k] < 0: + axes_b[k] += ndb + if not equal: + raise ValueError("shape-mismatch for sum") + + # Move the axes to sum over to the end of "a" + # and to the front of "b" + notin = [k for k in range(nda) if k not in axes_a] + newaxes_a = notin + axes_a + N2 = 1 + for axis in axes_a: + N2 *= as_[axis] + newshape_a = (-1, N2) + olda = [as_[axis] for axis in notin] + + notin = [k for k in range(ndb) if k not in axes_b] + newaxes_b = axes_b + notin + N2 = 1 + for axis in axes_b: + N2 *= bs[axis] + newshape_b = (N2, -1) + oldb = [bs[axis] for axis in notin] + + if any(dim == 0 for dim in chain(newshape_a, newshape_b)): + res = asCOO(np.empty(olda + oldb), check=False) + if isinstance(a, np.ndarray) or isinstance(b, np.ndarray): + res = res.todense() + + return res + + at = 
a.transpose(newaxes_a).reshape(newshape_a) + bt = b.transpose(newaxes_b).reshape(newshape_b) + res = _dot(at, bt, return_type) + return res.reshape(olda + oldb) + + +def matmul(a, b): + """Perform the equivalent of :obj:`numpy.matmul` on two arrays. + + Parameters + ---------- + a, b : Union[COO, np.ndarray, scipy.sparse.spmatrix] + The arrays to perform the :code:`matmul` operation on. + + Returns + ------- + Union[COO, numpy.ndarray] + The result of the operation. + + Raises + ------ + ValueError + If all arguments don't have zero fill-values, or the shape of the two arrays is not broadcastable. + + See Also + -------- + numpy.matmul : NumPy equivalent function. + COO.__matmul__ : Equivalent function for COO objects. + """ + check_zero_fill_value(a, b) + if not hasattr(a, "ndim") or not hasattr(b, "ndim"): + raise TypeError( + "Cannot perform dot product on types %s, %s" % (type(a), type(b)) + ) + + # When b is 2-d, it is equivalent to dot + if b.ndim <= 2: + return dot(a, b) + + # when a is 2-d, we need to transpose result after dot + if a.ndim <= 2: + res = dot(a, b) + axes = list(range(res.ndim)) + axes.insert(-1, axes.pop(0)) + return res.transpose(axes) + + # If a can be squeeze to a vector, use dot will be faster + if a.ndim <= b.ndim and np.prod(a.shape[:-1]) == 1: + res = dot(a.reshape(-1), b) + shape = list(res.shape) + shape.insert(-1, 1) + return res.reshape(shape) + + # If b can be squeeze to a matrix, use dot will be faster + if b.ndim <= a.ndim and np.prod(b.shape[:-2]) == 1: + return dot(a, b.reshape(b.shape[-2:])) + + if a.ndim < b.ndim: + a = a[(None,) * (b.ndim - a.ndim)] + if a.ndim > b.ndim: + b = b[(None,) * (a.ndim - b.ndim)] + for i, j in zip(a.shape[:-2], b.shape[:-2]): + if i != 1 and j != 1 and i != j: + raise ValueError("shapes of a and b are not broadcastable") + + def _matmul_recurser(a, b): + if a.ndim == 2: + return dot(a, b) + res = [] + for i in range(max(a.shape[0], b.shape[0])): + a_i = a[0] if a.shape[0] == 1 else a[i] + b_i = 
def dot(a, b):
    """
    Perform the equivalent of :obj:`numpy.dot` on two arrays.

    Parameters
    ----------
    a, b : Union[COO, np.ndarray, scipy.sparse.spmatrix]
        The arrays to perform the :code:`dot` operation on.

    Returns
    -------
    Union[COO, numpy.ndarray]
        The result of the operation.

    Raises
    ------
    ValueError
        If all arguments don't have zero fill-values.
    TypeError
        If either argument has no ``ndim`` attribute.

    See Also
    --------
    numpy.dot : NumPy equivalent function.
    COO.dot : Equivalent function for COO objects.
    """
    check_zero_fill_value(a, b)
    if not hasattr(a, "ndim") or not hasattr(b, "ndim"):
        raise TypeError(
            "Cannot perform dot product on types %s, %s" % (type(a), type(b))
        )

    # Vector . vector: element-wise product followed by a full reduction.
    if a.ndim == 1 and b.ndim == 1:
        if isinstance(a, SparseArray):
            a = asCOO(a)
        if isinstance(b, SparseArray):
            b = asCOO(b)
        return (a * b).sum()

    # Contract the last axis of ``a`` with the second-to-last axis of ``b``
    # (or with its only axis when ``b`` is 1-d).
    a_axis = -1
    b_axis = -1 if b.ndim == 1 else -2
    return tensordot(a, b, axes=(a_axis, b_axis))


def _dot(a, b, return_type=None):
    """
    Multiply two 2-d operands, dispatching on their array types.

    Parameters
    ----------
    a, b : Union[GCXS, COO, np.ndarray]
        The operands; only ``shape[0]`` of ``a`` and ``shape[1]`` of ``b``
        are used to build the output shape.
    return_type : {None, COO, GCXS, np.ndarray}, optional
        Requested type of the result. With ``None`` the result is dense
        when exactly one operand is an ``ndarray`` and sparse when both
        operands are sparse.

    Returns
    -------
    Union[GCXS, COO, np.ndarray]
        The product ``a @ b``.

    Raises
    ------
    TypeError
        If the combination of operand types is not handled.
    """
    from ._coo import COO
    from ._compressed import GCXS
    from ._sparse_array import SparseArray

    out_shape = (a.shape[0], b.shape[1])

    # Two sparse operands of which at least one is GCXS: use GCXS for both.
    if all(isinstance(arr, SparseArray) for arr in [a, b]) and any(
        isinstance(arr, GCXS) for arr in [a, b]
    ):
        a = a.asformat("gcxs")
        b = b.asformat("gcxs")

    if isinstance(a, GCXS) and isinstance(b, GCXS):
        if a.compressed_axes == (0,):
            compressed_axes = (0,)
            if b.compressed_axes == (0,):
                data, indices, indptr = _dot_csr_csr_type(a.dtype, b.dtype)(
                    out_shape, a.data, b.data, a.indices, b.indices, a.indptr, b.indptr
                )
            else:
                data, indices, indptr = _dot_csr_csc_type(a.dtype, b.dtype)(
                    out_shape, a.data, b.data, a.indices, b.indices, a.indptr, b.indptr
                )
        elif a.compressed_axes == (1,):
            if b.compressed_axes == (0,):
                # NOTE(review): the original comment said "convert the smaller
                # matrix", yet the operand that gets converted is the one whose
                # ``size`` is larger or equal. The result is the same either
                # way; confirm which heuristic was intended.
                if a.size >= b.size:
                    compressed_axes = (0,)
                    a = a.change_compressed_axes((0,))
                    data, indices, indptr = _dot_csr_csr_type(a.dtype, b.dtype)(
                        out_shape,
                        a.data,
                        b.data,
                        a.indices,
                        b.indices,
                        a.indptr,
                        b.indptr,
                    )
                else:
                    compressed_axes = (1,)
                    b = b.change_compressed_axes((1,))
                    data, indices, indptr = _dot_csr_csr_type(b.dtype, a.dtype)(
                        out_shape[::-1],
                        b.data,
                        a.data,
                        b.indices,
                        a.indices,
                        b.indptr,
                        a.indptr,
                    )
            else:
                # a @ b = (b.T @ a.T).T
                compressed_axes = (1,)
                data, indices, indptr = _dot_csr_csr_type(b.dtype, a.dtype)(
                    out_shape[::-1],
                    b.data,
                    a.data,
                    b.indices,
                    a.indices,
                    b.indptr,
                    a.indptr,
                )

        out = GCXS(
            (data, indices, indptr), shape=out_shape, compressed_axes=compressed_axes
        )
        if return_type == np.ndarray:
            return out.todense()
        elif return_type == COO:
            return out.tocoo()
        return out

    if isinstance(a, GCXS) and isinstance(b, np.ndarray):
        if a.compressed_axes == (0,):
            if return_type is None or return_type == np.ndarray:
                return _dot_csr_ndarray_type(a.dtype, b.dtype)(
                    out_shape, a.data, a.indices, a.indptr, b
                )
            data, indices, indptr = _dot_csr_ndarray_type_sparse(a.dtype, b.dtype)(
                out_shape, a.data, a.indices, a.indptr, b
            )
            out = GCXS((data, indices, indptr), shape=out_shape, compressed_axes=(0,))
            if return_type == COO:
                return out.tocoo()
            return out

        # compressed_axes == (1,)
        if return_type is None or return_type == np.ndarray:
            return _dot_csc_ndarray_type(a.dtype, b.dtype)(
                a.shape, b.shape, a.data, a.indices, a.indptr, b
            )
        data, indices, indptr = _dot_csc_ndarray_type_sparse(a.dtype, b.dtype)(
            a.shape, b.shape, a.data, a.indices, a.indptr, b
        )
        out = GCXS((data, indices, indptr), shape=out_shape, compressed_axes=(1,))
        # A dense return type was already served above, so only COO needs
        # a conversion here.
        if return_type == COO:
            return out.tocoo()
        return out

    if isinstance(a, np.ndarray) and isinstance(b, GCXS):
        at = a.view(type=np.ndarray).T
        bt = b.T  # constant-time transpose
        if b.compressed_axes == (0,):
            # a @ b = (b.T @ a.T).T, with b.T stored with compressed columns.
            if return_type is None or return_type == np.ndarray:
                out = _dot_csc_ndarray_type(bt.dtype, at.dtype)(
                    bt.shape, at.shape, bt.data, bt.indices, bt.indptr, at
                )
                return out.T
            # Take data, indices and indptr from the same (transposed) object.
            data, indices, indptr = _dot_csc_ndarray_type_sparse(bt.dtype, at.dtype)(
                bt.shape, at.shape, bt.data, bt.indices, bt.indptr, at
            )
            out = GCXS((data, indices, indptr), shape=out_shape, compressed_axes=(0,))
            if return_type == COO:
                return out.tocoo()
            return out

        # compressed_axes == (1,)
        if return_type is None or return_type == np.ndarray:
            return _dot_ndarray_csc_type(a.dtype, b.dtype)(
                out_shape, b.data, b.indices, b.indptr, a
            )
        data, indices, indptr = _dot_csr_ndarray_type_sparse(bt.dtype, at.dtype)(
            out_shape[::-1], bt.data, bt.indices, bt.indptr, at
        )
        out = GCXS((data, indices, indptr), shape=out_shape, compressed_axes=(1,))
        if return_type == COO:
            return out.tocoo()
        return out

    if isinstance(a, COO) and isinstance(b, COO):
        # Build CSR-style row pointers from the row coordinates of each operand.
        a_indptr = np.empty(a.shape[0] + 1, dtype=np.intp)
        a_indptr[0] = 0
        np.cumsum(np.bincount(a.coords[0], minlength=a.shape[0]), out=a_indptr[1:])

        b_indptr = np.empty(b.shape[0] + 1, dtype=np.intp)
        b_indptr[0] = 0
        np.cumsum(np.bincount(b.coords[0], minlength=b.shape[0]), out=b_indptr[1:])
        coords, data = _dot_coo_coo_type(a.dtype, b.dtype)(
            out_shape, a.coords, b.coords, a.data, b.data, a_indptr, b_indptr
        )
        out = COO(coords, data, shape=out_shape, has_duplicates=False, sorted=True)

        if return_type == np.ndarray:
            return out.todense()
        elif return_type == GCXS:
            return out.asformat("gcxs")
        return out

    if isinstance(a, COO) and isinstance(b, np.ndarray):
        # The COO/ndarray kernels expect the dense operand transposed.
        b = b.view(type=np.ndarray).T

        if return_type is None or return_type == np.ndarray:
            return _dot_coo_ndarray_type(a.dtype, b.dtype)(
                a.coords, a.data, b, out_shape
            )
        coords, data = _dot_coo_ndarray_type_sparse(a.dtype, b.dtype)(
            a.coords, a.data, b, out_shape
        )
        out = COO(coords, data, shape=out_shape, has_duplicates=False, sorted=True)
        if return_type == GCXS:
            return out.asformat("gcxs")
        # Return explicitly instead of relying on falling through the
        # remaining type checks.
        return out

    if isinstance(a, np.ndarray) and isinstance(b, COO):
        # The ndarray/COO kernels expect the sparse operand transposed.
        b = b.T
        a = a.view(type=np.ndarray)

        if return_type is None or return_type == np.ndarray:
            return _dot_ndarray_coo_type(a.dtype, b.dtype)(
                a, b.coords, b.data, out_shape
            )
        coords, data = _dot_ndarray_coo_type_sparse(a.dtype, b.dtype)(
            a, b.coords, b.data, out_shape
        )
        out = COO(coords, data, shape=out_shape, has_duplicates=False, sorted=True)
        if return_type == GCXS:
            return out.asformat("gcxs")
        return out

    raise TypeError(
        "Cannot perform dot product on types %s, %s" % (type(a), type(b))
    )


def _memoize_dtype(f):
    """
    Memoizes a function taking in NumPy dtypes.

    Every argument is normalised with :obj:`numpy.dtype` and the cache is
    keyed on the resulting dtype objects, so equal dtypes share one entry
    while dtypes that merely share a ``name`` (for example the two byte
    orders of ``int32``) do not.

    Parameters
    ----------
    f : Callable
        Function whose positional arguments are all dtypes (or objects
        accepted by :obj:`numpy.dtype`).

    Returns
    -------
    wrapped : Callable
        Caching wrapper around ``f``; ``f`` receives the normalised dtypes.

    Examples
    --------
    >>> def func(dt1):
    ...     return object()
    >>> func = _memoize_dtype(func)
    >>> func(np.dtype('i8')) is func(np.dtype('int64'))
    True
    >>> func(np.dtype('i8')) is func(np.dtype('i4'))
    False
    """
    cache = {}

    @wraps(f)
    def wrapped(*args):
        key = tuple(np.dtype(arg) for arg in args)
        try:
            return cache[key]
        except KeyError:
            result = cache[key] = f(*key)
            return result

    return wrapped
@numba.jit(nopython=True, nogil=True)
def _csr_csr_count_nnz(out_shape, a_indices, b_indices, a_indptr, b_indptr):
    """
    Count the structurally non-zero entries of ``a @ b`` where both ``a``
    and ``b`` are stored with compressed rows.

    An output position is counted once per row as soon as any pair of
    stored entries contributes to it; stored values are not inspected.

    Parameters
    ----------
    out_shape : tuple
        The shape of the output array.

    a_indices, a_indptr : np.ndarray
        The indices and index pointer array of ``a``.

    b_indices, b_indptr : np.ndarray
        The indices and index pointer array of ``b``.

    Returns
    -------
    int
        The number of counted output positions.
    """
    rows, cols = out_shape
    # last_seen[c] holds the most recent output row in which column c was hit.
    last_seen = np.full(cols, -1)
    total = 0
    for row in range(rows):
        for a_pos in range(a_indptr[row], a_indptr[row + 1]):
            inner = a_indices[a_pos]
            for b_pos in range(b_indptr[inner], b_indptr[inner + 1]):
                col = b_indices[b_pos]
                if last_seen[col] != row:
                    last_seen[col] = row
                    total += 1
    return total
@_memoize_dtype
def _dot_csr_csc_type(dt1, dt2):
    """
    Build the kernel multiplying a compressed-rows array by a
    compressed-columns array for the given pair of dtypes.

    Parameters
    ----------
    dt1, dt2 : np.dtype
        The dtypes of ``a`` and ``b``; the result uses
        ``np.result_type(dt1, dt2)``.

    Returns
    -------
    Callable
        The jitted ``_dot_csr_csc`` kernel.
    """
    dtr = np.result_type(dt1, dt2)

    @numba.jit(
        nopython=True,
        nogil=True,
        locals={"data_curr": numba.np.numpy_support.from_dtype(dtr)},
    )
    def _dot_csr_csc(
        out_shape, a_data, b_data, a_indices, b_indices, a_indptr, b_indptr
    ):
        """
        Utility function taking in two ``GCXS`` objects and calculating
        their dot product: a @ b for a with compressed rows and b
        with compressed columns.

        Parameters
        ----------
        out_shape : tuple
            The shape of the output array.

        a_data, a_indices, a_indptr : np.ndarray
            The data, indices, and index pointer arrays of ``a``.

        b_data, b_indices, b_indptr : np.ndarray
            The data, indices, and index pointer arrays of ``b``.

        Returns
        -------
        data, indices, indptr : np.ndarray
            The compressed-rows representation of the product.
        """
        indptr = np.empty(out_shape[0] + 1, dtype=np.intp)
        indptr[0] = 0

        # The structural count is an upper bound on the entries written
        # below, so it is only used to size the buffers.
        nnz = _csr_csc_count_nnz(
            out_shape, indptr, a_indices, b_indices, a_indptr, b_indptr
        )
        indices = np.empty(nnz, dtype=np.intp)
        data = np.empty(nnz, dtype=dtr)
        next_val = 0
        for i in range(out_shape[0]):
            cur_row = a_indices[a_indptr[i] : a_indptr[i + 1]]
            cur_a_data = a_data[a_indptr[i] : a_indptr[i + 1]]
            for j in range(out_shape[1]):
                cur_col = b_indices[b_indptr[j] : b_indptr[j + 1]]
                cur_b_data = b_data[b_indptr[j] : b_indptr[j + 1]]
                a_cur = 0
                b_cur = 0
                data_curr = 0
                # Merge the two sorted index lists, multiplying on matches.
                while a_cur < cur_row.size and b_cur < cur_col.size:
                    if cur_row[a_cur] < cur_col[b_cur]:
                        a_cur += 1
                    elif cur_row[a_cur] > cur_col[b_cur]:
                        b_cur += 1
                    else:
                        data_curr += cur_a_data[a_cur] * cur_b_data[b_cur]
                        a_cur += 1
                        b_cur += 1
                if data_curr != 0:
                    indices[next_val] = j
                    data[next_val] = data_curr
                    next_val += 1
            # Entries that cancel to zero are not stored, so the row pointer
            # must follow what was actually written.
            indptr[i + 1] = next_val
        return data[:next_val], indices[:next_val], indptr

    return _dot_csr_csc
@_memoize_dtype
def _dot_csr_ndarray_type(dt1, dt2):
    """
    Build the kernel multiplying a compressed-rows array by a dense array
    for the given pair of dtypes, returning a dense result.

    Parameters
    ----------
    dt1, dt2 : np.dtype
        The dtypes of ``a`` and ``b``; the result uses
        ``np.result_type(dt1, dt2)``.

    Returns
    -------
    Callable
        The jitted ``_dot_csr_ndarray`` kernel.
    """
    dtr = np.result_type(dt1, dt2)

    @numba.jit(
        nopython=True,
        nogil=True,
        locals={"data_curr": numba.np.numpy_support.from_dtype(dtr)},
    )
    def _dot_csr_ndarray(out_shape, a_data, a_indices, a_indptr, b):
        """
        Utility function taking in one `GCXS` and one ``ndarray`` and
        calculating their dot product: a @ b for a with compressed rows.
        Returns a dense result.

        Parameters
        ----------
        a_data, a_indices, a_indptr : np.ndarray
            The data, indices, and index pointers of ``a``.

        b : np.ndarray
            The second input array ``b``.

        out_shape : Tuple[int]
            The shape of the output array.

        Returns
        -------
        np.ndarray
            The dense product, allocated with the result dtype.
        """
        # Allocate with the result dtype rather than the float64 default.
        out = np.empty(out_shape, dtype=dtr)
        for i in range(out_shape[0]):
            for j in range(out_shape[1]):
                data_curr = 0
                for k in range(a_indptr[i], a_indptr[i + 1]):
                    data_curr += a_data[k] * b[a_indices[k], j]
                out[i, j] = data_curr
        return out

    return _dot_csr_ndarray
@_memoize_dtype
def _dot_csc_ndarray_type_sparse(dt1, dt2):
    """
    Build the kernel multiplying a compressed-columns array by a dense
    array for the given pair of dtypes, returning a sparse result.

    Parameters
    ----------
    dt1, dt2 : np.dtype
        The dtypes of ``a`` and ``b``; the result uses
        ``np.result_type(dt1, dt2)``.

    Returns
    -------
    Callable
        The jitted ``_dot_csc_ndarray_sparse`` kernel.
    """
    dtr = np.result_type(dt1, dt2)

    @numba.jit(
        nopython=True,
        nogil=True,
        locals={"data_curr": numba.np.numpy_support.from_dtype(dtr)},
    )
    def _dot_csc_ndarray_sparse(a_shape, b_shape, a_data, a_indices, a_indptr, b):
        """
        Utility function taking in one `GCXS` and one ``ndarray`` and
        calculating their dot product: a @ b for a with compressed columns.
        Returns a sparse result.

        Parameters
        ----------
        a_data, a_indices, a_indptr : np.ndarray
            The data, indices, and index pointers of ``a``.

        b : np.ndarray
            The second input array ``b``.

        a_shape, b_shape : Tuple[int]
            The shapes of the input arrays.

        Returns
        -------
        data, indices, indptr : np.ndarray
            The compressed-columns representation of the product.
        """
        indptr = np.empty(b_shape[1] + 1, dtype=np.intp)
        # The counting helper only fills indptr[1:], so the first pointer
        # has to be set here.
        indptr[0] = 0
        # The structural count is an upper bound used to size the buffers.
        nnz = _csc_ndarray_count_nnz(a_shape, b_shape, indptr, a_indices, a_indptr, b)
        indices = np.empty(nnz, dtype=np.intp)
        data = np.empty(nnz, dtype=dtr)
        sums = np.zeros(a_shape[0], dtype=dtr)
        # mask doubles as a linked list of the rows touched in this column.
        mask = np.full(a_shape[0], -1)
        nnz = 0
        for i in range(b_shape[1]):
            head = -2
            length = 0
            for j in range(b_shape[0]):
                u = b[j, i]
                if u != 0:
                    for k in range(a_indptr[j], a_indptr[j + 1]):
                        ind = a_indices[k]
                        v = a_data[k]
                        sums[ind] += u * v
                        if mask[ind] == -1:
                            mask[ind] = head
                            head = ind
                            length += 1
            start = nnz
            for _ in range(length):
                if sums[head] != 0:
                    indices[nnz] = head
                    data[nnz] = sums[head]
                    nnz += 1

                temp = head
                head = mask[head]

                # Reset the scratch entries for the next column.
                mask[temp] = -1
                sums[temp] = 0
            # ensure sorted indices
            order = np.argsort(indices[start:nnz])
            indices[start:nnz] = indices[start:nnz][order]
            data[start:nnz] = data[start:nnz][order]
            # Sums that cancel to zero are not stored, so the column pointer
            # must follow what was actually written.
            indptr[i + 1] = nnz
        return data[:nnz], indices[:nnz], indptr

    return _dot_csc_ndarray_sparse
@_memoize_dtype
def _dot_coo_coo_type(dt1, dt2):
    """
    Build the kernel multiplying two ``COO`` arrays for the given pair of
    dtypes.

    Parameters
    ----------
    dt1, dt2 : np.dtype
        The dtypes of ``a`` and ``b``; the result uses
        ``np.result_type(dt1, dt2)``.

    Returns
    -------
    Callable
        The jitted ``_dot_coo_coo`` kernel.
    """
    dtr = np.result_type(dt1, dt2)

    @numba.jit(
        nopython=True,
        nogil=True,
        locals={"data_curr": numba.np.numpy_support.from_dtype(dtr)},
    )
    def _dot_coo_coo(out_shape, a_coords, b_coords, a_data, b_data, a_indptr, b_indptr):
        """
        Utility function taking in two ``COO`` objects and calculating
        their dot product: a @ b.

        Parameters
        ----------
        out_shape : tuple
            The shape of the output array.

        a_data, a_coords : np.ndarray
            The data and coordinates of ``a``.

        b_data, b_coords : np.ndarray
            The data and coordinates of ``b``.

        a_indptr, b_indptr : np.ndarray
            Row pointers into the coordinates of ``a`` and ``b``; entries
            ``indptr[i]:indptr[i + 1]`` are read as row ``i``.

        Returns
        -------
        coords, data : np.ndarray
            The coordinates and data of the product.
        """

        # much of this is borrowed from:
        # https://github.com/scipy/scipy/blob/master/scipy/sparse/sparsetools/csr.h

        n_row, n_col = out_shape
        # The structural count is an upper bound used to size the buffers.
        nnz = _csr_csr_count_nnz(
            out_shape, a_coords[1], b_coords[1], a_indptr, b_indptr
        )
        coords = np.empty((2, nnz), dtype=np.intp)
        data = np.empty(nnz, dtype=dtr)
        # next_ is a linked list of the columns touched in the current row.
        next_ = np.full(n_col, -1)
        sums = np.zeros(n_col, dtype=dtr)
        nnz = 0

        for i in range(n_row):
            head = -2
            length = 0
            for j, av in zip(
                a_coords[1, a_indptr[i] : a_indptr[i + 1]],
                a_data[a_indptr[i] : a_indptr[i + 1]],
            ):
                for k, bv in zip(
                    b_coords[1, b_indptr[j] : b_indptr[j + 1]],
                    b_data[b_indptr[j] : b_indptr[j + 1]],
                ):
                    sums[k] += av * bv
                    if next_[k] == -1:
                        next_[k] = head
                        head = k
                        length += 1

            start = nnz
            for _ in range(length):
                if sums[head] != 0:
                    coords[0, nnz] = i
                    coords[1, nnz] = head
                    data[nnz] = sums[head]
                    nnz += 1

                temp = head
                head = next_[head]

                # Reset the scratch entries for the next row.
                next_[temp] = -1
                sums[temp] = 0

            # ensure sorted coords
            order = np.argsort(coords[1, start:nnz])
            data[start:nnz] = data[start:nnz][order]
            coords[1, start:nnz] = coords[1, start:nnz][order]

        # Sums that cancel to zero are not stored; drop the unused tail so
        # that no uninitialised coordinates are returned.
        return coords[:, :nnz].copy(), data[:nnz]

    return _dot_coo_coo
@_memoize_dtype
def _dot_coo_ndarray_type_sparse(dt1, dt2):
    """
    Build the kernel multiplying a ``COO`` array by a dense array for the
    given pair of dtypes, returning sparse coordinates and data.

    Parameters
    ----------
    dt1, dt2 : np.dtype
        The dtypes of the operands; the result uses
        ``np.result_type(dt1, dt2)``.

    Returns
    -------
    Callable
        The jitted ``_dot_coo_ndarray`` kernel.
    """
    dtr = np.result_type(dt1, dt2)

    @numba.jit(
        nopython=True,
        nogil=True,
        locals={"data_curr": numba.np.numpy_support.from_dtype(dtr)},
    )
    def _dot_coo_ndarray(coords1, data1, array2, out_shape):  # pragma: no cover
        """
        Utility function taking in one `COO` and one ``ndarray`` and
        calculating a "sense" of their dot product. Actually computes
        ``s1 @ x2.T``.

        Parameters
        ----------
        data1, coords1 : np.ndarray
            The data and coordinates of ``s1``.

        array2 : np.ndarray
            The second input array ``x2``.

        out_shape : Tuple[int]
            The output shape.

        Returns
        -------
        coords, data : np.ndarray
            The coordinates and data of the non-zero results.
        """

        out_data = []
        out_coords = []

        # coords1.shape = (2, len(data1))
        # coords1[0, :] = rows, sorted
        # coords1[1, :] = columns

        n_stored = len(data1)
        didx1 = 0
        while didx1 < n_stored:
            current_row = coords1[0, didx1]

            # Locate the end of this row first, so the outer loop always
            # advances, even when the output has no columns.
            row_end = didx1
            while row_end < n_stored and coords1[0, row_end] == current_row:
                row_end += 1

            for oidx2 in range(out_shape[1]):
                data_curr = 0
                for cur_didx1 in range(didx1, row_end):
                    data_curr += data1[cur_didx1] * array2[oidx2, coords1[1, cur_didx1]]
                if data_curr != 0:
                    out_data.append(data_curr)
                    out_coords.append((current_row, oidx2))
            didx1 = row_end

        if len(out_data) == 0:
            return np.empty((2, 0), dtype=np.intp), np.empty((0,), dtype=dtr)

        return np.array(out_coords).T, np.array(out_data)

    return _dot_coo_ndarray
@_memoize_dtype
def _dot_ndarray_coo_type_sparse(dt1, dt2):
    """
    Build the kernel multiplying a dense array by a ``COO`` array for the
    given pair of dtypes, returning sparse coordinates and data.
    """
    dtr = np.result_type(dt1, dt2)

    @numba.jit(
        nopython=True,
        nogil=True,
        locals={"data_curr": numba.np.numpy_support.from_dtype(dtr)},
    )
    def _dot_ndarray_coo(array1, coords2, data2, out_shape):  # pragma: no cover
        """
        Utility function taking in one ``ndarray`` and one ``COO`` and
        calculating a "sense" of their dot product. Actually computes
        ``x1 @ s2.T``.

        Parameters
        ----------
        array1 : np.ndarray
            The input array ``x1``.

        data2, coords2 : np.ndarray
            The data and coordinates of ``s2``.

        out_shape : Tuple[int]
            The output shape.

        Returns
        -------
        coords, data : np.ndarray
            The coordinates and data of the non-zero results.
        """
        out_data = []
        out_coords = []

        # coords2.shape = (2, len(data2))
        # coords2[0, :] = columns, sorted
        # coords2[1, :] = rows

        n_stored = len(data2)
        for oidx1 in range(out_shape[0]):
            data_curr = 0
            current_col = 0
            for didx2 in range(n_stored):
                entry_col = coords2[0, didx2]
                if entry_col != current_col:
                    # A new output column starts: flush the finished one.
                    if data_curr != 0:
                        out_data.append(data_curr)
                        out_coords.append([oidx1, current_col])
                        data_curr = 0
                    current_col = entry_col

                data_curr += array1[oidx1, coords2[1, didx2]] * data2[didx2]

            # Flush the last column of this output row.
            if data_curr != 0:
                out_data.append(data_curr)
                out_coords.append([oidx1, current_col])

        if len(out_data) == 0:
            return np.empty((2, 0), dtype=np.intp), np.empty((0,), dtype=dtr)

        return np.array(out_coords).T, np.array(out_data)

    return _dot_ndarray_coo
diff --git a/sparse/_coo/common.py b/sparse/_coo/common.py index 4d323f74..ecf0bcf3 100644 --- a/sparse/_coo/common.py +++ b/sparse/_coo/common.py @@ -1,5 +1,4 @@ -from functools import reduce, wraps -from itertools import chain +from functools import reduce import operator import warnings from collections.abc import Iterable @@ -64,276 +63,6 @@ def linear_loc(coords, shape): return np.ravel_multi_index(coords, shape) -def tensordot(a, b, axes=2, *, return_type=None): - """ - Perform the equivalent of :obj:`numpy.tensordot`. - - Parameters - ---------- - a, b : Union[COO, np.ndarray, scipy.sparse.spmatrix] - The arrays to perform the :code:`tensordot` operation on. - axes : tuple[Union[int, tuple[int], Union[int, tuple[int]], optional - The axes to match when performing the sum. - return_type : {None, COO, np.ndarray}, optional - Type of returned array. - - - Returns - ------- - Union[COO, numpy.ndarray] - The result of the operation. - - Raises - ------ - ValueError - If all arguments don't have zero fill-values. 
- - See Also - -------- - numpy.tensordot : NumPy equivalent function - """ - # Much of this is stolen from numpy/core/numeric.py::tensordot - # Please see license at https://github.com/numpy/numpy/blob/master/LICENSE.txt - check_zero_fill_value(a, b) - - if scipy.sparse.issparse(a): - a = asCOO(a) - if scipy.sparse.issparse(b): - b = asCOO(b) - - try: - iter(axes) - except TypeError: - axes_a = list(range(-axes, 0)) - axes_b = list(range(0, axes)) - else: - axes_a, axes_b = axes - try: - na = len(axes_a) - axes_a = list(axes_a) - except TypeError: - axes_a = [axes_a] - na = 1 - try: - nb = len(axes_b) - axes_b = list(axes_b) - except TypeError: - axes_b = [axes_b] - nb = 1 - - # a, b = asarray(a), asarray(b) # <--- modified - as_ = a.shape - nda = a.ndim - bs = b.shape - ndb = b.ndim - equal = True - if nda == 0 or ndb == 0: - pos = int(nda != 0) - raise ValueError("Input {} operand does not have enough dimensions".format(pos)) - if na != nb: - equal = False - else: - for k in range(na): - if as_[axes_a[k]] != bs[axes_b[k]]: - equal = False - break - if axes_a[k] < 0: - axes_a[k] += nda - if axes_b[k] < 0: - axes_b[k] += ndb - if not equal: - raise ValueError("shape-mismatch for sum") - - # Move the axes to sum over to the end of "a" - # and to the front of "b" - notin = [k for k in range(nda) if k not in axes_a] - newaxes_a = notin + axes_a - N2 = 1 - for axis in axes_a: - N2 *= as_[axis] - newshape_a = (-1, N2) - olda = [as_[axis] for axis in notin] - - notin = [k for k in range(ndb) if k not in axes_b] - newaxes_b = axes_b + notin - N2 = 1 - for axis in axes_b: - N2 *= bs[axis] - newshape_b = (N2, -1) - oldb = [bs[axis] for axis in notin] - - if any(dim == 0 for dim in chain(newshape_a, newshape_b)): - res = asCOO(np.empty(olda + oldb), check=False) - if isinstance(a, np.ndarray) or isinstance(b, np.ndarray): - res = res.todense() - - return res - - at = a.transpose(newaxes_a).reshape(newshape_a) - bt = b.transpose(newaxes_b).reshape(newshape_b) - res = 
_dot(at, bt, return_type) - return res.reshape(olda + oldb) - - -def matmul(a, b): - """Perform the equivalent of :obj:`numpy.matmul` on two arrays. - - Parameters - ---------- - a, b : Union[COO, np.ndarray, scipy.sparse.spmatrix] - The arrays to perform the :code:`matmul` operation on. - - Returns - ------- - Union[COO, numpy.ndarray] - The result of the operation. - - Raises - ------ - ValueError - If all arguments don't have zero fill-values, or the shape of the two arrays is not broadcastable. - - See Also - -------- - numpy.matmul : NumPy equivalent function. - COO.__matmul__ : Equivalent function for COO objects. - """ - check_zero_fill_value(a, b) - if not hasattr(a, "ndim") or not hasattr(b, "ndim"): - raise TypeError( - "Cannot perform dot product on types %s, %s" % (type(a), type(b)) - ) - - # When b is 2-d, it is equivalent to dot - if b.ndim <= 2: - return dot(a, b) - - # when a is 2-d, we need to transpose result after dot - if a.ndim <= 2: - res = dot(a, b) - axes = list(range(res.ndim)) - axes.insert(-1, axes.pop(0)) - return res.transpose(axes) - - # If a can be squeeze to a vector, use dot will be faster - if a.ndim <= b.ndim and np.prod(a.shape[:-1]) == 1: - res = dot(a.reshape(-1), b) - shape = list(res.shape) - shape.insert(-1, 1) - return res.reshape(shape) - - # If b can be squeeze to a matrix, use dot will be faster - if b.ndim <= a.ndim and np.prod(b.shape[:-2]) == 1: - return dot(a, b.reshape(b.shape[-2:])) - - if a.ndim < b.ndim: - a = a[(None,) * (b.ndim - a.ndim)] - if a.ndim > b.ndim: - b = b[(None,) * (a.ndim - b.ndim)] - for i, j in zip(a.shape[:-2], b.shape[:-2]): - if i != 1 and j != 1 and i != j: - raise ValueError("shapes of a and b are not broadcastable") - - def _matmul_recurser(a, b): - if a.ndim == 2: - return dot(a, b) - res = [] - for i in range(max(a.shape[0], b.shape[0])): - a_i = a[0] if a.shape[0] == 1 else a[i] - b_i = b[0] if b.shape[0] == 1 else b[i] - res.append(_matmul_recurser(a_i, b_i)) - mask = [isinstance(x, 
SparseArray) for x in res] - if all(mask): - return stack(res) - else: - res = [x.todense() if isinstance(x, SparseArray) else x for x in res] - return np.stack(res) - - return _matmul_recurser(a, b) - - -def dot(a, b): - """ - Perform the equivalent of :obj:`numpy.dot` on two arrays. - - Parameters - ---------- - a, b : Union[COO, np.ndarray, scipy.sparse.spmatrix] - The arrays to perform the :code:`dot` operation on. - - Returns - ------- - Union[COO, numpy.ndarray] - The result of the operation. - - Raises - ------ - ValueError - If all arguments don't have zero fill-values. - - See Also - -------- - numpy.dot : NumPy equivalent function. - COO.dot : Equivalent function for COO objects. - """ - check_zero_fill_value(a, b) - if not hasattr(a, "ndim") or not hasattr(b, "ndim"): - raise TypeError( - "Cannot perform dot product on types %s, %s" % (type(a), type(b)) - ) - - if a.ndim == 1 and b.ndim == 1: - return (a * b).sum() - - a_axis = -1 - b_axis = -2 - - if b.ndim == 1: - b_axis = -1 - return tensordot(a, b, axes=(a_axis, b_axis)) - - -def _dot(a, b, return_type=None): - from .core import COO - - out_shape = (a.shape[0], b.shape[1]) - if isinstance(a, COO) and isinstance(b, COO): - b = b.T - coords, data = _dot_coo_coo_type(a.dtype, b.dtype)( - a.coords, a.data, b.coords, b.data - ) - - if return_type == np.ndarray: - return COO( - coords, data, shape=out_shape, has_duplicates=False, sorted=True - ).todense() - - return COO(coords, data, shape=out_shape, has_duplicates=False, sorted=True) - - if isinstance(a, COO) and isinstance(b, np.ndarray): - b = b.view(type=np.ndarray).T - - if return_type == COO: - coords, data = _dot_coo_ndarray_type_sparse(a.dtype, b.dtype)( - a.coords, a.data, b, out_shape - ) - return COO(coords, data, shape=out_shape, has_duplicates=False, sorted=True) - - return _dot_coo_ndarray_type(a.dtype, b.dtype)(a.coords, a.data, b, out_shape) - - if isinstance(a, np.ndarray) and isinstance(b, COO): - b = b.T - a = a.view(type=np.ndarray) - - 
if return_type == COO: - coords, data = _dot_ndarray_coo_type_sparse(a.dtype, b.dtype)( - a, b.coords, b.data, out_shape - ) - return COO(coords, data, shape=out_shape, has_duplicates=False, sorted=True) - - return _dot_ndarray_coo_type(a.dtype, b.dtype)(a, b.coords, b.data, out_shape) - - def kron(a, b): """Kronecker product of 2 sparse arrays. @@ -1126,309 +855,6 @@ def diagonalize(a, axis=0): return COO(diag_coords, a.data, diag_shape) -def _memoize_dtype(f): - """ - Memoizes a function taking in NumPy dtypes. - - Parameters - ---------- - f : Callable - - Returns - ------- - wrapped : Callable - - Examples - -------- - >>> def func(dt1): - ... return object() - >>> func = _memoize_dtype(func) - >>> func(np.dtype('i8')) is func(np.dtype('int64')) - True - >>> func(np.dtype('i8')) is func(np.dtype('i4')) - False - """ - cache = {} - - @wraps(f) - def wrapped(*args): - key = tuple(arg.name for arg in args) - if key in cache: - return cache[key] - - result = f(*args) - cache[key] = result - return result - - return wrapped - - -@_memoize_dtype -def _dot_coo_coo_type(dt1, dt2): - dtr = np.result_type(dt1, dt2) - - @numba.jit( - nopython=True, - nogil=True, - locals={"data_curr": numba.np.numpy_support.from_dtype(dtr)}, - ) - def _dot_coo_coo(coords1, data1, coords2, data2): # pragma: no cover - """ - Utility function taking in two ``COO`` objects and calculating a "sense" - of their dot product. Acually computes ``s1 @ s2.T``. - - Parameters - ---------- - data1, coords1 : np.ndarray - The data and coordinates of ``s1``. - - data2, coords2 : np.ndarray - The data and coordinates of ``s2``. 
- """ - coords_out = [] - data_out = [] - didx1 = 0 - data1_end = len(data1) - data2_end = len(data2) - - while didx1 < data1_end: - oidx1 = coords1[0, didx1] - didx2 = 0 - didx1_curr = didx1 - - while ( - didx2 < data2_end and didx1 < data1_end and coords1[0, didx1] == oidx1 - ): - oidx2 = coords2[0, didx2] - data_curr = 0 - - while ( - didx2 < data2_end - and didx1 < data1_end - and coords2[0, didx2] == oidx2 - and coords1[0, didx1] == oidx1 - ): - c1 = coords1[1, didx1] - c2 = coords2[1, didx2] - k = min(c1, c2) - if c1 == k and c2 == k: - data_curr += data1[didx1] * data2[didx2] - didx1 += c1 == k - didx2 += c2 == k - - while didx2 < data2_end and coords2[0, didx2] == oidx2: - didx2 += 1 - - if didx2 < data2_end: - didx1 = didx1_curr - - if data_curr != 0: - coords_out.append((oidx1, oidx2)) - data_out.append(data_curr) - - while didx1 < data1_end and coords1[0, didx1] == oidx1: - didx1 += 1 - - if len(data_out) == 0: - return np.empty((2, 0), dtype=np.intp), np.empty((0,), dtype=dtr) - - return np.array(coords_out).T, np.array(data_out) - - return _dot_coo_coo - - -@_memoize_dtype -def _dot_coo_ndarray_type(dt1, dt2): - dtr = np.result_type(dt1, dt2) - - @numba.jit(nopython=True, nogil=True) - def _dot_coo_ndarray(coords1, data1, array2, out_shape): # pragma: no cover - """ - Utility function taking in one `COO` and one ``ndarray`` and - calculating a "sense" of their dot product. Acually computes - ``s1 @ x2.T``. - - Parameters - ---------- - data1, coords1 : np.ndarray - The data and coordinates of ``s1``. - - array2 : np.ndarray - The second input array ``x2``. - - out_shape : Tuple[int] - The output shape. 
- """ - out = np.zeros(out_shape, dtype=dtr) - didx1 = 0 - - while didx1 < len(data1): - oidx1 = coords1[0, didx1] - didx1_curr = didx1 - - for oidx2 in range(out_shape[1]): - didx1 = didx1_curr - while didx1 < len(data1) and coords1[0, didx1] == oidx1: - out[oidx1, oidx2] += data1[didx1] * array2[oidx2, coords1[1, didx1]] - didx1 += 1 - - return out - - return _dot_coo_ndarray - - -@_memoize_dtype -def _dot_coo_ndarray_type_sparse(dt1, dt2): - dtr = np.result_type(dt1, dt2) - - @numba.jit( - nopython=True, - nogil=True, - locals={"data_curr": numba.np.numpy_support.from_dtype(dtr)}, - ) - def _dot_coo_ndarray(coords1, data1, array2, out_shape): # pragma: no cover - """ - Utility function taking in one `COO` and one ``ndarray`` and - calculating a "sense" of their dot product. Acually computes - ``s1 @ x2.T``. - - Parameters - ---------- - data1, coords1 : np.ndarray - The data and coordinates of ``s1``. - - array2 : np.ndarray - The second input array ``x2``. - - out_shape : Tuple[int] - The output shape. 
- """ - - out_data = [] - out_coords = [] - - # coords1.shape = (2, len(data1)) - # coords1[0, :] = rows, sorted - # coords1[1, :] = columns - - didx1 = 0 - while didx1 < len(data1): - current_row = coords1[0, didx1] - - cur_didx1 = didx1 - oidx2 = 0 - while oidx2 < out_shape[1]: - cur_didx1 = didx1 - data_curr = 0 - while cur_didx1 < len(data1) and coords1[0, cur_didx1] == current_row: - data_curr += data1[cur_didx1] * array2[oidx2, coords1[1, cur_didx1]] - cur_didx1 += 1 - if data_curr != 0: - out_data.append(data_curr) - out_coords.append((current_row, oidx2)) - oidx2 += 1 - didx1 = cur_didx1 - - if len(out_data) == 0: - return np.empty((2, 0), dtype=np.intp), np.empty((0,), dtype=dtr) - - return np.array(out_coords).T, np.array(out_data) - - return _dot_coo_ndarray - - -@_memoize_dtype -def _dot_ndarray_coo_type(dt1, dt2): - dtr = np.result_type(dt1, dt2) - - @numba.jit( - nopython=True, nogil=True, - ) - def _dot_ndarray_coo(array1, coords2, data2, out_shape): # pragma: no cover - """ - Utility function taking in two one ``ndarray`` and one ``COO`` and - calculating a "sense" of their dot product. Acually computes ``x1 @ s2.T``. - - Parameters - ---------- - array1 : np.ndarray - The input array ``x1``. - - data2, coords2 : np.ndarray - The data and coordinates of ``s2``. - - out_shape : Tuple[int] - The output shape. 
- """ - out = np.zeros(out_shape, dtype=dtr) - - for oidx1 in range(out_shape[0]): - for didx2 in range(len(data2)): - oidx2 = coords2[0, didx2] - out[oidx1, oidx2] += array1[oidx1, coords2[1, didx2]] * data2[didx2] - - return out - - return _dot_ndarray_coo - - -@_memoize_dtype -def _dot_ndarray_coo_type_sparse(dt1, dt2): - dtr = np.result_type(dt1, dt2) - - @numba.jit( - nopython=True, - nogil=True, - locals={"data_curr": numba.np.numpy_support.from_dtype(dtr)}, - ) - def _dot_ndarray_coo(array1, coords2, data2, out_shape): # pragma: no cover - """ - Utility function taking in two one ``ndarray`` and one ``COO`` and - calculating a "sense" of their dot product. Acually computes ``x1 @ s2.T``. - - Parameters - ---------- - array1 : np.ndarray - The input array ``x1``. - - data2, coords2 : np.ndarray - The data and coordinates of ``s2``. - - out_shape : Tuple[int] - The output shape. - """ - out_data = [] - out_coords = [] - - # coords2.shape = (2, len(data2)) - # coords2[0, :] = columns, sorted - # coords2[1, :] = rows - - for oidx1 in range(out_shape[0]): - data_curr = 0 - current_col = 0 - for didx2 in range(len(data2)): - if coords2[0, didx2] != current_col: - if data_curr != 0: - out_data.append(data_curr) - out_coords.append([oidx1, current_col]) - data_curr = 0 - current_col = coords2[0, didx2] - - data_curr += array1[oidx1, coords2[1, didx2]] * data2[didx2] - - if data_curr != 0: - out_data.append(data_curr) - out_coords.append([oidx1, current_col]) - - if len(out_data) == 0: - return np.empty((2, 0), dtype=np.intp), np.empty((0,), dtype=dtr) - - return np.array(out_coords).T, np.array(out_data) - - return _dot_ndarray_coo - - def isposinf(x, out=None): """ Test element-wise for positive infinity, return result as sparse ``bool`` array. 
diff --git a/sparse/_coo/core.py b/sparse/_coo/core.py index 7579ed49..453873ae 100644 --- a/sparse/_coo/core.py +++ b/sparse/_coo/core.py @@ -10,7 +10,7 @@ from numpy.lib.mixins import NDArrayOperatorsMixin import numba -from .common import dot, matmul +from .._common import dot, matmul from .indexing import getitem from .umath import elemwise, broadcast_to from .._sparse_array import SparseArray @@ -569,56 +569,6 @@ def dtype(self): True """ return self.data.dtype - - @property - def format(self): - """ - The storage format of this array. - - Returns - ------- - str - The storage format of this array. - - See Also - ------- - DOK.format : Equivalent :obj:`DOK` array property. - GCXS.format : Equivalent :obj:`GCXS` array property. - scipy.sparse.coo_matrix.format : The Scipy equivalent property. - - Examples - ------- - >>> import sparse - >>> s = sparse.random((5,5), density=0.2) - >>> s.format - 'coo' - """ - return 'coo' - - @property - def format(self): - """ - The storage format of this array. - - Returns - ------- - str - The storage format of this array. - - See Also - ------- - DOK.format : Equivalent :obj:`DOK` array property. - GCXS.format : Equivalent :obj:`GCXS` array property. - scipy.sparse.coo_matrix.format : The Scipy equivalent property. - - Examples - ------- - >>> import sparse - >>> s = sparse.random((5,5), density=0.2) - >>> s.format - 'coo' - """ - return "coo" @property def nnz(self): @@ -1886,7 +1836,9 @@ def reshape(self, shape, order="C"): return self if self.size != reduce(operator.mul, shape, 1): - raise ValueError('cannot reshape array of size {} into shape {}'.format(self.size, shape)) + raise ValueError( + "cannot reshape array of size {} into shape {}".format(self.size, shape) + ) if self.size != reduce(operator.mul, shape, 1): raise ValueError( @@ -2354,15 +2306,10 @@ def asformat(self, format, compressed_axes=None): If the format isn't supported. 
""" from .._compressed import GCXS -<<<<<<< HEAD if format == "gcxs" or format is GCXS: return GCXS.from_coo(self, compressed_axes=compressed_axes) -======= - if format == 'gcxs' or format is GCXS: - return GCXS.from_coo(self, compressed_axes=compressed_axes) ->>>>>>> change gxcs with gcxs elif compressed_axes is not None: raise ValueError( "compressed_axes is not supported for {} format".format(format) diff --git a/sparse/tests/test_coo.py b/sparse/tests/test_coo.py index 52f187d2..0734b1a7 100644 --- a/sparse/tests/test_coo.py +++ b/sparse/tests/test_coo.py @@ -214,18 +214,21 @@ def test_transpose_error(axis): x.transpose(axis) -@pytest.mark.parametrize('a,b', [ - [(3, 4), (5, 5)], - [(12,), (3, 4)], - [(12,), (3, 6)], - [(5, 5, 5), (6, 6, 6)], - [(3, 4), (9, 4)], - [(5,), (4,)], - [(2, 3, 4, 5), (2, 3, 4, 5, 6)], - [(100,), (5, 5)], - [(2, 3, 4, 5), (20, 6)], - [(), ()], -]) +@pytest.mark.parametrize( + "a,b", + [ + [(3, 4), (5, 5)], + [(12,), (3, 4)], + [(12,), (3, 6)], + [(5, 5, 5), (6, 6, 6)], + [(3, 4), (9, 4)], + [(5,), (4,)], + [(2, 3, 4, 5), (2, 3, 4, 5, 6)], + [(100,), (5, 5)], + [(2, 3, 4, 5), (20, 6)], + [(), ()], + ], +) def test_resize(a, b): s = sparse.random(a, density=0.5) orig_size = s.size @@ -237,7 +240,8 @@ def test_resize(a, b): temp[orig_size:] = s.fill_value assert isinstance(s, sparse.SparseArray) assert_eq(x, s) - + + @pytest.mark.parametrize("axis1", [-3, -2, -1, 0, 1, 2]) @pytest.mark.parametrize("axis2", [-3, -2, -1, 0, 1, 2]) def test_swapaxes(axis1, axis2): @@ -375,193 +379,6 @@ def test_to_scipy_sparse(): assert_eq(a, b) -@pytest.mark.parametrize( - "a_shape,b_shape,axes", - [ - [(3, 4), (4, 3), (1, 0)], - [(3, 4), (4, 3), (0, 1)], - [(3, 4, 5), (4, 3), (1, 0)], - [(3, 4), (5, 4, 3), (1, 1)], - [(3, 4), (5, 4, 3), ((0, 1), (2, 1))], - [(3, 4), (5, 4, 3), ((1, 0), (1, 2))], - [(3, 4, 5), (4,), (1, 0)], - [(4,), (3, 4, 5), (0, 1)], - [(4,), (4,), (0, 0)], - [(4,), (4,), 0], - ], -) -def test_tensordot(a_shape, b_shape, axes): - sa = 
sparse.random(a_shape, density=0.5) - sb = sparse.random(b_shape, density=0.5) - - a = sa.todense() - b = sb.todense() - - a_b = np.tensordot(a, b, axes) - - # tests for return_type=None - sa_sb = sparse.tensordot(sa, sb, axes) - sa_b = sparse.tensordot(sa, b, axes) - a_sb = sparse.tensordot(a, sb, axes) - - assert_eq(a_b, sa_sb) - assert_eq(a_b, sa_b) - assert_eq(a_b, a_sb) - assert isinstance(sa_sb, COO) - assert isinstance(sa_b, np.ndarray) - assert isinstance(a_sb, np.ndarray) - - # tests for return_type=COO - sa_b = sparse.tensordot(sa, b, axes, return_type=COO) - a_sb = sparse.tensordot(a, sb, axes, return_type=COO) - - assert_eq(a_b, sa_b) - assert_eq(a_b, a_sb) - assert isinstance(sa_b, COO) - assert isinstance(a_sb, COO) - - # tests for return_type=np.ndarray - sa_sb = sparse.tensordot(sa, sb, axes, return_type=np.ndarray) - - assert_eq(a_b, sa_sb) - assert isinstance(sa_sb, np.ndarray) - - -def test_tensordot_empty(): - x1 = np.empty((0, 0, 0)) - x2 = np.empty((0, 0, 0)) - s1 = sparse.COO.from_numpy(x1) - s2 = sparse.COO.from_numpy(x2) - - assert_eq(np.tensordot(x1, x2), sparse.tensordot(s1, s2)) - - -def test_tensordot_valueerror(): - x1 = sparse.COO(np.array(1)) - x2 = sparse.COO(np.array(1)) - - with pytest.raises(ValueError): - x1 @ x2 - - -@pytest.mark.parametrize( - "a_shape, b_shape", - [ - ((3, 1, 6, 5), (2, 1, 4, 5, 6)), - ((2, 1, 4, 5, 6), (3, 1, 6, 5)), - ((1, 1, 5), (3, 5, 6)), - ((3, 4, 5), (1, 5, 6)), - ((3, 4, 5), (3, 5, 6)), - ((3, 4, 5), (5, 6)), - ((4, 5), (5, 6)), - ((5,), (5, 6)), - ((4, 5), (5,)), - ((5,), (5,)), - ((3, 4), (1, 2, 4, 3)), - ], -) -def test_matmul(a_shape, b_shape): - sa = sparse.random(a_shape, density=0.5) - sb = sparse.random(b_shape, density=0.5) - - a = sa.todense() - b = sb.todense() - - assert_eq(np.matmul(a, b), sparse.matmul(sa, sb)) - assert_eq(sparse.matmul(sa, b), sparse.matmul(a, sb)) - assert_eq(np.matmul(a, b), sparse.matmul(sa, sb)) - - if a.ndim == 2 or b.ndim == 2: - assert_eq( - np.matmul(a, b), - 
sparse.matmul( - scipy.sparse.coo_matrix(a) if a.ndim == 2 else sa, - scipy.sparse.coo_matrix(b) if b.ndim == 2 else sb, - ), - ) - - if hasattr(operator, "matmul"): - assert_eq(operator.matmul(a, b), operator.matmul(sa, sb)) - - -def test_matmul_errors(): - with pytest.raises(ValueError): - sa = sparse.random((3, 4, 5, 6), 0.5) - sb = sparse.random((3, 6, 5, 6), 0.5) - sparse.matmul(sa, sb) - - -@pytest.mark.parametrize( - "a_shape, b_shape", - [ - ((1, 4, 5), (3, 5, 6)), - ((3, 4, 5), (1, 5, 6)), - ((3, 4, 5), (3, 5, 6)), - ((3, 4, 5), (5, 6)), - ((4, 5), (5, 6)), - ((5,), (5, 6)), - ((4, 5), (5,)), - ((5,), (5,)), - ], -) -def test_dot(a_shape, b_shape): - sa = sparse.random(a_shape, density=0.5) - sb = sparse.random(b_shape, density=0.5) - - a = sa.todense() - b = sb.todense() - - assert_eq(a.dot(b), sa.dot(sb)) - assert_eq(np.dot(a, b), sparse.dot(sa, sb)) - assert_eq(sparse.dot(sa, b), sparse.dot(a, sb)) - assert_eq(np.dot(a, b), sparse.dot(sa, sb)) - - if hasattr(operator, "matmul"): - # Basic equivalences - assert_eq(operator.matmul(a, b), operator.matmul(sa, sb)) - # Test that SOO's and np.array's combine correctly - # Not possible due to https://github.com/numpy/numpy/issues/9028 - # assert_eq(eval("a @ sb"), eval("sa @ b")) - - -@pytest.mark.parametrize( - "a_dense, b_dense, o_type", - [ - (False, False, sparse.SparseArray), - (False, True, np.ndarray), - (True, False, np.ndarray), - ], -) -def test_dot_type(a_dense, b_dense, o_type): - a = sparse.random((3, 4), density=0.8) - b = sparse.random((4, 5), density=0.8) - - if a_dense: - a = a.todense() - - if b_dense: - b = b.todense() - - assert isinstance(sparse.dot(a, b), o_type) - - -@pytest.mark.xfail -def test_dot_nocoercion(): - sa = sparse.random((3, 4, 5), density=0.5) - sb = sparse.random((5, 6), density=0.5) - - a = sa.todense() - b = sb.todense() - - la = a.tolist() - lb = b.tolist() - - if hasattr(operator, "matmul"): - # Operations with naive collection (list) - assert_eq(operator.matmul(la, 
b), operator.matmul(la, sb)) - assert_eq(operator.matmul(a, lb), operator.matmul(sa, lb)) - - @pytest.mark.parametrize("a_ndim", [1, 2, 3]) @pytest.mark.parametrize("b_ndim", [1, 2, 3]) def test_kron(a_ndim, b_ndim): @@ -1693,7 +1510,6 @@ def test_add_many_sparse_arrays(): def test_caching(): x = COO({(9, 9, 9): 1}) -<<<<<<< HEAD assert ( x[:].reshape((100, 10)).transpose().tocsr() is not x[:].reshape((100, 10)).transpose().tocsr() @@ -1704,12 +1520,6 @@ def test_caching(): x[:].reshape((100, 10)).transpose().tocsr() is x[:].reshape((100, 10)).transpose().tocsr() ) -======= - assert x[:].reshape((100, 10)).transpose().tocsr() is not x[:].reshape((100, 10)).transpose().tocsr() - - x = COO({(9, 9, 9): 1}, cache=True) - assert x[:].reshape((100, 10)).transpose().tocsr() is x[:].reshape((100, 10)).transpose().tocsr() ->>>>>>> Fix up tests and code. x = COO({(1, 1, 1, 1, 1, 1, 1, 2): 1}, cache=True) diff --git a/sparse/tests/test_dot.py b/sparse/tests/test_dot.py new file mode 100644 index 00000000..3069e31a --- /dev/null +++ b/sparse/tests/test_dot.py @@ -0,0 +1,240 @@ +import numpy as np +import pytest +import scipy.sparse +import scipy.stats + +import sparse +from sparse import COO + + +@pytest.mark.parametrize( + "a_shape,b_shape,axes", + [ + [(3, 4), (4, 3), (1, 0)], + [(3, 4), (4, 3), (0, 1)], + [(3, 4, 5), (4, 3), (1, 0)], + [(3, 4), (5, 4, 3), (1, 1)], + [(3, 4), (5, 4, 3), ((0, 1), (2, 1))], + [(3, 4), (5, 4, 3), ((1, 0), (1, 2))], + [(3, 4, 5), (4,), (1, 0)], + [(4,), (3, 4, 5), (0, 1)], + [(4,), (4,), (0, 0)], + [(4,), (4,), 0], + ], +) +@pytest.mark.parametrize( + "a_format, b_format", + [("coo", "coo"), ("coo", "gcxs"), ("gcxs", "coo"), ("gcxs", "gcxs")], +) +def test_tensordot(a_shape, b_shape, axes, a_format, b_format): + sa = sparse.random(a_shape, density=0.5, format=a_format) + sb = sparse.random(b_shape, density=0.5, format=b_format) + + a = sa.todense() + b = sb.todense() + + a_b = np.tensordot(a, b, axes) + + # tests for return_type=None + sa_sb = 
sparse.tensordot(sa, sb, axes) + sa_b = sparse.tensordot(sa, b, axes) + a_sb = sparse.tensordot(a, sb, axes) + + assert_eq(a_b, sa_sb) + assert_eq(a_b, sa_b) + assert_eq(a_b, a_sb) + if all(isinstance(arr, COO) for arr in [sa, sb]): + assert isinstance(sa_sb, COO) + else: + assert isinstance(sa_sb, GCXS) + assert isinstance(sa_b, np.ndarray) + assert isinstance(a_sb, np.ndarray) + + # tests for return_type=COO + sa_b = sparse.tensordot(sa, b, axes, return_type=COO) + a_sb = sparse.tensordot(a, sb, axes, return_type=COO) + + assert_eq(a_b, sa_b) + assert_eq(a_b, a_sb) + assert isinstance(sa_b, COO) + assert isinstance(a_sb, COO) + + # tests form return_type=GCXS + sa_b = sparse.tensordot(sa, b, axes, return_type=GCXS) + a_sb = sparse.tensordot(a, sb, axes, return_type=GCXS) + + assert_eq(a_b, sa_b) + assert_eq(a_b, a_sb) + assert isinstance(sa_b, GCXS) + assert isinstance(a_sb, GCXS) + + # tests for return_type=np.ndarray + sa_sb = sparse.tensordot(sa, sb, axes, return_type=np.ndarray) + + assert_eq(a_b, sa_sb) + assert isinstance(sa_sb, np.ndarray) + + +def test_tensordot_empty(): + x1 = np.empty((0, 0, 0)) + x2 = np.empty((0, 0, 0)) + s1 = sparse.COO.from_numpy(x1) + s2 = sparse.COO.from_numpy(x2) + + assert_eq(np.tensordot(x1, x2), sparse.tensordot(s1, s2)) + + +def test_tensordot_valueerror(): + x1 = sparse.COO(np.array(1)) + x2 = sparse.COO(np.array(1)) + + with pytest.raises(ValueError): + x1 @ x2 + + +@pytest.mark.parametrize( + "a_shape, b_shape", + [ + ((3, 1, 6, 5), (2, 1, 4, 5, 6)), + ((2, 1, 4, 5, 6), (3, 1, 6, 5)), + ((1, 1, 5), (3, 5, 6)), + ((3, 4, 5), (1, 5, 6)), + ((3, 4, 5), (3, 5, 6)), + ((3, 4, 5), (5, 6)), + ((4, 5), (5, 6)), + ((5,), (5, 6)), + ((4, 5), (5,)), + ((5,), (5,)), + ((3, 4), (1, 2, 4, 3)), + ], +) +@pytest.mark.parametrize( + "a_format, b_format", + [("coo", "coo"), ("coo", "gcxs"), ("gcxs", "coo"), ("gcxs", "gcxs")], +) +@pytest.mark.parametrize( + "a_comp_axes, b_comp_axes", [([0], [0]), ([0], [1]), ([1], [0]), ([1], [1])] +) +def 
test_matmul(a_shape, b_shape, a_format, b_format, a_comp_axes, b_comp_axes): + if a_format == "coo": + a_comp_axes = None + if b_format == "coo": + b_comp_axes = None + sa = sparse.random( + a_shape, density=0.5, format=a_format, compressed_axes=a_comp_axes + ) + sb = sparse.random( + b_shape, density=0.5, format=b_format, compressed_axes=b_comp_axes + ) + + a = sa.todense() + b = sb.todense() + + assert_eq(np.matmul(a, b), sparse.matmul(sa, sb)) + assert_eq(sparse.matmul(sa, b), sparse.matmul(a, sb)) + assert_eq(np.matmul(a, b), sparse.matmul(sa, sb)) + + if a.ndim == 2 or b.ndim == 2: + assert_eq( + np.matmul(a, b), + sparse.matmul( + scipy.sparse.coo_matrix(a) if a.ndim == 2 else sa, + scipy.sparse.coo_matrix(b) if b.ndim == 2 else sb, + ), + ) + + if hasattr(operator, "matmul"): + assert_eq(operator.matmul(a, b), operator.matmul(sa, sb)) + + +def test_matmul_errors(): + with pytest.raises(ValueError): + sa = sparse.random((3, 4, 5, 6), 0.5) + sb = sparse.random((3, 6, 5, 6), 0.5) + sparse.matmul(sa, sb) + + +@pytest.mark.parametrize( + "a_shape, b_shape", + [ + ((1, 4, 5), (3, 5, 6)), + ((3, 4, 5), (1, 5, 6)), + ((3, 4, 5), (3, 5, 6)), + ((3, 4, 5), (5, 6)), + ((4, 5), (5, 6)), + ((5,), (5, 6)), + ((4, 5), (5,)), + ((5,), (5,)), + ], +) +@pytest.mark.parametrize( + "a_format, b_format", + [("coo", "coo"), ("coo", "gcxs"), ("gcxs", "coo"), ("gcxs", "gcxs")], +) +@pytest.mark.parametrize( + "a_comp_axes, b_comp_axes", [([0], [0]), ([0], [1]), ([1], [0]), ([1], [1])] +) +def test_dot(a_shape, b_shape, a_format, b_format, a_comp_axes, b_comp_axes): + if a_format == "coo": + a_comp_axes = None + if b_format == "coo": + b_comp_axes = None + sa = sparse.random( + a_shape, density=0.5, format=a_format, compressed_axes=a_comp_axes + ) + sb = sparse.random( + b_shape, density=0.5, format=b_format, compressed_axes=b_comp_axes + ) + + a = sa.todense() + b = sb.todense() + + assert_eq(a.dot(b), sa.dot(sb)) + assert_eq(np.dot(a, b), sparse.dot(sa, sb)) + 
assert_eq(sparse.dot(sa, b), sparse.dot(a, sb)) + assert_eq(np.dot(a, b), sparse.dot(sa, sb)) + + if hasattr(operator, "matmul"): + # Basic equivalences + assert_eq(operator.matmul(a, b), operator.matmul(sa, sb)) + # Test that SOO's and np.array's combine correctly + # Not possible due to https://github.com/numpy/numpy/issues/9028 + # assert_eq(eval("a @ sb"), eval("sa @ b")) + + +@pytest.mark.parametrize( + "a_dense, b_dense, o_type", + [ + (False, False, sparse.SparseArray), + (False, True, np.ndarray), + (True, False, np.ndarray), + ], +) +def test_dot_type(a_dense, b_dense, o_type): + a = sparse.random((3, 4), density=0.8) + b = sparse.random((4, 5), density=0.8) + + if a_dense: + a = a.todense() + + if b_dense: + b = b.todense() + + assert isinstance(sparse.dot(a, b), o_type) + + +@pytest.mark.xfail +def test_dot_nocoercion(): + sa = sparse.random((3, 4, 5), density=0.5) + sb = sparse.random((5, 6), density=0.5) + + a = sa.todense() + b = sb.todense() + + la = a.tolist() + lb = b.tolist() + + if hasattr(operator, "matmul"): + # Operations with naive collection (list) + assert_eq(operator.matmul(la, b), operator.matmul(la, sb)) + assert_eq(operator.matmul(a, lb), operator.matmul(sa, lb)) From 6a56a179be31c474467ca41b7fbf8eeeb072a00c Mon Sep 17 00:00:00 2001 From: daletovar Date: Thu, 30 Jul 2020 15:24:15 -0700 Subject: [PATCH 52/72] add io for gcxs --- sparse/_io.py | 22 +++++++++++++++++++++- sparse/tests/test_io.py | 5 +++-- 2 files changed, 24 insertions(+), 3 deletions(-) diff --git a/sparse/_io.py b/sparse/_io.py index 1bedcc7a..f54dd27a 100644 --- a/sparse/_io.py +++ b/sparse/_io.py @@ -1,6 +1,7 @@ import numpy as np from ._coo.core import COO +from ._compressed import GCXS def save_npz(filename, matrix, compressed=True): @@ -49,11 +50,17 @@ def save_npz(filename, matrix, compressed=True): nodes = { "data": matrix.data, - "coords": matrix.coords, "shape": matrix.shape, "fill_value": matrix.fill_value, } + if type(matrix) == COO: + nodes["coords"] = 
matrix.coords + elif type(matrix) == GCXS: + nodes["indices"] = matrix.indices + nodes["indptr"] = matrix.indptr + nodes["compressed_axes"] = matrix.compressed_axes + if compressed: np.savez_compressed(filename, **nodes) else: @@ -104,6 +111,19 @@ def load_npz(filename): has_duplicates=False, fill_value=fill_value, ) + except: + pass + try: + data = fp["data"] + indices = fp["indices"] + indptr = fp["indptr"] + comp_axes = fp["compressed_axes"] + shape = tuple(fp["shape"]) + fill_value = fp["fill_value"][()] + return GCXS((data, indices, indptr), + shape=shape, + fill_value=fill_value, + compressed_axes=comp_axes) except KeyError: raise RuntimeError( "The file {!s} does not contain a valid sparse matrix".format(filename) diff --git a/sparse/tests/test_io.py b/sparse/tests/test_io.py index b808eba7..c1930648 100644 --- a/sparse/tests/test_io.py +++ b/sparse/tests/test_io.py @@ -11,8 +11,9 @@ @pytest.mark.parametrize("compression", [True, False]) -def test_save_load_npz_file(compression): - x = sparse.random((2, 3, 4, 5), density=0.25) +@pytest.mark.parametrize("format", ["coo", "gcxs"]) +def test_save_load_npz_file(compression, format): + x = sparse.random((2, 3, 4, 5), density=0.25, format=format) y = x.todense() dir_name = tempfile.mkdtemp() From c21fd07355bc519f8b648a80257f4772cf75bba4 Mon Sep 17 00:00:00 2001 From: daletovar Date: Thu, 30 Jul 2020 15:26:35 -0700 Subject: [PATCH 53/72] add docs, dot, flatten --- sparse/_compressed/compressed.py | 150 +++++++++++++++++++++++++++---- sparse/_coo/__init__.py | 6 -- 2 files changed, 134 insertions(+), 22 deletions(-) diff --git a/sparse/_compressed/compressed.py b/sparse/_compressed/compressed.py index 77bceeb2..25be8c06 100644 --- a/sparse/_compressed/compressed.py +++ b/sparse/_compressed/compressed.py @@ -7,6 +7,7 @@ from .._sparse_array import SparseArray from .._coo.common import linear_loc +from .._common import dot from .._utils import normalize_axis, check_zero_fill_value, check_compressed_axes from 
.._coo.core import COO from .convert import uncompress_dimension, _transpose, _1d_reshape @@ -42,11 +43,15 @@ def _from_coo(x, compressed_axes=None): compressed_shape = (row_size, col_size) shape = x.shape - x = x.transpose(axis_order) - linear = linear_loc(x.coords, reordered_shape) + # transpose axes, linearize, reshape, and compress + linear = linear_loc(x.coords[axis_order], reordered_shape) order = np.argsort(linear) - # linearizing twice is unnecessary, fix needed - coords = x.reshape((compressed_shape)).coords + linear = linear[order] + coords = np.empty((2, x.nnz), dtype=np.intp) + strides = 1 + for i, d in enumerate(compressed_shape[::-1]): + coords[-(i + 1), :] = (linear // strides) % d + strides *= d indptr = np.empty(row_size + 1, dtype=np.intp) indptr[0] = 0 np.cumsum(np.bincount(coords[0], minlength=row_size), out=indptr[1:]) @@ -56,6 +61,44 @@ def _from_coo(x, compressed_axes=None): class GCXS(SparseArray, NDArrayOperatorsMixin): + """ + A sparse multidimensional array. + + This is stored in GCXS format, a generalization of the GCRS/GCCS formats + from 'Efficient storage scheme for n-dimensional sparse array: GCRS/GCCS': + https://ieeexplore.ieee.org/document/7237032. GCXS generalizes the csr/csc + sparse matrix formats. For arrays with ndim == 2, GCXS is the same csr/csc. + For arrays with ndim >2, any combination of axes can be compressed, + significantly reducing storage. + + + Parameters + ---------- + arg : tuple (data, indices, indptr) + A tuple of arrays holding the data, indices, and + index pointers for the nonzero values of the array. + shape : tuple[int] (COO.ndim,) + The shape of the array. + compressed_axes : Iterable[int] + The axes to compress. + fill_value: scalar, optional + The fill value for this array. + + Attributes + ---------- + data : numpy.ndarray (nnz,) + An array holding the nonzero values corresponding to :obj:`GCXS.indices`. 
+ indices : numpy.ndarray (nnz,) + An array holding the coordinates of every nonzero element along uncompressed dimensions. + indptr : numpy.ndarray + An array holding the cumulative sums of the nonzeros along the compressed dimensions. + shape : tuple[int] (ndim,) + The dimensions of this array. + + See Also + -------- + DOK : A mostly write-only sparse array. + """ __array_priority__ = 12 @@ -78,8 +121,14 @@ def __init__(self, arg, shape=None, compressed_axes=None, fill_value=0): compressed_axes = None self.data, self.indices, self.indptr = arg + + if self.data.ndim != 1: + raise ValueError("data must be a scalar or 1-dimensional.") + self.shape = shape - self.compressed_axes = compressed_axes + self.compressed_axes = ( + tuple(compressed_axes) if isinstance(compressed_axes, Iterable) else None + ) self.fill_value = fill_value @classmethod @@ -193,7 +242,7 @@ def _reordered_shape(self): @property def T(self): - return self.tranpose() + return self.transpose() def __str__(self): return "".format( @@ -222,6 +271,9 @@ def change_compressed_axes(self, new_compressed_axes): for i in range(len(new_compressed_axes)) ) + if new_compressed_axes == self.compressed_axes: + return self + if len(new_compressed_axes) >= len(self.shape): raise ValueError("cannot compress all axes") if len(set(new_compressed_axes)) != len(new_compressed_axes): @@ -292,11 +344,11 @@ def todense(self): """ if self.compressed_axes is None: out = np.full(self.shape, self.fill_value, self.dtype) - if self.indices != (): + if len(self.indices) != 0: out[self.indices] = self.data else: if len(self.data) != 0: - out[self.indices] = self.data + out[()] = self.data[0] return out return self.tocoo().todense() @@ -360,7 +412,7 @@ def asformat(self, format, compressed_axes=None): return self.todok() elif format == "gcxs": if compressed_axes is None: - compressed_axes = self.compressed_axess + compressed_axes = self.compressed_axes return self.change_compressed_axes(compressed_axes) raise 
NotImplementedError("The given format is not supported.") @@ -396,6 +448,25 @@ def maybe_densify(self, max_size=1000, min_density=0.25): "Operation would require converting " "large sparse array to dense" ) + def flatten(self, order="C"): + """ + Returns a new :obj:`GCXS` array that is a flattened version of this array. + + Returns + ------- + GCXS + The flattened output array. + + Notes + ----- + The :code:`order` parameter is provided just for compatibility with + Numpy and isn't actually supported. + """ + if order not in {"C", None}: + raise NotImplementedError("The `order` parameter is not" "supported.") + + return self.reshape(-1) + def reshape(self, shape, order="C", compressed_axes=None): """ Returns a new :obj:`GCXS` array that is a reshaped version of this array. @@ -420,7 +491,10 @@ def reshape(self, shape, order="C", compressed_axes=None): Numpy and isn't actually supported. """ - + if isinstance(shape, Iterable): + shape = tuple(shape) + else: + shape = (shape,) if order not in {"C", None}: raise NotImplementedError("The 'order' parameter is not supported") if any(d == -1 for d in shape): @@ -430,6 +504,13 @@ def reshape(self, shape, order="C", compressed_axes=None): if self.shape == shape: return self + if self.size != reduce(mul, shape, 1): + raise ValueError( + "cannot reshape array of size {} into shape {}".format(self.size, shape) + ) + if len(shape) == 0: + return self.tocoo().reshape(shape).asformat("gcxs") + if compressed_axes is None: if len(shape) == self.ndim: compressed_axes = self.compressed_axes @@ -438,18 +519,13 @@ def reshape(self, shape, order="C", compressed_axes=None): else: compressed_axes = (np.argmin(shape),) - if self.size != reduce(mul, shape, 1): - raise ValueError( - "cannot reshape array of size {} into shape {}".format(self.size, shape) - ) - if self.ndim == 1: arg = _1d_reshape(self, shape, compressed_axes) else: arg = _transpose(self, shape, np.arange(self.ndim), compressed_axes) return GCXS( arg, - shape=shape, + 
shape=tuple(shape), compressed_axes=compressed_axes, fill_value=self.fill_value, ) @@ -555,6 +631,20 @@ def transpose(self, axes=None, compressed_axes=None): ) def _2d_transpose(self): + """ + A function for performing constant-time transposes on 2d GCXS arrays. + + Returns + ------- + GCXS + The new transposed array with the opposite compressed axes as the input. + + See Also + -------- + scipy.sparse.csr_matrix.tocsc : Scipy equivalent function. + scipy.sparse.csc_matrix.tocsr : Scipy equivalent function. + numpy.ndarray.transpose : Numpy equivalent function. + """ if self.ndim != 2: raise ValueError( "cannot perform 2d transpose on array with dimension {}".format( @@ -570,3 +660,31 @@ def _2d_transpose(self): compressed_axes=compressed_axes, fill_value=self.fill_value, ) + + def dot(self, other): + """ + Performs the equivalent of :code:`x.dot(y)` for :obj:`GCXS`. + + Parameters + ---------- + other : Union[GCXS, COO, numpy.ndarray, scipy.sparse.spmatrix] + The second operand of the dot product operation. + + Returns + ------- + {GCXS, numpy.ndarray} + The result of the dot product. If the result turns out to be dense, + then a dense array is returned, otherwise, a sparse array. + + Raises + ------ + ValueError + If all arguments don't have zero fill-values. + + See Also + -------- + dot : Equivalent function for two arguments. + :obj:`numpy.dot` : Numpy equivalent function. + scipy.sparse.csr_matrix.dot : Scipy equivalent function. 
+ """ + return dot(self, other) diff --git a/sparse/_coo/__init__.py b/sparse/_coo/__init__.py index 4255e05a..48385774 100644 --- a/sparse/_coo/__init__.py +++ b/sparse/_coo/__init__.py @@ -1,9 +1,6 @@ from .core import COO, as_coo from .umath import elemwise from .common import ( - tensordot, - dot, - matmul, concatenate, clip, stack, @@ -30,9 +27,6 @@ "COO", "as_coo", "elemwise", - "tensordot", - "dot", - "matmul", "concatenate", "clip", "stack", From 4b11dfac74abcc528c5899dc1a527bdd4ce6bf6f Mon Sep 17 00:00:00 2001 From: daletovar Date: Thu, 30 Jul 2020 15:35:13 -0700 Subject: [PATCH 54/72] add matmul --- sparse/_compressed/compressed.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/sparse/_compressed/compressed.py b/sparse/_compressed/compressed.py index 25be8c06..0df841d7 100644 --- a/sparse/_compressed/compressed.py +++ b/sparse/_compressed/compressed.py @@ -7,7 +7,7 @@ from .._sparse_array import SparseArray from .._coo.common import linear_loc -from .._common import dot +from .._common import dot, matmul from .._utils import normalize_axis, check_zero_fill_value, check_compressed_axes from .._coo.core import COO from .convert import uncompress_dimension, _transpose, _1d_reshape @@ -688,3 +688,15 @@ def dot(self, other): scipy.sparse.csr_matrix.dot : Scipy equivalent function. 
""" return dot(self, other) + + def __matmul__(self, other): + try: + return matmul(self, other) + except NotImplementedError: + return NotImplemented + + def __rmatmul__(self, other): + try: + return matmul(other, self) + except NotImplementedError: + return NotImplemented \ No newline at end of file From 2809752ec95a0244817ba67648ce32743de708ce Mon Sep 17 00:00:00 2001 From: daletovar Date: Mon, 3 Aug 2020 12:39:50 -0700 Subject: [PATCH 55/72] remove slow csr csc algorithm, formatting --- sparse/_common.py | 185 +++++-------------------------- sparse/_compressed/compressed.py | 2 +- sparse/_io.py | 8 +- 3 files changed, 31 insertions(+), 164 deletions(-) diff --git a/sparse/_common.py b/sparse/_common.py index a879a185..b10cabbe 100644 --- a/sparse/_common.py +++ b/sparse/_common.py @@ -277,59 +277,31 @@ def _dot(a, b, return_type=None): isinstance(arr, GCXS) for arr in [a, b] ): a = a.asformat("gcxs") - b = b.asformat("gcxs") + b = b.asformat("gcxs", compressed_axes=a.compressed_axes) if isinstance(a, GCXS) and isinstance(b, GCXS): - if a.compressed_axes == (0,): - compressed_axes = (0,) - if b.compressed_axes == (0,): - data, indices, indptr = _dot_csr_csr_type(a.dtype, b.dtype)( - out_shape, a.data, b.data, a.indices, b.indices, a.indptr, b.indptr - ) - else: - data, indices, indptr = _dot_csr_csc_type(a.dtype, b.dtype)( - out_shape, a.data, b.data, a.indices, b.indices, a.indptr, b.indptr - ) - elif a.compressed_axes == (1,): - if b.compressed_axes == (0,): - # convert the smaller matrix - if a.size >= b.size: - compressed_axes = (0,) - a = a.change_compressed_axes((0,)) - data, indices, indptr = _dot_csr_csr_type(a.dtype, b.dtype)( - out_shape, - a.data, - b.data, - a.indices, - b.indices, - a.indptr, - b.indptr, - ) - else: - compressed_axes = (1,) - b = b.change_compressed_axes((1,)) - data, indices, indptr = _dot_csr_csr_type(b.dtype, a.dtype)( - out_shape[::-1], - b.data, - a.data, - b.indices, - a.indices, - b.indptr, - a.indptr, - ) - else: - # a @ b = 
(b.T @ a.T).T - compressed_axes = (1,) - data, indices, indptr = _dot_csr_csr_type(b.dtype, a.dtype)( - out_shape[::-1], - b.data, - a.data, - b.indices, - a.indices, - b.indptr, - a.indptr, - ) + if a.nbytes > b.nbytes: + b = b.change_compressed_axes(a.compressed_axes) + else: + a = a.change_compressed_axes(b.compressed_axes) + if a.compressed_axes == (0,): # csr @ csr + compressed_axes = (0,) + data, indices, indptr = _dot_csr_csr_type(a.dtype, b.dtype)( + out_shape, a.data, b.data, a.indices, b.indices, a.indptr, b.indptr + ) + elif a.compressed_axes == (1,): # csc @ csc + # a @ b = (b.T @ a.T).T + compressed_axes = (1,) + data, indices, indptr = _dot_csr_csr_type(b.dtype, a.dtype)( + out_shape[::-1], + b.data, + a.data, + b.indices, + a.indices, + b.indptr, + a.indptr, + ) out = GCXS( (data, indices, indptr), shape=out_shape, compressed_axes=compressed_axes ) @@ -340,7 +312,7 @@ def _dot(a, b, return_type=None): return out if isinstance(a, GCXS) and isinstance(b, np.ndarray): - if a.compressed_axes == (0,): + if a.compressed_axes == (0,): # csr @ ndarray if return_type is None or return_type == np.ndarray: return _dot_csr_ndarray_type(a.dtype, b.dtype)( out_shape, a.data, a.indices, a.indptr, b @@ -352,7 +324,7 @@ def _dot(a, b, return_type=None): if return_type == COO: return out.tocoo() return out - if return_type is None or return_type == np.ndarray: + if return_type is None or return_type == np.ndarray: # csc @ ndarray return _dot_csc_ndarray_type(a.dtype, b.dtype)( a.shape, b.shape, a.data, a.indices, a.indptr, b ) @@ -363,9 +335,7 @@ def _dot(a, b, return_type=None): out = GCXS( (data, indices, indptr), shape=out_shape, compressed_axes=compressed_axes ) - if return_type == np.ndarray: - return out.todense() - elif return_type == COO: + if return_type == COO: return out.tocoo() return out @@ -487,46 +457,6 @@ def wrapped(*args): return wrapped -@numba.jit(nopython=True, nogil=True) -def _csr_csc_count_nnz(out_shape, indptr, a_indices, b_indices, a_indptr, 
b_indptr): - """ - A function for computing the number of nonzero values in the resulting - array from multiplying an array with compressed rows with an array - with compressed columns: (a @ b).nnz. - - Parameters - ---------- - out_shape : tuple - The shape of the output array. - - indptr : ndarray - The empty index pointer array for the output. - - a_indices, a_indptr : np.ndarray - The indices and index pointer array of ``a``. - - b_data, b_indices, b_indptr : np.ndarray - The indices and index pointer array of ``b``. - """ - nnz = 0 - for i in range(out_shape[0]): - cur_row = a_indices[a_indptr[i] : a_indptr[i + 1]] - for j in range(out_shape[1]): - cur_col = b_indices[b_indptr[j] : b_indptr[j + 1]] - a_next = 0 - b_next = 0 - while a_next < cur_row.size and b_next < cur_col.size: - if cur_row[a_next] < cur_col[b_next]: - a_next += 1 - elif cur_row[a_next] > cur_col[b_next]: - b_next += 1 - else: - nnz += 1 - break - indptr[i + 1] = nnz - return nnz - - @numba.jit(nopython=True, nogil=True) def _csr_csr_count_nnz(out_shape, a_indices, b_indices, a_indptr, b_indptr): """ @@ -630,71 +560,6 @@ def _csc_ndarray_count_nnz(a_shape, b_shape, indptr, a_indices, a_indptr, b): return nnz -@_memoize_dtype -def _dot_csr_csc_type(dt1, dt2): - dtr = np.result_type(dt1, dt2) - - @numba.jit( - nopython=True, - nogil=True, - locals={"data_curr": numba.np.numpy_support.from_dtype(dtr)}, - ) - def _dot_csr_csc( - out_shape, a_data, b_data, a_indices, b_indices, a_indptr, b_indptr - ): - """ - Utility function taking in two ``GCXS`` objects and calculating - their dot product: a @ b for a with compressed rows and b - with compressed columns. - - Parameters - ---------- - out_shape : tuple - The shape of the output array. - - a_data, a_indices, a_indptr : np.ndarray - The data, indices, and index pointer arrays of ``a``. - - b_data, b_indices, b_indptr : np.ndarray - The data, indices, and index pointer arrays of ``b``. 
- """ - indptr = np.empty(out_shape[0] + 1, dtype=np.intp) - indptr[0] = 0 - - # calculate nnz before multiplying so we can use static arrays - nnz = _csr_csc_count_nnz( - out_shape, indptr, a_indices, b_indices, a_indptr, b_indptr - ) - indices = np.empty(nnz, dtype=np.intp) - data = np.empty(nnz) - next_val = 0 - for i in range(out_shape[0]): - cur_row = a_indices[a_indptr[i] : a_indptr[i + 1]] - cur_a_data = a_data[a_indptr[i] : a_indptr[i + 1]] - for j in range(out_shape[1]): - cur_col = b_indices[b_indptr[j] : b_indptr[j + 1]] - cur_b_data = b_data[b_indptr[j] : b_indptr[j + 1]] - a_cur = 0 - b_cur = 0 - cur_val = 0 - while a_cur < cur_row.size and b_cur < cur_col.size: - if cur_row[a_cur] < cur_col[b_cur]: - a_cur += 1 - elif cur_row[a_cur] > cur_col[b_cur]: - b_cur += 1 - else: - cur_val += cur_a_data[a_cur] * cur_b_data[b_cur] - a_cur += 1 - b_cur += 1 - if cur_val != 0: - indices[next_val] = j - data[next_val] = cur_val - next_val += 1 - return data, indices, indptr - - return _dot_csr_csc - - @_memoize_dtype def _dot_csr_csr_type(dt1, dt2): dtr = np.result_type(dt1, dt2) diff --git a/sparse/_compressed/compressed.py b/sparse/_compressed/compressed.py index 0df841d7..faec1822 100644 --- a/sparse/_compressed/compressed.py +++ b/sparse/_compressed/compressed.py @@ -699,4 +699,4 @@ def __rmatmul__(self, other): try: return matmul(other, self) except NotImplementedError: - return NotImplemented \ No newline at end of file + return NotImplemented diff --git a/sparse/_io.py b/sparse/_io.py index f54dd27a..34231379 100644 --- a/sparse/_io.py +++ b/sparse/_io.py @@ -120,10 +120,12 @@ def load_npz(filename): comp_axes = fp["compressed_axes"] shape = tuple(fp["shape"]) fill_value = fp["fill_value"][()] - return GCXS((data, indices, indptr), - shape=shape, + return GCXS( + (data, indices, indptr), + shape=shape, fill_value=fill_value, - compressed_axes=comp_axes) + compressed_axes=comp_axes, + ) except KeyError: raise RuntimeError( "The file {!s} does not contain a 
valid sparse matrix".format(filename) From a5f64b9bd07e2c2aca3c3c994decd95c1b844877 Mon Sep 17 00:00:00 2001 From: Dale Tovar <37719258+daletovar@users.noreply.github.com> Date: Mon, 3 Aug 2020 13:04:06 -0700 Subject: [PATCH 56/72] Delete umath.py --- sparse/_compressed/umath.py | 377 ------------------------------------ 1 file changed, 377 deletions(-) delete mode 100644 sparse/_compressed/umath.py diff --git a/sparse/_compressed/umath.py b/sparse/_compressed/umath.py deleted file mode 100644 index 1f921c7f..00000000 --- a/sparse/_compressed/umath.py +++ /dev/null @@ -1,377 +0,0 @@ -from itertools import zip_longest -import numpy as np -import numba - - - - - -def diagonal(a, offset=0, axis1=0, axis2=1): - - if a.shape[axis1] != a.shape[axis2]: - raise ValueError("a.shape[axis1] != a.shape[axis2]") - - diag_axes = [ - axis for axis in range(len(a.shape)) if axis != axis1 and axis != axis2 - ] + [axis1] - diag_shape = [a.shape[axis] for axis in diag_axes] - diag_shape[-1] -= abs(offset) - - # convert to linearized coordinates - rows, cols = [], [] - operations = np.prod(diag_shape) - current_idx = np.zeros(a.shape) - current_idx[axis1] = offset - a1 = offset - axes = list(reversed(diag_axes[:-1])) - first_axis = axes[0] - for _ in range(operations): - if a1 == a.shape[axis1]: - a1 = offset - current_idx[axis1] = offset - current_idx[axis2] = 0 - current_idx[first_axis] +=1 - for i in range(len(axes-1)): - if current_idx[axes[i]] == a.shape[axes[i]]: - current_idx[axes[i]] = 0 - current_idx[axes[i+1]] += 1 - - ind = np.ravel_multi_index(current_idx, a.reordered_shape) - row, col = np.unravel_index(ind, a.compressed_shape) - rows.append(row) - cols.append(col) - a1 += 1 - current_idx[axis1] = a1 - current_idx[axis2] += 1 - - # search the diagonals - coords = [] - mask = [] - count = 0 - for r in rows: - current_row = a.indices[a.indptr[r:r+1]] - for c in cols: - s = np.searchsorted(current_row, c) - if not (s >= current_row.size or current_row[s] != col[c]): - s += 
a.indptr[r] - mask.append(s) - coords.append(count) - count += 1 - coords = np.array(coords) - return GCXS.from_coo(COO(coords[None,:],a.data[mask], fill_value=a.fill_value).reshape(diag_shape)) - - - -@numba.jit(nopython=True,nogil=True) -def _diagonal_idx(indices, indptr, axis1, axis2, offset): - - # convert from nd - linearized = np.ravel_multi_index() - -def matmul(a, b): - pass - -def dot(a, b): - pass - -def tensordot(a, b, axes=2): - pass - -def kron(a, b): - from .._coo.umath import _cartesian_product - - check_zero_fill_value(a, b) - - a_sparse = isinstance(a, (SparseArray, scipy.sparse.spmatrix)) - b_sparse = isinstance(b, (SparseArray, scipy.sparse.spmatrix)) - a_ndim = np.ndim(a) - b_ndim = np.ndim(b) - - if not (a_sparse or b_sparse): - raise ValueError( - "Performing this operation would produce a dense " "result: kron" - ) - - if a_ndim == 0 or b_ndim == 0: - return a * b - - a = asCOO(a, check=False) - b = asCOO(b, check=False) - - # Match dimensions - max_dim = max(a.ndim, b.ndim) - a = a.reshape((1,) * (max_dim - a.ndim) + a.shape) - b = b.reshape((1,) * (max_dim - b.ndim) + b.shape) - - a_idx, b_idx = _cartesian_product(np.arange(a.nnz), np.arange(b.nnz)) - - a_expanded_coords = a.coords[:, a_idx] - b_expanded_coords = b.coords[:, b_idx] - o_coords = a_expanded_coords * np.asarray(b.shape)[:, None] + b_expanded_coords - o_data = a.data[a_idx] * b.data[b_idx] - o_shape = tuple(i * j for i, j in zip(a.shape, b.shape)) - - return COO(o_coords, o_data, shape=o_shape, has_duplicates=False) - -def concatenate(arrays, axis=0, compressed_axes=(0,)): - - check_consistent_fill_value(arrays) - arrays = [arr if isinstance(arr, GCXS) else GCXS(arr) for arr in arrays] - axis = normalize_axis(axis, arrays[0].ndim) - nnz = 0 - dim = sum(x.shape[axis] for x in arrays) - shape = list(arrays[0].shape) - shape[axis] = dim - assert all( - x.shape[ax] == arrays[0].shape[ax] - for x in arrays - for ax in set(range(arrays[0].ndim)) - {axis} - ) - # arrays may have 
different compressed_axes - # flatten to have a better coordinate system - arrays = [arr.flatten() for arr in arrays] - indices = np.concatenate([arr.indices for arr in arrays]) - data = np.concatenate([arr.data for arr in arrays]) - - dim = 0 - for x in arrays: - if dim: - indices[nnz : x.nnz + nnz] += dim - dim += x.shape[axis] - nnz += x.nnz - - if axis != 0: - order = np.argsort(indices, kind='mergesort') - indices = indices[order] - data = data[order] - - return GCXS((data, indices, ()), - fill_value=arrays[0].fill_value).reshape(shape, - compressed_axes=compressed_axes) - -def stack(arrays, axis=0): - - from .compressed import GCXS - check_consistent_fill_value(arrays) - arrays = [arr if isinstance(arr, GCXS) else GCXS(arr) for arr in arrays] - axis = normalize_axis(axis, arrays[0].ndim) - nnz = 0 - shape = list(arrays[0].shape) - shape.insert(len(arrays), axis) - assert all( - x.shape[ax] == arrays[0].shape[ax] - for x in arrays - for ax in set(range(arrays[0].ndim)) - {axis} - ) - # arrays may have different compressed_axes - # flatten to have a better coordinate system - arrays = [arr.flatten() for arr in arrays] - indices = np.concatenate([arr.indices for arr in arrays]) - data = np.concatenate([arr.data for arr in arrays]) - - dim = 0 - for x in arrays: - if dim: - indices[nnz : x.nnz + nnz] += dim - dim += x.shape[axis] - nnz += x.nnz - - if axis != 0: - order = np.argsort(indices, kind='mergesort') - indices = indices[order] - data = data[order] - - return GCXS((data, indices, ()), - fill_value=arrays[0].fill_value).reshape(shape, - compressed_axes=compressed_axes) - -def where(condition, x=None, y=None): - pass - -def eye(N, M=None, k=0, dtype=float, compressed_axis=0): - - if M is None: - M = N - - N = int(N) - M = int(M) - k = int(k) - - data_length = min(N, M) - if k > 0: - data_length = max(min(data_length, M - k), 0) - n_coords = np.arange(data_length, dtype=np.intp) - m_coords = n_coords + k - elif k < 0: - data_length = max(min(data_length, N + 
k), 0) - m_coords = np.arange(data_length, dtype=np.intp) - n_coords = m_coords - k - else: - n_coords = m_coords = np.arange(data_length, dtype=np.intp) - - if compressed_axis==0: - indptr = np.empty(N, dtype=np.intp) - indptr[0] = 0 - np.cumsum(np.bincount(n_coords, minlength=N), out=indptr[1:]) - indices = m_coords - else: - indptr = np.empty(M, dtype=np.intp) - indptr[0] = 0 - np.cumsum(np.bincount(m_coords, minlength=M), out=indptr[1:]) - indices = n_coords - - data = np.array(1, dtype=dtype) - return GCXS((data,indices,indptr), - compressed_axes=(compressed_axis,), - dtype=dtype, - fill_value=0) - -def full(shape, fill_value, dtype=None): - """Return a GCXS array of given shape and type, filled with `fill_value`. - Parameters - ---------- - shape : int or tuple of ints - Shape of the new array, e.g., ``(2, 3)`` or ``2``. - fill_value : scalar - Fill value. - dtype : data-type, optional - The desired data-type for the array. The default, `None`, means - `np.array(fill_value).dtype`. - Returns - ------- - out : COO - Array of `fill_value` with the given shape and dtype. - Examples - -------- - >>> full(5, 9).todense() # doctest: +NORMALIZE_WHITESPACE - array([9, 9, 9, 9, 9]) - >>> full((2, 2), 9, dtype=float).todense() # doctest: +SKIP - array([[9., 9.], - [9., 9.]]) - """ - - if dtype is None: - dtype = np.array(fill_value).dtype - if not isinstance(shape, tuple): - shape = (shape,) - data = np.empty(0, dtype=dtype) - indices = np.empty((0, 0), dtype=np.intp) - indptr = np.empty((0, 0), dtype=np.intp) - return GCXS( - (data, - indices, - indptr), - shape=shape, - fill_value=fill_value, - ) - -def full_like(a, fill_value, dtype=None): - """Return a full array with the same shape and type as a given array. - Parameters - ---------- - a : array_like - The shape and data-type of the result will match those of `a`. - dtype : data-type, optional - Overrides the data type of the result. 
- Returns - ------- - out : COO - Array of `fill_value` with the same shape and type as `a`. - Examples - -------- - >>> x = np.ones((2, 3), dtype='i8') - >>> full_like(x, 9.0).todense() # doctest: +NORMALIZE_WHITESPACE - array([[9, 9, 9], - [9, 9, 9]]) - """ - return full(a.shape, fill_value, dtype=(a.dtype if dtype is None else dtype)) - -def zeros(shape, dtype=float): - """Return a COO array of given shape and type, filled with zeros. - Parameters - ---------- - shape : int or tuple of ints - Shape of the new array, e.g., ``(2, 3)`` or ``2``. - dtype : data-type, optional - The desired data-type for the array, e.g., `numpy.int8`. Default is - `numpy.float64`. - Returns - ------- - out : COO - Array of zeros with the given shape and dtype. - Examples - -------- - >>> zeros(5).todense() # doctest: +SKIP - array([0., 0., 0., 0., 0.]) - >>> zeros((2, 2), dtype=int).todense() # doctest: +NORMALIZE_WHITESPACE - array([[0, 0], - [0, 0]]) - """ - return full(shape, 0, np.dtype(dtype)) - -def zeros_like(a, dtype=float): - """Return a COO array of zeros with the same shape and type as ``a``. - Parameters - ---------- - a : array_like - The shape and data-type of the result will match those of `a`. - dtype : data-type, optional - Overrides the data type of the result. - Returns - ------- - out : COO - Array of zeros with the same shape and type as `a`. - Examples - -------- - >>> x = np.ones((2, 3), dtype='i8') - >>> zeros_like(x).todense() # doctest: +NORMALIZE_WHITESPACE - array([[0, 0, 0], - [0, 0, 0]]) - """ - return zeros(a.shape, dtype=(a.dtype if dtype is None else dtype)) - - -def ones(shape, dtype=float): - """Return a COO array of given shape and type, filled with ones. - Parameters - ---------- - shape : int or tuple of ints - Shape of the new array, e.g., ``(2, 3)`` or ``2``. - dtype : data-type, optional - The desired data-type for the array, e.g., `numpy.int8`. Default is - `numpy.float64`. 
- Returns - ------- - out : COO - Array of ones with the given shape and dtype. - Examples - -------- - >>> ones(5).todense() # doctest: +SKIP - array([1., 1., 1., 1., 1.]) - >>> ones((2, 2), dtype=int).todense() # doctest: +NORMALIZE_WHITESPACE - array([[1, 1], - [1, 1]]) - """ - return full(shape, 1, np.dtype(dtype)) - -def ones_like(a, dtype=None): - """Return a COO array of ones with the same shape and type as ``a``. - Parameters - ---------- - a : array_like - The shape and data-type of the result will match those of `a`. - dtype : data-type, optional - Overrides the data type of the result. - Returns - ------- - out : COO - Array of ones with the same shape and type as `a`. - Examples - -------- - >>> x = np.ones((2, 3), dtype='i8') - >>> ones_like(x).todense() # doctest: +NORMALIZE_WHITESPACE - array([[1, 1, 1], - [1, 1, 1]]) - """ - return ones(a.shape, dtype=(a.dtype if dtype is None else dtype)) - \ No newline at end of file From 5021b84c4fc5dcfbe6b147298d7b788588ce6719 Mon Sep 17 00:00:00 2001 From: daletovar Date: Mon, 3 Aug 2020 13:05:37 -0700 Subject: [PATCH 57/72] fix formatting --- sparse/_compressed/indexing.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/sparse/_compressed/indexing.py b/sparse/_compressed/indexing.py index cdd1eeed..02db7f4c 100644 --- a/sparse/_compressed/indexing.py +++ b/sparse/_compressed/indexing.py @@ -165,7 +165,6 @@ def getitem(x, key): compressed_axes = None return GCXS( -<<<<<<< HEAD arg, shape=shape, compressed_axes=compressed_axes, fill_value=x.fill_value ) @@ -233,12 +232,6 @@ def get_slicing_selection( indices = np.array(indices) data = arr_data[ind_list] return (data, indices, indptr) -======= - arg, - shape=shape, - compressed_axes=compressed_axes, - fill_value=x.fill_value) ->>>>>>> change gxcs with gcxs @numba.jit(nopython=True, nogil=True) From fc5ed605652115d63233fe1ca97225f500389f0e Mon Sep 17 00:00:00 2001 From: daletovar Date: Mon, 3 Aug 2020 14:08:27 -0700 Subject: [PATCH 58/72] add import --- 
sparse/tests/test_dot.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sparse/tests/test_dot.py b/sparse/tests/test_dot.py index 3069e31a..3df9267c 100644 --- a/sparse/tests/test_dot.py +++ b/sparse/tests/test_dot.py @@ -5,7 +5,7 @@ import sparse from sparse import COO - +from .._utils import assert_eq @pytest.mark.parametrize( "a_shape,b_shape,axes", From 2dc550bbe2322f4c2a702ed5d1a7f7e4b1d5ffd7 Mon Sep 17 00:00:00 2001 From: daletovar Date: Mon, 3 Aug 2020 14:15:37 -0700 Subject: [PATCH 59/72] fix import --- sparse/tests/test_dot.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sparse/tests/test_dot.py b/sparse/tests/test_dot.py index 3df9267c..37442b3e 100644 --- a/sparse/tests/test_dot.py +++ b/sparse/tests/test_dot.py @@ -5,7 +5,7 @@ import sparse from sparse import COO -from .._utils import assert_eq +from sparse._utils import assert_eq @pytest.mark.parametrize( "a_shape,b_shape,axes", From 11c8b6f2e04a5f22f0ac1a99a35fe9d239ec8d39 Mon Sep 17 00:00:00 2001 From: daletovar Date: Mon, 3 Aug 2020 14:47:44 -0700 Subject: [PATCH 60/72] add return for COO and add import --- sparse/_common.py | 1 + sparse/tests/test_dot.py | 1 + 2 files changed, 2 insertions(+) diff --git a/sparse/_common.py b/sparse/_common.py index b10cabbe..0c1dca1c 100644 --- a/sparse/_common.py +++ b/sparse/_common.py @@ -402,6 +402,7 @@ def _dot(a, b, return_type=None): out = COO(coords, data, shape=out_shape, has_duplicates=False, sorted=True) if return_type == GCXS: return out.asformat("gcxs") + return out if isinstance(a, np.ndarray) and isinstance(b, COO): b = b.T diff --git a/sparse/tests/test_dot.py b/sparse/tests/test_dot.py index 37442b3e..ccc9027d 100644 --- a/sparse/tests/test_dot.py +++ b/sparse/tests/test_dot.py @@ -4,6 +4,7 @@ import scipy.stats import sparse +from sparse._compressed import GCXS from sparse import COO from sparse._utils import assert_eq From f4be9ae89d6bdb0311e843c03aa6c361bc508f1f Mon Sep 17 00:00:00 2001 From: daletovar 
Date: Mon, 3 Aug 2020 15:21:02 -0700 Subject: [PATCH 61/72] fix dtypes, add import --- sparse/_common.py | 12 ++++++------ sparse/tests/test_dot.py | 1 + 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/sparse/_common.py b/sparse/_common.py index 0c1dca1c..b90489ad 100644 --- a/sparse/_common.py +++ b/sparse/_common.py @@ -598,7 +598,7 @@ def _dot_csr_csr( indptr = np.empty(n_row + 1, dtype=np.intp) indptr[0] = 0 indices = np.empty(nnz, dtype=np.intp) - data = np.empty(nnz) + data = np.empty(nnz, dtype=dtr) next_ = np.full(n_col, -1) sums = np.zeros(n_col) nnz = 0 @@ -670,7 +670,7 @@ def _dot_csr_ndarray(out_shape, a_data, a_indices, a_indptr, b): out_shape : Tuple[int] The shape of the output array. """ - out = np.empty(out_shape) + out = np.empty(out_shape, dtype=dtr) for i in range(out_shape[0]): for j in range(out_shape[1]): val = 0 @@ -714,7 +714,7 @@ def _dot_csr_ndarray_sparse(out_shape, a_data, a_indices, a_indptr, b): indptr[0] = 0 nnz = _csr_ndarray_count_nnz(out_shape, indptr, a_indices, a_indptr, b) indices = np.empty(nnz, dtype=np.intp) - data = np.empty(nnz, dtype=a_data.dtype) + data = np.empty(nnz, dtype=dtr) current = 0 for i in range(out_shape[0]): for j in range(out_shape[1]): @@ -764,7 +764,7 @@ def _dot_csc_ndarray_sparse(a_shape, b_shape, a_data, a_indices, a_indptr, b): indptr = np.empty(b_shape[1] + 1, dtype=np.intp) nnz = _csc_ndarray_count_nnz(a_shape, b_shape, indptr, a_indices, a_indptr, b) indices = np.empty(nnz, dtype=np.intp) - data = np.empty(nnz, dtype=a_data.dtype) + data = np.empty(nnz, dtype=dtr) sums = np.zeros(a_shape[0]) mask = np.full(a_shape[0], -1) nnz = 0 @@ -829,7 +829,7 @@ def _dot_csc_ndarray(a_shape, b_shape, a_data, a_indices, a_indptr, b): a_shape, b_shape : Tuple[int] The shapes of the input arrays. 
""" - out = np.zeros((a_shape[0], b_shape[1])) + out = np.zeros((a_shape[0], b_shape[1]), dtype=dtr) for j in range(b_shape[1]): for i in range(b_shape[0]): for k in range(a_indptr[i], a_indptr[i + 1]): @@ -864,7 +864,7 @@ def _dot_ndarray_csc(out_shape, b_data, b_indices, b_indptr, a): out_shape : Tuple[int] The shape of the output array. """ - out = np.empty(out_shape) + out = np.empty(out_shape, dtype=dtr) for i in range(out_shape[0]): for j in range(out_shape[1]): total = 0 diff --git a/sparse/tests/test_dot.py b/sparse/tests/test_dot.py index ccc9027d..ddc6525c 100644 --- a/sparse/tests/test_dot.py +++ b/sparse/tests/test_dot.py @@ -3,6 +3,7 @@ import scipy.sparse import scipy.stats +import operator import sparse from sparse._compressed import GCXS from sparse import COO From a47abbed713758df3d73b68a0f5459e3e7823489 Mon Sep 17 00:00:00 2001 From: daletovar Date: Mon, 3 Aug 2020 15:38:33 -0700 Subject: [PATCH 62/72] fix test --- sparse/tests/test_dot.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sparse/tests/test_dot.py b/sparse/tests/test_dot.py index ddc6525c..46856250 100644 --- a/sparse/tests/test_dot.py +++ b/sparse/tests/test_dot.py @@ -118,9 +118,9 @@ def test_tensordot_valueerror(): "a_comp_axes, b_comp_axes", [([0], [0]), ([0], [1]), ([1], [0]), ([1], [1])] ) def test_matmul(a_shape, b_shape, a_format, b_format, a_comp_axes, b_comp_axes): - if a_format == "coo": + if a_format == "coo" or len(a_shape) == 1: a_comp_axes = None - if b_format == "coo": + if b_format == "coo" or len(b_shape) == 1: b_comp_axes = None sa = sparse.random( a_shape, density=0.5, format=a_format, compressed_axes=a_comp_axes @@ -177,9 +177,9 @@ def test_matmul_errors(): "a_comp_axes, b_comp_axes", [([0], [0]), ([0], [1]), ([1], [0]), ([1], [1])] ) def test_dot(a_shape, b_shape, a_format, b_format, a_comp_axes, b_comp_axes): - if a_format == "coo": + if a_format == "coo" or len(a_shape) == 1: a_comp_axes = None - if b_format == "coo": + if b_format == 
"coo" or len(b_shape) == 1: b_comp_axes = None sa = sparse.random( a_shape, density=0.5, format=a_format, compressed_axes=a_comp_axes From abc32b394dd37a68ff6714f9886ef944674290eb Mon Sep 17 00:00:00 2001 From: daletovar Date: Mon, 3 Aug 2020 17:25:44 -0700 Subject: [PATCH 63/72] fix out dtype --- sparse/_common.py | 2 +- sparse/_sparse_array.py | 19 ------------------- sparse/tests/test_dot.py | 1 + 3 files changed, 2 insertions(+), 20 deletions(-) diff --git a/sparse/_common.py b/sparse/_common.py index b90489ad..5c875b21 100644 --- a/sparse/_common.py +++ b/sparse/_common.py @@ -911,7 +911,7 @@ def _dot_coo_coo(out_shape, a_coords, b_coords, a_data, b_data, a_indptr, b_indp out_shape, a_coords[1], b_coords[1], a_indptr, b_indptr ) coords = np.empty((2, nnz), dtype=np.intp) - data = np.empty(nnz) + data = np.empty(nnz, dtype=dtr) next_ = np.full(n_col, -1) sums = np.zeros(n_col) nnz = 0 diff --git a/sparse/_sparse_array.py b/sparse/_sparse_array.py index 28f9e459..2d4b093f 100644 --- a/sparse/_sparse_array.py +++ b/sparse/_sparse_array.py @@ -78,25 +78,6 @@ def nnz(self): True """ - @property - @abstractmethod - def format(self): - """ - The storage format of this array. - - Returns - ------- - str - The storage format of this array. 
- - Examples - ------- - >>> import sparse - >>> s = sparse.random((5,5), density=0.2) - >>> s.format - 'coo' - """ - @property def ndim(self): """ diff --git a/sparse/tests/test_dot.py b/sparse/tests/test_dot.py index 46856250..fb34d288 100644 --- a/sparse/tests/test_dot.py +++ b/sparse/tests/test_dot.py @@ -9,6 +9,7 @@ from sparse import COO from sparse._utils import assert_eq + @pytest.mark.parametrize( "a_shape,b_shape,axes", [ From fbe404c45342573290751f5993681115a7321648 Mon Sep 17 00:00:00 2001 From: daletovar Date: Wed, 5 Aug 2020 12:20:28 -0700 Subject: [PATCH 64/72] remove redundant code --- sparse/_coo/core.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/sparse/_coo/core.py b/sparse/_coo/core.py index 453873ae..1a872502 100644 --- a/sparse/_coo/core.py +++ b/sparse/_coo/core.py @@ -1840,11 +1840,6 @@ def reshape(self, shape, order="C"): "cannot reshape array of size {} into shape {}".format(self.size, shape) ) - if self.size != reduce(operator.mul, shape, 1): - raise ValueError( - "cannot reshape array of size {} into shape {}".format(self.size, shape) - ) - if self._cache is not None: for sh, value in self._cache["reshape"]: if sh == shape: @@ -1898,8 +1893,6 @@ def resize(self, *args, refcheck=True): new_size = reduce(operator.mul, shape, 1) - new_size = reduce(operator.mul, shape, 1) - # TODO: this self.size enforces a 2**64 limit to array size linear_loc = self.linear_loc() end_idx = np.searchsorted(linear_loc, new_size, side="left") From 5b4f08a2ebae071b638758a1cae3ea33fd26fae7 Mon Sep 17 00:00:00 2001 From: daletovar Date: Wed, 5 Aug 2020 12:20:49 -0700 Subject: [PATCH 65/72] add keyerror --- sparse/_io.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sparse/_io.py b/sparse/_io.py index 34231379..9cd5781f 100644 --- a/sparse/_io.py +++ b/sparse/_io.py @@ -111,7 +111,7 @@ def load_npz(filename): has_duplicates=False, fill_value=fill_value, ) - except: + except KeyError: pass try: data = fp["data"] From 
eb8c0e5e1be681bb599ee88286eec09641599ad5 Mon Sep 17 00:00:00 2001 From: daletovar Date: Wed, 5 Aug 2020 13:20:57 -0700 Subject: [PATCH 66/72] remove unused variable --- sparse/_common.py | 1 - 1 file changed, 1 deletion(-) diff --git a/sparse/_common.py b/sparse/_common.py index 5c875b21..77715cfb 100644 --- a/sparse/_common.py +++ b/sparse/_common.py @@ -768,7 +768,6 @@ def _dot_csc_ndarray_sparse(a_shape, b_shape, a_data, a_indices, a_indptr, b): sums = np.zeros(a_shape[0]) mask = np.full(a_shape[0], -1) nnz = 0 - current = 0 for i in range(b_shape[1]): head = -2 length = 0 From ae6304015c989c24ff5eb698b6b1354d9a491184 Mon Sep 17 00:00:00 2001 From: daletovar Date: Wed, 5 Aug 2020 16:05:28 -0700 Subject: [PATCH 67/72] remove coverage for jitted funcs --- sparse/_common.py | 34 ++++++++++++++++++++++++---------- 1 file changed, 24 insertions(+), 10 deletions(-) diff --git a/sparse/_common.py b/sparse/_common.py index 77715cfb..86cf57d2 100644 --- a/sparse/_common.py +++ b/sparse/_common.py @@ -459,7 +459,9 @@ def wrapped(*args): @numba.jit(nopython=True, nogil=True) -def _csr_csr_count_nnz(out_shape, a_indices, b_indices, a_indptr, b_indptr): +def _csr_csr_count_nnz( + out_shape, a_indices, b_indices, a_indptr, b_indptr +): # pragma: no cover """ A function for computing the number of nonzero values in the resulting array from multiplying an array with compressed rows with an array @@ -494,7 +496,9 @@ def _csr_csr_count_nnz(out_shape, a_indices, b_indices, a_indptr, b_indptr): @numba.jit(nopython=True, nogil=True) -def _csr_ndarray_count_nnz(out_shape, indptr, a_indices, a_indptr, b): +def _csr_ndarray_count_nnz( + out_shape, indptr, a_indices, a_indptr, b +): # pragma: no cover """ A function for computing the number of nonzero values in the resulting array from multiplying an array with compressed rows with a dense @@ -527,7 +531,9 @@ def _csr_ndarray_count_nnz(out_shape, indptr, a_indices, a_indptr, b): @numba.jit(nopython=True, nogil=True) -def 
_csc_ndarray_count_nnz(a_shape, b_shape, indptr, a_indices, a_indptr, b): +def _csc_ndarray_count_nnz( + a_shape, b_shape, indptr, a_indices, a_indptr, b +): # pragma: no cover """ A function for computing the number of nonzero values in the resulting array from multiplying an array with compressed columns with a dense @@ -572,7 +578,7 @@ def _dot_csr_csr_type(dt1, dt2): ) def _dot_csr_csr( out_shape, a_data, b_data, a_indices, b_indices, a_indptr, b_indptr - ): + ): # pragma: no cover """ Utility function taking in two ``GCXS`` objects and calculating their dot product: a @ b for a and b with compressed rows. @@ -653,7 +659,7 @@ def _dot_csr_ndarray_type(dt1, dt2): nogil=True, locals={"data_curr": numba.np.numpy_support.from_dtype(dtr)}, ) - def _dot_csr_ndarray(out_shape, a_data, a_indices, a_indptr, b): + def _dot_csr_ndarray(out_shape, a_data, a_indices, a_indptr, b): # pragma: no cover """ Utility function taking in one `GCXS` and one ``ndarray`` and calculating their dot product: a @ b for a with compressed rows. @@ -693,7 +699,9 @@ def _dot_csr_ndarray_type_sparse(dt1, dt2): nogil=True, locals={"data_curr": numba.np.numpy_support.from_dtype(dtr)}, ) - def _dot_csr_ndarray_sparse(out_shape, a_data, a_indices, a_indptr, b): + def _dot_csr_ndarray_sparse( + out_shape, a_data, a_indices, a_indptr, b + ): # pragma: no cover """ Utility function taking in one `GCXS` and one ``ndarray`` and calculating their dot product: a @ b for a with compressed rows. @@ -744,7 +752,9 @@ def _dot_csc_ndarray_type_sparse(dt1, dt2): nogil=True, locals={"data_curr": numba.np.numpy_support.from_dtype(dtr)}, ) - def _dot_csc_ndarray_sparse(a_shape, b_shape, a_data, a_indices, a_indptr, b): + def _dot_csc_ndarray_sparse( + a_shape, b_shape, a_data, a_indices, a_indptr, b + ): # pragma: no cover """ Utility function taking in one `GCXS` and one ``ndarray`` and calculating their dot product: a @ b for a with compressed columns. 
@@ -811,7 +821,9 @@ def _dot_csc_ndarray_type(dt1, dt2): nogil=True, locals={"data_curr": numba.np.numpy_support.from_dtype(dtr)}, ) - def _dot_csc_ndarray(a_shape, b_shape, a_data, a_indices, a_indptr, b): + def _dot_csc_ndarray( + a_shape, b_shape, a_data, a_indices, a_indptr, b + ): # pragma: no cover """ Utility function taking in one `GCXS` and one ``ndarray`` and calculating their dot product: a @ b for a with compressed columns. @@ -847,7 +859,7 @@ def _dot_ndarray_csc_type(dt1, dt2): nogil=True, locals={"data_curr": numba.np.numpy_support.from_dtype(dtr)}, ) - def _dot_ndarray_csc(out_shape, b_data, b_indices, b_indptr, a): + def _dot_ndarray_csc(out_shape, b_data, b_indices, b_indptr, a): # pragma: no cover """ Utility function taking in one `ndarray` and one ``GCXS`` and calculating their dot product: a @ b for b with compressed columns. @@ -884,7 +896,9 @@ def _dot_coo_coo_type(dt1, dt2): nogil=True, locals={"data_curr": numba.np.numpy_support.from_dtype(dtr)}, ) - def _dot_coo_coo(out_shape, a_coords, b_coords, a_data, b_data, a_indptr, b_indptr): + def _dot_coo_coo( + out_shape, a_coords, b_coords, a_data, b_data, a_indptr, b_indptr + ): # pragma: no cover """ Utility function taking in two ``COO`` objects and calculating their dot product: a @ b. 
From 0a741ffabcb8f177eb3f417f9b8cf433c25e2ff3 Mon Sep 17 00:00:00 2001 From: daletovar Date: Thu, 6 Aug 2020 14:05:06 -0700 Subject: [PATCH 68/72] do all sorting after computing dot --- sparse/_common.py | 36 +++++++++++++++++++++--------------- 1 file changed, 21 insertions(+), 15 deletions(-) diff --git a/sparse/_common.py b/sparse/_common.py index 86cf57d2..6314c9be 100644 --- a/sparse/_common.py +++ b/sparse/_common.py @@ -29,6 +29,7 @@ diagonal, diagonalize, asCOO, + linear_loc ) @@ -270,8 +271,23 @@ def dot(a, b): def _dot(a, b, return_type=None): from ._coo import COO from ._compressed import GCXS + from ._compressed.convert import uncompress_dimension from ._sparse_array import SparseArray + def sort_indices(data, indices, indptr, shape): + """ + Several of the dot algorithms produce indices that + are out of order. So we have to do a sort of indices + and data. + """ + coords = np.empty((2, indices.shape[0]), dtype=np.intp) + coords[0,:] = uncompress_dimension(indptr) + coords[1,:] = indices + linear = linear_loc(coords, shape) + order = np.argsort(linear, kind='mergesort') + indices = indices[order] + data = data[order] + out_shape = (a.shape[0], b.shape[1]) if all(isinstance(arr, SparseArray) for arr in [a, b]) and any( isinstance(arr, GCXS) for arr in [a, b] @@ -290,6 +306,7 @@ def _dot(a, b, return_type=None): data, indices, indptr = _dot_csr_csr_type(a.dtype, b.dtype)( out_shape, a.data, b.data, a.indices, b.indices, a.indptr, b.indptr ) + sort_indices(data, indices, indptr, out_shape) elif a.compressed_axes == (1,): # csc @ csc # a @ b = (b.T @ a.T).T compressed_axes = (1,) @@ -302,6 +319,7 @@ def _dot(a, b, return_type=None): b.indptr, a.indptr, ) + sort_indices(data, indices, indptr, out_shape[::-1]) out = GCXS( (data, indices, indptr), shape=out_shape, compressed_axes=compressed_axes ) @@ -331,6 +349,7 @@ def _dot(a, b, return_type=None): data, indices, indptr = _dot_csc_ndarray_type_sparse(a.dtype, b.dtype)( a.shape, b.shape, a.data, a.indices, 
a.indptr, b ) + sort_indices(data, indices, indptr, out_shape[::-1]) compressed_axes = (1,) out = GCXS( (data, indices, indptr), shape=out_shape, compressed_axes=compressed_axes @@ -381,13 +400,13 @@ def _dot(a, b, return_type=None): coords, data = _dot_coo_coo_type(a.dtype, b.dtype)( out_shape, a.coords, b.coords, a.data, b.data, a_indptr, b_indptr ) - out = COO(coords, data, shape=out_shape, has_duplicates=False, sorted=True) + out = COO(coords, data, shape=out_shape, has_duplicates=False, sorted=False) if return_type == np.ndarray: return out.todense() elif return_type == GCXS: return out.asformat("gcxs") - return COO(coords, data, shape=out_shape, has_duplicates=False, sorted=True) + return out if isinstance(a, COO) and isinstance(b, np.ndarray): b = b.view(type=np.ndarray).T @@ -639,12 +658,6 @@ def _dot_csr_csr( sums[temp] = 0 indptr[i + 1] = nnz - # ensure sorted indices - order = np.argsort(indices[indptr[i] : indptr[i + 1]]) - data[indptr[i] : indptr[i + 1]] = data[indptr[i] : indptr[i + 1]][order] - indices[indptr[i] : indptr[i + 1]] = indices[indptr[i] : indptr[i + 1]][ - order - ] return data, indices, indptr return _dot_csr_csr @@ -804,9 +817,6 @@ def _dot_csc_ndarray_sparse( mask[temp] = -1 sums[temp] = 0 - order = np.argsort(indices[start:nnz]) - indices[start:nnz] = indices[start:nnz][order] - data[start:nnz] = data[start:nnz][order] return data, indices, indptr return _dot_csc_ndarray_sparse @@ -960,10 +970,6 @@ def _dot_coo_coo( next_[temp] = -1 sums[temp] = 0 - # ensure sorted coords - order = np.argsort(coords[1, start:nnz]) - data[start:nnz] = data[start:nnz][order] - coords[1, start:nnz] = coords[1, start:nnz][order] return coords, data return _dot_coo_coo From 2d7b1592331755369d6a72ab134f535694a7ed55 Mon Sep 17 00:00:00 2001 From: daletovar Date: Thu, 6 Aug 2020 14:06:42 -0700 Subject: [PATCH 69/72] update tests, docstrings --- sparse/_io.py | 12 +++++++----- sparse/tests/test_io.py | 21 +++++---------------- 2 files changed, 12 
insertions(+), 21 deletions(-) diff --git a/sparse/_io.py b/sparse/_io.py index 9cd5781f..c29f1a45 100644 --- a/sparse/_io.py +++ b/sparse/_io.py @@ -7,7 +7,8 @@ def save_npz(filename, matrix, compressed=True): """ Save a sparse matrix to disk in numpy's ``.npz`` format. Note: This is not binary compatible with scipy's ``save_npz()``. - Will save a file that can only be opend with this package's ``load_npz()``. + This binary format is not currently stable. Will save a file + that can only be opend with this package's ``load_npz()``. Parameters ---------- @@ -16,7 +17,7 @@ def save_npz(filename, matrix, compressed=True): where the data will be saved. If file is a string or a Path, the ``.npz`` extension will be appended to the file name if it is not already there - matrix : COO + matrix : SparseArray The matrix to save to disk compressed : bool Whether to save in compressed or uncompressed mode @@ -70,7 +71,8 @@ def save_npz(filename, matrix, compressed=True): def load_npz(filename): """ Load a sparse matrix in numpy's ``.npz`` format from disk. Note: This is not binary compatible with scipy's ``save_npz()`` - output. Will only load files saved by this package. + output. This binary format is not currently stable. + Will only load files saved by this package. Parameters ---------- @@ -80,8 +82,8 @@ def load_npz(filename): Returns ------- - COO - The sparse matrix at path ``filename`` + SparseArray + The sparse matrix at path ``filename``. 
Example -------- diff --git a/sparse/tests/test_io.py b/sparse/tests/test_io.py index c1930648..83822963 100644 --- a/sparse/tests/test_io.py +++ b/sparse/tests/test_io.py @@ -1,6 +1,3 @@ -import os -import tempfile -import shutil import pytest import numpy as np @@ -12,29 +9,21 @@ @pytest.mark.parametrize("compression", [True, False]) @pytest.mark.parametrize("format", ["coo", "gcxs"]) -def test_save_load_npz_file(compression, format): +def test_save_load_npz_file(tmp_path, compression, format): x = sparse.random((2, 3, 4, 5), density=0.25, format=format) y = x.todense() - dir_name = tempfile.mkdtemp() - filename = os.path.join(dir_name, "mat.npz") - + filename = tmp_path / "mat.npz" save_npz(filename, x, compressed=compression) z = load_npz(filename) assert_eq(x, z) assert_eq(y, z.todense()) - shutil.rmtree(dir_name) - - -def test_load_wrong_format_exception(): +def test_load_wrong_format_exception(tmp_path): x = np.array([1, 2, 3]) - dir_name = tempfile.mkdtemp() - filename = os.path.join(dir_name, "mat.npz") + filename = tmp_path / "mat.npz" np.savez(filename, x) with pytest.raises(RuntimeError): - load_npz(filename) - - shutil.rmtree(dir_name) + load_npz(filename) \ No newline at end of file From 111438345fda46829569c3c0afe3412d7feb413d Mon Sep 17 00:00:00 2001 From: daletovar Date: Thu, 6 Aug 2020 14:07:38 -0700 Subject: [PATCH 70/72] remove python 2 compat check --- sparse/tests/test_dot.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/sparse/tests/test_dot.py b/sparse/tests/test_dot.py index fb34d288..b22510fe 100644 --- a/sparse/tests/test_dot.py +++ b/sparse/tests/test_dot.py @@ -197,14 +197,6 @@ def test_dot(a_shape, b_shape, a_format, b_format, a_comp_axes, b_comp_axes): assert_eq(sparse.dot(sa, b), sparse.dot(a, sb)) assert_eq(np.dot(a, b), sparse.dot(sa, sb)) - if hasattr(operator, "matmul"): - # Basic equivalences - assert_eq(operator.matmul(a, b), operator.matmul(sa, sb)) - # Test that SOO's and np.array's combine correctly - # Not 
possible due to https://github.com/numpy/numpy/issues/9028 - # assert_eq(eval("a @ sb"), eval("sa @ b")) - - @pytest.mark.parametrize( "a_dense, b_dense, o_type", [ From 2b7a8e4446a37e80a1b57157403622d14d3701bd Mon Sep 17 00:00:00 2001 From: daletovar Date: Thu, 6 Aug 2020 14:21:16 -0700 Subject: [PATCH 71/72] add formatting --- sparse/_common.py | 8 ++++---- sparse/tests/test_dot.py | 1 + sparse/tests/test_io.py | 3 ++- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/sparse/_common.py b/sparse/_common.py index 6314c9be..21b519ee 100644 --- a/sparse/_common.py +++ b/sparse/_common.py @@ -29,7 +29,7 @@ diagonal, diagonalize, asCOO, - linear_loc + linear_loc, ) @@ -281,10 +281,10 @@ def sort_indices(data, indices, indptr, shape): and data. """ coords = np.empty((2, indices.shape[0]), dtype=np.intp) - coords[0,:] = uncompress_dimension(indptr) - coords[1,:] = indices + coords[0, :] = uncompress_dimension(indptr) + coords[1, :] = indices linear = linear_loc(coords, shape) - order = np.argsort(linear, kind='mergesort') + order = np.argsort(linear, kind="mergesort") indices = indices[order] data = data[order] diff --git a/sparse/tests/test_dot.py b/sparse/tests/test_dot.py index b22510fe..9b975aea 100644 --- a/sparse/tests/test_dot.py +++ b/sparse/tests/test_dot.py @@ -197,6 +197,7 @@ def test_dot(a_shape, b_shape, a_format, b_format, a_comp_axes, b_comp_axes): assert_eq(sparse.dot(sa, b), sparse.dot(a, sb)) assert_eq(np.dot(a, b), sparse.dot(sa, sb)) + @pytest.mark.parametrize( "a_dense, b_dense, o_type", [ diff --git a/sparse/tests/test_io.py b/sparse/tests/test_io.py index 83822963..e82815fc 100644 --- a/sparse/tests/test_io.py +++ b/sparse/tests/test_io.py @@ -19,6 +19,7 @@ def test_save_load_npz_file(tmp_path, compression, format): assert_eq(x, z) assert_eq(y, z.todense()) + def test_load_wrong_format_exception(tmp_path): x = np.array([1, 2, 3]) @@ -26,4 +27,4 @@ def test_load_wrong_format_exception(tmp_path): np.savez(filename, x) with 
pytest.raises(RuntimeError): - load_npz(filename) \ No newline at end of file + load_npz(filename) From 1296798f0c51b2440d28de4b8031969547147435 Mon Sep 17 00:00:00 2001 From: daletovar Date: Fri, 7 Aug 2020 12:52:27 -0700 Subject: [PATCH 72/72] add back @ test --- sparse/tests/test_dot.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sparse/tests/test_dot.py b/sparse/tests/test_dot.py index 9b975aea..439f5ae5 100644 --- a/sparse/tests/test_dot.py +++ b/sparse/tests/test_dot.py @@ -197,6 +197,12 @@ def test_dot(a_shape, b_shape, a_format, b_format, a_comp_axes, b_comp_axes): assert_eq(sparse.dot(sa, b), sparse.dot(a, sb)) assert_eq(np.dot(a, b), sparse.dot(sa, sb)) + # Basic equivalences + assert_eq(operator.matmul(a, b), operator.matmul(sa, sb)) + # Test that COO's and np.array's combine correctly + # Not possible due to https://github.com/numpy/numpy/issues/9028 + # assert_eq(eval("a @ sb"), eval("sa @ b")) + @pytest.mark.parametrize( "a_dense, b_dense, o_type",