Skip to content

Commit

Permalink
add _repr_html_ (#283)
Browse files Browse the repository at this point in the history
Allows a nice repr to be shown in Jupyter.
  • Loading branch information
daletovar authored and hameerabbasi committed Sep 18, 2019
1 parent 161650b commit 96fd044
Show file tree
Hide file tree
Showing 8 changed files with 142 additions and 39 deletions.
1 change: 0 additions & 1 deletion docs/generated/sparse.DOK.rst
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ DOK
DOK.nnz

DOK.size




Expand Down
2 changes: 1 addition & 1 deletion sparse/_compressed/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
from .compressed import GXCS
from .compressed import GCXS
67 changes: 53 additions & 14 deletions sparse/_compressed/compressed.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ def _from_coo(x, compressed_axes=None):
if compressed_axes is not None:
raise ValueError("no axes to compress for 1d array")
return (
(x.data, x.coords[0], []),
(x.data, x.coords[0], ()),
x.shape,
None,
None,
Expand Down Expand Up @@ -80,7 +80,7 @@ def _from_coo(x, compressed_axes=None):
)


class GXCS(SparseArray, NDArrayOperatorsMixin):
class GCXS(SparseArray, NDArrayOperatorsMixin):

__array_priority__ = 12

Expand Down Expand Up @@ -134,7 +134,6 @@ def __init__(self, arg, shape=None, compressed_axes=None, fill_value=0):
self.axisptr = axisptr
self.reordered_shape = reordered_shape
self.fill_value = fill_value
self.dtype = self.data.dtype

@classmethod
def from_numpy(cls, x, compressed_axes=None, fill_value=0):
Expand Down Expand Up @@ -168,24 +167,64 @@ def from_iter(cls, x, shape=None, compressed_axes=None, fill_value=None):
COO.from_iter(x, shape, fill_value), compressed_axes=compressed_axes
)

@property
def dtype(self):
    """
    Data type of the elements stored in this array.

    The value is read straight from the underlying ``data`` buffer, so it
    always reflects the buffer that is currently attached.

    Returns
    -------
    numpy.dtype
        The datatype of this array.

    See Also
    --------
    numpy.ndarray.dtype : Numpy equivalent property.
    scipy.sparse.csr_matrix.dtype : Scipy equivalent property.
    """
    stored_values = self.data
    return stored_values.dtype

@property
def nnz(self):
    """
    Count of elements explicitly stored in this array.

    This is the length of the first axis of the ``data`` buffer.

    Returns
    -------
    int
        The number of nonzero elements in this array.

    See Also
    --------
    COO.nnz : Equivalent :obj:`COO` array property.
    DOK.nnz : Equivalent :obj:`DOK` array property.
    numpy.count_nonzero : A similar Numpy function.
    scipy.sparse.csr_matrix.nnz : The Scipy equivalent property.
    """
    data_shape = self.data.shape
    return data_shape[0]

@property
def nbytes(self):
    """
    The number of bytes taken up by this object. Note that for small arrays,
    this may undercount the number of bytes due to the large constant overhead.

    Returns
    -------
    int
        The approximate bytes of memory taken by this object.

    See Also
    --------
    numpy.ndarray.nbytes : The equivalent Numpy property.
    """
    total = self.data.nbytes + self.indices.nbytes
    # ``indptr`` is not always an array: the 1-D conversion path stores an
    # empty tuple ``()`` because there are no axes to compress. Comparing an
    # ndarray against ``()`` with ``!=`` is evaluated elementwise (and cannot
    # be broadcast), so it must not be used as the test. Count the pointer
    # array only when it actually exposes ``nbytes``.
    total += getattr(self.indptr, "nbytes", 0)
    return total

@property
def density(self):
    """
    Fraction of the array's elements that are explicitly stored.

    Returns
    -------
    float
        ``nnz`` divided by the product of the entries of ``shape``.
    """
    return self.nnz / reduce(mul, self.shape, 1)

@property
def ndim(self):
    """
    Number of dimensions of this array.

    Returns
    -------
    int
        The length of ``shape``.
    """
    return len(self.shape)

def __str__(self):
return "<GXCS: shape={}, dtype={}, nnz={}, fill_value={}, compressed_axes={}>".format(
return "<GCXS: shape={}, dtype={}, nnz={}, fill_value={}, compressed_axes={}>".format(
self.shape, self.dtype, self.nnz, self.fill_value, self.compressed_axes
)

Expand Down Expand Up @@ -374,7 +413,7 @@ def reshape(self, shape, order="C", compressed_axes=None):

# there's likely a way to do this without decompressing to COO
coo = self.tocoo().reshape(shape)
return GXCS.from_coo(coo, compressed_axes)
return GCXS.from_coo(coo, compressed_axes)

def resize(self, *args, refcheck=True, compressed_axes=None):
"""
Expand Down
6 changes: 3 additions & 3 deletions sparse/_compressed/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,11 @@ def getitem(x, key):
"""
from .compressed import GXCS
from .compressed import GCXS

if x.ndim == 1:
coo = x.tocoo()[key]
return GXCS.from_coo(coo)
return GCXS.from_coo(coo)

key = list(normalize_index(key, x.shape))

Expand Down Expand Up @@ -151,7 +151,7 @@ def getitem(x, key):
if len(shape) == 1:
compressed_axes = None

return GXCS(
return GCXS(
arg, shape=shape, compressed_axes=compressed_axes, fill_value=x.fill_value
)

Expand Down
7 changes: 4 additions & 3 deletions sparse/_coo/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -2239,10 +2239,11 @@ def asformat(self, format, compressed_axes=None):
NotImplementedError
If the format isn't supported.
"""
from .._compressed import GXCS
from .._compressed import GCXS

if format == "gcxs" or format is GCXS:
return GCXS.from_coo(self, compressed_axes=compressed_axes)

if format == "gxcs" or format is GXCS:
return GXCS.from_coo(self, compressed_axes=compressed_axes)
elif compressed_axes is not None:
raise ValueError(
"compressed_axes is not supported for {} format".format(format)
Expand Down
9 changes: 8 additions & 1 deletion sparse/_sparse_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

import numpy as np

from ._utils import _zero_of_dtype
from ._utils import _zero_of_dtype, html_table


class SparseArray:
Expand Down Expand Up @@ -160,6 +160,13 @@ def density(self):
"""
return self.nnz / self.size

def _repr_html_(self):
    """
    HTML diagnostic summary of this array, for display in Jupyter.

    Returns whatever ``html_table`` produces for this array.
    """
    report = html_table(self)
    return report

@abstractmethod
def asformat(self, format):
"""
Expand Down
62 changes: 61 additions & 1 deletion sparse/_utils.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
import functools
from collections.abc import Iterable
from numbers import Integral
from functools import reduce

import operator
import numpy as np


Expand Down Expand Up @@ -142,7 +144,7 @@ def random(

nnz = int(elements * density)

if format != "gxcs" and compressed_axes is not None:
if format != "gcxs" and compressed_axes is not None:
raise ValueError(
"compressed_axes is not supported for {} format".format(format)
)
Expand Down Expand Up @@ -271,6 +273,64 @@ def equivalent(x, y):
return (x == y) | ((x != x) & (y != y))


# copied from zarr
# See https://github.com/zarr-developers/zarr-python/blob/master/zarr/util.py
def human_readable_size(size):
    """
    Format a byte count as a short string using 1024-based units.

    Parameters
    ----------
    size : int or float
        The size to format.

    Returns
    -------
    str
        ``size`` unchanged (via ``"%s"``) when it is below ``2 ** 10``;
        otherwise the value scaled to K, M, G, T or P with one decimal place.
        Anything at or above ``2 ** 50`` is expressed in P.
    """
    if size < 2 ** 10:
        return "%s" % size

    # Each entry is (exclusive upper bound exponent, unit suffix); the divisor
    # for a unit is always one step (2 ** 10) below its upper bound.
    for bound_exponent, suffix in ((20, "K"), (30, "M"), (40, "G"), (50, "T")):
        if size < 2 ** bound_exponent:
            scaled = size / float(2 ** (bound_exponent - 10))
            return "%.1f%s" % (scaled, suffix)

    return "%.1fP" % (size / float(2 ** 50))


def html_table(arr):
    """
    Build an HTML table summarising a sparse array.

    The table has one row per property: format (lower-cased class name),
    data type, shape, nnz, density and read-only flag. When ``arr`` has an
    ``nbytes`` attribute, size and storage-ratio rows are added; when its
    class is named ``GCXS``, a compressed-axes row is added.

    Parameters
    ----------
    arr : object
        Array-like exposing ``dtype``, ``shape``, ``nnz`` and ``size``, and
        optionally ``nbytes`` and ``compressed_axes``.

    Returns
    -------
    str
        The ``<table>`` markup.
    """
    # Local import keeps this block self-contained.
    from html import escape

    nan = float("nan")

    headings = ["Format", "Data Type", "Shape", "nnz", "Density", "Read-only"]
    info = [
        type(arr).__name__.lower(),
        str(arr.dtype),
        str(arr.shape),
        str(arr.nnz),
        # A zero-size array has no meaningful density; report nan instead of
        # raising ZeroDivisionError while rendering.
        str(arr.nnz / arr.size if arr.size else nan),
        # An array is read-only exactly when it does NOT support item
        # assignment.
        str(not hasattr(arr, "__setitem__")),
    ]

    if hasattr(arr, "nbytes"):
        headings.append("Size")
        info.append(human_readable_size(arr.nbytes))
        headings.append("Storage ratio")
        # Bytes a dense array of the same shape and dtype would occupy.
        dense_nbytes = reduce(operator.mul, arr.shape, 1) * arr.dtype.itemsize
        ratio = arr.nbytes / dense_nbytes if dense_nbytes else nan
        info.append("%.1f" % ratio)

    # Compared by class name rather than isinstance so this module does not
    # need to import the array classes.
    if type(arr).__name__ == "GCXS":
        headings.append("Compressed Axes")
        info.append(str(arr.compressed_axes))

    row_template = (
        "<tr>"
        '<th style="text-align: left">%s</th>'
        '<td style="text-align: left">%s</td>'
        "</tr>"
    )
    # Escape values: e.g. structured dtype strings contain "<" and would
    # otherwise be parsed as markup.
    rows = [
        row_template % (escape(heading, quote=False), escape(value, quote=False))
        for heading, value in zip(headings, info)
    ]
    return "<table><tbody>" + "".join(rows) + "</tbody></table>"


def check_zero_fill_value(*args):
"""
Checks if all the arguments have zero fill-values.
Expand Down
27 changes: 12 additions & 15 deletions sparse/tests/test_compressed.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import numpy as np
import scipy

from sparse._compressed import GXCS
from sparse._compressed import GCXS
from sparse._utils import assert_eq


Expand All @@ -23,7 +23,7 @@
],
)
def test_resize(a, b):
s = sparse.random(a, density=0.5, format="gxcs")
s = sparse.random(a, density=0.5, format="gcxs")
orig_size = s.size
x = s.todense()
x = np.resize(x, b)
Expand All @@ -50,20 +50,20 @@ def test_resize(a, b):
],
)
def test_reshape(a, b):
s = sparse.random(a, density=0.5, format="gxcs")
s = sparse.random(a, density=0.5, format="gcxs")
x = s.todense()

assert_eq(x.reshape(b), s.reshape(b))


def test_reshape_same():
s = sparse.random((3, 5), density=0.5, format="gxcs")

s = sparse.random((3, 5), density=0.5, format="gcxs")
assert s.reshape(s.shape) is s


def test_to_scipy_sparse():
s = sparse.random((3, 5), density=0.5, format="gxcs", compressed_axes=(0,))
s = sparse.random((3, 5), density=0.5, format="gcxs", compressed_axes=(0,))
a = s.to_scipy_sparse()
b = scipy.sparse.csr_matrix(s.todense())

Expand All @@ -72,8 +72,7 @@ def test_to_scipy_sparse():

def test_tocoo():
coo = sparse.random((5, 6), density=0.5)
b = GXCS.from_coo(coo)

b = GCXS.from_coo(coo)
assert_eq(b.tocoo(), coo)


Expand Down Expand Up @@ -135,9 +134,8 @@ def test_tocoo():
],
)
def test_slicing(index):
s = sparse.random((2, 3, 4), density=0.5, format="gxcs")
s = sparse.random((2, 3, 4), density=0.5, format="gcxs")
x = s.todense()

assert_eq(x[index], s[index])


Expand All @@ -157,7 +155,7 @@ def test_slicing(index):
],
)
def test_advanced_indexing(index):
s = sparse.random((2, 3, 4), density=0.5, format="gxcs")
s = sparse.random((2, 3, 4), density=0.5, format="gcxs")
x = s.todense()

assert_eq(x[index], s[index])
Expand All @@ -180,17 +178,16 @@ def test_advanced_indexing(index):
],
)
def test_slicing_errors(index):
s = sparse.random((2, 3, 4), density=0.5, format="gxcs")
s = sparse.random((2, 3, 4), density=0.5, format="gcxs")

with pytest.raises(IndexError):
s[index]


def test_change_compressed_axes():
coo = sparse.random((3, 4, 5), density=0.5)
s = GXCS.from_coo(coo, compressed_axes=(0, 1))
b = GXCS.from_coo(coo, compressed_axes=(1, 2))

s = GCXS.from_coo(coo, compressed_axes=(0, 1))
b = GCXS.from_coo(coo, compressed_axes=(1, 2))
assert_eq(s, b)
s.change_compressed_axes((1, 2))

assert_eq(s, b)

0 comments on commit 96fd044

Please sign in to comment.