Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add _repr_html_ #283

Merged
merged 40 commits into from
Sep 18, 2019
Merged
Show file tree
Hide file tree
Changes from 38 commits
Commits
Show all changes
40 commits
Select commit Hold shift + click to select a range
9ea4053
Update core.py
daletovar Apr 19, 2019
aa14159
Merge pull request #1 from daletovar/daletovar-patch-1
daletovar Apr 19, 2019
fa9ef22
Update test_coo.py
daletovar Apr 30, 2019
1683e00
Update core.py
daletovar Apr 30, 2019
c70d397
Update test_coo.py
daletovar Apr 30, 2019
de14873
Add JUnit directory to gitignore.
hameerabbasi Apr 30, 2019
fa80a62
Fix Flake8 issues.
hameerabbasi Apr 30, 2019
91e0367
Fix up tests and code.
hameerabbasi Apr 30, 2019
e980993
Add docs.
hameerabbasi Apr 30, 2019
c826e62
Resolved merge.
daletovar Aug 28, 2019
a1f948c
change gxcs with gcxs
daletovar Aug 28, 2019
81b1b03
Update __init__.py
daletovar Aug 31, 2019
cf085fb
add html_table
daletovar Aug 31, 2019
551629b
add _repr_html_
daletovar Aug 31, 2019
de82d5b
add self.nbytes property and self.format attribute
daletovar Aug 31, 2019
2a0fdba
fix empty indptr for 1d and add self.format
daletovar Aug 31, 2019
16c707e
add self.format
daletovar Aug 31, 2019
937e819
remove whitespace
daletovar Aug 31, 2019
80a958b
remove failed example
daletovar Aug 31, 2019
fedd68a
Update _utils.py
daletovar Sep 1, 2019
d7516d4
Update compressed.py
daletovar Sep 1, 2019
8827ac4
add format property
daletovar Sep 6, 2019
9a4fd74
add format property
daletovar Sep 6, 2019
667eb57
add format
daletovar Sep 6, 2019
53ccd0f
remove spurious properties, add docs
daletovar Sep 6, 2019
a41a054
Update sparse/_utils.py
daletovar Sep 6, 2019
0a441ee
update html table
daletovar Sep 6, 2019
9cfdb09
Update test_compressed.py
daletovar Sep 6, 2019
9f8ccf6
Merge branch 'master' into sparse_html
daletovar Sep 6, 2019
77eabe5
Update compressed.py
daletovar Sep 6, 2019
702ca45
formatting
daletovar Sep 6, 2019
3cdeabd
formatting
daletovar Sep 6, 2019
e7a5e1d
formatting
daletovar Sep 6, 2019
341ddba
Update test_compressed.py
daletovar Sep 6, 2019
3cff972
formatting
daletovar Sep 6, 2019
fb0757b
Update core.py
daletovar Sep 6, 2019
33df589
formatting
daletovar Sep 6, 2019
5f9d3ef
Merge branch 'master' of https://github.com/pydata/sparse
daletovar Sep 18, 2019
53c3b8e
added documentation
daletovar Sep 18, 2019
5c3fc1a
Fix up docs.
hameerabbasi Sep 18, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion sparse/_compressed/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
from .compressed import GXCS
from .compressed import GCXS
90 changes: 77 additions & 13 deletions sparse/_compressed/compressed.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ def _from_coo(x, compressed_axes=None):
if compressed_axes is not None:
raise ValueError("no axes to compress for 1d array")
return (
(x.data, x.coords[0], []),
(x.data, x.coords[0], ()),
x.shape,
None,
None,
Expand Down Expand Up @@ -80,7 +80,7 @@ def _from_coo(x, compressed_axes=None):
)


class GXCS(SparseArray, NDArrayOperatorsMixin):
class GCXS(SparseArray, NDArrayOperatorsMixin):

__array_priority__ = 12

Expand Down Expand Up @@ -134,7 +134,6 @@ def __init__(self, arg, shape=None, compressed_axes=None, fill_value=0):
self.axisptr = axisptr
self.reordered_shape = reordered_shape
self.fill_value = fill_value
self.dtype = self.data.dtype

@classmethod
def from_numpy(cls, x, compressed_axes=None, fill_value=0):
Expand Down Expand Up @@ -169,23 +168,88 @@ def from_iter(cls, x, shape=None, compressed_axes=None, fill_value=None):
)

@property
def nnz(self):
return self.data.shape[0]
def dtype(self):
    """
    Data type of the elements held by this array.

    Returns
    -------
    numpy.dtype
        The ``dtype`` of the underlying ``data`` array.

    See Also
    --------
    numpy.ndarray.dtype : Numpy equivalent property.
    scipy.sparse.csr_matrix.dtype : Scipy equivalent property.
    """
    values = self.data
    return values.dtype

@property
def nbytes(self):
return self.data.nbytes + self.indices.nbytes + self.indptr.nbytes
def nnz(self):
    """
    Number of explicitly stored elements in this array.

    Returns
    -------
    int
        The length of the ``data`` array along its first axis.

    See Also
    --------
    COO.nnz : Equivalent :obj:`COO` array property.
    DOK.nnz : Equivalent :obj:`DOK` array property.
    numpy.count_nonzero : A similar Numpy function.
    scipy.sparse.csr_matrix.nnz : The Scipy equivalent property.
    """
    data_shape = self.data.shape
    return data_shape[0]

@property
def density(self):
return self.nnz / reduce(mul, self.shape, 1)
def format(self):
    """
    Name of the storage format used by this array.

    Returns
    -------
    str
        Always ``"gcxs"`` for this class.

    See Also
    --------
    COO.format : Equivalent :obj:`COO` array property.
    DOK.format : Equivalent :obj:`DOK` array property.
    scipy.sparse.coo_matrix.format : The Scipy equivalent property.

    Examples
    --------
    >>> import sparse
    >>> s = sparse.random((5,5), density=0.2, format='gcxs')
    >>> s.format
    'gcxs'
    """
    return "gcxs"

@property
def ndim(self):
return len(self.shape)
def nbytes(self):
    """
    The number of bytes taken up by this object. Note that for small arrays,
    this may undercount the number of bytes due to the large constant overhead.

    The total is the sum of the ``nbytes`` of ``data`` and ``indices``, plus
    the ``nbytes`` of ``indptr`` when ``indptr`` exposes that attribute.

    Returns
    -------
    int
        The approximate bytes of memory taken by this object.

    See Also
    --------
    numpy.ndarray.nbytes : The equivalent Numpy property.
    """
    total = self.data.nbytes + self.indices.nbytes
    # ``_from_coo`` hands back an empty tuple in the ``indptr`` slot for 1-d
    # input, and a tuple has no ``nbytes``. Look the attribute up with a
    # default of 0 instead of testing ``indptr != ()``: comparing an ndarray
    # with a tuple is an elementwise operation with mismatched shapes, which
    # warns or raises depending on the NumPy version and never gives a
    # dependable boolean.
    total += getattr(self.indptr, "nbytes", 0)
    return total

def __str__(self):
return "<GXCS: shape={}, dtype={}, nnz={}, fill_value={}, compressed_axes={}>".format(
return "<GCXS: shape={}, dtype={}, nnz={}, fill_value={}, compressed_axes={}>".format(
self.shape, self.dtype, self.nnz, self.fill_value, self.compressed_axes
)

Expand Down Expand Up @@ -374,7 +438,7 @@ def reshape(self, shape, order="C", compressed_axes=None):

# there's likely a way to do this without decompressing to COO
coo = self.tocoo().reshape(shape)
return GXCS.from_coo(coo, compressed_axes)
return GCXS.from_coo(coo, compressed_axes)

def resize(self, *args, refcheck=True, compressed_axes=None):
"""
Expand Down
6 changes: 3 additions & 3 deletions sparse/_compressed/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,11 @@ def getitem(x, key):


"""
from .compressed import GXCS
from .compressed import GCXS

if x.ndim == 1:
coo = x.tocoo()[key]
return GXCS.from_coo(coo)
return GCXS.from_coo(coo)

key = list(normalize_index(key, x.shape))

Expand Down Expand Up @@ -151,7 +151,7 @@ def getitem(x, key):
if len(shape) == 1:
compressed_axes = None

return GXCS(
return GCXS(
arg, shape=shape, compressed_axes=compressed_axes, fill_value=x.fill_value
)

Expand Down
32 changes: 29 additions & 3 deletions sparse/_coo/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -557,6 +557,31 @@ def dtype(self):
"""
return self.data.dtype

@property
def format(self):
    """
    Name of the storage format used by this array.

    Returns
    -------
    str
        Always ``"coo"`` for this class.

    See Also
    --------
    DOK.format : Equivalent :obj:`DOK` array property.
    GCXS.format : Equivalent :obj:`GCXS` array property.
    scipy.sparse.coo_matrix.format : The Scipy equivalent property.

    Examples
    --------
    >>> import sparse
    >>> s = sparse.random((5,5), density=0.2)
    >>> s.format
    'coo'
    """
    return "coo"

@property
def nnz(self):
"""
Expand Down Expand Up @@ -2239,10 +2264,11 @@ def asformat(self, format, compressed_axes=None):
NotImplementedError
If the format isn't supported.
"""
from .._compressed import GXCS
from .._compressed import GCXS

if format == "gcxs" or format is GCXS:
return GCXS.from_coo(self, compressed_axes=compressed_axes)

if format == "gxcs" or format is GXCS:
return GXCS.from_coo(self, compressed_axes=compressed_axes)
elif compressed_axes is not None:
raise ValueError(
"compressed_axes is not supported for {} format".format(format)
Expand Down
49 changes: 49 additions & 0 deletions sparse/_dok.py
Original file line number Diff line number Diff line change
Expand Up @@ -242,6 +242,55 @@ def nnz(self):
"""
return len(self.data)

@property
def format(self):
    """
    Name of the storage format used by this array.

    Returns
    -------
    str
        Always ``"dok"`` for this class.

    See Also
    --------
    COO.format : Equivalent :obj:`COO` array property.
    GCXS.format : Equivalent :obj:`GCXS` array property.
    scipy.sparse.dok_matrix.format : The Scipy equivalent property.

    Examples
    --------
    >>> import sparse
    >>> s = sparse.random((5,5), density=0.2, format='dok')
    >>> s.format
    'dok'
    """
    return "dok"

@property
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This will be highly inaccurate, I suggest you remove it, since it's a Python dict and not a NumPy one.

def nbytes(self):
    """
    Approximate number of bytes taken up by the stored values.

    The figure is ``nnz`` multiplied by the item size of ``dtype``; nothing
    else is included in the count, so the real memory footprint of the
    object is larger than the number reported here.

    Returns
    -------
    int
        ``self.nnz * self.dtype.itemsize``.

    See Also
    --------
    numpy.ndarray.nbytes : The equivalent Numpy property.

    Examples
    --------
    >>> import sparse
    >>> x = sparse.random((100,100),density=.1,format='dok')
    >>> x.nbytes
    8000
    """
    stored = self.nnz
    return stored * self.dtype.itemsize

def __getitem__(self, key):
key = normalize_index(key, self.shape)

Expand Down
28 changes: 27 additions & 1 deletion sparse/_sparse_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

import numpy as np

from ._utils import _zero_of_dtype
from ._utils import _zero_of_dtype, html_table


class SparseArray:
Expand Down Expand Up @@ -78,6 +78,25 @@ def nnz(self):
True
"""

@property
@abstractmethod
def format(self):
    """
    Name of the storage format used by this array.

    This is an abstract property: it has no implementation here and each
    concrete array class supplies its own.

    Returns
    -------
    str
        The storage format of this array.

    Examples
    --------
    >>> import sparse
    >>> s = sparse.random((5,5), density=0.2)
    >>> s.format
    'coo'
    """

@property
def ndim(self):
"""
Expand Down Expand Up @@ -160,6 +179,13 @@ def density(self):
"""
return self.nnz / self.size

def _repr_html_(self):
    """
    HTML diagnostic report about this array.

    Renders in Jupyter. The markup is produced by ``html_table``.

    Returns
    -------
    str
        Whatever ``html_table(self)`` returns.
    """
    report = html_table(self)
    return report

@abstractmethod
def asformat(self, format):
"""
Expand Down
67 changes: 66 additions & 1 deletion sparse/_utils.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
import functools
from collections.abc import Iterable
from numbers import Integral
from functools import reduce

import operator
import numpy as np


Expand Down Expand Up @@ -142,7 +144,7 @@ def random(

nnz = int(elements * density)

if format != "gxcs" and compressed_axes is not None:
if format != "gcxs" and compressed_axes is not None:
raise ValueError(
"compressed_axes is not supported for {} format".format(format)
)
Expand Down Expand Up @@ -271,6 +273,69 @@ def equivalent(x, y):
return (x == y) | ((x != x) & (y != y))


# copied from zarr
# See https://github.com/zarr-developers/zarr-python/blob/master/zarr/util.py
def human_readable_size(size):
    """
    Format a size as a short string with a binary-unit suffix.

    Parameters
    ----------
    size : int
        The size to format.

    Returns
    -------
    str
        ``size`` unchanged when it is below ``2 ** 10``; otherwise the value
        scaled by the largest fitting power of 1024, shown with one decimal
        place and a ``K``, ``M``, ``G``, ``T`` or ``P`` suffix. Everything at
        or above ``2 ** 50`` is expressed in ``P``.
    """
    if size < 2 ** 10:
        return "%s" % size

    # Each entry pairs the exclusive upper bound (as a power of two) with the
    # suffix used below that bound; the divisor is one step of 1024 lower.
    for exponent, suffix in ((20, "K"), (30, "M"), (40, "G"), (50, "T")):
        if size < 2 ** exponent:
            return "%.1f%s" % (size / float(2 ** (exponent - 10)), suffix)

    return "%.1fP" % (size / float(2 ** 50))


def html_table(arr):
    """
    Build an HTML table summarising a sparse array.

    Parameters
    ----------
    arr : SparseArray
        The array to describe. The attributes ``format``, ``dtype``,
        ``shape``, ``nnz``, ``size`` and ``nbytes`` are read;
        ``compressed_axes`` is read only when ``arr.format == "gcxs"``.

    Returns
    -------
    str
        ``<table>`` markup with one ``<tr>`` per reported property. Values
        are HTML-escaped. Density and storage ratio are reported as ``nan``
        when the array has no elements.
    """
    # Imported locally so this function brings its own dependency into scope.
    from html import escape

    nan = float("nan")

    # An array with a zero-length axis has ``size == 0``; dividing by it
    # would raise instead of producing a report.
    n_elements = arr.size
    density = arr.nnz / n_elements if n_elements else nan

    nbytes = arr.nbytes
    if nbytes > 2 ** 10:
        size_text = "%s (%s)" % (nbytes, human_readable_size(nbytes))
    else:
        size_text = str(nbytes)

    # Bytes a dense array of the same shape and dtype would occupy.
    dense_nbytes = reduce(operator.mul, arr.shape, 1) * arr.dtype.itemsize
    storage_ratio = nbytes / dense_nbytes if dense_nbytes else nan

    rows = [
        ("Format", arr.format),
        ("Data Type", str(arr.dtype)),
        ("Shape", str(arr.shape)),
        ("nnz", str(arr.nnz)),
        ("Density", str(density)),
        # Every format except "dok" is reported as read-only.
        ("Read-only", str(arr.format != "dok")),
        ("Size", size_text),
        ("Storage ratio", "%.1f" % storage_ratio),
    ]
    if arr.format == "gcxs":
        rows.append(("Compressed Axes", str(arr.compressed_axes)))

    # Escape the values: dtype strings such as "<U5" contain characters that
    # would otherwise be parsed as markup.
    body = "".join(
        "<tr>"
        '<th style="text-align: left">%s</th>'
        '<td style="text-align: left">%s</td>'
        "</tr>" % (heading, escape(value, quote=False))
        for heading, value in rows
    )
    return "<table><tbody>" + body + "</tbody></table>"


def check_zero_fill_value(*args):
"""
Checks if all the arguments have zero fill-values.
Expand Down
Loading