Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add _repr_html_ #283

Merged
merged 40 commits into from
Sep 18, 2019
Merged
Show file tree
Hide file tree
Changes from 21 commits
Commits
Show all changes
40 commits
Select commit Hold shift + click to select a range
9ea4053
Update core.py
daletovar Apr 19, 2019
aa14159
Merge pull request #1 from daletovar/daletovar-patch-1
daletovar Apr 19, 2019
fa9ef22
Update test_coo.py
daletovar Apr 30, 2019
1683e00
Update core.py
daletovar Apr 30, 2019
c70d397
Update test_coo.py
daletovar Apr 30, 2019
de14873
Add JUnit directory to gitignore.
hameerabbasi Apr 30, 2019
fa80a62
Fix Flake8 issues.
hameerabbasi Apr 30, 2019
91e0367
Fix up tests and code.
hameerabbasi Apr 30, 2019
e980993
Add docs.
hameerabbasi Apr 30, 2019
c826e62
Resolved merge.
daletovar Aug 28, 2019
a1f948c
change gxcs with gcxs
daletovar Aug 28, 2019
81b1b03
Update __init__.py
daletovar Aug 31, 2019
cf085fb
add html_table
daletovar Aug 31, 2019
551629b
add _repr_html_
daletovar Aug 31, 2019
de82d5b
add self.nbytes property and self.format attribute
daletovar Aug 31, 2019
2a0fdba
fix empty indptr for 1d and add self.format
daletovar Aug 31, 2019
16c707e
add self.format
daletovar Aug 31, 2019
937e819
remove whitespace
daletovar Aug 31, 2019
80a958b
remove failed example
daletovar Aug 31, 2019
fedd68a
Update _utils.py
daletovar Sep 1, 2019
d7516d4
Update compressed.py
daletovar Sep 1, 2019
8827ac4
add format property
daletovar Sep 6, 2019
9a4fd74
add format property
daletovar Sep 6, 2019
667eb57
add format
daletovar Sep 6, 2019
53ccd0f
remove spurious properties, add docs
daletovar Sep 6, 2019
a41a054
Update sparse/_utils.py
daletovar Sep 6, 2019
0a441ee
update html table
daletovar Sep 6, 2019
9cfdb09
Update test_compressed.py
daletovar Sep 6, 2019
9f8ccf6
Merge branch 'master' into sparse_html
daletovar Sep 6, 2019
77eabe5
Update compressed.py
daletovar Sep 6, 2019
702ca45
formatting
daletovar Sep 6, 2019
3cdeabd
formatting
daletovar Sep 6, 2019
e7a5e1d
formatting
daletovar Sep 6, 2019
341ddba
Update test_compressed.py
daletovar Sep 6, 2019
3cff972
formatting
daletovar Sep 6, 2019
fb0757b
Update core.py
daletovar Sep 6, 2019
33df589
formatting
daletovar Sep 6, 2019
5f9d3ef
Merge branch 'master' of https://github.com/pydata/sparse
daletovar Sep 18, 2019
53c3b8e
added documentation
daletovar Sep 18, 2019
5c3fc1a
Fix up docs.
hameerabbasi Sep 18, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion sparse/_compressed/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
from .compressed import GXCS
from .compressed import GCXS
9 changes: 5 additions & 4 deletions sparse/_compressed/compressed.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ def _from_coo(x, compressed_axes=None):
if compressed_axes is not None:
raise ValueError('no axes to compress for 1d array')
return (
x.data, x.coords[0], []), x.shape, None, None, None, None, None, x.fill_value
x.data, x.coords[0], np.array([])), x.shape, None, None, None, None, None, x.fill_value
daletovar marked this conversation as resolved.
Show resolved Hide resolved

compressed_axes = normalize_axis(compressed_axes, x.ndim)
if compressed_axes is None:
Expand Down Expand Up @@ -65,7 +65,7 @@ def _from_coo(x, compressed_axes=None):
new_shape, axisptr, x.fill_value)


class GXCS(SparseArray, NDArrayOperatorsMixin):
class GCXS(SparseArray, NDArrayOperatorsMixin):

__array_priority__ = 12

Expand Down Expand Up @@ -111,6 +111,7 @@ def __init__(self, arg, shape=None, compressed_axes=None, fill_value=0):
else:
compressed_axes = compressed_shape = axis_order = reordered_shape = axisptr = None

self.format = 'gcxs'
self.data, self.indices, self.indptr = arg
self.shape = shape
self.compressed_shape = compressed_shape
Expand Down Expand Up @@ -172,7 +173,7 @@ def ndim(self):
return len(self.shape)

def __str__(self):
return '<GXCS: shape={}, dtype={}, nnz={}, fill_value={}, compressed_axes={}>'.format(
return '<GCXS: shape={}, dtype={}, nnz={}, fill_value={}, compressed_axes={}>'.format(
self.shape, self.dtype, self.nnz, self.fill_value, self.compressed_axes)

__repr__ = __str__
Expand Down Expand Up @@ -345,7 +346,7 @@ def reshape(self, shape, order='C', compressed_axes=None):

# there's likely a way to do this without decompressing to COO
coo = self.tocoo().reshape(shape)
return GXCS.from_coo(coo, compressed_axes)
return GCXS.from_coo(coo, compressed_axes)

def resize(self, *args, refcheck=True, compressed_axes=None):
"""
Expand Down
6 changes: 3 additions & 3 deletions sparse/_compressed/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,11 @@ def getitem(x, key):


"""
from .compressed import GXCS
from .compressed import GCXS

if x.ndim == 1:
coo = x.tocoo()[key]
return GXCS.from_coo(coo)
return GCXS.from_coo(coo)

key = list(normalize_index(key, x.shape))

Expand Down Expand Up @@ -150,7 +150,7 @@ def getitem(x, key):
if len(shape) == 1:
compressed_axes = None

return GXCS(
return GCXS(
arg,
shape=shape,
compressed_axes=compressed_axes,
Expand Down
7 changes: 4 additions & 3 deletions sparse/_coo/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,7 @@ def __init__(self, coords, data=None, shape=None, has_duplicates=True,

self.data = np.asarray(data)
self.coords = np.asarray(coords)
self.format = 'coo'

if self.coords.ndim == 1:
self.coords = self.coords[None, :]
Expand Down Expand Up @@ -2176,9 +2177,9 @@ def asformat(self, format, compressed_axes=None):
NotImplementedError
If the format isn't supported.
"""
from .._compressed import GXCS
if format == 'gxcs' or format is GXCS:
return GXCS.from_coo(self, compressed_axes=compressed_axes)
from .._compressed import GCXS
if format == 'gcxs' or format is GCXS:
return GCXS.from_coo(self, compressed_axes=compressed_axes)
elif compressed_axes is not None:
raise ValueError(
'compressed_axes is not supported for {} format'.format(format))
Expand Down
25 changes: 25 additions & 0 deletions sparse/_dok.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ class DOK(SparseArray):
def __init__(self, shape, data=None, dtype=None, fill_value=None):
from ._coo import COO
self.data = dict()
self.format = 'dok'

if isinstance(shape, COO):
ar = DOK.from_coo(shape)
Expand Down Expand Up @@ -238,6 +239,30 @@ def nnz(self):
"""
return len(self.data)

@property
def nbytes(self):
    """
    Approximate memory footprint of the stored values, in bytes.

    The figure is the number of stored entries multiplied by the item
    size of the array's dtype. Nothing else is included in the count, so
    for small arrays the result may undercount the real usage because of
    the large constant overhead.

    Returns
    -------
    int
        The approximate bytes of memory taken by this object.

    See Also
    --------
    numpy.ndarray.nbytes : The equivalent Numpy property.

    Examples
    --------
    >>> import sparse
    >>> x = sparse.random((100,100),density=.1,format='dok')
    >>> x.nbytes
    8000
    """
    stored_entries = self.nnz
    return stored_entries * self.dtype.itemsize

def __getitem__(self, key):
key = normalize_index(key, self.shape)

Expand Down
9 changes: 8 additions & 1 deletion sparse/_sparse_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

import numpy as np

from ._utils import _zero_of_dtype
from ._utils import _zero_of_dtype, html_table


class SparseArray:
Expand Down Expand Up @@ -158,6 +158,13 @@ def density(self):
"""
return self.nnz / self.size

def _repr_html_(self):
    """
    Return an HTML diagnostic report about this array.

    The markup is produced by ``html_table`` and is what gets rendered
    when the array is displayed in Jupyter.
    """
    report = html_table(self)
    return report

@abstractmethod
def asformat(self, format):
"""
Expand Down
66 changes: 65 additions & 1 deletion sparse/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ def random(

nnz = int(elements * density)

if format != 'gxcs' and compressed_axes is not None:
if format != 'gcxs' and compressed_axes is not None:
raise ValueError(
'compressed_axes is not supported for {} format'.format(format))

Expand Down Expand Up @@ -266,6 +266,70 @@ def equivalent(x, y):
# lgtm [py/comparison-of-identical-expressions]
return (x == y) | ((x != x) & (y != y))

# copied from zarr
# See https://github.com/zarr-developers/zarr-python/blob/master/zarr/util.py

daletovar marked this conversation as resolved.
Show resolved Hide resolved

def human_readable_size(size):
    """
    Format a byte count using binary (power-of-1024) unit suffixes.

    Parameters
    ----------
    size : int or float
        Number of bytes.

    Returns
    -------
    str
        ``size`` unchanged (as text) when it is below 1024, otherwise the
        value scaled to K, M, G, T or P with one decimal place. Anything
        at or above 2**50 is reported in P.
    """
    if size < 2**10:
        return '%s' % size

    # Each entry pairs the exclusive upper bound (as a power of two) with
    # the suffix used below that bound; the divisor is one unit smaller.
    for upper_exp, suffix in ((20, 'K'), (30, 'M'), (40, 'G'), (50, 'T')):
        if size < 2**upper_exp:
            return '%.1f%s' % (size / float(2**(upper_exp - 10)), suffix)

    return '%.1fP' % (size / float(2**50))


def html_table(arr):
    """
    Build an HTML table summarising a sparse array.

    Parameters
    ----------
    arr : object
        Array-like exposing ``format``, ``dtype``, ``shape``, ``nnz`` and
        ``nbytes``. ``compressed_axes`` is read as well when ``format`` is
        ``'gcxs'``.

    Returns
    -------
    str
        ``<table>`` markup with one row per reported property: format,
        data type, shape, nnz, density, read-only flag, byte counts,
        storage ratio and (for ``'gcxs'``) the compressed axes.
    """
    from html import escape

    # Multiply with Python ints rather than np.prod: np.prod uses modular
    # fixed-width arithmetic (silently wrong for very large shapes) and
    # returns a float for an empty shape.
    n_elements = 1
    for dim in arr.shape:
        n_elements *= int(dim)

    nbytes = arr.nbytes
    dense_bytes = n_elements * arr.dtype.itemsize

    def ratio(numerator, denominator):
        # A zero-size array has no elements / dense footprint to divide by;
        # report nan (0/0) or inf (x/0) instead of raising.
        if denominator:
            return numerator / denominator
        return float('inf') if numerator else float('nan')

    def byte_count(count):
        # Append a human-readable figure once the raw number gets long.
        if count > 2**10:
            return '%s (%s)' % (count, human_readable_size(count))
        return str(count)

    rows = [
        ('Format', arr.format),
        ('Data Type', str(arr.dtype)),
        ('Shape', str(arr.shape)),
        ('nnz', str(arr.nnz)),
        ('Density', str(ratio(arr.nnz, n_elements))),
        # DOK is the only format reported as writable.
        ('Read-only', str(arr.format != 'dok')),
        ('No. Bytes', byte_count(nbytes)),
        ('No. Bytes as dense', byte_count(dense_bytes)),
        ('Storage ratio', '%.1f' % ratio(nbytes, dense_bytes)),
    ]
    if arr.format == 'gcxs':
        rows.append(('Compressed Axes', str(arr.compressed_axes)))

    row_template = (
        '<tr>'
        '<th style="text-align: left">%s</th>'
        '<td style="text-align: left">%s</td>'
        '</tr>'
    )
    # Values such as structured dtype strings can contain '<' or '&', so
    # escape them before embedding in markup.
    body = ''.join(
        row_template % (heading, escape(str(value), quote=False))
        for heading, value in rows
    )
    return '<table><tbody>' + body + '</tbody></table>'


def check_zero_fill_value(*args):
"""
Expand Down
22 changes: 11 additions & 11 deletions sparse/tests/test_compressed.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import numpy as np
import scipy

from sparse._compressed import GXCS
from sparse._compressed import GCXS
from sparse._utils import assert_eq


Expand All @@ -20,7 +20,7 @@
[(), ()],
])
def test_resize(a, b):
s = sparse.random(a, density=0.5, format='gxcs')
s = sparse.random(a, density=0.5, format='gcxs')
orig_size = s.size
x = s.todense()
x = np.resize(x, b)
Expand All @@ -44,20 +44,20 @@ def test_resize(a, b):
[(), ()],
])
def test_reshape(a, b):
s = sparse.random(a, density=0.5, format='gxcs')
s = sparse.random(a, density=0.5, format='gcxs')
x = s.todense()

assert_eq(x.reshape(b), s.reshape(b))


def test_reshape_same():
s = sparse.random((3, 5), density=0.5, format='gxcs')
s = sparse.random((3, 5), density=0.5, format='gcxs')

assert s.reshape(s.shape) is s


def test_to_scipy_sparse():
s = sparse.random((3, 5), density=0.5, format='gxcs', compressed_axes=(0,))
s = sparse.random((3, 5), density=0.5, format='gcxs', compressed_axes=(0,))
a = s.to_scipy_sparse()
b = scipy.sparse.csr_matrix(s.todense())

Expand All @@ -66,7 +66,7 @@ def test_to_scipy_sparse():

def test_tocoo():
coo = sparse.random((5, 6), density=.5)
b = GXCS.from_coo(coo)
b = GCXS.from_coo(coo)

assert_eq(b.tocoo(), coo)

Expand Down Expand Up @@ -126,7 +126,7 @@ def test_tocoo():
(slice(0, 5, -1),),
])
def test_slicing(index):
s = sparse.random((2, 3, 4), density=0.5, format='gxcs')
s = sparse.random((2, 3, 4), density=0.5, format='gcxs')
x = s.todense()

assert_eq(x[index], s[index])
Expand All @@ -145,7 +145,7 @@ def test_slicing(index):
(1, [2, 0, 1],),
])
def test_advanced_indexing(index):
s = sparse.random((2, 3, 4), density=0.5, format='gxcs')
s = sparse.random((2, 3, 4), density=0.5, format='gcxs')
x = s.todense()

assert_eq(x[index], s[index])
Expand All @@ -165,16 +165,16 @@ def test_advanced_indexing(index):
([[0, 1]],),
])
def test_slicing_errors(index):
s = sparse.random((2, 3, 4), density=0.5, format='gxcs')
s = sparse.random((2, 3, 4), density=0.5, format='gcxs')

with pytest.raises(IndexError):
s[index]


def test_change_compressed_axes():
coo = sparse.random((3, 4, 5), density=.5)
s = GXCS.from_coo(coo, compressed_axes=(0, 1))
b = GXCS.from_coo(coo, compressed_axes=(1, 2))
s = GCXS.from_coo(coo, compressed_axes=(0, 1))
b = GCXS.from_coo(coo, compressed_axes=(1, 2))

s.change_compressed_axes((1, 2))
daletovar marked this conversation as resolved.
Show resolved Hide resolved

Expand Down