Skip to content

Commit

Permalink
implement bit unpacking
Browse files Browse the repository at this point in the history
  • Loading branch information
paleolimbot committed May 1, 2024
1 parent 4c8513c commit 846a189
Show file tree
Hide file tree
Showing 2 changed files with 81 additions and 0 deletions.
39 changes: 39 additions & 0 deletions python/src/nanoarrow/_lib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -1815,6 +1815,45 @@ cdef class CBufferView:
else:
return self._iter_dispatch(offset, length)

def unpack_bits_into(self, dest, offset=0, length=None):
    """Unpack a slice of this boolean (bitmap) buffer into ``dest``.

    One byte of ``dest`` is written per unpacked element, starting at the
    beginning of ``dest``.

    Parameters
    ----------
    dest : object supporting the buffer protocol
        A writable, contiguous buffer whose itemsize is 1 and that has room
        for at least ``length`` bytes (e.g. a ``bytearray``).
    offset : int, optional
        Index of the first element (bit) to unpack. Defaults to 0.
    length : int, optional
        Number of elements to unpack. Defaults to ``self.n_elements``; note
        that the default is NOT reduced by ``offset``, so a non-zero
        ``offset`` requires an explicit ``length``.

    Raises
    ------
    ValueError
        If this buffer view is not of boolean type, or if ``dest`` has an
        itemsize other than 1.
    IndexError
        If ``offset``/``length`` do not describe a valid slice of this
        buffer, or if ``dest`` is too small to hold ``length`` elements.
    """
    if self._data_type != NANOARROW_TYPE_BOOL:
        raise ValueError("Can't unpack non-boolean buffer")

    if length is None:
        length = self.n_elements

    if offset < 0 or length < 0 or (offset + length) > self.n_elements:
        raise IndexError(
            f"offset {offset} and length {length} do not describe a valid slice "
            f"of buffer with {self.n_elements} elements"
        )

    # Convert to C integers *before* acquiring the destination buffer: if
    # the conversion fails (e.g. a float was passed) no buffer export has
    # been taken yet, so nothing can leak.
    cdef int64_t c_offset = offset
    cdef int64_t c_length = length

    cdef Py_buffer buffer
    PyObject_GetBuffer(dest, &buffer, PyBUF_WRITABLE | PyBUF_ANY_CONTIGUOUS)
    try:
        if buffer.itemsize != 1:
            raise ValueError("Destination buffer has itemsize != 1")

        if buffer.len < c_length:
            # Copy the size into a Python object while the view is still held
            buffer_len = buffer.len
            raise IndexError(
                f"Can't unpack {length} elements into buffer of size {buffer_len}"
            )

        ArrowBitsUnpackInt8(
            self._ptr.data.as_uint8,
            c_offset,
            c_length,
            <int8_t*>buffer.buf
        )
    finally:
        # Always give the export back, whichever path leaves the block
        PyBuffer_Release(&buffer)

def unpack_bits(self, offset=0, length=None):
    """Unpack a slice of this boolean (bitmap) buffer into a new uint8 buffer.

    Parameters
    ----------
    offset : int, optional
        Index of the first element (bit) to unpack. Defaults to 0.
    length : int, optional
        Number of elements to unpack. Defaults to ``self.n_elements``; the
        default is NOT reduced by ``offset``, so a non-zero ``offset``
        requires an explicit ``length``.

    Returns
    -------
    The result of ``CBufferBuilder.finish()`` for a builder whose data type
    is uint8 and that holds ``length`` unpacked elements, one byte each.

    Raises
    ------
    ValueError
        If this buffer view is not of boolean type.
    IndexError
        If ``offset``/``length`` do not describe a valid slice of this
        buffer.
    """
    # Validate before allocating anything: otherwise a negative or very
    # large out-of-range ``length`` would be handed to reserve_bytes() before
    # the IndexError below could be raised. These checks mirror the ones
    # performed by unpack_bits_into() (same exception types and messages).
    if self._data_type != NANOARROW_TYPE_BOOL:
        raise ValueError("Can't unpack non-boolean buffer")

    if length is None:
        length = self.n_elements

    if offset < 0 or length < 0 or (offset + length) > self.n_elements:
        raise IndexError(
            f"offset {offset} and length {length} do not describe a valid slice "
            f"of buffer with {self.n_elements} elements"
        )

    out = CBufferBuilder().set_data_type(NANOARROW_TYPE_UINT8)
    out.reserve_bytes(length)
    self.unpack_bits_into(out, offset, length)
    # The bytes were written through the buffer protocol, so the builder's
    # size has to be advanced explicitly to cover them.
    out.advance(length)
    return out.finish()

def _iter_bitmap(self, int64_t offset, int64_t length):
cdef uint8_t item
cdef int64_t i
Expand Down
42 changes: 42 additions & 0 deletions python/tests/test_c_buffer_view.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,44 @@ def test_buffer_view_bool_():
assert "10010000" in repr(view)


def test_buffer_view_bool_unpack():
    """Exercise unpack_bits()/unpack_bits_into() on a boolean data buffer."""
    from array import array

    # Keep the array view referenced for as long as its buffer view is used
    source = na.c_array_view([1, 0, 0, 1], na.bool_())
    view = source.buffer(1)

    # Unpacking everything yields one uint8 per bit of the bitmap
    everything = view.unpack_bits()
    assert len(everything) == view.n_elements
    assert everything.data_type == "uint8"
    assert list(everything) == [1, 0, 0, 1, 0, 0, 0, 0]

    # Unpacking a sub-range honours both offset and length
    subset = view.unpack_bits(1, 4)
    assert len(subset) == 4
    assert list(subset) == [0, 0, 1, 0]

    # Out-of-bounds slices are rejected: negative offset, negative length,
    # and a length running past the end of the buffer
    invalid_slices = [(-1, None), (0, -1), (0, 9)]
    for bad_offset, bad_length in invalid_slices:
        with pytest.raises(IndexError, match="do not describe a valid slice"):
            view.unpack_bits(bad_offset, bad_length)

    # A destination that is too short is rejected
    too_small = bytearray()
    with pytest.raises(
        IndexError, match="Can't unpack 8 elements into buffer of size 0"
    ):
        view.unpack_bits_into(too_small)

    # A destination whose items are wider than one byte is rejected
    wide_items = array("i", [0, 0, 0, 0])
    with pytest.raises(ValueError, match="Destination buffer has itemsize != 1"):
        view.unpack_bits_into(wide_items)


def test_buffer_view_non_bool():
array_view = na.c_array_view([1, 2, 3, 5], na.int32())
view = array_view.buffer(1)
Expand Down Expand Up @@ -99,5 +137,9 @@ def test_buffer_view_non_bool():
with pytest.raises(IndexError, match="do not describe a valid slice"):
view.elements(1, 4)

# Check that unpacking will error
with pytest.raises(ValueError, match="Can't unpack non-boolean buffer"):
view.unpack_bits()

# Check repr
assert "1 2 3 5" in repr(view)

0 comments on commit 846a189

Please sign in to comment.