diff --git a/python/src/nanoarrow/_lib.pyi b/python/src/nanoarrow/_lib.pyi index 31d2d0a6c..ed6cf32ff 100644 --- a/python/src/nanoarrow/_lib.pyi +++ b/python/src/nanoarrow/_lib.pyi @@ -236,7 +236,7 @@ class CBuffer: data_type_id: Incomplete element_size_bits: Incomplete format: Incomplete - item_size: Incomplete + itemsize: Incomplete n_elements: Incomplete size_bytes: Incomplete @classmethod @@ -264,6 +264,7 @@ class CBufferBuilder: __pyx_vtable__: ClassVar[PyCapsule] = ... capacity_bytes: Incomplete format: Incomplete + itemsize: Incomplete size_bytes: Incomplete @classmethod def __init__(cls, *args, **kwargs) -> None: @@ -310,8 +311,15 @@ class CBufferBuilder: This method returns the number of elements that were written. """ + def write_fill(self, *args, **kwargs): + """Write fill bytes to this buffer + + Appends the byte ``value`` to this buffer ``size_bytes`` times. + """ def __buffer__(self, *args, **kwargs): """Return a buffer object that exposes the underlying memory of the object.""" + def __len__(self) -> int: + """Return len(self).""" def __reduce__(self): ... def __release_buffer__(self, *args, **kwargs): """Release the buffer object that exposes the underlying memory of the object.""" @@ -323,7 +331,7 @@ class CBufferView: device: Incomplete element_size_bits: Incomplete format: Incomplete - item_size: Incomplete + itemsize: Incomplete n_elements: Incomplete size_bytes: Incomplete @classmethod diff --git a/python/src/nanoarrow/_lib.pyx b/python/src/nanoarrow/_lib.pyx index 0b2fd7b2c..590406a0f 100644 --- a/python/src/nanoarrow/_lib.pyx +++ b/python/src/nanoarrow/_lib.pyx @@ -2394,6 +2394,15 @@ cdef class CBufferBuilder: self._buffer._ptr.size_bytes = new_size return self + def write_fill(self, uint8_t value, int64_t size_bytes): + """Write fill bytes to this buffer + + Appends the byte ``value`` to this buffer ``size_bytes`` times. + """ + self._assert_unlocked() + cdef int code = ArrowBufferAppendFill(self._buffer._ptr, value, size_bytes) + Error.raise_error_not_ok("ArrowBufferAppendFill", code) + def write(self, content): """Write bytes to this buffer diff --git a/python/src/nanoarrow/visitor.py b/python/src/nanoarrow/visitor.py index 0b0d76d7b..7adaf4710 100644 --- a/python/src/nanoarrow/visitor.py +++ b/python/src/nanoarrow/visitor.py @@ -339,13 +339,13 @@ def begin(self, total_elements: Union[int, None]): self._builder.reserve_bytes(total_elements * element_size_bytes) def visit_chunk_view(self, array_view: CArrayView) -> None: - converter = self._builder + builder = self._builder offset, length = array_view.offset, array_view.length - dst_bytes = length * converter.itemsize + dst_bytes = length * builder.itemsize - converter.reserve_bytes(dst_bytes) - array_view.buffer(1).copy_into(converter, offset, length, len(converter)) - converter.advance(dst_bytes) + builder.reserve_bytes(dst_bytes) + array_view.buffer(1).copy_into(builder, offset, length, len(builder)) + builder.advance(dst_bytes) def finish(self) -> Any: return self._builder.finish() @@ -355,16 +355,15 @@ class ToBooleanBufferConverter(ArrayViewVisitor): def begin(self, total_elements: Union[int, None]): self._builder = CBufferBuilder() self._builder.set_format("?") - if total_elements is not None: self._builder.reserve_bytes(total_elements) def visit_chunk_view(self, array_view: CArrayView) -> None: - converter = self._builder + builder = self._builder offset, length = array_view.offset, array_view.length - converter.reserve_bytes(length) - array_view.buffer(1).unpack_bits_into(converter, offset, length, len(converter)) - converter.advance(length) + builder.reserve_bytes(length) + array_view.buffer(1).unpack_bits_into(builder, offset, length, len(builder)) + builder.advance(length) def finish(self) -> Any: return self._builder.finish() @@ -404,7 +403,7 @@ def visit_chunk_view(self, array_view: CArrayView) -> None: if chunk_contains_nulls: current_length = self._length if not bitmap_allocated: - self._fill_valid(current_length) + builder.write_fill(1, current_length) builder.reserve_bytes(length) array_view.buffer(0).unpack_bits_into( @@ -413,7 +412,7 @@ def visit_chunk_view(self, array_view: CArrayView) -> None: builder.advance(length) elif bitmap_allocated: - self._fill_valid(length) + builder.write_fill(1, length) self._length += length self._converter.visit_chunk_view(array_view) @@ -423,13 +422,6 @@ def finish(self) -> Any: data = self._converter.finish() return self._handle_nulls(is_valid if len(is_valid) > 0 else None, data) - def _fill_valid(self, length): - builder = self._builder - builder.reserve_bytes(length) - out_start = len(builder) - memoryview(builder)[out_start : out_start + length] = b"\x01" * length - builder.advance(length) - def _resolve_converter_cls(schema, handle_nulls=None): schema_view = c_schema_view(schema) diff --git a/python/tests/test_c_buffer.py b/python/tests/test_c_buffer.py index c84d866ee..fea83dc4c 100644 --- a/python/tests/test_c_buffer.py +++ b/python/tests/test_c_buffer.py @@ -222,28 +222,22 @@ def test_c_buffer_builder(): def test_c_buffer_builder_buffer_protocol(): - import platform - builder = CBufferBuilder() builder.reserve_bytes(1) - mv = memoryview(builder) - assert len(mv) == 1 + with memoryview(builder) as mv: + assert len(mv) == 1 - with pytest.raises(BufferError, match="CBufferBuilder is locked"): - memoryview(builder) + with pytest.raises(BufferError, match="CBufferBuilder is locked"): + memoryview(builder) - with pytest.raises(BufferError, match="CBufferBuilder is locked"): - assert bytes(builder.finish()) == b"abcdefghij" + with pytest.raises(BufferError, match="CBufferBuilder is locked"): + assert bytes(builder.finish()) == b"abcdefghij" - # On at least some versions of PyPy the call to mv.release() does not seem - # to deterministically call the CBufferBuilder's __releasebuffer__(). - if platform.python_implementation() == "PyPy": - pytest.skip("CBufferBuilder buffer release is non-deterministic on PyPy") + mv[builder.size_bytes] = ord("k") - mv[builder.size_bytes] = ord("k") builder.advance(1) - mv.release() + assert bytes(builder.finish()) == b"k"