Skip to content

Commit

Permalink
FixedSizeListArray.from_arrays supports mask parameter
Browse files Browse the repository at this point in the history
  • Loading branch information
LucasG0 committed Dec 29, 2023
1 parent 7c3480e commit 5b956cf
Show file tree
Hide file tree
Showing 5 changed files with 36 additions and 17 deletions.
15 changes: 6 additions & 9 deletions cpp/src/arrow/array/array_nested.cc
Original file line number Diff line number Diff line change
Expand Up @@ -865,7 +865,8 @@ FixedSizeListArray::FixedSizeListArray(const std::shared_ptr<DataType>& type,
int64_t length,
const std::shared_ptr<Array>& values,
const std::shared_ptr<Buffer>& null_bitmap,
int64_t null_count, int64_t offset) {
int64_t null_count,
int64_t offset) {
auto internal_data = ArrayData::Make(type, length, {null_bitmap}, null_count, offset);
internal_data->child_data.emplace_back(values->data());
SetData(internal_data);
Expand Down Expand Up @@ -894,7 +895,7 @@ const std::shared_ptr<DataType>& FixedSizeListArray::value_type() const {
const std::shared_ptr<Array>& FixedSizeListArray::values() const { return values_; }

Result<std::shared_ptr<Array>> FixedSizeListArray::FromArrays(
const std::shared_ptr<Array>& values, int32_t list_size) {
const std::shared_ptr<Array>& values, int32_t list_size, std::shared_ptr<Buffer> null_bitmap) {
if (list_size <= 0) {
return Status::Invalid("list_size needs to be a strict positive integer");
}
Expand All @@ -905,14 +906,12 @@ Result<std::shared_ptr<Array>> FixedSizeListArray::FromArrays(
}
int64_t length = values->length() / list_size;
auto list_type = std::make_shared<FixedSizeListType>(values->type(), list_size);
std::shared_ptr<Buffer> validity_buf;

return std::make_shared<FixedSizeListArray>(list_type, length, values, validity_buf,
/*null_count=*/0, /*offset=*/0);
return std::make_shared<FixedSizeListArray>(list_type, length, values, null_bitmap);
}

Result<std::shared_ptr<Array>> FixedSizeListArray::FromArrays(
const std::shared_ptr<Array>& values, std::shared_ptr<DataType> type) {
const std::shared_ptr<Array>& values, std::shared_ptr<DataType> type, std::shared_ptr<Buffer> null_bitmap) {
if (type->id() != Type::FIXED_SIZE_LIST) {
return Status::TypeError("Expected fixed size list type, got ", type->ToString());
}
Expand All @@ -926,10 +925,8 @@ Result<std::shared_ptr<Array>> FixedSizeListArray::FromArrays(
"The length of the values Array needs to be a multiple of the list size");
}
int64_t length = values->length() / list_type.list_size();
std::shared_ptr<Buffer> validity_buf;

return std::make_shared<FixedSizeListArray>(type, length, values, validity_buf,
/*null_count=*/0, /*offset=*/0);
return std::make_shared<FixedSizeListArray>(type, length, values, null_bitmap);
}

Result<std::shared_ptr<Array>> FixedSizeListArray::Flatten(
Expand Down
6 changes: 4 additions & 2 deletions cpp/src/arrow/array/array_nested.h
Original file line number Diff line number Diff line change
Expand Up @@ -601,15 +601,17 @@ class ARROW_EXPORT FixedSizeListArray : public Array {
/// \param[in] list_size The fixed length of each list
/// \param[in] null_bitmap Optional validity bitmap for the resulting lists
/// \return Will have length equal to values.length() / list_size
static Result<std::shared_ptr<Array>> FromArrays(const std::shared_ptr<Array>& values,
int32_t list_size);
int32_t list_size,
std::shared_ptr<Buffer> null_bitmap = NULLPTR);

/// \brief Construct FixedSizeListArray from child value array and type
///
/// \param[in] values Array containing list values
/// \param[in] type The fixed sized list type
/// \param[in] null_bitmap Optional validity bitmap for the resulting lists
/// \return Will have length equal to values.length() / type.list_size()
static Result<std::shared_ptr<Array>> FromArrays(const std::shared_ptr<Array>& values,
std::shared_ptr<DataType> type);
std::shared_ptr<DataType> type,
std::shared_ptr<Buffer> null_bitmap = NULLPTR);

protected:
void SetData(const std::shared_ptr<ArrayData>& data);
Expand Down
14 changes: 10 additions & 4 deletions python/pyarrow/array.pxi
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ from cpython.pycapsule cimport PyCapsule_CheckExact, PyCapsule_GetPointer, PyCap
import os
import warnings
from cython import sizeof
from libc.stdio cimport printf


cdef _sequence_to_array(object sequence, object mask, object size,
Expand Down Expand Up @@ -2484,7 +2485,7 @@ cdef class MapArray(ListArray):
Examples
--------
First, let's understand the structure of our dataset when viewed in a rectangular data model.
First, let's understand the structure of our dataset when viewed in a rectangular data model.
The total of 5 respondents answered the question "How much did you like the movie x?".
The value -1 in the integer array means that the value is missing. The boolean array
represents the null bitmask corresponding to the missing values in the integer array.
Expand Down Expand Up @@ -2590,7 +2591,7 @@ cdef class FixedSizeListArray(BaseListArray):
"""

@staticmethod
def from_arrays(values, list_size=None, DataType type=None):
def from_arrays(values, list_size=None, DataType type=None, mask=None):
"""
Construct FixedSizeListArray from array of values and a list length.
Expand All @@ -2602,6 +2603,9 @@ cdef class FixedSizeListArray(BaseListArray):
type : DataType, optional
If not specified, a default FixedSizeListType with the values' type and
`list_size` length is used.
mask : Array (boolean type), optional
Indicate which lists are null (True) or not null (False).
Returns
-------
Expand Down Expand Up @@ -2652,19 +2656,21 @@ cdef class FixedSizeListArray(BaseListArray):

_values = asarray(values)

c_mask = c_mask_inverted_from_obj(mask, None)

if type is not None:
if list_size is not None:
raise ValueError("Cannot specify both list_size and type")
with nogil:
c_result = CFixedSizeListArray.FromArraysAndType(
_values.sp_array, type.sp_type)
_values.sp_array, type.sp_type, c_mask)
else:
if list_size is None:
raise ValueError("Should specify one of list_size and type")
_list_size = <int32_t>list_size
with nogil:
c_result = CFixedSizeListArray.FromArrays(
_values.sp_array, _list_size)
_values.sp_array, _list_size, c_mask)
cdef Array result = pyarrow_wrap_array(GetResultValue(c_result))
result.validate()
return result
Expand Down
8 changes: 6 additions & 2 deletions python/pyarrow/includes/libarrow.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -673,11 +673,15 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil:
cdef cppclass CFixedSizeListArray" arrow::FixedSizeListArray"(CArray):
@staticmethod
CResult[shared_ptr[CArray]] FromArrays(
const shared_ptr[CArray]& values, int32_t list_size)
const shared_ptr[CArray]& values,
int32_t list_size,
shared_ptr[CBuffer] null_bitmap)

@staticmethod
CResult[shared_ptr[CArray]] FromArraysAndType" FromArrays"(
const shared_ptr[CArray]& values, shared_ptr[CDataType])
const shared_ptr[CArray]& values,
shared_ptr[CDataType],
shared_ptr[CBuffer] null_bitmap)

int64_t value_offset(int i)
int64_t value_length(int i)
Expand Down
10 changes: 10 additions & 0 deletions python/pyarrow/tests/test_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -1091,6 +1091,16 @@ def test_fixed_size_list_from_arrays():
assert result.type.equals(typ)
assert result.type.value_field.name == "name"

result = pa.FixedSizeListArray.from_arrays(values,
type=typ,
mask=pa.array([False, True, False]))
assert result.to_pylist() == [[0, 1, 2, 3], None, [8, 9, 10, 11]]

result = pa.FixedSizeListArray.from_arrays(values,
list_size=4,
mask=pa.array([False, True, False]))
assert result.to_pylist() == [[0, 1, 2, 3], None, [8, 9, 10, 11]]

# raise on invalid values / list_size
with pytest.raises(ValueError):
pa.FixedSizeListArray.from_arrays(values, -4)
Expand Down

0 comments on commit 5b956cf

Please sign in to comment.