diff --git a/cpp/src/arrow/c/bridge.cc b/cpp/src/arrow/c/bridge.cc index b967af28e4aec..033371d3d6719 100644 --- a/cpp/src/arrow/c/bridge.cc +++ b/cpp/src/arrow/c/bridge.cc @@ -478,6 +478,8 @@ struct SchemaExporter { return Status::OK(); } + Status Visit(const RunEndEncodedType& type) { return SetFormat("+r"); } + ExportedSchemaPrivateData export_; int64_t flags_ = 0; std::vector> additional_metadata_; @@ -1106,6 +1108,8 @@ struct SchemaImporter { return ProcessMap(); case 'u': return ProcessUnion(); + case 'r': + return ProcessREE(); } return f_parser_.Invalid(); } @@ -1280,6 +1284,22 @@ struct SchemaImporter { return Status::OK(); } + Status ProcessREE() { + RETURN_NOT_OK(f_parser_.CheckAtEnd()); + RETURN_NOT_OK(CheckNumChildren(2)); + ARROW_ASSIGN_OR_RAISE(auto run_ends_field, MakeChildField(0)); + ARROW_ASSIGN_OR_RAISE(auto values_field, MakeChildField(1)); + if (!is_run_end_type(run_ends_field->type()->id())) { + return Status::Invalid("Expected a valid run-end integer type, but struct has ", + run_ends_field->type()->ToString()); + } + if (values_field->type()->id() == Type::RUN_END_ENCODED) { + return Status::Invalid("ArrowArray struct contains a nested run-end encoded array"); + } + type_ = run_end_encoded(run_ends_field->type(), values_field->type()); + return Status::OK(); + } + Result> MakeChildField(int64_t child_id) { const auto& child = child_importers_[child_id]; if (child.c_struct_->name == nullptr) { @@ -1601,6 +1621,17 @@ struct ArrayImporter { return Status::OK(); } + Status Visit(const RunEndEncodedType& type) { + RETURN_NOT_OK(CheckNumChildren(2)); + RETURN_NOT_OK(CheckNumBuffers(0)); + RETURN_NOT_OK(AllocateArrayData()); + // Always have a null bitmap buffer as much of the code in arrow assumes + // the buffers vector to have at least one entry on every array format. 
+ data_->buffers.emplace_back(nullptr); + data_->null_count = 0; + return Status::OK(); + } + Status ImportFixedSizePrimitive(const FixedWidthType& type) { RETURN_NOT_OK(CheckNoChildren()); RETURN_NOT_OK(CheckNumBuffers(2)); diff --git a/cpp/src/arrow/c/bridge_test.cc b/cpp/src/arrow/c/bridge_test.cc index 9727403163e58..bd0e498a9f332 100644 --- a/cpp/src/arrow/c/bridge_test.cc +++ b/cpp/src/arrow/c/bridge_test.cc @@ -43,6 +43,11 @@ #include "arrow/util/logging.h" #include "arrow/util/macros.h" +// TODO(GH-37221): Remove these ifdef checks when compute dependency is removed +#ifdef ARROW_COMPUTE +#include "arrow/compute/api_vector.h" +#endif + namespace arrow { using internal::ArrayExportGuard; @@ -443,6 +448,20 @@ TEST_F(TestSchemaExport, Union) { {ARROW_FLAG_NULLABLE}); } +#ifdef ARROW_COMPUTE +TEST_F(TestSchemaExport, RunEndEncoded) { + TestNested(run_end_encoded(int16(), uint8()), {"+r", "s", "C"}, + {"", "run_ends", "values"}, {ARROW_FLAG_NULLABLE, 0, ARROW_FLAG_NULLABLE}); + TestNested(run_end_encoded(int32(), float64()), {"+r", "i", "g"}, + {"", "run_ends", "values"}, {ARROW_FLAG_NULLABLE, 0, ARROW_FLAG_NULLABLE}); + TestNested(run_end_encoded(int64(), utf8()), {"+r", "l", "u"}, + {"", "run_ends", "values"}, {ARROW_FLAG_NULLABLE, 0, ARROW_FLAG_NULLABLE}); + TestNested(run_end_encoded(int32(), list(utf8())), {"+r", "i", "+l", "u"}, + {"", "run_ends", "values", "item"}, + {ARROW_FLAG_NULLABLE, 0, ARROW_FLAG_NULLABLE, ARROW_FLAG_NULLABLE}); +} +#endif + std::string GetIndexFormat(Type::type type_id) { switch (type_id) { case Type::UINT8: @@ -952,6 +971,36 @@ TEST_F(TestArrayExport, Union) { TestNested(type, data); } +#ifdef ARROW_COMPUTE +Result> REEFromJSON(const std::shared_ptr& ree_type, + const std::string& json) { + auto ree_type_ptr = checked_cast(ree_type.get()); + auto array = ArrayFromJSON(ree_type_ptr->value_type(), json); + ARROW_ASSIGN_OR_RAISE( + auto datum, + RunEndEncode(array, compute::RunEndEncodeOptions{ree_type_ptr->run_end_type()})); + 
return datum.make_array(); +} + +TEST_F(TestArrayExport, RunEndEncoded) { + auto factory = []() { + return REEFromJSON(run_end_encoded(int32(), int8()), + "[1, 2, 2, 3, null, null, null, 4]"); + }; + TestNested(factory); +} + +TEST_F(TestArrayExport, RunEndEncodedSliced) { + auto factory = []() -> Result> { + ARROW_ASSIGN_OR_RAISE(auto ree_array, + REEFromJSON(run_end_encoded(int32(), int8()), + "[1, 2, 2, 3, null, null, null, 4]")); + return ree_array->Slice(1, 5); + }; + TestNested(factory); +} +#endif + TEST_F(TestArrayExport, Dictionary) { { auto factory = []() { @@ -1269,6 +1318,17 @@ class TestDeviceArrayExport : public ::testing::Test { return [=]() { return ToDevice(mm, *ArrayFromJSON(type, json)->data()); }; } +#ifdef ARROW_COMPUTE + static std::function>()> JSONREEArrayFactory( + const std::shared_ptr& mm, std::shared_ptr type, + const char* json) { + return [=]() -> Result> { + ARROW_ASSIGN_OR_RAISE(auto result, REEFromJSON(type, json)); + return ToDevice(mm, *result->data()); + }; + } +#endif + template void TestWithArrayFactory(ArrayFactory&& factory, ExportCheckFunc&& check_func) { auto orig_bytes = pool_->bytes_allocated(); @@ -1465,6 +1525,17 @@ TEST_F(TestDeviceArrayExport, Union) { TestNested(mm, type, data); } +#ifdef ARROW_COMPUTE +TEST_F(TestDeviceArrayExport, RunEndEncoded) { + std::shared_ptr device = std::make_shared(1); + auto mm = device->default_memory_manager(); + + auto type = run_end_encoded(int32(), int32()); + const char* data = "[1, null, 2, 2, 4, 5]"; + TestNested(JSONREEArrayFactory(mm, type, data)); +} +#endif + TEST_F(TestDeviceArrayExport, Extension) { std::shared_ptr device = std::make_shared(1); auto mm = device->default_memory_manager(); @@ -1564,11 +1635,10 @@ class SchemaStructBuilder { // Create a new ArrowSchema struct with a stable C pointer struct ArrowSchema* AddChild() { - nested_structs_.emplace_back(); - struct ArrowSchema* result = &nested_structs_.back(); - memset(result, 0, sizeof(*result)); - result->release = 
NoOpSchemaRelease; - return result; + auto& result = nested_structs_.emplace_back(); + memset(&result, 0, sizeof(result)); + result.release = NoOpSchemaRelease; + return &result; } // Create a stable C pointer to the N last structs in nested_structs_ @@ -1620,6 +1690,17 @@ class SchemaStructBuilder { c->children = NLastChildren(c->n_children, c); } + void FillRunEndEncoded(struct ArrowSchema* c, const char* format, + const char* name = nullptr, int64_t flags = kDefaultFlags) { + c->flags = flags; + c->format = format; + c->name = name; + c->n_children = 2; + c->children = NLastChildren(2, c); + c->children[0]->name = "run_ends"; + c->children[1]->name = "values"; + } + void FillPrimitive(const char* format, const char* name = nullptr, int64_t flags = kDefaultFlags) { FillPrimitive(&c_struct_, format, name, flags); @@ -1637,6 +1718,11 @@ class SchemaStructBuilder { FillStructLike(&c_struct_, format, n_children, name, flags); } + void FillRunEndEncoded(const char* format, const char* name = nullptr, + int64_t flags = kDefaultFlags) { + FillRunEndEncoded(&c_struct_, format, name, flags); + } + struct ArrowSchema c_struct_; // Deque elements don't move when the deque is appended to, which allows taking // stable C pointers to them. 
@@ -1902,6 +1988,15 @@ TEST_F(TestSchemaImport, Map) { CheckImport(expected); } +#ifdef ARROW_COMPUTE +TEST_F(TestSchemaImport, RunEndEncoded) { + FillPrimitive(AddChild(), "s", "run_ends"); + FillPrimitive(AddChild(), "I", "values"); + FillRunEndEncoded("+r"); + CheckImport(run_end_encoded(int16(), uint32())); +} +#endif + TEST_F(TestSchemaImport, Dictionary) { FillPrimitive(AddChild(), "u"); FillPrimitive("c"); @@ -2021,6 +2116,33 @@ TEST_F(TestSchemaImport, UnionError) { CheckImportError(); } +TEST_F(TestSchemaImport, RunEndEncodedError) { + // Bad run-end type + FillPrimitive(AddChild(), "c", "run_ends"); + FillPrimitive(AddChild(), "u", "values"); + FillRunEndEncoded("+r"); + CheckImportError(); + + // REE of a REE also causes an error + ArrowSchema* run_ends = AddChild(); + ArrowSchema* values; + FillPrimitive(run_ends, "i", "run_ends"); + { + FillPrimitive(AddChild(), "i", "run_ends"); + FillPrimitive(AddChild(), "u", "values"); + values = AddChild(); + FillRunEndEncoded(values, "+r", "values"); + } + // Fill the top-level REE + ArrowSchema* children[2] = {run_ends, values}; + c_struct_.flags = kDefaultFlags; + c_struct_.format = "+r"; + c_struct_.name = ""; + c_struct_.n_children = 2; + c_struct_.children = children; + CheckImportError(); +} + TEST_F(TestSchemaImport, DictionaryError) { // Bad index type FillPrimitive(AddChild(), "c"); @@ -2178,6 +2300,10 @@ static const void* timestamp_buffers_no_nulls2[2] = {nullptr, timestamp_data_buf static const void* timestamp_buffers_no_nulls3[2] = {nullptr, timestamp_data_buffer3}; static const void* timestamp_buffers_no_nulls4[2] = {nullptr, timestamp_data_buffer4}; +static const uint16_t run_ends_data_buffer5[5] = {1, 2, 4, 7, 9}; +[[maybe_unused]] static const void* run_ends_buffers5[2] = {nullptr, + run_ends_data_buffer5}; + static const uint8_t string_data_buffer1[] = "foobarquuxxyzzy"; static const int32_t string_offsets_buffer1[] = {0, 3, 3, 6, 10, 15}; @@ -2354,6 +2480,20 @@ class TestArrayImport : public 
::testing::Test { legacy); } + void FillRunEndEncoded(int64_t length, int64_t offset) { + FillRunEndEncoded(&c_struct_, length, offset); + } + + void FillRunEndEncoded(struct ArrowArray* c, int64_t length, int64_t offset) { + c->length = length; + c->null_count = 0; + c->offset = offset; + c->n_buffers = 0; + c->buffers = nullptr; + c->n_children = 2; + c->children = NLastChildren(2, c); + } + void CheckImport(const std::shared_ptr& expected) { ArrayReleaseCallback cb(&c_struct_); @@ -2704,6 +2844,51 @@ TEST_F(TestArrayImport, Struct) { CheckImport(expected); } +#ifdef ARROW_COMPUTE +TEST_F(TestArrayImport, RunEndEncoded) { + FillPrimitive(AddChild(), 5, 0, 0, run_ends_buffers5); + FillPrimitive(AddChild(), 5, 0, 0, primitive_buffers_no_nulls5); + FillRunEndEncoded(9, 0); + ASSERT_OK_AND_ASSIGN(auto expected, + REEFromJSON(run_end_encoded(int16(), float32()), + "[0.0, 1.5, -2.0, -2.0, 3.0, 3.0, 3.0, 4.0, 4.0]")); + ASSERT_OK(expected->ValidateFull()); + CheckImport(expected); +} + +TEST_F(TestArrayImport, RunEndEncodedWithOffset) { + auto ree_type = run_end_encoded(int16(), float32()); + // Offset in children + FillPrimitive(AddChild(), 3, 0, 2, run_ends_buffers5); + FillPrimitive(AddChild(), 3, 0, 2, primitive_buffers_no_nulls5); + FillRunEndEncoded(7, 0); + ASSERT_OK_AND_ASSIGN(auto expected, + REEFromJSON(ree_type, "[-2.0, -2.0, -2.0, -2.0, 3.0, 3.0, 3.0]")); + CheckImport(expected); + + // Offset in parent + FillPrimitive(AddChild(), 5, 0, 0, run_ends_buffers5); + FillPrimitive(AddChild(), 5, 0, 0, primitive_buffers_no_nulls5); + FillRunEndEncoded(5, 2); + ASSERT_OK_AND_ASSIGN(expected, REEFromJSON(ree_type, "[-2.0, -2.0, 3.0, 3.0, 3.0]")); + CheckImport(expected); + + // Length in parent that cuts last run + FillPrimitive(AddChild(), 5, 0, 0, run_ends_buffers5); + FillPrimitive(AddChild(), 5, 0, 0, primitive_buffers_no_nulls5); + FillRunEndEncoded(4, 2); + ASSERT_OK_AND_ASSIGN(expected, REEFromJSON(ree_type, "[-2.0, -2.0, 3.0, 3.0]")); + CheckImport(expected); 
+ + // Offset in both children and parent + FillPrimitive(AddChild(), 3, 0, 2, run_ends_buffers5); + FillPrimitive(AddChild(), 3, 0, 2, primitive_buffers_no_nulls5); + FillRunEndEncoded(4, 2); + ASSERT_OK_AND_ASSIGN(expected, REEFromJSON(ree_type, "[-2.0, -2.0, 3.0, 3.0]")); + CheckImport(expected); +} +#endif + TEST_F(TestArrayImport, SparseUnion) { auto type = sparse_union({field("strs", utf8()), field("ints", int8())}, {43, 42}); auto expected = @@ -3209,6 +3394,13 @@ TEST_F(TestSchemaRoundtrip, Union) { TestWithTypeFactory([&]() { return dense_union({f1, f2}, type_codes); }); } +#ifdef ARROW_COMPUTE +TEST_F(TestSchemaRoundtrip, RunEndEncoded) { + TestWithTypeFactory([]() { return run_end_encoded(int16(), float32()); }); + TestWithTypeFactory([]() { return run_end_encoded(int32(), list(float32())); }); +} +#endif + TEST_F(TestSchemaRoundtrip, Dictionary) { for (auto index_ty : all_dictionary_index_types()) { TestWithTypeFactory([&]() { return dictionary(index_ty, utf8()); }); @@ -3500,6 +3692,34 @@ TEST_F(TestArrayRoundtrip, Union) { } } +#ifdef ARROW_COMPUTE +TEST_F(TestArrayRoundtrip, RunEndEncoded) { + { + auto factory = []() -> Result> { + ARROW_ASSIGN_OR_RAISE(auto ree_array, + REEFromJSON(run_end_encoded(int32(), int8()), + "[1, 2, 2, 3, null, null, null, 4]")); + return ree_array->Slice(1, 5); + }; + TestWithArrayFactory(factory); + } + { + auto factory = []() -> Result> { + ARROW_ASSIGN_OR_RAISE( + auto ree_array, + RunEndEncodedArray::Make( + run_end_encoded(int64(), list(utf8())), 8, + ArrayFromJSON(int64(), "[1, 3, 4, 7, 8]"), + ArrayFromJSON(list(utf8()), + R"([["abc", "def"], ["efg"], [], null, ["efg", "hij"]])"))); + RETURN_NOT_OK(ree_array->ValidateFull()); + return ree_array; + }; + TestWithArrayFactory(factory); + } +} +#endif + TEST_F(TestArrayRoundtrip, Dictionary) { { auto factory = []() { diff --git a/cpp/src/arrow/type.cc b/cpp/src/arrow/type.cc index 21c47e17239cb..3d294a3fa8642 100644 --- a/cpp/src/arrow/type.cc +++ 
b/cpp/src/arrow/type.cc @@ -2658,7 +2658,7 @@ std::shared_ptr struct_( return std::make_shared(MakeFields(fields)); } -std::shared_ptr run_end_encoded(std::shared_ptr run_end_type, +std::shared_ptr run_end_encoded(std::shared_ptr run_end_type, std::shared_ptr value_type) { return std::make_shared(std::move(run_end_type), std::move(value_type)); diff --git a/docs/source/format/CDataInterface.rst b/docs/source/format/CDataInterface.rst index b55d64a2fa6c2..8f491470965b0 100644 --- a/docs/source/format/CDataInterface.rst +++ b/docs/source/format/CDataInterface.rst @@ -217,6 +217,8 @@ names and types of child fields are read from the child arrays. +------------------------+---------------------------------------------------+------------+ | ``+us:I,J,...`` | sparse union with type ids I,J... | | +------------------------+---------------------------------------------------+------------+ +| ``+r`` | run-end encoded | \(3) | ++------------------------+---------------------------------------------------+------------+ Notes: @@ -228,6 +230,11 @@ Notes: As specified in the Arrow columnar format, the map type has a single child type named ``entries``, itself a 2-child struct type of ``(key, value)``. +(3) + As specified in the Arrow columnar format, the run-end encoded type has two + children where the first is the (integral) ``run_ends`` and the second is the + ``values``. + Examples -------- @@ -245,6 +252,9 @@ Examples * A ``sparse_union`` with type ids ``4, 5`` has format string ``+us:4,5``; its two children have names ``ints`` and ``floats``, and format strings ``i`` and ``f`` respectively. +* A ``run_end_encoded`` has format string ``+r``; its two + children have names ``run_ends`` and ``values``, and format strings + ``i`` and ``f`` respectively. .. 
_c-data-interface-struct-defs: diff --git a/go/arrow/array/encoded.go b/go/arrow/array/encoded.go index 10e9b949c7c35..bf4a942cf1c35 100644 --- a/go/arrow/array/encoded.go +++ b/go/arrow/array/encoded.go @@ -424,7 +424,7 @@ func (b *RunEndEncodedBuilder) newData() (data *Data) { defer runEnds.Release() data = NewData( - b.dt, b.length, []*memory.Buffer{nil}, + b.dt, b.length, []*memory.Buffer{}, []arrow.ArrayData{runEnds.Data(), values.Data()}, 0, 0) b.reset() return diff --git a/go/arrow/cdata/cdata.go b/go/arrow/cdata/cdata.go index 97e1cb91ad7e0..bc8fc6e987b93 100644 --- a/go/arrow/cdata/cdata.go +++ b/go/arrow/cdata/cdata.go @@ -278,6 +278,11 @@ func importSchema(schema *CArrowSchema) (ret arrow.Field, err error) { dt = arrow.FixedSizeListOfField(int32(listSize), childFields[0]) case 's': // struct dt = arrow.StructOf(childFields...) + case 'r': // run-end encoded + if len(childFields) != 2 { + return ret, fmt.Errorf("%w: run-end encoded arrays must have 2 children", arrow.ErrInvalid) + } + dt = arrow.RunEndEncodedOf(childFields[0].Type, childFields[1].Type) case 'm': // map type is basically a list of structs. 
st := childFields[0].Type.(*arrow.StructType) dt = arrow.MapOf(st.Field(0).Type, st.Field(1).Type) @@ -379,6 +384,16 @@ func (imp *cimporter) doImportChildren() error { imp.children[i].dt = st.Field(i).Type imp.children[i].importChild(imp, c) } + case arrow.RUN_END_ENCODED: // import run-ends and values + st := imp.dt.(*arrow.RunEndEncodedType) + imp.children[0].dt = st.RunEnds() + if err := imp.children[0].importChild(imp, children[0]); err != nil { + return err + } + imp.children[1].dt = st.Encoded() + if err := imp.children[1].importChild(imp, children[1]); err != nil { + return err + } case arrow.MAP: // only one child to import, it's a struct array imp.children[0].dt = imp.dt.(*arrow.MapType).Elem() if err := imp.children[0].importChild(imp, children[0]); err != nil { @@ -491,6 +506,17 @@ func (imp *cimporter) doImport(src *CArrowArray) error { } imp.data = array.NewData(dt, int(imp.arr.length), []*memory.Buffer{nulls}, children, int(imp.arr.null_count), int(imp.arr.offset)) + case *arrow.RunEndEncodedType: + if err := imp.checkNumBuffers(0); err != nil { + return err + } + + if len(imp.children) != 2 { + return fmt.Errorf("%w: run-end encoded array should have 2 children", arrow.ErrInvalid) + } + + children := []arrow.ArrayData{imp.children[0].data, imp.children[1].data} + imp.data = array.NewData(dt, int(imp.arr.length), []*memory.Buffer{}, children, int(imp.arr.null_count), int(imp.arr.offset)) case *arrow.DenseUnionType: if err := imp.checkNoNulls(); err != nil { return err diff --git a/go/arrow/cdata/cdata_exports.go b/go/arrow/cdata/cdata_exports.go index 209bd237f6f53..ae6247494b100 100644 --- a/go/arrow/cdata/cdata_exports.go +++ b/go/arrow/cdata/cdata_exports.go @@ -232,6 +232,8 @@ func (exp *schemaExporter) exportFormat(dt arrow.DataType) string { return fmt.Sprintf("+w:%d", dt.Len()) case *arrow.StructType: return "+s" + case *arrow.RunEndEncodedType: + return "+r" case *arrow.MapType: if dt.KeysSorted { exp.flags |= C.ARROW_FLAG_MAP_KEYS_SORTED @@ 
-425,6 +427,14 @@ func exportArray(arr arrow.Array, out *CArrowArray, outSchema *CArrowSchema) { childPtrs[i] = &children[i] } out.children = (**CArrowArray)(unsafe.Pointer(&childPtrs[0])) + case *array.RunEndEncoded: + out.n_children = 2 + childPtrs := allocateArrowArrayPtrArr(2) + children := allocateArrowArrayArr(2) + exportArray(arr.RunEndsArr(), &children[0], nil) + exportArray(arr.Values(), &children[1], nil) + childPtrs[0], childPtrs[1] = &children[0], &children[1] + out.children = (**CArrowArray)(unsafe.Pointer(&childPtrs[0])) case *array.Dictionary: out.dictionary = (*CArrowArray)(C.malloc(C.sizeof_struct_ArrowArray)) exportArray(arr.Dictionary(), out.dictionary, nil) diff --git a/go/arrow/cdata/cdata_test.go b/go/arrow/cdata/cdata_test.go index 5b8082a4322d0..a0c2f25496a6b 100644 --- a/go/arrow/cdata/cdata_test.go +++ b/go/arrow/cdata/cdata_test.go @@ -24,6 +24,7 @@ package cdata import ( + "encoding/json" "errors" "fmt" "io" @@ -586,6 +587,18 @@ func createTestStructArr() arrow.Array { return bld.NewArray() } +func createTestRunEndsArr() arrow.Array { + bld := array.NewRunEndEncodedBuilder(memory.DefaultAllocator, + arrow.PrimitiveTypes.Int32, arrow.PrimitiveTypes.Int8) + defer bld.Release() + + if err := json.Unmarshal([]byte(`[1, 2, 2, 3, null, null, null, 4]`), bld); err != nil { + panic(err) + } + + return bld.NewArray() +} + func createTestMapArr() arrow.Array { bld := array.NewMapBuilder(memory.DefaultAllocator, arrow.PrimitiveTypes.Int8, arrow.BinaryTypes.String, false) defer bld.Release() @@ -662,6 +675,7 @@ func TestNestedArrays(t *testing.T) { {"map", createTestMapArr}, {"sparse union", createTestSparseUnion}, {"dense union", createTestDenseUnion}, + {"run-end encoded", createTestRunEndsArr}, } for _, tt := range tests { diff --git a/go/arrow/cdata/cdata_test_framework.go b/go/arrow/cdata/cdata_test_framework.go index 5a9b696e296ca..7dd23926a607a 100644 --- a/go/arrow/cdata/cdata_test_framework.go +++ b/go/arrow/cdata/cdata_test_framework.go @@ 
-262,6 +262,11 @@ func createCArr(arr arrow.Array) *CArrowArray { clist := []*CArrowArray{createCArr(arr.ListValues())} children = (**CArrowArray)(unsafe.Pointer(&clist[0])) nchildren += 1 + case *array.RunEndEncoded: + clist := []*CArrowArray{createCArr(arr.RunEndsArr()), + createCArr(arr.Values())} + children = (**CArrowArray)(unsafe.Pointer(&clist[0])) + nchildren += 2 case array.Union: clist := []*CArrowArray{} for i := 0; i < arr.NumFields(); i++ { @@ -277,7 +282,13 @@ func createCArr(arr arrow.Array) *CArrowArray { carr.length = C.int64_t(arr.Len()) carr.null_count = C.int64_t(arr.NullN()) carr.offset = C.int64_t(arr.Data().Offset()) + carr.release = (*[0]byte)(C.release_test_arr) + buffers := arr.Data().Buffers() + if len(buffers) == 0 { + return carr + } + cbufs := allocateBufferPtrArr(len(buffers)) for i, b := range buffers { if b != nil { @@ -290,7 +301,6 @@ func createCArr(arr arrow.Array) *CArrowArray { if len(cbufs) > 0 { carr.buffers = (*unsafe.Pointer)(unsafe.Pointer(&cbufs[0])) } - carr.release = (*[0]byte)(C.release_test_arr) return carr } diff --git a/go/arrow/cdata/test/test_export_to_cgo.py b/go/arrow/cdata/test/test_export_to_cgo.py index e794a84bd43b8..4b669f6424437 100644 --- a/go/arrow/cdata/test/test_export_to_cgo.py +++ b/go/arrow/cdata/test/test_export_to_cgo.py @@ -182,28 +182,29 @@ def test_batch_roundtrip(self): pa.float32(), pa.float64(), pa.decimal128(19, 4), - # pa.string(), - # pa.binary(), - # pa.binary(10), - # pa.large_string(), - # pa.large_binary(), + pa.string(), + pa.binary(), + pa.binary(10), + pa.large_string(), + pa.large_binary(), pa.list_(pa.int32()), pa.list_(pa.int32(), 2), pa.large_list(pa.uint16()), pa.struct([ pa.field("a", pa.int32()), pa.field("b", pa.int8()), - # pa.field("c", pa.string()), + pa.field("c", pa.string()), ]), pa.struct([ pa.field("a", pa.int32(), nullable=False), pa.field("b", pa.int8(), nullable=False), - # pa.field("c", pa.string()), + pa.field("c", pa.string()), ]), pa.dictionary(pa.int8(), 
pa.int64()), - # pa.dictionary(pa.int8(), pa.string()), - # pa.map_(pa.string(), pa.int32()), + pa.dictionary(pa.int8(), pa.string()), + pa.map_(pa.string(), pa.int32()), pa.map_(pa.int64(), pa.int32()), + # pa.run_end_encoded(pa.int16(), pa.int64()), ] def test_empty_roundtrip(self):