From 234554164a29e84350d43ad986926cbd59a9da4d Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Mon, 7 Mar 2016 11:33:08 -0800 Subject: [PATCH] Test basic conversion of nested lists --- cpp/src/arrow/types/list-test.cc | 2 +- cpp/src/arrow/types/list.h | 4 ++- python/arrow/tests/test_convert_builtin.py | 14 ++++----- python/src/pyarrow/adapters/builtin.cc | 36 +++++++++++++++++----- 4 files changed, 39 insertions(+), 17 deletions(-) diff --git a/cpp/src/arrow/types/list-test.cc b/cpp/src/arrow/types/list-test.cc index 516008b7763c7..02991de2648e7 100644 --- a/cpp/src/arrow/types/list-test.cc +++ b/cpp/src/arrow/types/list-test.cc @@ -116,7 +116,7 @@ TEST_F(TestListBuilder, TestBasics) { vector lengths = {3, 0, 4}; vector is_null = {0, 1, 0}; - Int32Builder* vb = static_cast(builder_->value_builder()); + Int32Builder* vb = static_cast(builder_->value_builder().get()); int pos = 0; for (size_t i = 0; i < lengths.size(); ++i) { diff --git a/cpp/src/arrow/types/list.h b/cpp/src/arrow/types/list.h index cdd1e5a0b1cc9..f40a8245362b1 100644 --- a/cpp/src/arrow/types/list.h +++ b/cpp/src/arrow/types/list.h @@ -179,7 +179,9 @@ class ListBuilder : public Int32Builder { return Append(true); } - ArrayBuilder* value_builder() const { return value_builder_.get();} + const std::shared_ptr& value_builder() const { + return value_builder_; + } protected: std::shared_ptr value_builder_; diff --git a/python/arrow/tests/test_convert_builtin.py b/python/arrow/tests/test_convert_builtin.py index d651fbe357aa8..57e6ab9f0e7b5 100644 --- a/python/arrow/tests/test_convert_builtin.py +++ b/python/arrow/tests/test_convert_builtin.py @@ -63,13 +63,6 @@ def test_string(self): assert arr.null_count == 1 assert arr.type == arrow.string() - def test_list_of_int(self): - data = [[1, 2, 3], [], None, [1, 2]] - arr = arrow.from_pylist(data) - # assert len(arr) == 4 - # assert arr.null_count == 1 - assert arr.type == arrow.list_(arrow.int64()) - def test_mixed_nesting_levels(self): arrow.from_pylist([1, 2, None]) arrow.from_pylist([[1], [2], None]) @@ -83,3 +76,10 @@ def test_mixed_nesting_levels(self): with self.assertRaises(arrow.ArrowException): arrow.from_pylist([[1], [2], [None, [1]]]) + + def test_list_of_int(self): + data = [[1, 2, 3], [], None, [1, 2]] + arr = arrow.from_pylist(data) + assert len(arr) == 4 + assert arr.null_count == 1 + assert arr.type == arrow.list_(arrow.int64()) diff --git a/python/src/pyarrow/adapters/builtin.cc b/python/src/pyarrow/adapters/builtin.cc index 0b689113ad49a..ae84fa12b0de6 100644 --- a/python/src/pyarrow/adapters/builtin.cc +++ b/python/src/pyarrow/adapters/builtin.cc @@ -228,7 +228,7 @@ class SeqConverter { return Status::OK(); } - virtual Status AppendData(PyObject* seq, int64_t size) = 0; + virtual Status AppendData(PyObject* seq) = 0; protected: std::shared_ptr builder_; @@ -249,15 +249,16 @@ class TypedConverter : public SeqConverter { class BoolConverter : public TypedConverter { public: - Status AppendData(PyObject* seq, int64_t size) override { + Status AppendData(PyObject* seq) override { return Status::OK(); } }; class Int64Converter : public TypedConverter { public: - Status AppendData(PyObject* seq, int64_t size) override { + Status AppendData(PyObject* seq) override { int64_t val; + Py_ssize_t size = PySequence_Size(seq); for (int64_t i = 0; i < size; ++i) { OwnedRef item(PySequence_GetItem(seq, i)); if (item.obj() == Py_None) { @@ -274,8 +275,9 @@ class Int64Converter : public TypedConverter { class DoubleConverter : public TypedConverter { public: - Status AppendData(PyObject* seq, int64_t size) override { + Status AppendData(PyObject* seq) override { int64_t val; + Py_ssize_t size = PySequence_Size(seq); for (int64_t i = 0; i < size; ++i) { OwnedRef item(PySequence_GetItem(seq, i)); if (item.obj() == Py_None) { @@ -292,12 +294,13 @@ class DoubleConverter : public TypedConverter { class StringConverter : public TypedConverter { public: - Status AppendData(PyObject* seq, int64_t size) override { + Status AppendData(PyObject* seq) override { PyObject* item; PyObject* bytes_obj; OwnedRef tmp; const char* bytes; int32_t length; + Py_ssize_t size = PySequence_Size(seq); for (int64_t i = 0; i < size; ++i) { item = PySequence_GetItem(seq, i); OwnedRef holder(item); @@ -327,7 +330,17 @@ class ListConverter : public TypedConverter { public: Status Init(const std::shared_ptr& builder) override; - Status AppendData(PyObject* seq, int64_t size) override { + Status AppendData(PyObject* seq) override { + Py_ssize_t size = PySequence_Size(seq); + for (int64_t i = 0; i < size; ++i) { + OwnedRef item(PySequence_GetItem(seq, i)); + if (item.obj() == Py_None) { + RETURN_ARROW_NOT_OK(typed_builder_->AppendNull()); + } else { + typed_builder_->Append(); + PY_RETURN_NOT_OK(value_converter_->AppendData(item.obj())); + } + } return Status::OK(); } protected: @@ -357,7 +370,14 @@ std::shared_ptr GetConverter(const std::shared_ptr& type Status ListConverter::Init(const std::shared_ptr& builder) { builder_ = builder; typed_builder_ = static_cast(builder.get()); - value_converter_ = GetConverter(builder->type()); + + value_converter_ = GetConverter(static_cast( + builder->type().get())->value_type); + if (value_converter_ == nullptr) { + return Status::NotImplemented("value type not implemented"); + } + + value_converter_->Init(typed_builder_->value_builder()); return Status::OK(); } @@ -385,7 +405,7 @@ Status ConvertPySequence(PyObject* obj, std::shared_ptr* out) { RETURN_ARROW_NOT_OK(arrow::MakeBuilder(GetMemoryPool(), type, &builder)); converter->Init(builder); - PY_RETURN_NOT_OK(converter->AppendData(obj, size)); + PY_RETURN_NOT_OK(converter->AppendData(obj)); *out = builder->Finish();