Skip to content

Commit

Permalink
Test basic conversion of nested lists
Browse files Browse the repository at this point in the history
  • Loading branch information
wesm committed Mar 7, 2016
1 parent e6b7a4e commit 31c0ec3
Show file tree
Hide file tree
Showing 4 changed files with 39 additions and 17 deletions.
2 changes: 1 addition & 1 deletion cpp/src/arrow/types/list-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ TEST_F(TestListBuilder, TestBasics) {
vector<int> lengths = {3, 0, 4};
vector<uint8_t> is_null = {0, 1, 0};

Int32Builder* vb = static_cast<Int32Builder*>(builder_->value_builder());
Int32Builder* vb = static_cast<Int32Builder*>(builder_->value_builder().get());

int pos = 0;
for (size_t i = 0; i < lengths.size(); ++i) {
Expand Down
4 changes: 3 additions & 1 deletion cpp/src/arrow/types/list.h
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,9 @@ class ListBuilder : public Int32Builder {
return Append(true);
}

ArrayBuilder* value_builder() const { return value_builder_.get();}
// Accessor for the builder that accumulates this list's child values.
// Hands back a reference to the shared_ptr member itself, so callers can
// either share ownership or call .get() for a raw pointer.
const std::shared_ptr<ArrayBuilder>& value_builder() const { return value_builder_; }

protected:
std::shared_ptr<ArrayBuilder> value_builder_;
Expand Down
14 changes: 7 additions & 7 deletions python/arrow/tests/test_convert_builtin.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,13 +63,6 @@ def test_string(self):
assert arr.null_count == 1
assert arr.type == arrow.string()

def test_list_of_int(self):
data = [[1, 2, 3], [], None, [1, 2]]
arr = arrow.from_pylist(data)
# assert len(arr) == 4
# assert arr.null_count == 1
assert arr.type == arrow.list_(arrow.int64())

def test_mixed_nesting_levels(self):
arrow.from_pylist([1, 2, None])
arrow.from_pylist([[1], [2], None])
Expand All @@ -83,3 +76,10 @@ def test_mixed_nesting_levels(self):

with self.assertRaises(arrow.ArrowException):
arrow.from_pylist([[1], [2], [None, [1]]])

def test_list_of_int(self):
    """Convert nested int lists and check length, null count, and type.

    The input mixes a populated list, an empty list, None, and a second
    populated list; the result is expected to have four slots, exactly
    one null, and type list_(int64).
    """
    nested = [[1, 2, 3], [], None, [1, 2]]
    converted = arrow.from_pylist(nested)
    assert len(converted) == 4
    assert converted.null_count == 1
    expected_type = arrow.list_(arrow.int64())
    assert converted.type == expected_type
36 changes: 28 additions & 8 deletions python/src/pyarrow/adapters/builtin.cc
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,7 @@ class SeqConverter {
return Status::OK();
}

virtual Status AppendData(PyObject* seq, int64_t size) = 0;
virtual Status AppendData(PyObject* seq) = 0;

protected:
std::shared_ptr<ArrayBuilder> builder_;
Expand All @@ -249,15 +249,16 @@ class TypedConverter : public SeqConverter {

class BoolConverter : public TypedConverter<arrow::BooleanBuilder> {
public:
Status AppendData(PyObject* seq, int64_t size) override {
Status AppendData(PyObject* seq) override {
return Status::OK();
}
};

class Int64Converter : public TypedConverter<arrow::Int64Builder> {
public:
Status AppendData(PyObject* seq, int64_t size) override {
Status AppendData(PyObject* seq) override {
int64_t val;
Py_ssize_t size = PySequence_Size(seq);
for (int64_t i = 0; i < size; ++i) {
OwnedRef item(PySequence_GetItem(seq, i));
if (item.obj() == Py_None) {
Expand All @@ -274,8 +275,9 @@ class Int64Converter : public TypedConverter<arrow::Int64Builder> {

class DoubleConverter : public TypedConverter<arrow::DoubleBuilder> {
public:
Status AppendData(PyObject* seq, int64_t size) override {
Status AppendData(PyObject* seq) override {
int64_t val;
Py_ssize_t size = PySequence_Size(seq);
for (int64_t i = 0; i < size; ++i) {
OwnedRef item(PySequence_GetItem(seq, i));
if (item.obj() == Py_None) {
Expand All @@ -292,12 +294,13 @@ class DoubleConverter : public TypedConverter<arrow::DoubleBuilder> {

class StringConverter : public TypedConverter<arrow::StringBuilder> {
public:
Status AppendData(PyObject* seq, int64_t size) override {
Status AppendData(PyObject* seq) override {
PyObject* item;
PyObject* bytes_obj;
OwnedRef tmp;
const char* bytes;
int32_t length;
Py_ssize_t size = PySequence_Size(seq);
for (int64_t i = 0; i < size; ++i) {
item = PySequence_GetItem(seq, i);
OwnedRef holder(item);
Expand Down Expand Up @@ -327,7 +330,17 @@ class ListConverter : public TypedConverter<arrow::ListBuilder> {
public:
Status Init(const std::shared_ptr<ArrayBuilder>& builder) override;

Status AppendData(PyObject* seq, int64_t size) override {
// Appends each element of the Python sequence `seq` to the list builder.
// A None element is appended as a null list slot. Any other element opens
// a new list slot and is forwarded to value_converter_ so its items are
// appended as that slot's values.
//
// Returns the first non-OK status produced by the builder or by the value
// converter; otherwise Status::OK().
Status AppendData(PyObject* seq) override {
  // NOTE(review): PySequence_Size returns -1 on failure; in that case the
  // loop below is skipped and OK is returned with a Python error still
  // pending. PySequence_GetItem can likewise return NULL. Neither case is
  // handled here - confirm the caller has already validated `seq`.
  Py_ssize_t size = PySequence_Size(seq);
  for (int64_t i = 0; i < size; ++i) {
    OwnedRef item(PySequence_GetItem(seq, i));
    if (item.obj() == Py_None) {
      RETURN_ARROW_NOT_OK(typed_builder_->AppendNull());
    } else {
      // Propagate a failed Append() instead of discarding its Status, so
      // child values are never appended to a slot that was not opened.
      RETURN_ARROW_NOT_OK(typed_builder_->Append());
      PY_RETURN_NOT_OK(value_converter_->AppendData(item.obj()));
    }
  }
  return Status::OK();
}
protected:
Expand Down Expand Up @@ -357,7 +370,14 @@ std::shared_ptr<SeqConverter> GetConverter(const std::shared_ptr<DataType>& type
// Binds this converter to `builder`, which is treated as an
// arrow::ListBuilder (unchecked static_cast), and creates a converter for
// the list's value type, initialized on the list builder's child builder.
//
// Returns NotImplemented when GetConverter has no converter for the value
// type; otherwise returns whatever the value converter's Init reports.
Status ListConverter::Init(const std::shared_ptr<ArrayBuilder>& builder) {
  builder_ = builder;
  typed_builder_ = static_cast<arrow::ListBuilder*>(builder.get());

  // Select the converter from the list's value type rather than from the
  // list type itself.
  value_converter_ = GetConverter(static_cast<arrow::ListType*>(
      builder->type().get())->value_type);
  if (value_converter_ == nullptr) {
    return Status::NotImplemented("value type not implemented");
  }

  // Surface a failed child Init instead of dropping its Status.
  return value_converter_->Init(typed_builder_->value_builder());
}

Expand Down Expand Up @@ -385,7 +405,7 @@ Status ConvertPySequence(PyObject* obj, std::shared_ptr<arrow::Array>* out) {
RETURN_ARROW_NOT_OK(arrow::MakeBuilder(GetMemoryPool(), type, &builder));
converter->Init(builder);

PY_RETURN_NOT_OK(converter->AppendData(obj, size));
PY_RETURN_NOT_OK(converter->AppendData(obj));

*out = builder->Finish();

Expand Down

0 comments on commit 31c0ec3

Please sign in to comment.