Skip to content

Commit

Permalink
Prototype string and double converters
Browse files Browse the repository at this point in the history
  • Loading branch information
wesm committed Mar 7, 2016
1 parent fba2ab8 commit e6b7a4e
Show file tree
Hide file tree
Showing 4 changed files with 63 additions and 8 deletions.
9 changes: 5 additions & 4 deletions cpp/src/arrow/types/string.h
Original file line number Diff line number Diff line change
Expand Up @@ -136,12 +136,13 @@ class StringBuilder : public ListBuilder {
}

// Appends `value` as one new string element.
//
// Delegates to the (const char*, int32_t) overload, which opens a new list
// slot via ListBuilder::Append() and then copies the bytes into
// byte_builder_. The original body performed those same two steps inline and
// was followed by a second, unreachable `return`.
//
// NOTE(review): value.size() is narrowed to int32_t; strings longer than
// INT32_MAX bytes are not handled here.
Status Append(const std::string& value) {
  return Append(value.c_str(), static_cast<int32_t>(value.size()));
}

Status Append(const uint8_t* value, int32_t length);
// Appends `length` bytes starting at `value` as one new string element.
// First opens a new list slot through ListBuilder::Append(); if that fails the
// status is returned and no bytes are written.
Status Append(const char* value, int32_t length) {
  RETURN_NOT_OK(ListBuilder::Append());
  // The child byte builder takes uint8_t data, so reinterpret the char buffer.
  const uint8_t* raw_bytes = reinterpret_cast<const uint8_t*>(value);
  return byte_builder_->Append(raw_bytes, length);
}
Status Append(const std::vector<std::string>& values,
uint8_t* null_bytes);

Expand Down
9 changes: 9 additions & 0 deletions python/arrow/array.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,14 @@ cdef class UInt64Array(NumericArray):
pass


# NumericArray subclass that adds no members of its own.
# NOTE(review): no LogicalType_FLOAT entry is visible in _array_classes from
# here -- confirm whether this class is registered elsewhere.
cdef class FloatArray(NumericArray):
pass


# NumericArray subclass that adds no members of its own; it is the class
# registered for LogicalType_DOUBLE in _array_classes.
cdef class DoubleArray(NumericArray):
pass


cdef class ListArray(Array):
pass

Expand All @@ -141,6 +149,7 @@ cdef dict _array_classes = {
LogicalType_NA: NullArray,
LogicalType_BOOL: BooleanArray,
LogicalType_INT64: Int64Array,
LogicalType_DOUBLE: DoubleArray,
LogicalType_LIST: ListArray,
LogicalType_STRING: StringArray,
}
Expand Down
12 changes: 10 additions & 2 deletions python/arrow/tests/test_convert_builtin.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,10 +50,18 @@ def test_garbage_collection(self):
assert arrow.total_allocated_bytes() == bytes_before

def test_double(self):
    # A list mixing floats, a plain int and None should convert to a double
    # array where each None becomes a null.
    data = [1.5, 1, None, 2.5, None, None]
    arr = arrow.from_pylist(data)
    assert len(arr) == 6
    assert arr.null_count == 3
    assert arr.type == arrow.double()

def test_string(self):
    # A list mixing str, bytes and None should convert to a string array
    # where the None becomes a null.
    data = ['foo', b'bar', None, 'arrow']
    arr = arrow.from_pylist(data)
    assert len(arr) == 4
    assert arr.null_count == 1
    assert arr.type == arrow.string()

def test_list_of_int(self):
data = [[1, 2, 3], [], None, [1, 2]]
Expand Down
41 changes: 39 additions & 2 deletions python/src/pyarrow/adapters/builtin.cc
Original file line number Diff line number Diff line change
Expand Up @@ -256,10 +256,10 @@ class BoolConverter : public TypedConverter<arrow::BooleanBuilder> {

class Int64Converter : public TypedConverter<arrow::Int64Builder> {
public:
Status AppendData(PyObject* obj, int64_t size) override {
Status AppendData(PyObject* seq, int64_t size) override {
int64_t val;
for (int64_t i = 0; i < size; ++i) {
OwnedRef item(PySequence_GetItem(obj, i));
OwnedRef item(PySequence_GetItem(seq, i));
if (item.obj() == Py_None) {
RETURN_ARROW_NOT_OK(typed_builder_->AppendNull());
} else {
Expand All @@ -275,13 +275,50 @@ class Int64Converter : public TypedConverter<arrow::Int64Builder> {
// Converts the items of a Python sequence into values of an
// arrow::DoubleBuilder.
class DoubleConverter : public TypedConverter<arrow::DoubleBuilder> {
 public:
  // Appends items [0, size) of `seq` to the typed builder.
  //
  // Py_None items are appended as nulls; every other item is converted with
  // PyFloat_AsDouble. Returns early with an error status if fetching or
  // converting an item raises a Python exception, or if the builder reports a
  // failure; otherwise returns Status::OK().
  Status AppendData(PyObject* seq, int64_t size) override {
    for (int64_t i = 0; i < size; ++i) {
      OwnedRef item(PySequence_GetItem(seq, i));
      // PySequence_GetItem returns NULL with an exception set on failure.
      RETURN_IF_PYERROR();
      if (item.obj() == Py_None) {
        RETURN_ARROW_NOT_OK(typed_builder_->AppendNull());
      } else {
        // Keep the value as a double: storing it in an integer (as the
        // previous int64_t local did) silently drops the fractional part.
        const double val = PyFloat_AsDouble(item.obj());
        // PyFloat_AsDouble signals failure through the Python error state.
        RETURN_IF_PYERROR();
        RETURN_ARROW_NOT_OK(typed_builder_->Append(val));
      }
    }
    return Status::OK();
  }
};

// Converts the items of a Python sequence into values of an
// arrow::StringBuilder.
class StringConverter : public TypedConverter<arrow::StringBuilder> {
 public:
  // Appends items [0, size) of `seq` to the typed builder.
  //
  // Py_None items are appended as nulls. Unicode items are encoded to UTF-8
  // bytes first; bytes items are appended as-is. Any other item type yields
  // Status::TypeError. Returns early with an error status if fetching or
  // encoding an item raises a Python exception, or if the builder reports a
  // failure; otherwise returns Status::OK().
  Status AppendData(PyObject* seq, int64_t size) override {
    for (int64_t i = 0; i < size; ++i) {
      OwnedRef holder(PySequence_GetItem(seq, i));
      // PySequence_GetItem returns NULL with an exception set on failure;
      // bail out before the type-check macros below dereference the item.
      RETURN_IF_PYERROR();
      PyObject* item = holder.obj();

      if (item == Py_None) {
        RETURN_ARROW_NOT_OK(typed_builder_->AppendNull());
        continue;
      }

      // Holds the temporary UTF-8 bytes object (if any) so it stays alive
      // until its buffer has been copied into the builder.
      OwnedRef encoded;
      PyObject* bytes_obj = nullptr;
      if (PyUnicode_Check(item)) {
        encoded.reset(PyUnicode_AsUTF8String(item));
        RETURN_IF_PYERROR();
        bytes_obj = encoded.obj();
      } else if (PyBytes_Check(item)) {
        bytes_obj = item;
      } else {
        return Status::TypeError("Non-string value encountered");
      }

      // The GET_SIZE / AS_STRING macros do no error checking; bytes_obj is
      // known to be a bytes object at this point.
      // NOTE(review): the Py_ssize_t size is narrowed to int32_t; values
      // larger than INT32_MAX bytes are not handled here.
      const int32_t length = static_cast<int32_t>(PyBytes_GET_SIZE(bytes_obj));
      const char* bytes = PyBytes_AS_STRING(bytes_obj);
      RETURN_ARROW_NOT_OK(typed_builder_->Append(bytes, length));
    }
    return Status::OK();
  }
};
Expand Down

0 comments on commit e6b7a4e

Please sign in to comment.