diff --git a/python/arrow/array.pxd b/python/arrow/array.pxd index 2274f55262eea..1abb4fe5855fa 100644 --- a/python/arrow/array.pxd +++ b/python/arrow/array.pxd @@ -18,10 +18,15 @@ from arrow.includes.common cimport shared_ptr from arrow.includes.arrow cimport CArray, LogicalType +from arrow.schema cimport DataType + cdef class Array: cdef: shared_ptr[CArray] sp_array + cdef readonly: + DataType type + cdef init(self, const shared_ptr[CArray]& sp_array) diff --git a/python/arrow/array.pyx b/python/arrow/array.pyx index 2845c9bdec1b2..3f1efe79be164 100644 --- a/python/arrow/array.pyx +++ b/python/arrow/array.pyx @@ -29,6 +29,8 @@ cdef class Array: cdef init(self, const shared_ptr[CArray]& sp_array): self.sp_array = sp_array + self.type = DataType() + self.type.init(self.sp_array.get().type()) property null_count: @@ -36,7 +38,7 @@ cdef class Array: return self.sp_array.get().null_count() def __len__(self): - return self.array.length() + return self.sp_array.get().length() cdef class NullArray(Array): diff --git a/python/arrow/includes/arrow.pxd b/python/arrow/includes/arrow.pxd index a1a8c25467a24..a67c3bf5e0abc 100644 --- a/python/arrow/includes/arrow.pxd +++ b/python/arrow/includes/arrow.pxd @@ -47,6 +47,8 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil: LogicalType type c_bool nullable + c_bool Equals(const CDataType* other) + c_string ToString() cdef cppclass CListType" arrow::ListType"(CDataType): diff --git a/python/arrow/schema.pyx b/python/arrow/schema.pyx index d22c3937efc13..63cd6e888abd0 100644 --- a/python/arrow/schema.pyx +++ b/python/arrow/schema.pyx @@ -26,6 +26,8 @@ from arrow.compat import frombytes, tobytes from arrow.includes.arrow cimport * cimport arrow.includes.pyarrow as pyarrow +cimport cpython + cdef class DataType: def __cinit__(self): @@ -41,6 +43,15 @@ cdef class DataType: def __repr__(self): return 'DataType({0})'.format(str(self)) + def __richcmp__(DataType self, DataType other, int op): + if op == cpython.Py_EQ: + return self.type.Equals(other.type) + elif op == cpython.Py_NE: + return not self.type.Equals(other.type) + else: + raise TypeError('Invalid comparison') + + cdef class Field: def __cinit__(self, object name, DataType type): diff --git a/python/arrow/tests/test_convert_builtin.py b/python/arrow/tests/test_convert_builtin.py index 68875d5b39ddb..1a926acf24d6c 100644 --- a/python/arrow/tests/test_convert_builtin.py +++ b/python/arrow/tests/test_convert_builtin.py @@ -27,11 +27,19 @@ def test_boolean(self): def test_empty_list(self): arr = arrow.from_list([]) assert len(arr) == 0 + assert arr.null_count == 0 + assert arr.type == arrow.null() + + def test_all_none(self): + arr = arrow.from_list([None, None]) + assert len(arr) == 2 + assert arr.null_count == 2 assert arr.type == arrow.null() def test_integer(self): - arr = arrow.from_list([1, 2, 3]) - assert len(arr) == 3 + arr = arrow.from_list([1, None, 3, None]) + assert len(arr) == 4 + assert arr.null_count == 2 assert arr.type == arrow.int64() def test_double(self): diff --git a/python/setup.py b/python/setup.py index d7338a97d9d23..2da242970517c 100644 --- a/python/setup.py +++ b/python/setup.py @@ -124,7 +124,10 @@ def _run_cmake(self): static_lib_option, source] self.spawn(cmake_command) - self.spawn(['make']) + args = ['make'] + if 'PYARROW_PARALLEL' in os.environ: + args.append('-j{0}'.format(os.environ['PYARROW_PARALLEL'])) + self.spawn(args) else: import shlex cmake_generator = 'Visual Studio 14 2015' diff --git a/python/src/pyarrow/adapters/builtin.cc b/python/src/pyarrow/adapters/builtin.cc index e8429fd491219..d5560594f850d 100644 --- a/python/src/pyarrow/adapters/builtin.cc +++ b/python/src/pyarrow/adapters/builtin.cc @@ -123,7 +123,7 @@ static Status InferArrowType(PyObject* obj, int64_t* size, // TODO(wesm): inferring types for collections return Status::NotImplemented("No type inference for collections"); } else { - inferer.Visit(obj); + inferer.Visit(item); } } @@ -139,7 +139,7 @@ class SeqConverter { return Status::OK(); } - virtual Status AppendData(PyObject* seq) = 0; + virtual Status AppendData(PyObject* seq, int64_t size) = 0; protected: std::shared_ptr builder_; @@ -160,28 +160,39 @@ class TypedConverter : public SeqConverter { class BoolConverter : public TypedConverter { public: - Status AppendData(PyObject* obj) override { + Status AppendData(PyObject* seq, int64_t size) override { return Status::OK(); } }; class Int64Converter : public TypedConverter { public: - Status AppendData(PyObject* obj) override { + Status AppendData(PyObject* obj, int64_t size) override { + int64_t val; + for (int64_t i = 0; i < size; ++i) { + OwnedRef item(PySequence_GetItem(obj, i)); + if (item.obj() == Py_None) { + RETURN_ARROW_NOT_OK(typed_builder_->AppendNull()); + } else { + val = PyLong_AsLongLong(item.obj()); + RETURN_IF_PYERROR(); + RETURN_ARROW_NOT_OK(typed_builder_->Append(val)); + } + } return Status::OK(); } }; class DoubleConverter : public TypedConverter { public: - Status AppendData(PyObject* obj) override { + Status AppendData(PyObject* seq, int64_t size) override { return Status::OK(); } }; class StringConverter : public TypedConverter { public: - Status AppendData(PyObject* obj) override { + Status AppendData(PyObject* seq, int64_t size) override { return Status::OK(); } }; @@ -190,7 +201,7 @@ class ListConverter : public TypedConverter { public: Status Init(const std::shared_ptr& builder) override; - Status AppendData(PyObject* obj) override { + Status AppendData(PyObject* seq, int64_t size) override { return Status::OK(); } protected: @@ -231,7 +242,7 @@ Status ConvertPySequence(PyObject* obj, std::shared_ptr* out) { // Handle NA / NullType case if (type->type == LogicalType::NA) { - out->reset(new arrow::Array(type, size)); + out->reset(new arrow::Array(type, size, size)); return Status::OK(); } @@ -248,7 +259,7 @@ Status ConvertPySequence(PyObject* obj, std::shared_ptr* out) { RETURN_ARROW_NOT_OK(arrow::MakeBuilder(GetMemoryPool(), type, &builder)); converter->Init(builder); - RETURN_NOT_OK(converter->AppendData(obj)); + RETURN_NOT_OK(converter->AppendData(obj, size)); *out = builder->Finish(); diff --git a/python/src/pyarrow/common.h b/python/src/pyarrow/common.h index 7847912b68ce8..a43e4d28c899a 100644 --- a/python/src/pyarrow/common.h +++ b/python/src/pyarrow/common.h @@ -26,18 +26,6 @@ namespace pyarrow { #define PYARROW_IS_PY2 PY_MAJOR_VERSION < 2 -// TODO(wesm): We can just let errors pass through. To be explored later -#define RETURN_IF_PYERROR() \ - if (PyErr_Occurred()) { \ - PyObject *exc_type, *exc_value, *traceback; \ - PyErr_Fetch(&exc_type, &exc_value, &traceback); \ - std::string message(PyString_AsString(exc_value)); \ - Py_DECREF(exc_type); \ - Py_DECREF(exc_value); \ - Py_DECREF(traceback); \ - return Status::UnknownError(message); \ - } - #define RETURN_ARROW_NOT_OK(s) do { \ arrow::Status _s = (s); \ if (!_s.ok()) { \ @@ -47,6 +35,8 @@ namespace pyarrow { class OwnedRef { public: + OwnedRef() : obj_(nullptr) {} + OwnedRef(PyObject* obj) : obj_(obj) {} @@ -54,6 +44,13 @@ class OwnedRef { Py_XDECREF(obj_); } + void reset(PyObject* obj) { + if (obj_ != nullptr) { + Py_XDECREF(obj_); + } + obj_ = obj; + } + PyObject* obj() const{ return obj_; } @@ -62,6 +59,35 @@ class OwnedRef { PyObject* obj_; }; +struct PyObjectStringify { + OwnedRef tmp_obj; + const char* bytes; + + PyObjectStringify(PyObject* obj) { + PyObject* bytes_obj; + if (PyUnicode_Check(obj)) { + bytes_obj = PyUnicode_AsUTF8String(obj); + tmp_obj.reset(bytes_obj); + } else { + bytes_obj = obj; + } + bytes = PyBytes_AsString(bytes_obj); + } +}; + +// TODO(wesm): We can just let errors pass through. To be explored later +#define RETURN_IF_PYERROR() \ + if (PyErr_Occurred()) { \ + PyObject *exc_type, *exc_value, *traceback; \ + PyErr_Fetch(&exc_type, &exc_value, &traceback); \ + PyObjectStringify stringified(exc_value); \ + std::string message(stringified.bytes); \ + Py_DECREF(exc_type); \ + Py_DECREF(exc_value); \ + Py_DECREF(traceback); \ + return Status::UnknownError(message); \ + } + arrow::MemoryPool* GetMemoryPool(); } // namespace pyarrow