Skip to content

Commit

Permalink
Smoke test that array garbage collection deallocates memory
Browse files Browse the repository at this point in the history
  • Loading branch information
wesm committed Mar 7, 2016
1 parent 6d9c721 commit db34836
Show file tree
Hide file tree
Showing 11 changed files with 62 additions and 5 deletions.
3 changes: 3 additions & 0 deletions cpp/src/arrow/api.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,4 +36,7 @@
#include "arrow/types/string.h"
#include "arrow/types/struct.h"

#include "arrow/util/memory-pool.h"
#include "arrow/util/status.h"

#endif // ARROW_API_H
2 changes: 1 addition & 1 deletion cpp/src/arrow/builder.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ class Array;
class MemoryPool;
class PoolBuffer;

static constexpr int32_t MIN_BUILDER_CAPACITY = 1 << 8;
static constexpr int32_t MIN_BUILDER_CAPACITY = 1 << 5;

// Base class for all data array builders
class ArrayBuilder {
Expand Down
27 changes: 25 additions & 2 deletions cpp/src/arrow/types/primitive-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,29 @@ TYPED_TEST(TestPrimitiveBuilder, TestAppendNull) {
}
}

TYPED_TEST(TestPrimitiveBuilder, TestArrayDtorDealloc) {
DECL_T();

int size = 10000;

vector<T>& draws = this->draws_;
vector<uint8_t>& nulls = this->nulls_;

int64_t memory_before = this->pool_->bytes_allocated();

this->RandomData(size);

int i;
for (i = 0; i < size; ++i) {
ASSERT_OK(this->builder_->Append(draws[i], nulls[i] > 0));
}

do {
std::shared_ptr<Array> result = this->builder_->Finish();
} while (false);

ASSERT_EQ(memory_before, this->pool_->bytes_allocated());
}

TYPED_TEST(TestPrimitiveBuilder, TestAppendScalar) {
DECL_T();
Expand Down Expand Up @@ -332,11 +355,11 @@ TYPED_TEST(TestPrimitiveBuilder, TestResize) {
}

TYPED_TEST(TestPrimitiveBuilder, TestReserve) {
int n = 100;
ASSERT_OK(this->builder_->Reserve(n));
ASSERT_OK(this->builder_->Reserve(10));
ASSERT_EQ(0, this->builder_->length());
ASSERT_EQ(MIN_BUILDER_CAPACITY, this->builder_->capacity());

ASSERT_OK(this->builder_->Reserve(90));
ASSERT_OK(this->builder_->Advance(100));
ASSERT_OK(this->builder_->Reserve(MIN_BUILDER_CAPACITY));

Expand Down
2 changes: 2 additions & 0 deletions cpp/src/arrow/types/primitive.h
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,8 @@ class PrimitiveArrayImpl : public PrimitiveArray {

PrimitiveArrayImpl() : PrimitiveArray() {}

virtual ~PrimitiveArrayImpl() {}

PrimitiveArrayImpl(int32_t length, const std::shared_ptr<Buffer>& data,
int32_t null_count = 0,
const std::shared_ptr<Buffer>& nulls = nullptr) {
Expand Down
8 changes: 8 additions & 0 deletions cpp/src/arrow/util/buffer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@ Buffer::Buffer(const std::shared_ptr<Buffer>& parent, int64_t offset,
parent_ = parent;
}

Buffer::~Buffer() {}

std::shared_ptr<Buffer> MutableBuffer::GetImmutableView() {
return std::make_shared<Buffer>(this->get_shared_ptr(), 0, size());
}
Expand All @@ -43,6 +45,12 @@ PoolBuffer::PoolBuffer(MemoryPool* pool) :
pool_ = pool;
}

PoolBuffer::~PoolBuffer() {
if (mutable_data_ != nullptr) {
pool_->Free(mutable_data_, capacity_);
}
}

Status PoolBuffer::Reserve(int64_t new_capacity) {
if (!mutable_data_ || new_capacity > capacity_) {
uint8_t* new_data;
Expand Down
2 changes: 2 additions & 0 deletions cpp/src/arrow/util/buffer.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ class Buffer : public std::enable_shared_from_this<Buffer> {
Buffer(const uint8_t* data, int64_t size) :
data_(data),
size_(size) {}
virtual ~Buffer();

// An offset into data that is owned by another buffer, but we want to be
// able to retain a valid pointer to it even after other shared_ptr's to the
Expand Down Expand Up @@ -136,6 +137,7 @@ class ResizableBuffer : public MutableBuffer {
class PoolBuffer : public ResizableBuffer {
public:
explicit PoolBuffer(MemoryPool* pool = nullptr);
virtual ~PoolBuffer();

virtual Status Resize(int64_t new_size);
virtual Status Reserve(int64_t new_capacity);
Expand Down
2 changes: 1 addition & 1 deletion python/arrow/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

# flake8: noqa

from arrow.array import Array, from_list
from arrow.array import Array, from_list, total_allocated_bytes
from arrow.schema import (null, bool_,
int8, int16, int32, int64,
uint8, uint16, uint32, uint64,
Expand Down
6 changes: 6 additions & 0 deletions python/arrow/array.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,12 @@ cimport arrow.includes.pyarrow as pyarrow
from arrow.compat import frombytes, tobytes
from arrow.error cimport check_status


def total_allocated_bytes():
cdef MemoryPool* pool = pyarrow.GetMemoryPool()
return pool.bytes_allocated()


cdef class Array:

cdef init(self, const shared_ptr[CArray]& sp_array):
Expand Down
3 changes: 3 additions & 0 deletions python/arrow/includes/arrow.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,9 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil:

c_string ToString()

cdef cppclass MemoryPool" arrow::MemoryPool":
int64_t bytes_allocated()

cdef cppclass CListType" arrow::ListType"(CDataType):
CListType(const shared_ptr[CDataType]& value_type,
c_bool nullable)
Expand Down
5 changes: 4 additions & 1 deletion python/arrow/includes/pyarrow.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@
# distutils: language = c++

from arrow.includes.common cimport *
from arrow.includes.arrow cimport CArray, CDataType, LogicalType
from arrow.includes.arrow cimport (CArray, CDataType, LogicalType,
MemoryPool)

cdef extern from "pyarrow/api.h" namespace "pyarrow" nogil:
# We can later add more of the common status factory methods as needed
Expand All @@ -40,3 +41,5 @@ cdef extern from "pyarrow/api.h" namespace "pyarrow" nogil:

shared_ptr[CDataType] GetPrimitiveType(LogicalType type, c_bool nullable)
Status ConvertPySequence(object obj, shared_ptr[CArray]* out)

MemoryPool* GetMemoryPool()
7 changes: 7 additions & 0 deletions python/arrow/tests/test_convert_builtin.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,13 @@ def test_integer(self):
assert arr.null_count == 2
assert arr.type == arrow.int64()

def test_garbage_collection(self):
import gc
bytes_before = arrow.total_allocated_bytes()
arrow.from_list([1, None, 3, None])
gc.collect()
assert arrow.total_allocated_bytes() == bytes_before

def test_double(self):
pass

Expand Down

0 comments on commit db34836

Please sign in to comment.