Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ARROW-42: Add Python tests to Travis CI build #22

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,9 @@ addons:
packages:
- gcc-4.9 # Needed for C++11
- g++-4.9 # Needed for C++11
- gdb
- gcov
- ccache
- cmake
- valgrind

Expand All @@ -17,11 +19,32 @@ matrix:
- compiler: gcc
language: cpp
os: linux
before_script:
- $TRAVIS_BUILD_DIR/ci/travis_before_script_cpp.sh
script:
- export CC="gcc-4.9"
- export CXX="g++-4.9"
- $TRAVIS_BUILD_DIR/ci/travis_script_cpp.sh
- $TRAVIS_BUILD_DIR/ci/travis_script_python.sh
- compiler: clang
language: cpp
os: osx
addons:
before_script:
- $TRAVIS_BUILD_DIR/ci/travis_before_script_cpp.sh
script:
- $TRAVIS_BUILD_DIR/ci/travis_script_cpp.sh
- $TRAVIS_BUILD_DIR/ci/travis_script_python.sh

before_install:
- ulimit -c unlimited -S
- export CPP_BUILD_DIR=$TRAVIS_BUILD_DIR/cpp-build
- export ARROW_CPP_INSTALL=$TRAVIS_BUILD_DIR/cpp-install
- export LD_LIBRARY_PATH=$ARROW_CPP_INSTALL/lib:$LD_LIBRARY_PATH

after_script:
- rm -rf $CPP_BUILD_DIR

after_failure:
- COREFILE=$(find . -maxdepth 2 -name "core*" | head -n 1)
- if [[ -f "$COREFILE" ]]; then gdb -c "$COREFILE" example -ex "thread apply all bt" -ex "set pagination 0" -batch; fi
26 changes: 26 additions & 0 deletions ci/travis_before_script_cpp.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
#!/usr/bin/env bash

# Configure, build, and install the Arrow C++ library ahead of the CI test
# scripts.
#
# Environment:
#   TRAVIS_BUILD_DIR   root of the repository checkout (expected to be
#                      provided by the CI environment)
#   CPP_BUILD_DIR      out-of-source build directory
#                      (default: $TRAVIS_BUILD_DIR/cpp-build)
#   ARROW_CPP_INSTALL  CMake install prefix
#                      (default: $TRAVIS_BUILD_DIR/cpp-install)

# Abort on the first failing command
set -e

# ':=' also applies the default when the variable is set but empty
: "${CPP_BUILD_DIR:=$TRAVIS_BUILD_DIR/cpp-build}"

# Deliberately not 'mkdir -p': a pre-existing build directory is an error
mkdir "$CPP_BUILD_DIR"
pushd "$CPP_BUILD_DIR"

CPP_DIR="$TRAVIS_BUILD_DIR/cpp"

# Build an isolated thirdparty
cp -r "$CPP_DIR/thirdparty" .
cp "$CPP_DIR/setup_build_env.sh" .

source setup_build_env.sh

# Presumably set by setup_build_env.sh; printed for the build log
echo "$GTEST_HOME"

: "${ARROW_CPP_INSTALL:=$TRAVIS_BUILD_DIR/cpp-install}"

# Expansions are quoted so paths containing whitespace or glob characters
# are passed through as single arguments
cmake -DCMAKE_INSTALL_PREFIX="$ARROW_CPP_INSTALL" -DCMAKE_CXX_FLAGS="-Werror" "$CPP_DIR"
make -j4
make install

popd
22 changes: 2 additions & 20 deletions ci/travis_script_cpp.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,28 +2,11 @@

set -e

mkdir $TRAVIS_BUILD_DIR/cpp-build
pushd $TRAVIS_BUILD_DIR/cpp-build
: ${CPP_BUILD_DIR=$TRAVIS_BUILD_DIR/cpp-build}

CPP_DIR=$TRAVIS_BUILD_DIR/cpp
pushd $CPP_BUILD_DIR

# Build an isolated thirdparty
cp -r $CPP_DIR/thirdparty .
cp $CPP_DIR/setup_build_env.sh .

if [ $TRAVIS_OS_NAME == "linux" ]; then
# Use a C++11 compiler on Linux
export CC="gcc-4.9"
export CXX="g++-4.9"
fi

source setup_build_env.sh

echo $GTEST_HOME

cmake -DCMAKE_CXX_FLAGS="-Werror" $CPP_DIR
make lint
make -j4

if [ $TRAVIS_OS_NAME == "linux" ]; then
valgrind --tool=memcheck --leak-check=yes --error-exitcode=1 ctest
Expand All @@ -32,4 +15,3 @@ else
fi

popd
rm -rf cpp-build
59 changes: 59 additions & 0 deletions ci/travis_script_python.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
#!/usr/bin/env bash

# Bootstrap a Miniconda environment, build the Python extensions in place
# against the installed Arrow C++ library, and run the Python test suite.
#
# Environment:
#   TRAVIS_BUILD_DIR   root of the repository checkout (expected to be
#                      provided by the CI environment)
#   TRAVIS_OS_NAME     "linux" selects the Linux Miniconda installer; any
#                      other value selects the MacOSX installer
#   CPP_BUILD_DIR      C++ build directory holding setup_build_env.sh
#                      (default: $TRAVIS_BUILD_DIR/cpp-build)
#   ARROW_CPP_INSTALL  C++ install prefix, exported below as ARROW_HOME
#                      (default: $TRAVIS_BUILD_DIR/cpp-install)

# Abort on the first failing command
set -e

# Same defaults as the C++ CI scripts, so this script also works when the
# variables were not exported by the caller
: "${CPP_BUILD_DIR:=$TRAVIS_BUILD_DIR/cpp-build}"
: "${ARROW_CPP_INSTALL:=$TRAVIS_BUILD_DIR/cpp-install}"

PYTHON_DIR="$TRAVIS_BUILD_DIR/python"

# Share environment with C++
pushd "$CPP_BUILD_DIR"
source setup_build_env.sh
popd

pushd "$PYTHON_DIR"

# Bootstrap a Conda Python environment

# Quoted so an unset TRAVIS_OS_NAME compares as an empty string instead of
# producing a malformed test expression
if [ "$TRAVIS_OS_NAME" == "linux" ]; then
  MINICONDA_URL="https://repo.continuum.io/miniconda/Miniconda-latest-Linux-x86_64.sh"
else
  MINICONDA_URL="https://repo.continuum.io/miniconda/Miniconda-latest-MacOSX-x86_64.sh"
fi

# --fail: exit non-zero on an HTTP error instead of saving the error page
# as the installer; --location: follow redirects
curl --fail --location "$MINICONDA_URL" -o miniconda.sh
MINICONDA="$TRAVIS_BUILD_DIR/miniconda"
bash miniconda.sh -b -p "$MINICONDA"
export PATH="$MINICONDA/bin:$PATH"
conda update -y -q conda
conda info -a

PYTHON_VERSION=3.5
CONDA_ENV_NAME=pyarrow-test

conda create -y -q -n "$CONDA_ENV_NAME" python="$PYTHON_VERSION"
source activate "$CONDA_ENV_NAME"

# Record which interpreter is active in the build log
python --version
which python

# faster builds, please
conda install -y nomkl

# Expensive dependencies install from Continuum package repo
conda install -y pip numpy pandas cython

# Other stuff pip install
pip install -r requirements.txt

# NOTE(review): presumably read by setup.py / CMake to locate the C++
# install — confirm against python/setup.py
export ARROW_HOME="$ARROW_CPP_INSTALL"

python setup.py build_ext --inplace

py.test -vv -r sxX arrow

# if [ $TRAVIS_OS_NAME == "linux" ]; then
#   valgrind --tool=memcheck py.test -vv -r sxX arrow
# else
#   py.test -vv -r sxX arrow
# fi

popd
2 changes: 2 additions & 0 deletions cpp/src/arrow/table/column-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ using std::vector;

namespace arrow {

const auto INT32 = std::make_shared<Int32Type>();

class TestColumn : public TestBase {
protected:
std::shared_ptr<ChunkedArray> data_;
Expand Down
2 changes: 2 additions & 0 deletions cpp/src/arrow/table/schema-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ using std::vector;

namespace arrow {

const auto INT32 = std::make_shared<Int32Type>();

TEST(TestField, Basics) {
shared_ptr<DataType> ftype = INT32;
shared_ptr<DataType> ftype_nn = std::make_shared<Int32Type>(false);
Expand Down
4 changes: 4 additions & 0 deletions cpp/src/arrow/table/table-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,10 @@ using std::vector;

namespace arrow {

const auto INT16 = std::make_shared<Int16Type>();
const auto UINT8 = std::make_shared<UInt8Type>();
const auto INT32 = std::make_shared<Int32Type>();

class TestTable : public TestBase {
public:
void MakeExample1(int length) {
Expand Down
14 changes: 0 additions & 14 deletions cpp/src/arrow/type.cc
Original file line number Diff line number Diff line change
Expand Up @@ -66,18 +66,4 @@ std::string StructType::ToString() const {
return s.str();
}

const std::shared_ptr<NullType> NA = std::make_shared<NullType>();
const std::shared_ptr<BooleanType> BOOL = std::make_shared<BooleanType>();
const std::shared_ptr<UInt8Type> UINT8 = std::make_shared<UInt8Type>();
const std::shared_ptr<UInt16Type> UINT16 = std::make_shared<UInt16Type>();
const std::shared_ptr<UInt32Type> UINT32 = std::make_shared<UInt32Type>();
const std::shared_ptr<UInt64Type> UINT64 = std::make_shared<UInt64Type>();
const std::shared_ptr<Int8Type> INT8 = std::make_shared<Int8Type>();
const std::shared_ptr<Int16Type> INT16 = std::make_shared<Int16Type>();
const std::shared_ptr<Int32Type> INT32 = std::make_shared<Int32Type>();
const std::shared_ptr<Int64Type> INT64 = std::make_shared<Int64Type>();
const std::shared_ptr<FloatType> FLOAT = std::make_shared<FloatType>();
const std::shared_ptr<DoubleType> DOUBLE = std::make_shared<DoubleType>();
const std::shared_ptr<StringType> STRING = std::make_shared<StringType>();

} // namespace arrow
14 changes: 0 additions & 14 deletions cpp/src/arrow/type.h
Original file line number Diff line number Diff line change
Expand Up @@ -338,20 +338,6 @@ struct StructType : public DataType {
std::string ToString() const override;
};

extern const std::shared_ptr<NullType> NA;
extern const std::shared_ptr<BooleanType> BOOL;
extern const std::shared_ptr<UInt8Type> UINT8;
extern const std::shared_ptr<UInt16Type> UINT16;
extern const std::shared_ptr<UInt32Type> UINT32;
extern const std::shared_ptr<UInt64Type> UINT64;
extern const std::shared_ptr<Int8Type> INT8;
extern const std::shared_ptr<Int16Type> INT16;
extern const std::shared_ptr<Int32Type> INT32;
extern const std::shared_ptr<Int64Type> INT64;
extern const std::shared_ptr<FloatType> FLOAT;
extern const std::shared_ptr<DoubleType> DOUBLE;
extern const std::shared_ptr<StringType> STRING;

} // namespace arrow

#endif // ARROW_TYPE_H
2 changes: 0 additions & 2 deletions python/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -404,7 +404,6 @@ set(PYARROW_SRCS
)

set(LINK_LIBS
pyarrow_util
arrow
)

Expand All @@ -428,7 +427,6 @@ set(CYTHON_EXTENSIONS
array
config
error
parquet
scalar
schema
)
Expand Down
56 changes: 56 additions & 0 deletions python/arrow/formatting.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Pretty-printing and other formatting utilities for Arrow data structures

import arrow.scalar as scalar


def array_format(arr, window=None):
    """
    Render `arr` as a bracketed, multi-line string with one element per line.

    If `window` is None, or twice `window` is at least `len(arr)`, every
    element is shown. Otherwise only the first `window` and the last
    `window` elements are shown, separated by a line containing '...'.
    Each element is rendered with `value_format` and the body is indented
    by two spaces.
    """
    if window is None or window * 2 >= len(arr):
        rendered = [value_format(item, 0) for item in arr]
        contents = _indent(',\n'.join(rendered), 2)
    else:
        size = len(arr)
        # Leading elements always carry a trailing comma
        head = [value_format(arr[pos], 0) + ',' for pos in range(window)]
        tail = []
        for pos in range(size - window, size):
            text = value_format(arr[pos], 0)
            # Only the very last element of the array omits the comma
            tail.append(text if pos == size - 1 else text + ',')
        contents = _indent('\n'.join(head + ['...'] + tail), 2)

    return '[\n{0}\n]'.format(contents)


def value_format(x, indent_level=0):
    """
    Format a single value for display.

    A `scalar.ListValue` is formatted recursively: its items are joined
    with ',\\n', indented by one space, stripped of surrounding whitespace
    and wrapped in square brackets. Any other value is returned as its
    `repr`. `indent_level` is accepted but not used by this function.
    """
    if not isinstance(x, scalar.ListValue):
        return repr(x)

    parts = [value_format(item) for item in x]
    joined = ',\n'.join(parts)
    return '[{0}]'.format(_indent(joined, 1).strip())


def _indent(text, spaces):
if spaces == 0:
return text
block = ' ' * spaces
return '\n'.join(block + x for x in text.split('\n'))
5 changes: 5 additions & 0 deletions python/cmake_modules/UseCython.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,11 @@ function( compile_pyx _name pyx_target_name generated_files pyx_file)
set( _generated_files "${_name}.${extension}")
endif()
set_source_files_properties( ${_generated_files} PROPERTIES GENERATED TRUE )

# Cython creates a lot of compiler warning detritus on clang
set_source_files_properties(${_generated_files} PROPERTIES
COMPILE_FLAGS -Wno-unused-function)

set( ${generated_files} ${_generated_files} PARENT_SCOPE )

# Add the command to run the compiler.
Expand Down
4 changes: 4 additions & 0 deletions python/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
pytest
numpy>=1.7.0
pandas>=0.12.0
six
2 changes: 1 addition & 1 deletion python/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,7 @@ def get_ext_built(self, name):
return name + suffix

def get_cmake_cython_names(self):
return ['array', 'config', 'error', 'parquet', 'scalar', 'schema']
return ['array', 'config', 'error', 'scalar', 'schema']

def get_names(self):
return self._found_names
Expand Down
20 changes: 13 additions & 7 deletions python/src/pyarrow/adapters/builtin.cc
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@

#include <arrow/api.h>

#include "pyarrow/helpers.h"
#include "pyarrow/status.h"

using arrow::ArrayBuilder;
Expand Down Expand Up @@ -74,16 +75,16 @@ class ScalarVisitor {
std::shared_ptr<DataType> GetType() {
// TODO(wesm): handling mixed-type cases
if (float_count_) {
return arrow::DOUBLE;
return DOUBLE;
} else if (int_count_) {
// TODO(wesm): tighter type later
return arrow::INT64;
return INT64;
} else if (bool_count_) {
return arrow::BOOL;
return BOOL;
} else if (string_count_) {
return arrow::STRING;
return STRING;
} else {
return arrow::NA;
return NA;
}
}

Expand Down Expand Up @@ -145,7 +146,7 @@ class SeqVisitor {
std::shared_ptr<DataType> GetType() {
if (scalars_.total_count() == 0) {
if (max_nesting_level_ == 0) {
return arrow::NA;
return NA;
} else {
return nullptr;
}
Expand Down Expand Up @@ -209,14 +210,19 @@ static Status InferArrowType(PyObject* obj, int64_t* size,

// For 0-length sequences, refuse to guess
if (*size == 0) {
*out_type = arrow::NA;
*out_type = NA;
}

SeqVisitor seq_visitor;
PY_RETURN_NOT_OK(seq_visitor.Visit(obj));
PY_RETURN_NOT_OK(seq_visitor.Validate());

*out_type = seq_visitor.GetType();

if (*out_type == nullptr) {
return Status::TypeError("Unable to determine data type");
}

return Status::OK();
}

Expand Down
Loading