ARROW-8175: [Python] Setup type checking with mypy
xhochy committed Mar 20, 2020
1 parent d29066c commit b4578b2
Showing 16 changed files with 81 additions and 55 deletions.
1 change: 1 addition & 0 deletions ci/scripts/python_test.sh
@@ -30,3 +30,4 @@ export LD_LIBRARY_PATH=${ARROW_HOME}/lib:${LD_LIBRARY_PATH}
export PYTHONDEVMODE=1

pytest -r s --pyargs pyarrow
mypy pyarrow
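The added `mypy pyarrow` line runs the type checker over the installed package right after the pytest run. As a rough, self-contained sketch (not code from this commit) of the class of error it reports, and that the annotations and `# type: ignore` comments in the files below address:

```python
def detect_version() -> str:
    return "0.17.0"


# mypy infers `__version__: str` from the first assignment, so rebinding the
# name to None is reported as, approximately:
#   error: Incompatible types in assignment (expression has type "None",
#          variable has type "str")
__version__ = detect_version()
__version__ = None  # appending "# type: ignore" here silences only this line
```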
3 changes: 2 additions & 1 deletion python/pyarrow/__init__.py
@@ -17,6 +17,7 @@

# flake8: noqa

from typing import Optional

import os as _os
import sys as _sys
@@ -40,7 +41,7 @@ def parse_git(root, **kwargs):
__version__ = setuptools_scm.get_version('../',
parse=parse_git)
except ImportError:
__version__ = None
__version__ = None # type: ignore


import pyarrow.compat as compat
5 changes: 3 additions & 2 deletions python/pyarrow/compat.py
@@ -26,9 +26,10 @@
try:
import pickle5 as builtin_pickle
except ImportError:
import pickle as builtin_pickle
import pickle as builtin_pickle # type: ignore

from collections.abc import Iterable, Mapping, Sequence
from typing import List

def guid():
from uuid import uuid4
@@ -122,4 +123,4 @@ def descr_to_dtype(descr):
return np.dtype({'names': names, 'formats': formats, 'titles': titles,
'offsets': offsets, 'itemsize': offset})

__all__ = []
__all__: List[str] = []
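The `__all__` annotation is needed because mypy cannot infer an element type for an empty container at module scope. A minimal sketch of the pattern (error wording approximate):

```python
from typing import List

# An unannotated empty list at module scope triggers, roughly:
#   error: Need type annotation for "__all__"
# The annotated form used in compat.py makes the element type explicit:
__all__: List[str] = []

__all__.append("guid")   # OK: element type is str
# __all__.append(3)      # would now be rejected by mypy: int is not str
```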
3 changes: 2 additions & 1 deletion python/pyarrow/filesystem.py
@@ -20,6 +20,7 @@
import inspect
import posixpath
import urllib.parse
from typing import Optional

from os.path import join as pjoin

@@ -194,7 +195,7 @@ def pathsep(self):

class LocalFileSystem(FileSystem):

_instance = None
_instance: Optional["LocalFileSystem"] = None

@classmethod
def get_instance(cls):
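`_instance` is annotated with a quoted forward reference because `LocalFileSystem` is still being defined when the class body is evaluated. A minimal sketch of the singleton pattern under that annotation (simplified; the real `get_instance` body lies outside this hunk):

```python
from typing import Optional


class LocalFileSystem:
    # Quoted forward reference: the name "LocalFileSystem" is not bound yet
    # while the class body is being evaluated.
    _instance: Optional["LocalFileSystem"] = None

    @classmethod
    def get_instance(cls) -> "LocalFileSystem":
        if cls._instance is None:
            cls._instance = cls()
        return cls._instance
```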
3 changes: 2 additions & 1 deletion python/pyarrow/pandas_compat.py
@@ -18,6 +18,7 @@

import ast
from itertools import zip_longest
from typing import Any, Dict
import json
import operator
import re
@@ -32,7 +33,7 @@
frombytes, Sequence)


_logical_type_map = {}
_logical_type_map: Dict[Any, str] = {}


def get_logical_type_map():
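Like `__all__` above, the empty `_logical_type_map` needs an explicit annotation; `Dict[Any, str]` is the pragmatic choice since the keys are Arrow type ids rather than one well-defined Python type. A sketch of the lazily filled lookup table (entries are illustrative, not taken from the commit):

```python
from typing import Any, Dict

import pyarrow as pa

_logical_type_map: Dict[Any, str] = {}


def get_logical_type_map() -> Dict[Any, str]:
    # Populate on first use so importing the module stays cheap.
    if not _logical_type_map:
        _logical_type_map.update({
            pa.lib.Type_BOOL: 'bool',
            pa.lib.Type_STRING: 'string',
        })
    return _logical_type_map
```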
8 changes: 6 additions & 2 deletions python/pyarrow/plasma.py
@@ -37,10 +37,14 @@
# the function build_plasma_tensorflow_op can be used to compile it.


TF_PLASMA_OP_PATH = os.path.join(pa.__path__[0], "tensorflow", "plasma_op.so")
TF_PLASMA_OP_PATH = os.path.join(
pa.__path__[0], # type: ignore
"tensorflow",
"plasma_op.so",
)


tf_plasma_op = None
tf_plasma_op = None # type: ignore


def load_plasma_tensorflow_op():
6 changes: 4 additions & 2 deletions python/pyarrow/tests/strategies.py
@@ -15,6 +15,8 @@
# specific language governing permissions and limitations
# under the License.

from typing import Any

import pytz
import hypothesis as h
import hypothesis.strategies as st
@@ -60,7 +62,7 @@
pa.float32(),
pa.float64()
])
decimal_type = st.builds(
decimal_type: Any = st.builds(
pa.decimal128,
precision=st.integers(min_value=1, max_value=38),
scale=st.integers(min_value=1, max_value=38)
@@ -77,7 +79,7 @@
pa.time64('us'),
pa.time64('ns')
])
timestamp_types = st.builds(
timestamp_types: Any = st.builds(
pa.timestamp,
unit=st.sampled_from(['s', 'ms', 'us', 'ns']),
tz=tzst.timezones()
86 changes: 46 additions & 40 deletions python/pyarrow/tests/test_cuda.py
@@ -39,20 +39,18 @@
not has_ipc_support,
reason='CUDA IPC not supported in platform `%s`' % (platform))

global_context = None # for flake8
global_context1 = None # for flake8

@pytest.fixture(scope="module")
def global_context():
return cuda.Context(0)

def setup_module(module):
module.global_context = cuda.Context(0)
module.global_context1 = cuda.Context(cuda.Context.get_num_devices() - 1)

@pytest.fixture(scope="module")
def global_context1():
return cuda.Context(cuda.Context.get_num_devices() - 1)

def teardown_module(module):
del module.global_context


def test_Context():
def test_Context(global_context, global_context1):
assert cuda.Context.get_num_devices() > 0
assert global_context.device_number == 0
assert global_context1.device_number == cuda.Context.get_num_devices() - 1
@@ -74,15 +72,15 @@ def test_manage_allocate_free_host(size):
assert buf.size == size


def test_context_allocate_del():
def test_context_allocate_del(global_context):
bytes_allocated = global_context.bytes_allocated
cudabuf = global_context.new_buffer(128)
assert global_context.bytes_allocated == bytes_allocated + 128
del cudabuf
assert global_context.bytes_allocated == bytes_allocated


def make_random_buffer(size, target='host'):
def make_random_buffer(size, target='host', context=None):
"""Return a host or device buffer with random data.
"""
if target == 'host':
@@ -97,18 +95,18 @@ def make_random_buffer(size, target='host'):
np.testing.assert_equal(arr, arr_)
return arr, buf
elif target == 'device':
arr, buf = make_random_buffer(size, target='host')
dbuf = global_context.new_buffer(size)
arr, buf = make_random_buffer(size, target='host', context=context)
dbuf = context.new_buffer(size)
assert dbuf.size == size
dbuf.copy_from_host(buf, position=0, nbytes=size)
return arr, dbuf
raise ValueError('invalid target value')


@pytest.mark.parametrize("size", [0, 1, 1000])
def test_context_device_buffer(size):
def test_context_device_buffer(size, global_context):
# Creating device buffer from host buffer;
arr, buf = make_random_buffer(size)
arr, buf = make_random_buffer(size, context=global_context)
cudabuf = global_context.buffer_from_data(buf)
assert cudabuf.size == size
arr2 = np.frombuffer(cudabuf.copy_to_host(), dtype=np.uint8)
@@ -234,9 +232,9 @@ def test_context_device_buffer(size):


@pytest.mark.parametrize("size", [0, 1, 1000])
def test_context_from_object(size):
def test_context_from_object(size, global_context):
ctx = global_context
arr, cbuf = make_random_buffer(size, target='device')
arr, cbuf = make_random_buffer(size, target='device', context=ctx)
dtype = arr.dtype

# Creating device buffer from a CUDA host buffer
@@ -265,7 +263,7 @@ def test_context_from_object(size):
ctx.buffer_from_object(np.array([1, 2, 3]))


def test_foreign_buffer():
def test_foreign_buffer(global_context):
ctx = global_context
dtype = np.dtype(np.uint8)
size = 10
@@ -293,8 +291,8 @@ def test_foreign_buffer():


@pytest.mark.parametrize("size", [0, 1, 1000])
def test_CudaBuffer(size):
arr, buf = make_random_buffer(size)
def test_CudaBuffer(size, global_context):
arr, buf = make_random_buffer(size, context=global_context)
assert arr.tobytes() == buf.to_pybytes()
cbuf = global_context.buffer_from_data(buf)
assert cbuf.size == size
@@ -321,8 +319,8 @@ def test_CudaBuffer(size):


@pytest.mark.parametrize("size", [0, 1, 1000])
def test_HostBuffer(size):
arr, buf = make_random_buffer(size)
def test_HostBuffer(size, global_context):
arr, buf = make_random_buffer(size, context=global_context)
assert arr.tobytes() == buf.to_pybytes()
hbuf = cuda.new_host_buffer(size)
np.frombuffer(hbuf, dtype=np.uint8)[:] = arr
@@ -348,7 +346,7 @@ def test_HostBuffer(size):


@pytest.mark.parametrize("size", [0, 1, 1000])
def test_copy_from_to_host(size):
def test_copy_from_to_host(size, global_context):

# Create a buffer in host containing range(size)
buf = pa.allocate_buffer(size, resizable=True) # in host
@@ -374,8 +372,8 @@ def test_copy_from_to_host(size):


@pytest.mark.parametrize("size", [0, 1, 1000])
def test_copy_to_host(size):
arr, dbuf = make_random_buffer(size, target='device')
def test_copy_to_host(size, global_context):
arr, dbuf = make_random_buffer(size, target='device',
context=global_context)

buf = dbuf.copy_to_host()
assert buf.is_cpu
@@ -439,8 +438,9 @@ def test_copy_to_host(size):

@pytest.mark.parametrize("dest_ctx", ['same', 'another'])
@pytest.mark.parametrize("size", [0, 1, 1000])
def test_copy_from_device(dest_ctx, size):
arr, buf = make_random_buffer(size=size, target='device')
def test_copy_from_device(dest_ctx, size, global_context, global_context1):
arr, buf = make_random_buffer(size=size, target='device',
context=global_context)
lst = arr.tolist()
if dest_ctx == 'another':
dest_ctx = global_context1
@@ -489,8 +489,9 @@ def put(*args, **kwargs):


@pytest.mark.parametrize("size", [0, 1, 1000])
def test_copy_from_host(size):
arr, buf = make_random_buffer(size=size, target='host')
def test_copy_from_host(size, global_context):
arr, buf = make_random_buffer(size=size, target='host',
context=global_context)
lst = arr.tolist()
dbuf = global_context.new_buffer(size)

@@ -532,15 +533,16 @@ def put(*args, **kwargs):
put(position=position, nbytes=nbytes)


def test_BufferWriter():
def test_BufferWriter(global_context):
def allocate(size):
cbuf = global_context.new_buffer(size)
writer = cuda.BufferWriter(cbuf)
return cbuf, writer

def test_writes(total_size, chunksize, buffer_size=0):
cbuf, writer = allocate(total_size)
arr, buf = make_random_buffer(size=total_size, target='host')
arr, buf = make_random_buffer(size=total_size, target='host',
context=global_context)

if buffer_size > 0:
writer.buffer_size = buffer_size
@@ -583,12 +585,13 @@ def test_writes(total_size, chunksize, buffer_size=0):
np.testing.assert_equal(arr[75:], np.arange(25, dtype=np.uint8))


def test_BufferWriter_edge_cases():
def test_BufferWriter_edge_cases(global_context):
# edge cases, see cuda-test.cc for more information:
size = 1000
cbuf = global_context.new_buffer(size)
writer = cuda.BufferWriter(cbuf)
arr, buf = make_random_buffer(size=size, target='host')
arr, buf = make_random_buffer(size=size, target='host',
context=global_context)

assert writer.buffer_size == 0
writer.buffer_size = 100
@@ -619,9 +622,10 @@ def test_BufferWriter_edge_cases():
np.testing.assert_equal(arr, arr2)


def test_BufferReader():
def test_BufferReader(global_context):
size = 1000
arr, cbuf = make_random_buffer(size=size, target='device')
arr, cbuf = make_random_buffer(size=size, target='device',
context=global_context)

reader = cuda.BufferReader(cbuf)
reader.seek(950)
@@ -645,8 +649,9 @@ def test_BufferReader():
np.testing.assert_equal(arr, arr2)


def test_BufferReader_zero_size():
arr, cbuf = make_random_buffer(size=0, target='device')
def test_BufferReader_zero_size(global_context):
arr, cbuf = make_random_buffer(size=0, target='device',
context=global_context)
reader = cuda.BufferReader(cbuf)
reader.seek(0)
data = reader.read()
@@ -666,7 +671,7 @@ def make_recordbatch(length):
return batch


def test_batch_serialize():
def test_batch_serialize(global_context):
batch = make_recordbatch(10)
hbuf = batch.serialize()
cbuf = cuda.serialize_record_batch(batch, global_context)
@@ -695,10 +700,11 @@ def other_process_for_test_IPC(handle_buffer, expected_arr):

@cuda_ipc
@pytest.mark.parametrize("size", [0, 1, 1000])
def test_IPC(size):
def test_IPC(size, global_context):
import multiprocessing
ctx = multiprocessing.get_context('spawn')
arr, cbuf = make_random_buffer(size=size, target='device')
arr, cbuf = make_random_buffer(size=size, target='device',
context=global_context)
ipc_handle = cbuf.export_for_ipc()
handle_buffer = ipc_handle.serialize()
p = ctx.Process(target=other_process_for_test_IPC,
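The recurring change throughout test_cuda.py: module-level globals assigned in `setup_module()` (which mypy could only see as `None`) become module-scoped pytest fixtures that each test requests explicitly, and `make_random_buffer()` receives the context through a new `context` parameter. A condensed sketch of the fixture pattern, assuming `pyarrow.cuda` is importable:

```python
import pytest
from pyarrow import cuda


@pytest.fixture(scope="module")
def global_context():
    # Created once per test module, like the old setup_module(), but with a
    # type mypy can follow and no mutation of module attributes.
    return cuda.Context(0)


def test_bytes_allocated(global_context):
    assert global_context.bytes_allocated >= 0
```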
4 changes: 3 additions & 1 deletion python/pyarrow/tests/test_cuda_numba_interop.py
@@ -26,7 +26,9 @@
from numba.cuda.cudadrv.devicearray import DeviceNDArray # noqa: E402


context_choices = None
# TODO(ARROW-8174): Refactor context_choices in test_cuda_numba_interop
# to be a module level fixture
context_choices = None # type: ignore
context_choice_ids = ['pyarrow.cuda', 'numba.cuda']


2 changes: 1 addition & 1 deletion python/pyarrow/tests/test_cython.py
@@ -116,7 +116,7 @@ def test_cython_api(tmpdir):
arr = mod.make_null_array(5)
assert mod.get_array_length(arr) == 5
assert arr.null_count == 5
""".format(mod_path=str(tmpdir), mod_name='pyarrow_cython_example')
""".format(mod_name='pyarrow_cython_example')

if sys.platform == 'win32':
delim, var = ';', 'PATH'
2 changes: 1 addition & 1 deletion python/pyarrow/tests/test_dataset.py
@@ -34,7 +34,7 @@
try:
import pyarrow.dataset as ds
except ImportError:
ds = None
ds = None # type: ignore

# Marks all of the tests in this module
# Ignore these with pytest ... -m 'not dataset'
2 changes: 1 addition & 1 deletion python/pyarrow/tests/test_flight.py
@@ -38,7 +38,7 @@
ClientMiddleware, ClientMiddlewareFactory,
)
except ImportError:
flight = None
flight = None # type: ignore
FlightClient, FlightServerBase = object, object
ServerAuthHandler, ClientAuthHandler = object, object
ServerMiddleware, ServerMiddlewareFactory = object, object
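test_dataset.py and test_flight.py share the same optional-import idiom: when the optional component is missing, the module name is rebound to None, which mypy flags as an incompatible assignment, so the fallback line gets a targeted ignore. A sketch of the idiom (not code from the commit):

```python
try:
    import pyarrow.flight as flight
except ImportError:
    flight = None  # type: ignore


def require_flight() -> None:
    # Downstream code can still test for availability at runtime.
    if flight is None:
        raise RuntimeError("pyarrow was built without Flight support")
```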