Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

ARROW-8175: [Python] Setup type checking with mypy #6676

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions ci/conda_env_python.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ cffi
cython
cloudpickle
hypothesis
mypy=0.770
numpy>=1.14
pytest
pytest-faulthandler
Expand Down
3 changes: 3 additions & 0 deletions ci/scripts/python_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -30,3 +30,6 @@ export LD_LIBRARY_PATH=${ARROW_HOME}/lib:${LD_LIBRARY_PATH}
export PYTHONDEVMODE=1

pytest -r s --pyargs pyarrow
pushd ${arrow_dir}/python
mypy pyarrow
popd
4 changes: 3 additions & 1 deletion python/pyarrow/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@
For more information see the official page at https://arrow.apache.org
"""

from typing import Optional

import os as _os
import sys as _sys

Expand All @@ -51,7 +53,7 @@ def parse_git(root, **kwargs):
__version__ = setuptools_scm.get_version('../',
parse=parse_git)
except ImportError:
__version__ = None
__version__ = None # type: ignore


import pyarrow.compat as compat
Expand Down
5 changes: 3 additions & 2 deletions python/pyarrow/compat.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,10 @@
try:
import pickle5 as builtin_pickle
except ImportError:
import pickle as builtin_pickle
import pickle as builtin_pickle # type: ignore

from collections.abc import Iterable, Mapping, Sequence
from typing import List

def guid():
from uuid import uuid4
Expand Down Expand Up @@ -122,4 +123,4 @@ def descr_to_dtype(descr):
return np.dtype({'names': names, 'formats': formats, 'titles': titles,
'offsets': offsets, 'itemsize': offset})

__all__ = []
__all__: List[str] = []
3 changes: 2 additions & 1 deletion python/pyarrow/filesystem.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import inspect
import posixpath
import urllib.parse
from typing import Optional

from os.path import join as pjoin

Expand Down Expand Up @@ -235,7 +236,7 @@ def pathsep(self):

class LocalFileSystem(FileSystem):

_instance = None
_instance: Optional["LocalFileSystem"] = None

@classmethod
def get_instance(cls):
Expand Down
3 changes: 2 additions & 1 deletion python/pyarrow/pandas_compat.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

import ast
from itertools import zip_longest
from typing import Any, Dict
import json
import operator
import re
Expand All @@ -32,7 +33,7 @@
frombytes, Sequence)


_logical_type_map = {}
_logical_type_map: Dict[Any, str] = {}


def get_logical_type_map():
Expand Down
8 changes: 6 additions & 2 deletions python/pyarrow/plasma.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,14 @@
# the function build_plasma_tensorflow_op can be used to compile it.


TF_PLASMA_OP_PATH = os.path.join(pa.__path__[0], "tensorflow", "plasma_op.so")
TF_PLASMA_OP_PATH = os.path.join(
pa.__path__[0], # type: ignore
"tensorflow",
"plasma_op.so",
)


tf_plasma_op = None
tf_plasma_op = None # type: ignore


def load_plasma_tensorflow_op():
Expand Down
6 changes: 4 additions & 2 deletions python/pyarrow/tests/strategies.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
# specific language governing permissions and limitations
# under the License.

from typing import Any

import pytz
import hypothesis as h
import hypothesis.strategies as st
Expand Down Expand Up @@ -60,7 +62,7 @@
pa.float32(),
pa.float64()
])
decimal_type = st.builds(
decimal_type: Any = st.builds(
pa.decimal128,
precision=st.integers(min_value=1, max_value=38),
scale=st.integers(min_value=1, max_value=38)
Expand All @@ -77,7 +79,7 @@
pa.time64('us'),
pa.time64('ns')
])
timestamp_types = st.builds(
timestamp_types: Any = st.builds(
pa.timestamp,
unit=st.sampled_from(['s', 'ms', 'us', 'ns']),
tz=tzst.timezones()
Expand Down
92 changes: 49 additions & 43 deletions python/pyarrow/tests/test_cuda.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,20 +39,18 @@
not has_ipc_support,
reason='CUDA IPC not supported in platform `%s`' % (platform))

global_context = None # for flake8
global_context1 = None # for flake8

@pytest.fixture(scope="module")
def global_context():
return cuda.Context(0)

def setup_module(module):
module.global_context = cuda.Context(0)
module.global_context1 = cuda.Context(cuda.Context.get_num_devices() - 1)

@pytest.fixture(scope="module")
def global_context1():
return cuda.Context(cuda.Context.get_num_devices() - 1)

def teardown_module(module):
del module.global_context


def test_Context():
def test_Context(global_context, global_context1):
assert cuda.Context.get_num_devices() > 0
assert global_context.device_number == 0
assert global_context1.device_number == cuda.Context.get_num_devices() - 1
Expand All @@ -74,15 +72,15 @@ def test_manage_allocate_free_host(size):
assert buf.size == size


def test_context_allocate_del():
def test_context_allocate_del(global_context):
bytes_allocated = global_context.bytes_allocated
cudabuf = global_context.new_buffer(128)
assert global_context.bytes_allocated == bytes_allocated + 128
del cudabuf
assert global_context.bytes_allocated == bytes_allocated


def make_random_buffer(size, target='host'):
def make_random_buffer(size, target='host', context=None):
"""Return a host or device buffer with random data.
"""
if target == 'host':
Expand All @@ -97,18 +95,18 @@ def make_random_buffer(size, target='host'):
np.testing.assert_equal(arr, arr_)
return arr, buf
elif target == 'device':
arr, buf = make_random_buffer(size, target='host')
dbuf = global_context.new_buffer(size)
arr, buf = make_random_buffer(size, target='host', context=context)
dbuf = context.new_buffer(size)
assert dbuf.size == size
dbuf.copy_from_host(buf, position=0, nbytes=size)
return arr, dbuf
raise ValueError('invalid target value')


@pytest.mark.parametrize("size", [0, 1, 1000])
def test_context_device_buffer(size):
def test_context_device_buffer(size, global_context):
# Creating device buffer from host buffer;
arr, buf = make_random_buffer(size)
arr, buf = make_random_buffer(size, context=global_context)
cudabuf = global_context.buffer_from_data(buf)
assert cudabuf.size == size
arr2 = np.frombuffer(cudabuf.copy_to_host(), dtype=np.uint8)
Expand Down Expand Up @@ -234,9 +232,9 @@ def test_context_device_buffer(size):


@pytest.mark.parametrize("size", [0, 1, 1000])
def test_context_from_object(size):
def test_context_from_object(size, global_context):
ctx = global_context
arr, cbuf = make_random_buffer(size, target='device')
arr, cbuf = make_random_buffer(size, target='device', context=ctx)
dtype = arr.dtype

# Creating device buffer from a CUDA host buffer
Expand Down Expand Up @@ -265,7 +263,7 @@ def test_context_from_object(size):
ctx.buffer_from_object(np.array([1, 2, 3]))


def test_foreign_buffer():
def test_foreign_buffer(global_context):
ctx = global_context
dtype = np.dtype(np.uint8)
size = 10
Expand Down Expand Up @@ -293,8 +291,8 @@ def test_foreign_buffer():


@pytest.mark.parametrize("size", [0, 1, 1000])
def test_CudaBuffer(size):
arr, buf = make_random_buffer(size)
def test_CudaBuffer(size, global_context):
arr, buf = make_random_buffer(size, context=global_context)
assert arr.tobytes() == buf.to_pybytes()
cbuf = global_context.buffer_from_data(buf)
assert cbuf.size == size
Expand All @@ -321,8 +319,8 @@ def test_CudaBuffer(size):


@pytest.mark.parametrize("size", [0, 1, 1000])
def test_HostBuffer(size):
arr, buf = make_random_buffer(size)
def test_HostBuffer(size, global_context):
arr, buf = make_random_buffer(size, context=global_context)
assert arr.tobytes() == buf.to_pybytes()
hbuf = cuda.new_host_buffer(size)
np.frombuffer(hbuf, dtype=np.uint8)[:] = arr
Expand All @@ -348,7 +346,7 @@ def test_HostBuffer(size):


@pytest.mark.parametrize("size", [0, 1, 1000])
def test_copy_from_to_host(size):
def test_copy_from_to_host(size, global_context):

# Create a buffer in host containing range(size)
buf = pa.allocate_buffer(size, resizable=True) # in host
Expand All @@ -374,8 +372,9 @@ def test_copy_from_to_host(size):


@pytest.mark.parametrize("size", [0, 1, 1000])
def test_copy_to_host(size):
arr, dbuf = make_random_buffer(size, target='device')
def test_copy_to_host(size, global_context):
arr, dbuf = make_random_buffer(size, target='device',
context=global_context)

buf = dbuf.copy_to_host()
assert buf.is_cpu
Expand Down Expand Up @@ -439,8 +438,9 @@ def test_copy_to_host(size):

@pytest.mark.parametrize("dest_ctx", ['same', 'another'])
@pytest.mark.parametrize("size", [0, 1, 1000])
def test_copy_from_device(dest_ctx, size):
arr, buf = make_random_buffer(size=size, target='device')
def test_copy_from_device(dest_ctx, size, global_context, global_context1):
arr, buf = make_random_buffer(size=size, target='device',
context=global_context)
lst = arr.tolist()
if dest_ctx == 'another':
dest_ctx = global_context1
Expand Down Expand Up @@ -489,8 +489,9 @@ def put(*args, **kwargs):


@pytest.mark.parametrize("size", [0, 1, 1000])
def test_copy_from_host(size):
arr, buf = make_random_buffer(size=size, target='host')
def test_copy_from_host(size, global_context):
arr, buf = make_random_buffer(size=size, target='host',
context=global_context)
lst = arr.tolist()
dbuf = global_context.new_buffer(size)

Expand Down Expand Up @@ -532,15 +533,16 @@ def put(*args, **kwargs):
put(position=position, nbytes=nbytes)


def test_BufferWriter():
def test_BufferWriter(global_context):
def allocate(size):
cbuf = global_context.new_buffer(size)
writer = cuda.BufferWriter(cbuf)
return cbuf, writer

def test_writes(total_size, chunksize, buffer_size=0):
def test_writes(total_size, global_context, chunksize, buffer_size=0):
cbuf, writer = allocate(total_size)
arr, buf = make_random_buffer(size=total_size, target='host')
arr, buf = make_random_buffer(size=total_size, target='host',
context=global_context)

if buffer_size > 0:
writer.buffer_size = buffer_size
Expand Down Expand Up @@ -568,8 +570,8 @@ def test_writes(total_size, chunksize, buffer_size=0):
np.testing.assert_equal(arr, arr2)

total_size, chunk_size = 1 << 16, 1000
test_writes(total_size, chunk_size)
test_writes(total_size, chunk_size, total_size // 16)
test_writes(total_size, global_context, chunk_size)
test_writes(total_size, global_context, chunk_size, total_size // 16)

cbuf, writer = allocate(100)
writer.write(np.arange(100, dtype=np.uint8))
Expand All @@ -583,12 +585,13 @@ def test_writes(total_size, chunksize, buffer_size=0):
np.testing.assert_equal(arr[75:], np.arange(25, dtype=np.uint8))


def test_BufferWriter_edge_cases():
def test_BufferWriter_edge_cases(global_context):
# edge cases, see cuda-test.cc for more information:
size = 1000
cbuf = global_context.new_buffer(size)
writer = cuda.BufferWriter(cbuf)
arr, buf = make_random_buffer(size=size, target='host')
arr, buf = make_random_buffer(size=size, target='host',
context=global_context)

assert writer.buffer_size == 0
writer.buffer_size = 100
Expand Down Expand Up @@ -619,9 +622,10 @@ def test_BufferWriter_edge_cases():
np.testing.assert_equal(arr, arr2)


def test_BufferReader():
def test_BufferReader(global_context):
size = 1000
arr, cbuf = make_random_buffer(size=size, target='device')
arr, cbuf = make_random_buffer(size=size, target='device',
context=global_context)

reader = cuda.BufferReader(cbuf)
reader.seek(950)
Expand All @@ -645,8 +649,9 @@ def test_BufferReader():
np.testing.assert_equal(arr, arr2)


def test_BufferReader_zero_size():
arr, cbuf = make_random_buffer(size=0, target='device')
def test_BufferReader_zero_size(global_context1):
arr, cbuf = make_random_buffer(size=0, target='device',
context=global_context1)
reader = cuda.BufferReader(cbuf)
reader.seek(0)
data = reader.read()
Expand All @@ -666,7 +671,7 @@ def make_recordbatch(length):
return batch


def test_batch_serialize():
def test_batch_serialize(global_context):
batch = make_recordbatch(10)
hbuf = batch.serialize()
cbuf = cuda.serialize_record_batch(batch, global_context)
Expand Down Expand Up @@ -695,10 +700,11 @@ def other_process_for_test_IPC(handle_buffer, expected_arr):

@cuda_ipc
@pytest.mark.parametrize("size", [0, 1, 1000])
def test_IPC(size):
def test_IPC(size, global_context):
import multiprocessing
ctx = multiprocessing.get_context('spawn')
arr, cbuf = make_random_buffer(size=size, target='device')
arr, cbuf = make_random_buffer(size=size, target='device',
context=global_context)
ipc_handle = cbuf.export_for_ipc()
handle_buffer = ipc_handle.serialize()
p = ctx.Process(target=other_process_for_test_IPC,
Expand Down
4 changes: 3 additions & 1 deletion python/pyarrow/tests/test_cuda_numba_interop.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,9 @@
from numba.cuda.cudadrv.devicearray import DeviceNDArray # noqa: E402


context_choices = None
# TODO(ARROW-8174): Refactor context_choices in test_cuda_numba_interop
# to be a module level fixture
context_choices = None # type: ignore
context_choice_ids = ['pyarrow.cuda', 'numba.cuda']


Expand Down
Loading