From b4578b2d7f526bb965f3e30c5813c360c29b8164 Mon Sep 17 00:00:00 2001 From: "Uwe L. Korn" Date: Fri, 20 Mar 2020 19:01:28 +0100 Subject: [PATCH] =?UTF-8?q?ARROW-8175:=20[Python]=C2=A0Setup=20type=20chec?= =?UTF-8?q?king=20with=20mypy?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ci/scripts/python_test.sh | 1 + python/pyarrow/__init__.py | 3 +- python/pyarrow/compat.py | 5 +- python/pyarrow/filesystem.py | 3 +- python/pyarrow/pandas_compat.py | 3 +- python/pyarrow/plasma.py | 8 +- python/pyarrow/tests/strategies.py | 6 +- python/pyarrow/tests/test_cuda.py | 86 ++++++++++--------- .../pyarrow/tests/test_cuda_numba_interop.py | 4 +- python/pyarrow/tests/test_cython.py | 2 +- python/pyarrow/tests/test_dataset.py | 2 +- python/pyarrow/tests/test_flight.py | 2 +- python/pyarrow/tests/test_parquet.py | 2 +- python/pyarrow/tests/test_serialization.py | 2 +- python/requirements-test.txt | 1 + python/setup.cfg | 6 ++ 16 files changed, 81 insertions(+), 55 deletions(-) diff --git a/ci/scripts/python_test.sh b/ci/scripts/python_test.sh index 6f961d2f8e0f9..b73cea7aa2386 100755 --- a/ci/scripts/python_test.sh +++ b/ci/scripts/python_test.sh @@ -30,3 +30,4 @@ export LD_LIBRARY_PATH=${ARROW_HOME}/lib:${LD_LIBRARY_PATH} export PYTHONDEVMODE=1 pytest -r s --pyargs pyarrow +mypy pyarrow diff --git a/python/pyarrow/__init__.py b/python/pyarrow/__init__.py index 6f76508ed7598..e968dda2a85bc 100644 --- a/python/pyarrow/__init__.py +++ b/python/pyarrow/__init__.py @@ -17,6 +17,7 @@ # flake8: noqa +from typing import Optional import os as _os import sys as _sys @@ -40,7 +41,7 @@ def parse_git(root, **kwargs): __version__ = setuptools_scm.get_version('../', parse=parse_git) except ImportError: - __version__ = None + __version__ = None # type: ignore import pyarrow.compat as compat diff --git a/python/pyarrow/compat.py b/python/pyarrow/compat.py index 0890ade0577ea..970fcace40e3d 100644 --- a/python/pyarrow/compat.py +++ 
b/python/pyarrow/compat.py @@ -26,9 +26,10 @@ try: import pickle5 as builtin_pickle except ImportError: - import pickle as builtin_pickle + import pickle as builtin_pickle # type: ignore from collections.abc import Iterable, Mapping, Sequence +from typing import List def guid(): from uuid import uuid4 @@ -122,4 +123,4 @@ def descr_to_dtype(descr): return np.dtype({'names': names, 'formats': formats, 'titles': titles, 'offsets': offsets, 'itemsize': offset}) -__all__ = [] +__all__: List[str] = [] diff --git a/python/pyarrow/filesystem.py b/python/pyarrow/filesystem.py index c46f1168e3342..95643efe979b2 100644 --- a/python/pyarrow/filesystem.py +++ b/python/pyarrow/filesystem.py @@ -20,6 +20,7 @@ import inspect import posixpath import urllib.parse +from typing import Optional from os.path import join as pjoin @@ -194,7 +195,7 @@ def pathsep(self): class LocalFileSystem(FileSystem): - _instance = None + _instance: Optional["LocalFileSystem"] = None @classmethod def get_instance(cls): diff --git a/python/pyarrow/pandas_compat.py b/python/pyarrow/pandas_compat.py index 749a62357f390..2a92097d59fe2 100644 --- a/python/pyarrow/pandas_compat.py +++ b/python/pyarrow/pandas_compat.py @@ -18,6 +18,7 @@ import ast from itertools import zip_longest +from typing import Any, Dict import json import operator import re @@ -32,7 +33,7 @@ frombytes, Sequence) -_logical_type_map = {} +_logical_type_map: Dict[Any, str] = {} def get_logical_type_map(): diff --git a/python/pyarrow/plasma.py b/python/pyarrow/plasma.py index 5bfa0b47dddab..d99a26b92a4c1 100644 --- a/python/pyarrow/plasma.py +++ b/python/pyarrow/plasma.py @@ -37,10 +37,14 @@ # the function build_plasma_tensorflow_op can be used to compile it. 
-TF_PLASMA_OP_PATH = os.path.join(pa.__path__[0], "tensorflow", "plasma_op.so") +TF_PLASMA_OP_PATH = os.path.join( + pa.__path__[0], # type: ignore + "tensorflow", + "plasma_op.so", +) -tf_plasma_op = None +tf_plasma_op = None # type: ignore def load_plasma_tensorflow_op(): diff --git a/python/pyarrow/tests/strategies.py b/python/pyarrow/tests/strategies.py index 2d5808056d306..784bc7ca5d5f4 100644 --- a/python/pyarrow/tests/strategies.py +++ b/python/pyarrow/tests/strategies.py @@ -15,6 +15,8 @@ # specific language governing permissions and limitations # under the License. +from typing import Any + import pytz import hypothesis as h import hypothesis.strategies as st @@ -60,7 +62,7 @@ pa.float32(), pa.float64() ]) -decimal_type = st.builds( +decimal_type: Any = st.builds( pa.decimal128, precision=st.integers(min_value=1, max_value=38), scale=st.integers(min_value=1, max_value=38) @@ -77,7 +79,7 @@ pa.time64('us'), pa.time64('ns') ]) -timestamp_types = st.builds( +timestamp_types: Any = st.builds( pa.timestamp, unit=st.sampled_from(['s', 'ms', 'us', 'ns']), tz=tzst.timezones() diff --git a/python/pyarrow/tests/test_cuda.py b/python/pyarrow/tests/test_cuda.py index 0e4d3c49893c3..1aaa03194cf35 100644 --- a/python/pyarrow/tests/test_cuda.py +++ b/python/pyarrow/tests/test_cuda.py @@ -39,20 +39,18 @@ not has_ipc_support, reason='CUDA IPC not supported in platform `%s`' % (platform)) -global_context = None # for flake8 -global_context1 = None # for flake8 +@pytest.fixture(scope="module") +def global_context(): + return cuda.Context(0) -def setup_module(module): - module.global_context = cuda.Context(0) - module.global_context1 = cuda.Context(cuda.Context.get_num_devices() - 1) +@pytest.fixture(scope="module") +def global_context1(): + return cuda.Context(cuda.Context.get_num_devices() - 1) -def teardown_module(module): - del module.global_context - -def test_Context(): +def test_Context(global_context, global_context1): assert cuda.Context.get_num_devices() > 0 assert 
global_context.device_number == 0 assert global_context1.device_number == cuda.Context.get_num_devices() - 1 @@ -74,7 +72,7 @@ def test_manage_allocate_free_host(size): assert buf.size == size -def test_context_allocate_del(): +def test_context_allocate_del(global_context): bytes_allocated = global_context.bytes_allocated cudabuf = global_context.new_buffer(128) assert global_context.bytes_allocated == bytes_allocated + 128 @@ -82,7 +80,7 @@ def test_context_allocate_del(): assert global_context.bytes_allocated == bytes_allocated -def make_random_buffer(size, target='host'): +def make_random_buffer(size, target='host', context=None): """Return a host or device buffer with random data. """ if target == 'host': @@ -97,8 +95,8 @@ def make_random_buffer(size, target='host'): np.testing.assert_equal(arr, arr_) return arr, buf elif target == 'device': - arr, buf = make_random_buffer(size, target='host') - dbuf = global_context.new_buffer(size) + arr, buf = make_random_buffer(size, target='host', context=context) + dbuf = context.new_buffer(size) assert dbuf.size == size dbuf.copy_from_host(buf, position=0, nbytes=size) return arr, dbuf @@ -106,9 +104,9 @@ def make_random_buffer(size, target='host'): @pytest.mark.parametrize("size", [0, 1, 1000]) -def test_context_device_buffer(size): +def test_context_device_buffer(size, global_context): # Creating device buffer from host buffer; - arr, buf = make_random_buffer(size) + arr, buf = make_random_buffer(size, context=global_context) cudabuf = global_context.buffer_from_data(buf) assert cudabuf.size == size arr2 = np.frombuffer(cudabuf.copy_to_host(), dtype=np.uint8) @@ -234,9 +232,9 @@ def test_context_device_buffer(size): @pytest.mark.parametrize("size", [0, 1, 1000]) -def test_context_from_object(size): +def test_context_from_object(size, global_context): ctx = global_context - arr, cbuf = make_random_buffer(size, target='device') + arr, cbuf = make_random_buffer(size, target='device', context=ctx) dtype = arr.dtype # 
Creating device buffer from a CUDA host buffer @@ -265,7 +263,7 @@ def test_context_from_object(size): ctx.buffer_from_object(np.array([1, 2, 3])) -def test_foreign_buffer(): +def test_foreign_buffer(global_context): ctx = global_context dtype = np.dtype(np.uint8) size = 10 @@ -293,8 +291,8 @@ def test_foreign_buffer(): @pytest.mark.parametrize("size", [0, 1, 1000]) -def test_CudaBuffer(size): - arr, buf = make_random_buffer(size) +def test_CudaBuffer(size, global_context): + arr, buf = make_random_buffer(size, context=global_context) assert arr.tobytes() == buf.to_pybytes() cbuf = global_context.buffer_from_data(buf) assert cbuf.size == size @@ -321,8 +319,8 @@ def test_CudaBuffer(size): @pytest.mark.parametrize("size", [0, 1, 1000]) -def test_HostBuffer(size): - arr, buf = make_random_buffer(size) +def test_HostBuffer(size, global_context): + arr, buf = make_random_buffer(size, context=global_context) assert arr.tobytes() == buf.to_pybytes() hbuf = cuda.new_host_buffer(size) np.frombuffer(hbuf, dtype=np.uint8)[:] = arr @@ -348,7 +346,7 @@ def test_HostBuffer(size): @pytest.mark.parametrize("size", [0, 1, 1000]) -def test_copy_from_to_host(size): +def test_copy_from_to_host(size, global_context): # Create a buffer in host containing range(size) buf = pa.allocate_buffer(size, resizable=True) # in host @@ -374,8 +372,9 @@ def test_copy_from_to_host(size): @pytest.mark.parametrize("size", [0, 1, 1000]) -def test_copy_to_host(size): - arr, dbuf = make_random_buffer(size, target='device') +def test_copy_to_host(size, global_context): + arr, dbuf = make_random_buffer(size, target='device', + context=global_context) buf = dbuf.copy_to_host() assert buf.is_cpu @@ -439,8 +438,9 @@ def test_copy_to_host(size): @pytest.mark.parametrize("dest_ctx", ['same', 'another']) @pytest.mark.parametrize("size", [0, 1, 1000]) -def test_copy_from_device(dest_ctx, size): - arr, buf = make_random_buffer(size=size, target='device') +def test_copy_from_device(dest_ctx, size, global_context, global_context1): 
+ arr, buf = make_random_buffer(size=size, target='device', + context=global_context) lst = arr.tolist() if dest_ctx == 'another': dest_ctx = global_context1 @@ -489,8 +489,9 @@ def put(*args, **kwargs): @pytest.mark.parametrize("size", [0, 1, 1000]) -def test_copy_from_host(size): - arr, buf = make_random_buffer(size=size, target='host') +def test_copy_from_host(size, global_context): + arr, buf = make_random_buffer(size=size, target='host', + context=global_context) lst = arr.tolist() dbuf = global_context.new_buffer(size) @@ -532,7 +533,7 @@ def put(*args, **kwargs): put(position=position, nbytes=nbytes) -def test_BufferWriter(): +def test_BufferWriter(global_context): def allocate(size): cbuf = global_context.new_buffer(size) writer = cuda.BufferWriter(cbuf) @@ -540,7 +541,8 @@ def allocate(size): def test_writes(total_size, chunksize, buffer_size=0): cbuf, writer = allocate(total_size) - arr, buf = make_random_buffer(size=total_size, target='host') + arr, buf = make_random_buffer(size=total_size, target='host', + context=global_context) if buffer_size > 0: writer.buffer_size = buffer_size @@ -583,12 +585,13 @@ def test_writes(total_size, chunksize, buffer_size=0): np.testing.assert_equal(arr[75:], np.arange(25, dtype=np.uint8)) -def test_BufferWriter_edge_cases(): +def test_BufferWriter_edge_cases(global_context): # edge cases, see cuda-test.cc for more information: size = 1000 cbuf = global_context.new_buffer(size) writer = cuda.BufferWriter(cbuf) - arr, buf = make_random_buffer(size=size, target='host') + arr, buf = make_random_buffer(size=size, target='host', + context=global_context) assert writer.buffer_size == 0 writer.buffer_size = 100 @@ -619,9 +622,10 @@ def test_BufferWriter_edge_cases(): np.testing.assert_equal(arr, arr2) -def test_BufferReader(): +def test_BufferReader(global_context): size = 1000 - arr, cbuf = make_random_buffer(size=size, target='device') + arr, cbuf = make_random_buffer(size=size, target='device', + context=global_context) 
reader = cuda.BufferReader(cbuf) reader.seek(950) @@ -645,8 +649,9 @@ def test_BufferReader(): np.testing.assert_equal(arr, arr2) -def test_BufferReader_zero_size(): - arr, cbuf = make_random_buffer(size=0, target='device') +def test_BufferReader_zero_size(global_context): + arr, cbuf = make_random_buffer(size=0, target='device', + context=global_context) reader = cuda.BufferReader(cbuf) reader.seek(0) data = reader.read() @@ -666,7 +671,7 @@ def make_recordbatch(length): return batch -def test_batch_serialize(): +def test_batch_serialize(global_context): batch = make_recordbatch(10) hbuf = batch.serialize() cbuf = cuda.serialize_record_batch(batch, global_context) @@ -695,10 +700,11 @@ def other_process_for_test_IPC(handle_buffer, expected_arr): @cuda_ipc @pytest.mark.parametrize("size", [0, 1, 1000]) -def test_IPC(size): +def test_IPC(size, global_context): import multiprocessing ctx = multiprocessing.get_context('spawn') - arr, cbuf = make_random_buffer(size=size, target='device') + arr, cbuf = make_random_buffer(size=size, target='device', + context=global_context) ipc_handle = cbuf.export_for_ipc() handle_buffer = ipc_handle.serialize() p = ctx.Process(target=other_process_for_test_IPC, diff --git a/python/pyarrow/tests/test_cuda_numba_interop.py b/python/pyarrow/tests/test_cuda_numba_interop.py index ff1722d278d5e..6faae0c735380 100644 --- a/python/pyarrow/tests/test_cuda_numba_interop.py +++ b/python/pyarrow/tests/test_cuda_numba_interop.py @@ -26,7 +26,9 @@ from numba.cuda.cudadrv.devicearray import DeviceNDArray # noqa: E402 -context_choices = None +# TODO(ARROW-8174): Refactor context_choices in test_cuda_numba_interop +# to be a module level fixture +context_choices = None # type: ignore context_choice_ids = ['pyarrow.cuda', 'numba.cuda'] diff --git a/python/pyarrow/tests/test_cython.py b/python/pyarrow/tests/test_cython.py index 202868d5c71e2..30fd806ce7801 100644 --- a/python/pyarrow/tests/test_cython.py +++ b/python/pyarrow/tests/test_cython.py @@ 
-116,7 +116,7 @@ def test_cython_api(tmpdir): arr = mod.make_null_array(5) assert mod.get_array_length(arr) == 5 assert arr.null_count == 5 - """.format(mod_path=str(tmpdir), mod_name='pyarrow_cython_example') + """.format(mod_name='pyarrow_cython_example') if sys.platform == 'win32': delim, var = ';', 'PATH' diff --git a/python/pyarrow/tests/test_dataset.py b/python/pyarrow/tests/test_dataset.py index 72fa280e67158..7fbe8bdc46373 100644 --- a/python/pyarrow/tests/test_dataset.py +++ b/python/pyarrow/tests/test_dataset.py @@ -34,7 +34,7 @@ try: import pyarrow.dataset as ds except ImportError: - ds = None + ds = None # type: ignore # Marks all of the tests in this module # Ignore these with pytest ... -m 'not dataset' diff --git a/python/pyarrow/tests/test_flight.py b/python/pyarrow/tests/test_flight.py index 322702a2f6c38..6351a339e3774 100644 --- a/python/pyarrow/tests/test_flight.py +++ b/python/pyarrow/tests/test_flight.py @@ -38,7 +38,7 @@ ClientMiddleware, ClientMiddlewareFactory, ) except ImportError: - flight = None + flight = None # type: ignore FlightClient, FlightServerBase = object, object ServerAuthHandler, ClientAuthHandler = object, object ServerMiddleware, ServerMiddlewareFactory = object, object diff --git a/python/pyarrow/tests/test_parquet.py b/python/pyarrow/tests/test_parquet.py index 6936e34645165..132a460200bda 100644 --- a/python/pyarrow/tests/test_parquet.py +++ b/python/pyarrow/tests/test_parquet.py @@ -35,7 +35,7 @@ try: import pyarrow.parquet as pq except ImportError: - pq = None + pq = None # type: ignore try: diff --git a/python/pyarrow/tests/test_serialization.py b/python/pyarrow/tests/test_serialization.py index e54cd88a3f6e9..cea7fd3a0cde1 100644 --- a/python/pyarrow/tests/test_serialization.py +++ b/python/pyarrow/tests/test_serialization.py @@ -37,7 +37,7 @@ torch = None # Blacklist the module in case `import torch` is costly before # failing (ARROW-2071) - sys.modules['torch'] = None + sys.modules['torch'] = None # type: ignore 
try: from scipy.sparse import coo_matrix, csr_matrix, csc_matrix diff --git a/python/requirements-test.txt b/python/requirements-test.txt index b019eac002067..08f8796420801 100644 --- a/python/requirements-test.txt +++ b/python/requirements-test.txt @@ -2,6 +2,7 @@ cffi cython hypothesis==5.0; python_version <= "3.5.2" hypothesis; python_version > "3.5.2" +mypy pandas==0.24; python_version <= "3.5.2" pandas; python_version > "3.5.2" pickle5; python_version == "3.6" or python_version == "3.7" diff --git a/python/setup.cfg b/python/setup.cfg index 162a507c9c668..8ed3fded6517b 100644 --- a/python/setup.cfg +++ b/python/setup.cfg @@ -28,3 +28,9 @@ build-dir = doc/_build addopts = --ignore=scripts filterwarnings = error:The SparseDataFrame:FutureWarning + +[mypy] +ignore_missing_imports=True +no_implicit_optional=True +check_untyped_defs=False +strict_equality=True