Skip to content

Commit

Permalink
Make isinstance check pass for proxy ndarrays (rapidsai#16601)
Browse files Browse the repository at this point in the history
Closes rapidsai#14537.

Authors:
  - Matthew Murray (https://github.com/Matt711)
  - GALI PREM SAGAR (https://github.com/galipremsagar)

Approvers:
  - GALI PREM SAGAR (https://github.com/galipremsagar)
  - Matthew Roeschke (https://github.com/mroeschke)
  - Vyas Ramasubramani (https://github.com/vyasr)

URL: rapidsai#16601
  • Loading branch information
Matt711 authored and nvliyuan committed Sep 10, 2024
1 parent 5b84457 commit e0aa822
Show file tree
Hide file tree
Showing 4 changed files with 119 additions and 2 deletions.
23 changes: 23 additions & 0 deletions python/cudf/cudf/pandas/_wrappers/numpy.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,13 @@
from packaging import version

from ..fast_slow_proxy import (
_fast_slow_function_call,
_FastSlowAttribute,
is_proxy_object,
make_final_proxy_type,
make_intermediate_proxy_type,
)
from ..proxy_base import ProxyNDarrayBase
from .common import (
array_interface,
array_method,
Expand Down Expand Up @@ -105,18 +108,38 @@ def wrap_ndarray(cls, arr: cupy.ndarray | numpy.ndarray, constructor):
return super(cls, cls)._fsproxy_wrap(arr, constructor)


def ndarray__array_ufunc__(self, ufunc, method, *inputs, **kwargs):
    """Implement ``__array_ufunc__`` for the proxy ``ndarray`` type.

    The requested ufunc method (``getattr(ufunc, method)``) is invoked
    through ``_fast_slow_function_call`` with the given inputs and keyword
    arguments. Only the first element of the pair returned by that helper
    is used.

    Returns
    -------
    - a tuple of ``numpy.asarray``-converted items, when the result is a
      tuple whose first item is a proxy wrapping a ``numpy.ndarray``;
    - ``numpy.asarray(result)``, when the result itself is a proxy wrapping
      a ``numpy.ndarray``;
    - the result unchanged in every other case.
    """

    def _wraps_numpy(candidate):
        # True only for proxy objects whose wrapped value is a NumPy array.
        return is_proxy_object(candidate) and isinstance(
            candidate._fsproxy_wrapped, numpy.ndarray
        )

    bound_method = getattr(ufunc, method)
    result, _ = _fast_slow_function_call(bound_method, *inputs, **kwargs)

    if isinstance(result, tuple):
        # Only the first item decides whether the whole tuple is converted.
        if _wraps_numpy(result[0]):
            return tuple(numpy.asarray(item) for item in result)
        return result

    if _wraps_numpy(result):
        return numpy.asarray(result)
    return result


ndarray = make_final_proxy_type(
"ndarray",
cupy.ndarray,
numpy.ndarray,
fast_to_slow=cupy.ndarray.get,
slow_to_fast=cupy.asarray,
bases=(ProxyNDarrayBase,),
additional_attributes={
"__array__": array_method,
# So that pa.array(wrapped-numpy-array) works
"__arrow_array__": arrow_array_method,
"__cuda_array_interface__": cuda_array_interface,
"__array_interface__": array_interface,
"__array_ufunc__": ndarray__array_ufunc__,
# ndarrays are unhashable
"__hash__": None,
# iter(cupy-array) produces an iterable of zero-dim device
Expand Down
26 changes: 25 additions & 1 deletion python/cudf/cudf/pandas/fast_slow_proxy.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
from ..options import _env_get_bool
from ..testing import assert_eq
from .annotation import nvtx
from .proxy_base import ProxyNDarrayBase


def call_operator(fn, args, kwargs):
Expand Down Expand Up @@ -564,7 +565,17 @@ def _fsproxy_wrap(cls, value, func):
_FinalProxy subclasses can override this classmethod if they
need particular behaviour when wrapped up.
"""
proxy = object.__new__(cls)
# TODO: Replace the if-elif-else using singledispatch helper function
base_class = _get_proxy_base_class(cls)
if base_class is object:
proxy = base_class.__new__(cls)
elif base_class is ProxyNDarrayBase:
proxy = base_class.__new__(cls, value)
else:
raise TypeError(
f"Cannot create an proxy instance of {cls.__name__} using base class {base_class.__name__}. "
f"Expected either 'object' or another type in 'PROXY_BASE_CLASSES'"
)
proxy._fsproxy_wrapped = value
return proxy

Expand Down Expand Up @@ -1193,6 +1204,19 @@ def is_proxy_object(obj: Any) -> bool:
return False


def _get_proxy_base_class(cls):
    """Return the proxy base class found in ``cls``'s MRO, if any.

    Each member of ``PROXY_BASE_CLASSES`` is checked for membership in
    ``cls.__mro__``; the first match encountered is returned. When no
    member matches, ``object`` is returned.
    """
    return next(
        (
            candidate
            for candidate in PROXY_BASE_CLASSES
            if candidate in cls.__mro__
        ),
        object,
    )


# Base classes that a proxy type may inherit from. `_get_proxy_base_class`
# searches a class's MRO for a member of this set and falls back to `object`
# when none is present.
PROXY_BASE_CLASSES: set[type] = {
    ProxyNDarrayBase,
}


# The values of ``np.sctypeDict`` are NumPy scalar *type objects* (the keys
# are the names/aliases), so the element type of this set is ``type``.
NUMPY_TYPES: set[type] = set(np.sctypeDict.values())


Expand Down
22 changes: 22 additions & 0 deletions python/cudf/cudf/pandas/proxy_base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES.
# All rights reserved.
# SPDX-License-Identifier: Apache-2.0

import cupy as cp
import numpy as np


class ProxyNDarrayBase(np.ndarray):
    """``numpy.ndarray`` subclass used as a base class for proxy arrays.

    Instances carry a ``_fsproxy_wrapped`` attribute, which is set in
    ``__array_finalize__``.
    """

    def __new__(cls, arr):
        """Build a ``cls`` view from a NumPy or CuPy array.

        A ``cupy.ndarray`` is first converted with its ``get()`` method.
        Anything that is not a ``numpy.ndarray`` after that step raises
        ``TypeError``.
        """
        host = arr.get() if isinstance(arr, cp.ndarray) else arr
        if isinstance(host, np.ndarray):
            return np.asarray(host, dtype=host.dtype).view(cls)
        raise TypeError(
            "Unsupported array type. Must be numpy.ndarray or cupy.ndarray"
        )

    def __array_finalize__(self, obj):
        """Propagate ``_fsproxy_wrapped`` from ``obj`` to the new array.

        When ``obj`` has no ``_fsproxy_wrapped`` attribute, ``obj`` itself
        is stored. Nothing is set when ``obj`` is ``None``.
        """
        if obj is not None:
            self._fsproxy_wrapped = getattr(obj, "_fsproxy_wrapped", obj)
50 changes: 49 additions & 1 deletion python/cudf/cudf_pandas_tests/test_cudf_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,18 +14,20 @@
import types
from io import BytesIO, StringIO

import cupy as cp
import jupyter_client
import nbformat
import numpy as np
import pyarrow as pa
import pytest
from nbconvert.preprocessors import ExecutePreprocessor
from numba import NumbaDeprecationWarning
from numba import NumbaDeprecationWarning, vectorize
from pytz import utc

from cudf.core._compat import PANDAS_GE_220
from cudf.pandas import LOADED, Profiler
from cudf.pandas.fast_slow_proxy import _Unusable, is_proxy_object
from cudf.testing import assert_eq

if not LOADED:
raise ImportError("These tests must be run with cudf.pandas loaded")
Expand Down Expand Up @@ -1690,3 +1692,49 @@ def test_notebook_slow_repr():
assert (
string in html_result
), f"Expected string {string} not found in the output"


def test_numpy_ndarray_isinstancecheck(array):
    """Both arrays supplied by the ``array`` fixture pass ``isinstance(..., np.ndarray)``."""
    first, second = array
    for candidate in (first, second):
        assert isinstance(candidate, np.ndarray)


def test_numpy_ndarray_np_ufunc(array):
    """An ``np.vectorize`` function gives equal results for both fixture arrays."""
    first, second = array

    def _increment(values):
        return values + 1

    add_one_ufunc = np.vectorize(_increment)

    left = add_one_ufunc(first)
    right = add_one_ufunc(second)
    assert_eq(left, right)


def test_numpy_ndarray_cp_ufunc(array):
    """A ``cp.vectorize`` function gives equal results for both fixture arrays.

    The first array is passed through ``cp.asarray`` before the call; the
    second is passed to the vectorized function as-is.
    """
    first, second = array

    def _increment(values):
        return values + 1

    add_one_ufunc = cp.vectorize(_increment)

    left = add_one_ufunc(cp.asarray(first))
    right = add_one_ufunc(second)
    assert_eq(left, right)


def test_numpy_ndarray_numba_ufunc(array):
    """A numba ``@vectorize`` function gives equal results for both fixture arrays."""
    first, second = array

    @vectorize
    def add_one_ufunc(values):
        return values + 1

    left = add_one_ufunc(first)
    right = add_one_ufunc(second)
    assert_eq(left, right)


def test_numpy_ndarray_numba_cuda_ufunc(array):
    """A numba ``@vectorize`` function with ``target="cuda"`` agrees on both fixture arrays.

    Each result is passed through ``cp.asarray`` before the comparison.
    """
    first, second = array

    @vectorize(["int64(int64)"], target="cuda")
    def add_one_ufunc(value):
        return value + 1

    left = cp.asarray(add_one_ufunc(first))
    right = cp.asarray(add_one_ufunc(second))
    assert_eq(left, right)

0 comments on commit e0aa822

Please sign in to comment.