Skip to content

Commit

Permalink
Use CuPy array in pip_bitmap_column_to_binary_array (#1418)
Browse files Browse the repository at this point in the history
The performance regression in #1413 is caused by numba's `DeviceNDArray`
being slow to slice. Recent versions of cudf have simplified DataFrame construction and now delegate it
to logic similar to that which handles `__cuda_array_interface__` objects. Since that construction involves slicing the array, we need
slicing to be fast. We should therefore use a CuPy array instead of a `DeviceNDArray`, because CuPy arrays support fast
slicing.

closes #1413

Authors:
  - Michael Wang (https://github.com/isVoid)

Approvers:
  - Paul Taylor (https://github.com/trxcllnt)
  - Mark Harris (https://github.com/harrism)
  - https://github.com/jakirkham

URL: #1418
  • Loading branch information
isVoid authored Jul 31, 2024
1 parent 58d4212 commit fe3b0c9
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 8 deletions.
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2019, NVIDIA CORPORATION.
# Copyright (c) 2019-2024, NVIDIA CORPORATION.

import numpy as np

Expand Down Expand Up @@ -245,17 +245,17 @@ def test_pip_bitmap_column_to_binary_array():
expected = np.array(
[[0, 0, 0, 0], [1, 1, 0, 1], [0, 0, 1, 1], [1, 0, 0, 1]], dtype="int8"
)
np.testing.assert_array_equal(got.copy_to_host(), expected)
np.testing.assert_array_equal(got.get(), expected)

col = cudf.Series([], dtype="i8")._column
got = pip_bitmap_column_to_binary_array(col, width=0)
expected = np.array([], dtype="int8").reshape(0, 0)
np.testing.assert_array_equal(got.copy_to_host(), expected)
np.testing.assert_array_equal(got.get(), expected)

col = cudf.Series([None, None], dtype="float64")._column
got = pip_bitmap_column_to_binary_array(col, width=0)
expected = np.array([], dtype="int8").reshape(2, 0)
np.testing.assert_array_equal(got.copy_to_host(), expected)
np.testing.assert_array_equal(got.get(), expected)

col = cudf.Series(
[
Expand All @@ -273,9 +273,9 @@ def test_pip_bitmap_column_to_binary_array():
],
dtype="int8",
)
np.testing.assert_array_equal(got.copy_to_host(), expected)
np.testing.assert_array_equal(got.get(), expected)

col = cudf.Series([0, 0, 0])._column
got = pip_bitmap_column_to_binary_array(col, width=3)
expected = np.array([[0, 0, 0], [0, 0, 0], [0, 0, 0]], dtype="int8")
np.testing.assert_array_equal(got.copy_to_host(), expected)
np.testing.assert_array_equal(got.get(), expected)
5 changes: 3 additions & 2 deletions python/cuspatial/cuspatial/utils/join_utils.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
# Copyright (c) 2023, NVIDIA CORPORATION.
# Copyright (c) 2023-2024, NVIDIA CORPORATION.

import operator

import cupy as cp
from numba import cuda

import rmm
Expand All @@ -27,7 +28,7 @@ def binarize(in_col, out, width):

def apply_binarize(in_col, width):
buf = rmm.DeviceBuffer(size=(in_col.size * width))
out = cuda.as_cuda_array(buf).view("int8").reshape((in_col.size, width))
out = cp.asarray(buf).view("int8").reshape((in_col.size, width))
if out.size > 0:
out[:] = 0
binarize.forall(out.size)(in_col, out, width)
Expand Down

0 comments on commit fe3b0c9

Please sign in to comment.