diff --git a/cpp/benchmarks/join/conditional_join.cu b/cpp/benchmarks/join/conditional_join.cu index e332d09d31b..2deb888cc5c 100644 --- a/cpp/benchmarks/join/conditional_join.cu +++ b/cpp/benchmarks/join/conditional_join.cu @@ -16,6 +16,8 @@ #include "join_common.hpp" +auto const CONDITIONAL_JOIN_SIZE_RANGE = std::vector{1000, 100'000}; + template void nvbench_conditional_inner_join(nvbench::state& state, nvbench::type_list>) @@ -46,12 +48,12 @@ NVBENCH_BENCH_TYPES(nvbench_conditional_inner_join, NVBENCH_TYPE_AXES(JOIN_KEY_TYPE_RANGE, JOIN_NULLABLE_RANGE)) .set_name("conditional_inner_join") .set_type_axes_names({"Key", "Nullable"}) - .add_int64_axis("left_size", JOIN_SIZE_RANGE) - .add_int64_axis("right_size", JOIN_SIZE_RANGE); + .add_int64_axis("left_size", CONDITIONAL_JOIN_SIZE_RANGE) + .add_int64_axis("right_size", CONDITIONAL_JOIN_SIZE_RANGE); NVBENCH_BENCH_TYPES(nvbench_conditional_left_join, NVBENCH_TYPE_AXES(JOIN_KEY_TYPE_RANGE, JOIN_NULLABLE_RANGE)) .set_name("conditional_left_join") .set_type_axes_names({"Key", "Nullable"}) - .add_int64_axis("left_size", JOIN_SIZE_RANGE) - .add_int64_axis("right_size", JOIN_SIZE_RANGE); + .add_int64_axis("left_size", CONDITIONAL_JOIN_SIZE_RANGE) + .add_int64_axis("right_size", CONDITIONAL_JOIN_SIZE_RANGE); diff --git a/cpp/include/cudf/rolling.hpp b/cpp/include/cudf/rolling.hpp index 2cd34f48265..d55322dd3e8 100644 --- a/cpp/include/cudf/rolling.hpp +++ b/cpp/include/cudf/rolling.hpp @@ -57,6 +57,7 @@ namespace cudf { * @param[in] min_periods Minimum number of observations in window required to have a value, * otherwise element `i` is null. * @param[in] agg The rolling window aggregation type (SUM, MAX, MIN, etc.) + * @param[in] stream CUDA stream used for device memory operations and kernel launches * @param[in] mr Device memory resource used to allocate the returned column's device memory * * @returns A nullable output column containing the rolling window results @@ -67,6 +68,7 @@ std::unique_ptr rolling_window( size_type following_window, size_type min_periods, rolling_aggregation const& agg, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @@ -77,6 +79,7 @@ std::unique_ptr rolling_window( * size_type following_window, * size_type min_periods, * rolling_aggregation const& agg, + * rmm::cuda_stream_view stream, * rmm::device_async_resource_ref mr) * * @param default_outputs A column of per-row default values to be returned instead @@ -90,6 +93,7 @@ std::unique_ptr rolling_window( size_type following_window, size_type min_periods, rolling_aggregation const& agg, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @@ -227,6 +231,7 @@ struct window_bounds { * @param[in] min_periods Minimum number of observations in window required to have a value, * otherwise element `i` is null. * @param[in] aggr The rolling window aggregation type (SUM, MAX, MIN, etc.) 
+ * @param[in] stream CUDA stream used for device memory operations and kernel launches * @param[in] mr Device memory resource used to allocate the returned column's device memory * * @returns A nullable output column containing the rolling window results @@ -238,6 +243,7 @@ std::unique_ptr grouped_rolling_window( size_type following_window, size_type min_periods, rolling_aggregation const& aggr, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @@ -249,6 +255,7 @@ std::unique_ptr grouped_rolling_window( * size_type following_window, * size_type min_periods, * rolling_aggregation const& aggr, + * rmm::cuda_stream_view stream, * rmm::device_async_resource_ref mr) */ std::unique_ptr grouped_rolling_window( @@ -258,6 +265,7 @@ std::unique_ptr grouped_rolling_window( window_bounds following_window, size_type min_periods, rolling_aggregation const& aggr, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @@ -269,6 +277,7 @@ std::unique_ptr grouped_rolling_window( * size_type following_window, * size_type min_periods, * rolling_aggregation const& aggr, + * rmm::cuda_stream_view stream, * rmm::device_async_resource_ref mr) * * @param default_outputs A column of per-row default values to be returned instead @@ -283,6 +292,7 @@ std::unique_ptr grouped_rolling_window( size_type following_window, size_type min_periods, rolling_aggregation const& aggr, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @@ -295,6 +305,7 @@ std::unique_ptr grouped_rolling_window( * size_type following_window, * size_type min_periods, * rolling_aggregation const& aggr, + * rmm::cuda_stream_view stream, * rmm::device_async_resource_ref mr) */ std::unique_ptr grouped_rolling_window( @@ -305,6 +316,7 @@ std::unique_ptr grouped_rolling_window( window_bounds following_window, size_type min_periods, rolling_aggregation const& aggr, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @@ -387,6 +399,7 @@ std::unique_ptr grouped_rolling_window( * @param[in] min_periods Minimum number of observations in window required to have a value, * otherwise element `i` is null. * @param[in] aggr The rolling window aggregation type (SUM, MAX, MIN, etc.)
+ * @param[in] stream CUDA stream used for device memory operations and kernel launches * @param[in] mr Device memory resource used to allocate the returned column's device memory * * @returns A nullable output column containing the rolling window results @@ -400,6 +413,7 @@ std::unique_ptr grouped_time_range_rolling_window( size_type following_window_in_days, size_type min_periods, rolling_aggregation const& aggr, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @@ -415,6 +429,7 @@ std::unique_ptr grouped_time_range_rolling_window( * size_type following_window_in_days, * size_type min_periods, * rolling_aggregation const& aggr, + * rmm::cuda_stream_view stream, * rmm::device_async_resource_ref mr) * * The `preceding_window_in_days` and `following_window_in_days` are specified as a `window_bounds` @@ -429,6 +444,7 @@ std::unique_ptr grouped_time_range_rolling_window( window_bounds following_window_in_days, size_type min_periods, rolling_aggregation const& aggr, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @@ -536,6 +552,7 @@ std::unique_ptr grouped_time_range_rolling_window( * @param[in] min_periods Minimum number of observations in window required to have a value, * otherwise element `i` is null. * @param[in] aggr The rolling window aggregation type (SUM, MAX, MIN, etc.) + * @param[in] stream CUDA stream used for device memory operations and kernel launches * @param[in] mr Device memory resource used to allocate the returned column's device memory * * @returns A nullable output column containing the rolling window results @@ -549,6 +566,7 @@ std::unique_ptr grouped_range_rolling_window( range_window_bounds const& following, size_type min_periods, rolling_aggregation const& aggr, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @@ -582,6 +600,7 @@ std::unique_ptr grouped_range_rolling_window( * @param[in] min_periods Minimum number of observations in window required to have a value, * otherwise element `i` is null. * @param[in] agg The rolling window aggregation type (sum, max, min, etc.) + * @param[in] stream CUDA stream used for device memory operations and kernel launches * @param[in] mr Device memory resource used to allocate the returned column's device memory * * @returns A nullable output column containing the rolling window results @@ -592,6 +611,7 @@ std::unique_ptr rolling_window( column_view const& following_window, size_type min_periods, rolling_aggregation const& agg, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @} */ // end of group diff --git a/cpp/include/cudf/rolling/range_window_bounds.hpp b/cpp/include/cudf/rolling/range_window_bounds.hpp index 81885ade2f0..a9ee12cea27 100644 --- a/cpp/include/cudf/rolling/range_window_bounds.hpp +++ b/cpp/include/cudf/rolling/range_window_bounds.hpp @@ -56,18 +56,22 @@ struct range_window_bounds { * @brief Factory method to construct a bounded window boundary. 
* * @param boundary Finite window boundary + * @param stream CUDA stream used for device memory operations and kernel launches * @return A bounded window boundary object */ - static range_window_bounds get(scalar const& boundary); + static range_window_bounds get(scalar const& boundary, + rmm::cuda_stream_view stream = cudf::get_default_stream()); /** * @brief Factory method to construct a window boundary * limited to the value of the current row * * @param type The datatype of the window boundary + * @param stream CUDA stream used for device memory operations and kernel launches * @return A "current row" window boundary object */ - static range_window_bounds current_row(data_type type); + static range_window_bounds current_row(data_type type, + rmm::cuda_stream_view stream = cudf::get_default_stream()); /** * @brief Whether or not the window is bounded to the current row @@ -81,9 +85,11 @@ struct range_window_bounds { * @brief Factory method to construct an unbounded window boundary. * * @param type The datatype of the window boundary + * @param stream CUDA stream used for device memory operations and kernel launches * @return An unbounded window boundary object */ - static range_window_bounds unbounded(data_type type); + static range_window_bounds unbounded(data_type type, + rmm::cuda_stream_view stream = cudf::get_default_stream()); /** * @brief Whether or not the window is unbounded @@ -107,7 +113,9 @@ struct range_window_bounds { extent_type _extent{extent_type::UNBOUNDED}; std::shared_ptr _range_scalar{nullptr}; // To enable copy construction/assignment. - range_window_bounds(extent_type extent_, std::unique_ptr range_scalar_); + range_window_bounds(extent_type extent_, + std::unique_ptr range_scalar_, + rmm::cuda_stream_view = cudf::get_default_stream()); }; /** @} */ // end of group diff --git a/cpp/src/rolling/grouped_rolling.cu b/cpp/src/rolling/grouped_rolling.cu index d461ed7a109..1158bf22494 100644 --- a/cpp/src/rolling/grouped_rolling.cu +++ b/cpp/src/rolling/grouped_rolling.cu @@ -40,59 +40,6 @@ #include namespace cudf { -std::unique_ptr grouped_rolling_window(table_view const& group_keys, - column_view const& input, - size_type preceding_window, - size_type following_window, - size_type min_periods, - rolling_aggregation const& aggr, - rmm::device_async_resource_ref mr) -{ - return grouped_rolling_window(group_keys, - input, - window_bounds::get(preceding_window), - window_bounds::get(following_window), - min_periods, - aggr, - mr); -} - -std::unique_ptr grouped_rolling_window(table_view const& group_keys, - column_view const& input, - window_bounds preceding_window, - window_bounds following_window, - size_type min_periods, - rolling_aggregation const& aggr, - rmm::device_async_resource_ref mr) -{ - return grouped_rolling_window(group_keys, - input, - empty_like(input)->view(), - preceding_window, - following_window, - min_periods, - aggr, - mr); -} - -std::unique_ptr grouped_rolling_window(table_view const& group_keys, - column_view const& input, - column_view const& default_outputs, - size_type preceding_window, - size_type following_window, - size_type min_periods, - rolling_aggregation const& aggr, - rmm::device_async_resource_ref mr) -{ - return grouped_rolling_window(group_keys, - input, - default_outputs, - window_bounds::get(preceding_window), - window_bounds::get(following_window), - min_periods, - aggr, - mr); -} namespace detail { @@ -237,8 +184,8 @@ std::unique_ptr grouped_rolling_window(table_view const& group_keys, if (group_keys.num_columns() == 0) { // No 
Groupby columns specified. Treat as one big group. - return rolling_window( - input, default_outputs, preceding_window, following_window, min_periods, aggr, mr); + return detail::rolling_window( + input, default_outputs, preceding_window, following_window, min_periods, aggr, stream, mr); } using sort_groupby_helper = cudf::groupby::detail::sort::sort_groupby_helper; @@ -306,6 +253,7 @@ std::unique_ptr grouped_rolling_window(table_view const& group_keys, window_bounds following_window_bounds, size_type min_periods, rolling_aggregation const& aggr, + rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { return detail::grouped_rolling_window(group_keys, @@ -315,7 +263,67 @@ std::unique_ptr grouped_rolling_window(table_view const& group_keys, following_window_bounds, min_periods, aggr, - cudf::get_default_stream(), + stream, + mr); +} + +std::unique_ptr grouped_rolling_window(table_view const& group_keys, + column_view const& input, + size_type preceding_window, + size_type following_window, + size_type min_periods, + rolling_aggregation const& aggr, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) +{ + return grouped_rolling_window(group_keys, + input, + window_bounds::get(preceding_window), + window_bounds::get(following_window), + min_periods, + aggr, + stream, + mr); +} + +std::unique_ptr grouped_rolling_window(table_view const& group_keys, + column_view const& input, + window_bounds preceding_window, + window_bounds following_window, + size_type min_periods, + rolling_aggregation const& aggr, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) +{ + return detail::grouped_rolling_window(group_keys, + input, + empty_like(input)->view(), + preceding_window, + following_window, + min_periods, + aggr, + stream, + mr); +} + +std::unique_ptr grouped_rolling_window(table_view const& group_keys, + column_view const& input, + column_view const& default_outputs, + size_type preceding_window, + size_type following_window, + size_type min_periods, + rolling_aggregation const& aggr, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) +{ + return detail::grouped_rolling_window(group_keys, + input, + default_outputs, + window_bounds::get(preceding_window), + window_bounds::get(following_window), + min_periods, + aggr, + stream, mr); } @@ -1047,14 +1055,15 @@ struct dispatch_grouped_range_rolling_window { */ struct to_duration_bounds { template (), void>* = nullptr> - range_window_bounds operator()(size_type num_days) const + range_window_bounds operator()(size_type num_days, rmm::cuda_stream_view stream) const { using DurationT = typename OrderBy::duration; - return range_window_bounds::get(duration_scalar{duration_D{num_days}, true}); + return range_window_bounds::get(duration_scalar{duration_D{num_days}, true, stream}, + stream); } template (), void>* = nullptr> - range_window_bounds operator()(size_type) const + range_window_bounds operator()(size_type, rmm::cuda_stream_view) const { CUDF_FAIL("Expected timestamp orderby column."); } @@ -1085,9 +1094,11 @@ data_type get_duration_type_for(cudf::data_type timestamp_type) * @param timestamp_type Data-type of the orderby column to which the `num_days` is to be adapted. * @return range_window_bounds A `range_window_bounds` to be used with the new API. 
*/ -range_window_bounds to_range_bounds(cudf::size_type num_days, cudf::data_type timestamp_type) +range_window_bounds to_range_bounds(cudf::size_type num_days, + cudf::data_type timestamp_type, + rmm::cuda_stream_view stream) { - return cudf::type_dispatcher(timestamp_type, to_duration_bounds{}, num_days); + return cudf::type_dispatcher(timestamp_type, to_duration_bounds{}, num_days, stream); } /** @@ -1101,11 +1112,13 @@ range_window_bounds to_range_bounds(cudf::size_type num_days, cudf::data_type ti * @return range_window_bounds A `range_window_bounds` to be used with the new API. */ range_window_bounds to_range_bounds(cudf::window_bounds const& days_bounds, - cudf::data_type timestamp_type) + cudf::data_type timestamp_type, + rmm::cuda_stream_view stream) { return days_bounds.is_unbounded() - ? range_window_bounds::unbounded(get_duration_type_for(timestamp_type)) - : cudf::type_dispatcher(timestamp_type, to_duration_bounds{}, days_bounds.value()); + ? range_window_bounds::unbounded(get_duration_type_for(timestamp_type), stream) + : cudf::type_dispatcher( + timestamp_type, to_duration_bounds{}, days_bounds.value(), stream); } } // namespace @@ -1199,11 +1212,12 @@ std::unique_ptr grouped_time_range_rolling_window(table_view const& grou size_type following_window_in_days, size_type min_periods, rolling_aggregation const& aggr, + rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); - auto preceding = to_range_bounds(preceding_window_in_days, timestamp_column.type()); - auto following = to_range_bounds(following_window_in_days, timestamp_column.type()); + auto preceding = to_range_bounds(preceding_window_in_days, timestamp_column.type(), stream); + auto following = to_range_bounds(following_window_in_days, timestamp_column.type(), stream); return detail::grouped_range_rolling_window(group_keys, timestamp_column, @@ -1213,7 +1227,7 @@ std::unique_ptr grouped_time_range_rolling_window(table_view const& grou following, min_periods, aggr, - cudf::get_default_stream(), + stream, mr); } @@ -1237,13 +1251,14 @@ std::unique_ptr grouped_time_range_rolling_window(table_view const& grou window_bounds following_window_in_days, size_type min_periods, rolling_aggregation const& aggr, + rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); range_window_bounds preceding = - to_range_bounds(preceding_window_in_days, timestamp_column.type()); + to_range_bounds(preceding_window_in_days, timestamp_column.type(), stream); range_window_bounds following = - to_range_bounds(following_window_in_days, timestamp_column.type()); + to_range_bounds(following_window_in_days, timestamp_column.type(), stream); return detail::grouped_range_rolling_window(group_keys, timestamp_column, @@ -1253,7 +1268,7 @@ std::unique_ptr grouped_time_range_rolling_window(table_view const& grou following, min_periods, aggr, - cudf::get_default_stream(), + stream, mr); } @@ -1277,6 +1292,7 @@ std::unique_ptr grouped_range_rolling_window(table_view const& group_key range_window_bounds const& following, size_type min_periods, rolling_aggregation const& aggr, + rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); @@ -1288,7 +1304,7 @@ std::unique_ptr grouped_range_rolling_window(table_view const& group_key following, min_periods, aggr, - cudf::get_default_stream(), + stream, mr); } diff --git a/cpp/src/rolling/range_window_bounds.cpp b/cpp/src/rolling/range_window_bounds.cpp index 68e80c6e84e..69792136c64 100644 --- 
a/cpp/src/rolling/range_window_bounds.cpp +++ b/cpp/src/rolling/range_window_bounds.cpp @@ -32,7 +32,8 @@ namespace { */ struct range_scalar_constructor { template ())> - std::unique_ptr operator()(scalar const& range_scalar_) const + std::unique_ptr operator()(scalar const& range_scalar_, + rmm::cuda_stream_view stream) const { CUDF_FAIL( "Unsupported range type. " @@ -40,51 +41,57 @@ struct range_scalar_constructor { } template ())> - std::unique_ptr operator()(scalar const& range_scalar_) const + std::unique_ptr operator()(scalar const& range_scalar_, + rmm::cuda_stream_view stream) const { return std::make_unique>( - static_cast const&>(range_scalar_)); + static_cast const&>(range_scalar_), stream); } template () && not cudf::is_boolean())> - std::unique_ptr operator()(scalar const& range_scalar_) const + std::unique_ptr operator()(scalar const& range_scalar_, + rmm::cuda_stream_view stream) const { - return std::make_unique>( - static_cast const&>(range_scalar_)); + return std::make_unique>(static_cast const&>(range_scalar_), + stream); } template ())> - std::unique_ptr operator()(scalar const& range_scalar_) const + std::unique_ptr operator()(scalar const& range_scalar_, + rmm::cuda_stream_view stream) const { return std::make_unique>( - static_cast const&>(range_scalar_)); + static_cast const&>(range_scalar_), stream); } }; } // namespace -range_window_bounds::range_window_bounds(extent_type extent_, std::unique_ptr range_scalar_) +range_window_bounds::range_window_bounds(extent_type extent_, + std::unique_ptr range_scalar_, + rmm::cuda_stream_view stream) : _extent{extent_}, _range_scalar{std::move(range_scalar_)} { CUDF_EXPECTS(_range_scalar.get(), "Range window scalar cannot be null."); CUDF_EXPECTS(_extent == extent_type::UNBOUNDED || _extent == extent_type::CURRENT_ROW || - _range_scalar->is_valid(), + _range_scalar->is_valid(stream), "Bounded Range window scalar must be valid."); } -range_window_bounds range_window_bounds::unbounded(data_type type) +range_window_bounds range_window_bounds::unbounded(data_type type, rmm::cuda_stream_view stream) { - return {extent_type::UNBOUNDED, make_default_constructed_scalar(type)}; + return {extent_type::UNBOUNDED, make_default_constructed_scalar(type, stream), stream}; } -range_window_bounds range_window_bounds::current_row(data_type type) +range_window_bounds range_window_bounds::current_row(data_type type, rmm::cuda_stream_view stream) { - return {extent_type::CURRENT_ROW, make_default_constructed_scalar(type)}; + return {extent_type::CURRENT_ROW, make_default_constructed_scalar(type, stream), stream}; } -range_window_bounds range_window_bounds::get(scalar const& boundary) +range_window_bounds range_window_bounds::get(scalar const& boundary, rmm::cuda_stream_view stream) { return {extent_type::BOUNDED, - cudf::type_dispatcher(boundary.type(), range_scalar_constructor{}, boundary)}; + cudf::type_dispatcher(boundary.type(), range_scalar_constructor{}, boundary, stream), + stream}; } } // namespace cudf diff --git a/cpp/src/rolling/rolling.cu b/cpp/src/rolling/rolling.cu index a308ed8a7a6..e612bd01118 100644 --- a/cpp/src/rolling/rolling.cu +++ b/cpp/src/rolling/rolling.cu @@ -32,17 +32,12 @@ std::unique_ptr rolling_window(column_view const& input, size_type following_window, size_type min_periods, rolling_aggregation const& agg, + rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); - return detail::rolling_window(input, - default_outputs, - preceding_window, - following_window, - min_periods, - agg, - 
cudf::get_default_stream(), - mr); + return detail::rolling_window( + input, default_outputs, preceding_window, following_window, min_periods, agg, stream, mr); } // Applies a fixed-size rolling window function to the values in a column, without default specified @@ -51,6 +46,7 @@ std::unique_ptr rolling_window(column_view const& input, size_type following_window, size_type min_periods, rolling_aggregation const& agg, + rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); @@ -62,7 +58,7 @@ std::unique_ptr rolling_window(column_view const& input, following_window, min_periods, agg, - cudf::get_default_stream(), + stream, mr); } @@ -72,11 +68,12 @@ std::unique_ptr rolling_window(column_view const& input, column_view const& following_window, size_type min_periods, rolling_aggregation const& agg, + rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); return detail::rolling_window( - input, preceding_window, following_window, min_periods, agg, cudf::get_default_stream(), mr); + input, preceding_window, following_window, min_periods, agg, stream, mr); } } // namespace cudf diff --git a/cpp/src/strings/case.cu b/cpp/src/strings/case.cu index c1688d20791..27befdea209 100644 --- a/cpp/src/strings/case.cu +++ b/cpp/src/strings/case.cu @@ -294,7 +294,7 @@ CUDF_KERNEL void has_multibytes_kernel(char const* d_input_chars, auto const mb_total = block_reduce(temp_storage).Reduce(mb_count, cub::Sum()); if ((lane_idx == 0) && (mb_total > 0)) { - cuda::atomic_ref ref{*d_output}; + cuda::atomic_ref ref{*d_output}; ref.fetch_add(mb_total, cuda::std::memory_order_relaxed); } } diff --git a/cpp/src/strings/replace/multi.cu b/cpp/src/strings/replace/multi.cu index 8e5c5cf60b8..43a3d69091a 100644 --- a/cpp/src/strings/replace/multi.cu +++ b/cpp/src/strings/replace/multi.cu @@ -283,7 +283,7 @@ CUDF_KERNEL void count_targets(replace_multi_parallel_fn fn, int64_t chars_bytes auto const total = block_reduce(temp_storage).Reduce(count, cub::Sum()); if ((lane_idx == 0) && (total > 0)) { - cuda::atomic_ref ref{*d_output}; + cuda::atomic_ref ref{*d_output}; ref.fetch_add(total, cuda::std::memory_order_relaxed); } } diff --git a/cpp/src/strings/replace/replace.cu b/cpp/src/strings/replace/replace.cu index 501e6d547e6..f7a3a3aea5c 100644 --- a/cpp/src/strings/replace/replace.cu +++ b/cpp/src/strings/replace/replace.cu @@ -238,6 +238,31 @@ struct replace_parallel_chars_fn { cudf::size_type maxrepl; }; +template +CUDF_KERNEL void count_targets_kernel(replace_parallel_chars_fn fn, + int64_t chars_bytes, + int64_t* d_output) +{ + auto const idx = cudf::detail::grid_1d::global_thread_id(); + auto const byte_idx = static_cast(idx) * bytes_per_thread; + auto const lane_idx = static_cast(threadIdx.x); + + using block_reduce = cub::BlockReduce; + __shared__ typename block_reduce::TempStorage temp_storage; + + int64_t count = 0; + // each thread processes multiple bytes + for (auto i = byte_idx; (i < (byte_idx + bytes_per_thread)) && (i < chars_bytes); ++i) { + count += fn.has_target(i); + } + auto const total = block_reduce(temp_storage).Reduce(count, cub::Sum()); + + if ((lane_idx == 0) && (total > 0)) { + cuda::atomic_ref ref{*d_output}; + ref.fetch_add(total, cuda::std::memory_order_relaxed); + } +} + std::unique_ptr replace_character_parallel(strings_column_view const& input, string_view const& d_target, string_view const& d_replacement, @@ -260,10 +285,14 @@ std::unique_ptr replace_character_parallel(strings_column_view const& in // Count the number of targets 
in the entire column. // Note this may over-count in the case where a target spans adjacent strings. - auto target_count = thrust::count_if(rmm::exec_policy_nosync(stream), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(chars_bytes), - [fn] __device__(int64_t idx) { return fn.has_target(idx); }); + rmm::device_scalar d_target_count(0, stream); + constexpr int64_t block_size = 512; + constexpr size_type bytes_per_thread = 4; + auto const num_blocks = util::div_rounding_up_safe( + util::div_rounding_up_safe(chars_bytes, static_cast(bytes_per_thread)), block_size); + count_targets_kernel + <<>>(fn, chars_bytes, d_target_count.data()); + auto target_count = d_target_count.value(stream); // Create a vector of every target position in the chars column. // These may also include overlapping targets which will be resolved later. diff --git a/cpp/src/strings/split/split.cuh b/cpp/src/strings/split/split.cuh index ae3c0b3aa12..23614ac0733 100644 --- a/cpp/src/strings/split/split.cuh +++ b/cpp/src/strings/split/split.cuh @@ -327,7 +327,7 @@ CUDF_KERNEL void count_delimiters_kernel(Tokenizer tokenizer, auto const total = block_reduce(temp_storage).Reduce(count, cub::Sum()); if ((lane_idx == 0) && (total > 0)) { - cuda::atomic_ref ref{*d_output}; + cuda::atomic_ref ref{*d_output}; ref.fetch_add(total, cuda::std::memory_order_relaxed); } } diff --git a/cpp/src/text/tokenize.cu b/cpp/src/text/tokenize.cu index 25406bce759..3ce6064d9c2 100644 --- a/cpp/src/text/tokenize.cu +++ b/cpp/src/text/tokenize.cu @@ -121,7 +121,7 @@ CUDF_KERNEL void count_characters(uint8_t const* d_chars, int64_t chars_bytes, i auto const total = block_reduce(temp_storage).Reduce(count, cub::Sum()); if ((lane_idx == 0) && (total > 0)) { - cuda::atomic_ref ref{*d_output}; + cuda::atomic_ref ref{*d_output}; ref.fetch_add(total, cuda::std::memory_order_relaxed); } } diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index f6d762cc2ec..329edbe4d36 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -571,9 +571,9 @@ ConfigureTest( large_strings/concatenate_tests.cpp large_strings/case_tests.cpp large_strings/large_strings_fixture.cpp - large_strings/many_strings_tests.cpp large_strings/merge_tests.cpp large_strings/parquet_tests.cpp + large_strings/replace_tests.cpp large_strings/reshape_tests.cpp large_strings/split_strings_tests.cpp GPUS 1 @@ -693,10 +693,11 @@ ConfigureTest(STREAM_NULL_MASK_TEST streams/null_mask_test.cpp STREAM_MODE testi ConfigureTest(STREAM_ORCIO_TEST streams/io/orc_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_PARQUETIO_TEST streams/io/parquet_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_POOL_TEST streams/pool_test.cu STREAM_MODE testing) +ConfigureTest(STREAM_REDUCTION_TEST streams/reduction_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_REPLACE_TEST streams/replace_test.cpp STREAM_MODE testing) +ConfigureTest(STREAM_ROLLING_TEST streams/rolling_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_SEARCH_TEST streams/search_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_SORTING_TEST streams/sorting_test.cpp STREAM_MODE testing) -ConfigureTest(STREAM_REDUCTION_TEST streams/reduction_test.cpp STREAM_MODE testing) ConfigureTest( STREAM_STRINGS_TEST streams/strings/case_test.cpp diff --git a/cpp/tests/large_strings/many_strings_tests.cpp b/cpp/tests/large_strings/replace_tests.cpp similarity index 72% rename from cpp/tests/large_strings/many_strings_tests.cpp rename to cpp/tests/large_strings/replace_tests.cpp index 73fbb21d014..aa65ec0c010 
100644 --- a/cpp/tests/large_strings/many_strings_tests.cpp +++ b/cpp/tests/large_strings/replace_tests.cpp @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -28,9 +29,9 @@ #include #include -struct StringsManyTest : public cudf::test::StringsLargeTest {}; +struct ReplaceTest : public cudf::test::StringsLargeTest {}; -TEST_F(StringsManyTest, Replace) +TEST_F(ReplaceTest, ReplaceLong) { auto const expected = this->very_long_column(); auto const view = cudf::column_view(expected); @@ -65,3 +66,22 @@ TEST_F(StringsManyTest, Replace) CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(c, expected); } } + +TEST_F(ReplaceTest, ReplaceWide) +{ + auto const expected = this->long_column(); + auto const view = cudf::column_view(expected); + auto const multiplier = 10; + auto const separator = cudf::string_scalar("|"); + auto const input = cudf::strings::concatenate( + cudf::table_view(std::vector(multiplier, view)), separator); + + auto const input_view = cudf::strings_column_view(input->view()); + auto const target = cudf::string_scalar("3"); // fake the actual replace; + auto const repl = cudf::string_scalar("3"); // logic still builds the output + auto result = cudf::strings::replace(input_view, target, repl); + + auto sv = cudf::strings_column_view(result->view()); + EXPECT_EQ(sv.offsets().type(), cudf::data_type{cudf::type_id::INT64}); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(input->view(), result->view()); +} diff --git a/cpp/tests/streams/rolling_test.cpp b/cpp/tests/streams/rolling_test.cpp new file mode 100644 index 00000000000..b352ad2c0d2 --- /dev/null +++ b/cpp/tests/streams/rolling_test.cpp @@ -0,0 +1,246 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include + +#include +#include +#include +#include + +class RollingTest : public cudf::test::BaseFixture {}; + +TEST_F(RollingTest, FixedSize) +{ + cudf::test::fixed_width_column_wrapper input({1, 2, 3, 4, 5, 6, 7, 8, 9}); + + cudf::rolling_window(input, + 2, + 3, + 1, + *cudf::make_min_aggregation(), + cudf::test::get_default_stream()); +} + +TEST_F(RollingTest, FixedSizeDefault) +{ + cudf::test::fixed_width_column_wrapper input({1, 2, 3, 4, 5, 6, 7, 8, 9}); + cudf::test::fixed_width_column_wrapper defaults({42, 42, 42, 42, 9, 9, 7, 1, 1}); + + cudf::rolling_window(input, + defaults, + 2, + 3, + 1, + *cudf::make_lead_aggregation(1), + cudf::test::get_default_stream()); +} + +TEST_F(RollingTest, VariableSize) +{ + cudf::test::fixed_width_column_wrapper input({1, 2, 3, 4, 5, 6, 7, 8, 9}); + cudf::test::fixed_width_column_wrapper preceding({2, 2, 2, 2, 3, 3, 3, 3, 3}); + cudf::test::fixed_width_column_wrapper following({3, 3, 3, 3, 3, 2, 2, 2, 2}); + + cudf::rolling_window(input, + preceding, + following, + 1, + *cudf::make_min_aggregation(), + cudf::test::get_default_stream()); +} + +class GroupedRollingTest : public cudf::test::BaseFixture {}; + +TEST_F(GroupedRollingTest, FixedSize) +{ + cudf::test::fixed_width_column_wrapper input({1, 2, 3, 4, 5, 6, 7, 8, 9}); + + cudf::test::fixed_width_column_wrapper key_0({1, 1, 1, 2, 2, 2, 3, 3, 3}); + + cudf::test::fixed_width_column_wrapper key_1({4, 4, 4, 5, 5, 5, 6, 6, 6}); + + cudf::table_view grouping_keys{std::vector{key_0, key_1}}; + + cudf::grouped_rolling_window(grouping_keys, + input, + 2, + 3, + 1, + *cudf::make_min_aggregation(), + cudf::test::get_default_stream()); +} + +TEST_F(GroupedRollingTest, FixedSizeDefault) +{ + cudf::test::fixed_width_column_wrapper input({1, 2, 3, 4, 5, 6, 7, 8, 9}); + + cudf::test::fixed_width_column_wrapper key_0({1, 1, 1, 2, 2, 2, 3, 3, 3}); + + cudf::test::fixed_width_column_wrapper key_1({4, 4, 4, 5, 5, 5, 6, 6, 6}); + + cudf::test::fixed_width_column_wrapper defaults({42, 42, 42, 42, 9, 9, 7, 1, 1}); + + cudf::table_view grouping_keys{std::vector{key_0, key_1}}; + + cudf::grouped_rolling_window(grouping_keys, + input, + defaults, + 2, + 3, + 1, + *cudf::make_lead_aggregation(1), + cudf::test::get_default_stream()); +} + +TEST_F(GroupedRollingTest, WindowBounds) +{ + cudf::test::fixed_width_column_wrapper input({1, 2, 3, 4, 5, 6, 7, 8, 9}); + + cudf::test::fixed_width_column_wrapper key_0({1, 1, 1, 2, 2, 2, 3, 3, 3}); + + cudf::test::fixed_width_column_wrapper key_1({4, 4, 4, 5, 5, 5, 6, 6, 6}); + + auto const unbounded_preceding = cudf::window_bounds::unbounded(); + auto const following = cudf::window_bounds::get(1L); + + cudf::table_view grouping_keys{std::vector{key_0, key_1}}; + + cudf::grouped_rolling_window(grouping_keys, + input, + unbounded_preceding, + following, + 1, + *cudf::make_min_aggregation(), + cudf::test::get_default_stream()); +} + +TEST_F(GroupedRollingTest, WindowBoundsDefault) +{ + cudf::test::fixed_width_column_wrapper input({1, 2, 3, 4, 5, 6, 7, 8, 9}); + + cudf::test::fixed_width_column_wrapper key_0({1, 1, 1, 2, 2, 2, 3, 3, 3}); + + cudf::test::fixed_width_column_wrapper key_1({4, 4, 4, 5, 5, 5, 6, 6, 6}); + + cudf::test::fixed_width_column_wrapper defaults({42, 42, 42, 42, 9, 9, 7, 1, 1}); + + auto const unbounded_preceding = cudf::window_bounds::unbounded(); + auto const following = cudf::window_bounds::get(1L); + + cudf::table_view grouping_keys{std::vector{key_0, key_1}}; + + cudf::grouped_rolling_window(grouping_keys, + input, + 
defaults, + unbounded_preceding, + following, + 1, + *cudf::make_lead_aggregation(1), + cudf::test::get_default_stream()); +} + +class GroupedTimeRollingTest : public cudf::test::BaseFixture {}; + +TEST_F(GroupedTimeRollingTest, FixedSize) +{ + auto const grp_col = + cudf::test::fixed_width_column_wrapper{0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + auto const agg_col = cudf::test::fixed_width_column_wrapper{ + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}, {1, 1, 1, 1, 1, 0, 1, 1, 1, 1}}; + auto const time_col = + cudf::test::fixed_width_column_wrapper{ + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}, {0, 0, 0, 0, 1, 1, 1, 1, 1, 1}}; + + auto const grouping_keys = cudf::table_view{std::vector{grp_col}}; + auto const preceding = 1L; + auto const following = 1L; + auto const min_periods = 1L; + cudf::grouped_time_range_rolling_window( + grouping_keys, + time_col, + cudf::order::ASCENDING, + agg_col, + preceding, + following, + min_periods, + *cudf::make_count_aggregation(), + cudf::test::get_default_stream()); +} + +TEST_F(GroupedTimeRollingTest, WindowBounds) +{ + auto const grp_col = + cudf::test::fixed_width_column_wrapper{0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + auto const agg_col = cudf::test::fixed_width_column_wrapper{ + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}, {1, 1, 1, 1, 1, 0, 1, 1, 1, 1}}; + auto const time_col = + cudf::test::fixed_width_column_wrapper{ + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}, {0, 0, 0, 0, 1, 1, 1, 1, 1, 1}}; + + auto const grouping_keys = cudf::table_view{std::vector{grp_col}}; + auto const unbounded_preceding = cudf::window_bounds::unbounded(); + auto const following = cudf::window_bounds::get(1L); + + auto const min_periods = 1L; + cudf::grouped_time_range_rolling_window( + grouping_keys, + time_col, + cudf::order::ASCENDING, + agg_col, + unbounded_preceding, + following, + min_periods, + *cudf::make_count_aggregation(), + cudf::test::get_default_stream()); +} + +class GroupedRangeRollingTest : public cudf::test::BaseFixture {}; + +TEST_F(GroupedRangeRollingTest, RangeWindowBounds) +{ + auto const grp_col = cudf::test::fixed_width_column_wrapper{0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + auto const agg_col = cudf::test::fixed_width_column_wrapper{{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}, + {1, 1, 1, 1, 1, 0, 1, 1, 1, 1}}; + + auto const order_by = cudf::test::fixed_width_column_wrapper{{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}, + {0, 0, 0, 0, 1, 1, 1, 1, 1, 1}}; + + cudf::range_window_bounds preceding = cudf::range_window_bounds::get( + cudf::numeric_scalar{int{1}, true, cudf::test::get_default_stream()}, + cudf::test::get_default_stream()); + + cudf::range_window_bounds following = cudf::range_window_bounds::get( + cudf::numeric_scalar{int{1}, true, cudf::test::get_default_stream()}, + cudf::test::get_default_stream()); + + auto const min_periods = cudf::size_type{1}; + + auto const grouping_keys = cudf::table_view{std::vector{grp_col}}; + + cudf::grouped_range_rolling_window(grouping_keys, + order_by, + cudf::order::ASCENDING, + agg_col, + preceding, + following, + min_periods, + *cudf::make_count_aggregation(), + cudf::test::get_default_stream()); +} diff --git a/python/cudf/cudf/core/column/datetime.py b/python/cudf/cudf/core/column/datetime.py index 7fdebda7d76..9ac761b6be1 100644 --- a/python/cudf/cudf/core/column/datetime.py +++ b/python/cudf/cudf/core/column/datetime.py @@ -377,6 +377,16 @@ def floor(self, freq: str) -> ColumnBase: def round(self, freq: str) -> ColumnBase: return libcudf.datetime.round_datetime(self, freq) + def isocalendar(self) -> dict[str, ColumnBase]: + return { + field: self.as_string_column("str", format=directive).astype( 
+ "uint32" + ) + for field, directive in zip( + ["year", "week", "day"], ["%G", "%V", "%u"] + ) + } + def normalize_binop_value(self, other: DatetimeLikeScalar) -> ScalarLike: if isinstance(other, (cudf.Scalar, ColumnBase, cudf.DateOffset)): return other diff --git a/python/cudf/cudf/core/column/timedelta.py b/python/cudf/cudf/core/column/timedelta.py index 8eec84b64f7..26b449f1863 100644 --- a/python/cudf/cudf/core/column/timedelta.py +++ b/python/cudf/cudf/core/column/timedelta.py @@ -392,7 +392,7 @@ def corr(self, other: TimeDeltaColumn) -> float: other.as_numerical_column("int64") ) - def components(self, index=None) -> "cudf.DataFrame": + def components(self) -> dict[str, ColumnBase]: """ Return a Dataframe of the components of the Timedeltas. @@ -484,11 +484,7 @@ def components(self, index=None) -> "cudf.DataFrame": if self.nullable: res_col = res_col.set_mask(self.mask) data[name] = res_col - - return cudf.DataFrame( - data=data, - index=index, - ) + return data @property def days(self) -> "cudf.core.column.NumericalColumn": diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py index 13fa187842d..df21d392311 100644 --- a/python/cudf/cudf/core/index.py +++ b/python/cudf/cudf/core/index.py @@ -2157,7 +2157,7 @@ def month_name(self, locale: str | None = None) -> Index: return Index._from_data({self.name: month_names}) @_cudf_nvtx_annotate - def isocalendar(self): + def isocalendar(self) -> cudf.DataFrame: """ Returns a DataFrame with the year, week, and day calculated according to the ISO 8601 standard. @@ -2176,7 +2176,10 @@ def isocalendar(self): 2020-05-31 08:00:00 2020 22 7 1999-12-31 18:40:00 1999 52 5 """ - return cudf.core.tools.datetimes._to_iso_calendar(self) + ca = cudf.core.column_accessor.ColumnAccessor( + self._column.isocalendar(), verify=False + ) + return cudf.DataFrame._from_data(ca, index=self) @_cudf_nvtx_annotate def to_pandas( @@ -2546,7 +2549,10 @@ def components(self): Return a dataframe of the components (days, hours, minutes, seconds, milliseconds, microseconds, nanoseconds) of the Timedeltas. """ - return self._values.components() + ca = cudf.core.column_accessor.ColumnAccessor( + self._column.components(), verify=False + ) + return cudf.DataFrame._from_data(ca) @property def inferred_freq(self): diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index e532948fd11..c0716d7709a 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -778,9 +778,9 @@ def dt(self): ------ TypeError if the Series does not contain datetimelike values. """ - if isinstance(self._column, DatetimeColumn): + if self.dtype.kind == "M": return DatetimeProperties(self) - elif isinstance(self._column, TimeDeltaColumn): + elif self.dtype.kind == "m": return TimedeltaProperties(self) else: raise AttributeError( @@ -3677,7 +3677,21 @@ def wrapper(self, other, level=None, fill_value=None, axis=0): setattr(Series, binop, make_binop_func(binop)) -class DatetimeProperties: +class BaseDatelikeProperties: + """ + Base accessor class for Series values. + """ + + def __init__(self, series: Series): + self.series = series + + def _return_result_like_self(self, column: ColumnBase) -> Series: + """Return the method result like self.series""" + data = ColumnAccessor({self.series.name: column}, verify=False) + return self.series._from_data_like_self(data) + + +class DatetimeProperties(BaseDatelikeProperties): """ Accessor object for datetimelike properties of the Series values. 
@@ -3727,12 +3741,9 @@ class DatetimeProperties: dtype: int16 """ - def __init__(self, series): - self.series = series - @property # type: ignore @_cudf_nvtx_annotate - def year(self): + def year(self) -> Series: """ The year of the datetime. @@ -3757,7 +3768,7 @@ def year(self): @property # type: ignore @_cudf_nvtx_annotate - def month(self): + def month(self) -> Series: """ The month as January=1, December=12. @@ -3782,7 +3793,7 @@ def month(self): @property # type: ignore @_cudf_nvtx_annotate - def day(self): + def day(self) -> Series: """ The day of the datetime. @@ -3807,7 +3818,7 @@ def day(self): @property # type: ignore @_cudf_nvtx_annotate - def hour(self): + def hour(self) -> Series: """ The hours of the datetime. @@ -3832,7 +3843,7 @@ def hour(self): @property # type: ignore @_cudf_nvtx_annotate - def minute(self): + def minute(self) -> Series: """ The minutes of the datetime. @@ -3857,7 +3868,7 @@ def minute(self): @property # type: ignore @_cudf_nvtx_annotate - def second(self): + def second(self) -> Series: """ The seconds of the datetime. @@ -3882,7 +3893,7 @@ def second(self): @property # type: ignore @_cudf_nvtx_annotate - def microsecond(self): + def microsecond(self) -> Series: """ The microseconds of the datetime. @@ -3903,22 +3914,18 @@ def microsecond(self): 2 2 dtype: int32 """ - return Series( - data=( - # Need to manually promote column to int32 because - # pandas-matching binop behaviour requires that this - # __mul__ returns an int16 column. - self.series._column.get_dt_field("millisecond").astype("int32") - * cudf.Scalar(1000, dtype="int32") - ) - + self.series._column.get_dt_field("microsecond"), - index=self.series.index, - name=self.series.name, - ) + micro = self.series._column.get_dt_field("microsecond") + # Need to manually promote column to int32 because + # pandas-matching binop behaviour requires that this + # __mul__ returns an int16 column. + extra = self.series._column.get_dt_field("millisecond").astype( + "int32" + ) * cudf.Scalar(1000, dtype="int32") + return self._return_result_like_self(micro + extra) @property # type: ignore @_cudf_nvtx_annotate - def nanosecond(self): + def nanosecond(self) -> Series: """ The nanoseconds of the datetime. @@ -3943,7 +3950,7 @@ def nanosecond(self): @property # type: ignore @_cudf_nvtx_annotate - def weekday(self): + def weekday(self) -> Series: """ The day of the week with Monday=0, Sunday=6. @@ -3980,7 +3987,7 @@ def weekday(self): @property # type: ignore @_cudf_nvtx_annotate - def dayofweek(self): + def dayofweek(self) -> Series: """ The day of the week with Monday=0, Sunday=6. @@ -4017,7 +4024,7 @@ def dayofweek(self): @property # type: ignore @_cudf_nvtx_annotate - def dayofyear(self): + def dayofyear(self) -> Series: """ The day of the year, from 1-365 in non-leap years and from 1-366 in leap years. @@ -4055,7 +4062,7 @@ def dayofyear(self): @property # type: ignore @_cudf_nvtx_annotate - def day_of_year(self): + def day_of_year(self) -> Series: """ The day of the year, from 1-365 in non-leap years and from 1-366 in leap years. @@ -4093,7 +4100,7 @@ def day_of_year(self): @property # type: ignore @_cudf_nvtx_annotate - def is_leap_year(self): + def is_leap_year(self) -> Series: """ Boolean indicator if the date belongs to a leap year. 
@@ -4144,15 +4151,11 @@ def is_leap_year(self): dtype: bool """ res = libcudf.datetime.is_leap_year(self.series._column).fillna(False) - return Series._from_data( - ColumnAccessor({None: res}), - index=self.series.index, - name=self.series.name, - ) + return self._return_result_like_self(res) @property # type: ignore @_cudf_nvtx_annotate - def quarter(self): + def quarter(self) -> Series: """ Integer indicator for which quarter of the year the date belongs in. @@ -4178,14 +4181,10 @@ def quarter(self): res = libcudf.datetime.extract_quarter(self.series._column).astype( np.int8 ) - return Series._from_data( - {None: res}, - index=self.series.index, - name=self.series.name, - ) + return self._return_result_like_self(res) @_cudf_nvtx_annotate - def day_name(self, locale=None): + def day_name(self, locale: str | None = None) -> Series: """ Return the day names. Currently supports English locale only. @@ -4216,11 +4215,8 @@ def day_name(self, locale=None): 7 Saturday dtype: object """ - day_names = self.series._column.get_day_names(locale) - return Series._from_data( - ColumnAccessor({None: day_names}), - index=self.series.index, - name=self.series.name, + return self._return_result_like_self( + self.series._column.get_day_names(locale) ) @_cudf_nvtx_annotate @@ -4249,15 +4245,12 @@ def month_name(self, locale: str | None = None) -> Series: 5 February dtype: object """ - month_names = self.series._column.get_month_names(locale) - return Series._from_data( - ColumnAccessor({None: month_names}), - index=self.series.index, - name=self.series.name, + return self._return_result_like_self( + self.series._column.get_month_names(locale) ) @_cudf_nvtx_annotate - def isocalendar(self): + def isocalendar(self) -> cudf.DataFrame: """ Returns a DataFrame with the year, week, and day calculated according to the ISO 8601 standard. @@ -4298,11 +4291,14 @@ def isocalendar(self): 1 Name: year, dtype: object """ - return cudf.core.tools.datetimes._to_iso_calendar(self) + ca = ColumnAccessor(self.series._column.isocalendar(), verify=False) + return self.series._constructor_expanddim._from_data( + ca, index=self.series.index + ) @property # type: ignore @_cudf_nvtx_annotate - def is_month_start(self): + def is_month_start(self) -> Series: """ Booleans indicating if dates are the first day of the month. """ @@ -4310,7 +4306,7 @@ def is_month_start(self): @property # type: ignore @_cudf_nvtx_annotate - def days_in_month(self): + def days_in_month(self) -> Series: """ Get the total number of days in the month that the date falls on. @@ -4353,16 +4349,13 @@ def days_in_month(self): 11 31 dtype: int16 """ - res = libcudf.datetime.days_in_month(self.series._column) - return Series._from_data( - ColumnAccessor({None: res}), - index=self.series.index, - name=self.series.name, + return self._return_result_like_self( + libcudf.datetime.days_in_month(self.series._column) ) @property # type: ignore @_cudf_nvtx_annotate - def is_month_end(self): + def is_month_end(self) -> Series: """ Boolean indicator if the date is the last day of the month. 
@@ -4399,17 +4392,13 @@ def is_month_end(self): 8 False dtype: bool """ # noqa: E501 - last_day = libcudf.datetime.last_day_of_month(self.series._column) - last_day = Series._from_data( - ColumnAccessor({None: last_day}), - index=self.series.index, - name=self.series.name, - ) + last_day_col = libcudf.datetime.last_day_of_month(self.series._column) + last_day = self._return_result_like_self(last_day_col) return (self.day == last_day.dt.day).fillna(False) @property # type: ignore @_cudf_nvtx_annotate - def is_quarter_start(self): + def is_quarter_start(self) -> Series: """ Boolean indicator if the date is the first day of a quarter. @@ -4450,15 +4439,11 @@ def is_quarter_start(self): ) result = ((day == cudf.Scalar(1)) & first_month).fillna(False) - return Series._from_data( - {None: result}, - index=self.series.index, - name=self.series.name, - ) + return self._return_result_like_self(result) @property # type: ignore @_cudf_nvtx_annotate - def is_quarter_end(self): + def is_quarter_end(self) -> Series: """ Boolean indicator if the date is the last day of a quarter. @@ -4501,15 +4486,11 @@ def is_quarter_end(self): ) result = ((day == last_day) & last_month).fillna(False) - return Series._from_data( - {None: result}, - index=self.series.index, - name=self.series.name, - ) + return self._return_result_like_self(result) @property # type: ignore @_cudf_nvtx_annotate - def is_year_start(self): + def is_year_start(self) -> Series: """ Boolean indicator if the date is the first day of the year. @@ -4536,15 +4517,11 @@ def is_year_start(self): outcol = self.series._column.get_dt_field( "day_of_year" ) == cudf.Scalar(1) - return Series._from_data( - {None: outcol.fillna(False)}, - index=self.series.index, - name=self.series.name, - ) + return self._return_result_like_self(outcol.fillna(False)) @property # type: ignore @_cudf_nvtx_annotate - def is_year_end(self): + def is_year_end(self) -> Series: """ Boolean indicator if the date is the last day of the year. @@ -4574,22 +4551,16 @@ def is_year_end(self): leap = day_of_year == cudf.Scalar(366) non_leap = day_of_year == cudf.Scalar(365) result = cudf._lib.copying.copy_if_else(leap, non_leap, leap_dates) - result = result.fillna(False) - return Series._from_data( - {None: result}, - index=self.series.index, - name=self.series.name, - ) + return self._return_result_like_self(result.fillna(False)) @_cudf_nvtx_annotate - def _get_dt_field(self, field): - out_column = self.series._column.get_dt_field(field) - return Series( - data=out_column, index=self.series.index, name=self.series.name + def _get_dt_field(self, field: str) -> Series: + return self._return_result_like_self( + self.series._column.get_dt_field(field) ) @_cudf_nvtx_annotate - def ceil(self, freq): + def ceil(self, freq: str) -> Series: """ Perform ceil operation on the data to the specified freq. @@ -4619,14 +4590,10 @@ def ceil(self, freq): 2 2001-01-01 00:06:00 dtype: datetime64[ns] """ - out_column = self.series._column.ceil(freq) - - return Series._from_data( - data={self.series.name: out_column}, index=self.series.index - ) + return self._return_result_like_self(self.series._column.ceil(freq)) @_cudf_nvtx_annotate - def floor(self, freq): + def floor(self, freq: str) -> Series: """ Perform floor operation on the data to the specified freq. 
@@ -4656,14 +4623,10 @@ def floor(self, freq): 2 2001-01-01 00:05:00 dtype: datetime64[ns] """ - out_column = self.series._column.floor(freq) - - return Series._from_data( - data={self.series.name: out_column}, index=self.series.index - ) + return self._return_result_like_self(self.series._column.floor(freq)) @_cudf_nvtx_annotate - def round(self, freq): + def round(self, freq: str) -> Series: """ Perform round operation on the data to the specified freq. @@ -4696,14 +4659,10 @@ def round(self, freq): 2 2001-01-01 00:05:00 dtype: datetime64[ns] """ - out_column = self.series._column.round(freq) - - return Series._from_data( - data={self.series.name: out_column}, index=self.series.index - ) + return self._return_result_like_self(self.series._column.round(freq)) @_cudf_nvtx_annotate - def strftime(self, date_format, *args, **kwargs): + def strftime(self, date_format: str, *args, **kwargs) -> Series: """ Convert to Series using specified ``date_format``. @@ -4777,11 +4736,10 @@ def strftime(self, date_format, *args, **kwargs): f"https://github.com/rapidsai/cudf/issues/5991 " f"for tracking purposes." ) - str_col = self.series._column.as_string_column( - dtype="str", format=date_format - ) - return Series( - data=str_col, index=self.series.index, name=self.series.name + return self._return_result_like_self( + self.series._column.as_string_column( + dtype="str", format=date_format + ) ) @copy_docstring(DatetimeIndex.tz_localize) @@ -4790,17 +4748,13 @@ def tz_localize( tz: str | None, ambiguous: Literal["NaT"] = "NaT", nonexistent: Literal["NaT"] = "NaT", - ): - result_col = self.series._column.tz_localize( - tz, ambiguous, nonexistent - ) - return Series._from_data( - data={self.series.name: result_col}, - index=self.series.index, + ) -> Series: + return self._return_result_like_self( + self.series._column.tz_localize(tz, ambiguous, nonexistent) ) @copy_docstring(DatetimeIndex.tz_convert) - def tz_convert(self, tz: str | None): + def tz_convert(self, tz: str | None) -> Series: """ Parameters ---------- @@ -4810,13 +4764,12 @@ def tz_convert(self, tz: str | None): A `tz` of None will convert to UTC and remove the timezone information. """ - result_col = self.series._column.tz_convert(tz) - return Series._from_data( - {self.series.name: result_col}, index=self.series.index + return self._return_result_like_self( + self.series._column.tz_convert(tz) ) -class TimedeltaProperties: +class TimedeltaProperties(BaseDatelikeProperties): """ Accessor object for timedelta-like properties of the Series values. @@ -4884,12 +4837,9 @@ class TimedeltaProperties: dtype: int64 """ - def __init__(self, series): - self.series = series - @property # type: ignore @_cudf_nvtx_annotate - def days(self): + def days(self) -> Series: """ Number of days. @@ -4921,7 +4871,7 @@ def days(self): @property # type: ignore @_cudf_nvtx_annotate - def seconds(self): + def seconds(self) -> Series: """ Number of seconds (>= 0 and less than 1 day). @@ -4960,7 +4910,7 @@ def seconds(self): @property # type: ignore @_cudf_nvtx_annotate - def microseconds(self): + def microseconds(self) -> Series: """ Number of microseconds (>= 0 and less than 1 second). @@ -4992,7 +4942,7 @@ def microseconds(self): @property # type: ignore @_cudf_nvtx_annotate - def nanoseconds(self): + def nanoseconds(self) -> Series: """ Return the number of nanoseconds (n), where 0 <= n < 1 microsecond. 
@@ -5024,7 +4974,7 @@ def nanoseconds(self): @property # type: ignore @_cudf_nvtx_annotate - def components(self): + def components(self) -> cudf.DataFrame: """ Return a Dataframe of the components of the Timedeltas. @@ -5050,13 +5000,15 @@ def components(self): 3 0 0 35 35 656 0 0 4 37 13 12 14 234 0 0 """ # noqa: E501 - return self.series._column.components(index=self.series.index) + ca = ColumnAccessor(self.series._column.components(), verify=False) + return self.series._constructor_expanddim._from_data( + ca, index=self.series.index + ) @_cudf_nvtx_annotate - def _get_td_field(self, field): - out_column = getattr(self.series._column, field) - return Series( - data=out_column, index=self.series.index, name=self.series.name + def _get_td_field(self, field: str) -> Series: + return self._return_result_like_self( + getattr(self.series._column, field) ) diff --git a/python/cudf/cudf/core/tools/datetimes.py b/python/cudf/cudf/core/tools/datetimes.py index 29130130732..397bfe1d472 100644 --- a/python/cudf/cudf/core/tools/datetimes.py +++ b/python/cudf/cudf/core/tools/datetimes.py @@ -1048,22 +1048,3 @@ def _offset_to_nanoseconds_lower_bound(offset: DateOffset) -> int: + kwds.get("microseconds", 0) * 10**3 + kwds.get("nanoseconds", 0) ) - - -def _to_iso_calendar(arg): - formats = ["%G", "%V", "%u"] - if not isinstance(arg, (cudf.Index, cudf.core.series.DatetimeProperties)): - raise AttributeError( - "Can only use .isocalendar accessor with series or index" - ) - if isinstance(arg, cudf.Index): - iso_params = [ - arg._column.as_string_column(arg.dtype, fmt) for fmt in formats - ] - index = arg._column - elif isinstance(arg.series, cudf.Series): - iso_params = [arg.strftime(fmt) for fmt in formats] - index = arg.series.index - - data = dict(zip(["year", "week", "day"], iso_params)) - return cudf.DataFrame(data, index=index, dtype=np.int32) diff --git a/python/cudf_polars/cudf_polars/dsl/expr.py b/python/cudf_polars/cudf_polars/dsl/expr.py index 0605bba6642..c92e0714d54 100644 --- a/python/cudf_polars/cudf_polars/dsl/expr.py +++ b/python/cudf_polars/cudf_polars/dsl/expr.py @@ -512,13 +512,17 @@ def do_evaluate( ] if self.name == pl_expr.BooleanFunction.Any: (column,) = columns - return plc.Column.from_scalar( - plc.reduce.reduce(column.obj, plc.aggregation.any(), self.dtype), 1 + return Column( + plc.Column.from_scalar( + plc.reduce.reduce(column.obj, plc.aggregation.any(), self.dtype), 1 + ) ) elif self.name == pl_expr.BooleanFunction.All: (column,) = columns - return plc.Column.from_scalar( - plc.reduce.reduce(column.obj, plc.aggregation.all(), self.dtype), 1 + return Column( + plc.Column.from_scalar( + plc.reduce.reduce(column.obj, plc.aggregation.all(), self.dtype), 1 + ) ) if self.name == pl_expr.BooleanFunction.IsNull: (column,) = columns @@ -612,20 +616,32 @@ def do_evaluate( column, lo, hi = columns (closed,) = self.options lop, rop = self._BETWEEN_OPS[closed] + lo_obj = ( + lo.obj_scalar + if lo.is_scalar and lo.obj.size() != column.obj.size() + else lo.obj + ) + hi_obj = ( + hi.obj_scalar + if hi.is_scalar and hi.obj.size() != column.obj.size() + else hi.obj + ) return Column( plc.binaryop.binary_operation( plc.binaryop.binary_operation( - column.obj, lo.obj, lop, output_type=self.dtype + column.obj, lo_obj, lop, output_type=self.dtype ), plc.binaryop.binary_operation( - column.obj, hi.obj, rop, output_type=self.dtype + column.obj, hi_obj, rop, output_type=self.dtype ), plc.binaryop.BinaryOperator.LOGICAL_AND, self.dtype, ) ) else: - raise NotImplementedError(f"BooleanFunction 
+            raise NotImplementedError(
+                f"BooleanFunction {self.name}"
+            )  # pragma: no cover; handled by init raising
 
 
 class StringFunction(Expr):
diff --git a/python/cudf_polars/cudf_polars/dsl/ir.py b/python/cudf_polars/cudf_polars/dsl/ir.py
index 7f0920e1b57..83957e4286d 100644
--- a/python/cudf_polars/cudf_polars/dsl/ir.py
+++ b/python/cudf_polars/cudf_polars/dsl/ir.py
@@ -17,7 +17,7 @@
 import itertools
 import types
 from functools import cache
-from typing import TYPE_CHECKING, Any, Callable, ClassVar, NoReturn
+from typing import TYPE_CHECKING, Any, Callable, ClassVar
 
 import pyarrow as pa
 from typing_extensions import assert_never
@@ -56,7 +56,6 @@
     "MapFunction",
     "Union",
     "HConcat",
-    "ExtContext",
 ]
 
 
@@ -153,7 +152,9 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame:
         since the translation phase should pick up things that we cannot
         handle.
         """
-        raise NotImplementedError
+        raise NotImplementedError(
+            f"Evaluation of plan {type(self).__name__}"
+        )  # pragma: no cover
 
 
 @dataclasses.dataclass(slots=True)
@@ -346,7 +347,9 @@ class Reduce(IR):
     expr: list[expr.NamedExpr]
     """List of expressions to evaluate to form the new dataframe."""
 
-    def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame:
+    def evaluate(
+        self, *, cache: MutableMapping[int, DataFrame]
+    ) -> DataFrame:  # pragma: no cover; polars doesn't emit this node yet
         """Evaluate and return a dataframe."""
         df = self.df.evaluate(cache=cache)
         columns = broadcast(*(e.evaluate(df) for e in self.expr))
@@ -938,23 +941,3 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame:
         return DataFrame(
             list(itertools.chain.from_iterable(df.columns for df in dfs)),
         )
-
-
-@dataclasses.dataclass(slots=True)
-class ExtContext(IR):
-    """
-    Concatenate dataframes horizontally.
-
-    Prefer HConcat, since this is going to be deprecated on the polars side.
-    """
-
-    df: IR
-    """Input."""
-    extra: list[IR]
-    """List of extra inputs."""
-
-    def __post_init__(self) -> NoReturn:
-        """Validate preconditions."""
-        raise NotImplementedError(
-            "ExtContext will be deprecated, use horizontal concat instead."
-        )
diff --git a/python/cudf_polars/cudf_polars/dsl/translate.py b/python/cudf_polars/cudf_polars/dsl/translate.py
index adde3b1a9dc..41bc3032bc5 100644
--- a/python/cudf_polars/cudf_polars/dsl/translate.py
+++ b/python/cudf_polars/cudf_polars/dsl/translate.py
@@ -63,7 +63,9 @@ def __exit__(self, *args: Any) -> None:
 def _translate_ir(
     node: Any, visitor: NodeTraverser, schema: dict[str, plc.DataType]
 ) -> ir.IR:
-    raise NotImplementedError(f"Translation for {type(node).__name__}")
+    raise NotImplementedError(
+        f"Translation for {type(node).__name__}"
+    )  # pragma: no cover
 
 
 @_translate_ir.register
@@ -172,7 +174,7 @@ def _(
 @_translate_ir.register
 def _(
     node: pl_ir.Reduce, visitor: NodeTraverser, schema: dict[str, plc.DataType]
-) -> ir.IR:
+) -> ir.IR:  # pragma: no cover; polars doesn't emit this node yet
     with set_node(visitor, node.input):
         inp = translate_ir(visitor, n=None)
         exprs = [translate_named_expr(visitor, n=e) for e in node.expr]
@@ -256,17 +258,6 @@ def _(
     return ir.HConcat(schema, [translate_ir(visitor, n=n) for n in node.inputs])
 
 
-@_translate_ir.register
-def _(
-    node: pl_ir.ExtContext, visitor: NodeTraverser, schema: dict[str, plc.DataType]
-) -> ir.IR:
-    return ir.ExtContext(
-        schema,
-        translate_ir(visitor, n=node.input),
-        [translate_ir(visitor, n=n) for n in node.contexts],
-    )
-
-
 def translate_ir(visitor: NodeTraverser, *, n: int | None = None) -> ir.IR:
     """
     Translate a polars-internal IR node to our representation.
@@ -333,7 +324,9 @@ def translate_named_expr(
 def _translate_expr(
     node: Any, visitor: NodeTraverser, dtype: plc.DataType
 ) -> expr.Expr:
-    raise NotImplementedError(f"Translation for {type(node).__name__}")
+    raise NotImplementedError(
+        f"Translation for {type(node).__name__}"
+    )  # pragma: no cover
 
 
 @_translate_expr.register
diff --git a/python/cudf_polars/tests/expressions/test_booleanfunction.py b/python/cudf_polars/tests/expressions/test_booleanfunction.py
new file mode 100644
index 00000000000..951b749e670
--- /dev/null
+++ b/python/cudf_polars/tests/expressions/test_booleanfunction.py
@@ -0,0 +1,129 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
+# SPDX-License-Identifier: Apache-2.0
+from __future__ import annotations
+
+import pytest
+
+import polars as pl
+
+from cudf_polars.testing.asserts import assert_gpu_result_equal
+
+
+@pytest.fixture(params=[False, True], ids=["no_nulls", "nulls"])
+def has_nulls(request):
+    return request.param
+
+
+@pytest.mark.parametrize(
+    "ignore_nulls",
+    [
+        pytest.param(
+            False, marks=pytest.mark.xfail(reason="No support for Kleene logic")
+        ),
+        True,
+    ],
+)
+def test_booleanfunction_reduction(ignore_nulls):
+    ldf = pl.LazyFrame(
+        {
+            "a": [1, 2, 3.0, 2, 5],
+            "b": [0, 3, 1, -1, None],
+            "c": [1, 6, 5, 3, 2],
+        }
+    )
+
+    query = ldf.select(
+        (pl.col("a") > 3).any(ignore_nulls=ignore_nulls),
+        (pl.col("b") > 2).all(ignore_nulls=ignore_nulls),
+    )
+
+    assert_gpu_result_equal(query)
+
+
+@pytest.mark.parametrize(
+    "expr",
+    [
+        pl.Expr.is_null,
+        pl.Expr.is_not_null,
+        pl.Expr.is_nan,
+        pl.Expr.is_not_nan,
+    ],
+    ids=lambda f: f"{f.__name__}()",
+)
+@pytest.mark.parametrize("has_nans", [False, True], ids=["no_nans", "nans"])
+def test_boolean_function_unary(request, expr, has_nans, has_nulls):
+    if has_nulls and expr in (pl.Expr.is_nan, pl.Expr.is_not_nan):
+        request.applymarker(
+            pytest.mark.xfail(
+                reason="Need to copy null mask since is_{not_}nan(null) => null"
+            )
+        )
+
+    values: list[float | None] = [1, 2, 3, 4, 5]
+    if has_nans:
+        values[3] = float("nan")
+    if has_nulls:
+        values[0] = None
+
+    df = pl.LazyFrame({"a": pl.Series(values, dtype=pl.Float32())})
+
+    q = df.select(expr(pl.col("a")))
+
+    assert_gpu_result_equal(q)
+
+
+@pytest.mark.xfail(reason="Evaluation handlers not yet implemented")
+@pytest.mark.parametrize(
+    "expr",
+    [
+        pl.col("a").is_finite(),
+        pl.col("a").is_infinite(),
+        pl.col("a").is_in(pl.col("b")),
+    ],
+)
+def test_unsupported_boolean_function(expr):
+    df = pl.LazyFrame({"a": [1, float("nan"), 2, 4], "b": [1, 2, 3, 4]})
+
+    q = df.select(expr)
+
+    assert_gpu_result_equal(q)
+
+
+@pytest.mark.parametrize("closed", ["both", "left", "right", "none"])
+@pytest.mark.parametrize(
+    "bounds", [(1, 2), (-1, 10), (11, 10), (pl.col("lo"), pl.col("hi"))]
+)
+def test_boolean_isbetween(closed, bounds):
+    df = pl.LazyFrame(
+        {"a": [1, float("nan"), 2, 4], "lo": [1, 2, 2, 3], "hi": [10, 4, 2, 4]}
+    )
+
+    q = df.select(pl.col("a").is_between(*bounds, closed=closed))
+
+    assert_gpu_result_equal(q)
+
+
+@pytest.mark.parametrize(
+    "expr", [pl.any_horizontal("*"), pl.all_horizontal("*")], ids=["any", "all"]
+)
+@pytest.mark.parametrize("wide", [False, True], ids=["narrow", "wide"])
+def test_boolean_horizontal(request, expr, has_nulls, wide):
+    if has_nulls:
+        request.applymarker(pytest.mark.xfail(reason="No support for Kleene logic"))
+    ldf = pl.LazyFrame(
+        {
+            "a": [False, False, False, False, False, True],
+            "b": [False, False, True, None, None, True],
+            "c": [False, True, True, False, True, True],
+        }
+    )
+    if not has_nulls:
+        ldf = ldf.select(pl.col("a"), pl.col("c"))
+
+    # To see the All/Any Horizontal nodes, we need a dataframe with
+    # more than 128 columns
+    if wide:
+        ldf = ldf.with_columns(pl.col("c").alias(f"col{i}") for i in range(128))
+    q = ldf.select(expr)
+
+    assert_gpu_result_equal(q)
diff --git a/python/cudf_polars/tests/test_extcontext.py b/python/cudf_polars/tests/test_extcontext.py
deleted file mode 100644
index 9daf88b4338..00000000000
--- a/python/cudf_polars/tests/test_extcontext.py
+++ /dev/null
@@ -1,23 +0,0 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
-# SPDX-License-Identifier: Apache-2.0
-from __future__ import annotations
-
-import pytest
-
-import polars as pl
-
-from cudf_polars.testing.asserts import assert_gpu_result_equal
-
-
-def test_extcontext():
-    ldf = pl.DataFrame(
-        {
-            "a": [1, 2, 3, 4, 5, 6, 7],
-            "b": [1, 1, 1, 1, 1, 1, 1],
-        }
-    ).lazy()
-    ldf2 = ldf.select((pl.col("b") + pl.col("a")).alias("c"))
-    query = ldf.with_context(ldf2).select(pl.col("b"), pl.col("c"))
-    with pytest.raises(pl.exceptions.ComputeError):
-        # ExtContext to be deprecated so we're not implementing it.
-        assert_gpu_result_equal(query)
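
Note on the DatetimeProperties/TimedeltaProperties hunks above: every dt/td accessor method now delegates to a shared `_return_result_like_self` helper inherited from the new `BaseDatelikeProperties` base class, instead of re-wrapping the result column by hand at each call site. The helper's definition is not part of the hunks shown here, so the following is only a minimal sketch of the pattern the removed call sites suggest it follows (class and method names come from the diff; the body is an assumption, not the actual implementation):

# Sketch only -- the real helper is defined elsewhere in this patch and may differ.
import cudf
from cudf.core.column import ColumnBase


class BaseDatelikeProperties:
    """Assumed base accessor that holds the wrapped Series."""

    def __init__(self, series: cudf.Series) -> None:
        self.series = series

    def _return_result_like_self(self, column: ColumnBase) -> cudf.Series:
        # Rebuild a Series around the result column, reusing the calling
        # accessor's name and index -- the same construction the removed
        # call sites performed by hand via Series._from_data(...).
        return cudf.Series._from_data(
            {self.series.name: column}, index=self.series.index
        )

With a helper like that in place, methods such as `Series.dt.round(...)`, `Series.dt.tz_convert(...)`, and the timedelta field accessors reduce to a one-line delegation to the corresponding column method, which is exactly what the replaced lines above show.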