-
Notifications
You must be signed in to change notification settings - Fork 156
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Pairwise Multipoint Equals Count function #1022
Changes from 55 commits
96b28a3
b8785f7
c6e3910
45c128c
0f0a0d4
d1fb60c
da247f8
2b7d7fa
dd01d6c
b4ac329
3dc6195
9931ed1
dbd1bbf
bd03668
313c020
1c1e7ce
28d5c13
6abb44e
c400646
d2ac985
23f4363
63eea9d
29050fc
30c9c86
2d64f3b
f99810f
7f606f9
c8ae2dd
ec24574
b1f6207
0050be0
c178414
719d065
05090bb
ca1218d
76c8aac
5c78146
e3aeac7
e0bba71
3d28ee9
32050f9
2420dfe
ac9b755
d904762
102915b
72d6e97
a806371
fbcd2bd
f7c2c98
89a54c5
37cba19
e63c594
66498cf
ce1df18
07401fe
03bfc44
da0baa5
3684b26
2b0c489
cff14bd
f66e2c4
19d5fce
90fccc2
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,108 @@ | ||
/* | ||
* Copyright (c) 2023, NVIDIA CORPORATION. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
#pragma once | ||
|
||
#include <cuspatial/cuda_utils.hpp> | ||
#include <cuspatial/detail/utility/zero_data.cuh> | ||
#include <cuspatial/error.hpp> | ||
#include <cuspatial/experimental/iterator_factory.cuh> | ||
#include <cuspatial/experimental/ranges/multipoint_range.cuh> | ||
#include <cuspatial/experimental/ranges/range.cuh> | ||
#include <cuspatial/traits.hpp> | ||
#include <cuspatial/vec_2d.hpp> | ||
|
||
#include <rmm/cuda_stream_view.hpp> | ||
#include <rmm/device_uvector.hpp> | ||
#include <rmm/exec_policy.hpp> | ||
|
||
#include <thrust/binary_search.h> | ||
#include <thrust/sort.h> | ||
#include <thrust/transform.h> | ||
|
||
#include <iterator> | ||
#include <type_traits> | ||
|
||
namespace cuspatial { | ||
|
||
namespace detail { | ||
|
||
template <class MultiPointRangeA, class MultiPointRangeB, class OutputIt> | ||
void __global__ pairwise_multipoint_equals_count_kernel(MultiPointRangeA lhs, | ||
MultiPointRangeB rhs, | ||
OutputIt output) | ||
{ | ||
using T = typename MultiPointRangeA::point_t::value_type; | ||
|
||
for (auto idx = threadIdx.x + blockIdx.x * blockDim.x; idx < lhs.num_points(); | ||
idx += gridDim.x * blockDim.x) { | ||
auto geometry_id = lhs.geometry_idx_from_point_idx(idx); | ||
vec_2d<T> lhs_point = lhs.point_begin()[idx]; | ||
auto rhs_multipoint = rhs[geometry_id]; | ||
|
||
atomicAdd( | ||
&output[geometry_id], | ||
thrust::binary_search(thrust::seq, rhs_multipoint.begin(), rhs_multipoint.end(), lhs_point)); | ||
} | ||
} | ||
|
||
} // namespace detail | ||
|
||
template <class MultiPointRangeA, class MultiPointRangeB, class OutputIt> | ||
OutputIt pairwise_multipoint_equals_count(MultiPointRangeA lhs, | ||
MultiPointRangeB rhs, | ||
OutputIt output, | ||
rmm::cuda_stream_view stream) | ||
{ | ||
using T = typename MultiPointRangeA::point_t::value_type; | ||
using index_t = typename MultiPointRangeB::index_t; | ||
|
||
static_assert(is_same_floating_point<T, typename MultiPointRangeB::point_t::value_type>(), | ||
"Origin and input must have the same base floating point type."); | ||
|
||
CUSPATIAL_EXPECTS(lhs.size() == rhs.size(), "lhs and rhs inputs should have the same size."); | ||
|
||
if (lhs.size() == 0) return output; | ||
|
||
// Create a sorted copy of the rhs points. | ||
auto key_it = make_geometry_id_iterator<index_t>(rhs.offsets_begin(), rhs.offsets_end()); | ||
|
||
rmm::device_uvector<index_t> rhs_keys(rhs.num_points(), stream); | ||
rmm::device_uvector<vec_2d<T>> rhs_point_sorted(rhs.num_points(), stream); | ||
|
||
thrust::copy(rmm::exec_policy(stream), key_it, key_it + rhs.num_points(), rhs_keys.begin()); | ||
thrust::copy( | ||
rmm::exec_policy(stream), rhs.point_begin(), rhs.point_end(), rhs_point_sorted.begin()); | ||
|
||
auto rhs_with_keys = | ||
thrust::make_zip_iterator(thrust::make_tuple(rhs_keys.begin(), rhs_point_sorted.begin())); | ||
|
||
thrust::sort(rmm::exec_policy(stream), rhs_with_keys, rhs_with_keys + rhs.num_points()); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. For a future optimization: avoid copying the keys and points before sorting by using an out-of-place sort from CUB rather than |
||
|
||
auto rhs_sorted = multipoint_range{ | ||
rhs.offsets_begin(), rhs.offsets_end(), rhs_point_sorted.begin(), rhs_point_sorted.end()}; | ||
|
||
detail::zero_data_async(output, output + lhs.size(), stream); | ||
auto [tpb, n_blocks] = grid_1d(lhs.num_points()); | ||
detail::pairwise_multipoint_equals_count_kernel<<<n_blocks, tpb, 0, stream.value()>>>( | ||
lhs, rhs_sorted, output); | ||
|
||
CUSPATIAL_CHECK_CUDA(stream.value()); | ||
|
||
return output + lhs.size(); | ||
} | ||
|
||
} // namespace cuspatial |
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
@@ -0,0 +1,77 @@ | ||||||
/* | ||||||
* Copyright (c) 2023, NVIDIA CORPORATION. | ||||||
* | ||||||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||||||
* you may not use this file except in compliance with the License. | ||||||
* You may obtain a copy of the License at | ||||||
* | ||||||
* http://www.apache.org/licenses/LICENSE-2.0 | ||||||
* | ||||||
* Unless required by applicable law or agreed to in writing, software | ||||||
* distributed under the License is distributed on an "AS IS" BASIS, | ||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||||
* See the License for the specific language governing permissions and | ||||||
* limitations under the License. | ||||||
*/ | ||||||
|
||||||
#pragma once | ||||||
|
||||||
#include <cuspatial/vec_2d.hpp> | ||||||
|
||||||
#include <rmm/cuda_stream_view.hpp> | ||||||
|
||||||
#include <iterator> | ||||||
|
||||||
namespace cuspatial { | ||||||
|
||||||
/** | ||||||
* @brief Compute the number of multipoint pairs that are equal. | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is not addressed yet. The original @brief is not accurate. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks Mark it is now addressed. |
||||||
* | ||||||
* Given two ranges of multipoints, this function counts points in the left-hand | ||||||
* multipoint that exist in the corresponding right-hand multipoint. | ||||||
* | ||||||
* @example | ||||||
* | ||||||
* lhs: { {0, 0} } | ||||||
* rhs: { {0, 0}, {1, 1}, {2, 2}, {3, 3} } | ||||||
* count: { 1 } | ||||||
|
||||||
* lhs: { {0, 0}, {1, 1}, {2, 2}, {3, 3} } | ||||||
* rhs: { {0, 0} } | ||||||
* count: { 1 } | ||||||
|
||||||
* lhs: { { {3, 3}, {3, 3}, {0, 0} }, { {0, 0}, {1, 1}, {2, 2} }, { {0, 0} } } | ||||||
* rhs: { { {0, 0}, {2, 2}, {1, 1} }, { {2, 2}, {0, 0}, {1, 1} }, { {1, 1} } } | ||||||
* count: { 1, 3, 0 } | ||||||
* | ||||||
* @note All input iterators must conform to the specification defined by | ||||||
* `multipoint_range.cuh` and the output iterator must be able to accept for | ||||||
* storage values of type | ||||||
* `uint32_t`. | ||||||
* | ||||||
* @param[in] lhs_first multipoint_range of first array of multipoints | ||||||
* @param[in] rhs_first multipoint_range of second array of multipoints | ||||||
* @param[out] count_first: beginning of range of uint32_t counts | ||||||
* @param[in] stream: The CUDA stream on which to perform computations and allocate memory. | ||||||
* | ||||||
* @tparam MultiPointRangeA Iterator over multipoints. Must meet the requirements of | ||||||
* [LegacyRandomAccessIterator][LinkLRAI] and be device-accessible. | ||||||
* @tparam MultiPointRangeB Iterator over multipoints. Must meet the requirements of | ||||||
* [LegacyRandomAccessIterator][LinkLRAI] and be device-accessible. | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. These aren't iterators. Not sure the LRAI link is appropriate here anymore. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This needs to be addressed. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is now addressed, thank you. |
||||||
* @tparam OutputIt Iterator over uint32_t. Must meet the requirements of | ||||||
* [LegacyRandomAccessIterator][LinkLRAI] and be device-accessible and mutable. | ||||||
* | ||||||
* @return Output iterator to the element past the last count result written. | ||||||
* | ||||||
* [LinkLRAI]: https://en.cppreference.com/w/cpp/named_req/RandomAccessIterator | ||||||
* "LegacyRandomAccessIterator" | ||||||
*/ | ||||||
template <class MultiPointRangeA, class MultiPointRangeB, class OutputIt> | ||||||
OutputIt pairwise_multipoint_equals_count(MultiPointRangeA lhs_first, | ||||||
MultiPointRangeB rhs_first, | ||||||
OutputIt count_first, | ||||||
rmm::cuda_stream_view stream = rmm::cuda_stream_default); | ||||||
|
||||||
} // namespace cuspatial | ||||||
|
||||||
#include <cuspatial/experimental/detail/pairwise_multipoint_equals_count.cuh> |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,74 @@ | ||
/* | ||
* Copyright (c) 2023, NVIDIA CORPORATION. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
#pragma once | ||
|
||
#include <cudf/types.hpp> | ||
|
||
#include <cuspatial/column/geometry_column_view.hpp> | ||
|
||
#include <rmm/mr/device/per_device_resource.hpp> | ||
|
||
#include <memory> | ||
|
||
namespace cuspatial { | ||
|
||
/** | ||
* @addtogroup spatial | ||
* @brief Count the number of equal points in pairs of multipoints.. | ||
* | ||
* Given two columns of multipoints, returns a column containing the | ||
* count of points in each multipoint from `lhs` that exist in the | ||
* corresponding multipoint in `rhs`. | ||
* | ||
* @param lhs Geometry column of multipoints with interleaved coordinates | ||
* @param rhs Geometry column of multipoints with interleaved coordinates | ||
* @param mr Device memory resource used to allocate the returned column. | ||
* @return A column of size len(lhs) containing the number of points in each | ||
* multipoint from `lhs` that are equal to a point in the corresponding | ||
* multipoint in `rhs`. | ||
* | ||
* @throw cuspatial::logic_error if `lhs` and `rhs` have different coordinate | ||
* types or lengths. | ||
* | ||
* @example | ||
* ``` | ||
* lhs: MultiPoint(0, 0) | ||
* rhs: MultiPoint((0, 0), (1, 1), (2, 2), (3, 3)) | ||
* result: 1 | ||
|
||
* lhs: MultiPoint((0, 0), (1, 1), (2, 2), (3, 3)) | ||
* rhs: MultiPoint((0, 0)) | ||
* result: 1 | ||
|
||
* lhs: ( | ||
* MultiPoint((3, 3), (3, 3), (0, 0)), | ||
* MultiPoint((0, 0), (1, 1), (2, 2)), | ||
* MultiPoint((0, 0)) | ||
* ) | ||
* rhs: ( | ||
* MultiPoint((0, 0), (2, 2), (1, 1)), | ||
* MultiPoint((2, 2), (0, 0), (1, 1)), | ||
* MultiPoint((1, 1)) | ||
* ) | ||
* result: ( 1, 3, 0 ) | ||
*/ | ||
std::unique_ptr<cudf::column> pairwise_multipoint_equals_count( | ||
geometry_column_view const& lhs, | ||
geometry_column_view const& rhs, | ||
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); | ||
|
||
} // namespace cuspatial |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Did you consider using a thrust::reduce_by_key instead of a custom kernel? I don't know if it would be faster or slower. The keys would be a counting_transform_iterator that converts the thread index into the geometry index as you do in the inner loop. The reduction operator would do the sequential binary search.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
That's an interesting question. I'm going to punt on it as implementation and benchmarking would take a few hours at least (for me, if I'm lucky :) and the current implementation meets my needs for binary predicate implementation work. This would be an interesting project to pair on, if you think it is worth investigating more deeply.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
In this case the kernel is relatively simple, so I was just curious if it had been considered. In general we should always try to use algorithms before raw loops, and in CUDA that means before raw kernels.