Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor haversine_distance to a header-only API #477

Merged
merged 31 commits into from
Apr 28, 2022
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
314580d
Create header-only refactoring of cuspatial::haversine_distance
harrism Jan 19, 2022
0736356
Merge branch 'branch-22.04' into fea-header-only-haversine
harrism Jan 19, 2022
5830b66
Apply suggestions from code review
harrism Jan 20, 2022
4c16cd6
require RandomAccessIterator
harrism Jan 20, 2022
7580661
Merge branch 'fea-header-only-haversine' of github.com:harrism/cuspat…
harrism Jan 20, 2022
073e2d7
Convert haversine API to use AOS inputs.
harrism Feb 16, 2022
e37d61e
Revert cosmetic changes to top-level haversine.hpp
harrism Feb 16, 2022
9d8e3eb
Align location_2d and remove unused location_3d and coord_2d.
harrism Feb 16, 2022
7f2bbad
__device__ only
harrism Feb 16, 2022
2448677
"" --> <>
harrism Feb 16, 2022
0d954e0
Remove unused macro.
harrism Feb 16, 2022
7d100dc
Add refactoring guide.
harrism Mar 30, 2022
daa82a8
Add refactoring guide.
harrism Mar 30, 2022
c4ba1f7
Merge branch 'branch-22.04' into fea-header-only-haversine
harrism Mar 31, 2022
454d967
Add fancy iterator test
harrism Mar 31, 2022
a5dab4a
Merge branch 'branch-22.06' into fea-header-only-haversine
harrism Mar 31, 2022
08dfe95
.hpp->.cuh
harrism Mar 31, 2022
e373065
Add note about not making tests depend on libcudf_test
harrism Mar 31, 2022
3ae133e
gitignore
harrism Apr 5, 2022
f8947eb
Add missing include and @
harrism Apr 5, 2022
564cc4c
Don't hide the stream parameter in the detail layer.
harrism Apr 5, 2022
4a6976e
header cleanup
harrism Apr 5, 2022
c208144
Simplify coordinate types to a single vec_2d
harrism Apr 7, 2022
21db279
Clean up haversine_test.cu includes
harrism Apr 7, 2022
be1226c
type-safe vectors
harrism Apr 26, 2022
8134f12
Fix typo
harrism Apr 26, 2022
42f909e
vec_2d --> lonlat_2d in docs
harrism Apr 26, 2022
e5cb703
Merge branch 'branch-22.06' into fea-header-only-haversine
harrism Apr 26, 2022
0b78d87
Review suggestions
harrism Apr 27, 2022
17569c6
Clarified documentation / refactoring guide.
harrism Apr 27, 2022
d5ef3b7
style
harrism Apr 28, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
104 changes: 45 additions & 59 deletions cpp/include/cuspatial/experimental/detail/haversine.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

#pragma once

#include "cuspatial/types.hpp"
harrism marked this conversation as resolved.
Show resolved Hide resolved
#include <cuspatial/constants.hpp>
#include <cuspatial/error.hpp>

Expand All @@ -36,87 +37,72 @@ namespace cuspatial {
namespace detail {

template <typename T>
__device__ T calculate_haversine_distance(T radius, T a_lon, T a_lat, T b_lon, T b_lat)
{
auto ax = a_lon * DEGREE_TO_RADIAN;
auto ay = a_lat * DEGREE_TO_RADIAN;
auto bx = b_lon * DEGREE_TO_RADIAN;
auto by = b_lat * DEGREE_TO_RADIAN;

// haversine formula
auto x = (bx - ax) / 2;
auto y = (by - ay) / 2;
auto sinysqrd = sin(y) * sin(y);
auto sinxsqrd = sin(x) * sin(x);
auto scale = cos(ay) * cos(by);

return 2 * radius * asin(sqrt(sinysqrd + sinxsqrd * scale));
struct haversine_distance_functor {
haversine_distance_functor(T radius) : radius_(radius) {}

__device__ T operator()(location_2d<T> a_lonlat, location_2d<T> b_lonlat)
{
auto ax = a_lonlat.longitude * DEGREE_TO_RADIAN;
auto ay = a_lonlat.latitude * DEGREE_TO_RADIAN;
auto bx = b_lonlat.longitude * DEGREE_TO_RADIAN;
auto by = b_lonlat.latitude * DEGREE_TO_RADIAN;

// haversine formula
auto x = (bx - ax) / 2;
auto y = (by - ay) / 2;
auto sinysqrd = sin(y) * sin(y);
auto sinxsqrd = sin(x) * sin(x);
auto scale = cos(ay) * cos(by);

return 2 * radius_ * asin(sqrt(sinysqrd + sinxsqrd * scale));
}

T radius_{};
};

template <class LonItA,
class LatItA,
class LonItB,
class LatItB,
template <class LonLatItA,
class LonLatItB,
class OutputIt,
class T = typename std::iterator_traits<LonItA>::value_type>
OutputIt haversine_distance(LonItA a_lon_first,
LonItA a_lon_last,
LatItA a_lat_first,
LonItB b_lon_first,
LatItB b_lat_first,
class Location = typename std::iterator_traits<LonLatItA>::value_type,
class T = typename Location::value_type>
OutputIt haversine_distance(LonLatItA a_lonlat_first,
LonLatItA a_lonlat_last,
LonLatItB b_lonlat_first,
OutputIt distance_first,
T const radius = EARTH_RADIUS_KM,
rmm::cuda_stream_view stream = rmm::cuda_stream_default)
{
using LocationB = typename std::iterator_traits<LonLatItB>::value_type;
static_assert(std::conjunction_v<std::is_same<location_2d<T>, Location>,
std::is_same<location_2d<T>, LocationB>>,
"Inputs must be cuspatial::location_2d");
static_assert(
std::conjunction_v<std::is_floating_point<T>,
std::is_floating_point<typename std::iterator_traits<LonItA>::value_type>,
std::is_floating_point<typename std::iterator_traits<LatItA>::value_type>,
std::is_floating_point<typename std::iterator_traits<LonItB>::value_type>,
std::is_floating_point<typename std::iterator_traits<LatItB>::value_type>,
std::is_floating_point<typename LocationB::value_type>,
std::is_floating_point<typename std::iterator_traits<OutputIt>::value_type>>,
"Haversine distance supports only floating-point coordinates.");

CUSPATIAL_EXPECTS(radius > 0, "radius must be positive.");

auto input_tuple = thrust::make_tuple(thrust::make_constant_iterator(static_cast<T>(radius)),
a_lon_first,
a_lat_first,
b_lon_first,
b_lat_first);

auto input_iter = thrust::make_zip_iterator(input_tuple);

auto output_size = std::distance(a_lon_first, a_lon_last);

return thrust::transform(rmm::exec_policy(stream),
input_iter,
input_iter + output_size,
a_lonlat_first,
a_lonlat_last,
b_lonlat_first,
distance_first,
[] __device__(auto inputs) {
return calculate_haversine_distance(thrust::get<0>(inputs),
thrust::get<1>(inputs),
thrust::get<2>(inputs),
thrust::get<3>(inputs),
thrust::get<4>(inputs));
});
haversine_distance_functor<T>(radius));
}
} // namespace detail

template <class LonItA, class LatItA, class LonItB, class LatItB, class OutputIt, class T>
OutputIt haversine_distance(LonItA a_lon_first,
LonItA a_lon_last,
LatItA a_lat_first,
LonItB b_lon_first,
LatItB b_lat_first,
template <class LonLatItA, class LonLatItB, class OutputIt, class Location, class T>
OutputIt haversine_distance(LonLatItA a_lonlat_first,
LonLatItA a_lonlat_last,
LonLatItB b_lonlat_first,
OutputIt distance_first,
T const radius)
{
return detail::haversine_distance(a_lon_first,
a_lon_last,
a_lat_first,
b_lon_first,
b_lat_first,
return detail::haversine_distance(a_lonlat_first,
a_lonlat_last,
b_lonlat_first,
distance_first,
radius,
rmm::cuda_stream_default);
Expand Down
50 changes: 24 additions & 26 deletions cpp/include/cuspatial/experimental/haversine.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,48 +27,46 @@ namespace cuspatial {
* brief Compute haversine distances between points in set A to the corresponding points in set B.
*
* Computes N haversine distances, where N is `std::distance(a_lon_first, a_lon_last)`.
* The distance for each `(a_lon[i], a_lat[i])` and `(b_lon[i], b_lat[i])` point pair is assigned to `distance_first[i]`.
* `distance_first` must be an iterator to output storage allocated for N distances.
* The distance for each `(a_lon[i], a_lat[i])` and `(b_lon[i], b_lat[i])` point pair is assigned to
* `distance_first[i]`. `distance_first` must be an iterator to output storage allocated for N
* distances.
*
* https://en.wikipedia.org/wiki/Haversine_formula
*
* @param[in] a_lon_first: beginning of range of longitude of points in set A
* @param[in] a_lon_last: end of the range of longitude of points in set A
* @param[in] b_lon_first: beginning of range of longitude of points in set B
* @param[in] b_lat_first: beginning of range of latitude of points in set B
* @param[in] a_lonlat_first: beginning of range of (longitude, latitude) locations in set A
* @param[in] a_lonlat_last: end of range of (longitude, latitude) locations in set A
* @param[in] b_lonlat_first: beginning of range of (longitude, latitude) locations in set B
* @param[out] distance_first: beginning of output range of haversine distances
* @param[in] radius: radius of the sphere on which the points reside. default: 6371.0
* (approximate radius of Earth in km)
*
* All iterators must have the same floating-point `value_type`.
*
* @tparam LonItA must meet the requirements of [LegacyRandomAccessIterator][LinkLRAI] and be
* device-accessible.
* @tparam LatItA must meet the requirements of [LegacyRandomAccessIterator][LinkLRAI] and be
* device-accessible.
* @tparam LonItB must meet the requirements of [LegacyRandomAccessIterator][LinkLRAI] and be
* device-accessible.
* @tparam LatItB must meet the requirements of [LegacyRandomAccessIterator][LinkLRAI] and be
* device-accessible.
* @tparam OutputIt must meet the requirements of [LegacyRandomAccessIterator][LinkLRAI] and be
* device-accessible.
* Computed distances will have the same units as `radius`.
*
* @tparam LonLatItA Iterator to input location set A. Must meet the requirements of
* [LegacyRandomAccessIterator][LinkLRAI] and be device-accessible.
* @tparam LonLatItB Iterator to input location set B. Must meet the requirements of
* [LegacyRandomAccessIterator][LinkLRAI] and be device-accessible.
* @tparam OutputIt Output iterator. Must meet the requirements of
* [LegacyRandomAccessIterator][LinkLRAI] and be device-accessible.
* @tparam Location The `value_type` of `LonLatItA` and `LonLatItB`. Must be
* `cuspatial::location_2d<T>`.
* @tparam T The underlying coordinate type. Must be a floating-point type.
*
* @return Output iterator to the element past the last distance computed.
harrism marked this conversation as resolved.
Show resolved Hide resolved
*
* [LinkLRAI]: https://en.cppreference.com/w/cpp/named_req/RandomAccessIterator
* "LegacyRandomAccessIterator"
*/
template <class LonItA,
class LatItA,
class LonItB,
class LatItB,
template <class LonLatItA,
class LonLatItB,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The top level API seems to have asserted that A and B coordinates must have the same type. The transform iterator generated from these two arrays should have the same type as well?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure I follow your question. Can you elaborate?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There's not necessarily a transform iterator. And we do assert that they have the same type. See the implementation file, where it has

using LocationB = typename std::iterator_traits<LonLatItB>::value_type;
static_assert(std::conjunction_v<std::is_same<location_2d<T>, Location>,
                                   std::is_same<location_2d<T>, LocationB>>,
                "Inputs must be cuspatial::location_2d");

It uses T as the type template parameter of the location_2d<T> in the assertion, which means that all the underlying data and the radius must have the same type. There's another assertion that the types are floating-point.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks, I think I understand this a bit better now. We don't enforce that the iterator on coordinate A to be the same type as coordinate B. As long as they are all iterating on location_2d<T> type, and that they are both RAI on device. A concrete example would be that one iterator constructed from a device_vector<location_2d> and the other iterator constructed from a pair of cudf columns, it would still suffice this API.

class OutputIt,
class T = typename std::iterator_traits<LonItA>::value_type>
OutputIt haversine_distance(LonItA a_lon_first,
LonItA a_lon_last,
LatItA a_lat_first,
LonItB b_lon_first,
LatItB b_lat_first,
class Location = typename std::iterator_traits<LonLatItA>::value_type,
class T = typename Location::value_type>
OutputIt haversine_distance(LonLatItA a_lonlat_first,
LonLatItA a_lonlat_last,
LonLatItB b_lonlat_first,
OutputIt distance_first,
T const radius = EARTH_RADIUS_KM);
} // namespace cuspatial
Expand Down
49 changes: 49 additions & 0 deletions cpp/include/cuspatial/experimental/type_utils.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
/*
* Copyright (c) 2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <cuspatial/types.hpp>

#include <thrust/iterator/transform_iterator.h>
#include <thrust/iterator/zip_iterator.h>

namespace cuspatial {

namespace {
harrism marked this conversation as resolved.
Show resolved Hide resolved
template <typename T>
struct tuple_to_location_2d {
// using result_type = cuspatial::location_2d<T>;

CUSPATIAL_HOST_DEVICE cuspatial::location_2d<T> operator()(thrust::tuple<T, T> lonlat)
{
return cuspatial::location_2d<T>{thrust::get<0>(lonlat), thrust::get<1>(lonlat)};
}
};

} // anonymous namespace

// convert two iterators to an iterator<location_2d<T>
template <typename LonIter, typename LatIter>
auto to_location_2d(LonIter lon, LatIter lat)
{
using T = typename std::iterator_traits<LonIter>::value_type;
static_assert(std::is_same_v<T, typename std::iterator_traits<LatIter>::value_type>,
"Iterator value_type mismatch");

auto zipped = thrust::make_zip_iterator(thrust::make_tuple(lon, lat));
return thrust::make_transform_iterator(zipped, tuple_to_location_2d<T>());
}

} // namespace cuspatial
49 changes: 35 additions & 14 deletions cpp/include/cuspatial/types.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019, NVIDIA CORPORATION.
* Copyright (c) 2019-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -16,18 +16,39 @@

#pragma once

#ifdef __CUDACC__
#define CUSPATIAL_HOST_DEVICE __host__ __device__
#else
#define CUSPATIAL_HOST_DEVICE
#endif

#include <cstdint>

namespace cuspatial {

/**
* @brief A 2D location: latitude and longitude, altitude
*
* @tparam T the base type for the coordinates
*/
template <typename T>
struct location_2d {
using value_type = T;
value_type longitude;
value_type latitude;
};
harrism marked this conversation as resolved.
Show resolved Hide resolved

/**
* @brief A 3D location: latitude, longitude, altitude
*
* @tparam T the base type for the coordinates
*/
template <typename T>
struct location_3d {
T latitude;
T longitude;
T altitude;
using value_type = T;
value_type longitude;
value_type latitude;
value_type altitude;
harrism marked this conversation as resolved.
Show resolved Hide resolved
};

/**
Expand All @@ -46,16 +67,16 @@ struct coord_2d {
*
*/
struct its_timestamp {
uint32_t y : 6;
uint32_t m : 4;
uint32_t d : 5;
uint32_t hh : 5;
uint32_t mm : 6;
uint32_t ss : 6;
uint32_t wd : 3;
uint32_t yd : 9;
uint32_t ms : 10;
uint32_t pid : 10;
std::uint32_t y : 6;
std::uint32_t m : 4;
std::uint32_t d : 5;
std::uint32_t hh : 5;
std::uint32_t mm : 6;
std::uint32_t ss : 6;
std::uint32_t wd : 3;
std::uint32_t yd : 9;
std::uint32_t ms : 10;
std::uint32_t pid : 10;
};

} // namespace cuspatial
16 changes: 9 additions & 7 deletions cpp/src/spatial/haversine.cu
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
#include <cuspatial/constants.hpp>
#include <cuspatial/error.hpp>
#include <cuspatial/experimental/haversine.hpp>
#include <cuspatial/experimental/type_utils.hpp>
#include <cuspatial/types.hpp>

#include <cudf/column/column.hpp>
#include <cudf/column/column_view.hpp>
Expand All @@ -37,7 +39,7 @@ struct haversine_functor {
std::enable_if_t<not std::is_floating_point<T>::value, std::unique_ptr<cudf::column>> operator()(
Args&&...)
{
CUSPATIAL_FAIL("haversine_distance does not support non-floating-point types.");
CUSPATIAL_FAIL("haversine_distance supports only floating-point types.");
}

template <typename T>
Expand All @@ -55,12 +57,13 @@ struct haversine_functor {
auto mask_policy = cudf::mask_allocation_policy::NEVER;
auto result = cudf::allocate_like(a_lon, a_lon.size(), mask_policy, mr);

auto lonlat_a = cuspatial::to_location_2d(a_lon.begin<T>(), a_lat.begin<T>());
auto lonlat_b = cuspatial::to_location_2d(b_lon.begin<T>(), b_lat.begin<T>());

cuspatial::detail::haversine_distance(
a_lon.begin<T>(),
a_lon.end<T>(),
a_lat.begin<T>(),
b_lon.begin<T>(),
b_lat.begin<T>(),
lonlat_a,
lonlat_a + a_lon.size(),
lonlat_b,
static_cast<cudf::mutable_column_view>(*result).begin<T>(),
T{radius},
stream);
Expand All @@ -72,7 +75,6 @@ struct haversine_functor {
} // anonymous namespace

namespace cuspatial {

namespace detail {

std::unique_ptr<cudf::column> haversine_distance(cudf::column_view const& a_lon,
Expand Down
Loading