From 200b42ecbd7574929cc57c2efeb12d97cc7e3d00 Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Wed, 15 Feb 2023 11:15:58 -0800 Subject: [PATCH 01/10] update points in window API --- python/cuspatial/benchmarks/api/bench_api.py | 7 ++- .../cuspatial/core/spatial/filtering.py | 41 ++++++++------ .../test_points_in_spatial_window.py | 55 +++++++++++-------- 3 files changed, 62 insertions(+), 41 deletions(-) diff --git a/python/cuspatial/benchmarks/api/bench_api.py b/python/cuspatial/benchmarks/api/bench_api.py index 261746f1c..782a0660a 100644 --- a/python/cuspatial/benchmarks/api/bench_api.py +++ b/python/cuspatial/benchmarks/api/bench_api.py @@ -121,14 +121,17 @@ def bench_points_in_spatial_window(benchmark, gpu_dataframe): geometry = gpu_dataframe["geometry"] mean_x, std_x = (geometry.polygons.x.mean(), geometry.polygons.x.std()) mean_y, std_y = (geometry.polygons.y.mean(), geometry.polygons.y.std()) + xy = cudf.DataFrame( + {"x": geometry.polygons.x, "y": geometry.polygons.y} + ).interleave_columns() + points = cuspatial.GeoSeries.from_points_xy(xy) benchmark( cuspatial.points_in_spatial_window, + points, mean_x - std_x, mean_x + std_x, mean_y - std_y, mean_y + std_y, - geometry.polygons.x, - geometry.polygons.y, ) diff --git a/python/cuspatial/cuspatial/core/spatial/filtering.py b/python/cuspatial/cuspatial/core/spatial/filtering.py index d05e6dd49..be7689e1d 100644 --- a/python/cuspatial/cuspatial/core/spatial/filtering.py +++ b/python/cuspatial/cuspatial/core/spatial/filtering.py @@ -1,13 +1,14 @@ -# Copyright (c) 2022, NVIDIA CORPORATION. +# Copyright (c) 2022-2023, NVIDIA CORPORATION. from cudf import DataFrame from cudf.core.column import as_column from cuspatial._lib import spatial_window -from cuspatial.utils.column_utils import normalize_point_columns +from cuspatial.core.geoseries import GeoSeries +from cuspatial.utils.column_utils import contains_only_points -def points_in_spatial_window(min_x, max_x, min_y, max_y, xs, ys): +def points_in_spatial_window(points: GeoSeries, min_x, max_x, min_y, max_y): """Return only the subset of coordinates that fall within a rectangular window. @@ -19,32 +20,40 @@ def points_in_spatial_window(min_x, max_x, min_y, max_y, xs, ys): Parameters ---------- - min_x + points: GeoSeries + A geoseries of points + min_x: float lower x-coordinate of the query window - max_x + max_x: float upper x-coordinate of the query window - min_y + min_y: float lower y-coordinate of the query window - max_y + max_y: float upper y-coordinate of the query window - xs - column of x-coordinates that may fall within the window - ys - column of y-coordinates that may fall within the window Returns ------- - result : cudf.DataFrame - subset of `(x, y)` pairs above that fall within the window + result : GeoSeries + subset of `points` above that fall within the window Notes ----- * Swaps ``min_x`` and ``max_x`` if ``min_x > max_x`` * Swaps ``min_y`` and ``max_y`` if ``min_y > max_y`` """ - xs, ys = normalize_point_columns(as_column(xs), as_column(ys)) - return DataFrame._from_data( + + if len(points) == 0: + return GeoSeries([]) + + if not contains_only_points(points): + raise ValueError("GeoSeries must contain only points.") + + xs = as_column(points.points.x) + ys = as_column(points.points.y) + + res_xy = DataFrame._from_data( *spatial_window.points_in_spatial_window( min_x, max_x, min_y, max_y, xs, ys ) - ) + ).interleave_columns() + return GeoSeries.from_points_xy(res_xy) diff --git a/python/cuspatial/cuspatial/tests/spatial/filtering/test_points_in_spatial_window.py b/python/cuspatial/cuspatial/tests/spatial/filtering/test_points_in_spatial_window.py index 677f37eca..214a44b82 100644 --- a/python/cuspatial/cuspatial/tests/spatial/filtering/test_points_in_spatial_window.py +++ b/python/cuspatial/cuspatial/tests/spatial/filtering/test_points_in_spatial_window.py @@ -1,26 +1,25 @@ -# Copyright (c) 2019, NVIDIA CORPORATION. +# Copyright (c) 2019-2023, NVIDIA CORPORATION. +import geopandas as gpd import pytest - -import cudf +from geopandas.testing import assert_geoseries_equal +from shapely.geometry import Point import cuspatial def test_zeros(): result = cuspatial.points_in_spatial_window( # noqa: F841 - 0, 0, 0, 0, cudf.Series([0.0]), cudf.Series([0.0]) + cuspatial.GeoSeries([Point(0, 0)]), 0, 0, 0, 0 ) assert result.empty def test_centered(): - result = cuspatial.points_in_spatial_window( - -1, 1, -1, 1, cudf.Series([0.0]), cudf.Series([0.0]) - ) - cudf.testing.assert_frame_equal( - result, cudf.DataFrame({"x": [0.0], "y": [0.0]}) - ) + s = cuspatial.GeoSeries([Point(0, 0)]) + result = cuspatial.points_in_spatial_window(s, -1, 1, -1, 1) + + assert_geoseries_equal(result.to_geopandas(), gpd.GeoSeries([Point(0, 0)])) @pytest.mark.parametrize( @@ -29,38 +28,48 @@ def test_centered(): def test_corners(coords): x, y = coords result = cuspatial.points_in_spatial_window( - -1.1, 1.1, -1.1, 1.1, cudf.Series([x]), cudf.Series([y]) - ) - cudf.testing.assert_frame_equal( - result, cudf.DataFrame({"x": [x], "y": [y]}) + cuspatial.GeoSeries([Point(x, y)]), -1.1, 1.1, -1.1, 1.1 ) + assert_geoseries_equal(result.to_geopandas(), gpd.GeoSeries([Point(x, y)])) def test_pair(): result = cuspatial.points_in_spatial_window( - -1.1, 1.1, -1.1, 1.1, cudf.Series([0.0, 1.0]), cudf.Series([1.0, 0.0]) + cuspatial.GeoSeries([Point(0, 1), Point(1, 0)]), -1.1, 1.1, -1.1, 1.1 ) - cudf.testing.assert_frame_equal( - result, cudf.DataFrame({"x": [0.0, 1.0], "y": [1.0, 0.0]}) + assert_geoseries_equal( + result.to_geopandas(), gpd.GeoSeries([Point(0, 1), Point(1, 0)]) ) def test_oob(): result = cuspatial.points_in_spatial_window( - -1, 1, -1, 1, cudf.Series([-2.0, 2.0]), cudf.Series([2.0, -2.0]) + cuspatial.GeoSeries([Point(-2.0, 2.0), Point(2.0, -2.0)]), + -1, + 1, + -1, + 1, ) - cudf.testing.assert_frame_equal(result, cudf.DataFrame({"x": [], "y": []})) + assert_geoseries_equal(result.to_geopandas(), gpd.GeoSeries([])) def test_half(): result = cuspatial.points_in_spatial_window( + cuspatial.GeoSeries( + [ + Point(-1.0, 1.0), + Point(1.0, -1.0), + Point(3.0, 3.0), + Point(-3.0, -3.0), + ] + ), -2, 2, -2, 2, - cudf.Series([-1.0, 1.0, 3.0, -3.0]), - cudf.Series([1.0, -1.0, 3.0, -3.0]), ) - cudf.testing.assert_frame_equal( - result, cudf.DataFrame({"x": [-1.0, 1.0], "y": [1.0, -1.0]}) + + assert_geoseries_equal( + result.to_geopandas(), + gpd.GeoSeries([Point(-1.0, 1.0), Point(1.0, -1.0)]), ) From 6a5104cbc9fd103b3dbf0fdac8dac4f2db804388 Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Wed, 15 Feb 2023 12:07:12 -0800 Subject: [PATCH 02/10] update spatial join tests --- .../cuspatial/core/spatial/indexing.py | 20 +- .../tests/spatial/indexing/test_indexing.py | 327 +++++++++--------- .../tests/spatial/join/test_spatial_join.py | 55 ++- 3 files changed, 219 insertions(+), 183 deletions(-) diff --git a/python/cuspatial/cuspatial/core/spatial/indexing.py b/python/cuspatial/cuspatial/core/spatial/indexing.py index 435355819..52d63e274 100644 --- a/python/cuspatial/cuspatial/core/spatial/indexing.py +++ b/python/cuspatial/cuspatial/core/spatial/indexing.py @@ -1,18 +1,19 @@ -# Copyright (c) 2022, NVIDIA CORPORATION. +# Copyright (c) 2022-2023, NVIDIA CORPORATION. import warnings from cudf import DataFrame, Series from cudf.core.column import as_column +from cuspatial import GeoSeries from cuspatial._lib.quadtree import ( quadtree_on_points as cpp_quadtree_on_points, ) -from cuspatial.utils.column_utils import normalize_point_columns +from cuspatial.utils.column_utils import contains_only_points def quadtree_on_points( - xs, ys, x_min, x_max, y_min, y_max, scale, max_depth, max_size + points: GeoSeries, x_min, x_max, y_min, y_max, scale, max_depth, max_size ): """ Construct a quadtree from a set of points for a given area-of-interest @@ -20,10 +21,8 @@ def quadtree_on_points( Parameters ---------- - xs - Column of x-coordinates for each point. - ys - Column of y-coordinates for each point. + points + Series of points. x_min The lower-left x-coordinate of the area of interest bounding box. x_max @@ -157,7 +156,12 @@ def quadtree_on_points( Length: 120, dtype: int32 """ - xs, ys = normalize_point_columns(as_column(xs), as_column(ys)) + if not len(points) == 0 and not contains_only_points(points): + raise ValueError("GeoSeries must contain only points.") + + xs = as_column(points.points.x) + ys = as_column(points.points.y) + x_min, x_max, y_min, y_max = ( min(x_min, x_max), max(x_min, x_max), diff --git a/python/cuspatial/cuspatial/tests/spatial/indexing/test_indexing.py b/python/cuspatial/cuspatial/tests/spatial/indexing/test_indexing.py index 4163f547b..9ede78630 100644 --- a/python/cuspatial/cuspatial/tests/spatial/indexing/test_indexing.py +++ b/python/cuspatial/cuspatial/tests/spatial/indexing/test_indexing.py @@ -1,6 +1,7 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2023, NVIDIA CORPORATION. import numpy as np import pytest +from shapely.geometry import Point import cudf @@ -13,8 +14,7 @@ def test_empty(): # empty should not throw order, quadtree = cuspatial.quadtree_on_points( - cudf.Series([]), # x - cudf.Series([]), # y + cuspatial.GeoSeries([]), *bbox_1, # bbox 1, # scale 1, # max_depth @@ -34,11 +34,9 @@ def test_empty(): ) -@pytest.mark.parametrize("dtype", ["float32", "float64"]) -def test_one_point(dtype): +def test_one_point(): order, quadtree = cuspatial.quadtree_on_points( - cudf.Series([0.5]).astype(dtype), # x - cudf.Series([0.5]).astype(dtype), # y + cuspatial.GeoSeries([Point(0.5, 0.5)]), *bbox_1, # bbox 1, # scale 1, # max_depth @@ -58,11 +56,9 @@ def test_one_point(dtype): ) -@pytest.mark.parametrize("dtype", ["float32", "float64"]) -def test_two_points(dtype): +def test_two_points(): order, quadtree = cuspatial.quadtree_on_points( - cudf.Series([0.5, 1.5]).astype(dtype), # x - cudf.Series([0.5, 1.5]).astype(dtype), # y + cuspatial.GeoSeries([Point(0.5, 0.5), Point(1.5, 1.5)]), *bbox_2, # bbox 1, # scale 1, # max_depth @@ -84,161 +80,162 @@ def test_two_points(dtype): @pytest.mark.parametrize("dtype", ["float32", "float64"]) def test_small_number_of_points(dtype): + x = cudf.Series( + [ + 1.9804558865545805, + 0.1895259128530169, + 1.2591725716781235, + 0.8178039499335275, + 0.48171647380517046, + 1.3890664414691907, + 0.2536015260915061, + 3.1907684812039956, + 3.028362149164369, + 3.918090468102582, + 3.710910700915217, + 3.0706987088385853, + 3.572744183805594, + 3.7080407833612004, + 3.70669993057843, + 3.3588457228653024, + 2.0697434332621234, + 2.5322042870739683, + 2.175448214220591, + 2.113652420701984, + 2.520755151373394, + 2.9909779614491687, + 2.4613232527836137, + 4.975578758530645, + 4.07037627210835, + 4.300706849071861, + 4.5584381091040616, + 4.822583857757069, + 4.849847745942472, + 4.75489831780737, + 4.529792124514895, + 4.732546857961497, + 3.7622247877537456, + 3.2648444465931474, + 3.01954722322135, + 3.7164018490892348, + 3.7002781846945347, + 2.493975723955388, + 2.1807636574967466, + 2.566986568683904, + 2.2006520196663066, + 2.5104987015171574, + 2.8222482218882474, + 2.241538022180476, + 2.3007438625108882, + 6.0821276168848994, + 6.291790729917634, + 6.109985464455084, + 6.101327777646798, + 6.325158445513714, + 6.6793884701899, + 6.4274219368674315, + 6.444584786789386, + 7.897735998643542, + 7.079453687660189, + 7.430677191305505, + 7.5085184104988, + 7.886010001346151, + 7.250745898479374, + 7.769497359206111, + 1.8703303641352362, + 1.7015273093278767, + 2.7456295127617385, + 2.2065031771469, + 3.86008672302403, + 1.9143371250907073, + 3.7176098065039747, + 0.059011873032214, + 3.1162712022943757, + 2.4264509160270813, + 3.154282922203257, + ] + ).astype( + dtype + ) # x + y = cudf.Series( + [ + 1.3472225743317712, + 0.5431061133894604, + 0.1448705855995005, + 0.8138440641113271, + 1.9022922214961997, + 1.5177694304735412, + 1.8762161698642947, + 0.2621847215928189, + 0.027638405909631958, + 0.3338651960183463, + 0.9937713340192049, + 0.9376313558467103, + 0.33184908855075124, + 0.09804238103130436, + 0.7485845679979923, + 0.2346381514128677, + 1.1809465376402173, + 1.419555755682142, + 1.2372448404986038, + 1.2774712415624014, + 1.902015274420646, + 1.2420487904041893, + 1.0484414482621331, + 0.9606291981013242, + 1.9486902798139454, + 0.021365525588281198, + 1.8996548860019926, + 0.3234041700489503, + 1.9531893897409585, + 0.7800065259479418, + 1.942673409259531, + 0.5659923375279095, + 2.8709552313924487, + 2.693039435509084, + 2.57810040095543, + 2.4612194182614333, + 2.3345952955903906, + 3.3999020934055837, + 3.2296461832828114, + 3.6607732238530897, + 3.7672478678985257, + 3.0668114607133137, + 3.8159308233351266, + 3.8812819070357545, + 3.6045900851589048, + 2.5470532680258002, + 2.983311357415729, + 2.2235950639628523, + 2.5239201807166616, + 2.8765450351723674, + 2.5605928243991434, + 2.9754616970668213, + 2.174562817047202, + 3.380784914178574, + 3.063690547962938, + 3.380489849365283, + 3.623862886287816, + 3.538128217886674, + 3.4154469467473447, + 3.253257011908445, + 4.209727933188015, + 7.478882372510933, + 7.474216636277054, + 6.896038613284851, + 7.513564222799629, + 6.885401350515916, + 6.194330707468438, + 5.823535317960799, + 6.789029097334483, + 5.188939408363776, + 5.788316610960881, + ] + ).astype(dtype) + xy = cudf.DataFrame({"x": x, "y": y}).interleave_columns() + points = cuspatial.GeoSeries.from_points_xy(xy) order, quadtree = cuspatial.quadtree_on_points( - cudf.Series( - [ - 1.9804558865545805, - 0.1895259128530169, - 1.2591725716781235, - 0.8178039499335275, - 0.48171647380517046, - 1.3890664414691907, - 0.2536015260915061, - 3.1907684812039956, - 3.028362149164369, - 3.918090468102582, - 3.710910700915217, - 3.0706987088385853, - 3.572744183805594, - 3.7080407833612004, - 3.70669993057843, - 3.3588457228653024, - 2.0697434332621234, - 2.5322042870739683, - 2.175448214220591, - 2.113652420701984, - 2.520755151373394, - 2.9909779614491687, - 2.4613232527836137, - 4.975578758530645, - 4.07037627210835, - 4.300706849071861, - 4.5584381091040616, - 4.822583857757069, - 4.849847745942472, - 4.75489831780737, - 4.529792124514895, - 4.732546857961497, - 3.7622247877537456, - 3.2648444465931474, - 3.01954722322135, - 3.7164018490892348, - 3.7002781846945347, - 2.493975723955388, - 2.1807636574967466, - 2.566986568683904, - 2.2006520196663066, - 2.5104987015171574, - 2.8222482218882474, - 2.241538022180476, - 2.3007438625108882, - 6.0821276168848994, - 6.291790729917634, - 6.109985464455084, - 6.101327777646798, - 6.325158445513714, - 6.6793884701899, - 6.4274219368674315, - 6.444584786789386, - 7.897735998643542, - 7.079453687660189, - 7.430677191305505, - 7.5085184104988, - 7.886010001346151, - 7.250745898479374, - 7.769497359206111, - 1.8703303641352362, - 1.7015273093278767, - 2.7456295127617385, - 2.2065031771469, - 3.86008672302403, - 1.9143371250907073, - 3.7176098065039747, - 0.059011873032214, - 3.1162712022943757, - 2.4264509160270813, - 3.154282922203257, - ] - ).astype( - dtype - ), # x - cudf.Series( - [ - 1.3472225743317712, - 0.5431061133894604, - 0.1448705855995005, - 0.8138440641113271, - 1.9022922214961997, - 1.5177694304735412, - 1.8762161698642947, - 0.2621847215928189, - 0.027638405909631958, - 0.3338651960183463, - 0.9937713340192049, - 0.9376313558467103, - 0.33184908855075124, - 0.09804238103130436, - 0.7485845679979923, - 0.2346381514128677, - 1.1809465376402173, - 1.419555755682142, - 1.2372448404986038, - 1.2774712415624014, - 1.902015274420646, - 1.2420487904041893, - 1.0484414482621331, - 0.9606291981013242, - 1.9486902798139454, - 0.021365525588281198, - 1.8996548860019926, - 0.3234041700489503, - 1.9531893897409585, - 0.7800065259479418, - 1.942673409259531, - 0.5659923375279095, - 2.8709552313924487, - 2.693039435509084, - 2.57810040095543, - 2.4612194182614333, - 2.3345952955903906, - 3.3999020934055837, - 3.2296461832828114, - 3.6607732238530897, - 3.7672478678985257, - 3.0668114607133137, - 3.8159308233351266, - 3.8812819070357545, - 3.6045900851589048, - 2.5470532680258002, - 2.983311357415729, - 2.2235950639628523, - 2.5239201807166616, - 2.8765450351723674, - 2.5605928243991434, - 2.9754616970668213, - 2.174562817047202, - 3.380784914178574, - 3.063690547962938, - 3.380489849365283, - 3.623862886287816, - 3.538128217886674, - 3.4154469467473447, - 3.253257011908445, - 4.209727933188015, - 7.478882372510933, - 7.474216636277054, - 6.896038613284851, - 7.513564222799629, - 6.885401350515916, - 6.194330707468438, - 5.823535317960799, - 6.789029097334483, - 5.188939408363776, - 5.788316610960881, - ] - ).astype( - dtype - ), # y + points, 0, # x_min 8, # x_max 0, # y_min diff --git a/python/cuspatial/cuspatial/tests/spatial/join/test_spatial_join.py b/python/cuspatial/cuspatial/tests/spatial/join/test_spatial_join.py index 13e5fa0ed..190fca450 100644 --- a/python/cuspatial/cuspatial/tests/spatial/join/test_spatial_join.py +++ b/python/cuspatial/cuspatial/tests/spatial/join/test_spatial_join.py @@ -210,9 +210,16 @@ @pytest.mark.parametrize("dtype", [np.float32, np.float64]) def test_empty(dtype): + points = cuspatial.GeoSeries.from_points_xy( + cudf.DataFrame( + { + "x": cudf.Series([], dtype=dtype), # x + "y": cudf.Series([], dtype=dtype), # y + } + ).interleave_columns() + ) order, quadtree = cuspatial.quadtree_on_points( - cudf.Series([], dtype=dtype), # x - cudf.Series([], dtype=dtype), # y + points, *bbox_1, # bbox 1, # scale 1, # max_depth @@ -254,11 +261,18 @@ def test_polygon_join_small(dtype): min_size = 12 points_x = small_points_x.astype(dtype) points_y = small_points_y.astype(dtype) + points = cuspatial.GeoSeries.from_points_xy( + cudf.DataFrame( + { + "x": points_x, # x + "y": points_y, # y + } + ).interleave_columns() + ) poly_points_x = small_poly_xs.astype(dtype) poly_points_y = small_poly_ys.astype(dtype) point_indices, quadtree = cuspatial.quadtree_on_points( - points_x, - points_y, + points, x_min, x_max, y_min, @@ -310,11 +324,18 @@ def test_linestring_join_small(dtype): expansion_radius = 2.0 points_x = small_points_x.astype(dtype) points_y = small_points_y.astype(dtype) + points = cuspatial.GeoSeries.from_points_xy( + cudf.DataFrame( + { + "x": points_x, # x + "y": points_y, # y + } + ).interleave_columns() + ) linestring_points_x = small_poly_xs.astype(dtype) linestring_points_y = small_poly_ys.astype(dtype) point_indices, quadtree = cuspatial.quadtree_on_points( - points_x, - points_y, + points, x_min, x_max, y_min, @@ -411,11 +432,18 @@ def test_quadtree_point_in_polygon_small(dtype): min_size = 12 points_x = small_points_x.astype(dtype) points_y = small_points_y.astype(dtype) + points = cuspatial.GeoSeries.from_points_xy( + cudf.DataFrame( + { + "x": points_x, # x + "y": points_y, # y + } + ).interleave_columns() + ) poly_points_x = small_poly_xs.astype(dtype) poly_points_y = small_poly_ys.astype(dtype) point_indices, quadtree = cuspatial.quadtree_on_points( - points_x, - points_y, + points, x_min, x_max, y_min, @@ -501,11 +529,18 @@ def run_test_quadtree_point_to_nearest_linestring_small( expansion_radius = 2.0 points_x = small_points_x.astype(dtype) points_y = small_points_y.astype(dtype) + points = cuspatial.GeoSeries.from_points_xy( + cudf.DataFrame( + { + "x": points_x, # x + "y": points_y, # y + } + ).interleave_columns() + ) linestring_points_x = small_poly_xs.astype(dtype) linestring_points_y = small_poly_ys.astype(dtype) point_indices, quadtree = cuspatial.quadtree_on_points( - points_x, - points_y, + points, x_min, x_max, y_min, From 88ed25d6bcae5f7313ace56703a057909b23f57e Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Wed, 15 Feb 2023 13:21:25 -0800 Subject: [PATCH 03/10] update benchmarks --- python/cuspatial/benchmarks/api/bench_api.py | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/python/cuspatial/benchmarks/api/bench_api.py b/python/cuspatial/benchmarks/api/bench_api.py index 782a0660a..25d173370 100644 --- a/python/cuspatial/benchmarks/api/bench_api.py +++ b/python/cuspatial/benchmarks/api/bench_api.py @@ -139,13 +139,16 @@ def bench_quadtree_on_points(benchmark, gpu_dataframe): polygons = gpu_dataframe["geometry"].polygons x_points = (cupy.random.random(10000000) - 0.5) * 360 y_points = (cupy.random.random(10000000) - 0.5) * 180 + points = cuspatial.GeoSeries.from_points_xy( + cudf.DataFrame({"x": x_points, "y": y_points}).interleave_columns() + ) + scale = 5 max_depth = 7 min_size = 125 benchmark( cuspatial.quadtree_on_points, - x_points, - y_points, + points, polygons.x.min(), polygons.x.max(), polygons.y.min(), @@ -163,9 +166,11 @@ def bench_quadtree_point_in_polygon(benchmark, polygons): scale = 5 max_depth = 7 min_size = 125 + points = cuspatial.GeoSeries.from_points_xy( + cudf.DataFrame({"x": x_points, "y": y_points}).interleave_columns() + ) point_indices, quadtree = cuspatial.quadtree_on_points( - x_points, - y_points, + points, polygons.x.min(), polygons.x.max(), polygons.y.min(), @@ -218,9 +223,11 @@ def bench_quadtree_point_to_nearest_linestring(benchmark): polygons = gpu_countries["geometry"].polygons points_x = gpu_cities["geometry"].points.x points_y = gpu_cities["geometry"].points.y + points = cuspatial.GeoSeries.from_points_xy( + cudf.DataFrame({"x": points_x, "y": points_y}).interleave_columns() + ) point_indices, quadtree = cuspatial.quadtree_on_points( - points_x, - points_y, + points, polygons.x.min(), polygons.x.max(), polygons.y.min(), From 5570d223b46adf5ba3a928b264375c3d821051b7 Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Wed, 15 Feb 2023 19:05:38 -0800 Subject: [PATCH 04/10] refactors point in polygon interface --- python/cuspatial/benchmarks/api/bench_api.py | 22 +- .../cuspatial/cuspatial/core/spatial/join.py | 104 +++---- .../spatial/join/test_point_in_polygon.py | 289 +++++++++--------- 3 files changed, 184 insertions(+), 231 deletions(-) diff --git a/python/cuspatial/benchmarks/api/bench_api.py b/python/cuspatial/benchmarks/api/bench_api.py index 261746f1c..31e40db35 100644 --- a/python/cuspatial/benchmarks/api/bench_api.py +++ b/python/cuspatial/benchmarks/api/bench_api.py @@ -252,18 +252,12 @@ def bench_quadtree_point_to_nearest_linestring(benchmark): ) -def bench_point_in_polygon(benchmark, gpu_dataframe): - x_points = (cupy.random.random(50000000) - 0.5) * 360 - y_points = (cupy.random.random(50000000) - 0.5) * 180 - short_dataframe = gpu_dataframe.iloc[0:32] - geometry = short_dataframe["geometry"] - polygon_offset = cudf.Series(geometry.polygons.geometry_offset[0:31]) - benchmark( - cuspatial.point_in_polygon, - x_points, - y_points, - polygon_offset, - geometry.polygons.ring_offset, - geometry.polygons.x, - geometry.polygons.y, +def bench_point_in_polygon(benchmark, polygons): + x_points = (cupy.random.random(5000) - 0.5) * 360 + y_points = (cupy.random.random(5000) - 0.5) * 180 + points = cuspatial.GeoSeries.from_points_xy( + cudf.DataFrame({"x": x_points, "y": y_points}).interleave_columns() ) + short_dataframe = polygons.iloc[0:31] + geometry = short_dataframe["geometry"] + benchmark(cuspatial.point_in_polygon, points, geometry) diff --git a/python/cuspatial/cuspatial/core/spatial/join.py b/python/cuspatial/cuspatial/core/spatial/join.py index 4f3877211..6383042bd 100644 --- a/python/cuspatial/cuspatial/core/spatial/join.py +++ b/python/cuspatial/cuspatial/core/spatial/join.py @@ -5,6 +5,7 @@ from cudf import DataFrame from cudf.core.column import as_column +from cuspatial import GeoSeries from cuspatial._lib import spatial_join from cuspatial._lib.point_in_polygon import ( point_in_polygon as cpp_point_in_polygon, @@ -13,14 +14,7 @@ from cuspatial.utils.column_utils import normalize_point_columns -def point_in_polygon( - test_points_x, - test_points_y, - poly_offsets, - poly_ring_offsets, - poly_points_x, - poly_points_y, -): +def point_in_polygon(points: GeoSeries, polygons: GeoSeries): """Compute from a set of points and a set of polygons which points fall within which polygons. Note that `polygons_(x,y)` must be specified as closed polygons: the first and last coordinate of each polygon must be @@ -28,31 +22,21 @@ def point_in_polygon( Parameters ---------- - test_points_x - x-coordinate of test points - test_points_y - y-coordinate of test points - poly_offsets - beginning index of the first ring in each polygon - poly_ring_offsets - beginning index of the first point in each ring - poly_points_x - x closed-coordinate of polygon points - poly_points_y - y closed-coordinate of polygon points + points : GeoSeries + A Series of points to test + polygons: GeoSeries + A Series of polygons to test Examples -------- - Test whether 3 points fall within either of two polygons >>> result = cuspatial.point_in_polygon( - [0, -8, 6.0], # test_points_x - [0, -8, 6.0], # test_points_y - cudf.Series([0, 1], index=['nyc', 'hudson river']), # poly_offsets - [0, 3], # ring_offsets - [-10, 5, 5, -10, 0, 10, 10, 0], # poly_points_x - [-10, -10, 5, 5, 0, 0, 10, 10], # poly_points_y + GeoSeries([Point(0, 0), Point(-8, -8), Point(6, 6)]), + GeoSeries([ + Polygon([(-10, -10), (5, -10), (5, 5), (-10, 5), (-10, -10)]), + Polygon([(0, 0), (10, 0), (10, 10), (0, 10), (0, 0)]) + ], index=['nyc', 'hudson river']) ) # The result of point_in_polygon is a DataFrame of Boolean # values indicating whether each point (rows) falls within @@ -66,16 +50,6 @@ def point_in_polygon( # Point 1: (-8, -8) falls in the first polygon # Point 2: (6.0, 6.0) falls in the second polygon - Notes - ----- - - * input Series x and y will not be index aligned, but computed as - sequential arrays. - * poly_ring_offsets must contain only the rings that make up the polygons - indexed by poly_offsets. If there are rings in poly_ring_offsets that - are not part of the polygons in poly_offsets, results are likely to be - incorrect and behavior is undefined. - Returns ------- result : cudf.DataFrame @@ -83,40 +57,38 @@ def point_in_polygon( within each polygon. """ - if len(poly_offsets) == 0: + if len(polygons) == 0: return DataFrame() - ( - test_points_x, - test_points_y, - poly_points_x, - poly_points_y, - ) = normalize_point_columns( - as_column(test_points_x), - as_column(test_points_y), - as_column(poly_points_x), - as_column(poly_points_y), - ) - - result = cpp_point_in_polygon( - test_points_x, - test_points_y, - as_column(poly_offsets, dtype="int32"), - as_column(poly_ring_offsets, dtype="int32"), - poly_points_x, - poly_points_y, + # The C++ API only supports single-polygon, reject if input has + # multipolygons + if len(polygons.polygons.part_offset) != len( + polygons.polygons.geometry_offset + ): + raise ValueError("GeoSeries cannot contain multipolygon.") + + x = as_column(points.points.x) + y = as_column(points.points.y) + + poly_offsets = as_column(polygons.polygons.part_offset[:-1]) + ring_offsets = as_column(polygons.polygons.ring_offset[:-1]) + px = as_column(polygons.polygons.x) + py = as_column(polygons.polygons.y) + + result = cpp_point_in_polygon(x, y, poly_offsets, ring_offsets, px, py) + result = DataFrame( + gis_utils.pip_bitmap_column_to_binary_array( + polygon_bitmap_column=result, width=len(poly_offsets) + ) ) - result = gis_utils.pip_bitmap_column_to_binary_array( - polygon_bitmap_column=result, width=len(poly_offsets) - ) - result = DataFrame(result) - result = DataFrame._from_data( - {name: col.astype("bool") for name, col in result._data.items()} + result.columns = polygons.index[::-1] + return DataFrame._from_data( + { + name: result[name].astype("bool") + for name in reversed(result.columns) + } ) - result.columns = [x for x in list(reversed(poly_offsets.index))] - result = result[list(reversed(result.columns))] - return result def join_quadtree_and_bounding_boxes( diff --git a/python/cuspatial/cuspatial/tests/spatial/join/test_point_in_polygon.py b/python/cuspatial/cuspatial/tests/spatial/join/test_point_in_polygon.py index c01f37e05..a9feee6cb 100644 --- a/python/cuspatial/cuspatial/tests/spatial/join/test_point_in_polygon.py +++ b/python/cuspatial/cuspatial/tests/spatial/join/test_point_in_polygon.py @@ -1,7 +1,6 @@ # Copyright (c) 2019, NVIDIA CORPORATION. import numpy as np -import pytest import cudf @@ -9,100 +8,24 @@ from cuspatial.utils import gis_utils -def test_missing_0(): - with pytest.raises(RuntimeError): - cuspatial.point_in_polygon( - cudf.Series(), - cudf.Series([0.0]), - cudf.Series([0]), - cudf.Series([0]), - cudf.Series([0.0]), - cudf.Series([0.0]), - ) - - -def test_missing_1(): - with pytest.raises(RuntimeError): - cuspatial.point_in_polygon( - cudf.Series([0.0]), - cudf.Series(), - cudf.Series([0]), - cudf.Series([0]), - cudf.Series([0.0]), - cudf.Series([0.0]), - ) - - -def test_missing_2(): +def test_empty(): result = cuspatial.point_in_polygon( - cudf.Series([0.0]), - cudf.Series([0.0]), - cudf.Series(), - cudf.Series([0]), - cudf.Series([0.0]), - cudf.Series([0.0]), + cuspatial.GeoSeries([]), cuspatial.GeoSeries([]) ) - - expected = cudf.DataFrame() - cudf.testing.assert_frame_equal(expected, result) - - -def test_missing_3(): - with pytest.raises(RuntimeError): - cuspatial.point_in_polygon( - cudf.Series([0.0]), - cudf.Series([0.0]), - cudf.Series([0]), - cudf.Series(), - cudf.Series([0.0]), - cudf.Series([0.0]), - ) - - -def test_missing_4(): - with pytest.raises(RuntimeError): - cuspatial.point_in_polygon( - cudf.Series([0.0]), - cudf.Series([0.0]), - cudf.Series([0]), - cudf.Series([0]), - cudf.Series(), - cudf.Series([0.0]), - ) - - -def test_missing_5(): - with pytest.raises(RuntimeError): - cuspatial.point_in_polygon( - cudf.Series([0.0]), - cudf.Series([0.0]), - cudf.Series([0]), - cudf.Series([0]), - cudf.Series([0.0]), - cudf.Series(), - ) - - -def test_zeros(): - with pytest.raises(RuntimeError): - cuspatial.point_in_polygon( - cudf.Series([0.0]), - cudf.Series([0.0]), - cudf.Series([0]), - cudf.Series([0]), - cudf.Series([0.0]), - cudf.Series([0.0]), - ) + cudf.testing.assert_frame_equal(result, cudf.DataFrame()) def test_one_point_in(): result = cuspatial.point_in_polygon( - cudf.Series([0.0]), - cudf.Series([0.0]), - cudf.Series([0]), - cudf.Series([0]), - cudf.Series([-1, 0, 1, -1]), - cudf.Series([-1, 1, -1, -1]), + cuspatial.GeoSeries.from_points_xy( + cudf.Series([0.0, 0.0], dtype="f8") + ), + cuspatial.GeoSeries.from_polygons_xy( + cudf.Series([-1, -1, 0, 1, 1, -1, -1, -1], dtype="f8"), + cudf.Series([0, 4]), + cudf.Series([0, 1]), + cudf.Series([0, 1]), + ), ) expected = cudf.DataFrame({0: True}) cudf.testing.assert_frame_equal(expected, result) @@ -110,38 +33,46 @@ def test_one_point_in(): def test_one_point_out(): result = cuspatial.point_in_polygon( - cudf.Series([1]), - cudf.Series([1]), - cudf.Series([0]), - cudf.Series([0]), - cudf.Series([-1, 0, 1, -1]), - cudf.Series([-1, 1, -1, -1]), + cuspatial.GeoSeries.from_points_xy(cudf.Series([1, 1], dtype="f8")), + cuspatial.GeoSeries.from_polygons_xy( + cudf.Series([-1, -1, 0, 1, 1, -1, -1, -1], dtype="f8"), + cudf.Series([0, 4]), + cudf.Series([0, 1]), + cudf.Series([0, 1]), + ), ) expected = cudf.DataFrame({0: False}) cudf.testing.assert_frame_equal(expected, result) def test_one_point_in_two_rings(): + result = cuspatial.point_in_polygon( - cudf.Series([0]), - cudf.Series([0]), - cudf.Series([0]), - cudf.Series([0, 4]), - cudf.Series([-1, 0, 1, -1, -1, 0, 1, -1]), - cudf.Series([-1, 1, -1, -1, 3, 5, 3, 3]), + cuspatial.GeoSeries.from_points_xy(cudf.Series([0, 0], dtype="f8")), + cuspatial.GeoSeries.from_polygons_xy( + cudf.Series( + [-1, -1, 1, 0, -1, 1, -1, -1, 3, -1, 5, 0, 3, 1, 3, -1], + dtype="f8", + ), + cudf.Series([0, 4, 8]), + cudf.Series([0, 2]), + cudf.Series([0, 1]), + ), ) + expected = cudf.DataFrame({0: True}) cudf.testing.assert_frame_equal(expected, result) def test_one_point_in_two_rings_no_repeat(): result = cuspatial.point_in_polygon( - cudf.Series([0]), - cudf.Series([0]), - cudf.Series([0]), - cudf.Series([0, 3]), - cudf.Series([-1, 0, 1, -1, 0, 1]), - cudf.Series([-1, 1, -1, 3, 5, 3]), + cuspatial.GeoSeries.from_points_xy(cudf.Series([0, 0], dtype="f8")), + cuspatial.GeoSeries.from_polygons_xy( + cudf.Series([-1, -1, 1, 0, -1, 1, 3, -1, 5, 0, 3, 1], dtype="f8"), + cudf.Series([0, 3, 6]), + cudf.Series([0, 2]), + cudf.Series([0, 1]), + ), ) expected = cudf.DataFrame({0: True}) cudf.testing.assert_frame_equal(expected, result) @@ -149,12 +80,16 @@ def test_one_point_in_two_rings_no_repeat(): def test_one_point_out_two_rings(): result = cuspatial.point_in_polygon( - cudf.Series([1]), - cudf.Series([1]), - cudf.Series([0]), - cudf.Series([0, 4]), - cudf.Series([-1, 0, 1, -1, -1, 0, 1, -1]), - cudf.Series([-1, 1, -1, -1, 3, 5, 3, 3]), + cuspatial.GeoSeries.from_points_xy(cudf.Series([1, 1], dtype="f8")), + cuspatial.GeoSeries.from_polygons_xy( + cudf.Series( + [-1, -1, 1, 0, -1, 1, -1, -1, 3, -1, 5, 0, 3, 1, 3, -1], + dtype="f8", + ), + cudf.Series([0, 4, 8]), + cudf.Series([0, 2]), + cudf.Series([0, 1]), + ), ) expected = cudf.DataFrame({0: False}) cudf.testing.assert_frame_equal(expected, result) @@ -162,38 +97,51 @@ def test_one_point_out_two_rings(): def test_one_point_out_two_rings_no_repeat(): result = cuspatial.point_in_polygon( - cudf.Series([1]), - cudf.Series([1]), - cudf.Series([0]), - cudf.Series([0, 3]), - cudf.Series([-1, 0, 1, -1, 0, 1]), - cudf.Series([-1, 1, -1, 3, 5, 3]), + cuspatial.GeoSeries.from_points_xy(cudf.Series([1, 1], dtype="f8")), + cuspatial.GeoSeries.from_polygons_xy( + cudf.Series([-1, -1, 1, 0, -1, 1, 3, -1, 5, 0, 3, 1], dtype="f8"), + cudf.Series([0, 3, 6]), + cudf.Series([0, 2]), + cudf.Series([0, 1]), + ), ) expected = cudf.DataFrame({0: False}) cudf.testing.assert_frame_equal(expected, result) -def test_one_point_in_one_out_two_rings(): +def test_two_point_in_one_out_two_rings(): result = cuspatial.point_in_polygon( - cudf.Series([0, 1]), - cudf.Series([0, 1]), - cudf.Series([0]), - cudf.Series([0, 4]), - cudf.Series([-1, 0, 1, -1, -1, 0, 1, -1]), - cudf.Series([-1, 1, -1, -1, 3, 5, 3, 3]), + cuspatial.GeoSeries.from_points_xy( + cudf.Series([0, 0, 1, 1], dtype="f8") + ), + cuspatial.GeoSeries.from_polygons_xy( + cudf.Series( + [-1, -1, 1, 0, -1, 1, -1, -1, 3, -1, 5, 0, 3, 1, 3, -1], + dtype="f8", + ), + cudf.Series([0, 4, 8]), + cudf.Series([0, 2]), + cudf.Series([0, 1]), + ), ) expected = cudf.DataFrame({0: [True, False]}) cudf.testing.assert_frame_equal(expected, result) -def test_one_point_out_one_in_two_rings(): +def test_two_point_out_one_in_two_rings(): result = cuspatial.point_in_polygon( - cudf.Series([1, 0]), - cudf.Series([1, 0]), - cudf.Series([0]), - cudf.Series([0, 4]), - cudf.Series([-1, 0, 1, -1, -1, 0, 1, -1]), - cudf.Series([-1, 1, -1, -1, 3, 5, 3, 3]), + cuspatial.GeoSeries.from_points_xy( + cudf.Series([1, 1, 0, 0], dtype="f8") + ), + cuspatial.GeoSeries.from_polygons_xy( + cudf.Series( + [-1, -1, 1, 0, -1, 1, -1, -1, 3, -1, 5, 0, 3, 1, 3, -1], + dtype="f8", + ), + cudf.Series([0, 4, 8]), + cudf.Series([0, 2]), + cudf.Series([0, 1]), + ), ) expected = cudf.DataFrame({0: [False, True]}) cudf.testing.assert_frame_equal(expected, result) @@ -201,12 +149,18 @@ def test_one_point_out_one_in_two_rings(): def test_two_points_out_two_rings(): result = cuspatial.point_in_polygon( - cudf.Series([1, -1]), - cudf.Series([1, 1]), - cudf.Series([0]), - cudf.Series([0, 4]), - cudf.Series([-1, 0, 1, -1, -1, 0, 1, -1]), - cudf.Series([-1, 1, -1, -1, 3, 5, 3, 3]), + cuspatial.GeoSeries.from_points_xy( + cudf.Series([1, 1, -1, 1], dtype="f8") + ), + cuspatial.GeoSeries.from_polygons_xy( + cudf.Series( + [-1, -1, 1, 0, -1, 1, -1, -1, 3, -1, 5, 0, 3, 1, 3, -1], + dtype="f8", + ), + cudf.Series([0, 4, 8]), + cudf.Series([0, 2]), + cudf.Series([0, 1]), + ), ) expected = cudf.DataFrame({0: [False, False]}) cudf.testing.assert_frame_equal(expected, result) @@ -214,12 +168,18 @@ def test_two_points_out_two_rings(): def test_two_points_in_two_rings(): result = cuspatial.point_in_polygon( - cudf.Series([0, 0]), - cudf.Series([0, 4]), - cudf.Series([0]), - cudf.Series([0, 4]), - cudf.Series([-1, 0, 1, -1, -1, 0, 1, -1]), - cudf.Series([-1, 1, -1, -1, 3, 5, 3, 3]), + cuspatial.GeoSeries.from_points_xy( + cudf.Series([0, 0, 0, 4], dtype="f8") + ), + cuspatial.GeoSeries.from_polygons_xy( + cudf.Series( + [-1, -1, 0, 1, 1, -1, -1, -1, -1, 3, 0, 5, 1, 3, -1, 3], + dtype="f8", + ), + cudf.Series([0, 4, 8]), + cudf.Series([0, 2]), + cudf.Series([0, 1]), + ), ) expected = cudf.DataFrame({0: [True, True]}) cudf.testing.assert_frame_equal(expected, result) @@ -227,12 +187,39 @@ def test_two_points_in_two_rings(): def test_three_points_two_features(): result = cuspatial.point_in_polygon( - cudf.Series([0, -8, 6.0]), - cudf.Series([0, -8, 6.0]), - cudf.Series([0, 1]), - cudf.Series([0, 5]), - cudf.Series([-10.0, 5, 5, -10, -10, 0, 10, 10, 0, 0]), - cudf.Series([-10.0, -10, 5, 5, -10, 0, 0, 10, 10, 0]), + cuspatial.GeoSeries.from_points_xy( + cudf.Series([0, 0, -8, -8, 6.0, 6.0], dtype="f8") + ), + cuspatial.GeoSeries.from_polygons_xy( + cudf.Series( + [ + -10.0, + -10.0, + 5, + -10, + 5, + 5, + -10, + 5, + -10, + -10, + 0, + 0, + 10, + 0, + 10, + 10, + 0, + 10, + 0, + 0, + ], + dtype="f8", + ), + cudf.Series([0, 5, 10]), + cudf.Series([0, 1, 2]), + cudf.Series([0, 1, 2]), + ), ) expected = cudf.DataFrame() expected[0] = [True, True, False] From 3ca954713f0f2363de2081e65166774aef95ad79 Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Wed, 15 Feb 2023 19:30:25 -0800 Subject: [PATCH 05/10] Update python/cuspatial/benchmarks/api/bench_api.py Co-authored-by: H. Thomson Comer --- python/cuspatial/benchmarks/api/bench_api.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/python/cuspatial/benchmarks/api/bench_api.py b/python/cuspatial/benchmarks/api/bench_api.py index 25d173370..ac18db303 100644 --- a/python/cuspatial/benchmarks/api/bench_api.py +++ b/python/cuspatial/benchmarks/api/bench_api.py @@ -121,10 +121,10 @@ def bench_points_in_spatial_window(benchmark, gpu_dataframe): geometry = gpu_dataframe["geometry"] mean_x, std_x = (geometry.polygons.x.mean(), geometry.polygons.x.std()) mean_y, std_y = (geometry.polygons.y.mean(), geometry.polygons.y.std()) - xy = cudf.DataFrame( - {"x": geometry.polygons.x, "y": geometry.polygons.y} - ).interleave_columns() - points = cuspatial.GeoSeries.from_points_xy(xy) + points = cuspatial.GeoSeries.from_points_xy(cudf.DataFrame({ + "x": geometry.polygons.x, + "y": geometry.polygons.y + }).interleave_columns()) benchmark( cuspatial.points_in_spatial_window, points, From 6d936268cb47847c21f6254cf3184d6d0b1b7b9f Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Wed, 15 Feb 2023 19:36:42 -0800 Subject: [PATCH 06/10] style --- python/cuspatial/benchmarks/api/bench_api.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/python/cuspatial/benchmarks/api/bench_api.py b/python/cuspatial/benchmarks/api/bench_api.py index ac18db303..0ad32b4ed 100644 --- a/python/cuspatial/benchmarks/api/bench_api.py +++ b/python/cuspatial/benchmarks/api/bench_api.py @@ -121,10 +121,11 @@ def bench_points_in_spatial_window(benchmark, gpu_dataframe): geometry = gpu_dataframe["geometry"] mean_x, std_x = (geometry.polygons.x.mean(), geometry.polygons.x.std()) mean_y, std_y = (geometry.polygons.y.mean(), geometry.polygons.y.std()) - points = cuspatial.GeoSeries.from_points_xy(cudf.DataFrame({ - "x": geometry.polygons.x, - "y": geometry.polygons.y - }).interleave_columns()) + points = cuspatial.GeoSeries.from_points_xy( + cudf.DataFrame( + {"x": geometry.polygons.x, "y": geometry.polygons.y} + ).interleave_columns() + ) benchmark( cuspatial.points_in_spatial_window, points, From 6b34546ebc0fb45712a5084ebb7fb70b26fafbfb Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Sat, 18 Feb 2023 16:49:20 -0800 Subject: [PATCH 07/10] move bit helper into join.py, improve test readability --- .../cuspatial/cuspatial/core/spatial/join.py | 45 ++++++++++++++++++- .../spatial/join/test_point_in_polygon.py | 22 +++++---- python/cuspatial/cuspatial/utils/gis_utils.py | 44 ------------------ 3 files changed, 57 insertions(+), 54 deletions(-) delete mode 100644 python/cuspatial/cuspatial/utils/gis_utils.py diff --git a/python/cuspatial/cuspatial/core/spatial/join.py b/python/cuspatial/cuspatial/core/spatial/join.py index 6383042bd..5a66dfb34 100644 --- a/python/cuspatial/cuspatial/core/spatial/join.py +++ b/python/cuspatial/cuspatial/core/spatial/join.py @@ -1,8 +1,13 @@ # Copyright (c) 2022, NVIDIA CORPORATION. +import operator import warnings +import rmm +from numba import cuda + from cudf import DataFrame +from cudf.core.buffer import acquire_spill_lock from cudf.core.column import as_column from cuspatial import GeoSeries @@ -10,7 +15,6 @@ from cuspatial._lib.point_in_polygon import ( point_in_polygon as cpp_point_in_polygon, ) -from cuspatial.utils import gis_utils from cuspatial.utils.column_utils import normalize_point_columns @@ -77,7 +81,7 @@ def point_in_polygon(points: GeoSeries, polygons: GeoSeries): result = cpp_point_in_polygon(x, y, poly_offsets, ring_offsets, px, py) result = DataFrame( - gis_utils.pip_bitmap_column_to_binary_array( + pip_bitmap_column_to_binary_array( polygon_bitmap_column=result, width=len(poly_offsets) ) ) @@ -314,3 +318,40 @@ def quadtree_point_to_nearest_linestring( linestring_points_y, ) ) + + +@cuda.jit +def binarize(in_col, out, width): + """Convert any positive integer to a binary array.""" + i = cuda.grid(1) + if i < in_col.size: + n = in_col[i] + idx = width - 1 + + out[i, idx] = operator.mod(n, 2) + idx -= 1 + + while n > 1: + n = operator.rshift(n, 1) + out[i, idx] = operator.mod(n, 2) + idx -= 1 + + +def apply_binarize(in_col, width): + buf = rmm.DeviceBuffer(size=(in_col.size * width)) + out = cuda.as_cuda_array(buf).view("int8").reshape((in_col.size, width)) + if out.size > 0: + out[:] = 0 + binarize.forall(out.size)(in_col, out, width) + return out + + +def pip_bitmap_column_to_binary_array(polygon_bitmap_column, width): + """Convert the bitmap output of point_in_polygon + to an array of 0s and 1s. + """ + with acquire_spill_lock(): + binary_maps = apply_binarize( + polygon_bitmap_column.data_array_view(mode="read"), width + ) + return binary_maps diff --git a/python/cuspatial/cuspatial/tests/spatial/join/test_point_in_polygon.py b/python/cuspatial/cuspatial/tests/spatial/join/test_point_in_polygon.py index a9feee6cb..37f9282f4 100644 --- a/python/cuspatial/cuspatial/tests/spatial/join/test_point_in_polygon.py +++ b/python/cuspatial/cuspatial/tests/spatial/join/test_point_in_polygon.py @@ -5,7 +5,7 @@ import cudf import cuspatial -from cuspatial.utils import gis_utils +from cuspatial.core.spatial.join import pip_bitmap_column_to_binary_array def test_empty(): @@ -228,25 +228,31 @@ def test_three_points_two_features(): def test_pip_bitmap_column_to_binary_array(): - col = cudf.Series([0, 13, 3, 9])._column - got = gis_utils.pip_bitmap_column_to_binary_array(col, width=4) + col = cudf.Series([0b00000000, 0b00001101, 0b00000011, 0b00001001])._column + got = pip_bitmap_column_to_binary_array(col, width=4) expected = np.array( [[0, 0, 0, 0], [1, 1, 0, 1], [0, 0, 1, 1], [1, 0, 0, 1]], dtype="int8" ) np.testing.assert_array_equal(got.copy_to_host(), expected) col = cudf.Series([])._column - got = gis_utils.pip_bitmap_column_to_binary_array(col, width=0) + got = pip_bitmap_column_to_binary_array(col, width=0) expected = np.array([], dtype="int8").reshape(0, 0) np.testing.assert_array_equal(got.copy_to_host(), expected) col = cudf.Series([None, None], dtype="float64")._column - got = gis_utils.pip_bitmap_column_to_binary_array(col, width=0) + got = pip_bitmap_column_to_binary_array(col, width=0) expected = np.array([], dtype="int8").reshape(2, 0) np.testing.assert_array_equal(got.copy_to_host(), expected) - col = cudf.Series([238, 13, 29594])._column - got = gis_utils.pip_bitmap_column_to_binary_array(col, width=15) + col = cudf.Series( + [ + 0b000000011101110, + 0b000000000001101, + 0b111001110011010, + ] + )._column + got = pip_bitmap_column_to_binary_array(col, width=15) expected = np.array( [ [0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0], @@ -258,6 +264,6 @@ def test_pip_bitmap_column_to_binary_array(): np.testing.assert_array_equal(got.copy_to_host(), expected) col = cudf.Series([0, 0, 0])._column - got = gis_utils.pip_bitmap_column_to_binary_array(col, width=3) + got = pip_bitmap_column_to_binary_array(col, width=3) expected = np.array([[0, 0, 0], [0, 0, 0], [0, 0, 0]], dtype="int8") np.testing.assert_array_equal(got.copy_to_host(), expected) diff --git a/python/cuspatial/cuspatial/utils/gis_utils.py b/python/cuspatial/cuspatial/utils/gis_utils.py deleted file mode 100644 index 6ffd96bc2..000000000 --- a/python/cuspatial/cuspatial/utils/gis_utils.py +++ /dev/null @@ -1,44 +0,0 @@ -# Copyright (c) 2019, NVIDIA CORPORATION. -import operator - -import rmm -from numba import cuda - -from cudf.core.buffer import acquire_spill_lock - - -@cuda.jit -def binarize(in_col, out, width): - """Convert any positive integer to a binary array.""" - i = cuda.grid(1) - if i < in_col.size: - n = in_col[i] - idx = width - 1 - - out[i, idx] = operator.mod(n, 2) - idx -= 1 - - while n > 1: - n = operator.rshift(n, 1) - out[i, idx] = operator.mod(n, 2) - idx -= 1 - - -def apply_binarize(in_col, width): - buf = rmm.DeviceBuffer(size=(in_col.size * width)) - out = cuda.as_cuda_array(buf).view("int8").reshape((in_col.size, width)) - if out.size > 0: - out[:] = 0 - binarize.forall(out.size)(in_col, out, width) - return out - - -def pip_bitmap_column_to_binary_array(polygon_bitmap_column, width): - """Convert the bitmap output of point_in_polygon - to an array of 0s and 1s. - """ - with acquire_spill_lock(): - binary_maps = apply_binarize( - polygon_bitmap_column.data_array_view(mode="read"), width - ) - return binary_maps From 387174b261b854b18388fee36be01f65f69f70e3 Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Mon, 20 Feb 2023 15:03:57 -0800 Subject: [PATCH 08/10] quadtree pip refactor --- .../cuspatial/cuspatial/core/spatial/join.py | 64 ++++++++-------- .../tests/spatial/join/test_spatial_join.py | 76 ++++++++++++++----- 2 files changed, 88 insertions(+), 52 deletions(-) diff --git a/python/cuspatial/cuspatial/core/spatial/join.py b/python/cuspatial/cuspatial/core/spatial/join.py index 5a66dfb34..3a2bbfbfe 100644 --- a/python/cuspatial/cuspatial/core/spatial/join.py +++ b/python/cuspatial/cuspatial/core/spatial/join.py @@ -15,7 +15,11 @@ from cuspatial._lib.point_in_polygon import ( point_in_polygon as cpp_point_in_polygon, ) -from cuspatial.utils.column_utils import normalize_point_columns +from cuspatial.utils.column_utils import ( + normalize_point_columns, + contains_only_points, + contains_only_polygons, +) def point_in_polygon(points: GeoSeries, polygons: GeoSeries): @@ -166,12 +170,8 @@ def quadtree_point_in_polygon( poly_quad_pairs, quadtree, point_indices, - points_x, - points_y, - poly_offsets, - ring_offsets, - poly_points_x, - poly_points_y, + points: GeoSeries, + polygons: GeoSeries, ): """Test whether the specified points are inside any of the specified polygons. @@ -193,18 +193,10 @@ def quadtree_point_in_polygon( A complete quadtree for a given area-of-interest bounding box. point_indices : cudf.Series Sorted point indices returned by ``cuspatial.quadtree_on_points`` - points_x : cudf.Series - x-coordinates of points used to construct the quadtree. - points_y : cudf.Series - y-coordinates of points used to construct the quadtree. - poly_offsets : cudf.Series - Begin index of the first ring in each polygon. - ring_offsets : cudf.Series - Begin index of the first point in each ring. - poly_points_x : cudf.Series - Polygon point x-coodinates. - poly_points_y : cudf.Series - Polygon point y-coodinates. + points: GeoSeries + Points used to build the quadtree + polygons: GeoSeries + Polygons to test against Returns ------- @@ -218,26 +210,32 @@ def quadtree_point_in_polygon( so it is an index to an index. """ - ( - points_x, - points_y, - poly_points_x, - poly_points_y, - ) = normalize_point_columns( - as_column(points_x), - as_column(points_y), - as_column(poly_points_x), - as_column(poly_points_y), - ) + if not contains_only_points(points): + raise ValueError( + "`point` Geoseries must contains only point geometries." + ) + if not contains_only_polygons(polygons): + raise ValueError( + "`polygons` Geoseries must contains only polygons geometries." + ) + + points_x = as_column(points.points.x) + points_y = as_column(points.points.y) + + poly_offsets = as_column(polygons.polygons.part_offset) + ring_offsets = as_column(polygons.polygons.ring_offset) + poly_points_x = as_column(polygons.polygons.x) + poly_points_y = as_column(polygons.polygons.y) + return DataFrame._from_data( *spatial_join.quadtree_point_in_polygon( poly_quad_pairs, quadtree, - as_column(point_indices, dtype="uint32"), + point_indices._column, points_x, points_y, - as_column(poly_offsets, dtype="uint32"), - as_column(ring_offsets, dtype="uint32"), + poly_offsets, + ring_offsets, poly_points_x, poly_points_y, ) diff --git a/python/cuspatial/cuspatial/tests/spatial/join/test_spatial_join.py b/python/cuspatial/cuspatial/tests/spatial/join/test_spatial_join.py index 13e5fa0ed..6ef954035 100644 --- a/python/cuspatial/cuspatial/tests/spatial/join/test_spatial_join.py +++ b/python/cuspatial/cuspatial/tests/spatial/join/test_spatial_join.py @@ -1,6 +1,7 @@ # Copyright (c) 2020-2022, NVIDIA CORPORATION. import numpy as np +import cupy as cp import pytest import cudf @@ -210,9 +211,16 @@ @pytest.mark.parametrize("dtype", [np.float32, np.float64]) def test_empty(dtype): + points = cuspatial.GeoSeries.from_points_xy( + cudf.DataFrame( + { + "x": cudf.Series([], dtype=dtype), # x + "y": cudf.Series([], dtype=dtype), # y + } + ).interleave_columns() + ) order, quadtree = cuspatial.quadtree_on_points( - cudf.Series([], dtype=dtype), # x - cudf.Series([], dtype=dtype), # y + points, *bbox_1, # bbox 1, # scale 1, # max_depth @@ -254,11 +262,18 @@ def test_polygon_join_small(dtype): min_size = 12 points_x = small_points_x.astype(dtype) points_y = small_points_y.astype(dtype) + points = cuspatial.GeoSeries.from_points_xy( + cudf.DataFrame( + { + "x": points_x, # x + "y": points_y, # y + } + ).interleave_columns() + ) poly_points_x = small_poly_xs.astype(dtype) poly_points_y = small_poly_ys.astype(dtype) point_indices, quadtree = cuspatial.quadtree_on_points( - points_x, - points_y, + points, x_min, x_max, y_min, @@ -310,11 +325,18 @@ def test_linestring_join_small(dtype): expansion_radius = 2.0 points_x = small_points_x.astype(dtype) points_y = small_points_y.astype(dtype) + points = cuspatial.GeoSeries.from_points_xy( + cudf.DataFrame( + { + "x": points_x, # x + "y": points_y, # y + } + ).interleave_columns() + ) linestring_points_x = small_poly_xs.astype(dtype) linestring_points_y = small_poly_ys.astype(dtype) point_indices, quadtree = cuspatial.quadtree_on_points( - points_x, - points_y, + points, x_min, x_max, y_min, @@ -411,11 +433,28 @@ def test_quadtree_point_in_polygon_small(dtype): min_size = 12 points_x = small_points_x.astype(dtype) points_y = small_points_y.astype(dtype) + points = cuspatial.GeoSeries.from_points_xy( + cudf.DataFrame( + { + "x": points_x, # x + "y": points_y, # y + } + ).interleave_columns() + ) poly_points_x = small_poly_xs.astype(dtype) poly_points_y = small_poly_ys.astype(dtype) + poly_points = cudf.DataFrame( + {"x": poly_points_x, "y": poly_points_y} + ).interleave_columns() + + polygons = cuspatial.GeoSeries.from_polygons_xy( + poly_points, + small_ring_offsets, + small_poly_offsets, + cp.arange(len(small_poly_offsets)), + ) point_indices, quadtree = cuspatial.quadtree_on_points( - points_x, - points_y, + points, x_min, x_max, y_min, @@ -441,15 +480,7 @@ def test_quadtree_point_in_polygon_small(dtype): max_depth, ) polygons_and_points = cuspatial.quadtree_point_in_polygon( - intersections, - quadtree, - point_indices, - points_x, - points_y, - small_poly_offsets, - small_ring_offsets, - poly_points_x, - poly_points_y, + intersections, quadtree, point_indices, points, polygons ) cudf.testing.assert_frame_equal( polygons_and_points, @@ -501,11 +532,18 @@ def run_test_quadtree_point_to_nearest_linestring_small( expansion_radius = 2.0 points_x = small_points_x.astype(dtype) points_y = small_points_y.astype(dtype) + points = cuspatial.GeoSeries.from_points_xy( + cudf.DataFrame( + { + "x": points_x, # x + "y": points_y, # y + } + ).interleave_columns() + ) linestring_points_x = small_poly_xs.astype(dtype) linestring_points_y = small_poly_ys.astype(dtype) point_indices, quadtree = cuspatial.quadtree_on_points( - points_x, - points_y, + points, x_min, x_max, y_min, From 620acf2e04c39a4b3146d67674a42336fbe31465 Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Mon, 20 Feb 2023 16:35:09 -0800 Subject: [PATCH 09/10] update point to nearest line --- .../cuspatial/cuspatial/core/spatial/join.py | 61 +++++++++++-------- .../tests/spatial/join/test_spatial_join.py | 18 +++--- 2 files changed, 43 insertions(+), 36 deletions(-) diff --git a/python/cuspatial/cuspatial/core/spatial/join.py b/python/cuspatial/cuspatial/core/spatial/join.py index 3a2bbfbfe..dd610a2c9 100644 --- a/python/cuspatial/cuspatial/core/spatial/join.py +++ b/python/cuspatial/cuspatial/core/spatial/join.py @@ -16,7 +16,7 @@ point_in_polygon as cpp_point_in_polygon, ) from cuspatial.utils.column_utils import ( - normalize_point_columns, + contains_only_linestrings, contains_only_points, contains_only_polygons, ) @@ -219,6 +219,11 @@ def quadtree_point_in_polygon( "`polygons` Geoseries must contains only polygons geometries." ) + if len(polygons.polygons.part_offset) != len( + polygons.polygons.geometry_offset + ): + raise ValueError("GeoSeries cannot contain multipolygon.") + points_x = as_column(points.points.x) points_y = as_column(points.points.y) @@ -246,11 +251,8 @@ def quadtree_point_to_nearest_linestring( linestring_quad_pairs, quadtree, point_indices, - points_x, - points_y, - linestring_offsets, - linestring_points_x, - linestring_points_y, + points: GeoSeries, + linestrings: GeoSeries, ): """Finds the nearest linestring to each point in a quadrant, and computes the distances between each point and linestring. @@ -268,16 +270,10 @@ def quadtree_point_to_nearest_linestring( A complete quadtree for a given area-of-interest bounding box. point_indices : cudf.Series Sorted point indices returned by ``cuspatial.quadtree_on_points`` - points_x : cudf.Series - x-coordinates of points used to construct the quadtree. - points_y : cudf.Series - y-coordinates of points used to construct the quadtree. - linestring_offsets : cudf.Series - Begin index of the first point in each linestring. - poly_points_x : cudf.Series - Linestring point x-coordinates. - poly_points_y : cudf.Series - Linestring point y-coordinates. + points: GeoSeries + Points to find nearest linestring for + linestrings: GeoSeries + Linestrings to test for Returns ------- @@ -293,17 +289,28 @@ def quadtree_point_to_nearest_linestring( distance : cudf.Series Distance between point and its nearest linestring. """ - ( - points_x, - points_y, - linestring_points_x, - linestring_points_y, - ) = normalize_point_columns( - as_column(points_x), - as_column(points_y), - as_column(linestring_points_x), - as_column(linestring_points_y), - ) + + if not contains_only_points(points): + raise ValueError( + "`point` Geoseries must contains only point geometries." + ) + if not contains_only_linestrings(linestrings): + raise ValueError( + "`linestrings` Geoseries must contains only linestring geometries." + ) + + if len(linestrings.lines.part_offset) != len( + linestrings.lines.geometry_offset + ): + raise ValueError("GeoSeries cannot contain multilinestrings.") + + points_x = as_column(points.points.x) + points_y = as_column(points.points.y) + + linestring_points_x = as_column(linestrings.lines.x) + linestring_points_y = as_column(linestrings.lines.y) + linestring_offsets = as_column(linestrings.lines.part_offset) + return DataFrame._from_data( *spatial_join.quadtree_point_to_nearest_linestring( linestring_quad_pairs, diff --git a/python/cuspatial/cuspatial/tests/spatial/join/test_spatial_join.py b/python/cuspatial/cuspatial/tests/spatial/join/test_spatial_join.py index 6ef954035..b91573325 100644 --- a/python/cuspatial/cuspatial/tests/spatial/join/test_spatial_join.py +++ b/python/cuspatial/cuspatial/tests/spatial/join/test_spatial_join.py @@ -1,7 +1,7 @@ # Copyright (c) 2020-2022, NVIDIA CORPORATION. -import numpy as np import cupy as cp +import numpy as np import pytest import cudf @@ -542,6 +542,13 @@ def run_test_quadtree_point_to_nearest_linestring_small( ) linestring_points_x = small_poly_xs.astype(dtype) linestring_points_y = small_poly_ys.astype(dtype) + linestrings = cuspatial.GeoSeries.from_linestrings_xy( + cudf.DataFrame( + {"x": linestring_points_x, "y": linestring_points_y} + ).interleave_columns(), + small_ring_offsets, + cp.arange(len(small_ring_offsets)), + ) point_indices, quadtree = cuspatial.quadtree_on_points( points, x_min, @@ -569,14 +576,7 @@ def run_test_quadtree_point_to_nearest_linestring_small( max_depth, ) p2np_result = cuspatial.quadtree_point_to_nearest_linestring( - intersections, - quadtree, - point_indices, - points_x, - points_y, - small_ring_offsets, - linestring_points_x, - linestring_points_y, + intersections, quadtree, point_indices, points, linestrings ) cudf.testing.assert_frame_equal( p2np_result, From 08ceb65e58a42568fa8028a76855ff8625a6a006 Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Mon, 20 Feb 2023 16:35:55 -0800 Subject: [PATCH 10/10] updates benchmarks --- python/cuspatial/benchmarks/api/bench_api.py | 24 +++++++++++--------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/python/cuspatial/benchmarks/api/bench_api.py b/python/cuspatial/benchmarks/api/bench_api.py index caabd1464..f511e3aed 100644 --- a/python/cuspatial/benchmarks/api/bench_api.py +++ b/python/cuspatial/benchmarks/api/bench_api.py @@ -161,6 +161,7 @@ def bench_quadtree_on_points(benchmark, gpu_dataframe): def bench_quadtree_point_in_polygon(benchmark, polygons): + df = polygons polygons = polygons["geometry"].polygons x_points = (cupy.random.random(50000000) - 0.5) * 360 y_points = (cupy.random.random(50000000) - 0.5) * 180 @@ -198,12 +199,8 @@ def bench_quadtree_point_in_polygon(benchmark, polygons): intersections, quadtree, point_indices, - x_points, - y_points, - polygons.part_offset, - polygons.ring_offset, - polygons.x, - polygons.y, + points, + df["geometry"], ) @@ -227,6 +224,14 @@ def bench_quadtree_point_to_nearest_linestring(benchmark): points = cuspatial.GeoSeries.from_points_xy( cudf.DataFrame({"x": points_x, "y": points_y}).interleave_columns() ) + + linestrings = cuspatial.GeoSeries.from_linestrings_xy( + cudf.DataFrame( + {"x": polygons.x, "y": polygons.y} + ).interleave_columns(), + polygons.ring_offset, + cupy.arange(len(polygons.ring_offset)), + ) point_indices, quadtree = cuspatial.quadtree_on_points( points, polygons.x.min(), @@ -255,11 +260,8 @@ def bench_quadtree_point_to_nearest_linestring(benchmark): intersections, quadtree, point_indices, - points_x, - points_y, - polygons.ring_offset, - polygons.x, - polygons.y, + points, + linestrings, )