From 3e9e10b88b68fe86d5dfb8c7e3bed61b095b6fb8 Mon Sep 17 00:00:00 2001 From: "H. Thomson Comer" Date: Wed, 31 May 2023 19:05:03 -0500 Subject: [PATCH 01/23] Add files that support binary_predicates.ipynb PR. --- .../cuspatial/core/binpreds/contains.py | 63 ++++++++++++++++++- .../binpreds/contains_geometry_processor.py | 5 +- .../binpreds/feature_contains_properly.py | 30 +++++---- .../core/binpreds/feature_disjoint.py | 15 +++-- .../core/binpreds/feature_intersects.py | 15 ++--- python/cuspatial/cuspatial/core/geoseries.py | 7 +-- .../cuspatial/utils/binpred_utils.py | 5 +- .../cuspatial/cuspatial/utils/column_utils.py | 20 +++--- 8 files changed, 112 insertions(+), 48 deletions(-) diff --git a/python/cuspatial/cuspatial/core/binpreds/contains.py b/python/cuspatial/cuspatial/core/binpreds/contains.py index 398f134ff..15346f1b7 100644 --- a/python/cuspatial/cuspatial/core/binpreds/contains.py +++ b/python/cuspatial/cuspatial/core/binpreds/contains.py @@ -2,10 +2,14 @@ from math import ceil, sqrt +import cudf from cudf import DataFrame, Series from cudf.core.column import as_column import cuspatial +from cuspatial._lib.pairwise_point_in_polygon import ( + pairwise_point_in_polygon as cpp_pairwise_point_in_polygon, +) from cuspatial._lib.point_in_polygon import ( point_in_polygon as cpp_byte_point_in_polygon, ) @@ -35,7 +39,7 @@ def _quadtree_contains_properly(points, polygons): within its corresponding polygon. """ - scale = -1 + # Set the scale to the default minimum scale without triggering a warning. max_depth = 15 min_size = ceil(sqrt(len(points))) if len(polygons) == 0: @@ -44,6 +48,7 @@ def _quadtree_contains_properly(points, polygons): x_min = polygons.polygons.x.min() y_max = polygons.polygons.y.max() y_min = polygons.polygons.y.min() + scale = max(x_max - x_min, y_max - y_min) / ((1 << max_depth) + 2) point_indices, quadtree = cuspatial.quadtree_on_points( points, x_min, @@ -115,9 +120,61 @@ def _brute_force_contains_properly(points, polygons): return final_result -def contains_properly(polygons, points, quadtree=True): - if quadtree: +def _pairwise_contains_properly(points, polygons): + """Compute from a series of points and a series of polygons which points + are properly contained within the corresponding polygon. Polygon A contains + Point B properly if B intersects the interior of A but not the boundary (or + exterior). + + Note that polygons must be closed: the first and last vertex of each + polygon must be the same. + + This version provides the best performance when the input is a large + number of points with an equal number of polygons. + + Parameters + ---------- + points : GeoSeries + A GeoSeries of points. + polygons : GeoSeries + A GeoSeries of polygons. + + Returns + ------- + result : cudf.DataFrame + A DataFrame of boolean values indicating whether each point falls + within its corresponding polygon. + """ + pip_result = cpp_pairwise_point_in_polygon( + as_column(points.points.x), + as_column(points.points.y), + as_column(polygons.polygons.part_offset), + as_column(polygons.polygons.ring_offset), + as_column(polygons.polygons.x), + as_column(polygons.polygons.y), + ) + # Pairwise returns a boolean column where the point and polygon index + # always correspond. We can use this to create a dataframe with the + # same shape as the quadtree result. Finally all the False results + # are dropped, as quadtree doesn't report False results. + quadtree_shaped_result = ( + cudf.Series(pip_result).reset_index().reset_index() + ) + quadtree_shaped_result.columns = ["part_index", "point_index", "result"] + result = quadtree_shaped_result[["point_index", "part_index"]][ + quadtree_shaped_result["result"].astype("bool") + ] + result = result.sort_values(["point_index", "part_index"]).reset_index( + drop=True + ) + return result + + +def contains_properly(polygons, points, mode="pairwise"): + if mode == "quadtree": return _quadtree_contains_properly(points, polygons) + elif mode == "pairwise": + return _pairwise_contains_properly(points, polygons) else: # Use stack to convert the result to the same shape as quadtree's # result, name the columns appropriately, and return the diff --git a/python/cuspatial/cuspatial/core/binpreds/contains_geometry_processor.py b/python/cuspatial/cuspatial/core/binpreds/contains_geometry_processor.py index 12b2fc37d..024bbdcfc 100644 --- a/python/cuspatial/cuspatial/core/binpreds/contains_geometry_processor.py +++ b/python/cuspatial/cuspatial/core/binpreds/contains_geometry_processor.py @@ -49,10 +49,10 @@ def _preprocess_multipoint_rhs(self, lhs, rhs): if contains_only_linestrings(rhs): # condition for linestrings geom = rhs.lines - elif contains_only_polygons(rhs) is True: + elif contains_only_polygons(rhs): # polygon in polygon geom = rhs.polygons - elif contains_only_multipoints(rhs) is True: + elif contains_only_multipoints(rhs): # mpoint in polygon geom = rhs.multipoints else: @@ -150,6 +150,7 @@ def _reindex_allpairs(self, lhs, op_result) -> DataFrame: # once their index is converted to a polygon index. allpairs_result = polygon_indices.drop_duplicates() + # TODO: This is slow and needs optimization # Replace the polygon index with the original index allpairs_result["polygon_index"] = allpairs_result[ "polygon_index" diff --git a/python/cuspatial/cuspatial/core/binpreds/feature_contains_properly.py b/python/cuspatial/cuspatial/core/binpreds/feature_contains_properly.py index 0c81ead59..17ebffc4a 100644 --- a/python/cuspatial/cuspatial/core/binpreds/feature_contains_properly.py +++ b/python/cuspatial/cuspatial/core/binpreds/feature_contains_properly.py @@ -60,7 +60,7 @@ def _preprocess(self, lhs, rhs): preprocessor_result = super()._preprocess_multipoint_rhs(lhs, rhs) return self._compute_predicate(lhs, rhs, preprocessor_result) - def _should_use_quadtree(self, lhs): + def _pip_mode(self, lhs, rhs): """Determine if the quadtree should be used for the binary predicate. Returns @@ -74,14 +74,17 @@ def _should_use_quadtree(self, lhs): 2. If the number of polygons in the lhs is less than 32, we use the brute-force algorithm because it is faster and has less memory overhead. - 3. If the lhs contains more than 32 polygons, we use the quadtree - because it does not have a polygon-count limit. - 4. If the lhs contains multipolygons, we use quadtree because the - performance between quadtree and brute-force is similar, but - code complexity would be higher if we did multipolygon - reconstruction on both code paths. + 3. If the lhs contains multipolygons, we use quadtree because + the quadtree code path already handles multipolygons. + 4. Otherwise pairwise is defaulted to since the default GeoPandas + behavior is to use the pairwise algorithm. """ - return len(lhs) >= 32 or has_multipolygons(lhs) or self.config.allpairs + if len(lhs) <= 31: + return "brute_force" + elif self.config.allpairs or has_multipolygons(lhs): + return "quadtree" + else: + return "pairwise" def _compute_predicate( self, @@ -97,9 +100,14 @@ def _compute_predicate( raise TypeError( "`.contains` can only be called with polygon series." ) - use_quadtree = self._should_use_quadtree(lhs) + mode = self._pip_mode(lhs, preprocessor_result.final_rhs) + lhs_indices = lhs.index + # Duplicates the lhs polygon for each point in the final_rhs result + # that was computed by _preprocess. + if mode == "pairwise": + lhs_indices = preprocessor_result.point_indices pip_result = contains_properly( - lhs, preprocessor_result.final_rhs, quadtree=use_quadtree + lhs[lhs_indices], preprocessor_result.final_rhs, mode=mode ) op_result = ContainsOpResult(pip_result, preprocessor_result) return self._postprocess(lhs, rhs, preprocessor_result, op_result) @@ -168,7 +176,7 @@ def _preprocess(self, lhs, rhs): left and right hand side types. """ DispatchDict = { (Point, Point): ContainsProperlyByIntersection, - (Point, MultiPoint): ImpossiblePredicate, + (Point, MultiPoint): ContainsProperlyByIntersection, (Point, LineString): ImpossiblePredicate, (Point, Polygon): ImpossiblePredicate, (MultiPoint, Point): NotImplementedPredicate, diff --git a/python/cuspatial/cuspatial/core/binpreds/feature_disjoint.py b/python/cuspatial/cuspatial/core/binpreds/feature_disjoint.py index a0347b76a..8d0852ba4 100644 --- a/python/cuspatial/cuspatial/core/binpreds/feature_disjoint.py +++ b/python/cuspatial/cuspatial/core/binpreds/feature_disjoint.py @@ -2,6 +2,7 @@ from cuspatial.core.binpreds.basic_predicates import ( _basic_contains_any, + _basic_equals_any, _basic_intersects, ) from cuspatial.core.binpreds.binpred_interface import ( @@ -23,13 +24,17 @@ def _preprocess(self, lhs, rhs): and then negate the result. Used by: - (Point, Point) (Point, Polygon) (Polygon, Point) """ return ~_basic_contains_any(lhs, rhs) +class PointPointDisjoint(BinPred): + def _preprocess(self, lhs, rhs): + return ~_basic_equals_any(lhs, rhs) + + class PointLineStringDisjoint(BinPred): def _preprocess(self, lhs, rhs): """Disjoint is the opposite of intersects, so just implement intersects @@ -40,9 +45,8 @@ def _preprocess(self, lhs, rhs): class PointPolygonDisjoint(BinPred): def _preprocess(self, lhs, rhs): - intersects = _basic_intersects(lhs, rhs) contains = _basic_contains_any(lhs, rhs) - return ~intersects & ~contains + return ~contains class LineStringPointDisjoint(PointLineStringDisjoint): @@ -61,9 +65,8 @@ def _postprocess(self, lhs, rhs, op_result): class LineStringPolygonDisjoint(BinPred): def _preprocess(self, lhs, rhs): - intersects = _basic_intersects(lhs, rhs) contains = _basic_contains_any(rhs, lhs) - return ~intersects & ~contains + return ~contains class PolygonPolygonDisjoint(BinPred): @@ -72,7 +75,7 @@ def _preprocess(self, lhs, rhs): DispatchDict = { - (Point, Point): DisjointByWayOfContains, + (Point, Point): PointPointDisjoint, (Point, MultiPoint): NotImplementedPredicate, (Point, LineString): PointLineStringDisjoint, (Point, Polygon): PointPolygonDisjoint, diff --git a/python/cuspatial/cuspatial/core/binpreds/feature_intersects.py b/python/cuspatial/cuspatial/core/binpreds/feature_intersects.py index c35947826..a180112ca 100644 --- a/python/cuspatial/cuspatial/core/binpreds/feature_intersects.py +++ b/python/cuspatial/cuspatial/core/binpreds/feature_intersects.py @@ -93,15 +93,13 @@ class IntersectsByEquals(EqualsPredicateBase): class PolygonPointIntersects(BinPred): def _preprocess(self, lhs, rhs): contains = _basic_contains_any(lhs, rhs) - intersects = _basic_intersects(lhs, rhs) - return contains | intersects + return contains class PointPolygonIntersects(BinPred): def _preprocess(self, lhs, rhs): contains = _basic_contains_any(rhs, lhs) - intersects = _basic_intersects(rhs, lhs) - return contains | intersects + return contains class LineStringPointIntersects(IntersectsPredicateBase): @@ -117,25 +115,22 @@ def _preprocess(self, lhs, rhs): class LineStringPolygonIntersects(BinPred): def _preprocess(self, lhs, rhs): - intersects = _basic_intersects(lhs, rhs) contains = _basic_contains_any(rhs, lhs) - return intersects | contains + return contains class PolygonLineStringIntersects(BinPred): def _preprocess(self, lhs, rhs): - intersects = _basic_intersects(lhs, rhs) contains = _basic_contains_any(lhs, rhs) - return intersects | contains + return contains class PolygonPolygonIntersects(BinPred): def _preprocess(self, lhs, rhs): - intersects = _basic_intersects(lhs, rhs) contains_rhs = _basic_contains_any(rhs, lhs) contains_lhs = _basic_contains_any(lhs, rhs) - return intersects | contains_rhs | contains_lhs + return contains_rhs | contains_lhs """ Type dispatch dictionary for intersects binary predicates. """ diff --git a/python/cuspatial/cuspatial/core/geoseries.py b/python/cuspatial/cuspatial/core/geoseries.py index c13b673ed..299efa475 100644 --- a/python/cuspatial/cuspatial/core/geoseries.py +++ b/python/cuspatial/cuspatial/core/geoseries.py @@ -109,7 +109,7 @@ def __init__( @property def feature_types(self): - return self._column._meta.input_types + return self._column._meta.input_types.reset_index(drop=True) @property def type(self): @@ -323,8 +323,7 @@ def point_indices(self): self.geometry_offset ) sizes = offsets[1:] - offsets[:-1] - - return self._series.index.repeat(sizes).values + return self._meta.input_types.index.repeat(sizes) @property def points(self): @@ -967,7 +966,7 @@ def reset_index( # and use `cudf` reset_index to identify what our result # should look like. cudf_series = cudf.Series( - np.arange(len(geo_series.index)), index=geo_series.index + cp.arange(len(geo_series.index)), index=geo_series.index ) cudf_result = cudf_series.reset_index(level, drop, name, inplace) diff --git a/python/cuspatial/cuspatial/utils/binpred_utils.py b/python/cuspatial/cuspatial/utils/binpred_utils.py index 22b495513..79594a8d8 100644 --- a/python/cuspatial/cuspatial/utils/binpred_utils.py +++ b/python/cuspatial/cuspatial/utils/binpred_utils.py @@ -7,6 +7,7 @@ import cuspatial from cuspatial.core._column.geocolumn import ColumnType +from cuspatial.core._column.geometa import Feature_Enum """Column-Type objects to use for simple syntax in the `DispatchDict` contained in each `feature_.py` file. For example, instead of writing out @@ -348,8 +349,8 @@ def _points_and_lines_to_multipoints(geoseries, offsets): 1 MULTIPOINT (3.00000 3.00000, 4.00000, 4.0000, ... dtype: geometry """ - points_mask = geoseries.type == "Point" - lines_mask = geoseries.type == "Linestring" + points_mask = geoseries.feature_types == Feature_Enum.POINT.value + lines_mask = geoseries.feature_types == Feature_Enum.LINESTRING.value if (points_mask + lines_mask).sum() != len(geoseries): raise ValueError("Geoseries must contain only points and lines") points = geoseries[points_mask] diff --git a/python/cuspatial/cuspatial/utils/column_utils.py b/python/cuspatial/cuspatial/utils/column_utils.py index c3cb1dd1a..36deac4eb 100644 --- a/python/cuspatial/cuspatial/utils/column_utils.py +++ b/python/cuspatial/cuspatial/utils/column_utils.py @@ -78,34 +78,34 @@ def contains_only_points(gs: GeoSeries): """ Returns true if `gs` contains only points or multipoints """ - - return contain_single_type_geometry(gs) and ( - len(gs.points.xy) > 0 or len(gs.multipoints.xy) > 0 - ) + points = gs._column._meta.input_types == Feature_Enum.POINT.value + mpoints = gs._column._meta.input_types == Feature_Enum.MULTIPOINT.value + return (points | mpoints).all() def contains_only_multipoints(gs: GeoSeries): """ Returns true if `gs` contains only multipoints """ - - return contain_single_type_geometry(gs) and (len(gs.multipoints.xy) > 0) + return ( + gs._column._meta.input_types == Feature_Enum.MULTIPOINT.value + ).all() def contains_only_linestrings(gs: GeoSeries): """ Returns true if `gs` contains only linestrings """ - - return contain_single_type_geometry(gs) and len(gs.lines.xy) > 0 + return ( + gs._column._meta.input_types == Feature_Enum.LINESTRING.value + ).all() def contains_only_polygons(gs: GeoSeries): """ Returns true if `gs` contains only polygons """ - - return contain_single_type_geometry(gs) and len(gs.polygons.xy) > 0 + return (gs._column._meta.input_types == Feature_Enum.POLYGON.value).all() def has_same_geometry(lhs: GeoSeries, rhs: GeoSeries): From 3332ab98252685a8049cde131dfcdc92873e3bb0 Mon Sep 17 00:00:00 2001 From: "H. Thomson Comer" Date: Wed, 31 May 2023 20:38:38 -0500 Subject: [PATCH 02/23] Add large binpred tests. --- .../binpreds/test_binpred_large_examples.py | 82 +++++++++++++++++++ 1 file changed, 82 insertions(+) create mode 100644 python/cuspatial/cuspatial/tests/binpreds/test_binpred_large_examples.py diff --git a/python/cuspatial/cuspatial/tests/binpreds/test_binpred_large_examples.py b/python/cuspatial/cuspatial/tests/binpreds/test_binpred_large_examples.py new file mode 100644 index 000000000..b36b5a33c --- /dev/null +++ b/python/cuspatial/cuspatial/tests/binpreds/test_binpred_large_examples.py @@ -0,0 +1,82 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. + +import geopandas +import numpy as np + +import cuspatial +from cuspatial.tests.binpreds.binpred_test_dispatch import ( # noqa: F401 + features, + linestring_linestring_dispatch_list, + linestring_polygon_dispatch_list, + point_linestring_dispatch_list, + point_point_dispatch_list, + point_polygon_dispatch_list, + polygon_polygon_dispatch_list, + predicate, +) + + +def sample_test_data(features, dispatch_list, size, lib=cuspatial): + """Creates either a cuspatial or geopandas GeoSeries object using the + Feature objects in `features`, the list of features to sample from in + `dispatch_list`, and the size of the resultant GeoSeries. + """ + geometry_tuples = [features[key][1:3] for key in dispatch_list] + geometries = [ + [lhs_geo for lhs_geo, _ in geometry_tuples], + [rhs_geo for _, rhs_geo in geometry_tuples], + ] + lhs = lib.GeoSeries(list(geometries[0])) + rhs = lib.GeoSeries(list(geometries[1])) + np.random.seed(0) + lhs_picks = np.random.randint(0, len(lhs), size) + rhs_picks = np.random.randint(0, len(rhs), size) + return ( + lhs[lhs_picks].reset_index(drop=True), + rhs[rhs_picks].reset_index(drop=True), + ) + + +def run_test(pred, dispatch_list): + lhs, rhs = sample_test_data(features, dispatch_list, 100000, cuspatial) + gpdlhs, gpdrhs = sample_test_data( + features, dispatch_list, 100000, geopandas + ) + + # Reverse + pred_fn = getattr(rhs, pred) + got = pred_fn(lhs) + gpd_pred_fn = getattr(gpdrhs, pred) + expected = gpd_pred_fn(gpdlhs) + assert (got.values_host == expected.values).all() + + # Forward + pred_fn = getattr(lhs, pred) + got = pred_fn(rhs) + gpd_pred_fn = getattr(gpdlhs, pred) + expected = gpd_pred_fn(gpdrhs) + assert (got.values_host == expected.values).all() + + +def test_point_point_large_examples(predicate): # noqa: F811 + run_test(predicate, point_point_dispatch_list) + + +def test_point_linestring_large_examples(predicate): # noqa: F811 + run_test(predicate, point_linestring_dispatch_list) + + +def test_point_polygon_large_examples(predicate): # noqa: F811 + run_test(predicate, point_polygon_dispatch_list) + + +def test_linestring_linestring_large_examples(predicate): # noqa: F811 + run_test(predicate, linestring_linestring_dispatch_list) + + +def test_linestring_polygon_large_examples(predicate): # noqa: F811 + run_test(predicate, linestring_polygon_dispatch_list) + + +def test_polygon_polygon_large_examples(predicate): # noqa: F811 + run_test(predicate, polygon_polygon_dispatch_list) From fa9bbe66c1605a8fa8684b1040e37721f2ee8d68 Mon Sep 17 00:00:00 2001 From: "H. Thomson Comer" Date: Thu, 1 Jun 2023 18:44:53 +0000 Subject: [PATCH 03/23] Fix two indexing issues. --- .../cuspatial/core/binpreds/contains_geometry_processor.py | 1 + python/cuspatial/cuspatial/core/binpreds/feature_equals.py | 5 ++++- python/cuspatial/cuspatial/core/geoseries.py | 4 +++- .../tests/binpreds/test_binpred_large_examples.py | 7 +++---- 4 files changed, 11 insertions(+), 6 deletions(-) diff --git a/python/cuspatial/cuspatial/core/binpreds/contains_geometry_processor.py b/python/cuspatial/cuspatial/core/binpreds/contains_geometry_processor.py index 024bbdcfc..5615d9ea1 100644 --- a/python/cuspatial/cuspatial/core/binpreds/contains_geometry_processor.py +++ b/python/cuspatial/cuspatial/core/binpreds/contains_geometry_processor.py @@ -260,6 +260,7 @@ def _postprocess_points(self, lhs, rhs, preprocessor_result, op_result): return allpairs_result final_result = _false_series(len(rhs)) + breakpoint() if len(lhs) == len(rhs): matches = ( allpairs_result["polygon_index"] diff --git a/python/cuspatial/cuspatial/core/binpreds/feature_equals.py b/python/cuspatial/cuspatial/core/binpreds/feature_equals.py index bf6997e0a..0bf109980 100644 --- a/python/cuspatial/cuspatial/core/binpreds/feature_equals.py +++ b/python/cuspatial/cuspatial/core/binpreds/feature_equals.py @@ -324,8 +324,11 @@ def _compute_predicate(self, lhs, rhs, preprocessor_result): lhs_reversed, rhs_lengths_equal.lines.xy ) result = forward_result | reverse_result + original_point_indices = cudf.Series( + lhs_lengths_equal.point_indices + ).replace(cudf.Series(lhs_lengths_equal.index)) return self._postprocess( - lhs, rhs, EqualsOpResult(result, lhs_lengths_equal.point_indices) + lhs, rhs, EqualsOpResult(result, original_point_indices) ) diff --git a/python/cuspatial/cuspatial/core/geoseries.py b/python/cuspatial/cuspatial/core/geoseries.py index 299efa475..ebcf2f063 100644 --- a/python/cuspatial/cuspatial/core/geoseries.py +++ b/python/cuspatial/cuspatial/core/geoseries.py @@ -400,7 +400,9 @@ def _type_int_to_field(self): def __getitem__(self, indexes): # Slice the types and offsets union_offsets = self._sr._column._meta.union_offsets.iloc[indexes] - union_types = self._sr._column._meta.input_types.iloc[indexes] + union_types = self._sr._column._meta.input_types.iloc[ + indexes + ].reset_index(drop=True) points = self._sr._column.points mpoints = self._sr._column.mpoints diff --git a/python/cuspatial/cuspatial/tests/binpreds/test_binpred_large_examples.py b/python/cuspatial/cuspatial/tests/binpreds/test_binpred_large_examples.py index b36b5a33c..11ccb3ddc 100644 --- a/python/cuspatial/cuspatial/tests/binpreds/test_binpred_large_examples.py +++ b/python/cuspatial/cuspatial/tests/binpreds/test_binpred_large_examples.py @@ -38,10 +38,9 @@ def sample_test_data(features, dispatch_list, size, lib=cuspatial): def run_test(pred, dispatch_list): - lhs, rhs = sample_test_data(features, dispatch_list, 100000, cuspatial) - gpdlhs, gpdrhs = sample_test_data( - features, dispatch_list, 100000, geopandas - ) + size = 20 + lhs, rhs = sample_test_data(features, dispatch_list, size, cuspatial) + gpdlhs, gpdrhs = sample_test_data(features, dispatch_list, size, geopandas) # Reverse pred_fn = getattr(rhs, pred) From 2117883da6ce2a68f05bafca9386b7dd7aa5c0fe Mon Sep 17 00:00:00 2001 From: "H. Thomson Comer" Date: Thu, 1 Jun 2023 20:37:06 +0000 Subject: [PATCH 04/23] Fix issue with polygon.contains(linestring) not being generic enough. --- .../core/binpreds/feature_contains.py | 71 +++++++++++++------ python/cuspatial/cuspatial/core/geoseries.py | 4 +- 2 files changed, 50 insertions(+), 25 deletions(-) diff --git a/python/cuspatial/cuspatial/core/binpreds/feature_contains.py b/python/cuspatial/cuspatial/core/binpreds/feature_contains.py index 562ce03b7..80bda7d62 100644 --- a/python/cuspatial/cuspatial/core/binpreds/feature_contains.py +++ b/python/cuspatial/cuspatial/core/binpreds/feature_contains.py @@ -83,33 +83,60 @@ def _compute_polygon_polygon_contains(self, lhs, rhs, preprocessor_result): ) - rhs.polygons.part_offset.take(rhs.polygons.geometry_offset[:-1]) return contains + intersects >= rhs.sizes - polygon_size_reduction + def _test_interior(self, lhs, rhs): + # The hardest case. We need to check if the linestring is + # contained in the boundary of the polygon, the interior, + # or the exterior. + # We only need to test linestrings that are length 2. + # Divide the linestring in half and test the point for containment + # in the polygon. + + size_two = rhs.sizes == 2 + if (size_two).any(): + center_points = _linestrings_to_center_point(rhs[size_two]) + size_two_results = _false_series(len(lhs)) + size_two_results.iloc[rhs.index[size_two]] = ( + _basic_contains_count(lhs, center_points) > 0 + ) + return size_two_results + else: + return _false_series(len(lhs)) + def _compute_polygon_linestring_contains( self, lhs, rhs, preprocessor_result ): contains = _basic_contains_count(lhs, rhs).reset_index(drop=True) intersects = self._intersection_results_for_contains(lhs, rhs) - if (contains == 0).all() and (intersects != 0).all(): - # The hardest case. We need to check if the linestring is - # contained in the boundary of the polygon, the interior, - # or the exterior. - # We only need to test linestrings that are length 2. - # Divide the linestring in half and test the point for containment - # in the polygon. - - if (rhs.sizes == 2).any(): - center_points = _linestrings_to_center_point( - rhs[rhs.sizes == 2] - ) - size_two_results = _false_series(len(lhs)) - size_two_results[rhs.sizes == 2] = ( - _basic_contains_count(lhs, center_points) > 0 - ) - return size_two_results - else: - line_intersections = _false_series(len(lhs)) - line_intersections[intersects == rhs.sizes] = True - return line_intersections - return contains + intersects >= rhs.sizes + + # Four tests: + # 1. Intersection with no containment: + # May be a line that shares points with the polygon boundary and + # crosses over the interior, which is contained. + # 2. Intersection with containment: + # A Linestring that shares boundary points as well as interior points + # is contained. + # 3. Containment with no intersection: + # If every point of a linestring is within a polygon and none of its + # segments intersect the polygon, then it is contained. + # 4. Containment with intersection: + # If every point of a linestring is within a polygon and it has an + # intersection, the linestring is crossing a concave region and is + # not contained. + + final_result = _false_series(len(lhs)) + intersection_with_no_containment = (contains == 0) & (intersects != 0) + interior_tests = self._test_interior( + lhs[intersection_with_no_containment].reset_index(drop=True), + rhs[intersection_with_no_containment].reset_index(drop=True), + ) + interior_tests.index = intersection_with_no_containment[ + intersection_with_no_containment + ].index + final_result[intersection_with_no_containment] = interior_tests + final_result[~intersection_with_no_containment] = ( + contains + intersects >= rhs.sizes + ) + return final_result def _compute_predicate(self, lhs, rhs, preprocessor_result): if contains_only_points(rhs): diff --git a/python/cuspatial/cuspatial/core/geoseries.py b/python/cuspatial/cuspatial/core/geoseries.py index ebcf2f063..299efa475 100644 --- a/python/cuspatial/cuspatial/core/geoseries.py +++ b/python/cuspatial/cuspatial/core/geoseries.py @@ -400,9 +400,7 @@ def _type_int_to_field(self): def __getitem__(self, indexes): # Slice the types and offsets union_offsets = self._sr._column._meta.union_offsets.iloc[indexes] - union_types = self._sr._column._meta.input_types.iloc[ - indexes - ].reset_index(drop=True) + union_types = self._sr._column._meta.input_types.iloc[indexes] points = self._sr._column.points mpoints = self._sr._column.mpoints From 93b3b92661bdc64374c4f894675ec1a7919c7859 Mon Sep 17 00:00:00 2001 From: "H. Thomson Comer" Date: Thu, 1 Jun 2023 21:18:07 +0000 Subject: [PATCH 05/23] Fix issue with linestring.geom_equals(linestring) having bad input_types due to slice construction. --- python/cuspatial/cuspatial/core/geoseries.py | 11 +++++++- .../binpreds/test_equals_only_binpreds.py | 25 +++++++++++++++++++ 2 files changed, 35 insertions(+), 1 deletion(-) diff --git a/python/cuspatial/cuspatial/core/geoseries.py b/python/cuspatial/cuspatial/core/geoseries.py index 299efa475..2a66ccec3 100644 --- a/python/cuspatial/cuspatial/core/geoseries.py +++ b/python/cuspatial/cuspatial/core/geoseries.py @@ -251,7 +251,9 @@ def point_indices(self): sizes = offsets[1:] - offsets[:-1] return cp.repeat(self._series.index, sizes) """ - return self._meta.input_types.index[self._meta.input_types != -1] + return self._meta.input_types.reset_index(drop=True).index[ + self._meta.input_types != -1 + ] def column(self): """Return the ListColumn reordered by union offset.""" @@ -402,6 +404,13 @@ def __getitem__(self, indexes): union_offsets = self._sr._column._meta.union_offsets.iloc[indexes] union_types = self._sr._column._meta.input_types.iloc[indexes] + # Very important to reset the index if it has been constructed from + # a slice. + if isinstance(union_offsets, cudf.Series): + union_offsets = union_offsets.reset_index(drop=True) + if isinstance(union_types, cudf.Series): + union_types = union_types.reset_index(drop=True) + points = self._sr._column.points mpoints = self._sr._column.mpoints lines = self._sr._column.lines diff --git a/python/cuspatial/cuspatial/tests/binpreds/test_equals_only_binpreds.py b/python/cuspatial/cuspatial/tests/binpreds/test_equals_only_binpreds.py index 47a07bee9..7aec17920 100644 --- a/python/cuspatial/cuspatial/tests/binpreds/test_equals_only_binpreds.py +++ b/python/cuspatial/cuspatial/tests/binpreds/test_equals_only_binpreds.py @@ -722,3 +722,28 @@ def test_linestring_orders(): got = linestring1.geom_equals(linestring2) expected = gpdlinestring1.geom_equals(gpdlinestring2) pd.testing.assert_series_equal(expected, got.to_pandas()) + + +def test_linestring_indexes(): + linestring1 = cuspatial.GeoSeries( + [ + LineString([(0, 0), (1, 0), (1, 1), (0, 0)]), + LineString([(0, 0), (1, 1), (1, 0), (0, 0)]), + ] + ) + linestring2 = cuspatial.GeoSeries( + [ + LineString([(0, 0), (1, 0), (1, 1), (0, 0)]), + LineString([(0, 0), (1, 1), (1, 0), (0, 0)]), + ] + ) + index1 = [0, 1, 0, 1, 0, 1, 0, 1, 0, 1] + index2 = [0, 0, 0, 0, 0, 1, 1, 1, 1, 1] + linestring1 = linestring1[index1].reset_index(drop=True) + linestring2 = linestring2[index2].reset_index(drop=True) + + gpdlinestring1 = linestring1.to_geopandas() + gpdlinestring2 = linestring2.to_geopandas() + got = linestring1.geom_equals(linestring2) + expected = gpdlinestring1.geom_equals(gpdlinestring2) + pd.testing.assert_series_equal(expected, got.to_pandas()) From 3af37c39454a890dce1770e0aa345af4981503b2 Mon Sep 17 00:00:00 2001 From: "H. Thomson Comer" Date: Mon, 5 Jun 2023 10:49:08 -0500 Subject: [PATCH 06/23] Rest of bugfixes for large datasets. --- .../cuspatial/core/binpreds/contains.py | 10 +++++++--- .../binpreds/contains_geometry_processor.py | 2 -- .../cuspatial/core/binpreds/feature_contains.py | 3 ++- .../core/binpreds/feature_contains_properly.py | 14 ++++++++++++++ .../cuspatial/core/binpreds/feature_crosses.py | 11 ++++++----- .../cuspatial/core/binpreds/feature_touches.py | 17 ++++++++++++++--- .../tests/binpreds/binpred_test_dispatch.py | 13 +++++++++++++ .../binpreds/test_binpred_large_examples.py | 2 +- .../cuspatial/cuspatial/utils/binpred_utils.py | 7 ++++++- 9 files changed, 63 insertions(+), 16 deletions(-) diff --git a/python/cuspatial/cuspatial/core/binpreds/contains.py b/python/cuspatial/cuspatial/core/binpreds/contains.py index 15346f1b7..3e42c7957 100644 --- a/python/cuspatial/cuspatial/core/binpreds/contains.py +++ b/python/cuspatial/cuspatial/core/binpreds/contains.py @@ -160,11 +160,15 @@ def _pairwise_contains_properly(points, polygons): quadtree_shaped_result = ( cudf.Series(pip_result).reset_index().reset_index() ) - quadtree_shaped_result.columns = ["part_index", "point_index", "result"] - result = quadtree_shaped_result[["point_index", "part_index"]][ + quadtree_shaped_result.columns = [ + "pairwise_index", + "point_index", + "result", + ] + result = quadtree_shaped_result[["point_index", "pairwise_index"]][ quadtree_shaped_result["result"].astype("bool") ] - result = result.sort_values(["point_index", "part_index"]).reset_index( + result = result.sort_values(["point_index", "pairwise_index"]).reset_index( drop=True ) return result diff --git a/python/cuspatial/cuspatial/core/binpreds/contains_geometry_processor.py b/python/cuspatial/cuspatial/core/binpreds/contains_geometry_processor.py index 5615d9ea1..8a1996613 100644 --- a/python/cuspatial/cuspatial/core/binpreds/contains_geometry_processor.py +++ b/python/cuspatial/cuspatial/core/binpreds/contains_geometry_processor.py @@ -213,7 +213,6 @@ def _postprocess_multipoint_rhs( result_df = hits.reset_index().merge( expected_count.reset_index(), on="rhs_index" ) - # Handling for the basic predicates if mode == "basic_none": none_result = _true_series(len(rhs)) @@ -260,7 +259,6 @@ def _postprocess_points(self, lhs, rhs, preprocessor_result, op_result): return allpairs_result final_result = _false_series(len(rhs)) - breakpoint() if len(lhs) == len(rhs): matches = ( allpairs_result["polygon_index"] diff --git a/python/cuspatial/cuspatial/core/binpreds/feature_contains.py b/python/cuspatial/cuspatial/core/binpreds/feature_contains.py index 80bda7d62..6e65884ab 100644 --- a/python/cuspatial/cuspatial/core/binpreds/feature_contains.py +++ b/python/cuspatial/cuspatial/core/binpreds/feature_contains.py @@ -81,7 +81,8 @@ def _compute_polygon_polygon_contains(self, lhs, rhs, preprocessor_result): polygon_size_reduction = rhs.polygons.part_offset.take( rhs.polygons.geometry_offset[1:] ) - rhs.polygons.part_offset.take(rhs.polygons.geometry_offset[:-1]) - return contains + intersects >= rhs.sizes - polygon_size_reduction + result = contains + intersects >= rhs.sizes - polygon_size_reduction + return result def _test_interior(self, lhs, rhs): # The hardest case. We need to check if the linestring is diff --git a/python/cuspatial/cuspatial/core/binpreds/feature_contains_properly.py b/python/cuspatial/cuspatial/core/binpreds/feature_contains_properly.py index 17ebffc4a..489b9401b 100644 --- a/python/cuspatial/cuspatial/core/binpreds/feature_contains_properly.py +++ b/python/cuspatial/cuspatial/core/binpreds/feature_contains_properly.py @@ -2,6 +2,10 @@ from typing import TypeVar +import cupy as cp + +import cudf + from cuspatial.core.binpreds.basic_predicates import ( _basic_equals_all, _basic_intersects, @@ -109,6 +113,16 @@ def _compute_predicate( pip_result = contains_properly( lhs[lhs_indices], preprocessor_result.final_rhs, mode=mode ) + if mode == "pairwise": + pairwise_index_df = cudf.DataFrame( + { + "pairwise_index": cp.arange(len(lhs_indices)), + "part_index": rhs.point_indices, + } + ) + pip_result = pip_result.merge( + pairwise_index_df, on="pairwise_index" + )[["part_index", "point_index"]] op_result = ContainsOpResult(pip_result, preprocessor_result) return self._postprocess(lhs, rhs, preprocessor_result, op_result) diff --git a/python/cuspatial/cuspatial/core/binpreds/feature_crosses.py b/python/cuspatial/cuspatial/core/binpreds/feature_crosses.py index 0316f3cbd..f9b8505e2 100644 --- a/python/cuspatial/cuspatial/core/binpreds/feature_crosses.py +++ b/python/cuspatial/cuspatial/core/binpreds/feature_crosses.py @@ -44,11 +44,12 @@ def _compute_predicate(self, lhs, rhs, preprocessor_result): # intersection are in the boundary of the other pli = _basic_intersects_pli(rhs, lhs) intersections = _points_and_lines_to_multipoints(pli[1], pli[0]) - equals = (_basic_equals_count(intersections, lhs) > 0) | ( - _basic_equals_count(intersections, rhs) > 0 - ) - intersects = _basic_intersects_count(rhs, lhs) > 0 - return intersects & ~equals + equals_lhs_count = _basic_equals_count(intersections, lhs) + equals_rhs_count = _basic_equals_count(intersections, rhs) + equals_lhs = equals_lhs_count != intersections.sizes + equals_rhs = equals_rhs_count != intersections.sizes + equals = equals_lhs & equals_rhs + return equals class LineStringPolygonCrosses(BinPred): diff --git a/python/cuspatial/cuspatial/core/binpreds/feature_touches.py b/python/cuspatial/cuspatial/core/binpreds/feature_touches.py index c1ddc1312..d76dc6200 100644 --- a/python/cuspatial/cuspatial/core/binpreds/feature_touches.py +++ b/python/cuspatial/cuspatial/core/binpreds/feature_touches.py @@ -92,7 +92,7 @@ def _preprocess(self, lhs, rhs): equals_lhs = _basic_equals_count(points, lhs) > 0 equals_rhs = _basic_equals_count(points, rhs) > 0 touches = point_intersection & (equals_lhs | equals_rhs) - return touches + return touches & ~lhs.crosses(rhs) class LineStringPolygonTouches(BinPred): @@ -127,9 +127,20 @@ class PolygonPolygonTouches(BinPred): def _preprocess(self, lhs, rhs): contains_lhs_none = _basic_contains_count(lhs, rhs) == 0 contains_rhs_none = _basic_contains_count(rhs, lhs) == 0 + contains_lhs = lhs.contains(rhs) + contains_rhs = rhs.contains(lhs) equals = lhs.geom_equals(rhs) - intersects = _basic_intersects_count(lhs, rhs) > 0 - return ~equals & contains_lhs_none & contains_rhs_none & intersects + intersect_count = _basic_intersects_count(lhs, rhs) + intersects = (intersect_count > 0) & (intersect_count < rhs.sizes - 1) + result = ( + ~equals + & contains_lhs_none + & contains_rhs_none + & ~contains_lhs + & ~contains_rhs + & intersects + ) + return result DispatchDict = { diff --git a/python/cuspatial/cuspatial/tests/binpreds/binpred_test_dispatch.py b/python/cuspatial/cuspatial/tests/binpreds/binpred_test_dispatch.py index 55ceeaea3..a5a62e238 100644 --- a/python/cuspatial/cuspatial/tests/binpreds/binpred_test_dispatch.py +++ b/python/cuspatial/cuspatial/tests/binpreds/binpred_test_dispatch.py @@ -171,6 +171,18 @@ def predicate(request): LineString([(0.5, 0.0), (0.5, 1.0)]), LineString([(0.0, 0.5), (1.0, 0.5)]), ), + "linestring-linestring-touch-and-cross": ( + """ + x + | + x + |\\ + x---x + x + """, + LineString([(0.0, 0.0), (1.0, 1.0)]), + LineString([(0.5, 0.5), (1.0, 0.1), (-1.0, 0.1)]), + ), "linestring-polygon-disjoint": ( """ point_polygon above is drawn as @@ -493,6 +505,7 @@ def predicate(request): "linestring-linestring-touch-edge", "linestring-linestring-touch-edge-twice", "linestring-linestring-crosses", + "linestring-linestring-touch-and-cross", ] linestring_polygon_dispatch_list = [ diff --git a/python/cuspatial/cuspatial/tests/binpreds/test_binpred_large_examples.py b/python/cuspatial/cuspatial/tests/binpreds/test_binpred_large_examples.py index 11ccb3ddc..bc4a95a80 100644 --- a/python/cuspatial/cuspatial/tests/binpreds/test_binpred_large_examples.py +++ b/python/cuspatial/cuspatial/tests/binpreds/test_binpred_large_examples.py @@ -38,7 +38,7 @@ def sample_test_data(features, dispatch_list, size, lib=cuspatial): def run_test(pred, dispatch_list): - size = 20 + size = 10000 lhs, rhs = sample_test_data(features, dispatch_list, size, cuspatial) gpdlhs, gpdrhs = sample_test_data(features, dispatch_list, size, geopandas) diff --git a/python/cuspatial/cuspatial/utils/binpred_utils.py b/python/cuspatial/cuspatial/utils/binpred_utils.py index 79594a8d8..42bb48f5e 100644 --- a/python/cuspatial/cuspatial/utils/binpred_utils.py +++ b/python/cuspatial/cuspatial/utils/binpred_utils.py @@ -62,7 +62,12 @@ def _count_results_in_multipoint_geometries(point_indices, point_result): index=cudf.RangeIndex(len(point_indices), name="point_index"), ).reset_index() with_rhs_indices = point_result.merge(point_indices_df, on="point_index") - points_grouped_by_original_polygon = with_rhs_indices[ + # Because we are doing pairwise operations, we're only interested in the + # results where polygon_index and rhs_index match + pairwise_matches = with_rhs_indices[ + with_rhs_indices["polygon_index"] == with_rhs_indices["rhs_index"] + ] + points_grouped_by_original_polygon = pairwise_matches[ ["point_index", "rhs_index"] ].drop_duplicates() hits = ( From 6dc21698adea17a1e7c4c93c4d31953aade19267 Mon Sep 17 00:00:00 2001 From: "H. Thomson Comer" Date: Mon, 5 Jun 2023 10:50:35 -0500 Subject: [PATCH 07/23] Add test_feature_groups which tests via binpred dispatch but runs each group as a single test. --- .../tests/binpreds/test_feature_groups.py | 73 +++++++++++++++++++ 1 file changed, 73 insertions(+) create mode 100644 python/cuspatial/cuspatial/tests/binpreds/test_feature_groups.py diff --git a/python/cuspatial/cuspatial/tests/binpreds/test_feature_groups.py b/python/cuspatial/cuspatial/tests/binpreds/test_feature_groups.py new file mode 100644 index 000000000..c4a7552a0 --- /dev/null +++ b/python/cuspatial/cuspatial/tests/binpreds/test_feature_groups.py @@ -0,0 +1,73 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. + +import geopandas + +import cuspatial +from cuspatial.tests.binpreds.binpred_test_dispatch import ( # noqa: F401 + features, + linestring_linestring_dispatch_list, + linestring_polygon_dispatch_list, + point_linestring_dispatch_list, + point_point_dispatch_list, + point_polygon_dispatch_list, + polygon_polygon_dispatch_list, + predicate, +) + + +def sample_test_data(features, dispatch_list, lib=cuspatial): + """Creates either a cuSpatial or geopandas GeoSeries object using the + features in `features` and the list of features to sample from + `dispatch_list`. + """ + geometry_tuples = [features[key][1:3] for key in dispatch_list] + geometries = [ + [lhs_geo for lhs_geo, _ in geometry_tuples], + [rhs_geo for _, rhs_geo in geometry_tuples], + ] + lhs = lib.GeoSeries(list(geometries[0])) + rhs = lib.GeoSeries(list(geometries[1])) + return (lhs, rhs) + + +def run_test(pred, dispatch_list): + lhs, rhs = sample_test_data(features, dispatch_list, cuspatial) + gpdlhs, gpdrhs = sample_test_data(features, dispatch_list, geopandas) + + # Reverse + pred_fn = getattr(rhs, pred) + got = pred_fn(lhs) + gpd_pred_fn = getattr(gpdrhs, pred) + expected = gpd_pred_fn(gpdlhs) + assert (got.values_host == expected.values).all() + + # Forward + pred_fn = getattr(lhs, pred) + got = pred_fn(rhs) + gpd_pred_fn = getattr(gpdlhs, pred) + expected = gpd_pred_fn(gpdrhs) + assert (got.values_host == expected.values).all() + + +def test_point_point_all_examples(predicate): # noqa: F811 + run_test(predicate, point_point_dispatch_list) + + +def test_point_linestring_all_examples(predicate): # noqa: F811 + run_test(predicate, point_linestring_dispatch_list) + + +def test_point_polygon_all_examples(predicate): # noqa: F811 + run_test(predicate, point_polygon_dispatch_list) + + +def test_linestring_linestring_all_examples(predicate): # noqa: F811 + run_test(predicate, linestring_linestring_dispatch_list) + + +def test_linestring_polygon_all_examples(predicate): # noqa: F811 + run_test(predicate, linestring_polygon_dispatch_list) + + +def test_polygon_polygon_all_examples(predicate): # noqa: F811 + run_test(predicate, polygon_polygon_dispatch_list) From 46cdb8c1efc892bf38fbb1ea7f0ef663d9d57aa0 Mon Sep 17 00:00:00 2001 From: "H. Thomson Comer" Date: Mon, 5 Jun 2023 13:53:24 -0500 Subject: [PATCH 08/23] Fix test import statement. --- .../cuspatial/tests/binpreds/test_binpred_large_examples.py | 6 +++--- .../cuspatial/tests/binpreds/test_feature_groups.py | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/python/cuspatial/cuspatial/tests/binpreds/test_binpred_large_examples.py b/python/cuspatial/cuspatial/tests/binpreds/test_binpred_large_examples.py index bc4a95a80..6e56c115e 100644 --- a/python/cuspatial/cuspatial/tests/binpreds/test_binpred_large_examples.py +++ b/python/cuspatial/cuspatial/tests/binpreds/test_binpred_large_examples.py @@ -2,9 +2,7 @@ import geopandas import numpy as np - -import cuspatial -from cuspatial.tests.binpreds.binpred_test_dispatch import ( # noqa: F401 +from binpred_test_dispatch import ( # noqa: F401 features, linestring_linestring_dispatch_list, linestring_polygon_dispatch_list, @@ -15,6 +13,8 @@ predicate, ) +import cuspatial + def sample_test_data(features, dispatch_list, size, lib=cuspatial): """Creates either a cuspatial or geopandas GeoSeries object using the diff --git a/python/cuspatial/cuspatial/tests/binpreds/test_feature_groups.py b/python/cuspatial/cuspatial/tests/binpreds/test_feature_groups.py index c4a7552a0..9f7aa3219 100644 --- a/python/cuspatial/cuspatial/tests/binpreds/test_feature_groups.py +++ b/python/cuspatial/cuspatial/tests/binpreds/test_feature_groups.py @@ -1,9 +1,7 @@ # Copyright (c) 2023, NVIDIA CORPORATION. import geopandas - -import cuspatial -from cuspatial.tests.binpreds.binpred_test_dispatch import ( # noqa: F401 +from binpred_test_dispatch import ( # noqa: F401 features, linestring_linestring_dispatch_list, linestring_polygon_dispatch_list, @@ -14,6 +12,8 @@ predicate, ) +import cuspatial + def sample_test_data(features, dispatch_list, lib=cuspatial): """Creates either a cuSpatial or geopandas GeoSeries object using the From 7f950dd76e75e83d19c8a962dc9720e8a9d55c0a Mon Sep 17 00:00:00 2001 From: "H. Thomson Comer" Date: Mon, 5 Jun 2023 20:09:05 +0000 Subject: [PATCH 09/23] Clean up contains changes and comments. --- .../cuspatial/core/binpreds/contains.py | 4 +-- .../core/binpreds/feature_contains.py | 26 +++++-------------- .../binpreds/feature_contains_properly.py | 17 +++++++----- 3 files changed, 20 insertions(+), 27 deletions(-) diff --git a/python/cuspatial/cuspatial/core/binpreds/contains.py b/python/cuspatial/cuspatial/core/binpreds/contains.py index 3e42c7957..0d4fc4f3a 100644 --- a/python/cuspatial/cuspatial/core/binpreds/contains.py +++ b/python/cuspatial/cuspatial/core/binpreds/contains.py @@ -129,8 +129,8 @@ def _pairwise_contains_properly(points, polygons): Note that polygons must be closed: the first and last vertex of each polygon must be the same. - This version provides the best performance when the input is a large - number of points with an equal number of polygons. + This version provides support for a very large number of points with + an equal number of polygons. Parameters ---------- diff --git a/python/cuspatial/cuspatial/core/binpreds/feature_contains.py b/python/cuspatial/cuspatial/core/binpreds/feature_contains.py index 6e65884ab..8744de567 100644 --- a/python/cuspatial/cuspatial/core/binpreds/feature_contains.py +++ b/python/cuspatial/cuspatial/core/binpreds/feature_contains.py @@ -85,13 +85,9 @@ def _compute_polygon_polygon_contains(self, lhs, rhs, preprocessor_result): return result def _test_interior(self, lhs, rhs): - # The hardest case. We need to check if the linestring is - # contained in the boundary of the polygon, the interior, - # or the exterior. # We only need to test linestrings that are length 2. # Divide the linestring in half and test the point for containment # in the polygon. - size_two = rhs.sizes == 2 if (size_two).any(): center_points = _linestrings_to_center_point(rhs[size_two]) @@ -109,21 +105,8 @@ def _compute_polygon_linestring_contains( contains = _basic_contains_count(lhs, rhs).reset_index(drop=True) intersects = self._intersection_results_for_contains(lhs, rhs) - # Four tests: - # 1. Intersection with no containment: - # May be a line that shares points with the polygon boundary and - # crosses over the interior, which is contained. - # 2. Intersection with containment: - # A Linestring that shares boundary points as well as interior points - # is contained. - # 3. Containment with no intersection: - # If every point of a linestring is within a polygon and none of its - # segments intersect the polygon, then it is contained. - # 4. Containment with intersection: - # If every point of a linestring is within a polygon and it has an - # intersection, the linestring is crossing a concave region and is - # not contained. - + # If a linestring has intersection but not containment, we need to + # test if the linestring is in the interior of the polygon. final_result = _false_series(len(lhs)) intersection_with_no_containment = (contains == 0) & (intersects != 0) interior_tests = self._test_interior( @@ -133,7 +116,12 @@ def _compute_polygon_linestring_contains( interior_tests.index = intersection_with_no_containment[ intersection_with_no_containment ].index + # LineStrings that have intersection but no containment are set + # according to the `intersection_with_no_containment` mask. final_result[intersection_with_no_containment] = interior_tests + # LineStrings that do not are contained if the sum of intersecting + # and containing points is greater than or equal to the number of + # points that make up the linestring. final_result[~intersection_with_no_containment] = ( contains + intersects >= rhs.sizes ) diff --git a/python/cuspatial/cuspatial/core/binpreds/feature_contains_properly.py b/python/cuspatial/cuspatial/core/binpreds/feature_contains_properly.py index 489b9401b..bb2502bbc 100644 --- a/python/cuspatial/cuspatial/core/binpreds/feature_contains_properly.py +++ b/python/cuspatial/cuspatial/core/binpreds/feature_contains_properly.py @@ -74,13 +74,13 @@ def _pip_mode(self, lhs, rhs): Notes ----- - 1. Quadtree is always used if user requests `allpairs=True`. - 2. If the number of polygons in the lhs is less than 32, we use the + 1. If the number of polygons in the lhs is less than 32, we use the brute-force algorithm because it is faster and has less memory overhead. - 3. If the lhs contains multipolygons, we use quadtree because - the quadtree code path already handles multipolygons. - 4. Otherwise pairwise is defaulted to since the default GeoPandas + 2. If the lhs contains multipolygons, or `allpairs=True` is specified, + we use quadtree because the quadtree code path already handles + multipolygons. + 3. Otherwise pairwise is defaulted to since the default GeoPandas behavior is to use the pairwise algorithm. """ if len(lhs) <= 31: @@ -107,12 +107,17 @@ def _compute_predicate( mode = self._pip_mode(lhs, preprocessor_result.final_rhs) lhs_indices = lhs.index # Duplicates the lhs polygon for each point in the final_rhs result - # that was computed by _preprocess. + # that was computed by _preprocess. Will always ensure that the + # number of points in the rhs is equal to the number of polygons in the + # lhs. if mode == "pairwise": lhs_indices = preprocessor_result.point_indices pip_result = contains_properly( lhs[lhs_indices], preprocessor_result.final_rhs, mode=mode ) + # If the mode is pairwise, we need to replace the `pairwise_index` + # of each repeated polygon with the `part_index` from the + # preprocessor result. if mode == "pairwise": pairwise_index_df = cudf.DataFrame( { From 217a99aa8d02a02952c8317bbdd7decb2553a4a8 Mon Sep 17 00:00:00 2001 From: "H. Thomson Comer" Date: Tue, 6 Jun 2023 18:38:18 +0000 Subject: [PATCH 10/23] Implement a couple of review suggestions and drop large tests for cartesian tests. --- .../cuspatial/core/binpreds/contains.py | 40 +++++++++---------- .../core/binpreds/feature_contains.py | 9 +++-- .../binpreds/feature_contains_properly.py | 8 ++-- ...> test_binpred_cartesian_dispatch_list.py} | 5 +-- ....py => test_binpred_each_dispatch_list.py} | 0 5 files changed, 32 insertions(+), 30 deletions(-) rename python/cuspatial/cuspatial/tests/binpreds/{test_binpred_large_examples.py => test_binpred_cartesian_dispatch_list.py} (95%) rename python/cuspatial/cuspatial/tests/binpreds/{test_feature_groups.py => test_binpred_each_dispatch_list.py} (100%) diff --git a/python/cuspatial/cuspatial/core/binpreds/contains.py b/python/cuspatial/cuspatial/core/binpreds/contains.py index 0d4fc4f3a..a5b827961 100644 --- a/python/cuspatial/cuspatial/core/binpreds/contains.py +++ b/python/cuspatial/cuspatial/core/binpreds/contains.py @@ -120,6 +120,21 @@ def _brute_force_contains_properly(points, polygons): return final_result +def _pairwise_pip_result_to_point_polygon_index_pairs(pairwise_result): + pairwise_result.columns = [ + "pairwise_index", + "point_index", + "result", + ] + result = pairwise_result[["point_index", "pairwise_index"]][ + pairwise_result["result"].astype("bool") + ] + result = result.sort_values(["point_index", "pairwise_index"]).reset_index( + drop=True + ) + return result + + def _pairwise_contains_properly(points, polygons): """Compute from a series of points and a series of polygons which points are properly contained within the corresponding polygon. Polygon A contains @@ -160,16 +175,8 @@ def _pairwise_contains_properly(points, polygons): quadtree_shaped_result = ( cudf.Series(pip_result).reset_index().reset_index() ) - quadtree_shaped_result.columns = [ - "pairwise_index", - "point_index", - "result", - ] - result = quadtree_shaped_result[["point_index", "pairwise_index"]][ - quadtree_shaped_result["result"].astype("bool") - ] - result = result.sort_values(["point_index", "pairwise_index"]).reset_index( - drop=True + result = _pairwise_pip_result_to_point_polygon_index_pairs( + quadtree_shaped_result ) return result @@ -185,15 +192,8 @@ def contains_properly(polygons, points, mode="pairwise"): # two-column DataFrame. bitmask_result = _brute_force_contains_properly(points, polygons) quadtree_shaped_result = bitmask_result.stack().reset_index() - quadtree_shaped_result.columns = [ - "point_index", - "part_index", - "result", - ] - result = quadtree_shaped_result[["point_index", "part_index"]][ - quadtree_shaped_result["result"] - ] - result = result.sort_values(["point_index", "part_index"]).reset_index( - drop=True + result = _pairwise_pip_result_to_point_polygon_index_pairs( + quadtree_shaped_result ) + result.columns = ["part_index", "point_index"] return result diff --git a/python/cuspatial/cuspatial/core/binpreds/feature_contains.py b/python/cuspatial/cuspatial/core/binpreds/feature_contains.py index 8744de567..0617c12b3 100644 --- a/python/cuspatial/cuspatial/core/binpreds/feature_contains.py +++ b/python/cuspatial/cuspatial/core/binpreds/feature_contains.py @@ -78,9 +78,12 @@ def _compute_polygon_polygon_contains(self, lhs, rhs, preprocessor_result): # A closed polygon has an extra line segment that is not used in # counting the number of points. We need to subtract this from the # number of points in the polygon. - polygon_size_reduction = rhs.polygons.part_offset.take( - rhs.polygons.geometry_offset[1:] - ) - rhs.polygons.part_offset.take(rhs.polygons.geometry_offset[:-1]) + multipolygon_part_offset = rhs.polygons.part_offset.take( + rhs.polygons.geometry_offset + ) + polygon_size_reduction = ( + multipolygon_part_offset[1:] - multipolygon_part_offset[:-1] + ) result = contains + intersects >= rhs.sizes - polygon_size_reduction return result diff --git a/python/cuspatial/cuspatial/core/binpreds/feature_contains_properly.py b/python/cuspatial/cuspatial/core/binpreds/feature_contains_properly.py index bb2502bbc..1d01cc9ce 100644 --- a/python/cuspatial/cuspatial/core/binpreds/feature_contains_properly.py +++ b/python/cuspatial/cuspatial/core/binpreds/feature_contains_properly.py @@ -115,10 +115,10 @@ def _compute_predicate( pip_result = contains_properly( lhs[lhs_indices], preprocessor_result.final_rhs, mode=mode ) - # If the mode is pairwise, we need to replace the `pairwise_index` - # of each repeated polygon with the `part_index` from the - # preprocessor result. - if mode == "pairwise": + # If the mode is pairwise or brute_force, we need to replace the + # `pairwise_index` of each repeated polygon with the `part_index` + # from the preprocessor result. + if "pairwise_index" in pip_result.columns: pairwise_index_df = cudf.DataFrame( { "pairwise_index": cp.arange(len(lhs_indices)), diff --git a/python/cuspatial/cuspatial/tests/binpreds/test_binpred_large_examples.py b/python/cuspatial/cuspatial/tests/binpreds/test_binpred_cartesian_dispatch_list.py similarity index 95% rename from python/cuspatial/cuspatial/tests/binpreds/test_binpred_large_examples.py rename to python/cuspatial/cuspatial/tests/binpreds/test_binpred_cartesian_dispatch_list.py index 6e56c115e..772853ef2 100644 --- a/python/cuspatial/cuspatial/tests/binpreds/test_binpred_large_examples.py +++ b/python/cuspatial/cuspatial/tests/binpreds/test_binpred_cartesian_dispatch_list.py @@ -28,9 +28,8 @@ def sample_test_data(features, dispatch_list, size, lib=cuspatial): ] lhs = lib.GeoSeries(list(geometries[0])) rhs = lib.GeoSeries(list(geometries[1])) - np.random.seed(0) - lhs_picks = np.random.randint(0, len(lhs), size) - rhs_picks = np.random.randint(0, len(rhs), size) + lhs_picks = np.repeat(np.arange(len(lhs)), len(lhs)) + rhs_picks = np.tile(np.arange(len(rhs)), len(rhs)) return ( lhs[lhs_picks].reset_index(drop=True), rhs[rhs_picks].reset_index(drop=True), diff --git a/python/cuspatial/cuspatial/tests/binpreds/test_feature_groups.py b/python/cuspatial/cuspatial/tests/binpreds/test_binpred_each_dispatch_list.py similarity index 100% rename from python/cuspatial/cuspatial/tests/binpreds/test_feature_groups.py rename to python/cuspatial/cuspatial/tests/binpreds/test_binpred_each_dispatch_list.py From bf7706965c0e089b4daf000269c15c47e883013b Mon Sep 17 00:00:00 2001 From: "H. Thomson Comer" Date: Wed, 7 Jun 2023 09:10:44 -0500 Subject: [PATCH 11/23] Update python/cuspatial/cuspatial/core/binpreds/contains.py Co-authored-by: Mark Harris <783069+harrism@users.noreply.github.com> --- python/cuspatial/cuspatial/core/binpreds/contains.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/python/cuspatial/cuspatial/core/binpreds/contains.py b/python/cuspatial/cuspatial/core/binpreds/contains.py index a5b827961..20029ab4f 100644 --- a/python/cuspatial/cuspatial/core/binpreds/contains.py +++ b/python/cuspatial/cuspatial/core/binpreds/contains.py @@ -144,8 +144,6 @@ def _pairwise_contains_properly(points, polygons): Note that polygons must be closed: the first and last vertex of each polygon must be the same. - This version provides support for a very large number of points with - an equal number of polygons. Parameters ---------- From c8cb7ea02f1536e44c930925e84e6664e4eeeb1a Mon Sep 17 00:00:00 2001 From: "H. Thomson Comer" Date: Wed, 7 Jun 2023 09:10:57 -0500 Subject: [PATCH 12/23] Update python/cuspatial/cuspatial/core/binpreds/contains.py Co-authored-by: Mark Harris <783069+harrism@users.noreply.github.com> --- python/cuspatial/cuspatial/core/binpreds/contains.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cuspatial/cuspatial/core/binpreds/contains.py b/python/cuspatial/cuspatial/core/binpreds/contains.py index 20029ab4f..e40961be9 100644 --- a/python/cuspatial/cuspatial/core/binpreds/contains.py +++ b/python/cuspatial/cuspatial/core/binpreds/contains.py @@ -136,7 +136,7 @@ def _pairwise_pip_result_to_point_polygon_index_pairs(pairwise_result): def _pairwise_contains_properly(points, polygons): - """Compute from a series of points and a series of polygons which points + """Compute from a series of polygons and an equal-length series of points which points are properly contained within the corresponding polygon. Polygon A contains Point B properly if B intersects the interior of A but not the boundary (or exterior). From 2bf49e32fe75a0bc135448c668eed25221f0771c Mon Sep 17 00:00:00 2001 From: "H. Thomson Comer" Date: Wed, 7 Jun 2023 09:11:39 -0500 Subject: [PATCH 13/23] Update python/cuspatial/cuspatial/core/binpreds/contains.py Co-authored-by: Mark Harris <783069+harrism@users.noreply.github.com> --- python/cuspatial/cuspatial/core/binpreds/contains.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/python/cuspatial/cuspatial/core/binpreds/contains.py b/python/cuspatial/cuspatial/core/binpreds/contains.py index e40961be9..edc4ce0f0 100644 --- a/python/cuspatial/cuspatial/core/binpreds/contains.py +++ b/python/cuspatial/cuspatial/core/binpreds/contains.py @@ -166,10 +166,9 @@ def _pairwise_contains_properly(points, polygons): as_column(polygons.polygons.x), as_column(polygons.polygons.y), ) - # Pairwise returns a boolean column where the point and polygon index - # always correspond. We can use this to create a dataframe with the - # same shape as the quadtree result. Finally all the False results - # are dropped, as quadtree doesn't report False results. + # Pairwise returns a boolean column with a True value for each (polygon, point) pair + # where the point is contained properly by the polygon. We can use this to create a + # dataframe with only (polygon, point) pairs that satisfy the relationship. quadtree_shaped_result = ( cudf.Series(pip_result).reset_index().reset_index() ) From 285b0c72909f1822cb8d6532efc9ffc4ba1e6ca0 Mon Sep 17 00:00:00 2001 From: "H. Thomson Comer" Date: Wed, 7 Jun 2023 09:13:27 -0500 Subject: [PATCH 14/23] Update python/cuspatial/cuspatial/core/binpreds/feature_contains_properly.py Co-authored-by: Mark Harris <783069+harrism@users.noreply.github.com> --- .../cuspatial/core/binpreds/feature_contains_properly.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/cuspatial/cuspatial/core/binpreds/feature_contains_properly.py b/python/cuspatial/cuspatial/core/binpreds/feature_contains_properly.py index 1d01cc9ce..04fb5788c 100644 --- a/python/cuspatial/cuspatial/core/binpreds/feature_contains_properly.py +++ b/python/cuspatial/cuspatial/core/binpreds/feature_contains_properly.py @@ -80,8 +80,8 @@ def _pip_mode(self, lhs, rhs): 2. If the lhs contains multipolygons, or `allpairs=True` is specified, we use quadtree because the quadtree code path already handles multipolygons. - 3. Otherwise pairwise is defaulted to since the default GeoPandas - behavior is to use the pairwise algorithm. + 3. Otherwise default to pairwise to match the default GeoPandas + behavior. """ if len(lhs) <= 31: return "brute_force" From 788960379f7206ec0d951f5a0c528a990e437a11 Mon Sep 17 00:00:00 2001 From: "H. Thomson Comer" Date: Wed, 7 Jun 2023 09:13:50 -0500 Subject: [PATCH 15/23] Update python/cuspatial/cuspatial/core/binpreds/feature_disjoint.py Co-authored-by: Mark Harris <783069+harrism@users.noreply.github.com> --- python/cuspatial/cuspatial/core/binpreds/feature_disjoint.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cuspatial/cuspatial/core/binpreds/feature_disjoint.py b/python/cuspatial/cuspatial/core/binpreds/feature_disjoint.py index 8d0852ba4..1f9f7d35d 100644 --- a/python/cuspatial/cuspatial/core/binpreds/feature_disjoint.py +++ b/python/cuspatial/cuspatial/core/binpreds/feature_disjoint.py @@ -46,7 +46,7 @@ def _preprocess(self, lhs, rhs): class PointPolygonDisjoint(BinPred): def _preprocess(self, lhs, rhs): contains = _basic_contains_any(lhs, rhs) - return ~contains + return ~_basic_contains_any(lhs, rhs) class LineStringPointDisjoint(PointLineStringDisjoint): From a28ec99cee4d4df81d232571f100a8f22a467db5 Mon Sep 17 00:00:00 2001 From: "H. Thomson Comer" Date: Wed, 7 Jun 2023 09:14:05 -0500 Subject: [PATCH 16/23] Update python/cuspatial/cuspatial/core/binpreds/feature_disjoint.py Co-authored-by: Mark Harris <783069+harrism@users.noreply.github.com> --- python/cuspatial/cuspatial/core/binpreds/feature_disjoint.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cuspatial/cuspatial/core/binpreds/feature_disjoint.py b/python/cuspatial/cuspatial/core/binpreds/feature_disjoint.py index 1f9f7d35d..ab758bdc3 100644 --- a/python/cuspatial/cuspatial/core/binpreds/feature_disjoint.py +++ b/python/cuspatial/cuspatial/core/binpreds/feature_disjoint.py @@ -66,7 +66,7 @@ def _postprocess(self, lhs, rhs, op_result): class LineStringPolygonDisjoint(BinPred): def _preprocess(self, lhs, rhs): contains = _basic_contains_any(rhs, lhs) - return ~contains + return ~_basic_contains_any(rhs, lhs) class PolygonPolygonDisjoint(BinPred): From a94c232e778607db6144221600883c49754fd89e Mon Sep 17 00:00:00 2001 From: "H. Thomson Comer" Date: Wed, 7 Jun 2023 09:14:22 -0500 Subject: [PATCH 17/23] Update python/cuspatial/cuspatial/core/binpreds/feature_intersects.py Co-authored-by: Mark Harris <783069+harrism@users.noreply.github.com> --- python/cuspatial/cuspatial/core/binpreds/feature_intersects.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cuspatial/cuspatial/core/binpreds/feature_intersects.py b/python/cuspatial/cuspatial/core/binpreds/feature_intersects.py index a180112ca..c5ee309bd 100644 --- a/python/cuspatial/cuspatial/core/binpreds/feature_intersects.py +++ b/python/cuspatial/cuspatial/core/binpreds/feature_intersects.py @@ -93,7 +93,7 @@ class IntersectsByEquals(EqualsPredicateBase): class PolygonPointIntersects(BinPred): def _preprocess(self, lhs, rhs): contains = _basic_contains_any(lhs, rhs) - return contains + return _basic_contains_any(lhs, rhs) class PointPolygonIntersects(BinPred): From 8e38e8e69008be483fb0e375f6ea2924f52aa1e6 Mon Sep 17 00:00:00 2001 From: "H. Thomson Comer" Date: Wed, 7 Jun 2023 09:14:35 -0500 Subject: [PATCH 18/23] Update python/cuspatial/cuspatial/core/binpreds/feature_intersects.py Co-authored-by: Mark Harris <783069+harrism@users.noreply.github.com> --- python/cuspatial/cuspatial/core/binpreds/feature_intersects.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cuspatial/cuspatial/core/binpreds/feature_intersects.py b/python/cuspatial/cuspatial/core/binpreds/feature_intersects.py index c5ee309bd..0f2b4ded7 100644 --- a/python/cuspatial/cuspatial/core/binpreds/feature_intersects.py +++ b/python/cuspatial/cuspatial/core/binpreds/feature_intersects.py @@ -99,7 +99,7 @@ def _preprocess(self, lhs, rhs): class PointPolygonIntersects(BinPred): def _preprocess(self, lhs, rhs): contains = _basic_contains_any(rhs, lhs) - return contains + return _basic_contains_any(rhs, lhs) class LineStringPointIntersects(IntersectsPredicateBase): From 6e9884f75752cc5cda0c16b1821d4bd508c62519 Mon Sep 17 00:00:00 2001 From: "H. Thomson Comer" Date: Wed, 7 Jun 2023 09:14:46 -0500 Subject: [PATCH 19/23] Update python/cuspatial/cuspatial/core/binpreds/feature_intersects.py Co-authored-by: Mark Harris <783069+harrism@users.noreply.github.com> --- python/cuspatial/cuspatial/core/binpreds/feature_intersects.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cuspatial/cuspatial/core/binpreds/feature_intersects.py b/python/cuspatial/cuspatial/core/binpreds/feature_intersects.py index 0f2b4ded7..f15a12a7e 100644 --- a/python/cuspatial/cuspatial/core/binpreds/feature_intersects.py +++ b/python/cuspatial/cuspatial/core/binpreds/feature_intersects.py @@ -116,7 +116,7 @@ def _preprocess(self, lhs, rhs): class LineStringPolygonIntersects(BinPred): def _preprocess(self, lhs, rhs): contains = _basic_contains_any(rhs, lhs) - return contains + return _basic_contains_any(rhs, lhs) class PolygonLineStringIntersects(BinPred): From d0d1888b2363326b96dc70d558653775e82c7793 Mon Sep 17 00:00:00 2001 From: "H. Thomson Comer" Date: Wed, 7 Jun 2023 09:15:02 -0500 Subject: [PATCH 20/23] Update python/cuspatial/cuspatial/core/binpreds/feature_intersects.py Co-authored-by: Mark Harris <783069+harrism@users.noreply.github.com> --- python/cuspatial/cuspatial/core/binpreds/feature_intersects.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cuspatial/cuspatial/core/binpreds/feature_intersects.py b/python/cuspatial/cuspatial/core/binpreds/feature_intersects.py index f15a12a7e..f8632081c 100644 --- a/python/cuspatial/cuspatial/core/binpreds/feature_intersects.py +++ b/python/cuspatial/cuspatial/core/binpreds/feature_intersects.py @@ -122,7 +122,7 @@ def _preprocess(self, lhs, rhs): class PolygonLineStringIntersects(BinPred): def _preprocess(self, lhs, rhs): contains = _basic_contains_any(lhs, rhs) - return contains + return _basic_contains_any(lhs, rhs) class PolygonPolygonIntersects(BinPred): From aa49915550b8d098706d08eac735feaefe658b36 Mon Sep 17 00:00:00 2001 From: "H. Thomson Comer" Date: Wed, 7 Jun 2023 11:15:54 -0500 Subject: [PATCH 21/23] Optimize result processing for brute_force and pairwise. --- .../cuspatial/core/binpreds/contains.py | 42 ++++++++++--------- 1 file changed, 22 insertions(+), 20 deletions(-) diff --git a/python/cuspatial/cuspatial/core/binpreds/contains.py b/python/cuspatial/cuspatial/core/binpreds/contains.py index edc4ce0f0..6675402fe 100644 --- a/python/cuspatial/cuspatial/core/binpreds/contains.py +++ b/python/cuspatial/cuspatial/core/binpreds/contains.py @@ -136,10 +136,10 @@ def _pairwise_pip_result_to_point_polygon_index_pairs(pairwise_result): def _pairwise_contains_properly(points, polygons): - """Compute from a series of polygons and an equal-length series of points which points - are properly contained within the corresponding polygon. Polygon A contains - Point B properly if B intersects the interior of A but not the boundary (or - exterior). + """Compute from a series of polygons and an equal-length series of points + which points are properly contained within the corresponding polygon. + Polygon A contains Point B properly if B intersects the interior of A + but not the boundary (or exterior). Note that polygons must be closed: the first and last vertex of each polygon must be the same. @@ -158,7 +158,7 @@ def _pairwise_contains_properly(points, polygons): A DataFrame of boolean values indicating whether each point falls within its corresponding polygon. """ - pip_result = cpp_pairwise_point_in_polygon( + result_column = cpp_pairwise_point_in_polygon( as_column(points.points.x), as_column(points.points.y), as_column(polygons.polygons.part_offset), @@ -166,16 +166,20 @@ def _pairwise_contains_properly(points, polygons): as_column(polygons.polygons.x), as_column(polygons.polygons.y), ) - # Pairwise returns a boolean column with a True value for each (polygon, point) pair - # where the point is contained properly by the polygon. We can use this to create a - # dataframe with only (polygon, point) pairs that satisfy the relationship. - quadtree_shaped_result = ( - cudf.Series(pip_result).reset_index().reset_index() - ) - result = _pairwise_pip_result_to_point_polygon_index_pairs( - quadtree_shaped_result + # Pairwise returns a boolean column with a True value for each (polygon, + # point) pair where the point is contained properly by the polygon. We can + # use this to create a dataframe with only (polygon, point) pairs that + # satisfy the relationship. + pip_result = cudf.Series(result_column, dtype="bool") + trues = pip_result[pip_result].index + true_pairs = cudf.DataFrame( + { + "pairwise_index": trues, + "point_index": trues, + "result": True, + } ) - return result + return true_pairs def contains_properly(polygons, points, mode="pairwise"): @@ -188,9 +192,7 @@ def contains_properly(polygons, points, mode="pairwise"): # result, name the columns appropriately, and return the # two-column DataFrame. bitmask_result = _brute_force_contains_properly(points, polygons) - quadtree_shaped_result = bitmask_result.stack().reset_index() - result = _pairwise_pip_result_to_point_polygon_index_pairs( - quadtree_shaped_result - ) - result.columns = ["part_index", "point_index"] - return result + bitmask_result_df = bitmask_result.stack().reset_index() + trues = bitmask_result_df[bitmask_result_df[0]] + trues.columns = ["point_index", "part_index", "result"] + return trues From 6d1bd1b74a05ca90a3034c0f798f4239321acd9d Mon Sep 17 00:00:00 2001 From: "H. Thomson Comer" Date: Wed, 7 Jun 2023 16:16:30 +0000 Subject: [PATCH 22/23] Drop unused function. --- .../cuspatial/cuspatial/core/binpreds/contains.py | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/python/cuspatial/cuspatial/core/binpreds/contains.py b/python/cuspatial/cuspatial/core/binpreds/contains.py index 6675402fe..8111074a5 100644 --- a/python/cuspatial/cuspatial/core/binpreds/contains.py +++ b/python/cuspatial/cuspatial/core/binpreds/contains.py @@ -120,21 +120,6 @@ def _brute_force_contains_properly(points, polygons): return final_result -def _pairwise_pip_result_to_point_polygon_index_pairs(pairwise_result): - pairwise_result.columns = [ - "pairwise_index", - "point_index", - "result", - ] - result = pairwise_result[["point_index", "pairwise_index"]][ - pairwise_result["result"].astype("bool") - ] - result = result.sort_values(["point_index", "pairwise_index"]).reset_index( - drop=True - ) - return result - - def _pairwise_contains_properly(points, polygons): """Compute from a series of polygons and an equal-length series of points which points are properly contained within the corresponding polygon. From 29abbbf9711ed38f10417cc6ee78ec4a88bf6f45 Mon Sep 17 00:00:00 2001 From: "H. Thomson Comer" Date: Wed, 7 Jun 2023 12:14:24 -0500 Subject: [PATCH 23/23] Flake8 issues unfixed due to github commits. --- python/cuspatial/cuspatial/core/binpreds/feature_disjoint.py | 2 -- .../cuspatial/cuspatial/core/binpreds/feature_intersects.py | 4 ---- 2 files changed, 6 deletions(-) diff --git a/python/cuspatial/cuspatial/core/binpreds/feature_disjoint.py b/python/cuspatial/cuspatial/core/binpreds/feature_disjoint.py index ab758bdc3..2ada86abb 100644 --- a/python/cuspatial/cuspatial/core/binpreds/feature_disjoint.py +++ b/python/cuspatial/cuspatial/core/binpreds/feature_disjoint.py @@ -45,7 +45,6 @@ def _preprocess(self, lhs, rhs): class PointPolygonDisjoint(BinPred): def _preprocess(self, lhs, rhs): - contains = _basic_contains_any(lhs, rhs) return ~_basic_contains_any(lhs, rhs) @@ -65,7 +64,6 @@ def _postprocess(self, lhs, rhs, op_result): class LineStringPolygonDisjoint(BinPred): def _preprocess(self, lhs, rhs): - contains = _basic_contains_any(rhs, lhs) return ~_basic_contains_any(rhs, lhs) diff --git a/python/cuspatial/cuspatial/core/binpreds/feature_intersects.py b/python/cuspatial/cuspatial/core/binpreds/feature_intersects.py index f8632081c..25c463b7c 100644 --- a/python/cuspatial/cuspatial/core/binpreds/feature_intersects.py +++ b/python/cuspatial/cuspatial/core/binpreds/feature_intersects.py @@ -92,13 +92,11 @@ class IntersectsByEquals(EqualsPredicateBase): class PolygonPointIntersects(BinPred): def _preprocess(self, lhs, rhs): - contains = _basic_contains_any(lhs, rhs) return _basic_contains_any(lhs, rhs) class PointPolygonIntersects(BinPred): def _preprocess(self, lhs, rhs): - contains = _basic_contains_any(rhs, lhs) return _basic_contains_any(rhs, lhs) @@ -115,13 +113,11 @@ def _preprocess(self, lhs, rhs): class LineStringPolygonIntersects(BinPred): def _preprocess(self, lhs, rhs): - contains = _basic_contains_any(rhs, lhs) return _basic_contains_any(rhs, lhs) class PolygonLineStringIntersects(BinPred): def _preprocess(self, lhs, rhs): - contains = _basic_contains_any(lhs, rhs) return _basic_contains_any(lhs, rhs)