Skip to content

Commit

Permalink
Reduced equals time and fixed a bug. (#1051)
Browse files Browse the repository at this point in the history
Closes #1014

This PR reduces the size of some of the equals tests from 10000 to 100 geometries. The long run time was caused by serializing 10000 Shapely objects.

It also fixes a bug in `MultiPointMultiPointEquals` that, I suspect, went undetected because of the size of the old test and the seed of the random generator.

Authors:
  - H. Thomson Comer (https://github.com/thomcom)

Approvers:
  - Michael Wang (https://github.com/isVoid)

URL: #1051
  • Loading branch information
thomcom authored Apr 7, 2023
1 parent 67d0f33 commit 0985aef
Show file tree
Hide file tree
Showing 2 changed files with 46 additions and 61 deletions.
21 changes: 10 additions & 11 deletions python/cuspatial/cuspatial/core/binpreds/feature_equals.py
Original file line number Diff line number Diff line change
Expand Up @@ -289,19 +289,18 @@ def _postprocess(self, lhs, rhs, op_result):


class MultiPointMultiPointEquals(PolygonComplexEquals):
def _preprocess(self, lhs, rhs):
"""Sort the multipoints by their coordinates. This is necessary
because the order of the points in a multipoint is not significant
for the equals predicate."""
(lhs_result, rhs_result) = self._sort_multipoints(lhs, rhs)
return self._compute_predicate(
lhs_result, rhs_result, rhs_result.point_indices
)

def _compute_predicate(self, lhs, rhs, point_indices):
result = self._vertices_equals(lhs.multipoints.xy, rhs.multipoints.xy)
lengths_equal = self._offset_equals(
lhs.multipoints.geometry_offset, rhs.multipoints.geometry_offset
)
(lhs_sorted, rhs_sorted) = self._sort_multipoints(
lhs[lengths_equal], rhs[lengths_equal]
)
result = self._vertices_equals(
lhs_sorted.multipoints.xy, rhs_sorted.multipoints.xy
)
return self._postprocess(
lhs, rhs, EqualsOpResult(result, point_indices)
lhs, rhs, EqualsOpResult(result, rhs_sorted.point_indices)
)


Expand Down
Original file line number Diff line number Diff line change
@@ -1,13 +1,10 @@
# Copyright (c) 2020-2023, NVIDIA CORPORATION

import cupy as cp
import geopandas as gpd
import pandas as pd
import pytest
from shapely.geometry import LineString, MultiPoint, Point, Polygon

import cudf

import cuspatial


Expand Down Expand Up @@ -39,9 +36,9 @@ def test_3_points_equals_3_points_one_equal(lhs):
pd.testing.assert_series_equal(expected, got.to_pandas())


def test_10000_points_geom_equals_10000_points(point_generator):
gpdpoints1 = gpd.GeoSeries([*point_generator(10000)])
gpdpoints2 = gpd.GeoSeries([*point_generator(10000)])
def test_100_points_geom_equals_100_points(point_generator):
gpdpoints1 = gpd.GeoSeries([*point_generator(100)])
gpdpoints2 = gpd.GeoSeries([*point_generator(100)])
points1 = cuspatial.from_geopandas(gpdpoints1)
points2 = cuspatial.from_geopandas(gpdpoints2)
got = points1.geom_equals(points2)
Expand Down Expand Up @@ -115,9 +112,9 @@ def test_10_linestrings_geom_equals_10_linestrings(linestring_generator):
pd.testing.assert_series_equal(expected, got.to_pandas())


def test_10000_linestrings_geom_equals_10000_linestrings(linestring_generator):
gpdlines1 = gpd.GeoSeries([*linestring_generator(10000, 5)])
gpdlines2 = gpd.GeoSeries([*linestring_generator(10000, 5)])
def test_100_linestrings_geom_equals_100_linestrings(linestring_generator):
gpdlines1 = gpd.GeoSeries([*linestring_generator(100, 5)])
gpdlines2 = gpd.GeoSeries([*linestring_generator(100, 5)])
lines1 = cuspatial.from_geopandas(gpdlines1)
lines2 = cuspatial.from_geopandas(gpdlines2)
got = lines1.geom_equals(lines2)
Expand All @@ -135,11 +132,11 @@ def test_linestring_geom_equals_polygon():
pd.testing.assert_series_equal(expected, got.to_pandas())


def test_10000_linestrings_geom_equals_10000_polygons(
def test_100_linestrings_geom_equals_100_polygons(
polygon_generator, linestring_generator
):
gpdlines = gpd.GeoSeries([*linestring_generator(10000, 5)])
gpdpolygons = gpd.GeoSeries([*polygon_generator(10000, 0)])
gpdlines = gpd.GeoSeries([*linestring_generator(100, 5)])
gpdpolygons = gpd.GeoSeries([*polygon_generator(100, 0)])
lines = cuspatial.from_geopandas(gpdlines)
polygons = cuspatial.from_geopandas(gpdpolygons)
got = lines.geom_equals(polygons)
Expand All @@ -157,11 +154,11 @@ def test_polygon_geom_equals_linestring():
pd.testing.assert_series_equal(expected, got.to_pandas())


def test_10000_polygons_geom_equals_10000_linestrings(
def test_100_polygons_geom_equals_100_linestrings(
polygon_generator, linestring_generator
):
gpdpolygons = gpd.GeoSeries([*polygon_generator(10000, 0)])
gpdlines = gpd.GeoSeries([*linestring_generator(10000, 5)])
gpdpolygons = gpd.GeoSeries([*polygon_generator(100, 0)])
gpdlines = gpd.GeoSeries([*linestring_generator(100, 5)])
polygons = cuspatial.from_geopandas(gpdpolygons)
lines = cuspatial.from_geopandas(gpdlines)
got = polygons.geom_equals(lines)
Expand Down Expand Up @@ -189,9 +186,9 @@ def test_point_not_contains_point():
assert (got.values_host == expected.values).all()


def test_10000_points_contains_10000_points(point_generator):
gpdpoints1 = gpd.GeoSeries([*point_generator(10000)])
gpdpoints2 = gpd.GeoSeries([*point_generator(10000)])
def test_100_points_contains_100_points(point_generator):
gpdpoints1 = gpd.GeoSeries([*point_generator(100)])
gpdpoints2 = gpd.GeoSeries([*point_generator(100)])
points1 = cuspatial.from_geopandas(gpdpoints1)
points2 = cuspatial.from_geopandas(gpdpoints2)
got = points1.contains_properly(points2)
Expand Down Expand Up @@ -219,9 +216,9 @@ def test_point_not_covers_point():
assert (got.values_host == expected.values).all()


def test_10000_points_covers_10000_points(point_generator):
gpdpoints1 = gpd.GeoSeries([*point_generator(10000)])
gpdpoints2 = gpd.GeoSeries([*point_generator(10000)])
def test_100_points_covers_100_points(point_generator):
gpdpoints1 = gpd.GeoSeries([*point_generator(100)])
gpdpoints2 = gpd.GeoSeries([*point_generator(100)])
points1 = cuspatial.from_geopandas(gpdpoints1)
points2 = cuspatial.from_geopandas(gpdpoints2)
got = points1.covers(points2)
Expand Down Expand Up @@ -249,9 +246,9 @@ def test_point_not_intersects_point():
assert (got.values_host == expected.values).all()


def test_10000_points_intersects_10000_points(point_generator):
gpdpoints1 = gpd.GeoSeries([*point_generator(10000)])
gpdpoints2 = gpd.GeoSeries([*point_generator(10000)])
def test_100_points_intersects_100_points(point_generator):
gpdpoints1 = gpd.GeoSeries([*point_generator(100)])
gpdpoints2 = gpd.GeoSeries([*point_generator(100)])
points1 = cuspatial.from_geopandas(gpdpoints1)
points2 = cuspatial.from_geopandas(gpdpoints2)
got = points1.intersects(points2)
Expand Down Expand Up @@ -279,9 +276,9 @@ def test_point_not_within_point():
assert (got.values_host == expected.values).all()


def test_10000_points_within_10000_points(point_generator):
gpdpoints1 = gpd.GeoSeries(point_generator(10000))
gpdpoints2 = gpd.GeoSeries(point_generator(10000))
def test_100_points_within_100_points(point_generator):
gpdpoints1 = gpd.GeoSeries(point_generator(100))
gpdpoints2 = gpd.GeoSeries(point_generator(100))
points1 = cuspatial.from_geopandas(gpdpoints1)
points2 = cuspatial.from_geopandas(gpdpoints2)
got = points1.within(points2).values_host
Expand Down Expand Up @@ -327,9 +324,9 @@ def test_three_points_crosses_three_points(points):
assert (got.values_host == expected.values).all()


def test_10000_points_crosses_10000_points(point_generator):
gpdpoints1 = gpd.GeoSeries([*point_generator(10000)])
gpdpoints2 = gpd.GeoSeries([*point_generator(10000)])
def test_100_points_crosses_100_points(point_generator):
gpdpoints1 = gpd.GeoSeries([*point_generator(100)])
gpdpoints2 = gpd.GeoSeries([*point_generator(100)])
points1 = cuspatial.from_geopandas(gpdpoints1)
points2 = cuspatial.from_geopandas(gpdpoints2)
got = points1.crosses(points2)
Expand Down Expand Up @@ -375,9 +372,9 @@ def test_three_points_overlaps_three_points(points):
assert (got.values_host == expected.values).all()


def test_10000_points_overlaps_10000_points(point_generator):
gpdpoint1 = gpd.GeoSeries([*point_generator(10000)])
gpdpoint2 = gpd.GeoSeries([*point_generator(10000)])
def test_100_points_overlaps_100_points(point_generator):
gpdpoint1 = gpd.GeoSeries([*point_generator(100)])
gpdpoint2 = gpd.GeoSeries([*point_generator(100)])
point1 = cuspatial.from_geopandas(gpdpoint1)
point2 = cuspatial.from_geopandas(gpdpoint2)
got = point1.overlaps(point2)
Expand Down Expand Up @@ -405,9 +402,9 @@ def test_multipoint_not_geom_equals_multipoint():
assert (got.values_host == expected.values).all()


def test_10000_multipoints_geom_equals_10000_multipoints(multipoint_generator):
gpdpoints1 = gpd.GeoSeries([*multipoint_generator(10000, 10)])
gpdpoints2 = gpd.GeoSeries([*multipoint_generator(10000, 10)])
def test_100_multipoints_geom_equals_100_multipoints(multipoint_generator):
gpdpoints1 = gpd.GeoSeries([*multipoint_generator(100, 10)])
gpdpoints2 = gpd.GeoSeries([*multipoint_generator(100, 10)])
points1 = cuspatial.from_geopandas(gpdpoints1)
points2 = cuspatial.from_geopandas(gpdpoints2)
got = points1.geom_equals(points2)
Expand Down Expand Up @@ -656,9 +653,9 @@ def test_3_polygons_geom_equals_3_polygons_one_equal(lhs):


@pytest.mark.skip(reason="NotImplemented: Depends on .contains")
def test_10000_polygons_geom_equals_10000_polygons(polygon_generator):
gpdpolygons1 = gpd.GeoSeries([*polygon_generator(10000, 0)])
gpdpolygons2 = gpd.GeoSeries([*polygon_generator(10000, 0)])
def test_100_polygons_geom_equals_100_polygons(polygon_generator):
gpdpolygons1 = gpd.GeoSeries([*polygon_generator(100, 0)])
gpdpolygons2 = gpd.GeoSeries([*polygon_generator(100, 0)])
polygons1 = cuspatial.from_geopandas(gpdpolygons1)
polygons2 = cuspatial.from_geopandas(gpdpolygons2)
got = polygons1.geom_equals(polygons2)
Expand Down Expand Up @@ -730,14 +727,3 @@ def test_linestring_orders():
got = linestring1.geom_equals(linestring2)
expected = gpdlinestring1.geom_equals(gpdlinestring2)
pd.testing.assert_series_equal(expected, got.to_pandas())


def test_from_points_xy_large():
points = cuspatial.GeoSeries(
cuspatial.core._column.geocolumn.GeoColumn._from_points_xy(
cudf.core.column.column.as_column(
cp.arange(10000000, dtype="float64")
)
)
)
assert points.geom_equals(points).all()

0 comments on commit 0985aef

Please sign in to comment.