Skip to content

Commit

Permalink
✨ GPU accelerated point in polygon using cuspatial
Browse files Browse the repository at this point in the history
A very fast way to find points inside polygons! This is really just a convenience function that wraps around `cuspatial.point_in_polygon`, hiding all sorts of boilerplate. Specifically, this handles:

1. Converting a geopandas geodataframe into a cuspatial friendly format, see rapidsai/cuspatial#165
2. A hacky workaround for the 31 polygon limit using a for-loop, based on https://github.com/rapidsai/cuspatial/blob/branch-0.15/notebooks/nyc_taxi_years_correlation.ipynb
3. Outputting actual string labels from the geodataframe, instead of non human readable index numbers

Also added tests for this in test_spatiotemporal_gpu.py, though it won't work on the CI, only locally where a GPU is available.
  • Loading branch information
weiji14 committed Aug 20, 2020
1 parent 3c540c9 commit 14b020c
Show file tree
Hide file tree
Showing 4 changed files with 134 additions and 6 deletions.
12 changes: 8 additions & 4 deletions deepicedrain/__init__.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,16 @@
import importlib.resources
import logging

import intake

import deepicedrain
from deepicedrain.deltamath import calculate_delta, nanptp, nan_linregress
import intake
from deepicedrain.deltamath import calculate_delta, nan_linregress, nanptp
from deepicedrain.extraload import array_to_dataframe
from deepicedrain.spatiotemporal import Region, deltatime_to_utctime, lonlat_to_xy
from deepicedrain.spatiotemporal import (
Region,
deltatime_to_utctime,
lonlat_to_xy,
point_in_polygon_gpu,
)

__version__: str = "0.2.1"

Expand Down
90 changes: 89 additions & 1 deletion deepicedrain/spatiotemporal.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,17 @@
"""
import dataclasses
import datetime
import os
import tempfile

import datashader
import geopandas as gpd
import numpy as np
import pandas as pd
import pyproj
import xarray as xr

import datashader


@dataclasses.dataclass(frozen=True)
class Region:
Expand Down Expand Up @@ -140,3 +144,87 @@ def lonlat_to_xy(
)
else:
return x, y


def point_in_polygon_gpu(
    points_df,  # cudf.DataFrame with x and y columns of point coordinates
    poly_df: gpd.GeoDataFrame,  # geopandas.GeoDataFrame with polygon shapes
    points_x_col: str = "x",
    points_y_col: str = "y",
    poly_label_col: str = None,
):
    """
    Find polygon labels for each of the input points.

    This is a GPU accelerated version that requires cuspatial!

    Parameters
    ----------
    points_df : cudf.DataFrame
        A dataframe in GPU memory containing the x and y coordinates.
    poly_df : geopandas.GeoDataFrame
        A geodataframe in CPU memory containing polygon geometries in each
        row.
    points_x_col : str
        Name of the x coordinate column in points_df. Default is "x".
    points_y_col : str
        Name of the y coordinate column in points_df. Default is "y".
    poly_label_col : str, optional
        Name of the column in poly_df that will be used to label the points,
        e.g. "placename". Default (None) is to use the first column of
        poly_df.

    Returns
    -------
    point_labels : cudf.Series
        A column of labels, sharing the index of points_df, that indicates
        which polygon each point falls into. Points that fall in no polygon
        are left as null. If a point falls in several polygons, the label of
        the polygon processed last is kept.
    """
    import cudf
    import cuspatial

    # cuspatial.point_in_polygon can only test this many polygons per call,
    # see https://github.com/rapidsai/cuspatial/blob/branch-0.15/notebooks/nyc_taxi_years_correlation.ipynb
    max_poly_per_call: int = 31

    # Row labels 0..N-1 so polygons can be looked up by position later on
    poly_df_: gpd.GeoDataFrame = poly_df.reset_index()
    if poly_label_col is None:
        # Simply use first column of geodataframe as label if not provided
        poly_label_col = poly_df.columns[0]

    # Start with an all-null column that matches the dtype of the labels
    point_labels: cudf.Series = cudf.Series(index=points_df.index).astype(
        poly_df[poly_label_col].dtype
    )

    num_poly: int = len(poly_df_)
    if num_poly == 0:
        # With no polygons nothing can be labelled, so skip the shapefile
        # round-trip entirely and hand back the all-null column
        return point_labels

    # Load CPU-based GeoDataFrame into a GPU-based cuspatial friendly format
    # This is a workaround until the related feature request at
    # https://github.com/rapidsai/cuspatial/issues/165 is implemented
    with tempfile.TemporaryDirectory() as tmpdir:
        # Save geodataframe to a temporary shapefile,
        # so that we can load it into GPU memory using cuspatial
        tmpshpfile = os.path.join(tmpdir, "poly_df.shp")
        poly_df_.to_file(filename=tmpshpfile, driver="ESRI Shapefile")

        # Load polygon_offsets, ring_offsets and polygon xy points
        # from temporary shapefile into GPU memory
        poly_offsets, poly_ring_offsets, poly_points = cuspatial.read_polygon_shapefile(
            filename=tmpshpfile
        )

    # The point coordinates are the same for every batch, so fetch them once
    test_points_x = points_df[points_x_col]
    test_points_y = points_df[points_y_col]

    # Run the actual point in polygon algorithm, one batch of polygons at a time
    for start in range(0, num_poly, max_poly_per_call):
        end = min(start + max_poly_per_call, num_poly)
        poly_labels: cudf.DataFrame = cuspatial.point_in_polygon(
            test_points_x=test_points_x,
            test_points_y=test_points_y,
            poly_offsets=poly_offsets[start:end],
            poly_ring_offsets=poly_ring_offsets,
            poly_points_x=poly_points.x,
            poly_points_y=poly_points.y,
        )

        # Label each point with polygon they fall in
        # NOTE(review): this relies on the result columns being named after
        # the polygon's row label in poly_df_ (i.e. the index carried by the
        # sliced poly_offsets) rather than a batch-local counter - confirm
        # against the installed cuspatial version
        for label in poly_labels.columns:
            # Single scalar lookup instead of building a whole row first
            point_labels.loc[poly_labels[label]] = poly_df_.loc[label, poly_label_col]

    return point_labels
2 changes: 1 addition & 1 deletion deepicedrain/tests/test_spatiotemporal_conversions.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,12 @@
"""
import datetime

import dask
import numpy as np
import numpy.testing as npt
import pandas as pd
import xarray as xr

import dask
from deepicedrain import catalog, deltatime_to_utctime, lonlat_to_xy


Expand Down
36 changes: 36 additions & 0 deletions deepicedrain/tests/test_spatiotemporal_gpu.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
"""
Tests GPU accelerated spatial algorithms
"""

import geopandas as gpd
import numpy as np
import pytest
import shapely.geometry

from deepicedrain import point_in_polygon_gpu

# Skip the tests in this module when cudf cannot be imported,
# otherwise bind the imported module to the name `cudf`
cudf = pytest.importorskip(modname="cudf")


def test_point_in_polygon_gpu():
    """
    Checks that points along a diagonal line get labelled with the name of
    the single polygon they fall inside of, and stay unlabelled otherwise
    """
    # 50 evenly spaced points running from (-200, -160) to (200, 160)
    xy_coords = {
        "x": np.linspace(start=-200, stop=200, num=50),
        "y": np.linspace(start=-160, stop=160, num=50),
    }
    points_df: cudf.DataFrame = cudf.DataFrame(data=xy_coords)

    # One polygon: a 10x10 box centred on the origin, buffered outwards by 100
    south_pole = shapely.geometry.box(minx=-5, maxx=5, miny=-5, maxy=5).buffer(100)
    poly_df: gpd.GeoDataFrame = gpd.GeoDataFrame(
        {"placename": ["South Pole"], "geometry": south_pole}
    )

    point_labels = point_in_polygon_gpu(
        points_df=points_df, poly_df=poly_df, poly_label_col="placename"
    )

    assert isinstance(point_labels, cudf.Series)
    # Only the non-null labels are counted, i.e. points inside the polygon
    assert point_labels.count() == 20
    distinct_labels = list(point_labels.unique().to_pandas())
    assert distinct_labels == [None, "South Pole"]

0 comments on commit 14b020c

Please sign in to comment.