Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support GeoPandas Polygons and MultiPolygons #1285

Merged
merged 12 commits into from
Oct 16, 2023
Merged
53 changes: 48 additions & 5 deletions datashader/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -733,22 +733,27 @@ def polygons(self, source, geometry, agg=None):
"""
from .glyphs import PolygonGeom
from .reductions import any as any_rdn

if spatialpandas and isinstance(source, spatialpandas.dask.DaskGeoDataFrame):
# Downselect partitions to those that may contain polygons in viewport
x_range = self.x_range if self.x_range is not None else (None, None)
y_range = self.y_range if self.y_range is not None else (None, None)
source = source.cx_partitions[slice(*x_range), slice(*y_range)]
glyph = PolygonGeom(geometry)
elif spatialpandas and isinstance(source, spatialpandas.GeoDataFrame):
pass
glyph = PolygonGeom(geometry)
elif (geopandas_source := self._source_from_geopandas(source)) is not None:
source = geopandas_source
from .glyphs.polygon import GeopandasPolygonGeom
glyph = GeopandasPolygonGeom(geometry)
else:
raise ValueError(
"source must be an instance of spatialpandas.GeoDataFrame or \n"
"spatialpandas.dask.DaskGeoDataFrame.\n"
" Received value of type {typ}".format(typ=type(source)))
"source must be an instance of spatialpandas.GeoDataFrame, "
"spatialpandas.dask.DaskGeoDataFrame, geopandas.GeoDataFrame or "
f"dask_geopandas.GeoDataFrame, not {type(source)}")

if agg is None:
agg = any_rdn()
glyph = PolygonGeom(geometry)
return bypixel(source, self, glyph, agg)

def quadmesh(self, source, x=None, y=None, agg=None):
Expand Down Expand Up @@ -1228,6 +1233,44 @@ def validate(self):
self.validate_ranges(self.x_range, self.y_range)
self.validate_size(self.plot_width, self.plot_height)

def _source_from_geopandas(self, source):
"""
Check if the specified source is a geopandas or dask-geopandas GeoDataFrame.
If so, spatially filter the source and return it.
If not, return None.
"""
try:
import geopandas
except ImportError:
geopandas = None

try:
import dask_geopandas
except ImportError:
dask_geopandas = None

if ((geopandas and isinstance(source, geopandas.GeoDataFrame)) or
(dask_geopandas and isinstance(source, dask_geopandas.GeoDataFrame))):
# Explicit shapely version check as cannot continue unless shapely >= 2
from packaging.version import Version
from shapely import __version__ as shapely_version
if Version(shapely_version) < Version('2.0.0'):
raise ImportError("Use of GeoPandas in Datashader requires Shapely >= 2.0.0")

if isinstance(source, geopandas.GeoDataFrame):
x_range = self.x_range if self.x_range is not None else (-np.inf, np.inf)
y_range = self.y_range if self.y_range is not None else (-np.inf, np.inf)
from shapely import box
query = source.sindex.query(box(x_range[0], y_range[0], x_range[1], y_range[1]))
source = source.iloc[query]
else:
x_range = self.x_range if self.x_range is not None else (None, None)
y_range = self.y_range if self.y_range is not None else (None, None)
source = source.cx[slice(*x_range), slice(*y_range)]
return source
else:
return None


def bypixel(source, canvas, glyph, agg, *, antialias=False):
"""Compute an aggregate grouped by pixel sized bins.
Expand Down
26 changes: 24 additions & 2 deletions datashader/glyphs/points.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,11 @@
cudf = None
cuda_args = None

try:
from geopandas.array import GeometryDtype as gpd_GeometryDtype
except ImportError:
gpd_GeometryDtype = type(None)

try:
import spatialpandas
except Exception:
Expand All @@ -35,6 +40,7 @@ def values(s):
class _GeometryLike(Glyph):
def __init__(self, geometry):
self.geometry = geometry
self._cached_bounds = None

@property
def ndims(self):
Expand Down Expand Up @@ -72,11 +78,27 @@ def required_columns(self):
return [self.geometry]

def compute_x_bounds(self, df):
bounds = df[self.geometry].array.total_bounds_x
col = df[self.geometry]
if isinstance(col.dtype, gpd_GeometryDtype):
# geopandas
if self._cached_bounds is None:
self._cached_bounds = col.total_bounds
bounds = self._cached_bounds[::2]
else:
# spatialpandas
bounds = col.array.total_bounds_x
return self.maybe_expand_bounds(bounds)

def compute_y_bounds(self, df):
bounds = df[self.geometry].array.total_bounds_y
col = df[self.geometry]
if isinstance(col.dtype, gpd_GeometryDtype):
# geopandas
if self._cached_bounds is None:
self._cached_bounds = col.total_bounds
bounds = self._cached_bounds[1::2]
else:
# spatialpandas
bounds = col.array.total_bounds_y
return self.maybe_expand_bounds(bounds)

@memoize
Expand Down
107 changes: 101 additions & 6 deletions datashader/glyphs/polygon.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,40 @@
spatialpandas = None


class GeopandasPolygonGeom(_GeometryLike):
@property
def geom_dtypes(self):
from geopandas.array import GeometryDtype
return (GeometryDtype,)

@memoize
def _build_extend(self, x_mapper, y_mapper, info, append, _antialias_stage_2, _antialias_stage_2_funcs):
expand_aggs_and_cols = self.expand_aggs_and_cols(append)
map_onto_pixel = _build_map_onto_pixel_for_line(x_mapper, y_mapper)
draw_polygon = _build_draw_polygon(
append, map_onto_pixel, x_mapper, y_mapper, expand_aggs_and_cols
)

perform_extend_cpu = _build_extend_geopandas_polygon_geometry(
draw_polygon, expand_aggs_and_cols
)
geom_name = self.geometry

def extend(aggs, df, vt, bounds, plot_start=True):
sx, tx, sy, ty = vt
xmin, xmax, ymin, ymax = bounds
aggs_and_cols = aggs + info(df, aggs[0].shape[:2])
geom_array = df[geom_name].array

perform_extend_cpu(
sx, tx, sy, ty,
xmin, xmax, ymin, ymax,
geom_array, *aggs_and_cols
)

return extend


class PolygonGeom(_GeometryLike):
# spatialpandas must be available if a PolygonGeom object is created.

Expand Down Expand Up @@ -52,7 +86,7 @@ def _build_draw_polygon(append, map_onto_pixel, x_mapper, y_mapper, expand_aggs_
@expand_aggs_and_cols
def draw_polygon(
i, sx, tx, sy, ty, xmin, xmax, ymin, ymax,
offsets, values, xs, ys, yincreasing, eligible,
offsets, offset_multiplier, values, xs, ys, yincreasing, eligible,
*aggs_and_cols
):
"""Draw a polygon using a winding-number scan-line algorithm
Expand All @@ -64,8 +98,8 @@ def draw_polygon(
eligible.fill(1)

# First pass, compute bounding box of polygon vertices in data coordinates
start_index = offsets[0]
stop_index = offsets[-1]
start_index = offset_multiplier*offsets[0]
stop_index = offset_multiplier*offsets[-1]
# num_edges = stop_index - start_index - 2
poly_xmin = np.min(values[start_index:stop_index:2])
poly_ymin = np.min(values[start_index + 1:stop_index:2])
Expand Down Expand Up @@ -105,8 +139,8 @@ def draw_polygon(
# Build arrays of edges in canvas coordinates
ei = 0
for j in range(len(offsets) - 1):
start = offsets[j]
stop = offsets[j + 1]
start = offset_multiplier*offsets[j]
stop = offset_multiplier*offsets[j + 1]
for k in range(start, stop - 2, 2):
x0 = values[k]
y0 = values[k + 1]
Expand Down Expand Up @@ -201,6 +235,67 @@ def draw_polygon(
return draw_polygon


def _build_extend_geopandas_polygon_geometry(
draw_polygon, expand_aggs_and_cols
):
# Lazy import shapely. Cannot get here if geopandas and shapely are not available.
import shapely

def extend_cpu(
sx, tx, sy, ty, xmin, xmax, ymin, ymax, geometry, *aggs_and_cols
):
ragged = shapely.to_ragged_array(geometry)
geometry_type = ragged[0]
if geometry_type not in (shapely.GeometryType.POLYGON, shapely.GeometryType.MULTIPOLYGON):
raise ValueError(
"Canvas.polygons supports GeoPandas geometry types of POLYGON and MULTIPOLYGON, "
f"not {repr(geometry_type)}")

coords = ragged[1].ravel()
if geometry_type == shapely.GeometryType.MULTIPOLYGON:
offsets0, offsets1, offsets2 = ragged[2]
else: # POLYGON
offsets0, offsets1 = ragged[2]
offsets2 = np.arange(len(offsets1))

extend_cpu_numba(
sx, tx, sy, ty, xmin, xmax, ymin, ymax,
coords, offsets0, offsets1, offsets2, *aggs_and_cols
)

@ngjit
@expand_aggs_and_cols
def extend_cpu_numba(sx, tx, sy, ty, xmin, xmax, ymin, ymax,
coords, offsets0, offsets1, offsets2, *aggs_and_cols):
# Pre-allocate temp arrays
max_edges = 0
n_multipolygons = len(offsets2) - 1
for i in range(n_multipolygons):
polygon_inds = offsets1[offsets2[i]:offsets2[i + 1] + 1]
for j in range(len(polygon_inds) - 1):
start = offsets0[polygon_inds[j]]
stop = offsets0[polygon_inds[j + 1]]
max_edges = max(max_edges, stop - start - 1)

xs = np.full((max_edges, 2), np.nan, dtype=np.float32)
ys = np.full((max_edges, 2), np.nan, dtype=np.float32)
yincreasing = np.zeros(max_edges, dtype=np.int8)

# Initialize array indicating which edges are still eligible for processing
eligible = np.ones(max_edges, dtype=np.int8)

for i in range(n_multipolygons):
polygon_inds = offsets1[offsets2[i]:offsets2[i + 1] + 1]
for j in range(len(polygon_inds) - 1):
start = polygon_inds[j]
stop = polygon_inds[j + 1]
draw_polygon(i, sx, tx, sy, ty, xmin, xmax, ymin, ymax,
offsets0[start:stop + 1], 2,
coords, xs, ys, yincreasing, eligible, *aggs_and_cols)

return extend_cpu


def _build_extend_polygon_geometry(
draw_polygon, expand_aggs_and_cols
):
Expand Down Expand Up @@ -269,7 +364,7 @@ def extend_cpu_numba(
stop = polygon_inds[j + 1]

draw_polygon(i, sx, tx, sy, ty, xmin, xmax, ymin, ymax,
offsets2[start:stop + 1], values,
offsets2[start:stop + 1], 1, values,
xs, ys, yincreasing, eligible, *aggs_and_cols)

return extend_cpu
73 changes: 72 additions & 1 deletion datashader/tests/test_polygons.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import pytest
import pandas as pd
import numpy as np
from numpy import nan
import xarray as xr
import datashader as ds
from datashader.tests.test_pandas import assert_eq_ndarray, assert_eq_xr
Expand All @@ -16,6 +17,18 @@
GeoDataFrame = None
MultiPolygonArray = None

try:
from geodatasets import get_path
import geopandas
except ImportError:
get_path = None
geopandas = None

try:
import dask_geopandas
except ImportError:
dask_geopandas = None


def dask_GeoDataFrame(*args, **kwargs):
return dd.from_pandas(GeoDataFrame(*args, **kwargs), npartitions=3)
Expand Down Expand Up @@ -122,7 +135,7 @@ def test_multiple_polygons_auto_range(DataFrame):
out = xr.DataArray(sol, coords=[lincoords_y, lincoords_x], dims=['y', 'x'])

assert_eq_xr(agg, out)

assert_eq_ndarray(agg.x_range, (-1, 3.5), close=True)
assert_eq_ndarray(agg.y_range, (0.1, 2), close=True)

Expand Down Expand Up @@ -319,3 +332,61 @@ def test_spatial_index_not_dropped():

assert df2.columns == ['some_geom']
assert df2.some_geom.array._sindex == df.some_geom.array._sindex


natural_earth_sol = np.array([
[nan, 7, 7, 7, 7, 7, 0, 2, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, nan],
[nan, nan, nan, nan, 5, nan, 6, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan],
[nan, nan, nan, nan, nan, nan, 9, nan, nan, nan, nan, nan, nan, 10, nan, nan, nan, nan, 11, 12],
[nan, nan, nan, nan, nan, nan, 95, nan, nan, nan, nan, 112, nan, nan, nan, nan, 21, 21, 21, 13],
[ 17, nan, nan, nan, nan, nan, 95, 95, nan, nan, nan, 112, 20, nan, nan, nan, 31, 32, 34, 22],
[nan, nan, nan, nan, nan, nan, 95, nan, nan, 112, 112, 112, 112, nan, 44, 41, 50, 43, 37, nan],
[nan, 60, nan, nan, 95, 65, 54, nan, nan, 112, 112, 112, 112, nan, 112, 112, 63, nan, nan, nan],
[nan, nan, nan, 95, 95, 95, 74, nan, nan, nan, 72, 68, 112, 112, 112, 112, 112, 71, 73, nan],
[ 87, 82, 78, 95, 95, 88, 95, nan, nan, 80, 83, 112, 112, 112, 112, 112, 112, 112, nan, nan],
[ 94, nan, nan, 116, 118, 125, 125, 126, 126, nan, nan, 121, 122, 109, nan, 123, nan, 101, 106, 93],
])


@pytest.mark.skipif(not geopandas, reason="geopandas not installed")
def test_natural_earth_geopandas():
df = geopandas.read_file(get_path("naturalearth.land"))
df["col"] = np.arange(len(df))

canvas = ds.Canvas(plot_height=10, plot_width=20)
agg = canvas.polygons(source=df, geometry="geometry", agg=ds.max("col"))

assert_eq_ndarray(agg.data, natural_earth_sol)


@pytest.mark.skipif(not geopandas, reason="geopandas not installed")
@pytest.mark.skipif(not dask_geopandas, reason="dask_geopandas not installed")
@pytest.mark.parametrize('npartitions', [1, 2, 5])
def test_natural_earth_dask_geopandas(npartitions):
df = geopandas.read_file(get_path("naturalearth.land"))
df["col"] = np.arange(len(df))
df = dd.from_pandas(df, npartitions=npartitions)
assert df.npartitions == npartitions
df.calculate_spatial_partitions()

canvas = ds.Canvas(plot_height=10, plot_width=20)
agg = canvas.polygons(source=df, geometry="geometry", agg=ds.max("col"))

assert_eq_ndarray(agg.data, natural_earth_sol)


@pytest.mark.skipif(not geopandas, reason="geopandas not installed")
@pytest.mark.skipif(not spatialpandas, reason="spatialpandas not installed")
@pytest.mark.parametrize('npartitions', [0, 1, 2, 5])
def test_natural_earth_spatialpandas(npartitions):
df = geopandas.read_file(get_path("naturalearth.land"))
df["col"] = np.arange(len(df))
df = spatialpandas.GeoDataFrame(df)
if npartitions > 0:
df = dd.from_pandas(df, npartitions=npartitions)
assert df.npartitions == npartitions

canvas = ds.Canvas(plot_height=10, plot_width=20)
agg = canvas.polygons(source=df, geometry="geometry", agg=ds.max("col"))

assert_eq_ndarray(agg.data, natural_earth_sol)
7 changes: 7 additions & 0 deletions datashader/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,11 @@
except Exception:
cudf = None

try:
from geopandas.array import GeometryDtype as gpd_GeometryDtype
except ImportError:
gpd_GeometryDtype = type(None)

try:
from spatialpandas.geometry import GeometryDtype
except ImportError:
Expand Down Expand Up @@ -430,6 +435,8 @@ def dshape_from_pandas_helper(col):
return datashape.Option(datashape.DateTime(tz=tz))
elif isinstance(col.dtype, (RaggedDtype, GeometryDtype)):
return col.dtype
elif gpd_GeometryDtype and isinstance(col.dtype, gpd_GeometryDtype):
return col.dtype
dshape = datashape.CType.from_numpy_dtype(col.dtype)
dshape = datashape.string if dshape == datashape.object_ else dshape
if dshape in (datashape.string, datashape.datetime_):
Expand Down
Loading