Remove geopandas dependency in viz for shapely input (#607)

developmentseed · Aug 20, 2024 · 2364d1b · 2364d1b
1 parent 916f8d2
commit 2364d1b
Show file tree

Hide file tree

Showing 2 changed files with 72 additions and 26 deletions.
diff --git a/lonboard/_utils.py b/lonboard/_utils.py
@@ -1,6 +1,6 @@
 from __future__ import annotations
 
-from typing import TYPE_CHECKING, Any, Dict, List, Optional, Sequence, TypeVar
+from typing import TYPE_CHECKING, Any, Dict, List, Optional, Sequence, Tuple, TypeVar
 
 import numpy as np
 from arro3.core import Schema
@@ -12,6 +12,7 @@
 if TYPE_CHECKING:
     import geopandas as gpd
     import pandas as pd
+    from numpy.typing import NDArray
 
     DF = TypeVar("DF", bound=pd.DataFrame)
 
@@ -120,10 +121,61 @@ def remove_extension_kwargs(
 
 def split_mixed_gdf(gdf: gpd.GeoDataFrame) -> List[gpd.GeoDataFrame]:
     """Split a GeoDataFrame into one or more GeoDataFrames with unique geometry type"""
+    indices = indices_by_geometry_type(gdf.geometry)
+    if indices is None:
+        return [gdf]
+
+    point_indices, linestring_indices, polygon_indices = indices
+
+    # The iteration order below is intentional: polygons first, then
+    # linestrings, then points. Layers are generated in that same order
+    # (polygon, then path, then scatterplot), so points are rendered on top
+    # and polygons on the bottom.
+    gdfs = []
+    for single_type_geometry_indices in (
+        polygon_indices,
+        linestring_indices,
+        point_indices,
+    ):
+        if len(single_type_geometry_indices) > 0:
+            gdfs.append(gdf.iloc[single_type_geometry_indices])
+
+    return gdfs
+
+
+def split_mixed_shapely_array(
+    geometry: NDArray[np.object_],
+) -> List[NDArray[np.object_]]:
+    """Split a shapely array into one or more arrays, one per geometry family."""
+    indices = indices_by_geometry_type(geometry)
+    if indices is None:
+        return [geometry]
+
+    point_indices, linestring_indices, polygon_indices = indices
+
+    # The iteration order below is intentional: polygons first, then
+    # linestrings, then points. Layers are generated in that same order
+    # (polygon, then path, then scatterplot), so points are rendered on top
+    # and polygons on the bottom.
+    arrays = []
+    for single_type_geometry_indices in (
+        polygon_indices,
+        linestring_indices,
+        point_indices,
+    ):
+        if len(single_type_geometry_indices) > 0:
+            arrays.append(geometry[single_type_geometry_indices])
+
+    return arrays
+
+
+def indices_by_geometry_type(
+    geometry: NDArray[np.object_],
+) -> Tuple[NDArray[np.int64], NDArray[np.int64], NDArray[np.int64]] | None:
     import shapely
     from shapely import GeometryType
 
-    type_ids = np.array(shapely.get_type_id(gdf.geometry))
+    type_ids = np.array(shapely.get_type_id(geometry))
     unique_type_ids = set(np.unique(type_ids))
 
     if GeometryType.GEOMETRYCOLLECTION in unique_type_ids:
@@ -133,17 +185,17 @@ def split_mixed_gdf(gdf: gpd.GeoDataFrame) -> List[gpd.GeoDataFrame]:
         raise ValueError("LinearRings not currently supported")
 
     if len(unique_type_ids) == 1:
-        return [gdf]
+        return None
 
     if len(unique_type_ids) == 2:
         if unique_type_ids == {GeometryType.POINT, GeometryType.MULTIPOINT}:
-            return [gdf]
+            return None
 
         if unique_type_ids == {GeometryType.LINESTRING, GeometryType.MULTILINESTRING}:
-            return [gdf]
+            return None
 
         if unique_type_ids == {GeometryType.POLYGON, GeometryType.MULTIPOLYGON}:
-            return [gdf]
+            return None
 
     point_indices = np.where(
         (type_ids == GeometryType.POINT) | (type_ids == GeometryType.MULTIPOINT)
@@ -158,17 +210,4 @@ def split_mixed_gdf(gdf: gpd.GeoDataFrame) -> List[gpd.GeoDataFrame]:
         (type_ids == GeometryType.POLYGON) | (type_ids == GeometryType.MULTIPOLYGON)
     )[0]
 
-    # Here we intentionally check geometries in a specific order.
-    # Starting from polygons, then linestrings, then points,
-    # so that the order of generated layers is polygon, then path then scatterplot.
-    # This ensures that points are rendered on top and polygons on the bottom.
-    gdfs = []
-    for single_type_geometry_indices in (
-        polygon_indices,
-        linestring_indices,
-        point_indices,
-    ):
-        if len(single_type_geometry_indices) > 0:
-            gdfs.append(gdf.iloc[single_type_geometry_indices])
-
-    return gdfs
+    return point_indices, linestring_indices, polygon_indices
diff --git a/lonboard/_viz.py b/lonboard/_viz.py
@@ -28,7 +28,11 @@
 from lonboard._geoarrow.parse_wkb import parse_serialized_table
 from lonboard._layer import PathLayer, PolygonLayer, ScatterplotLayer
 from lonboard._map import Map
-from lonboard._utils import get_geometry_column_index, split_mixed_gdf
+from lonboard._utils import (
+    get_geometry_column_index,
+    split_mixed_gdf,
+    split_mixed_shapely_array,
+)
 from lonboard.basemap import CartoBasemap
 
 if TYPE_CHECKING:
@@ -361,12 +365,15 @@ def _viz_shapely_scalar(
 def _viz_shapely_array(
     data: NDArray[np.object_], **kwargs
 ) -> List[Union[ScatterplotLayer, PathLayer, PolygonLayer]]:
-    # Note: for now we pass this through a GeoDataFrame to handle mixed-type geometry
-    # arrays. Longer term we should do this without a GeoPandas dependency.
-    import geopandas as gpd
+    layers: List[Union[ScatterplotLayer, PathLayer, PolygonLayer]] = []
+    for partial_geometry_array in split_mixed_shapely_array(data):
+        field, geom_arr = construct_geometry_array(
+            partial_geometry_array,
+        )
+        table = Table.from_arrays([geom_arr], schema=Schema([field]))
+        layers.extend(_viz_geoarrow_table(table, **kwargs))
 
-    gdf = gpd.GeoDataFrame(geometry=data)  # type: ignore
-    return _viz_geopandas_geodataframe(gdf, **kwargs)
+    return layers
 
 
 def _viz_geo_interface(