Skip to content

Commit

Permalink
Add from_linestrings_xy and from_polygons_xy (#928)
Browse files Browse the repository at this point in the history
Provides public GeoSeries methods to create single-type GeoSeries for linestrings and polygons in addition to the points and multipoints methods added in #924.

Authors:
  - H. Thomson Comer (https://github.com/thomcom)
  - Michael Wang (https://github.com/isVoid)

Approvers:
  - Michael Wang (https://github.com/isVoid)
  - Mark Harris (https://github.com/harrism)

URL: #928
  • Loading branch information
thomcom authored Feb 15, 2023
1 parent 93fbc73 commit aae60ba
Show file tree
Hide file tree
Showing 3 changed files with 282 additions and 2 deletions.
124 changes: 124 additions & 0 deletions python/cuspatial/cuspatial/core/_column/geocolumn.py
Original file line number Diff line number Diff line change
Expand Up @@ -288,6 +288,130 @@ def _from_multipoints_xy(
meta,
)

@classmethod
def _from_linestrings_xy(
    cls,
    linestrings_xy: ColumnBase,
    part_offsets: ColumnBase,
    geometry_offsets: ColumnBase,
):
    """
    Build a GeoColumn of multilinestrings from interleaved xy coordinates.

    The coordinates are grouped into parts by ``part_offsets`` and the
    parts are grouped into geometries by ``geometry_offsets``; each
    nesting level has ``len(offsets) - 1`` rows. The point, multipoint
    and polygon children of the resulting column are empty.

    Raises
    ------
    ValueError
        If ``linestrings_xy`` does not have a floating point dtype.
    """
    coord_dtype = linestrings_xy.dtype
    if coord_dtype.kind != "f":
        raise ValueError("Coordinates must be floating point numbers.")

    # Inner level: coordinates grouped into linestring parts.
    # NOTE(review): _xy_as_variable_sized_list presumably pairs the
    # interleaved values into per-point lists -- confirm in its definition.
    coords_by_part = build_list_column(
        indices=part_offsets,
        elements=_xy_as_variable_sized_list(linestrings_xy),
        size=len(part_offsets) - 1,
    )
    # Outer level: parts grouped into geometries.
    geometries = build_list_column(
        indices=geometry_offsets,
        elements=coords_by_part,
        size=len(geometry_offsets) - 1,
    )

    # Every row is tagged LINESTRING and row i maps to entry i of the
    # linestring child column.
    row_count = len(geometries)
    type_codes = cp.full(
        row_count, Feature_Enum.LINESTRING.value, dtype=cp.int8
    )
    row_positions = cp.arange(row_count, dtype=cp.int32)
    meta = GeoMeta(
        {
            "input_types": as_column(type_codes),
            "union_offsets": as_column(row_positions),
        }
    )

    def _empty(feature):
        # Placeholder child column for a geometry type that is not present.
        return cudf.Series(empty_geometry_column(feature, coord_dtype))

    children = (
        _empty(Feature_Enum.POINT),
        _empty(Feature_Enum.MULTIPOINT),
        cudf.Series(geometries),
        _empty(Feature_Enum.POLYGON),
    )
    return cls(children, meta)

@classmethod
def _from_polygons_xy(
    cls,
    polygons_xy: ColumnBase,
    ring_offsets: ColumnBase,
    part_offsets: ColumnBase,
    geometry_offsets: ColumnBase,
):
    """
    Build a GeoColumn of multipolygons from interleaved xy coordinates.

    Three nesting levels are stacked on top of the coordinates:
    ``ring_offsets`` groups coordinates into rings, ``part_offsets``
    groups rings into parts, and ``geometry_offsets`` groups parts into
    geometries; each level has ``len(offsets) - 1`` rows. The point,
    multipoint and linestring children of the resulting column are empty.

    Raises
    ------
    ValueError
        If ``polygons_xy`` does not have a floating point dtype.
    """
    coord_dtype = polygons_xy.dtype
    if coord_dtype.kind != "f":
        raise ValueError("Coordinates must be floating point numbers.")

    # Innermost level: coordinates grouped into rings.
    # NOTE(review): _xy_as_variable_sized_list presumably pairs the
    # interleaved values into per-point lists -- confirm in its definition.
    coords_by_ring = build_list_column(
        indices=ring_offsets,
        elements=_xy_as_variable_sized_list(polygons_xy),
        size=len(ring_offsets) - 1,
    )
    # Middle level: rings grouped into polygon parts.
    rings_by_part = build_list_column(
        indices=part_offsets,
        elements=coords_by_ring,
        size=len(part_offsets) - 1,
    )
    # Outermost level: parts grouped into geometries.
    geometries = build_list_column(
        indices=geometry_offsets,
        elements=rings_by_part,
        size=len(geometry_offsets) - 1,
    )

    # Every row is tagged POLYGON and row i maps to entry i of the
    # polygon child column.
    row_count = len(geometries)
    type_codes = cp.full(
        row_count, Feature_Enum.POLYGON.value, dtype=cp.int8
    )
    row_positions = cp.arange(row_count, dtype=cp.int32)
    meta = GeoMeta(
        {
            "input_types": as_column(type_codes),
            "union_offsets": as_column(row_positions),
        }
    )

    def _empty(feature):
        # Placeholder child column for a geometry type that is not present.
        return cudf.Series(empty_geometry_column(feature, coord_dtype))

    children = (
        _empty(Feature_Enum.POINT),
        _empty(Feature_Enum.MULTIPOINT),
        _empty(Feature_Enum.LINESTRING),
        cudf.Series(geometries),
    )
    return cls(children, meta)

@cached_property
def memory_usage(self) -> int:
"""
Expand Down
101 changes: 99 additions & 2 deletions python/cuspatial/cuspatial/core/geoseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -630,8 +630,8 @@ def from_points_xy(cls, points_xy):
@classmethod
def from_multipoints_xy(cls, multipoints_xy, geometry_offset):
"""
Construct a GeoSeries of MULTIPOINTs from an array of interleaved xy
coordinates.
Construct a GeoSeries of MULTIPOINTs from an array of interleaved
xy coordinates.
Parameters
----------
Expand Down Expand Up @@ -663,6 +663,103 @@ def from_multipoints_xy(cls, multipoints_xy, geometry_offset):
)
)

@classmethod
def from_linestrings_xy(
    cls, linestrings_xy, part_offset, geometry_offset
) -> T:
    """
    Construct a GeoSeries of MULTILINESTRINGs from an array of interleaved
    xy coordinates.

    Parameters
    ----------
    linestrings_xy : array-like
        Coordinates of the points, interpreted as interleaved x-y coords.
        Must be floating point.
    part_offset : array-like
        Offsets into the point array (counted in xy pairs) marking the
        beginning of each part, followed by a final entry marking the
        end of the last part. The length of this array is the number of
        parts plus one. Converted to int32.
    geometry_offset : array-like
        Offsets into the part array marking the first part of each
        geometry, followed by a final entry marking the end of the last
        geometry. The length of this array is the number of geometries
        plus one. Consecutive offsets with a difference greater than 1
        indicate a MultiLineString. Converted to int32.

    Returns
    -------
    GeoSeries:
        A GeoSeries of MULTILINESTRINGs.

    Raises
    ------
    ValueError
        If ``linestrings_xy`` is not of a floating point dtype.

    Example
    -------
    >>> import cudf
    >>> import cuspatial
    >>> linestrings_xy = cudf.Series(
    ...     [0.0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5])
    >>> part_offset = cudf.Series([0, 6])
    >>> geometry_offset = cudf.Series([0, 1])
    >>> cuspatial.GeoSeries.from_linestrings_xy(
    ...     linestrings_xy, part_offset, geometry_offset)
    0    LINESTRING (0 0, 1 1, 2 2, 3 3, 4 4, 5 5)
    dtype: geometry
    """
    return cls(
        GeoColumn._from_linestrings_xy(
            as_column(linestrings_xy),
            as_column(part_offset, dtype="int32"),
            as_column(geometry_offset, dtype="int32"),
        )
    )

@classmethod
def from_polygons_xy(
    cls, polygons_xy, ring_offset, part_offset, geometry_offset
) -> T:
    """
    Construct a GeoSeries of MULTIPOLYGONs from an array of interleaved xy
    coordinates.

    Parameters
    ----------
    polygons_xy : array-like
        Coordinates of the points, interpreted as interleaved x-y coords.
        Must be floating point.
    ring_offset : array-like
        Offsets into the point array (counted in xy pairs) marking the
        beginning of each ring, followed by a final entry marking the
        end of the last ring. The length of this array is the number of
        rings plus one. Converted to int32.
    part_offset : array-like
        Offsets into the ring array marking the first ring of each part
        (polygon), followed by a final entry marking the end of the last
        part. The length of this array is the number of parts plus one.
        Converted to int32.
    geometry_offset : array-like
        Offsets into the part array marking the first part of each
        geometry, followed by a final entry marking the end of the last
        geometry. The length of this array is the number of geometries
        plus one. Consecutive offsets with a difference greater than 1
        indicate a MultiPolygon. Converted to int32.

    Returns
    -------
    GeoSeries:
        A GeoSeries of MULTIPOLYGONs.

    Raises
    ------
    ValueError
        If ``polygons_xy`` is not of a floating point dtype.

    Example
    -------
    >>> import cudf
    >>> import cuspatial
    >>> polygons_xy = cudf.Series(
    ...     [0.0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 0, 0])
    >>> ring_offset = cudf.Series([0, 6])
    >>> part_offset = cudf.Series([0, 1])
    >>> geometry_offset = cudf.Series([0, 1])
    >>> cuspatial.GeoSeries.from_polygons_xy(
    ...     polygons_xy, ring_offset, part_offset, geometry_offset)
    0    POLYGON ((0 0, 1 1, 2 2, 3 3, 4 4, 0 0))
    dtype: geometry
    """
    return cls(
        GeoColumn._from_polygons_xy(
            as_column(polygons_xy),
            as_column(ring_offset, dtype="int32"),
            as_column(part_offset, dtype="int32"),
            as_column(geometry_offset, dtype="int32"),
        )
    )

def align(self, other):
"""
Align the rows of two GeoSeries using outer join.
Expand Down
59 changes: 59 additions & 0 deletions python/cuspatial/cuspatial/tests/test_geoseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -736,3 +736,62 @@ def test_from_multipoints_xy(multipoint_generator):
)

gpd.testing.assert_geoseries_equal(hs, gs2.to_geopandas())


def test_from_linestrings_xy(linestring_generator):
    """Rebuild generated linestrings from their raw buffers and compare."""
    expected = gpd.GeoSeries(linestring_generator(10, 10))
    source = cuspatial.from_geopandas(expected)

    # Feed the coordinate and offset buffers of the converted series back
    # through the public constructor.
    xy = source.lines.xy
    parts = source.lines.part_offset
    geometries = source.lines.geometry_offset
    rebuilt = cuspatial.GeoSeries.from_linestrings_xy(xy, parts, geometries)

    gpd.testing.assert_geoseries_equal(expected, rebuilt.to_geopandas())


def test_from_polygons_xy(polygon_generator):
    """Rebuild generated polygons from their raw buffers and compare."""
    expected = gpd.GeoSeries(polygon_generator(10, 10))
    source = cuspatial.from_geopandas(expected)

    # Feed the coordinate and offset buffers of the converted series back
    # through the public constructor.
    xy = source.polygons.xy
    rings = source.polygons.ring_offset
    parts = source.polygons.part_offset
    geometries = source.polygons.geometry_offset
    rebuilt = cuspatial.GeoSeries.from_polygons_xy(
        xy, rings, parts, geometries
    )

    gpd.testing.assert_geoseries_equal(expected, rebuilt.to_geopandas())


def test_from_linestrings_xy_example():
    """Check the from_linestrings_xy docstring example against shapely."""
    # Six points in one part, one part in one geometry.
    coords = cudf.Series([0.0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5])
    parts = cudf.Series([0, 6])
    geometries = cudf.Series([0, 1])

    expected_line = LineString(
        [(0, 0), (1, 1), (2, 2), (3, 3), (4, 4), (5, 5)]
    )
    expected = gpd.GeoSeries([expected_line])

    actual = cuspatial.GeoSeries.from_linestrings_xy(
        coords, parts, geometries
    )

    gpd.testing.assert_geoseries_equal(
        actual.to_geopandas(), expected, check_less_precise=True
    )


def test_from_polygons_xy_example():
    """Check a single closed-ring polygon built by from_polygons_xy."""
    # Six points in one ring (closed back to the origin), one ring in one
    # part, one part in one geometry.
    coords = cudf.Series([0.0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 0, 0])
    rings = cudf.Series([0, 6])
    parts = cudf.Series([0, 1])
    geometries = cudf.Series([0, 1])

    expected_polygon = Polygon(
        [(0, 0), (1, 1), (2, 2), (3, 3), (4, 4), (0, 0)]
    )
    expected = gpd.GeoSeries([expected_polygon])

    actual = cuspatial.GeoSeries.from_polygons_xy(
        coords, rings, parts, geometries
    )

    gpd.testing.assert_geoseries_equal(actual.to_geopandas(), expected)

0 comments on commit aae60ba

Please sign in to comment.