Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add from_linestrings_xy and from_polygons_xy #928

124 changes: 124 additions & 0 deletions python/cuspatial/cuspatial/core/_column/geocolumn.py
Original file line number Diff line number Diff line change
Expand Up @@ -288,6 +288,130 @@ def _from_multipoints_xy(
meta,
)

@classmethod
def _from_linestrings_xy(
    cls,
    linestrings_xy: ColumnBase,
    part_offsets: ColumnBase,
    geometry_offsets: ColumnBase,
):
    """
    Create a GeoColumn of multilinestrings from a column of interleaved
    xy coordinates and two levels of offsets.

    Parameters
    ----------
    linestrings_xy : ColumnBase
        Interleaved xy coordinates. Must have a floating point dtype.
    part_offsets : ColumnBase
        Offsets that group the coordinate list into parts;
        ``len(part_offsets) - 1`` parts are created.
    geometry_offsets : ColumnBase
        Offsets that group the parts into geometries;
        ``len(geometry_offsets) - 1`` geometries are created.

    Returns
    -------
    An instance of ``cls`` whose linestring slot holds the nested list
    column built here. The point, multipoint and polygon slots are
    filled with ``empty_geometry_column`` results of the same
    coordinate dtype.

    Raises
    ------
    ValueError
        If ``linestrings_xy`` is not a floating point column.
    """
    coord_dtype = linestrings_xy.dtype
    if coord_dtype.kind != "f":
        raise ValueError("Coordinates must be floating point numbers.")

    def _empty_series(feature):
        # Placeholder series for a geometry type this column does not hold.
        return cudf.Series(empty_geometry_column(feature, coord_dtype))

    # Inner level: coordinates grouped into parts.
    coords = _xy_as_variable_sized_list(linestrings_xy)
    parts = build_list_column(
        indices=part_offsets,
        elements=coords,
        size=len(part_offsets) - 1,
    )
    # Outer level: parts grouped into geometries.
    lines = build_list_column(
        indices=geometry_offsets,
        elements=parts,
        size=len(geometry_offsets) - 1,
    )
    n_rows = len(lines)

    # Every row is tagged as a linestring and row i maps to entry i of the
    # linestring child.
    input_types = as_column(
        cp.full(n_rows, Feature_Enum.LINESTRING.value, dtype=cp.int8)
    )
    union_offsets = as_column(cp.arange(n_rows, dtype=cp.int32))
    meta = GeoMeta(
        {
            "input_types": input_types,
            "union_offsets": union_offsets,
        }
    )

    points = _empty_series(Feature_Enum.POINT)
    multipoints = _empty_series(Feature_Enum.MULTIPOINT)
    linestrings = cudf.Series(lines)
    polygons = _empty_series(Feature_Enum.POLYGON)

    return cls((points, multipoints, linestrings, polygons), meta)

@classmethod
def _from_polygons_xy(
    cls,
    polygons_xy: ColumnBase,
    ring_offsets: ColumnBase,
    part_offsets: ColumnBase,
    geometry_offsets: ColumnBase,
):
    """
    Create a GeoColumn of multipolygons from a column of interleaved xy
    coordinates and three levels of offsets.

    Parameters
    ----------
    polygons_xy : ColumnBase
        Interleaved xy coordinates. Must have a floating point dtype.
    ring_offsets : ColumnBase
        Offsets that group the coordinate list into rings;
        ``len(ring_offsets) - 1`` rings are created.
    part_offsets : ColumnBase
        Offsets that group the rings into parts;
        ``len(part_offsets) - 1`` parts are created.
    geometry_offsets : ColumnBase
        Offsets that group the parts into geometries;
        ``len(geometry_offsets) - 1`` geometries are created.

    Returns
    -------
    An instance of ``cls`` whose polygon slot holds the nested list
    column built here. The point, multipoint and linestring slots are
    filled with ``empty_geometry_column`` results of the same
    coordinate dtype.

    Raises
    ------
    ValueError
        If ``polygons_xy`` is not a floating point column.
    """
    coord_dtype = polygons_xy.dtype
    if coord_dtype.kind != "f":
        raise ValueError("Coordinates must be floating point numbers.")

    def _empty_series(feature):
        # Placeholder series for a geometry type this column does not hold.
        return cudf.Series(empty_geometry_column(feature, coord_dtype))

    # Nest from the inside out: coordinates -> rings -> parts -> geometries.
    coords = _xy_as_variable_sized_list(polygons_xy)
    rings = build_list_column(
        indices=ring_offsets,
        elements=coords,
        size=len(ring_offsets) - 1,
    )
    parts = build_list_column(
        indices=part_offsets,
        elements=rings,
        size=len(part_offsets) - 1,
    )
    polys = build_list_column(
        indices=geometry_offsets,
        elements=parts,
        size=len(geometry_offsets) - 1,
    )
    n_rows = len(polys)

    # Every row is tagged as a polygon and row i maps to entry i of the
    # polygon child.
    input_types = as_column(
        cp.full(n_rows, Feature_Enum.POLYGON.value, dtype=cp.int8)
    )
    union_offsets = as_column(cp.arange(n_rows, dtype=cp.int32))
    meta = GeoMeta(
        {
            "input_types": input_types,
            "union_offsets": union_offsets,
        }
    )

    points = _empty_series(Feature_Enum.POINT)
    multipoints = _empty_series(Feature_Enum.MULTIPOINT)
    linestrings = _empty_series(Feature_Enum.LINESTRING)
    polygons = cudf.Series(polys)

    return cls((points, multipoints, linestrings, polygons), meta)

@cached_property
def memory_usage(self) -> int:
"""
Expand Down
101 changes: 99 additions & 2 deletions python/cuspatial/cuspatial/core/geoseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -630,8 +630,8 @@ def from_points_xy(cls, points_xy):
@classmethod
def from_multipoints_xy(cls, multipoints_xy, geometry_offset):
"""
Construct a GeoSeries of MULTIPOINTs from an array of interleaved xy
coordinates.
Construct a GeoSeries of MULTIPOINTs from an array of interleaved
xy coordinates.

Parameters
----------
Expand Down Expand Up @@ -663,6 +663,103 @@ def from_multipoints_xy(cls, multipoints_xy, geometry_offset):
)
)

@classmethod
def from_linestrings_xy(
    cls, linestrings_xy, part_offset, geometry_offset
) -> T:
    """
    Construct a GeoSeries of MULTILINESTRINGs from an array of interleaved
    xy coordinates.

    Parameters
    ----------
    linestrings_xy : array-like
        Coordinates of the points, interpreted as interleaved x-y coords.
        Must be floating point.
    part_offset : array-like
        Offsets marking where each part (linestring) begins, counted in
        points as in the example below. The final entry marks the end of
        the last part, so the number of parts is
        ``len(part_offset) - 1``. Converted to int32.
    geometry_offset : array-like
        Offsets into the parts marking where each geometry begins. The
        final entry marks the end of the last geometry, so the number of
        geometries is ``len(geometry_offset) - 1``. Consecutive offsets
        with a difference greater than 1 indicate a MultiLineString.
        Converted to int32.

    Returns
    -------
    GeoSeries
        A GeoSeries of MULTILINESTRINGs.

    Raises
    ------
    ValueError
        If the coordinates are not floating point numbers.

    Example
    -------
    >>> import cudf
    >>> import cuspatial
    >>> linestrings_xy = cudf.Series(
    ...     [0.0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5])
    >>> part_offset = cudf.Series([0, 6])
    >>> geometry_offset = cudf.Series([0, 1])
    >>> cuspatial.GeoSeries.from_linestrings_xy(
    ...     linestrings_xy, part_offset, geometry_offset)
    0    LINESTRING (0 0, 1 1, 2 2, 3 3, 4 4, 5 5)
    dtype: geometry
    """
    xy_col = as_column(linestrings_xy)
    part_col = as_column(part_offset, dtype="int32")
    geometry_col = as_column(geometry_offset, dtype="int32")

    column = GeoColumn._from_linestrings_xy(xy_col, part_col, geometry_col)
    return cls(column)

@classmethod
def from_polygons_xy(
    cls, polygons_xy, ring_offset, part_offset, geometry_offset
) -> T:
    """
    Construct a GeoSeries of MULTIPOLYGONs from an array of interleaved xy
    coordinates.

    Parameters
    ----------
    polygons_xy : array-like
        Coordinates of the points, interpreted as interleaved x-y coords.
        Must be floating point.
    ring_offset : array-like
        Offsets marking where each ring begins, counted in points as in
        the example below. The final entry marks the end of the last
        ring, so the number of rings is ``len(ring_offset) - 1``.
        Converted to int32.
    part_offset : array-like
        Offsets into the rings marking where each part (polygon) begins.
        The final entry marks the end of the last part, so the number of
        parts is ``len(part_offset) - 1``. Converted to int32.
    geometry_offset : array-like
        Offsets into the parts marking where each geometry begins. The
        final entry marks the end of the last geometry, so the number of
        geometries is ``len(geometry_offset) - 1``. Consecutive offsets
        with a difference greater than 1 indicate a MultiPolygon.
        Converted to int32.

    Returns
    -------
    GeoSeries
        A GeoSeries of MULTIPOLYGONs.

    Raises
    ------
    ValueError
        If the coordinates are not floating point numbers.

    Example
    -------
    >>> import cudf
    >>> import cuspatial
    >>> polygons_xy = cudf.Series(
    ...     [0.0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 0, 0])
    >>> ring_offset = cudf.Series([0, 6])
    >>> part_offset = cudf.Series([0, 1])
    >>> geometry_offset = cudf.Series([0, 1])
    >>> cuspatial.GeoSeries.from_polygons_xy(
    ...     polygons_xy, ring_offset, part_offset, geometry_offset)
    0    POLYGON ((0 0, 1 1, 2 2, 3 3, 4 4, 0 0))
    dtype: geometry
    """
    xy_col = as_column(polygons_xy)
    ring_col = as_column(ring_offset, dtype="int32")
    part_col = as_column(part_offset, dtype="int32")
    geometry_col = as_column(geometry_offset, dtype="int32")

    column = GeoColumn._from_polygons_xy(
        xy_col, ring_col, part_col, geometry_col
    )
    return cls(column)

def align(self, other):
"""
Align the rows of two GeoSeries using outer join.
Expand Down
59 changes: 59 additions & 0 deletions python/cuspatial/cuspatial/tests/test_geoseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -736,3 +736,62 @@ def test_from_multipoints_xy(multipoint_generator):
)

gpd.testing.assert_geoseries_equal(hs, gs2.to_geopandas())


def test_from_linestrings_xy(linestring_generator):
    """Rebuilding a GeoSeries from its linestring buffers round-trips."""
    expected = gpd.GeoSeries(linestring_generator(10, 10))
    source = cuspatial.from_geopandas(expected)

    # Pull the raw coordinate and offset buffers out of the source series.
    xy = source.lines.xy
    part_offset = source.lines.part_offset
    geometry_offset = source.lines.geometry_offset

    rebuilt = cuspatial.GeoSeries.from_linestrings_xy(
        xy, part_offset, geometry_offset
    )

    gpd.testing.assert_geoseries_equal(expected, rebuilt.to_geopandas())


def test_from_polygons_xy(polygon_generator):
    """Rebuilding a GeoSeries from its polygon buffers round-trips."""
    expected = gpd.GeoSeries(polygon_generator(10, 10))
    source = cuspatial.from_geopandas(expected)

    # Pull the raw coordinate and offset buffers out of the source series.
    xy = source.polygons.xy
    ring_offset = source.polygons.ring_offset
    part_offset = source.polygons.part_offset
    geometry_offset = source.polygons.geometry_offset

    rebuilt = cuspatial.GeoSeries.from_polygons_xy(
        xy, ring_offset, part_offset, geometry_offset
    )

    gpd.testing.assert_geoseries_equal(expected, rebuilt.to_geopandas())


def test_from_linestrings_xy_example():
    """The docstring example yields one six-point LineString."""
    # Six points stored as interleaved x, y values; one part, one geometry.
    xy = cudf.Series([0.0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5])
    parts = cudf.Series([0, 6])
    geometries = cudf.Series([0, 1])

    actual = cuspatial.GeoSeries.from_linestrings_xy(xy, parts, geometries)

    vertices = [(0, 0), (1, 1), (2, 2), (3, 3), (4, 4), (5, 5)]
    expected = gpd.GeoSeries([LineString(vertices)])

    gpd.testing.assert_geoseries_equal(
        actual.to_geopandas(), expected, check_less_precise=True
    )


def test_from_polygons_xy_example():
    """A single closed ring yields one Polygon with the same vertices."""
    # Six points stored as interleaved x, y values; the last point repeats
    # the first. One ring, one part, one geometry.
    xy = cudf.Series([0.0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 0, 0])
    rings = cudf.Series([0, 6])
    parts = cudf.Series([0, 1])
    geometries = cudf.Series([0, 1])

    actual = cuspatial.GeoSeries.from_polygons_xy(
        xy, rings, parts, geometries
    )

    shell = [(0, 0), (1, 1), (2, 2), (3, 3), (4, 4), (0, 0)]
    expected = gpd.GeoSeries([Polygon(shell)])

    gpd.testing.assert_geoseries_equal(actual.to_geopandas(), expected)