Skip to content

Commit

Permalink
Add Union-style indexing to .points, .multipoints, .lines, and …
Browse files Browse the repository at this point in the history
…`.polygons` `GeoSeries` accessors (#685)

This PR uses the `column._meta` object to slice the sub-geometry accessors named in the title, so that `.x`, `.y`, and `.xy` reflect any slicing previously applied to the `GeoSeries`.

Fixes #683

Authors:
  - H. Thomson Comer (https://github.com/thomcom)

Approvers:
  - Michael Wang (https://github.com/isVoid)

URL: #685
  • Loading branch information
thomcom authored Sep 23, 2022
1 parent c0ac4f5 commit 626ea58
Show file tree
Hide file tree
Showing 3 changed files with 209 additions and 13 deletions.
51 changes: 40 additions & 11 deletions python/cuspatial/cuspatial/core/geoseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,9 @@ def __init__(
index = data.index
if index is None:
index = cudf.RangeIndex(0, len(column))
super().__init__(column, index, dtype, name, nan_as_null)
super().__init__(
column, index, dtype=dtype, name=name, nan_as_null=nan_as_null
)

@property
def type(self):
Expand All @@ -120,28 +122,42 @@ def type(self):
return result

class GeoColumnAccessor:
def __init__(self, list_series):
def __init__(self, list_series, meta):
self._series = list_series
self._col = self._series._column
self._meta = meta
self._type = Feature_Enum.POINT

@property
def x(self):
return cudf.Series(self._col.leaves().values[0::2])
return self.xy[::2].reset_index(drop=True)

@property
def y(self):
return cudf.Series(self._col.leaves().values[1::2])
return self.xy[1::2].reset_index(drop=True)

@property
def xy(self):
return cudf.Series(self._col.leaves().values)
types = self._meta.input_types
offsets = self._meta.union_offsets
indices = offsets[types == self._type.value]
result = self._col.take(indices._column).leaves().values
return cudf.Series(result)

class MultiPointGeoColumnAccessor(GeoColumnAccessor):
def __init__(self, list_series, meta):
super().__init__(list_series, meta)
self._type = Feature_Enum.MULTIPOINT

@property
def geometry_offset(self):
return cudf.Series(self._col.offsets.values)

class LineStringGeoColumnAccessor(GeoColumnAccessor):
def __init__(self, list_series, meta):
super().__init__(list_series, meta)
self._type = Feature_Enum.LINESTRING

@property
def geometry_offset(self):
return cudf.Series(self._col.offsets.values)
Expand All @@ -151,6 +167,10 @@ def part_offset(self):
return cudf.Series(self._col.elements.offsets.values)

class PolygonGeoColumnAccessor(GeoColumnAccessor):
def __init__(self, list_series, meta):
super().__init__(list_series, meta)
self._type = Feature_Enum.POLYGON

@property
def geometry_offset(self):
return cudf.Series(self._col.offsets.values)
Expand All @@ -168,28 +188,34 @@ def points(self):
"""
Access the `PointsArray` of the underlying `GeoArrowBuffers`.
"""
return self.GeoColumnAccessor(self._column.points)
return self.GeoColumnAccessor(self._column.points, self._column._meta)

@property
def multipoints(self):
"""
Access the `MultiPointArray` of the underlying `GeoArrowBuffers`.
"""
return self.MultiPointGeoColumnAccessor(self._column.mpoints)
return self.MultiPointGeoColumnAccessor(
self._column.mpoints, self._column._meta
)

@property
def lines(self):
"""
Access the `LineArray` of the underlying `GeoArrowBuffers`.
"""
return self.LineStringGeoColumnAccessor(self._column.lines)
return self.LineStringGeoColumnAccessor(
self._column.lines, self._column._meta
)

@property
def polygons(self):
"""
Access the `PolygonArray` of the underlying `GeoArrowBuffers`.
"""
return self.PolygonGeoColumnAccessor(self._column.polygons)
return self.PolygonGeoColumnAccessor(
self._column.polygons, self._column._meta
)

def __repr__(self):
# TODO: Implement Iloc with slices so that we can use `Series.__repr__`
Expand Down Expand Up @@ -274,9 +300,11 @@ def __getitem__(self, item):
)

if isinstance(item, Integral):
return GeoSeries(column).to_shapely()
return GeoSeries(column, name=self._sr.name).to_shapely()
else:
return GeoSeries(column, index=self._sr.index[indexes])
return GeoSeries(
column, index=self._sr.index[indexes], name=self._sr.name
)

def from_arrow(union):
column = GeoColumn(
Expand Down Expand Up @@ -318,6 +346,7 @@ def to_geopandas(self, nullable=False):
return gpGeoSeries(
final_union_slice.to_shapely(),
index=self.index.to_pandas(),
name=self.name,
)

def to_pandas(self):
Expand Down
20 changes: 18 additions & 2 deletions python/cuspatial/cuspatial/tests/test_geodataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,13 +135,19 @@ def test_interleaved_point(gpdf, polys):
cugs = cugpdf["geometry"]
gs = gpdf["geometry"]
pd.testing.assert_series_equal(
cugs.points.x.to_pandas(),
cugs.points.x.to_pandas().reset_index(drop=True),
gs[gs.type == "Point"].x.reset_index(drop=True),
)
pd.testing.assert_series_equal(
cugs.points.y.to_pandas(),
cugs.points.y.to_pandas().reset_index(drop=True),
gs[gs.type == "Point"].y.reset_index(drop=True),
)


def test_interleaved_multipoint(gpdf, polys):
cugpdf = cuspatial.from_geopandas(gpdf)
cugs = cugpdf["geometry"]
gs = gpdf["geometry"]
cudf.testing.assert_series_equal(
cudf.Series.from_arrow(cugs.multipoints.x.to_arrow()),
cudf.Series(
Expand All @@ -164,6 +170,11 @@ def test_interleaved_point(gpdf, polys):
).flatten()
),
)


def test_interleaved_lines(gpdf, polys):
cugpdf = cuspatial.from_geopandas(gpdf)
cugs = cugpdf["geometry"]
cudf.testing.assert_series_equal(
cudf.Series.from_arrow(cugs.lines.x.to_arrow()),
cudf.Series(
Expand All @@ -178,6 +189,11 @@ def test_interleaved_point(gpdf, polys):
dtype="float64",
),
)


def test_interleaved_polygons(gpdf, polys):
cugpdf = cuspatial.from_geopandas(gpdf)
cugs = cugpdf["geometry"]
cudf.testing.assert_series_equal(
cudf.Series.from_arrow(cugs.polygons.x.to_arrow()),
cudf.Series(polys[:, 0], dtype="float64"),
Expand Down
151 changes: 151 additions & 0 deletions python/cuspatial/cuspatial/tests/test_geoseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -347,6 +347,157 @@ def test_size(gs, series_slice):
assert len(gi) == len(cugs)


def test_geometry_point_slicing(gs):
    """Check `.points.x/.y/.xy` against several slices of the GeoSeries.

    The expected coordinates come from the `gs` fixture, which is defined
    outside this view; each case lists the slice and the point coordinates
    that slice is expected to expose.
    """
    cugs = cuspatial.from_geopandas(gs)
    # (slice, expected x, expected y, expected interleaved xy)
    cases = [
        (slice(None, 1), [-1], [0], [-1, 0]),
        (slice(3, None), [9], [10], [9, 10]),
        (slice(0, 4), [-1, 9], [0, 10], [-1, 0, 9, 10]),
    ]
    for window, want_x, want_y, want_xy in cases:
        points = cugs[window].points
        assert (points.x == cudf.Series(want_x)).all()
        assert (points.y == cudf.Series(want_y)).all()
        assert (points.xy == cudf.Series(want_xy)).all()


def test_geometry_multipoint_slicing(gs):
    """Check `.multipoints.x/.y/.xy` against several slices of the GeoSeries.

    The expected coordinates come from the `gs` fixture, which is defined
    outside this view.
    """
    cugs = cuspatial.from_geopandas(gs)
    # (slice, expected x, expected y, expected interleaved xy)
    cases = [
        (slice(None, 2), [1, 3], [2, 4], [1, 2, 3, 4]),
        (slice(2, None), [5, 7], [6, 8], [5, 6, 7, 8]),
        (slice(0, 4), [1, 3, 5, 7], [2, 4, 6, 8], [1, 2, 3, 4, 5, 6, 7, 8]),
    ]
    for window, want_x, want_y, want_xy in cases:
        multipoints = cugs[window].multipoints
        assert (multipoints.x == cudf.Series(want_x)).all()
        assert (multipoints.y == cudf.Series(want_y)).all()
        assert (multipoints.xy == cudf.Series(want_xy)).all()


def test_geometry_linestring_slicing(gs):
    """Check `.lines.x/.y/.xy` against several slices of the GeoSeries.

    The expected coordinates come from the `gs` fixture, which is defined
    outside this view.
    """
    cugs = cuspatial.from_geopandas(gs)
    # (slice, expected x, expected y, expected interleaved xy)
    cases = [
        (slice(None, 5), [11, 13], [12, 14], [11, 12, 13, 14]),
        (
            slice(None, 6),
            [11, 13, 15, 17, 19, 21],
            [12, 14, 16, 18, 20, 22],
            [11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22],
        ),
        (slice(7, None), [31, 33], [32, 34], [31, 32, 33, 34]),
        (
            slice(6, None),
            [23, 25, 27, 29, 31, 33],
            [24, 26, 28, 30, 32, 34],
            [23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34],
        ),
    ]
    for window, want_x, want_y, want_xy in cases:
        lines = cugs[window].lines
        assert (lines.x == cudf.Series(want_x)).all()
        assert (lines.y == cudf.Series(want_y)).all()
        assert (lines.xy == cudf.Series(want_xy)).all()


def test_geometry_polygon_slicing(gs):
    """Check `.polygons.x/.y/.xy` against several slices of the GeoSeries.

    The expected coordinates come from the `gs` fixture, which is defined
    outside this view. The `[:10]` slice is only checked for `.x` and `.y`.
    """
    cugs = cuspatial.from_geopandas(gs)

    # Coordinates expected from `cugs[:9]`; they also form the leading part
    # of what `cugs[:10]` is expected to return.
    head_x = [35, 37, 39, 41, 35]
    head_y = [36, 38, 40, 42, 36]
    head_xy = [35, 36, 37, 38, 39, 40, 41, 42, 35, 36]
    upto_nine = cugs[:9].polygons
    assert (upto_nine.x == cudf.Series(head_x)).all()
    assert (upto_nine.y == cudf.Series(head_y)).all()
    assert (upto_nine.xy == cudf.Series(head_xy)).all()

    # Additional coordinates that appear once the slice is widened to `[:10]`.
    more_x = [43, 45, 47, 43, 49, 51, 53, 49, 55, 57, 59, 55, 61, 63, 65, 61]
    more_y = [44, 46, 48, 44, 50, 52, 54, 50, 56, 58, 60, 56, 62, 64, 66, 62]
    upto_ten = cugs[:10].polygons
    assert (upto_ten.x == cudf.Series(head_x + more_x)).all()
    assert (upto_ten.y == cudf.Series(head_y + more_y)).all()

    # Trailing slice starting at position 11.
    tail_x = [97, 99, 102, 101, 97, 106, 108, 110, 113, 106]
    tail_y = [98, 101, 103, 108, 98, 107, 109, 111, 108, 107]
    tail_xy_start = [97, 98, 99, 101, 102, 103, 101, 108, 97, 98]
    tail_xy_end = [106, 107, 108, 109, 110, 111, 113, 108, 106, 107]
    from_eleven = cugs[11:].polygons
    assert (from_eleven.x == cudf.Series(tail_x)).all()
    assert (from_eleven.y == cudf.Series(tail_y)).all()
    assert (from_eleven.xy == cudf.Series(tail_xy_start + tail_xy_end)).all()


def test_loc(gs):
index = ["a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l"]
gs.index = index
Expand Down

0 comments on commit 626ea58

Please sign in to comment.