Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add from_linestrings_xy and from_polygons_xy #928

270 changes: 95 additions & 175 deletions docs/source/user_guide/cuspatial_api_examples.ipynb

Large diffs are not rendered by default.

28 changes: 11 additions & 17 deletions python/cuspatial/benchmarks/api/bench_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,26 +92,20 @@ def bench_sinusoidal_projection(benchmark, gpu_dataframe):


def bench_directed_hausdorff_distance(benchmark, sorted_trajectories):
    """Benchmark ``cuspatial.directed_hausdorff_distance`` on multipoints.

    The ``x``/``y`` columns of ``sorted_trajectories[0]`` are interleaved
    into a single xy buffer and, together with ``sorted_trajectories[1]``
    as the geometry offsets, turned into a multipoint ``GeoSeries`` that is
    handed to the function under test.

    Parameters
    ----------
    benchmark
        Benchmark fixture; it is invoked exactly once here.
    sorted_trajectories
        Indexable fixture; element 0 exposes ``"x"`` and ``"y"`` columns and
        element 1 is used as the multipoint offsets.
    """
    coords = sorted_trajectories[0][["x", "y"]].interleave_columns()
    offsets = sorted_trajectories[1]
    s = cuspatial.GeoSeries.from_multipoints_xy(coords, offsets)
    # Only the GeoSeries-based call is kept: the former separate x / y /
    # offsets invocation was superseded and would use the fixture twice.
    benchmark(cuspatial.directed_hausdorff_distance, s)


def bench_haversine_distance(benchmark, gpu_dataframe):
    """Benchmark ``cuspatial.haversine_distance`` on two point GeoSeries.

    Coordinates are taken from the polygons in rows 0-9 and rows 10-19 of
    ``gpu_dataframe["geometry"]`` and wrapped as point ``GeoSeries`` via
    ``GeoSeries.from_points_xy``.

    Parameters
    ----------
    benchmark
        Benchmark fixture; it is invoked exactly once here.
    gpu_dataframe
        Fixture with a ``"geometry"`` column whose rows expose
        ``.polygons.xy``.
    """
    # The number of coordinates in the two sets of polygons varies, so only
    # the first 1000 xy values of each set are compared.
    # NOTE(review): presumably ``xy`` is interleaved, i.e. 500 points per
    # side -- confirm against the GeoSeries accessor.
    coords_first = gpu_dataframe["geometry"][0:10].polygons.xy[0:1000]
    coords_second = gpu_dataframe["geometry"][10:20].polygons.xy[0:1000]

    points_first = cuspatial.GeoSeries.from_points_xy(coords_first)
    points_second = cuspatial.GeoSeries.from_points_xy(coords_second)

    benchmark(cuspatial.haversine_distance, points_first, points_second)


def bench_pairwise_linestring_distance(benchmark, gpu_dataframe):
Expand Down
196 changes: 189 additions & 7 deletions python/cuspatial/cuspatial/core/_column/geocolumn.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,10 +204,9 @@ def _from_points_xy(cls, points_xy: ColumnBase):
if not points_xy.dtype.kind == "f":
raise ValueError("Coordinates must be floating point numbers.")

if len(points_xy) % 2 != 0:
raise ValueError("points_xy must have an even number of elements")
point_col = _xy_as_variable_sized_list(points_xy)
num_points = len(point_col)

num_points = len(points_xy) // 2
meta = GeoMeta(
{
"input_types": as_column(
Expand All @@ -221,10 +220,6 @@ def _from_points_xy(cls, points_xy: ColumnBase):
}
)

indices = arange(0, num_points * 2 + 1, 2, dtype="int32")
point_col = build_list_column(
indices=indices, elements=points_xy, size=num_points
)
coord_dtype = points_xy.dtype
return cls(
(
Expand All @@ -242,6 +237,181 @@ def _from_points_xy(cls, points_xy: ColumnBase):
meta,
)

@classmethod
def _from_multipoints_xy(
    cls, multipoints_xy: ColumnBase, geometry_offsets: ColumnBase
):
    """
    Create a GeoColumn containing only multipoints from interleaved xy
    coordinates.

    ``geometry_offsets`` is used as the list offsets that group the
    coordinate pairs into multipoints, so the result has
    ``len(geometry_offsets) - 1`` rows. The point, linestring and polygon
    child columns are left empty.

    Raises
    ------
    ValueError
        If ``multipoints_xy`` is not floating point, or has an odd number
        of elements.
    """
    coord_dtype = multipoints_xy.dtype
    if coord_dtype.kind != "f":
        raise ValueError("Coordinates must be floating point numbers.")

    # Innermost level: one [x, y] row per point.
    points = _xy_as_variable_sized_list(multipoints_xy)
    multipoint_col = build_list_column(
        indices=geometry_offsets,
        elements=points,
        size=len(geometry_offsets) - 1,
    )
    count = len(multipoint_col)

    # Every row is a MULTIPOINT and row i maps to entry i of that column.
    input_types = as_column(
        cp.full(count, Feature_Enum.MULTIPOINT.value, dtype=cp.int8)
    )
    union_offsets = as_column(cp.arange(count, dtype=cp.int32))
    meta = GeoMeta(
        {"input_types": input_types, "union_offsets": union_offsets}
    )

    columns = (
        cudf.Series(empty_geometry_column(Feature_Enum.POINT, coord_dtype)),
        cudf.Series(multipoint_col),
        cudf.Series(
            empty_geometry_column(Feature_Enum.LINESTRING, coord_dtype)
        ),
        cudf.Series(empty_geometry_column(Feature_Enum.POLYGON, coord_dtype)),
    )
    return cls(columns, meta)

@classmethod
def _from_linestrings_xy(
    cls,
    linestrings_xy: ColumnBase,
    part_offsets: ColumnBase,
    geometry_offsets: ColumnBase,
):
    """
    Create a GeoColumn containing only linestrings from interleaved xy
    coordinates.

    ``part_offsets`` groups the coordinate pairs into parts and
    ``geometry_offsets`` groups those parts into geometries, so the result
    has ``len(geometry_offsets) - 1`` rows. The point, multipoint and
    polygon child columns are left empty.

    Raises
    ------
    ValueError
        If ``linestrings_xy`` is not floating point, or has an odd number
        of elements.
    """
    coord_dtype = linestrings_xy.dtype
    if coord_dtype.kind != "f":
        raise ValueError("Coordinates must be floating point numbers.")

    # Build the nesting inside-out: points -> parts -> geometries.
    points = _xy_as_variable_sized_list(linestrings_xy)
    parts = build_list_column(
        indices=part_offsets,
        elements=points,
        size=len(part_offsets) - 1,
    )
    linestrings_col = build_list_column(
        indices=geometry_offsets,
        elements=parts,
        size=len(geometry_offsets) - 1,
    )
    count = len(linestrings_col)

    # Every row is a LINESTRING and row i maps to entry i of that column.
    input_types = as_column(
        cp.full(count, Feature_Enum.LINESTRING.value, dtype=cp.int8)
    )
    union_offsets = as_column(cp.arange(count, dtype=cp.int32))
    meta = GeoMeta(
        {"input_types": input_types, "union_offsets": union_offsets}
    )

    columns = (
        cudf.Series(empty_geometry_column(Feature_Enum.POINT, coord_dtype)),
        cudf.Series(
            empty_geometry_column(Feature_Enum.MULTIPOINT, coord_dtype)
        ),
        cudf.Series(linestrings_col),
        cudf.Series(empty_geometry_column(Feature_Enum.POLYGON, coord_dtype)),
    )
    return cls(columns, meta)

@classmethod
def _from_polygons_xy(
    cls,
    polygons_xy: ColumnBase,
    ring_offsets: ColumnBase,
    part_offsets: ColumnBase,
    geometry_offsets: ColumnBase,
):
    """
    Create a GeoColumn containing only polygons from interleaved xy
    coordinates.

    ``ring_offsets`` groups the coordinate pairs into rings,
    ``part_offsets`` groups rings into parts and ``geometry_offsets``
    groups parts into geometries, so the result has
    ``len(geometry_offsets) - 1`` rows. The point, multipoint and
    linestring child columns are left empty.

    Raises
    ------
    ValueError
        If ``polygons_xy`` is not floating point, or has an odd number of
        elements.
    """
    coord_dtype = polygons_xy.dtype
    if coord_dtype.kind != "f":
        raise ValueError("Coordinates must be floating point numbers.")

    # Build the nesting inside-out: points -> rings -> parts -> geometries.
    points = _xy_as_variable_sized_list(polygons_xy)
    rings = build_list_column(
        indices=ring_offsets,
        elements=points,
        size=len(ring_offsets) - 1,
    )
    parts = build_list_column(
        indices=part_offsets,
        elements=rings,
        size=len(part_offsets) - 1,
    )
    polygons_col = build_list_column(
        indices=geometry_offsets,
        elements=parts,
        size=len(geometry_offsets) - 1,
    )
    count = len(polygons_col)

    # Every row is a POLYGON and row i maps to entry i of that column.
    input_types = as_column(
        cp.full(count, Feature_Enum.POLYGON.value, dtype=cp.int8)
    )
    union_offsets = as_column(cp.arange(count, dtype=cp.int32))
    meta = GeoMeta(
        {"input_types": input_types, "union_offsets": union_offsets}
    )

    columns = (
        cudf.Series(empty_geometry_column(Feature_Enum.POINT, coord_dtype)),
        cudf.Series(
            empty_geometry_column(Feature_Enum.MULTIPOINT, coord_dtype)
        ),
        cudf.Series(
            empty_geometry_column(Feature_Enum.LINESTRING, coord_dtype)
        ),
        cudf.Series(polygons_col),
    )
    return cls(columns, meta)

@cached_property
def memory_usage(self) -> int:
"""
Expand All @@ -254,3 +424,15 @@ def memory_usage(self) -> int:
final_size = final_size + self.lines._column.memory_usage
final_size = final_size + self.polygons._column.memory_usage
return final_size


def _xy_as_variable_sized_list(xy: ColumnBase):
    """Pack interleaved x-y coordinates into a cuDF list column.

    The result has ``len(xy) // 2`` rows, each spanning two consecutive
    values of ``xy`` (one coordinate pair).

    Raises
    ------
    ValueError
        If ``xy`` has an odd number of elements.
    """
    n_values = len(xy)
    if n_values % 2:
        raise ValueError("xy must have an even number of elements")

    num_points = n_values // 2
    # Offsets 0, 2, 4, ..., 2 * num_points: each row covers two values.
    offsets = arange(0, 2 * num_points + 1, 2, dtype="int32")
    return build_list_column(indices=offsets, elements=xy, size=num_points)
Loading