diff --git a/python/cuspatial/cuspatial/core/_column/geocolumn.py b/python/cuspatial/cuspatial/core/_column/geocolumn.py index a3b87d271..8e346b751 100644 --- a/python/cuspatial/cuspatial/core/_column/geocolumn.py +++ b/python/cuspatial/cuspatial/core/_column/geocolumn.py @@ -6,9 +6,10 @@ import pyarrow as pa import cudf -from cudf.core.column import ColumnBase, as_column, build_list_column +from cudf.core.column import ColumnBase, arange, as_column, build_list_column from cuspatial.core._column.geometa import Feature_Enum, GeoMeta +from cuspatial.utils.column_utils import empty_geometry_column T = TypeVar("T", bound="GeoColumn") @@ -185,6 +186,9 @@ def _from_points_xy(cls, points_xy: ColumnBase): Create a GeoColumn of only single points from a cudf Series with interleaved xy coordinates. """ + if not points_xy.dtype.kind == "f": + raise ValueError("Coordinates must be floating point numbers.") + if len(points_xy) % 2 != 0: raise ValueError("points_xy must have an even number of elements") @@ -202,16 +206,23 @@ def _from_points_xy(cls, points_xy: ColumnBase): } ) - indices = as_column(cp.arange(0, num_points * 2 + 1, 2), dtype="int32") + indices = arange(0, num_points * 2 + 1, 2, dtype="int32") point_col = build_list_column( indices=indices, elements=points_xy, size=num_points ) + coord_dtype = points_xy.dtype return cls( ( cudf.Series(point_col), - cudf.Series(), - cudf.Series(), - cudf.Series(), + cudf.Series( + empty_geometry_column(Feature_Enum.MULTIPOINT, coord_dtype) + ), + cudf.Series( + empty_geometry_column(Feature_Enum.LINESTRING, coord_dtype) + ), + cudf.Series( + empty_geometry_column(Feature_Enum.POLYGON, coord_dtype) + ), ), meta, ) diff --git a/python/cuspatial/cuspatial/core/dtypes.py b/python/cuspatial/cuspatial/core/dtypes.py new file mode 100644 index 000000000..8c0218594 --- /dev/null +++ b/python/cuspatial/cuspatial/core/dtypes.py @@ -0,0 +1,19 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. 
+
+from cudf.core.dtypes import ListDtype
+
+
+def point_dtype(base_dtype):
+    return ListDtype(base_dtype)
+
+
+def multipoint_dtype(base_dtype):
+    return ListDtype(ListDtype(base_dtype))
+
+
+def linestring_dtype(base_dtype):
+    return ListDtype(ListDtype(ListDtype(base_dtype)))
+
+
+def polygon_dtype(base_dtype):
+    return ListDtype(ListDtype(ListDtype(ListDtype(base_dtype))))
diff --git a/python/cuspatial/cuspatial/utils/column_utils.py b/python/cuspatial/cuspatial/utils/column_utils.py
index 808eda840..c2155c178 100644
--- a/python/cuspatial/cuspatial/utils/column_utils.py
+++ b/python/cuspatial/cuspatial/utils/column_utils.py
@@ -1,10 +1,19 @@
-# Copyright (c) 2020, NVIDIA CORPORATION.
+# Copyright (c) 2020-2023, NVIDIA CORPORATION.
 
 from typing import TypeVar
 
 import numpy as np
 
 from cudf.api.types import is_datetime_dtype
+from cudf.core.column.column import column_empty
+
+from cuspatial.core._column.geometa import Feature_Enum
+from cuspatial.core.dtypes import (
+    linestring_dtype,
+    multipoint_dtype,
+    point_dtype,
+    polygon_dtype,
+)
 
 GeoSeries = TypeVar("GeoSeries", bound="GeoSeries")
 
@@ -115,3 +124,38 @@ def has_same_geometry(lhs: GeoSeries, rhs: GeoSeries):
         return True
     else:
         return False
+
+
+def empty_geometry_column(feature: Feature_Enum, base_type):
+    """Return an empty (length 0) geometry column of type `feature`.
+
+    Parameters
+    ----------
+    feature : Feature_Enum
+        Geometry kind: POINT, MULTIPOINT, LINESTRING or POLYGON.
+    base_type
+        Dtype of the coordinates at the innermost list level.
+
+    Returns
+    -------
+    An unmasked, zero-length list column whose nesting depth matches
+    `feature` (1 level for POINT up to 4 levels for POLYGON).
+
+    Raises
+    ------
+    ValueError
+        If `feature` is not one of the four supported geometry kinds.
+    """
+    if feature == Feature_Enum.POINT:
+        dtype = point_dtype(base_type)
+    elif feature == Feature_Enum.MULTIPOINT:
+        dtype = multipoint_dtype(base_type)
+    elif feature == Feature_Enum.LINESTRING:
+        dtype = linestring_dtype(base_type)
+    elif feature == Feature_Enum.POLYGON:
+        dtype = polygon_dtype(base_type)
+    else:
+        # Previously this fell through and returned None, which only
+        # failed later, far away from the bad argument.
+        raise ValueError(f"Unsupported geometry feature: {feature!r}")
+    return column_empty(0, dtype, masked=False)