Skip to content

Commit

Permalink
Refactor CubicSpline to be independent of trajectory component.
Browse files Browse the repository at this point in the history
  • Loading branch information
isVoid committed Aug 16, 2022
1 parent a007d57 commit b2017fb
Show file tree
Hide file tree
Showing 5 changed files with 71 additions and 91 deletions.
4 changes: 2 additions & 2 deletions python/cuspatial/cuspatial/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from ._version import get_versions
from .core.trajectory import interpolate
from .core import interpolate
from .core.gis import (
directed_hausdorff_distance,
haversine_distance,
Expand All @@ -10,7 +10,7 @@
pairwise_linestring_distance,
)
from .core.indexing import quadtree_on_points
from .core.trajectory import CubicSpline
from .core.interpolate import CubicSpline
from .core.spatial_join import (
join_quadtree_and_bounding_boxes,
quadtree_point_in_polygon,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
cubicspline_interpolate,
)


def _cubic_spline_coefficients(x, y, ids, prefix_sums):
x_c = x._column
y_c = y._column
Expand All @@ -36,87 +35,82 @@ class CubicSpline:
"""
Fits each column of the input Series `y` to a Hermite cubic spline.
``cuspatial.CubicSpline`` supports two usage patterns: The first is
identical to scipy.interpolate.CubicSpline::
curve = cuspatial.CubicSpline(t, y)
new_points = curve(np.linspace(t.min, t.max, 50))
This allows API parity with scipy. This isn't recommended, as scipy
host based interpolation performance is likely to exceed GPU performance
for a single curve.
However, cuSpatial significantly outperforms scipy when many splines are
``cuspatial.CubicSpline`` supports basic usage identical to
scipy.interpolate.CubicSpline::
curve = cuspatial.CubicSpline(x, y)
new_points = curve(np.linspace(x.min(), x.max(), 50))
Parameters
----------
x : cudf.Series
1-D array containing values of the independent variable.
Values must be real, finite and in strictly increasing order.
y : cudf.Series
Array containing values of the dependent variable.
ids (Optional) : cudf.Series
ids of each spline
size (Optional) : int
fixed size of each spline
offset (Optional) : cudf.Series
alternative to `size`, allows splines of varying
length. Not yet fully supported.
Returns
-------
CubicSpline : callable `o`
``o.c`` contains the coefficients that can be used to compute new
points along the spline fitting the original ``x`` data. ``o(n)``
interpolates the spline coordinates along new input values ``n``.
Note
----
cuSpatial will outperform scipy when many splines are
fit simultaneously. Data must be arranged in a SoA format, and the
exclusive `prefix_sum` of the separate curves must also be passed to the
function.::
NUM_SPLINES = 100000
SPLINE_LENGTH = 101
t = cudf.Series(
function. See example for detail.
Example
-------
>>> import cudf, cuspatial
>>> NUM_SPLINES = 100000
>>> SPLINE_LENGTH = 101
>>> x = cudf.Series(
np.hstack((np.arange(SPLINE_LENGTH),) * NUM_SPLINES)
).astype('float32')
y = cudf.Series(
>>> y = cudf.Series(
np.random.random(SPLINE_LENGTH*NUM_SPLINES)
).astype('float32')
prefix_sum = cudf.Series(
cp.arange(NUM_SPLINES + 1)*SPLINE_LENGTH
>>> prefix_sum = cudf.Series(
np.arange(NUM_SPLINES + 1)*SPLINE_LENGTH
).astype('int32')
curve = cuspatial.CubicSpline(t, y, prefixes=prefix_sum)
new_samples = cudf.Series(
>>> curve = cuspatial.CubicSpline(x, y, offset=prefix_sum)
>>> new_samples = cudf.Series(
np.hstack((np.linspace(
0, (SPLINE_LENGTH - 1), (SPLINE_LENGTH - 1) * 2 + 1
),) * NUM_SPLINES)
).astype('float32')
curve_ids = cudf.Series(np.repeat(
>>> curve_ids = cudf.Series(np.repeat(
np.arange(0, NUM_SPLINES), SPLINE_LENGTH * 2 - 1
), dtype="int32")
new_points = curve(new_samples, curve_ids)
>>> new_points = curve(new_samples, curve_ids)
"""

def __init__(self, t, y, ids=None, size=None, prefixes=None):
"""
Computes various error preconditions on the input data, then
uses CUDA to compute cubic splines for each set of input
coordinates on the GPU in parallel.
Parameters
----------
t : cudf.Series
time sample values. Must be monotonically increasing.
y : cudf.Series
columns to have curves fit to according to x
ids (Optional) : cudf.Series
ids of each spline
size (Optional) : cudf.Series
fixed size of each spline
prefixes (Optional) : cudf.Series
alternative to `size`, allows splines of varying
length. Not yet fully supported.
Returns
-------
CubicSpline : callable `o`
``o.c`` contains the coefficients that can be used to compute new
points along the spline fitting the original ``t`` data. ``o(n)``
interpolates the spline coordinates along new input values ``n``.
"""

def __init__(self, x, y, ids=None, size=None, offset=None):
# error protections:
if len(t) < 5:
if len(x) < 5:
raise ValueError(
"Use of GPU cubic spline requires splines of length > 4"
)
if not isinstance(t, Series):
if not isinstance(x, Series):
raise TypeError(
"Error: input independent vars must be cudf Series"
)
if not isinstance(y, (Series, DataFrame)):
raise TypeError(
"Error: input dependent vars must be cudf Series or DataFrame"
)
if not len(t) == len(y):
if not len(x) == len(y):
raise TypeError(
"Error: dependent and independent vars have different length"
)
Expand All @@ -128,33 +122,33 @@ def __init__(self, t, y, ids=None, size=None, prefixes=None):
if not ids.dtype == np.int32:
raise TypeError("Error: int32 only supported at this time.")
self.ids = ids
self.size = size if size is not None else len(t)
self.size = size if size is not None else len(x)
if not isinstance(self.size, int):
raise TypeError("Error: size must be an integer")
if not ((len(t) % self.size) == 0):
if not ((len(x) % self.size) == 0):
raise ValueError(
"Error: length of input is not a multiple of size"
)
if not isinstance(t, Series):
if not isinstance(x, Series):
raise TypeError("cuspatial.CubicSpline requires a cudf.Series")
if not t.dtype == np.float32:
if not x.dtype == np.float32:
raise TypeError("Error: float32 only supported at this time.")
if not isinstance(y, Series):
raise TypeError("cuspatial.CubicSpline requires a cudf.Series")
if not y.dtype == np.float32:
raise TypeError("Error: float32 only supported at this time.")
self.t = t
self.x = x
self.y = y
if prefixes is None:
self.prefix = Series(
cp.arange((len(t) / self.size) + 1) * self.size
if offset is None:
self.offset = Series(
cp.arange((len(x) / self.size) + 1) * self.size
).astype("int32")
else:
if not isinstance(prefixes, Series):
if not isinstance(offset, Series):
raise TypeError("cuspatial.CubicSpline requires a cudf.Series")
if not prefixes.dtype == np.int32:
if not offset.dtype == np.int32:
raise TypeError("Error: int32 only supported at this time.")
self.prefix = prefixes
self.offset = offset

self.c = self._compute_coefficients()

Expand All @@ -164,13 +158,13 @@ def _compute_coefficients(self):
"""
if isinstance(self.y, Series):
return _cubic_spline_coefficients(
self.t, self.y, self.ids, self.prefix
self.x, self.y, self.ids, self.offset
)
else:
c = {}
for col in self.y.columns:
c[col] = _cubic_spline_coefficients(
self.t, self.y, self.ids, self.prefix
self.x, self.y, self.ids, self.offset
)
return c

Expand All @@ -187,7 +181,7 @@ def __call__(self, coordinates, groups=None):
cp.repeat(cp.array(0), len(coordinates))
).astype("int32")
result = _cubic_spline_fit(
coordinates, self.groups, self.prefix, self.t, self.c
coordinates, self.groups, self.offset, self.x, self.c
)
return Series(result)
else:
Expand Down
14 changes: 0 additions & 14 deletions python/cuspatial/cuspatial/core/trajectory/__init__.py

This file was deleted.

10 changes: 5 additions & 5 deletions python/cuspatial/cuspatial/tests/trajectory/test_interpolate.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ def test_cusparse():
cudf.Series([3, 2, 3, 4, 3, 3, 2, 3, 4, 3, 3, 2, 3, 4, 3]).astype(
"float32"
),
prefixes=cudf.Series([0, 5, 10, 15]).astype("int32"),
offset=cudf.Series([0, 5, 10, 15]).astype("int32"),
)
cudf.testing.assert_frame_equal(
result.c,
Expand Down Expand Up @@ -156,7 +156,7 @@ def test_class_triple():
"float32"
)
prefixes = cudf.Series([0, 5, 10, 15]).astype("int32")
g = cuspatial.interpolate.CubicSpline(t, x, prefixes=prefixes)
g = cuspatial.interpolate.CubicSpline(t, x, offset=prefixes)
groups = cudf.Series(
np.ravel(np.array([np.repeat(0, 5), np.repeat(1, 5), np.repeat(2, 5)]))
)
Expand All @@ -171,7 +171,7 @@ def test_class_triple_six():
[3, 2, 3, 4, 3, 1, 3, 2, 3, 4, 3, 1, 3, 2, 3, 4, 3, 1]
).astype("float32")
prefixes = cudf.Series([0, 6, 12, 18]).astype("int32")
g = cuspatial.interpolate.CubicSpline(t, x, prefixes=prefixes)
g = cuspatial.interpolate.CubicSpline(t, x, offset=prefixes)
groups = cudf.Series(
np.ravel(np.array([np.repeat(0, 6), np.repeat(1, 6), np.repeat(2, 6)]))
)
Expand All @@ -186,7 +186,7 @@ def test_class_triple_six_splits():
[3, 2, 3, 4, 3, 1, 3, 2, 3, 4, 3, 1, 3, 2, 3, 4, 3, 1]
).astype("float32")
prefixes = cudf.Series([0, 6, 12, 18]).astype("int32")
g = cuspatial.interpolate.CubicSpline(t, x, prefixes=prefixes)
g = cuspatial.interpolate.CubicSpline(t, x, offset=prefixes)
groups = cudf.Series(
np.ravel(
np.array([np.repeat(0, 12), np.repeat(1, 12), np.repeat(2, 12)])
Expand Down Expand Up @@ -236,7 +236,7 @@ def test_class_new_interpolation():
new_samples = cudf.Series(np.hstack((np.linspace(0, 4, 9),) * 3)).astype(
"float32"
)
curve = cuspatial.CubicSpline(t, y, prefixes=prefix_sum)
curve = cuspatial.CubicSpline(t, y, offset=prefix_sum)
new_x = cudf.Series(np.repeat(np.arange(0, 3), 9)).astype("int32")
old_x = cudf.Series(np.repeat(np.arange(0, 3), 5)).astype("int32")
new_points = curve(new_samples, groups=new_x)
Expand Down

0 comments on commit b2017fb

Please sign in to comment.