diff --git a/src/pyinterp/core/fill.pyi b/src/pyinterp/core/fill.pyi index c94375ed..9504d797 100644 --- a/src/pyinterp/core/fill.pyi +++ b/src/pyinterp/core/fill.pyi @@ -16,7 +16,6 @@ from . import ( ) class FirstGuess: - __doc__: ClassVar[str] = ... # read-only __members__: ClassVar[dict] = ... # read-only Zero: ClassVar[FirstGuess] = ... ZonalAverage: ClassVar[FirstGuess] = ... @@ -56,7 +55,6 @@ class FirstGuess: class ValueType: - __doc__: ClassVar[str] = ... # read-only __members__: ClassVar[dict] = ... # read-only All: ClassVar[ValueType] = ... Defined: ClassVar[ValueType] = ... @@ -96,11 +94,6 @@ class ValueType: ... -def fill_time_series(x: numpy.ndarray[numpy.int64], - fill_value: int) -> numpy.ndarray[numpy.int64]: - ... - - def gauss_seidel_float32(grid: numpy.ndarray[numpy.float32], first_guess: FirstGuess = ..., is_circle: bool = ..., @@ -212,10 +205,24 @@ def loess_float64(grid: TemporalGrid4DFloat64, def matrix_float32(x: numpy.ndarray[numpy.float32], - y: numpy.ndarray[numpy.float32]) -> None: + fill_value: float = ...) -> None: ... def matrix_float64(x: numpy.ndarray[numpy.float64], - y: numpy.ndarray[numpy.float64]) -> None: + fill_value: float = ...) -> None: + ... + + +def vector_float32(x: numpy.ndarray[numpy.float32], + fill_value: float = ...) -> None: + ... + + +def vector_float64(x: numpy.ndarray[numpy.float64], + fill_value: float = ...) -> None: + ... + + +def vector_int64(x: numpy.ndarray[numpy.int64], fill_value: int) -> None: ... diff --git a/src/pyinterp/core/include/pyinterp/fill.hpp b/src/pyinterp/core/include/pyinterp/fill.hpp index 37338e3b..d2fcc98e 100644 --- a/src/pyinterp/core/include/pyinterp/fill.hpp +++ b/src/pyinterp/core/include/pyinterp/fill.hpp @@ -198,22 +198,17 @@ auto gauss_seidel(pybind11::EigenDRef> &grid, return std::max(calculate(0), calculate(1)); } -/// Fills in the gaps between defined points in a line with interpolated values. 
+/// Fills in the gaps between defined values in a line with interpolated +/// values. /// /// @tparam T The type of the coordinates. -/// @param x The x-coordinates of the points defining the line. -/// @param y The y-coordinates of the points defining the line. +/// @param x The values of the points defining the line. /// @param is_undefined A boolean vector indicating which points are undefined. -/// If is_undefined[i] is true, then the point (x[i], y[i]) is undefined. template -void fill_line(EigenRefBlock x, EigenRefBlock y, - EigenRefBlock is_undefined) { +void fill_line(EigenRefBlock x, EigenRefBlock is_undefined) { T x0; T x1; T dx; - T y0; - T y1; - T dy; Eigen::Index di; Eigen::Index last_valid = -1; Eigen::Index first_valid = -1; @@ -227,15 +222,11 @@ void fill_line(EigenRefBlock x, EigenRefBlock y, if (last_valid != -1 && (ix - last_valid) > 1) { x0 = x[last_valid]; x1 = x[ix]; - y0 = y[last_valid]; - y1 = y[ix]; di = ix - last_valid; dx = (x1 - x0) / di; - dy = (y1 - y0) / di; for (Eigen::Index jx = last_valid + 1; jx < ix; ++jx) { di = jx - last_valid; x[jx] = dx * di + x0; - y[jx] = dy * di + y0; } } else if (first_valid == -1) { // If this is the first valid point, then we can't interpolate the @@ -257,9 +248,6 @@ void fill_line(EigenRefBlock x, EigenRefBlock y, x0 = x[first_valid]; x1 = x[last_valid]; dx = (x1 - x0) / (last_valid - first_valid); - y0 = y[first_valid]; - y1 = y[last_valid]; - dy = (y1 - y0) / (last_valid - first_valid); // If there is a gap between the last valid point and the end of the line, // then interpolate the gap. 
@@ -267,7 +255,6 @@ void fill_line(EigenRefBlock x, EigenRefBlock y, for (Eigen::Index jx = last_valid + 1; jx < size; ++jx) { di = jx - last_valid; x[jx] = dx * di + x1; - y[jx] = dy * di + y1; } } // If there is a gap between the first valid point and the beginning of the @@ -276,7 +263,6 @@ void fill_line(EigenRefBlock x, EigenRefBlock y, for (Eigen::Index jx = 0; jx < first_valid; ++jx) { di = first_valid - jx; x[jx] = x0 - dx * di; - y[jx] = y0 - dy * di; } } // Mark all points as defined. @@ -692,79 +678,49 @@ auto loess(const Grid4D &grid, const uint32_t nx, return result; } -/// Fills in the gaps between defined points in a matrix with interpolated +/// Fills in the gaps between defined values in a matrix with interpolated /// values. /// -/// @param x The x-coordinates of the points defining the matrix. -/// @param y The y-coordinates of the points defining the matrix. +/// @param x The data to be processed. template -void fill_matrix(pybind11::EigenDRef> x, - pybind11::EigenDRef> y) { - auto mask = Matrix(Eigen::isnan(x.array()) || Eigen::isnan(y.array())); +void matrix(pybind11::EigenDRef> x, const T &fill_value) { + Matrix mask; + if (std::isnan(fill_value)) { + mask = Eigen::isnan(x.array()); + } else { + mask = x.array() == fill_value; + } auto num_rows = x.rows(); - auto num_cols = y.cols(); + auto num_cols = x.cols(); // Fill in the rows. for (int ix = 0; ix < num_rows; ix++) { auto m = mask.row(ix); if (m.all()) { continue; } - detail::fill_line(x.row(ix), y.row(ix), m); + detail::fill_line(x.row(ix), m); } // Fill in the columns. for (int ix = 0; ix < num_cols; ix++) { - detail::fill_line(x.col(ix), y.col(ix), mask.col(ix)); + detail::fill_line(x.col(ix), mask.col(ix)); } } -/// Fill gaps in a time series using linear interpolation. +/// Fill gaps between defined values in a vector with interpolated values. /// -/// The time series is assumed to be monotonically increasing or decreasing. 
+/// The data is assumed to be monotonically increasing or decreasing. /// /// @param array Array of dates. /// @param fill_value Value to use for missing data. template -auto fill_time_series(const Eigen::Ref> &array, - const T fill_value) -> Vector { - auto result = Vector(array); - auto size = array.size(); - Eigen::Index last_valid = -1; - Eigen::Index first_valid = -1; - - for (Eigen::Index ix = 0; ix < size; ++ix) { - auto item = array[ix]; - if (item != fill_value) { - if (last_valid != -1 && (ix - last_valid) > 1) { - auto x0 = array[last_valid]; - auto x1 = item; - auto dx = (x1 - x0) / static_cast(ix - last_valid); - for (Eigen::Index jx = last_valid + 1; jx < ix; ++jx) { - result[jx] = dx * static_cast(jx - last_valid) + x0; - } - } else if (first_valid == -1) { - first_valid = ix; - } - last_valid = ix; - } +auto vector(Eigen::Ref> array, const T &fill_value) { + Vector mask; + if (std::isnan(fill_value)) { + mask = Eigen::isnan(array.array()); + } else { + mask = array.array() == fill_value; } - - if (last_valid != first_valid) { - auto x0 = array[first_valid]; - auto x1 = array[last_valid]; - auto dx = (x1 - x0) / static_cast(last_valid - first_valid); - if (last_valid < (size - 1)) { - for (Eigen::Index jx = last_valid + 1; jx < size; ++jx) { - result[jx] = dx * static_cast(jx - last_valid) + x1; - } - } - - if (first_valid > 0) { - for (Eigen::Index jx = 0; jx < first_valid; ++jx) { - result[jx] = x0 - dx * static_cast(first_valid - jx); - } - } - } - return result; + detail::fill_line(array, mask); } } // namespace fill diff --git a/src/pyinterp/core/module/fill.cpp b/src/pyinterp/core/module/fill.cpp index 7789ea82..c2aa7d2a 100644 --- a/src/pyinterp/core/module/fill.cpp +++ b/src/pyinterp/core/module/fill.cpp @@ -78,15 +78,31 @@ method by relaxation. 
)__doc__", py::call_guard()); - m.def(("matrix_" + function_suffix).c_str(), - &pyinterp::fill::fill_matrix, py::arg("x"), py::arg("y"), + m.def(("matrix_" + function_suffix).c_str(), &pyinterp::fill::matrix, + py::arg("x"), + py::arg("fill_value") = std::numeric_limits::quiet_NaN(), R"__doc__( Fills in the gaps between defined points in a matrix with interpolated values. Args: - x: X coordinates of the points to be interpolated. - y: Y coordinates of the points to be interpolated. + x: data to be interpolated. + fill_value: Value used to detect gaps in the matrix. Defaults to + ``NaN``. +)__doc__", + py::call_guard()); + + m.def(("vector_" + function_suffix).c_str(), &pyinterp::fill::vector, + py::arg("x"), + py::arg("fill_value") = std::numeric_limits::quiet_NaN(), + R"__doc__( +Fills in the gaps between defined points in a vector with interpolated +values. + +Args: + x: data to be interpolated. + fill_value: Value used to detect gaps in the vector. Defaults to + ``NaN``. )__doc__", py::call_guard()); } @@ -146,14 +162,14 @@ void init_fill(py::module &m) { implement_loess>(m, "Temporal", "Float32"); - m.def("fill_time_series", &pyinterp::fill::fill_time_series, - py::arg("x"), py::arg("fill_value"), + m.def("vector_int64", &pyinterp::fill::vector, py::arg("x"), + py::arg("fill_value"), R"__doc__( -Fill gaps in a time series using linear interpolation. +Fill gaps in a vector with interpolated values. Args: - x: Time series to be filled. - fill_value: Value used to detect gaps in the time series. + x: vector to be filled. + fill_value: Value used to detect gaps in the vector.
)__doc__", py::call_guard()); }
diff --git a/src/pyinterp/fill.py b/src/pyinterp/fill.py
index c8d620d2..42a94430 100644
--- a/src/pyinterp/fill.py
+++ b/src/pyinterp/fill.py
@@ -2,7 +2,7 @@
 Replace undefined values
 ------------------------
 """
-from typing import Optional, Union
+from typing import Any, Optional, Union
 import concurrent.futures
 
 import numpy
@@ -140,44 +140,65 @@ def gauss_seidel(mesh: Union[grid.Grid2D, grid.Grid3D],
     return residual <= epsilon, filled
 
 
-def matrix(x: NDArray, y: NDArray) -> None:
-    """Fills in the gaps between defined points in a matrix with interpolated
-    values.
+def matrix(x: NDArray,
+           fill_value: Any = numpy.nan,
+           in_place: bool = True) -> NDArray:
+    """Fills in the gaps between defined values in a 2-dimensional array.
 
     Args:
-        x: X-axis coordinates of the grid.
-        y: Y-axis coordinates of the grid.
+        x: data to be filled.
+        fill_value: Value used to detect undefined values.
+        in_place: If true, the data is filled in place. Defaults to ``True``.
+
+    Returns:
+        The data filled.
     """
     if len(x.shape) != 2:
         raise ValueError('x must be a 2-dimensional array')
-    if len(y.shape) != 2:
-        raise ValueError('y must be a 2-dimensional array')
-    dtype_x = x.dtype
-    dtype_y = y.dtype
-    if (dtype_x != dtype_y):
-        return core.fill.matrix_float64(x, y)
-    if dtype_x == numpy.float32:
-        return core.fill.matrix_float32(x, y)
-    return core.fill.matrix_float64(x, y)
+    dtype = x.dtype
+    if not in_place:
+        x = numpy.copy(x)
+    if dtype == numpy.float32:
+        core.fill.matrix_float32(x, fill_value)
+    else:
+        core.fill.matrix_float64(x, fill_value)
+    return x
 
 
-def time_series(x: NDArray, fill_value=numpy.datetime64('NaT')) -> NDArray:
-    """Fill undefined values in a time series.
+def vector(x: NDArray,
+           fill_value: Any = numpy.nan,
+           in_place: bool = True) -> NDArray:
+    """Fill in the gaps between defined values in a 1-dimensional array.
 
     Args:
-        x (numpy.ndarray[numpy.datetime64]): Time series to be filled.
-        fill_value (numpy.datetime64): Value used to fill undefined values.
+        x: data to be filled.
+        fill_value: Value used to detect undefined values.
+        in_place: If true, the data is filled in place. Defaults to ``True``.
 
     Returns:
-        numpy.ndarray[numpy.datetime64]: Time series with undefined values
-        filled.
+        The data filled.
     """
     if not isinstance(x, numpy.ndarray):
         raise ValueError('x must be a numpy.ndarray')
-    if not numpy.issubdtype(x.dtype, numpy.datetime64):
-        raise ValueError('x must be a numpy.ndarray[numpy.datetime64]')
-    if not numpy.issubdtype(fill_value.dtype, numpy.datetime64):
-        raise ValueError('fill_value must be a numpy.datetime64')
-    return core.fill.fill_time_series(x.astype(numpy.int64),
-                                      fill_value.astype(numpy.int64)).astype(
-                                          x.dtype)
+    if len(x.shape) != 1:
+        raise ValueError('x must be a 1-dimensional array')
+    dtype = x.dtype
+    if not in_place:
+        x = numpy.copy(x)
+    if dtype == numpy.float32:
+        core.fill.vector_float32(x, fill_value)
+    elif dtype == numpy.float64:
+        core.fill.vector_float64(x, fill_value)
+    elif dtype == numpy.int64:
+        core.fill.vector_int64(x, fill_value)
+    elif numpy.issubdtype(dtype, numpy.datetime64) or numpy.issubdtype(
+            dtype, numpy.timedelta64):
+        if isinstance(fill_value,
+                      (float, numpy.floating)) and numpy.isnan(fill_value):
+            # The default NaN has no ``view`` method: time-like data uses NaT.
+            fill_value = dtype.type('NaT')
+        core.fill.vector_int64(x.view(numpy.int64),
+                               fill_value.astype(dtype).view(numpy.int64))
+    else:
+        raise ValueError(f'unsupported data type {dtype}')
+    return x
diff --git a/src/pyinterp/tests/test_fill.py b/src/pyinterp/tests/test_fill.py index e5556d84..ff90fef3 100644 --- a/src/pyinterp/tests/test_fill.py +++ b/src/pyinterp/tests/test_fill.py @@ -109,7 +109,8 @@ def test_matrix(): dtype=np.float64) y = np.copy(x) - fill.matrix(x, y) + fill.matrix(x) + fill.matrix(y) assert np.all(x[:, 0] == np.arange(10)) assert np.all(x[:, 1] == np.arange(10)) @@ -121,7 +122,7 @@ def test_matrix(): assert np.all(y[:, 3] == np.arange(10)) -def test_time_series(): +def test_vector(): x = np.arange(np.datetime64('2000-01-01'), np.datetime64('2000-01-10'), np.timedelta64(1, 'h')) random_indices = np.random.choice(len(x), 48, replace=False) @@ -129,5 +130,5 @@ def test_time_series():
xp[random_indices] = np.datetime64('NaT') xp[:2] = np.datetime64('NaT') xp[-2:] = np.datetime64('NaT') - yp = fill.time_series(xp) + yp = fill.vector(xp, fill_value=np.datetime64('NaT')) assert np.all(yp == x)