From 662736656a4dbf5c1ac2bc6c8c5888395453c769 Mon Sep 17 00:00:00 2001 From: Marshall Crumiller Date: Wed, 15 Jan 2025 18:05:33 -0500 Subject: [PATCH] Make 'num_samples' expr and add docstring --- .../dsl/function_expr/range/linear_space.rs | 28 +++-- .../src/dsl/function_expr/range/mod.rs | 13 +- crates/polars-plan/src/dsl/functions/range.rs | 9 +- crates/polars-python/src/functions/range.rs | 3 +- .../polars/functions/range/linear_space.py | 112 +++++++++++++++++- .../unit/functions/range/test_linear_space.py | 28 ++++- 6 files changed, 159 insertions(+), 34 deletions(-) diff --git a/crates/polars-plan/src/dsl/function_expr/range/linear_space.rs b/crates/polars-plan/src/dsl/function_expr/range/linear_space.rs index e648a7211a5..0e46e43261a 100644 --- a/crates/polars-plan/src/dsl/function_expr/range/linear_space.rs +++ b/crates/polars-plan/src/dsl/function_expr/range/linear_space.rs @@ -1,28 +1,34 @@ use arrow::temporal_conversions::MILLISECONDS_IN_DAY; use polars_core::prelude::*; -// use polars_core::with_match_physical_integer_polars_type; use polars_ops::series::{new_linear_space_f32, new_linear_space_f64, ClosedInterval}; use super::utils::ensure_range_bounds_contain_exactly_one_value; -pub(super) fn linear_space( - s: &[Column], - num_samples: i64, - closed: ClosedInterval, -) -> PolarsResult { +pub(super) fn linear_space(s: &[Column], closed: ClosedInterval) -> PolarsResult { let start = &s[0]; let end = &s[1]; + let num_samples = &s[2]; let name = start.name(); ensure_range_bounds_contain_exactly_one_value(start, end)?; - let num_samples = u64::try_from(num_samples).map_err(|v| { - PolarsError::ComputeError( - format!("'num_samples' must be nonnegative integer, got {}", v).into(), - ) - })?; + polars_ensure!( + num_samples.len() == 1, + ComputeError: "`num_samples` must contain exactly one value, got {} values", num_samples.len() + ); let start = start.get(0).unwrap(); let end = end.get(0).unwrap(); + let num_samples = num_samples.get(0).unwrap(); + let 
num_samples = num_samples + .extract::() + .ok_or(PolarsError::ComputeError( + format!( + "'num_samples' must be non-negative integer, got {}", + num_samples + ) + .into(), + ))?; + match (start.dtype(), end.dtype()) { (DataType::Float32, DataType::Float32) => new_linear_space_f32( start.extract::().unwrap(), diff --git a/crates/polars-plan/src/dsl/function_expr/range/mod.rs b/crates/polars-plan/src/dsl/function_expr/range/mod.rs index bb0b272bda5..7acc092555f 100644 --- a/crates/polars-plan/src/dsl/function_expr/range/mod.rs +++ b/crates/polars-plan/src/dsl/function_expr/range/mod.rs @@ -31,7 +31,6 @@ pub enum RangeFunction { }, IntRanges, LinearSpace { - num_samples: i64, closed: ClosedInterval, }, #[cfg(feature = "dtype-date")] @@ -76,10 +75,7 @@ impl RangeFunction { match self { IntRange { dtype, .. } => mapper.with_dtype(dtype.clone()), IntRanges => mapper.with_dtype(DataType::List(Box::new(DataType::Int64))), - LinearSpace { - num_samples: _, - closed: _, - } => mapper.with_dtype(DataType::Float64), + LinearSpace { closed: _ } => mapper.with_dtype(DataType::Float64), #[cfg(feature = "dtype-date")] DateRange { .. } => mapper.with_dtype(DataType::Date), #[cfg(feature = "dtype-date")] @@ -150,11 +146,8 @@ impl From for SpecialEq> { IntRanges => { map_as_slice!(int_range::int_ranges) }, - LinearSpace { - num_samples, - closed, - } => { - map_as_slice!(linear_space::linear_space, num_samples, closed) + LinearSpace { closed } => { + map_as_slice!(linear_space::linear_space, closed) }, #[cfg(feature = "dtype-date")] DateRange { interval, closed } => { diff --git a/crates/polars-plan/src/dsl/functions/range.rs b/crates/polars-plan/src/dsl/functions/range.rs index 8e8b273c47d..a7ff54100e8 100644 --- a/crates/polars-plan/src/dsl/functions/range.rs +++ b/crates/polars-plan/src/dsl/functions/range.rs @@ -161,15 +161,12 @@ pub fn time_ranges(start: Expr, end: Expr, interval: Duration, closed: ClosedWin } /// Generate a series of equally-spaced points. 
-pub fn linear_space(start: Expr, end: Expr, num_samples: i64, closed: ClosedInterval) -> Expr { - let input = vec![start, end]; +pub fn linear_space(start: Expr, end: Expr, num_samples: Expr, closed: ClosedInterval) -> Expr { + let input = vec![start, end, num_samples]; Expr::Function { input, - function: FunctionExpr::Range(RangeFunction::LinearSpace { - num_samples, - closed, - }), + function: FunctionExpr::Range(RangeFunction::LinearSpace { closed }), options: FunctionOptions { collect_groups: ApplyOptions::GroupWise, flags: FunctionFlags::default() | FunctionFlags::ALLOW_RENAME, diff --git a/crates/polars-python/src/functions/range.rs b/crates/polars-python/src/functions/range.rs index 57ea87002f3..4f7a1167d47 100644 --- a/crates/polars-python/src/functions/range.rs +++ b/crates/polars-python/src/functions/range.rs @@ -165,11 +165,12 @@ pub fn time_ranges( pub fn linear_space( start: PyExpr, end: PyExpr, - num_samples: i64, + num_samples: PyExpr, closed: Wrap, ) -> PyResult { let start = start.inner; let end = end.inner; + let num_samples = num_samples.inner; let closed = closed.0; Ok(dsl::linear_space(start, end, num_samples, closed).into()) } diff --git a/py-polars/polars/functions/range/linear_space.py b/py-polars/polars/functions/range/linear_space.py index 7cf36e697c9..15d684481ce 100644 --- a/py-polars/polars/functions/range/linear_space.py +++ b/py-polars/polars/functions/range/linear_space.py @@ -21,7 +21,7 @@ def linear_space( start: NumericLiteral | TemporalLiteral | IntoExpr, end: NumericLiteral | TemporalLiteral | IntoExpr, - num_samples: int, + num_samples: int | IntoExpr, *, closed: ClosedInterval = ..., eager: Literal[False] = ..., @@ -32,7 +32,7 @@ def linear_space( def linear_space( start: NumericLiteral | TemporalLiteral | IntoExpr, end: NumericLiteral | TemporalLiteral | IntoExpr, - num_samples: int, + num_samples: int | IntoExpr, *, closed: ClosedInterval = ..., eager: Literal[True], @@ -43,7 +43,7 @@ def linear_space( def linear_space( 
start: NumericLiteral | TemporalLiteral | IntoExpr, end: NumericLiteral | TemporalLiteral | IntoExpr, - num_samples: int, + num_samples: int | IntoExpr, *, closed: ClosedInterval = ..., eager: bool, @@ -53,14 +53,116 @@ def linear_space( def linear_space( start: NumericLiteral | TemporalLiteral | IntoExpr, end: NumericLiteral | TemporalLiteral | IntoExpr, - num_samples: int, + num_samples: int | IntoExpr, *, closed: ClosedInterval = "both", eager: bool = False, ) -> Expr | Series: - """Linearly-spaced elements.""" + """ + Create sequence of evenly-spaced points. + + Parameters + ---------- + start + Lower bound of the range. + end + Upper bound of the range. + num_samples + Number of samples in the output sequence. + closed : {'both', 'left', 'right', 'none'} + Define which sides of the interval are closed (inclusive). + eager + Evaluate immediately and return a `Series`. + If set to `False` (default), return an expression instead. + + Notes + ----- + `linear_space` works with numeric and temporal dtypes. When the `start` and `end` + parameters are `Date` dtypes, the output sequence consists of equally-spaced + `Datetime` elements with millisecond precision. + + Returns + ------- + Expr or Series + Column of evenly-spaced values; the data type depends on `start` and `end`. + + Examples + -------- + >>> pl.linear_space(start=0, end=1, num_samples=3, eager=True) + shape: (3,) + Series: 'literal' [f64] + [ + 0.0 + 0.5 + 1.0 + ] + >>> pl.linear_space(start=0, end=1, num_samples=3, closed="left", eager=True) + shape: (3,) + Series: 'literal' [f64] + [ + 0.0 + 0.333333 + 0.666667 + ] + >>> pl.linear_space(start=0, end=1, num_samples=3, closed="right", eager=True) + shape: (3,) + Series: 'literal' [f64] + [ + 0.333333 + 0.666667 + 1.0 + ] + >>> from datetime import time + >>> pl.linear_space( + ... start=time(hour=1), end=time(hour=12), num_samples=3, eager=True + ... 
) + shape: (3,) + Series: 'literal' [time] + [ + 01:00:00 + 06:30:00 + 12:00:00 + ] + + `Date` endpoints generate a sequence of `Datetime` values: + + >>> from datetime import date + >>> pl.linear_space( + ... start=date(2025, 1, 1), + ... end=date(2025, 2, 1), + ... num_samples=3, + ... closed="right", + ... eager=True, + ... ) + shape: (3,) + Series: 'literal' [datetime[ms]] + [ + 2025-01-11 08:00:00 + 2025-01-21 16:00:00 + 2025-02-01 00:00:00 + ] + + When `eager=False` (default), an expression is produced. You can generate a sequence + using the length of the dataframe: + + >>> df = pl.DataFrame({"a": [1, 2, 3, 4, 5]}) + >>> df.with_columns(pl.linear_space(0, 1, pl.len()).alias("ls")) + shape: (5, 2) + ┌─────┬──────┐ + │ a ┆ ls │ + │ --- ┆ --- │ + │ i64 ┆ f64 │ + ╞═════╪══════╡ + │ 1 ┆ 0.0 │ + │ 2 ┆ 0.25 │ + │ 3 ┆ 0.5 │ + │ 4 ┆ 0.75 │ + │ 5 ┆ 1.0 │ + └─────┴──────┘ + """ start = parse_into_expression(start) end = parse_into_expression(end) + num_samples = parse_into_expression(num_samples) result = wrap_expr(plr.linear_space(start, end, num_samples, closed)) if eager: diff --git a/py-polars/tests/unit/functions/range/test_linear_space.py b/py-polars/tests/unit/functions/range/test_linear_space.py index 129007fdfcc..f87898c7845 100644 --- a/py-polars/tests/unit/functions/range/test_linear_space.py +++ b/py-polars/tests/unit/functions/range/test_linear_space.py @@ -8,7 +8,7 @@ import pytest import polars as pl -from polars.exceptions import ComputeError +from polars.exceptions import ComputeError, ShapeError from polars.testing import assert_frame_equal, assert_series_equal if TYPE_CHECKING: @@ -186,3 +186,29 @@ def test_linear_space_incompatible_temporals( ), ): pl.linear_space(value1, value2, 11, eager=True) + + +def test_linear_space_expr_wrong_length() -> None: + df = pl.DataFrame({"a": [1, 2, 3, 4, 5]}) + with pytest.raises( + ShapeError, + match="unable to add a column of length 6 to a DataFrame of height 5", + ): + df.with_columns(pl.linear_space(0, 1, 6)) 
+ + +def test_linear_space_num_samples_expr() -> None: + df = pl.DataFrame({"a": [1, 2, 3, 4, 5]}) + result = df.with_columns(pl.linear_space(0, 1, pl.len(), closed="left").alias("ls")) + expected = df.with_columns( + pl.Series([0, 0.2, 0.4, 0.6, 0.8], dtype=pl.Float64).alias("ls") + ) + assert_frame_equal(result, expected) + + +def test_linear_space_invalid_num_samples_expr() -> None: + df = pl.DataFrame({"x": [1, 2, 3]}) + with pytest.raises( + ComputeError, match="`num_samples` must contain exactly one value, got 3 values" + ): + df.select(pl.linear_space(0, 1, pl.col("x")))