Make 'num_samples' expr and add docstring

pola-rs · Jan 15, 2025 · 6627366 · 6627366
1 parent 97bbae8
commit 6627366
Show file tree

Hide file tree

Showing 6 changed files with 159 additions and 34 deletions.
diff --git a/crates/polars-plan/src/dsl/function_expr/range/linear_space.rs b/crates/polars-plan/src/dsl/function_expr/range/linear_space.rs
@@ -1,28 +1,34 @@
 use arrow::temporal_conversions::MILLISECONDS_IN_DAY;
 use polars_core::prelude::*;
-// use polars_core::with_match_physical_integer_polars_type;
 use polars_ops::series::{new_linear_space_f32, new_linear_space_f64, ClosedInterval};
 
 use super::utils::ensure_range_bounds_contain_exactly_one_value;
 
-pub(super) fn linear_space(
-    s: &[Column],
-    num_samples: i64,
-    closed: ClosedInterval,
-) -> PolarsResult<Column> {
+pub(super) fn linear_space(s: &[Column], closed: ClosedInterval) -> PolarsResult<Column> {
     let start = &s[0];
     let end = &s[1];
+    let num_samples = &s[2];
     let name = start.name();
 
     ensure_range_bounds_contain_exactly_one_value(start, end)?;
-    let num_samples = u64::try_from(num_samples).map_err(|v| {
-        PolarsError::ComputeError(
-            format!("'num_samples' must be nonnegative integer, got {}", v).into(),
-        )
-    })?;
+    polars_ensure!(
+        num_samples.len() == 1,
+        ComputeError: "`num_samples` must contain exactly one value, got {} values", num_samples.len()
+    );
 
     let start = start.get(0).unwrap();
     let end = end.get(0).unwrap();
+    let num_samples = num_samples.get(0).unwrap();
+    let num_samples = num_samples
+        .extract::<u64>()
+        .ok_or(PolarsError::ComputeError(
+            format!(
+                "'num_samples' must be non-negative integer, got {}",
+                num_samples
+            )
+            .into(),
+        ))?;
+
     match (start.dtype(), end.dtype()) {
         (DataType::Float32, DataType::Float32) => new_linear_space_f32(
             start.extract::<f32>().unwrap(),

diff --git a/crates/polars-plan/src/dsl/function_expr/range/mod.rs b/crates/polars-plan/src/dsl/function_expr/range/mod.rs
@@ -31,7 +31,6 @@ pub enum RangeFunction {
     },
     IntRanges,
     LinearSpace {
-        num_samples: i64,
         closed: ClosedInterval,
     },
     #[cfg(feature = "dtype-date")]
@@ -76,10 +75,7 @@ impl RangeFunction {
         match self {
             IntRange { dtype, .. } => mapper.with_dtype(dtype.clone()),
             IntRanges => mapper.with_dtype(DataType::List(Box::new(DataType::Int64))),
-            LinearSpace {
-                num_samples: _,
-                closed: _,
-            } => mapper.with_dtype(DataType::Float64),
+            LinearSpace { closed: _ } => mapper.with_dtype(DataType::Float64),
             #[cfg(feature = "dtype-date")]
             DateRange { .. } => mapper.with_dtype(DataType::Date),
             #[cfg(feature = "dtype-date")]
@@ -150,11 +146,8 @@ impl From<RangeFunction> for SpecialEq<Arc<dyn ColumnsUdf>> {
             IntRanges => {
                 map_as_slice!(int_range::int_ranges)
             },
-            LinearSpace {
-                num_samples,
-                closed,
-            } => {
-                map_as_slice!(linear_space::linear_space, num_samples, closed)
+            LinearSpace { closed } => {
+                map_as_slice!(linear_space::linear_space, closed)
             },
             #[cfg(feature = "dtype-date")]
             DateRange { interval, closed } => {

diff --git a/crates/polars-plan/src/dsl/functions/range.rs b/crates/polars-plan/src/dsl/functions/range.rs
@@ -161,15 +161,12 @@ pub fn time_ranges(start: Expr, end: Expr, interval: Duration, closed: ClosedWin
 }
 
 /// Generate a series of equally-spaced points.
-pub fn linear_space(start: Expr, end: Expr, num_samples: i64, closed: ClosedInterval) -> Expr {
-    let input = vec![start, end];
+pub fn linear_space(start: Expr, end: Expr, num_samples: Expr, closed: ClosedInterval) -> Expr {
+    let input = vec![start, end, num_samples];
 
     Expr::Function {
         input,
-        function: FunctionExpr::Range(RangeFunction::LinearSpace {
-            num_samples,
-            closed,
-        }),
+        function: FunctionExpr::Range(RangeFunction::LinearSpace { closed }),
         options: FunctionOptions {
             collect_groups: ApplyOptions::GroupWise,
             flags: FunctionFlags::default() | FunctionFlags::ALLOW_RENAME,

diff --git a/crates/polars-python/src/functions/range.rs b/crates/polars-python/src/functions/range.rs
@@ -165,11 +165,12 @@ pub fn time_ranges(
 pub fn linear_space(
     start: PyExpr,
     end: PyExpr,
-    num_samples: i64,
+    num_samples: PyExpr,
     closed: Wrap<ClosedInterval>,
 ) -> PyResult<PyExpr> {
     let start = start.inner;
     let end = end.inner;
+    let num_samples = num_samples.inner;
     let closed = closed.0;
     Ok(dsl::linear_space(start, end, num_samples, closed).into())
 }
diff --git a/py-polars/polars/functions/range/linear_space.py b/py-polars/polars/functions/range/linear_space.py
@@ -21,7 +21,7 @@
 def linear_space(
     start: NumericLiteral | TemporalLiteral | IntoExpr,
     end: NumericLiteral | TemporalLiteral | IntoExpr,
-    num_samples: int,
+    num_samples: int | IntoExpr,
     *,
     closed: ClosedInterval = ...,
     eager: Literal[False] = ...,
@@ -32,7 +32,7 @@ def linear_space(
 def linear_space(
     start: NumericLiteral | TemporalLiteral | IntoExpr,
     end: NumericLiteral | TemporalLiteral | IntoExpr,
-    num_samples: int,
+    num_samples: int | IntoExpr,
     *,
     closed: ClosedInterval = ...,
     eager: Literal[True],
@@ -43,7 +43,7 @@ def linear_space(
 def linear_space(
     start: NumericLiteral | TemporalLiteral | IntoExpr,
     end: NumericLiteral | TemporalLiteral | IntoExpr,
-    num_samples: int,
+    num_samples: int | IntoExpr,
     *,
     closed: ClosedInterval = ...,
     eager: bool,
@@ -53,14 +53,116 @@ def linear_space(
 def linear_space(
     start: NumericLiteral | TemporalLiteral | IntoExpr,
     end: NumericLiteral | TemporalLiteral | IntoExpr,
-    num_samples: int,
+    num_samples: int | IntoExpr,
     *,
     closed: ClosedInterval = "both",
     eager: bool = False,
 ) -> Expr | Series:
-    """Linearly-spaced elements."""
+    """
+    Create a sequence of evenly-spaced points.
+
+    Parameters
+    ----------
+    start
+        Lower bound of the range.
+    end
+        Upper bound of the range.
+    num_samples
+        Number of samples in the output sequence.
+    closed : {'both', 'left', 'right', 'none'}
+        Define which sides of the interval are closed (inclusive).
+    eager
+        Evaluate immediately and return a `Series`.
+        If set to `False` (default), return an expression instead.
+
+    Notes
+    -----
+    `linear_space` works with numeric and temporal dtypes. When the `start` and `end`
+    parameters are `Date` dtypes, the output sequence consists of equally-spaced
+    `Datetime` elements with millisecond precision.
+
+    Returns
+    -------
+    Expr or Series
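+        Column of evenly-spaced values. The data type depends on `start` and `end`:
+        floating point for numeric bounds, temporal for temporal bounds (see Notes).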
+
+    Examples
+    --------
+    >>> pl.linear_space(start=0, end=1, num_samples=3, eager=True)
+    shape: (3,)
+    Series: 'literal' [f64]
+    [
+            0.0
+            0.5
+            1.0
+    ]
+    >>> pl.linear_space(start=0, end=1, num_samples=3, closed="left", eager=True)
+    shape: (3,)
+    Series: 'literal' [f64]
+    [
+            0.0
+            0.333333
+            0.666667
+    ]
+    >>> pl.linear_space(start=0, end=1, num_samples=3, closed="right", eager=True)
+    shape: (3,)
+    Series: 'literal' [f64]
+    [
+            0.333333
+            0.666667
+            1.0
+    ]
+    >>> from datetime import time
+    >>> pl.linear_space(
+    ...     start=time(hour=1), end=time(hour=12), num_samples=3, eager=True
+    ... )
+    shape: (3,)
+    Series: 'literal' [time]
+    [
+            01:00:00
+            06:30:00
+            12:00:00
+    ]
+
+    `Date` endpoints generate a sequence of `Datetime` values:
+
+    >>> from datetime import date
+    >>> pl.linear_space(
+    ...     start=date(2025, 1, 1),
+    ...     end=date(2025, 2, 1),
+    ...     num_samples=3,
+    ...     closed="right",
+    ...     eager=True,
+    ... )
+    shape: (3,)
+    Series: 'literal' [datetime[ms]]
+    [
+            2025-01-11 08:00:00
+            2025-01-21 16:00:00
+            2025-02-01 00:00:00
+    ]
+
+    When `eager=False` (default), an expression is produced. You can generate a sequence
+    using the length of the dataframe:
+
+    >>> df = pl.DataFrame({"a": [1, 2, 3, 4, 5]})
+    >>> df.with_columns(pl.linear_space(0, 1, pl.len()).alias("ls"))
+    shape: (5, 2)
+    ┌─────┬──────┐
+    │ a   ┆ ls   │
+    │ --- ┆ ---  │
+    │ i64 ┆ f64  │
+    ╞═════╪══════╡
+    │ 1   ┆ 0.0  │
+    │ 2   ┆ 0.25 │
+    │ 3   ┆ 0.5  │
+    │ 4   ┆ 0.75 │
+    │ 5   ┆ 1.0  │
+    └─────┴──────┘
+    """
     start = parse_into_expression(start)
     end = parse_into_expression(end)
+    num_samples = parse_into_expression(num_samples)
     result = wrap_expr(plr.linear_space(start, end, num_samples, closed))
 
     if eager:

diff --git a/py-polars/tests/unit/functions/range/test_linear_space.py b/py-polars/tests/unit/functions/range/test_linear_space.py
@@ -8,7 +8,7 @@
 import pytest
 
 import polars as pl
-from polars.exceptions import ComputeError
+from polars.exceptions import ComputeError, ShapeError
 from polars.testing import assert_frame_equal, assert_series_equal
 
 if TYPE_CHECKING:
@@ -186,3 +186,29 @@ def test_linear_space_incompatible_temporals(
         ),
     ):
         pl.linear_space(value1, value2, 11, eager=True)
+
+
+def test_linear_space_expr_wrong_length() -> None:
+    df = pl.DataFrame({"a": [1, 2, 3, 4, 5]})
+    with pytest.raises(
+        ShapeError,
+        match="unable to add a column of length 6 to a DataFrame of height 5",
+    ):
+        df.with_columns(pl.linear_space(0, 1, 6))
+
+
+def test_linear_space_num_samples_expr() -> None:
+    df = pl.DataFrame({"a": [1, 2, 3, 4, 5]})
+    result = df.with_columns(pl.linear_space(0, 1, pl.len(), closed="left").alias("ls"))
+    expected = df.with_columns(
+        pl.Series([0, 0.2, 0.4, 0.6, 0.8], dtype=pl.Float64).alias("ls")
+    )
+    assert_frame_equal(result, expected)
+
+
+def test_linear_space_invalid_num_samples_expr() -> None:
+    df = pl.DataFrame({"x": [1, 2, 3]})
+    with pytest.raises(
+        ComputeError, match="`num_samples` must contain exactly one value, got 3 values"
+    ):
+        df.select(pl.linear_space(0, 1, pl.col("x")))