Skip to content

Commit

Permalink
Make 'num_samples' expr and add docstring
Browse files Browse the repository at this point in the history
  • Loading branch information
mcrumiller committed Jan 15, 2025
1 parent 97bbae8 commit 6627366
Show file tree
Hide file tree
Showing 6 changed files with 159 additions and 34 deletions.
28 changes: 17 additions & 11 deletions crates/polars-plan/src/dsl/function_expr/range/linear_space.rs
Original file line number Diff line number Diff line change
@@ -1,28 +1,34 @@
use arrow::temporal_conversions::MILLISECONDS_IN_DAY;
use polars_core::prelude::*;
// use polars_core::with_match_physical_integer_polars_type;
use polars_ops::series::{new_linear_space_f32, new_linear_space_f64, ClosedInterval};

use super::utils::ensure_range_bounds_contain_exactly_one_value;

pub(super) fn linear_space(
s: &[Column],
num_samples: i64,
closed: ClosedInterval,
) -> PolarsResult<Column> {
pub(super) fn linear_space(s: &[Column], closed: ClosedInterval) -> PolarsResult<Column> {
let start = &s[0];
let end = &s[1];
let num_samples = &s[2];
let name = start.name();

ensure_range_bounds_contain_exactly_one_value(start, end)?;
let num_samples = u64::try_from(num_samples).map_err(|v| {
PolarsError::ComputeError(
format!("'num_samples' must be nonnegative integer, got {}", v).into(),
)
})?;
polars_ensure!(
num_samples.len() == 1,
ComputeError: "`num_samples` must contain exactly one value, got {} values", num_samples.len()
);

let start = start.get(0).unwrap();
let end = end.get(0).unwrap();
let num_samples = num_samples.get(0).unwrap();
let num_samples = num_samples
.extract::<u64>()
.ok_or(PolarsError::ComputeError(
format!(
"'num_samples' must be non-negative integer, got {}",
num_samples
)
.into(),
))?;

match (start.dtype(), end.dtype()) {
(DataType::Float32, DataType::Float32) => new_linear_space_f32(
start.extract::<f32>().unwrap(),
Expand Down
13 changes: 3 additions & 10 deletions crates/polars-plan/src/dsl/function_expr/range/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@ pub enum RangeFunction {
},
IntRanges,
LinearSpace {
num_samples: i64,
closed: ClosedInterval,
},
#[cfg(feature = "dtype-date")]
Expand Down Expand Up @@ -76,10 +75,7 @@ impl RangeFunction {
match self {
IntRange { dtype, .. } => mapper.with_dtype(dtype.clone()),
IntRanges => mapper.with_dtype(DataType::List(Box::new(DataType::Int64))),
LinearSpace {
num_samples: _,
closed: _,
} => mapper.with_dtype(DataType::Float64),
LinearSpace { closed: _ } => mapper.with_dtype(DataType::Float64),
#[cfg(feature = "dtype-date")]
DateRange { .. } => mapper.with_dtype(DataType::Date),
#[cfg(feature = "dtype-date")]
Expand Down Expand Up @@ -150,11 +146,8 @@ impl From<RangeFunction> for SpecialEq<Arc<dyn ColumnsUdf>> {
IntRanges => {
map_as_slice!(int_range::int_ranges)
},
LinearSpace {
num_samples,
closed,
} => {
map_as_slice!(linear_space::linear_space, num_samples, closed)
LinearSpace { closed } => {
map_as_slice!(linear_space::linear_space, closed)
},
#[cfg(feature = "dtype-date")]
DateRange { interval, closed } => {
Expand Down
9 changes: 3 additions & 6 deletions crates/polars-plan/src/dsl/functions/range.rs
Original file line number Diff line number Diff line change
Expand Up @@ -161,15 +161,12 @@ pub fn time_ranges(start: Expr, end: Expr, interval: Duration, closed: ClosedWin
}

/// Generate a series of equally-spaced points.
pub fn linear_space(start: Expr, end: Expr, num_samples: i64, closed: ClosedInterval) -> Expr {
let input = vec![start, end];
pub fn linear_space(start: Expr, end: Expr, num_samples: Expr, closed: ClosedInterval) -> Expr {
let input = vec![start, end, num_samples];

Expr::Function {
input,
function: FunctionExpr::Range(RangeFunction::LinearSpace {
num_samples,
closed,
}),
function: FunctionExpr::Range(RangeFunction::LinearSpace { closed }),
options: FunctionOptions {
collect_groups: ApplyOptions::GroupWise,
flags: FunctionFlags::default() | FunctionFlags::ALLOW_RENAME,
Expand Down
3 changes: 2 additions & 1 deletion crates/polars-python/src/functions/range.rs
Original file line number Diff line number Diff line change
Expand Up @@ -165,11 +165,12 @@ pub fn time_ranges(
pub fn linear_space(
start: PyExpr,
end: PyExpr,
num_samples: i64,
num_samples: PyExpr,
closed: Wrap<ClosedInterval>,
) -> PyResult<PyExpr> {
let start = start.inner;
let end = end.inner;
let num_samples = num_samples.inner;
let closed = closed.0;
Ok(dsl::linear_space(start, end, num_samples, closed).into())
}
112 changes: 107 additions & 5 deletions py-polars/polars/functions/range/linear_space.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
def linear_space(
start: NumericLiteral | TemporalLiteral | IntoExpr,
end: NumericLiteral | TemporalLiteral | IntoExpr,
num_samples: int,
num_samples: int | IntoExpr,
*,
closed: ClosedInterval = ...,
eager: Literal[False] = ...,
Expand All @@ -32,7 +32,7 @@ def linear_space(
def linear_space(
start: NumericLiteral | TemporalLiteral | IntoExpr,
end: NumericLiteral | TemporalLiteral | IntoExpr,
num_samples: int,
num_samples: int | IntoExpr,
*,
closed: ClosedInterval = ...,
eager: Literal[True],
Expand All @@ -43,7 +43,7 @@ def linear_space(
def linear_space(
start: NumericLiteral | TemporalLiteral | IntoExpr,
end: NumericLiteral | TemporalLiteral | IntoExpr,
num_samples: int,
num_samples: int | IntoExpr,
*,
closed: ClosedInterval = ...,
eager: bool,
Expand All @@ -53,14 +53,116 @@ def linear_space(
def linear_space(
start: NumericLiteral | TemporalLiteral | IntoExpr,
end: NumericLiteral | TemporalLiteral | IntoExpr,
num_samples: int,
num_samples: int | IntoExpr,
*,
closed: ClosedInterval = "both",
eager: bool = False,
) -> Expr | Series:
"""Linearly-spaced elements."""
"""
Create sequence of evenly-spaced points.
Parameters
----------
start
Lower bound of the range.
end
Upper bound of the range.
num_samples
Number of samples in the output sequence.
closed : {'both', 'left', 'right', 'none'}
Define which sides of the interval are closed (inclusive).
eager
Evaluate immediately and return a `Series`.
If set to `False` (default), return an expression instead.
Notes
-----
`linear_space` works with numeric and temporal dtypes. When the `start` and `end`
parameters are `Date` dtypes, the output sequence consists of equally-spaced
`Datetime` elements with millisecond precision.
Returns
-------
Expr or Series
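Column of evenly-spaced values: `Float64` for numeric bounds, a temporal dtype for temporal bounds (`Datetime` with millisecond precision for `Date` bounds).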
Examples
--------
>>> pl.linear_space(start=0, end=1, num_samples=3, eager=True)
shape: (3,)
Series: 'literal' [f64]
[
0.0
0.5
1.0
]
>>> pl.linear_space(start=0, end=1, num_samples=3, closed="left", eager=True)
shape: (3,)
Series: 'literal' [f64]
[
0.0
0.333333
0.666667
]
>>> pl.linear_space(start=0, end=1, num_samples=3, closed="right", eager=True)
shape: (3,)
Series: 'literal' [f64]
[
0.333333
0.666667
1.0
]
>>> from datetime import time
>>> pl.linear_space(
... start=time(hour=1), end=time(hour=12), num_samples=3, eager=True
... )
shape: (3,)
Series: 'literal' [time]
[
01:00:00
06:30:00
12:00:00
]
`Date` endpoints generate a sequence of `Datetime` values:
>>> from datetime import date
>>> pl.linear_space(
... start=date(2025, 1, 1),
... end=date(2025, 2, 1),
... num_samples=3,
... closed="right",
... eager=True,
... )
shape: (3,)
Series: 'literal' [datetime[ms]]
[
2025-01-11 08:00:00
2025-01-21 16:00:00
2025-02-01 00:00:00
]
When `eager=False` (default), an expression is produced. You can generate a sequence
using the length of the dataframe:
>>> df = pl.DataFrame({"a": [1, 2, 3, 4, 5]})
>>> df.with_columns(pl.linear_space(0, 1, pl.len()).alias("ls"))
shape: (5, 2)
┌─────┬──────┐
│ a ┆ ls │
│ --- ┆ --- │
│ i64 ┆ f64 │
╞═════╪══════╡
│ 1 ┆ 0.0 │
│ 2 ┆ 0.25 │
│ 3 ┆ 0.5 │
│ 4 ┆ 0.75 │
│ 5 ┆ 1.0 │
└─────┴──────┘
"""
start = parse_into_expression(start)
end = parse_into_expression(end)
num_samples = parse_into_expression(num_samples)
result = wrap_expr(plr.linear_space(start, end, num_samples, closed))

if eager:
Expand Down
28 changes: 27 additions & 1 deletion py-polars/tests/unit/functions/range/test_linear_space.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import pytest

import polars as pl
from polars.exceptions import ComputeError
from polars.exceptions import ComputeError, ShapeError
from polars.testing import assert_frame_equal, assert_series_equal

if TYPE_CHECKING:
Expand Down Expand Up @@ -186,3 +186,29 @@ def test_linear_space_incompatible_temporals(
),
):
pl.linear_space(value1, value2, 11, eager=True)


def test_linear_space_expr_wrong_length() -> None:
    """A literal `num_samples` that differs from the frame height raises `ShapeError`."""
    frame = pl.DataFrame({"a": [1, 2, 3, 4, 5]})
    # Six samples cannot be attached to a five-row frame.
    expected_message = "unable to add a column of length 6 to a DataFrame of height 5"
    with pytest.raises(ShapeError, match=expected_message):
        frame.with_columns(pl.linear_space(0, 1, 6))


def test_linear_space_num_samples_expr() -> None:
    """`num_samples` may be given as an expression such as `pl.len()`."""
    df = pl.DataFrame({"a": [1, 2, 3, 4, 5]})
    # Left-closed interval over five rows: the end point 1.0 is excluded.
    ls_expr = pl.linear_space(0, 1, pl.len(), closed="left").alias("ls")
    expected_column = pl.Series([0, 0.2, 0.4, 0.6, 0.8], dtype=pl.Float64).alias("ls")

    result = df.with_columns(ls_expr)
    expected = df.with_columns(expected_column)
    assert_frame_equal(result, expected)


def test_linear_space_invalid_num_samples_expr() -> None:
    """A `num_samples` expression that yields more than one value raises `ComputeError`."""
    frame = pl.DataFrame({"x": [1, 2, 3]})
    # `pl.col("x")` evaluates to three values, but exactly one is required.
    expected_message = "`num_samples` must contain exactly one value, got 3 values"
    with pytest.raises(ComputeError, match=expected_message):
        frame.select(pl.linear_space(0, 1, pl.col("x")))

0 comments on commit 6627366

Please sign in to comment.