Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Add linear_space #20678

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 1 addition & 13 deletions crates/polars-ops/src/series/ops/is_between.rs
Original file line number Diff line number Diff line change
@@ -1,20 +1,8 @@
use std::ops::BitAnd;

use polars_core::prelude::*;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use strum_macros::IntoStaticStr;

#[derive(Copy, Clone, Debug, Hash, Eq, PartialEq, Default, IntoStaticStr)]
Copy link
Contributor Author

@mcrumiller mcrumiller Jan 12, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I moved ClosedInterval into linear_space since it's feature-gated here and I wanted to re-use it. I'm not sure if there's a better place for it.

#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[strum(serialize_all = "snake_case")]
pub enum ClosedInterval {
#[default]
Both,
Left,
Right,
None,
}
use crate::series::ClosedInterval;

pub fn is_between(
s: &Series,
Expand Down
104 changes: 104 additions & 0 deletions crates/polars-ops/src/series/ops/linear_space.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
use polars_core::prelude::*;
use polars_core::series::IsSorted;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use strum_macros::IntoStaticStr;

/// Selects which endpoints of an interval count as part of it.
///
/// The default is [`ClosedInterval::Both`]. When the `serde` feature is enabled the enum also
/// derives `Serialize` and `Deserialize`; the `strum` attribute requests snake_case names for the
/// derived `IntoStaticStr` conversion.
#[derive(Copy, Clone, Debug, Hash, Eq, PartialEq, Default, IntoStaticStr)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[strum(serialize_all = "snake_case")]
pub enum ClosedInterval {
/// Start and end are both included.
#[default]
Both,
/// Only the start is included.
Left,
/// Only the end is included.
Right,
/// Neither endpoint is included.
None,
}

/// Build a `Float32` series of `n` evenly spaced values between `start` and `end`.
///
/// `closed` decides which of the two bounds appear in the output:
/// * `Both`: first value is `start`, last value is `end`;
/// * `Left`: first value is `start`, `end` is left out;
/// * `Right`: `start` is left out, last value is `end`;
/// * `None`: both bounds are left out.
///
/// `n == 0` yields an empty series. `n == 1` yields `start` (`Left`/`Both`), `end` (`Right`) or
/// the midpoint of the bounds (`None`). The result is flagged as sorted descending when
/// `end < start` and ascending otherwise.
pub fn new_linear_space_f32(
    start: f32,
    end: f32,
    n: u64,
    closed: ClosedInterval,
    name: PlSmallStr,
) -> PolarsResult<Series> {
    // The direction is derived from the caller-supplied bounds, not from the generated values.
    let sortedness = if end < start {
        IsSorted::Descending
    } else {
        IsSorted::Ascending
    };

    let mut out = if n == 0 {
        Float32Chunked::full_null(name, 0)
    } else if n == 1 {
        let only = match closed {
            ClosedInterval::Left | ClosedInterval::Both => start,
            ClosedInterval::Right => end,
            ClosedInterval::None => (end + start) * 0.5,
        };
        Float32Chunked::from_slice(name, &[only])
    } else {
        let width = end - start;

        // Number of equal steps the span is divided into depends on how many bounds are kept.
        let step = match closed {
            ClosedInterval::Both => width / (n - 1) as f32,
            ClosedInterval::Left | ClosedInterval::Right => width / n as f32,
            ClosedInterval::None => width / (n + 1) as f32,
        };
        let first = match closed {
            ClosedInterval::Both | ClosedInterval::Left => start,
            ClosedInterval::Right | ClosedInterval::None => start + step,
        };
        let last = match closed {
            ClosedInterval::Both | ClosedInterval::Right => end,
            ClosedInterval::Left | ClosedInterval::None => end - step,
        };

        // The final value is appended explicitly instead of being derived from the step, so it
        // is not affected by accumulated floating point error.
        let leading = (0..n - 1).map(move |i| (i as f32 * step) + first);
        Float32Chunked::from_iter_values(name, leading.chain(std::iter::once(last)))
    };

    out.set_sorted_flag(sortedness);
    Ok(out.into_series())
}

/// Build a `Float64` series of `n` evenly spaced values between `start` and `end`.
///
/// `closed` decides which of the two bounds appear in the output:
/// * `Both`: first value is `start`, last value is `end`;
/// * `Left`: first value is `start`, `end` is left out;
/// * `Right`: `start` is left out, last value is `end`;
/// * `None`: both bounds are left out.
///
/// `n == 0` yields an empty series. `n == 1` yields `start` (`Left`/`Both`), `end` (`Right`) or
/// the midpoint of the bounds (`None`). The result is flagged as sorted descending when
/// `end < start` and ascending otherwise.
pub fn new_linear_space_f64(
    start: f64,
    end: f64,
    n: u64,
    closed: ClosedInterval,
    name: PlSmallStr,
) -> PolarsResult<Series> {
    // The direction is derived from the caller-supplied bounds, not from the generated values.
    let sortedness = if end < start {
        IsSorted::Descending
    } else {
        IsSorted::Ascending
    };

    let mut out = if n == 0 {
        Float64Chunked::full_null(name, 0)
    } else if n == 1 {
        let only = match closed {
            ClosedInterval::Left | ClosedInterval::Both => start,
            ClosedInterval::Right => end,
            ClosedInterval::None => (end + start) * 0.5,
        };
        Float64Chunked::from_slice(name, &[only])
    } else {
        let width = end - start;

        // Number of equal steps the span is divided into depends on how many bounds are kept.
        let step = match closed {
            ClosedInterval::Both => width / (n - 1) as f64,
            ClosedInterval::Left | ClosedInterval::Right => width / n as f64,
            ClosedInterval::None => width / (n + 1) as f64,
        };
        let first = match closed {
            ClosedInterval::Both | ClosedInterval::Left => start,
            ClosedInterval::Right | ClosedInterval::None => start + step,
        };
        let last = match closed {
            ClosedInterval::Both | ClosedInterval::Right => end,
            ClosedInterval::Left | ClosedInterval::None => end - step,
        };

        // The final value is appended explicitly instead of being derived from the step, so it
        // is not affected by accumulated floating point error.
        let leading = (0..n - 1).map(move |i| (i as f64 * step) + first);
        Float64Chunked::from_iter_values(name, leading.chain(std::iter::once(last)))
    };

    out.set_sorted_flag(sortedness);
    Ok(out.into_series())
}
2 changes: 2 additions & 0 deletions crates/polars-ops/src/series/ops/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ mod is_in;
mod is_last_distinct;
#[cfg(feature = "is_unique")]
mod is_unique;
mod linear_space;
#[cfg(feature = "log")]
mod log;
#[cfg(feature = "moment")]
Expand Down Expand Up @@ -105,6 +106,7 @@ pub use is_in::*;
pub use is_last_distinct::*;
#[cfg(feature = "is_unique")]
pub use is_unique::*;
pub use linear_space::*;
#[cfg(feature = "log")]
pub use log::*;
#[cfg(feature = "moment")]
Expand Down
71 changes: 71 additions & 0 deletions crates/polars-plan/src/dsl/function_expr/range/linear_space.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
use arrow::temporal_conversions::MILLISECONDS_IN_DAY;
use polars_core::prelude::*;
use polars_ops::series::{new_linear_space_f32, new_linear_space_f64, ClosedInterval};

use super::utils::ensure_range_bounds_contain_exactly_one_value;

/// Evaluate the `linear_space` range function on its input columns.
///
/// `s` holds three columns in this order: `start`, `end` and `num_samples`. The output column
/// takes its name from `start`.
///
/// The computation is chosen from the dtypes of the two bound values:
/// * both `Float32`: computed with `new_linear_space_f32`;
/// * two equal temporal dtypes: the bounds are read as `i64`, the space is computed in `f64`
///   and the result is cast back to the temporal dtype. `Date` bounds are first scaled by
///   `MILLISECONDS_IN_DAY` and the result is cast to `Datetime(Milliseconds)`;
/// * any other pair of primitive numeric dtypes: computed with `new_linear_space_f64`.
///
/// # Errors
/// Returns an error when
/// * the bounds check on `start`/`end` fails,
/// * `num_samples` does not contain exactly one value,
/// * `num_samples` cannot be extracted as a `u64`,
/// * the bound dtypes match none of the cases above, or
/// * casting the computed values back to the temporal dtype fails.
pub(super) fn linear_space(s: &[Column], closed: ClosedInterval) -> PolarsResult<Column> {
    let start = &s[0];
    let end = &s[1];
    let num_samples = &s[2];
    let name = start.name();

    ensure_range_bounds_contain_exactly_one_value(start, end)?;
    polars_ensure!(
        num_samples.len() == 1,
        ComputeError: "`num_samples` must contain exactly one value, got {} values", num_samples.len()
    );

    // Propagate lookup failures instead of panicking.
    let start = start.get(0)?;
    let end = end.get(0)?;
    let num_samples_value = num_samples.get(0)?;
    // `ok_or_else` keeps the message formatting (and its allocation) off the success path.
    let num_samples = num_samples_value.extract::<u64>().ok_or_else(|| {
        PolarsError::ComputeError(
            format!(
                "'num_samples' must be non-negative integer, got {}",
                num_samples_value
            )
            .into(),
        )
    })?;

    match (start.dtype(), end.dtype()) {
        (DataType::Float32, DataType::Float32) => new_linear_space_f32(
            // Both values were just matched as `Float32`, so extraction is expected to succeed.
            start.extract::<f32>().unwrap(),
            end.extract::<f32>().unwrap(),
            num_samples,
            closed,
            name.clone(),
        ),
        (mut dt, dt2) if dt.is_temporal() && dt == dt2 => {
            let mut start = start.extract::<i64>().unwrap();
            let mut end = end.extract::<i64>().unwrap();

            // A linear space of a Date produces a sequence of Datetimes, so we must upcast.
            if dt == DataType::Date {
                start *= MILLISECONDS_IN_DAY;
                end *= MILLISECONDS_IN_DAY;
                dt = DataType::Datetime(TimeUnit::Milliseconds, None);
            }
            // NOTE: `i64 as f64` is lossy for magnitudes above 2^53, so very large physical
            // values are only approximated by the intermediate float computation.
            new_linear_space_f64(start as f64, end as f64, num_samples, closed, name.clone())
                // Surface a failed cast as an error rather than unwrapping it.
                .and_then(|values| values.cast(&dt))
        },
        (dt1, dt2) if !dt1.is_primitive_numeric() || !dt2.is_primitive_numeric() => {
            Err(PolarsError::ComputeError(
                format!(
                    "'start' and 'end' have incompatible dtypes, got {:?} and {:?}",
                    dt1, dt2
                )
                .into(),
            ))
        },
        // Both dtypes passed the `is_primitive_numeric` check of the previous arm.
        (_, _) => new_linear_space_f64(
            start.extract::<f64>().unwrap(),
            end.extract::<f64>().unwrap(),
            num_samples,
            closed,
            name.clone(),
        ),
    }
    .map(Column::from)
}
10 changes: 10 additions & 0 deletions crates/polars-plan/src/dsl/function_expr/range/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,15 @@ mod date_range;
#[cfg(feature = "dtype-datetime")]
mod datetime_range;
mod int_range;
mod linear_space;
#[cfg(feature = "dtype-time")]
mod time_range;
mod utils;

use std::fmt::{Display, Formatter};

use polars_core::prelude::*;
use polars_ops::series::ClosedInterval;
#[cfg(feature = "temporal")]
use polars_time::{ClosedWindow, Duration};
#[cfg(feature = "serde")]
Expand All @@ -28,6 +30,9 @@ pub enum RangeFunction {
dtype: DataType,
},
IntRanges,
LinearSpace {
closed: ClosedInterval,
},
#[cfg(feature = "dtype-date")]
DateRange {
interval: Duration,
Expand Down Expand Up @@ -70,6 +75,7 @@ impl RangeFunction {
match self {
IntRange { dtype, .. } => mapper.with_dtype(dtype.clone()),
IntRanges => mapper.with_dtype(DataType::List(Box::new(DataType::Int64))),
LinearSpace { closed: _ } => mapper.with_dtype(DataType::Float64),
#[cfg(feature = "dtype-date")]
DateRange { .. } => mapper.with_dtype(DataType::Date),
#[cfg(feature = "dtype-date")]
Expand Down Expand Up @@ -112,6 +118,7 @@ impl Display for RangeFunction {
let s = match self {
IntRange { .. } => "int_range",
IntRanges => "int_ranges",
LinearSpace { .. } => "linear_space",
#[cfg(feature = "dtype-date")]
DateRange { .. } => "date_range",
#[cfg(feature = "temporal")]
Expand Down Expand Up @@ -139,6 +146,9 @@ impl From<RangeFunction> for SpecialEq<Arc<dyn ColumnsUdf>> {
IntRanges => {
map_as_slice!(int_range::int_ranges)
},
LinearSpace { closed } => {
map_as_slice!(linear_space::linear_space, closed)
},
#[cfg(feature = "dtype-date")]
DateRange { interval, closed } => {
map_as_slice!(date_range::date_range, interval, closed)
Expand Down
18 changes: 18 additions & 0 deletions crates/polars-plan/src/dsl/functions/range.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
use polars_ops::series::ClosedInterval;
use polars_time::ClosedWindow;

use super::*;

/// Generate a range of integers.
Expand Down Expand Up @@ -156,3 +159,18 @@ pub fn time_ranges(start: Expr, end: Expr, interval: Duration, closed: ClosedWin
},
}
}

/// Create an expression that produces `num_samples` equally-spaced points between `start` and
/// `end`, with `closed` selecting which of the two bounds are included.
pub fn linear_space(start: Expr, end: Expr, num_samples: Expr, closed: ClosedInterval) -> Expr {
    let function = FunctionExpr::Range(RangeFunction::LinearSpace { closed });
    let flags = FunctionFlags::default() | FunctionFlags::ALLOW_RENAME;
    let options = FunctionOptions {
        collect_groups: ApplyOptions::GroupWise,
        flags,
        ..Default::default()
    };

    Expr::Function {
        // Input order is significant: start, end, num_samples.
        input: vec![start, end, num_samples],
        function,
        options,
    }
}
15 changes: 15 additions & 0 deletions crates/polars-python/src/functions/range.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
use polars::lazy::dsl;
use polars_core::with_match_physical_integer_polars_type;
use polars_ops::series::ClosedInterval;
use pyo3::prelude::*;

use crate::error::PyPolarsErr;
Expand Down Expand Up @@ -159,3 +160,17 @@ pub fn time_ranges(
let closed = closed.0;
Ok(dsl::time_ranges(start, end, every, closed).into())
}

/// Python binding for `dsl::linear_space`: unwraps the Python-side expression and enum wrappers
/// and returns the resulting expression wrapped as a `PyExpr`.
#[pyfunction]
pub fn linear_space(
    start: PyExpr,
    end: PyExpr,
    num_samples: PyExpr,
    closed: Wrap<ClosedInterval>,
) -> PyResult<PyExpr> {
    let expr = dsl::linear_space(start.inner, end.inner, num_samples.inner, closed.0);
    Ok(expr.into())
}
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ These functions are available from the Polars module root and can be used as exp
int_ranges
last
len
linear_space
lit
map_batches
map_groups
Expand Down
2 changes: 2 additions & 0 deletions py-polars/polars/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,7 @@
int_ranges,
last,
len,
linear_space,
lit,
map_batches,
map_groups,
Expand Down Expand Up @@ -358,6 +359,7 @@
"int_range",
"int_ranges",
"last",
"linear_space",
"lit",
"map_batches",
"map_groups",
Expand Down
2 changes: 2 additions & 0 deletions py-polars/polars/functions/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@
datetime_ranges,
int_range,
int_ranges,
linear_space,
time_range,
time_ranges,
)
Expand Down Expand Up @@ -149,6 +150,7 @@
"int_range",
"int_ranges",
"last",
"linear_space",
"lit",
"map_batches",
"map_groups",
Expand Down
2 changes: 2 additions & 0 deletions py-polars/polars/functions/range/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from polars.functions.range.date_range import date_range, date_ranges
from polars.functions.range.datetime_range import datetime_range, datetime_ranges
from polars.functions.range.int_range import arange, int_range, int_ranges
from polars.functions.range.linear_space import linear_space
from polars.functions.range.time_range import time_range, time_ranges

__all__ = [
Expand All @@ -11,6 +12,7 @@
"datetime_ranges",
"int_range",
"int_ranges",
"linear_space",
"time_range",
"time_ranges",
]
Loading
Loading