Skip to content

Commit

Permalink
feat!: Rename struct fields of rle output to len/value and upda…
Browse files Browse the repository at this point in the history
…te data type of `len` field (#15249)
  • Loading branch information
stinodego authored Jun 4, 2024
1 parent 4ef4aa4 commit c5f8117
Show file tree
Hide file tree
Showing 5 changed files with 43 additions and 40 deletions.
13 changes: 7 additions & 6 deletions crates/polars-ops/src/series/ops/rle.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,10 @@ pub fn rle(s: &Series) -> PolarsResult<Series> {
let (s1, s2) = (s.slice(0, s.len() - 1), s.slice(1, s.len()));
let s_neq = s1.not_equal_missing(&s2)?;
let n_runs = s_neq.sum().unwrap() + 1;
let mut lengths = Vec::with_capacity(n_runs as usize);

let mut lengths = Vec::<IdxSize>::with_capacity(n_runs as usize);
lengths.push(1);
let mut vals = Series::new_empty("values", s.dtype());
let mut vals = Series::new_empty("value", s.dtype());
let vals = vals.extend(&s.head(Some(1)))?.extend(&s2.filter(&s_neq)?)?;
let mut idx = 0;
for v in s_neq.into_iter() {
Expand All @@ -19,24 +20,24 @@ pub fn rle(s: &Series) -> PolarsResult<Series> {
}
}

let outvals = vec![Series::from_vec("lengths", lengths), vals.to_owned()];
let outvals = vec![Series::from_vec("len", lengths), vals.to_owned()];
Ok(StructChunked::new("rle", &outvals)?.into_series())
}

/// Similar to `rle`, but maps values to run IDs.
pub fn rle_id(s: &Series) -> PolarsResult<Series> {
if s.len() == 0 {
return Ok(Series::new_empty("id", &DataType::UInt32));
return Ok(Series::new_empty("id", &IDX_DTYPE));
}
let (s1, s2) = (s.slice(0, s.len() - 1), s.slice(1, s.len()));
let s_neq = s1.not_equal_missing(&s2)?;

let mut out = Vec::with_capacity(s.len());
let mut out = Vec::<IdxSize>::with_capacity(s.len());
let mut last = 0;
out.push(last); // Run numbers start at zero
for a in s_neq.downcast_iter() {
for aa in a.values_iter() {
last += aa as u32;
last += aa as IdxSize;
out.push(last);
}
}
Expand Down
6 changes: 3 additions & 3 deletions crates/polars-plan/src/dsl/function_expr/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -283,12 +283,12 @@ impl FunctionExpr {
#[cfg(feature = "rle")]
RLE => mapper.map_dtype(|dt| {
DataType::Struct(vec![
Field::new("lengths", DataType::Int32),
Field::new("values", dt.clone()),
Field::new("len", IDX_DTYPE),
Field::new("value", dt.clone()),
])
}),
#[cfg(feature = "rle")]
RLEID => mapper.with_dtype(DataType::UInt32),
RLEID => mapper.with_dtype(IDX_DTYPE),
ToPhysical => mapper.to_physical_type(),
#[cfg(feature = "random")]
Random { .. } => mapper.with_same_dtype(),
Expand Down
28 changes: 14 additions & 14 deletions py-polars/polars/expr/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -4475,8 +4475,8 @@ def rle(self) -> Self:
Returns
-------
Expr
Expression of data type `Struct` with fields `lengths` of data type `Int32`
and `values` of the original data type.
Expression of data type `Struct` with fields `len` of data type `UInt32`
and `value` of the original data type.
See Also
--------
Expand All @@ -4487,18 +4487,18 @@ def rle(self) -> Self:
>>> df = pl.DataFrame({"a": [1, 1, 2, 1, None, 1, 3, 3]})
>>> df.select(pl.col("a").rle()).unnest("a")
shape: (6, 2)
┌─────────┬────────┐
│ lengths ┆ values │
│ ---     ┆ ---    │
│ i32     ┆ i64    │
╞═════════╪════════╡
│ 2       ┆ 1      │
│ 1       ┆ 2      │
│ 1       ┆ 1      │
│ 1       ┆ null   │
│ 1       ┆ 1      │
│ 2       ┆ 3      │
└─────────┴────────┘
┌─────┬───────┐
│ len ┆ value │
│ --- ┆ ---   │
│ u32 ┆ i64   │
╞═════╪═══════╡
│ 2   ┆ 1     │
│ 1   ┆ 2     │
│ 1   ┆ 1     │
│ 1   ┆ null  │
│ 1   ┆ 1     │
│ 2   ┆ 3     │
└─────┴───────┘
"""
return self._from_pyexpr(self._pyexpr.rle())

Expand Down
28 changes: 14 additions & 14 deletions py-polars/polars/series/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -2545,26 +2545,26 @@ def rle(self) -> Series:
Returns
-------
Series
Series of data type `Struct` with fields `lengths` of data type `Int32`
and `values` of the original data type.
Series of data type `Struct` with fields `len` of data type `UInt32`
and `value` of the original data type.
Examples
--------
>>> s = pl.Series("s", [1, 1, 2, 1, None, 1, 3, 3])
>>> s.rle().struct.unnest()
shape: (6, 2)
┌─────────┬────────┐
│ lengths ┆ values │
│ ---     ┆ ---    │
│ i32     ┆ i64    │
╞═════════╪════════╡
│ 2       ┆ 1      │
│ 1       ┆ 2      │
│ 1       ┆ 1      │
│ 1       ┆ null   │
│ 1       ┆ 1      │
│ 2       ┆ 3      │
└─────────┴────────┘
┌─────┬───────┐
│ len ┆ value │
│ --- ┆ ---   │
│ u32 ┆ i64   │
╞═════╪═══════╡
│ 2   ┆ 1     │
│ 1   ┆ 2     │
│ 1   ┆ 1     │
│ 1   ┆ null  │
│ 1   ┆ 1     │
│ 2   ┆ 3     │
└─────┴───────┘
"""

def rle_id(self) -> Series:
Expand Down
8 changes: 5 additions & 3 deletions py-polars/tests/unit/operations/test_rle.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@ def test_rle() -> None:
lf = pl.LazyFrame({"a": values})

expected = pl.LazyFrame(
{"lengths": [2, 1, 1, 1, 1, 2], "values": [1, 2, 1, None, 1, 3]},
schema_overrides={"lengths": pl.Int32},
{"len": [2, 1, 1, 1, 1, 2], "value": [1, 2, 1, None, 1, 3]},
schema_overrides={"len": pl.get_index_type()},
)

result_expr = lf.select(pl.col("a").rle()).unnest("a")
Expand All @@ -22,7 +22,9 @@ def test_rle_id() -> None:
values = [1, 1, 2, 1, None, 1, 3, 3]
lf = pl.LazyFrame({"a": values})

expected = pl.LazyFrame({"a": [0, 0, 1, 2, 3, 4, 5, 5]}, schema={"a": pl.UInt32})
expected = pl.LazyFrame(
{"a": [0, 0, 1, 2, 3, 4, 5, 5]}, schema={"a": pl.get_index_type()}
)

result_expr = lf.select(pl.col("a").rle_id())
assert_frame_equal(result_expr, expected)
Expand Down

0 comments on commit c5f8117

Please sign in to comment.