Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding replace expression #223

Merged
merged 4 commits into from
Jun 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,7 @@ features = [
"timezones",
"peaks",
"string_pad",
"replace",
"cov",
"http",
"cloud",
Expand Down
33 changes: 31 additions & 2 deletions __tests__/expr.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1135,7 +1135,7 @@ describe("expr.str", () => {
expect(actual).toFrameEqual(expected);
expect(seriesActual).toFrameEqual(expected);
});
test("replace", () => {
test("str.replace", () => {
const df = pl.DataFrame({
os: ["kali-linux", "debian-linux", "ubuntu-linux", "mac-sierra"],
});
Expand All @@ -1151,7 +1151,7 @@ describe("expr.str", () => {
expect(actual).toFrameEqual(expected);
expect(seriesActual).toFrameEqual(expected);
});
test("replaceAll", () => {
test("str.replaceAll", () => {
const df = pl.DataFrame({
os: [
"kali-linux-2021.3a",
Expand All @@ -1177,6 +1177,35 @@ describe("expr.str", () => {
expect(actual).toFrameEqual(expected);
expect(seriesActual).toFrameEqual(expected);
});
test("expr.replace", () => {
  // Every case starts from the same single-column frame and appends the
  // result of `replace` as a second column named "replaced".
  const df = pl.DataFrame({ a: [1, 2, 2, 3] });
  const checkReplaced = (replacedExpr, replacedValues) => {
    const actual = df.withColumns(replacedExpr.alias("replaced"));
    const expected = pl.DataFrame({
      a: [1, 2, 2, 3],
      replaced: replacedValues,
    });
    expect(actual).toFrameEqual(expected);
  };

  // Positional form, single value: unmatched values are kept as-is.
  checkReplaced(pl.col("a").replace(2, 100), [1, 100, 100, 3]);

  // Positional form, sequences plus an explicit default and return dtype.
  checkReplaced(
    pl.col("a").replace([2, 3], [100, 200], -1, pl.Float64),
    [-1, 100, 100, 200],
  );

  // Options form with a mapping, a default and a return dtype.
  const mapping = { 2: 100, 3: 200 };
  checkReplaced(
    pl.col("a").replace({ old: mapping, default_: -1, returnDtype: pl.Int64 }),
    [-1, 100, 100, 200],
  );

  // Options form with only a mapping: unmatched values are kept as-is.
  checkReplaced(pl.col("a").replace({ old: mapping }), [1, 100, 100, 200]);
});
test("slice", () => {
const df = pl.DataFrame({
os: ["linux-kali", "linux-debian", "windows-vista"],
Expand Down
162 changes: 162 additions & 0 deletions polars/lazy/expr/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -743,6 +743,150 @@ export interface Expr
* The column will be coerced to UInt32. Give this dtype to make the coercion a no-op.
*/
repeatBy(by: Expr | string): Expr;
/**
* Replace values by different values.
* @param old - Value or sequence of values to replace.
Accepts expression input. Sequences are parsed as Series, other non-expression inputs are parsed as literals.
* @param new_ - Value or sequence of values to replace by.
Accepts expression input. Sequences are parsed as Series, other non-expression inputs are parsed as literals.
Length must match the length of `old` or have length 1.
* @param default_ - Set values that were not replaced to this value.
Defaults to keeping the original value.
Accepts expression input. Non-expression inputs are parsed as literals.
* @param returnDtype - The data type of the resulting expression. If left `undefined` (default), the data type is determined automatically based on the other inputs.
* @see {@link str.replace}
* @example
* Replace a single value by another value. Values that were not replaced remain unchanged.
* ```
>>> const df = pl.DataFrame({"a": [1, 2, 2, 3]});
>>> df.withColumns(pl.col("a").replace(2, 100).alias("replaced"));
shape: (4, 2)
┌─────┬──────────┐
│ a ┆ replaced │
│ --- ┆ --- │
│ i64 ┆ i64 │
╞═════╪══════════╡
│ 1 ┆ 1 │
│ 2 ┆ 100 │
│ 2 ┆ 100 │
│ 3 ┆ 3 │
└─────┴──────────┘
* ```
* Replace multiple values by passing sequences to the `old` and `new_` parameters.
* ```
>>> df.withColumns(pl.col("a").replace([2, 3], [100, 200]).alias("replaced"));
shape: (4, 2)
┌─────┬──────────┐
│ a ┆ replaced │
│ --- ┆ --- │
│ i64 ┆ i64 │
╞═════╪══════════╡
│ 1 ┆ 1 │
│ 2 ┆ 100 │
│ 2 ┆ 100 │
│ 3 ┆ 200 │
└─────┴──────────┘
* ```
* Passing a mapping with replacements is also supported as syntactic sugar.
Specify a default to set all values that were not matched.
* ```
>>> const mapping = {2: 100, 3: 200};
>>> df.withColumns(pl.col("a").replace({ old: mapping, default_: -1, returnDtype: pl.Int64 }).alias("replaced"));
shape: (4, 2)
┌─────┬──────────┐
│ a ┆ replaced │
│ --- ┆ --- │
│ i64 ┆ i64 │
╞═════╪══════════╡
│ 1 ┆ -1 │
│ 2 ┆ 100 │
│ 2 ┆ 100 │
│ 3 ┆ 200 │
└─────┴──────────┘
* ```
Replacing by values of a different data type sets the return type based on
a combination of the `new_` data type and either the original data type or the
default data type if it was set.
* ```
>>> const df = pl.DataFrame({"a": ["x", "y", "z"]});
>>> const mapping = {"x": 1, "y": 2, "z": 3};
>>> df.withColumns(pl.col("a").replace({ old: mapping }).alias("replaced"));
shape: (3, 2)
┌─────┬──────────┐
│ a ┆ replaced │
│ --- ┆ --- │
│ str ┆ str │
╞═════╪══════════╡
│ x ┆ 1 │
│ y ┆ 2 │
│ z ┆ 3 │
└─────┴──────────┘
>>> df.withColumns(pl.col("a").replace({ old: mapping, default_: None }).alias("replaced"));
shape: (3, 2)
┌─────┬──────────┐
│ a ┆ replaced │
│ --- ┆ --- │
│ str ┆ i64 │
╞═════╪══════════╡
│ x ┆ 1 │
│ y ┆ 2 │
│ z ┆ 3 │
└─────┴──────────┘
* ```
Set the `returnDtype` parameter to control the resulting data type directly.
* ```
>>> df.withColumns(pl.col("a").replace({ old: mapping, returnDtype: pl.UInt8 }).alias("replaced"));
shape: (3, 2)
┌─────┬──────────┐
│ a ┆ replaced │
│ --- ┆ --- │
│ str ┆ u8 │
╞═════╪══════════╡
│ x ┆ 1 │
│ y ┆ 2 │
│ z ┆ 3 │
└─────┴──────────┘
* ```
* Expression input is supported for all parameters.
* ```
>>> const df = pl.DataFrame({"a": [1, 2, 2, 3], "b": [1.5, 2.5, 5.0, 1.0]});
>>> df.withColumns(
... pl.col("a").replace({
... old: pl.col("a").max(),
... new_: pl.col("b").sum(),
... default_: pl.col("b"),
... }).alias("replaced")
... );
shape: (4, 3)
┌─────┬─────┬──────────┐
│ a ┆ b ┆ replaced │
│ --- ┆ --- ┆ --- │
│ i64 ┆ f64 ┆ f64 │
╞═════╪═════╪══════════╡
│ 1 ┆ 1.5 ┆ 1.5 │
│ 2 ┆ 2.5 ┆ 2.5 │
│ 2 ┆ 5.0 ┆ 5.0 │
│ 3 ┆ 1.0 ┆ 10.0 │
└─────┴─────┴──────────┘
* ```
*/
replace(
old: Expr | number | number[],
new_: Expr | number | number[],
default_?: Expr | number | number[],
returnDtype?: DataType,
): Expr;
/**
 * Options-object form of `replace`: all arguments are passed as named
 * properties of a single object instead of positionally.
 * `old` may also be a mapping object whose keys are the values to replace and
 * whose values are the replacements (see the `mapping` examples above).
 */
replace({
old,
new_,
default_,
returnDtype,
}: {
// Typed loosely so that a mapping object can be passed as well.
old: unknown | Expr | number | number[];
new_?: Expr | number | number[];
default_?: Expr | number | number[];
returnDtype?: DataType;
}): Expr;
/** Reverse the arrays in the list */
reverse(): Expr;
/**
Expand Down Expand Up @@ -1421,6 +1565,24 @@ export const _Expr = (_expr: any): Expr => {

return _Expr(_expr.repeatBy(e));
},
/**
 * Replace values by different values.
 *
 * Supports two call shapes:
 *  - positional: `replace(old, new_, default_?, returnDtype?)`
 *  - options object: `replace({ old, new_?, default_?, returnDtype? })`, where
 *    `old` may be a plain mapping object (`{ oldValue: newValue, ... }`).
 *
 * @param old - Value(s)/expression to replace, or the options object.
 * @param newValue - Replacement value(s)/expression (positional form only).
 * @param defaultValue - Value for entries that were not replaced (positional form only).
 * @param returnDtype - Data type of the result (positional form; the options
 *   object's own `returnDtype` takes precedence when it is set).
 * @throws TypeError when an options object is passed without an `old` property.
 */
replace(old, newValue, defaultValue, returnDtype) {
  // Expr wrappers are objects too, but they carry their native handle on
  // `_expr`; a plain options object or mapping does not.
  const isPlainObject = (value: any): boolean =>
    value !== null &&
    typeof value === "object" &&
    !Array.isArray(value) &&
    value._expr === undefined;

  let oldIn: any = old;
  let newIn: any = newValue;
  let defIn: any = defaultValue;
  let dtypeIn: any = returnDtype;

  if (isPlainObject(old)) {
    const opts: any = old;
    if (opts.old === undefined || opts.old === null) {
      throw new TypeError("replace: options object requires an `old` property");
    }
    if (isPlainObject(opts.old)) {
      // Mapping sugar: keys are the values to replace, values the replacements.
      // Note that object keys always arrive as strings.
      oldIn = Object.keys(opts.old);
      newIn = Object.values(opts.old);
    } else {
      // `old` is an expression, literal or array: pair it with `new_`.
      oldIn = opts.old;
      newIn = opts.new_;
    }
    defIn = opts.default_;
    // Honour the dtype given in the options object, but keep accepting one
    // passed positionally alongside it.
    if (opts.returnDtype !== undefined) {
      dtypeIn = opts.returnDtype;
    }
  }

  // Only a missing default means "keep the original value"; a default of 0
  // is a legitimate value and must be forwarded.
  const defaultExpr =
    defIn === undefined || defIn === null
      ? undefined
      : exprToLitOrExpr(defIn)._expr;

  return _Expr(
    _expr.replace(
      exprToLitOrExpr(oldIn)._expr,
      exprToLitOrExpr(newIn)._expr,
      defaultExpr,
      dtypeIn,
    ),
  );
},
reverse() {
  // Delegate to the native expression and re-wrap the result as an Expr.
  const reversed = _expr.reverse();
  return _Expr(reversed);
},
Expand Down
13 changes: 12 additions & 1 deletion src/lazy/dsl.rs
Original file line number Diff line number Diff line change
Expand Up @@ -954,7 +954,18 @@ impl JsExpr {
.split_exact_inclusive(by.0, n as usize)
.into()
}

/// Builds a new expression by calling `replace` on a clone of the wrapped
/// expression, forwarding `old`, `new`, the optional `default` expression and
/// the optional `return_dtype`.
#[napi(catch_unwind)]
pub fn replace(
    &self,
    old: &JsExpr,
    new: &JsExpr,
    default: Option<&JsExpr>,
    return_dtype: Option<Wrap<DataType>>,
) -> JsExpr {
    // Unwrap the JS-facing wrappers into the inner values before delegating.
    let default_expr = default.map(|expr| expr.inner.clone());
    let dtype = return_dtype.map(|wrapped| wrapped.0);
    let replaced = self.inner.clone().replace(
        old.inner.clone(),
        new.inner.clone(),
        default_expr,
        dtype,
    );
    replaced.into()
}
#[napi(catch_unwind)]
pub fn year(&self) -> JsExpr {
self.clone().inner.dt().year().into()
Expand Down