Skip to content

Commit

Permalink
Merge branch 'main' into prepare-altdoc-0.3.0
Browse files Browse the repository at this point in the history
  • Loading branch information
etiennebacher authored Dec 17, 2023
2 parents 5d7951d + 1b78825 commit 1f0361c
Show file tree
Hide file tree
Showing 11 changed files with 340 additions and 47 deletions.
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@
double (#577).
- The conversion of R's `POSIXct` class to Polars datetime now works correctly with millisecond
precision (#589).
- `<LazyFrame>$filter()`, `<DataFrame>$filter()`, and `pl$when()` now allow multiple conditions
to be separated by commas, like `lf$filter(pl$col("foo") == 1, pl$col("bar") != 2)` (#598).

## polars 0.11.0

Expand Down
18 changes: 10 additions & 8 deletions R/dataframe__frame.R
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,7 @@ pl$DataFrame = function(..., make_names_unique = TRUE, schema = NULL) {

# keys are tentative new column names
keys = names(largs)
if (length(keys) == 0) keys <- rep(NA_character_, length(largs))
if (length(keys) == 0) keys = rep(NA_character_, length(largs))
keys = mapply(largs, keys, FUN = function(column, key) {
if (is.na(key) || nchar(key) == 0) {
if (inherits(column, "RPolarsSeries")) {
Expand Down Expand Up @@ -291,7 +291,7 @@ DataFrame.property_setters = new.env(parent = emptyenv())
pstop(err = paste("no setter method for", name))
}

if (polars_optenv$strictly_immutable) self <- self$clone()
if (polars_optenv$strictly_immutable) self = self$clone()
func = DataFrame.property_setters[[name]]
func(self, value)
self
Expand Down Expand Up @@ -791,25 +791,27 @@ DataFrame_tail = function(n) {
#' Filter rows of a DataFrame
#' @name DataFrame_filter
#'
#' @description This is equivalent to `dplyr::filter()`. Note that rows where
#' the condition returns `NA` are dropped, unlike base subsetting with `[`.
#' @inherit LazyFrame_filter description params details
#'
#' @param bool_expr Polars expression which will evaluate to a boolean.
#' @keywords DataFrame
#' @return A DataFrame with only the rows where the conditions are `TRUE`.
#' @examples
#' df = pl$DataFrame(iris)
#'
#' df$filter(pl$col("Sepal.Length") > 5)
#'
#' # This is equivalent to
#' # df$filter(pl$col("Sepal.Length") > 5 & pl$col("Petal.Width") < 1)
#' df$filter(pl$col("Sepal.Length") > 5, pl$col("Petal.Width") < 1)
#'
#' # rows where condition is NA are dropped
#' iris2 = iris
#' iris2[c(1, 3, 5), "Species"] = NA
#' df = pl$DataFrame(iris2)
#'
#' df$filter(pl$col("Species") == "setosa")
DataFrame_filter = function(bool_expr) {
.pr$DataFrame$lazy(self)$filter(bool_expr)$collect()
DataFrame_filter = function(...) {
  # Delegate to the lazy API: convert to a LazyFrame, forward every condition
  # in `...` to its $filter() method, then collect the result.
  lazy_self = .pr$DataFrame$lazy(self)
  filtered = lazy_self$filter(...)
  filtered$collect()
}

#' Group a DataFrame
Expand Down Expand Up @@ -1542,7 +1544,7 @@ DataFrame_glimpse = function(..., return_as_string = FALSE) {
max_col_name_trunc = 50
parse_column_ = \(col_name, dtype) {
dtype_str = dtype_str_repr(dtype) |> unwrap_or(paste0("??", str_string(dtype)))
if (inherits(dtype, "RPolarsDataType")) dtype_str <- paste0(" <", dtype_str, ">")
if (inherits(dtype, "RPolarsDataType")) dtype_str = paste0(" <", dtype_str, ">")
val = self$select(pl$col(col_name)$slice(0, max_num_value))$to_list()[[1]]
val_str = paste(val, collapse = ", ")
if (nchar(col_name) > max_col_name_trunc) {
Expand Down
50 changes: 39 additions & 11 deletions R/functions__whenthen.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
#' @name Expr_when_then_otherwise
#' @description Start a “when, then, otherwise” expression.
#' @keywords Expr
#' @param condition Into Expr into a boolean mask to branch by. Strings interpreted as column.
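#' @param ... One or more conditions (anything convertible into an Expr) that
#' evaluate to a boolean mask to branch by. Multiple conditions are combined
#' with `&`.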
#' @param statement Into Expr value to insert in when() or otherwise().
#' Strings interpreted as column.
#' @return Expr
Expand Down Expand Up @@ -37,14 +37,36 @@
#' a nested when-then-otherwise expression.
#'
#' @examples
#' df = pl$DataFrame(mtcars)
#' wtt =
#' pl$when(pl$col("cyl") <= 4)$then(pl$lit("<=4cyl"))$
#' when(pl$col("cyl") <= 6)$then(pl$lit("<=6cyl"))$
#' otherwise(pl$lit(">6cyl"))$alias("cyl_groups")
#' print(wtt)
#' df$with_columns(wtt)
pl$when = function(condition) {
#' df = pl$DataFrame(foo = c(1, 3, 4), bar = c(3, 4, 0))
#'
#' # Add a column with the value 1, where column "foo" > 2 and the value -1 where it isn’t.
#' df$with_columns(
#' pl$when(pl$col("foo") > 2)$then(1)$otherwise(-1)$alias("val")
#' )
#'
#' # With multiple when, thens chained:
#' df$with_columns(
#' pl$when(pl$col("foo") > 2)
#' $then(1)
#' $when(pl$col("bar") > 2)
#' $then(4)
#' $otherwise(-1)
#' $alias("val")
#' )
#'
#' # Pass multiple predicates, each of which must be met:
#' df$with_columns(
#' val = pl$when(
#' pl$col("bar") > 0,
#' pl$col("foo") %% 2 != 0
#' )
#' $then(99)
#' $otherwise(-1)
#' )
pl$when = function(...) {
  # Fold all conditions passed through `...` into one boolean expression
  # (unpack_bool_expr() joins them with `&`), then unwrap the Result.
  condition = unwrap(unpack_bool_expr(...), "in pl$when():")

  # Build the When object from the combined condition.
  unwrap(.pr$When$new(condition), "in pl$when():")
}
Expand All @@ -57,7 +79,10 @@ When_then = function(statement) {
unwrap("in $then():")
}

Then_when = function(condition) {
Then_when = function(...) {
  # Fold all conditions passed through `...` into one boolean expression
  # (unpack_bool_expr() joins them with `&`), then unwrap the Result.
  condition = unwrap(unpack_bool_expr(...), "in $when():")

  # Chain a further when-branch onto this Then object.
  unwrap(.pr$Then$when(self, condition), "in $when():")
}
Expand All @@ -72,7 +97,10 @@ ChainedWhen_then = function(statement) {
unwrap("in $then():")
}

ChainedThen_when = function(condition) {
ChainedThen_when = function(...) {
  # Fold all conditions passed through `...` into one boolean expression
  # (unpack_bool_expr() joins them with `&`), then unwrap the Result.
  condition = unwrap(unpack_bool_expr(...), "in $when():")

  # Chain a further when-branch onto this ChainedThen object.
  unwrap(.pr$ChainedThen$when(self, condition), "in $when():")
}
Expand Down
32 changes: 24 additions & 8 deletions R/lazyframe__lazy.R
Original file line number Diff line number Diff line change
Expand Up @@ -266,15 +266,31 @@ LazyFrame_with_row_count = function(name, offset = NULL) {
.pr$LazyFrame$with_row_count(self, name, offset) |> unwrap()
}

#' @title Apply filter to LazyFrame
#' @description Filter rows with an Expression defining a boolean column
#' Apply filter to LazyFrame
#'
#' Filter rows with an Expression defining a boolean column.
#' Multiple expressions are combined with `&` (AND).
#' This is equivalent to [dplyr::filter()].
#'
#' Rows where the condition returns `NA` are dropped.
#' @keywords LazyFrame
#' @param expr one Expr or string naming a column
#' @param ... Polars expressions which will evaluate to a boolean.
#' @return A new `LazyFrame` object containing only the rows for which all
#' conditions are `TRUE`.
#' @docType NULL
#' @usage LazyFrame_filter(expr)
#' @examples pl$LazyFrame(iris)$filter(pl$col("Species") == "setosa")$collect()
LazyFrame_filter = "use_extendr_wrapper"
#' @examples
#' lf = pl$LazyFrame(iris)
#'
#' lf$filter(pl$col("Species") == "setosa")$collect()
#'
#' # This is equivalent to
#' # lf$filter(pl$col("Sepal.Length") > 5 & pl$col("Petal.Width") < 1)
#' lf$filter(pl$col("Sepal.Length") > 5, pl$col("Petal.Width") < 1)
LazyFrame_filter = function(...) {
  # Fold all conditions passed through `...` into one boolean expression
  # (unpack_bool_expr() joins them with `&`), then unwrap the Result.
  bool_expr = unwrap(unpack_bool_expr(...), "in $filter()")

  # Hand the single combined predicate to the underlying filter method.
  .pr$LazyFrame$filter(self, bool_expr)
}

#' @title Get optimization settings
#' @description Get the current optimization toggles for the lazy query
Expand Down Expand Up @@ -1148,8 +1164,8 @@ LazyFrame_join_asof = function(
tolerance = NULL,
allow_parallel = TRUE,
force_parallel = FALSE) {
if (!is.null(by)) by_left <- by_right <- by
if (!is.null(on)) left_on <- right_on <- on
if (!is.null(by)) by_left = by_right = by
if (!is.null(on)) left_on = right_on = on
tolerance_str = if (is.character(tolerance)) tolerance else NULL
tolerance_num = if (!is.character(tolerance)) tolerance else NULL

Expand Down
21 changes: 21 additions & 0 deletions R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,27 @@ unpack_list = function(..., skip_classes = NULL) {
}
}

#' Collapse the conditions passed via dot-dot-dot into one boolean expression
#' @noRd
#' @param ... Unnamed conditions; they are combined pairwise with `&`.
#' @param .msg Forwarded to `result()` as its `msg` argument.
#' @return A Result: a list holding either `ok` (RPolarsExpr class) or `err`
#' (RPolarsErr class). Any named input yields an error Result.
#' @examples
#' unpack_bool_expr(pl$lit(TRUE), pl$lit(FALSE))
unpack_bool_expr = function(..., .msg = NULL) {
  dots = list2(...)

  if (is.null(names(dots))) {
    # No names at all: AND the conditions together from left to right.
    # The reduction runs inside result(), with warnings silenced.
    suppressWarnings(result(Reduce(`&`, x = dots), msg = .msg))
  } else {
    # At least one argument is named, which filter-like functions do not accept.
    Err_plain(
      "Detected a named input.",
      "This usually means that you've used `=` instead of `==`."
    )
  }
}

#' Simple SQL CASE WHEN implementation for R
#' @noRd
#' @description Inspired by data.table::fcase + dplyr::case_when.
Expand Down
16 changes: 12 additions & 4 deletions man/DataFrame_filter.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

35 changes: 27 additions & 8 deletions man/Expr_when_then_otherwise.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

22 changes: 15 additions & 7 deletions man/LazyFrame_filter.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 1f0361c

Please sign in to comment.