From d9b5dad75b443cab6be3662e06f79e225a0ff6bf Mon Sep 17 00:00:00 2001
From: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com>
Date: Sat, 19 Oct 2024 18:02:11 +0200
Subject: [PATCH] docs: add some docs for DataFrame

---
 R/dataframe-frame.R            | 249 ++++++++++++++++++++++++++++++++-
 R/lazyframe-frame.R            |  26 +++-
 man/dataframe__cast.Rd         |  27 ++++
 man/dataframe__clone.Rd        |  54 +++++++
 man/dataframe__drop.Rd         |  27 ++++
 man/dataframe__equals.Rd       |  24 ++++
 man/dataframe__filter.Rd       |  30 ++++
 man/dataframe__get_columns.Rd  |   6 +-
 man/dataframe__group_by.Rd     |  50 +++++++
 man/dataframe__lazy.Rd         |  17 +++
 man/dataframe__select.Rd       |  33 +++++
 man/dataframe__slice.Rd        |  28 ++++
 man/dataframe__sort.Rd         |  32 +++++
 man/dataframe__to_series.Rd    |  30 ++++
 man/dataframe__with_columns.Rd |  46 ++++++
 man/lazyframe__select.Rd       |  10 +-
 man/lazyframe__with_columns.Rd |  14 +-
 man/pl__LazyFrame.Rd           |   2 +-
 18 files changed, 685 insertions(+), 20 deletions(-)
 create mode 100644 man/dataframe__cast.Rd
 create mode 100644 man/dataframe__clone.Rd
 create mode 100644 man/dataframe__drop.Rd
 create mode 100644 man/dataframe__equals.Rd
 create mode 100644 man/dataframe__filter.Rd
 create mode 100644 man/dataframe__group_by.Rd
 create mode 100644 man/dataframe__lazy.Rd
 create mode 100644 man/dataframe__select.Rd
 create mode 100644 man/dataframe__slice.Rd
 create mode 100644 man/dataframe__sort.Rd
 create mode 100644 man/dataframe__to_series.Rd
 create mode 100644 man/dataframe__with_columns.Rd

diff --git a/R/dataframe-frame.R b/R/dataframe-frame.R
index 2a0057a0..d839b458 100644
--- a/R/dataframe-frame.R
+++ b/R/dataframe-frame.R
@@ -12,6 +12,7 @@
 #' so each argument in `...` is converted to a Polars Series by [as_polars_series()]
 #' and then passed to [as_polars_df()].
 #' @aliases polars_data_frame DataFrame
+#'
 #' @section Active bindings:
 #' - `columns`: `$columns` returns a character vector with the names of the columns.
 #' - `dtypes`: `$dtypes` returns a nameless list of the data type of each column.
@@ -127,19 +128,69 @@ dataframe__to_struct <- function(name = "") {
     wrap()
 }
 
+#' Convert an existing DataFrame to a LazyFrame
+#' @description Start a new lazy query from a DataFrame.
+#'
+#' @inherit as_polars_lf return
+#' @examples
+#' pl$DataFrame(a = 1:2, b = c(NA, "a"))$lazy()
 dataframe__lazy <- function() {
   self$`_df`$lazy() |>
     wrap()
 }
 
+#' Clone a DataFrame
+#'
+#' This is a cheap operation that does not copy data. Assigning does not copy
+#' the DataFrame (environment object). This is because environment objects have
+#' reference semantics. Calling $clone() creates a new environment, which can
+#' be useful when dealing with attributes (see examples).
+#'
+#' @inherit as_polars_df return
+#' @examples
+#' df1 <- as_polars_df(iris)
+#'
+#' # Assigning does not copy the DataFrame (environment object), calling
+#' # $clone() creates a new environment.
+#' df2 <- df1
+#' df3 <- df1$clone()
+#' rlang::env_label(df1)
+#' rlang::env_label(df2)
+#' rlang::env_label(df3)
+#'
+#' # Cloning can be useful to add attributes to data used in a function without
+#' # adding those attributes to the original object.
+#'
+#' # Make a function to take a DataFrame, add an attribute, and return a
+#' # DataFrame:
+#' give_attr <- function(data) {
+#'   attr(data, "created_on") <- "2024-01-29"
+#'   data
+#' }
+#' df2 <- give_attr(df1)
+#'
+#' # Problem: the original DataFrame also gets the attribute while it shouldn't
+#' attributes(df1)
+#'
+#' # Use $clone() inside the function to avoid that
+#' give_attr <- function(data) {
+#'   data <- data$clone()
+#'   attr(data, "created_on") <- "2024-01-29"
+#'   data
+#' }
+#' df1 <- as_polars_df(iris)
+#' df2 <- give_attr(df1)
+#'
+#' # now, the original DataFrame doesn't get this attribute
+#' attributes(df1)
 dataframe__clone <- function() {
   self$`_df`$clone() |>
     wrap()
 }
 
-#' Get the DataFrame as a List of Series
+#' Get the DataFrame as a list of Series
 #'
-#' @return A [list] of [Series]
+#' @return A list of [Series]
 #' @seealso
 #' - [`as.list(<polars_data_frame>)`][as.list.polars_data_frame]
 #' @examples
@@ -160,25 +211,119 @@ dataframe__get_columns <- function() {
     })
 }
 
+#' Group a DataFrame
+#'
+#' @inherit lazyframe__group_by description params
+#' @details Within each group, the order of the rows is always preserved,
+#' regardless of the `maintain_order` argument.
+#' @return [GroupBy][GroupBy_class] (a DataFrame with special groupby methods like `$agg()`)
+#' @seealso
+#' - [`<DataFrame>$partition_by()`][DataFrame_partition_by]
+#' @examples
+#' df <- pl$DataFrame(
+#'   a = c("a", "b", "a", "b", "c"),
+#'   b = c(1, 2, 1, 3, 3),
+#'   c = c(5, 4, 3, 2, 1)
+#' )
+#'
+#' df$group_by("a")$agg(pl$col("b")$sum())
+#'
+#' # Set `maintain_order = TRUE` to ensure the order of the groups is
+#' # consistent with the input.
+#' df$group_by("a", maintain_order = TRUE)$agg(pl$col("c"))
+#'
+#' # Group by multiple columns by passing a character vector of column names.
+#' df$group_by(c("a", "b"))$agg(pl$max("c"))
+#'
+#' # Or pass some arguments to group by multiple columns in the same way.
+#' # Expressions are also accepted.
+#' df$group_by("a", pl$col("b") %/% 2)$agg(
+#'   pl$col("c")$mean()
+#' )
+#'
+#' # The columns will be renamed to the argument names.
+#' df$group_by(d = "a", e = pl$col("b") %/% 2)$agg(
+#'   pl$col("c")$mean()
+#' )
 dataframe__group_by <- function(..., maintain_order = FALSE) {
   wrap_to_group_by(self, list2(...), maintain_order)
 }
 
+#' Select and modify columns of a DataFrame
+#'
+#' @inherit lazyframe__select description params
+#' @inherit as_polars_df return
+#' @examples
+#' as_polars_df(iris)$select(
+#'   abs_SL = pl$col("Sepal.Length")$abs(),
+#'   add_2_SL = pl$col("Sepal.Length") + 2
+#' )
 dataframe__select <- function(...) {
   self$lazy()$select(...)$collect(`_eager` = TRUE) |>
     wrap()
 }
 
+#' Modify/append column(s) of a DataFrame
+#'
+#' @inherit lazyframe__with_columns description params
+#' @inherit as_polars_df return
+#' @examples
+#' as_polars_df(iris)$with_columns(
+#'   abs_SL = pl$col("Sepal.Length")$abs(),
+#'   add_2_SL = pl$col("Sepal.Length") + 2
+#' )
+#'
+#' # same query
+#' l_expr <- list(
+#'   pl$col("Sepal.Length")$abs()$alias("abs_SL"),
+#'   (pl$col("Sepal.Length") + 2)$alias("add_2_SL")
+#' )
+#' as_polars_df(iris)$with_columns(l_expr)
+#'
+#' as_polars_df(iris)$with_columns(
+#'   SW_add_2 = (pl$col("Sepal.Width") + 2),
+#'   # unnamed expr will keep name "Sepal.Length"
+#'   pl$col("Sepal.Length")$abs()
+#' )
 dataframe__with_columns <- function(...) {
   self$lazy()$with_columns(...)$collect(`_eager` = TRUE) |>
     wrap()
 }
 
+# TODO-REWRITE: before release, add in news that param idx was renamed "index"
+# and mention that it errors if out of bounds
+#' Select column as Series at index location
+#'
+#' @param index Index of the column to return as Series. Defaults to 0, which is
+#' the first column.
+#'
+#' @return A polars [Series]
+#' @examples
+#' df <- as_polars_df(iris[1:10, ])
+#'
+#' # default is to extract the first column
+#' df$to_series()
+#'
+#' # Polars is 0-indexed, so we use index = 1 to extract the *2nd* column
+#' df$to_series(index = 1)
+#'
+#' # an out-of-bounds index raises an error
+#' tryCatch(df$to_series(index = 8), error = function(e) print(e))
 dataframe__to_series <- function(index = 0) {
   self$`_df`$to_series(index) |>
     wrap()
 }
 
+#' Check whether the DataFrame is equal to another DataFrame
+#'
+#' @param other DataFrame to compare with.
+#' @return A logical value
+#' @examples
+#' dat1 <- as_polars_df(iris)
+#' dat2 <- as_polars_df(iris)
+#' dat3 <- as_polars_df(mtcars)
+#' dat1$equals(dat2)
+#' dat1$equals(dat3)
 dataframe__equals <- function(other, ..., null_equal = TRUE) {
   wrap({
     check_dots_empty0(...)
@@ -188,6 +333,19 @@ dataframe__equals <- function(other, ..., null_equal = TRUE) {
   })
 }
 
+#' Get a slice of the DataFrame.
+#'
+#' @inherit as_polars_df return
+#' @param offset Start index, can be a negative value. This is 0-indexed, so
+#' `offset = 1` skips the first row.
+#' @param length Length of the slice. If `NULL` (default), all rows starting at
+#' the offset will be selected.
+#' @examples
+#' # skip the first 2 rows and take the 4 following rows
+#' as_polars_df(mtcars)$slice(2, 4)
+#'
+#' # this is equivalent to:
+#' mtcars[3:6, ]
 dataframe__slice <- function(offset, length = NULL) {
   wrap({
     check_number_decimal(offset, allow_infinite = FALSE)
@@ -198,6 +356,17 @@ dataframe__slice <- function(offset, length = NULL) {
   })
 }
 
+#' @inherit LazyFrame_head title details
+#' @param n Number of rows to return. If a negative value is passed,
+#' return all rows except the last [`abs(n)`][abs].
+#' @return A [DataFrame][DataFrame_class]
+#' @examples
+#' df <- pl$DataFrame(foo = 1:5, bar = 6:10, ham = letters[1:5])
+#'
+#' df$head(3)
+#'
+#' # Pass a negative value to get all rows except the last `abs(n)`.
+#' df$head(-3)
 dataframe__head <- function(n = 5) {
   wrap({
     if (isTRUE(n < 0)) {
@@ -207,6 +376,17 @@ dataframe__head <- function(n = 5) {
   })
 }
 
+#' @inherit LazyFrame_tail title
+#' @param n Number of rows to return. If a negative value is passed,
+#' return all rows except the first [`abs(n)`][abs].
+#' @inherit DataFrame_head return
+#' @examples
+#' df <- pl$DataFrame(foo = 1:5, bar = 6:10, ham = letters[1:5])
+#'
+#' df$tail(3)
+#'
+#' # Pass a negative value to get all rows except the first `abs(n)`.
+#' df$tail(-3)
 dataframe__tail <- function(n = 5) {
   wrap({
     if (isTRUE(n < 0)) {
@@ -216,22 +396,87 @@ dataframe__tail <- function(n = 5) {
   })
 }
 
+#' Drop columns of a DataFrame
+#'
+#' @param ... <[`dynamic-dots`][rlang::dyn-dots]> Characters of column names to
+#' drop. Passed to [`pl$col()`][pl__col].
+#' @param strict Validate that all column names exist in the schema and throw an
+#' exception if a column name does not exist in the schema.
+#'
+#' @inherit as_polars_df return
+#' @examples
+#' as_polars_df(mtcars)$drop(c("mpg", "hp"))
+#'
+#' # equivalent
+#' as_polars_df(mtcars)$drop("mpg", "hp")
 dataframe__drop <- function(..., strict = TRUE) {
   self$lazy()$drop(..., strict = strict)$collect(`_eager` = TRUE) |>
     wrap()
 }
 
 # TODO: accept formulas for type mapping
+#' Cast DataFrame column(s) to the specified dtype
+#'
+#' @inherit lazyframe__cast description params
+#'
+#' @inherit as_polars_df return
+#' @examples
+#' df <- pl$DataFrame(
+#'   foo = 1:3,
+#'   bar = c(6, 7, 8),
+#'   ham = as.Date(c("2020-01-02", "2020-03-04", "2020-05-06"))
+#' )
+#'
+#' # Cast only some columns
+#' df$cast(list(foo = pl$Float32, bar = pl$UInt8))
+#'
+#' # Cast all columns to the same type
+#' df$cast(pl$String)
 dataframe__cast <- function(..., strict = TRUE) {
   self$lazy()$cast(..., strict = strict)$collect(`_eager` = TRUE) |>
     wrap()
 }
 
+#' Filter rows of a DataFrame
+#'
+#' @inherit lazyframe__filter description params details
+#'
+#' @inherit as_polars_df return
+#' @examples
+#' df <- as_polars_df(iris)
+#'
+#' df$filter(pl$col("Sepal.Length") > 5)
+#'
+#' # This is equivalent to
+#' # df$filter(pl$col("Sepal.Length") > 5 & pl$col("Petal.Width") < 1)
+#' df$filter(pl$col("Sepal.Length") > 5, pl$col("Petal.Width") < 1)
+#'
+#' # rows where condition is NA are dropped
+#' iris2 <- iris
+#' iris2[c(1, 3, 5), "Species"] <- NA
+#' df <- as_polars_df(iris2)
+#'
+#' df$filter(pl$col("Species") == "setosa")
 dataframe__filter <- function(...) {
   self$lazy()$filter(...)$collect(`_eager` = TRUE) |>
     wrap()
 }
 
+#' Sort a DataFrame
+#' @inherit lazyframe__sort details description params
+#' @inheritParams DataFrame_unique
+#' @inherit as_polars_df return
+#' @examples
+#' df <- mtcars
+#' df$mpg[1] <- NA
+#' df <- as_polars_df(df)
+#' df$sort("mpg")
+#' df$sort("mpg", nulls_last = TRUE)
+#' df$sort("cyl", "mpg")
+#' df$sort(c("cyl", "mpg"))
+#' df$sort(c("cyl", "mpg"), descending = TRUE)
+#' df$sort(c("cyl", "mpg"), descending = c(TRUE, FALSE))
+#' df$sort(pl$col("cyl"), pl$col("mpg"))
 dataframe__sort <- function(
     ...,
     descending = FALSE,
diff --git a/R/lazyframe-frame.R b/R/lazyframe-frame.R
index 231742a2..89b890d9 100644
--- a/R/lazyframe-frame.R
+++ b/R/lazyframe-frame.R
@@ -6,7 +6,7 @@
 #' and is the preferred (and highest-performance) mode of operation for polars.
 #'
 #' The `pl$LazyFrame(...)` function is a shortcut for `pl$DataFrame(...)$lazy()`.
-#' @aliases plars_lazy_frame LazyFrame
+#' @aliases polars_lazy_frame LazyFrame
 #' @inheritParams pl__DataFrame
 #' @return A polars [LazyFrame]
 #' @seealso
@@ -46,7 +46,16 @@ wrap.PlRLazyFrame <- function(x, ...) {
 }
 
 # TODO: link to pl__select
-#' Select columns from this LazyFrame
+#' Select and modify columns of a LazyFrame
+#'
+#' @description
+#' Select and perform operations on a subset of columns only. This discards
+#' unmentioned columns (like `.()` in `data.table` and contrary to
+#' `dplyr::mutate()`).
+#'
+#' One cannot use new variables in subsequent expressions in the same
+#' `$select()` call. For instance, if you create a variable `x`, you will only
+#' be able to use it in another `$select()` or `$with_columns()` call.
 #'
 #' @inherit pl__LazyFrame return
 #' @param ... <[`dynamic-dots`][rlang::dyn-dots]>
@@ -264,12 +273,17 @@ lazyframe__sort <- function(
   })
 }
 
-#' Add columns to this LazyFrame
+#' Modify/append column(s) of a LazyFrame
+#'
+#' @description
+#' Add columns or modify existing ones with expressions. This is similar to
+#' `dplyr::mutate()` as it keeps unmentioned columns (unlike `$select()`).
 #'
-#' Added columns will replace existing columns with the same name.
+#' However, unlike `dplyr::mutate()`, one cannot use new variables in subsequent
+#' expressions in the same `$with_columns()` call. For instance, if you create a
+#' variable `x`, you will only be able to use it in another `$with_columns()`
+#' or `$select()` call.
 #'
-#' Creating a new LazyFrame using this method does not create a new copy of
-#' existing data.
 #' @inherit pl__LazyFrame return
 #' @inheritParams lazyframe__select
 #' @examples
diff --git a/man/dataframe__cast.Rd b/man/dataframe__cast.Rd
new file mode 100644
index 00000000..afbc236d
--- /dev/null
+++ b/man/dataframe__cast.Rd
@@ -0,0 +1,27 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/dataframe-frame.R
+\name{dataframe__cast}
+\alias{dataframe__cast}
+\title{Cast DataFrame column(s) to the specified dtype}
+\usage{
+dataframe__cast(..., strict = TRUE)
+}
+\value{
+A polars \link{DataFrame}
+}
+\description{
+Cast DataFrame column(s) to the specified dtype
+}
+\examples{
+df <- pl$DataFrame(
+  foo = 1:3,
+  bar = c(6, 7, 8),
+  ham = as.Date(c("2020-01-02", "2020-03-04", "2020-05-06"))
+)
+
+# Cast only some columns
+df$cast(list(foo = pl$Float32, bar = pl$UInt8))
+
+# Cast all columns to the same type
+df$cast(pl$String)
+}
diff --git a/man/dataframe__clone.Rd b/man/dataframe__clone.Rd
new file mode 100644
index 00000000..f6a70533
--- /dev/null
+++ b/man/dataframe__clone.Rd
@@ -0,0 +1,54 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/dataframe-frame.R
+\name{dataframe__clone}
+\alias{dataframe__clone}
+\title{Clone a DataFrame}
+\usage{
+dataframe__clone()
+}
+\value{
+A polars \link{DataFrame}
+}
+\description{
+This is a cheap operation that does not copy data. Assigning does not copy
+the DataFrame (environment object). This is because environment objects have
+reference semantics. Calling $clone() creates a new environment, which can
+be useful when dealing with attributes (see examples).
+}
+\examples{
+df1 <- as_polars_df(iris)
+
+# Assigning does not copy the DataFrame (environment object), calling
+# $clone() creates a new environment.
+df2 <- df1
+df3 <- df1$clone()
+rlang::env_label(df1)
+rlang::env_label(df2)
+rlang::env_label(df3)
+
+# Cloning can be useful to add attributes to data used in a function without
+# adding those attributes to the original object.
+
+# Make a function to take a DataFrame, add an attribute, and return a
+# DataFrame:
+give_attr <- function(data) {
+  attr(data, "created_on") <- "2024-01-29"
+  data
+}
+df2 <- give_attr(df1)
+
+# Problem: the original DataFrame also gets the attribute while it shouldn't
+attributes(df1)
+
+# Use $clone() inside the function to avoid that
+give_attr <- function(data) {
+  data <- data$clone()
+  attr(data, "created_on") <- "2024-01-29"
+  data
+}
+df1 <- as_polars_df(iris)
+df2 <- give_attr(df1)
+
+# now, the original DataFrame doesn't get this attribute
+attributes(df1)
+}
diff --git a/man/dataframe__drop.Rd b/man/dataframe__drop.Rd
new file mode 100644
index 00000000..b979acc5
--- /dev/null
+++ b/man/dataframe__drop.Rd
@@ -0,0 +1,27 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/dataframe-frame.R
+\name{dataframe__drop}
+\alias{dataframe__drop}
+\title{Drop columns of a DataFrame}
+\usage{
+dataframe__drop(..., strict = TRUE)
+}
+\arguments{
+\item{...}{<\code{\link[rlang:dyn-dots]{dynamic-dots}}> Characters of column names to
+drop. Passed to \code{\link[=pl__col]{pl$col()}}.}
+
+\item{strict}{Validate that all column names exist in the schema and throw an
+exception if a column name does not exist in the schema.}
+}
+\value{
+A polars \link{DataFrame}
+}
+\description{
+Drop columns of a DataFrame
+}
+\examples{
+as_polars_df(mtcars)$drop(c("mpg", "hp"))
+
+# equivalent
+as_polars_df(mtcars)$drop("mpg", "hp")
+}
diff --git a/man/dataframe__equals.Rd b/man/dataframe__equals.Rd
new file mode 100644
index 00000000..8fd8e193
--- /dev/null
+++ b/man/dataframe__equals.Rd
@@ -0,0 +1,24 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/dataframe-frame.R
+\name{dataframe__equals}
+\alias{dataframe__equals}
+\title{Check whether the DataFrame is equal to another DataFrame}
+\usage{
+dataframe__equals(other, ..., null_equal = TRUE)
+}
+\arguments{
+\item{other}{DataFrame to compare with.}
+}
+\value{
+A logical value
+}
+\description{
+Check whether the DataFrame is equal to another DataFrame
+}
+\examples{
+dat1 <- as_polars_df(iris)
+dat2 <- as_polars_df(iris)
+dat3 <- as_polars_df(mtcars)
+dat1$equals(dat2)
+dat1$equals(dat3)
+}
diff --git a/man/dataframe__filter.Rd b/man/dataframe__filter.Rd
new file mode 100644
index 00000000..71eb3993
--- /dev/null
+++ b/man/dataframe__filter.Rd
@@ -0,0 +1,30 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/dataframe-frame.R
+\name{dataframe__filter}
+\alias{dataframe__filter}
+\title{Filter rows of a DataFrame}
+\usage{
+dataframe__filter(...)
+}
+\value{
+A polars \link{DataFrame}
+}
+\description{
+Filter rows of a DataFrame
+}
+\examples{
+df <- as_polars_df(iris)
+
+df$filter(pl$col("Sepal.Length") > 5)
+
+# This is equivalent to
+# df$filter(pl$col("Sepal.Length") > 5 & pl$col("Petal.Width") < 1)
+df$filter(pl$col("Sepal.Length") > 5, pl$col("Petal.Width") < 1)
+
+# rows where condition is NA are dropped
+iris2 <- iris
+iris2[c(1, 3, 5), "Species"] <- NA
+df <- as_polars_df(iris2)
+
+df$filter(pl$col("Species") == "setosa")
+}
diff --git a/man/dataframe__get_columns.Rd b/man/dataframe__get_columns.Rd
index fc93d599..820a1ded 100644
--- a/man/dataframe__get_columns.Rd
+++ b/man/dataframe__get_columns.Rd
@@ -2,15 +2,15 @@
 % Please edit documentation in R/dataframe-frame.R
 \name{dataframe__get_columns}
 \alias{dataframe__get_columns}
-\title{Get the DataFrame as a List of Series}
+\title{Get the DataFrame as a list of Series}
 \usage{
 dataframe__get_columns()
 }
 \value{
-A \link{list} of \link{Series}
+A list of \link{Series}
 }
 \description{
-Get the DataFrame as a List of Series
+Get the DataFrame as a list of Series
 }
 \examples{
 df <- pl$DataFrame(foo = c(1, 2, 3), bar = c(4, 5, 6))
diff --git a/man/dataframe__group_by.Rd b/man/dataframe__group_by.Rd
new file mode 100644
index 00000000..bcbcd3b9
--- /dev/null
+++ b/man/dataframe__group_by.Rd
@@ -0,0 +1,50 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/dataframe-frame.R
+\name{dataframe__group_by}
+\alias{dataframe__group_by}
+\title{Group a DataFrame}
+\usage{
+dataframe__group_by(..., maintain_order = FALSE)
+}
+\value{
+\link[=GroupBy_class]{GroupBy} (a DataFrame with special groupby methods like \verb{$agg()})
+}
+\description{
+Group a DataFrame
+}
+\details{
+Within each group, the order of the rows is always preserved,
+regardless of the \code{maintain_order} argument.
+}
+\examples{
+df <- pl$DataFrame(
+  a = c("a", "b", "a", "b", "c"),
+  b = c(1, 2, 1, 3, 3),
+  c = c(5, 4, 3, 2, 1)
+)
+
+df$group_by("a")$agg(pl$col("b")$sum())
+
+# Set `maintain_order = TRUE` to ensure the order of the groups is
+# consistent with the input.
+df$group_by("a", maintain_order = TRUE)$agg(pl$col("c"))
+
+# Group by multiple columns by passing a character vector of column names.
+df$group_by(c("a", "b"))$agg(pl$max("c"))
+
+# Or pass some arguments to group by multiple columns in the same way.
+# Expressions are also accepted.
+df$group_by("a", pl$col("b") \%/\% 2)$agg(
+  pl$col("c")$mean()
+)
+
+# The columns will be renamed to the argument names.
+df$group_by(d = "a", e = pl$col("b") \%/\% 2)$agg(
+  pl$col("c")$mean()
+)
+}
+\seealso{
+\itemize{
+\item \code{\link[=DataFrame_partition_by]{<DataFrame>$partition_by()}}
+}
+}
diff --git a/man/dataframe__lazy.Rd b/man/dataframe__lazy.Rd
new file mode 100644
index 00000000..a2a8d719
--- /dev/null
+++ b/man/dataframe__lazy.Rd
@@ -0,0 +1,17 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/dataframe-frame.R
+\name{dataframe__lazy}
+\alias{dataframe__lazy}
+\title{Convert an existing DataFrame to a LazyFrame}
+\usage{
+dataframe__lazy()
+}
+\value{
+A polars \link{LazyFrame}
+}
+\description{
+Start a new lazy query from a DataFrame.
+}
+\examples{
+pl$DataFrame(a = 1:2, b = c(NA, "a"))$lazy()
+}
diff --git a/man/dataframe__select.Rd b/man/dataframe__select.Rd
new file mode 100644
index 00000000..9d570adc
--- /dev/null
+++ b/man/dataframe__select.Rd
@@ -0,0 +1,33 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/dataframe-frame.R
+\name{dataframe__select}
+\alias{dataframe__select}
+\title{Select and modify columns of a DataFrame}
+\usage{
+dataframe__select(...)
+}
+\arguments{
+\item{...}{<\code{\link[rlang:dyn-dots]{dynamic-dots}}>
+Name-value pairs of objects to be converted to polars \link[=Expr]{expressions}
+by the \code{\link[=as_polars_expr]{as_polars_expr()}} function.
+Characters are parsed as column names, other non-expression inputs are parsed as \link[=pl__lit]{literals}.
+Each name will be used as the expression name.}
+}
+\value{
+A polars \link{DataFrame}
+}
+\description{
+Select and perform operations on a subset of columns only. This discards
+unmentioned columns (like \code{.()} in \code{data.table} and contrary to
+\code{dplyr::mutate()}).
+
+One cannot use new variables in subsequent expressions in the same
+\verb{$select()} call. For instance, if you create a variable \code{x}, you will only
+be able to use it in another \verb{$select()} or \verb{$with_columns()} call.
+}
+\examples{
+as_polars_df(iris)$select(
+  abs_SL = pl$col("Sepal.Length")$abs(),
+  add_2_SL = pl$col("Sepal.Length") + 2
+)
+}
diff --git a/man/dataframe__slice.Rd b/man/dataframe__slice.Rd
new file mode 100644
index 00000000..56ffdcab
--- /dev/null
+++ b/man/dataframe__slice.Rd
@@ -0,0 +1,28 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/dataframe-frame.R
+\name{dataframe__slice}
+\alias{dataframe__slice}
+\title{Get a slice of the DataFrame.}
+\usage{
+dataframe__slice(offset, length = NULL)
+}
+\arguments{
+\item{offset}{Start index, can be a negative value. This is 0-indexed, so
+\code{offset = 1} skips the first row.}
+
+\item{length}{Length of the slice. If \code{NULL} (default), all rows starting at
+the offset will be selected.}
+}
+\value{
+A polars \link{DataFrame}
+}
+\description{
+Get a slice of the DataFrame.
+}
+\examples{
+# skip the first 2 rows and take the 4 following rows
+as_polars_df(mtcars)$slice(2, 4)
+
+# this is equivalent to:
+mtcars[3:6, ]
+}
diff --git a/man/dataframe__sort.Rd b/man/dataframe__sort.Rd
new file mode 100644
index 00000000..26fff1e3
--- /dev/null
+++ b/man/dataframe__sort.Rd
@@ -0,0 +1,32 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/dataframe-frame.R
+\name{dataframe__sort}
+\alias{dataframe__sort}
+\title{Sort a DataFrame}
+\usage{
+dataframe__sort(
+  ...,
+  descending = FALSE,
+  nulls_last = FALSE,
+  multithreaded = TRUE,
+  maintain_order = FALSE
+)
+}
+\value{
+A polars \link{DataFrame}
+}
+\description{
+Sort a DataFrame
+}
+\examples{
+df <- mtcars
+df$mpg[1] <- NA
+df <- as_polars_df(df)
+df$sort("mpg")
+df$sort("mpg", nulls_last = TRUE)
+df$sort("cyl", "mpg")
+df$sort(c("cyl", "mpg"))
+df$sort(c("cyl", "mpg"), descending = TRUE)
+df$sort(c("cyl", "mpg"), descending = c(TRUE, FALSE))
+df$sort(pl$col("cyl"), pl$col("mpg"))
+}
diff --git a/man/dataframe__to_series.Rd b/man/dataframe__to_series.Rd
new file mode 100644
index 00000000..10f80651
--- /dev/null
+++ b/man/dataframe__to_series.Rd
@@ -0,0 +1,30 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/dataframe-frame.R
+\name{dataframe__to_series}
+\alias{dataframe__to_series}
+\title{Select column as Series at index location}
+\usage{
+dataframe__to_series(index = 0)
+}
+\arguments{
+\item{index}{Index of the column to return as Series. Defaults to 0, which is
+the first column.}
+}
+\value{
+A polars \link{Series}
+}
+\description{
+Select column as Series at index location
+}
+\examples{
+df <- as_polars_df(iris[1:10, ])
+
+# default is to extract the first column
+df$to_series()
+
+# Polars is 0-indexed, so we use index = 1 to extract the *2nd* column
+df$to_series(index = 1)
+
+# an out-of-bounds index raises an error
+tryCatch(df$to_series(index = 8), error = function(e) print(e))
+}
diff --git a/man/dataframe__with_columns.Rd b/man/dataframe__with_columns.Rd
new file mode 100644
index 00000000..677dc3c0
--- /dev/null
+++ b/man/dataframe__with_columns.Rd
@@ -0,0 +1,46 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/dataframe-frame.R
+\name{dataframe__with_columns}
+\alias{dataframe__with_columns}
+\title{Modify/append column(s) of a DataFrame}
+\usage{
+dataframe__with_columns(...)
+}
+\arguments{
+\item{...}{<\code{\link[rlang:dyn-dots]{dynamic-dots}}>
+Name-value pairs of objects to be converted to polars \link[=Expr]{expressions}
+by the \code{\link[=as_polars_expr]{as_polars_expr()}} function.
+Characters are parsed as column names, other non-expression inputs are parsed as \link[=pl__lit]{literals}.
+Each name will be used as the expression name.}
+}
+\value{
+A polars \link{DataFrame}
+}
+\description{
+Add columns or modify existing ones with expressions. This is similar to
+\code{dplyr::mutate()} as it keeps unmentioned columns (unlike \verb{$select()}).
+
+However, unlike \code{dplyr::mutate()}, one cannot use new variables in subsequent
+expressions in the same \verb{$with_columns()} call. For instance, if you create a
+variable \code{x}, you will only be able to use it in another \verb{$with_columns()}
+or \verb{$select()} call.
+}
+\examples{
+as_polars_df(iris)$with_columns(
+  abs_SL = pl$col("Sepal.Length")$abs(),
+  add_2_SL = pl$col("Sepal.Length") + 2
+)
+
+# same query
+l_expr <- list(
+  pl$col("Sepal.Length")$abs()$alias("abs_SL"),
+  (pl$col("Sepal.Length") + 2)$alias("add_2_SL")
+)
+as_polars_df(iris)$with_columns(l_expr)
+
+as_polars_df(iris)$with_columns(
+  SW_add_2 = (pl$col("Sepal.Width") + 2),
+  # unnamed expr will keep name "Sepal.Length"
+  pl$col("Sepal.Length")$abs()
+)
+}
diff --git a/man/lazyframe__select.Rd b/man/lazyframe__select.Rd
index 2fcdb0bb..3ef41b08 100644
--- a/man/lazyframe__select.Rd
+++ b/man/lazyframe__select.Rd
@@ -2,7 +2,7 @@
 % Please edit documentation in R/lazyframe-frame.R
 \name{lazyframe__select}
 \alias{lazyframe__select}
-\title{Select columns from this LazyFrame}
+\title{Select and modify columns of a LazyFrame}
 \usage{
 lazyframe__select(...)
 }
@@ -17,7 +17,13 @@ Each name will be used as the expression name.}
 A polars \link{LazyFrame}
 }
 \description{
-Select columns from this LazyFrame
+Select and perform operations on a subset of columns only. This discards
+unmentioned columns (like \code{.()} in \code{data.table} and contrary to
+\code{dplyr::mutate()}).
+
+One cannot use new variables in subsequent expressions in the same
+\verb{$select()} call. For instance, if you create a variable \code{x}, you will only
+be able to use it in another \verb{$select()} or \verb{$with_columns()} call.
 }
 \examples{
 # Pass the name of a column to select that column.
diff --git a/man/lazyframe__with_columns.Rd b/man/lazyframe__with_columns.Rd
index 641f6f19..60262f97 100644
--- a/man/lazyframe__with_columns.Rd
+++ b/man/lazyframe__with_columns.Rd
@@ -2,7 +2,7 @@
 % Please edit documentation in R/lazyframe-frame.R
 \name{lazyframe__with_columns}
 \alias{lazyframe__with_columns}
-\title{Add columns to this LazyFrame}
+\title{Modify/append column(s) of a LazyFrame}
 \usage{
 lazyframe__with_columns(...)
 }
@@ -17,11 +17,13 @@ Each name will be used as the expression name.}
 A polars \link{LazyFrame}
 }
 \description{
-Added columns will replace existing columns with the same name.
-}
-\details{
-Creating a new LazyFrame using this method does not create a new copy of
-existing data.
+Add columns or modify existing ones with expressions. This is similar to
+\code{dplyr::mutate()} as it keeps unmentioned columns (unlike \verb{$select()}).
+
+However, unlike \code{dplyr::mutate()}, one cannot use new variables in subsequent
+expressions in the same \verb{$with_columns()} call. For instance, if you create a
+variable \code{x}, you will only be able to use it in another \verb{$with_columns()}
+or \verb{$select()} call.
 }
 \examples{
 # Pass an expression to add it as a new column.
diff --git a/man/pl__LazyFrame.Rd b/man/pl__LazyFrame.Rd
index 95d5dec0..1be5042f 100644
--- a/man/pl__LazyFrame.Rd
+++ b/man/pl__LazyFrame.Rd
@@ -2,7 +2,7 @@
 % Please edit documentation in R/lazyframe-frame.R
 \name{pl__LazyFrame}
 \alias{pl__LazyFrame}
-\alias{plars_lazy_frame}
+\alias{polars_lazy_frame}
 \alias{LazyFrame}
 \title{Polars LazyFrame class (\code{polars_lazy_frame})}
 \usage{