Skip to content

Commit

Permalink
Silently cap slice_sample() slice size at n() when `replace = FAL…
Browse files Browse the repository at this point in the history
…SE` (#6351)

Fixes #6185
  • Loading branch information
eutwt authored Jul 22, 2022
1 parent e691ace commit e47787f
Show file tree
Hide file tree
Showing 4 changed files with 23 additions and 71 deletions.
5 changes: 5 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
# dplyr (development version)

* `slice_sample()` returns a data frame or group with the same number of rows as
the input when `replace = FALSE` and `n` is larger than the number of rows or
`prop` is larger than 1. This reverts a change made in 1.0.8, returning to the
behavior of 1.0.7 (#6185)

* `slice()` helpers again produce output equivalent to `slice(.data, 0)` when
the `n` or `prop` argument is 0, fixing a bug introduced in the previous
version (@eutwt, #6184).
Expand Down
12 changes: 1 addition & 11 deletions R/slice.R
Original file line number Diff line number Diff line change
Expand Up @@ -452,17 +452,7 @@ get_slice_size <- function(n, prop, allow_negative = TRUE, error_call = caller_e

sample_int <- function(n, size, replace = FALSE, wt = NULL, call = caller_env()) {
if (!replace && n < size) {
header <- paste0(
"Can't sample without replacement using a size that is larger than ",
"the number of rows in the data."
)
message <- c(
header,
i = glue("{size} rows were requested in the sample."),
i = glue("{n} rows are present in the data."),
i = "Set `replace = TRUE` to sample with replacement."
)
abort(message, call = call)
size <- n
}

if (size == 0L) {
Expand Down
46 changes: 0 additions & 46 deletions tests/testthat/_snaps/slice.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,52 +12,6 @@
<error/rlang_error>
Error in `slice_sample()`:
! `prop` must be positive.
Code
(expect_error(df %>% slice_sample(n = 4, replace = FALSE)))
Output
<error/rlang_error>
Error in `slice_sample()`:
! Problem while computing indices.
Caused by error:
! Can't sample without replacement using a size that is larger than the number of rows in the data.
i 4 rows were requested in the sample.
i 1 rows are present in the data.
i Set `replace = TRUE` to sample with replacement.
Code
(expect_error(gdf %>% slice_sample(n = 4, replace = FALSE)))
Output
<error/rlang_error>
Error in `slice_sample()`:
! Problem while computing indices.
i The error occurred in group 1: a = 1.
Caused by error:
! Can't sample without replacement using a size that is larger than the number of rows in the data.
i 4 rows were requested in the sample.
i 1 rows are present in the data.
i Set `replace = TRUE` to sample with replacement.
Code
(expect_error(df %>% slice_sample(prop = 4, replace = FALSE)))
Output
<error/rlang_error>
Error in `slice_sample()`:
! Problem while computing indices.
Caused by error:
! Can't sample without replacement using a size that is larger than the number of rows in the data.
i 4 rows were requested in the sample.
i 1 rows are present in the data.
i Set `replace = TRUE` to sample with replacement.
Code
(expect_error(gdf %>% slice_sample(prop = 4, replace = FALSE)))
Output
<error/rlang_error>
Error in `slice_sample()`:
! Problem while computing indices.
i The error occurred in group 1: a = 1.
Caused by error:
! Can't sample without replacement using a size that is larger than the number of rows in the data.
i 4 rows were requested in the sample.
i 1 rows are present in the data.
i Set `replace = TRUE` to sample with replacement.

# slice() gives meaningfull errors

Expand Down
31 changes: 17 additions & 14 deletions tests/testthat/test-slice.r
Original file line number Diff line number Diff line change
Expand Up @@ -210,34 +210,37 @@ test_that("slice_sample() handles n= and prop=", {
(expect_error(
df %>% slice_sample(prop = -1)
))

(expect_error(
df %>% slice_sample(n = 4, replace = FALSE)
))
(expect_error(
gdf %>% slice_sample(n = 4, replace = FALSE)
))

(expect_error(
df %>% slice_sample(prop = 4, replace = FALSE)
))
(expect_error(
gdf %>% slice_sample(prop = 4, replace = FALSE)
))
})
})

test_that("functions silently truncate results", {
df <- data.frame(x = 1:5)

# positive n
expect_equal(df %>% slice_sample(n = 6) %>% nrow(), 5)
expect_equal(df %>% slice_head(n = 6) %>% nrow(), 5)
expect_equal(df %>% slice_tail(n = 6) %>% nrow(), 5)
expect_equal(df %>% slice_min(x, n = 6) %>% nrow(), 5)
expect_equal(df %>% slice_max(x, n = 6) %>% nrow(), 5)

# positive prop
expect_equal(df %>% slice_sample(prop = 2) %>% nrow(), 5)
expect_equal(df %>% slice_head(prop = 2) %>% nrow(), 5)
expect_equal(df %>% slice_tail(prop = 2) %>% nrow(), 5)
expect_equal(df %>% slice_min(x, prop = 2) %>% nrow(), 5)
expect_equal(df %>% slice_max(x, prop = 2) %>% nrow(), 5)

# negative n
expect_equal(df %>% slice_head(n = -6) %>% nrow(), 0)
expect_equal(df %>% slice_tail(n = -6) %>% nrow(), 0)
expect_equal(df %>% slice_min(x, n = -6) %>% nrow(), 0)
expect_equal(df %>% slice_max(x, n = -6) %>% nrow(), 0)

# negative prop
expect_equal(df %>% slice_head(prop = -2) %>% nrow(), 0)
expect_equal(df %>% slice_tail(prop = -2) %>% nrow(), 0)
expect_equal(df %>% slice_min(x, prop = -2) %>% nrow(), 0)
expect_equal(df %>% slice_max(x, prop = -2) %>% nrow(), 0)
})

test_that("proportion computed correctly", {
Expand Down

0 comments on commit e47787f

Please sign in to comment.