Skip to content

Commit

Permalink
Merge pull request #72 from DanChaltiel/fix/71-several-subject-identi…
Browse files Browse the repository at this point in the history
…fiers

Fix/71 several subject identifiers
  • Loading branch information
DanChaltiel authored Oct 22, 2024
2 parents 1189119 + cde3825 commit c57fbb1
Show file tree
Hide file tree
Showing 4 changed files with 155 additions and 3 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
Package: EDCimport
Version: 0.4.1.9055
Version: 0.4.1.9056
Title: Import Data from EDC Software
Authors@R:
c(person(given = "Dan",
Expand Down
11 changes: 9 additions & 2 deletions R/sanity_checks.R
Original file line number Diff line number Diff line change
Expand Up @@ -281,7 +281,7 @@ edc_data_condition = function(.data, message, issue_n, max_subjid,
if(nrow(.data)>0){
if(is.character(csv_path)){
assert(str_ends(csv_path, "\\.csv"), call=parent.frame())
write.csv2(.data, csv_path)
write.csv2(.data, csv_path, row.names=FALSE)
}
message = format_inline(message)

Expand All @@ -294,7 +294,14 @@ edc_data_condition = function(.data, message, issue_n, max_subjid,
}

if(!is.null(col_subjid)){
if(!tolower(col_subjid) %in% tolower(names(.data))){
col_found = tolower(col_subjid) %in% tolower(names(.data))
if(sum(col_found)>1){
cli_warn("Found {length(col_found)} subject identifiers in the input dataset:
{.val {col_subjid[col_found]}}. Defaulting to the first one.",
class="edc_data_condition_subjid_multiple_warn", call=parent.frame())
col_subjid = col_subjid[col_found][1]
}
if(!any(col_found)){
cli_abort("Could not find column {col_subjid} in the input dataset.",
class="edc_data_condition_subjid_error", call=parent.frame())
}
Expand Down
52 changes: 52 additions & 0 deletions tests/testthat/_snaps/sanity.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
# edc_data_warn snapshot

Code
db0 %>% filter(age > 60) %>% edc_data_warn("Age should not be >60")
Condition
Warning:
Issue #xx: Age should not be >60 (10 patients: #1, #7, #9, #11, #12, ...)
Code
db0 %>% filter(age > 70) %>% edc_data_warn("Age should not be >70", issue_n = NULL)
Condition
Warning:
Age should not be >70 (2 patients: #9 and #12)
Code
db0 %>% filter(age > 20) %>% edc_data_warn("Age should not be >20", issue_n = 1,
max_subjid = 2)
Condition
Warning:
Issue #01: Age should not be >20 (50 patients: #1, #2, ...)
Code
db0 %>% filter(age > 70) %>% edc_data_warn("Age should not be >70", issue_n = NULL,
col_subjid = c("SUBJID", "PATNO"))
Condition
Warning:
Age should not be >70 (2 patients: #9 and #12)
Code
db0 %>% filter(age > 70) %>% edc_data_warn("Age should not be >70", issue_n = 2,
col_subjid = c("SUBJID", "group"))
Condition
Warning in `edc_data_warn()`:
Found 2 subject identifiers in the input dataset: "SUBJID" and "group". Defaulting to the first one.
Warning:
Issue #02: Age should not be >70 (2 patients: #9 and #12)
Code
db0 %>% filter(age > 70) %>% edc_data_warn("Age should not be >70", issue_n = 3,
col_subjid = c("group", "SUBJID"))
Condition
Warning in `edc_data_warn()`:
Found 2 subject identifiers in the input dataset: "group" and "SUBJID". Defaulting to the first one.
Warning:
Issue #03: Age should not be >70 (2 patients: #A and #B)
Code
edc_data_warnings()
Output
# A tibble: 5 x 4
issue_n message subjid fun
<chr> <chr> <list> <chr>
1 01 Age should not be >20 <chr [50]> cli_warn
2 02 Age should not be >70 <chr [2]> cli_warn
3 03 Age should not be >70 <chr [2]> cli_warn
4 xx Age should not be >60 <chr [10]> cli_warn
5 <NA> Age should not be >70 <chr [2]> cli_warn

93 changes: 93 additions & 0 deletions tests/testthat/test-sanity.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@

# Snapshot test for `edc_data_warn()`: records the warnings emitted for the
# default call, for `issue_n`/`max_subjid` variations, and for `col_subjid`
# vectors holding several candidate subject-identifier columns, then records
# the summary table returned by `edc_data_warnings()`.
test_that("edc_data_warn snapshot", {
  clean_lookup()
  tm = edc_example()
  # Bind the dataset locally rather than `attach(tm)`: an attached list is
  # never detached here, so it would stay on the search path and leak into
  # every test that runs afterwards.
  db0 = tm$db0

  expect_snapshot({
    #default
    db0 %>%
      filter(age>60) %>%
      edc_data_warn("Age should not be >60")

    #no issue_n
    db0 %>%
      filter(age>70) %>%
      edc_data_warn("Age should not be >70", issue_n=NULL)

    #with issue_n & max_subjid
    db0 %>%
      filter(age>20) %>%
      edc_data_warn("Age should not be >20", issue_n=1, max_subjid=2)

    #multiple subjid proposals
    db0 %>%
      filter(age>70) %>%
      edc_data_warn("Age should not be >70", issue_n=NULL, col_subjid=c("SUBJID", "PATNO"))


    ## WARNINGS

    #warning, multiple subjid found
    db0 %>%
      filter(age>70) %>%
      edc_data_warn("Age should not be >70", issue_n=2, col_subjid=c("SUBJID", "group"))

    #warning, multiple subjid found, reverse order
    db0 %>%
      filter(age>70) %>%
      edc_data_warn("Age should not be >70", issue_n=3, col_subjid=c("group", "SUBJID"))


    edc_data_warnings()
  })

})

# Error paths: `edc_data_stop()` must raise an error when the filtered data
# has rows, and `edc_data_warn()` must raise a classed error when none of the
# requested `col_subjid` columns exists in the input dataset.
test_that("edc_data_warn errors", {
  clean_lookup()
  tm = edc_example()
  # Local binding instead of `attach(tm)`: avoids leaving the example
  # datasets on the search path (and the "objects are masked" messages a
  # repeated attach of the same list produces).
  db0 = tm$db0

  #error expected
  db0 %>%
    filter(age>70) %>%
    edc_data_stop("Age should never be >70", issue_n=99) %>%
    expect_error()

  #error subjid not found
  db0 %>%
    filter(age>70) %>%
    edc_data_warn("Age should not be >70", issue_n=98, col_subjid=c("PATNO")) %>%
    expect_error(class="edc_data_condition_subjid_error")

})

# CSV export: when `csv_path` is given, `edc_data_warn()` should write the
# offending rows to that file; reading the file back must give the input data.
test_that("edc_data_warn CSV", {
  clean_lookup()
  tm = edc_example()
  # Local binding instead of `attach(tm)`, which would never be detached and
  # would leak the example datasets onto the search path.
  db0 = tm$db0

  path = tempfile(fileext=".csv")

  expect_false(file_exists(path))

  # Plain data.frame without labels, so it is comparable to what
  # `read.csv2()` returns.
  input = db0 %>%
    as.data.frame() %>%
    filter(age>70) %>%
    select(SUBJID, age, group) %>%
    labelled::remove_labels()

  input %>%
    edc_data_warn("Age should not be >70", issue_n=99, csv_path=path) %>%
    expect_warning()

  output = read.csv2(path)

  # The tolerance absorbs numeric differences introduced by the CSV round trip.
  expect_equal(input, output, tolerance = 0.01)

  #cleanup
  unlink(path)
  expect_false(file_exists(path))

})

0 comments on commit c57fbb1

Please sign in to comment.