Closes 107 - read out (#202)

* exploratory programs for read-out files * markdown practice * changes to markdown table layout * basic Rmarkdown file produced & exploratory scripts * draft completion of Read-out file version 1 * format complete of .Rmd for datacutr read-out file, modified process_cut() to enable auto-generation * changed .Rmd file location and combined custom functions within the markdown * read_out function added, exploratory work on updating functions within read_out_2.Rmd * remove .Rmd test scripts * update to the .Rmd text * draft testing files for read_out() function with minor updates to .Rmd * updates made to .Rmd file and function with testing * updates to the read_out() function along with modifications to the .Rmd messages * development of read_out() function and testing continued. * updates to process_cut() and read_out() functions, completion of test-read_out.R * remove excess .Rmd files * remove final_cut argument and section in .Rmd, update file path read_out() * Update read_out.Rmd with reactables package instead of datatables to allow for larger data inputs. 
* removed old datacutr read-out file .Rmd * Changes made to the documentation for read-out * update renv.lock files for R4.3.1 * resolving warnings in pull request checks * updates to resolve check errors * updates to read_out.Rmd & read_out.R to address feedback * update to renv.lock file CRAN version * updates to renv file, removed unused packages * revert renv.lock file * updates to renv.lock - include "admiraldev", upgrade "remotes" * update remotes version * troubleshooting - test * update stringi repo to "CRAN" * add author * updates to the read_out function and read_out.Rmd * updated the renv.lock for datacutr R4.3.1 * update the .Rprofile * add library(tibble) to read_out.Rmd * update R version in DESCRIPTION, remove lintr complexity check from process_cut.R * update to R version DESCRIPTION, common.yml & no lint process_cut.R * updates to resolve styler errors * resolve CICD check errors * resolve styler and roxygen2 errors * update the NEWS.md to reflect update in R version
pharmaverse · Jul 8, 2024 · e3d41f1 · e3d41f1
1 parent 290d205
commit e3d41f1
Show file tree

Hide file tree

Showing 17 changed files with 2,213 additions and 434 deletions.
diff --git a/.Rprofile b/.Rprofile
@@ -1,5 +1 @@
-if (Sys.getenv("GITHUB_ACTIONS") == "" || (Sys.getenv("GITHUB_ACTIONS") == "true" && getRversion()$major == 3 && getRversion()$minor == 6)) {
-  source("renv/activate.R")
-} else {
-  options(repos = c(CRAN = "https://cran.rstudio.com"))
-}
+options(repos = c(CRAN = "https://cran.rstudio.com"))
diff --git a/.github/workflows/common.yml b/.github/workflows/common.yml
@@ -41,25 +41,25 @@ jobs:
     uses: pharmaverse/admiralci/.github/workflows/style.yml@main
     if: github.event_name == 'pull_request'
     with:
-      r-version: "4.0"
+      r-version: "4.1"
   spellcheck:
     name: Spelling
     uses: pharmaverse/admiralci/.github/workflows/spellcheck.yml@main
     if: github.event_name == 'pull_request'
     with:
-      r-version: "4.0"
+      r-version: "4.1"
   readme:
     name: Render README
     uses: pharmaverse/admiralci/.github/workflows/readme-render.yml@main
     if: github.event_name == 'push'
     with:
-      r-version: "4.0"
+      r-version: "4.1"
   validation:
     name: Validation
     uses: pharmaverse/admiralci/.github/workflows/r-pkg-validation.yml@main
     if: github.event_name == 'release'
     with:
-      r-version: "4.0"
+      r-version: "4.1"
   check:
     name: Check
     uses: pharmaverse/admiralci/.github/workflows/r-cmd-check.yml@main
@@ -69,7 +69,7 @@ jobs:
     uses: pharmaverse/admiralci/.github/workflows/pkgdown.yml@main
     if: github.event_name == 'push'
     with:
-      r-version: "4.0"
+      r-version: "4.1"
       # Whether to skip multiversion docs
       # Note that if you have multiple versions of docs,
       # your URL links are likely to break due to path changes
@@ -79,7 +79,7 @@ jobs:
     uses: pharmaverse/admiralci/.github/workflows/lintr.yml@main
     if: github.event_name == 'pull_request'
     with:
-      r-version: "4.0"
+      r-version: "4.1"
   links:
     name: Links
     uses: pharmaverse/admiralci/.github/workflows/links.yml@main
@@ -91,7 +91,7 @@ jobs:
     if: >
       github.event_name == 'push' || github.event_name == 'pull_request'
     with:
-      r-version: "4.0"
+      r-version: "4.1"
       # Whether to skip code coverage badge creation
       # Setting to 'false' will require you to create
       # an orphan branch called 'badges' in your repository
@@ -102,10 +102,10 @@ jobs:
     if: >
       github.event_name == 'push' || github.event_name == 'pull_request'
     with:
-      r-version: "4.0"
+      r-version: "4.1"
   man-pages:
     name: Man Pages
     uses: pharmaverse/admiralci/.github/workflows/man-pages.yml@main
     if: github.event_name == 'pull_request'
     with:
-      r-version: "4.0"
+      r-version: "4.1"
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -5,7 +5,8 @@ Version: 0.2.0
 Authors@R: c(
     person("Tim", "Barnett", email = "timothy.barnett@roche.com", role = c("cph","aut", "cre")), 
     person("Nathan", "Rees", email = "nathan.rees@roche.com", role = c("aut")), 
-    person("Alana", "Harris", email = "alana.harris@roche.com", role = c("aut")))
+    person("Alana", "Harris", email = "alana.harris@roche.com", role = c("aut")),
+    person("Cara", "Andrews", email = "cara.andrews@roche.com", role = c("aut")))
 Description: Supports the process of applying a cut to Standard Data Tabulation Model (SDTM),
     as part of the analysis of specific points in time of the data, normally as part of 
     investigation into clinical trials. The functions support different approaches of
@@ -17,18 +18,19 @@ Encoding: UTF-8
 Language: en-US
 LazyData: true
 Roxygen: list(markdown = TRUE)
-RoxygenNote: 7.2.3
-Depends: R (>= 3.5)
+RoxygenNote: 7.3.2
+Depends: R (>= 4.1)
 Imports:
     admiraldev (>= 0.3.0),
     assertthat (>= 0.2.1),
     dplyr (>= 1.0.5),
     lubridate (>= 1.7.4),
     magrittr (>= 1.5),
     purrr (>= 0.3.3),
-    stringr,
+    stringr (>= 1.4.0),
     rlang (>= 0.4.4),
-    tibble
+    tibble (>= 3.0.0),
+    reactable (>= 0.4.4)
 Suggests:
     devtools,
     lintr,

diff --git a/NAMESPACE b/NAMESPACE
@@ -8,6 +8,7 @@ export(impute_dcutdtc)
 export(impute_sdtm)
 export(process_cut)
 export(pt_cut)
+export(read_out)
 export(special_dm_cut)
 importFrom(admiraldev,assert_character_scalar)
 importFrom(admiraldev,assert_data_frame)
@@ -36,12 +37,14 @@ importFrom(magrittr,"%>%")
 importFrom(purrr,map)
 importFrom(purrr,map_lgl)
 importFrom(purrr,pmap)
+importFrom(reactable,reactable)
 importFrom(rlang,"!!")
 importFrom(rlang,":=")
 importFrom(rlang,as_quosures)
 importFrom(rlang,enexpr)
 importFrom(rlang,expr_name)
 importFrom(rlang,exprs)
+importFrom(rlang,is_named)
 importFrom(rlang,is_quosure)
 importFrom(rlang,quo_is_null)
 importFrom(rlang,quo_name)

diff --git a/NEWS.md b/NEWS.md
@@ -2,6 +2,7 @@
 
 ## New Features
 - Added a "Report a bug" link to `{datacutr}` website (#182)
+- Added a `read_out` function that enables the generation of a read-out file (.html), to summarize changes applied to data during a datacut. (#107)
 
 ## Updates of Existing Functions
 - Update to `impute_dcutdtc()`, `date_cut()` and `special_dm_cut()` functions to allow for 
@@ -14,9 +15,11 @@ and not valid date or `NA`/`""` (#181)
 arguments have a default value of `NULL` (#188)
 - `process_cut` updated to have more detailed error messages when incorrect datasets 
 are fed in (#180)
+- `process_cut` updated to have arguments `read_out` and `out_path` to integrate the `read_out` function into the wrapper function; enabling auto-generation of the datacutr read-out file (#107)
 
 ## Breaking Changes
 - Added dependency on `admiraldev` >= 0.3.0 (#173)
+- Added dependency on R version >= 4.1 due to an update in `admiraldev` to use R native pipe
 
 ## Documentation
 - Added notes on SDTM compatibility (#171)

diff --git a/R/datcutr-package.R b/R/datcutr-package.R
@@ -3,11 +3,13 @@
 #' syms pull if_else
 #' @importFrom magrittr %>%
 #' @importFrom rlang := quo_name !! is_quosure quo_is_null as_quosures exprs enexpr expr_name
+#' is_named
 #' @importFrom purrr map_lgl pmap map
 #' @importFrom lubridate ymd_hms is.POSIXt
 #' @importFrom admiraldev assert_symbol assert_data_frame assert_character_scalar assert_filter_cond
 #' filter_if is_valid_dtc warn_if_invalid_dtc get_duplicates
 #' @importFrom assertthat assert_that
 #' @importFrom tibble tribble
+#' @importFrom reactable reactable
 #' @importFrom stringr str_match str_detect
 "_PACKAGE"
diff --git a/R/process_cut.R b/R/process_cut.R
@@ -1,9 +1,12 @@
+# nolint start: cyclocomp_linter
 #' @title Wrapper function to prepare and apply the datacut of SDTMv datasets
 #'
 #' @description Applies the selected type of datacut on each SDTMv dataset based on the chosen
 #' SDTMv date variable, and outputs the resulting cut datasets, as well as the datacut dataset,
-#' as a list. It also provides an option to perform a "special" cut on the demography (dm) domain
-#' in which any deaths occurring after the datacut date are removed.
+#' as a list. It provides an option to perform a "special" cut on the demography (dm) domain
+#' in which any deaths occurring after the datacut date are removed. It also provides an option
+#' to produce a .html file that summarizes the changes applied to the data during the cut, where
+#' you can inspect the records that have been removed and/or modified.
 #'
 #' @param source_sdtm_data A list of uncut SDTMv dataframes
 #' @param patient_cut_v A vector of quoted SDTMv domain names in which a patient cut should be
@@ -19,6 +22,11 @@
 #' @param special_dm A logical input indicating whether the `special dm cut` should be performed.
 #' Note that, if TRUE, dm should not be included in `patient_cut_v`, `date_cut_m` or `no_cut_v`
 #' inputs.
+#' @param read_out A logical input indicating whether a summary file for the datacut should be
+#' produced. If `TRUE`, a .html file will be returned containing a summary of the cut and
+#' records removed. Default set to `FALSE`.
+#' @param out_path A character vector of file save path for the summary file if `read_out = TRUE`;
+#' the default corresponds to the working directory, `getwd()`.
 #'
 #' @return Returns a list of all input SDTMv datasets, plus the datacut dataset, after
 #' performing the selected datacut on each SDTMv domain.
@@ -57,7 +65,9 @@ process_cut <- function(source_sdtm_data,
                         no_cut_v = NULL,
                         dataset_cut,
                         cut_var,
-                        special_dm = TRUE) {
+                        special_dm = TRUE,
+                        read_out = FALSE,
+                        out_path = ".") {
   #  Assertions for input parameters -----------------------------------------------
   assert_that(is.list(source_sdtm_data),
     msg = "source_sdtm_data must be of class list"
@@ -107,6 +117,8 @@ no_cut_v empty, in which case a default value of NULL will be used."
     )
     cut_inputs <- append(cut_inputs, "dm")
   }
+  # No cut list --------------------------------------------------------------------
+  no_cut_list <- source_sdtm_data[no_cut_v]
 
   sdtm_inputs_dups <- c()
   no_cut_method <- c()
@@ -181,6 +193,8 @@ no_cut_v empty, in which case a default value of NULL will be used."
   all_cut <- c(patient_cut_data, date_cut_data)
 
   # Conduct DM special cut for DTH flags after DCUTDTM ------------------------------
+  # dm_cut = NULL unless special dm cut applied
+  dm_cut <- NULL
 
   if (special_dm) {
     # Assertions for special dm cut
@@ -208,5 +222,11 @@ no_cut_v empty, in which case a default value of NULL will be used."
   # Return the final list of SDTM datasets + DCUT ----------------------------------
 
   final_data <- c(list(dcut = dataset_cut), cut_data, source_sdtm_data[no_cut_v])
+
+  if (read_out) {
+    read_out(dataset_cut, patient_cut_data, date_cut_data, dm_cut, no_cut_list, out_path)
+  }
+
   return(final_data)
 }
+# nolint end
diff --git a/R/read_out.R b/R/read_out.R
@@ -0,0 +1,145 @@
+#' @title Function to generate datacut summary file
+#'
+#' @description Produces a .html file summarizing the changes applied to data during a data cut.
+#' The file will contain an overview for the change in number of records for each dataset, the types
+#' of cut applied and the opportunity to inspect the removed records.
+#'
+#' @param dcut The output datacut dataset (DCUT), created via the `create_dcut()` function,
+#' containing the variable DCUTDTC.
+#' @param patient_cut_data A named list of SDTMv datasets to which a patient cut has been
+#' applied (via the `pt_cut()` function), each named with its corresponding domain. To be left
+#' blank if a patient cut has not been performed on any domains.
+#' @param date_cut_data A named list of SDTMv datasets to which a date cut has been applied
+#' (via the `date_cut()` function), each named with its corresponding domain. To be left blank
+#' if a date cut has not been performed on any domains.
+#' @param dm_cut The output dataset, created via the `special_dm_cut()` function, containing
+#' the variables DCUT_TEMP_REMOVE and DCUT_TEMP_DTHCHANGE.
+#' @param no_cut_list A named list of SDTMv datasets to which no cut has been applied, each
+#' named with its corresponding domain. To be left blank if no domains are to remain exactly as
+#' source.
+#' @param out_path A character vector of file save path for the summary file;
+#' the default corresponds to the working directory, `getwd()`.
+#'
+#' @return Returns a .html file summarizing the changes made to data during a datacut.
+#'
+#' @export
+#'
+#' @keywords derive
+#'
+#' @examples
+#' \dontrun{
+#' dcut <- tibble::tribble(
+#'   ~USUBJID, ~DCUTDTM, ~DCUTDTC,
+#'   "subject1", lubridate::ymd_hms("2020-10-11T23:59:59"), "2020-10-11T23:59:59",
+#'   "subject2", lubridate::ymd_hms("2020-10-11T23:59:59"), "2020-10-11T23:59:59",
+#'   "subject4", lubridate::ymd_hms("2020-10-11T23:59:59"), "2020-10-11T23:59:59"
+#' )
+#'
+#' ae <- tibble::tribble(
+#'   ~USUBJID, ~AESEQ, ~AESTDTC,
+#'   "subject1", 1, "2020-01-02T00:00:00",
+#'   "subject1", 2, "2020-08-31T00:00:00",
+#'   "subject1", 3, "2020-10-10T00:00:00",
+#'   "subject2", 2, "2020-02-20T00:00:00",
+#'   "subject3", 1, "2020-03-02T00:00:00",
+#'   "subject4", 1, "2020-11-02T00:00:00",
+#'   "subject4", 2, ""
+#' )
+#'
+#' dm <- tibble::tribble(
+#'   ~USUBJID, ~DTHDTC, ~DTHFL,
+#'   "subject1", "2020-10-11", "Y",
+#'   "subject2", "2020-10-12", "Y",
+#' )
+#'
+#' dt_ae <- date_cut(
+#'   dataset_sdtm = ae,
+#'   sdtm_date_var = AESTDTC,
+#'   dataset_cut = dcut,
+#'   cut_var = DCUTDTM
+#' )
+#'
+#' pt_ae <- pt_cut(
+#'   dataset_sdtm = ae,
+#'   dataset_cut = dcut
+#' )
+#'
+#' dm_cut <- special_dm_cut(
+#'   dataset_dm = dm,
+#'   dataset_cut = dcut,
+#'   cut_var = DCUTDTM
+#' )
+#'
+#' read_out(dcut, patient_cut_data = list(ae = pt_ae), date_cut_data = list(ae = dt_ae), dm_cut)
+#' }
+read_out <- function(dcut = NULL,
+                     patient_cut_data = NULL,
+                     date_cut_data = NULL,
+                     dm_cut = NULL,
+                     no_cut_list = NULL,
+                     out_path = ".") {
+  # Every input is optional: each one is validated only when it has been supplied.
+
+  # DCUT dataset must carry the subject identifier and the cut date.
+  if (!is.null(dcut)) {
+    assert_data_frame(dcut,
+      required_vars = exprs(USUBJID, DCUTDTC)
+    )
+  }
+
+  # Patient cut data: a plain list (not a data frame) of named datasets, each flagged with
+  # DCUT_TEMP_REMOVE.
+  if (!is.null(patient_cut_data)) {
+    assert_that(is.list(patient_cut_data) && !is.data.frame(patient_cut_data),
+      msg = "patient_cut_data must be a list. \n
+Note: If you have not used or do not with to view patient cut on any SDTMv domains, then
+please leave patient_cut_data empty, in which case a default value of NULL will be used."
+    )
+
+    # Checked element by element so that an empty list passes without error.
+    for (i in seq_along(patient_cut_data)) {
+      assert_data_frame(patient_cut_data[[i]],
+        required_vars = exprs(USUBJID, DCUT_TEMP_REMOVE)
+      )
+
+      assert_that(rlang::is_named(patient_cut_data[i]),
+        msg = "All elements patient_cut_data must be named with corresponding domain"
+      )
+    }
+  }
+
+  # Date cut data: same structure and checks as the patient cut data.
+  if (!is.null(date_cut_data)) {
+    assert_that(is.list(date_cut_data) && !is.data.frame(date_cut_data),
+      msg = "date_cut_data must be a list. \n
+Note: If you have not used or do not with to view date cut on any SDTMv domains, then please
+leave date_cut_data empty, in which case a default value of NULL will be used."
+    )
+    for (i in seq_along(date_cut_data)) {
+      assert_data_frame(date_cut_data[[i]],
+        required_vars = exprs(USUBJID, DCUT_TEMP_REMOVE)
+      )
+
+      assert_that(rlang::is_named(date_cut_data[i]),
+        msg = "All elements in date_cut_data must be named with corresponding domain"
+      )
+    }
+  }
+
+  # Special DM cut output additionally carries the death-change flag.
+  if (!is.null(dm_cut)) {
+    assert_data_frame(dm_cut,
+      required_vars = exprs(USUBJID, DCUT_TEMP_REMOVE, DCUT_TEMP_DTHCHANGE)
+    )
+  }
+
+  # Uncut datasets: named data frames, no particular variables required.
+  if (!is.null(no_cut_list)) {
+    assert_that(is.list(no_cut_list) && !is.data.frame(no_cut_list),
+      msg = "no_cut_list must be a list. \n
+Note: If you have not used or do not with to view the SDTMv domains where no cut has been
+applied, then please leave no_cut_list empty, in which case a default value of NULL will be
+used."
+    )
+    for (i in seq_along(no_cut_list)) {
+      assert_data_frame(no_cut_list[[i]])
+
+      assert_that(rlang::is_named(no_cut_list[i]),
+        msg = "All elements in no_cut_list must be named with corresponding domain"
+      )
+    }
+  }
+
+  # Locate the report template shipped with the package; `mustWork = TRUE` gives a clear error
+  # if it cannot be found instead of silently building a path from an empty string.
+  template <- system.file("read-out", "read_out.Rmd", package = "datacutr", mustWork = TRUE)
+
+  # Timestamped file name, e.g. "datacut_2024-07-08_14-30-05.html". The time fields are joined
+  # with "-" rather than ":" because ":" is not allowed in file names on Windows, and the
+  # extension is appended outside format() so it is not mistaken for the `tz` argument.
+  report_name <- paste0("datacut_", format(Sys.time(), "%Y-%m-%d_%H-%M-%S"), ".html")
+
+  # NOTE(review): the template presumably reads the arguments above through render()'s default
+  # `envir = parent.frame()`, i.e. this function's environment -- confirm against read_out.Rmd.
+  rmarkdown::render(
+    template,
+    output_file = report_name,
+    output_dir = out_path
+  )
+}