EXPERIMENT ... Generate data/admiral_adlb.rda using template, tempora…

…ry .cache storage, inner_join to reduce number of rows. NOT FINAL.
pharmaverse · Sep 9, 2024 · d79b080 · d79b080
1 parent aeaed83
commit d79b080
Show file tree

Hide file tree

Showing 3 changed files with 69 additions and 37 deletions.
diff --git a/data-raw/admiral_adlb.R b/data-raw/admiral_adlb.R
@@ -224,11 +224,18 @@ adlb <- adlb %>%
     source_var = ANRIND,
     new_var = BNRIND
   ) %>%
-  # Calculate CHG
-  derive_var_chg() %>%
-  # Calculate PCHG
-  derive_var_pchg()
-
+  # Calculate CHG for post-baseline records
+  # The decision on how to populate pre-baseline and baseline values of CHG is left to producer choice
+  restrict_derivation(
+    derivation = derive_var_chg,
+    filter = AVISITN > 0
+  ) %>%
+  # Calculate PCHG for post-baseline records
+  # The decision on how to populate pre-baseline and baseline values of PCHG is left to producer choice
+  restrict_derivation(
+    derivation = derive_var_pchg,
+    filter = AVISITN > 0
+  )
 
 ## Calculate lab grading ----
 
@@ -447,25 +454,9 @@ adlb <- adlb %>%
 # Save output ----
 
 # Change to whichever directory you want to save the dataset in
-#dir <- tools::R_user_dir("admiral_templates_data", which = "cache")
-#if (!file.exists(dir)) {
+dir <- tools::R_user_dir("admiral_templates_data", which = "cache") # cache dir
+if (!file.exists(dir)) { # nothing at that path yet, so create it first
   # Create the folder
-#  dir.create(dir, recursive = TRUE, showWarnings = FALSE)
-#}
-#save(adlb, file = file.path(dir, "adlb.rda"), compress = "bzip2")
-
-admiral_adlb = adlb
-use_data(admiral_adlb, overwrite=T)
-
-
-# check - This dataset is identical to backup ?
-e1 = new.env()
-e2 = new.env()
-
-load("data/admiral_adlb.rda", e1)
-load("data-backup/admiral_adlb.rda", e2)
-e1$admiral_adlb
-e2$admiral_adlb
-identical(e1$admiral_adlb, e2$admiral_adlb)
-
-
+  dir.create(dir, recursive = TRUE, showWarnings = FALSE) # incl. parent dirs
+}
+save(adlb, file = file.path(dir, "adlb.rda"), compress = "bzip2") # adlb only
diff --git a/data-raw/create_admiral_adlb.R b/data-raw/create_admiral_adlb.R
@@ -1,18 +1,59 @@
-#  Create data/admiral_adlb.rda
+#  Create dataset:   data/admiral_adlb.rda
 
-# This is a TWO-step process.
+# This is a MULTI-step process.
 
-library(admiral)
+# First, use the template to create the R script (data-raw/admiral_adlb.R) which will generate the data.
+# Next, source this script to create the data (~/.cache/R/admiral_templates_data/adlb.rda)
+# Finally, shorten this data (now ~ 1.2 MB) by selecting only certain USUBJID
 
-# First, generate script from template
-adam_name="adlb"
-save_path =  paste0("./data-raw/admiral_", adam_name, ".R")
 
-use_ad_template(adam_name = adam_name,
-                save_path =  save_path,
-                open = F,
-                overwrite=T)
+# Step 1: choose the ADaM template and the path of the script generated from it
+adam_name <- "adlb"
+save_path <- sprintf("./data-raw/admiral_%s.R", adam_name)
 
+admiral::use_ad_template( # qualified: this script does not attach admiral
+  adam_name = adam_name, # which ADaM template to copy ("adlb")
+  save_path = save_path, # where the generated script is written
+  open = FALSE, # do not open the generated script afterwards
+  overwrite = TRUE # replace an existing script at save_path
+)
 
-# Second, run the script (manually)
+# Step 2: run the generated script; it saves adlb.rda in the user cache directory
+source(file = "data-raw/admiral_adlb.R")  # nolint
 
+# Load the cached data into .GlobalEnv. The cache directory is resolved the
+# same way data-raw/admiral_adlb.R resolves it, not via a hard-coded path.
+cache_dir <- tools::R_user_dir("admiral_templates_data", which = "cache")
+load(file.path(cache_dir, "adlb.rda"))
+
+# nrow(adlb) is 83,652 before subsetting
+
+# Limit rows by keeping only these USUBJID values
+USUBJID <- c(
+  "01-701-1015",
+  "01-701-1023",
+  "01-701-1028",
+  "01-701-1033",
+  "01-701-1034",
+  "01-701-1047",
+  "01-701-1097",
+  "01-705-1186",
+  "01-705-1292",
+  "01-705-1310",
+  "01-708-1286"
+)
+
+# One-column lookup table used as the right-hand side of the inner join
+user <- dplyr::tibble(USUBJID = USUBJID)
+
+# Name the join key explicitly; functions are namespace-qualified because this
+# script does not attach dplyr or usethis itself.
+result <- dplyr::inner_join(adlb, user, by = "USUBJID")
+admiral_adlb <- result
+
+# Optional check that every requested subject survived the join:
+# all(USUBJID %in% result$USUBJID)
+
+# Finally, save the reduced dataset (the header of this script names the
+# target as data/admiral_adlb.rda)
+usethis::use_data(admiral_adlb, overwrite = TRUE)
diff --git a/data/admiral_adlb.rda b/data/admiral_adlb.rda