Skip to content

Commit

Permalink
Merge pull request #1746 from Gilead-BioStats/mapping-demo
Browse files Browse the repository at this point in the history
Prototype Data Specification Framework
  • Loading branch information
lauramaxwell authored Aug 14, 2024
2 parents 7047802 + b4fce1e commit c01c643
Show file tree
Hide file tree
Showing 51 changed files with 1,614 additions and 887 deletions.
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ export(Analyze_NormalApprox_PredictBounds)
export(Analyze_Poisson)
export(Analyze_Poisson_PredictBounds)
export(BindResults)
export(CheckSpec)
export(CombineSpecs)
export(Flag)
export(Flag_Fisher)
export(Flag_NormalApprox)
Expand Down
9 changes: 8 additions & 1 deletion R/RunWorkflow.R
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,6 @@ RunWorkflow <- function(
bKeepInputData = FALSE
) {
cli::cli_h1(paste0("Initializing `", lWorkflow$meta$File, "` Workflow"))
cli::cli_alert("Colnames: {names(lData)}")

# check that the workflow has steps
if (length(lWorkflow$steps) == 0) {
Expand All @@ -54,6 +53,14 @@ RunWorkflow <- function(

lWorkflow$lData <- lData

# If the workflow has a spec, check that the data and spec are compatible
if ("spec" %in% names(lWorkflow)) {
cli::cli_h3("Checking data against spec")
CheckSpec(lData, lWorkflow$spec)
} else {
cli::cli_h3("No spec found in workflow. Proceeding without checking data.")
}

# Run through each step in lWorkflow$workflow
stepCount <- 1
for (step in lWorkflow$steps) {
Expand Down
24 changes: 24 additions & 0 deletions R/util-CombineSpecs.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
#' Combine Specifications
#'
#' @description
#' `r lifecycle::badge("stable")`
#'
#' Combine a list of specifications into a single specification. Columns are
#' merged by name within each domain (data.frame), so a column that appears in
#' several specifications is listed only once in the result.
#'
#' @param lSpecs A list of specifications. Each specification is a named list
#'   of domains, and each domain is a named list of column definitions.
#' @return A named list representing the combined specification, with one
#'   entry per domain. When the same column is defined more than once, the
#'   first definition is kept, and `required` is set to `TRUE` if any of the
#'   definitions marks the column as required.
#' @examples
#' all_wf <- MakeWorkflowList()
#' # Each workflow stores its specification in its `spec` element
#' all_specs <- CombineSpecs(lapply(all_wf, function(wf) wf$spec))
#'
#' @export

CombineSpecs <- function(lSpecs) {
  all_specs <- list()
  for (spec in lSpecs) {
    for (domain in names(spec)) {
      combined <- all_specs[[domain]]
      if (is.null(combined)) {
        combined <- list()
      }
      incoming <- spec[[domain]]
      for (column in names(incoming)) {
        definition <- incoming[[column]]
        if (!column %in% names(combined)) {
          # First time this column is seen: keep its definition as-is.
          # `[<-` with a wrapped value preserves a NULL definition.
          combined[column] <- list(definition)
        } else if (is.list(definition) && isTRUE(definition$required)) {
          # A column required by any specification is required overall.
          existing <- combined[[column]]
          if (is.null(existing) || is.list(existing)) {
            existing$required <- TRUE
            combined[column] <- list(existing)
          }
        }
      }
      all_specs[[domain]] <- combined
    }
  }
  all_specs
}
71 changes: 71 additions & 0 deletions R/util-checkSpec.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
#' Check if the data and spec are compatible
#'
#' @description
#' `r lifecycle::badge("stable")`
#'
#' Check if the data and spec are compatible by comparing the data.frames and
#' columns in the spec with the data.
#'
#' @param lData A named list of data.frames.
#' @param lSpec A named list specifying the expected structure of the data.
#'   Each element is named after a data.frame in `lData` and is itself a named
#'   list with one entry per expected column. Every column listed is checked;
#'   the contents of each entry (e.g. `required`) are not inspected.
#'
#' @return This function is called for its side effects. It throws an error if
#' a data.frame listed in the spec is missing from the data. Otherwise it
#' prints a message confirming that all data.frames were found, followed by
#' either a message that all columns in the spec are present or a danger alert
#' listing the missing columns (missing columns do not raise an error).
#'
#' @examples
#' lData <- list(
#'   reporting_groups = gsm::reportingGroups,
#'   reporting_results = gsm::reportingResults
#' )
#' lSpec <- list(
#'   reporting_groups = list(
#'     GroupID = list(required = TRUE),
#'     GroupLevel = list(required = TRUE),
#'     Param = list(required = TRUE),
#'     Value = list(required = TRUE)
#'   ),
#'   reporting_results = list(
#'     GroupID = list(required = TRUE),
#'     GroupLevel = list(required = TRUE),
#'     Numerator = list(required = TRUE),
#'     Denominator = list(required = TRUE)
#'   )
#' )
#' CheckSpec(lData, lSpec) # Prints message that everything is found
#'
#' lSpec$reporting_groups$NotACol <- list(required = TRUE)
#' CheckSpec(lData, lSpec) # Prints an alert that NotACol is missing
#'
#' @export
#'
CheckSpec <- function(lData, lSpec) {
  # Check that all data.frames in the spec are present in the data
  lSpecDataFrames <- names(lSpec)
  lDataFrames <- names(lData)
  if (!all(lSpecDataFrames %in% lDataFrames)) {
    MissingSpecDataFrames <- lSpecDataFrames[!lSpecDataFrames %in% lDataFrames]
    cli::cli_abort(c(
      "{.arg lData} must contain all data.frames in {.arg lSpec}.",
      i = "Missing data.frames: {MissingSpecDataFrames}"
    ))
  } else {
    cli::cli_alert("All {length(lSpecDataFrames)} data.frame(s) in the spec are present in the data: {lSpecDataFrames}")
  }

  # Check that all columns in the spec are present in the data
  allCols <- c()
  missingCols <- c()
  for (strDataFrame in lSpecDataFrames) {
    lSpecColumns <- names(lSpec[[strDataFrame]])
    lDataColumns <- names(lData[[strDataFrame]])

    # paste() treats a zero-length vector as "", so a spec entry without any
    # columns would otherwise be reported as a bogus "<data.frame>$" column.
    if (length(lSpecColumns) > 0) {
      allCols <- c(allCols, paste(strDataFrame, lSpecColumns, sep = "$"))
    }

    thisMissingCols <- lSpecColumns[!lSpecColumns %in% lDataColumns]
    if (length(thisMissingCols) > 0) {
      missingCols <- c(missingCols, paste(strDataFrame, thisMissingCols, sep = "$"))
    }
  }

  # Missing columns are reported but deliberately do not stop execution.
  if (length(missingCols) > 0) {
    cli::cli_alert_danger("Not all columns in the spec are present in the data, missing columns are: {missingCols}")
  } else {
    cli::cli_alert("All {length(allCols)} columns in the spec are present in the data: {allCols}")
  }
}
4 changes: 3 additions & 1 deletion _pkgdown.yml
Original file line number Diff line number Diff line change
Expand Up @@ -61,19 +61,21 @@ reference:
desc: Utility functions for use within the Data Model
- contents:
- BindResults
- CheckSpec
- CombineSpecs
- MakeBounds
- MakeCharts
- MakeMetric
- MakeLongMeta
- MakeWideGroups
- MakeWorkflowList
- ParseThreshold
- RenderRmd
- RunStep
- RunQuery
- RunWorkflow
- RunWorkflows
- UseClindata
- RenderRmd

- title: Sample Data
desc: data used for examples and testing
Expand Down
50 changes: 25 additions & 25 deletions inst/examples/3_ReportingWorkflow.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,16 @@

# Step 1 - Create Mapped Data - filter/map raw data
lData <- list(
dfSUBJ = clindata::rawplus_dm,
dfAE = clindata::rawplus_ae,
dfPD = clindata::ctms_protdev,
dfLB = clindata::rawplus_lb,
dfSTUDCOMP = clindata::rawplus_studcomp,
dfSDRGCOMP = clindata::rawplus_sdrgcomp %>% dplyr::filter(.data$phase == 'Blinded Study Drug Completion'),
dfDATACHG = clindata::edc_data_points,
dfDATAENT = clindata::edc_data_pages,
dfQUERY = clindata::edc_queries,
dfENROLL = clindata::rawplus_enroll
Raw_SUBJ = clindata::rawplus_dm,
Raw_AE = clindata::rawplus_ae,
Raw_PD = clindata::ctms_protdev,
Raw_LB = clindata::rawplus_lb,
Raw_STUDCOMP = clindata::rawplus_studcomp,
Raw_SDRGCOMP = clindata::rawplus_sdrgcomp %>% dplyr::filter(.data$phase == 'Blinded Study Drug Completion'),
Raw_DATACHG = clindata::edc_data_points,
Raw_DATAENT = clindata::edc_data_pages,
Raw_QUERY = clindata::edc_queries,
Raw_ENROLL = clindata::rawplus_enroll
)
mapping_wf <- MakeWorkflowList(strNames = "data_mapping")
mapped <- RunWorkflows(mapping_wf, lData, bKeepInputData=TRUE)
Expand All @@ -22,9 +22,9 @@ kris <- RunWorkflows(kri_wf, mapped)

# Step 3 - Create Reporting Data - Import Metadata and stack KRI Results
lReporting_Input <- list(
ctms_site = clindata::ctms_site,
ctms_study = clindata::ctms_study,
dfEnrolled = mapped$dfEnrolled,
Raw_ctms_site = clindata::ctms_site,
Raw_ctms_study = clindata::ctms_study,
Mapped_Enrolled = mapped$Mapped_Enrolled,
lWorkflows = kri_wf,
lAnalysis = kris,
dSnapshotDate = Sys.Date(),
Expand All @@ -40,19 +40,19 @@ lReports <- RunWorkflows(wf_reports, reporting)
#### 3.2 - Create a KRI Report using 12 standard metrics with a single composite workflow
lData <- list(
# Raw Data
dfSUBJ = clindata::rawplus_dm,
dfAE = clindata::rawplus_ae,
dfPD = clindata::ctms_protdev,
dfLB = clindata::rawplus_lb,
dfSTUDCOMP = clindata::rawplus_studcomp,
dfSDRGCOMP = clindata::rawplus_sdrgcomp %>% dplyr::filter(.data$phase == 'Blinded Study Drug Completion'),
dfDATACHG = clindata::edc_data_points,
dfDATAENT = clindata::edc_data_pages,
dfQUERY = clindata::edc_queries,
dfENROLL = clindata::rawplus_enroll,
Raw_SUBJ = clindata::rawplus_dm,
Raw_AE = clindata::rawplus_ae,
Raw_PD = clindata::ctms_protdev,
Raw_LB = clindata::rawplus_lb,
Raw_STUDCOMP = clindata::rawplus_studcomp,
Raw_SDRGCOMP = clindata::rawplus_sdrgcomp %>% dplyr::filter(.data$phase == 'Blinded Study Drug Completion'),
Raw_DATACHG = clindata::edc_data_points,
Raw_DATAENT = clindata::edc_data_pages,
Raw_QUERY = clindata::edc_queries,
Raw_ENROLL = clindata::rawplus_enroll,
# CTMS data
ctms_site = clindata::ctms_site,
ctms_study = clindata::ctms_study,
Raw_ctms_site = clindata::ctms_site,
Raw_ctms_study = clindata::ctms_study,
# SnapshotDate and StudyID
dSnapshotDate = Sys.Date(),
strStudyID = "ABC-123",
Expand Down
Loading

0 comments on commit c01c643

Please sign in to comment.