Skip to content

Commit

Permalink
Merge pull request #240 from Gilead-BioStats/fix-229
Browse files Browse the repository at this point in the history
closes #229 - qc updates for Flag()
  • Loading branch information
jwildfire authored Mar 7, 2022
2 parents 2261d9f + e538841 commit fd5208d
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 18 deletions.
22 changes: 12 additions & 10 deletions R/Flag.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,9 @@
#' - `strColumn` - A column to use for Thresholding (required)
#' - `strValueColumn` - A column to be used for the sign of the flag (optional)
#'
#' @param dfAnalyzed data frame where flags should be added
#' @param strColumn Name of the Column to use for thresholding
#' @param vThreshold vector of 2 numeric values representing lower and upper threshold values. All values in strColumn are compared to vThreshold using strict comparisons. Values less than the lower threshold or greater than the upper threshold are flagged as -1 and 1 respectively. Values equal to the threshold values are set to 0 (i.e. not flagged). If NA is provided for either threshold value it is ignored, and no values are flagged based on the threshold. NA and NaN values in strColumn are given NA flag values.
#' @param dfAnalyzed data.frame where flags should be added.
#' @param strColumn Name of the column to use for thresholding.
#' @param vThreshold Vector of 2 numeric values representing lower and upper threshold values. All values in strColumn are compared to vThreshold using strict comparisons. Values less than the lower threshold or greater than the upper threshold are flagged as -1 and 1 respectively. Values equal to the threshold values are set to 0 (i.e. not flagged). If NA is provided for either threshold value it is ignored, and no values are flagged based on the threshold. NA and NaN values in strColumn are given NA flag values.
#' @param strValueColumn Optional. Name of the column to use for the sign of the flag. If the value for a row is higher than the median of strValueColumn then Flag = 1; if it is lower then Flag = -1.
#'
#' @return input data frame with the columns added for "ThresholdLow","ThresholdHigh","ThresholdCol" and "Flag"
Expand All @@ -38,16 +38,18 @@

Flag <- function( dfAnalyzed , strColumn="PValue", vThreshold=c(0.05,NA),strValueColumn = NULL){
stopifnot(
is.data.frame(dfAnalyzed),
is.character(strColumn),
is.numeric(vThreshold),
length(vThreshold) == 2,
strColumn %in% names(dfAnalyzed),
strValueColumn %in% names(dfAnalyzed)
"dfAnalyzed is not a data frame" = is.data.frame(dfAnalyzed),
"strColumn is not character" = is.character(strColumn),
"vThreshold is not numeric" = is.numeric(vThreshold),
"vThreshold must be length of 2" = length(vThreshold) == 2,
"strColumn must be length of 1" = length(strColumn) == 1,
"strColumn not found in dfAnalyzed" = strColumn %in% names(dfAnalyzed),
"strValueColumn not found in dfAnalyzed" = strValueColumn %in% names(dfAnalyzed),
"SiteID not found in dfAnalyzed" = "SiteID" %in% names(dfAnalyzed)
)

if(all(!is.na(vThreshold))){
  # Both bounds were supplied, so the lower bound must be strictly below the
  # upper bound. The message has to be the *name* of the stopifnot() argument:
  # writing `"msg" = stopifnot(cond)` only assigns stopifnot()'s NULL result
  # to a variable named after the message, so the text would never be shown.
  stopifnot(
    "vThreshold must contain a minimum and maximum value (i.e., vThreshold = c(1, 2))" = vThreshold[2]>vThreshold[1]
  )
}

dfFlagged<-dfAnalyzed %>%
Expand Down
6 changes: 3 additions & 3 deletions man/Flag.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 7 additions & 5 deletions tests/testthat/test_Flag.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ test_that("output created as expected and has correct structure",{
expect_true(is.data.frame(flag))
expect_equal(sort(unique(ae_input$SiteID)), sort(flag$SiteID))
expect_true(all(names(ae_anly_wilcoxon) %in% names(flag)))
expect_equal(names(flag), c("SiteID", "N", "TotalCount", "TotalExposure", "Rate", "Estimate",
"PValue", "ThresholdLow", "ThresholdHigh", "ThresholdCol", "Flag"))
})

test_that("strFlagValueColumn paramter works as intended",{
Expand Down Expand Up @@ -40,18 +42,18 @@ test_that("Expected Columns are added to dfFlagged",{
})

test_that("vThreshold parameter works as intended",{
  # Flag() asserts that a SiteID column is present, so every simulated input
  # carries one alongside the column being thresholded.

  # Lower threshold only: values strictly below 10 are flagged -1.
  sim1 <- Flag(data.frame(SiteID = seq_len(100), vals = seq_len(100)), strColumn="vals", vThreshold=c(10,NA))
  expect_equal(sim1$Flag, c(rep(-1,9), rep(0,91)))

  # Upper threshold only: values strictly above 91 are flagged 1.
  sim2 <- Flag(data.frame(SiteID = seq_len(100), vals = seq_len(100)), strColumn="vals", vThreshold=c(NA,91))
  expect_equal(sim2$Flag, c(rep(0,91), rep(1,9)))

  # Both thresholds: values equal to a threshold are not flagged.
  sim3 <- Flag(data.frame(SiteID = seq_len(100), vals = seq_len(100)), strColumn="vals", vThreshold=c(2,91))
  expect_equal(sim3$Flag, c(-1,rep(0,90), rep(1,9)))

  # Negative lower threshold over a range that spans zero.
  sim4 <- Flag(data.frame(SiteID = seq_len(201), vals = seq(from = -100, to = 100)), strColumn="vals", vThreshold=c(-91,91))
  expect_equal(sim4$Flag,c(rep(-1,9),rep(0,183), rep(1,9)))
})

test_that("NA values in strColumn result in NA in Flag column",{
  # Flag() asserts that a SiteID column is present, so the input includes one.
  # The last 10 values are NA and are expected to yield NA flags; the first 9
  # values fall strictly below the lower threshold of 10 and are flagged -1.
  NAsim <- Flag(data.frame(SiteID = seq_len(100), vals = c(seq_len(90), rep(NA,10))), strColumn="vals", vThreshold=c(10,NA))
  expect_equal(NAsim$Flag, c(rep(-1,9), rep(0,81),rep(NA,10)))
})

Expand Down

0 comments on commit fd5208d

Please sign in to comment.