Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding Bland- Altman statistical function #1135

Merged
merged 20 commits into from
Jan 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ Collate:
'analyze_variables.R'
'analyze_vars_in_cols.R'
'argument_convention.R'
'bland_altman.R'
'combination_function.R'
'compare_variables.R'
'control_incidence_rate.R'
Expand Down
6 changes: 6 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,7 @@ export(format_fraction_fixed_dp)
export(format_fraction_threshold)
export(format_sigfig)
export(format_xx)
export(g_bland_altman)
export(g_forest)
export(g_ipp)
export(g_km)
Expand Down Expand Up @@ -243,6 +244,7 @@ export(prop_wilson)
export(reapply_varlabels)
export(ref_group_position)
export(rtable2gg)
export(s_bland_altman)
export(s_compare)
export(s_count_occurrences)
export(s_count_occurrences_by_grade)
Expand Down Expand Up @@ -316,7 +318,11 @@ importFrom(grid,widthDetails)
importFrom(magrittr,"%>%")
importFrom(methods,new)
importFrom(rlang,.data)
importFrom(stats,complete.cases)
importFrom(stats,pchisq)
importFrom(stats,qnorm)
importFrom(stats,qt)
importFrom(stats,sd)
importFrom(stats,setNames)
importFrom(survival,Surv)
importFrom(survival,coxph)
Expand Down
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
* Added summarize function version of `count_occurrences` analyze function, `summarize_occurrences`.
* Added referential footnotes to `surv_time` for censored range observations, controlled via the `ref_fn_censor` parameter.
* Added helper function `h_adlb_abnormal_by_worst_grade` to prepare `ADLB` data to use as input in `count_abnormal_by_worst_grade`.
* Added `s_bland_altman` and `g_bland_altman` functions to assess agreement between two numerical vectors.
* Added function `rtable2gg` that converts `rtable` objects to `ggplot` objects.
* Added helper function to set default `na_str` globally with `set_default_na_str()` and added `default_na_str()` for all interested functions.

Expand Down
104 changes: 104 additions & 0 deletions R/bland_altman.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
#' Bland-Altman analysis
#'
#' @description `r lifecycle::badge("experimental")`
#'
#' Functions implementing the Bland-Altman method to assess the agreement between two numerical vectors.
#'
#' @inheritParams argument_convention
#' @param y (`numeric`)\cr vector of numbers to be compared with `x`. Must have the same length as `x`.
#'
#' @name bland_altman
#' @examples
#' x <- seq(1, 60, 5)
#' y <- seq(5, 50, 4)
#' conf_level <- 0.9
#' # Derive statistics that are needed for Bland Altman plot
#' s_bland_altman(x, y, conf_level = conf_level)
#' # Create a Bland Altman plot
#' g_bland_altman(x, y, conf_level = conf_level)
NULL

#' @describeIn bland_altman Statistics function that derives the quantities needed for a
#'   Bland-Altman analysis from the pairwise complete observations of `x` and `y`.
#'
#' @return
#' * `s_bland_altman()` returns a named list with the elements:
#'   * `df`: a `data.frame` with columns `average` (`(x + y) / 2`) and `difference` (`x - y`).
#'   * `difference_mean`, `difference_sd`, `difference_se`: mean, standard deviation and
#'     standard error of the differences.
#'   * `ci_mean`: lower and upper bound of the t-based confidence interval of the mean difference.
#'   * `upper_agreement_limit`, `lower_agreement_limit`: limits of agreement, i.e. the mean
#'     difference plus/minus the normal quantile times the standard deviation of the differences.
#'   * `agreement_limit_se`: standard error used for the limits of agreement.
#'   * `upper_agreement_limit_ci`, `lower_agreement_limit_ci`: lower and upper bound of the
#'     t-based confidence interval of each limit of agreement.
#'   * `t_value`: t quantile used for the confidence intervals.
#'   * `n`: number of pairwise complete observations.
#'
#' @export
s_bland_altman <- function(x, y, conf_level = 0.95) {
  checkmate::assert_numeric(x, min.len = 1, any.missing = TRUE)
  checkmate::assert_numeric(y, len = length(x), any.missing = TRUE)
  # `conf_level` must be a single, non-missing number: `NA` would silently turn every
  # limit and interval into `NA`, and a vector would produce malformed intervals.
  checkmate::assert_number(conf_level, lower = 0, upper = 1)

  alpha <- 1 - conf_level

  # Keep only the pairs in which both measurements are observed.
  ind <- complete.cases(x, y)
  x <- x[ind]
  y <- y[ind]
  n <- sum(ind)

  if (n == 0) {
    stop("there is no valid paired data")
  }

  difference <- x - y
  average <- (x + y) / 2
  difference_mean <- mean(difference)
  # Note: with a single complete pair `sd()` returns `NA`, which propagates to the
  # derived statistics below.
  difference_sd <- sd(difference)

  # Limits of agreement around the mean difference.
  al <- qnorm(1 - alpha / 2) * difference_sd
  upper_agreement_limit <- difference_mean + al
  lower_agreement_limit <- difference_mean - al

  difference_se <- difference_sd / sqrt(n) # standard error of the mean difference
  al_se <- difference_sd * sqrt(3) / sqrt(n) # standard error of the agreement limits
  tvalue <- qt(1 - alpha / 2, n - 1) # t quantile for the requested confidence level
  difference_mean_ci <- difference_se * tvalue
  al_ci <- al_se * tvalue
  upper_agreement_limit_ci <- c(upper_agreement_limit - al_ci, upper_agreement_limit + al_ci)
  lower_agreement_limit_ci <- c(lower_agreement_limit - al_ci, lower_agreement_limit + al_ci)

  list(
    df = data.frame(average, difference),
    difference_mean = difference_mean,
    ci_mean = difference_mean + c(-1, 1) * difference_mean_ci,
    difference_sd = difference_sd,
    difference_se = difference_se,
    upper_agreement_limit = upper_agreement_limit,
    lower_agreement_limit = lower_agreement_limit,
    agreement_limit_se = al_se,
    upper_agreement_limit_ci = upper_agreement_limit_ci,
    lower_agreement_limit_ci = lower_agreement_limit_ci,
    t_value = tvalue,
    n = n
  )
}

#' @describeIn bland_altman Graphing function that draws a Bland-Altman plot: the pairwise
#'   differences against the pairwise averages, with horizontal reference lines at zero, at the
#'   mean difference, and at the lower and upper limits of agreement.
#'
#' @return
#' * `g_bland_altman()` returns a `ggplot` object.
#'
#' @export
g_bland_altman <- function(x, y, conf_level = 0.95) {
  res <- s_bland_altman(x, y, conf_level = conf_level)

  # All text labels share one x position, close to the right edge of the observed averages.
  avg <- res$df$average
  xpos <- max(avg) * 0.9 + min(avg) * 0.1
  # Labels are shifted by a fraction of the observed difference range so that the
  # description sits above its reference line and the numeric value below it.
  yrange <- diff(range(res$df$difference))

  ggplot(res$df) +
    geom_point(aes(x = average, y = difference), color = "blue") +
    geom_hline(yintercept = res$difference_mean, color = "blue", linetype = 1) +
    geom_hline(yintercept = 0, color = "blue", linetype = 2) +
    geom_hline(yintercept = res$lower_agreement_limit, color = "red", linetype = 2) +
    geom_hline(yintercept = res$upper_agreement_limit, color = "red", linetype = 2) +
    annotate(
      "text",
      x = xpos, y = res$lower_agreement_limit + 0.03 * yrange,
      label = "lower limits of agreement", color = "red"
    ) +
    annotate(
      "text",
      x = xpos, y = res$upper_agreement_limit + 0.03 * yrange,
      label = "upper limits of agreement", color = "red"
    ) +
    annotate(
      "text",
      x = xpos, y = res$difference_mean + 0.03 * yrange,
      label = "mean of difference between two measures", color = "blue"
    ) +
    annotate(
      "text",
      x = xpos, y = res$lower_agreement_limit - 0.03 * yrange,
      label = sprintf("%.2f", res$lower_agreement_limit), color = "red"
    ) +
    annotate(
      "text",
      x = xpos, y = res$upper_agreement_limit - 0.03 * yrange,
      label = sprintf("%.2f", res$upper_agreement_limit), color = "red"
    ) +
    annotate(
      "text",
      x = xpos, y = res$difference_mean - 0.03 * yrange,
      # Must read `difference_mean`: a misspelled element name yields `NULL` and an empty label.
      label = sprintf("%.2f", res$difference_mean), color = "blue"
    ) +
    xlab("Average of two measures") +
    ylab("Difference between two measures")
}
4 changes: 3 additions & 1 deletion R/package.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,15 @@
#' @importFrom Rdpack reprompt
#' @importFrom rlang .data
#' @importFrom survival coxph strata Surv
#' @importFrom stats pchisq setNames
#' @importFrom stats pchisq setNames complete.cases qnorm qt sd
NULL

# Resolve missing global definitions:
utils::globalVariables(c(
".",
"x",
"average",
"difference",
"control_coxph",
"control_incidence_rate",
"control_analyze_vars",
Expand Down
20 changes: 20 additions & 0 deletions inst/REFERENCES.bib
Original file line number Diff line number Diff line change
Expand Up @@ -51,3 +51,23 @@ @ARTICLE{Schouten1980-kd
year = 1980,
language = "en"
}

@ARTICLE{bland1986statistical,
title = "Statistical methods for assessing agreement between two methods of clinical measurement",
author = "Bland, J Martin and Altman, Douglas G",
abstract = "In clinical measurement comparison of a new measurement technique
with an established one is often needed to see whether they agree
sufficiently for the new to replace the old. Such investigations
are often analysed inappropriately, notably by using correlation
coefficients. The use of correlation is misleading. An alternative
approach, based on graphical techniques and simple calculations,
is described, together with the relation between this analysis and
the assessment of repeatability.",
journal = "The Lancet",
publisher = "Elsevier",
volume ="327",
number ="8476",
pages ="307--310",
year ="1986",
language = "en"
}
1 change: 1 addition & 0 deletions inst/WORDLIST
Original file line number Diff line number Diff line change
Expand Up @@ -37,3 +37,4 @@ subtables
unformatted
ungroup
unstratified
altman
40 changes: 40 additions & 0 deletions man/bland_altman.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

61 changes: 61 additions & 0 deletions tests/testthat/test-bland-altman.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
# Input vectors of different lengths must be rejected.
testthat::test_that("unequal length vector gives correct error", {
  shorter <- 1:5
  longer <- 1:6
  testthat::expect_error(
    s_bland_altman(x = shorter, y = longer, conf_level = 0.95)
  )
})

# Non-numeric data and an out-of-range confidence level must both be rejected.
testthat::test_that("infeasible input gives correct error", {
  # character input for `x`
  testthat::expect_error(
    s_bland_altman(x = c("a", "b", "c"), y = 1:3, conf_level = 0.95)
  )
  # confidence level above 1
  testthat::expect_error(
    s_bland_altman(x = 1:3, y = 4:6, conf_level = 2)
  )
})


# Compares every element returned by `s_bland_altman()` with reference values for seeded data.
testthat::test_that("s_bland_altman works with two vectors", {
  set.seed(1)
  x <- rnorm(20)
  y <- rnorm(20)
  res <- s_bland_altman(x, y, conf_level = 0.9)
  average <- (x + y) / 2
  difference <- x - y
  expected <- list(
    df = data.frame(average, difference),
    difference_mean = mean(x) - mean(y),
    ci_mean = c(-0.3414723, 0.7354631),
    difference_sd = 1.392664,
    difference_se = 0.3114091,
    upper_agreement_limit = 2.487724,
    lower_agreement_limit = -2.093733,
    agreement_limit_se = 0.5393764,
    upper_agreement_limit_ci = c(1.555070, 3.420377),
    lower_agreement_limit_ci = c(-3.026386, -1.161079),
    t_value = 1.729133,
    n = 20L
  )
  # The reference values are rounded, so compare with a numeric tolerance;
  # `expect_equal()` is the expectation that documents a `tolerance` argument.
  testthat::expect_equal(res, expected, tolerance = 1e-5)
})


# Pairs with a missing value in either vector must be dropped before the statistics are derived,
# so the results match those of the complete seeded data.
testthat::test_that("s_bland_altman works with two vectors with NA element in either vectors", {
  set.seed(1)
  x <- rnorm(20)
  y <- rnorm(20)
  # Surround the seeded data with pairs that are incomplete on one side or the other.
  x <- c(NA_real_, 2, x, NA_real_)
  y <- c(1, NA_real_, y, 2)
  res <- s_bland_altman(x, y, conf_level = 0.9)
  average <- (x + y) / 2
  difference <- x - y
  # Drop the incomplete rows and reset the row names to match the returned data frame.
  df <- data.frame(na.omit(data.frame(average, difference)), row.names = NULL)
  expected <- list(
    df = df,
    difference_mean = 0.1969954,
    ci_mean = c(-0.3414723, 0.7354631),
    difference_sd = 1.392664,
    difference_se = 0.3114091,
    upper_agreement_limit = 2.487724,
    lower_agreement_limit = -2.093733,
    agreement_limit_se = 0.5393764,
    upper_agreement_limit_ci = c(1.555070, 3.420377),
    lower_agreement_limit_ci = c(-3.026386, -1.161079),
    t_value = 1.729133,
    n = 20L
  )
  # The reference values are rounded, so compare with a numeric tolerance;
  # `expect_equal()` is the expectation that documents a `tolerance` argument.
  testthat::expect_equal(res, expected, tolerance = 1e-5)
})