diff --git a/R/data.R b/R/data.R index a3b265bb..3d905321 100644 --- a/R/data.R +++ b/R/data.R @@ -50,10 +50,44 @@ NULL #' @name PAP.adh -#' @title Biweekly Mean Therapy Adherence of OSA Patients over 1 Year +#' @title Weekly Mean PAP Therapy Usage of OSA Patients in the First 3 Months #' @description -#' A simulated longitudinal dataset comprising 500 patients with obstructive sleep apnea (OSA) during their -#' first year on CPAP therapy. +#' A simulated longitudinal dataset comprising 301 patients with obstructive sleep apnea (OSA) during their first 91 days (13 weeks) of PAP therapy. +#' The longitudinal patterns were inspired by the adherence patterns reported by Yi et al. (2022), interpolated to weekly hours of usage. +#' @format A `data.frame` comprising longitudinal data of 301 patients, each having 13 observations over a period of 13 weeks. +#' Each row represents a patient observation interval (one week), with columns: +#' \describe{ +#' \item{Patient}{`integer`: The patient identifier, where each value represents a simulated patient.} +#' \item{Week}{`integer`: The week number, starting from 1.} +#' \item{UsageHours}{`numeric`: The mean hours of usage in the respective week. 
+#' Greater than or equal to zero, and typically around 4-6 hours.} +#' \item{Group}{`factor`: The reference group (i.e., adherence pattern) from which this patient was generated.} +#' } +#' +#' \insertRef{yi2022identifying}{latrend} +#' @seealso [latrend-data] [PAP.adh1y] +#' @examples +#' data(PAP.adh) +#' +#' if (require("ggplot2")) { +#' plotTrajectories(PAP.adh, id = "Patient", time = "Week", response = "UsageHours") +#' +#' # plot according to cluster ground truth +#' plotTrajectories( +#' PAP.adh, +#' id = "Patient", +#' time = "Week", +#' response = "UsageHours", +#' cluster = "Group" +#' ) +#' } +"PAP.adh" + + +#' @name PAP.adh1y +#' @title Biweekly Mean PAP Therapy Adherence of OSA Patients over 1 Year +#' @description +#' A simulated longitudinal dataset comprising 500 patients with obstructive sleep apnea (OSA) during their first year on CPAP therapy. #' The dataset contains the patient usage hours, averaged over 2-week periods. #' #' The daily usage data underlying the downsampled dataset was simulated based on 7 different adherence patterns. @@ -77,25 +111,20 @@ NULL #' \insertRef{aloia2008time}{latrend} #' @seealso [latrend-data] #' @examples -#' data(PAP.adh) +#' data(PAP.adh1y) #' #' if (require("ggplot2")) { -#' plotTrajectories(PAP.adh, id = "Patient", time = "Biweek", response = "UsageHours") +#' plotTrajectories(PAP.adh1y, id = "Patient", time = "Biweek", response = "UsageHours") #' #' # plot according to cluster ground truth #' plotTrajectories( -#' PAP.adh, +#' PAP.adh1y, #' id = "Patient", #' time = "Biweek", #' response = "UsageHours", #' cluster = "Group" #' ) #' } -"PAP.adh" - -#' @rdname PAP.adh -#' @description The `PAP.adh1y` dataset is a subset of `PAP.adh`, comprising only patients who used therapy for at least 1 year. -#' The subset does not contain the Non-users and Early drop-out groups. 
"PAP.adh1y" #' @export diff --git a/data-raw/PAP.adh.R b/data-raw/PAP.adh.R new file mode 100644 index 00000000..e76ae8d0 --- /dev/null +++ b/data-raw/PAP.adh.R @@ -0,0 +1,116 @@ +library(magrittr) +library(assertthat) +library(data.table) +library(latrend) + +# Cluster 1: "adherers" +# y = 385, 326, 281 +# Cluster 2: "improvers" +# y = 256, 297, 365 +# Cluster 3: "non-adherers" +# y = 175, 137, 107 + +# convert daily usage-hour variance to weekly usage minutes SD +matrix(rnorm(1e6 * 7L, sd = 1.5 * 60), ncol = 1e6L) %>% colMeans() %>% sd() + +# define group trajectories +groupTrajMat = cbind( + Adherers = approx(x = c(1, 4, 13), y = c(385, 328, 281), xout = 1:13, method = 'linear')$y, + Improvers = approx(x = c(1, 4, 13), y = c(256, 293, 365), xout = 1:13, method = 'linear')$y, + `Non-adherers` = approx(x = c(1, 4, 13), y = c(176, 138, 108), xout = 1:13, method = 'linear')$y +) + +cluster1Data = generateLongData( + sizes = 162L, + id = 'Patient', + cluster = ~ Week + Adherers, + random = ~ 1, + data = data.frame(Week = 1:13, Adherers = groupTrajMat[, 'Adherers']), + clusterCoefs = cbind(c(0, 0, 1)), + clusterNames = 'Adherer', + randomScales = cbind(62.7), + noiseScales = 23, + seed = 1L +) + +plotTrajectories(cluster1Data, time = 'Week', id = 'Patient', cluster = 'Class', response = 'Value') + + expand_limits(y = 0) + + scale_x_continuous(breaks = seq(1, 13, by = 2)) + +cluster2Data = generateLongData( + sizes = 56L, + id = 'Patient', + cluster = ~ Week + Adherers, + random = ~ 1, + data = data.frame(Week = 1:13, Adherers = groupTrajMat[, 'Improvers']), + clusterCoefs = cbind(c(0, 0, 1)), + clusterNames = 'Improver', + randomScales = cbind(68.9), + noiseScales = 43, + seed = 2L +) %>% + .[, Patient := Patient + max(cluster1Data$Patient)] + +cluster3Data = generateLongData( + sizes = 83L, + id = 'Patient', + cluster = ~ Week + Adherers, + random = ~ 1, + data = data.frame(Week = 1:13, Adherers = groupTrajMat[, 'Non-adherers']), + clusterCoefs = cbind(c(0, 0, 1)), + 
clusterNames = 'Non-adherer', + randomScales = cbind(53.7), + noiseScales = 34, + seed = 3L +) %>% + .[, Patient := Patient + max(cluster2Data$Patient)] + +dataset = rbind(cluster1Data, cluster2Data, cluster3Data) %>% + .[, UsageHours := fifelse(Value > 0, Value / 60, 0)] %>% + setnames('Class', 'Group') + +plotTrajectories(dataset, time = 'Week', id = 'Patient', cluster = 'Group', response = 'UsageHours') + + expand_limits(y = 0) + + scale_x_continuous(breaks = seq(1, 13, by = 2)) + +PAP.adh = subset(dataset, select = c('Patient', 'Week', 'UsageHours', 'Group')) %>% + as.data.frame() +head(PAP.adh) +stopifnot(uniqueN(PAP.adh$Patient) == 301) + +usethis::use_data(PAP.adh, overwrite = TRUE) + + +# parallel +nCores <- parallel::detectCores(logical = FALSE) +if (.Platform$OS.type == "windows") { + cl <- parallel::makeCluster(nCores) + doParallel::registerDoParallel(cl) +} else { + doMC::registerDoMC(nCores) +} + +# test gbtm +options(latrend.id = "Patient", latrend.time = "Week") +gbtmMethod <- lcMethodLcmmGBTM( + fixed = UsageHours ~ Week, + mixture = ~ Week, + maxiter = 20, #not recommended; used for speed + idiag = TRUE +) +gbtmMethods <- lcMethods(gbtmMethod, nClusters = 1:5) +gbtmList <- latrendBatch(gbtmMethods, data = PAP.adh, parallel = TRUE) + +plotMetric(gbtmList, c("Dunn", "WMAE", "BIC", "estimationTime")) + +# test gmm +gmmMethod <- lcMethodLcmmGMM( + fixed = UsageHours ~ Week, + mixture = ~ Week, + idiag = TRUE +) +gmmMethods <- lcMethods(gmmMethod, nClusters = 1:5) +system.time({ + gmmList <- latrendBatch(gmmMethods, data = PAP.adh, parallel = TRUE) +}) +plotMetric(gmmList, c("Dunn", "WMAE", "BIC", "estimationTime")) diff --git a/data-raw/osa-adherence-data.R b/data-raw/PAP.adh1y.R similarity index 95% rename from data-raw/osa-adherence-data.R rename to data-raw/PAP.adh1y.R index 0b26daaf..2bc698a9 100644 --- a/data-raw/osa-adherence-data.R +++ b/data-raw/PAP.adh1y.R @@ -245,13 +245,5 @@ plotTrajectories(dataset, id = 'Patient', time = 'Biweek', response 
= 'UsageHour dataset[, Biweek := as.integer(Biweek)] dataset[, MaxDay := as.integer(MaxDay)] -PAP.adh = as.data.frame(dataset) -usethis::use_data(PAP.adh, overwrite = TRUE) - -# PAP.adh1y -PAP.adh1y = as.data.frame( - dataset[not(Group %in% c('Early drop-outs', 'Non-users'))] -) -PAP.adh1y$Patient = factor(PAP.adh1y$Patient) -PAP.adh1y$Group = factor(PAP.adh1y$Group) +PAP.adh1y = as.data.frame(dataset) usethis::use_data(PAP.adh1y, overwrite = TRUE) diff --git a/data/PAP.adh.rda b/data/PAP.adh.rda index 74e8a61d..be55ac0b 100644 Binary files a/data/PAP.adh.rda and b/data/PAP.adh.rda differ diff --git a/data/PAP.adh1y.rda b/data/PAP.adh1y.rda index c9f6b7fb..3f88bbb3 100644 Binary files a/data/PAP.adh1y.rda and b/data/PAP.adh1y.rda differ diff --git a/inst/REFERENCES.bib b/inst/REFERENCES.bib index 2667c868..0d8f0d44 100644 --- a/inst/REFERENCES.bib +++ b/inst/REFERENCES.bib @@ -434,4 +434,16 @@ @Article{denteuling2021comparison timestamp = {2021-03-25}, } +@Article{yi2022identifying, + author = {Yi, Huijie and Dong, Xiaosong and Shang, Shaomei and Zhang, Chi and Xu, Liyue and Han, Fang}, + journal = {Frontiers in Neurology}, + title = {Identifying longitudinal patterns of {CPAP} treatment in {OSA} using growth mixture modeling: Disease characteristics and psychological determinants}, + year = {2022}, + month = nov, + pages = {1063461}, + volume = {13}, + doi = {10.3389/fneur.2022.1063461}, + modificationdate = {2023-03-10T11:31:38}, +} + @Comment{jabref-meta: databaseType:bibtex;} diff --git a/man/PAP.adh.Rd b/man/PAP.adh.Rd index ffe71c16..82cdb497 100644 --- a/man/PAP.adh.Rd +++ b/man/PAP.adh.Rd @@ -3,65 +3,44 @@ \docType{data} \name{PAP.adh} \alias{PAP.adh} -\alias{PAP.adh1y} -\title{Biweekly Mean Therapy Adherence of OSA Patients over 1 Year} +\title{Weekly Mean PAP Therapy Usage of OSA Patients in the First 3 Months} \format{ A \code{data.frame} comprising longitudinal data of 500 patients, each having 26 observations over a period of 1 year. 
Each row represents a patient observation interval (two weeks), with columns: \describe{ -\item{Patient}{\code{factor}: The patient identifier, where each level represents a simulated patient.} -\item{Biweek}{\code{integer}: Two-week interval index. Starts from 1.} -\item{MaxDay}{\code{integer}: The last day used for the aggregation of the respective interval, \code{integer}} +\item{Patient}{\code{integer}: The patient identifier, where each level represents a simulated patient.} +\item{Week}{\code{integer}: The week number, starting from 1.} \item{UsageHours}{\code{numeric}: The mean hours of usage in the respective week. Greater than or equal to zero, and typically around 4-6 hours.} \item{Group}{\code{factor}: The reference group (i.e., adherence pattern) from which this patient was generated.} } -An object of class \code{data.frame} with 9880 rows and 5 columns. -} -\source{ -This dataset was generated based on the cluster-specific descriptive statistics table provided in Aloia et al. (2008), -with some adjustments made in order to improve cluster separation for demonstration purposes. - -\insertRef{aloia2008time}{latrend} +\insertRef{yi2022identifying}{latrend} } \usage{ PAP.adh - -PAP.adh1y } \description{ -A simulated longitudinal dataset comprising 500 patients with obstructive sleep apnea (OSA) during their -first year on CPAP therapy. -The dataset contains the patient usage hours, averaged over 2-week periods. - -The daily usage data underlying the downsampled dataset was simulated based on 7 different adherence patterns. -The defined adherence patterns were inspired by the adherence patterns identified by Aloia et al. (2008), with slight adjustments - -The \code{PAP.adh1y} dataset is a subset of \code{PAP.adh}, comprising only patients who used therapy for at least 1 year. -The subset does not contain the Non-users and Early drop-out groups. -} -\note{ -This dataset is only intended for demonstration purposes. 
-While the data format will remain the same, the data content is subject to change in future versions. +A simulated longitudinal dataset comprising 301 patients with obstructive sleep apnea (OSA) during their first 91 days (13 weeks) of PAP therapy. +The longitudinal patterns were inspired by the adherence patterns reported by Yi et al. (2022), interpolated to weekly hours of usage. } \examples{ data(PAP.adh) if (require("ggplot2")) { - plotTrajectories(PAP.adh, id = "Patient", time = "Biweek", response = "UsageHours") + plotTrajectories(PAP.adh, id = "Patient", time = "Week", response = "UsageHours") # plot according to cluster ground truth plotTrajectories( PAP.adh, id = "Patient", - time = "Biweek", + time = "Week", response = "UsageHours", cluster = "Group" ) } } \seealso{ -\link{latrend-data} +\link{latrend-data} \link{PAP.adh1y} } \keyword{datasets} diff --git a/man/PAP.adh1y.Rd b/man/PAP.adh1y.Rd new file mode 100644 index 00000000..b9fb069c --- /dev/null +++ b/man/PAP.adh1y.Rd @@ -0,0 +1,58 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/data.R +\docType{data} +\name{PAP.adh1y} +\alias{PAP.adh1y} +\title{Biweekly Mean PAP Therapy Adherence of OSA Patients over 1 Year} +\format{ +A \code{data.frame} comprising longitudinal data of 500 patients, each having 26 observations over a period of 1 year. +Each row represents a patient observation interval (two weeks), with columns: +\describe{ +\item{Patient}{\code{factor}: The patient identifier, where each level represents a simulated patient.} +\item{Biweek}{\code{integer}: Two-week interval index. Starts from 1.} +\item{MaxDay}{\code{integer}: The last day used for the aggregation of the respective interval, \code{integer}} +\item{UsageHours}{\code{numeric}: The mean hours of usage in the respective week. 
+Greater than or equal to zero, and typically around 4-6 hours.} +\item{Group}{\code{factor}: The reference group (i.e., adherence pattern) from which this patient was generated.} +} +} +\source{ +This dataset was generated based on the cluster-specific descriptive statistics table provided in Aloia et al. (2008), +with some adjustments made in order to improve cluster separation for demonstration purposes. + +\insertRef{aloia2008time}{latrend} +} +\usage{ +PAP.adh1y +} +\description{ +A simulated longitudinal dataset comprising 500 patients with obstructive sleep apnea (OSA) during their first year on CPAP therapy. +The dataset contains the patient usage hours, averaged over 2-week periods. + +The daily usage data underlying the downsampled dataset was simulated based on 7 different adherence patterns. +The defined adherence patterns were inspired by the adherence patterns identified by Aloia et al. (2008), with slight adjustments +} +\note{ +This dataset is only intended for demonstration purposes. +While the data format will remain the same, the data content is subject to change in future versions. +} +\examples{ +data(PAP.adh1y) + +if (require("ggplot2")) { + plotTrajectories(PAP.adh1y, id = "Patient", time = "Biweek", response = "UsageHours") + + # plot according to cluster ground truth + plotTrajectories( + PAP.adh1y, + id = "Patient", + time = "Biweek", + response = "UsageHours", + cluster = "Group" + ) +} +} +\seealso{ +\link{latrend-data} +} +\keyword{datasets} diff --git a/man/plot-lcModel-method.Rd b/man/plot-lcModel-method.Rd index 2085975b..36751c4b 100644 --- a/man/plot-lcModel-method.Rd +++ b/man/plot-lcModel-method.Rd @@ -6,7 +6,7 @@ \alias{plot,lcModel-method} \title{Plot a lcModel} \usage{ -\S4method{plot}{lcModel}(x, y, ...) +\S4method{plot}{lcModel,ANY}(x, y, ...) 
} \arguments{ \item{x}{The \code{lcModel} object.} diff --git a/man/plot-lcModels-method.Rd b/man/plot-lcModels-method.Rd index 160f09eb..4534a076 100644 --- a/man/plot-lcModels-method.Rd +++ b/man/plot-lcModels-method.Rd @@ -6,7 +6,7 @@ \alias{plot,lcModels-method} \title{Grid plot for a list of models} \usage{ -\S4method{plot}{lcModels}(x, y, ..., subset, gridArgs = list()) +\S4method{plot}{lcModels,ANY}(x, y, ..., subset, gridArgs = list()) } \arguments{ \item{x}{The \code{lcModels} object.} diff --git a/tests/testthat/test-data.R b/tests/testthat/test-data.R index ebf098f9..9717a63d 100644 --- a/tests/testthat/test-data.R +++ b/tests/testthat/test-data.R @@ -2,35 +2,48 @@ test_that('PAP.adh structure', { data(PAP.adh) expect_is(PAP.adh, 'data.frame') - expect_named(PAP.adh, c('Patient', 'Biweek', 'MaxDay', 'UsageHours', 'Group')) - expect_is(PAP.adh$Patient, 'factor') - expect_is(PAP.adh$Biweek, 'integer') - expect_is(PAP.adh$MaxDay, 'integer') + expect_named(PAP.adh, c('Patient', 'Week', 'UsageHours', 'Group')) + expect_is(PAP.adh$Patient, 'integer') + expect_is(PAP.adh$Week, 'integer') expect_is(PAP.adh$UsageHours, 'numeric') expect_is(PAP.adh$Group, 'factor') }) test_that('PAP.adh content', { - expect_gte(min(PAP.adh$UsageHours), 0) - expect_true(noNA(PAP.adh$Patient)) - expect_equal(uniqueN(PAP.adh$Patient), nlevels(PAP.adh$Patient)) - expect_equal(uniqueN(PAP.adh$Group), nlevels(PAP.adh$Group)) - expect_true(noNA(PAP.adh$Biweek)) - expect_true(noNA(PAP.adh$UsageHours)) - expect_equal(min(PAP.adh$Biweek), 1) - expect_equal(max(PAP.adh$Biweek), 26) + with(PAP.adh, { + expect_gte(min(UsageHours), 0) + expect_true(noNA(Patient)) + expect_equal(uniqueN(Patient), 301) + expect_equal(nlevels(Group), 3) + expect_equal(uniqueN(Group), nlevels(Group)) + expect_true(noNA(Week)) + expect_true(noNA(UsageHours)) + expect_equal(min(Week), 1) + expect_equal(max(Week), 13) + }) }) test_that('PAP.adh1y', { - data(PAP.adh) data(PAP.adh1y) - 
expect_lt(nlevels(PAP.adh1y$Patient), nlevels(PAP.adh$Patient)) - expect_lt(nlevels(PAP.adh1y$Group), nlevels(PAP.adh$Group)) - - # check for empty levels - expect_equal(uniqueN(PAP.adh1y$Patient), nlevels(PAP.adh1y$Patient)) - expect_equal(uniqueN(PAP.adh1y$Group), nlevels(PAP.adh1y$Group)) + expect_is(PAP.adh1y, 'data.frame') + expect_named(PAP.adh1y, c('Patient', 'Biweek', 'MaxDay', 'UsageHours', 'Group')) + expect_is(PAP.adh1y$Patient, 'factor') + expect_is(PAP.adh1y$Biweek, 'integer') + expect_is(PAP.adh1y$MaxDay, 'integer') + expect_is(PAP.adh1y$UsageHours, 'numeric') + expect_is(PAP.adh1y$Group, 'factor') +}) - expect_true(all(levels(PAP.adh1y) %in% levels(PAP.adh))) +test_that('PAP.adh1y content', { + with(PAP.adh1y, { + expect_gte(min(UsageHours), 0) + expect_true(noNA(Patient)) + expect_equal(uniqueN(Patient), nlevels(Patient)) + expect_equal(uniqueN(Group), nlevels(Group)) + expect_true(noNA(Biweek)) + expect_true(noNA(UsageHours)) + expect_equal(min(Biweek), 1) + expect_equal(max(Biweek), 26) + }) })