UCD-SERG · kristinawlai · Nov 14, 2023 · Oct 25, 2023 · Oct 25, 2023 · Oct 25, 2023
diff --git a/.Rbuildignore b/.Rbuildignore
@@ -10,3 +10,12 @@
 ^CODE_OF_CONDUCT.md$
 ^README\.Rmd$
 ^README.html$
+^data-raw$
+^vignettes/articles$
+^CODE_OF_CONDUCT.md$
+sees_dmcmc_09.30.2021.rds$
+SEES_2022-10-24_redacted_2023-10-12.csv$
+allpopsamples_hlye.csv$
+^serocalculator\.Rcheck$
+^serocalculator.*\.tar\.gz$
+^serocalculator.*\.tgz$
diff --git a/.gitignore b/.gitignore
@@ -10,3 +10,6 @@ docs
 src-i386
 src-x64
 ..Rcheck
+serocalculator.Rcheck/
+serocalculator*.tar.gz
+serocalculator*.tgz
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -8,34 +8,37 @@ Authors@R: c(
   person(given = "Kristina", family = "Lai", role = c("aut")), 
   person(given = "Kristen", family = "Aiemjoy", email = "kaiemjoy@ucdavis.edu", role = c("aut")),
   person(given = "Douglas Ezra", family = "Morrison", email = "demorrison@ucdavis.edu", role = c("aut", "cre")))
-Description: Translates antibody levels measured in a cross-sectional population 
-  sample into an estimate of the frequency with which seroconversions (infections) 
-  occur in the sampled population. Forked from the "seroincidence" package v2.0.0 on CRAN.
-Depends: R (>= 2.10)
+Description: Translates antibody levels measured in cross-sectional population 
+  samples into estimates of the frequency with which seroconversions (infections) 
+  occur in the sampled populations. Replaces the previous 'seroincidence' package.
+Depends: R (>= 3.5.0)
 License: GPL-3
 Imports: 
-    bookdown,
-    dplyr,
+    dplyr (>= 1.1.1),
     magrittr,
+    parallel,
     Rcpp,
     rlang,
     stats,
+    tibble,
     tidyr,
     utils
 Suggests: 
     knitr,
     rmarkdown,
-    parallel,
     pander,
     Hmisc,
     tidyverse,
     fs,
-    testthat (>= 3.0.0)
+    testthat (>= 3.0.0),
+    readr,
+    ggplot2,
+    bookdown
 VignetteBuilder: knitr
 LazyData: true
 Encoding: UTF-8
 URL: https://github.com/UCD-SERG/serocalculator, https://ucd-serg.github.io/serocalculator/
-RoxygenNote: 7.2.3
+RoxygenNote: 7.2.3.9000
 NeedsCompilation: no
 LinkingTo: 
     Rcpp

diff --git a/MD5 b/MD5
@@ -4,7 +4,7 @@ ce096f7d73a14d083004dc4cd4827088 *NEWS
 2f0d05fa159c0cbaf611b95b9fb8198c *R/data-help.R
 a1522b6b5522ee3108f4c18369b9cd4d *R/deltaFunc.R
 2459fe699d9f2dee2d4096a8459a69c3 *R/densFunc.R
-59400925562a9721e63e7991ff154bcb *R/estimateSeroincidence.R
+59400925562a9721e63e7991ff154bcb *R/est.incidence.by.R
 97fa0b804650de611a334024916baaca *R/getAdditionalData.R
 f0d9baae9d10e02db385f555555931fe *R/nll.R
 3ef0eb791a21c2221dafe07457466876 *R/nllByType.R
@@ -38,7 +38,7 @@ a23cfd5e4e5e88a96e2774853f48aa65 *man/campylobacterDelftParams4.Rd
 e6415e1ea335b82a24cc3ef9aa5478bc *man/campylobacterSSIParams2.Rd
 4d2797bc3c01ab24effb7a6900ae879a *man/campylobacterSSIParams4.Rd
 f35091d97beb8e70dd2fa2f655c4321b *man/campylobacterSimLowData.Rd
-c0e69fa2dec067fa5db26cdaeab47998 *man/estimateSeroincidence.Rd
+c0e69fa2dec067fa5db26cdaeab47998 *man/est.incidence.by.Rd
 a351cb2497a6e921c2eced22d0d95c1a *man/getAdditionalData.Rd
 01c20490f1e8ed5ec32473649160c8ad *man/pertussisIgGPTParams1.Rd
 02a459fb98436acfdfb292a74947c09c *man/pertussisIgGPTParams2.Rd

diff --git a/NAMESPACE b/NAMESPACE
@@ -1,17 +1,37 @@
 # Generated by roxygen2: do not edit by hand
 
-S3method(print,seroincidence)
-S3method(print,summary.seroincidence)
-S3method(summary,seroincidence)
-export(estimateSeroincidence)
+S3method(print,seroincidence.ests)
+S3method(print,summary.seroincidence.ests)
+S3method(summary,seroincidence.ests)
+export(.optNll)
+export(est.incidence)
+export(est.incidence.by)
 export(fdev)
 export(getAdditionalData)
-export(incidence.age)
+export(postprocess_fit)
 importFrom(Rcpp,sourceCpp)
+importFrom(dplyr,across)
+importFrom(dplyr,all_of)
+importFrom(dplyr,anti_join)
+importFrom(dplyr,any_of)
+importFrom(dplyr,bind_rows)
+importFrom(dplyr,count)
+importFrom(dplyr,distinct)
+importFrom(dplyr,everything)
 importFrom(dplyr,filter)
+importFrom(dplyr,inner_join)
 importFrom(dplyr,mutate)
+importFrom(dplyr,pull)
+importFrom(dplyr,relocate)
+importFrom(dplyr,rename)
+importFrom(dplyr,row_number)
 importFrom(dplyr,select)
+importFrom(dplyr,semi_join)
+importFrom(dplyr,tibble)
 importFrom(magrittr,"%>%")
+importFrom(parallel,clusterEvalQ)
+importFrom(parallel,clusterExport)
+importFrom(parallel,parLapplyLB)
 importFrom(rlang,.data)
 importFrom(rlang,.env)
 importFrom(stats,dlnorm)
@@ -20,6 +40,9 @@ importFrom(stats,optim)
 importFrom(stats,pgamma)
 importFrom(stats,plnorm)
 importFrom(stats,qnorm)
+importFrom(tibble,as_tibble)
+importFrom(tibble,column_to_rownames)
+importFrom(tibble,tibble)
 importFrom(tidyr,drop_na)
 importFrom(utils,download.file)
 importFrom(utils,unzip)

diff --git a/R/build_likelihood_function.R b/R/build_likelihood_function.R
@@ -20,6 +20,9 @@ build_likelihood_function = function(
           longitudinal_parameter_samples[[cur_antigen]],
           noise_params[[cur_antigen]])
     }
+
+    return(res)
+
   }
 
   return(likelihood_function)

diff --git a/R/est.incidence.R b/R/est.incidence.R
@@ -0,0 +1,125 @@
+
+#' Age-specific seroincidence function
+#'
+#' This function models seroincidence using maximum likelihood estimation; that is, it finds the value of the seroincidence parameter which maximizes the likelihood (i.e., joint probability) of the data.
+#'
+#' @param dpop cross-sectional population data
+#' @param c.age age category; if `NULL` (the default), the inputs are not filtered by `ageCat`
+#' @param start starting value for incidence rate; must be a single positive, finite number, because the search is carried out on the log scale
+#' @param antigen_isos antigen isotypes: a [character()] vector of one or more antigen isotype names, which should match the values of the `antigen_iso` column in the `dpop` input argument
+#' @param noise_params a [data.frame()] containing columns `nu`, etc. specifying conditional noise parameters
+#' @param dmcmc mcmc samples from distribution of longitudinal decay curve parameters
+#' @param verbose logical: if TRUE, print verbose log information to console
+#' @param iterlim an [integer()], which provides an upper limit on the number of computational iterations used to search for the maximum likelihood estimate of incidence (passed to [stats::nlm()]).
+#' @param stepmax a [numeric()], which limits how aggressively the [stats::nlm()] algorithm searches for the maximum likelihood estimate of incidence. If this function outputs an infinite standard error estimate, consider reducing this parameter.
+#' @inheritParams postprocess_fit
+#' @inheritParams stats::nlm
+#' @inheritDotParams stats::nlm -f -p -hessian
+#'
+#' @return A [data.frame()] containing the following:
+#' * `est.start`: the starting guess for incidence rate
+#' * `ageCat`: the age category we are analyzing (only set when `c.age` is supplied)
+#' * `incidence.rate`: the estimated incidence rate, per person year
+#' * `CI.lwr`: lower limit of confidence interval for incidence rate
+#' * `CI.upr`: upper limit of confidence interval for incidence rate
+#' * `coverage`: coverage probability
+#' * `neg.llik`: negative log-likelihood
+#' * `iterations`: the number of iterations used
+#' * `antigen.iso`: the antigen isotypes used, joined by `"+"`
+#'
+#' The (possibly age-filtered) `noise_params` are attached to the result as the `noise.parameters` attribute.
+#'
+#' @export
+
+est.incidence <- function(
+    dpop,
+    dmcmc,
+    noise_params,
+    c.age = NULL,
+    antigen_isos = unique(dpop$antigen_iso),
+    start = 0.1,
+    iterlim = 100,
+    coverage = .95,
+    verbose = FALSE,
+    stepmax = 1,
+    ...) {
+
+  # The optimizer works on log(incidence), so the starting value must be
+  # strictly positive for its logarithm to be finite.
+  if (!is.numeric(start) || length(start) != 1L ||
+      !is.finite(start) || start <= 0) {
+    stop("`start` must be a single positive, finite number.", call. = FALSE)
+  }
+  log_start <- log(start)
+
+  # Optionally restrict every input to one age category. `noise_params` is
+  # only filtered when it actually carries an `ageCat` column.
+  if (!is.null(c.age)) {
+    dpop <- dplyr::filter(dpop, .data[["ageCat"]] == c.age)
+    dmcmc <- dplyr::filter(dmcmc, .data[["ageCat"]] == c.age)
+
+    if ("ageCat" %in% names(noise_params)) {
+      noise_params <-
+        dplyr::filter(noise_params, .data[["ageCat"]] == c.age)
+    }
+  }
+
+  # Per-antigen inputs for the likelihood, keyed by antigen-isotype name.
+  # NOTE: the default for `antigen_isos` is evaluated lazily, i.e. here,
+  # after `dpop` may have been filtered by age above.
+  pop_by_antigen <- list()
+  curves_by_antigen <- list()
+  noise_by_antigen <- list()
+
+  for (cur_antigen in antigen_isos) {
+    pop_by_antigen[[cur_antigen]] <- get_xspd_one_antigen(
+      dpop = dpop,
+      antigen = cur_antigen)
+
+    curves_by_antigen[[cur_antigen]] <- get_curve_params_one_antigen(
+      params = dmcmc,
+      antigen = cur_antigen)
+
+    noise_by_antigen[[cur_antigen]] <-
+      dplyr::filter(noise_params, .data[["antigen_iso"]] == cur_antigen)
+  }
+
+  # Objective function that `nlm()` minimizes over the log incidence rate.
+  objfunc <- build_likelihood_function(
+    cross_sectional_data = pop_by_antigen,
+    longitudinal_parameter_samples = curves_by_antigen,
+    noise_params = noise_by_antigen)
+
+  # Seroincidence estimation, timed. A nested call is used instead of the
+  # native pipe (`|>`, R >= 4.1) because DESCRIPTION only requires R >= 3.5.0.
+  elapsed <- system.time(
+    fit <- stats::nlm(
+      f = objfunc,
+      p = log_start,
+      hessian = TRUE,
+      iterlim = iterlim,
+      stepmax = stepmax,
+      ...)
+  )
+
+  if (verbose) {
+    message("elapsed time: ")
+    print(elapsed)
+  }
+
+  # Flag fits that used up the whole iteration budget.
+  if (fit$iterations >= iterlim) {
+    warning(
+      "Maximum `nlm()` iterations reached; consider increasing `iterlim` argument.")
+  }
+
+  # Convert the raw `nlm()` fit into the result table, then label it with the
+  # age category and antigen isotypes it refers to.
+  estimates <- postprocess_fit(fit, coverage = coverage, start = start)
+  estimates <- dplyr::mutate(
+    estimates,
+    ageCat = c.age,
+    antigen.iso = paste(antigen_isos, collapse = "+"))
+
+  structure(estimates, noise.parameters = noise_params)
+}