UCD-SERG · kristinawlai · Nov 2, 2023 · Nov 3, 2023 · Nov 3, 2023 · Nov 7, 2023
diff --git a/R/getAdditionalData.R b/R/getAdditionalData.R
@@ -1,47 +1,58 @@
 #' Get Additional Data
 #'
-#' Retrieves additional data from internet. This can be any file type, but the purpose of this
-#' function is to download data such as longitudinal response parameters from an online repository.
+#' Retrieves additional data from internet. The data format must be .RDS or a zipped .RDS. The purpose of this
+#' function is to download data such as longitudinal response parameters from an online repository or population data.
 #'
-#' @param fileName Name of the file to download. Required.
-#' @param repoURL Web address of the remote repository of files to download from. Required.
-#'   Default = `"http://ecdc.europa.eu/sites/portal/files/documents"`
-#' @param savePath Folder to save the downloaded and unzipped (if needed) file. File is saved only
+#' Data for this package is available at: <https://osf.io/ne8pc/files/osfstorage>
+#'
+#' You can save the data into your chosen directory using the optional savePath argument. Specify the file path and the file name.
+#'
+#' Large datasets may time out. If so, you can allow more download time by raising the maximum timeout (in seconds) in the code below (e.g., from 300 to 1000):
+#' ```options(timeout = max(300, getOption("timeout")))```
+#'
+#' @param fileURL URL of the file to be downloaded.
+#' @param savePath Folder directory and filename to save the downloaded and unzipped (if needed) file. File is saved only
 #'   if this argument is not `NULL`. Optional. Default = `NULL`.
 #'
-#' @return
+#' @return the R object stored in the file indicated by the `fileURL` input
 #' Data object
 #'
 #' @examples
-#'
 #' \dontrun{
-#' getAdditionalData(fileName = "coxiellaIFAParams4.zip")
-#' getAdditionalData(fileName = "yersiniaSSIParams4.zip")
-#' getAdditionalData(fileName = "coxiellaIFAParams4.zip", savePath = getwd())
-#' getAdditionalData(fileName = "yersiniaSSIParams4.zip", savePath = getwd())
+#' curve_param_samples =
+#'   getAdditionalData(
+#'     fileURL = "https://osf.io/download/bhfvx")
+#'
+#' # optionally, save the data to disk
+#' curve_param_samples =
+#'   getAdditionalData(
+#'     fileURL = "https://osf.io/download/bhfvx",
+#'     savePath = "~/Downloads/curv_params.rds")
 #' }
 #'
 #' @export
 getAdditionalData <- function(
-  fileName,
-  repoURL = "http://ecdc.europa.eu/sites/portal/files/documents",
-  savePath = NULL)
+    fileURL,
+    savePath = NULL)
 {
+  fileName <- basename(fileURL)
   tmpFileName <- file.path(tempdir(), fileName)
   on.exit({
     unlink(tmpFileName)
   })
-
+  #Increase timeout for big files
+  options(timeout = max(300, getOption("timeout")))
   # Download
-  tryCatch({
-    download.file(file.path(repoURL, fileName),
-                  tmpFileName,
-                  mode = "wb",
-                  quiet = TRUE)
-  },
-  error = function(e) {
-    print("There is problem with downloading the requested file. Please, check input arguments or the internet connection.")
-  })
+  tryCatch(
+    {
+      download.file(fileURL,
+                    tmpFileName,
+                    mode = "wb",
+                    quiet = TRUE)
+    },
+    error = function(e) {
+      print("There is problem with downloading the requested file. Please, check input arguments or the internet connection.")
+    })
 
   # Unzip
   if (tolower(tools::file_ext(tmpFileName)) == "zip") {
@@ -62,8 +73,9 @@ getAdditionalData <- function(
 
   # Store
   if (!is.null(savePath)) {
-    dir.create(savePath, showWarnings = FALSE, recursive = TRUE)
-    file.copy(tmpFileName, savePath, overwrite = TRUE, recursive = TRUE)
+    pathName <- dirname(savePath)
+    dir.create(pathName, showWarnings = FALSE, recursive = TRUE)
+    file.copy(tmpFileName, pathName, overwrite = TRUE, recursive = TRUE)
   }
 
   # Read

diff --git a/inst/extdata/SEES_mcmc_unstratified_df.rds b/inst/extdata/SEES_mcmc_unstratified_df.rds
diff --git a/inst/extdata/South_Sudan_Juba_HlyEIgG_Nov_21_22.csv b/inst/extdata/South_Sudan_Juba_HlyEIgG_Nov_21_22.csv
diff --git a/man/getAdditionalData.Rd b/man/getAdditionalData.Rd
diff --git a/vignettes/Enteric_Fever_Tutorial.Rmd b/vignettes/Enteric_Fever_Tutorial.Rmd
@@ -0,0 +1,158 @@
+---
+title: "Enteric Fever Seroincidence"
+output: rmarkdown::html_vignette
+vignette: >
+  %\VignetteIndexEntry{Enteric Fever Seroincidence}
+  %\VignetteEngine{knitr::rmarkdown}
+  %\VignetteEncoding{UTF-8}
+bibliography: references.bib
+toc: true
+
+---
+# Introduction
+
+This vignette provides users with an example analysis using the *serocalculator* package by reproducing an analysis in [@Aiemjoy_2022_Lancet]. Users will determine the seroincidence of enteric fever in a cross-sectional serosurvey from SEES, a serologic and environmental surveillance study conducted in Asia, plus existing longitudinal antibody dynamics collected from Bangladesh, Ghana, Nepal, and Pakistan. 
+
+Further details on this study can be found here: https://doi.org/10.1016/S2666-5247(22)00114-8. 
+
+
+```{r, include = FALSE}
+knitr::opts_chunk$set(
+  collapse = TRUE,
+  comment = "#>"
+)
+```
+# Sample Analysis
+## Load packages
+The first step in conducting this analysis is to load our necessary packages. Follow the [installation instructions](https://ucd-serg.github.io/serocalculator/) if you have not already installed *serocalculator*. 
+
+```{r setup, message=FALSE}
+
+#library(devtools)
+#install_github("UCD-SERG/serocalculator")
+library(serocalculator)
+library(tidyverse)
+library(ggplot2)
+library(readr)
+
+
+```
+## Load data
+### a. Load and prepare longitudinal parameter data
+The next step is to load the longitudinal data to set the antibody decay parameters. In this example, these parameters were modeled with Bayesian hierarchical models to fit two-phase power-function decay models to the longitudinal antibody responses among confirmed enteric fever cases. 
+
+These parameters include the following:
+
+  * $y_0$ = baseline  
+  * $y_1$ = peak antibody responses 
+  * $t_1$ = time to peak 
+  * $\alpha$ = decay rate
+  * $r$ = decay shape 
+
+We also create two additional variables: the annual decay rate, $\alpha$, which is calculated from the daily decay rate in this example, and $d$, which is derived from the decay shape as $r - 1$.
+
+Finally, we select only the variables needed for the analysis. 
+
+```{r longdata, echo=FALSE, message=FALSE}
+c.hlye.IgG <-                             #Need to change this to pull from osf
+  fs::path_package(                          
+  "extdata", 
+  "dmcmc_hlyeigg_09.30.rds", 
+  package = "serocalculator") |> #Load longitudinal parameters dataset
+ readRDS()%>%
+  mutate(alpha = alpha*365.25, #Create alpha and d 
+         d = r-1) %>%
+  select(y1, alpha, d) #Select only the variables needed for analysis
+```
+
+### Load and prepare cross-sectional data 
+The simulated data represents a cross-sectional serosurvey conducted in a representative sample of the general population without regard to disease status. 
+
+In this scenario, we have selected hlye and IgG as our target measures. Users may select different serologic markers depending on what is available. From the original dataset, we rename our variables to *y* and *a*. Finally, we once again limit the dataset to only the variables needed for the analysis. 
+
+``` {r simdata, message=FALSE}
+library(fs) # filesystem utility functions
+p.hlye.IgG  <- 
+  fs::path_package(
+    package = "serocalculator", 
+    "extdata/simpophlyeigg.2.csv") %>% #Load  cross-sectional dataset
+  read_csv() %>%
+  rename( #rename variables
+    y = y.smpl,
+    a = a.smpl) %>% 
+  select(y, a) #Select only the variables needed for analysis
+```
+
+
+### Set conditions for simulated data (Should this section be deleted?)
+Next, we must set conditions based on some assumptions about the simulated data. This will differ based on background knowledge of the cross-sectional data. 
+
+The biological noise, $\nu$ ("nu"), represents error from cross-reactivity to other antibodies in addition to those for the target condition. This can artificially inflate quantitative antibody results. Measurement noise, $\varepsilon$ ("epsilon"), represents error from the laboratory testing process. 
+
+``` {r conditions, message=FALSE}
+cond.hlye.IgG <- data.frame(
+  nu = 1.027239,             # Biological noise
+  eps = 0.2,            # Measurement noise
+  y.low = 0.0,          # low cutoff
+  y.high = 5e4); # high cutoff
+```
+## 3. Visualize antibody data
+Get to know your cross-sectional antibody data by visualizing the distribution of quantitative antibody responses. We have selected HlyE as the antigen of interest, and both IgA and IgG as the isotypes of interest. Here, we will look at the distribution of HlyE IgA and HlyE IgG. 
+```{r hist}
+#Graph your data as a histogram
+hist.hlye.IgG <- ggplot(p.hlye.IgG, aes(x=y)) + geom_histogram(binwidth=5, color = "#000000", fill = "#009999")+  theme_test() + ggtitle("Distribution of Cross-sectional Antibody Responses") 
+
+hist.hlye.IgG
+```
+
+Here, we see that our data is highly skewed with the majority of responses on the lower end of our data, but there is a long tail stretching to over 300. Let's get a better look at the distribution by log transforming our *y* response. We will also need to update the bin width. 
+
+```{r loghist}
+#Let's log transform your data to better visualize the distribution. We will also need to update the bin width. 
+loghist.hlye.IgG <- ggplot(p.hlye.IgG, aes(x=y)) + geom_histogram(binwidth=0.1, color = "#000000", fill = "#009999") +scale_x_log10() +ggtitle("Distribution of Cross-sectional Antibody Responses (Log transformed)") + xlab("Log(y)")
+
+loghist.hlye.IgG
+```
+
+Once log transformed, our data looks much more normally distributed. In most cases, log transformation will be the best way to visualize serologic data. 
+
+## Estimate Seroincidence 
+Finally, we are ready to begin seroincidence estimation. We will conduct two separate analyses using two distinct functions, `est.incidence` and `est.incidence.by`, to calculate the overall seroincidence and the age-specific incidence, respectively.
+
+### Overall Seroincidence
+We define our starting value as 0.05, which will also define our initial estimate for the force of infection (FOI, $\lambda$ ("lambda")). Then we log transform $\lambda$ and set up maximum and minimum values for the confidence interval. 
+
+
+```{r seroinc}
+start <- .05 #Set starting value
+
+lambda = start #initial estimate: starting value
+log.lambda = log(lambda)
+log.lmin=log(lambda/10)
+log.lmax=log(10*lambda) 
+
+
+objfunc <- function(llam){
+  return(res <- fdev(llam, p.hlye.IgG, c.hlye.IgG, cond.hlye.IgG))
+}
+
+fit <- nlm(objfunc,log.lambda,
+           hessian=TRUE,print.level=0,stepmax=(log.lmax-log.lmin)/4)
+
+#Calculate lambda, lower, upper, LF min
+log.lambda.est <- c(exp(fit$estimate),
+                    exp(fit$estimate + qnorm(c(0.025))*sqrt(1/fit$hessian)),
+                    exp(fit$estimate + qnorm(c(0.975))*sqrt(1/fit$hessian)),
+                    fit$minimum)
+
+#Print the final results
+log.lambda.est
+```
+
+### Age-Specific Seroincidence
+
+## Conclusions
+In our simulated data, we found that the estimated seroincidence of enteric fever is 0.20 (95% CI: 0.18, 0.23). 
+
+
+## References
diff --git a/vignettes/references.bib b/vignettes/references.bib
@@ -66,3 +66,27 @@ @article{Strid_2001
 	journal = {Clinical Diagnostic Laboratory Immunology}
 }
 
+@article{Aiemjoy_2022_Lancet,
+	url = {https://doi.org/10.1016/S2666-5247(22)00114-8},
+	year = 2022,
+	volume = {3},
+	number = {8},
+	pages = {e578--e587},
+	author = {Aiemjoy, K. and Seidman, J. C. and Saha, S. and Munira, S. J. and Islam Sajib, M. S. and Sium, S. M. al and Sarkar, A. and Alam, N. and Zahan, F. N. and Kabir, M. S. and Tamrakar, D. and Vaidya, K. and Shrestha, R. and Shakya, J. and Katuwal, N. and Shrestha, S. and Yousafzai, M. T. and Iqbal, J. and Dehraj, I. F. and Andrews, J. R. and others},
+	title = {Estimating typhoid incidence from community-based serosurveys: a multicohort study},
+	journal = {The Lancet Microbe}
+}
+
+@article{Aiemjoy_2022_SouthSudan,
+	doi = {10.3201/eid2811.220239},
+	year = 2022,
+	month = {nov},
+	volume = {28},
+	number = {11},
+	pages = {2316--2320},
+	author = {Kristen Aiemjoy and John Rumunu and Juma John Hassen and Kirsten E. Wiens and Denise Garrett and Polina Kamenskaya and Jason B. Harris and Andrew S. Azman and Peter Teunis and
+Jessica C. Seidman and Joseph F. Wamala and Jason R. Andrews and Richelle C. Charles},
+	title = {Seroincidence of Enteric Fever, Juba, South Sudan},
+	journal = {Emerging Infectious Diseases}
+}
+
diff --git a/vignettes/tutorial.Rmd b/vignettes/tutorial.Rmd
@@ -185,8 +185,9 @@ is loaded into the current workspace. Assuming the package is installed already
 console (bear in mind that the text after character `#` is only a comment):
 
 ```{r}
-# Load package "seroincidence"
+# Load package "serocalculator"
 library(serocalculator)
+#load_all()
 library(dplyr)
 ```