MM-136 LT MAPIT support (#112)

* MM-136 LT MAPIT support * MM-136 another one
lcrawlab · Jun 7, 2024 · e28fbd6 · e28fbd6
1 parent bfbe934
commit e28fbd6
Show file tree

Hide file tree

Showing 11 changed files with 216 additions and 2 deletions.
diff --git a/.gitignore b/.gitignore
@@ -69,3 +69,4 @@ src/symbols.rds
 /Meta/
 
 /dev
+inst/doc
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -47,6 +47,7 @@ Imports:
     Rcpp,
     stats,
     tidyr,
+    truncnorm,
     utils
 Suggests:
     GGally,
@@ -70,4 +71,4 @@ VignetteBuilder:
 Encoding: UTF-8
 LazyData: true
 LazyDataCompression: xz
-RoxygenNote: 7.2.3
+RoxygenNote: 7.3.1
diff --git a/NAMESPACE b/NAMESPACE
@@ -1,5 +1,6 @@
 # Generated by roxygen2: do not edit by hand
 
+export(binary_to_liability)
 export(cauchy_combined)
 export(fishers_combined)
 export(harmonic_combined)
@@ -18,8 +19,10 @@ importFrom(stats,cor)
 importFrom(stats,pcauchy)
 importFrom(stats,pchisq)
 importFrom(stats,pnorm)
+importFrom(stats,qnorm)
 importFrom(stats,sd)
 importFrom(stats,uniroot)
 importFrom(stats,var)
+importFrom(truncnorm,rtruncnorm)
 importFrom(utils,head)
 useDynLib(mvMAPIT)
diff --git a/NEWS.md b/NEWS.md
@@ -1,6 +1,7 @@
 # mvMAPIT (development version)
 
 * Fix Bonferroni correction in the `mvMAPIT.Rmd` vignette. 
+* New function `binary_to_liability` for LT-MAPIT support and corresponding vignette
 
 # mvMAPIT 2.0.3 release
 

diff --git a/R/binary_to_liability.R b/R/binary_to_liability.R
@@ -0,0 +1,44 @@
+#' Convert binary traits to liabilities for low prevalence
+#'
+#' @description
+#' This function implements an approximate conversion of binary traits to liabilties as
+#' proposed in the LT-MAPIT model (Crawford and Zhou 2018,
+#' https://doi.org/10.1101/374983). Note that this is only good for low prevalence. 
+#' To run LT-MAPIT (MAPIT on case-control traits), convert the binary traits to
+#' liabilities using this function and pass the liabilities to mvmapit as trait.
+#'
+#' @param case_control_trait Case-control trait encoded as binary trait with 0 as control or 1 as case.
+#' @param prevalence Case prevalence between 0 and 1. Proportion of cases in the population.
+#' @returns A trait vector of same length as y with case-control indicators converted
+#' to liabilties.
+#'
+#' @export
+#' @importFrom truncnorm rtruncnorm
+#' @importFrom stats qnorm
+#' @import checkmate
+binary_to_liability <- function(case_control_trait, prevalence) {
+  coll <- makeAssertCollection()
+  assertInteger(
+    case_control_trait,
+    lower = 0,
+    upper = 1,
+    add = coll
+  )
+  assertDouble(prevalence,
+               lower = 0,
+               upper = 1,
+               add = coll)
+  reportAssertions(coll)
+
+  n_cases = sum(case_control_trait == 1, na.rm = T)
+  n_controls = sum(case_control_trait == 0, na.rm = T)
+  threshold = qnorm(1 - prevalence, mean = 0, sd = 1)
+
+  liabilities = rep(NA, length(case_control_trait))
+  liabilities[!is.na(case_control_trait) &
+                case_control_trait == 0] = rtruncnorm(n_controls, b = threshold)
+  liabilities[!is.na(case_control_trait) &
+                case_control_trait == 1] = rtruncnorm(n_cases, a = threshold)
+
+  return(liabilities)
+}
diff --git a/README.md b/README.md
@@ -122,9 +122,11 @@ install.packages(c( 'checkmate',
                     'RcppAlgos', 
                     'RcppArmadillo', 
                     'RcppParallel', 
+                    'RcppProgress', 
                     'RcppSpdlog', 
                     'testthat', 
-                    'tidyr'), 
+                    'tidyr', 
+                    'truncnorm'), 
                     dependencies = TRUE);
 ```
 

diff --git a/_pkgdown.yml b/_pkgdown.yml
@@ -10,6 +10,11 @@ reference:
     Functions for the marginal epistasis analysis.
   contents: 
   - mvmapit
+- title: "LT-MAPIT Analysis"
+  desc: >
+    Functions for the marginal epistasis analysis of case-control traits.
+  contents: 
+  - binary_to_liability
 - title: "Combining P-values"
   desc: >
     Functions for combining the variance component p-values.

diff --git a/man/binary_to_liability.Rd b/man/binary_to_liability.Rd
diff --git a/tests/testthat/test-binary_to_liability.R b/tests/testthat/test-binary_to_liability.R
@@ -0,0 +1,26 @@
+test_that("binary_to_liability api test", {
+  # given
+  n_samples <- 10
+  case_control_trait <- sample(0:1, n_samples, replace = TRUE)
+  prevalence <- 0.1
+  # when
+  liabilities <- binary_to_liability(case_control_trait, prevalence)
+  # then
+  expect_true(is.numeric(liabilities))
+  expect_equal(length(case_control_trait),
+               length(liabilities))
+})
+
+test_that("binary_to_liability NA in case_control_trait", {
+  # given
+  n_samples <- 10
+  n_missing <- 2
+  case_control_trait <- sample(0:1, n_samples, replace = TRUE)
+  prevalence <- 0.1
+  case_control_trait[sample(1:n_samples, n_missing)] <- NA
+  # when
+  liabilities <- binary_to_liability(case_control_trait, prevalence)
+  # then
+  expect_equal(case_control_trait[is.na(case_control_trait)], liabilities[is.na(case_control_trait)])
+  expect_equal(sum(!is.na(liabilities)), n_samples - n_missing)
+  })
diff --git a/vignettes/.gitignore b/vignettes/.gitignore
@@ -0,0 +1,2 @@
+*.html
+*.R
diff --git a/vignettes/tutorial-lt-mapit.Rmd b/vignettes/tutorial-lt-mapit.Rmd
@@ -0,0 +1,105 @@
+---
+title: "Liability threshold MAPIT"
+output: rmarkdown::html_vignette
+description: >
+  Learn how to convert binary traits to liabilities.
+vignette: >
+  %\VignetteIndexEntry{Liability threshold MAPIT}
+  %\VignetteEngine{knitr::rmarkdown}
+  %\VignetteEncoding{UTF-8}
+---
+
+```{r, include = FALSE}
+knitr::opts_chunk$set(
+  collapse = TRUE,
+  comment = "#>"
+)
+```
+
+```{r setup}
+library(mvMAPIT)
+```
+In this tutorial we will illustrate how to use the Liability Threshold MArginal ePIstasis Test (LT-MAPIT) by [Crawford and Zhou (2018)](https://doi.org/10.1101/374983)$^1$. For this purpose we will first simulate synthetic data
+and then analyze it.
+
+The data are single nucleotide polymorphisms (SNPs) with simulated genotypes.
+For the simulation we choose the following set of parameters:
+
+  1. **population_size** - # of samples in the population
+  2. **n_snps** - number of SNPs or variants
+  3. **PVE** - phenotypic variance explained/broad-sense heritability ($H^2$)
+  4. **rho** - measures the portion of $H^2$ that is contributed by the marignal (additive) effects
+  5. **disease_prevalence** - assumed disease prevelance in the population
+  6. **sample_size** - # of samples to analyze
+
+```{r parameters, eval = F}
+population_size <- 1e4
+n_snps <- 2e3
+pve <- 0.6
+rho <- 0.5
+disease_prevalence <- 0.3
+sample_size <- 500
+```
+
+## Simulate random genotypes
+
+Simulate the genotypes such that all variants have minor allele frequency (MAF) > 0.05.
+
+```{r random_genotypes, eval = F}
+maf <- 0.05 + 0.45 * runif(n_snps)
+random_genotypes   <- (runif(population_size * n_snps) < maf) + (runif(population_size *
+                                                                         n_snps) < maf)
+random_genotypes   <- matrix(as.double(random_genotypes), population_size, n_snps, byrow = TRUE)
+```
+
+## Simulate liabilities
+
+We can use the mvMAPIT function `simulate_traits` to simulate the liabilities.
+See the tutorial on simulations for more details on that.
+
+```{r simulate_traits, eval = F}
+n_causal <- 100
+n_epistatic <- 10
+simulated_data <- simulate_traits(
+    random_genotypes,
+    n_causal = n_causal,
+    n_trait_specific = n_epistatic,
+    n_pleiotropic = 0,
+    d = 1,
+    H2 = pve,
+    rho = rho
+)
+```
+Now that we have the liabilities, we can assign case-control labels according to the disease prevelance parameter. We will treat this like the LT-MAPIT paper and take an equal number of cases and controls.
+
+```{r case_control, eval = F}
+liabilities <- simulated_data$trait
+threshold <- qnorm(1 - disease_prevalence, mean = 0, sd = 1)
+case_control_trait <- rep(0, population_size)
+case_control_trait[liabilities > threshold] <- 1
+
+# Subsample a particular number of cases and controls
+cases <- sample(which(liabilities > threshold), sample_size / 2, replace = FALSE)
+controls <- sample(which(liabilities <= threshold), sample_size / 2, replace = FALSE)
+y <- as.integer(case_control_trait[c(cases, controls)])
+X <- simulated_data$genotype[c(cases, controls), ]
+```
+
+## Run MAPIT with Case-Control trait
+
+To run MAPIT with case-control traits, we need to convert the traits back to liabilities.
+The function `binary_to_liability` provides this conversion.
+**NOTE:** The `binary_to_liability` function is an approximation that is only suited for low prevalence in the disease trait.
+
+```{r mapit, eval = F}
+y_liabilities <- binary_to_liability(y, disease_prevalence)
+
+lt_mapit <- mvmapit(
+        t(X),
+        t(y_liabilities),
+        test = "hybrid"
+)
+```
+
+# References
+1: Lorin Crawford, Xiang Zhou (2018) Genome-wide Marginal Epistatic Association Mapping in Case-Control Studies bioRxiv 374983; <https://doi.org/10.1101/374983>
Original file line number	Diff line number	Diff line change
Expand Up		@@ -69,3 +69,4 @@ src/symbols.rds
		/Meta/

		/dev
		inst/doc