add ercc spike-in norm & clean up GSEA plot functions

j-andrews7 · Jul 15, 2024 · 4e4b635 · 4e4b635
1 parent 7f7bccb
commit 4e4b635
Show file tree

Hide file tree

Showing 2 changed files with 26 additions and 37 deletions.
diff --git a/docs/Code_Snippets_Functions.md b/docs/Code_Snippets_Functions.md
@@ -1336,6 +1336,8 @@ run_enrichKEGG <- function(res.list, padj.th = 0.05, lfc.th = 0, outdir = "./enr
                    kegg.native = TRUE),
           error = function(e) {"bah"}
         )
+
+		graphics.off()
       }
 
       # This idiotic workaround copies the PNGs to our wanted directory as pathview generates
@@ -1470,32 +1472,6 @@ run_enrichPathway <- function(res.list, padj.th = 0.05, lfc.th = 0, outdir = "./
         dev.off()
       }
 
-      # Network plot for each pathway with FC values.
-      # for (x in ego@compareClusterResult$Description) {
-      #   # Some pathways have backslashes, which will break file creation.
-      #   x_out <- str_replace_all(x, "/", "_")
-      #   
-      #   # For when it inevitably wants to crash due to not finding a pathway name or such.
-      #   tryCatch(
-      #     {
-      #       pdf(paste0(out, "/Reactome_pathways/", x_out, ".pdf"), width = 11, height = 11)
-      #       p <- viewPathway(x, readable = TRUE, foldChange = gl, organism = organism)
-      #       vals <- p$data$color[!is.na(p$data$color)]
-      #       l <- max(abs(as.numeric(vals)))
-      #       p <- p + scale_color_gradient2(limits = c(-l,l), mid = "grey90", 
-      #                                      high = "red", low = "navyblue")
-      #       print(p)
-      #       dev.off()
-      #       dev.off()
-      #       dev.off()
-      #       dev.off()
-      #       dev.off()
-      #       dev.off()
-      #     },
-      #     error = function(e) {"bah"}
-      #   )
-      # }
-
       saveRDS(ego, file = paste0(out, "/enrichPathway.reactome.RDS"))
       ego <- as.data.frame(ego)
       write.table(ego, file = paste0(out, "/enrichPathway.reactome.results.txt"), 
@@ -1967,6 +1943,7 @@ This is a super lazy function to run through a list of contrasts and create diff
 #' @param lfc.th A numeric vector of log2 fold-change thresholds. Defaults to c(log2(1.25), log2(1.5)).
 #' @param shrink.method The method used for shrinkage estimation. Defaults to "apeglm".
 #' @param outdir The directory where the output should be saved. Defaults to "./de".
+#' @param norm.ercc A logical indicating whether to estimate size factors from ERCC spike-ins only (rows whose names start with "ERCC-"). Defaults to FALSE.
 #' @param BPPARAM The BiocParallelParam object specifying the parallel back-end to be used. Defaults to NULL.
 #' 
 #' @return A list of DESeq2 result tables for the specified contrasts, saved to the specified output directory.
@@ -1977,8 +1954,12 @@ This is a super lazy function to run through a list of contrasts and create diff
 #'                design = my_design, alpha = 0.01, lfc.th = c(log2(2), log2(3)), 
 #'                shrink.method = "normal", outdir = "./my_results", BPPARAM = MulticoreParam(2))
 #' }
-get_DESEQ2_res <- function(dds, res.list, contrasts, user.mat = FALSE, block = NULL, design = NULL, alpha = 0.05, 
-						   lfc.th = c(log2(1.5), log2(1.25)), shrink.method = "apeglm", outdir = "./de", BPPARAM = NULL) {
+#'
+#' @author Jared Andrews
+get_DESEQ2_res <- function(dds, res.list, contrasts, user.mat = FALSE, block = NULL, 
+                           design = NULL, alpha = 0.05, 
+						   lfc.th = c(log2(1.25), log2(1.5)), shrink.method = "apeglm", 
+						   outdir = "./de", norm.ercc = FALSE, BPPARAM = NULL) {
 
   dir.create(file.path(outdir), showWarnings = FALSE, recursive = TRUE)
 
@@ -2004,13 +1985,21 @@ get_DESEQ2_res <- function(dds, res.list, contrasts, user.mat = FALSE, block = N
       desgn <- as.formula(paste0("~", con[1]))
     }
 
-    message(paste0("Design for ", paste(con[1], con[2], "vs", con[3], sep = "_"),
+    message(paste0("\nDesign for ", paste(con[1], con[2], "vs", con[3], sep = "_"),
                    " is ", paste0(as.character(desgn))))
 
     dds <- DESeqDataSet(dds, design = desgn)
+
+    # Get size factor by spike-ins if specified.
+    if (norm.ercc) {
+      spikes <- rownames(dds)[grep("^ERCC-", rownames(dds))]
+      message(paste0("\nCalculating size factors from ", length(spikes), " ERCC spike-ins."))
+      dds <- estimateSizeFactors(dds, controlGenes=rownames(dds) %in% spikes)
+    }
+
     dds <- DESeq(dds, BPPARAM = BPPARAM)
 
-    res1 <- results(dds, contrast = con, alpha = alpha, BPPARAM = BPPARAM)
+    res1 <- results(dds, contrast = con, alpha = alpha)
     res1$ENSEMBL <- rownames(res1)
     res1$SYMBOL <- rowData(dds)$SYMBOL
 
@@ -2021,7 +2010,7 @@ get_DESEQ2_res <- function(dds, res.list, contrasts, user.mat = FALSE, block = N
       if (shrink.method == "ashr") {
         coef <- NULL
       }
-      shrink <- lfcShrink(dds, res = res1, coef = coef, type = shrink.method, BPPARAM = BPPARAM)
+      shrink <- lfcShrink(dds, res = res1, coef = coef, type = shrink.method)
       shrink$ENSEMBL <- rownames(shrink)
       shrink$SYMBOL <- rowData(dds)$SYMBOL
       rownames(shrink) <- shrink$SYMBOL
@@ -2040,7 +2029,7 @@ get_DESEQ2_res <- function(dds, res.list, contrasts, user.mat = FALSE, block = N
 
     for (l in lfc.th) {
 
-      res <- results(dds, contrast = con, alpha = alpha, lfcThreshold = l, BPPARAM = BPPARAM)
+      res <- results(dds, contrast = con, alpha = alpha, lfcThreshold = l)
       res$ENSEMBL <- rownames(res)
       res$SYMBOL <- rowData(dds)$SYMBOL
 
@@ -2050,7 +2039,7 @@ get_DESEQ2_res <- function(dds, res.list, contrasts, user.mat = FALSE, block = N
           coef <- NULL
         }
         out.name <- paste0(rname, "-shLFC", l)
-        shrink <- lfcShrink(dds, res = res, coef = coef, type = shrink.method, BPPARAM = BPPARAM)
+        shrink <- lfcShrink(dds, res = res, coef = coef, type = shrink.method)
         shrink$ENSEMBL <- rownames(shrink)
         shrink$SYMBOL <- rowData(dds)$SYMBOL
         rownames(shrink) <- shrink$SYMBOL

diff --git a/docs/refs.bib b/docs/refs.bib
@@ -8122,7 +8122,7 @@ @online{federationIdentificationCandidateMaster2018
   urldate = {2023-03-06},
   abstract = {Regulation of gene expression through binding of transcription factors (TFs) to cis-regulatory elements is highly complex in mammalian cells. Genome-wide measurement technologies provide new means to understand this regulation, and models of TF regulatory networks have been built with the goal of identifying critical factors. Here, we report a network model of transcriptional regulation between TFs constructed by integrating genomewide identification of active enhancers and regions of focal DNA accessibility. Network topology is confirmed by published TF ChIP-seq data. By considering multiple methods of TF prioritization following network construction, we identify master TFs in well-studied cell types, and these networks provide better prioritization than networks only considering promoter-proximal accessibility peaks. Comparisons between networks from similar cell types show stable connectivity of most TFs, while master regulator TFs show dramatic changes in connectivity and centrality. Applying this method to study chronic lymphocytic leukemia, we prioritized several network TFs amenable to pharmacological perturbation and show that compounds targeting these TFs show comparable efficacy in CLL cell lines to FDA-approved therapies. The construction of transcriptional regulatory network (TRN) models can predict the interactions between individual TFs and predict critical TFs for development or disease.},
   langid = {english},
-  pubstate = {preprint},
+  pubstate = {prepublished},
   file = {C:\Users\jandrews\Zotero\storage\KEQ5BV2G\Federation et al. - 2018 - Identification of candidate master transcription f.pdf}
 }
 
@@ -10496,7 +10496,7 @@ @online{haradaLeukemiaCoreTranscriptional2023
   urldate = {2023-04-13},
   abstract = {Lineage-defining transcription factors form densely interconnected circuits in chromatin occupancy assays, but the functional significance of these networks remains underexplored. We reconstructed the functional topology of a leukemia cell transcription network from the direct gene-regulatory programs of eight core transcriptional regulators established in pre-steady state assays coupling targeted protein degradation with nascent transcriptomics. The core regulators displayed narrow, largely non-overlapping direct transcriptional programs, forming a sparsely interconnected functional hierarchy stabilized by incoherent feed-forward loops. BET bromodomain and CDK7 inhibitors disrupted the core regulators’ direct programs, acting as mixed agonists/antagonists. The network is predictive of dynamic gene expression behaviors in time-resolved assays and clinically relevant pathway activity in patient populations.},
   langid = {english},
-  pubstate = {preprint},
+  pubstate = {prepublished},
   file = {C:\Users\jandrews\Zotero\storage\MN9YGT4H\Harada et al. - 2023 - Leukemia core transcriptional circuitry is a spars.pdf}
 }
 
@@ -10512,7 +10512,7 @@ @online{haradaRapidkineticsDegronBenchmarking2023
   urldate = {2023-04-13},
   abstract = {Attenuating aberrant transcriptional circuits holds great promise for the treatment of numerous diseases, including cancer. However, development of transcriptional inhibitors is hampered by the lack of a generally accepted functional cellular readout to characterize their target specificity and on-target activity. We benchmarked the direct gene-regulatory signatures of six agents reported as inhibitors of the oncogenic transcription factor MYB against targeted MYB degradation in a nascent transcriptomics assay. The inhibitors demonstrated partial specificity for MYB target genes but displayed significant off-target activity. Unexpectedly, the inhibitors displayed bimodal on-target effects, acting as mixed agonists-antagonists. Our data uncover unforeseen agonist effects of small molecules originally developed as TF inhibitors and argue that rapid-kinetics benchmarking against degron models should be used for functional characterization of transcriptional modulators.},
   langid = {english},
-  pubstate = {preprint},
+  pubstate = {prepublished},
   file = {C:\Users\jandrews\Zotero\storage\BIUSS6YA\Harada et al. - 2023 - Rapid-kinetics degron benchmarking reveals off-tar.pdf}
 }
 
@@ -10528,7 +10528,7 @@ @online{haradaRapidkineticsDegronBenchmarking2023a
   urldate = {2023-07-06},
   abstract = {Attenuating aberrant transcriptional circuits holds great promise for the treatment of numerous diseases, including cancer. However, development of transcriptional inhibitors is hampered by the lack of a generally accepted functional cellular readout to characterize their target specificity and on-target activity. We benchmarked the direct gene-regulatory signatures of six agents reported as inhibitors of the oncogenic transcription factor MYB against targeted MYB degradation in a nascent transcriptomics assay. The inhibitors demonstrated partial specificity for MYB target genes but displayed significant off-target activity. Unexpectedly, the inhibitors displayed bimodal on-target effects, acting as mixed agonists-antagonists. Our data uncover unforeseen agonist effects of small molecules originally developed as TF inhibitors and argue that rapid-kinetics benchmarking against degron models should be used for functional characterization of transcriptional modulators.},
   langid = {english},
-  pubstate = {preprint},
+  pubstate = {prepublished},
   file = {C:\Users\jandrews\Zotero\storage\5T8PGZD5\Harada et al. - 2023 - Rapid-kinetics degron benchmarking reveals off-tar.pdf}
 }