#' Retrieve Genes Associated with GO Terms Matching a Search Term
#'
#' Searches the names of all Gene Ontology (GO) terms in `GO.db` for a pattern
#' and returns the genes that the organism's `GO2ALLEGS` map links to any
#' matching term.
#'
#' @param search_term A single, non-empty character string to look for in GO
#'   term names. It is passed to [grepl()] as a case-insensitive regular
#'   expression, so escape regex metacharacters to match them literally.
#' @param species A single character string naming the species. Supported
#'   values are "human", "mouse", and "rat". Default is "human".
#' @param id_type A single character string naming the gene identifier type to
#'   return. Any value reported by `AnnotationDbi::columns()` for the organism
#'   database is accepted, e.g. "SYMBOL", "ENTREZID", or "ENSEMBL".
#'   Default is "SYMBOL".
#'
#' @return A named character vector of gene identifiers of the requested type.
#'   The names are always the corresponding Entrez Gene IDs (for
#'   `id_type = "ENTREZID"` names and values are identical). When an Entrez ID
#'   maps to several identifiers only the first is kept
#'   (`multiVals = "first"`); entries may be `NA` when `mapIds()` finds no
#'   mapping. If no GO term matches, or the matching terms have no genes, a
#'   zero-length named character vector is returned with a warning.
#'
#' @details
#' The function performs the following steps:
#' \enumerate{
#'   \item Validates the arguments and checks that the required annotation
#'     packages are installed.
#'   \item Retrieves the names of all GO terms from `GO.db::GOTERM`.
#'   \item Selects the GO IDs whose term name matches `search_term`.
#'   \item Collects the Entrez Gene IDs mapped to those GO IDs in the
#'     organism's `GO2ALLEGS` object.
#'   \item Maps the Entrez Gene IDs to the requested identifier type.
#' }
#'
#' **Note:** No ontology filter is applied: every term in `GOTERM` is
#' searched, not only Biological Process terms. Per the `org.*.eg.db`
#' documentation, `GO2ALLEGS` also covers genes annotated to child terms of
#' each GO ID. Further organisms can be supported by adding entries to the
#' `species_packages` list.
#'
#' The organism package is attached to the search path as a side effect.
#'
#' @examples
#' \dontrun{
#' # Retrieve human gene symbols associated with GO terms containing "WNT"
#' genes_wnt_human <- get_genes_by_go_term(
#'   "WNT", species = "human", id_type = "SYMBOL"
#' )
#' print(genes_wnt_human)
#'
#' # Retrieve mouse Ensembl IDs associated with GO terms containing "apoptosis"
#' genes_apoptosis_mouse <- get_genes_by_go_term(
#'   "apoptosis", species = "mouse", id_type = "ENSEMBL"
#' )
#' print(genes_apoptosis_mouse)
#'
#' # Retrieve rat Entrez IDs associated with GO terms containing "cell cycle"
#' genes_cell_cycle_rat <- get_genes_by_go_term(
#'   "cell cycle", species = "rat", id_type = "ENTREZID"
#' )
#' print(genes_cell_cycle_rat)
#' }
#'
#' @importFrom AnnotationDbi mapIds columns Term
#' @import GO.db
#' @import org.Hs.eg.db
#' @import org.Mm.eg.db
#' @import org.Rn.eg.db
#' @export
get_genes_by_go_term <- function(search_term, species = "human",
                                 id_type = "SYMBOL") {
  # TRUE only for a length-one, non-NA, non-empty character value.
  is_string <- function(x) {
    is.character(x) && length(x) == 1L && !is.na(x) && nzchar(x)
  }

  # Validate cheap things first, before any annotation package is touched.
  if (!is_string(search_term)) {
    stop(
      "'search_term' must be a single, non-empty character string.",
      call. = FALSE
    )
  }

  # Map species to organism package names
  species_packages <- list(
    "human" = "org.Hs.eg.db",
    "mouse" = "org.Mm.eg.db",
    "rat" = "org.Rn.eg.db"
    # Add more species as needed
  )

  if (!is_string(species) || !species %in% names(species_packages)) {
    stop(
      "Unsupported species. Please use one of: ",
      paste(names(species_packages), collapse = ", "),
      call. = FALSE
    )
  }

  org_package <- species_packages[[species]]

  # Fail loudly if a dependency is missing; require() would only return FALSE
  # and let the function die later with an unrelated "object not found".
  required_pkgs <- c("AnnotationDbi", "GO.db", org_package)
  is_installed <- vapply(
    required_pkgs, requireNamespace, logical(1), quietly = TRUE
  )
  if (!all(is_installed)) {
    stop(
      "Missing required package(s): ",
      paste(required_pkgs[!is_installed], collapse = ", "),
      call. = FALSE
    )
  }

  # Attach the organism package, as the original implementation did. The
  # unqualified mget() call below is resolved through the search path and
  # presumably relies on the Bioconductor generics that come with it.
  suppressPackageStartupMessages(library(org_package, character.only = TRUE))

  # Fetch the OrgDb object from the package namespace so that a same-named
  # object on the search path cannot shadow it.
  org_db <- getExportedValue(org_package, org_package)

  # Check the requested id_type before doing the expensive GO scan.
  valid_id_types <- AnnotationDbi::columns(org_db)
  if (!is_string(id_type) || !(id_type %in% valid_id_types)) {
    stop(
      "Invalid 'id_type'. Valid options are: ",
      paste(valid_id_types, collapse = ", "),
      call. = FALSE
    )
  }

  empty_result <- structure(character(0), names = character(0))

  # Named character vector: names are GO IDs, values are term names.
  # GO.db is referenced explicitly because nothing above attaches it.
  go_term_names <- AnnotationDbi::Term(GO.db::GOTERM)

  # Search for GO terms that include the search term (case-insensitive)
  is_match <- grepl(search_term, go_term_names, ignore.case = TRUE)
  matched_go_ids <- names(go_term_names)[is_match]

  if (length(matched_go_ids) == 0L) {
    warning("No GO terms match '", search_term, "'.", call. = FALSE)
    return(empty_result)
  }

  # Construct the name of the GO-to-all-genes mapping object, e.g.
  # "org.Hs.egGO2ALLEGS" for "org.Hs.eg.db".
  org_prefix <- sub("\\.db$", "", org_package)
  go2allegs <- getExportedValue(org_package, paste0(org_prefix, "GO2ALLEGS"))

  # GO IDs without an entry in the map come back as NA and are dropped below.
  genes_entrez_list <- mget(matched_go_ids, go2allegs, ifnotfound = NA)

  # Flatten the list and remove duplicates and NAs
  genes_entrez <- unique(unlist(genes_entrez_list, use.names = FALSE))
  genes_entrez <- genes_entrez[!is.na(genes_entrez)]

  if (length(genes_entrez) == 0L) {
    warning(
      "GO terms matching '", search_term, "' have no associated genes.",
      call. = FALSE
    )
    return(empty_result)
  }

  # Entrez IDs need no mapping; name them by themselves for a uniform shape.
  if (id_type == "ENTREZID") {
    return(structure(genes_entrez, names = genes_entrez))
  }

  AnnotationDbi::mapIds(
    org_db,
    keys = genes_entrez,
    column = id_type,
    keytype = "ENTREZID",
    multiVals = "first"
  )
}
They recruit high levels of tissue-specific transcription factors and co-activators such as the Mediator complex and contact target gene promoters with high frequency. Most super-enhancers contain multiple constituent regulatory elements, but it is unclear whether these elements have distinct roles in activating target gene expression. Here, by rebuilding the endogenous multipartite α-globin super-enhancer, we show that it contains bioinformatically equivalent but functionally distinct element types: classical enhancers and facilitator elements. Facilitators have no intrinsic enhancer activity, yet in their absence, classical enhancers are unable to fully upregulate their target genes. Without facilitators, classical enhancers exhibit reduced Mediator recruitment, enhancer RNA transcription, and enhancer-promoter interactions. Facilitators are interchangeable but display functional hierarchy based on their position within a multipartite enhancer. Facilitators thus play an important role in potentiating the activity of classical enhancers and ensuring robust activation of target genes.}, + keywords = {cis-regulatory elements,alpha globin locus,enhancer cluster,enhancer cooperation,facilitators,gene expression,genome engineering,super-enhancers,synthetic genome,transcriptional regulation}, + file = {C\:\\Users\\jandrews\\Zotero\\storage\\ZQEW4D4B\\Blayney et al. - 2023 - Super-enhancers include classical enhancers and fa.pdf;C\:\\Users\\jandrews\\Zotero\\storage\\4R365SKK\\S0092867423013168.html} +} + @online{BloodJournalCpG, title = {Blood {{Journal}} | {{CpG}}: Unraveling the Key to {{B-cell}} Function}, url = {http://www.bloodjournal.org/content/101/11/4230.2?sso-checked=true}, @@ -15262,6 +15279,25 @@ @article{ladaAIDAPOBECCytosine2012 file = {C\:\\Users\\jandrews\\Zotero\\storage\\I98MWXWU\\Lada et al. 
- 2012 - AIDAPOBEC cytosine deaminase induces genome-wide .pdf;C\:\\Users\\jandrews\\Zotero\\storage\\IMVIE7K6\\1745-6150-7-47.html} } +@article{laflammeDiagnosticUtilityDNA2024, + title = {Diagnostic Utility of {{DNA}} Methylation Analysis in Genetically Unsolved Pediatric Epilepsies and {{CHD2}} Episignature Refinement}, + author = {LaFlamme, Christy W. and Rastin, Cassandra and Sengupta, Soham and Pennington, Helen E. and Russ-Hall, Sophie J. and Schneider, Amy L. and Bonkowski, Emily S. and Almanza Fuerte, Edith P. and Allan, Talia J. and Zalusky, Miranda Perez-Galey and Goffena, Joy and Gibson, Sophia B. and Nyaga, Denis M. and Lieffering, Nico and Hebbar, Malavika and Walker, Emily V. and Darnell, Daniel and Olsen, Scott R. and Kolekar, Pandurang and Djekidel, Mohamed Nadhir and Rosikiewicz, Wojciech and McConkey, Haley and Kerkhof, Jennifer and Levy, Michael A. and Relator, Raissa and Lev, Dorit and Lerman-Sagie, Tally and Park, Kristen L. and Alders, Marielle and Cappuccio, Gerarda and Chatron, Nicolas and Demain, Leigh and Genevieve, David and Lesca, Gaetan and Roscioli, Tony and Sanlaville, Damien and Tedder, Matthew L. and Gupta, Sachin and Jones, Elizabeth A. and Weisz-Hubshman, Monika and Ketkar, Shamika and Dai, Hongzheng and Worley, Kim C. and Rosenfeld, Jill A. and Chao, Hsiao-Tuan and Neale, Geoffrey and Carvill, Gemma L. and Wang, Zhaoming and Berkovic, Samuel F. and Sadleir, Lynette G. and Miller, Danny E. and Scheffer, Ingrid E. and Sadikovic, Bekim and Mefford, Heather C.}, + date = {2024-08-06}, + journaltitle = {Nat Commun}, + volume = {15}, + number = {1}, + pages = {6524}, + publisher = {Nature Publishing Group}, + issn = {2041-1723}, + doi = {10.1038/s41467-024-50159-6}, + url = {https://www.nature.com/articles/s41467-024-50159-6}, + urldate = {2024-08-09}, + abstract = {Sequence-based genetic testing identifies causative variants in \textasciitilde\,50\% of individuals with developmental and epileptic encephalopathies (DEEs). 
Aberrant changes in DNA methylation are implicated in various neurodevelopmental disorders but remain unstudied in DEEs. We interrogate the diagnostic utility of genome-wide DNA methylation array analysis on peripheral blood samples from 582 individuals with genetically unsolved DEEs. We identify rare differentially methylated regions (DMRs) and explanatory episignatures to uncover causative and candidate genetic etiologies in 12 individuals. Using long-read sequencing, we identify DNA variants underlying rare DMRs, including one balanced translocation, three CG-rich repeat expansions, and four copy number variants. We also identify pathogenic variants associated with episignatures. Finally, we refine the CHD2 episignature using an 850\,K methylation array and bisulfite sequencing to investigate potential insights into CHD2 pathophysiology. Our study demonstrates the diagnostic yield of genome-wide DNA methylation analysis to identify causal and candidate variants as 2\% (12/582) for unsolved DEE cases.}, + langid = {english}, + keywords = {Diagnostic markers,DNA methylation,Epigenomics}, + file = {C:\Users\jandrews\Zotero\storage\LL7B5EFV\LaFlamme et al. - 2024 - Diagnostic utility of DNA methylation analysis in .pdf} +} + @article{laibleHomemadeSiteDirected2009, title = {Homemade {{Site Directed Mutagenesis}} of {{Whole Plasmids}}}, author = {Laible, Mark and Boonrod, Kajohn}, @@ -17280,6 +17316,22 @@ @article{lovenSelectiveInhibitionTumor2013b file = {C\:\\Users\\jandrews\\Zotero\\storage\\UTJVZAMC\\Lovén et al. - 2013 - Selective Inhibition of Tumor Oncogenes by Disrupt.pdf;C\:\\Users\\jandrews\\Zotero\\storage\\DHQQUDVV\\S0092867413003930.html} } +@article{lovenSelectiveInhibitionTumor2013c, + title = {Selective {{Inhibition}} of {{Tumor Oncogenes}} by {{Disruption}} of {{Super-Enhancers}}}, + author = {Lovén, Jakob and Hoke, Heather A. and Lin, Charles Y. and Lau, Ashley and Orlando, David A. and Vakoc, Christopher R. and Bradner, James E. 
and Lee, Tong Ihn and Young, Richard A.}, + date = {2013-04-11}, + journaltitle = {Cell}, + volume = {153}, + number = {2}, + pages = {320--334}, + issn = {0092-8674}, + doi = {10.1016/j.cell.2013.03.036}, + url = {https://www.sciencedirect.com/science/article/pii/S0092867413003930}, + urldate = {2024-08-12}, + abstract = {Chromatin regulators have become attractive targets for cancer therapy, but it is unclear why inhibition of these ubiquitous regulators should have gene-specific effects in tumor cells. Here, we investigate how inhibition of the widely expressed transcriptional coactivator BRD4 leads to selective inhibition of the MYC oncogene in multiple myeloma (MM). BRD4 and Mediator were found to co-occupy thousands of enhancers associated with active genes. They also co-occupied a small set of exceptionally large super-enhancers associated with genes that feature prominently in MM biology, including the MYC oncogene. Treatment of MM tumor cells with the BET-bromodomain inhibitor JQ1 led to preferential loss of BRD4 at super-enhancers and consequent transcription elongation defects that preferentially impacted~genes with super-enhancers, including MYC. Super-enhancers were found at key oncogenic drivers in many other tumor cells. These observations have implications for the discovery of cancer therapeutics directed at components of super-enhancers in diverse tumor types.}, + file = {C\:\\Users\\jandrews\\Zotero\\storage\\L4DVUVX5\\Lovén et al. - 2013 - Selective Inhibition of Tumor Oncogenes by Disrupt.pdf;C\:\\Users\\jandrews\\Zotero\\storage\\IT3FZMC6\\S0092867413003930.html} +} + @article{lueEmergingEZH2Inhibitors2018, title = {Emerging {{EZH2 Inhibitors}} and {{Their Application}} in {{Lymphoma}}}, author = {Lue, Jennifer K. and Amengual, Jennifer E.}, @@ -29878,6 +29930,22 @@ @article{whyteMasterTranscriptionFactors2013 file = {C\:\\Users\\jandrews\\Zotero\\storage\\NVT4BQCA\\Whyte et al. 
- 2013 - Master Transcription Factors and Mediator Establis.pdf;C\:\\Users\\jandrews\\Zotero\\storage\\NKWR7J7R\\S0092867413003929.html} } +@article{whyteMasterTranscriptionFactors2013a, + title = {Master {{Transcription Factors}} and {{Mediator Establish Super-Enhancers}} at {{Key Cell Identity Genes}}}, + author = {Whyte, Warren A. and Orlando, David A. and Hnisz, Denes and Abraham, Brian J. and Lin, Charles Y. and Kagey, Michael H. and Rahl, Peter B. and Lee, Tong Ihn and Young, Richard A.}, + date = {2013-04-11}, + journaltitle = {Cell}, + volume = {153}, + number = {2}, + pages = {307--319}, + issn = {0092-8674}, + doi = {10.1016/j.cell.2013.03.035}, + url = {https://www.sciencedirect.com/science/article/pii/S0092867413003929}, + urldate = {2024-08-12}, + abstract = {Master transcription factors Oct4, Sox2, and Nanog bind enhancer elements and recruit Mediator to activate much of the gene expression program of pluripotent embryonic stem cells (ESCs). We report here that the ESC master transcription factors form unusual enhancer domains at most genes that control the pluripotent state. These domains, which we call super-enhancers, consist of clusters of enhancers that are densely occupied by the master regulators and Mediator. Super-enhancers differ from typical enhancers in size, transcription factor density and content, ability to activate transcription, and sensitivity to perturbation. Reduced levels of Oct4 or Mediator cause preferential loss of expression of super-enhancer-associated genes relative to other genes, suggesting how changes in gene expression programs might be accomplished during development. In other more differentiated cells, super-enhancers containing cell-type-specific master transcription factors are also found at genes that define cell identity. Super-enhancers thus play key roles in the control of mammalian cell identity.}, + file = {C\:\\Users\\jandrews\\Zotero\\storage\\CW4PNXJP\\Whyte et al. 
- 2013 - Master Transcription Factors and Mediator Establis.pdf;C\:\\Users\\jandrews\\Zotero\\storage\\28E5Z76N\\S0092867413003929.html} +} + @book{wickhamGgplot2ElegantGraphics2009, title = {Ggplot2: {{Elegant Graphics}} for {{Data Analysis}}}, author = {Wickham, Hadley},