Skip to content

Commit

Permalink
add go term gene ID retrieval function
Browse files Browse the repository at this point in the history
  • Loading branch information
j-andrews7 committed Sep 12, 2024
1 parent 62d819f commit 08ecd1d
Show file tree
Hide file tree
Showing 2 changed files with 192 additions and 0 deletions.
124 changes: 124 additions & 0 deletions docs/Code_Snippets_Functions.md
Original file line number Diff line number Diff line change
Expand Up @@ -1826,6 +1826,130 @@ for (i in seq_along(df_lists)) {
}
```

#### Get All Gene IDs for GO Terms Associated with a Given Search Term

```r
#' Retrieve Genes Associated with GO Terms Matching a Search Term
#'
#' Searches Gene Ontology (GO) term names for a pattern and returns all genes
#' annotated to the matching terms for the requested species, expressed in
#' the requested identifier type.
#'
#' @param search_term A single, non-empty character string to search for in GO
#'   term names. It is passed to [grepl()] as a case-insensitive regular
#'   expression, so regex metacharacters (e.g. `"^cell cycle"`) are honoured.
#' @param species A single character string specifying the species. Supported
#'   species are "human", "mouse", and "rat". Default is "human".
#' @param id_type A single character string specifying the type of gene
#'   identifier to return, e.g. "SYMBOL", "ENTREZID", or "ENSEMBL". Any column
#'   reported by `AnnotationDbi::columns()` for the organism database is
#'   accepted. Default is "SYMBOL".
#' @param ontology Optional character vector restricting the search to one or
#'   more GO ontologies: "BP" (Biological Process), "MF" (Molecular Function),
#'   and/or "CC" (Cellular Component). The default, `NULL`, searches the terms
#'   of all ontologies.
#'
#' @return A named character vector of gene identifiers of type `id_type`.
#'   The names are the corresponding Entrez Gene IDs. Entries are `NA` for
#'   Entrez IDs that have no identifier of the requested type. If no GO term
#'   matches, or no gene is annotated to the matching terms, a zero-length
#'   named character vector is returned with a warning.
#'
#' @details
#' The function performs the following steps:
#' \enumerate{
#'   \item Validates the arguments and checks that the required annotation
#'     packages are installed.
#'   \item Retrieves the names of all GO terms (optionally filtered by
#'     ontology).
#'   \item Selects the GO terms whose name matches `search_term`.
#'   \item Retrieves all Entrez Gene IDs associated with the matching GO terms
#'     from the organism's `GO2ALLEGS` map.
#'   \item Maps the Entrez Gene IDs to the requested identifier type, keeping
#'     the first match when an Entrez ID maps to several identifiers.
#' }
#'
#' The annotation packages are loaded via their namespaces and are not
#' attached to the search path.
#'
#' **Note:** The function supports the species listed in `species_packages`.
#' For other organisms, add the appropriate entries.
#'
#' @examples
#' \dontrun{
#' # Retrieve human gene symbols associated with GO terms containing "WNT"
#' genes_wnt_human <- get_genes_by_go_term(
#'   "WNT",
#'   species = "human", id_type = "SYMBOL"
#' )
#' print(genes_wnt_human)
#'
#' # Retrieve mouse Ensembl IDs associated with GO terms containing "apoptosis"
#' genes_apoptosis_mouse <- get_genes_by_go_term(
#'   "apoptosis",
#'   species = "mouse", id_type = "ENSEMBL"
#' )
#' print(genes_apoptosis_mouse)
#'
#' # Retrieve rat Entrez IDs associated with GO terms containing "cell cycle"
#' genes_cell_cycle_rat <- get_genes_by_go_term(
#'   "cell cycle",
#'   species = "rat", id_type = "ENTREZID"
#' )
#' print(genes_cell_cycle_rat)
#'
#' # Restrict the search to Biological Process terms only
#' genes_wnt_bp <- get_genes_by_go_term("WNT", ontology = "BP")
#' }
#'
#' @importFrom AnnotationDbi mapIds
#' @import GO.db
#' @import org.Hs.eg.db
#' @import org.Mm.eg.db
#' @import org.Rn.eg.db
#' @export
get_genes_by_go_term <- function(search_term,
                                 species = "human",
                                 id_type = "SYMBOL",
                                 ontology = NULL) {
  # Map species to organism package names
  species_packages <- list(
    "human" = "org.Hs.eg.db",
    "mouse" = "org.Mm.eg.db",
    "rat" = "org.Rn.eg.db"
    # Add more species as needed
  )
  valid_ontologies <- c("BP", "MF", "CC")

  # Validate the cheap scalar arguments first so that bad input fails fast,
  # before any annotation package is loaded.
  if (!is.character(species) || length(species) != 1L || is.na(species) ||
    !species %in% names(species_packages)) {
    stop(
      "Unsupported species. Please use one of: ",
      paste(names(species_packages), collapse = ", "),
      call. = FALSE
    )
  }
  if (!is.character(search_term) || length(search_term) != 1L ||
    is.na(search_term) || !nzchar(search_term)) {
    stop(
      "'search_term' must be a single, non-empty character string.",
      call. = FALSE
    )
  }
  if (!is.character(id_type) || length(id_type) != 1L || is.na(id_type)) {
    stop("'id_type' must be a single character string.", call. = FALSE)
  }
  if (!is.null(ontology) &&
    (!is.character(ontology) || !all(ontology %in% valid_ontologies))) {
    stop(
      "Invalid 'ontology'. Valid options are: ",
      paste(valid_ontologies, collapse = ", "),
      call. = FALSE
    )
  }

  org_package <- species_packages[[species]]

  # require() only returns FALSE when a package is missing, which would
  # surface later as an obscure lookup error; check explicitly instead.
  needed_packages <- c("AnnotationDbi", "GO.db", org_package)
  is_available <- vapply(
    needed_packages, requireNamespace, logical(1),
    quietly = TRUE
  )
  if (!all(is_available)) {
    stop(
      "Required package(s) not installed: ",
      paste(needed_packages[!is_available], collapse = ", "),
      call. = FALSE
    )
  }

  # The organism database object carries the same name as its package.
  org_db <- getExportedValue(org_package, org_package)

  # Check the requested id_type before doing the expensive GO lookups
  valid_id_types <- AnnotationDbi::columns(org_db)
  if (!(id_type %in% valid_id_types)) {
    stop(
      "Invalid 'id_type'. Valid options are: ",
      paste(valid_id_types, collapse = ", "),
      call. = FALSE
    )
  }

  # Named character vector: names are GO IDs, values are the term names
  go_term_names <- AnnotationDbi::Term(GO.db::GOTERM)
  if (!is.null(ontology)) {
    go_ontologies <- AnnotationDbi::Ontology(GO.db::GOTERM)
    in_ontology <- go_ontologies[names(go_term_names)] %in% ontology
    go_term_names <- go_term_names[in_ontology]
  }

  # Search for GO terms that match the search term (case-insensitive)
  is_match <- grepl(search_term, go_term_names, ignore.case = TRUE)
  matched_go_ids <- names(go_term_names)[is_match]

  empty_result <- character(0)
  names(empty_result) <- character(0)

  if (length(matched_go_ids) == 0L) {
    warning(
      "No GO terms matched '", search_term, "'.",
      call. = FALSE
    )
    return(empty_result)
  }

  # Look up the organism's GO -> Entrez Gene map, e.g. org.Hs.egGO2ALLEGS
  org_prefix <- sub("\\.db$", "", org_package) # Remove ".db" from package name
  go2allegs <- getExportedValue(org_package, paste0(org_prefix, "GO2ALLEGS"))

  # GO IDs without annotated genes in this species come back as NA
  genes_by_go <- AnnotationDbi::mget(
    matched_go_ids, go2allegs,
    ifnotfound = NA
  )

  # Flatten the list, de-duplicate, and remove the NA placeholders
  genes_entrez <- unique(unlist(genes_by_go, use.names = FALSE))
  genes_entrez <- genes_entrez[!is.na(genes_entrez)]

  if (length(genes_entrez) == 0L) {
    warning(
      "No ", species, " genes are annotated to the GO terms matching '",
      search_term, "'.",
      call. = FALSE
    )
    return(empty_result)
  }

  # Entrez IDs need no translation; name them by themselves so the return
  # shape is the same for every id_type.
  if (id_type == "ENTREZID") {
    names(genes_entrez) <- genes_entrez
    return(genes_entrez)
  }

  AnnotationDbi::mapIds(
    org_db,
    keys = genes_entrez,
    column = id_type,
    keytype = "ENTREZID",
    multiVals = "first"
  )
}
```

### CNV Calling from Methylation Array
This produces typical genome-wide CNV plots, segmentation files, bins, and IGV tracks from Illumina methylation arrays. Users can add detail regions for labels if they'd like. When mixing both 450k and EPIC arrays, set `array_type = "overlap"`.

Expand Down
68 changes: 68 additions & 0 deletions docs/refs.bib
Original file line number Diff line number Diff line change
Expand Up @@ -2668,6 +2668,23 @@ @article{birneyIdentificationAnalysisFunctional2007
file = {C\:\\Users\\jandrews\\Zotero\\storage\\CM8AZEAD\\Birney et al. - 2007 - Identification and analysis of functional elements.pdf;C\:\\Users\\jandrews\\Zotero\\storage\\XG8VUFBT\\nature05874.html}
}

@article{blayneySuperenhancersIncludeClassical2023,
title = {Super-Enhancers Include Classical Enhancers and Facilitators to Fully Activate Gene Expression},
author = {Blayney, Joseph W. and Francis, Helena and Rampasekova, Alexandra and Camellato, Brendan and Mitchell, Leslie and Stolper, Rosa and Cornell, Lucy and Babbs, Christian and Boeke, Jef D. and Higgs, Douglas R. and Kassouf, Mira},
date = {2023-12-21},
journaltitle = {Cell},
volume = {186},
number = {26},
pages = {5826-5839.e18},
issn = {0092-8674},
doi = {10.1016/j.cell.2023.11.030},
url = {https://www.sciencedirect.com/science/article/pii/S0092867423013168},
urldate = {2024-08-12},
abstract = {Super-enhancers are compound regulatory elements that control expression of key cell identity genes. They recruit high levels of tissue-specific transcription factors and co-activators such as the Mediator complex and contact target gene promoters with high frequency. Most super-enhancers contain multiple constituent regulatory elements, but it is unclear whether these elements have distinct roles in activating target gene expression. Here, by rebuilding the endogenous multipartite α-globin super-enhancer, we show that it contains bioinformatically equivalent but functionally distinct element types: classical enhancers and facilitator elements. Facilitators have no intrinsic enhancer activity, yet in their absence, classical enhancers are unable to fully upregulate their target genes. Without facilitators, classical enhancers exhibit reduced Mediator recruitment, enhancer RNA transcription, and enhancer-promoter interactions. Facilitators are interchangeable but display functional hierarchy based on their position within a multipartite enhancer. Facilitators thus play an important role in potentiating the activity of classical enhancers and ensuring robust activation of target genes.},
keywords = {cis-regulatory elements,alpha globin locus,enhancer cluster,enhancer cooperation,facilitators,gene expression,genome engineering,super-enhancers,synthetic genome,transcriptional regulation},
file = {C\:\\Users\\jandrews\\Zotero\\storage\\ZQEW4D4B\\Blayney et al. - 2023 - Super-enhancers include classical enhancers and fa.pdf;C\:\\Users\\jandrews\\Zotero\\storage\\4R365SKK\\S0092867423013168.html}
}

@online{BloodJournalCpG,
title = {Blood {{Journal}} | {{CpG}}: Unraveling the Key to {{B-cell}} Function},
url = {http://www.bloodjournal.org/content/101/11/4230.2?sso-checked=true},
Expand Down Expand Up @@ -15262,6 +15279,25 @@ @article{ladaAIDAPOBECCytosine2012
file = {C\:\\Users\\jandrews\\Zotero\\storage\\I98MWXWU\\Lada et al. - 2012 - AIDAPOBEC cytosine deaminase induces genome-wide .pdf;C\:\\Users\\jandrews\\Zotero\\storage\\IMVIE7K6\\1745-6150-7-47.html}
}

@article{laflammeDiagnosticUtilityDNA2024,
title = {Diagnostic Utility of {{DNA}} Methylation Analysis in Genetically Unsolved Pediatric Epilepsies and {{CHD2}} Episignature Refinement},
author = {LaFlamme, Christy W. and Rastin, Cassandra and Sengupta, Soham and Pennington, Helen E. and Russ-Hall, Sophie J. and Schneider, Amy L. and Bonkowski, Emily S. and Almanza Fuerte, Edith P. and Allan, Talia J. and Zalusky, Miranda Perez-Galey and Goffena, Joy and Gibson, Sophia B. and Nyaga, Denis M. and Lieffering, Nico and Hebbar, Malavika and Walker, Emily V. and Darnell, Daniel and Olsen, Scott R. and Kolekar, Pandurang and Djekidel, Mohamed Nadhir and Rosikiewicz, Wojciech and McConkey, Haley and Kerkhof, Jennifer and Levy, Michael A. and Relator, Raissa and Lev, Dorit and Lerman-Sagie, Tally and Park, Kristen L. and Alders, Marielle and Cappuccio, Gerarda and Chatron, Nicolas and Demain, Leigh and Genevieve, David and Lesca, Gaetan and Roscioli, Tony and Sanlaville, Damien and Tedder, Matthew L. and Gupta, Sachin and Jones, Elizabeth A. and Weisz-Hubshman, Monika and Ketkar, Shamika and Dai, Hongzheng and Worley, Kim C. and Rosenfeld, Jill A. and Chao, Hsiao-Tuan and Neale, Geoffrey and Carvill, Gemma L. and Wang, Zhaoming and Berkovic, Samuel F. and Sadleir, Lynette G. and Miller, Danny E. and Scheffer, Ingrid E. and Sadikovic, Bekim and Mefford, Heather C.},
date = {2024-08-06},
journaltitle = {Nat Commun},
volume = {15},
number = {1},
pages = {6524},
publisher = {Nature Publishing Group},
issn = {2041-1723},
doi = {10.1038/s41467-024-50159-6},
url = {https://www.nature.com/articles/s41467-024-50159-6},
urldate = {2024-08-09},
abstract = {Sequence-based genetic testing identifies causative variants in \textasciitilde\,50\% of individuals with developmental and epileptic encephalopathies (DEEs). Aberrant changes in DNA methylation are implicated in various neurodevelopmental disorders but remain unstudied in DEEs. We interrogate the diagnostic utility of genome-wide DNA methylation array analysis on peripheral blood samples from 582 individuals with genetically unsolved DEEs. We identify rare differentially methylated regions (DMRs) and explanatory episignatures to uncover causative and candidate genetic etiologies in 12 individuals. Using long-read sequencing, we identify DNA variants underlying rare DMRs, including one balanced translocation, three CG-rich repeat expansions, and four copy number variants. We also identify pathogenic variants associated with episignatures. Finally, we refine the CHD2 episignature using an 850\,K methylation array and bisulfite sequencing to investigate potential insights into CHD2 pathophysiology. Our study demonstrates the diagnostic yield of genome-wide DNA methylation analysis to identify causal and candidate variants as 2\% (12/582) for unsolved DEE cases.},
langid = {english},
keywords = {Diagnostic markers,DNA methylation,Epigenomics},
file = {C\:\\Users\\jandrews\\Zotero\\storage\\LL7B5EFV\\LaFlamme et al. - 2024 - Diagnostic utility of DNA methylation analysis in .pdf}
}

@article{laibleHomemadeSiteDirected2009,
title = {Homemade {{Site Directed Mutagenesis}} of {{Whole Plasmids}}},
author = {Laible, Mark and Boonrod, Kajohn},
Expand Down Expand Up @@ -17280,6 +17316,22 @@ @article{lovenSelectiveInhibitionTumor2013b
file = {C\:\\Users\\jandrews\\Zotero\\storage\\UTJVZAMC\\Lovén et al. - 2013 - Selective Inhibition of Tumor Oncogenes by Disrupt.pdf;C\:\\Users\\jandrews\\Zotero\\storage\\DHQQUDVV\\S0092867413003930.html}
}

@article{lovenSelectiveInhibitionTumor2013c,
title = {Selective {{Inhibition}} of {{Tumor Oncogenes}} by {{Disruption}} of {{Super-Enhancers}}},
author = {Lovén, Jakob and Hoke, Heather A. and Lin, Charles Y. and Lau, Ashley and Orlando, David A. and Vakoc, Christopher R. and Bradner, James E. and Lee, Tong Ihn and Young, Richard A.},
date = {2013-04-11},
journaltitle = {Cell},
volume = {153},
number = {2},
pages = {320--334},
issn = {0092-8674},
doi = {10.1016/j.cell.2013.03.036},
url = {https://www.sciencedirect.com/science/article/pii/S0092867413003930},
urldate = {2024-08-12},
abstract = {Chromatin regulators have become attractive targets for cancer therapy, but it is unclear why inhibition of these ubiquitous regulators should have gene-specific effects in tumor cells. Here, we investigate how inhibition of the widely expressed transcriptional coactivator BRD4 leads to selective inhibition of the MYC oncogene in multiple myeloma (MM). BRD4 and Mediator were found to co-occupy thousands of enhancers associated with active genes. They also co-occupied a small set of exceptionally large super-enhancers associated with genes that feature prominently in MM biology, including the MYC oncogene. Treatment of MM tumor cells with the BET-bromodomain inhibitor JQ1 led to preferential loss of BRD4 at super-enhancers and consequent transcription elongation defects that preferentially impacted~genes with super-enhancers, including MYC. Super-enhancers were found at key oncogenic drivers in many other tumor cells. These observations have implications for the discovery of cancer therapeutics directed at components of super-enhancers in diverse tumor types.},
file = {C\:\\Users\\jandrews\\Zotero\\storage\\L4DVUVX5\\Lovén et al. - 2013 - Selective Inhibition of Tumor Oncogenes by Disrupt.pdf;C\:\\Users\\jandrews\\Zotero\\storage\\IT3FZMC6\\S0092867413003930.html}
}

@article{lueEmergingEZH2Inhibitors2018,
title = {Emerging {{EZH2 Inhibitors}} and {{Their Application}} in {{Lymphoma}}},
author = {Lue, Jennifer K. and Amengual, Jennifer E.},
Expand Down Expand Up @@ -29878,6 +29930,22 @@ @article{whyteMasterTranscriptionFactors2013
file = {C\:\\Users\\jandrews\\Zotero\\storage\\NVT4BQCA\\Whyte et al. - 2013 - Master Transcription Factors and Mediator Establis.pdf;C\:\\Users\\jandrews\\Zotero\\storage\\NKWR7J7R\\S0092867413003929.html}
}

@article{whyteMasterTranscriptionFactors2013a,
title = {Master {{Transcription Factors}} and {{Mediator Establish Super-Enhancers}} at {{Key Cell Identity Genes}}},
author = {Whyte, Warren A. and Orlando, David A. and Hnisz, Denes and Abraham, Brian J. and Lin, Charles Y. and Kagey, Michael H. and Rahl, Peter B. and Lee, Tong Ihn and Young, Richard A.},
date = {2013-04-11},
journaltitle = {Cell},
volume = {153},
number = {2},
pages = {307--319},
issn = {0092-8674},
doi = {10.1016/j.cell.2013.03.035},
url = {https://www.sciencedirect.com/science/article/pii/S0092867413003929},
urldate = {2024-08-12},
abstract = {Master transcription factors Oct4, Sox2, and Nanog bind enhancer elements and recruit Mediator to activate much of the gene expression program of pluripotent embryonic stem cells (ESCs). We report here that the ESC master transcription factors form unusual enhancer domains at most genes that control the pluripotent state. These domains, which we call super-enhancers, consist of clusters of enhancers that are densely occupied by the master regulators and Mediator. Super-enhancers differ from typical enhancers in size, transcription factor density and content, ability to activate transcription, and sensitivity to perturbation. Reduced levels of Oct4 or Mediator cause preferential loss of expression of super-enhancer-associated genes relative to other genes, suggesting how changes in gene expression programs might be accomplished during development. In other more differentiated cells, super-enhancers containing cell-type-specific master transcription factors are also found at genes that define cell identity. Super-enhancers thus play key roles in the control of mammalian cell identity.},
file = {C\:\\Users\\jandrews\\Zotero\\storage\\CW4PNXJP\\Whyte et al. - 2013 - Master Transcription Factors and Mediator Establis.pdf;C\:\\Users\\jandrews\\Zotero\\storage\\28E5Z76N\\S0092867413003929.html}
}

@book{wickhamGgplot2ElegantGraphics2009,
title = {Ggplot2: {{Elegant Graphics}} for {{Data Analysis}}},
author = {Wickham, Hadley},
Expand Down

0 comments on commit 08ecd1d

Please sign in to comment.