From db42a20ee68fbbabf1e05742f7f162fdb1a4db54 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Fri, 15 Apr 2022 15:18:24 -0700 Subject: [PATCH 1/4] add native support for Vierstra motifs create a motifSet option for "vierstra" and create collection options for "individual" and "archetype" corresponding to the motifs produced by Jeff Vierstra (https://github.com/jvierstra/motif-clustering). The new rds files linked on amazon have fixed the issue of ":" being a part of the motif name and now use "|" for separation of name info (https://github.com/GreenleafLab/ArchR/issues/675) --- R/AnnotationPeaks.R | 29 ++++++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/R/AnnotationPeaks.R b/R/AnnotationPeaks.R index c98f5b6e..f4628695 100644 --- a/R/AnnotationPeaks.R +++ b/R/AnnotationPeaks.R @@ -272,13 +272,16 @@ addPeakAnnotations <- function( #' #' @param ArchRProj An `ArchRProject` object. #' @param motifSet The motif set to be used for annotation. Options include: (i) "JASPAR2016", "JASPAR2018", "JASPAR2020" -#' which gives the 2016, 2018 or 2020 version of JASPAR motifs or (ii) one of "cisbp", "encode", or "homer" which gives the -#' corresponding motif sets from the `chromVAR` package. +#' which gives the 2016, 2018 or 2020 version of JASPAR motifs, (ii) one of "cisbp", "encode", or "homer" which gives the +#' corresponding motif sets from the `chromVAR` package, or (iii) "vierstra" which gives the clustered archetype motifs +#' created by Jeff Vierstra (https://github.com/jvierstra/motif-clustering). #' @param annoName The name of the `peakAnnotation` object to be stored in the provided `ArchRProject` #' @param species The name of the species relevant to the supplied `ArchRProject`. This is used for identifying which motif to be #' used from CisBP/JASPAR. By default, this function will attempt to guess the species based on the value from `getGenome()`. #' @param collection If one of the JASPAR motif sets is used via `motifSet`, this parameter allows you to indicate the JASPAR -#' collection to be used. See `getMatrixSet()` from `TFBSTools` for all options to supply for collection. +#' collection to be used. See `getMatrixSet()` from `TFBSTools` for all options to supply for collection. If `motifSet` is +#' "vierstra", then this must either be "individual" (for individual motif models), or "archetype" (for clustered models). +#' NOTE: vierstra archetype motifs are currently in beta and have not been finalized by Jeff Vierstra. #' @param motifPWMs A custom set of motif PWMs as a PWMList for adding motif annotations. #' @param cutOff The p-value cutoff to be used for motif search. The p-value is determined vs a background set of sequences #' (see `MOODS` for more details on this determination). @@ -442,6 +445,26 @@ addMotifAnnotations <- function( motifs <- obj$motifs motifSummary <- obj$motifSummary + }else if(tolower(motifSet)=="vierstra"){ + if(tolower(collection)=="individual"){ + fileName <- "Vierstra_Individual_Motifs.rds" + download.file(url = "https://jeffgranja.s3.amazonaws.com/ArchR/Annotations/Vierstra_Individual_Motifs.rds", + destfile = fileName) + motifs <- readRDS(fileName) + file.remove(fileName) + } else if(tolower(collection == "archetype")){ + fileName <- "Vierstra_Archetype_Motifs.rds" + download.file(url = "https://jeffgranja.s3.amazonaws.com/ArchR/Annotations/Vierstra_Archetype_Motifs.rds", + destfile = fileName) + motifs <- readRDS(fileName) + file.remove(fileName) + } else { + stop(paste0("Error! collection ", collection, " not recognized for motifSet ",motifSet + ". Accepted values are 'individual' and 'archetype'")) + } + obj <- NULL + motifSummary <- NULL + }else if(tolower(motifSet)=="custom"){ obj <- NULL From b1fb5f0dbae7d07554854e7347ee70c11b52ed35 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Fri, 15 Apr 2022 15:20:31 -0700 Subject: [PATCH 2/4] typo --- R/AnnotationPeaks.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/AnnotationPeaks.R b/R/AnnotationPeaks.R index f4628695..23bb14f9 100644 --- a/R/AnnotationPeaks.R +++ b/R/AnnotationPeaks.R @@ -459,7 +459,7 @@ addMotifAnnotations <- function( motifs <- readRDS(fileName) file.remove(fileName) } else { - stop(paste0("Error! collection ", collection, " not recognized for motifSet ",motifSet + stop(paste0("Error! collection ", collection, " not recognized for motifSet ",motifSet, ". Accepted values are 'individual' and 'archetype'")) } obj <- NULL From d7a3105d910f00c11a8c90841d5a59b2436897a9 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Fri, 15 Apr 2022 15:55:29 -0700 Subject: [PATCH 3/4] unify file download workflow make file download workflow match that of annotations like lola etc --- R/AnnotationPeaks.R | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/R/AnnotationPeaks.R b/R/AnnotationPeaks.R index 23bb14f9..af705ae5 100644 --- a/R/AnnotationPeaks.R +++ b/R/AnnotationPeaks.R @@ -447,21 +447,29 @@ addMotifAnnotations <- function( }else if(tolower(motifSet)=="vierstra"){ if(tolower(collection)=="individual"){ - fileName <- "Vierstra_Individual_Motifs.rds" - download.file(url = "https://jeffgranja.s3.amazonaws.com/ArchR/Annotations/Vierstra_Individual_Motifs.rds", - destfile = fileName) - motifs <- readRDS(fileName) - file.remove(fileName) + url = "https://jeffgranja.s3.amazonaws.com/ArchR/Annotations/Vierstra_Individual_Motifs.rds" } else if(tolower(collection == "archetype")){ - fileName <- "Vierstra_Archetype_Motifs.rds" - download.file(url = "https://jeffgranja.s3.amazonaws.com/ArchR/Annotations/Vierstra_Archetype_Motifs.rds", - destfile = fileName) - motifs <- readRDS(fileName) - file.remove(fileName) + url = "https://jeffgranja.s3.amazonaws.com/ArchR/Annotations/Vierstra_Archetype_Motifs.rds" } else { stop(paste0("Error! collection ", collection, " not recognized for motifSet ",motifSet, ". Accepted values are 'individual' and 'archetype'")) } + + annoPath <- file.path(find.package("ArchR", NULL, quiet = TRUE), "data", "Annotations") + dir.create(annoPath, showWarnings = FALSE) + + #Download + if(!file.exists(file.path(annoPath, basename(url)))){ + message("Motif file ", basename(url)," does not exist! Downloading..") + download.file( + url = url, + destfile = file.path(annoPath, basename(url)), + quiet = FALSE + ) + } + motifFile <- file.path(annoPath, basename(url)) + + motifs <- readRDS(motifFile) obj <- NULL motifSummary <- NULL From 3f263a9e32662c6db9bb25eb7335bf7d231a8caa Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Fri, 15 Apr 2022 17:02:19 -0700 Subject: [PATCH 4/4] update collection param --- R/AnnotationPeaks.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/AnnotationPeaks.R b/R/AnnotationPeaks.R index af705ae5..ec8c06fe 100644 --- a/R/AnnotationPeaks.R +++ b/R/AnnotationPeaks.R @@ -280,7 +280,7 @@ addPeakAnnotations <- function( #' used from CisBP/JASPAR. By default, this function will attempt to guess the species based on the value from `getGenome()`. #' @param collection If one of the JASPAR motif sets is used via `motifSet`, this parameter allows you to indicate the JASPAR #' collection to be used. See `getMatrixSet()` from `TFBSTools` for all options to supply for collection. If `motifSet` is -#' "vierstra", then this must either be "individual" (for individual motif models), or "archetype" (for clustered models). +#' "vierstra", then this must either be "archetype" (for the v2 clustered models) or "individual" (for the original v1 individual motif models). #' NOTE: vierstra archetype motifs are currently in beta and have not been finalized by Jeff Vierstra. #' @param motifPWMs A custom set of motif PWMs as a PWMList for adding motif annotations. #' @param cutOff The p-value cutoff to be used for motif search. The p-value is determined vs a background set of sequences