From 8d8572391e6f9e9fcb24c277431c54aa213b8935 Mon Sep 17 00:00:00 2001 From: Rita Rasteiro Date: Fri, 15 Mar 2024 17:15:45 +0000 Subject: [PATCH] rm gwasglue2 functions --- DESCRIPTION | 5 +-- R/ld_scores.R | 87 --------------------------------------------------- R/query.R | 60 +++++------------------------------ 3 files changed, 8 insertions(+), 144 deletions(-) delete mode 100644 R/ld_scores.R diff --git a/DESCRIPTION b/DESCRIPTION index cac7f97..c3d5260 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -11,7 +11,7 @@ Authors@R: c( person("Rita", "Rasteiro", , "rita.rasteiro@bristol.ac.uk", role = "aut", comment = c(ORCID = "0000-0002-4217-3060")) ) -Description: R interface to the 'OpenGWAS' database API. Includes a wrapper +Description: Interface to the 'OpenGWAS' database API. Includes a wrapper to make generic calls to the API, plus convenience functions for specific queries. License: MIT + file LICENSE @@ -20,7 +20,6 @@ BugReports: https://github.com/MRCIEU/ieugwasr/issues Depends: R (>= 4.0) Imports: - digest, dplyr, googleAuthR, gwasglue2, @@ -38,8 +37,6 @@ Suggests: testthat VignetteBuilder: knitr -Remotes: - mrcieu/gwasglue2 Encoding: UTF-8 Roxygen: list(markdown = TRUE) RoxygenNote: 7.3.1 diff --git a/R/ld_scores.R b/R/ld_scores.R deleted file mode 100644 index 955234c..0000000 --- a/R/ld_scores.R +++ /dev/null @@ -1,87 +0,0 @@ -#' Converter for gwasglue2 system variant IDs -#' -#' @param afl2_list A `ieugwasr::afl2_list()` dataframe -#' @return The same dataframe with the gwasglue2 variant IDs added -#' @export -convert_variantid <- function(afl2_list){ - chr <- afl2_list$chr - pos <- afl2_list$pos - a1 <- afl2_list$alt - a2 <- afl2_list$ref - # NOTE: It is indifferently if "alt" and "ref" are "a1" or "a2". The function will sort them alphabetically. - - # variantid <- gwasglue2::create_variantid(chr,pos,a1,a2) - variantid <- create_variantid(chr,pos,a1,a2) - - afl2_list[,"variantid"] <- variantid - return(afl2_list) -} - - -#' LD scores writer -#' The LD scores are saved in compressed `.gz`files, split by chromosome name. -#' @param afl2_list A `ieugwasr::afl2_list()` dataframe. -#' @param pop A string with the population name. Default is "EUR". -#' @param path_to_save A string with the path to save the LD scores. Default is 'ldsc/population_name'. -#' @export -#' @return A directory with the compressed LD scores files. Each file is named as the chromosome number, and contains the positions, variant IDs and LD scores. -write_ldscores <- function(afl2_list, pop = "EUR", path_to_save = paste0("ldsc/",pop)){ - - # Check if there is a valid population name - pops <- c("AFR", "AMR", "EAS", "EUR", "SAS") - if (!(pop %in% pops)){ - stop("The population name is not valid. Please use one of the following: AFR, AMR, EAS, EUR, SAS") - } - - # check if variantid is present - if (!"variantid" %in% colnames(afl2_list)){ - afl2_list <- convert_variantid(afl2_list) - } - - # Create a directory to save the ldscores - if (path_to_save == path_to_save){ - dir.create(pop, showWarnings = FALSE) - } else{ - dir.create(path_to_save, showWarnings = FALSE)} - - - # subset the ldscores to only include the columns we need - split_ldscores <- afl2_list[, c("chr", "variantid","pos", paste0("L2.",pop))] %>% - # rename the columns to match the GenomicSEM format - dplyr::rename("BP" = "pos", "SNP" = "variantid", "L2" = paste0("L2.",pop)) %>% - # Split the ldscores by chromosome - split(., .$chr) - # number of chromosomes - chr <- length(split_ldscores) - # Write the ldscores to the directory - for (i in 1:chr){ - readr::write_delim(split_ldscores[[i]], paste0(path_to_save,"/", i, ".l2.ldscore.gz")) - } -} - - -# This function is a copy of the `gwasglue2::create_variantid()` function, because the gwasglue2 package is not exporting the function. When the gwasglue2 package is fixed, this function should be removed. -create_variantid <-function(chr,pos,a1,a2) { - alleles_sorted <- t(apply(cbind(a1,a2),1,sort)) - # create variantid - variantid <- paste0(chr,":", pos,"_",alleles_sorted[,1],"_",alleles_sorted[,2]) - - # create hashes when alleles nchar > 10 - # allele ea - if (all(nchar(alleles_sorted[,1]) <= 10) == FALSE){ - index = which(nchar(alleles_sorted[,1]) > 10) - variantid[index] <- lapply(index, function(i){ - v <- paste0(chr[i],":", pos[i],"_#",digest::digest(alleles_sorted[i,1],algo= "murmur32"),"_",alleles_sorted[i,2],) - }) %>% unlist() - } - - # allele nea - if (all(nchar(alleles_sorted[,2]) <= 10) == FALSE){ - index = which(nchar(alleles_sorted[,2]) > 10) - variantid[index] <- lapply(index, function(i){ - v <- paste0(chr[i],":", pos[i],"_",alleles_sorted[i,1],"_#",digest::digest(alleles_sorted[i,2],algo= "murmur32")) - }) %>% unlist() - } - - return(variantid) - } \ No newline at end of file diff --git a/R/query.R b/R/query.R index 34367c2..91189ba 100644 --- a/R/query.R +++ b/R/query.R @@ -240,42 +240,19 @@ batches <- function(access_token = check_access_token()) #' Used to authenticate level of access to data. #' By default, checks if already authenticated through [`get_access_token`] and #' if not then does not perform authentication -#' @param gwasglue Returns a gwasglue2 SummarySet object (if `gwasglue = TRUE`). Only one GWAS id can be queried at a time. See [gwasglue2::create_dataset()].Default = `FALSE`. #' #' @export #' @return Dataframe. If `gwasglue = TRUE` then returns a gwasglue2 object. -associations <- function(variants, id, proxies=1, r2=0.8, align_alleles=1, palindromes=1, maf_threshold = 0.3, access_token=check_access_token(), gwasglue=FALSE){ +associations <- function(variants, id, proxies=1, r2=0.8, align_alleles=1, palindromes=1, maf_threshold = 0.3, access_token=check_access_token()){ # Query specific variants from specific GWAS using associations_query internal function (old version) out <- associations_query(variants=variants, id=id, proxies=proxies, r2=r2, align_alleles=align_alleles, palindromes=palindromes, maf_threshold=maf_threshold, access_token=access_token) - - if(gwasglue) - { - # check if it is a tibble (trying to avoid loading the tibble package) - if(inherits(out ,"tbl_df")){ - # output gwasglue2 SummarySet object - if(id %>% length() != 1){ - stop("Only one GWAS ID can be queried at a time when using `gwasglue = TRUE`.") - } else { - # create gwasglue2 metadata - m <- gwasglue2::create_metadata(gwasinfo(id)) - # create gwasglue2 SummarySet object - s <- out %>% - gwasglue2::create_summaryset(metadata=m, qc = TRUE) %>% - return() - } - - } else { - return(out) - } - } - else{ - return(out) - } + + return(out) } -# Query specific variants from specific GWAS - associations internal function to allow for gwasglue +# Query specific variants from specific GWAS - associations internal function to allow for future gwasglue2 integration associations_query <- function(variants=variants, id=id, proxies=proxies, r2=r2, align_alleles=align_alleles, palindromes=palindromes, maf_threshold = maf_threshold, access_token=access_token) { id <- legacy_ids(id) @@ -396,42 +373,19 @@ phewas <- function(variants, pval = 0.00001, batch=c(), access_token=check_acces #' Used to authenticate level of access to data. #' By default, checks if already authenticated through [`get_access_token`] #' and if not then does not perform authentication. -#' @param gwasglue Returns a gwasglue2 SummarySet object (if `gwasglue = TRUE`). Only one GWAS id can be queried at a time. See [gwasglue2::create_dataset()].Default = `FALSE`. #' @export #' @return Dataframe. If `gwasglue = TRUE` then returns a gwasglue2 object. -tophits <- function(id, pval = 5e-8, clump = 1, r2 = 0.001, kb = 10000, pop="EUR", force_server = FALSE, access_token = check_access_token(), gwasglue = FALSE) +tophits <- function(id, pval = 5e-8, clump = 1, r2 = 0.001, kb = 10000, pop="EUR", force_server = FALSE, access_token = check_access_token()) { # Query tophits from specific GWAS using tophits_query internal function (old version) out <- tophits_query(id = id, pval = pval, clump = clump, r2 = r2, kb = kb, pop = pop, force_server = force_server, access_token = access_token) - if(isTRUE(gwasglue)) - { - # check if it is a tibble (trying to avoid loading the tibble package) - if(inherits(out, "tbl_df")){ - # output gwasglue2 SummarySet object - if(id %>% length() != 1){ - stop("Only one GWAS ID can be queried at a time when using `gwasglue = TRUE`.") - } else { - # create gwasglue2 metadata - m <- gwasglue2::create_metadata(gwasinfo(id)) - # create gwasglue2 SummarySet object - s <- out %>% - gwasglue2::create_summaryset(metadata=m, qc = TRUE) %>% - return() - } - - } else { - return(out) - } - } - else{ - return(out) - } + return(out) } -# query top hits from GWAS dataset - tophits internal function to allow for gwasglue +# query top hits from GWAS dataset - tophits internal function to allow for future gwasglue2 integration tophits_query <- function(id, pval=5e-8, clump = 1, r2 = 0.001, kb = 10000, pop="EUR", force_server = FALSE, access_token=check_access_token()) {