From 8d4da8efe6a0119f55452f43a7d65d84ed2f3115 Mon Sep 17 00:00:00 2001 From: Awa Synthia Date: Mon, 7 Oct 2024 08:13:27 +0300 Subject: [PATCH 1/5] defunct functions in acc2lin Signed-off-by: Awa Synthia --- NAMESPACE | 5 ++++ R/acc2lin.R | 17 +++++++----- R/deprecate.R | 40 ++++++++++++++++++++++++++++ man/acc2lin.Rd | 68 ++++++++++++++++++++++++++++++++++++++++++----- man/add_lins.Rd | 23 ++-------------- man/deprecate.Rd | 43 ++++++++++++++++++++++++++++++ man/efetch_ipg.Rd | 17 +----------- man/ipg2lin.Rd | 24 +---------------- man/sink.reset.Rd | 19 ------------- 9 files changed, 165 insertions(+), 91 deletions(-) create mode 100644 R/deprecate.R create mode 100644 man/deprecate.Rd delete mode 100644 man/sink.reset.Rd diff --git a/NAMESPACE b/NAMESPACE index 16cf0813..4dbb858b 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -4,14 +4,17 @@ export(BinaryDomainNetwork) export(DownloadAssemblySummary) export(GCA2lin) export(GenContextNetwork) +export(IPG2Lineage) export(LineagePlot) export(RepresentativeAccNums) +export(acc2Lineage) export(acc2fa) export(acc2lin) export(add_leaves) export(add_lins) export(add_name) export(add_tax) +export(addlineage) export(advanced_opts2est_walltime) export(alignFasta) export(assert_count_df) @@ -34,6 +37,7 @@ export(create_all_col_params) export(create_lineage_lookup) export(create_one_col_params) export(domain_network) +export(efetchIPG) export(efetch_ipg) export(filter_by_doms) export(filter_freq) @@ -79,6 +83,7 @@ export(run_rpsblast) export(send_job_status_email) export(shorten_lineage) export(sink.reset) +export(sinkReset) export(stacked_lin_plot) export(string2accnum) export(summ.DA) diff --git a/R/acc2lin.R b/R/acc2lin.R index f8d71949..dca24140 100644 --- a/R/acc2lin.R +++ b/R/acc2lin.R @@ -10,13 +10,14 @@ #' Sink Reset #' #' @return No return, but run to close all outstanding `sink()`s +#' @rdname acc2lin #' @export #' #' @examples #' \dontrun{ #' sink.reset() #' } -sink.reset <- function() { +sinkReset <- function() { for (i in seq_len(sink.number())) { sink(NULL) } @@ -37,14 +38,15 @@ sink.reset <- function() { #' @importFrom rlang sym #' #' @return Describe return, in detail +#' @rdname acc2lin #' @export #' #' @examples #' \dontrun{ #' add_lins() #' } -add_lins <- function(df, acc_col = "AccNum", assembly_path, - lineagelookup_path, ipgout_path = NULL, plan = "sequential") { +addlineage <- function(df, acc_col = "AccNum", assembly_path, + lineagelookup_path, ipgout_path = NULL, plan = "sequential", ...) { s_acc_col <- sym(acc_col) accessions <- df %>% pull(acc_col) lins <- acc2lin(accessions, assembly_path, lineagelookup_path, ipgout_path, plan) @@ -81,13 +83,14 @@ add_lins <- function(df, acc_col = "AccNum", assembly_path, #' @param plan #' #' @return Describe return, in detail +#' @rdname acc2lin #' @export #' #' @examples #' \dontrun{ #' acc2lin() #' } -acc2lin <- function(accessions, assembly_path, lineagelookup_path, ipgout_path = NULL, plan = "sequential") { +acc2Lineage <- function(accessions, assembly_path, lineagelookup_path, ipgout_path = NULL, plan = "sequential", ...) { tmp_ipg <- F if (is.null(ipgout_path)) { tmp_ipg <- T @@ -119,13 +122,14 @@ acc2lin <- function(accessions, assembly_path, lineagelookup_path, ipgout_path = #' @importFrom rentrez entrez_fetch #' #' @return Describe return, in detail +#' @rdname acc2lin #' @export #' #' @examples #' \dontrun{ #' efetch_ipg() #' } -efetch_ipg <- function(accnums, out_path, plan = "sequential") { +efetchIPG <- function(accnums, out_path, plan = "sequential", ...) { if (length(accnums) > 0) { partition <- function(in_data, groups) { # \\TODO This function should be defined outside of efetch_ipg(). It can be non-exported/internal @@ -187,6 +191,7 @@ efetch_ipg <- function(accnums, out_path, plan = "sequential") { #' @importFrom data.table fread #' #' @return Describe return, in detail +#' @rdname acc2lin #' @export #' #' @examples @@ -194,7 +199,7 @@ efetch_ipg <- function(accnums, out_path, plan = "sequential") { #' ipg2lin() #' } #' -ipg2lin <- function(accessions, ipg_file, assembly_path, lineagelookup_path) { +IPG2Lineage <- function(accessions, ipg_file, assembly_path, lineagelookup_path, ...) { ipg_dt <- fread(ipg_file, sep = "\t", fill = T) ipg_dt <- ipg_dt[Protein %in% accessions] diff --git a/R/deprecate.R b/R/deprecate.R new file mode 100644 index 00000000..2de0bbcd --- /dev/null +++ b/R/deprecate.R @@ -0,0 +1,40 @@ +#' These functions will be deprecated. Please use other functions instead. +#' +#' @name deprecate +#' +NULL + +#' @rdname deprecate +#' @export +sink.reset <- function() { + warning("'sink.reset' is deprecated. Use 'sinkReset' instead.") + sinkReset() +} + +#' @rdname deprecate +#' @export +add_lins <- function(df, ...) { + warning("'add_lins' is deprecated. Use 'addlineage' instead.") + addlineage(df, ...) +} + +#' @rdname deprecate +#' @export +acc2lin <- function(accessions, ...) { + warning("'acc2lin' is deprecated. Use 'acc2Lineage' instead.") + acc2Lineage(accessions, ...) +} + +#' @rdname deprecate +#' @export +efetch_ipg <- function(accnums, ...) { + warning("'efetch_ipg' is deprecated. Use 'efetchIPG' instead.") + efetchIPG(accnums, ...) +} + +#' @rdname deprecate +#' @export +ipg2lin <- function(accessions, ...) { + warning("'ipg2lin' is deprecated. Use 'IPG32Lineage' instead.") + IPG32Lineage(accessions, ...) +} \ No newline at end of file diff --git a/man/acc2lin.Rd b/man/acc2lin.Rd index 6255b290..f008be5f 100644 --- a/man/acc2lin.Rd +++ b/man/acc2lin.Rd @@ -1,17 +1,39 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/acc2lin.R, R/lineage.R -\name{acc2lin} +\name{sinkReset} +\alias{sinkReset} +\alias{addlineage} +\alias{acc2Lineage} +\alias{efetchIPG} +\alias{IPG2Lineage} \alias{acc2lin} -\title{acc2lin} +\title{Sink Reset} \usage{ -acc2lin( +sinkReset() + +addlineage( + df, + acc_col = "AccNum", + assembly_path, + lineagelookup_path, + ipgout_path = NULL, + plan = "sequential", + ... +) + +acc2Lineage( accessions, assembly_path, lineagelookup_path, ipgout_path = NULL, - plan = "multicore" + plan = "sequential", + ... ) +efetchIPG(accnums, out_path, plan = "sequential", ...) + +IPG2Lineage(accessions, ipg_file, assembly_path, lineagelookup_path, ...) + acc2lin( accessions, assembly_path, @@ -21,8 +43,6 @@ acc2lin( ) } \arguments{ -\item{accessions}{Character vector of protein accessions} - \item{assembly_path}{String of the path to the assembly_summary path This file can be generated using the "DownloadAssemblySummary()" function} @@ -33,14 +53,37 @@ This file can be generated using the "DownloadAssemblySummary()" function} on the ipg database. If NULL, the file will not be written. Defaults to NULL} \item{plan}{} + +\item{accessions}{Character vector of protein accessions} + +\item{accnums}{Character vector containing the accession numbers to query on +the ipg database} + +\item{out_path}{Path to write the efetch results to} + +\item{ipg_file}{Filepath to the file containing results of an efetch run on the +ipg database. The protein accession in 'accessions' should be contained in this +file} } \value{ +No return, but run to close all outstanding \code{sink()}s + +Describe return, in detail + +Describe return, in detail + +Describe return, in detail + Describe return, in detail } \description{ This function combines 'efetch_ipg()' and 'ipg2lin()' to map a set of protein accessions to their assembly (GCA_ID), tax ID, and lineage. +Perform efetch on the ipg database and write the results to out_path + +Takes the resulting file of an efetch run on the ipg database and + Function to map protein accession numbers to lineage This function combines 'efetch_ipg()' and 'ipg2lin()' to map a set @@ -48,8 +91,21 @@ of protein accessions to their assembly (GCA_ID), tax ID, and lineage. } \examples{ \dontrun{ +sink.reset() +} +\dontrun{ +add_lins() +} +\dontrun{ acc2lin() } +\dontrun{ +efetch_ipg() +} +\dontrun{ +ipg2lin() +} + } \author{ Samuel Chen, Janani Ravi diff --git a/man/add_lins.Rd b/man/add_lins.Rd index 226e428d..9ac343ea 100644 --- a/man/add_lins.Rd +++ b/man/add_lins.Rd @@ -1,18 +1,9 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/acc2lin.R, R/lineage.R +% Please edit documentation in R/lineage.R \name{add_lins} \alias{add_lins} -\title{Add Lineages} +\title{add_lins} \usage{ -add_lins( - df, - acc_col = "AccNum", - assembly_path, - lineagelookup_path, - ipgout_path = NULL, - plan = "multicore" -) - add_lins( df, acc_col = "AccNum", @@ -25,16 +16,6 @@ add_lins( \arguments{ \item{plan}{} } -\value{ -Describe return, in detail -} \description{ -Add Lineages - add_lins } -\examples{ -\dontrun{ -add_lins() -} -} diff --git a/man/deprecate.Rd b/man/deprecate.Rd new file mode 100644 index 00000000..b8f0731f --- /dev/null +++ b/man/deprecate.Rd @@ -0,0 +1,43 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/deprecate.R +\name{deprecate} +\alias{deprecate} +\alias{sink.reset} +\alias{add_lins} +\alias{acc2lin} +\alias{efetch_ipg} +\alias{ipg2lin} +\title{These functions will be deprecated. Please use other functions instead.} +\usage{ +sink.reset() + +add_lins( + df, + acc_col = "AccNum", + assembly_path, + lineagelookup_path, + ipgout_path = NULL, + plan = "multicore" +) + +acc2lin( + accessions, + assembly_path, + lineagelookup_path, + ipgout_path = NULL, + plan = "multicore" +) + +efetch_ipg(accessions, out_path, plan = "multicore") + +ipg2lin( + accessions, + ipg_file, + refseq_assembly_path, + genbank_assembly_path, + lineagelookup_path +) +} +\description{ +These functions will be deprecated. Please use other functions instead. +} diff --git a/man/efetch_ipg.Rd b/man/efetch_ipg.Rd index ec5b6bcb..efe1e8c5 100644 --- a/man/efetch_ipg.Rd +++ b/man/efetch_ipg.Rd @@ -1,11 +1,9 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/acc2lin.R, R/lineage.R +% Please edit documentation in R/lineage.R \name{efetch_ipg} \alias{efetch_ipg} \title{efetch_ipg} \usage{ -efetch_ipg(accessions, out_path, plan = "multicore") - efetch_ipg(accessions, out_path, plan = "multicore") } \arguments{ @@ -15,22 +13,9 @@ the ipg database} \item{out_path}{Path to write the efetch results to} \item{plan}{} - -\item{accnums}{Character vector containing the accession numbers to query on -the ipg database} -} -\value{ -Describe return, in detail } \description{ Perform efetch on the ipg database and write the results to out_path - -Perform efetch on the ipg database and write the results to out_path -} -\examples{ -\dontrun{ -efetch_ipg() -} } \author{ Samuel Chen, Janani Ravi diff --git a/man/ipg2lin.Rd b/man/ipg2lin.Rd index 3a14eada..6e2b4c6f 100644 --- a/man/ipg2lin.Rd +++ b/man/ipg2lin.Rd @@ -1,17 +1,9 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/acc2lin.R, R/lineage.R +% Please edit documentation in R/lineage.R \name{ipg2lin} \alias{ipg2lin} \title{ipg2lin} \usage{ -ipg2lin( - accessions, - ipg_file, - refseq_assembly_path, - genbank_assembly_path, - lineagelookup_path -) - ipg2lin( accessions, ipg_file, @@ -30,24 +22,10 @@ file} \item{lineagelookup_path}{String of the path to the lineage lookup file (taxid to lineage mapping). This file can be generated using the "create_lineage_lookup()" function} - -\item{assembly_path}{String of the path to the assembly_summary path -This file can be generated using the "DownloadAssemblySummary()" function} -} -\value{ -Describe return, in detail } \description{ -Takes the resulting file of an efetch run on the ipg database and - Takes the resulting file of an efetch run on the ipg database and append lineage, and taxid columns -} -\examples{ -\dontrun{ -ipg2lin() -} - } \author{ Samuel Chen, Janani Ravi diff --git a/man/sink.reset.Rd b/man/sink.reset.Rd deleted file mode 100644 index a31b841d..00000000 --- a/man/sink.reset.Rd +++ /dev/null @@ -1,19 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/acc2lin.R -\name{sink.reset} -\alias{sink.reset} -\title{Sink Reset} -\usage{ -sink.reset() -} -\value{ -No return, but run to close all outstanding \code{sink()}s -} -\description{ -Sink Reset -} -\examples{ -\dontrun{ -sink.reset() -} -} From ec96cf1b4192343716fa074f8d1e48d2af5f33e4 Mon Sep 17 00:00:00 2001 From: Awa Synthia Date: Mon, 7 Oct 2024 22:20:13 +0300 Subject: [PATCH 2/5] rename functions Signed-off-by: Awa Synthia --- NAMESPACE | 5 --- R/acc2lin.R | 28 ++++++++--------- R/deprecate.R | 40 ----------------------- R/lineage.R | 24 +++++++------- man/{ipg2lin.Rd => IPG2Lineage.Rd} | 8 ++--- man/acc2Lineage.Rd | 37 ++++++++++++++++++++++ man/acc2lin.Rd | 49 ++++++++++++----------------- man/{add_lins.Rd => addlineage.Rd} | 10 +++--- man/deprecate.Rd | 43 ------------------------- man/{efetch_ipg.Rd => efetchIPG.Rd} | 8 ++--- 10 files changed, 96 insertions(+), 156 deletions(-) delete mode 100644 R/deprecate.R rename man/{ipg2lin.Rd => IPG2Lineage.Rd} (91%) create mode 100644 man/acc2Lineage.Rd rename man/{add_lins.Rd => addlineage.Rd} (76%) delete mode 100644 man/deprecate.Rd rename man/{efetch_ipg.Rd => efetchIPG.Rd} (78%) diff --git a/NAMESPACE b/NAMESPACE index 4dbb858b..a526b959 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -9,9 +9,7 @@ export(LineagePlot) export(RepresentativeAccNums) export(acc2Lineage) export(acc2fa) -export(acc2lin) export(add_leaves) -export(add_lins) export(add_name) export(add_tax) export(addlineage) @@ -38,7 +36,6 @@ export(create_lineage_lookup) export(create_one_col_params) export(domain_network) export(efetchIPG) -export(efetch_ipg) export(filter_by_doms) export(filter_freq) export(find_paralogs) @@ -53,7 +50,6 @@ export(get_accnums_from_fasta_file) export(get_job_message) export(get_proc_medians) export(get_proc_weights) -export(ipg2lin) export(ipr2viz) export(ipr2viz_web) export(lineage.DA.plot) @@ -82,7 +78,6 @@ export(run_deltablast) export(run_rpsblast) export(send_job_status_email) export(shorten_lineage) -export(sink.reset) export(sinkReset) export(stacked_lin_plot) export(string2accnum) diff --git a/R/acc2lin.R b/R/acc2lin.R index dca24140..71e7ae07 100644 --- a/R/acc2lin.R +++ b/R/acc2lin.R @@ -15,7 +15,7 @@ #' #' @examples #' \dontrun{ -#' sink.reset() +#' sinkReset() #' } sinkReset <- function() { for (i in seq_len(sink.number())) { @@ -43,13 +43,13 @@ sinkReset <- function() { #' #' @examples #' \dontrun{ -#' add_lins() +#' addlineage() #' } addlineage <- function(df, acc_col = "AccNum", assembly_path, lineagelookup_path, ipgout_path = NULL, plan = "sequential", ...) { s_acc_col <- sym(acc_col) accessions <- df %>% pull(acc_col) - lins <- acc2lin(accessions, assembly_path, lineagelookup_path, ipgout_path, plan) + lins <- acc2Lineage(accessions, assembly_path, lineagelookup_path, ipgout_path, plan) # Drop a lot of the unimportant columns for now? will make merging much easier lins <- lins[, c( @@ -66,11 +66,11 @@ addlineage <- function(df, acc_col = "AccNum", assembly_path, } -#' acc2lin +#' acc2Lineage #' #' @author Samuel Chen, Janani Ravi #' -#' @description This function combines 'efetch_ipg()' and 'ipg2lin()' to map a set +#' @description This function combines 'efetchIPG()' and 'IPG2Lineage()' to map a set #' of protein accessions to their assembly (GCA_ID), tax ID, and lineage. #' #' @param accessions Character vector of protein accessions @@ -88,7 +88,7 @@ addlineage <- function(df, acc_col = "AccNum", assembly_path, #' #' @examples #' \dontrun{ -#' acc2lin() +#' acc2Lineage() #' } acc2Lineage <- function(accessions, assembly_path, lineagelookup_path, ipgout_path = NULL, plan = "sequential", ...) { tmp_ipg <- F @@ -96,9 +96,9 @@ acc2Lineage <- function(accessions, assembly_path, lineagelookup_path, ipgout_pa tmp_ipg <- T ipgout_path <- tempfile("ipg", fileext = ".txt") } - efetch_ipg(accessions, out_path = ipgout_path, plan) + efetchIPG(accessions, out_path = ipgout_path, plan) - lins <- ipg2lin(accessions, ipgout_path, assembly_path, lineagelookup_path) + lins <- IPG2Lineage(accessions, ipgout_path, assembly_path, lineagelookup_path) if (tmp_ipg) { unlink(tempdir(), recursive = T) @@ -106,7 +106,7 @@ acc2Lineage <- function(accessions, assembly_path, lineagelookup_path, ipgout_pa return(lins) } -#' efetch_ipg +#' efetchIPG #' #' @author Samuel Chen, Janani Ravi #' @@ -127,12 +127,12 @@ acc2Lineage <- function(accessions, assembly_path, lineagelookup_path, ipgout_pa #' #' @examples #' \dontrun{ -#' efetch_ipg() +#' efetchIPG() #' } efetchIPG <- function(accnums, out_path, plan = "sequential", ...) { if (length(accnums) > 0) { partition <- function(in_data, groups) { - # \\TODO This function should be defined outside of efetch_ipg(). It can be non-exported/internal + # \\TODO This function should be defined outside of efetchIPG(). It can be non-exported/internal # Partition data to limit number of queries per second for rentrez fetch: # limit of 10/second w/ key l <- length(in_data) @@ -172,7 +172,7 @@ efetchIPG <- function(accnums, out_path, plan = "sequential", ...) { } } -#' ipg2lin +#' IPG2Lineage #' #' @author Samuel Chen, Janani Ravi #' @@ -196,7 +196,7 @@ efetchIPG <- function(accnums, out_path, plan = "sequential", ...) { #' #' @examples #' \dontrun{ -#' ipg2lin() +#' IPG2Lineage() #' } #' IPG2Lineage <- function(accessions, ipg_file, assembly_path, lineagelookup_path, ...) { @@ -216,7 +216,7 @@ IPG2Lineage <- function(accessions, ipg_file, assembly_path, lineagelookup_path, -# efetch_ipg <- function(accnums, outpath) +# efetchIPG <- function(accnums, outpath) # { # SIZE = 250 # lower_bound = 1 diff --git a/R/deprecate.R b/R/deprecate.R deleted file mode 100644 index 2de0bbcd..00000000 --- a/R/deprecate.R +++ /dev/null @@ -1,40 +0,0 @@ -#' These functions will be deprecated. Please use other functions instead. -#' -#' @name deprecate -#' -NULL - -#' @rdname deprecate -#' @export -sink.reset <- function() { - warning("'sink.reset' is deprecated. Use 'sinkReset' instead.") - sinkReset() -} - -#' @rdname deprecate -#' @export -add_lins <- function(df, ...) { - warning("'add_lins' is deprecated. Use 'addlineage' instead.") - addlineage(df, ...) -} - -#' @rdname deprecate -#' @export -acc2lin <- function(accessions, ...) { - warning("'acc2lin' is deprecated. Use 'acc2Lineage' instead.") - acc2Lineage(accessions, ...) -} - -#' @rdname deprecate -#' @export -efetch_ipg <- function(accnums, ...) { - warning("'efetch_ipg' is deprecated. Use 'efetchIPG' instead.") - efetchIPG(accnums, ...) -} - -#' @rdname deprecate -#' @export -ipg2lin <- function(accessions, ...) { - warning("'ipg2lin' is deprecated. Use 'IPG32Lineage' instead.") - IPG32Lineage(accessions, ...) -} \ No newline at end of file diff --git a/R/lineage.R b/R/lineage.R index 20acec04..3775b63b 100644 --- a/R/lineage.R +++ b/R/lineage.R @@ -133,7 +133,7 @@ GCA2lin <- function(prot_data, ################################### ## !! @SAM why is this called lins? ################################### -#' add_lins +#' addlineage #' #' @param df #' @param acc_col @@ -149,11 +149,11 @@ GCA2lin <- function(prot_data, #' @export #' #' @examples -add_lins <- function(df, acc_col = "AccNum", assembly_path, +addlineage <- function(df, acc_col = "AccNum", assembly_path, lineagelookup_path, ipgout_path = NULL, plan = "multicore") { acc_sym <- sym(acc_col) accessions <- df %>% pull(acc_sym) - lins <- acc2lin(accessions, assembly_path, + lins <- acc2Lineage(accessions, assembly_path, lineagelookup_path, ipgout_path, plan = plan ) @@ -178,13 +178,13 @@ add_lins <- function(df, acc_col = "AccNum", assembly_path, ####################################### ## Map Protein Accessions to Lineage ## ####################################### -#' acc2lin +#' acc2Lineage #' #' @description #' Function to map protein accession numbers to lineage #' #' @author Samuel Chen, Janani Ravi -#' @description This function combines 'efetch_ipg()' and 'ipg2lin()' to map a set +#' @description This function combines 'efetchIPG()' and 'IPG2Lineage()' to map a set #' of protein accessions to their assembly (GCA_ID), tax ID, and lineage. #' #' @param accessions Character vector of protein accessions @@ -200,7 +200,7 @@ add_lins <- function(df, acc_col = "AccNum", assembly_path, #' @export #' #' @examples -acc2lin <- function(accessions, assembly_path, lineagelookup_path, +acc2Lineage <- function(accessions, assembly_path, lineagelookup_path, ipgout_path = NULL, plan = "multicore") { tmp_ipg <- F @@ -208,9 +208,9 @@ acc2lin <- function(accessions, assembly_path, lineagelookup_path, tmp_ipg <- T ipgout_path <- tempfile("ipg", fileext = ".txt") } - efetch_ipg(accessions, out_path = ipgout_path, plan = plan) + efetchIPG(accessions, out_path = ipgout_path, plan = plan) - lins <- ipg2lin(accessions, ipgout_path, assembly_path, lineagelookup_path) + lins <- IPG2Lineage(accessions, ipgout_path, assembly_path, lineagelookup_path) # if(tmp_ipg) # { @@ -227,7 +227,7 @@ acc2lin <- function(accessions, assembly_path, lineagelookup_path, ######################################### ## Download IPG results for Accessions ## ######################################### -#' efetch_ipg +#' efetchIPG #' #' @author Samuel Chen, Janani Ravi #' @description Perform efetch on the ipg database and write the results to out_path @@ -245,7 +245,7 @@ acc2lin <- function(accessions, assembly_path, lineagelookup_path, #' @export #' #' @examples -efetch_ipg <- function(accessions, out_path, plan = "multicore") { +efetchIPG <- function(accessions, out_path, plan = "multicore") { if (length(accessions) > 0) { partition <- function(v, groups) { # Partition data to limit number of queries per second for rentrez fetch: @@ -295,7 +295,7 @@ efetch_ipg <- function(accessions, out_path, plan = "multicore") { ######################################### ## Maps IPG results to TaxID + Lineage ## ######################################### -#' ipg2lin +#' IPG2Lineage #' #' @author Samuel Chen, Janani Ravi #' @description Takes the resulting file of an efetch run on the ipg database and @@ -317,7 +317,7 @@ efetch_ipg <- function(accessions, out_path, plan = "multicore") { #' @export #' #' @examples -ipg2lin <- function(accessions, ipg_file, +IPG2Lineage <- function(accessions, ipg_file, refseq_assembly_path, genbank_assembly_path, lineagelookup_path) { ipg_dt <- fread(ipg_file, sep = "\t", fill = T) diff --git a/man/ipg2lin.Rd b/man/IPG2Lineage.Rd similarity index 91% rename from man/ipg2lin.Rd rename to man/IPG2Lineage.Rd index 6e2b4c6f..43a920b9 100644 --- a/man/ipg2lin.Rd +++ b/man/IPG2Lineage.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/lineage.R -\name{ipg2lin} -\alias{ipg2lin} -\title{ipg2lin} +\name{IPG2Lineage} +\alias{IPG2Lineage} +\title{IPG2Lineage} \usage{ -ipg2lin( +IPG2Lineage( accessions, ipg_file, refseq_assembly_path, diff --git a/man/acc2Lineage.Rd b/man/acc2Lineage.Rd new file mode 100644 index 00000000..5ab5931a --- /dev/null +++ b/man/acc2Lineage.Rd @@ -0,0 +1,37 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/lineage.R +\name{acc2Lineage} +\alias{acc2Lineage} +\title{acc2Lineage} +\usage{ +acc2Lineage( + accessions, + assembly_path, + lineagelookup_path, + ipgout_path = NULL, + plan = "multicore" +) +} +\arguments{ +\item{accessions}{Character vector of protein accessions} + +\item{assembly_path}{String of the path to the assembly_summary path +This file can be generated using the "DownloadAssemblySummary()" function} + +\item{lineagelookup_path}{String of the path to the lineage lookup file +(taxid to lineage mapping). This file can be generated using the} + +\item{ipgout_path}{Path to write the results of the efetch run of the accessions +on the ipg database. If NULL, the file will not be written. Defaults to NULL} + +\item{plan}{} +} +\description{ +Function to map protein accession numbers to lineage + +This function combines 'efetchIPG()' and 'IPG2Lineage()' to map a set +of protein accessions to their assembly (GCA_ID), tax ID, and lineage. +} +\author{ +Samuel Chen, Janani Ravi +} diff --git a/man/acc2lin.Rd b/man/acc2lin.Rd index f008be5f..92b2887b 100644 --- a/man/acc2lin.Rd +++ b/man/acc2lin.Rd @@ -1,12 +1,11 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/acc2lin.R, R/lineage.R +% Please edit documentation in R/acc2lin.R \name{sinkReset} \alias{sinkReset} \alias{addlineage} \alias{acc2Lineage} \alias{efetchIPG} \alias{IPG2Lineage} -\alias{acc2lin} \title{Sink Reset} \usage{ sinkReset() @@ -17,8 +16,7 @@ addlineage( assembly_path, lineagelookup_path, ipgout_path = NULL, - plan = "sequential", - ... + plan = "multicore" ) acc2Lineage( @@ -26,20 +24,17 @@ acc2Lineage( assembly_path, lineagelookup_path, ipgout_path = NULL, - plan = "sequential", - ... + plan = "multicore" ) -efetchIPG(accnums, out_path, plan = "sequential", ...) - -IPG2Lineage(accessions, ipg_file, assembly_path, lineagelookup_path, ...) +efetchIPG(accessions, out_path, plan = "multicore") -acc2lin( +IPG2Lineage( accessions, - assembly_path, - lineagelookup_path, - ipgout_path = NULL, - plan = "multicore" + ipg_file, + refseq_assembly_path, + genbank_assembly_path, + lineagelookup_path ) } \arguments{ @@ -47,7 +42,8 @@ acc2lin( This file can be generated using the "DownloadAssemblySummary()" function} \item{lineagelookup_path}{String of the path to the lineage lookup file -(taxid to lineage mapping). This file can be generated using the} +(taxid to lineage mapping). This file can be generated using the +"create_lineage_lookup()" function} \item{ipgout_path}{Path to write the results of the efetch run of the accessions on the ipg database. If NULL, the file will not be written. Defaults to NULL} @@ -56,14 +52,14 @@ on the ipg database. If NULL, the file will not be written. Defaults to NULL} \item{accessions}{Character vector of protein accessions} -\item{accnums}{Character vector containing the accession numbers to query on -the ipg database} - \item{out_path}{Path to write the efetch results to} \item{ipg_file}{Filepath to the file containing results of an efetch run on the ipg database. The protein accession in 'accessions' should be contained in this file} + +\item{accnums}{Character vector containing the accession numbers to query on +the ipg database} } \value{ No return, but run to close all outstanding \code{sink()}s @@ -77,33 +73,28 @@ Describe return, in detail Describe return, in detail } \description{ -This function combines 'efetch_ipg()' and 'ipg2lin()' to map a set +This function combines 'efetchIPG()' and 'IPG2Lineage()' to map a set of protein accessions to their assembly (GCA_ID), tax ID, and lineage. Perform efetch on the ipg database and write the results to out_path Takes the resulting file of an efetch run on the ipg database and - -Function to map protein accession numbers to lineage - -This function combines 'efetch_ipg()' and 'ipg2lin()' to map a set -of protein accessions to their assembly (GCA_ID), tax ID, and lineage. } \examples{ \dontrun{ -sink.reset() +sinkReset() } \dontrun{ -add_lins() +addlineage() } \dontrun{ -acc2lin() +acc2Lineage() } \dontrun{ -efetch_ipg() +efetchIPG() } \dontrun{ -ipg2lin() +IPG2Lineage() } } diff --git a/man/add_lins.Rd b/man/addlineage.Rd similarity index 76% rename from man/add_lins.Rd rename to man/addlineage.Rd index 9ac343ea..7f34dc9f 100644 --- a/man/add_lins.Rd +++ b/man/addlineage.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/lineage.R -\name{add_lins} -\alias{add_lins} -\title{add_lins} +\name{addlineage} +\alias{addlineage} +\title{addlineage} \usage{ -add_lins( +addlineage( df, acc_col = "AccNum", assembly_path, @@ -17,5 +17,5 @@ add_lins( \item{plan}{} } \description{ -add_lins +addlineage } diff --git a/man/deprecate.Rd b/man/deprecate.Rd deleted file mode 100644 index b8f0731f..00000000 --- a/man/deprecate.Rd +++ /dev/null @@ -1,43 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/deprecate.R -\name{deprecate} -\alias{deprecate} -\alias{sink.reset} -\alias{add_lins} -\alias{acc2lin} -\alias{efetch_ipg} -\alias{ipg2lin} -\title{These functions will be deprecated. Please use other functions instead.} -\usage{ -sink.reset() - -add_lins( - df, - acc_col = "AccNum", - assembly_path, - lineagelookup_path, - ipgout_path = NULL, - plan = "multicore" -) - -acc2lin( - accessions, - assembly_path, - lineagelookup_path, - ipgout_path = NULL, - plan = "multicore" -) - -efetch_ipg(accessions, out_path, plan = "multicore") - -ipg2lin( - accessions, - ipg_file, - refseq_assembly_path, - genbank_assembly_path, - lineagelookup_path -) -} -\description{ -These functions will be deprecated. Please use other functions instead. -} diff --git a/man/efetch_ipg.Rd b/man/efetchIPG.Rd similarity index 78% rename from man/efetch_ipg.Rd rename to man/efetchIPG.Rd index efe1e8c5..157ceb75 100644 --- a/man/efetch_ipg.Rd +++ b/man/efetchIPG.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/lineage.R -\name{efetch_ipg} -\alias{efetch_ipg} -\title{efetch_ipg} +\name{efetchIPG} +\alias{efetchIPG} +\title{efetchIPG} \usage{ -efetch_ipg(accessions, out_path, plan = "multicore") +efetchIPG(accessions, out_path, plan = "multicore") } \arguments{ \item{accessions}{Character vector containing the accession numbers to query on From 3e36c7200ad49e8afabfecffd1547e798c4105cc Mon Sep 17 00:00:00 2001 From: David Mayer Date: Tue, 8 Oct 2024 07:50:03 -0600 Subject: [PATCH 3/5] adjust casing - addLineage() --- NAMESPACE | 2 +- R/acc2lin.R | 6 +++--- R/lineage.R | 4 ++-- man/acc2lin.Rd | 6 +++--- man/addlineage.Rd | 10 +++++----- 5 files changed, 14 insertions(+), 14 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index a526b959..726d1423 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -9,10 +9,10 @@ export(LineagePlot) export(RepresentativeAccNums) export(acc2Lineage) export(acc2fa) +export(addLineage) export(add_leaves) export(add_name) export(add_tax) -export(addlineage) export(advanced_opts2est_walltime) export(alignFasta) export(assert_count_df) diff --git a/R/acc2lin.R b/R/acc2lin.R index 71e7ae07..2b1e7078 100644 --- a/R/acc2lin.R +++ b/R/acc2lin.R @@ -24,7 +24,7 @@ sinkReset <- function() { } -#' Add Lineages +#' addLineage #' #' @param df #' @param acc_col @@ -43,9 +43,9 @@ sinkReset <- function() { #' #' @examples #' \dontrun{ -#' addlineage() +#' addLineage() #' } -addlineage <- function(df, acc_col = "AccNum", assembly_path, +addLineage <- function(df, acc_col = "AccNum", assembly_path, lineagelookup_path, ipgout_path = NULL, plan = "sequential", ...) { s_acc_col <- sym(acc_col) accessions <- df %>% pull(acc_col) diff --git a/R/lineage.R b/R/lineage.R index 3775b63b..f136c719 100644 --- a/R/lineage.R +++ b/R/lineage.R @@ -133,7 +133,7 @@ GCA2lin <- function(prot_data, ################################### ## !! @SAM why is this called lins? ################################### -#' addlineage +#' addLineage #' #' @param df #' @param acc_col @@ -149,7 +149,7 @@ GCA2lin <- function(prot_data, #' @export #' #' @examples -addlineage <- function(df, acc_col = "AccNum", assembly_path, +addLineage <- function(df, acc_col = "AccNum", assembly_path, lineagelookup_path, ipgout_path = NULL, plan = "multicore") { acc_sym <- sym(acc_col) accessions <- df %>% pull(acc_sym) diff --git a/man/acc2lin.Rd b/man/acc2lin.Rd index 92b2887b..88663260 100644 --- a/man/acc2lin.Rd +++ b/man/acc2lin.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/acc2lin.R \name{sinkReset} \alias{sinkReset} -\alias{addlineage} +\alias{addLineage} \alias{acc2Lineage} \alias{efetchIPG} \alias{IPG2Lineage} @@ -10,7 +10,7 @@ \usage{ sinkReset() -addlineage( +addLineage( df, acc_col = "AccNum", assembly_path, @@ -85,7 +85,7 @@ Takes the resulting file of an efetch run on the ipg database and sinkReset() } \dontrun{ -addlineage() +addLineage() } \dontrun{ acc2Lineage() diff --git a/man/addlineage.Rd b/man/addlineage.Rd index 7f34dc9f..f13259fa 100644 --- a/man/addlineage.Rd +++ b/man/addlineage.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/lineage.R -\name{addlineage} -\alias{addlineage} -\title{addlineage} +\name{addLineage} +\alias{addLineage} +\title{addLineage} \usage{ -addlineage( +addLineage( df, acc_col = "AccNum", assembly_path, @@ -17,5 +17,5 @@ addlineage( \item{plan}{} } \description{ -addlineage +addLineage } From 6ce981d2922889987212dad321e6fd89210af5f6 Mon Sep 17 00:00:00 2001 From: David Mayer Date: Tue, 8 Oct 2024 08:06:11 -0600 Subject: [PATCH 4/5] adjust namespace based on upstream --- NAMESPACE | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index fa961be9..da443880 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -7,11 +7,11 @@ export(GenContextNetwork) export(IPG2Lineage) export(LineagePlot) export(RepresentativeAccNums) -export(acc2Lineage) export(acc2FA) +export(acc2Lineage) export(acc2fa) -export(addLineage) export(addLeaves2Alignment) +export(addLineage) export(addName) export(add_leaves) export(add_name) @@ -73,6 +73,7 @@ export(map_advanced_opts2procs) export(msa_pdf) export(plot_estimated_walltimes) export(prot2tax) +export(prot2tax_old) export(removeAsterisks) export(removeEmptyRows) export(removeTails) From 2d47952d4e53b04bf925632ea9c222dd3a83b347 Mon Sep 17 00:00:00 2001 From: David Mayer Date: Tue, 8 Oct 2024 08:13:37 -0600 Subject: [PATCH 5/5] remove old rd tag for acc2lin --- R/acc2lin.R | 5 --- man/IPG2Lineage.Rd | 24 ++++++++++- man/acc2Lineage.Rd | 21 ++++++++- man/acc2lin.Rd | 103 --------------------------------------------- man/addlineage.Rd | 21 ++++++++- man/efetchIPG.Rd | 17 +++++++- man/sinkReset.Rd | 19 +++++++++ 7 files changed, 98 insertions(+), 112 deletions(-) delete mode 100644 man/acc2lin.Rd create mode 100644 man/sinkReset.Rd diff --git a/R/acc2lin.R b/R/acc2lin.R index 2b1e7078..73aca0f4 100644 --- a/R/acc2lin.R +++ b/R/acc2lin.R @@ -10,7 +10,6 @@ #' Sink Reset #' #' @return No return, but run to close all outstanding `sink()`s -#' @rdname acc2lin #' @export #' #' @examples @@ -38,7 +37,6 @@ sinkReset <- function() { #' @importFrom rlang sym #' #' @return Describe return, in detail -#' @rdname acc2lin #' @export #' #' @examples @@ -83,7 +81,6 @@ addLineage <- function(df, acc_col = "AccNum", assembly_path, #' @param plan #' #' @return Describe return, in detail -#' @rdname acc2lin #' @export #' #' @examples @@ -122,7 +119,6 @@ acc2Lineage <- function(accessions, assembly_path, lineagelookup_path, ipgout_pa #' @importFrom rentrez entrez_fetch #' #' @return Describe return, in detail -#' @rdname acc2lin #' @export #' #' @examples @@ -191,7 +187,6 @@ efetchIPG <- function(accnums, out_path, plan = "sequential", ...) { #' @importFrom data.table fread #' #' @return Describe return, in detail -#' @rdname acc2lin #' @export #' #' @examples diff --git a/man/IPG2Lineage.Rd b/man/IPG2Lineage.Rd index 43a920b9..cf3e635e 100644 --- a/man/IPG2Lineage.Rd +++ b/man/IPG2Lineage.Rd @@ -1,9 +1,17 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/lineage.R +% Please edit documentation in R/acc2lin.R, R/lineage.R \name{IPG2Lineage} \alias{IPG2Lineage} \title{IPG2Lineage} \usage{ +IPG2Lineage( + accessions, + ipg_file, + refseq_assembly_path, + genbank_assembly_path, + lineagelookup_path +) + IPG2Lineage( accessions, ipg_file, @@ -22,10 +30,24 @@ file} \item{lineagelookup_path}{String of the path to the lineage lookup file (taxid to lineage mapping). This file can be generated using the "create_lineage_lookup()" function} + +\item{assembly_path}{String of the path to the assembly_summary path +This file can be generated using the "DownloadAssemblySummary()" function} +} +\value{ +Describe return, in detail } \description{ +Takes the resulting file of an efetch run on the ipg database and + Takes the resulting file of an efetch run on the ipg database and append lineage, and taxid columns +} +\examples{ +\dontrun{ +IPG2Lineage() +} + } \author{ Samuel Chen, Janani Ravi diff --git a/man/acc2Lineage.Rd b/man/acc2Lineage.Rd index 5ab5931a..d632c52e 100644 --- a/man/acc2Lineage.Rd +++ b/man/acc2Lineage.Rd @@ -1,9 +1,17 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/lineage.R +% Please edit documentation in R/acc2lin.R, R/lineage.R \name{acc2Lineage} \alias{acc2Lineage} \title{acc2Lineage} \usage{ +acc2Lineage( + accessions, + assembly_path, + lineagelookup_path, + ipgout_path = NULL, + plan = "multicore" +) + acc2Lineage( accessions, assembly_path, @@ -26,12 +34,23 @@ on the ipg database. If NULL, the file will not be written. Defaults to NULL} \item{plan}{} } +\value{ +Describe return, in detail +} \description{ +This function combines 'efetchIPG()' and 'IPG2Lineage()' to map a set +of protein accessions to their assembly (GCA_ID), tax ID, and lineage. + Function to map protein accession numbers to lineage This function combines 'efetchIPG()' and 'IPG2Lineage()' to map a set of protein accessions to their assembly (GCA_ID), tax ID, and lineage. } +\examples{ +\dontrun{ +acc2Lineage() +} +} \author{ Samuel Chen, Janani Ravi } diff --git a/man/acc2lin.Rd b/man/acc2lin.Rd deleted file mode 100644 index 88663260..00000000 --- a/man/acc2lin.Rd +++ /dev/null @@ -1,103 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/acc2lin.R -\name{sinkReset} -\alias{sinkReset} -\alias{addLineage} -\alias{acc2Lineage} -\alias{efetchIPG} -\alias{IPG2Lineage} -\title{Sink Reset} -\usage{ -sinkReset() - -addLineage( - df, - acc_col = "AccNum", - assembly_path, - lineagelookup_path, - ipgout_path = NULL, - plan = "multicore" -) - -acc2Lineage( - accessions, - assembly_path, - lineagelookup_path, - ipgout_path = NULL, - plan = "multicore" -) - -efetchIPG(accessions, out_path, plan = "multicore") - -IPG2Lineage( - accessions, - ipg_file, - refseq_assembly_path, - genbank_assembly_path, - lineagelookup_path -) -} -\arguments{ -\item{assembly_path}{String of the path to the assembly_summary path -This file can be generated using the "DownloadAssemblySummary()" function} - -\item{lineagelookup_path}{String of the path to the lineage lookup file -(taxid to lineage mapping). This file can be generated using the -"create_lineage_lookup()" function} - -\item{ipgout_path}{Path to write the results of the efetch run of the accessions -on the ipg database. If NULL, the file will not be written. Defaults to NULL} - -\item{plan}{} - -\item{accessions}{Character vector of protein accessions} - -\item{out_path}{Path to write the efetch results to} - -\item{ipg_file}{Filepath to the file containing results of an efetch run on the -ipg database. The protein accession in 'accessions' should be contained in this -file} - -\item{accnums}{Character vector containing the accession numbers to query on -the ipg database} -} -\value{ -No return, but run to close all outstanding \code{sink()}s - -Describe return, in detail - -Describe return, in detail - -Describe return, in detail - -Describe return, in detail -} -\description{ -This function combines 'efetchIPG()' and 'IPG2Lineage()' to map a set -of protein accessions to their assembly (GCA_ID), tax ID, and lineage. - -Perform efetch on the ipg database and write the results to out_path - -Takes the resulting file of an efetch run on the ipg database and -} -\examples{ -\dontrun{ -sinkReset() -} -\dontrun{ -addLineage() -} -\dontrun{ -acc2Lineage() -} -\dontrun{ -efetchIPG() -} -\dontrun{ -IPG2Lineage() -} - -} -\author{ -Samuel Chen, Janani Ravi -} diff --git a/man/addlineage.Rd b/man/addlineage.Rd index f13259fa..6694e94c 100644 --- a/man/addlineage.Rd +++ b/man/addlineage.Rd @@ -1,9 +1,18 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/lineage.R +% Please edit documentation in R/acc2lin.R, R/lineage.R \name{addLineage} \alias{addLineage} \title{addLineage} \usage{ +addLineage( + df, + acc_col = "AccNum", + assembly_path, + lineagelookup_path, + ipgout_path = NULL, + plan = "multicore" +) + addLineage( df, acc_col = "AccNum", @@ -16,6 +25,16 @@ addLineage( \arguments{ \item{plan}{} } +\value{ +Describe return, in detail +} \description{ addLineage + +addLineage +} +\examples{ +\dontrun{ +addLineage() +} } diff --git a/man/efetchIPG.Rd b/man/efetchIPG.Rd index 157ceb75..6a5d85a4 100644 --- a/man/efetchIPG.Rd +++ b/man/efetchIPG.Rd @@ -1,9 +1,11 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/lineage.R +% Please edit documentation in R/acc2lin.R, R/lineage.R \name{efetchIPG} \alias{efetchIPG} \title{efetchIPG} \usage{ +efetchIPG(accessions, out_path, plan = "multicore") + efetchIPG(accessions, out_path, plan = "multicore") } \arguments{ @@ -13,9 +15,22 @@ the ipg database} \item{out_path}{Path to write the efetch results to} \item{plan}{} + +\item{accnums}{Character vector containing the accession numbers to query on +the ipg database} +} +\value{ +Describe return, in detail } \description{ Perform efetch on the ipg database and write the results to out_path + +Perform efetch on the ipg database and write the results to out_path +} +\examples{ +\dontrun{ +efetchIPG() +} } \author{ Samuel Chen, Janani Ravi diff --git a/man/sinkReset.Rd b/man/sinkReset.Rd new file mode 100644 index 00000000..0285c0b2 --- /dev/null +++ b/man/sinkReset.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/acc2lin.R +\name{sinkReset} +\alias{sinkReset} +\title{Sink Reset} +\usage{ +sinkReset() +} +\value{ +No return, but run to close all outstanding \code{sink()}s +} +\description{ +Sink Reset +} +\examples{ +\dontrun{ +sinkReset() +} +}