diff --git a/NAMESPACE b/NAMESPACE index af1d0ba4..da443880 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -4,15 +4,16 @@ export(BinaryDomainNetwork) export(DownloadAssemblySummary) export(GCA2lin) export(GenContextNetwork) +export(IPG2Lineage) export(LineagePlot) export(RepresentativeAccNums) export(acc2FA) +export(acc2Lineage) export(acc2fa) -export(acc2lin) export(addLeaves2Alignment) +export(addLineage) export(addName) export(add_leaves) -export(add_lins) export(add_name) export(add_tax) export(advanced_opts2est_walltime) @@ -40,7 +41,7 @@ export(create_all_col_params) export(create_lineage_lookup) export(create_one_col_params) export(domain_network) -export(efetch_ipg) +export(efetchIPG) export(extractAccNum) export(filter_by_doms) export(filter_freq) @@ -57,7 +58,6 @@ export(get_accnums_from_fasta_file) export(get_job_message) export(get_proc_medians) export(get_proc_weights) -export(ipg2lin) export(ipr2viz) export(ipr2viz_web) export(lineage.DA.plot) @@ -73,6 +73,7 @@ export(map_advanced_opts2procs) export(msa_pdf) export(plot_estimated_walltimes) export(prot2tax) +export(prot2tax_old) export(removeAsterisks) export(removeEmptyRows) export(removeTails) @@ -86,7 +87,7 @@ export(run_rpsblast) export(selectLongestDuplicate) export(send_job_status_email) export(shorten_lineage) -export(sink.reset) +export(sinkReset) export(stacked_lin_plot) export(summ.DA) export(summ.DA.byLin) diff --git a/R/acc2lin.R b/R/acc2lin.R index f8d71949..73aca0f4 100644 --- a/R/acc2lin.R +++ b/R/acc2lin.R @@ -14,16 +14,16 @@ #' #' @examples #' \dontrun{ -#' sink.reset() +#' sinkReset() #' } -sink.reset <- function() { +sinkReset <- function() { for (i in seq_len(sink.number())) { sink(NULL) } } -#' Add Lineages +#' addLineage #' #' @param df #' @param acc_col @@ -41,13 +41,13 @@ sink.reset <- function() { #' #' @examples #' \dontrun{ -#' add_lins() +#' addLineage() #' } -add_lins <- function(df, acc_col = "AccNum", assembly_path, - lineagelookup_path, ipgout_path = NULL, plan = "sequential") { +addLineage <- function(df, acc_col = "AccNum", assembly_path, + lineagelookup_path, ipgout_path = NULL, plan = "sequential", ...) { s_acc_col <- sym(acc_col) accessions <- df %>% pull(acc_col) - lins <- acc2lin(accessions, assembly_path, lineagelookup_path, ipgout_path, plan) + lins <- acc2Lineage(accessions, assembly_path, lineagelookup_path, ipgout_path, plan) # Drop a lot of the unimportant columns for now? will make merging much easier lins <- lins[, c( @@ -64,11 +64,11 @@ add_lins <- function(df, acc_col = "AccNum", assembly_path, } -#' acc2lin +#' acc2Lineage #' #' @author Samuel Chen, Janani Ravi #' -#' @description This function combines 'efetch_ipg()' and 'ipg2lin()' to map a set +#' @description This function combines 'efetchIPG()' and 'IPG2Lineage()' to map a set #' of protein accessions to their assembly (GCA_ID), tax ID, and lineage. #' #' @param accessions Character vector of protein accessions @@ -85,17 +85,17 @@ add_lins <- function(df, acc_col = "AccNum", assembly_path, #' #' @examples #' \dontrun{ -#' acc2lin() +#' acc2Lineage() #' } -acc2lin <- function(accessions, assembly_path, lineagelookup_path, ipgout_path = NULL, plan = "sequential") { +acc2Lineage <- function(accessions, assembly_path, lineagelookup_path, ipgout_path = NULL, plan = "sequential", ...) { tmp_ipg <- F if (is.null(ipgout_path)) { tmp_ipg <- T ipgout_path <- tempfile("ipg", fileext = ".txt") } - efetch_ipg(accessions, out_path = ipgout_path, plan) + efetchIPG(accessions, out_path = ipgout_path, plan) - lins <- ipg2lin(accessions, ipgout_path, assembly_path, lineagelookup_path) + lins <- IPG2Lineage(accessions, ipgout_path, assembly_path, lineagelookup_path) if (tmp_ipg) { unlink(tempdir(), recursive = T) @@ -103,7 +103,7 @@ acc2lin <- function(accessions, assembly_path, lineagelookup_path, ipgout_path = return(lins) } -#' efetch_ipg +#' efetchIPG #' #' @author Samuel Chen, Janani Ravi #' @@ -123,12 +123,12 @@ acc2lin <- function(accessions, assembly_path, lineagelookup_path, ipgout_path = #' #' @examples #' \dontrun{ -#' efetch_ipg() +#' efetchIPG() #' } -efetch_ipg <- function(accnums, out_path, plan = "sequential") { +efetchIPG <- function(accnums, out_path, plan = "sequential", ...) { if (length(accnums) > 0) { partition <- function(in_data, groups) { - # \\TODO This function should be defined outside of efetch_ipg(). It can be non-exported/internal + # \\TODO This function should be defined outside of efetchIPG(). It can be non-exported/internal # Partition data to limit number of queries per second for rentrez fetch: # limit of 10/second w/ key l <- length(in_data) @@ -168,7 +168,7 @@ efetch_ipg <- function(accnums, out_path, plan = "sequential") { } } -#' ipg2lin +#' IPG2Lineage #' #' @author Samuel Chen, Janani Ravi #' @@ -191,10 +191,10 @@ efetch_ipg <- function(accnums, out_path, plan = "sequential") { #' #' @examples #' \dontrun{ -#' ipg2lin() +#' IPG2Lineage() #' } #' -ipg2lin <- function(accessions, ipg_file, assembly_path, lineagelookup_path) { +IPG2Lineage <- function(accessions, ipg_file, assembly_path, lineagelookup_path, ...) { ipg_dt <- fread(ipg_file, sep = "\t", fill = T) ipg_dt <- ipg_dt[Protein %in% accessions] @@ -211,7 +211,7 @@ ipg2lin <- function(accessions, ipg_file, assembly_path, lineagelookup_path) { -# efetch_ipg <- function(accnums, outpath) +# efetchIPG <- function(accnums, outpath) # { # SIZE = 250 # lower_bound = 1 diff --git a/R/lineage.R b/R/lineage.R index 20acec04..f136c719 100644 --- a/R/lineage.R +++ b/R/lineage.R @@ -133,7 +133,7 @@ GCA2lin <- function(prot_data, ################################### ## !! @SAM why is this called lins? ################################### -#' add_lins +#' addLineage #' #' @param df #' @param acc_col @@ -149,11 +149,11 @@ GCA2lin <- function(prot_data, #' @export #' #' @examples -add_lins <- function(df, acc_col = "AccNum", assembly_path, +addLineage <- function(df, acc_col = "AccNum", assembly_path, lineagelookup_path, ipgout_path = NULL, plan = "multicore") { acc_sym <- sym(acc_col) accessions <- df %>% pull(acc_sym) - lins <- acc2lin(accessions, assembly_path, + lins <- acc2Lineage(accessions, assembly_path, lineagelookup_path, ipgout_path, plan = plan ) @@ -178,13 +178,13 @@ add_lins <- function(df, acc_col = "AccNum", assembly_path, ####################################### ## Map Protein Accessions to Lineage ## ####################################### -#' acc2lin +#' acc2Lineage #' #' @description #' Function to map protein accession numbers to lineage #' #' @author Samuel Chen, Janani Ravi -#' @description This function combines 'efetch_ipg()' and 'ipg2lin()' to map a set +#' @description This function combines 'efetchIPG()' and 'IPG2Lineage()' to map a set #' of protein accessions to their assembly (GCA_ID), tax ID, and lineage. #' #' @param accessions Character vector of protein accessions @@ -200,7 +200,7 @@ add_lins <- function(df, acc_col = "AccNum", assembly_path, #' @export #' #' @examples -acc2lin <- function(accessions, assembly_path, lineagelookup_path, +acc2Lineage <- function(accessions, assembly_path, lineagelookup_path, ipgout_path = NULL, plan = "multicore") { tmp_ipg <- F @@ -208,9 +208,9 @@ acc2lin <- function(accessions, assembly_path, lineagelookup_path, tmp_ipg <- T ipgout_path <- tempfile("ipg", fileext = ".txt") } - efetch_ipg(accessions, out_path = ipgout_path, plan = plan) + efetchIPG(accessions, out_path = ipgout_path, plan = plan) - lins <- ipg2lin(accessions, ipgout_path, assembly_path, lineagelookup_path) + lins <- IPG2Lineage(accessions, ipgout_path, assembly_path, lineagelookup_path) # if(tmp_ipg) # { @@ -227,7 +227,7 @@ acc2lin <- function(accessions, assembly_path, lineagelookup_path, ######################################### ## Download IPG results for Accessions ## ######################################### -#' efetch_ipg +#' efetchIPG #' #' @author Samuel Chen, Janani Ravi #' @description Perform efetch on the ipg database and write the results to out_path @@ -245,7 +245,7 @@ acc2lin <- function(accessions, assembly_path, lineagelookup_path, #' @export #' #' @examples -efetch_ipg <- function(accessions, out_path, plan = "multicore") { +efetchIPG <- function(accessions, out_path, plan = "multicore") { if (length(accessions) > 0) { partition <- function(v, groups) { # Partition data to limit number of queries per second for rentrez fetch: @@ -295,7 +295,7 @@ efetch_ipg <- function(accessions, out_path, plan = "multicore") { ######################################### ## Maps IPG results to TaxID + Lineage ## ######################################### -#' ipg2lin +#' IPG2Lineage #' #' @author Samuel Chen, Janani Ravi #' @description Takes the resulting file of an efetch run on the ipg database and @@ -317,7 +317,7 @@ efetch_ipg <- function(accessions, out_path, plan = "multicore") { #' @export #' #' @examples -ipg2lin <- function(accessions, ipg_file, +IPG2Lineage <- function(accessions, ipg_file, refseq_assembly_path, genbank_assembly_path, lineagelookup_path) { ipg_dt <- fread(ipg_file, sep = "\t", fill = T) diff --git a/man/ipg2lin.Rd b/man/IPG2Lineage.Rd similarity index 92% rename from man/ipg2lin.Rd rename to man/IPG2Lineage.Rd index 3a14eada..cf3e635e 100644 --- a/man/ipg2lin.Rd +++ b/man/IPG2Lineage.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/acc2lin.R, R/lineage.R -\name{ipg2lin} -\alias{ipg2lin} -\title{ipg2lin} +\name{IPG2Lineage} +\alias{IPG2Lineage} +\title{IPG2Lineage} \usage{ -ipg2lin( +IPG2Lineage( accessions, ipg_file, refseq_assembly_path, @@ -12,7 +12,7 @@ ipg2lin( lineagelookup_path ) -ipg2lin( +IPG2Lineage( accessions, ipg_file, refseq_assembly_path, @@ -45,7 +45,7 @@ append lineage, and taxid columns } \examples{ \dontrun{ -ipg2lin() +IPG2Lineage() } } diff --git a/man/acc2lin.Rd b/man/acc2Lineage.Rd similarity index 82% rename from man/acc2lin.Rd rename to man/acc2Lineage.Rd index 6255b290..d632c52e 100644 --- a/man/acc2lin.Rd +++ b/man/acc2Lineage.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/acc2lin.R, R/lineage.R -\name{acc2lin} -\alias{acc2lin} -\title{acc2lin} +\name{acc2Lineage} +\alias{acc2Lineage} +\title{acc2Lineage} \usage{ -acc2lin( +acc2Lineage( accessions, assembly_path, lineagelookup_path, @@ -12,7 +12,7 @@ acc2lin( plan = "multicore" ) -acc2lin( +acc2Lineage( accessions, assembly_path, lineagelookup_path, @@ -38,17 +38,17 @@ on the ipg database. If NULL, the file will not be written. Defaults to NULL} Describe return, in detail } \description{ -This function combines 'efetch_ipg()' and 'ipg2lin()' to map a set +This function combines 'efetchIPG()' and 'IPG2Lineage()' to map a set of protein accessions to their assembly (GCA_ID), tax ID, and lineage. Function to map protein accession numbers to lineage -This function combines 'efetch_ipg()' and 'ipg2lin()' to map a set +This function combines 'efetchIPG()' and 'IPG2Lineage()' to map a set of protein accessions to their assembly (GCA_ID), tax ID, and lineage. } \examples{ \dontrun{ -acc2lin() +acc2Lineage() } } \author{ diff --git a/man/add_lins.Rd b/man/addlineage.Rd similarity index 79% rename from man/add_lins.Rd rename to man/addlineage.Rd index 226e428d..6694e94c 100644 --- a/man/add_lins.Rd +++ b/man/addlineage.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/acc2lin.R, R/lineage.R -\name{add_lins} -\alias{add_lins} -\title{Add Lineages} +\name{addLineage} +\alias{addLineage} +\title{addLineage} \usage{ -add_lins( +addLineage( df, acc_col = "AccNum", assembly_path, @@ -13,7 +13,7 @@ add_lins( plan = "multicore" ) -add_lins( +addLineage( df, acc_col = "AccNum", assembly_path, @@ -29,12 +29,12 @@ add_lins( Describe return, in detail } \description{ -Add Lineages +addLineage -add_lins +addLineage } \examples{ \dontrun{ -add_lins() +addLineage() } } diff --git a/man/efetch_ipg.Rd b/man/efetchIPG.Rd similarity index 78% rename from man/efetch_ipg.Rd rename to man/efetchIPG.Rd index ec5b6bcb..6a5d85a4 100644 --- a/man/efetch_ipg.Rd +++ b/man/efetchIPG.Rd @@ -1,12 +1,12 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/acc2lin.R, R/lineage.R -\name{efetch_ipg} -\alias{efetch_ipg} -\title{efetch_ipg} +\name{efetchIPG} +\alias{efetchIPG} +\title{efetchIPG} \usage{ -efetch_ipg(accessions, out_path, plan = "multicore") +efetchIPG(accessions, out_path, plan = "multicore") -efetch_ipg(accessions, out_path, plan = "multicore") +efetchIPG(accessions, out_path, plan = "multicore") } \arguments{ \item{accessions}{Character vector containing the accession numbers to query on @@ -29,7 +29,7 @@ Perform efetch on the ipg database and write the results to out_path } \examples{ \dontrun{ -efetch_ipg() +efetchIPG() } } \author{ diff --git a/man/sink.reset.Rd b/man/sinkReset.Rd similarity index 79% rename from man/sink.reset.Rd rename to man/sinkReset.Rd index a31b841d..0285c0b2 100644 --- a/man/sink.reset.Rd +++ b/man/sinkReset.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/acc2lin.R -\name{sink.reset} -\alias{sink.reset} +\name{sinkReset} +\alias{sinkReset} \title{Sink Reset} \usage{ -sink.reset() +sinkReset() } \value{ No return, but run to close all outstanding \code{sink()}s @@ -14,6 +14,6 @@ Sink Reset } \examples{ \dontrun{ -sink.reset() +sinkReset() } }