Skip to content

Commit

Permalink
Merge pull request #65 from awasyn/depacc2lin
Browse files Browse the repository at this point in the history
Rename functions in acc2lin
- fixes #35
  • Loading branch information
the-mayer authored Oct 8, 2024
2 parents 4a4efa6 + 2d47952 commit 3d8ec75
Show file tree
Hide file tree
Showing 8 changed files with 71 additions and 70 deletions.
11 changes: 6 additions & 5 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,16 @@ export(BinaryDomainNetwork)
export(DownloadAssemblySummary)
export(GCA2lin)
export(GenContextNetwork)
export(IPG2Lineage)
export(LineagePlot)
export(RepresentativeAccNums)
export(acc2FA)
export(acc2Lineage)
export(acc2fa)
export(acc2lin)
export(addLeaves2Alignment)
export(addLineage)
export(addName)
export(add_leaves)
export(add_lins)
export(add_name)
export(add_tax)
export(advanced_opts2est_walltime)
Expand Down Expand Up @@ -40,7 +41,7 @@ export(create_all_col_params)
export(create_lineage_lookup)
export(create_one_col_params)
export(domain_network)
export(efetch_ipg)
export(efetchIPG)
export(extractAccNum)
export(filter_by_doms)
export(filter_freq)
Expand All @@ -57,7 +58,6 @@ export(get_accnums_from_fasta_file)
export(get_job_message)
export(get_proc_medians)
export(get_proc_weights)
export(ipg2lin)
export(ipr2viz)
export(ipr2viz_web)
export(lineage.DA.plot)
Expand All @@ -73,6 +73,7 @@ export(map_advanced_opts2procs)
export(msa_pdf)
export(plot_estimated_walltimes)
export(prot2tax)
export(prot2tax_old)
export(removeAsterisks)
export(removeEmptyRows)
export(removeTails)
Expand All @@ -86,7 +87,7 @@ export(run_rpsblast)
export(selectLongestDuplicate)
export(send_job_status_email)
export(shorten_lineage)
export(sink.reset)
export(sinkReset)
export(stacked_lin_plot)
export(summ.DA)
export(summ.DA.byLin)
Expand Down
42 changes: 21 additions & 21 deletions R/acc2lin.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,16 +14,16 @@
#'
#' @examples
#' \dontrun{
#' sink.reset()
#' sinkReset()
#' }
sink.reset <- function() {
sinkReset <- function() {
for (i in seq_len(sink.number())) {
sink(NULL)
}
}


#' Add Lineages
#' addLineage
#'
#' @param df
#' @param acc_col
Expand All @@ -41,13 +41,13 @@ sink.reset <- function() {
#'
#' @examples
#' \dontrun{
#' add_lins()
#' addLineage()
#' }
add_lins <- function(df, acc_col = "AccNum", assembly_path,
lineagelookup_path, ipgout_path = NULL, plan = "sequential") {
addLineage <- function(df, acc_col = "AccNum", assembly_path,
lineagelookup_path, ipgout_path = NULL, plan = "sequential", ...) {
s_acc_col <- sym(acc_col)
accessions <- df %>% pull(acc_col)
lins <- acc2lin(accessions, assembly_path, lineagelookup_path, ipgout_path, plan)
lins <- acc2Lineage(accessions, assembly_path, lineagelookup_path, ipgout_path, plan)

# Drop a lot of the unimportant columns for now? will make merging much easier
lins <- lins[, c(
Expand All @@ -64,11 +64,11 @@ add_lins <- function(df, acc_col = "AccNum", assembly_path,
}


#' acc2lin
#' acc2Lineage
#'
#' @author Samuel Chen, Janani Ravi
#'
#' @description This function combines 'efetch_ipg()' and 'ipg2lin()' to map a set
#' @description This function combines 'efetchIPG()' and 'IPG2Lineage()' to map a set
#' of protein accessions to their assembly (GCA_ID), tax ID, and lineage.
#'
#' @param accessions Character vector of protein accessions
Expand All @@ -85,25 +85,25 @@ add_lins <- function(df, acc_col = "AccNum", assembly_path,
#'
#' @examples
#' \dontrun{
#' acc2lin()
#' acc2Lineage()
#' }
acc2lin <- function(accessions, assembly_path, lineagelookup_path, ipgout_path = NULL, plan = "sequential") {
acc2Lineage <- function(accessions, assembly_path, lineagelookup_path, ipgout_path = NULL, plan = "sequential", ...) {
tmp_ipg <- F
if (is.null(ipgout_path)) {
tmp_ipg <- T
ipgout_path <- tempfile("ipg", fileext = ".txt")
}
efetch_ipg(accessions, out_path = ipgout_path, plan)
efetchIPG(accessions, out_path = ipgout_path, plan)

lins <- ipg2lin(accessions, ipgout_path, assembly_path, lineagelookup_path)
lins <- IPG2Lineage(accessions, ipgout_path, assembly_path, lineagelookup_path)

if (tmp_ipg) {
unlink(tempdir(), recursive = T)
}
return(lins)
}

#' efetch_ipg
#' efetchIPG
#'
#' @author Samuel Chen, Janani Ravi
#'
Expand All @@ -123,12 +123,12 @@ acc2lin <- function(accessions, assembly_path, lineagelookup_path, ipgout_path =
#'
#' @examples
#' \dontrun{
#' efetch_ipg()
#' efetchIPG()
#' }
efetch_ipg <- function(accnums, out_path, plan = "sequential") {
efetchIPG <- function(accnums, out_path, plan = "sequential", ...) {
if (length(accnums) > 0) {
partition <- function(in_data, groups) {
# \\TODO This function should be defined outside of efetch_ipg(). It can be non-exported/internal
# \\TODO This function should be defined outside of efetchIPG(). It can be non-exported/internal
# Partition data to limit number of queries per second for rentrez fetch:
# limit of 10/second w/ key
l <- length(in_data)
Expand Down Expand Up @@ -168,7 +168,7 @@ efetch_ipg <- function(accnums, out_path, plan = "sequential") {
}
}

#' ipg2lin
#' IPG2Lineage
#'
#' @author Samuel Chen, Janani Ravi
#'
Expand All @@ -191,10 +191,10 @@ efetch_ipg <- function(accnums, out_path, plan = "sequential") {
#'
#' @examples
#' \dontrun{
#' ipg2lin()
#' IPG2Lineage()
#' }
#'
ipg2lin <- function(accessions, ipg_file, assembly_path, lineagelookup_path) {
IPG2Lineage <- function(accessions, ipg_file, assembly_path, lineagelookup_path, ...) {
ipg_dt <- fread(ipg_file, sep = "\t", fill = T)

ipg_dt <- ipg_dt[Protein %in% accessions]
Expand All @@ -211,7 +211,7 @@ ipg2lin <- function(accessions, ipg_file, assembly_path, lineagelookup_path) {



# efetch_ipg <- function(accnums, outpath)
# efetchIPG <- function(accnums, outpath)
# {
# SIZE = 250
# lower_bound = 1
Expand Down
24 changes: 12 additions & 12 deletions R/lineage.R
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ GCA2lin <- function(prot_data,
###################################
## !! @SAM why is this called lins?
###################################
#' add_lins
#' addLineage
#'
#' @param df
#' @param acc_col
Expand All @@ -149,11 +149,11 @@ GCA2lin <- function(prot_data,
#' @export
#'
#' @examples
add_lins <- function(df, acc_col = "AccNum", assembly_path,
addLineage <- function(df, acc_col = "AccNum", assembly_path,
lineagelookup_path, ipgout_path = NULL, plan = "multicore") {
acc_sym <- sym(acc_col)
accessions <- df %>% pull(acc_sym)
lins <- acc2lin(accessions, assembly_path,
lins <- acc2Lineage(accessions, assembly_path,
lineagelookup_path, ipgout_path,
plan = plan
)
Expand All @@ -178,13 +178,13 @@ add_lins <- function(df, acc_col = "AccNum", assembly_path,
#######################################
## Map Protein Accessions to Lineage ##
#######################################
#' acc2lin
#' acc2Lineage
#'
#' @description
#' Function to map protein accession numbers to lineage
#'
#' @author Samuel Chen, Janani Ravi
#' @description This function combines 'efetch_ipg()' and 'ipg2lin()' to map a set
#' @description This function combines 'efetchIPG()' and 'IPG2Lineage()' to map a set
#' of protein accessions to their assembly (GCA_ID), tax ID, and lineage.
#'
#' @param accessions Character vector of protein accessions
Expand All @@ -200,17 +200,17 @@ add_lins <- function(df, acc_col = "AccNum", assembly_path,
#' @export
#'
#' @examples
acc2lin <- function(accessions, assembly_path, lineagelookup_path,
acc2Lineage <- function(accessions, assembly_path, lineagelookup_path,
ipgout_path = NULL, plan = "multicore") {
tmp_ipg <- F

if (is.null(ipgout_path)) {
tmp_ipg <- T
ipgout_path <- tempfile("ipg", fileext = ".txt")
}
efetch_ipg(accessions, out_path = ipgout_path, plan = plan)
efetchIPG(accessions, out_path = ipgout_path, plan = plan)

lins <- ipg2lin(accessions, ipgout_path, assembly_path, lineagelookup_path)
lins <- IPG2Lineage(accessions, ipgout_path, assembly_path, lineagelookup_path)

# if(tmp_ipg)
# {
Expand All @@ -227,7 +227,7 @@ acc2lin <- function(accessions, assembly_path, lineagelookup_path,
#########################################
## Download IPG results for Accessions ##
#########################################
#' efetch_ipg
#' efetchIPG
#'
#' @author Samuel Chen, Janani Ravi
#' @description Perform efetch on the ipg database and write the results to out_path
Expand All @@ -245,7 +245,7 @@ acc2lin <- function(accessions, assembly_path, lineagelookup_path,
#' @export
#'
#' @examples
efetch_ipg <- function(accessions, out_path, plan = "multicore") {
efetchIPG <- function(accessions, out_path, plan = "multicore") {
if (length(accessions) > 0) {
partition <- function(v, groups) {
# Partition data to limit number of queries per second for rentrez fetch:
Expand Down Expand Up @@ -295,7 +295,7 @@ efetch_ipg <- function(accessions, out_path, plan = "multicore") {
#########################################
## Maps IPG results to TaxID + Lineage ##
#########################################
#' ipg2lin
#' IPG2Lineage
#'
#' @author Samuel Chen, Janani Ravi
#' @description Takes the resulting file of an efetch run on the ipg database and
Expand All @@ -317,7 +317,7 @@ efetch_ipg <- function(accessions, out_path, plan = "multicore") {
#' @export
#'
#' @examples
ipg2lin <- function(accessions, ipg_file,
IPG2Lineage <- function(accessions, ipg_file,
refseq_assembly_path, genbank_assembly_path,
lineagelookup_path) {
ipg_dt <- fread(ipg_file, sep = "\t", fill = T)
Expand Down
12 changes: 6 additions & 6 deletions man/ipg2lin.Rd → man/IPG2Lineage.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

16 changes: 8 additions & 8 deletions man/acc2lin.Rd → man/acc2Lineage.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

16 changes: 8 additions & 8 deletions man/add_lins.Rd → man/addlineage.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 3d8ec75

Please sign in to comment.