From e004cd4091ae3cb111873076365619ce0ba42430 Mon Sep 17 00:00:00 2001 From: teddyCodex Date: Sun, 6 Oct 2024 20:10:30 +0100 Subject: [PATCH 1/3] refactor function names in R/cleanup.R --- R/cleanup.R | 142 ++++++++++++++++++++++++++-------------------------- 1 file changed, 71 insertions(+), 71 deletions(-) diff --git a/R/cleanup.R b/R/cleanup.R index 3a708415..f82722f2 100755 --- a/R/cleanup.R +++ b/R/cleanup.R @@ -31,10 +31,10 @@ #' @return [string] string with only alphanumerics, "_", "+", and "." #' @examples #' \dontrun{ -#' clean_string() +#' cleanString() #' } #' -clean_string <- function(string) { +cleanString <- function(string) { # replace spaces with "_" string <- stringr::str_replace_all(string, "\\s+", "_") # keep only alphanumeric characters, "_", and "." @@ -44,7 +44,7 @@ clean_string <- function(string) { # use the same code as upstream_scripts/00_submit_full.R's # get_sequences() function to extract accession numbers -#' string2accnum +#' extractAccNum #' #' @param string #' @@ -53,9 +53,9 @@ clean_string <- function(string) { #' #' @examples #' \dontrun{ -#' string2accnum() +#' extractAccNum() #' } -string2accnum <- function(string) { +extractAccNum <- function(string) { if (grepl("\\|", string)) { accnum <- strsplit(string, "\\|")[[1]][2] accnum <- strsplit(accnum, " ")[[1]][1] @@ -81,9 +81,9 @@ string2accnum <- function(string) { #' @examples #' \dontrun{ #' c("xxx", "xxx", "xxx", "yyy", "yyy") |> -#' make_accnums_unique() +#' ensureUniqAccNum() #' } -make_accnums_unique <- function(accnums) { +ensureUniqAccNum <- function(accnums) { # group by accnums then use the row count as a proxy # for the index of occurence for each accession number df_accnums <- tibble::tibble("accnum" = accnums) @@ -113,14 +113,14 @@ make_accnums_unique <- function(accnums) { #' @examples #' \dontrun{ #' AAStringSet(c("xxx" = "ATCG", "xxx" = "GGGC")) |> -#' cleanup_fasta_header() +#' cleanFAHeaders() #' } -cleanup_fasta_header <- function(fasta) { +cleanFAHeaders <- function(fasta) { headers <- names(fasta) # try parsing accession numbers from header headers <- purrr::map_chr( headers, - string2accnum + extractAccNum ) # sanitize string for pathing (file read/write-ing) headers <- purrr::map_chr( @@ -128,7 +128,7 @@ cleanup_fasta_header <- function(fasta) { fs::path_sanitize ) # append an index suffix for the ith occurence of each accnum - headers <- make_accnums_unique(headers) + headers <- ensureUniqAccNum(headers) names(fasta) <- headers return(fasta) } @@ -153,9 +153,9 @@ cleanup_fasta_header <- function(fasta) { #' #' @examples #' \dontrun{ -#' remove_empty(prot, "DomArch") +#' removeEmptyRows(prot, "DomArch") #' } -remove_empty <- function(prot, by_column = "DomArch") { +removeEmptyRows <- function(prot, by_column = "DomArch") { # ?? Don't call other psp functions within these functions prot <- prot %>% as_tibble() %>% @@ -168,7 +168,7 @@ remove_empty <- function(prot, by_column = "DomArch") { } ########################### -#' repeat2s +#' condenseRepeatedDomains #' #' @description #' Condense repeated domains @@ -181,7 +181,7 @@ remove_empty <- function(prot, by_column = "DomArch") { #' #' @param prot A data frame containing 'DomArch', 'GenContext', 'ClustName' columns. #' @param by_column Column in which repeats are condensed to domain+domain -> domain(s). -#' @param excluded_prots Vector of strings that repeat2s should not reduce to (s). Defaults to c() +#' @param excluded_prots Vector of strings that condenseRepeatedDomains should not reduce to (s). Defaults to c() #' #' @return Describe return, in detail #' @export @@ -191,10 +191,10 @@ remove_empty <- function(prot, by_column = "DomArch") { #' #' @examples #' \dontrun{ -#' repeat2s(prot, "DomArch") +#' condenseRepeatedDomains(prot, "DomArch") #' } -repeat2s <- function(prot, by_column = "DomArch", excluded_prots = c()) { - # If there are strings that repeat2s should not affect, the pattern to search +condenseRepeatedDomains <- function(prot, by_column = "DomArch", excluded_prots = c()) { + # If there are strings that condenseRepeatedDomains should not affect, the pattern to search # for must be changed to exclude a search for those desired strings collapsed_prots <- paste0(excluded_prots, collapse = "\\s|") @@ -253,10 +253,10 @@ repeat2s <- function(prot, by_column = "DomArch", excluded_prots = c()) { #' #' @examples #' \dontrun{ -#' replaceQMs() +#' replaceQuestionMarks() #' } #' -replaceQMs <- function(prot, by_column = "GenContext") { +replaceQuestionMarks <- function(prot, by_column = "GenContext") { by <- sym(by_column) # Regex for finding repeated `?` @@ -290,9 +290,9 @@ replaceQMs <- function(prot, by_column = "GenContext") { #' #' @examples #' \dontrun{ -#' remove_astrk() +#' removeAsterisks() #' } -remove_astrk <- function(query_data, colname = "GenContext") { +removeAsterisks <- function(query_data, colname = "GenContext") { query_data[, colname] <- map(query_data[, colname], function(x) str_remove_all(x, pattern = "\\*")) return(query_data) @@ -323,9 +323,9 @@ remove_astrk <- function(query_data, colname = "GenContext") { #' #' @examples #' \dontrun{ -#' remove_tails(prot, "DomArch") +#' removeTails(prot, "DomArch") #' } -remove_tails <- function(prot, by_column = "DomArch", +removeTails <- function(prot, by_column = "DomArch", keep_domains = FALSE) { # !! currently redundant by_column <- sym(by_column) @@ -369,7 +369,7 @@ remove_tails <- function(prot, by_column = "DomArch", #' A cleaned up version of the data table is returned. #' #' @param prot A data frame that contains columns 'Species'. -#' @param remove_empty Boolean. If TRUE, rows with empty/unnecessary values in 'Species' are removed. +#' @param removeEmptyRows Boolean. If TRUE, rows with empty/unnecessary values in 'Species' are removed. #' Default is false. #' #' @importFrom stringr coll str_replace_all @@ -379,9 +379,9 @@ remove_tails <- function(prot, by_column = "DomArch", #' #' @examples #' \dontrun{ -#' cleanup_species(prot, TRUE) +#' cleanSpecies(prot, TRUE) #' } -cleanup_species <- function(prot, remove_empty = FALSE) { +cleanSpecies <- function(prot, removeEmptyRows = FALSE) { # FUNCTIONS CALLED HERE, if else might be better since only two options, T and F # Create cleaned up Species column @@ -404,8 +404,8 @@ cleanup_species <- function(prot, remove_empty = FALSE) { str_replace_all(coll(" ", TRUE), " ") # !! CHECK !! Species vs Species_old - if (remove_empty) { - prot <- remove_empty(prot = prot, by_column = "Species") + if (removeEmptyRows) { + prot <- removeEmptyRows(prot = prot, by_column = "Species") } return(prot) @@ -425,9 +425,9 @@ cleanup_species <- function(prot, remove_empty = FALSE) { #' @param prot A data frame that must contain columns Query and ClustName. #' @param domains_rename A data frame containing the domain names to be replaced in a column 'old' and the corresponding replacement values in a column 'new'. #' @param domains_keep A data frame containing the domain names to be retained. -#' @param repeat2s Boolean. If TRUE, repeated domains in 'ClustName' are condensed. Default is TRUE. -#' @param remove_tails Boolean. If TRUE, 'ClustName' will be filtered based on domains to keep/remove. Default is FALSE. -#' @param remove_empty Boolean. If TRUE, rows with empty/unnecessary values in 'ClustName' are removed. Default is FALSE. +#' @param condenseRepeatedDomains Boolean. If TRUE, repeated domains in 'ClustName' are condensed. Default is TRUE. +#' @param removeTails Boolean. If TRUE, 'ClustName' will be filtered based on domains to keep/remove. Default is FALSE. +#' @param removeEmptyRows Boolean. If TRUE, rows with empty/unnecessary values in 'ClustName' are removed. Default is FALSE. #' #' @importFrom dplyr filter #' @importFrom stringr coll str_replace_all @@ -437,12 +437,12 @@ cleanup_species <- function(prot, remove_empty = FALSE) { #' #' @examples #' \dontrun{ -#' cleanup_clust(prot, TRUE, FALSE, domains_keep, domains_rename) +#' cleanClusters(prot, TRUE, FALSE, domains_keep, domains_rename) #' } -cleanup_clust <- function(prot, +cleanClusters <- function(prot, domains_rename, domains_keep, - repeat2s = TRUE, remove_tails = FALSE, - remove_empty = FALSE) { + condenseRepeatedDomains = TRUE, removeTails = FALSE, + removeEmptyRows = FALSE) { # Create cleaned up ClustName column prot$ClustName <- prot$ClustName.orig @@ -469,19 +469,19 @@ cleanup_clust <- function(prot, ## Optional parameters # Condense repeats - if (repeat2s) { - prot <- repeat2s(prot, by_column = "ClustName") + if (condenseRepeatedDomains) { + prot <- condenseRepeatedDomains(prot, by_column = "ClustName") } # Remove singletons - # if(remove_tails){ + # if(removeTails){ # prot <- prot %>% filter(!grepl(".1$", ClustID)) # } - if (remove_tails) { - prot <- remove_tails(prot, by_column = "ClustName") + if (removeTails) { + prot <- removeTails(prot, by_column = "ClustName") } # Remove empty rows - if (remove_empty) { - prot <- remove_empty(prot = prot, by_column = "ClustName") + if (removeEmptyRows) { + prot <- removeEmptyRows(prot = prot, by_column = "ClustName") } @@ -509,9 +509,9 @@ cleanup_clust <- function(prot, #' @param domains_keep A data frame containing the domain names to be retained. #' @param domains_rename A data frame containing the domain names to be replaced in a column 'old' and the #' corresponding replacement values in a column 'new'. -#' @param repeat2s Boolean. If TRUE, repeated domains in 'DomArch' are condensed. Default is TRUE. -#' @param remove_tails Boolean. If TRUE, 'ClustName' will be filtered based on domains to keep/remove. Default is FALSE. -#' @param remove_empty Boolean. If TRUE, rows with empty/unnecessary values in 'DomArch' are removed. Default is FALSE. +#' @param condenseRepeatedDomains Boolean. If TRUE, repeated domains in 'DomArch' are condensed. Default is TRUE. +#' @param removeTails Boolean. If TRUE, 'ClustName' will be filtered based on domains to keep/remove. Default is FALSE. +#' @param removeEmptyRows Boolean. If TRUE, rows with empty/unnecessary values in 'DomArch' are removed. Default is FALSE. #' @param domains_ignore A data frame containing the domain names to be removed in a column called 'domains' #' #' @importFrom dplyr pull @@ -522,12 +522,12 @@ cleanup_clust <- function(prot, #' #' @examples #' \dontrun{ -#' cleanup_domarch(prot, TRUE, FALSE, domains_keep, domains_rename, domains_ignore = NULL) +#' cleanDomainArchitecture(prot, TRUE, FALSE, domains_keep, domains_rename, domains_ignore = NULL) #' } -cleanup_domarch <- function(prot, old = "DomArch.orig", new = "DomArch", +cleanDomainArchitecture <- function(prot, old = "DomArch.orig", new = "DomArch", domains_keep, domains_rename, - repeat2s = TRUE, remove_tails = FALSE, - remove_empty = F, + condenseRepeatedDomains = TRUE, removeTails = FALSE, + removeEmptyRows = F, domains_ignore = NULL) { old_sym <- sym(old) new_sym <- sym(new) @@ -577,22 +577,22 @@ cleanup_domarch <- function(prot, old = "DomArch.orig", new = "DomArch", ## Optional parameters # Remove singletons - if (remove_tails) { - prot <- remove_tails(prot = prot, by_column = new) + if (removeTails) { + prot <- removeTails(prot = prot, by_column = new) } # Condense repeats - if (repeat2s) { + if (condenseRepeatedDomains) { ## Error in UseMethod("tbl_vars") : no applicable method for 'tbl_vars' applied to an object of class "character" - prot <- repeat2s(prot = prot, by_column = new) + prot <- condenseRepeatedDomains(prot = prot, by_column = new) } # Remove empty rows # ! FUNCTIONS CALLED HERE, if else might be better since only two options, T and F # ! Make a separate function of out of this? - if (remove_empty) { - prot <- remove_empty(prot = prot, by_column = new) + if (removeEmptyRows) { + prot <- removeEmptyRows(prot = prot, by_column = new) } - prot <- replaceQMs(prot, new) + prot <- replaceQuestionMarks(prot, new) return(prot) } @@ -610,7 +610,7 @@ cleanup_domarch <- function(prot, old = "DomArch.orig", new = "DomArch", #' @param prot A data frame that contains columns 'GenContext.orig' #' @param domains_rename A data frame containing the domain names to be replaced in a column 'old' and the replacement in a column 'new'. #' Defaults to an empty data frame with a new and old column such that non of the domains will be renamed -#' @param repeat2s Boolean. If TRUE, repeated domains in 'GenContext' are condensed. Default is TRUE. +#' @param condenseRepeatedDomains Boolean. If TRUE, repeated domains in 'GenContext' are condensed. Default is TRUE. #' @param remove_asterisk Boolean. If TRUE, asterisks in 'ClustName' are removed. Default is TRUE. #' #' @importFrom stringr str_replace_all @@ -620,11 +620,11 @@ cleanup_domarch <- function(prot, old = "DomArch.orig", new = "DomArch", #' #' @examples #' \dontrun{ -#' cleanup_gencontext(prot, domains_rename, T, F) +#' cleanGenomicContext(prot, domains_rename, T, F) #' } #' -cleanup_gencontext <- function(prot, domains_rename = data.frame("old" = character(0), "new" = character(0), stringsAsFactors = F), - repeat2s = TRUE, remove_asterisk = TRUE) { +cleanGenomicContext <- function(prot, domains_rename = data.frame("old" = character(0), "new" = character(0), stringsAsFactors = F), + condenseRepeatedDomains = TRUE, remove_asterisk = TRUE) { # Create cleaned up GenContext column prot$GenContext <- prot$GenContext.orig @@ -641,16 +641,16 @@ cleanup_gencontext <- function(prot, domains_rename = data.frame("old" = charact ## Reverse operons | Straighten them out! prot <- reverse_operon(prot) - prot <- replaceQMs(prot, "GenContext") + prot <- replaceQuestionMarks(prot, "GenContext") ## Optional parameters # Condense repeats - if (repeat2s) { - prot <- repeat2s(prot, "GenContext") + if (condenseRepeatedDomains) { + prot <- condenseRepeatedDomains(prot, "GenContext") } # Remove the Asterisks if (remove_asterisk) { - prot <- remove_astrk(prot, colname = "GenContext") + prot <- removeAsterisks(prot, colname = "GenContext") } return(prot) @@ -666,9 +666,9 @@ cleanup_gencontext <- function(prot, domains_rename = data.frame("old" = charact #' #' @examples #' \dontrun{ -#' cleanup_GeneDesc() +#' cleanGeneDescription() #' } -cleanup_GeneDesc <- function(prot, column) { +cleanGeneDescription <- function(prot, column) { prot[, "GeneDesc"] <- gsub("\\.$", "", prot %>% pull(column)) prot[, "GeneDesc"] <- gsub("%2C", ",", prot %>% pull(column)) return(prot) @@ -688,9 +688,9 @@ cleanup_GeneDesc <- function(prot, column) { #' #' @examples #' \dontrun{ -#' pick_longer_duplicate() +#' selectLongestDuplicate() #' } -pick_longer_duplicate <- function(prot, column) { +selectLongestDuplicate <- function(prot, column) { col <- sym(column) prot$row.orig <- 1:nrow(prot) @@ -736,9 +736,9 @@ pick_longer_duplicate <- function(prot, column) { #' #' @examples #' \dontrun{ -#' cleanup_lineage() +#' cleanLineage() #' } -cleanup_lineage <- function(prot, lins_rename) { +cleanLineage <- function(prot, lins_rename) { for (i in 1:nrow(lins_rename)) { prot$Lineage <- gsub(lins_rename$old[i], lins_rename$new[i], x = prot$Lineage, From 2c1ce1a213d7b36395acbd76d11cb4bf6b8a89f7 Mon Sep 17 00:00:00 2001 From: teddyCodex Date: Mon, 7 Oct 2024 09:33:02 +0100 Subject: [PATCH 2/3] update NAMESPACE using roxygen2 --- NAMESPACE | 26 +++++++++---------- man/{cleanup_clust.Rd => cleanClusters.Rd} | 20 +++++++------- ..._domarch.Rd => cleanDomainArchitecture.Rd} | 20 +++++++------- ...anup_fasta_header.Rd => cleanFAHeaders.Rd} | 8 +++--- ...up_GeneDesc.Rd => cleanGeneDescription.Rd} | 8 +++--- ...p_gencontext.Rd => cleanGenomicContext.Rd} | 12 ++++----- man/{cleanup_lineage.Rd => cleanLineage.Rd} | 8 +++--- man/{cleanup_species.Rd => cleanSpecies.Rd} | 10 +++---- man/{clean_string.Rd => cleanString.Rd} | 8 +++--- ...repeat2s.Rd => condenseRepeatedDomains.Rd} | 12 ++++----- ..._accnums_unique.Rd => ensureUniqAccNum.Rd} | 8 +++--- man/{string2accnum.Rd => extractAccNum.Rd} | 12 ++++----- man/{remove_astrk.Rd => removeAsterisks.Rd} | 8 +++--- man/{remove_empty.Rd => removeEmptyRows.Rd} | 8 +++--- man/{remove_tails.Rd => removeTails.Rd} | 8 +++--- ...{replaceQMs.Rd => replaceQuestionMarks.Rd} | 8 +++--- ...duplicate.Rd => selectLongestDuplicate.Rd} | 8 +++--- 17 files changed, 96 insertions(+), 96 deletions(-) rename man/{cleanup_clust.Rd => cleanClusters.Rd} (59%) rename man/{cleanup_domarch.Rd => cleanDomainArchitecture.Rd} (66%) rename man/{cleanup_fasta_header.Rd => cleanFAHeaders.Rd} (78%) rename man/{cleanup_GeneDesc.Rd => cleanGeneDescription.Rd} (70%) rename man/{cleanup_gencontext.Rd => cleanGenomicContext.Rd} (78%) rename man/{cleanup_lineage.Rd => cleanLineage.Rd} (71%) rename man/{cleanup_species.Rd => cleanSpecies.Rd} (70%) rename man/{clean_string.Rd => cleanString.Rd} (84%) rename man/{repeat2s.Rd => condenseRepeatedDomains.Rd} (67%) rename man/{make_accnums_unique.Rd => ensureUniqAccNum.Rd} (80%) rename man/{string2accnum.Rd => extractAccNum.Rd} (63%) rename man/{remove_astrk.Rd => removeAsterisks.Rd} (72%) rename man/{remove_empty.Rd => removeEmptyRows.Rd} (84%) rename man/{remove_tails.Rd => removeTails.Rd} (83%) rename man/{replaceQMs.Rd => replaceQuestionMarks.Rd} (73%) rename man/{pick_longer_duplicate.Rd => selectLongestDuplicate.Rd} (67%) diff --git a/NAMESPACE b/NAMESPACE index 16cf0813..9724f0dd 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -16,15 +16,16 @@ export(advanced_opts2est_walltime) export(alignFasta) export(assert_count_df) export(assign_job_queue) -export(cleanup_GeneDesc) -export(cleanup_clust) -export(cleanup_domarch) -export(cleanup_gencontext) -export(cleanup_lineage) -export(cleanup_species) +export(cleanClusters) +export(cleanDomainArchitecture) +export(cleanGeneDescription) +export(cleanGenomicContext) +export(cleanLineage) +export(cleanSpecies) export(combine_files) export(combine_full) export(combine_ipr) +export(condenseRepeatedDomains) export(convert_aln2fa) export(convert_fa2tre) export(count_bycol) @@ -35,6 +36,7 @@ export(create_lineage_lookup) export(create_one_col_params) export(domain_network) export(efetch_ipg) +export(extractAccNum) export(filter_by_doms) export(filter_freq) export(find_paralogs) @@ -62,25 +64,23 @@ export(make_opts2procs) export(map_acc2name) export(map_advanced_opts2procs) export(msa_pdf) -export(pick_longer_duplicate) export(plot_estimated_walltimes) export(prot2tax) export(prot2tax_old) -export(remove_astrk) -export(remove_empty) -export(remove_tails) +export(removeAsterisks) +export(removeEmptyRows) +export(removeTails) export(rename_fasta) -export(repeat2s) -export(replaceQMs) +export(replaceQuestionMarks) export(reveql) export(reverse_operon) export(run_deltablast) export(run_rpsblast) +export(selectLongestDuplicate) export(send_job_status_email) export(shorten_lineage) export(sink.reset) export(stacked_lin_plot) -export(string2accnum) export(summ.DA) export(summ.DA.byLin) export(summ.GC) diff --git a/man/cleanup_clust.Rd b/man/cleanClusters.Rd similarity index 59% rename from man/cleanup_clust.Rd rename to man/cleanClusters.Rd index 4eed8be8..7ef4f3b9 100644 --- a/man/cleanup_clust.Rd +++ b/man/cleanClusters.Rd @@ -1,16 +1,16 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/cleanup.R -\name{cleanup_clust} -\alias{cleanup_clust} +\name{cleanClusters} +\alias{cleanClusters} \title{Cleanup Clust} \usage{ -cleanup_clust( +cleanClusters( prot, domains_rename, domains_keep, - repeat2s = TRUE, - remove_tails = FALSE, - remove_empty = FALSE + condenseRepeatedDomains = TRUE, + removeTails = FALSE, + removeEmptyRows = FALSE ) } \arguments{ @@ -20,11 +20,11 @@ cleanup_clust( \item{domains_keep}{A data frame containing the domain names to be retained.} -\item{repeat2s}{Boolean. If TRUE, repeated domains in 'ClustName' are condensed. Default is TRUE.} +\item{condenseRepeatedDomains}{Boolean. If TRUE, repeated domains in 'ClustName' are condensed. Default is TRUE.} -\item{remove_tails}{Boolean. If TRUE, 'ClustName' will be filtered based on domains to keep/remove. Default is FALSE.} +\item{removeTails}{Boolean. If TRUE, 'ClustName' will be filtered based on domains to keep/remove. Default is FALSE.} -\item{remove_empty}{Boolean. If TRUE, rows with empty/unnecessary values in 'ClustName' are removed. Default is FALSE.} +\item{removeEmptyRows}{Boolean. If TRUE, rows with empty/unnecessary values in 'ClustName' are removed. Default is FALSE.} } \value{ Cleaned up data frame @@ -39,6 +39,6 @@ The return value is the cleaned up data frame. } \examples{ \dontrun{ -cleanup_clust(prot, TRUE, FALSE, domains_keep, domains_rename) +cleanClusters(prot, TRUE, FALSE, domains_keep, domains_rename) } } diff --git a/man/cleanup_domarch.Rd b/man/cleanDomainArchitecture.Rd similarity index 66% rename from man/cleanup_domarch.Rd rename to man/cleanDomainArchitecture.Rd index 21955509..887b5388 100644 --- a/man/cleanup_domarch.Rd +++ b/man/cleanDomainArchitecture.Rd @@ -1,18 +1,18 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/cleanup.R -\name{cleanup_domarch} -\alias{cleanup_domarch} +\name{cleanDomainArchitecture} +\alias{cleanDomainArchitecture} \title{Cleanup DomArch} \usage{ -cleanup_domarch( +cleanDomainArchitecture( prot, old = "DomArch.orig", new = "DomArch", domains_keep, domains_rename, - repeat2s = TRUE, - remove_tails = FALSE, - remove_empty = F, + condenseRepeatedDomains = TRUE, + removeTails = FALSE, + removeEmptyRows = F, domains_ignore = NULL ) } @@ -24,11 +24,11 @@ cleanup_domarch( \item{domains_rename}{A data frame containing the domain names to be replaced in a column 'old' and the corresponding replacement values in a column 'new'.} -\item{repeat2s}{Boolean. If TRUE, repeated domains in 'DomArch' are condensed. Default is TRUE.} +\item{condenseRepeatedDomains}{Boolean. If TRUE, repeated domains in 'DomArch' are condensed. Default is TRUE.} -\item{remove_tails}{Boolean. If TRUE, 'ClustName' will be filtered based on domains to keep/remove. Default is FALSE.} +\item{removeTails}{Boolean. If TRUE, 'ClustName' will be filtered based on domains to keep/remove. Default is FALSE.} -\item{remove_empty}{Boolean. If TRUE, rows with empty/unnecessary values in 'DomArch' are removed. Default is FALSE.} +\item{removeEmptyRows}{Boolean. If TRUE, rows with empty/unnecessary values in 'DomArch' are removed. Default is FALSE.} \item{domains_ignore}{A data frame containing the domain names to be removed in a column called 'domains'} } @@ -46,6 +46,6 @@ The original data frame is returned with the clean DomArchs column and the old d } \examples{ \dontrun{ -cleanup_domarch(prot, TRUE, FALSE, domains_keep, domains_rename, domains_ignore = NULL) +cleanDomainArchitecture(prot, TRUE, FALSE, domains_keep, domains_rename, domains_ignore = NULL) } } diff --git a/man/cleanup_fasta_header.Rd b/man/cleanFAHeaders.Rd similarity index 78% rename from man/cleanup_fasta_header.Rd rename to man/cleanFAHeaders.Rd index 416f6be2..e9ad9b30 100644 --- a/man/cleanup_fasta_header.Rd +++ b/man/cleanFAHeaders.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/cleanup.R -\name{cleanup_fasta_header} -\alias{cleanup_fasta_header} +\name{cleanFAHeaders} +\alias{cleanFAHeaders} \title{Cleanup FASTA Header} \usage{ -cleanup_fasta_header(fasta) +cleanFAHeaders(fasta) } \arguments{ \item{fasta}{} @@ -19,6 +19,6 @@ suffix of the ith occurence to handle duplicates \examples{ \dontrun{ AAStringSet(c("xxx" = "ATCG", "xxx" = "GGGC")) |> - cleanup_fasta_header() + cleanFAHeaders() } } diff --git a/man/cleanup_GeneDesc.Rd b/man/cleanGeneDescription.Rd similarity index 70% rename from man/cleanup_GeneDesc.Rd rename to man/cleanGeneDescription.Rd index 3068fe49..f98a25d4 100644 --- a/man/cleanup_GeneDesc.Rd +++ b/man/cleanGeneDescription.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/cleanup.R -\name{cleanup_GeneDesc} -\alias{cleanup_GeneDesc} +\name{cleanGeneDescription} +\alias{cleanGeneDescription} \title{Cleanup GeneDesc} \usage{ -cleanup_GeneDesc(prot, column) +cleanGeneDescription(prot, column) } \arguments{ \item{column}{} @@ -17,6 +17,6 @@ Cleanup GeneDesc } \examples{ \dontrun{ -cleanup_GeneDesc() +cleanGeneDescription() } } diff --git a/man/cleanup_gencontext.Rd b/man/cleanGenomicContext.Rd similarity index 78% rename from man/cleanup_gencontext.Rd rename to man/cleanGenomicContext.Rd index 8e26a447..2c2dcc18 100644 --- a/man/cleanup_gencontext.Rd +++ b/man/cleanGenomicContext.Rd @@ -1,14 +1,14 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/cleanup.R -\name{cleanup_gencontext} -\alias{cleanup_gencontext} +\name{cleanGenomicContext} +\alias{cleanGenomicContext} \title{Cleanup Genomic Contexts} \usage{ -cleanup_gencontext( +cleanGenomicContext( prot, domains_rename = data.frame(old = character(0), new = character(0), stringsAsFactors = F), - repeat2s = TRUE, + condenseRepeatedDomains = TRUE, remove_asterisk = TRUE ) } @@ -18,7 +18,7 @@ cleanup_gencontext( \item{domains_rename}{A data frame containing the domain names to be replaced in a column 'old' and the replacement in a column 'new'. Defaults to an empty data frame with a new and old column such that non of the domains will be renamed} -\item{repeat2s}{Boolean. If TRUE, repeated domains in 'GenContext' are condensed. Default is TRUE.} +\item{condenseRepeatedDomains}{Boolean. If TRUE, repeated domains in 'GenContext' are condensed. Default is TRUE.} \item{remove_asterisk}{Boolean. If TRUE, asterisks in 'ClustName' are removed. Default is TRUE.} } @@ -33,7 +33,7 @@ A cleaned up version of the data table is returned. } \examples{ \dontrun{ -cleanup_gencontext(prot, domains_rename, T, F) +cleanGenomicContext(prot, domains_rename, T, F) } } diff --git a/man/cleanup_lineage.Rd b/man/cleanLineage.Rd similarity index 71% rename from man/cleanup_lineage.Rd rename to man/cleanLineage.Rd index 35669f4e..adcea312 100644 --- a/man/cleanup_lineage.Rd +++ b/man/cleanLineage.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/cleanup.R -\name{cleanup_lineage} -\alias{cleanup_lineage} +\name{cleanLineage} +\alias{cleanLineage} \title{Cleanup Lineage} \usage{ -cleanup_lineage(prot, lins_rename) +cleanLineage(prot, lins_rename) } \arguments{ \item{lins_rename}{} @@ -17,6 +17,6 @@ Cleanup Lineage } \examples{ \dontrun{ -cleanup_lineage() +cleanLineage() } } diff --git a/man/cleanup_species.Rd b/man/cleanSpecies.Rd similarity index 70% rename from man/cleanup_species.Rd rename to man/cleanSpecies.Rd index beedb23c..82b5444c 100644 --- a/man/cleanup_species.Rd +++ b/man/cleanSpecies.Rd @@ -1,15 +1,15 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/cleanup.R -\name{cleanup_species} -\alias{cleanup_species} +\name{cleanSpecies} +\alias{cleanSpecies} \title{Cleanup Species} \usage{ -cleanup_species(prot, remove_empty = FALSE) +cleanSpecies(prot, removeEmptyRows = FALSE) } \arguments{ \item{prot}{A data frame that contains columns 'Species'.} -\item{remove_empty}{Boolean. If TRUE, rows with empty/unnecessary values in 'Species' are removed. +\item{removeEmptyRows}{Boolean. If TRUE, rows with empty/unnecessary values in 'Species' are removed. Default is false.} } \value{ @@ -25,6 +25,6 @@ A cleaned up version of the data table is returned. } \examples{ \dontrun{ -cleanup_species(prot, TRUE) +cleanSpecies(prot, TRUE) } } diff --git a/man/clean_string.Rd b/man/cleanString.Rd similarity index 84% rename from man/clean_string.Rd rename to man/cleanString.Rd index a17a95bb..0dc2937e 100644 --- a/man/clean_string.Rd +++ b/man/cleanString.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/cleanup.R -\name{clean_string} -\alias{clean_string} +\name{cleanString} +\alias{cleanString} \title{Clean String} \usage{ -clean_string(string) +cleanString(string) } \arguments{ \item{string}{} @@ -19,7 +19,7 @@ cleanup domain architecture values } \examples{ \dontrun{ -clean_string() +cleanString() } } diff --git a/man/repeat2s.Rd b/man/condenseRepeatedDomains.Rd similarity index 67% rename from man/repeat2s.Rd rename to man/condenseRepeatedDomains.Rd index 30a09cc6..3b239129 100644 --- a/man/repeat2s.Rd +++ b/man/condenseRepeatedDomains.Rd @@ -1,17 +1,17 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/cleanup.R -\name{repeat2s} -\alias{repeat2s} -\title{repeat2s} +\name{condenseRepeatedDomains} +\alias{condenseRepeatedDomains} +\title{condenseRepeatedDomains} \usage{ -repeat2s(prot, by_column = "DomArch", excluded_prots = c()) +condenseRepeatedDomains(prot, by_column = "DomArch", excluded_prots = c()) } \arguments{ \item{prot}{A data frame containing 'DomArch', 'GenContext', 'ClustName' columns.} \item{by_column}{Column in which repeats are condensed to domain+domain -> domain(s).} -\item{excluded_prots}{Vector of strings that repeat2s should not reduce to (s). Defaults to c()} +\item{excluded_prots}{Vector of strings that condenseRepeatedDomains should not reduce to (s). Defaults to c()} } \value{ Describe return, in detail @@ -27,6 +27,6 @@ The original data frame is returned with the corresponding cleaned up column. } \examples{ \dontrun{ -repeat2s(prot, "DomArch") +condenseRepeatedDomains(prot, "DomArch") } } diff --git a/man/make_accnums_unique.Rd b/man/ensureUniqAccNum.Rd similarity index 80% rename from man/make_accnums_unique.Rd rename to man/ensureUniqAccNum.Rd index 62866a24..ddb4a70d 100644 --- a/man/make_accnums_unique.Rd +++ b/man/ensureUniqAccNum.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/cleanup.R -\name{make_accnums_unique} -\alias{make_accnums_unique} +\name{ensureUniqAccNum} +\alias{ensureUniqAccNum} \title{make accnums unique} \usage{ -make_accnums_unique(accnums) +ensureUniqAccNum(accnums) } \arguments{ \item{accnums}{\link{chr} a vector of accession numbers} @@ -19,6 +19,6 @@ character vector) making them unique \examples{ \dontrun{ c("xxx", "xxx", "xxx", "yyy", "yyy") |> - make_accnums_unique() + ensureUniqAccNum() } } diff --git a/man/string2accnum.Rd b/man/extractAccNum.Rd similarity index 63% rename from man/string2accnum.Rd rename to man/extractAccNum.Rd index dd7de249..15870f3f 100644 --- a/man/string2accnum.Rd +++ b/man/extractAccNum.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/cleanup.R -\name{string2accnum} -\alias{string2accnum} -\title{string2accnum} +\name{extractAccNum} +\alias{extractAccNum} +\title{extractAccNum} \usage{ -string2accnum(string) +extractAccNum(string) } \arguments{ \item{string}{} @@ -13,10 +13,10 @@ string2accnum(string) Describe return, in detail } \description{ -string2accnum +extractAccNum } \examples{ \dontrun{ -string2accnum() +extractAccNum() } } diff --git a/man/remove_astrk.Rd b/man/removeAsterisks.Rd similarity index 72% rename from man/remove_astrk.Rd rename to man/removeAsterisks.Rd index 3562521d..691a7adf 100644 --- a/man/remove_astrk.Rd +++ b/man/removeAsterisks.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/cleanup.R -\name{remove_astrk} -\alias{remove_astrk} +\name{removeAsterisks} +\alias{removeAsterisks} \title{Remove Astrk} \usage{ -remove_astrk(query_data, colname = "GenContext") +removeAsterisks(query_data, colname = "GenContext") } \arguments{ \item{colname}{} @@ -18,6 +18,6 @@ Used for removing * from GenContext columns } \examples{ \dontrun{ -remove_astrk() +removeAsterisks() } } diff --git a/man/remove_empty.Rd b/man/removeEmptyRows.Rd similarity index 84% rename from man/remove_empty.Rd rename to man/removeEmptyRows.Rd index cfbf707b..66551810 100644 --- a/man/remove_empty.Rd +++ b/man/removeEmptyRows.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/cleanup.R -\name{remove_empty} -\alias{remove_empty} +\name{removeEmptyRows} +\alias{removeEmptyRows} \title{Remove Empty} \usage{ -remove_empty(prot, by_column = "DomArch") +removeEmptyRows(prot, by_column = "DomArch") } \arguments{ \item{prot}{A data frame containing 'DomArch', 'Species', 'GenContext', 'ClustName' columns.} @@ -25,6 +25,6 @@ The original data frame is returned with the corresponding cleaned up column. } \examples{ \dontrun{ -remove_empty(prot, "DomArch") +removeEmptyRows(prot, "DomArch") } } diff --git a/man/remove_tails.Rd b/man/removeTails.Rd similarity index 83% rename from man/remove_tails.Rd rename to man/removeTails.Rd index 1cd20861..76d1e18a 100644 --- a/man/remove_tails.Rd +++ b/man/removeTails.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/cleanup.R -\name{remove_tails} -\alias{remove_tails} +\name{removeTails} +\alias{removeTails} \title{Remove Tails} \usage{ -remove_tails(prot, by_column = "DomArch", keep_domains = FALSE) +removeTails(prot, by_column = "DomArch", keep_domains = FALSE) } \arguments{ \item{prot}{A data frame containing 'DomArch', 'GenContext', 'ClustName' columns.} @@ -25,6 +25,6 @@ The original data frame is returned with the corresponding cleaned up column. } \examples{ \dontrun{ -remove_tails(prot, "DomArch") +removeTails(prot, "DomArch") } } diff --git a/man/replaceQMs.Rd b/man/replaceQuestionMarks.Rd similarity index 73% rename from man/replaceQMs.Rd rename to man/replaceQuestionMarks.Rd index 604a8ece..0949568f 100644 --- a/man/replaceQMs.Rd +++ b/man/replaceQuestionMarks.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/cleanup.R -\name{replaceQMs} -\alias{replaceQMs} +\name{replaceQuestionMarks} +\alias{replaceQuestionMarks} \title{Replace QMs} \usage{ -replaceQMs(prot, by_column = "GenContext") +replaceQuestionMarks(prot, by_column = "GenContext") } \arguments{ \item{prot}{DataTable to operate on} @@ -20,7 +20,7 @@ Replace '?' with 'X' } \examples{ \dontrun{ -replaceQMs() +replaceQuestionMarks() } } diff --git a/man/pick_longer_duplicate.Rd b/man/selectLongestDuplicate.Rd similarity index 67% rename from man/pick_longer_duplicate.Rd rename to man/selectLongestDuplicate.Rd index d7858da7..c177d289 100644 --- a/man/pick_longer_duplicate.Rd +++ b/man/selectLongestDuplicate.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/cleanup.R -\name{pick_longer_duplicate} -\alias{pick_longer_duplicate} +\name{selectLongestDuplicate} +\alias{selectLongestDuplicate} \title{Pick Longer Duplicate} \usage{ -pick_longer_duplicate(prot, column) +selectLongestDuplicate(prot, column) } \arguments{ \item{column}{} @@ -17,6 +17,6 @@ Pick Longer Duplicate } \examples{ \dontrun{ -pick_longer_duplicate() +selectLongestDuplicate() } } From 843ecda71722bd4d152c43075d7f49567e46b0b6 Mon Sep 17 00:00:00 2001 From: David Mayer Date: Mon, 7 Oct 2024 15:40:15 -0600 Subject: [PATCH 3/3] use new function name --- R/networks_domarch.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/networks_domarch.R b/R/networks_domarch.R index 66385a74..010b7619 100755 --- a/R/networks_domarch.R +++ b/R/networks_domarch.R @@ -74,11 +74,11 @@ domain_network <- function(prot, column = "DomArch", domains_of_interest, cutoff # string clean up all of the Domain Architecture columns prot <- prot |> - mutate(DomArch.ntwrk = clean_string(DomArch.ntwrk)) |> + mutate(DomArch.ntwrk = cleanString(DomArch.ntwrk)) |> mutate( across( all_of(column), - clean_string + cleanString ) ) domains_of_interest_regex <- paste(domains_of_interest, collapse = "|")