From e004cd4091ae3cb111873076365619ce0ba42430 Mon Sep 17 00:00:00 2001
From: teddyCodex <samted.uche@gmail.com>
Date: Sun, 6 Oct 2024 20:10:30 +0100
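Subject: [PATCH 1/3] rename functions and arguments in R/cleanup.R to camelCase

Rename the cleanup helpers (e.g. clean_string -> cleanString,
repeat2s -> condenseRepeatedDomains) and update their call sites within
R/cleanup.R. The boolean arguments repeat2s, remove_tails and remove_empty
of the cleanup functions are renamed to match, so callers that pass these
arguments by name must be updated.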

---
 R/cleanup.R | 142 ++++++++++++++++++++++++++--------------------------
 1 file changed, 71 insertions(+), 71 deletions(-)

diff --git a/R/cleanup.R b/R/cleanup.R
index 3a708415..f82722f2 100755
--- a/R/cleanup.R
+++ b/R/cleanup.R
@@ -31,10 +31,10 @@
 #' @return [string] string with only alphanumerics, "_", "+", and "."
 #' @examples
 #' \dontrun{
-#' clean_string()
+#' cleanString()
 #' }
 #'
-clean_string <- function(string) {
+cleanString <- function(string) {
     # replace spaces with "_"
     string <- stringr::str_replace_all(string, "\\s+", "_")
     # keep only alphanumeric characters, "_", and "."
@@ -44,7 +44,7 @@ clean_string <- function(string) {
 
 # use the same code as upstream_scripts/00_submit_full.R's
 # get_sequences() function to extract accession numbers
-#' string2accnum
+#' extractAccNum
 #'
 #' @param string
 #'
@@ -53,9 +53,9 @@ clean_string <- function(string) {
 #'
 #' @examples
 #' \dontrun{
-#' string2accnum()
+#' extractAccNum()
 #' }
-string2accnum <- function(string) {
+extractAccNum <- function(string) {
     if (grepl("\\|", string)) {
         accnum <- strsplit(string, "\\|")[[1]][2]
         accnum <- strsplit(accnum, " ")[[1]][1]
@@ -81,9 +81,9 @@ string2accnum <- function(string) {
 #' @examples
 #' \dontrun{
 #' c("xxx", "xxx", "xxx", "yyy", "yyy") |>
-#'     make_accnums_unique()
+#'     ensureUniqAccNum()
 #' }
-make_accnums_unique <- function(accnums) {
+ensureUniqAccNum <- function(accnums) {
     # group by accnums then use the row count as a proxy
     # for the index of occurence for each accession number
     df_accnums <- tibble::tibble("accnum" = accnums)
@@ -113,14 +113,14 @@ make_accnums_unique <- function(accnums) {
 #' @examples
 #' \dontrun{
 #' AAStringSet(c("xxx" = "ATCG", "xxx" = "GGGC")) |>
-#'     cleanup_fasta_header()
+#'     cleanFAHeaders()
 #' }
-cleanup_fasta_header <- function(fasta) {
+cleanFAHeaders <- function(fasta) {
     headers <- names(fasta)
     # try parsing accession numbers from header
     headers <- purrr::map_chr(
         headers,
-        string2accnum
+        extractAccNum
     )
     # sanitize string for pathing (file read/write-ing)
     headers <- purrr::map_chr(
@@ -128,7 +128,7 @@ cleanup_fasta_header <- function(fasta) {
         fs::path_sanitize
     )
     # append an index suffix for the ith occurence of each accnum
-    headers <- make_accnums_unique(headers)
+    headers <- ensureUniqAccNum(headers)
     names(fasta) <- headers
     return(fasta)
 }
@@ -153,9 +153,9 @@ cleanup_fasta_header <- function(fasta) {
 #'
 #' @examples
 #' \dontrun{
-#' remove_empty(prot, "DomArch")
+#' removeEmptyRows(prot, "DomArch")
 #' }
-remove_empty <- function(prot, by_column = "DomArch") {
+removeEmptyRows <- function(prot, by_column = "DomArch") {
     # ?? Don't call other psp functions within these functions
     prot <- prot %>%
         as_tibble() %>%
@@ -168,7 +168,7 @@ remove_empty <- function(prot, by_column = "DomArch") {
 }
 
 ###########################
-#' repeat2s
+#' condenseRepeatedDomains
 #'
 #' @description
 #' Condense repeated domains
@@ -181,7 +181,7 @@ remove_empty <- function(prot, by_column = "DomArch") {
 #'
 #' @param prot A data frame containing 'DomArch', 'GenContext', 'ClustName' columns.
 #' @param by_column Column in which repeats are condensed to domain+domain -> domain(s).
-#' @param excluded_prots Vector of strings that repeat2s should not reduce to (s). Defaults to c()
+#' @param excluded_prots Vector of strings that condenseRepeatedDomains should not reduce to (s). Defaults to c()
 #'
 #' @return Describe return, in detail
 #' @export
@@ -191,10 +191,10 @@ remove_empty <- function(prot, by_column = "DomArch") {
 #'
 #' @examples
 #' \dontrun{
-#' repeat2s(prot, "DomArch")
+#' condenseRepeatedDomains(prot, "DomArch")
 #' }
-repeat2s <- function(prot, by_column = "DomArch", excluded_prots = c()) {
-    # If there are strings that repeat2s should not affect, the pattern to search
+condenseRepeatedDomains <- function(prot, by_column = "DomArch", excluded_prots = c()) {
+    # If there are strings that condenseRepeatedDomains should not affect, the pattern to search
     # for must be changed to exclude a search for those desired strings
 
     collapsed_prots <- paste0(excluded_prots, collapse = "\\s|")
@@ -253,10 +253,10 @@ repeat2s <- function(prot, by_column = "DomArch", excluded_prots = c()) {
 #'
 #' @examples
 #' \dontrun{
-#' replaceQMs()
+#' replaceQuestionMarks()
 #' }
 #'
-replaceQMs <- function(prot, by_column = "GenContext") {
+replaceQuestionMarks <- function(prot, by_column = "GenContext") {
     by <- sym(by_column)
 
     # Regex for finding repeated `?`
@@ -290,9 +290,9 @@ replaceQMs <- function(prot, by_column = "GenContext") {
 #'
 #' @examples
 #' \dontrun{
-#' remove_astrk()
+#' removeAsterisks()
 #' }
-remove_astrk <- function(query_data, colname = "GenContext") {
+removeAsterisks <- function(query_data, colname = "GenContext") {
     query_data[, colname] <- map(query_data[, colname], function(x) str_remove_all(x, pattern = "\\*"))
 
     return(query_data)
@@ -323,9 +323,9 @@ remove_astrk <- function(query_data, colname = "GenContext") {
 #'
 #' @examples
 #' \dontrun{
-#' remove_tails(prot, "DomArch")
+#' removeTails(prot, "DomArch")
 #' }
-remove_tails <- function(prot, by_column = "DomArch",
+removeTails <- function(prot, by_column = "DomArch",
     keep_domains = FALSE) { # !! currently redundant
 
     by_column <- sym(by_column)
@@ -369,7 +369,7 @@ remove_tails <- function(prot, by_column = "DomArch",
 #' A cleaned up version of the data table is returned.
 #'
 #' @param prot A data frame that contains columns 'Species'.
-#' @param remove_empty Boolean. If TRUE, rows with empty/unnecessary values in 'Species' are removed.
+#' @param removeEmptyRows Boolean. If TRUE, rows with empty/unnecessary values in 'Species' are removed.
 #' Default is false.
 #'
 #' @importFrom stringr coll str_replace_all
@@ -379,9 +379,9 @@ remove_tails <- function(prot, by_column = "DomArch",
 #'
 #' @examples
 #' \dontrun{
-#' cleanup_species(prot, TRUE)
+#' cleanSpecies(prot, TRUE)
 #' }
-cleanup_species <- function(prot, remove_empty = FALSE) {
+cleanSpecies <- function(prot, removeEmptyRows = FALSE) {
     # FUNCTIONS CALLED HERE, if else might be better since only two options, T and F
 
     # Create cleaned up Species column
@@ -404,8 +404,8 @@ cleanup_species <- function(prot, remove_empty = FALSE) {
         str_replace_all(coll("  ", TRUE), " ")
 
     # !! CHECK !! Species vs Species_old
-    if (remove_empty) {
-        prot <- remove_empty(prot = prot, by_column = "Species")
+    if (removeEmptyRows) {
+        prot <- removeEmptyRows(prot = prot, by_column = "Species")
     }
 
     return(prot)
@@ -425,9 +425,9 @@ cleanup_species <- function(prot, remove_empty = FALSE) {
 #' @param prot A data frame that must contain columns Query and ClustName.
 #' @param domains_rename A data frame containing the domain names to be replaced in a column 'old' and the corresponding replacement values in a column 'new'.
 #' @param domains_keep A data frame containing the domain names to be retained.
-#' @param repeat2s Boolean. If TRUE, repeated domains in 'ClustName' are condensed. Default is TRUE.
-#' @param remove_tails Boolean. If TRUE, 'ClustName' will be filtered based on domains to keep/remove. Default is FALSE.
-#' @param remove_empty  Boolean. If TRUE, rows with empty/unnecessary values in 'ClustName' are removed. Default is FALSE.
+#' @param condenseRepeatedDomains Boolean. If TRUE, repeated domains in 'ClustName' are condensed. Default is TRUE.
+#' @param removeTails Boolean. If TRUE, 'ClustName' will be filtered based on domains to keep/remove. Default is FALSE.
+#' @param removeEmptyRows  Boolean. If TRUE, rows with empty/unnecessary values in 'ClustName' are removed. Default is FALSE.
 #'
 #' @importFrom dplyr filter
 #' @importFrom stringr coll str_replace_all
@@ -437,12 +437,12 @@ cleanup_species <- function(prot, remove_empty = FALSE) {
 #'
 #' @examples
 #' \dontrun{
-#' cleanup_clust(prot, TRUE, FALSE, domains_keep, domains_rename)
+#' cleanClusters(prot, domains_rename, domains_keep, condenseRepeatedDomains = TRUE, removeTails = FALSE)
 #' }
-cleanup_clust <- function(prot,
+cleanClusters <- function(prot,
     domains_rename, domains_keep,
-    repeat2s = TRUE, remove_tails = FALSE,
-    remove_empty = FALSE) {
+    condenseRepeatedDomains = TRUE, removeTails = FALSE,
+    removeEmptyRows = FALSE) {
     # Create cleaned up ClustName column
     prot$ClustName <- prot$ClustName.orig
 
@@ -469,19 +469,19 @@ cleanup_clust <- function(prot,
 
     ## Optional parameters
     # Condense repeats
-    if (repeat2s) {
-        prot <- repeat2s(prot, by_column = "ClustName")
+    if (condenseRepeatedDomains) {
+        prot <- condenseRepeatedDomains(prot, by_column = "ClustName")
     }
     # Remove singletons
-    # if(remove_tails){
+    # if(removeTails){
     #  prot <- prot %>% filter(!grepl(".1$", ClustID))
     # }
-    if (remove_tails) {
-        prot <- remove_tails(prot, by_column = "ClustName")
+    if (removeTails) {
+        prot <- removeTails(prot, by_column = "ClustName")
     }
     # Remove empty rows
-    if (remove_empty) {
-        prot <- remove_empty(prot = prot, by_column = "ClustName")
+    if (removeEmptyRows) {
+        prot <- removeEmptyRows(prot = prot, by_column = "ClustName")
     }
 
 
@@ -509,9 +509,9 @@ cleanup_clust <- function(prot,
 #' @param domains_keep A data frame containing the domain names to be retained.
 #' @param domains_rename A data frame containing the domain names to be replaced in a column 'old' and the
 #' corresponding replacement values in a column 'new'.
-#' @param repeat2s Boolean. If TRUE, repeated domains in 'DomArch' are condensed. Default is TRUE.
-#' @param remove_tails Boolean. If TRUE, 'ClustName' will be filtered based on domains to keep/remove. Default is FALSE.
-#' @param remove_empty Boolean. If TRUE, rows with empty/unnecessary values in 'DomArch' are removed. Default is FALSE.
+#' @param condenseRepeatedDomains Boolean. If TRUE, repeated domains in 'DomArch' are condensed. Default is TRUE.
+#' @param removeTails Boolean. If TRUE, 'ClustName' will be filtered based on domains to keep/remove. Default is FALSE.
+#' @param removeEmptyRows Boolean. If TRUE, rows with empty/unnecessary values in 'DomArch' are removed. Default is FALSE.
 #' @param domains_ignore A data frame containing the domain names to be removed in a column called 'domains'
 #'
 #' @importFrom dplyr pull
@@ -522,12 +522,12 @@ cleanup_clust <- function(prot,
 #'
 #' @examples
 #' \dontrun{
-#' cleanup_domarch(prot, TRUE, FALSE, domains_keep, domains_rename, domains_ignore = NULL)
+#' cleanDomainArchitecture(prot, domains_keep = domains_keep, domains_rename = domains_rename, condenseRepeatedDomains = TRUE, removeTails = FALSE, domains_ignore = NULL)
 #' }
-cleanup_domarch <- function(prot, old = "DomArch.orig", new = "DomArch",
+cleanDomainArchitecture <- function(prot, old = "DomArch.orig", new = "DomArch",
     domains_keep, domains_rename,
-    repeat2s = TRUE, remove_tails = FALSE,
-    remove_empty = F,
+    condenseRepeatedDomains = TRUE, removeTails = FALSE,
+    removeEmptyRows = F,
     domains_ignore = NULL) {
     old_sym <- sym(old)
     new_sym <- sym(new)
@@ -577,22 +577,22 @@ cleanup_domarch <- function(prot, old = "DomArch.orig", new = "DomArch",
 
     ## Optional parameters
     # Remove singletons
-    if (remove_tails) {
-        prot <- remove_tails(prot = prot, by_column = new)
+    if (removeTails) {
+        prot <- removeTails(prot = prot, by_column = new)
     }
     # Condense repeats
-    if (repeat2s) {
+    if (condenseRepeatedDomains) {
         ## Error in UseMethod("tbl_vars") : no applicable method for 'tbl_vars' applied to an object of class "character"
-        prot <- repeat2s(prot = prot, by_column = new)
+        prot <- condenseRepeatedDomains(prot = prot, by_column = new)
     }
     # Remove empty rows
     # ! FUNCTIONS CALLED HERE, if else might be better since only two options, T and F
     # ! Make a separate function of out of this?
-    if (remove_empty) {
-        prot <- remove_empty(prot = prot, by_column = new)
+    if (removeEmptyRows) {
+        prot <- removeEmptyRows(prot = prot, by_column = new)
     }
 
-    prot <- replaceQMs(prot, new)
+    prot <- replaceQuestionMarks(prot, new)
 
     return(prot)
 }
@@ -610,7 +610,7 @@ cleanup_domarch <- function(prot, old = "DomArch.orig", new = "DomArch",
 #' @param prot A data frame that contains columns 'GenContext.orig'
 #' @param domains_rename A data frame containing the domain names to be replaced in a column 'old' and the replacement in a column 'new'.
 #' Defaults to an empty data frame with a new and old column such that non of the domains will be renamed
-#' @param repeat2s Boolean. If TRUE, repeated domains in 'GenContext' are condensed. Default is TRUE.
+#' @param condenseRepeatedDomains Boolean. If TRUE, repeated domains in 'GenContext' are condensed. Default is TRUE.
 #' @param remove_asterisk Boolean. If TRUE, asterisks in 'ClustName' are removed. Default is TRUE.
 #'
 #' @importFrom stringr str_replace_all
@@ -620,11 +620,11 @@ cleanup_domarch <- function(prot, old = "DomArch.orig", new = "DomArch",
 #'
 #' @examples
 #' \dontrun{
-#' cleanup_gencontext(prot, domains_rename, T, F)
+#' cleanGenomicContext(prot, domains_rename, T, F)
 #' }
 #'
-cleanup_gencontext <- function(prot, domains_rename = data.frame("old" = character(0), "new" = character(0), stringsAsFactors = F),
-    repeat2s = TRUE, remove_asterisk = TRUE) {
+cleanGenomicContext <- function(prot, domains_rename = data.frame("old" = character(0), "new" = character(0), stringsAsFactors = F),
+    condenseRepeatedDomains = TRUE, remove_asterisk = TRUE) {
     # Create cleaned up GenContext column
     prot$GenContext <- prot$GenContext.orig
 
@@ -641,16 +641,16 @@ cleanup_gencontext <- function(prot, domains_rename = data.frame("old" = charact
     ## Reverse operons | Straighten them out!
     prot <- reverse_operon(prot)
 
-    prot <- replaceQMs(prot, "GenContext")
+    prot <- replaceQuestionMarks(prot, "GenContext")
     ## Optional parameters
     # Condense repeats
-    if (repeat2s) {
-        prot <- repeat2s(prot, "GenContext")
+    if (condenseRepeatedDomains) {
+        prot <- condenseRepeatedDomains(prot, "GenContext")
     }
 
     # Remove the Asterisks
     if (remove_asterisk) {
-        prot <- remove_astrk(prot, colname = "GenContext")
+        prot <- removeAsterisks(prot, colname = "GenContext")
     }
 
     return(prot)
@@ -666,9 +666,9 @@ cleanup_gencontext <- function(prot, domains_rename = data.frame("old" = charact
 #'
 #' @examples
 #' \dontrun{
-#' cleanup_GeneDesc()
+#' cleanGeneDescription()
 #' }
-cleanup_GeneDesc <- function(prot, column) {
+cleanGeneDescription <- function(prot, column) {
     prot[, "GeneDesc"] <- gsub("\\.$", "", prot %>% pull(column))
     prot[, "GeneDesc"] <- gsub("%2C", ",", prot %>% pull(column))
     return(prot)
@@ -688,9 +688,9 @@ cleanup_GeneDesc <- function(prot, column) {
 #'
 #' @examples
 #' \dontrun{
-#' pick_longer_duplicate()
+#' selectLongestDuplicate()
 #' }
-pick_longer_duplicate <- function(prot, column) {
+selectLongestDuplicate <- function(prot, column) {
     col <- sym(column)
 
     prot$row.orig <- 1:nrow(prot)
@@ -736,9 +736,9 @@ pick_longer_duplicate <- function(prot, column) {
 #'
 #' @examples
 #' \dontrun{
-#' cleanup_lineage()
+#' cleanLineage()
 #' }
-cleanup_lineage <- function(prot, lins_rename) {
+cleanLineage <- function(prot, lins_rename) {
     for (i in 1:nrow(lins_rename)) {
         prot$Lineage <- gsub(lins_rename$old[i], lins_rename$new[i],
             x = prot$Lineage,

From 2c1ce1a213d7b36395acbd76d11cb4bf6b8a89f7 Mon Sep 17 00:00:00 2001
From: teddyCodex <samted.uche@gmail.com>
Date: Mon, 7 Oct 2024 09:33:02 +0100
Subject: [PATCH 2/3] regenerate NAMESPACE and man pages with roxygen2

---
 NAMESPACE                                     | 26 +++++++++----------
 man/{cleanup_clust.Rd => cleanClusters.Rd}    | 20 +++++++-------
 ..._domarch.Rd => cleanDomainArchitecture.Rd} | 20 +++++++-------
 ...anup_fasta_header.Rd => cleanFAHeaders.Rd} |  8 +++---
 ...up_GeneDesc.Rd => cleanGeneDescription.Rd} |  8 +++---
 ...p_gencontext.Rd => cleanGenomicContext.Rd} | 12 ++++-----
 man/{cleanup_lineage.Rd => cleanLineage.Rd}   |  8 +++---
 man/{cleanup_species.Rd => cleanSpecies.Rd}   | 10 +++----
 man/{clean_string.Rd => cleanString.Rd}       |  8 +++---
 ...repeat2s.Rd => condenseRepeatedDomains.Rd} | 12 ++++-----
 ..._accnums_unique.Rd => ensureUniqAccNum.Rd} |  8 +++---
 man/{string2accnum.Rd => extractAccNum.Rd}    | 12 ++++-----
 man/{remove_astrk.Rd => removeAsterisks.Rd}   |  8 +++---
 man/{remove_empty.Rd => removeEmptyRows.Rd}   |  8 +++---
 man/{remove_tails.Rd => removeTails.Rd}       |  8 +++---
 ...{replaceQMs.Rd => replaceQuestionMarks.Rd} |  8 +++---
 ...duplicate.Rd => selectLongestDuplicate.Rd} |  8 +++---
 17 files changed, 96 insertions(+), 96 deletions(-)
 rename man/{cleanup_clust.Rd => cleanClusters.Rd} (59%)
 rename man/{cleanup_domarch.Rd => cleanDomainArchitecture.Rd} (66%)
 rename man/{cleanup_fasta_header.Rd => cleanFAHeaders.Rd} (78%)
 rename man/{cleanup_GeneDesc.Rd => cleanGeneDescription.Rd} (70%)
 rename man/{cleanup_gencontext.Rd => cleanGenomicContext.Rd} (78%)
 rename man/{cleanup_lineage.Rd => cleanLineage.Rd} (71%)
 rename man/{cleanup_species.Rd => cleanSpecies.Rd} (70%)
 rename man/{clean_string.Rd => cleanString.Rd} (84%)
 rename man/{repeat2s.Rd => condenseRepeatedDomains.Rd} (67%)
 rename man/{make_accnums_unique.Rd => ensureUniqAccNum.Rd} (80%)
 rename man/{string2accnum.Rd => extractAccNum.Rd} (63%)
 rename man/{remove_astrk.Rd => removeAsterisks.Rd} (72%)
 rename man/{remove_empty.Rd => removeEmptyRows.Rd} (84%)
 rename man/{remove_tails.Rd => removeTails.Rd} (83%)
 rename man/{replaceQMs.Rd => replaceQuestionMarks.Rd} (73%)
 rename man/{pick_longer_duplicate.Rd => selectLongestDuplicate.Rd} (67%)

diff --git a/NAMESPACE b/NAMESPACE
index 16cf0813..9724f0dd 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -16,15 +16,16 @@ export(advanced_opts2est_walltime)
 export(alignFasta)
 export(assert_count_df)
 export(assign_job_queue)
-export(cleanup_GeneDesc)
-export(cleanup_clust)
-export(cleanup_domarch)
-export(cleanup_gencontext)
-export(cleanup_lineage)
-export(cleanup_species)
+export(cleanClusters)
+export(cleanDomainArchitecture)
+export(cleanGeneDescription)
+export(cleanGenomicContext)
+export(cleanLineage)
+export(cleanSpecies)
 export(combine_files)
 export(combine_full)
 export(combine_ipr)
+export(condenseRepeatedDomains)
 export(convert_aln2fa)
 export(convert_fa2tre)
 export(count_bycol)
@@ -35,6 +36,7 @@ export(create_lineage_lookup)
 export(create_one_col_params)
 export(domain_network)
 export(efetch_ipg)
+export(extractAccNum)
 export(filter_by_doms)
 export(filter_freq)
 export(find_paralogs)
@@ -62,25 +64,23 @@ export(make_opts2procs)
 export(map_acc2name)
 export(map_advanced_opts2procs)
 export(msa_pdf)
-export(pick_longer_duplicate)
 export(plot_estimated_walltimes)
 export(prot2tax)
 export(prot2tax_old)
-export(remove_astrk)
-export(remove_empty)
-export(remove_tails)
+export(removeAsterisks)
+export(removeEmptyRows)
+export(removeTails)
 export(rename_fasta)
-export(repeat2s)
-export(replaceQMs)
+export(replaceQuestionMarks)
 export(reveql)
 export(reverse_operon)
 export(run_deltablast)
 export(run_rpsblast)
+export(selectLongestDuplicate)
 export(send_job_status_email)
 export(shorten_lineage)
 export(sink.reset)
 export(stacked_lin_plot)
-export(string2accnum)
 export(summ.DA)
 export(summ.DA.byLin)
 export(summ.GC)
diff --git a/man/cleanup_clust.Rd b/man/cleanClusters.Rd
similarity index 59%
rename from man/cleanup_clust.Rd
rename to man/cleanClusters.Rd
index 4eed8be8..7ef4f3b9 100644
--- a/man/cleanup_clust.Rd
+++ b/man/cleanClusters.Rd
@@ -1,16 +1,16 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/cleanup.R
-\name{cleanup_clust}
-\alias{cleanup_clust}
+\name{cleanClusters}
+\alias{cleanClusters}
 \title{Cleanup Clust}
 \usage{
-cleanup_clust(
+cleanClusters(
   prot,
   domains_rename,
   domains_keep,
-  repeat2s = TRUE,
-  remove_tails = FALSE,
-  remove_empty = FALSE
+  condenseRepeatedDomains = TRUE,
+  removeTails = FALSE,
+  removeEmptyRows = FALSE
 )
 }
 \arguments{
@@ -20,11 +20,11 @@ cleanup_clust(
 
 \item{domains_keep}{A data frame containing the domain names to be retained.}
 
-\item{repeat2s}{Boolean. If TRUE, repeated domains in 'ClustName' are condensed. Default is TRUE.}
+\item{condenseRepeatedDomains}{Boolean. If TRUE, repeated domains in 'ClustName' are condensed. Default is TRUE.}
 
-\item{remove_tails}{Boolean. If TRUE, 'ClustName' will be filtered based on domains to keep/remove. Default is FALSE.}
+\item{removeTails}{Boolean. If TRUE, 'ClustName' will be filtered based on domains to keep/remove. Default is FALSE.}
 
-\item{remove_empty}{Boolean. If TRUE, rows with empty/unnecessary values in 'ClustName' are removed. Default is FALSE.}
+\item{removeEmptyRows}{Boolean. If TRUE, rows with empty/unnecessary values in 'ClustName' are removed. Default is FALSE.}
 }
 \value{
 Cleaned up data frame
@@ -39,6 +39,6 @@ The return value is the cleaned up data frame.
 }
 \examples{
 \dontrun{
-cleanup_clust(prot, TRUE, FALSE, domains_keep, domains_rename)
+cleanClusters(prot, domains_rename, domains_keep, condenseRepeatedDomains = TRUE, removeTails = FALSE)
 }
 }
diff --git a/man/cleanup_domarch.Rd b/man/cleanDomainArchitecture.Rd
similarity index 66%
rename from man/cleanup_domarch.Rd
rename to man/cleanDomainArchitecture.Rd
index 21955509..887b5388 100644
--- a/man/cleanup_domarch.Rd
+++ b/man/cleanDomainArchitecture.Rd
@@ -1,18 +1,18 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/cleanup.R
-\name{cleanup_domarch}
-\alias{cleanup_domarch}
+\name{cleanDomainArchitecture}
+\alias{cleanDomainArchitecture}
 \title{Cleanup DomArch}
 \usage{
-cleanup_domarch(
+cleanDomainArchitecture(
   prot,
   old = "DomArch.orig",
   new = "DomArch",
   domains_keep,
   domains_rename,
-  repeat2s = TRUE,
-  remove_tails = FALSE,
-  remove_empty = F,
+  condenseRepeatedDomains = TRUE,
+  removeTails = FALSE,
+  removeEmptyRows = F,
   domains_ignore = NULL
 )
 }
@@ -24,11 +24,11 @@ cleanup_domarch(
 \item{domains_rename}{A data frame containing the domain names to be replaced in a column 'old' and the
 corresponding replacement values in a column 'new'.}
 
-\item{repeat2s}{Boolean. If TRUE, repeated domains in 'DomArch' are condensed. Default is TRUE.}
+\item{condenseRepeatedDomains}{Boolean. If TRUE, repeated domains in 'DomArch' are condensed. Default is TRUE.}
 
-\item{remove_tails}{Boolean. If TRUE, 'ClustName' will be filtered based on domains to keep/remove. Default is FALSE.}
+\item{removeTails}{Boolean. If TRUE, 'ClustName' will be filtered based on domains to keep/remove. Default is FALSE.}
 
-\item{remove_empty}{Boolean. If TRUE, rows with empty/unnecessary values in 'DomArch' are removed. Default is FALSE.}
+\item{removeEmptyRows}{Boolean. If TRUE, rows with empty/unnecessary values in 'DomArch' are removed. Default is FALSE.}
 
 \item{domains_ignore}{A data frame containing the domain names to be removed in a column called 'domains'}
 }
@@ -46,6 +46,6 @@ The original data frame is returned with the clean DomArchs column and the old d
 }
 \examples{
 \dontrun{
-cleanup_domarch(prot, TRUE, FALSE, domains_keep, domains_rename, domains_ignore = NULL)
+cleanDomainArchitecture(prot, domains_keep = domains_keep, domains_rename = domains_rename, condenseRepeatedDomains = TRUE, removeTails = FALSE, domains_ignore = NULL)
 }
 }
diff --git a/man/cleanup_fasta_header.Rd b/man/cleanFAHeaders.Rd
similarity index 78%
rename from man/cleanup_fasta_header.Rd
rename to man/cleanFAHeaders.Rd
index 416f6be2..e9ad9b30 100644
--- a/man/cleanup_fasta_header.Rd
+++ b/man/cleanFAHeaders.Rd
@@ -1,10 +1,10 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/cleanup.R
-\name{cleanup_fasta_header}
-\alias{cleanup_fasta_header}
+\name{cleanFAHeaders}
+\alias{cleanFAHeaders}
 \title{Cleanup FASTA Header}
 \usage{
-cleanup_fasta_header(fasta)
+cleanFAHeaders(fasta)
 }
 \arguments{
 \item{fasta}{}
@@ -19,6 +19,6 @@ suffix of the ith occurence to handle duplicates
 \examples{
 \dontrun{
 AAStringSet(c("xxx" = "ATCG", "xxx" = "GGGC")) |>
-    cleanup_fasta_header()
+    cleanFAHeaders()
 }
 }
diff --git a/man/cleanup_GeneDesc.Rd b/man/cleanGeneDescription.Rd
similarity index 70%
rename from man/cleanup_GeneDesc.Rd
rename to man/cleanGeneDescription.Rd
index 3068fe49..f98a25d4 100644
--- a/man/cleanup_GeneDesc.Rd
+++ b/man/cleanGeneDescription.Rd
@@ -1,10 +1,10 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/cleanup.R
-\name{cleanup_GeneDesc}
-\alias{cleanup_GeneDesc}
+\name{cleanGeneDescription}
+\alias{cleanGeneDescription}
 \title{Cleanup GeneDesc}
 \usage{
-cleanup_GeneDesc(prot, column)
+cleanGeneDescription(prot, column)
 }
 \arguments{
 \item{column}{}
@@ -17,6 +17,6 @@ Cleanup GeneDesc
 }
 \examples{
 \dontrun{
-cleanup_GeneDesc()
+cleanGeneDescription()
 }
 }
diff --git a/man/cleanup_gencontext.Rd b/man/cleanGenomicContext.Rd
similarity index 78%
rename from man/cleanup_gencontext.Rd
rename to man/cleanGenomicContext.Rd
index 8e26a447..2c2dcc18 100644
--- a/man/cleanup_gencontext.Rd
+++ b/man/cleanGenomicContext.Rd
@@ -1,14 +1,14 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/cleanup.R
-\name{cleanup_gencontext}
-\alias{cleanup_gencontext}
+\name{cleanGenomicContext}
+\alias{cleanGenomicContext}
 \title{Cleanup Genomic Contexts}
 \usage{
-cleanup_gencontext(
+cleanGenomicContext(
   prot,
   domains_rename = data.frame(old = character(0), new = character(0), stringsAsFactors =
     F),
-  repeat2s = TRUE,
+  condenseRepeatedDomains = TRUE,
   remove_asterisk = TRUE
 )
 }
@@ -18,7 +18,7 @@ cleanup_gencontext(
 \item{domains_rename}{A data frame containing the domain names to be replaced in a column 'old' and the replacement in a column 'new'.
 Defaults to an empty data frame with a new and old column such that non of the domains will be renamed}
 
-\item{repeat2s}{Boolean. If TRUE, repeated domains in 'GenContext' are condensed. Default is TRUE.}
+\item{condenseRepeatedDomains}{Boolean. If TRUE, repeated domains in 'GenContext' are condensed. Default is TRUE.}
 
 \item{remove_asterisk}{Boolean. If TRUE, asterisks in 'ClustName' are removed. Default is TRUE.}
 }
@@ -33,7 +33,7 @@ A cleaned up version of the data table is returned.
 }
 \examples{
 \dontrun{
-cleanup_gencontext(prot, domains_rename, T, F)
+cleanGenomicContext(prot, domains_rename, T, F)
 }
 
 }
diff --git a/man/cleanup_lineage.Rd b/man/cleanLineage.Rd
similarity index 71%
rename from man/cleanup_lineage.Rd
rename to man/cleanLineage.Rd
index 35669f4e..adcea312 100644
--- a/man/cleanup_lineage.Rd
+++ b/man/cleanLineage.Rd
@@ -1,10 +1,10 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/cleanup.R
-\name{cleanup_lineage}
-\alias{cleanup_lineage}
+\name{cleanLineage}
+\alias{cleanLineage}
 \title{Cleanup Lineage}
 \usage{
-cleanup_lineage(prot, lins_rename)
+cleanLineage(prot, lins_rename)
 }
 \arguments{
 \item{lins_rename}{}
@@ -17,6 +17,6 @@ Cleanup Lineage
 }
 \examples{
 \dontrun{
-cleanup_lineage()
+cleanLineage()
 }
 }
diff --git a/man/cleanup_species.Rd b/man/cleanSpecies.Rd
similarity index 70%
rename from man/cleanup_species.Rd
rename to man/cleanSpecies.Rd
index beedb23c..82b5444c 100644
--- a/man/cleanup_species.Rd
+++ b/man/cleanSpecies.Rd
@@ -1,15 +1,15 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/cleanup.R
-\name{cleanup_species}
-\alias{cleanup_species}
+\name{cleanSpecies}
+\alias{cleanSpecies}
 \title{Cleanup Species}
 \usage{
-cleanup_species(prot, remove_empty = FALSE)
+cleanSpecies(prot, removeEmptyRows = FALSE)
 }
 \arguments{
 \item{prot}{A data frame that contains columns 'Species'.}
 
-\item{remove_empty}{Boolean. If TRUE, rows with empty/unnecessary values in 'Species' are removed.
+\item{removeEmptyRows}{Boolean. If TRUE, rows with empty/unnecessary values in 'Species' are removed.
 Default is false.}
 }
 \value{
@@ -25,6 +25,6 @@ A cleaned up version of the data table is returned.
 }
 \examples{
 \dontrun{
-cleanup_species(prot, TRUE)
+cleanSpecies(prot, TRUE)
 }
 }
diff --git a/man/clean_string.Rd b/man/cleanString.Rd
similarity index 84%
rename from man/clean_string.Rd
rename to man/cleanString.Rd
index a17a95bb..0dc2937e 100644
--- a/man/clean_string.Rd
+++ b/man/cleanString.Rd
@@ -1,10 +1,10 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/cleanup.R
-\name{clean_string}
-\alias{clean_string}
+\name{cleanString}
+\alias{cleanString}
 \title{Clean String}
 \usage{
-clean_string(string)
+cleanString(string)
 }
 \arguments{
 \item{string}{}
@@ -19,7 +19,7 @@ cleanup domain architecture values
 }
 \examples{
 \dontrun{
-clean_string()
+cleanString()
 }
 
 }
diff --git a/man/repeat2s.Rd b/man/condenseRepeatedDomains.Rd
similarity index 67%
rename from man/repeat2s.Rd
rename to man/condenseRepeatedDomains.Rd
index 30a09cc6..3b239129 100644
--- a/man/repeat2s.Rd
+++ b/man/condenseRepeatedDomains.Rd
@@ -1,17 +1,17 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/cleanup.R
-\name{repeat2s}
-\alias{repeat2s}
-\title{repeat2s}
+\name{condenseRepeatedDomains}
+\alias{condenseRepeatedDomains}
+\title{condenseRepeatedDomains}
 \usage{
-repeat2s(prot, by_column = "DomArch", excluded_prots = c())
+condenseRepeatedDomains(prot, by_column = "DomArch", excluded_prots = c())
 }
 \arguments{
 \item{prot}{A data frame containing 'DomArch', 'GenContext', 'ClustName' columns.}
 
 \item{by_column}{Column in which repeats are condensed to domain+domain -> domain(s).}
 
-\item{excluded_prots}{Vector of strings that repeat2s should not reduce to (s). Defaults to c()}
+\item{excluded_prots}{Vector of strings that condenseRepeatedDomains should not reduce to (s). Defaults to c()}
 }
 \value{
 Describe return, in detail
@@ -27,6 +27,6 @@ The original data frame is returned with the corresponding cleaned up column.
 }
 \examples{
 \dontrun{
-repeat2s(prot, "DomArch")
+condenseRepeatedDomains(prot, "DomArch")
 }
 }
diff --git a/man/make_accnums_unique.Rd b/man/ensureUniqAccNum.Rd
similarity index 80%
rename from man/make_accnums_unique.Rd
rename to man/ensureUniqAccNum.Rd
index 62866a24..ddb4a70d 100644
--- a/man/make_accnums_unique.Rd
+++ b/man/ensureUniqAccNum.Rd
@@ -1,10 +1,10 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/cleanup.R
-\name{make_accnums_unique}
-\alias{make_accnums_unique}
+\name{ensureUniqAccNum}
+\alias{ensureUniqAccNum}
 \title{make accnums unique}
 \usage{
-make_accnums_unique(accnums)
+ensureUniqAccNum(accnums)
 }
 \arguments{
 \item{accnums}{\link{chr} a vector of accession numbers}
@@ -19,6 +19,6 @@ character vector) making them unique
 \examples{
 \dontrun{
 c("xxx", "xxx", "xxx", "yyy", "yyy") |>
-    make_accnums_unique()
+    ensureUniqAccNum()
 }
 }
diff --git a/man/string2accnum.Rd b/man/extractAccNum.Rd
similarity index 63%
rename from man/string2accnum.Rd
rename to man/extractAccNum.Rd
index dd7de249..15870f3f 100644
--- a/man/string2accnum.Rd
+++ b/man/extractAccNum.Rd
@@ -1,10 +1,10 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/cleanup.R
-\name{string2accnum}
-\alias{string2accnum}
-\title{string2accnum}
+\name{extractAccNum}
+\alias{extractAccNum}
+\title{extractAccNum}
 \usage{
-string2accnum(string)
+extractAccNum(string)
 }
 \arguments{
 \item{string}{}
@@ -13,10 +13,10 @@ string2accnum(string)
 Describe return, in detail
 }
 \description{
-string2accnum
+extractAccNum
 }
 \examples{
 \dontrun{
-string2accnum()
+extractAccNum()
 }
 }
diff --git a/man/remove_astrk.Rd b/man/removeAsterisks.Rd
similarity index 72%
rename from man/remove_astrk.Rd
rename to man/removeAsterisks.Rd
index 3562521d..691a7adf 100644
--- a/man/remove_astrk.Rd
+++ b/man/removeAsterisks.Rd
@@ -1,10 +1,10 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/cleanup.R
-\name{remove_astrk}
-\alias{remove_astrk}
+\name{removeAsterisks}
+\alias{removeAsterisks}
 \title{Remove Astrk}
 \usage{
-remove_astrk(query_data, colname = "GenContext")
+removeAsterisks(query_data, colname = "GenContext")
 }
 \arguments{
 \item{colname}{}
@@ -18,6 +18,6 @@ Used for removing * from GenContext columns
 }
 \examples{
 \dontrun{
-remove_astrk()
+removeAsterisks()
 }
 }
diff --git a/man/remove_empty.Rd b/man/removeEmptyRows.Rd
similarity index 84%
rename from man/remove_empty.Rd
rename to man/removeEmptyRows.Rd
index cfbf707b..66551810 100644
--- a/man/remove_empty.Rd
+++ b/man/removeEmptyRows.Rd
@@ -1,10 +1,10 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/cleanup.R
-\name{remove_empty}
-\alias{remove_empty}
+\name{removeEmptyRows}
+\alias{removeEmptyRows}
 \title{Remove Empty}
 \usage{
-remove_empty(prot, by_column = "DomArch")
+removeEmptyRows(prot, by_column = "DomArch")
 }
 \arguments{
 \item{prot}{A data frame containing 'DomArch', 'Species', 'GenContext', 'ClustName' columns.}
@@ -25,6 +25,6 @@ The original data frame is returned with the corresponding cleaned up column.
 }
 \examples{
 \dontrun{
-remove_empty(prot, "DomArch")
+removeEmptyRows(prot, "DomArch")
 }
 }
diff --git a/man/remove_tails.Rd b/man/removeTails.Rd
similarity index 83%
rename from man/remove_tails.Rd
rename to man/removeTails.Rd
index 1cd20861..76d1e18a 100644
--- a/man/remove_tails.Rd
+++ b/man/removeTails.Rd
@@ -1,10 +1,10 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/cleanup.R
-\name{remove_tails}
-\alias{remove_tails}
+\name{removeTails}
+\alias{removeTails}
 \title{Remove Tails}
 \usage{
-remove_tails(prot, by_column = "DomArch", keep_domains = FALSE)
+removeTails(prot, by_column = "DomArch", keep_domains = FALSE)
 }
 \arguments{
 \item{prot}{A data frame containing 'DomArch', 'GenContext', 'ClustName' columns.}
@@ -25,6 +25,6 @@ The original data frame is returned with the corresponding cleaned up column.
 }
 \examples{
 \dontrun{
-remove_tails(prot, "DomArch")
+removeTails(prot, "DomArch")
 }
 }
diff --git a/man/replaceQMs.Rd b/man/replaceQuestionMarks.Rd
similarity index 73%
rename from man/replaceQMs.Rd
rename to man/replaceQuestionMarks.Rd
index 604a8ece..0949568f 100644
--- a/man/replaceQMs.Rd
+++ b/man/replaceQuestionMarks.Rd
@@ -1,10 +1,10 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/cleanup.R
-\name{replaceQMs}
-\alias{replaceQMs}
+\name{replaceQuestionMarks}
+\alias{replaceQuestionMarks}
 \title{Replace QMs}
 \usage{
-replaceQMs(prot, by_column = "GenContext")
+replaceQuestionMarks(prot, by_column = "GenContext")
 }
 \arguments{
 \item{prot}{DataTable to operate on}
@@ -20,7 +20,7 @@ Replace '?' with 'X'
 }
 \examples{
 \dontrun{
-replaceQMs()
+replaceQuestionMarks()
 }
 
 }
diff --git a/man/pick_longer_duplicate.Rd b/man/selectLongestDuplicate.Rd
similarity index 67%
rename from man/pick_longer_duplicate.Rd
rename to man/selectLongestDuplicate.Rd
index d7858da7..c177d289 100644
--- a/man/pick_longer_duplicate.Rd
+++ b/man/selectLongestDuplicate.Rd
@@ -1,10 +1,10 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/cleanup.R
-\name{pick_longer_duplicate}
-\alias{pick_longer_duplicate}
+\name{selectLongestDuplicate}
+\alias{selectLongestDuplicate}
 \title{Pick Longer Duplicate}
 \usage{
-pick_longer_duplicate(prot, column)
+selectLongestDuplicate(prot, column)
 }
 \arguments{
 \item{column}{}
@@ -17,6 +17,6 @@ Pick Longer Duplicate
 }
 \examples{
 \dontrun{
-pick_longer_duplicate()
+selectLongestDuplicate()
 }
 }

From 843ecda71722bd4d152c43075d7f49567e46b0b6 Mon Sep 17 00:00:00 2001
From: David Mayer <david.mayer@cuanschutz.edu>
Date: Mon, 7 Oct 2024 15:40:15 -0600
Subject: [PATCH 3/3] use renamed cleanString() in domain_network()

---
 R/networks_domarch.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/R/networks_domarch.R b/R/networks_domarch.R
index 66385a74..010b7619 100755
--- a/R/networks_domarch.R
+++ b/R/networks_domarch.R
@@ -74,11 +74,11 @@ domain_network <- function(prot, column = "DomArch", domains_of_interest, cutoff
 
             # string clean up all of the Domain Architecture columns
             prot <- prot |>
-                mutate(DomArch.ntwrk = clean_string(DomArch.ntwrk)) |>
+                mutate(DomArch.ntwrk = cleanString(DomArch.ntwrk)) |>
                 mutate(
                     across(
                         all_of(column),
-                        clean_string
+                        cleanString
                     )
                 )
             domains_of_interest_regex <- paste(domains_of_interest, collapse = "|")