Merge branch 'JRaviLab:main' into testthat

JRaviLab · Oct 10, 2024 · c91d38b · c91d38b
2 parents 3838564 + b96198e
commit c91d38b
Show file tree

Hide file tree

Showing 14 changed files with 155 additions and 65 deletions.
diff --git a/NAMESPACE b/NAMESPACE
@@ -117,6 +117,7 @@ importFrom(assertthat,assert_that)
 importFrom(assertthat,has_name)
 importFrom(base64enc,base64encode)
 importFrom(biomartr,getKingdomAssemblySummary)
+importFrom(d3r,d3_nest)
 importFrom(data.table,as.data.table)
 importFrom(data.table,fread)
 importFrom(data.table,fwrite)
@@ -181,6 +182,7 @@ importFrom(ggplot2,theme)
 importFrom(ggplot2,theme_classic)
 importFrom(ggplot2,theme_grey)
 importFrom(ggplot2,theme_minimal)
+importFrom(ggplot2,unit)
 importFrom(ggplot2,xlab)
 importFrom(ggplot2,ylab)
 importFrom(grDevices,adjustcolor)
@@ -237,13 +239,15 @@ importFrom(readr,write_file)
 importFrom(readr,write_lines)
 importFrom(readr,write_tsv)
 importFrom(rentrez,entrez_fetch)
+importFrom(rlang,.data)
 importFrom(rlang,as_string)
 importFrom(rlang,sym)
 importFrom(sendmailR,mime_part)
 importFrom(sendmailR,sendmail)
 importFrom(seqinr,dist.alignment)
 importFrom(seqinr,read.alignment)
 importFrom(shiny,showNotification)
+importFrom(stats,as.formula)
 importFrom(stats,complete.cases)
 importFrom(stats,logLik)
 importFrom(stats,na.omit)
@@ -264,6 +268,7 @@ importFrom(stringr,str_sub)
 importFrom(stringr,str_trim)
 importFrom(stringr,word)
 importFrom(sunburstR,sunburst)
+importFrom(sunburstR,sund2b)
 importFrom(tibble,as_tibble)
 importFrom(tibble,tibble)
 importFrom(tidyr,drop_na)

diff --git a/R/acc2lin.R b/R/acc2lin.R
@@ -25,18 +25,25 @@ sinkReset <- function() {
 
 #' addLineage
 #'
-#' @param df
-#' @param acc_col
-#' @param assembly_path
-#' @param lineagelookup_path
-#' @param ipgout_path
-#' @param plan
+#' @param df A `data.frame` containing the input data. One column must contain 
+#' the accession numbers.
+#' @param acc_col A string specifying the column name in `df` that holds the 
+#' accession numbers. Defaults to `"AccNum"`.
+#' @param assembly_path A string specifying the path to the `assembly_summary.txt` 
+#' file. This file contains metadata about assemblies.
+#' @param lineagelookup_path A string specifying the path to the lineage lookup 
+#' file, which contains a mapping from tax IDs to their corresponding lineages.
+#' @param ipgout_path (Optional) A string specifying the path where IPG database 
+#' fetch results will be saved. If `NULL`, the results are not written to a file.
+#' @param plan A string specifying the parallelization strategy for the future
+#' package, such as `"sequential"` or `"multisession"`.
 #'
 #' @importFrom dplyr pull
 #' @importFrom magrittr %>%
 #' @importFrom rlang sym
 #'
-#' @return Describe return, in detail
+#' @return A `data.frame` that combines the original `df` with the lineage 
+#' information.
 #' @export
 #'
 #' @examples
@@ -78,9 +85,12 @@ addLineage <- function(df, acc_col = "AccNum", assembly_path,
 #' (taxid to lineage mapping). This file can be generated using the
 #' @param ipgout_path Path to write the results of the efetch run of the accessions
 #' on the ipg database. If NULL, the file will not be written. Defaults to NULL
-#' @param plan
+#' @param plan A string specifying the parallelization strategy for the future
+#' package, such as `"sequential"` or `"multisession"`.
 #'
-#' @return Describe return, in detail
+#' @return A `data.table` that contains the lineage information, mapping protein 
+#' accessions to their tax IDs and lineages.
+#' @export
 #' @export
 #'
 #' @examples
@@ -112,13 +122,14 @@ acc2Lineage <- function(accessions, assembly_path, lineagelookup_path, ipgout_pa
 #' @param accnums Character vector containing the accession numbers to query on
 #' the ipg database
 #' @param out_path Path to write the efetch results to
-#' @param plan
+#' @param plan A string specifying the parallelization strategy for the future
+#' package, such as `"sequential"` or `"multisession"`.
 #'
 #' @importFrom furrr future_map
 #' @importFrom future plan
 #' @importFrom rentrez entrez_fetch
 #'
-#' @return Describe return, in detail
+#' @return No return value. The function writes the fetched results to `out_path`.
 #' @export
 #'
 #' @examples
@@ -186,7 +197,8 @@ efetchIPG <- function(accnums, out_path, plan = "sequential", ...) {
 #'
 #' @importFrom data.table fread
 #'
-#' @return Describe return, in detail
+#' @return A `data.table` with the lineage information for the provided protein 
+#' accessions.
 #' @export
 #'
 #' @examples

diff --git a/R/cleanup.R b/R/cleanup.R
@@ -88,12 +88,12 @@ ensureUniqAccNum <- function(accnums) {
     # for the index of occurence for each accession number
     df_accnums <- tibble::tibble("accnum" = accnums)
     df_accnums <- df_accnums |>
-        dplyr::group_by(accnum) |>
+        dplyr::group_by(.data$accnum) |>
         dplyr::mutate(suffix = dplyr::row_number()) |>
         dplyr::ungroup() |>
-        dplyr::mutate(accnum_adjusted = paste0(accnum, "_", suffix)) |>
-        dplyr::arrange(accnum_adjusted)
-    accnums_adjusted <- df_accnums |> dplyr::pull(accnum_adjusted)
+        dplyr::mutate(accnum_adjusted = paste0(.data$accnum, "_", .data$suffix)) |>
+        dplyr::arrange(.data$accnum_adjusted)
+    accnums_adjusted <- df_accnums |> dplyr::pull(.data$accnum_adjusted)
 
     return(accnums_adjusted)
 }

diff --git a/R/fa2domain.R b/R/fa2domain.R
@@ -148,10 +148,10 @@ createIPRScanDomainTable <- function(
     # filter for the accnum of interest (note: it's possible the accession
     # number is not in the table [i.e., it had no domains])
     df_iprscan_accnum <- df_iprscan |>
-        dplyr::filter(Analysis %in% analysis) |>
-        dplyr::filter(AccNum == accnum) |>
+        dplyr::filter(.data$Analysis %in% analysis) |>
+        dplyr::filter(.data$AccNum == accnum) |>
         dplyr::select(dplyr::all_of(c("AccNum", "DB.ID", "StartLoc", "StopLoc"))) |>
-        dplyr::arrange(StartLoc)
+        dplyr::arrange(.data$StartLoc)
     # handle the case of no records after filtering by "Analysis"; return the tibble
     # with 0 rows quickly
     if (nrow(df_iprscan_accnum) < 1) {
@@ -163,9 +163,9 @@ createIPRScanDomainTable <- function(
         dplyr::rowwise() |>
         dplyr::mutate(
             seq_domain = XVector::subseq(
-                fasta[[grep(pattern = AccNum, x = names(fasta), fixed = TRUE)]],
-                start = StartLoc,
-                end = StopLoc
+                fasta[[grep(pattern = .data$AccNum, x = names(fasta), fixed = TRUE)]],
+                start = .data$StartLoc,
+                end = .data$StopLoc
             ) |>
                 as.character()
         )
@@ -176,7 +176,7 @@ createIPRScanDomainTable <- function(
             id_domain = stringr::str_glue("{AccNum}-{DB.ID}-{StartLoc}_{StopLoc}")
         ) |>
         dplyr::ungroup() |>
-        dplyr::relocate(id_domain, .before = 1)
+        dplyr::relocate(.data$id_domain, .before = 1)
     return(df_iprscan_domains)
 }
 

diff --git a/R/ipr2viz.R b/R/ipr2viz.R
@@ -53,6 +53,7 @@ theme_genes2 <- function() {
 #' @importFrom shiny showNotification
 #' @importFrom stats na.omit
 #' @importFrom rlang sym
+#' @importFrom rlang .data
 #'
 #' @return
 #' @export
@@ -105,9 +106,10 @@ find_top_acc <- function(infile_full,
 #'
 #' @importFrom dplyr distinct filter select
 #' @importFrom gggenes geom_gene_arrow geom_subgene_arrow
-#' @importFrom ggplot2 aes aes_string as_labeller element_text facet_wrap ggplot guides margin scale_fill_manual theme theme_minimal ylab
+#' @importFrom ggplot2 aes aes_string as_labeller element_text facet_wrap ggplot guides margin scale_fill_manual theme theme_minimal unit ylab
 #' @importFrom readr read_tsv
 #' @importFrom tidyr pivot_wider
+#' @importFrom stats as.formula
 #'
 #' @return
 #' @export
@@ -134,10 +136,10 @@ ipr2viz <- function(infile_ipr = NULL, infile_full = NULL, accessions = c(),
     ADDITIONAL_COLORS <- sample(CPCOLS, 1000, replace = TRUE)
     CPCOLS <- append(x = CPCOLS, values = ADDITIONAL_COLORS)
     ## Read IPR file
-    ipr_out <- read_tsv(infile_ipr, col_names = T, col_types = iprscan_cols)
-    ipr_out <- ipr_out %>% filter(Name %in% accessions)
+    ipr_out <- read_tsv(infile_ipr, col_names = T, col_types = MolEvolvR::iprscan_cols)
+    ipr_out <- ipr_out %>% filter(.data$Name %in% accessions)
     analysis_cols <- paste0("DomArch.", analysis)
-    infile_full <- infile_full %>% select(analysis_cols, Lineage_short, QueryName, PcPositive, AccNum)
+    infile_full <- infile_full %>% select(.data$analysis_cols, .data$Lineage_short, .data$QueryName, .data$PcPositive, .data$AccNum)
     ## To filter by Analysis
     analysis <- paste(analysis, collapse = "|")
     ## @SAM: This can't be set in stone since the analysis may change!
@@ -157,22 +159,22 @@ ipr2viz <- function(infile_ipr = NULL, infile_full = NULL, accessions = c(),
     ## Need to fix this eventually based on the 'real' gene orientation! :)
     ipr_out$Strand <- rep("forward", nrow(ipr_out))
 
-    ipr_out <- ipr_out %>% arrange(AccNum, StartLoc, StopLoc)
+    ipr_out <- ipr_out %>% arrange(.data$AccNum, .data$StartLoc, .data$StopLoc)
     ipr_out_sub <- filter(
         ipr_out,
-        grepl(pattern = analysis, x = Analysis)
+        grepl(pattern = analysis, x = .data$Analysis)
     )
     # dynamic analysis labeller
     analyses <- ipr_out_sub %>%
-        select(Analysis) %>%
+        select(.data$Analysis) %>%
         distinct()
     analysis_labeler <- analyses %>%
-        pivot_wider(names_from = Analysis, values_from = Analysis)
+        pivot_wider(names_from = .data$Analysis, values_from = .data$Analysis)
 
     lookup_tbl_path <- "/data/research/jravilab/common_data/cln_lookup_tbl.tsv"
-    lookup_tbl <- read_tsv(lookup_tbl_path, col_names = T, col_types = lookup_table_cols)
+    lookup_tbl <- read_tsv(lookup_tbl_path, col_names = T, col_types = MolEvolvR::lookup_table_cols)
 
-    lookup_tbl <- lookup_tbl %>% select(-ShortName) # Already has ShortName -- Just needs SignDesc
+    lookup_tbl <- lookup_tbl %>% select(-.data$ShortName) # Already has ShortName -- Just needs SignDesc
     # ipr_out_sub = ipr_out_sub %>% select(-ShortName)
     # TODO: Fix lookup table and uncomment below
     # ipr_out_sub <- merge(ipr_out_sub, lookup_tbl, by.x = "DB.ID", by.y = "DB.ID")
@@ -195,7 +197,7 @@ ipr2viz <- function(infile_ipr = NULL, infile_full = NULL, accessions = c(),
             ), color = "white") +
             geom_gene_arrow(fill = NA, color = "grey") +
             # geom_blank(data = dummies) +
-            facet_wrap(~Analysis,
+            facet_wrap(~.data$Analysis,
                 strip.position = "top", ncol = 5,
                 labeller = as_labeller(analysis_labeler)
             ) +
@@ -216,9 +218,9 @@ ipr2viz <- function(infile_ipr = NULL, infile_full = NULL, accessions = c(),
         plot <- ggplot(
             ipr_out_sub,
             aes(
-                xmin = 1, xmax = SLength,
-                y = Analysis, # y = AccNum
-                label = ShortName
+                xmin = 1, xmax = .data$SLength,
+                y = .data$Analysis, # y = AccNum
+                label = .data$ShortName
             )
         ) +
             geom_subgene_arrow(data = ipr_out_sub, aes_string(
@@ -295,15 +297,15 @@ ipr2viz_web <- function(infile_ipr,
     ## @SAM, colnames, merges, everything neeeds to be done now based on the
     ## combined lookup table from "common_data"
     lookup_tbl_path <- "/data/research/jravilab/common_data/cln_lookup_tbl.tsv"
-    lookup_tbl <- read_tsv(lookup_tbl_path, col_names = T, col_types = lookup_table_cols)
+    lookup_tbl <- read_tsv(lookup_tbl_path, col_names = T, col_types = MolEvolvR::lookup_table_cols)
 
     ## Read IPR file and subset by Accessions
     ipr_out <- read_tsv(infile_ipr, col_names = T)
     ipr_out <- ipr_out %>% filter(Name %in% accessions)
     ## Need to fix eventually based on 'real' gene orientation!
     ipr_out$Strand <- rep("forward", nrow(ipr_out))
 
-    ipr_out <- ipr_out %>% arrange(AccNum, StartLoc, StopLoc)
+    ipr_out <- ipr_out %>% arrange(.data$AccNum, .data$StartLoc, .data$StopLoc)
     ipr_out_sub <- filter(
         ipr_out,
         grepl(pattern = analysis, x = Analysis)

diff --git a/R/plotting.R b/R/plotting.R
@@ -521,8 +521,8 @@ plotLineageNeighbors <- function(query_data = "prot", query = "pspa",
         gather(key = TopNeighbors.DA, value = count, 19:ncol(query_data)) %>%
         select("Lineage", "TopNeighbors.DA", "count") %>% # "DomArch.norep","GenContext.norep",
         group_by(TopNeighbors.DA, Lineage) %>%
-        summarise(lincount = sum(count), bin = as.numeric(as.logical(lincount))) %>%
-        arrange(desc(lincount)) %>%
+        summarise(lincount =sum(count), bin = as.numeric(as.logical(.data$lincount))) %>%
+        arrange(desc(.data$lincount)) %>%
         within(TopNeighbors.DA <- factor(TopNeighbors.DA,
             levels = rev(names(sort(table(TopNeighbors.DA),
                 decreasing = TRUE
@@ -538,9 +538,9 @@ plotLineageNeighbors <- function(query_data = "prot", query = "pspa",
         geom_tile(
             data = subset(
                 query.ggplot,
-                !is.na(lincount)
+                !is.na(.data$lincount)
             ), # bin
-            aes(fill = lincount), # bin
+            aes(fill = .data$lincount), # bin
             colour = "coral3", size = 0.3
         ) + # , width=0.7, height=0.7),
         scale_fill_gradient(low = "white", high = "darkred") +
@@ -1183,10 +1183,11 @@ createWordCloud2Element <- function(query_data = "prot",
 #' then the legend will be in the descending order of the top level hierarchy.
 #' will be rendered. If the type is sund2b, a sund2b plot will be rendered.
 #'
+#' @importFrom d3r d3_nest
 #' @importFrom dplyr arrange desc group_by_at select summarise
 #' @importFrom htmlwidgets onRender
 #' @importFrom rlang sym
-#' @importFrom sunburstR sunburst
+#' @importFrom sunburstR sunburst sund2b
 #' @importFrom tidyr drop_na separate
 #'
 #' @return
@@ -1227,9 +1228,9 @@ plotLineageSunburst <- function(prot, lineage_column = "Lineage",
 
     # Plot sunburst
     if (type == "sunburst") {
-        result <- sunburst(tree, legend = list(w = 225, h = 15, r = 5, s = 5), colors = cpcols, legendOrder = legendOrder, width = "100%", height = "100%")
+        result <- sunburst(tree, legend = list(w = 225, h = 15, r = 5, s = 5), colors = .data$cpcols, legendOrder = legendOrder, width = "100%", height = "100%")
     } else if (type == "sund2b") {
-        result <- sund2b(tree)
+        result <- .data$sund2b(tree)
     }
 
     if (showLegend) {

diff --git a/R/tree.R b/R/tree.R
@@ -37,14 +37,23 @@
 ## !! FastTree will only work if there are unique sequence names!!
 #' convertFA2Tree
 #'
-#' @param fa_path
-#' @param tre_path
-#' @param fasttree_path
+#' @param fa_path Path to the input FASTA alignment file (.fa). Default is the 
+#' path to "data/alns/pspa_snf7.fa".
+#' @param tre_path Path to the output file where the generated tree (.tre) will 
+#' be saved. Default is the path to "data/alns/pspa_snf7.tre".
+#' @param fasttree_path Path to the FastTree executable, which is used to 
+#' generate the phylogenetic tree. Default is "src/FastTree".
 #'
-#' @return
+#' @return No return value. The function generates a tree file (.tre) from the 
+#' input FASTA file.
 #' @export
 #'
 #' @examples
+#' \dontrun{
+#' convertFA2Tree(here("data/alns/pspa_snf7.fa"),
+#'                 here("data/alns/pspa_snf7.tre"),
+#'                 here("src/FastTree"))
+#' }
 convertFA2Tree <- function(fa_path = here("data/alns/pspa_snf7.fa"),
     tre_path = here("data/alns/pspa_snf7.tre"),
     fasttree_path = here("src/FastTree")) {
@@ -72,16 +81,22 @@ convertFA2Tree <- function(fa_path = here("data/alns/pspa_snf7.fa"),
 #' @description
 #' Generate Trees for ALL fasta files in "data/alns"
 #'
-#' @param aln_path
+#' @param aln_path Path to the directory containing all the alignment FASTA 
+#' files (.fa) for which trees will be generated. Default is "data/alns/".
+#' 
 #'
 #' @importFrom here here
 #' @importFrom purrr pmap
 #' @importFrom stringr str_replace_all
 #'
-#' @return
+#' @return No return value. The function generates tree files (.tre) for each 
+#' alignment file in the specified directory.
 #' @export
 #'
 #' @examples
+#' \dontrun{
+#' convertAlignment2Trees(here("data/alns/"))
+#' }
 convertAlignment2Trees <- function(aln_path = here("data/alns/")) {
     # finding all fasta alignment files
     fa_filenames <- list.files(path = aln_path, pattern = "*.fa")
@@ -111,16 +126,19 @@ convertAlignment2Trees <- function(aln_path = here("data/alns/")) {
 #' @description
 #' Generating phylogenetic tree from alignment file '.fa'
 #'
-#' @param fa_file Character. Path to file.
-#'  Default is 'pspa_snf7.fa'
-#' @param out_file
+#' @param fa_file Character. Path to the alignment FASTA file (.fa) from which
+#'  the phylogenetic tree will be generated. Default is 'pspa_snf7.fa'.
+#' @param out_file Path to the output file where the generated tree (.tre) will 
+#' be saved. Default is "data/alns/pspa_snf7.tre".
 #'
 #' @importFrom ape write.tree
 #' @importFrom phangorn bootstrap.pml dist.ml NJ modelTest phyDat plotBS pml pml.control pratchet optim.parsimony optim.pml read.phyDat upgma
 #' @importFrom seqinr dist.alignment read.alignment
 #' @importFrom stats logLik
 #'
-#' @return
+#' @return No return value. The function generates a phylogenetic tree file 
+#' (.tre) based on different approaches like Neighbor Joining, UPGMA, and 
+#' Maximum Likelihood.
 #' @export
 #'
 #' @details The alignment file would need two columns: 1. accession +