Skip to content

Commit

Permalink
Merge branch 'JRaviLab:main' into testthat
Browse files Browse the repository at this point in the history
  • Loading branch information
awasyn authored Oct 10, 2024
2 parents 3838564 + b96198e commit c91d38b
Show file tree
Hide file tree
Showing 14 changed files with 155 additions and 65 deletions.
5 changes: 5 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,7 @@ importFrom(assertthat,assert_that)
importFrom(assertthat,has_name)
importFrom(base64enc,base64encode)
importFrom(biomartr,getKingdomAssemblySummary)
importFrom(d3r,d3_nest)
importFrom(data.table,as.data.table)
importFrom(data.table,fread)
importFrom(data.table,fwrite)
Expand Down Expand Up @@ -181,6 +182,7 @@ importFrom(ggplot2,theme)
importFrom(ggplot2,theme_classic)
importFrom(ggplot2,theme_grey)
importFrom(ggplot2,theme_minimal)
importFrom(ggplot2,unit)
importFrom(ggplot2,xlab)
importFrom(ggplot2,ylab)
importFrom(grDevices,adjustcolor)
Expand Down Expand Up @@ -237,13 +239,15 @@ importFrom(readr,write_file)
importFrom(readr,write_lines)
importFrom(readr,write_tsv)
importFrom(rentrez,entrez_fetch)
importFrom(rlang,.data)
importFrom(rlang,as_string)
importFrom(rlang,sym)
importFrom(sendmailR,mime_part)
importFrom(sendmailR,sendmail)
importFrom(seqinr,dist.alignment)
importFrom(seqinr,read.alignment)
importFrom(shiny,showNotification)
importFrom(stats,as.formula)
importFrom(stats,complete.cases)
importFrom(stats,logLik)
importFrom(stats,na.omit)
Expand All @@ -264,6 +268,7 @@ importFrom(stringr,str_sub)
importFrom(stringr,str_trim)
importFrom(stringr,word)
importFrom(sunburstR,sunburst)
importFrom(sunburstR,sund2b)
importFrom(tibble,as_tibble)
importFrom(tibble,tibble)
importFrom(tidyr,drop_na)
Expand Down
36 changes: 24 additions & 12 deletions R/acc2lin.R
Original file line number Diff line number Diff line change
Expand Up @@ -25,18 +25,25 @@ sinkReset <- function() {

#' addLineage
#'
#' @param df
#' @param acc_col
#' @param assembly_path
#' @param lineagelookup_path
#' @param ipgout_path
#' @param plan
#' @param df A `data.frame` containing the input data. One column must contain
#' the accession numbers.
#' @param acc_col A string specifying the column name in `df` that holds the
#' accession numbers. Defaults to `"AccNum"`.
#' @param assembly_path A string specifying the path to the `assembly_summary.txt`
#' file. This file contains metadata about assemblies.
#' @param lineagelookup_path A string specifying the path to the lineage lookup
#' file, which contains a mapping from tax IDs to their corresponding lineages.
#' @param ipgout_path (Optional) A string specifying the path where IPG database
#' fetch results will be saved. If `NULL`, the results are not written to a file.
#' @param plan A string specifying the parallelization strategy for the future
#' package, such as `"sequential"` or `"multisession"`.
#'
#' @importFrom dplyr pull
#' @importFrom magrittr %>%
#' @importFrom rlang sym
#'
#' @return Describe return, in detail
#' @return A `data.frame` that combines the original `df` with the lineage
#' information.
#' @export
#'
#' @examples
Expand Down Expand Up @@ -78,9 +85,12 @@ addLineage <- function(df, acc_col = "AccNum", assembly_path,
#' (taxid to lineage mapping). This file can be generated using the
#' @param ipgout_path Path to write the results of the efetch run of the accessions
#' on the ipg database. If NULL, the file will not be written. Defaults to NULL
#' @param plan
#' @param plan A string specifying the parallelization strategy for the future
#' package, such as `"sequential"` or `"multisession"`.
#'
#' @return Describe return, in detail
#' @return A `data.table` that contains the lineage information, mapping protein
#' accessions to their tax IDs and lineages.
#' @export
#'
#' @examples
Expand Down Expand Up @@ -112,13 +122,14 @@ acc2Lineage <- function(accessions, assembly_path, lineagelookup_path, ipgout_pa
#' @param accnums Character vector containing the accession numbers to query on
#' the ipg database
#' @param out_path Path to write the efetch results to
#' @param plan
#' @param plan A string specifying the parallelization strategy for the future
#' package, such as `"sequential"` or `"multisession"`.
#'
#' @importFrom furrr future_map
#' @importFrom future plan
#' @importFrom rentrez entrez_fetch
#'
#' @return Describe return, in detail
#' @return No return value. The function writes the fetched results to `out_path`.
#' @export
#'
#' @examples
Expand Down Expand Up @@ -186,7 +197,8 @@ efetchIPG <- function(accnums, out_path, plan = "sequential", ...) {
#'
#' @importFrom data.table fread
#'
#' @return Describe return, in detail
#' @return A `data.table` with the lineage information for the provided protein
#' accessions.
#' @export
#'
#' @examples
Expand Down
8 changes: 4 additions & 4 deletions R/cleanup.R
Original file line number Diff line number Diff line change
Expand Up @@ -88,12 +88,12 @@ ensureUniqAccNum <- function(accnums) {
# for the index of occurence for each accession number
df_accnums <- tibble::tibble("accnum" = accnums)
df_accnums <- df_accnums |>
dplyr::group_by(accnum) |>
dplyr::group_by(.data$accnum) |>
dplyr::mutate(suffix = dplyr::row_number()) |>
dplyr::ungroup() |>
dplyr::mutate(accnum_adjusted = paste0(accnum, "_", suffix)) |>
dplyr::arrange(accnum_adjusted)
accnums_adjusted <- df_accnums |> dplyr::pull(accnum_adjusted)
dplyr::mutate(accnum_adjusted = paste0(.data$accnum, "_", .data$suffix)) |>
dplyr::arrange(.data$accnum_adjusted)
accnums_adjusted <- df_accnums |> dplyr::pull("accnum_adjusted")

return(accnums_adjusted)
}
Expand Down
14 changes: 7 additions & 7 deletions R/fa2domain.R
Original file line number Diff line number Diff line change
Expand Up @@ -148,10 +148,10 @@ createIPRScanDomainTable <- function(
# filter for the accnum of interest (note: it's possible the accession
# number is not in the table [i.e., it had no domains])
df_iprscan_accnum <- df_iprscan |>
dplyr::filter(Analysis %in% analysis) |>
dplyr::filter(AccNum == accnum) |>
dplyr::filter(.data$Analysis %in% analysis) |>
dplyr::filter(.data$AccNum == accnum) |>
dplyr::select(dplyr::all_of(c("AccNum", "DB.ID", "StartLoc", "StopLoc"))) |>
dplyr::arrange(StartLoc)
dplyr::arrange(.data$StartLoc)
# handle the case of no records after filtering by "Analysis"; return the tibble
# with 0 rows quickly
if (nrow(df_iprscan_accnum) < 1) {
Expand All @@ -163,9 +163,9 @@ createIPRScanDomainTable <- function(
dplyr::rowwise() |>
dplyr::mutate(
seq_domain = XVector::subseq(
fasta[[grep(pattern = AccNum, x = names(fasta), fixed = TRUE)]],
start = StartLoc,
end = StopLoc
fasta[[grep(pattern = .data$AccNum, x = names(fasta), fixed = TRUE)]],
start = .data$StartLoc,
end = .data$StopLoc
) |>
as.character()
)
Expand All @@ -176,7 +176,7 @@ createIPRScanDomainTable <- function(
id_domain = stringr::str_glue("{AccNum}-{DB.ID}-{StartLoc}_{StopLoc}")
) |>
dplyr::ungroup() |>
dplyr::relocate(id_domain, .before = 1)
dplyr::relocate("id_domain", .before = 1)
return(df_iprscan_domains)
}

Expand Down
34 changes: 18 additions & 16 deletions R/ipr2viz.R
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ theme_genes2 <- function() {
#' @importFrom shiny showNotification
#' @importFrom stats na.omit
#' @importFrom rlang sym
#' @importFrom rlang .data
#'
#' @return
#' @export
Expand Down Expand Up @@ -105,9 +106,10 @@ find_top_acc <- function(infile_full,
#'
#' @importFrom dplyr distinct filter select
#' @importFrom gggenes geom_gene_arrow geom_subgene_arrow
#' @importFrom ggplot2 aes aes_string as_labeller element_text facet_wrap ggplot guides margin scale_fill_manual theme theme_minimal ylab
#' @importFrom ggplot2 aes aes_string as_labeller element_text facet_wrap ggplot guides margin scale_fill_manual theme theme_minimal unit ylab
#' @importFrom readr read_tsv
#' @importFrom tidyr pivot_wider
#' @importFrom stats as.formula
#'
#' @return
#' @export
Expand All @@ -134,10 +136,10 @@ ipr2viz <- function(infile_ipr = NULL, infile_full = NULL, accessions = c(),
ADDITIONAL_COLORS <- sample(CPCOLS, 1000, replace = TRUE)
CPCOLS <- append(x = CPCOLS, values = ADDITIONAL_COLORS)
## Read IPR file
ipr_out <- read_tsv(infile_ipr, col_names = T, col_types = iprscan_cols)
ipr_out <- ipr_out %>% filter(Name %in% accessions)
ipr_out <- read_tsv(infile_ipr, col_names = T, col_types = MolEvolvR::iprscan_cols)
ipr_out <- ipr_out %>% filter(.data$Name %in% accessions)
analysis_cols <- paste0("DomArch.", analysis)
infile_full <- infile_full %>% select(analysis_cols, Lineage_short, QueryName, PcPositive, AccNum)
infile_full <- infile_full %>% select(dplyr::all_of(analysis_cols), "Lineage_short", "QueryName", "PcPositive", "AccNum")
## To filter by Analysis
analysis <- paste(analysis, collapse = "|")
## @SAM: This can't be set in stone since the analysis may change!
Expand All @@ -157,22 +159,22 @@ ipr2viz <- function(infile_ipr = NULL, infile_full = NULL, accessions = c(),
## Need to fix this eventually based on the 'real' gene orientation! :)
ipr_out$Strand <- rep("forward", nrow(ipr_out))

ipr_out <- ipr_out %>% arrange(AccNum, StartLoc, StopLoc)
ipr_out <- ipr_out %>% arrange(.data$AccNum, .data$StartLoc, .data$StopLoc)
ipr_out_sub <- filter(
ipr_out,
grepl(pattern = analysis, x = Analysis)
grepl(pattern = analysis, x = .data$Analysis)
)
# dynamic analysis labeller
analyses <- ipr_out_sub %>%
select(Analysis) %>%
select("Analysis") %>%
distinct()
analysis_labeler <- analyses %>%
pivot_wider(names_from = Analysis, values_from = Analysis)
pivot_wider(names_from = "Analysis", values_from = "Analysis")

lookup_tbl_path <- "/data/research/jravilab/common_data/cln_lookup_tbl.tsv"
lookup_tbl <- read_tsv(lookup_tbl_path, col_names = T, col_types = lookup_table_cols)
lookup_tbl <- read_tsv(lookup_tbl_path, col_names = T, col_types = MolEvolvR::lookup_table_cols)

lookup_tbl <- lookup_tbl %>% select(-ShortName) # Already has ShortName -- Just needs SignDesc
lookup_tbl <- lookup_tbl %>% select(-dplyr::all_of("ShortName")) # Already has ShortName -- Just needs SignDesc
# ipr_out_sub = ipr_out_sub %>% select(-ShortName)
# TODO: Fix lookup table and uncomment below
# ipr_out_sub <- merge(ipr_out_sub, lookup_tbl, by.x = "DB.ID", by.y = "DB.ID")
Expand All @@ -195,7 +197,7 @@ ipr2viz <- function(infile_ipr = NULL, infile_full = NULL, accessions = c(),
), color = "white") +
geom_gene_arrow(fill = NA, color = "grey") +
# geom_blank(data = dummies) +
facet_wrap(~Analysis,
facet_wrap(~.data$Analysis,
strip.position = "top", ncol = 5,
labeller = as_labeller(analysis_labeler)
) +
Expand All @@ -216,9 +218,9 @@ ipr2viz <- function(infile_ipr = NULL, infile_full = NULL, accessions = c(),
plot <- ggplot(
ipr_out_sub,
aes(
xmin = 1, xmax = SLength,
y = Analysis, # y = AccNum
label = ShortName
xmin = 1, xmax = .data$SLength,
y = .data$Analysis, # y = AccNum
label = .data$ShortName
)
) +
geom_subgene_arrow(data = ipr_out_sub, aes_string(
Expand Down Expand Up @@ -295,15 +297,15 @@ ipr2viz_web <- function(infile_ipr,
## @SAM, colnames, merges, everything neeeds to be done now based on the
## combined lookup table from "common_data"
lookup_tbl_path <- "/data/research/jravilab/common_data/cln_lookup_tbl.tsv"
lookup_tbl <- read_tsv(lookup_tbl_path, col_names = T, col_types = lookup_table_cols)
lookup_tbl <- read_tsv(lookup_tbl_path, col_names = T, col_types = MolEvolvR::lookup_table_cols)

## Read IPR file and subset by Accessions
ipr_out <- read_tsv(infile_ipr, col_names = T)
ipr_out <- ipr_out %>% filter(Name %in% accessions)
## Need to fix eventually based on 'real' gene orientation!
ipr_out$Strand <- rep("forward", nrow(ipr_out))

ipr_out <- ipr_out %>% arrange(AccNum, StartLoc, StopLoc)
ipr_out <- ipr_out %>% arrange(.data$AccNum, .data$StartLoc, .data$StopLoc)
ipr_out_sub <- filter(
ipr_out,
grepl(pattern = analysis, x = Analysis)
Expand Down
15 changes: 8 additions & 7 deletions R/plotting.R
Original file line number Diff line number Diff line change
Expand Up @@ -521,8 +521,8 @@ plotLineageNeighbors <- function(query_data = "prot", query = "pspa",
gather(key = TopNeighbors.DA, value = count, 19:ncol(query_data)) %>%
select("Lineage", "TopNeighbors.DA", "count") %>% # "DomArch.norep","GenContext.norep",
group_by(TopNeighbors.DA, Lineage) %>%
summarise(lincount = sum(count), bin = as.numeric(as.logical(lincount))) %>%
arrange(desc(lincount)) %>%
summarise(lincount =sum(count), bin = as.numeric(as.logical(.data$lincount))) %>%
arrange(desc(.data$lincount)) %>%
within(TopNeighbors.DA <- factor(TopNeighbors.DA,
levels = rev(names(sort(table(TopNeighbors.DA),
decreasing = TRUE
Expand All @@ -538,9 +538,9 @@ plotLineageNeighbors <- function(query_data = "prot", query = "pspa",
geom_tile(
data = subset(
query.ggplot,
!is.na(lincount)
!is.na(lincount)
), # bin
aes(fill = lincount), # bin
aes(fill = .data$lincount), # bin
colour = "coral3", size = 0.3
) + # , width=0.7, height=0.7),
scale_fill_gradient(low = "white", high = "darkred") +
Expand Down Expand Up @@ -1183,10 +1183,11 @@ createWordCloud2Element <- function(query_data = "prot",
#' then the legend will be in the descending order of the top level hierarchy.
#' will be rendered. If the type is sund2b, a sund2b plot will be rendered.
#'
#' @importFrom d3r d3_nest
#' @importFrom dplyr arrange desc group_by_at select summarise
#' @importFrom htmlwidgets onRender
#' @importFrom rlang sym
#' @importFrom sunburstR sunburst
#' @importFrom sunburstR sunburst sund2b
#' @importFrom tidyr drop_na separate
#'
#' @return
Expand Down Expand Up @@ -1227,9 +1228,9 @@ plotLineageSunburst <- function(prot, lineage_column = "Lineage",

# Plot sunburst
if (type == "sunburst") {
result <- sunburst(tree, legend = list(w = 225, h = 15, r = 5, s = 5), colors = cpcols, legendOrder = legendOrder, width = "100%", height = "100%")
result <- sunburst(tree, legend = list(w = 225, h = 15, r = 5, s = 5), colors = cpcols, legendOrder = legendOrder, width = "100%", height = "100%")
} else if (type == "sund2b") {
result <- sund2b(tree)
result <- sund2b(tree)
}

if (showLegend) {
Expand Down
38 changes: 28 additions & 10 deletions R/tree.R
Original file line number Diff line number Diff line change
Expand Up @@ -37,14 +37,23 @@
## !! FastTree will only work if there are unique sequence names!!
#' convertFA2Tree
#'
#' @param fa_path
#' @param tre_path
#' @param fasttree_path
#' @param fa_path Path to the input FASTA alignment file (.fa). Default is the
#' path to "data/alns/pspa_snf7.fa".
#' @param tre_path Path to the output file where the generated tree (.tre) will
#' be saved. Default is the path to "data/alns/pspa_snf7.tre".
#' @param fasttree_path Path to the FastTree executable, which is used to
#' generate the phylogenetic tree. Default is "src/FastTree".
#'
#' @return
#' @return No return value. The function generates a tree file (.tre) from the
#' input FASTA file.
#' @export
#'
#' @examples
#' \dontrun{
#' convertFA2Tree(
#'     here("data/alns/pspa_snf7.fa"),
#'     here("data/alns/pspa_snf7.tre"),
#'     here("src/FastTree")
#' )
#' }
convertFA2Tree <- function(fa_path = here("data/alns/pspa_snf7.fa"),
tre_path = here("data/alns/pspa_snf7.tre"),
fasttree_path = here("src/FastTree")) {
Expand Down Expand Up @@ -72,16 +81,22 @@ convertFA2Tree <- function(fa_path = here("data/alns/pspa_snf7.fa"),
#' @description
#' Generate Trees for ALL fasta files in "data/alns"
#'
#' @param aln_path
#' @param aln_path Path to the directory containing all the alignment FASTA
#' files (.fa) for which trees will be generated. Default is "data/alns/".
#'
#'
#' @importFrom here here
#' @importFrom purrr pmap
#' @importFrom stringr str_replace_all
#'
#' @return
#' @return No return value. The function generates tree files (.tre) for each
#' alignment file in the specified directory.
#' @export
#'
#' @examples
#' \dontrun{
#' convertAlignment2Trees(here("data/alns/"))
#' }
convertAlignment2Trees <- function(aln_path = here("data/alns/")) {
# finding all fasta alignment files
fa_filenames <- list.files(path = aln_path, pattern = "*.fa")
Expand Down Expand Up @@ -111,16 +126,19 @@ convertAlignment2Trees <- function(aln_path = here("data/alns/")) {
#' @description
#' Generating phylogenetic tree from alignment file '.fa'
#'
#' @param fa_file Character. Path to file.
#' Default is 'pspa_snf7.fa'
#' @param out_file
#' @param fa_file Character. Path to the alignment FASTA file (.fa) from which
#' the phylogenetic tree will be generated. Default is 'pspa_snf7.fa'.
#' @param out_file Path to the output file where the generated tree (.tre) will
#' be saved. Default is "data/alns/pspa_snf7.tre".
#'
#' @importFrom ape write.tree
#' @importFrom phangorn bootstrap.pml dist.ml NJ modelTest phyDat plotBS pml pml.control pratchet optim.parsimony optim.pml read.phyDat upgma
#' @importFrom seqinr dist.alignment read.alignment
#' @importFrom stats logLik
#'
#' @return
#' @return No return value. The function generates a phylogenetic tree file
#' (.tre) based on different approaches like Neighbor Joining, UPGMA, and
#' Maximum Likelihood.
#' @export
#'
#' @details The alignment file would need two columns: 1. accession +
Expand Down
Loading

0 comments on commit c91d38b

Please sign in to comment.