Skip to content

Commit

Permalink
Merge branch 'main' into issue-38-rename-functions
Browse files Browse the repository at this point in the history
  • Loading branch information
the-mayer authored Oct 7, 2024
2 parents 843ecda + 2c35324 commit d1f0a1e
Show file tree
Hide file tree
Showing 27 changed files with 400 additions and 230 deletions.
9 changes: 8 additions & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,11 @@ export(GCA2lin)
export(GenContextNetwork)
export(LineagePlot)
export(RepresentativeAccNums)
export(acc2FA)
export(acc2fa)
export(acc2lin)
export(addLeaves2Alignment)
export(addName)
export(add_leaves)
export(add_lins)
export(add_name)
Expand All @@ -26,6 +29,8 @@ export(combine_files)
export(combine_full)
export(combine_ipr)
export(condenseRepeatedDomains)
export(convert2TitleCase)
export(convertAlignment2FA)
export(convert_aln2fa)
export(convert_fa2tre)
export(count_bycol)
Expand All @@ -43,6 +48,7 @@ export(find_paralogs)
export(find_top_acc)
export(format_job_args)
export(gc_undirected_network)
export(generateAllAlignments2FA)
export(generate_all_aln2fa)
export(generate_fa2tre)
export(generate_msa)
Expand All @@ -61,15 +67,16 @@ export(lineage.neighbors.plot)
export(lineage_sunburst)
export(make_job_results_url)
export(make_opts2procs)
export(mapAcc2Name)
export(map_acc2name)
export(map_advanced_opts2procs)
export(msa_pdf)
export(plot_estimated_walltimes)
export(prot2tax)
export(prot2tax_old)
export(removeAsterisks)
export(removeEmptyRows)
export(removeTails)
export(renameFA)
export(rename_fasta)
export(replaceQuestionMarks)
export(reveql)
Expand Down
60 changes: 30 additions & 30 deletions R/CHANGED-pre-msa-tree.R
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
## Pre-requisites to generate MSA and Phylogenetic Tree
## Includes the following functions:
## convert_aln2fa, to_titlecase, add_leaves
## generate_all_aln2fa
## convert_aln2tsv??, convert_accnum2fa??
## Created from add_leaves.R, convert_aln2fa.R, all_aln2fa.R
## convertAlignment2FA, convert2TitleCase, addLeaves2Alignment
## generateAllAlignments2FA
## convertAlignment2TSV??, convertAccNum2FA??
## Created from addLeaves2Alignment.R, convertAlignment2FA.R, all_aln2fa.R
## Modified: Dec 24, 2019 | Jan 2021
## Janani Ravi (@jananiravi) & Samuel Chen (@samuelzornchen)

Expand Down Expand Up @@ -35,7 +35,7 @@ api_key <- Sys.getenv("ENTREZ_API_KEY", unset = "YOUR_KEY_HERE")
#' @author Andrie, Janani Ravi
#' @description Translate string to Title Case w/ delimiter.
#' @aliases totitle, to_title
#' @usage to_titlecase(text, delimitter)
#' @usage convert2TitleCase(x, y = " ")
#' @param x Character vector.
#' @param y Delimiter. Default is space (" ").
#' @seealso chartr, toupper, and tolower.
Expand All @@ -44,7 +44,7 @@ api_key <- Sys.getenv("ENTREZ_API_KEY", unset = "YOUR_KEY_HERE")
#' @export
#'
#' @examples
to_titlecase <- function(x, y = " ") {
convert2TitleCase <- function(x, y = " ") {
s <- strsplit(x, y)[[1]]
paste(toupper(substring(s, 1, 1)), substring(s, 2),
sep = "", collapse = y
Expand Down Expand Up @@ -89,9 +89,9 @@ to_titlecase <- function(x, y = " ") {
#'
#' @examples
#' \dontrun{
#' add_leaves("pspa_snf7.aln", "pspa.txt")
#' addLeaves2Alignment("pspa_snf7.aln", "pspa.txt")
#' }
add_leaves <- function(aln_file = "",
addLeaves2Alignment <- function(aln_file = "",
lin_file = "data/rawdata_tsv/all_semiclean.txt", # !! finally change to all_clean.txt!!
# lin_file="data/rawdata_tsv/PspA.txt",
reduced = FALSE) {
Expand Down Expand Up @@ -164,7 +164,7 @@ add_leaves <- function(aln_file = "",
# AccNum,
sep = "_"
))
temp$Leaf <- map(temp$Leaf, to_titlecase)
temp$Leaf <- map(temp$Leaf, convert2TitleCase)
temp <- temp %>%
mutate(Leaf_Acc = (paste(Leaf, AccNum, sep = "_")))

Expand Down Expand Up @@ -203,7 +203,7 @@ add_leaves <- function(aln_file = "",
#' @export
#'
#' @examples
add_name <- function(data,
addName <- function(data,
accnum_col = "AccNum", spec_col = "Species", lin_col = "Lineage",
lin_sep = ">", out_col = "Name") {
cols <- c(accnum_col, "Kingdom", "Phylum", "Genus", "Spp")
Expand Down Expand Up @@ -283,10 +283,10 @@ add_name <- function(data,
#'
#' @examples
#' \dontrun{
#' add_leaves("pspa_snf7.aln", "pspa.txt")
#' addLeaves2Alignment("pspa_snf7.aln", "pspa.txt")
#' }
#'
convert_aln2fa <- function(aln_file = "",
convertAlignment2FA <- function(aln_file = "",
lin_file = "data/rawdata_tsv/all_semiclean.txt", # !! finally change to all_clean.txt!!
fa_outpath = "",
reduced = FALSE) {
Expand All @@ -297,7 +297,7 @@ convert_aln2fa <- function(aln_file = "",
# fa_outpath="data/alns/pspc.fasta"

## Add leaves
aln <- add_leaves(
aln <- addLeaves2Alignment(
aln = aln_file,
lin = lin_file,
reduced = reduced
Expand All @@ -320,7 +320,7 @@ convert_aln2fa <- function(aln_file = "",
return(fasta)
}

#' Default rename_fasta() replacement function. Maps an accession number to its name
#' Default renameFA() replacement function. Maps an accession number to its name
#'
#' @param line The line of a fasta file starting with '>'
#' @param acc2name Data Table containing a column of accession numbers and a name column
Expand All @@ -335,8 +335,8 @@ convert_aln2fa <- function(aln_file = "",
#' @export
#'
#' @examples
map_acc2name <- function(line, acc2name, acc_col = "AccNum", name_col = "Name") {
# change to be the name equivalent to an add_names column
mapAcc2Name <- function(line, acc2name, acc_col = "AccNum", name_col = "Name") {
# change to be the name equivalent to an addName() column
# Find the first ' '
end_acc <- str_locate(line, " ")[[1]]

Expand Down Expand Up @@ -364,8 +364,8 @@ map_acc2name <- function(line, acc2name, acc_col = "AccNum", name_col = "Name")
#' @export
#'
#' @examples
rename_fasta <- function(fa_path, outpath,
replacement_function = map_acc2name, ...) {
renameFA <- function(fa_path, outpath,
replacement_function = mapAcc2Name, ...) {
lines <- read_lines(fa_path)
res <- map(lines, function(x) {
if (strtrim(x, 1) == ">") {
Expand All @@ -381,7 +381,7 @@ rename_fasta <- function(fa_path, outpath,
}

################################
## generate_all_aln2fa
## generateAllAlignments2FA
#' Adding Leaves to an alignment file w/ accessions
#'
#' @keywords alignment, accnum, leaves, lineage, species
Expand All @@ -408,9 +408,9 @@ rename_fasta <- function(fa_path, outpath,
#'
#' @examples
#' \dontrun{
#' generate_all_aln2fa()
#' generateAllAlignments2FA()
#' }
generate_all_aln2fa <- function(aln_path = here("data/rawdata_aln/"),
generateAllAlignments2FA <- function(aln_path = here("data/rawdata_aln/"),
fa_outpath = here("data/alns/"),
lin_file = here("data/rawdata_tsv/all_semiclean.txt"),
reduced = F) {
Expand All @@ -432,7 +432,7 @@ generate_all_aln2fa <- function(aln_path = here("data/rawdata_aln/"),
fa_outpath = paste0(fa_outpath, "/", variable, ".fa")
)
pmap(
.l = aln2fa_args, .f = convert_aln2fa,
.l = aln2fa_args, .f = convertAlignment2FA,
lin_file = lin_file,
reduced = reduced
)
Expand All @@ -441,7 +441,7 @@ generate_all_aln2fa <- function(aln_path = here("data/rawdata_aln/"),

# accessions <- c("P12345","Q9UHC1","O15530","Q14624","P0DTD1")
# accessions <- rep("ANY95992.1", 201)
#' acc2fa converts protein accession numbers to a fasta format.
#' acc2FA converts protein accession numbers to a fasta format.
#'
#' @description
#' Resulting fasta file is written to the outpath.
Expand All @@ -464,11 +464,11 @@ generate_all_aln2fa <- function(aln_path = here("data/rawdata_aln/"),
#'
#' @examples
#' \dontrun{
#' acc2fa(accessions = c("ACU53894.1", "APJ14606.1", "ABK37082.1"), outpath = "my_proteins.fasta")
#' Entrez:accessions <- rep("ANY95992.1", 201) |> acc2fa(outpath = "entrez.fa")
#' EBI:accessions <- c("P12345", "Q9UHC1", "O15530", "Q14624", "P0DTD1") |> acc2fa(outpath = "ebi.fa")
#' acc2FA(accessions = c("ACU53894.1", "APJ14606.1", "ABK37082.1"), outpath = "my_proteins.fasta")
#' # Entrez:
#' accessions <- rep("ANY95992.1", 201) |> acc2FA(outpath = "entrez.fa")
#' # EBI:
#' accessions <- c("P12345", "Q9UHC1", "O15530", "Q14624", "P0DTD1") |> acc2FA(outpath = "ebi.fa")
#' }
acc2fa <- function(accessions, outpath, plan = "sequential") {
acc2FA <- function(accessions, outpath, plan = "sequential") {
# validation
stopifnot(length(accessions) > 0)

Expand Down Expand Up @@ -663,7 +663,7 @@ get_accnums_from_fasta_file <- function(fasta_file) {


################################
## convert_accnum2fa
## convertAccNum2FA
#######
## 1 ##
#######
Expand Down Expand Up @@ -706,9 +706,9 @@ get_accnums_from_fasta_file <- function(fasta_file) {
# seqs <- retrieveseqs(seqnames,"swissprot")

################################
## convert_aln2tsv
## convertAlignment2TSV
## NEEDS FIXING!
# convert_aln2tsv <- function(file_path){
# convertAlignment2TSV <- function(file_path){
# cfile <- read_delim("data/alignments/pspc.gismo.aln", delim=" ")
# cfile <- as.data.frame(map(cfile,function(x) gsub("\\s+", "",x)))
# colnames(cfile) <- c("AccNum", "Alignment")
Expand Down
Loading

0 comments on commit d1f0a1e

Please sign in to comment.