-
Notifications
You must be signed in to change notification settings - Fork 0
/
cysteine_posterFigures.Rmd
87 lines (71 loc) · 3.29 KB
/
cysteine_posterFigures.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
---
title: "cysteine_posterFigures"
output: html_document
---
# df = scoreFilteredByMaxScore
# number of strains with gene -- barplot
```{r}
library(dplyr)
library(ggplot2)
# library(reshape2)
#
# df <- melt(scoreFilteredByMaxScore_cysteine_results_updatedStrains, id.vars = c("strain"), variable.name = "gene")

# Bar chart of the percentage of strains with a hit for each gene, with the
# bars sorted from highest to lowest percentage.
# Reads `df`, which must already exist with a `gene` and a `value` column;
# rows whose `value` is NA are treated as "no hit" and dropped.
# NOTE(review): `df` looks like the long table built by the commented-out
# melt() call above -- confirm before knitting.

# Denominator for the percentages; presumably the total number of strains
# screened -- TODO confirm it still matches the input table.
total_strains <- 214

df %>%
  filter(!is.na(value)) %>%
  group_by(gene) %>%
  # round() keeps its default of 0 digits, so labels are whole percentages.
  # `digits` must stay out of summarise(): there it would only create an
  # unused constant column instead of changing the rounding.
  summarise(perc = round(n() / total_strains * 100)) %>%
  ggplot(aes(x = reorder(gene, -perc), y = perc)) +
  geom_col(fill = "darkorange", width = 0.5) +
  geom_text(aes(label = paste(perc, "%")), vjust = -0.5, size = 4) +
  ylab("Percentage of Strains") +
  xlab("Gene") +
  ggtitle("Percentage of Strains with Hits per Gene") +
  theme_bw() +
  theme(
    axis.text.x = element_text(face = "italic", size = 12),
    axis.text.y = element_text(size = 12),
    axis.title = element_text(size = 14)
  ) +
  # Start the bars exactly at the axis and fix the scale to 0-100 %.
  scale_y_continuous(expand = c(0, 0), limits = c(0, 100))
```
# proportion of each taxon with hits?
```{r}
# taxa_hits_df <- subset(subset(df, `Category by Percentage of E. coli Hit Score` == ">75%"))
# taxa_hits_df <- subset(df, select = -c(gene, hit_score, perc_of_ecoli_score, category, `Category by Percentage of E. coli Hit Score`)) %>% distinct(strain, .keep_all = TRUE)

# Horizontal bar chart counting the rows of `df` in the ">75%" category,
# one bar per phylum and one facet per gene.
ggplot(
  data = subset(df, `Category by Percentage of E. coli Hit Score` == ">75%"),
  mapping = aes(y = phylum)
) +
  geom_bar() +
  facet_wrap(~ gene)
```
# Heatmap (not really, because it's binary) of strain vs. gene
```{r}
library(forcats)

# Heatmap of hit score per strain (y axis) and gene (x axis), restricted to
# rows in the ">75%" category. Strains are grouped by phylum along the y axis
# and their labels are coloured by phylum.
# Reads `df`, which must already exist with the columns `strain`, `gene`,
# `phylum`, `hit_score` and `Category by Percentage of E. coli Hit Score`.

# Order in which the phyla are stacked along the y axis.
phylum_levels <- c("Firmicutes", "Bacteroidetes", "Actinobacteria", "Proteobacteria", "Verrucomicrobia")

# Label colour for each phylum.
phylum_palette <- c(
  Firmicutes = "orange",
  Actinobacteria = "blue",
  Bacteroidetes = "purple",
  Verrucomicrobia = "red",
  Proteobacteria = "green"
)

# Work on a filtered copy so that `df` itself is left untouched.
heatmap_df <- df %>%
  filter(`Category by Percentage of E. coli Hit Score` == ">75%") %>%
  mutate(
    # Only phylum gets the phylum levels; applying them to strain would turn
    # every strain into NA.
    phylum = factor(phylum, levels = phylum_levels),
    # Sort strains by the position of their phylum in `phylum_levels`. The
    # integer code is needed because a factor cannot be averaged directly.
    strain = fct_reorder(as.character(strain), as.integer(phylum))
  )

# One colour per y-axis label, in the same order as the strain levels.
strain_order <- levels(heatmap_df$strain)
strain_phylum <- as.character(
  heatmap_df$phylum[match(strain_order, heatmap_df$strain)]
)
strain_colours <- unname(phylum_palette[strain_phylum])
# Strains whose phylum is missing or not in the palette fall back to black.
strain_colours[is.na(strain_colours)] <- "black"

ggplot(heatmap_df, aes(x = gene, y = strain, fill = hit_score)) +
  geom_tile() +
  theme_classic() +
  # A colour vector in element_text() is not officially supported by ggplot2
  # and raises a warning, but it colours the labels in axis order.
  theme(axis.text.y = element_text(colour = strain_colours, size = 3))
# ggplot(subset(df, `Category by Percentage of E. coli Hit Score` == ">75%" & phylum == "Firmicutes"), aes(y = strain, x = gene, fill = hit_score)) +
# geom_tile() +
# theme_classic() +
# ggtitle("Firmicutes")
# # theme(axis.text.y = element_text(colour = phylum_colors, size = 3))
#
# ggplot(subset(df, `Category by Percentage of E. coli Hit Score` == ">75%" & phylum == "Bacteroidetes"), aes(y = strain, x = gene, fill = hit_score)) +
# geom_tile() +
# theme_classic() +
# ggtitle("Bacteroidetes")
#
# ggplot(subset(df, `Category by Percentage of E. coli Hit Score` == ">75%" & phylum == "Actinobacteria"), aes(y = strain, x = gene, fill = hit_score)) +
# geom_tile() +
# theme_classic() +
# ggtitle("Actinobacteria")
#
# ggplot(subset(df, `Category by Percentage of E. coli Hit Score` == ">75%" & phylum == "Proteobacteria"), aes(y = strain, x = gene, fill = hit_score)) +
# geom_tile() +
# theme_classic() +
# ggtitle("Proteobacteria")
```