-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy path04-m6a-sites.R
80 lines (59 loc) · 2.03 KB
/
04-m6a-sites.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
library(tidyverse)
library(ggthemes)
library(ggforce)
# Global plot theme: Tufte style (ggthemes) rendered in Helvetica.
theme_set(theme_tufte(base_family = "Helvetica"))

# Load the prediction/residual table and keep only the columns used below:
# the two sequence columns (coding, 3utr), the gene id, the model residual,
# the log2 fold change and the species label.
d <- read_csv(
  "../191010-PredictStabilityInMZT/results-data/mzt_predictionsResidualsLog2Fc.csv"
) %>%
  select(coding, gene_id, `3utr`, resid, log2FC, specie)

# Output file paths. `snakemake` is not defined in this script; presumably it
# is the object Snakemake injects when running a script rule -- confirm.
output_plot <- snakemake@output$plot
output_stats <- snakemake@output$stats

# Sequence motif whose occurrences are counted (see Extended Data Figure 3).
m6a_motif <- "GGACT"
# add m6a motif counts ----------------------------------------------------
# Count motif occurrences in the coding sequence and in the 3' UTR, drop the
# raw sequences, then reshape to long format: one row per gene and region,
# with the region name in `position` and the motif count in `n`.
d <- d %>%
  mutate(
    m6a_coding = str_count(coding, m6a_motif),
    m6a_3utr = str_count(`3utr`, m6a_motif)
  ) %>%
  select(-c(coding, `3utr`)) %>%
  pivot_longer(
    cols = c(m6a_coding, m6a_3utr),
    names_to = "position",
    values_to = "n"
  )
# compute the p-values ----------------------------------------------------
# For every specie x position group, regress the residual on the motif count
# (resid ~ n) and keep the p-value of the slope term `n`.
slope_p_values <- d %>%
  group_by(specie, position) %>%
  nest() %>%
  mutate(
    tfit = map(
      data,
      function(group_df) broom::tidy(lm(resid ~ n, data = group_df))
    )
  ) %>%
  select(-data) %>%
  unnest(tfit) %>%
  filter(term == "n") %>%
  select(specie, position, p.value)

# One row per specie/position with the slope p-value.
write_csv(slope_p_values, output_stats)
# Bin the motif counts for plotting -----------------------------------------
# Rows with a missing count are dropped. Counts are collapsed into three
# ordered categories ("0", "1", ">1"), and `position` is relabelled for the
# facet strips.
d <- d %>%
  filter(!is.na(n)) %>%
  mutate(
    # if_else() is vectorised, so it is applied to the whole column at once
    # rather than being called once per row through map_chr().
    n_sites = if_else(n > 1, ">1", as.character(n)),
    n_sites = factor(n_sites, levels = c("0", "1", ">1")),
    position = factor(
      position,
      levels = c("m6a_coding", "m6a_3utr"),
      labels = c("coding", "3' UTR")
    )
  )
# compute the median to draw line
# Median residual (`mediana_r`) and group size (`n`) for every
# specie x position x n_sites combination. summarise() only peels off the last
# grouping variable, so ungroup() is called explicitly to hand a plain,
# ungrouped tibble to the plotting code.
resid_median <- d %>%
  group_by(specie, position, n_sites) %>%
  summarise(mediana_r = median(resid), n = n()) %>%
  ungroup()
# Plot the residuals by number of motif sites --------------------------------
# Sina plot of the residuals per n_sites category, faceted by specie (rows)
# and position (columns), with a short black horizontal line at each median.
# The plot is stored in a variable and handed to ggsave() explicitly instead
# of being auto-printed at top level and picked up through last_plot().
m6a_plot <- d %>%
  ggplot(aes(x = n_sites, y = resid, color = n_sites)) +
  geom_sina(size = .01 / 2, shape = 16, alpha = .9) +
  # An error bar with ymin == ymax has zero height, so it renders as a
  # horizontal line at the median. `y` is overridden here because the
  # inherited `y = resid` does not exist in `resid_median`.
  geom_errorbar(
    data = resid_median,
    aes(y = mediana_r, x = n_sites, ymin = mediana_r, ymax = mediana_r),
    color = "black",
    size = 1 / 5
  ) +
  # Grey for genes without the motif, orange for one or more sites.
  scale_color_manual(values = c("grey", "#E69F00", "#E69F00")) +
  facet_grid(specie ~ position, scales = "free_y") +
  theme(
    axis.line = element_line(colour = "black", size = .1),
    legend.position = "none"
  )

ggsave(output_plot, plot = m6a_plot, height = 3, width = 2.5)