-
Notifications
You must be signed in to change notification settings - Fork 0
/
2_plot_iCT_stats.R
95 lines (69 loc) · 3.59 KB
/
2_plot_iCT_stats.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
### Plot ipyrad stats
# Usage: Rscript 2_plot_iCT_stats.R <work_directory>
#
# <work_directory> must contain the four space-separated tables read below
# (each with a header row). The PDF produced by this script is written to the
# same directory.
args <- commandArgs(trailingOnly = TRUE)

# Fail early with a readable message instead of an opaque setwd() error.
if (length(args) < 1L || !dir.exists(args[1])) {
  stop("Usage: Rscript 2_plot_iCT_stats.R <existing work directory>",
       call. = FALSE)
}
workdirectory <- args[1]

# Every path used by this script is relative ("./..."), so they all resolve
# against the directory given on the command line.
setwd(workdirectory)

# TRUE is spelled out: T is an ordinary variable that can be reassigned.
s3 <- read.table("./all_s3.txt", sep = " ", header = TRUE)
s5 <- read.table("./all_s5.txt", sep = " ", header = TRUE)
loci <- read.table("./loci_all_CT.txt", sep = " ", header = TRUE)
missing <- read.table("./missing_all_CT.txt", sep = " ", header = TRUE)
## loci statistics ##
# Column usage of `loci` in this section (by position):
#   [,1] is compared against sample counts, [,3] supplies the locus counts and
#   [,4] is the x axis of the later plots (labelled "iCT").
# NOTE(review): column 3 is presumably a cumulative locus count per sample
# coverage, which is what makes "total - value at cutoff" meaningful - confirm
# against the script that writes loci_all_CT.txt.

#number of loci
Nsamples <- max(loci[, 1])

# Locus counts at the maximum sample coverage, one value per iCT.
loci_4 <- loci[which(loci[, 1] == Nsamples), 3]

# Both cutoffs are floored so that they always hit an existing integer row.
# Without floor() the 50% cutoff is fractional for an odd number of samples,
# no row matches, and loci_50 silently becomes an empty vector.
cutoff_50 <- floor((Nsamples * 0.5) - 1)
cutoff_80 <- floor((Nsamples * 0.8) - 1)
loci_50 <- loci_4 - loci[which(loci[, 1] == cutoff_50), 3]
loci_80 <- loci_4 - loci[which(loci[, 1] == cutoff_80), 3]

index <- unique(loci[, 4])

# Shared y-axis range for the three locus-count curves.
max_c <- max(loci_4, loci_50, loci_80)
min_c <- min(loci_4, loci_50, loci_80)

#proportion of the loci
prop_50 <- loci_50 / loci_4
prop_80 <- loci_80 / loci_4
max_p <- max(prop_50, prop_80)
min_p <- min(prop_50, prop_80)

#Change in the proportion of the loci
# diff() yields one value fewer than its input, hence the first iCT is dropped
# from the matching x axis.
delta_p50 <- diff(prop_50)
delta_p80 <- diff(prop_80)
index_diff <- unique(loci[, 4])[-1]

#Percentage of missing data
# Column 1 of `missing` feeds the SNP series and column 2 the SEQ series.
max_m <- max(missing[, 1], missing[, 2])
min_m <- min(missing[, 1], missing[, 2])
delta_m_SNP <- diff(missing[, 1])
delta_m_SEQ <- diff(missing[, 2])
max_dm <- max(delta_m_SNP, delta_m_SEQ)
min_dm <- min(delta_m_SNP, delta_m_SEQ)
## plots ##
# All figures are written as successive pages of one PDF in the working
# directory.
pdf(file = "./iCT_plots.pdf")

# Per-threshold distributions taken from the s3 and s5 tables.
boxplot(clusters_total ~ threshold, data = s3,
        xlab = "clustering threshold", ylab = "N clusters",
        main = "Total number of clusters", pch = 19, cex = 0.5)
boxplot(clusters_hidepth ~ threshold, data = s3,
        xlab = "clustering threshold", ylab = "N clusters hidepth",
        main = "Clusters hidepth", pch = 19, cex = 0.5)
boxplot(avg_depth_total ~ threshold, data = s3,
        xlab = "clustering threshold",
        ylab = "average depth of the clusters", main = "Clusters depth")
boxplot(filtered_by_maxH ~ threshold, data = s5,
        xlab = "clustering threshold", ylab = "N clusters",
        main = "Clusters rejected due to high heterozigosity",
        pch = 19, cex = 0.5)

# Wider right margin, drawing allowed outside the plot region. par() settings
# persist on the device, so they are set once here and stay in force until
# the next par() call.
par(mar = c(5.1, 4.1, 4.1, 5.7), xpd = TRUE)

# Number of loci: all (black), 50% (red), 80% (blue).
plot(index, loci_4, pch = 19, cex = 0.3, ylim = c(min_c, max_c),
     ylab = "number of loci", xlab = "iCT",
     main = "Number of loci in the assembly", type = "b")
lines(index, loci_50, pch = 19, cex = 0.3, col = "red", type = "b")
lines(index, loci_80, pch = 19, cex = 0.3, col = "blue", type = "b")
legend("bottomleft", legend = c("all", "50%", "80%"), pch = c(19, 19, 19),
       col = c("black", "red", "blue"))

# Proportion of shared loci: 50% is drawn in blue and 80% in red, the same
# colours as the two delta plots that follow.
plot(index, prop_50, pch = 19, cex = 0.3, ylim = c(min_p, max_p),
     col = "blue", ylab = "proportion of the loci", xlab = "iCT",
     main = "Proportion of shared loci", type = "b")
# No ylim here: it is not a graphical parameter for lines() and only raised a
# warning.
lines(index, prop_80, pch = 19, cex = 0.3, col = "red", type = "b")
# Legend colours follow the curves above (they were swapped before).
legend("bottomleft", legend = c("50%", "80%"), pch = c(19, 19),
       col = c("blue", "red"))

# Change in the shared-loci proportion between consecutive iCT values.
plot(delta_p50 ~ index_diff, pch = 19, cex = 0.5,
     ylim = c(min(delta_p50), max(delta_p50)), col = "blue",
     ylab = "delta loci 50%", xlab = "delta iCT", main = "50%", type = "b")
plot(delta_p80 ~ index_diff, pch = 19, cex = 0.5,
     ylim = c(min(delta_p80), max(delta_p80)), col = "red",
     ylab = "delta loci 80%", xlab = "delta iCT", main = "80%", type = "b")

# Narrower right margin for the two missing-data figures.
par(mar = c(5.1, 4.1, 4.1, 4.1), xpd = TRUE)

# Missing data: column 2 (blue, "Concatenation") and column 1 (red, "SNPs"),
# both against column 3 of `missing`.
plot(missing[, 2] ~ missing[, 3], pch = 19, cex = 0.5,
     ylab = "% of missing data", xlab = "iCT", col = "blue",
     ylim = c(min_m, max_m), type = "b")
lines(missing[, 1] ~ missing[, 3], pch = 19, cex = 0.5, col = "red",
      type = "b")
legend("topleft", legend = c("Concatenation", "SNPs"), pch = c(19, 19),
       col = c("blue", "red"))

# Change in missing data between consecutive iCT values, same colour coding.
plot(delta_m_SNP ~ index_diff, pch = 19, cex = 0.5,
     ylim = c(min_dm, max_dm), ylab = "delta % missing",
     xlab = "delta iCT", col = "red", type = "b")
lines(delta_m_SEQ ~ index_diff, pch = 19, cex = 0.5, col = "blue",
      type = "b")
legend("topleft", legend = c("Concatenation", "SNPs"), pch = c(19, 19),
       col = c("blue", "red"))

# Close the device so the PDF is flushed to disk.
dev.off()