-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathcorrelation-mas-exists.R
41 lines (40 loc) · 1.64 KB
/
correlation-mas-exists.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
# Runs Mann-Whitney, Shapiro-Wilk, Kendall correlation and Pearson correlation tests between architecture-issues and code-issues.
# Saves results in "correlation-mas-exists.csv".
#
# To run script from command line:
# "C:\Program Files\R\R-3.3.3\bin\x64\Rscript.exe" correlation-mas-exists.R
#
#
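# Mann-Whitney test:
# p < 0.05 --> the two samples differ significantly
# Shapiro-Wilk normality test:
# p > 0.05 --> normal distribution --> use Pearson correlation
# p < 0.05 --> not normal distribution --> use Kendall correlation
# Kendall correlation:
# p < 0.05 --> statistically significant correlation (tau differs from 0)
# Pearson correlation:
# p < 0.05 --> statistically significant correlation (cor differs from 0)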
# Compares the `ud.hl.cd.exists` column with every issue column (column 6
# onward) of "mas-issues-by-package.csv" and writes one result row per issue
# column to "correlation-mas-exists.csv".
data <- read.csv("mas-issues-by-package.csv", header = TRUE)

# Fail early with a clear message if the reference column is absent.
existsValues <- data$ud.hl.cd.exists
if (is.null(existsValues)) {
  stop("Column 'ud.hl.cd.exists' not found in mas-issues-by-package.csv",
       call. = FALSE)
}

# Issue columns start at column 6. Filtering an index vector (instead of
# writing 6:ncol(data)) yields an empty set when the file has fewer than six
# columns; the colon operator would count downwards and index columns that
# do not exist.
firstIssueCol <- 6L
issueCols <- seq_len(ncol(data))
issueCols <- issueCols[issueCols >= firstIssueCol]
nIssues <- length(issueCols)

# Preallocate one slot per issue column rather than growing vectors with c().
issueName <- colnames(data)[issueCols]
mannWhitneyPvalue <- numeric(nIssues)
shapiroWilkPvalue <- numeric(nIssues)
kendallPvalue <- numeric(nIssues)
kendallTau <- numeric(nIssues)
pearsonPvalue <- numeric(nIssues)
pearsonCor <- numeric(nIssues)

for (k in seq_len(nIssues)) {
  # cbind() coerces both columns to a common type (e.g. a logical flag column
  # becomes numeric next to a numeric issue column), which the tests require.
  mat <- cbind(existsValues, data[[issueCols[k]]])

  # Two-sample Wilcoxon rank-sum (Mann-Whitney) test.
  mannWhitneyPvalue[k] <- wilcox.test(mat[, 1], mat[, 2])$p.value

  # Normality of the issue column. shapiro.test() signals an error for inputs
  # it cannot handle (e.g. all values identical); in that case 0 is recorded
  # so the column is treated as "not normal" downstream.
  shapiroWilkPvalue[k] <- tryCatch(
    shapiro.test(mat[, 2])$p.value,
    error = function(e) 0
  )

  kendall <- cor.test(mat[, 1], mat[, 2], method = "kendall")
  kendallPvalue[k] <- kendall$p.value
  kendallTau[k] <- unname(kendall$estimate["tau"])

  pearson <- cor.test(mat[, 1], mat[, 2], method = "pearson")
  pearsonPvalue[k] <- pearson$p.value
  pearsonCor[k] <- unname(pearson$estimate["cor"])
}

# Column names of the CSV come from the variable names passed here.
outFrame <- data.frame(issueName, mannWhitneyPvalue, shapiroWilkPvalue,
                       kendallPvalue, kendallTau, pearsonPvalue, pearsonCor)
write.csv(outFrame, file = "correlation-mas-exists.csv",
          fileEncoding = "UTF-8", row.names = FALSE)