forked from evidencebp/commit-classification
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlinguistic_models_performance.py
143 lines (108 loc) · 4.93 KB
/
linguistic_models_performance.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
"""
Evaluation of the main models on the main data sets
"""
import pandas as pd
from configuration import DATA_PATH
from confusion_matrix import ConfusionMatrix
from language_utils import match
from adaptive_model import is_adaptive
from corrective_model import is_fix
from refactor_model import built_is_refactor, build_perfective_regex, build_refactor_regex
def classifiy_commits_df(df):
    """
    Add the predictions of the linguistic models to a data frame of commits.

    The frame must have a ``message`` column. Every message is passed to the
    models and the results are stored in five new columns:
    ``corrective_pred``, ``is_refactor_pred``, ``perfective_pred``,
    ``adaptive_pred`` and ``adaptive_by_negation_pred``.

    :param df: data frame of commits; it is modified in place.
    :return: the same data frame, with the prediction columns added.
    """
    # The regular expressions do not depend on the commit message, so they are
    # built once here instead of being rebuilt for every row inside the lambdas.
    perfective_regex = build_perfective_regex()
    refactor_regex = build_refactor_regex()

    df['corrective_pred'] = df.message.map(is_fix)
    df['is_refactor_pred'] = df.message.map(built_is_refactor)
    # Perfective: a hit in either the perfective or the refactor regex.
    # (presumably match() returns a number of hits - verify in language_utils)
    df['perfective_pred'] = df.message.map(
        lambda x: match(x, perfective_regex) + match(x, refactor_regex) > 0)
    df['adaptive_pred'] = df.message.map(lambda x: is_adaptive(x) > 0)
    # Adaptive by negation: none of the other models fired on the message.
    df['adaptive_by_negation_pred'] = df.message.map(
        lambda x: (is_fix(x) == 0
                   and built_is_refactor(x) == 0
                   and match(x, perfective_regex) == 0))

    return df
def evaluate_bq_results(labels_file):
    """
    Evaluate classifications that are already stored in a labels file.

    Reads the CSV file, keeps the rows whose ``Type`` is one of corrective,
    perfective, adaptive or multi, derives the prediction columns from the
    ``bq_classification``, ``refactor_matches`` and ``adaptive_matches``
    columns and prints the models' performance.

    :param labels_file: path of the CSV file holding labels and classifications.
    """
    df = pd.read_csv(labels_file, engine='python')

    # Take an explicit copy: adding columns to a filtered slice of df is chained
    # assignment and makes pandas raise SettingWithCopyWarning.
    labeled = df[df.Type.isin(['corrective', 'perfective', 'adaptive', 'multi'])].copy()

    labeled['corrective_pred'] = labeled['bq_classification']
    labeled['is_refactor_pred'] = labeled.refactor_matches.map(lambda x: x > 0)
    labeled['adaptive_pred'] = labeled.adaptive_matches.map(lambda x: x > 0)

    # NOTE(review): the full evaluation also reads 'perfective_pred' and
    # 'adaptive_by_negation_pred', which are not set here - confirm that the
    # input file already provides these columns.
    linguistic_model_perfomance(labeled)
def evaluate_regex_results(labels_file, just_corrective=False):
    """
    Classify the commits of a labels file and report the models' performance.

    The CSV file is read, the prediction columns are added by
    classifiy_commits_df, the performance is printed and the enriched frame
    is written back to the same file (without the index).

    :param labels_file: path of the CSV file; it is overwritten with the
        predictions included.
    :param just_corrective: when True only the corrective model is evaluated.
    """
    labeled_commits = classifiy_commits_df(
        pd.read_csv(labels_file, engine='python'))

    linguistic_model_perfomance(labeled_commits, just_corrective=just_corrective)

    labeled_commits.to_csv(labels_file, index=False)
def corrective_performance(df):
    """
    Print and return the confusion matrix of the corrective model.

    Commits are counted per combination of ``corrective_pred`` (the
    prediction) and ``Is_Corrective`` (the label).

    :param df: data frame with ``corrective_pred``, ``Is_Corrective`` and
        ``commit`` columns.
    :return: the ConfusionMatrix built from the counts.
    """
    predicted, actual, counted = 'corrective_pred', 'Is_Corrective', 'commit'

    grouped = df.groupby([predicted, actual], as_index=False)
    counts = grouped.agg({counted: 'count'})

    matrix = ConfusionMatrix(g_df=counts,
                             classifier=predicted,
                             concept=actual,
                             count=counted)

    print("corrective commit performance")
    print(matrix.summarize())

    return matrix
def refactor_performance(df):
    """
    Print and return the confusion matrix of the refactor model.

    Commits are counted per combination of ``is_refactor_pred`` (the
    prediction) and ``Is_Refactor`` (the label).

    :param df: data frame with ``is_refactor_pred``, ``Is_Refactor`` and
        ``commit`` columns.
    :return: the ConfusionMatrix built from the counts.
    """
    keys = ['is_refactor_pred', 'Is_Refactor']
    pair_counts = df.groupby(keys, as_index=False).agg({'commit': 'count'})

    cm = ConfusionMatrix(g_df=pair_counts,
                         classifier=keys[0],
                         concept=keys[1],
                         count='commit')

    print("refactor commit performance")
    print(cm.summarize())

    return cm
def perfective_performance(df):
    """
    Print and return the confusion matrix of the perfective model.

    Commits are counted per combination of ``perfective_pred`` (the
    prediction) and ``Is_Perfective`` (the label).

    :param df: data frame with ``perfective_pred``, ``Is_Perfective`` and
        ``commit`` columns.
    :return: the ConfusionMatrix built from the counts.
    """
    classifier = 'perfective_pred'
    concept = 'Is_Perfective'

    counts = df.groupby([classifier, concept], as_index=False).agg(
        {'commit': 'count'})
    result = ConfusionMatrix(
        g_df=counts, classifier=classifier, concept=concept, count='commit')

    print("perfective commit performance")
    print(result.summarize())

    return result
def adaptive_performance(df):
    """
    Print and return the confusion matrix of the adaptive model.

    Commits are counted per combination of ``adaptive_pred`` (the
    prediction) and ``Is_Adaptive`` (the label).

    :param df: data frame with ``adaptive_pred``, ``Is_Adaptive`` and
        ``commit`` columns.
    :return: the ConfusionMatrix built from the counts.
    """
    matrix_args = {'classifier': 'adaptive_pred',
                   'concept': 'Is_Adaptive',
                   'count': 'commit'}

    by_prediction_and_label = df.groupby(
        [matrix_args['classifier'], matrix_args['concept']], as_index=False)
    counted = by_prediction_and_label.agg({matrix_args['count']: 'count'})

    confusion = ConfusionMatrix(g_df=counted, **matrix_args)

    print("adaptive commit performance")
    print(confusion.summarize())

    return confusion
def adaptive_by_negation_performance(df):
    """
    Print and return the confusion matrix of the adaptive-by-negation model.

    Commits are counted per combination of ``adaptive_by_negation_pred``
    (the prediction) and ``Is_Adaptive`` (the label).

    :param df: data frame with ``adaptive_by_negation_pred``, ``Is_Adaptive``
        and ``commit`` columns.
    :return: the ConfusionMatrix built from the counts.
    """
    grouping = df.groupby(['adaptive_by_negation_pred', 'Is_Adaptive'],
                          as_index=False)
    negation_counts = grouping.agg({'commit': 'count'})

    negation_cm = ConfusionMatrix(g_df=negation_counts,
                                  classifier='adaptive_by_negation_pred',
                                  concept='Is_Adaptive',
                                  count='commit')

    print("adaptive_by_negation commit performance")
    summary = negation_cm.summarize()
    print(summary)

    return negation_cm
def linguistic_model_perfomance(df, just_corrective=False):
    """
    Print the performance of the linguistic models on a classified data frame.

    The corrective model is always evaluated. Unless just_corrective is set,
    the refactor, adaptive, adaptive-by-negation and perfective models are
    evaluated as well, in that order.

    :param df: data frame holding both the labels and the prediction columns.
    :param just_corrective: when True only the corrective model is evaluated.
    """
    corrective_performance(df)

    if just_corrective:
        return

    for evaluate in (refactor_performance,
                     adaptive_performance,
                     adaptive_by_negation_performance,
                     perfective_performance):
        evaluate(df)
def main():
    """
    Evaluate the models on the test set and on the validation samples.

    All the models are evaluated on the test set; only the corrective model
    is evaluated on the validation samples.
    """
    separator = "***********************************"

    # (title, file name under DATA_PATH, evaluate the corrective model only)
    runs = (
        ("test performance", 'repo2018_test.csv', False),
        ("validation performance", 'model_validation_samples.csv', True),
    )
    # Disabled: evaluate_bq_results(DATA_PATH + '/labels/commits_updated2.csv')

    for title, file_name, corrective_only in runs:
        print(title)
        print(separator)
        evaluate_regex_results(DATA_PATH + file_name,
                               just_corrective=corrective_only)
# Run the evaluation only when the module is executed as a script.
if "__main__" == __name__:
    main()