-
Notifications
You must be signed in to change notification settings - Fork 0
/
metrics.py
145 lines (124 loc) · 5.23 KB
/
metrics.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
import sys
import functools
import operator
def remove_col(list_2d, column_num):
return [[j for i,j in enumerate(sub) if i != column_num] for sub in list_2d]
def fill_dictionary(file, file_type, hybrid_method, sentiment_dict):
line = file.readline()
dictionary = {}
while line:
user_id = line.split("\t")[0]
movie_id = int(line.split("\t")[1])
rating = float(line.split("\t")[2].strip())
if(file_type == "test"):
if(hybrid_method == 1):
if(movie_id in sentiment_dict.keys()):
tup = (movie_id, (rating+sentiment_dict[movie_id])/2)
else:
tup = (movie_id, rating)
else:
if(movie_id in sentiment_dict.keys()):
tup = (movie_id, rating, sentiment_dict[movie_id])
else:
tup = (movie_id, rating, rating)
else:
tup = (movie_id, rating)
if user_id not in dictionary.keys():
dictionary[user_id] = [tup]
else:
dictionary[user_id].append(tup)
line = file.readline()
for key in dictionary.keys():
value_list = dictionary[key]
if(file_type == "test" and hybrid_method == 2):
value_list = sorted(value_list, key=lambda x: x[2], reverse=True)
else:
value_list = sorted(value_list, key=lambda x: x[1], reverse=True)
dictionary[key] = value_list
return dictionary
def fill_sentiment_dict(file):
line = file.readline()
dictionary = {}
while line:
movie_id = int(line.split("\t")[0])
sentiment_score = float(line.split("\t")[1].strip())
dictionary[movie_id] = sentiment_score
line = file.readline()
return dictionary
def generate_hybrid_dict(prediction_values1, prediction_values2, sentiment_dict, rmse_1, rmse_2):
dictionary = {}
for key in prediction_values1.keys():
if(key not in prediction_values2.keys()):
continue
rating_list_1 = prediction_values1[key]
rating_list_2 = prediction_values2[key]
lst = []
dictionary = {}
for element in rating_list_1:
element = (element[0],element[1],(1/rmse_1)*element[2])
lst = []
for element in rating_list_2:
element = (element[0],element[1],(1/rmse_2)*element[2])
def metric(ground_truth_values, prediction_values, sentiment_dict, hybrid_method):
precision = 0
recall = 0
f1_score = 0
for key in prediction_values.keys():
ground_truth_list = set(ground_truth_values[key][:5])
ground_truth_list = remove_col(ground_truth_list,1)
ground_truth_list = [list(elem) for elem in ground_truth_list]
ground_truth_list = set(functools.reduce(operator.iconcat, ground_truth_list, []))
prediction_list = set(prediction_values[key][:5])
if(hybrid_method == 1):
prediction_list = remove_col(prediction_list,1)
else:
prediction_list = remove_col(prediction_list,1)
prediction_list = remove_col(prediction_list,1)
prediction_list= [list(elem) for elem in prediction_list]
prediction_list = set(functools.reduce(operator.iconcat, prediction_list, []))
tp_elements = prediction_list - (prediction_list - ground_truth_list)
fp_elements = prediction_list - ground_truth_list
fn_elements = ground_truth_list - prediction_list
tp_count = len(tp_elements)
fp_count = len(fp_elements)
fn_count = len(fn_elements)
individual_precision = tp_count / float(tp_count+fp_count)
individual_recall = tp_count / float(tp_count+fn_count)
if(individual_recall == 0 and individual_precision == 0):
individual_f1 = 0
else:
individual_f1 = 2*individual_precision*individual_recall / float(individual_recall + individual_precision)
precision += individual_precision
recall += individual_recall
f1_score += individual_f1
print(precision,len(prediction_values.keys()))
print(recall,len(prediction_values.keys()))
print(f1_score,len(prediction_values.keys()))
print(precision/len(prediction_values.keys()))
print(recall/len(prediction_values.keys()))
print(f1_score/len(prediction_values.keys()))
def main():
movies_file = open(sys.argv[1],"r") #rating_ground_truth file
sentiment_file = open(sys.argv[2],"r") #sentiment analaysis file
sentiment_combining_method = int(sys.argv[3]) #0 for just content-based or collobrative, 1 for average of sentiment score and recommendation technique, 2 for sorting movies based on sentiment
pred_file = open(sys.argv[4],"r") #prediction file1
total_hybrid = int(sys.argv[5]) #1 to merge all 3 methods else 0
if(total_hybrid):
pred_file2 = open(sys.argv[6],"r") #prediction file2 needed for total hybrid
sentiment_dict = fill_sentiment_dict(sentiment_file)
ground_truth_values = fill_dictionary(movies_file, "ground_truth", sentiment_combining_method, sentiment_dict)
if(total_hybrid):
prediction_values1 = fill_dictionary(pred_file, "test", sentiment_combining_method, sentiment_dict)
prediction_values2 = fill_dictionary(pred_file2, "test", sentiment_combining_method, sentiment_dict)
content_based_rmse = 2.4
colloborative_rmse = 1.5
prediction_values = generate_hybrid_dict(prediction_values1, prediction_values2, sentiment_dict, content_based_rmse, colloborative_rmse)
precision_val = metric(ground_truth_values, prediction_values1, sentiment_dict, sentiment_combining_method)
else:
prediction_values1 = fill_dictionary(pred_file, "test", sentiment_combining_method, sentiment_dict)
precision_val = metric(ground_truth_values, prediction_values1, sentiment_dict, sentiment_combining_method)
movies_file.close()
sentiment_file.close()
pred_file.close()
if __name__ == '__main__':
main()