-
Notifications
You must be signed in to change notification settings - Fork 2
/
merging.py
77 lines (65 loc) · 2.58 KB
/
merging.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
# merging.py - The program to merge the predicted reviews of users
import cPickle as pickle
import library
import operator
import sys
# The number of users in the group. main() reads sys.argv[1] as the SVF name
# and sys.argv[2:] as the user ids, so every argument after the first two
# entries of sys.argv counts as one user.
# NOTE: evaluated once, at import time, from the process's command line.
group_size = len(sys.argv)-2
def least_misery(data, user_ids, business_id):
    """Return the lowest rating any group member gives to a business.

    "Least misery" merging: the group's score for a business is the score of
    its least satisfied member.

    Args:
        data: nested mapping indexed as ``data[user_id][business_id]``.
        user_ids: non-empty sequence of the group members' user ids.
        business_id: the business whose ratings are merged.

    Returns:
        The smallest ``data[user_id][business_id]`` over ``user_ids``.

    Raises:
        ValueError: if ``user_ids`` is empty.
    """
    # Iterate over the ids that were actually passed in instead of relying on
    # the module-level group_size, and use the builtin rather than shadowing it.
    return min(data[user_id][business_id] for user_id in user_ids)
def most_happiness(data, user_ids, business_id):
    """Return the highest rating any group member gives to a business.

    "Most happiness" merging: the group's score for a business is the score
    of its most satisfied member.

    Args:
        data: nested mapping indexed as ``data[user_id][business_id]``.
        user_ids: non-empty sequence of the group members' user ids.
        business_id: the business whose ratings are merged.

    Returns:
        The largest ``data[user_id][business_id]`` over ``user_ids``.

    Raises:
        ValueError: if ``user_ids`` is empty.
    """
    # The previous implementation compared with `<`, which made this function
    # return the minimum (identical to least_misery). It also looped up to the
    # module-level group_size instead of over the ids passed in.
    return max(data[user_id][business_id] for user_id in user_ids)
def average(data, user_ids, business_id):
    """Return the arithmetic mean of the group's ratings for a business.

    Args:
        data: nested mapping indexed as ``data[user_id][business_id]``.
        user_ids: non-empty sequence of the group members' user ids.
        business_id: the business whose ratings are merged.

    Returns:
        The mean of ``data[user_id][business_id]`` over ``user_ids``, as a
        float.

    Raises:
        ZeroDivisionError: if ``user_ids`` is empty.
    """
    # Start from 0.0 so the accumulation is done in floating point, and divide
    # by the number of ids actually passed in rather than the module-level
    # group_size so the two can never disagree.
    total = sum((data[user_id][business_id] for user_id in user_ids), 0.0)
    return total / float(len(user_ids))
def expert(data, user_ids, business_id, original_data):
    """Return the group's rating for a business, weighted by user activity.

    Each member's rating is weighted by ``len(original_data[user_id])``, so
    members with more entries in ``original_data`` have more influence on the
    merged value.

    Args:
        data: nested mapping indexed as ``data[user_id][business_id]``.
        user_ids: sequence of the group members' user ids.
        business_id: the business whose ratings are merged.
        original_data: mapping from user id to a sized collection
            (presumably that user's original reviews -- confirm with caller).

    Returns:
        sum(weight * rating) / sum(weight) over ``user_ids``, as a float.

    Raises:
        ZeroDivisionError: if the weights sum to zero (no users, or none of
            them has any entry in ``original_data``).
    """
    weighted_sum = 0.0
    total_weight = 0.0
    # Walk the ids that were passed in rather than indexing up to the
    # module-level group_size.
    for user_id in user_ids:
        weight = len(original_data[user_id])
        weighted_sum += weight * data[user_id][business_id]
        total_weight += weight
    return weighted_sum / total_weight
def main():
    """Merge the group's predicted ratings and print businesses best-first.

    Command line: ``<script> <svf> <user_id> [<user_id> ...]`` where ``<svf>``
    selects the merging strategy: "lm" (least misery), "mh" (most happiness),
    "avg" (average) or "expert" (activity-weighted average).

    Reads "predictionary.p", a pickled nested mapping indexed as
    ``full_data[user_id][business_id]``, for example::

        {"a": {"1": 0.5, "2": 0.1}, "b": {"1": 0.5, "2": 0.3}}

    For the "expert" strategy it also reads "saveReviewDictValidation.p",
    which is indexed by user id. One line ``<merged value> <business id>`` is
    printed per business, highest merged value first.

    Exits with an error message if ``<svf>`` is not a known strategy.
    """
    svf = sys.argv[1]
    group_ids = sys.argv[2:]

    # Strategies that only need the predictions; "expert" is handled
    # separately because it takes the extra original-reviews argument.
    simple_strategies = {
        "lm": least_misery,
        "mh": most_happiness,
        "avg": average,
    }
    if svf != "expert" and svf not in simple_strategies:
        # Previously an unknown name silently produced no output at all.
        sys.exit("unknown SVF %r; expected one of: lm, mh, avg, expert" % svf)

    # NOTE(security): pickle can execute arbitrary code while loading; these
    # files must come from a trusted source.
    with open("predictionary.p", "rb") as predictions_file:
        full_data = pickle.load(predictions_file)

    original_reviews = {}
    if svf == "expert":
        with open("saveReviewDictValidation.p", "rb") as reviews_file:
            original_reviews = pickle.load(reviews_file)

    # The businesses considered are those of the first group member; every
    # other member is expected to have a prediction for each of them.
    merged = {}
    for business_id in full_data[group_ids[0]]:
        if svf == "expert":
            merged[business_id] = expert(
                full_data, group_ids, business_id, original_reviews)
        else:
            merged[business_id] = simple_strategies[svf](
                full_data, group_ids, business_id)

    ranked = sorted(merged.items(), key=operator.itemgetter(1), reverse=True)
    for key, val in ranked:
        print(str(val) + ' ' + str(key))
# Script entry point. NOTE: this call is unconditional (there is no
# `if __name__ == "__main__"` guard), so importing this module also runs main().
main()