-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathapriori.py
146 lines (132 loc) · 4.78 KB
/
apriori.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
from itertools import chain, combinations
import itertools
def getDistinctElt(transactions):
"""
Get the distinct element in a list of transactions.
"""
distinctSet = set()
for transaction in transactions:
for elt in transaction:
if not(elt in distinctSet):
distinctSet.add(elt)
return distinctSet
def getOccurence(item, transactions):
"""
Get the number of occurence in a list of trnasactions
"""
occurence = 0
for transaction in transactions:
if set(item) <= set(transaction):
occurence +=1
return occurence
def strToSet(string):
"""
Convert a string into a set
"""
ens = set()
for char in string:
ens.add(char)
return ens
def setToStr(ens):
"""
Convert a set into a string.
"""
string = ""
ens = sorted(ens)
for elt in ens:
string += str(elt)
return string
def getSubset(iterable):
""" Get all subset of iterable """
xs = list(iterable)
# note we return an iterator rather than a list
subsets_tuple = list(chain.from_iterable(combinations(xs,n) for n in range(len(xs)+1)))
subsets = [set(sub) for sub in subsets_tuple]
return subsets
def setRules(occurence, conf):
"""
Formate the rules from the occurence table and conserve only rules with
enought confidence.
INPUT
occurence (list) : the table of occurence
conf : the target confidence
OUTPUT
a list of rules where a rules is defined (pre, post, conf) which
means : pre => post with conf confidence.
"""
rules = []
for level in occurence[:0:-1]:
for item in level:
ens = strToSet(item[0])
occ = item[1]
for elt in getSubset(ens):
if (set(elt) != set()) and (set(elt) != ens):
for ref in occurence[len(elt)-1]:
if set(elt) == strToSet(ref[0]):
occ_ref = ref[1]
confidence = occ/occ_ref
if confidence >= conf:
rules.append((set(elt), set(ens.difference(elt)), confidence))
return rules
def getLabels(occurence_dict):
"""
Get the associations in occurence_dict without occurence
INPUT
occurence_dict (list) : contains the associations and occurence
OUTPUT
a list of the associations in occurence_dict
"""
labels = []
for elt in occurence_dict:
labels.append(elt[0])
return labels
def apriori(transactions, min_supp=3, confidence=0.75):
"""
Run the apriori algorithme to found rules.
INPUT
transactions (list) : is a list of transactions wich is list of items.
min_supp (int) : is the minimal support to consider a rules.
confidence (float) : is the minimal confidence to consider a rules.
OUTPUT
A list of rules where each item contains (pre, post, confidence).
"""
# get the distinct elt in transactions
disctinct_elt = getDistinctElt(transactions)
# a list that contains the rules, occurence_dict[n-1] contains the rules of
# length n
occurence_dict = []
# labels contains the the association of item that we have conserved.
# labels[n-1] contains set of association with n items
labels = []
# get the rules of length 1
occurence_dict_current = []
for item in disctinct_elt:
# get the occurence of the item in transactions
occurence = getOccurence(item, transactions)
if occurence >= min_supp:
occurence_dict_current.append((item,occurence))
occurence_dict.append(occurence_dict_current)
# We add the associations converded
labels.append(set(getLabels(occurence_dict[-1])))
n = 2
# if the last item of occurence_dict is empty we've finished the algorithme
while occurence_dict[-1] != []:
# Building the cartesian product of associations of n-1 element and 1
# element.
cart = []
for i in itertools.product(labels[0], labels[n-2]):
if strToSet(i[0]).intersection(strToSet(i[1])) == set() and not(strToSet(i[0]).union(strToSet(i[1])) in cart):
cart.append(strToSet(i[0]).union(strToSet(i[1])))
# finding the occurence of association of n elements
occurence_dict_current = []
for item in cart:
occurence = getOccurence(list(item), transactions)
if occurence >= min_supp:
occurence_dict_current.append((setToStr(item),occurence))
occurence_dict.append(occurence_dict_current)
labels.append(set(getLabels(occurence_dict[-1])))
n += 1
# Once we have all the associations and their occurences we formate the
# rules
rules = setRules(occurence_dict[:-1], confidence)
return rules