Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactored AssociationRule class #13

Merged
merged 1 commit into from
Feb 26, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
158 changes: 54 additions & 104 deletions niaarm/association_rule.py
Original file line number Diff line number Diff line change
@@ -1,23 +1,12 @@
def normalize(value, actual_bounds, real_bounds):
return (real_bounds[0] +
(value -
real_bounds[0]) *
(real_bounds[1] -
real_bounds[0]) /
(actual_bounds[1] -
actual_bounds[0]))
return real_bounds[0] + (value - real_bounds[0]) * (real_bounds[1] - real_bounds[0]) / (actual_bounds[1] - actual_bounds[0])


def is_rule_feasible(ant, con):
ant_count = ant.count("NO")
con_count = con.count("NO")
if (ant_count == len(ant)) or (con_count == len(con)):
return False
else:
return True
def rule_feasible(ant, con):
return ant.count("NO") != len(ant) and con.count("NO") != len(con)


def get_cut_point(sol, num_attr):
def cut_point(sol, num_attr):
cut = int(sol * num_attr)
if cut == 0:
cut = 1
Expand Down Expand Up @@ -46,65 +35,42 @@ def build_rule(self, vector):
rule = []

permutation = self.map_permutation(vector)

self.permutation = get_permutation(permutation)

for i in range(len(self.features)):
current_feature = self.permutation[i]

# get threshold for each feature
threshold_position = self.get_vector_position_of_feature(
current_feature) + self.calculate_threshold_move(current_feature)
feature = self.features[current_feature]

# set current position in vector
vector_position = self.get_vector_position_of_feature(
current_feature)
vector_position = self.feature_position(current_feature)

# get threshold for each feature
threshold_position = vector_position + self.threshold_move(current_feature)

if vector[vector_position] > vector[threshold_position]:
if self.features[current_feature].dtype == 'float':
border1 = (vector[vector_position] * (self.features[current_feature].max_val -
self.features[current_feature].min_val)) + self.features[current_feature].min_val
if feature.dtype == 'float':
border1 = vector[vector_position] * (feature.max_val - feature.min_val) + feature.min_val
vector_position = vector_position + 1
border2 = (vector[vector_position] * (self.features[current_feature].max_val -
self.features[current_feature].min_val)) + self.features[current_feature].min_val
border2 = vector[vector_position] * (feature.max_val - feature.min_val) + feature.min_val

if border1 > border2:
inter = border1
border1 = border2
border2 = inter
border1, border2 = border2, border1
borders = [border1, border2]
rule.append(borders)

elif self.features[current_feature].dtype == 'int':
border1 = round(
(vector[vector_position] *
(
self.features[current_feature].max_val -
self.features[current_feature].min_val)) +
self.features[current_feature].min_val)
elif feature.dtype == 'int':
border1 = round(vector[vector_position] * (feature.max_val - feature.min_val) + feature.min_val)
vector_position = vector_position + 1

border2 = round(
(vector[vector_position] *
(
self.features[current_feature].max_val -
self.features[current_feature].min_val)) +
self.features[current_feature].min_val)
border2 = round(vector[vector_position] * (feature.max_val - feature.min_val) + feature.min_val)

if border1 > border2:
inter = border1
border1 = border2
border2 = inter
border1, border2 = border2, border1
borders = [border1, border2]

rule.append(borders)

else:
categories = self.features[current_feature].categories

categories = feature.categories
selected = round(vector[vector_position] * (len(categories) - 1))

rule.append([self.features[current_feature].categories[selected]])
rule.append([feature.categories[selected]])
else:
rule.append('NO')

Expand All @@ -113,14 +79,14 @@ def build_rule(self, vector):
def map_permutation(self, vector):
return vector[-len(self.features):]

def calculate_threshold_move(self, current_feature):
def threshold_move(self, current_feature):
if self.features[current_feature].dtype == "float" or self.features[current_feature].dtype == "int":
move = 2
else:
move = 1
return move

def get_vector_position_of_feature(self, feature):
def feature_position(self, feature):
position = 0
for i in range(feature):
if self.features[i].dtype == "float" or self.features[i].dtype == "int":
Expand All @@ -129,15 +95,7 @@ def get_vector_position_of_feature(self, feature):
position = position + 2
return position

def return_permutation(self):
return self.permutation

def calculate_support_confidence(
self,
antecedent,
consequence,
transactions):

def support_confidence(self, antecedent, consequence, transactions):
supp = 0
conf = 0
conf_counter = 0
Expand All @@ -147,35 +105,32 @@ def calculate_support_confidence(
match1 = 0
match2 = 0
for j in range(len(antecedent)):
if self.features[self.permutation[j]].dtype == 'float' or self.features[self.permutation[j]].dtype == 'int':
dtype = self.features[self.permutation[j]].dtype
if dtype == 'float' or dtype == 'int':
if antecedent[j] != 'NO':
border = antecedent[j]
if (float(transactions[i][self.permutation[j]]) >= border[0]) and (
float(transactions[i][self.permutation[j]]) <= border[1]):
if border[0] <= transactions[i, self.permutation[j]] <= border[1]:
match1 = match1 + 1
elif self.features[self.permutation[j]].dtype == 'cat':
elif dtype == 'cat':
if antecedent[j] != 'NO':
ant = antecedent[j]
if transactions[i][self.permutation[j]] == ant[0]:
if transactions[i, self.permutation[j]] == ant[0]:
match1 = match1 + 1

# secondly consequence
con_counter = 0
for ll in range(
len(antecedent),
len(antecedent) +
len(consequence)):
if self.features[self.permutation[ll]].dtype == 'float' or self.features[self.permutation[ll]].dtype == 'int':
for ll in range(len(antecedent), len(antecedent) + len(consequence)):
dtype = self.features[self.permutation[ll]].dtype
if dtype == 'float' or dtype == 'int':
if consequence[con_counter] != 'NO':
border = consequence[con_counter]
if (float(transactions[i][self.permutation[ll]]) >= border[0]) and (
float(transactions[i][self.permutation[ll]]) <= border[1]):
if border[0] <= transactions[i, self.permutation[ll]] <= border[1]:
match2 = match2 + 1
elif self.features[self.permutation[ll]].dtype == 'cat':
elif dtype == 'cat':
if consequence[con_counter] != 'NO':
con = consequence[con_counter]

if transactions[i][self.permutation[ll]] == con[0]:
if transactions[i, self.permutation[ll]] == con[0]:
match2 = match2 + 1

con_counter = con_counter + 1
Expand Down Expand Up @@ -204,46 +159,41 @@ def calculate_support_confidence(

return total_supp, total_conf

def calculate_coverage(self, antecedent, consequence):
missing_ant = antecedent.count("NO")
missing_con = consequence.count("NO")

missing_total = missing_ant + missing_con
def coverage(self, antecedent, consequence):
missing_total = antecedent.count("NO") + consequence.count("NO")
return 1 - missing_total / len(self.features)

return 1 - float(float(missing_total) / float(len(self.features)))

def calculate_shrinkage(self, antecedent, consequence):
def shrinkage(self, antecedent, consequence):
differences = []

for i in range(len(antecedent)):
if self.features[self.permutation[i]].dtype == 'float' or self.features[self.permutation[i]].dtype == 'int':
feature = self.features[self.permutation[i]]
if feature.dtype == 'float' or feature.dtype == 'int':
if antecedent[i] != 'NO':
borders = antecedent[i]
diff_borders = borders[1] - borders[0]
total_borders = self.features[self.permutation[i]].max_val - self.features[self.permutation[i]].min_val
diff = float(diff_borders / total_borders)
total_borders = feature.max_val - feature.min_val
diff = diff_borders / total_borders
differences.append(diff)

con_counter = 0
for ll in range(len(antecedent), len(antecedent) + len(consequence)):
if self.features[self.permutation[ll]].dtype == 'float' or self.features[self.permutation[ll]].dtype == 'int':
feature = self.features[self.permutation[ll]]
if feature.dtype == 'float' or feature.dtype == 'int':
if consequence[con_counter] != 'NO':
borders = consequence[con_counter]
diff_borders = borders[1] - borders[0]
total_borders = self.features[self.permutation[ll]].max_val - self.features[self.permutation[ll]].min_val
diff = float(diff_borders / total_borders)
total_borders = feature.max_val - feature.min_val
diff = diff_borders / total_borders
differences.append(diff)
con_counter = con_counter + 1

value = 0.0
for i in range(len(differences)):
value = value + differences[i]
value = sum(differences)

if len(differences) > 0:
normalized = normalize(value, [0, len(differences)], [0, 1])
else:
return 0.0

return 1 - normalized

def format_rules(self, antecedent, consequence):
Expand All @@ -252,19 +202,19 @@ def format_rules(self, antecedent, consequence):

for i in range(len(antecedent)):
if antecedent[i] != "NO":
if self.features[self.permutation[i]].dtype == "cat":
rule = self.features[self.permutation[i]].name + "(" + str(antecedent[i][0]) + ")"
feature = self.features[self.permutation[i]]
if feature.dtype == "cat":
rule = feature.name + "(" + str(antecedent[i][0]) + ")"
else:
rule = self.features[self.permutation[i]].name + "(" + str(antecedent[i]) + ")"

rule = feature.name + "(" + str(antecedent[i]) + ")"
antecedent1.append(rule)

for i in range(len(consequence)):
if consequence[i] != "NO":
if self.features[self.permutation[i + len(antecedent)]].dtype == "cat":
rule = self.features[self.permutation[i + len(antecedent)]].name + "(" + str(consequence[i]) + ")"
feature = self.features[self.permutation[i + len(antecedent)]]
if feature.dtype == "cat":
rule = feature.name + "(" + str(consequence[i][0]) + ")"
else:
rule = self.features[self.permutation[i + len(antecedent)]].name + "(" + str(consequence[i]) + ")"

rule = feature.name + "(" + str(consequence[i]) + ")"
consequence1.append(rule)
return antecedent1, consequence1
12 changes: 6 additions & 6 deletions niaarm/niaarm.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from niaarm.rule import Rule
from niaarm.association_rule import AssociationRule, is_rule_feasible, get_cut_point
from niaarm.association_rule import AssociationRule, rule_feasible, cut_point
from niapy.problems import Problem
import numpy as np
import csv
Expand Down Expand Up @@ -121,7 +121,7 @@ def _evaluate(self, sol):
cut_value = sol[self.dimension - 1] # get cut point value
solution = sol[:-1] # remove cut point

cut = get_cut_point(cut_value, len(self.features))
cut = cut_point(cut_value, len(self.features))

rule = arm.build_rule(solution)

Expand All @@ -130,20 +130,20 @@ def _evaluate(self, sol):
consequence = rule[cut:]

# check if rule is feasible
if is_rule_feasible(antecedent, consequence):
if rule_feasible(antecedent, consequence):

# get support and confidence of rule
support, confidence = arm.calculate_support_confidence(antecedent, consequence, self.transactions)
support, confidence = arm.support_confidence(antecedent, consequence, self.transactions)

if self.gamma == 0.0:
shrinkage = 0
else:
shrinkage = arm.calculate_shrinkage(antecedent, consequence)
shrinkage = arm.shrinkage(antecedent, consequence)

if self.delta == 0.0:
coverage = 0
else:
coverage = arm.calculate_coverage(antecedent, consequence)
coverage = arm.coverage(antecedent, consequence)

fitness = ((self.alpha * support) + (self.beta * confidence) + (self.gamma * shrinkage) +
(self.delta * coverage)) / (self.alpha + self.beta + self.gamma + self.delta)
Expand Down
4 changes: 2 additions & 2 deletions niaarm/tests/test_coverage.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ def test_a(self):

oper = AssociationRule(self.features)

coverage = oper.calculate_coverage(antecedent, consequence)
coverage = oper.coverage(antecedent, consequence)

self.assertEqual(coverage, 1)

Expand All @@ -31,6 +31,6 @@ def test_b(self):

oper = AssociationRule(self.features)

coverage = oper.calculate_coverage(antecedent, consequence)
coverage = oper.coverage(antecedent, consequence)

self.assertEqual(coverage, 0.5)
8 changes: 4 additions & 4 deletions niaarm/tests/test_cut_point.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from unittest import TestCase
from niaarm.association_rule import AssociationRule, get_cut_point
from niaarm.association_rule import AssociationRule, cut_point
from niaarm.dataset import Dataset


Expand All @@ -19,7 +19,7 @@ def test_cut_pointA(self):
cut_value = sol[len(sol) - 1]
new_sol = sol[:-1]

cut = get_cut_point(cut_value, len(self.features))
cut = cut_point(cut_value, len(self.features))

rule = arm.build_rule(new_sol)

Expand Down Expand Up @@ -123,7 +123,7 @@ def test_cut_pointB(self):

new_sol = sol[:-1]

cut = get_cut_point(cut_value, len(self.features))
cut = cut_point(cut_value, len(self.features))

rule = arm.build_rule(new_sol)

Expand Down Expand Up @@ -220,7 +220,7 @@ def test_cut_pointC(self):

new_sol = sol[:-1]

cut = get_cut_point(cut_value, len(self.features))
cut = cut_point(cut_value, len(self.features))

rule = arm.build_rule(new_sol)

Expand Down
Loading