From c4e06479d30ce726a23db1ec9675a0c60af53ae0 Mon Sep 17 00:00:00 2001 From: zStupan Date: Sat, 26 Feb 2022 13:54:49 +0100 Subject: [PATCH] Refactored AssociationRule class --- niaarm/association_rule.py | 158 ++++++++---------------- niaarm/niaarm.py | 12 +- niaarm/tests/test_coverage.py | 4 +- niaarm/tests/test_cut_point.py | 8 +- niaarm/tests/test_rule_building.py | 36 +++--- niaarm/tests/test_shrinkage.py | 14 +-- niaarm/tests/test_support_confidence.py | 10 +- 7 files changed, 96 insertions(+), 146 deletions(-) diff --git a/niaarm/association_rule.py b/niaarm/association_rule.py index effe4bc..a15d919 100644 --- a/niaarm/association_rule.py +++ b/niaarm/association_rule.py @@ -1,23 +1,12 @@ def normalize(value, actual_bounds, real_bounds): - return (real_bounds[0] + - (value - - real_bounds[0]) * - (real_bounds[1] - - real_bounds[0]) / - (actual_bounds[1] - - actual_bounds[0])) + return real_bounds[0] + (value - real_bounds[0]) * (real_bounds[1] - real_bounds[0]) / (actual_bounds[1] - actual_bounds[0]) -def is_rule_feasible(ant, con): - ant_count = ant.count("NO") - con_count = con.count("NO") - if (ant_count == len(ant)) or (con_count == len(con)): - return False - else: - return True +def rule_feasible(ant, con): + return ant.count("NO") != len(ant) and con.count("NO") != len(con) -def get_cut_point(sol, num_attr): +def cut_point(sol, num_attr): cut = int(sol * num_attr) if cut == 0: cut = 1 @@ -46,65 +35,42 @@ def build_rule(self, vector): rule = [] permutation = self.map_permutation(vector) - self.permutation = get_permutation(permutation) for i in range(len(self.features)): current_feature = self.permutation[i] - - # get threshold for each feature - threshold_position = self.get_vector_position_of_feature( - current_feature) + self.calculate_threshold_move(current_feature) + feature = self.features[current_feature] # set current position in vector - vector_position = self.get_vector_position_of_feature( - current_feature) + vector_position = self.feature_position(current_feature) + + # get threshold for each feature + threshold_position = vector_position + self.threshold_move(current_feature) if vector[vector_position] > vector[threshold_position]: - if self.features[current_feature].dtype == 'float': - border1 = (vector[vector_position] * (self.features[current_feature].max_val - - self.features[current_feature].min_val)) + self.features[current_feature].min_val + if feature.dtype == 'float': + border1 = vector[vector_position] * (feature.max_val - feature.min_val) + feature.min_val vector_position = vector_position + 1 - border2 = (vector[vector_position] * (self.features[current_feature].max_val - - self.features[current_feature].min_val)) + self.features[current_feature].min_val + border2 = vector[vector_position] * (feature.max_val - feature.min_val) + feature.min_val if border1 > border2: - inter = border1 - border1 = border2 - border2 = inter + border1, border2 = border2, border1 borders = [border1, border2] rule.append(borders) - elif self.features[current_feature].dtype == 'int': - border1 = round( - (vector[vector_position] * - ( - self.features[current_feature].max_val - - self.features[current_feature].min_val)) + - self.features[current_feature].min_val) + elif feature.dtype == 'int': + border1 = round(vector[vector_position] * (feature.max_val - feature.min_val) + feature.min_val) vector_position = vector_position + 1 - - border2 = round( - (vector[vector_position] * - ( - self.features[current_feature].max_val - - self.features[current_feature].min_val)) + - self.features[current_feature].min_val) + border2 = round(vector[vector_position] * (feature.max_val - feature.min_val) + feature.min_val) if border1 > border2: - inter = border1 - border1 = border2 - border2 = inter + border1, border2 = border2, border1 borders = [border1, border2] - rule.append(borders) - else: - categories = self.features[current_feature].categories - + categories = feature.categories selected = round(vector[vector_position] * (len(categories) - 1)) - - rule.append([self.features[current_feature].categories[selected]]) + rule.append([feature.categories[selected]]) else: rule.append('NO') @@ -113,14 +79,14 @@ def build_rule(self, vector): def map_permutation(self, vector): return vector[-len(self.features):] - def calculate_threshold_move(self, current_feature): + def threshold_move(self, current_feature): if self.features[current_feature].dtype == "float" or self.features[current_feature].dtype == "int": move = 2 else: move = 1 return move - def get_vector_position_of_feature(self, feature): + def feature_position(self, feature): position = 0 for i in range(feature): if self.features[i].dtype == "float" or self.features[i].dtype == "int": @@ -129,15 +95,7 @@ def get_vector_position_of_feature(self, feature): position = position + 2 return position - def return_permutation(self): - return self.permutation - - def calculate_support_confidence( - self, - antecedent, - consequence, - transactions): - + def support_confidence(self, antecedent, consequence, transactions): supp = 0 conf = 0 conf_counter = 0 @@ -147,35 +105,32 @@ def calculate_support_confidence( match1 = 0 match2 = 0 for j in range(len(antecedent)): - if self.features[self.permutation[j]].dtype == 'float' or self.features[self.permutation[j]].dtype == 'int': + dtype = self.features[self.permutation[j]].dtype + if dtype == 'float' or dtype == 'int': if antecedent[j] != 'NO': border = antecedent[j] - if (float(transactions[i][self.permutation[j]]) >= border[0]) and ( - float(transactions[i][self.permutation[j]]) <= border[1]): + if border[0] <= transactions[i, self.permutation[j]] <= border[1]: match1 = match1 + 1 - elif self.features[self.permutation[j]].dtype == 'cat': + elif dtype == 'cat': if antecedent[j] != 'NO': ant = antecedent[j] - if transactions[i][self.permutation[j]] == ant[0]: + if transactions[i, self.permutation[j]] == ant[0]: match1 = match1 + 1 # secondly consequence con_counter = 0 - for ll in range( - len(antecedent), - len(antecedent) + - len(consequence)): - if self.features[self.permutation[ll]].dtype == 'float' or self.features[self.permutation[ll]].dtype == 'int': + for ll in range(len(antecedent), len(antecedent) + len(consequence)): + dtype = self.features[self.permutation[ll]].dtype + if dtype == 'float' or dtype == 'int': if consequence[con_counter] != 'NO': border = consequence[con_counter] - if (float(transactions[i][self.permutation[ll]]) >= border[0]) and ( - float(transactions[i][self.permutation[ll]]) <= border[1]): + if border[0] <= transactions[i, self.permutation[ll]] <= border[1]: match2 = match2 + 1 - elif self.features[self.permutation[ll]].dtype == 'cat': + elif dtype == 'cat': if consequence[con_counter] != 'NO': con = consequence[con_counter] - if transactions[i][self.permutation[ll]] == con[0]: + if transactions[i, self.permutation[ll]] == con[0]: match2 = match2 + 1 con_counter = con_counter + 1 @@ -204,46 +159,41 @@ def calculate_support_confidence( return total_supp, total_conf - def calculate_coverage(self, antecedent, consequence): - missing_ant = antecedent.count("NO") - missing_con = consequence.count("NO") - - missing_total = missing_ant + missing_con + def coverage(self, antecedent, consequence): + missing_total = antecedent.count("NO") + consequence.count("NO") + return 1 - missing_total / len(self.features) - return 1 - float(float(missing_total) / float(len(self.features))) - - def calculate_shrinkage(self, antecedent, consequence): + def shrinkage(self, antecedent, consequence): differences = [] for i in range(len(antecedent)): - if self.features[self.permutation[i]].dtype == 'float' or self.features[self.permutation[i]].dtype == 'int': + feature = self.features[self.permutation[i]] + if feature.dtype == 'float' or feature.dtype == 'int': if antecedent[i] != 'NO': borders = antecedent[i] diff_borders = borders[1] - borders[0] - total_borders = self.features[self.permutation[i]].max_val - self.features[self.permutation[i]].min_val - diff = float(diff_borders / total_borders) + total_borders = feature.max_val - feature.min_val + diff = diff_borders / total_borders differences.append(diff) con_counter = 0 for ll in range(len(antecedent), len(antecedent) + len(consequence)): - if self.features[self.permutation[ll]].dtype == 'float' or self.features[self.permutation[ll]].dtype == 'int': + feature = self.features[self.permutation[ll]] + if feature.dtype == 'float' or feature.dtype == 'int': if consequence[con_counter] != 'NO': borders = consequence[con_counter] diff_borders = borders[1] - borders[0] - total_borders = self.features[self.permutation[ll]].max_val - self.features[self.permutation[ll]].min_val - diff = float(diff_borders / total_borders) + total_borders = feature.max_val - feature.min_val + diff = diff_borders / total_borders differences.append(diff) con_counter = con_counter + 1 - value = 0.0 - for i in range(len(differences)): - value = value + differences[i] + value = sum(differences) if len(differences) > 0: normalized = normalize(value, [0, len(differences)], [0, 1]) else: return 0.0 - return 1 - normalized def format_rules(self, antecedent, consequence): @@ -252,19 +202,19 @@ def format_rules(self, antecedent, consequence): for i in range(len(antecedent)): if antecedent[i] != "NO": - if self.features[self.permutation[i]].dtype == "cat": - rule = self.features[self.permutation[i]].name + "(" + str(antecedent[i][0]) + ")" + feature = self.features[self.permutation[i]] + if feature.dtype == "cat": + rule = feature.name + "(" + str(antecedent[i][0]) + ")" else: - rule = self.features[self.permutation[i]].name + "(" + str(antecedent[i]) + ")" - + rule = feature.name + "(" + str(antecedent[i]) + ")" antecedent1.append(rule) for i in range(len(consequence)): if consequence[i] != "NO": - if self.features[self.permutation[i + len(antecedent)]].dtype == "cat": - rule = self.features[self.permutation[i + len(antecedent)]].name + "(" + str(consequence[i]) + ")" + feature = self.features[self.permutation[i + len(antecedent)]] + if feature.dtype == "cat": + rule = feature.name + "(" + str(consequence[i][0]) + ")" else: - rule = self.features[self.permutation[i + len(antecedent)]].name + "(" + str(consequence[i]) + ")" - + rule = feature.name + "(" + str(consequence[i]) + ")" consequence1.append(rule) return antecedent1, consequence1 diff --git a/niaarm/niaarm.py b/niaarm/niaarm.py index a19c9f5..1dd5114 100644 --- a/niaarm/niaarm.py +++ b/niaarm/niaarm.py @@ -1,5 +1,5 @@ from niaarm.rule import Rule -from niaarm.association_rule import AssociationRule, is_rule_feasible, get_cut_point +from niaarm.association_rule import AssociationRule, rule_feasible, cut_point from niapy.problems import Problem import numpy as np import csv @@ -121,7 +121,7 @@ def _evaluate(self, sol): cut_value = sol[self.dimension - 1] # get cut point value solution = sol[:-1] # remove cut point - cut = get_cut_point(cut_value, len(self.features)) + cut = cut_point(cut_value, len(self.features)) rule = arm.build_rule(solution) @@ -130,20 +130,20 @@ def _evaluate(self, sol): consequence = rule[cut:] # check if rule is feasible - if is_rule_feasible(antecedent, consequence): + if rule_feasible(antecedent, consequence): # get support and confidence of rule - support, confidence = arm.calculate_support_confidence(antecedent, consequence, self.transactions) + support, confidence = arm.support_confidence(antecedent, consequence, self.transactions) if self.gamma == 0.0: shrinkage = 0 else: - shrinkage = arm.calculate_shrinkage(antecedent, consequence) + shrinkage = arm.shrinkage(antecedent, consequence) if self.delta == 0.0: coverage = 0 else: - coverage = arm.calculate_coverage(antecedent, consequence) + coverage = arm.coverage(antecedent, consequence) fitness = ((self.alpha * support) + (self.beta * confidence) + (self.gamma * shrinkage) + (self.delta * coverage)) / (self.alpha + self.beta + self.gamma + self.delta) diff --git a/niaarm/tests/test_coverage.py b/niaarm/tests/test_coverage.py index a5f12df..17aadf1 100644 --- a/niaarm/tests/test_coverage.py +++ b/niaarm/tests/test_coverage.py @@ -19,7 +19,7 @@ def test_a(self): oper = AssociationRule(self.features) - coverage = oper.calculate_coverage(antecedent, consequence) + coverage = oper.coverage(antecedent, consequence) self.assertEqual(coverage, 1) @@ -31,6 +31,6 @@ def test_b(self): oper = AssociationRule(self.features) - coverage = oper.calculate_coverage(antecedent, consequence) + coverage = oper.coverage(antecedent, consequence) self.assertEqual(coverage, 0.5) diff --git a/niaarm/tests/test_cut_point.py b/niaarm/tests/test_cut_point.py index 7d62624..c641456 100644 --- a/niaarm/tests/test_cut_point.py +++ b/niaarm/tests/test_cut_point.py @@ -1,5 +1,5 @@ from unittest import TestCase -from niaarm.association_rule import AssociationRule, get_cut_point +from niaarm.association_rule import AssociationRule, cut_point from niaarm.dataset import Dataset @@ -19,7 +19,7 @@ def test_cut_pointA(self): cut_value = sol[len(sol) - 1] new_sol = sol[:-1] - cut = get_cut_point(cut_value, len(self.features)) + cut = cut_point(cut_value, len(self.features)) rule = arm.build_rule(new_sol) @@ -123,7 +123,7 @@ def test_cut_pointB(self): new_sol = sol[:-1] - cut = get_cut_point(cut_value, len(self.features)) + cut = cut_point(cut_value, len(self.features)) rule = arm.build_rule(new_sol) @@ -220,7 +220,7 @@ def test_cut_pointC(self): new_sol = sol[:-1] - cut = get_cut_point(cut_value, len(self.features)) + cut = cut_point(cut_value, len(self.features)) rule = arm.build_rule(new_sol) diff --git a/niaarm/tests/test_rule_building.py b/niaarm/tests/test_rule_building.py index 00db028..87cc915 100644 --- a/niaarm/tests/test_rule_building.py +++ b/niaarm/tests/test_rule_building.py @@ -1,5 +1,5 @@ from unittest import TestCase -from niaarm.association_rule import AssociationRule, is_rule_feasible, get_permutation +from niaarm.association_rule import AssociationRule, rule_feasible, get_permutation from niaarm.dataset import Dataset @@ -31,34 +31,34 @@ def test_if_feasible_rule(self): consequence_c = ["NO"] self.assertEqual( - is_rule_feasible( + rule_feasible( antecedent_a, consequence_a), False) self.assertEqual( - is_rule_feasible( + rule_feasible( antecedent_b, consequence_b), True) self.assertEqual( - is_rule_feasible( + rule_feasible( antecedent_c, consequence_a), True) self.assertEqual( - is_rule_feasible( + rule_feasible( antecedent_c, consequence_b), True) self.assertEqual( - is_rule_feasible( + rule_feasible( antecedent_a, consequence_c), False) def test_threshold_move(self): - move = self.oper.calculate_threshold_move(0) - move2 = self.oper.calculate_threshold_move(1) + move = self.oper.threshold_move(0) + move2 = self.oper.threshold_move(1) self.assertEqual(move, 1) self.assertEqual(move2, 2) @@ -75,8 +75,8 @@ def test_vector_position(self): order = get_permutation(permutation) - position1 = self.oper.get_vector_position_of_feature(0) - position2 = self.oper.get_vector_position_of_feature(1) + position1 = self.oper.feature_position(0) + position2 = self.oper.feature_position(1) self.assertEqual(position1, 0) self.assertEqual(position2, 2) @@ -204,11 +204,11 @@ def test_get_permutation(self): 0.49068101]) def test_threshold_move(self): - move = self.oper.calculate_threshold_move(0) - move2 = self.oper.calculate_threshold_move(1) - move3 = self.oper.calculate_threshold_move(2) - move4 = self.oper.calculate_threshold_move(3) - move5 = self.oper.calculate_threshold_move(4) + move = self.oper.threshold_move(0) + move2 = self.oper.threshold_move(1) + move3 = self.oper.threshold_move(2) + move4 = self.oper.threshold_move(3) + move5 = self.oper.threshold_move(4) self.assertEqual(move, 1) self.assertEqual(move2, 2) @@ -260,9 +260,9 @@ def test_vector_position(self): self.assertEqual(order, [1, 3, 5, 4, 8, 7, 2, 6, 0]) - position1 = self.oper.get_vector_position_of_feature(0) - position2 = self.oper.get_vector_position_of_feature(1) - position3 = self.oper.get_vector_position_of_feature(2) + position1 = self.oper.feature_position(0) + position2 = self.oper.feature_position(1) + position3 = self.oper.feature_position(2) self.assertEqual(position1, 0) self.assertEqual(position2, 2) diff --git a/niaarm/tests/test_shrinkage.py b/niaarm/tests/test_shrinkage.py index f39724f..7edee68 100644 --- a/niaarm/tests/test_shrinkage.py +++ b/niaarm/tests/test_shrinkage.py @@ -1,5 +1,5 @@ from unittest import TestCase -from niaarm.association_rule import AssociationRule, normalize, get_cut_point +from niaarm.association_rule import AssociationRule, normalize, cut_point from niaarm.dataset import Dataset @@ -22,14 +22,14 @@ def test_A(self): oper = AssociationRule(self.features) - cut = get_cut_point(0, len(self.features)) + cut = cut_point(0, len(self.features)) rule = oper.build_rule(vector) antecedent = rule[:cut] consequence = rule[cut:] - shrinkage = oper.calculate_shrinkage(antecedent, consequence) + shrinkage = oper.shrinkage(antecedent, consequence) self.assertEqual(shrinkage, 1) @@ -54,14 +54,14 @@ def test_B(self): oper = AssociationRule(self.features) - cut = get_cut_point(0, len(self.features)) + cut = cut_point(0, len(self.features)) rule = oper.build_rule(vector) antecedent = rule[:cut] consequence = rule[cut:] - shrinkage = oper.calculate_shrinkage(antecedent, consequence) + shrinkage = oper.shrinkage(antecedent, consequence) self.assertEqual(shrinkage, 1) @@ -135,7 +135,7 @@ def test_get_permutation(self): cut_value = vector1[len(vector1) - 1] new_sol = vector1[:-1] - cut = get_cut_point(cut_value, len(self.features)) + cut = cut_point(cut_value, len(self.features)) rule = oper.build_rule(new_sol) @@ -143,7 +143,7 @@ def test_get_permutation(self): antecedent = rule[:cut] consequence = rule[cut:] - shrinkage = oper.calculate_shrinkage(antecedent, consequence) + shrinkage = oper.shrinkage(antecedent, consequence) norm = normalize(1.11324989, [0, 3], [0, 1]) diff --git a/niaarm/tests/test_support_confidence.py b/niaarm/tests/test_support_confidence.py index d77c9dd..4c0a701 100644 --- a/niaarm/tests/test_support_confidence.py +++ b/niaarm/tests/test_support_confidence.py @@ -1,5 +1,5 @@ from unittest import TestCase -from niaarm.association_rule import AssociationRule, get_cut_point +from niaarm.association_rule import AssociationRule, cut_point from niaarm.dataset import Dataset @@ -33,14 +33,14 @@ def test_a(self): oper = AssociationRule(self.features) - cut = get_cut_point(0, len(self.features)) + cut = cut_point(0, len(self.features)) rule = oper.build_rule(vector) antecedent = rule[:cut] consequence = rule[cut:] - support, confidence = oper.calculate_support_confidence( + support, confidence = oper.support_confidence( antecedent, consequence, self.transactions) self.assertEqual(antecedent, antecedent_a) @@ -69,14 +69,14 @@ def test_B(self): oper = AssociationRule(self.features) - cut = get_cut_point(0, len(self.features)) + cut = cut_point(0, len(self.features)) rule = oper.build_rule(vector) antecedent = rule[:cut] consequence = rule[cut:] - support, confidence = oper.calculate_support_confidence( + support, confidence = oper.support_confidence( antecedent, consequence, self.transactions) self.assertEqual(antecedent, antecedent_b)