Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Code clean up and refactoring #12

Merged
merged 5 commits into from
Feb 26, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 21 additions & 39 deletions basic_run.py
Original file line number Diff line number Diff line change
@@ -1,50 +1,32 @@
from niaarm import NiaARM
from niaarm.dataset import Dataset
from niapy.algorithms.basic import ParticleSwarmAlgorithm, DifferentialEvolution
from niapy.algorithms.basic import DifferentialEvolution
from niapy.task import Task, OptimizationType

# load dataset from csv
data = Dataset("datasets/Abalone.csv")

# preprocess dataset and obtain features
features = data.get_features()
if __name__ == '__main__':
# load and preprocess dataset from csv
data = Dataset("datasets/Abalone.csv")

# calculate dimension of the problem
dimension = data.calculate_dimension_of_individual()
# Create a problem:
# dimension is the dimension of the problem;
# features is the list of features, while transactions is the list of transactions;
# the following 4 elements represent weights (support, confidence, coverage, shrinkage);
# None means that a criterion is omitted and is therefore excluded from the fitness function
problem = NiaARM(data.dimension, data.features, data.transactions, alpha=1.0, beta=1.0)

# obtain transaction database
transactions = data.transaction_data
# build niapy task
task = Task(problem=problem, max_iters=30, optimization_type=OptimizationType.MAXIMIZATION)

# Create a problem:
# dimension is the dimension of the problem;
# 0, 1 is the range of the search space;
# features is the list of features, while transactions is the list of transactions;
# the following 4 elements represent weights (support, confidence, coverage, shrinkage);
# None means that a criterion is omitted and is therefore excluded from the fitness function;
# the final element is the name of the file in which the obtained rules are stored in csv format
problem = NiaARM(dimension, 0, 1, features, transactions, 1.0, 1.0, None, None, "results.csv")
# use Differential Evolution (DE) algorithm from the NiaPy library
# see full list of available algorithms: https://github.com/NiaOrg/NiaPy/blob/master/Algorithms.md
algo = DifferentialEvolution(population_size=50, differential_weight=0.5, crossover_probability=0.9)

# build niapy task
task = Task(
problem=problem,
max_iters=3,
optimization_type=OptimizationType.MAXIMIZATION)
# run algorithm
best = algo.run(task=task)

# use Differential Evolution (DE) algorithm
# see full list of available algorithms: https://github.com/NiaOrg/NiaPy/blob/master/Algorithms.md
algo = DifferentialEvolution(population_size=50, differential_weight=0.5, crossover_probability=0.9)
# sort rules
problem.sort_rules()

# use Particle swarm Optimization (PSO) algorithm from NiaPy library
algo2 = ParticleSwarmAlgorithm(
population_size=100,
min_velocity=-4.0,
max_velocity=4.0)

# run algorithm
best = algo.run(task=task)

# sort rules
problem.sort_rules()

# export all rules to csv
problem.rules_to_csv()
# export all rules to csv
problem.export_rules('output.csv')
193 changes: 67 additions & 126 deletions niaarm/association_rule.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,33 @@
import math
def normalize(value, actual_bounds, real_bounds):
    """Linearly rescale ``value`` from ``actual_bounds`` onto ``real_bounds``.

    Args:
        value: Number to rescale.
        actual_bounds: ``[low, high]`` pair describing the interval that
            ``value`` is currently measured on.
        real_bounds: ``[low, high]`` pair describing the target interval.

    Returns:
        ``value`` mapped onto ``real_bounds``; ``actual_bounds[0]`` maps to
        ``real_bounds[0]`` and ``actual_bounds[1]`` to ``real_bounds[1]``.

    Note:
        A zero-width ``actual_bounds`` divides by zero.
    """
    actual_low, actual_high = actual_bounds[0], actual_bounds[1]
    real_low, real_high = real_bounds[0], real_bounds[1]
    # The offset must be taken from the *source* interval's lower bound;
    # subtracting the target's lower bound is only right when both intervals
    # happen to start at the same value.
    return real_low + (value - actual_low) * (real_high - real_low) / (actual_high - actual_low)


def is_rule_feasible(ant, con):
    """Tell whether both sides of a rule keep at least one attribute.

    Args:
        ant: Antecedent entries; the string ``"NO"`` marks an omitted one.
        con: Consequent entries, using the same ``"NO"`` marker.

    Returns:
        ``False`` when every entry of ``ant`` or every entry of ``con`` is
        ``"NO"`` (an empty side counts as such), otherwise ``True``.
    """
    antecedent_is_blank = len(ant) == ant.count("NO")
    consequent_is_blank = len(con) == con.count("NO")
    return not (antecedent_is_blank or consequent_is_blank)


def get_cut_point(sol, num_attr):
    """Turn a solution component into the index at which a rule is split.

    Args:
        sol: Number scaled by ``num_attr`` to obtain the raw cut index.
        num_attr: Number of attributes in the rule.

    Returns:
        ``int(sol * num_attr)``, raised to 1 when it is 0 and pulled back to
        ``num_attr - 2`` when it exceeds ``num_attr - 1``. The pulled-back
        value is never allowed below 1.
    """
    cut = int(sol * num_attr)
    if cut == 0:
        cut = 1
    if cut > num_attr - 1:
        # For num_attr == 2 the plain ``num_attr - 2`` would be 0, which
        # leaves nothing on the left of the cut; keep at least one attribute.
        cut = max(num_attr - 2, 1)
    return cut


def get_permutation(s):
    """Return the indices of ``s`` ordered by ascending element value.

    Args:
        s: Indexable sequence of mutually comparable values.

    Returns:
        A list of indices; equal values keep their original relative order.
    """
    order = list(range(len(s)))
    # list.sort is stable, just like sorted(), so ties stay in index order.
    order.sort(key=s.__getitem__)
    return order


class AssociationRule:
Expand All @@ -18,7 +47,7 @@ def build_rule(self, vector):

permutation = self.map_permutation(vector)

self.permutation = self.get_permutation(permutation)
self.permutation = get_permutation(permutation)

for i in range(len(self.features)):
current_feature = self.permutation[i]
Expand All @@ -33,10 +62,10 @@ def build_rule(self, vector):

if vector[vector_position] > vector[threshold_position]:
if self.features[current_feature].dtype == 'float':
border1 = (vector[vector_position] * (self.features[current_feature].max_val - \
border1 = (vector[vector_position] * (self.features[current_feature].max_val -
self.features[current_feature].min_val)) + self.features[current_feature].min_val
vector_position = vector_position + 1
border2 = (vector[vector_position] * (self.features[current_feature].max_val - \
border2 = (vector[vector_position] * (self.features[current_feature].max_val -
self.features[current_feature].min_val)) + self.features[current_feature].min_val

if border1 > border2:
Expand Down Expand Up @@ -73,11 +102,9 @@ def build_rule(self, vector):
else:
categories = self.features[current_feature].categories

selected = round(vector[vector_position]
* (len(categories) - 1))
selected = round(vector[vector_position] * (len(categories) - 1))

rule.append(
[self.features[current_feature].categories[selected]])
rule.append([self.features[current_feature].categories[selected]])
else:
rule.append('NO')

Expand All @@ -86,14 +113,6 @@ def build_rule(self, vector):
def map_permutation(self, vector):
    """Return the tail of ``vector`` holding one entry per feature.

    Args:
        vector: Sliceable solution vector.

    Returns:
        The last ``len(self.features)`` entries of ``vector``; an empty
        slice when there are no features.
    """
    feature_count = len(self.features)
    if feature_count == 0:
        # ``vector[-0:]`` is the whole vector, not an empty tail.
        return vector[:0]
    return vector[-feature_count:]

def is_rule_feasible(self, ant, con):
    """Tell whether both sides of a rule keep at least one attribute.

    Args:
        ant: Antecedent entries; the string ``"NO"`` marks an omitted one.
        con: Consequent entries, using the same ``"NO"`` marker.

    Returns:
        ``False`` when every entry of ``ant`` or every entry of ``con`` is
        ``"NO"`` (an empty side counts as such), otherwise ``True``.
    """
    blank_in_ant = ant.count("NO")
    blank_in_con = con.count("NO")
    return blank_in_ant != len(ant) and blank_in_con != len(con)

def calculate_threshold_move(self, current_feature):
if self.features[current_feature].dtype == "float" or self.features[current_feature].dtype == "int":
move = 2
Expand All @@ -113,23 +132,6 @@ def get_vector_position_of_feature(self, feature):
def return_permutation(self):
    """Return the ``permutation`` attribute stored on this object."""
    stored_order = self.permutation
    return stored_order

def get_cut_point(self, sol, num_attr):
    """Turn a solution component into the index at which a rule is split.

    Args:
        sol: Number scaled by ``num_attr`` to obtain the raw cut index.
        num_attr: Number of attributes in the rule.

    Returns:
        ``int(sol * num_attr)``, raised to 1 when it is 0 and pulled back to
        ``num_attr - 2`` when it exceeds ``num_attr - 1``. The pulled-back
        value is never allowed below 1.
    """
    cut = int(sol * num_attr)
    if cut == 0:
        cut = 1
    if cut > num_attr - 1:
        # For num_attr == 2 the plain ``num_attr - 2`` would be 0, which
        # leaves nothing on the left of the cut; keep at least one attribute.
        cut = max(num_attr - 2, 1)
    return cut

def get_ant_con(self, rule, cut):
    """Split ``rule`` at index ``cut``.

    Args:
        rule: Sliceable sequence of rule entries.
        cut: Index of the first entry that belongs to the consequent.

    Returns:
        Tuple ``(rule[:cut], rule[cut:])``.
    """
    return rule[:cut], rule[cut:]

def get_permutation(self, s):
    """Return the indices of ``s`` ordered by ascending element value.

    Args:
        s: Indexable sequence of mutually comparable values.

    Returns:
        A list of indices; equal values keep their original relative order.
    """
    order = list(range(len(s)))
    # list.sort is stable, just like sorted(), so ties stay in index order.
    order.sort(key=s.__getitem__)
    return order

def calculate_support_confidence(
self,
antecedent,
Expand All @@ -141,26 +143,20 @@ def calculate_support_confidence(
conf_counter = 0

# firstly antecedent
for i in range(0, len(transactions)):
for i in range(len(transactions)):
match1 = 0
match2 = 0
for l in range(len(antecedent)):
if self.features[self.permutation[l]
].dtype == 'float' or self.features[self.permutation[l]].dtype == 'int':
if antecedent[l] == 'NO':
pass
else:
border = antecedent[l]
if (float(transactions[i][self.permutation[l]]) >= border[0]) and (
float(transactions[i][self.permutation[l]]) <= border[1]):
for j in range(len(antecedent)):
if self.features[self.permutation[j]].dtype == 'float' or self.features[self.permutation[j]].dtype == 'int':
if antecedent[j] != 'NO':
border = antecedent[j]
if (float(transactions[i][self.permutation[j]]) >= border[0]) and (
float(transactions[i][self.permutation[j]]) <= border[1]):
match1 = match1 + 1
elif self.features[self.permutation[l]].dtype == 'cat':
if antecedent[l] == 'NO':
pass
else:
ant = antecedent[l]
if transactions[i][self.permutation[l]
] == ant[0]:
elif self.features[self.permutation[j]].dtype == 'cat':
if antecedent[j] != 'NO':
ant = antecedent[j]
if transactions[i][self.permutation[j]] == ant[0]:
match1 = match1 + 1

# secondly consequence
Expand All @@ -169,19 +165,14 @@ def calculate_support_confidence(
len(antecedent),
len(antecedent) +
len(consequence)):
if self.features[self.permutation[ll]
].dtype == 'float' or self.features[self.permutation[ll]].dtype == 'int':
if consequence[con_counter] == 'NO':
pass
else:
if self.features[self.permutation[ll]].dtype == 'float' or self.features[self.permutation[ll]].dtype == 'int':
if consequence[con_counter] != 'NO':
border = consequence[con_counter]
if (float(transactions[i][self.permutation[ll]]) >= border[0]) and (
float(transactions[i][self.permutation[ll]]) <= border[1]):
match2 = match2 + 1
elif self.features[self.permutation[ll]].dtype == 'cat':
if consequence[con_counter] == 'NO':
pass
else:
if consequence[con_counter] != 'NO':
con = consequence[con_counter]

if transactions[i][self.permutation[ll]] == con[0]:
Expand Down Expand Up @@ -213,63 +204,33 @@ def calculate_support_confidence(

return total_supp, total_conf

def check_no(self, antecedent, consequence):
    """Check that neither side of a rule consists solely of ``"NO"`` entries.

    Args:
        antecedent: Antecedent entries; ``"NO"`` marks an omitted attribute.
        consequence: Consequent entries, using the same marker.

    Returns:
        ``True`` when both sides contain at least one entry other than
        ``"NO"``; ``False`` otherwise (an empty side yields ``False``).
    """
    blank_and_size = [
        (side.count("NO"), len(side)) for side in (antecedent, consequence)
    ]
    return all(blanks != size for blanks, size in blank_and_size)

def calculate_coverage(self, antecedent, consequence):
    """Return the share of features that are not marked ``"NO"`` in the rule.

    Args:
        antecedent: Antecedent entries; ``"NO"`` marks an omitted attribute.
        consequence: Consequent entries, using the same marker.

    Returns:
        ``1 - (number of "NO" entries) / len(self.features)``.
    """
    blank_slots = antecedent.count("NO") + consequence.count("NO")
    missing_share = float(blank_slots) / float(len(self.features))
    return 1 - missing_share

def normalize(self, value, actual_bounds, real_bounds):
    """Linearly rescale ``value`` from ``actual_bounds`` onto ``real_bounds``.

    Args:
        value: Number to rescale.
        actual_bounds: ``[low, high]`` pair describing the interval that
            ``value`` is currently measured on.
        real_bounds: ``[low, high]`` pair describing the target interval.

    Returns:
        ``value`` mapped onto ``real_bounds``; ``actual_bounds[0]`` maps to
        ``real_bounds[0]`` and ``actual_bounds[1]`` to ``real_bounds[1]``.

    Note:
        A zero-width ``actual_bounds`` divides by zero.
    """
    actual_low, actual_high = actual_bounds[0], actual_bounds[1]
    real_low, real_high = real_bounds[0], real_bounds[1]
    # The offset must be taken from the *source* interval's lower bound;
    # subtracting the target's lower bound is only right when both intervals
    # happen to start at the same value.
    return real_low + (value - actual_low) * (real_high - real_low) / (actual_high - actual_low)
return 1 - float(float(missing_total) / float(len(self.features)))

def calculate_shrinkage(self, antecedent, consequence):
differences = []

for i in range(len(antecedent)):
if self.features[self.permutation[i]
].dtype == 'float' or self.features[self.permutation[i]].dtype == 'int':
if antecedent[i] == 'NO':
pass
else:
if self.features[self.permutation[i]].dtype == 'float' or self.features[self.permutation[i]].dtype == 'int':
if antecedent[i] != 'NO':
borders = antecedent[i]
diff_borders = borders[1] - borders[0]
total_borders = self.features[self.permutation[i]
].max_val - self.features[self.permutation[i]].min_val
total_borders = self.features[self.permutation[i]].max_val - self.features[self.permutation[i]].min_val
diff = float(diff_borders / total_borders)
differences.append(diff)

con_counter = 0
for ll in range(len(antecedent), len(antecedent) + len(consequence)):
if self.features[self.permutation[ll]
].dtype == 'float' or self.features[self.permutation[ll]].dtype == 'int':
if consequence[con_counter] == 'NO':
pass
else:
if self.features[self.permutation[ll]].dtype == 'float' or self.features[self.permutation[ll]].dtype == 'int':
if consequence[con_counter] != 'NO':
borders = consequence[con_counter]
diff_borders = borders[1] - borders[0]
total_borders = self.features[self.permutation[ll]
].max_val - self.features[self.permutation[ll]].min_val
total_borders = self.features[self.permutation[ll]].max_val - self.features[self.permutation[ll]].min_val
diff = float(diff_borders / total_borders)
differences.append(diff)
con_counter = con_counter + 1
Expand All @@ -279,51 +240,31 @@ def calculate_shrinkage(self, antecedent, consequence):
value = value + differences[i]

if len(differences) > 0:
normalized = self.normalize(value, [0, len(differences)], [0, 1])
normalized = normalize(value, [0, len(differences)], [0, 1])
else:
return 0.0

return (1 - normalized)

def calculate_fitness(
        self,
        alpha,
        beta,
        gamma,
        delta,
        support,
        confidence,
        shrinkage,
        coverage):
    """Return the weighted average of the four rule metrics.

    Args:
        alpha: Weight applied to ``support``.
        beta: Weight applied to ``confidence``.
        gamma: Weight applied to ``shrinkage``.
        delta: Weight applied to ``coverage``.
        support: Support value of the rule.
        confidence: Confidence value of the rule.
        shrinkage: Shrinkage value of the rule.
        coverage: Coverage value of the rule.

    Returns:
        ``(alpha*support + beta*confidence + gamma*shrinkage +
        delta*coverage) / (alpha + beta + gamma + delta)``. A weight given
        as ``None`` is treated as 0, so that criterion drops out of both the
        numerator and the denominator.

    Note:
        Weights that sum to zero divide by zero.
    """
    # A ``None`` weight means "criterion omitted"; multiplying by None would
    # raise TypeError, so map it to a zero weight first.
    alpha, beta, gamma, delta = (
        0.0 if weight is None else weight
        for weight in (alpha, beta, gamma, delta)
    )
    weighted_sum = (
        (alpha * support)
        + (beta * confidence)
        + (gamma * shrinkage)
        + (delta * coverage)
    )
    return weighted_sum / (alpha + beta + gamma + delta)
return 1 - normalized

def format_rules(self, antecedent, consequence):
    """Render both sides of a rule as lists of ``name(value)`` strings.

    Entries equal to ``"NO"`` are skipped. The entry at position ``i`` of
    the antecedent belongs to feature ``self.permutation[i]``; the entry at
    position ``i`` of the consequent belongs to feature
    ``self.permutation[i + len(antecedent)]``.

    Args:
        antecedent: Antecedent entries of the rule.
        consequence: Consequent entries of the rule.

    Returns:
        Tuple ``(antecedent1, consequence1)`` of string lists.
    """
    def describe(position, value):
        feature = self.features[self.permutation[position]]
        if feature.dtype == "cat":
            # Categorical entries are one-element lists; print the bare
            # category on both sides of the rule, not the wrapping list.
            value = value[0]
        return feature.name + "(" + str(value) + ")"

    antecedent1 = [
        describe(i, value)
        for i, value in enumerate(antecedent)
        if value != "NO"
    ]

    offset = len(antecedent)
    consequence1 = [
        describe(offset + i, value)
        for i, value in enumerate(consequence)
        if value != "NO"
    ]

    return antecedent1, consequence1
Loading