diff --git a/niaarm/association_rule.py b/niaarm/association_rule.py index a15d919..e3b2b90 100644 --- a/niaarm/association_rule.py +++ b/niaarm/association_rule.py @@ -1,30 +1,9 @@ -def normalize(value, actual_bounds, real_bounds): - return real_bounds[0] + (value - real_bounds[0]) * (real_bounds[1] - real_bounds[0]) / (actual_bounds[1] - actual_bounds[0]) - - -def rule_feasible(ant, con): - return ant.count("NO") != len(ant) and con.count("NO") != len(con) - - -def cut_point(sol, num_attr): - cut = int(sol * num_attr) - if cut == 0: - cut = 1 - if cut > num_attr - 1: - cut = num_attr - 2 - return cut - - -def get_permutation(s): - return sorted(range(len(s)), key=lambda k: s[k]) - - class AssociationRule: r"""Class for main operations and quality measures. Attributes: - features (Iterable[Feature]): List of features. - permutation (Iterable[]) + features (list[Feature]): List of features. + permutation (list[int]): Permuted feature indices, """ def __init__(self, features): @@ -35,7 +14,7 @@ def build_rule(self, vector): rule = [] permutation = self.map_permutation(vector) - self.permutation = get_permutation(permutation) + self.permutation = _get_permutation(permutation) for i in range(len(self.features)): current_feature = self.permutation[i] @@ -191,7 +170,7 @@ def shrinkage(self, antecedent, consequence): value = sum(differences) if len(differences) > 0: - normalized = normalize(value, [0, len(differences)], [0, 1]) + normalized = _normalize(value, [0, len(differences)], [0, 1]) else: return 0.0 return 1 - normalized @@ -218,3 +197,24 @@ def format_rules(self, antecedent, consequence): rule = feature.name + "(" + str(consequence[i]) + ")" consequence1.append(rule) return antecedent1, consequence1 + + +def _normalize(value, actual_bounds, real_bounds): + return real_bounds[0] + (value - real_bounds[0]) * (real_bounds[1] - real_bounds[0]) / (actual_bounds[1] - actual_bounds[0]) + + +def _rule_feasible(ant, con): + return ant.count("NO") != len(ant) and con.count("NO") != len(con) + + +def _cut_point(sol, num_attr): + cut = int(sol * num_attr) + if cut == 0: + cut = 1 + if cut > num_attr - 1: + cut = num_attr - 2 + return cut + + +def _get_permutation(s): + return sorted(range(len(s)), key=lambda k: s[k]) diff --git a/niaarm/dataset.py b/niaarm/dataset.py index 6fc53ce..5f7eb3f 100644 --- a/niaarm/dataset.py +++ b/niaarm/dataset.py @@ -45,13 +45,7 @@ def __analyse_types(self): min_value = None max_value = None - self.features.append( - Feature( - head, - dtype, - min_value, - max_value, - unique_categories)) + self.features.append(Feature(head, dtype, min_value, max_value, unique_categories)) def __problem_dimension(self): r"""Calculate the dimension of the problem.""" diff --git a/niaarm/feature.py b/niaarm/feature.py index f221af5..00a3e57 100644 --- a/niaarm/feature.py +++ b/niaarm/feature.py @@ -1,5 +1,5 @@ from dataclasses import dataclass -from typing import Iterable, Optional +from typing import Optional @dataclass @@ -11,7 +11,7 @@ class Feature: dtype (str): Datatype of feature. min_val (Optional[float]): Minimum value of feature in transaction database. max_val (Optional[float]): Maximum value of feature in transaction database. - categories (Optional[Iterable[float]]): Possible categorical feature's values. + categories (Optional[list[float]]): Possible categorical feature's values. """ @@ -19,4 +19,4 @@ class Feature: dtype: str min_val: Optional[float] = None max_val: Optional[float] = None - categories: Optional[Iterable[float]] = None + categories: Optional[list[float]] = None diff --git a/niaarm/niaarm.py b/niaarm/niaarm.py index 1dd5114..f96e016 100644 --- a/niaarm/niaarm.py +++ b/niaarm/niaarm.py @@ -1,36 +1,10 @@ from niaarm.rule import Rule -from niaarm.association_rule import AssociationRule, rule_feasible, cut_point +from niaarm.association_rule import AssociationRule, _rule_feasible, _cut_point from niapy.problems import Problem import numpy as np import csv -def is_border_value_the_same(antecedent, consequence): - r"""In case lower and upper bounds of interval are the same. - We need this in order to provide clean output. - - Arguments: - antecedent (np.ndarray): . - consequence (np.ndarray): . - - Returns: - antecedent (array): - consequence (array): - """ - - for i in range(len(antecedent)): - if len(antecedent[i]) > 1: - if antecedent[i][0] == antecedent[i][1]: - antecedent[i] = antecedent[i][0] - - for i in range(len(consequence)): - if len(consequence[i]) > 1: - if consequence[i][0] == consequence[i][1]: - consequence[i] = consequence[i][0] - - return antecedent, consequence - - class NiaARM(Problem): r"""Implementation of NiaARM. @@ -51,15 +25,7 @@ class NiaARM(Problem): """ - def __init__( - self, - dimension, - features, - transactions, - alpha=0.0, - beta=0.0, - gamma=0.0, - delta=0.0): + def __init__(self, dimension, features, transactions, alpha=0.0, beta=0.0, gamma=0.0, delta=0.0): r"""Initialize instance of NiaARM. Arguments: @@ -77,30 +43,20 @@ def __init__( super().__init__(dimension, 0.0, 1.0) def rule_exists(self, antecedent, consequence): - r"""Check if association rule already exists. - Arguments: - antecedent (array): . - consequence (array): . - - Returns: - None - """ + r"""Check if association rule already exists.""" for rule in self.rules: if rule.antecedent == antecedent and rule.consequence == consequence: return True return False def export_rules(self, path): - r"""Save all association rules found to csv file. - - """ + r"""Save all association rules found to csv file.""" try: with open(path, 'w', newline='') as f: writer = csv.writer(f) # write header - writer.writerow( - ["Antecedent", "Consequence", "Fitness", "Support", "Confidence", "Coverage", "Shrinkage"]) + writer.writerow(["Antecedent", "Consequence", "Fitness", "Support", "Confidence", "Coverage", "Shrinkage"]) for rule in self.rules: writer.writerow( @@ -121,7 +77,7 @@ def _evaluate(self, sol): cut_value = sol[self.dimension - 1] # get cut point value solution = sol[:-1] # remove cut point - cut = cut_point(cut_value, len(self.features)) + cut = _cut_point(cut_value, len(self.features)) rule = arm.build_rule(solution) @@ -130,8 +86,7 @@ def _evaluate(self, sol): consequence = rule[cut:] # check if rule is feasible - if rule_feasible(antecedent, consequence): - + if _rule_feasible(antecedent, consequence): # get support and confidence of rule support, confidence = arm.support_confidence(antecedent, consequence, self.transactions) @@ -153,28 +108,43 @@ def _evaluate(self, sol): fitness = 0.0 if support > 0.0 and confidence > 0.0: - - antecedent, consequence = is_border_value_the_same(antecedent, consequence) + antecedent, consequence = _fix_border(antecedent, consequence) # format rule; remove NO; add name of features antecedent1, consequence1 = arm.format_rules(antecedent, consequence) # save feasible rule if not self.rule_exists(antecedent1, consequence1): - self.rules.append( - Rule( - antecedent1, - consequence1, - fitness, - support, - confidence, - coverage, - shrinkage - )) + self.rules.append(Rule(antecedent1, consequence1, fitness, support, confidence, coverage, shrinkage)) if fitness > self.best_fitness: self.best_fitness = fitness - print("Fitness:", fitness, "Support:", support, "Confidence:", confidence, "Coverage:", coverage, - "Shrinkage:", shrinkage) + print(f'Fitness: {fitness}, Support: {support}, Confidence:{confidence}, Coverage:{coverage}, Shrinkage:{shrinkage}') return fitness else: return -1.0 + + +def _fix_border(antecedent, consequence): + r"""In case lower and upper bounds of interval are the same. + We need this in order to provide clean output. + + Arguments: + antecedent (np.ndarray): . + consequence (np.ndarray): . + + Returns: + antecedent (array): + consequence (array): + """ + + for i in range(len(antecedent)): + if len(antecedent[i]) > 1: + if antecedent[i][0] == antecedent[i][1]: + antecedent[i] = antecedent[i][0] + + for i in range(len(consequence)): + if len(consequence[i]) > 1: + if consequence[i][0] == consequence[i][1]: + consequence[i] = consequence[i][0] + + return antecedent, consequence diff --git a/niaarm/rule.py b/niaarm/rule.py index 0be3587..b72e942 100644 --- a/niaarm/rule.py +++ b/niaarm/rule.py @@ -1,14 +1,14 @@ from dataclasses import dataclass -from typing import Iterable, Optional +from typing import Optional @dataclass class Rule: - r"""Class for representation of association rule. + r"""Class representing an association rule. Attributes: - antecedent (Iterable[str]): A list of antecedents of association rule. - consequence (Iterable[str]): A list of consequents of association rule. + antecedent (list[str]): A list of antecedents of association rule. + consequence (list[str]): A list of consequents of association rule. fitness (float): Value of fitness function. support (float): Value of support. confidence (float): Value of confidence. @@ -17,8 +17,8 @@ class Rule: """ - antecedent: Iterable[str] - consequence: Iterable[str] + antecedent: list[str] + consequence: list[str] fitness: float support: float confidence: float diff --git a/niaarm/stats.py b/niaarm/stats.py index 965df44..b6fe2e8 100644 --- a/niaarm/stats.py +++ b/niaarm/stats.py @@ -5,7 +5,7 @@ class Stats: r"""Class for providing statistical evaluation. Attributes: - rules (Iterable[Rule]): List of rules. + rules (list[Rule]): List of rules. """ def __init__(self, rules): diff --git a/niaarm/tests/test_cut_point.py b/niaarm/tests/test_cut_point.py index c641456..a2c3f78 100644 --- a/niaarm/tests/test_cut_point.py +++ b/niaarm/tests/test_cut_point.py @@ -1,5 +1,5 @@ from unittest import TestCase -from niaarm.association_rule import AssociationRule, cut_point +from niaarm.association_rule import AssociationRule, _cut_point from niaarm.dataset import Dataset @@ -19,7 +19,7 @@ def test_cut_pointA(self): cut_value = sol[len(sol) - 1] new_sol = sol[:-1] - cut = cut_point(cut_value, len(self.features)) + cut = _cut_point(cut_value, len(self.features)) rule = arm.build_rule(new_sol) @@ -123,7 +123,7 @@ def test_cut_pointB(self): new_sol = sol[:-1] - cut = cut_point(cut_value, len(self.features)) + cut = _cut_point(cut_value, len(self.features)) rule = arm.build_rule(new_sol) @@ -220,7 +220,7 @@ def test_cut_pointC(self): new_sol = sol[:-1] - cut = cut_point(cut_value, len(self.features)) + cut = _cut_point(cut_value, len(self.features)) rule = arm.build_rule(new_sol) diff --git a/niaarm/tests/test_rule_building.py b/niaarm/tests/test_rule_building.py index 87cc915..4aab0b1 100644 --- a/niaarm/tests/test_rule_building.py +++ b/niaarm/tests/test_rule_building.py @@ -1,5 +1,5 @@ from unittest import TestCase -from niaarm.association_rule import AssociationRule, rule_feasible, get_permutation +from niaarm.association_rule import AssociationRule, _rule_feasible, _get_permutation from niaarm.dataset import Dataset @@ -31,27 +31,27 @@ def test_if_feasible_rule(self): consequence_c = ["NO"] self.assertEqual( - rule_feasible( + _rule_feasible( antecedent_a, consequence_a), False) self.assertEqual( - rule_feasible( + _rule_feasible( antecedent_b, consequence_b), True) self.assertEqual( - rule_feasible( + _rule_feasible( antecedent_c, consequence_a), True) self.assertEqual( - rule_feasible( + _rule_feasible( antecedent_c, consequence_b), True) self.assertEqual( - rule_feasible( + _rule_feasible( antecedent_a, consequence_c), False) @@ -73,7 +73,7 @@ def test_vector_position(self): permutation = self.oper.map_permutation( [0.98328107, 0.93655004, 0.6860223, 0.78527931, 0.96291945, 0.18117294, 0.50567635]) - order = get_permutation(permutation) + order = _get_permutation(permutation) position1 = self.oper.feature_position(0) position2 = self.oper.feature_position(1) @@ -256,7 +256,7 @@ def test_vector_position(self): permutation = self.oper.map_permutation(vector1) - order = get_permutation(permutation) + order = _get_permutation(permutation) self.assertEqual(order, [1, 3, 5, 4, 8, 7, 2, 6, 0]) @@ -352,7 +352,7 @@ def test_build_rule(self): permutation = self.oper.map_permutation(vector2) - order2 = get_permutation(permutation) + order2 = _get_permutation(permutation) rule2 = self.oper.build_rule(vector2) @@ -400,7 +400,7 @@ def test_build_rule(self): permutation = self.oper.map_permutation(vector3) - order3 = get_permutation(permutation) + order3 = _get_permutation(permutation) rule3 = self.oper.build_rule(vector3) diff --git a/niaarm/tests/test_shrinkage.py b/niaarm/tests/test_shrinkage.py index 7edee68..23ad7f2 100644 --- a/niaarm/tests/test_shrinkage.py +++ b/niaarm/tests/test_shrinkage.py @@ -1,5 +1,5 @@ from unittest import TestCase -from niaarm.association_rule import AssociationRule, normalize, cut_point +from niaarm.association_rule import AssociationRule, _normalize, _cut_point from niaarm.dataset import Dataset @@ -22,7 +22,7 @@ def test_A(self): oper = AssociationRule(self.features) - cut = cut_point(0, len(self.features)) + cut = _cut_point(0, len(self.features)) rule = oper.build_rule(vector) @@ -54,7 +54,7 @@ def test_B(self): oper = AssociationRule(self.features) - cut = cut_point(0, len(self.features)) + cut = _cut_point(0, len(self.features)) rule = oper.build_rule(vector) @@ -135,7 +135,7 @@ def test_get_permutation(self): cut_value = vector1[len(vector1) - 1] new_sol = vector1[:-1] - cut = cut_point(cut_value, len(self.features)) + cut = _cut_point(cut_value, len(self.features)) rule = oper.build_rule(new_sol) @@ -145,7 +145,7 @@ def test_get_permutation(self): shrinkage = oper.shrinkage(antecedent, consequence) - norm = normalize(1.11324989, [0, 3], [0, 1]) + norm = _normalize(1.11324989, [0, 3], [0, 1]) self.assertEqual(norm, 0.3710832966666667) self.assertEqual(shrinkage, 0.6289167033333334) diff --git a/niaarm/tests/test_support_confidence.py b/niaarm/tests/test_support_confidence.py index 4c0a701..a0b12a4 100644 --- a/niaarm/tests/test_support_confidence.py +++ b/niaarm/tests/test_support_confidence.py @@ -1,5 +1,5 @@ from unittest import TestCase -from niaarm.association_rule import AssociationRule, cut_point +from niaarm.association_rule import AssociationRule, _cut_point from niaarm.dataset import Dataset @@ -33,7 +33,7 @@ def test_a(self): oper = AssociationRule(self.features) - cut = cut_point(0, len(self.features)) + cut = _cut_point(0, len(self.features)) rule = oper.build_rule(vector) @@ -69,7 +69,7 @@ def test_B(self): oper = AssociationRule(self.features) - cut = cut_point(0, len(self.features)) + cut = _cut_point(0, len(self.features)) rule = oper.build_rule(vector)