Merge pull request #14 from zStupan/main

Minor formatting
firefly-cpp · Feb 26, 2022 · b7d68b0 · b7d68b0
2 parents 188794d + b77eff8
commit b7d68b0
Show file tree

Hide file tree

Showing 10 changed files with 94 additions and 130 deletions.
diff --git a/niaarm/association_rule.py b/niaarm/association_rule.py
@@ -1,30 +1,9 @@
-def normalize(value, actual_bounds, real_bounds):
-    return real_bounds[0] + (value - real_bounds[0]) * (real_bounds[1] - real_bounds[0]) / (actual_bounds[1] - actual_bounds[0])
-
-
-def rule_feasible(ant, con):
-    return ant.count("NO") != len(ant) and con.count("NO") != len(con)
-
-
-def cut_point(sol, num_attr):
-    cut = int(sol * num_attr)
-    if cut == 0:
-        cut = 1
-    if cut > num_attr - 1:
-        cut = num_attr - 2
-    return cut
-
-
-def get_permutation(s):
-    return sorted(range(len(s)), key=lambda k: s[k])
-
-
 class AssociationRule:
     r"""Class for main operations and quality measures.
 
     Attributes:
-        features (Iterable[Feature]): List of features.
-        permutation (Iterable[])
+        features (list[Feature]): List of features.
+        permutation (list[int]): Permuted feature indices.
     """
 
     def __init__(self, features):
@@ -35,7 +14,7 @@ def build_rule(self, vector):
         rule = []
 
         permutation = self.map_permutation(vector)
-        self.permutation = get_permutation(permutation)
+        self.permutation = _get_permutation(permutation)
 
         for i in range(len(self.features)):
             current_feature = self.permutation[i]
@@ -191,7 +170,7 @@ def shrinkage(self, antecedent, consequence):
         value = sum(differences)
 
         if len(differences) > 0:
-            normalized = normalize(value, [0, len(differences)], [0, 1])
+            normalized = _normalize(value, [0, len(differences)], [0, 1])
         else:
             return 0.0
         return 1 - normalized
@@ -218,3 +197,24 @@ def format_rules(self, antecedent, consequence):
                     rule = feature.name + "(" + str(consequence[i]) + ")"
                 consequence1.append(rule)
         return antecedent1, consequence1
+
+
+def _normalize(value, actual_bounds, real_bounds):
+    return real_bounds[0] + (value - real_bounds[0]) * (real_bounds[1] - real_bounds[0]) / (actual_bounds[1] - actual_bounds[0])
+
+
+def _rule_feasible(ant, con):
+    return ant.count("NO") != len(ant) and con.count("NO") != len(con)
+
+
+def _cut_point(sol, num_attr):
+    cut = int(sol * num_attr)
+    if cut == 0:
+        cut = 1
+    if cut > num_attr - 1:
+        cut = num_attr - 2
+    return cut
+
+
+def _get_permutation(s):
+    return sorted(range(len(s)), key=lambda k: s[k])
diff --git a/niaarm/dataset.py b/niaarm/dataset.py
@@ -45,13 +45,7 @@ def __analyse_types(self):
                 min_value = None
                 max_value = None
 
-            self.features.append(
-                Feature(
-                    head,
-                    dtype,
-                    min_value,
-                    max_value,
-                    unique_categories))
+            self.features.append(Feature(head, dtype, min_value, max_value, unique_categories))
 
     def __problem_dimension(self):
         r"""Calculate the dimension of the problem."""

diff --git a/niaarm/feature.py b/niaarm/feature.py
@@ -1,5 +1,5 @@
 from dataclasses import dataclass
-from typing import Iterable, Optional
+from typing import Optional
 
 
 @dataclass
@@ -11,12 +11,12 @@ class Feature:
        dtype (str): Datatype of feature.
        min_val (Optional[float]): Minimum value of feature in transaction database.
        max_val (Optional[float]): Maximum value of feature in transaction database.
-       categories (Optional[Iterable[float]]): Possible categorical feature's values.
+       categories (Optional[list[float]]): Possible categorical feature's values.
 
     """
 
     name: str
     dtype: str
     min_val: Optional[float] = None
     max_val: Optional[float] = None
-    categories: Optional[Iterable[float]] = None
+    categories: Optional[list[float]] = None
diff --git a/niaarm/niaarm.py b/niaarm/niaarm.py
@@ -1,36 +1,10 @@
 from niaarm.rule import Rule
-from niaarm.association_rule import AssociationRule, rule_feasible, cut_point
+from niaarm.association_rule import AssociationRule, _rule_feasible, _cut_point
 from niapy.problems import Problem
 import numpy as np
 import csv
 
 
-def is_border_value_the_same(antecedent, consequence):
-    r"""In case lower and upper bounds of interval are the same.
-        We need this in order to provide clean output.
-
-        Arguments:
-            antecedent (np.ndarray): .
-            consequence (np.ndarray): .
-
-        Returns:
-            antecedent (array):
-            consequence (array):
-    """
-
-    for i in range(len(antecedent)):
-        if len(antecedent[i]) > 1:
-            if antecedent[i][0] == antecedent[i][1]:
-                antecedent[i] = antecedent[i][0]
-
-    for i in range(len(consequence)):
-        if len(consequence[i]) > 1:
-            if consequence[i][0] == consequence[i][1]:
-                consequence[i] = consequence[i][0]
-
-    return antecedent, consequence
-
-
 class NiaARM(Problem):
     r"""Implementation of NiaARM.
 
@@ -51,15 +25,7 @@ class NiaARM(Problem):
 
     """
 
-    def __init__(
-            self,
-            dimension,
-            features,
-            transactions,
-            alpha=0.0,
-            beta=0.0,
-            gamma=0.0,
-            delta=0.0):
+    def __init__(self, dimension, features, transactions, alpha=0.0, beta=0.0, gamma=0.0, delta=0.0):
         r"""Initialize instance of NiaARM.
 
         Arguments:
@@ -77,30 +43,20 @@ def __init__(
         super().__init__(dimension, 0.0, 1.0)
 
     def rule_exists(self, antecedent, consequence):
-        r"""Check if association rule already exists.
-            Arguments:
-                antecedent (array): .
-                consequence (array): .
-
-            Returns:
-                None
-        """
+        r"""Check if association rule already exists."""
         for rule in self.rules:
             if rule.antecedent == antecedent and rule.consequence == consequence:
                 return True
         return False
 
     def export_rules(self, path):
-        r"""Save all association rules found to csv file.
-
-        """
+        r"""Save all association rules found to csv file."""
         try:
             with open(path, 'w', newline='') as f:
                 writer = csv.writer(f)
 
                 # write header
-                writer.writerow(
-                    ["Antecedent", "Consequence", "Fitness", "Support", "Confidence", "Coverage", "Shrinkage"])
+                writer.writerow(["Antecedent", "Consequence", "Fitness", "Support", "Confidence", "Coverage", "Shrinkage"])
 
                 for rule in self.rules:
                     writer.writerow(
@@ -121,7 +77,7 @@ def _evaluate(self, sol):
         cut_value = sol[self.dimension - 1]  # get cut point value
         solution = sol[:-1]  # remove cut point
 
-        cut = cut_point(cut_value, len(self.features))
+        cut = _cut_point(cut_value, len(self.features))
 
         rule = arm.build_rule(solution)
 
@@ -130,8 +86,7 @@ def _evaluate(self, sol):
         consequence = rule[cut:]
 
         # check if rule is feasible
-        if rule_feasible(antecedent, consequence):
-
+        if _rule_feasible(antecedent, consequence):
             # get support and confidence of rule
             support, confidence = arm.support_confidence(antecedent, consequence, self.transactions)
 
@@ -153,28 +108,43 @@ def _evaluate(self, sol):
                 fitness = 0.0
 
             if support > 0.0 and confidence > 0.0:
-
-                antecedent, consequence = is_border_value_the_same(antecedent, consequence)
+                antecedent, consequence = _fix_border(antecedent, consequence)
                 # format rule; remove NO; add name of features
                 antecedent1, consequence1 = arm.format_rules(antecedent, consequence)
 
                 # save feasible rule
                 if not self.rule_exists(antecedent1, consequence1):
-                    self.rules.append(
-                        Rule(
-                            antecedent1,
-                            consequence1,
-                            fitness,
-                            support,
-                            confidence,
-                            coverage,
-                            shrinkage
-                        ))
+                    self.rules.append(Rule(antecedent1, consequence1, fitness, support, confidence, coverage, shrinkage))
 
                 if fitness > self.best_fitness:
                     self.best_fitness = fitness
-                    print("Fitness:", fitness, "Support:", support, "Confidence:", confidence, "Coverage:", coverage,
-                          "Shrinkage:", shrinkage)
+                    print(f'Fitness: {fitness}, Support: {support}, Confidence:{confidence}, Coverage:{coverage}, Shrinkage:{shrinkage}')
             return fitness
         else:
             return -1.0
+
+
+def _fix_border(antecedent, consequence):
+    r"""Collapse each interval whose lower and upper bounds are the same to a single value.
+        We need this in order to provide clean output.
+
+        Arguments:
+            antecedent (np.ndarray): .
+            consequence (np.ndarray): .
+
+        Returns:
+            antecedent (array):
+            consequence (array):
+    """
+
+    for i in range(len(antecedent)):
+        if len(antecedent[i]) > 1:
+            if antecedent[i][0] == antecedent[i][1]:
+                antecedent[i] = antecedent[i][0]
+
+    for i in range(len(consequence)):
+        if len(consequence[i]) > 1:
+            if consequence[i][0] == consequence[i][1]:
+                consequence[i] = consequence[i][0]
+
+    return antecedent, consequence
diff --git a/niaarm/rule.py b/niaarm/rule.py
@@ -1,14 +1,14 @@
 from dataclasses import dataclass
-from typing import Iterable, Optional
+from typing import Optional
 
 
 @dataclass
 class Rule:
-    r"""Class for representation of association rule.
+    r"""Class representing an association rule.
 
     Attributes:
-        antecedent (Iterable[str]): A list of antecedents of association rule.
-        consequence (Iterable[str]): A list of consequents of association rule.
+        antecedent (list[str]): A list of antecedents of association rule.
+        consequence (list[str]): A list of consequents of association rule.
         fitness (float): Value of fitness function.
         support (float): Value of support.
         confidence (float): Value of confidence.
@@ -17,8 +17,8 @@ class Rule:
 
     """
 
-    antecedent: Iterable[str]
-    consequence: Iterable[str]
+    antecedent: list[str]
+    consequence: list[str]
     fitness: float
     support: float
     confidence: float

diff --git a/niaarm/stats.py b/niaarm/stats.py
@@ -5,7 +5,7 @@ class Stats:
     r"""Class for providing statistical evaluation.
 
     Attributes:
-        rules (Iterable[Rule]): List of rules.
+        rules (list[Rule]): List of rules.
     """
 
     def __init__(self, rules):

diff --git a/niaarm/tests/test_cut_point.py b/niaarm/tests/test_cut_point.py
@@ -1,5 +1,5 @@
 from unittest import TestCase
-from niaarm.association_rule import AssociationRule, cut_point
+from niaarm.association_rule import AssociationRule, _cut_point
 from niaarm.dataset import Dataset
 
 
@@ -19,7 +19,7 @@ def test_cut_pointA(self):
         cut_value = sol[len(sol) - 1]
         new_sol = sol[:-1]
 
-        cut = cut_point(cut_value, len(self.features))
+        cut = _cut_point(cut_value, len(self.features))
 
         rule = arm.build_rule(new_sol)
 
@@ -123,7 +123,7 @@ def test_cut_pointB(self):
 
         new_sol = sol[:-1]
 
-        cut = cut_point(cut_value, len(self.features))
+        cut = _cut_point(cut_value, len(self.features))
 
         rule = arm.build_rule(new_sol)
 
@@ -220,7 +220,7 @@ def test_cut_pointC(self):
 
         new_sol = sol[:-1]
 
-        cut = cut_point(cut_value, len(self.features))
+        cut = _cut_point(cut_value, len(self.features))
 
         rule = arm.build_rule(new_sol)