From 4bb36f40853755b3bcd165801ef77014c553311d Mon Sep 17 00:00:00 2001
From: zStupan <ziga.stupan@gmail.com>
Date: Mon, 14 Mar 2022 22:25:06 +0100
Subject: [PATCH] More optimization: category dtype, cached feature count, simpler position math

---
 niaarm/dataset.py | 12 ++++++------
 niaarm/niaarm.py  | 25 +++++++++++--------------
 niaarm/rule.py    |  9 +++++----
 3 files changed, 22 insertions(+), 24 deletions(-)

diff --git a/niaarm/dataset.py b/niaarm/dataset.py
index e85cefb..d7cd221 100644
--- a/niaarm/dataset.py
+++ b/niaarm/dataset.py
@@ -57,8 +57,8 @@ def __analyse_types(self):
                 unique_categories = None
             else:
                 dtype = "cat"
-                self.transactions[head] = self.transactions[head].astype(str)
-                unique_categories = self.transactions[head].unique().tolist()
+                self.transactions[head] = self.transactions[head].astype('category')
+                unique_categories = self.transactions[head].cat.categories.tolist()
                 min_value = None
                 max_value = None
 
@@ -76,16 +76,16 @@ def __problem_dimension(self):
 
     def __repr__(self):
         def dtype(x):
-            return str(x.dtype)[:-2] if x.dtype in ('int', 'float') else 'categorical'
+            return str(x.dtype)[:-2] if x.dtype in ('int', 'float') else 'category'
 
         def min_val(x):
-            return x.min() if x.dtype != 'object' else np.nan
+            return x.min() if x.dtype != 'category' else np.nan
 
         def max_val(x):
-            return x.max() if x.dtype != 'object' else np.nan
+            return x.max() if x.dtype != 'category' else np.nan
 
         def categories(x):
-            return x.unique().tolist() if x.dtype == 'object' else np.nan
+            return x.cat.categories.tolist() if x.dtype == 'category' else np.nan
 
         feature_report = self.transactions.agg([dtype, min_val, max_val, categories])
         return f"DATASET INFO:\n" \
diff --git a/niaarm/niaarm.py b/niaarm/niaarm.py
index ee841fd..eb11d8a 100644
--- a/niaarm/niaarm.py
+++ b/niaarm/niaarm.py
@@ -40,6 +40,7 @@ class NiaARM(Problem):
 
     def __init__(self, dimension, features, transactions, metrics, logging=False):
         self.features = features
+        self.num_features = len(features)
         self.transactions = transactions
 
         if not metrics:
@@ -68,8 +69,8 @@ def __init__(self, dimension, features, transactions, metrics, logging=False):
     def build_rule(self, vector):
         rule = []
 
-        permutation = vector[-len(self.features):]
-        permutation = sorted(range(len(permutation)), key=lambda k: permutation[k])
+        permutation = vector[-self.num_features:]
+        permutation = sorted(range(self.num_features), key=lambda k: permutation[k])
 
         for i in permutation:
             feature = self.features[i]
@@ -78,7 +79,7 @@ def build_rule(self, vector):
             vector_position = self.feature_position(i)
 
             # get a threshold for each feature
-            threshold_position = vector_position + self.threshold_move(i)
+            threshold_position = vector_position + 1 + int(feature.dtype != 'cat')
             if vector[vector_position] > vector[threshold_position]:
                 if feature.dtype != 'cat':
                     border1 = vector[vector_position] * (feature.max_val - feature.min_val) + feature.min_val
@@ -93,7 +94,7 @@ def build_rule(self, vector):
                 else:
                     categories = feature.categories
                     selected = round(vector[vector_position] * (len(categories) - 1))
-                    rule.append(Feature(feature.name, feature.dtype, categories=[feature.categories[selected]]))
+                    rule.append(Feature(feature.name, feature.dtype, categories=[categories[selected]]))
             else:
                 rule.append(None)
         return rule
@@ -103,11 +104,8 @@ def threshold_move(self, current_feature):
 
     def feature_position(self, feature):
         position = 0
-        for i in range(feature):
-            if self.features[i].dtype != 'cat':
-                position = position + 3
-            else:
-                position = position + 2
+        for f in self.features[:feature]:
+            position = position + 2 + int(f.dtype != 'cat')
         return position
 
     def _evaluate(self, sol):
@@ -115,7 +113,7 @@ def _evaluate(self, sol):
         cut_value = sol[self.dimension - 1]  # get cut point value
         solution = sol[:-1]  # remove cut point
 
-        cut = _cut_point(cut_value, len(self.features))
+        cut = _cut_point(cut_value, self.num_features)
 
         rule = self.build_rule(solution)
 
@@ -133,15 +131,14 @@ def _evaluate(self, sol):
             fitness = np.dot(self.weights, metrics) / self.sum_weights
             rule.fitness = fitness
 
-            if rule.support > 0.0 and rule.confidence > 0.0:
+            if rule.support > 0.0 and rule.confidence > 0.0 and rule not in self.rules:
                 # save feasible rule
-                if rule not in self.rules:
-                    self.rules.append(rule)
+                self.rules.append(rule)
 
                 if self.logging and fitness > self.best_fitness:
                     self.best_fitness = fitness
                     print(f'Fitness: {rule.fitness}, ' + ', '.join(
-                        [f'{metric.capitalize()}: {getattr(rule, metric)}' for metric in self.metrics]))
+                        [f'{metric.capitalize()}: {metrics[i]}' for i, metric in enumerate(self.metrics)]))
             return fitness
         else:
             return -1.0
diff --git a/niaarm/rule.py b/niaarm/rule.py
index 8bdde9c..ce9a651 100644
--- a/niaarm/rule.py
+++ b/niaarm/rule.py
@@ -1,5 +1,6 @@
 import math
 import numpy as np
+import pandas as pd
 
 
 class Rule:
@@ -55,10 +56,10 @@ def __init__(self, antecedent, consequent, fitness=0.0, transactions=None):
         self.__post_init__(transactions)
 
     def __post_init__(self, transactions):
-        min_ = transactions.min()
-        max_ = transactions.max()
+        min_ = transactions.min(numeric_only=True)
+        max_ = transactions.max(numeric_only=True)
         acc = 0
-        contains_antecedent = np.ones(self.num_transactions, dtype=bool)
+        contains_antecedent = pd.Series(np.ones(self.num_transactions, dtype=bool), dtype=bool)
         for attribute in self.antecedent:
             if attribute.dtype != 'cat':
                 feature_min = min_[attribute.name]
@@ -71,7 +72,7 @@ def __post_init__(self, transactions):
 
         self.antecedent_count = contains_antecedent.sum()
 
-        contains_consequent = np.ones(self.num_transactions, dtype=bool)
+        contains_consequent = pd.Series(np.ones(self.num_transactions, dtype=bool), dtype=bool)
         for attribute in self.consequent:
             if attribute.dtype != 'cat':
                 feature_min = min_[attribute.name]