From 4bb36f40853755b3bcd165801ef77014c553311d Mon Sep 17 00:00:00 2001 From: zStupan Date: Mon, 14 Mar 2022 22:25:06 +0100 Subject: [PATCH] more optimization --- niaarm/dataset.py | 12 ++++++------ niaarm/niaarm.py | 25 +++++++++++-------------- niaarm/rule.py | 9 +++++---- 3 files changed, 22 insertions(+), 24 deletions(-) diff --git a/niaarm/dataset.py b/niaarm/dataset.py index e85cefb..d7cd221 100644 --- a/niaarm/dataset.py +++ b/niaarm/dataset.py @@ -57,8 +57,8 @@ def __analyse_types(self): unique_categories = None else: dtype = "cat" - self.transactions[head] = self.transactions[head].astype(str) - unique_categories = self.transactions[head].unique().tolist() + self.transactions[head] = self.transactions[head].astype('category') + unique_categories = self.transactions[head].cat.categories.tolist() min_value = None max_value = None @@ -76,16 +76,16 @@ def __problem_dimension(self): def __repr__(self): def dtype(x): - return str(x.dtype)[:-2] if x.dtype in ('int', 'float') else 'categorical' + return str(x.dtype)[:-2] if x.dtype in ('int', 'float') else 'category' def min_val(x): - return x.min() if x.dtype != 'object' else np.nan + return x.min() if x.dtype != 'category' else np.nan def max_val(x): - return x.max() if x.dtype != 'object' else np.nan + return x.max() if x.dtype != 'category' else np.nan def categories(x): - return x.unique().tolist() if x.dtype == 'object' else np.nan + return x.cat.categories.tolist() if x.dtype == 'category' else np.nan feature_report = self.transactions.agg([dtype, min_val, max_val, categories]) return f"DATASET INFO:\n" \ diff --git a/niaarm/niaarm.py b/niaarm/niaarm.py index ee841fd..eb11d8a 100644 --- a/niaarm/niaarm.py +++ b/niaarm/niaarm.py @@ -40,6 +40,7 @@ class NiaARM(Problem): def __init__(self, dimension, features, transactions, metrics, logging=False): self.features = features + self.num_features = len(features) self.transactions = transactions if not metrics: @@ -68,8 +69,8 @@ def __init__(self, dimension, features, transactions, metrics, logging=False): def build_rule(self, vector): rule = [] - permutation = vector[-len(self.features):] - permutation = sorted(range(len(permutation)), key=lambda k: permutation[k]) + permutation = vector[-self.num_features:] + permutation = sorted(range(self.num_features), key=lambda k: permutation[k]) for i in permutation: feature = self.features[i] @@ -78,7 +79,7 @@ def build_rule(self, vector): vector_position = self.feature_position(i) # get a threshold for each feature - threshold_position = vector_position + self.threshold_move(i) + threshold_position = vector_position + 1 + int(feature.dtype != 'cat') if vector[vector_position] > vector[threshold_position]: if feature.dtype != 'cat': border1 = vector[vector_position] * (feature.max_val - feature.min_val) + feature.min_val @@ -93,7 +94,7 @@ def build_rule(self, vector): else: categories = feature.categories selected = round(vector[vector_position] * (len(categories) - 1)) - rule.append(Feature(feature.name, feature.dtype, categories=[feature.categories[selected]])) + rule.append(Feature(feature.name, feature.dtype, categories=[categories[selected]])) else: rule.append(None) return rule @@ -103,11 +104,8 @@ def threshold_move(self, current_feature): def feature_position(self, feature): position = 0 - for i in range(feature): - if self.features[i].dtype != 'cat': - position = position + 3 - else: - position = position + 2 + for f in self.features[:feature]: + position = position + 2 + int(f.dtype != 'cat') return position def _evaluate(self, sol): @@ -115,7 +113,7 @@ def _evaluate(self, sol): cut_value = sol[self.dimension - 1] # get cut point value solution = sol[:-1] # remove cut point - cut = _cut_point(cut_value, len(self.features)) + cut = _cut_point(cut_value, self.num_features) rule = self.build_rule(solution) @@ -133,15 +131,14 @@ def _evaluate(self, sol): fitness = np.dot(self.weights, metrics) / self.sum_weights rule.fitness = fitness - if rule.support > 0.0 and rule.confidence > 0.0: + if rule.support > 0.0 and rule.confidence > 0.0 and rule not in self.rules: # save feasible rule - if rule not in self.rules: - self.rules.append(rule) + self.rules.append(rule) if self.logging and fitness > self.best_fitness: self.best_fitness = fitness print(f'Fitness: {rule.fitness}, ' + ', '.join( - [f'{metric.capitalize()}: {getattr(rule, metric)}' for metric in self.metrics])) + [f'{metric.capitalize()}: {metrics[i]}' for i, metric in enumerate(self.metrics)])) return fitness else: return -1.0 diff --git a/niaarm/rule.py b/niaarm/rule.py index 8bdde9c..ce9a651 100644 --- a/niaarm/rule.py +++ b/niaarm/rule.py @@ -1,5 +1,6 @@ import math import numpy as np +import pandas as pd class Rule: @@ -55,10 +56,10 @@ def __init__(self, antecedent, consequent, fitness=0.0, transactions=None): self.__post_init__(transactions) def __post_init__(self, transactions): - min_ = transactions.min() - max_ = transactions.max() + min_ = transactions.min(numeric_only=True) + max_ = transactions.max(numeric_only=True) acc = 0 - contains_antecedent = np.ones(self.num_transactions, dtype=bool) + contains_antecedent = pd.Series(np.ones(self.num_transactions, dtype=bool), dtype=bool) for attribute in self.antecedent: if attribute.dtype != 'cat': feature_min = min_[attribute.name] @@ -71,7 +72,7 @@ def __post_init__(self, transactions): self.antecedent_count = contains_antecedent.sum() - contains_consequent = np.ones(self.num_transactions, dtype=bool) + contains_consequent = pd.Series(np.ones(self.num_transactions, dtype=bool), dtype=bool) for attribute in self.consequent: if attribute.dtype != 'cat': feature_min = min_[attribute.name]