Skip to content

Commit

Permalink
Merge pull request #28 from zStupan/main
Browse files Browse the repository at this point in the history
Major Refactor + Added metrics
  • Loading branch information
firefly-cpp authored Mar 11, 2022
2 parents 3281841 + 354910c commit 7648f08
Show file tree
Hide file tree
Showing 15 changed files with 677 additions and 804 deletions.
7 changes: 4 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -57,9 +57,10 @@ data = Dataset("datasets/Abalone.csv")
# Create a problem:
# dimension represents the dimension of the problem;
# features represent the list of features, while transactions depict the list of transactions
# the following 4 elements represent weights (support, confidence, coverage, shrinkage)
# None defines that criteria are omitted and are, therefore, excluded from the fitness function
problem = NiaARM(data.dimension, data.features, data.transactions, alpha=1.0, beta=1.0)
# metrics is a sequence of metrics to be taken into account when computing the fitness;
# you can also pass in a dict of the shape {'metric_name': <weight of metric in range [0, 1]>};
# when passing a sequence, the weights default to 1.
problem = NiaARM(data.dimension, data.features, data.transactions, metrics=('support', 'confidence'), logging=True)

# build niapy task
task = Task(problem=problem, max_iters=30, optimization_type=OptimizationType.MAXIMIZATION)
Expand Down
2 changes: 1 addition & 1 deletion examples/basic_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
# features represent the list of features, while transactions depict the list of transactions
# metrics is a sequence of metrics to be taken into account when computing the fitness;
# you can also pass in a dict of the shape {'metric_name': <weight of metric in range [0, 1]>};
# when passing a sequence, the weights default to 1.
problem = NiaARM(data.dimension, data.features, data.transactions, alpha=1.0, beta=1.0, logging=True)
problem = NiaARM(data.dimension, data.features, data.transactions, metrics=('support', 'confidence'), logging=True)

# build niapy task
task = Task(problem=problem, max_iters=30, optimization_type=OptimizationType.MAXIMIZATION)
Expand Down
2 changes: 1 addition & 1 deletion examples/stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
if __name__ == '__main__':
# Load the dataset and run the algorithm
data = Dataset("datasets/Abalone.csv")
problem = NiaARM(data.dimension, data.features, data.transactions, alpha=1.0, beta=1.0)
problem = NiaARM(data.dimension, data.features, data.transactions, metrics=('support', 'confidence'))
task = Task(problem=problem, max_iters=30, optimization_type=OptimizationType.MAXIMIZATION)
algo = DifferentialEvolution(population_size=50, differential_weight=0.5, crossover_probability=0.9)
algo.run(task=task)
Expand Down
226 changes: 0 additions & 226 deletions niaarm/association_rule.py

This file was deleted.

15 changes: 6 additions & 9 deletions niaarm/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,28 +14,26 @@ class Dataset:
If the file already contains a header row, pass ``header=0`` to override the feature names.
Attributes:
data (pd.DataFrame): Data as a pandas DataFrame.
transactions (np.ndarray): Transactional data.
transactions (pandas.DataFrame): Transactional data.
header (list[str]): Feature names.
features (list[Feature]): List of features.
dimension (int): Dimension of the optimization problem for the dataset.
"""

def __init__(self, path, delimiter=',', header=0, names=None):
self.data = pd.read_csv(path, delimiter=delimiter, header=header, names=names)
self.transactions = pd.read_csv(path, delimiter=delimiter, header=header, names=names)
if names is None and header is None:
self.data.columns = pd.Index([f'Feature{i}' for i in range(len(self.data.columns))])
self.header = self.data.columns.tolist()
self.transactions = self.data.values
self.transactions.columns = pd.Index([f'Feature{i}' for i in range(len(self.transactions.columns))])
self.header = self.transactions.columns.tolist()
self.features = []
self.__analyse_types()
self.dimension = self.__problem_dimension()

def __analyse_types(self):
r"""Extract data types for the data in a dataset."""
for head in self.header:
col = self.data[head]
col = self.transactions[head]

if col.dtype == "float":
dtype = "float"
Expand All @@ -48,8 +46,7 @@ def __analyse_types(self):
max_value = col.max()
unique_categories = None
elif col.dtype == 'bool':
self.data[head] = self.data[head].astype(int)
self.transactions = self.data.values
self.transactions[head] = self.transactions[head].astype(int)
dtype = 'int'
min_value = 0
max_value = 1
Expand Down
17 changes: 14 additions & 3 deletions niaarm/feature.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from typing import Optional


@dataclass
@dataclass(repr=False)
class Feature:
r"""Class representing a feature.
Expand All @@ -11,12 +11,23 @@ class Feature:
dtype (str): Datatype of the feature.
min_val (Optional[float]): Minimum value of the feature in the transaction database.
max_val (Optional[float]): Maximum value of the feature in the transaction database.
categories (Optional[list[float]]): Possible values of a categorical feature.
categories (Optional[list[str]]): Possible values of a categorical feature.
"""

name: str
dtype: str
min_val: Optional[float] = None
max_val: Optional[float] = None
categories: Optional[list[float]] = None
categories: Optional[list[str]] = None

def __repr__(self):
    """Return a compact, human-readable description of the feature.

    The result has the form ``name(<value>)`` where ``<value>`` is:

    * for categorical features (``dtype == 'cat'``): the single category
      when exactly one is present, otherwise the ``categories`` object
      itself (a list, or ``None`` when no categories were provided);
    * for all other features: ``min_val`` when ``min_val == max_val``,
      otherwise the interval ``[min_val, max_val]``.

    Returns:
        str: String representation of the feature.
    """
    if self.dtype == 'cat':
        categories = self.categories
        # ``categories`` defaults to None; calling len() on it would raise
        # TypeError, and __repr__ should never fail on a valid instance.
        if categories is not None and len(categories) == 1:
            value = f'{categories[0]}'
        else:
            value = f'{categories}'
    elif self.min_val == self.max_val:
        # Degenerate interval: show the single value only.
        value = f'{self.min_val}'
    else:
        value = f'[{self.min_val}, {self.max_val}]'
    return f'{self.name}({value})'
Loading

0 comments on commit 7648f08

Please sign in to comment.