Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Major Refactor + Added metrics #28

Merged
merged 17 commits into from
Mar 11, 2022
7 changes: 4 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -57,9 +57,10 @@ data = Dataset("datasets/Abalone.csv")
# Create a problem:::
# dimension represents the dimension of the problem;
# features represent the list of features, while transactions depicts the list of transactions
# the following 4 elements represent weights (support, confidence, coverage, shrinkage)
# None defines that criteria are omitted and are, therefore, excluded from the fitness function
problem = NiaARM(data.dimension, data.features, data.transactions, alpha=1.0, beta=1.0)
# metrics is a sequence of metrics to be taken into account when computing the fitness;
# you can also pass in a dict of the shape {'metric_name': <weight of metric in range [0, 1]>};
# when passing a sequence, the weights default to 1.
problem = NiaARM(data.dimension, data.features, data.transactions, metrics=('support', 'confidence'), logging=True)

# build niapy task
task = Task(problem=problem, max_iters=30, optimization_type=OptimizationType.MAXIMIZATION)
Expand Down
2 changes: 1 addition & 1 deletion examples/basic_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
# features represent the list of features, while transactions depicts the list of transactions
# the following 4 elements represent weights (support, confidence, coverage, shrinkage)
# A weight of 0.0 means that criteria are omitted and are, therefore, excluded from the fitness function
problem = NiaARM(data.dimension, data.features, data.transactions, alpha=1.0, beta=1.0, logging=True)
problem = NiaARM(data.dimension, data.features, data.transactions, metrics=('support', 'confidence'), logging=True)

# build niapy task
task = Task(problem=problem, max_iters=30, optimization_type=OptimizationType.MAXIMIZATION)
Expand Down
2 changes: 1 addition & 1 deletion examples/stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
if __name__ == '__main__':
# Load the dataset and run the algorithm
data = Dataset("datasets/Abalone.csv")
problem = NiaARM(data.dimension, data.features, data.transactions, alpha=1.0, beta=1.0)
problem = NiaARM(data.dimension, data.features, data.transactions, metrics=('support', 'confidence'))
task = Task(problem=problem, max_iters=30, optimization_type=OptimizationType.MAXIMIZATION)
algo = DifferentialEvolution(population_size=50, differential_weight=0.5, crossover_probability=0.9)
algo.run(task=task)
Expand Down
226 changes: 0 additions & 226 deletions niaarm/association_rule.py

This file was deleted.

15 changes: 6 additions & 9 deletions niaarm/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,28 +14,26 @@ class Dataset:
If the file already contains a header row, pass ``header=0`` to override the feature names.

Attributes:
data (pd.DataFrame): Data as a pandas Dataframe.
transactions (np.ndarray): Transactional data.
transactions (pandas.DataFrame): Transactional data.
header (list[str]): Feature names.
features (list[Feature]): List of features.
dimension (int): Dimension of the optimization problem for the dataset.

"""

def __init__(self, path, delimiter=',', header=0, names=None):
self.data = pd.read_csv(path, delimiter=delimiter, header=header, names=names)
self.transactions = pd.read_csv(path, delimiter=delimiter, header=header, names=names)
if names is None and header is None:
self.data.columns = pd.Index([f'Feature{i}' for i in range(len(self.data.columns))])
self.header = self.data.columns.tolist()
self.transactions = self.data.values
self.transactions.columns = pd.Index([f'Feature{i}' for i in range(len(self.transactions.columns))])
self.header = self.transactions.columns.tolist()
self.features = []
self.__analyse_types()
self.dimension = self.__problem_dimension()

def __analyse_types(self):
r"""Extract data types for the data in a dataset."""
for head in self.header:
col = self.data[head]
col = self.transactions[head]

if col.dtype == "float":
dtype = "float"
Expand All @@ -48,8 +46,7 @@ def __analyse_types(self):
max_value = col.max()
unique_categories = None
elif col.dtype == 'bool':
self.data[head] = self.data[head].astype(int)
self.transactions = self.data.values
self.transactions[head] = self.transactions[head].astype(int)
dtype = 'int'
min_value = 0
max_value = 1
Expand Down
17 changes: 14 additions & 3 deletions niaarm/feature.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from typing import Optional


@dataclass
@dataclass(repr=False)
class Feature:
r"""Class representing a feature.

Expand All @@ -11,12 +11,23 @@ class Feature:
dtype (str): Datatype of the feature.
min_val (Optional[float]): Minimum value of the feature in the transaction database.
max_val (Optional[float]): Maximum value of the feature in the transaction database.
categories (Optional[list[float]]): Possible categorical feature's values.
categories (Optional[list[str]]): Possible values of a categorical feature.

"""

name: str
dtype: str
min_val: Optional[float] = None
max_val: Optional[float] = None
categories: Optional[list[float]] = None
categories: Optional[list[str]] = None

def __repr__(self):
    r"""Return a compact string representation of the feature.

    The result has the form ``name(<values>)``:

    * for categorical features (``dtype == 'cat'``) the list of categories is shown,
      or the bare category when there is exactly one;
    * for all other features the single value is shown when ``min_val == max_val``,
      otherwise the interval ``[min_val, max_val]``.

    Returns:
        str: String representation of the feature.

    """
    if self.dtype == 'cat':
        categories = self.categories
        # ``categories`` defaults to None; guard the length check so that
        # repr() never raises on a partially initialised feature.
        if categories is not None and len(categories) == 1:
            values = categories[0]
        else:
            values = categories
    elif self.min_val == self.max_val:
        values = self.min_val
    else:
        values = f'[{self.min_val}, {self.max_val}]'
    return f'{self.name}({values})'
Loading