Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Code cleanup #2

Merged
merged 3 commits into from
Nov 26, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions basic_run.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
from niaarm import NiaARM
from niaarm.dataset import _Dataset
from niaarm.dataset import Dataset
from niapy.algorithms.basic import ParticleSwarmAlgorithm, DifferentialEvolution
from niapy.task import Task, OptimizationType

# load dataset from csv
data = _Dataset("datasets/wiki_test_case.csv")
data = Dataset("datasets/wiki_test_case.csv")

# preprocess dataset and obtain features
features = data.get_features()
Expand All @@ -13,7 +13,7 @@
dimension = data.calculate_dimension_of_individual()

# obtain transaction database
transactions = data.get_transaction_data()
transactions = data.transaction_data

# create a problem
problem = NiaARM(dimension, 0, 1, features, transactions, dimension)
Expand All @@ -24,7 +24,7 @@
max_iters=100,
optimization_type=OptimizationType.MAXIMIZATION)

#use DE
# use DE
algo = DifferentialEvolution(population_size=50, differential_weight=0.5, crossover_probability=0.9)

# use PSO algorithm from niapy library
Expand Down
2 changes: 0 additions & 2 deletions niaarm/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
from niaarm.niaarm import NiaARM

__all__ = ["NiaARM"]

__version__ = "0.1.0"
32 changes: 8 additions & 24 deletions niaarm/association_rule.py
Original file line number Diff line number Diff line change
@@ -1,26 +1,16 @@
import sys
import math
from niaarm.feature import _Feature


class AssociationRule(object):
class AssociationRule:
r"""Class for main operations and quality measures.

Date:
2021

License:
MIT

Attributes:
value (Optional(any)): Categorical feature value.
min (Optional(float)): Minimum numerical feature's value.
max (Optional(float)): Maximum numerical feature's value.
features (Iterable[Feature]): List of features.
permutation (Iterable[int]): Indices into ``features``, in the order they are visited when building a rule.
"""

def __init__(self, features):
self.features = features

self.permutation = []

def build_rule(self, vector):
Expand All @@ -29,7 +19,6 @@ def build_rule(self, vector):
permutation = self.map_permutation(vector)
self.permutation = self.get_permutation(permutation)

vector_position = 0
for i in range(len(self.features)):
current_feature = self.permutation[i]

Expand All @@ -47,7 +36,6 @@ def build_rule(self, vector):
vector_position = vector_position + 1
border2 = (vector[vector_position] * (self.features[current_feature].max_val -
self.features[current_feature].min_val)) + self.features[current_feature].min_val
vector_position = vector_position + 1

if border1 > border2:
inter = border1
Expand All @@ -73,8 +61,6 @@ def build_rule(self, vector):
self.features[current_feature].min_val))) +
self.features[current_feature].min_val)

vector_position = vector_position + 1

if border1 > border2:
inter = border1
border1 = border2
Expand All @@ -86,7 +72,6 @@ def build_rule(self, vector):
categories = self.features[current_feature].categories
selected = int(vector[vector_position]
* (len(categories) - 1))
vector_position = vector_position + 1
rule.append(
[self.features[current_feature].categories[selected]])
else:
Expand All @@ -99,7 +84,7 @@ def map_permutation(self, vector):
def is_rule_feasible(self, ant, con):
ant_count = ant.count("NO")
con_count = con.count("NO")
if ((ant_count == len(ant)) or (con_count == len(con))):
if (ant_count == len(ant)) or (con_count == len(con)):
return False
else:
return True
Expand All @@ -112,14 +97,13 @@ def calculate_threshold_move(self, current_feature):
return move

def get_current_position_of_feature(self, feature):
position = feature * 3
return position
return feature * 3

def return_permutation(self):
return self.permutation

def get_cut_point(self, sol, num_attr):
cut = int(float(sol) * num_attr)
cut = int(sol * num_attr)
if cut == 0:
cut = 1
if cut > num_attr - 1:
Expand Down Expand Up @@ -210,11 +194,11 @@ def calculate_support_confidence(
if (missing_ant + missing_con) == len(self.features):
supp = 0.0

skupni_supp = float(float(supp) / float(len(transactions)))
skupni_supp = supp / transactions
if conf_counter == 0:
skupni_conf = 0.0
else:
skupni_conf = float(float(conf) / float(conf_counter))
skupni_conf = conf / conf_counter

return skupni_supp, skupni_conf

Expand Down
78 changes: 15 additions & 63 deletions niaarm/dataset.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,10 @@
import numpy as np
import pandas as pd
from niaarm.feature import _Feature
from niaarm.feature import Feature

__all__ = ["Dataset"]


class _Dataset:
class Dataset:
r"""Class for working with dataset.

Date:
2021

License:
MIT

Attributes:
path (str): Path to the dataset.
has_header (Optional(str)): Is header present in csv file.
Expand All @@ -25,53 +16,33 @@ def __init__(self, path, has_header="Yes", delimiter=","):
self.has_header = has_header
self.delimiter = delimiter

self.data = None
self.header = []
self.features = []

def read_file(self):
r"""Read dataset from file.
Arguments:
None
Returns:
None
"""
r"""Read dataset from file."""
self.data = pd.read_csv(self.path, sep=self.delimiter)

def print_raw_output(self):
r"""Print the whole datatable.
Arguments:
None
Returns:
None
"""
r"""Print the whole datatable."""
print(self.data)

def get_all_column_names(self):
r"""Preprocess all column names.
Arguments:
None
Returns:
None
"""
r"""Preprocess all column names."""
for col in self.data.columns:
self.header.append(col)

def return_header(self):
r"""Return all column names.
Arguments:
None

Returns:
Iterable[any]: list of columns.
"""
return self.header

def analyse_types(self):
r"""Extract data types for data in dataset..
Arguments:
None
Returns:
None
"""
r"""Extract data types for data in dataset."""
for head in self.header:
col = self.data[head]

Expand All @@ -94,43 +65,31 @@ def analyse_types(self):
max_value = None

self.features.append(
_Feature(
Feature(
head,
dtype,
min_value,
max_value,
unique_categories))

def get_features(self):
r"""Get feature data.
Arguments:
None
Returns:
None
"""
r"""Get feature data."""
self.read_file()
self.get_all_column_names()
self.analyse_types()

return self.features

def get_transaction_data(self):
r"""Get all transactions.
Arguments:
None
Returns:
None
"""
@property
def transaction_data(self):
return self.data.values

def calculate_dimension_of_individual(self):
r"""Calculate the dimension of the problem.
Dimension of the problem is used in optimization task.

Arguments:
None
Returns:
number (int)
int: dimension
"""
dimension = 0
for feature in self.features:
Expand All @@ -144,13 +103,6 @@ def calculate_dimension_of_individual(self):
return dimension

def get_feature_report(self):
r"""Print feature details.

Arguments:
None
Returns:
None
"""
r"""Print feature details."""
for feature in self.features:
print("Name: ", feature.name, " Type: ", feature.dtype,
" Range: (", feature.min_val, ", ", feature.max_val, ")")
print(feature)
50 changes: 15 additions & 35 deletions niaarm/feature.py
Original file line number Diff line number Diff line change
@@ -1,42 +1,22 @@
__all__ = ["_Feature"]
from dataclasses import dataclass
from typing import Iterable, Optional


class _Feature:
r"""Class for rule representation of association rule

Date:
2021

License:
MIT
@dataclass
class Feature:
r"""Class representing a feature.

Attributes:
name (str): Name of feature.
dtype (int): Data type of feature.
min_val (Optional(float)): Minimum value of feature in transaction database.
max_val (Optional(float)): Maximum value of feature in transaction database.
categories (Optional(Iterable[float])): Possible categorical feature's values.
"""
dtype (str): Datatype of feature.
min_val (Optional[float]): Minimum value of feature in transaction database.
max_val (Optional[float]): Maximum value of feature in transaction database.
categories (Optional[Iterable[float]]): Possible categorical feature's values.

def __init__(
self,
name,
dtype,
min_val=None,
max_val=None,
categories=None,
**kwargs):
r"""Initialize instance of _Feature.
"""

Arguments:
name (str): Name of feature.
dtype (int): Data type of a feature.
min_val (Optional(float)): Minimum value of feature in transaction database.
max_val (Optional(float)): Maximum value of feature in transaction database.
categories (Optional(Iterable[float])): Possible categorical feature's values.
"""
self.name = name
self.dtype = dtype
self.min_val = min_val
self.max_val = max_val
self.categories = categories
name: str
dtype: str
min_val: Optional[float] = None
max_val: Optional[float] = None
categories: Optional[Iterable[float]] = None
Loading