Skip to content

Commit

Permalink
Merge pull request #18 from zStupan/feature-cli
Browse files Browse the repository at this point in the history
CLI
  • Loading branch information
zStupan authored Mar 2, 2022
2 parents f3a5c89 + 27f1cd8 commit c6a7cf7
Show file tree
Hide file tree
Showing 5 changed files with 267 additions and 5 deletions.
73 changes: 70 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
NiaARM is a framework for Association Rule Mining based on nature-inspired algorithms for optimization. The framework is written fully in Python and runs on all platforms. NiaARM allows users to preprocess the data in a transaction database automatically, to search for association rules and provide a pretty output of the rules found. This framework also supports numerical and real-valued types of attributes besides the categorical ones. Mining the association rules is defined as an optimization problem, and solved using the nature-inspired algorithms that come from the related framework called [NiaPy](https://github.com/NiaOrg/NiaPy).

## Detailed insights
The current version witholds (but is not limited to) the following functions:
The current version includes (but is not limited to) the following functions:

- loading datasets in CSV format,
- preprocessing of data,
Expand All @@ -36,9 +36,76 @@ Install NiaARM with pip3:
pip3 install niaarm
```

## Examples
## Usage

For a list of examples see the [examples folder](examples/).
### Basic example
```python
from niaarm import NiaARM, Dataset
from niapy.algorithms.basic import DifferentialEvolution
from niapy.task import Task, OptimizationType


# load and preprocess the dataset from csv
data = Dataset("datasets/Abalone.csv")

# Create a problem:
# dimension represents the dimension of the problem;
# features represents the list of features, while transactions represents the list of transactions;
# the following 4 elements represent weights (support, confidence, coverage, shrinkage)
# None defines that criteria are omitted and are, therefore, excluded from the fitness function
problem = NiaARM(data.dimension, data.features, data.transactions, alpha=1.0, beta=1.0)

# build niapy task
task = Task(problem=problem, max_iters=30, optimization_type=OptimizationType.MAXIMIZATION)

# use Differential Evolution (DE) algorithm from the NiaPy library
# see full list of available algorithms: https://github.com/NiaOrg/NiaPy/blob/master/Algorithms.md
algo = DifferentialEvolution(population_size=50, differential_weight=0.5, crossover_probability=0.9)

# run algorithm
best = algo.run(task=task)

# sort rules
problem.sort_rules()

# export all rules to csv
problem.export_rules('output.csv')
```
For a full list of examples see the [examples folder](examples/).

### Command line interface

```
niaarm -h
usage: niaarm [-h] -i INPUT_FILE [-o OUTPUT_FILE] -a ALGORITHM [-s SEED]
[--max-evals MAX_EVALS] [--max-iters MAX_ITERS] [--alpha ALPHA]
[--beta BETA] [--gamma GAMMA] [--delta DELTA] [--log]
[--show-stats]
Perform ARM, output mined rules as csv, get mined rules' statistics
options:
-h, --help show this help message and exit
-i INPUT_FILE, --input-file INPUT_FILE
Input file containing a csv dataset
-o OUTPUT_FILE, --output-file OUTPUT_FILE
Output file for mined rules
-a ALGORITHM, --algorithm ALGORITHM
Algorithm to use (niapy class name, e.g.
DifferentialEvolution)
-s SEED, --seed SEED Seed for the algorithm's random number generator
--max-evals MAX_EVALS
Maximum number of fitness function evaluations
--max-iters MAX_ITERS
Maximum number of iterations
--alpha ALPHA Alpha parameter. Default 0
--beta BETA Beta parameter. Default 0
--gamma GAMMA Gamma parameter. Default 0
--delta DELTA Delta parameter. Default 0
--log Enable logging of fitness improvements
--show-stats Display stats about mined rules
```
Note: The CLI script can also be run as a Python module (`python -m niaarm ...`).

## Reference Papers:

Expand Down
6 changes: 6 additions & 0 deletions niaarm/__main__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
import sys
from niaarm import cli


# Entry point for ``python -m niaarm``: run the CLI and use its return value
# as the process exit status (``None`` is reported as success).
if __name__ == '__main__':
    raise SystemExit(cli.main())
181 changes: 181 additions & 0 deletions niaarm/cli.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,181 @@
import argparse
from inspect import getmodule, getmembers, isfunction
import os
from pathlib import Path
import platform
import subprocess
import sys
import tempfile

import numpy as np
from niaarm import NiaARM, Dataset, Stats
from niapy.task import OptimizationType, Task
from niapy.util.factory import get_algorithm
from niapy.util import distances, repair
from niapy.algorithms.other import mts
from niapy.algorithms.basic import de


def get_parser():
    """Build the argument parser of the ``niaarm`` command line interface.

    Returns:
        argparse.ArgumentParser: Parser with the dataset/output, algorithm, seed,
        stopping-condition, weight and reporting options registered.
    """
    cli_parser = argparse.ArgumentParser(
        prog='niaarm',
        description="Perform ARM, output mined rules as csv, get mined rules' statistics",
    )

    # Dataset, output destination, algorithm choice and RNG seed.
    cli_parser.add_argument('-i', '--input-file', type=str, required=True,
                            help='Input file containing a csv dataset')
    cli_parser.add_argument('-o', '--output-file', type=str, help='Output file for mined rules')
    cli_parser.add_argument('-a', '--algorithm', type=str, required=True,
                            help='Algorithm to use (niapy class name, e.g. DifferentialEvolution)')
    cli_parser.add_argument('-s', '--seed', type=int,
                            help="Seed for the algorithm's random number generator")

    # Stopping conditions default to infinity when they are not given.
    stopping_conditions = (
        ('--max-evals', 'fitness function evaluations'),
        ('--max-iters', 'iterations'),
    )
    for flag, counted in stopping_conditions:
        cli_parser.add_argument(flag, type=int, default=np.inf, help=f'Maximum number of {counted}')

    # The four weight options share the same shape and only differ by name.
    for weight in ('alpha', 'beta', 'gamma', 'delta'):
        cli_parser.add_argument(f'--{weight}', type=float, default=0.0,
                                help=f'{weight.capitalize()} parameter. Default 0')

    # Boolean switches.
    switches = (
        ('--log', 'Enable logging of fitness improvements'),
        ('--show-stats', 'Display stats about mined rules'),
    )
    for flag, description in switches:
        cli_parser.add_argument(flag, action='store_true', help=description)

    return cli_parser


def text_editor():
    """Return the command used to launch the user's text editor.

    The ``VISUAL`` environment variable is consulted first, then ``EDITOR``;
    unset or empty values are skipped. When neither is usable the fallback is
    ``notepad`` on Windows and ``vi`` everywhere else.

    Returns:
        str: Editor command.
    """
    for variable in ('VISUAL', 'EDITOR'):
        configured = os.getenv(variable)
        if configured:
            return configured
    if platform.system() == 'Windows':
        return 'notepad'
    return 'vi'


def _tuple_item_text(item):
    """Render one tuple element: callables by ``__name__``, anything else via ``str``."""
    return item.__name__ if callable(item) else str(item)


def parameters_string(parameters):
    """Render algorithm parameters as editable ``name = value`` text.

    The text starts with a three-line comment header, followed by one line per
    parameter. Callables are written by their ``__name__``; tuples are written
    as their elements joined with ``', '``. Each tuple element is rendered on
    its own, so empty tuples and tuples mixing callables with plain values no
    longer raise.

    Args:
        parameters (dict): Mapping of parameter names to values.

    Returns:
        str: The rendered text, terminated by a newline.
    """
    lines = [
        "# You can edit the algorithm's parameter values here",
        '# Save and exit to continue',
        '# WARNING: Do not edit parameter names',
    ]
    for parameter, value in parameters.items():
        if isinstance(value, tuple):
            rendered = ', '.join(_tuple_item_text(item) for item in value)
        else:
            rendered = value.__name__ if callable(value) else value
        lines.append(f'{parameter} = {rendered}')
    # Collect the lines and join once instead of growing a string with +=.
    return '\n'.join(lines) + '\n'


def functions(algorithm):
    """Collect the functions a parameter value may refer to by name.

    Functions are gathered from the module that defines the algorithm and from
    the ``repair``, ``distances``, ``de`` and ``mts`` modules. On a name clash
    the module listed later wins.

    Args:
        algorithm: Algorithm class or algorithm instance.

    Returns:
        dict: Mapping of function names to function objects.
    """
    # Callers may pass the class itself (main() does). Taking ``__class__`` of
    # a class yields its metaclass, whose module is ``builtins``, so the
    # algorithm's own module would never be inspected.
    algorithm_class = algorithm if isinstance(algorithm, type) else type(algorithm)

    funcs = {}
    for module in (getmodule(algorithm_class), repair, distances, de, mts):
        funcs.update(getmembers(module, isfunction))
    return funcs


def find_function(name, algorithm):
    """Look up a function by name among those returned by ``functions(algorithm)``.

    Args:
        name (str): Name of the function.
        algorithm: Algorithm class or instance, forwarded to ``functions``.

    Returns:
        The function registered under ``name``.

    Raises:
        KeyError: If no function with that name is available.
    """
    available = functions(algorithm)
    try:
        return available[name]
    except KeyError:
        # Same exception type as before, but with a message that tells the
        # user what went wrong instead of just echoing the bare key.
        raise KeyError(f'Unknown function: {name}') from None


def convert_string(string):
    """Convert text to a number when it looks like one.

    Args:
        string (str): Text to convert.

    Returns:
        int | float | str: An ``int`` for integral values (``'3'``, ``'2.0'``,
        ``'1e3'``), a ``float`` for other numeric values, and the unchanged
        input when it cannot be parsed as a number.
    """
    try:
        number = float(string)
    except ValueError:
        return string
    if not number.is_integer():
        return number
    try:
        # Re-parse the literal itself so integers above 2**53 keep every digit
        # instead of being rounded by the detour through float.
        return int(string)
    except (TypeError, ValueError):
        # Integral value written in float notation, e.g. '2.0' or '1e3'.
        return int(number)


def _parse_parameter_scalar(raw, algorithm):
    """Convert one textual value to a number, boolean, ``None`` or function."""
    value = convert_string(raw)
    if not isinstance(value, str):
        return value
    lowered = value.lower()
    if lowered in ('true', 'false'):
        return lowered == 'true'
    if value == 'None':
        # None values are written out as the text 'None'; read them back as None.
        return None
    # Anything else is taken to be the name of a function.
    return find_function(value, algorithm)


def parse_parameters(text, algorithm):
    """Parse edited ``name = value`` text back into a parameter dictionary.

    Blank lines and lines starting with ``#`` are ignored. Values are converted
    to numbers where possible; ``true``/``false`` (any case) become booleans,
    ``None`` becomes ``None``, comma-separated values become tuples and any
    other text is resolved to a function via ``find_function``. Numeric tuple
    elements are kept (they used to be silently dropped).

    Args:
        text (str): Text in the format produced by ``parameters_string``.
        algorithm: Algorithm class or instance used to resolve function names.

    Returns:
        dict: Mapping of parameter names to parsed values.

    Raises:
        ValueError: If a line does not contain ``=``.
        KeyError: If a function name cannot be resolved.
    """
    parameters = {}
    for raw_line in text.strip().split('\n'):
        line = raw_line.strip()
        if not line or line.startswith('#'):
            continue

        # Split on the first '=' only, so a value may itself contain '='.
        key, separator, raw_value = line.partition('=')
        if not separator:
            raise ValueError(f'Invalid parameter line (expected "name = value"): {line}')

        raw_value = raw_value.strip()
        if ',' in raw_value:
            # Tuple: parse every non-empty element on its own.
            parts = (part.strip() for part in raw_value.split(','))
            value = tuple(_parse_parameter_scalar(part, algorithm) for part in parts if part)
        else:
            value = _parse_parameter_scalar(raw_value, algorithm)
        parameters[key.strip()] = value
    return parameters


def edit_parameters(parameters, algorithm):
    """Let the user edit algorithm parameters in a text editor.

    The parameters are written to a temporary file, the editor returned by
    ``text_editor()`` is launched on it, and the saved text is parsed back with
    ``parse_parameters``. The temporary file is removed afterwards, even when
    editing or parsing fails.

    Note:
        ``individual_type`` and ``initialization_function`` are removed from
        the ``parameters`` dictionary passed in (it is modified in place).

    Args:
        parameters (dict): Current parameter values.
        algorithm: Algorithm class or instance used to resolve function names.

    Returns:
        dict: The edited parameters.

    Raises:
        subprocess.CalledProcessError: If the editor exits with a non-zero status.
    """
    import shlex  # local import: only needed to quote the temporary file name

    parameters.pop('individual_type', None)
    parameters.pop('initialization_function', None)
    fd, filename = tempfile.mkstemp()
    os.close(fd)

    new_parameters = None
    try:
        path = Path(filename)
        path.write_text(parameters_string(parameters))

        # Quote only the file name so a temp directory containing spaces
        # survives the shell. The editor command stays unquoted because
        # VISUAL/EDITOR may carry their own arguments (e.g. "code --wait").
        if platform.system() == 'Windows':
            quoted_filename = f'"{filename}"'
        else:
            quoted_filename = shlex.quote(filename)
        subprocess.run(f'{text_editor()} {quoted_filename}', shell=True, check=True)

        new_parameters = parse_parameters(path.read_text(), algorithm)
    finally:
        try:
            os.unlink(filename)
        except OSError as e:
            # Failing to clean up is reported but must not mask the real outcome.
            print('Error:', e, file=sys.stderr)
    return new_parameters


def main():
    """Run the ``niaarm`` command line interface.

    Parses the command line, loads the dataset, lets the user edit the chosen
    algorithm's parameters, runs the optimization and optionally exports the
    mined rules and prints statistics about them.

    Returns:
        int: ``0`` on success (or when only the help text was shown), ``1`` on
        any error.
    """
    parser = get_parser()

    # This check has to come before parse_args(): with the required options
    # missing, argparse would exit with an error before help could be printed.
    if len(sys.argv) == 1:
        parser.print_help()
        return 0

    args = parser.parse_args()
    if args.max_evals == np.inf and args.max_iters == np.inf:
        print('--max-evals and/or --max-iters missing', file=sys.stderr)
        return 1

    try:
        dataset = Dataset(args.input_file)
        problem = NiaARM(dataset.dimension, dataset.features, dataset.transactions, args.alpha, args.beta, args.gamma,
                         args.delta, args.log)
        task = Task(problem, max_iters=args.max_iters, max_evals=args.max_evals,
                    optimization_type=OptimizationType.MAXIMIZATION)

        algorithm = get_algorithm(args.algorithm, seed=args.seed)
        params = algorithm.get_parameters()
        new_params = edit_parameters(params, algorithm.__class__)
        if new_params is None:
            print('Invalid parameters', file=sys.stderr)
            return 1

        # Reject names the algorithm did not offer for editing.
        for param in new_params:
            if param not in params:
                print(f'Invalid parameter: {param}', file=sys.stderr)
                return 1

        algorithm.set_parameters(**new_params)

        algorithm.run(task)

        if args.output_file:
            problem.sort_rules()
            problem.export_rules(args.output_file)

        if args.show_stats:
            stats = Stats(problem.rules)
            print('\nSTATS:')
            print(f'Total rules: {stats.total_rules}')
            print(f'Average fitness: {stats.avg_fitness}')
            print(f'Average support: {stats.avg_support}')
            print(f'Average confidence: {stats.avg_confidence}')
            print(f'Average coverage: {stats.avg_coverage}')
            print(f'Average shrinkage: {stats.avg_shrinkage}')
            print(f'Average length of antecedent: {stats.avg_ant_len}')
            print(f'Average length of consequent: {stats.avg_con_len}')

    except Exception as e:
        # Top-level boundary of the CLI: report the problem and signal failure.
        print('Error:', e, file=sys.stderr)
        return 1

    return 0
9 changes: 7 additions & 2 deletions niaarm/niaarm.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,19 +28,24 @@ class NiaARM(Problem):
"""

def __init__(self, dimension, features, transactions, alpha=0.0, beta=0.0, gamma=0.0, delta=0.0):
def __init__(self, dimension, features, transactions, alpha=0.0, beta=0.0, gamma=0.0, delta=0.0, logging=False):
r"""Initialize instance of NiaARM.
Arguments:
"""
self.features = features
self.transactions = transactions

if alpha + beta + gamma + delta == 0:
raise ValueError('At least one of alpha, beta, gamma or delta must be set')

self.alpha = alpha
self.beta = beta
self.gamma = gamma
self.delta = delta

self.logging = logging
self.best_fitness = np.NINF
self.rules = []
super().__init__(dimension, 0.0, 1.0)
Expand Down Expand Up @@ -116,7 +121,7 @@ def _evaluate(self, sol):
self.rules.append(
Rule(antecedent1, consequent1, fitness, support, confidence, coverage, shrinkage))

if fitness > self.best_fitness:
if self.logging and fitness > self.best_fitness:
self.best_fitness = fitness
print(f'Fitness: {fitness}, Support: {support}, Confidence:{confidence}, Coverage:{coverage}, '
f'Shrinkage:{shrinkage}')
Expand Down
3 changes: 3 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ pandas = [
[tool.poetry.dev-dependencies]
pytest = "^7.0.1"

[tool.poetry.scripts]
niaarm = 'niaarm.cli:main'

[build-system]
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"

0 comments on commit c6a7cf7

Please sign in to comment.