From 5c3a6b08efa4f1e4d35b5949128d4f8c62758963 Mon Sep 17 00:00:00 2001 From: zStupan Date: Tue, 1 Mar 2022 19:50:15 +0100 Subject: [PATCH 1/9] Made logging optional --- niaarm/niaarm.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/niaarm/niaarm.py b/niaarm/niaarm.py index dfd0c54..bfc1590 100644 --- a/niaarm/niaarm.py +++ b/niaarm/niaarm.py @@ -28,7 +28,7 @@ class NiaARM(Problem): """ - def __init__(self, dimension, features, transactions, alpha=0.0, beta=0.0, gamma=0.0, delta=0.0): + def __init__(self, dimension, features, transactions, alpha=0.0, beta=0.0, gamma=0.0, delta=0.0, logging=False): r"""Initialize instance of NiaARM. Arguments: @@ -40,6 +40,7 @@ def __init__(self, dimension, features, transactions, alpha=0.0, beta=0.0, gamma self.beta = beta self.gamma = gamma self.delta = delta + self.logging = logging self.best_fitness = np.NINF self.rules = [] @@ -116,7 +117,7 @@ def _evaluate(self, sol): self.rules.append( Rule(antecedent1, consequent1, fitness, support, confidence, coverage, shrinkage)) - if fitness > self.best_fitness: + if self.logging and fitness > self.best_fitness: self.best_fitness = fitness print(f'Fitness: {fitness}, Support: {support}, Confidence:{confidence}, Coverage:{coverage}, ' f'Shrinkage:{shrinkage}') From 76666bf0c75f34b2844dbb0aea39fc6c4c8ecca1 Mon Sep 17 00:00:00 2001 From: zStupan Date: Tue, 1 Mar 2022 19:51:01 +0100 Subject: [PATCH 2/9] Added a command line interface --- niaarm/__main__.py | 6 ++ niaarm/cli.py | 133 +++++++++++++++++++++++++++++++++++++++++++++ pyproject.toml | 3 + 3 files changed, 142 insertions(+) create mode 100644 niaarm/__main__.py create mode 100644 niaarm/cli.py diff --git a/niaarm/__main__.py b/niaarm/__main__.py new file mode 100644 index 0000000..f4ecfe3 --- /dev/null +++ b/niaarm/__main__.py @@ -0,0 +1,6 @@ +import sys +from niaarm import cli + + +if __name__ == '__main__': + sys.exit(cli.main()) diff --git a/niaarm/cli.py b/niaarm/cli.py new file mode 100644 index 0000000..2af618f --- /dev/null +++ b/niaarm/cli.py @@ -0,0 +1,133 @@ +import argparse +import os +from pathlib import Path +import platform +import subprocess +import sys +import tempfile + +import numpy as np +from niaarm import NiaARM, Dataset, Stats +from niapy.task import OptimizationType, Task +from niapy.util.factory import get_algorithm + + +def get_parser(): + parser = argparse.ArgumentParser(description='Perform ARM, output mined rules as csv, get mined rules\' statistics') + parser.add_argument('-a', '--algorithm', type=str, required=True, + help='Algorithm to use (niapy class name, e. g. DifferentialEvolution)') + parser.add_argument('-s', '--seed', type=int, help='Seed for thr algorithm\'s random number generator') + parser.add_argument('--max-evals', type=int, default=np.inf, help='Maximum number of fitness function evaluations') + parser.add_argument('--max-iters', type=int, default=np.inf, help='Maximum number of iterations') + parser.add_argument('-i', '--input-file', type=str, required=True, help='Input file containing a csv dataset') + parser.add_argument('-o', '--output-file', type=str, help='Output file for mined rules') + parser.add_argument('--alpha', type=float, default=0.0, help='Alpha parameter') + parser.add_argument('--beta', type=float, default=0.0, help='Beta parameter') + parser.add_argument('--gamma', type=float, default=0.0, help='Gamma parameter') + parser.add_argument('--delta', type=float, default=0.0, help='Delta parameter') + parser.add_argument('--logging', action='store_true', help='Enable logging of fitness improvements') + parser.add_argument('--show-stats', action='store_true', help='Display stats about mined rules') + + return parser + + +def text_editor(): + return os.getenv('VISUAL') or os.getenv('EDITOR') or ('notepad' if platform.system() == 'Windows' else 'vi') + + +def parameters_string(parameters): + params_txt = '# You can edit the algorithm\'s parameter values here\n' \ + '# Save and exit to continue\n' \ + '# WARNING: Do not edit parameter names\n' + for parameter, value in parameters.items(): + params_txt += f'{parameter} = {value}\n' + return params_txt + + +def parse_parameters(text): + lines: list[str] = text.strip().split('\n') + lines = [line.strip() for line in lines if line.strip() and not line.strip().startswith('#')] + parameters = {} + for line in lines: + key, value = line.split('=') + key = key.strip() + value = float(value.strip()) + if value.is_integer(): + value = int(value) + parameters[key] = value + return parameters + + +def edit_parameters(parameters): + parameters.pop('individual_type', None) + parameters.pop('initialization_function', None) + fd, filename = tempfile.mkstemp() + os.close(fd) + + new_parameters = None + try: + path = Path(filename) + path.write_text(parameters_string(parameters)) + command = f'{text_editor()} {filename}' + subprocess.run(command, shell=True, check=True) + params_txt = path.read_text() + new_parameters = parse_parameters(params_txt) + finally: + try: + os.unlink(filename) + except Exception as e: + print(e) + return new_parameters + + +def main(): + parser = get_parser() + args = parser.parse_args() + + if len(sys.argv) == 1: + parser.print_help() + if args.max_evals == np.inf and args.max_iters == np.inf: + print('--max-evals and/or --max-iters missing', file=sys.stderr) + return 1 + + try: + dataset = Dataset(args.input_file) + problem = NiaARM(dataset.dimension, dataset.features, dataset.transactions, args.alpha, args.beta, args.gamma, + args.delta, args.logging) + task = Task(problem, max_iters=args.max_iters, max_evals=args.max_evals, + optimization_type=OptimizationType.MAXIMIZATION) + + algorithm = get_algorithm(args.algorithm, seed=args.seed) + params = algorithm.get_parameters() + new_params = edit_parameters(params) + if new_params is None: + print('Invalid parameters', sys.stderr) + return 1 + + for param in new_params: + if param not in params: + print(f'Invalid parameter: {param}', sys.stderr) + return 1 + + algorithm.set_parameters(**new_params) + + algorithm.run(task) + + if args.output_file: + problem.export_rules(args.output_file) + + if args.show_stats: + stats = Stats(problem.rules) + print('\nSTATS:') + print(f'Total rules: {stats.total_rules}') + print(f'Average fitness: {stats.avg_fitness}') + print(f'Average support: {stats.avg_support}') + print(f'Average confidence: {stats.avg_confidence}') + print(f'Average coverage: {stats.avg_coverage}') + print(f'Average shrinkage: {stats.avg_shrinkage}') + print(f'Average length of antecedent: {stats.avg_ant_len}') + print(f'Average length of consequent: {stats.avg_con_len}') + + except Exception as e: + print(e.args, sys.stderr) + return 1 diff --git a/pyproject.toml b/pyproject.toml index 7949d22..1b2eb51 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,6 +20,9 @@ pandas = [ [tool.poetry.dev-dependencies] pytest = "^7.0.1" +[tool.poetry.scripts] +niaarm = 'niaarm.cli:main' + [build-system] requires = ["poetry-core>=1.0.0"] build-backend = "poetry.core.masonry.api" From 1e996295569c8bebda8e58e2335b0d3c55825743 Mon Sep 17 00:00:00 2001 From: zStupan Date: Tue, 1 Mar 2022 20:15:34 +0100 Subject: [PATCH 3/9] CLI fixes --- niaarm/cli.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/niaarm/cli.py b/niaarm/cli.py index 2af618f..fc691d4 100644 --- a/niaarm/cli.py +++ b/niaarm/cli.py @@ -51,9 +51,12 @@ def parse_parameters(text): for line in lines: key, value = line.split('=') key = key.strip() - value = float(value.strip()) - if value.is_integer(): - value = int(value) + try: + value = float(value.strip()) + if value.is_integer(): + value = int(value) + except ValueError: + pass parameters[key] = value return parameters @@ -76,7 +79,7 @@ def edit_parameters(parameters): try: os.unlink(filename) except Exception as e: - print(e) + print('Error:', e, file=sys.stderr) return new_parameters @@ -101,12 +104,12 @@ def main(): params = algorithm.get_parameters() new_params = edit_parameters(params) if new_params is None: - print('Invalid parameters', sys.stderr) + print('Invalid parameters', file=sys.stderr) return 1 for param in new_params: if param not in params: - print(f'Invalid parameter: {param}', sys.stderr) + print(f'Invalid parameter: {param}', file=sys.stderr) return 1 algorithm.set_parameters(**new_params) @@ -129,5 +132,5 @@ def main(): print(f'Average length of consequent: {stats.avg_con_len}') except Exception as e: - print(e.args, sys.stderr) + print('Error:', e, file=sys.stderr) return 1 From c186a8ddffaaf72f10dc4f1cd3cb54d102c365a3 Mon Sep 17 00:00:00 2001 From: zStupan Date: Tue, 1 Mar 2022 22:42:47 +0100 Subject: [PATCH 4/9] Add CLI docs --- README.md | 70 +++++++++++++++++++++++++++++++++++++++++++++++++-- niaarm/cli.py | 20 ++++++++------- 2 files changed, 79 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index 248aa29..1b1b9fc 100644 --- a/README.md +++ b/README.md @@ -36,9 +36,75 @@ Install NiaARM with pip3: pip3 install niaarm ``` -## Examples +## Usage -For a list of examples see the [examples folder](examples/). +###Basic example +```python +from niaarm import NiaARM, Dataset +from niapy.algorithms.basic import DifferentialEvolution +from niapy.task import Task, OptimizationType + + +# load and preprocess the dataset from csv +data = Dataset("datasets/Abalone.csv") + +# Create a problem::: +# dimension represents the dimension of the problem; +# features represent the list of features, while transactions depicts the list of transactions +# the following 4 elements represent weights (support, confidence, coverage, shrinkage) +# None defines that criteria are omitted and are, therefore, excluded from the fitness function +problem = NiaARM(data.dimension, data.features, data.transactions, alpha=1.0, beta=1.0) + +# build niapy task +task = Task(problem=problem, max_iters=30, optimization_type=OptimizationType.MAXIMIZATION) + +# use Differential Evolution (DE) algorithm from the NiaPy library +# see full list of available algorithms: https://github.com/NiaOrg/NiaPy/blob/master/Algorithms.md +algo = DifferentialEvolution(population_size=50, differential_weight=0.5, crossover_probability=0.9) + +# run algorithm +best = algo.run(task=task) + +# sort rules +problem.sort_rules() + +# export all rules to csv +problem.export_rules('output.csv') +``` +For a full list of examples see the [examples folder](examples/). + +### Command line interface + +``` +niaarm -h +usage: niaarm [-h] -i INPUT_FILE [-o OUTPUT_FILE] -a ALGORITHM [-s SEED] + [--max-evals MAX_EVALS] [--max-iters MAX_ITERS] [--alpha ALPHA] + [--beta BETA] [--gamma GAMMA] [--delta DELTA] [--logging] + [--show-stats] + +Perform ARM, output mined rules as csv, get mined rules' statistics + +options: + -h, --help show this help message and exit + -i INPUT_FILE, --input-file INPUT_FILE + Input file containing a csv dataset + -o OUTPUT_FILE, --output-file OUTPUT_FILE + Output file for mined rules + -a ALGORITHM, --algorithm ALGORITHM + Algorithm to use (niapy class name, e. g. + DifferentialEvolution) + -s SEED, --seed SEED Seed for the algorithm's random number generator + --max-evals MAX_EVALS + Maximum number of fitness function evaluations + --max-iters MAX_ITERS + Maximum number of iterations + --alpha ALPHA Alpha parameter. Default 0 + --beta BETA Beta parameter. Default 0 + --gamma GAMMA Gamma parameter. Default 0 + --delta DELTA Delta parameter. Default 0 + --logging Enable logging of fitness improvements + --show-stats Display stats about mined rules +``` ## Reference Papers: diff --git a/niaarm/cli.py b/niaarm/cli.py index fc691d4..b8e0440 100644 --- a/niaarm/cli.py +++ b/niaarm/cli.py @@ -13,18 +13,19 @@ def get_parser(): - parser = argparse.ArgumentParser(description='Perform ARM, output mined rules as csv, get mined rules\' statistics') + parser = argparse.ArgumentParser(prog='niaarm', + description='Perform ARM, output mined rules as csv, get mined rules\' statistics') + parser.add_argument('-i', '--input-file', type=str, required=True, help='Input file containing a csv dataset') + parser.add_argument('-o', '--output-file', type=str, help='Output file for mined rules') parser.add_argument('-a', '--algorithm', type=str, required=True, - help='Algorithm to use (niapy class name, e. g. DifferentialEvolution)') - parser.add_argument('-s', '--seed', type=int, help='Seed for thr algorithm\'s random number generator') + help='Algorithm to use (niapy class name, e.g. DifferentialEvolution)') + parser.add_argument('-s', '--seed', type=int, help='Seed for the algorithm\'s random number generator') parser.add_argument('--max-evals', type=int, default=np.inf, help='Maximum number of fitness function evaluations') parser.add_argument('--max-iters', type=int, default=np.inf, help='Maximum number of iterations') - parser.add_argument('-i', '--input-file', type=str, required=True, help='Input file containing a csv dataset') - parser.add_argument('-o', '--output-file', type=str, help='Output file for mined rules') - parser.add_argument('--alpha', type=float, default=0.0, help='Alpha parameter') - parser.add_argument('--beta', type=float, default=0.0, help='Beta parameter') - parser.add_argument('--gamma', type=float, default=0.0, help='Gamma parameter') - parser.add_argument('--delta', type=float, default=0.0, help='Delta parameter') + parser.add_argument('--alpha', type=float, default=0.0, help='Alpha parameter. Default 0') + parser.add_argument('--beta', type=float, default=0.0, help='Beta parameter. Default 0') + parser.add_argument('--gamma', type=float, default=0.0, help='Gamma parameter. Default 0') + parser.add_argument('--delta', type=float, default=0.0, help='Delta parameter. Default 0') parser.add_argument('--logging', action='store_true', help='Enable logging of fitness improvements') parser.add_argument('--show-stats', action='store_true', help='Display stats about mined rules') @@ -117,6 +118,7 @@ def main(): algorithm.run(task) if args.output_file: + problem.sort_rules() problem.export_rules(args.output_file) if args.show_stats: From 7f4b86650258c8a28905e6eced0e57c068824462 Mon Sep 17 00:00:00 2001 From: zStupan Date: Tue, 1 Mar 2022 22:44:57 +0100 Subject: [PATCH 5/9] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 1b1b9fc..c139519 100644 --- a/README.md +++ b/README.md @@ -38,7 +38,7 @@ pip3 install niaarm ## Usage -###Basic example +### Basic example ```python from niaarm import NiaARM, Dataset from niapy.algorithms.basic import DifferentialEvolution From 5a6f9635e7ea81acb4305bada3fd5b74ce0fcafe Mon Sep 17 00:00:00 2001 From: zStupan Date: Tue, 1 Mar 2022 22:56:53 +0100 Subject: [PATCH 6/9] Update README.md --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index c139519..bd63bd0 100644 --- a/README.md +++ b/README.md @@ -18,7 +18,7 @@ NiaARM is a framework for Association Rule Mining based on nature-inspired algorithms for optimization. The framework is written fully in Python and runs on all platforms. NiaARM allows users to preprocess the data in a transaction database automatically, to search for association rules and provide a pretty output of the rules found. This framework also supports numerical and real-valued types of attributes besides the categorical ones. Mining the association rules is defined as an optimization problem, and solved using the nature-inspired algorithms that come from the related framework called [NiaPy](https://github.com/NiaOrg/NiaPy). ## Detailed insights -The current version witholds (but is not limited to) the following functions: +The current version includes (but is not limited to) the following functions: - loading datasets in CSV format, - preprocessing of data, @@ -105,6 +105,7 @@ options: --logging Enable logging of fitness improvements --show-stats Display stats about mined rules ``` +Note: The CLI script can also run as a python module (`python -m niaarm ...`) ## Reference Papers: From 2a4a6dce19c58f4b3044bfa83287a96f22efbdd1 Mon Sep 17 00:00:00 2001 From: zStupan Date: Wed, 2 Mar 2022 23:29:34 +0100 Subject: [PATCH 7/9] Throw exception if all scale factors are 0 --- niaarm/niaarm.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/niaarm/niaarm.py b/niaarm/niaarm.py index bfc1590..5da4b01 100644 --- a/niaarm/niaarm.py +++ b/niaarm/niaarm.py @@ -36,12 +36,16 @@ def __init__(self, dimension, features, transactions, alpha=0.0, beta=0.0, gamma """ self.features = features self.transactions = transactions + + if alpha + beta + gamma + delta == 0: + raise ValueError('At least one of alpha, beta, gamma or delta must be set') + self.alpha = alpha self.beta = beta self.gamma = gamma self.delta = delta - self.logging = logging + self.logging = logging self.best_fitness = np.NINF self.rules = [] super().__init__(dimension, 0.0, 1.0) From 0fd152a05139cb57c8d2fc81c71dcdef17d25d0f Mon Sep 17 00:00:00 2001 From: zStupan Date: Wed, 2 Mar 2022 23:30:00 +0100 Subject: [PATCH 8/9] Fix CLI --- README.md | 4 +-- niaarm/cli.py | 67 ++++++++++++++++++++++++++++++++++++++++++--------- 2 files changed, 57 insertions(+), 14 deletions(-) diff --git a/README.md b/README.md index bd63bd0..bce756e 100644 --- a/README.md +++ b/README.md @@ -79,7 +79,7 @@ For a full list of examples see the [examples folder](examples/). niaarm -h usage: niaarm [-h] -i INPUT_FILE [-o OUTPUT_FILE] -a ALGORITHM [-s SEED] [--max-evals MAX_EVALS] [--max-iters MAX_ITERS] [--alpha ALPHA] - [--beta BETA] [--gamma GAMMA] [--delta DELTA] [--logging] + [--beta BETA] [--gamma GAMMA] [--delta DELTA] [--log] [--show-stats] Perform ARM, output mined rules as csv, get mined rules' statistics @@ -102,7 +102,7 @@ options: --beta BETA Beta parameter. Default 0 --gamma GAMMA Gamma parameter. Default 0 --delta DELTA Delta parameter. Default 0 - --logging Enable logging of fitness improvements + --log Enable logging of fitness improvements --show-stats Display stats about mined rules ``` Note: The CLI script can also run as a python module (`python -m niaarm ...`) diff --git a/niaarm/cli.py b/niaarm/cli.py index b8e0440..0b4cc5a 100644 --- a/niaarm/cli.py +++ b/niaarm/cli.py @@ -1,4 +1,5 @@ import argparse +from inspect import getmodule, getmembers, isfunction import os from pathlib import Path import platform @@ -10,6 +11,9 @@ from niaarm import NiaARM, Dataset, Stats from niapy.task import OptimizationType, Task from niapy.util.factory import get_algorithm +from niapy.util import distances, repair +from niapy.algorithms.other import mts +from niapy.algorithms.basic import de def get_parser(): @@ -26,7 +30,7 @@ def get_parser(): parser.add_argument('--beta', type=float, default=0.0, help='Beta parameter. Default 0') parser.add_argument('--gamma', type=float, default=0.0, help='Gamma parameter. Default 0') parser.add_argument('--delta', type=float, default=0.0, help='Delta parameter. Default 0') - parser.add_argument('--logging', action='store_true', help='Enable logging of fitness improvements') + parser.add_argument('--log', action='store_true', help='Enable logging of fitness improvements') parser.add_argument('--show-stats', action='store_true', help='Display stats about mined rules') return parser @@ -41,28 +45,67 @@ def parameters_string(parameters): '# Save and exit to continue\n' \ '# WARNING: Do not edit parameter names\n' for parameter, value in parameters.items(): - params_txt += f'{parameter} = {value}\n' + if isinstance(value, tuple): + if callable(value[0]): + value = tuple(v.__name__ for v in value) + else: + value = tuple(str(v) for v in value) + value = ', '.join(value) + params_txt += f'{parameter} = {value.__name__ if callable(value) else value}\n' return params_txt -def parse_parameters(text): +def functions(algorithm): + funcs = {} + algorithm_funcs = dict(getmembers(getmodule(algorithm.__class__), isfunction)) + repair_funcs = dict(getmembers(repair, isfunction)) + distance_funcs = dict(getmembers(distances, isfunction)) + de_funcs = dict(getmembers(de, isfunction)) + mts_funcs = dict(getmembers(mts, isfunction)) + funcs.update(algorithm_funcs) + funcs.update(repair_funcs) + funcs.update(distance_funcs) + funcs.update(de_funcs) + funcs.update(mts_funcs) + return funcs + + +def find_function(name, algorithm): + return functions(algorithm)[name] + + +def convert_string(string): + try: + value = float(string) + if value.is_integer(): + value = int(value) + except ValueError: + return string + return value + + +def parse_parameters(text, algorithm): lines: list[str] = text.strip().split('\n') lines = [line.strip() for line in lines if line.strip() and not line.strip().startswith('#')] parameters = {} for line in lines: key, value = line.split('=') key = key.strip() - try: - value = float(value.strip()) - if value.is_integer(): - value = int(value) - except ValueError: - pass + value = convert_string(value.strip()) + if isinstance(value, str): + if len(value.split(', ')) > 1: # tuple + value = list(map(str.strip, value.split(', '))) + value = tuple(map(convert_string, value)) + value = tuple(find_function(v, algorithm) for v in value if type(v) == str) + elif value.lower() == 'true' or value.lower() == 'false': # boolean + value = value.lower() == 'true' + else: # probably a function + value = find_function(value, algorithm) parameters[key] = value return parameters -def edit_parameters(parameters): +def edit_parameters(parameters, algorithm): parameters.pop('individual_type', None) parameters.pop('initialization_function', None) fd, filename = tempfile.mkstemp() @@ -75,7 +118,7 @@ def edit_parameters(parameters): command = f'{text_editor()} {filename}' subprocess.run(command, shell=True, check=True) params_txt = path.read_text() - new_parameters = parse_parameters(params_txt) + new_parameters = parse_parameters(params_txt, algorithm) finally: try: os.unlink(filename) @@ -103,7 +146,7 @@ def main(): algorithm = get_algorithm(args.algorithm, seed=args.seed) params = algorithm.get_parameters() - new_params = edit_parameters(params) + new_params = edit_parameters(params, algorithm.__class__) if new_params is None: print('Invalid parameters', file=sys.stderr) return 1 From 27f1cd8a3b2f345d50886a89f014b5e63825c94d Mon Sep 17 00:00:00 2001 From: zStupan Date: Wed, 2 Mar 2022 23:40:34 +0100 Subject: [PATCH 9/9] minor fix --- niaarm/cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/niaarm/cli.py b/niaarm/cli.py index 0b4cc5a..db5fa32 100644 --- a/niaarm/cli.py +++ b/niaarm/cli.py @@ -140,7 +140,7 @@ def main(): try: dataset = Dataset(args.input_file) problem = NiaARM(dataset.dimension, dataset.features, dataset.transactions, args.alpha, args.beta, args.gamma, - args.delta, args.logging) + args.delta, args.log) task = Task(problem, max_iters=args.max_iters, max_evals=args.max_evals, optimization_type=OptimizationType.MAXIMIZATION)