From 135a001cb4757e96de36987b6b13fddfe7c52a5d Mon Sep 17 00:00:00 2001 From: zStupan Date: Tue, 19 Apr 2022 22:56:15 +0200 Subject: [PATCH 1/6] Added visualization methods --- docs/api/index.rst | 1 + docs/api/visualize.rst | 6 +++ niaarm/visualize.py | 107 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 114 insertions(+) create mode 100644 docs/api/visualize.rst create mode 100644 niaarm/visualize.py diff --git a/docs/api/index.rst b/docs/api/index.rst index de9bcea..51e69ba 100644 --- a/docs/api/index.rst +++ b/docs/api/index.rst @@ -9,3 +9,4 @@ API Reference niaarm rule rule_list + visualize diff --git a/docs/api/visualize.rst b/docs/api/visualize.rst new file mode 100644 index 0000000..3d4a9c8 --- /dev/null +++ b/docs/api/visualize.rst @@ -0,0 +1,6 @@ +Visualize +========= + +.. automodule:: niaarm.visualize + :members: + :show-inheritance: \ No newline at end of file diff --git a/niaarm/visualize.py b/niaarm/visualize.py new file mode 100644 index 0000000..71db854 --- /dev/null +++ b/niaarm/visualize.py @@ -0,0 +1,107 @@ +import matplotlib.pyplot as plt +from matplotlib.cm import ScalarMappable +from matplotlib.colors import Normalize +import numpy as np + + +def ribbon(x, z, width=0.5): + fig, ax = plt.subplots(subplot_kw={'projection': '3d'}) + + xi = np.linspace(x[:-1], x[1:], num=100, axis=1).flatten() + zi = np.interp(xi, x, z) + + xx = np.column_stack((-np.ones(len(zi)), np.ones(len(zi)))) * width + 1 + yy = np.column_stack((xi, xi)) + zz = np.column_stack((zi, zi)) + + scalar_map = ScalarMappable(Normalize(vmin=0, vmax=zi.max())) + colors = scalar_map.to_rgba(zz) + ax.plot_surface(xx, yy, zz, rstride=1, cstride=1, facecolors=colors) + + fig.colorbar(scalar_map, shrink=0.5, aspect=10) + + ax.set_ylabel('Location') + ax.set_yticks(range(1 + len(x) // 3)) + ax.set_yticklabels(range(1 + len(x) // 3)) + ax.set_zlabel('Height') + ax.view_init(30, 240) + + return fig, ax + + +def tdf(rule, transactions): + """Visualize rule as hill slopes. + + **Reference:** Fister, I. et al. (2020). Visualization of Numerical Association Rules by Hill Slopes. + In: Analide, C., Novais, P., Camacho, D., Yin, H. (eds) Intelligent Data Engineering and Automated Learning – IDEAL 2020. + IDEAL 2020. Lecture Notes in Computer Science(), vol 12489. Springer, Cham. https://doi.org/10.1007/978-3-030-62362-3_10 + + Args: + rule (Rule): Association rule to visualize. + transactions (pandas.DataFrame): Transactions as a DataFrame. + + Returns: + tuple[matplotlib.figure.Figure, matplotlib.axes.Axes]: Figure and Axes of plot. + + """ + features = rule.antecedent + rule.consequent + num_features = len(features) + support = np.empty(num_features) + max_index = -1 + max_support = -1 + match_x = None + x_count = 0 + for i, f in enumerate(features): + if f.dtype != 'cat': + match = (transactions[f.name] <= f.max_val) & (transactions[f.name] >= f.min_val) + else: + match = transactions[f.name] == f.categories[0] + + supp_count = match.sum() + supp = supp_count / len(transactions) + support[i] = supp + if supp >= max_support: + max_support = supp + max_index = i + match_x = match + x_count = supp_count + + confidence = np.empty(num_features) + for i, y in enumerate(features): + if i == max_index: + confidence[i] = 2 + continue + if y.dtype != 'cat': + match_y = (transactions[y.name] <= y.max_val) & (transactions[y.name] >= y.min_val) + else: + match_y = transactions[y.name] == y.categories[0] + supp_count = (match_x & match_y).sum() + confidence[i] = supp_count / x_count + + indices = np.argsort(confidence)[::-1] + confidence = confidence[indices] + confidence[0] = max_support + support = support[indices] + + length = np.sqrt(support ** 2 + confidence ** 2) + position = np.empty(num_features) + position[0] = length[0] / 2 + for i, ln in enumerate(length[1:]): + position[i + 1] = position[i] + length[i] / 2 + confidence[i + 1] + ln / 2 + + s = (length + support + confidence) / 2 + a = s * (s - length) * (s - support) * (s - confidence) + + if np.all(a >= 0): + a = np.sqrt(a) + height = 2 * a / length + x = np.sqrt(support ** 2 - height ** 2) + + vec = np.concatenate((-length / 2, -length / 2 + x, length / 2)) + vec = (vec.reshape(3, num_features) + position).T.reshape(len(vec)) + + height = np.concatenate((height, np.zeros(len(vec) - num_features))) + height = np.reshape(height, (3, num_features)).T.reshape(len(vec)) + height = np.concatenate((np.zeros(1), height))[:len(vec)] + + return ribbon(vec, height) From 5f2f43c44f9af4bb39586b4dbd284585f8ffe918 Mon Sep 17 00:00:00 2001 From: zStupan Date: Tue, 19 Apr 2022 22:56:57 +0200 Subject: [PATCH 2/6] Minor update in __init__ --- niaarm/rule.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/niaarm/rule.py b/niaarm/rule.py index 2ead598..5bd4cab 100644 --- a/niaarm/rule.py +++ b/niaarm/rule.py @@ -162,10 +162,20 @@ def __init__(self, antecedent, consequent, fitness=0.0, transactions=None): self.antecedent = antecedent self.consequent = consequent self.fitness = fitness - self.num_transactions = len(transactions) - self.__inclusion = (len(self.antecedent) + len(self.consequent)) / len(transactions.columns) - - self.__post_init__(transactions) + self.num_transactions = 0 + self.__inclusion = 0 + self.__amplitude = 0 + self.antecedent_count = 0 + self.consequent_count = 0 + self.full_count = 0 + self.ant_not_con = 0 + self.con_not_ant = 0 + self.not_ant_not_con = 0 + + if transactions: + self.num_transactions = len(transactions) + self.__inclusion = (len(self.antecedent) + len(self.consequent)) / len(transactions.columns) + self.__post_init__(transactions) def __post_init__(self, transactions): min_ = transactions.min(numeric_only=True) From e439960bc39a9f7d6c6918ffcefec9e91bf99fdb Mon Sep 17 00:00:00 2001 From: zStupan Date: Tue, 19 Apr 2022 22:59:42 +0200 Subject: [PATCH 3/6] Formatting fix --- niaarm/visualize.py | 50 ++++++++++++++++++++++----------------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/niaarm/visualize.py b/niaarm/visualize.py index 71db854..c8f02e0 100644 --- a/niaarm/visualize.py +++ b/niaarm/visualize.py @@ -4,31 +4,6 @@ import numpy as np -def ribbon(x, z, width=0.5): - fig, ax = plt.subplots(subplot_kw={'projection': '3d'}) - - xi = np.linspace(x[:-1], x[1:], num=100, axis=1).flatten() - zi = np.interp(xi, x, z) - - xx = np.column_stack((-np.ones(len(zi)), np.ones(len(zi)))) * width + 1 - yy = np.column_stack((xi, xi)) - zz = np.column_stack((zi, zi)) - - scalar_map = ScalarMappable(Normalize(vmin=0, vmax=zi.max())) - colors = scalar_map.to_rgba(zz) - ax.plot_surface(xx, yy, zz, rstride=1, cstride=1, facecolors=colors) - - fig.colorbar(scalar_map, shrink=0.5, aspect=10) - - ax.set_ylabel('Location') - ax.set_yticks(range(1 + len(x) // 3)) - ax.set_yticklabels(range(1 + len(x) // 3)) - ax.set_zlabel('Height') - ax.view_init(30, 240) - - return fig, ax - - def tdf(rule, transactions): """Visualize rule as hill slopes. @@ -105,3 +80,28 @@ def tdf(rule, transactions): height = np.concatenate((np.zeros(1), height))[:len(vec)] return ribbon(vec, height) + + +def ribbon(x, z, width=0.5): + fig, ax = plt.subplots(subplot_kw={'projection': '3d'}) + + xi = np.linspace(x[:-1], x[1:], num=100, axis=1).flatten() + zi = np.interp(xi, x, z) + + xx = np.column_stack((-np.ones(len(zi)), np.ones(len(zi)))) * width + 1 + yy = np.column_stack((xi, xi)) + zz = np.column_stack((zi, zi)) + + scalar_map = ScalarMappable(Normalize(vmin=0, vmax=zi.max())) + colors = scalar_map.to_rgba(zz) + ax.plot_surface(xx, yy, zz, rstride=1, cstride=1, facecolors=colors) + + fig.colorbar(scalar_map, shrink=0.5, aspect=10) + + ax.set_ylabel('Location') + ax.set_yticks(range(1 + len(x) // 3)) + ax.set_yticklabels(range(1 + len(x) // 3)) + ax.set_zlabel('Height') + ax.view_init(30, 240) + + return fig, ax From 39416161b6136ca32247bbe6820507c2665a9d47 Mon Sep 17 00:00:00 2001 From: zStupan Date: Tue, 19 Apr 2022 23:06:31 +0200 Subject: [PATCH 4/6] Fix comparison error --- niaarm/rule.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/niaarm/rule.py b/niaarm/rule.py index 5bd4cab..5a89312 100644 --- a/niaarm/rule.py +++ b/niaarm/rule.py @@ -172,7 +172,7 @@ def __init__(self, antecedent, consequent, fitness=0.0, transactions=None): self.con_not_ant = 0 self.not_ant_not_con = 0 - if transactions: + if transactions is not None: self.num_transactions = len(transactions) self.__inclusion = (len(self.antecedent) + len(self.consequent)) / len(transactions.columns) self.__post_init__(transactions) From d4e429106448b33a88e8eedc5fabf8f188dd9a32 Mon Sep 17 00:00:00 2001 From: zStupan Date: Tue, 19 Apr 2022 23:06:58 +0200 Subject: [PATCH 5/6] Add visualization --- niaarm/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/niaarm/__init__.py b/niaarm/__init__.py index 6e80943..dabc319 100644 --- a/niaarm/__init__.py +++ b/niaarm/__init__.py @@ -4,6 +4,7 @@ from niaarm.feature import Feature from niaarm.mine import get_rules from niaarm.rule_list import RuleList +from niaarm.visualize import tdf __all__ = ['NiaARM', 'Dataset', 'Feature', 'Rule', 'RuleList', 'get_rules'] From ac082d2b1b37f70cd0a38a8006147cd357214dd9 Mon Sep 17 00:00:00 2001 From: zStupan Date: Wed, 20 Apr 2022 00:20:59 +0200 Subject: [PATCH 6/6] Refactor ribbon function --- niaarm/visualize.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/niaarm/visualize.py b/niaarm/visualize.py index c8f02e0..b72f426 100644 --- a/niaarm/visualize.py +++ b/niaarm/visualize.py @@ -79,7 +79,13 @@ def tdf(rule, transactions): height = np.reshape(height, (3, num_features)).T.reshape(len(vec)) height = np.concatenate((np.zeros(1), height))[:len(vec)] - return ribbon(vec, height) + fig, ax = ribbon(vec, height) + ax.set_ylabel('Location') + ax.set_yticks(range(num_features + 1)) + ax.set_yticklabels(range(num_features + 1)) + ax.set_zlabel('Height') + ax.view_init(30, 240) + return fig, ax def ribbon(x, z, width=0.5): @@ -98,10 +104,4 @@ def ribbon(x, z, width=0.5): fig.colorbar(scalar_map, shrink=0.5, aspect=10) - ax.set_ylabel('Location') - ax.set_yticks(range(1 + len(x) // 3)) - ax.set_yticklabels(range(1 + len(x) // 3)) - ax.set_zlabel('Height') - ax.view_init(30, 240) - return fig, ax