from pathlib import Path

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns

from protosignet.util import eval_pareto, tag_objectives

CUSTOM_PALETTE = ["#648FFF", "#2ECC71", "#8069EC", "#EA822C", "#D143A4", "#F1C40F", "#34495E"]

# NOTE(review): CUSTOM_STYLE (used by plot_figure_1d) is defined in a part of this module
# that lies outside the visible hunks; it is referenced here, not redefined.


def plot_figure_1d(data_dp, fig_fp):
    """Generate scatterplot of obj 1 (simplicity) vs obj 2 (performance) over all runs/repeats.

    Three groups show the individuals of generations 1, 10 and 100 (``gen_j`` equal to
    0, 9 and 99). A fourth group shows the Pareto-optimal subset of the rows that hold
    the highest ``obj2`` for each distinct ``obj1`` value in the loaded data.

    Args:
        data_dp (str): absolute path to data directory
        fig_fp (str): absolute path for saving generated figure
    """
    df = tag_objectives(data_dp)

    # idxmax() yields index *labels*, so the rows must be selected with .loc; .iloc would
    # only give the same rows while the frame happens to carry a default RangeIndex.
    best_labels = df.groupby("obj1")["obj2"].idxmax().to_numpy()
    df_top = df.loc[best_labels].copy()
    df_top["is_pareto"] = eval_pareto(df_top[["obj1", "obj2"]].to_numpy())
    df_pareto = df_top.loc[df_top["is_pareto"] == 1]

    # One row per plotted group: (legend label, rows, face colour, alpha).
    # The scatter layers and the legend are both derived from this table so they
    # cannot drift apart.
    groups = [
        ("Gen 1", df.loc[df["gen_j"] == 0], "#2ECC71", 0.8),
        ("Gen 10", df.loc[df["gen_j"] == 9], "#F1C40F", 0.8),
        ("Gen 100", df.loc[df["gen_j"] == 99], "#EA822C", 0.8),
        ("Best (Pareto)", df_pareto, "#D143A4", 1.0),
    ]

    with plt.style.context(("seaborn-v0_8-whitegrid", CUSTOM_STYLE)):
        fig, ax = plt.subplots(figsize=(24, 20))
        for _, df_group, face_color, alpha in groups:
            sns.scatterplot(
                data=df_group,
                x="obj1",
                y="obj2",
                edgecolor="#212121",
                facecolor=face_color,
                alpha=alpha,
                linewidth=2,
                s=600,
                ax=ax,  # draw on this figure's axes, not whatever pyplot considers current
            )
        # Proxy artists: the legend is built by hand instead of from the scatter layers.
        handles = [
            mpl.lines.Line2D([], [], color=face_color, marker="o", markersize=8, linewidth=0)
            for _, _, face_color, _ in groups
        ]
        group_labels = [label for label, _, _, _ in groups]
        ax.legend(
            handles,
            group_labels,
            loc="best",
            markerscale=4,
            frameon=True,
            shadow=False,
            handletextpad=0.4,
            borderpad=0.2,
            labelspacing=0.2,
            handlelength=1,
        )
        ax.set_xlabel("Simplicity")
        ax.set_ylabel("Performance")
        ax.xaxis.set_ticks(np.arange(0, 1.1, 0.2))
        ax.set_xlim(-0.1, 1.1)
        ax.yaxis.set_ticks(np.arange(0, 1.1, 0.2))
        ax.set_ylim(-0.1, 1.1)
        fig.tight_layout()
        fig.canvas.draw()
        fig.savefig(fig_fp, pad_inches=0.3, dpi=200, bbox_inches="tight", transparent=False)
    # Close only the figure created here so figures owned by the caller are left alone.
    plt.close(fig)


def main():
    """Render figure 1d from the hard-coded data directory into the figs directory."""
    data_dp = Path("/home/phuong/data/protosignet/dual_fm/data/")
    save_dp = Path("/home/phuong/data/protosignet/dual_fm/figs/")
    # savefig() does not create missing parent directories.
    save_dp.mkdir(parents=True, exist_ok=True)
    fig_fp = save_dp / "fig_1d.png"
    plot_figure_1d(data_dp, fig_fp)


if __name__ == "__main__":
    # NOTE(review): the guard body lies outside the visible hunk; presumably main() - confirm.
    main()
import ast
from pathlib import Path

import numpy as np
import pandas as pd

# NOTE(review): calc_hypervolume2D(pf_obj, ref) sits above these helpers in this module.
# Only its last lines are visible as hunk context, so it is intentionally not reproduced.


def tag_objectives(data_dp):
    """Tag every objective score set with an address (repeat index, generation index, population index).

    Every ``*.csv`` file in ``data_dp`` is one repeat; files are visited in natural sort
    order. Each cell of a file's ``objective`` column is parsed with
    ``ast.literal_eval`` and treated as one generation.

    Args:
        data_dp (str): absolute path to data directory

    Returns:
        df (DataFrame): reorganized data with columns ["obj1", "obj2", "rep_i", "gen_j", "pop_k"]
    """
    # Imported here because this is the only helper that needs natsort; the NumPy-only
    # helpers below stay importable without it.
    from natsort import natsorted

    chunks = []
    for rep_i, csv_fp in enumerate(natsorted(Path(data_dp).glob("*.csv"))):
        objective_cells = pd.read_csv(csv_fp)["objective"].to_numpy()
        for gen_j, cell in enumerate(objective_cells):
            # literal_eval only accepts Python literals, so no code from the CSV is executed.
            os_gen = np.asarray(ast.literal_eval(cell), dtype=float)
            n_pop = os_gen.shape[0]
            if n_pop == 0:
                # An empty generation has no rows to tag (and no columns to concatenate).
                continue
            address = np.column_stack((np.full(n_pop, rep_i), np.full(n_pop, gen_j), np.arange(n_pop)))
            chunks.append(np.concatenate((os_gen, address), axis=1))
    # Stack once at the end; stacking onto a growing array inside the loop re-copies
    # everything collected so far on every generation.
    obj_scores = np.vstack(chunks) if chunks else np.empty((0, 5))
    df = pd.DataFrame(data=obj_scores, columns=["obj1", "obj2", "rep_i", "gen_j", "pop_k"])
    return df


def dominates(p_obj, q_obj):
    """Evaluates whether individual p dominates individual q.

    Individual p dominates individual q if p is no worse than q in all objectives and p is
    strictly better than q in at least one objective. Larger scores are treated as better.

    Args:
        p_obj (1D array-like): array of j objective scores corresponding to individual p
        q_obj (1D array-like): array of j objective scores corresponding to individual q

    Returns:
        True if p dominates q else False
    """
    # Coerce first: plain lists/tuples compare lexicographically with >= and >, which
    # would give wrong answers for non-ndarray input.
    p_arr = np.asarray(p_obj)
    q_arr = np.asarray(q_obj)
    return bool(np.all(p_arr >= q_arr) and np.any(p_arr > q_arr))


def eval_pareto(objectives):
    """Flag the non-dominated (Pareto-optimal) individuals of a population.

    An individual is flagged when no other individual dominates it in the sense of
    ``dominates``. Individuals with identical scores do not dominate each other.

    Args:
        objectives (2D array-like): one row of objective scores per individual

    Returns:
        is_pareto (1D int array): 1 where the individual is non-dominated, else 0
    """
    scores = np.asarray(objectives)
    n_indiv = len(scores)
    is_pareto = np.zeros(n_indiv, dtype=int)
    if n_indiv == 0:
        return is_pareto
    # One row per individual, also when a flat sequence of single scores is passed.
    scores = scores.reshape(n_indiv, -1)
    for p in range(n_indiv):
        # Compare every individual against p at once instead of looping over q in Python.
        no_worse = np.all(scores >= scores[p], axis=1)
        better = np.any(scores > scores[p], axis=1)
        if not np.any(no_worse & better):
            is_pareto[p] = 1
    return is_pareto


def fetch_indiv(csv_fp, gen_j, pop_k):
    """Fetch one individual from the ``population`` column of a results CSV.

    Args:
        csv_fp (str): path to the CSV file of one repeat
        gen_j (int or float): generation index, i.e. row of the ``population`` column
        pop_k (int or float): index of the individual within that generation

    Returns:
        indiv: entry ``pop_k`` of the parsed generation ``gen_j``
    """
    df = pd.read_csv(Path(csv_fp))
    pop_rep = df["population"].values
    # Indices are cast to int so float values (e.g. taken from a float DataFrame) work.
    pop_gen = np.array(ast.literal_eval(pop_rep[int(gen_j)]))
    indiv = pop_gen[int(pop_k)]
    return indiv