Skip to content

Commit

Permalink
Plot progress of obj scores across generations
Browse files Browse the repository at this point in the history
  • Loading branch information
phuongho43 committed Dec 9, 2024
1 parent 6797792 commit ecfd23a
Show file tree
Hide file tree
Showing 2 changed files with 120 additions and 22 deletions.
75 changes: 53 additions & 22 deletions protosignet/plot_results.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
import ast
from pathlib import Path

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from natsort import natsorted
import seaborn as sns

from protosignet.util import eval_pareto, tag_objectives

# Custom colour palette (hex RGB strings); the marker colours hard-coded in plot_figure_1d appear in this list.
CUSTOM_PALETTE = ["#648FFF", "#2ECC71", "#8069EC", "#EA822C", "#D143A4", "#F1C40F", "#34495E"]

Expand All @@ -29,35 +31,64 @@
}


def plot_figure_1d(data_dp, fig_fp):
    """Generate scatterplot of obj 1 (simplicity) vs obj 2 (performance) over all runs/repeats.

    Individuals from generations 1, 10 and 100 (gen_j of 0, 9 and 99) are drawn in separate
    colours. The Pareto-optimal individuals are drawn last so that they sit on top.

    Args:
        data_dp (str): absolute path to data directory
        fig_fp (str): absolute path for saving generated figure
    """
    df = tag_objectives(data_dp)

    # For every distinct obj1 value keep only the row with the highest obj2, then flag which of
    # those rows are non-dominated. idxmax() yields index labels, hence .loc rather than .iloc.
    df_top = df.loc[df.groupby("obj1")["obj2"].idxmax()].copy()
    df_top["is_pareto"] = eval_pareto(df_top[["obj1", "obj2"]].to_numpy())
    df_pareto = df_top.loc[df_top["is_pareto"] == 1]

    # (legend label, data, marker colour, alpha) listed in draw order.
    groups = [
        ("Gen 1", df.loc[df["gen_j"] == 0], "#2ECC71", 0.8),
        ("Gen 10", df.loc[df["gen_j"] == 9], "#F1C40F", 0.8),
        ("Gen 100", df.loc[df["gen_j"] == 99], "#EA822C", 0.8),
        ("Best (Pareto)", df_pareto, "#D143A4", 1.0),
    ]

    with plt.style.context(("seaborn-v0_8-whitegrid", CUSTOM_STYLE)):
        fig, ax = plt.subplots(figsize=(24, 20))
        for _, df_group, color, alpha in groups:
            sns.scatterplot(
                data=df_group,
                x="obj1",
                y="obj2",
                edgecolor="#212121",
                facecolor=color,
                alpha=alpha,
                linewidth=2,
                s=600,
                ax=ax,
            )
        # Proxy artists so the legend shows one plain marker per group.
        handles = [
            mpl.lines.Line2D([], [], color=color, marker="o", markersize=8, linewidth=0)
            for _, _, color, _ in groups
        ]
        group_labels = [label for label, _, _, _ in groups]
        ax.legend(
            handles,
            group_labels,
            loc="best",
            markerscale=4,
            frameon=True,
            shadow=False,
            handletextpad=0.4,
            borderpad=0.2,
            labelspacing=0.2,
            handlelength=1,
        )
        ax.set_xlabel("Simplicity")
        ax.set_ylabel("Performance")
        ax.xaxis.set_ticks(np.arange(0, 1.1, 0.2))
        ax.set_xlim(-0.1, 1.1)
        ax.yaxis.set_ticks(np.arange(0, 1.1, 0.2))
        ax.set_ylim(-0.1, 1.1)
        fig.tight_layout()
        fig.canvas.draw()
        fig.savefig(fig_fp, pad_inches=0.3, dpi=200, bbox_inches="tight", transparent=False)
    plt.close("all")


def main():
    """Render figure 1d from the dual_fm data directory into the dual_fm figs directory."""
    data_dp = Path("/home/phuong/data/protosignet/dual_fm/data/")
    save_dp = Path("/home/phuong/data/protosignet/dual_fm/figs/")
    # Make sure the output directory exists before the figure is written into it.
    save_dp.mkdir(parents=True, exist_ok=True)
    fig_fp = save_dp / "fig_1d.png"
    plot_figure_1d(data_dp, fig_fp)


if __name__ == "__main__":
Expand Down
67 changes: 67 additions & 0 deletions protosignet/util.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,9 @@
import ast
from pathlib import Path

import numpy as np
import pandas as pd
from natsort import natsorted


def calc_hypervolume2D(pf_obj, ref):
Expand All @@ -18,3 +23,65 @@ def calc_hypervolume2D(pf_obj, ref):
df2 = np.abs(pf_obj[:, 1] - ref[1]) # rectangle heights
hv = (df1 * df2).sum()
return hv


def tag_objectives(data_dp):
    """Tag every objective score set with an address (repeat index, generation index, population index).

    Every "*.csv" file in `data_dp` is read in natural sort order and counts as one repeat. Each
    row of its "objective" column is parsed as one generation, and each entry of that parsed row
    is one individual.

    Args:
        data_dp (str): absolute path to data directory

    Returns:
        df (DataFrame): reorganized data with columns ["obj1", "obj2", "rep_i", "gen_j", "pop_k"]
    """
    columns = ["obj1", "obj2", "rep_i", "gen_j", "pop_k"]
    # Collect one tagged array per generation and join them once at the end; stacking onto a
    # growing array inside the loop would re-copy all earlier rows on every iteration.
    tagged = []
    for i, csv_fp in enumerate(natsorted(Path(data_dp).glob("*.csv"))):
        df_rep = pd.read_csv(csv_fp)
        for j, os_str in enumerate(df_rep["objective"].values):
            os_gen = np.array(ast.literal_eval(os_str))
            n_pop = os_gen.shape[0]
            rep_i = i * np.ones(n_pop)
            gen_j = j * np.ones(n_pop)
            pop_k = np.arange(n_pop)
            address = np.column_stack((rep_i, gen_j, pop_k))
            tagged.append(np.concatenate((os_gen, address), axis=1))
    if tagged:
        obj_scores = np.concatenate(tagged, axis=0)
    else:
        # No CSV files (or no generations): return an empty frame that still has all columns.
        obj_scores = np.empty((0, len(columns)))
    return pd.DataFrame(data=obj_scores, columns=columns)


def dominates(p_obj, q_obj):
    """Evaluates whether individual p dominates individual q.

    Individual p dominates individual q if p is no worse than q in all objectives and p is
    strictly better than q in at least one objective. Higher scores count as better.

    Args:
        p_obj (1D array-like): array of j objective scores corresponding to individual p
        q_obj (1D array-like): array of j objective scores corresponding to individual q

    Returns:
        True if p dominates q else False
    """
    # Coerce to arrays so the comparisons are element-wise. Plain lists and tuples would be
    # compared lexicographically and give wrong answers.
    p_obj = np.asarray(p_obj)
    q_obj = np.asarray(q_obj)
    return bool(np.all(p_obj >= q_obj) and np.any(p_obj > q_obj))


def eval_pareto(objectives):
    """Flag which individuals are Pareto-optimal (not dominated by any other individual).

    An individual is dominated when some other individual is no worse in every objective and
    strictly better in at least one. Higher scores count as better.

    Args:
        objectives (2D array-like): one row of objective scores per individual

    Returns:
        is_pareto (1D int array): 1 where the individual is non-dominated, 0 otherwise
    """
    # Coerce to an array so rows are compared element-wise even for list-of-lists input.
    scores = np.asarray(objectives)
    n_pop = len(scores)
    if n_pop == 0:
        return np.zeros(0, dtype=int)
    scores = scores.reshape(n_pop, -1)
    is_pareto = np.ones(n_pop, dtype=int)
    for p, p_obj in enumerate(scores):
        # Compare candidate p against the whole population in one shot.
        no_worse = np.all(scores >= p_obj, axis=1)
        better = np.any(scores > p_obj, axis=1)
        if np.any(no_worse & better):
            is_pareto[p] = 0
    return is_pareto


def fetch_indiv(csv_fp, gen_j, pop_k):
    """Load one individual from a CSV file's "population" column.

    Args:
        csv_fp (str): path to a CSV file that has a "population" column
        gen_j (int): row of the CSV to parse (generation index); cast with int()
        pop_k (int): position within the parsed population; cast with int()

    Returns:
        indiv: entry pop_k of the population array parsed from row gen_j
    """
    populations = pd.read_csv(Path(csv_fp))["population"].values
    generation = np.array(ast.literal_eval(populations[int(gen_j)]))
    return generation[int(pop_k)]

0 comments on commit ecfd23a

Please sign in to comment.