-
Notifications
You must be signed in to change notification settings - Fork 0
/
adswitch.py
128 lines (100 loc) · 4.16 KB
/
adswitch.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
from collections import defaultdict
from pathlib import Path
from typing import Set

import numpy as np
import yaml
from matplotlib import pyplot as plt
from tqdm import trange

from adswitch_custom import AdSwitchCustom, AdSwitchNew
# Load the experiment parameters from variables.yaml (path is relative to the
# current working directory). The encoding is given explicitly so decoding does
# not depend on the platform's locale default.
with open("variables.yaml", encoding="utf-8") as f:
    d = yaml.safe_load(f)

interval = d["interval"]  # number of steps in each constant-reward segment
max_val = d["max_val"]  # higher of the two reward levels the arms alternate between
min_val = d["min_val"]  # lower of the two reward levels
# Setup
# arm_rewards = {
# 0: np.hstack((.7 * np.ones(interval), 0.3 * np.ones(interval), 0.5 * np.ones(interval), 0.3 * np.ones(interval))),
# 1: np.hstack((.3 * np.ones(interval), 0.3 * np.ones(interval), 0.6 * np.ones(interval), 0.7 * np.ones(interval))),
# 2: np.hstack((.5 * np.ones(interval), 0.7 * np.ones(interval), 0.3 * np.ones(interval), 0.4 * np.ones(interval))),
# 3: np.hstack((.1 * np.ones(interval), 0.1 * np.ones(interval), 1.0 * np.ones(interval), 0.1 * np.ones(interval))),
# }
# True (noise-free) reward of each arm at every time step. Each arm is built
# from six back-to-back segments of `interval` steps; arm 0 starts on max_val,
# arm 1 starts on min_val, and both flip level at every segment boundary.
arm_rewards = {
    0: np.hstack([level * np.ones(interval) for level in (max_val, min_val) * 3]),
    1: np.hstack([level * np.ones(interval) for level in (min_val, max_val) * 3]),
}
# Plot colour for each arm index (position in the tuple == arm index).
arm_colors = dict(enumerate(("mediumseagreen", "tomato", "gold", "blue")))
# Standard deviation of the Gaussian noise added to each observed reward.
noise_std = 0.05

# Problem size comes straight from the reward table: one entry per arm and
# one reward value per time step.
N_ARMS = len(arm_rewards)
HORIZON = len(arm_rewards[0])

# Both step parameters are forwarded unchanged to the algorithm constructor.
delta_t = delta_s = 1

# To run the other imported variant instead, swap in:
#   algo = AdSwitchNew(N_ARMS, horizon=HORIZON, delta_t=delta_t, delta_s=delta_s)
algo = AdSwitchCustom(N_ARMS, delta_s=delta_s, delta_t=delta_t, horizon=HORIZON)
algo.startGame()

# Bookkeeping that the simulation loop fills in.
starts = set()  # every start_of_episode value reported by the algorithm
bad_arms: Set[int] = set()  # arms outside algo.set_GOOD at the latest step
bad_arm_add_hist = defaultdict(list)  # arm -> steps at which it newly left set_GOOD
S_sets = {}  # step -> copy of algo.set_S (recorded only when non-empty)
arm_choices, seen_rewards, regret = (np.zeros(HORIZON) for _ in range(3))
regret_sum = 0
# Run the simulation: at every step the algorithm picks an arm, receives that
# arm's true reward plus Gaussian noise, and the script records diagnostics
# (episode starts, S sets, newly bad arms) and the cumulative regret.
all_arms = set(range(N_ARMS))  # loop-invariant, so build it once
for t in trange(HORIZON):
    noises = np.random.normal(0.0, scale=noise_std, size=N_ARMS)
    arm_idx = algo.choice()
    reward_at_idx = arm_rewards[arm_idx][t] + noises[arm_idx]
    algo.getReward(arm_idx, reward_at_idx)

    starts.add(algo.start_of_episode)
    if algo.set_S:
        # Copy, because the algorithm may keep mutating its own set.
        S_sets[t] = algo.set_S.copy()

    # Record the step at which each arm newly drops out of the GOOD set.
    bad_arms_t = all_arms.difference(algo.set_GOOD)
    for arm in bad_arms_t.difference(bad_arms):
        bad_arm_add_hist[arm].append(t)
    bad_arms = bad_arms_t

    arm_choices[t] = arm_idx
    seen_rewards[t] = reward_at_idx

    # Generator form rather than max(*[...]): star-unpacking a one-element list
    # hands max() a single scalar and raises TypeError when there is one arm.
    best_reward_t = max(arm_rewards[k][t] for k in arm_rewards)
    # NOTE(review): regret is taken against the noisy observed reward, not the
    # chosen arm's true mean, so single-step increments can be negative.
    regret_sum += best_reward_t - reward_at_idx
    regret[t] = regret_sum
# Figure 1: running empirical mean of each arm (top panel) and the true
# per-arm rewards (bottom panel), sharing the time axis.
fig, ax = plt.subplots(2, 1, figsize=(10, 14), sharex=True)
steps = np.arange(HORIZON)

for arm_idx in arm_rewards:
    picked = arm_choices == arm_idx
    pulls = np.cumsum(picked)
    # Before an arm's first pull the mean is undefined (0/0). Keep NaN there so
    # matplotlib leaves a gap, but skip the division to avoid a RuntimeWarning.
    means_to_t = np.full(HORIZON, np.nan)
    np.divide(np.cumsum(seen_rewards * picked), pulls, out=means_to_t, where=pulls > 0)
    ax[0].plot(steps, means_to_t, c=arm_colors[arm_idx], label=f"Arm: {arm_idx}", linewidth=4)

print(starts)
# Step 0 is not drawn as an episode boundary. discard() instead of remove()
# so a run that never recorded 0 does not raise KeyError.
starts.discard(0)
for it, start in enumerate(sorted(starts)):
    # Label only the first line so the legend shows a single entry.
    label_kwargs = {"label": "Episode starts"} if it == 0 else {}
    ax[0].axvline(start, color="black", linestyle="dotted", linewidth=4, **label_kwargs)

# Mark, in the arm's own colour, each step at which it newly left the GOOD set.
for arm, ts in bad_arm_add_hist.items():
    for t_bad in ts:
        ax[0].axvline(t_bad, color=arm_colors[arm], linestyle="dotted", linewidth=4)

ax[0].legend()
ax[0].set_ylabel("Empirical mean of arm")

for arm_idx, weight in arm_rewards.items():
    ax[1].plot(steps, weight, label=f"Arm: {arm_idx}", c=arm_colors[arm_idx], linewidth=4)
ax[1].legend()
ax[1].set_ylabel("True Rewards per arm")
ax[1].set_xlabel("Time")

plt.suptitle("AdSwitch Algorithm Estimated Arm means")

# savefig does not create missing directories; make sure the target exists so
# a finished simulation is not lost to FileNotFoundError.
Path("plots").mkdir(parents=True, exist_ok=True)
plt.savefig("plots/adswitch_arms.png", bbox_inches="tight")
plt.savefig("plots/adswitch_arms.pdf", bbox_inches="tight")
# Release the figure instead of leaving a cleared one open.
plt.close(fig)
# Plot regret: persist the cumulative-regret curve and draw it on its own figure.
# Neither np.save nor savefig creates missing directories, so create them first.
Path("logs").mkdir(parents=True, exist_ok=True)
Path("plots").mkdir(parents=True, exist_ok=True)

np.save("logs/adswitch_regret.npy", regret)

fig = plt.figure(figsize=(10, 10))
plt.plot(np.arange(HORIZON), regret, label="AdSwitch", linewidth=4)
plt.xlabel("Time")
plt.ylabel("Regret")
plt.title("AdSwitch Regret")
plt.legend()
plt.savefig("plots/adswitch_regrets.pdf", bbox_inches="tight")
plt.savefig("plots/adswitch_regrets.png", bbox_inches="tight")
# Release the figure once both files are written.
plt.close(fig)