-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathOptimistic_Greedy_animations.py
74 lines (61 loc) · 2.46 KB
/
Optimistic_Greedy_animations.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
from os import system
import numpy as np
import sys
from environments.bandit import BanditEnv
from simulation.experiments import BanditExperiment
from animator import All_Plots_Animation
class Greedy():
"""
Plays in an Greedy fasion
"""
def __init__(self, num_actions):
self.name = "Greedy"
# Initialize rewards and counts to 0
self.total_rewards = np.zeros(num_actions, dtype = np.longdouble)
self._total_counts = np.zeros(num_actions, dtype = np.longdouble)
def act(self):
"""
Chooses action with best observed reward
"""
current_averages = np.divide(self.total_rewards, self.total_counts, where = self.total_counts > 0)
# Assign 0.5 "observed" reward for actions that haven't been played, for decision purpose only
current_averages[self._total_counts <= 0] = 0.5
current_action = np.argmax(current_averages)
return current_action
def feedback(self, action, reward):
self.total_rewards[action] += reward
self.total_counts[action] += 1
@property
def total_counts(self):
return self._total_counts
class OptimisticGreedy(Greedy):
"""
Plays in an Optimistic Greedy fashion
"""
def __init__(self, num_actions, initial_value):
Greedy.__init__(self, num_actions)
self.name = "Optimistic Greedy"
# Rewards are initialized different to initial_value
self.total_rewards += np.ones(num_actions, dtype=np.longdouble) * initial_value
self._total_counts += np.ones(num_actions)
system('cls') # Clear window
evaluation_seed = 3 # Use None for random
num_actions = 20
trials = 50
R = 3
distribution = "bernoulli"
fps = 15 # For rendering animation
env = BanditEnv(num_actions, distribution, evaluation_seed)
agent = OptimisticGreedy(num_actions, R)
experiment = BanditExperiment(env, agent)
experiment.run_bandit(max_number_of_trials=trials)
data = {}
data["algorithm"] = agent.name
data["details"] = {"R": R}
data["total_counts"] = experiment.action_history
data["regret_history"] = experiment.regret_history
data["reward_parameters"] = experiment.env.reward_parameters
data["average_rewards_history"] = experiment.average_rewards
all_plots_animation = All_Plots_Animation(data, fps=fps)
all_plots_animation.generate_animation(render_action="show")
# all_plots_animation.generate_animation(render_action="save")