evaluate_agents_perfomances.py
"""
Code to deploy and test the various trained algorithms
"""
import numpy as np
import pandas as pd
import glob
import sys
import os
# load the various algorithms
from stable_baselines3 import PPO # agents
from stable_baselines3 import A2C # agents
from stable_baselines3 import DQN # agents
# load the random agent
from yawning_titan.agents.random import RandomAgent
from yawning_titan.config.game_config.game_mode_config import GameModeConfig
# Load the various game modes - standard, low red skills, high red skills and rnd seeded files
from yawning_titan.config.game_modes import default_game_mode_path, default_game_mode_tests_path, \
    default_game_mode_tests_rnd_path, default_game_mode_low_red_skills_rnd_path, \
    default_game_mode_high_red_skills_rnd_path
from yawning_titan.config.network_config.network_config import NetworkConfig
from stable_baselines3.common.monitor import Monitor
# general yawning titan components
from yawning_titan.envs.generic.core.blue_interface import BlueInterface
from yawning_titan.envs.generic.core.red_interface import RedInterface
from yawning_titan.envs.generic.generic_env import GenericNetworkEnv
from yawning_titan.envs.generic.helpers import network_creator
from yawning_titan.envs.generic.core.network_interface import NetworkInterface
from yawning_titan.envs.generic.core.network_interface_explore import NetworkInterfaceExplore
# stable baselines evaluator
from stable_baselines3.common.evaluation import evaluate_policy
import generate_test_networks as gtn
current_dir = os.getcwd()
network_dir = os.path.join(current_dir, 'networks')
results_dir = os.path.join(current_dir, 'results_data')
model_dir = os.path.join(current_dir, 'logs_dir')
# these seeds are hard-coded to match the existing seeded yaml game-mode files,
# so the same scenarios are replayed for every agent and the results stay comparable
random_seeds = [2022, 14031879, 23061912, 6061944, 17031861]
# network entries
network_entry = [['3', '5', '10'],  # 18
                 ['3', '10', '15', '25', '34', '45', '7'],  # 50
                 ['4', '10', '20', '30', '40', '55', '76', '78', '12', '88', '90']]  # 100
# network size
network_sizes = [18, 50, 100]
# Naming and algorithms
agents_algos = ['PPO', 'A2C', 'DQN']
agents = [PPO, A2C, DQN]
# Gather the relevant data - this will take some time
model_names = []
rewards = []
episode_length = []
# loop over the network sizes
for index, net_size in enumerate(network_sizes):
    network_image = glob.glob(os.path.join(network_dir, f'synthetic_{net_size}_nodes_network.npz'))[0]
    network_files = np.load(network_image, allow_pickle=True)
    matrix = network_files['matrix']
    # the node positions are stored as a 0-d object array; [()] unwraps them back into a dict
    positions = network_files['connections'][()]
    # generate the network
    network = NetworkConfig.create_from_args(matrix=matrix, positions=positions,
                                             entry_nodes=network_entry[index])
    # now loop over the various random seeds
    for iseed in random_seeds:
        # seeded testing - to use one of the modified red agent skill levels, load
        # default_game_mode_low_red_skills_rnd_path or default_game_mode_high_red_skills_rnd_path instead
        game_mode = GameModeConfig.create_from_yaml(default_game_mode_tests_rnd_path(iseed))
        # Now test the agent on specific scenarios (a commented example of an alternative mode follows the constructor below):
        # standard   : the standard yawning titan scenario; in this mode num_extension is ignored
        # compromise : some nodes (num_extension) are randomly infected
        # isolate    : some nodes (num_extension) are randomly isolated
        # mix        : some nodes (num_extension) are isolated and some are compromised
        network_interface = NetworkInterfaceExplore(game_mode=game_mode,
                                                    network=network,
                                                    num_extension=3,  # number of nodes impacted by the extension
                                                    extension='standard')
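        # A minimal sketch of one of the alternative start states, assuming the same
        # constructor signature as above: randomly compromise five nodes before testing.
        # network_interface = NetworkInterfaceExplore(game_mode=game_mode,
        #                                             network=network,
        #                                             num_extension=5,
        #                                             extension='compromise')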
        # load the red and blue agent
        red = RedInterface(network_interface)
        blue = BlueInterface(network_interface)
        # initialise the environment
        env = GenericNetworkEnv(red, blue, network_interface, print_metrics=False)
        # reset the environment
        env.reset()
        # now loop over the trained models for each algorithm
        for idx, iagent in enumerate(agents):
            # collect all the models trained for this algorithm at the given network size
            trained_models = glob.glob(os.path.join(model_dir, agents_algos[idx],
                                                    f'{agents_algos[idx]}_{net_size}_*.zip'))
            # select the algorithm class
            algorithm = agents[idx]
            # loop over the models trained
            for imodel in trained_models:
                print(agents_algos[idx], net_size, imodel)
                model_names.append(imodel)
                agent = algorithm.load(imodel)
                # evaluate the policy
                eval_pol = evaluate_policy(agent,
                                           Monitor(env),
                                           return_episode_rewards=True,
                                           deterministic=False,
                                           n_eval_episodes=1)
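                # with return_episode_rewards=True, evaluate_policy returns two lists:
                # the total reward and the length of every evaluated episode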
                rewards.append(eval_pol[0])
                episode_length.append(eval_pol[1])
                # delete the loaded agent and reset the environment
                del agent
                env.reset()
        # now test the random agent
        R_agent = RandomAgent(env.action_space)
        model_names.append(f'random_agent_{net_size}')
        # reset the observations and rewards
        obs = env.reset()
        reward = 0
        # cumulative reward
        rw = 0
        for iaction in range(500):  # at most 500 timesteps
            action = R_agent.predict(obs, reward, '')
            obs, reward, done, info = env.step(action)
            rw += reward
            if done:
                env.reset()
                # print(rw, iaction, done)
                break
        env.reset()
        rewards.append(rw)
        episode_length.append(iaction)
# now dump all the relevant data into a dataframe containing the
# model names, rewards and episode lengths
summary_data = pd.DataFrame(data={'model': model_names,
                                  'reward': rewards,
                                  'length': episode_length})
# dump all the data into a csv
summary_data.to_csv(os.path.join(results_dir, 'agents_evaluation.csv'), index=False)
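

# A minimal follow-up sketch (not part of the evaluation loop above) of how the saved
# csv could be loaded and summarised per algorithm.  For the trained agents the
# 'reward' column holds the one-element list returned by evaluate_policy, so it is
# flattened first; the 'algo' grouping key is an assumption based on the model file
# naming used in this script.
def summarise_results(csv_path):
    """Load the evaluation csv and print the mean reward per algorithm."""
    results = pd.read_csv(csv_path)
    # flatten rewards stored as one-element list strings, e.g. "[12.0]"
    results['reward'] = results['reward'].apply(
        lambda r: float(str(r).strip('[]').split(',')[0]))
    # derive an algorithm label from the model file name (PPO/A2C/DQN, otherwise random)
    results['algo'] = results['model'].apply(
        lambda m: next((a for a in agents_algos if a in os.path.basename(str(m))), 'random'))
    print(results.groupby('algo')['reward'].mean())


# summarise_results(os.path.join(results_dir, 'agents_evaluation.csv'))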