Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Multilevel Coordinate Search (MCS) #353

Merged
merged 57 commits into from
May 13, 2024
Merged
Show file tree
Hide file tree
Changes from 25 commits
Commits
Show all changes
57 commits
Select commit Hold shift + click to select a range
683671f
import errors
nforsg Mar 14, 2024
af9d868
still an modeunotfounderror
nforsg Mar 14, 2024
8f9164f
working on errors
nforsg Mar 14, 2024
389d2b7
got somewhere
nforsg Mar 15, 2024
c0a8e6a
omw
nforsg Mar 15, 2024
d6557dc
issue with not iterating
nforsg Mar 15, 2024
ced8f82
found better source
nforsg Mar 15, 2024
f510705
bug in source code fixed
nforsg Mar 15, 2024
765539c
gettibng somewhere
nforsg Mar 15, 2024
6836981
refactoring in progress
nforsg Mar 15, 2024
33260d9
Now I can do a complete run of the algorithm
nforsg Mar 18, 2024
e1acf9c
evaluation seems to be the problem
nforsg Apr 4, 2024
abf66e0
wip
nforsg Apr 4, 2024
17a3c2a
wip
nforsg Apr 4, 2024
f070122
batch size
nforsg Apr 4, 2024
bcd5db4
batch size
nforsg Apr 4, 2024
a558c9a
wip
nforsg Apr 4, 2024
439c8d9
wip
nforsg Apr 4, 2024
0670313
managed to cutdown file volume
nforsg Apr 4, 2024
5f7bcb9
linter wip
nforsg Apr 5, 2024
82e0f01
more linter than i thought
nforsg Apr 5, 2024
f0b0ccd
wip
nforsg Apr 5, 2024
1cff743
ls utsils lintered
nforsg Apr 5, 2024
050267c
linter wip
nforsg Apr 5, 2024
1e8000a
linter done with exception of a couple TODO's
nforsg Apr 6, 2024
66d1d55
typing in progress
nforsg Apr 12, 2024
5525315
typing wip
nforsg Apr 12, 2024
8372d81
typing wip
nforsg Apr 12, 2024
9290d83
some tricky mypy errors
nforsg Apr 12, 2024
4006daf
test commit from new computer (HP Envy 360, really nice so far)
nforsg Apr 12, 2024
9d3ced4
wip
nforsg Apr 12, 2024
ca3d07b
numyfication
nforsg Apr 13, 2024
094402f
type hintinh wip
nforsg Apr 13, 2024
1925087
GLS-error encountered for another seed, wierd
nforsg Apr 19, 2024
dad0c64
wip
nforsg Apr 19, 2024
c55354c
logger progress
nforsg Apr 19, 2024
e594188
wip
nforsg Apr 19, 2024
5937d7d
linter run
nforsg Apr 19, 2024
2b842d6
(more) mypy's and linters fixed
nforsg Apr 26, 2024
37a328c
test suite initiated
nforsg Apr 26, 2024
e19910d
test suite basically works
nforsg Apr 28, 2024
f7270a1
two edge-cases (seems to be) fixed
nforsg Apr 28, 2024
babe5d0
fixed an edge-case
nforsg Apr 28, 2024
62300f8
mypy done (with 4 ignore statments)
nforsg Apr 28, 2024
8376a91
documentation wip
nforsg Apr 28, 2024
a2c55cf
further type-hinting, more study needed on documentation
nforsg Apr 28, 2024
70634d5
more lintering
nforsg May 4, 2024
176cdb9
eliminated hard code
nforsg May 4, 2024
a841bd5
documentation comments added
nforsg May 4, 2024
2f38e2c
type hinting
nforsg May 5, 2024
b8c20cf
more tpying
nforsg May 5, 2024
8dbcb0e
typing basically donel, documentation remaining
nforsg May 7, 2024
ed8d177
more documentation made
nforsg May 9, 2024
0c5f8bd
name adjust in test suite
nforsg May 9, 2024
56e3bab
cleanup
Limmen May 13, 2024
25b2d32
Merge branch 'master' into mcs
Limmen May 13, 2024
f4a58e3
cleanup
Limmen May 13, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
import csle_agents.constants.constants as agents_constants
import csle_common.constants.constants as constants
from csle_agents.agents.MCS.mcs_agent import MCSAgent
from csle_agents.common.objective_type import ObjectiveType
from csle_common.dao.training.agent_type import AgentType
from csle_common.dao.training.experiment_config import ExperimentConfig
from csle_common.dao.training.hparam import HParam
from csle_common.dao.training.player_type import PlayerType
from csle_common.dao.training.policy_type import PolicyType
from csle_common.metastore.metastore_facade import MetastoreFacade

if __name__ == "__main__":
    # Example script: run the Multilevel Coordinate Search (MCS) agent against a
    # stopping-POMDP simulation and persist the resulting execution and policies
    # through the metastore.

    # Both the emulation and the simulation configuration are looked up by name
    # in the metastore; a missing entry is a hard error.
    emulation_name = "csle-level1-050"
    emulation_env_config = MetastoreFacade.get_emulation_by_name(emulation_name)
    if emulation_env_config is None:
        raise ValueError(
            f"Could not find an emulation environment with the name: {emulation_name}"
        )

    simulation_name = "csle-stopping-pomdp-defender-002"
    simulation_env_config = MetastoreFacade.get_simulation_by_name(simulation_name)
    if simulation_env_config is None:
        raise ValueError(f"Could not find a simulation with name: {simulation_name}")

    # Every HParam is labelled (name=...) with the same constant that is used as
    # its dictionary key, so a logged hyperparameter name always matches the key
    # it is stored under.
    experiment_config = ExperimentConfig(
        # Log directory named after this agent (was a leftover
        # "particle_swarm_test" from the particle swarm example).
        output_dir=f"{constants.LOGGING.DEFAULT_LOG_DIR}mcs_test",
        title="Multilevel Coordinate Search",
        random_seeds=[399, 98912],
        agent_type=AgentType.MCS,
        log_every=1,
        hparams={
            agents_constants.MCS.STEP: HParam(
                value=1000,
                name=agents_constants.MCS.STEP,
                descr="step",
            ),
            agents_constants.MCS.STEP1: HParam(
                value=10000,
                name=agents_constants.MCS.STEP1,
                descr="step1",
            ),
            agents_constants.MCS.U: HParam(
                value=[-20, -20, -20],
                name=agents_constants.MCS.U,
                descr="initial lower corner",
            ),
            agents_constants.MCS.LOCAL: HParam(
                value=50,
                name=agents_constants.MCS.LOCAL,
                descr="local value stating to which degree to perform local searches",
            ),
            agents_constants.MCS.V: HParam(
                value=[20, 20, 20],
                name=agents_constants.MCS.V,
                descr="initial upper corner",
            ),
            agents_constants.MCS.STOPPING_ACTIONS: HParam(
                value=2,
                name=agents_constants.MCS.STOPPING_ACTIONS,
                descr="no. of stopping actions",
            ),
            # MAX_ENV_STEPS is defined exactly once; the original literal listed
            # this key twice with identical content, and the later entry
            # silently replaced the earlier one.
            agents_constants.COMMON.MAX_ENV_STEPS: HParam(
                value=500,
                name=agents_constants.COMMON.MAX_ENV_STEPS,
                descr="maximum number of steps in the environment (for envs with infinite horizon generally)",
            ),
            agents_constants.MCS.IINIT: HParam(
                value=0,
                name=agents_constants.MCS.IINIT,
                descr="simple initialization list",
            ),
            # 2.220446049250313e-16 == 2**-52, the float64 machine epsilon.
            agents_constants.MCS.GAMMA: HParam(
                value=2.220446049250313e-16,
                name=agents_constants.MCS.GAMMA,
                descr="gamma value",
            ),
            agents_constants.MCS.EPSILON: HParam(
                value=2.220446049250313e-16,
                name=agents_constants.MCS.EPSILON,
                descr="epsilon value",
            ),
            agents_constants.MCS.M: HParam(
                value=1,
                name=agents_constants.MCS.M,
                descr="m value",
            ),
            agents_constants.MCS.PRT: HParam(
                value=1,
                name=agents_constants.MCS.PRT,
                descr="print level",
            ),
            agents_constants.COMMON.EVAL_BATCH_SIZE: HParam(
                value=10,
                name=agents_constants.COMMON.EVAL_BATCH_SIZE,
                descr="number of iterations to evaluate theta",
            ),
            agents_constants.COMMON.SAVE_EVERY: HParam(
                value=1000,
                name=agents_constants.COMMON.SAVE_EVERY,
                descr="how frequently to save the model",
            ),
            agents_constants.COMMON.CONFIDENCE_INTERVAL: HParam(
                value=0.95,
                name=agents_constants.COMMON.CONFIDENCE_INTERVAL,
                descr="confidence interval",
            ),
            agents_constants.COMMON.RUNNING_AVERAGE: HParam(
                value=100,
                name=agents_constants.COMMON.RUNNING_AVERAGE,
                descr="the number of samples to include when computing the running avg",
            ),
            agents_constants.COMMON.GAMMA: HParam(
                value=0.99,
                name=agents_constants.COMMON.GAMMA,
                descr="the discount factor",
            ),
            agents_constants.MCS.POLICY_TYPE: HParam(
                value=PolicyType.MULTI_THRESHOLD,
                name=agents_constants.MCS.POLICY_TYPE,
                descr="policy type for the execution",
            ),
            agents_constants.MCS.OBJECTIVE_TYPE: HParam(
                value=ObjectiveType.MAX,
                name=agents_constants.MCS.OBJECTIVE_TYPE,
                descr="Objective type",
            ),
        },
        player_type=PlayerType.DEFENDER,
        player_idx=0,
    )

    # The agent is created with save_to_metastore=False; the execution and the
    # policies are saved explicitly below once training has returned.
    agent = MCSAgent(
        simulation_env_config=simulation_env_config,
        emulation_env_config=emulation_env_config,
        experiment_config=experiment_config,
        save_to_metastore=False,
    )
    experiment_execution = agent.train()
    MetastoreFacade.save_experiment_execution(experiment_execution)

    # Read the policy type through the same key it was stored under
    # (MCS.POLICY_TYPE). The original went through PARTICLE_SWARM.POLICY_TYPE,
    # which only resolves if both constants hold the same string. The lookup is
    # loop-invariant, so it is done once.
    policy_type = experiment_config.hparams[agents_constants.MCS.POLICY_TYPE].value
    for policy in experiment_execution.result.policies.values():
        if policy_type == PolicyType.MULTI_THRESHOLD:
            MetastoreFacade.save_multi_threshold_stopping_policy(
                multi_threshold_stopping_policy=policy
            )
        elif policy_type == PolicyType.LINEAR_THRESHOLD:
            MetastoreFacade.save_linear_threshold_stopping_policy(
                linear_threshold_stopping_policy=policy
            )
        else:
            # Raised only when there is at least one policy to save, as before.
            raise ValueError(f"Policy type: {policy_type} not recognized for MCS")
Original file line number Diff line number Diff line change
Expand Up @@ -8,89 +8,159 @@
from csle_common.dao.training.tabular_policy import TabularPolicy
from csle_common.metastore.metastore_facade import MetastoreFacade

if __name__ == '__main__':
emulation_name = "csle-level9-040"
if __name__ == "__main__":
emulation_name = "csle-level1-050"
emulation_env_config = MetastoreFacade.get_emulation_by_name(emulation_name)
if emulation_env_config is None:
raise ValueError(f"Could not find an emulation environment with the name: {emulation_name}")
raise ValueError(
f"Could not find an emulation environment with the name: {emulation_name}"
)
simulation_name = "csle-intrusion-response-game-local-pomdp-defender-001"
simulation_env_config = MetastoreFacade.get_simulation_by_name(simulation_name)
if simulation_env_config is None:
raise ValueError(f"Could not find a simulation with name: {simulation_name}")
experiment_config = ExperimentConfig(
output_dir=f"{constants.LOGGING.DEFAULT_LOG_DIR}dqn_clean_test",
title="DQN_clean test", random_seeds=[399, 98912, 999], agent_type=AgentType.DQN_CLEAN,
title="DQN_clean test",
random_seeds=[399, 98912, 999],
agent_type=AgentType.DQN_CLEAN,
log_every=1000,
hparams={
constants.NEURAL_NETWORKS.NUM_NEURONS_PER_HIDDEN_LAYER: HParam(
value=7, name=constants.NEURAL_NETWORKS.NUM_NEURONS_PER_HIDDEN_LAYER,
descr="neurons per hidden layer of the policy network"),
value=7,
name=constants.NEURAL_NETWORKS.NUM_NEURONS_PER_HIDDEN_LAYER,
descr="neurons per hidden layer of the policy network",
),
constants.NEURAL_NETWORKS.NUM_HIDDEN_LAYERS: HParam(
value=4, name=constants.NEURAL_NETWORKS.NUM_HIDDEN_LAYERS,
descr="number of layers of the policy network"),
value=4,
name=constants.NEURAL_NETWORKS.NUM_HIDDEN_LAYERS,
descr="number of layers of the policy network",
),
agents_constants.DQN_CLEAN.EXP_FRAC: HParam(
value=0.5, name=agents_constants.DQN_CLEAN.EXP_FRAC,
descr="the fraction of `total-timesteps it takes from start-e to go end-e"),
value=0.5,
name=agents_constants.DQN_CLEAN.EXP_FRAC,
descr="the fraction of `total-timesteps it takes from start-e to go end-e",
),
agents_constants.DQN_CLEAN.TAU: HParam(
value=1.0, name=agents_constants.DQN_CLEAN.TAU, descr="target network update rate"),
value=1.0,
name=agents_constants.DQN_CLEAN.TAU,
descr="target network update rate",
),
agents_constants.COMMON.BATCH_SIZE: HParam(
value=64, name=agents_constants.COMMON.BATCH_SIZE, descr="batch size for updates"),
value=64,
name=agents_constants.COMMON.BATCH_SIZE,
descr="batch size for updates",
),
agents_constants.DQN_CLEAN.LEARNING_STARTS: HParam(
value=10000, name=agents_constants.DQN_CLEAN.LEARNING_STARTS, descr="timestep to start learning"),
value=10000,
name=agents_constants.DQN_CLEAN.LEARNING_STARTS,
descr="timestep to start learning",
),
agents_constants.DQN_CLEAN.TRAIN_FREQ: HParam(
value=10, name=agents_constants.DQN_CLEAN.TRAIN_FREQ, descr="the frequency of training"),
value=10,
name=agents_constants.DQN_CLEAN.TRAIN_FREQ,
descr="the frequency of training",
),
agents_constants.DQN_CLEAN.T_N_FREQ: HParam(
value=500, name=agents_constants.DQN_CLEAN.T_N_FREQ,
descr="the batch size of sample from the reply memory"),
value=500,
name=agents_constants.DQN_CLEAN.T_N_FREQ,
descr="the batch size of sample from the reply memory",
),
agents_constants.DQN_CLEAN.BUFFER_SIZE: HParam(
value=1000, name=agents_constants.DQN_CLEAN.BUFFER_SIZE, descr="the replay memory buffer size"),
value=1000,
name=agents_constants.DQN_CLEAN.BUFFER_SIZE,
descr="the replay memory buffer size",
),
agents_constants.DQN_CLEAN.SAVE_MODEL: HParam(
value=False, name=agents_constants.DQN_CLEAN.SAVE_MODEL, descr="decision param for model saving"),
value=False,
name=agents_constants.DQN_CLEAN.SAVE_MODEL,
descr="decision param for model saving",
),
agents_constants.COMMON.LEARNING_RATE: HParam(
value=2.4e-5, name=agents_constants.COMMON.LEARNING_RATE,
descr="learning rate for updating the policy"),
value=2.4e-5,
name=agents_constants.COMMON.LEARNING_RATE,
descr="learning rate for updating the policy",
),
agents_constants.DQN_CLEAN.NUM_STEPS: HParam(
value=164, name=agents_constants.DQN_CLEAN.NUM_STEPS, descr="number of steps in each time step"),
value=164,
name=agents_constants.DQN_CLEAN.NUM_STEPS,
descr="number of steps in each time step",
),
constants.NEURAL_NETWORKS.DEVICE: HParam(
value="cpu", name=constants.NEURAL_NETWORKS.DEVICE, descr="the device to train on (cpu or cuda:x)"),
value="cpu",
name=constants.NEURAL_NETWORKS.DEVICE,
descr="the device to train on (cpu or cuda:x)",
),
agents_constants.COMMON.NUM_PARALLEL_ENVS: HParam(
value=1, name=agents_constants.COMMON.NUM_PARALLEL_ENVS,
descr="the nunmber of parallel environments for training"),
value=1,
name=agents_constants.COMMON.NUM_PARALLEL_ENVS,
descr="the nunmber of parallel environments for training",
),
agents_constants.COMMON.GAMMA: HParam(
value=0.99, name=agents_constants.COMMON.GAMMA, descr="the discount factor"),
value=0.99,
name=agents_constants.COMMON.GAMMA,
descr="the discount factor",
),
agents_constants.COMMON.NUM_TRAINING_TIMESTEPS: HParam(
value=int(100000), name=agents_constants.COMMON.NUM_TRAINING_TIMESTEPS,
descr="number of timesteps to train"),
agents_constants.COMMON.EVAL_EVERY: HParam(value=1, name=agents_constants.COMMON.EVAL_EVERY,
descr="training iterations between evaluations"),
agents_constants.COMMON.EVAL_BATCH_SIZE: HParam(value=100, name=agents_constants.COMMON.EVAL_BATCH_SIZE,
descr="the batch size for evaluation"),
agents_constants.COMMON.SAVE_EVERY: HParam(value=10000, name=agents_constants.COMMON.SAVE_EVERY,
descr="how frequently to save the model"),
value=int(100000),
name=agents_constants.COMMON.NUM_TRAINING_TIMESTEPS,
descr="number of timesteps to train",
),
agents_constants.COMMON.EVAL_EVERY: HParam(
value=1,
name=agents_constants.COMMON.EVAL_EVERY,
descr="training iterations between evaluations",
),
agents_constants.COMMON.EVAL_BATCH_SIZE: HParam(
value=100,
name=agents_constants.COMMON.EVAL_BATCH_SIZE,
descr="the batch size for evaluation",
),
agents_constants.COMMON.SAVE_EVERY: HParam(
value=10000,
name=agents_constants.COMMON.SAVE_EVERY,
descr="how frequently to save the model",
),
agents_constants.COMMON.CONFIDENCE_INTERVAL: HParam(
value=0.95, name=agents_constants.COMMON.CONFIDENCE_INTERVAL,
descr="confidence interval"),
value=0.95,
name=agents_constants.COMMON.CONFIDENCE_INTERVAL,
descr="confidence interval",
),
agents_constants.COMMON.MAX_ENV_STEPS: HParam(
value=500, name=agents_constants.COMMON.MAX_ENV_STEPS,
descr="maximum number of steps in the environment (for envs with infinite horizon generally)"),
value=500,
name=agents_constants.COMMON.MAX_ENV_STEPS,
descr="maximum number of steps in the environment (for envs with infinite horizon generally)",
),
agents_constants.COMMON.RUNNING_AVERAGE: HParam(
value=100, name=agents_constants.COMMON.RUNNING_AVERAGE,
descr="the number of samples to include when computing the running avg"),
agents_constants.COMMON.L: HParam(value=3, name=agents_constants.COMMON.L,
descr="the number of stop actions")
value=100,
name=agents_constants.COMMON.RUNNING_AVERAGE,
descr="the number of samples to include when computing the running avg",
),
agents_constants.COMMON.L: HParam(
value=3,
name=agents_constants.COMMON.L,
descr="the number of stop actions",
),
},
player_type=PlayerType.DEFENDER, player_idx=0
player_type=PlayerType.DEFENDER,
player_idx=0,
)
simulation_env_config.simulation_env_input_config.attacker_strategy = TabularPolicy(
player_type=PlayerType.ATTACKER,
actions=simulation_env_config.joint_action_space_config.action_spaces[1].actions,
simulation_name=simulation_env_config.name, value_function=None, q_table=None,
lookup_table=[
[0.8, 0.2],
[1, 0],
[1, 0]
],
agent_type=AgentType.RANDOM, avg_R=-1)
agent = DQNCleanAgent(simulation_env_config=simulation_env_config, emulation_env_config=emulation_env_config,
experiment_config=experiment_config, save_to_metastore=False)
actions=simulation_env_config.joint_action_space_config.action_spaces[
1
].actions,
simulation_name=simulation_env_config.name,
value_function=None,
q_table=None,
lookup_table=[[0.8, 0.2], [1, 0], [1, 0]],
agent_type=AgentType.RANDOM,
avg_R=-1,
)
agent = DQNCleanAgent(
simulation_env_config=simulation_env_config,
emulation_env_config=emulation_env_config,
experiment_config=experiment_config,
save_to_metastore=False,
)
experiment_execution = agent.train()
Loading