POMCP [WIP]
Limmen committed Jan 18, 2024
1 parent 633dec5 commit 8e4c52f
Showing 19 changed files with 482 additions and 171 deletions.
2 changes: 1 addition & 1 deletion README.md
@@ -90,7 +90,7 @@ version of the documentation is available [here](./releases/)
| Release | Last date of support |
|---------------------------------------------------------------|----------------------|
| [v.0.4.0](https://github.com/Limmen/csle/releases/tag/v0.4.0) | 2024-02-07 |
| [v.0.3.0](https://github.com/Limmen/csle/releases/tag/v0.3.0) | 2024-01-17 |
| [v.0.3.0](https://github.com/Limmen/csle/releases/tag/v0.3.0) | ~~2024-01-17~~ |
| [v.0.2.0](https://github.com/Limmen/csle/releases/tag/v0.2.0) | ~~2023-10-30~~ |
| [v.0.1.0](https://github.com/Limmen/csle/releases/tag/v0.1.0) | ~~2023-06-06~~ |

63 changes: 43 additions & 20 deletions examples/manual_play/cyborg_restore_defender.py
@@ -1,28 +1,51 @@
from gym_csle_cyborg.dao.csle_cyborg_config import CSLECyborgConfig
from gym_csle_cyborg.dao.red_agent_type import RedAgentType
from gym_csle_cyborg.envs.cyborg_scenario_two_defender import CyborgScenarioTwoDefender
from gym_csle_cyborg.dao.blue_agent_action_type import BlueAgentActionType
import gym_csle_cyborg.constants.constants as env_constants

if __name__ == '__main__':
config = CSLECyborgConfig(
gym_env_name="csle-cyborg-scenario-two-v1", scenario=2, baseline_red_agents=[RedAgentType.B_LINE_AGENT],
maximum_steps=100, red_agent_distribution=[1.0], reduced_action_space=False, decoy_state=False,
scanned_state=False, decoy_optimization=False)
maximum_steps=100, red_agent_distribution=[1.0], reduced_action_space=True, decoy_state=True,
scanned_state=True, decoy_optimization=False)
csle_cyborg_env = CyborgScenarioTwoDefender(config=config)
a = 1
R = 0
for t in range(1000):
o, r, done, _, info = csle_cyborg_env.step(a)
if done:
csle_cyborg_env.reset()
R += r
print(f"time-step: {t + 1}, cumulative reward: {R}, a: {a}")
for i in range(len(info["obs_per_host"])):
if csle_cyborg_env.cyborg_hostnames[i] == "User0":
continue
a = 1
if info["obs_per_host"][i]["compromised"].value > 0:
host = csle_cyborg_env.cyborg_hostnames[i]
action_type = BlueAgentActionType.RESTORE
a = csle_cyborg_env.cyborg_action_type_and_host_to_id[(action_type, host)]
break
o, info = csle_cyborg_env.reset()
initial_state_id = info[env_constants.ENV_METRICS.STATE]
csle_cyborg_env.step(1)
csle_cyborg_env.set_state(state=initial_state_id)
# print(csle_cyborg_env.cyborg_challenge_env.env.env.env.env.env.environment_controller.observation["Red"].data["User0"])
csle_cyborg_env.step(1)

# print("INITIAL2 STATE")
# print(csle_cyborg_env.get_true_table())
# # csle_cyborg_env.get_true_table()
# o, r, done, _, info = csle_cyborg_env.step(1)
# print("INITIAL1 STATE")
# print(csle_cyborg_env.get_true_table())
# initial_obs_id = info[env_constants.ENV_METRICS.OBSERVATION]
# initial_state_id = info[env_constants.ENV_METRICS.STATE]
# # csle_cyborg_env.set_state(state=initial_state_id)
# csle_cyborg_env.step(1)
# print("SECOND STATE")
# print(csle_cyborg_env.get_true_table())
# csle_cyborg_env.step(1)
# csle_cyborg_env.step(1)
# csle_cyborg_env.step(1)
# csle_cyborg_env.step(1)
# csle_cyborg_env.step(1)
# csle_cyborg_env.step(1)
# print(csle_cyborg_env.get_true_table())
# print("SET STATE")
# csle_cyborg_env.set_state(state=initial_state_id)
# print(csle_cyborg_env.get_true_table())
# csle_cyborg_env.step(1)
# print(csle_cyborg_env.get_true_table())
# csle_cyborg_env.step(1)
# print(csle_cyborg_env.get_true_table())
# csle_cyborg_env.step(1)
# print(csle_cyborg_env.get_true_table())
# csle_cyborg_env.step(1)
# csle_cyborg_env.step(1)
# csle_cyborg_env.step(1)
# csle_cyborg_env.step(1)
# print(csle_cyborg_env.get_true_table())
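For orientation, the rewritten script above mainly exercises the environment's `set_state` functionality (presumably in support of the particle-based simulation that POMCP needs). A minimal sketch of that save/step/restore round-trip, assuming the `gym_csle_cyborg` API shown in the diff:

```python
from gym_csle_cyborg.dao.csle_cyborg_config import CSLECyborgConfig
from gym_csle_cyborg.dao.red_agent_type import RedAgentType
from gym_csle_cyborg.envs.cyborg_scenario_two_defender import CyborgScenarioTwoDefender
import gym_csle_cyborg.constants.constants as env_constants

# Configuration mirroring the diff: reduced action space, decoy and scanned state enabled
config = CSLECyborgConfig(
    gym_env_name="csle-cyborg-scenario-two-v1", scenario=2,
    baseline_red_agents=[RedAgentType.B_LINE_AGENT], maximum_steps=100,
    red_agent_distribution=[1.0], reduced_action_space=True,
    decoy_state=True, scanned_state=True, decoy_optimization=False)
env = CyborgScenarioTwoDefender(config=config)

o, info = env.reset()
saved_state_id = info[env_constants.ENV_METRICS.STATE]  # id of the sampled initial state
env.step(1)                          # advance the environment one step
env.set_state(state=saved_state_id)  # roll the environment back to the saved state
env.step(1)                          # step again, now from the restored state
```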
98 changes: 17 additions & 81 deletions examples/manual_play/cyborg_test.py
@@ -2,78 +2,7 @@
from gym_csle_cyborg.dao.csle_cyborg_config import CSLECyborgConfig
from gym_csle_cyborg.dao.red_agent_type import RedAgentType
from gym_csle_cyborg.envs.cyborg_scenario_two_defender import CyborgScenarioTwoDefender


def info_to_vec(info, decoy_state, hosts):
"""
Creates the state vector
:param info: the info
:param decoy_state: the decoy state
:param hosts: the host list
:return: the state vector
"""
state_vec = []
for host in hosts:
known = info[host][3]
known = int(known)
scanned = info[host][4]
scanned = int(scanned)
access = info[host][5]
if access == "None":
access = 0
elif access == "User":
access = 1
else:
access = 2
d_state = len(decoy_state[host])
state_vec.append([known, scanned, access, d_state])
return state_vec


def state_vec_to_id(state_vec):
"""
Converts a state vector to an id
:param state_vec: the state vector to convert
:return: the id
"""
bin_id = ""
for host_vec in state_vec:
host_bin_str = ""
for i, elem in enumerate(host_vec):
if i == 0:
host_bin_str += format(elem, '01b')
if i == 1:
host_bin_str += format(elem, '01b')
if i == 2:
host_bin_str += format(elem, '02b')
if i == 3:
host_bin_str += format(elem, '03b')
bin_id += host_bin_str
id = int(bin_id, 2)
return id


def id_to_state_vec(id: int):
"""
Converts an id to a state vector
:param id: the id to convert
:return: the state vector
"""
bin_str = format(id, "091b")
host_bins = [bin_str[i:i + 7] for i in range(0, len(bin_str), 7)]
state_vec = []
for host_bin in host_bins:
known = int(host_bin[0:1], 2)
scanned = int(host_bin[1:2], 2)
access = int(host_bin[2:4], 2)
decoy = int(host_bin[4:7], 2)
host_vec = [known, scanned, access, decoy]
state_vec.append(host_vec)
return state_vec

from gym_csle_cyborg.util.cyborg_env_util import CyborgEnvUtil

if __name__ == '__main__':
config = CSLECyborgConfig(
@@ -84,23 +13,30 @@ def id_to_state_vec(id: int):
str_info = str(csle_cyborg_env.cyborg_challenge_env.env.env.env.info)
states = {}
state_idx = 0
host_state_lookup = host_state_to_id(hostnames=csle_cyborg_env.cyborg_hostnames)
host_ids = list(csle_cyborg_env.cyborg_hostname_to_id.values())

for i in range(100000):
done = False
csle_cyborg_env.reset()
actions = list(csle_cyborg_env.action_id_to_type_and_host.keys())
state_key = str(csle_cyborg_env.cyborg_challenge_env.env.env.env.info)
if state_key not in states:
states[state_key] = state_idx
state_id = str(csle_cyborg_env.cyborg_challenge_env.env.env.env.info)
if state_id not in states:
states[state_id] = state_idx
state_idx += 1

while not done:
a = random.choice(actions)
o, r, done, _, info = csle_cyborg_env.step(a)
state_vec = info_to_vec(csle_cyborg_env.get_true_table().rows, csle_cyborg_env.decoy_state,
host_state_lookup, host_ids)
state_key = state_vec_to_id(state_vec=state_vec)
stv = id_to_state_vec(id=state_key)
assert stv == state_vec
state_vector = CyborgEnvUtil.state_to_vector(state=csle_cyborg_env.get_true_table().rows,
decoy_state=csle_cyborg_env.decoy_state, host_ids=host_ids,
scan_state=csle_cyborg_env.scan_state)
state_id = CyborgEnvUtil.state_vector_to_state_id(state_vector=state_vector)
converted_state_vector = CyborgEnvUtil.state_id_to_state_vector(state_id=state_id)
assert converted_state_vector == state_vector
obs_vector = CyborgEnvUtil.state_to_vector(state=csle_cyborg_env.get_table().rows,
decoy_state=csle_cyborg_env.decoy_state,
host_ids=host_ids, scan_state=csle_cyborg_env.scan_state,
observation=True)
obs_id = CyborgEnvUtil.state_vector_to_state_id(state_vector=obs_vector, observation=True)
converted_obs_vector = CyborgEnvUtil.state_id_to_state_vector(state_id=obs_id, observation=True)
assert converted_obs_vector == obs_vector
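For reference, the removed helpers above encode each host's state `[known, scanned, access, decoy]` into 1 + 1 + 2 + 3 = 7 bits (13 hosts → a 91-bit id), which the new `CyborgEnvUtil.state_vector_to_state_id` / `state_id_to_state_vector` calls presumably generalize. A minimal sketch reproducing that per-host round-trip from the removed encoding:

```python
def encode_host(known: int, scanned: int, access: int, decoy: int) -> int:
    # 1 bit known, 1 bit scanned, 2 bits access (0=None, 1=User, 2=privileged), 3 bits decoy count
    bits = format(known, '01b') + format(scanned, '01b') + format(access, '02b') + format(decoy, '03b')
    return int(bits, 2)

def decode_host(host_id: int) -> list:
    bits = format(host_id, '07b')
    return [int(bits[0:1], 2), int(bits[1:2], 2), int(bits[2:4], 2), int(bits[4:7], 2)]

host_vec = [1, 0, 2, 3]           # known, not scanned, privileged access, 3 decoys deployed
host_id = encode_host(*host_vec)  # "1" + "0" + "10" + "011" = 0b1010011 = 83
assert decode_host(host_id) == host_vec
```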
20 changes: 20 additions & 0 deletions examples/training/pomcp/cyborg_scenario_two_defender/README.md
@@ -0,0 +1,20 @@
# POMCP for defender planning in CybORG

## Commands

To run a script, execute:
```bash
python <script_name>
```

## Author & Maintainer

Kim Hammar <kimham@kth.se>

## Copyright and license

[LICENSE](../../../../LICENSE.md)

Creative Commons

(C) 2020-2024, Kim Hammar
@@ -0,0 +1,89 @@
import csle_common.constants.constants as constants
from csle_common.dao.training.experiment_config import ExperimentConfig
from csle_common.metastore.metastore_facade import MetastoreFacade
from csle_common.dao.training.agent_type import AgentType
from csle_common.dao.training.hparam import HParam
from csle_common.dao.training.player_type import PlayerType
from csle_agents.agents.pomcp.pomcp_agent import POMCPAgent
import csle_agents.constants.constants as agents_constants
from csle_agents.common.objective_type import ObjectiveType
from gym_csle_cyborg.dao.csle_cyborg_config import CSLECyborgConfig
from gym_csle_cyborg.dao.red_agent_type import RedAgentType
from gym_csle_cyborg.envs.cyborg_scenario_two_defender import CyborgScenarioTwoDefender

if __name__ == '__main__':
emulation_name = "csle-level9-040"
emulation_env_config = MetastoreFacade.get_emulation_by_name(emulation_name)
if emulation_env_config is None:
raise ValueError(f"Could not find an emulation environment with the name: {emulation_name}")
simulation_name = "csle-cyborg-001"
simulation_env_config = MetastoreFacade.get_simulation_by_name(simulation_name)
if simulation_env_config is None:
raise ValueError(f"Could not find a simulation with name: {simulation_name}")
simulation_env_config.simulation_env_input_config = CSLECyborgConfig(
gym_env_name="csle-cyborg-scenario-two-v1", scenario=2, baseline_red_agents=[RedAgentType.B_LINE_AGENT],
maximum_steps=100, red_agent_distribution=[1.0], reduced_action_space=True, scanned_state=True,
decoy_state=True, decoy_optimization=False)
csle_cyborg_env = CyborgScenarioTwoDefender(config=simulation_env_config.simulation_env_input_config)
A = csle_cyborg_env.get_action_space()
b1 = csle_cyborg_env.initial_belief
experiment_config = ExperimentConfig(
output_dir=f"{constants.LOGGING.DEFAULT_LOG_DIR}pomcp_test", title="POMCP test",
random_seeds=[399, 98912, 999, 555],
agent_type=AgentType.POMCP,
log_every=1,
hparams={
agents_constants.POMCP.N: HParam(value=50, name=agents_constants.POMCP.N,
descr="the number of episodes"),
agents_constants.POMCP.OBJECTIVE_TYPE: HParam(
value=ObjectiveType.MAX, name=agents_constants.POMCP.OBJECTIVE_TYPE,
descr="the type of objective (max or min)"),
agents_constants.POMCP.ROLLOUT_POLICY: HParam(
value=None, name=agents_constants.POMCP.ROLLOUT_POLICY,
descr="the policy to use for rollouts"),
agents_constants.POMCP.VALUE_FUNCTION: HParam(
value=lambda x: 0, name=agents_constants.POMCP.VALUE_FUNCTION,
descr="the value function to use for truncated rollouts"),
agents_constants.POMCP.A: HParam(value=A, name=agents_constants.POMCP.A, descr="the action space"),
agents_constants.POMCP.GAMMA: HParam(value=0.99, name=agents_constants.POMCP.GAMMA,
descr="the discount factor"),
agents_constants.POMCP.REINVIGORATION: HParam(value=False, name=agents_constants.POMCP.REINVIGORATION,
descr="whether reinvigoration should be used"),
agents_constants.POMCP.INITIAL_BELIEF: HParam(value=b1, name=agents_constants.POMCP.INITIAL_BELIEF,
descr="the initial belief"),
agents_constants.POMCP.PLANNING_TIME: HParam(value=300, name=agents_constants.POMCP.PLANNING_TIME,
descr="the planning time"),
agents_constants.POMCP.MAX_PARTICLES: HParam(value=1000, name=agents_constants.POMCP.MAX_PARTICLES,
descr="the maximum number of belief particles"),
agents_constants.POMCP.MAX_DEPTH: HParam(value=500, name=agents_constants.POMCP.MAX_DEPTH,
descr="the maximum depth for planning"),
agents_constants.POMCP.C: HParam(value=0.35, name=agents_constants.POMCP.C,
descr="the weighting factor for UCB exploration"),
agents_constants.POMCP.LOG_STEP_FREQUENCY: HParam(
value=1, name=agents_constants.POMCP.LOG_STEP_FREQUENCY, descr="frequency of logging time-steps"),
agents_constants.POMCP.DEFAULT_NODE_VALUE: HParam(
value=-2000, name=agents_constants.POMCP.DEFAULT_NODE_VALUE, descr="the default node value in "
"the search tree"),
agents_constants.POMCP.VERBOSE: HParam(value=True, name=agents_constants.POMCP.VERBOSE,
descr="verbose logging flag"),
agents_constants.COMMON.EVAL_BATCH_SIZE: HParam(value=100, name=agents_constants.COMMON.EVAL_BATCH_SIZE,
descr="number of evaluation episodes"),
agents_constants.COMMON.CONFIDENCE_INTERVAL: HParam(
value=0.95, name=agents_constants.COMMON.CONFIDENCE_INTERVAL,
descr="confidence interval"),
agents_constants.COMMON.MAX_ENV_STEPS: HParam(
value=500, name=agents_constants.COMMON.MAX_ENV_STEPS,
descr="maximum number of steps in the environment (for envs with infinite horizon generally)"),
agents_constants.COMMON.RUNNING_AVERAGE: HParam(
value=100, name=agents_constants.COMMON.RUNNING_AVERAGE,
descr="the number of samples to include when computing the running avg"),
agents_constants.COMMON.GAMMA: HParam(
value=0.99, name=agents_constants.COMMON.GAMMA,
descr="the discount factor")
},
player_type=PlayerType.DEFENDER, player_idx=0
)
agent = POMCPAgent(emulation_env_config=emulation_env_config, simulation_env_config=simulation_env_config,
experiment_config=experiment_config, save_to_metastore=False)
experiment_execution = agent.train()
MetastoreFacade.save_experiment_execution(experiment_execution)
2 changes: 1 addition & 1 deletion examples/training/pomcp/stopping_pomdp_defender/README.md
@@ -1,4 +1,4 @@
# Random Search for POMDP
# POMCP for defender planning in stopping POMDP

This directory contains example scripts for optimizing defender policies using Partially Observable Monte-Carlo Planning (POMCP) for the POMDP from [https://ieeexplore.ieee.org/document/9779345](https://ieeexplore.ieee.org/document/9779345)

@@ -53,6 +53,9 @@
A = simulation_env_config.simulation_env_input_config.stopping_game_config.A1
O = simulation_env_config.simulation_env_input_config.stopping_game_config.O
b1 = simulation_env_config.simulation_env_input_config.stopping_game_config.b1
initial_belief = {}
for i in range(len(b1)):
initial_belief[i] = b1[i]
rollout_policy = MultiThresholdStoppingPolicy(
theta=[0.75], simulation_name=simulation_name, L=stopping_game_config.L,
states=simulation_env_config.state_space_config.states, player_type=PlayerType.DEFENDER,
@@ -75,13 +78,14 @@
agents_constants.POMCP.VALUE_FUNCTION: HParam(
value=lambda x: 0, name=agents_constants.POMCP.VALUE_FUNCTION,
descr="the value function to use for truncated rollouts"),
agents_constants.POMCP.S: HParam(value=S, name=agents_constants.POMCP.S, descr="the state space"),
agents_constants.POMCP.O: HParam(value=O, name=agents_constants.POMCP.O, descr="the observation space"),
agents_constants.POMCP.A: HParam(value=A, name=agents_constants.POMCP.A, descr="the action space"),
agents_constants.POMCP.GAMMA: HParam(value=0.99, name=agents_constants.POMCP.GAMMA,
descr="the discount factor"),
agents_constants.POMCP.INITIAL_BELIEF: HParam(value=b1, name=agents_constants.POMCP.INITIAL_BELIEF,
agents_constants.POMCP.INITIAL_BELIEF: HParam(value=initial_belief,
name=agents_constants.POMCP.INITIAL_BELIEF,
descr="the initial belief"),
agents_constants.POMCP.REINVIGORATION: HParam(value=True, name=agents_constants.POMCP.REINVIGORATION,
descr="whether reinvigoration should be used"),
agents_constants.POMCP.PLANNING_TIME: HParam(value=120, name=agents_constants.POMCP.PLANNING_TIME,
descr="the planning time"),
agents_constants.POMCP.MAX_PARTICLES: HParam(value=100, name=agents_constants.POMCP.MAX_PARTICLES,
@@ -90,8 +94,13 @@
descr="the maximum depth for planning"),
agents_constants.POMCP.C: HParam(value=0.35, name=agents_constants.POMCP.C,
descr="the weighting factor for UCB exploration"),
agents_constants.POMCP.DEFAULT_NODE_VALUE: HParam(
value=-2000, name=agents_constants.POMCP.DEFAULT_NODE_VALUE, descr="the default node value in "
"the search tree"),
agents_constants.POMCP.LOG_STEP_FREQUENCY: HParam(
value=1, name=agents_constants.POMCP.LOG_STEP_FREQUENCY, descr="frequency of logging time-steps"),
agents_constants.POMCP.VERBOSE: HParam(value=False, name=agents_constants.POMCP.VERBOSE,
descr="verbose logging flag"),
agents_constants.COMMON.EVAL_BATCH_SIZE: HParam(value=100, name=agents_constants.COMMON.EVAL_BATCH_SIZE,
descr="number of evaluation episodes"),
agents_constants.COMMON.CONFIDENCE_INTERVAL: HParam(