diff --git a/README.md b/README.md index bc371c737..3bae5d58c 100644 --- a/README.md +++ b/README.md @@ -90,7 +90,7 @@ version of the documentation is available [here](./releases/) | Release | Last date of support | |---------------------------------------------------------------|----------------------| | [v.0.4.0](https://github.com/Limmen/csle/releases/tag/v0.4.0) | 2024-02-07 | -| [v.0.3.0](https://github.com/Limmen/csle/releases/tag/v0.3.0) | 2024-01-17 | +| [v.0.3.0](https://github.com/Limmen/csle/releases/tag/v0.3.0) | ~~2024-01-17~~ | | [v.0.2.0](https://github.com/Limmen/csle/releases/tag/v0.2.0) | ~~2023-10-30~~ | | [v.0.1.0](https://github.com/Limmen/csle/releases/tag/v0.1.0) | ~~2023-06-06~~ | diff --git a/examples/manual_play/cyborg_restore_defender.py b/examples/manual_play/cyborg_restore_defender.py index 8f2cce7e4..e5ffe9b8c 100644 --- a/examples/manual_play/cyborg_restore_defender.py +++ b/examples/manual_play/cyborg_restore_defender.py @@ -1,28 +1,51 @@ from gym_csle_cyborg.dao.csle_cyborg_config import CSLECyborgConfig from gym_csle_cyborg.dao.red_agent_type import RedAgentType from gym_csle_cyborg.envs.cyborg_scenario_two_defender import CyborgScenarioTwoDefender -from gym_csle_cyborg.dao.blue_agent_action_type import BlueAgentActionType +import gym_csle_cyborg.constants.constants as env_constants if __name__ == '__main__': config = CSLECyborgConfig( gym_env_name="csle-cyborg-scenario-two-v1", scenario=2, baseline_red_agents=[RedAgentType.B_LINE_AGENT], - maximum_steps=100, red_agent_distribution=[1.0], reduced_action_space=False, decoy_state=False, - scanned_state=False, decoy_optimization=False) + maximum_steps=100, red_agent_distribution=[1.0], reduced_action_space=True, decoy_state=True, + scanned_state=True, decoy_optimization=False) csle_cyborg_env = CyborgScenarioTwoDefender(config=config) - a = 1 - R = 0 - for t in range(1000): - o, r, done, _, info = csle_cyborg_env.step(a) - if done: - csle_cyborg_env.reset() - R += r - print(f"time-step: {t + 1}, cumulative reward: {R}, a: {a}") - for i in range(len(info["obs_per_host"])): - if csle_cyborg_env.cyborg_hostnames[i] == "User0": - continue - a = 1 - if info["obs_per_host"][i]["compromised"].value > 0: - host = csle_cyborg_env.cyborg_hostnames[i] - action_type = BlueAgentActionType.RESTORE - a = csle_cyborg_env.cyborg_action_type_and_host_to_id[(action_type, host)] - break + o, info = csle_cyborg_env.reset() + initial_state_id = info[env_constants.ENV_METRICS.STATE] + csle_cyborg_env.step(1) + csle_cyborg_env.set_state(state=initial_state_id) + # print(csle_cyborg_env.cyborg_challenge_env.env.env.env.env.env.environment_controller.observation["Red"].data["User0"]) + csle_cyborg_env.step(1) + + # print("INITIAL2 STATE") + # print(csle_cyborg_env.get_true_table()) + # # csle_cyborg_env.get_true_table() + # o, r, done, _, info = csle_cyborg_env.step(1) + # print("INITIAL1 STATE") + # print(csle_cyborg_env.get_true_table()) + # initial_obs_id = info[env_constants.ENV_METRICS.OBSERVATION] + # initial_state_id = info[env_constants.ENV_METRICS.STATE] + # # csle_cyborg_env.set_state(state=initial_state_id) + # csle_cyborg_env.step(1) + # print("SECOND STATE") + # print(csle_cyborg_env.get_true_table()) + # csle_cyborg_env.step(1) + # csle_cyborg_env.step(1) + # csle_cyborg_env.step(1) + # csle_cyborg_env.step(1) + # csle_cyborg_env.step(1) + # csle_cyborg_env.step(1) + # print(csle_cyborg_env.get_true_table()) + # print("SET STATE") + # csle_cyborg_env.set_state(state=initial_state_id) + # 
print(csle_cyborg_env.get_true_table()) + # csle_cyborg_env.step(1) + # print(csle_cyborg_env.get_true_table()) + # csle_cyborg_env.step(1) + # print(csle_cyborg_env.get_true_table()) + # csle_cyborg_env.step(1) + # print(csle_cyborg_env.get_true_table()) + # csle_cyborg_env.step(1) + # csle_cyborg_env.step(1) + # csle_cyborg_env.step(1) + # csle_cyborg_env.step(1) + # print(csle_cyborg_env.get_true_table()) diff --git a/examples/manual_play/cyborg_test.py b/examples/manual_play/cyborg_test.py index eaa030b6b..fc4707da5 100644 --- a/examples/manual_play/cyborg_test.py +++ b/examples/manual_play/cyborg_test.py @@ -2,78 +2,7 @@ from gym_csle_cyborg.dao.csle_cyborg_config import CSLECyborgConfig from gym_csle_cyborg.dao.red_agent_type import RedAgentType from gym_csle_cyborg.envs.cyborg_scenario_two_defender import CyborgScenarioTwoDefender - - -def info_to_vec(info, decoy_state, hosts): - """ - Creates the state vector - - :param info: the info - :param decoy_state: the decoy state - :param hosts: the host list - :return: the state vector - """ - state_vec = [] - for host in hosts: - known = info[host][3] - known = int(known) - scanned = info[host][4] - scanned = int(scanned) - access = info[host][5] - if access == "None": - access = 0 - elif access == "User": - access = 1 - else: - access = 2 - d_state = len(decoy_state[host]) - state_vec.append([known, scanned, access, d_state]) - return state_vec - - -def state_vec_to_id(state_vec): - """ - Converts a state vector to an id - - :param state_vec: the state vector to convert - :return: the id - """ - bin_id = "" - for host_vec in state_vec: - host_bin_str = "" - for i, elem in enumerate(host_vec): - if i == 0: - host_bin_str += format(elem, '01b') - if i == 1: - host_bin_str += format(elem, '01b') - if i == 2: - host_bin_str += format(elem, '02b') - if i == 3: - host_bin_str += format(elem, '03b') - bin_id += host_bin_str - id = int(bin_id, 2) - return id - - -def id_to_state_vec(id: int): - """ - Converts an id to a state vector - - :param id: the id to convert - :return: the state vector - """ - bin_str = format(id, "091b") - host_bins = [bin_str[i:i + 7] for i in range(0, len(bin_str), 7)] - state_vec = [] - for host_bin in host_bins: - known = int(host_bin[0:1], 2) - scanned = int(host_bin[1:2], 2) - access = int(host_bin[2:4], 2) - decoy = int(host_bin[4:7], 2) - host_vec = [known, scanned, access, decoy] - state_vec.append(host_vec) - return state_vec - +from gym_csle_cyborg.util.cyborg_env_util import CyborgEnvUtil if __name__ == '__main__': config = CSLECyborgConfig( @@ -84,23 +13,30 @@ def id_to_state_vec(id: int): str_info = str(csle_cyborg_env.cyborg_challenge_env.env.env.env.info) states = {} state_idx = 0 - host_state_lookup = host_state_to_id(hostnames=csle_cyborg_env.cyborg_hostnames) host_ids = list(csle_cyborg_env.cyborg_hostname_to_id.values()) for i in range(100000): done = False csle_cyborg_env.reset() actions = list(csle_cyborg_env.action_id_to_type_and_host.keys()) - state_key = str(csle_cyborg_env.cyborg_challenge_env.env.env.env.info) - if state_key not in states: - states[state_key] = state_idx + state_id = str(csle_cyborg_env.cyborg_challenge_env.env.env.env.info) + if state_id not in states: + states[state_id] = state_idx state_idx += 1 while not done: a = random.choice(actions) o, r, done, _, info = csle_cyborg_env.step(a) - state_vec = info_to_vec(csle_cyborg_env.get_true_table().rows, csle_cyborg_env.decoy_state, - host_state_lookup, host_ids) - state_key = state_vec_to_id(state_vec=state_vec) - stv = 
id_to_state_vec(id=state_key)
-            assert stv == state_vec
+            state_vector = CyborgEnvUtil.state_to_vector(state=csle_cyborg_env.get_true_table().rows,
+                                                         decoy_state=csle_cyborg_env.decoy_state, host_ids=host_ids,
+                                                         scan_state=csle_cyborg_env.scan_state)
+            state_id = CyborgEnvUtil.state_vector_to_state_id(state_vector=state_vector)
+            converted_state_vector = CyborgEnvUtil.state_id_to_state_vector(state_id=state_id)
+            assert converted_state_vector == state_vector
+            obs_vector = CyborgEnvUtil.state_to_vector(state=csle_cyborg_env.get_table().rows,
+                                                       decoy_state=csle_cyborg_env.decoy_state,
+                                                       host_ids=host_ids, scan_state=csle_cyborg_env.scan_state,
+                                                       observation=True)
+            obs_id = CyborgEnvUtil.state_vector_to_state_id(state_vector=obs_vector, observation=True)
+            converted_obs_vector = CyborgEnvUtil.state_id_to_state_vector(state_id=obs_id, observation=True)
+            assert converted_obs_vector == obs_vector
diff --git a/examples/training/pomcp/cyborg_scenario_two_defender/README.md b/examples/training/pomcp/cyborg_scenario_two_defender/README.md
new file mode 100644
index 000000000..86eeededd
--- /dev/null
+++ b/examples/training/pomcp/cyborg_scenario_two_defender/README.md
@@ -0,0 +1,20 @@
+# POMCP for defender planning in CybORG
+
+## Commands
+
+To run a script, execute:
+```bash
+python <script_name>
+```
+
+## Author & Maintainer
+
+Kim Hammar
+
+## Copyright and license
+
+[LICENSE](../../../../LICENSE.md)
+
+Creative Commons
+
+(C) 2020-2024, Kim Hammar
\ No newline at end of file
diff --git a/examples/training/pomcp/cyborg_scenario_two_defender/run_vs_random_attacker_v_001.py b/examples/training/pomcp/cyborg_scenario_two_defender/run_vs_random_attacker_v_001.py
new file mode 100644
index 000000000..bf24bad67
--- /dev/null
+++ b/examples/training/pomcp/cyborg_scenario_two_defender/run_vs_random_attacker_v_001.py
@@ -0,0 +1,89 @@
+import csle_common.constants.constants as constants
+from csle_common.dao.training.experiment_config import ExperimentConfig
+from csle_common.metastore.metastore_facade import MetastoreFacade
+from csle_common.dao.training.agent_type import AgentType
+from csle_common.dao.training.hparam import HParam
+from csle_common.dao.training.player_type import PlayerType
+from csle_agents.agents.pomcp.pomcp_agent import POMCPAgent
+import csle_agents.constants.constants as agents_constants
+from csle_agents.common.objective_type import ObjectiveType
+from gym_csle_cyborg.dao.csle_cyborg_config import CSLECyborgConfig
+from gym_csle_cyborg.dao.red_agent_type import RedAgentType
+from gym_csle_cyborg.envs.cyborg_scenario_two_defender import CyborgScenarioTwoDefender
+
+if __name__ == '__main__':
+    emulation_name = "csle-level9-040"
+    emulation_env_config = MetastoreFacade.get_emulation_by_name(emulation_name)
+    if emulation_env_config is None:
+        raise ValueError(f"Could not find an emulation environment with the name: {emulation_name}")
+    simulation_name = "csle-cyborg-001"
+    simulation_env_config = MetastoreFacade.get_simulation_by_name(simulation_name)
+    if simulation_env_config is None:
+        raise ValueError(f"Could not find a simulation with name: {simulation_name}")
+    simulation_env_config.simulation_env_input_config = CSLECyborgConfig(
+        gym_env_name="csle-cyborg-scenario-two-v1", scenario=2, baseline_red_agents=[RedAgentType.B_LINE_AGENT],
+        maximum_steps=100, red_agent_distribution=[1.0], reduced_action_space=True, scanned_state=True,
+        decoy_state=True, decoy_optimization=False)
+    csle_cyborg_env = CyborgScenarioTwoDefender(config=simulation_env_config.simulation_env_input_config)
+    A = 
csle_cyborg_env.get_action_space() + b1 = csle_cyborg_env.initial_belief + experiment_config = ExperimentConfig( + output_dir=f"{constants.LOGGING.DEFAULT_LOG_DIR}pomcp_test", title="POMCP test", + random_seeds=[399, 98912, 999, 555], + agent_type=AgentType.POMCP, + log_every=1, + hparams={ + agents_constants.POMCP.N: HParam(value=50, name=agents_constants.POMCP.N, + descr="the number of episodes"), + agents_constants.POMCP.OBJECTIVE_TYPE: HParam( + value=ObjectiveType.MAX, name=agents_constants.POMCP.OBJECTIVE_TYPE, + descr="the type of objective (max or min)"), + agents_constants.POMCP.ROLLOUT_POLICY: HParam( + value=None, name=agents_constants.POMCP.ROLLOUT_POLICY, + descr="the policy to use for rollouts"), + agents_constants.POMCP.VALUE_FUNCTION: HParam( + value=lambda x: 0, name=agents_constants.POMCP.VALUE_FUNCTION, + descr="the value function to use for truncated rollouts"), + agents_constants.POMCP.A: HParam(value=A, name=agents_constants.POMCP.A, descr="the action space"), + agents_constants.POMCP.GAMMA: HParam(value=0.99, name=agents_constants.POMCP.GAMMA, + descr="the discount factor"), + agents_constants.POMCP.REINVIGORATION: HParam(value=False, name=agents_constants.POMCP.REINVIGORATION, + descr="whether reinvigoration should be used"), + agents_constants.POMCP.INITIAL_BELIEF: HParam(value=b1, name=agents_constants.POMCP.INITIAL_BELIEF, + descr="the initial belief"), + agents_constants.POMCP.PLANNING_TIME: HParam(value=300, name=agents_constants.POMCP.PLANNING_TIME, + descr="the planning time"), + agents_constants.POMCP.MAX_PARTICLES: HParam(value=1000, name=agents_constants.POMCP.MAX_PARTICLES, + descr="the maximum number of belief particles"), + agents_constants.POMCP.MAX_DEPTH: HParam(value=500, name=agents_constants.POMCP.MAX_DEPTH, + descr="the maximum depth for planning"), + agents_constants.POMCP.C: HParam(value=0.35, name=agents_constants.POMCP.C, + descr="the weighting factor for UCB exploration"), + agents_constants.POMCP.LOG_STEP_FREQUENCY: HParam( + value=1, name=agents_constants.POMCP.LOG_STEP_FREQUENCY, descr="frequency of logging time-steps"), + agents_constants.POMCP.DEFAULT_NODE_VALUE: HParam( + value=-2000, name=agents_constants.POMCP.DEFAULT_NODE_VALUE, descr="the default node value in " + "the search tree"), + agents_constants.POMCP.VERBOSE: HParam(value=True, name=agents_constants.POMCP.VERBOSE, + descr="verbose logging flag"), + agents_constants.COMMON.EVAL_BATCH_SIZE: HParam(value=100, name=agents_constants.COMMON.EVAL_BATCH_SIZE, + descr="number of evaluation episodes"), + agents_constants.COMMON.CONFIDENCE_INTERVAL: HParam( + value=0.95, name=agents_constants.COMMON.CONFIDENCE_INTERVAL, + descr="confidence interval"), + agents_constants.COMMON.MAX_ENV_STEPS: HParam( + value=500, name=agents_constants.COMMON.MAX_ENV_STEPS, + descr="maximum number of steps in the environment (for envs with infinite horizon generally)"), + agents_constants.COMMON.RUNNING_AVERAGE: HParam( + value=100, name=agents_constants.COMMON.RUNNING_AVERAGE, + descr="the number of samples to include when computing the running avg"), + agents_constants.COMMON.GAMMA: HParam( + value=0.99, name=agents_constants.COMMON.GAMMA, + descr="the discount factor") + }, + player_type=PlayerType.DEFENDER, player_idx=0 + ) + agent = POMCPAgent(emulation_env_config=emulation_env_config, simulation_env_config=simulation_env_config, + experiment_config=experiment_config, save_to_metastore=False) + experiment_execution = agent.train() + MetastoreFacade.save_experiment_execution(experiment_execution) 
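A note on the POMCP interface change that the example scripts above and below reflect: the state-space (`S`) and observation-space (`O`) hyperparameters are removed, and `INITIAL_BELIEF` is now a `Dict[int, float]` mapping state ids to probabilities. A minimal sketch of converting a dense belief vector, mirroring what the stopping-POMDP example below does with `b1` (the vector values here are illustrative only):

```python
# Hypothetical dense belief vector over three states (index = state id).
b1 = [0.8, 0.2, 0.0]

# POMCP's INITIAL_BELIEF hyperparameter now expects a dict: state id -> probability.
initial_belief = {i: float(p) for i, p in enumerate(b1)}
assert initial_belief == {0: 0.8, 1: 0.2, 2: 0.0}
```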
diff --git a/examples/training/pomcp/stopping_pomdp_defender/README.md b/examples/training/pomcp/stopping_pomdp_defender/README.md
index ddfc2ad40..787ec14ab 100644
--- a/examples/training/pomcp/stopping_pomdp_defender/README.md
+++ b/examples/training/pomcp/stopping_pomdp_defender/README.md
@@ -1,4 +1,4 @@
-# Random Search for POMDP
+# POMCP for defender planning in stopping POMDP
 
 This directory contains example scripts for optimizing defender policies using random search for the POMDP from
 [https://ieeexplore.ieee.org/document/9779345](https://ieeexplore.ieee.org/document/9779345)
diff --git a/examples/training/pomcp/stopping_pomdp_defender/run_vs_random_attacker_v_001.py b/examples/training/pomcp/stopping_pomdp_defender/run_vs_random_attacker_v_001.py
index 2e4e59340..bec0283bb 100644
--- a/examples/training/pomcp/stopping_pomdp_defender/run_vs_random_attacker_v_001.py
+++ b/examples/training/pomcp/stopping_pomdp_defender/run_vs_random_attacker_v_001.py
@@ -53,6 +53,9 @@
     A = simulation_env_config.simulation_env_input_config.stopping_game_config.A1
     O = simulation_env_config.simulation_env_input_config.stopping_game_config.O
     b1 = simulation_env_config.simulation_env_input_config.stopping_game_config.b1
+    initial_belief = {}
+    for i in range(len(b1)):
+        initial_belief[i] = b1[i]
     rollout_policy = MultiThresholdStoppingPolicy(
         theta=[0.75], simulation_name=simulation_name, L=stopping_game_config.L,
         states=simulation_env_config.state_space_config.states, player_type=PlayerType.DEFENDER,
@@ -75,13 +78,14 @@
             agents_constants.POMCP.VALUE_FUNCTION: HParam(
                 value=lambda x: 0, name=agents_constants.POMCP.VALUE_FUNCTION,
                 descr="the value function to use for truncated rollouts"),
-            agents_constants.POMCP.S: HParam(value=S, name=agents_constants.POMCP.S, descr="the state space"),
-            agents_constants.POMCP.O: HParam(value=O, name=agents_constants.POMCP.O, descr="the observation space"),
             agents_constants.POMCP.A: HParam(value=A, name=agents_constants.POMCP.A, descr="the action space"),
             agents_constants.POMCP.GAMMA: HParam(value=0.99, name=agents_constants.POMCP.GAMMA,
                                                  descr="the discount factor"),
-            agents_constants.POMCP.INITIAL_BELIEF: HParam(value=b1, name=agents_constants.POMCP.INITIAL_BELIEF,
+            agents_constants.POMCP.INITIAL_BELIEF: HParam(value=initial_belief,
+                                                          name=agents_constants.POMCP.INITIAL_BELIEF,
                                                           descr="the initial belief"),
+            agents_constants.POMCP.REINVIGORATION: HParam(value=True, name=agents_constants.POMCP.REINVIGORATION,
+                                                          descr="whether reinvigoration should be used"),
             agents_constants.POMCP.PLANNING_TIME: HParam(value=120, name=agents_constants.POMCP.PLANNING_TIME,
                                                          descr="the planning time"),
             agents_constants.POMCP.MAX_PARTICLES: HParam(value=100, name=agents_constants.POMCP.MAX_PARTICLES,
@@ -90,8 +94,13 @@
                                                          descr="the maximum depth for planning"),
             agents_constants.POMCP.C: HParam(value=0.35, name=agents_constants.POMCP.C,
                                              descr="the weighting factor for UCB exploration"),
+            agents_constants.POMCP.DEFAULT_NODE_VALUE: HParam(
+                value=-2000, name=agents_constants.POMCP.DEFAULT_NODE_VALUE, descr="the default node value in "
+                                                                                   "the search tree"),
             agents_constants.POMCP.LOG_STEP_FREQUENCY: HParam(
                 value=1, name=agents_constants.POMCP.LOG_STEP_FREQUENCY, descr="frequency of logging time-steps"),
+            agents_constants.POMCP.VERBOSE: HParam(value=False, name=agents_constants.POMCP.VERBOSE,
+                                                   descr="verbose logging flag"),
             agents_constants.COMMON.EVAL_BATCH_SIZE: HParam(value=100, name=agents_constants.COMMON.EVAL_BATCH_SIZE,
                                                             descr="number of evaluation episodes"),
agents_constants.COMMON.CONFIDENCE_INTERVAL: HParam( diff --git a/examples/training/ppo/cyborg_scenario_two_defender/run_cyborg_scenario_two_defender_b_line_cardiff.py b/examples/training/ppo/cyborg_scenario_two_defender/run_cyborg_scenario_two_defender_b_line_cardiff.py index cad77ea6c..40de164ff 100644 --- a/examples/training/ppo/cyborg_scenario_two_defender/run_cyborg_scenario_two_defender_b_line_cardiff.py +++ b/examples/training/ppo/cyborg_scenario_two_defender/run_cyborg_scenario_two_defender_b_line_cardiff.py @@ -34,7 +34,7 @@ descr="number of steps in the environment for doing rollouts between policy updates"), agents_constants.COMMON.BATCH_SIZE: HParam(value=16, name=agents_constants.COMMON.BATCH_SIZE, descr="batch size for updates"), - agents_constants.COMMON.LEARNING_RATE: HParam(value=0.002, + agents_constants.COMMON.LEARNING_RATE: HParam(value=0.00005148, name=agents_constants.COMMON.LEARNING_RATE, descr="learning rate for updating the policy"), constants.NEURAL_NETWORKS.DEVICE: HParam(value="cpu", @@ -66,7 +66,7 @@ name=agents_constants.PPO.NUM_GRADIENT_STEPS, descr="number of gradient steps"), agents_constants.COMMON.NUM_TRAINING_TIMESTEPS: HParam( - value=int(2048) * 3500, name=agents_constants.COMMON.NUM_TRAINING_TIMESTEPS, + value=int(2048) * 4500, name=agents_constants.COMMON.NUM_TRAINING_TIMESTEPS, descr="number of timesteps to train"), agents_constants.COMMON.EVAL_EVERY: HParam(value=10, name=agents_constants.COMMON.EVAL_EVERY, descr="training iterations between evaluations"), diff --git a/simulation-system/libs/csle-agents/src/csle_agents/agents/pomcp/action_node.py b/simulation-system/libs/csle-agents/src/csle_agents/agents/pomcp/action_node.py index 02a20c7e0..65b5e5671 100644 --- a/simulation-system/libs/csle-agents/src/csle_agents/agents/pomcp/action_node.py +++ b/simulation-system/libs/csle-agents/src/csle_agents/agents/pomcp/action_node.py @@ -7,7 +7,7 @@ class ActionNode(Node): A node in the POMCP history tree where the last element of the history was an action """ - def __init__(self, id: int, history: List[int], action: int, parent=None, value: float = 0.0, + def __init__(self, id: int, history: List[int], action: int, parent=None, value: float = -2000, visit_count: int = 0) -> None: """ Initializes the node diff --git a/simulation-system/libs/csle-agents/src/csle_agents/agents/pomcp/belief_node.py b/simulation-system/libs/csle-agents/src/csle_agents/agents/pomcp/belief_node.py index 6dc7829c6..7d9324321 100644 --- a/simulation-system/libs/csle-agents/src/csle_agents/agents/pomcp/belief_node.py +++ b/simulation-system/libs/csle-agents/src/csle_agents/agents/pomcp/belief_node.py @@ -9,7 +9,7 @@ class BeliefNode(Node): It also holds the received observation after which the belief is updated accordingly """ - def __init__(self, id: int, history: List[int], observation: int, parent=None, value: float = 0.0, + def __init__(self, id: int, history: List[int], observation: int, parent=None, value: float = -2000, visit_count: int = 0) -> None: """ Initializing the node diff --git a/simulation-system/libs/csle-agents/src/csle_agents/agents/pomcp/belief_tree.py b/simulation-system/libs/csle-agents/src/csle_agents/agents/pomcp/belief_tree.py index b74a1e811..af0299e9a 100644 --- a/simulation-system/libs/csle-agents/src/csle_agents/agents/pomcp/belief_tree.py +++ b/simulation-system/libs/csle-agents/src/csle_agents/agents/pomcp/belief_tree.py @@ -10,18 +10,25 @@ class BeliefTree: of actions and observations. 
""" - def __init__(self, root_particles: List[int]) -> None: + def __init__(self, root_particles: List[int], default_node_value) -> None: """ Initializes the tree with a belief node with a set of particles :param root_particles: the particles to add to the root belief node + :param default_node_value: the default value of nodes in the tree """ self.tree_size = 0 self.nodes: Dict[int, Union[Node, None]] = {} - self.root = self.add(history=[], particle=root_particles, parent=None) + self.default_node_value = default_node_value + node: Node = self.add(history=[], particle=root_particles, parent=None, value=default_node_value) + if isinstance(node, BeliefNode): + self.root: BeliefNode = node + else: + raise ValueError("Invalid root node") - def add(self, history: List[int], parent: Union[ActionNode, BeliefNode, None], action: Union[int, None] = None, - observation: Union[int, None] = None, particle: Union[Any, None] = None): + def add(self, history: List[int], parent: Union[Node, ActionNode, BeliefNode, None], + action: Union[int, None] = None, observation: Union[int, None] = None, particle: Union[Any, None] = None, + value: float = 0) -> Node: """ Creates and adds a new belief node or action node to the belief search tree @@ -31,15 +38,16 @@ def add(self, history: List[int], parent: Union[ActionNode, BeliefNode, None], a :param observation: observation :param particle: new node's particle set :param cost: action cost of an action node - :return: + :param value: the value of the node + :return: The newly added node """ # Create the node if action is not None: - new_node: Node = ActionNode(self.tree_size, history, parent=parent, action=action) + new_node: Node = ActionNode(self.tree_size, history, parent=parent, action=action, value=value) else: if observation is None: observation = 0 - new_node = BeliefNode(self.tree_size, history, parent=parent, observation=observation) + new_node = BeliefNode(self.tree_size, history, parent=parent, observation=observation, value=value) if particle is not None and isinstance(new_node, BeliefNode): new_node.add_particle(particle) @@ -58,17 +66,19 @@ def find_or_create(self, history: List[int], parent: Union[None, BeliefNode, Act Search for the node corresponds to given history, otherwise create one using given params """ # Start the search from the root node - current_node = self.root + root_node = self.root + current_node: Union[None, Node] = root_node # Start from the root node and then traverse down to the depth of the given history to see if the node # of this history exists or not, otherwise add it history_length, root_history_length = len(history), len(self.root.history) for step in range(root_history_length, history_length): - current_node = current_node.get_child(history[step]) + if current_node is not None: + current_node = current_node.get_child(history[step]) # Node of this history does not exists so we add it if current_node is None: - return self.add(history=history, parent=parent, observation=observation) + return self.add(history=history, parent=parent, observation=observation, value=self.default_node_value) return current_node def prune(self, node, exclude=None): diff --git a/simulation-system/libs/csle-agents/src/csle_agents/agents/pomcp/node.py b/simulation-system/libs/csle-agents/src/csle_agents/agents/pomcp/node.py index 39f7763bc..583c573ff 100644 --- a/simulation-system/libs/csle-agents/src/csle_agents/agents/pomcp/node.py +++ b/simulation-system/libs/csle-agents/src/csle_agents/agents/pomcp/node.py @@ -7,7 +7,7 @@ class Node: Abstract 
node type, represents a node in the lookahead tree """ - def __init__(self, id: int, history: List[int], parent=None, value: float = 0, visit_count: int = 0, + def __init__(self, id: int, history: List[int], parent=None, value: float = -2000, visit_count: int = 0, observation: int = -1, action: int = -1) -> None: """ Initializes the node diff --git a/simulation-system/libs/csle-agents/src/csle_agents/agents/pomcp/pomcp.py b/simulation-system/libs/csle-agents/src/csle_agents/agents/pomcp/pomcp.py index ae64abf20..e2aeba42c 100644 --- a/simulation-system/libs/csle-agents/src/csle_agents/agents/pomcp/pomcp.py +++ b/simulation-system/libs/csle-agents/src/csle_agents/agents/pomcp/pomcp.py @@ -1,4 +1,4 @@ -from typing import List, Union, Callable, Any +from typing import List, Union, Callable, Any, Dict import time import numpy as np from csle_common.dao.simulation_config.base_env import BaseEnv @@ -8,6 +8,7 @@ from csle_agents.agents.pomcp.action_node import ActionNode from csle_agents.agents.pomcp.pomcp_util import POMCPUtil import csle_agents.constants.constants as constants +from csle_common.logging.log import Logger class POMCP: @@ -15,15 +16,16 @@ class POMCP: Class that implements the POMCP algorithm """ - def __init__(self, S: List[int], O: List[int], A: List[int], gamma: float, env: BaseEnv, c: float, - initial_belief: List[float], planning_time: float = 0.5, max_particles: int = 350, + def __init__(self, A: List[int], gamma: float, env: BaseEnv, c: float, + initial_belief: Dict[int, float], planning_time: float = 0.5, max_particles: int = 350, + reinvigoration: bool = False, reinvigorated_particles_ratio: float = 0.1, rollout_policy: Union[Policy, None] = None, - value_function: Union[Callable[[Any], float], None] = None) -> None: + value_function: Union[Callable[[Any], float], None] = None, verbose: bool = False, + default_node_value: float = 0) -> None: """ Initializes the solver :param S: the state space - :param O: the observation space :param A: the action space :param gamma: the discount factor :param env: the environment for sampling @@ -32,10 +34,11 @@ def __init__(self, S: List[int], O: List[int], A: List[int], gamma: float, env: :param planning_time: the planning time :param max_particles: the maximum number of particles (samples) for the belief state :param reinvigorated_particles_ratio: probability of new particles added when updating the belief state + :param reinvigoration: boolean flag indicating whether reinvigoration should be done :param rollout_policy: the rollout policy + :param verbose: boolean flag indicating whether logging should be verbose + :param default_node_value: the default value of nodes in the tree """ - self.S = S - self.O = O self.A = A self.env = env self.gamma = gamma @@ -45,20 +48,23 @@ def __init__(self, S: List[int], O: List[int], A: List[int], gamma: float, env: self.reinvigorated_particles_ratio = reinvigorated_particles_ratio self.rollout_policy = rollout_policy self.value_function = value_function - root_particles = POMCPUtil.generate_particles( - states=self.S, num_particles=self.max_particles, probability_vector=initial_belief) - self.tree = BeliefTree(root_particles=root_particles) - - def compute_belief(self) -> List[float]: + self.initial_belief = initial_belief + self.reinvigoration = reinvigoration + self.default_node_value = default_node_value + root_particles = POMCPUtil.generate_particles(num_particles=self.max_particles, belief=initial_belief) + self.tree = BeliefTree(root_particles=root_particles, 
default_node_value=self.default_node_value) + self.verbose = verbose + + def compute_belief(self) -> Dict[int, float]: """ Computes the belief state based on the particles :return: the belief state """ - belief_state = [0.0] * len(self.S) + belief_state = {} particle_distribution = POMCPUtil.convert_samples_to_distribution(self.tree.root.particles) for state, prob in particle_distribution.items(): - belief_state[list(self.S).index(state)] = round(prob, 6) + belief_state[state] = round(prob, 6) return belief_state def rollout(self, state: int, history: List[int], depth: int, max_depth: int) -> float: @@ -85,6 +91,8 @@ def rollout(self, state: int, history: List[int], depth: int, max_depth: int) -> self.env.set_state(state=state) _, r, _, _, info = self.env.step(a) s_prime = info[constants.COMMON.STATE] + if s_prime not in self.initial_belief: + self.initial_belief[s_prime] = 0.0 o = info[constants.COMMON.OBSERVATION] return float(r) + self.gamma * self.rollout(state=s_prime, history=history + [a, o], depth=depth + 1, max_depth=max_depth) @@ -122,7 +130,7 @@ def simulate(self, state: int, max_depth: int, c: float, history: List[int], dep if not current_node.children: # since the node does not have any children, we first add them to the node for action in self.A: - self.tree.add(history + [action], parent=current_node, action=action) + self.tree.add(history + [action], parent=current_node, action=action, value=self.default_node_value) # Perform the rollout and return the value return self.rollout(state, history, depth, max_depth) @@ -138,6 +146,8 @@ def simulate(self, state: int, max_depth: int, c: float, history: List[int], dep _, r, _, _, info = self.env.step(a) o = info[constants.COMMON.OBSERVATION] s_prime = info[constants.COMMON.STATE] + if s_prime not in self.initial_belief: + self.initial_belief[s_prime] = 0.0 # Recursive call, continue the simulation from the new node R = float(r) + self.gamma * self.simulate( @@ -169,6 +179,8 @@ def solve(self, max_depth: int) -> None: n += 1 state = self.tree.root.sample_state() self.simulate(state, max_depth=max_depth, history=self.tree.root.history, c=self.c) + if self.verbose: + Logger.__call__().get_logger().info(f"Simulation time left {self.planning_time - time.time() + begin}s") def get_action(self) -> int: """ @@ -179,9 +191,13 @@ def get_action(self) -> int: """ root = self.tree.root action_vals = [(action.value, action.action) for action in root.children] + if self.verbose: + for a in root.children: + Logger.__call__().get_logger().info(f"action: {a.action}, value: {a.value}, " + f"visit count: {a.visit_count}") return int(max(action_vals)[1]) - def update_tree_with_new_samples(self, action: int, observation: int) -> List[float]: + def update_tree_with_new_samples(self, action: int, observation: int) -> Dict[int, float]: """ Updates the tree after an action has been selected and a new observation been received @@ -193,7 +209,11 @@ def update_tree_with_new_samples(self, action: int, observation: int) -> List[fl # Since we executed an action we advance the tree and update the root to the the node corresponding to the # action that was selected - new_root = root.get_child(action).get_child(observation) + child = root.get_child(action) + if child is not None: + new_root = child.get_child(observation) + else: + raise ValueError("Could not find child node") # If we did not have a node in the tree corresponding to the observation that was observed, we select a random # belief node to be the new root (note that the action child node will always 
exist by definition of the
@@ -201,24 +221,32 @@
         if new_root is None:
             # Get the action node
             action_node = root.get_child(action)
+            if action_node is None:
+                raise ValueError("Could not find the action node")
             if action_node.children:
                 # If the action node has belief state nodes, select a random of them to be the new root
                 new_root = POMCPUtil.rand_choice(action_node.children)
             else:
                 # or create the new belief node randomly
-                particles = POMCPUtil.generate_particles(states=self.S, num_particles=self.max_particles,
-                                                         probability_vector=None)
+                random_belief = {}
+                for s in list(self.initial_belief.keys()):
+                    random_belief[s] = 1 / len(self.initial_belief)
+                particles = POMCPUtil.generate_particles(num_particles=self.max_particles, belief=random_belief)
                 new_root = self.tree.add(history=action_node.history + [observation], parent=action_node,
-                                         observation=observation, particle=particles)
+                                         observation=observation, particle=particles, value=self.default_node_value)
 
         # Check how many new particles are left to fill
-        new_root.particles = []
-        particle_slots = self.max_particles - len(new_root.particles)
+        if isinstance(new_root, BeliefNode):
+            particle_slots = self.max_particles - len(new_root.particles)
+        else:
+            raise ValueError("Invalid root node")
         if particle_slots > 0:
             # fill particles by Monte-Carlo using reject sampling
             particles = []
             while len(particles) < particle_slots:
+                if self.verbose:
+                    Logger.__call__().get_logger().info(f"Filling particles {len(particles)}/{particle_slots}")
                 s = root.sample_state()
                 self.env.set_state(state=s)
                 _, r, _, _, info = self.env.step(action)
@@ -237,11 +265,16 @@
 
         # To avoid particle deprivation (i.e., that the algorithm gets stuck with the wrong belief)
         # we do particle reinvigoration here
-        if any([prob == 0.0 for prob in new_belief]):
+        if self.reinvigoration and len(self.initial_belief) > 0 and any([prob == 0.0 for prob in new_belief.values()]):
+            if self.verbose:
+                Logger.__call__().get_logger().info("Starting reinvigoration")
             # Generate same new particles randomly
+            random_belief = {}
+            for s in list(self.initial_belief.keys()):
+                random_belief[s] = 1 / len(self.initial_belief)
             mutations = POMCPUtil.generate_particles(
-                states=self.S, num_particles=int(self.max_particles * self.reinvigorated_particles_ratio),
-                probability_vector=None)
+                num_particles=int(self.max_particles * self.reinvigorated_particles_ratio),
+                belief=random_belief)
 
             # Randomly exchange some old particles for the new ones
             for particle in mutations:
diff --git a/simulation-system/libs/csle-agents/src/csle_agents/agents/pomcp/pomcp_agent.py b/simulation-system/libs/csle-agents/src/csle_agents/agents/pomcp/pomcp_agent.py
index 79f9f3908..5c6a58884 100644
--- a/simulation-system/libs/csle-agents/src/csle_agents/agents/pomcp/pomcp_agent.py
+++ b/simulation-system/libs/csle-agents/src/csle_agents/agents/pomcp/pomcp_agent.py
@@ -1,6 +1,7 @@
 from typing import Union, List, Dict, Optional
 import math
 import time
+import random
 import gymnasium as gym
 import os
 import numpy as np
@@ -30,7 +31,7 @@ class POMCPAgent(BaseAgent):
     def __init__(self, simulation_env_config: SimulationEnvConfig,
                  emulation_env_config: Union[None, EmulationEnvConfig], experiment_config: ExperimentConfig,
                  env: Optional[BaseEnv] = None,
-                 training_job: Optional[TrainingJobConfig] = None, save_to_metastore: bool = True):
+                 training_job: Optional[TrainingJobConfig] 
= None, save_to_metastore: bool = True) -> None: """ Initializes the POMCP Agent @@ -161,10 +162,11 @@ def hparam_names(self) -> List[str]: :return: a list with the hyperparameter names """ return [agents_constants.POMCP.OBJECTIVE_TYPE, agents_constants.POMCP.ROLLOUT_POLICY, - agents_constants.POMCP.VALUE_FUNCTION, agents_constants.POMCP.N, agents_constants.POMCP.S, - agents_constants.POMCP.O, agents_constants.POMCP.A, agents_constants.POMCP.GAMMA, + agents_constants.POMCP.VALUE_FUNCTION, agents_constants.POMCP.N, agents_constants.POMCP.REINVIGORATION, + agents_constants.POMCP.A, agents_constants.POMCP.GAMMA, agents_constants.POMCP.INITIAL_BELIEF, agents_constants.POMCP.PLANNING_TIME, - agents_constants.POMCP.LOG_STEP_FREQUENCY, + agents_constants.POMCP.LOG_STEP_FREQUENCY, agents_constants.POMCP.VERBOSE, + agents_constants.POMCP.DEFAULT_NODE_VALUE, agents_constants.POMCP.MAX_PARTICLES, agents_constants.POMCP.C, agents_constants.POMCP.MAX_DEPTH, agents_constants.COMMON.EVAL_BATCH_SIZE, agents_constants.COMMON.CONFIDENCE_INTERVAL, agents_constants.COMMON.RUNNING_AVERAGE, agents_constants.COMMON.MAX_ENV_STEPS] @@ -184,11 +186,12 @@ def pomcp(self, exp_result: ExperimentResult, seed: int, rollout_policy = self.experiment_config.hparams[agents_constants.POMCP.ROLLOUT_POLICY].value value_function = self.experiment_config.hparams[agents_constants.POMCP.VALUE_FUNCTION].value log_steps_frequency = self.experiment_config.hparams[agents_constants.POMCP.LOG_STEP_FREQUENCY].value + verbose = self.experiment_config.hparams[agents_constants.POMCP.VERBOSE].value + default_node_value = self.experiment_config.hparams[agents_constants.POMCP.DEFAULT_NODE_VALUE].value max_env_steps = self.experiment_config.hparams[agents_constants.COMMON.MAX_ENV_STEPS].value N = self.experiment_config.hparams[agents_constants.POMCP.N].value - S = self.experiment_config.hparams[agents_constants.POMCP.S].value - O = self.experiment_config.hparams[agents_constants.POMCP.O].value A = self.experiment_config.hparams[agents_constants.POMCP.A].value + reinvigoration = self.experiment_config.hparams[agents_constants.POMCP.REINVIGORATION].value gamma = self.experiment_config.hparams[agents_constants.POMCP.GAMMA].value b1 = self.experiment_config.hparams[agents_constants.POMCP.INITIAL_BELIEF].value planning_time = self.experiment_config.hparams[agents_constants.POMCP.PLANNING_TIME].value @@ -208,9 +211,10 @@ def pomcp(self, exp_result: ExperimentResult, seed: int, eval_env.reset() train_env.reset() belief = b1.copy() - pomcp = POMCP(S=S, O=O, A=A, gamma=gamma, env=train_env, c=c, initial_belief=belief, + pomcp = POMCP(A=A, gamma=gamma, env=train_env, c=c, initial_belief=belief, planning_time=planning_time, max_particles=max_particles, rollout_policy=rollout_policy, - value_function=value_function) + value_function=value_function, reinvigoration=reinvigoration, verbose=verbose, + default_node_value=default_node_value) R = 0 t = 1 if t % log_steps_frequency == 0: @@ -226,8 +230,10 @@ def pomcp(self, exp_result: ExperimentResult, seed: int, R += r t += 1 if t % log_steps_frequency == 0: + b = list(map(lambda x: belief[x], random.sample(list(belief.keys()), min(10, len(belief.keys()))))) Logger.__call__().get_logger().info(f"[POMCP] t: {t}, a: {action}, r: {r}, o: {o}, " - f"s_prime: {s_prime}, b: {belief}") + f"s_prime: {s_prime}, b: {b}") + Logger.__call__().get_logger().info(f"action: {eval_env.action_id_to_type_and_host[action]}") if i % self.experiment_config.log_every == 0: # Logging diff --git 
a/simulation-system/libs/csle-agents/src/csle_agents/agents/pomcp/pomcp_util.py b/simulation-system/libs/csle-agents/src/csle_agents/agents/pomcp/pomcp_util.py index 39d4dcdae..7b767abc5 100644 --- a/simulation-system/libs/csle-agents/src/csle_agents/agents/pomcp/pomcp_util.py +++ b/simulation-system/libs/csle-agents/src/csle_agents/agents/pomcp/pomcp_util.py @@ -1,4 +1,4 @@ -from typing import List, Dict, Union, Any +from typing import List, Dict, Any import numpy as np from csle_agents.agents.pomcp.node import Node from collections import Counter @@ -23,7 +23,7 @@ def sample_from_distribution(probability_vector: List[float]) -> int: return int(sample) @staticmethod - def rand_choice(candidates: List[int]) -> Any: + def rand_choice(candidates: List[Any]) -> Any: """ Selects an element from a given list uniformly at random @@ -45,20 +45,16 @@ def convert_samples_to_distribution(samples) -> Dict[int, float]: return {k: v / _sum for k, v in cnt.items()} @staticmethod - def generate_particles(states: List[int], num_particles: int, probability_vector: Union[None, List[float]]): + def generate_particles(num_particles: int, belief: Dict[int, float]) -> List[int]: """ Generates a list of particles (sample states) for a given list of states with a frequency determined by a given probability vector - :param states: the - :param n: - :param probability_vector: (optional) probability vector to determine the frequency of each sample - :return: + :param probability_vector: probability vector to determine the frequency of each sample + :return: sampled particles (states) """ - # by default use uniform distribution for particles generation - if probability_vector is None: - probability_vector = [1 / len(states)] * len(states) - return [states[int(POMCPUtil.sample_from_distribution(probability_vector))] for _ in range(num_particles)] + states = list(belief.keys()) + return [states[int(POMCPUtil.sample_from_distribution(list(belief.values())))] for _ in range(num_particles)] @staticmethod def ucb(history_visit_count, action_visit_count): diff --git a/simulation-system/libs/csle-agents/src/csle_agents/constants/constants.py b/simulation-system/libs/csle-agents/src/csle_agents/constants/constants.py index f1a8f7b05..5f5c8adf8 100644 --- a/simulation-system/libs/csle-agents/src/csle_agents/constants/constants.py +++ b/simulation-system/libs/csle-agents/src/csle_agents/constants/constants.py @@ -556,8 +556,11 @@ class POMCP: O = "O" GAMMA = "gamma" INITIAL_BELIEF = "initial_belief" + REINVIGORATION = "reinvigoration" PLANNING_TIME = "planning_time" MAX_PARTICLES = "max_particles" C = "c" MAX_DEPTH = "max_depth" LOG_STEP_FREQUENCY = "log_step_frequency" + DEFAULT_NODE_VALUE = "default_node_value" + VERBOSE = "verbose" diff --git a/simulation-system/libs/gym-csle-cyborg/src/gym_csle_cyborg/dao/acitvity_type.py b/simulation-system/libs/gym-csle-cyborg/src/gym_csle_cyborg/dao/activity_type.py similarity index 100% rename from simulation-system/libs/gym-csle-cyborg/src/gym_csle_cyborg/dao/acitvity_type.py rename to simulation-system/libs/gym-csle-cyborg/src/gym_csle_cyborg/dao/activity_type.py diff --git a/simulation-system/libs/gym-csle-cyborg/src/gym_csle_cyborg/envs/cyborg_scenario_two_defender.py b/simulation-system/libs/gym-csle-cyborg/src/gym_csle_cyborg/envs/cyborg_scenario_two_defender.py index af52f856e..2ab6af7db 100644 --- a/simulation-system/libs/gym-csle-cyborg/src/gym_csle_cyborg/envs/cyborg_scenario_two_defender.py +++ 
b/simulation-system/libs/gym-csle-cyborg/src/gym_csle_cyborg/envs/cyborg_scenario_two_defender.py @@ -1,4 +1,5 @@ from typing import Tuple, Dict, List, Any, Union +from copy import deepcopy import time import numpy as np from prettytable import PrettyTable @@ -10,7 +11,7 @@ import gym_csle_cyborg.constants.constants as env_constants from gym_csle_cyborg.dao.csle_cyborg_config import CSLECyborgConfig from gym_csle_cyborg.dao.blue_agent_action_type import BlueAgentActionType -from gym_csle_cyborg.dao.acitvity_type import ActivityType +from gym_csle_cyborg.dao.activity_type import ActivityType from gym_csle_cyborg.dao.compromised_type import CompromisedType from gym_csle_cyborg.dao.red_agent_type import RedAgentType from gym_csle_cyborg.util.cyborg_env_util import CyborgEnvUtil @@ -84,7 +85,13 @@ def __init__(self, config: CSLECyborgConfig): self.traces: List[SimulationTrace] = [] self.trace = SimulationTrace(simulation_env=self.config.gym_env_name) + # Lookup dict of states + self.visited_cyborg_states: Dict[int, Any] = {} + self.visited_scanned_states: Dict[int, List[int]] = {} + self.visited_decoy_states: Dict[int, List[List[BlueAgentActionType]]] = {} + # Reset + self.initial_belief = {1: 1.0} self.reset() super().__init__() @@ -164,7 +171,7 @@ def reset(self, seed: Union[None, int] = None, soft: bool = False, options: Unio for i in range(len(self.cyborg_hostnames)): self.scan_state.append(env_constants.CYBORG.NOT_SCANNED) self.decoy_state.append([]) - info = self.populate_info(info=dict(info), obs=o) + info = self.populate_info(info=dict(info), obs=o, reset=True) if self.config.scanned_state: o = np.array(info[env_constants.CYBORG.VECTOR_OBS_PER_HOST]).flatten() if self.config.decoy_optimization: @@ -177,12 +184,13 @@ def reset(self, seed: Union[None, int] = None, soft: bool = False, options: Unio self.trace = SimulationTrace(simulation_env=self.config.gym_env_name) return np.array(o), info - def populate_info(self, info: Dict[str, Any], obs: npt.NDArray[Any]) -> Dict[str, Any]: + def populate_info(self, info: Dict[str, Any], obs: npt.NDArray[Any], reset: bool = False) -> Dict[str, Any]: """ Populates the info dict :param obs: the latest obs :param info: the dict to populate + :param reset: boolean flag indicating whether this was called from reset or not :return: the populated dict """ info[env_constants.ENV_METRICS.RETURN] = sum(self.trace.defender_rewards) @@ -209,6 +217,39 @@ def populate_info(self, info: Dict[str, Any], obs: npt.NDArray[Any]) -> Dict[str info[env_constants.CYBORG.OBS_PER_HOST].append(host_obs) host_vector_obs.append(self.scan_state[i]) info[env_constants.CYBORG.VECTOR_OBS_PER_HOST].append(host_vector_obs) + host_ids = list(self.cyborg_hostname_to_id.values()) + state_vector = CyborgEnvUtil.state_to_vector(state=self.get_true_table().rows, + decoy_state=self.decoy_state, + host_ids=host_ids, + scan_state=self.scan_state) + state_id = CyborgEnvUtil.state_vector_to_state_id(state_vector=state_vector) + if reset: + self.initial_belief = {state_id: 1} + obs_vector = CyborgEnvUtil.state_to_vector(state=self.get_table().rows, + decoy_state=self.decoy_state, + host_ids=host_ids, scan_state=self.scan_state, observation=True) + obs_id = CyborgEnvUtil.state_vector_to_state_id(state_vector=obs_vector, observation=True) + info[env_constants.ENV_METRICS.STATE] = state_id + info[env_constants.ENV_METRICS.OBSERVATION] = obs_id + if state_id not in self.visited_cyborg_states: + agent_interfaces_copy = {} + for k, v in 
self.cyborg_challenge_env.env.env.env.env.env.environment_controller.agent_interfaces.items(): + agent_interfaces_copy[k] = v.copy() + self.visited_cyborg_states[state_id] = \ + (deepcopy(self.cyborg_challenge_env.env.env.env.env.env.environment_controller.state), + deepcopy(self.cyborg_challenge_env.env.env.env.env.scanned_ips), + agent_interfaces_copy, + self.cyborg_challenge_env.env.env.env.env.env.environment_controller.done, + deepcopy(self.cyborg_challenge_env.env.env.env.env.env.environment_controller.reward), + deepcopy(self.cyborg_challenge_env.env.env.env.env.env.environment_controller.actions), + self.cyborg_challenge_env.env.env.env.env.env.environment_controller.step, + deepcopy(self.cyborg_challenge_env.env.env.env.env.env.environment_controller.hostname_ip_map), + deepcopy(self.cyborg_challenge_env.env.env.env.env.env.environment_controller.subnet_cidr_map), + deepcopy(self.cyborg_challenge_env.env.env.env.env.env.environment_controller.observation), + self.cyborg_challenge_env.env.env.env.env.step_counter + ) + self.visited_scanned_states[state_id] = self.scan_state.copy() + self.visited_decoy_states[state_id] = self.decoy_state.copy() return info def get_table(self) -> PrettyTable: @@ -367,7 +408,37 @@ def set_state(self, state: Any) -> None: :param state: the state :return: None """ - raise NotImplementedError("This environment does not support the set_state method") + s = int(state) + if s in self.visited_cyborg_states: + self.cyborg_challenge_env.env.env.env.env.env.environment_controller.state = \ + self.visited_cyborg_states[s][0] + self.cyborg_challenge_env.env.env.env.env.scanned_ips = self.visited_cyborg_states[s][1] + self.cyborg_challenge_env.env.env.env.env.env.environment_controller.agent_interfaces \ + = self.visited_cyborg_states[s][2] + for k, v in self.cyborg_challenge_env.env.env.env.env.env.environment_controller.agent_interfaces.items(): + v.action_space.create_action_params() + self.cyborg_challenge_env.env.env.env.env.env.environment_controller.done = self.visited_cyborg_states[s][3] + self.cyborg_challenge_env.env.env.env.env.env.environment_controller.reward = \ + self.visited_cyborg_states[s][4] + self.cyborg_challenge_env.env.env.env.env.env.environment_controller.actions = \ + self.visited_cyborg_states[s][5] + self.cyborg_challenge_env.env.env.env.env.env.environment_controller.step = \ + self.visited_cyborg_states[s][6] + self.cyborg_challenge_env.env.env.env.env.env.environment_controller.hostname_ip_map = \ + self.visited_cyborg_states[s][7] + self.cyborg_challenge_env.env.env.env.env.env.environment_controller.subnet_cidr_map = \ + self.visited_cyborg_states[s][8] + self.cyborg_challenge_env.env.env.env.env.env.environment_controller.observation = \ + self.visited_cyborg_states[s][9] + self.cyborg_challenge_env.env.env.env.env.step_counter = self.visited_cyborg_states[s][10] + # self.cyborg_challenge_env.env.env.env.env.observation_change( + # self.cyborg_challenge_env.env.env.env.env.env.environment_controller.observation) + # self.cyborg_challenge_env.env.env.env.observation_change( + # self.cyborg_challenge_env.env.env.env.env.env.environment_controller.observation) + self.decoy_state = self.visited_decoy_states[s] + self.scan_state = self.visited_scanned_states[s] + else: + raise NotImplementedError(f"Unknown state: {s}") def get_observation_from_history(self, history: List[int]) -> List[Any]: """ @@ -376,7 +447,20 @@ def get_observation_from_history(self, history: List[int]) -> List[Any]: :param history: the history to get the 
observation form :return: the observation """ - raise NotImplementedError("This environment does not support the get_observation_from_history method") + obs_id = history[-1] + obs = CyborgEnvUtil.state_id_to_state_vector(state_id=obs_id, observation=True) + return obs + + def get_action_space(self) -> List[int]: + """ + Gets the action space of the defender + + :return: a list of action ids + """ + if self.config.reduced_action_space: + return list(self.action_id_to_type_and_host.keys()) + else: + return list(self.cyborg_action_id_to_type_and_host.keys()) def manual_play(self) -> None: """ diff --git a/simulation-system/libs/gym-csle-cyborg/src/gym_csle_cyborg/util/cyborg_env_util.py b/simulation-system/libs/gym-csle-cyborg/src/gym_csle_cyborg/util/cyborg_env_util.py index edd552b3e..cd773124f 100644 --- a/simulation-system/libs/gym-csle-cyborg/src/gym_csle_cyborg/util/cyborg_env_util.py +++ b/simulation-system/libs/gym-csle-cyborg/src/gym_csle_cyborg/util/cyborg_env_util.py @@ -7,6 +7,7 @@ from gym_csle_cyborg.dao.blue_agent_action_type import BlueAgentActionType from gym_csle_cyborg.dao.csle_cyborg_config import CSLECyborgConfig from gym_csle_cyborg.dao.red_agent_type import RedAgentType +from gym_csle_cyborg.dao.activity_type import ActivityType class CyborgEnvUtil: @@ -303,3 +304,104 @@ def get_decoy_state_space(config: CSLECyborgConfig) -> Tuple[List[int], Dict[Any return states, lookup_table, hosts_lookup_tables else: raise ValueError(f"Scenario: {config.scenario} not recognized") + + @staticmethod + def state_to_vector(state: List[List[Any]], decoy_state: List[List[BlueAgentActionType]], host_ids: List[int], + scan_state: List[int], observation: bool = False) -> List[List[int]]: + """ + Creates the state vector + + :param state: the state of the environment + :param decoy_state: the decoy state + :param scan_state: the scan state + :param host_ids: the list of host ids + :param observation: boolean flag indicating whether it is the true state or an observation of the state + :return: the state vector + """ + state_vector = [] + for host_id in host_ids: + host_known = -1 + activity = -1 + if not observation: + host_known = int(state[host_id][3]) + host_scanned = int(state[host_id][4]) + host_access = state[host_id][5] + else: + host_scanned = scan_state[host_id] + activity = ActivityType.from_str(state[host_id][3]).value + host_access = state[host_id][4] + if host_access == "None": + host_access = 0 + elif host_access == "User": + host_access = 1 + else: + host_access = 2 + host_decoy_state = len(decoy_state[host_id]) + if not observation: + state_vector.append([host_known, host_scanned, host_access, host_decoy_state]) + else: + state_vector.append([activity, host_scanned, host_access, host_decoy_state]) + return state_vector + + @staticmethod + def state_vector_to_state_id(state_vector: List[List[int]], observation: bool = False) -> int: + """ + Converts a state vector to an id + + :param state_vector: the state vector to convert + :param observation: boolean flag indicating whether it is the true state or an observation of the state + :return: the id + """ + binary_id_str = "" + for host_vec in state_vector: + host_binary_id_str = "" + for i, elem in enumerate(host_vec): + if not observation: + if i == 0: + host_binary_id_str += format(elem, '01b') + if i == 1: + host_binary_id_str += format(elem, '01b') + else: + if i == 0: + host_binary_id_str += format(elem, '02b') + if i == 1: + host_binary_id_str += format(elem, '02b') + if i == 2: + host_binary_id_str += format(elem, '02b') + 
if i == 3: + host_binary_id_str += format(elem, '03b') + binary_id_str += host_binary_id_str + state_id = int(binary_id_str, 2) + return state_id + + @staticmethod + def state_id_to_state_vector(state_id: int, observation: bool = False) -> List[List[int]]: + """ + Converts a state id to a state vector + + :param state_id: the state id to convert + :param observation: boolean flag indicating whether it is the true state or an observation of the state + :return: the state vector + """ + if not observation: + binary_id_str = format(state_id, "091b") + host_binary_ids_str = [binary_id_str[i:i + 7] for i in range(0, len(binary_id_str), 7)] + else: + binary_id_str = format(state_id, "0117b") + host_binary_ids_str = [binary_id_str[i:i + 9] for i in range(0, len(binary_id_str), 9)] + state_vector = [] + for host_bin in host_binary_ids_str: + if not observation: + known = int(host_bin[0:1], 2) + scanned = int(host_bin[1:2], 2) + access = int(host_bin[2:4], 2) + decoy = int(host_bin[4:7], 2) + host_vector = [known, scanned, access, decoy] + else: + activity = int(host_bin[0:2], 2) + scanned = int(host_bin[2:4], 2) + access = int(host_bin[4:6], 2) + decoy = int(host_bin[6:9], 2) + host_vector = [activity, scanned, access, decoy] + state_vector.append(host_vector) + return state_vector
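To make the new `CyborgEnvUtil` encoding above easier to follow: each host's sub-vector is packed into a fixed-width bit string, 1 + 1 + 2 + 3 = 7 bits per host for the true state (hence the 91-bit state ids for the 13 hosts of scenario 2) and 2 + 2 + 2 + 3 = 9 bits per host for observations (117-bit ids). Below is a minimal, self-contained sketch of the per-host packing and unpacking; the host values are illustrative only:

```python
# Illustrative true-state host vector: known=1, scanned=1, access=2 (root), decoys=3.
host_vector = [1, 1, 2, 3]
widths = ['01b', '01b', '02b', '03b']  # bit widths for known, scanned, access, decoy count

# Pack the host vector into a 7-bit string, as in state_vector_to_state_id.
host_bits = "".join(format(value, width) for value, width in zip(host_vector, widths))
assert host_bits == "1110011"
assert int(host_bits, 2) == 115

# Unpack by slicing the same bit ranges, as in state_id_to_state_vector.
decoded = [int(host_bits[0:1], 2), int(host_bits[1:2], 2),
           int(host_bits[2:4], 2), int(host_bits[4:7], 2)]
assert decoded == host_vector
```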