From ad4e9bd539437ab1862e66b6b7d1c3f978825320 Mon Sep 17 00:00:00 2001 From: Kim Hammar Date: Sat, 20 Jan 2024 21:08:47 +0100 Subject: [PATCH] POMCP [WIP] --- examples/manual_play/cyborg_test.py | 42 --------- .../run_vs_random_attacker_v_001.py | 7 +- .../run_vs_random_attacker_v_001.py | 3 + .../src/csle_agents/agents/pomcp/pomcp.py | 37 +++++--- .../csle_agents/agents/pomcp/pomcp_agent.py | 11 ++- .../csle_agents/agents/pomcp/pomcp_util.py | 37 +++++++- .../src/csle_agents/constants/constants.py | 1 + .../envs/cyborg_scenario_two_defender.py | 87 +++++++++++++------ .../gym_csle_cyborg/util/cyborg_env_util.py | 4 + 9 files changed, 145 insertions(+), 84 deletions(-) delete mode 100644 examples/manual_play/cyborg_test.py diff --git a/examples/manual_play/cyborg_test.py b/examples/manual_play/cyborg_test.py deleted file mode 100644 index fc4707da5..000000000 --- a/examples/manual_play/cyborg_test.py +++ /dev/null @@ -1,42 +0,0 @@ -import random -from gym_csle_cyborg.dao.csle_cyborg_config import CSLECyborgConfig -from gym_csle_cyborg.dao.red_agent_type import RedAgentType -from gym_csle_cyborg.envs.cyborg_scenario_two_defender import CyborgScenarioTwoDefender -from gym_csle_cyborg.util.cyborg_env_util import CyborgEnvUtil - -if __name__ == '__main__': - config = CSLECyborgConfig( - gym_env_name="csle-cyborg-scenario-two-v1", scenario=2, baseline_red_agents=[RedAgentType.B_LINE_AGENT], - maximum_steps=100, red_agent_distribution=[1.0], reduced_action_space=True, decoy_state=True, - scanned_state=True, decoy_optimization=False) - csle_cyborg_env = CyborgScenarioTwoDefender(config=config) - str_info = str(csle_cyborg_env.cyborg_challenge_env.env.env.env.info) - states = {} - state_idx = 0 - host_ids = list(csle_cyborg_env.cyborg_hostname_to_id.values()) - - for i in range(100000): - done = False - csle_cyborg_env.reset() - actions = list(csle_cyborg_env.action_id_to_type_and_host.keys()) - state_id = str(csle_cyborg_env.cyborg_challenge_env.env.env.env.info) - if state_id not in states: - states[state_id] = state_idx - state_idx += 1 - - while not done: - a = random.choice(actions) - o, r, done, _, info = csle_cyborg_env.step(a) - state_vector = CyborgEnvUtil.state_to_vector(state=csle_cyborg_env.get_true_table().rows, - decoy_state=csle_cyborg_env.decoy_state, host_ids=host_ids, - scan_state=csle_cyborg_env.scan_state) - state_id = CyborgEnvUtil.state_vector_to_state_id(state_vector=state_vector) - converted_state_vector = CyborgEnvUtil.state_id_to_state_vector(state_id=state_id) - assert converted_state_vector == state_vector - obs_vector = CyborgEnvUtil.state_to_vector(state=csle_cyborg_env.get_table().rows, - decoy_state=csle_cyborg_env.decoy_state, - host_ids=host_ids, scan_state=csle_cyborg_env.scan_state, - observation=True) - obs_id = CyborgEnvUtil.state_vector_to_state_id(state_vector=obs_vector, observation=True) - converted_obs_vector = CyborgEnvUtil.state_id_to_state_vector(state_id=obs_id, observation=True) - assert converted_obs_vector == obs_vector diff --git a/examples/training/pomcp/cyborg_scenario_two_defender/run_vs_random_attacker_v_001.py b/examples/training/pomcp/cyborg_scenario_two_defender/run_vs_random_attacker_v_001.py index a16b114af..0923e68e1 100644 --- a/examples/training/pomcp/cyborg_scenario_two_defender/run_vs_random_attacker_v_001.py +++ b/examples/training/pomcp/cyborg_scenario_two_defender/run_vs_random_attacker_v_001.py @@ -51,7 +51,7 @@ descr="whether reinvigoration should be used"), agents_constants.POMCP.INITIAL_BELIEF: HParam(value=b1, 
name=agents_constants.POMCP.INITIAL_BELIEF, descr="the initial belief"), - agents_constants.POMCP.PLANNING_TIME: HParam(value=300, name=agents_constants.POMCP.PLANNING_TIME, + agents_constants.POMCP.PLANNING_TIME: HParam(value=2000, name=agents_constants.POMCP.PLANNING_TIME, descr="the planning time"), agents_constants.POMCP.MAX_PARTICLES: HParam(value=1000, name=agents_constants.POMCP.MAX_PARTICLES, descr="the maximum number of belief particles"), @@ -61,6 +61,9 @@ descr="the weighting factor for UCB exploration"), agents_constants.POMCP.LOG_STEP_FREQUENCY: HParam( value=1, name=agents_constants.POMCP.LOG_STEP_FREQUENCY, descr="frequency of logging time-steps"), + agents_constants.POMCP.MAX_NEGATIVE_SAMPLES: HParam( + value=20, name=agents_constants.POMCP.MAX_NEGATIVE_SAMPLES, + descr="maximum number of negative samples when filling belief particles"), agents_constants.POMCP.DEFAULT_NODE_VALUE: HParam( value=-2000, name=agents_constants.POMCP.DEFAULT_NODE_VALUE, descr="the default node value in " "the search tree"), @@ -72,7 +75,7 @@ value=0.95, name=agents_constants.COMMON.CONFIDENCE_INTERVAL, descr="confidence interval"), agents_constants.COMMON.MAX_ENV_STEPS: HParam( - value=500, name=agents_constants.COMMON.MAX_ENV_STEPS, + value=100, name=agents_constants.COMMON.MAX_ENV_STEPS, descr="maximum number of steps in the environment (for envs with infinite horizon generally)"), agents_constants.COMMON.RUNNING_AVERAGE: HParam( value=100, name=agents_constants.COMMON.RUNNING_AVERAGE, diff --git a/examples/training/pomcp/stopping_pomdp_defender/run_vs_random_attacker_v_001.py b/examples/training/pomcp/stopping_pomdp_defender/run_vs_random_attacker_v_001.py index bec0283bb..5c6feab75 100644 --- a/examples/training/pomcp/stopping_pomdp_defender/run_vs_random_attacker_v_001.py +++ b/examples/training/pomcp/stopping_pomdp_defender/run_vs_random_attacker_v_001.py @@ -94,6 +94,9 @@ descr="the maximum depth for planning"), agents_constants.POMCP.C: HParam(value=0.35, name=agents_constants.POMCP.C, descr="the weighting factor for UCB exploration"), + agents_constants.POMCP.MAX_NEGATIVE_SAMPLES: HParam( + value=200, name=agents_constants.POMCP.MAX_NEGATIVE_SAMPLES, + descr="maximum number of negative samples when filling belief particles"), agents_constants.POMCP.DEFAULT_NODE_VALUE: HParam( value=-2000, name=agents_constants.POMCP.DEFAULT_NODE_VALUE, descr="the default node value in " "the search tree"), diff --git a/simulation-system/libs/csle-agents/src/csle_agents/agents/pomcp/pomcp.py b/simulation-system/libs/csle-agents/src/csle_agents/agents/pomcp/pomcp.py index e2aeba42c..993f81090 100644 --- a/simulation-system/libs/csle-agents/src/csle_agents/agents/pomcp/pomcp.py +++ b/simulation-system/libs/csle-agents/src/csle_agents/agents/pomcp/pomcp.py @@ -3,12 +3,12 @@ import numpy as np from csle_common.dao.simulation_config.base_env import BaseEnv from csle_common.dao.training.policy import Policy +from csle_common.logging.log import Logger from csle_agents.agents.pomcp.belief_tree import BeliefTree from csle_agents.agents.pomcp.belief_node import BeliefNode from csle_agents.agents.pomcp.action_node import ActionNode from csle_agents.agents.pomcp.pomcp_util import POMCPUtil import csle_agents.constants.constants as constants -from csle_common.logging.log import Logger class POMCP: @@ -197,15 +197,23 @@ def get_action(self) -> int: f"visit count: {a.visit_count}") return int(max(action_vals)[1]) - def update_tree_with_new_samples(self, action: int, observation: int) -> Dict[int, float]: + def 
update_tree_with_new_samples(self, action_sequence: List[int], observation: int,
+                                     max_negative_samples: int = 20) -> Dict[int, float]:
         """
         Updates the tree after an action has been selected and a new observation been received

-        :param action: the action that was executed
+        :param action_sequence: the action sequence that was executed
         :param observation: the observation that was received
+        :param max_negative_samples: the maximum number of negative samples that can be collected before
+               trajectory simulation is initiated
         :return: the updated belief state
         """
+        observation = self.env.get_observation_id_from_vector(
+            observation_vector=self.env.get_observation_from_history(history=[observation]))
         root = self.tree.root
+        if len(action_sequence) == 0:
+            raise ValueError("Invalid action sequence")
+        action = action_sequence[0]

         # Since we executed an action we advance the tree and update the root to the the node corresponding to the
         # action that was selected
@@ -241,19 +249,28 @@ def update_tree_with_new_samples(self, action: int, observation: int) -> Dict[in
             particle_slots = self.max_particles - len(new_root.particles)
         else:
             raise ValueError("Invalid root node")
+        negative_samples_count = 0
         if particle_slots > 0:
             # fill particles by Monte-Carlo using reject sampling
             particles = []
             while len(particles) < particle_slots:
                 if self.verbose:
                     Logger.__call__().get_logger().info(f"Filling particles {len(particles)}/{particle_slots}")
-                s = root.sample_state()
-                self.env.set_state(state=s)
-                _, r, _, _, info = self.env.step(action)
-                s_prime = info[constants.COMMON.STATE]
-                o = info[constants.COMMON.OBSERVATION]
-                if o == observation:
-                    particles.append(s_prime)
+                if negative_samples_count >= max_negative_samples:
+                    particles += POMCPUtil.trajectory_simulation_particles(
+                        o=observation, env=self.env, action_sequence=action_sequence, verbose=self.verbose,
+                        num_particles=(particle_slots - len(particles)))
+                else:
+                    s = root.sample_state()
+                    self.env.set_state(state=s)
+                    _, r, _, _, info = self.env.step(action)
+                    s_prime = info[constants.COMMON.STATE]
+                    o = info[constants.COMMON.OBSERVATION]
+                    if o == observation:
+                        particles.append(s_prime)
+                        negative_samples_count = 0
+                    else:
+                        negative_samples_count += 1
             new_root.particles += particles

         # We now prune the old root from the tree
diff --git a/simulation-system/libs/csle-agents/src/csle_agents/agents/pomcp/pomcp_agent.py b/simulation-system/libs/csle-agents/src/csle_agents/agents/pomcp/pomcp_agent.py
index 9369045e3..460f616aa 100644
--- a/simulation-system/libs/csle-agents/src/csle_agents/agents/pomcp/pomcp_agent.py
+++ b/simulation-system/libs/csle-agents/src/csle_agents/agents/pomcp/pomcp_agent.py
@@ -166,7 +166,7 @@ def hparam_names(self) -> List[str]:
                 agents_constants.POMCP.A, agents_constants.POMCP.GAMMA, agents_constants.POMCP.INITIAL_BELIEF,
                 agents_constants.POMCP.PLANNING_TIME, agents_constants.POMCP.LOG_STEP_FREQUENCY,
                 agents_constants.POMCP.VERBOSE,
-                agents_constants.POMCP.DEFAULT_NODE_VALUE,
+                agents_constants.POMCP.DEFAULT_NODE_VALUE, agents_constants.POMCP.MAX_NEGATIVE_SAMPLES,
                 agents_constants.POMCP.MAX_PARTICLES, agents_constants.POMCP.C, agents_constants.POMCP.MAX_DEPTH,
                 agents_constants.COMMON.EVAL_BATCH_SIZE, agents_constants.COMMON.CONFIDENCE_INTERVAL,
                 agents_constants.COMMON.RUNNING_AVERAGE, agents_constants.COMMON.MAX_ENV_STEPS]
@@ -188,6 +188,7 @@ def pomcp(self, exp_result: ExperimentResult, seed: int,
         log_steps_frequency = self.experiment_config.hparams[agents_constants.POMCP.LOG_STEP_FREQUENCY].value
         verbose = self.experiment_config.hparams[agents_constants.POMCP.VERBOSE].value
         default_node_value = self.experiment_config.hparams[agents_constants.POMCP.DEFAULT_NODE_VALUE].value
+        max_negative_samples = self.experiment_config.hparams[agents_constants.POMCP.MAX_NEGATIVE_SAMPLES].value
         max_env_steps = self.experiment_config.hparams[agents_constants.COMMON.MAX_ENV_STEPS].value
         N = self.experiment_config.hparams[agents_constants.POMCP.N].value
         A = self.experiment_config.hparams[agents_constants.POMCP.A].value
@@ -203,9 +204,8 @@ def pomcp(self, exp_result: ExperimentResult, seed: int,

         # Run N episodes
         for i in range(N):
-
-            # Setup environments
             done = False
+            action_sequence = []
             eval_env = gym.make(self.simulation_env_config.gym_env_name, config=config)
             train_env: BaseEnv = gym.make(self.simulation_env_config.gym_env_name, config=config)
             _, info = eval_env.reset()
@@ -225,9 +225,11 @@
                 pomcp.solve(max_depth=max_depth)
                 action = pomcp.get_action()
                 _, r, done, _, info = eval_env.step(action)
+                action_sequence.append(action)
                 s_prime = info[agents_constants.COMMON.STATE]
                 o = info[agents_constants.COMMON.OBSERVATION]
-                belief = pomcp.update_tree_with_new_samples(action=action, observation=o)
+                belief = pomcp.update_tree_with_new_samples(action_sequence=action_sequence, observation=o,
+                                                            max_negative_samples=max_negative_samples)
                 R += r
                 t += 1
                 if t % log_steps_frequency == 0:
@@ -235,6 +237,7 @@
                    Logger.__call__().get_logger().info(f"[POMCP] t: {t}, a: {action}, r: {r}, o: {o}, "
                                                        f"s_prime: {s_prime}, b: {b}")
                    Logger.__call__().get_logger().info(f"action: {eval_env.action_id_to_type_and_host[action]}")
+                s = s_prime

             if i % self.experiment_config.log_every == 0:
                 # Logging
diff --git a/simulation-system/libs/csle-agents/src/csle_agents/agents/pomcp/pomcp_util.py b/simulation-system/libs/csle-agents/src/csle_agents/agents/pomcp/pomcp_util.py
index 7b767abc5..1df62cb6f 100644
--- a/simulation-system/libs/csle-agents/src/csle_agents/agents/pomcp/pomcp_util.py
+++ b/simulation-system/libs/csle-agents/src/csle_agents/agents/pomcp/pomcp_util.py
@@ -1,7 +1,10 @@
 from typing import List, Dict, Any
 import numpy as np
-from csle_agents.agents.pomcp.node import Node
 from collections import Counter
+from csle_common.logging.log import Logger
+from csle_common.dao.simulation_config.base_env import BaseEnv
+from csle_agents.agents.pomcp.node import Node
+import csle_agents.constants.constants as constants


 class POMCPUtil:
@@ -84,3 +87,35 @@ def ucb_acquisition_function(action: "Node", c: float) -> float:
         :return: the acquisition value of the action
         """
         return float(action.value + c * POMCPUtil.ucb(action.parent.visit_count, action.visit_count))
+
+    @staticmethod
+    def trajectory_simulation_particles(o: int, env: BaseEnv, action_sequence: List[int], num_particles: int,
+                                        verbose: bool = False) -> List[int]:
+        """
+        Performs trajectory simulations to find possible states matching the given observation
+
+        :param o: the observation to match against
+        :param env: the black-box simulator to use for generating trajectories
+        :param action_sequence: the action sequence for the trajectory
+        :param num_particles: the number of particles to collect
+        :param verbose: boolean flag indicating whether logging should be verbose or not
+        :return: the list of particles matching the given observation
+        """
+        particles: List[int] = []
+        while len(particles) < num_particles:
+            done = False
+            _, info = env.reset()
+            s =
info[constants.COMMON.STATE] + t = 0 + while not done and t < len(action_sequence): + _, r, done, _, info = env.step(action=action_sequence[t]) + sampled_o = info[constants.COMMON.OBSERVATION] + if t == len(action_sequence) - 1 and sampled_o == o: + particles.append(s) + s = info[constants.COMMON.STATE] + t += 1 + if verbose: + Logger.__call__().get_logger().info(f"Filling particles {len(particles)}/{num_particles} " + f"through trajectory simulations, " + f"action sequence: {action_sequence}, observation: {o}") + return particles diff --git a/simulation-system/libs/csle-agents/src/csle_agents/constants/constants.py b/simulation-system/libs/csle-agents/src/csle_agents/constants/constants.py index 5f5c8adf8..e5d00724b 100644 --- a/simulation-system/libs/csle-agents/src/csle_agents/constants/constants.py +++ b/simulation-system/libs/csle-agents/src/csle_agents/constants/constants.py @@ -559,6 +559,7 @@ class POMCP: REINVIGORATION = "reinvigoration" PLANNING_TIME = "planning_time" MAX_PARTICLES = "max_particles" + MAX_NEGATIVE_SAMPLES = "max_negative_samples" C = "c" MAX_DEPTH = "max_depth" LOG_STEP_FREQUENCY = "log_step_frequency" diff --git a/simulation-system/libs/gym-csle-cyborg/src/gym_csle_cyborg/envs/cyborg_scenario_two_defender.py b/simulation-system/libs/gym-csle-cyborg/src/gym_csle_cyborg/envs/cyborg_scenario_two_defender.py index aecc1609b..5f9d04feb 100644 --- a/simulation-system/libs/gym-csle-cyborg/src/gym_csle_cyborg/envs/cyborg_scenario_two_defender.py +++ b/simulation-system/libs/gym-csle-cyborg/src/gym_csle_cyborg/envs/cyborg_scenario_two_defender.py @@ -31,18 +31,7 @@ def __init__(self, config: CSLECyborgConfig): self.config = config # Setup Cyborg Env - (cyborg_scenario_config_path, cyborg_challenge_env, cyborg_hostnames, cyborg_hostname_to_id, - cyborg_subnets, cyborg_subnet_to_id, cyborg_action_id_to_type_and_host, cyborg_action_type_and_host_to_id, - red_agent_type) = CyborgEnvUtil.setup_cyborg_env(config=self.config) - self.cyborg_scenario_config_path = cyborg_scenario_config_path - self.cyborg_challenge_env = cyborg_challenge_env - self.cyborg_hostnames = cyborg_hostnames - self.cyborg_hostname_to_id = cyborg_hostname_to_id - self.cyborg_subnets = cyborg_subnets - self.cyborg_subnet_to_id = cyborg_subnet_to_id - self.cyborg_action_id_to_type_and_host = cyborg_action_id_to_type_and_host - self.cyborg_action_type_and_host_to_id = cyborg_action_type_and_host_to_id - self.red_agent_type = red_agent_type + self.create_cyborg_env() # Setup defender decoy actions self.decoy_action_types = CyborgEnvUtil.get_decoy_action_types(scenario=self.config.scenario) @@ -95,6 +84,25 @@ def __init__(self, config: CSLECyborgConfig): self.reset() super().__init__() + def create_cyborg_env(self) -> None: + """ + Creates the cyborg environment + + :return: None + """ + (cyborg_scenario_config_path, cyborg_challenge_env, cyborg_hostnames, cyborg_hostname_to_id, + cyborg_subnets, cyborg_subnet_to_id, cyborg_action_id_to_type_and_host, cyborg_action_type_and_host_to_id, + red_agent_type) = CyborgEnvUtil.setup_cyborg_env(config=self.config) + self.cyborg_scenario_config_path = cyborg_scenario_config_path + self.cyborg_challenge_env = cyborg_challenge_env + self.cyborg_hostnames = cyborg_hostnames + self.cyborg_hostname_to_id = cyborg_hostname_to_id + self.cyborg_subnets = cyborg_subnets + self.cyborg_subnet_to_id = cyborg_subnet_to_id + self.cyborg_action_id_to_type_and_host = cyborg_action_id_to_type_and_host + self.cyborg_action_type_and_host_to_id = cyborg_action_type_and_host_to_id + 
self.red_agent_type = red_agent_type + def step(self, action: int) -> Tuple[npt.NDArray[Any], float, bool, bool, Dict[str, Any]]: """ Takes a step in the environment by executing the given action @@ -217,18 +225,10 @@ def populate_info(self, info: Dict[str, Any], obs: npt.NDArray[Any], reset: bool info[env_constants.CYBORG.OBS_PER_HOST].append(host_obs) host_vector_obs.append(self.scan_state[i]) info[env_constants.CYBORG.VECTOR_OBS_PER_HOST].append(host_vector_obs) - host_ids = list(self.cyborg_hostname_to_id.values()) - state_vector = CyborgEnvUtil.state_to_vector(state=self.get_true_table().rows, - decoy_state=self.decoy_state, - host_ids=host_ids, - scan_state=self.scan_state) - state_id = CyborgEnvUtil.state_vector_to_state_id(state_vector=state_vector) + state_id = self.get_state_id() + obs_id = self.get_observation_id() if reset: self.initial_belief = {state_id: 1} - obs_vector = CyborgEnvUtil.state_to_vector(state=self.get_table().rows, - decoy_state=self.decoy_state, - host_ids=host_ids, scan_state=self.scan_state, observation=True) - obs_id = CyborgEnvUtil.state_vector_to_state_id(state_vector=obs_vector, observation=True) info[env_constants.ENV_METRICS.STATE] = state_id info[env_constants.ENV_METRICS.OBSERVATION] = obs_id if state_id not in self.visited_cyborg_states: @@ -242,7 +242,7 @@ def populate_info(self, info: Dict[str, Any], obs: npt.NDArray[Any], reset: bool deepcopy(self.cyborg_challenge_env.env.env.env.env.env.environment_controller.done), deepcopy(self.cyborg_challenge_env.env.env.env.env.env.environment_controller.reward), deepcopy(self.cyborg_challenge_env.env.env.env.env.env.environment_controller.actions), - deepcopy(self.cyborg_challenge_env.env.env.env.env.env.environment_controller.step), + deepcopy(self.cyborg_challenge_env.env.env.env.env.env.environment_controller.steps), deepcopy(self.cyborg_challenge_env.env.env.env.env.env.environment_controller.hostname_ip_map), deepcopy(self.cyborg_challenge_env.env.env.env.env.env.environment_controller.subnet_cidr_map), deepcopy(self.cyborg_challenge_env.env.env.env.env.env.environment_controller.observation), @@ -250,7 +250,9 @@ def populate_info(self, info: Dict[str, Any], obs: npt.NDArray[Any], reset: bool deepcopy(self.cyborg_challenge_env.env.env.env.success), deepcopy(self.cyborg_challenge_env.env.env.env.baseline), deepcopy(self.cyborg_challenge_env.env.env.env.info), - deepcopy(self.cyborg_challenge_env.env.env.env.blue_info) + deepcopy(self.cyborg_challenge_env.env.env.env.blue_info), + deepcopy(self.cyborg_challenge_env.step_counter), + deepcopy(self.cyborg_challenge_env.env.env.env.env.env.environment_controller.INFO_DICT), ) self.visited_scanned_states[state_id] = deepcopy(self.scan_state) self.visited_decoy_states[state_id] = deepcopy(self.decoy_state) @@ -427,7 +429,7 @@ def set_state(self, state: Any) -> None: deepcopy(self.visited_cyborg_states[s][4]) self.cyborg_challenge_env.env.env.env.env.env.environment_controller.actions = \ deepcopy(self.visited_cyborg_states[s][5]) - self.cyborg_challenge_env.env.env.env.env.env.environment_controller.step = \ + self.cyborg_challenge_env.env.env.env.env.env.environment_controller.steps = \ deepcopy(self.visited_cyborg_states[s][6]) self.cyborg_challenge_env.env.env.env.env.env.environment_controller.hostname_ip_map = \ deepcopy(self.visited_cyborg_states[s][7]) @@ -440,6 +442,9 @@ def set_state(self, state: Any) -> None: self.cyborg_challenge_env.env.env.env.baseline = deepcopy(self.visited_cyborg_states[s][12]) self.cyborg_challenge_env.env.env.env.info = 
deepcopy(self.visited_cyborg_states[s][13]) self.cyborg_challenge_env.env.env.env.blue_info = deepcopy(self.visited_cyborg_states[s][14]) + self.cyborg_challenge_env.step_counter = deepcopy(self.visited_cyborg_states[s][15]) + self.cyborg_challenge_env.env.env.env.env.env.environment_controller.INFO_DICT = \ + deepcopy(self.visited_cyborg_states[s][16]) self.decoy_state = deepcopy(self.visited_decoy_states[s]) self.scan_state = deepcopy(self.visited_scanned_states[s]) self.cyborg_challenge_env.env.env.env.env.observation_change(obs) @@ -494,3 +499,35 @@ def get_state_from_id(self, state_id: int) -> List[List[int]]: :return: the observation vector """ return CyborgEnvUtil.state_id_to_state_vector(state_id=state_id, observation=False) + + def get_observation_id_from_vector(self, observation_vector: List[Any]) -> int: + """ + Converts an observation vector to an id + + :param observation_vector: the vector to convert + :return: the observation id + """ + return CyborgEnvUtil.state_vector_to_state_id(state_vector=observation_vector, observation=True) + + def get_observation_id(self) -> int: + """ + :return: the current observation id + """ + host_ids = list(self.cyborg_hostname_to_id.values()) + obs_vector = CyborgEnvUtil.state_to_vector(state=self.get_table().rows, + decoy_state=self.decoy_state, + host_ids=host_ids, scan_state=self.scan_state, observation=True) + obs_id = CyborgEnvUtil.state_vector_to_state_id(state_vector=obs_vector, observation=True) + return obs_id + + def get_state_id(self) -> int: + """ + :return: the current state id + """ + host_ids = list(self.cyborg_hostname_to_id.values()) + state_vector = CyborgEnvUtil.state_to_vector(state=self.get_true_table().rows, + decoy_state=self.decoy_state, + host_ids=host_ids, + scan_state=self.scan_state) + state_id = CyborgEnvUtil.state_vector_to_state_id(state_vector=state_vector) + return state_id diff --git a/simulation-system/libs/gym-csle-cyborg/src/gym_csle_cyborg/util/cyborg_env_util.py b/simulation-system/libs/gym-csle-cyborg/src/gym_csle_cyborg/util/cyborg_env_util.py index 7899d8a5f..ee75aa20f 100644 --- a/simulation-system/libs/gym-csle-cyborg/src/gym_csle_cyborg/util/cyborg_env_util.py +++ b/simulation-system/libs/gym-csle-cyborg/src/gym_csle_cyborg/util/cyborg_env_util.py @@ -336,6 +336,8 @@ def state_to_vector(state: List[List[Any]], decoy_state: List[List[BlueAgentActi host_access = 1 if host_access == "Privileged": host_access = 2 + if host_access == "Unknown": + host_access = 3 host_decoy_state = len(decoy_state[host_id]) if not observation: state_vector.append([host_known, host_scanned, host_access, host_decoy_state]) @@ -353,6 +355,7 @@ def state_vector_to_state_id(state_vector: List[List[int]], observation: bool = :return: the id """ binary_id_str = "" + host_bins = [] for host_vec in state_vector: host_binary_id_str = "" for i, elem in enumerate(host_vec): @@ -371,6 +374,7 @@ def state_vector_to_state_id(state_vector: List[List[int]], observation: bool = if i == 3: host_binary_id_str += format(elem, '03b') binary_id_str += host_binary_id_str + host_bins.append(host_binary_id_str) state_id = int(binary_id_str, 2) return state_id
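Note on the particle refilling introduced in pomcp.py above: when rejection sampling from the previous belief produces too many consecutive mismatching observations, the belief update falls back to trajectory simulation over the executed action sequence. The following is a minimal standalone sketch of that control flow; the sample_state, step and reset_and_replay callables are hypothetical stand-ins for the csle environment API, not actual csle functions.

from typing import Callable, List, Tuple


def fill_particles(sample_state: Callable[[], int],
                   step: Callable[[int, int], Tuple[int, int]],
                   reset_and_replay: Callable[[List[int]], Tuple[int, int]],
                   last_action: int, action_sequence: List[int], observation: int,
                   num_particles: int, max_negative_samples: int = 20) -> List[int]:
    """Collects num_particles states consistent with the latest real observation."""
    particles: List[int] = []
    negative_samples = 0
    while len(particles) < num_particles:
        if negative_samples >= max_negative_samples:
            # Too many consecutive rejections: fall back to replaying the executed
            # action sequence from fresh resets and keep end-states whose final
            # observation matches the real one (trajectory simulation)
            while len(particles) < num_particles:
                s_prime, o = reset_and_replay(action_sequence)
                if o == observation:
                    particles.append(s_prime)
            break
        # Rejection sampling: propagate a belief particle one step with the executed
        # action and keep it only if the simulated observation matches the real one
        s = sample_state()
        s_prime, o = step(s, last_action)
        if o == observation:
            particles.append(s_prime)
            negative_samples = 0
        else:
            negative_samples += 1
    return particles

Resetting the negative-sample counter after every accepted particle, as the patch does, keeps the cheap rejection sampler in use as long as it still finds matches and reserves the more expensive full-trajectory replays for observations that the current belief cannot explain.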
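The set_state support in cyborg_scenario_two_defender.py relies on caching deep copies of the wrapped CybORG controller's mutable fields for every visited state id (the patch extends the snapshot with step_counter and INFO_DICT) and copying them back on demand. A generic sketch of this snapshot/restore pattern follows; the Simulator class and its fields are hypothetical stand-ins for the wrapped environment controller.

import copy
from typing import Any, Dict, Tuple


class Simulator:
    """Hypothetical stand-in for the wrapped CybORG environment controller."""

    def __init__(self) -> None:
        self.steps = 0
        self.hosts: Dict[str, Any] = {}


class SnapshottingEnv:
    """Emulates set_state() for a simulator without native state-loading support."""

    def __init__(self) -> None:
        self.sim = Simulator()
        self.snapshots: Dict[int, Tuple[int, Dict[str, Any]]] = {}

    def save_state(self, state_id: int) -> None:
        # Deep copies are essential: the simulator mutates these objects in place,
        # so a shallow reference would be silently overwritten by later steps
        self.snapshots[state_id] = (copy.deepcopy(self.sim.steps), copy.deepcopy(self.sim.hosts))

    def set_state(self, state_id: int) -> None:
        # Restore the cached copies so planning can rewind to a visited state
        steps, hosts = self.snapshots[state_id]
        self.sim.steps = copy.deepcopy(steps)
        self.sim.hosts = copy.deepcopy(hosts)

Anything the controller mutates between steps has to be part of the snapshot, which is why the patch adds the step counter and INFO_DICT to the cached tuple.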
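The state and observation ids produced by CyborgEnvUtil.state_vector_to_state_id are built by packing each host's feature vector into fixed-width binary fields and interpreting the concatenation as an integer; state_id_to_state_vector reverses this. Below is a generic sketch of that round trip; the bit widths (known=1, scanned=1, access=2, decoys=3) are illustrative assumptions and not necessarily the exact widths used for states versus observations.

from typing import List

BIT_WIDTHS = [1, 1, 2, 3]  # assumed widths: known, scanned, access level, number of decoys


def vector_to_id(state_vector: List[List[int]]) -> int:
    # Concatenate every host's fields as fixed-width binary strings
    bits = ""
    for host_vec in state_vector:
        for width, elem in zip(BIT_WIDTHS, host_vec):
            bits += format(elem, f"0{width}b")  # assumes elem fits in the given width
    return int(bits, 2)


def id_to_vector(state_id: int, num_hosts: int) -> List[List[int]]:
    # Zero-pad to the full width so leading zero-valued fields are recovered correctly
    total_bits = sum(BIT_WIDTHS) * num_hosts
    bits = format(state_id, f"0{total_bits}b")
    vector, pos = [], 0
    for _ in range(num_hosts):
        host_vec = []
        for width in BIT_WIDTHS:
            host_vec.append(int(bits[pos:pos + width], 2))
            pos += width
        vector.append(host_vec)
    return vector


# Round-trip check on two hypothetical hosts
example = [[1, 0, 2, 5], [0, 1, 3, 0]]
assert id_to_vector(vector_to_id(example), num_hosts=2) == example

If the access field is two bits wide, the new "Unknown" access value mapped to 3 in state_to_vector still fits without widening the encoding.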