
Commit

test
Limmen committed Jan 28, 2024
1 parent 8b777ba commit cbfa212
Showing 17 changed files with 61 additions and 38 deletions.
32 changes: 16 additions & 16 deletions examples/manual_play/attacker_profiling_4.py
@@ -258,8 +258,8 @@ def action_deterministic_success(attacker_state, state_vector, target):
horizon = 100
episodes = 100000000
save_every = 100
id = 41
seed = 4124775
id = 10
seed = 10819202
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
@@ -290,9 +290,9 @@ def action_deterministic_success(attacker_state, state_vector, target):
defender_actions_history = []
red_action_types = []
for i in range(horizon):
# ad = np.random.choice(defender_actions)
ad = np.random.choice(defender_actions)
# ad = np.random.choice([27, 28, 29, 30, 31, 32 ,33, 34, 35])
ad = 31
# ad = 31
# ad = 4
# ad = 27
o, r, done, _, info = csle_cyborg_env.step(action=ad)
@@ -390,17 +390,17 @@ def action_deterministic_success(attacker_state, state_vector, target):
# f"exploit prob: {exploit_prob}"
# if exploit_counts[10][0] > 0:
# print(exploit_success[10][3]/exploit_counts[10][3])
print(exploit_success[1][0]/exploit_counts[1][0])
# print(exploit_success[1][0]/exploit_counts[1][0])
# print(user_counts)

# if ep % save_every == 0:
# with open(f'/home/kim/exploit_success_{id}.npy', 'wb') as f:
# np.save(f, np.array(exploit_success))
# with open(f'/home/kim/exploit_counts_{id}.npy', 'wb') as f:
# np.save(f, np.array(exploit_counts))
# with open(f'/home/kim/exploit_root_{id}.npy', 'wb') as f:
# np.save(f, np.array(exploit_root))
# with open(f'/home/kim/exploit_user_{id}.npy', 'wb') as f:
# np.save(f, np.array(exploit_user))
# with open(f'/home/kim/exploit_type_counts_{id}.npy', 'wb') as f:
# np.save(f, np.array(exploit_type_counts))
if ep % save_every == 0:
with open(f'/home/kim/exploit_success_{id}.npy', 'wb') as f:
np.save(f, np.array(exploit_success))
with open(f'/home/kim/exploit_counts_{id}.npy', 'wb') as f:
np.save(f, np.array(exploit_counts))
with open(f'/home/kim/exploit_root_{id}.npy', 'wb') as f:
np.save(f, np.array(exploit_root))
with open(f'/home/kim/exploit_user_{id}.npy', 'wb') as f:
np.save(f, np.array(exploit_user))
with open(f'/home/kim/exploit_type_counts_{id}.npy', 'wb') as f:
np.save(f, np.array(exploit_type_counts))
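The save block above checkpoints the collected exploit statistics to .npy files every save_every episodes. As a rough sketch of how those checkpoints could be read back to estimate empirical exploit success probabilities (the path and id value come from the script above; the elementwise division is an illustrative assumption, not part of the commit):

    import numpy as np

    id = 10  # assumed to match the id used by the profiling script
    with open(f'/home/kim/exploit_success_{id}.npy', 'rb') as f:
        exploit_success = np.load(f)
    with open(f'/home/kim/exploit_counts_{id}.npy', 'rb') as f:
        exploit_counts = np.load(f)
    # Empirical success probability per cell; cells that were never attempted stay at 0
    success_prob = np.divide(exploit_success, exploit_counts,
                             out=np.zeros_like(exploit_success, dtype=float),
                             where=exploit_counts > 0)
    print(success_prob)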
2 changes: 1 addition & 1 deletion examples/manual_play/cyborg_rollout_three.py
@@ -31,7 +31,7 @@
total_R = 0
for t in range(100):
# a = POMCPUtil.rand_choice(A)
a = 4
a = 10
o, r, done, _, info = csle_cyborg_env.step(action=a)
obs_vec = CyborgEnvUtil.state_id_to_state_vector(state_id=info[constants.ENV_METRICS.OBSERVATION], observation=True)
# print(obs_vec)
@@ -40,11 +40,12 @@ def __init__(self, player_type: PlayerType, actions: List[Action], alpha_vectors
self.states = states
self.policy_type = PolicyType.ALPHA_VECTORS

def action(self, o: List[Union[int, float]]) -> int:
def action(self, o: List[Union[int, float]], deterministic: bool = True) -> int:
"""
Selects the next action
:param o: the belief
:param deterministic: boolean flag indicating whether the action selection should be deterministic
:return: the next action and its probability
"""
b = o
@@ -50,15 +50,16 @@ def __init__(self, model: Optional[DQN], simulation_name: str, save_path: str, p
self.avg_R = avg_R
self.policy_type = PolicyType.DQN

def action(self, o: List[float]) -> NDArray[Any]:
def action(self, o: List[float], deterministic: bool = True) -> NDArray[Any]:
"""
Multi-threshold stopping policy
:param o: the current observation
:param deterministic: boolean flag indicating whether the action selection should be deterministic
:return: the selected action
"""
if self.model is None:
raise ValueError("The model i None")
raise ValueError("The model is None")
a = self.model.predict(np.array(o), deterministic=False)[0]
return a

@@ -74,11 +74,12 @@ def __init__(self, policy_network, simulation_name: str, save_path: str,
f"There was an exception loading the model from path: {self.save_path}, "
f"exception: {str(e)}, {repr(e)}")

def action(self, o: NDArray[Any]) -> Any:
def action(self, o: NDArray[Any], deterministic: bool = True) -> Any:
"""
Multi-threshold stopping policy
:param o: the current observation
:param deterministic: boolean flag indicating whether the action selection should be deterministic
:return: the selected action
"""
state = torch.from_numpy(o).float()
@@ -47,11 +47,12 @@ def __init__(self, stopping_policy: LinearThresholdStoppingPolicy, action_policy
self.avg_R = avg_R
self.policy_type = PolicyType.LINEAR_TABULAR

def action(self, o: List[float]) -> Union[int, List[int], float, NDArray[Any]]:
def action(self, o: List[float], deterministic: bool = True) -> Union[int, List[int], float, NDArray[Any]]:
"""
Multi-threshold stopping policy
:param o: the current observation
:param deterministic: boolean flag indicating whether the action selection should be deterministic
:return: the selected action
"""
stop = self.stopping_policy.action(o=o[1:])
@@ -44,11 +44,12 @@ def __init__(self, theta, simulation_name: str, L: int, states: List[State], pla
self.opponent_strategy = opponent_strategy
self.policy_type = PolicyType.LINEAR_THRESHOLD

def action(self, o: List[float]) -> int:
def action(self, o: List[float], deterministic: bool = True) -> int:
"""
Multi-threshold stopping policy
:param o: the current observation
:param deterministic: boolean flag indicating whether the action selection should be deterministic
:return: the selected action
"""
if self.player_type == PlayerType.DEFENDER:
@@ -38,11 +38,12 @@ def __init__(self, simulation_name: str, player_type: PlayerType, states: List[S
self.avg_R = avg_R
self.policy_type = PolicyType.MIXED_LINEAR_TABULAR

def action(self, o: List[float]) -> Union[int, List[int], float, NDArray[Any]]:
def action(self, o: List[float], deterministic: bool = True) -> Union[int, List[int], float, NDArray[Any]]:
"""
Multi-threshold stopping policy
:param o: the current observation
:param deterministic: boolean flag indicating whether the action selection should be deterministic
:return: the selected action
"""
policy = np.random.choice(self.linear_tabular_policies)
@@ -60,11 +60,12 @@ def probability(self, o: List[float], a: int) -> int:
"""
return self.action(o=o) == a

def action(self, o: List[float]) -> int:
def action(self, o: List[float], deterministic: bool = True) -> int:
"""
Multi-threshold stopping policy
:param o: the current observation
:param deterministic: boolean flag indicating whether the action selection should be deterministic
:return: the selected action
"""
if not self.player_type == PlayerType.ATTACKER:
@@ -37,11 +37,13 @@ def __init__(self, simulation_name: str, player_type: PlayerType, states: List[S
self.avg_R = avg_R
self.policy_type = PolicyType.MIXED_PPO_POLICY

def action(self, o: List[float]) -> Union[int, float, npt.NDArray[Any]]:
def action(self, o: List[float], deterministic: bool = True) -> Union[int, float, npt.NDArray[Any]]:
"""
Multi-threshold stopping policy
:param o: the current observation
:param deterministic: boolean flag indicating whether the action selection should be deterministic
:return: the selected action
"""
policy: PPOPolicy = np.random.choice(self.ppo_policies)
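The mixed PPO policy draws one of its component PPO policies uniformly at random and delegates action selection to it. The delegated call is truncated in this hunk, but with the new flag it would plausibly forward deterministic as sketched below (an assumption, not shown in the diff):

    policy: PPOPolicy = np.random.choice(self.ppo_policies)
    a = policy.action(o=o, deterministic=deterministic)  # assumed delegation to the sampled sub-policy
    return a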
@@ -47,11 +47,12 @@ def __init__(self, theta: List[float], simulation_name: str, L: int, states: Lis
self.opponent_strategy = opponent_strategy
self.policy_type = PolicyType.MULTI_THRESHOLD

def action(self, o: List[float]) -> int:
def action(self, o: List[float], deterministic: bool = True) -> int:
"""
Multi-threshold stopping policy
:param o: the current observation
:param deterministic: boolean flag indicating whether the action selection should be deterministic
:return: the selected action
"""
if not self.player_type == PlayerType.ATTACKER:
@@ -22,11 +22,12 @@ def __init__(self, agent_type: AgentType, player_type: PlayerType) -> None:
self.player_type = player_type

@abstractmethod
def action(self, o: Any) -> Union[int, List[int], float, NDArray[Any]]:
def action(self, o: Any, deterministic: bool) -> Union[int, List[int], float, NDArray[Any]]:
"""
Calculates the next action
:param o: the input observation
:param deterministic: boolean flag indicating whether the action selection should be deterministic
:return: the action
"""
pass
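This abstract method defines the interface that the concrete policies in this commit now share: an observation plus a deterministic flag. A minimal usage sketch, assuming a concrete policy instance and a suitably shaped observation o (both hypothetical):

    # greedy (deterministic) action selection
    a = policy.action(o=o, deterministic=True)
    # sampled (stochastic) action selection, e.g. for exploration or rollouts
    a = policy.action(o=o, deterministic=False)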
@@ -56,17 +56,18 @@ def __init__(self, model: Union[None, PPO, PPONetwork], simulation_name: str, sa
self.avg_R = avg_R
self.policy_type = PolicyType.PPO

def action(self, o: Union[List[float], List[int]]) -> Union[int, float, npt.NDArray[Any]]:
def action(self, o: Union[List[float], List[int]], deterministic: bool = True) -> Union[int, float, npt.NDArray[Any]]:
"""
Multi-threshold stopping policy
:param o: the current observation
:param deterministic: boolean flag indicating whether the action selection should be deterministic
:return: the selected action
"""
if self.model is None:
raise ValueError("The model is None")
if isinstance(self.model, PPO):
a = self.model.predict(np.array(o), deterministic=True)[0]
a = self.model.predict(np.array(o), deterministic=deterministic)[0]
try:
return int(a)
except Exception:
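Here the flag is forwarded to the underlying predict call, so callers can choose between the greedy action and a sampled one. A small standalone sketch, assuming the PPO class is stable-baselines3's PPO and that the model path and observation below are placeholders:

    import numpy as np
    from stable_baselines3 import PPO

    model = PPO.load("ppo_model.zip")   # assumed path to a saved model
    o = [0.0, 1.0, 0.0]                 # assumed observation
    a_greedy = model.predict(np.array(o), deterministic=True)[0]
    a_sampled = model.predict(np.array(o), deterministic=False)[0]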
@@ -26,11 +26,12 @@ def __init__(self, player_type: PlayerType, actions: List[Action],
self.stage_policy_tensor = stage_policy_tensor
self.policy_type = PolicyType.RANDOM

def action(self, o: Union[List[Union[int, float]], int, float]) -> int:
def action(self, o: Union[List[Union[int, float]], int, float], deterministic: bool = True) -> int:
"""
Selects the next action
:param o: the input observation
:param deterministic: boolean flag indicating whether the action selection should be deterministic
:return: the next action and its probability
"""
action = random.choice(self.actions)
@@ -37,11 +37,12 @@ def __init__(self, player_type: PlayerType, actions: List[Action], lookup_table:
self.avg_R = avg_R
self.policy_type = PolicyType.TABULAR

def action(self, o: Union[int, float]) -> Union[int, List[int], float, NDArray[Any]]:
def action(self, o: Union[int, float], deterministic: bool = True) -> Union[int, List[int], float, NDArray[Any]]:
"""
Selects the next action
:param o: the input observation
:param deterministic: boolean flag indicating whether the action selection should be deterministic
:return: the next action and its probability
"""
return int(np.random.choice(np.arange(0, len(self.lookup_table[int(o)])), p=self.lookup_table[int(o)]))
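The tabular policy samples the next action from the probability row of its lookup table indexed by the observation. A toy illustration of the same sampling call with an assumed two-row, three-action table:

    import numpy as np

    lookup_table = [[0.2, 0.5, 0.3],   # action distribution for observation 0 (assumed values)
                    [1.0, 0.0, 0.0]]   # observation 1 always selects action 0
    o = 0
    a = int(np.random.choice(np.arange(0, len(lookup_table[int(o)])), p=lookup_table[int(o)]))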
@@ -31,11 +31,13 @@ def __init__(self, player_type: PlayerType, actions: List[int], policy_vector: L
self.avg_R = avg_R
self.policy_type = PolicyType.VECTOR

def action(self, o: Union[List[Union[int, float]], int, float]) -> Union[int, List[int], float, NDArray[Any]]:
def action(self, o: Union[List[Union[int, float]], int, float], deterministic: bool = True) \
-> Union[int, List[int], float, NDArray[Any]]:
"""
Selects the next action
:param o: the input observation
:param deterministic: boolean flag indicating whether the action selection should be deterministic
:return: the next action and its probability
"""
return float(np.random.choice(np.arange(0, len(self.policy_vector)), p=self.policy_vector))
@@ -59,10 +59,12 @@ def step(self, action: int) -> Tuple[npt.NDArray[Any], float, bool, bool, Dict[s
defender_action_host == env_constants.CYBORG.OP_SERVER0:
self.op_server_restored = True
self.red_action_targets[self.red_agent_state] = self.red_agent_target
s_prime = self.apply_defender_action_to_state(s=self.s, defender_action_type=defender_action_type,
s_prime, last_obs = self.apply_defender_action_to_state(s=self.s, defender_action_type=defender_action_type,
defender_action_host_id=defender_action_host_id,
decoy_action_types=self.decoy_action_types,
decoy_actions_per_host=self.decoy_actions_per_host)
decoy_actions_per_host=self.decoy_actions_per_host,
last_obs=self.last_obs)
self.last_obs = last_obs
next_red_action_type = CyborgModelWrapper.get_red_agent_action_type_from_state(
red_agent_state=self.red_agent_state)
is_red_action_feasible = CyborgModelWrapper.is_red_action_feasible(red_agent_state=self.red_agent_state,
@@ -350,7 +352,8 @@ def is_red_action_feasible(red_agent_state: int, s: List[List[int]], target_host
@staticmethod
def apply_defender_action_to_state(s: List[List[int]], defender_action_type: BlueAgentActionType,
defender_action_host_id: int, decoy_action_types: List[BlueAgentActionType],
decoy_actions_per_host: List[List[BlueAgentActionType]]) -> List[List[int]]:
decoy_actions_per_host: List[List[BlueAgentActionType]],
last_obs: List[List[int]]) -> Tuple[List[List[int]], List[List[int]]]:
"""
Applies a given defender action to the state
@@ -359,7 +362,8 @@ def apply_defender_action_to_state(s: List[List[int]], defender_action_type: Blu
:param defender_action_host_id: the id of the host that the defender targets
:param decoy_action_types: a list of decoy action types
:param decoy_actions_per_host: a list of decoy action types per host
:return: the updated state
:param last_obs: the last observation
:return: the updated state and observation
"""
if (defender_action_type in decoy_action_types
and s[defender_action_host_id][env_constants.CYBORG.HOST_STATE_DECOY_IDX] ==
Expand All @@ -371,10 +375,13 @@ def apply_defender_action_to_state(s: List[List[int]], defender_action_type: Blu
len(decoy_actions_per_host[defender_action_host_id]))
elif defender_action_type == BlueAgentActionType.RESTORE:
s[defender_action_host_id][env_constants.CYBORG.HOST_STATE_ACCESS_IDX] = CompromisedType.NO.value
last_obs[defender_action_host_id][env_constants.CYBORG.HOST_STATE_ACCESS_IDX] = CompromisedType.NO.value
elif defender_action_type == BlueAgentActionType.REMOVE:
if s[defender_action_host_id][env_constants.CYBORG.HOST_STATE_ACCESS_IDX] == CompromisedType.USER.value:
s[defender_action_host_id][env_constants.CYBORG.HOST_STATE_ACCESS_IDX] = CompromisedType.NO.value
return s
last_obs[defender_action_host_id][env_constants.CYBORG.HOST_STATE_ACCESS_IDX] = \
CompromisedType.UNKNOWN.value
return s, last_obs

@staticmethod
def sample_next_red_agent_target(red_agent_state: int, red_agent_target: int) -> int:
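With this change apply_defender_action_to_state threads the defender's last observation through the transition and returns a pair, so RESTORE and REMOVE also update the observed access level rather than only the true state. A caller-side sketch mirroring the step() change earlier in this file (the variable names are taken from that hunk):

    s_prime, last_obs = CyborgModelWrapper.apply_defender_action_to_state(
        s=self.s, defender_action_type=defender_action_type,
        defender_action_host_id=defender_action_host_id,
        decoy_action_types=self.decoy_action_types,
        decoy_actions_per_host=self.decoy_actions_per_host,
        last_obs=self.last_obs)
    self.last_obs = last_obs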