
Commit

test
Limmen committed Jan 28, 2024
1 parent 8b777ba commit cbfa212
Showing 17 changed files with 61 additions and 38 deletions.
32 changes: 16 additions & 16 deletions examples/manual_play/attacker_profiling_4.py
@@ -258,8 +258,8 @@ def action_deterministic_success(attacker_state, state_vector, target):
horizon = 100
episodes = 100000000
save_every = 100
id = 41
seed = 4124775
id = 10
seed = 10819202
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
@@ -290,9 +290,9 @@ def action_deterministic_success(attacker_state, state_vector, target):
defender_actions_history = []
red_action_types = []
for i in range(horizon):
# ad = np.random.choice(defender_actions)
ad = np.random.choice(defender_actions)
# ad = np.random.choice([27, 28, 29, 30, 31, 32 ,33, 34, 35])
ad = 31
# ad = 31
# ad = 4
# ad = 27
o, r, done, _, info = csle_cyborg_env.step(action=ad)
@@ -390,17 +390,17 @@ def action_deterministic_success(attacker_state, state_vector, target):
# f"exploit prob: {exploit_prob}"
# if exploit_counts[10][0] > 0:
# print(exploit_success[10][3]/exploit_counts[10][3])
print(exploit_success[1][0]/exploit_counts[1][0])
# print(exploit_success[1][0]/exploit_counts[1][0])
# print(user_counts)

# if ep % save_every == 0:
# with open(f'/home/kim/exploit_success_{id}.npy', 'wb') as f:
# np.save(f, np.array(exploit_success))
# with open(f'/home/kim/exploit_counts_{id}.npy', 'wb') as f:
# np.save(f, np.array(exploit_counts))
# with open(f'/home/kim/exploit_root_{id}.npy', 'wb') as f:
# np.save(f, np.array(exploit_root))
# with open(f'/home/kim/exploit_user_{id}.npy', 'wb') as f:
# np.save(f, np.array(exploit_user))
# with open(f'/home/kim/exploit_type_counts_{id}.npy', 'wb') as f:
# np.save(f, np.array(exploit_type_counts))
if ep % save_every == 0:
with open(f'/home/kim/exploit_success_{id}.npy', 'wb') as f:
np.save(f, np.array(exploit_success))
with open(f'/home/kim/exploit_counts_{id}.npy', 'wb') as f:
np.save(f, np.array(exploit_counts))
with open(f'/home/kim/exploit_root_{id}.npy', 'wb') as f:
np.save(f, np.array(exploit_root))
with open(f'/home/kim/exploit_user_{id}.npy', 'wb') as f:
np.save(f, np.array(exploit_user))
with open(f'/home/kim/exploit_type_counts_{id}.npy', 'wb') as f:
np.save(f, np.array(exploit_type_counts))
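The save block above checkpoints the collected exploit statistics to .npy files every save_every episodes. As a rough sketch of how those checkpoints could be read back to estimate empirical exploit success probabilities (the path and id value come from the script above; the elementwise division is an illustrative assumption, not part of the commit):

    import numpy as np

    id = 10  # assumed to match the id used by the profiling script
    with open(f'/home/kim/exploit_success_{id}.npy', 'rb') as f:
        exploit_success = np.load(f)
    with open(f'/home/kim/exploit_counts_{id}.npy', 'rb') as f:
        exploit_counts = np.load(f)
    # Empirical success probability per cell; cells that were never attempted stay at 0
    success_prob = np.divide(exploit_success, exploit_counts,
                             out=np.zeros_like(exploit_success, dtype=float),
                             where=exploit_counts > 0)
    print(success_prob)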
2 changes: 1 addition & 1 deletion examples/manual_play/cyborg_rollout_three.py
@@ -31,7 +31,7 @@
total_R = 0
for t in range(100):
# a = POMCPUtil.rand_choice(A)
a = 4
a = 10
o, r, done, _, info = csle_cyborg_env.step(action=a)
obs_vec = CyborgEnvUtil.state_id_to_state_vector(state_id=info[constants.ENV_METRICS.OBSERVATION], observation=True)
# print(obs_vec)
@@ -40,11 +40,12 @@ def __init__(self, player_type: PlayerType, actions: List[Action], alpha_vectors
self.states = states
self.policy_type = PolicyType.ALPHA_VECTORS

def action(self, o: List[Union[int, float]]) -> int:
def action(self, o: List[Union[int, float]], deterministic: bool = True) -> int:
"""
Selects the next action
:param o: the belief
:param deterministic: boolean flag indicating whether the action selection should be deterministic
:return: the next action and its probability
"""
b = o
@@ -50,15 +50,16 @@ def __init__(self, model: Optional[DQN], simulation_name: str, save_path: str, p
self.avg_R = avg_R
self.policy_type = PolicyType.DQN

def action(self, o: List[float]) -> NDArray[Any]:
def action(self, o: List[float], deterministic: bool = True) -> NDArray[Any]:
"""
Multi-threshold stopping policy
:param o: the current observation
:param deterministic: boolean flag indicating whether the action selection should be deterministic
:return: the selected action
"""
if self.model is None:
raise ValueError("The model i None")
raise ValueError("The model is None")
a = self.model.predict(np.array(o), deterministic=False)[0]
return a

@@ -74,11 +74,12 @@ def __init__(self, policy_network, simulation_name: str, save_path: str,
f"There was an exception loading the model from path: {self.save_path}, "
f"exception: {str(e)}, {repr(e)}")

def action(self, o: NDArray[Any]) -> Any:
def action(self, o: NDArray[Any], deterministic: bool = True) -> Any:
"""
Multi-threshold stopping policy
:param o: the current observation
:param deterministic: boolean flag indicating whether the action selection should be deterministic
:return: the selected action
"""
state = torch.from_numpy(o).float()
@@ -47,11 +47,12 @@ def __init__(self, stopping_policy: LinearThresholdStoppingPolicy, action_policy
self.avg_R = avg_R
self.policy_type = PolicyType.LINEAR_TABULAR

def action(self, o: List[float]) -> Union[int, List[int], float, NDArray[Any]]:
def action(self, o: List[float], deterministic: bool = True) -> Union[int, List[int], float, NDArray[Any]]:
"""
Multi-threshold stopping policy
:param o: the current observation
:param deterministic: boolean flag indicating whether the action selection should be deterministic
:return: the selected action
"""
stop = self.stopping_policy.action(o=o[1:])
@@ -44,11 +44,12 @@ def __init__(self, theta, simulation_name: str, L: int, states: List[State], pla
self.opponent_strategy = opponent_strategy
self.policy_type = PolicyType.LINEAR_THRESHOLD

def action(self, o: List[float]) -> int:
def action(self, o: List[float], deterministic: bool = True) -> int:
"""
Multi-threshold stopping policy
:param o: the current observation
:param deterministic: boolean flag indicating whether the action selection should be deterministic
:return: the selected action
"""
if self.player_type == PlayerType.DEFENDER:
@@ -38,11 +38,12 @@ def __init__(self, simulation_name: str, player_type: PlayerType, states: List[S
self.avg_R = avg_R
self.policy_type = PolicyType.MIXED_LINEAR_TABULAR

def action(self, o: List[float]) -> Union[int, List[int], float, NDArray[Any]]:
def action(self, o: List[float], deterministic: bool = True) -> Union[int, List[int], float, NDArray[Any]]:
"""
Multi-threshold stopping policy
:param o: the current observation
:param deterministic: boolean flag indicating whether the action selection should be deterministic
:return: the selected action
"""
policy = np.random.choice(self.linear_tabular_policies)
@@ -60,11 +60,12 @@ def probability(self, o: List[float], a: int) -> int:
"""
return self.action(o=o) == a

def action(self, o: List[float]) -> int:
def action(self, o: List[float], deterministic: bool = True) -> int:
"""
Multi-threshold stopping policy
:param o: the current observation
:param deterministic: boolean flag indicating whether the action selection should be deterministic
:return: the selected action
"""
if not self.player_type == PlayerType.ATTACKER:
@@ -37,11 +37,13 @@ def __init__(self, simulation_name: str, player_type: PlayerType, states: List[S
self.avg_R = avg_R
self.policy_type = PolicyType.MIXED_PPO_POLICY

def action(self, o: List[float]) -> Union[int, float, npt.NDArray[Any]]:
def action(self, o: List[float], deterministic: bool = True) -> Union[int, float, npt.NDArray[Any]]:
"""
Multi-threshold stopping policy
:param o: the current observation
:param deterministic: boolean flag indicating whether the action selection should be deterministic
:return: the selected action
"""
policy: PPOPolicy = np.random.choice(self.ppo_policies)
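The mixed PPO policy draws one of its component PPO policies uniformly at random and delegates action selection to it. The delegated call is truncated in this hunk, but with the new flag it would plausibly forward deterministic as sketched below (an assumption, not shown in the diff):

    policy: PPOPolicy = np.random.choice(self.ppo_policies)
    a = policy.action(o=o, deterministic=deterministic)  # assumed delegation to the sampled sub-policy
    return a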
@@ -47,11 +47,12 @@ def __init__(self, theta: List[float], simulation_name: str, L: int, states: Lis
self.opponent_strategy = opponent_strategy
self.policy_type = PolicyType.MULTI_THRESHOLD

def action(self, o: List[float]) -> int:
def action(self, o: List[float], deterministic: bool = True) -> int:
"""
Multi-threshold stopping policy
:param o: the current observation
:param deterministic: boolean flag indicating whether the action selection should be deterministic
:return: the selected action
"""
if not self.player_type == PlayerType.ATTACKER:
@@ -22,11 +22,12 @@ def __init__(self, agent_type: AgentType, player_type: PlayerType) -> None:
self.player_type = player_type

@abstractmethod
def action(self, o: Any) -> Union[int, List[int], float, NDArray[Any]]:
def action(self, o: Any, deterministic: bool) -> Union[int, List[int], float, NDArray[Any]]:
"""
Calculates the next action
:param o: the input observation
:param deterministic: boolean flag indicating whether the action selection should be deterministic
:return: the action
"""
pass
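This abstract method defines the interface that the concrete policies in this commit now share: an observation plus a deterministic flag. A minimal usage sketch, assuming a concrete policy instance and a suitably shaped observation o (both hypothetical):

    # greedy (deterministic) action selection
    a = policy.action(o=o, deterministic=True)
    # sampled (stochastic) action selection, e.g. for exploration or rollouts
    a = policy.action(o=o, deterministic=False)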
@@ -56,17 +56,18 @@ def __init__(self, model: Union[None, PPO, PPONetwork], simulation_name: str, sa
self.avg_R = avg_R
self.policy_type = PolicyType.PPO

def action(self, o: Union[List[float], List[int]]) -> Union[int, float, npt.NDArray[Any]]:
def action(self, o: Union[List[float], List[int]], deterministic: bool = True) -> Union[int, float, npt.NDArray[Any]]:
"""
Multi-threshold stopping policy
:param o: the current observation
:param deterministic: boolean flag indicating whether the action selection should be deterministic
:return: the selected action
"""
if self.model is None:
raise ValueError("The model is None")
if isinstance(self.model, PPO):
a = self.model.predict(np.array(o), deterministic=True)[0]
a = self.model.predict(np.array(o), deterministic=deterministic)[0]
try:
return int(a)
except Exception:
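Here the flag is forwarded to the underlying predict call, so callers can choose between the greedy action and a sampled one. A small standalone sketch, assuming the PPO class is stable-baselines3's PPO and that the model path and observation below are placeholders:

    import numpy as np
    from stable_baselines3 import PPO

    model = PPO.load("ppo_model.zip")   # assumed path to a saved model
    o = [0.0, 1.0, 0.0]                 # assumed observation
    a_greedy = model.predict(np.array(o), deterministic=True)[0]
    a_sampled = model.predict(np.array(o), deterministic=False)[0]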
@@ -26,11 +26,12 @@ def __init__(self, player_type: PlayerType, actions: List[Action],
self.stage_policy_tensor = stage_policy_tensor
self.policy_type = PolicyType.RANDOM

def action(self, o: Union[List[Union[int, float]], int, float]) -> int:
def action(self, o: Union[List[Union[int, float]], int, float], deterministic: bool = True) -> int:
"""
Selects the next action
:param o: the input observation
:param deterministic: boolean flag indicating whether the action selection should be deterministic
:return: the next action and its probability
"""
action = random.choice(self.actions)
@@ -37,11 +37,12 @@ def __init__(self, player_type: PlayerType, actions: List[Action], lookup_table:
self.avg_R = avg_R
self.policy_type = PolicyType.TABULAR

def action(self, o: Union[int, float]) -> Union[int, List[int], float, NDArray[Any]]:
def action(self, o: Union[int, float], deterministic: bool = True) -> Union[int, List[int], float, NDArray[Any]]:
"""
Selects the next action
:param o: the input observation
:param deterministic: boolean flag indicating whether the action selection should be deterministic
:return: the next action and its probability
"""
return int(np.random.choice(np.arange(0, len(self.lookup_table[int(o)])), p=self.lookup_table[int(o)]))
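The tabular policy samples the next action from the probability row of its lookup table indexed by the observation. A toy illustration of the same sampling call with an assumed two-row, three-action table:

    import numpy as np

    lookup_table = [[0.2, 0.5, 0.3],   # action distribution for observation 0 (assumed values)
                    [1.0, 0.0, 0.0]]   # observation 1 always selects action 0
    o = 0
    a = int(np.random.choice(np.arange(0, len(lookup_table[int(o)])), p=lookup_table[int(o)]))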
@@ -31,11 +31,13 @@ def __init__(self, player_type: PlayerType, actions: List[int], policy_vector: L
self.avg_R = avg_R
self.policy_type = PolicyType.VECTOR

def action(self, o: Union[List[Union[int, float]], int, float]) -> Union[int, List[int], float, NDArray[Any]]:
def action(self, o: Union[List[Union[int, float]], int, float], deterministic: bool = True) \
-> Union[int, List[int], float, NDArray[Any]]:
"""
Selects the next action
:param o: the input observation
:param deterministic: boolean flag indicating whether the action selection should be deterministic
:return: the next action and its probability
"""
return float(np.random.choice(np.arange(0, len(self.policy_vector)), p=self.policy_vector))
@@ -59,10 +59,12 @@ def step(self, action: int) -> Tuple[npt.NDArray[Any], float, bool, bool, Dict[s
defender_action_host == env_constants.CYBORG.OP_SERVER0:
self.op_server_restored = True
self.red_action_targets[self.red_agent_state] = self.red_agent_target
s_prime = self.apply_defender_action_to_state(s=self.s, defender_action_type=defender_action_type,
s_prime, last_obs = self.apply_defender_action_to_state(s=self.s, defender_action_type=defender_action_type,
defender_action_host_id=defender_action_host_id,
decoy_action_types=self.decoy_action_types,
decoy_actions_per_host=self.decoy_actions_per_host)
decoy_actions_per_host=self.decoy_actions_per_host,
last_obs=self.last_obs)
self.last_obs = last_obs
next_red_action_type = CyborgModelWrapper.get_red_agent_action_type_from_state(
red_agent_state=self.red_agent_state)
is_red_action_feasible = CyborgModelWrapper.is_red_action_feasible(red_agent_state=self.red_agent_state,
@@ -350,7 +352,8 @@ def is_red_action_feasible(red_agent_state: int, s: List[List[int]], target_host
@staticmethod
def apply_defender_action_to_state(s: List[List[int]], defender_action_type: BlueAgentActionType,
defender_action_host_id: int, decoy_action_types: List[BlueAgentActionType],
decoy_actions_per_host: List[List[BlueAgentActionType]]) -> List[List[int]]:
decoy_actions_per_host: List[List[BlueAgentActionType]],
last_obs: List[List[int]]) -> Tuple[List[List[int]], List[List[int]]]:
"""
Applies a given defender action to the state
@@ -359,7 +362,8 @@ def apply_defender_action_to_state(s: List[List[int]], defender_action_type: Blu
:param defender_action_host_id: the id of the host that the defender targets
:param decoy_action_types: a list of decoy action types
:param decoy_actions_per_host: a list of decoy action types per host
:return: the updated state
:param last_obs: the last observation
:return: the updated state and observation
"""
if (defender_action_type in decoy_action_types
and s[defender_action_host_id][env_constants.CYBORG.HOST_STATE_DECOY_IDX] ==
Expand All @@ -371,10 +375,13 @@ def apply_defender_action_to_state(s: List[List[int]], defender_action_type: Blu
len(decoy_actions_per_host[defender_action_host_id]))
elif defender_action_type == BlueAgentActionType.RESTORE:
s[defender_action_host_id][env_constants.CYBORG.HOST_STATE_ACCESS_IDX] = CompromisedType.NO.value
last_obs[defender_action_host_id][env_constants.CYBORG.HOST_STATE_ACCESS_IDX] = CompromisedType.NO.value
elif defender_action_type == BlueAgentActionType.REMOVE:
if s[defender_action_host_id][env_constants.CYBORG.HOST_STATE_ACCESS_IDX] == CompromisedType.USER.value:
s[defender_action_host_id][env_constants.CYBORG.HOST_STATE_ACCESS_IDX] = CompromisedType.NO.value
return s
last_obs[defender_action_host_id][env_constants.CYBORG.HOST_STATE_ACCESS_IDX] = \
CompromisedType.UNKNOWN.value
return s, last_obs

@staticmethod
def sample_next_red_agent_target(red_agent_state: int, red_agent_target: int) -> int:
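With this change apply_defender_action_to_state threads the defender's last observation through the transition and returns a pair, so RESTORE and REMOVE also update the observed access level rather than only the true state. A caller-side sketch mirroring the step() change earlier in this file (the variable names are taken from that hunk):

    s_prime, last_obs = CyborgModelWrapper.apply_defender_action_to_state(
        s=self.s, defender_action_type=defender_action_type,
        defender_action_host_id=defender_action_host_id,
        decoy_action_types=self.decoy_action_types,
        decoy_actions_per_host=self.decoy_actions_per_host,
        last_obs=self.last_obs)
    self.last_obs = last_obs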