
Commit

Merge pull request #214 from stratosphereips/ondra-add-optimal-trajectories

Ondra add optimal trajectories
ondrej-lukas authored Jul 3, 2024
2 parents 770ae46 + d6c95ee commit 4fff6ea
Showing 11 changed files with 534 additions and 133 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/python-checks.yml
@@ -29,4 +29,4 @@ jobs:
- name: Lint with ruff
run: |
# stop the build if there are Python syntax errors or undefined names
ruff --output-format=github --select=E9,F4,F6,F7,F8,N8 --ignore=F405 --target-version=py310 --line-length=120 .
ruff check --output-format=github --select=E9,F4,F6,F7,F8,N8 --ignore=F405 --target-version=py310 --line-length=120 .
1 change: 1 addition & 0 deletions .gitignore
@@ -156,3 +156,4 @@ aim*/
figures/*
*trajectories*.json
.vscode/settings.json
trajectories/*
11 changes: 10 additions & 1 deletion README.md
@@ -102,7 +102,6 @@ env:
random_seed: 42
scenario: 'scenario1'
max_steps: 15
store_replay_buffer: True
use_dynamic_addresses: False
use_firewall: True
goal_reward: 100
@@ -243,6 +242,16 @@ For the data exfiltration we support 3 variants. The full scenario contains 5 cl
<tr><td><img src="readme_images/scenario_1.png" alt="Scenario 1 - Data exfiltration" width="300"></td><td><img src="readme_images/scenario 1_small.png" alt="Scenario 1 - small" width="300"></td><td><img src="readme_images/scenario_1_tiny.png" alt="Scenario 1 - tiny" width="300"></td></tr>
</table>

## Trajectory storing and analysis
A trajectory is a sequence of GameStates, Actions, and rewards in one run of a game. It contains the complete information about the actions played by the agent, the rewards observed, and their effect on the state of the environment. Trajectory visualization and analysis tools are described in [Trajectory analysis tools](./docs/Trajectory_analysis.md).

Trajectories performed by the agents can be stored in a file using the following configuration:
```YAML
env:
save_trajectories: True
```
> [!CAUTION]
> Trajectory files can grow very large very quickly. It is recommended to use this feature only for evaluation/testing runs. This feature is disabled by default.
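
When enabled, each trajectory is appended as one JSON object to a `.jsonl` file in the `./trajectories` directory, named by date, agent name, and agent role. Below is a minimal sketch of reading such a file back with the `jsonlines` package; the file name is only an illustration:
```python
import jsonlines

# Example path only; actual files are named <YYYY-MM-DD>_<agent_name>_<agent_role>.jsonl
with jsonlines.open("trajectories/2024-07-03_ExampleAgent_Attacker.jsonl") as reader:
    for trajectory in reader:
        total_reward = sum(trajectory["trajectory"]["rewards"])
        print(trajectory["agent_name"], trajectory["end_reason"], total_reward)
```
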
## Testing the environment

It is advised after every change you test if the env is running correctly by doing
55 changes: 52 additions & 3 deletions coordinator.py
@@ -2,10 +2,12 @@
# Server for the Aidojo project, coordinator
# Author: sebastian garcia, sebastian.garcia@agents.fel.cvut.cz
# Author: Ondrej Lukas, ondrej.lukas@aic.fel.cvut.cz
import jsonlines
import argparse
import logging
import json
import os
import asyncio
from datetime import datetime
from env.network_security_game import NetworkSecurityEnvironment
from env.game_components import Action, Observation, ActionType, GameStatus, GameState
from utils.utils import observation_as_dict, get_logging_level
@@ -279,6 +281,8 @@ def _initialize_new_player(self, agent_addr:tuple, agent_name:str, agent_role:st
self._agent_states[agent_addr] = self._world.create_state_from_view(self._agent_starting_position[agent_addr])
self._agent_goal_reached[agent_addr] = self._goal_reached(agent_addr)
self._agent_episode_ends[agent_addr] = False
if self._world.task_config.get_store_trajectories():
self._agent_trajectories[agent_addr] = self._reset_trajectory(agent_addr)
self.logger.info(f"\tAgent {agent_name} ({agent_addr}), registred as {agent_role}")
return Observation(self._agent_states[agent_addr], 0, False, {})

@@ -297,7 +301,7 @@ def _remove_player(self, agent_addr:tuple)->dict:
agent_info["agent_info"] = self.agents.pop(agent_addr)
self.logger.debug(f"\t{agent_info}")
else:
self.logger.warning(f"\t Player {agent_addr} not present in the game!")
self.logger.info(f"\t Player {agent_addr} not present in the game!")
return agent_info

def _get_starting_position_per_role(self)->dict:
@@ -389,7 +393,11 @@ def _create_response_to_reset_game_action(self, agent_addr: tuple) -> dict:
self.logger.info(
f"Coordinator responding to RESET request from agent {agent_addr}"
)
# store trajectory in file if needed
self._store_trajectory_to_file(agent_addr)
new_observation = Observation(self._agent_states[agent_addr], 0, self.episode_end, {})
# reset trajectory (only tracked when trajectory storing is enabled)
if agent_addr in self._agent_trajectories:
self._agent_trajectories[agent_addr] = self._reset_trajectory(agent_addr)
output_message_dict = {
"to_agent": agent_addr,
"status": str(GameStatus.OK),
@@ -402,6 +410,41 @@ def _create_response_to_reset_game_action(self, agent_addr: tuple) -> dict:
}
return output_message_dict

def _add_step_to_trajectory(self, agent_addr:tuple, action:Action, reward:float, next_state:GameState, end_reason:str)->None:
"""
Method for adding one step to the agent trajectory.
"""
if agent_addr in self._agent_trajectories:
self.logger.debug(f"Adding step to trajectory of {agent_addr}")
self._agent_trajectories[agent_addr]["trajectory"]["actions"].append(action.as_dict)
self._agent_trajectories[agent_addr]["trajectory"]["rewards"].append(reward)
self._agent_trajectories[agent_addr]["trajectory"]["states"].append(next_state.as_dict)
if end_reason:
self._agent_trajectories[agent_addr]["end_reason"] = end_reason

def _store_trajectory_to_file(self, agent_addr, location="./trajectories"):
self.logger.debug(f"Storing Trajectory of {agent_addr}in file")
if agent_addr in self._agent_trajectories:
agent_name, agent_role = self.agents[agent_addr]
filename = os.path.join(location, f"{datetime.now():%Y-%m-%d}_{agent_name}_{agent_role}.jsonl")
with jsonlines.open(filename, "a") as writer:
writer.write(self._agent_trajectories[agent_addr])
self.logger.info(f"Trajectory of {agent_addr} strored in {filename}")

def _reset_trajectory(self,agent_addr)->dict:
agent_name, agent_role = self.agents[agent_addr]
self.logger.debug(f"Resetting trajectory of {agent_addr}")
return {
"trajectory":{
"states":[self._agent_states[agent_addr].as_dict],
"actions":[],
"rewards":[],
},
"end_reason":None,
"agent_role":agent_role,
"agent_name":agent_name
}

def _process_generic_action(self, agent_addr: tuple, action: Action) -> dict:
"""
Method processing the Actions relevant to the environment
@@ -413,19 +456,25 @@ def _process_generic_action(self, agent_addr: tuple, action: Action) -> dict:
self._agent_steps[agent_addr] += 1
self.logger.info(f"{agent_addr} steps: {self._agent_steps[agent_addr]}")

current_state = self._agent_states[agent_addr]
# Build new Observation for the agent
self._agent_states[agent_addr] = self._world.step(self._agent_states[agent_addr], action, agent_addr, self.world_type)
self._agent_states[agent_addr] = self._world.step(current_state, action, agent_addr, self.world_type)
self._agent_goal_reached[agent_addr] = self._goal_reached(agent_addr)

reward = self._world._rewards["step"]
obs_info = {}
end_reason = None
if self._agent_goal_reached[agent_addr]:
reward += self._world._rewards["goal"]
self._agent_episode_ends[agent_addr] = True
end_reason = "goal_reached"
obs_info = {'end_reason': "goal_reached"}
elif self._agent_steps[agent_addr] >= self._steps_limit:
self._agent_episode_ends[agent_addr] = True
obs_info = {"end_reason": "max_steps"}
end_reason = "max_steps"
# record step in trajectory
self._add_step_to_trajectory(agent_addr, action, reward, self._agent_states[agent_addr], end_reason)
new_observation = Observation(self._agent_states[agent_addr], reward, self.episode_end, info=obs_info)

self._agent_observations[agent_addr] = new_observation
@@ -552,7 +601,7 @@ def goal_dict_satistfied(goal_dict:dict, known_dict: dict)-> bool:
action="store",
required=False,
type=str,
default="ERROR",
default="WARNING",
)

args = parser.parse_args()
31 changes: 31 additions & 0 deletions docs/Trajectory_analysis.md
@@ -0,0 +1,31 @@
# Trajectories and Trajectory analysis
Trajectories capture the interactions of agents in AI Dojo. They can be stored in a file for future analysis using the configuration option `save_trajectories: True` in the `env` section of the task configuration file. Trajectories are stored in JSON format, one JSON object per line, using [jsonlines](https://jsonlines.readthedocs.io/en/latest/).
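
For reference, a minimal `env` section enabling this option (the same as shown in the README):
```YAML
env:
  save_trajectories: True
```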

### Example of a trajectory
Below we show an example of a trajectory consisting of only one step. Starting from state *S1*, the agent takes action *A1*, moves to state *S2*, and receives an immediate reward `r = -1`:
```json
{
"agent_name": "ExampleAgent",
"agent_role": "Attacker",
"end_reason": "goal_reached",
"trajectory":
{
"states":[
"<DictRepresentation of State 1>",
"<DictRepresentation of State 2>"
],
"actions":[
"<DictRepresentation of Action 1>"
],
"rewards":[-1]
}
}
```
`agent_name` and `agent_role` are provided by the agent upon registration in the game. `end_reason` identifies how the episode ended. Currently, there are four options:
1. `goal_reached` - the attacker successfully reached the goal state and won the game
2. `detected` - the attacker was detected by the defender and subsequently lost the game
3. `max_steps` - the agent used the maximum allowed number of steps and the episode was terminated
4. `None` - the episode was interrupted before ending and the trajectory is incomplete.

## Trajectory analysis
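
As a small, hypothetical sketch of working with stored trajectories (not the project's dedicated tooling), the snippet below counts episode outcomes and computes the average episode length from a trajectory file; the file name is only an example:
```python
from collections import Counter
import jsonlines

end_reasons = Counter()
episode_lengths = []

# Example path only; one trajectory (episode) is stored per line
with jsonlines.open("trajectories/2024-07-03_ExampleAgent_Attacker.jsonl") as reader:
    for trajectory in reader:
        end_reasons[trajectory["end_reason"]] += 1
        episode_lengths.append(len(trajectory["trajectory"]["actions"]))

print("Episode outcomes:", dict(end_reasons))
print("Mean episode length:", sum(episode_lengths) / len(episode_lengths))
```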

1 change: 1 addition & 0 deletions env/netsecenv_conf.yaml
@@ -79,6 +79,7 @@ env:
store_replay_buffer: False
use_dynamic_addresses: False
use_firewall: True
save_trajectories: False
goal_reward: 100
detection_reward: -5
step_reward: -1
71 changes: 35 additions & 36 deletions env/network_security_game.py
@@ -11,7 +11,6 @@
import numpy as np
import logging
from faker import Faker
import json
from utils.utils import ConfigParser
import subprocess
import xml.etree.ElementTree as ElementTree
@@ -213,14 +212,14 @@ def __init__(self, task_config_file) -> None:
logger.info("Dynamic change of the IP and network addresses enabled")
self._faker_object = Faker()
Faker.seed(seed)
# read if replay buffer should be store on disc
if self.task_config.get_store_replay_buffer():
logger.info("Storing of replay buffer enabled")
self._episode_replay_buffer = []
self._trajectories = []
else:
logger.info("Storing of replay buffer disabled")
self._episode_replay_buffer = None
# # read if replay buffer should be store on disc
# if self.task_config.get_store_replay_buffer():
# logger.info("Storing of replay buffer enabled")
# self._episode_replay_buffer = []
# self._trajectories = []
# else:
# logger.info("Storing of replay buffer disabled")
self._episode_replay_buffer = None

# Make a copy of data placements so it is possible to reset to it when episode ends
self._data_original = copy.deepcopy(self._data)
@@ -1012,34 +1011,34 @@ def update_goal_descriptions(self, goal_description):
new_description = new_description.replace(str(ip), str(self._ip_mapping[ip]))
return new_description

def store_trajectories_to_file(self, filename:str)->None:
if self._trajectories:
logger.info(f"Saving trajectories to '{filename}'")
with open(filename, "w") as outfile:
json.dump(self._trajectories, outfile)
# def store_trajectories_to_file(self, filename:str)->None:
# if self._trajectories:
# logger.info(f"Saving trajectories to '{filename}'")
# with open(filename, "w") as outfile:
# json.dump(self._trajectories, outfile)

def save_trajectories(self, trajectory_filename=None):
steps = []
for state,action,reward,next_state in self._episode_replay_buffer:
steps.append({"s": state.as_dict, "a":action.as_dict, "r":reward, "s_next":next_state.as_dict})
goal_state = components.GameState(
known_hosts=self._goal_conditions["known_hosts"],
known_networks=self._goal_conditions["known_networks"],
controlled_hosts=self._goal_conditions["controlled_hosts"],
known_services=self._goal_conditions["known_services"],
known_data=self._goal_conditions["known_data"]
)
trajectory = {
"goal": goal_state.as_dict,
"end_reason":self._end_reason,
"trajectory":steps
}
if not trajectory_filename:
trajectory_filename = "NSG_trajectories.json"
if trajectory["end_reason"]:
self._trajectories.append(trajectory)
logger.info("Saving trajectories")
self.store_trajectories_to_file(trajectory_filename)
# def save_trajectories(self, trajectory_filename=None):
# steps = []
# for state,action,reward,next_state in self._episode_replay_buffer:
# steps.append({"s": state.as_dict, "a":action.as_dict, "r":reward, "s_next":next_state.as_dict})
# goal_state = components.GameState(
# known_hosts=self._goal_conditions["known_hosts"],
# known_networks=self._goal_conditions["known_networks"],
# controlled_hosts=self._goal_conditions["controlled_hosts"],
# known_services=self._goal_conditions["known_services"],
# known_data=self._goal_conditions["known_data"]
# )
# trajectory = {
# "goal": goal_state.as_dict,
# "end_reason":self._end_reason,
# "trajectory":steps
# }
# if not trajectory_filename:
# trajectory_filename = "NSG_trajectories.json"
# if trajectory["end_reason"]:
# self._trajectories.append(trajectory)
# logger.info("Saving trajectories")
# self.store_trajectories_to_file(trajectory_filename)

def reset(self, trajectory_filename=None)->None:
"""
1 change: 1 addition & 0 deletions requirements.txt
@@ -9,6 +9,7 @@ Faker==23.2.1
importlib-metadata==7.0.1
iniconfig==2.0.0
Jinja2==3.1.3
jsonlines==4.0.0
jsonpickle==3.0.2
MarkupSafe==2.1.5
mypy-extensions==1.0.0
94 changes: 94 additions & 0 deletions trajectory_paper_task_config.yaml
@@ -0,0 +1,94 @@
# Configuration file for the NetSecGame environment

coordinator:
# Define the conf for all the 'attacker' agents.
agents:
attackers:
goal:
description: "Exfiltrate data to '213.47.23.195'"
# Set randomize_goal_every_episode to True if you set any of the goal values to 'random'
is_any_part_of_goal_random: False
known_networks: []
#known_networks: [192.168.1.0/24, 192.168.3.0/24]
known_hosts: []
#known_hosts: [192.168.1.1, 192.168.1.2]
controlled_hosts: []
#controlled_hosts: [213.47.23.195, 192.168.1.3]
# Services are defined as a target host where the service must be, and then a description in the form 'name,type,version,is_local'
known_services: {}
#known_services: {192.168.1.3: [Local system, lanman server, 10.0.19041, False], 192.168.1.4: [Other system, SMB server, 21.2.39421, False]}
# In data, put the target host that must have the data and which data in format user,data
# Example to fix the data in one host
known_data: {213.47.23.195: [[User1,DataFromServer1]]}
# Example to fix two data in one host
#known_data: {213.47.23.195: [[User1,DataFromServer1], [User5,DataFromServer5]]}
# Example to fix the data in two hosts
#known_data: {213.47.23.195: [User1,DataFromServer1], 192.168.3.1: [User3,Data3FromServer3]}
# Example to ask for random data in a specific server. Putting 'random' in the data forces the env to randomly choose where the goal data is
# known_data: {213.47.23.195: [random]}

start_position:
known_networks: []
known_hosts: []
# The attacker must always at least control the CC if the goal is to exfiltrate there
# Example of fixing the starting point of the agent in a local host
controlled_hosts: [213.47.23.195, random]
# Example of asking a random position to start the agent
# controlled_hosts: [213.47.23.195, random]
# Services are defined as a target host where the service must be, and then a description in the form 'name,type,version,is_local'
known_services: {}
# known_services: {192.168.1.3: [Local system, lanman server, 10.0.19041, False], 192.168.1.4: [Other system, SMB server, 21.2.39421, False]}
# Same format as before
known_data: {}

defenders:
# types are StochasticDefender and NoDefender
#type: 'StochasticDefender'
type: 'StochasticWithThreshold'
# type: 'NoDefender'
tw_size: 5
thresholds:
scan_network: # if both conditions are true, you are never detected
consecutive_actions: 2 # min amount of consecutive actions you can do without detection
tw_ratio: 0.25 # min ratio of actions in the tw below which you are not detected
find_services:
consecutive_actions: 3
tw_ratio: 0.3
exploit_service:
repeated_actions_episode: 2
tw_ratio: 0.25
find_data:
tw_ratio: 0.5
repeated_actions_episode: 2
exfiltrate_data:
consecutive_actions: 2
tw_ratio: 0.25
action_detetection_prob:
scan_network: 0.05
find_services: 0.075
exploit_service: 0.1
find_data: 0.025
exfiltrate_data: 0.025
env:
# random means to choose the seed in a random way, so it is not fixed
random_seed: 'random'
# Or you can fix the seed
# random_seed: 42
scenario: 'scenario1'
max_steps: 100
store_replay_buffer: True
use_dynamic_addresses: False
goal_reward: 100
detection_reward: -5
step_reward: -1
actions:
scan_network:
prob_success: 1.0
find_services:
prob_success: 1.0
exploit_service:
prob_success: 1.0
find_data:
prob_success: 1.0
exfiltrate_data:
prob_success: 1.0
