
Commit

Merge pull request #214 from stratosphereips/ondra-add-optimal-trajectories

Ondra add optimal trajectories
ondrej-lukas authored Jul 3, 2024
2 parents 770ae46 + d6c95ee commit 4fff6ea
Showing 11 changed files with 534 additions and 133 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/python-checks.yml
@@ -29,4 +29,4 @@ jobs:
- name: Lint with ruff
run: |
# stop the build if there are Python syntax errors or undefined names
ruff --output-format=github --select=E9,F4,F6,F7,F8,N8 --ignore=F405 --target-version=py310 --line-length=120 .
ruff check --output-format=github --select=E9,F4,F6,F7,F8,N8 --ignore=F405 --target-version=py310 --line-length=120 .
1 change: 1 addition & 0 deletions .gitignore
@@ -156,3 +156,4 @@ aim*/
figures/*
*trajectories*.json
.vscode/settings.json
trajectories/*
11 changes: 10 additions & 1 deletion README.md
@@ -102,7 +102,6 @@ env:
random_seed: 42
scenario: 'scenario1'
max_steps: 15
store_replay_buffer: True
use_dynamic_addresses: False
use_firewall: True
goal_reward: 100
@@ -243,6 +242,16 @@ For the data exfiltration we support 3 variants. The full scenario contains 5 cl
<tr><td><img src="readme_images/scenario_1.png" alt="Scenario 1 - Data exfiltration" width="300"></td><td><img src="readme_images/scenario 1_small.png" alt="Scenario 1 - small" width="300"></td><td><img src="readme_images/scenario_1_tiny.png" alt="Scenario 1 - tiny" width="300"></td></tr>
</table>

## Trajectory storing and analysis
A trajectory is a sequence of GameStates, Actions, and rewards in one run of a game. It contains the complete information about the actions played by the agent, the rewards observed, and their effect on the state of the environment. Trajectory visualization and analysis tools are described in [Trajectory analysis tools](./docs/Trajectory_analysis.md).

Trajectories performed by the agents can be stored in a file using the following configuration:
```YAML
env:
save_trajectories: True
```
> [!CAUTION]
> Trajectory files can grow very large very quickly. It is recommended to use this feature only for evaluation/testing runs. This feature is disabled by default.
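
When enabled, each trajectory is appended as one JSON object to a `.jsonl` file in the `./trajectories` directory, named by date, agent name, and agent role. Below is a minimal sketch of reading such a file back with the `jsonlines` package; the file name is only an illustration:
```python
import jsonlines

# Example path only; actual files are named <YYYY-MM-DD>_<agent_name>_<agent_role>.jsonl
with jsonlines.open("trajectories/2024-07-03_ExampleAgent_Attacker.jsonl") as reader:
    for trajectory in reader:
        total_reward = sum(trajectory["trajectory"]["rewards"])
        print(trajectory["agent_name"], trajectory["end_reason"], total_reward)
```
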
## Testing the environment

It is advised after every change you test if the env is running correctly by doing
55 changes: 52 additions & 3 deletions coordinator.py
@@ -2,10 +2,12 @@
# Server for the Aidojo project, coordinator
# Author: sebastian garcia, sebastian.garcia@agents.fel.cvut.cz
# Author: Ondrej Lukas, ondrej.lukas@aic.fel.cvut.cz
import jsonlines
import argparse
import logging
import json
import os
import asyncio
from datetime import datetime
from env.network_security_game import NetworkSecurityEnvironment
from env.game_components import Action, Observation, ActionType, GameStatus, GameState
from utils.utils import observation_as_dict, get_logging_level
@@ -279,6 +281,8 @@ def _initialize_new_player(self, agent_addr:tuple, agent_name:str, agent_role:st
self._agent_states[agent_addr] = self._world.create_state_from_view(self._agent_starting_position[agent_addr])
self._agent_goal_reached[agent_addr] = self._goal_reached(agent_addr)
self._agent_episode_ends[agent_addr] = False
if self._world.task_config.get_store_trajectories():
self._agent_trajectories[agent_addr] = self._reset_trajectory(agent_addr)
self.logger.info(f"\tAgent {agent_name} ({agent_addr}), registred as {agent_role}")
return Observation(self._agent_states[agent_addr], 0, False, {})

@@ -297,7 +301,7 @@ def _remove_player(self, agent_addr:tuple)->dict:
agent_info["agent_info"] = self.agents.pop(agent_addr)
self.logger.debug(f"\t{agent_info}")
else:
self.logger.warning(f"\t Player {agent_addr} not present in the game!")
self.logger.info(f"\t Player {agent_addr} not present in the game!")
return agent_info

def _get_starting_position_per_role(self)->dict:
@@ -389,7 +393,11 @@ def _create_response_to_reset_game_action(self, agent_addr: tuple) -> dict:
self.logger.info(
f"Coordinator responding to RESET request from agent {agent_addr}"
)
# store trajectory in file if needed
self._store_trajectory_to_file(agent_addr)
new_observation = Observation(self._agent_states[agent_addr], 0, self.episode_end, {})
# reset trajectory (only tracked when trajectory storing is enabled)
if agent_addr in self._agent_trajectories:
self._agent_trajectories[agent_addr] = self._reset_trajectory(agent_addr)
output_message_dict = {
"to_agent": agent_addr,
"status": str(GameStatus.OK),
@@ -402,6 +410,41 @@ def _create_response_to_reset_game_action(self, agent_addr: tuple) -> dict:
}
return output_message_dict

def _add_step_to_trajectory(self, agent_addr:tuple, action:Action, reward:float, next_state:GameState, end_reason:str)->None:
"""
Method for adding one step to the agent trajectory.
"""
if agent_addr in self._agent_trajectories:
self.logger.debug(f"Adding step to trajectory of {agent_addr}")
self._agent_trajectories[agent_addr]["trajectory"]["actions"].append(action.as_dict)
self._agent_trajectories[agent_addr]["trajectory"]["rewards"].append(reward)
self._agent_trajectories[agent_addr]["trajectory"]["states"].append(next_state.as_dict)
if end_reason:
self._agent_trajectories[agent_addr]["end_reason"] = end_reason

def _store_trajectory_to_file(self, agent_addr, location="./trajectories"):
self.logger.debug(f"Storing Trajectory of {agent_addr}in file")
if agent_addr in self._agent_trajectories:
agent_name, agent_role = self.agents[agent_addr]
filename = os.path.join(location, f"{datetime.now():%Y-%m-%d}_{agent_name}_{agent_role}.jsonl")
with jsonlines.open(filename, "a") as writer:
writer.write(self._agent_trajectories[agent_addr])
self.logger.info(f"Trajectory of {agent_addr} strored in {filename}")

def _reset_trajectory(self,agent_addr)->dict:
agent_name, agent_role = self.agents[agent_addr]
self.logger.debug(f"Resetting trajectory of {agent_addr}")
return {
"trajectory":{
"states":[self._agent_states[agent_addr].as_dict],
"actions":[],
"rewards":[],
},
"end_reason":None,
"agent_role":agent_role,
"agent_name":agent_name
}

def _process_generic_action(self, agent_addr: tuple, action: Action) -> dict:
"""
Method processing the Actions relevant to the environment
@@ -413,19 +456,25 @@ def _process_generic_action(self, agent_addr: tuple, action: Action) -> dict:
self._agent_steps[agent_addr] += 1
self.logger.info(f"{agent_addr} steps: {self._agent_steps[agent_addr]}")

current_state = self._agent_states[agent_addr]
# Build new Observation for the agent
self._agent_states[agent_addr] = self._world.step(self._agent_states[agent_addr], action, agent_addr, self.world_type)
self._agent_states[agent_addr] = self._world.step(current_state, action, agent_addr, self.world_type)
self._agent_goal_reached[agent_addr] = self._goal_reached(agent_addr)

reward = self._world._rewards["step"]
obs_info = {}
end_reason = None
if self._agent_goal_reached[agent_addr]:
reward += self._world._rewards["goal"]
self._agent_episode_ends[agent_addr] = True
end_reason = "goal_reached"
obs_info = {'end_reason': "goal_reached"}
elif self._agent_steps[agent_addr] >= self._steps_limit:
self._agent_episode_ends[agent_addr] = True
obs_info = {"end_reason": "max_steps"}
end_reason = "max_steps"
# record step in trajectory
self._add_step_to_trajectory(agent_addr, action, reward, self._agent_states[agent_addr], end_reason)
new_observation = Observation(self._agent_states[agent_addr], reward, self.episode_end, info=obs_info)

self._agent_observations[agent_addr] = new_observation
@@ -552,7 +601,7 @@ def goal_dict_satistfied(goal_dict:dict, known_dict: dict)-> bool:
action="store",
required=False,
type=str,
default="ERROR",
default="WARNING",
)

args = parser.parse_args()
31 changes: 31 additions & 0 deletions docs/Trajectory_analysis.md
@@ -0,0 +1,31 @@
# Trajectories and Trajectory analysis
Trajectories capture the interactions of agents in AI Dojo. They can be stored in a file for future analysis using the configuration option `save_trajectories: True` in the `env` section of the task configuration file. Trajectories are stored in JSON format, one JSON object per line, using [jsonlines](https://jsonlines.readthedocs.io/en/latest/).
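
For reference, a minimal `env` section enabling this option (the same as shown in the README):
```YAML
env:
  save_trajectories: True
```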

### Example of a trajectory
Below we show an example of a trajectory consisting of only one step. Starting from state *S1*, the agent takes action *A1*, moves to state *S2*, and receives an immediate reward `r = -1`:
```json
{
"agent_name": "ExampleAgent",
"agent_role": "Attacker",
"end_reason": "goal_reached",
"trajectory":
{
"states":[
"<DictRepresentation of State 1>",
"<DictRepresentation of State 2>"
],
"actions":[
"<DictRepresentation of Action 1>"
],
"rewards":[-1]
}
}
```
`agent_name` and `agent_role` are provided by the agent upon registration in the game. `end_reason` identifies how the episode ended. Currently, there are four options:
1. `goal_reached` - the attacker successfully reached the goal state and won the game
2. `detected` - the attacker was detected by the defender and subsequently lost the game
3. `max_steps` - the agent used the maximum allowed number of steps and the episode was terminated
4. `None` - the episode was interrupted before ending and the trajectory is incomplete.

## Trajectory analysis
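
As a small, hypothetical sketch of working with stored trajectories (not the project's dedicated tooling), the snippet below counts episode outcomes and computes the average episode length from a trajectory file; the file name is only an example:
```python
from collections import Counter
import jsonlines

end_reasons = Counter()
episode_lengths = []

# Example path only; one trajectory (episode) is stored per line
with jsonlines.open("trajectories/2024-07-03_ExampleAgent_Attacker.jsonl") as reader:
    for trajectory in reader:
        end_reasons[trajectory["end_reason"]] += 1
        episode_lengths.append(len(trajectory["trajectory"]["actions"]))

print("Episode outcomes:", dict(end_reasons))
print("Mean episode length:", sum(episode_lengths) / len(episode_lengths))
```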

1 change: 1 addition & 0 deletions env/netsecenv_conf.yaml
@@ -79,6 +79,7 @@ env:
store_replay_buffer: False
use_dynamic_addresses: False
use_firewall: True
save_trajectories: False
goal_reward: 100
detection_reward: -5
step_reward: -1
71 changes: 35 additions & 36 deletions env/network_security_game.py
@@ -11,7 +11,6 @@
import numpy as np
import logging
from faker import Faker
import json
from utils.utils import ConfigParser
import subprocess
import xml.etree.ElementTree as ElementTree
@@ -213,14 +212,14 @@ def __init__(self, task_config_file) -> None:
logger.info("Dynamic change of the IP and network addresses enabled")
self._faker_object = Faker()
Faker.seed(seed)
# read if replay buffer should be store on disc
if self.task_config.get_store_replay_buffer():
logger.info("Storing of replay buffer enabled")
self._episode_replay_buffer = []
self._trajectories = []
else:
logger.info("Storing of replay buffer disabled")
self._episode_replay_buffer = None
# # read if replay buffer should be store on disc
# if self.task_config.get_store_replay_buffer():
# logger.info("Storing of replay buffer enabled")
# self._episode_replay_buffer = []
# self._trajectories = []
# else:
# logger.info("Storing of replay buffer disabled")
self._episode_replay_buffer = None

# Make a copy of data placements so it is possible to reset to it when episode ends
self._data_original = copy.deepcopy(self._data)
@@ -1012,34 +1011,34 @@ def update_goal_descriptions(self, goal_description):
new_description = new_description.replace(str(ip), str(self._ip_mapping[ip]))
return new_description

def store_trajectories_to_file(self, filename:str)->None:
if self._trajectories:
logger.info(f"Saving trajectories to '{filename}'")
with open(filename, "w") as outfile:
json.dump(self._trajectories, outfile)
# def store_trajectories_to_file(self, filename:str)->None:
# if self._trajectories:
# logger.info(f"Saving trajectories to '{filename}'")
# with open(filename, "w") as outfile:
# json.dump(self._trajectories, outfile)

def save_trajectories(self, trajectory_filename=None):
steps = []
for state,action,reward,next_state in self._episode_replay_buffer:
steps.append({"s": state.as_dict, "a":action.as_dict, "r":reward, "s_next":next_state.as_dict})
goal_state = components.GameState(
known_hosts=self._goal_conditions["known_hosts"],
known_networks=self._goal_conditions["known_networks"],
controlled_hosts=self._goal_conditions["controlled_hosts"],
known_services=self._goal_conditions["known_services"],
known_data=self._goal_conditions["known_data"]
)
trajectory = {
"goal": goal_state.as_dict,
"end_reason":self._end_reason,
"trajectory":steps
}
if not trajectory_filename:
trajectory_filename = "NSG_trajectories.json"
if trajectory["end_reason"]:
self._trajectories.append(trajectory)
logger.info("Saving trajectories")
self.store_trajectories_to_file(trajectory_filename)
# def save_trajectories(self, trajectory_filename=None):
# steps = []
# for state,action,reward,next_state in self._episode_replay_buffer:
# steps.append({"s": state.as_dict, "a":action.as_dict, "r":reward, "s_next":next_state.as_dict})
# goal_state = components.GameState(
# known_hosts=self._goal_conditions["known_hosts"],
# known_networks=self._goal_conditions["known_networks"],
# controlled_hosts=self._goal_conditions["controlled_hosts"],
# known_services=self._goal_conditions["known_services"],
# known_data=self._goal_conditions["known_data"]
# )
# trajectory = {
# "goal": goal_state.as_dict,
# "end_reason":self._end_reason,
# "trajectory":steps
# }
# if not trajectory_filename:
# trajectory_filename = "NSG_trajectories.json"
# if trajectory["end_reason"]:
# self._trajectories.append(trajectory)
# logger.info("Saving trajectories")
# self.store_trajectories_to_file(trajectory_filename)

def reset(self, trajectory_filename=None)->None:
"""
1 change: 1 addition & 0 deletions requirements.txt
@@ -9,6 +9,7 @@ Faker==23.2.1
importlib-metadata==7.0.1
iniconfig==2.0.0
Jinja2==3.1.3
jsonlines==4.0.0
jsonpickle==3.0.2
MarkupSafe==2.1.5
mypy-extensions==1.0.0
94 changes: 94 additions & 0 deletions trajectory_paper_task_config.yaml
@@ -0,0 +1,94 @@
# Configuration file for the NetSecGame environment

coordinator:
# Define the conf for all the 'attacker' agents.
agents:
attackers:
goal:
description: "Exfiltrate data to '213.47.23.195'"
# Set randomize_goal_every_episode to True if you set any of the goal values to 'random'
is_any_part_of_goal_random: False
known_networks: []
#known_networks: [192.168.1.0/24, 192.168.3.0/24]
known_hosts: []
#known_hosts: [192.168.1.1, 192.168.1.2]
controlled_hosts: []
#controlled_hosts: [213.47.23.195, 192.168.1.3]
# Services are defined as a target host where the service must be, and then a description in the form 'name,type,version,is_local'
known_services: {}
#known_services: {192.168.1.3: [Local system, lanman server, 10.0.19041, False], 192.168.1.4: [Other system, SMB server, 21.2.39421, False]}
# In data, put the target host that must have the data and which data in format user,data
# Example to fix the data in one host
known_data: {213.47.23.195: [[User1,DataFromServer1]]}
# Example to fix two data in one host
#known_data: {213.47.23.195: [[User1,DataFromServer1], [User5,DataFromServer5]]}
# Example to fix the data in two hosts
#known_data: {213.47.23.195: [User1,DataFromServer1], 192.168.3.1: [User3,Data3FromServer3]}
# Example to ask for random data in a specific server. Putting 'random' in the data forces the env to randomly choose where the goal data is
# known_data: {213.47.23.195: [random]}

start_position:
known_networks: []
known_hosts: []
# The attacker must always at least control the CC if the goal is to exfiltrate there
# Example of fixing the starting point of the agent in a local host
controlled_hosts: [213.47.23.195, random]
# Example of asking a random position to start the agent
# controlled_hosts: [213.47.23.195, random]
# Services are defined as a target host where the service must be, and then a description in the form 'name,type,version,is_local'
known_services: {}
# known_services: {192.168.1.3: [Local system, lanman server, 10.0.19041, False], 192.168.1.4: [Other system, SMB server, 21.2.39421, False]}
# Same format as before
known_data: {}

defenders:
# types are StochasticDefender and NoDefender
#type: 'StochasticDefender'
type: 'StochasticWithThreshold'
# type: 'NoDefender'
tw_size: 5
thresholds:
scan_network: # if both conditions are true, you are never detected
consecutive_actions: 2 # min amount of consecutive actions you can do without detection
tw_ratio: 0.25 # min ratio of actions in the tw below which you are not detected
find_services:
consecutive_actions: 3
tw_ratio: 0.3
exploit_service:
repeated_actions_episode: 2
tw_ratio: 0.25
find_data:
tw_ratio: 0.5
repeated_actions_episode: 2
exfiltrate_data:
consecutive_actions: 2
tw_ratio: 0.25
action_detetection_prob:
scan_network: 0.05
find_services: 0.075
exploit_service: 0.1
find_data: 0.025
exfiltrate_data: 0.025
env:
# random means to choose the seed in a random way, so it is not fixed
random_seed: 'random'
# Or you can fix the seed
# random_seed: 42
scenario: 'scenario1'
max_steps: 100
store_replay_buffer: True
use_dynamic_addresses: False
goal_reward: 100
detection_reward: -5
step_reward: -1
actions:
scan_network:
prob_success: 1.0
find_services:
prob_success: 1.0
exploit_service:
prob_success: 1.0
find_data:
prob_success: 1.0
exfiltrate_data:
prob_success: 1.0
