diff --git a/README.md b/README.md index bc371c737..3bae5d58c 100644 --- a/README.md +++ b/README.md @@ -90,7 +90,7 @@ version of the documentation is available [here](./releases/) | Release | Last date of support | |---------------------------------------------------------------|----------------------| | [v.0.4.0](https://github.com/Limmen/csle/releases/tag/v0.4.0) | 2024-02-07 | -| [v.0.3.0](https://github.com/Limmen/csle/releases/tag/v0.3.0) | 2024-01-17 | +| [v.0.3.0](https://github.com/Limmen/csle/releases/tag/v0.3.0) | ~~2024-01-17~~ | | [v.0.2.0](https://github.com/Limmen/csle/releases/tag/v0.2.0) | ~~2023-10-30~~ | | [v.0.1.0](https://github.com/Limmen/csle/releases/tag/v0.1.0) | ~~2023-06-06~~ | diff --git a/examples/manual_play/cyborg_restore_defender.py b/examples/manual_play/cyborg_restore_defender.py index 8f2cce7e4..e5ffe9b8c 100644 --- a/examples/manual_play/cyborg_restore_defender.py +++ b/examples/manual_play/cyborg_restore_defender.py @@ -1,28 +1,51 @@ from gym_csle_cyborg.dao.csle_cyborg_config import CSLECyborgConfig from gym_csle_cyborg.dao.red_agent_type import RedAgentType from gym_csle_cyborg.envs.cyborg_scenario_two_defender import CyborgScenarioTwoDefender -from gym_csle_cyborg.dao.blue_agent_action_type import BlueAgentActionType +import gym_csle_cyborg.constants.constants as env_constants if __name__ == '__main__': config = CSLECyborgConfig( gym_env_name="csle-cyborg-scenario-two-v1", scenario=2, baseline_red_agents=[RedAgentType.B_LINE_AGENT], - maximum_steps=100, red_agent_distribution=[1.0], reduced_action_space=False, decoy_state=False, - scanned_state=False, decoy_optimization=False) + maximum_steps=100, red_agent_distribution=[1.0], reduced_action_space=True, decoy_state=True, + scanned_state=True, decoy_optimization=False) csle_cyborg_env = CyborgScenarioTwoDefender(config=config) - a = 1 - R = 0 - for t in range(1000): - o, r, done, _, info = csle_cyborg_env.step(a) - if done: - csle_cyborg_env.reset() - R += r - print(f"time-step: {t + 1}, cumulative reward: {R}, a: {a}") - for i in range(len(info["obs_per_host"])): - if csle_cyborg_env.cyborg_hostnames[i] == "User0": - continue - a = 1 - if info["obs_per_host"][i]["compromised"].value > 0: - host = csle_cyborg_env.cyborg_hostnames[i] - action_type = BlueAgentActionType.RESTORE - a = csle_cyborg_env.cyborg_action_type_and_host_to_id[(action_type, host)] - break + o, info = csle_cyborg_env.reset() + initial_state_id = info[env_constants.ENV_METRICS.STATE] + csle_cyborg_env.step(1) + csle_cyborg_env.set_state(state=initial_state_id) + # print(csle_cyborg_env.cyborg_challenge_env.env.env.env.env.env.environment_controller.observation["Red"].data["User0"]) + csle_cyborg_env.step(1) + + # print("INITIAL2 STATE") + # print(csle_cyborg_env.get_true_table()) + # # csle_cyborg_env.get_true_table() + # o, r, done, _, info = csle_cyborg_env.step(1) + # print("INITIAL1 STATE") + # print(csle_cyborg_env.get_true_table()) + # initial_obs_id = info[env_constants.ENV_METRICS.OBSERVATION] + # initial_state_id = info[env_constants.ENV_METRICS.STATE] + # # csle_cyborg_env.set_state(state=initial_state_id) + # csle_cyborg_env.step(1) + # print("SECOND STATE") + # print(csle_cyborg_env.get_true_table()) + # csle_cyborg_env.step(1) + # csle_cyborg_env.step(1) + # csle_cyborg_env.step(1) + # csle_cyborg_env.step(1) + # csle_cyborg_env.step(1) + # csle_cyborg_env.step(1) + # print(csle_cyborg_env.get_true_table()) + # print("SET STATE") + # csle_cyborg_env.set_state(state=initial_state_id) + # 
print(csle_cyborg_env.get_true_table()) + # csle_cyborg_env.step(1) + # print(csle_cyborg_env.get_true_table()) + # csle_cyborg_env.step(1) + # print(csle_cyborg_env.get_true_table()) + # csle_cyborg_env.step(1) + # print(csle_cyborg_env.get_true_table()) + # csle_cyborg_env.step(1) + # csle_cyborg_env.step(1) + # csle_cyborg_env.step(1) + # csle_cyborg_env.step(1) + # print(csle_cyborg_env.get_true_table()) diff --git a/examples/manual_play/cyborg_test.py b/examples/manual_play/cyborg_test.py index eaa030b6b..fc4707da5 100644 --- a/examples/manual_play/cyborg_test.py +++ b/examples/manual_play/cyborg_test.py @@ -2,78 +2,7 @@ from gym_csle_cyborg.dao.csle_cyborg_config import CSLECyborgConfig from gym_csle_cyborg.dao.red_agent_type import RedAgentType from gym_csle_cyborg.envs.cyborg_scenario_two_defender import CyborgScenarioTwoDefender - - -def info_to_vec(info, decoy_state, hosts): - """ - Creates the state vector - - :param info: the info - :param decoy_state: the decoy state - :param hosts: the host list - :return: the state vector - """ - state_vec = [] - for host in hosts: - known = info[host][3] - known = int(known) - scanned = info[host][4] - scanned = int(scanned) - access = info[host][5] - if access == "None": - access = 0 - elif access == "User": - access = 1 - else: - access = 2 - d_state = len(decoy_state[host]) - state_vec.append([known, scanned, access, d_state]) - return state_vec - - -def state_vec_to_id(state_vec): - """ - Converts a state vector to an id - - :param state_vec: the state vector to convert - :return: the id - """ - bin_id = "" - for host_vec in state_vec: - host_bin_str = "" - for i, elem in enumerate(host_vec): - if i == 0: - host_bin_str += format(elem, '01b') - if i == 1: - host_bin_str += format(elem, '01b') - if i == 2: - host_bin_str += format(elem, '02b') - if i == 3: - host_bin_str += format(elem, '03b') - bin_id += host_bin_str - id = int(bin_id, 2) - return id - - -def id_to_state_vec(id: int): - """ - Converts an id to a state vector - - :param id: the id to convert - :return: the state vector - """ - bin_str = format(id, "091b") - host_bins = [bin_str[i:i + 7] for i in range(0, len(bin_str), 7)] - state_vec = [] - for host_bin in host_bins: - known = int(host_bin[0:1], 2) - scanned = int(host_bin[1:2], 2) - access = int(host_bin[2:4], 2) - decoy = int(host_bin[4:7], 2) - host_vec = [known, scanned, access, decoy] - state_vec.append(host_vec) - return state_vec - +from gym_csle_cyborg.util.cyborg_env_util import CyborgEnvUtil if __name__ == '__main__': config = CSLECyborgConfig( @@ -84,23 +13,30 @@ def id_to_state_vec(id: int): str_info = str(csle_cyborg_env.cyborg_challenge_env.env.env.env.info) states = {} state_idx = 0 - host_state_lookup = host_state_to_id(hostnames=csle_cyborg_env.cyborg_hostnames) host_ids = list(csle_cyborg_env.cyborg_hostname_to_id.values()) for i in range(100000): done = False csle_cyborg_env.reset() actions = list(csle_cyborg_env.action_id_to_type_and_host.keys()) - state_key = str(csle_cyborg_env.cyborg_challenge_env.env.env.env.info) - if state_key not in states: - states[state_key] = state_idx + state_id = str(csle_cyborg_env.cyborg_challenge_env.env.env.env.info) + if state_id not in states: + states[state_id] = state_idx state_idx += 1 while not done: a = random.choice(actions) o, r, done, _, info = csle_cyborg_env.step(a) - state_vec = info_to_vec(csle_cyborg_env.get_true_table().rows, csle_cyborg_env.decoy_state, - host_state_lookup, host_ids) - state_key = state_vec_to_id(state_vec=state_vec) - stv = 
id_to_state_vec(id=state_key)
-            assert stv == state_vec
+            state_vector = CyborgEnvUtil.state_to_vector(state=csle_cyborg_env.get_true_table().rows,
+                                                         decoy_state=csle_cyborg_env.decoy_state, host_ids=host_ids,
+                                                         scan_state=csle_cyborg_env.scan_state)
+            state_id = CyborgEnvUtil.state_vector_to_state_id(state_vector=state_vector)
+            converted_state_vector = CyborgEnvUtil.state_id_to_state_vector(state_id=state_id)
+            assert converted_state_vector == state_vector
+            obs_vector = CyborgEnvUtil.state_to_vector(state=csle_cyborg_env.get_table().rows,
+                                                       decoy_state=csle_cyborg_env.decoy_state,
+                                                       host_ids=host_ids, scan_state=csle_cyborg_env.scan_state,
+                                                       observation=True)
+            obs_id = CyborgEnvUtil.state_vector_to_state_id(state_vector=obs_vector, observation=True)
+            converted_obs_vector = CyborgEnvUtil.state_id_to_state_vector(state_id=obs_id, observation=True)
+            assert converted_obs_vector == obs_vector
diff --git a/examples/training/pomcp/cyborg_scenario_two_defender/README.md b/examples/training/pomcp/cyborg_scenario_two_defender/README.md
new file mode 100644
index 000000000..86eeededd
--- /dev/null
+++ b/examples/training/pomcp/cyborg_scenario_two_defender/README.md
@@ -0,0 +1,20 @@
+# POMCP for defender planning in CybORG
+
+## Commands
+
+To run a script, execute:
+```bash
+python <script_name>
+```
+
+## Author & Maintainer
+
+Kim Hammar
+
+## Copyright and license
+
+[LICENSE](../../../../LICENSE.md)
+
+Creative Commons
+
+(C) 2020-2024, Kim Hammar
\ No newline at end of file
diff --git a/examples/training/pomcp/cyborg_scenario_two_defender/run_vs_random_attacker_v_001.py b/examples/training/pomcp/cyborg_scenario_two_defender/run_vs_random_attacker_v_001.py
new file mode 100644
index 000000000..bf24bad67
--- /dev/null
+++ b/examples/training/pomcp/cyborg_scenario_two_defender/run_vs_random_attacker_v_001.py
@@ -0,0 +1,89 @@
+import csle_common.constants.constants as constants
+from csle_common.dao.training.experiment_config import ExperimentConfig
+from csle_common.metastore.metastore_facade import MetastoreFacade
+from csle_common.dao.training.agent_type import AgentType
+from csle_common.dao.training.hparam import HParam
+from csle_common.dao.training.player_type import PlayerType
+from csle_agents.agents.pomcp.pomcp_agent import POMCPAgent
+import csle_agents.constants.constants as agents_constants
+from csle_agents.common.objective_type import ObjectiveType
+from gym_csle_cyborg.dao.csle_cyborg_config import CSLECyborgConfig
+from gym_csle_cyborg.dao.red_agent_type import RedAgentType
+from gym_csle_cyborg.envs.cyborg_scenario_two_defender import CyborgScenarioTwoDefender
+
+if __name__ == '__main__':
+    emulation_name = "csle-level9-040"
+    emulation_env_config = MetastoreFacade.get_emulation_by_name(emulation_name)
+    if emulation_env_config is None:
+        raise ValueError(f"Could not find an emulation environment with the name: {emulation_name}")
+    simulation_name = "csle-cyborg-001"
+    simulation_env_config = MetastoreFacade.get_simulation_by_name(simulation_name)
+    if simulation_env_config is None:
+        raise ValueError(f"Could not find a simulation with name: {simulation_name}")
+    simulation_env_config.simulation_env_input_config = CSLECyborgConfig(
+        gym_env_name="csle-cyborg-scenario-two-v1", scenario=2, baseline_red_agents=[RedAgentType.B_LINE_AGENT],
+        maximum_steps=100, red_agent_distribution=[1.0], reduced_action_space=True, scanned_state=True,
+        decoy_state=True, decoy_optimization=False)
+    csle_cyborg_env = CyborgScenarioTwoDefender(config=simulation_env_config.simulation_env_input_config)
+    A = 
csle_cyborg_env.get_action_space() + b1 = csle_cyborg_env.initial_belief + experiment_config = ExperimentConfig( + output_dir=f"{constants.LOGGING.DEFAULT_LOG_DIR}pomcp_test", title="POMCP test", + random_seeds=[399, 98912, 999, 555], + agent_type=AgentType.POMCP, + log_every=1, + hparams={ + agents_constants.POMCP.N: HParam(value=50, name=agents_constants.POMCP.N, + descr="the number of episodes"), + agents_constants.POMCP.OBJECTIVE_TYPE: HParam( + value=ObjectiveType.MAX, name=agents_constants.POMCP.OBJECTIVE_TYPE, + descr="the type of objective (max or min)"), + agents_constants.POMCP.ROLLOUT_POLICY: HParam( + value=None, name=agents_constants.POMCP.ROLLOUT_POLICY, + descr="the policy to use for rollouts"), + agents_constants.POMCP.VALUE_FUNCTION: HParam( + value=lambda x: 0, name=agents_constants.POMCP.VALUE_FUNCTION, + descr="the value function to use for truncated rollouts"), + agents_constants.POMCP.A: HParam(value=A, name=agents_constants.POMCP.A, descr="the action space"), + agents_constants.POMCP.GAMMA: HParam(value=0.99, name=agents_constants.POMCP.GAMMA, + descr="the discount factor"), + agents_constants.POMCP.REINVIGORATION: HParam(value=False, name=agents_constants.POMCP.REINVIGORATION, + descr="whether reinvigoration should be used"), + agents_constants.POMCP.INITIAL_BELIEF: HParam(value=b1, name=agents_constants.POMCP.INITIAL_BELIEF, + descr="the initial belief"), + agents_constants.POMCP.PLANNING_TIME: HParam(value=300, name=agents_constants.POMCP.PLANNING_TIME, + descr="the planning time"), + agents_constants.POMCP.MAX_PARTICLES: HParam(value=1000, name=agents_constants.POMCP.MAX_PARTICLES, + descr="the maximum number of belief particles"), + agents_constants.POMCP.MAX_DEPTH: HParam(value=500, name=agents_constants.POMCP.MAX_DEPTH, + descr="the maximum depth for planning"), + agents_constants.POMCP.C: HParam(value=0.35, name=agents_constants.POMCP.C, + descr="the weighting factor for UCB exploration"), + agents_constants.POMCP.LOG_STEP_FREQUENCY: HParam( + value=1, name=agents_constants.POMCP.LOG_STEP_FREQUENCY, descr="frequency of logging time-steps"), + agents_constants.POMCP.DEFAULT_NODE_VALUE: HParam( + value=-2000, name=agents_constants.POMCP.DEFAULT_NODE_VALUE, descr="the default node value in " + "the search tree"), + agents_constants.POMCP.VERBOSE: HParam(value=True, name=agents_constants.POMCP.VERBOSE, + descr="verbose logging flag"), + agents_constants.COMMON.EVAL_BATCH_SIZE: HParam(value=100, name=agents_constants.COMMON.EVAL_BATCH_SIZE, + descr="number of evaluation episodes"), + agents_constants.COMMON.CONFIDENCE_INTERVAL: HParam( + value=0.95, name=agents_constants.COMMON.CONFIDENCE_INTERVAL, + descr="confidence interval"), + agents_constants.COMMON.MAX_ENV_STEPS: HParam( + value=500, name=agents_constants.COMMON.MAX_ENV_STEPS, + descr="maximum number of steps in the environment (for envs with infinite horizon generally)"), + agents_constants.COMMON.RUNNING_AVERAGE: HParam( + value=100, name=agents_constants.COMMON.RUNNING_AVERAGE, + descr="the number of samples to include when computing the running avg"), + agents_constants.COMMON.GAMMA: HParam( + value=0.99, name=agents_constants.COMMON.GAMMA, + descr="the discount factor") + }, + player_type=PlayerType.DEFENDER, player_idx=0 + ) + agent = POMCPAgent(emulation_env_config=emulation_env_config, simulation_env_config=simulation_env_config, + experiment_config=experiment_config, save_to_metastore=False) + experiment_execution = agent.train() + MetastoreFacade.save_experiment_execution(experiment_execution) 
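A note on the POMCP interface change that the example scripts above and below reflect: the state-space (`S`) and observation-space (`O`) hyperparameters are removed, and `INITIAL_BELIEF` is now a `Dict[int, float]` mapping state ids to probabilities. A minimal sketch of converting a dense belief vector, mirroring what the stopping-POMDP example below does with `b1` (the vector values here are illustrative only):

```python
# Hypothetical dense belief vector over three states (index = state id).
b1 = [0.8, 0.2, 0.0]

# POMCP's INITIAL_BELIEF hyperparameter now expects a dict: state id -> probability.
initial_belief = {i: float(p) for i, p in enumerate(b1)}
assert initial_belief == {0: 0.8, 1: 0.2, 2: 0.0}
```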
diff --git a/examples/training/pomcp/stopping_pomdp_defender/README.md b/examples/training/pomcp/stopping_pomdp_defender/README.md
index ddfc2ad40..787ec14ab 100644
--- a/examples/training/pomcp/stopping_pomdp_defender/README.md
+++ b/examples/training/pomcp/stopping_pomdp_defender/README.md
@@ -1,4 +1,4 @@
-# Random Search for POMDP
+# POMCP for defender planning in stopping POMDP
 
 This directory contains example scripts for optimizing defender policies using random search for the POMDP from
 [https://ieeexplore.ieee.org/document/9779345](https://ieeexplore.ieee.org/document/9779345)
diff --git a/examples/training/pomcp/stopping_pomdp_defender/run_vs_random_attacker_v_001.py b/examples/training/pomcp/stopping_pomdp_defender/run_vs_random_attacker_v_001.py
index 2e4e59340..bec0283bb 100644
--- a/examples/training/pomcp/stopping_pomdp_defender/run_vs_random_attacker_v_001.py
+++ b/examples/training/pomcp/stopping_pomdp_defender/run_vs_random_attacker_v_001.py
@@ -53,6 +53,9 @@
     A = simulation_env_config.simulation_env_input_config.stopping_game_config.A1
     O = simulation_env_config.simulation_env_input_config.stopping_game_config.O
     b1 = simulation_env_config.simulation_env_input_config.stopping_game_config.b1
+    initial_belief = {}
+    for i in range(len(b1)):
+        initial_belief[i] = b1[i]
     rollout_policy = MultiThresholdStoppingPolicy(
         theta=[0.75], simulation_name=simulation_name, L=stopping_game_config.L,
         states=simulation_env_config.state_space_config.states, player_type=PlayerType.DEFENDER,
@@ -75,13 +78,14 @@
             agents_constants.POMCP.VALUE_FUNCTION: HParam(
                 value=lambda x: 0, name=agents_constants.POMCP.VALUE_FUNCTION,
                 descr="the value function to use for truncated rollouts"),
-            agents_constants.POMCP.S: HParam(value=S, name=agents_constants.POMCP.S, descr="the state space"),
-            agents_constants.POMCP.O: HParam(value=O, name=agents_constants.POMCP.O, descr="the observation space"),
             agents_constants.POMCP.A: HParam(value=A, name=agents_constants.POMCP.A, descr="the action space"),
             agents_constants.POMCP.GAMMA: HParam(value=0.99, name=agents_constants.POMCP.GAMMA,
                                                  descr="the discount factor"),
-            agents_constants.POMCP.INITIAL_BELIEF: HParam(value=b1, name=agents_constants.POMCP.INITIAL_BELIEF,
+            agents_constants.POMCP.INITIAL_BELIEF: HParam(value=initial_belief,
+                                                          name=agents_constants.POMCP.INITIAL_BELIEF,
                                                           descr="the initial belief"),
+            agents_constants.POMCP.REINVIGORATION: HParam(value=True, name=agents_constants.POMCP.REINVIGORATION,
+                                                          descr="whether reinvigoration should be used"),
             agents_constants.POMCP.PLANNING_TIME: HParam(value=120, name=agents_constants.POMCP.PLANNING_TIME,
                                                          descr="the planning time"),
             agents_constants.POMCP.MAX_PARTICLES: HParam(value=100, name=agents_constants.POMCP.MAX_PARTICLES,
@@ -90,8 +94,13 @@
                                                          descr="the maximum depth for planning"),
             agents_constants.POMCP.C: HParam(value=0.35, name=agents_constants.POMCP.C,
                                              descr="the weighting factor for UCB exploration"),
+            agents_constants.POMCP.DEFAULT_NODE_VALUE: HParam(
+                value=-2000, name=agents_constants.POMCP.DEFAULT_NODE_VALUE, descr="the default node value in "
+                                                                                   "the search tree"),
             agents_constants.POMCP.LOG_STEP_FREQUENCY: HParam(
                 value=1, name=agents_constants.POMCP.LOG_STEP_FREQUENCY, descr="frequency of logging time-steps"),
+            agents_constants.POMCP.VERBOSE: HParam(value=False, name=agents_constants.POMCP.VERBOSE,
+                                                   descr="verbose logging flag"),
             agents_constants.COMMON.EVAL_BATCH_SIZE: HParam(value=100, name=agents_constants.COMMON.EVAL_BATCH_SIZE,
                                                             descr="number of evaluation episodes"),
agents_constants.COMMON.CONFIDENCE_INTERVAL: HParam( diff --git a/examples/training/ppo/cyborg_scenario_two_defender/run_cyborg_scenario_two_defender_b_line_cardiff.py b/examples/training/ppo/cyborg_scenario_two_defender/run_cyborg_scenario_two_defender_b_line_cardiff.py index cad77ea6c..40de164ff 100644 --- a/examples/training/ppo/cyborg_scenario_two_defender/run_cyborg_scenario_two_defender_b_line_cardiff.py +++ b/examples/training/ppo/cyborg_scenario_two_defender/run_cyborg_scenario_two_defender_b_line_cardiff.py @@ -34,7 +34,7 @@ descr="number of steps in the environment for doing rollouts between policy updates"), agents_constants.COMMON.BATCH_SIZE: HParam(value=16, name=agents_constants.COMMON.BATCH_SIZE, descr="batch size for updates"), - agents_constants.COMMON.LEARNING_RATE: HParam(value=0.002, + agents_constants.COMMON.LEARNING_RATE: HParam(value=0.00005148, name=agents_constants.COMMON.LEARNING_RATE, descr="learning rate for updating the policy"), constants.NEURAL_NETWORKS.DEVICE: HParam(value="cpu", @@ -66,7 +66,7 @@ name=agents_constants.PPO.NUM_GRADIENT_STEPS, descr="number of gradient steps"), agents_constants.COMMON.NUM_TRAINING_TIMESTEPS: HParam( - value=int(2048) * 3500, name=agents_constants.COMMON.NUM_TRAINING_TIMESTEPS, + value=int(2048) * 4500, name=agents_constants.COMMON.NUM_TRAINING_TIMESTEPS, descr="number of timesteps to train"), agents_constants.COMMON.EVAL_EVERY: HParam(value=10, name=agents_constants.COMMON.EVAL_EVERY, descr="training iterations between evaluations"), diff --git a/simulation-system/libs/csle-agents/src/csle_agents/agents/pomcp/action_node.py b/simulation-system/libs/csle-agents/src/csle_agents/agents/pomcp/action_node.py index 02a20c7e0..65b5e5671 100644 --- a/simulation-system/libs/csle-agents/src/csle_agents/agents/pomcp/action_node.py +++ b/simulation-system/libs/csle-agents/src/csle_agents/agents/pomcp/action_node.py @@ -7,7 +7,7 @@ class ActionNode(Node): A node in the POMCP history tree where the last element of the history was an action """ - def __init__(self, id: int, history: List[int], action: int, parent=None, value: float = 0.0, + def __init__(self, id: int, history: List[int], action: int, parent=None, value: float = -2000, visit_count: int = 0) -> None: """ Initializes the node diff --git a/simulation-system/libs/csle-agents/src/csle_agents/agents/pomcp/belief_node.py b/simulation-system/libs/csle-agents/src/csle_agents/agents/pomcp/belief_node.py index 6dc7829c6..7d9324321 100644 --- a/simulation-system/libs/csle-agents/src/csle_agents/agents/pomcp/belief_node.py +++ b/simulation-system/libs/csle-agents/src/csle_agents/agents/pomcp/belief_node.py @@ -9,7 +9,7 @@ class BeliefNode(Node): It also holds the received observation after which the belief is updated accordingly """ - def __init__(self, id: int, history: List[int], observation: int, parent=None, value: float = 0.0, + def __init__(self, id: int, history: List[int], observation: int, parent=None, value: float = -2000, visit_count: int = 0) -> None: """ Initializing the node diff --git a/simulation-system/libs/csle-agents/src/csle_agents/agents/pomcp/belief_tree.py b/simulation-system/libs/csle-agents/src/csle_agents/agents/pomcp/belief_tree.py index b74a1e811..af0299e9a 100644 --- a/simulation-system/libs/csle-agents/src/csle_agents/agents/pomcp/belief_tree.py +++ b/simulation-system/libs/csle-agents/src/csle_agents/agents/pomcp/belief_tree.py @@ -10,18 +10,25 @@ class BeliefTree: of actions and observations. 
""" - def __init__(self, root_particles: List[int]) -> None: + def __init__(self, root_particles: List[int], default_node_value) -> None: """ Initializes the tree with a belief node with a set of particles :param root_particles: the particles to add to the root belief node + :param default_node_value: the default value of nodes in the tree """ self.tree_size = 0 self.nodes: Dict[int, Union[Node, None]] = {} - self.root = self.add(history=[], particle=root_particles, parent=None) + self.default_node_value = default_node_value + node: Node = self.add(history=[], particle=root_particles, parent=None, value=default_node_value) + if isinstance(node, BeliefNode): + self.root: BeliefNode = node + else: + raise ValueError("Invalid root node") - def add(self, history: List[int], parent: Union[ActionNode, BeliefNode, None], action: Union[int, None] = None, - observation: Union[int, None] = None, particle: Union[Any, None] = None): + def add(self, history: List[int], parent: Union[Node, ActionNode, BeliefNode, None], + action: Union[int, None] = None, observation: Union[int, None] = None, particle: Union[Any, None] = None, + value: float = 0) -> Node: """ Creates and adds a new belief node or action node to the belief search tree @@ -31,15 +38,16 @@ def add(self, history: List[int], parent: Union[ActionNode, BeliefNode, None], a :param observation: observation :param particle: new node's particle set :param cost: action cost of an action node - :return: + :param value: the value of the node + :return: The newly added node """ # Create the node if action is not None: - new_node: Node = ActionNode(self.tree_size, history, parent=parent, action=action) + new_node: Node = ActionNode(self.tree_size, history, parent=parent, action=action, value=value) else: if observation is None: observation = 0 - new_node = BeliefNode(self.tree_size, history, parent=parent, observation=observation) + new_node = BeliefNode(self.tree_size, history, parent=parent, observation=observation, value=value) if particle is not None and isinstance(new_node, BeliefNode): new_node.add_particle(particle) @@ -58,17 +66,19 @@ def find_or_create(self, history: List[int], parent: Union[None, BeliefNode, Act Search for the node corresponds to given history, otherwise create one using given params """ # Start the search from the root node - current_node = self.root + root_node = self.root + current_node: Union[None, Node] = root_node # Start from the root node and then traverse down to the depth of the given history to see if the node # of this history exists or not, otherwise add it history_length, root_history_length = len(history), len(self.root.history) for step in range(root_history_length, history_length): - current_node = current_node.get_child(history[step]) + if current_node is not None: + current_node = current_node.get_child(history[step]) # Node of this history does not exists so we add it if current_node is None: - return self.add(history=history, parent=parent, observation=observation) + return self.add(history=history, parent=parent, observation=observation, value=self.default_node_value) return current_node def prune(self, node, exclude=None): diff --git a/simulation-system/libs/csle-agents/src/csle_agents/agents/pomcp/node.py b/simulation-system/libs/csle-agents/src/csle_agents/agents/pomcp/node.py index 39f7763bc..583c573ff 100644 --- a/simulation-system/libs/csle-agents/src/csle_agents/agents/pomcp/node.py +++ b/simulation-system/libs/csle-agents/src/csle_agents/agents/pomcp/node.py @@ -7,7 +7,7 @@ class Node: Abstract 
node type, represents a node in the lookahead tree """ - def __init__(self, id: int, history: List[int], parent=None, value: float = 0, visit_count: int = 0, + def __init__(self, id: int, history: List[int], parent=None, value: float = -2000, visit_count: int = 0, observation: int = -1, action: int = -1) -> None: """ Initializes the node diff --git a/simulation-system/libs/csle-agents/src/csle_agents/agents/pomcp/pomcp.py b/simulation-system/libs/csle-agents/src/csle_agents/agents/pomcp/pomcp.py index ae64abf20..e2aeba42c 100644 --- a/simulation-system/libs/csle-agents/src/csle_agents/agents/pomcp/pomcp.py +++ b/simulation-system/libs/csle-agents/src/csle_agents/agents/pomcp/pomcp.py @@ -1,4 +1,4 @@ -from typing import List, Union, Callable, Any +from typing import List, Union, Callable, Any, Dict import time import numpy as np from csle_common.dao.simulation_config.base_env import BaseEnv @@ -8,6 +8,7 @@ from csle_agents.agents.pomcp.action_node import ActionNode from csle_agents.agents.pomcp.pomcp_util import POMCPUtil import csle_agents.constants.constants as constants +from csle_common.logging.log import Logger class POMCP: @@ -15,15 +16,16 @@ class POMCP: Class that implements the POMCP algorithm """ - def __init__(self, S: List[int], O: List[int], A: List[int], gamma: float, env: BaseEnv, c: float, - initial_belief: List[float], planning_time: float = 0.5, max_particles: int = 350, + def __init__(self, A: List[int], gamma: float, env: BaseEnv, c: float, + initial_belief: Dict[int, float], planning_time: float = 0.5, max_particles: int = 350, + reinvigoration: bool = False, reinvigorated_particles_ratio: float = 0.1, rollout_policy: Union[Policy, None] = None, - value_function: Union[Callable[[Any], float], None] = None) -> None: + value_function: Union[Callable[[Any], float], None] = None, verbose: bool = False, + default_node_value: float = 0) -> None: """ Initializes the solver :param S: the state space - :param O: the observation space :param A: the action space :param gamma: the discount factor :param env: the environment for sampling @@ -32,10 +34,11 @@ def __init__(self, S: List[int], O: List[int], A: List[int], gamma: float, env: :param planning_time: the planning time :param max_particles: the maximum number of particles (samples) for the belief state :param reinvigorated_particles_ratio: probability of new particles added when updating the belief state + :param reinvigoration: boolean flag indicating whether reinvigoration should be done :param rollout_policy: the rollout policy + :param verbose: boolean flag indicating whether logging should be verbose + :param default_node_value: the default value of nodes in the tree """ - self.S = S - self.O = O self.A = A self.env = env self.gamma = gamma @@ -45,20 +48,23 @@ def __init__(self, S: List[int], O: List[int], A: List[int], gamma: float, env: self.reinvigorated_particles_ratio = reinvigorated_particles_ratio self.rollout_policy = rollout_policy self.value_function = value_function - root_particles = POMCPUtil.generate_particles( - states=self.S, num_particles=self.max_particles, probability_vector=initial_belief) - self.tree = BeliefTree(root_particles=root_particles) - - def compute_belief(self) -> List[float]: + self.initial_belief = initial_belief + self.reinvigoration = reinvigoration + self.default_node_value = default_node_value + root_particles = POMCPUtil.generate_particles(num_particles=self.max_particles, belief=initial_belief) + self.tree = BeliefTree(root_particles=root_particles, 
default_node_value=self.default_node_value) + self.verbose = verbose + + def compute_belief(self) -> Dict[int, float]: """ Computes the belief state based on the particles :return: the belief state """ - belief_state = [0.0] * len(self.S) + belief_state = {} particle_distribution = POMCPUtil.convert_samples_to_distribution(self.tree.root.particles) for state, prob in particle_distribution.items(): - belief_state[list(self.S).index(state)] = round(prob, 6) + belief_state[state] = round(prob, 6) return belief_state def rollout(self, state: int, history: List[int], depth: int, max_depth: int) -> float: @@ -85,6 +91,8 @@ def rollout(self, state: int, history: List[int], depth: int, max_depth: int) -> self.env.set_state(state=state) _, r, _, _, info = self.env.step(a) s_prime = info[constants.COMMON.STATE] + if s_prime not in self.initial_belief: + self.initial_belief[s_prime] = 0.0 o = info[constants.COMMON.OBSERVATION] return float(r) + self.gamma * self.rollout(state=s_prime, history=history + [a, o], depth=depth + 1, max_depth=max_depth) @@ -122,7 +130,7 @@ def simulate(self, state: int, max_depth: int, c: float, history: List[int], dep if not current_node.children: # since the node does not have any children, we first add them to the node for action in self.A: - self.tree.add(history + [action], parent=current_node, action=action) + self.tree.add(history + [action], parent=current_node, action=action, value=self.default_node_value) # Perform the rollout and return the value return self.rollout(state, history, depth, max_depth) @@ -138,6 +146,8 @@ def simulate(self, state: int, max_depth: int, c: float, history: List[int], dep _, r, _, _, info = self.env.step(a) o = info[constants.COMMON.OBSERVATION] s_prime = info[constants.COMMON.STATE] + if s_prime not in self.initial_belief: + self.initial_belief[s_prime] = 0.0 # Recursive call, continue the simulation from the new node R = float(r) + self.gamma * self.simulate( @@ -169,6 +179,8 @@ def solve(self, max_depth: int) -> None: n += 1 state = self.tree.root.sample_state() self.simulate(state, max_depth=max_depth, history=self.tree.root.history, c=self.c) + if self.verbose: + Logger.__call__().get_logger().info(f"Simulation time left {self.planning_time - time.time() + begin}s") def get_action(self) -> int: """ @@ -179,9 +191,13 @@ def get_action(self) -> int: """ root = self.tree.root action_vals = [(action.value, action.action) for action in root.children] + if self.verbose: + for a in root.children: + Logger.__call__().get_logger().info(f"action: {a.action}, value: {a.value}, " + f"visit count: {a.visit_count}") return int(max(action_vals)[1]) - def update_tree_with_new_samples(self, action: int, observation: int) -> List[float]: + def update_tree_with_new_samples(self, action: int, observation: int) -> Dict[int, float]: """ Updates the tree after an action has been selected and a new observation been received @@ -193,7 +209,11 @@ def update_tree_with_new_samples(self, action: int, observation: int) -> List[fl # Since we executed an action we advance the tree and update the root to the the node corresponding to the # action that was selected - new_root = root.get_child(action).get_child(observation) + child = root.get_child(action) + if child is not None: + new_root = child.get_child(observation) + else: + raise ValueError("Could not find child node") # If we did not have a node in the tree corresponding to the observation that was observed, we select a random # belief node to be the new root (note that the action child node will always 
exist by definition of the
@@ -201,24 +221,32 @@
         if new_root is None:
             # Get the action node
             action_node = root.get_child(action)
+            if action_node is None:
+                raise ValueError("Could not find the action node")
             if action_node.children:
                 # If the action node has belief state nodes, select a random of them to be the new root
                 new_root = POMCPUtil.rand_choice(action_node.children)
             else:
                 # or create the new belief node randomly
-                particles = POMCPUtil.generate_particles(states=self.S, num_particles=self.max_particles,
-                                                         probability_vector=None)
+                random_belief = {}
+                for s in list(self.initial_belief.keys()):
+                    random_belief[s] = 1 / len(self.initial_belief)
+                particles = POMCPUtil.generate_particles(num_particles=self.max_particles, belief=random_belief)
                 new_root = self.tree.add(history=action_node.history + [observation], parent=action_node,
-                                         observation=observation, particle=particles)
+                                         observation=observation, particle=particles, value=self.default_node_value)
 
         # Check how many new particles are left to fill
-        new_root.particles = []
-        particle_slots = self.max_particles - len(new_root.particles)
+        if isinstance(new_root, BeliefNode):
+            particle_slots = self.max_particles - len(new_root.particles)
+        else:
+            raise ValueError("Invalid root node")
         if particle_slots > 0:
             # fill particles by Monte-Carlo using reject sampling
             particles = []
             while len(particles) < particle_slots:
+                if self.verbose:
+                    Logger.__call__().get_logger().info(f"Filling particles {len(particles)}/{particle_slots}")
                 s = root.sample_state()
                 self.env.set_state(state=s)
                 _, r, _, _, info = self.env.step(action)
@@ -237,11 +265,16 @@
 
         # To avoid particle deprivation (i.e., that the algorithm gets stuck with the wrong belief)
         # we do particle reinvigoration here
-        if any([prob == 0.0 for prob in new_belief]):
+        if self.reinvigoration and len(self.initial_belief) > 0 and any([prob == 0.0 for prob in new_belief.values()]):
+            if self.verbose:
+                Logger.__call__().get_logger().info("Starting reinvigoration")
             # Generate same new particles randomly
+            random_belief = {}
+            for s in list(self.initial_belief.keys()):
+                random_belief[s] = 1 / len(self.initial_belief)
             mutations = POMCPUtil.generate_particles(
-                states=self.S, num_particles=int(self.max_particles * self.reinvigorated_particles_ratio),
-                probability_vector=None)
+                num_particles=int(self.max_particles * self.reinvigorated_particles_ratio),
+                belief=random_belief)
 
             # Randomly exchange some old particles for the new ones
             for particle in mutations:
diff --git a/simulation-system/libs/csle-agents/src/csle_agents/agents/pomcp/pomcp_agent.py b/simulation-system/libs/csle-agents/src/csle_agents/agents/pomcp/pomcp_agent.py
index 79f9f3908..5c6a58884 100644
--- a/simulation-system/libs/csle-agents/src/csle_agents/agents/pomcp/pomcp_agent.py
+++ b/simulation-system/libs/csle-agents/src/csle_agents/agents/pomcp/pomcp_agent.py
@@ -1,6 +1,7 @@
 from typing import Union, List, Dict, Optional
 import math
 import time
+import random
 import gymnasium as gym
 import os
 import numpy as np
@@ -30,7 +31,7 @@ class POMCPAgent(BaseAgent):
     def __init__(self, simulation_env_config: SimulationEnvConfig,
                  emulation_env_config: Union[None, EmulationEnvConfig], experiment_config: ExperimentConfig,
                  env: Optional[BaseEnv] = None,
-                 training_job: Optional[TrainingJobConfig] = None, save_to_metastore: bool = True):
+                 training_job: Optional[TrainingJobConfig] 
= None, save_to_metastore: bool = True) -> None: """ Initializes the POMCP Agent @@ -161,10 +162,11 @@ def hparam_names(self) -> List[str]: :return: a list with the hyperparameter names """ return [agents_constants.POMCP.OBJECTIVE_TYPE, agents_constants.POMCP.ROLLOUT_POLICY, - agents_constants.POMCP.VALUE_FUNCTION, agents_constants.POMCP.N, agents_constants.POMCP.S, - agents_constants.POMCP.O, agents_constants.POMCP.A, agents_constants.POMCP.GAMMA, + agents_constants.POMCP.VALUE_FUNCTION, agents_constants.POMCP.N, agents_constants.POMCP.REINVIGORATION, + agents_constants.POMCP.A, agents_constants.POMCP.GAMMA, agents_constants.POMCP.INITIAL_BELIEF, agents_constants.POMCP.PLANNING_TIME, - agents_constants.POMCP.LOG_STEP_FREQUENCY, + agents_constants.POMCP.LOG_STEP_FREQUENCY, agents_constants.POMCP.VERBOSE, + agents_constants.POMCP.DEFAULT_NODE_VALUE, agents_constants.POMCP.MAX_PARTICLES, agents_constants.POMCP.C, agents_constants.POMCP.MAX_DEPTH, agents_constants.COMMON.EVAL_BATCH_SIZE, agents_constants.COMMON.CONFIDENCE_INTERVAL, agents_constants.COMMON.RUNNING_AVERAGE, agents_constants.COMMON.MAX_ENV_STEPS] @@ -184,11 +186,12 @@ def pomcp(self, exp_result: ExperimentResult, seed: int, rollout_policy = self.experiment_config.hparams[agents_constants.POMCP.ROLLOUT_POLICY].value value_function = self.experiment_config.hparams[agents_constants.POMCP.VALUE_FUNCTION].value log_steps_frequency = self.experiment_config.hparams[agents_constants.POMCP.LOG_STEP_FREQUENCY].value + verbose = self.experiment_config.hparams[agents_constants.POMCP.VERBOSE].value + default_node_value = self.experiment_config.hparams[agents_constants.POMCP.DEFAULT_NODE_VALUE].value max_env_steps = self.experiment_config.hparams[agents_constants.COMMON.MAX_ENV_STEPS].value N = self.experiment_config.hparams[agents_constants.POMCP.N].value - S = self.experiment_config.hparams[agents_constants.POMCP.S].value - O = self.experiment_config.hparams[agents_constants.POMCP.O].value A = self.experiment_config.hparams[agents_constants.POMCP.A].value + reinvigoration = self.experiment_config.hparams[agents_constants.POMCP.REINVIGORATION].value gamma = self.experiment_config.hparams[agents_constants.POMCP.GAMMA].value b1 = self.experiment_config.hparams[agents_constants.POMCP.INITIAL_BELIEF].value planning_time = self.experiment_config.hparams[agents_constants.POMCP.PLANNING_TIME].value @@ -208,9 +211,10 @@ def pomcp(self, exp_result: ExperimentResult, seed: int, eval_env.reset() train_env.reset() belief = b1.copy() - pomcp = POMCP(S=S, O=O, A=A, gamma=gamma, env=train_env, c=c, initial_belief=belief, + pomcp = POMCP(A=A, gamma=gamma, env=train_env, c=c, initial_belief=belief, planning_time=planning_time, max_particles=max_particles, rollout_policy=rollout_policy, - value_function=value_function) + value_function=value_function, reinvigoration=reinvigoration, verbose=verbose, + default_node_value=default_node_value) R = 0 t = 1 if t % log_steps_frequency == 0: @@ -226,8 +230,10 @@ def pomcp(self, exp_result: ExperimentResult, seed: int, R += r t += 1 if t % log_steps_frequency == 0: + b = list(map(lambda x: belief[x], random.sample(list(belief.keys()), min(10, len(belief.keys()))))) Logger.__call__().get_logger().info(f"[POMCP] t: {t}, a: {action}, r: {r}, o: {o}, " - f"s_prime: {s_prime}, b: {belief}") + f"s_prime: {s_prime}, b: {b}") + Logger.__call__().get_logger().info(f"action: {eval_env.action_id_to_type_and_host[action]}") if i % self.experiment_config.log_every == 0: # Logging diff --git 
a/simulation-system/libs/csle-agents/src/csle_agents/agents/pomcp/pomcp_util.py b/simulation-system/libs/csle-agents/src/csle_agents/agents/pomcp/pomcp_util.py index 39d4dcdae..7b767abc5 100644 --- a/simulation-system/libs/csle-agents/src/csle_agents/agents/pomcp/pomcp_util.py +++ b/simulation-system/libs/csle-agents/src/csle_agents/agents/pomcp/pomcp_util.py @@ -1,4 +1,4 @@ -from typing import List, Dict, Union, Any +from typing import List, Dict, Any import numpy as np from csle_agents.agents.pomcp.node import Node from collections import Counter @@ -23,7 +23,7 @@ def sample_from_distribution(probability_vector: List[float]) -> int: return int(sample) @staticmethod - def rand_choice(candidates: List[int]) -> Any: + def rand_choice(candidates: List[Any]) -> Any: """ Selects an element from a given list uniformly at random @@ -45,20 +45,16 @@ def convert_samples_to_distribution(samples) -> Dict[int, float]: return {k: v / _sum for k, v in cnt.items()} @staticmethod - def generate_particles(states: List[int], num_particles: int, probability_vector: Union[None, List[float]]): + def generate_particles(num_particles: int, belief: Dict[int, float]) -> List[int]: """ Generates a list of particles (sample states) for a given list of states with a frequency determined by a given probability vector - :param states: the - :param n: - :param probability_vector: (optional) probability vector to determine the frequency of each sample - :return: + :param probability_vector: probability vector to determine the frequency of each sample + :return: sampled particles (states) """ - # by default use uniform distribution for particles generation - if probability_vector is None: - probability_vector = [1 / len(states)] * len(states) - return [states[int(POMCPUtil.sample_from_distribution(probability_vector))] for _ in range(num_particles)] + states = list(belief.keys()) + return [states[int(POMCPUtil.sample_from_distribution(list(belief.values())))] for _ in range(num_particles)] @staticmethod def ucb(history_visit_count, action_visit_count): diff --git a/simulation-system/libs/csle-agents/src/csle_agents/constants/constants.py b/simulation-system/libs/csle-agents/src/csle_agents/constants/constants.py index f1a8f7b05..5f5c8adf8 100644 --- a/simulation-system/libs/csle-agents/src/csle_agents/constants/constants.py +++ b/simulation-system/libs/csle-agents/src/csle_agents/constants/constants.py @@ -556,8 +556,11 @@ class POMCP: O = "O" GAMMA = "gamma" INITIAL_BELIEF = "initial_belief" + REINVIGORATION = "reinvigoration" PLANNING_TIME = "planning_time" MAX_PARTICLES = "max_particles" C = "c" MAX_DEPTH = "max_depth" LOG_STEP_FREQUENCY = "log_step_frequency" + DEFAULT_NODE_VALUE = "default_node_value" + VERBOSE = "verbose" diff --git a/simulation-system/libs/gym-csle-cyborg/src/gym_csle_cyborg/dao/acitvity_type.py b/simulation-system/libs/gym-csle-cyborg/src/gym_csle_cyborg/dao/activity_type.py similarity index 100% rename from simulation-system/libs/gym-csle-cyborg/src/gym_csle_cyborg/dao/acitvity_type.py rename to simulation-system/libs/gym-csle-cyborg/src/gym_csle_cyborg/dao/activity_type.py diff --git a/simulation-system/libs/gym-csle-cyborg/src/gym_csle_cyborg/envs/cyborg_scenario_two_defender.py b/simulation-system/libs/gym-csle-cyborg/src/gym_csle_cyborg/envs/cyborg_scenario_two_defender.py index af52f856e..2ab6af7db 100644 --- a/simulation-system/libs/gym-csle-cyborg/src/gym_csle_cyborg/envs/cyborg_scenario_two_defender.py +++ 
b/simulation-system/libs/gym-csle-cyborg/src/gym_csle_cyborg/envs/cyborg_scenario_two_defender.py @@ -1,4 +1,5 @@ from typing import Tuple, Dict, List, Any, Union +from copy import deepcopy import time import numpy as np from prettytable import PrettyTable @@ -10,7 +11,7 @@ import gym_csle_cyborg.constants.constants as env_constants from gym_csle_cyborg.dao.csle_cyborg_config import CSLECyborgConfig from gym_csle_cyborg.dao.blue_agent_action_type import BlueAgentActionType -from gym_csle_cyborg.dao.acitvity_type import ActivityType +from gym_csle_cyborg.dao.activity_type import ActivityType from gym_csle_cyborg.dao.compromised_type import CompromisedType from gym_csle_cyborg.dao.red_agent_type import RedAgentType from gym_csle_cyborg.util.cyborg_env_util import CyborgEnvUtil @@ -84,7 +85,13 @@ def __init__(self, config: CSLECyborgConfig): self.traces: List[SimulationTrace] = [] self.trace = SimulationTrace(simulation_env=self.config.gym_env_name) + # Lookup dict of states + self.visited_cyborg_states: Dict[int, Any] = {} + self.visited_scanned_states: Dict[int, List[int]] = {} + self.visited_decoy_states: Dict[int, List[List[BlueAgentActionType]]] = {} + # Reset + self.initial_belief = {1: 1.0} self.reset() super().__init__() @@ -164,7 +171,7 @@ def reset(self, seed: Union[None, int] = None, soft: bool = False, options: Unio for i in range(len(self.cyborg_hostnames)): self.scan_state.append(env_constants.CYBORG.NOT_SCANNED) self.decoy_state.append([]) - info = self.populate_info(info=dict(info), obs=o) + info = self.populate_info(info=dict(info), obs=o, reset=True) if self.config.scanned_state: o = np.array(info[env_constants.CYBORG.VECTOR_OBS_PER_HOST]).flatten() if self.config.decoy_optimization: @@ -177,12 +184,13 @@ def reset(self, seed: Union[None, int] = None, soft: bool = False, options: Unio self.trace = SimulationTrace(simulation_env=self.config.gym_env_name) return np.array(o), info - def populate_info(self, info: Dict[str, Any], obs: npt.NDArray[Any]) -> Dict[str, Any]: + def populate_info(self, info: Dict[str, Any], obs: npt.NDArray[Any], reset: bool = False) -> Dict[str, Any]: """ Populates the info dict :param obs: the latest obs :param info: the dict to populate + :param reset: boolean flag indicating whether this was called from reset or not :return: the populated dict """ info[env_constants.ENV_METRICS.RETURN] = sum(self.trace.defender_rewards) @@ -209,6 +217,39 @@ def populate_info(self, info: Dict[str, Any], obs: npt.NDArray[Any]) -> Dict[str info[env_constants.CYBORG.OBS_PER_HOST].append(host_obs) host_vector_obs.append(self.scan_state[i]) info[env_constants.CYBORG.VECTOR_OBS_PER_HOST].append(host_vector_obs) + host_ids = list(self.cyborg_hostname_to_id.values()) + state_vector = CyborgEnvUtil.state_to_vector(state=self.get_true_table().rows, + decoy_state=self.decoy_state, + host_ids=host_ids, + scan_state=self.scan_state) + state_id = CyborgEnvUtil.state_vector_to_state_id(state_vector=state_vector) + if reset: + self.initial_belief = {state_id: 1} + obs_vector = CyborgEnvUtil.state_to_vector(state=self.get_table().rows, + decoy_state=self.decoy_state, + host_ids=host_ids, scan_state=self.scan_state, observation=True) + obs_id = CyborgEnvUtil.state_vector_to_state_id(state_vector=obs_vector, observation=True) + info[env_constants.ENV_METRICS.STATE] = state_id + info[env_constants.ENV_METRICS.OBSERVATION] = obs_id + if state_id not in self.visited_cyborg_states: + agent_interfaces_copy = {} + for k, v in 
self.cyborg_challenge_env.env.env.env.env.env.environment_controller.agent_interfaces.items(): + agent_interfaces_copy[k] = v.copy() + self.visited_cyborg_states[state_id] = \ + (deepcopy(self.cyborg_challenge_env.env.env.env.env.env.environment_controller.state), + deepcopy(self.cyborg_challenge_env.env.env.env.env.scanned_ips), + agent_interfaces_copy, + self.cyborg_challenge_env.env.env.env.env.env.environment_controller.done, + deepcopy(self.cyborg_challenge_env.env.env.env.env.env.environment_controller.reward), + deepcopy(self.cyborg_challenge_env.env.env.env.env.env.environment_controller.actions), + self.cyborg_challenge_env.env.env.env.env.env.environment_controller.step, + deepcopy(self.cyborg_challenge_env.env.env.env.env.env.environment_controller.hostname_ip_map), + deepcopy(self.cyborg_challenge_env.env.env.env.env.env.environment_controller.subnet_cidr_map), + deepcopy(self.cyborg_challenge_env.env.env.env.env.env.environment_controller.observation), + self.cyborg_challenge_env.env.env.env.env.step_counter + ) + self.visited_scanned_states[state_id] = self.scan_state.copy() + self.visited_decoy_states[state_id] = self.decoy_state.copy() return info def get_table(self) -> PrettyTable: @@ -367,7 +408,37 @@ def set_state(self, state: Any) -> None: :param state: the state :return: None """ - raise NotImplementedError("This environment does not support the set_state method") + s = int(state) + if s in self.visited_cyborg_states: + self.cyborg_challenge_env.env.env.env.env.env.environment_controller.state = \ + self.visited_cyborg_states[s][0] + self.cyborg_challenge_env.env.env.env.env.scanned_ips = self.visited_cyborg_states[s][1] + self.cyborg_challenge_env.env.env.env.env.env.environment_controller.agent_interfaces \ + = self.visited_cyborg_states[s][2] + for k, v in self.cyborg_challenge_env.env.env.env.env.env.environment_controller.agent_interfaces.items(): + v.action_space.create_action_params() + self.cyborg_challenge_env.env.env.env.env.env.environment_controller.done = self.visited_cyborg_states[s][3] + self.cyborg_challenge_env.env.env.env.env.env.environment_controller.reward = \ + self.visited_cyborg_states[s][4] + self.cyborg_challenge_env.env.env.env.env.env.environment_controller.actions = \ + self.visited_cyborg_states[s][5] + self.cyborg_challenge_env.env.env.env.env.env.environment_controller.step = \ + self.visited_cyborg_states[s][6] + self.cyborg_challenge_env.env.env.env.env.env.environment_controller.hostname_ip_map = \ + self.visited_cyborg_states[s][7] + self.cyborg_challenge_env.env.env.env.env.env.environment_controller.subnet_cidr_map = \ + self.visited_cyborg_states[s][8] + self.cyborg_challenge_env.env.env.env.env.env.environment_controller.observation = \ + self.visited_cyborg_states[s][9] + self.cyborg_challenge_env.env.env.env.env.step_counter = self.visited_cyborg_states[s][10] + # self.cyborg_challenge_env.env.env.env.env.observation_change( + # self.cyborg_challenge_env.env.env.env.env.env.environment_controller.observation) + # self.cyborg_challenge_env.env.env.env.observation_change( + # self.cyborg_challenge_env.env.env.env.env.env.environment_controller.observation) + self.decoy_state = self.visited_decoy_states[s] + self.scan_state = self.visited_scanned_states[s] + else: + raise NotImplementedError(f"Unknown state: {s}") def get_observation_from_history(self, history: List[int]) -> List[Any]: """ @@ -376,7 +447,20 @@ def get_observation_from_history(self, history: List[int]) -> List[Any]: :param history: the history to get the 
observation form :return: the observation """ - raise NotImplementedError("This environment does not support the get_observation_from_history method") + obs_id = history[-1] + obs = CyborgEnvUtil.state_id_to_state_vector(state_id=obs_id, observation=True) + return obs + + def get_action_space(self) -> List[int]: + """ + Gets the action space of the defender + + :return: a list of action ids + """ + if self.config.reduced_action_space: + return list(self.action_id_to_type_and_host.keys()) + else: + return list(self.cyborg_action_id_to_type_and_host.keys()) def manual_play(self) -> None: """ diff --git a/simulation-system/libs/gym-csle-cyborg/src/gym_csle_cyborg/util/cyborg_env_util.py b/simulation-system/libs/gym-csle-cyborg/src/gym_csle_cyborg/util/cyborg_env_util.py index edd552b3e..cd773124f 100644 --- a/simulation-system/libs/gym-csle-cyborg/src/gym_csle_cyborg/util/cyborg_env_util.py +++ b/simulation-system/libs/gym-csle-cyborg/src/gym_csle_cyborg/util/cyborg_env_util.py @@ -7,6 +7,7 @@ from gym_csle_cyborg.dao.blue_agent_action_type import BlueAgentActionType from gym_csle_cyborg.dao.csle_cyborg_config import CSLECyborgConfig from gym_csle_cyborg.dao.red_agent_type import RedAgentType +from gym_csle_cyborg.dao.activity_type import ActivityType class CyborgEnvUtil: @@ -303,3 +304,104 @@ def get_decoy_state_space(config: CSLECyborgConfig) -> Tuple[List[int], Dict[Any return states, lookup_table, hosts_lookup_tables else: raise ValueError(f"Scenario: {config.scenario} not recognized") + + @staticmethod + def state_to_vector(state: List[List[Any]], decoy_state: List[List[BlueAgentActionType]], host_ids: List[int], + scan_state: List[int], observation: bool = False) -> List[List[int]]: + """ + Creates the state vector + + :param state: the state of the environment + :param decoy_state: the decoy state + :param scan_state: the scan state + :param host_ids: the list of host ids + :param observation: boolean flag indicating whether it is the true state or an observation of the state + :return: the state vector + """ + state_vector = [] + for host_id in host_ids: + host_known = -1 + activity = -1 + if not observation: + host_known = int(state[host_id][3]) + host_scanned = int(state[host_id][4]) + host_access = state[host_id][5] + else: + host_scanned = scan_state[host_id] + activity = ActivityType.from_str(state[host_id][3]).value + host_access = state[host_id][4] + if host_access == "None": + host_access = 0 + elif host_access == "User": + host_access = 1 + else: + host_access = 2 + host_decoy_state = len(decoy_state[host_id]) + if not observation: + state_vector.append([host_known, host_scanned, host_access, host_decoy_state]) + else: + state_vector.append([activity, host_scanned, host_access, host_decoy_state]) + return state_vector + + @staticmethod + def state_vector_to_state_id(state_vector: List[List[int]], observation: bool = False) -> int: + """ + Converts a state vector to an id + + :param state_vector: the state vector to convert + :param observation: boolean flag indicating whether it is the true state or an observation of the state + :return: the id + """ + binary_id_str = "" + for host_vec in state_vector: + host_binary_id_str = "" + for i, elem in enumerate(host_vec): + if not observation: + if i == 0: + host_binary_id_str += format(elem, '01b') + if i == 1: + host_binary_id_str += format(elem, '01b') + else: + if i == 0: + host_binary_id_str += format(elem, '02b') + if i == 1: + host_binary_id_str += format(elem, '02b') + if i == 2: + host_binary_id_str += format(elem, '02b') + 
if i == 3: + host_binary_id_str += format(elem, '03b') + binary_id_str += host_binary_id_str + state_id = int(binary_id_str, 2) + return state_id + + @staticmethod + def state_id_to_state_vector(state_id: int, observation: bool = False) -> List[List[int]]: + """ + Converts a state id to a state vector + + :param state_id: the state id to convert + :param observation: boolean flag indicating whether it is the true state or an observation of the state + :return: the state vector + """ + if not observation: + binary_id_str = format(state_id, "091b") + host_binary_ids_str = [binary_id_str[i:i + 7] for i in range(0, len(binary_id_str), 7)] + else: + binary_id_str = format(state_id, "0117b") + host_binary_ids_str = [binary_id_str[i:i + 9] for i in range(0, len(binary_id_str), 9)] + state_vector = [] + for host_bin in host_binary_ids_str: + if not observation: + known = int(host_bin[0:1], 2) + scanned = int(host_bin[1:2], 2) + access = int(host_bin[2:4], 2) + decoy = int(host_bin[4:7], 2) + host_vector = [known, scanned, access, decoy] + else: + activity = int(host_bin[0:2], 2) + scanned = int(host_bin[2:4], 2) + access = int(host_bin[4:6], 2) + decoy = int(host_bin[6:9], 2) + host_vector = [activity, scanned, access, decoy] + state_vector.append(host_vector) + return state_vector
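To make the new `CyborgEnvUtil` encoding above easier to follow: each host's sub-vector is packed into a fixed-width bit string, 1 + 1 + 2 + 3 = 7 bits per host for the true state (hence the 91-bit state ids for the 13 hosts of scenario 2) and 2 + 2 + 2 + 3 = 9 bits per host for observations (117-bit ids). Below is a minimal, self-contained sketch of the per-host packing and unpacking; the host values are illustrative only:

```python
# Illustrative true-state host vector: known=1, scanned=1, access=2 (root), decoys=3.
host_vector = [1, 1, 2, 3]
widths = ['01b', '01b', '02b', '03b']  # bit widths for known, scanned, access, decoy count

# Pack the host vector into a 7-bit string, as in state_vector_to_state_id.
host_bits = "".join(format(value, width) for value, width in zip(host_vector, widths))
assert host_bits == "1110011"
assert int(host_bits, 2) == 115

# Unpack by slicing the same bit ranges, as in state_id_to_state_vector.
decoded = [int(host_bits[0:1], 2), int(host_bits[1:2], 2),
           int(host_bits[2:4], 2), int(host_bits[4:7], 2)]
assert decoded == host_vector
```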