From 0b87deaf6d91480e8a730a27717d5a81606ee2c3 Mon Sep 17 00:00:00 2001 From: Matej Straka Date: Tue, 29 Oct 2024 11:30:37 +0100 Subject: [PATCH 1/3] fix: Align priority moves with generalsio --- generals/core/game.py | 12 ++++++++++-- generals/core/observation.py | 3 +++ 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/generals/core/game.py b/generals/core/game.py index feda084..20769c0 100644 --- a/generals/core/game.py +++ b/generals/core/game.py @@ -17,6 +17,7 @@ class Game: def __init__(self, grid: Grid, agents: list[str]): # Agents self.agents = agents + self.agent_order = self.agents[:] # Grid _grid = grid.grid @@ -56,6 +57,7 @@ def __init__(self, grid: Grid, agents: list[str]): "opponent_land_count": gym.spaces.Discrete(self.max_army_value), "opponent_army_count": gym.spaces.Discrete(self.max_army_value), "timestep": gym.spaces.Discrete(self.max_timestep), + "priority": gym.spaces.Discrete(2), } ), "action_mask": gym.spaces.MultiBinary(self.grid_dims + (4,)), @@ -99,8 +101,9 @@ def step(self, actions: dict[str, Action]) -> tuple[dict[str, Observation], dict continue moves[agent] = (i, j, direction, army_to_move) - # Evaluate moves (smaller army movements are prioritized) - for agent in sorted(moves, key=lambda x: moves[x][3]): + for agent in self.agent_order: + if agent not in moves: + continue si, sj, direction, army_to_move = moves[agent] # Cap the amount of army to move (previous moves may have lowered available army) @@ -135,6 +138,9 @@ def step(self, actions: dict[str, Action]) -> tuple[dict[str, Observation], dict if square_winner != target_square_owner: self.channels.ownership[target_square_owner][di, dj] = 0 + # Swap agent order (because priority is alternating) + self.agent_order = self.agent_order[::-1] + if not done_before_actions: self.time += 1 @@ -225,6 +231,7 @@ def agent_observation(self, agent: str) -> Observation: opponent_land_count = scores[opponent]["land"] opponent_army_count = scores[opponent]["army"] timestep = self.time 
+ priority = 1 if agent == self.agents[0] else 0 return Observation( armies=armies, @@ -241,6 +248,7 @@ def agent_observation(self, agent: str) -> Observation: opponent_land_count=opponent_land_count, opponent_army_count=opponent_army_count, timestep=timestep, + priority=priority, ) def agent_won(self, agent: str) -> bool: diff --git a/generals/core/observation.py b/generals/core/observation.py index cbd104b..b7aff2d 100644 --- a/generals/core/observation.py +++ b/generals/core/observation.py @@ -20,6 +20,7 @@ def __init__( opponent_land_count: int, opponent_army_count: int, timestep: int, + priority: int = 0, ): self.armies = armies self.generals = generals @@ -35,6 +36,7 @@ def __init__( self.opponent_land_count = opponent_land_count self.opponent_army_count = opponent_army_count self.timestep = timestep + self.priority = priority # armies, generals, cities, mountains, empty, owner, fogged, structure in fog def action_mask(self) -> np.ndarray: @@ -97,6 +99,7 @@ def as_dict(self, with_mask=True): "opponent_land_count": self.opponent_land_count, "opponent_army_count": self.opponent_army_count, "timestep": self.timestep, + "priority": self.priority, } if with_mask: obs = { From 5000702a99983a540215cb7b74327caff55f932f Mon Sep 17 00:00:00 2001 From: Matej Straka Date: Tue, 29 Oct 2024 11:41:47 +0100 Subject: [PATCH 2/3] fix: Make online observations include priority --- README.md | 19 ++----------------- generals/remote/generalsio_state.py | 2 ++ 2 files changed, 4 insertions(+), 17 deletions(-) diff --git a/README.md b/README.md index 2071cd2..4279c20 100644 --- a/README.md +++ b/README.md @@ -52,23 +52,7 @@ that has the signature explained in sections down below. 
### Usage Example (🤸 Gymnasium) The example loop for running the game looks like this -```python -import gymnasium as gym -from generals.agents import RandomAgent, ExpanderAgent # import your agent - -# Initialize agents -agent = RandomAgent() -npc = ExpanderAgent() - -# Create environment -env = gym.make("gym-generals-v0", agent=agent, npc=npc, render_mode="human") - -observation, info = env.reset() -terminated = truncated = False -while not (terminated or truncated): - action = agent.act(observation) - observation, reward, terminated, truncated, info = env.step(action) - env.render() +```python:examples/gymnasium_example.py ``` > [!TIP] @@ -174,6 +158,7 @@ The `observation` is a `Dict`. Values are either `numpy` matrices with shape `(N | `opponent_land_count`| — | Number of cells owned by the opponent | | `opponent_army_count`| — | Total number of units owned by the opponent | | `timestep` | — | Current timestep of the game | +| `priority` | — | `1` if your move is evaluated first, `0` otherwise | The `action_mask` is a 3D array with shape `(N, M, 4)`, where each element corresponds to whether a move is valid from cell `[i, j]` in one of four directions: `0 (up)`, `1 (down)`, `2 (left)`, or `3 (right)`.
diff --git a/generals/remote/generalsio_state.py b/generals/remote/generalsio_state.py index 9048e31..122473b 100644 --- a/generals/remote/generalsio_state.py +++ b/generals/remote/generalsio_state.py @@ -63,6 +63,7 @@ def get_observation(self) -> Observation: opponent_land_count = self.scores[self.opponent_index]["tiles"] opponent_army_count = self.scores[self.opponent_index]["total"] timestep = self.turn + priority = 1 if self.player_index == 0 else 0 return Observation( armies=army, @@ -79,4 +80,5 @@ def get_observation(self) -> Observation: opponent_land_count=opponent_land_count, opponent_army_count=opponent_army_count, timestep=timestep, + priority=priority, ) From 1e7bc6e3360261911e2de8f67d52e9a6dac025ad Mon Sep 17 00:00:00 2001 From: Matej Straka Date: Tue, 29 Oct 2024 12:49:27 +0100 Subject: [PATCH 3/3] chore: Fix readme example --- README.md | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 4279c20..813d93d 100644 --- a/README.md +++ b/README.md @@ -52,7 +52,24 @@ that has the signature explained in sections down below. ### Usage Example (🤸 Gymnasium) The example loop for running the game looks like this -```python:examples/gymnasium_example.py +```python +import gymnasium as gym + +from generals.agents import RandomAgent, ExpanderAgent + +# Initialize agents +agent = RandomAgent() +npc = ExpanderAgent() + +# Create environment +env = gym.make("gym-generals-v0", agent=agent, npc=npc, render_mode="human") + +observation, info = env.reset() +terminated = truncated = False +while not (terminated or truncated): + action = agent.act(observation) + observation, reward, terminated, truncated, info = env.step(action) + env.render() ``` > [!TIP]