From 0b87deaf6d91480e8a730a27717d5a81606ee2c3 Mon Sep 17 00:00:00 2001 From: Matej Straka Date: Tue, 29 Oct 2024 11:30:37 +0100 Subject: [PATCH 1/3] fix: Align priority moves with generalsio --- generals/core/game.py | 12 ++++++++++-- generals/core/observation.py | 3 +++ 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/generals/core/game.py b/generals/core/game.py index feda084..20769c0 100644 --- a/generals/core/game.py +++ b/generals/core/game.py @@ -17,6 +17,7 @@ class Game: def __init__(self, grid: Grid, agents: list[str]): # Agents self.agents = agents + self.agent_order = self.agents[:] # Grid _grid = grid.grid @@ -56,6 +57,7 @@ def __init__(self, grid: Grid, agents: list[str]): "opponent_land_count": gym.spaces.Discrete(self.max_army_value), "opponent_army_count": gym.spaces.Discrete(self.max_army_value), "timestep": gym.spaces.Discrete(self.max_timestep), + "priority": gym.spaces.Discrete(2), } ), "action_mask": gym.spaces.MultiBinary(self.grid_dims + (4,)), @@ -99,8 +101,9 @@ def step(self, actions: dict[str, Action]) -> tuple[dict[str, Observation], dict continue moves[agent] = (i, j, direction, army_to_move) - # Evaluate moves (smaller army movements are prioritized) - for agent in sorted(moves, key=lambda x: moves[x][3]): + for agent in self.agent_order: + if agent not in moves: + continue si, sj, direction, army_to_move = moves[agent] # Cap the amount of army to move (previous moves may have lowered available army) @@ -135,6 +138,9 @@ def step(self, actions: dict[str, Action]) -> tuple[dict[str, Observation], dict if square_winner != target_square_owner: self.channels.ownership[target_square_owner][di, dj] = 0 + # Swap agent order (because priority is alternating) + self.agent_order = self.agent_order[::-1] + if not done_before_actions: self.time += 1 @@ -225,6 +231,7 @@ def agent_observation(self, agent: str) -> Observation: opponent_land_count = scores[opponent]["land"] opponent_army_count = scores[opponent]["army"] timestep = self.time 
+ priority = 1 if agent == self.agents[0] else 0 return Observation( armies=armies, @@ -241,6 +248,7 @@ def agent_observation(self, agent: str) -> Observation: opponent_land_count=opponent_land_count, opponent_army_count=opponent_army_count, timestep=timestep, + priority=priority, ) def agent_won(self, agent: str) -> bool: diff --git a/generals/core/observation.py b/generals/core/observation.py index cbd104b..b7aff2d 100644 --- a/generals/core/observation.py +++ b/generals/core/observation.py @@ -20,6 +20,7 @@ def __init__( opponent_land_count: int, opponent_army_count: int, timestep: int, + priority: int = 0, ): self.armies = armies self.generals = generals @@ -35,6 +36,7 @@ def __init__( self.opponent_land_count = opponent_land_count self.opponent_army_count = opponent_army_count self.timestep = timestep + self.priority = priority # armies, generals, cities, mountains, empty, owner, fogged, structure in fog def action_mask(self) -> np.ndarray: @@ -97,6 +99,7 @@ def as_dict(self, with_mask=True): "opponent_land_count": self.opponent_land_count, "opponent_army_count": self.opponent_army_count, "timestep": self.timestep, + "priority": self.priority, } if with_mask: obs = { From 5000702a99983a540215cb7b74327caff55f932f Mon Sep 17 00:00:00 2001 From: Matej Straka Date: Tue, 29 Oct 2024 11:41:47 +0100 Subject: [PATCH 2/3] fix: Make online observations include priority --- README.md | 19 ++----------------- generals/remote/generalsio_state.py | 2 ++ 2 files changed, 4 insertions(+), 17 deletions(-) diff --git a/README.md b/README.md index 2071cd2..4279c20 100644 --- a/README.md +++ b/README.md @@ -52,23 +52,7 @@ that has the signature explained in sections down below. 
### Usage Example (🤸 Gymnasium) The example loop for running the game looks like this -```python -import gymnasium as gym -from generals.agents import RandomAgent, ExpanderAgent # import your agent - -# Initialize agents -agent = RandomAgent() -npc = ExpanderAgent() - -# Create environment -env = gym.make("gym-generals-v0", agent=agent, npc=npc, render_mode="human") - -observation, info = env.reset() -terminated = truncated = False -while not (terminated or truncated): - action = agent.act(observation) - observation, reward, terminated, truncated, info = env.step(action) - env.render() +```python:examples/gymnasium_example.py ``` > [!TIP] @@ -174,6 +158,7 @@ The `observation` is a `Dict`. Values are either `numpy` matrices with shape `(N | `opponent_land_count`| — | Number of cells owned by the opponent | | `opponent_army_count`| — | Total number of units owned by the opponent | | `timestep` | — | Current timestep of the game | +| `priority` | — | `1` if your move is evaluated first, `0` otherwise | The `action_mask` is a 3D array with shape `(N, M, 4)`, where each element corresponds to whether a move is valid from cell `[i, j]` in one of four directions: `0 (up)`, `1 (down)`, `2 (left)`, or `3 (right)`.
diff --git a/generals/remote/generalsio_state.py b/generals/remote/generalsio_state.py index 9048e31..122473b 100644 --- a/generals/remote/generalsio_state.py +++ b/generals/remote/generalsio_state.py @@ -63,6 +63,7 @@ def get_observation(self) -> Observation: opponent_land_count = self.scores[self.opponent_index]["tiles"] opponent_army_count = self.scores[self.opponent_index]["total"] timestep = self.turn + priority = 1 if self.player_index == 0 else 0 return Observation( armies=army, @@ -79,4 +80,5 @@ def get_observation(self) -> Observation: opponent_land_count=opponent_land_count, opponent_army_count=opponent_army_count, timestep=timestep, + priority=priority, ) From 1e7bc6e3360261911e2de8f67d52e9a6dac025ad Mon Sep 17 00:00:00 2001 From: Matej Straka Date: Tue, 29 Oct 2024 12:49:27 +0100 Subject: [PATCH 3/3] chore: Fix readme example --- README.md | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 4279c20..813d93d 100644 --- a/README.md +++ b/README.md @@ -52,7 +52,24 @@ that has the signature explained in sections down below. ### Usage Example (🤸 Gymnasium) The example loop for running the game looks like this -```python:examples/gymnasium_example.py +```python +import gymnasium as gym + +from generals.agents import RandomAgent, ExpanderAgent + +# Initialize agents +agent = RandomAgent() +npc = ExpanderAgent() + +# Create environment +env = gym.make("gym-generals-v0", agent=agent, npc=npc, render_mode="human") + +observation, info = env.reset() +terminated = truncated = False +while not (terminated or truncated): + action = agent.act(observation) + observation, reward, terminated, truncated, info = env.step(action) + env.render() ``` > [!TIP]