diff --git a/README.md b/README.md index a50afa8..7af309f 100644 --- a/README.md +++ b/README.md @@ -159,19 +159,19 @@ An observation for one agent is a dictionary `{"observation": observation, "acti The `observation` is a `Dict`. Values are either `numpy` matrices with shape `(N,M)`, or simple `int` constants: | Key | Shape | Description | | -------------------- | --------- | ---------------------------------------------------------------------------- | -| `army` | `(N,M)` | Number of units in a cell regardless of the owner | -| `general` | `(N,M)` | Mask indicating cells containing a general | -| `city` | `(N,M)` | Mask indicating cells containing a city | -| `visible_cells` | `(N,M)` | Mask indicating cells that are visible to the agent | -| `owned_cells` | `(N,M)` | Mask indicating cells owned by the agent | -| `opponent_cells` | `(N,M)` | Mask indicating cells owned by the opponent | -| `neutral_cells` | `(N,M)` | Mask indicating cells that are not owned by any agent | -| `structures_in_fog` | `(N,M)` | Mask indicating whether cells contain cities or mountains (in fog) | +| `armies` | `(N,M)` | Number of units in a visible cell regardless of the owner | +| `generals` | `(N,M)` | Mask indicating visible cells containing a general | +| `cities` | `(N,M)` | Mask indicating visible cells containing a city | +| `mountains` | `(N,M)` | Mask indicating visible cells containing mountains | +| `neutral_cells` | `(N,M)` | Mask indicating visible cells that are not owned by any agent | +| `owned_cells` | `(N,M)` | Mask indicating visible cells owned by the agent | +| `opponent_cells` | `(N,M)` | Mask indicating visible cells owned by the opponent | +| `fog_cells` | `(N,M)` | Mask indicating fog cells that are not mountains or cities | +| `structures_in_fog` | `(N,M)` | Mask showing cells containing either cities or mountains in fog | | `owned_land_count` | — | Number of cells the agent owns | | `owned_army_count` | — | Total number of units owned by the agent | | `opponent_land_count`| — | Number of cells owned by the opponent | | `opponent_army_count`| — | Total number of units owned by the opponent | -| `is_winner` | — | Indicates whether the agent won | | `timestep` | — | Current timestep of the game | The `action_mask` is a 3D array with shape `(N, M, 4)`, where each element corresponds to whether a move is valid from cell diff --git a/generals/agents/expander_agent.py b/generals/agents/expander_agent.py index 764d579..52ba32b 100644 --- a/generals/agents/expander_agent.py +++ b/generals/agents/expander_agent.py @@ -1,6 +1,6 @@ import numpy as np -from generals.core.config import Action, Direction +from generals.core.game import Action, Direction from generals.core.observation import Observation from .agent import Agent diff --git a/generals/core/config.py b/generals/core/config.py index 8ad6b91..73cd592 100644 --- a/generals/core/config.py +++ b/generals/core/config.py @@ -1,17 +1,6 @@ -from collections.abc import Callable from enum import Enum, IntEnum, StrEnum from importlib.resources import files -from typing import Any, Literal, TypeAlias - -import numpy as np - -# Type aliases -Action: TypeAlias = dict[str, int | np.ndarray] -Info: TypeAlias = dict[str, Any] - -Reward: TypeAlias = float -RewardFn: TypeAlias = Callable[["Observation", Action, bool, Info], Reward] -AgentID: TypeAlias = str +from typing import Literal # Game Literals PASSABLE: Literal["."] = "." diff --git a/generals/core/game.py b/generals/core/game.py index 9f25c37..feda084 100644 --- a/generals/core/game.py +++ b/generals/core/game.py @@ -1,13 +1,17 @@ -from typing import Any +from typing import Any, TypeAlias import gymnasium as gym import numpy as np from .channels import Channels -from .config import DIRECTIONS, Action, Info +from .config import DIRECTIONS from .grid import Grid from .observation import Observation +# Type aliases +Action: TypeAlias = dict[str, int | np.ndarray] +Info: TypeAlias = dict[str, Any] + class Game: def __init__(self, grid: Grid, agents: list[str]): @@ -42,10 +46,10 @@ def __init__(self, grid: Grid, agents: list[str]): "generals": grid_multi_binary, "cities": grid_multi_binary, "mountains": grid_multi_binary, + "neutral_cells": grid_multi_binary, "owned_cells": grid_multi_binary, "opponent_cells": grid_multi_binary, - "neutral_cells": grid_multi_binary, - "visible_cells": grid_multi_binary, + "fog_cells": grid_multi_binary, "structures_in_fog": grid_multi_binary, "owned_land_count": gym.spaces.Discrete(self.max_army_value), "owned_army_count": gym.spaces.Discrete(self.max_army_value), @@ -211,10 +215,11 @@ def agent_observation(self, agent: str) -> Observation: mountains = self.channels.mountains * visible generals = self.channels.generals * visible cities = self.channels.cities * visible + neutral_cells = self.channels.ownership_neutral * visible owned_cells = self.channels.ownership[agent] * visible opponent_cells = self.channels.ownership[opponent] * visible - neutral_cells = self.channels.ownership_neutral * visible structures_in_fog = invisible * (self.channels.mountains + self.channels.cities) + fog_cells = invisible - structures_in_fog owned_land_count = scores[agent]["land"] owned_army_count = scores[agent]["army"] opponent_land_count = scores[opponent]["land"] @@ -226,17 +231,17 @@ def agent_observation(self, agent: str) -> Observation: generals=generals, cities=cities, mountains=mountains, + neutral_cells=neutral_cells, owned_cells=owned_cells, opponent_cells=opponent_cells, - neutral_cells=neutral_cells, - visible_cells=visible, + fog_cells=fog_cells, structures_in_fog=structures_in_fog, owned_land_count=owned_land_count, owned_army_count=owned_army_count, opponent_land_count=opponent_land_count, opponent_army_count=opponent_army_count, timestep=timestep, - ).as_dict() + ) def agent_won(self, agent: str) -> bool: """ diff --git a/generals/core/observation.py b/generals/core/observation.py index ecd69ec..cbd104b 100644 --- a/generals/core/observation.py +++ b/generals/core/observation.py @@ -10,10 +10,10 @@ def __init__( generals: np.ndarray, cities: np.ndarray, mountains: np.ndarray, + neutral_cells: np.ndarray, owned_cells: np.ndarray, opponent_cells: np.ndarray, - neutral_cells: np.ndarray, - visible_cells: np.ndarray, + fog_cells: np.ndarray, structures_in_fog: np.ndarray, owned_land_count: int, owned_army_count: int, @@ -25,16 +25,17 @@ def __init__( self.generals = generals self.cities = cities self.mountains = mountains + self.neutral_cells = neutral_cells self.owned_cells = owned_cells self.opponent_cells = opponent_cells - self.neutral_cells = neutral_cells - self.visible_cells = visible_cells + self.fog_cells = fog_cells self.structures_in_fog = structures_in_fog self.owned_land_count = owned_land_count self.owned_army_count = owned_army_count self.opponent_land_count = opponent_land_count self.opponent_army_count = opponent_army_count self.timestep = timestep + # armies, generals, cities, mountains, empty, owner, fogged, structure in fog def action_mask(self) -> np.ndarray: """ @@ -86,10 +87,10 @@ def as_dict(self, with_mask=True): "generals": self.generals, "cities": self.cities, "mountains": self.mountains, + "neutral_cells": self.neutral_cells, "owned_cells": self.owned_cells, "opponent_cells": self.opponent_cells, - "neutral_cells": self.neutral_cells, - "visible_cells": self.visible_cells, + "fog_cells": self.fog_cells, "structures_in_fog": self.structures_in_fog, "owned_land_count": self.owned_land_count, "owned_army_count": self.owned_army_count, diff --git a/generals/envs/gymnasium_generals.py b/generals/envs/gymnasium_generals.py index 793924f..4a38844 100644 --- a/generals/envs/gymnasium_generals.py +++ b/generals/envs/gymnasium_generals.py @@ -1,10 +1,10 @@ +from collections.abc import Callable from copy import deepcopy -from typing import Any, SupportsFloat +from typing import Any, SupportsFloat, TypeAlias import gymnasium as gym from generals.agents import Agent, AgentFactory -from generals.core.config import Reward, RewardFn from generals.core.game import Action, Game, Info from generals.core.grid import GridFactory from generals.core.observation import Observation @@ -12,6 +12,9 @@ from generals.gui import GUI from generals.gui.properties import GuiMode +Reward: TypeAlias = float +RewardFn: TypeAlias = Callable[[Observation, Action, bool, Info], Reward] + class GymnasiumGenerals(gym.Env): metadata = { @@ -91,20 +94,21 @@ def reset( self.observation_space = self.game.observation_space self.action_space = self.game.action_space - observation = self.game.agent_observation(self.agent_id) + observation = self.game.agent_observation(self.agent_id).as_dict() info: dict[str, Any] = {} return observation, info def step(self, action: Action) -> tuple[Observation, SupportsFloat, bool, bool, dict[str, Any]]: # Get action of NPC - npc_observation = self.game.agent_observation(self.npc.id) + npc_observation = self.game.agent_observation(self.npc.id).as_dict() npc_action = self.npc.act(npc_observation) actions = {self.agent_id: action, self.npc.id: npc_action} observations, infos = self.game.step(actions) infos = {agent_id: {} for agent_id in self.agent_ids} + # From observations of all agents, pick only those relevant for the main agent - obs = observations[self.agent_id] + obs = observations[self.agent_id].as_dict() info = infos[self.agent_id] reward = self.reward_fn(obs, action, self.game.is_done(), info) terminated = self.game.is_done() @@ -127,14 +131,7 @@ def _default_reward( done: bool, info: Info, ) -> Reward: - """ - Give 0 if game still running, otherwise 1 for winner and -1 for loser. - """ - if done: - reward = 1 if observation["observation"]["is_winner"] else -1 - else: - reward = 0 - return reward + return 0 def close(self) -> None: if self.render_mode == "human": diff --git a/generals/envs/pettingzoo_generals.py b/generals/envs/pettingzoo_generals.py index bd550d0..d04c1f3 100644 --- a/generals/envs/pettingzoo_generals.py +++ b/generals/envs/pettingzoo_generals.py @@ -1,18 +1,22 @@ import functools +from collections.abc import Callable from copy import deepcopy -from typing import Any +from typing import Any, TypeAlias import pettingzoo # type: ignore from gymnasium import spaces from generals.agents.agent import Agent -from generals.core.config import AgentID, Reward, RewardFn from generals.core.game import Action, Game, Info, Observation from generals.core.grid import GridFactory from generals.core.replay import Replay from generals.gui import GUI from generals.gui.properties import GuiMode +AgentID: TypeAlias = str +Reward: TypeAlias = float +RewardFn: TypeAlias = Callable[[Observation, Action, bool, Info], Reward] + class PettingZooGenerals(pettingzoo.ParallelEnv): metadata: dict[str, Any] = { @@ -82,7 +86,7 @@ def reset( elif hasattr(self, "replay"): del self.replay - observations = self.game.get_all_observations() + observations = {agent: self.game.agent_observation(agent).as_dict() for agent in self.agents} infos: dict[str, Any] = {agent: {} for agent in self.agents} return observations, infos @@ -96,6 +100,7 @@ def step( dict[AgentID, Info], ]: observations, infos = self.game.step(actions) + observations = {agent: observation.as_dict() for agent, observation in observations.items()} # You probably want to set your truncation based on self.game.time truncation = False if self.truncation is None else self.game.time >= self.truncation truncated = {agent: truncation for agent in self.agents} @@ -128,14 +133,7 @@ def _default_reward( done: bool, info: Info, ) -> Reward: - """ - Give 0 if game still running, otherwise 1 for winner and -1 for loser. - """ - if done: - reward = 1 if observation["observation"]["is_winner"] else -1 - else: - reward = 0 - return reward + return 0 def close(self) -> None: if self.render_mode == "human": diff --git a/generals/remote/generalsio_client.py b/generals/remote/generalsio_client.py index 00bc7ac..2a4b242 100644 --- a/generals/remote/generalsio_client.py +++ b/generals/remote/generalsio_client.py @@ -1,9 +1,8 @@ import numpy as np -from scipy.ndimage import maximum_filter # type: ignore from socketio import SimpleClient # type: ignore from generals.agents.agent import Agent -from generals.core.game import Direction +from generals.core.config import Direction from generals.core.observation import Observation DIRECTIONS = [Direction.UP, Direction.DOWN, Direction.LEFT, Direction.RIGHT] @@ -36,7 +35,7 @@ def __str__(self) -> str: def apply_diff(old: list[int], diff: list[int]) -> list[int]: i = 0 - new = [] + new: list[int] = [] while i < len(diff): if diff[i] > 0: # matching new.extend(old[len(new) : len(new) + diff[i]]) @@ -68,8 +67,8 @@ def __init__(self, data: dict): self.n_players = len(self.usernames) - self.map = [] - self.cities = [] + self.map: list[int] = [] + self.cities: list[int] = [] def update(self, data: dict) -> None: self.turn = data["turn"] @@ -100,7 +99,7 @@ def get_observation(self) -> "Observation": opponent_cells = np.where(terrain == self.opponent_index, 1, 0) neutral_cells = np.where(terrain == -1, 1, 0) mountain_cells = np.where(terrain == -2, 1, 0) - visible_cells = maximum_filter(np.where(terrain == self.player_index, 1, 0), size=3) + fog_cells = np.where(terrain == -3, 1, 0) structures_in_fog = np.where(terrain == -4, 1, 0) owned_land_count = self.scores[self.player_index]["tiles"] owned_army_count = self.scores[self.player_index]["total"] @@ -113,10 +112,10 @@ def get_observation(self) -> "Observation": generals=generals, cities=cities, mountains=mountain_cells, + neutral_cells=neutral_cells, owned_cells=owned_cells, opponent_cells=opponent_cells, - neutral_cells=neutral_cells, - visible_cells=visible_cells, + fog_cells=fog_cells, structures_in_fog=structures_in_fog, owned_land_count=owned_land_count, owned_army_count=owned_army_count, @@ -210,8 +209,8 @@ def _play_game(self) -> None: # This code here should be made way prettier, its just POC action = self.agent.act(obs) if not action["pass"]: - source = action["cell"] - direction = DIRECTIONS[action["direction"]].value + source: np.ndarray = np.array(action["cell"]) + direction = np.array(DIRECTIONS[action["direction"]].value) split = action["split"] destination = source + direction # convert to index