Merge pull request #79 from strakam/registry-creation

Registry creation
strakam · Oct 6, 2024 · d7aa16c · d7aa16c
2 parents 1aed393 + e98ecf1
commit d7aa16c
Show file tree

Hide file tree

Showing 21 changed files with 277 additions and 151 deletions.
diff --git a/Makefile b/Makefile
@@ -11,12 +11,12 @@ gym:
 
 # Create new replay and run it
 make n_replay:
-	python3 -m examples.complete_example
-	python3 -m examples.replay_example
+	python3 -m examples.record_replay_example
+	python3 -m examples.show_replay_example
 
 # Run existing replay
 replay:
-	python3 -m examples.replay_example
+	python3 -m examples.show_replay_example
 
 ###################
 # Developer tools #
@@ -25,7 +25,6 @@ at:
 	pytest tests/test_game.py
 	pytest tests/test_map.py
 	python3 tests/gym_check.py
-	python3 tests/sb3_check.py
 
 test_performance:
 	python3 -m tests.parallel_api_check

diff --git a/README.md b/README.md
@@ -45,27 +45,30 @@ pip install -e .
 ## Usage example (🤸 Gymnasium)
 
 ```python
-from generals import gym_generals
-from generals.agents import RandomAgent, ExpanderAgent
+import gymnasium as gym
+from generals import AgentFactory
 
 # Initialize agents
-agent = RandomAgent()
-npc = ExpanderAgent()
+agent = AgentFactory.make_agent("expander")
+npc = AgentFactory.make_agent("random")
+
+env = gym.make(
+    "gym-generals-v0",
+    agent=agent,
+    npc=npc,
+    render_mode="human",
+)
 
-# Create environment -- render modes: {None, "human"}
-env = gym_generals(agent=agent, npc=npc, render_mode="human")
 observation, info = env.reset()
 
-done = False
-
-while not done:
-    action = agent.play(observation)
+terminated = truncated = False
+while not (terminated or truncated):
+    action = agent.act(observation)
     observation, reward, terminated, truncated, info = env.step(action)
-    done = terminated or truncated
-    env.render(fps=6)
+    env.render()
 ```
-You can also check an example for 🦁[PettingZoo](./examples/pettingzoo_example.py) or more extensive
-example [here](./examples/complete_example.py).
+You can also check an example for 🦁[PettingZoo](./examples/pettingzoo_example.py) or
+an example with commentary showcasing various features [here](./examples/complete_example.py).
 
 ## 🚀 Getting Started
 Creating your first agent is very simple. 
@@ -84,7 +87,7 @@ Creating your first agent is very simple.
 Grids are generated via `GridFactory`. You can instantiate the class with desired grid properties, and it will generate
 a grid with these properties for each run.
 ```python
-from generals import pz_generals
+import gymnasium as gym
 from generals import GridFactory
 
 grid_factory = GridFactory(
@@ -95,16 +98,17 @@ grid_factory = GridFactory(
 )
 
 # Create environment
-env = pz_generals(grid_factory=grid_factory, ...)
+env = gym.make(
+    "gym-generals-v0",
+    grid_factory=grid_factory,
+    ...
+)
 ```
 You can also specify grids manually, as a string via `options` dict:
 ```python
-from generals import pz_generals
-from generals import GridFactory
-
-grid_factory = GridFactory()
-env = pz_generals(grid_factory=grid_factory, ...)
+import gymnasium as gym
 
+env = gym.make("gym-generals-v0", ...)
 grid = """
 .3.#
 #..A
@@ -127,10 +131,11 @@ Grids are encoded using these symbols:
 We can store replays and then analyze them. The `Replay` class handles replay-related functionality.
 ### Storing a replay
 ```python
-from generals import pz_generals
+import gymnasium as gym
+
+env = gym.make("gym-generals-v0", ...)
 
 options = {"replay": "my_replay"}
-env = pz_generals(...)
 env.reset(options=options) # The next game will be encoded in my_replay.pkl
 ```
 
@@ -200,6 +205,6 @@ def custom_reward_fn(observation, action, done, info):
     # Give agent a reward based on the number of cells they own
     return observation["observation"]["owned_land_count"]
 
-env = pz_generals(reward_fn=custom_reward_fn)
+env = gym.make(..., reward_fn=custom_reward_fn)
 observations, info = env.reset()
 ```
diff --git a/examples/complete_example.py b/examples/complete_example.py
@@ -1,47 +1,50 @@
-from generals import pz_generals
-from generals.agents import RandomAgent, ExpanderAgent
-from generals import GridFactory
-
-# Initialize agents - their names are then called for actions
-randomer = RandomAgent("Random1", color=(255, 125, 0))
-expander = ExpanderAgent("BigBoy")
-
-agents = {
-    randomer.name: randomer,
-    expander.name: expander,
-}
+import gymnasium as gym
+from generals import AgentFactory, GridFactory
+
+# Initialize agents -- see generals/agents/agent_factory.py for more options
+agent = AgentFactory.make_agent("expander")
+npc = AgentFactory.make_agent("random")
+
+# Initialize grid factory
+grid_factory = GridFactory(
+    grid_dims=(10, 10),                 # Grid height and width
+    mountain_density=0.2,               # Expected percentage of mountains
+    city_density=0.05,                  # Expected percentage of cities
+    general_positions=[(1, 2), (7, 8)], # Positions of the generals
+    seed=38                             # Seed to generate the same map every time
+)
 
-gf = GridFactory(
-    grid_dims=(4, 8), # height x width
-    mountain_density=0.2,
-    city_density=0.05,
-    general_positions=[(0, 0), (3, 3)],
+env = gym.make(
+    "gym-generals-v0",          # Environment name
+    grid_factory=grid_factory,  # Grid factory
+    agent=agent,                # Your agent (used to get metadata like name and color)
+    npc=npc,                    # NPC that will play against the agent
+    render_mode="human",        # "human" mode is for rendering, None is for no rendering
 )
 
-# Custom map that will override GridFactory for this game
-map = """
-A..#
-.#3#
-...#
-##B#
+# We can draw custom maps - see symbol explanations in README
+grid = """
+..#...##..
+..A.#..4..
+.3...1....
+...###....
+####...9.B
+...###....
+.2...5....
+....#..6..
+..#...##..
 """
 
-# Create environment
-env = pz_generals(gf, agents, render_mode=None) # Disable rendering
-
+# Options are used only for the next game
 options = {
-    "grid": map,
-    "replay_file": "replay",
+    "replay_file": "my_replay", # Save replay as my_replay.pkl
+    "grid": grid                # Use the custom map
 }
 
-observations, info = env.reset(options=options)
-done = False
+observation, info = env.reset(options=options)
 
-while not done:
-    actions = {}
-    for agent in env.agents:
-        # Ask agent for action
-        actions[agent] = agents[agent].play(observations[agent])
-    # All agents perform their actions
-    observations, rewards, terminated, truncated, info = env.step(actions)
-    done = any(terminated.values())
+terminated = truncated = False
+while not (terminated or truncated):
+    action = agent.act(observation)
+    observation, reward, terminated, truncated, info = env.step(action)
+    env.render()
diff --git a/examples/gymnasium_example.py b/examples/gymnasium_example.py
@@ -1,18 +1,21 @@
-from generals import gym_generals
-from generals.agents import RandomAgent, ExpanderAgent
+import gymnasium as gym
+from generals import AgentFactory
 
 # Initialize agents
-agent = RandomAgent()
-npc = ExpanderAgent()
+agent = AgentFactory.make_agent("expander")
+npc = AgentFactory.make_agent("random")
 
-# Create environment -- render modes: {None, "human"}
-env = gym_generals(agent=agent, npc=npc, render_mode="human")
-observation, info = env.reset()
+env = gym.make(
+    "gym-generals-v0",
+    agent=agent,
+    npc=npc,
+    render_mode="human",
+)
 
-done = False
+observation, info = env.reset()
 
-while not done:
-    action = agent.play(observation)
+terminated = truncated = False
+while not (terminated or truncated):
+    action = agent.act(observation)
     observation, reward, terminated, truncated, info = env.step(action)
-    done = terminated or truncated
-    env.render(fps=6)
+    env.render()
diff --git a/examples/pettingzoo_example.py b/examples/pettingzoo_example.py
@@ -1,17 +1,18 @@
-from generals import pz_generals
-from generals.agents import ExpanderAgent, RandomAgent
+import gymnasium as gym
+from generals.agents import AgentFactory
 
 # Initialize agents
-random = RandomAgent()
-expander = ExpanderAgent()
+random = AgentFactory.make_agent("random")
+expander = AgentFactory.make_agent("expander")
 
 agents = {
     random.name: random,
     expander.name: expander,
 }  # Environment calls agents by name
 
 # Create environment -- render modes: {None, "human"}
-env = pz_generals(agents=agents, render_mode="human")
+# env = pz_generals(agents=agents, render_mode="human")
+env = gym.make("pz-generals-v0", agents=agents, render_mode="human")
 observations, info = env.reset()
 
 done = False
@@ -20,7 +21,7 @@
     actions = {}
     for agent in env.agents:
         # Ask agent for action
-        actions[agent] = agents[agent].play(observations[agent])
+        actions[agent] = agents[agent].act(observations[agent])
     # All agents perform their actions
     observations, rewards, terminated, truncated, info = env.step(actions)
     done = any(terminated.values()) or any(truncated.values())

diff --git a/examples/record_replay_example.py b/examples/record_replay_example.py
@@ -0,0 +1,35 @@
+import gymnasium as gym
+from generals import AgentFactory, GridFactory
+
+# Initialize agents -- see generals/agents/agent_factory.py for more options
+agent = AgentFactory.make_agent("expander")
+npc = AgentFactory.make_agent("random")
+
+# Initialize grid factory
+grid_factory = GridFactory(
+    grid_dims=(5, 5),                   # Grid height and width
+    mountain_density=0.2,               # Expected percentage of mountains
+    city_density=0.05,                  # Expected percentage of cities
+    general_positions=[(1, 2), (3, 4)], # Positions of the generals
+    seed=38                             # Seed to generate the same map every time
+)
+
+env = gym.make(
+    "gym-generals-v0",          # Environment name
+    grid_factory=grid_factory,  # Grid factory
+    agent=agent,                # Your agent (used to get metadata like name and color)
+    npc=npc,                    # NPC that will play against the agent
+)
+
+# Options are used only for the next game
+options = {
+    "replay_file": "my_replay", # Save replay as my_replay.pkl
+}
+
+observation, info = env.reset(options=options)
+
+terminated = truncated = False
+while not (terminated or truncated):
+    action = agent.act(observation)
+    observation, reward, terminated, truncated, info = env.step(action)
+    env.render()
diff --git a/examples/replay_example.py → examples/show_replay_example.py b/examples/replay_example.py → examples/show_replay_example.py
@@ -1,4 +1,4 @@
 from generals import Replay
 
-replay = Replay.load("replay.pkl")
+replay = Replay.load("my_replay.pkl")
 replay.play()
diff --git a/generals/__init__.py b/generals/__init__.py
@@ -1,6 +1,29 @@
 from .core.grid import GridFactory, Grid
-from .envs.env import pz_generals, gym_generals
 from .core.replay import Replay
+from .agents.agent_factory import AgentFactory
+from gymnasium.envs.registration import register
 
 
-__all__ = ['GridFactory', 'Grid', 'Replay', pz_generals, gym_generals]
+__all__ = [
+    "AgentFactory",
+    "GridFactory",
+    "Grid",
+    "Replay",
+]
+
+
+def _register_generals_envs():
+    register(
+        id="gym-generals-v0",
+        entry_point="generals.envs.env:gym_generals_v0",
+    )
+
+    register(
+        id="pz-generals-v0",
+        entry_point="generals.envs.env:pz_generals_v0",
+        disable_env_checker=True,
+    )
+
+
+
+_register_generals_envs()
diff --git a/generals/agents/__init__.py b/generals/agents/__init__.py
@@ -1,8 +1,7 @@
 # agents/__init__.py
 
-from .random_agent import RandomAgent
-from .expander_agent import ExpanderAgent
 from .agent import Agent
+from .agent_factory import AgentFactory
 
 # You can also define an __all__ list if you want to restrict what gets imported with *
-__all__ = ["Agent", "RandomAgent", "ExpanderAgent"]
+__all__ = ["Agent", "AgentFactory"]
diff --git a/generals/agents/agent.py b/generals/agents/agent.py
@@ -6,12 +6,12 @@ class Agent(ABC):
     Base class for all agents.
     """
 
-    def __init__(self, name, color):
+    def __init__(self, name="Agent", color=(67, 70, 86)):
         self.name = name
         self.color = color
 
     @abstractmethod
-    def play(self, observation):
+    def act(self, observation):
         """
         This method should be implemented by the child class.
         It should receive an observation and return an action.
@@ -28,3 +28,11 @@ def reset(self):
 
     def __str__(self):
         return self.name
+
+
+class EmptyAgent(Agent):
+    def act(self, observation):
+        return None
+
+    def reset(self):
+        pass