From b000dbfb75f03c167ac17beb45d591b6b5c42ed0 Mon Sep 17 00:00:00 2001 From: elliottower Date: Wed, 1 Feb 2023 04:00:03 -0500 Subject: [PATCH] Cleaned DQN tianshou code, updated command line arguments to be consistent, updated README --- README.md | 36 +++++++++++-------- gobblet/examples/example_DQN_tianshou.py | 45 ++++++++++++++++++------ gobblet/examples/example_basic.py | 4 ++- gobblet/examples/example_record_game.py | 4 +-- gobblet/examples/example_user_input.py | 7 ++-- gobblet/game/manual_policy.py | 2 ++ pyproject.toml | 2 +- 7 files changed, 66 insertions(+), 34 deletions(-) diff --git a/README.md b/README.md index afd17e0..0d452cb 100644 --- a/README.md +++ b/README.md @@ -5,6 +5,8 @@ Interactive Multi-Agent Reinforcement Learning Environment for the [Gobblet](https://themindcafe.com.sg/wp-content/uploads/2018/07/Gobblet-Gobblers.pdf) board game using [PettingZoo](https://github.com/PettingZoo-Team/PettingZoo) and [Pygame](https://github.com/pygame/pygame). +Allows for users to play in the same environment, and even play against RL agents trained with [Tianshou](https://github.com/thu-ml/tianshou). +

Light         @@ -39,6 +41,16 @@ from gobblet import gobblet_v1 env = gobblet_v1.env() ``` +### Play against a DQN agent trained with Tianshou + +In the terminal, run the following: +``` +python gobblet/examples/example_DQN_tianshou.py --epoch 50 --player 1 --cpu-players 2 +``` + +This will train a [DQN](https://tianshou.readthedocs.io/en/master/tutorials/dqn.html) model from Tianshou for 50 epochs, and launch an interactive game against the pre-trained agent. + +Use the argument ``--cpu-players`` to determine the number of CPU agents (1 or 2) and ``--player`` to choose which agent goes first (human first: 0, CPU first: 1) ### Play an interactive game @@ -47,30 +59,23 @@ In the terminal, run the following: python gobblet/examples/example_user_input.py" ``` To select a piece size, press a number key `1`, `2`, or `3`, or press `space` to cycle through pieces. Placing a piece is done by clicking on a square on the board. A preview will appear showing legal moves with the selected piece size. Clicking on an already placed piece will pick it up and prompt you to place it in a new location (re-placing in the same location is an illegal move). 
-### Screen recording of a game + +### Create screen recording of a game In the terminal, run the following: ``` python gobblet/examples/example_record_game.py" ``` -This will save a screen recording of a game to `gobblet/examples/game.gif` +This will save a screen recording of a game to `gobblet/examples/game.gif` -### Display a game between two basic CPU agents -In the terminal, run the following: -``` -python gobblet/examples/example_basic.py" -``` -This will launch a game with two agents choosing random actions (other agent types will be added in the future) - -### Train a DQL agent with Tianshou +### Watch a game between two basic CPU agents In the terminal, run the following: ``` -python gobblet/example_tianshou.py +python gobblet/examples/example_basic.py - -This will train a [DQN](https://tianshou.readthedocs.io/en/master/tutorials/dqn.html) model from Tianshou for 50 epochs, and then render the trained agent playing against a random agent in an example match. +This will launch a game with two basic agents choosing random actions. This file can be used as a starting point for prototyping future methods. ### Command Line Arguments @@ -78,17 +83,20 @@ This will train a [DQN](https://tianshou.readthedocs.io/en/master/tutorials/dqn. #### Game Modes -`example_user_input.py` and `example_record_game.py` take the following arguments: +All scripts besides `example_basic.py` (no support for interactive play) take the following arguments: The default game mode is human vs CPU, with the human playing as red and CPU as yellow. ``--player 1`` sets the human player as yellow, with the CPU moving first as red. +``--cpu-players 1`` will launch a game with one CPU agent and one human agent. (default) + ``--cpu-players 0`` will launch a game with no CPU agents, taking interactive input for both agents. ``--cpu-player 2`` will launch a game with two CPU agents, and takes no interactive input. 
+ #### Display Modes `example_basic.py` takes the following arguments to change display mode: diff --git a/gobblet/examples/example_DQN_tianshou.py b/gobblet/examples/example_DQN_tianshou.py index 796d5a5..7fb2370 100644 --- a/gobblet/examples/example_DQN_tianshou.py +++ b/gobblet/examples/example_DQN_tianshou.py @@ -1,6 +1,5 @@ -# Modified from tutorial code -"""Modified code from Tianshou MARL training example: - +# adapted from https://github.com/Farama-Foundation/PettingZoo/blob/master/tutorials/Tianshou/3_cli_and_logging.py +""" This is a full example of using Tianshou with MARL to train agents, complete with argument parsing (CLI) and logging. Author: Will (https://github.com/WillDudley) @@ -31,6 +30,7 @@ from gobblet import gobblet_v1 from gobblet.game.collector_manual_policy import ManualPolicyCollector +import time def get_parser() -> argparse.ArgumentParser: @@ -57,9 +57,11 @@ def get_parser() -> argparse.ArgumentParser: parser.add_argument("--test-num", type=int, default=10) parser.add_argument("--logdir", type=str, default="log") parser.add_argument("--render", type=float, default=0.1) - parser.add_argument("--render_mode", type=str, default="human", help="options: human, human_full") + parser.add_argument("--render_mode", type=str, default="human", choices=["human","rgb_array", "text", "text_full"], help="Choose the rendering mode for the game.") parser.add_argument("--debug", action="store_true", help="enable to print extra debugging info") parser.add_argument("--self_play", action="store_true", help="enable training via self-play (as opposed to fixed opponent)") + parser.add_argument("--cpu-players", type=int, default=2, choices=[1, 2], help="Number of CPU players (options: 1, 2)") + parser.add_argument("--player", type=int, default=0, choices=[0,1], help="Choose which player to play as: red = 0, yellow = 1") parser.add_argument( "--win-rate", type=float, @@ -267,7 +269,15 @@ def watch( else: policy.policies[agents[:]].set_eps(args.eps_test) 
collector = Collector(policy, env, exploration_noise=True) - result = collector.collect(n_episode=1, render=0) + + # First step (while loop stopping conditions are not defined until we run the first step) + result = collector.collect(n_step=1, render=args.render) + time.sleep(0.25) + + while not (collector.data.terminated or collector.data.truncated): + result = collector.collect(n_step=1, render=args.render) + time.sleep(0.25) # Slow down rendering so the actions can be seen sequentially (otherwise moves happen too fast) + rews, lens = result["rews"], result["lens"] print(f"Final reward: {rews[:, args.agent_id - 1].mean()}, length: {lens.mean()}") @@ -285,7 +295,7 @@ def watch_selfplay(args, agent): print(f"Final reward: {rews[:, 0].mean()}, length: {lens.mean()}") -# Allows the user to input moves and play vs the learned agent +# ======== allows the user to input moves and play vs a pre-trained agent ====== def play( args: argparse.Namespace = get_args(), agent_learn: Optional[BasePolicy] = None, @@ -304,12 +314,22 @@ def play( pettingzoo_env = env.workers[0].env.env # DummyVectorEnv -> Tianshou PettingZoo Wrapper -> PettingZoo Env manual_policy = gobblet_v1.ManualPolicy(pettingzoo_env) # Gobblet keyboard input requires access to raw_env (uses functions from board) - # Get the first move from the CPU player - result = collector.collect(n_step=1, render=args.render) + # Get the first move from the CPU (human goes second) + if args.player == 1: + result = collector.collect(n_step=1, render=args.render) + + # Get the first move from the player + else: + observation = {"observation": collector.data.obs.obs, + "action_mask": collector.data.obs.mask} # PettingZoo expects a dict with this format + action = manual_policy(observation, pettingzoo_env.agents[0]) + + result = collector.collect_result(action=action.reshape(1), render=args.render) while not (collector.data.terminated or collector.data.truncated): agent_id = collector.data.obs.agent_id - if agent_id == 
pettingzoo_env.agents[1]: + # If it is the player's turn and there are fewer than 2 CPU players (at least one human player) + if agent_id == pettingzoo_env.agents[args.player]: # action_mask = collector.data.obs.mask[0] # action = np.random.choice(np.arange(len(action_mask)), p=action_mask / np.sum(action_mask)) observation = {"observation": collector.data.obs.obs, @@ -327,5 +347,8 @@ def play( # train the agent and watch its performance in a match! args = get_args() result, agent = train_agent(args) - # watch(args, agent) - play(args, agent) + if args.cpu_players == 2: + + watch(args, agent) + else: + play(args, agent) diff --git a/gobblet/examples/example_basic.py b/gobblet/examples/example_basic.py index 607ff82..2215968 100644 --- a/gobblet/examples/example_basic.py +++ b/gobblet/examples/example_basic.py @@ -7,8 +7,10 @@ def get_parser() -> argparse.ArgumentParser: parser = argparse.ArgumentParser() parser.add_argument( - "--render-mode", type=str, default="human", help="options: human, rgb_array, text, text_full" + "--render_mode", type=str, default="human", choices=["human", "rgb_array", "text", "text_full"], + help="Choose the rendering mode for the game." 
) + parser.add_argument( "--seed", type=int, default=None, help="random seed for board and policy" ) diff --git a/gobblet/examples/example_record_game.py b/gobblet/examples/example_record_game.py index ef85409..1aff207 100644 --- a/gobblet/examples/example_record_game.py +++ b/gobblet/examples/example_record_game.py @@ -9,10 +9,10 @@ def get_parser() -> argparse.ArgumentParser: "--seed", type=int, default=None, help="Set random seed manually (will only affect CPU agents)" ) parser.add_argument( - "--cpu-players", type=int, default=1, help="Number of CPU players (options: 1, 2)" + "--cpu-players", type=int, default=1, choices=[0, 1, 2], help="Number of CPU players (options: 0, 1, 2)" ) parser.add_argument( - "--player", type=int, default=0, help="Choose which player to play as: red = 0, yellow = 1" + "--player", type=int, default=0, choices=[0,1], help="Choose which player to play as: red = 0, yellow = 1" ) parser.add_argument( "--screen-width", type=int, default=640, help="Width of pygame screen in pixels" diff --git a/gobblet/examples/example_user_input.py b/gobblet/examples/example_user_input.py index e384afd..0ba7f7f 100644 --- a/gobblet/examples/example_user_input.py +++ b/gobblet/examples/example_user_input.py @@ -9,10 +9,10 @@ def get_parser() -> argparse.ArgumentParser: "--seed", type=int, default=None, help="Set random seed manually (will only affect CPU agents)" ) parser.add_argument( - "--cpu-players", type=int, default=1, help="Number of CPU players (options: 1, 2)" + "--cpu-players", type=int, default=1, choices=[0, 1, 2], help="Number of CPU players (options: 0, 1, 2)" ) parser.add_argument( - "--player", type=int, default=0, help="Choose which player to play as: red = 0, yellow = 1" + "--player", type=int, default=0, choices=[0,1], help="Choose which player to play as: red = 0, yellow = 1" ) parser.add_argument( "--screen-width", type=int, default=640, help="Width of pygame screen in pixels" @@ -38,9 +38,6 @@ def get_args() -> argparse.Namespace: env 
= gobblet_v1.env(render_mode="human", args=args) env.reset() - env.render() # need to render the environment before pygame can take user input - - manual_policy = gobblet_v1.ManualPolicy(env) for agent in env.agent_iter(): diff --git a/gobblet/game/manual_policy.py b/gobblet/game/manual_policy.py index 9c329ac..d81ed05 100644 --- a/gobblet/game/manual_policy.py +++ b/gobblet/game/manual_policy.py @@ -13,6 +13,8 @@ def __init__(self, env, agent_id: int = 0, recorder: GIFRecorder = None): self.agent = self.env.agents[self.agent_id] self.recorder = recorder + env.render() # need to render the environment before pygame can take user input + def __call__(self, observation, agent): recorder = self.recorder env = self.env diff --git a/pyproject.toml b/pyproject.toml index 897d439..f395831 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "gobblet-rl" -version = "1.2.1" +version = "1.2.2" authors = [ { name="Elliot Tower", email="elliot@elliottower.com" }, ]