-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
194 lines (170 loc) · 8.64 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
import argparse
import copy
import time
import numpy as np
from classes.action import Action
from classes.agent import Agent
from classes.episode_visualizer import EpisodeVisualizer
from classes.game import Game
from classes.interactive_visualizer import InteractiveVisualizer
from classes.racetrack_list import RacetrackList
from classes.state import State
from classes.utils import check_positive_int, get_track
def play_user(track: np.ndarray) -> None:
"""
Let the user play a game on a racetrack
:param track: The racetrack for the game
"""
game = Game(racetrack=track, random_state=42)
visualizer = InteractiveVisualizer(game.get_state_with_racetrack(), "racetrack")
print("Playing as user...")
while not game.is_finished():
print(f"* {game.get_state()}")
input_str = input("* Please input the change to velocity. format: \"<y> <x>\": ")
input_list = input_str.split(" ")
action = Action(int(input_list[0]), int(input_list[1]))
game.step(action)
visualizer.update_agent(game.get_state().agent_position)
print("* You reached the finish line!")
def play_single_game(game: Game, agent: Agent) -> list[tuple[State, Action, int]]:
episode: list[tuple[State, Action, int]] = []
while not game.is_finished() and game.get_n_steps() < 1000:
state = game.get_state()
action = agent.determine_best_action(state)
reward = game.step(action)
episode.append((state, action, reward))
return episode
def play_ai(track: np.ndarray, episodes_to_train: int, preliminary_results: int | None, testruns: int,
playstyle_interactive: bool) -> None:
"""
Train an AI on a racetrack, and then watch it play.
:param track: The racetrack for the game
:param episodes_to_train: How many episodes to train the agent
:param playstyle_interactive: If the game that AI plays should be shown life (aka interactively), or if the whole game should be shown in one static image (not interactively)
:param preliminary_results: After how many episodes to show preliminary results (aka do a test run). If `None`, then no preliminary results will be shown.
"""
if playstyle_interactive:
preliminary_results = None
agent = Agent(random_state=42)
game = Game(racetrack=track, random_state=42)
visualizer = EpisodeVisualizer()
# Train Model
print("Training agent...")
if preliminary_results is not None:
print("* <n_episode> <n_steps> ")
else:
print("* <n_episode> ")
start = time.time()
for i in range(1, episodes_to_train + 1):
# play one episode & train the agent on this episode
episode: list[tuple[State, Action, int]] = []
while not game.is_finished() and game.get_n_steps() < 1000:
state = game.get_state()
action = agent.determine_epsilon_action(state, 0.1)
reward = game.noisy_step(action)
episode.append((state, action, reward))
agent.learn(episode)
game.reset()
# show preliminary results, if specified
if preliminary_results is not None:
if i % preliminary_results == 0 or i == 1:
test_game = Game(racetrack=track, random_state=43)
test_episode = play_single_game(test_game, copy.deepcopy(agent))
test_episode.append((test_game.get_state(), Action(0, 0), 0)) # Append last state for drawing
visualizer.visualize_episode(track, test_episode,
f"racetrack | training: n_episode={i}, n_steps={test_game.get_n_steps()}")
print(f"* {i} {test_game.get_n_steps()}")
elif i % 500 == 0:
print(f"* {i}")
end = time.time()
print(f"* train time: {end - start : 2.4f}s")
# Evaluate Model
game = Game(racetrack=track, random_state=43)
print("Evaluating trained agent...")
if playstyle_interactive:
visualizer = InteractiveVisualizer(game.get_state_with_racetrack(), "racetrack")
while not game.is_finished() and game.get_n_steps() < 1000:
state = game.get_state()
action = agent.determine_best_action(state)
game.step(action)
visualizer.update_agent(game.get_state().agent_position)
print(
f"* ai plays step {game.get_n_steps()} [action: {action}, pos: {game.get_state().agent_position}, vel: {game.get_state().agent_velocity}]")
time.sleep(0.5)
print("* ai reached the finish line!")
else:
print(f"* plotting {testruns} games")
visualizer = EpisodeVisualizer()
for i in range(0, testruns):
episode: list[tuple[State, Action, int]] = []
while not game.is_finished() and game.get_n_steps() < 1000:
state = game.get_state()
action = agent.determine_best_action(state)
reward = game.step(action)
episode.append((state, action, reward))
episode.append((game.get_state(), Action(0, 0), 0)) # Append last state to episode for drawing
visualizer.visualize_episode(track, episode, f"racetrack | testrun {i + 1}, n_steps={game.get_n_steps()}")
game.reset()
def main() -> None:
# define arguments
parser = argparse.ArgumentParser(prog="racetrack",
description="Train an AI to drive on a simple racetrack, by using reinforcement learning with monte carlo")
parser.add_argument('-p', '--playstyle',
help="if the AI should play the game live (ai_interactive), or the game of the AI should be shown as static image (ai_static), or the user can play (user)",
choices=["user", "ai_interactive", "ai_static"], default="ai_static")
parser.add_argument('-e', '--episodes-to-train', help="how many episodes to train the model",
type=check_positive_int, default=3000)
group = parser.add_mutually_exclusive_group()
group.add_argument('-tr', '--track-random', help="generate a random racetrack with specified seed",
type=check_positive_int, metavar='SEED')
group.add_argument('-tn', '--track-number',
help="select a predefined racetrack, where the number represents the number of the map",
type=int, choices=range(0, RacetrackList.get_tracks_count()))
parser.add_argument('-pr', '--preliminary-results',
help="after how many episodes to show a preliminary result during training (only for ai_static)",
type=check_positive_int)
parser.add_argument('-fr', '--final-results',
help="how many final games to show after training (only for ai_static)",
type=check_positive_int)
# parse arguments
args = parser.parse_args()
playstyle = args.playstyle
episodes_to_train = args.episodes_to_train
preliminary_results = args.preliminary_results
final_results = args.final_results
track_random_seed = args.track_random
track_number = args.track_number
if track_number is None and track_random_seed is None:
track_number = 0 # set default
if final_results is None and playstyle == "ai_static":
final_results = 3 # set default
# print start configuration
print("Starting new game with:")
print(f"* playstyle = {playstyle}")
if track_number is not None:
print(f"* track = track {track_number}")
else:
print(f"* track = random with seed {track_random_seed}")
print(f"* episodes to train = {episodes_to_train}")
if playstyle == "ai_static":
if preliminary_results is None:
print(f"* preliminary results during training = none")
else:
print(f"* preliminary results during training = all {preliminary_results} episodes")
elif preliminary_results is not None:
print(f"* note: parameter 'preliminary_results' is ignored since playstyle is not 'ai_static'")
if playstyle == "ai_static":
print(f"* final results after training = {final_results} games")
elif final_results is not None:
print(f"* note: parameter 'final_results' is ignored since playstyle is not 'ai_static'")
# get track & play
track = get_track(track_number, track_random_seed)
match playstyle:
case "user":
play_user(track)
case "ai_interactive":
play_ai(track, episodes_to_train, preliminary_results, final_results, playstyle_interactive=True)
case "ai_static":
play_ai(track, episodes_to_train, preliminary_results, final_results, playstyle_interactive=False)
if __name__ == "__main__":
main()