# run.py
# Forked from crowdAI/marlo-multi-agent-starter-kit
import marlo
import os
import json
def get_join_tokens():
    """
    Return the join tokens for the agents taking part in one game.

    While being graded (``marlo.is_grading()`` is true) the tokens are
    requested from the evaluator; otherwise a local two-agent game is
    created with ``marlo.make``.
    """
    if not marlo.is_grading():
        # When debugging locally, please ensure that you have a Minecraft
        # client running on port 10000 and 10001 by doing :
        #   $MALMO_MINECRAFT_ROOT/launchClient.sh -port 10000
        #   $MALMO_MINECRAFT_ROOT/launchClient.sh -port 10001
        print("Generating join tokens locally...")
        local_clients = [('127.0.0.1', 10000), ('127.0.0.1', 10001)]
        local_params = {
            "client_pool": local_clients,
            "agent_names": [
                "MarLo-Agent-0",
                "MarLo-Agent-1",
            ],
        }
        return marlo.make('MarLo-BuildbattleTrain1-v0', params=local_params)

    # In the crowdAI Evaluation environment obtain the join_tokens from
    # the evaluator. The `params` parameter passed to
    # `evaluator_join_token` only allows the following keys :
    #   "seed",
    #   "tick_length",
    #   "max_retries",
    #   "retry_sleep",
    #   "step_sleep",
    #   "skip_steps",
    #   "videoResolution",
    #   "continuous_to_discrete",
    #   "allowContinuousMovement",
    #   "allowDiscreteMovement",
    #   "allowAbsoluteMovement",
    #   "add_noop_command",
    #   "comp_all_commands"
    # TODO: Add this to the official documentation ?
    # Help Wanted :D Pull Requests welcome :D
    return marlo.evaluator_join_token(params={})
@marlo.threaded
def run_agent(join_token, agent_id):
    """
    Play one episode with a random-action agent.

    Initialises an environment from ``join_token``, resets it, then keeps
    stepping with actions sampled from the environment's action space until
    the environment reports ``done``, printing the reward, done flag and
    info of every step.

    Where agent_id is an integral number starting from 0
    In case, you have requested GPUs, then the agent_id will match
    the GPU device id assigned to this agent.
    (``agent_id`` is not used by this random agent itself.)

    NOTE(review): the caller unpacks the decorated call as
    ``thread_handler, _ = run_agent(...)``, so ``@marlo.threaded``
    presumably starts this body on its own thread and returns a handle
    pair rather than this function's return value; confirm against the
    MarLo documentation.
    """
    env = marlo.init(join_token)
    try:
        env.reset()
        done = False
        while not done:
            _action = env.action_space.sample()
            _obs, reward, done, info = env.step(_action)
            print("reward:", reward)
            print("done:", done)
            print("info", info)
    finally:
        # It is important to do this env.close(); the finally clause makes
        # sure it also happens when reset() or step() raises.
        env.close()
def run_episode():
    """
    Single episode run.

    Fetches the join tokens, starts one ``run_agent`` call per token and
    waits for all of them before printing a completion message. Returns
    ``None`` in every case, including the early exit taken when no join
    tokens are available.
    """
    join_tokens = get_join_tokens()

    # When the required number of episodes are evaluated
    # The evaluator returns False for join_tokens
    if not join_tokens:
        return

    # NOTE: If instead of a dynamic loop, you hard code the run_agent
    # function calls, then the evaluation of your code will fail in case
    # of a tournament, where multiple submissions can control different
    # agents in the same game.
    thread_handlers = []
    for _idx, join_token in enumerate(join_tokens):
        # Run agent-N on a separate thread
        thread_handler, _ = run_agent(join_token, _idx)
        # Accumulate thread handlers
        thread_handlers.append(thread_handler)

    # Wait for threads to complete or raise an exception.
    # Pass the whole accumulated list: handing over only the last
    # `thread_handler` would leave the other agent threads un-joined.
    marlo.utils.join_all(thread_handlers)
    print("Episode Run Complete")
if __name__ == "__main__":
    # In case of debugging locally, run the episode just once
    # and in case of when the agent is being evaluated, continue
    # running episodes for as long as the evaluator keeps supplying
    # join_tokens.
    if marlo.is_grading():
        # NOTE(review): the result of run_episode() is not inspected, so
        # this loop has no exit condition of its own; presumably the
        # evaluator ends the process once it stops supplying join tokens.
        # Confirm.
        while True:
            run_episode()
    else:
        print("Running single episode...")
        run_episode()