MyBot2.py
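
"""Halite III bot driven by the shared actor-critic network.

Each turn the bot picks a move for every ship with agent.issue_command,
assigns a hand-crafted reward to that move, and every 100 turns performs a
policy-gradient update and checkpoints the network for the next game.
"""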
import argparse, copy, json, math, random
import logging
import pickle
import torch
import torch.nn.functional as F
from hlt import constants
from hlt.networking import Game
from hlt.positionals import Direction, Position
from actor_critic.train import (agent, args, env, map_dim, net, optimizer, writer)
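# net, agent, env, optimizer (and writer, unused in this file) come from the
# shared actor_critic.train module, so every game keeps training the same network.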
parser = argparse.ArgumentParser()
parser.add_argument('--scale', action='store_true',
                    help='reward based on scale of halite returned to base')
parser.add_argument('--unrestricted', action='store_true',
                    help='allow an unrestricted number of ships')
cmdl_args = parser.parse_args()
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
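
# Resume from the checkpoint written at the end of the previous game, if any.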
with open('data/data.json', 'r') as f:
    data = json.load(f)

if data['game_num'] > 0:
    # model_params = torch.load('model/model_{}.pth'.format(data['best_game']))
    model_params = torch.load('model/model_{}.pth'.format(data['game_num'] - 1))
    net.load_state_dict(model_params)
""" <<<Game Begin>>> """
game = Game()
game.ready("Actor-Critic")
logging.info("Successfully created bot! My Player ID is {}.".format(game.my_id))
all_rewards = []
go_home = {}
losses = []
current_rewards = []
actions = []
""" <<<Game Loop>>> """
while True:
    game.update_frame()
    me = game.me
    game_map = game.game_map
    env.update_observations(game_map, me)

    command_queue = []

    # With a restricted fleet, spawn the single ship on the first turn only.
    if not cmdl_args.unrestricted and game.turn_number == 1:
        command_queue.append(me.shipyard.spawn())

    frame_num = game.turn_number + data['num_turns_per_game'] * data['game_num']
    logging.info('Frame number: {}'.format(frame_num))
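
    # Choose a move for every ship we own this turn.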
    for ship in me.get_ships():
        # A ship sitting on the shipyard has nothing left to deposit.
        if ship.position == me.shipyard.position:
            go_home[ship.id] = False

        new_env = copy.deepcopy(env)
        new_state = torch.zeros(map_dim, map_dim).to(device)

        if go_home[ship.id] and not game_map[me.shipyard.position].is_occupied:
            # Ship is returning cargo: head straight for the shipyard.
            destination = me.shipyard.position
            action_idx = 5
        else:
            go_home[ship.id] = False
            action_idx, action, rollout = agent.issue_command(net, env)
            actions.append(rollout)
            if action == constants.DOCK:
                destination = me.shipyard.position
                go_home[ship.id] = True
            else:
                destination = game_map.calculate_destination(ship, action)

        movement = game_map.naive_navigate(ship, destination)
        command_queue.append(ship.move(movement))
        next_pos = game_map.calculate_next_position(ship, movement)

        # Occupancy grid reflecting the ship's post-move position.
        new_state[ship.position.x][ship.position.y] = 0
        new_state[next_pos.x][next_pos.y] = 1
        # Calculate the reward for the chosen action.
        if cmdl_args.scale:
            # Scaled rewards: deposits are worth their halite value.
            if me.shipyard.position == next_pos:
                halite_deposit_amount = ship.halite_amount - (0.1 * game_map[ship.position].halite_amount)
                reward = -10.0 if halite_deposit_amount <= 100 else halite_deposit_amount
            elif 0.1 * game_map[ship.position].halite_amount > ship.halite_amount:
                # Ship cannot afford the cost of moving off this cell.
                reward = 0 - max(0.1 * game_map[ship.position].halite_amount, 100.0)
            elif action == Direction.Still:
                if ship.position == me.shipyard.position:
                    reward = -10.0
                else:
                    # Mining collects a quarter of the cell's halite.
                    reward = 0.25 * game_map[ship.position].halite_amount
            elif game_map[next_pos].halite_amount > game_map[ship.position].halite_amount:
                reward = 10.0
            else:
                reward = 0 - (0.1 * game_map[ship.position].halite_amount)
        else:
            # Fixed rewards: small positive/negative signals.
            if me.shipyard.position == next_pos:
                hal_amount = ship.halite_amount - (0.1 * game_map[ship.position].halite_amount)
                if hal_amount > 100:
                    reward = 5.0
                elif hal_amount > 10:
                    reward = 0.5
                else:
                    reward = -1.0
            elif action == Direction.Still and game_map[next_pos].halite_amount > 0:
                reward = 1.0
            elif action == Direction.Still and game_map[next_pos].halite_amount == 0:
                reward = -1.0
            elif game_map[next_pos].halite_amount > game_map[ship.position].halite_amount:
                reward = 1.0
            else:
                reward = -1.0

        current_rewards.append(reward)
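
    # Every 100 turns, run one actor-critic update: compute discounted returns
    # G_t = r_t + gamma * G_{t+1} over the window, normalise them, and combine a
    # policy-gradient loss (-log_prob * advantage) with a smooth-L1 value loss.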
    if game.turn_number % 100 == 0 and len(actions) > 0:
        all_rewards += current_rewards
        logging.info('Beginning back prop...')
        total_reward = 0
        policy_losses = []
        value_losses = []
        rewards = []
        # Discounted returns, computed backwards over the reward window.
        for r in current_rewards[::-1]:
            total_reward = r + args.gamma * total_reward
            rewards.insert(0, total_reward)
        rewards = torch.tensor(rewards).to(device)
        rewards = (rewards - rewards.mean()) / (rewards.std() + 1e-8)
        for (log_prob, val), r in zip(actions, rewards):
            advantage = r - val.item()
            policy_losses.append(-log_prob * advantage)
            # Keep val in the graph so the critic receives gradients.
            value_losses.append(F.smooth_l1_loss(val.squeeze(), r))
        optimizer.zero_grad()
        loss = torch.stack(policy_losses).sum() + torch.stack(value_losses).sum()
        loss.backward()
        optimizer.step()
        losses.append(loss.item())
        # Reset the per-window buffers so the next update only sees new transitions.
        current_rewards = []
        actions = []
        logging.info('Saving model parameters')
        torch.save(net.state_dict(), 'model/model_{}.pth'.format(data['game_num']))
        with open('data/total_reward.json', 'w') as f:
            json.dump(sum(all_rewards), f)
        if len(losses) > 0:
            with open('data/avg_loss.json', 'w') as f:
                json.dump(sum(losses) / float(len(losses)), f)
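
    # In unrestricted mode, keep spawning ships early in the game while we can
    # afford them and the shipyard is clear.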
    if (cmdl_args.unrestricted and game.turn_number <= 100
            and me.halite_amount >= constants.SHIP_COST
            and not game_map[me.shipyard].is_occupied):
        command_queue.append(me.shipyard.spawn())

    game.end_turn(command_queue)
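
# Note: this file is normally launched by the Halite III game engine rather than
# run directly, e.g. something like:
#   ./halite --width 32 --height 32 "python3 MyBot2.py --scale" "python3 MyBot.py"
# (exact engine flags depend on your local halite binary).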