This repository was archived by the owner on Jun 13, 2020. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 9
/
Copy pathagents.py
55 lines (44 loc) · 1.6 KB
/
agents.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
import ckpt_util
from collections import *
import event_log
import models
import numpy as np
import replay_memory
import sys
import tensorflow as tf
import time
import util
def add_opts(parser):
    """Register the agent command line options on ``parser``.

    Adds a single flag, ``--gpu-mem-fraction`` (float, default 0.5).
    ``parser`` only needs to expose an argparse-style ``add_argument``.
    """
    flag_settings = {
        'type': float,
        'default': 0.5,
        'help': "fraction of gpu mem to allocate",
    }
    parser.add_argument('--gpu-mem-fraction', **flag_settings)
class RandomAgent(object):
    """Agent that ignores its inputs and emits random (turn, move) actions."""

    def __init__(self, opts):
        # opts is accepted but not read by this agent.
        self.stats_ = Counter()

    def action_given(self, state, is_eval):
        """Return a freshly sampled ``(turn, move)`` pair.

        Neither ``state`` nor ``is_eval`` influences the result; two
        independent draws from ``np.random.random()`` are rescaled.
        """
        # turn is spread over [-1, 1)
        turn = 2 * np.random.random() - 1
        # move is spread over [-0.5, 1.5), i.e. favor moving forward
        move = 2 * np.random.random() - 0.5
        return turn, move

    def end_of_episode(self):
        """Do nothing at the end of an episode."""
        return None
class NafAgent(object):
    """Agent that picks actions with a ``models.NafNetwork``.

    The agent owns its own TensorFlow session and a checkpoint loader
    (``ckpt_util.AgentCkptLoader``) built from ``opts.ckpt_dir``.
    """

    def __init__(self, opts):
        """Build the network and session, then load the first checkpoint.

        Reads ``opts.gpu_mem_fraction`` and ``opts.ckpt_dir``; ``opts`` is
        also handed to ``models.NafNetwork``.
        """
        self.network = models.NafNetwork("naf", action_dim=2, opts=opts)

        config = tf.ConfigProto()
        # optional debugging toggles, left disabled:
        #config.gpu_options.allow_growth = True
        #config.log_device_placement = True
        config.gpu_options.per_process_gpu_memory_fraction = opts.gpu_mem_fraction
        self.sess = tf.Session(config=config)

        with self.sess.as_default():
            # setup saver to load first set of ckpts. block until some are available
            self.loader = ckpt_util.AgentCkptLoader(self.sess, opts.ckpt_dir)
            self.loader.blocking_load_ckpt()
            # dump info on vars (everything except bias variables) to stderr.
            for v in tf.all_variables():
                if '/biases:' not in v.name:
                    # sys.stderr.write instead of the `print >>` statement:
                    # same output, but valid on both Python 2 and Python 3.
                    sys.stderr.write(
                        "%s %s\n" % (v.name, util.shape_and_product_of(v)))

    def action_given(self, state, is_eval):
        """Return the network's action for ``state``.

        The call is made with this agent's session as the default session.
        Noise is requested from the network (``add_noise=True``) unless
        ``is_eval`` is truthy.
        """
        with self.sess.as_default():
            return self.network.action_given(state, add_noise=(not is_eval))

    def end_of_episode(self):
        """Ask the checkpoint loader to reload if a new checkpoint exists."""
        self.loader.reload_if_new_ckpt()