Merge pull request #303 from kengz/vec-env
Add vector environment, fix image transpose bug
kengz authored Apr 20, 2019
2 parents 031f2fb + 8e18e84 commit 5baf175
Showing 10 changed files with 701 additions and 78 deletions.
8 changes: 4 additions & 4 deletions run_lab.py
@@ -3,17 +3,16 @@
Specify what to run in `config/experiments.json`
Then run `python run_lab.py` or `yarn start`
'''
import os
# NOTE increase if needed. Pytorch thread overusage https://github.com/pytorch/pytorch/issues/975
os.environ['OMP_NUM_THREADS'] = '1'
from slm_lab import EVAL_MODES, TRAIN_MODES
from slm_lab.experiment import analysis, retro_analysis
from slm_lab.experiment.control import Session, Trial, Experiment
from slm_lab.experiment.monitor import InfoSpace
from slm_lab.lib import logger, util
from slm_lab.spec import spec_util
from xvfbwrapper import Xvfb
import os
import sys
import torch
import torch.multiprocessing as mp


@@ -72,7 +71,7 @@ def run_old_mode(spec_file, spec_name, lab_mode):
def run_by_mode(spec_file, spec_name, lab_mode):
'''The main run lab function for all lab_modes'''
logger.info(f'Running lab in mode: {lab_mode}')
# '@' is reserved for 'enjoy@{prename}'
# '@' is reserved for EVAL_MODES
os.environ['lab_mode'] = lab_mode.split('@')[0]
if lab_mode in TRAIN_MODES:
run_new_mode(spec_file, spec_name, lab_mode)
@@ -94,6 +93,7 @@ def main():


if __name__ == '__main__':
torch.set_num_threads(1) # prevent multithread slowdown
mp.set_start_method('spawn') # for distributed pytorch to work
if sys.platform == 'darwin':
# avoid xvfb for MacOS: https://github.com/nipy/nipype/issues/1400
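For context on the run_lab.py hunks above: the PR drops the `OMP_NUM_THREADS` environment variable set at import time and instead caps PyTorch's intra-op threads at entry, alongside the `spawn` start method for multiprocessing. A minimal sketch of that pattern, assuming nothing about the lab beyond the `torch`/`mp` calls shown in the diff:

```python
import torch
import torch.multiprocessing as mp


def main():
    # placeholder for the lab's real entry point
    print(f'intra-op threads: {torch.get_num_threads()}')


if __name__ == '__main__':
    torch.set_num_threads(1)  # cap CPU threads to avoid oversubscription across worker processes
    mp.set_start_method('spawn')  # needed for distributed PyTorch / CUDA across processes
    main()
```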
2 changes: 1 addition & 1 deletion slm_lab/agent/net/conv.py
@@ -172,7 +172,7 @@ def build_conv_layers(self, conv_hid_layers):
def forward(self, x):
'''
The feedforward step
Note that PyTorch takes (c,w,h) but gym provides (w,h,c), so preprocessing must be done before passing to network
Note that PyTorch takes (c,h,w) but gym provides (h,w,c), so preprocessing must be done before passing to network
'''
x = self.conv_model(x)
x = x.view(x.size(0), -1) # to (batch_size, -1)
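The conv.py change above is a docstring fix: PyTorch convolutions expect channel-first `(c, h, w)` input while gym returns channel-last `(h, w, c)` images, so observations must be transposed before the forward pass. A hedged illustration of that preprocessing step; the helper name and shapes are assumptions, not code from this PR:

```python
import numpy as np
import torch


def to_chw_tensor(obs):
    '''Convert a gym image observation of shape (h, w, c) to a (1, c, h, w) float tensor.'''
    chw = np.transpose(obs, (2, 0, 1))  # (h, w, c) -> (c, h, w)
    return torch.from_numpy(chw).float().unsqueeze(0)  # add batch dim


x = to_chw_tensor(np.zeros((84, 84, 4), dtype=np.uint8))
assert x.shape == (1, 4, 84, 84)
```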
19 changes: 7 additions & 12 deletions slm_lab/env/openai.py
@@ -1,5 +1,6 @@
from slm_lab.env.base import BaseEnv, ENV_DATA_NAMES
from slm_lab.env.wrapper import make_gym_env
from slm_lab.env.vec_env import make_gym_venv
from slm_lab.env.registration import register_env
from slm_lab.lib import logger, util
from slm_lab.lib.decorator import lab_api
@@ -10,22 +11,14 @@
logger = logger.get_logger(__name__)


def guard_reward(reward):
'''Some gym environments have buggy format and reward is in a np array'''
if np.isscalar(reward):
return reward
else: # some gym envs have weird reward format
assert len(reward) == 1
return reward[0]


class OpenAIEnv(BaseEnv):
'''
Wrapper for OpenAI Gym env to work with the Lab.
e.g. env_spec
"env": [{
"name": "CartPole-v0",
"num_envs": null,
"max_t": null,
"max_tick": 10000,
}],
@@ -40,7 +33,11 @@ def __init__(self, spec, e=None, env_space=None):
pass
seed = ps.get(spec, 'meta.random_seed')
stack_len = ps.get(spec, 'agent.0.memory.stack_len')
self.u_env = make_gym_env(self.name, seed, stack_len)
num_envs = ps.get(spec, f'env.{self.e}.num_envs')
if num_envs is None:
self.u_env = make_gym_env(self.name, seed, stack_len)
else: # make vector environment
self.u_env = make_gym_venv(self.name, seed, stack_len, num_envs)
self._set_attr_from_u_env(self.u_env)
self.max_t = self.max_t or self.u_env.spec.max_episode_steps
assert self.max_t is not None
@@ -65,7 +62,6 @@ def step(self, action):
if not self.is_discrete: # guard for continuous
action = np.array([action])
state, reward, done, _info = self.u_env.step(action)
reward = guard_reward(reward)
reward *= self.reward_scale
if util.to_render():
self.u_env.render()
@@ -109,7 +105,6 @@ def space_step(self, action_e):
if not self.is_discrete:
action = np.array([action])
state, reward, done, _info = self.u_env.step(action)
reward = guard_reward(reward)
reward *= self.reward_scale
if util.to_render():
self.u_env.render()
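To make the new branching in `OpenAIEnv.__init__` concrete: a `num_envs` value in the env spec switches construction from a single gym env to a vector env, whose `step()` returns batched arrays of states, rewards, and dones rather than scalars, which is also why the scalar `guard_reward()` helper is removed. A hedged sketch of the lookup; the spec values and prints are placeholders, and only the `ps.get` path and the `make_gym_env`/`make_gym_venv` names come from the diff above:

```python
import pydash as ps

spec = {
    'env': [{'name': 'CartPole-v0', 'num_envs': 4, 'max_t': None, 'max_tick': 10000}],
}

num_envs = ps.get(spec, 'env.0.num_envs')
if num_envs is None:
    print('single env: make_gym_env(name, seed, stack_len)')
else:
    print(f'vector env: make_gym_venv(name, seed, stack_len, num_envs={num_envs})')
```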