Gym environments and stable-baselines integration for RL #3215

Merged 15 commits on Dec 17, 2020
536 changes: 0 additions & 536 deletions PythonClient/car/DQNcar.py

This file was deleted.

545 changes: 0 additions & 545 deletions PythonClient/multirotor/DQNdrone.py

This file was deleted.

9 changes: 9 additions & 0 deletions PythonClient/reinforcement_learning/airgym/__init__.py
@@ -0,0 +1,9 @@
from gym.envs.registration import register

register(
id="airsim-drone-sample-v0", entry_point="airgym.envs:AirSimDroneEnv",
)

register(
id="airsim-car-sample-v0", entry_point="airgym.envs:AirSimCarEnv",
)
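
These registrations make the environments available through the standard Gym factory, with constructor arguments passed as keyword arguments. A minimal construction sketch, assuming an AirSim simulation is already running and reachable at 127.0.0.1 (the step_length and image_shape values are illustrative, not mandated by this PR):

import gym
import airgym  # importing the package runs the register() calls above

# Keyword arguments are forwarded to the environment constructors in airgym.envs.
car_env = gym.make(
    "airsim-car-sample-v0", ip_address="127.0.0.1", image_shape=(84, 84, 1)
)
drone_env = gym.make(
    "airsim-drone-sample-v0",
    ip_address="127.0.0.1",
    step_length=0.25,
    image_shape=(84, 84, 1),
)
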
4 changes: 4 additions & 0 deletions PythonClient/reinforcement_learning/airgym/envs/__init__.py
@@ -0,0 +1,4 @@
from airgym.envs.airsim_env import AirSimEnv
from airgym.envs.car_env import AirSimCarEnv
from airgym.envs.drone_env import AirSimDroneEnv

31 changes: 31 additions & 0 deletions PythonClient/reinforcement_learning/airgym/envs/airsim_env.py
@@ -0,0 +1,31 @@
import numpy as np
import airsim

import gym
from gym import spaces


class AirSimEnv(gym.Env):
metadata = {"render.modes": ["rgb_array"]}

def __init__(self, image_shape):
self.observation_space = spaces.Box(0, 255, shape=image_shape, dtype=np.uint8)
self.viewer = None

def __del__(self):
raise NotImplementedError()

def _get_obs(self):
raise NotImplementedError()

def _compute_reward(self):
raise NotImplementedError()

def close(self):
raise NotImplementedError()

def step(self, action):
raise NotImplementedError()

def render(self):
return self._get_obs()
153 changes: 153 additions & 0 deletions PythonClient/reinforcement_learning/airgym/envs/car_env.py
@@ -0,0 +1,153 @@
import setup_path
import airsim
import numpy as np
import math
import time

import gym
from gym import spaces
from airgym.envs.airsim_env import AirSimEnv


class AirSimCarEnv(AirSimEnv):
def __init__(self, ip_address, image_shape):
super().__init__(image_shape)

self.image_shape = image_shape
self.start_ts = 0

self.state = {
"position": np.zeros(3),
"collision": False,
"prev_position": np.zeros(3),
}

self.car = airsim.CarClient(ip=ip_address)
self.action_space = spaces.Discrete(6)

self.image_request = airsim.ImageRequest(
"0", airsim.ImageType.DepthPerspective, True, False
)

self.car_controls = airsim.CarControls()
self.car_state = None

self.state["pose"] = None
self.state["prev_pose"] = None
self.state["collision"] = None

def _setup_car(self):
self.car.reset()
self.car.enableApiControl(True)
self.car.armDisarm(True)
time.sleep(0.01)

def __del__(self):
self.car.reset()

def _do_action(self, action):
self.car_controls.brake = 0
self.car_controls.throttle = 1
if action == 0:
self.car_controls.throttle = 0
self.car_controls.brake = 1
elif action == 1:
self.car_controls.steering = 0
elif action == 2:
self.car_controls.steering = 0.5
elif action == 3:
self.car_controls.steering = -0.5
elif action == 4:
self.car_controls.steering = 0.25
else:
self.car_controls.steering = -0.25

self.car.setCarControls(self.car_controls)
time.sleep(1)

def transform_obs(self, responses):
img1d = np.array(responses[0].image_data_float, dtype=float)  # np.float is removed in NumPy >= 1.24
img1d = 255 / np.maximum(np.ones(img1d.size), img1d)
img2d = np.reshape(img1d, (responses[0].height, responses[0].width))

from PIL import Image

image = Image.fromarray(img2d)
im_final = np.array(image.resize((84, 84)).convert("L"))

return im_final.reshape([84, 84, 1])

def _get_obs(self):
responses = self.car.simGetImages([self.image_request])
image = self.transform_obs(responses)

self.car_state = self.car.getCarState()
collision = self.car.simGetCollisionInfo().has_collided

self.state["prev_pose"] = self.state["pose"]
self.state["pose"] = self.car_state.kinematics_estimated
self.state["collision"] = collision

return image

def _compute_reward(self):
MAX_SPEED = 300
MIN_SPEED = 10
thresh_dist = 3.5
beta = 3

z = 0
pts = [
np.array([0, -1, z]),
np.array([130, -1, z]),
np.array([130, 125, z]),
np.array([0, 125, z]),
np.array([0, -1, z]),
np.array([130, -1, z]),
np.array([130, -128, z]),
np.array([0, -128, z]),
np.array([0, -1, z]),
]
pd = self.state["pose"].position
car_pt = np.array([pd.x_val, pd.y_val, pd.z_val])

dist = 10000000
for i in range(0, len(pts) - 1):
dist = min(
dist,
np.linalg.norm(np.cross((car_pt - pts[i]), (car_pt - pts[i + 1])))
/ np.linalg.norm(pts[i] - pts[i + 1]),
)

# dist is the perpendicular distance from the car to the nearest track segment
if dist > thresh_dist:
reward = -3
else:
reward_dist = math.exp(-beta * dist) - 0.5
reward_speed = (
(self.car_state.speed - MIN_SPEED) / (MAX_SPEED - MIN_SPEED)
) - 0.5
reward = reward_dist + reward_speed

done = 0
if reward < -1:
done = 1
if self.car_controls.brake == 0:
if self.car_state.speed <= 1:
done = 1
if self.state["collision"]:
done = 1

return reward, done

def step(self, action):
self._do_action(action)
obs = self._get_obs()
reward, done = self._compute_reward()

return obs, reward, done, self.state

def reset(self):
self._setup_car()
self._do_action(1)
return self._get_obs()
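
Since this PR adds stable-baselines integration, the car environment can be trained with an off-the-shelf DQN. The sketch below is a minimal example assuming stable-baselines3 with a CnnPolicy and a local simulator; it is not the exact training script shipped with the PR, and the hyperparameters are illustrative:

import gym
import airgym  # registers airsim-car-sample-v0

from stable_baselines3 import DQN
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.vec_env import DummyVecEnv

# Wrap the single AirSim car environment for stable-baselines3.
env = DummyVecEnv(
    [
        lambda: Monitor(
            gym.make(
                "airsim-car-sample-v0",
                ip_address="127.0.0.1",
                image_shape=(84, 84, 1),
            )
        )
    ]
)

# CnnPolicy consumes the 84x84x1 uint8 depth observation produced by transform_obs().
model = DQN("CnnPolicy", env, buffer_size=200_000, learning_starts=1_000, verbose=1)
model.learn(total_timesteps=100_000)
model.save("dqn_airsim_car")
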
164 changes: 164 additions & 0 deletions PythonClient/reinforcement_learning/airgym/envs/drone_env.py
@@ -0,0 +1,164 @@
import setup_path
import airsim
import numpy as np
import math
import time
from argparse import ArgumentParser

import gym
from gym import spaces
from airgym.envs.airsim_env import AirSimEnv


class AirSimDroneEnv(AirSimEnv):
def __init__(self, ip_address, step_length, image_shape):
super().__init__(image_shape)
self.step_length = step_length
self.image_shape = image_shape

self.state = {
"position": np.zeros(3),
"collision": False,
"prev_position": np.zeros(3),
}

self.drone = airsim.MultirotorClient(ip=ip_address)
self.action_space = spaces.Discrete(7)
self._setup_flight()

self.image_request = airsim.ImageRequest(
3, airsim.ImageType.DepthPerspective, True, False
)

def __del__(self):
self.drone.reset()

def _setup_flight(self):
self.drone.reset()
self.drone.enableApiControl(True)
self.drone.armDisarm(True)

# Set home position and velocity
self.drone.moveToPositionAsync(-0.55265, -31.9786, -19.0225, 10).join()
self.drone.moveByVelocityAsync(1, -0.67, -0.8, 5).join()

def transform_obs(self, responses):
img1d = np.array(responses[0].image_data_float, dtype=float)  # np.float is removed in NumPy >= 1.24
img1d = 255 / np.maximum(np.ones(img1d.size), img1d)
img2d = np.reshape(img1d, (responses[0].height, responses[0].width))

from PIL import Image

image = Image.fromarray(img2d)
im_final = np.array(image.resize((84, 84)).convert("L"))

return im_final.reshape([84, 84, 1])

def _get_obs(self):
responses = self.drone.simGetImages([self.image_request])
image = self.transform_obs(responses)
self.drone_state = self.drone.getMultirotorState()

self.state["prev_position"] = self.state["position"]
self.state["position"] = self.drone_state.kinematics_estimated.position
self.state["velocity"] = self.drone_state.kinematics_estimated.linear_velocity

collision = self.drone.simGetCollisionInfo().has_collided
self.state["collision"] = collision

return image

def _do_action(self, action):
quad_offset = self.interpret_action(action)
quad_vel = self.drone.getMultirotorState().kinematics_estimated.linear_velocity
self.drone.moveByVelocityAsync(
quad_vel.x_val + quad_offset[0],
quad_vel.y_val + quad_offset[1],
quad_vel.z_val + quad_offset[2],
5,
).join()

def _compute_reward(self):
thresh_dist = 7
beta = 1

z = -10
pts = [
np.array([-0.55265, -31.9786, -19.0225]),
np.array([48.59735, -63.3286, -60.07256]),
np.array([193.5974, -55.0786, -46.32256]),
np.array([369.2474, 35.32137, -62.5725]),
np.array([541.3474, 143.6714, -32.07256]),
]

quad_pt = np.array(
[
self.state["position"].x_val,
self.state["position"].y_val,
self.state["position"].z_val,
]
)

if self.state["collision"]:
reward = -100
else:
dist = 10000000
for i in range(0, len(pts) - 1):
dist = min(
dist,
np.linalg.norm(np.cross((quad_pt - pts[i]), (quad_pt - pts[i + 1])))
/ np.linalg.norm(pts[i] - pts[i + 1]),
)

if dist > thresh_dist:
reward = -10
else:
reward_dist = math.exp(-beta * dist) - 0.5
reward_speed = (
np.linalg.norm(
[
self.state["velocity"].x_val,
self.state["velocity"].y_val,
self.state["velocity"].z_val,
]
)
- 0.5
)
reward = reward_dist + reward_speed

done = 0
if reward <= -10:
done = 1

return reward, done

def step(self, action):
self._do_action(action)
obs = self._get_obs()
reward, done = self._compute_reward()

return obs, reward, done, self.state

def reset(self):
self._setup_flight()
return self._get_obs()

def interpret_action(self, action):
if action == 0:
quad_offset = (self.step_length, 0, 0)
elif action == 1:
quad_offset = (0, self.step_length, 0)
elif action == 2:
quad_offset = (0, 0, self.step_length)
elif action == 3:
quad_offset = (-self.step_length, 0, 0)
elif action == 4:
quad_offset = (0, -self.step_length, 0)
elif action == 5:
quad_offset = (0, 0, -self.step_length)
else:
quad_offset = (0, 0, 0)

return quad_offset
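
Before wiring the drone environment into a learning algorithm, a short random rollout is a quick sanity check that the simulator connection, observations, and reward all behave. A sketch assuming a local simulator; step_length=0.25 is an illustrative value:

import gym
import airgym  # registers airsim-drone-sample-v0

env = gym.make(
    "airsim-drone-sample-v0",
    ip_address="127.0.0.1",
    step_length=0.25,
    image_shape=(84, 84, 1),
)

obs = env.reset()
for _ in range(20):
    action = env.action_space.sample()  # one of the 7 discrete velocity offsets
    obs, reward, done, info = env.step(action)
    print(reward, done, info["collision"])
    if done:
        obs = env.reset()
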