import random

import gym
import matplotlib.pyplot as plt
import numpy as np
from gym import Env
from gym.spaces import Discrete, Box
from scipy.integrate import odeint

from pytorch_DQN import Agent
# from utils import plotLearning
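
# Trains a DQN agent (from pytorch_DQN) on a simple tank-level control task:
# the agent opens or closes a drain valve to hold the water level at a set point.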

# Tank model parameters
A_tank = 10       # tank cross-sectional area, m^2
qin = 10          # inflow rate, m^3/min
qout = 20         # outflow rate with the drain valve open, m^3/min
A_out_pipe = 1    # outlet pipe cross-section, m^2 (used by the Torricelli variant below)
g = 9.8           # gravitational acceleration, m/s^2


def height_model(x, t, action=0):
    """Tank-level ODE; action (0 = valve closed, 1 = valve open) switches the outflow."""
    h = x[0]
    # Nonlinear Torricelli outflow, kept for reference (note the division by A_tank):
    # dhdt = (qin - action * A_out_pipe * np.sqrt(2 * g * h)) / A_tank
    dhdt = (qin - action * qout) / A_tank
    return dhdt
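
# A minimal sanity check of the model (a sketch, commented out): with the valve
# open (action=1) the level should drop at (qin - qout) / A_tank = -1 per time unit.
# t = np.linspace(0, 5, 6)
# levels = odeint(height_model, [50.0], t, args=(1,))
# print(levels.ravel())   # expect approximately [50, 49, 48, 47, 46, 45]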

class tank_env(Env):
    """Gym environment: keep the tank level near a set point by opening/closing the valve."""

    def __init__(self, set_point=None):
        self.action_space = Discrete(2)                    # 0 = valve closed, 1 = valve open
        self.observation_space = Box(0, 100, shape=(2,))   # observation: [level, set point]
        self.max_length = 1000
        if set_point is None:
            self.set_point = random.randint(20, 80)
        else:
            self.set_point = set_point
        self.state = np.array([random.randint(20, 80), self.set_point], dtype=float)

    def step(self, action):
        # Integrate the level ODE over one time unit with the chosen valve action.
        tn = np.linspace(0, 1, 2)
        sol = odeint(height_model, self.state[0], tn, args=(action,))
        self.state = np.array([sol[-1, 0], self.set_point], dtype=float)
        self.max_length -= 1
        reward = 0
        if self.state[0] <= 0.0 or self.state[0] >= 100.0:
            done = True    # tank emptied or overflowed
        elif self.max_length <= 0:
            done = True    # episode time limit reached
        elif (self.set_point - 2) <= self.state[0] <= (self.set_point + 2):
            done = False
            reward = 1     # level within +/- 2 of the set point
        else:
            done = False
        info = {}
        return self.state, reward, done, info

    def render(self):
        pass

    def reset(self, set_point=None):
        # Pick the new set point first so the returned state is consistent with it.
        if set_point is None:
            self.set_point = random.randint(20, 80)
        else:
            self.set_point = set_point
        self.state = np.array([random.randint(20, 80), self.set_point], dtype=float)
        self.max_length = 1000
        return self.state
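
# Quick environment sanity check (a sketch, commented out so it does not run on
# import): roll out a random policy for one episode and confirm the reward and
# termination logic behave. 'check_env' is a throwaway name for this check only.
# check_env = tank_env(set_point=50)
# obs = check_env.reset()
# total, done = 0, False
# while not done:
#     obs, r, done, _ = check_env.step(check_env.action_space.sample())
#     total += r
# print('random-policy return:', total)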

if __name__ == "__main__":
    # env = gym.make('LunarLander-v2')
    # env = gym.make('CartPole-v0')
    env = tank_env()
    agent = Agent(gamma=0.99, epsilon=1.0, batch_size=32, n_actions=2,
                  eps_end=0.01, input_dims=[2], lr=0.001)
    scores, eps_history = [], []
    n_games = 100

    for i in range(n_games):
        score = 0
        done = False
        observation = env.reset()
        while not done:
            env.render()
            action = agent.choose_action(observation)
            observation_, reward, done, info = env.step(action)
            score += reward
            agent.store_transition(observation, action, reward, observation_, done)
            agent.learn()
            observation = observation_
        scores.append(score)
        eps_history.append(agent.epsilon)
        avg_score = np.mean(scores[-100:])
        print(f'episode {i}, score {score}, average_score {avg_score:.2f}, epsilon {agent.epsilon:.3f}')

    x = [i + 1 for i in range(n_games)]  # episode index, x-axis for the plot below
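
    # A plotting sketch in place of the commented-out plotLearning helper, using
    # only matplotlib (already imported): episode scores on the left axis and the
    # epsilon decay on the right. The file name 'tank_env_dqn.png' is arbitrary.
    fig, ax1 = plt.subplots()
    ax1.plot(x, scores, color='tab:blue')
    ax1.set_xlabel('episode')
    ax1.set_ylabel('score', color='tab:blue')
    ax2 = ax1.twinx()
    ax2.plot(x, eps_history, color='tab:orange')
    ax2.set_ylabel('epsilon', color='tab:orange')
    plt.title('DQN on tank_env')
    plt.savefig('tank_env_dqn.png')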