-
Notifications
You must be signed in to change notification settings - Fork 22
/
dectiger.dpomdp
122 lines (121 loc) · 3.75 KB
/
dectiger.dpomdp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
# This is a Dec-POMDP (.dpomdp) file for the Dec-Tiger problem.
# For more detailed documentation, see example.dpomdp
# All right, here we go!
#
#The agents.
#----------
#Either 1) the number of agents:
# agents: %d
#or 2) a list of agent identifiers, e.g.:
# agents: agent1_name, name-of-agent2, ...
# Two agents, given as a count rather than as a list of names.
agents: 2
# discount: %f
# A discount factor of 1 means future rewards are not discounted.
discount: 1
#.0  <- NOTE(review): looks like the commented-out tail of "1.0"; confirm it can be dropped
# values: [ reward, cost ]
# The R: entries in this file are given as rewards (not costs).
values: reward
# states: [ %d, <list of states> ]
# Two named states, one for each side the tiger can be on.
states: tiger-left tiger-right
#
#The start state specification. Examples of this are:
# start: 0.3 0.1 0.0 0.2 0.5
# start: first-state
# start: 5
# start: uniform
# start include: first-state third-state
# start include: 1 3
# start exclude: fifth-state seventh-state
# Here: a uniform distribution over the two states declared above.
start:
uniform
#
#The actions declarations
#------------------------
#the (number/list of) actions for each of the agents on a separate line
# actions:
# [ %d, <list of actions> ]
# [ %d, <list of actions> ]
# ...
# [ %d, <list of actions> ]
# Two agents, so two lines follow "actions:". Both agents have the same
# three individual actions: listen, open-left and open-right.
actions:
listen open-left open-right
listen open-left open-right
#The observations declarations
#-----------------------------
#the (number/list of) observations for each of the agents on a separate line
# observations:
# [ %d, <list of observations> ]
# [ %d, <list of observations> ]
# ...
# [ %d, <list of observations> ]
# Two agents, so two lines follow "observations:". Both agents have the same
# two individual observations: hear-left and hear-right.
observations:
hear-left hear-right
hear-left hear-right
#Transition probabilities
# T: <a1 a2...an> : <start-state> : <end-state> : %f
#or
# T: <a1 a2...an> : <start-state> :
# %f %f ... %f P(s_1'|ja,s) ... P(s_k'|ja,s)
#or
# T: <a1 a2...an> : this is a |S| x |S| matrix
# %f %f ... %f P(s_1'|ja,s_1) ... P(s_k'|ja,s_1)
# %f %f ... %f ...
# ... ...
# %f %f ... %f P(s_1'|ja,s_k) ... P(s_k'|ja,s_k)
#or
# T: <a1 a2...an>
# [ identity, uniform ]
#
# Default for every joint action ("*"): the next state is uniformly
# distributed over the states.
T: * :
uniform
# Disabled explicit entry for both agents opening the right door; the
# wildcard entry above already assigns "uniform" to that joint action.
#T:open-right open-right :
#uniform
# Exception: when both agents listen, the state stays the same (identity).
# NOTE(review): this relies on a later, more specific entry overriding the
# earlier wildcard entry - confirm against the parser / format spec.
T: listen listen :
identity
#Observation probabilities
# O: <a1 a2...an> : <end-state> : <o1 o2 ... om> : %f
#or
# O: <a1 a2...an> : <end-state> :
# %f %f ... %f P(jo_1|ja,s') ... P(jo_x|ja,s')
#or
# O:<a1 a2...an> : - a |S|x|JO| matrix
# %f %f ... %f P(jo_1|ja,s_1') ... P(jo_x|ja,s_1')
# %f %f ... %f ...
# ... ...
# %f %f ... %f P(jo_1|ja,s_k') ... P(jo_x|ja,s_k')
#
# Default for every joint action ("*"): a uniform distribution over the
# joint observations.
O: * :
uniform
# Exception: when both agents listen, the joint observation depends on the
# state. For each state the four probabilities below are 0.85*0.85,
# 0.85*0.15, 0.15*0.85 and 0.15*0.15 (summing to 1), with the 0.85 factor on
# the observation that matches the tiger's side.
# NOTE(review): as with T:, this relies on these entries overriding the
# wildcard entry above - confirm against the parser / format spec.
O: listen listen : tiger-left : hear-left hear-left : 0.7225
O: listen listen : tiger-left : hear-left hear-right : 0.1275
O: listen listen : tiger-left : hear-right hear-left : 0.1275
O: listen listen : tiger-left : hear-right hear-right : 0.0225
O: listen listen : tiger-right : hear-right hear-right : 0.7225
O: listen listen : tiger-right : hear-left hear-right : 0.1275
O: listen listen : tiger-right : hear-right hear-left : 0.1275
O: listen listen : tiger-right : hear-left hear-left : 0.0225
#The rewards
# R: <a1 a2...an> : <start-state> : <end-state> : <o1 o2 ... om> : %f
#or
# R: <a1 a2...an> : <start-state> : <end-state> :
# %f %f ... %f
#or
# R: <a1 a2...an> : <start-state> :
# %f %f ... %f
# %f %f ... %f
# ...
# %f %f ... %f
#
#Typical problems only use R(s,ja) which is specified by:
# R: <a1 a2...an> : <start-state> : * : * : %f
#
# The entries below cover all 9 joint actions in both states; end-state and
# joint observation are always wildcards, so the reward depends only on the
# joint action and the start state.
#
# Both agents listen: -2 in either state.
R: listen listen: * : * : * : -2
# Both agents open the same door: -50 if the tiger is behind it, 20 otherwise.
R: open-left open-left : tiger-left : * : * : -50
R: open-right open-right : tiger-right : * : * : -50
R: open-left open-left : tiger-right : * : * : +20
R: open-right open-right : tiger-left : * : * : 20
# The agents open different doors: -100 in either state.
R: open-left open-right: tiger-left : * : * : -100
R: open-left open-right: tiger-right : * : * : -100
R: open-right open-left: tiger-left : * : * : -100
R: open-right open-left: tiger-right : * : * : -100
# One agent opens a door while the other listens:
# -101 if the tiger is behind the opened door ...
R: open-left listen: tiger-left : * : * : -101
R: listen open-right: tiger-right : * : * : -101
R: listen open-left: tiger-left : * : * : -101
R: open-right listen: tiger-right : * : * : -101
# ... and 9 if the tiger is behind the other door.
R: listen open-right: tiger-left : * : * : 9
R: listen open-left: tiger-right : * : * : 9
R: open-right listen: tiger-left : * : * : 9
R: open-left listen: tiger-right : * : * : 9