-
Notifications
You must be signed in to change notification settings - Fork 68
/
Agent.java
executable file
·179 lines (146 loc) · 5.76 KB
/
Agent.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
package tracks.singlePlayer.advanced.sampleRS;
import core.game.StateObservation;
import core.player.AbstractPlayer;
import ontology.Types;
import tools.ElapsedCpuTimer;
import tracks.singlePlayer.tools.Heuristics.StateHeuristic;
import tracks.singlePlayer.tools.Heuristics.WinScoreHeuristic;
import java.util.*;
public class Agent extends AbstractPlayer {

    // --- Tunables ---
    private int SIMULATION_DEPTH = 10;      // number of actions rolled forward per evaluation
    private double DISCOUNT = 1; //0.99;    // per-step discount on the heuristic value (1 = none)

    // --- Constants ---
    private final long BREAK_MS = 10;       // safety margin (ms): stop work below this budget
    public static final double epsilon = 1e-6;

    private ArrayList<Individual> population;   // individuals evaluated this tick, sorted best-first
    private int NUM_INDIVIDUALS;                // how many individuals were created this tick
    private HashMap<Integer, Types.ACTIONS> action_mapping; // gene value -> game action (last slot = NIL)
    private int N_ACTIONS;                      // available actions + 1 for ACTION_NIL
    private ElapsedCpuTimer timer;              // budget timer for the current call
    private Random randomGenerator;
    private StateHeuristic heuristic;           // state evaluator (win/score based)
    private double acumTimeTakenEval = 0, avgTimeTakenEval = 0; // running eval-cost statistics (ms)
    private int numEvals = 0;
    private long remaining;                     // last observed remaining budget (ms)

    /**
     * Public constructor with state observation and time due.
     *
     * @param stateObs     state observation of the current game.
     * @param elapsedTimer Timer for the controller creation.
     */
    public Agent(StateObservation stateObs, ElapsedCpuTimer elapsedTimer) {
        randomGenerator = new Random();
        heuristic = new WinScoreHeuristic(stateObs);
        this.timer = elapsedTimer;

        // INITIALISE POPULATION
        init_pop(stateObs);
    }

    /**
     * Picks an action each game tick: rebuilds and evaluates a fresh random
     * population within the time budget, then returns the first action of the
     * best individual found.
     *
     * @param stateObs     current game state.
     * @param elapsedTimer timer bounding this decision.
     * @return the chosen action for this tick.
     */
    @Override
    public Types.ACTIONS act(StateObservation stateObs, ElapsedCpuTimer elapsedTimer) {
        this.timer = elapsedTimer;
        // Reset per-tick statistics before rebuilding the population.
        numEvals = 0;
        acumTimeTakenEval = 0;
        remaining = timer.remainingTimeMillis();
        NUM_INDIVIDUALS = 0;

        // INITIALISE POPULATION
        init_pop(stateObs);

        // RETURN ACTION
        return get_best_action(population);
    }

    /**
     * Evaluates an individual by rolling the current state with the actions in the individual
     * and returning the value of the resulting state; stops early when the game ends or the
     * remaining time budget no longer covers the average cost of another step.
     *
     * @param individual - individual to be valued
     * @param heuristic  - heuristic to be used for state evaluation
     * @param state      - current state, root of rollouts
     * @return - value of last state reached (discounted by DISCOUNT^steps)
     */
    private double evaluate(Individual individual, StateHeuristic heuristic, StateObservation state) {
        ElapsedCpuTimer elapsedTimerIterationEval = new ElapsedCpuTimer();

        // Roll forward on a private copy so the real game state is untouched.
        StateObservation st = state.copy();
        int i;
        for (i = 0; i < SIMULATION_DEPTH; i++) {
            double acum = 0, avg;
            if (!st.isGameOver()) {
                ElapsedCpuTimer elapsedTimerIteration = new ElapsedCpuTimer();
                st.advance(action_mapping.get(individual.actions[i]));

                // Track per-step cost; bail out if another step would likely overrun the budget.
                acum += elapsedTimerIteration.elapsedMillis();
                avg = acum / (i + 1);
                remaining = timer.remainingTimeMillis();
                if (remaining < 2 * avg || remaining < BREAK_MS) break;
            } else {
                break;
            }
        }

        // st is already a private copy; evaluate it directly instead of copying again.
        double value = heuristic.evaluateState(st);

        // Apply discount factor for the number of steps actually simulated.
        value *= Math.pow(DISCOUNT, i);

        individual.value = value;

        // Update evaluation-cost statistics used by init_pop's stopping condition.
        numEvals++;
        acumTimeTakenEval += (elapsedTimerIterationEval.elapsedMillis());
        avgTimeTakenEval = acumTimeTakenEval / numEvals;
        remaining = timer.remainingTimeMillis();

        return value;
    }

    /**
     * Insert a new individual into the population at the specified position by replacing the old one.
     *
     * @param newind   - individual to be inserted into population
     * @param pop      - population
     * @param idx      - position where individual should be inserted
     * @param stateObs - current game state
     */
    private void add_individual(Individual newind, Individual[] pop, int idx, StateObservation stateObs) {
        evaluate(newind, heuristic, stateObs);
        pop[idx] = newind.copy();
    }

    /**
     * Initialize population: builds the action mapping (available actions plus a trailing
     * ACTION_NIL), then creates and evaluates random individuals until the remaining time
     * budget no longer covers the average evaluation cost. Sorts the result best-first.
     *
     * @param stateObs - current game state
     */
    private void init_pop(StateObservation stateObs) {
        // Map each gene value to an action; the extra last slot is ACTION_NIL.
        N_ACTIONS = stateObs.getAvailableActions().size() + 1;
        action_mapping = new HashMap<>();
        int k = 0;
        for (Types.ACTIONS action : stateObs.getAvailableActions()) {
            action_mapping.put(k, action);
            k++;
        }
        action_mapping.put(k, Types.ACTIONS.ACTION_NIL);

        NUM_INDIVIDUALS = 0;
        population = new ArrayList<>();
        do {
            Individual newInd = new Individual(SIMULATION_DEPTH, N_ACTIONS, randomGenerator);
            evaluate(newInd, heuristic, stateObs);
            population.add(newInd);
            // Use the field (the original shadowed it with a local), keeping the
            // agent's budget bookkeeping consistent across methods.
            remaining = timer.remainingTimeMillis();
            NUM_INDIVIDUALS++;
        } while (remaining > avgTimeTakenEval && remaining > BREAK_MS);

        // Sort best-first; nulls (defensively) sink to the end.
        if (NUM_INDIVIDUALS > 1)
            Collections.sort(population, new Comparator<Individual>() {
                @Override
                public int compare(Individual o1, Individual o2) {
                    if (o1 == null && o2 == null) {
                        return 0;
                    }
                    if (o1 == null) {
                        return 1;
                    }
                    if (o2 == null) {
                        return -1;
                    }
                    return o1.compareTo(o2);
                }
            });
    }

    /**
     * @param pop - last population obtained after evolution
     * @return - first action of best individual in the population (found at index 0)
     */
    private Types.ACTIONS get_best_action(ArrayList<Individual> pop) {
        int bestAction = pop.get(0).actions[0];
        return action_mapping.get(bestAction);
    }
}