From 9c246cf0f5604acbcf72024c85eb9dd900b7db3a Mon Sep 17 00:00:00 2001
From: Tambet Matiisen <tambet.matiisen@gmail.com>
Date: Fri, 6 Jul 2018 23:25:03 +0300
Subject: [PATCH] Fixed a couple of rare bugs.

---
 cython_env/cpommerman/forward_model.pyx | 33 ++++++++++++-------------
 cython_env/test.py                      |  3 ++-
 2 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/cython_env/cpommerman/forward_model.pyx b/cython_env/cpommerman/forward_model.pyx
index 35ea61e..abcf947 100644
--- a/cython_env/cpommerman/forward_model.pyx
+++ b/cython_env/cpommerman/forward_model.pyx
@@ -106,7 +106,7 @@ cdef class ForwardModel(object):
                    list curr_flames,
                    byte max_blast_strength=10):
         cdef characters.Bomber agent, agent2
-        cdef characters.Bomb bomb
+        cdef characters.Bomb bomb, bomb2
         cdef characters.Flame flame
         cdef Position position, desired_position, curr_position, target_position, agent_position, bomb_position
         cdef constants.Action action, direction
@@ -153,7 +153,7 @@ cdef class ForwardModel(object):
         # Figure out desired next position for alive agents
         alive_agents = [agent for agent in curr_agents if agent.is_alive]
         for agent in alive_agents:
-            agent.desired_position = Position(agent.position.row, agent.position.col)
+            agent.desired_position = agent.position
             agent.delayed_position = Position(-1, -1)
             agent.kicked_bomb = None
 
@@ -177,7 +177,7 @@ cdef class ForwardModel(object):
 
         # Gather desired next positions for moving bombs. Handle kicks later.
         for bomb in curr_bombs:
-            bomb.desired_position = Position(bomb.position.row, bomb.position.col)
+            bomb.desired_position = bomb.position
             bomb.delayed_position = Position(-1, -1)
             bomb.kicked_agent = None
 
@@ -238,10 +238,10 @@ cdef class ForwardModel(object):
                     bomb.desired_position = bomb.position
                     if num < 0:
                         # Crossed bomb - revert that to prior position as well.
-                        bomb = curr_bombs[-num + 1]
-                        bomb.desired_position = bomb.position
+                        bomb2 = curr_bombs[-num - 1]
+                        bomb2.desired_position = bomb2.position
                 else:
-                    crossings[r][c][i] = -num_bomb - 1
+                    crossings[r][c][i] = -(num_bomb + 1)
 
         # Deal with multiple agents or multiple bomb collisions on desired next
         # position by resetting desired position to current position for
@@ -296,7 +296,7 @@ cdef class ForwardModel(object):
                 if Position_neq(desired_position, bomb.position):
                     # Bomb moved, but agent did not. The bomb should revert
                     # and stop.
-                    bomb.delayed_position = Position(bomb.position.row, bomb.position.col)
+                    bomb.delayed_position = bomb.position
                 continue
 
             # NOTE: At this point, we have that the agent in question tried to
@@ -305,8 +305,8 @@ cdef class ForwardModel(object):
                 # If we move the agent at this point, then we risk having two
                 # agents on a square in future iterations of the loop. So we
                 # push this change to the next stage instead.
-                bomb.delayed_position = Position(bomb.position.row, bomb.position.col)
-                agent.delayed_position = Position(agent.position.row, agent.position.col)
+                bomb.delayed_position = bomb.position
+                agent.delayed_position = agent.position
                 continue
 
             # Agent moved and can kick - see if the target for the kick never had anyhing on it
@@ -323,14 +323,14 @@ cdef class ForwardModel(object):
                 # However we need to set the bomb count on the current position to zero so
                 # that the agent can stay on this position.
                 bomb_occupancy[desired_position.row][desired_position.col] = 0
-                bomb.delayed_position = Position(target_position.row, target_position.col)
+                bomb.delayed_position = target_position
                 bomb.kicked_agent = agent
                 agent.kicked_bomb = bomb
                 bomb.moving_direction = direction
                 # Bombs may still collide and we then need to reverse bomb and agent ..
             else:
-                bomb.delayed_position = Position(bomb.position.row, bomb.position.col)
-                agent.delayed_position = Position(agent.position.row, agent.position.col)
+                bomb.delayed_position = bomb.position
+                agent.delayed_position = agent.position
 
         for bomb in curr_bombs:
             if Position_neq(bomb.delayed_position, Position(-1, -1)):
@@ -625,11 +625,7 @@ cdef class ForwardModel(object):
         cdef rewards_np = np.zeros(4, dtype=np.float32)
         cdef float[:] rewards = rewards_np
 
-        if step_count > max_steps:
-            # Game is over from time. Everyone gets -1.
-            rewards[:] = -1
-            return rewards_np
-        elif game_type == constants.GameType.FFA:
+        if game_type == constants.GameType.FFA:
             alive = 0
             for agent in agents:
                 rewards[agent.agent_id] = agent.is_alive
@@ -639,6 +635,9 @@ cdef class ForwardModel(object):
                 # An agent won. Give them +1, others -1.
                 for i in range(4):
                     rewards[i] = rewards[i] * 2 - 1
+            elif step_count > max_steps:
+                # Game is over from time. Everyone gets -1.
+                rewards[:] = -1
             else:
                 # Game running: 0 for alive, -1 for dead.
                 for i in range(4):
diff --git a/cython_env/test.py b/cython_env/test.py
index 78ef7f5..cf1b91d 100644
--- a/cython_env/test.py
+++ b/cython_env/test.py
@@ -178,7 +178,8 @@ def featurize_new(obs):
         except AssertionError:
             import traceback
             traceback.print_exc()
-            input()
+            import pdb
+            pdb.set_trace()
 
         # TEST 8: verify that setting state actually works
         # set state to previous timestep and compare that JSON matches