Skip to content

Commit

Permalink
OpenMM runner fails with more verbosity
Browse files Browse the repository at this point in the history
When the simulation fails in the OpenMM runner, it may appear as just a
freeze with no explanation. This is because the simulation runs, and
therefore fails, on a separate thread. This commit adds prints in the
simulation thread, so the cause of the failure is at least reported on
standard output.

This is a workaround and it should be solved in a better way. If the
simulation fails, it should cause the server to stop with a proper error
message.
  • Loading branch information
jbarnoud committed Oct 9, 2023
1 parent 691aa56 commit 8b0a5cd
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 26 deletions.
26 changes: 17 additions & 9 deletions python-libraries/narupa-openmm/src/narupa/openmm/imd.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,13 +143,17 @@ def report(self, simulation: app.Simulation, state: mm.State) -> None:
if simulation.currentStep % self.frame_interval == 0:
if positions is None:
positions = state.getPositions(asNumpy=True)
frame_data = openmm_to_frame_data(
state=state, topology=None, include_positions=False
)
frame_data.particle_positions = positions
frame_data.user_energy = self._total_user_energy
self.frame_publisher.send_frame(self._frame_index, frame_data)
self._frame_index += 1
try:
frame_data = openmm_to_frame_data(
state=state, topology=None, include_positions=False
)
except Exception as err:
print(f"Error while building a frame: {err}")
else:
frame_data.particle_positions = positions
frame_data.user_energy = self._total_user_energy
self.frame_publisher.send_frame(self._frame_index, frame_data)
self._frame_index += 1

def _on_first_frame(self, simulation: app.Simulation):
"""
Expand All @@ -161,8 +165,12 @@ def _on_first_frame(self, simulation: app.Simulation):
if self._frame_index == 0:
state = simulation.context.getState(getPositions=True, getEnergy=True)
topology = simulation.topology
frame_data = openmm_to_frame_data(state=state, topology=topology)
self.frame_publisher.send_frame(self._frame_index, frame_data)
try:
frame_data = openmm_to_frame_data(state=state, topology=topology)
except Exception as err:
print(f"Error with the first frame: {err}")
else:
self.frame_publisher.send_frame(self._frame_index, frame_data)

@staticmethod
def get_masses(system: mm.System) -> np.ndarray:
Expand Down
37 changes: 20 additions & 17 deletions python-libraries/narupa-openmm/src/narupa/openmm/runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -296,23 +296,26 @@ def run(
self._run_task = self.threads.submit(self._run, steps)

def _run(self, steps: Optional[int]) -> None:
remaining_steps = steps if steps is not None else float("inf")
for _ in self._variable_interval_generator.yield_interval():
if self._cancelled or remaining_steps <= 0:
break
steps_for_this_iteration = min(self.frame_interval, remaining_steps)
try:
self.simulation.step(steps_for_this_iteration)
except (ValueError, openmm.OpenMMException):
# We want to stop running if the simulation exploded in a way
# that prevents OpenMM from running. Otherwise, we would be in a
# state where OpenMM raises an exception which would make the runner
# unusable. The OpenMMException is typically raised by OpenMM
# itself when something is NaN; the ValueError is typically
# raised by the StateReporter when the energy is NaN.
break
remaining_steps -= steps_for_this_iteration
self._cancelled = False
try:
remaining_steps = steps if steps is not None else float("inf")
for _ in self._variable_interval_generator.yield_interval():
if self._cancelled or remaining_steps <= 0:
break
steps_for_this_iteration = min(self.frame_interval, remaining_steps)
try:
self.simulation.step(steps_for_this_iteration)
except (ValueError, openmm.OpenMMException):
# We want to stop running if the simulation exploded in a way
# that prevents OpenMM from running. Otherwise, we would be in a
# state where OpenMM raises an exception which would make the runner
# unusable. The OpenMMException is typically raised by OpenMM
# itself when something is NaN; the ValueError is typically
# raised by the StateReporter when the energy is NaN.
break
remaining_steps -= steps_for_this_iteration
self._cancelled = False
except Exception as err:
print(f"Error whith run: {err}")

def step(self):
with self._cancel_lock:
Expand Down

0 comments on commit 8b0a5cd

Please sign in to comment.