Skip to content

Commit

Permalink
OpenMM runner fails with more verbosity (#20)
Browse files Browse the repository at this point in the history
When the simulation fails in the OpenMM runner, it may appear as just a
freeze with no explanation. This is because the simulation runs, and
therefore fails, on a separate thread. This commit adds prints in the
simulation thread, so the cause of the failure is reported to the user.

This is a workaround and it should be solved in a better way. If the
simulation fails, it should cause the server to stop with a proper error
message. See #19.
  • Loading branch information
jbarnoud authored Oct 9, 2023
2 parents 666d32d + 8b0a5cd commit 93702d1
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 26 deletions.
26 changes: 17 additions & 9 deletions python-libraries/narupa-openmm/src/narupa/openmm/imd.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,13 +143,17 @@ def report(self, simulation: app.Simulation, state: mm.State) -> None:
if simulation.currentStep % self.frame_interval == 0:
if positions is None:
positions = state.getPositions(asNumpy=True)
frame_data = openmm_to_frame_data(
state=state, topology=None, include_positions=False
)
frame_data.particle_positions = positions
frame_data.user_energy = self._total_user_energy
self.frame_publisher.send_frame(self._frame_index, frame_data)
self._frame_index += 1
try:
frame_data = openmm_to_frame_data(
state=state, topology=None, include_positions=False
)
except Exception as err:
print(f"Error while building a frame: {err}")
else:
frame_data.particle_positions = positions
frame_data.user_energy = self._total_user_energy
self.frame_publisher.send_frame(self._frame_index, frame_data)
self._frame_index += 1

def _on_first_frame(self, simulation: app.Simulation):
"""
Expand All @@ -161,8 +165,12 @@ def _on_first_frame(self, simulation: app.Simulation):
if self._frame_index == 0:
state = simulation.context.getState(getPositions=True, getEnergy=True)
topology = simulation.topology
frame_data = openmm_to_frame_data(state=state, topology=topology)
self.frame_publisher.send_frame(self._frame_index, frame_data)
try:
frame_data = openmm_to_frame_data(state=state, topology=topology)
except Exception as err:
print(f"Error with the first frame: {err}")
else:
self.frame_publisher.send_frame(self._frame_index, frame_data)

@staticmethod
def get_masses(system: mm.System) -> np.ndarray:
Expand Down
37 changes: 20 additions & 17 deletions python-libraries/narupa-openmm/src/narupa/openmm/runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -296,23 +296,26 @@ def run(
self._run_task = self.threads.submit(self._run, steps)

def _run(self, steps: Optional[int]) -> None:
remaining_steps = steps if steps is not None else float("inf")
for _ in self._variable_interval_generator.yield_interval():
if self._cancelled or remaining_steps <= 0:
break
steps_for_this_iteration = min(self.frame_interval, remaining_steps)
try:
self.simulation.step(steps_for_this_iteration)
except (ValueError, openmm.OpenMMException):
# We want to stop running if the simulation exploded in a way
# that prevents OpenMM from running. Otherwise, we will be in a state
# where OpenMM raises an exception which would make the runner
# unusable. The OpenMMException is typically raised by OpenMM
# itself when something is NaN; the ValueError is typically
# raised by the StateReporter when the energy is NaN.
break
remaining_steps -= steps_for_this_iteration
self._cancelled = False
try:
remaining_steps = steps if steps is not None else float("inf")
for _ in self._variable_interval_generator.yield_interval():
if self._cancelled or remaining_steps <= 0:
break
steps_for_this_iteration = min(self.frame_interval, remaining_steps)
try:
self.simulation.step(steps_for_this_iteration)
except (ValueError, openmm.OpenMMException):
# We want to stop running if the simulation exploded in a way
# that prevents OpenMM from running. Otherwise, we will be in a state
# where OpenMM raises an exception which would make the runner
# unusable. The OpenMMException is typically raised by OpenMM
# itself when something is NaN; the ValueError is typically
# raised by the StateReporter when the energy is NaN.
break
remaining_steps -= steps_for_this_iteration
self._cancelled = False
except Exception as err:
print(f"Error whith run: {err}")

def step(self):
with self._cancel_lock:
Expand Down

0 comments on commit 93702d1

Please sign in to comment.