diff --git a/src/deadline_worker_agent/sessions/session.py b/src/deadline_worker_agent/sessions/session.py index 3ba3e9f2..98d23b9b 100644 --- a/src/deadline_worker_agent/sessions/session.py +++ b/src/deadline_worker_agent/sessions/session.py @@ -1033,9 +1033,10 @@ def _handle_action_update( now: datetime, ): if is_unsuccessful: - fail_message = ( - action_status.fail_message - or f"Action {current_action.definition.human_readable()} failed" + fail_message = action_status.fail_message or ( + f"TIMEOUT - Previous action exceeded runtime limit: {current_action.definition.human_readable()}" + if action_status.state == ActionState.TIMEOUT + else f"Previous action failed: {current_action.definition.human_readable()}" ) # If the current action failed, we mark future actions assigned to the session as @@ -1052,6 +1053,19 @@ def _handle_action_update( # UpdateWorkerSchedule request if so. self._current_action = None + if action_status.state == ActionState.TIMEOUT: + # If the action ended via timeout, then we're reporting this as a failed action + # but also surface the timeout was the reason for the fail so that they don't have + # to go log diving. + action_status = ActionStatus( + # Preserve properties + state=action_status.state, + progress=action_status.progress, + exit_code=action_status.exit_code, + # Replace the message to let the customer know that the action reached its runtime limit. + fail_message="TIMEOUT - Exceeded the allotted runtime limit.", + ) + completed_status = OPENJD_ACTION_STATE_TO_DEADLINE_COMPLETED_STATUS.get( action_status.state, None ) diff --git a/test/unit/sessions/test_session.py b/test/unit/sessions/test_session.py index 16591dda..e7fad32b 100644 --- a/test/unit/sessions/test_session.py +++ b/test/unit/sessions/test_session.py @@ -1028,6 +1028,40 @@ def mock_action_updated_impl_side_effect( current_action_lock_exit.assert_called_once() mock_report_action_update.assert_not_called() + def test_timeout_messaging( + self, + session: Session, + # We don't use the value of this fixture, but requiring it has the side-effect of assigning + # it as the current action of the session + current_action: CurrentAction, + ) -> None: + """Test that when an action is reported as TIMEOUT then we: + 1) Cancel all subsequent tasks as NEVER_ATTEMPTED; and + 2) Have an appropriate failure message on the action. + """ + # GIVEN + status = ActionStatus(state=ActionState.TIMEOUT, exit_code=-1, progress=24.4) + with ( + patch.object(session._queue, "cancel_all") as mock_cancel_all, + patch.object(session, "_report_action_update") as mock_report_action_update, + ): + # WHEN + session.update_action(status) + + # THEN + mock_cancel_all.assert_called_once_with( + cancel_outcome="NEVER_ATTEMPTED", message=ANY, ignore_env_exits=True + ) + assert "TIMEOUT" in mock_cancel_all.call_args.kwargs["message"] + mock_report_action_update.assert_called_once() + session_status = mock_report_action_update.call_args.args[0] + assert session_status.completed_status == "FAILED" + called_with_status = session_status.status + assert called_with_status.state == ActionState.TIMEOUT + assert "TIMEOUT" in called_with_status.fail_message + assert called_with_status.exit_code == status.exit_code + assert called_with_status.progress == status.progress + class TestSessionActionUpdatedImpl: """Test cases for Session._action_updated_impl()""" @@ -1075,7 +1109,7 @@ def test_failed_enter_env( session._current_action = current_action queue_cancel_all: MagicMock = session_action_queue.cancel_all expected_next_action_message = failed_action_status.fail_message or ( - f"Action {current_action.definition.human_readable()} failed" + f"Previous action failed: {current_action.definition.human_readable()}" ) expected_action_update = SessionActionStatus( id=action_id, @@ -1143,7 +1177,7 @@ def test_failed_task_run( session._current_action = current_action queue_cancel_all: MagicMock = session_action_queue.cancel_all expected_next_action_message = failed_action_status.fail_message or ( - f"Action {current_action.definition.human_readable()} failed" + f"Previous action failed: {current_action.definition.human_readable()}" ) expected_action_update = SessionActionStatus( id=action_id,