diff --git a/packages/zwave-js/src/lib/driver/Driver.ts b/packages/zwave-js/src/lib/driver/Driver.ts index 022af36393c8..c452b381f653 100644 --- a/packages/zwave-js/src/lib/driver/Driver.ts +++ b/packages/zwave-js/src/lib/driver/Driver.ts @@ -3493,27 +3493,132 @@ export class Driver extends TypedEventEmitter } } - private handleUnresponsiveController( + /** + * @internal + * Handles the case that the controller didn't acknowledge a command in time + * Returns `true` if the transaction failure was handled, `false` if it needs to be rejected. + */ + private handleMissingControllerACK( transaction: Transaction, - error: ZWaveError, + error: ZWaveError & { + code: ZWaveErrorCodes.Controller_Timeout; + context: "ACK"; + }, ): boolean { if (!this._controller) return false; const fail = (message: string) => { this.rejectTransaction(transaction, error); - void this.destroyWithMessage(message); }; + if (this._controller.status !== ControllerStatus.Unresponsive) { + // The controller was responsive before this transaction failed. + // Mark it as unresponsive and try to soft-reset it. + this.controller.setStatus( + ControllerStatus.Unresponsive, + ); + + this._recoveryPhase = ControllerRecoveryPhase.NoACK; + + this.driverLog.print( + "Attempting to recover unresponsive controller...", + "warn", + ); + + // Re-queue the transaction. + // Its message generator may have finished, so reset that too. + transaction.reset(); + this.queue.add(transaction.clone()); + + // Execute the soft-reset asynchronously + void this.softReset().then(() => { + // The controller responded. It is no longer unresponsive + this._controller?.setStatus(ControllerStatus.Ready); + this._recoveryPhase = ControllerRecoveryPhase.None; + }).catch(() => { + // Soft-reset failed. Reject the original transaction and try restarting the driver. + fail( + "Recovering unresponsive controller failed. Restarting the driver...", + ); + }); + + return true; + } else { + // We already attempted to recover from an unresponsive controller. + // Ending up here means the soft-reset also failed and the driver is about to be destroyed + return false; + } + } + + /** + * @internal + * Handles the case that the controller didn't send the callback for a SendData in time + * Returns `true` if the transaction failure was handled, `false` if it needs to be rejected. + */ + private handleMissingSendDataCallback( + transaction: Transaction, + error: ZWaveError & { + code: ZWaveErrorCodes.Controller_Timeout; + context: "callback"; + }, + ): boolean { + if (!this._controller) return false; + if ( this._recoveryPhase === ControllerRecoveryPhase.CallbackTimeoutAfterReset ) { + const node = this.getNodeUnsafe(transaction.message); + if (!node) return false; // This should never happen, but whatever + // The controller is still timing out transmitting after a soft reset, don't try again. - // Reject the transaction and destroy the driver. - fail("Controller is still timing out. Restarting the driver..."); + // Real-world experience has shown that for older controllers this situation can be caused by unresponsive nodes. - return true; + // The following is essentially a copy of handleMissingNodeACK, but with updated error messages + const messagePart1 = + "The node is causing the controller to become unresponsive"; + + if (node.canSleep) { + if (node.status === NodeStatus.Asleep) { + // We already moved the messages to the wakeup queue before. If we end up here, this means a command + // was sent that may be sent to potentially asleep nodes - including pings. + return false; + } + this.controllerLog.logNode( + node.id, + `${messagePart1}. It is probably asleep, moving its messages to the wakeup queue.`, + "warn", + ); + + // There is no longer a reference to the current transaction. If it should be moved to the wakeup queue, + // it temporarily needs to be added to the queue again. + const handled = this.mayMoveToWakeupQueue(transaction); + if (handled) { + this.queue.add(transaction); + } + + // Mark the node as asleep. This will move the messages to the wakeup queue + node.markAsAsleep(); + + return handled; + } else { + const errorMsg = `${messagePart1}, it is presumed dead`; + this.controllerLog.logNode(node.id, errorMsg, "warn"); + + node.markAsDead(); + + // There is no longer a reference to the current transaction on the queue, so we need to reject it separately. + transaction.setProgress({ + state: TransactionState.Failed, + reason: errorMsg, + }); + + transaction.abort(error); + this.rejectAllTransactionsForNode(node.id, errorMsg); + + return true; + } } else if (this._controller.status !== ControllerStatus.Unresponsive) { // The controller was responsive before this transaction failed. // Mark it as unresponsive and try to soft-reset it. @@ -3521,12 +3626,10 @@ export class Driver extends TypedEventEmitter ControllerStatus.Unresponsive, ); - this._recoveryPhase = isMissingControllerACK(error) - ? ControllerRecoveryPhase.NoACK - : ControllerRecoveryPhase.CallbackTimeout; + this._recoveryPhase = ControllerRecoveryPhase.CallbackTimeout; this.driverLog.print( - "Attempting to recover unresponsive controller...", + "Controller missed Send Data callback. Attempting to recover...", "warn", ); @@ -3540,26 +3643,16 @@ export class Driver extends TypedEventEmitter // The controller responded. It is no longer unresponsive this._controller?.setStatus(ControllerStatus.Ready); - if (this._recoveryPhase === ControllerRecoveryPhase.NoACK) { - this._recoveryPhase = ControllerRecoveryPhase.None; - } else if ( - this._recoveryPhase - === ControllerRecoveryPhase.CallbackTimeout - ) { - this._recoveryPhase = - ControllerRecoveryPhase.CallbackTimeoutAfterReset; - } + this._recoveryPhase = + ControllerRecoveryPhase.CallbackTimeoutAfterReset; }).catch(() => { - // Soft-reset failed. Reject the original transaction and try restarting the driver. - fail( - "Recovering unresponsive controller failed. Restarting the driver...", - ); + // Soft-reset failed. Just reject the original transaction. + this.rejectTransaction(transaction, error); }); return true; } else { - // We already attempted to recover from an unresponsive controller. - // Ending up here means the soft-reset also failed and the driver is about to be destroyed + // Not sure what to do here return false; } } @@ -5566,12 +5659,13 @@ ${handlers.length} left`, // If a node failed to respond in time, it might be sleeping if (this.isMissingNodeACK(transaction, error)) { if (this.handleMissingNodeACK(transaction as any, error)) return; + } else if (isMissingControllerACK(error)) { + if (this.handleMissingControllerACK(transaction, error)) return; } else if ( - isMissingControllerACK(error) - || (isSendData(transaction.message) - && isMissingControllerCallback(error)) + isSendData(transaction.message) + && isMissingControllerCallback(error) ) { - if (this.handleUnresponsiveController(transaction, error)) return; + if (this.handleMissingSendDataCallback(transaction, error)) return; } this.rejectTransaction(transaction, error); diff --git a/packages/zwave-js/src/lib/test/driver/sendDataMissingCallbackAbort.test.ts b/packages/zwave-js/src/lib/test/driver/sendDataMissingCallbackAbort.test.ts index f4e453f33eb7..49e7b69118da 100644 --- a/packages/zwave-js/src/lib/test/driver/sendDataMissingCallbackAbort.test.ts +++ b/packages/zwave-js/src/lib/test/driver/sendDataMissingCallbackAbort.test.ts @@ -6,7 +6,7 @@ import { MockControllerStateKeys, } from "../../controller/MockControllerState"; -import { ZWaveErrorCodes, assertZWaveError } from "@zwave-js/core"; +import { NodeStatus, ZWaveErrorCodes, assertZWaveError } from "@zwave-js/core"; import Sinon from "sinon"; import { SoftResetRequest } from "../../serialapi/misc/SoftResetRequest"; import { @@ -90,7 +90,8 @@ integrationTest( // Soft reset should restore normal operation if (msg instanceof SoftResetRequest) { shouldTimeOut = false; - return true; + // Delegate to the default behavior + return false; } }, }; @@ -124,8 +125,10 @@ integrationTest( ); integrationTest( - "Destroy driver if SendData is still missing the callback after soft-reset", + "Mark node as dead if SendData is still missing the callback after soft-reset", { + // Real-world experience has shown that for older controllers this situation can be caused by dead nodes + // We don't want to restart the driver in that case, but mark the node as dead instead // debug: true, // provisioningDirectory: path.join( @@ -206,14 +209,14 @@ integrationTest( (msg) => msg.functionType === FunctionType.SoftReset, ); - // The ping should eventually fail + // The ping should eventually fail and the node be marked dead t.false(await pingPromise); - // The driver should have been destroyed - await wait(100); - assertZWaveError(t, errorSpy.getCall(0).args[0], { - errorCode: ZWaveErrorCodes.Driver_Failed, - }); + t.is(node.status, NodeStatus.Dead); + + // The error event should not have been emitted + await wait(300); + t.is(errorSpy.callCount, 0); }, }, ); @@ -286,7 +289,8 @@ integrationTest( // Soft reset should restore normal operation if (msg instanceof SoftResetRequest) { shouldTimeOut = false; - return true; + // Delegate to the default behavior + return false; } }, };