Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add a configurable escalation behaviour #162

Merged
merged 3 commits into from
Oct 13, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
102 changes: 94 additions & 8 deletions Sources/ServiceLifecycle/ServiceGroup.swift
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ public actor ServiceGroup: Sendable {
private let logger: Logger
/// The logging configuration.
private let loggingConfiguration: ServiceGroupConfiguration.LoggingConfiguration
/// The escalation configuration.
private let escalationConfiguration: ServiceGroupConfiguration.EscalationBehaviour
/// The signals that lead to graceful shutdown.
private let gracefulShutdownSignals: [UnixSignal]
/// The signals that lead to cancellation.
Expand All @@ -57,6 +59,7 @@ public actor ServiceGroup: Sendable {
self.cancellationSignals = configuration.cancellationSignals
self.logger = configuration.logger
self.loggingConfiguration = configuration.logging
self.escalationConfiguration = configuration.escalation
}

/// Initializes a new ``ServiceGroup``.
Expand Down Expand Up @@ -94,6 +97,7 @@ public actor ServiceGroup: Sendable {
self.cancellationSignals = configuration.cancellationSignals
self.logger = logger
self.loggingConfiguration = configuration.logging
self.escalationConfiguration = configuration.escalation
}

/// Runs all the services by spinning up a child task per service.
Expand Down Expand Up @@ -176,6 +180,8 @@ public actor ServiceGroup: Sendable {
case signalSequenceFinished
case gracefulShutdownCaught
case gracefulShutdownFinished
case gracefulShutdownTimedOut
case cancellationCaught
}

private func _run(
Expand All @@ -191,6 +197,10 @@ public actor ServiceGroup: Sendable {
]
)

// A task that is spawned when we got cancelled or
// we cancel the task group to keep track of a timeout.
var cancellationTimeoutTask: Task<Void, Never>?

// Using a result here since we want a task group that has non-throwing child tasks
// but the body itself is throwing
let result = try await withThrowingTaskGroup(of: ChildTaskResult.self, returning: Result<Void, Error>.self) { group in
Expand Down Expand Up @@ -267,6 +277,13 @@ public actor ServiceGroup: Sendable {
}
}

group.addTask {
// This child task is waiting forever until the group gets cancelled.
let (stream, _) = AsyncStream.makeStream(of: Void.self)
await stream.first { _ in true }
glbrntt marked this conversation as resolved.
Show resolved Hide resolved
return .cancellationCaught
}

// We are storing the services in an optional array now. When a slot in the array is
// empty it indicates that the service has been shutdown.
var services = services.map { Optional($0) }
Expand All @@ -293,7 +310,7 @@ public actor ServiceGroup: Sendable {
self.loggingConfiguration.keys.serviceKey: "\(service.service)",
]
)
group.cancelAll()
cancellationTimeoutTask = self.cancelGroupAndSpawnTimeoutIfNeeded(group: &group)
return .failure(ServiceGroupError.serviceFinishedUnexpectedly())

case .gracefullyShutdownGroup:
Expand All @@ -307,6 +324,7 @@ public actor ServiceGroup: Sendable {
do {
try await self.shutdownGracefully(
services: services,
cancellationTimeoutTask: &cancellationTimeoutTask,
group: &group,
gracefulShutdownManagers: gracefulShutdownManagers
)
Expand All @@ -327,7 +345,7 @@ public actor ServiceGroup: Sendable {
self.logger.debug(
"All services finished."
)
group.cancelAll()
cancellationTimeoutTask = self.cancelGroupAndSpawnTimeoutIfNeeded(group: &group)
return .success(())
}
}
Expand All @@ -342,7 +360,7 @@ public actor ServiceGroup: Sendable {
self.loggingConfiguration.keys.errorKey: "\(serviceError)",
]
)
group.cancelAll()
cancellationTimeoutTask = self.cancelGroupAndSpawnTimeoutIfNeeded(group: &group)
return .failure(serviceError)

case .gracefullyShutdownGroup:
Expand All @@ -358,6 +376,7 @@ public actor ServiceGroup: Sendable {
do {
try await self.shutdownGracefully(
services: services,
cancellationTimeoutTask: &cancellationTimeoutTask,
group: &group,
gracefulShutdownManagers: gracefulShutdownManagers
)
Expand All @@ -381,7 +400,7 @@ public actor ServiceGroup: Sendable {
"All services finished."
)

group.cancelAll()
cancellationTimeoutTask = self.cancelGroupAndSpawnTimeoutIfNeeded(group: &group)
return .success(())
}
}
Expand All @@ -398,6 +417,7 @@ public actor ServiceGroup: Sendable {
do {
try await self.shutdownGracefully(
services: services,
cancellationTimeoutTask: &cancellationTimeoutTask,
group: &group,
gracefulShutdownManagers: gracefulShutdownManagers
)
Expand All @@ -413,7 +433,7 @@ public actor ServiceGroup: Sendable {
]
)

group.cancelAll()
cancellationTimeoutTask = self.cancelGroupAndSpawnTimeoutIfNeeded(group: &group)
}

case .gracefulShutdownCaught:
Expand All @@ -423,19 +443,29 @@ public actor ServiceGroup: Sendable {
do {
try await self.shutdownGracefully(
services: services,
cancellationTimeoutTask: &cancellationTimeoutTask,
group: &group,
gracefulShutdownManagers: gracefulShutdownManagers
)
} catch {
return .failure(error)
}

case .cancellationCaught:
// We caught cancellation in our child task so we have to spawn
// our cancellation timeout task if needed
self.logger.debug("Caught cancellation.")
cancellationTimeoutTask = self.cancelGroupAndSpawnTimeoutIfNeeded(group: &group)

case .signalSequenceFinished, .gracefulShutdownFinished:
// This can happen when we are either cancelling everything or
// when the user did not specify any shutdown signals. We just have to tolerate
// this.
continue

case .gracefulShutdownTimedOut:
fatalError("Received gracefulShutdownTimedOut but never triggered a graceful shutdown")

case nil:
fatalError("Invalid result from group.next(). We checked if the group is empty before and still got nil")
}
Expand All @@ -447,18 +477,28 @@ public actor ServiceGroup: Sendable {
self.logger.debug(
"Service lifecycle ended"
)
cancellationTimeoutTask?.cancel()
try result.get()
}

private func shutdownGracefully(
services: [ServiceGroupConfiguration.ServiceConfiguration?],
cancellationTimeoutTask: inout Task<Void, Never>?,
group: inout ThrowingTaskGroup<ChildTaskResult, Error>,
gracefulShutdownManagers: [GracefulShutdownManager]
) async throws {
guard case .running = self.state else {
fatalError("Unexpected state")
}

if #available(macOS 13.0, *), let maximumGracefulShutdownDuration = self.escalationConfiguration.maximumGracefulShutdownDuration {
FranzBusch marked this conversation as resolved.
Show resolved Hide resolved
group.addTask {
try await Task.sleep(for: maximumGracefulShutdownDuration)
return .gracefulShutdownTimedOut
}
}


// We are storing the first error of a service that threw here.
var error: Error?

Expand Down Expand Up @@ -509,7 +549,7 @@ public actor ServiceGroup: Sendable {
]
)

group.cancelAll()
cancellationTimeoutTask = self.cancelGroupAndSpawnTimeoutIfNeeded(group: &group)
throw ServiceGroupError.serviceFinishedUnexpectedly()
}

Expand Down Expand Up @@ -561,9 +601,26 @@ public actor ServiceGroup: Sendable {
]
)

group.cancelAll()
cancellationTimeoutTask = self.cancelGroupAndSpawnTimeoutIfNeeded(group: &group)
}

case .gracefulShutdownTimedOut:
// Gracefully shutting down took longer than the user configured
// so we have to escalate it now.
self.logger.debug(
"Graceful shutdown took longer than allowed by the configuration. Cancelling the group now.",
metadata: [
self.loggingConfiguration.keys.serviceKey: "\(service.service)",
]
)
cancellationTimeoutTask = self.cancelGroupAndSpawnTimeoutIfNeeded(group: &group)

case .cancellationCaught:
// We caught cancellation in our child task so we have to spawn
// our cancellation timeout task if needed
self.logger.debug("Caught cancellation.")
cancellationTimeoutTask = self.cancelGroupAndSpawnTimeoutIfNeeded(group: &group)

case .signalSequenceFinished, .gracefulShutdownCaught, .gracefulShutdownFinished:
// We just have to tolerate this since signals and parent graceful shutdowns downs can race.
continue
Expand All @@ -575,7 +632,9 @@ public actor ServiceGroup: Sendable {

// If we hit this then all services are shutdown. The only thing remaining
// are the tasks that listen to the various graceful shutdown signals. We
// just have to cancel those
// just have to cancel those.
// In this case we don't have to spawn our cancellation timeout task since
// we are sure all other child tasks are handling cancellation appropriately.
group.cancelAll()

// If we saw an error during graceful shutdown from a service that triggers graceful
Expand All @@ -584,6 +643,33 @@ public actor ServiceGroup: Sendable {
throw error
}
}

private func cancelGroupAndSpawnTimeoutIfNeeded(
group: inout ThrowingTaskGroup<ChildTaskResult, Error>
) -> Task<Void, Never>? {
group.cancelAll()
if #available(macOS 13.0, iOS 16.0, watchOS 9.0, tvOS 16.0, *), let maximumCancellationDuration = self.escalationConfiguration.maximumCancellationDuration {
// We have to spawn an unstructured task here because the call to our `run`
// method might have already been cancelled and we need to protect the sleep
// from being cancelled.
return Task {
do {
self.logger.debug(
"Task cancellation timeout task started."
)
try await Task.sleep(for: maximumCancellationDuration)
self.logger.debug(
"Cancellation took longer than allowed by the configuration."
)
fatalError("Cancellation took longer than allowed by the configuration.")
} catch {
// We got cancelled so our services must have finished up.
}
}
} else {
return nil
}
}
}

// This should be removed once we support Swift 5.9+
Expand Down
62 changes: 62 additions & 0 deletions Sources/ServiceLifecycle/ServiceGroupConfiguration.swift
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,65 @@ public struct ServiceGroupConfiguration: Sendable {
}
}

/// The group's escalation configuration.
public struct EscalationBehaviour: Sendable {
FranzBusch marked this conversation as resolved.
Show resolved Hide resolved
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't find the naming here that obvious, I think it needs some mention of shutdown/cancellation.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agreed. I removed the struct and just made them top-level properties

/// The maximum amount of time that graceful shutdown is allowed to take.
///
/// After this time has elapsed graceful shutdown will be escalated to task cancellation.
@available(macOS 13.0, iOS 16.0, watchOS 9.0, tvOS 16.0, *)
public var maximumGracefulShutdownDuration: Duration? {
get {
if let maximumGracefulShutdownDuration = self._maximumGracefulShutdownDuration {
return .init(
secondsComponent: maximumGracefulShutdownDuration.secondsComponent,
attosecondsComponent: maximumGracefulShutdownDuration.attosecondsComponent
)
} else {
return nil
}
}
set {
if let newValue = newValue {
self._maximumGracefulShutdownDuration = (newValue.components.seconds, newValue.components.attoseconds)
} else {
self._maximumCancellationDuration = nil
FranzBusch marked this conversation as resolved.
Show resolved Hide resolved
}
}
}

/// The maximum amount of time that task cancellation is allowed to take.
///
/// After this time has elapsed task cancellation will be escalated to a `fatalError`.
///
/// - Important: This setting is useful to guarantee that your application will exit at some point and
/// should be used to identify APIs that are not properly implementing task cancellation.
@available(macOS 13.0, iOS 16.0, watchOS 9.0, tvOS 16.0, *)
public var maximumCancellationDuration: Duration? {
get {
if let maximumCancellationDuration = self._maximumCancellationDuration {
return .init(
secondsComponent: maximumCancellationDuration.secondsComponent,
attosecondsComponent: maximumCancellationDuration.attosecondsComponent
)
} else {
return nil
}
}
set {
if let newValue = newValue {
self._maximumCancellationDuration = (newValue.components.seconds, newValue.components.attoseconds)
} else {
self._maximumCancellationDuration = nil
}
}
}

private var _maximumGracefulShutdownDuration: (secondsComponent: Int64, attosecondsComponent: Int64)?
private var _maximumCancellationDuration: (secondsComponent: Int64, attosecondsComponent: Int64)?
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this whole dance just to avoid availability on the type? Can't we just add the availability to the property on the configuration which uses it?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is not allowed to add availability on a stored property. So we have to do this dance somewhere.


public init() {}
}

/// The groups's service configurations.
public var services: [ServiceConfiguration]

Expand All @@ -111,6 +170,9 @@ public struct ServiceGroupConfiguration: Sendable {
/// The group's logging configuration.
public var logging = LoggingConfiguration()

/// The group's escalation configuration.
public var escalation = EscalationBehaviour()

/// Initializes a new ``ServiceGroupConfiguration``.
///
/// - Parameters:
Expand Down
Loading