Skip to content

Commit

Permalink
Improve handling of concurrent crashes
Browse files Browse the repository at this point in the history
  • Loading branch information
nickdowell committed Jan 20, 2022
1 parent efe6aa8 commit f39da20
Show file tree
Hide file tree
Showing 11 changed files with 177 additions and 69 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@
#include "BSG_KSLogger.h"
#include "BSG_KSMach.h"

#include <stdatomic.h>

// ============================================================================
#pragma mark - Globals -
// ============================================================================
Expand Down Expand Up @@ -211,7 +213,45 @@ void bsg_kscrashsentry_clearContext(BSG_KSCrash_SentryContext *context) {
memcpy(context->reservedThreads, reservedThreads, sizeof(reservedThreads));
}

void bsg_kscrashsentry_beginHandlingCrash(BSG_KSCrash_SentryContext *context) {
bsg_kscrashsentry_clearContext(context);
context->handlingCrash = true;
bool bsg_kscrashsentry_beginHandlingCrash(const thread_t offender) {
static _Atomic(thread_t) firstOffender;
static thread_t firstHandlingThread;

thread_t expected = 0;
if (atomic_compare_exchange_strong(&firstOffender, &expected, offender)) {
firstHandlingThread = bsg_ksmachthread_self();

BSG_KSLOG_DEBUG("Handling app crash in thread 0x%x", offender);
bsg_kscrashsentry_clearContext(bsg_g_context);
bsg_g_context->handlingCrash = true;
bsg_g_context->offendingThread = offender;
return true;
}

if (offender == firstHandlingThread) {
BSG_KSLOG_INFO("Detected crash in the crash reporter. "
"Restoring original handlers.");
bsg_kscrashsentry_uninstall(BSG_KSCrashTypeAsyncSafe);

// Reset the context to write a recrash report.
bsg_kscrashsentry_clearContext(bsg_g_context);
bsg_g_context->crashedDuringCrashHandling = true;
bsg_g_context->handlingCrash = true;
bsg_g_context->offendingThread = offender;
return true;
}

BSG_KSLOG_DEBUG("Ignoring secondary app crash in thread 0x%x", offender);
// Block this thread to prevent the crash handling thread from being
// interrupted while writing the crash report. If we allowed the default
// handler to be triggered for this thread, the process would be killed
// before the crash report can be written. The process will be killed by the
// default handler once the handling thread has finished and threads resume.
while (bsg_g_context->handlingCrash) {}
return false;
}

void bsg_kscrashsentry_endHandlingCrash(void) {
BSG_KSLOG_DEBUG("Noting completion of crash handling");
bsg_g_context->handlingCrash = false;
}
Original file line number Diff line number Diff line change
Expand Up @@ -127,9 +127,10 @@ static void CPPExceptionTerminate(void) {
"the current NSException handler deal with it.");
isNSException = true;
// recordException() doesn't call beginHandlingCrash()
bsg_kscrashsentry_beginHandlingCrash(bsg_g_context);
bsg_recordException(exception);
bsg_g_context->handlingCrash = false;
if (bsg_kscrashsentry_beginHandlingCrash(bsg_ksmachthread_self())) {
bsg_recordException(exception);
bsg_kscrashsentry_endHandlingCrash();
}
} catch (std::exception &exc) {
strlcpy(descriptionBuff, exc.what(), sizeof(descriptionBuff));
} catch (std::exception *exc) {
Expand Down Expand Up @@ -159,22 +160,13 @@ static void CPPExceptionTerminate(void) {
}
bsg_g_captureNextStackTrace = (bsg_g_installed != 0);

if (!isNSException) {
bool wasHandlingCrash = bsg_g_context->handlingCrash;
bsg_kscrashsentry_beginHandlingCrash(bsg_g_context);

if (wasHandlingCrash) {
BSG_KSLOG_INFO("Detected crash in the crash reporter. "
"Restoring original handlers.");
bsg_g_context->crashedDuringCrashHandling = true;
bsg_kscrashsentry_uninstall((BSG_KSCrashType)BSG_KSCrashTypeAll);
}
if (!isNSException &&
bsg_kscrashsentry_beginHandlingCrash(bsg_ksmachthread_self())) {

BSG_KSLOG_DEBUG("Suspending all threads.");
bsg_kscrashsentry_suspendThreads();

bsg_g_context->crashType = BSG_KSCrashTypeCPPException;
bsg_g_context->offendingThread = bsg_ksmachthread_self();
bsg_g_context->registersAreValid = false;
bsg_g_context->stackTrace =
bsg_g_stackTrace + 1; // Don't record __cxa_throw stack entry
Expand All @@ -189,6 +181,7 @@ static void CPPExceptionTerminate(void) {
"Crash handling complete. Restoring original handlers.");
bsg_kscrashsentry_uninstall((BSG_KSCrashType)BSG_KSCrashTypeAll);
bsg_kscrashsentry_resumeThreads();
bsg_kscrashsentry_endHandlingCrash();
}
if (bsg_g_originalTerminateHandler != NULL) {
bsg_g_originalTerminateHandler();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -238,12 +238,8 @@ void *ksmachexc_i_handleExceptions(void *const userData) {

BSG_KSLOG_DEBUG("Trapped mach exception code 0x%llx, subcode 0x%llx",
exceptionMessage.code[0], exceptionMessage.code[1]);
if (bsg_g_installed) {
bool wasHandlingCrash = bsg_g_context->handlingCrash;
bsg_kscrashsentry_beginHandlingCrash(bsg_g_context);

BSG_KSLOG_DEBUG(
"Exception handler is installed. Continuing exception handling.");
if (bsg_g_installed &&
bsg_kscrashsentry_beginHandlingCrash(exceptionMessage.thread.name)) {

BSG_KSLOG_DEBUG("Suspending all threads");
bsg_kscrashsentry_suspendThreads();
Expand All @@ -264,15 +260,6 @@ void *ksmachexc_i_handleExceptions(void *const userData) {
bsg_ksmachexc_i_restoreExceptionPorts();
}

if (wasHandlingCrash) {
BSG_KSLOG_INFO("Detected crash in the crash reporter. Restoring "
"original handlers.");
// The crash reporter itself crashed. Make a note of this and
// uninstall all handlers so that we don't get stuck in a loop.
bsg_g_context->crashedDuringCrashHandling = true;
bsg_kscrashsentry_uninstall(BSG_KSCrashTypeAsyncSafe);
}

// Fill out crash information
BSG_KSLOG_DEBUG("Fetching machine state.");
BSG_STRUCT_MCONTEXT_L machineContext;
Expand All @@ -289,7 +276,6 @@ void *ksmachexc_i_handleExceptions(void *const userData) {

BSG_KSLOG_DEBUG("Filling out context.");
bsg_g_context->crashType = BSG_KSCrashTypeMachException;
bsg_g_context->offendingThread = exceptionMessage.thread.name;
bsg_g_context->registersAreValid = true;
bsg_g_context->mach.type = exceptionMessage.exception;
bsg_g_context->mach.code = exceptionMessage.code[0] & (int64_t)MACH_ERROR_CODE_MASK;
Expand All @@ -302,6 +288,7 @@ void *ksmachexc_i_handleExceptions(void *const userData) {
"Crash handling complete. Restoring original handlers.");
bsg_kscrashsentry_uninstall(BSG_KSCrashTypeAsyncSafe);
bsg_kscrashsentry_resumeThreads();
bsg_kscrashsentry_endHandlingCrash();
}

BSG_KSLOG_DEBUG("Replying to mach exception message.");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -78,30 +78,20 @@
*/
void bsg_ksnsexc_i_handleException(NSException *exception) {
BSG_KSLOG_DEBUG("Trapped exception %s", exception.description.UTF8String);
if (bsg_g_installed) {
bool wasHandlingCrash = bsg_g_context->handlingCrash;
bsg_kscrashsentry_beginHandlingCrash(bsg_g_context);

BSG_KSLOG_DEBUG(
"Exception handler is installed. Continuing exception handling.");

if (wasHandlingCrash) {
BSG_KSLOG_INFO("Detected crash in the crash reporter. Restoring "
"original handlers.");
bsg_g_context->crashedDuringCrashHandling = true;
bsg_kscrashsentry_uninstall(BSG_KSCrashTypeAll);
}
if (bsg_g_installed &&
bsg_kscrashsentry_beginHandlingCrash(bsg_ksmachthread_self())) {

bsg_recordException(exception);

BSG_KSLOG_DEBUG(
"Crash handling complete. Restoring original handlers.");
bsg_kscrashsentry_uninstall(BSG_KSCrashTypeAll);
bsg_kscrashsentry_endHandlingCrash();
}

if (bsg_g_previousUncaughtExceptionHandler != NULL) {
BSG_KSLOG_DEBUG("Calling original exception handler.");
bsg_g_previousUncaughtExceptionHandler(exception);
}
if (bsg_g_previousUncaughtExceptionHandler != NULL) {
BSG_KSLOG_DEBUG("Calling original exception handler.");
bsg_g_previousUncaughtExceptionHandler(exception);
}
}

Expand Down Expand Up @@ -165,9 +155,10 @@ void bsg_recordException(NSException *exception) {
static void bsg_reportException(id self, SEL _cmd, NSException *exception) {
BSG_KSLOG_DEBUG("reportException: %s", exception.description.UTF8String);

bsg_kscrashsentry_beginHandlingCrash(bsg_g_context);

bsg_recordException(exception);
if (bsg_kscrashsentry_beginHandlingCrash(bsg_ksmachthread_self())) {
bsg_recordException(exception);
bsg_kscrashsentry_endHandlingCrash();
}

#if TARGET_OS_MACCATALYST
// Mac Catalyst apps continue to run after an uncaught exception is thrown
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,9 +47,30 @@ void bsg_kscrashsentry_suspendThreads(void);
*/
void bsg_kscrashsentry_resumeThreads(void);

/** Prepare the context for handling a new crash.
/**
* Prepares the context for handling a crash.
*
* Only a single crash report can be written per process lifetime, with the
* exception of a crash in a crash handling thread for which a "recrash" report
* can be written.
*
* True is returned if this crash should be processed. The caller should fill in
* the context's crash details and call bsg_g_context->onCrash(). The caller
* must call endHandlingCrash() once processing is complete. The process is
* then expected to die once the default crash handler executes.
*
* False is returned if a crash has already been handled, to prevent
* interrupting its processing and / or overwriting the existing crash report.
* In this case the call to beginHandlingCrash() blocks until the crash handling
* thread calls endHandlingCrash().
*/
bool bsg_kscrashsentry_beginHandlingCrash(const thread_t offender);

/**
* Sentries must call this to unblock any sencondary crashed threads that are
* waiting in beginHandlingCrash().
*/
void bsg_kscrashsentry_beginHandlingCrash(BSG_KSCrash_SentryContext *context);
void bsg_kscrashsentry_endHandlingCrash(void);

/** Clear a crash sentry context.
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -101,26 +101,14 @@ static struct sigaction *get_previous_sigaction(int sigNum) {
void bsg_kssighndl_i_handleSignal(int sigNum, siginfo_t *signalInfo,
void *userContext) {
BSG_KSLOG_DEBUG("Trapped signal %d", sigNum);
if (bsg_g_enabled) {
bool wasHandlingCrash = bsg_g_context->handlingCrash;
bsg_kscrashsentry_beginHandlingCrash(bsg_g_context);

BSG_KSLOG_DEBUG(
"Signal handler is installed. Continuing signal handling.");
if (bsg_g_enabled &&
bsg_kscrashsentry_beginHandlingCrash(bsg_ksmachthread_self())) {

BSG_KSLOG_DEBUG("Suspending all threads.");
bsg_kscrashsentry_suspendThreads();

if (wasHandlingCrash) {
BSG_KSLOG_INFO("Detected crash in the crash reporter. Restoring "
"original handlers.");
bsg_g_context->crashedDuringCrashHandling = true;
bsg_kscrashsentry_uninstall(BSG_KSCrashTypeAsyncSafe);
}

BSG_KSLOG_DEBUG("Filling out context.");
bsg_g_context->crashType = BSG_KSCrashTypeSignal;
bsg_g_context->offendingThread = bsg_ksmachthread_self();
bsg_g_context->registersAreValid = true;
bsg_g_context->faultAddress = (uintptr_t)signalInfo->si_addr;
bsg_g_context->signal.userContext = userContext;
Expand All @@ -133,6 +121,7 @@ void bsg_kssighndl_i_handleSignal(int sigNum, siginfo_t *signalInfo,
"Crash handling complete. Restoring original handlers.");
bsg_kscrashsentry_uninstall(BSG_KSCrashTypeAsyncSafe);
bsg_kscrashsentry_resumeThreads();
bsg_kscrashsentry_endHandlingCrash();
}

BSG_KSLOG_DEBUG(
Expand Down
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
Changelog
=========

## TBD

* Improve reliability of crash reporting when multiple crashes occur concurrently.
[#1286](https://github.com/bugsnag/bugsnag-cocoa/pull/1286)

## 6.16.1 (2022-01-19)

### Bug fixes
Expand Down
7 changes: 7 additions & 0 deletions features/crashprobe.feature
Original file line number Diff line number Diff line change
Expand Up @@ -193,3 +193,10 @@ Feature: Reporting crash events
| Intel | EXC_BAD_INSTRUCTION |
And the exception "message" starts with "BUG IN CLIENT OF LIBDISPATCH: dispatch_"
And the event "metaData.error.crashInfo" starts with "BUG IN CLIENT OF LIBDISPATCH: dispatch_"

Scenario: Concurrent crashes should result in a single valid crash report
Given I run "ConcurrentCrashesScenario" and relaunch the crashed app
And I configure Bugsnag for "ConcurrentCrashesScenario"
And I wait to receive an error
Then the error is valid for the error reporting API
And the event "unhandled" is true
4 changes: 4 additions & 0 deletions features/fixtures/ios/iOSTestApp.xcodeproj/project.pbxproj
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
01B6BB7525D5748800FC4DE6 /* LastRunInfoScenario.swift in Sources */ = {isa = PBXBuildFile; fileRef = 01B6BB7425D5748800FC4DE6 /* LastRunInfoScenario.swift */; };
01B6BBB625DA82B800FC4DE6 /* SendLaunchCrashesSynchronouslyScenario.swift in Sources */ = {isa = PBXBuildFile; fileRef = 01B6BBB525DA82B700FC4DE6 /* SendLaunchCrashesSynchronouslyScenario.swift */; };
01CD103926690463007FA5F0 /* libBugsnagStatic.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 01CD103826690463007FA5F0 /* libBugsnagStatic.a */; };
01DCB82B27985D690048640A /* ConcurrentCrashesScenario.mm in Sources */ = {isa = PBXBuildFile; fileRef = 01DCB82A27985D2C0048640A /* ConcurrentCrashesScenario.mm */; };
01DE903826CE99B800455213 /* CriticalThermalStateScenario.swift in Sources */ = {isa = PBXBuildFile; fileRef = 01DE903726CE99B800455213 /* CriticalThermalStateScenario.swift */; };
01DF442C2798044E00C31104 /* SystemConfiguration.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 01DF442B2798044E00C31104 /* SystemConfiguration.framework */; };
01E0DB0B25E8EBD100A740ED /* AppDurationScenario.swift in Sources */ = {isa = PBXBuildFile; fileRef = 01E0DB0A25E8EBD100A740ED /* AppDurationScenario.swift */; };
Expand Down Expand Up @@ -187,6 +188,7 @@
01B6BB7425D5748800FC4DE6 /* LastRunInfoScenario.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = LastRunInfoScenario.swift; sourceTree = "<group>"; };
01B6BBB525DA82B700FC4DE6 /* SendLaunchCrashesSynchronouslyScenario.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = SendLaunchCrashesSynchronouslyScenario.swift; sourceTree = "<group>"; };
01CD103826690463007FA5F0 /* libBugsnagStatic.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; path = libBugsnagStatic.a; sourceTree = BUILT_PRODUCTS_DIR; };
01DCB82A27985D2C0048640A /* ConcurrentCrashesScenario.mm */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.objcpp; path = ConcurrentCrashesScenario.mm; sourceTree = "<group>"; };
01DE903726CE99B800455213 /* CriticalThermalStateScenario.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = CriticalThermalStateScenario.swift; sourceTree = "<group>"; };
01DF442B2798044E00C31104 /* SystemConfiguration.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = SystemConfiguration.framework; path = System/Library/Frameworks/SystemConfiguration.framework; sourceTree = SDKROOT; };
01E0DB0A25E8EBD100A740ED /* AppDurationScenario.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = AppDurationScenario.swift; sourceTree = "<group>"; };
Expand Down Expand Up @@ -597,6 +599,7 @@
01FA9EC326D63BB20059FF4A /* AppHangInTerminationScenario.swift */,
01F1474325F282E600C2DC65 /* AppHangScenarios.swift */,
01AF6A52258A112F00FFC803 /* BareboneTestScenarios.swift */,
01DCB82A27985D2C0048640A /* ConcurrentCrashesScenario.mm */,
01DE903726CE99B800455213 /* CriticalThermalStateScenario.swift */,
0163BFA62583B3CF008DC28B /* DiscardClassesScenarios.swift */,
01847DD526453D4E00ADA4C7 /* InvalidCrashReportScenario.m */,
Expand Down Expand Up @@ -996,6 +999,7 @@
A1117E592535B29800014FDA /* OOMEnabledErrorTypesScenario.swift in Sources */,
F4295397AD31C1C1E64144F5 /* NonExistentMethodScenario.m in Sources */,
E700EE53247D31EA008CFFB6 /* OnErrorOverwriteScenario.swift in Sources */,
01DCB82B27985D690048640A /* ConcurrentCrashesScenario.mm in Sources */,
8AEFC79920F9132C00A78779 /* AutoSessionHandledEventsScenario.m in Sources */,
E7A324ED247E9DB3008B0052 /* BreadcrumbCallbackRemovalScenario.m in Sources */,
0037410F2473CF2300BE41AA /* AppAndDeviceAttributesScenario.swift in Sources */,
Expand Down
Loading

0 comments on commit f39da20

Please sign in to comment.