Skip to content

Commit

Permalink
Exclude common failure in smoke tests (#6469)
Browse files Browse the repository at this point in the history
## Summary of changes

Retries the smoke tests once when they hit a common, flaky error that we haven't been able to get to the bottom of.

## Reason for change

The runtime _sometimes_ crashes after the app has completed and we've shut
down, causing flaky test failures. We haven't managed to get to the bottom of it yet.

## Implementation details

Retry the smoke tests _once_ if we spot an error that looks like this:

```
ptrace(ATTACH, 14822) FAILED Operation not permitted
```

## Test coverage

Hard to test - as long as it passes for now that's good enough I think
  • Loading branch information
andrewlock authored Dec 20, 2024
1 parent 0503f1f commit 58d5516
Showing 1 changed file with 31 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,24 @@ protected void SetEnvironmentVariable(string key, string value)
/// <param name="expectedExitCode">Expected exit code</param>
/// <returns>Async operation</returns>
protected async Task CheckForSmoke(bool shouldDeserializeTraces = true, int expectedExitCode = 0)
{
    // RunCheck returns false (instead of throwing) when it spots a known, retryable
    // flake and we told it a retry is allowed. Permit a single retry: two attempts in total.
    var attemptsRemaining = 2;
    while (attemptsRemaining > 0)
    {
        // Only allow RunCheck to request a retry if another attempt is left after this one;
        // on the final attempt it must either succeed or fail the test.
        if (await RunCheck(shouldDeserializeTraces, expectedExitCode, allowRetry: attemptsRemaining > 1))
        {
            // all good
            return;
        }

        // Consume the attempt. Without this the loop would retry forever,
        // because allowRetry would stay true on every iteration.
        attemptsRemaining--;

        // Record that a retry was needed so the flake rate can be tracked.
        await ErrorHelpers.SendMetric(Output, "dd_trace_dotnet.ci.tests.retries", EnvironmentHelper);
    }

    // On the last attempt allowRetry is false, so RunCheck is expected to throw or return true.
    throw new InvalidOperationException("Unreachable, should throw in RunCheck or return true");
}

private async Task<bool> RunCheck(bool shouldDeserializeTraces, int expectedExitCode, bool allowRetry)
{
var applicationPath = EnvironmentHelper.GetSampleApplicationPath().Replace(@"\\", @"\");
Output.WriteLine($"Application path: {applicationPath}");
Expand Down Expand Up @@ -106,7 +124,7 @@ protected async Task CheckForSmoke(bool shouldDeserializeTraces = true, int expe
if (AssumeSuccessOnTimeout)
{
Assert.True(true, "No smoke is a good sign for this case, even on timeout.");
return;
return true;
}
else
{
Expand Down Expand Up @@ -136,9 +154,18 @@ protected async Task CheckForSmoke(bool shouldDeserializeTraces = true, int expe

ErrorHelpers.CheckForKnownSkipConditions(Output, result.ExitCode, result.StandardError, EnvironmentHelper);

// TODO: Investigate and fix this!
if (allowRetry && Regex.IsMatch(result.StandardError, @"ptrace\(ATTACH, \d+\) FAILED Operation not permitted"))
{
// We have a "known" issue with getting errors like 'ptrace(ATTACH, 1234) FAILED Operation not permitted'
// It causes flake, happens during shutdown, but affects all runtimes.
// We don't have a good story for it now, so do a single retry...
Output.WriteLine($"Received 'ptrace(ATTACH, *) FAILED Operation not permitted' in standard error. Retrying once.");
return false;
}
#if !NET5_0_OR_GREATER
if (result.StandardOutput.Contains("App completed successfully")
&& Regex.IsMatch(result.StandardError, @"open\(/proc/\d+/mem\) FAILED 2 \(No such file or directory\)"))
&& Regex.IsMatch(result.StandardError, @"open\(/proc/\d+/mem\) FAILED 2 \(No such file or directory\)"))
{
// The above message is the last thing set before we exit.
// We can still get flake on shutdown (which we can't isolate), but for some reason
Expand All @@ -153,6 +180,8 @@ protected async Task CheckForSmoke(bool shouldDeserializeTraces = true, int expe
{
Assert.True(string.IsNullOrEmpty(result.StandardError), $"Expected no errors in smoke test: {result.StandardError}");
}

return true;
}
}
}

0 comments on commit 58d5516

Please sign in to comment.