Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve flaky integration tests #3836

Merged
merged 11 commits into from
May 7, 2024
42 changes: 37 additions & 5 deletions build/jobs/run-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@ parameters:
- name: appServiceName
type: string
jobs:
- job: "integrationTests"

- job: "CosmosIntegrationTests"
pool:
name: '$(SharedLinuxPool)'
vmImage: '$(LinuxVmImage)'
Expand Down Expand Up @@ -34,22 +35,53 @@ jobs:
azureSubscription: $(ConnectedServiceName)
KeyVaultName: '${{ parameters.keyVaultName }}'

- task: DotNetCoreCLI@2
displayName: 'Run Cosmos Integration Tests'
inputs:
command: test
arguments: '"$(Agent.TempDirectory)/IntegrationTests/**/*${{ parameters.version }}.Tests.Integration*.dll" --filter DisplayName!~SqlServer -v normal'
workingDirectory: "$(System.ArtifactsDirectory)"
testRunTitle: '${{ parameters.version }} Integration'
env:
'CosmosDb:Host': $(CosmosDb--Host)
'CosmosDb:Key': $(CosmosDb--Key)

- job: "SqlIntegrationTests"
pool:
name: '$(SharedLinuxPool)'
vmImage: '$(LinuxVmImage)'
steps:
- task: DownloadBuildArtifacts@0
inputs:
buildType: 'current'
downloadType: 'single'
downloadPath: '$(System.ArtifactsDirectory)'
artifactName: 'IntegrationTests'

- task: ExtractFiles@1
displayName: 'Extract Integration Test Binaries'
inputs:
archiveFilePatterns: '$(System.ArtifactsDirectory)/IntegrationTests/Microsoft.Health.Fhir.${{ parameters.version }}.Tests.Integration.zip'
destinationFolder: '$(Agent.TempDirectory)/IntegrationTests/'

- task: UseDotNet@2
inputs:
useGlobalJson: true

- task: AzureKeyVault@1
displayName: 'Azure Key Vault: ${{ parameters.keyVaultName }}-sql'
inputs:
azureSubscription: $(ConnectedServiceName)
KeyVaultName: '${{ parameters.keyVaultName }}-sql'

- task: DotNetCoreCLI@2
displayName: 'Run Integration Tests'
displayName: 'Run Sql Integration Tests'
inputs:
command: test
arguments: '"$(Agent.TempDirectory)/IntegrationTests/**/*${{ parameters.version }}.Tests.Integration*.dll" --blame-hang-timeout 15m'
arguments: '"$(Agent.TempDirectory)/IntegrationTests/**/*${{ parameters.version }}.Tests.Integration*.dll" --filter DisplayName!~Cosmos -v normal'
workingDirectory: "$(System.ArtifactsDirectory)"
testRunTitle: '${{ parameters.version }} Integration'
env:
'CosmosDb:Host': $(CosmosDb--Host)
'CosmosDb:Key': $(CosmosDb--Key)
'SqlServer:ConnectionString': $(SqlServer--ConnectionString)

- job: 'cosmosE2eTests'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,9 +58,7 @@ protected async Task StartAsync(double periodSec, CancellationToken cancellation

private async Task RunInternalAsync()
{
_cancellationToken.ThrowIfCancellationRequested();

if (_isRunning)
if (_isRunning || _cancellationToken.IsCancellationRequested)
{
return;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,7 @@ public async Task GivenIncrementalLoad_MultipleInputVersionsOutOfOrderSomeNotExp
Assert.Equal(GetLastUpdated("2001"), result.Resource.Meta.LastUpdated);
}

[Fact]
[Fact(Skip = "Flaky test")]
public async Task GivenIncrementalLoad_MultipleInputVersionsOutOfOrderSomeNotExplicit_ResourceNotExisting()
{
var id = Guid.NewGuid().ToString("N");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,10 @@ public async Task GivenADatabaseSupportsResourceChangeCapture_WhenResourceChange
}
finally
{
await fhirStorageTestsFixture.DisposeAsync();
if (fhirStorageTestsFixture != null)
{
await fhirStorageTestsFixture.DisposeAsync();
}
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,10 @@ public async Task InitializeAsync()

public async Task DisposeAsync()
{
await (_storageFixture?.DisposeAsync() ?? Task.CompletedTask);
if (_storageFixture != null)
{
await _storageFixture.DisposeAsync();
}
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,14 @@
using System.Linq;
using Microsoft.Data.SqlClient;
using Microsoft.Health.Fhir.Tests.Common;
using Microsoft.Health.Fhir.Tests.Common.FixtureParameters;
using Microsoft.Health.SqlServer.Features.Storage;
using Microsoft.Health.Test.Utilities;
using Xunit;

namespace Microsoft.Health.Fhir.Tests.Integration.Persistence
{
[FhirStorageTestsFixtureArgumentSets(DataStore.SqlServer)]
[Trait(Traits.OwningTeam, OwningTeam.Fhir)]
[Trait(Traits.Category, Categories.DataSourceValidation)]
public class SqlDataReaderExtensionsTests : IClassFixture<SqlServerFhirStorageTestsFixture>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,15 @@
using Microsoft.Health.Core.Extensions;
using Microsoft.Health.Fhir.SqlServer.Features.Storage;
using Microsoft.Health.Fhir.Tests.Common;
using Microsoft.Health.Fhir.Tests.Common.FixtureParameters;
using Microsoft.Health.SqlServer;
using Microsoft.Health.Test.Utilities;
using Xunit;
using Xunit.Abstractions;

namespace Microsoft.Health.Fhir.Tests.Integration.Persistence
{
[FhirStorageTestsFixtureArgumentSets(DataStore.SqlServer)]
[Trait(Traits.OwningTeam, OwningTeam.Fhir)]
[Trait(Traits.Category, Categories.DataSourceValidation)]
public class SqlRetryServiceTests : IClassFixture<SqlServerFhirStorageTestsFixture>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
// -------------------------------------------------------------------------------------------------

using System;
using System.Diagnostics;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
Expand All @@ -13,7 +14,6 @@
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Logging.Abstractions;
using Microsoft.Extensions.Options;
using Microsoft.Health.Extensions.DependencyInjection;
using Microsoft.Health.Fhir.SqlServer.Features.Schema;
using Microsoft.Health.Fhir.SqlServer.Features.Storage;
using Microsoft.Health.Fhir.SqlServer.Features.Watchdogs;
Expand All @@ -40,6 +40,7 @@ public class SqlServerFhirStorageTestHelper : IFhirStorageTestHelper, ISqlServer
private readonly ISqlConnectionBuilder _sqlConnectionBuilder;
private readonly AsyncRetryPolicy _dbSetupRetryPolicy;
private readonly TestQueueClient _queueClient;
private static readonly SemaphoreSlim DbSetupSemaphore = new(4);

public SqlServerFhirStorageTestHelper(
string initialConnectionString,
Expand All @@ -60,50 +61,59 @@ public SqlServerFhirStorageTestHelper(
_dbSetupRetryPolicy = Policy
.Handle<Exception>()
.WaitAndRetryAsync(
retryCount: 7,
sleepDurationProvider: retryAttempt => TimeSpan.FromSeconds(Math.Pow(2, retryAttempt)));
retryCount: 20,
sleepDurationProvider: retryAttempt => TimeSpan.FromSeconds(3));
}

internal bool DropDatabase => true;

public async Task CreateAndInitializeDatabase(string databaseName, int maximumSupportedSchemaVersion, bool forceIncrementalSchemaUpgrade, SchemaInitializer schemaInitializer = null, CancellationToken cancellationToken = default)
{
var testConnectionString = new SqlConnectionStringBuilder(_initialConnectionString) { InitialCatalog = databaseName }.ToString();
schemaInitializer ??= CreateSchemaInitializer(testConnectionString, maximumSupportedSchemaVersion);
string testConnectionString = new SqlConnectionStringBuilder(_initialConnectionString) { InitialCatalog = databaseName }.ToString();

await _dbSetupRetryPolicy.ExecuteAsync(async () =>
{
// Create the database.
await using SqlConnection connection = await _sqlConnectionBuilder.GetSqlConnectionAsync(_masterDatabaseName, null, cancellationToken);
await connection.OpenAsync(cancellationToken);
await _dbSetupRetryPolicy.ExecuteAsync(
async () =>
{
// Create the database.
await using SqlConnection connection = await _sqlConnectionBuilder.GetSqlConnectionAsync(_masterDatabaseName, null, cancellationToken);
await connection.OpenAsync(cancellationToken);

await using SqlCommand command = connection.CreateCommand();
command.CommandTimeout = 600;
command.CommandText = @$"
await using SqlCommand command = connection.CreateCommand();
command.CommandTimeout = 600;
command.CommandText = @$"
IF NOT EXISTS (SELECT * FROM sys.databases WHERE name = '{databaseName}')
BEGIN
CREATE DATABASE {databaseName};
END";
await command.ExecuteNonQueryAsync(cancellationToken);
await connection.CloseAsync();
});

await DbSetupSemaphore.WaitAsync(cancellationToken);
try
{
await command.ExecuteNonQueryAsync(cancellationToken);
}
finally
{
DbSetupSemaphore.Release();
}

await connection.CloseAsync();
});

// Verify that we can connect to the new database. This sometimes does not work right away with Azure SQL.

await _dbSetupRetryPolicy.ExecuteAsync(async () =>
{
await using SqlConnection connection = await _sqlConnectionBuilder.GetSqlConnectionAsync(databaseName, null, cancellationToken);
await connection.OpenAsync(cancellationToken);
await using SqlCommand sqlCommand = connection.CreateCommand();
sqlCommand.CommandText = "SELECT 1";
await sqlCommand.ExecuteScalarAsync(cancellationToken);
await connection.CloseAsync();
});
await _dbSetupRetryPolicy.ExecuteAsync(
async () =>
{
await using SqlConnection connection = await _sqlConnectionBuilder.GetSqlConnectionAsync(databaseName, null, cancellationToken);
await connection.OpenAsync(cancellationToken);
await using SqlCommand sqlCommand = connection.CreateCommand();
sqlCommand.CommandText = "SELECT 1";
await sqlCommand.ExecuteScalarAsync(cancellationToken);
await connection.CloseAsync();
});

await _dbSetupRetryPolicy.ExecuteAsync(async () =>
{
await schemaInitializer.InitializeAsync(forceIncrementalSchemaUpgrade, cancellationToken);
});
schemaInitializer ??= CreateSchemaInitializer(testConnectionString, maximumSupportedSchemaVersion);
await _dbSetupRetryPolicy.ExecuteAsync(async () => { await schemaInitializer.InitializeAsync(forceIncrementalSchemaUpgrade, cancellationToken); });
await InitWatchdogsParameters(databaseName);
await EnableDatabaseLogging(databaseName);
await _sqlServerFhirModel.Initialize(maximumSupportedSchemaVersion, true, cancellationToken);
Expand Down Expand Up @@ -206,18 +216,31 @@ public async Task DeleteDatabase(string databaseName, CancellationToken cancella
return;
}

SqlConnection.ClearAllPools();
try
{
await DbSetupSemaphore.WaitAsync(cancellationToken);
try
{
SqlConnection.ClearAllPools();

await _dbSetupRetryPolicy.ExecuteAsync(async () =>
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why do we not want to retry the cleanup?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was mainly trying to find where all the time is spent for integration tests. I think we wait the max amount of time on retries to delete the database at the end of every fixture.
Strangely enough, when watching the resource group the databases all seemed to eventually drop, perhaps there's something async going on under the covers on the sql side.

On top of this, we delete the resource group at the end of the test which should cover anything that failed, and CI has a task to remove orphaned test databases.

await using SqlConnection connection = await _sqlConnectionBuilder.GetSqlConnectionAsync(_masterDatabaseName, null, cancellationToken);
await connection.OpenAsync(cancellationToken);
await using SqlCommand command = connection.CreateCommand();
command.CommandTimeout = 600;
command.CommandText = $"DROP DATABASE IF EXISTS {databaseName}";

await command.ExecuteNonQueryAsync(cancellationToken);
await connection.CloseAsync();
}
finally
{
DbSetupSemaphore.Release();
}
}
catch (Exception ex)
{
await using SqlConnection connection = await _sqlConnectionBuilder.GetSqlConnectionAsync(_masterDatabaseName, null, cancellationToken);
await connection.OpenAsync(cancellationToken);
await using SqlCommand command = connection.CreateCommand();
command.CommandTimeout = 600;
command.CommandText = $"DROP DATABASE IF EXISTS {databaseName}";
await command.ExecuteNonQueryAsync(cancellationToken);
await connection.CloseAsync();
});
Trace.TraceError("Failed to delete database: " + ex.Message);
}
}

public Task DeleteAllExportJobRecordsAsync(CancellationToken cancellationToken = default)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,14 @@

using Microsoft.Data.SqlClient;
using Microsoft.Health.Fhir.Tests.Common;
using Microsoft.Health.Fhir.Tests.Common.FixtureParameters;
using Microsoft.Health.SqlServer.Features.Storage;
using Microsoft.Health.Test.Utilities;
using Xunit;

namespace Microsoft.Health.Fhir.Tests.Integration.Persistence
{
[FhirStorageTestsFixtureArgumentSets(DataStore.SqlServer)]
[Trait(Traits.OwningTeam, OwningTeam.Fhir)]
[Trait(Traits.Category, Categories.Search)]
public class SqlServerMemberMatchTests : IClassFixture<SqlServerFhirStorageTestsFixture>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
using Microsoft.Health.Fhir.SqlServer.Features.Schema;
using Microsoft.Health.Fhir.SqlServer.Features.Storage;
using Microsoft.Health.Fhir.Tests.Common;
using Microsoft.Health.Fhir.Tests.Common.FixtureParameters;
using Microsoft.Health.SqlServer;
using Microsoft.Health.SqlServer.Configs;
using Microsoft.Health.SqlServer.Features.Client;
Expand All @@ -38,6 +39,7 @@

namespace Microsoft.Health.Fhir.Tests.Integration.Persistence
{
[FhirStorageTestsFixtureArgumentSets(DataStore.SqlServer)]
[Trait(Traits.OwningTeam, OwningTeam.Fhir)]
[Trait(Traits.Category, Categories.Schema)]
public class SqlServerSchemaUpgradeTests
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
using Microsoft.Health.Fhir.Core.UnitTests.Extensions;
using Microsoft.Health.Fhir.SqlServer.Features.Storage;
using Microsoft.Health.Fhir.Tests.Common;
using Microsoft.Health.Fhir.Tests.Common.FixtureParameters;
using Microsoft.Health.SqlServer.Features.Client;
using Microsoft.Health.Test.Utilities;
using Xunit;
Expand All @@ -23,6 +24,7 @@

namespace Microsoft.Health.Fhir.Tests.Integration.Persistence
{
[FhirStorageTestsFixtureArgumentSets(DataStore.SqlServer)]
[Trait(Traits.OwningTeam, OwningTeam.Fhir)]
[Trait(Traits.Category, Categories.DataSourceValidation)]
public class SqlServerSetMergeTests : IClassFixture<SqlServerFhirStorageTestsFixture>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,14 @@
using Microsoft.Health.Fhir.Core.Features.Persistence;
using Microsoft.Health.Fhir.SqlServer.Features.Schema;
using Microsoft.Health.Fhir.Tests.Common;
using Microsoft.Health.Fhir.Tests.Common.FixtureParameters;
using Microsoft.Health.Test.Utilities;
using Xunit;
using Task = System.Threading.Tasks.Task;

namespace Microsoft.Health.Fhir.Tests.Integration.Persistence
{
[FhirStorageTestsFixtureArgumentSets(DataStore.SqlServer)]
[Trait(Traits.OwningTeam, OwningTeam.Fhir)]
[Trait(Traits.Category, Categories.DataSourceValidation)]
public class SqlServerSqlCompatibilityTests
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
using Microsoft.Health.Fhir.SqlServer.Features.Storage.TvpRowGeneration.Merge;
using Microsoft.Health.Fhir.SqlServer.Features.Watchdogs;
using Microsoft.Health.Fhir.Tests.Common;
using Microsoft.Health.Fhir.Tests.Common.FixtureParameters;
using Microsoft.Health.Fhir.ValueSets;
using Microsoft.Health.Test.Utilities;
using NSubstitute;
Expand All @@ -33,6 +34,7 @@

namespace Microsoft.Health.Fhir.Tests.Integration.Persistence
{
[FhirStorageTestsFixtureArgumentSets(DataStore.SqlServer)]
[Trait(Traits.OwningTeam, OwningTeam.Fhir)]
[Trait(Traits.Category, Categories.DataSourceValidation)]
public class SqlServerWatchdogTests : IClassFixture<SqlServerFhirStorageTestsFixture>
Expand Down
Loading