Skip to content

Commit

Permalink
Restore job failure fix (#203)
Browse files Browse the repository at this point in the history
  • Loading branch information
hsubramanianaks authored Mar 20, 2023
1 parent 2aa8711 commit 48f75ad
Show file tree
Hide file tree
Showing 4 changed files with 132 additions and 2 deletions.
2 changes: 1 addition & 1 deletion deployment/settings/services/imagetag.setting
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
MINDARO_DEVHOSTAGENT_TAG=1.1.0
MINDARO_DEVHOSTAGENT_RESTORATIONJOB_TAG=1.1.0
MINDARO_DEVHOSTAGENT_RESTORATIONJOB_TAG=1.2.0
MINDARO_ROUTINGMANAGER_TAG=1.1.0
MINDARO_LOCALAGENT_TAG=1.1.0
70 changes: 70 additions & 0 deletions src/client.sln
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,16 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "endpointmanagerlauncher", "
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "dsc", "dsc\dsc.csproj", "{31BC54A2-FB46-4C69-B338-FFAF52D586AC}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "devhostAgent.restorationjob.tests", "devhostAgent.restorationjob.tests\devhostAgent.restorationjob.tests.csproj", "{56E8FA0C-7266-4AE7-88EC-28F924B3E586}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "devhostAgent.restorationjob", "devhostAgent.restorationjob\devhostAgent.restorationjob.csproj", "{8CD1C8F8-D634-4A61-B9EF-3052677F25C7}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "routingmanager", "routingmanager\routingmanager.csproj", "{4FE73882-FEDC-4353-B4D1-D0D678F22F1A}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "routingmanager.tests", "routingmanager.tests\routingmanager.tests.csproj", "{B252B0C6-9996-46D8-807F-D4611AA843F3}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "dsc.tests", "dsc.tests\dsc.tests.csproj", "{C41311CF-CF5D-42E1-91C3-9A5CFD579218}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
Expand Down Expand Up @@ -138,6 +148,66 @@ Global
{31BC54A2-FB46-4C69-B338-FFAF52D586AC}.Release|x64.Build.0 = Release|Any CPU
{31BC54A2-FB46-4C69-B338-FFAF52D586AC}.Release|x86.ActiveCfg = Release|Any CPU
{31BC54A2-FB46-4C69-B338-FFAF52D586AC}.Release|x86.Build.0 = Release|Any CPU
{56E8FA0C-7266-4AE7-88EC-28F924B3E586}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{56E8FA0C-7266-4AE7-88EC-28F924B3E586}.Debug|Any CPU.Build.0 = Debug|Any CPU
{56E8FA0C-7266-4AE7-88EC-28F924B3E586}.Debug|x64.ActiveCfg = Debug|Any CPU
{56E8FA0C-7266-4AE7-88EC-28F924B3E586}.Debug|x64.Build.0 = Debug|Any CPU
{56E8FA0C-7266-4AE7-88EC-28F924B3E586}.Debug|x86.ActiveCfg = Debug|Any CPU
{56E8FA0C-7266-4AE7-88EC-28F924B3E586}.Debug|x86.Build.0 = Debug|Any CPU
{56E8FA0C-7266-4AE7-88EC-28F924B3E586}.Release|Any CPU.ActiveCfg = Release|Any CPU
{56E8FA0C-7266-4AE7-88EC-28F924B3E586}.Release|Any CPU.Build.0 = Release|Any CPU
{56E8FA0C-7266-4AE7-88EC-28F924B3E586}.Release|x64.ActiveCfg = Release|Any CPU
{56E8FA0C-7266-4AE7-88EC-28F924B3E586}.Release|x64.Build.0 = Release|Any CPU
{56E8FA0C-7266-4AE7-88EC-28F924B3E586}.Release|x86.ActiveCfg = Release|Any CPU
{56E8FA0C-7266-4AE7-88EC-28F924B3E586}.Release|x86.Build.0 = Release|Any CPU
{8CD1C8F8-D634-4A61-B9EF-3052677F25C7}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{8CD1C8F8-D634-4A61-B9EF-3052677F25C7}.Debug|Any CPU.Build.0 = Debug|Any CPU
{8CD1C8F8-D634-4A61-B9EF-3052677F25C7}.Debug|x64.ActiveCfg = Debug|Any CPU
{8CD1C8F8-D634-4A61-B9EF-3052677F25C7}.Debug|x64.Build.0 = Debug|Any CPU
{8CD1C8F8-D634-4A61-B9EF-3052677F25C7}.Debug|x86.ActiveCfg = Debug|Any CPU
{8CD1C8F8-D634-4A61-B9EF-3052677F25C7}.Debug|x86.Build.0 = Debug|Any CPU
{8CD1C8F8-D634-4A61-B9EF-3052677F25C7}.Release|Any CPU.ActiveCfg = Release|Any CPU
{8CD1C8F8-D634-4A61-B9EF-3052677F25C7}.Release|Any CPU.Build.0 = Release|Any CPU
{8CD1C8F8-D634-4A61-B9EF-3052677F25C7}.Release|x64.ActiveCfg = Release|Any CPU
{8CD1C8F8-D634-4A61-B9EF-3052677F25C7}.Release|x64.Build.0 = Release|Any CPU
{8CD1C8F8-D634-4A61-B9EF-3052677F25C7}.Release|x86.ActiveCfg = Release|Any CPU
{8CD1C8F8-D634-4A61-B9EF-3052677F25C7}.Release|x86.Build.0 = Release|Any CPU
{4FE73882-FEDC-4353-B4D1-D0D678F22F1A}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{4FE73882-FEDC-4353-B4D1-D0D678F22F1A}.Debug|Any CPU.Build.0 = Debug|Any CPU
{4FE73882-FEDC-4353-B4D1-D0D678F22F1A}.Debug|x64.ActiveCfg = Debug|Any CPU
{4FE73882-FEDC-4353-B4D1-D0D678F22F1A}.Debug|x64.Build.0 = Debug|Any CPU
{4FE73882-FEDC-4353-B4D1-D0D678F22F1A}.Debug|x86.ActiveCfg = Debug|Any CPU
{4FE73882-FEDC-4353-B4D1-D0D678F22F1A}.Debug|x86.Build.0 = Debug|Any CPU
{4FE73882-FEDC-4353-B4D1-D0D678F22F1A}.Release|Any CPU.ActiveCfg = Release|Any CPU
{4FE73882-FEDC-4353-B4D1-D0D678F22F1A}.Release|Any CPU.Build.0 = Release|Any CPU
{4FE73882-FEDC-4353-B4D1-D0D678F22F1A}.Release|x64.ActiveCfg = Release|Any CPU
{4FE73882-FEDC-4353-B4D1-D0D678F22F1A}.Release|x64.Build.0 = Release|Any CPU
{4FE73882-FEDC-4353-B4D1-D0D678F22F1A}.Release|x86.ActiveCfg = Release|Any CPU
{4FE73882-FEDC-4353-B4D1-D0D678F22F1A}.Release|x86.Build.0 = Release|Any CPU
{B252B0C6-9996-46D8-807F-D4611AA843F3}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{B252B0C6-9996-46D8-807F-D4611AA843F3}.Debug|Any CPU.Build.0 = Debug|Any CPU
{B252B0C6-9996-46D8-807F-D4611AA843F3}.Debug|x64.ActiveCfg = Debug|Any CPU
{B252B0C6-9996-46D8-807F-D4611AA843F3}.Debug|x64.Build.0 = Debug|Any CPU
{B252B0C6-9996-46D8-807F-D4611AA843F3}.Debug|x86.ActiveCfg = Debug|Any CPU
{B252B0C6-9996-46D8-807F-D4611AA843F3}.Debug|x86.Build.0 = Debug|Any CPU
{B252B0C6-9996-46D8-807F-D4611AA843F3}.Release|Any CPU.ActiveCfg = Release|Any CPU
{B252B0C6-9996-46D8-807F-D4611AA843F3}.Release|Any CPU.Build.0 = Release|Any CPU
{B252B0C6-9996-46D8-807F-D4611AA843F3}.Release|x64.ActiveCfg = Release|Any CPU
{B252B0C6-9996-46D8-807F-D4611AA843F3}.Release|x64.Build.0 = Release|Any CPU
{B252B0C6-9996-46D8-807F-D4611AA843F3}.Release|x86.ActiveCfg = Release|Any CPU
{B252B0C6-9996-46D8-807F-D4611AA843F3}.Release|x86.Build.0 = Release|Any CPU
{C41311CF-CF5D-42E1-91C3-9A5CFD579218}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{C41311CF-CF5D-42E1-91C3-9A5CFD579218}.Debug|Any CPU.Build.0 = Debug|Any CPU
{C41311CF-CF5D-42E1-91C3-9A5CFD579218}.Debug|x64.ActiveCfg = Debug|Any CPU
{C41311CF-CF5D-42E1-91C3-9A5CFD579218}.Debug|x64.Build.0 = Debug|Any CPU
{C41311CF-CF5D-42E1-91C3-9A5CFD579218}.Debug|x86.ActiveCfg = Debug|Any CPU
{C41311CF-CF5D-42E1-91C3-9A5CFD579218}.Debug|x86.Build.0 = Debug|Any CPU
{C41311CF-CF5D-42E1-91C3-9A5CFD579218}.Release|Any CPU.ActiveCfg = Release|Any CPU
{C41311CF-CF5D-42E1-91C3-9A5CFD579218}.Release|Any CPU.Build.0 = Release|Any CPU
{C41311CF-CF5D-42E1-91C3-9A5CFD579218}.Release|x64.ActiveCfg = Release|Any CPU
{C41311CF-CF5D-42E1-91C3-9A5CFD579218}.Release|x64.Build.0 = Release|Any CPU
{C41311CF-CF5D-42E1-91C3-9A5CFD579218}.Release|x86.ActiveCfg = Release|Any CPU
{C41311CF-CF5D-42E1-91C3-9A5CFD579218}.Release|x86.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
Expand Down
48 changes: 48 additions & 0 deletions src/devhostAgent.restorationjob.tests/RestorationJobAppTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,54 @@ public void ExecutionTest(string patchStateFile, string testHelper)
method.Invoke(this, new object[] { false });
}

/// <summary>
/// Runs the restoration job against a ping result built with <c>GetSuccessPingResult(0)</c>
/// (presumably zero connected sessions - confirm against the helper) and expects exit code 0,
/// exactly one call on the fake delegating handler, and exactly one remote restore job cleanup.
/// </summary>
[Fact]
public void EnsureRestoresIflastPingWithSessionsIsNullAndRestoreTimeExceeded()
{
    // Arrange
    // Per the original note: the restore time is set to zero seconds when this test class is initialized,
    // so this test does not override it.
    string patchStatePath = Path.Combine("TestData", "DeploymentPatch.json");
    string serializedPatchState = File.ReadAllText(patchStatePath);
    A.CallTo(() => _autoFake.Resolve<IFileSystem>().ReadAllTextFromFile(
            DevHostConstants.DevHostRestorationJob.PatchStateFullPath,
            A<int>.Ignored))
        .Returns(serializedPatchState);
    DeploymentPatch_Helper(isSetup: true);

    var pingCall = ConfigureHttpCall(GetSuccessPingResult(0));
    pingCall.NumberOfTimes(1);

    // Act
    int actualExitCode = _app.Execute(Array.Empty<string>(), CancellationToken.None);

    // Assert
    Assert.Equal(0, actualExitCode);
    A.CallTo(_fakeDelegatingHandler).MustHaveHappenedOnceExactly();
    A.CallTo(() => _autoFake.Resolve<IRemoteRestoreJobCleaner>().CleanupRemoteRestoreJobByInstanceLabelAsync(
            A<string>.Ignored,
            A<string>.Ignored,
            A<CancellationToken>.Ignored))
        .MustHaveHappenedOnceExactly();
}
/// <summary>
/// Overrides the restore timeout to one minute, runs the restoration job against a ping result
/// built with <c>GetSuccessPingResult(0)</c>, and expects exit code 1, exactly two calls on the
/// fake delegating handler, and no remote restore job cleanup.
/// </summary>
[Fact]
public void EnsureNoRestoreIfRestoreTimeIsNotExceeded()
{
    // Arrange
    // Raise the restore timeout to one minute for this test.
    TimeSpan oneMinute = TimeSpan.FromMinutes(1);
    A.CallTo(() => _env.RestoreTimeout).Returns(oneMinute);

    string patchStatePath = Path.Combine("TestData", "DeploymentPatch.json");
    string serializedPatchState = File.ReadAllText(patchStatePath);
    A.CallTo(() => _autoFake.Resolve<IFileSystem>().ReadAllTextFromFile(
            DevHostConstants.DevHostRestorationJob.PatchStateFullPath,
            A<int>.Ignored))
        .Returns(serializedPatchState);
    DeploymentPatch_Helper(isSetup: true);

    var pingCall = ConfigureHttpCall(GetSuccessPingResult(0));
    pingCall.NumberOfTimes(1);

    // Act
    int actualExitCode = _app.Execute(Array.Empty<string>(), CancellationToken.None);

    // Assert
    Assert.Equal(1, actualExitCode);
    A.CallTo(_fakeDelegatingHandler).MustHaveHappenedTwiceExactly();
    A.CallTo(() => _autoFake.Resolve<IRemoteRestoreJobCleaner>().CleanupRemoteRestoreJobByInstanceLabelAsync(
            A<string>.Ignored,
            A<string>.Ignored,
            A<CancellationToken>.Ignored))
        .MustNotHaveHappened();
}

/// <summary>
/// Runs the restoration job against a ping result built with <c>GetSuccessPingResult(3)</c>
/// (presumably three connected sessions - confirm against the helper) and expects exit code 1,
/// exactly two calls on the fake delegating handler, and no remote restore job cleanup.
/// </summary>
[Fact]
public void EnsureNoRestoreIflastPingWithSessionsIsNotNull()
{
    // Arrange
    string patchStatePath = Path.Combine("TestData", "DeploymentPatch.json");
    string serializedPatchState = File.ReadAllText(patchStatePath);
    A.CallTo(() => _autoFake.Resolve<IFileSystem>().ReadAllTextFromFile(
            DevHostConstants.DevHostRestorationJob.PatchStateFullPath,
            A<int>.Ignored))
        .Returns(serializedPatchState);
    DeploymentPatch_Helper(isSetup: true);

    ConfigureHttpCall(GetSuccessPingResult(3)).NumberOfTimes(1);

    // Act
    int actualExitCode = _app.Execute(Array.Empty<string>(), CancellationToken.None);

    // Assert
    Assert.Equal(1, actualExitCode);
    A.CallTo(_fakeDelegatingHandler).MustHaveHappenedTwiceExactly();
    A.CallTo(() => _autoFake.Resolve<IRemoteRestoreJobCleaner>().CleanupRemoteRestoreJobByInstanceLabelAsync(
            A<string>.Ignored,
            A<string>.Ignored,
            A<CancellationToken>.Ignored))
        .MustNotHaveHappened();
}

#region Test helpers

private void DeploymentPatch_Helper(bool isSetup)
Expand Down
14 changes: 13 additions & 1 deletion src/devhostagent.restorationjob/RestorationJobApp.cs
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ private async Task<ExitCode> ExecuteInnerAsync(CancellationToken cancellationTok
await Task.Delay(_restorationJobEnvironmentVariables.PingInterval, cancellationToken);
int numFailedPings = 0;
DateTimeOffset? lastPingWithSessions = null;
DateTimeOffset? timeSinceLastPingIsNull = null;
bool restoredWorkload = false;
while (!cancellationToken.IsCancellationRequested && !restoredWorkload)
{
Expand Down Expand Up @@ -140,8 +141,19 @@ private async Task<ExitCode> ExecuteInnerAsync(CancellationToken cancellationTok
else
{
perfLogger.SetProperty(HasConnectedClients, false);
TimeSpan? disconnectedTimeSpan = null;
if (lastPingWithSessions == null)
{
// On the first loop iteration with no sessions seen, timeSinceLastPingIsNull is set to the current time; later iterations preserve that value.
// If lastPingWithSessions stays null for longer than RestoreTimeout (60 seconds per the original note; presumably the default), restoration will happen.
timeSinceLastPingIsNull = timeSinceLastPingIsNull == null ? DateTimeOffset.Now : timeSinceLastPingIsNull;
disconnectedTimeSpan = DateTimeOffset.Now - timeSinceLastPingIsNull;
} else
{
disconnectedTimeSpan = DateTimeOffset.Now - lastPingWithSessions;
}

var disconnectedTimeSpan = DateTimeOffset.Now - lastPingWithSessions;

if (disconnectedTimeSpan != null && disconnectedTimeSpan.Value > _restorationJobEnvironmentVariables.RestoreTimeout)
{
// Restore workload
Expand Down

0 comments on commit 48f75ad

Please sign in to comment.