Skip to content

Commit

Permalink
Avastancu/joannaakl/service container error log (#2110)
Browse files Browse the repository at this point in the history
* adding support for a service container docker logs

* Adding Unit test to ContainerOperationProvider

* Adding another test to ContainerOperationProvider

* placed the docker logs output in dedicated ##group section

* Removed the exception thrown if the service container was not healthy

* Removed duplicated logging to the executionContext

* Updated the container logs sub-section message

* Print service containers only if they were healthy
Unhealthy service logs are printed in ContainerHealthCheckLogs called prior to this step.

* Removed recently added method to inspect docker logs
The method was doing the same thing as the existing DockerLogs method.

* Added execution context error
This will make a failed health check more visible in the UI without disrupting the execution of the program.

* Removing the section 'Waiting for all services to be ready'

Since nested subsections are not being displayed properly and we already need one subsection per service error.

* Update src/Runner.Worker/Container/DockerCommandManager.cs

Co-authored-by: Tingluo Huang <tingluohuang@github.com>

* Update src/Test/L0/TestHostContext.cs

Co-authored-by: Tingluo Huang <tingluohuang@github.com>

* Change the logic for printing Service Containers logs

Service container logs will be printed in the 'Start containers' section only if there is an error.
Healthy services will have their logs printed in the 'Stop Containers' section.

* Removed unused import

* Added back section group.

* Moved service containers error logs to separate group sections

* Removed the test testing the old logic flow.

* Remove unnecessary 'IsAnyUnhealthy' flag

* Remove printHello() function

* Add newline to TestHostContext

* Remove unnecessary field 'UnhealthyContainers'

* Rename boolean flag indicating service container failure

* Refactor healthcheck logic to separate method to enable unit testing.

* Remove the default value for bool variable

* Update src/Runner.Worker/ContainerOperationProvider.cs

Co-authored-by: Tingluo Huang <tingluohuang@github.com>

* Update src/Runner.Worker/ContainerOperationProvider.cs

Co-authored-by: Tingluo Huang <tingluohuang@github.com>

* Rename Healthcheck back to ContainerHealthcheck

* Make test sequential

* Unextract the container error logs method

* remove test asserting thrown exception

* Add configure await

* Update src/Test/L0/Worker/ContainerOperationProviderL0.cs

Co-authored-by: Tingluo Huang <tingluohuang@github.com>

* Update src/Test/L0/Worker/ContainerOperationProviderL0.cs

Co-authored-by: Tingluo Huang <tingluohuang@github.com>

* Update src/Test/L0/Worker/ContainerOperationProviderL0.cs

Co-authored-by: Tingluo Huang <tingluohuang@github.com>

* Update src/Test/L0/Worker/ContainerOperationProviderL0.cs

Co-authored-by: Tingluo Huang <tingluohuang@github.com>

* Update src/Test/L0/Worker/ContainerOperationProviderL0.cs

Co-authored-by: Tingluo Huang <tingluohuang@github.com>

* Add back test asserting exception

* Check service exit code if there is no healthcheck configured

* Remove unnecessary healthcheck for healthy service container

* Revert "Check service exit code if there is no healthcheck configured"

This reverts commit fec24e8.

Co-authored-by: Ava S <avastancu@github.com>
Co-authored-by: Tingluo Huang <tingluohuang@github.com>
  • Loading branch information
3 people committed Oct 5, 2022
1 parent 2145432 commit d28c357
Show file tree
Hide file tree
Showing 3 changed files with 150 additions and 19 deletions.
2 changes: 2 additions & 0 deletions src/Runner.Worker/Container/ContainerInfo.cs
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,8 @@ public ContainerInfo(IHostContext hostContext, Pipelines.JobContainer container,
public bool IsJobContainer { get; set; }
public bool IsAlpine { get; set; }

public bool FailedInitialization { get; set; }

public IDictionary<string, string> ContainerEnvironmentVariables
{
get
Expand Down
59 changes: 40 additions & 19 deletions src/Runner.Worker/ContainerOperationProvider.cs
Original file line number Diff line number Diff line change
Expand Up @@ -98,12 +98,41 @@ public async Task StartContainersAsync(IExecutionContext executionContext, objec
await StartContainerAsync(executionContext, container);
}

await RunContainersHealthcheck(executionContext, containers);
}

/// <summary>
/// Waits for every service container (any container that is not the job container)
/// to report its health, then fails the step if at least one of them is unhealthy.
/// </summary>
/// <param name="executionContext">Context used to write log output and report errors.</param>
/// <param name="containers">All containers of the job; job containers are skipped.</param>
/// <exception cref="InvalidOperationException">
/// Thrown after the logs of every unhealthy service container have been printed.
/// </exception>
public async Task RunContainersHealthcheck(IExecutionContext executionContext, List<ContainerInfo> containers)
{
    executionContext.Output("##[group]Waiting for all services to be ready");

    var unhealthyContainers = new List<ContainerInfo>();
    foreach (var container in containers.Where(c => !c.IsJobContainer))
    {
        // Query the health exactly once per container; each call may poll docker inspect repeatedly.
        var healthcheck = await ContainerHealthcheck(executionContext, container);

        if (string.IsNullOrEmpty(healthcheck))
        {
            // ContainerHealthcheck returns an empty string when the container defines no HEALTHCHECK.
            // There is nothing to wait for, so such a service must not be reported as failed.
            continue;
        }

        if (string.Equals(healthcheck, "healthy", StringComparison.OrdinalIgnoreCase))
        {
            executionContext.Output($"{container.ContainerNetworkAlias} service is healthy.");
        }
        else
        {
            unhealthyContainers.Add(container);
        }
    }
    executionContext.Output("##[endgroup]");

    if (unhealthyContainers.Count > 0)
    {
        foreach (var container in unhealthyContainers)
        {
            // Flag the container first so the stop step does not print the same logs again,
            // even if retrieving the logs below fails.
            container.FailedInitialization = true;

            // One log group per failed service keeps each container's output separately collapsible.
            executionContext.Output($"##[group]Service container {container.ContainerNetworkAlias} failed.");
            await _dockerManager.DockerLogs(context: executionContext, containerId: container.ContainerId);
            executionContext.Error($"Failed to initialize container {container.ContainerImage}");
            executionContext.Output("##[endgroup]");
        }
        throw new InvalidOperationException("One or more containers failed to start.");
    }
}

public async Task StopContainersAsync(IExecutionContext executionContext, object data)
Expand Down Expand Up @@ -299,16 +328,15 @@ private async Task StopContainerAsync(IExecutionContext executionContext, Contai

if (!string.IsNullOrEmpty(container.ContainerId))
{
if (!container.IsJobContainer)
if (!container.IsJobContainer && !container.FailedInitialization)
{
// Print logs for service container jobs (not the "action" job itself b/c that's already logged).
executionContext.Output($"Print service container logs: {container.ContainerDisplayName}");
executionContext.Output($"Print service container logs: {container.ContainerDisplayName}");

int logsExitCode = await _dockerManager.DockerLogs(executionContext, container.ContainerId);
if (logsExitCode != 0)
{
executionContext.Warning($"Docker logs fail with exit code {logsExitCode}");
}
int logsExitCode = await _dockerManager.DockerLogs(executionContext, container.ContainerId);
if (logsExitCode != 0)
{
executionContext.Warning($"Docker logs fail with exit code {logsExitCode}");
}
}

executionContext.Output($"Stop and remove container: {container.ContainerDisplayName}");
Expand Down Expand Up @@ -395,14 +423,14 @@ private async Task RemoveContainerNetworkAsync(IExecutionContext executionContex
}
}

private async Task ContainerHealthcheck(IExecutionContext executionContext, ContainerInfo container)
private async Task<string> ContainerHealthcheck(IExecutionContext executionContext, ContainerInfo container)
{
string healthCheck = "--format=\"{{if .Config.Healthcheck}}{{print .State.Health.Status}}{{end}}\"";
string serviceHealth = (await _dockerManager.DockerInspect(context: executionContext, dockerObject: container.ContainerId, options: healthCheck)).FirstOrDefault();
if (string.IsNullOrEmpty(serviceHealth))
{
// Container has no HEALTHCHECK
return;
return String.Empty;
}
var retryCount = 0;
while (string.Equals(serviceHealth, "starting", StringComparison.OrdinalIgnoreCase))
Expand All @@ -413,14 +441,7 @@ private async Task ContainerHealthcheck(IExecutionContext executionContext, Cont
serviceHealth = (await _dockerManager.DockerInspect(context: executionContext, dockerObject: container.ContainerId, options: healthCheck)).FirstOrDefault();
retryCount++;
}
if (string.Equals(serviceHealth, "healthy", StringComparison.OrdinalIgnoreCase))
{
executionContext.Output($"{container.ContainerNetworkAlias} service is healthy.");
}
else
{
throw new InvalidOperationException($"Failed to initialize, {container.ContainerNetworkAlias} service is {serviceHealth}.");
}
return serviceHealth;
}

private async Task<string> ContainerRegistryLogin(IExecutionContext executionContext, ContainerInfo container)
Expand Down
108 changes: 108 additions & 0 deletions src/Test/L0/Worker/ContainerOperationProviderL0.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
using GitHub.Runner.Worker;
using GitHub.Runner.Worker.Container;
using Xunit;
using Moq;
using GitHub.Runner.Worker.Container.ContainerHooks;
using System.Threading.Tasks;
using System.Collections.Generic;
using System.Runtime.CompilerServices;
using GitHub.DistributedTask.WebApi;
using System;

namespace GitHub.Runner.Common.Tests.Worker
{

/// <summary>
/// L0 tests for <c>ContainerOperationProvider.RunContainersHealthcheck</c>.
/// The docker command manager is mocked, so the reported health status of the
/// single service container is fully controlled by each test.
/// </summary>
public sealed class ContainerOperationProviderL0
{
    // Canned results for the mocked DockerInspect call.
    private readonly List<string> _healthyDockerStatus = new List<string> { "healthy" };
    private readonly List<string> _unhealthyDockerStatus = new List<string> { "unhealthy" };

    // Containers handed to the provider; populated by Setup().
    private readonly List<ContainerInfo> _containers = new List<ContainerInfo>();

    private TestHostContext _hc;
    private Mock<IExecutionContext> _ec;
    private Mock<IDockerCommandManager> _dockerManager;
    private Mock<IContainerHookManager> _containerHookManager;
    private Mock<IJobServerQueue> _serverQueue;
    private Mock<IPagingLogger> _pagingLogger;
    private ContainerOperationProvider _containerOperationProvider;

    /// <summary>
    /// An unhealthy service container must be flagged as failed to initialize.
    /// </summary>
    [Fact]
    [Trait("Level", "L0")]
    [Trait("Category", "Worker")]
    public async Task RunServiceContainersHealthcheck_UnhealthyServiceContainer_AssertFailedTask()
    {
        // Arrange
        Setup();
        _dockerManager.Setup(x => x.DockerInspect(_ec.Object, It.IsAny<string>(), It.IsAny<string>())).Returns(Task.FromResult(_unhealthyDockerStatus));

        // Act
        // ThrowsAsync fails the test when no exception is raised, unlike a try/catch
        // whose assertions are skipped if the call unexpectedly succeeds.
        await Assert.ThrowsAsync<InvalidOperationException>(() => _containerOperationProvider.RunContainersHealthcheck(_ec.Object, _containers));

        // Assert
        var container = Assert.Single(_containers);
        Assert.True(container.FailedInitialization);
    }

    /// <summary>
    /// An unhealthy service container must abort the health check with an exception.
    /// </summary>
    [Fact]
    [Trait("Level", "L0")]
    [Trait("Category", "Worker")]
    public async Task RunServiceContainersHealthcheck_UnhealthyServiceContainer_AssertExceptionThrown()
    {
        // Arrange
        Setup();
        _dockerManager.Setup(x => x.DockerInspect(_ec.Object, It.IsAny<string>(), It.IsAny<string>())).Returns(Task.FromResult(_unhealthyDockerStatus));

        // Act and Assert
        await Assert.ThrowsAsync<InvalidOperationException>(() => _containerOperationProvider.RunContainersHealthcheck(_ec.Object, _containers));
    }

    /// <summary>
    /// A healthy service container must pass the health check without being flagged.
    /// </summary>
    [Fact]
    [Trait("Level", "L0")]
    [Trait("Category", "Worker")]
    public async Task RunServiceContainersHealthcheck_healthyServiceContainer_AssertSucceededTask()
    {
        // Arrange
        Setup();
        _dockerManager.Setup(x => x.DockerInspect(_ec.Object, It.IsAny<string>(), It.IsAny<string>())).Returns(Task.FromResult(_healthyDockerStatus));

        // Act
        await _containerOperationProvider.RunContainersHealthcheck(_ec.Object, _containers);

        // Assert
        // Checking the mocked execution context's Result would always pass (it is never set),
        // so assert on state the provider actually mutates.
        var container = Assert.Single(_containers);
        Assert.False(container.FailedInitialization);
    }

    /// <summary>
    /// Builds the mocks, registers them with a fresh host context and initializes the provider under test.
    /// </summary>
    /// <param name="testName">Name of the calling test, supplied by the compiler.</param>
    private void Setup([CallerMemberName] string testName = "")
    {
        // IsJobContainer is left unset here, so the provider handles this entry as a service container.
        _containers.Add(new ContainerInfo() { ContainerImage = "ubuntu:16.04" });

        // NOTE(review): if TestHostContext is disposable it is never disposed here — confirm and clean up if needed.
        _hc = new TestHostContext(this, testName);
        _ec = new Mock<IExecutionContext>();
        _serverQueue = new Mock<IJobServerQueue>();
        _pagingLogger = new Mock<IPagingLogger>();
        _dockerManager = new Mock<IDockerCommandManager>();
        _containerHookManager = new Mock<IContainerHookManager>();
        _containerOperationProvider = new ContainerOperationProvider();

        _hc.SetSingleton<IDockerCommandManager>(_dockerManager.Object);
        _hc.SetSingleton<IJobServerQueue>(_serverQueue.Object);
        _hc.SetSingleton<IPagingLogger>(_pagingLogger.Object);
        _hc.SetSingleton<IContainerHookManager>(_containerHookManager.Object);

        _ec.Setup(x => x.Global).Returns(new GlobalContext());

        _containerOperationProvider.Initialize(_hc);
    }
}
}

0 comments on commit d28c357

Please sign in to comment.