From b4503f4b8f910d46e4c69fff8ea13e91812d8a75 Mon Sep 17 00:00:00 2001 From: Cian Hatton Date: Mon, 6 Mar 2023 16:58:49 +0000 Subject: [PATCH] Uploading docker logs on test failures (#3236) --- .../e2e-compatibility-workflow-call.yaml | 9 + .github/workflows/e2e-test-workflow-call.yml | 14 ++ .github/workflows/e2e.yaml | 1 + .gitignore | 3 +- e2e/README.md | 6 + e2e/dockerutil/dockerutil.go | 72 ++++++++ e2e/testsuite/diagnostics/diagnostics.go | 157 ++++++++++++++++++ e2e/testsuite/testsuite.go | 16 +- 8 files changed, 274 insertions(+), 4 deletions(-) create mode 100644 e2e/dockerutil/dockerutil.go create mode 100644 e2e/testsuite/diagnostics/diagnostics.go diff --git a/.github/workflows/e2e-compatibility-workflow-call.yaml b/.github/workflows/e2e-compatibility-workflow-call.yaml index 58c5642a4f4..fddf2f6ffdc 100644 --- a/.github/workflows/e2e-compatibility-workflow-call.yaml +++ b/.github/workflows/e2e-compatibility-workflow-call.yaml @@ -54,3 +54,12 @@ jobs: CHAIN_B_TAG: "${{ matrix.chain-b }}" CHAIN_BINARY: "${{ matrix.chain-binary }}" RELAYER_TYPE: "${{ matrix.relayer-type }}" + - name: Upload Diagnostics + uses: actions/upload-artifact@v3 + # we only want to upload logs on test failures. + if: ${{ failure() }} + continue-on-error: true + with: + name: "${{ matrix.entrypoint }}-${{ matrix.test }}" + path: e2e/diagnostics + retention-days: 5 diff --git a/.github/workflows/e2e-test-workflow-call.yml b/.github/workflows/e2e-test-workflow-call.yml index 3ed7f572c11..8f45c8c1aa2 100644 --- a/.github/workflows/e2e-test-workflow-call.yml +++ b/.github/workflows/e2e-test-workflow-call.yml @@ -61,6 +61,11 @@ on: required: false type: boolean default: false + upload-logs: + description: "Specify flag to indicate that logs should be uploaded on failure" + required: false + type: boolean + default: false env: REGISTRY: ghcr.io @@ -160,6 +165,15 @@ jobs: with: go-version: 1.19 - name: Run e2e Test + id: e2e_test run: | cd e2e make e2e-test entrypoint=${{ matrix.entrypoint }} test=${{ matrix.test }} + - name: Upload Diagnostics + uses: actions/upload-artifact@v3 + if: ${{ failure() && inputs.upload-logs }} + continue-on-error: true + with: + name: "${{ matrix.entrypoint }}-${{ matrix.test }}" + path: e2e/diagnostics + retention-days: 5 diff --git a/.github/workflows/e2e.yaml b/.github/workflows/e2e.yaml index 9fe4b02cefd..585d4eca11d 100644 --- a/.github/workflows/e2e.yaml +++ b/.github/workflows/e2e.yaml @@ -62,6 +62,7 @@ jobs: secrets: inherit with: build-and-push-docker-image: true + upload-logs: true chain-image: ghcr.io/cosmos/ibc-go-simd chain-a-tag: "${{ needs.determine-image-tag.outputs.simd-tag }}" chain-b-tag: "${{ needs.determine-image-tag.outputs.simd-tag }}" diff --git a/.gitignore b/.gitignore index 1844a2347f6..b9c46cafa65 100644 --- a/.gitignore +++ b/.gitignore @@ -32,6 +32,7 @@ mytestnet coverage.txt profile.out sim_log_file +e2e/diagnostics # Vagrant .vagrant/ @@ -57,4 +58,4 @@ dependency-graph.png # Go go.work -go.work.sum \ No newline at end of file +go.work.sum diff --git a/e2e/README.md b/e2e/README.md index 56463c219be..d6a03e4485c 100644 --- a/e2e/README.md +++ b/e2e/README.md @@ -354,6 +354,12 @@ json matrix files under .github/compatibility-test-matrices and is equivalent to This issue doesn't seem to occur on other operating systems. +### Accessing Logs + +- When a test fails in GitHub. The logs of the test will be uploaded (viewable in the summary page of the workflow). Note: There + may be some discrepancy in the logs collected and the output of interchain test. The containers may run for a some + time after the logs are collected, resulting in the displayed logs to differ slightly. + ## Importable Workflow This repository contains an [importable workflow](https://github.com/cosmos/ibc-go/blob/bc963bcfd115a0e06b8196b114496db5ea011247/.github/workflows/e2e-compatibility-workflow-call.yaml) that can be used from any other repository to test chain upgrades. The workflow diff --git a/e2e/dockerutil/dockerutil.go b/e2e/dockerutil/dockerutil.go new file mode 100644 index 00000000000..aa7497645bf --- /dev/null +++ b/e2e/dockerutil/dockerutil.go @@ -0,0 +1,72 @@ +package dockerutil + +import ( + "archive/tar" + "context" + "fmt" + "io" + "path" + "testing" + + dockertypes "github.com/docker/docker/api/types" + "github.com/docker/docker/api/types/filters" + dockerclient "github.com/docker/docker/client" +) + +const testLabel = "ibc-test" + +// GetTestContainers returns all docker containers that have been created by interchain test. +func GetTestContainers(t *testing.T, ctx context.Context, dc *dockerclient.Client) ([]dockertypes.Container, error) { + t.Helper() + + testContainers, err := dc.ContainerList(ctx, dockertypes.ContainerListOptions{ + All: true, + Filters: filters.NewArgs( + // see: https://github.com/strangelove-ventures/interchaintest/blob/0bdc194c2aa11aa32479f32b19e1c50304301981/internal/dockerutil/setup.go#L31-L36 + // for the label needed to identify test containers. + filters.Arg("label", testLabel+"="+t.Name()), + ), + }) + + if err != nil { + return nil, fmt.Errorf("failed listing containers: %s", err) + } + + return testContainers, nil +} + +// GetContainerLogs returns the logs of a container as a byte array. +func GetContainerLogs(ctx context.Context, dc *dockerclient.Client, containerName string) ([]byte, error) { + readCloser, err := dc.ContainerLogs(ctx, containerName, dockertypes.ContainerLogsOptions{ + ShowStdout: true, + ShowStderr: true, + }) + if err != nil { + return nil, fmt.Errorf("failed reading logs in test cleanup: %s", err) + } + return io.ReadAll(readCloser) +} + +// GetFileContentsFromContainer reads the contents of a specific file from a container. +func GetFileContentsFromContainer(ctx context.Context, dc *dockerclient.Client, containerID, absolutePath string) ([]byte, error) { + readCloser, _, err := dc.CopyFromContainer(ctx, containerID, absolutePath) + if err != nil { + return nil, fmt.Errorf("copying from container: %w", err) + } + + defer readCloser.Close() + + fileName := path.Base(absolutePath) + tr := tar.NewReader(readCloser) + + hdr, err := tr.Next() + if err != nil { + return nil, err + } + + if hdr.Name != fileName { + return nil, fmt.Errorf("expected to find %s but found %s", fileName, hdr.Name) + } + + return io.ReadAll(tr) +} diff --git a/e2e/testsuite/diagnostics/diagnostics.go b/e2e/testsuite/diagnostics/diagnostics.go new file mode 100644 index 00000000000..6226dd4a339 --- /dev/null +++ b/e2e/testsuite/diagnostics/diagnostics.go @@ -0,0 +1,157 @@ +package diagnostics + +import ( + "context" + "fmt" + "os" + ospath "path" + "strings" + "testing" + + dockertypes "github.com/docker/docker/api/types" + dockerclient "github.com/docker/docker/client" + "github.com/strangelove-ventures/interchaintest/v7/ibc" + + "github.com/cosmos/ibc-go/e2e/dockerutil" + "github.com/cosmos/ibc-go/e2e/testconfig" +) + +const ( + e2eDir = "e2e" +) + +// Collect can be used in `t.Cleanup` and will copy all the of the container logs and relevant files +// into e2e//.log. These log files will be uploaded to GH upon test failure. +func Collect(t *testing.T, dc *dockerclient.Client, cfg testconfig.ChainOptions) { + t.Helper() + + if !t.Failed() { + t.Logf("test passed, not uploading logs") + return + } + + t.Logf("writing logs for test: %s", t.Name()) + + ctx := context.TODO() + e2eDir, err := getE2EDir(t) + if err != nil { + t.Logf("failed finding log directory: %s", err) + return + } + + logsDir := fmt.Sprintf("%s/diagnostics", e2eDir) + + if err := os.MkdirAll(fmt.Sprintf("%s/%s", logsDir, t.Name()), 0750); err != nil { + t.Logf("failed creating logs directory in test cleanup: %s", err) + return + } + + testContainers, err := dockerutil.GetTestContainers(t, ctx, dc) + if err != nil { + t.Logf("failed listing containers test cleanup: %s", err) + return + } + + for _, container := range testContainers { + containerName := getContainerName(t, container) + containerDir := fmt.Sprintf("%s/%s/%s", logsDir, t.Name(), containerName) + if err := os.MkdirAll(containerDir, 0750); err != nil { + t.Logf("failed creating logs directory for container %s: %s", containerDir, err) + continue + } + + logsBz, err := dockerutil.GetContainerLogs(ctx, dc, container.ID) + if err != nil { + t.Logf("failed reading logs in test cleanup: %s", err) + continue + } + + logFile := fmt.Sprintf("%s/%s.log", containerDir, containerName) + if err := os.WriteFile(logFile, logsBz, 0750); err != nil { + t.Logf("failed writing log file for container %s in test cleanup: %s", containerName, err) + continue + } + + t.Logf("successfully wrote log file %s", logFile) + diagnosticFiles := chainDiagnosticAbsoluteFilePaths(*cfg.ChainAConfig) + diagnosticFiles = append(diagnosticFiles, chainDiagnosticAbsoluteFilePaths(*cfg.ChainBConfig)...) + + for _, absoluteFilePathInContainer := range diagnosticFiles { + localFilePath := ospath.Join(containerDir, ospath.Base(absoluteFilePathInContainer)) + if err := fetchAndWriteDiagnosticsFile(ctx, dc, container.ID, localFilePath, absoluteFilePathInContainer); err != nil { + t.Logf("failed to fetch and write file %s for container %s in test cleanup: %s", absoluteFilePathInContainer, containerName, err) + continue + } + t.Logf("successfully wrote diagnostics file %s", absoluteFilePathInContainer) + } + } +} + +// getContainerName returns a either the ID of the container or stripped down human-readable +// version of the name if the name is non-empty. +// +// Note: You should still always use the ID when interacting with the docker client. +func getContainerName(t *testing.T, container dockertypes.Container) string { + t.Helper() + // container will always have an id, by may not have a name. + containerName := container.ID + if len(container.Names) > 0 { + containerName = container.Names[0] + // remove the test name from the container as the folder structure will provide this + // information already. + containerName = strings.TrimRight(containerName, "-"+t.Name()) + containerName = strings.TrimLeft(containerName, "/") + } + return containerName +} + +// fetchAndWriteDiagnosticsFile fetches the contents of a single file from the given container id and writes +// the contents of the file to a local path provided. +func fetchAndWriteDiagnosticsFile(ctx context.Context, dc *dockerclient.Client, containerID, localPath, absoluteFilePathInContainer string) error { + fileBz, err := dockerutil.GetFileContentsFromContainer(ctx, dc, containerID, absoluteFilePathInContainer) + if err != nil { + return err + } + + filePath := ospath.Join(localPath) + if err := os.WriteFile(filePath, fileBz, 0750); err != nil { + return err + } + + return nil +} + +// chainDiagnosticAbsoluteFilePaths returns a slice of absolute file paths (in the containers) which are the files that should be +// copied locally when fetching diagnostics. +func chainDiagnosticAbsoluteFilePaths(cfg ibc.ChainConfig) []string { + return []string{ + fmt.Sprintf("/var/cosmos-chain/%s/config/genesis.json", cfg.Name), + fmt.Sprintf("/var/cosmos-chain/%s/config/app.toml", cfg.Name), + fmt.Sprintf("/var/cosmos-chain/%s/config/config.toml", cfg.Name), + fmt.Sprintf("/var/cosmos-chain/%s/config/client.toml", cfg.Name), + } +} + +// getE2EDir finds the e2e directory above the test. +func getE2EDir(t *testing.T) (string, error) { + t.Helper() + + wd, err := os.Getwd() + if err != nil { + return "", err + } + + const maxAttempts = 100 + count := 0 + for ; !strings.HasSuffix(wd, e2eDir) || count > maxAttempts; wd = ospath.Dir(wd) { + count++ + } + + // arbitrary value to avoid getting stuck in an infinite loop if this is called + // in a context where the e2e directory does not exist. + if count > maxAttempts { + return "", fmt.Errorf("unable to find e2e directory after %d tries", maxAttempts) + } + + return wd, nil +} diff --git a/e2e/testsuite/testsuite.go b/e2e/testsuite/testsuite.go index edd4642dcb5..965e4177c86 100644 --- a/e2e/testsuite/testsuite.go +++ b/e2e/testsuite/testsuite.go @@ -35,6 +35,7 @@ import ( "github.com/cosmos/ibc-go/e2e/relayer" "github.com/cosmos/ibc-go/e2e/semverutil" "github.com/cosmos/ibc-go/e2e/testconfig" + "github.com/cosmos/ibc-go/e2e/testsuite/diagnostics" "github.com/cosmos/ibc-go/e2e/testvalues" controllertypes "github.com/cosmos/ibc-go/v7/modules/apps/27-interchain-accounts/controller/types" feetypes "github.com/cosmos/ibc-go/v7/modules/apps/29-fee/types" @@ -442,17 +443,26 @@ func (s *E2ETestSuite) AssertPacketRelayed(ctx context.Context, chain *cosmos.Co // test and can be retrieved with GetChains. func (s *E2ETestSuite) createCosmosChains(chainOptions testconfig.ChainOptions) (*cosmos.CosmosChain, *cosmos.CosmosChain) { client, network := interchaintest.DockerSetup(s.T()) + t := s.T() s.logger = zap.NewExample() s.DockerClient = client s.network = network - logger := zaptest.NewLogger(s.T()) + logger := zaptest.NewLogger(t) numValidators, numFullNodes := getValidatorsAndFullNodes() - chainA := cosmos.NewCosmosChain(s.T().Name(), *chainOptions.ChainAConfig, numValidators, numFullNodes, logger) - chainB := cosmos.NewCosmosChain(s.T().Name(), *chainOptions.ChainBConfig, numValidators, numFullNodes, logger) + chainA := cosmos.NewCosmosChain(t.Name(), *chainOptions.ChainAConfig, numValidators, numFullNodes, logger) + chainB := cosmos.NewCosmosChain(t.Name(), *chainOptions.ChainBConfig, numValidators, numFullNodes, logger) + + // this is intentionally called after the interchaintest.DockerSetup function. The above function registers a + // cleanup task which deletes all containers. By registering a cleanup function afterwards, it is executed first + // this allows us to process the logs before the containers are removed. + t.Cleanup(func() { + diagnostics.Collect(t, s.DockerClient, chainOptions) + }) + return chainA, chainB }