Skip to content

Commit

Permalink
fix: retry logic for pepr store call (#1109)
Browse files Browse the repository at this point in the history
## Description

The primary change in this PR is to properly handle undefined errors in
our `retryWithDelay` function. The jest tests added provide coverage of
the retry function testing a number of scenarios.

## Related Issue

No written issue, but token set failure seen on [this CI
run](https://github.com/defenseunicorns/uds-core/actions/runs/12281459257/job/34270405413?pr=1107)
and no retries observed in the logs.

## Type of change

- [x] Bug fix (non-breaking change which fixes an issue)
- [ ] New feature (non-breaking change which adds functionality)
- [ ] Other (security config, docs update, etc)

## Steps to Validate

This one is harder to reproduce the problem on, so the jest tests are a
good place to start for validating the expected functionality. To
reproduce almost the exact scenario:
```console
# Deploy slim dev, with the unicorn flavor to avoid rate limits
uds run slim-dev --set flavor=unicorn
# Delete pepr store to artifically add some store failure
kubectl delete peprstores.pepr.dev -n pepr-system pepr-uds-core-store
# Add a test namespace and package
kubectl create ns test
kubectl apply -f - <<EOF
apiVersion: uds.dev/v1alpha1
kind: Package
metadata:
  name: test
  namespace: test
spec:
  sso:
    - name: Test
      clientId: test
      redirectUris:
        - "https://test.uds.dev"
EOF
# Check the pepr watcher logs, you should see several lines like this: "Attempt <some number> of setStoreToken failed, retrying in 2000ms."
# This is where previously no retries would be attempted
kubectl logs -n pepr-system -l app=pepr-uds-core-watcher --tail -1 | grep "failed, retrying"
```

## Checklist before merging

- [x] Test, docs, adr added or updated as needed
- [x] [Contributor
Guide](https://github.com/defenseunicorns/uds-template-capability/blob/main/CONTRIBUTING.md)
followed
  • Loading branch information
mjnagel authored Dec 11, 2024
1 parent 8ecd5ff commit e4c0f61
Show file tree
Hide file tree
Showing 4 changed files with 109 additions and 13 deletions.
4 changes: 3 additions & 1 deletion src/pepr/operator/controllers/keycloak/client-sync.ts
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,9 @@ async function syncClient(

// Write the new token to the store
try {
await retryWithDelay(() => Store.setItemAndWait(name, client.registrationAccessToken!), log);
await retryWithDelay(async function setStoreToken() {
return Store.setItemAndWait(name, client.registrationAccessToken!);
}, log);
} catch (err) {
throw Error(
`Failed to set token in store for client '${client.clientId}', package ` +
Expand Down
88 changes: 88 additions & 0 deletions src/pepr/operator/controllers/utils.spec.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
/**
* Copyright 2024 Defense Unicorns
* SPDX-License-Identifier: AGPL-3.0-or-later OR LicenseRef-Defense-Unicorns-Commercial
*/

import { beforeEach, describe, expect, it, jest } from "@jest/globals";
import { Logger } from "pino";
import { retryWithDelay } from "./utils";

describe("retryWithDelay", () => {
let mockLogger: Logger;

beforeEach(() => {
mockLogger = {
warn: jest.fn(),
level: "info",
fatal: jest.fn(),
error: jest.fn(),
info: jest.fn(),
debug: jest.fn(),
trace: jest.fn(),
} as unknown as Logger;
});

beforeEach(() => {});

it("should succeed on the first attempt", async () => {
const mockFn = jest.fn<() => Promise<string>>().mockResolvedValue("Success");

const result = await retryWithDelay(mockFn, mockLogger);

expect(result).toBe("Success");
expect(mockFn).toHaveBeenCalledTimes(1); // Called only once
expect(mockLogger.warn).not.toHaveBeenCalled(); // No warnings logged
});

it("should retry on failure and eventually succeed", async () => {
const mockFn = jest
.fn<() => Promise<string>>()
.mockRejectedValueOnce(new Error("Fail on 1st try")) // Fail first attempt
.mockResolvedValue("Success"); // Succeed on retry

const result = await retryWithDelay(mockFn, mockLogger, 3, 100);

expect(result).toBe("Success");
expect(mockFn).toHaveBeenCalledTimes(2); // Called twice (1 fail + 1 success)
expect(mockLogger.warn).toHaveBeenCalledTimes(1); // Warned once for the retry
expect(mockLogger.warn).toHaveBeenCalledWith(
expect.stringContaining("Attempt 1 of mockConstructor failed, retrying in 100ms."),
expect.objectContaining({ error: expect.any(String) }),
);
});

it("should retry when function rejects without an error", async () => {
const mockFn = jest
.fn<() => Promise<string>>()
.mockRejectedValueOnce(undefined) // Rejected with no error
.mockResolvedValue("Success"); // Succeed on retry

const result = await retryWithDelay(mockFn, mockLogger, 3, 100);

expect(result).toBe("Success");
expect(mockFn).toHaveBeenCalledTimes(2); // Called twice (1 fail + 1 success)
expect(mockLogger.warn).toHaveBeenCalledTimes(1);
expect(mockLogger.warn).toHaveBeenCalledWith(
expect.stringContaining("Attempt 1 of mockConstructor failed, retrying in 100ms."),
expect.objectContaining({ error: "Unknown Error" }),
);
});

it("should throw the original error after max retries", async () => {
const error = new Error("Persistent failure");
const mockFn = jest.fn<() => Promise<string>>().mockRejectedValue(error); // Always fails

await expect(retryWithDelay(mockFn, mockLogger, 3, 100)).rejects.toThrow("Persistent failure");

expect(mockFn).toHaveBeenCalledTimes(3); // Retries up to the limit
expect(mockLogger.warn).toHaveBeenCalledTimes(2); // Logged for each failure except the final one
expect(mockLogger.warn).toHaveBeenCalledWith(
expect.stringContaining("Attempt 1 of mockConstructor failed, retrying in 100ms."),
expect.objectContaining({ error: error.message }),
);
expect(mockLogger.warn).toHaveBeenCalledWith(
expect.stringContaining("Attempt 2 of mockConstructor failed, retrying in 100ms."),
expect.objectContaining({ error: error.message }),
);
});
});
23 changes: 15 additions & 8 deletions src/pepr/operator/controllers/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -99,15 +99,22 @@ export async function retryWithDelay<T>(
if (attempt >= retries) {
throw err; // Exceeded retries, rethrow the error.
}
let error = `${JSON.stringify(err)}`;
// Error responses from network calls (i.e. K8s().Get() will be this shape)
if (err.data?.message) {
error = err.data.message;
// Other error types have a message
} else if (err.message) {
error = err.message;
// We need to account for cases where we are receiving a rejected promise with undefined error
let error = "Unknown Error";
if (err) {
error = `${JSON.stringify(err)}`;
// Error responses from network calls (i.e. K8s().Get() will be this shape)
if (err.data?.message) {
error = err.data.message;
// Other error types have a message
} else if (err.message) {
error = err.message;
}
}
log.warn(`Attempt ${attempt} of ${fn.name} failed, retrying in ${delayMs}ms.`, { error });
log.warn(
`Attempt ${attempt} of ${fn.name || "anonymous function"} failed, retrying in ${delayMs}ms.`,
{ error },
);
await new Promise(resolve => setTimeout(resolve, delayMs));
}
}
Expand Down
7 changes: 3 additions & 4 deletions tasks.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,12 +26,11 @@ tasks:
- description: "Create the dev cluster"
task: setup:create-k3d-cluster

# Note: This currently is broken until https://github.com/zarf-dev/zarf/issues/2713 is resolved
# As a workaround you can edit the `src/istio/values/upstream-values.yaml` file to change ###ZARF_REGISTRY### to docker.io before running
# Note: the `registry-url` flag used here requires uds 0.19.2+
- description: "Deploy the Istio source package with Zarf Dev"
cmd: "uds zarf dev deploy src/istio --flavor upstream --no-progress"
cmd: "uds zarf dev deploy src/istio --flavor upstream --registry-url docker.io --no-progress"

# Note, this abuses the --flavor flag to only install the CRDs from this package - the "crds-only" flavor is not an explicit flavor of the package
# Note: this abuses the --flavor flag to only install the CRDs from this package - the "crds-only" flavor is not an explicit flavor of the package
- description: "Deploy the Prometheus-Stack source package with Zarf Dev to only install the CRDs"
cmd: "uds zarf dev deploy src/prometheus-stack --flavor crds-only --no-progress"

Expand Down

0 comments on commit e4c0f61

Please sign in to comment.