From 96e4ab32d8e36271ed1907bb3ed45a8354ce147f Mon Sep 17 00:00:00 2001 From: Alan Cruikshanks Date: Tue, 15 Aug 2023 15:24:02 +0100 Subject: [PATCH] Add Got retry hook to log when retries happen (#354) https://eaflood.atlassian.net/browse/WATER-4083 We had an issue with the Midlands SROC supplementary bill run. It would error and when checking the logs we found it was because the [charging-module-api](https://github.com/DEFRA/sroc-charging-module-api) was rejecting a `POST /transaction` request. ```json { "result": { "succeeded": false, "response": { "statusCode": 409, "body": { "statusCode": 409, "error": "Conflict", "message": "A transaction with Client ID 'ebee5d48-3191-4615-b86a-5bebb9026bfe' for Regime 'wrls' already exists.", "clientId": "ebee5d48-3191-4615-b86a-5bebb9026bfe" } } } } ``` But this should be impossible. We generate the ID's during the bill run. If the bill run errors all transaction records get deleted. We then found a second attempt to run the bill run succeeded. We've been unable to replicate the failed scenario but have a working theory. We believe a `POST /transaction` request is failing due to a timeout. That doesn't mean the CHA didn't receive the request, just that [Got](https://github.com/sindresorhus/got) timed out waiting for it. We have it configured to automatically retry these. So, in this scenario, the CHA receives the first request and creates the transaction but **Got** times out. It then sends the same request a second time, by which point the record already exists in the CHA so it rejects it. To confirm this, and see when **Got** is retrying this change adds a new [beforeRetry](https://github.com/sindresorhus/got/blob/main/documentation/9-hooks.md#beforeretry) hook to the **Got** instance we create. In it, we can log that a retry has taken place and details about it. --- app/lib/request.lib.js | 61 +++++++++++++++++++++--------------- test/lib/request.lib.test.js | 42 +++++++++++++++++++++++++ 2 files changed, 78 insertions(+), 25 deletions(-) diff --git a/app/lib/request.lib.js b/app/lib/request.lib.js index 62b63554ce..1f69c5ce47 100644 --- a/app/lib/request.lib.js +++ b/app/lib/request.lib.js @@ -42,31 +42,8 @@ async function post (url, additionalOptions = {}) { return _sendRequest('post', url, additionalOptions) } -async function _sendRequest (method, url, additionalOptions) { - const got = await _importGot() - const result = { - succeeded: false, - response: null - } - - try { - const options = _requestOptions(additionalOptions) - - result.response = await got[method](url, options) - - // If the result is not 2xx or 3xx Got will mark the result as unsuccessful using the response object's `ok:` - // property - result.succeeded = result.response.ok - } catch (error) { - // If it's a network error, for example 'ETIMEDOUT', we'll end up here - result.response = error - } - - if (!result.succeeded) { - _logFailure(method.toUpperCase(), result, url, additionalOptions) - } - - return result +function _beforeRetryHook (error, retryCount) { + global.GlobalNotifier.omg('Retrying HTTP request', { error, retryCount }) } async function _importGot () { @@ -145,6 +122,8 @@ function _requestOptions (additionalOptions) { // preference is to only retry in the event of a timeout on assumption the destination server might be busy but has // a chance to succeed when attempted again retry: { + // The default is also 2 retries before erroring. We specify it to make this fact visible + limit: 2, // We ensure that the only network errors Got retries are timeout errors errorCodes: ['ETIMEDOUT'], // By default, Got does not retry PATCH and POST requests. As we only retry timeouts there is no risk in retrying @@ -158,12 +137,44 @@ function _requestOptions (additionalOptions) { // > It is a good practice to set a timeout to prevent hanging requests. By default, there is no timeout set. timeout: { request: requestConfig.timeout + }, + hooks: { + beforeRetry: [ + _beforeRetryHook + ] } } return { ...defaultOptions, ...additionalOptions } } +async function _sendRequest (method, url, additionalOptions) { + const got = await _importGot() + const result = { + succeeded: false, + response: null + } + + try { + const options = _requestOptions(additionalOptions) + + result.response = await got[method](url, options) + + // If the result is not 2xx or 3xx Got will mark the result as unsuccessful using the response object's `ok:` + // property + result.succeeded = result.response.ok + } catch (error) { + // If it's a network error, for example 'ETIMEDOUT', we'll end up here + result.response = error + } + + if (!result.succeeded) { + _logFailure(method.toUpperCase(), result, url, additionalOptions) + } + + return result +} + module.exports = { get, patch, diff --git a/test/lib/request.lib.test.js b/test/lib/request.lib.test.js index 199bed83e7..9c85adf746 100644 --- a/test/lib/request.lib.test.js +++ b/test/lib/request.lib.test.js @@ -147,6 +147,13 @@ describe('RequestLib', () => { .persist() }) + it('logs when a retry has happened', async () => { + await RequestLib.get(testDomain) + + expect(notifierStub.omg.callCount).to.equal(2) + expect(notifierStub.omg.alwaysCalledWith('Retrying HTTP request')).to.be.true() + }) + describe('the result it returns', () => { it("has a 'succeeded' property marked as false", async () => { const result = await RequestLib.get(testDomain) @@ -175,6 +182,13 @@ describe('RequestLib', () => { .reply(200, { data: 'delayed hello world' }) }) + it('logs when a retry has happened', async () => { + await RequestLib.get(testDomain) + + expect(notifierStub.omg.callCount).to.equal(1) + expect(notifierStub.omg.alwaysCalledWith('Retrying HTTP request')).to.be.true() + }) + describe('the result it returns', () => { it("has a 'succeeded' property marked as true", async () => { const result = await RequestLib.get(testDomain) @@ -342,6 +356,13 @@ describe('RequestLib', () => { .persist() }) + it('logs when a retry has happened', async () => { + await RequestLib.patch(testDomain) + + expect(notifierStub.omg.callCount).to.equal(2) + expect(notifierStub.omg.alwaysCalledWith('Retrying HTTP request')).to.be.true() + }) + describe('the result it returns', () => { it("has a 'succeeded' property marked as false", async () => { const result = await RequestLib.patch(testDomain) @@ -370,6 +391,13 @@ describe('RequestLib', () => { .reply(200, { data: 'delayed hello world' }) }) + it('logs when a retry has happened', async () => { + await RequestLib.patch(testDomain) + + expect(notifierStub.omg.callCount).to.equal(1) + expect(notifierStub.omg.alwaysCalledWith('Retrying HTTP request')).to.be.true() + }) + describe('the result it returns', () => { it("has a 'succeeded' property marked as true", async () => { const result = await RequestLib.patch(testDomain) @@ -537,6 +565,13 @@ describe('RequestLib', () => { .persist() }) + it('logs when a retry has happened', async () => { + await RequestLib.post(testDomain) + + expect(notifierStub.omg.callCount).to.equal(2) + expect(notifierStub.omg.alwaysCalledWith('Retrying HTTP request')).to.be.true() + }) + describe('the result it returns', () => { it("has a 'succeeded' property marked as false", async () => { const result = await RequestLib.post(testDomain) @@ -565,6 +600,13 @@ describe('RequestLib', () => { .reply(200, { data: 'delayed hello world' }) }) + it('logs when a retry has happened', async () => { + await RequestLib.post(testDomain) + + expect(notifierStub.omg.callCount).to.equal(1) + expect(notifierStub.omg.alwaysCalledWith('Retrying HTTP request')).to.be.true() + }) + describe('the result it returns', () => { it("has a 'succeeded' property marked as true", async () => { const result = await RequestLib.post(testDomain)