Skip to content

Commit

Permalink
Retry Conflicts for AnotherOperationInProgress (#3653)
Browse files Browse the repository at this point in the history
The new azclient allows us to pass a callback function to decide if we
want to retry a request. The default behaviour is to only retry a
specific set of status codes. This extends that behaviour to also check
Conflict (409) and retry if the "error.code" field is
"AnotherOperationInProgress".

We're avoiding retrying all conflict status codes because some won't be
fixed with a retry, so we're targeting the specific case instead.

Tested this both manually stepping through the code with a debugger and
also created an integration test which previously failed but now passes
when configured to use the new azclient instead of autorest client.

To access this improved behaviour right now, set the environment
variable `PULUMI_ENABLE_AZCORE_BACKEND=true`. This will become the
default in the future, but is currently in testing behind a feature
flag.

This should address #903 which we will close once we've defaulted to
using the new azcore implementation:
- #3654
  • Loading branch information
danielrbradley authored Oct 28, 2024
1 parent ce90e01 commit 5056b3d
Show file tree
Hide file tree
Showing 4 changed files with 156 additions and 0 deletions.
38 changes: 38 additions & 0 deletions provider/pkg/azure/client_azcore.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,32 @@ func NewAzCoreClient(tokenCredential azcore.TokenCredential, userAgent string, a
opts.Retry.RetryDelay = 20 * time.Second
opts.Retry.MaxRetryDelay = 120 * time.Second

// These are the same as the default values in sdk/azcore/runtime/policy_retry.go setDefaults
retryableStatusCodes := []int{
http.StatusRequestTimeout, // 408
http.StatusTooManyRequests, // 429
http.StatusInternalServerError, // 500
http.StatusBadGateway, // 502
http.StatusServiceUnavailable, // 503
http.StatusGatewayTimeout, // 504
}

opts.Retry.ShouldRetry = func(resp *http.Response, err error) bool {
if err != nil {
return true
}
// Replicate default retry behaviour first.
if runtime.HasStatusCode(resp, retryableStatusCodes...) {
return true
}

if shouldRetryConflict(resp) {
return true
}

return false
}

pipeline, err := armruntime.NewPipeline("pulumi-azure-native", version.Version, tokenCredential,
runtime.PipelineOptions{}, opts)
if err != nil {
Expand All @@ -76,6 +102,18 @@ func NewAzCoreClient(tokenCredential azcore.TokenCredential, userAgent string, a
}, nil
}

func shouldRetryConflict(resp *http.Response) bool {
if runtime.HasStatusCode(resp, http.StatusConflict) {
err := runtime.NewResponseError(resp)
if responseErr, ok := err.(*azcore.ResponseError); ok {
if responseErr.ErrorCode == "AnotherOperationInProgress" {
return true
}
}
}
return false
}

func (c *azCoreClient) setHeaders(req *policy.Request, contentTypeJson bool) {
req.Raw().Header.Set("Accept", "application/json")
req.Raw().Header.Set("User-Agent", c.userAgent)
Expand Down
47 changes: 47 additions & 0 deletions provider/pkg/azure/client_azcore_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -630,3 +630,50 @@ func TestPutPatchRequestBody(t *testing.T) {
qp, "")
})
}

func TestShouldRetryConflict(t *testing.T) {
shouldRetry := func(status int, body string) bool {
resp := http.Response{
StatusCode: status,
Body: io.NopCloser(strings.NewReader(body)),
}
return shouldRetryConflict(&resp)
}
t.Run("Not a conflict", func(t *testing.T) {
assert.False(t, shouldRetry(200, ""))
})
t.Run("Blank body", func(t *testing.T) {
assert.False(t, shouldRetry(409, ""))
})
t.Run("Unreadable body", func(t *testing.T) {
resp := http.Response{
StatusCode: 409,
Body: errorReadCloser{},
}
assert.False(t, shouldRetryConflict(&resp))
})
t.Run("empty json", func(t *testing.T) {
assert.False(t, shouldRetry(409, "{}"))
})
t.Run("wrong error type", func(t *testing.T) {
assert.False(t, shouldRetry(409, `{"error": "Conflict"}`))
})
t.Run("no error code", func(t *testing.T) {
assert.False(t, shouldRetry(409, `{"error": {"message": "Conflict"}}`))
})
t.Run("other error code", func(t *testing.T) {
assert.False(t, shouldRetry(409, `{"error": {"code": "Conflict"}}`))
})
t.Run("AnotherOperationInProgress", func(t *testing.T) {
assert.True(t, shouldRetry(409, `{"error": {"code": "AnotherOperationInProgress"}}`))
})
}

type errorReadCloser struct{}

func (errorReadCloser) Read(p []byte) (n int, err error) {
return 0, errors.New("read error")
}
func (errorReadCloser) Close() error {
return nil
}
7 changes: 7 additions & 0 deletions provider/pkg/provider/provider_e2e_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,13 @@ func TestRequiredContainers(t *testing.T) {
runTestProgram(t, "required-containers")
}

func TestParallelSubnetCreation(t *testing.T) {
// Force this test to use the new AZCore client
// We have to set this here rather than in the ProgramTest config because the provider is started up before starting the test.
os.Setenv("PULUMI_ENABLE_AZCORE_BACKEND", "true")
runTestProgram(t, "parallel-subnet-creation")
}

// runTestProgram runs an example from ./examples/<initialDir>
// Any editDirs are applied in order, and the program is run after each edit. e.g. ./examples/<editDir>
func runTestProgram(t *testing.T, initialDir string, editDirs ...string) {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
name: parallel-subnet-creation
runtime: yaml
description: Test creating subnets in parallel, without dependencies which should trigger conflicts as only 1 can be updated at a time

resources:
resourceGroup:
type: azure-native:resources:ResourceGroup
virtualNetwork:
type: azure-native:network:VirtualNetwork
properties:
addressSpace:
addressPrefixes:
- 10.0.0.0/16
flowTimeoutInMinutes: 10
location: eastus
resourceGroupName: ${resourceGroup.name}
subnet1:
type: azure-native:network:Subnet
properties:
addressPrefix: "10.0.0.0/24"
resourceGroupName: ${resourceGroup.name}
virtualNetworkName: ${virtualNetwork.name}
subnet2:
type: azure-native:network:Subnet
properties:
addressPrefix: "10.0.1.0/24"
resourceGroupName: ${resourceGroup.name}
virtualNetworkName: ${virtualNetwork.name}
subnet3:
type: azure-native:network:Subnet
properties:
addressPrefix: "10.0.2.0/24"
resourceGroupName: ${resourceGroup.name}
virtualNetworkName: ${virtualNetwork.name}
subnet4:
type: azure-native:network:Subnet
properties:
addressPrefix: "10.0.3.0/24"
resourceGroupName: ${resourceGroup.name}
virtualNetworkName: ${virtualNetwork.name}
subnet5:
type: azure-native:network:Subnet
properties:
addressPrefix: "10.0.4.0/24"
resourceGroupName: ${resourceGroup.name}
virtualNetworkName: ${virtualNetwork.name}
subnet6:
type: azure-native:network:Subnet
properties:
addressPrefix: "10.0.5.0/24"
resourceGroupName: ${resourceGroup.name}
virtualNetworkName: ${virtualNetwork.name}
subnet7:
type: azure-native:network:Subnet
properties:
addressPrefix: "10.0.6.0/24"
resourceGroupName: ${resourceGroup.name}
virtualNetworkName: ${virtualNetwork.name}
subnet8:
type: azure-native:network:Subnet
properties:
addressPrefix: "10.0.7.0/24"
resourceGroupName: ${resourceGroup.name}
virtualNetworkName: ${virtualNetwork.name}

0 comments on commit 5056b3d

Please sign in to comment.