Skip to content

Commit 70d1772

Browse files
committed
Add retry to ccoctl gcp create functions
The ccoctl gcp create functions occasionally fail when recently created resources have not yet replicated in the cloud. This change adds retry functionality to increase the success rate when this happens.
1 parent 166901d commit 70d1772

File tree

3 files changed

+45
-7
lines changed

3 files changed

+45
-7
lines changed

pkg/cmd/provisioning/azure/create_managed_identities.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -364,7 +364,7 @@ func createRoleAssignment(client *azureclients.AzureClientWrapper, managedIdenti
364364
var rawResponse *http.Response
365365
// Role assignment can fail due to a replication delay after creating the user-assigned managed identity
366366
// Try up to 24 times with a 10 second delay between each attempt, up to 4 minutes.
367-
for i := 0; i < 12; i++ {
367+
for i := 0; ; i++ {
368368
ctxWithResp := runtime.WithCaptureResponse(context.Background(), &rawResponse)
369369
roleAssignmentCreateResponse, err := client.RoleAssignmentClient.Create(
370370
ctxWithResp,

pkg/cmd/provisioning/gcp/create_service_accounts.go

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import (
99
"os"
1010
"path/filepath"
1111
"strings"
12+
"time"
1213

1314
"github.com/pkg/errors"
1415
"github.com/spf13/cobra"
@@ -285,9 +286,26 @@ func createServiceAccount(ctx context.Context, client gcp.Client, name string, c
285286

286287
// Add member <-> role bindings for the project
287288
svcAcctBindingName := actuator.ServiceAccountBindingName(serviceAccount)
288-
err = actuator.EnsurePolicyBindingsForProject(client, roles, svcAcctBindingName)
289-
if err != nil {
290-
return "", errors.Wrap(err, fmt.Sprintf("Failed to add predefined roles for IAM service account %s", serviceAccount.DisplayName))
289+
// EnsurePolicyBindingsForProject can fail due to a replication delay after service account creation
290+
// Try up to 24 times with a 10 second delay between each attempt, up to 4 minutes.
291+
for i := 0; ; i++ {
292+
err = actuator.EnsurePolicyBindingsForProject(client, roles, svcAcctBindingName)
293+
if err != nil {
294+
if strings.Contains(err.Error(), "Service account "+serviceAccount.Email+" does not exist") {
295+
// The service account just created can't be found yet due to a replication delay so we need to retry.
296+
if i >= 23 {
297+
log.Fatal("Timed out adding predefined roles to IAM service account, this is most likely due to a replication delay following creation of the service account, please retry")
298+
break
299+
} else {
300+
log.Printf("Unable to add predefined roles to IAM service account, retrying...")
301+
time.Sleep(10 * time.Second)
302+
continue
303+
}
304+
}
305+
306+
return "", errors.Wrap(err, fmt.Sprintf("Failed to add predefined roles for IAM service account %s", serviceAccount.DisplayName))
307+
}
308+
break
291309
}
292310

293311
// Add member <-> role bindings for the IAM service account

pkg/cmd/provisioning/gcp/create_workload_identity_provider.go

Lines changed: 23 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,11 @@ import (
55
"fmt"
66
"io/ioutil"
77
"log"
8+
"net/http"
89
"os"
910
"path/filepath"
1011
"strings"
12+
"time"
1113

1214
iamCloud "cloud.google.com/go/iam"
1315
"cloud.google.com/go/storage"
@@ -133,10 +135,28 @@ func createOIDCBucket(ctx context.Context, client gcp.Client, bucketName, region
133135
}
134136
log.Print("Bucket ", bucketName, " created")
135137

136-
policy, err := client.GetBucketPolicy(ctx, bucketName)
137-
if err != nil {
138-
return errors.Wrap(err, fmt.Sprintf("Failed to fetch IAM policy for bucket %s", bucketName))
138+
// GetBucketPolicy can fail due to a replication delay after bucket creation
139+
// Try up to 24 times with a 10 second delay between each attempt, up to 4 minutes.
140+
var policy *iamCloud.Policy3
141+
for i := 0; ; i++ {
142+
policy, err = client.GetBucketPolicy(ctx, bucketName)
143+
if err != nil {
144+
if gerr, ok := err.(*googleapi.Error); ok && gerr.Code == http.StatusNotFound {
145+
// The bucket just created can't be found yet due to a replication delay so we need to retry.
146+
if i >= 23 {
147+
log.Fatal("Timed out fetching IAM policy for bucket, this is most likely due to a replication delay following creation of the bucket, please retry")
148+
break
149+
} else {
150+
log.Printf("Unable to fetch IAM policy for bucket, retrying...")
151+
time.Sleep(10 * time.Second)
152+
continue
153+
}
154+
}
155+
return errors.Wrap(err, fmt.Sprintf("Failed to fetch IAM policy for bucket %s", bucketName))
156+
}
157+
break
139158
}
159+
140160
role := "roles/storage.objectViewer"
141161
policy.Bindings = append(policy.Bindings, &iampb.Binding{
142162
Role: role,

0 commit comments

Comments
 (0)