Skip to content

[release-4.16] OCPBUGS-45007: Add retry to ccoctl gcp create functions #794

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pkg/cmd/provisioning/azure/create_managed_identities.go
Original file line number Diff line number Diff line change
Expand Up @@ -364,7 +364,7 @@ func createRoleAssignment(client *azureclients.AzureClientWrapper, managedIdenti
var rawResponse *http.Response
// Role assignment can fail due to a replication delay after creating the user-assigned managed identity
// Try up to 24 times with a 10 second delay between each attempt, up to 4 minutes.
for i := 0; i < 12; i++ {
for i := 0; ; i++ {
ctxWithResp := runtime.WithCaptureResponse(context.Background(), &rawResponse)
roleAssignmentCreateResponse, err := client.RoleAssignmentClient.Create(
ctxWithResp,
Expand Down
24 changes: 21 additions & 3 deletions pkg/cmd/provisioning/gcp/create_service_accounts.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
"os"
"path/filepath"
"strings"
"time"

"github.com/pkg/errors"
"github.com/spf13/cobra"
Expand Down Expand Up @@ -285,9 +286,26 @@ func createServiceAccount(ctx context.Context, client gcp.Client, name string, c

// Add member <-> role bindings for the project
svcAcctBindingName := actuator.ServiceAccountBindingName(serviceAccount)
err = actuator.EnsurePolicyBindingsForProject(client, roles, svcAcctBindingName)
if err != nil {
return "", errors.Wrap(err, fmt.Sprintf("Failed to add predefined roles for IAM service account %s", serviceAccount.DisplayName))
// EnsurePolicyBindingsForProject can fail due to a replication delay after service account creation.
// Try up to 24 times with a 10-second delay between attempts (about 4 minutes in total).
for i := 0; ; i++ {
err = actuator.EnsurePolicyBindingsForProject(client, roles, svcAcctBindingName)
if err != nil {
if strings.Contains(err.Error(), "Service account "+serviceAccount.Email+" does not exist") {
// The service account just created can't be found yet due to a replication delay so we need to retry.
if i >= 23 {
log.Fatal("Timed out adding predefined roles to IAM service account, this is most likely due to a replication delay following creation of the service account, please retry")
break
} else {
log.Printf("Unable to add predefined roles to IAM service account, retrying...")
time.Sleep(10 * time.Second)
continue
}
}

return "", errors.Wrap(err, fmt.Sprintf("Failed to add predefined roles for IAM service account %s", serviceAccount.DisplayName))
}
break
}

// Add member <-> role bindings for the IAM service account
Expand Down
26 changes: 23 additions & 3 deletions pkg/cmd/provisioning/gcp/create_workload_identity_provider.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,11 @@ import (
"fmt"
"io/ioutil"
"log"
"net/http"
"os"
"path/filepath"
"strings"
"time"

iamCloud "cloud.google.com/go/iam"
"cloud.google.com/go/storage"
Expand Down Expand Up @@ -133,10 +135,28 @@ func createOIDCBucket(ctx context.Context, client gcp.Client, bucketName, region
}
log.Print("Bucket ", bucketName, " created")

policy, err := client.GetBucketPolicy(ctx, bucketName)
if err != nil {
return errors.Wrap(err, fmt.Sprintf("Failed to fetch IAM policy for bucket %s", bucketName))
// GetBucketPolicy can fail due to a replication delay after bucket creation.
// Try up to 24 times with a 10-second delay between attempts (about 4 minutes in total).
var policy *iamCloud.Policy3
for i := 0; ; i++ {
policy, err = client.GetBucketPolicy(ctx, bucketName)
if err != nil {
if gerr, ok := err.(*googleapi.Error); ok && gerr.Code == http.StatusNotFound {
// The bucket just created can't be found yet due to a replication delay so we need to retry.
if i >= 23 {
log.Fatal("Timed out fetching IAM policy for bucket, this is most likely due to a replication delay following creation of the bucket, please retry")
break
} else {
log.Printf("Unable to fetch IAM policy for bucket, retrying...")
time.Sleep(10 * time.Second)
continue
}
}
return errors.Wrap(err, fmt.Sprintf("Failed to fetch IAM policy for bucket %s", bucketName))
}
break
}

role := "roles/storage.objectViewer"
policy.Bindings = append(policy.Bindings, &iampb.Binding{
Role: role,
Expand Down