Skip to content

Commit 4dd803e

Browse files
fix: unstick helm commands by removing helm secrets (#154)
1 parent 4f2c256 commit 4dd803e

File tree

5 files changed

+386
-49
lines changed

5 files changed

+386
-49
lines changed

internal/cmd/local/k8s/client.go

Lines changed: 18 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -33,50 +33,40 @@ type Client interface {
3333
// This is a blocking call, it should only return once the deployment has completed.
3434
DeploymentRestart(ctx context.Context, namespace, name string) error
3535

36-
// IngressCreate creates an ingress in the given namespace
36+
EventsWatch(ctx context.Context, namespace string) (watch.Interface, error)
37+
3738
IngressCreate(ctx context.Context, namespace string, ingress *networkingv1.Ingress) error
38-
// IngressExists returns true if the ingress exists in the namespace, false otherwise.
3939
IngressExists(ctx context.Context, namespace string, ingress string) bool
40-
// IngressUpdate updates an existing ingress in the given namespace
4140
IngressUpdate(ctx context.Context, namespace string, ingress *networkingv1.Ingress) error
4241

43-
// NamespaceCreate creates a namespace
42+
LogsGet(ctx context.Context, namespace string, name string) (string, error)
43+
4444
NamespaceCreate(ctx context.Context, namespace string) error
45-
// NamespaceExists returns true if the namespace exists, false otherwise
4645
NamespaceExists(ctx context.Context, namespace string) bool
47-
// NamespaceDelete deletes the existing namespace
4846
NamespaceDelete(ctx context.Context, namespace string) error
4947

50-
// PersistentVolumeCreate creates a persistent volume
5148
PersistentVolumeCreate(ctx context.Context, namespace, name string) error
52-
// PersistentVolumeExists returns true if the persistent volume exists, false otherwise
5349
PersistentVolumeExists(ctx context.Context, namespace, name string) bool
54-
// PersistentVolumeDelete deletes the existing persistent volume
5550
PersistentVolumeDelete(ctx context.Context, namespace, name string) error
5651

57-
// PersistentVolumeClaimCreate creates a persistent volume claim
5852
PersistentVolumeClaimCreate(ctx context.Context, namespace, name, volumeName string) error
59-
// PersistentVolumeClaimExists returns true if the persistent volume claim exists, false otherwise
6053
PersistentVolumeClaimExists(ctx context.Context, namespace, name, volumeName string) bool
61-
// PersistentVolumeClaimDelete deletes the existing persistent volume claim
6254
PersistentVolumeClaimDelete(ctx context.Context, namespace, name, volumeName string) error
6355

64-
// SecretCreateOrUpdate will update or create the secret name with the payload of data in the specified namespace
56+
PodList(ctx context.Context, namespace string) (*corev1.PodList, error)
57+
6558
SecretCreateOrUpdate(ctx context.Context, secret corev1.Secret) error
66-
// SecretGet returns the secrets for the namespace and name
59+
// SecretDeleteCollection deletes multiple secrets.
60+
// Note this takes a `type` and not a `name`. All secrets matching this type will be removed.
61+
SecretDeleteCollection(ctx context.Context, namespace, _type string) error
6762
SecretGet(ctx context.Context, namespace, name string) (*corev1.Secret, error)
6863

69-
// ServiceGet returns the service for the given namespace and name
7064
ServiceGet(ctx context.Context, namespace, name string) (*corev1.Service, error)
7165

66+
StreamPodLogs(ctx context.Context, namespace string, podName string, since time.Time) (io.ReadCloser, error)
67+
7268
// ServerVersionGet returns the kubernetes version.
7369
ServerVersionGet() (string, error)
74-
75-
EventsWatch(ctx context.Context, namespace string) (watch.Interface, error)
76-
77-
LogsGet(ctx context.Context, namespace string, name string) (string, error)
78-
StreamPodLogs(ctx context.Context, namespace string, podName string, since time.Time) (io.ReadCloser, error)
79-
PodList(ctx context.Context, namespace string) (*corev1.PodList, error)
8070
}
8171

8272
var _ Client = (*DefaultK8sClient)(nil)
@@ -289,6 +279,13 @@ func (d *DefaultK8sClient) SecretCreateOrUpdate(ctx context.Context, secret core
289279
return fmt.Errorf("unexpected error while handling the secret %s: %w", name, err)
290280
}
291281

282+
func (d *DefaultK8sClient) SecretDeleteCollection(ctx context.Context, namespace, _type string) error {
283+
listOptions := metav1.ListOptions{
284+
FieldSelector: "type=" + _type,
285+
}
286+
return d.ClientSet.CoreV1().Secrets(namespace).DeleteCollection(ctx, metav1.DeleteOptions{}, listOptions)
287+
}
288+
292289
func (d *DefaultK8sClient) SecretGet(ctx context.Context, namespace, name string) (*corev1.Secret, error) {
293290
secret, err := d.ClientSet.CoreV1().Secrets(namespace).Get(ctx, name, metav1.GetOptions{})
294291
if err != nil {

internal/cmd/local/k8s/k8stest/k8stest.go

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ type MockClient struct {
3030
FnPersistentVolumeClaimExists func(ctx context.Context, namespace, name, volumeName string) bool
3131
FnPersistentVolumeClaimDelete func(ctx context.Context, namespace, name, volumeName string) error
3232
FnSecretCreateOrUpdate func(ctx context.Context, secret corev1.Secret) error
33+
FnSecretDeleteCollection func(ctx context.Context, namespace, _type string) error
3334
FnSecretGet func(ctx context.Context, namespace, name string) (*corev1.Secret, error)
3435
FnServerVersionGet func() (string, error)
3536
FnServiceGet func(ctx context.Context, namespace, name string) (*corev1.Service, error)
@@ -146,6 +147,14 @@ func (m *MockClient) SecretGet(ctx context.Context, namespace, name string) (*co
146147
return nil, nil
147148
}
148149

150+
func (m *MockClient) SecretDeleteCollection(ctx context.Context, namespace, _type string) error {
151+
if m.FnSecretDeleteCollection != nil {
152+
return m.FnSecretDeleteCollection(ctx, namespace, _type)
153+
}
154+
155+
return nil
156+
}
157+
149158
func (m *MockClient) ServiceGet(ctx context.Context, namespace, name string) (*corev1.Service, error) {
150159
return m.FnServiceGet(ctx, namespace, name)
151160
}
@@ -180,7 +189,7 @@ func (m *MockClient) StreamPodLogs(ctx context.Context, namespace string, podNam
180189

181190
func (m *MockClient) PodList(ctx context.Context, namespace string) (*corev1.PodList, error) {
182191
if m.FnPodList == nil {
183-
return nil, nil
192+
return &corev1.PodList{}, nil
184193
}
185194
return m.FnPodList(ctx, namespace)
186195
}

internal/cmd/local/local/install.go

Lines changed: 58 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -589,6 +589,13 @@ type chartRequest struct {
589589
uninstallFirst bool
590590
}
591591

592+
// errHelmStuck is the error returned (only from a msg perspective, not this actual error) from the underlying helm
593+
// client when the most recent install/upgrade attempt was terminated early (e.g. via ctrl+c) and was
594+
// unable to (or not configured to) rollback to a prior version.
595+
//
596+
// The actual error returned by the underlying helm-client isn't exported.
597+
var errHelmStuck = errors.New("another operation (install/upgrade/rollback) is in progress")
598+
592599
// handleChart will handle the installation of a chart
593600
func (c *Command) handleChart(
594601
ctx context.Context,
@@ -648,29 +655,57 @@ func (c *Command) handleChart(
648655
}
649656
}
650657

651-
pterm.Info.Println(fmt.Sprintf(
652-
"Starting Helm Chart installation of '%s' (version: %s)",
653-
req.chartName, helmChart.Metadata.Version,
654-
))
655-
c.spinner.UpdateText(fmt.Sprintf(
656-
"Installing '%s' (version: %s) Helm Chart (this may take several minutes)",
657-
req.chartName, helmChart.Metadata.Version,
658-
))
659-
helmRelease, err := c.helm.InstallOrUpgradeChart(ctx, &helmclient.ChartSpec{
660-
ReleaseName: req.chartRelease,
661-
ChartName: req.chartLoc,
662-
CreateNamespace: true,
663-
Namespace: req.namespace,
664-
Wait: true,
665-
Timeout: 60 * time.Minute,
666-
ValuesYaml: req.valuesYAML,
667-
Version: req.chartVersion,
668-
},
669-
&helmclient.GenericHelmOptions{},
670-
)
671-
if err != nil {
672-
pterm.Error.Printfln("Failed to install %s Helm Chart", req.chartName)
673-
return fmt.Errorf("unable to install helm: %w", err)
658+
// This will be non-nil if the following for-loop is able to successfully install/upgrade the chart
659+
// AND that for-loop doesn't return early with an error.
660+
var helmRelease *release.Release
661+
662+
// it's possible that an existing helm installation is stuck in a non-final state
663+
// which this code will detect, attempt to clean up, and try again up to three times.
664+
// Only the helmStuckError (based on error-message equivalence) will be retried, all other errors
665+
// will be returned.
666+
for attemptCount := 0; attemptCount < 3; attemptCount++ {
667+
pterm.Info.Println(fmt.Sprintf(
668+
"Starting Helm Chart installation of '%s' (version: %s)",
669+
req.chartName, helmChart.Metadata.Version,
670+
))
671+
c.spinner.UpdateText(fmt.Sprintf(
672+
"Installing '%s' (version: %s) Helm Chart (this may take several minutes)",
673+
req.chartName, helmChart.Metadata.Version,
674+
))
675+
676+
helmRelease, err = c.helm.InstallOrUpgradeChart(ctx, &helmclient.ChartSpec{
677+
ReleaseName: req.chartRelease,
678+
ChartName: req.chartLoc,
679+
CreateNamespace: true,
680+
Namespace: req.namespace,
681+
Wait: true,
682+
Timeout: 60 * time.Minute,
683+
ValuesYaml: req.valuesYAML,
684+
Version: req.chartVersion,
685+
},
686+
&helmclient.GenericHelmOptions{},
687+
)
688+
689+
if err != nil {
690+
// If the error is the errHelmStuck error, attempt to resolve this by removing the helm release secret.
691+
// See: https://github.com/helm/helm/issues/8987#issuecomment-1082992461
692+
if strings.Contains(err.Error(), errHelmStuck.Error()) {
693+
if err := c.k8s.SecretDeleteCollection(ctx, common.AirbyteNamespace, "helm.sh/release.v1"); err != nil {
694+
pterm.Debug.Println(fmt.Sprintf("unable to delete secrets helm.sh/release.v1: %s", err))
695+
}
696+
continue
697+
}
698+
pterm.Error.Printfln("Failed to install %s Helm Chart", req.chartName)
699+
return fmt.Errorf("unable to install helm: %w", err)
700+
}
701+
break
702+
}
703+
704+
// If helmRelease is nil, that means we were unable to successfully install/upgrade the chart.
705+
// This is an error situation. As only one specific error message should cause this (all other errors
706+
// should have returned out of the for-loop), we can treat this as if the underlying helm-client
707+
if helmRelease == nil {
708+
return localerr.ErrHelmStuck
674709
}
675710

676711
c.tel.Attr(fmt.Sprintf("helm_%s_release_version", req.name), strconv.Itoa(helmRelease.Version))

0 commit comments

Comments
 (0)