Skip to content

Commit ddd6df5

Browse files
authored
fix: infinite reconciliation loop when app is in error (argoproj#23067)
Signed-off-by: Alexandre Gaudreault <[email protected]>
1 parent 927ed35 commit ddd6df5

File tree

7 files changed

+67
-28
lines changed

7 files changed

+67
-28
lines changed

controller/appcontroller.go

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1681,11 +1681,9 @@ func (ctrl *ApplicationController) processAppRefreshQueueItem() (processNext boo
16811681

16821682
project, hasErrors := ctrl.refreshAppConditions(app)
16831683
ts.AddCheckpoint("refresh_app_conditions_ms")
1684-
now := metav1.Now()
16851684
if hasErrors {
16861685
app.Status.Sync.Status = appv1.SyncStatusCodeUnknown
16871686
app.Status.Health.Status = health.HealthStatusUnknown
1688-
app.Status.Health.LastTransitionTime = &now
16891687
patchMs = ctrl.persistAppStatus(origApp, &app.Status)
16901688

16911689
if err := ctrl.cache.SetAppResourcesTree(app.InstanceName(ctrl.namespace), &appv1.ApplicationTree{}); err != nil {
@@ -1782,6 +1780,7 @@ func (ctrl *ApplicationController) processAppRefreshQueueItem() (processNext boo
17821780
ts.AddCheckpoint("auto_sync_ms")
17831781

17841782
if app.Status.ReconciledAt == nil || comparisonLevel >= CompareWithLatest {
1783+
now := metav1.Now()
17851784
app.Status.ReconciledAt = &now
17861785
}
17871786
app.Status.Sync = *compareResult.syncStatus
@@ -2012,9 +2011,15 @@ func (ctrl *ApplicationController) persistAppStatus(orig *appv1.Application, new
20122011
ctrl.logAppEvent(context.TODO(), orig, argo.EventInfo{Reason: argo.EventReasonResourceUpdated, Type: corev1.EventTypeNormal}, message)
20132012
}
20142013
if orig.Status.Health.Status != newStatus.Health.Status {
2014+
now := metav1.Now()
2015+
newStatus.Health.LastTransitionTime = &now
20152016
message := fmt.Sprintf("Updated health status: %s -> %s", orig.Status.Health.Status, newStatus.Health.Status)
20162017
ctrl.logAppEvent(context.TODO(), orig, argo.EventInfo{Reason: argo.EventReasonResourceUpdated, Type: corev1.EventTypeNormal}, message)
2018+
} else {
2019+
// make sure the last transition time is the same and populated if the health is the same
2020+
newStatus.Health.LastTransitionTime = orig.Status.Health.LastTransitionTime
20172021
}
2022+
20182023
var newAnnotations map[string]string
20192024
if orig.GetAnnotations() != nil {
20202025
newAnnotations = make(map[string]string)

controller/appcontroller_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1826,7 +1826,7 @@ apps/Deployment:
18261826
hs = {}
18271827
hs.status = ""
18281828
hs.message = ""
1829-
1829+
18301830
if obj.metadata ~= nil then
18311831
if obj.metadata.labels ~= nil then
18321832
current_status = obj.metadata.labels["status"]

controller/health.go

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@ import (
88
"github.com/argoproj/gitops-engine/pkg/sync/ignore"
99
kubeutil "github.com/argoproj/gitops-engine/pkg/utils/kube"
1010
log "github.com/sirupsen/logrus"
11-
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
1211
"k8s.io/apimachinery/pkg/runtime/schema"
1312

1413
"github.com/argoproj/argo-cd/v3/common"
@@ -22,7 +21,9 @@ func setApplicationHealth(resources []managedResource, statuses []appv1.Resource
2221
var savedErr error
2322
var errCount uint
2423

25-
appHealth := appv1.HealthStatus{Status: health.HealthStatusHealthy}
24+
appHealth := app.Status.Health.DeepCopy()
25+
appHealth.Status = health.HealthStatusHealthy
26+
2627
for i, res := range resources {
2728
if res.Target != nil && hookutil.Skip(res.Target) {
2829
continue
@@ -80,13 +81,6 @@ func setApplicationHealth(resources []managedResource, statuses []appv1.Resource
8081
appHealth.Status = healthStatus.Status
8182
}
8283
}
83-
// if the status didn't change, don't update the timestamp
84-
if app.Status.Health.Status == appHealth.Status && app.Status.Health.LastTransitionTime != nil {
85-
appHealth.LastTransitionTime = app.Status.Health.LastTransitionTime
86-
} else {
87-
now := metav1.Now()
88-
appHealth.LastTransitionTime = &now
89-
}
9084
if persistResourceHealth {
9185
app.Status.ResourceHealthSource = appv1.ResourceHealthLocationInline
9286
} else {
@@ -95,5 +89,5 @@ func setApplicationHealth(resources []managedResource, statuses []appv1.Resource
9589
if savedErr != nil && errCount > 1 {
9690
savedErr = fmt.Errorf("see application-controller logs for %d other errors; most recent error was: %w", errCount-1, savedErr)
9791
}
98-
return &appHealth, savedErr
92+
return appHealth, savedErr
9993
}

controller/health_test.go

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -73,17 +73,15 @@ func TestSetApplicationHealth(t *testing.T) {
7373
assert.NotNil(t, healthStatus.LastTransitionTime)
7474
assert.Nil(t, resourceStatuses[0].Health.LastTransitionTime)
7575
assert.Nil(t, resourceStatuses[1].Health.LastTransitionTime)
76-
previousLastTransitionTime := healthStatus.LastTransitionTime
7776
app.Status.Health = *healthStatus
7877

7978
// now mark the job as a hook and retry. it should ignore the hook and consider the app healthy
8079
failedJob.SetAnnotations(map[string]string{synccommon.AnnotationKeyHook: "PreSync"})
8180
healthStatus, err = setApplicationHealth(resources, resourceStatuses, nil, app, true)
8281
require.NoError(t, err)
8382
assert.Equal(t, health.HealthStatusHealthy, healthStatus.Status)
84-
// change in health, timestamp should change
85-
assert.NotEqual(t, *previousLastTransitionTime, *healthStatus.LastTransitionTime)
86-
previousLastTransitionTime = healthStatus.LastTransitionTime
83+
// timestamp should be the same in case health did not change
84+
assert.Equal(t, app.Status.Health.LastTransitionTime, healthStatus.LastTransitionTime)
8785
app.Status.Health = *healthStatus
8886

8987
// now we set the `argocd.argoproj.io/ignore-healthcheck: "true"` annotation on the job's target.
@@ -94,8 +92,7 @@ func TestSetApplicationHealth(t *testing.T) {
9492
healthStatus, err = setApplicationHealth(resources, resourceStatuses, nil, app, true)
9593
require.NoError(t, err)
9694
assert.Equal(t, health.HealthStatusHealthy, healthStatus.Status)
97-
// no change in health, timestamp shouldn't change
98-
assert.Equal(t, *previousLastTransitionTime, *healthStatus.LastTransitionTime)
95+
assert.Equal(t, app.Status.Health.LastTransitionTime, healthStatus.LastTransitionTime)
9996
}
10097

10198
func TestSetApplicationHealth_ResourceHealthNotPersisted(t *testing.T) {
@@ -125,7 +122,7 @@ func TestSetApplicationHealth_MissingResource(t *testing.T) {
125122
healthStatus, err := setApplicationHealth(resources, resourceStatuses, lua.ResourceHealthOverrides{}, app, true)
126123
require.NoError(t, err)
127124
assert.Equal(t, health.HealthStatusMissing, healthStatus.Status)
128-
assert.False(t, healthStatus.LastTransitionTime.IsZero())
125+
assert.Equal(t, app.Status.Health.LastTransitionTime, healthStatus.LastTransitionTime)
129126
}
130127

131128
func TestSetApplicationHealth_HealthImproves(t *testing.T) {
@@ -157,7 +154,7 @@ func TestSetApplicationHealth_HealthImproves(t *testing.T) {
157154
healthStatus, err := setApplicationHealth(resources, resourceStatuses, overrides, app, true)
158155
require.NoError(t, err)
159156
assert.Equal(t, tc.newStatus, healthStatus.Status)
160-
assert.NotEqual(t, testTimestamp, *healthStatus.LastTransitionTime)
157+
assert.Equal(t, app.Status.Health.LastTransitionTime, healthStatus.LastTransitionTime)
161158
})
162159
}
163160
}
@@ -174,6 +171,7 @@ func TestSetApplicationHealth_MissingResourceNoBuiltHealthCheck(t *testing.T) {
174171
healthStatus, err := setApplicationHealth(resources, resourceStatuses, lua.ResourceHealthOverrides{}, app, true)
175172
require.NoError(t, err)
176173
assert.Equal(t, health.HealthStatusHealthy, healthStatus.Status)
174+
assert.Equal(t, app.Status.Health.LastTransitionTime, healthStatus.LastTransitionTime)
177175
assert.Equal(t, health.HealthStatusMissing, resourceStatuses[0].Health.Status)
178176
})
179177

@@ -185,7 +183,7 @@ func TestSetApplicationHealth_MissingResourceNoBuiltHealthCheck(t *testing.T) {
185183
}, app, true)
186184
require.NoError(t, err)
187185
assert.Equal(t, health.HealthStatusMissing, healthStatus.Status)
188-
assert.False(t, healthStatus.LastTransitionTime.IsZero())
186+
assert.Equal(t, app.Status.Health.LastTransitionTime, healthStatus.LastTransitionTime)
189187
})
190188
}
191189

controller/state.go

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -518,15 +518,14 @@ func (m *appStateManager) CompareAppState(app *v1alpha1.Application, project *v1
518518

519519
// return unknown comparison result if basic comparison settings cannot be loaded
520520
if err != nil {
521-
now := metav1.Now()
522521
if hasMultipleSources {
523522
return &comparisonResult{
524523
syncStatus: &v1alpha1.SyncStatus{
525524
ComparedTo: app.Spec.BuildComparedToStatus(),
526525
Status: v1alpha1.SyncStatusCodeUnknown,
527526
Revisions: revisions,
528527
},
529-
healthStatus: &v1alpha1.HealthStatus{Status: health.HealthStatusUnknown, LastTransitionTime: &now},
528+
healthStatus: &v1alpha1.HealthStatus{Status: health.HealthStatusUnknown},
530529
}, nil
531530
}
532531
return &comparisonResult{
@@ -535,7 +534,7 @@ func (m *appStateManager) CompareAppState(app *v1alpha1.Application, project *v1
535534
Status: v1alpha1.SyncStatusCodeUnknown,
536535
Revision: revisions[0],
537536
},
538-
healthStatus: &v1alpha1.HealthStatus{Status: health.HealthStatusUnknown, LastTransitionTime: &now},
537+
healthStatus: &v1alpha1.HealthStatus{Status: health.HealthStatusUnknown},
539538
}, nil
540539
}
541540

controller/state_test.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -718,7 +718,7 @@ func TestSetHealth(t *testing.T) {
718718
require.NoError(t, err)
719719

720720
assert.Equal(t, health.HealthStatusHealthy, compRes.healthStatus.Status)
721-
assert.False(t, compRes.healthStatus.LastTransitionTime.IsZero())
721+
assert.Equal(t, app.Status.Health.LastTransitionTime, compRes.healthStatus.LastTransitionTime)
722722
}
723723

724724
func TestPreserveStatusTimestamp(t *testing.T) {
@@ -793,7 +793,7 @@ func TestSetHealthSelfReferencedApp(t *testing.T) {
793793
require.NoError(t, err)
794794

795795
assert.Equal(t, health.HealthStatusHealthy, compRes.healthStatus.Status)
796-
assert.False(t, compRes.healthStatus.LastTransitionTime.IsZero())
796+
assert.Equal(t, app.Status.Health.LastTransitionTime, compRes.healthStatus.LastTransitionTime)
797797
}
798798

799799
func TestSetManagedResourcesWithOrphanedResources(t *testing.T) {
@@ -869,7 +869,7 @@ func TestReturnUnknownComparisonStateOnSettingLoadError(t *testing.T) {
869869
require.NoError(t, err)
870870

871871
assert.Equal(t, health.HealthStatusUnknown, compRes.healthStatus.Status)
872-
assert.False(t, compRes.healthStatus.LastTransitionTime.IsZero())
872+
assert.Equal(t, app.Status.Health.LastTransitionTime, compRes.healthStatus.LastTransitionTime)
873873
assert.Equal(t, v1alpha1.SyncStatusCodeUnknown, compRes.syncStatus.Status)
874874
}
875875

test/e2e/app_management_test.go

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3029,3 +3029,46 @@ func TestDeletionConfirmation(t *testing.T) {
30293029
When().ConfirmDeletion().
30303030
Then().Expect(DoesNotExist())
30313031
}
3032+
3033+
func TestLastTransitionTimeUnchangedError(t *testing.T) {
3034+
// Ensure that, if the health status hasn't changed, the lastTransitionTime is not updated.
3035+
3036+
ctx := Given(t)
3037+
ctx.
3038+
Path(guestbookPath).
3039+
When().
3040+
And(func() {
3041+
// Manually create an application with an outdated reconciledAt field
3042+
manifest := fmt.Sprintf(`
3043+
apiVersion: argoproj.io/v1alpha1
3044+
kind: Application
3045+
metadata:
3046+
name: %s
3047+
spec:
3048+
project: default
3049+
source:
3050+
repoURL: %s
3051+
path: guestbook
3052+
targetRevision: HEAD
3053+
destination:
3054+
server: https://non-existent-cluster
3055+
namespace: default
3056+
status:
3057+
reconciledAt: "2023-01-01T00:00:00Z"
3058+
health:
3059+
status: Unknown
3060+
lastTransitionTime: "2023-01-01T00:00:00Z"
3061+
`, ctx.AppName(), fixture.RepoURL(fixture.RepoURLTypeFile))
3062+
_, err := fixture.RunWithStdin(manifest, "", "kubectl", "apply", "-n", fixture.ArgoCDNamespace, "-f", "-")
3063+
require.NoError(t, err)
3064+
}).
3065+
Refresh(RefreshTypeNormal).
3066+
Then().
3067+
And(func(app *Application) {
3068+
// Verify the health status is still Unknown
3069+
assert.Equal(t, health.HealthStatusUnknown, app.Status.Health.Status)
3070+
3071+
// Verify the lastTransitionTime has not been updated
3072+
assert.Equal(t, "2023-01-01T00:00:00Z", app.Status.Health.LastTransitionTime.UTC().Format(time.RFC3339))
3073+
})
3074+
}

0 commit comments

Comments
 (0)