Skip to content

Commit 6b15a04

Browse files
Aaron-9900mrysavymichalrysavy-ext95730blakepettersson
authored
fix: [cherry-pick] selfhealattemptscount needs to be reset at times (#22095, #20978) (#22583)
Signed-off-by: Michal Ryšavý <[email protected]> Signed-off-by: Blake Pettersson <[email protected]> Co-authored-by: Michal Ryšavý <[email protected]> Co-authored-by: Michal Ryšavý <[email protected]> Co-authored-by: Blake Pettersson <[email protected]>
1 parent 38985bd commit 6b15a04

File tree

2 files changed

+80
-13
lines changed

2 files changed

+80
-13
lines changed

controller/appcontroller.go

+21-11
Original file line numberDiff line numberDiff line change
@@ -2103,9 +2103,7 @@ func (ctrl *ApplicationController) autoSync(app *appv1.Application, syncStatus *
21032103
InitiatedBy: appv1.OperationInitiator{Automated: true},
21042104
Retry: appv1.RetryStrategy{Limit: 5},
21052105
}
2106-
if app.Status.OperationState != nil && app.Status.OperationState.Operation.Sync != nil {
2107-
op.Sync.SelfHealAttemptsCount = app.Status.OperationState.Operation.Sync.SelfHealAttemptsCount
2108-
}
2106+
21092107
if app.Spec.SyncPolicy.Retry != nil {
21102108
op.Retry = *app.Spec.SyncPolicy.Retry
21112109
}
@@ -2121,8 +2119,18 @@ func (ctrl *ApplicationController) autoSync(app *appv1.Application, syncStatus *
21212119
}
21222120
logCtx.Infof("Skipping auto-sync: most recent sync already to %s", desiredCommitSHA)
21232121
return nil, 0
2124-
} else if alreadyAttempted && selfHeal {
2125-
if shouldSelfHeal, retryAfter := ctrl.shouldSelfHeal(app); shouldSelfHeal {
2122+
} else if selfHeal {
2123+
shouldSelfHeal, retryAfter := ctrl.shouldSelfHeal(app, alreadyAttempted)
2124+
if app.Status.OperationState != nil && app.Status.OperationState.Operation.Sync != nil {
2125+
op.Sync.SelfHealAttemptsCount = app.Status.OperationState.Operation.Sync.SelfHealAttemptsCount
2126+
}
2127+
2128+
if alreadyAttempted {
2129+
if !shouldSelfHeal {
2130+
logCtx.Infof("Skipping auto-sync: already attempted sync to %s with timeout %v (retrying in %v)", desiredCommitSHA, ctrl.selfHealTimeout, retryAfter)
2131+
ctrl.requestAppRefresh(app.QualifiedName(), CompareWithLatest.Pointer(), &retryAfter)
2132+
return nil, 0
2133+
}
21262134
op.Sync.SelfHealAttemptsCount++
21272135
for _, resource := range resources {
21282136
if resource.Status != appv1.SyncStatusCodeSynced {
@@ -2133,10 +2141,6 @@ func (ctrl *ApplicationController) autoSync(app *appv1.Application, syncStatus *
21332141
})
21342142
}
21352143
}
2136-
} else {
2137-
logCtx.Infof("Skipping auto-sync: already attempted sync to %s with timeout %v (retrying in %v)", desiredCommitSHA, ctrl.selfHealTimeout, retryAfter)
2138-
ctrl.requestAppRefresh(app.QualifiedName(), CompareWithLatest.Pointer(), &retryAfter)
2139-
return nil, 0
21402144
}
21412145
}
21422146
ts.AddCheckpoint("already_attempted_check_ms")
@@ -2220,11 +2224,16 @@ func alreadyAttemptedSync(app *appv1.Application, commitSHA string, commitSHAsMS
22202224
}
22212225
}
22222226

2223-
func (ctrl *ApplicationController) shouldSelfHeal(app *appv1.Application) (bool, time.Duration) {
2227+
func (ctrl *ApplicationController) shouldSelfHeal(app *appv1.Application, alreadyAttempted bool) (bool, time.Duration) {
22242228
if app.Status.OperationState == nil {
22252229
return true, time.Duration(0)
22262230
}
22272231

2232+
// Reset counter if the prior sync was successful OR if the revision has changed
2233+
if !alreadyAttempted || app.Status.Sync.Status == appv1.SyncStatusCodeSynced {
2234+
app.Status.OperationState.Operation.Sync.SelfHealAttemptsCount = 0
2235+
}
2236+
22282237
var retryAfter time.Duration
22292238
if ctrl.selfHealBackOff == nil {
22302239
if app.Status.OperationState.FinishedAt == nil {
@@ -2236,7 +2245,8 @@ func (ctrl *ApplicationController) shouldSelfHeal(app *appv1.Application) (bool,
22362245
backOff := *ctrl.selfHealBackOff
22372246
backOff.Steps = int(app.Status.OperationState.Operation.Sync.SelfHealAttemptsCount)
22382247
var delay time.Duration
2239-
for backOff.Steps > 0 {
2248+
steps := backOff.Steps
2249+
for i := 0; i < steps; i++ {
22402250
delay = backOff.Step()
22412251
}
22422252
if app.Status.OperationState.FinishedAt == nil {

controller/appcontroller_test.go

+59-2
Original file line numberDiff line numberDiff line change
@@ -2533,7 +2533,7 @@ func TestSelfHealExponentialBackoff(t *testing.T) {
25332533
ctrl.selfHealBackOff = &wait.Backoff{
25342534
Factor: 3,
25352535
Duration: 2 * time.Second,
2536-
Cap: 5 * time.Minute,
2536+
Cap: 2 * time.Minute,
25372537
}
25382538

25392539
app := &v1alpha1.Application{
@@ -2548,38 +2548,95 @@ func TestSelfHealExponentialBackoff(t *testing.T) {
25482548

25492549
testCases := []struct {
25502550
attempts int64
2551+
expectedAttempts int64
25512552
finishedAt *metav1.Time
25522553
expectedDuration time.Duration
25532554
shouldSelfHeal bool
2555+
alreadyAttempted bool
2556+
syncStatus v1alpha1.SyncStatusCode
25542557
}{{
25552558
attempts: 0,
25562559
finishedAt: ptr.To(metav1.Now()),
25572560
expectedDuration: 0,
25582561
shouldSelfHeal: true,
2562+
alreadyAttempted: true,
2563+
expectedAttempts: 0,
2564+
syncStatus: v1alpha1.SyncStatusCodeOutOfSync,
25592565
}, {
25602566
attempts: 1,
25612567
finishedAt: ptr.To(metav1.Now()),
25622568
expectedDuration: 2 * time.Second,
25632569
shouldSelfHeal: false,
2570+
alreadyAttempted: true,
2571+
expectedAttempts: 1,
2572+
syncStatus: v1alpha1.SyncStatusCodeOutOfSync,
25642573
}, {
25652574
attempts: 2,
25662575
finishedAt: ptr.To(metav1.Now()),
25672576
expectedDuration: 6 * time.Second,
25682577
shouldSelfHeal: false,
2578+
alreadyAttempted: true,
2579+
expectedAttempts: 2,
2580+
syncStatus: v1alpha1.SyncStatusCodeOutOfSync,
25692581
}, {
25702582
attempts: 3,
25712583
finishedAt: nil,
25722584
expectedDuration: 18 * time.Second,
25732585
shouldSelfHeal: false,
2586+
alreadyAttempted: true,
2587+
expectedAttempts: 3,
2588+
syncStatus: v1alpha1.SyncStatusCodeOutOfSync,
2589+
}, {
2590+
attempts: 4,
2591+
finishedAt: nil,
2592+
expectedDuration: 54 * time.Second,
2593+
shouldSelfHeal: false,
2594+
alreadyAttempted: true,
2595+
expectedAttempts: 4,
2596+
syncStatus: v1alpha1.SyncStatusCodeOutOfSync,
2597+
}, {
2598+
attempts: 5,
2599+
finishedAt: nil,
2600+
expectedDuration: 120 * time.Second,
2601+
shouldSelfHeal: false,
2602+
alreadyAttempted: true,
2603+
expectedAttempts: 5,
2604+
syncStatus: v1alpha1.SyncStatusCodeOutOfSync,
2605+
}, {
2606+
attempts: 6,
2607+
finishedAt: nil,
2608+
expectedDuration: 120 * time.Second,
2609+
shouldSelfHeal: false,
2610+
alreadyAttempted: true,
2611+
expectedAttempts: 6,
2612+
syncStatus: v1alpha1.SyncStatusCodeOutOfSync,
2613+
}, {
2614+
attempts: 6,
2615+
finishedAt: nil,
2616+
expectedDuration: 0,
2617+
shouldSelfHeal: true,
2618+
alreadyAttempted: false,
2619+
expectedAttempts: 0,
2620+
syncStatus: v1alpha1.SyncStatusCodeOutOfSync,
2621+
}, {
2622+
attempts: 6,
2623+
finishedAt: nil,
2624+
expectedDuration: 0,
2625+
shouldSelfHeal: true,
2626+
alreadyAttempted: true,
2627+
expectedAttempts: 0,
2628+
syncStatus: v1alpha1.SyncStatusCodeSynced,
25742629
}}
25752630

25762631
for i := range testCases {
25772632
tc := testCases[i]
25782633
t.Run(fmt.Sprintf("test case %d", i), func(t *testing.T) {
25792634
app.Status.OperationState.Operation.Sync.SelfHealAttemptsCount = tc.attempts
25802635
app.Status.OperationState.FinishedAt = tc.finishedAt
2581-
ok, duration := ctrl.shouldSelfHeal(app)
2636+
app.Status.Sync.Status = tc.syncStatus
2637+
ok, duration := ctrl.shouldSelfHeal(app, tc.alreadyAttempted)
25822638
require.Equal(t, ok, tc.shouldSelfHeal)
2639+
require.Equal(t, tc.expectedAttempts, app.Status.OperationState.Operation.Sync.SelfHealAttemptsCount)
25832640
assertDurationAround(t, tc.expectedDuration, duration)
25842641
})
25852642
}

0 commit comments

Comments
 (0)