Skip to content

Commit a3adae4

Browse files
authored
Merge pull request #1171 from aryan9600/fix-primary-restart
Fix canary rollback behaviour
2 parents 67cc965 + c7c0c76 commit a3adae4

File tree

8 files changed

+256
-1
lines changed

8 files changed

+256
-1
lines changed

artifacts/flagger/crd.yaml

+3
Original file line numberDiff line numberDiff line change
@@ -1049,6 +1049,9 @@ spec:
10491049
lastAppliedSpec:
10501050
description: LastAppliedSpec of this canary
10511051
type: string
1052+
lastPromotedSpec:
1053+
description: LastPromotedSpec of this canary
1054+
type: string
10521055
lastTransitionTime:
10531056
description: LastTransitionTime of this canary
10541057
format: date-time

charts/flagger/crds/crd.yaml

+3
Original file line numberDiff line numberDiff line change
@@ -1049,6 +1049,9 @@ spec:
10491049
lastAppliedSpec:
10501050
description: LastAppliedSpec of this canary
10511051
type: string
1052+
lastPromotedSpec:
1053+
description: LastPromotedSpec of this canary
1054+
type: string
10521055
lastTransitionTime:
10531056
description: LastTransitionTime of this canary
10541057
format: date-time

kustomize/base/flagger/crd.yaml

+3
Original file line numberDiff line numberDiff line change
@@ -1046,6 +1046,9 @@ spec:
10461046
iterations:
10471047
description: Iteration count of the current canary analysis
10481048
type: number
1049+
lastPromotedSpec:
1050+
description: LastPromotedSpec of this canary
1051+
type: string
10491052
lastAppliedSpec:
10501053
description: LastAppliedSpec of this canary
10511054
type: string

pkg/canary/status.go

+3
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,9 @@ func syncCanaryStatus(flaggerClient clientset.Interface, cd *flaggerv1.Canary, s
4848
cdCopy.Status.FailedChecks = status.FailedChecks
4949
cdCopy.Status.Iterations = status.Iterations
5050
cdCopy.Status.LastAppliedSpec = hash
51+
if status.Phase == flaggerv1.CanaryPhaseInitialized {
52+
cdCopy.Status.LastPromotedSpec = hash
53+
}
5154
cdCopy.Status.LastTransitionTime = metav1.Now()
5255
setAll(cdCopy)
5356

pkg/controller/scheduler.go

+8
Original file line numberDiff line numberDiff line change
@@ -758,6 +758,14 @@ func (c *Controller) shouldAdvance(canary *flaggerv1.Canary, canaryController ca
758758
return true, nil
759759
}
760760

761+
// Make sure to sync lastAppliedSpec even if the canary is in a failed state.
762+
if canary.Status.Phase == flaggerv1.CanaryPhaseFailed {
763+
if err := canaryController.SyncStatus(canary, canary.Status); err != nil {
764+
c.logger.With("canary", fmt.Sprintf("%s.%s", canary.Name, canary.Namespace)).Errorf("%v", err)
765+
return false, err
766+
}
767+
}
768+
761769
newTarget, err := canaryController.HasTargetChanged(canary)
762770
if err != nil {
763771
return false, err

test/nginx/install.sh

+1-1
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ helm upgrade -i flagger ${REPO_ROOT}/charts/flagger \
2828
--set prometheus.install=true \
2929
--set meshProvider=nginx
3030

31-
# kubectl -n ingress-nginx set image deployment/flagger flagger=test/flagger:latest
31+
kubectl -n ingress-nginx set image deployment/flagger flagger=test/flagger:latest
3232

3333
kubectl -n ingress-nginx rollout status deployment/flagger
3434
kubectl -n ingress-nginx rollout status deployment/flagger-prometheus

test/nginx/run.sh

+2
Original file line numberDiff line numberDiff line change
@@ -9,3 +9,5 @@ DIR="$(cd "$(dirname "$0")" && pwd)"
99

1010
"$REPO_ROOT"/test/workloads/init.sh
1111
"$DIR"/test-canary.sh
12+
"$REPO_ROOT"/test/workloads/init.sh
13+
"$DIR"/test-lifecycle.sh

test/nginx/test-lifecycle.sh

+233
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,233 @@
1+
#!/usr/bin/env bash
2+
3+
# This script runs e2e tests for the Canary lifecycle: initialization, rollback on failed analysis, revert, retry and promotion
4+
# Prerequisites: Kubernetes Kind, Helm and NGINX ingress controller
5+
6+
set -o errexit
7+
8+
REPO_ROOT=$(git rev-parse --show-toplevel)
9+
10+
cat <<EOF | kubectl apply -f -
11+
apiVersion: networking.k8s.io/v1
12+
kind: Ingress
13+
metadata:
14+
name: podinfo
15+
namespace: test
16+
labels:
17+
app: podinfo
18+
annotations:
19+
kubernetes.io/ingress.class: "nginx"
20+
spec:
21+
rules:
22+
- host: "app.example.com"
23+
http:
24+
paths:
25+
- pathType: Prefix
26+
path: "/"
27+
backend:
28+
service:
29+
name: podinfo
30+
port:
31+
number: 80
32+
EOF
33+
34+
cat <<EOF | kubectl apply -f -
35+
apiVersion: flagger.app/v1beta1
36+
kind: Canary
37+
metadata:
38+
name: podinfo
39+
namespace: test
40+
spec:
41+
targetRef:
42+
apiVersion: apps/v1
43+
kind: Deployment
44+
name: podinfo
45+
ingressRef:
46+
apiVersion: networking.k8s.io/v1
47+
kind: Ingress
48+
name: podinfo
49+
progressDeadlineSeconds: 60
50+
service:
51+
port: 80
52+
targetPort: http
53+
analysis:
54+
interval: 10s
55+
threshold: 2
56+
maxWeight: 40
57+
stepWeight: 20
58+
metrics:
59+
- name: request-success-rate
60+
thresholdRange:
61+
min: 1
62+
interval: 30s
63+
webhooks:
64+
- name: load-test
65+
url: http://flagger-loadtester.test/
66+
metadata:
67+
type: cmd
68+
cmd: "hey -z 2m -q 10 -c 2 -host app.example.com http://ingress-nginx-controller.ingress-nginx/status/500"
69+
EOF
70+
71+
echo '>>> Waiting for primary to be ready'
72+
retries=50
73+
count=0
74+
ok=false
75+
until ${ok}; do
76+
kubectl -n test get canary/podinfo | grep 'Initialized' && ok=true || ok=false
77+
sleep 5
78+
count=$(($count + 1))
79+
if [[ ${ok} == false && ${count} -eq ${retries} ]]; then # give up only when the last attempt also failed
80+
kubectl -n ingress-nginx logs deployment/flagger
81+
echo "No more retries left"
82+
exit 1
83+
fi
84+
done
85+
86+
echo '✔ Canary initialization test passed'
87+
88+
echo '>>> Triggering canary deployment'
89+
kubectl -n test set image deployment/podinfo podinfod=ghcr.io/stefanprodan/podinfo:6.0.1
90+
91+
echo '>>> Waiting for canary rollback'
92+
retries=50
93+
count=0
94+
ok=false
95+
until ${ok}; do
96+
kubectl -n test get canary/podinfo | grep 'Failed' && ok=true || ok=false
97+
sleep 10
98+
kubectl -n ingress-nginx logs deployment/flagger --tail 1
99+
count=$(($count + 1))
100+
if [[ ${ok} == false && ${count} -eq ${retries} ]]; then # give up only when the last attempt also failed
101+
kubectl -n ingress-nginx logs deployment/flagger
102+
echo "No more retries left"
103+
exit 1
104+
fi
105+
done
106+
107+
echo '✔ Canary rollback test passed'
108+
109+
pod_hash=$(kubectl get pods -l app=podinfo-primary -n test -o=jsonpath='{.items[0].metadata.labels.pod-template-hash}')
110+
111+
echo '>>> Reverting canary deployment to match primary'
112+
kubectl -n test set image deployment/podinfo podinfod=ghcr.io/stefanprodan/podinfo:6.0.0
113+
114+
sleep 15
115+
116+
new_pod_hash=$(kubectl get pods -l app=podinfo-primary -n test -o=jsonpath='{.items[0].metadata.labels.pod-template-hash}')
117+
failed=false
118+
kubectl -n test get canary/podinfo | grep 'Failed' && failed=true || failed=false # grep exits non-zero when the phase is not Failed
119+
120+
if [[ "$new_pod_hash" == "$pod_hash" && "$failed" == true ]]; then # primary pods unchanged and canary still Failed
121+
echo '✔ Canary not triggered upon reverting canary image to match primary '
122+
else
123+
echo '⨯ Canary got triggered upon reverting canary image to match primary'
124+
exit 1
125+
fi
126+
127+
echo '>>> Triggering canary deployment again'
128+
kubectl -n test set image deployment/podinfo podinfod=ghcr.io/stefanprodan/podinfo:6.0.1
129+
130+
echo '>>> Waiting for canary to start progress'
131+
retries=50
132+
count=0
133+
ok=false
134+
until ${ok}; do
135+
kubectl -n test get canary/podinfo | grep 'Progressing' && ok=true || ok=false
136+
sleep 1
137+
count=$(($count + 1))
138+
if [[ ${ok} == false && ${count} -eq ${retries} ]]; then # give up only when the last attempt also failed
139+
kubectl -n ingress-nginx logs deployment/flagger
140+
kubectl -n test get ingress podinfo -oyaml # dump the podinfo Ingress created by this script for debugging
141+
echo "No more retries left"
142+
exit 1
143+
fi
144+
done
145+
146+
echo '>>> Waiting for canary rollback'
147+
retries=50
148+
count=0
149+
ok=false
150+
until ${ok}; do
151+
kubectl -n test get canary/podinfo | grep 'Failed' && ok=true || ok=false
152+
sleep 10
153+
kubectl -n ingress-nginx logs deployment/flagger --tail 1
154+
count=$(($count + 1))
155+
if [[ ${ok} == false && ${count} -eq ${retries} ]]; then # give up only when the last attempt also failed
156+
kubectl -n ingress-nginx logs deployment/flagger
157+
echo "No more retries left"
158+
exit 1
159+
fi
160+
done
161+
162+
cat <<EOF | kubectl apply -f -
163+
apiVersion: flagger.app/v1beta1
164+
kind: Canary
165+
metadata:
166+
name: podinfo
167+
namespace: test
168+
spec:
169+
targetRef:
170+
apiVersion: apps/v1
171+
kind: Deployment
172+
name: podinfo
173+
ingressRef:
174+
apiVersion: networking.k8s.io/v1
175+
kind: Ingress
176+
name: podinfo
177+
progressDeadlineSeconds: 60
178+
service:
179+
port: 80
180+
targetPort: http
181+
analysis:
182+
interval: 15s
183+
threshold: 5
184+
maxWeight: 40
185+
stepWeight: 20
186+
metrics:
187+
- name: request-success-rate
188+
thresholdRange:
189+
min: 1
190+
interval: 30s
191+
webhooks:
192+
- name: load-test
193+
url: http://flagger-loadtester.test/
194+
metadata:
195+
type: cmd
196+
cmd: "hey -z 2m -q 10 -c 2 -host app.example.com http://ingress-nginx-controller.ingress-nginx/"
197+
EOF
198+
199+
echo '>>> Retrying failed canary run'
200+
kubectl -n test patch deploy/podinfo -p '[{"op": "add", "path":"/spec/template/metadata/annotations", "value": {"thisis": "theway"}}]' --type=json
201+
202+
echo '>>> Waiting for canary promotion'
203+
retries=50
204+
count=0
205+
ok=false
206+
until ${ok}; do
207+
kubectl -n test describe deployment/podinfo-primary | grep '6.0.1' && ok=true || ok=false
208+
sleep 10
209+
kubectl -n ingress-nginx logs deployment/flagger --tail 1
210+
count=$(($count + 1))
211+
if [[ ${ok} == false && ${count} -eq ${retries} ]]; then # give up only when the last attempt also failed
212+
kubectl -n ingress-nginx logs deployment/flagger
213+
echo "No more retries left"
214+
exit 1
215+
fi
216+
done
217+
218+
echo '>>> Waiting for canary finalization'
219+
retries=50
220+
count=0
221+
ok=false
222+
until ${ok}; do
223+
kubectl -n test get canary/podinfo | grep 'Succeeded' && ok=true || ok=false
224+
sleep 5
225+
count=$(($count + 1))
226+
if [[ ${ok} == false && ${count} -eq ${retries} ]]; then # give up only when the last attempt also failed
227+
kubectl -n ingress-nginx logs deployment/flagger
228+
echo "No more retries left"
229+
exit 1
230+
fi
231+
done
232+
233+
echo '✔ Canary promotion test passed'

0 commit comments

Comments
 (0)