Skip to content

Commit d9e1d9e

Browse files
kldancerykl
and
ykl
authored
[fix] When the Nat-gw pod container restarts unexpectedly, trigger nat-gw statefulset restart to restore the nat-gw pod configuration (#5070)
Signed-off-by: ykl <[email protected]> Co-authored-by: ykl <[email protected]>
1 parent 8c2049b commit d9e1d9e

File tree

3 files changed

+57
-18
lines changed

3 files changed

+57
-18
lines changed

pkg/controller/pod.go

+18
Original file line numberDiff line numberDiff line change
@@ -559,6 +559,12 @@ func (c *Controller) handleAddOrUpdatePod(key string) (err error) {
559559
}
560560
}
561561

562+
if vpcGwName, isVpcNatGw := pod.Annotations[util.VpcNatGatewayAnnotation]; isVpcNatGw {
563+
if needRestartNatGatewayPod(pod) {
564+
c.addOrUpdateVpcNatGatewayQueue.Add(vpcGwName)
565+
}
566+
}
567+
562568
// check if route subnet is needed.
563569
return c.reconcileRouteSubnets(pod, needRouteSubnets(pod, podNets))
564570
}
@@ -1334,6 +1340,18 @@ func needAllocateSubnets(pod *v1.Pod, nets []*kubeovnNet) []*kubeovnNet {
13341340
return result
13351341
}
13361342

1343+
// needRestartNatGatewayPod reports whether the "vpc-nat-gw" container of pod
// has a non-zero restart count in pod.Status.ContainerStatuses.
//
// It returns false when no container status with that name is present or when
// its RestartCount is 0. handleAddOrUpdatePod uses a true result to enqueue the
// owning gateway on addOrUpdateVpcNatGatewayQueue.
func needRestartNatGatewayPod(pod *v1.Pod) bool {
1344+
for _, psc := range pod.Status.ContainerStatuses {
1345+
// Only the NAT gateway container is relevant; skip any other container.
if psc.Name != "vpc-nat-gw" {
1346+
continue
1347+
}
1348+
// Any recorded restart of the gateway container counts as needing a restart.
if psc.RestartCount > 0 {
1349+
return true
1350+
}
1351+
}
1352+
return false
1353+
}
1354+
13371355
func (c *Controller) podNeedSync(pod *v1.Pod) (bool, error) {
13381356
// 1. check annotations
13391357
if pod.Annotations == nil {

pkg/controller/vpc_nat_gateway.go

+24-4
Original file line numberDiff line numberDiff line change
@@ -256,6 +256,19 @@ func (c *Controller) handleAddOrUpdateVpcNatGw(key string) error {
256256
return err
257257
}
258258

259+
var natGwPodContainerRestartCount int32
260+
oriPod, _err := c.getNatGwPod(key)
261+
if _err == nil {
262+
pod := oriPod.DeepCopy()
263+
for _, psc := range pod.Status.ContainerStatuses {
264+
if psc.Name != "vpc-nat-gw" {
265+
continue
266+
}
267+
natGwPodContainerRestartCount = psc.RestartCount
268+
break
269+
}
270+
}
271+
259272
// check or create statefulset
260273
needToCreate := false
261274
needToUpdate := false
@@ -269,11 +282,10 @@ func (c *Controller) handleAddOrUpdateVpcNatGw(key string) error {
269282
return err
270283
}
271284
}
272-
newSts := c.genNatGwStatefulSet(gw, oldSts.DeepCopy())
273-
if !needToCreate && isVpcNatGwChanged(gw) {
285+
newSts := c.genNatGwStatefulSet(gw, oldSts.DeepCopy(), natGwPodContainerRestartCount)
286+
if !needToCreate && (isVpcNatGwChanged(gw) || natGwPodContainerRestartCount > 0) {
274287
needToUpdate = true
275288
}
276-
277289
switch {
278290
case needToCreate:
279291
// if pod create successfully, will add initVpcNatGatewayQueue
@@ -729,7 +741,7 @@ func (c *Controller) execNatGwRules(pod *corev1.Pod, operation string, rules []s
729741
return nil
730742
}
731743

732-
func (c *Controller) genNatGwStatefulSet(gw *kubeovnv1.VpcNatGateway, oldSts *v1.StatefulSet) (newSts *v1.StatefulSet) {
744+
func (c *Controller) genNatGwStatefulSet(gw *kubeovnv1.VpcNatGateway, oldSts *v1.StatefulSet, natGwPodContainerRestartCount int32) (newSts *v1.StatefulSet) {
733745
replicas := int32(1)
734746
name := util.GenNatGwStsName(gw.Name)
735747
allowPrivilegeEscalation := true
@@ -749,6 +761,14 @@ func (c *Controller) genNatGwStatefulSet(gw *kubeovnv1.VpcNatGateway, oldSts *v1
749761
util.LogicalSwitchAnnotation: gw.Spec.Subnet,
750762
util.IPAddressAnnotation: gw.Spec.LanIP,
751763
}
764+
765+
if oldSts != nil && len(oldSts.Spec.Template.Annotations) != 0 {
766+
if _, ok := oldSts.Spec.Template.Annotations[util.VpcNatGatewayContainerRestartAnnotation]; !ok && natGwPodContainerRestartCount > 0 {
767+
podAnnotations[util.VpcNatGatewayContainerRestartAnnotation] = ""
768+
}
769+
}
770+
klog.V(3).Infof("%s podAnnotations:%v", gw.Name, podAnnotations)
771+
752772
for key, value := range podAnnotations {
753773
newPodAnnotations[key] = value
754774
}

pkg/util/const.go

+15-14
Original file line numberDiff line numberDiff line change
@@ -28,20 +28,21 @@ const (
2828
ExternalGatewayAnnotation = "ovn.kubernetes.io/external_gateway"
2929
ExternalGwPortNameAnnotation = "ovn.kubernetes.io/external_gw_port_name"
3030

31-
VpcNatGatewayAnnotation = "ovn.kubernetes.io/vpc_nat_gw"
32-
VpcNatGatewayInitAnnotation = "ovn.kubernetes.io/vpc_nat_gw_init"
33-
VpcEipsAnnotation = "ovn.kubernetes.io/vpc_eips"
34-
VpcFloatingIPMd5Annotation = "ovn.kubernetes.io/vpc_floating_ips"
35-
VpcDnatMd5Annotation = "ovn.kubernetes.io/vpc_dnat_md5"
36-
VpcSnatMd5Annotation = "ovn.kubernetes.io/vpc_snat_md5"
37-
VpcCIDRsAnnotation = "ovn.kubernetes.io/vpc_cidrs"
38-
VpcLbAnnotation = "ovn.kubernetes.io/vpc_lb"
39-
VpcExternalLabel = "ovn.kubernetes.io/vpc_external"
40-
VpcEipAnnotation = "ovn.kubernetes.io/vpc_eip"
41-
VpcDnatEPortLabel = "ovn.kubernetes.io/vpc_dnat_eport"
42-
VpcNatAnnotation = "ovn.kubernetes.io/vpc_nat"
43-
OvnEipTypeLabel = "ovn.kubernetes.io/ovn_eip_type"
44-
EipV4IpLabel = "ovn.kubernetes.io/eip_v4_ip"
31+
VpcNatGatewayAnnotation = "ovn.kubernetes.io/vpc_nat_gw"
32+
VpcNatGatewayInitAnnotation = "ovn.kubernetes.io/vpc_nat_gw_init"
33+
VpcNatGatewayContainerRestartAnnotation = "ovn.kubernetes.io/vpc_nat_gw_container_restarted"
34+
VpcEipsAnnotation = "ovn.kubernetes.io/vpc_eips"
35+
VpcFloatingIPMd5Annotation = "ovn.kubernetes.io/vpc_floating_ips"
36+
VpcDnatMd5Annotation = "ovn.kubernetes.io/vpc_dnat_md5"
37+
VpcSnatMd5Annotation = "ovn.kubernetes.io/vpc_snat_md5"
38+
VpcCIDRsAnnotation = "ovn.kubernetes.io/vpc_cidrs"
39+
VpcLbAnnotation = "ovn.kubernetes.io/vpc_lb"
40+
VpcExternalLabel = "ovn.kubernetes.io/vpc_external"
41+
VpcEipAnnotation = "ovn.kubernetes.io/vpc_eip"
42+
VpcDnatEPortLabel = "ovn.kubernetes.io/vpc_dnat_eport"
43+
VpcNatAnnotation = "ovn.kubernetes.io/vpc_nat"
44+
OvnEipTypeLabel = "ovn.kubernetes.io/ovn_eip_type"
45+
EipV4IpLabel = "ovn.kubernetes.io/eip_v4_ip"
4546

4647
SwitchLBRuleVipsAnnotation = "ovn.kubernetes.io/switch_lb_vip"
4748
SwitchLBRuleVip = "switch_lb_vip"

0 commit comments

Comments
 (0)