Skip to content

Commit 0440562

Browse files
author
ykl
committed
[fix] When the nat-gw pod container restarts unexpectedly, trigger a restart of the nat-gw StatefulSet to restore the nat-gw pod configuration
1 parent 6187010 commit 0440562

File tree

3 files changed

+57
-18
lines changed

3 files changed

+57
-18
lines changed

pkg/controller/pod.go

+18
Original file line numberDiff line numberDiff line change
@@ -564,6 +564,12 @@ func (c *Controller) handleAddOrUpdatePod(key string) (err error) {
564564
}
565565
}
566566

567+
if vpcGwName, isVpcNatGw := pod.Annotations[util.VpcNatGatewayAnnotation]; isVpcNatGw {
568+
if needRestartNatGatewayPod(pod) {
569+
c.addOrUpdateVpcNatGatewayQueue.Add(vpcGwName)
570+
}
571+
}
572+
567573
// check if route subnet is need.
568574
pod = cachedPod.DeepCopy()
569575
return c.reconcileRouteSubnets(cachedPod, pod, needRouteSubnets(pod, podNets))
@@ -1280,6 +1286,18 @@ func needAllocateSubnets(pod *v1.Pod, nets []*kubeovnNet) []*kubeovnNet {
12801286
return result
12811287
}
12821288

1289+
// needRestartNatGatewayPod reports whether the pod's container named
// "vpc-nat-gw" has restarted at least once, judged by the RestartCount
// recorded in pod.Status.ContainerStatuses. Container statuses with any
// other name are ignored, and the function returns false when no status
// entry named "vpc-nat-gw" is present.
func needRestartNatGatewayPod(pod *v1.Pod) bool {
1290+
for _, psc := range pod.Status.ContainerStatuses {
1291+
if psc.Name != "vpc-nat-gw" {
1292+
continue
1293+
}
1294+
if psc.RestartCount > 0 {
1295+
return true
1296+
}
1297+
}
1298+
return false
1299+
}
1300+
12831301
func (c *Controller) podNeedSync(pod *v1.Pod) (bool, error) {
12841302
// 1. check annotations
12851303
if pod.Annotations == nil {

pkg/controller/vpc_nat_gateway.go

+24-4
Original file line numberDiff line numberDiff line change
@@ -256,6 +256,19 @@ func (c *Controller) handleAddOrUpdateVpcNatGw(key string) error {
256256
return err
257257
}
258258

259+
var natGwPodContainerRestartCount int32
260+
oriPod, _err := c.getNatGwPod(key)
261+
if _err == nil {
262+
pod := oriPod.DeepCopy()
263+
for _, psc := range pod.Status.ContainerStatuses {
264+
if psc.Name != "vpc-nat-gw" {
265+
continue
266+
}
267+
natGwPodContainerRestartCount = psc.RestartCount
268+
break
269+
}
270+
}
271+
259272
// check or create statefulset
260273
needToCreate := false
261274
needToUpdate := false
@@ -269,11 +282,10 @@ func (c *Controller) handleAddOrUpdateVpcNatGw(key string) error {
269282
return err
270283
}
271284
}
272-
newSts := c.genNatGwStatefulSet(gw, oldSts.DeepCopy())
273-
if !needToCreate && isVpcNatGwChanged(gw) {
285+
newSts := c.genNatGwStatefulSet(gw, oldSts.DeepCopy(), natGwPodContainerRestartCount)
286+
if !needToCreate && (isVpcNatGwChanged(gw) || natGwPodContainerRestartCount > 0) {
274287
needToUpdate = true
275288
}
276-
277289
switch {
278290
case needToCreate:
279291
// if pod create successfully, will add initVpcNatGatewayQueue
@@ -741,7 +753,7 @@ func (c *Controller) execNatGwRules(pod *corev1.Pod, operation string, rules []s
741753
return nil
742754
}
743755

744-
func (c *Controller) genNatGwStatefulSet(gw *kubeovnv1.VpcNatGateway, oldSts *v1.StatefulSet) (newSts *v1.StatefulSet) {
756+
func (c *Controller) genNatGwStatefulSet(gw *kubeovnv1.VpcNatGateway, oldSts *v1.StatefulSet, natGwPodContainerRestartCount int32) (newSts *v1.StatefulSet) {
745757
replicas := int32(1)
746758
name := util.GenNatGwStsName(gw.Name)
747759
allowPrivilegeEscalation := true
@@ -761,6 +773,14 @@ func (c *Controller) genNatGwStatefulSet(gw *kubeovnv1.VpcNatGateway, oldSts *v1
761773
util.LogicalSwitchAnnotation: gw.Spec.Subnet,
762774
util.IPAddressAnnotation: gw.Spec.LanIP,
763775
}
776+
777+
if oldSts != nil && len(oldSts.Spec.Template.Annotations) != 0 {
778+
if _, ok := oldSts.Spec.Template.Annotations[util.VpcNatGatewayContainerRestartAnnotation]; !ok && natGwPodContainerRestartCount > 0 {
779+
podAnnotations[util.VpcNatGatewayContainerRestartAnnotation] = ""
780+
}
781+
}
782+
klog.V(3).Infof("%s podAnnotations:%v", gw.Name, podAnnotations)
783+
764784
for key, value := range podAnnotations {
765785
newPodAnnotations[key] = value
766786
}

pkg/util/const.go

+15-14
Original file line numberDiff line numberDiff line change
@@ -28,20 +28,21 @@ const (
2828
ExternalGatewayAnnotation = "ovn.kubernetes.io/external_gateway"
2929
ExternalGwPortNameAnnotation = "ovn.kubernetes.io/external_gw_port_name"
3030

31-
VpcNatGatewayAnnotation = "ovn.kubernetes.io/vpc_nat_gw"
32-
VpcNatGatewayInitAnnotation = "ovn.kubernetes.io/vpc_nat_gw_init"
33-
VpcEipsAnnotation = "ovn.kubernetes.io/vpc_eips"
34-
VpcFloatingIPMd5Annotation = "ovn.kubernetes.io/vpc_floating_ips"
35-
VpcDnatMd5Annotation = "ovn.kubernetes.io/vpc_dnat_md5"
36-
VpcSnatMd5Annotation = "ovn.kubernetes.io/vpc_snat_md5"
37-
VpcCIDRsAnnotation = "ovn.kubernetes.io/vpc_cidrs"
38-
VpcLbAnnotation = "ovn.kubernetes.io/vpc_lb"
39-
VpcExternalLabel = "ovn.kubernetes.io/vpc_external"
40-
VpcEipAnnotation = "ovn.kubernetes.io/vpc_eip"
41-
VpcDnatEPortLabel = "ovn.kubernetes.io/vpc_dnat_eport"
42-
VpcNatAnnotation = "ovn.kubernetes.io/vpc_nat"
43-
OvnEipTypeLabel = "ovn.kubernetes.io/ovn_eip_type"
44-
EipV4IpLabel = "ovn.kubernetes.io/eip_v4_ip"
31+
VpcNatGatewayAnnotation = "ovn.kubernetes.io/vpc_nat_gw"
32+
VpcNatGatewayInitAnnotation = "ovn.kubernetes.io/vpc_nat_gw_init"
33+
VpcNatGatewayContainerRestartAnnotation = "ovn.kubernetes.io/vpc_nat_gw_container_restarted"
34+
VpcEipsAnnotation = "ovn.kubernetes.io/vpc_eips"
35+
VpcFloatingIPMd5Annotation = "ovn.kubernetes.io/vpc_floating_ips"
36+
VpcDnatMd5Annotation = "ovn.kubernetes.io/vpc_dnat_md5"
37+
VpcSnatMd5Annotation = "ovn.kubernetes.io/vpc_snat_md5"
38+
VpcCIDRsAnnotation = "ovn.kubernetes.io/vpc_cidrs"
39+
VpcLbAnnotation = "ovn.kubernetes.io/vpc_lb"
40+
VpcExternalLabel = "ovn.kubernetes.io/vpc_external"
41+
VpcEipAnnotation = "ovn.kubernetes.io/vpc_eip"
42+
VpcDnatEPortLabel = "ovn.kubernetes.io/vpc_dnat_eport"
43+
VpcNatAnnotation = "ovn.kubernetes.io/vpc_nat"
44+
OvnEipTypeLabel = "ovn.kubernetes.io/ovn_eip_type"
45+
EipV4IpLabel = "ovn.kubernetes.io/eip_v4_ip"
4546

4647
SwitchLBRuleVipsAnnotation = "ovn.kubernetes.io/switch_lb_vip"
4748
SwitchLBRuleVip = "switch_lb_vip"

0 commit comments

Comments
 (0)