Skip to content

Commit b1c5f25

Browse files
kldancerykl
and
ykl
authored
[fix] When the Nat-gw pod container restarts unexpectedly, trigger nat-gw statefulset restart to restore the nat-gw pod configuration (#5072)
Signed-off-by: ykl <[email protected]> Co-authored-by: ykl <[email protected]>
1 parent 3f26180 commit b1c5f25

File tree

3 files changed

+57
-19
lines changed

3 files changed

+57
-19
lines changed

pkg/controller/pod.go

+18
Original file line numberDiff line numberDiff line change
@@ -485,6 +485,12 @@ func (c *Controller) handleAddOrUpdatePod(key string) (err error) {
485485
}
486486
}
487487

488+
if vpcGwName, isVpcNatGw := pod.Annotations[util.VpcNatGatewayAnnotation]; isVpcNatGw {
489+
if needRestartNatGatewayPod(pod) {
490+
c.addOrUpdateVpcNatGatewayQueue.Add(vpcGwName)
491+
}
492+
}
493+
488494
// check whether route subnets are needed.
489495
return c.reconcileRouteSubnets(pod, needRouteSubnets(pod, podNets))
490496
}
@@ -1349,6 +1355,18 @@ func needAllocateSubnets(pod *v1.Pod, nets []*kubeovnNet) []*kubeovnNet {
13491355
return result
13501356
}
13511357

1358+
// needRestartNatGatewayPod reports whether the container named "vpc-nat-gw"
// in the given pod has a RestartCount greater than zero.
//
// It scans pod.Status.ContainerStatuses and ignores every status whose Name is
// not "vpc-nat-gw". It returns false when no matching container status is
// present or when the matching container has not restarted.
func needRestartNatGatewayPod(pod *v1.Pod) bool {
1359+
for _, psc := range pod.Status.ContainerStatuses {
1360+
// Only the "vpc-nat-gw" container is relevant; skip all others.
if psc.Name != "vpc-nat-gw" {
1361+
continue
1362+
}
1363+
// Any non-zero restart count means the container has been restarted.
if psc.RestartCount > 0 {
1364+
return true
1365+
}
1366+
}
1367+
return false
1368+
}
1369+
13521370
func (c *Controller) podNeedSync(pod *v1.Pod) (bool, error) {
13531371
// 1. check annotations
13541372
if pod.Annotations == nil {

pkg/controller/vpc_nat_gateway.go

+22-3
Original file line numberDiff line numberDiff line change
@@ -179,6 +179,18 @@ func (c *Controller) handleAddOrUpdateVpcNatGw(key string) error {
179179
return err
180180
}
181181

182+
var natGwPodContainerRestartCount int32
183+
pod, _err := c.getNatGwPod(key)
184+
if _err == nil {
185+
for _, psc := range pod.Status.ContainerStatuses {
186+
if psc.Name != "vpc-nat-gw" {
187+
continue
188+
}
189+
natGwPodContainerRestartCount = psc.RestartCount
190+
break
191+
}
192+
}
193+
182194
// check or create statefulset
183195
needToCreate := false
184196
needToUpdate := false
@@ -191,12 +203,12 @@ func (c *Controller) handleAddOrUpdateVpcNatGw(key string) error {
191203
}
192204
needToCreate, oldSts = true, nil
193205
}
194-
newSts, err := c.genNatGwStatefulSet(gw, oldSts)
206+
newSts, err := c.genNatGwStatefulSet(gw, oldSts, natGwPodContainerRestartCount)
195207
if err != nil {
196208
klog.Error(err)
197209
return err
198210
}
199-
if !needToCreate && isVpcNatGwChanged(gw) {
211+
if !needToCreate && (isVpcNatGwChanged(gw) || natGwPodContainerRestartCount > 0) {
200212
needToUpdate = true
201213
}
202214

@@ -717,7 +729,7 @@ func (c *Controller) setNatGwAPIRoute(annotations map[string]string, nadNamespac
717729
return nil
718730
}
719731

720-
func (c *Controller) genNatGwStatefulSet(gw *kubeovnv1.VpcNatGateway, oldSts *v1.StatefulSet) (*v1.StatefulSet, error) {
732+
func (c *Controller) genNatGwStatefulSet(gw *kubeovnv1.VpcNatGateway, oldSts *v1.StatefulSet, natGwPodContainerRestartCount int32) (*v1.StatefulSet, error) {
721733
annotations := make(map[string]string, 7)
722734
if oldSts != nil && len(oldSts.Annotations) != 0 {
723735
annotations = maps.Clone(oldSts.Annotations)
@@ -731,6 +743,13 @@ func (c *Controller) genNatGwStatefulSet(gw *kubeovnv1.VpcNatGateway, oldSts *v1
731743
util.IPAddressAnnotation: gw.Spec.LanIP,
732744
}
733745

746+
if oldSts != nil && len(oldSts.Spec.Template.Annotations) != 0 {
747+
if _, ok := oldSts.Spec.Template.Annotations[util.VpcNatGatewayContainerRestartAnnotation]; !ok && natGwPodContainerRestartCount > 0 {
748+
podAnnotations[util.VpcNatGatewayContainerRestartAnnotation] = ""
749+
}
750+
}
751+
klog.V(3).Infof("%s podAnnotations:%v", gw.Name, podAnnotations)
752+
734753
// Add an interface that can reach the API server, we need access to it to probe Kube-OVN resources
735754
if gw.Spec.BgpSpeaker.Enabled {
736755
if err := c.setNatGwAPIAccess(podAnnotations, externalNetworkNad); err != nil {

pkg/util/const.go

+17-16
Original file line numberDiff line numberDiff line change
@@ -24,22 +24,23 @@ const (
2424
VMAnnotation = "ovn.kubernetes.io/virtualmachine"
2525
ActivationStrategyAnnotation = "ovn.kubernetes.io/activation_strategy"
2626

27-
VpcNatGatewayAnnotation = "ovn.kubernetes.io/vpc_nat_gw"
28-
VpcNatGatewayInitAnnotation = "ovn.kubernetes.io/vpc_nat_gw_init"
29-
VpcNatGatewayActivatedAnnotation = "ovn.kubernetes.io/vpc_nat_gw_activated"
30-
VpcEipsAnnotation = "ovn.kubernetes.io/vpc_eips"
31-
VpcFloatingIPMd5Annotation = "ovn.kubernetes.io/vpc_floating_ips"
32-
VpcDnatMd5Annotation = "ovn.kubernetes.io/vpc_dnat_md5"
33-
VpcSnatMd5Annotation = "ovn.kubernetes.io/vpc_snat_md5"
34-
VpcCIDRsAnnotation = "ovn.kubernetes.io/vpc_cidrs"
35-
VpcLbAnnotation = "ovn.kubernetes.io/vpc_lb"
36-
VpcExternalLabel = "ovn.kubernetes.io/vpc_external"
37-
VpcEipAnnotation = "ovn.kubernetes.io/vpc_eip"
38-
VpcDnatEPortLabel = "ovn.kubernetes.io/vpc_dnat_eport"
39-
VpcNatAnnotation = "ovn.kubernetes.io/vpc_nat"
40-
OvnEipTypeLabel = "ovn.kubernetes.io/ovn_eip_type"
41-
EipV4IpLabel = "ovn.kubernetes.io/eip_v4_ip"
42-
EipV6IpLabel = "ovn.kubernetes.io/eip_v6_ip"
27+
VpcNatGatewayAnnotation = "ovn.kubernetes.io/vpc_nat_gw"
28+
VpcNatGatewayInitAnnotation = "ovn.kubernetes.io/vpc_nat_gw_init"
29+
VpcNatGatewayContainerRestartAnnotation = "ovn.kubernetes.io/vpc_nat_gw_container_restarted"
30+
VpcNatGatewayActivatedAnnotation = "ovn.kubernetes.io/vpc_nat_gw_activated"
31+
VpcEipsAnnotation = "ovn.kubernetes.io/vpc_eips"
32+
VpcFloatingIPMd5Annotation = "ovn.kubernetes.io/vpc_floating_ips"
33+
VpcDnatMd5Annotation = "ovn.kubernetes.io/vpc_dnat_md5"
34+
VpcSnatMd5Annotation = "ovn.kubernetes.io/vpc_snat_md5"
35+
VpcCIDRsAnnotation = "ovn.kubernetes.io/vpc_cidrs"
36+
VpcLbAnnotation = "ovn.kubernetes.io/vpc_lb"
37+
VpcExternalLabel = "ovn.kubernetes.io/vpc_external"
38+
VpcEipAnnotation = "ovn.kubernetes.io/vpc_eip"
39+
VpcDnatEPortLabel = "ovn.kubernetes.io/vpc_dnat_eport"
40+
VpcNatAnnotation = "ovn.kubernetes.io/vpc_nat"
41+
OvnEipTypeLabel = "ovn.kubernetes.io/ovn_eip_type"
42+
EipV4IpLabel = "ovn.kubernetes.io/eip_v4_ip"
43+
EipV6IpLabel = "ovn.kubernetes.io/eip_v6_ip"
4344

4445
SwitchLBRuleVipsAnnotation = "ovn.kubernetes.io/switch_lb_vip"
4546
SwitchLBRuleVip = "switch_lb_vip"

0 commit comments

Comments
 (0)