Skip to content

Commit a38b004

Browse files
authored
gc: consider whether the sts pod is alive during lsp gc (#5122)
Signed-off-by: zhangzujian <[email protected]>
1 parent ded57da commit a38b004

File tree

2 files changed

+73
-12
lines changed

2 files changed

+73
-12
lines changed

pkg/controller/gc.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -331,7 +331,7 @@ func (c *Controller) markAndCleanLSP() error {
331331
ipMap := strset.NewWithSize(len(pods) + len(nodes))
332332
for _, pod := range pods {
333333
if isStsPod, stsName, stsUID := isStatefulSetPod(pod); isStsPod {
334-
if isStatefulSetPodToDel(c.config.KubeClient, pod, stsName, stsUID) {
334+
if isStatefulSetPodToGC(c.config.KubeClient, pod, stsName, stsUID) {
335335
continue
336336
}
337337
} else if !isPodAlive(pod) {

pkg/controller/pod.go

+72-11
Original file line numberDiff line numberDiff line change
@@ -925,8 +925,8 @@ func (c *Controller) handleDeletePod(key string) (err error) {
925925

926926
var keepIPCR bool
927927
if ok, stsName, stsUID := isStatefulSetPod(pod); ok {
928-
if pod.DeletionTimestamp != nil {
929-
klog.Infof("handle deletion of sts pod %s", podName)
928+
if !pod.DeletionTimestamp.IsZero() {
929+
klog.Infof("handle deletion of sts pod %s", podKey)
930930
toDel := isStatefulSetPodToDel(c.config.KubeClient, pod, stsName, stsUID)
931931
if !toDel {
932932
klog.Infof("try keep ip for sts pod %s", podKey)
@@ -949,7 +949,7 @@ func (c *Controller) handleDeletePod(key string) (err error) {
949949
if isVMPod && c.config.EnableKeepVMIP {
950950
ports, err := c.OVNNbClient.ListNormalLogicalSwitchPorts(true, map[string]string{"pod": podKey})
951951
if err != nil {
952-
klog.Errorf("failed to list lsps of pod '%s', %v", pod.Name, err)
952+
klog.Errorf("failed to list lsps of pod %s: %v", podKey, err)
953953
return err
954954
}
955955
for _, port := range ports {
@@ -960,7 +960,7 @@ func (c *Controller) handleDeletePod(key string) (err error) {
960960
}
961961
}
962962
if pod.DeletionTimestamp != nil {
963-
klog.Infof("handle deletion of vm pod %s", podName)
963+
klog.Infof("handle deletion of vm pod %s", podKey)
964964
vmToBeDel := c.isVMToDel(pod, vmName)
965965
if !vmToBeDel {
966966
klog.Infof("try keep ip for vm pod %s", podKey)
@@ -982,12 +982,12 @@ func (c *Controller) handleDeletePod(key string) (err error) {
982982

983983
podNets, err := c.getPodKubeovnNets(pod)
984984
if err != nil {
985-
klog.Errorf("failed to get pod nets %v", err)
985+
klog.Errorf("failed to get kube-ovn nets of pod %s: %v", podKey, err)
986986
}
987987
if !keepIPCR {
988988
ports, err := c.OVNNbClient.ListNormalLogicalSwitchPorts(true, map[string]string{"pod": podKey})
989989
if err != nil {
990-
klog.Errorf("failed to list lsps of pod '%s', %v", pod.Name, err)
990+
klog.Errorf("failed to list lsps of pod %s: %v", podKey, err)
991991
return err
992992
}
993993

@@ -1273,16 +1273,20 @@ func isStatefulSetPodToDel(c kubernetes.Interface, pod *v1.Pod, statefulSetName
12731273
if err != nil {
12741274
// statefulset is deleted
12751275
if k8serrors.IsNotFound(err) {
1276-
klog.Infof("statefulset %s is deleted", statefulSetName)
1276+
klog.Infof("statefulset %s/%s has been deleted", pod.Namespace, statefulSetName)
12771277
return true
12781278
}
1279-
klog.Errorf("failed to get statefulset %v", err)
1279+
klog.Errorf("failed to get statefulset %s/%s: %v", pod.Namespace, statefulSetName, err)
12801280
return false
12811281
}
12821282

12831283
// statefulset is being deleted, or it's a newly created one
1284-
if !sts.DeletionTimestamp.IsZero() || sts.UID != statefulSetUID {
1285-
klog.Infof("statefulset %s is being deleted", statefulSetName)
1284+
if !sts.DeletionTimestamp.IsZero() {
1285+
klog.Infof("statefulset %s/%s is being deleted", pod.Namespace, statefulSetName)
1286+
return true
1287+
}
1288+
if sts.UID != statefulSetUID {
1289+
klog.Infof("statefulset %s/%s is a newly created one", pod.Namespace, statefulSetName)
12861290
return true
12871291
}
12881292

@@ -1300,9 +1304,66 @@ func isStatefulSetPodToDel(c kubernetes.Interface, pod *v1.Pod, statefulSetName
13001304
startOrdinal = int64(sts.Spec.Ordinals.Start)
13011305
}
13021306
if index >= startOrdinal+int64(*sts.Spec.Replicas) {
1303-
klog.Infof("statefulset %s is down scaled", statefulSetName)
1307+
klog.Infof("statefulset %s/%s is down scaled", pod.Namespace, statefulSetName)
1308+
return true
1309+
}
1310+
return false
1311+
}
1312+
1313+
// only gc statefulset pod lsp when:
1314+
// 1. the statefulset has been deleted or is being deleted
1315+
// 2. the statefulset has been deleted and recreated
1316+
// 3. the statefulset is down scaled and the pod is not alive
1317+
func isStatefulSetPodToGC(c kubernetes.Interface, pod *v1.Pod, statefulSetName string, statefulSetUID types.UID) bool {
1318+
sts, err := c.AppsV1().StatefulSets(pod.Namespace).Get(context.Background(), statefulSetName, metav1.GetOptions{})
1319+
if err != nil {
1320+
// the statefulset has been deleted
1321+
if k8serrors.IsNotFound(err) {
1322+
klog.Infof("statefulset %s/%s has been deleted", pod.Namespace, statefulSetName)
1323+
return true
1324+
}
1325+
klog.Errorf("failed to get statefulset %s/%s: %v", pod.Namespace, statefulSetName, err)
1326+
return false
1327+
}
1328+
1329+
// statefulset is being deleted
1330+
if !sts.DeletionTimestamp.IsZero() {
1331+
klog.Infof("statefulset %s/%s is being deleted", pod.Namespace, statefulSetName)
1332+
return true
1333+
}
1334+
// the statefulset has been deleted and recreated
1335+
if sts.UID != statefulSetUID {
1336+
klog.Infof("statefulset %s/%s is a newly created one", pod.Namespace, statefulSetName)
13041337
return true
13051338
}
1339+
1340+
// the statefulset is down scaled and the pod is not alive
1341+
1342+
tempStrs := strings.Split(pod.Name, "-")
1343+
numStr := tempStrs[len(tempStrs)-1]
1344+
index, err := strconv.ParseInt(numStr, 10, 0)
1345+
if err != nil {
1346+
klog.Errorf("failed to parse %s to int", numStr)
1347+
return false
1348+
}
1349+
// down scaled
1350+
var startOrdinal int64
1351+
if sts.Spec.Ordinals != nil {
1352+
startOrdinal = int64(sts.Spec.Ordinals.Start)
1353+
}
1354+
if index >= startOrdinal+int64(*sts.Spec.Replicas) {
1355+
klog.Infof("statefulset %s/%s is down scaled", pod.Namespace, statefulSetName)
1356+
if !isPodAlive(pod) {
1357+
// we must check whether the pod is alive because we have to consider the following case:
1358+
// 1. the statefulset is down scaled to zero
1359+
// 2. the lsp gc is triggered
1360+
// 3. gc interval, e.g. 90s, is passed and the second gc is triggered
1361+
// 4. the sts is up scaled to the original replicas
1362+
// 5. the pod is still running and it will not be recreated
1363+
return true
1364+
}
1365+
}
1366+
13061367
return false
13071368
}
13081369

0 commit comments

Comments
 (0)