Commit e3daed8

fix: handle terminating PVC when the kubelet rebuilds the desired state of the world (DSW)
Signed-off-by: carlory <[email protected]>
1 parent ef4a6f9 commit e3daed8

2 files changed (+31, -52 lines)


pkg/kubelet/volumemanager/populator/desired_state_of_world_populator.go

Lines changed: 13 additions & 6 deletions
@@ -24,6 +24,7 @@ import (
 	"context"
 	"errors"
 	"fmt"
+	"slices"
 	"sync"
 	"time"
 
@@ -528,15 +529,21 @@ func (dswp *desiredStateOfWorldPopulator) getPVCExtractPV(
 		return nil, fmt.Errorf("failed to fetch PVC from API server: %v", err)
 	}
 
-	// Pods that uses a PVC that is being deleted must not be started.
+	// Pods that uses a PVC that is being deleted and not protected by
+	// kubernetes.io/pvc-protection must not be started.
 	//
-	// In case an old kubelet is running without this check or some kubelets
-	// have this feature disabled, the worst that can happen is that such
-	// pod is scheduled. This was the default behavior in 1.8 and earlier
-	// and users should not be that surprised.
+	// 1) In case an old kubelet is running without this check, the worst
+	// that can happen is that such pod is scheduled. This was the default
+	// behavior in 1.8 and earlier and users should not be that surprised.
 	// It should happen only in very rare case when scheduler schedules
 	// a pod and user deletes a PVC that's used by it at the same time.
-	if pvc.ObjectMeta.DeletionTimestamp != nil {
+	//
+	// 2) Adding a check for kubernetes.io/pvc-protection here to prevent
+	// the existing running pods from being affected during the rebuild of
+	// the desired state of the world cache when the kubelet is restarted.
+	// It is safe for kubelet to add this check here because the PVC will
+	// be stuck in Terminating state until the pod is deleted.
+	if pvc.ObjectMeta.DeletionTimestamp != nil && !slices.Contains(pvc.Finalizers, util.PVCProtectionFinalizer) {
 		return nil, errors.New("PVC is being deleted")
 	}
 
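For context, the new guard only rejects a terminating PVC once its kubernetes.io/pvc-protection finalizer has been removed; a terminating PVC that still carries the finalizer is assumed to be in use by a running pod, so the kubelet keeps it when rebuilding the desired state of the world after a restart. The standalone sketch below approximates that check outside the kubelet; the helper name and the local finalizer constant are illustrative, while the real code uses util.PVCProtectionFinalizer inside getPVCExtractPV.

package main

import (
	"errors"
	"fmt"
	"slices"

	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// pvcProtectionFinalizer mirrors util.PVCProtectionFinalizer ("kubernetes.io/pvc-protection").
const pvcProtectionFinalizer = "kubernetes.io/pvc-protection"

// rejectTerminatingPVC is a sketch of the guard above: it returns an error only
// when the PVC is being deleted AND the pvc-protection finalizer is already gone.
// A terminating PVC that still carries the finalizer is kept, so a pod using it
// is not affected by a kubelet restart.
func rejectTerminatingPVC(pvc *v1.PersistentVolumeClaim) error {
	if pvc.ObjectMeta.DeletionTimestamp != nil && !slices.Contains(pvc.Finalizers, pvcProtectionFinalizer) {
		return errors.New("PVC is being deleted")
	}
	return nil
}

func main() {
	now := metav1.Now()
	// Terminating but still protected: allowed during the DSW rebuild.
	protected := &v1.PersistentVolumeClaim{ObjectMeta: metav1.ObjectMeta{
		Name:              "data",
		DeletionTimestamp: &now,
		Finalizers:        []string{pvcProtectionFinalizer},
	}}
	// Terminating and no longer protected: rejected.
	unprotected := &v1.PersistentVolumeClaim{ObjectMeta: metav1.ObjectMeta{
		Name:              "data",
		DeletionTimestamp: &now,
	}}
	fmt.Println(rejectTerminatingPVC(protected))   // <nil>
	fmt.Println(rejectTerminatingPVC(unprotected)) // PVC is being deleted
}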
test/e2e/storage/csimock/csi_kubelet_restart.go

Lines changed: 18 additions & 46 deletions
@@ -19,27 +19,35 @@ package csimock
 import (
 	"context"
 	"fmt"
-	"os"
-	"os/exec"
-	"strings"
 
 	"github.com/onsi/ginkgo/v2"
 	"github.com/onsi/gomega"
 	apierrors "k8s.io/apimachinery/pkg/api/errors"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
-	"k8s.io/kubernetes/test/e2e/feature"
 	"k8s.io/kubernetes/test/e2e/framework"
 	e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
+	e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper"
+	"k8s.io/kubernetes/test/e2e/storage/drivers"
 	"k8s.io/kubernetes/test/e2e/storage/utils"
 	admissionapi "k8s.io/pod-security-admission/api"
 )
 
-var _ = utils.SIGDescribe("CSI Mock when kubelet restart", feature.Kind, framework.WithSerial(), framework.WithDisruptive(), func() {
+var _ = utils.SIGDescribe("CSI Mock when kubelet restart", framework.WithSerial(), framework.WithDisruptive(), func() {
 	f := framework.NewDefaultFramework("csi-mock-when-kubelet-restart")
 	f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged
 	m := newMockDriverSetup(f)
 
+	ginkgo.BeforeEach(func() {
+		// These tests requires SSH to nodes, so the provider check should be identical to there
+		// (the limiting factor is the implementation of util.go's e2essh.GetSigner(...)).
+
+		// Cluster must support node reboot
+		e2eskipper.SkipUnlessProviderIs(framework.ProvidersWithSSH...)
+		e2eskipper.SkipUnlessSSHKeyPresent()
+	})
+
 	ginkgo.It("should not umount volume when the pvc is terminating but still used by a running pod", func(ctx context.Context) {
+
 		m.init(ctx, testParameters{
 			registerDriver: true,
 		})
@@ -51,16 +59,16 @@ var _ = utils.SIGDescribe("CSI Mock when kubelet restart", feature.Kind, framewo
 		ginkgo.By("Waiting for the Pod to be running")
 		err := e2epod.WaitForPodRunningInNamespace(ctx, f.ClientSet, pod)
 		framework.ExpectNoError(err, "failed to wait for pod %s to be running", pod.Name)
+		pod, err = f.ClientSet.CoreV1().Pods(pod.Namespace).Get(ctx, pod.Name, metav1.GetOptions{})
+		framework.ExpectNoError(err, "failed to get pod %s", pod.Name)
 
 		ginkgo.By("Deleting the PVC")
 		err = f.ClientSet.CoreV1().PersistentVolumeClaims(pvc.Namespace).Delete(ctx, pvc.Name, metav1.DeleteOptions{})
 		framework.ExpectNoError(err, "failed to delete PVC %s", pvc.Name)
 
 		ginkgo.By("Restarting kubelet")
-		err = stopKindKubelet(ctx)
-		framework.ExpectNoError(err, "failed to stop kubelet")
-		err = startKindKubelet(ctx)
-		framework.ExpectNoError(err, "failed to start kubelet")
+		utils.KubeletCommand(ctx, utils.KRestart, f.ClientSet, pod)
+		ginkgo.DeferCleanup(utils.KubeletCommand, utils.KStart, f.ClientSet, pod)
 
 		ginkgo.By("Verifying the PVC is terminating during kubelet restart")
 		pvc, err = f.ClientSet.CoreV1().PersistentVolumeClaims(pvc.Namespace).Get(ctx, pvc.Name, metav1.GetOptions{})
@@ -69,7 +77,7 @@ var _ = utils.SIGDescribe("CSI Mock when kubelet restart", feature.Kind, framewo
 
 		ginkgo.By(fmt.Sprintf("Verifying that the driver didn't receive NodeUnpublishVolume call for PVC %s", pvc.Name))
 		gomega.Consistently(ctx,
-			func(ctx context.Context) interface{} {
+			func(ctx context.Context) []drivers.MockCSICall {
 				calls, err := m.driver.GetCalls(ctx)
 				if err != nil {
 					if apierrors.IsUnexpectedServerError(err) {
@@ -90,39 +98,3 @@ var _ = utils.SIGDescribe("CSI Mock when kubelet restart", feature.Kind, framewo
 		framework.ExpectNoError(err, "failed to wait for pod %s to be running", pod.Name)
 	})
 })
-
-func stopKindKubelet(ctx context.Context) error {
-	return kubeletExec("systemctl", "stop", "kubelet")
-}
-
-func startKindKubelet(ctx context.Context) error {
-	return kubeletExec("systemctl", "start", "kubelet")
-}
-
-// Run a command in container with kubelet (and the whole control plane as containers)
-func kubeletExec(command ...string) error {
-	containerName := getKindContainerName()
-	args := []string{"exec", containerName}
-	args = append(args, command...)
-	cmd := exec.Command("docker", args...)
-
-	out, err := cmd.CombinedOutput()
-	if err != nil {
-		return fmt.Errorf("command %q failed: %v\noutput:%s", prettyCmd(cmd), err, string(out))
-	}
-
-	framework.Logf("command %q succeeded:\n%s", prettyCmd(cmd), string(out))
-	return nil
-}
-
-func getKindContainerName() string {
-	clusterName := os.Getenv("KIND_CLUSTER_NAME")
-	if clusterName == "" {
-		clusterName = "kind"
-	}
-	return clusterName + "-control-plane"
-}
-
-func prettyCmd(cmd *exec.Cmd) string {
-	return fmt.Sprintf("%s %s", cmd.Path, strings.Join(cmd.Args, " "))
-}
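The third hunk above is truncated inside the gomega.Consistently block. A hedged sketch of how such an assertion can be completed against the now-typed return value follows; the 30-second window, the 5-second polling interval, and the HaveField matcher are illustrative assumptions, and only m.driver.GetCalls and []drivers.MockCSICall come from the diff itself.

// Sketch only: a possible completion of the Consistently assertion in the test above.
// Assumes drivers.MockCSICall exposes a Method field and that the "time" package is imported.
gomega.Consistently(ctx,
	func(ctx context.Context) []drivers.MockCSICall {
		calls, err := m.driver.GetCalls(ctx)
		if err != nil {
			// Tolerate transient API errors while the kubelet is restarting.
			return nil
		}
		return calls
	}).
	WithTimeout(30 * time.Second).
	WithPolling(5 * time.Second).
	ShouldNot(gomega.ContainElement(gomega.HaveField("Method", "NodeUnpublishVolume")),
		"the driver must not receive NodeUnpublishVolume while the terminating PVC is still mounted by the running pod")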
