Skip to content

Commit efddad2

Browse files
Merge pull request #509 from Danil-Grigorev/backport-435-443-0.6
[backport] #435 #443 to release-0.6
2 parents 9e0ddd5 + 2ecba00 commit efddad2

File tree

5 files changed

+206
-9
lines changed

5 files changed

+206
-9
lines changed

controlplane/api/v1beta1/rke2controlplane_types.go

+7
Original file line numberDiff line numberDiff line change
@@ -417,6 +417,13 @@ const (
417417
// RollingUpdateStrategyType replaces the old control planes by new one using rolling update
418418
// i.e. gradually scale up or down the old control planes and scale up or down the new one.
419419
RollingUpdateStrategyType RolloutStrategyType = "RollingUpdate"
420+
421+
// PreTerminateHookCleanupAnnotation is the annotation RKE2 sets on Machines to ensure it can later remove the
422+
// etcd member right before Machine termination (i.e. before InfraMachine deletion).
423+
// For RKE2 we need wait for all other pre-terminate hooks to finish to
424+
// ensure it runs last (thus ensuring that kubelet is still working while other pre-terminate hooks run
425+
// as it uses kubelet local mode).
426+
PreTerminateHookCleanupAnnotation = clusterv1.PreTerminateDeleteHookAnnotationPrefix + "/rke2-cleanup"
420427
)
421428

422429
func init() { //nolint:gochecknoinits

controlplane/internal/controllers/rke2controlplane_controller.go

+128-1
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ package controllers
1919
import (
2020
"context"
2121
"fmt"
22+
"strings"
2223
"time"
2324

2425
"github.com/blang/semver/v4"
@@ -32,6 +33,7 @@ import (
3233
kerrors "k8s.io/apimachinery/pkg/util/errors"
3334
"k8s.io/apimachinery/pkg/util/intstr"
3435
"k8s.io/client-go/tools/record"
36+
"k8s.io/klog/v2"
3537
ctrl "sigs.k8s.io/controller-runtime"
3638
"sigs.k8s.io/controller-runtime/pkg/client"
3739
"sigs.k8s.io/controller-runtime/pkg/controller"
@@ -514,6 +516,10 @@ func (r *RKE2ControlPlaneReconciler) reconcileNormal(
514516
return ctrl.Result{}, err
515517
}
516518

519+
if result, err := r.reconcilePreTerminateHook(ctx, controlPlane); err != nil || !result.IsZero() {
520+
return result, err
521+
}
522+
517523
// Control plane machines rollout due to configuration changes (e.g. upgrades) takes precedence over other operations.
518524
needRollout := controlPlane.MachinesNeedingRollout()
519525

@@ -698,14 +704,31 @@ func (r *RKE2ControlPlaneReconciler) reconcileDelete(ctx context.Context,
698704
}
699705

700706
// Delete control plane machines in parallel
701-
machinesToDelete := ownedMachines.Filter(collections.Not(collections.HasDeletionTimestamp))
707+
machinesToDelete := ownedMachines
702708

703709
var errs []error
704710

705711
for i := range machinesToDelete {
706712
m := machinesToDelete[i]
707713
logger := logger.WithValues("machine", m)
708714

715+
// During RKE2CP deletion we don't care about forwarding etcd leadership or removing etcd members.
716+
// So we are removing the pre-terminate hook.
717+
// This is important because when deleting RKE2CP we will delete all members of etcd and it's not possible
718+
// to forward etcd leadership without any member left after we went through the Machine deletion.
719+
// Also in this case the reconcileDelete code of the Machine controller won't execute Node drain
720+
// and wait for volume detach.
721+
if err := r.removePreTerminateHookAnnotationFromMachine(ctx, m); err != nil {
722+
errs = append(errs, err)
723+
724+
continue
725+
}
726+
727+
if !m.DeletionTimestamp.IsZero() {
728+
// Nothing to do, Machine already has deletionTimestamp set.
729+
continue
730+
}
731+
709732
if err := r.Client.Delete(ctx, machinesToDelete[i]); err != nil && !apierrors.IsNotFound(err) {
710733
logger.Error(err, "Failed to cleanup owned machine")
711734
errs = append(errs, err)
@@ -720,6 +743,8 @@ func (r *RKE2ControlPlaneReconciler) reconcileDelete(ctx context.Context,
720743
return ctrl.Result{}, err
721744
}
722745

746+
logger.Info("Waiting for control plane Machines to not exist anymore")
747+
723748
conditions.MarkFalse(rcp, controlplanev1.ResizedCondition, clusterv1.DeletingReason, clusterv1.ConditionSeverityInfo, "")
724749

725750
return ctrl.Result{RequeueAfter: deleteRequeueAfter}, nil
@@ -909,6 +934,108 @@ func (r *RKE2ControlPlaneReconciler) ClusterToRKE2ControlPlane(ctx context.Conte
909934
}
910935
}
911936

937+
func (r *RKE2ControlPlaneReconciler) reconcilePreTerminateHook(ctx context.Context, controlPlane *rke2.ControlPlane) (ctrl.Result, error) {
938+
// Ensure that every active machine has the drain hook set
939+
patchHookAnnotation := false
940+
941+
for _, machine := range controlPlane.Machines.Filter(collections.ActiveMachines) {
942+
if _, exists := machine.Annotations[controlplanev1.PreTerminateHookCleanupAnnotation]; !exists {
943+
machine.Annotations[controlplanev1.PreTerminateHookCleanupAnnotation] = ""
944+
patchHookAnnotation = true
945+
}
946+
}
947+
948+
if patchHookAnnotation {
949+
// Patch machine annoations
950+
if err := controlPlane.PatchMachines(ctx); err != nil {
951+
return ctrl.Result{}, err
952+
}
953+
}
954+
955+
if !controlPlane.HasDeletingMachine() {
956+
return ctrl.Result{}, nil
957+
}
958+
959+
log := ctrl.LoggerFrom(ctx)
960+
961+
// Return early, if there is already a deleting Machine without the pre-terminate hook.
962+
// We are going to wait until this Machine goes away before running the pre-terminate hook on other Machines.
963+
for _, deletingMachine := range controlPlane.DeletingMachines() {
964+
if _, exists := deletingMachine.Annotations[controlplanev1.PreTerminateHookCleanupAnnotation]; !exists {
965+
return ctrl.Result{RequeueAfter: deleteRequeueAfter}, nil
966+
}
967+
}
968+
969+
// Pick the Machine with the oldest deletionTimestamp to keep this function deterministic / reentrant
970+
// so we only remove the pre-terminate hook from one Machine at a time.
971+
deletingMachines := controlPlane.DeletingMachines()
972+
deletingMachine := controlPlane.SortedByDeletionTimestamp(deletingMachines)[0]
973+
974+
log = log.WithValues("Machine", klog.KObj(deletingMachine))
975+
ctx = ctrl.LoggerInto(ctx, log)
976+
977+
// Return early if there are other pre-terminate hooks for the Machine.
978+
// The CAPRKE2 pre-terminate hook should be the one executed last, so that kubelet
979+
// is still working while other pre-terminate hooks are run.
980+
if machineHasOtherPreTerminateHooks(deletingMachine) {
981+
return ctrl.Result{RequeueAfter: deleteRequeueAfter}, nil
982+
}
983+
984+
// Return early because the Machine controller is not yet waiting for the pre-terminate hook.
985+
c := conditions.Get(deletingMachine, clusterv1.PreTerminateDeleteHookSucceededCondition)
986+
if c == nil || c.Status != corev1.ConditionFalse || c.Reason != clusterv1.WaitingExternalHookReason {
987+
return ctrl.Result{RequeueAfter: deleteRequeueAfter}, nil
988+
}
989+
990+
// The following will execute and remove the pre-terminate hook from the Machine.
991+
992+
// If we have more than 1 Machine and etcd is managed we forward etcd leadership and remove the member
993+
// to keep the etcd cluster healthy.
994+
if controlPlane.Machines.Len() > 1 {
995+
workloadCluster, err := r.GetWorkloadCluster(ctx, controlPlane)
996+
if err != nil {
997+
return ctrl.Result{}, errors.Wrapf(err,
998+
"failed to remove etcd member for deleting Machine %s: failed to create client to workload cluster", klog.KObj(deletingMachine))
999+
}
1000+
1001+
// Note: In regular deletion cases (remediation, scale down) the leader should have been already moved.
1002+
// We're doing this again here in case the Machine became leader again or the Machine deletion was
1003+
// triggered in another way (e.g. a user running kubectl delete machine)
1004+
etcdLeaderCandidate := controlPlane.Machines.Filter(collections.Not(collections.HasDeletionTimestamp)).Newest()
1005+
if etcdLeaderCandidate != nil {
1006+
if err := workloadCluster.ForwardEtcdLeadership(ctx, deletingMachine, etcdLeaderCandidate); err != nil {
1007+
return ctrl.Result{}, errors.Wrapf(err, "failed to move leadership to candidate Machine %s", etcdLeaderCandidate.Name)
1008+
}
1009+
} else {
1010+
log.Info("Skip forwarding etcd leadership, because there is no other control plane Machine without a deletionTimestamp")
1011+
}
1012+
1013+
// Note: Removing the etcd member will lead to the etcd and the kube-apiserver Pod on the Machine shutting down.
1014+
if err := workloadCluster.RemoveEtcdMemberForMachine(ctx, deletingMachine); err != nil {
1015+
return ctrl.Result{}, errors.Wrapf(err, "failed to remove etcd member for deleting Machine %s", klog.KObj(deletingMachine))
1016+
}
1017+
}
1018+
1019+
if err := r.removePreTerminateHookAnnotationFromMachine(ctx, deletingMachine); err != nil {
1020+
return ctrl.Result{}, err
1021+
}
1022+
1023+
log.Info("Waiting for Machines to be deleted", "machines",
1024+
strings.Join(controlPlane.Machines.Filter(collections.HasDeletionTimestamp).Names(), ", "))
1025+
1026+
return ctrl.Result{RequeueAfter: deleteRequeueAfter}, nil
1027+
}
1028+
1029+
func machineHasOtherPreTerminateHooks(machine *clusterv1.Machine) bool {
1030+
for k := range machine.Annotations {
1031+
if strings.HasPrefix(k, clusterv1.PreTerminateDeleteHookAnnotationPrefix) && k != controlplanev1.PreTerminateHookCleanupAnnotation {
1032+
return true
1033+
}
1034+
}
1035+
1036+
return false
1037+
}
1038+
9121039
// getWorkloadCluster gets a cluster object.
9131040
// The cluster comes with an etcd client generator to connect to any etcd pod living on a managed machine.
9141041
func (r *RKE2ControlPlaneReconciler) getWorkloadCluster(ctx context.Context, clusterKey types.NamespacedName) (rke2.WorkloadCluster, error) {

controlplane/internal/controllers/scale.go

+26-6
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,9 @@ import (
2929
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
3030
kerrors "k8s.io/apimachinery/pkg/util/errors"
3131
"k8s.io/apiserver/pkg/storage/names"
32+
"k8s.io/klog/v2"
3233
ctrl "sigs.k8s.io/controller-runtime"
34+
"sigs.k8s.io/controller-runtime/pkg/client"
3335
"sigs.k8s.io/controller-runtime/pkg/log"
3436

3537
clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
@@ -159,11 +161,7 @@ func (r *RKE2ControlPlaneReconciler) scaleDownControlPlane(
159161
return ctrl.Result{}, err
160162
}
161163

162-
if err := r.workloadCluster.RemoveEtcdMemberForMachine(ctx, machineToDelete); err != nil {
163-
logger.Error(err, "Failed to remove etcd member for machine")
164-
165-
return ctrl.Result{}, err
166-
}
164+
// NOTE: etcd member removal will be performed by the rke2-cleanup hook after machine completes drain & all volumes are detached.
167165

168166
logger = logger.WithValues("machine", machineToDelete)
169167
if err := r.Client.Delete(ctx, machineToDelete); err != nil && !apierrors.IsNotFound(err) {
@@ -178,6 +176,25 @@ func (r *RKE2ControlPlaneReconciler) scaleDownControlPlane(
178176
return ctrl.Result{Requeue: true}, nil
179177
}
180178

179+
func (r *RKE2ControlPlaneReconciler) removePreTerminateHookAnnotationFromMachine(ctx context.Context, machine *clusterv1.Machine) error {
180+
if _, exists := machine.Annotations[controlplanev1.PreTerminateHookCleanupAnnotation]; !exists {
181+
// Nothing to do, the annotation is not set (anymore) on the Machine
182+
return nil
183+
}
184+
185+
log := ctrl.LoggerFrom(ctx)
186+
log.Info("Removing pre-terminate hook from control plane Machine")
187+
188+
machineOriginal := machine.DeepCopy()
189+
delete(machine.Annotations, controlplanev1.PreTerminateHookCleanupAnnotation)
190+
191+
if err := r.Client.Patch(ctx, machine, client.MergeFrom(machineOriginal)); err != nil {
192+
return errors.Wrapf(err, "failed to remove pre-terminate hook from control plane Machine %s", klog.KObj(machine))
193+
}
194+
195+
return nil
196+
}
197+
181198
// preflightChecks checks if the control plane is stable before proceeding with a scale up/scale down operation,
182199
// where stable means that:
183200
// - There are no machine deletion in progress
@@ -447,7 +464,10 @@ func (r *RKE2ControlPlaneReconciler) generateMachine(
447464
return errors.Wrap(err, "failed to marshal cluster configuration")
448465
}
449466

450-
machine.SetAnnotations(map[string]string{controlplanev1.RKE2ServerConfigurationAnnotation: string(serverConfig)})
467+
machine.SetAnnotations(map[string]string{
468+
controlplanev1.RKE2ServerConfigurationAnnotation: string(serverConfig),
469+
controlplanev1.PreTerminateHookCleanupAnnotation: "",
470+
})
451471

452472
if err := r.Client.Create(ctx, machine); err != nil {
453473
return errors.Wrap(err, "failed to create machine")

hack/ensure-kubectl.sh

+1-2
Original file line numberDiff line numberDiff line change
@@ -71,8 +71,7 @@ install_plugins() {
7171
tar zxvf "${KREW}.tar.gz" &&
7272
./"${KREW}" install krew
7373
)
74-
kubectl krew index add crust-gather https://github.com/crust-gather/crust-gather.git || true
75-
kubectl krew install crust-gather/crust-gather
74+
kubectl krew install crust-gather
7675
}
7776

7877
verify_kubectl_version

pkg/rke2/control_plane.go

+44
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ package rke2
1818

1919
import (
2020
"context"
21+
"sort"
2122

2223
"github.com/go-logr/logr"
2324
"github.com/pkg/errors"
@@ -268,6 +269,23 @@ func (c *ControlPlane) HasDeletingMachine() bool {
268269
return len(c.Machines.Filter(collections.HasDeletionTimestamp)) > 0
269270
}
270271

272+
// DeletingMachines returns machines in the control plane that are in the process of being deleted.
273+
func (c *ControlPlane) DeletingMachines() collections.Machines {
274+
return c.Machines.Filter(collections.HasDeletionTimestamp)
275+
}
276+
277+
// SortedByDeletionTimestamp returns the machines sorted by deletion timestamp.
278+
func (c *ControlPlane) SortedByDeletionTimestamp(s collections.Machines) []*clusterv1.Machine {
279+
res := make(machinesByDeletionTimestamp, 0, len(s))
280+
for _, value := range s {
281+
res = append(res, value)
282+
}
283+
284+
sort.Sort(res)
285+
286+
return res
287+
}
288+
271289
// MachinesNeedingRollout return a list of machines that need to be rolled out.
272290
func (c *ControlPlane) MachinesNeedingRollout() collections.Machines {
273291
// Ignore machines to be deleted.
@@ -383,3 +401,29 @@ func (c *ControlPlane) PatchMachines(ctx context.Context) error {
383401

384402
return kerrors.NewAggregate(errList)
385403
}
404+
405+
// machinesByDeletionTimestamp sorts a list of Machines by deletion timestamp, using their names as a tie breaker.
406+
// Machines without DeletionTimestamp go after machines with this field set.
407+
type machinesByDeletionTimestamp []*clusterv1.Machine
408+
409+
func (o machinesByDeletionTimestamp) Len() int { return len(o) }
410+
func (o machinesByDeletionTimestamp) Swap(i, j int) { o[i], o[j] = o[j], o[i] }
411+
func (o machinesByDeletionTimestamp) Less(i, j int) bool {
412+
if o[i].DeletionTimestamp == nil && o[j].DeletionTimestamp == nil {
413+
return o[i].Name < o[j].Name
414+
}
415+
416+
if o[i].DeletionTimestamp == nil {
417+
return false
418+
}
419+
420+
if o[j].DeletionTimestamp == nil {
421+
return true
422+
}
423+
424+
if o[i].DeletionTimestamp.Equal(o[j].DeletionTimestamp) {
425+
return o[i].Name < o[j].Name
426+
}
427+
428+
return o[i].DeletionTimestamp.Before(o[j].DeletionTimestamp)
429+
}

0 commit comments

Comments
 (0)