Skip to content

Commit ca9f4e1

Browse files
authored
Merge pull request #4213 from jnummelin/feature/etcd-member-mgmt
Manage Etcd peers using custom resources
2 parents 70de5fa + e8c5dd8 commit ca9f4e1

30 files changed

+1827
-6
lines changed

Makefile

+7-1
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,12 @@ controllergen_targets += pkg/apis/autopilot/v1beta2/.controller-gen.stamp
124124
pkg/apis/autopilot/v1beta2/.controller-gen.stamp: $(shell find pkg/apis/autopilot/v1beta2/ -maxdepth 1 -type f -name \*.go)
125125
pkg/apis/autopilot/v1beta2/.controller-gen.stamp: gen_output_dir = autopilot
126126

127+
controllergen_targets += pkg/apis/etcd/v1beta1/.controller-gen.stamp
128+
pkg/apis/etcd/v1beta1/.controller-gen.stamp: $(shell find pkg/apis/etcd/v1beta1/ -maxdepth 1 -type f -name \*.go)
129+
pkg/apis/etcd/v1beta1/.controller-gen.stamp: gen_output_dir = etcd
130+
127131
codegen_targets += $(controllergen_targets)
132+
128133
pkg/apis/%/.controller-gen.stamp: .k0sbuild.docker-image.k0s hack/tools/boilerplate.go.txt hack/tools/Makefile.variables
129134
rm -rf 'static/manifests/$(gen_output_dir)/CustomResourceDefinition'
130135
mkdir -p 'static/manifests/$(gen_output_dir)'
@@ -137,7 +142,7 @@ pkg/apis/%/.controller-gen.stamp: .k0sbuild.docker-image.k0s hack/tools/boilerpl
137142
&& mv -f -- "$$gendir"/zz_generated.deepcopy.go '$(dir $@).'
138143
touch -- '$@'
139144

140-
clientset_input_dirs := pkg/apis/autopilot/v1beta2 pkg/apis/k0s/v1beta1 pkg/apis/helm/v1beta1
145+
clientset_input_dirs := pkg/apis/autopilot/v1beta2 pkg/apis/k0s/v1beta1 pkg/apis/helm/v1beta1 pkg/apis/etcd/v1beta1
141146
codegen_targets += pkg/client/clientset/.client-gen.stamp
142147
pkg/client/clientset/.client-gen.stamp: $(shell find $(clientset_input_dirs) -type f -name \*.go -not -name \*_test.go -not -name zz_\*)
143148
pkg/client/clientset/.client-gen.stamp: .k0sbuild.docker-image.k0s hack/tools/boilerplate.go.txt embedded-bins/Makefile.variables
@@ -163,6 +168,7 @@ static/zz_generated_assets.go: .k0sbuild.docker-image.k0s hack/tools/Makefile.va
163168
static/manifests/helm/CustomResourceDefinition/... \
164169
static/manifests/v1beta1/CustomResourceDefinition/... \
165170
static/manifests/autopilot/CustomResourceDefinition/... \
171+
static/manifests/etcd/CustomResourceDefinition/... \
166172
static/manifests/calico/... \
167173
static/manifests/windows/... \
168174
static/misc/...

cmd/controller/controller.go

+13
Original file line numberDiff line numberDiff line change
@@ -325,6 +325,19 @@ func (c *command) start(ctx context.Context) error {
325325
CertManager: worker.NewCertificateManager(ctx, c.K0sVars.KubeletAuthConfigPath),
326326
})
327327

328+
if nodeConfig.Spec.Storage.Type == v1beta1.EtcdStorageType && !nodeConfig.Spec.Storage.Etcd.IsExternalClusterUsed() {
329+
etcdReconciler, err := controller.NewEtcdMemberReconciler(adminClientFactory, c.K0sVars, nodeConfig.Spec.Storage.Etcd, leaderElector)
330+
if err != nil {
331+
return err
332+
}
333+
etcdCRDSaver, err := controller.NewManifestsSaver("etcd-member", c.K0sVars.DataDir)
334+
if err != nil {
335+
return fmt.Errorf("failed to initialize etcd-member manifests saver: %w", err)
336+
}
337+
clusterComponents.Add(ctx, controller.NewCRD(etcdCRDSaver, []string{"etcd"}))
338+
nodeComponents.Add(ctx, etcdReconciler)
339+
}
340+
328341
perfTimer.Checkpoint("starting-certificates-init")
329342
certs := &Certificates{
330343
ClusterSpec: nodeConfig.Spec,

docs/remove_controller.md

+55
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,61 @@ k0s reset
4141
reboot
4242
```
4343

44+
### Declarative Etcd member management
45+
46+
Starting from version 1.30, k0s also supports a declarative way to remove an
47+
etcd member. Since in k0s the etcd cluster is set up so that the etcd API is
48+
**NOT** exposed outside the nodes, it is difficult for external automation
49+
like Cluster API, Terraform, etc. to handle controller node replacements.
50+
51+
Each controller manages its own `EtcdMember` object.
52+
53+
```shell
54+
k0s kubectl get etcdmember
55+
NAME PEER ADDRESS MEMBER ID JOINED RECONCILE STATUS
56+
controller0 172.17.0.2 b8e14bda2255bc24 True
57+
controller1 172.17.0.3 cb242476916c8a58 True
58+
controller2 172.17.0.4 9c90504b1bc867bb True
59+
```
60+
61+
By marking an `EtcdMember` object to leave the etcd cluster, k0s will handle the
62+
interaction with etcd. For example, in a 3 controller HA setup, you can
63+
remove a member by flagging it to leave:
64+
65+
```console
66+
$ kubectl patch etcdmember controller2 -p '{"spec":{"leave":true}}' --type merge
67+
etcdmember.etcd.k0sproject.io/controller2 patched
68+
```
69+
70+
The join/leave status is tracked in the object's conditions. This allows you to
71+
wait for the leave to actually happen:
72+
73+
```console
74+
$ kubectl wait etcdmember controller2 --for condition=Joined=False
75+
etcdmember.etcd.k0sproject.io/controller2 condition met
76+
```
77+
78+
You'll see that the node has left the etcd cluster:
79+
80+
```console
81+
$ k0s kc get etcdmember
82+
NAME PEER ADDRESS MEMBER ID JOINED RECONCILE STATUS
83+
controller0 172.17.0.2 b8e14bda2255bc24 True
84+
controller1 172.17.0.3 cb242476916c8a58 True
85+
controller2 172.17.0.4 9c90504b1bc867bb False Success
86+
```
87+
88+
```console
89+
$ k0s etcd member-list
90+
{"members":{"controller0":"https://172.17.0.2:2380","controller1":"https://172.17.0.3:2380"}}
91+
```
92+
93+
The objects for members that have already left the etcd cluster are kept
94+
available for tracking purposes. Once the member has left the cluster, the
95+
object status will reflect that it is safe to remove it.
96+
97+
**Note:** If you re-join the same node without removing the corresponding `etcdmember` object, the desired state will be updated back to `spec.leave: false` automatically. This is because k0s currently has no easy way to prevent a node from joining the etcd cluster.
98+
4499
## Replace a controller
45100

46101
To replace a controller, you first remove the old controller (like described above) then follow the [manual installation procedure](k0s-multi-node.md) to add the new one.

internal/testutil/kube_client.go

+9
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ package testutil
1818

1919
import (
2020
"fmt"
21+
2122
"k8s.io/client-go/rest"
2223

2324
"k8s.io/apimachinery/pkg/runtime"
@@ -32,9 +33,13 @@ import (
3233
restfake "k8s.io/client-go/rest/fake"
3334
kubetesting "k8s.io/client-go/testing"
3435

36+
etcdMemberClient "github.com/k0sproject/k0s/pkg/client/clientset/typed/etcd/v1beta1"
3537
cfgClient "github.com/k0sproject/k0s/pkg/client/clientset/typed/k0s/v1beta1"
38+
kubeutil "github.com/k0sproject/k0s/pkg/kubernetes"
3639
)
3740

41+
var _ kubeutil.ClientFactoryInterface = (*FakeClientFactory)(nil)
42+
3843
// NewFakeClientFactory creates a new client factory that internally uses only the kube fake client interface
3944
func NewFakeClientFactory(objects ...runtime.Object) FakeClientFactory {
4045
rawDiscovery := &discoveryfake.FakeDiscovery{Fake: &kubetesting.Fake{}}
@@ -89,3 +94,7 @@ func (f FakeClientFactory) GetRESTClient() (rest.Interface, error) {
8994
// GetRESTConfig returns a pointer to a new, empty rest.Config on every call.
func (f FakeClientFactory) GetRESTConfig() *rest.Config {
9095
return &rest.Config{}
9196
}
97+
98+
// GetEtcdMemberClient is not implemented by the fake factory: it always
// returns a nil client together with a "NOT IMPLEMENTED" error.
func (f FakeClientFactory) GetEtcdMemberClient() (etcdMemberClient.EtcdMemberInterface, error) {
99+
return nil, fmt.Errorf("NOT IMPLEMENTED")
100+
}

inttest/Makefile.variables

+1
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ smoketests := \
3434
check-dualstack \
3535
check-dualstack-dynamicconfig \
3636
check-embedded-binaries \
37+
check-etcdmember \
3738
check-externaletcd \
3839
check-extraargs \
3940
check-hacontrolplane \

inttest/common/bootloosesuite.go

+23
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ import (
4343

4444
"github.com/k0sproject/k0s/internal/pkg/file"
4545
apclient "github.com/k0sproject/k0s/pkg/client/clientset"
46+
etcdmemberclient "github.com/k0sproject/k0s/pkg/client/clientset/typed/etcd/v1beta1"
4647
"github.com/k0sproject/k0s/pkg/constant"
4748
"github.com/k0sproject/k0s/pkg/k0scontext"
4849
"github.com/k0sproject/k0s/pkg/kubernetes/watch"
@@ -774,6 +775,18 @@ func (s *BootlooseSuite) StopController(name string) error {
774775
return s.launchDelegate.StopController(s.Context(), ssh)
775776
}
776777

778+
// RestartController stops and then starts the controller on the node with the
// given name, using an SSH connection to that node.
//
// A failure to open the SSH connection fails the test right away through
// s.Require(). An error from stopping the controller is returned as-is and no
// start is attempted; otherwise the result of starting the controller is
// returned.
func (s *BootlooseSuite) RestartController(name string) error {
779+
ssh, err := s.SSH(s.Context(), name)
780+
s.Require().NoError(err)
781+
defer ssh.Disconnect()
782+
s.T().Log("killing k0s")
783+
err = s.launchDelegate.StopController(s.Context(), ssh)
784+
if err != nil {
785+
return err
786+
}
787+
// The stop succeeded; bring the controller back up over the same SSH connection.
return s.launchDelegate.StartController(s.Context(), ssh)
788+
}
789+
777790
func (s *BootlooseSuite) StartController(name string) error {
778791
ssh, err := s.SSH(s.Context(), name)
779792
s.Require().NoError(err)
@@ -896,6 +909,16 @@ func (s *BootlooseSuite) ExtensionsClient(node string, k0sKubeconfigArgs ...stri
896909
return extclient.NewForConfig(cfg)
897910
}
898911

912+
// EtcdMemberClient returns a client for accessing etcd member CRDs. The client
// is built from the kubeconfig obtained for the given node; k0sKubeconfigArgs
// are passed through to GetKubeConfig. An error from fetching the kubeconfig
// is returned unchanged.
func (s *BootlooseSuite) EtcdMemberClient(node string, k0sKubeconfigArgs ...string) (*etcdmemberclient.EtcdV1beta1Client, error) {
914+
cfg, err := s.GetKubeConfig(node, k0sKubeconfigArgs...)
915+
if err != nil {
916+
return nil, err
917+
}
918+
919+
return etcdmemberclient.NewForConfig(cfg)
920+
}
921+
899922
// WaitForNodeReady waits until the given node is seen in the "Ready" state in the Kubernetes API
900923
func (s *BootlooseSuite) WaitForNodeReady(name string, kc kubernetes.Interface) error {
901924
s.T().Logf("waiting to see %s ready in kube API", name)

inttest/common/util.go

+10
Original file line numberDiff line numberDiff line change
@@ -334,6 +334,16 @@ func VerifyKubeletMetrics(ctx context.Context, kc *kubernetes.Clientset, node st
334334
})
335335
}
336336

337+
// ResetNode connects to the named node over SSH and runs
// "<suite.K0sFullPath> reset --debug" on it. The command output is discarded;
// only the error from opening the connection or from executing the command is
// returned.
func ResetNode(name string, suite *BootlooseSuite) error {
338+
ssh, err := suite.SSH(suite.Context(), name)
339+
if err != nil {
340+
return err
341+
}
342+
defer ssh.Disconnect()
343+
_, err = ssh.ExecWithOutput(suite.Context(), fmt.Sprintf("%s reset --debug", suite.K0sFullPath))
344+
return err
345+
}
346+
337347
// Retrieves the LogfFn stored in context, falling back to use testing.T's Logf
338348
// if the context has a *testing.T or logrus's Infof as a last resort.
339349
func logfFrom(ctx context.Context) LogfFn {

0 commit comments

Comments
 (0)