@@ -25,6 +25,7 @@ import (
25
25
"slices"
26
26
"strconv"
27
27
"strings"
28
+ "sync"
28
29
"time"
29
30
30
31
"github.com/aenix-io/etcd-operator/internal/log"
@@ -47,6 +48,10 @@ import (
47
48
clientv3 "go.etcd.io/etcd/client/v3"
48
49
)
49
50
51
// etcdDefaultTimeout is the deadline applied to the context used when
// querying the etcd endpoints during reconciliation.
const etcdDefaultTimeout = 5 * time.Second
54
+
50
55
// EtcdClusterReconciler reconciles an EtcdCluster object
51
56
type EtcdClusterReconciler struct {
52
57
client.Client
@@ -56,6 +61,7 @@ type EtcdClusterReconciler struct {
56
61
// +kubebuilder:rbac:groups=etcd.aenix.io,resources=etcdclusters,verbs=get;list;watch;create;update;patch;delete
57
62
// +kubebuilder:rbac:groups=etcd.aenix.io,resources=etcdclusters/status,verbs=get;update;patch
58
63
// +kubebuilder:rbac:groups=etcd.aenix.io,resources=etcdclusters/finalizers,verbs=update
64
+ // +kubebuilder:rbac:groups="",resources=endpoints,verbs=get;list;watch
59
65
// +kubebuilder:rbac:groups="",resources=configmaps,verbs=get;list;watch;create;update;watch;delete;patch
60
66
// +kubebuilder:rbac:groups="",resources=services,verbs=get;create;delete;update;patch;list;watch
61
67
// +kubebuilder:rbac:groups="",resources=secrets,verbs=view;list;watch
@@ -80,13 +86,68 @@ func (r *EtcdClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request)
80
86
return reconcile.Result {}, nil
81
87
}
82
88
89
+ state := observables {}
90
+
91
+ // create two services and the pdb
92
+ err = r .ensureUnconditionalObjects (ctx , instance )
93
+ if err != nil {
94
+ return ctrl.Result {}, err
95
+ }
96
+
97
+ // fetch STS if exists
98
+ err = r .Get (ctx , req .NamespacedName , & state .statefulSet )
99
+ if client .IgnoreNotFound (err ) != nil {
100
+ return ctrl.Result {}, fmt .Errorf ("couldn't get statefulset: %w" , err )
101
+ }
102
+ state .stsExists = state .statefulSet .UID != ""
103
+
104
+ // fetch endpoints
105
+ clusterClient , singleClients , err := factory .NewEtcdClientSet (ctx , instance , r .Client )
106
+ if err != nil {
107
+ return ctrl.Result {}, err
108
+ }
109
+ state .endpointsFound = clusterClient != nil && singleClients != nil
110
+
111
+ if ! state .endpointsFound {
112
+ if ! state .stsExists {
113
+ // TODO: happy path for new cluster creation
114
+ log .Debug (ctx , "happy path for new cluster creation (not yet implemented)" )
115
+ }
116
+ }
117
+
118
+ // get status of every endpoint and member list from every endpoint
119
+ state .etcdStatuses = make ([]etcdStatus , len (singleClients ))
120
+ {
121
+ var wg sync.WaitGroup
122
+ ctx , cancel := context .WithTimeout (ctx , etcdDefaultTimeout )
123
+ for i := range singleClients {
124
+ wg .Add (1 )
125
+ go func (i int ) {
126
+ defer wg .Done ()
127
+ state .etcdStatuses [i ].fill (ctx , singleClients [i ])
128
+ }(i )
129
+ }
130
+ wg .Wait ()
131
+ cancel ()
132
+ }
133
+ state .setClusterID ()
134
+ if state .inSplitbrain () {
135
+ log .Error (ctx , fmt .Errorf ("etcd cluster in splitbrain" ), "etcd cluster in splitbrain, dropping from reconciliation queue" )
136
+ factory .SetCondition (instance , factory .NewCondition (etcdaenixiov1alpha1 .EtcdConditionError ).
137
+ WithStatus (true ).
138
+ WithReason (string (etcdaenixiov1alpha1 .EtcdCondTypeSplitbrain )).
139
+ WithMessage (string (etcdaenixiov1alpha1 .EtcdErrorCondSplitbrainMessage )).
140
+ Complete (),
141
+ )
142
+ return r .updateStatus (ctx , instance )
143
+ }
83
144
// fill conditions
84
145
if len (instance .Status .Conditions ) == 0 {
85
146
factory .FillConditions (instance )
86
147
}
87
148
88
149
// ensure managed resources
89
- if err = r .ensureClusterObjects (ctx , instance ); err != nil {
150
+ if err = r .ensureConditionalClusterObjects (ctx , instance ); err != nil {
90
151
return r .updateStatusOnErr (ctx , instance , fmt .Errorf ("cannot create Cluster auxiliary objects: %w" , err ))
91
152
}
92
153
@@ -138,8 +199,8 @@ func (r *EtcdClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request)
138
199
return r .updateStatus (ctx , instance )
139
200
}
140
201
141
- // ensureClusterObjects creates or updates all objects owned by cluster CR
142
- func (r * EtcdClusterReconciler ) ensureClusterObjects (
202
+ // ensureConditionalClusterObjects creates or updates the cluster state ConfigMap and the StatefulSet owned by cluster CR
203
+ func (r * EtcdClusterReconciler ) ensureConditionalClusterObjects (
143
204
ctx context.Context , cluster * etcdaenixiov1alpha1.EtcdCluster ) error {
144
205
145
206
if err := factory .CreateOrUpdateClusterStateConfigMap (ctx , cluster , r .Client ); err != nil {
@@ -148,30 +209,12 @@ func (r *EtcdClusterReconciler) ensureClusterObjects(
148
209
}
149
210
log .Debug (ctx , "cluster state configmap reconciled" )
150
211
151
- if err := factory .CreateOrUpdateHeadlessService (ctx , cluster , r .Client ); err != nil {
152
- log .Error (ctx , err , "reconcile headless service failed" )
153
- return err
154
- }
155
- log .Debug (ctx , "headless service reconciled" )
156
-
157
212
if err := factory .CreateOrUpdateStatefulSet (ctx , cluster , r .Client ); err != nil {
158
213
log .Error (ctx , err , "reconcile statefulset failed" )
159
214
return err
160
215
}
161
216
log .Debug (ctx , "statefulset reconciled" )
162
217
163
- if err := factory .CreateOrUpdateClientService (ctx , cluster , r .Client ); err != nil {
164
- log .Error (ctx , err , "reconcile client service failed" )
165
- return err
166
- }
167
- log .Debug (ctx , "client service reconciled" )
168
-
169
- if err := factory .CreateOrUpdatePdb (ctx , cluster , r .Client ); err != nil {
170
- log .Error (ctx , err , "reconcile pdb failed" )
171
- return err
172
- }
173
- log .Debug (ctx , "pdb reconciled" )
174
-
175
218
return nil
176
219
}
177
220
@@ -498,3 +541,57 @@ func (r *EtcdClusterReconciler) disableAuth(ctx context.Context, authClient clie
498
541
499
542
return nil
500
543
}
544
+
545
+ // ensureUnconditionalObjects creates the two services and the PDB
546
+ // which can be created at the start of the reconciliation loop
547
+ // without any risk of disrupting the etcd cluster
548
+ func (r * EtcdClusterReconciler ) ensureUnconditionalObjects (ctx context.Context , instance * etcdaenixiov1alpha1.EtcdCluster ) error {
549
+ const concurrentOperations = 3
550
+ c := make (chan error )
551
+ defer close (c )
552
+ ctx , cancel := context .WithCancel (ctx )
553
+ defer cancel ()
554
+ var wg sync.WaitGroup
555
+ wg .Add (concurrentOperations )
556
+ wrapWithMsg := func (err error , msg string ) error {
557
+ if err != nil {
558
+ return fmt .Errorf (msg + ": %w" , err )
559
+ }
560
+ return nil
561
+ }
562
+ go func (chan <- error ) {
563
+ defer wg .Done ()
564
+ select {
565
+ case <- ctx .Done ():
566
+ case c <- wrapWithMsg (factory .CreateOrUpdateClientService (ctx , instance , r .Client ),
567
+ "couldn't ensure client service" ):
568
+ }
569
+ }(c )
570
+ go func (chan <- error ) {
571
+ defer wg .Done ()
572
+ select {
573
+ case <- ctx .Done ():
574
+ case c <- wrapWithMsg (factory .CreateOrUpdateHeadlessService (ctx , instance , r .Client ),
575
+ "couldn't ensure headless service" ):
576
+ }
577
+ }(c )
578
+ go func (chan <- error ) {
579
+ defer wg .Done ()
580
+ select {
581
+ case <- ctx .Done ():
582
+ case c <- wrapWithMsg (factory .CreateOrUpdatePdb (ctx , instance , r .Client ),
583
+ "couldn't ensure pod disruption budget" ):
584
+ }
585
+ }(c )
586
+
587
+ for i := 0 ; i < concurrentOperations ; i ++ {
588
+ if err := <- c ; err != nil {
589
+ cancel ()
590
+
591
+ // let all goroutines select the ctx.Done() case to avoid races on closed channels
592
+ wg .Wait ()
593
+ return err
594
+ }
595
+ }
596
+ return nil
597
+ }
0 commit comments