Skip to content

Commit 7d82a13

Browse files
yash97sushrk
authored andcommitted
Merging Eni cleanup branch to master (#535)
* Centralized leaked ENI cleanup- CNINode CRD changes * Centralized leaked ENI cleanup- refactor periodic cleanup & add node termination cleaner (#505) * Centralized leaked ENI cleanup- refactor periodic cleanup & add node termination cleaner * defer updating metrics * WIP commit no.3 * instance id as tag, not requeuing on node cleanup failure * not requeing on node termination cleaner failure * minor log change * adding node termination failure metric * minor test change * using paginated calls to describe network interface while eni cleanup * refactor based on comments --------- Co-authored-by: Yash Thakkar <[email protected]> --------- Co-authored-by: sushrk <[email protected]> Co-authored-by: Sushmitha Ravikumar <[email protected]>
1 parent df7ed4d commit 7d82a13

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

47 files changed

+2102
-710
lines changed

PROJECT

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ resources:
1919
version: v1beta1
2020
- api:
2121
crdVersion: v1
22+
controller: true
2223
domain: k8s.aws
2324
group: vpcresources
2425
kind: CNINode

apis/vpcresources/v1alpha1/cninode_types.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,8 @@ type Feature struct {
3535
// CNINodeSpec defines the desired state of CNINode
3636
type CNINodeSpec struct {
3737
Features []Feature `json:"features,omitempty"`
38+
// Additional tag key/value added to all network interfaces provisioned by the vpc-resource-controller and VPC-CNI
39+
Tags map[string]string `json:"tags,omitempty"`
3840
}
3941

4042
// CNINodeStatus defines the managed VPC resources.

apis/vpcresources/v1alpha1/zz_generated.deepcopy.go

Lines changed: 7 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

config/crd/bases/vpcresources.k8s.aws_cninodes.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,12 @@ spec:
6161
type: string
6262
type: object
6363
type: array
64+
tags:
65+
additionalProperties:
66+
type: string
67+
description: Additional tag key/value added to all network interfaces
68+
provisioned by the vpc-resource-controller and VPC-CNI
69+
type: object
6470
type: object
6571
status:
6672
description: CNINodeStatus defines the managed VPC resources.

config/rbac/role.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,8 @@ rules:
6060
- create
6161
- get
6262
- list
63+
- patch
64+
- update
6365
- watch
6466
- apiGroups:
6567
- vpcresources.k8s.aws

controllers/core/configmap_controller.go

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,6 @@ import (
2727

2828
"github.com/go-logr/logr"
2929
corev1 "k8s.io/api/core/v1"
30-
v1 "k8s.io/api/core/v1"
3130
"k8s.io/apimachinery/pkg/api/errors"
3231
"k8s.io/apimachinery/pkg/runtime"
3332
ctrl "sigs.k8s.io/controller-runtime"
@@ -86,7 +85,7 @@ func (r *ConfigMapReconciler) Reconcile(ctx context.Context, req ctrl.Request) (
8685
r.K8sAPI,
8786
utils.BranchENICoolDownUpdateReason,
8887
fmt.Sprintf("Branch ENI cool down period has been updated to %s", cooldown.GetCoolDown().GetCoolDownPeriod()),
89-
v1.EventTypeNormal,
88+
corev1.EventTypeNormal,
9089
r.Log,
9190
)
9291
}

controllers/core/configmap_controller_test.go

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,6 @@ import (
2222
"github.com/golang/mock/gomock"
2323
"github.com/stretchr/testify/assert"
2424
corev1 "k8s.io/api/core/v1"
25-
v1 "k8s.io/api/core/v1"
2625
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
2726
"k8s.io/apimachinery/pkg/runtime"
2827
"k8s.io/apimachinery/pkg/types"
@@ -225,8 +224,8 @@ func Test_Reconcile_UpdateNode_Error(t *testing.T) {
225224

226225
}
227226

228-
func createCoolDownMockCM(cooldownTime string) *v1.ConfigMap {
229-
return &v1.ConfigMap{
227+
func createCoolDownMockCM(cooldownTime string) *corev1.ConfigMap {
228+
return &corev1.ConfigMap{
230229
ObjectMeta: metav1.ObjectMeta{
231230
Name: config.VpcCniConfigMapName,
232231
Namespace: config.KubeSystemNamespace,

controllers/core/node_controller.go

Lines changed: 0 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@ package controllers
1515

1616
import (
1717
"context"
18-
"fmt"
1918
"net/http"
2019
"time"
2120

@@ -25,8 +24,6 @@ import (
2524
"github.com/aws/amazon-vpc-resource-controller-k8s/pkg/k8s"
2625
"github.com/aws/amazon-vpc-resource-controller-k8s/pkg/node/manager"
2726
"github.com/google/uuid"
28-
"github.com/prometheus/client_golang/prometheus"
29-
"sigs.k8s.io/controller-runtime/pkg/metrics"
3027

3128
"github.com/go-logr/logr"
3229
corev1 "k8s.io/api/core/v1"
@@ -36,21 +33,9 @@ import (
3633
ctrl "sigs.k8s.io/controller-runtime"
3734
"sigs.k8s.io/controller-runtime/pkg/client"
3835
"sigs.k8s.io/controller-runtime/pkg/controller"
39-
"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
4036
"sigs.k8s.io/controller-runtime/pkg/healthz"
4137
)
4238

43-
var (
44-
leakedCNINodeResourceCount = prometheus.NewCounter(
45-
prometheus.CounterOpts{
46-
Name: "orphaned_cninode_objects",
47-
Help: "The number of CNINode objects that do not have a parent Node object (likely indicating a leak from a deleted node)",
48-
},
49-
)
50-
51-
prometheusRegistered = false
52-
)
53-
5439
// MaxNodeConcurrentReconciles is the number of go routines that can invoke
5540
// Reconcile in parallel. Since Node Reconciler, performs local operation
5641
// on cache only a single go routine should be sufficient. Using more than
@@ -93,23 +78,6 @@ func (r *NodeReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.
9378

9479
if nodeErr := r.Client.Get(ctx, req.NamespacedName, node); nodeErr != nil {
9580
if errors.IsNotFound(nodeErr) {
96-
// clean up CNINode finalizer
97-
cniNode := &v1alpha1.CNINode{}
98-
if cninodeErr := r.Client.Get(ctx, req.NamespacedName, cniNode); cninodeErr == nil {
99-
if yes := controllerutil.ContainsFinalizer(cniNode, NodeTerminationFinalizer); yes {
100-
updated := cniNode.DeepCopy()
101-
if yes = controllerutil.RemoveFinalizer(updated, NodeTerminationFinalizer); yes {
102-
if err := r.Client.Patch(ctx, updated, client.MergeFrom(cniNode)); err != nil {
103-
return ctrl.Result{}, err
104-
}
105-
r.Log.Info("removed leaked CNINode resource's finalizer", "cninode", cniNode.Name)
106-
}
107-
leakedCNINodeResourceCount.Inc()
108-
}
109-
} else if !errors.IsNotFound(cninodeErr) {
110-
return ctrl.Result{}, fmt.Errorf("failed getting CNINode %s from cached client, %w", cniNode.Name, cninodeErr)
111-
}
112-
11381
// clean up local cached nodes
11482
_, found := r.Manager.GetNode(req.Name)
11583
if found {
@@ -148,8 +116,6 @@ func (r *NodeReconciler) SetupWithManager(mgr ctrl.Manager, maxConcurrentReconci
148116
map[string]healthz.Checker{"health-node-controller": r.Check()},
149117
)
150118

151-
prometheusRegister()
152-
153119
return ctrl.NewControllerManagedBy(mgr).
154120
For(&corev1.Node{}).
155121
WithOptions(controller.Options{MaxConcurrentReconciles: maxConcurrentReconciles}).
@@ -193,11 +159,3 @@ func (r *NodeReconciler) Check() healthz.Checker {
193159
return err
194160
}
195161
}
196-
197-
func prometheusRegister() {
198-
if !prometheusRegistered {
199-
metrics.Registry.MustRegister(leakedCNINodeResourceCount)
200-
201-
prometheusRegistered = true
202-
}
203-
}

controllers/core/node_controller_test.go

Lines changed: 0 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,6 @@ import (
3232
"k8s.io/apimachinery/pkg/types"
3333
"sigs.k8s.io/controller-runtime/pkg/client"
3434
fakeClient "sigs.k8s.io/controller-runtime/pkg/client/fake"
35-
"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
3635
"sigs.k8s.io/controller-runtime/pkg/log/zap"
3736
"sigs.k8s.io/controller-runtime/pkg/reconcile"
3837
)
@@ -143,43 +142,6 @@ func TestNodeReconciler_Reconcile_DeleteNonExistentNode(t *testing.T) {
143142
assert.Equal(t, res, reconcile.Result{})
144143
}
145144

146-
func TestNodeReconciler_Reconcile_DeleteNonExistentNodesCNINode(t *testing.T) {
147-
ctrl := gomock.NewController(t)
148-
defer ctrl.Finish()
149-
150-
mock := NewNodeMock(ctrl)
151-
cniNode := &v1alpha1.CNINode{
152-
ObjectMeta: v1.ObjectMeta{
153-
Name: mockNodeName,
154-
Finalizers: []string{NodeTerminationFinalizer},
155-
},
156-
}
157-
mock.Reconciler.Client = fakeClient.NewClientBuilder().WithScheme(mock.Reconciler.Scheme).WithObjects(cniNode).Build()
158-
159-
mock.Conditions.EXPECT().GetPodDataStoreSyncStatus().Return(true)
160-
mock.Manager.EXPECT().GetNode(mockNodeName).Return(mock.MockNode, false)
161-
162-
original := &v1alpha1.CNINode{}
163-
err := mock.Reconciler.Client.Get(context.TODO(), types.NamespacedName{Name: cniNode.Name}, original)
164-
assert.NoError(t, err)
165-
assert.True(t, controllerutil.ContainsFinalizer(original, NodeTerminationFinalizer), "the CNINode has finalizer")
166-
167-
res, err := mock.Reconciler.Reconcile(context.TODO(), reconcileRequest)
168-
assert.NoError(t, err)
169-
assert.Equal(t, res, reconcile.Result{})
170-
171-
node := &corev1.Node{}
172-
updated := &v1alpha1.CNINode{}
173-
err = mock.Reconciler.Client.Get(context.TODO(), types.NamespacedName{Name: cniNode.Name}, node)
174-
assert.Error(t, err, "the node shouldn't existing")
175-
assert.True(t, errors.IsNotFound(err))
176-
177-
err = mock.Reconciler.Client.Get(context.TODO(), types.NamespacedName{Name: cniNode.Name}, updated)
178-
assert.NoError(t, err)
179-
assert.True(t, updated.Name == mockNodeName, "the CNINode should existing and waiting for finalizer removal")
180-
assert.False(t, controllerutil.ContainsFinalizer(updated, NodeTerminationFinalizer), "CNINode finalizer should be removed when the node is gone")
181-
}
182-
183145
func TestNodeReconciler_Reconcile_DeleteNonExistentUnmanagedNode(t *testing.T) {
184146
ctrl := gomock.NewController(t)
185147
defer ctrl.Finish()

controllers/core/pod_controller.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -153,7 +153,7 @@ func (r *PodReconciler) Reconcile(request custom.Request) (ctrl.Result, error) {
153153
} else {
154154
result, err = resourceHandler.HandleCreate(int(totalCount), pod)
155155
}
156-
if err != nil || result.Requeue == true {
156+
if err != nil || result.Requeue {
157157
return result, err
158158
}
159159
logger.V(1).Info("handled resource without error",

0 commit comments

Comments
 (0)