Skip to content

Commit bd3ec5d

Browse files
committed
Centralized leaked ENI cleanup- CNINode CRD changes
1 parent f93b912 commit bd3ec5d

File tree

14 files changed

+143
-29
lines changed

14 files changed

+143
-29
lines changed

apis/vpcresources/v1alpha1/cninode_types.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,8 @@ type Feature struct {
3535
// CNINodeSpec defines the desired state of CNINode
3636
type CNINodeSpec struct {
3737
Features []Feature `json:"features,omitempty"`
38+
// Additional tag key/value added to all network interfaces provisioned by the vpc-resource-controller and VPC-CNI
39+
Tags map[string]string `json:"tags,omitempty"`
3840
}
3941

4042
// CNINodeStatus defines the managed VPC resources.

apis/vpcresources/v1alpha1/zz_generated.deepcopy.go

Lines changed: 7 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

config/crd/bases/vpcresources.k8s.aws_cninodes.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,12 @@ spec:
6161
type: string
6262
type: object
6363
type: array
64+
tags:
65+
additionalProperties:
66+
type: string
67+
description: Additional tag key/value added to all network interfaces
68+
provisioned by the vpc-resource-controller and VPC-CNI
69+
type: object
6470
type: object
6571
status:
6672
description: CNINodeStatus defines the managed VPC resources.

controllers/core/configmap_controller.go

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,6 @@ import (
2727

2828
"github.com/go-logr/logr"
2929
corev1 "k8s.io/api/core/v1"
30-
v1 "k8s.io/api/core/v1"
3130
"k8s.io/apimachinery/pkg/api/errors"
3231
"k8s.io/apimachinery/pkg/runtime"
3332
ctrl "sigs.k8s.io/controller-runtime"
@@ -86,7 +85,7 @@ func (r *ConfigMapReconciler) Reconcile(ctx context.Context, req ctrl.Request) (
8685
r.K8sAPI,
8786
utils.BranchENICoolDownUpdateReason,
8887
fmt.Sprintf("Branch ENI cool down period has been updated to %s", cooldown.GetCoolDown().GetCoolDownPeriod()),
89-
v1.EventTypeNormal,
88+
corev1.EventTypeNormal,
9089
r.Log,
9190
)
9291
}

controllers/core/configmap_controller_test.go

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,6 @@ import (
2222
"github.com/golang/mock/gomock"
2323
"github.com/stretchr/testify/assert"
2424
corev1 "k8s.io/api/core/v1"
25-
v1 "k8s.io/api/core/v1"
2625
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
2726
"k8s.io/apimachinery/pkg/runtime"
2827
"k8s.io/apimachinery/pkg/types"
@@ -225,8 +224,8 @@ func Test_Reconcile_UpdateNode_Error(t *testing.T) {
225224

226225
}
227226

228-
func createCoolDownMockCM(cooldownTime string) *v1.ConfigMap {
229-
return &v1.ConfigMap{
227+
func createCoolDownMockCM(cooldownTime string) *corev1.ConfigMap {
228+
return &corev1.ConfigMap{
230229
ObjectMeta: metav1.ObjectMeta{
231230
Name: config.VpcCniConfigMapName,
232231
Namespace: config.KubeSystemNamespace,

controllers/core/pod_controller.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -153,7 +153,7 @@ func (r *PodReconciler) Reconcile(request custom.Request) (ctrl.Result, error) {
153153
} else {
154154
result, err = resourceHandler.HandleCreate(int(totalCount), pod)
155155
}
156-
if err != nil || result.Requeue == true {
156+
if err != nil || result.Requeue {
157157
return result, err
158158
}
159159
logger.V(1).Info("handled resource without error",

main.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -341,7 +341,7 @@ func main() {
341341
nodeManagerWorkers := asyncWorkers.NewDefaultWorkerPool("node async workers",
342342
nodeWorkerCount, 1, ctrl.Log.WithName("node async workers"), ctx)
343343
nodeManager, err := manager.NewNodeManager(ctrl.Log.WithName("node manager"), resourceManager,
344-
apiWrapper, nodeManagerWorkers, controllerConditions, version.GitVersion, healthzHandler)
344+
apiWrapper, nodeManagerWorkers, controllerConditions, clusterName, version.GitVersion, healthzHandler)
345345

346346
if err != nil {
347347
ctrl.Log.Error(err, "failed to init node manager")

mocks/amazon-vcp-resource-controller-k8s/pkg/k8s/mock_k8swrapper.go

Lines changed: 4 additions & 4 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pkg/config/type.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,8 @@ const (
5151
OSWindows = "windows"
5252
// OSLinux is the the linux Operating System
5353
OSLinux = "linux"
54+
// Node termination finalizer on CNINode CRD
55+
NodeTerminationFinalizer = "networking.k8s.aws/resource-cleanup"
5456
)
5557

5658
// EC2 Tags
@@ -65,6 +67,7 @@ const (
6567
NetworkInterfaceOwnerTagKey = "eks:eni:owner"
6668
NetworkInterfaceOwnerTagValue = "eks-vpc-resource-controller"
6769
NetworkInterfaceOwnerVPCCNITagValue = "amazon-vpc-cni"
70+
CNINodeClusterNameKey = "cluster.k8s.amazonaws.com/name"
6871
)
6972

7073
const (

pkg/k8s/finalizer.go

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
// Copyright Amazon.com Inc. or its affiliates. All Rights Reserved.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License"). You may
4+
// not use this file except in compliance with the License. A copy of the
5+
// License is located at
6+
//
7+
// http://aws.amazon.com/apache2.0/
8+
//
9+
// or in the "license" file accompanying this file. This file is distributed
10+
// on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
11+
// express or implied. See the License for the specific language governing
12+
// permissions and limitations under the License.
13+
14+
package k8s
15+
16+
import (
17+
"context"
18+
19+
"github.com/go-logr/logr"
20+
"sigs.k8s.io/controller-runtime/pkg/client"
21+
"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
22+
)
23+
24+
type FinalizerManager interface {
25+
AddFinalizers(ctx context.Context, object client.Object, finalizers ...string) error
26+
RemoveFinalizers(ctx context.Context, object client.Object, finalizers ...string) error
27+
}
28+
29+
func NewDefaultFinalizerManager(k8sClient client.Client, log logr.Logger) FinalizerManager {
30+
return &defaultFinalizerManager{
31+
k8sClient: k8sClient,
32+
log: log,
33+
}
34+
}
35+
36+
type defaultFinalizerManager struct {
37+
k8sClient client.Client
38+
log logr.Logger
39+
}
40+
41+
func (m *defaultFinalizerManager) AddFinalizers(ctx context.Context, obj client.Object, finalizers ...string) error {
42+
if err := m.k8sClient.Get(ctx, client.ObjectKeyFromObject(obj), obj); err != nil {
43+
return err
44+
}
45+
46+
oldObj := obj.DeepCopyObject().(client.Object)
47+
needsUpdate := false
48+
for _, finalizer := range finalizers {
49+
if !controllerutil.ContainsFinalizer(obj, finalizer) {
50+
m.log.Info("adding finalizer", "object", obj.GetObjectKind().GroupVersionKind().Kind, "name", obj.GetName(), "finalizer", finalizer)
51+
controllerutil.AddFinalizer(obj, finalizer)
52+
needsUpdate = true
53+
}
54+
}
55+
if !needsUpdate {
56+
return nil
57+
}
58+
return m.k8sClient.Patch(ctx, obj, client.MergeFromWithOptions(oldObj, client.MergeFromWithOptimisticLock{}))
59+
}
60+
61+
func (m *defaultFinalizerManager) RemoveFinalizers(ctx context.Context, obj client.Object, finalizers ...string) error {
62+
if err := m.k8sClient.Get(ctx, client.ObjectKeyFromObject(obj), obj); err != nil {
63+
return err
64+
}
65+
66+
oldObj := obj.DeepCopyObject().(client.Object)
67+
needsUpdate := false
68+
for _, finalizer := range finalizers {
69+
if controllerutil.ContainsFinalizer(obj, finalizer) {
70+
m.log.Info("removing finalizer", "object", obj.GetObjectKind().GroupVersionKind().Kind, "name", obj.GetName(), "finalizer", finalizer)
71+
controllerutil.RemoveFinalizer(obj, finalizer)
72+
needsUpdate = true
73+
}
74+
}
75+
if !needsUpdate {
76+
return nil
77+
}
78+
return m.k8sClient.Patch(ctx, obj, client.MergeFromWithOptions(oldObj, client.MergeFromWithOptimisticLock{}))
79+
}

pkg/k8s/wrapper.go

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ package k8s
1515

1616
import (
1717
"context"
18+
"fmt"
1819
"strconv"
1920

2021
"github.com/aws/amazon-vpc-cni-k8s/pkg/apis/crd/v1alpha1"
@@ -80,7 +81,7 @@ type K8sWrapper interface {
8081
AddLabelToManageNode(node *v1.Node, labelKey string, labelValue string) (bool, error)
8182
ListEvents(ops []client.ListOption) (*eventsv1.EventList, error)
8283
GetCNINode(namespacedName types.NamespacedName) (*rcv1alpha1.CNINode, error)
83-
CreateCNINode(node *v1.Node) error
84+
CreateCNINode(node *v1.Node, clusterName string) error
8485
}
8586

8687
// k8sWrapper is the wrapper object with the client
@@ -233,7 +234,7 @@ func (k *k8sWrapper) GetCNINode(namespacedName types.NamespacedName) (*rcv1alpha
233234
return cninode, nil
234235
}
235236

236-
func (k *k8sWrapper) CreateCNINode(node *v1.Node) error {
237+
func (k *k8sWrapper) CreateCNINode(node *v1.Node, clusterName string) error {
237238
cniNode := &rcv1alpha1.CNINode{
238239
ObjectMeta: metav1.ObjectMeta{
239240
Name: node.Name,
@@ -248,6 +249,16 @@ func (k *k8sWrapper) CreateCNINode(node *v1.Node) error {
248249
Controller: lo.ToPtr(true),
249250
},
250251
},
252+
Labels: map[string]string{
253+
// OS is a standard label & is set by Kubernetes, so we can skip checking if it is set
254+
config.NodeLabelOS: node.ObjectMeta.Labels[config.NodeLabelOS],
255+
},
256+
Finalizers: []string{config.NodeTerminationFinalizer}, // finalizer to clean up leaked ENIs at node termination
257+
},
258+
Spec: rcv1alpha1.CNINodeSpec{
259+
Tags: map[string]string{
260+
fmt.Sprintf(config.CNINodeClusterNameKey): clusterName,
261+
},
251262
},
252263
}
253264

pkg/k8s/wrapper_test.go

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ import (
3737

3838
var (
3939
nodeName = "node-name"
40+
mockClusterName = "cluster-name"
4041
mockResourceName = config.ResourceNamePodENI
4142

4243
existingResource = "extended-resource"
@@ -45,6 +46,9 @@ var (
4546
TypeMeta: metav1.TypeMeta{},
4647
ObjectMeta: metav1.ObjectMeta{
4748
Name: nodeName,
49+
Labels: map[string]string{
50+
config.NodeLabelOS: config.OSLinux,
51+
},
4852
},
4953
Spec: v1.NodeSpec{},
5054
Status: v1.NodeStatus{
@@ -196,20 +200,20 @@ func TestK8sWrapper_CreateCNINodeWithExistedObject_NoError(t *testing.T) {
196200
ctrl := gomock.NewController(t)
197201
wrapper, _, _ := getMockK8sWrapperWithClient(ctrl, []runtime.Object{mockCNINode})
198202

199-
err := wrapper.CreateCNINode(mockNode)
203+
err := wrapper.CreateCNINode(mockNode, mockClusterName)
200204
assert.NoError(t, err)
201205
cniNode, err := wrapper.GetCNINode(types.NamespacedName{Name: mockNode.Name})
202206
assert.NoError(t, err)
203207
assert.Equal(t, mockNode.Name, cniNode.Name)
204-
err = wrapper.CreateCNINode(mockNode)
208+
err = wrapper.CreateCNINode(mockNode, mockClusterName)
205209
assert.NoError(t, err)
206210
}
207211

208212
func TestK8sWrapper_CreateCNINode_NoError(t *testing.T) {
209213
ctrl := gomock.NewController(t)
210214
wrapper, _, _ := getMockK8sWrapperWithClient(ctrl, []runtime.Object{mockCNINode})
211215

212-
err := wrapper.CreateCNINode(mockNode)
216+
err := wrapper.CreateCNINode(mockNode, mockClusterName)
213217
assert.NoError(t, err)
214218
cniNode, err := wrapper.GetCNINode(types.NamespacedName{Name: mockNode.Name})
215219
assert.NoError(t, err)

pkg/node/manager/manager.go

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ type manager struct {
5858
conditions condition.Conditions
5959
controllerVersion string
6060
stopHealthCheckAt time.Time
61+
clusterName string
6162
}
6263

6364
// Manager to perform operation on list of managed/un-managed node
@@ -102,7 +103,7 @@ const pausingHealthCheckDuration = 10 * time.Minute
102103

103104
// NewNodeManager returns a new node manager
104105
func NewNodeManager(logger logr.Logger, resourceManager resource.ResourceManager,
105-
wrapper api.Wrapper, worker asyncWorker.Worker, conditions condition.Conditions, controllerVersion string, healthzHandler *rcHealthz.HealthzHandler) (Manager, error) {
106+
wrapper api.Wrapper, worker asyncWorker.Worker, conditions condition.Conditions, clusterName string, controllerVersion string, healthzHandler *rcHealthz.HealthzHandler) (Manager, error) {
106107

107108
manager := &manager{
108109
resourceManager: resourceManager,
@@ -112,6 +113,7 @@ func NewNodeManager(logger logr.Logger, resourceManager resource.ResourceManager
112113
worker: worker,
113114
conditions: conditions,
114115
controllerVersion: controllerVersion,
116+
clusterName: clusterName,
115117
}
116118

117119
// add health check on subpath for node manager
@@ -228,7 +230,7 @@ func (m *manager) CreateCNINodeIfNotExisting(node *v1.Node) error {
228230
); err != nil {
229231
if apierrors.IsNotFound(err) {
230232
m.Log.Info("Will create a new CNINode", "CNINodeName", node.Name)
231-
return m.wrapper.K8sAPI.CreateCNINode(node)
233+
return m.wrapper.K8sAPI.CreateCNINode(node, m.clusterName)
232234
}
233235
return err
234236
} else {
@@ -459,7 +461,7 @@ func (m *manager) performAsyncOperation(job interface{}) (ctrl.Result, error) {
459461
log.V(1).Info("successfully performed node operation")
460462
return ctrl.Result{}, nil
461463
}
462-
log.Error(err, "failed to performed node operation")
464+
log.Error(err, "failed to perform node operation")
463465

464466
return ctrl.Result{}, nil
465467
}

0 commit comments

Comments
 (0)