diff --git a/PROJECT b/PROJECT index b9b09ae0..22571eb8 100644 --- a/PROJECT +++ b/PROJECT @@ -19,6 +19,7 @@ resources: version: v1beta1 - api: crdVersion: v1 + controller: true domain: k8s.aws group: vpcresources kind: CNINode diff --git a/config/rbac/role.yaml b/config/rbac/role.yaml index 70d80de7..55ada365 100644 --- a/config/rbac/role.yaml +++ b/config/rbac/role.yaml @@ -60,6 +60,8 @@ rules: - create - get - list + - patch + - update - watch - apiGroups: - vpcresources.k8s.aws diff --git a/controllers/core/node_controller.go b/controllers/core/node_controller.go index a794bef7..241fc0cf 100644 --- a/controllers/core/node_controller.go +++ b/controllers/core/node_controller.go @@ -15,7 +15,6 @@ package controllers import ( "context" - "fmt" "net/http" "time" @@ -25,8 +24,6 @@ import ( "github.com/aws/amazon-vpc-resource-controller-k8s/pkg/k8s" "github.com/aws/amazon-vpc-resource-controller-k8s/pkg/node/manager" "github.com/google/uuid" - "github.com/prometheus/client_golang/prometheus" - "sigs.k8s.io/controller-runtime/pkg/metrics" "github.com/go-logr/logr" corev1 "k8s.io/api/core/v1" @@ -36,21 +33,9 @@ import ( ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/controller" - "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" "sigs.k8s.io/controller-runtime/pkg/healthz" ) -var ( - leakedCNINodeResourceCount = prometheus.NewCounter( - prometheus.CounterOpts{ - Name: "orphaned_cninode_objects", - Help: "The number of leaked cninode resources", - }, - ) - - prometheusRegistered = false -) - // MaxNodeConcurrentReconciles is the number of go routines that can invoke // Reconcile in parallel. Since Node Reconciler, performs local operation // on cache only a single go routine should be sufficient. Using more than @@ -93,23 +78,6 @@ func (r *NodeReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl. if nodeErr := r.Client.Get(ctx, req.NamespacedName, node); nodeErr != nil { if errors.IsNotFound(nodeErr) { - // clean up CNINode finalizer - cniNode := &v1alpha1.CNINode{} - if cninodeErr := r.Client.Get(ctx, req.NamespacedName, cniNode); cninodeErr == nil { - if yes := controllerutil.ContainsFinalizer(cniNode, NodeTerminationFinalizer); yes { - updated := cniNode.DeepCopy() - if yes = controllerutil.RemoveFinalizer(updated, NodeTerminationFinalizer); yes { - if err := r.Client.Patch(ctx, updated, client.MergeFrom(cniNode)); err != nil { - return ctrl.Result{}, err - } - r.Log.Info("removed leaked CNINode resource's finalizer", "cninode", cniNode.Name) - } - leakedCNINodeResourceCount.Inc() - } - } else if !errors.IsNotFound(cninodeErr) { - return ctrl.Result{}, fmt.Errorf("failed getting CNINode %s from cached client, %w", cniNode.Name, cninodeErr) - } - // clean up local cached nodes _, found := r.Manager.GetNode(req.Name) if found { @@ -148,8 +116,6 @@ func (r *NodeReconciler) SetupWithManager(mgr ctrl.Manager, maxConcurrentReconci map[string]healthz.Checker{"health-node-controller": r.Check()}, ) - prometheusRegister() - return ctrl.NewControllerManagedBy(mgr). For(&corev1.Node{}). WithOptions(controller.Options{MaxConcurrentReconciles: maxConcurrentReconciles}). @@ -193,11 +159,3 @@ func (r *NodeReconciler) Check() healthz.Checker { return err } } - -func prometheusRegister() { - if !prometheusRegistered { - metrics.Registry.MustRegister(leakedCNINodeResourceCount) - - prometheusRegistered = true - } -} diff --git a/controllers/core/node_controller_test.go b/controllers/core/node_controller_test.go index 311a35b6..128eeb44 100644 --- a/controllers/core/node_controller_test.go +++ b/controllers/core/node_controller_test.go @@ -26,13 +26,11 @@ import ( "github.com/golang/mock/gomock" "github.com/stretchr/testify/assert" corev1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/api/errors" v1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" "sigs.k8s.io/controller-runtime/pkg/client" fakeClient "sigs.k8s.io/controller-runtime/pkg/client/fake" - "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" "sigs.k8s.io/controller-runtime/pkg/log/zap" "sigs.k8s.io/controller-runtime/pkg/reconcile" ) @@ -143,43 +141,6 @@ func TestNodeReconciler_Reconcile_DeleteNonExistentNode(t *testing.T) { assert.Equal(t, res, reconcile.Result{}) } -func TestNodeReconciler_Reconcile_DeleteNonExistentNodesCNINode(t *testing.T) { - ctrl := gomock.NewController(t) - defer ctrl.Finish() - - mock := NewNodeMock(ctrl) - cniNode := &v1alpha1.CNINode{ - ObjectMeta: v1.ObjectMeta{ - Name: mockNodeName, - Finalizers: []string{NodeTerminationFinalizer}, - }, - } - mock.Reconciler.Client = fakeClient.NewClientBuilder().WithScheme(mock.Reconciler.Scheme).WithObjects(cniNode).Build() - - mock.Conditions.EXPECT().GetPodDataStoreSyncStatus().Return(true) - mock.Manager.EXPECT().GetNode(mockNodeName).Return(mock.MockNode, false) - - original := &v1alpha1.CNINode{} - err := mock.Reconciler.Client.Get(context.TODO(), types.NamespacedName{Name: cniNode.Name}, original) - assert.NoError(t, err) - assert.True(t, controllerutil.ContainsFinalizer(original, NodeTerminationFinalizer), "the CNINode has finalizer") - - res, err := mock.Reconciler.Reconcile(context.TODO(), reconcileRequest) - assert.NoError(t, err) - assert.Equal(t, res, reconcile.Result{}) - - node := &corev1.Node{} - updated := &v1alpha1.CNINode{} - err = mock.Reconciler.Client.Get(context.TODO(), types.NamespacedName{Name: cniNode.Name}, node) - assert.Error(t, err, "the node shouldn't existing") - assert.True(t, errors.IsNotFound(err)) - - err = mock.Reconciler.Client.Get(context.TODO(), types.NamespacedName{Name: cniNode.Name}, updated) - assert.NoError(t, err) - assert.True(t, updated.Name == mockNodeName, "the CNINode should existing and waiting for finalizer removal") - assert.False(t, controllerutil.ContainsFinalizer(updated, NodeTerminationFinalizer), "CNINode finalizer should be removed when the node is gone") -} - func TestNodeReconciler_Reconcile_DeleteNonExistentUnmanagedNode(t *testing.T) { ctrl := gomock.NewController(t) defer ctrl.Finish() diff --git a/controllers/crds/cninode_controller.go b/controllers/crds/cninode_controller.go new file mode 100644 index 00000000..cdd7a65d --- /dev/null +++ b/controllers/crds/cninode_controller.go @@ -0,0 +1,272 @@ +// Copyright Amazon.com Inc. or its affiliates. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"). You may +// not use this file except in compliance with the License. A copy of the +// License is located at +// +// http://aws.amazon.com/apache2.0/ +// +// or in the "license" file accompanying this file. This file is distributed +// on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +// express or implied. See the License for the specific language governing +// permissions and limitations under the License. + +package crds + +import ( + "context" + "time" + + "github.com/aws/amazon-vpc-resource-controller-k8s/apis/vpcresources/v1alpha1" + ec2API "github.com/aws/amazon-vpc-resource-controller-k8s/pkg/aws/ec2/api" + "github.com/aws/amazon-vpc-resource-controller-k8s/pkg/aws/ec2/api/cleanup" + "github.com/aws/amazon-vpc-resource-controller-k8s/pkg/config" + "github.com/aws/amazon-vpc-resource-controller-k8s/pkg/k8s" + "github.com/aws/amazon-vpc-resource-controller-k8s/pkg/utils" + "github.com/go-logr/logr" + "github.com/prometheus/client_golang/prometheus" + v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/types" + "k8s.io/apimachinery/pkg/util/wait" + "k8s.io/client-go/util/retry" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/controller" + "sigs.k8s.io/controller-runtime/pkg/metrics" +) + +var ( + prometheusRegistered = false + recreateCNINodeCallCount = prometheus.NewCounter( + prometheus.CounterOpts{ + Name: "recreate_cniNode_call_count", + Help: "The number of requests made by controller to recreate CNINode when node exists", + }, + ) + recreateCNINodeErrCount = prometheus.NewCounter( + prometheus.CounterOpts{ + Name: "recreate_cniNode_err_count", + Help: "The number of requests that failed when controller tried to recreate the CNINode", + }, + ) +) + +func prometheusRegister() { + prometheusRegistered = true + + metrics.Registry.MustRegister( + recreateCNINodeCallCount, + recreateCNINodeErrCount) + + prometheusRegistered = true +} + +// CNINodeReconciler reconciles a CNINode object +type CNINodeReconciler struct { + client.Client + scheme *runtime.Scheme + context context.Context + log logr.Logger + eC2Wrapper ec2API.EC2Wrapper + k8sAPI k8s.K8sWrapper + clusterName string + vpcId string + finalizerManager k8s.FinalizerManager +} + +func NewCNINodeReconciler( + client client.Client, + scheme *runtime.Scheme, + ctx context.Context, + logger logr.Logger, + ec2Wrapper ec2API.EC2Wrapper, + k8sWrapper k8s.K8sWrapper, + clusterName string, + vpcId string, + finalizerManager k8s.FinalizerManager, +) *CNINodeReconciler { + return &CNINodeReconciler{ + Client: client, + scheme: scheme, + context: ctx, + log: logger, + eC2Wrapper: ec2Wrapper, + k8sAPI: k8sWrapper, + clusterName: clusterName, + vpcId: vpcId, + finalizerManager: finalizerManager, + } +} + +//+kubebuilder:rbac:groups=vpcresources.k8s.aws,resources=cninodes,verbs=get;list;watch;create;update;patch; + +// Reconcile handles CNINode create/update/delete events +// Reconciler will add the finalizer and cluster name tag if it does not exist and finalize on CNINode on deletion to clean up leaked resource on node +func (r *CNINodeReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { + cniNode := &v1alpha1.CNINode{} + if err := r.Client.Get(ctx, req.NamespacedName, cniNode); err != nil { + // Ignore not found error + return ctrl.Result{}, client.IgnoreNotFound(err) + } + + nodeFound := true + node := &v1.Node{} + if err := r.Client.Get(ctx, req.NamespacedName, node); err != nil { + if errors.IsNotFound(err) { + nodeFound = false + } else { + r.log.Error(err, "failed to get the node object in CNINode reconciliation, will retry") + // Requeue request so it can be retried + return ctrl.Result{}, err + } + } + + if cniNode.GetDeletionTimestamp().IsZero() { + shouldPatch := false + cniNodeCopy := cniNode.DeepCopy() + // Add cluster name tag if it does not exist + val, ok := cniNode.Spec.Tags[config.CNINodeClusterNameKey] + if !ok || val != r.clusterName { + if len(cniNodeCopy.Spec.Tags) != 0 { + cniNodeCopy.Spec.Tags[config.CNINodeClusterNameKey] = r.clusterName + } else { + cniNodeCopy.Spec.Tags = map[string]string{ + config.CNINodeClusterNameKey: r.clusterName, + } + } + shouldPatch = true + } + // if node exists, get & add OS label if it does not exist on CNINode + if nodeFound { + nodeLabelOS := node.ObjectMeta.Labels[config.NodeLabelOS] + val, ok = cniNode.ObjectMeta.Labels[config.NodeLabelOS] + if !ok || val != nodeLabelOS { + if len(cniNodeCopy.ObjectMeta.Labels) != 0 { + cniNodeCopy.ObjectMeta.Labels[config.NodeLabelOS] = nodeLabelOS + } else { + cniNodeCopy.ObjectMeta.Labels = map[string]string{ + config.NodeLabelOS: nodeLabelOS, + } + } + shouldPatch = true + } + } + + if shouldPatch { + r.log.Info("patching CNINode to add required fields Tags and Labels", "cninode", cniNode.Name) + return ctrl.Result{}, r.Client.Patch(ctx, cniNodeCopy, client.MergeFromWithOptions(cniNode, client.MergeFromWithOptimisticLock{})) + } + + // Add finalizer if it does not exist + if err := r.finalizerManager.AddFinalizers(ctx, cniNode, config.NodeTerminationFinalizer); err != nil { + r.log.Error(err, "failed to add finalizer on CNINode, will retry", "cniNode", cniNode.Name, "finalizer", config.NodeTerminationFinalizer) + return ctrl.Result{}, err + } + return ctrl.Result{}, nil + + } else { // CNINode is marked for deletion + if !nodeFound { + // node is also deleted, proceed with running the cleanup routine and remove the finalizer + + // run cleanup for Linux nodes only + if val, ok := cniNode.ObjectMeta.Labels[config.NodeLabelOS]; ok && val == config.OSLinux { + r.log.Info("running the finalizer routine on cniNode", "cniNode", cniNode.Name) + cleaner := &cleanup.NodeTerminationCleaner{ + NodeID: cniNode.Spec.Tags[config.NetworkInterfaceNodeIDKey], + } + cleaner.ENICleaner = &cleanup.ENICleaner{ + EC2Wrapper: r.eC2Wrapper, + Manager: cleaner, + VpcId: r.vpcId, + Log: ctrl.Log.WithName("eniCleaner").WithName("node"), + } + + if err := cleaner.DeleteLeakedResources(); err != nil { + r.log.Error(err, "failed to cleanup resources during node termination") + ec2API.NodeTerminationENICleanupFailure.Inc() + } + } + + if err := r.finalizerManager.RemoveFinalizers(ctx, cniNode, config.NodeTerminationFinalizer); err != nil { + r.log.Error(err, "failed to remove finalizer on CNINode, will retry", "cniNode", cniNode.Name, "finalizer", config.NodeTerminationFinalizer) + return ctrl.Result{}, err + } + return ctrl.Result{}, nil + } else { + // node exists, do not run the cleanup routine(periodic cleanup routine will delete leaked ENIs), remove the finalizer, + // delete old CNINode and recreate CNINode from the object + + // Create a copy to recreate CNINode object + newCNINode := &v1alpha1.CNINode{ + ObjectMeta: metav1.ObjectMeta{ + Name: cniNode.Name, + Namespace: "", + OwnerReferences: cniNode.OwnerReferences, + // TODO: should we include finalizers at object creation or let controller patch it on Create/Update event? + Finalizers: cniNode.Finalizers, + }, + Spec: cniNode.Spec, + } + + if err := r.finalizerManager.RemoveFinalizers(ctx, cniNode, config.NodeTerminationFinalizer); err != nil { + r.log.Error(err, "failed to remove finalizer on CNINode, will retry") + return ctrl.Result{}, err + } + // wait till CNINode is deleted before recreation as the new object will be created with same name to avoid "object already exists" error + if err := r.waitTillCNINodeDeleted(client.ObjectKeyFromObject(newCNINode)); err != nil { + // raise event if CNINode was not deleted after removing the finalizer + r.k8sAPI.BroadcastEvent(cniNode, utils.CNINodeDeleteFailed, "CNINode delete failed, will be retried", + v1.EventTypeWarning) + // requeue to retry CNINode deletion if node exists + return ctrl.Result{}, err + } + + r.log.Info("creating CNINode after it has been deleted as node still exists", "cniNode", newCNINode.Name) + recreateCNINodeCallCount.Inc() + if err := r.createCNINodeFromObj(ctx, newCNINode); err != nil { + recreateCNINodeErrCount.Inc() + // raise event on if CNINode is deleted and could not be recreated by controller + utils.SendNodeEventWithNodeName(r.k8sAPI, node.Name, utils.CNINodeCreateFailed, + "CNINode was deleted and failed to be recreated by the vpc-resource-controller", v1.EventTypeWarning, r.log) + // return nil as object is deleted and we cannot recreate the object now + return ctrl.Result{}, nil + } + r.log.Info("successfully recreated CNINode", "cniNode", newCNINode.Name) + } + } + return ctrl.Result{}, nil +} + +// SetupWithManager sets up the controller with the Manager. +func (r *CNINodeReconciler) SetupWithManager(mgr ctrl.Manager, maxNodeConcurrentReconciles int) error { + if !prometheusRegistered { + prometheusRegister() + } + return ctrl.NewControllerManagedBy(mgr). + For(&v1alpha1.CNINode{}). + WithOptions(controller.Options{MaxConcurrentReconciles: maxNodeConcurrentReconciles}). + Complete(r) +} + +// waitTillCNINodeDeleted waits for CNINode to be deleted with timeout and returns error +func (r *CNINodeReconciler) waitTillCNINodeDeleted(nameSpacedCNINode types.NamespacedName) error { + oldCNINode := &v1alpha1.CNINode{} + + return wait.PollUntilContextTimeout(context.TODO(), 500*time.Millisecond, time.Second*3, true, func(ctx context.Context) (bool, error) { + if err := r.Client.Get(ctx, nameSpacedCNINode, oldCNINode); err != nil && errors.IsNotFound(err) { + return true, nil + } + return false, nil + }) +} + +// createCNINodeFromObj will create CNINode with backoff and returns error if CNINode is not recreated +func (r *CNINodeReconciler) createCNINodeFromObj(ctx context.Context, newCNINode client.Object) error { + return retry.OnError(retry.DefaultBackoff, func(error) bool { return true }, + func() error { + return r.Client.Create(ctx, newCNINode) + }) +} diff --git a/controllers/crds/cninode_controller_test.go b/controllers/crds/cninode_controller_test.go new file mode 100644 index 00000000..24ea61a0 --- /dev/null +++ b/controllers/crds/cninode_controller_test.go @@ -0,0 +1,122 @@ +package crds + +import ( + "context" + "testing" + + "github.com/aws/amazon-vpc-resource-controller-k8s/apis/vpcresources/v1alpha1" + mock_cleanup "github.com/aws/amazon-vpc-resource-controller-k8s/mocks/amazon-vcp-resource-controller-k8s/pkg/aws/ec2/api/cleanup" + mock_k8s "github.com/aws/amazon-vpc-resource-controller-k8s/mocks/amazon-vcp-resource-controller-k8s/pkg/k8s" + "github.com/aws/amazon-vpc-resource-controller-k8s/pkg/config" + "github.com/golang/mock/gomock" + "github.com/stretchr/testify/assert" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/types" + "sigs.k8s.io/controller-runtime/pkg/client" + fakeClient "sigs.k8s.io/controller-runtime/pkg/client/fake" + "sigs.k8s.io/controller-runtime/pkg/log/zap" + "sigs.k8s.io/controller-runtime/pkg/reconcile" +) + +type CNINodeMock struct { + Reconciler CNINodeReconciler +} + +var ( + mockName = "node-name" + mockClusterName = "test-cluster" + mockNodeWithLabel = &corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: mockName, + Labels: map[string]string{ + config.NodeLabelOS: "linux", + }, + }, + } + reconcileRequest = reconcile.Request{ + NamespacedName: types.NamespacedName{ + Name: mockName, + }, + } +) + +func NewCNINodeMock(ctrl *gomock.Controller, mockObjects ...client.Object) CNINodeMock { + scheme := runtime.NewScheme() + _ = corev1.AddToScheme(scheme) + _ = v1alpha1.AddToScheme(scheme) + client := fakeClient.NewClientBuilder().WithScheme(scheme).WithObjects(mockObjects...).Build() + return CNINodeMock{ + Reconciler: CNINodeReconciler{ + Client: client, + scheme: scheme, + log: zap.New(), + clusterName: mockClusterName, + vpcId: "vpc-000000000000", + }, + } +} + +func TestCNINodeReconcile(t *testing.T) { + type args struct { + mockNode *corev1.Node + mockCNINode *v1alpha1.CNINode + } + type fields struct { + mockResourceCleaner *mock_cleanup.MockResourceCleaner + mockK8sApi *mock_k8s.MockK8sWrapper + mockFinalizerManager *mock_k8s.MockFinalizerManager + } + tests := []struct { + name string + args args + prepare func(f *fields) + asserts func(reconcile.Result, error, *v1alpha1.CNINode) + }{ + { + name: "verify clusterName tag and labels are added if missing", + args: args{ + mockNode: mockNodeWithLabel, + mockCNINode: &v1alpha1.CNINode{ + ObjectMeta: metav1.ObjectMeta{ + Name: mockName, + }, + }, + }, + prepare: nil, + asserts: func(res reconcile.Result, err error, cniNode *v1alpha1.CNINode) { + assert.NoError(t, err) + assert.Equal(t, res, reconcile.Result{}) + assert.Equal(t, cniNode.Labels, map[string]string{config.NodeLabelOS: "linux"}) + assert.Equal(t, cniNode.Spec.Tags, map[string]string{config.CNINodeClusterNameKey: mockClusterName}) + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + ctrl := gomock.NewController(t) + defer ctrl.Finish() + + mock := NewCNINodeMock(ctrl, tt.args.mockNode, tt.args.mockCNINode) + f := fields{ + mockResourceCleaner: mock_cleanup.NewMockResourceCleaner(ctrl), + mockK8sApi: mock_k8s.NewMockK8sWrapper(ctrl), + mockFinalizerManager: mock_k8s.NewMockFinalizerManager(ctrl), + } + if tt.prepare != nil { + tt.prepare(&f) + } + res, err := mock.Reconciler.Reconcile(context.Background(), reconcileRequest) + + cniNode := &v1alpha1.CNINode{} + getErr := mock.Reconciler.Client.Get(context.Background(), reconcileRequest.NamespacedName, cniNode) + assert.NoError(t, getErr) + + if tt.asserts != nil { + tt.asserts(res, err, cniNode) + } + }) + } + +} diff --git a/controllers/crds/suite_test.go b/controllers/crds/suite_test.go new file mode 100644 index 00000000..d0d49241 --- /dev/null +++ b/controllers/crds/suite_test.go @@ -0,0 +1,79 @@ +// Copyright Amazon.com Inc. or its affiliates. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"). You may +// not use this file except in compliance with the License. A copy of the +// License is located at +// +// http://aws.amazon.com/apache2.0/ +// +// or in the "license" file accompanying this file. This file is distributed +// on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +// express or implied. See the License for the specific language governing +// permissions and limitations under the License. + +package crds + +import ( + "path/filepath" + "testing" + + "github.com/aws/amazon-vpc-resource-controller-k8s/apis/vpcresources/v1alpha1" + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + + "k8s.io/client-go/kubernetes/scheme" + "k8s.io/client-go/rest" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/envtest" + logf "sigs.k8s.io/controller-runtime/pkg/log" + "sigs.k8s.io/controller-runtime/pkg/log/zap" + //+kubebuilder:scaffold:imports +) + +// These tests use Ginkgo (BDD-style Go testing framework). Refer to +// http://onsi.github.io/ginkgo/ to learn more about Ginkgo. + +var cfg *rest.Config +var k8sClient client.Client +var testEnv *envtest.Environment + +func TestAPIs(t *testing.T) { + RegisterFailHandler(Fail) + + RunSpecs(t, "Controller Suite") +} + +var _ = BeforeSuite(func() { + done := make(chan interface{}) + go func() { + logf.SetLogger(zap.New(zap.WriteTo(GinkgoWriter), zap.UseDevMode(true))) + + By("bootstrapping test environment") + testEnv = &envtest.Environment{ + CRDDirectoryPaths: []string{filepath.Join("..", "..", "config", "crd", "bases")}, + ErrorIfCRDPathMissing: false, + } + + var err error + // cfg is defined in this file globally. + cfg, err = testEnv.Start() + Expect(err).NotTo(HaveOccurred()) + Expect(cfg).NotTo(BeNil()) + + err = v1alpha1.AddToScheme(scheme.Scheme) + Expect(err).NotTo(HaveOccurred()) + //+kubebuilder:scaffold:scheme + + k8sClient, err = client.New(cfg, client.Options{Scheme: scheme.Scheme}) + Expect(err).NotTo(HaveOccurred()) + Expect(k8sClient).NotTo(BeNil()) + }() + Eventually(done, 60).Should(BeClosed()) + +}) + +var _ = AfterSuite(func() { + By("tearing down the test environment") + err := testEnv.Stop() + Expect(err).NotTo(HaveOccurred()) +}) diff --git a/go.mod b/go.mod index 9e434946..20f879e7 100644 --- a/go.mod +++ b/go.mod @@ -48,7 +48,7 @@ require ( github.com/gogo/protobuf v1.3.2 // indirect github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect github.com/golang/protobuf v1.5.4 // indirect - github.com/google/go-cmp v0.6.0 // indirect + github.com/google/go-cmp v0.6.0 github.com/google/gofuzz v1.2.0 // indirect github.com/google/pprof v0.0.0-20241029153458-d1b30febd7db // indirect github.com/imdario/mergo v0.3.13 // indirect diff --git a/main.go b/main.go index 8ef148d9..6c91f5bd 100644 --- a/main.go +++ b/main.go @@ -26,8 +26,10 @@ import ( vpcresourcesv1beta1 "github.com/aws/amazon-vpc-resource-controller-k8s/apis/vpcresources/v1beta1" "github.com/aws/amazon-vpc-resource-controller-k8s/controllers/apps" corecontroller "github.com/aws/amazon-vpc-resource-controller-k8s/controllers/core" + crdcontroller "github.com/aws/amazon-vpc-resource-controller-k8s/controllers/crds" "github.com/aws/amazon-vpc-resource-controller-k8s/pkg/api" ec2API "github.com/aws/amazon-vpc-resource-controller-k8s/pkg/aws/ec2/api" + eniCleaner "github.com/aws/amazon-vpc-resource-controller-k8s/pkg/aws/ec2/api/cleanup" "github.com/aws/amazon-vpc-resource-controller-k8s/pkg/condition" "github.com/aws/amazon-vpc-resource-controller-k8s/pkg/config" rcHealthz "github.com/aws/amazon-vpc-resource-controller-k8s/pkg/healthz" @@ -362,12 +364,17 @@ func main() { os.Exit(1) } - if err := (&ec2API.ENICleaner{ - EC2Wrapper: ec2Wrapper, + cleaner := &eniCleaner.ClusterENICleaner{ ClusterName: clusterName, - Log: ctrl.Log.WithName("eni cleaner"), - VPCID: vpcID, - }).SetupWithManager(ctx, mgr, healthzHandler); err != nil { + } + cleaner.ENICleaner = &eniCleaner.ENICleaner{ + EC2Wrapper: ec2Wrapper, + Manager: cleaner, + VpcId: vpcID, + Log: ctrl.Log.WithName("eniCleaner").WithName("cluster"), + } + + if err := cleaner.SetupWithManager(ctx, mgr, healthzHandler); err != nil { setupLog.Error(err, "unable to start eni cleaner") os.Exit(1) } @@ -417,6 +424,21 @@ func main() { os.Exit(1) } + finalizerManager := k8s.NewDefaultFinalizerManager(mgr.GetClient(), ctrl.Log.WithName("finalizer manager")) + if err = (crdcontroller.NewCNINodeReconciler( + mgr.GetClient(), + mgr.GetScheme(), + ctx, + ctrl.Log.WithName("controllers").WithName("CNINode"), + ec2Wrapper, + k8sApi, + clusterName, + vpcID, + finalizerManager, + ).SetupWithManager(mgr, maxNodeConcurrentReconciles)); err != nil { + setupLog.Error(err, "unable to create controller", "controller", "CNINode") + os.Exit(1) + } // +kubebuilder:scaffold:builder setupLog.Info("setting up webhook server") webhookServer := mgr.GetWebhookServer() diff --git a/mocks/amazon-vcp-resource-controller-k8s/pkg/aws/ec2/api/cleanup/mock_resource_cleaner.go b/mocks/amazon-vcp-resource-controller-k8s/pkg/aws/ec2/api/cleanup/mock_resource_cleaner.go new file mode 100644 index 00000000..4ed17141 --- /dev/null +++ b/mocks/amazon-vcp-resource-controller-k8s/pkg/aws/ec2/api/cleanup/mock_resource_cleaner.go @@ -0,0 +1,61 @@ +// Copyright Amazon.com Inc. or its affiliates. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"). You may +// not use this file except in compliance with the License. A copy of the +// License is located at +// +// http://aws.amazon.com/apache2.0/ +// +// or in the "license" file accompanying this file. This file is distributed +// on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +// express or implied. See the License for the specific language governing +// permissions and limitations under the License. + +// Code generated by MockGen. DO NOT EDIT. +// Source: github.com/aws/amazon-vpc-resource-controller-k8s/pkg/aws/ec2/api/cleanup (interfaces: ResourceCleaner) + +// Package mock_cleanup is a generated GoMock package. +package mock_cleanup + +import ( + reflect "reflect" + + gomock "github.com/golang/mock/gomock" +) + +// MockResourceCleaner is a mock of ResourceCleaner interface. +type MockResourceCleaner struct { + ctrl *gomock.Controller + recorder *MockResourceCleanerMockRecorder +} + +// MockResourceCleanerMockRecorder is the mock recorder for MockResourceCleaner. +type MockResourceCleanerMockRecorder struct { + mock *MockResourceCleaner +} + +// NewMockResourceCleaner creates a new mock instance. +func NewMockResourceCleaner(ctrl *gomock.Controller) *MockResourceCleaner { + mock := &MockResourceCleaner{ctrl: ctrl} + mock.recorder = &MockResourceCleanerMockRecorder{mock} + return mock +} + +// EXPECT returns an object that allows the caller to indicate expected use. +func (m *MockResourceCleaner) EXPECT() *MockResourceCleanerMockRecorder { + return m.recorder +} + +// DeleteLeakedResources mocks base method. +func (m *MockResourceCleaner) DeleteLeakedResources() error { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "DeleteLeakedResources") + ret0, _ := ret[0].(error) + return ret0 +} + +// DeleteLeakedResources indicates an expected call of DeleteLeakedResources. +func (mr *MockResourceCleanerMockRecorder) DeleteLeakedResources() *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "DeleteLeakedResources", reflect.TypeOf((*MockResourceCleaner)(nil).DeleteLeakedResources)) +} diff --git a/mocks/amazon-vcp-resource-controller-k8s/pkg/aws/ec2/api/mock_ec2_apihelper.go b/mocks/amazon-vcp-resource-controller-k8s/pkg/aws/ec2/api/mock_ec2_apihelper.go index 19f7e104..6bd94eb3 100644 --- a/mocks/amazon-vcp-resource-controller-k8s/pkg/aws/ec2/api/mock_ec2_apihelper.go +++ b/mocks/amazon-vcp-resource-controller-k8s/pkg/aws/ec2/api/mock_ec2_apihelper.go @@ -195,6 +195,20 @@ func (mr *MockEC2APIHelperMockRecorder) DetachNetworkInterfaceFromInstance(arg0 return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "DetachNetworkInterfaceFromInstance", reflect.TypeOf((*MockEC2APIHelper)(nil).DetachNetworkInterfaceFromInstance), arg0) } +// DisassociateTrunkInterface mocks base method. +func (m *MockEC2APIHelper) DisassociateTrunkInterface(arg0 *string) error { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "DisassociateTrunkInterface", arg0) + ret0, _ := ret[0].(error) + return ret0 +} + +// DisassociateTrunkInterface indicates an expected call of DisassociateTrunkInterface. +func (mr *MockEC2APIHelperMockRecorder) DisassociateTrunkInterface(arg0 interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "DisassociateTrunkInterface", reflect.TypeOf((*MockEC2APIHelper)(nil).DisassociateTrunkInterface), arg0) +} + // GetBranchNetworkInterface mocks base method. func (m *MockEC2APIHelper) GetBranchNetworkInterface(arg0, arg1 *string) ([]*ec2.NetworkInterface, error) { m.ctrl.T.Helper() diff --git a/mocks/amazon-vcp-resource-controller-k8s/pkg/aws/ec2/api/mock_ec2_wrapper.go b/mocks/amazon-vcp-resource-controller-k8s/pkg/aws/ec2/api/mock_ec2_wrapper.go index f40d94c6..cc4b9ffc 100644 --- a/mocks/amazon-vcp-resource-controller-k8s/pkg/aws/ec2/api/mock_ec2_wrapper.go +++ b/mocks/amazon-vcp-resource-controller-k8s/pkg/aws/ec2/api/mock_ec2_wrapper.go @@ -48,211 +48,240 @@ func (m *MockEC2Wrapper) EXPECT() *MockEC2WrapperMockRecorder { } // AssignPrivateIPAddresses mocks base method. -func (m *MockEC2Wrapper) AssignPrivateIPAddresses(arg0 *ec2.AssignPrivateIpAddressesInput) (*ec2.AssignPrivateIpAddressesOutput, error) { +func (m *MockEC2Wrapper) AssignPrivateIPAddresses(input *ec2.AssignPrivateIpAddressesInput) (*ec2.AssignPrivateIpAddressesOutput, error) { m.ctrl.T.Helper() - ret := m.ctrl.Call(m, "AssignPrivateIPAddresses", arg0) + ret := m.ctrl.Call(m, "AssignPrivateIPAddresses", input) ret0, _ := ret[0].(*ec2.AssignPrivateIpAddressesOutput) ret1, _ := ret[1].(error) return ret0, ret1 } // AssignPrivateIPAddresses indicates an expected call of AssignPrivateIPAddresses. -func (mr *MockEC2WrapperMockRecorder) AssignPrivateIPAddresses(arg0 interface{}) *gomock.Call { +func (mr *MockEC2WrapperMockRecorder) AssignPrivateIPAddresses(input interface{}) *gomock.Call { mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "AssignPrivateIPAddresses", reflect.TypeOf((*MockEC2Wrapper)(nil).AssignPrivateIPAddresses), arg0) + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "AssignPrivateIPAddresses", reflect.TypeOf((*MockEC2Wrapper)(nil).AssignPrivateIPAddresses), input) } // AssociateTrunkInterface mocks base method. -func (m *MockEC2Wrapper) AssociateTrunkInterface(arg0 *ec2.AssociateTrunkInterfaceInput) (*ec2.AssociateTrunkInterfaceOutput, error) { +func (m *MockEC2Wrapper) AssociateTrunkInterface(input *ec2.AssociateTrunkInterfaceInput) (*ec2.AssociateTrunkInterfaceOutput, error) { m.ctrl.T.Helper() - ret := m.ctrl.Call(m, "AssociateTrunkInterface", arg0) + ret := m.ctrl.Call(m, "AssociateTrunkInterface", input) ret0, _ := ret[0].(*ec2.AssociateTrunkInterfaceOutput) ret1, _ := ret[1].(error) return ret0, ret1 } // AssociateTrunkInterface indicates an expected call of AssociateTrunkInterface. -func (mr *MockEC2WrapperMockRecorder) AssociateTrunkInterface(arg0 interface{}) *gomock.Call { +func (mr *MockEC2WrapperMockRecorder) AssociateTrunkInterface(input interface{}) *gomock.Call { mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "AssociateTrunkInterface", reflect.TypeOf((*MockEC2Wrapper)(nil).AssociateTrunkInterface), arg0) + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "AssociateTrunkInterface", reflect.TypeOf((*MockEC2Wrapper)(nil).AssociateTrunkInterface), input) } // AttachNetworkInterface mocks base method. -func (m *MockEC2Wrapper) AttachNetworkInterface(arg0 *ec2.AttachNetworkInterfaceInput) (*ec2.AttachNetworkInterfaceOutput, error) { +func (m *MockEC2Wrapper) AttachNetworkInterface(input *ec2.AttachNetworkInterfaceInput) (*ec2.AttachNetworkInterfaceOutput, error) { m.ctrl.T.Helper() - ret := m.ctrl.Call(m, "AttachNetworkInterface", arg0) + ret := m.ctrl.Call(m, "AttachNetworkInterface", input) ret0, _ := ret[0].(*ec2.AttachNetworkInterfaceOutput) ret1, _ := ret[1].(error) return ret0, ret1 } // AttachNetworkInterface indicates an expected call of AttachNetworkInterface. -func (mr *MockEC2WrapperMockRecorder) AttachNetworkInterface(arg0 interface{}) *gomock.Call { +func (mr *MockEC2WrapperMockRecorder) AttachNetworkInterface(input interface{}) *gomock.Call { mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "AttachNetworkInterface", reflect.TypeOf((*MockEC2Wrapper)(nil).AttachNetworkInterface), arg0) + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "AttachNetworkInterface", reflect.TypeOf((*MockEC2Wrapper)(nil).AttachNetworkInterface), input) } // CreateNetworkInterface mocks base method. -func (m *MockEC2Wrapper) CreateNetworkInterface(arg0 *ec2.CreateNetworkInterfaceInput) (*ec2.CreateNetworkInterfaceOutput, error) { +func (m *MockEC2Wrapper) CreateNetworkInterface(input *ec2.CreateNetworkInterfaceInput) (*ec2.CreateNetworkInterfaceOutput, error) { m.ctrl.T.Helper() - ret := m.ctrl.Call(m, "CreateNetworkInterface", arg0) + ret := m.ctrl.Call(m, "CreateNetworkInterface", input) ret0, _ := ret[0].(*ec2.CreateNetworkInterfaceOutput) ret1, _ := ret[1].(error) return ret0, ret1 } // CreateNetworkInterface indicates an expected call of CreateNetworkInterface. -func (mr *MockEC2WrapperMockRecorder) CreateNetworkInterface(arg0 interface{}) *gomock.Call { +func (mr *MockEC2WrapperMockRecorder) CreateNetworkInterface(input interface{}) *gomock.Call { mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "CreateNetworkInterface", reflect.TypeOf((*MockEC2Wrapper)(nil).CreateNetworkInterface), arg0) + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "CreateNetworkInterface", reflect.TypeOf((*MockEC2Wrapper)(nil).CreateNetworkInterface), input) } // CreateNetworkInterfacePermission mocks base method. -func (m *MockEC2Wrapper) CreateNetworkInterfacePermission(arg0 *ec2.CreateNetworkInterfacePermissionInput) (*ec2.CreateNetworkInterfacePermissionOutput, error) { +func (m *MockEC2Wrapper) CreateNetworkInterfacePermission(input *ec2.CreateNetworkInterfacePermissionInput) (*ec2.CreateNetworkInterfacePermissionOutput, error) { m.ctrl.T.Helper() - ret := m.ctrl.Call(m, "CreateNetworkInterfacePermission", arg0) + ret := m.ctrl.Call(m, "CreateNetworkInterfacePermission", input) ret0, _ := ret[0].(*ec2.CreateNetworkInterfacePermissionOutput) ret1, _ := ret[1].(error) return ret0, ret1 } // CreateNetworkInterfacePermission indicates an expected call of CreateNetworkInterfacePermission. -func (mr *MockEC2WrapperMockRecorder) CreateNetworkInterfacePermission(arg0 interface{}) *gomock.Call { +func (mr *MockEC2WrapperMockRecorder) CreateNetworkInterfacePermission(input interface{}) *gomock.Call { mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "CreateNetworkInterfacePermission", reflect.TypeOf((*MockEC2Wrapper)(nil).CreateNetworkInterfacePermission), arg0) + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "CreateNetworkInterfacePermission", reflect.TypeOf((*MockEC2Wrapper)(nil).CreateNetworkInterfacePermission), input) } // CreateTags mocks base method. -func (m *MockEC2Wrapper) CreateTags(arg0 *ec2.CreateTagsInput) (*ec2.CreateTagsOutput, error) { +func (m *MockEC2Wrapper) CreateTags(input *ec2.CreateTagsInput) (*ec2.CreateTagsOutput, error) { m.ctrl.T.Helper() - ret := m.ctrl.Call(m, "CreateTags", arg0) + ret := m.ctrl.Call(m, "CreateTags", input) ret0, _ := ret[0].(*ec2.CreateTagsOutput) ret1, _ := ret[1].(error) return ret0, ret1 } // CreateTags indicates an expected call of CreateTags. -func (mr *MockEC2WrapperMockRecorder) CreateTags(arg0 interface{}) *gomock.Call { +func (mr *MockEC2WrapperMockRecorder) CreateTags(input interface{}) *gomock.Call { mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "CreateTags", reflect.TypeOf((*MockEC2Wrapper)(nil).CreateTags), arg0) + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "CreateTags", reflect.TypeOf((*MockEC2Wrapper)(nil).CreateTags), input) } // DeleteNetworkInterface mocks base method. -func (m *MockEC2Wrapper) DeleteNetworkInterface(arg0 *ec2.DeleteNetworkInterfaceInput) (*ec2.DeleteNetworkInterfaceOutput, error) { +func (m *MockEC2Wrapper) DeleteNetworkInterface(input *ec2.DeleteNetworkInterfaceInput) (*ec2.DeleteNetworkInterfaceOutput, error) { m.ctrl.T.Helper() - ret := m.ctrl.Call(m, "DeleteNetworkInterface", arg0) + ret := m.ctrl.Call(m, "DeleteNetworkInterface", input) ret0, _ := ret[0].(*ec2.DeleteNetworkInterfaceOutput) ret1, _ := ret[1].(error) return ret0, ret1 } // DeleteNetworkInterface indicates an expected call of DeleteNetworkInterface. -func (mr *MockEC2WrapperMockRecorder) DeleteNetworkInterface(arg0 interface{}) *gomock.Call { +func (mr *MockEC2WrapperMockRecorder) DeleteNetworkInterface(input interface{}) *gomock.Call { mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "DeleteNetworkInterface", reflect.TypeOf((*MockEC2Wrapper)(nil).DeleteNetworkInterface), arg0) + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "DeleteNetworkInterface", reflect.TypeOf((*MockEC2Wrapper)(nil).DeleteNetworkInterface), input) } // DescribeInstances mocks base method. -func (m *MockEC2Wrapper) DescribeInstances(arg0 *ec2.DescribeInstancesInput) (*ec2.DescribeInstancesOutput, error) { +func (m *MockEC2Wrapper) DescribeInstances(input *ec2.DescribeInstancesInput) (*ec2.DescribeInstancesOutput, error) { m.ctrl.T.Helper() - ret := m.ctrl.Call(m, "DescribeInstances", arg0) + ret := m.ctrl.Call(m, "DescribeInstances", input) ret0, _ := ret[0].(*ec2.DescribeInstancesOutput) ret1, _ := ret[1].(error) return ret0, ret1 } // DescribeInstances indicates an expected call of DescribeInstances. -func (mr *MockEC2WrapperMockRecorder) DescribeInstances(arg0 interface{}) *gomock.Call { +func (mr *MockEC2WrapperMockRecorder) DescribeInstances(input interface{}) *gomock.Call { mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "DescribeInstances", reflect.TypeOf((*MockEC2Wrapper)(nil).DescribeInstances), arg0) + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "DescribeInstances", reflect.TypeOf((*MockEC2Wrapper)(nil).DescribeInstances), input) } // DescribeNetworkInterfaces mocks base method. -func (m *MockEC2Wrapper) DescribeNetworkInterfaces(arg0 *ec2.DescribeNetworkInterfacesInput) (*ec2.DescribeNetworkInterfacesOutput, error) { +func (m *MockEC2Wrapper) DescribeNetworkInterfaces(input *ec2.DescribeNetworkInterfacesInput) (*ec2.DescribeNetworkInterfacesOutput, error) { m.ctrl.T.Helper() - ret := m.ctrl.Call(m, "DescribeNetworkInterfaces", arg0) + ret := m.ctrl.Call(m, "DescribeNetworkInterfaces", input) ret0, _ := ret[0].(*ec2.DescribeNetworkInterfacesOutput) ret1, _ := ret[1].(error) return ret0, ret1 } // DescribeNetworkInterfaces indicates an expected call of DescribeNetworkInterfaces. -func (mr *MockEC2WrapperMockRecorder) DescribeNetworkInterfaces(arg0 interface{}) *gomock.Call { +func (mr *MockEC2WrapperMockRecorder) DescribeNetworkInterfaces(input interface{}) *gomock.Call { mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "DescribeNetworkInterfaces", reflect.TypeOf((*MockEC2Wrapper)(nil).DescribeNetworkInterfaces), arg0) + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "DescribeNetworkInterfaces", reflect.TypeOf((*MockEC2Wrapper)(nil).DescribeNetworkInterfaces), input) +} + +// DescribeNetworkInterfacesPagesWithRetry mocks base method. +func (m *MockEC2Wrapper) DescribeNetworkInterfacesPagesWithRetry(input *ec2.DescribeNetworkInterfacesInput) ([]*ec2.NetworkInterface, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "DescribeNetworkInterfacesPagesWithRetry", input) + ret0, _ := ret[0].([]*ec2.NetworkInterface) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// DescribeNetworkInterfacesPagesWithRetry indicates an expected call of DescribeNetworkInterfacesPagesWithRetry. +func (mr *MockEC2WrapperMockRecorder) DescribeNetworkInterfacesPagesWithRetry(input interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "DescribeNetworkInterfacesPagesWithRetry", reflect.TypeOf((*MockEC2Wrapper)(nil).DescribeNetworkInterfacesPagesWithRetry), input) } // DescribeSubnets mocks base method. -func (m *MockEC2Wrapper) DescribeSubnets(arg0 *ec2.DescribeSubnetsInput) (*ec2.DescribeSubnetsOutput, error) { +func (m *MockEC2Wrapper) DescribeSubnets(input *ec2.DescribeSubnetsInput) (*ec2.DescribeSubnetsOutput, error) { m.ctrl.T.Helper() - ret := m.ctrl.Call(m, "DescribeSubnets", arg0) + ret := m.ctrl.Call(m, "DescribeSubnets", input) ret0, _ := ret[0].(*ec2.DescribeSubnetsOutput) ret1, _ := ret[1].(error) return ret0, ret1 } // DescribeSubnets indicates an expected call of DescribeSubnets. -func (mr *MockEC2WrapperMockRecorder) DescribeSubnets(arg0 interface{}) *gomock.Call { +func (mr *MockEC2WrapperMockRecorder) DescribeSubnets(input interface{}) *gomock.Call { mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "DescribeSubnets", reflect.TypeOf((*MockEC2Wrapper)(nil).DescribeSubnets), arg0) + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "DescribeSubnets", reflect.TypeOf((*MockEC2Wrapper)(nil).DescribeSubnets), input) } // DescribeTrunkInterfaceAssociations mocks base method. -func (m *MockEC2Wrapper) DescribeTrunkInterfaceAssociations(arg0 *ec2.DescribeTrunkInterfaceAssociationsInput) (*ec2.DescribeTrunkInterfaceAssociationsOutput, error) { +func (m *MockEC2Wrapper) DescribeTrunkInterfaceAssociations(input *ec2.DescribeTrunkInterfaceAssociationsInput) (*ec2.DescribeTrunkInterfaceAssociationsOutput, error) { m.ctrl.T.Helper() - ret := m.ctrl.Call(m, "DescribeTrunkInterfaceAssociations", arg0) + ret := m.ctrl.Call(m, "DescribeTrunkInterfaceAssociations", input) ret0, _ := ret[0].(*ec2.DescribeTrunkInterfaceAssociationsOutput) ret1, _ := ret[1].(error) return ret0, ret1 } // DescribeTrunkInterfaceAssociations indicates an expected call of DescribeTrunkInterfaceAssociations. -func (mr *MockEC2WrapperMockRecorder) DescribeTrunkInterfaceAssociations(arg0 interface{}) *gomock.Call { +func (mr *MockEC2WrapperMockRecorder) DescribeTrunkInterfaceAssociations(input interface{}) *gomock.Call { mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "DescribeTrunkInterfaceAssociations", reflect.TypeOf((*MockEC2Wrapper)(nil).DescribeTrunkInterfaceAssociations), arg0) + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "DescribeTrunkInterfaceAssociations", reflect.TypeOf((*MockEC2Wrapper)(nil).DescribeTrunkInterfaceAssociations), input) } // DetachNetworkInterface mocks base method. -func (m *MockEC2Wrapper) DetachNetworkInterface(arg0 *ec2.DetachNetworkInterfaceInput) (*ec2.DetachNetworkInterfaceOutput, error) { +func (m *MockEC2Wrapper) DetachNetworkInterface(input *ec2.DetachNetworkInterfaceInput) (*ec2.DetachNetworkInterfaceOutput, error) { m.ctrl.T.Helper() - ret := m.ctrl.Call(m, "DetachNetworkInterface", arg0) + ret := m.ctrl.Call(m, "DetachNetworkInterface", input) ret0, _ := ret[0].(*ec2.DetachNetworkInterfaceOutput) ret1, _ := ret[1].(error) return ret0, ret1 } // DetachNetworkInterface indicates an expected call of DetachNetworkInterface. -func (mr *MockEC2WrapperMockRecorder) DetachNetworkInterface(arg0 interface{}) *gomock.Call { +func (mr *MockEC2WrapperMockRecorder) DetachNetworkInterface(input interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "DetachNetworkInterface", reflect.TypeOf((*MockEC2Wrapper)(nil).DetachNetworkInterface), input) +} + +// DisassociateTrunkInterface mocks base method. +func (m *MockEC2Wrapper) DisassociateTrunkInterface(input *ec2.DisassociateTrunkInterfaceInput) error { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "DisassociateTrunkInterface", input) + ret0, _ := ret[0].(error) + return ret0 +} + +// DisassociateTrunkInterface indicates an expected call of DisassociateTrunkInterface. +func (mr *MockEC2WrapperMockRecorder) DisassociateTrunkInterface(input interface{}) *gomock.Call { mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "DetachNetworkInterface", reflect.TypeOf((*MockEC2Wrapper)(nil).DetachNetworkInterface), arg0) + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "DisassociateTrunkInterface", reflect.TypeOf((*MockEC2Wrapper)(nil).DisassociateTrunkInterface), input) } // ModifyNetworkInterfaceAttribute mocks base method. -func (m *MockEC2Wrapper) ModifyNetworkInterfaceAttribute(arg0 *ec2.ModifyNetworkInterfaceAttributeInput) (*ec2.ModifyNetworkInterfaceAttributeOutput, error) { +func (m *MockEC2Wrapper) ModifyNetworkInterfaceAttribute(input *ec2.ModifyNetworkInterfaceAttributeInput) (*ec2.ModifyNetworkInterfaceAttributeOutput, error) { m.ctrl.T.Helper() - ret := m.ctrl.Call(m, "ModifyNetworkInterfaceAttribute", arg0) + ret := m.ctrl.Call(m, "ModifyNetworkInterfaceAttribute", input) ret0, _ := ret[0].(*ec2.ModifyNetworkInterfaceAttributeOutput) ret1, _ := ret[1].(error) return ret0, ret1 } // ModifyNetworkInterfaceAttribute indicates an expected call of ModifyNetworkInterfaceAttribute. -func (mr *MockEC2WrapperMockRecorder) ModifyNetworkInterfaceAttribute(arg0 interface{}) *gomock.Call { +func (mr *MockEC2WrapperMockRecorder) ModifyNetworkInterfaceAttribute(input interface{}) *gomock.Call { mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "ModifyNetworkInterfaceAttribute", reflect.TypeOf((*MockEC2Wrapper)(nil).ModifyNetworkInterfaceAttribute), arg0) + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "ModifyNetworkInterfaceAttribute", reflect.TypeOf((*MockEC2Wrapper)(nil).ModifyNetworkInterfaceAttribute), input) } // UnassignPrivateIPAddresses mocks base method. -func (m *MockEC2Wrapper) UnassignPrivateIPAddresses(arg0 *ec2.UnassignPrivateIpAddressesInput) (*ec2.UnassignPrivateIpAddressesOutput, error) { +func (m *MockEC2Wrapper) UnassignPrivateIPAddresses(input *ec2.UnassignPrivateIpAddressesInput) (*ec2.UnassignPrivateIpAddressesOutput, error) { m.ctrl.T.Helper() - ret := m.ctrl.Call(m, "UnassignPrivateIPAddresses", arg0) + ret := m.ctrl.Call(m, "UnassignPrivateIPAddresses", input) ret0, _ := ret[0].(*ec2.UnassignPrivateIpAddressesOutput) ret1, _ := ret[1].(error) return ret0, ret1 } // UnassignPrivateIPAddresses indicates an expected call of UnassignPrivateIPAddresses. -func (mr *MockEC2WrapperMockRecorder) UnassignPrivateIPAddresses(arg0 interface{}) *gomock.Call { +func (mr *MockEC2WrapperMockRecorder) UnassignPrivateIPAddresses(input interface{}) *gomock.Call { mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "UnassignPrivateIPAddresses", reflect.TypeOf((*MockEC2Wrapper)(nil).UnassignPrivateIPAddresses), arg0) + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "UnassignPrivateIPAddresses", reflect.TypeOf((*MockEC2Wrapper)(nil).UnassignPrivateIPAddresses), input) } diff --git a/mocks/amazon-vcp-resource-controller-k8s/pkg/k8s/mock_finalizer.go b/mocks/amazon-vcp-resource-controller-k8s/pkg/k8s/mock_finalizer.go new file mode 100644 index 00000000..343268d2 --- /dev/null +++ b/mocks/amazon-vcp-resource-controller-k8s/pkg/k8s/mock_finalizer.go @@ -0,0 +1,87 @@ +// Copyright Amazon.com Inc. or its affiliates. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"). You may +// not use this file except in compliance with the License. A copy of the +// License is located at +// +// http://aws.amazon.com/apache2.0/ +// +// or in the "license" file accompanying this file. This file is distributed +// on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +// express or implied. See the License for the specific language governing +// permissions and limitations under the License. + +// Code generated by MockGen. DO NOT EDIT. +// Source: github.com/aws/amazon-vpc-resource-controller-k8s/pkg/k8s (interfaces: FinalizerManager) + +// Package mock_k8s is a generated GoMock package. +package mock_k8s + +import ( + context "context" + reflect "reflect" + + gomock "github.com/golang/mock/gomock" + client "sigs.k8s.io/controller-runtime/pkg/client" +) + +// MockFinalizerManager is a mock of FinalizerManager interface. +type MockFinalizerManager struct { + ctrl *gomock.Controller + recorder *MockFinalizerManagerMockRecorder +} + +// MockFinalizerManagerMockRecorder is the mock recorder for MockFinalizerManager. +type MockFinalizerManagerMockRecorder struct { + mock *MockFinalizerManager +} + +// NewMockFinalizerManager creates a new mock instance. +func NewMockFinalizerManager(ctrl *gomock.Controller) *MockFinalizerManager { + mock := &MockFinalizerManager{ctrl: ctrl} + mock.recorder = &MockFinalizerManagerMockRecorder{mock} + return mock +} + +// EXPECT returns an object that allows the caller to indicate expected use. +func (m *MockFinalizerManager) EXPECT() *MockFinalizerManagerMockRecorder { + return m.recorder +} + +// AddFinalizers mocks base method. +func (m *MockFinalizerManager) AddFinalizers(arg0 context.Context, arg1 client.Object, arg2 ...string) error { + m.ctrl.T.Helper() + varargs := []interface{}{arg0, arg1} + for _, a := range arg2 { + varargs = append(varargs, a) + } + ret := m.ctrl.Call(m, "AddFinalizers", varargs...) + ret0, _ := ret[0].(error) + return ret0 +} + +// AddFinalizers indicates an expected call of AddFinalizers. +func (mr *MockFinalizerManagerMockRecorder) AddFinalizers(arg0, arg1 interface{}, arg2 ...interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + varargs := append([]interface{}{arg0, arg1}, arg2...) + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "AddFinalizers", reflect.TypeOf((*MockFinalizerManager)(nil).AddFinalizers), varargs...) +} + +// RemoveFinalizers mocks base method. +func (m *MockFinalizerManager) RemoveFinalizers(arg0 context.Context, arg1 client.Object, arg2 ...string) error { + m.ctrl.T.Helper() + varargs := []interface{}{arg0, arg1} + for _, a := range arg2 { + varargs = append(varargs, a) + } + ret := m.ctrl.Call(m, "RemoveFinalizers", varargs...) + ret0, _ := ret[0].(error) + return ret0 +} + +// RemoveFinalizers indicates an expected call of RemoveFinalizers. +func (mr *MockFinalizerManagerMockRecorder) RemoveFinalizers(arg0, arg1 interface{}, arg2 ...interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + varargs := append([]interface{}{arg0, arg1}, arg2...) + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "RemoveFinalizers", reflect.TypeOf((*MockFinalizerManager)(nil).RemoveFinalizers), varargs...) +} diff --git a/mocks/amazon-vcp-resource-controller-k8s/pkg/provider/branch/trunk/mock_trunk.go b/mocks/amazon-vcp-resource-controller-k8s/pkg/provider/branch/trunk/mock_trunk.go index ac4b1c73..59aae85c 100644 --- a/mocks/amazon-vcp-resource-controller-k8s/pkg/provider/branch/trunk/mock_trunk.go +++ b/mocks/amazon-vcp-resource-controller-k8s/pkg/provider/branch/trunk/mock_trunk.go @@ -64,18 +64,6 @@ func (mr *MockTrunkENIMockRecorder) CreateAndAssociateBranchENIs(arg0, arg1, arg return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "CreateAndAssociateBranchENIs", reflect.TypeOf((*MockTrunkENI)(nil).CreateAndAssociateBranchENIs), arg0, arg1, arg2) } -// DeleteAllBranchENIs mocks base method. -func (m *MockTrunkENI) DeleteAllBranchENIs() { - m.ctrl.T.Helper() - m.ctrl.Call(m, "DeleteAllBranchENIs") -} - -// DeleteAllBranchENIs indicates an expected call of DeleteAllBranchENIs. -func (mr *MockTrunkENIMockRecorder) DeleteAllBranchENIs() *gomock.Call { - mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "DeleteAllBranchENIs", reflect.TypeOf((*MockTrunkENI)(nil).DeleteAllBranchENIs)) -} - // DeleteCooledDownENIs mocks base method. func (m *MockTrunkENI) DeleteCooledDownENIs() { m.ctrl.T.Helper() diff --git a/pkg/aws/ec2/api/cleanup/eni_cleanup.go b/pkg/aws/ec2/api/cleanup/eni_cleanup.go new file mode 100644 index 00000000..755e74dc --- /dev/null +++ b/pkg/aws/ec2/api/cleanup/eni_cleanup.go @@ -0,0 +1,213 @@ +// Copyright Amazon.com Inc. or its affiliates. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"). You may +// not use this file except in compliance with the License. A copy of the +// License is located at +// +// http://aws.amazon.com/apache2.0/ +// +// or in the "license" file accompanying this file. This file is distributed +// on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +// express or implied. See the License for the specific language governing +// permissions and limitations under the License. + +package cleanup + +import ( + "context" + "fmt" + "strings" + "time" + + "github.com/aws/amazon-vpc-resource-controller-k8s/pkg/aws/ec2/api" + "github.com/aws/amazon-vpc-resource-controller-k8s/pkg/config" + rcHealthz "github.com/aws/amazon-vpc-resource-controller-k8s/pkg/healthz" + "github.com/aws/amazon-vpc-resource-controller-k8s/pkg/utils" + + ec2Errors "github.com/aws/amazon-vpc-resource-controller-k8s/pkg/aws/errors" + "github.com/aws/aws-sdk-go/aws" + "github.com/aws/aws-sdk-go/service/ec2" + "github.com/go-logr/logr" + kerrors "k8s.io/apimachinery/pkg/util/errors" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/healthz" +) + +// NetworkInterfaceManager interface allows to define the ENI filters and checks if ENI should be deleted for different callers like in the periodic cleanup routine or +// during node termination +type NetworkInterfaceManager interface { + GetENITagFilters() []*ec2.Filter + ShouldDeleteENI(eniID *string) bool + UpdateAvailableENIsIfNeeded(eniMap *map[string]struct{}) + UpdateCleanupMetrics(vpcrcAvailableCount *int, vpccniAvailableCount *int, leakedENICount *int) +} + +type ENICleaner struct { + EC2Wrapper api.EC2Wrapper + Manager NetworkInterfaceManager + VpcId string + Log logr.Logger +} + +// common filters for describing network interfaces +var CommonNetworkInterfaceFilters = []*ec2.Filter{ + { + Name: aws.String("status"), + Values: []*string{aws.String(ec2.NetworkInterfaceStatusAvailable)}, + }, + { + Name: aws.String("tag:" + config.NetworkInterfaceOwnerTagKey), + Values: aws.StringSlice([]string{config.NetworkInterfaceOwnerTagValue, + config.NetworkInterfaceOwnerVPCCNITagValue}), + }, +} + +// ClusterENICleaner periodically deletes leaked network interfaces(provisioned by the controller or VPC-CNI) in the cluster +type ClusterENICleaner struct { + ClusterName string + shutdown bool + ctx context.Context + availableENIs map[string]struct{} + *ENICleaner +} + +func (e *ClusterENICleaner) SetupWithManager(ctx context.Context, mgr ctrl.Manager, healthzHandler *rcHealthz.HealthzHandler) error { + e.ctx = ctx + e.availableENIs = make(map[string]struct{}) + healthzHandler.AddControllersHealthCheckers( + map[string]healthz.Checker{ + "health-interface-cleaner": rcHealthz.SimplePing("interface cleanup", e.Log), + }, + ) + + return mgr.Add(e) +} + +// StartENICleaner starts the ENI Cleaner routine that cleans up dangling ENIs created by the controller +func (e *ClusterENICleaner) Start(ctx context.Context) error { + e.Log.Info("starting eni clean up routine") + + go func() { + <-ctx.Done() + e.shutdown = true + }() + // Perform ENI cleanup after fixed time intervals till shut down variable is set to true on receiving the shutdown + // signal + for !e.shutdown { + e.DeleteLeakedResources() + time.Sleep(config.ENICleanUpInterval) + } + + return nil +} + +// DeleteLeakedResources describes all the network interfaces in available status that are created by the controller or VPC-CNI +// This is called by periodically by ClusterENICleaner which deletes available ENIs cluster-wide, and by the NodeTermination cleaner on node termination +// The available ENIs are deleted if ShouldDeleteENI is true, defined in the respective cleaners +// The function also updates metrics for the periodic cleanup routine and the node termination cleanup +func (e *ENICleaner) DeleteLeakedResources() error { + var errors []error + availableENIs := make(map[string]struct{}) + vpcrcAvailableCount := 0 + vpccniAvailableCount := 0 + leakedENICount := 0 + defer e.Manager.UpdateCleanupMetrics(&vpcrcAvailableCount, &vpccniAvailableCount, &leakedENICount) + + filters := append(CommonNetworkInterfaceFilters, []*ec2.Filter{ + { + Name: aws.String("vpc-id"), + Values: []*string{aws.String(e.VpcId)}, + }, + }...) + // get cleaner specific filters + filters = append(filters, e.Manager.GetENITagFilters()...) + describeNetworkInterfaceIp := &ec2.DescribeNetworkInterfacesInput{ + Filters: filters, + } + + networkInterfaces, err := e.EC2Wrapper.DescribeNetworkInterfacesPagesWithRetry(describeNetworkInterfaceIp) + if err != nil { + e.Log.Error(err, "failed to describe network interfaces, cleanup will be retried in next cycle") + return err + } + + for _, nwInterface := range networkInterfaces { + if e.Manager.ShouldDeleteENI(nwInterface.NetworkInterfaceId) { + tagMap := utils.GetTagKeyValueMap(nwInterface.TagSet) + if val, ok := tagMap[config.NetworkInterfaceOwnerTagKey]; ok { + // Increment promethues metrics for number of leaked ENIs cleaned up + switch val { + case config.NetworkInterfaceOwnerTagValue: + vpcrcAvailableCount += 1 + case config.NetworkInterfaceOwnerVPCCNITagValue: + vpccniAvailableCount += 1 + default: + // We should not hit this case as we only filter for relevant tag values, log error and continue if unexpected ENIs found + e.Log.Error(fmt.Errorf("found available ENI not created by VPC-CNI/VPC-RC"), "eniID", *nwInterface.NetworkInterfaceId) + continue + } + } + _, err := e.EC2Wrapper.DeleteNetworkInterface(&ec2.DeleteNetworkInterfaceInput{ + NetworkInterfaceId: nwInterface.NetworkInterfaceId, + }) + if err != nil { + if !strings.Contains(err.Error(), ec2Errors.NotFoundInterfaceID) { // ignore InvalidNetworkInterfaceID.NotFound error + // append err and continue, we will retry deletion in the next period/reconcile + leakedENICount += 1 + errors = append(errors, fmt.Errorf("failed to delete leaked network interface %v:%v", *nwInterface.NetworkInterfaceId, err)) + e.Log.Error(err, "failed to delete the leaked network interface", + "id", *nwInterface.NetworkInterfaceId) + } + continue + } + e.Log.Info("deleted leaked ENI successfully", "eni id", nwInterface.NetworkInterfaceId, "instance id", nwInterface.Attachment.InstanceId) + } else { + // Seeing the ENI for the first time, add it to the new list of available network interfaces + availableENIs[*nwInterface.NetworkInterfaceId] = struct{}{} + e.Log.Info("adding eni to to the map of available ENIs, will be removed if present in "+ + "next run too", "id", *nwInterface.NetworkInterfaceId) + } + } + e.Manager.UpdateAvailableENIsIfNeeded(&availableENIs) + return kerrors.NewAggregate(errors) +} + +func (e *ClusterENICleaner) GetENITagFilters() []*ec2.Filter { + clusterNameTagKey := fmt.Sprintf(config.ClusterNameTagKeyFormat, e.ClusterName) + return []*ec2.Filter{ + { + Name: aws.String("tag:" + clusterNameTagKey), + Values: []*string{aws.String(config.ClusterNameTagValue)}, + }, + } +} + +// ShouldDeleteENI returns true if the ENI should be deleted. +func (e *ClusterENICleaner) ShouldDeleteENI(eniID *string) bool { + if _, exists := e.availableENIs[*eniID]; exists { + return true + } + return false +} + +// Set the available ENIs to the list of ENIs seen in the current cycle +// This adds ENIs that should not be deleted in the current cleanup cycle to the internal cache so it can be deleted in next cycle +// This prevents the clean up routine to remove ENIs that are created by another routines and are yet not attached to +// an instance or associated with a trunk interface in the periodic cleanup routine + +// Example +// 1st cycle, Describe Available NetworkInterface Result - Interface 1, Interface 2, Interface 3 +// 2nd cycle, Describe Available NetworkInterface Result - Interface 2, Interface 3 +// In the second cycle we can conclude that Interface 2 and 3 are leaked because they have been sitting for the time +// interval between cycle 1 and 2 and hence can be safely deleted. And we can also conclude that Interface 1 was +// created but not attached at the the time when 1st cycle ran and hence it should not be deleted. +func (e *ClusterENICleaner) UpdateAvailableENIsIfNeeded(eniMap *map[string]struct{}) { + e.availableENIs = *eniMap +} + +// Update cluster cleanup metrics for the current cleanup cycle +func (e *ClusterENICleaner) UpdateCleanupMetrics(vpcrcAvailableCount *int, vpccniAvailableCount *int, leakedENICount *int) { + api.VpcRcAvailableClusterENICnt.Set(float64(*vpcrcAvailableCount)) + api.VpcCniAvailableClusterENICnt.Set(float64(*vpccniAvailableCount)) + api.LeakedENIClusterCleanupCnt.Set(float64(*leakedENICount)) +} diff --git a/pkg/aws/ec2/api/cleanup/eni_cleanup_test.go b/pkg/aws/ec2/api/cleanup/eni_cleanup_test.go new file mode 100644 index 00000000..ec229e7d --- /dev/null +++ b/pkg/aws/ec2/api/cleanup/eni_cleanup_test.go @@ -0,0 +1,242 @@ +// Copyright Amazon.com Inc. or its affiliates. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"). You may +// not use this file except in compliance with the License. A copy of the +// License is located at +// +// http://aws.amazon.com/apache2.0/ +// +// or in the "license" file accompanying this file. This file is distributed +// on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +// express or implied. See the License for the specific language governing +// permissions and limitations under the License. + +package cleanup + +import ( + "context" + "fmt" + "reflect" + "testing" + + mock_api "github.com/aws/amazon-vpc-resource-controller-k8s/mocks/amazon-vcp-resource-controller-k8s/pkg/aws/ec2/api" + "github.com/aws/amazon-vpc-resource-controller-k8s/pkg/config" + "sigs.k8s.io/controller-runtime/pkg/log/zap" + + "github.com/aws/aws-sdk-go/aws" + "github.com/aws/aws-sdk-go/service/ec2" + "github.com/golang/mock/gomock" + "github.com/stretchr/testify/assert" +) + +var ( + mockClusterName = "cluster-name" + mockNodeID = "i-00000000000000001" + mockClusterNameTagKey = fmt.Sprintf(config.ClusterNameTagKeyFormat, mockClusterName) + + mockNetworkInterfaceId1 = "eni-000000000000000" + mockNetworkInterfaceId2 = "eni-000000000000001" + mockNetworkInterfaceId3 = "eni-000000000000002" + + mockVpcId = "vpc-0000000000000000" + + mockClusterTagInput = &ec2.DescribeNetworkInterfacesInput{ + Filters: []*ec2.Filter{ + { + Name: aws.String("status"), + Values: []*string{aws.String(ec2.NetworkInterfaceStatusAvailable)}, + }, + { + Name: aws.String("tag:" + config.NetworkInterfaceOwnerTagKey), + Values: aws.StringSlice([]string{config.NetworkInterfaceOwnerTagValue, + config.NetworkInterfaceOwnerVPCCNITagValue}), + }, + { + Name: aws.String("vpc-id"), + Values: []*string{aws.String(mockVpcId)}, + }, + { + Name: aws.String("tag:" + mockClusterNameTagKey), + Values: []*string{aws.String(config.ClusterNameTagValue)}, + }, + }, + } + + mockNodeIDTagInput = &ec2.DescribeNetworkInterfacesInput{ + Filters: []*ec2.Filter{ + { + Name: aws.String("status"), + Values: []*string{aws.String(ec2.NetworkInterfaceStatusAvailable)}, + }, + { + Name: aws.String("tag:" + config.NetworkInterfaceOwnerTagKey), + Values: aws.StringSlice([]string{config.NetworkInterfaceOwnerTagValue, + config.NetworkInterfaceOwnerVPCCNITagValue}), + }, + { + Name: aws.String("vpc-id"), + Values: []*string{aws.String(mockVpcId)}, + }, + { + Name: aws.String("tag:" + config.NetworkInterfaceNodeIDKey), + Values: []*string{aws.String(mockNodeID)}, + }, + }, + } + + NetworkInterfacesWith1And2 = []*ec2.NetworkInterface{ + {NetworkInterfaceId: &mockNetworkInterfaceId1, Attachment: &ec2.NetworkInterfaceAttachment{InstanceId: &mockNodeID}}, + {NetworkInterfaceId: &mockNetworkInterfaceId2, Attachment: &ec2.NetworkInterfaceAttachment{InstanceId: &mockNodeID}}, + } + + NetworkInterfacesWith1And3 = []*ec2.NetworkInterface{ + {NetworkInterfaceId: &mockNetworkInterfaceId1, Attachment: &ec2.NetworkInterfaceAttachment{InstanceId: &mockNodeID}}, + {NetworkInterfaceId: &mockNetworkInterfaceId3, Attachment: &ec2.NetworkInterfaceAttachment{InstanceId: &mockNodeID}}, + } +) + +func getMockClusterENICleaner(ctrl *gomock.Controller) (*ClusterENICleaner, *mock_api.MockEC2Wrapper) { + mockEC2Wrapper := mock_api.NewMockEC2Wrapper(ctrl) + mockclusterENICleaner := ClusterENICleaner{ + availableENIs: map[string]struct{}{}, + ctx: context.Background(), + ClusterName: mockClusterName, + } + mockclusterENICleaner.ENICleaner = &ENICleaner{ + EC2Wrapper: mockEC2Wrapper, + Manager: &mockclusterENICleaner, + Log: zap.New(zap.UseDevMode(true)).WithName("cluster eni cleaner test"), + VpcId: mockVpcId, + } + return &mockclusterENICleaner, mockEC2Wrapper +} + +func TestENICleaner_DeleteLeakedResources(t *testing.T) { + type fields struct { + mockEC2Wrapper *mock_api.MockEC2Wrapper + clusterENICleaner *ClusterENICleaner + } + testENICleaner_DeleteLeakedResources := []struct { + name string + getENICleaner func() (*ENICleaner, *ClusterENICleaner) + prepare func(f *fields) + assertFirstCall func(f *fields) + assertSecondCall func(f *fields) + }{ + { + name: "ClusterENICleaner, verifies leaked ENIs are deleted in the periodic cleanup routine and availableENI is updated", + getENICleaner: func() (*ENICleaner, *ClusterENICleaner) { + mockClusterENICleaner := &ClusterENICleaner{ + ClusterName: mockClusterName, + ctx: context.Background(), + availableENIs: map[string]struct{}{}, + } + mockClusterENICleaner.ENICleaner = &ENICleaner{ + Manager: mockClusterENICleaner, + VpcId: mockVpcId, + Log: zap.New(zap.UseDevMode(true)).WithName("cluster eni cleaner test"), + } + return mockClusterENICleaner.ENICleaner, mockClusterENICleaner + }, + prepare: func(f *fields) { + gomock.InOrder( + // Return network interface 1 and 2 in first cycle + f.mockEC2Wrapper.EXPECT().DescribeNetworkInterfacesPagesWithRetry(mockClusterTagInput). + Return(NetworkInterfacesWith1And2, nil), + // Return network interface 1 and 3 in the second cycle + f.mockEC2Wrapper.EXPECT().DescribeNetworkInterfacesPagesWithRetry(mockClusterTagInput). + Return(NetworkInterfacesWith1And3, nil), + // Expect to delete the network interface 1 + f.mockEC2Wrapper.EXPECT().DeleteNetworkInterface( + &ec2.DeleteNetworkInterfaceInput{NetworkInterfaceId: &mockNetworkInterfaceId1}).Return(nil, nil), + ) + + }, + assertFirstCall: func(f *fields) { + // After first call, network interface 1 and 2 should be added to the map of available ENIs + assert.True(t, reflect.DeepEqual( + map[string]struct{}{mockNetworkInterfaceId1: {}, mockNetworkInterfaceId2: {}}, f.clusterENICleaner.availableENIs)) + + }, + assertSecondCall: func(f *fields) { + // After second call, network interface 1 should be deleted and network interface 3 added to list + assert.True(t, reflect.DeepEqual( + map[string]struct{}{mockNetworkInterfaceId3: {}}, f.clusterENICleaner.availableENIs)) + }, + }, + { + name: "NodeTerminationENICleaner, verifies ENIs are deleted on node termination", + getENICleaner: func() (*ENICleaner, *ClusterENICleaner) { + mocknodeCleaner := &NodeTerminationCleaner{ + NodeID: mockNodeID, + } + mocknodeCleaner.ENICleaner = &ENICleaner{ + Manager: mocknodeCleaner, + VpcId: mockVpcId, + Log: zap.New(zap.UseDevMode(true)).WithName("cluster eni cleaner test"), + } + return mocknodeCleaner.ENICleaner, nil + }, + prepare: func(f *fields) { + gomock.InOrder( + + // Return network interface 1 and 2 in first cycle, expect to call delete on both + f.mockEC2Wrapper.EXPECT().DescribeNetworkInterfacesPagesWithRetry(mockNodeIDTagInput). + Return(NetworkInterfacesWith1And2, nil), + f.mockEC2Wrapper.EXPECT().DeleteNetworkInterface( + &ec2.DeleteNetworkInterfaceInput{NetworkInterfaceId: &mockNetworkInterfaceId1}).Return(nil, nil), + f.mockEC2Wrapper.EXPECT().DeleteNetworkInterface( + &ec2.DeleteNetworkInterfaceInput{NetworkInterfaceId: &mockNetworkInterfaceId2}).Return(nil, nil), + // Return network interface 1 and 3 in the second cycle, again expect to call delete on both + f.mockEC2Wrapper.EXPECT().DescribeNetworkInterfacesPagesWithRetry(mockNodeIDTagInput). + Return(NetworkInterfacesWith1And3, nil), + f.mockEC2Wrapper.EXPECT().DeleteNetworkInterface( + &ec2.DeleteNetworkInterfaceInput{NetworkInterfaceId: &mockNetworkInterfaceId1}).Return(nil, nil), + f.mockEC2Wrapper.EXPECT().DeleteNetworkInterface( + &ec2.DeleteNetworkInterfaceInput{NetworkInterfaceId: &mockNetworkInterfaceId3}).Return(nil, nil), + ) + }, + }, + } + + for _, tt := range testENICleaner_DeleteLeakedResources { + ctrl := gomock.NewController(t) + defer ctrl.Finish() + mockEC2Wrapper := mock_api.NewMockEC2Wrapper(ctrl) + var mockENICleaner *ENICleaner + var mockClusterENICleaner *ClusterENICleaner + if tt.getENICleaner != nil { + mockENICleaner, mockClusterENICleaner = tt.getENICleaner() + } + mockENICleaner.EC2Wrapper = mockEC2Wrapper + f := fields{ + mockEC2Wrapper: mockEC2Wrapper, + clusterENICleaner: mockClusterENICleaner, + } + if tt.prepare != nil { + tt.prepare(&f) + } + + err := mockENICleaner.DeleteLeakedResources() + assert.NoError(t, err) + if tt.assertFirstCall != nil { + tt.assertFirstCall(&f) + } + + err = mockENICleaner.DeleteLeakedResources() + assert.NoError(t, err) + if tt.assertSecondCall != nil { + tt.assertSecondCall(&f) + } + } +} + +// TestENICleaner_StartENICleaner_Shutdown tests that ENICleaner would not start if shutdown is set to true. +func TestENICleaner_StartENICleaner_Shutdown(t *testing.T) { + ctrl := gomock.NewController(t) + eniCleaner, _ := getMockClusterENICleaner(ctrl) + + eniCleaner.shutdown = true + + eniCleaner.Start(context.TODO()) +} diff --git a/pkg/aws/ec2/api/cleanup/node_cleanup.go b/pkg/aws/ec2/api/cleanup/node_cleanup.go new file mode 100644 index 00000000..7aa59741 --- /dev/null +++ b/pkg/aws/ec2/api/cleanup/node_cleanup.go @@ -0,0 +1,50 @@ +// Copyright Amazon.com Inc. or its affiliates. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"). You may +// not use this file except in compliance with the License. A copy of the +// License is located at +// +// http://aws.amazon.com/apache2.0/ +// +// or in the "license" file accompanying this file. This file is distributed +// on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +// express or implied. See the License for the specific language governing +// permissions and limitations under the License. + +package cleanup + +import ( + "github.com/aws/amazon-vpc-resource-controller-k8s/pkg/config" + "github.com/aws/aws-sdk-go/aws" + "github.com/aws/aws-sdk-go/service/ec2" +) + +// NodeTerminationCleanerto handle resource cleanup at node termination +type NodeTerminationCleaner struct { + NodeID string + *ENICleaner +} + +func (n *NodeTerminationCleaner) GetENITagFilters() []*ec2.Filter { + return []*ec2.Filter{ + { + Name: aws.String("tag:" + config.NetworkInterfaceNodeIDKey), + Values: []*string{aws.String(n.NodeID)}, + }, + } +} + +// Return true. As the node is terminating all available ENIs need to be deleted +func (n *NodeTerminationCleaner) ShouldDeleteENI(eniID *string) bool { + return true +} + +func (n *NodeTerminationCleaner) UpdateAvailableENIsIfNeeded(eniMap *map[string]struct{}) { + // Nothing to do for the node termination cleaner + return +} + +// Updating node termination metrics does not make much sense as it will be updated on each node deletion and does not give us much info +func (n *NodeTerminationCleaner) UpdateCleanupMetrics(vpcrcAvailableCount *int, vpccniAvailableCount *int, leakedENICount *int) { + return +} diff --git a/pkg/aws/ec2/api/cleanup/resource_cleaner.go b/pkg/aws/ec2/api/cleanup/resource_cleaner.go new file mode 100644 index 00000000..d5c7e6d1 --- /dev/null +++ b/pkg/aws/ec2/api/cleanup/resource_cleaner.go @@ -0,0 +1,19 @@ +// Copyright Amazon.com Inc. or its affiliates. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"). You may +// not use this file except in compliance with the License. A copy of the +// License is located at +// +// http://aws.amazon.com/apache2.0/ +// +// or in the "license" file accompanying this file. This file is distributed +// on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +// express or implied. See the License for the specific language governing +// permissions and limitations under the License. + +package cleanup + +// ResourceCleaner interface should be implemented by components that need to delete leaked AWS resources +type ResourceCleaner interface { + DeleteLeakedResources() error +} diff --git a/pkg/aws/ec2/api/eni_cleanup.go b/pkg/aws/ec2/api/eni_cleanup.go deleted file mode 100644 index 583529a8..00000000 --- a/pkg/aws/ec2/api/eni_cleanup.go +++ /dev/null @@ -1,204 +0,0 @@ -// Copyright Amazon.com Inc. or its affiliates. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"). You may -// not use this file except in compliance with the License. A copy of the -// License is located at -// -// http://aws.amazon.com/apache2.0/ -// -// or in the "license" file accompanying this file. This file is distributed -// on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either -// express or implied. See the License for the specific language governing -// permissions and limitations under the License. - -package api - -import ( - "context" - "fmt" - "strings" - "time" - - "github.com/aws/amazon-vpc-resource-controller-k8s/pkg/config" - rcHealthz "github.com/aws/amazon-vpc-resource-controller-k8s/pkg/healthz" - "github.com/prometheus/client_golang/prometheus" - "golang.org/x/exp/slices" - - ec2Errors "github.com/aws/amazon-vpc-resource-controller-k8s/pkg/aws/errors" - "github.com/aws/aws-sdk-go/aws" - "github.com/aws/aws-sdk-go/service/ec2" - "github.com/go-logr/logr" - ctrl "sigs.k8s.io/controller-runtime" - "sigs.k8s.io/controller-runtime/pkg/healthz" -) - -type ENICleaner struct { - EC2Wrapper EC2Wrapper - ClusterName string - Log logr.Logger - VPCID string - - availableENIs map[string]struct{} - shutdown bool - clusterNameTagKey string - ctx context.Context -} - -var ( - vpccniAvailableENICnt = prometheus.NewGauge( - prometheus.GaugeOpts{ - Name: "vpc_cni_created_available_eni_count", - Help: "The number of available ENIs created by VPC-CNI that controller will try to delete in each cleanup cycle", - }, - ) - vpcrcAvailableENICnt = prometheus.NewGauge( - prometheus.GaugeOpts{ - Name: "vpc_rc_created_available_eni_count", - Help: "The number of available ENIs created by VPC-RC that controller will try to delete in each cleanup cycle", - }, - ) - leakedENICnt = prometheus.NewGauge( - prometheus.GaugeOpts{ - Name: "leaked_eni_count", - Help: "The number of available ENIs that failed to be deleted by the controller in each cleanup cycle", - }, - ) -) - -func (e *ENICleaner) SetupWithManager(ctx context.Context, mgr ctrl.Manager, healthzHandler *rcHealthz.HealthzHandler) error { - e.clusterNameTagKey = fmt.Sprintf(config.ClusterNameTagKeyFormat, e.ClusterName) - e.availableENIs = make(map[string]struct{}) - e.ctx = ctx - - healthzHandler.AddControllersHealthCheckers( - map[string]healthz.Checker{ - "health-interface-cleaner": rcHealthz.SimplePing("interface cleanup", e.Log), - }, - ) - - return mgr.Add(e) -} - -// StartENICleaner starts the ENI Cleaner routine that cleans up dangling ENIs created by the controller -func (e *ENICleaner) Start(ctx context.Context) error { - e.Log.Info("starting eni clean up routine") - // Start routine to listen for shut down signal, on receiving the signal it set shutdown to true - go func() { - <-ctx.Done() - e.shutdown = true - }() - // Perform ENI cleanup after fixed time intervals till shut down variable is set to true on receiving the shutdown - // signal - for !e.shutdown { - e.cleanUpAvailableENIs() - time.Sleep(config.ENICleanUpInterval) - } - - return nil -} - -// cleanUpAvailableENIs describes all the network interfaces in available status that are created by the controller, -// on seeing the a network interface for the first time, it is added to the map of available network interfaces, on -// seeing the network interface for the second time the network interface is deleted. This ensures that we are deleting -// the network interfaces that have been in available for upto the time interval between running the clean up routine. -// This prevents the clean up routine to remove ENIs that are created by another routines and are yet not attached to -// an instance or associated with a trunk interface -// Example, -// 1st cycle, Describe Available NetworkInterface Result - Interface 1, Interface 2, Interface 3 -// 2nd cycle, Describe Available NetworkInterface Result - Interface 2, Interface 3 -// In the second cycle we can conclude that Interface 2 and 3 are leaked because they have been sitting for the time -// interval between cycle 1 and 2 and hence can be safely deleted. And we can also conclude that Interface 1 was -// created but not attached at the the time when 1st cycle ran and hence it should not be deleted. -func (e *ENICleaner) cleanUpAvailableENIs() { - vpcrcAvailableCount := 0 - vpccniAvailableCount := 0 - leakedENICount := 0 - - describeNetworkInterfaceIp := &ec2.DescribeNetworkInterfacesInput{ - Filters: []*ec2.Filter{ - { - Name: aws.String("status"), - Values: []*string{aws.String(ec2.NetworkInterfaceStatusAvailable)}, - }, - { - Name: aws.String("tag:" + e.clusterNameTagKey), - Values: []*string{aws.String(config.ClusterNameTagValue)}, - }, - { - Name: aws.String("tag:" + config.NetworkInterfaceOwnerTagKey), - Values: aws.StringSlice([]string{config.NetworkInterfaceOwnerTagValue, - config.NetworkInterfaceOwnerVPCCNITagValue}), - }, - { - Name: aws.String("vpc-id"), - Values: []*string{aws.String(e.VPCID)}, - }, - }, - } - - availableENIs := make(map[string]struct{}) - - for { - describeNetworkInterfaceOp, err := e.EC2Wrapper.DescribeNetworkInterfaces(describeNetworkInterfaceIp) - if err != nil { - e.Log.Error(err, "failed to describe network interfaces, will retry") - return - } - - for _, networkInterface := range describeNetworkInterfaceOp.NetworkInterfaces { - if _, exists := e.availableENIs[*networkInterface.NetworkInterfaceId]; exists { - // Increment promethues metrics for number of leaked ENIs cleaned up - if tagIdx := slices.IndexFunc(networkInterface.TagSet, func(tag *ec2.Tag) bool { - return *tag.Key == config.NetworkInterfaceOwnerTagKey - }); tagIdx != -1 { - switch *networkInterface.TagSet[tagIdx].Value { - case config.NetworkInterfaceOwnerTagValue: - vpcrcAvailableCount += 1 - case config.NetworkInterfaceOwnerVPCCNITagValue: - vpccniAvailableCount += 1 - default: - // We should not hit this case as we only filter for relevant tag values, log error and continue if unexpected ENIs found - e.Log.Error(fmt.Errorf("found available ENI not created by VPC-CNI/VPC-RC"), "eniID", *networkInterface.NetworkInterfaceId) - continue - } - } - - // The ENI in available state has been sitting for at least the eni clean up interval and it should - // be removed - _, err := e.EC2Wrapper.DeleteNetworkInterface(&ec2.DeleteNetworkInterfaceInput{ - NetworkInterfaceId: networkInterface.NetworkInterfaceId, - }) - if err != nil { - if !strings.Contains(err.Error(), ec2Errors.NotFoundInterfaceID) { // ignore InvalidNetworkInterfaceID.NotFound error - // append err and continue, we will retry deletion in the next period/reconcile - leakedENICount += 1 - - e.Log.Error(err, "failed to delete the dangling network interface", - "id", *networkInterface.NetworkInterfaceId) - } - continue - } - e.Log.Info("deleted dangling ENI successfully", - "eni id", networkInterface.NetworkInterfaceId) - } else { - // Seeing the ENI for the first time, add it to the new list of available network interfaces - availableENIs[*networkInterface.NetworkInterfaceId] = struct{}{} - e.Log.V(1).Info("adding eni to to the map of available ENIs, will be removed if present in "+ - "next run too", "id", *networkInterface.NetworkInterfaceId) - } - } - - if describeNetworkInterfaceOp.NextToken == nil { - break - } - - describeNetworkInterfaceIp.NextToken = describeNetworkInterfaceOp.NextToken - } - - // Update leaked ENI metrics - vpcrcAvailableENICnt.Set(float64(vpcrcAvailableCount)) - vpccniAvailableENICnt.Set(float64(vpccniAvailableCount)) - leakedENICnt.Set(float64(leakedENICount)) - // Set the available ENIs to the list of ENIs seen in the current cycle - e.availableENIs = availableENIs -} diff --git a/pkg/aws/ec2/api/eni_cleanup_test.go b/pkg/aws/ec2/api/eni_cleanup_test.go deleted file mode 100644 index e00127c0..00000000 --- a/pkg/aws/ec2/api/eni_cleanup_test.go +++ /dev/null @@ -1,124 +0,0 @@ -// Copyright Amazon.com Inc. or its affiliates. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"). You may -// not use this file except in compliance with the License. A copy of the -// License is located at -// -// http://aws.amazon.com/apache2.0/ -// -// or in the "license" file accompanying this file. This file is distributed -// on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either -// express or implied. See the License for the specific language governing -// permissions and limitations under the License. - -package api - -import ( - "context" - "fmt" - "reflect" - "testing" - - mock_api "github.com/aws/amazon-vpc-resource-controller-k8s/mocks/amazon-vcp-resource-controller-k8s/pkg/aws/ec2/api" - "github.com/aws/amazon-vpc-resource-controller-k8s/pkg/config" - - "github.com/aws/aws-sdk-go/aws" - "github.com/aws/aws-sdk-go/service/ec2" - "github.com/golang/mock/gomock" - "github.com/stretchr/testify/assert" - "sigs.k8s.io/controller-runtime/pkg/log/zap" -) - -var ( - mockClusterName = "cluster-name" - mockClusterNameTagKey = fmt.Sprintf(config.ClusterNameTagKeyFormat, mockClusterName) - - mockNetworkInterfaceId1 = "eni-000000000000000" - mockNetworkInterfaceId2 = "eni-000000000000001" - mockNetworkInterfaceId3 = "eni-000000000000002" - - mockVPCID = "vpc-0000000000000000" - - mockDescribeNetworkInterfaceIp = &ec2.DescribeNetworkInterfacesInput{ - Filters: []*ec2.Filter{ - { - Name: aws.String("status"), - Values: []*string{aws.String(ec2.NetworkInterfaceStatusAvailable)}, - }, - { - Name: aws.String("tag:" + mockClusterNameTagKey), - Values: []*string{aws.String(config.ClusterNameTagValue)}, - }, - { - Name: aws.String("tag:" + config.NetworkInterfaceOwnerTagKey), - Values: aws.StringSlice([]string{config.NetworkInterfaceOwnerTagValue, - config.NetworkInterfaceOwnerVPCCNITagValue}), - }, - { - Name: aws.String("vpc-id"), - Values: []*string{aws.String(mockVPCID)}, - }, - }, - } - mockDescribeInterfaceOpWith1And2 = &ec2.DescribeNetworkInterfacesOutput{ - NetworkInterfaces: []*ec2.NetworkInterface{ - {NetworkInterfaceId: &mockNetworkInterfaceId1}, - {NetworkInterfaceId: &mockNetworkInterfaceId2}, - }, - } - mockDescribeInterfaceOpWith1And3 = &ec2.DescribeNetworkInterfacesOutput{ - NetworkInterfaces: []*ec2.NetworkInterface{ - {NetworkInterfaceId: &mockNetworkInterfaceId1}, - {NetworkInterfaceId: &mockNetworkInterfaceId3}, - }, - } -) - -func getMockENICleaner(ctrl *gomock.Controller) (*ENICleaner, *mock_api.MockEC2Wrapper) { - mockEC2Wrapper := mock_api.NewMockEC2Wrapper(ctrl) - return &ENICleaner{ - EC2Wrapper: mockEC2Wrapper, - availableENIs: map[string]struct{}{}, - Log: zap.New(zap.UseDevMode(true)), - VPCID: mockVPCID, - clusterNameTagKey: mockClusterNameTagKey, - ctx: context.Background(), - }, mockEC2Wrapper -} - -func TestENICleaner_cleanUpAvailableENIs(t *testing.T) { - ctrl := gomock.NewController(t) - eniCleaner, mockWrapper := getMockENICleaner(ctrl) - - gomock.InOrder( - // Return network interface 1 and 2 in first cycle - mockWrapper.EXPECT().DescribeNetworkInterfaces(mockDescribeNetworkInterfaceIp). - Return(mockDescribeInterfaceOpWith1And2, nil), - // Return network interface 1 and 3 in the second cycle - mockWrapper.EXPECT().DescribeNetworkInterfaces(mockDescribeNetworkInterfaceIp). - Return(mockDescribeInterfaceOpWith1And3, nil), - // Expect to delete the network interface 1 - mockWrapper.EXPECT().DeleteNetworkInterface( - &ec2.DeleteNetworkInterfaceInput{NetworkInterfaceId: &mockNetworkInterfaceId1}).Return(nil, nil), - ) - - // Run 1st cycle, network interface 1 and 2 should be added to the map of available ENIs - eniCleaner.cleanUpAvailableENIs() - assert.True(t, reflect.DeepEqual( - map[string]struct{}{mockNetworkInterfaceId1: {}, mockNetworkInterfaceId2: {}}, eniCleaner.availableENIs)) - - // Run the second cycle, this time network interface 1 should be deleted and network interface 3 added to list - eniCleaner.cleanUpAvailableENIs() - assert.True(t, reflect.DeepEqual( - map[string]struct{}{mockNetworkInterfaceId3: {}}, eniCleaner.availableENIs)) -} - -// TestENICleaner_StartENICleaner_Shutdown tests that ENICleaner would not start if shutdown is set to true. -func TestENICleaner_StartENICleaner_Shutdown(t *testing.T) { - ctrl := gomock.NewController(t) - eniCleaner, _ := getMockENICleaner(ctrl) - - eniCleaner.shutdown = true - - eniCleaner.Start(context.TODO()) -} diff --git a/pkg/aws/ec2/api/helper.go b/pkg/aws/ec2/api/helper.go index 14a7864f..e06e6b06 100644 --- a/pkg/aws/ec2/api/helper.go +++ b/pkg/aws/ec2/api/helper.go @@ -93,6 +93,7 @@ type EC2APIHelper interface { GetInstanceDetails(instanceId *string) (*ec2.Instance, error) AssignIPv4ResourcesAndWaitTillReady(eniID string, resourceType config.ResourceType, count int) ([]string, error) UnassignIPv4Resources(eniID string, resourceType config.ResourceType, resources []string) error + DisassociateTrunkInterface(associationID *string) error } // CreateNetworkInterface creates a new network interface @@ -101,7 +102,7 @@ func (h *ec2APIHelper) CreateNetworkInterface(description *string, subnetId *str eniDescription := CreateENIDescriptionPrefix + *description var ec2SecurityGroups []*string - if securityGroups != nil && len(securityGroups) != 0 { + if len(securityGroups) > 0 { // Only add security groups if there are one or more security group provided, otherwise API call will fail instead // of creating the interface with default security groups ec2SecurityGroups = aws.StringSlice(securityGroups) @@ -405,10 +406,7 @@ func (h *ec2APIHelper) WaitForNetworkInterfaceStatusChange(networkInterfaceId *s err := retry.OnError(waitForENIAttachment, func(err error) bool { - if err == ErrRetryAttachmentStatusCheck { - return true - } - return false + return err == ErrRetryAttachmentStatusCheck }, func() error { interfaces, err := h.DescribeNetworkInterfaces([]*string{networkInterfaceId}) if err == nil && len(interfaces) == 1 { @@ -603,7 +601,6 @@ func (h *ec2APIHelper) GetBranchNetworkInterface(trunkID, subnetID *string) ([]* describeNetworkInterfacesInput.NextToken = describeNetworkInterfaceOutput.NextToken } - return nwInterfaces, nil } @@ -623,3 +620,10 @@ func (h *ec2APIHelper) DetachAndDeleteNetworkInterface(attachmentID *string, nwI } return nil } + +func (h *ec2APIHelper) DisassociateTrunkInterface(associationID *string) error { + input := &ec2.DisassociateTrunkInterfaceInput{ + AssociationId: associationID, + } + return h.ec2Wrapper.DisassociateTrunkInterface(input) +} diff --git a/pkg/aws/ec2/api/helper_test.go b/pkg/aws/ec2/api/helper_test.go index 6981c99a..390ad983 100644 --- a/pkg/aws/ec2/api/helper_test.go +++ b/pkg/aws/ec2/api/helper_test.go @@ -67,7 +67,7 @@ var ( eniDescription = "mock description of eni" eniDescriptionWithPrefix = "aws-k8s-" + eniDescription - mockError = fmt.Errorf("failed to do ec2 call") + errMock = fmt.Errorf("failed to do ec2 call") ) var ( @@ -177,9 +177,7 @@ var ( TagSet: branchTag2, } - tokenID = "token" - - describeTrunkInterfaceInput1 = &ec2.DescribeNetworkInterfacesInput{ + describeTrunkInterfaceInput = &ec2.DescribeNetworkInterfacesInput{ Filters: []*ec2.Filter{ { Name: aws.String("tag:" + config.TrunkENIIDTag), @@ -191,26 +189,9 @@ var ( }, }, } - describeTrunkInterfaceInput2 = &ec2.DescribeNetworkInterfacesInput{ - Filters: []*ec2.Filter{ - { - Name: aws.String("tag:" + config.TrunkENIIDTag), - Values: []*string{&trunkInterfaceId}, - }, - { - Name: aws.String("subnet-id"), - Values: aws.StringSlice([]string{subnetId}), - }, - }, - NextToken: &tokenID, - } - describeTrunkInterfaceOutput1 = &ec2.DescribeNetworkInterfacesOutput{ - NetworkInterfaces: []*ec2.NetworkInterface{&networkInterface1}, - NextToken: &tokenID, - } - describeTrunkInterfaceOutput2 = &ec2.DescribeNetworkInterfacesOutput{ - NetworkInterfaces: []*ec2.NetworkInterface{&networkInterface2}, + describeTrunkInterfaceOutput = &ec2.DescribeNetworkInterfacesOutput{ + NetworkInterfaces: []*ec2.NetworkInterface{&networkInterface1, &networkInterface2}, } describeTrunkInterfaceAssociationsInput = &ec2.DescribeTrunkInterfaceAssociationsInput{ @@ -413,13 +394,13 @@ func TestEc2APIHelper_AssociateBranchToTrunk_Error(t *testing.T) { ec2ApiHelper, mockWrapper := getMockWrapper(ctrl) // Return empty association response - mockWrapper.EXPECT().AssociateTrunkInterface(associateTrunkInterfaceInput).Return(nil, mockError) + mockWrapper.EXPECT().AssociateTrunkInterface(associateTrunkInterfaceInput).Return(nil, errMock) mockWrapper.EXPECT().CreateNetworkInterfacePermission(createNetworkInterfacePermissionInputBranch). Return(nil, nil) _, err := ec2ApiHelper.AssociateBranchToTrunk(&trunkInterfaceId, &branchInterfaceId, vlanId) - assert.Error(t, mockError, err) + assert.Error(t, errMock, err) } // TestEc2APIHelper_CreateNetworkInterface_NoSecondaryIP tests network interface creation when no secondary IPs are @@ -502,7 +483,7 @@ func TestEc2APIHelper_CreateNetworkInterface_WithSecondaryIPAndPrefixCount_Error createNetworkInterfaceInput.SecondaryPrivateIpAddressCount = nil createNetworkInterfaceInput.Ipv4PrefixCount = nil - assert.Error(t, mockError, err) + assert.Error(t, errMock, err) } // TestEc2APIHelper_CreateNetworkInterface_TypeTrunk tests network interface creation with the interface type trunk @@ -550,11 +531,11 @@ func TestEc2APIHelper_CreateNetworkInterface_Error(t *testing.T) { ec2ApiHelper, mockWrapper := getMockWrapper(ctrl) - mockWrapper.EXPECT().CreateNetworkInterface(createNetworkInterfaceInput).Return(nil, mockError) + mockWrapper.EXPECT().CreateNetworkInterface(createNetworkInterfaceInput).Return(nil, errMock) _, err := ec2ApiHelper.CreateNetworkInterface(&eniDescription, &subnetId, securityGroups, tags, nil, nil) - assert.Error(t, err, mockError) + assert.Error(t, err, errMock) } // TestEc2APIHelper_DeleteNetworkInterface tests delete network interface returns correct response in case of valid @@ -579,10 +560,10 @@ func TestEc2APIHelper_DeleteNetworkInterface_Error(t *testing.T) { ec2ApiHelper, mockWrapper := getMockWrapper(ctrl) - mockWrapper.EXPECT().DeleteNetworkInterface(deleteNetworkInterfaceInput).Return(nil, mockError).Times(maxRetryOnError) + mockWrapper.EXPECT().DeleteNetworkInterface(deleteNetworkInterfaceInput).Return(nil, errMock).Times(maxRetryOnError) err := ec2ApiHelper.DeleteNetworkInterface(&branchInterfaceId) - assert.Error(t, mockError, err) + assert.Error(t, errMock, err) } // TestEc2APIHelper_DeleteNetworkInterface_ErrorThenSuccess tests that if delete network call fails initially and @@ -594,7 +575,7 @@ func TestEc2APIHelper_DeleteNetworkInterface_ErrorThenSuccess(t *testing.T) { ec2ApiHelper, mockWrapper := getMockWrapper(ctrl) gomock.InOrder( - mockWrapper.EXPECT().DeleteNetworkInterface(deleteNetworkInterfaceInput).Return(nil, mockError).Times(2), + mockWrapper.EXPECT().DeleteNetworkInterface(deleteNetworkInterfaceInput).Return(nil, errMock).Times(2), mockWrapper.EXPECT().DeleteNetworkInterface(deleteNetworkInterfaceInput).Return(nil, nil).Times(1), ) @@ -634,10 +615,10 @@ func TestEc2APIHelper_GetSubnet_Error(t *testing.T) { defer ctrl.Finish() ec2ApiHelper, mockWrapper := getMockWrapper(ctrl) - mockWrapper.EXPECT().DescribeSubnets(describeSubnetInput).Return(nil, mockError) + mockWrapper.EXPECT().DescribeSubnets(describeSubnetInput).Return(nil, errMock) _, err := ec2ApiHelper.GetSubnet(&subnetId) - assert.Error(t, mockError, err) + assert.Error(t, errMock, err) } // TestEc2APIHelper_GetNetworkInterfaceOfInstance tests that describe network interface returns no errors @@ -664,10 +645,10 @@ func TestEc2APIHelper_GetNetworkInterfaceOfInstance_Error(t *testing.T) { ec2ApiHelper, mockWrapper := getMockWrapper(ctrl) - mockWrapper.EXPECT().DescribeInstances(describeNetworkInterfaceInputUsingInstanceId).Return(nil, mockError) + mockWrapper.EXPECT().DescribeInstances(describeNetworkInterfaceInputUsingInstanceId).Return(nil, errMock) _, err := ec2ApiHelper.GetInstanceNetworkInterface(&instanceId) - assert.Error(t, mockError, err) + assert.Error(t, errMock, err) } // TestEc2APIHelper_DescribeNetworkInterfaces tests describe network interface call works as expected under @@ -695,10 +676,10 @@ func TestEc2APIHelper_DescribeNetworkInterfaces_Error(t *testing.T) { ec2ApiHelper, mockWrapper := getMockWrapper(ctrl) mockWrapper.EXPECT().DescribeNetworkInterfaces(describeNetworkInterfaceInputUsingInterfaceId). - Return(nil, mockError) + Return(nil, errMock) _, err := ec2ApiHelper.DescribeNetworkInterfaces([]*string{&branchInterfaceId, &branchInterfaceId2}) - assert.Error(t, mockError, err) + assert.Error(t, errMock, err) } // TestEc2APIHelper_DescribeTrunkInterfaceAssociation tests that the describe trunk interface association returns @@ -741,10 +722,10 @@ func TestEc2APIHelper_DescribeTrunkInterfaceAssociation_Error(t *testing.T) { ec2ApiHelper, mockWrapper := getMockWrapper(ctrl) mockWrapper.EXPECT().DescribeTrunkInterfaceAssociations(describeTrunkInterfaceAssociationsInput). - Return(nil, mockError) + Return(nil, errMock) _, err := ec2ApiHelper.DescribeTrunkInterfaceAssociation(&trunkInterfaceId) - assert.Error(t, mockError, err) + assert.Error(t, errMock, err) } func TestEc2APIHelper_CreateAndAttachNetworkInterface(t *testing.T) { @@ -782,7 +763,7 @@ func TestEc2APIHelper_CreateAndAttachNetworkInterface_DeleteOnAttachFailed(t *te ec2ApiHelper, mockWrapper := getMockWrapper(ctrl) mockWrapper.EXPECT().CreateNetworkInterface(createNetworkInterfaceInput).Return(createNetworkInterfaceOutput, nil) - mockWrapper.EXPECT().AttachNetworkInterface(attachNetworkInterfaceInput).Return(attachNetworkInterfaceOutput, mockError) + mockWrapper.EXPECT().AttachNetworkInterface(attachNetworkInterfaceInput).Return(attachNetworkInterfaceOutput, errMock) // Test delete is called mockWrapper.EXPECT().DeleteNetworkInterface(deleteNetworkInterfaceInput).Return(nil, nil) @@ -804,7 +785,7 @@ func TestEc2APIHelper_CreateAndAttachNetworkInterface_DeleteOnSetTerminationFail mockWrapper.EXPECT().CreateNetworkInterface(createNetworkInterfaceInput).Return(createNetworkInterfaceOutput, nil) mockWrapper.EXPECT().AttachNetworkInterface(attachNetworkInterfaceInput).Return(attachNetworkInterfaceOutput, nil) - mockWrapper.EXPECT().ModifyNetworkInterfaceAttribute(modifyNetworkInterfaceAttributeInput).Return(nil, mockError) + mockWrapper.EXPECT().ModifyNetworkInterfaceAttribute(modifyNetworkInterfaceAttributeInput).Return(nil, errMock) // Test detach and delete is called mockWrapper.EXPECT().DetachNetworkInterface(detachNetworkInterfaceInput).Return(nil, nil) @@ -841,10 +822,10 @@ func TestEc2APIHelper_SetDeleteOnTermination_Error(t *testing.T) { ec2ApiHelper, mockWrapper := getMockWrapper(ctrl) mockWrapper.EXPECT().ModifyNetworkInterfaceAttribute(modifyNetworkInterfaceAttributeInput). - Return(nil, mockError) + Return(nil, errMock) err := ec2ApiHelper.SetDeleteOnTermination(&attachmentId, &branchInterfaceId) - assert.Error(t, mockError, err) + assert.Error(t, errMock, err) } // TestEC2APIHelper_AttachNetworkInterfaceToInstance no error is returned when valid inputs are passed @@ -884,10 +865,10 @@ func TestEC2APIHelper_AttachNetworkInterfaceToInstance_Error(t *testing.T) { ec2ApiHelper, mockWrapper := getMockWrapper(ctrl) - mockWrapper.EXPECT().AttachNetworkInterface(attachNetworkInterfaceInput).Return(nil, mockError) + mockWrapper.EXPECT().AttachNetworkInterface(attachNetworkInterfaceInput).Return(nil, errMock) _, err := ec2ApiHelper.AttachNetworkInterfaceToInstance(&instanceId, &branchInterfaceId, &deviceIndex) - assert.Error(t, mockError, err) + assert.Error(t, errMock, err) } // TestEc2APIHelper_DetachNetworkInterfaceFromInstance tests ec2 api call returns no error on valid input @@ -938,10 +919,10 @@ func TestEC2ADIHelper_WaitForNetworkInterfaceStatusChange_NonRetryableError(t *t statusAvailable := "available" mockWrapper.EXPECT().DescribeNetworkInterfaces(describeNetworkInterfaceInputUsingOneInterfaceId). - Return(nil, mockError) + Return(nil, errMock) err := ec2ApiHelper.WaitForNetworkInterfaceStatusChange(&branchInterfaceId, statusAvailable) - assert.Error(t, mockError, err) + assert.Error(t, errMock, err) } // TestEc2APIHelper_DetachAndDeleteNetworkInterface tests the ec2 api calls are called in order with the desired input @@ -974,10 +955,10 @@ func TestEc2APIHelper_DetachAndDeleteNetworkInterface_Error(t *testing.T) { ec2ApiHelper, mockWrapper := getMockWrapper(ctrl) - mockWrapper.EXPECT().DetachNetworkInterface(detachNetworkInterfaceInput).Return(nil, mockError) + mockWrapper.EXPECT().DetachNetworkInterface(detachNetworkInterfaceInput).Return(nil, errMock) err := ec2ApiHelper.DetachAndDeleteNetworkInterface(&attachmentId, &branchInterfaceId) - assert.Error(t, mockError, err) + assert.Error(t, errMock, err) } @@ -1019,10 +1000,10 @@ func TestEC2APIHelper_GetInstanceDetails_Error(t *testing.T) { ec2ApiHelper, mockWrapper := getMockWrapper(ctrl) - mockWrapper.EXPECT().DescribeInstances(describeInstanceInput).Return(nil, mockError) + mockWrapper.EXPECT().DescribeInstances(describeInstanceInput).Return(nil, errMock) _, err := ec2ApiHelper.GetInstanceDetails(&instanceId) - assert.Error(t, mockError, err) + assert.Error(t, errMock, err) } // TestEC2APIHelper_AssignIPv4ResourcesAndWaitTillReady_TypeIPv4Address tests that once new IP addresses are assigned they are returned @@ -1067,11 +1048,11 @@ func TestEC2APIHelper_AssignIPv4ResourcesAndWaitTillReady_TypeIPv4Address_Error( ec2ApiHelper, mockWrapper := getMockWrapper(ctrl) - mockWrapper.EXPECT().AssignPrivateIPAddresses(assignPrivateIPInput).Return(nil, mockError) + mockWrapper.EXPECT().AssignPrivateIPAddresses(assignPrivateIPInput).Return(nil, errMock) _, err := ec2ApiHelper.AssignIPv4ResourcesAndWaitTillReady(eniID, config.ResourceTypeIPv4Address, 2) - assert.Error(t, mockError, err) + assert.Error(t, errMock, err) } // TestEC2APIHelper_AssignIPv4ResourcesAndWaitTillReady_TypeIPv4Prefix_Error tests that error is returned if the assign private IP call @@ -1082,11 +1063,11 @@ func TestEC2APIHelper_AssignIPv4ResourcesAndWaitTillReady_TypeIPv4Prefix_Error(t ec2ApiHelper, mockWrapper := getMockWrapper(ctrl) - mockWrapper.EXPECT().AssignPrivateIPAddresses(assignPrivateIPInputPrefix).Return(nil, mockError) + mockWrapper.EXPECT().AssignPrivateIPAddresses(assignPrivateIPInputPrefix).Return(nil, errMock) _, err := ec2ApiHelper.AssignIPv4ResourcesAndWaitTillReady(eniID, config.ResourceTypeIPv4Prefix, 2) - assert.Error(t, mockError, err) + assert.Error(t, errMock, err) } // TestEC2APIHelper_AssignIPv4ResourcesAndWaitTillReady_TypeIPv4Address_AttachedAfterSecondDescribe tests if the describe call is called @@ -1190,14 +1171,13 @@ func TestEC2APIHelper_AssignIPv4ResourcesAndWaitTillReady_TypeIPv4Prefix_Describ } // TestEc2APIHelper_GetBranchNetworkInterface_PaginatedResults returns the branch interface when paginated results is returned -func TestEc2APIHelper_GetBranchNetworkInterface_PaginatedResults(t *testing.T) { +func TestEc2APIHelper_GetBranchNetworkInterface(t *testing.T) { ctrl := gomock.NewController(t) defer ctrl.Finish() ec2ApiHelper, mockWrapper := getMockWrapper(ctrl) - mockWrapper.EXPECT().DescribeNetworkInterfaces(describeTrunkInterfaceInput1).Return(describeTrunkInterfaceOutput1, nil) - mockWrapper.EXPECT().DescribeNetworkInterfaces(describeTrunkInterfaceInput2).Return(describeTrunkInterfaceOutput2, nil) + mockWrapper.EXPECT().DescribeNetworkInterfaces(describeTrunkInterfaceInput).Return(describeTrunkInterfaceOutput, nil) branchInterfaces, err := ec2ApiHelper.GetBranchNetworkInterface(&trunkInterfaceId, &subnetId) assert.NoError(t, err) diff --git a/pkg/aws/ec2/api/wrapper.go b/pkg/aws/ec2/api/wrapper.go index 29e93156..0f9b426d 100644 --- a/pkg/aws/ec2/api/wrapper.go +++ b/pkg/aws/ec2/api/wrapper.go @@ -20,7 +20,9 @@ import ( "time" "github.com/aws/amazon-vpc-resource-controller-k8s/pkg/utils" + "k8s.io/apimachinery/pkg/util/wait" + "github.com/aws/amazon-vpc-resource-controller-k8s/pkg/config" "github.com/aws/aws-sdk-go/aws" "github.com/aws/aws-sdk-go/aws/credentials" "github.com/aws/aws-sdk-go/aws/credentials/stscreds" @@ -51,12 +53,14 @@ type EC2Wrapper interface { AssignPrivateIPAddresses(input *ec2.AssignPrivateIpAddressesInput) (*ec2.AssignPrivateIpAddressesOutput, error) UnassignPrivateIPAddresses(input *ec2.UnassignPrivateIpAddressesInput) (*ec2.UnassignPrivateIpAddressesOutput, error) DescribeNetworkInterfaces(input *ec2.DescribeNetworkInterfacesInput) (*ec2.DescribeNetworkInterfacesOutput, error) + DescribeNetworkInterfacesPagesWithRetry(input *ec2.DescribeNetworkInterfacesInput) ([]*ec2.NetworkInterface, error) CreateTags(input *ec2.CreateTagsInput) (*ec2.CreateTagsOutput, error) DescribeSubnets(input *ec2.DescribeSubnetsInput) (*ec2.DescribeSubnetsOutput, error) AssociateTrunkInterface(input *ec2.AssociateTrunkInterfaceInput) (*ec2.AssociateTrunkInterfaceOutput, error) DescribeTrunkInterfaceAssociations(input *ec2.DescribeTrunkInterfaceAssociationsInput) (*ec2.DescribeTrunkInterfaceAssociationsOutput, error) ModifyNetworkInterfaceAttribute(input *ec2.ModifyNetworkInterfaceAttributeInput) (*ec2.ModifyNetworkInterfaceAttributeOutput, error) CreateNetworkInterfacePermission(input *ec2.CreateNetworkInterfacePermissionInput) (*ec2.CreateNetworkInterfacePermissionOutput, error) + DisassociateTrunkInterface(input *ec2.DisassociateTrunkInterfaceInput) error } var ( @@ -252,7 +256,7 @@ var ( ec2AssociateTrunkInterfaceAPIErrCnt = prometheus.NewCounter( prometheus.CounterOpts{ Name: "ec2_associate_trunk_interface_api_err_count", - Help: "The number of errors encountered while disassociating Trunk with Branch ENI", + Help: "The number of errors encountered while associating Trunk with Branch ENI", }, ) @@ -306,6 +310,60 @@ var ( }, ) + ec2DisassociateTrunkInterfaceCallCnt = prometheus.NewCounter( + prometheus.CounterOpts{ + Name: "ec2_disassociate_trunk_interface_api_req_count", + Help: "The number of calls made to EC2 to remove association between a branch and trunk network interface", + }, + ) + + ec2DisassociateTrunkInterfaceErrCnt = prometheus.NewCounter( + prometheus.CounterOpts{ + Name: "ec2_disassociate_trunk_interface_api_err_count", + Help: "The number of errors encountered while removing association between a branch and trunk network interface", + }, + ) + + VpcCniAvailableClusterENICnt = prometheus.NewGauge( + prometheus.GaugeOpts{ + Name: "vpc_cni_created_available_eni_count", + Help: "The number of available ENIs created by VPC-CNI that will tried to be deleted by the controller", + }, + ) + + VpcRcAvailableClusterENICnt = prometheus.NewGauge( + prometheus.GaugeOpts{ + Name: "vpc_rc_created_available_eni_count", + Help: "The number of available ENIs created by VPC-RC that will tried to be deleted by the controller", + }, + ) + + LeakedENIClusterCleanupCnt = prometheus.NewGauge( + prometheus.GaugeOpts{ + Name: "leaked_eni_count", + Help: "The number of available ENIs that failed to be deleted by the controller", + }, + ) + + ec2DescribeNetworkInterfacesPagesAPICallCnt = prometheus.NewCounter( + prometheus.CounterOpts{ + Name: "ec2_describe_network_interfaces_pages_api_call_count", + Help: "The number of calls made to describe network interfaces (paginated)", + }, + ) + ec2DescribeNetworkInterfacesPagesAPIErrCnt = prometheus.NewCounter( + prometheus.CounterOpts{ + Name: "ec2_describe_network_interfaces_pages_api_err_count", + Help: "The number of errors encountered while making call to describe network interfaces (paginated)", + }, + ) + NodeTerminationENICleanupFailure = prometheus.NewCounter( + prometheus.CounterOpts{ + Name: "node_termination_eni_cleanup_failures_total", + Help: "Total number of ENI cleanup failures during node termination, tracked per cleanup attempt", + }, + ) + prometheusRegistered = false ) @@ -344,9 +402,14 @@ func prometheusRegister() { ec2modifyNetworkInterfaceAttributeAPICallCnt, ec2modifyNetworkInterfaceAttributeAPIErrCnt, ec2APICallLatencies, - vpccniAvailableENICnt, - vpcrcAvailableENICnt, - leakedENICnt, + ec2DisassociateTrunkInterfaceCallCnt, + ec2DisassociateTrunkInterfaceErrCnt, + VpcRcAvailableClusterENICnt, + VpcCniAvailableClusterENICnt, + LeakedENIClusterCleanupCnt, + ec2DescribeNetworkInterfacesPagesAPICallCnt, + ec2DescribeNetworkInterfacesPagesAPIErrCnt, + NodeTerminationENICleanupFailure, ) prometheusRegistered = true @@ -641,6 +704,59 @@ func (e *ec2Wrapper) DescribeNetworkInterfaces(input *ec2.DescribeNetworkInterfa return describeNetworkInterfacesOutput, err } +// DescribeNetworkInterfacesPages returns network interfaces that match the filters specified in the input +// with retry mechanism for handling API throttling +func (e *ec2Wrapper) DescribeNetworkInterfacesPagesWithRetry(input *ec2.DescribeNetworkInterfacesInput) ([]*ec2.NetworkInterface, error) { + if input.MaxResults == nil { + input.MaxResults = aws.Int64(config.DescribeNetworkInterfacesMaxResults) + } + + start := time.Now() + defer func() { + ec2APICallLatencies.WithLabelValues("describe_network_interfaces_pages").Observe(timeSinceMs(start)) + }() + + var apiError error + for attempt := 1; attempt <= MaxRetries; attempt++ { + attemptInterfaces := make([]*ec2.NetworkInterface, 0, config.DescribeNetworkInterfacesMaxResults) + + err := e.userServiceClient.DescribeNetworkInterfacesPages(input, func(output *ec2.DescribeNetworkInterfacesOutput, _ bool) bool { + ec2APICallCnt.Inc() + ec2DescribeNetworkInterfacesPagesAPICallCnt.Inc() + + // Currently only network interface ID and the tag set is required, only add required details to avoid consuming extra memory + for _, nwInterface := range output.NetworkInterfaces { + attemptInterfaces = append(attemptInterfaces, &ec2.NetworkInterface{ + NetworkInterfaceId: nwInterface.NetworkInterfaceId, + TagSet: nwInterface.TagSet, + }) + } + // Default bucket size for paginated non-mutating call is 100 and refill rate is 20. + // According to this doc https://docs.aws.amazon.com/ec2/latest/devguide/ec2-api-throttling.html. + // This sleep range will keep api calls in range of 8-12 requests per second, well under refill rate. + time.Sleep(wait.Jitter(100*time.Millisecond, 0.2)) + return true + + }) + + if err == nil { + return attemptInterfaces, nil + } + ec2APIErrCnt.Inc() + ec2DescribeNetworkInterfacesPagesAPIErrCnt.Inc() + apiError = err + + if request.IsErrorThrottle(err) && attempt < MaxRetries { + e.log.Info("Throttling error, will retry", "attempt", attempt) + backoff := time.Duration(attempt) * 500 * time.Millisecond + time.Sleep(wait.Jitter(backoff, 0.1)) + continue + } + return nil, err + } + return nil, apiError +} + func (e *ec2Wrapper) AssignPrivateIPAddresses(input *ec2.AssignPrivateIpAddressesInput) (*ec2.AssignPrivateIpAddressesOutput, error) { start := time.Now() assignPrivateIPAddressesOutput, err := e.userServiceClient.AssignPrivateIpAddresses(input) @@ -674,9 +790,9 @@ func (e *ec2Wrapper) UnassignPrivateIPAddresses(input *ec2.UnassignPrivateIpAddr // Metric updates ec2APICallCnt.Inc() ec2UnassignPrivateIPAddressAPICallCnt.Inc() - if input.PrivateIpAddresses != nil && len(input.PrivateIpAddresses) != 0 { + if len(input.PrivateIpAddresses) > 0 { numUnassignedSecondaryIPAddress.Add(float64(len(input.PrivateIpAddresses))) - } else if input.Ipv4Prefixes != nil && len(input.Ipv4Prefixes) != 0 { + } else if len(input.Ipv4Prefixes) > 0 { numUnassignedIPv4Prefixes.Add(float64(len(input.Ipv4Prefixes))) } @@ -822,3 +938,19 @@ func (e *ec2Wrapper) getRegionalStsEndpoint(partitionID, region string) (endpoin } return res, nil } + +func (e *ec2Wrapper) DisassociateTrunkInterface(input *ec2.DisassociateTrunkInterfaceInput) error { + start := time.Now() + // Using the instance role + _, err := e.instanceServiceClient.DisassociateTrunkInterface(input) + ec2APICallLatencies.WithLabelValues("disassociate_branch_from_trunk").Observe(timeSinceMs(start)) + + ec2APICallCnt.Inc() + ec2DisassociateTrunkInterfaceCallCnt.Inc() + + if err != nil { + ec2APIErrCnt.Inc() + ec2DisassociateTrunkInterfaceErrCnt.Inc() + } + return err +} diff --git a/pkg/config/type.go b/pkg/config/type.go index 348c8a2d..30dc81dc 100644 --- a/pkg/config/type.go +++ b/pkg/config/type.go @@ -67,6 +67,7 @@ const ( NetworkInterfaceOwnerTagKey = "eks:eni:owner" NetworkInterfaceOwnerTagValue = "eks-vpc-resource-controller" NetworkInterfaceOwnerVPCCNITagValue = "amazon-vpc-cni" + NetworkInterfaceNodeIDKey = "node.k8s.amazonaws.com/instance_id" CNINodeClusterNameKey = "cluster.k8s.amazonaws.com/name" ) @@ -89,6 +90,8 @@ const ( VpcCNIDaemonSetName = "aws-node" OldVPCControllerDeploymentName = "vpc-resource-controller" BranchENICooldownPeriodKey = "branch-eni-cooldown" + // DescribeNetworkInterfacesMaxResults defines the max number of requests to return for DescribeNetworkInterfaces API call + DescribeNetworkInterfacesMaxResults = int64(1000) ) type ResourceType string diff --git a/pkg/provider/branch/provider.go b/pkg/provider/branch/provider.go index c79780b1..e4848062 100644 --- a/pkg/provider/branch/provider.go +++ b/pkg/provider/branch/provider.go @@ -82,7 +82,7 @@ var ( // NodeDeleteRequeueRequestDelay represents the time after which the resources belonging to a node will be cleaned // up after receiving the actual node delete event. - NodeDeleteRequeueRequestDelay = time.Minute * 5 + NodeDeleteRequeueRequestDelay = time.Minute * 1 prometheusRegistered = false @@ -179,8 +179,8 @@ func (b *branchENIProvider) InitResource(instance ec2.EC2Instance) error { } branchProviderOperationLatency.WithLabelValues(operationInitTrunk, "1").Observe(timeSinceSeconds(start)) - // Add the Trunk ENI to cache - if err := b.addTrunkToCache(nodeName, trunkENI); err != nil { + // Add the Trunk ENI to cache if it does not already exist + if err := b.addTrunkToCache(nodeName, trunkENI); err != nil && err != ErrTrunkExistInCache { branchProviderOperationsErrCount.WithLabelValues("add_trunk_to_cache").Inc() return err } @@ -238,12 +238,14 @@ func (b *branchENIProvider) ProcessAsyncJob(job interface{}) (ctrl.Result, error // DeleteNode deletes all the cached branch ENIs associated with the trunk and removes the trunk from the cache. func (b *branchENIProvider) DeleteNode(nodeName string) (ctrl.Result, error) { - trunkENI, isPresent := b.getTrunkFromCache(nodeName) + _, isPresent := b.getTrunkFromCache(nodeName) if !isPresent { return ctrl.Result{}, fmt.Errorf("failed to find node %s", nodeName) } - trunkENI.DeleteAllBranchENIs() + // At this point, the finalizer routine should have deleted all available branch ENIs + // Any leaked ENIs will be deleted by the periodic cleanup routine if cluster is active + // remove trunk from cache and de-initializer the resource provider b.removeTrunkFromCache(nodeName) b.log.Info("de-initialized resource provider successfully", "nodeName", nodeName) diff --git a/pkg/provider/branch/trunk/trunk.go b/pkg/provider/branch/trunk/trunk.go index ddd814b4..04d78907 100644 --- a/pkg/provider/branch/trunk/trunk.go +++ b/pkg/provider/branch/trunk/trunk.go @@ -102,8 +102,6 @@ type TrunkENI interface { Reconcile(pods []v1.Pod) bool // PushENIsToFrontOfDeleteQueue pushes the eni network interfaces to the front of the delete queue PushENIsToFrontOfDeleteQueue(*v1.Pod, []*ENIDetails) - // DeleteAllBranchENIs deletes all the branch ENI associated with the trunk and also clears the cool down queue - DeleteAllBranchENIs() // Introspect returns the state of the Trunk ENI Introspect() IntrospectResponse } @@ -126,6 +124,8 @@ type trunkENI struct { uidToBranchENIMap map[string][]*ENIDetails // deleteQueue is the queue of ENIs that are being cooled down before being deleted deleteQueue []*ENIDetails + // nodeName tag is the tag added to trunk and branch ENIs created on the node + nodeNameTag []*awsEC2.Tag } // PodENI is a json convertible structure that stores the Branch ENI details that can be @@ -147,6 +147,8 @@ type ENIDetails struct { deletionTimeStamp time.Time // deleteRetryCount is the deleteRetryCount int + // ID of association between branch and trunk ENI + AssociationID string `json:"associationID"` } type IntrospectResponse struct { @@ -176,6 +178,12 @@ func NewTrunkENI(logger logr.Logger, instance ec2.EC2Instance, helper api.EC2API ec2ApiHelper: helper, instance: instance, uidToBranchENIMap: make(map[string][]*ENIDetails), + nodeNameTag: []*awsEC2.Tag{ + { + Key: aws.String(config.NetworkInterfaceNodeIDKey), + Value: aws.String(instance.InstanceID()), + }, + }, } } @@ -231,7 +239,7 @@ func (t *trunkENI) InitTrunk(instance ec2.EC2Instance, podList []v1.Pod) error { } trunk, err := t.ec2ApiHelper.CreateAndAttachNetworkInterface(&instanceID, aws.String(t.instance.SubnetID()), - t.instance.CurrentInstanceSecurityGroups(), nil, &freeIndex, &TrunkEniDescription, &InterfaceTypeTrunk, nil) + t.instance.CurrentInstanceSecurityGroups(), t.nodeNameTag, &freeIndex, &TrunkEniDescription, &InterfaceTypeTrunk, nil) if err != nil { trunkENIOperationsErrCount.WithLabelValues("create_trunk_eni").Inc() return err @@ -418,6 +426,8 @@ func (t *trunkENI) CreateAndAssociateBranchENIs(pod *v1.Pod, securityGroups []st Value: &t.trunkENIId, }, } + // append the nodeName tag to add to branch ENIs + tags = append(tags, t.nodeNameTag...) // Create Branch ENI nwInterface, err = t.ec2ApiHelper.CreateNetworkInterface(&BranchEniDescription, aws.String(t.instance.SubnetID()), securityGroups, tags, nil, nil) @@ -444,12 +454,14 @@ func (t *trunkENI) CreateAndAssociateBranchENIs(pod *v1.Pod, securityGroups []st newENIs = append(newENIs, newENI) // Associate Branch to trunk - _, err = t.ec2ApiHelper.AssociateBranchToTrunk(&t.trunkENIId, nwInterface.NetworkInterfaceId, vlanID) + var associationOutput *awsEC2.AssociateTrunkInterfaceOutput + associationOutput, err = t.ec2ApiHelper.AssociateBranchToTrunk(&t.trunkENIId, nwInterface.NetworkInterfaceId, vlanID) if err != nil { err = fmt.Errorf("associating branch to trunk, %w", err) trunkENIOperationsErrCount.WithLabelValues("associate_branch").Inc() break } + newENI.AssociationID = *associationOutput.InterfaceAssociation.AssociationId } if err != nil { @@ -467,31 +479,6 @@ func (t *trunkENI) CreateAndAssociateBranchENIs(pod *v1.Pod, securityGroups []st return newENIs, nil } -// DeleteAllBranchENIs deletes all the branch ENIs associated with the trunk and all the ENIs present in the cool down -// queue, this is the last API call to the the Trunk ENI before it is removed from cache -func (t *trunkENI) DeleteAllBranchENIs() { - // Delete all the branch used by the pod on this trunk ENI - // Since after this call, the trunk will be removed from cache. No need to clean up its branch map - for _, podENIs := range t.uidToBranchENIMap { - for _, eni := range podENIs { - err := t.deleteENI(eni) - if err != nil { - // Just log, if the ENI still exists it can be removed by the dangling ENI cleaner routine - t.log.Error(err, "failed to delete eni", "eni id", eni.ID) - } - } - } - - // Delete all the branch ENI present in the cool down queue - for _, eni := range t.deleteQueue { - err := t.deleteENI(eni) - if err != nil { - // Just log, if the ENI still exists it can be removed by the dangling ENI cleaner routine - t.log.Error(err, "failed to delete eni", "eni id", eni.ID) - } - } -} - // DeleteBranchNetworkInterface deletes the branch network interface and returns an error in case of failure to delete func (t *trunkENI) PushBranchENIsToCoolDownQueue(UID string) { // Lock is required as Reconciler is also performing operation concurrently @@ -545,7 +532,19 @@ func (t *trunkENI) DeleteCooledDownENIs() { // deleteENIs deletes the provided ENIs and frees up the Vlan assigned to then func (t *trunkENI) deleteENI(eniDetail *ENIDetails) (err error) { - // Delete Branch network interface first + // Disassociate branch ENI from trunk if association ID exists and delete branch network interface + if eniDetail.AssociationID != "" { + err = t.ec2ApiHelper.DisassociateTrunkInterface(&eniDetail.AssociationID) + if err != nil { + trunkENIOperationsErrCount.WithLabelValues("disassociate_trunk_error").Inc() + if !strings.Contains(err.Error(), ec2Errors.NotFoundAssociationID) { + t.log.Error(err, "failed to disassociate branch ENI from trunk, will try to delete the branch ENI") + // Not returning error here, fallback to force branch ENI deletion + } else { + t.log.Info("AssociationID not found when disassociating branch from trunk ENI, it is already disassociated so delete the branch ENI") + } + } + } err = t.ec2ApiHelper.DeleteNetworkInterface(&eniDetail.ID) if err != nil { branchENIOperationsFailureCount.WithLabelValues("delete_branch_error").Inc() diff --git a/pkg/provider/branch/trunk/trunk_test.go b/pkg/provider/branch/trunk/trunk_test.go index 49dcaf0d..b446bfc9 100644 --- a/pkg/provider/branch/trunk/trunk_test.go +++ b/pkg/provider/branch/trunk/trunk_test.go @@ -62,8 +62,9 @@ var ( Name: MockPodName1, Namespace: MockPodNamespace1, Annotations: map[string]string{config.ResourceNamePodENI: "[{\"eniId\":\"eni-00000000000000000\",\"ifAddress\":\"FF:FF:FF:FF:FF:FF\",\"privateIp\":\"192.168.0.15\"," + - "\"ipv6Addr\":\"2600::\",\"vlanId\":1,\"subnetCidr\":\"192.168.0.0/16\",\"subnetV6Cidr\":\"2600::/64\"},{\"eniId\":\"eni-00000000000000001\",\"ifAddress\":\"" + - "FF:FF:FF:FF:FF:F9\",\"privateIp\":\"192.168.0.16\",\"ipv6Addr\":\"2600::1\",\"vlanId\":2,\"subnetCidr\":\"192.168.0.0/16\",\"subnetV6Cidr\":\"2600::/64\"}]"}}, + "\"ipv6Addr\":\"2600::\",\"vlanId\":1,\"subnetCidr\":\"192.168.0.0/16\",\"subnetV6Cidr\":\"2600::/64\",\"AssociationId\":\"trunk-assoc-0000000000000000\"},{\"eniId\":\"eni-00000000000000001\"" + + ",\"ifAddress\":\"FF:FF:FF:FF:FF:F9\",\"privateIp\":\"192.168.0.16\",\"ipv6Addr\":\"2600::1\",\"vlanId\":2,\"subnetCidr\":\"192.168.0.0/16\",\"subnetV6Cidr\":\"2600::/64\"," + + "\"AssociationId\":\"trunk-assoc-0000000000000001\"}]"}}, Spec: v1.PodSpec{NodeName: NodeName}, Status: v1.PodStatus{}, } @@ -93,20 +94,23 @@ var ( SecurityGroups = []string{SecurityGroup1, SecurityGroup2} // Branch Interface 1 - Branch1Id = "eni-00000000000000000" - MacAddr1 = "FF:FF:FF:FF:FF:FF" - BranchIp1 = "192.168.0.15" - BranchV6Ip1 = "2600::" - VlanId1 = 1 + Branch1Id = "eni-00000000000000000" + MacAddr1 = "FF:FF:FF:FF:FF:FF" + BranchIp1 = "192.168.0.15" + BranchV6Ip1 = "2600::" + VlanId1 = 1 + MockAssociationID1 = "trunk-assoc-0000000000000000" + MockAssociationID2 = "trunk-assoc-0000000000000001" EniDetails1 = &ENIDetails{ - ID: Branch1Id, - MACAdd: MacAddr1, - IPV4Addr: BranchIp1, - IPV6Addr: BranchV6Ip1, - VlanID: VlanId1, - SubnetCIDR: SubnetCidrBlock, - SubnetV6CIDR: SubnetV6CidrBlock, + ID: Branch1Id, + MACAdd: MacAddr1, + IPV4Addr: BranchIp1, + IPV6Addr: BranchV6Ip1, + VlanID: VlanId1, + SubnetCIDR: SubnetCidrBlock, + SubnetV6CIDR: SubnetV6CidrBlock, + AssociationID: MockAssociationID1, } branchENIs1 = []*ENIDetails{EniDetails1} @@ -126,13 +130,14 @@ var ( VlanId2 = 2 EniDetails2 = &ENIDetails{ - ID: Branch2Id, - MACAdd: MacAddr2, - IPV4Addr: BranchIp2, - IPV6Addr: BranchV6Ip2, - VlanID: VlanId2, - SubnetCIDR: SubnetCidrBlock, - SubnetV6CIDR: SubnetV6CidrBlock, + ID: Branch2Id, + MACAdd: MacAddr2, + IPV4Addr: BranchIp2, + IPV6Addr: BranchV6Ip2, + VlanID: VlanId2, + SubnetCIDR: SubnetCidrBlock, + SubnetV6CIDR: SubnetV6CidrBlock, + AssociationID: MockAssociationID2, } BranchInterface2 = &awsEc2.NetworkInterface{ @@ -142,8 +147,6 @@ var ( Ipv6Address: &BranchV6Ip2, } - branchENIs2 = []*ENIDetails{EniDetails2} - // Trunk Interface trunkId = "eni-00000000000000002" trunkInterface = &awsEc2.NetworkInterface{ @@ -189,17 +192,27 @@ var ( }, } - trunkAssociationsBranch1And2 = []*awsEc2.TrunkInterfaceAssociation{ - { - BranchInterfaceId: &EniDetails1.ID, - VlanId: aws.Int64(int64(EniDetails1.VlanID)), + mockAssociationOutput1 = &awsEc2.AssociateTrunkInterfaceOutput{ + InterfaceAssociation: &awsEc2.TrunkInterfaceAssociation{ + AssociationId: &MockAssociationID1, }, - { - BranchInterfaceId: &EniDetails2.ID, - VlanId: aws.Int64(int64(EniDetails2.VlanID)), + } + mockAssociationOutput2 = &awsEc2.AssociateTrunkInterfaceOutput{ + InterfaceAssociation: &awsEc2.TrunkInterfaceAssociation{ + AssociationId: &MockAssociationID2, }, } + ENIDetailsMissingAssociationID = &ENIDetails{ + ID: Branch2Id, + MACAdd: MacAddr2, + IPV4Addr: BranchIp2, + IPV6Addr: BranchV6Ip2, + VlanID: VlanId2, + SubnetCIDR: SubnetCidrBlock, + SubnetV6CIDR: SubnetV6CidrBlock, + } + MockError = fmt.Errorf("mock error") ) @@ -229,11 +242,17 @@ func getMockTrunk() trunkENI { log: log, usedVlanIds: make([]bool, MaxAllocatableVlanIds), uidToBranchENIMap: map[string][]*ENIDetails{}, + nodeNameTag: []*awsEc2.Tag{ + { + Key: aws.String(config.NetworkInterfaceNodeIDKey), + Value: aws.String(FakeInstance.InstanceID()), + }, + }, } } func TestNewTrunkENI(t *testing.T) { - trunkENI := NewTrunkENI(zap.New(), nil, nil) + trunkENI := NewTrunkENI(zap.New(), FakeInstance, nil) assert.NotNil(t, trunkENI) } @@ -400,34 +419,106 @@ func TestTrunkENI_getBranchInterfaceMap_EmptyList(t *testing.T) { assert.Zero(t, len(branchENIsMap)) } -// TestTrunkENI_deleteENI tests the trunk is deleted and vlan ID freed in case of no errors +// TestTrunkENI_deleteENI tests deleting branch ENI func TestTrunkENI_deleteENI(t *testing.T) { - ctrl := gomock.NewController(t) - defer ctrl.Finish() - - trunkENI, ec2APIHelper, _ := getMockHelperInstanceAndTrunkObject(ctrl) - trunkENI.markVlanAssigned(VlanId1) - - ec2APIHelper.EXPECT().DeleteNetworkInterface(&Branch1Id).Return(nil) - - err := trunkENI.deleteENI(EniDetails1) - assert.NoError(t, err) - assert.False(t, trunkENI.usedVlanIds[VlanId1]) -} - -// TestTrunkENI_deleteENI_Fail tests if the ENI deletion fails then the vlan ID is not freed -func TestTrunkENI_deleteENI_Fail(t *testing.T) { - ctrl := gomock.NewController(t) - defer ctrl.Finish() + type args struct { + eniDetail *ENIDetails + VlanID int + } + type fields struct { + mockEC2APIHelper *mock_api.MockEC2APIHelper + trunkENI *trunkENI + } + testTrunkENI_deleteENI := []struct { + name string + prepare func(f *fields) + args args + wantErr bool + asserts func(f *fields) + }{ + { + name: "Vland_Freed, verifies VLANID is freed when branch ENI is deleted", + prepare: func(f *fields) { + f.mockEC2APIHelper.EXPECT().DisassociateTrunkInterface(&MockAssociationID1).Return(nil) + f.mockEC2APIHelper.EXPECT().DeleteNetworkInterface(&Branch1Id).Return(nil) + }, + args: args{ + eniDetail: EniDetails1, + VlanID: VlanId1, + }, + wantErr: false, + asserts: func(f *fields) { + assert.False(t, f.trunkENI.usedVlanIds[VlanId1]) + }, + }, + { + name: "Vland_NotFreed, verifies VLANID is not freed when branch ENI delete fails", + prepare: func(f *fields) { + f.mockEC2APIHelper.EXPECT().DisassociateTrunkInterface(&MockAssociationID1).Return(nil) + f.mockEC2APIHelper.EXPECT().DeleteNetworkInterface(&Branch1Id).Return(MockError) + }, + args: args{ + eniDetail: EniDetails1, + VlanID: VlanId1, + }, + wantErr: true, + asserts: func(f *fields) { + assert.True(t, f.trunkENI.usedVlanIds[VlanId1]) + }, + }, + { + name: "DisassociateTrunkInterface_Fails, verifies branch ENI is deleted when disassociation fails for backward compatibility", + prepare: func(f *fields) { + f.mockEC2APIHelper.EXPECT().DisassociateTrunkInterface(&MockAssociationID1).Return(MockError) + f.mockEC2APIHelper.EXPECT().DeleteNetworkInterface(&Branch1Id).Return(nil) + }, + args: args{ + eniDetail: EniDetails1, + VlanID: VlanId1, + }, + wantErr: false, + asserts: func(f *fields) { + assert.False(t, f.trunkENI.usedVlanIds[VlanId1]) + }, + }, + { + name: "MissingAssociationID, verifies DisassociateTrunkInterface is skipped when association ID is missing and branch ENI is deleted for backward compatibility", + prepare: func(f *fields) { + f.mockEC2APIHelper.EXPECT().DeleteNetworkInterface(&Branch2Id).Return(nil) + }, + args: args{ + eniDetail: ENIDetailsMissingAssociationID, + VlanID: VlanId2, + }, + wantErr: false, + asserts: func(f *fields) { + assert.False(t, f.trunkENI.usedVlanIds[VlanId2]) + }, + }, + } - trunkENI, ec2APIHelper, _ := getMockHelperInstanceAndTrunkObject(ctrl) - trunkENI.markVlanAssigned(VlanId1) + for _, tt := range testTrunkENI_deleteENI { + t.Run(tt.name, func(t *testing.T) { + ctrl := gomock.NewController(t) + defer ctrl.Finish() - ec2APIHelper.EXPECT().DeleteNetworkInterface(&Branch1Id).Return(MockError) + trunkENI, ec2APIHelper, _ := getMockHelperInstanceAndTrunkObject(ctrl) + trunkENI.markVlanAssigned(tt.args.VlanID) - err := trunkENI.deleteENI(EniDetails1) - assert.Error(t, MockError, err) - assert.True(t, trunkENI.usedVlanIds[VlanId1]) + f := fields{ + mockEC2APIHelper: ec2APIHelper, + trunkENI: trunkENI, + } + if tt.prepare != nil { + tt.prepare(&f) + } + err := f.trunkENI.deleteENI(tt.args.eniDetail) + assert.Equal(t, err != nil, tt.wantErr) + if tt.asserts != nil { + tt.asserts(&f) + } + }) + } } // TestTrunkENI_DeleteCooledDownENIs_NotCooledDown tests that ENIs that have not cooled down are not deleted @@ -463,7 +554,9 @@ func TestTrunkENI_DeleteCooledDownENIs_NoDeletionTimeStamp(t *testing.T) { trunkENI.deleteQueue = append(trunkENI.deleteQueue, EniDetails1, EniDetails2) + ec2APIHelper.EXPECT().DisassociateTrunkInterface(&MockAssociationID1).Return(nil) ec2APIHelper.EXPECT().DeleteNetworkInterface(&EniDetails1.ID).Return(nil) + ec2APIHelper.EXPECT().DisassociateTrunkInterface(&MockAssociationID2).Return(nil) ec2APIHelper.EXPECT().DeleteNetworkInterface(&EniDetails2.ID).Return(nil) mockK8sAPI := mock_k8s.NewMockK8sWrapper(ctrl) @@ -487,6 +580,7 @@ func TestTrunkENI_DeleteCooledDownENIs_CooledDownResource(t *testing.T) { trunkENI.deleteQueue = append(trunkENI.deleteQueue, EniDetails1, EniDetails2) + ec2APIHelper.EXPECT().DisassociateTrunkInterface(&MockAssociationID1).Return(nil) ec2APIHelper.EXPECT().DeleteNetworkInterface(&EniDetails1.ID).Return(nil) mockK8sAPI := mock_k8s.NewMockK8sWrapper(ctrl) @@ -512,16 +606,17 @@ func TestTrunkENI_DeleteCooledDownENIs_DeleteFailed(t *testing.T) { trunkENI.usedVlanIds[VlanId2] = true trunkENI.deleteQueue = append(trunkENI.deleteQueue, EniDetails1, EniDetails2) - gomock.InOrder( - coolDown.EXPECT().GetCoolDownPeriod().Return(time.Second*60).AnyTimes(), - ec2APIHelper.EXPECT().DeleteNetworkInterface(&EniDetails1.ID).Return(MockError).Times(MaxDeleteRetries), - ec2APIHelper.EXPECT().DeleteNetworkInterface(&EniDetails2.ID).Return(nil), - ) mockK8sAPI := mock_k8s.NewMockK8sWrapper(ctrl) mockK8sAPI.EXPECT().GetConfigMap(config.VpcCniConfigMapName, config.KubeSystemNamespace).Return(createCoolDownMockCM("60"), nil) cooldown.InitCoolDownPeriod(mockK8sAPI, zap.New(zap.UseDevMode(true)).WithName("cooldown")) + coolDown.EXPECT().GetCoolDownPeriod().Return(time.Second * 60).AnyTimes() + ec2APIHelper.EXPECT().DisassociateTrunkInterface(&MockAssociationID1).Return(nil).Times(MaxDeleteRetries) + ec2APIHelper.EXPECT().DeleteNetworkInterface(&EniDetails1.ID).Return(MockError).Times(MaxDeleteRetries) + ec2APIHelper.EXPECT().DisassociateTrunkInterface(&MockAssociationID2).Return(nil) + ec2APIHelper.EXPECT().DeleteNetworkInterface(&EniDetails2.ID).Return(nil) + trunkENI.DeleteCooledDownENIs() assert.Zero(t, len(trunkENI.deleteQueue)) } @@ -598,7 +693,7 @@ func TestTrunkENI_InitTrunk(t *testing.T) { f.mockEC2APIHelper.EXPECT().GetInstanceNetworkInterface(&InstanceId).Return([]*awsEc2.InstanceNetworkInterface{}, nil) f.mockInstance.EXPECT().GetHighestUnusedDeviceIndex().Return(freeIndex, nil) f.mockInstance.EXPECT().SubnetID().Return(SubnetId) - f.mockEC2APIHelper.EXPECT().CreateAndAttachNetworkInterface(&InstanceId, &SubnetId, SecurityGroups, nil, + f.mockEC2APIHelper.EXPECT().CreateAndAttachNetworkInterface(&InstanceId, &SubnetId, SecurityGroups, f.trunkENI.nodeNameTag, &freeIndex, &TrunkEniDescription, &InterfaceTypeTrunk, nil).Return(trunkInterface, nil) }, // Pass nil to set the instance to fields.mockInstance in the function later @@ -731,23 +826,6 @@ func TestTrunkENI_InitTrunk(t *testing.T) { } } -// TestTrunkENI_DeleteAllBranchENIs tests all branch ENI associated with the trunk are deleted -func TestTrunkENI_DeleteAllBranchENIs(t *testing.T) { - ctrl := gomock.NewController(t) - defer ctrl.Finish() - - trunkENI, mockEC2APIHelper, _ := getMockHelperInstanceAndTrunkObject(ctrl) - trunkENI.uidToBranchENIMap[PodUID] = branchENIs1 - trunkENI.uidToBranchENIMap[PodUID2] = branchENIs2 - trunkENI.deleteQueue = append(trunkENI.deleteQueue, branchENIs1[0]) - - // Since we added the same branch ENIs in the cool down queue and in the pod to eni map - mockEC2APIHelper.EXPECT().DeleteNetworkInterface(&Branch1Id).Return(nil).Times(2) - mockEC2APIHelper.EXPECT().DeleteNetworkInterface(&Branch2Id).Return(nil) - - trunkENI.DeleteAllBranchENIs() -} - // TestTrunkENI_CreateAndAssociateBranchENIs test branch is created and associated with the trunk and valid eni details // are returned func TestTrunkENI_CreateAndAssociateBranchENIs(t *testing.T) { @@ -763,11 +841,11 @@ func TestTrunkENI_CreateAndAssociateBranchENIs(t *testing.T) { mockInstance.EXPECT().SubnetV6CidrBlock().Return(SubnetV6CidrBlock).Times(2) mockEC2APIHelper.EXPECT().CreateNetworkInterface(&BranchEniDescription, &SubnetId, SecurityGroups, - vlan1Tag, nil, nil).Return(BranchInterface1, nil) - mockEC2APIHelper.EXPECT().AssociateBranchToTrunk(&trunkId, &Branch1Id, VlanId1).Return(nil, nil) - mockEC2APIHelper.EXPECT().CreateNetworkInterface(&BranchEniDescription, &SubnetId, SecurityGroups, vlan2Tag, + append(vlan1Tag, trunkENI.nodeNameTag...), nil, nil).Return(BranchInterface1, nil) + mockEC2APIHelper.EXPECT().AssociateBranchToTrunk(&trunkId, &Branch1Id, VlanId1).Return(mockAssociationOutput1, nil) + mockEC2APIHelper.EXPECT().CreateNetworkInterface(&BranchEniDescription, &SubnetId, SecurityGroups, append(vlan2Tag, trunkENI.nodeNameTag...), nil, nil).Return(BranchInterface2, nil) - mockEC2APIHelper.EXPECT().AssociateBranchToTrunk(&trunkId, &Branch2Id, VlanId2).Return(nil, nil) + mockEC2APIHelper.EXPECT().AssociateBranchToTrunk(&trunkId, &Branch2Id, VlanId2).Return(mockAssociationOutput2, nil) eniDetails, err := trunkENI.CreateAndAssociateBranchENIs(MockPod2, SecurityGroups, 2) expectedENIDetails := []*ENIDetails{EniDetails1, EniDetails2} @@ -797,11 +875,11 @@ func TestTrunkENI_CreateAndAssociateBranchENIs_InstanceSecurityGroup(t *testing. mockInstance.EXPECT().CurrentInstanceSecurityGroups().Return(InstanceSecurityGroup) mockEC2APIHelper.EXPECT().CreateNetworkInterface(&BranchEniDescription, &SubnetId, InstanceSecurityGroup, - vlan1Tag, nil, nil).Return(BranchInterface1, nil) - mockEC2APIHelper.EXPECT().AssociateBranchToTrunk(&trunkId, &Branch1Id, VlanId1).Return(nil, nil) + append(vlan1Tag, trunkENI.nodeNameTag...), nil, nil).Return(BranchInterface1, nil) + mockEC2APIHelper.EXPECT().AssociateBranchToTrunk(&trunkId, &Branch1Id, VlanId1).Return(mockAssociationOutput1, nil) mockEC2APIHelper.EXPECT().CreateNetworkInterface(&BranchEniDescription, &SubnetId, InstanceSecurityGroup, - vlan2Tag, nil, nil).Return(BranchInterface2, nil) - mockEC2APIHelper.EXPECT().AssociateBranchToTrunk(&trunkId, &Branch2Id, VlanId2).Return(nil, nil) + append(vlan2Tag, trunkENI.nodeNameTag...), nil, nil).Return(BranchInterface2, nil) + mockEC2APIHelper.EXPECT().AssociateBranchToTrunk(&trunkId, &Branch2Id, VlanId2).Return(mockAssociationOutput2, nil) eniDetails, err := trunkENI.CreateAndAssociateBranchENIs(MockPod2, []string{}, 2) expectedENIDetails := []*ENIDetails{EniDetails1, EniDetails2} @@ -831,16 +909,16 @@ func TestTrunkENI_CreateAndAssociateBranchENIs_ErrorAssociate(t *testing.T) { gomock.InOrder( mockEC2APIHelper.EXPECT().CreateNetworkInterface(&BranchEniDescription, &SubnetId, SecurityGroups, - vlan1Tag, nil, nil).Return(BranchInterface1, nil), - mockEC2APIHelper.EXPECT().AssociateBranchToTrunk(&trunkId, &Branch1Id, VlanId1).Return(nil, nil), + append(vlan1Tag, trunkENI.nodeNameTag...), nil, nil).Return(BranchInterface1, nil), + mockEC2APIHelper.EXPECT().AssociateBranchToTrunk(&trunkId, &Branch1Id, VlanId1).Return(mockAssociationOutput1, nil), mockEC2APIHelper.EXPECT().CreateNetworkInterface(&BranchEniDescription, &SubnetId, SecurityGroups, - vlan2Tag, nil, nil).Return(BranchInterface2, nil), + append(vlan2Tag, trunkENI.nodeNameTag...), nil, nil).Return(BranchInterface2, nil), mockEC2APIHelper.EXPECT().AssociateBranchToTrunk(&trunkId, &Branch2Id, VlanId2).Return(nil, MockError), ) _, err := trunkENI.CreateAndAssociateBranchENIs(MockPod2, SecurityGroups, 2) assert.Error(t, MockError, err) - assert.Equal(t, []*ENIDetails{EniDetails1, EniDetails2}, trunkENI.deleteQueue) + assert.Equal(t, []*ENIDetails{EniDetails1, ENIDetailsMissingAssociationID}, trunkENI.deleteQueue) } // TestTrunkENI_CreateAndAssociateBranchENIs_ErrorCreate tests if error is returned on associate then the created interfaces @@ -858,10 +936,10 @@ func TestTrunkENI_CreateAndAssociateBranchENIs_ErrorCreate(t *testing.T) { mockInstance.EXPECT().SubnetV6CidrBlock().Return(SubnetV6CidrBlock).Times(1) gomock.InOrder( - mockEC2APIHelper.EXPECT().CreateNetworkInterface(&BranchEniDescription, &SubnetId, SecurityGroups, vlan1Tag, + mockEC2APIHelper.EXPECT().CreateNetworkInterface(&BranchEniDescription, &SubnetId, SecurityGroups, append(vlan1Tag, trunkENI.nodeNameTag...), nil, nil).Return(BranchInterface1, nil), - mockEC2APIHelper.EXPECT().AssociateBranchToTrunk(&trunkId, &Branch1Id, VlanId1).Return(nil, nil), - mockEC2APIHelper.EXPECT().CreateNetworkInterface(&BranchEniDescription, &SubnetId, SecurityGroups, vlan2Tag, + mockEC2APIHelper.EXPECT().AssociateBranchToTrunk(&trunkId, &Branch1Id, VlanId1).Return(mockAssociationOutput1, nil), + mockEC2APIHelper.EXPECT().CreateNetworkInterface(&BranchEniDescription, &SubnetId, SecurityGroups, append(vlan2Tag, trunkENI.nodeNameTag...), nil, nil).Return(nil, MockError), ) diff --git a/pkg/utils/events.go b/pkg/utils/events.go index 6afef7ad..4ac5d03c 100644 --- a/pkg/utils/events.go +++ b/pkg/utils/events.go @@ -23,12 +23,13 @@ import ( const ( UnsupportedInstanceTypeReason = "Unsupported" InsufficientCidrBlocksReason = "InsufficientCidrBlocks" - CNINodeCreatedReason = "CNINodeCreation" NodeTrunkInitiatedReason = "NodeTrunkInitiated" NodeTrunkFailedInitializationReason = "NodeTrunkFailedInit" EniConfigNameNotFoundReason = "EniConfigNameNotFound" VersionNotice = "ControllerVersionNotice" BranchENICoolDownUpdateReason = "BranchENICoolDownPeriodUpdated" + CNINodeDeleteFailed = "CNINodeDeletionFailed" + CNINodeCreateFailed = "CNINodeCreationFailed" ) func SendNodeEventWithNodeName(client k8s.K8sWrapper, nodeName, reason, msg, eventType string, logger logr.Logger) { diff --git a/scripts/gen_mocks.sh b/scripts/gen_mocks.sh index c0b65e9d..c6111f9e 100755 --- a/scripts/gen_mocks.sh +++ b/scripts/gen_mocks.sh @@ -3,9 +3,11 @@ alias mockgen='mockgen -copyright_file=templates/copyright.txt' mockgen -destination=../mocks/amazon-vcp-resource-controller-k8s/pkg/aws/ec2/api/mock_ec2_wrapper.go github.com/aws/amazon-vpc-resource-controller-k8s/pkg/aws/ec2/api EC2Wrapper mockgen -destination=../mocks/amazon-vcp-resource-controller-k8s/pkg/aws/ec2/api/mock_ec2_apihelper.go github.com/aws/amazon-vpc-resource-controller-k8s/pkg/aws/ec2/api EC2APIHelper mockgen -destination=../mocks/amazon-vcp-resource-controller-k8s/pkg/aws/ec2/mock_instance.go github.com/aws/amazon-vpc-resource-controller-k8s/pkg/aws/ec2 EC2Instance +mockgen -destination=../mocks/amazon-vcp-resource-controller-k8s/pkg/aws/ec2/api/cleanup/mock_resource_cleaner.go github.com/aws/amazon-vpc-resource-controller-k8s/pkg/aws/ec2/api/cleanup ResourceCleaner # package k8s mocks mockgen -destination=../mocks/amazon-vcp-resource-controller-k8s/pkg/k8s/mock_k8swrapper.go github.com/aws/amazon-vpc-resource-controller-k8s/pkg/k8s K8sWrapper mockgen -destination=../mocks/amazon-vcp-resource-controller-k8s/pkg/k8s/pod/mock_podapiwrapper.go github.com/aws/amazon-vpc-resource-controller-k8s/pkg/k8s/pod PodClientAPIWrapper +mockgen -destination=../mocks/amazon-vcp-resource-controller-k8s/pkg/k8s/mock_finalizer.go github.com/aws/amazon-vpc-resource-controller-k8s/pkg/k8s FinalizerManager # package worker mocks mockgen -destination=../mocks/amazon-vcp-resource-controller-k8s/pkg/worker/mock_worker.go github.com/aws/amazon-vpc-resource-controller-k8s/pkg/worker Worker # package handler mocks diff --git a/test/framework/options.go b/test/framework/options.go index c4bdbe94..1b7b82c4 100644 --- a/test/framework/options.go +++ b/test/framework/options.go @@ -15,6 +15,8 @@ package framework import ( "flag" + "os" + "strings" "github.com/pkg/errors" "k8s.io/client-go/tools/clientcmd" @@ -32,6 +34,7 @@ type Options struct { AWSRegion string AWSVPCID string ReleasedImageVersion string + ClusterRoleArn string } func (options *Options) BindFlags() { @@ -40,6 +43,7 @@ func (options *Options) BindFlags() { flag.StringVar(&options.AWSRegion, "aws-region", "", `AWS Region for the kubernetes cluster`) flag.StringVar(&options.AWSVPCID, "aws-vpc-id", "", `AWS VPC ID for the kubernetes cluster`) flag.StringVar(&options.ReleasedImageVersion, "latest-released-rc-image-tag", "v1.1.3", `VPC RC latest released image`) + flag.StringVar(&options.ClusterRoleArn, "cluster-role-arn", "", "EKS Cluster role ARN") } func (options *Options) Validate() error { @@ -58,5 +62,10 @@ func (options *Options) Validate() error { if len(options.ReleasedImageVersion) == 0 { return errors.Errorf("%s must be set!", "latest-released-rc-image-tag") } + dir, err := os.Executable() + if err == nil && len(options.ClusterRoleArn) == 0 && strings.Contains(dir, "ec2api") { + return errors.Errorf("%s must be set when running ec2api tests", "cluster-role-arn") + } + return nil } diff --git a/test/framework/resource/aws/ec2/manager.go b/test/framework/resource/aws/ec2/manager.go index be6d0c3a..02bfff7e 100644 --- a/test/framework/resource/aws/ec2/manager.go +++ b/test/framework/resource/aws/ec2/manager.go @@ -19,6 +19,7 @@ import ( "github.com/aws/amazon-vpc-resource-controller-k8s/pkg/aws/vpc" "github.com/aws/amazon-vpc-resource-controller-k8s/test/framework/utils" + "github.com/aws/aws-sdk-go/aws" "github.com/aws/aws-sdk-go/aws/awserr" "github.com/aws/aws-sdk-go/service/ec2" diff --git a/test/framework/resource/k8s/node/manager.go b/test/framework/resource/k8s/node/manager.go index c69c74c3..a7e9e28b 100644 --- a/test/framework/resource/k8s/node/manager.go +++ b/test/framework/resource/k8s/node/manager.go @@ -34,6 +34,8 @@ type Manager interface { GetCNINode(node *v1.Node) (*cninode.CNINode, error) GetCNINodeList() (*cninode.CNINodeList, error) GetInstanceID(node *v1.Node) string + DeleteCNINode(cniNode *cninode.CNINode) error + UpdateCNINode(oldCNINode, newCNINode *cninode.CNINode) error } type defaultManager struct { @@ -127,3 +129,13 @@ func (d *defaultManager) GetInstanceID(node *v1.Node) string { } return "" } + +func (d *defaultManager) DeleteCNINode(cniNode *cninode.CNINode) error { + err := d.k8sClient.Delete(context.Background(), cniNode) + return err +} + +func (d *defaultManager) UpdateCNINode(oldCNINode, newCNINode *cninode.CNINode) error { + err := d.k8sClient.Patch(context.Background(), newCNINode, client.MergeFromWithOptions(oldCNINode, client.MergeFromWithOptimisticLock{})) + return err +} diff --git a/test/integration/cninode/cninode_test.go b/test/integration/cninode/cninode_test.go index 8173a348..5d577f75 100644 --- a/test/integration/cninode/cninode_test.go +++ b/test/integration/cninode/cninode_test.go @@ -15,14 +15,21 @@ package cninode_test import ( "context" + "time" + "github.com/aws/amazon-vpc-resource-controller-k8s/apis/vpcresources/v1alpha1" "github.com/aws/amazon-vpc-resource-controller-k8s/pkg/config" "github.com/aws/amazon-vpc-resource-controller-k8s/pkg/utils" "github.com/aws/amazon-vpc-resource-controller-k8s/test/framework/resource/k8s/node" testUtils "github.com/aws/amazon-vpc-resource-controller-k8s/test/framework/utils" + "github.com/google/go-cmp/cmp" + "github.com/google/go-cmp/cmp/cmpopts" . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" + v1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/wait" + "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" ) var _ = Describe("[CANARY]CNINode test", func() { @@ -71,6 +78,73 @@ var _ = Describe("[CANARY]CNINode test", func() { }) }) }) + + Describe("CNINode is re-created when node exists", func() { + Context("when CNINode is deleted but node exists", func() { + It("it should re-create CNINode", func() { + nodeList, err := frameWork.NodeManager.GetNodesWithOS(config.OSLinux) + Expect(err).ToNot(HaveOccurred()) + cniNode, err := frameWork.NodeManager.GetCNINode(&nodeList.Items[0]) + Expect(err).ToNot(HaveOccurred()) + err = frameWork.NodeManager.DeleteCNINode(cniNode) + Expect(err).ToNot(HaveOccurred()) + time.Sleep(testUtils.PollIntervalShort) // allow time to re-create CNINode + _, err = frameWork.NodeManager.GetCNINode(&nodeList.Items[0]) + Expect(err).ToNot(HaveOccurred()) + VerifyCNINodeFields(cniNode) + }) + }) + + }) + + Describe("CNINode update tests", func() { + var cniNode *v1alpha1.CNINode + var node *v1.Node + BeforeEach(func() { + nodeList, err := frameWork.NodeManager.GetNodesWithOS(config.OSLinux) + Expect(err).ToNot(HaveOccurred()) + node = &nodeList.Items[0] + cniNode, err = frameWork.NodeManager.GetCNINode(node) + Expect(err).ToNot(HaveOccurred()) + VerifyCNINodeFields(cniNode) + }) + AfterEach(func() { + time.Sleep(testUtils.PollIntervalShort) + newCNINode, err := frameWork.NodeManager.GetCNINode(node) + Expect(err).ToNot(HaveOccurred()) + // Verify CNINode after update matches CNINode before update + Expect(newCNINode).To(BeComparableTo(cniNode, cmp.Options{ + cmpopts.IgnoreTypes(metav1.TypeMeta{}), + cmpopts.IgnoreFields(metav1.ObjectMeta{}, "ResourceVersion", "Generation", "ManagedFields"), + })) + }) + + Context("when finalizer is removed", func() { + It("it should add the finalizer", func() { + cniNodeCopy := cniNode.DeepCopy() + controllerutil.RemoveFinalizer(cniNodeCopy, config.NodeTerminationFinalizer) + err := frameWork.NodeManager.UpdateCNINode(cniNode, cniNodeCopy) + Expect(err).ToNot(HaveOccurred()) + }) + }) + Context("when Tags is removed", func() { + It("it should add the Tags", func() { + cniNodeCopy := cniNode.DeepCopy() + cniNodeCopy.Spec.Tags = map[string]string{} + err := frameWork.NodeManager.UpdateCNINode(cniNode, cniNodeCopy) + Expect(err).ToNot(HaveOccurred()) + }) + }) + Context("when Label is removed", func() { + It("it should add the label", func() { + cniNodeCopy := cniNode.DeepCopy() + cniNodeCopy.ObjectMeta.Labels = map[string]string{} + err := frameWork.NodeManager.UpdateCNINode(cniNode, cniNodeCopy) + Expect(err).ToNot(HaveOccurred()) + }) + }) + }) + }) func ListNodesAndGetAutoScalingGroupName() string { @@ -104,3 +178,17 @@ func WaitTillNodeSizeUpdated(desiredSize int) error { }) return err } + +// Verify finalizer, tag, and label is set on new CNINode +func VerifyCNINodeFields(cniNode *v1alpha1.CNINode) { + By("verifying finalizer is set") + Expect(cniNode.ObjectMeta.Finalizers).To(ContainElement(config.NodeTerminationFinalizer)) + // For maps, ContainElement searches through the map's values. + By("verifying cluster name tag is set") + Expect(cniNode.Spec.Tags).To(ContainElement(frameWork.Options.ClusterName)) + Expect(config.CNINodeClusterNameKey).To(BeKeyOf(cniNode.Spec.Tags)) + + By("verifying node OS label is set") + Expect(cniNode.ObjectMeta.Labels).To(ContainElement(config.OSLinux)) + Expect(config.NodeLabelOS).To(BeKeyOf(cniNode.ObjectMeta.Labels)) +} diff --git a/test/integration/ec2api/ec2api_suite_test.go b/test/integration/ec2api/ec2api_suite_test.go new file mode 100644 index 00000000..1183cc7d --- /dev/null +++ b/test/integration/ec2api/ec2api_suite_test.go @@ -0,0 +1,46 @@ +// Copyright Amazon.com Inc. or its affiliates. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"). You may +// not use this file except in compliance with the License. A copy of the +// License is located at +// +// http://aws.amazon.com/apache2.0/ +// +// or in the "license" file accompanying this file. This file is distributed +// on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +// express or implied. See the License for the specific language governing +// permissions and limitations under the License. +package ec2api_test + +import ( + "testing" + + "github.com/aws/amazon-vpc-resource-controller-k8s/pkg/config" + "github.com/aws/amazon-vpc-resource-controller-k8s/test/framework" + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" +) + +func TestEc2api(t *testing.T) { + RegisterFailHandler(Fail) + RunSpecs(t, "EC2API Suite") +} + +var frameWork *framework.Framework +var nodeListLen int +var _ = BeforeSuite(func() { + By("creating a framework") + frameWork = framework.New(framework.GlobalOptions) + By("verify node count before test") + nodeList, err := frameWork.NodeManager.GetNodesWithOS(config.OSLinux) + Expect(err).ToNot(HaveOccurred()) + nodeListLen = len(nodeList.Items) + Expect(nodeListLen).To(BeNumerically(">", 1)) +}) + +var _ = AfterSuite(func() { + nodeList, err := frameWork.NodeManager.GetNodesWithOS(config.OSLinux) + Expect(err).ToNot(HaveOccurred()) + By("verifying node count after test is unchanged") + Expect(len(nodeList.Items)).To(Equal(nodeListLen)) +}) diff --git a/test/integration/ec2api/ec2api_test.go b/test/integration/ec2api/ec2api_test.go new file mode 100644 index 00000000..d119dddf --- /dev/null +++ b/test/integration/ec2api/ec2api_test.go @@ -0,0 +1,128 @@ +// Copyright Amazon.com Inc. or its affiliates. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"). You may +// not use this file except in compliance with the License. A copy of the +// License is located at +// +// http://aws.amazon.com/apache2.0/ +// +// or in the "license" file accompanying this file. This file is distributed +// on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +// express or implied. See the License for the specific language governing +// permissions and limitations under the License. +package ec2api_test + +import ( + "strings" + "time" + + "github.com/aws/amazon-vpc-resource-controller-k8s/pkg/config" + "github.com/aws/amazon-vpc-resource-controller-k8s/test/framework/utils" + "github.com/aws/aws-sdk-go/aws" + "github.com/aws/aws-sdk-go/aws/credentials/stscreds" + "github.com/aws/aws-sdk-go/aws/session" + "github.com/aws/aws-sdk-go/service/ec2" + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" +) + +// requires AmazonEKSVPCResourceController policy to be attached to the EKS cluster role +var _ = Describe("[LOCAL] Test IAM permissions for EC2 API calls", func() { + var instanceID string + var subnetID string + var instanceType string + var nwInterfaceID string + var err error + BeforeEach(func() { + By("getting instance details") + nodeList, err := frameWork.NodeManager.GetNodesWithOS(config.OSLinux) + Expect(err).ToNot(HaveOccurred()) + Expect(nodeList.Items).ToNot(BeEmpty()) + instanceID = frameWork.NodeManager.GetInstanceID(&nodeList.Items[0]) + ec2Instance, err := frameWork.EC2Manager.GetInstanceDetails(instanceID) + Expect(err).ToNot(HaveOccurred()) + subnetID = *ec2Instance.SubnetId + instanceType = *ec2Instance.InstanceType + + }) + AfterEach(func() { + By("deleting test interface") + err = frameWork.EC2Manager.DeleteNetworkInterface(nwInterfaceID) + Expect(err).ToNot(HaveOccurred()) + }) + Describe("Test DeleteNetworkInterface permission", func() { + Context("when instance is terminated", func() { + It("it should only delete ENIs provisioned by the controller or vpc-cni", func() { + By("creating test ENI without eks:eni:owner tag and attach to EC2 instance") + nwInterfaceID, err = frameWork.EC2Manager.CreateAndAttachNetworkInterface(subnetID, instanceID, instanceType) + Expect(err).ToNot(HaveOccurred()) + By("terminating the instance and sleeping") + err = frameWork.EC2Manager.TerminateInstances(instanceID) + Expect(err).ToNot(HaveOccurred()) + // allow time for instance to be deleted and ENI to be available, new node to be ready + time.Sleep(utils.ResourceCreationTimeout) + By("verifying ENI is not deleted by controller") + err = frameWork.EC2Manager.DescribeNetworkInterface(nwInterfaceID) + Expect(err).ToNot(HaveOccurred()) + }) + }) + }) + Describe("Test CreateNetworkInterfacePermission permission", func() { + var ec2Client *ec2.EC2 + var accountID string + var wantErr bool + JustBeforeEach(func() { + arnSplit := strings.Split(frameWork.Options.ClusterRoleArn, ":") + accountID = arnSplit[len(arnSplit)-2] + By("assuming EKS cluster role") + sess := session.Must(session.NewSession()) + creds := stscreds.NewCredentials(sess, frameWork.Options.ClusterRoleArn) + ec2Client = ec2.New(sess, &aws.Config{Credentials: creds}) + }) + JustAfterEach(func() { + By("creating network interface permission") + _, err = ec2Client.CreateNetworkInterfacePermission(&ec2.CreateNetworkInterfacePermissionInput{ + AwsAccountId: aws.String(accountID), + NetworkInterfaceId: aws.String(nwInterfaceID), + Permission: aws.String(ec2.InterfacePermissionTypeInstanceAttach), + }) + By("validating error is nil or as expected") + Expect(err != nil).To(Equal(wantErr)) + }) + Context("CreateNetworkInterfacePermission on ENI WITH required tag eks:eni:owner=eks-vpc-resource-controller", func() { + It("it should grant CreateNetworkInterfacePermission", func() { + By("creating network interface") + nwInterfaceOp, err := ec2Client.CreateNetworkInterface(&ec2.CreateNetworkInterfaceInput{ + SubnetId: aws.String(subnetID), + TagSpecifications: []*ec2.TagSpecification{ + { + ResourceType: aws.String(ec2.ResourceTypeNetworkInterface), + Tags: []*ec2.Tag{ + { + Key: aws.String(config.NetworkInterfaceOwnerTagKey), + Value: aws.String((config.NetworkInterfaceOwnerTagValue)), + }, + }, + }, + }, + Description: aws.String("VPC-Resource-Controller integration test ENI"), + }) + Expect(err).ToNot(HaveOccurred()) + nwInterfaceID = *nwInterfaceOp.NetworkInterface.NetworkInterfaceId + wantErr = false + }) + }) + Context("CreateNetworkInterfacePermission on ENI WITHOUT required tag eks:eni:owner=eks-vpc-resource-controller", func() { + It("it should not grant CreateNetworkInterfacePermission", func() { + By("creating network interface") + nwInterfaceOp, err := ec2Client.CreateNetworkInterface(&ec2.CreateNetworkInterfaceInput{ + SubnetId: aws.String(subnetID), + Description: aws.String("VPC-Resource-Controller integration test ENI"), + }) + Expect(err).ToNot(HaveOccurred()) + nwInterfaceID = *nwInterfaceOp.NetworkInterface.NetworkInterfaceId + wantErr = true + }) + }) + }) +})