Skip to content

Add support to configure branch ENI cooldown period via configmap #342

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Dec 7, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions controllers/core/configmap_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import (
rcHealthz "github.com/aws/amazon-vpc-resource-controller-k8s/pkg/healthz"
"github.com/aws/amazon-vpc-resource-controller-k8s/pkg/k8s"
"github.com/aws/amazon-vpc-resource-controller-k8s/pkg/node/manager"
cooldown "github.com/aws/amazon-vpc-resource-controller-k8s/pkg/provider/branch/cooldown"

"github.com/go-logr/logr"
corev1 "k8s.io/api/core/v1"
Expand Down Expand Up @@ -73,6 +74,17 @@ func (r *ConfigMapReconciler) Reconcile(ctx context.Context, req ctrl.Request) (
}
}

// Check if branch ENI cooldown period is updated
curCoolDownPeriod := cooldown.GetCoolDown().GetCoolDownPeriod()

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this defaulted to 30s since if first time get fails what will be the value?

newCoolDownPeriod, err := cooldown.GetVpcCniConfigMapCoolDownPeriodOrDefault(r.K8sAPI, r.Log)
if curCoolDownPeriod != newCoolDownPeriod {
if err != nil {
// If any error, newCoolDownPeriod will be set to the default value
r.Log.Info("setting cool down period to default", "cool down period", newCoolDownPeriod)
}
cooldown.GetCoolDown().SetCoolDownPeriod(newCoolDownPeriod)
}

// Check if the Windows IPAM flag has changed
newWinIPAMEnabledCond := r.Condition.IsWindowsIPAMEnabled()

Expand Down
4 changes: 4 additions & 0 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ import (
"github.com/aws/amazon-vpc-resource-controller-k8s/pkg/k8s"
"github.com/aws/amazon-vpc-resource-controller-k8s/pkg/k8s/pod"
"github.com/aws/amazon-vpc-resource-controller-k8s/pkg/node/manager"
"github.com/aws/amazon-vpc-resource-controller-k8s/pkg/provider/branch/cooldown"
"github.com/aws/amazon-vpc-resource-controller-k8s/pkg/resource"
"github.com/aws/amazon-vpc-resource-controller-k8s/pkg/utils"
"github.com/aws/amazon-vpc-resource-controller-k8s/pkg/version"
Expand Down Expand Up @@ -290,6 +291,9 @@ func main() {
controllerConditions := condition.NewControllerConditions(
ctrl.Log.WithName("controller conditions"), k8sApi, enableWindowsPrefixDelegation)

// initialize the branch ENI cool down period
cooldown.InitCoolDownPeriod(k8sApi, ctrl.Log)

// when Windows PD feature flag is OFF, do not initialize resource for prefix IPs
var supportedResources []string
if enableWindowsPrefixDelegation {
Expand Down

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions pkg/config/type.go
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ const (
KubeSystemNamespace = "kube-system"
VpcCNIDaemonSetName = "aws-node"
OldVPCControllerDeploymentName = "vpc-resource-controller"
BranchENICooldownPeriodKey = "branch-eni-cooldown"
)

type ResourceType string
Expand Down
85 changes: 85 additions & 0 deletions pkg/provider/branch/cooldown/cooldown.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
// Copyright Amazon.com Inc. or its affiliates. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"). You may
// not use this file except in compliance with the License. A copy of the
// License is located at
//
// http://aws.amazon.com/apache2.0/
//
// or in the "license" file accompanying this file. This file is distributed
// on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
// express or implied. See the License for the specific language governing
// permissions and limitations under the License.

package cooldown

import (
"fmt"
"strconv"
"sync"
"time"

"github.com/aws/amazon-vpc-resource-controller-k8s/pkg/config"
"github.com/aws/amazon-vpc-resource-controller-k8s/pkg/k8s"
"github.com/go-logr/logr"
)

// Global variable for CoolDownPeriod allows packages to Get and Set the coolDown period
var coolDown *cooldown

type cooldown struct {
mu sync.RWMutex
// CoolDownPeriod is the period to wait before deleting the branch ENI for propagation of ip tables rule for deleted pod
coolDownPeriod time.Duration
}

type CoolDown interface {
GetCoolDownPeriod() time.Duration
SetCoolDownPeriod(time.Duration)
}

const (
DefaultCoolDownPeriod = time.Second * 60
)

// Initialize coolDown period by setting the value in configmap or to default
func InitCoolDownPeriod(k8sApi k8s.K8sWrapper, log logr.Logger) {
coolDown = &cooldown{}
coolDownPeriod, err := GetVpcCniConfigMapCoolDownPeriodOrDefault(k8sApi, log)
if err != nil {
log.Info("setting coolDown period to default", "cool down period", coolDownPeriod)
}
coolDown.coolDownPeriod = coolDownPeriod
}

func GetCoolDown() CoolDown {
return coolDown
}

func GetVpcCniConfigMapCoolDownPeriodOrDefault(k8sApi k8s.K8sWrapper, log logr.Logger) (time.Duration, error) {
vpcCniConfigMap, err := k8sApi.GetConfigMap(config.VpcCniConfigMapName, config.KubeSystemNamespace)
if err == nil && vpcCniConfigMap.Data != nil {
if val, ok := vpcCniConfigMap.Data[config.BranchENICooldownPeriodKey]; ok {
coolDownPeriodInt, err := strconv.Atoi(val)
if err != nil {
log.Error(err, "failed to parse branch ENI coolDown period", "cool down period", val)
} else {
return time.Second * time.Duration(coolDownPeriodInt), nil
}
}
}
// If configmap not found, or configmap data not found, or error in parsing coolDown period, return default coolDown period and error
return DefaultCoolDownPeriod, fmt.Errorf("failed to get coolDown period period:%v", err)
}

func (c *cooldown) GetCoolDownPeriod() time.Duration {
c.mu.Lock()
defer c.mu.Unlock()
return c.coolDownPeriod
}

func (c *cooldown) SetCoolDownPeriod(newCoolDownPeriod time.Duration) {
c.mu.Lock()
defer c.mu.Unlock()
c.coolDownPeriod = newCoolDownPeriod
}
98 changes: 98 additions & 0 deletions pkg/provider/branch/cooldown/cooldown_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
// Copyright Amazon.com Inc. or its affiliates. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"). You may
// not use this file except in compliance with the License. A copy of the
// License is located at
//
// http://aws.amazon.com/apache2.0/
//
// or in the "license" file accompanying this file. This file is distributed
// on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
// express or implied. See the License for the specific language governing
// permissions and limitations under the License.

package cooldown

import (
"fmt"
"testing"
"time"

mock_k8s "github.com/aws/amazon-vpc-resource-controller-k8s/mocks/amazon-vcp-resource-controller-k8s/pkg/k8s"
"github.com/aws/amazon-vpc-resource-controller-k8s/pkg/config"
"github.com/golang/mock/gomock"
"github.com/stretchr/testify/assert"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"sigs.k8s.io/controller-runtime/pkg/log/zap"
)

var log = zap.New(zap.UseDevMode(true)).WithName("cooldown test")
var (
mockConfigMap30s = &corev1.ConfigMap{
TypeMeta: metav1.TypeMeta{},
ObjectMeta: metav1.ObjectMeta{Name: config.VpcCniConfigMapName, Namespace: config.KubeSystemNamespace},
Data: map[string]string{config.BranchENICooldownPeriodKey: "30"},
}
mockConfigMapNilData = &corev1.ConfigMap{
TypeMeta: metav1.TypeMeta{},
ObjectMeta: metav1.ObjectMeta{Name: config.VpcCniConfigMapName, Namespace: config.KubeSystemNamespace},
Data: map[string]string{},
}
mockConfigMapErrData = &corev1.ConfigMap{
TypeMeta: metav1.TypeMeta{},
ObjectMeta: metav1.ObjectMeta{Name: config.VpcCniConfigMapName, Namespace: config.KubeSystemNamespace},
Data: map[string]string{config.BranchENICooldownPeriodKey: "aaa"},
}
)

func TestCoolDown_InitCoolDownPeriod(t *testing.T) {
ctrl := gomock.NewController(t)
defer ctrl.Finish()

type args struct {
vpcCniConfigMap *corev1.ConfigMap
}
tests := []struct {
name string
args args
expectedCoolDown time.Duration
err error
}{
{
name: "VpcCniConfigMap_Exists, verifies cooldown period is set to configmap value when exists",
args: args{vpcCniConfigMap: mockConfigMap30s},
expectedCoolDown: time.Second * 30,
err: nil,
},
{
name: "VpcCniConfigMap_NotExists, verifies cool down period is set to default when configmap does not exist",
args: args{},
expectedCoolDown: time.Second * 60,
err: fmt.Errorf("mock error"),
},
{
name: "VpcCniConfigMap_Exists_NilData, verifies cool period is set to default when configmap data does not exist",
args: args{vpcCniConfigMap: mockConfigMapNilData},
expectedCoolDown: time.Second * 60,
err: nil,
},
{
name: "VpcCniConfigMap_Exists_ErrData, verifies cool period is set to default when configmap data is incorrect",
args: args{vpcCniConfigMap: mockConfigMapErrData},
expectedCoolDown: time.Second * 60,
err: nil,
},
}

for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
ctrl = gomock.NewController(t)
defer ctrl.Finish()
})
mockK8sApi := mock_k8s.NewMockK8sWrapper(ctrl)
mockK8sApi.EXPECT().GetConfigMap(config.VpcCniConfigMapName, config.KubeSystemNamespace).Return(test.args.vpcCniConfigMap, test.err)
InitCoolDownPeriod(mockK8sApi, log)
assert.Equal(t, test.expectedCoolDown, coolDown.coolDownPeriod)
}
}
4 changes: 2 additions & 2 deletions pkg/provider/branch/provider.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ import (
rcHealthz "github.com/aws/amazon-vpc-resource-controller-k8s/pkg/healthz"
"github.com/aws/amazon-vpc-resource-controller-k8s/pkg/pool"
"github.com/aws/amazon-vpc-resource-controller-k8s/pkg/provider"
"github.com/aws/amazon-vpc-resource-controller-k8s/pkg/provider/branch/cooldown"
"github.com/aws/amazon-vpc-resource-controller-k8s/pkg/provider/branch/trunk"
"github.com/aws/amazon-vpc-resource-controller-k8s/pkg/utils"
"github.com/aws/amazon-vpc-resource-controller-k8s/pkg/worker"
Expand Down Expand Up @@ -71,7 +72,6 @@ var (

ReasonTrunkENICreationFailed = "TrunkENICreationFailed"

reconcileRequeueRequest = ctrl.Result{RequeueAfter: time.Minute * 30, Requeue: true}
deleteQueueRequeueRequest = ctrl.Result{RequeueAfter: time.Second * 30, Requeue: true}

// NodeDeleteRequeueRequestDelay represents the time after which the resources belonging to a node will be cleaned
Expand Down Expand Up @@ -358,7 +358,7 @@ func (b *branchENIProvider) CreateAndAnnotateResources(podNamespace string, podN
branchENIs, err := trunkENI.CreateAndAssociateBranchENIs(pod, securityGroups, resourceCount)
if err != nil {
if err == trunk.ErrCurrentlyAtMaxCapacity {
return ctrl.Result{RequeueAfter: config.CoolDownPeriod, Requeue: true}, nil
return ctrl.Result{RequeueAfter: cooldown.GetCoolDown().GetCoolDownPeriod(), Requeue: true}, nil
}
b.apiWrapper.K8sAPI.BroadcastEvent(pod, ReasonBranchAllocationFailed,
fmt.Sprintf("failed to allocate branch ENI to pod: %v", err), v1.EventTypeWarning)
Expand Down
5 changes: 2 additions & 3 deletions pkg/provider/branch/trunk/trunk.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import (
ec2Errors "github.com/aws/amazon-vpc-resource-controller-k8s/pkg/aws/errors"
"github.com/aws/amazon-vpc-resource-controller-k8s/pkg/aws/vpc"
"github.com/aws/amazon-vpc-resource-controller-k8s/pkg/config"
"github.com/aws/amazon-vpc-resource-controller-k8s/pkg/provider/branch/cooldown"

"github.com/aws/aws-sdk-go/aws"
awsEC2 "github.com/aws/aws-sdk-go/service/ec2"
Expand All @@ -38,8 +39,6 @@ import (
const (
// MaxAllocatableVlanIds is the maximum number of Vlan Ids that can be allocated per trunk.
MaxAllocatableVlanIds = 121
// CoolDownPeriod is the period to wait before deleting the branch ENI for propagation of ip tables rule for deleted pod
CoolDownPeriod = time.Second * 30
// MaxDeleteRetries is the maximum number of times the ENI will be retried before being removed from the delete queue
MaxDeleteRetries = 3
)
Expand Down Expand Up @@ -475,7 +474,7 @@ func (t *trunkENI) PushBranchENIsToCoolDownQueue(UID string) {
func (t *trunkENI) DeleteCooledDownENIs() {
for eni, hasENI := t.popENIFromDeleteQueue(); hasENI; eni, hasENI = t.popENIFromDeleteQueue() {
if eni.deletionTimeStamp.IsZero() ||
time.Now().After(eni.deletionTimeStamp.Add(CoolDownPeriod)) {
time.Now().After(eni.deletionTimeStamp.Add(cooldown.GetCoolDown().GetCoolDownPeriod())) {
err := t.deleteENI(eni)
if err != nil {
eni.deleteRetryCount++
Expand Down
Loading