diff --git a/pkg/ipamd/datastore/data_store.go b/pkg/ipamd/datastore/data_store.go index b85b58510b..ee4c5d1e9e 100644 --- a/pkg/ipamd/datastore/data_store.go +++ b/pkg/ipamd/datastore/data_store.go @@ -841,8 +841,7 @@ func (ds *DataStore) GetEFAENIs() map[string]bool { return ret } -// IsRequiredForWarmIPTarget determines if this ENI has warm IPs that are required to fulfill whatever WARM_IP_TARGET is -// set to. +// IsRequiredForWarmIPTarget determines if this ENI has warm IPs that are required to fulfill whatever WARM_IP_TARGET is set to. func (ds *DataStore) isRequiredForWarmIPTarget(warmIPTarget int, eni *ENI) bool { otherWarmIPs := 0 for _, other := range ds.eniPool { @@ -863,8 +862,7 @@ func (ds *DataStore) isRequiredForWarmIPTarget(warmIPTarget int, eni *ENI) bool return otherWarmIPs < warmIPTarget } -// IsRequiredForMinimumIPTarget determines if this ENI is necessary to fulfill whatever MINIMUM_IP_TARGET is -// set to. +// IsRequiredForMinimumIPTarget determines if this ENI is necessary to fulfill whatever MINIMUM_IP_TARGET is set to. func (ds *DataStore) isRequiredForMinimumIPTarget(minimumIPTarget int, eni *ENI) bool { otherIPs := 0 for _, other := range ds.eniPool { @@ -885,8 +883,7 @@ func (ds *DataStore) isRequiredForMinimumIPTarget(minimumIPTarget int, eni *ENI) return otherIPs < minimumIPTarget } -// IsRequiredForWarmPrefixTarget determines if this ENI is necessary to fulfill whatever WARM_PREFIX_TARGET is -// set to. +// IsRequiredForWarmPrefixTarget determines if this ENI is necessary to fulfill whatever WARM_PREFIX_TARGET is set to. func (ds *DataStore) isRequiredForWarmPrefixTarget(warmPrefixTarget int, eni *ENI) bool { freePrefixes := 0 for _, other := range ds.eniPool { @@ -1007,7 +1004,6 @@ func (ds *DataStore) RemoveUnusedENIFromStore(warmIPTarget, minimumIPTarget, war } removableENI := deletableENI.ID - for _, availableCidr := range ds.eniPool[removableENI].AvailableIPv4Cidrs { ds.total -= availableCidr.Size() if availableCidr.IsPrefix { @@ -1277,7 +1273,6 @@ func (ds *DataStore) GetFreePrefixes() int { freePrefixes++ } } - } return freePrefixes } diff --git a/pkg/ipamd/datastore/data_store_test.go b/pkg/ipamd/datastore/data_store_test.go index eeeac95d8a..5e1925fdc5 100644 --- a/pkg/ipamd/datastore/data_store_test.go +++ b/pkg/ipamd/datastore/data_store_test.go @@ -1024,18 +1024,21 @@ func TestWarmENIInteractions(t *testing.T) { _ = ds.AddENI("eni-2", 2, false, false, false) _ = ds.AddENI("eni-3", 3, false, false, false) + // Add an IP address to ENI 1 and assign it to a pod ipv4Addr := net.IPNet{IP: net.ParseIP("1.1.1.1"), Mask: net.IPv4Mask(255, 255, 255, 255)} _ = ds.AddIPv4CidrToStore("eni-1", ipv4Addr, false) key1 := IPAMKey{"net0", "sandbox-1", "eth0"} _, _, err := ds.AssignPodIPv4Address(key1, IPAMMetadata{K8SPodNamespace: "default", K8SPodName: "sample-pod-1"}) assert.NoError(t, err) + // Add another IP address to ENI 1 and assign a pod ipv4Addr = net.IPNet{IP: net.ParseIP("1.1.1.2"), Mask: net.IPv4Mask(255, 255, 255, 255)} _ = ds.AddIPv4CidrToStore("eni-1", ipv4Addr, false) key2 := IPAMKey{"net0", "sandbox-2", "eth0"} _, _, err = ds.AssignPodIPv4Address(key2, IPAMMetadata{K8SPodNamespace: "default", K8SPodName: "sample-pod-2"}) assert.NoError(t, err) + // Add two IP addresses to ENI 2 and one IP address to ENI 3 ipv4Addr = net.IPNet{IP: net.ParseIP("1.1.2.1"), Mask: net.IPv4Mask(255, 255, 255, 255)} _ = ds.AddIPv4CidrToStore("eni-2", ipv4Addr, false) ipv4Addr = net.IPNet{IP: net.ParseIP("1.1.2.2"), Mask: net.IPv4Mask(255, 255, 255, 255)} @@ -1043,44 +1046,76 @@ func TestWarmENIInteractions(t *testing.T) { ipv4Addr = net.IPNet{IP: net.ParseIP("1.1.3.1"), Mask: net.IPv4Mask(255, 255, 255, 255)} _ = ds.AddIPv4CidrToStore("eni-3", ipv4Addr, false) - noWarmIPTarget := 0 - ds.eniPool["eni-2"].createTime = time.Time{} ds.eniPool["eni-3"].createTime = time.Time{} - // We have three ENIs, 5 IPs and two pods on ENI 1. Each ENI can handle two pods. + // We have 3 ENIs, 5 IPs and 2 pods on ENI 1. + // ENI 1: 2 IPs allocated, 2 IPs in use + // ENI 2: 2 IPs allocated, 0 IPs in use + // ENI 3: 1 IP allocated, 0 IPs in use + // => 3 free IPs // We should not be able to remove any ENIs if either warmIPTarget >= 3 or minimumWarmIPTarget >= 5 + + // WARM IP TARGET=3, MINIMUM_IP_TARGET=1 => no ENI should be removed eni := ds.RemoveUnusedENIFromStore(3, 1, 0) assert.Equal(t, "", eni) - // Should not be able to free this ENI because we want at least 5 IPs, which requires at least three ENIs + + // WARM IP TARGET=1, MINIMUM_IP_TARGET=5 => no ENI should be removed eni = ds.RemoveUnusedENIFromStore(1, 5, 0) assert.Equal(t, "", eni) - // Should be able to free an ENI because both warmIPTarget and minimumWarmIPTarget are both effectively 4 + + // WARM IP TARGET=2, MINIMUM_IP_TARGET=4 => ENI 3 should be removed as we only need 2 free IPs, which ENI 2 has removedEni := ds.RemoveUnusedENIFromStore(2, 4, 0) - assert.Contains(t, []string{"eni-2", "eni-3"}, removedEni) + assert.Equal(t, "eni-3", removedEni) - // Should not be able to free an ENI because minimumWarmIPTarget requires at least two ENIs and no warm IP target - eni = ds.RemoveUnusedENIFromStore(noWarmIPTarget, 3, 0) - assert.Equal(t, "", eni) - // Should be able to free an ENI because one ENI can provide a minimum count of 2 IPs - secondRemovedEni := ds.RemoveUnusedENIFromStore(noWarmIPTarget, 2, 0) - assert.Contains(t, []string{"eni-2", "eni-3"}, secondRemovedEni) + // We have 2 ENIs, 4 IPs and 2 pods on ENI 1. + // ENI 1: 2 IPs allocated, 2 IPs in use + // ENI 2: 2 IPs allocated, 0 IPs in use + // => 2 free IPs - assert.NotEqual(t, removedEni, secondRemovedEni, "The two removed ENIs should not be the same ENI.") + // WARM IP TARGET=0, MINIMUM_IP_TARGET=3 => no ENI should be removed + eni = ds.RemoveUnusedENIFromStore(0, 3, 0) + assert.Equal(t, "", eni) - _ = ds.AddENI("eni-4", 3, false, true, false) - _ = ds.AddENI("eni-5", 3, false, false, true) + // WARM IP TARGET=0, MINIMUM_IP_TARGET=2 => ENI 2 should be removed as ENI 1 covers the requirements + removedEni = ds.RemoveUnusedENIFromStore(0, 2, 0) + assert.Contains(t, "eni-2", removedEni) + // Add 2 more ENIs to the datastore and add 1 IP address to each of them + ds.AddENI("eni-4", 4, false, true, false) // trunk ENI + ds.AddENI("eni-5", 5, false, false, true) // EFA ENI ipv4Addr = net.IPNet{IP: net.ParseIP("1.1.4.1"), Mask: net.IPv4Mask(255, 255, 255, 255)} - _ = ds.AddIPv4CidrToStore("eni-4", ipv4Addr, false) + ds.AddIPv4CidrToStore("eni-4", ipv4Addr, false) ipv4Addr = net.IPNet{IP: net.ParseIP("1.1.5.1"), Mask: net.IPv4Mask(255, 255, 255, 255)} - _ = ds.AddIPv4CidrToStore("eni-5", ipv4Addr, false) - + ds.AddIPv4CidrToStore("eni-5", ipv4Addr, false) ds.eniPool["eni-4"].createTime = time.Time{} ds.eniPool["eni-5"].createTime = time.Time{} - thirdRemovedEni := ds.RemoveUnusedENIFromStore(noWarmIPTarget, 2, 0) - // None of the others can be removed... - assert.Equal(t, "", thirdRemovedEni) + + // We have 3 ENIs, 4 IPs and 2 pods on ENI 1. + // ENI 1: 2 IPs allocated, 2 IPs in use + // ENI 4: 1 IPs allocated, 0 IPs in use + // ENI 5: 1 IPs allocated, 0 IPs in use + // => 2 free IPs + + // WARM IP TARGET=0, MINIMUM_IP_TARGET=2 => no ENI can be removed because ENI 4 is a trunk ENI and ENI 5 is an EFA ENI + removedEni = ds.RemoveUnusedENIFromStore(0, 2, 0) + assert.Equal(t, "", removedEni) + assert.Equal(t, 3, ds.GetENIs()) + + // Add 1 more normal ENI to the datastore + ds.AddENI("eni-6", 6, false, false, false) // trunk ENI + ds.eniPool["eni-6"].createTime = time.Time{} + + // We have 4 ENIs, 4 IPs and 2 pods on ENI 1. + // ENI 1: 2 IPs allocated, 2 IPs in use + // ENI 4: 1 IPs allocated, 0 IPs in use + // ENI 5: 1 IPs allocated, 0 IPs in use + // ENI 6: 0 IPs allocated, 0 IPs in use + // => 2 free IPs + + // WARM IP TARGET=0, MINIMUM_IP_TARGET=2 => ENI 6 can be removed + removedEni = ds.RemoveUnusedENIFromStore(0, 2, 0) + assert.Equal(t, "eni-6", removedEni) assert.Equal(t, 3, ds.GetENIs()) } diff --git a/pkg/ipamd/ipamd.go b/pkg/ipamd/ipamd.go index 7f958647e8..6ea08d42a1 100644 --- a/pkg/ipamd/ipamd.go +++ b/pkg/ipamd/ipamd.go @@ -154,10 +154,10 @@ const ( ipV4AddrFamily = "4" ipV6AddrFamily = "6" - //insufficientCidrErrorCooldown is the amount of time reconciler will wait before trying to fetch - //more IPs/prefixes for an ENI. With InsufficientCidr we know the subnet doesn't have enough IPs so - //instead of retrying every 5s which would lead to increase in EC2 AllocIPAddress calls, we wait for - //120 seconds for a retry. + // insufficientCidrErrorCooldown is the amount of time reconciler will wait before trying to fetch + // more IPs/prefixes for an ENI. With InsufficientCidr we know the subnet doesn't have enough IPs so + // instead of retrying every 5s which would lead to increase in EC2 AllocIPAddress calls, we wait for + // 120 seconds for a retry. insufficientCidrErrorCooldown = 120 * time.Second // envManageUntaggedENI is used to determine if untagged ENIs should be managed or unmanaged @@ -216,6 +216,7 @@ type IPAMContext struct { lastInsufficientCidrError time.Time enableManageUntaggedMode bool enablePodIPAnnotation bool + maxPods int // maximum number of pods that can be scheduled on the node } // setUnmanagedENIs will rebuild the set of ENI IDs for ENIs tagged as "no_manage" @@ -499,17 +500,26 @@ func (c *IPAMContext) nodeInit() error { }, 30*time.Second) } + // Make a k8s client request for the current node so that max pods can be derived + node, err := k8sapi.GetNode(ctx, c.k8sClient) + if err != nil { + log.Errorf("Failed to get node", err) + podENIErrInc("nodeInit") + return err + } + + maxPods, isInt64 := node.Status.Capacity.Pods().AsInt64() + if !isInt64 { + log.Errorf("Failed to parse max pods: %s", node.Status.Capacity.Pods().String) + podENIErrInc("nodeInit") + return errors.New("error while trying to determine max pods") + } + c.maxPods = int(maxPods) + if c.useCustomNetworking { // When custom networking is enabled and a valid ENIConfig is found, IPAMD patches the CNINode // resource for this instance. The operation is safe as enabling/disabling custom networking // requires terminating the previous instance. - node, err := k8sapi.GetNode(ctx, c.k8sClient) - if err != nil { - log.Errorf("Failed to get node", err) - podENIErrInc("nodeInit") - return err - } - eniConfigName, err := eniconfig.GetNodeSpecificENIConfigName(node) if err == nil && eniConfigName != "default" { // If Security Groups for Pods is enabled, the VPC Resource Controller must also know that Custom Networking is enabled @@ -528,7 +538,8 @@ func (c *IPAMContext) nodeInit() error { } // On node init, check if datastore pool needs to be increased. If so, attach CIDRs from existing ENIs and attach new ENIs. - if !c.disableENIProvisioning && c.isDatastorePoolTooLow() { + datastorePoolTooLow, _ := c.isDatastorePoolTooLow() + if !c.disableENIProvisioning && datastorePoolTooLow { if err := c.increaseDatastorePool(ctx); err != nil { // Note that the only error currently returned by increaseDatastorePool is an error attaching CIDRs (other than insufficient IPs) podENIErrInc("nodeInit") @@ -616,6 +627,8 @@ func (c *IPAMContext) StartNodeIPPoolManager() { return } + log.Infof("IP pool manager - max pods: %d, warm IP target: %d, warm prefix target: %d, warm ENI target: %d, minimum IP target: %d", + c.maxPods, c.warmIPTarget, c.warmPrefixTarget, c.warmENITarget, c.minimumIPTarget) sleepDuration := ipPoolMonitorInterval / 2 ctx := context.Background() for { @@ -634,9 +647,13 @@ func (c *IPAMContext) updateIPPoolIfRequired(ctx context.Context) { c.tryEnableSecurityGroupsForPods(ctx) } - if c.isDatastorePoolTooLow() { + datastorePoolTooLow, stats := c.isDatastorePoolTooLow() + // Each iteration, log the current datastore IP stats + log.Debugf("IP stats - total IPs: %d, assigned IPs: %d, cooldown IPs: %d", stats.TotalIPs, stats.AssignedIPs, stats.CooldownIPs) + + if datastorePoolTooLow { c.increaseDatastorePool(ctx) - } else if c.isDatastorePoolTooHigh() { + } else if c.isDatastorePoolTooHigh(stats) { c.decreaseDatastorePool(decreaseIPPoolInterval) } if c.shouldRemoveExtraENIs() { @@ -646,6 +663,7 @@ func (c *IPAMContext) updateIPPoolIfRequired(ctx context.Context) { // decreaseDatastorePool runs every `interval` and attempts to return unused ENIs and IPs func (c *IPAMContext) decreaseDatastorePool(interval time.Duration) { + log.Debug("Starting to decrease pool size") prometheusmetrics.IpamdActionsInprogress.WithLabelValues("decreaseDatastorePool").Add(float64(1)) defer prometheusmetrics.IpamdActionsInprogress.WithLabelValues("decreaseDatastorePool").Sub(float64(1)) @@ -658,7 +676,6 @@ func (c *IPAMContext) decreaseDatastorePool(interval time.Duration) { log.Debugf("Starting to decrease Datastore pool") c.tryUnassignCidrsFromAll() - c.lastDecreaseIPPool = now c.lastNodeIPPoolAction = now @@ -693,12 +710,11 @@ func (c *IPAMContext) tryFreeENI() { } -// tryUnassignIPsorPrefixesFromAll determines if there are IPs to free when we have extra IPs beyond the target and warmIPTargetDefined -// is enabled, deallocate extra IP addresses +// When warm IP/prefix targets are defined, free extra IPs func (c *IPAMContext) tryUnassignCidrsFromAll() { - _, over, warmTargetDefined := c.datastoreTargetState() + _, over, warmIPTargetsDefined := c.datastoreTargetState(nil) // If WARM IP targets are not defined, check if WARM_PREFIX_TARGET is defined. - if !warmTargetDefined { + if !warmIPTargetsDefined { over = c.computeExtraPrefixesOverWarmTarget() } @@ -746,25 +762,12 @@ func (c *IPAMContext) tryUnassignCidrsFromAll() { } } +// PRECONDITION: isDatastorePoolTooLow returned true func (c *IPAMContext) increaseDatastorePool(ctx context.Context) error { log.Debug("Starting to increase pool size") prometheusmetrics.IpamdActionsInprogress.WithLabelValues("increaseDatastorePool").Add(float64(1)) defer prometheusmetrics.IpamdActionsInprogress.WithLabelValues("increaseDatastorePool").Sub(float64(1)) - short, _, warmIPTargetDefined := c.datastoreTargetState() - if warmIPTargetDefined && short == 0 { - log.Debugf("Skipping increase Datastore pool, warm target reached") - return nil - } - - if !warmIPTargetDefined { - shortPrefix, warmTargetDefined := c.datastorePrefixTargetState() - if warmTargetDefined && shortPrefix == 0 { - log.Debugf("Skipping increase Datastore pool, warm prefix target reached") - return nil - } - } - if c.isTerminating() { log.Debug("AWS CNI is terminating, will not try to attach any new IPs or ENIs right now") return nil @@ -811,11 +814,6 @@ func (c *IPAMContext) increaseDatastorePool(ctx context.Context) error { func (c *IPAMContext) updateLastNodeIPPoolAction() { c.lastNodeIPPoolAction = time.Now() stats := c.dataStore.GetIPStats(ipV4AddrFamily) - if !c.enablePrefixDelegation { - log.Debugf("Successfully increased IP pool: %s", stats) - } else { - log.Debugf("Successfully increased Prefix pool: %s", stats) - } c.logPoolStats(stats) } @@ -873,42 +871,28 @@ func (c *IPAMContext) tryAllocateENI(ctx context.Context) error { return nil } -// For an ENI, try to fill in missing IPs on an existing ENI with PD disabled -// try to fill in missing Prefixes on an existing ENI with PD enabled +// For an ENI, fill in missing IPs or prefixes. +// PRECONDITION: isDatastorePoolTooLow returned true func (c *IPAMContext) tryAssignCidrs() (increasedPool bool, err error) { - short, _, warmIPTargetDefined := c.datastoreTargetState() - if warmIPTargetDefined && short == 0 { - log.Infof("Warm IP target set and short is 0 so not assigning Cidrs (IPs or Prefixes)") - return false, nil - } - - if !warmIPTargetDefined { - shortPrefix, warmTargetDefined := c.datastorePrefixTargetState() - if warmTargetDefined && shortPrefix == 0 { - log.Infof("Warm prefix target set and short is 0 so not assigning Cidrs (Prefixes)") - return false, nil - } - } - - if !c.enablePrefixDelegation { - return c.tryAssignIPs() - } else { + if c.enablePrefixDelegation { return c.tryAssignPrefixes() + } else { + return c.tryAssignIPs() } } -// For an ENI, try to fill in missing IPs on an existing ENI +// For an ENI, try to fill in missing IPs on an existing ENI. +// PRECONDITION: isDatastorePoolTooLow returned true func (c *IPAMContext) tryAssignIPs() (increasedPool bool, err error) { // If WARM_IP_TARGET is set, only proceed if we are short of target - short, _, warmIPTargetDefined := c.datastoreTargetState() - if warmIPTargetDefined && short == 0 { + short, _, warmIPTargetsDefined := c.datastoreTargetState(nil) + if warmIPTargetsDefined && short == 0 { return false, nil } - // If WARM_IP_TARGET is set we only want to allocate up to that target - // to avoid overallocating and releasing + // If WARM_IP_TARGET is set we only want to allocate up to that target to avoid overallocating and releasing toAllocate := c.maxIPsPerENI - if warmIPTargetDefined { + if warmIPTargetsDefined { toAllocate = short } @@ -995,6 +979,7 @@ func (c *IPAMContext) assignIPv6Prefix(eniID string) (err error) { return nil } +// PRECONDITION: isDatastorePoolTooLow returned true func (c *IPAMContext) tryAssignPrefixes() (increasedPool bool, err error) { toAllocate := c.getPrefixesNeeded() // Returns an ENI which has space for more prefixes to be attached, but this @@ -1233,14 +1218,13 @@ func (c *IPAMContext) tryEnableSecurityGroupsForPods(ctx context.Context) { } } -// shouldRemoveExtraENIs returns true if we should attempt to find an ENI to free. When WARM_IP_TARGET is set, we -// always check and do verification in getDeletableENI() -// PD enabled : If the WARM_PREFIX_TARGET is spread across ENIs and we have more than needed then this function will return true. -// but if the number of prefixes are on just one ENI and is more than available even then it returns true so getDeletableENI will +// shouldRemoveExtraENIs returns true if we should attempt to find an ENI to free +// PD enabled: If the WARM_PREFIX_TARGET is spread across ENIs and we have more than needed, this function will return true. +// If the number of prefixes are on just one ENI, and there are more than available, it returns true so getDeletableENI will // recheck if we need the ENI for prefix target. func (c *IPAMContext) shouldRemoveExtraENIs() bool { - _, _, warmTargetDefined := c.datastoreTargetState() - if warmTargetDefined { + // When WARM_IP_TARGET is set, return true as verification is always done in getDeletableENI() + if c.warmIPTargetsDefined() { return true } @@ -1256,29 +1240,27 @@ func (c *IPAMContext) shouldRemoveExtraENIs() bool { } shouldRemoveExtra = available >= (warmTarget)*c.maxIPsPerENI - if shouldRemoveExtra { c.logPoolStats(stats) log.Debugf("It might be possible to remove extra ENIs because available (%d) >= (ENI/Prefix target + 1 (%d) + 1) * addrsPerENI (%d)", available, warmTarget, c.maxIPsPerENI) } else if c.enablePrefixDelegation { - // When prefix target count is reduced, datastorehigh would have deleted extra prefixes over the warm prefix target. - // Hence available will be less than (warmTarget)*c.maxIPsPerENI but there can be some extra ENIs which are not used hence see if we can clean it up. + // When prefix target count is reduced, datastore would have deleted extra prefixes over the warm prefix target. + // Hence available will be less than (warmTarget)*c.maxIPsPerENI, but there can be some extra ENIs which are not used hence see if we can clean it up. shouldRemoveExtra = c.dataStore.CheckFreeableENIexists() } return shouldRemoveExtra } func (c *IPAMContext) computeExtraPrefixesOverWarmTarget() int { - over := 0 if !c.warmPrefixTargetDefined() { - return over + return 0 } freePrefixes := c.dataStore.GetFreePrefixes() - over = max(freePrefixes-c.warmPrefixTarget, 0) + over := max(freePrefixes-c.warmPrefixTarget, 0) stats := c.dataStore.GetIPStats(ipV4AddrFamily) - log.Debugf("computeExtraPrefixesOverWarmTarget available %d over %d warm_prefix_target %d", stats.AvailableAddresses(), over, c.warmPrefixTarget) + log.Debugf("computeExtraPrefixesOverWarmTarget - available: %d, over: %d, warm_prefix_target: %d", stats.AvailableAddresses(), over, c.warmPrefixTarget) c.logPoolStats(stats) return over } @@ -1666,6 +1648,11 @@ func (c *IPAMContext) verifyAndAddPrefixesToDatastore(eni string, attachedENIPre return seenIPs } +// return true when WARM_IP_TARGET or MINIMUM_IP_TARGET is defined +func (c *IPAMContext) warmIPTargetsDefined() bool { + return c.warmIPTarget != noWarmIPTarget || c.minimumIPTarget != noMinimumIPTarget +} + // UseCustomNetworkCfg returns whether Pods needs to use pod specific configuration or not. func UseCustomNetworkCfg() bool { return parseBoolEnvVar(envCustomNetworkCfg, false) @@ -1696,7 +1683,6 @@ func dsBackingStorePath() string { func getWarmIPTarget() int { inputStr, found := os.LookupEnv(envWarmIPTarget) - if !found { return noWarmIPTarget } @@ -1712,7 +1698,6 @@ func getWarmIPTarget() int { func getMinimumIPTarget() int { inputStr, found := os.LookupEnv(envMinimumIPTarget) - if !found { return noMinimumIPTarget } @@ -1791,16 +1776,18 @@ func (c *IPAMContext) filterUnmanagedENIs(enis []awsutils.ENIMetadata) []awsutil return ret } -// datastoreTargetState determines the number of IPs `short` or `over` our WARM_IP_TARGET, -// accounting for the MINIMUM_IP_TARGET -// With prefix delegation this function determines the number of Prefixes `short` or `over` -func (c *IPAMContext) datastoreTargetState() (short int, over int, enabled bool) { - if c.warmIPTarget == noWarmIPTarget && c.minimumIPTarget == noMinimumIPTarget { +// datastoreTargetState determines the number of IPs `short` or `over` our WARM_IP_TARGET, accounting for the MINIMUM_IP_TARGET. +// With prefix delegation, this function determines the number of Prefixes `short` or `over` +func (c *IPAMContext) datastoreTargetState(stats *datastore.DataStoreStats) (short int, over int, enabled bool) { + if !c.warmIPTargetsDefined() { // there is no WARM_IP_TARGET defined and no MINIMUM_IP_TARGET, fallback to use all IP addresses on ENI return 0, 0, false } - stats := c.dataStore.GetIPStats(ipV4AddrFamily) + // Calculating DataStore stats can be expensive, so allow the caller to optionally pass stats it already calculated + if stats == nil { + stats = c.dataStore.GetIPStats(ipV4AddrFamily) + } available := stats.AvailableAddresses() // short is greater than 0 when we have fewer available IPs than the warm IP target @@ -1834,11 +1821,8 @@ func (c *IPAMContext) datastoreTargetState() (short int, over int, enabled bool) freePrefixes := c.dataStore.GetFreePrefixes() overPrefix := max(min(freePrefixes, stats.TotalPrefixes-prefixNeededForWarmIP), 0) overPrefix = max(min(overPrefix, stats.TotalPrefixes-prefixNeededForMinIP), 0) - log.Debugf("Current warm IP stats : target: %d, short(prefixes): %d, over(prefixes): %d, stats: %s", c.warmIPTarget, shortPrefix, overPrefix, stats) return shortPrefix, overPrefix, true } - log.Debugf("Current warm IP stats : target: %d, short: %d, over: %d, stats: %s", c.warmIPTarget, short, over, stats) - return short, over, true } @@ -2005,7 +1989,7 @@ func (c *IPAMContext) AnnotatePod(podName string, podNamespace string, key strin } func (c *IPAMContext) tryUnassignIPsFromENIs() { - log.Debugf("In tryUnassignIPsFromENIs") + log.Debugf("tryUnassignIPsFromENIs") eniInfos := c.dataStore.GetENIInfos() for eniID := range eniInfos.ENIs { c.tryUnassignIPFromENI(eniID) @@ -2014,7 +1998,6 @@ func (c *IPAMContext) tryUnassignIPsFromENIs() { func (c *IPAMContext) tryUnassignIPFromENI(eniID string) { freeableIPs := c.dataStore.FreeableIPs(eniID) - if len(freeableIPs) == 0 { log.Debugf("No freeable IPs") return @@ -2044,6 +2027,7 @@ func (c *IPAMContext) tryUnassignIPFromENI(eniID string) { } func (c *IPAMContext) tryUnassignPrefixesFromENIs() { + log.Debugf("tryUnassignPrefixesFromENIs") eniInfos := c.dataStore.GetENIInfos() for eniID := range eniInfos.ENIs { c.tryUnassignPrefixFromENI(eniID) @@ -2080,14 +2064,14 @@ func (c *IPAMContext) tryUnassignPrefixFromENI(eniID string) { func (c *IPAMContext) GetENIResourcesToAllocate() int { var resourcesToAllocate int - if !c.enablePrefixDelegation { + if c.enablePrefixDelegation { + resourcesToAllocate = min(c.getPrefixesNeeded(), c.maxPrefixesPerENI) + } else { resourcesToAllocate = c.maxIPsPerENI - short, _, warmTargetDefined := c.datastoreTargetState() + short, _, warmTargetDefined := c.datastoreTargetState(nil) if warmTargetDefined { resourcesToAllocate = min(short, c.maxIPsPerENI) } - } else { - resourcesToAllocate = min(c.getPrefixesNeeded(), c.maxPrefixesPerENI) } return resourcesToAllocate } @@ -2122,44 +2106,48 @@ func (c *IPAMContext) hasRoomForEni() bool { return c.dataStore.GetENIs() < (c.maxENI - c.unmanagedENI - trunkEni) } -func (c *IPAMContext) isDatastorePoolTooLow() bool { - short, _, warmTargetDefined := c.datastoreTargetState() - if warmTargetDefined { - return short > 0 +func (c *IPAMContext) isDatastorePoolTooLow() (bool, *datastore.DataStoreStats) { + stats := c.dataStore.GetIPStats(ipV4AddrFamily) + // If max pods has been reached, pool is not too low + if stats.TotalIPs >= c.maxPods { + return false, stats } - stats := c.dataStore.GetIPStats(ipV4AddrFamily) - available := stats.AvailableAddresses() + short, _, warmTargetDefined := c.datastoreTargetState(stats) + if warmTargetDefined { + return short > 0, stats + } warmTarget := c.warmENITarget totalIPs := c.maxIPsPerENI - if c.enablePrefixDelegation { warmTarget = c.warmPrefixTarget _, maxIpsPerPrefix, _ := datastore.GetPrefixDelegationDefaults() totalIPs = maxIpsPerPrefix } + available := stats.AvailableAddresses() poolTooLow := available < totalIPs*warmTarget || (warmTarget == 0 && available == 0) if poolTooLow { log.Debugf("IP pool is too low: available (%d) < ENI target (%d) * addrsPerENI (%d)", available, warmTarget, totalIPs) c.logPoolStats(stats) } - return poolTooLow + return poolTooLow, stats } -func (c *IPAMContext) isDatastorePoolTooHigh() bool { - _, over, warmTargetDefined := c.datastoreTargetState() +func (c *IPAMContext) isDatastorePoolTooHigh(stats *datastore.DataStoreStats) bool { + // NOTE: IPs may be allocated in chunks (full ENIs of prefixes), so the "too-high" condition does not check max pods. The limit is enforced on the allocation side. + _, over, warmTargetDefined := c.datastoreTargetState(stats) if warmTargetDefined { return over > 0 } - //For the existing ENIs check if we can cleanup prefixes + // For the existing ENIs check if we can cleanup prefixes if c.warmPrefixTargetDefined() { freePrefixes := c.dataStore.GetFreePrefixes() poolTooHigh := freePrefixes > c.warmPrefixTarget if poolTooHigh { - log.Debugf("Prefix pool is high so might be able to deallocate : free prefixes %d and warm prefix target %d", freePrefixes, c.warmPrefixTarget) + log.Debugf("Prefix pool is high so might be able to deallocate - free prefixes: %d, warm prefix target: %d", freePrefixes, c.warmPrefixTarget) } return poolTooHigh } @@ -2208,11 +2196,11 @@ func (c *IPAMContext) getPrefixesNeeded() int { // TODO - post GA we can evaluate to see if these two calls can be merged. // datastoreTargetState already has complex math so adding Prefix target will make it even more complex. - short, _, warmIPTargetDefined := c.datastoreTargetState() + short, _, warmIPTargetsDefined := c.datastoreTargetState(nil) shortPrefixes, warmPrefixTargetDefined := c.datastorePrefixTargetState() // WARM_IP_TARGET takes precendence over WARM_PREFIX_TARGET - if warmIPTargetDefined { + if warmIPTargetsDefined { toAllocate = max(toAllocate, short) } else if warmPrefixTargetDefined { toAllocate = max(toAllocate, shortPrefixes) diff --git a/pkg/ipamd/ipamd_test.go b/pkg/ipamd/ipamd_test.go index cd6bee4a70..8c360edd42 100644 --- a/pkg/ipamd/ipamd_test.go +++ b/pkg/ipamd/ipamd_test.go @@ -32,6 +32,7 @@ import ( "github.com/vishvananda/netlink" corev1 "k8s.io/api/core/v1" v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" @@ -174,11 +175,16 @@ func TestNodeInit(t *testing.T) { m.network.EXPECT().GetExternalServiceCIDRs().Return(nil) m.network.EXPECT().UpdateExternalServiceIpRules(gomock.Any(), gomock.Any()) + maxPods, _ := resource.ParseQuantity("500") fakeNode := v1.Node{ TypeMeta: metav1.TypeMeta{Kind: "Node"}, ObjectMeta: metav1.ObjectMeta{Name: myNodeName}, Spec: v1.NodeSpec{}, - Status: v1.NodeStatus{}, + Status: v1.NodeStatus{ + Capacity: v1.ResourceList{ + v1.ResourcePods: maxPods, + }, + }, } m.k8sClient.Create(ctx, &fakeNode) @@ -258,11 +264,16 @@ func TestNodeInitwithPDenabledIPv4Mode(t *testing.T) { m.network.EXPECT().GetExternalServiceCIDRs().Return(nil) m.network.EXPECT().UpdateExternalServiceIpRules(gomock.Any(), gomock.Any()) + maxPods, _ := resource.ParseQuantity("500") fakeNode := v1.Node{ TypeMeta: metav1.TypeMeta{Kind: "Node"}, ObjectMeta: metav1.ObjectMeta{Name: myNodeName}, Spec: v1.NodeSpec{}, - Status: v1.NodeStatus{}, + Status: v1.NodeStatus{ + Capacity: v1.ResourceList{ + v1.ResourcePods: maxPods, + }, + }, } m.k8sClient.Create(ctx, &fakeNode) @@ -499,7 +510,6 @@ func testIncreaseIPPool(t *testing.T, useENIConfig bool, unschedulabeNode bool) primaryIP: make(map[string]string), terminating: int32(0), } - mockContext.dataStore = testDatastore() primary := true @@ -775,14 +785,14 @@ func TestDecreaseIPPool(t *testing.T) { m.awsutils.EXPECT().DeallocPrefixAddresses(gomock.Any(), gomock.Any()).Times(1) m.awsutils.EXPECT().DeallocIPAddresses(gomock.Any(), gomock.Any()).Times(1) - short, over, enabled := mockContext.datastoreTargetState() + short, over, enabled := mockContext.datastoreTargetState(nil) assert.Equal(t, 0, short) // there would not be any shortage assert.Equal(t, 1, over) // out of 4 IPs we have 2 IPs assigned, warm IP target is 1, so over is 1 assert.Equal(t, true, enabled) // there is warm ip target enabled with the value of 1 mockContext.decreaseDatastorePool(10 * time.Second) - short, over, enabled = mockContext.datastoreTargetState() + short, over, enabled = mockContext.datastoreTargetState(nil) assert.Equal(t, 0, short) // there would not be any shortage assert.Equal(t, 0, over) // after the above deallocation this should be zero assert.Equal(t, true, enabled) // there is warm ip target enabled with the value of 1 @@ -790,7 +800,7 @@ func TestDecreaseIPPool(t *testing.T) { //make another call just to ensure that more deallocations do not happen mockContext.decreaseDatastorePool(10 * time.Second) - short, over, enabled = mockContext.datastoreTargetState() + short, over, enabled = mockContext.datastoreTargetState(nil) assert.Equal(t, 0, short) // there would not be any shortage assert.Equal(t, 0, over) // after the above deallocation this should be zero assert.Equal(t, true, enabled) // there is warm ip target enabled with the value of 1 @@ -1115,14 +1125,13 @@ func TestGetWarmIPTargetState(t *testing.T) { primaryIP: make(map[string]string), terminating: int32(0), } - mockContext.dataStore = testDatastore() - _, _, warmIPTargetDefined := mockContext.datastoreTargetState() + _, _, warmIPTargetDefined := mockContext.datastoreTargetState(nil) assert.False(t, warmIPTargetDefined) mockContext.warmIPTarget = 5 - short, over, warmIPTargetDefined := mockContext.datastoreTargetState() + short, over, warmIPTargetDefined := mockContext.datastoreTargetState(nil) assert.True(t, warmIPTargetDefined) assert.Equal(t, 5, short) assert.Equal(t, 0, over) @@ -1134,7 +1143,7 @@ func TestGetWarmIPTargetState(t *testing.T) { ipv4Addr = net.IPNet{IP: net.ParseIP("1.1.1.2"), Mask: net.IPv4Mask(255, 255, 255, 255)} _ = mockContext.dataStore.AddIPv4CidrToStore("eni-1", ipv4Addr, false) - short, over, warmIPTargetDefined = mockContext.datastoreTargetState() + short, over, warmIPTargetDefined = mockContext.datastoreTargetState(nil) assert.True(t, warmIPTargetDefined) assert.Equal(t, 3, short) assert.Equal(t, 0, over) @@ -1147,13 +1156,13 @@ func TestGetWarmIPTargetState(t *testing.T) { ipv4Addr = net.IPNet{IP: net.ParseIP("1.1.1.5"), Mask: net.IPv4Mask(255, 255, 255, 255)} _ = mockContext.dataStore.AddIPv4CidrToStore("eni-1", ipv4Addr, false) - short, over, warmIPTargetDefined = mockContext.datastoreTargetState() + short, over, warmIPTargetDefined = mockContext.datastoreTargetState(nil) assert.True(t, warmIPTargetDefined) assert.Equal(t, 0, short) assert.Equal(t, 0, over) } -func TestGetWarmIPTargetStatewithPDenabled(t *testing.T) { +func TestGetWarmIPTargetStateWithPDenabled(t *testing.T) { m := setup(t) defer m.ctrl.Finish() @@ -1167,11 +1176,11 @@ func TestGetWarmIPTargetStatewithPDenabled(t *testing.T) { mockContext.dataStore = testDatastorewithPrefix() - _, _, warmIPTargetDefined := mockContext.datastoreTargetState() + _, _, warmIPTargetDefined := mockContext.datastoreTargetState(nil) assert.False(t, warmIPTargetDefined) mockContext.warmIPTarget = 5 - short, over, warmIPTargetDefined := mockContext.datastoreTargetState() + short, over, warmIPTargetDefined := mockContext.datastoreTargetState(nil) assert.True(t, warmIPTargetDefined) assert.Equal(t, 1, short) assert.Equal(t, 0, over) @@ -1184,7 +1193,7 @@ func TestGetWarmIPTargetStatewithPDenabled(t *testing.T) { _, ipnet, _ = net.ParseCIDR("20.1.1.0/28") _ = mockContext.dataStore.AddIPv4CidrToStore("eni-1", *ipnet, true) - short, over, warmIPTargetDefined = mockContext.datastoreTargetState() + short, over, warmIPTargetDefined = mockContext.datastoreTargetState(nil) assert.True(t, warmIPTargetDefined) assert.Equal(t, 0, short) assert.Equal(t, 1, over) @@ -1193,7 +1202,7 @@ func TestGetWarmIPTargetStatewithPDenabled(t *testing.T) { _, ipnet, _ = net.ParseCIDR("20.1.1.0/28") _ = mockContext.dataStore.DelIPv4CidrFromStore("eni-1", *ipnet, true) - short, over, warmIPTargetDefined = mockContext.datastoreTargetState() + short, over, warmIPTargetDefined = mockContext.datastoreTargetState(nil) assert.True(t, warmIPTargetDefined) assert.Equal(t, 0, short) assert.Equal(t, 0, over) @@ -1209,6 +1218,7 @@ func TestIPAMContext_nodeIPPoolTooLow(t *testing.T) { warmENITarget int warmIPTarget int datastore *datastore.DataStore + maxPods int } tests := []struct { @@ -1216,14 +1226,15 @@ func TestIPAMContext_nodeIPPoolTooLow(t *testing.T) { fields fields want bool }{ - {"Test new ds, all defaults", fields{14, 4, 1, 0, testDatastore()}, true}, - {"Test new ds, 0 ENIs", fields{14, 4, 0, 0, testDatastore()}, true}, - {"Test new ds, 3 warm IPs", fields{14, 4, 0, 3, testDatastore()}, true}, - {"Test 3 unused IPs, 1 warm", fields{3, 4, 1, 1, datastoreWith3FreeIPs()}, false}, - {"Test 1 used, 1 warm ENI", fields{3, 4, 1, 0, datastoreWith1Pod1()}, true}, - {"Test 1 used, 0 warm ENI", fields{3, 4, 0, 0, datastoreWith1Pod1()}, false}, - {"Test 3 used, 1 warm ENI", fields{3, 4, 1, 0, datastoreWith3Pods()}, true}, - {"Test 3 used, 0 warm ENI", fields{3, 4, 0, 0, datastoreWith3Pods()}, true}, + {"Test new ds, all defaults", fields{14, 4, 1, 0, testDatastore(), 500}, true}, + {"Test new ds, 0 ENIs", fields{14, 4, 0, 0, testDatastore(), 500}, true}, + {"Test new ds, 3 warm IPs", fields{14, 4, 0, 3, testDatastore(), 500}, true}, + {"Test 3 unused IPs, 1 warm", fields{3, 4, 1, 1, datastoreWith3FreeIPs(), 500}, false}, + {"Test 1 used, 1 warm ENI", fields{3, 4, 1, 0, datastoreWith1Pod1(), 500}, true}, + {"Test 1 used, 0 warm ENI", fields{3, 4, 0, 0, datastoreWith1Pod1(), 500}, false}, + {"Test 3 used, 1 warm ENI", fields{3, 4, 1, 0, datastoreWith3Pods(), 500}, true}, + {"Test 3 used, 0 warm ENI", fields{3, 4, 0, 0, datastoreWith3Pods(), 500}, true}, + {"Test max pods exceeded", fields{3, 4, 0, 5, datastoreWith3Pods(), 3}, false}, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { @@ -1237,8 +1248,9 @@ func TestIPAMContext_nodeIPPoolTooLow(t *testing.T) { warmENITarget: tt.fields.warmENITarget, warmIPTarget: tt.fields.warmIPTarget, enablePrefixDelegation: false, + maxPods: tt.fields.maxPods, } - if got := c.isDatastorePoolTooLow(); got != tt.want { + if got, _ := c.isDatastorePoolTooLow(); got != tt.want { t.Errorf("nodeIPPoolTooLow() = %v, want %v", got, tt.want) } }) @@ -1255,6 +1267,7 @@ func TestIPAMContext_nodePrefixPoolTooLow(t *testing.T) { maxPrefixesPerENI int warmPrefixTarget int datastore *datastore.DataStore + maxPods int } tests := []struct { @@ -1262,13 +1275,14 @@ func TestIPAMContext_nodePrefixPoolTooLow(t *testing.T) { fields fields want bool }{ - {"Test new ds, all defaults", fields{256, 4, 16, 1, testDatastore()}, true}, - {"Test new ds, 0 ENIs", fields{256, 4, 16, 0, testDatastore()}, true}, - {"Test 3 unused IPs, 1 warm", fields{256, 4, 16, 1, datastoreWithFreeIPsFromPrefix()}, false}, - {"Test 1 used, 1 warm Prefix", fields{256, 4, 16, 1, datastoreWith1Pod1FromPrefix()}, true}, - {"Test 1 used, 0 warm Prefix", fields{256, 4, 16, 0, datastoreWith1Pod1FromPrefix()}, false}, - {"Test 3 used, 1 warm Prefix", fields{256, 4, 16, 1, datastoreWith3PodsFromPrefix()}, true}, - {"Test 3 used, 0 warm Prefix", fields{256, 4, 16, 0, datastoreWith3PodsFromPrefix()}, false}, + {"Test new ds, all defaults", fields{256, 4, 16, 1, testDatastore(), 500}, true}, + {"Test new ds, 0 ENIs", fields{256, 4, 16, 0, testDatastore(), 500}, true}, + {"Test 3 unused IPs, 1 warm", fields{256, 4, 16, 1, datastoreWithFreeIPsFromPrefix(), 500}, false}, + {"Test 1 used, 1 warm Prefix", fields{256, 4, 16, 1, datastoreWith1Pod1FromPrefix(), 500}, true}, + {"Test 1 used, 0 warm Prefix", fields{256, 4, 16, 0, datastoreWith1Pod1FromPrefix(), 500}, false}, + {"Test 3 used, 1 warm Prefix", fields{256, 4, 16, 1, datastoreWith3PodsFromPrefix(), 500}, true}, + {"Test 3 used, 0 warm Prefix", fields{256, 4, 16, 0, datastoreWith3PodsFromPrefix(), 500}, false}, + {"Test max pods exceeded", fields{256, 4, 16, 1, datastoreWith3PodsFromPrefix(), 4}, false}, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { @@ -1282,8 +1296,9 @@ func TestIPAMContext_nodePrefixPoolTooLow(t *testing.T) { maxENI: tt.fields.maxEni, warmPrefixTarget: tt.fields.warmPrefixTarget, enablePrefixDelegation: true, + maxPods: tt.fields.maxPods, } - if got := c.isDatastorePoolTooLow(); got != tt.want { + if got, _ := c.isDatastorePoolTooLow(); got != tt.want { t.Errorf("nodeIPPoolTooLow() = %v, want %v", got, tt.want) } })