Skip to content

Commit 68207da

Browse files
authored
Merge pull request #1663 from ricardomaraschini/strict-pod-eviction
feat: introduce strict eviction policy
2 parents cca28f7 + 35a7178 commit 68207da

File tree

7 files changed

+149
-6
lines changed

7 files changed

+149
-6
lines changed

README.md

+15
Original file line numberDiff line numberDiff line change
@@ -405,6 +405,12 @@ strategy evicts pods from `underutilized nodes` (those with usage below `thresho
405405
so that they can be recreated in appropriately utilized nodes.
406406
The strategy will abort if any number of `underutilized nodes` or `appropriately utilized nodes` is zero.
407407

408+
To control pod eviction from underutilized nodes, use the `evictionModes`
409+
array. A lenient policy, which evicts pods regardless of their resource
410+
requests, is the default. To enable a stricter policy that only evicts pods
411+
with resource requests defined for the provided threshold resources, add the
412+
option `OnlyThresholdingResources` to the `evictionModes` configuration.
413+
408414
**NOTE:** Node resource consumption is determined by the requests and limits of pods, not actual usage.
409415
This approach is chosen in order to maintain consistency with the kube-scheduler, which follows the same
410416
design for scheduling pods onto nodes. This means that resource usage as reported by Kubelet (or commands
@@ -417,8 +423,15 @@ actual usage metrics. Implementing metrics-based descheduling is currently TODO
417423
|---|---|
418424
|`thresholds`|map(string:int)|
419425
|`numberOfNodes`|int|
426+
|`evictionModes`|list(string)|
420427
|`evictableNamespaces`|(see [namespace filtering](#namespace-filtering))|
421428

429+
**Supported Eviction Modes:**
430+
431+
|Name|Description|
432+
|---|---|
433+
|`OnlyThresholdingResources`|Evict only pods that have resource requests defined for the provided threshold resources.|
434+
422435
**Example:**
423436

424437
```yaml
@@ -437,6 +450,8 @@ profiles:
437450
exclude:
438451
- "kube-system"
439452
- "namespace1"
453+
evictionModes:
454+
- "OnlyThresholdingResources"
440455
plugins:
441456
balance:
442457
enabled:

pkg/framework/plugins/nodeutilization/highnodeutilization.go

+20-2
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ package nodeutilization
1919
import (
2020
"context"
2121
"fmt"
22+
"slices"
2223

2324
v1 "k8s.io/api/core/v1"
2425
"k8s.io/apimachinery/pkg/runtime"
@@ -74,9 +75,26 @@ func NewHighNodeUtilization(
7475
highThresholds[rname] = MaxResourcePercentage
7576
}
7677

78+
// get the resource names for which we have a threshold. this is
79+
// later used when determining if we are going to evict a pod.
80+
resourceThresholds := getResourceNames(args.Thresholds)
81+
82+
// by default we evict pods from the under utilized nodes even if they
83+
// don't define a request for a given threshold. this works most of the
84+
// times and there is an use case for it. When using the restrict mode
85+
// we evaluate if the pod has a request for any of the resources the
86+
// user has provided as threshold.
87+
filters := []podutil.FilterFunc{handle.Evictor().Filter}
88+
if slices.Contains(args.EvictionModes, EvictionModeOnlyThresholdingResources) {
89+
filters = append(
90+
filters,
91+
withResourceRequestForAny(resourceThresholds...),
92+
)
93+
}
94+
7795
podFilter, err := podutil.
7896
NewOptions().
79-
WithFilter(handle.Evictor().Filter).
97+
WithFilter(podutil.WrapFilterFuncs(filters...)).
8098
BuildFilterFunc()
8199
if err != nil {
82100
return nil, fmt.Errorf("error initializing pod filter function: %v", err)
@@ -87,7 +105,7 @@ func NewHighNodeUtilization(
87105
// all we consider the basic resources (cpu, memory, pods).
88106
resourceNames := uniquifyResourceNames(
89107
append(
90-
getResourceNames(args.Thresholds),
108+
resourceThresholds,
91109
v1.ResourceCPU,
92110
v1.ResourceMemory,
93111
v1.ResourcePods,

pkg/framework/plugins/nodeutilization/highnodeutilization_test.go

+55-4
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ func TestHighNodeUtilization(t *testing.T) {
4848
testCases := []struct {
4949
name string
5050
thresholds api.ResourceThresholds
51+
evictionModes []EvictionMode
5152
nodes []*v1.Node
5253
pods []*v1.Pod
5354
expectedPodsEvicted uint
@@ -433,6 +434,53 @@ func TestHighNodeUtilization(t *testing.T) {
433434
},
434435
expectedPodsEvicted: 0,
435436
},
437+
{
438+
name: "with extended resource threshold and no extended resource pods",
439+
thresholds: api.ResourceThresholds{
440+
extendedResource: 40,
441+
},
442+
evictionModes: []EvictionMode{EvictionModeOnlyThresholdingResources},
443+
nodes: []*v1.Node{
444+
test.BuildTestNode(n1NodeName, 4000, 3000, 10, func(node *v1.Node) {
445+
test.SetNodeExtendedResource(node, extendedResource, 10)
446+
}),
447+
test.BuildTestNode(n2NodeName, 4000, 3000, 10, func(node *v1.Node) {
448+
test.SetNodeExtendedResource(node, extendedResource, 10)
449+
}),
450+
test.BuildTestNode(n3NodeName, 4000, 3000, 10, func(node *v1.Node) {
451+
test.SetNodeExtendedResource(node, extendedResource, 10)
452+
}),
453+
},
454+
pods: []*v1.Pod{
455+
// pods on node1 have the extended resource
456+
// request set and they put the node in the
457+
// over utilization range.
458+
test.BuildTestPod("p1", 100, 0, n1NodeName, func(pod *v1.Pod) {
459+
test.SetRSOwnerRef(pod)
460+
test.SetPodExtendedResourceRequest(pod, extendedResource, 3)
461+
}),
462+
test.BuildTestPod("p2", 100, 0, n1NodeName, func(pod *v1.Pod) {
463+
test.SetRSOwnerRef(pod)
464+
test.SetPodExtendedResourceRequest(pod, extendedResource, 3)
465+
}),
466+
// pods in the other nodes must not be evicted
467+
// because they do not have the extended
468+
// resource defined in their requests.
469+
test.BuildTestPod("p3", 500, 0, n2NodeName, func(pod *v1.Pod) {
470+
test.SetRSOwnerRef(pod)
471+
}),
472+
test.BuildTestPod("p4", 500, 0, n2NodeName, func(pod *v1.Pod) {
473+
test.SetRSOwnerRef(pod)
474+
}),
475+
test.BuildTestPod("p5", 500, 0, n2NodeName, func(pod *v1.Pod) {
476+
test.SetRSOwnerRef(pod)
477+
}),
478+
test.BuildTestPod("p6", 500, 0, n2NodeName, func(pod *v1.Pod) {
479+
test.SetRSOwnerRef(pod)
480+
}),
481+
},
482+
expectedPodsEvicted: 0,
483+
},
436484
}
437485

438486
for _, testCase := range testCases {
@@ -474,10 +522,13 @@ func TestHighNodeUtilization(t *testing.T) {
474522
})
475523
}
476524

477-
plugin, err := NewHighNodeUtilization(&HighNodeUtilizationArgs{
478-
Thresholds: testCase.thresholds,
479-
},
480-
handle)
525+
plugin, err := NewHighNodeUtilization(
526+
&HighNodeUtilizationArgs{
527+
Thresholds: testCase.thresholds,
528+
EvictionModes: testCase.evictionModes,
529+
},
530+
handle,
531+
)
481532
if err != nil {
482533
t.Fatalf("Unable to initialize the plugin: %v", err)
483534
}

pkg/framework/plugins/nodeutilization/nodeutilization.go

+17
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ import (
3232
"k8s.io/utils/ptr"
3333
"sigs.k8s.io/descheduler/pkg/descheduler/evictions"
3434
nodeutil "sigs.k8s.io/descheduler/pkg/descheduler/node"
35+
"sigs.k8s.io/descheduler/pkg/descheduler/pod"
3536
podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod"
3637
"sigs.k8s.io/descheduler/pkg/framework/plugins/nodeutilization/normalizer"
3738
frameworktypes "sigs.k8s.io/descheduler/pkg/framework/types"
@@ -752,3 +753,19 @@ func assessAvailableResourceInNodes(
752753

753754
return available, nil
754755
}
756+
757+
// withResourceRequestForAny returns a filter function that checks if a pod
758+
// has a resource request specified for any of the given resources names.
759+
func withResourceRequestForAny(names ...v1.ResourceName) pod.FilterFunc {
760+
return func(pod *v1.Pod) bool {
761+
all := append(pod.Spec.Containers, pod.Spec.InitContainers...)
762+
for _, name := range names {
763+
for _, container := range all {
764+
if _, ok := container.Resources.Requests[name]; ok {
765+
return true
766+
}
767+
}
768+
}
769+
return false
770+
}
771+
}

pkg/framework/plugins/nodeutilization/types.go

+19
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,18 @@ import (
1818
"sigs.k8s.io/descheduler/pkg/api"
1919
)
2020

21+
// EvictionMode describes a mode of eviction. See the list below for the
22+
// available modes.
23+
type EvictionMode string
24+
25+
const (
26+
// EvictionModeOnlyThresholdingResources makes the descheduler evict
27+
// only pods that have a resource request defined for any of the user
28+
// provided thresholds. If the pod does not request the resource, it
29+
// will not be evicted.
30+
EvictionModeOnlyThresholdingResources EvictionMode = "OnlyThresholdingResources"
31+
)
32+
2133
// +k8s:deepcopy-gen=true
2234
// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object
2335

@@ -48,6 +60,13 @@ type HighNodeUtilizationArgs struct {
4860
Thresholds api.ResourceThresholds `json:"thresholds"`
4961
NumberOfNodes int `json:"numberOfNodes,omitempty"`
5062

63+
// EvictionModes is a set of modes to be taken into account when the
64+
// descheduler evicts pods. For example the mode
65+
// `OnlyThresholdingResources` can be used to make sure the descheduler
66+
// only evicts pods that have resource requests for the defined
67+
// thresholds.
68+
EvictionModes []EvictionMode `json:"evictionModes,omitempty"`
69+
5170
// Naming this one differently since namespaces are still
5271
// considered while considering resources used by pods
5372
// but then filtered out before eviction

pkg/framework/plugins/nodeutilization/validation.go

+18
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,25 @@ func ValidateHighNodeUtilizationArgs(obj runtime.Object) error {
3030
if err != nil {
3131
return err
3232
}
33+
// make sure we know about the eviction modes defined by the user.
34+
return validateEvictionModes(args.EvictionModes)
35+
}
36+
37+
// validateEvictionModes checks if the eviction modes are valid/known
38+
// to the descheduler.
39+
func validateEvictionModes(modes []EvictionMode) error {
40+
// we are using this approach to make the code more extensible
41+
// in the future.
42+
validModes := map[EvictionMode]bool{
43+
EvictionModeOnlyThresholdingResources: true,
44+
}
3345

46+
for _, mode := range modes {
47+
if validModes[mode] {
48+
continue
49+
}
50+
return fmt.Errorf("invalid eviction mode %s", mode)
51+
}
3452
return nil
3553
}
3654

pkg/framework/plugins/nodeutilization/zz_generated.deepcopy.go

+5
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)