
Commit 89535b9

Merge pull request #1649 from ricardomaraschini/normalizer
feat: refactoring thresholds and usage assessment
2 parents b300fae + 54d0a22 commit 89535b9

9 files changed: +1812 -554 lines changed

go.mod (+2 -1)
@@ -32,6 +32,8 @@ require (
 	sigs.k8s.io/yaml v1.4.0
 )
 
+require golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56
+
 require (
 	cel.dev/expr v0.18.0 // indirect
 	github.com/BurntSushi/toml v0.3.1 // indirect
@@ -98,7 +100,6 @@ require (
 	go.uber.org/multierr v1.11.0 // indirect
 	go.uber.org/zap v1.27.0 // indirect
 	golang.org/x/crypto v0.31.0 // indirect
-	golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56 // indirect
 	golang.org/x/mod v0.21.0 // indirect
 	golang.org/x/net v0.30.0 // indirect
 	golang.org/x/oauth2 v0.23.0 // indirect
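golang.org/x/exp moves from the indirect block to a direct requirement, which means some package in this PR now imports it directly. A minimal sketch of the kind of use that forces such a promotion; the specific package (golang.org/x/exp/maps) and call are an assumption for illustration, not taken from this diff:

```go
package main

import (
	"fmt"

	"golang.org/x/exp/maps"
)

func main() {
	// A hypothetical thresholds map, standing in for api.ResourceThresholds.
	thresholds := map[string]float64{"cpu": 20, "memory": 20, "pods": 20}

	// maps.Keys collects the resource names in one call; importing the
	// package directly is enough for `go mod tidy` to move golang.org/x/exp
	// into the direct require block, as the hunk above shows.
	names := maps.Keys(thresholds)
	fmt.Println(names) // order is not guaranteed
}
```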

pkg/framework/plugins/nodeutilization/highnodeutilization.go (+132 -95)
@@ -28,164 +28,225 @@ import (
 	nodeutil "sigs.k8s.io/descheduler/pkg/descheduler/node"
 
 	podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod"
+	"sigs.k8s.io/descheduler/pkg/framework/plugins/nodeutilization/normalizer"
 	frameworktypes "sigs.k8s.io/descheduler/pkg/framework/types"
 )
 
 const HighNodeUtilizationPluginName = "HighNodeUtilization"
 
-// HighNodeUtilization evicts pods from under utilized nodes so that scheduler can schedule according to its plugin.
-// Note that CPU/Memory requests are used to calculate nodes' utilization and not the actual resource usage.
+// this lines makes sure that HighNodeUtilization implements the BalancePlugin
+// interface.
+var _ frameworktypes.BalancePlugin = &HighNodeUtilization{}
 
+// HighNodeUtilization evicts pods from under utilized nodes so that scheduler
+// can schedule according to its plugin. Note that CPU/Memory requests are used
+// to calculate nodes' utilization and not the actual resource usage.
 type HighNodeUtilization struct {
-	handle                   frameworktypes.Handle
-	args                     *HighNodeUtilizationArgs
-	podFilter                func(pod *v1.Pod) bool
-	underutilizationCriteria []interface{}
-	resourceNames            []v1.ResourceName
-	targetThresholds         api.ResourceThresholds
-	usageClient              usageClient
+	handle         frameworktypes.Handle
+	args           *HighNodeUtilizationArgs
+	podFilter      func(pod *v1.Pod) bool
+	criteria       []any
+	resourceNames  []v1.ResourceName
+	highThresholds api.ResourceThresholds
+	usageClient    usageClient
 }
 
-var _ frameworktypes.BalancePlugin = &HighNodeUtilization{}
-
-// NewHighNodeUtilization builds plugin from its arguments while passing a handle
-func NewHighNodeUtilization(args runtime.Object, handle frameworktypes.Handle) (frameworktypes.Plugin, error) {
-	highNodeUtilizatioArgs, ok := args.(*HighNodeUtilizationArgs)
+// NewHighNodeUtilization builds plugin from its arguments while passing a handle.
+func NewHighNodeUtilization(
+	genericArgs runtime.Object, handle frameworktypes.Handle,
+) (frameworktypes.Plugin, error) {
+	args, ok := genericArgs.(*HighNodeUtilizationArgs)
 	if !ok {
-		return nil, fmt.Errorf("want args to be of type HighNodeUtilizationArgs, got %T", args)
+		return nil, fmt.Errorf(
+			"want args to be of type HighNodeUtilizationArgs, got %T",
+			genericArgs,
+		)
 	}
 
-	targetThresholds := make(api.ResourceThresholds)
-	setDefaultForThresholds(highNodeUtilizatioArgs.Thresholds, targetThresholds)
-	resourceNames := getResourceNames(targetThresholds)
-
-	underutilizationCriteria := []interface{}{
-		"CPU", highNodeUtilizatioArgs.Thresholds[v1.ResourceCPU],
-		"Mem", highNodeUtilizatioArgs.Thresholds[v1.ResourceMemory],
-		"Pods", highNodeUtilizatioArgs.Thresholds[v1.ResourcePods],
+	// this plugins worries only about thresholds but the nodeplugins
+	// package was made to take two thresholds into account, one for low
+	// and another for high usage. here we make sure we set the high
+	// threshold to the maximum value for all resources for which we have a
+	// threshold.
+	highThresholds := make(api.ResourceThresholds)
+	for rname := range args.Thresholds {
+		highThresholds[rname] = MaxResourcePercentage
 	}
-	for name := range highNodeUtilizatioArgs.Thresholds {
-		if !nodeutil.IsBasicResource(name) {
-			underutilizationCriteria = append(underutilizationCriteria, string(name), int64(highNodeUtilizatioArgs.Thresholds[name]))
-		}
+
+	// criteria is a list of thresholds that are used to determine if a node
+	// is underutilized. it is used only for logging purposes.
+	criteria := []any{}
+	for rname, rvalue := range args.Thresholds {
+		criteria = append(criteria, rname, rvalue)
 	}
 
-	podFilter, err := podutil.NewOptions().
+	podFilter, err := podutil.
+		NewOptions().
 		WithFilter(handle.Evictor().Filter).
 		BuildFilterFunc()
 	if err != nil {
 		return nil, fmt.Errorf("error initializing pod filter function: %v", err)
 	}
 
+	// resourceNames is a list of all resource names this plugin cares
+	// about. we care about the resources for which we have a threshold and
+	// all we consider the basic resources (cpu, memory, pods).
+	resourceNames := uniquifyResourceNames(
+		append(
+			getResourceNames(args.Thresholds),
+			v1.ResourceCPU,
+			v1.ResourceMemory,
+			v1.ResourcePods,
+		),
+	)
+
 	return &HighNodeUtilization{
-		handle:                   handle,
-		args:                     highNodeUtilizatioArgs,
-		resourceNames:            resourceNames,
-		targetThresholds:         targetThresholds,
-		underutilizationCriteria: underutilizationCriteria,
-		podFilter:                podFilter,
-		usageClient:              newRequestedUsageClient(resourceNames, handle.GetPodsAssignedToNodeFunc()),
+		handle:         handle,
+		args:           args,
+		resourceNames:  resourceNames,
+		highThresholds: highThresholds,
+		criteria:       criteria,
+		podFilter:      podFilter,
+		usageClient: newRequestedUsageClient(
+			resourceNames,
+			handle.GetPodsAssignedToNodeFunc(),
+		),
 	}, nil
 }
 
-// Name retrieves the plugin name
+// Name retrieves the plugin name.
 func (h *HighNodeUtilization) Name() string {
 	return HighNodeUtilizationPluginName
 }
 
-// Balance extension point implementation for the plugin
+// Balance holds the main logic of the plugin. It evicts pods from under
+// utilized nodes. The goal here is to concentrate pods in fewer nodes so that
+// less nodes are used.
 func (h *HighNodeUtilization) Balance(ctx context.Context, nodes []*v1.Node) *frameworktypes.Status {
 	if err := h.usageClient.sync(ctx, nodes); err != nil {
 		return &frameworktypes.Status{
 			Err: fmt.Errorf("error getting node usage: %v", err),
 		}
 	}
 
+	// take a picture of the current state of the nodes, everything else
+	// here is based on this snapshot.
 	nodesMap, nodesUsageMap, podListMap := getNodeUsageSnapshot(nodes, h.usageClient)
-	nodeThresholdsMap := getStaticNodeThresholds(nodes, h.args.Thresholds, h.targetThresholds)
-	nodesUsageAsNodeThresholdsMap := nodeUsageToResourceThresholds(nodesUsageMap, nodesMap)
+	capacities := referencedResourceListForNodesCapacity(nodes)
+
+	// node usages are not presented as percentages over the capacity.
+	// we need to normalize them to be able to compare them with the
+	// thresholds. thresholds are already provided by the user in
+	// percentage.
+	usage, thresholds := assessNodesUsagesAndStaticThresholds(
+		nodesUsageMap, capacities, h.args.Thresholds, h.highThresholds,
+	)
+
+	// classify nodes in two groups: underutilized and schedulable. we will
+	// later try to move pods from the first group to the second.
 	nodeGroups := classifyNodeUsage(
-		nodesUsageAsNodeThresholdsMap,
-		nodeThresholdsMap,
+		usage, thresholds,
 		[]classifierFnc{
-			// underutilized nodes
+			// underutilized nodes.
 			func(nodeName string, usage, threshold api.ResourceThresholds) bool {
 				return isNodeBelowThreshold(usage, threshold)
 			},
-			// every other node that is schedulable
+			// schedulable nodes.
 			func(nodeName string, usage, threshold api.ResourceThresholds) bool {
 				if nodeutil.IsNodeUnschedulable(nodesMap[nodeName]) {
-					klog.V(2).InfoS("Node is unschedulable", "node", klog.KObj(nodesMap[nodeName]))
+					klog.V(2).InfoS(
+						"Node is unschedulable",
+						"node", klog.KObj(nodesMap[nodeName]),
+					)
 					return false
 				}
 				return true
 			},
 		},
 	)
 
-	// convert groups node []NodeInfo
+	// the nodeplugin package works by means of NodeInfo structures. these
+	// structures hold a series of information about the nodes. now that
+	// we have classified the nodes, we can build the NodeInfo structures
+	// for each group. NodeInfo structs carry usage and available resources
+	// for each node.
 	nodeInfos := make([][]NodeInfo, 2)
 	category := []string{"underutilized", "overutilized"}
 	for i := range nodeGroups {
 		for nodeName := range nodeGroups[i] {
-			klog.InfoS("Node is "+category[i], "node", klog.KObj(nodesMap[nodeName]), "usage", nodesUsageMap[nodeName], "usagePercentage", resourceUsagePercentages(nodesUsageMap[nodeName], nodesMap[nodeName], true))
+			klog.InfoS(
+				"Node has been classified",
+				"category", category[i],
+				"node", klog.KObj(nodesMap[nodeName]),
+				"usage", nodesUsageMap[nodeName],
+				"usagePercentage", normalizer.Round(usage[nodeName]),
+			)
 			nodeInfos[i] = append(nodeInfos[i], NodeInfo{
 				NodeUsage: NodeUsage{
 					node: nodesMap[nodeName],
-					usage: nodesUsageMap[nodeName], // get back the original node usage
+					usage: nodesUsageMap[nodeName],
 					allPods: podListMap[nodeName],
 				},
-				thresholds: NodeThresholds{
-					lowResourceThreshold: resourceThresholdsToNodeUsage(nodeThresholdsMap[nodeName][0], nodesMap[nodeName]),
-					highResourceThreshold: resourceThresholdsToNodeUsage(nodeThresholdsMap[nodeName][1], nodesMap[nodeName]),
-				},
+				available: capNodeCapacitiesToThreshold(
+					nodesMap[nodeName],
+					thresholds[nodeName][1],
+					h.resourceNames,
+				),
 			})
 		}
 	}
 
-	sourceNodes := nodeInfos[0]
-	highNodes := nodeInfos[1]
+	lowNodes, schedulableNodes := nodeInfos[0], nodeInfos[1]
 
-	// log message in one line
-	klog.V(1).InfoS("Criteria for a node below target utilization", h.underutilizationCriteria...)
-	klog.V(1).InfoS("Number of underutilized nodes", "totalNumber", len(sourceNodes))
+	klog.V(1).InfoS("Criteria for a node below target utilization", h.criteria...)
+	klog.V(1).InfoS("Number of underutilized nodes", "totalNumber", len(lowNodes))
 
-	if len(sourceNodes) == 0 {
-		klog.V(1).InfoS("No node is underutilized, nothing to do here, you might tune your thresholds further")
+	if len(lowNodes) == 0 {
+		klog.V(1).InfoS(
+			"No node is underutilized, nothing to do here, you might tune your thresholds further",
+		)
 		return nil
 	}
-	if len(sourceNodes) <= h.args.NumberOfNodes {
-		klog.V(1).InfoS("Number of nodes underutilized is less or equal than NumberOfNodes, nothing to do here", "underutilizedNodes", len(sourceNodes), "numberOfNodes", h.args.NumberOfNodes)
+
+	if len(lowNodes) <= h.args.NumberOfNodes {
+		klog.V(1).InfoS(
+			"Number of nodes underutilized is less or equal than NumberOfNodes, nothing to do here",
+			"underutilizedNodes", len(lowNodes),
+			"numberOfNodes", h.args.NumberOfNodes,
+		)
 		return nil
 	}
-	if len(sourceNodes) == len(nodes) {
+
+	if len(lowNodes) == len(nodes) {
 		klog.V(1).InfoS("All nodes are underutilized, nothing to do here")
 		return nil
 	}
-	if len(highNodes) == 0 {
+
+	if len(schedulableNodes) == 0 {
 		klog.V(1).InfoS("No node is available to schedule the pods, nothing to do here")
 		return nil
 	}
 
-	// stop if the total available usage has dropped to zero - no more pods can be scheduled
-	continueEvictionCond := func(nodeInfo NodeInfo, totalAvailableUsage api.ReferencedResourceList) bool {
-		for name := range totalAvailableUsage {
-			if totalAvailableUsage[name].CmpInt64(0) < 1 {
+	// stops the eviction process if the total available capacity sage has
+	// dropped to zero - no more pods can be scheduled. this will signalize
+	// to stop if any of the available resources has dropped to zero.
+	continueEvictionCond := func(_ NodeInfo, avail api.ReferencedResourceList) bool {
+		for name := range avail {
+			if avail[name].CmpInt64(0) < 1 {
 				return false
 			}
 		}
-
 		return true
 	}
 
-	// Sort the nodes by the usage in ascending order
-	sortNodesByUsage(sourceNodes, true)
+	// sorts the nodes by the usage in ascending order.
	sortNodesByUsage(lowNodes, true)
 
 	evictPodsFromSourceNodes(
 		ctx,
 		h.args.EvictableNamespaces,
-		sourceNodes,
-		highNodes,
+		lowNodes,
+		schedulableNodes,
 		h.handle.Evictor(),
 		evictions.EvictOptions{StrategyName: HighNodeUtilizationPluginName},
 		h.podFilter,
@@ -197,27 +258,3 @@ func (h *HighNodeUtilization) Balance(ctx context.Context, nodes []*v1.Node) *fr
 
 	return nil
 }
-
-func setDefaultForThresholds(thresholds, targetThresholds api.ResourceThresholds) {
-	// check if Pods/CPU/Mem are set, if not, set them to 100
-	if _, ok := thresholds[v1.ResourcePods]; !ok {
-		thresholds[v1.ResourcePods] = MaxResourcePercentage
-	}
-	if _, ok := thresholds[v1.ResourceCPU]; !ok {
-		thresholds[v1.ResourceCPU] = MaxResourcePercentage
-	}
-	if _, ok := thresholds[v1.ResourceMemory]; !ok {
-		thresholds[v1.ResourceMemory] = MaxResourcePercentage
-	}
-
-	// Default targetThreshold resource values to 100
-	targetThresholds[v1.ResourcePods] = MaxResourcePercentage
-	targetThresholds[v1.ResourceCPU] = MaxResourcePercentage
-	targetThresholds[v1.ResourceMemory] = MaxResourcePercentage
-
-	for name := range thresholds {
-		if !nodeutil.IsBasicResource(name) {
-			targetThresholds[name] = MaxResourcePercentage
-		}
-	}
-}
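The heart of this refactor, per the comments in the diff, is that node usage is no longer compared against per-node absolute thresholds; instead both usage and the user-supplied thresholds are expressed as percentages of each node's capacity. The helpers doing that work (assessNodesUsagesAndStaticThresholds, normalizer.Round, uniquifyResourceNames) are added in other files of this PR and are not shown here; the snippet below is only a hedged sketch of the normalization idea under that assumption, not the PR's implementation:

```go
package main

import "fmt"

// normalize expresses absolute usage as a percentage of capacity, the same
// shape the user-provided thresholds already have. This mirrors what the
// diff's comment describes; the real helper lives elsewhere in the PR.
func normalize(usage, capacity map[string]int64) map[string]float64 {
	percentages := map[string]float64{}
	for name, used := range usage {
		if total, ok := capacity[name]; ok && total > 0 {
			percentages[name] = float64(used) / float64(total) * 100
		}
	}
	return percentages
}

// belowThreshold is a simplified stand-in for the diff's isNodeBelowThreshold:
// a node counts as underutilized when every resource sits under its threshold.
func belowThreshold(usage, threshold map[string]float64) bool {
	for name, value := range usage {
		if value >= threshold[name] {
			return false
		}
	}
	return true
}

func main() {
	usage := map[string]int64{"cpu": 500, "memory": 1 << 30, "pods": 10}
	capacity := map[string]int64{"cpu": 4000, "memory": 8 << 30, "pods": 110}
	thresholds := map[string]float64{"cpu": 20, "memory": 20, "pods": 20}

	pct := normalize(usage, capacity)
	fmt.Println(pct, belowThreshold(pct, thresholds)) // cpu 12.5, memory 12.5, pods ~9.1 -> true
}
```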

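Note also that the per-node NodeThresholds pair is gone from NodeInfo; the new available field is filled by capNodeCapacitiesToThreshold, whose body is in another file of this PR. Assuming it does what its name and call site suggest, namely capping each node capacity at the high threshold percentage, a hypothetical sketch of that calculation:

```go
package main

import "fmt"

// capToThreshold is a hypothetical stand-in for capNodeCapacitiesToThreshold:
// the room a destination node offers is its capacity capped at the high
// threshold percentage. The eviction loop can then treat this as a budget,
// and continueEvictionCond in the diff above stops once any resource drops
// to zero.
func capToThreshold(capacity map[string]int64, thresholdPct float64) map[string]int64 {
	capped := make(map[string]int64, len(capacity))
	for name, total := range capacity {
		capped[name] = int64(float64(total) * thresholdPct / 100)
	}
	return capped
}

func main() {
	capacity := map[string]int64{"cpu": 4000, "memory": 8 << 30, "pods": 110}

	// The constructor above pins the high threshold to MaxResourcePercentage
	// for every configured resource, which amounts to the full node capacity.
	fmt.Println(capToThreshold(capacity, 100))
}
```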
pkg/framework/plugins/nodeutilization/highnodeutilization_test.go (+1 -1)
@@ -244,7 +244,7 @@ func TestHighNodeUtilization(t *testing.T) {
 			},
 			// All pods are assumed to be burstable (test.BuildTestNode always sets both cpu/memory resource requests to some value)
 			pods: []*v1.Pod{
-				test.BuildTestPod("p1", 400, 0, n1NodeName, func(pod *v1.Pod) {
+				test.BuildTestPod("p1", 0, 0, n1NodeName, func(pod *v1.Pod) {
 					test.SetRSOwnerRef(pod)
 					test.MakeBestEffortPod(pod)
 				}),
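The single change drops p1's CPU request argument from 400 millicores to 0, in line with the pod being turned into a best-effort pod on the following lines: a best-effort pod declares no requests, so under the request-based usage client this plugin uses it adds nothing to the node's measured CPU or memory. A hypothetical illustration of that accounting (this is not code from the repository's test helpers):

```go
package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
)

func main() {
	// A best-effort pod: no requests, no limits on any container.
	pod := v1.Pod{Spec: v1.PodSpec{Containers: []v1.Container{{Name: "c"}}}}

	// Summing CPU requests the way a request-based usage client would.
	total := resource.NewMilliQuantity(0, resource.DecimalSI)
	for _, c := range pod.Spec.Containers {
		if req, ok := c.Resources.Requests[v1.ResourceCPU]; ok {
			total.Add(req)
		}
	}
	fmt.Println(total.String()) // "0": the pod does not move the node's usage
}
```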
