@@ -28,164 +28,225 @@ import (
    nodeutil "sigs.k8s.io/descheduler/pkg/descheduler/node"
    podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod"
+   "sigs.k8s.io/descheduler/pkg/framework/plugins/nodeutilization/normalizer"
    frameworktypes "sigs.k8s.io/descheduler/pkg/framework/types"
)

const HighNodeUtilizationPluginName = "HighNodeUtilization"

-// HighNodeUtilization evicts pods from under utilized nodes so that scheduler can schedule according to its plugin.
-// Note that CPU/Memory requests are used to calculate nodes' utilization and not the actual resource usage.
+// this line makes sure that HighNodeUtilization implements the BalancePlugin
+// interface.
+var _ frameworktypes.BalancePlugin = &HighNodeUtilization{}

+// HighNodeUtilization evicts pods from under utilized nodes so that scheduler
+// can schedule according to its plugin. Note that CPU/Memory requests are used
+// to calculate nodes' utilization and not the actual resource usage.
type HighNodeUtilization struct {
-    handle                   frameworktypes.Handle
-    args                     *HighNodeUtilizationArgs
-    podFilter                func(pod *v1.Pod) bool
-    underutilizationCriteria []interface{}
-    resourceNames            []v1.ResourceName
-    targetThresholds         api.ResourceThresholds
-    usageClient              usageClient
+    handle         frameworktypes.Handle
+    args           *HighNodeUtilizationArgs
+    podFilter      func(pod *v1.Pod) bool
+    criteria       []any
+    resourceNames  []v1.ResourceName
+    highThresholds api.ResourceThresholds
+    usageClient    usageClient
}

-var _ frameworktypes.BalancePlugin = &HighNodeUtilization{}
-
-// NewHighNodeUtilization builds plugin from its arguments while passing a handle
-func NewHighNodeUtilization(args runtime.Object, handle frameworktypes.Handle) (frameworktypes.Plugin, error) {
-    highNodeUtilizatioArgs, ok := args.(*HighNodeUtilizationArgs)
+// NewHighNodeUtilization builds plugin from its arguments while passing a handle.
+func NewHighNodeUtilization(
+    genericArgs runtime.Object, handle frameworktypes.Handle,
+) (frameworktypes.Plugin, error) {
+    args, ok := genericArgs.(*HighNodeUtilizationArgs)
    if !ok {
-        return nil, fmt.Errorf("want args to be of type HighNodeUtilizationArgs, got %T", args)
+        return nil, fmt.Errorf(
+            "want args to be of type HighNodeUtilizationArgs, got %T",
+            genericArgs,
+        )
    }

-    targetThresholds := make(api.ResourceThresholds)
-    setDefaultForThresholds(highNodeUtilizatioArgs.Thresholds, targetThresholds)
-    resourceNames := getResourceNames(targetThresholds)
-
-    underutilizationCriteria := []interface{}{
-        "CPU", highNodeUtilizatioArgs.Thresholds[v1.ResourceCPU],
-        "Mem", highNodeUtilizatioArgs.Thresholds[v1.ResourceMemory],
-        "Pods", highNodeUtilizatioArgs.Thresholds[v1.ResourcePods],
+    // this plugin worries only about thresholds but the nodeplugins
+    // package was made to take two thresholds into account, one for low
+    // and another for high usage. here we make sure we set the high
+    // threshold to the maximum value for all resources for which we have a
+    // threshold.
+    highThresholds := make(api.ResourceThresholds)
+    for rname := range args.Thresholds {
+        highThresholds[rname] = MaxResourcePercentage
    }
-    for name := range highNodeUtilizatioArgs.Thresholds {
-        if !nodeutil.IsBasicResource(name) {
-            underutilizationCriteria = append(underutilizationCriteria, string(name), int64(highNodeUtilizatioArgs.Thresholds[name]))
-        }
+
+    // criteria is a list of thresholds that are used to determine if a node
+    // is underutilized. it is used only for logging purposes.
+    criteria := []any{}
+    for rname, rvalue := range args.Thresholds {
+        criteria = append(criteria, rname, rvalue)
    }

-    podFilter, err := podutil.NewOptions().
+    podFilter, err := podutil.
+        NewOptions().
        WithFilter(handle.Evictor().Filter).
        BuildFilterFunc()
    if err != nil {
        return nil, fmt.Errorf("error initializing pod filter function: %v", err)
    }

+    // resourceNames is a list of all resource names this plugin cares
+    // about: the resources for which we have a threshold plus all the
+    // resources we consider basic (cpu, memory, pods).
+    resourceNames := uniquifyResourceNames(
+        append(
+            getResourceNames(args.Thresholds),
+            v1.ResourceCPU,
+            v1.ResourceMemory,
+            v1.ResourcePods,
+        ),
+    )
+
    return &HighNodeUtilization{
-        handle:                   handle,
-        args:                     highNodeUtilizatioArgs,
-        resourceNames:            resourceNames,
-        targetThresholds:         targetThresholds,
-        underutilizationCriteria: underutilizationCriteria,
-        podFilter:                podFilter,
-        usageClient:              newRequestedUsageClient(resourceNames, handle.GetPodsAssignedToNodeFunc()),
+        handle:         handle,
+        args:           args,
+        resourceNames:  resourceNames,
+        highThresholds: highThresholds,
+        criteria:       criteria,
+        podFilter:      podFilter,
+        usageClient: newRequestedUsageClient(
+            resourceNames,
+            handle.GetPodsAssignedToNodeFunc(),
+        ),
    }, nil
}

-// Name retrieves the plugin name
+// Name retrieves the plugin name.
func (h *HighNodeUtilization) Name() string {
    return HighNodeUtilizationPluginName
}

-// Balance extension point implementation for the plugin
+// Balance holds the main logic of the plugin. It evicts pods from under
+// utilized nodes. The goal here is to concentrate pods in fewer nodes so that
+// fewer nodes are used.
func (h *HighNodeUtilization) Balance(ctx context.Context, nodes []*v1.Node) *frameworktypes.Status {
    if err := h.usageClient.sync(ctx, nodes); err != nil {
        return &frameworktypes.Status{
            Err: fmt.Errorf("error getting node usage: %v", err),
        }
    }

+    // take a snapshot of the current state of the nodes; everything else
+    // here is based on this snapshot.
    nodesMap, nodesUsageMap, podListMap := getNodeUsageSnapshot(nodes, h.usageClient)
-    nodeThresholdsMap := getStaticNodeThresholds(nodes, h.args.Thresholds, h.targetThresholds)
-    nodesUsageAsNodeThresholdsMap := nodeUsageToResourceThresholds(nodesUsageMap, nodesMap)
+    capacities := referencedResourceListForNodesCapacity(nodes)
+
+    // node usages are not expressed as percentages of capacity. we need
+    // to normalize them so we can compare them with the thresholds, which
+    // the user already provides as percentages.
+    usage, thresholds := assessNodesUsagesAndStaticThresholds(
+        nodesUsageMap, capacities, h.args.Thresholds, h.highThresholds,
+    )
+
+    // classify nodes in two groups: underutilized and schedulable. we will
+    // later try to move pods from the first group to the second.
    nodeGroups := classifyNodeUsage(
-        nodesUsageAsNodeThresholdsMap,
-        nodeThresholdsMap,
+        usage, thresholds,
        []classifierFnc{
-            // underutilized nodes
+            // underutilized nodes.
            func(nodeName string, usage, threshold api.ResourceThresholds) bool {
                return isNodeBelowThreshold(usage, threshold)
            },
-            // every other node that is schedulable
+            // schedulable nodes.
            func(nodeName string, usage, threshold api.ResourceThresholds) bool {
                if nodeutil.IsNodeUnschedulable(nodesMap[nodeName]) {
-                    klog.V(2).InfoS("Node is unschedulable", "node", klog.KObj(nodesMap[nodeName]))
+                    klog.V(2).InfoS(
+                        "Node is unschedulable",
+                        "node", klog.KObj(nodesMap[nodeName]),
+                    )
                    return false
                }
                return true
            },
        },
    )

-    // convert groups node []NodeInfo
+    // the nodeplugin package works by means of NodeInfo structures. these
+    // structures hold information about the nodes. now that we have
+    // classified the nodes, we can build the NodeInfo structures for each
+    // group. NodeInfo structs carry usage and available resources for
+    // each node.
    nodeInfos := make([][]NodeInfo, 2)
    category := []string{"underutilized", "overutilized"}
    for i := range nodeGroups {
        for nodeName := range nodeGroups[i] {
-            klog.InfoS("Node is "+category[i], "node", klog.KObj(nodesMap[nodeName]), "usage", nodesUsageMap[nodeName], "usagePercentage", resourceUsagePercentages(nodesUsageMap[nodeName], nodesMap[nodeName], true))
+            klog.InfoS(
+                "Node has been classified",
+                "category", category[i],
+                "node", klog.KObj(nodesMap[nodeName]),
+                "usage", nodesUsageMap[nodeName],
+                "usagePercentage", normalizer.Round(usage[nodeName]),
+            )
            nodeInfos[i] = append(nodeInfos[i], NodeInfo{
                NodeUsage: NodeUsage{
                    node:    nodesMap[nodeName],
-                    usage:   nodesUsageMap[nodeName], // get back the original node usage
+                    usage:   nodesUsageMap[nodeName],
                    allPods: podListMap[nodeName],
                },
-                thresholds: NodeThresholds{
-                    lowResourceThreshold:  resourceThresholdsToNodeUsage(nodeThresholdsMap[nodeName][0], nodesMap[nodeName]),
-                    highResourceThreshold: resourceThresholdsToNodeUsage(nodeThresholdsMap[nodeName][1], nodesMap[nodeName]),
-                },
+                available: capNodeCapacitiesToThreshold(
+                    nodesMap[nodeName],
+                    thresholds[nodeName][1],
+                    h.resourceNames,
+                ),
            })
        }
    }

-    sourceNodes := nodeInfos[0]
-    highNodes := nodeInfos[1]
+    lowNodes, schedulableNodes := nodeInfos[0], nodeInfos[1]

-    // log message in one line
-    klog.V(1).InfoS("Criteria for a node below target utilization", h.underutilizationCriteria...)
-    klog.V(1).InfoS("Number of underutilized nodes", "totalNumber", len(sourceNodes))
+    klog.V(1).InfoS("Criteria for a node below target utilization", h.criteria...)
+    klog.V(1).InfoS("Number of underutilized nodes", "totalNumber", len(lowNodes))

-    if len(sourceNodes) == 0 {
-        klog.V(1).InfoS("No node is underutilized, nothing to do here, you might tune your thresholds further")
+    if len(lowNodes) == 0 {
+        klog.V(1).InfoS(
+            "No node is underutilized, nothing to do here, you might tune your thresholds further",
+        )
        return nil
    }
-    if len(sourceNodes) <= h.args.NumberOfNodes {
-        klog.V(1).InfoS("Number of nodes underutilized is less or equal than NumberOfNodes, nothing to do here", "underutilizedNodes", len(sourceNodes), "numberOfNodes", h.args.NumberOfNodes)
+
+    if len(lowNodes) <= h.args.NumberOfNodes {
+        klog.V(1).InfoS(
+            "Number of nodes underutilized is less or equal than NumberOfNodes, nothing to do here",
+            "underutilizedNodes", len(lowNodes),
+            "numberOfNodes", h.args.NumberOfNodes,
+        )
        return nil
    }
-    if len(sourceNodes) == len(nodes) {
+
+    if len(lowNodes) == len(nodes) {
        klog.V(1).InfoS("All nodes are underutilized, nothing to do here")
        return nil
    }
-    if len(highNodes) == 0 {
+
+    if len(schedulableNodes) == 0 {
        klog.V(1).InfoS("No node is available to schedule the pods, nothing to do here")
        return nil
    }

-    // stop if the total available usage has dropped to zero - no more pods can be scheduled
-    continueEvictionCond := func(nodeInfo NodeInfo, totalAvailableUsage api.ReferencedResourceList) bool {
-        for name := range totalAvailableUsage {
-            if totalAvailableUsage[name].CmpInt64(0) < 1 {
+    // stops the eviction process if the total available capacity has
+    // dropped to zero - no more pods can be scheduled. this signals the
+    // caller to stop if any of the available resources has dropped to zero.
+    continueEvictionCond := func(_ NodeInfo, avail api.ReferencedResourceList) bool {
+        for name := range avail {
+            if avail[name].CmpInt64(0) < 1 {
                return false
            }
        }
-
        return true
    }

-    // Sort the nodes by the usage in ascending order
-    sortNodesByUsage(sourceNodes, true)
+    // sorts the nodes by the usage in ascending order.
+    sortNodesByUsage(lowNodes, true)

    evictPodsFromSourceNodes(
        ctx,
        h.args.EvictableNamespaces,
-        sourceNodes,
-        highNodes,
+        lowNodes,
+        schedulableNodes,
        h.handle.Evictor(),
        evictions.EvictOptions{StrategyName: HighNodeUtilizationPluginName},
        h.podFilter,
@@ -197,27 +258,3 @@ func (h *HighNodeUtilization) Balance(ctx context.Context, nodes []*v1.Node) *fr

    return nil
}
-
-func setDefaultForThresholds(thresholds, targetThresholds api.ResourceThresholds) {
-    // check if Pods/CPU/Mem are set, if not, set them to 100
-    if _, ok := thresholds[v1.ResourcePods]; !ok {
-        thresholds[v1.ResourcePods] = MaxResourcePercentage
-    }
-    if _, ok := thresholds[v1.ResourceCPU]; !ok {
-        thresholds[v1.ResourceCPU] = MaxResourcePercentage
-    }
-    if _, ok := thresholds[v1.ResourceMemory]; !ok {
-        thresholds[v1.ResourceMemory] = MaxResourcePercentage
-    }
-
-    // Default targetThreshold resource values to 100
-    targetThresholds[v1.ResourcePods] = MaxResourcePercentage
-    targetThresholds[v1.ResourceCPU] = MaxResourcePercentage
-    targetThresholds[v1.ResourceMemory] = MaxResourcePercentage
-
-    for name := range thresholds {
-        if !nodeutil.IsBasicResource(name) {
-            targetThresholds[name] = MaxResourcePercentage
-        }
-    }
-}
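In short, the removed setDefaultForThresholds helper used to force cpu/memory/pods defaults of 100% into both threshold maps, while the new constructor only pins the high threshold to the maximum for the resources the user actually configured and tracks cpu/memory/pods through the de-duplicated resource-name list. The standalone sketch below illustrates the shape of that derivation with plain maps and strings; it is not the descheduler code or its API, and names such as maxPercentage are illustrative only.

package main

import (
    "fmt"
    "sort"
)

// maxPercentage stands in for the plugin's MaxResourcePercentage constant.
const maxPercentage = 100

func main() {
    // user-supplied low thresholds: a node below these is considered underutilized.
    thresholds := map[string]float64{"cpu": 20, "memory": 20}

    // pin the high threshold to 100% for every configured resource,
    // since this plugin only cares about the low bound.
    highThresholds := map[string]float64{}
    for name := range thresholds {
        highThresholds[name] = maxPercentage
    }

    // track the configured resources plus the basic ones (cpu, memory,
    // pods), de-duplicated.
    seen := map[string]bool{}
    resourceNames := []string{}
    for name := range thresholds {
        if !seen[name] {
            seen[name] = true
            resourceNames = append(resourceNames, name)
        }
    }
    for _, name := range []string{"cpu", "memory", "pods"} {
        if !seen[name] {
            seen[name] = true
            resourceNames = append(resourceNames, name)
        }
    }
    sort.Strings(resourceNames)

    fmt.Println(highThresholds) // map[cpu:100 memory:100]
    fmt.Println(resourceNames)  // [cpu memory pods]
}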