Skip to content

Commit 749e81c

Browse files
Check whether pod matches the inter-pod anti-affinity of another Pod in a given Node in NodeFit() (#1356)
* Check if Pod matches inter-pod anti-affinity of other pod on node as part of NodeFit() * Add unit tests for checking inter-pod anti-affinity match in NodeFit() * Export setPodAntiAffinity() helper func to test utils * Add docs for inter-pod anti-affinity in README * Refactor logic for inter-pod anti-affinity to use in multiple pkgs * Move logic for finding match between pods with antiaffinity out of framework to reuse in other pkgs * Move interpod antiaffinity funcs to pkg/utils/predicates.go * Add unit tests for inter-pod anti-affinity check * Test logic in GroupByNodeName * Test NodeFit() case where pods matches inter-pod anti-affinity * Test for inter-pod anti-affinity pods match terms, have label selector * NodeFit inter-pod anti-affinity check returns early if affinity spec not set
1 parent dc2cf72 commit 749e81c

File tree

12 files changed

+328
-132
lines changed

12 files changed

+328
-132
lines changed

README.md

+2-1
Original file line numberDiff line numberDiff line change
@@ -881,6 +881,7 @@ profiles:
881881
- `nodeAffinity` on the pod
882882
- Resource `requests` made by the pod and the resources available on other nodes
883883
- Whether any of the other nodes are marked as `unschedulable`
884+
- Any `podAntiAffinity` between the pod and the pods on the other nodes
884885

885886
E.g.
886887

@@ -902,7 +903,7 @@ profiles:
902903
- "PodLifeTime"
903904
```
904905

905-
Note that node fit filtering references the current pod spec, and not that of it's owner.
906+
Note that node fit filtering references the current pod spec, and not that of its owner.
906907
Thus, if the pod is owned by a ReplicationController (and that ReplicationController was modified recently),
907908
the pod may be running with an outdated spec, which the descheduler will reference when determining node fit.
908909
This is expected behavior as the descheduler is a "best-effort" mechanism.

pkg/descheduler/node/node.go

+26
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,13 @@ func NodeFit(nodeIndexer podutil.GetPodsAssignedToNodeFunc, pod *v1.Pod, node *v
130130
errors = append(errors, fmt.Errorf("node is not schedulable"))
131131
}
132132

133+
// Check if pod matches inter-pod anti-affinity rule of pod on node
134+
if match, err := podMatchesInterPodAntiAffinity(nodeIndexer, pod, node); err != nil {
135+
errors = append(errors, err)
136+
} else if match {
137+
errors = append(errors, fmt.Errorf("pod matches inter-pod anti-affinity rule of other pod on node"))
138+
}
139+
133140
return errors
134141
}
135142

@@ -323,3 +330,22 @@ func PodMatchNodeSelector(pod *v1.Pod, node *v1.Node) bool {
323330
}
324331
return matches
325332
}
333+
334+
// podMatchesInterPodAntiAffinity checks if the pod matches the anti-affinity rule
335+
// of another pod that is already on the given node.
336+
// If a match is found, it returns true.
337+
func podMatchesInterPodAntiAffinity(nodeIndexer podutil.GetPodsAssignedToNodeFunc, pod *v1.Pod, node *v1.Node) (bool, error) {
338+
if pod.Spec.Affinity == nil || pod.Spec.Affinity.PodAntiAffinity == nil {
339+
return false, nil
340+
}
341+
342+
podsOnNode, err := podutil.ListPodsOnANode(node.Name, nodeIndexer, nil)
343+
if err != nil {
344+
return false, fmt.Errorf("error listing all pods: %v", err)
345+
}
346+
347+
podsInANamespace := podutil.GroupByNamespace(podsOnNode)
348+
nodeMap := utils.CreateNodeMap([]*v1.Node{node})
349+
350+
return utils.CheckPodsWithAntiAffinityExist(pod, podsInANamespace, nodeMap), nil
351+
}

pkg/descheduler/node/node_test.go

+23-2
Original file line numberDiff line numberDiff line change
@@ -754,7 +754,11 @@ func TestPodFitsAnyOtherNode(t *testing.T) {
754754
}
755755

756756
func TestNodeFit(t *testing.T) {
757-
node := test.BuildTestNode("node", 64000, 128*1000*1000*1000, 2, nil)
757+
node := test.BuildTestNode("node", 64000, 128*1000*1000*1000, 2, func(node *v1.Node) {
758+
node.ObjectMeta.Labels = map[string]string{
759+
"region": "main-region",
760+
}
761+
})
758762
tests := []struct {
759763
description string
760764
pod *v1.Pod
@@ -781,6 +785,22 @@ func TestNodeFit(t *testing.T) {
781785
},
782786
err: errors.New("insufficient pods"),
783787
},
788+
{
789+
description: "matches inter-pod anti-affinity rule of pod on node",
790+
pod: test.PodWithPodAntiAffinity(test.BuildTestPod("p1", 1000, 1000, node.Name, nil), "foo", "bar"),
791+
node: node,
792+
podsOnNode: []*v1.Pod{
793+
test.PodWithPodAntiAffinity(test.BuildTestPod("p2", 1000, 1000, node.Name, nil), "foo", "bar"),
794+
},
795+
err: errors.New("pod matches inter-pod anti-affinity rule of other pod on node"),
796+
},
797+
{
798+
description: "pod fits on node",
799+
pod: test.BuildTestPod("p1", 1000, 1000, "", func(pod *v1.Pod) {}),
800+
node: node,
801+
podsOnNode: []*v1.Pod{},
802+
err: nil,
803+
},
784804
}
785805

786806
for _, tc := range tests {
@@ -804,7 +824,8 @@ func TestNodeFit(t *testing.T) {
804824

805825
sharedInformerFactory.Start(ctx.Done())
806826
sharedInformerFactory.WaitForCacheSync(ctx.Done())
807-
if errs := NodeFit(getPodsAssignedToNode, tc.pod, tc.node); (len(errs) == 0 && tc.err != nil) || errs[0].Error() != tc.err.Error() {
827+
errs := NodeFit(getPodsAssignedToNode, tc.pod, tc.node)
828+
if (len(errs) == 0 && tc.err != nil) || (len(errs) > 0 && errs[0].Error() != tc.err.Error()) {
808829
t.Errorf("Test %#v failed, got %v, expect %v", tc.description, errs, tc.err)
809830
}
810831
})

pkg/descheduler/pod/pods.go

+9
Original file line numberDiff line numberDiff line change
@@ -257,3 +257,12 @@ func SortPodsBasedOnAge(pods []*v1.Pod) {
257257
return pods[i].CreationTimestamp.Before(&pods[j].CreationTimestamp)
258258
})
259259
}
260+
261+
func GroupByNodeName(pods []*v1.Pod) map[string][]*v1.Pod {
262+
m := make(map[string][]*v1.Pod)
263+
for i := 0; i < len(pods); i++ {
264+
pod := pods[i]
265+
m[pod.Spec.NodeName] = append(m[pod.Spec.NodeName], pod)
266+
}
267+
return m
268+
}

pkg/descheduler/pod/pods_test.go

+64
Original file line numberDiff line numberDiff line change
@@ -176,3 +176,67 @@ func TestSortPodsBasedOnAge(t *testing.T) {
176176
}
177177
}
178178
}
179+
180+
func TestGroupByNodeName(t *testing.T) {
181+
tests := []struct {
182+
name string
183+
pods []*v1.Pod
184+
expMap map[string][]*v1.Pod
185+
}{
186+
{
187+
name: "list of pods is empty",
188+
pods: []*v1.Pod{},
189+
expMap: map[string][]*v1.Pod{},
190+
},
191+
{
192+
name: "pods are on same node",
193+
pods: []*v1.Pod{
194+
{Spec: v1.PodSpec{
195+
NodeName: "node1",
196+
}},
197+
{Spec: v1.PodSpec{
198+
NodeName: "node1",
199+
}},
200+
},
201+
expMap: map[string][]*v1.Pod{"node1": {
202+
{Spec: v1.PodSpec{
203+
NodeName: "node1",
204+
}},
205+
{Spec: v1.PodSpec{
206+
NodeName: "node1",
207+
}},
208+
}},
209+
},
210+
{
211+
name: "pods are on different nodes",
212+
pods: []*v1.Pod{
213+
{Spec: v1.PodSpec{
214+
NodeName: "node1",
215+
}},
216+
{Spec: v1.PodSpec{
217+
NodeName: "node2",
218+
}},
219+
},
220+
expMap: map[string][]*v1.Pod{
221+
"node1": {
222+
{Spec: v1.PodSpec{
223+
NodeName: "node1",
224+
}},
225+
},
226+
"node2": {
227+
{Spec: v1.PodSpec{
228+
NodeName: "node2",
229+
}},
230+
},
231+
},
232+
},
233+
}
234+
for _, test := range tests {
235+
t.Run(test.name, func(t *testing.T) {
236+
resultMap := GroupByNodeName(test.pods)
237+
if !reflect.DeepEqual(resultMap, test.expMap) {
238+
t.Errorf("Expected %v node map, got %v", test.expMap, resultMap)
239+
}
240+
})
241+
}
242+
}

pkg/framework/plugins/removepodsviolatinginterpodantiaffinity/pod_antiaffinity.go

+13-96
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,6 @@ import (
2828
"sigs.k8s.io/descheduler/pkg/utils"
2929

3030
v1 "k8s.io/api/core/v1"
31-
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
3231
"k8s.io/klog/v2"
3332
)
3433

@@ -86,8 +85,8 @@ func (d *RemovePodsViolatingInterPodAntiAffinity) Deschedule(ctx context.Context
8685
}
8786

8887
podsInANamespace := podutil.GroupByNamespace(pods)
89-
podsOnANode := groupByNodeName(pods)
90-
nodeMap := createNodeMap(nodes)
88+
podsOnANode := podutil.GroupByNodeName(pods)
89+
nodeMap := utils.CreateNodeMap(nodes)
9190

9291
loop:
9392
for _, node := range nodes {
@@ -97,15 +96,17 @@ loop:
9796
podutil.SortPodsBasedOnPriorityLowToHigh(pods)
9897
totalPods := len(pods)
9998
for i := 0; i < totalPods; i++ {
100-
if checkPodsWithAntiAffinityExist(pods[i], podsInANamespace, nodeMap) && d.handle.Evictor().Filter(pods[i]) && d.handle.Evictor().PreEvictionFilter(pods[i]) {
101-
if d.handle.Evictor().Evict(ctx, pods[i], evictions.EvictOptions{StrategyName: PluginName}) {
102-
// Since the current pod is evicted all other pods which have anti-affinity with this
103-
// pod need not be evicted.
104-
// Update allPods.
105-
podsInANamespace = removePodFromNamespaceMap(pods[i], podsInANamespace)
106-
pods = append(pods[:i], pods[i+1:]...)
107-
i--
108-
totalPods--
99+
if utils.CheckPodsWithAntiAffinityExist(pods[i], podsInANamespace, nodeMap) {
100+
if d.handle.Evictor().Filter(pods[i]) && d.handle.Evictor().PreEvictionFilter(pods[i]) {
101+
if d.handle.Evictor().Evict(ctx, pods[i], evictions.EvictOptions{StrategyName: PluginName}) {
102+
// Since the current pod is evicted all other pods which have anti-affinity with this
103+
// pod need not be evicted.
104+
// Update allPods.
105+
podsInANamespace = removePodFromNamespaceMap(pods[i], podsInANamespace)
106+
pods = append(pods[:i], pods[i+1:]...)
107+
i--
108+
totalPods--
109+
}
109110
}
110111
}
111112
if d.handle.Evictor().NodeLimitExceeded(node) {
@@ -130,87 +131,3 @@ func removePodFromNamespaceMap(podToRemove *v1.Pod, podMap map[string][]*v1.Pod)
130131
}
131132
return podMap
132133
}
133-
134-
func groupByNodeName(pods []*v1.Pod) map[string][]*v1.Pod {
135-
m := make(map[string][]*v1.Pod)
136-
for i := 0; i < len(pods); i++ {
137-
pod := pods[i]
138-
m[pod.Spec.NodeName] = append(m[pod.Spec.NodeName], pod)
139-
}
140-
return m
141-
}
142-
143-
func createNodeMap(nodes []*v1.Node) map[string]*v1.Node {
144-
m := make(map[string]*v1.Node, len(nodes))
145-
for _, node := range nodes {
146-
m[node.GetName()] = node
147-
}
148-
return m
149-
}
150-
151-
// checkPodsWithAntiAffinityExist checks if there are other pods on the node that the current pod cannot tolerate.
152-
func checkPodsWithAntiAffinityExist(pod *v1.Pod, pods map[string][]*v1.Pod, nodeMap map[string]*v1.Node) bool {
153-
affinity := pod.Spec.Affinity
154-
if affinity != nil && affinity.PodAntiAffinity != nil {
155-
for _, term := range getPodAntiAffinityTerms(affinity.PodAntiAffinity) {
156-
namespaces := utils.GetNamespacesFromPodAffinityTerm(pod, &term)
157-
selector, err := metav1.LabelSelectorAsSelector(term.LabelSelector)
158-
if err != nil {
159-
klog.ErrorS(err, "Unable to convert LabelSelector into Selector")
160-
return false
161-
}
162-
for namespace := range namespaces {
163-
for _, existingPod := range pods[namespace] {
164-
if existingPod.Name != pod.Name && utils.PodMatchesTermsNamespaceAndSelector(existingPod, namespaces, selector) {
165-
node, ok := nodeMap[pod.Spec.NodeName]
166-
if !ok {
167-
continue
168-
}
169-
nodeHavingExistingPod, ok := nodeMap[existingPod.Spec.NodeName]
170-
if !ok {
171-
continue
172-
}
173-
if hasSameLabelValue(node, nodeHavingExistingPod, term.TopologyKey) {
174-
klog.V(1).InfoS("Found Pods violating PodAntiAffinity", "pod to evicted", klog.KObj(pod))
175-
return true
176-
}
177-
}
178-
}
179-
}
180-
}
181-
}
182-
return false
183-
}
184-
185-
func hasSameLabelValue(node1, node2 *v1.Node, key string) bool {
186-
if node1.Name == node2.Name {
187-
return true
188-
}
189-
node1Labels := node1.Labels
190-
if node1Labels == nil {
191-
return false
192-
}
193-
node2Labels := node2.Labels
194-
if node2Labels == nil {
195-
return false
196-
}
197-
value1, ok := node1Labels[key]
198-
if !ok {
199-
return false
200-
}
201-
value2, ok := node2Labels[key]
202-
if !ok {
203-
return false
204-
}
205-
return value1 == value2
206-
}
207-
208-
// getPodAntiAffinityTerms gets the antiaffinity terms for the given pod.
209-
func getPodAntiAffinityTerms(podAntiAffinity *v1.PodAntiAffinity) (terms []v1.PodAffinityTerm) {
210-
if podAntiAffinity != nil {
211-
if len(podAntiAffinity.RequiredDuringSchedulingIgnoredDuringExecution) != 0 {
212-
terms = podAntiAffinity.RequiredDuringSchedulingIgnoredDuringExecution
213-
}
214-
}
215-
return terms
216-
}

pkg/framework/plugins/removepodsviolatinginterpodantiaffinity/pod_antiaffinity_test.go

+8-29
Original file line numberDiff line numberDiff line change
@@ -96,14 +96,14 @@ func TestPodAntiAffinity(t *testing.T) {
9696
test.SetNormalOwnerRef(p11)
9797

9898
// set pod anti affinity
99-
setPodAntiAffinity(p1, "foo", "bar")
100-
setPodAntiAffinity(p3, "foo", "bar")
101-
setPodAntiAffinity(p4, "foo", "bar")
102-
setPodAntiAffinity(p5, "foo1", "bar1")
103-
setPodAntiAffinity(p6, "foo1", "bar1")
104-
setPodAntiAffinity(p7, "foo", "bar")
105-
setPodAntiAffinity(p9, "foo", "bar")
106-
setPodAntiAffinity(p10, "foo", "bar")
99+
test.SetPodAntiAffinity(p1, "foo", "bar")
100+
test.SetPodAntiAffinity(p3, "foo", "bar")
101+
test.SetPodAntiAffinity(p4, "foo", "bar")
102+
test.SetPodAntiAffinity(p5, "foo1", "bar1")
103+
test.SetPodAntiAffinity(p6, "foo1", "bar1")
104+
test.SetPodAntiAffinity(p7, "foo", "bar")
105+
test.SetPodAntiAffinity(p9, "foo", "bar")
106+
test.SetPodAntiAffinity(p10, "foo", "bar")
107107

108108
// set pod priority
109109
test.SetPodPriority(p5, 100)
@@ -284,24 +284,3 @@ func TestPodAntiAffinity(t *testing.T) {
284284
})
285285
}
286286
}
287-
288-
func setPodAntiAffinity(inputPod *v1.Pod, labelKey, labelValue string) {
289-
inputPod.Spec.Affinity = &v1.Affinity{
290-
PodAntiAffinity: &v1.PodAntiAffinity{
291-
RequiredDuringSchedulingIgnoredDuringExecution: []v1.PodAffinityTerm{
292-
{
293-
LabelSelector: &metav1.LabelSelector{
294-
MatchExpressions: []metav1.LabelSelectorRequirement{
295-
{
296-
Key: labelKey,
297-
Operator: metav1.LabelSelectorOpIn,
298-
Values: []string{labelValue},
299-
},
300-
},
301-
},
302-
TopologyKey: "region",
303-
},
304-
},
305-
},
306-
}
307-
}

0 commit comments

Comments
 (0)