@@ -565,6 +565,7 @@ func (sd *ScaleDown) TryToScaleDown(allNodes []*apiv1.Node, pods []*apiv1.Pod, p
 	nodesWithoutMaster := filterOutMasters(allNodes, pods)
 	candidates := make([]*apiv1.Node, 0)
 	readinessMap := make(map[string]bool)
+	candidateNodeGroups := make(map[string]cloudprovider.NodeGroup)
 
 	resourceLimiter, errCP := sd.context.CloudProvider.GetResourceLimiter()
 	if errCP != nil {
@@ -635,6 +636,7 @@ func (sd *ScaleDown) TryToScaleDown(allNodes []*apiv1.Node, pods []*apiv1.Pod, p
 			}
 
 			candidates = append(candidates, node)
+			candidateNodeGroups[node.Name] = nodeGroup
 		}
 	}
 	if len(candidates) == 0 {
@@ -649,7 +651,7 @@ func (sd *ScaleDown) TryToScaleDown(allNodes []*apiv1.Node, pods []*apiv1.Pod, p
 	if len(emptyNodes) > 0 {
 		nodeDeletionStart := time.Now()
 		confirmation := make(chan errors.AutoscalerError, len(emptyNodes))
-		sd.scheduleDeleteEmptyNodes(emptyNodes, sd.context.ClientSet, sd.context.Recorder, readinessMap, confirmation)
+		sd.scheduleDeleteEmptyNodes(emptyNodes, sd.context.ClientSet, sd.context.Recorder, readinessMap, candidateNodeGroups, confirmation)
 		err := sd.waitForEmptyNodesDeleted(emptyNodes, confirmation)
 		nodeDeletionDuration = time.Now().Sub(nodeDeletionStart)
 		if err == nil {
@@ -701,10 +703,11 @@ func (sd *ScaleDown) TryToScaleDown(allNodes []*apiv1.Node, pods []*apiv1.Pod, p
 			glog.Errorf("Failed to delete %s: %v", toRemove.Node.Name, err)
 			return
 		}
+		nodeGroup := candidateNodeGroups[toRemove.Node.Name]
 		if readinessMap[toRemove.Node.Name] {
-			metrics.RegisterScaleDown(1, gpu.GetGpuTypeForMetrics(toRemove.Node), metrics.Underutilized)
+			metrics.RegisterScaleDown(1, gpu.GetGpuTypeForMetrics(toRemove.Node, nodeGroup), metrics.Underutilized)
 		} else {
-			metrics.RegisterScaleDown(1, gpu.GetGpuTypeForMetrics(toRemove.Node), metrics.Unready)
+			metrics.RegisterScaleDown(1, gpu.GetGpuTypeForMetrics(toRemove.Node, nodeGroup), metrics.Unready)
 		}
 	}()
@@ -784,7 +787,8 @@ func getEmptyNodes(candidates []*apiv1.Node, pods []*apiv1.Pod, maxEmptyBulkDele
 }
 
 func (sd *ScaleDown) scheduleDeleteEmptyNodes(emptyNodes []*apiv1.Node, client kube_client.Interface,
-	recorder kube_record.EventRecorder, readinessMap map[string]bool, confirmation chan errors.AutoscalerError) {
+	recorder kube_record.EventRecorder, readinessMap map[string]bool,
+	candidateNodeGroups map[string]cloudprovider.NodeGroup, confirmation chan errors.AutoscalerError) {
 	for _, node := range emptyNodes {
 		glog.V(0).Infof("Scale-down: removing empty node %s", node.Name)
 		sd.context.LogRecorder.Eventf(apiv1.EventTypeNormal, "ScaleDownEmpty", "Scale-down: removing empty node %s", node.Name)
@@ -809,10 +813,11 @@ func (sd *ScaleDown) scheduleDeleteEmptyNodes(emptyNodes []*apiv1.Node, client k
 			deleteErr = deleteNodeFromCloudProvider(nodeToDelete, sd.context.CloudProvider,
 				sd.context.Recorder, sd.clusterStateRegistry)
 			if deleteErr == nil {
+				nodeGroup := candidateNodeGroups[nodeToDelete.Name]
 				if readinessMap[nodeToDelete.Name] {
-					metrics.RegisterScaleDown(1, gpu.GetGpuTypeForMetrics(nodeToDelete), metrics.Empty)
+					metrics.RegisterScaleDown(1, gpu.GetGpuTypeForMetrics(nodeToDelete, nodeGroup), metrics.Empty)
 				} else {
-					metrics.RegisterScaleDown(1, gpu.GetGpuTypeForMetrics(nodeToDelete), metrics.Unready)
+					metrics.RegisterScaleDown(1, gpu.GetGpuTypeForMetrics(nodeToDelete, nodeGroup), metrics.Unready)
 				}
 			}
 			confirmation <- deleteErr