@@ -34,6 +34,7 @@ type ENICleaner struct {
34
34
EC2Wrapper EC2Wrapper
35
35
ClusterName string
36
36
Log logr.Logger
37
+ VPCID string
37
38
38
39
availableENIs map [string ]struct {}
39
40
shutdown bool
@@ -42,16 +43,22 @@ type ENICleaner struct {
42
43
}
43
44
44
45
var (
45
- vpcCniLeakedENICleanupCnt = prometheus .NewCounter (
46
- prometheus.CounterOpts {
47
- Name : "vpc_cni_created_leaked_eni_cleanup_count " ,
48
- Help : "The number of leaked ENIs created by VPC-CNI that is cleaned up by the controller " ,
46
+ vpccniAvailableENICnt = prometheus .NewGauge (
47
+ prometheus.GaugeOpts {
48
+ Name : "vpc_cni_created_available_eni_count " ,
49
+ Help : "The number of available ENIs created by VPC-CNI that controller will try to delete in each cleanup cycle " ,
49
50
},
50
51
)
51
- vpcrcLeakedENICleanupCnt = prometheus .NewCounter (
52
- prometheus.CounterOpts {
53
- Name : "vpc_rc_created_leaked_eni_cleanup_count" ,
54
- Help : "The number of leaked ENIs created by VPC-RC that is cleaned up by the controller" ,
52
+ vpcrcAvailableENICnt = prometheus .NewGauge (
53
+ prometheus.GaugeOpts {
54
+ Name : "vpc_rc_created_available_eni_count" ,
55
+ Help : "The number of available ENIs created by VPC-RC that controller will try to delete in each cleanup cycle" ,
56
+ },
57
+ )
58
+ leakedENICnt = prometheus .NewGauge (
59
+ prometheus.GaugeOpts {
60
+ Name : "leaked_eni_count" ,
61
+ Help : "The number of available ENIs that failed to be deleted by the controller in each cleanup cycle" ,
55
62
},
56
63
)
57
64
)
@@ -101,6 +108,9 @@ func (e *ENICleaner) Start(ctx context.Context) error {
101
108
// interval between cycle 1 and 2 and hence can be safely deleted. And we can also conclude that Interface 1 was
102
109
// created but not attached at the time when the 1st cycle ran and hence it should not be deleted.
103
110
func (e * ENICleaner ) cleanUpAvailableENIs () {
111
+ vpcrcAvailableCount := 0
112
+ vpccniAvailableCount := 0
113
+ leakedENICount := 0
104
114
describeNetworkInterfaceIp := & ec2.DescribeNetworkInterfacesInput {
105
115
Filters : []* ec2.Filter {
106
116
{
@@ -116,63 +126,65 @@ func (e *ENICleaner) cleanUpAvailableENIs() {
116
126
Values : aws .StringSlice ([]string {config .NetworkInterfaceOwnerTagValue ,
117
127
config .NetworkInterfaceOwnerVPCCNITagValue }),
118
128
},
129
+ {
130
+ Name : aws .String ("vpc-id" ),
131
+ Values : []* string {aws .String (e .VPCID )},
132
+ },
119
133
},
120
134
}
121
135
122
136
availableENIs := make (map [string ]struct {})
123
137
124
- for {
125
- describeNetworkInterfaceOp , err := e .EC2Wrapper .DescribeNetworkInterfaces (describeNetworkInterfaceIp )
126
- if err != nil {
127
- e .Log .Error (err , "failed to describe network interfaces, will retry" )
128
- return
129
- }
130
-
131
- for _ , networkInterface := range describeNetworkInterfaceOp .NetworkInterfaces {
132
- if _ , exists := e .availableENIs [* networkInterface .NetworkInterfaceId ]; exists {
133
- // Increment promethues metrics for number of leaked ENIs cleaned up
134
- if tagIdx := slices .IndexFunc (networkInterface .TagSet , func (tag * ec2.Tag ) bool {
135
- return * tag .Key == config .NetworkInterfaceOwnerTagKey
136
- }); tagIdx != - 1 {
137
- switch * networkInterface .TagSet [tagIdx ].Value {
138
- case config .NetworkInterfaceOwnerTagValue :
139
- vpcrcLeakedENICleanupCnt .Inc ()
140
- case config .NetworkInterfaceOwnerVPCCNITagValue :
141
- vpcCniLeakedENICleanupCnt .Inc ()
142
- default :
143
- // We will not hit this case as we only filter for above two tag values, adding it for any future use cases
144
- e .Log .Info ("found available ENI not created by VPC-CNI/VPC-RC" )
145
- }
146
- }
138
+ networkInterfaces , err := e .EC2Wrapper .DescribeNetworkInterfacesPages (describeNetworkInterfaceIp )
139
+ if err != nil {
140
+ e .Log .Error (err , "failed to describe network interfaces, cleanup will be retried in next cycle" )
141
+ return
142
+ }
147
143
148
- // The ENI in available state has been sitting for at least the eni clean up interval and it should
149
- // be removed
150
- _ , err := e .EC2Wrapper .DeleteNetworkInterface (& ec2.DeleteNetworkInterfaceInput {
151
- NetworkInterfaceId : networkInterface .NetworkInterfaceId ,
152
- })
153
- if err != nil {
154
- // Log and continue, if the ENI is still present it will be cleaned up in next 2 cycles
155
- e .Log .Error (err , "failed to delete the dangling network interface" ,
156
- "id" , * networkInterface .NetworkInterfaceId )
144
+ for _ , networkInterface := range networkInterfaces {
145
+ if _ , exists := e .availableENIs [* networkInterface .NetworkInterfaceId ]; exists {
146
+ // Count this ENI by its owner tag; the totals are published to the Prometheus available-ENI gauges after the loop
147
+ if tagIdx := slices .IndexFunc (networkInterface .TagSet , func (tag * ec2.Tag ) bool {
148
+ return * tag .Key == config .NetworkInterfaceOwnerTagKey
149
+ }); tagIdx != - 1 {
150
+ switch * networkInterface .TagSet [tagIdx ].Value {
151
+ case config .NetworkInterfaceOwnerTagValue :
152
+ vpcrcAvailableCount += 1
153
+ case config .NetworkInterfaceOwnerVPCCNITagValue :
154
+ vpccniAvailableCount += 1
155
+ default :
156
+ // We should not hit this case as we only filter for relevant tag values, log error and continue if unexpected ENIs found
157
+ e .Log .Error (fmt .Errorf ("found available ENI not created by VPC-CNI/VPC-RC" ), "eniID" , * networkInterface .NetworkInterfaceId )
157
158
continue
158
159
}
159
- e .Log .Info ("deleted dangling ENI successfully" ,
160
- "eni id" , networkInterface .NetworkInterfaceId )
161
- } else {
162
- // Seeing the ENI for the first time, add it to the new list of available network interfaces
163
- availableENIs [* networkInterface .NetworkInterfaceId ] = struct {}{}
164
- e .Log .V (1 ).Info ("adding eni to to the map of available ENIs, will be removed if present in " +
165
- "next run too" , "id" , * networkInterface .NetworkInterfaceId )
166
160
}
167
- }
168
161
169
- if describeNetworkInterfaceOp .NextToken == nil {
170
- break
162
+ // The ENI in available state has been sitting for at least the eni clean up interval and it should
163
+ // be removed
164
+ _ , err := e .EC2Wrapper .DeleteNetworkInterface (& ec2.DeleteNetworkInterfaceInput {
165
+ NetworkInterfaceId : networkInterface .NetworkInterfaceId ,
166
+ })
167
+ if err != nil {
168
+ leakedENICount += 1
169
+ // Log and continue, if the ENI is still present it will be cleaned up in next 2 cycles
170
+ e .Log .Error (err , "failed to delete the dangling network interface" ,
171
+ "id" , * networkInterface .NetworkInterfaceId )
172
+ continue
173
+ }
174
+ e .Log .Info ("deleted dangling ENI successfully" ,
175
+ "eni id" , networkInterface .NetworkInterfaceId )
176
+ } else {
177
+ // Seeing the ENI for the first time, add it to the new list of available network interfaces
178
+ availableENIs [* networkInterface .NetworkInterfaceId ] = struct {}{}
179
+ e .Log .V (1 ).Info ("adding eni to to the map of available ENIs, will be removed if present in " +
180
+ "next run too" , "id" , * networkInterface .NetworkInterfaceId )
171
181
}
172
-
173
- describeNetworkInterfaceIp .NextToken = describeNetworkInterfaceOp .NextToken
174
182
}
175
183
184
+ // Update leaked ENI metrics
185
+ vpcrcAvailableENICnt .Set (float64 (vpcrcAvailableCount ))
186
+ vpccniAvailableENICnt .Set (float64 (vpccniAvailableCount ))
187
+ leakedENICnt .Set (float64 (leakedENICount ))
176
188
// Set the available ENIs to the list of ENIs seen in the current cycle
177
189
e .availableENIs = availableENIs
178
190
}
0 commit comments