18
18
*/
19
19
public class EC2RetentionStrategy extends RetentionStrategy <SlaveComputer > implements ExecutorListener {
20
20
21
- private static final int RE_CHECK_IN_MINUTE = 1 ;
21
+ private static final int RE_CHECK_IN_A_MINUTE = 1 ;
22
22
23
23
private static final Logger LOGGER = Logger .getLogger (EC2RetentionStrategy .class .getName ());
24
24
@@ -42,52 +42,62 @@ public long check(final SlaveComputer computer) {
42
42
if (cloud == null ) {
43
43
LOGGER .warning ("Cloud is null for computer " + fc .getDisplayName ()
44
44
+ ". This should be autofixed in a few minutes, if not please create an issue for the plugin" );
45
- return RE_CHECK_IN_MINUTE ;
45
+ return RE_CHECK_IN_A_MINUTE ;
46
46
}
47
47
48
48
// Ensure that the EC2FleetCloud cannot be mutated from under us while
49
49
// we're doing this check
50
50
// Ensure nobody provisions onto this node until we've done
51
51
// checking
52
52
boolean shouldAcceptTasks = fc .isAcceptingTasks ();
53
- boolean justTerminated = false ;
53
+ boolean markedForTermination = false ;
54
54
fc .setAcceptingTasks (false );
55
55
try {
56
56
if (fc .isIdle ()) {
57
- final EC2AgentTerminationReason reason ;
58
- if (isIdleForTooLong (cloud , fc )) {
59
- reason = EC2AgentTerminationReason .IDLE_FOR_TOO_LONG ;
57
+ Node node = fc .getNode ();
58
+ if (node == null ) {
59
+ return RE_CHECK_IN_A_MINUTE ;
60
+ }
61
+
62
+ EC2AgentTerminationReason reason ;
63
+ // Determine the reason for termination from specific to generic use cases.
64
+ // Reasoning for checking all cases of termination initiated by the plugin:
65
+ // A user-initiated change to cloud configuration creates a new EC2FleetCloud object, erasing class fields containing data like instance IDs to terminate.
66
+ // Hence, determine the reasons for termination here using persisted fields for accurate handling of termination.
67
+ if (fc .isMarkedForDeletion ()) {
68
+ reason = EC2AgentTerminationReason .AGENT_DELETED ;
60
69
} else if (cloud .hasExcessCapacity ()) {
61
70
reason = EC2AgentTerminationReason .EXCESS_CAPACITY ;
71
+ } else if (cloud instanceof EC2FleetCloud && !((EC2FleetCloud ) cloud ).hasUnlimitedUsesForNodes ()
72
+ && ((EC2FleetNode )node ).getUsesRemaining () <= 0 ) {
73
+ reason = EC2AgentTerminationReason .MAX_TOTAL_USES_EXHAUSTED ;
74
+ } else if (isIdleForTooLong (cloud , fc )) {
75
+ reason = EC2AgentTerminationReason .IDLE_FOR_TOO_LONG ;
62
76
} else {
63
- return 0 ;
64
- }
65
-
66
- // Find instance ID
67
- Node compNode = fc .getNode ();
68
- if (compNode == null ) {
69
- return 0 ;
77
+ return RE_CHECK_IN_A_MINUTE ;
70
78
}
71
79
72
- final String instanceId = compNode .getNodeName ();
73
- if (cloud .scheduleToTerminate (instanceId , false , reason )) {
80
+ final String instanceId = node .getNodeName ();
81
+ final boolean ignoreMinConstraints = reason .equals (EC2AgentTerminationReason .MAX_TOTAL_USES_EXHAUSTED );
82
+ if (cloud .scheduleToTerminate (instanceId , ignoreMinConstraints , reason )) {
74
83
// Instance successfully scheduled for termination, so no longer accept tasks (i.e. suspended)
75
84
shouldAcceptTasks = false ;
76
85
LOGGER .fine (String .format ("Suspended node %s after scheduling instance for termination, reason: %s." ,
77
- compNode .getDisplayName (), instanceId , reason ));
78
- justTerminated = true ;
86
+ node .getDisplayName (), instanceId , reason ));
87
+ markedForTermination = true ;
79
88
}
80
89
}
81
90
82
- if (cloud .isAlwaysReconnect () && !justTerminated && fc .isOffline () && !fc .isConnecting () && fc .isLaunchSupported ()) {
91
+ // if connection to the computer is lost for some reason, try to reconnect if configured to do so.
92
+ if (cloud .isAlwaysReconnect () && !markedForTermination && fc .isOffline () && !fc .isConnecting () && fc .isLaunchSupported ()) {
83
93
LOGGER .log (Level .INFO , "Reconnecting to instance: " + fc .getDisplayName ());
84
94
fc .tryReconnect ();
85
95
}
86
96
} finally {
87
97
fc .setAcceptingTasks (shouldAcceptTasks );
88
98
}
89
99
90
- return RE_CHECK_IN_MINUTE ;
100
+ return RE_CHECK_IN_A_MINUTE ;
91
101
}
92
102
93
103
@ Override
@@ -121,37 +131,56 @@ public void taskAccepted(Executor executor, Queue.Task task) {
121
131
final EC2FleetNode ec2FleetNode = computer .getNode ();
122
132
if (ec2FleetNode != null ) {
123
133
final int maxTotalUses = ec2FleetNode .getMaxTotalUses ();
124
- if (maxTotalUses <= -1 ) {
125
- LOGGER .fine ("maxTotalUses set to unlimited (" + ec2FleetNode .getMaxTotalUses () + ") for agent " + computer .getName ());
126
- } else if (maxTotalUses <= 1 ) {
127
- LOGGER .info ("maxTotalUses drained - suspending agent after current build " + computer .getName ());
128
- computer .setAcceptingTasks (false );
129
- } else {
130
- ec2FleetNode .setMaxTotalUses (ec2FleetNode .getMaxTotalUses () - 1 );
131
- LOGGER .info ("Agent " + computer .getName () + " has " + ec2FleetNode .getMaxTotalUses () + " builds left" );
134
+ if (maxTotalUses <= -1 ) { // unlimited uses
135
+ LOGGER .fine ("maxTotalUses set to unlimited (" + maxTotalUses + ") for agent " + computer .getName ());
136
+ } else { // limited uses
137
+ if (ec2FleetNode .getUsesRemaining () > 1 ) {
138
+ ec2FleetNode .decrementUsesRemaining ();
139
+ LOGGER .info ("Agent " + computer .getName () + " has " + ec2FleetNode .getUsesRemaining () + " builds left" );
140
+ } else if (ec2FleetNode .getUsesRemaining () == 1 ) { // current task should be the last task for this agent
141
+ LOGGER .info (String .format ("maxTotalUses drained - suspending agent %s after current build" , computer .getName ()));
142
+ computer .setAcceptingTasks (false );
143
+ ec2FleetNode .decrementUsesRemaining ();
144
+ } else {
145
+ // don't decrement when usesRemaining=0, as -1 has a special meaning.
146
+ LOGGER .warning (String .format ("Agent %s accepted a task after being suspended!!! MaxTotalUses: %d, uses remaining: %d" ,
147
+ computer .getName (), ec2FleetNode .getMaxTotalUses (), ec2FleetNode .getUsesRemaining ()));
148
+ }
132
149
}
133
150
}
134
151
}
135
152
}
136
153
137
154
/**
 * Listener callback for a task that finished on {@code executor} without a
 * reported problem. Delegates to {@code postJobAction}; the replacement call
 * (the '+' line below) passes {@code null} as the throwable, which makes
 * {@code postJobAction} take its "completed successfully" logging branch.
 *
 * @param executor executor that ran the task; forwarded to {@code postJobAction}
 * @param task     the finished task; not used by this method
 * @param l        long value supplied by the listener interface; not used by this
 *                 method (presumably the task duration in ms, confirm against
 *                 ExecutorListener)
 */
@ Override
138
155
public void taskCompleted (Executor executor , Queue .Task task , long l ) {
139
- postJobAction (executor );
156
+ postJobAction (executor , null );
140
157
}
141
158
142
159
/**
 * Listener callback for a task that finished on {@code executor} with a problem.
 * Delegates to {@code postJobAction}; the replacement call (the '+' line below)
 * forwards {@code throwable}, so that when it is non-null {@code postJobAction}
 * logs a warning that includes {@code throwable.getMessage()}.
 *
 * @param executor  executor that ran the task; forwarded to {@code postJobAction}
 * @param task      the finished task; not used by this method
 * @param l         long value supplied by the listener interface; not used by this
 *                  method (presumably the task duration in ms, confirm against
 *                  ExecutorListener)
 * @param throwable the problem reported for the task; forwarded unchanged
 */
@ Override
143
160
public void taskCompletedWithProblems (Executor executor , Queue .Task task , long l , Throwable throwable ) {
144
- postJobAction (executor );
161
+ postJobAction (executor , throwable );
145
162
}
146
163
147
- private void postJobAction (Executor executor ) {
164
+ private void postJobAction (final Executor executor , final Throwable throwable ) {
165
+ if (throwable != null ) {
166
+ LOGGER .warning (String .format ("Build %s completed with problems on agent %s. TimeSpentInQueue: %ds, duration: %ds, problems: %s" ,
167
+ executor .getCurrentExecutable (), executor .getOwner ().getName (),
168
+ TimeUnit .MILLISECONDS .toSeconds (executor .getTimeSpentInQueue ()),
169
+ TimeUnit .MILLISECONDS .toSeconds (executor .getElapsedTime ()), throwable .getMessage ()));
170
+ } else {
171
+ LOGGER .info (String .format ("Build %s completed successfully on agent %s. TimeSpentInQueue: %ds, duration: %ds." ,
172
+ executor .getCurrentExecutable (), executor .getOwner ().getName (),
173
+ TimeUnit .MILLISECONDS .toSeconds (executor .getTimeSpentInQueue ()),
174
+ TimeUnit .MILLISECONDS .toSeconds (executor .getElapsedTime ())));
175
+ }
176
+
148
177
final EC2FleetNodeComputer computer = (EC2FleetNodeComputer ) executor .getOwner ();
149
- if (computer != null ) {
178
+ if (computer != null ) {
150
179
final EC2FleetNode ec2FleetNode = computer .getNode ();
151
180
if (ec2FleetNode != null ) {
152
181
final AbstractEC2FleetCloud cloud = ec2FleetNode .getCloud ();
153
182
if (computer .countBusy () <= 1 && !computer .isAcceptingTasks ()) {
154
- LOGGER .info ("Calling scheduleToTerminate for node " + ec2FleetNode .getNodeName () + " due to maxTotalUses (" + ec2FleetNode . getMaxTotalUses () + ") " );
183
+ LOGGER .info ("Calling scheduleToTerminate for node " + ec2FleetNode .getNodeName () + " due to exhausted maxTotalUses. " );
155
184
// Schedule instance for termination even if it breaches minSize and minSpareSize constraints
156
185
cloud .scheduleToTerminate (ec2FleetNode .getNodeName (), true , EC2AgentTerminationReason .MAX_TOTAL_USES_EXHAUSTED );
157
186
}
0 commit comments