Skip to content

Commit 03300ce

Browse files
Merge pull request #503 from gbhat618/update-clean-lost-node
Update clean lost node without requiring a instanceId
2 parents 4439803 + fec2061 commit 03300ce

File tree

9 files changed

+629
-136
lines changed

9 files changed

+629
-136
lines changed

src/main/java/com/google/jenkins/plugins/computeengine/CleanLostNodesWork.java

+86-16
Original file line numberDiff line numberDiff line change
@@ -16,17 +16,23 @@
1616

1717
package com.google.jenkins.plugins.computeengine;
1818

19-
import static com.google.jenkins.plugins.computeengine.ComputeEngineCloud.CLOUD_ID_LABEL_KEY;
2019
import static java.util.Collections.emptyList;
2120

2221
import com.google.api.services.compute.model.Instance;
22+
import com.google.common.annotations.VisibleForTesting;
2323
import com.google.common.collect.ImmutableMap;
24+
import com.google.jenkins.plugins.computeengine.client.ComputeClientV2;
2425
import hudson.Extension;
2526
import hudson.model.PeriodicWork;
2627
import hudson.model.Slave;
2728
import java.io.IOException;
29+
import java.security.GeneralSecurityException;
30+
import java.time.LocalDateTime;
31+
import java.time.OffsetDateTime;
32+
import java.time.ZoneOffset;
33+
import java.time.format.DateTimeFormatter;
34+
import java.time.temporal.ChronoUnit;
2835
import java.util.List;
29-
import java.util.Map;
3036
import java.util.Set;
3137
import java.util.logging.Level;
3238
import java.util.logging.Logger;
@@ -39,11 +45,27 @@
3945
@Symbol("cleanLostNodesWork")
4046
public class CleanLostNodesWork extends PeriodicWork {
4147
protected final Logger logger = Logger.getLogger(getClass().getName());
48+
public static final String NODE_IN_USE_LABEL_KEY = "jenkins_node_last_refresh";
49+
public static final long RECURRENCE_PERIOD = Long.parseLong(
50+
System.getProperty(CleanLostNodesWork.class.getName() + ".recurrencePeriod", String.valueOf(HOUR)));
51+
52+
@VisibleForTesting
53+
public static final int LOST_MULTIPLIER = 3;
54+
/**
55+
* The formatter for the label timestamp value as per google label format,
56+
* "The value can only contain lowercase letters, numeric characters, underscores and dashes.
57+
* The value can be at most 63 characters long. International characters are allowed".
58+
*/
59+
private static final DateTimeFormatter formatter = DateTimeFormatter.ofPattern("yyyy_MM_dd't'HH_mm_ss_SSS'z'");
4260

4361
/** {@inheritDoc} */
4462
@Override
4563
public long getRecurrencePeriod() {
46-
return HOUR;
64+
return RECURRENCE_PERIOD;
65+
}
66+
67+
public static String getLastRefreshLabelVal() {
68+
return formatter.format(OffsetDateTime.now(ZoneOffset.UTC));
4769
}
4870

4971
/** {@inheritDoc} */
@@ -55,22 +77,50 @@ protected void doRun() {
5577

5678
private void cleanCloud(ComputeEngineCloud cloud) {
5779
logger.log(Level.FINEST, "Cleaning cloud " + cloud.getCloudName());
58-
List<Instance> remoteInstances = findRemoteInstances(cloud);
80+
ComputeClientV2 clientV2;
81+
try {
82+
clientV2 = cloud.getClientV2();
83+
} catch (GeneralSecurityException | IOException ex) {
84+
logger.log(Level.WARNING, "Error getting clientV2 for cloud " + cloud.getCloudName(), ex);
85+
return;
86+
}
87+
List<Instance> remoteInstances = findRunningRemoteInstances(clientV2);
5988
Set<String> localInstances = findLocalInstances(cloud);
89+
if (!(localInstances.isEmpty() || remoteInstances.isEmpty())) {
90+
updateLocalInstancesLabel(clientV2, localInstances, remoteInstances);
91+
}
6092
remoteInstances.stream()
6193
.filter(remote -> isOrphaned(remote, localInstances))
6294
.forEach(remote -> terminateInstance(remote, cloud));
6395
}
6496

6597
private boolean isOrphaned(Instance remote, Set<String> localInstances) {
66-
String instanceName = remote.getName();
67-
logger.log(Level.FINEST, "Checking instance " + instanceName);
68-
return !localInstances.contains(instanceName);
98+
/* It is necessary to check if the remote instance is present in localInstances.
99+
The `remote` instance has an old timestamp because it hasn't been fetched again
100+
after the `updateLocalInstancesLabel` call, to avoid extra network calls.
101+
*/
102+
if (localInstances.contains(remote.getName())) {
103+
return false;
104+
}
105+
String nodeLastRefresh = remote.getLabels().get(NODE_IN_USE_LABEL_KEY);
106+
if (nodeLastRefresh == null) {
107+
return false;
108+
}
109+
OffsetDateTime lastRefresh =
110+
LocalDateTime.parse(nodeLastRefresh, formatter).atOffset(ZoneOffset.UTC);
111+
boolean isOrphan = lastRefresh
112+
.plus(RECURRENCE_PERIOD * LOST_MULTIPLIER, ChronoUnit.MILLIS)
113+
.isBefore(OffsetDateTime.now(ZoneOffset.UTC));
114+
logger.log(
115+
Level.FINEST,
116+
() -> "Instance " + remote.getName() + " last_refresh label value: " + nodeLastRefresh + ", isOrphan: "
117+
+ isOrphan);
118+
return isOrphan;
69119
}
70120

71121
private void terminateInstance(Instance remote, ComputeEngineCloud cloud) {
72122
String instanceName = remote.getName();
73-
logger.log(Level.INFO, "Remote instance " + instanceName + " not found locally, removing it");
123+
logger.log(Level.INFO, "Removing orphaned instance: " + instanceName);
74124
try {
75125
cloud.getClient().terminateInstanceAsync(cloud.getProjectId(), remote.getZone(), instanceName);
76126
} catch (IOException ex) {
@@ -86,27 +136,47 @@ private List<ComputeEngineCloud> getClouds() {
86136
}
87137

88138
private Set<String> findLocalInstances(ComputeEngineCloud cloud) {
89-
return Jenkins.get().getNodes().stream()
139+
var localInstances = Jenkins.get().getNodes().stream()
90140
.filter(node -> node instanceof ComputeEngineInstance)
91141
.map(node -> (ComputeEngineInstance) node)
92142
.filter(node -> node.getCloud().equals(cloud))
93143
.map(Slave::getNodeName)
94144
.collect(Collectors.toSet());
145+
logger.log(Level.FINEST, () -> "Found " + localInstances.size() + " local instances");
146+
return localInstances;
95147
}
96148

97-
private List<Instance> findRemoteInstances(ComputeEngineCloud cloud) {
98-
Map<String, String> filterLabel = ImmutableMap.of(CLOUD_ID_LABEL_KEY, cloud.getInstanceId());
149+
private List<Instance> findRunningRemoteInstances(ComputeClientV2 clientV2) {
99150
try {
100-
return cloud.getClient().listInstancesWithLabel(cloud.getProjectId(), filterLabel).stream()
101-
.filter(instance -> shouldTerminateStatus(instance.getStatus()))
102-
.collect(Collectors.toList());
151+
var remoteInstances = clientV2.retrieveInstanceByLabelKeyAndStatus(NODE_IN_USE_LABEL_KEY, "RUNNING");
152+
logger.log(Level.FINEST, () -> "Found " + remoteInstances.size() + " running remote instances");
153+
return remoteInstances;
103154
} catch (IOException ex) {
104155
logger.log(Level.WARNING, "Error finding remote instances", ex);
105156
return emptyList();
106157
}
107158
}
108159

109-
private boolean shouldTerminateStatus(String status) {
110-
return !status.equals("STOPPING");
160+
/**
161+
* Updates the label of the local instances to indicate they are still in use. The method makes N network calls
162+
* for N local instances, couldn't find any bulk update apis.
163+
*/
164+
private void updateLocalInstancesLabel(
165+
ComputeClientV2 clientV2, Set<String> localInstances, List<Instance> remoteInstances) {
166+
var remoteInstancesByName =
167+
remoteInstances.stream().collect(Collectors.toMap(Instance::getName, instance -> instance));
168+
var labelToUpdate = ImmutableMap.of(NODE_IN_USE_LABEL_KEY, getLastRefreshLabelVal());
169+
for (String instanceName : localInstances) {
170+
var remoteInstance = remoteInstancesByName.get(instanceName);
171+
if (remoteInstance == null) {
172+
continue;
173+
}
174+
try {
175+
clientV2.updateInstanceLabels(remoteInstance, labelToUpdate);
176+
logger.log(Level.FINEST, () -> "Updated label for instance " + instanceName);
177+
} catch (IOException e) {
178+
logger.log(Level.WARNING, "Error updating label for instance " + instanceName, e);
179+
}
180+
}
111181
}
112182
}

src/main/java/com/google/jenkins/plugins/computeengine/ComputeEngineCloud.java

+16
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
import com.google.common.base.Strings;
3030
import com.google.common.collect.ImmutableMap;
3131
import com.google.jenkins.plugins.computeengine.client.ClientUtil;
32+
import com.google.jenkins.plugins.computeengine.client.ComputeClientV2;
3233
import com.google.jenkins.plugins.credentials.oauth.GoogleOAuth2Credentials;
3334
import edu.umd.cs.findbugs.annotations.NonNull;
3435
import hudson.Extension;
@@ -48,6 +49,7 @@
4849
import hudson.util.ListBoxModel;
4950
import java.io.IOException;
5051
import java.io.PrintStream;
52+
import java.security.GeneralSecurityException;
5153
import java.util.ArrayList;
5254
import java.util.Collection;
5355
import java.util.Iterator;
@@ -90,6 +92,7 @@ public class ComputeEngineCloud extends AbstractCloudImpl {
9092
private List<InstanceConfiguration> configurations;
9193

9294
private transient volatile ComputeClient client;
95+
private transient volatile ComputeClientV2 clientV2;
9396
private boolean noDelayProvisioning;
9497

9598
@DataBoundConstructor
@@ -161,6 +164,8 @@ protected Object readResolve() {
161164

162165
// Apply a label that identifies the name of this instance configuration
163166
configuration.appendLabel(CONFIG_LABEL_KEY, configuration.getNamePrefix());
167+
configuration.appendLabel(
168+
CleanLostNodesWork.NODE_IN_USE_LABEL_KEY, CleanLostNodesWork.getLastRefreshLabelVal());
164169
}
165170
}
166171
setInstanceId(instanceId);
@@ -209,6 +214,17 @@ public ComputeClient getClient() {
209214
return client;
210215
}
211216

217+
public ComputeClientV2 getClientV2() throws IOException, GeneralSecurityException {
218+
if (clientV2 == null) {
219+
synchronized (this) {
220+
if (clientV2 == null) {
221+
clientV2 = ClientUtil.createComputeClientV2(projectId, credentialsId);
222+
}
223+
}
224+
}
225+
return clientV2;
226+
}
227+
212228
/**
213229
* Set configurations for this cloud.
214230
*

src/main/java/com/google/jenkins/plugins/computeengine/client/ClientUtil.java

+17
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,10 @@
44
import com.cloudbees.plugins.credentials.CredentialsProvider;
55
import com.cloudbees.plugins.credentials.domains.DomainRequirement;
66
import com.google.api.client.auth.oauth2.Credential;
7+
import com.google.api.client.googleapis.javanet.GoogleNetHttpTransport;
78
import com.google.api.client.http.HttpTransport;
9+
import com.google.api.client.json.jackson2.JacksonFactory;
10+
import com.google.api.services.compute.Compute;
811
import com.google.cloud.graphite.platforms.plugin.client.ClientFactory;
912
import com.google.common.base.Preconditions;
1013
import com.google.common.base.Strings;
@@ -19,6 +22,7 @@
1922
import java.security.GeneralSecurityException;
2023
import java.util.List;
2124
import java.util.Optional;
25+
import jenkins.model.Jenkins;
2226

2327
/** Utilities for using the gcp-plugin-core clients. */
2428
public class ClientUtil {
@@ -88,4 +92,17 @@ private static GoogleRobotCredentials getRobotCredentials(
8892
private static Credential getGoogleCredential(GoogleRobotCredentials credentials) throws GeneralSecurityException {
8993
return credentials.getGoogleCredential(new ComputeEngineScopeRequirement());
9094
}
95+
96+
public static ComputeClientV2 createComputeClientV2(String projectId, String credentialsId)
97+
throws GeneralSecurityException, IOException {
98+
Credential httpRequestInitializer = ClientUtil.getGoogleCredential(
99+
ClientUtil.getRobotCredentials(Jenkins.get(), ImmutableList.of(), credentialsId));
100+
Compute compute = new Compute.Builder(
101+
GoogleNetHttpTransport.newTrustedTransport(), new JacksonFactory(), httpRequestInitializer)
102+
.setGoogleClientRequestInitializer(request ->
103+
request.setRequestHeaders(request.getRequestHeaders().setUserAgent(APPLICATION_NAME)))
104+
.setApplicationName(ClientUtil.APPLICATION_NAME)
105+
.build();
106+
return new ComputeClientV2(projectId, compute);
107+
}
91108
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
package com.google.jenkins.plugins.computeengine.client;
2+
3+
import com.google.api.services.compute.Compute;
4+
import com.google.api.services.compute.model.Instance;
5+
import com.google.api.services.compute.model.InstancesScopedList;
6+
import com.google.api.services.compute.model.InstancesSetLabelsRequest;
7+
import java.io.IOException;
8+
import java.util.List;
9+
import java.util.Map;
10+
import java.util.Objects;
11+
import java.util.stream.Collectors;
12+
import lombok.Getter;
13+
14+
/**
15+
* Extends {@link com.google.cloud.graphite.platforms.plugin.client.ComputeClient} with additional functionalities.
16+
* <p>This class serves as a venue for implementing features not available in the archived Graphite Java library
17+
* (<a href="https://github.com/GoogleCloudPlatform/gcp-plugin-core-java">gcp-plugin-core-java</a>, last updated in December 2019).
18+
* Consideration for the gradual evolution of this class is suggested, including the re-implementation of methods
19+
* currently utilized from the Graphite library, to ensure dependency solely on the Google API Java Client Services
20+
* library (<a href="https://github.com/googleapis/google-api-java-client-services">google-api-java-client-services</a>).
21+
* This approach aims to eventually eliminate the reliance on the Graphite library.
22+
*/
23+
public class ComputeClientV2 {
24+
25+
private final String projectId;
26+
27+
@Getter
28+
private final Compute compute;
29+
30+
public ComputeClientV2(String projectId, Compute compute) {
31+
this.projectId = projectId;
32+
this.compute = compute;
33+
}
34+
35+
/**
36+
* Updates the labels of a specified {@code instance} by merging or replacing them with {@code newLabels}.
37+
* <p>This method adds any new labels found in {@code newLabels} to the instance's existing labels and updates
38+
* the values of any existing labels if they are also present in {@code newLabels}. Labels existing on the instance
39+
* that are not in {@code newLabels} remain unchanged. This operation can only result in the addition of new
40+
* labels or the modification of existing ones.
41+
*
42+
* @param instance the instance whose labels are to be updated; must not be {@code null}
43+
* @param newLabels the new labels to be merged with or replace the existing labels of the instance; must not be {@code null}
44+
* @throws IOException if an I/O error occurs during the label update process.
45+
*/
46+
public void updateInstanceLabels(Instance instance, Map<String, String> newLabels) throws IOException {
47+
var allLabels = instance.getLabels();
48+
allLabels.putAll(newLabels);
49+
var labelsRequest = new InstancesSetLabelsRequest()
50+
.setLabels(allLabels)
51+
.setLabelFingerprint(instance.getLabelFingerprint());
52+
String zoneLink = instance.getZone();
53+
String zone = zoneLink.substring(zoneLink.lastIndexOf("/") + 1);
54+
compute.instances()
55+
.setLabels(projectId, zone, instance.getName(), labelsRequest)
56+
.execute();
57+
}
58+
59+
/**
60+
* Fetches instances by label key existence and status.
61+
* <p>Applies Google Compute Engine aggregated list syntax for filtering:
62+
* <a href="https://cloud.google.com/compute/docs/reference/rest/v1/instances/aggregatedList">aggregatedList API</a>.
63+
*
64+
* @param key the non-empty label key to filter by.
65+
* @param status the instance status (RUNNING, STOPPING, etc.) as defined in:
66+
* <a href="https://cloud.google.com/compute/docs/instances/instance-lifecycle#instance-states">Instance States</a>.
67+
* @return List of {@link Instance} matching criteria, or empty list if none.
68+
* @throws IOException for communication issues with Compute Engine API.
69+
*/
70+
public List<Instance> retrieveInstanceByLabelKeyAndStatus(String key, String status) throws IOException {
71+
String filter = "labels." + key + ":*" + " AND status=" + status;
72+
var response =
73+
compute.instances().aggregatedList(projectId).setFilter(filter).execute();
74+
var items = response.getItems();
75+
if (items == null) {
76+
return List.of();
77+
}
78+
return items.values().stream()
79+
.map(InstancesScopedList::getInstances)
80+
.filter(Objects::nonNull)
81+
.flatMap(List::stream)
82+
.collect(Collectors.toList());
83+
}
84+
}

0 commit comments

Comments
 (0)