Skip to content

Commit dd7f980

Browse files
sceleDrew Macrae
authored and
Drew Macrae
committed
Implement extra resources for test actions
Add support for user-specified resource types in the resource manager. This generalizes the CPU, RAM and "test count" resource support for other resource types such as the number of GPUs, available GPU memory, the number of embedded devices connected to the host, etc. The available amount of extra resources can be specified using the new --local_extra_resources=<resourcename>=<amount> command line flag, which is analoguous to the existing --local_cpu_resources and --local_memory_resources flags. Tests can then declare the amount of extra resources they need by using a "resources:<resourcename>:<amount>" tag.
1 parent 3b33757 commit dd7f980

File tree

10 files changed

+260
-24
lines changed

10 files changed

+260
-24
lines changed

src/main/java/com/google/devtools/build/lib/actions/BUILD

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,7 @@ java_library(
128128
"//third_party:flogger",
129129
"//third_party:guava",
130130
"//third_party:jsr305",
131+
"//third_party:rxjava3",
131132
"//third_party/protobuf:protobuf_java",
132133
],
133134
)
@@ -311,6 +312,7 @@ java_library(
311312
"//third_party:flogger",
312313
"//third_party:guava",
313314
"//third_party:jsr305",
315+
"//third_party:rxjava3",
314316
],
315317
)
316318

src/main/java/com/google/devtools/build/lib/actions/ExecutionRequirements.java

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,35 @@ public String parseIfMatches(String tag) throws ValidationException {
154154
return null;
155155
});
156156

157+
/** How many extra resources an action requires for execution. */
158+
public static final ParseableRequirement RESOURCES =
159+
ParseableRequirement.create(
160+
"resources:<str>:<int>",
161+
Pattern.compile("resources:(.+:.+)"),
162+
s -> {
163+
Preconditions.checkNotNull(s);
164+
165+
int splitIndex = s.indexOf(":");
166+
String resourceCount = s.substring(splitIndex+1);
167+
int value;
168+
try {
169+
value = Integer.parseInt(resourceCount);
170+
} catch (NumberFormatException e) {
171+
return "can't be parsed as an integer";
172+
}
173+
174+
// De-and-reserialize & compare to only allow canonical integer formats.
175+
if (!Integer.toString(value).equals(resourceCount)) {
176+
return "must be in canonical format (e.g. '4' instead of '+04')";
177+
}
178+
179+
if (value < 1) {
180+
return "can't be zero or negative";
181+
}
182+
183+
return null;
184+
});
185+
157186
/** If an action supports running in persistent worker mode. */
158187
public static final String SUPPORTS_WORKERS = "supports-workers";
159188

src/main/java/com/google/devtools/build/lib/actions/ResourceManager.java

Lines changed: 64 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,12 @@
2727
import com.google.devtools.build.lib.worker.WorkerPool;
2828
import java.io.IOException;
2929
import java.util.Deque;
30+
import java.util.HashMap;
31+
import java.util.HashSet;
3032
import java.util.Iterator;
3133
import java.util.LinkedList;
34+
import java.util.Map;
35+
import java.util.Set;
3236
import java.util.concurrent.CountDownLatch;
3337
import javax.annotation.Nullable;
3438

@@ -171,13 +175,19 @@ public static ResourceManager instance() {
171175
// definition in the ResourceSet class.
172176
private double usedRam;
173177

178+
// Used amount of extra resources. Corresponds to the extra resource
179+
// definition in the ResourceSet class.
180+
private Map<String, Float> usedExtraResources;
181+
174182
// Used local test count. Corresponds to the local test count definition in the ResourceSet class.
175183
private int usedLocalTestCount;
176184

177185
/** If set, local-only actions are given priority over dynamically run actions. */
178186
private boolean prioritizeLocalActions;
179187

180-
private ResourceManager() {}
188+
private ResourceManager() {
189+
usedExtraResources = new HashMap<>();
190+
}
181191

182192
@VisibleForTesting
183193
public static ResourceManager instanceForTestingOnly() {
@@ -192,6 +202,7 @@ public static ResourceManager instanceForTestingOnly() {
192202
public synchronized void resetResourceUsage() {
193203
usedCpu = 0;
194204
usedRam = 0;
205+
usedExtraResources = new HashMap<>();
195206
usedLocalTestCount = 0;
196207
for (Pair<ResourceSet, LatchWithWorker> request : localRequests) {
197208
request.second.latch.countDown();
@@ -286,6 +297,17 @@ private Worker incrementResources(ResourceSet resources)
286297
throws IOException, InterruptedException {
287298
usedCpu += resources.getCpuUsage();
288299
usedRam += resources.getMemoryMb();
300+
301+
resources.getExtraResourceUsage().entrySet().forEach(
302+
resource -> {
303+
String key = (String)resource.getKey();
304+
float value = resource.getValue();
305+
if (usedExtraResources.containsKey(key)) {
306+
value += (float)usedExtraResources.get(key);
307+
}
308+
usedExtraResources.put(key, value);
309+
});
310+
289311
usedLocalTestCount += resources.getLocalTestCount();
290312

291313
if (resources.getWorkerKey() != null) {
@@ -298,6 +320,7 @@ private Worker incrementResources(ResourceSet resources)
298320
public synchronized boolean inUse() {
299321
return usedCpu != 0.0
300322
|| usedRam != 0.0
323+
|| !usedExtraResources.isEmpty()
301324
|| usedLocalTestCount != 0
302325
|| !localRequests.isEmpty()
303326
|| !dynamicWorkerRequests.isEmpty()
@@ -405,6 +428,13 @@ private boolean release(ResourceSet resources, @Nullable Worker worker)
405428
private synchronized void releaseResourcesOnly(ResourceSet resources) {
406429
usedCpu -= resources.getCpuUsage();
407430
usedRam -= resources.getMemoryMb();
431+
432+
for (Map.Entry<String, Float> resource : resources.getExtraResourceUsage().entrySet()) {
433+
String key = (String)resource.getKey();
434+
float value = (float)usedExtraResources.get(key) - resource.getValue();
435+
usedExtraResources.put(key, value);
436+
}
437+
408438
usedLocalTestCount -= resources.getLocalTestCount();
409439

410440
// TODO(bazel-team): (2010) rounding error can accumulate and value below can end up being
@@ -416,6 +446,19 @@ private synchronized void releaseResourcesOnly(ResourceSet resources) {
416446
if (usedRam < epsilon) {
417447
usedRam = 0;
418448
}
449+
450+
Set<String> toRemove = new HashSet<>();
451+
usedExtraResources.entrySet().forEach(
452+
resource -> {
453+
String key = (String)resource.getKey();
454+
float value = (float)usedExtraResources.get(key);
455+
if (value < epsilon) {
456+
toRemove.add(key);
457+
}
458+
});
459+
for (String key : toRemove) {
460+
usedExtraResources.remove(key);
461+
}
419462
}
420463

421464
private synchronized boolean processAllWaitingThreads() throws IOException, InterruptedException {
@@ -454,6 +497,23 @@ private synchronized void processWaitingThreads(Deque<Pair<ResourceSet, LatchWit
454497
}
455498
}
456499

500+
/**
501+
* Return true iff all requested extra resources are considered to be available.
502+
*/
503+
private boolean areExtraResourcesAvailable(ResourceSet resources) {
504+
for (Map.Entry<String, Float> resource : resources.getExtraResourceUsage().entrySet()) {
505+
String key = (String)resource.getKey();
506+
float used = (float)usedExtraResources.getOrDefault(key, 0f);
507+
float requested = resource.getValue();
508+
float available = (float)availableResources.getExtraResourceUsage().getOrDefault(key, 0f);
509+
float epsilon = 0.0001f; // Account for possible rounding errors.
510+
if (requested != 0.0 && used != 0.0 && requested + used > available + epsilon) {
511+
return false;
512+
}
513+
}
514+
return true;
515+
}
516+
457517
// Method will return true if all requested resources are considered to be available.
458518
@VisibleForTesting
459519
boolean areResourcesAvailable(ResourceSet resources) {
@@ -472,7 +532,7 @@ boolean areResourcesAvailable(ResourceSet resources) {
472532
workerKey == null
473533
|| (activeWorkers < availableWorkers && workerPool.couldBeBorrowed(workerKey));
474534

475-
if (usedCpu == 0.0 && usedRam == 0.0 && usedLocalTestCount == 0 && workerIsAvailable) {
535+
if (usedCpu == 0.0 && usedRam == 0.0 && usedExtraResources.isEmpty() && usedLocalTestCount == 0 && workerIsAvailable) {
476536
return true;
477537
}
478538
// Use only MIN_NECESSARY_???_RATIO of the resource value to check for
@@ -503,7 +563,8 @@ boolean areResourcesAvailable(ResourceSet resources) {
503563
localTestCount == 0
504564
|| usedLocalTestCount == 0
505565
|| usedLocalTestCount + localTestCount <= availableLocalTestCount;
506-
return cpuIsAvailable && ramIsAvailable && localTestCountIsAvailable && workerIsAvailable;
566+
boolean extraResourcesIsAvailable = areExtraResourcesAvailable(resources);
567+
return cpuIsAvailable && ramIsAvailable && extraResourcesIsAvailable && localTestCountIsAvailable && workerIsAvailable;
507568
}
508569

509570
@VisibleForTesting

src/main/java/com/google/devtools/build/lib/actions/ResourceSet.java

Lines changed: 35 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -14,13 +14,16 @@
1414

1515
package com.google.devtools.build.lib.actions;
1616

17+
import com.google.common.base.Joiner;
1718
import com.google.common.base.Splitter;
19+
import com.google.common.collect.ImmutableMap;
1820
import com.google.common.primitives.Doubles;
1921
import com.google.devtools.build.lib.concurrent.ThreadSafety.Immutable;
2022
import com.google.devtools.build.lib.util.OS;
2123
import com.google.devtools.build.lib.worker.WorkerKey;
2224
import com.google.devtools.common.options.Converter;
2325
import com.google.devtools.common.options.OptionsParsingException;
26+
import io.reactivex.rxjava3.annotations.NonNull;
2427
import java.util.Iterator;
2528
import java.util.NoSuchElementException;
2629
import javax.annotation.Nullable;
@@ -43,16 +46,26 @@ public class ResourceSet implements ResourceSetOrBuilder {
4346
/** The number of CPUs, or fractions thereof. */
4447
private final double cpuUsage;
4548

49+
/**
50+
* Map of extra resources (for example: GPUs, embedded boards, ...) mapping
51+
* name of the resource to a value.
52+
*/
53+
private final ImmutableMap<String, Float> extraResourceUsage;
54+
4655
/** The number of local tests. */
4756
private final int localTestCount;
4857

4958
/** The workerKey of used worker. Null if no worker is used. */
5059
@Nullable private final WorkerKey workerKey;
5160

52-
private ResourceSet(
53-
double memoryMb, double cpuUsage, int localTestCount, @Nullable WorkerKey workerKey) {
61+
private ResourceSet(double memoryMb, double cpuUsage, int localTestCount, @Nullable WorkerKey workerKey) {
62+
this(memoryMb, cpuUsage, ImmutableMap.of(), localTestCount, workerKey);
63+
}
64+
65+
private ResourceSet(double memoryMb, double cpuUsage, @NonNull ImmutableMap<String, Float> extraResourceUsage, int localTestCount, @Nullable WorkerKey workerKey) {
5466
this.memoryMb = memoryMb;
5567
this.cpuUsage = cpuUsage;
68+
this.extraResourceUsage = extraResourceUsage;
5669
this.localTestCount = localTestCount;
5770
this.workerKey = workerKey;
5871
}
@@ -83,21 +96,31 @@ public static ResourceSet createWithLocalTestCount(int localTestCount) {
8396
}
8497

8598
/**
86-
* Returns a new ResourceSet with the provided values for memoryMb, cpuUsage, ioUsage, and
99+
* Returns a new ResourceSet with the provided values for memoryMb, cpuUsage, and
87100
* localTestCount. Most action resource definitions should use {@link #createWithRamCpu} or {@link
88101
* #createWithLocalTestCount(int)}. Use this method primarily when constructing ResourceSets that
89102
* represent available resources.
90103
*/
91104
public static ResourceSet create(double memoryMb, double cpuUsage, int localTestCount) {
92-
return createWithWorkerKey(memoryMb, cpuUsage, localTestCount, /* workerKey= */ null);
105+
return ResourceSet.create(memoryMb, cpuUsage, ImmutableMap.of(), localTestCount, /* wolkerKey= */ null);
106+
}
107+
108+
/**
109+
* Returns a new ResourceSet with the provided values for memoryMb, cpuUsage, extraResources, and
110+
* localTestCount. Most action resource definitions should use {@link #createWithRamCpu} or
111+
* {@link #createWithLocalTestCount(int)}. Use this method primarily when constructing
112+
* ResourceSets that represent available resources.
113+
*/
114+
public static ResourceSet create(double memoryMb, double cpuUsage, ImmutableMap<String, Float> extraResourceUsage, int localTestCount) {
115+
return createWithWorkerKey(memoryMb, cpuUsage, extraResourceUseage, localTestCount, /* workerKey= */ null);
93116
}
94117

95118
public static ResourceSet createWithWorkerKey(
96-
double memoryMb, double cpuUsage, int localTestCount, WorkerKey workerKey) {
97-
if (memoryMb == 0 && cpuUsage == 0 && localTestCount == 0 && workerKey == null) {
119+
double memoryMb, double cpuUsage, ImmutableMap<String, Float> extraResourceUsage, int localTestCount, WorkerKey workerKey) {
120+
if (memoryMb == 0 && cpuUsage == 0 && extraResourceUsage.size() == 0 && localTestCount == 0 && workerKey == null) {
98121
return ZERO;
99122
}
100-
return new ResourceSet(memoryMb, cpuUsage, localTestCount, workerKey);
123+
return new ResourceSet(memoryMb, cpuUsage, extraResourceUsage, localTestCount, workerKey);
101124
}
102125

103126
/** Returns the amount of real memory (resident set size) used in MB. */
@@ -124,6 +147,10 @@ public double getCpuUsage() {
124147
return cpuUsage;
125148
}
126149

150+
public ImmutableMap<String, Float> getExtraResourceUsage() {
151+
return extraResourceUsage;
152+
}
153+
127154
/** Returns the local test count used. */
128155
public int getLocalTestCount() {
129156
return localTestCount;
@@ -138,6 +165,7 @@ public String toString() {
138165
+ "CPU: "
139166
+ cpuUsage
140167
+ "\n"
168+
+ Joiner.on("\n").withKeyValueSeparator(": ").join(extraResourceUsage.entrySet())
141169
+ "Local tests: "
142170
+ localTestCount
143171
+ "\n";

src/main/java/com/google/devtools/build/lib/analysis/test/TestTargetProperties.java

Lines changed: 47 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
import com.google.devtools.build.lib.server.FailureDetails.FailureDetail;
3232
import com.google.devtools.build.lib.server.FailureDetails.TestAction;
3333
import com.google.devtools.build.lib.server.FailureDetails.TestAction.Code;
34+
import java.util.HashMap;
3435
import java.util.List;
3536
import java.util.Map;
3637

@@ -160,25 +161,29 @@ public ResourceSet getLocalResourceUsage(Label label, boolean usingLocalTestJobs
160161
}
161162

162163
ResourceSet testResourcesFromSize = TestTargetProperties.getResourceSetFromSize(size);
164+
return ResourceSet.create(
165+
testResourcesFromSize.getMemoryMb(),
166+
getLocalCpuResourceUsage(label),
167+
getLocalExtraResourceUsage(label),
168+
testResourcesFromSize.getLocalTestCount());
169+
}
163170

171+
private double getLocalCpuResourceUsage(Label label) throws UserExecException {
172+
ResourceSet testResourcesFromSize = TestTargetProperties.getResourceSetFromSize(size);
164173
// Tests can override their CPU reservation with a "cpu:<n>" tag.
165-
ResourceSet testResourcesFromTag = null;
174+
double cpuCount = -1.0;
166175
for (String tag : executionInfo.keySet()) {
167176
try {
168177
String cpus = ExecutionRequirements.CPU.parseIfMatches(tag);
169178
if (cpus != null) {
170-
if (testResourcesFromTag != null) {
179+
if (cpuCount != -1.0) {
171180
String message =
172181
String.format(
173182
"%s has more than one '%s' tag, but duplicate tags aren't allowed",
174183
label, ExecutionRequirements.CPU.userFriendlyName());
175184
throw new UserExecException(createFailureDetail(message, Code.DUPLICATE_CPU_TAGS));
176185
}
177-
testResourcesFromTag =
178-
ResourceSet.create(
179-
testResourcesFromSize.getMemoryMb(),
180-
Float.parseFloat(cpus),
181-
testResourcesFromSize.getLocalTestCount());
186+
cpuCount = Float.parseFloat(cpus);
182187
}
183188
} catch (ValidationException e) {
184189
String message =
@@ -191,10 +196,44 @@ public ResourceSet getLocalResourceUsage(Label label, boolean usingLocalTestJobs
191196
throw new UserExecException(createFailureDetail(message, Code.INVALID_CPU_TAG));
192197
}
193198
}
199+
return cpuCount != -1.0 ? cpuCount : testResourcesFromSize.getCpuUsage();
200+
}
194201

195-
return testResourcesFromTag != null ? testResourcesFromTag : testResourcesFromSize;
202+
private ImmutableMap<String, Float> getLocalExtraResourceUsage(Label label) throws UserExecException {
203+
// Tests can specify requirements for extra resources using "resources:<resourcename>:<amount>" tag.
204+
Map<String, Float> extraResources = new HashMap<>();
205+
for (String tag : executionInfo.keySet()) {
206+
try {
207+
String extras = ExecutionRequirements.RESOURCES.parseIfMatches(tag);
208+
if (extras != null) {
209+
int splitIndex = extras.indexOf(":");
210+
String resourceName = extras.substring(0, splitIndex);
211+
String resourceCount = extras.substring(splitIndex+1);
212+
if (extraResources.get(resourceName) != null) {
213+
String message =
214+
String.format(
215+
"%s has more than one '%s' tag, but duplicate tags aren't allowed",
216+
label, ExecutionRequirements.RESOURCES.userFriendlyName());
217+
throw new UserExecException(createFailureDetail(message, Code.DUPLICATE_CPU_TAGS));
218+
}
219+
extraResources.put(resourceName, Float.parseFloat(resourceCount));
220+
}
221+
} catch (ValidationException e) {
222+
String message =
223+
String.format(
224+
"%s has a '%s' tag, but its value '%s' didn't pass validation: %s",
225+
label,
226+
ExecutionRequirements.RESOURCES.userFriendlyName(),
227+
e.getTagValue(),
228+
e.getMessage());
229+
throw new UserExecException(createFailureDetail(message, Code.INVALID_CPU_TAG));
230+
}
231+
}
232+
return ImmutableMap.copyOf(extraResources);
196233
}
197234

235+
236+
198237
private static FailureDetail createFailureDetail(String message, Code detailedCode) {
199238
return FailureDetail.newBuilder()
200239
.setMessage(message)

0 commit comments

Comments
 (0)